standards/79067: /bin/sh should be more intelligent about IFS
Jilles Tjoelker
jilles at stack.nl
Sat Apr 4 04:06:50 PDT 2009
A patch similar to the 'read' part of this one was committed to
-CURRENT.
The other part of the patch in the PR is a bit different from NetBSD,
and is wrong: it drops a final empty argument in "$@". For example,
set -- a ''; set -- "$@"; echo $# should give 2, but gives 1.
So I put in the NetBSD code, which does not have this issue
(sh-ifs-expand-netbsd.patch). Additionally, it cleans up the code a bit.
However, the NetBSD code has a related issue: it drops a final empty
argument in "$@"$s where $s contains one or more characters of IFS
whitespace. For example, s=' '; set -- a ''; set -- "$@"$s; echo $#
should give 2, but gives 1. A fix for this is in sh-ifs-expand.patch.
With the read change and both patches, the tests from
http://www.research.att.com/~gsf/public/ifs.sh all pass. Some more tests
with "$@" and IFS are in moreifs.sh.
--
Jilles Tjoelker
-------------- next part --------------
--- src/bin/sh/expand.c.orig 2008-09-04 19:34:53.000000000 +0200
+++ src/bin/sh/expand.c 2009-04-04 01:32:07.000000000 +0200
@@ -82,7 +82,7 @@
struct ifsregion *next; /* next region in list */
int begoff; /* offset of start of region */
int endoff; /* offset of end of region */
- int nulonly; /* search for nul bytes only */
+ int inquotes; /* search for nul bytes only */
};
@@ -936,13 +936,19 @@
*/
STATIC void
-recordregion(int start, int end, int nulonly)
+recordregion(int start, int end, int inquotes)
{
struct ifsregion *ifsp;
if (ifslastp == NULL) {
ifsp = &ifsfirst;
} else {
+ if (ifslastp->endoff == start
+ && ifslastp->inquotes == inquotes) {
+ /* extend previous area */
+ ifslastp->endoff = end;
+ return;
+ }
ifsp = (struct ifsregion *)ckmalloc(sizeof (struct ifsregion));
ifslastp->next = ifsp;
}
@@ -950,7 +956,7 @@
ifslastp->next = NULL;
ifslastp->begoff = start;
ifslastp->endoff = end;
- ifslastp->nulonly = nulonly;
+ ifslastp->inquotes = inquotes;
}
@@ -969,75 +975,88 @@
char *p;
char *q;
char *ifs;
- int ifsspc;
- int nulonly;
-
+ const char *ifsspc;
+ int had_param_ch = 0;
start = string;
- ifsspc = 0;
- nulonly = 0;
- if (ifslastp != NULL) {
- ifsp = &ifsfirst;
- do {
- p = string + ifsp->begoff;
- nulonly = ifsp->nulonly;
- ifs = nulonly ? nullstr :
- ( ifsset() ? ifsval() : " \t\n" );
- ifsspc = 0;
- while (p < string + ifsp->endoff) {
- q = p;
- if (*p == CTLESC)
+
+ if (ifslastp == NULL) {
+ /* Return entire argument, IFS doesn't apply to any of it */
+ sp = (struct strlist *)stalloc(sizeof *sp);
+ sp->text = start;
+ *arglist->lastp = sp;
+ arglist->lastp = &sp->next;
+ return;
+ }
+
+ ifs = ifsset() ? ifsval() : " \t\n";
+
+ for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) {
+ p = string + ifsp->begoff;
+ while (p < string + ifsp->endoff) {
+ had_param_ch = 1;
+ q = p;
+ if (*p == CTLESC)
+ p++;
+ if (ifsp->inquotes) {
+ /* Only NULs (should be from "$@") end args */
+ if (*p != 0) {
p++;
- if (strchr(ifs, *p)) {
- if (!nulonly)
- ifsspc = (strchr(" \t\n", *p) != NULL);
- /* Ignore IFS whitespace at start */
- if (q == start && ifsspc) {
- p++;
- start = p;
- continue;
- }
- *q = '\0';
- sp = (struct strlist *)stalloc(sizeof *sp);
- sp->text = start;
- *arglist->lastp = sp;
- arglist->lastp = &sp->next;
+ continue;
+ }
+ ifsspc = NULL;
+ } else {
+ if (!strchr(ifs, *p)) {
p++;
- if (!nulonly) {
- for (;;) {
- if (p >= string + ifsp->endoff) {
- break;
- }
- q = p;
- if (*p == CTLESC)
- p++;
- if (strchr(ifs, *p) == NULL ) {
- p = q;
- break;
- } else if (strchr(" \t\n",*p) == NULL) {
- if (ifsspc) {
- p++;
- ifsspc = 0;
- } else {
- p = q;
- break;
- }
- } else
- p++;
- }
- }
- start = p;
- } else
+ continue;
+ }
+ had_param_ch = 0;
+ ifsspc = strchr(" \t\n", *p);
+
+ /* Ignore IFS whitespace at start */
+ if (q == start && ifsspc != NULL) {
p++;
+ start = p;
+ continue;
+ }
}
- } while ((ifsp = ifsp->next) != NULL);
- if (*start || (!ifsspc && start > string)) {
+
+ /* Save this argument... */
+ *q = '\0';
sp = (struct strlist *)stalloc(sizeof *sp);
sp->text = start;
*arglist->lastp = sp;
arglist->lastp = &sp->next;
+ p++;
+
+ if (ifsspc != NULL) {
+ /* Ignore further trailing IFS whitespace */
+ for (; p < string + ifsp->endoff; p++) {
+ q = p;
+ if (*p == CTLESC)
+ p++;
+ if (strchr(ifs, *p) == NULL) {
+ p = q;
+ break;
+ }
+ if (strchr(" \t\n", *p) == NULL) {
+ p++;
+ break;
+ }
+ }
+ }
+ start = p;
}
- } else {
+ }
+
+ /*
+ * Save anything left as an argument.
+ * Traditionally we have treated 'IFS=':'; set -- x$IFS' as
+ * generating 2 arguments, the second of which is empty.
+ * Some recent clarification of the Posix spec say that it
+ * should only generate one....
+ */
+ if (had_param_ch || *start != 0) {
sp = (struct strlist *)stalloc(sizeof *sp);
sp->text = start;
*arglist->lastp = sp;
-------------- next part --------------
--- src/bin/sh/expand.c.netbsd 2009-04-04 01:32:07.000000000 +0200
+++ src/bin/sh/expand.c 2009-04-04 02:01:54.000000000 +0200
@@ -994,12 +994,12 @@
for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) {
p = string + ifsp->begoff;
while (p < string + ifsp->endoff) {
- had_param_ch = 1;
q = p;
if (*p == CTLESC)
p++;
if (ifsp->inquotes) {
/* Only NULs (should be from "$@") end args */
+ had_param_ch = 1;
if (*p != 0) {
p++;
continue;
@@ -1007,10 +1007,10 @@
ifsspc = NULL;
} else {
if (!strchr(ifs, *p)) {
+ had_param_ch = 1;
p++;
continue;
}
- had_param_ch = 0;
ifsspc = strchr(" \t\n", *p);
/* Ignore IFS whitespace at start */
@@ -1019,6 +1019,7 @@
start = p;
continue;
}
+ had_param_ch = 0;
}
/* Save this argument... */
-------------- next part --------------
#!/bin/sh
# Copyright (c) 2009 Jilles Tjoelker
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# Fixture values appended after "$@" in the test cases below:
#   c - a colon (assigned to IFS for the last case)
#   e - the empty string
#   s - a single blank
c=: e= s=' '
# Tally strings: check_result prepends one 'x' per failing / passing case.
failures=''
ok=''
# check_result LABEL ACTUAL EXPECTED
#
# Compare ACTUAL ($2) against EXPECTED ($3) as literal strings.
# A match prepends an 'x' to the global $ok; a mismatch prepends an 'x'
# to the global $failures and prints a one-line report naming LABEL ($1).
check_result() {
	# The 'x' prefix keeps test(1) from treating an operand as an operator.
	if [ "x$2" != "x$3" ]; then
		failures=x$failures
		echo "For $1, expected $3 actual $2"
		return
	fi
	ok=x$ok
}
# Cases 1-3 run with IFS = blank, tab, newline.
# The value is built with printf instead of a quoted literal so that the
# whitespace is visible and cannot be lost to editors or mail software that
# strip trailing blanks. The trailing 'x' keeps command substitution from
# removing the final newline; it is dropped again on the next line.
IFS=$(printf ' \t\nx')
IFS=${IFS%x}

# "$@" on its own must preserve a final empty argument.
set -- a ''
set -- "$@"
check_result 'set -- "$@"' "($#)($1)($2)" "(2)(a)()"

# An unquoted empty expansion after "$@" must not change the result.
set -- a ''
set -- "$@"$e
check_result 'set -- "$@"$e' "($#)($1)($2)" "(2)(a)()"

# Trailing IFS whitespace after "$@" must not swallow the final empty
# argument (the $s blank has to be an IFS character for this case).
set -- a ''
set -- "$@"$s
check_result 'set -- "$@"$s' "($#)($1)($2)" "(2)(a)()"

# Case 4 runs with IFS = ':'; a trailing non-whitespace IFS character
# terminates the final empty field, so two arguments are still expected.
IFS="$c"
set -- a ''
set -- "$@"$c
check_result 'set -- "$@"$c' "($#)($1)($2)" "(2)(a)()"

# Exit status of the script: zero only if no case recorded a failure.
test "x$failures" = x
More information about the freebsd-standards
mailing list