svn commit: r214512 - in head: bin/sh tools/regression/bin/sh/expansion

Jilles Tjoelker jilles at FreeBSD.org
Fri Oct 29 13:42:19 UTC 2010


Author: jilles
Date: Fri Oct 29 13:42:18 2010
New Revision: 214512
URL: http://svn.freebsd.org/changeset/base/214512

Log:
  sh: Do IFS splitting on word in ${v+word} and ${v-word}.
  
  The code is somewhat inspired by NetBSD sh, but differs from it because we
  preserve the old Almquist/Bourne/Korn ability to have an unquoted part in a
  quoted ${v+word}. For example, "${v-"*"}" expands to $v as a single field if
  v is set, but generates filenames otherwise.
  
  Note that this is the only place where we split text that comes literally
  from the script (the similar ${v=word} assigns to v and then expands $v).
  The parser must now add additional markers to allow the expansion code to
  know whether arbitrary characters in substitutions are quoted.
  
  Example:
    for i in ${$+a b c}; do echo $i; done
  
  Exp-run done by:	pav (with some other sh(1) changes)

Added:
  head/tools/regression/bin/sh/expansion/plus-minus6.0   (contents, props changed)
Modified:
  head/bin/sh/expand.c
  head/bin/sh/expand.h
  head/bin/sh/mksyntax.c
  head/bin/sh/parser.c
  head/bin/sh/parser.h

Modified: head/bin/sh/expand.c
==============================================================================
--- head/bin/sh/expand.c	Fri Oct 29 13:34:57 2010	(r214511)
+++ head/bin/sh/expand.c	Fri Oct 29 13:42:18 2010	(r214512)
@@ -216,7 +216,12 @@ argstr(char *p, int flag)
 	char c;
 	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);	/* do CTLESC */
 	int firsteq = 1;
+	int split_lit;
+	int lit_quoted;
 
+	split_lit = flag & EXP_SPLIT_LIT;
+	lit_quoted = flag & EXP_LIT_QUOTED;
+	flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED);
 	if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE)))
 		p = exptilde(p, flag);
 	for (;;) {
@@ -225,17 +230,25 @@ argstr(char *p, int flag)
 		case CTLENDVAR:
 			goto breakloop;
 		case CTLQUOTEMARK:
+			lit_quoted = 1;
 			/* "$@" syntax adherence hack */
 			if (p[0] == CTLVAR && p[2] == '@' && p[3] == '=')
 				break;
 			if ((flag & EXP_FULL) != 0)
 				STPUTC(c, expdest);
 			break;
+		case CTLQUOTEEND:
+			lit_quoted = 0;
+			break;
 		case CTLESC:
 			if (quotes)
 				STPUTC(c, expdest);
 			c = *p++;
 			STPUTC(c, expdest);
+			if (split_lit && !lit_quoted)
+				recordregion(expdest - stackblock() -
+				    (quotes ? 2 : 1),
+				    expdest - stackblock(), 0);
 			break;
 		case CTLVAR:
 			p = evalvar(p, flag);
@@ -255,18 +268,21 @@ argstr(char *p, int flag)
 			 * assignments (after the first '=' and after ':'s).
 			 */
 			STPUTC(c, expdest);
-			if (flag & EXP_VARTILDE && *p == '~') {
-				if (c == '=') {
-					if (firsteq)
-						firsteq = 0;
-					else
-						break;
-				}
+			if (split_lit && !lit_quoted)
+				recordregion(expdest - stackblock() - 1,
+				    expdest - stackblock(), 0);
+			if (flag & EXP_VARTILDE && *p == '~' &&
+			    (c != '=' || firsteq)) {
+				if (c == '=')
+					firsteq = 0;
 				p = exptilde(p, flag);
 			}
 			break;
 		default:
 			STPUTC(c, expdest);
+			if (split_lit && !lit_quoted)
+				recordregion(expdest - stackblock() - 1,
+				    expdest - stackblock(), 0);
 		}
 	}
 breakloop:;
@@ -742,7 +758,8 @@ record:
 	case VSPLUS:
 	case VSMINUS:
 		if (!set) {
-			argstr(p, flag);
+			argstr(p, flag | (flag & EXP_FULL ? EXP_SPLIT_LIT : 0) |
+			    (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0));
 			break;
 		}
 		if (easy)
@@ -1495,13 +1512,13 @@ rmescapes(char *str)
 	char *p, *q;
 
 	p = str;
-	while (*p != CTLESC && *p != CTLQUOTEMARK) {
+	while (*p != CTLESC && *p != CTLQUOTEMARK && *p != CTLQUOTEEND) {
 		if (*p++ == '\0')
 			return;
 	}
 	q = p;
 	while (*p) {
-		if (*p == CTLQUOTEMARK) {
+		if (*p == CTLQUOTEMARK || *p == CTLQUOTEEND) {
 			p++;
 			continue;
 		}

Modified: head/bin/sh/expand.h
==============================================================================
--- head/bin/sh/expand.h	Fri Oct 29 13:34:57 2010	(r214511)
+++ head/bin/sh/expand.h	Fri Oct 29 13:42:18 2010	(r214512)
@@ -52,6 +52,8 @@ struct arglist {
 #define	EXP_VARTILDE	0x4	/* expand tildes in an assignment */
 #define	EXP_REDIR	0x8	/* file glob for a redirection (1 match only) */
 #define EXP_CASE	0x10	/* keeps quotes around for CASE pattern */
+#define EXP_SPLIT_LIT	0x20	/* IFS split literal text ${v+-a b c} */
+#define EXP_LIT_QUOTED	0x40	/* for EXP_SPLIT_LIT, start off quoted */
 
 
 union node;

Modified: head/bin/sh/mksyntax.c
==============================================================================
--- head/bin/sh/mksyntax.c	Fri Oct 29 13:34:57 2010	(r214511)
+++ head/bin/sh/mksyntax.c	Fri Oct 29 13:42:18 2010	(r214512)
@@ -285,6 +285,7 @@ init(void)
 	syntax[base + CTLARI] = "CCTL";
 	syntax[base + CTLENDARI] = "CCTL";
 	syntax[base + CTLQUOTEMARK] = "CCTL";
+	syntax[base + CTLQUOTEEND] = "CCTL";
 }
 
 

Modified: head/bin/sh/parser.c
==============================================================================
--- head/bin/sh/parser.c	Fri Oct 29 13:34:57 2010	(r214511)
+++ head/bin/sh/parser.c	Fri Oct 29 13:42:18 2010	(r214512)
@@ -1161,7 +1161,7 @@ readtoken1(int firstc, char const *initi
 	loop: {	/* for each line, until end of word */
 		CHECKEND();	/* set c to PEOF if at end of here document */
 		for (;;) {	/* until end of line or end of word */
-			CHECKSTRSPACE(3, out);	/* permit 3 calls to USTPUTC */
+			CHECKSTRSPACE(4, out);	/* permit 4 calls to USTPUTC */
 
 			synentry = state[level].syntax[c];
 
@@ -1203,12 +1203,18 @@ readtoken1(int firstc, char const *initi
 						newvarnest == 0)) &&
 					    (c != '}' || state[level].category != TSTATE_VAR_OLD))
 						USTPUTC('\\', out);
+					if ((eofmark == NULL ||
+					    newvarnest > 0) &&
+					    state[level].syntax == BASESYNTAX)
+						USTPUTC(CTLQUOTEMARK, out);
 					if (SQSYNTAX[c] == CCTL)
 						USTPUTC(CTLESC, out);
-					else if (eofmark == NULL ||
-					    newvarnest > 0)
-						USTPUTC(CTLQUOTEMARK, out);
 					USTPUTC(c, out);
+					if ((eofmark == NULL ||
+					    newvarnest > 0) &&
+					    state[level].syntax == BASESYNTAX &&
+					    state[level].category == TSTATE_VAR_OLD)
+						USTPUTC(CTLQUOTEEND, out);
 					quotef++;
 				}
 				break;
@@ -1224,6 +1230,8 @@ readtoken1(int firstc, char const *initi
 				if (eofmark != NULL && newvarnest == 0)
 					USTPUTC(c, out);
 				else {
+					if (state[level].category == TSTATE_VAR_OLD)
+						USTPUTC(CTLQUOTEEND, out);
 					state[level].syntax = BASESYNTAX;
 					quotef++;
 				}

Modified: head/bin/sh/parser.h
==============================================================================
--- head/bin/sh/parser.h	Fri Oct 29 13:34:57 2010	(r214511)
+++ head/bin/sh/parser.h	Fri Oct 29 13:42:18 2010	(r214512)
@@ -43,6 +43,7 @@
 #define	CTLARI	'\206'
 #define	CTLENDARI '\207'
 #define	CTLQUOTEMARK '\210'
+#define	CTLQUOTEEND '\211' /* only for ${v+-...} */
 
 /* variable substitution byte (follows CTLVAR) */
 #define VSTYPE		0x0f	/* type of variable substitution */

Added: head/tools/regression/bin/sh/expansion/plus-minus6.0
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/tools/regression/bin/sh/expansion/plus-minus6.0	Fri Oct 29 13:42:18 2010	(r214512)
@@ -0,0 +1,34 @@
+# $FreeBSD$
+
+failures=0
+unset LC_ALL
+export LC_CTYPE=en_US.ISO8859-1
+nl='
+'
+i=1
+set -f
+while [ "$i" -le 255 ]; do
+	# A different byte still in the range 1..255.
+	i2=$((i^2+(i==2)))
+	# Add a character to work around command substitution's removal of
+	# final newlines, then remove it again.
+	c=$(printf \\"$(printf %o@ "$i")")
+	c=${c%@}
+	c2=$(printf \\"$(printf %o@ "$i2")")
+	c2=${c2%@}
+	case $c in
+		[\'$nl'$}();&|\"`']) c=M
+	esac
+	case $c2 in
+		[\'$nl'$}();&|\"`']) c2=N
+	esac
+	IFS=$c
+	command eval "set -- \${\$+$c2$c$c2$c$c2}"
+	if [ "$#" -ne 3 ] || [ "$1" != "$c2" ] || [ "$2" != "$c2" ] ||
+	    [ "$3" != "$c2" ]; then
+		echo "Bad results for separator $i (word $i2)" >&2
+		: $((failures += 1))
+	fi
+	i=$((i+1))
+done
+exit $((failures > 0))


More information about the svn-src-head mailing list