svn commit: r206145 - in head: bin/sh tools/regression/bin/sh/expansion tools/regression/bin/sh/parser

Jilles Tjoelker jilles at FreeBSD.org
Sat Apr 3 20:55:57 UTC 2010


Author: jilles
Date: Sat Apr  3 20:55:56 2010
New Revision: 206145
URL: http://svn.freebsd.org/changeset/base/206145

Log:
  sh: Fix various things about expansions:
  * remove the backslash from \} inside double quotes inside +-=?
    substitutions, e.g. "${$+\}a}"
  * maintain separate double-quote state for ${v#...} and ${v%...};
    single and double quotes are special inside, even in a double-quoted
    string or here document
  * keep track of the correct nesting order of substitutions and arithmetic
    expansions
  
  This is different from dash's approach, which does not track individual
  double quotes in the parser and instead tries to fix this up during
  expansion.  Dash's approach treats single quotes inside "${v#...}"
  incorrectly, however.
  
  This is similar to NetBSD's approach (as submitted in PR bin/57554), but
  recognizes the difference between +-=? and #% substitutions hinted at in
  POSIX and is more refined for arithmetic expansion and here documents.
  
  PR:		bin/57554
  Exp-run done by:	erwin (with some other sh(1) changes)

Added:
  head/tools/regression/bin/sh/expansion/plus-minus2.0   (contents, props changed)
  head/tools/regression/bin/sh/parser/heredoc2.0   (contents, props changed)
Modified:
  head/bin/sh/parser.c

Modified: head/bin/sh/parser.c
==============================================================================
--- head/bin/sh/parser.c	Sat Apr  3 20:35:39 2010	(r206144)
+++ head/bin/sh/parser.c	Sat Apr  3 20:55:56 2010	(r206145)
@@ -79,6 +79,10 @@ struct heredoc {
 	int striptabs;		/* if set, strip leading tabs */
 };
 
+struct parser_temp {
+	struct parser_temp *next;
+	void *data;
+};
 
 
 STATIC struct heredoc *heredoclist;	/* list of here documents to read */
@@ -94,6 +98,7 @@ STATIC struct heredoc *heredoc;
 STATIC int quoteflag;		/* set if (part of) last token was quoted */
 STATIC int startlinno;		/* line # where last token started */
 STATIC int funclinno;		/* line # where the current function started */
+STATIC struct parser_temp *parser_temp;
 
 /* XXX When 'noaliases' is set to one, no alias expansion takes place. */
 static int noaliases = 0;
@@ -117,6 +122,73 @@ STATIC void synerror(const char *);
 STATIC void setprompt(int);
 
 
+STATIC void *
+parser_temp_alloc(size_t len)
+{
+	struct parser_temp *t;
+
+	INTOFF;
+	t = ckmalloc(sizeof(*t));
+	t->data = NULL;
+	t->next = parser_temp;
+	parser_temp = t;
+	t->data = ckmalloc(len);
+	INTON;
+	return t->data;
+}
+
+
+STATIC void *
+parser_temp_realloc(void *ptr, size_t len)
+{
+	struct parser_temp *t;
+
+	INTOFF;
+	t = parser_temp;
+	if (ptr != t->data)
+		error("bug: parser_temp_realloc misused");
+	t->data = ckrealloc(t->data, len);
+	INTON;
+	return t->data;
+}
+
+
+STATIC void
+parser_temp_free_upto(void *ptr)
+{
+	struct parser_temp *t;
+	int done = 0;
+
+	INTOFF;
+	while (parser_temp != NULL && !done) {
+		t = parser_temp;
+		parser_temp = t->next;
+		done = t->data == ptr;
+		ckfree(t->data);
+		ckfree(t);
+	}
+	INTON;
+	if (!done)
+		error("bug: parser_temp_free_upto misused");
+}
+
+
+STATIC void
+parser_temp_free_all(void)
+{
+	struct parser_temp *t;
+
+	INTOFF;
+	while (parser_temp != NULL) {
+		t = parser_temp;
+		parser_temp = t->next;
+		ckfree(t->data);
+		ckfree(t);
+	}
+	INTON;
+}
+
+
 /*
  * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
  * valid parse tree indicating a blank line.)
@@ -127,6 +199,11 @@ parsecmd(int interact)
 {
 	int t;
 
+	/* This assumes the parser is not re-entered,
+	 * which could happen if we add command substitution on PS1/PS2.
+	 */
+	parser_temp_free_all();
+
 	tokpushback = 0;
 	doprompt = interact;
 	if (doprompt)
@@ -863,6 +940,21 @@ breakloop:
 }
 
 
+#define MAXNEST_STATIC 8
+struct tokenstate
+{
+	const char *syntax; /* *SYNTAX */
+	int parenlevel; /* levels of parentheses in arithmetic */
+	enum tokenstate_category
+	{
+		TSTATE_TOP,
+		TSTATE_VAR_OLD, /* ${var+-=?}, inherits dquotes */
+		TSTATE_VAR_NEW, /* other ${var...}, own dquote state */
+		TSTATE_ARITH
+	} category;
+};
+
+
 /*
  * Called to parse command substitutions.
  */
@@ -1040,7 +1132,7 @@ done:
 #define	PARSEARITH()	{goto parsearith; parsearith_return:;}
 
 STATIC int
-readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
+readtoken1(int firstc, char const *initialsyntax, char *eofmark, int striptabs)
 {
 	int c = firstc;
 	char *out;
@@ -1048,22 +1140,21 @@ readtoken1(int firstc, char const *synta
 	char line[EOFMARKLEN + 1];
 	struct nodelist *bqlist;
 	int quotef;
-	int dblquote;
-	int varnest;	/* levels of variables expansion */
-	int arinest;	/* levels of arithmetic expansion */
-	int parenlevel;	/* levels of parens in arithmetic */
-	char const *prevsyntax;	/* syntax before arithmetic */
+	int newvarnest;
+	int level;
 	int synentry;
+	struct tokenstate state_static[MAXNEST_STATIC];
+	int maxnest = MAXNEST_STATIC;
+	struct tokenstate *state = state_static;
 
 	startlinno = plinno;
-	dblquote = 0;
-	if (syntax == DQSYNTAX)
-		dblquote = 1;
 	quotef = 0;
 	bqlist = NULL;
-	varnest = 0;
-	arinest = 0;
-	parenlevel = 0;
+	newvarnest = 0;
+	level = 0;
+	state[level].syntax = initialsyntax;
+	state[level].parenlevel = 0;
+	state[level].category = TSTATE_TOP;
 
 	STARTSTACKSTR(out);
 	loop: {	/* for each line, until end of word */
@@ -1071,11 +1162,11 @@ readtoken1(int firstc, char const *synta
 		for (;;) {	/* until end of line or end of word */
 			CHECKSTRSPACE(3, out);	/* permit 3 calls to USTPUTC */
 
-			synentry = syntax[c];
+			synentry = state[level].syntax[c];
 
 			switch(synentry) {
 			case CNL:	/* '\n' */
-				if (syntax == BASESYNTAX)
+				if (state[level].syntax == BASESYNTAX)
 					goto endword;	/* exit outer loop */
 				USTPUTC(c, out);
 				plinno++;
@@ -1089,7 +1180,7 @@ readtoken1(int firstc, char const *synta
 				USTPUTC(c, out);
 				break;
 			case CCTL:
-				if (eofmark == NULL || dblquote)
+				if (eofmark == NULL || initialsyntax != SQSYNTAX)
 					USTPUTC(CTLESC, out);
 				USTPUTC(c, out);
 				break;
@@ -1105,41 +1196,37 @@ readtoken1(int firstc, char const *synta
 					else
 						setprompt(0);
 				} else {
-					if (dblquote && c != '\\' &&
-					    c != '`' && c != '$' &&
-					    (c != '"' || eofmark != NULL))
+					if (state[level].syntax == DQSYNTAX &&
+					    c != '\\' && c != '`' && c != '$' &&
+					    (c != '"' || (eofmark != NULL &&
+						newvarnest == 0)) &&
+					    (c != '}' || state[level].category != TSTATE_VAR_OLD))
 						USTPUTC('\\', out);
 					if (SQSYNTAX[c] == CCTL)
 						USTPUTC(CTLESC, out);
-					else if (eofmark == NULL)
+					else if (eofmark == NULL ||
+					    newvarnest > 0)
 						USTPUTC(CTLQUOTEMARK, out);
 					USTPUTC(c, out);
 					quotef++;
 				}
 				break;
 			case CSQUOTE:
-				if (eofmark == NULL)
-					USTPUTC(CTLQUOTEMARK, out);
-				syntax = SQSYNTAX;
+				USTPUTC(CTLQUOTEMARK, out);
+				state[level].syntax = SQSYNTAX;
 				break;
 			case CDQUOTE:
-				if (eofmark == NULL)
-					USTPUTC(CTLQUOTEMARK, out);
-				syntax = DQSYNTAX;
-				dblquote = 1;
+				USTPUTC(CTLQUOTEMARK, out);
+				state[level].syntax = DQSYNTAX;
 				break;
 			case CENDQUOTE:
-				if (eofmark != NULL && arinest == 0 &&
-				    varnest == 0) {
+				if (eofmark != NULL && newvarnest == 0)
 					USTPUTC(c, out);
-				} else {
-					if (arinest) {
-						syntax = ARISYNTAX;
-						dblquote = 0;
-					} else if (eofmark == NULL) {
-						syntax = BASESYNTAX;
-						dblquote = 0;
-					}
+				else {
+					if (state[level].category == TSTATE_ARITH)
+						state[level].syntax = ARISYNTAX;
+					else
+						state[level].syntax = BASESYNTAX;
 					quotef++;
 				}
 				break;
@@ -1147,30 +1234,33 @@ readtoken1(int firstc, char const *synta
 				PARSESUB();		/* parse substitution */
 				break;
 			case CENDVAR:	/* '}' */
-				if (varnest > 0) {
-					varnest--;
+				if (level > 0 &&
+				    (state[level].category == TSTATE_VAR_OLD ||
+				    state[level].category == TSTATE_VAR_NEW)) {
+					if (state[level].category == TSTATE_VAR_OLD)
+						state[level - 1].syntax = state[level].syntax;
+					else
+						newvarnest--;
+					level--;
 					USTPUTC(CTLENDVAR, out);
 				} else {
 					USTPUTC(c, out);
 				}
 				break;
 			case CLP:	/* '(' in arithmetic */
-				parenlevel++;
+				state[level].parenlevel++;
 				USTPUTC(c, out);
 				break;
 			case CRP:	/* ')' in arithmetic */
-				if (parenlevel > 0) {
+				if (state[level].parenlevel > 0) {
 					USTPUTC(c, out);
-					--parenlevel;
+					--state[level].parenlevel;
 				} else {
 					if (pgetc() == ')') {
-						if (--arinest == 0) {
+						if (level > 0 &&
+						    state[level].category == TSTATE_ARITH) {
+							level--;
 							USTPUTC(CTLENDARI, out);
-							syntax = prevsyntax;
-							if (syntax == DQSYNTAX)
-								dblquote = 1;
-							else
-								dblquote = 0;
 						} else
 							USTPUTC(')', out);
 					} else {
@@ -1184,13 +1274,15 @@ readtoken1(int firstc, char const *synta
 				}
 				break;
 			case CBQUOTE:	/* '`' */
-				out = parsebackq(out, &bqlist, 1, dblquote,
-						arinest || dblquote);
+				out = parsebackq(out, &bqlist, 1,
+				    state[level].syntax == DQSYNTAX &&
+				    (eofmark == NULL || newvarnest > 0),
+				    state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX);
 				break;
 			case CEOF:
 				goto endword;		/* exit outer loop */
 			default:
-				if (varnest == 0)
+				if (level == 0)
 					goto endword;	/* exit outer loop */
 				USTPUTC(c, out);
 			}
@@ -1198,14 +1290,17 @@ readtoken1(int firstc, char const *synta
 		}
 	}
 endword:
-	if (syntax == ARISYNTAX)
+	if (state[level].syntax == ARISYNTAX)
 		synerror("Missing '))'");
-	if (syntax != BASESYNTAX && eofmark == NULL)
+	if (state[level].syntax != BASESYNTAX && eofmark == NULL)
 		synerror("Unterminated quoted string");
-	if (varnest != 0) {
+	if (state[level].category == TSTATE_VAR_OLD ||
+	    state[level].category == TSTATE_VAR_NEW) {
 		startlinno = plinno;
 		synerror("Missing '}'");
 	}
+	if (state != state_static)
+		parser_temp_free_upto(state);
 	USTPUTC('\0', out);
 	len = out - stackblock();
 	out = stackblock();
@@ -1228,7 +1323,6 @@ endword:
 /* end of readtoken routine */
 
 
-
 /*
  * Check to see whether we are at the end of the here document.  When this
  * is called, c is set to the first character of the next input line.  If
@@ -1345,8 +1439,11 @@ parsesub: {
 			PARSEARITH();
 		} else {
 			pungetc();
-			out = parsebackq(out, &bqlist, 0, dblquote,
-					arinest || dblquote);
+			out = parsebackq(out, &bqlist, 0,
+			    state[level].syntax == DQSYNTAX &&
+			    (eofmark == NULL || newvarnest > 0),
+			    state[level].syntax == DQSYNTAX ||
+			    state[level].syntax == ARISYNTAX);
 		}
 	} else {
 		USTPUTC(CTLVAR, out);
@@ -1446,11 +1543,44 @@ parsesub: {
 			pungetc();
 		}
 		STPUTC('=', out);
-		if (subtype != VSLENGTH && (dblquote || arinest))
+		if (subtype != VSLENGTH && (state[level].syntax == DQSYNTAX ||
+		    state[level].syntax == ARISYNTAX))
 			flags |= VSQUOTE;
 		*(stackblock() + typeloc) = subtype | flags;
-		if (subtype != VSNORMAL)
-			varnest++;
+		if (subtype != VSNORMAL) {
+			if (level + 1 >= maxnest) {
+				maxnest *= 2;
+				if (state == state_static) {
+					state = parser_temp_alloc(
+					    maxnest * sizeof(*state));
+					memcpy(state, state_static,
+					    MAXNEST_STATIC * sizeof(*state));
+				} else
+					state = parser_temp_realloc(state,
+					    maxnest * sizeof(*state));
+			}
+			level++;
+			state[level].parenlevel = 0;
+			if (subtype == VSMINUS || subtype == VSPLUS ||
+			    subtype == VSQUESTION || subtype == VSASSIGN) {
+				/*
+				 * For operators that were in the Bourne shell,
+				 * inherit the double-quote state.
+				 */
+				state[level].syntax = state[level - 1].syntax;
+				state[level].category = TSTATE_VAR_OLD;
+			} else {
+				/*
+				 * The other operators take a pattern,
+				 * so go to BASESYNTAX.
+				 * Also, ' and " are now special, even
+				 * in here documents.
+				 */
+				state[level].syntax = BASESYNTAX;
+				state[level].category = TSTATE_VAR_NEW;
+				newvarnest++;
+			}
+		}
 	}
 	goto parsesub_return;
 }
@@ -1461,21 +1591,26 @@ parsesub: {
  */
 parsearith: {
 
-	if (++arinest == 1) {
-		prevsyntax = syntax;
-		syntax = ARISYNTAX;
-		USTPUTC(CTLARI, out);
-		if (dblquote)
-			USTPUTC('"',out);
-		else
-			USTPUTC(' ',out);
-	} else {
-		/*
-		 * we collapse embedded arithmetic expansion to
-		 * parenthesis, which should be equivalent
-		 */
-		USTPUTC('(', out);
+	if (level + 1 >= maxnest) {
+		maxnest *= 2;
+		if (state == state_static) {
+			state = parser_temp_alloc(
+			    maxnest * sizeof(*state));
+			memcpy(state, state_static,
+			    MAXNEST_STATIC * sizeof(*state));
+		} else
+			state = parser_temp_realloc(state,
+			    maxnest * sizeof(*state));
 	}
+	level++;
+	state[level].syntax = ARISYNTAX;
+	state[level].parenlevel = 0;
+	state[level].category = TSTATE_ARITH;
+	USTPUTC(CTLARI, out);
+	if (state[level - 1].syntax == DQSYNTAX)
+		USTPUTC('"',out);
+	else
+		USTPUTC(' ',out);
 	goto parsearith_return;
 }
 

Added: head/tools/regression/bin/sh/expansion/plus-minus2.0
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/tools/regression/bin/sh/expansion/plus-minus2.0	Sat Apr  3 20:55:56 2010	(r206145)
@@ -0,0 +1,4 @@
+# $FreeBSD$
+
+e=
+test "${e:-\}}" = '}'

Added: head/tools/regression/bin/sh/parser/heredoc2.0
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/tools/regression/bin/sh/parser/heredoc2.0	Sat Apr  3 20:55:56 2010	(r206145)
@@ -0,0 +1,44 @@
+# $FreeBSD$
+
+failures=0
+
+check() {
+	if ! eval "[ $* ]"; then
+		echo "Failed: $*"
+		: $((failures += 1))
+	fi
+}
+
+s='ast*que?non' sq=\' dq=\"
+
+check '"$(cat <<EOF
+${s}
+EOF
+)" = "ast*que?non"'
+
+check '"$(cat <<EOF
+${s+"x"}
+EOF
+)" = ${dq}x${dq}'
+
+check '"$(cat <<EOF
+${s+'$sq'x'$sq'}
+EOF
+)" = ${sq}x${sq}'
+
+check '"$(cat <<EOF
+${s#ast}
+EOF
+)" = "*que?non"'
+
+check '"$(cat <<EOF
+${s##"ast"}
+EOF
+)" = "*que?non"'
+
+check '"$(cat <<EOF
+${s##'$sq'ast'$sq'}
+EOF
+)" = "*que?non"'
+
+exit $((failures != 0))


More information about the svn-src-head mailing list