Bug in #! processing - One More Time

Maxim Sobolev sobomax at FreeBSD.org
Fri May 13 09:05:24 PDT 2005


Attached please find patch which rips any special processing of command 
line arguments. It should put FreeBSD into the very same ship with the 
rest of unices and linuces out there.

In addition it makes implemetation matching documentation, so that 
ENAMETOOLONG is only returned when interpreter name exceeds 
MAXSHELLCMDLEN. Previously, it has been returned when length of 
interpreter name plus length of all arguments exceeded MAXSHELLCMDLEN.

If there is no objections I'd like to commit it in the next few days. 
This corresponds to the case (3) from the Garance's original message.

I've run this change through buildworld.

-Maxim

Garance A Drosihn wrote:
> At 12:33 PM +0200 2/24/05, Maxim Sobolev wrote:
> 
>> Garance A Drosihn wrote:
>>
>>>
>>> As I see it, we have the following choices to fix this:
>>>
>>> 1) MFC the January 31st change to kern/imgact_shell.c to 5.3-stable,
>>>    as it is.  This means we haven't fixed the problem that people
>>>    complained about in 2002 and again in 2004.  And I still think
>>>    it is "not appropriate" for the execve() system to be deciding
>>>    what '#' means on that line.  The biggest advantage is that this
>>>    means 5.4-release will behave exactly the same as 3.5 through
>>>    5.3-release have behaved.
> 
> 
> We have the code-freeze coming up on Wednesday.  It turns out the
> Jan 31st change had some bugs in it, and afaik we are still coming
> up with a better version of that for *current*, never mind -stable.
> 
> At this point I think we should revert change 1.26.4.1 in -stable,
> such that 5.4-release will behave the same way as all previous
> releases.  Which is to say, it will continue to look for '#' on
> the shebang line, and ignore everything after one is found.
> 
> I have a patch ready to do this, assuming people aren't too upset
> at the idea.  I just don't feel comfortable trying to rush through
> multiple changes for this issue at the last minute.
> 

-------------- next part --------------
Index: kern/imgact_shell.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/imgact_shell.c,v
retrieving revision 1.32
diff -d -u -d -u -r1.32 imgact_shell.c
--- kern/imgact_shell.c	25 Feb 2005 10:17:53 -0000	1.32
+++ kern/imgact_shell.c	13 May 2005 15:40:58 -0000
@@ -42,6 +42,41 @@
 #define SHELLMAGIC	0x2321
 #endif
 
+static inline const char *
+find_token_end(const char *bcp, const char *ecp)
+{
+	const char *cp;
+
+	for (cp = bcp; cp < ecp; cp++)
+		if ((*cp == '\0') || (*cp == '\n') || (*cp == ' ') ||
+		    (*cp == '\t'))
+			break;
+	return cp;
+}
+
+static inline const char *
+find_whitespace_end(const char *bcp, const char *ecp)
+{
+	const char *cp;
+
+	for (cp = bcp; cp < ecp; cp++)
+		if ((*cp == '\0') || (*cp == '\n') || ((*cp != ' ') &&
+		    (*cp != '\t')))
+			break;
+	return cp;
+}
+
+static inline const char *
+find_line_end(const char *bcp, const char *ecp)
+{
+	const char *cp;
+
+	for (cp = bcp; cp < ecp; cp++)
+		if ((*cp == '\0') || (*cp == '\n'))
+			break;
+	return cp;
+}
+
 /*
  * Shell interpreter image activator. An interpreter name beginning
  *	at imgp->args->begin_argv is the minimal successful exit requirement.
@@ -50,14 +85,16 @@
 exec_shell_imgact(imgp)
 	struct image_params *imgp;
 {
-	const char *image_header = imgp->image_header;
-	const char *ihp;
-	int error, offset;
-	size_t length, clength;
+	const char *bcp, *ecp;
+	int error, alength;
+	size_t rlength;
 	struct vattr vattr;
+	const char *argv[2];
+	int argv_len[2];
 
+	bcp = imgp->image_header;
 	/* a shell script? */
-	if (((const short *) image_header)[0] != SHELLMAGIC)
+	if (((const short *)bcp)[0] != SHELLMAGIC)
 		return(-1);
 
 	/*
@@ -65,7 +102,7 @@
 	 *	script. :-)
 	 */
 	if (imgp->interpreted)
-		return(ENOEXEC);
+		return (ENOEXEC);
 
 	imgp->interpreted = 1;
 
@@ -78,68 +115,52 @@
 	error = VOP_GETATTR(imgp->vp, &vattr, imgp->proc->p_ucred, curthread);
 	if (error)
 		return (error);
+	if (vattr.va_size < 3)
+		return (ENOEXEC);
 
-	clength = (vattr.va_size > MAXSHELLCMDLEN) ?
-	    MAXSHELLCMDLEN : vattr.va_size;
+	ecp = bcp + vattr.va_size;
 	/*
-	 * Figure out the number of bytes that need to be reserved in the
+	 * Parse the first line of the shell script which is necessary to
+	 * figure out the number of bytes that need to be reserved in the
 	 * argument string to copy the contents of the interpreter's command
 	 * line into the argument string.
 	 */
-	ihp = &image_header[2];
-	offset = 0;
-	while (ihp < &image_header[clength]) {
-		/* Skip any whitespace */
-		if ((*ihp == ' ') || (*ihp == '\t')) {
-			ihp++;
-			continue;
-		}
-
-		/* End of line? */
-		if ((*ihp == '\n') || (*ihp == '#') || (*ihp == '\0'))
-			break;
-
-		/* Found a token */
-		do {
-			offset++;
-			ihp++;
-		} while ((*ihp != ' ') && (*ihp != '\t') && (*ihp != '\n') &&
-		    (*ihp != '#') && (*ihp != '\0') &&
-		    (ihp < &image_header[clength]));
-		/* Include terminating nulls in the offset */
-		offset++;
-	}
-
+	argv[0] = find_whitespace_end(bcp + 2, ecp);
+	argv_len[0] = find_token_end(argv[0], ecp) - argv[0];
+	/* Check that the name of interpreter is in allowed range */
+	if (argv_len[0] > MAXSHELLCMDLEN)
+		return (ENAMETOOLONG);
 	/* If the script gives a null line as the interpreter, we bail */
-	if (offset == 0)
+	if (argv_len[0] == 0)
 		return (ENOEXEC);
-
-	/* Check that we aren't too big */
-	if (ihp == &image_header[MAXSHELLCMDLEN])
-		return (ENAMETOOLONG);
+	argv[1] = find_whitespace_end(argv[0] + argv_len[0], ecp);
+	argv_len[1] = find_line_end(argv[1], ecp) - argv[1];
 
 	/*
 	 * The full path name of the original script file must be tagged
 	 * onto the end, adjust the offset to deal with it.
 	 *
-	 * The original argv[0] is being replaced, set 'length' to the number
-	 * of bytes being removed.  So 'offset' is the number of bytes being
-	 * added and 'length' is the number of bytes being removed.
+	 * The original argv[0] is being replaced, set 'rlength' to the number
+	 * of bytes being removed.  So 'alength' is the number of bytes being
+	 * added and 'rlength' is the number of bytes being removed.
 	 */
-	offset += strlen(imgp->args->fname) + 1;	/* add fname */
-	length = (imgp->args->argc == 0) ? 0 :
+	alength = argv_len[0] + argv_len[1] +
+	    strlen(imgp->args->fname) + 1;		/* add fname */
+	alength += (argv_len[1] == 0) ? 1 : 2;	/* compensate for NULs */
+	rlength = (imgp->args->argc == 0) ? 0 :
 	    strlen(imgp->args->begin_argv) + 1;		/* bytes to delete */
 
-	if (offset - length > imgp->args->stringspace)
+	if (alength - rlength > imgp->args->stringspace)
 		return (E2BIG);
 
-	bcopy(imgp->args->begin_argv + length, imgp->args->begin_argv + offset,
-	    imgp->args->endp - (imgp->args->begin_argv + length));
+	/* Move the rest of the arguments */
+	bcopy(imgp->args->begin_argv + rlength,
+	    imgp->args->begin_argv + alength,
+	    imgp->args->endp - (imgp->args->begin_argv + rlength));
 
-	offset -= length;		/* calculate actual adjustment */
-	imgp->args->begin_envv += offset;
-	imgp->args->endp += offset;
-	imgp->args->stringspace -= offset;
+	imgp->args->begin_envv += alength - rlength;
+	imgp->args->endp += alength - rlength;
+	imgp->args->stringspace -= alength - rlength;
 
 	/*
 	 * If there were no arguments then we've added one, otherwise
@@ -148,44 +169,29 @@
 	if (imgp->args->argc == 0)
 		imgp->args->argc = 1;
 
-	/*
-	 * Loop through the interpreter name yet again, copying as
-	 * we go.
-	 */
-	ihp = &image_header[2];
-	offset = 0;
-	while (ihp < &image_header[clength]) {
-		/* Skip whitespace */
-		if ((*ihp == ' ') || (*ihp == '\t')) {
-			ihp++;
-			continue;
-		}
-
-		/* End of line? */
-		if ((*ihp == '\n') || (*ihp == '#') || (*ihp == '\0'))
-			break;
-
-		/* Found a token, copy it */
-		do {
-			imgp->args->begin_argv[offset++] = *ihp++;
-		} while ((*ihp != ' ') && (*ihp != '\t') && (*ihp != '\n') &&
-		    (*ihp != '#') && (*ihp != '\0') &&
-		    (ihp < &image_header[MAXSHELLCMDLEN]));
-		imgp->args->begin_argv[offset++] = '\0';
+	/* Fill argv[0] and optionally argv[1] */
+	bcopy(argv[0], imgp->args->begin_argv, argv_len[0]);
+	imgp->args->begin_argv[argv_len[0]] = '\0';
+	imgp->args->argc++;
+	if (argv_len[1] != 0) {
+		bcopy(argv[1], imgp->args->begin_argv + argv_len[0] + 1,
+		    argv_len[1]);
+		imgp->args->begin_argv[argv_len[0] + 1 + argv_len[1]] = '\0';
 		imgp->args->argc++;
 	}
 
+	/* Add the filename as argv[1] or argv[2] for the interpreter to use */
+	error = copystr(imgp->args->fname, imgp->args->begin_argv +
+	    argv_len[0] + argv_len[1] + ((argv_len[1] == 0) ? 1 : 2),
+	    imgp->args->stringspace, &rlength);
+
 	/*
-	 * Finally, add the filename onto the end for the interpreter to
-	 * use and copy the interpreter's name to imgp->interpreter_name
-	 * for exec to use.
+	 * Finally copy the interpreter's name including terminating
+	 * NUL to imgp->interpreter_name for exec to use.
 	 */
-	error = copystr(imgp->args->fname, imgp->args->buf + offset,
-	    imgp->args->stringspace, &length);
-
 	if (error == 0)
-		error = copystr(imgp->args->begin_argv, imgp->interpreter_name,
-		    MAXSHELLCMDLEN, &length);
+		bcopy(imgp->args->begin_argv, imgp->interpreter_name,
+		    argv_len[0] + 1);
 
 	return (error);
 }


More information about the freebsd-arch mailing list