bin/123553: [patch] Prevent indent(1) from splitting unrecognized tokens

Fri May 9 14:20:04 UTC 2008

>Number:         123553
>Category:       bin
>Synopsis:       [patch] Prevent indent(1) from splitting unrecognized tokens
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Fri May 09 14:20:04 UTC 2008
>Closed-Date:
>Last-Modified:
>Originator:     Romain Tartiere
>Release:        FreeBSD 7.0-STABLE i386
>Organization:
>Environment:
System: FreeBSD marvin.blogreen.org 7.0-STABLE FreeBSD 7.0-STABLE #14: Fri Apr 18 18:27:58 CEST 2008 root at marvin.blogreen.org:/usr/obj/usr/src/sys/MARVIN i386

>Description:

When using indent(1) to indent source code, unrecognized tokens such as "0b00101010" are split (e.g. "0 b00101010").

Such constructs are, however, valid with avr-gcc from the ports, and upcoming releases of gcc will support this binary notation [1].

References:
  1. As noticed by Frank Behrens: http://lists.freebsd.org/pipermail/freebsd-hackers/2008-April/024343.html

>How-To-Repeat:

% echo "int x = 0b00101010 ;" > foo.c
% avr-gcc -c foo.c
% indent foo.c
% avr-gcc -c foo.c
foo.c:1: error: expected ',' or ';' before 'b00101010'
% cat foo.c
int             x = 0 b00101010;

>Fix:

The following patch attempts to detect numbers in different bases and to check that each one is valid, while no longer splitting tokens that contain unrecognized data:

--- lexi.c.diff begins here ---
--- /usr/src/usr.bin/indent/lexi.c	2005-11-20 14:48:15.000000000 +0100
+++ lexi.c	2008-04-27 15:09:21.000000000 +0200
@@ -121,6 +121,10 @@
     1, 1, 1, 0, 3, 0, 3, 0
 };
 
+enum base {
+	BASE_2, BASE_8, BASE_10, BASE_16
+};
+
 int
 lexi(void)
 {
@@ -158,16 +162,37 @@
 	    int         seendot = 0,
 	                seenexp = 0,
 			seensfx = 0;
-	    if (*buf_ptr == '0' &&
-		    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
-		*e_token++ = *buf_ptr++;
-		*e_token++ = *buf_ptr++;
-		while (isxdigit(*buf_ptr)) {
+	    enum base	in_base = BASE_10;
+
+	    if (*buf_ptr == '0') {
+		if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
+		    in_base = BASE_2;
+		else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
+		    in_base = BASE_16;
+		else
+		    in_base = BASE_8;
+	    }
+
+	    *e_token++ = *buf_ptr++;
+	    if (in_base == BASE_2 || in_base == BASE_16)
+		*e_token++ = *buf_ptr++;	/* Read the second character from
+						 * 0b... / 0x... expressions.
+						 */
+
+	    switch (in_base) {
+	    case BASE_2:
+		while (*buf_ptr == '0' || *buf_ptr == '1') {
 		    CHECK_SIZE_TOKEN;
 		    *e_token++ = *buf_ptr++;
 		}
-	    }
-	    else
+		break;
+	    case BASE_8:
+		while (*buf_ptr >= '0' && *buf_ptr <= '8') {
+		    CHECK_SIZE_TOKEN;
+		    *e_token++ = *buf_ptr++;
+		}
+		break;
+	    case BASE_10:
 		while (1) {
 		    if (*buf_ptr == '.') {
 			if (seendot)
@@ -209,6 +234,29 @@
 		}
 		break;
 	    }
+
+	    	break;
+	    case BASE_16:
+		while (isxdigit(*buf_ptr)) {
+		    CHECK_SIZE_TOKEN;
+		    *e_token++ = *buf_ptr++;
+		}
+	    	break;
+	    }
+	    if (isalnum(*buf_ptr)) {
+		char *buf;
+		/* current token is malformed */
+		if (asprintf(&buf, "Ignoring invalid numeric "
+		    "expression '%s%c...'", s_token, *buf_ptr)) {
+		    diag2(0, buf);
+		    free(buf);
+		}
+		/* finish to eat the current token */
+		while (isalnum(*buf_ptr)) {
+		    CHECK_SIZE_TOKEN;
+		    *e_token++ = *buf_ptr++;
+		}
+	    }
 	}
 	else
 	    while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
--- lexi.c.diff ends here ---


>Release-Note:
>Audit-Trail:
>Unformatted: