bin/123553: [patch] Prevent indent(1) from splitting unrecognized
tokens
Romain Tartiere
romain at blogreen.org
Fri May 9 14:20:04 UTC 2008
>Number: 123553
>Category: bin
>Synopsis: [patch] Prevent indent(1) from splitting unrecognized tokens
>Confidential: no
>Severity: non-critical
>Priority: medium
>Responsible: freebsd-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: sw-bug
>Submitter-Id: current-users
>Arrival-Date: Fri May 09 14:20:04 UTC 2008
>Closed-Date:
>Last-Modified:
>Originator: Romain Tartiere
>Release: FreeBSD 7.0-STABLE i386
>Organization:
>Environment:
System: FreeBSD marvin.blogreen.org 7.0-STABLE FreeBSD 7.0-STABLE #14: Fri Apr 18 18:27:58 CEST 2008 root at marvin.blogreen.org:/usr/obj/usr/src/sys/MARVIN i386
>Description:
When indent(1) is used to indent source code, unrecognized tokens such as "0b00101010" are split in two (e.g. "0 b00101010").
Such constructs are, however, valid with avr-gcc from the ports collection, and upcoming releases of gcc will support this binary notation [1].
References:
1. As noticed by Frank Behrens: http://lists.freebsd.org/pipermail/freebsd-hackers/2008-April/024343.html
>How-To-Repeat:
% echo "int x = 0b00101010 ;" > foo.c
% avr-gcc -c foo.c
% indent foo.c
% avr-gcc -c foo.c
foo.c:1: error: expected ',' or ';' before 'b00101010'
% cat foo.c
int x = 0 b00101010;
>Fix:
The following patch attempts to detect numbers in different bases and checks that they are valid, but avoids splitting the token when it contains unrecognized data:
--- lexi.c.diff begins here ---
--- /usr/src/usr.bin/indent/lexi.c 2005-11-20 14:48:15.000000000 +0100
+++ lexi.c 2008-04-27 15:09:21.000000000 +0200
@@ -121,6 +121,10 @@
1, 1, 1, 0, 3, 0, 3, 0
};
+enum base {
+ BASE_2, BASE_8, BASE_10, BASE_16
+};
+
int
lexi(void)
{
@@ -158,16 +162,37 @@
int seendot = 0,
seenexp = 0,
seensfx = 0;
- if (*buf_ptr == '0' &&
- (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
- *e_token++ = *buf_ptr++;
- *e_token++ = *buf_ptr++;
- while (isxdigit(*buf_ptr)) {
+ enum base in_base = BASE_10;
+
+ if (*buf_ptr == '0') {
+ if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
+ in_base = BASE_2;
+ else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
+ in_base = BASE_16;
+ else
+ in_base = BASE_8;
+ }
+
+ *e_token++ = *buf_ptr++;
+ if (in_base == BASE_2 || in_base == BASE_16)
+ *e_token++ = *buf_ptr++; /* Read the second character from
+ * 0b... / 0x... expressions.
+ */
+
+ switch (in_base) {
+ case BASE_2:
+ while (*buf_ptr == '0' || *buf_ptr == '1') {
CHECK_SIZE_TOKEN;
*e_token++ = *buf_ptr++;
}
- }
- else
+ break;
+ case BASE_8:
+ while (*buf_ptr >= '0' && *buf_ptr <= '8') {
+ CHECK_SIZE_TOKEN;
+ *e_token++ = *buf_ptr++;
+ }
+ break;
+ case BASE_10:
while (1) {
if (*buf_ptr == '.') {
if (seendot)
@@ -209,6 +234,29 @@
}
break;
}
+
+ break;
+ case BASE_16:
+ while (isxdigit(*buf_ptr)) {
+ CHECK_SIZE_TOKEN;
+ *e_token++ = *buf_ptr++;
+ }
+ break;
+ }
+ if (isalnum(*buf_ptr)) {
+ char *buf;
+ /* current token is malformed */
+ if (asprintf(&buf, "Ignoring invalid numeric "
+ "expression '%s%c...'", s_token, *buf_ptr)) {
+ diag2(0, buf);
+ free(buf);
+ }
+ /* finish to eat the current token */
+ while (isalnum(*buf_ptr)) {
+ CHECK_SIZE_TOKEN;
+ *e_token++ = *buf_ptr++;
+ }
+ }
}
else
while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
--- lexi.c.diff ends here ---
>Release-Note:
>Audit-Trail:
>Unformatted:
More information about the freebsd-bugs
mailing list