svn commit: r354847 - head/usr.bin/unifdef

Conrad Meyer cem at FreeBSD.org
Tue Nov 19 04:30:24 UTC 2019


Author: cem
Date: Tue Nov 19 04:30:23 2019
New Revision: 354847
URL: https://svnweb.freebsd.org/changeset/base/354847

Log:
  unifdef(1): Improve worst-case bound on symbol resolution
  
  Use RB_TREE to make some algorithms O(lg N) and O(N lg N) instead of O(N)
  and O(N^2).  Because N is typically small and the former linear array also has
  great constant factors (as a property of CPU caching), this doesn't provide
  material benefit most or all of the time.
  
  While here, remove arbitrarily limit on number of macros understood.

Modified:
  head/usr.bin/unifdef/unifdef.c

Modified: head/usr.bin/unifdef/unifdef.c
==============================================================================
--- head/usr.bin/unifdef/unifdef.c	Tue Nov 19 04:23:57 2019	(r354846)
+++ head/usr.bin/unifdef/unifdef.c	Tue Nov 19 04:30:23 2019	(r354847)
@@ -45,8 +45,11 @@
  *   it possible to handle all "dodgy" directives correctly.
  */
 
+#include <sys/param.h>
 #include <sys/stat.h>
+#include <sys/tree.h>
 
+#include <assert.h>
 #include <ctype.h>
 #include <err.h>
 #include <stdarg.h>
@@ -149,7 +152,6 @@ static char const * const linestate_name[] = {
  */
 #define	MAXDEPTH        64			/* maximum #if nesting */
 #define	MAXLINE         4096			/* maximum length of line */
-#define	MAXSYMS         16384			/* maximum number of symbols */
 
 /*
  * Sometimes when editing a keyword the replacement text is longer, so
@@ -158,6 +160,26 @@ static char const * const linestate_name[] = {
 #define	EDITSLOP        10
 
 /*
+ * C17/18 allow 63 characters per macro name, but up to 127 arbitrarily large
+ * parameters.
+ */
+struct macro {
+	RB_ENTRY(macro)	entry;
+	const char	*name;
+	const char	*value;
+	bool		ignore;		/* -iDsym or -iUsym */
+};
+
+static int
+macro_cmp(struct macro *a, struct macro *b)
+{
+	return (strcmp(a->name, b->name));
+}
+
+static RB_HEAD(macrohd, macro) macro_tree = RB_INITIALIZER(&macro_tree);
+RB_GENERATE_STATIC(macrohd, macro, entry, macro_cmp);
+
+/*
  * Globals.
  */
 
@@ -174,11 +196,6 @@ static bool             symlist;		/* -s: output symbol
 static bool             symdepth;		/* -S: output symbol depth */
 static bool             text;			/* -t: this is a text file */
 
-static const char      *symname[MAXSYMS];	/* symbol name */
-static const char      *value[MAXSYMS];		/* -Dsym=value */
-static bool             ignore[MAXSYMS];	/* -iDsym or -iUsym */
-static int              nsyms;			/* number of symbols */
-
 static FILE            *input;			/* input file pointer */
 static const char      *filename;		/* input file name */
 static int              linenum;		/* current line number */
@@ -227,12 +244,12 @@ static char            *astrcat(const char *, const ch
 static void             cleantemp(void);
 static void             closeio(void);
 static void             debug(const char *, ...);
-static void             debugsym(const char *, int);
+static void             debugsym(const char *, const struct macro *);
 static bool             defundef(void);
 static void             defundefile(const char *);
 static void             done(void);
 static void             error(const char *);
-static int              findsym(const char **);
+static struct macro    *findsym(const char **);
 static void             flushline(bool);
 static void             hashline(void);
 static void             help(void);
@@ -807,7 +824,7 @@ static Linetype
 parseline(void)
 {
 	const char *cp;
-	int cursym;
+	struct macro *cursym;
 	Linetype retval;
 	Comment_state wascomment;
 
@@ -829,15 +846,15 @@ parseline(void)
 	if ((cp = matchsym("ifdef", keyword)) != NULL ||
 	    (cp = matchsym("ifndef", keyword)) != NULL) {
 		cp = skipcomment(cp);
-		if ((cursym = findsym(&cp)) < 0)
+		if ((cursym = findsym(&cp)) == NULL)
 			retval = LT_IF;
 		else {
 			retval = (keyword[2] == 'n')
 			    ? LT_FALSE : LT_TRUE;
-			if (value[cursym] == NULL)
+			if (cursym->value == NULL)
 				retval = (retval == LT_TRUE)
 				    ? LT_FALSE : LT_TRUE;
-			if (ignore[cursym])
+			if (cursym->ignore)
 				retval = (retval == LT_TRUE)
 				    ? LT_TRUEI : LT_FALSEI;
 		}
@@ -1037,7 +1054,7 @@ eval_unary(const struct ops *ops, long *valp, const ch
 {
 	const char *cp;
 	char *ep;
-	int sym;
+	struct macro *sym;
 	bool defparen;
 	Linetype lt;
 
@@ -1102,27 +1119,27 @@ eval_unary(const struct ops *ops, long *valp, const ch
 			debug("eval%d defined missing ')'", prec(ops));
 			return (LT_ERROR);
 		}
-		if (sym < 0) {
+		if (sym == NULL) {
 			debug("eval%d defined unknown", prec(ops));
 			lt = LT_IF;
 		} else {
-			debug("eval%d defined %s", prec(ops), symname[sym]);
-			*valp = (value[sym] != NULL);
+			debug("eval%d defined %s", prec(ops), sym->name);
+			*valp = (sym->value != NULL);
 			lt = *valp ? LT_TRUE : LT_FALSE;
 		}
 		constexpr = false;
 	} else if (!endsym(*cp)) {
 		debug("eval%d symbol", prec(ops));
 		sym = findsym(&cp);
-		if (sym < 0) {
+		if (sym == NULL) {
 			lt = LT_IF;
 			cp = skipargs(cp);
-		} else if (value[sym] == NULL) {
+		} else if (sym->value == NULL) {
 			*valp = 0;
 			lt = LT_FALSE;
 		} else {
-			*valp = strtol(value[sym], &ep, 0);
-			if (*ep != '\0' || ep == value[sym])
+			*valp = strtol(sym->value, &ep, 0);
+			if (*ep != '\0' || ep == sym->value)
 				return (LT_ERROR);
 			lt = *valp ? LT_TRUE : LT_FALSE;
 			cp = skipargs(cp);
@@ -1439,17 +1456,17 @@ matchsym(const char *s, const char *t)
  * Look for the symbol in the symbol table. If it is found, we return
  * the symbol table index, else we return -1.
  */
-static int
+static struct macro *
 findsym(const char **strp)
 {
 	const char *str;
-	int symind;
+	struct macro key, *res;
 
 	str = *strp;
 	*strp = skipsym(str);
 	if (symlist) {
 		if (*strp == str)
-			return (-1);
+			return (NULL);
 		if (symdepth && firstsym)
 			printf("%s%3d", zerosyms ? "" : "\n", depth);
 		firstsym = zerosyms = false;
@@ -1458,15 +1475,14 @@ findsym(const char **strp)
 		       (int)(*strp-str), str,
 		       symdepth ? "" : "\n");
 		/* we don't care about the value of the symbol */
-		return (0);
+		return (NULL);
 	}
-	for (symind = 0; symind < nsyms; ++symind) {
-		if (matchsym(symname[symind], str) != NULL) {
-			debugsym("findsym", symind);
-			return (symind);
-		}
-	}
-	return (-1);
+
+	key.name = str;
+	res = RB_FIND(macrohd, &macro_tree, &key);
+	if (res != NULL)
+		debugsym("findsym", res);
+	return (res);
 }
 
 /*
@@ -1476,22 +1492,23 @@ static void
 indirectsym(void)
 {
 	const char *cp;
-	int changed, sym, ind;
+	int changed;
+	struct macro *sym, *ind;
 
 	do {
 		changed = 0;
-		for (sym = 0; sym < nsyms; ++sym) {
-			if (value[sym] == NULL)
+		RB_FOREACH(sym, macrohd, &macro_tree) {
+			if (sym->value == NULL)
 				continue;
-			cp = value[sym];
+			cp = sym->value;
 			ind = findsym(&cp);
-			if (ind == -1 || ind == sym ||
+			if (ind == NULL || ind == sym ||
 			    *cp != '\0' ||
-			    value[ind] == NULL ||
-			    value[ind] == value[sym])
+			    ind->value == NULL ||
+			    ind->value == sym->value)
 				continue;
 			debugsym("indir...", sym);
-			value[sym] = value[ind];
+			sym->value = ind->value;
 			debugsym("...ectsym", sym);
 			changed++;
 		}
@@ -1523,29 +1540,29 @@ addsym1(bool ignorethis, bool definethis, char *symval
  * Add a symbol to the symbol table.
  */
 static void
-addsym2(bool ignorethis, const char *sym, const char *val)
+addsym2(bool ignorethis, const char *symname, const char *val)
 {
-	const char *cp = sym;
-	int symind;
+	const char *cp = symname;
+	struct macro *sym, *r;
 
-	symind = findsym(&cp);
-	if (symind < 0) {
-		if (nsyms >= MAXSYMS)
-			errx(2, "too many symbols");
-		symind = nsyms++;
+	sym = findsym(&cp);
+	if (sym == NULL) {
+		sym = calloc(1, sizeof(*sym));
+		sym->ignore = ignorethis;
+		sym->name = symname;
+		sym->value = val;
+		r = RB_INSERT(macrohd, &macro_tree, sym);
+		assert(r == NULL);
 	}
-	ignore[symind] = ignorethis;
-	symname[symind] = sym;
-	value[symind] = val;
-	debugsym("addsym", symind);
+	debugsym("addsym", sym);
 }
 
 static void
-debugsym(const char *why, int symind)
+debugsym(const char *why, const struct macro *sym)
 {
-	debug("%s %s%c%s", why, symname[symind],
-	    value[symind] ? '=' : ' ',
-	    value[symind] ? value[symind] : "undef");
+	debug("%s %s%c%s", why, sym->name,
+	    sym->value ? '=' : ' ',
+	    sym->value ? sym->value : "undef");
 }
 
 /*


More information about the svn-src-head mailing list