PERFORCE change 144011 for review

Konrad Jankowski konrad at FreeBSD.org
Tue Jun 24 06:46:39 UTC 2008


http://perforce.freebsd.org/chv.cgi?CH=144011

Change 144011 by konrad at vspredator on 2008/06/24 06:46:07

	style(9) fixes.
	Additional functionality added.

Affected files ...

.. //depot/projects/soc2008/konrad_collation/colldef/colldef.c#4 edit

Differences ...

==== //depot/projects/soc2008/konrad_collation/colldef/colldef.c#4 (text+ko) ====

@@ -37,7 +37,6 @@
 #include <strings.h>
 #include <unistd.h>
 
-#define DEFAULT_IN_FILE		"../colldef_in.src.simple"
 #define DEFAULT_OUT_FILE	"LC_COLLATE"
 #define WEIGHT_TABLE_SIZE	(1 << 16)
 #define NWEIGHTS		4
@@ -45,21 +44,22 @@
 struct sym_entry {
 	char *name;
 	int val;
-	SLIST_ENTRY(sym_entry) sym_next;
+	SLIST_ENTRY(sym_entry) next;
 };
 
-SLIST_HEAD(, sym_entry) head;
+static SLIST_HEAD(, sym_entry) head;
 
 /*
  * This will have to be a structure, to at least accomodate symbol chaining.
  */
-uint8_t weight_table[WEIGHT_TABLE_SIZE][NWEIGHTS];
-struct weight_uncompressed {
+static uint8_t weight_table[WEIGHT_TABLE_SIZE][NWEIGHTS];
+
+static struct weight_uncompressed {
 	uint16_t w[NWEIGHTS];
 	char used;
 } weights_uncompressed[WEIGHT_TABLE_SIZE];
 
-int verbose = 0;
+static int verbose = 0;
 
 void usage(char *name);
 void assign_weights(int codepoint, char *weights);
@@ -76,9 +76,10 @@
 
 	if ((sym = malloc(sizeof(*sym))) == NULL)
 		err(1, "add_symbol: malloc(%d)", sizeof(*sym));
-	sym->name = strdup(name);
+	if ((sym->name = strdup(name)) == NULL)
+		err(1, "add_symbol: strdup(%d)", strlen(name) + 1);
 	sym->val = counter++;
-	SLIST_INSERT_HEAD(&head, sym, sym_next);
+	SLIST_INSERT_HEAD(&head, sym, next);
 }
 
 /*
@@ -90,10 +91,10 @@
 {
 	struct sym_entry *sym;
 
-	SLIST_FOREACH(sym, &head, sym_next) {
+	SLIST_FOREACH(sym, &head, next)
 		if (strcmp(sym->name, name) == 0)
 			return sym->val;
-	}
+	
 	return 0;
 }
 
@@ -102,10 +103,9 @@
 {
 	struct sym_entry *sym;
 
-	SLIST_FOREACH(sym, &head, sym_next) {
+	SLIST_FOREACH(sym, &head, next)
 		printf("sym->name=%s sym->val=%d\n",
 			sym->name, sym->val);
-	}
 }
 
 void
@@ -122,6 +122,10 @@
 	}
 }
 
+/*
+ * Decode a unicode codepoint stored in UTF-8 format, each byte coded
+ * as a hexadecimal constant.
+ */
 int
 get_codepoint(char *p, char **p_end)
 {
@@ -131,7 +135,10 @@
 	wchar_t out = 0;
 
 	do {
-		/* without leading "0x" will also work */
+		/*
+		 * Scanf without leading "0x" will also work,
+		 * but we don't have such cases in our input data.
+		 */
 		p[0] = '0';
 		sscanf(p, "%x%n", &tmp, &len);
 		p += len;
@@ -141,30 +148,31 @@
 	len = mbtowc(&out, synthesis, i);
 	assert(len == i || synthesis[0] == 0);
 	if (p_end != NULL)
-		*p_end = p;	/* return where we got to */
+		*p_end = p;	/* Return where we got to. */
 
 	return tmp;
 }
 
 void
-process_file(char *name)
+process_file(FILE *f)
 {
 	unsigned codepoint = 0;
 	char buf[512], *p;
 
 	bzero(weights_uncompressed, sizeof(weights_uncompressed));
 	SLIST_INIT(&head);
-	if (freopen(name, "r", stdin) == NULL)
-		err(1, "freopen: %s", name);
-	while (fgets(buf, sizeof(buf), stdin)) {
+	while (fgets(buf, sizeof(buf), f)) {
 #ifdef DEBUG
 		fputs(buf, stdout);
 #endif
 		buf[strlen(buf) - 1] = 0;
 		switch (buf[0]) {
 		case '<':
-			/*assert(buf[1] == 'X');*/
-			if (buf[1] == 'X')	/* XXX */
+			/*
+			 * XXX. The assumption here is that collating
+			 * symbols start with 'X'.
+			 */
+			if (buf[1] == 'X')
 				add_symbol(buf);
 			break;
 		case '\\':
@@ -174,10 +182,15 @@
 
 		}
 	}
-
-	fclose(stdin);	/* not really needed - freopen closes */
 }
 
+/*
+ * This function decodes a weight, which can be given as:
+ * 1. a symbol name in angle brackets - in which case we have to look it up in
+ * our symbol table.
+ * 2. IGNORE keyword - this weight should be ignored.
+ * 3. A literal hexadecimal value prefixed with "\x".
+ */
 int
 get_weight_val(char **p)
 {
@@ -189,17 +202,18 @@
 	case '<':
 		p3 = strchr(p2, '>');
 		assert(p3);
-		p3[1] = 0;	/* truncate the string for get_symval */
+		p3[1] = 0;	/* Truncate the string for get_symval. */
 		ret = get_symval(p2);
-		*p = p3 + 2;	/* skip over ';' also */
-		p3[1] = ';';	/* restore string; we shouldn't care... */
+		if (ret == 0)
+			errx(1, "get_weight_val: symbol %s not found",
+			    p2);
+		*p = p3 + 2;	/* Skip over ';' also. */
+		p3[1] = ';';	/* Restore string; we shouldn't care... */
 		return ret;
-	case 'I':
-		/* IGNORE */
-		*p += 7;
-		return 0;	/* IGNORE means 0 (I think) */
-	case '\\':
-		/* we get literal value, instead of symbol */
+	case 'I':		/* IGNORE */
+		*p += 7;	/* IGNORE has 6 letters, + ';'. */
+		return 0;	/* IGNORE means 0 (I think). */
+	case '\\':		/* Literal value. Decode it. */
 		ret = get_codepoint(p2, &p3);
 		*p = p3 + 1;
 		return ret;
@@ -209,21 +223,28 @@
 	}
 }
 
+/*
+ * Take a string of four weights, separated by semicolons,
+ * decode them and assign to the weight table at the given codepoint
+ * position.
+ */
 void
 assign_weights(int codepoint, char *weights)
 {
-	int i;
+	int i, val;
 
 	assert(weights != NULL);
 	weights_uncompressed[codepoint].used = 1;
 	for (i = 0; i < 4; i++) {
-		int val;
-
 		val = get_weight_val(&weights);
 		weights_uncompressed[codepoint].w[i] = val;
 	}
 }
 
+/*
+ * Assign new weight value to all codepoint with the given value 'val'.
+ * Do this only for the first level (w[0]).
+ */
 void
 reduce(int val, int new_val)
 {
@@ -242,15 +263,15 @@
  * Find 2 minimums from the given set.
  * Optimised to only make one pass throught the set. (data locality)
  */
-#define MIN_MAX (1<<15)
+#define MIN_MAX (1 << 15)
 int
 find_min(int start, int *min2_ret)
 {
 	int min = MIN_MAX, min2, min3 = MIN_MAX;
-	int i;
+	int i, val;
 
 	for (i = 0; i < WEIGHT_TABLE_SIZE; i++) {
-		int val = weights_uncompressed[i].w[0];
+		val = weights_uncompressed[i].w[0];
 		/* 1. case, at first we find no a minimum */
 		if (val >= start && val < min) {
 			min2 = min;
@@ -275,15 +296,14 @@
 	int i;
 
 	printf("%d: ", level);
-	for (i = 0; i < WEIGHT_TABLE_SIZE; i++) {
+	for (i = 0; i < WEIGHT_TABLE_SIZE; i++)
 		if (weights_uncompressed[i].used &&
 		    weights_uncompressed[i].w[0] == level)
 			printf(" (%d %d %d)", weights_uncompressed[i].w[1],
-			weights_uncompressed[i].w[2],
-			weights_uncompressed[i].w[3]);
+			    weights_uncompressed[i].w[2],
+			    weights_uncompressed[i].w[3]);
 
-	}
-	printf("\n");
+	putchar('\n');
 }
 
 /*
@@ -319,66 +339,75 @@
 }
 
 void
-binary_output(char *out_file)
+binary_output(FILE *f)
 {
 	int i, j;
-	int out;
-	
-	/*
-	 * I just use open and write, instead of stdio in this case.
-	 * This program isn't meant to be portable from UNIX.
-	 */
-	if ((out = open(out_file, O_WRONLY | O_CREAT | O_TRUNC, 0644)) == -1)
-		err(1, "open(%s)", out_file);
+
 	for (i = 0; i < WEIGHT_TABLE_SIZE; i++)
 		for (j = 0; j < NWEIGHTS; j++)
 			weight_table[i][j] = weights_uncompressed[i].w[j];
-	if (write(out, weight_table, sizeof(weight_table)) != 
-	    sizeof(weight_table))
+	if (fwrite(weight_table, sizeof(weight_table), 1, f) != 1)
 		errx(1, "not full write");
-	close(out);
 }
 
 /*
- * I divided the process into 3 main functions, so we could optionally
- * process multiple input files with better argument processing.
- * Another questionable optimisation.
+ * I divided the process into 3 main functions, so we can
+ * process multiple input files with one call.
  */
 int
 main(int argc, char *argv[])
 {
+	FILE *f, *of;
 	int ch;
-	char *in_file = DEFAULT_IN_FILE;
-	char *out_file = DEFAULT_OUT_FILE;
+	char name[512];
 
-	while ((ch = getopt(argc, argv, "hf:o:v")) != -1) {
+	while ((ch = getopt(argc, argv, "h:v")) != -1) {
 		switch (ch) {
-		case 'f':
-			in_file = optarg;
-			break;
-		case 'o':
-			out_file = optarg;
-			break;
 		case 'v':
 			verbose = 1;
-		break;
+			break;
 		default:
 			usage(argv[0]);
 		}
 	}
+	argv += optind;
+	argc -= optind;
 
 	/*
-	 * need to setlocale to an UTF-8 locale, so thet
-	 * mbtowc works correctly
+	 * Need to setlocale to an UTF-8 locale, so that
+	 * mbtowc works correctly.
 	 */
 	setlocale(LC_ALL, "en_US.UTF-8");
 
-	process_file(in_file);
+	if (argc) for (; argc; argc--, argv++) {
+		if ((f = fopen(argv[0], "r")) == NULL)
+			err(1, "fopen: %s", argv[0]);
+		process_file(f);
+		fclose(f);
+#ifdef DEBUG2
+		dump_table();
+#endif
+		compress_weights();
+		snprintf(name, sizeof(name),
+		    "%s.%s", argv[0], DEFAULT_OUT_FILE);
+		if ((of = fopen(name, "w")) == NULL)
+			err(1, "fopen: %s", name);
+		binary_output(of);
+		fclose(of);
+	} else {
+		process_file(stdin);
 #ifdef DEBUG2
-	dump_table();
+		dump_table();
 #endif
-	compress_weights();
-	binary_output(out_file);
+		compress_weights();
+		/*
+		 * We could write to stdout here...
+		 */
+		if ((of = fopen(DEFAULT_OUT_FILE, "w")) == NULL)
+			err(1, "fopen: %s", name);
+		binary_output(of);
+		fclose(of);
+	}
 
 	return 0;
 }
@@ -387,9 +416,11 @@
 usage(char *name)
 {
 	printf( "usage: "
-		"\t%s [-f input_file] [-o output_file]\n"
-		"\tdefault output file is LC_COLLATE\n"
+		"\t%s [-h] [-v] [input_file_1] ... [input_file_n] \n"
+		"\t output file name is LC_COLLATE\n"
+		"\t if one or more input files given as arguments, "
+		"\t output file name is the input file name with\n"
+		"LC_COLLATE concatenaded.\n"
 		, name);
 	exit(1);
 }
-


More information about the p4-projects mailing list