PERFORCE change 148978 for review
Konrad Jankowski
konrad at FreeBSD.org
Mon Sep 1 11:22:48 UTC 2008
http://perforce.freebsd.org/chv.cgi?CH=148978
Change 148978 by konrad at vspredator on 2008/09/01 11:22:34
Added proper expansion support to colldef. This is not a production version;
it still has to go through a round of space optimisation.
Affected files ...
... //depot/projects/soc2008/konrad_collation/colldef.apple/collate.h#7 edit
... //depot/projects/soc2008/konrad_collation/colldef.apple/parse.y#9 edit
Differences ...
==== //depot/projects/soc2008/konrad_collation/colldef.apple/collate.h#7 (text+ko) ====
@@ -33,6 +33,8 @@
#include <sys/cdefs.h>
#ifndef __LIBC__
#include <sys/types.h>
+#else
+#include <setlocale.h> /* for ENCODING_LEN */
#endif /* !__LIBC__ */
#include <limits.h>
@@ -46,6 +48,7 @@
#define COLLATE_VERSION1_1A "1.1A\n"
#define COLLATE_VERSION1_2 "1.2\n"
#define COLLATE_VERSION1_3 "1.3\n"
+#define COLLATE_VERSION1_4 "1.4\n"
/* see discussion in string/FreeBSD/strxfrm for this value */
#define COLLATE_MAX_PRIORITY ((1 << 24) - 1)
@@ -63,7 +66,7 @@
struct __collate_st_info {
__uint8_t directive[COLL_WEIGHTS_MAX];
__uint8_t flags;
-#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+#if _BYTE_ORDER == _LITTLE_ENDIAN
unsigned int directive_count:4;
unsigned int chain_max_len:4;
#else
@@ -76,10 +79,18 @@
__int32_t large_pri_count;
};
+struct weight_map_struct
+{
+ unsigned int v:4;
+};
+
+#define COLL_WEIGHTS_REAL (COLL_WEIGHTS_MAX * 4)
struct __collate_st_char_pri {
- __int32_t pri[COLL_WEIGHTS_MAX];
+ struct weight_map_struct map[COLL_WEIGHTS_MAX];
+ __int32_t pri[COLL_WEIGHTS_REAL];
};
struct __collate_st_chain_pri {
+ struct weight_map_struct map[COLL_WEIGHTS_MAX];
wchar_t str[STR_LEN];
__int32_t pri[COLL_WEIGHTS_MAX];
};
@@ -92,29 +103,34 @@
wchar_t str[STR_LEN];
};
-#ifndef __LIBC__
+#ifdef __LIBC__
+struct __locale_st_collate {
+ char __encoding[ENCODING_LEN + 1];
+ struct __collate_st_info __info;
+ struct __collate_st_subst *__substitute_table[COLL_WEIGHTS_MAX];
+ struct __collate_st_chain_pri *__chain_pri_table;
+ struct __collate_st_large_char_pri *__large_char_pri_table;
+ struct __collate_st_char_pri __char_pri_table[UCHAR_MAX + 1];
+};
+#endif
+
extern int __collate_load_error;
extern int __collate_substitute_nontrivial;
-#define __collate_char_pri_table (*__collate_char_pri_table_ptr)
-extern struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1];
-extern struct __collate_st_chain_pri *__collate_chain_pri_table;
-extern __int32_t *__collate_chain_equiv_table;
-extern struct __collate_st_info __collate_info;
-#endif /* !__LIBC__ */
+extern struct __locale_st_collate *__collate_data;
__BEGIN_DECLS
#ifdef __LIBC__
-wchar_t *__collate_mbstowcs(const char *, locale_t);
+wchar_t *__collate_mbstowcs(const char *);
wchar_t *__collate_wcsdup(const wchar_t *);
-wchar_t *__collate_substitute(const wchar_t *, int, locale_t);
-int __collate_load_tables(const char *, locale_t);
-void __collate_lookup_l(const wchar_t *, int *, int *, int *, locale_t);
-void __collate_lookup_which(const wchar_t *, int *, int *, int, locale_t);
-void __collate_xfrm(const wchar_t *, wchar_t **, locale_t);
-int __collate_range_cmp(wchar_t, wchar_t, locale_t);
-size_t __collate_collating_symbol(wchar_t *, size_t, const char *, size_t, mbstate_t *, locale_t);
-int __collate_equiv_class(const char *, size_t, mbstate_t *, locale_t);
-size_t __collate_equiv_match(int, wchar_t *, size_t, wchar_t, const char *, size_t, mbstate_t *, size_t *, locale_t);
+wchar_t *__collate_substitute(const wchar_t *, int);
+int __collate_load_tables(const char *);
+void __collate_lookup_l(const wchar_t *, int *, int *, int *);
+void __collate_lookup_which(const wchar_t *, int *, int *, int);
+void __collate_xfrm(const wchar_t *, wchar_t **);
+int __collate_range_cmp(wchar_t, wchar_t);
+size_t __collate_collating_symbol(wchar_t *, size_t, const char *, size_t, mbstate_t *);
+int __collate_equiv_class(const char *, size_t, mbstate_t *);
+size_t __collate_equiv_match(int, wchar_t *, size_t, wchar_t, const char *, size_t, mbstate_t *, size_t *);
#else /* !__LIBC__ */
void __collate_lookup(const unsigned char *, int *, int *, int *);
#endif /* __LIBC__ */
==== //depot/projects/soc2008/konrad_collation/colldef.apple/parse.y#9 (text+ko) ====
@@ -29,6 +29,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/usr.bin/colldef/parse.y,v 1.31 2002/10/16 12:56:22 charnier Exp $");
+#include <assert.h>
#include <arpa/inet.h>
#include <err.h>
#include <stdarg.h>
@@ -89,12 +90,19 @@
static DB *chaindb;
static int nchain = 0;
static DB *stringdb;
-static struct symbol prev_weight_table[COLL_WEIGHTS_MAX];
-static struct symbol prev2_weight_table[COLL_WEIGHTS_MAX];
-static struct symbol weight_table[COLL_WEIGHTS_MAX];
+
+static struct symbol prev_weight_table[COLL_WEIGHTS_REAL];
+static struct symbol prev2_weight_table[COLL_WEIGHTS_REAL];
+static struct symbol weight_table[COLL_WEIGHTS_REAL];
+
+struct weight_map_struct weight_map[COLL_WEIGHTS_MAX];
+struct weight_map_struct prev_weight_map[COLL_WEIGHTS_MAX];
+struct weight_map_struct prev2_weight_map[COLL_WEIGHTS_MAX];
+
static int prev_line = LINE_NONE;
static struct symbol *prev_elem;
static int weight_index = 0;
+static int map_idx = 0;
static int allow_ellipsis = 0;
static struct symbol sym_ellipsis = {SYMBOL_ELLIPSIS, PRI_UNDEFINED, L"", {0}};
static struct symbol sym_ignore = {SYMBOL_IGNORE, PRI_IGNORE, L"", {0}};
@@ -113,7 +121,6 @@
#endif
struct __collate_st_info info = {{DIRECTIVE_FORWARD, DIRECTIVE_FORWARD}, 0, 0, 0, {PRI_UNDEFINED, PRI_UNDEFINED}, {PRI_UNDEFINED}, 0, 0};
-/* Some of the code expects COLL_WEIGHTS_MAX == 2 */
int directive_count = COLL_WEIGHTS_MAX;
const char *out_file = "LC_COLLATE";
@@ -209,8 +216,10 @@
/* we don't set the byte order of t->val, since we
* need it for sorting */
t->val = cval;
- for(z = 0; z < directive_count; z++)
+ for(z = 0; z < COLL_WEIGHTS_REAL; z++)
t->pri.pri[z] = htonl(p->pri[z]);
+ for (z = 0; z < directive_count; z++)
+ t->pri.map[z].v = p->map[z].v;
t++;
flags = R_NEXT;
}
@@ -232,17 +241,20 @@
int flags = R_FIRST;
DBT key, val;
struct symbol *v;
+
while((ret = charmapdb->seq(charmapdb, &key, &val, flags)) == 0) {
memcpy(&v, val.data, sizeof(struct symbol *));
switch(v->type) {
case SYMBOL_CHAR: {
struct __collate_st_char_pri *p = haspri(v->u.wc);
+
if (!p || p->pri[0] == PRI_UNDEFINED)
warnx("<%s> was not defined", showwcs((const wchar_t *)key.data, key.size / sizeof(wchar_t)));
break;
}
case SYMBOL_CHAIN: {
struct __collate_st_chain_pri *p = getchain(v->u.str, EXISTS);
+
if (p->pri[0] == PRI_UNDEFINED)
warnx("<%s> was not defined", showwcs((const wchar_t *)key.data, key.size / sizeof(wchar_t)));
break;
@@ -307,34 +319,34 @@
err(EX_UNAVAILABLE, "can't open destination file %s",
out_file);
- strcpy(__collate_version, COLLATE_VERSION1_3);
+ strcpy(__collate_version, COLLATE_VERSION1_4);
if (fwrite(__collate_version, sizeof(__collate_version), 1, fp) != 1)
err(EX_IOERR,
"IO error writting collate version to destination file %s",
out_file);
-#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+#if _BYTE_ORDER == _LITTLE_ENDIAN
for(z = 0; z < directive_count; z++) {
info.undef_pri[z] = htonl(info.undef_pri[z]);
info.subst_count[z] = htonl(info.subst_count[z]);
}
info.chain_count = htonl(info.chain_count);
info.large_pri_count = htonl(info.large_pri_count);
-#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
+#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */
if (fwrite(&info, sizeof(info), 1, fp) != 1)
err(EX_IOERR,
"IO error writting collate info to destination file %s",
out_file);
-#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+#if _BYTE_ORDER == _LITTLE_ENDIAN
{
int i, z;
struct __collate_st_char_pri *p = __collate_char_pri_table;
for(i = UCHAR_MAX + 1; i-- > 0; p++) {
- for(z = 0; z < directive_count; z++)
+ for(z = 0; z < COLL_WEIGHTS_REAL; z++)
p->pri[z] = htonl(p->pri[z]);
}
}
-#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
+#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */
if (fwrite(__collate_char_pri_table,
sizeof(__collate_char_pri_table), 1, fp) != 1)
err(EX_IOERR,
@@ -342,14 +354,15 @@
out_file);
for(z = 0; z < directive_count; z++) {
if (nsubst[z] > 0) {
-#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+#if _BYTE_ORDER == _LITTLE_ENDIAN
struct __collate_st_subst *t = __collate_substitute_table[z];
int i;
+
for(i = nsubst[z]; i > 0; i--) {
t->val = htonl(t->val);
t++;
}
-#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
+#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */
if ((int)fwrite(__collate_substitute_table[z], sizeof(struct __collate_st_subst), nsubst[z], fp) != nsubst[z])
err(EX_IOERR,
"IO error writting large substprim table %d to destination file %s",
@@ -357,7 +370,7 @@
}
}
if (nchain > 0) {
-#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+#if _BYTE_ORDER == _LITTLE_ENDIAN
int i, j, z;
struct __collate_st_chain_pri *p = __collate_chain_pri_table;
wchar_t *w;
@@ -368,7 +381,7 @@
for(z = 0; z < directive_count; z++)
p->pri[z] = htonl(p->pri[z]);
}
-#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
+#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */
if (fwrite(__collate_chain_pri_table,
sizeof(*__collate_chain_pri_table), nchain, fp) !=
(size_t)nchain)
@@ -378,14 +391,14 @@
}
if (nlargemap > 0) {
-#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+#if _BYTE_ORDER == _LITTLE_ENDIAN
struct __collate_st_large_char_pri *t = __collate_large_char_pri_table;
int i;
for(i = 0; i < nlargemap; i++) {
t->val = htonl(t->val);
t++;
}
-#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
+#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */
if ((int)fwrite(__collate_large_char_pri_table, sizeof(struct __collate_st_large_char_pri), nlargemap, fp) != nlargemap)
err(EX_IOERR,
"IO error writting large pri tables to destination file %s",
@@ -396,7 +409,6 @@
err(EX_IOERR, "IO error closing destination file %s",
out_file);
#ifdef COLLATE_DEBUG
- /* Do it first, before conversion to network byte order. */
if (debug)
collate_print_tables();
#endif
@@ -511,7 +523,7 @@
| ELEM {
struct symbol *s = getsymbol($1, EXISTS);
-#ifdef VSDEBUG
+#ifdef VSDEBUG2
printf("\n%s(%d) ", showwcs(s->name, CHARMAP_SYMBOL_LEN), s->u.wc);
#endif
if (s->val != PRI_UNDEFINED)
@@ -540,6 +552,7 @@
s->val = prim_pri;
prim_pri = s->val + 1;
weight_index = 0;
+ map_idx = 0;
} weights2 {
int i;
struct symbol *s = getsymbol($1, EXISTS);
@@ -547,16 +560,21 @@
if (weight_index != 0)
yyerror("Can't specify weights for collating symbol <%s>", showwcs($1, CHARMAP_SYMBOL_LEN));
} else if (weight_index == 0) {
- for(i = 0; i < directive_count; i++)
+ for(i = 0; i < directive_count; i++) {
weight_table[i] = *s;
- } else if (weight_index != directive_count)
+ /* Store the end, inclusive. */
+ weight_map[i].v = i;
+ }
+ } else if (map_idx != directive_count)
yyerror("Not enough weights specified");
memcpy(prev_weight_table, weight_table, sizeof(weight_table));
+ memcpy(prev_weight_map, weight_map, sizeof(weight_map));
prev_line = LINE_NORMAL;
prev_elem = s;
}
- | ELLIPSIS { weight_index = 0; allow_ellipsis = 1; } weights {
+ | ELLIPSIS { weight_index = 0; allow_ellipsis = 1; map_idx = 0; } weights {
int i;
+
if (prev_line == LINE_ELLIPSIS)
yyerror("Illegal sequential ellipsis lines");
if (prev_line == LINE_UNDEFINED)
@@ -564,11 +582,13 @@
if (prev_line == LINE_NONE)
yyerror("Ellipsis line must follow a collating identifier lines");
if (weight_index == 0) {
- for(i = 0; i < directive_count; i++)
+ for(i = 0; i < directive_count; i++) {
weight_table[i] = sym_ellipsis;
- } else if (weight_index != directive_count)
+ weight_map[i].v = i;
+ }
+ } else if (map_idx != directive_count)
yyerror("Not enough weights specified");
- for(i = 0; i < directive_count; i++) {
+ for(i = 0; i < weight_index; i++) {
if (weight_table[i].type != SYMBOL_ELLIPSIS)
continue;
switch (prev_weight_table[i].type) {
@@ -584,6 +604,8 @@
}
memcpy(prev2_weight_table, prev_weight_table, sizeof(prev_weight_table));
memcpy(prev_weight_table, weight_table, sizeof(weight_table));
+ memcpy(prev2_weight_map, prev_weight_map, sizeof(weight_map));
+ memcpy(prev_weight_map, weight_map, sizeof(weight_map));
prev_line = LINE_ELLIPSIS;
allow_ellipsis = 0;
}
@@ -592,16 +614,21 @@
yyerror("Multiple UNDEFINED lines not allowed");
sym_undefined.val = prim_pri++;
weight_index = 0;
+ map_idx = 0;
allow_ellipsis = 1;
} weights {
int i;
if (weight_index == 0) {
weight_table[0] = sym_undefined;
- for(i = 1; i < directive_count; i++)
+ weight_map[0].v = 0;
+ for(i = 1; i < directive_count; i++) {
+ weight_map[i].v = i;
weight_table[i] = sym_ellipsis;
- } else if (weight_index != directive_count)
+ }
+ } else if (map_idx != directive_count)
yyerror("Not enough weights specified");
memcpy(prev_weight_table, weight_table, sizeof(weight_table));
+ memcpy(prev_weight_map, weight_map, sizeof(weight_map));
prev_line = LINE_UNDEFINED;
}
;
@@ -609,7 +636,7 @@
| order_lines2 order_line2 '\n'
;
order_line2 :
- | ELEM { weight_index = 0; } weights2 {
+ | ELEM { weight_index = 0; map_idx = 0; } weights2 {
int i;
struct symbol *s = getsymbol($1, EXISTS);
@@ -619,12 +646,15 @@
if (weight_index != 0)
yyerror("Can't specify weights for collating symbol <%s>", showwcs($1, CHARMAP_SYMBOL_LEN));
} else if (weight_index == 0) {
- for(i = 0; i < directive_count; i++)
+ for(i = 0; i < directive_count; i++) {
weight_table[i] = *s;
- } else if (weight_index != directive_count)
+ weight_map[i].v = i;
+ }
+ } else if (map_idx != directive_count)
yyerror("Not enough weights specified");
if (prev_line == LINE_ELLIPSIS) {
int w, x;
+
for(i = 0; i < directive_count; i++) {
switch (prev_weight_table[i].type) {
case SYMBOL_CHAR:
@@ -633,13 +663,13 @@
case SYMBOL_SYMBOL:
for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) {
struct __collate_st_char_pri *p = getpri(w);
+
if (p->pri[i] != PRI_UNDEFINED)
yyerror("Char 0x%02x previously defined", w);
p->pri[i] = prev_weight_table[i].val;
}
break;
case SYMBOL_ELLIPSIS:
-
switch (weight_table[i].type) {
case SYMBOL_STRING:
yyerror("Strings can't be endpoints of ellipsis");
@@ -650,11 +680,14 @@
case SYMBOL_SYMBOL:
yyerror("Collation symbols can't be endpoints of ellipsis");
}
- if (s->val - prev_elem->val != weight_table[i].val - prev2_weight_table[i].val)
+ if (s->val - prev_elem->val !=
+ weight_table[i].val -
+ prev2_weight_table[i].val)
yyerror("Range mismatch in weight %d", i);
x = prev2_weight_table[i].val + 1;
for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) {
struct __collate_st_char_pri *p = getpri(w);
+
if (p->pri[i] != PRI_UNDEFINED)
yyerror("Char 0x%02x previously defined", w);
p->pri[i] = x++;
@@ -663,6 +696,7 @@
case SYMBOL_STRING:
for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) {
struct __collate_st_char_pri *p = getpri(w);
+
if (p->pri[i] != PRI_UNDEFINED)
yyerror("Char 0x%02x previously defined", w);
putsubst(w, i, prev_weight_table[i].u.str);
@@ -675,7 +709,13 @@
switch(s->type) {
case SYMBOL_CHAR: {
struct __collate_st_char_pri *p = getpri(s->u.wc);
- for(i = 0; i < directive_count; i++) {
+
+#ifdef VSDEBUG
+ fprintf(stderr, "%s ", showwcs(s->name, CHARMAP_SYMBOL_LEN));
+ fprintf(stderr, "weights=%d ", weight_index);
+#endif
+ /* This is the main piece of code. */
+ for(i = 0; i < weight_index; i++) {
switch (weight_table[i].type) {
case SYMBOL_CHAR:
case SYMBOL_CHAIN:
@@ -684,6 +724,9 @@
if (p->pri[i] != PRI_UNDEFINED)
yyerror("Char 0x%02x previously defined", s->u.wc);
p->pri[i] = weight_table[i].val;
+#ifdef VSDEBUG
+ fprintf(stderr, " weight[%d]=%d", i, p->pri[i]);
+#endif
break;
case SYMBOL_STRING:
if (p->pri[i] != PRI_UNDEFINED)
@@ -692,41 +735,59 @@
p->pri[i] = weight_table[i].val;
break;
}
+#if 0
+ default:
+ errx(1, "unrecognized symbol type: %d", weight_table[i].type);
+#endif
}
+#ifndef NDEBUG
+ for (i = 0; i < COLL_WEIGHTS_MAX - 1; i++)
+ assert(weight_map[i].v < weight_map[i + 1].v);
+#endif
+ memcpy(p->map, weight_map, sizeof(p->map));
+#ifdef VSDEBUG
+ fputc('\n', stderr);
+#endif
break;
}
case SYMBOL_CHAIN: {
struct __collate_st_chain_pri *p = getchain(s->u.str, EXISTS);
- for(i = 0; i < directive_count; i++) {
+
+ for(i = 0; i < weight_index; i++) {
switch (weight_table[i].type) {
case SYMBOL_CHAR:
case SYMBOL_CHAIN:
case SYMBOL_IGNORE:
case SYMBOL_SYMBOL:
if (p->pri[i] != PRI_UNDEFINED)
- yyerror("Chain %s previously defined", showwcs(s->u.str, STR_LEN));
+ yyerror("Chain %s previously defined",
+ showwcs(s->u.str, STR_LEN));
p->pri[i] = weight_table[i].val;
break;
case SYMBOL_STRING :
if (wcsncmp(s->u.str, weight_table[i].u.str, STR_LEN) != 0)
yyerror("Chain/string mismatch");
if (p->pri[i] != PRI_UNDEFINED)
- yyerror("Chain %s previously defined", showwcs(s->u.str, STR_LEN));
+ yyerror("Chain %s previously defined",
+ showwcs(s->u.str, STR_LEN));
/* negative value mean don't substitute
* the chain, but it is in an
* equivalence class */
p->pri[i] = -weight_table[i].val;
}
}
+ memcpy(p->map, weight_map, sizeof(p->map));
break;
}
}
memcpy(prev_weight_table, weight_table, sizeof(weight_table));
+ memcpy(prev_weight_map, weight_map, sizeof(weight_map));
prev_line = LINE_NORMAL;
prev_elem = s;
}
- | ELLIPSIS { weight_index = 0; allow_ellipsis = 1; } weights {
+ | ELLIPSIS { weight_index = 0; map_idx = 0; allow_ellipsis = 1; } weights {
int i;
+
if (prev_line == LINE_ELLIPSIS)
yyerror("Illegal sequential ellipsis lines");
if (prev_line == LINE_UNDEFINED)
@@ -734,11 +795,13 @@
if (prev_line == LINE_NONE)
yyerror("Ellipsis line must follow a collating identifier lines");
if (weight_index == 0) {
- for(i = 0; i < directive_count; i++)
+ for(i = 0; i < directive_count; i++) {
weight_table[i] = sym_ellipsis;
- } else if (weight_index != directive_count)
+ weight_map[i].v = i;
+ }
+ } else if (map_idx != directive_count)
yyerror("Not enough weights specified");
- for(i = 0; i < directive_count; i++) {
+ for(i = 0; i < weight_index; i++) {
if (weight_table[i].type != SYMBOL_ELLIPSIS)
continue;
switch (prev_weight_table[i].type) {
@@ -754,19 +817,23 @@
}
memcpy(prev2_weight_table, prev_weight_table, sizeof(prev_weight_table));
memcpy(prev_weight_table, weight_table, sizeof(weight_table));
+ memcpy(prev2_weight_map, prev_weight_map, sizeof(prev_weight_map));
+ memcpy(prev_weight_map, weight_map, sizeof(weight_map));
prev_line = LINE_ELLIPSIS;
allow_ellipsis = 0;
}
- | UNDEFINED { weight_index = 0; allow_ellipsis = 1; } weights {
+ | UNDEFINED { weight_index = 0; map_idx = 0; allow_ellipsis = 1; } weights {
int i;
if (weight_index == 0) {
weight_table[0] = sym_undefined;
- for(i = 1; i < directive_count; i++)
+ for(i = 1; i < directive_count; i++) {
weight_table[i] = sym_ellipsis;
- } else if (weight_index != directive_count)
+ weight_map[i].v = i;
+ }
+ } else if (map_idx != directive_count)
yyerror("Not enough weights specified");
- for(i = 0; i < directive_count; i++) {
+ for(i = 0; i < map_idx; i++) {
switch (weight_table[i].type) {
case SYMBOL_CHAR:
case SYMBOL_CHAIN:
@@ -784,7 +851,9 @@
yyerror("Strings can't be used with UNDEFINED");
}
}
+ /* Assume that UNDEFINED has 1->1 mapping. */
memcpy(prev_weight_table, weight_table, sizeof(weight_table));
+ memcpy(prev_weight_map, weight_map, sizeof(weight_map));
prev_line = LINE_UNDEFINED;
}
;
@@ -794,66 +863,70 @@
;
expansion :
weight_ex
- | expansion weight_ex_null
+ | expansion weight_ex
;
weight2 : weight
- | EXPAND expansion EXPAND {
-#ifdef VSDEBUG
- printf("[%d]=%d ",
- weight_index, weight_table[weight_index]->val);
-#endif
- weight_index++;
- }
+ | EXPAND {
+} expansion EXPAND {
+ /* Store the end (inclusive), not the beginning. */
+ weight_map[map_idx].v = weight_index - 1;
+ map_idx++;
+}
;
weights :
| weight
| weights ';' weight
;
-weight : weight_ex1
+weight : ELEM {
+ struct symbol *s;
+
+ assert(weight_index < COLL_WEIGHTS_REAL);
+ if (map_idx >= directive_count)
+ yyerror("More weights than specified by order_start (%d >= %d, %s)", map_idx,
+ directive_count, showwcs($1, CHARMAP_SYMBOL_LEN));
+ s = getsymbol($1, EXISTS);
+ if (order_pass && s->val == PRI_UNDEFINED)
+ printf("<%s> is undefined\n", showwcs($1, CHARMAP_SYMBOL_LEN));
+ weight_map[map_idx++].v = weight_index;
+ weight_table[weight_index++] = *s;
+}
| ELLIPSIS {
- if (weight_index >= directive_count)
+ if (map_idx >= directive_count)
yyerror("More weights than specified by order_start");
if (!allow_ellipsis)
yyerror("Ellipsis weight not allowed");
+ /* Mark the end of weights for this level */
+ weight_map[map_idx++].v = weight_index;
weight_table[weight_index++] = sym_ellipsis;
}
| IGNORE {
- if (weight_index >= directive_count)
+ if (map_idx >= directive_count)
yyerror("More weights than specified by order_start");
+ weight_map[map_idx++].v = weight_index;
weight_table[weight_index++] = sym_ignore;
}
| STRING {
- if (weight_index >= directive_count)
+ /* XXX This is broken now, and should be removed. */
+ if (map_idx >= directive_count)
yyerror("More weights than specified by order_start");
if (wcslen($1) > STR_LEN)
yyerror("String '%s' is too long", showwcs($1, STR_LEN));
+ /* Store the end, not the beginning. In the simple case it is equivalent. */
+ weight_map[map_idx++].v = weight_index;
weight_table[weight_index++] = *getstring($1);
}
;
-weight_ex1 : weight_ex { weight_index++; }
-;
weight_ex : ELEM {
struct symbol *s;
- if (weight_index >= directive_count)
- yyerror("More weights than specified by order_start (%d >= %d, %s)", weight_index,
+ assert(weight_index < COLL_WEIGHTS_REAL);
+ if (map_idx >= directive_count)
+ yyerror("More weights than specified by order_start (%d >= %d, %s)", map_idx,
directive_count, showwcs($1, CHARMAP_SYMBOL_LEN));
s = getsymbol($1, EXISTS);
if (order_pass && s->val == PRI_UNDEFINED)
printf("<%s> is undefined\n", showwcs($1, CHARMAP_SYMBOL_LEN));
- weight_table[weight_index] = *s;
-}
-;
-weight_ex_null : ELEM {
- struct symbol *s;
-
- if (weight_index >= directive_count)
- yyerror("More weights than specified by order_start (%d >= %d, %s)", weight_index,
- directive_count, showwcs($1, CHARMAP_SYMBOL_LEN));
- s = getsymbol($1, EXISTS);
- if (order_pass && s->val == PRI_UNDEFINED)
- printf("<%s> is undefined\n", showwcs($1, CHARMAP_SYMBOL_LEN));
- weight_table[weight_index].val += s->val;
+ weight_table[weight_index++] = *s;
}
;
order_end : ORDER_END '\n'
@@ -1026,7 +1099,7 @@
for (ch = 0; ch <= UCHAR_MAX; ch++)
for(z = 0; z < COLL_WEIGHTS_MAX; z++)
__collate_char_pri_table[ch].pri[z] = PRI_UNDEFINED;
- if (setlocale(LC_ALL, "en_US.UTF-8") == NULL)
+ if (setlocale(LC_ALL, "en_US.UTF-8") == NULL)
errx(1, "setlocale() failed");
#ifdef COLLATE_DEBUG
while((ch = getopt(ac, av, ":do:I:m:")) != -1) {
@@ -1077,28 +1150,36 @@
{
DBT key, val;
struct __collate_st_char_pri *p;
- int ret;
+ int ret, z;
+
+ if (c <= UCHAR_MAX) {
+ p = &__collate_char_pri_table[c];
+ for (z = 0; z < COLL_WEIGHTS_MAX; z++)
+ p->map[z].v = z;
+ for(z = 0; z < COLL_WEIGHTS_REAL; z++)
+ p->pri[z] = PRI_UNDEFINED;
- if (c <= UCHAR_MAX)
- return &__collate_char_pri_table[c];
+ return p;
+ }
key.data = &c;
key.size = sizeof(int32_t);
if ((ret = largemapdb->get(largemapdb, &key, &val, 0)) < 0)
err(1, "getpri: Error getting %s", charname(c));
if (ret != 0) {
- struct __collate_st_char_pri *pn;
- int z;
- if ((pn = (struct __collate_st_char_pri *)malloc(sizeof(struct __collate_st_char_pri))) == NULL)
+ if ((p = (struct __collate_st_char_pri *)
+ malloc(sizeof(struct __collate_st_char_pri))) == NULL)
err(1, "getpri: malloc");
for(z = 0; z < COLL_WEIGHTS_MAX; z++)
- pn->pri[z] = PRI_UNDEFINED;
- val.data = &pn;
+ p->map[z].v = z;
+ for(z = 0; z < COLL_WEIGHTS_REAL; z++)
+ p->pri[z] = PRI_UNDEFINED;
+ val.data = &p;
val.size = sizeof(struct __collate_st_char_pri *);
if (largemapdb->put(largemapdb, &key, &val, 0) < 0)
err(1, "getpri: Error storing %s", charname(c));
nlargemap++;
}
- memcpy(&p, val.data, sizeof(struct __collate_st_char_pri *));
+
return p;
}
@@ -1137,7 +1218,8 @@
int z;
if (exists > 0)
errx(1, "getchain: \"%s\" is not defined", showwcs(wcs, STR_LEN));
- if ((pn = (struct __collate_st_chain_pri *)malloc(sizeof(struct __collate_st_chain_pri))) == NULL)
+ if ((pn = (struct __collate_st_chain_pri *)
+ malloc(sizeof(struct __collate_st_chain_pri))) == NULL)
err(1, "getchain: malloc");
for(z = 0; z < COLL_WEIGHTS_MAX; z++)
pn->pri[z] = PRI_UNDEFINED;
@@ -1237,6 +1319,7 @@
struct symbol *p;
int ret;
+ errx(1, "internal error: getstring called");
key.data = (void *)wcs;
key.size = wcslen(wcs) * sizeof(wchar_t);
if ((ret = stringdb->get(stringdb, &key, &val, 0)) < 0)
@@ -1254,6 +1337,7 @@
err(1, "getstring: Error storing \"%s\"", showwcs(wcs, STR_LEN));
}
memcpy(&p, val.data, sizeof(struct symbol *));
+
return p;
}
@@ -1391,19 +1475,32 @@
static char *
show(int c)
{
- static char buf[5];
+ static char buf[40];
+ char *p, utfbuf[6];
+ int i;
if (c >=32 && c <= 126)
sprintf(buf, "'%c' ", c);
- else
+ else {
sprintf(buf, "\\x{%02x}", c);
+ }
+ if ((c = wctomb(utfbuf, c)) != -1) {
+ p = buf + strlen(buf);
+ *p++ = '(';
+ for (i = 0; i < c; i++) {
+ sprintf(p, "%X ", (unsigned char)utfbuf[i]);
+ p += strlen(p);
+ }
+ sprintf(p, ")");
+ }
+
return buf;
}
static void
collate_print_tables(void)
{
- int i, z;
+ int i, z, pos;
printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n",
info.directive[0], info.directive[1],
@@ -1436,25 +1533,65 @@
putchar('\n');
}
}
- printf("Char priority table:\n");
+ printf("Char priority table: (utf8 in parens)\n");
{
struct __collate_st_char_pri *p2 = __collate_char_pri_table;
+
for (i = 0; i < UCHAR_MAX + 1; i++, p2++) {
+ if (p2->map[1].v == 0)
+ continue; /* Entry not used. */
printf("\t%s :", show(i));
- for(z = 0; z < info.directive_count; z++)
- printf(" %d", ntohl(p2->pri[z]));
+ for(z = 0, pos = 0; z < info.directive_count; z++) {
+ /*
+ * Is the last weight in the sequence on
+ * this position? The assertion here won't hold if some
+ * character wasn't used at all. Hence, the additional 'if'
+ * above.
+ */
+ assert(pos <= p2->map[z].v);
+ if (pos == p2->map[z].v) {
+ printf(" %d", ntohl(p2->pri[pos]));
+ pos++;
+ continue;
+ }
+ printf(" \"");
+ /* Output all expanded weights in sequence. */
+ while (pos <= p2->map[z].v) {
+ printf("%d%s", ntohl(p2->pri[pos]),
+ pos < p2->map[z].v ? " " : "");
+ pos++;
+ }
+ printf("\"");
+ }
putchar('\n');
}
}
if (info.large_pri_count > 0) {
struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table;
- printf("Large priority table:\n");
+
+ printf("Large priority table: (utf8 in parens)\n");
for (i = info.large_pri_count; i-- > 0; p2++) {
if (p2->val == 0)
break;
printf("\t%s :", show(ntohl(p2->val)));
- for(z = 0; z < info.directive_count; z++)
- printf(" %d", ntohl(p2->pri.pri[z]));
+ for(z = 0, pos = 0; z < info.directive_count; z++) {
+ /* Is the last weight in the sequence on
+ * this position? */
+ assert(pos <= p2->pri.map[z].v);
+ if (pos == p2->pri.map[z].v) {
+ printf(" %d", ntohl(p2->pri.pri[pos]));
+ pos++;
+ continue;
+ }
+ printf(" \"");
+ /* Output all expanded weights in sequence. */
+ while (pos <= p2->pri.map[z].v) {
+ printf("%d%s", ntohl(p2->pri.pri[pos]),
+ pos < p2->pri.map[z].v ? " " : "");
+ pos++;
+ }
+ printf("\"");
+ }
putchar('\n');
}
}
More information about the p4-projects
mailing list