PERFORCE change 145346 for review
Konrad Jankowski
konrad at FreeBSD.org
Wed Jul 16 21:44:33 UTC 2008
http://perforce.freebsd.org/chv.cgi?CH=145346
Change 145346 by konrad at vspredator on 2008/07/16 21:44:09
Support for specifying charmap on the command line added.
It is a big win when generating data for locales in UTF-8, because we will be
able to parse the charmap just once for all of them (when support for this
is added).
Affected files ...
.. //depot/projects/soc2008/konrad_collation/colldef.apple/collate.h#5 edit
.. //depot/projects/soc2008/konrad_collation/colldef.apple/common.h#3 edit
.. //depot/projects/soc2008/konrad_collation/colldef.apple/parse.y#3 edit
.. //depot/projects/soc2008/konrad_collation/colldef.apple/scan.l#5 edit
.. //depot/projects/soc2008/konrad_collation/scripts/localedef.sh#2 edit
Differences ...
==== //depot/projects/soc2008/konrad_collation/colldef.apple/collate.h#5 (text+ko) ====
@@ -45,6 +45,7 @@
#define COLLATE_VERSION1_1 "1.1\n"
#define COLLATE_VERSION1_1A "1.1A\n"
#define COLLATE_VERSION1_2 "1.2\n"
+#define COLLATE_VERSION1_3 "1.3\n"
/* see discussion in string/FreeBSD/strxfrm for this value */
#define COLLATE_MAX_PRIORITY ((1 << 24) - 1)
==== //depot/projects/soc2008/konrad_collation/colldef.apple/common.h#3 (text+ko) ====
@@ -29,6 +29,7 @@
};
extern int line_no;
+extern int charmap_cmdline;
struct symbol *getsymbol(const wchar_t *, int);
extern char *showwcs(const wchar_t *, int);
==== //depot/projects/soc2008/konrad_collation/colldef.apple/parse.y#3 (text+ko) ====
@@ -131,9 +131,10 @@
%token <str> ELEM
%token <ch> CHAR
%token <ch> ORDER_DIRECTIVE
-%token EXPAND
+%token EXPAND CHARMAP
%%
-collate : datafile {
+collate : CHARMAP { printf("parsing charmap\n"); } charmap_list
+ | datafile {
FILE *fp;
int localedef = (stringdb != NULL);
int z;
@@ -304,7 +305,7 @@
err(EX_UNAVAILABLE, "can't open destination file %s",
out_file);
- strcpy(__collate_version, COLLATE_VERSION1_1A);
+ strcpy(__collate_version, COLLATE_VERSION1_3);
if (fwrite(__collate_version, sizeof(__collate_version), 1, fp) != 1)
err(EX_IOERR,
"IO error writting collate version to destination file %s",
@@ -436,6 +437,9 @@
| collating_element
| collating_symbol
;
+charmap_list : charmap '\n'
+ | charmap_list charmap '\n'
+;
collating_element : COLLATING_ELEMENT ELEM FROM STRING {
int len;
struct symbol *s;
@@ -832,6 +836,10 @@
charmap : DEFN CHAR {
int len = wcslen($1);
struct symbol *s;
+
+#if 0
+ printf("charmap\n");
+#endif
if (len > CHARMAP_SYMBOL_LEN)
yyerror("Charmap symbol name '%s' is too long", showwcs($1, CHARMAP_SYMBOL_LEN));
s = getsymbol($1, NOTEXISTS);
@@ -993,10 +1001,12 @@
for (ch = 0; ch <= UCHAR_MAX; ch++)
for(z = 0; z < COLL_WEIGHTS_MAX; z++)
__collate_char_pri_table[ch].pri[z] = PRI_UNDEFINED;
+ if (setlocale(LC_ALL, "en_US.UTF-8") == NULL)
+ errx(1, "setlocale() failed");
#ifdef COLLATE_DEBUG
- while((ch = getopt(ac, av, ":do:I:")) != -1) {
+ while((ch = getopt(ac, av, ":do:I:m:")) != -1) {
#else
- while((ch = getopt(ac, av, ":o:I:")) != -1) {
+ while((ch = getopt(ac, av, ":o:I:m:")) != -1) {
#endif
switch (ch)
{
@@ -1013,6 +1023,14 @@
strlcpy(map_name, optarg, sizeof(map_name));
break;
+ case 'm':
+ charmap_cmdline = 1;
+ if ((yyin = fopen(optarg, "r")) == NULL)
+ err(EX_UNAVAILABLE, "can't open charmap file %s", optarg);
+ yyparse();
+ printf("charmap decoding finished\n");
+ break;
+
default:
usage();
}
@@ -1023,7 +1041,8 @@
if ((yyin = fopen(*av, "r")) == NULL)
err(EX_UNAVAILABLE, "can't open source file %s", *av);
}
- setlocale(LC_ALL, "en_US.UTF-8");
+ charmap_cmdline = 0;
+ line_no = 1;
yyparse();
return 0;
}
==== //depot/projects/soc2008/konrad_collation/colldef.apple/scan.l#5 (text+ko) ====
@@ -45,6 +45,7 @@
void yyerror(char *, ...);
int line_no = 1, save_no, fromsubs;
+int charmap_cmdline;
wchar_t buf0[BUFSIZE], *ptr;
wchar_t *buf = buf0;
wchar_t bufstr[BUFSIZE], *ptrsave;
@@ -112,7 +113,17 @@
return '\n';
}
<ldef>\< { ptr = buf; BEGIN(elem); }
-<INITIAL>\< { ptr = buf; fromsubs = 0; BEGIN(s_name); }
+<INITIAL>\< {
+ ptr = buf;
+ fromsubs = 0;
+ if (charmap_cmdline) {
+ ptr = buf;
+ *ptr++ = '<';
+ BEGIN(defn);
+ return CHARMAP;
+ } else
+ BEGIN(s_name);
+}
<*>^#.*\n line_no++;
^\n line_no++;
<INITIAL>\\\n line_no++;
@@ -130,7 +141,7 @@
}
<INITIAL,nchar>\n {
line_no++;
- if (map_fp != NULL) {
+ if (map_fp != NULL || charmap_cmdline) {
ptr = buf;
BEGIN(defn);
}
@@ -191,7 +202,7 @@
errx(EX_UNAVAILABLE, "map expected near line %u of %s",
line_no, map_name);
*ptr = 0;
- if (localedefmode && *buf == '<' && ptr[-1] == '>') {
+ if ((localedefmode || charmap_cmdline) && *buf == '<' && ptr[-1] == '>') {
if (ptr == buf + 2)
errx(EX_UNAVAILABLE, "map expected near line %u of %s",
line_no, map_name);
@@ -332,7 +343,10 @@
<s_name,string,defn,elem>\n {
const char *s = (map_fp != NULL) ? map_name : "input";
- errx(EX_UNAVAILABLE, "unterminated map/name/string near line %u of %s", line_no, s);
+ if (charmap_cmdline)
+ s = optarg;
+ errx(EX_UNAVAILABLE, "unterminated map/name/string near line %u of %s (ptr-buf=%d)",
+ line_no, s, ptr - buf);
}
<s_name,string,nchar,elem><<EOF>> {
const char *s = (map_fp != NULL) ? map_name : "input";
@@ -408,8 +422,10 @@
BEGIN(ldef);
else
BEGIN(INITIAL);
- } else
+ } else {
+ BEGIN(INITIAL);
yyterminate();
+ }
}
%%
#ifdef FLEX_DEBUG
==== //depot/projects/soc2008/konrad_collation/scripts/localedef.sh#2 (text+ko) ====
@@ -21,8 +21,8 @@
# Basically, just cut out the collation data.
sed -n -e "$LINE1,${LINE2}p" $SRC | tr -d '\r$' | sed -e 's/^*/#/g' > $OUTFILE
-
#printf "1a\ncharmap /usr/home/versus/colldef.apple/data2/UTF-8.cm.new\n.\nwq\n" | ed -s $OUTFILE
+# insert second line with charmap specification
printf "1a\ncharmap ../posix/UTF-8.cm\n.\nwq\n" | ed -s $OUTFILE
# Optional white space compression. Not needed for these colldef version.
More information about the p4-projects
mailing list