svn commit: r195730 - user/edwin/locale/tools
Edwin Groothuis
edwin at FreeBSD.org
Thu Jul 16 22:30:12 UTC 2009
Author: edwin
Date: Thu Jul 16 22:30:11 2009
New Revision: 195730
URL: http://svn.freebsd.org/changeset/base/195730
Log:
For use in src/tools/tools/locale:
tools/charmaps.xml - datafile with the languages, countries and encodings.
tools/cldr2def.pl - convertor from the CLDR data.
tools/charmaps.pm - interface between the XML data file and the perl script.
Added:
user/edwin/locale/tools/
user/edwin/locale/tools/charmaps.pm
user/edwin/locale/tools/charmaps.xml
user/edwin/locale/tools/cldr2def.pl (contents, props changed)
Added: user/edwin/locale/tools/charmaps.pm
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/edwin/locale/tools/charmaps.pm Thu Jul 16 22:30:11 2009 (r195730)
@@ -0,0 +1,99 @@
+#!/usr/local/bin/perl -w
+
+use strict;
+use XML::Parser;
+use Data::Dumper;
+
+# Parser bookkeeping: $data{element}{N} is the element name stored by
+# h_start() at index N.
+my %data = ();
+# Result returned by get_xmldata(): h_start() fills $d{L} (languages)
+# and $d{T} (translations).
+my %d = ();
+# Incremented by h_start() and decremented by h_end().
+my $index = -1;
+
+# Parse the charmaps XML data file and return the collected data.
+#
+# Parameters:
+#   $file - path of the XML file to read (optional; defaults to
+#           "charmaps.xml" in the current directory).
+# Returns the contents of %d as a flat key/value list: key "L" holds the
+# language definitions and key "T" the per-encoding translations, as
+# filled in by h_start().
+# Dies if the file cannot be opened; parse errors are reported by
+# XML::Parser itself.
+sub get_xmldata {
+    my $file = shift;
+    $file = "charmaps.xml" if (!defined $file);
+
+    open(my $fin, "<", $file) or die("Cannot open $file: $!\n");
+    my @xml = <$fin>;
+    # Line endings are removed and the lines glued together before the
+    # document is handed to the parser.
+    chomp(@xml);
+    close($fin);
+
+    my $xml = XML::Parser->new(Handlers => {
+        Start => \&h_start,
+        End => \&h_end,
+        Char => \&h_char
+    });
+    $xml->parse(join("", @xml));
+    return %d;
+}
+
+# XML::Parser "Start" handler.
+#
+# Receives the parser object, the element name and the element's
+# attributes as a flat name/value list.  Every element name is recorded
+# in %data; <language> elements are stored under $d{L} and <translation>
+# elements under $d{T}.
+sub h_start {
+    my ($expat, $element, @pairs) = @_;
+    my %attrs = ();
+
+    # Turn the flat name, value, name, value, ... list into a hash.
+    for (my $i = 0; $i <= $#pairs; $i += 2) {
+        $attrs{$pairs[$i]} = $pairs[$i + 1];
+    }
+
+    $index++;
+    $data{element}{$index} = $element;
+
+    if ($element eq "language") {
+        my $name = $attrs{name};
+        my $encoding = $attrs{encoding};
+        my $countries = $attrs{countries};
+        # Languages without a family attribute are filed under "x".
+        my $f = defined $attrs{family} ? $attrs{family} : "x";
+
+        $d{L}{$name}{$f} ||= {};
+        my $lang = $d{L}{$name}{$f};
+        $lang->{fallback} = $attrs{fallback};
+        $lang->{link} = $attrs{link};
+        $lang->{family} = $attrs{family};
+        $lang->{encoding} = $encoding;
+        $lang->{countries} = $countries;
+
+        # Every country gets an (undefined) slot for each listed
+        # encoding and always one for UTF-8.
+        my @encodings = ();
+        @encodings = split(" ", $encoding) if (defined $encoding);
+        foreach my $c (split(" ", $countries)) {
+            foreach my $e (@encodings, "UTF-8") {
+                $lang->{data}{$c}{$e} = undef;
+            }
+        }
+        return;
+    }
+
+    if ($element eq "translation") {
+        if (defined $attrs{hex}) {
+            # Decode the hex attribute two digits at a time into a
+            # string of single characters.
+            my $hs = $attrs{hex};
+            my $k = "<" . $attrs{cldr} . ">";
+            my $decoded = "";
+            for (my $o = 0; $o < length($hs); $o += 2) {
+                $decoded .= chr(hex(substr($hs, $o, 2)));
+            }
+            $d{T}{$attrs{encoding}}{$k} = $decoded;
+        }
+        if (defined $attrs{string}) {
+            my $k = "<" . $attrs{cldr} . ">";
+            $d{T}{$attrs{encoding}}{$k} = $attrs{string};
+        }
+        return;
+    }
+}
+
+# XML::Parser "End" handler: undoes the index increment done by
+# h_start() for the element being closed.
+sub h_end {
+    my ($expat, $element) = @_;
+    $index--;
+}
+
+# XML::Parser "Char" handler: character data is picked up but not
+# stored anywhere.
+sub h_char {
+    my $expat = $_[0];
+    my $string = $_[1];
+}
+
+#use Data::Dumper;
+#my %D = get_xmldata();
+#print Dumper(%D);
+1;
Added: user/edwin/locale/tools/charmaps.xml
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/edwin/locale/tools/charmaps.xml Thu Jul 16 22:30:11 2009 (r195730)
@@ -0,0 +1,237 @@
+<data>
+<languages>
+ <!-- Attributes known:
+ name = language name - only one
+ encoding = encodings to be done - separated by space
+ countries = countries to create - separated by space
+ family = which font family - only one
+ link = only with family, create this original file too - only one
+ fallback = read this file if name_countries doesn't exist - only one
+
+ By default, the name of the input file is name_countries.
+ If family is defined, the name of the input file will be name_family_countries.
+ If fallback is defined, the name of the input file will be that.
+
+ By default, the name of the output file is name_countries.
+ If family is defined, the name of the output file will be name_family_countries.
+ For backwards compatibility you use link which is name_countries.
+
+ -->
+ <language name="af"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="ZA" />
+ <language name="am"
+ countries="ET" /> <!-- UTF-8 only -->
+ <language name="be"
+ encoding="CP1131 CP1251 ISO8859-5"
+ countries="BY" />
+ <language name="bg"
+ encoding="CP1251"
+ countries="BG" />
+ <language name="ca"
+ fallback="ca_ES"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="AD ES FR IT" /> <!-- Not defined for anything else -->
+ <language name="cs"
+ encoding="ISO8859-2"
+ countries="CZ" />
+ <language name="da"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="DK" />
+ <language name="de"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="AT CH DE" />
+ <language name="el"
+ encoding="ISO8859-7"
+ countries="GR" />
+ <language name="en"
+ encoding="ISO8859-1 ISO8859-15 US-ASCII"
+ countries="AU CA GB NZ US" />
+ <language name="en"
+ countries="IE" />
+ <language name="es"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="ES" />
+ <language name="et"
+ encoding="ISO8859-15"
+ countries="EE" />
+ <language name="eu"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="ES" />
+ <language name="fi"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="FI" />
+ <language name="fr"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="BE CA CH FR" />
+ <language name="he"
+ countries="IL" />
+ <language name="hi"
+ encoding="ISCII-DEV"
+ countries="IN" />
+ <language name="hr"
+ encoding="ISO8859-2"
+ countries="HR" />
+ <language name="hu"
+ encoding="ISO8859-2"
+ countries="HU" />
+ <language name="hy"
+ encoding="ARMSCII-8"
+ countries="AM" />
+ <language name="is"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="IS" />
+ <language name="it"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="CH IT" />
+ <language name="ja"
+ link="jp_JP"
+ encoding="SJIS eucJP"
+ countries="JP" />
+ <language name="kk"
+ family="Cyrl"
+ link="kk_KZ"
+ encoding="PT154"
+ countries="KZ" />
+ <language name="ko"
+ encoding="CP949 eucKR"
+ countries="KR" />
+ <language name="la"
+ encoding="ISO8859-1 ISO8859-2 ISO8859-4 ISO8859-15 US-ASCII"
+ countries="LN" />
+ <language name="lt"
+ encoding="ISO8859-4 ISO8859-13"
+ countries="LT" />
+ <language name="mn"
+ family="Cyrl"
+ link="mn_MN"
+ countries="MN" />
+ <language name="nb"
+ link="no_NO"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="NO" />
+ <language name="nl"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="BE NL" />
+ <language name="nn"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="NO" />
+ <language name="pl"
+ encoding="ISO8859-2"
+ countries="PL" />
+ <language name="pt"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="PT" />
+ <language name="ro"
+ encoding="ISO8859-2"
+ countries="RO" />
+ <language name="ru"
+ encoding="CP1251 CP866 ISO8859-5 KOI8-R"
+ countries="RU" />
+ <language name="sk"
+ encoding="ISO8859-2"
+ countries="SK" />
+ <language name="sl"
+ encoding="ISO8859-2"
+ countries="SI" />
+ <language name="sr"
+ family="Latn"
+ link="sr_YU"
+ encoding="ISO8859-2"
+ countries="RS" />
+ <language name="sr"
+ family="Cyrl"
+ link="sr_YU"
+ encoding="ISO8859-5"
+ countries="RS" />
+ <language name="sv"
+ encoding="ISO8859-1 ISO8859-15"
+ countries="SE" />
+ <language name="tr"
+ encoding="ISO8859-9"
+ countries="TR" />
+ <language name="uk"
+ encoding="CP1251 ISO8859-5 KOI8-U"
+ countries="UA" />
+ <language name="zh"
+ family="Hans"
+ link="zh_CN"
+ encoding="GB18030 GB2312 GBK eucCN"
+ countries="CN" />
+ <language name="zh"
+ family="Hant"
+ link="zh_HK"
+ encoding="Big5HKSCS"
+ countries="HK" />
+ <language name="zh"
+ family="Hant"
+ link="zh_TW"
+ encoding="Big5"
+ countries="TW" />
+</languages>
+
+<translations>
+ <!-- These don't have a special Euro sign so just use Eu for it -->
+ <translation encoding="ISO8859-1" cldr="EURO_SIGN" string="Eu" />
+ <translation encoding="ISO8859-2" cldr="EURO_SIGN" string="Eu" />
+
+ <!-- These don't have a special Won sign so just use 0x5C for it -->
+ <translation encoding="CP949" cldr="WON_SIGN" hex="5C" />
+ <translation encoding="eucKR" cldr="WON_SIGN" hex="5C" />
+
+ <!-- Minus and dashes -->
+ <translation encoding="ISO8859-1" cldr="MINUS_SIGN" string="-" />
+ <translation encoding="ISO8859-4" cldr="MINUS_SIGN" string="-" />
+ <translation encoding="ISO8859-13" cldr="MINUS_SIGN" string="-" />
+ <translation encoding="ISO8859-15" cldr="MINUS_SIGN" string="-" />
+ <translation encoding="ISO8859-2" cldr="EN_DASH" string="-" />
+
+ <!-- Copied from the original FreeBSD src/share/monetdef -->
+ <translation encoding="CP1251" cldr="HRYVNIA_SIGN" hex="E3F0ED" />
+ <translation encoding="ISO8859-5" cldr="HRYVNIA_SIGN" hex="D3E0DD" />
+ <translation encoding="KOI8-U" cldr="HRYVNIA_SIGN" hex="C7D2CE" />
+
+ <!-- Value found in http://en.wikipedia.org/wiki/Pound_sign -->
+ <translation encoding="US-ASCII" cldr="POUND_SIGN" hex="A3" />
+
+ <!-- Values found in http://en.wikipedia.org/wiki/Ya_(Cyrillic) -->
+ <translation encoding="CP1251" cldr="CYRILLIC_SMALL_LETTER_YA" hex="FF" />
+ <translation encoding="ISO8859-5" cldr="CYRILLIC_SMALL_LETTER_YA" hex="EF" />
+ <translation encoding="KOI8-U" cldr="CYRILLIC_SMALL_LETTER_YA" hex="D1" />
+ <!-- Values found in http://en.wikipedia.org/wiki/Cyrillic_characters_in_Unicode -->
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_CAPITAL_LETTER_A" string="A" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_A" string="a" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_KA" string="k" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_O" string="o" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_CAPITAL_LETTER_DE" string="D" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_DE" string="d" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_CAPITAL_LETTER_IE" string="E" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_IE" string="e" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_CAPITAL_LETTER_EN" string="N" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_EN" string="n" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_BE" string="b" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_ER" string="r" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_JE" string="j" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_EL" string="l" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_GHE" string="g" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_PE" string="p" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_TE" string="t" />
+ <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_VE" string="v" />
+
+ <!-- Not sure why these ones aren't known by libiconv -->
+ <translation encoding="ISO8859-2" cldr="a" string="a" />
+ <translation encoding="ISO8859-2" cldr="d" string="d" />
+ <translation encoding="ISO8859-2" cldr="e" string="e" />
+ <translation encoding="ISO8859-2" cldr="i" string="i" />
+ <translation encoding="ISO8859-2" cldr="n" string="n" />
+ <translation encoding="ISO8859-2" cldr="r" string="r" />
+
+ <translation encoding="ISO8859-5" cldr="t" string="t" />
+ <translation encoding="ISO8859-5" cldr="k" string="k" />
+
+ <!-- Just a . ? -->
+ <translation encoding="ISO8859-2" cldr="FULL_STOP" string="." />
+ <translation encoding="ARMSCII-8" cldr="ONE_DOT_LEADER" string="." />
+
+</translations>
+</data>
Added: user/edwin/locale/tools/cldr2def.pl
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/edwin/locale/tools/cldr2def.pl Thu Jul 16 22:30:11 2009 (r195730)
@@ -0,0 +1,527 @@
+#!/usr/bin/perl -wC
+
+use strict;
+use XML::Parser;
+use Text::Iconv;
+use Tie::IxHash;
+use Data::Dumper;
+use Digest::SHA qw(sha1_hex);
+require "charmaps.pm";
+
+# At least three arguments are required; the fourth (la_CC) is optional.
+if ($#ARGV < 2) {
+ print "Usage: $0 <cldrdir> <charmaps> <type> [la_CC]\n";
+ exit(1);
+}
+
+# Encoding of the input files; all other encodings are converted from it.
+my $DEFENCODING = "UTF-8";
+# Input files are read from "$DIR/posix/".
+my $DIR = shift(@ARGV);
+# Passed to get_xmldata() by get_languages().
+my $CHARMAPS = shift(@ARGV);
+# One of the keys of %FILESNAMES; also the name of the output directory.
+my $TYPE = shift(@ARGV);
+# Optional "la_CC" or "la_Family_CC" restriction; undef when not given.
+my $doonly = shift(@ARGV);
+# (language, family, country) built from $doonly by get_languages().
+my @filter = ();
+
+# Cache of Text::Iconv objects, keyed by target encoding.
+my %convertors = ();
+
+# $values{language}{country}{key}: raw field values read by get_fields().
+my %values = ();
+# $hashtable{sha1}{file}: generated files grouped by content hash.
+my %hashtable = ();
+my %languages = ();
+my %translations = ();
+get_languages();
+
+# Charmap of $DEFENCODING: symbolic name => hex string, see get_utf8map().
+my %cm = ();
+get_utf8map();
+
+# Tie::IxHash keeps the keys in insertion order.
+my %keys = ();
+tie(%keys, "Tie::IxHash");
+tie(%hashtable, "Tie::IxHash");
+
+# Value of FILESNAME in the generated Makefile, per type.
+my %FILESNAMES = (
+ "monetdef" => "LC_MONETARY",
+ "timedef" => "LC_TIME",
+ "msgdef" => "LC_MESSAGES",
+ "numericdef" => "LC_NUMERIC"
+);
+
+# Callbacks usable in "<callback<key<type" field definitions.
+my %callback = (
+ mdorder => \&callback_mdorder,
+);
+
+# Description of every field key.  print_fields() writes it as a comment
+# line in front of the field's value and dies for a key that has no
+# entry here.
+my %DESC = (
+
+ # numericdef
+ "decimal_point" => "decimal_point",
+ "thousands_sep" => "thousands_sep",
+ "grouping" => "grouping",
+
+ # monetdef
+ "int_curr_symbol" => "int_curr_symbol (last character always " .
+ "SPACE)",
+ "currency_symbol" => "currency_symbol",
+ "mon_decimal_point" => "mon_decimal_point",
+ "mon_thousands_sep" => "mon_thousands_sep",
+ "mon_grouping" => "mon_grouping",
+ "positive_sign" => "positive_sign",
+ "negative_sign" => "negative_sign",
+ "int_frac_digits" => "int_frac_digits",
+ "frac_digits" => "frac_digits",
+ "p_cs_precedes" => "p_cs_precedes",
+ "p_sep_by_space" => "p_sep_by_space",
+ "n_cs_precedes" => "n_cs_precedes",
+ "n_sep_by_space" => "n_sep_by_space",
+ "p_sign_posn" => "p_sign_posn",
+ "n_sign_posn" => "n_sign_posn",
+
+ # msgdef
+ "yesexpr" => "yesexpr",
+ "noexpr" => "noexpr",
+ "yesstr" => "yesstr",
+ "nostr" => "nostr",
+
+ # timedef
+ "abmon" => "Short month names",
+ "mon" => "Long month names (as in a date)",
+ "abday" => "Short weekday names",
+ "day" => "Long weekday names",
+ "t_fmt" => "X_fmt",
+ "d_fmt" => "x_fmt",
+ "XXX" => "c_fmt",
+ "am_pm" => "AM/PM",
+ "d_t_fmt" => "date_fmt",
+ "mon2" => "Long month names (without case ending)",
+ "md_order" => "md_order",
+ "t_fmt_ampm" => "ampm_fmt",
+
+);
+
+# Field definitions per type.  The selected list is assigned to the
+# ordered hash %keys, so the order below is the order in which the
+# fields are written to the generated files.
+#
+# Field codes, as interpreted by print_fields():
+#   "s"   string, converted to the target encoding
+#   "as"  ";"-separated list of strings, one output line per element
+#   "i"   value written as-is
+#   "ai"  value written as-is
+#   ">key"                 use the value and code of another key
+#   "<callback<key<code"   value of another key passed through the named
+#                          entry of %callback, then handled as "code"
+my %fields_for = (
+    "numericdef" => [
+        "decimal_point" => "s",
+        "thousands_sep" => "s",
+        "grouping" => "ai",
+    ],
+    "monetdef" => [
+        "int_curr_symbol" => "s",
+        "currency_symbol" => "s",
+        "mon_decimal_point" => "s",
+        "mon_thousands_sep" => "s",
+        "mon_grouping" => "ai",
+        "positive_sign" => "s",
+        "negative_sign" => "s",
+        "int_frac_digits" => "i",
+        "frac_digits" => "i",
+        "p_cs_precedes" => "i",
+        "p_sep_by_space" => "i",
+        "n_cs_precedes" => "i",
+        "n_sep_by_space" => "i",
+        "p_sign_posn" => "i",
+        "n_sign_posn" => "i",
+    ],
+    "msgdef" => [
+        "yesexpr" => "s",
+        "noexpr" => "s",
+        "yesstr" => "s",
+        "nostr" => "s",
+    ],
+    # "d_fmt" used to be listed twice with the same code; the repeated
+    # entry changed neither the value nor the position of the key.
+    "timedef" => [
+        "abmon" => "as",
+        "mon" => "as",
+        "abday" => "as",
+        "day" => "as",
+        "t_fmt" => "s",
+        "d_fmt" => "s",
+        "XXX" => "s",
+        "am_pm" => "as",
+        "d_t_fmt" => "s",
+        "mon2" => ">mon", # repeat them for now
+        "md_order" => "<mdorder<d_fmt<s",
+        "t_fmt_ampm" => "s",
+    ],
+);
+
+# An unknown type used to fall through all checks and exit silently
+# without producing anything; report it instead.
+if (!defined $fields_for{$TYPE}) {
+    print STDERR "Unknown type '$TYPE', expected one of: " .
+        join(", ", sort keys(%fields_for)) . "\n";
+    exit(1);
+}
+
+%keys = @{$fields_for{$TYPE}};
+get_fields();
+print_fields();
+make_makefile();
+
+# Callback behind the "md_order" field: strips everything except the
+# letters "d" and "m" from the given string and returns what is left.
+# An undefined argument yields undef.
+sub callback_mdorder {
+    my ($fmt) = @_;
+    if (defined $fmt) {
+        # Delete every character that is not "d" or "m".
+        $fmt =~ tr/dm//cd;
+        return $fmt;
+    }
+    return undef;
+};
+
+############################
+
+# Load the charmap of the default encoding into %cm.
+#
+# Reads "$DIR/posix/$DEFENCODING.cm".  For every non-comment, non-empty
+# line between "CHARMAP" and "END CHARMAP" the first whitespace-delimited
+# token becomes the key and the remainder of the line, with every "\x"
+# removed, becomes the value.
+# Dies if the charmap file cannot be opened.
+sub get_utf8map {
+    my $file = "$DIR/posix/$DEFENCODING.cm";
+    open(my $fin, "<", $file) or die("Cannot open $file: $!\n");
+    my @lines = <$fin>;
+    close($fin);
+    chomp(@lines);
+    my $incharmap = 0;
+    foreach my $l (@lines) {
+        $l =~ s/\r//;
+        next if ($l =~ /^\#/);
+        next if ($l eq "");
+        if ($l eq "CHARMAP") {
+            $incharmap = 1;
+            next;
+        }
+        next if (!$incharmap);
+        last if ($l eq "END CHARMAP");
+        # Only use the captures of a successful match; a line without
+        # a "name value" shape is skipped instead of silently reusing
+        # the captures of an earlier line.
+        if ($l =~ /^([^\s]+)\s+(.*)/) {
+            my $k = $1;
+            my $v = $2;
+            $v =~ s/\\x//g;
+            $cm{$k} = $v;
+        }
+    }
+}
+
+# Load %languages and %translations from the charmaps XML file and, when
+# a restriction was given on the command line, split it into @filter as
+# (language, family, country).  A two-part restriction gets the family
+# "x"; anything that is not two or three "_"-separated parts leaves
+# @filter untouched.
+sub get_languages {
+    my %xml = get_xmldata($CHARMAPS);
+    %languages = %{$xml{L}};
+    %translations = %{$xml{T}};
+
+    if (defined $doonly) {
+        my @parts = split(/_/, $doonly);
+        if (scalar(@parts) == 2) {
+            @filter[0, 1, 2] = ($parts[0], "x", $parts[1]);
+        } elsif (scalar(@parts) == 3) {
+            @filter[0, 1, 2] = @parts[0, 1, 2];
+        }
+
+        print Dumper(@filter);
+    }
+    return;
+}
+
+# Read the raw field values for every language/family/country into
+# %values.
+#
+# For each combination (restricted to @filter when it has three
+# elements) the file "$DIR/posix/<name>.$DEFENCODING.src" is read, where
+# <name> is language[_family]_country; if it cannot be opened the
+# language's fallback file is tried.  The $DEFENCODING slot in
+# %languages is set to 0 before the attempt and to 1 once a file was
+# opened.  For every key in %keys the first line starting with that key
+# is stored in $values{language}{country}{key}.
+#
+# NOTE(review): %values is keyed by language and country only, not by
+# family, and values are appended with ".=".  Two families sharing a
+# language and country (charmaps.xml lists "sr" with Latn and Cyrl for
+# RS) therefore end up concatenated in the same slot - confirm whether
+# that is intended.
+sub get_fields {
+ foreach my $l (sort keys(%languages)) {
+ foreach my $f (sort keys(%{$languages{$l}})) {
+ foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
+ next if ($#filter == 2 && ($filter[0] ne $l
+ || $filter[1] ne $f || $filter[2] ne $c));
+
+ $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
+ # Input file name: language[_family]_country, family "x" is omitted.
+ my $file;
+ $file = $l . "_";
+ $file .= $f . "_" if ($f ne "x");
+ $file .= $c;
+ if (!open(FIN, "$DIR/posix/$file.$DEFENCODING.src")) {
+ if (!defined $languages{$l}{$f}{fallback}) {
+ print STDERR
+ "Cannot open $file.$DEFENCODING.src\n";
+ next;
+ }
+ $file = $languages{$l}{$f}{fallback};
+ if (!open(FIN, "$DIR/posix/$file.$DEFENCODING.src")) {
+ print STDERR
+ "Cannot open fallback " .
+ "$file.$DEFENCODING.src\n";
+ next;
+ }
+ }
+ print "Reading from $file.$DEFENCODING.src for ${l}_${f}_${c}\n";
+ $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
+ my @lines = <FIN>;
+ chomp(@lines);
+ close(FIN);
+ # True while the previous line ended in "/" (continuation).
+ # NOTE(review): declared outside the key loop, so a trailing
+ # continuation on the last line would carry over to the next key.
+ my $continue = 0;
+ foreach my $k (keys(%keys)) {
+ # $line aliases the element of @lines, so the substitutions
+ # below modify the stored lines seen by later keys.
+ foreach my $line (@lines) {
+ $line =~ s/\r//;
+ # NOTE(review): $k is interpolated into the pattern unquoted.
+ next if (!$continue && $line !~ /^$k\s/);
+ if ($continue) {
+ $line =~ s/^\s+//;
+ } else {
+ $line =~ s/^$k\s+//;
+ }
+
+ $values{$l}{$c}{$k} = ""
+ if (!defined $values{$l}{$c}{$k});
+
+ # A trailing "/" marks a continued line; it is removed and
+ # the following line is appended as well.
+ $continue = ($line =~ /\/$/);
+ $line =~ s/\/$// if ($continue);
+ $values{$l}{$c}{$k} .= $line;
+
+ last if (!$continue);
+ }
+ }
+ }
+ }
+ }
+}
+
+# Turn a symbolic charmap name into the bytes it stands for.
+#
+# The name is looked up in %cm; a hex string of 2, 4 or 6 characters is
+# converted pairwise into 1, 2 or 3 bytes.  For any other length the
+# string "length = <n>" is returned instead.
+sub decodecldr {
+    my ($name) = @_;
+    my $hexstr = $cm{$name};
+    my $len = length($hexstr);
+
+    if ($len == 2 || $len == 4 || $len == 6) {
+        # One byte per pair of hex digits.
+        return join("",
+            map { pack("C", hex($_)) } unpack("(a2)*", $hexstr));
+    }
+    return "length = " . $len;
+}
+
+# Look up the replacement for a symbolic name in a given encoding.
+# Returns the entry from %translations, or undef when there is none.
+sub translate {
+    my ($enc, $v) = @_;
+
+    # A missing entry reads as undef, which is what gets returned.
+    return $translations{$enc}{$v};
+}
+
+# Write the definition files for every language/family/country/encoding.
+#
+# For each combination (restricted to @filter when it has three
+# elements) whose input file was read by get_fields(), the values in
+# %values are converted from $DEFENCODING to the target encoding with
+# Text::Iconv and written to "$TYPE/<name>.<encoding>.new".  The file is
+# renamed to ".src" when every field could be converted and to ".failed"
+# otherwise.  The SHA-1 of the generated body (without the header
+# comment) is stored in %languages and used as key in %hashtable.
+#
+# NOTE(review): the result of open(FOUT, ...) is not checked.
+sub print_fields {
+ foreach my $l (sort keys(%languages)) {
+ foreach my $f (sort keys(%{$languages{$l}})) {
+ foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
+ next if ($#filter == 2 && ($filter[0] ne $l
+ || $filter[1] ne $f || $filter[2] ne $c));
+ foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
+ # get_fields() leaves 0 here when no input file could be opened.
+ if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
+ print "Skipping ${l}_" .
+ ($f eq "x" ? "" : "${f}_") .
+ "${c} - not read\n";
+ next;
+ }
+ my $file = $l;
+ $file .= "_" . $f if ($f ne "x");
+ $file .= "_" . $c;
+ print "Writing to $file in $enc\n";
+
+ # Create the converter for this encoding once and cache it; the
+ # eval keeps a failing constructor from aborting the run.
+ eval {
+ $convertors{$enc} =
+ Text::Iconv->new($DEFENCODING, $enc);
+ } if (!defined $convertors{$enc});
+ if (!defined $convertors{$enc}) {
+ print "Failed! Cannot convert between " .
+ "$DEFENCODING and $enc.\n";
+ next;
+ };
+
+ open(FOUT, ">$TYPE/$file.$enc.new");
+ my $okay = 1;
+ my $output = "";
+ # The header goes straight to the file and is not part of $output,
+ # so it does not influence the hash computed below.
+ print FOUT <<EOF;
+# \$FreeBSD\$
+#
+# Warning: Do not edit. This file is automatically generated from the
+# tools in /usr/src/tools/tools/locale. The data is obtained from the
+# CLDR project, obtained from http://cldr.unicode.org/
+#
+# ${l}_$c in $enc
+#
+# -----------------------------------------------------------------------------
+EOF
+ foreach my $k (keys(%keys)) {
+ # NOTE(review): this $f (the field code) shadows the family $f of
+ # the enclosing loop for the rest of this key loop.
+ my $f = $keys{$k};
+
+ die("Unknown $k in \%DESC")
+ if (!defined $DESC{$k});
+
+ $output .= "#\n# $DESC{$k}\n";
+
+ # ">key": use the value and code of another key.
+ if ($f =~ /^>/) {
+ $k = substr($f, 1);
+ $f = $keys{$k};
+ }
+ # "<callback<key<code": run the callback on another key's value,
+ # store the result for this key and continue with "code".
+ if ($f =~ /^\</) {
+ my @a = split(/\</, substr($f, 1));
+ my $rv =
+ &{$callback{$a[0]}}($values{$l}{$c}{$a[1]});
+ $values{$l}{$c}{$k} = $rv;
+ $f = $a[2];
+ }
+
+ my $v = $values{$l}{$c}{$k};
+ $v = "undef" if (!defined $v);
+
+ if ($f eq "i") {
+ $output .= "$v\n";
+ next;
+ }
+ if ($f eq "ai") {
+ $output .= "$v\n";
+ next;
+ }
+ if ($f eq "s") {
+ $v =~ s/^"//;
+ $v =~ s/"$//;
+ # Replace every <NAME> token by its decoded bytes; $cm keeps the
+ # last token seen for the translate() fallback and the message.
+ my $cm = "";
+ while ($v =~ /^(.*?)(<.*?>)(.*)/) {
+ $cm = $2;
+ $v = $1 . decodecldr($2) . $3;
+ }
+ my $fv =
+ $convertors{$enc}->convert("$v");
+ $fv = translate($enc, $cm)
+ if (!defined $fv);
+ if (!defined $fv) {
+ print STDERR
+ "Could not convert $k " .
+ "($cm) from $DEFENCODING " .
+ "to $enc\n";
+ $okay = 0;
+ next;
+ }
+ $output .= "$fv\n";
+ next;
+ }
+ if ($f eq "as") {
+ # Same as "s", applied to every ";"-separated element.
+ foreach my $v (split(/;/, $v)) {
+ $v =~ s/^"//;
+ $v =~ s/"$//;
+ my $cm = "";
+ while ($v =~ /^(.*?)(<.*?>)(.*)/) {
+ $cm = $2;
+ $v = $1 .
+ decodecldr($2) . $3;
+ }
+ my $fv =
+ $convertors{$enc}->convert("$v");
+ $fv = translate($enc, $cm)
+ if (!defined $fv);
+ if (!defined $fv) {
+ print STDERR
+ "Could not " .
+ "convert $k ($cm)" .
+ " from " .
+ "$DEFENCODING to " .
+ "$enc\n";
+ $okay = 0;
+ next;
+ }
+ $output .= "$fv\n";
+ }
+ next;
+ }
+
+ die("$k is '$f'");
+
+ }
+
+ # Here $f is the family again.  The hash replaces the placeholder
+ # stored for this encoding and groups identical files in %hashtable.
+ $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
+ $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
+ print FOUT "$output# EOF\n";
+ close(FOUT);
+
+ if ($okay) {
+ rename("$TYPE/$file.$enc.new",
+ "$TYPE/$file.$enc.src");
+ } else {
+ rename("$TYPE/$file.$enc.new",
+ "$TYPE/$file.$enc.failed");
+ }
+ }
+ }
+ }
+ }
+}
+
+# Write "$TYPE/Makefile" for the files generated by print_fields().
+#
+# Does nothing when a filter is active (@filter is not empty).  Files
+# with identical content (same hash in %hashtable) are emitted as
+# "SAME+=" entries pointing at the first file of the group; all other
+# files are listed in "LOCALES+=".  Languages with a "link" attribute
+# get additional "SAME+=" entries marked "# legacy".
+#
+# NOTE(review): the result of open(FOUT, ...) is not checked.
+# NOTE(review): the "src:" target written at the end hard-codes a path
+# under /home/edwin and the arguments "timedef nl_NL", whatever $TYPE is.
+sub make_makefile {
+ return if ($#filter > -1);
+ print "Creating Makefile for $TYPE\n";
+ open(FOUT, ">$TYPE/Makefile");
+ print FOUT <<EOF;
+#
+# \$FreeBSD\$
+#
+# Warning: Do not edit. This file is automatically generated from the
+# tools in /usr/src/tools/tools/locale.
+#
+
+LOCALEDIR= /usr/share/locale
+FILESNAME= $FILESNAMES{$TYPE}
+.SUFFIXES: .src .out
+
+.src.out:
+ grep -v '^\#' < \${.IMPSRC} > \${.TARGET}
+
+EOF
+
+ foreach my $hash (keys(%hashtable)) {
+ my @files = sort(keys(%{$hashtable{$hash}}));
+ # Only groups with more than one file produce links.
+ if ($#files > 0) {
+ my $link = shift(@files);
+ $link =~ s/_x_/_/; # strip family if none there
+ foreach my $file (@files) {
+ # Keys look like language_family_country.encoding; split them
+ # before the "_x_" family placeholder is removed from $file.
+ my @a = split(/_/, $file);
+ my @b = split(/\./, $a[-1]);
+ $file =~ s/_x_/_/;
+ print FOUT "SAME+=\t\t$link:$file\t#hash\n";
+ # Undefined entries are skipped by the LOCALES loop below.
+ undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
+ }
+ }
+ }
+
+ foreach my $l (sort keys(%languages)) {
+ foreach my $f (sort keys(%{$languages{$l}})) {
+ foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
+ next if ($#filter == 2 && ($filter[0] ne $l
+ || $filter[1] ne $f || $filter[2] ne $c));
+ foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
+ my $file = $l . "_";
+ $file .= $f . "_" if ($f ne "x");
+ $file .= $c;
+ next if (!defined $languages{$l}{$f}{data}{$c}{$e});
+ print FOUT "LOCALES+=\t$file.$e\n";
+ }
+
+ # Legacy names: one link per encoding, including the encodings
+ # that were undefined above.
+ if (defined $languages{$l}{$f}{link}) {
+ foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
+ my $file = $l . "_";
+ $file .= $f . "_" if ($f ne "x");
+ $file .= $c;
+ print FOUT "SAME+=\t\t$file.$e:$languages{$l}{$f}{link}.$e\t# legacy\n";
+
+ }
+
+ }
+
+ }
+ }
+ }
+
+ print FOUT <<EOF;
+
+FILES= \${LOCALES:S/\$/.out/}
+CLEANFILES= \${FILES}
+
+.for f in \${SAME}
+SYMLINKS+= ../\${f:C/:.*\$//}/\${FILESNAME} \${LOCALEDIR}/\${f:C/^.*://}
+.endfor
+
+.for f in \${LOCALES}
+FILESDIR_\${f}.out= \${LOCALEDIR}/\${f}
+.endfor
+
+
+src:
+ ./cldr2def.pl /home/edwin/cldr/1.7.0/ charmaps.xml timedef nl_NL
+
+.include <bsd.prog.mk>
+EOF
+
+ close(FOUT);
+}
More information about the svn-src-user
mailing list