svn commit: r195730 - user/edwin/locale/tools

Edwin Groothuis edwin at FreeBSD.org
Thu Jul 16 22:30:12 UTC 2009


Author: edwin
Date: Thu Jul 16 22:30:11 2009
New Revision: 195730
URL: http://svn.freebsd.org/changeset/base/195730

Log:
  For in src/tools/tools/locale:
  
  tools/charmaps.xml	- datafile with the languages, countries and encodings.
  tools/cldr2def.pl	- convertor from the CLDR data.
  tools/charmaps.pm	- interface between the XML data file and the perl script.

Added:
  user/edwin/locale/tools/
  user/edwin/locale/tools/charmaps.pm
  user/edwin/locale/tools/charmaps.xml
  user/edwin/locale/tools/cldr2def.pl   (contents, props changed)

Added: user/edwin/locale/tools/charmaps.pm
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/edwin/locale/tools/charmaps.pm	Thu Jul 16 22:30:11 2009	(r195730)
@@ -0,0 +1,99 @@
+#!/usr/local/bin/perl -w
+
+use strict;
+use XML::Parser;
+use Data::Dumper;
+
+my %data = ();
+my %d = ();
+my $index = -1;
+
+# Parse the charmaps XML description and return the collected data.
+#
+# Arguments:
+#   $file - path of the XML file to read.  Optional; when omitted the
+#           historical default "charmaps.xml" in the current directory
+#           is used.
+# Returns:
+#   The file-level %d hash (as a list).  It is filled in by h_start():
+#   $d{L} holds the <language> entries, $d{T} the <translation> entries.
+# Dies when the file cannot be opened.
+sub get_xmldata {
+	my $file = shift;
+	$file = "charmaps.xml" if (!defined $file);
+
+	open(my $fin, "<", $file) or die("Cannot open $file: $!\n");
+	my @xml = <$fin>;
+	chomp(@xml);
+	close($fin);
+
+	my $parser = XML::Parser->new(Handlers => {
+					Start	=> \&h_start,
+					End	=> \&h_end,
+					Char	=> \&h_char
+					});
+	# The line ends were removed above, so the parser sees the whole
+	# document as one long line.
+	$parser->parse(join("", @xml));
+	return %d;
+}
+
+# Expat "Start" handler.  Receives the parser object, the element name and
+# the attributes as a flat name/value list.
+#
+# <language> elements are recorded in $d{L}{name}{family} ("x" stands in
+# for "no family"); every country gets one slot per listed encoding plus
+# an unconditional "UTF-8" slot, all initialised to undef.
+# <translation> elements are recorded in $d{T}{encoding}{"<cldr>"}.
+#
+# NOTE(review): a second <language> element with the same name and family
+# overwrites the fallback/link/family/encoding/countries values stored by
+# the first one; only the per-country data slots accumulate.
+sub h_start {
+	my ($expat, $element, @pairs) = @_;
+
+	# Fold the flat attribute list into a hash, two items at a time.
+	my %attrs = ();
+	while (@pairs) {
+		my $key = shift(@pairs);
+		$attrs{$key} = shift(@pairs);
+	}
+
+	# Remember which element sits at the current nesting depth.
+	$index++;
+	$data{element}{$index} = $element;
+
+	if ($element eq "language") {
+		my $family = defined $attrs{family} ? $attrs{family} : "x";
+		my $lang = \%{$d{L}{$attrs{name}}{$family}};
+
+		foreach my $attr (qw(fallback link family encoding countries)) {
+			$lang->{$attr} = $attrs{$attr};
+		}
+
+		foreach my $country (split(" ", $attrs{countries})) {
+			my $slots = \%{$lang->{data}{$country}};
+			if (defined $attrs{encoding}) {
+				# Hash slice: one undef slot per encoding.
+				@{$slots}{split(" ", $attrs{encoding})} = ();
+			}
+			$slots->{"UTF-8"} = undef;
+		}
+		return;
+	}
+
+	if ($element eq "translation") {
+		if (defined $attrs{hex}) {
+			# Each pair of hex digits is one byte of the result.
+			my $symbol = "<" . $attrs{cldr} . ">";
+			$d{T}{$attrs{encoding}}{$symbol} = join("",
+			    map { chr(hex($_)) } unpack("(a2)*", $attrs{hex}));
+		}
+		# A literal string wins over a hex value when both are given.
+		if (defined $attrs{string}) {
+			my $symbol = "<" . $attrs{cldr} . ">";
+			$d{T}{$attrs{encoding}}{$symbol} = $attrs{string};
+		}
+		return;
+	}
+}
+
+# Expat "End" handler: leaving an element moves the nesting depth counter
+# one level back up.  The parser object and element name are not used.
+sub h_end {
+	my ($expat, $element) = @_;
+	return $index--;
+}
+
+# Expat "Char" handler: character data between tags is picked up but not
+# stored anywhere.
+sub h_char {
+	my $expat = $_[0];
+	my $string = $_[1];
+}
+
+#use Data::Dumper;
+#my %D = get_xmldata();
+#print Dumper(%D);
+1;

Added: user/edwin/locale/tools/charmaps.xml
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/edwin/locale/tools/charmaps.xml	Thu Jul 16 22:30:11 2009	(r195730)
@@ -0,0 +1,237 @@
+<data>
+<languages>
+	<!-- Attributes known:
+		name = language name - only one
+		encoding = encodings to be done - separated by space
+		countries = countries to create - separated by space
+		family = which font family - only one
+		link = only with family, create this original file too - only one
+		fallback = read this file if name_countries doesn't exist - only one
+
+		By default, the name of the input file is name_countries.
+		If family is defined, the name of the input file will be name_family_countries.
+		If fallback is defined, the name of the input file will be that.
+
+		By default, the name of the output file is name_countries.
+		If family is defined, the name of the output file will be name_family_countries.
+		For backwards compatibility you use link which is name_countries.
+
+	-->
+	<language name="af"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="ZA" />
+	<language name="am"
+		countries="ET" />		<!-- UTF-8 only -->
+	<language name="be"
+		encoding="CP1131 CP1251 ISO8859-5"
+		countries="BY" />
+	<language name="bg"
+		encoding="CP1251"
+		countries="BG" />
+	<language name="ca"
+		fallback="ca_ES"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="AD ES FR IT" />	<!-- Not defined for anything else -->
+	<language name="cs"
+		encoding="ISO8859-2"
+		countries="CZ" />
+	<language name="da"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="DK" />
+	<language name="de"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="AT CH DE" />
+	<language name="el"
+		encoding="ISO8859-7"
+		countries="GR" />
+	<language name="en"
+		encoding="ISO8859-1 ISO8859-15 US-ASCII"
+		countries="AU CA GB NZ US" />
+	<language name="en"
+		countries="IE" />
+	<language name="es"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="ES" />
+	<language name="et"
+		encoding="ISO8859-15"
+		countries="EE" />
+	<language name="eu"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="ES" />
+	<language name="fi"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="FI" />
+	<language name="fr"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="BE CA CH FR" />
+	<language name="he"
+		countries="IL" />
+	<language name="hi"
+		encoding="ISCII-DEV"
+		countries="IN" />
+	<language name="hr"
+		encoding="ISO8859-2"
+		countries="HR" />
+	<language name="hu"
+		encoding="ISO8859-2"
+		countries="HU" />
+	<language name="hy"
+		encoding="ARMSCII-8"
+		countries="AM" />
+	<language name="is"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="IS" />
+	<language name="it"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="CH IT" />
+	<language name="ja"
+		link="jp_JP"
+		encoding="SJIS eucJP"
+		countries="JP" />
+	<language name="kk"
+		family="Cyrl"
+		link="kk_KZ"
+		encoding="PT154"
+		countries="KZ" />
+	<language name="ko"
+		encoding="CP949 eucKR"
+		countries="KR" />
+	<language name="la"
+		encoding="ISO8859-1 ISO8859-2 ISO8859-4 ISO8859-15 US-ASCII"
+		countries="LN" />
+	<language name="lt"
+		encoding="ISO8859-4 ISO8859-13"
+		countries="LT" />
+	<language name="mn"
+		family="Cyrl"
+		link="mn_MN"
+		countries="MN" />
+	<language name="nb"
+		link="no_NO"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="NO" />
+	<language name="nl"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="BE NL" />
+	<language name="nn"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="NO" />
+	<language name="pl"
+		encoding="ISO8859-2"
+		countries="PL" />
+	<language name="pt"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="PT" />
+	<language name="ro"
+		encoding="ISO8859-2"
+		countries="RO" />
+	<language name="ru"
+		encoding="CP1251 CP866 ISO8859-5 KOI8-R"
+		countries="RU" />
+	<language name="sk"
+		encoding="ISO8859-2"
+		countries="SK" />
+	<language name="sl"
+		encoding="ISO8859-2"
+		countries="SI" />
+	<language name="sr"
+		family="Latn"
+		link="sr_YU"
+		encoding="ISO8859-2"
+		countries="RS" />
+	<language name="sr"
+		family="Cyrl"
+		link="sr_YU"
+		encoding="ISO8859-5"
+		countries="RS" />
+	<language name="sv"
+		encoding="ISO8859-1 ISO8859-15"
+		countries="SE" />
+	<language name="tr"
+		encoding="ISO8859-9"
+		countries="TR" />
+	<language name="uk"
+		encoding="CP1251 ISO8859-5 KOI8-U"
+		countries="UA" />
+	<language name="zh"
+		family="Hans"
+		link="zh_CN"
+		encoding="GB18030 GB2312 GBK eucCN"
+		countries="CN" />
+	<language name="zh"
+		family="Hant"
+		link="zh_HK"
+		encoding="Big5HKSCS"
+		countries="HK" />
+	<language name="zh"
+		family="Hant"
+		link="zh_TW"
+		encoding="Big5"
+		countries="TW" />
+</languages>
+
+<translations>
+	<!-- These don't have a special Euro sign so just use Eu for it -->
+	<translation encoding="ISO8859-1" cldr="EURO_SIGN" string="Eu" />
+	<translation encoding="ISO8859-2" cldr="EURO_SIGN" string="Eu" />
+
+	<!-- These don't have a special Won sign, so use the byte 0x5C for it -->
+	<translation encoding="CP949" cldr="WON_SIGN" hex="5C" />
+	<translation encoding="eucKR" cldr="WON_SIGN" hex="5C" />
+
+	<!-- Minus and dashes -->
+	<translation encoding="ISO8859-1" cldr="MINUS_SIGN" string="-" />
+	<translation encoding="ISO8859-4" cldr="MINUS_SIGN" string="-" />
+	<translation encoding="ISO8859-13" cldr="MINUS_SIGN" string="-" />
+	<translation encoding="ISO8859-15" cldr="MINUS_SIGN" string="-" />
+	<translation encoding="ISO8859-2" cldr="EN_DASH" string="-" />
+
+	<!-- Copied from the original FreeBSD src/share/monetdef -->
+	<translation encoding="CP1251" cldr="HRYVNIA_SIGN" hex="E3F0ED" />
+	<translation encoding="ISO8859-5" cldr="HRYVNIA_SIGN" hex="D3E0DD" />
+	<translation encoding="KOI8-U" cldr="HRYVNIA_SIGN" hex="C7D2CE" />
+
+	<!-- Value found in http://en.wikipedia.org/wiki/Pound_sign -->
+	<translation encoding="US-ASCII" cldr="POUND_SIGN" hex="A3" />
+
+	<!-- Values found in http://en.wikipedia.org/wiki/Ya_(Cyrillic) -->
+	<translation encoding="CP1251" cldr="CYRILLIC_SMALL_LETTER_YA" hex="FF" />
+	<translation encoding="ISO8859-5" cldr="CYRILLIC_SMALL_LETTER_YA" hex="EF" />
+	<translation encoding="KOI8-U" cldr="CYRILLIC_SMALL_LETTER_YA" hex="D1" />
+	<!-- Values found in http://en.wikipedia.org/wiki/Cyrillic_characters_in_Unicode -->
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_CAPITAL_LETTER_A" string="A" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_A" string="a" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_KA" string="k" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_O" string="o" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_CAPITAL_LETTER_DE" string="D" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_DE" string="d" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_CAPITAL_LETTER_IE" string="E" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_IE" string="e" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_CAPITAL_LETTER_EN" string="N" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_EN" string="n" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_BE" string="b" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_ER" string="r" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_JE" string="j" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_EL" string="l" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_GHE" string="g" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_PE" string="p" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_TE" string="t" />
+	<translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_VE" string="v" />
+
+	<!-- Not sure why these ones aren't known by libiconv -->
+	<translation encoding="ISO8859-2" cldr="a" string="a" />
+	<translation encoding="ISO8859-2" cldr="d" string="d" />
+	<translation encoding="ISO8859-2" cldr="e" string="e" />
+	<translation encoding="ISO8859-2" cldr="i" string="i" />
+	<translation encoding="ISO8859-2" cldr="n" string="n" />
+	<translation encoding="ISO8859-2" cldr="r" string="r" />
+
+	<translation encoding="ISO8859-5" cldr="t" string="t" />
+	<translation encoding="ISO8859-5" cldr="k" string="k" />
+
+	<!-- Just a . ? -->
+	<translation encoding="ISO8859-2" cldr="FULL_STOP" string="." />
+	<translation encoding="ARMSCII-8" cldr="ONE_DOT_LEADER" string="." />
+
+</translations>
+</data>

Added: user/edwin/locale/tools/cldr2def.pl
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/edwin/locale/tools/cldr2def.pl	Thu Jul 16 22:30:11 2009	(r195730)
@@ -0,0 +1,527 @@
+#!/usr/bin/perl -wC
+
+use strict;
+use XML::Parser;
+use Text::Iconv;
+use Tie::IxHash;
+use Data::Dumper;
+use Digest::SHA qw(sha1_hex);
+require "charmaps.pm";
+
+# At least three arguments are required; the fourth (a single locale to
+# restrict the run to) is optional.
+if ($#ARGV < 2) {
+	print "Usage: $0 <cldrdir> <charmaps> <type> [la_CC]\n";
+	exit(1);
+}
+
+# Encoding of the input files; also always produced as an output encoding.
+my $DEFENCODING = "UTF-8";
+# Input directory; files are read from "$DIR/posix/".
+my $DIR = shift(@ARGV);
+# XML description of languages and translations, handed to get_xmldata().
+my $CHARMAPS = shift(@ARGV);
+# Which definition to generate; also used as the output directory name.
+my $TYPE = shift(@ARGV);
+# Optional locale filter as given on the command line (may be undef).
+my $doonly = shift(@ARGV);
+# Parsed filter: (language, family or "x", country); empty means "all".
+my @filter = ();
+
+# Text::Iconv objects, one per target encoding, created on first use.
+my %convertors = ();
+
+# Raw field values read from the input, as $values{language}{country}{key}.
+my %values = ();
+# SHA1 of generated output => set of "language_family_country.encoding".
+my %hashtable = ();
+# The <language> and <translation> sections of the XML description.
+my %languages = ();
+my %translations = ();
+# Must run after the declarations above: it fills %languages,
+# %translations and @filter.
+get_languages();
+
+# Charmap symbol (e.g. "<EURO_SIGN>") => hex digits of its encoded bytes.
+my %cm = ();
+get_utf8map();
+
+# Field name => format code for the selected $TYPE.  Tied to Tie::IxHash
+# so that iteration follows insertion order.
+my %keys = ();
+tie(%keys, "Tie::IxHash");
+tie(%hashtable, "Tie::IxHash");
+
+# Definition type => name of the installed locale file; written into the
+# generated Makefile as FILESNAME.
+my %FILESNAMES = (
+	monetdef   => "LC_MONETARY",
+	timedef    => "LC_TIME",
+	msgdef     => "LC_MESSAGES",
+	numericdef => "LC_NUMERIC",
+);
+
+# Callbacks that can be named in a "<callback<source<format" field format.
+my %callback = (
+	"mdorder" => \&callback_mdorder,
+);
+
+# Field name => description printed as a comment above the field's value.
+# Every field listed in %keys must have an entry here.
+my %DESC = (
+	# numericdef
+	decimal_point => "decimal_point",
+	thousands_sep => "thousands_sep",
+	grouping      => "grouping",
+
+	# monetdef
+	int_curr_symbol   => "int_curr_symbol (last character always SPACE)",
+	currency_symbol   => "currency_symbol",
+	mon_decimal_point => "mon_decimal_point",
+	mon_thousands_sep => "mon_thousands_sep",
+	mon_grouping      => "mon_grouping",
+	positive_sign     => "positive_sign",
+	negative_sign     => "negative_sign",
+	int_frac_digits   => "int_frac_digits",
+	frac_digits       => "frac_digits",
+	p_cs_precedes     => "p_cs_precedes",
+	p_sep_by_space    => "p_sep_by_space",
+	n_cs_precedes     => "n_cs_precedes",
+	n_sep_by_space    => "n_sep_by_space",
+	p_sign_posn       => "p_sign_posn",
+	n_sign_posn       => "n_sign_posn",
+
+	# msgdef
+	yesexpr => "yesexpr",
+	noexpr  => "noexpr",
+	yesstr  => "yesstr",
+	nostr   => "nostr",
+
+	# timedef
+	abmon      => "Short month names",
+	mon        => "Long month names (as in a date)",
+	abday      => "Short weekday names",
+	day        => "Long weekday names",
+	t_fmt      => "X_fmt",
+	d_fmt      => "x_fmt",
+	XXX        => "c_fmt",
+	am_pm      => "AM/PM",
+	d_t_fmt    => "date_fmt",
+	mon2       => "Long month names (without case ending)",
+	md_order   => "md_order",
+	t_fmt_ampm => "ampm_fmt",
+);
+
+# Select the fields to generate for the requested definition type.  The
+# insertion order is the order in which the fields are written.  The
+# value is the field's format code as understood by print_fields():
+#   "i", "ai"  - written as read
+#   "s"        - one string, converted to the target encoding
+#   "as"       - ";"-separated list of strings, one output line each
+#   ">key"     - use the value and format of another key
+#   "<cb<key<fmt" - run callback "cb" on the value of "key", then treat
+#                   the result as format "fmt"
+if ($TYPE eq "numericdef") {
+	%keys = (
+	    "decimal_point"	=> "s",
+	    "thousands_sep"	=> "s",
+	    "grouping"		=> "ai",
+	);
+} elsif ($TYPE eq "monetdef") {
+	%keys = (
+	    "int_curr_symbol"	=> "s",
+	    "currency_symbol"	=> "s",
+	    "mon_decimal_point"	=> "s",
+	    "mon_thousands_sep"	=> "s",
+	    "mon_grouping"	=> "ai",
+	    "positive_sign"	=> "s",
+	    "negative_sign"	=> "s",
+	    "int_frac_digits"	=> "i",
+	    "frac_digits"	=> "i",
+	    "p_cs_precedes"	=> "i",
+	    "p_sep_by_space"	=> "i",
+	    "n_cs_precedes"	=> "i",
+	    "n_sep_by_space"	=> "i",
+	    "p_sign_posn"	=> "i",
+	    "n_sign_posn"	=> "i"
+	);
+} elsif ($TYPE eq "msgdef") {
+	%keys = (
+	    "yesexpr"		=> "s",
+	    "noexpr"		=> "s",
+	    "yesstr"		=> "s",
+	    "nostr"		=> "s"
+	);
+} elsif ($TYPE eq "timedef") {
+	%keys = (
+	    "abmon"		=> "as",
+	    "mon"		=> "as",
+	    "abday"		=> "as",
+	    "day"		=> "as",
+	    "t_fmt"		=> "s",
+	    "d_fmt"		=> "s",
+	    "XXX"		=> "s",
+	    "am_pm"		=> "as",
+	    "d_t_fmt"		=> "s",
+	    "mon2"		=> ">mon",		# repeat them for now
+	    "md_order"		=> "<mdorder<d_fmt<s",
+	    "t_fmt_ampm"	=> "s",
+	);
+} else {
+	# Previously an unknown type did nothing and exited successfully.
+	print STDERR "Unknown type '$TYPE', expected monetdef, msgdef, " .
+	    "numericdef or timedef\n";
+	exit(1);
+}
+
+get_fields();
+print_fields();
+make_makefile();
+
+# Callback for the md_order field: reduce a date format to the order of
+# its "d" and "m" letters by dropping every other character.
+# Returns undef when no format was given.
+sub callback_mdorder {
+	my ($fmt) = @_;
+	return undef unless (defined $fmt);
+
+	(my $order = $fmt) =~ s/[^dm]//g;
+	return $order;
+};
+
+############################
+
+# Load the charmap of the default encoding from
+# "$DIR/posix/$DEFENCODING.cm" into %cm.
+#
+# Only the lines between "CHARMAP" and "END CHARMAP" are used.  Each maps
+# a symbol to its encoded bytes; the "\x" markers are removed so that the
+# stored value is a plain run of hex digits.
+# Dies when the charmap cannot be opened, because no symbol could be
+# decoded without it.
+sub get_utf8map {
+	my $file = "$DIR/posix/$DEFENCODING.cm";
+	open(my $fin, "<", $file) or die("Cannot open $file: $!\n");
+
+	my $incharmap = 0;
+	while (my $l = <$fin>) {
+		chomp($l);
+		$l =~ s/\r//;
+		next if ($l =~ /^\#/);
+		next if ($l eq "");
+		if ($l eq "CHARMAP") {
+			$incharmap = 1;
+			next;
+		}
+		next if (!$incharmap);
+		last if ($l eq "END CHARMAP");
+
+		# Lines without a "symbol value" shape are skipped instead of
+		# reusing the captures of an earlier match.
+		next if ($l !~ /^([^\s]+)\s+(.*)/);
+		my ($k, $v) = ($1, $2);
+		$v =~ s/\\x//g;
+		$cm{$k} = $v;
+	}
+	close($fin);
+}
+
+# Load the language and translation tables from the XML description and
+# turn the optional command line filter into @filter.
+#
+# The filter is either "la_CC" (stored as la, "x", CC; "x" means "no
+# family") or "la_Family_CC".  Anything else is reported and ignored, so
+# all locales are processed, as before.
+sub get_languages {
+	my %data = get_xmldata($CHARMAPS);
+	# A description without one of the sections yields an empty table
+	# instead of a fatal dereference of undef.
+	%languages = %{$data{L} || {}};
+	%translations = %{$data{T} || {}};
+
+	return if (!defined $doonly);
+
+	my @a = split(/_/, $doonly);
+	if ($#a == 1) {
+		@filter = ($a[0], "x", $a[1]);
+	} elsif ($#a == 2) {
+		@filter = @a;
+	} else {
+		print STDERR "Ignoring filter '$doonly': expected la_CC " .
+		    "or la_Family_CC\n";
+	}
+
+	print Dumper(@filter);
+	return;
+}
+
+# Read the source file of every selected locale in the default encoding
+# and collect the raw values of the fields listed in %keys into
+# $values{language}{country}{key}.
+#
+# The file is "$DIR/posix/<locale>.$DEFENCODING.src"; when it cannot be
+# opened the language's "fallback" file is tried.  The per-locale slot
+# for $DEFENCODING in %languages is set to 0 (not read) or 1 (read).
+# A value may continue on following lines when a line ends in "/".
+#
+# NOTE(review): %values is not keyed by family, so two families of one
+# language that share a country append to the same entries.
+sub get_fields {
+	foreach my $l (sort keys(%languages)) {
+	foreach my $f (sort keys(%{$languages{$l}})) {
+	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
+		next if ($#filter == 2 && ($filter[0] ne $l
+		    || $filter[1] ne $f || $filter[2] ne $c));
+
+		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
+		my $file = $l . "_";
+		$file .= $f . "_" if ($f ne "x");
+		$file .= $c;
+		my $fin;
+		if (!open($fin, "<", "$DIR/posix/$file.$DEFENCODING.src")) {
+			if (!defined $languages{$l}{$f}{fallback}) {
+				print STDERR
+				    "Cannot open $file.$DEFENCODING.src\n";
+				next;
+			}
+			$file = $languages{$l}{$f}{fallback};
+			if (!open($fin, "<",
+			    "$DIR/posix/$file.$DEFENCODING.src")) {
+				print STDERR
+				    "Cannot open fallback " .
+				    "$file.$DEFENCODING.src\n";
+				next;
+			}
+		}
+		print "Reading from $file.$DEFENCODING.src for ${l}_${f}_${c}\n";
+		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
+		my @lines = <$fin>;
+		chomp(@lines);
+		close($fin);
+
+		foreach my $k (keys(%keys)) {
+			# Per key, so that an unterminated continuation at the
+			# end of the file cannot leak into the next key.
+			my $continue = 0;
+			foreach my $raw (@lines) {
+				# Work on a copy; the lines are scanned again
+				# for every key.
+				my $line = $raw;
+				$line =~ s/\r//;
+				next if (!$continue && $line !~ /^\Q$k\E\s/);
+				if ($continue) {
+					$line =~ s/^\s+//;
+				} else {
+					$line =~ s/^\Q$k\E\s+//;
+				}
+
+				$values{$l}{$c}{$k} = ""
+					if (!defined $values{$l}{$c}{$k});
+
+				# A trailing "/" means the value goes on.
+				$continue = ($line =~ /\/$/);
+				$line =~ s/\/$// if ($continue);
+				$values{$l}{$c}{$k} .= $line;
+
+				last if (!$continue);
+			}
+		}
+	}
+	}
+	}
+}
+
+# Turn a charmap symbol such as "<EURO_SIGN>" into its encoded bytes.
+#
+# %cm holds the bytes of each symbol as a run of hex digits.  Any whole
+# number of bytes is accepted, not only one to three.
+# For an unknown symbol or a value that is not made of hex digit pairs
+# the diagnostic text "length = N" is returned, as before.  The result
+# never contains the symbol itself, which matters because callers loop
+# until no "<...>" is left in the string.
+sub decodecldr {
+	my $s = shift;
+	my $v = $cm{$s};
+	$v = "" if (!defined $v);
+
+	return pack("H*", $v) if ($v =~ /\A(?:[0-9A-Fa-f]{2})+\z/);
+	return "length = " . length($v);
+}
+
+# Look up the hand-made replacement for a charmap symbol in an encoding.
+# $enc is the target encoding and $v the symbol, e.g. "<EURO_SIGN>".
+# Returns the replacement from %translations, or undef when none exists.
+sub translate {
+	my ($enc, $v) = @_;
+
+	return defined $translations{$enc}{$v}
+	    ? $translations{$enc}{$v}
+	    : undef;
+}
+
+# Helper of print_fields(): convert one string value to encoding $enc.
+#
+# Surrounding double quotes are removed and every "<SYMBOL>" is replaced
+# by its bytes through decodecldr() before the iconv conversion.  When
+# the conversion fails, the translation of the last symbol seen is used.
+# Returns the converted string, or undef after reporting the failure for
+# field $k on STDERR.
+sub _convert_string {
+	my ($enc, $k, $v) = @_;
+
+	$v =~ s/^"//;
+	$v =~ s/"$//;
+	my $cm = "";
+	while ($v =~ /^(.*?)(<.*?>)(.*)/) {
+		my ($before, $symbol, $after) = ($1, $2, $3);
+		$cm = $symbol;
+		$v = $before . decodecldr($symbol) . $after;
+	}
+
+	my $fv = $convertors{$enc}->convert("$v");
+	$fv = translate($enc, $cm) if (!defined $fv);
+	if (!defined $fv) {
+		print STDERR "Could not convert $k ($cm) from " .
+		    "$DEFENCODING to $enc\n";
+	}
+	return $fv;
+}
+
+# Write "$TYPE/<locale>.<encoding>.src" for every selected locale and
+# every encoding listed for it.
+#
+# The file is first written as ".new" and then renamed to ".src", or to
+# ".failed" when at least one value could not be converted.  The SHA1 of
+# the generated body is stored in %languages and %hashtable so that
+# make_makefile() can find identical files.
+# Locales whose input was not read and encodings without an iconv
+# converter are reported and skipped, as is an output file that cannot
+# be created.
+sub print_fields {
+	foreach my $l (sort keys(%languages)) {
+	foreach my $f (sort keys(%{$languages{$l}})) {
+	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
+		next if ($#filter == 2 && ($filter[0] ne $l
+		    || $filter[1] ne $f || $filter[2] ne $c));
+		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
+			if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
+				print "Skipping ${l}_" .
+				    ($f eq "x" ? "" : "${f}_") .
+				    "${c} - not read\n";
+				next;
+			}
+			my $file = $l;
+			$file .= "_" . $f if ($f ne "x");
+			$file .= "_" . $c;
+			print "Writing to $file in $enc\n";
+
+			# Converters are created once per encoding; the eval
+			# keeps an unsupported encoding from being fatal.
+			eval {
+				$convertors{$enc} =
+				    Text::Iconv->new($DEFENCODING, $enc);
+			} if (!defined $convertors{$enc});
+			if (!defined $convertors{$enc}) {
+				print "Failed! Cannot convert between " .
+				    "$DEFENCODING and $enc.\n";
+				next;
+			};
+
+			my $new = "$TYPE/$file.$enc.new";
+			my $fout;
+			if (!open($fout, ">", $new)) {
+				print STDERR "Cannot create $new: $!\n";
+				next;
+			}
+			my $okay = 1;
+			my $output = "";
+			print $fout <<EOF;
+# \$FreeBSD\$
+#
+# Warning: Do not edit. This file is automatically generated from the
+# tools in /usr/src/tools/tools/locale. The data is obtained from the
+# CLDR project, obtained from http://cldr.unicode.org/
+#
+# ${l}_$c in $enc
+#
+# -----------------------------------------------------------------------------
+EOF
+			foreach my $k (keys(%keys)) {
+				my $key = $k;
+				my $fmt = $keys{$key};
+
+				die("Unknown $key in \%DESC")
+					if (!defined $DESC{$key});
+
+				$output .= "#\n# $DESC{$key}\n";
+
+				# ">other": take value and format of "other".
+				if ($fmt =~ /^>/) {
+					$key = substr($fmt, 1);
+					$fmt = $keys{$key};
+				}
+				# "<callback<source<format": derive the value.
+				if ($fmt =~ /^\</) {
+					my @a = split(/\</, substr($fmt, 1));
+					my $rv = $callback{$a[0]}->(
+					    $values{$l}{$c}{$a[1]});
+					$values{$l}{$c}{$key} = $rv;
+					$fmt = $a[2];
+				}
+
+				my $v = $values{$l}{$c}{$key};
+				$v = "undef" if (!defined $v);
+
+				if ($fmt eq "i" || $fmt eq "ai") {
+					$output .= "$v\n";
+					next;
+				}
+				if ($fmt eq "s") {
+					my $fv = _convert_string($enc, $key, $v);
+					if (defined $fv) {
+						$output .= "$fv\n";
+					} else {
+						$okay = 0;
+					}
+					next;
+				}
+				if ($fmt eq "as") {
+					foreach my $item (split(/;/, $v)) {
+						my $fv = _convert_string($enc,
+						    $key, $item);
+						if (defined $fv) {
+							$output .= "$fv\n";
+						} else {
+							$okay = 0;
+						}
+					}
+					next;
+				}
+
+				die("$key is '$fmt'");
+
+			}
+
+			my $hash = sha1_hex($output);
+			$languages{$l}{$f}{data}{$c}{$enc} = $hash;
+			$hashtable{$hash}{"${l}_${f}_${c}.$enc"} = 1;
+			print $fout "$output# EOF\n";
+			close($fout);
+
+			rename($new, "$TYPE/$file.$enc." .
+			    ($okay ? "src" : "failed"));
+		}
+	}
+	}
+	}
+}
+
+# Write "$TYPE/Makefile" for the generated locale sources.
+#
+# Nothing is written when a locale filter is active.  Files whose
+# generated content has the same SHA1 are listed once in LOCALES; the
+# duplicates become SAME entries and have their slot in %languages set to
+# undef so that they are left out of LOCALES.  Languages with a "link"
+# attribute get an additional SAME entry per encoding.
+# A Makefile that cannot be created is reported on STDERR.
+#
+# NOTE(review): the "src" target written below carries a fixed input
+# path, type and locale.
+sub make_makefile {
+	return if ($#filter > -1);
+	print "Creating Makefile for $TYPE\n";
+	my $fout;
+	if (!open($fout, ">", "$TYPE/Makefile")) {
+		print STDERR "Cannot create $TYPE/Makefile: $!\n";
+		return;
+	}
+	print $fout <<EOF;
+#
+# \$FreeBSD\$
+#
+# Warning: Do not edit. This file is automatically generated from the
+# tools in /usr/src/tools/tools/locale.
+# 
+
+LOCALEDIR=	/usr/share/locale
+FILESNAME=	$FILESNAMES{$TYPE}
+.SUFFIXES:	.src .out
+
+.src.out:
+	grep -v '^\#' < \${.IMPSRC} > \${.TARGET}
+
+EOF
+
+	foreach my $hash (keys(%hashtable)) {
+		my @files = sort(keys(%{$hashtable{$hash}}));
+		next if ($#files <= 0);
+
+		# The first name is kept; all others point at it.
+		my $link = shift(@files);
+		$link =~ s/_x_/_/;	# strip family if none there
+		foreach my $file (@files) {
+			# "la_family_CC.encoding" -> (la, family, CC, encoding)
+			my @a = split(/_/, $file);
+			my @b = split(/\./, $a[-1]);
+			$file =~ s/_x_/_/;
+			print $fout "SAME+=\t\t$link:$file\t#hash\n";
+			undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
+		}
+	}
+
+	# No filter test is needed in this loop: the early return above
+	# guarantees that @filter is empty here.
+	foreach my $l (sort keys(%languages)) {
+	foreach my $f (sort keys(%{$languages{$l}})) {
+	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
+		my $file = $l . "_";
+		$file .= $f . "_" if ($f ne "x");
+		$file .= $c;
+
+		foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
+			next if (!defined $languages{$l}{$f}{data}{$c}{$e});
+			print $fout "LOCALES+=\t$file.$e\n";
+		}
+
+		if (defined $languages{$l}{$f}{link}) {
+			foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
+				print $fout "SAME+=\t\t$file.$e:$languages{$l}{$f}{link}.$e\t# legacy\n";
+			}
+		}
+	}
+	}
+	}
+
+	print $fout <<EOF;
+
+FILES=		\${LOCALES:S/\$/.out/}
+CLEANFILES=	\${FILES}
+
+.for f in \${SAME}
+SYMLINKS+=	../\${f:C/:.*\$//}/\${FILESNAME} \${LOCALEDIR}/\${f:C/^.*://}
+.endfor
+
+.for f in \${LOCALES}
+FILESDIR_\${f}.out= \${LOCALEDIR}/\${f}
+.endfor
+
+
+src:
+	./cldr2def.pl /home/edwin/cldr/1.7.0/ charmaps.xml timedef nl_NL
+
+.include <bsd.prog.mk>
+EOF
+
+	close($fout);
+}


More information about the svn-src-user mailing list