svn commit: r196757 - user/edwin/locale/cldr/tools
Edwin Groothuis
edwin at FreeBSD.org
Wed Sep 2 09:53:32 UTC 2009
Author: edwin
Date: Wed Sep 2 09:53:32 2009
New Revision: 196757
URL: http://svn.freebsd.org/changeset/base/196757
Log:
Make sure only high-ASCII characters and A-Za-z0-9 get translated.
Modified:
user/edwin/locale/cldr/tools/UTF82encoding.pl
Modified: user/edwin/locale/cldr/tools/UTF82encoding.pl
==============================================================================
--- user/edwin/locale/cldr/tools/UTF82encoding.pl Wed Sep 2 09:52:26 2009 (r196756)
+++ user/edwin/locale/cldr/tools/UTF82encoding.pl Wed Sep 2 09:53:32 2009 (r196757)
@@ -3,6 +3,11 @@
use strict;
use Data::Dumper;
+if ($#ARGV != 1) {
+ print "Usage: $0 <cldr dir> <input file>\n";
+ exit;
+}
+
open(FIN, "$ARGV[0]/posix/UTF-8.cm");
my @lines = <FIN>;
chomp(@lines);
@@ -18,11 +23,10 @@ foreach my $line (@lines) {
next if ($#a != 1);
$a[1] =~ s/\\x//g;
- $cm{$a[1]} = $a[0];
+ $a[0] =~ s/_/ /g;
+ $cm{$a[1]} = $a[0] if (!defined $cm{$a[1]});
}
-print Dumper($cm{"4D"}), "\n";
-
open(FIN, $ARGV[1]);
@lines = <FIN>;
chomp(@lines);
@@ -37,6 +41,16 @@ foreach my $line (@lines) {
my @l = split(//, $line);
for (my $i = 0; $i <= $#l; $i++) {
my $hex = sprintf("%X", ord($l[$i]));
+
+ if (( $l[$i] gt "\x20")
+ && ($l[$i] lt "a" || $l[$i] gt "z")
+ && ($l[$i] lt "A" || $l[$i] gt "Z")
+ && ($l[$i] lt "0" || $l[$i] gt "9")
+ && ($l[$i] lt "\x80")) {
+ print $l[$i];
+ next;
+ }
+
if (defined $cm{$hex}) {
print $cm{$hex};
next;
More information about the svn-src-user
mailing list