svn commit: r195954 - user/edwin/locale/tools

Edwin Groothuis edwin at FreeBSD.org
Wed Jul 29 21:54:36 UTC 2009


Author: edwin
Date: Wed Jul 29 21:54:34 2009
New Revision: 195954
URL: http://svn.freebsd.org/changeset/base/195954

Log:
  Add small tool to convert UTF-8 encoded strings back into CLDR
  "markup" language.

Added:
  user/edwin/locale/tools/UTF82encoding.pl   (contents, props changed)

Added: user/edwin/locale/tools/UTF82encoding.pl
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/edwin/locale/tools/UTF82encoding.pl	Wed Jul 29 21:54:34 2009	(r195954)
@@ -0,0 +1,83 @@
+#!/usr/bin/perl -w
+#
+# Convert UTF-8 encoded strings in a file back into the symbolic names
+# ("markup") defined by a character map.
+#
+# Usage: UTF82encoding.pl <dir> <file>
+#
+#   <dir>   directory that holds posix/UTF-8.cm
+#   <file>  file to convert
+#
+# Lines of <file> starting with '#' are copied to stdout unchanged.  On
+# every other line each byte sequence found in the character map is
+# replaced by its symbolic name; a sequence that cannot be resolved is
+# reported as "--HEX--" on a line of its own.
+
+use strict;
+use Data::Dumper;
+
+# Longest byte sequence looked up in the character map; a UTF-8 encoded
+# character is at most four bytes long.
+my $MAXBYTES = 4;
+
+die "usage: $0 <dir> <file>\n" if (@ARGV < 2);
+
+my $cmfile = "$ARGV[0]/posix/UTF-8.cm";
+open(my $fin, "<", $cmfile) or die "$cmfile: $!\n";
+my @lines = <$fin>;
+chomp(@lines);
+close($fin);
+
+# Maps the hex digits of an encoded sequence ("\x" stripped) to its name.
+my %cm = ();
+foreach my $line (@lines) {
+	# Only two-column lines starting with '<' are charmap entries.
+	next if ($line =~ /^#/);
+	next if ($line eq "");
+	next if ($line !~ /^</);
+
+	my @a = split(" ", $line);
+	next if ($#a != 1);
+
+	$a[1] =~ s/\\x//g;
+	$cm{$a[1]} = $a[0];
+}
+
+# NOTE(review): looks like leftover debug output -- it lands on stdout
+# ahead of the converted text.  Kept so existing output does not change.
+print Dumper($cm{"4D"}), "\n";
+
+open($fin, "<", $ARGV[1]) or die "$ARGV[1]: $!\n";
+@lines = <$fin>;
+chomp(@lines);
+close($fin);
+
+foreach my $line (@lines) {
+	if ($line =~ /^#/) {
+		print "$line\n";
+		next;
+	}
+
+	# Assumes the handle has no decoding layer, so each element is a byte.
+	my @l = split(//, $line);
+	for (my $i = 0; $i <= $#l; $i++) {
+		# Try the shortest sequence first; never read past end of line.
+		my $left = $#l - $i + 1;
+		my $max = $left < $MAXBYTES ? $left : $MAXBYTES;
+		my $hex = "";
+		my $found = 0;
+		for (my $n = 1; $n <= $max; $n++) {
+			# Two digits per byte, matching the \xHH charmap notation.
+			$hex .= sprintf("%02X", ord($l[$i + $n - 1]));
+			next if (!defined $cm{$hex});
+			print $cm{$hex};
+			$i += $n - 1;
+			$found = 1;
+			last;
+		}
+
+		print "\n--$hex--\n" if (!$found);
+	}
+	print "\n";
+
+}


More information about the svn-src-user mailing list