git: fb25fdcaa34f - main - locales: only generate unicode locales
Baptiste Daroussin
bapt at FreeBSD.org
Fri Jul 23 15:07:02 UTC 2021
The branch main has been updated by bapt:
URL: https://cgit.FreeBSD.org/src/commit/?id=fb25fdcaa34f35a4c984b2da12f251fce3d75b0a
commit fb25fdcaa34f35a4c984b2da12f251fce3d75b0a
Author: Baptiste Daroussin <bapt at FreeBSD.org>
AuthorDate: 2021-07-23 14:10:24 +0000
Commit: Baptiste Daroussin <bapt at FreeBSD.org>
CommitDate: 2021-07-23 14:58:20 +0000
locales: only generate unicode locales
---
tools/tools/locale/Makefile | 82 ++++++------------------------------
tools/tools/locale/etc/charmaps.xml | 47 ---------------------
tools/tools/locale/tools/cldr2def.pl | 35 ---------------
tools/tools/locale/tools/finalize | 34 +--------------
4 files changed, 14 insertions(+), 184 deletions(-)
diff --git a/tools/tools/locale/Makefile b/tools/tools/locale/Makefile
index 92f890b2f4d3..0efca83a971e 100644
--- a/tools/tools/locale/Makefile
+++ b/tools/tools/locale/Makefile
@@ -32,33 +32,9 @@ tools-test:
KNOWN= monetdef numericdef msgdef colldef ctypedef # timedef
TYPES?= ${KNOWN}
-COLLATION_SPECIAL?= \
- cs_CZ ISO8859-2 \
- da_DK ISO8859-1 \
- da_DK ISO8859-15 \
- hr_HR ISO8859-2 \
- hu_HU ISO8859-2 \
- nb_NO ISO8859-1 \
- nb_NO ISO8859-15 \
- sk_SK ISO8859-2 \
- sr_Latn_RS ISO8859-2 \
- sr_Cyrl_RS ISO8859-5 \
- zh_Hans_CN GB2312 \
- zh_Hans_CN eucCN \
- zh_Hant_TW Big5 \
- zh_Hans_CN GB18030 \
- zh_Hans_CN GBK \
- ja_JP eucJP \
- nn_NO ISO8859-15 \
- nn_NO ISO8859-1
-
-.for area enc in ${COLLATION_SPECIAL}
-COLLATIONS_SPECIAL_ENV+= ${area}.${enc}
-.endfor
SETENV= env -i \
PATH="${PATH}" \
TMPDIR="${TMPDIR}" \
- COLLATIONS_SPECIAL="${COLLATIONS_SPECIAL_ENV}" \
UNIDIR="${UNIDIR}" \
BASEDIR="${BASEDIR}" \
TOOLSDIR="${TOOLSDIR}" \
@@ -89,16 +65,22 @@ diff-${t}:
.endfor
install:
-.for t in ${TYPES}
+.for t in ${TYPES:Nctypedef}
. if ${KNOWN:M${t}}
install: install-${t}
install-${t}:
- cd ${LOCALESRCDIR}/${t} && \
+ cd ${LOCALESRCDIR}/${t}_unicode && \
rm -f Makefile *.src && \
cd ${.OBJDIR} && \
- install -m 644 ${t}/* ${LOCALESRCDIR}/${t}
+ install -m 644 ${t}/* ${LOCALESRCDIR}/${t}_unicode
. endif
.endfor
+install: install-ctypedef
+install-ctypedef:
+ cd ${LOCALESRCDIR}/ctypedef && \
+ rm -f C.UTF-8.src && \
+ cd ${.OBJDIR} && \
+ install -m 644 ctypedef/C.UTF-8.src ${LOCALESRCDIR}/ctypedef
post-install:
.for t in ${TYPES}
@@ -121,15 +103,6 @@ build-${t}: ${t}
${SETENV} OUTBASEDIR="${.OBJDIR}/${t}" ${TOOLSDIR}/finalize ${t}
.endfor
-static-colldef: colldef
-build-colldef: static-colldef
-
-static-colldef:
-.for area enc in ${COLLATION_SPECIAL}
- awk -f ${TOOLSDIR}/extract-colldef.awk \
- posix/${area}.${enc}.src > colldef.draft/${area}.${enc}.src
-.endfor
-
BASE_LOCALES_OF_INTEREST?= \
af_ZA am_ET ar_AE ar_EG ar_JO ar_MA ar_QA ar_SA \
be_BY bg_BG ca_AD ca_ES ca_FR ca_IT \
@@ -147,35 +120,14 @@ BASE_LOCALES_OF_INTEREST?= \
th_TH lo_LA bo_IN my_MM pa_Guru_IN ka_GE chr_US \
km_KH shi_Tfng_MA ii_CN vai_Vaii_LR vi_VN
-ENCODINGS= Big5 \
- CP1251 \
- CP866 \
- CP949 \
- eucCN \
- eucJP \
- eucKR \
- GB18030 \
- GB2312 \
- GBK \
- ISO8859-1 \
- ISO8859-13 \
- ISO8859-15 \
- ISO8859-2 \
- ISO8859-5 \
- ISO8859-7 \
- ISO8859-9 \
- KOI8-R \
- KOI8-U \
- SJIS \
- US-ASCII \
- UTF-8 \
+ENCODINGS= UTF-8 \
UTF-32
# CLDR files
CLDRFILES_CORE= https://unicode.org/Public/cldr/35/core.zip
CLDRFILES_KEY= https://unicode.org/Public/cldr/35/keyboards.zip
CLDRFILES_TOOLS=https://unicode.org/Public/cldr/35/tools.zip
-CLDRFILES_UCD= http://www.unicode.org/Public/zipped/latest/UCD.zip
+CLDRFILES_UCD= http://www.unicode.org/Public/zipped/13.0.0/UCD.zip
# fetch and extract targets
${UNIDIR}:
@@ -206,8 +158,8 @@ build-tools:
JAVA_CLDR= java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar
-posix: posixcm post-posixcm posixsrc posixcol
-.ORDER: posixcm post-posixcm posixsrc posixcol
+posix: posixcm post-posixcm posixsrc
+.ORDER: posixcm post-posixcm posixsrc
${UNIDIR}/posix:
ln -s -f ../posix ${.TARGET}
clean-posix:
@@ -232,14 +184,6 @@ posix/${area}.UTF-8.src:
${JAVA_CLDR} org.unicode.cldr.posix.GeneratePOSIX \
-d posix -m ${area} -c UTF-8
.endfor
-.for area encoding in ${COLLATION_SPECIAL}
-posixcol: build-tools posix/${area}.${encoding}.src
-.ORDER: build-tools posix/${area}.${encoding}.src
-posix/${area}.${encoding}.src:
- mkdir -p posix && \
- ${JAVA_CLDR} org.unicode.cldr.posix.GeneratePOSIX \
- -d posix -m ${area} -c ${encoding}
-.endfor
# generate widths.txt using the data from libutf8proc
GETWIDTHS=${TOOLSDIR}/getwidths
diff --git a/tools/tools/locale/etc/charmaps.xml b/tools/tools/locale/etc/charmaps.xml
index 52e80f2dee05..9d42b1e5247c 100644
--- a/tools/tools/locale/etc/charmaps.xml
+++ b/tools/tools/locale/etc/charmaps.xml
@@ -28,169 +28,122 @@
-->
<language name="af"
- encoding="ISO8859-1 ISO8859-15"
countries="ZA" />
<language name="am"
countries="ET" /> <!-- UTF-8 only -->
<language name="ar"
countries="AE EG JO MA QA SA" />
<language name="be"
- encoding="CP1131 CP1251 ISO8859-5"
countries="BY" />
<language name="bg"
- encoding="CP1251"
countries="BG" />
<language name="ca"
fallback="ca_ES"
- encoding="ISO8859-1 ISO8859-15"
countries="AD ES FR IT" /> <!-- only ca_ES defined -->
<language name="cs"
- encoding="ISO8859-2"
countries="CZ" />
<language name="da"
- encoding="ISO8859-1 ISO8859-15"
countries="DK" />
<language name="de"
- encoding="ISO8859-1 ISO8859-15"
countries="AT CH DE" />
<language name="el"
- encoding="ISO8859-7"
countries="GR" />
<language name="en"
- encoding="ISO8859-1 ISO8859-15 US-ASCII"
countries="GB" />
<language name="en"
- encoding="ISO8859-1 ISO8859-15 US-ASCII"
countries="AU CA NZ US ZA" />
<language name="en"
- encoding="ISO8859-1 ISO8859-15"
countries="IE" />
<language name="en"
- encoding="ISO8859-1"
countries="HK SG" />
<language name="en"
countries="PH" /> <!-- UTF-8 only -->
<language name="es"
countries="CR" /> <!-- UTF-8 only -->
<language name="es"
- encoding="ISO8859-1 ISO8859-15"
countries="ES" />
<language name="es"
- encoding="ISO8859-1"
countries="AR MX" />
<language name="et"
- encoding="ISO8859-1 ISO8859-15"
countries="EE" />
<language name="eu"
- encoding="ISO8859-1 ISO8859-15"
countries="ES" />
<language name="fi"
- encoding="ISO8859-1 ISO8859-15"
countries="FI" />
<language name="fr"
- encoding="ISO8859-1 ISO8859-15"
countries="BE CH FR" />
<language name="fr"
- encoding="ISO8859-1 ISO8859-15"
countries="CA" />
<language name="ga"
countries="IE" /> <!-- UTF-8 only -->
<language name="he"
countries="IL" />
<language name="hi"
- encoding="ISCII-DEV"
countries="IN" />
<language name="hr"
- encoding="ISO8859-2"
countries="HR" />
<language name="hu"
- encoding="ISO8859-2"
countries="HU" />
<language name="hy"
- encoding="ARMSCII-8"
countries="AM" />
<language name="is"
- encoding="ISO8859-1 ISO8859-15"
countries="IS" />
<language name="it"
- encoding="ISO8859-1 ISO8859-15"
countries="CH IT" />
<language name="ja"
- encoding="SJIS eucJP"
countries="JP" />
<language name="kk"
countries="KZ" /> <!-- PT154 not available, UTF-8 -->
<language name="ko"
- encoding="eucKR"
- encoding_link="eucKR:CP949"
countries="KR" />
<language name="lt"
- encoding="ISO8859-13"
countries="LT" />
<language name="lv"
- encoding="ISO8859-13"
countries="LV" />
<language name="mn"
countries="MN" />
<language name="nb"
- encoding="ISO8859-1 ISO8859-15"
countries="NO" />
<language name="nl"
- encoding="ISO8859-1 ISO8859-15"
countries="BE NL" />
<language name="nn"
- encoding="ISO8859-1 ISO8859-15"
countries="NO" />
<language name="pl"
- encoding="ISO8859-2"
countries="PL" />
<language name="pt"
- encoding="ISO8859-1 ISO8859-15"
countries="PT" />
<language name="pt"
- encoding="ISO8859-1"
countries="BR" />
<language name="ro"
- encoding="ISO8859-2"
countries="RO" />
<language name="ru"
- encoding="CP1251 CP866 ISO8859-5 KOI8-R"
countries="RU" />
<language name="se"
countries="NO FI" />
<language name="sk"
- encoding="ISO8859-2"
countries="SK" />
<language name="sl"
- encoding="ISO8859-2"
countries="SI" />
<language name="sr"
family="Latn"
- encoding="ISO8859-2"
countries="RS" />
<language name="sr"
family="Cyrl"
- encoding="ISO8859-5"
countries="RS" />
<language name="sv"
- encoding="ISO8859-1 ISO8859-15"
countries="SE FI" />
<language name="tr"
- encoding="ISO8859-9"
countries="TR" />
<language name="uk"
- encoding="CP1251 ISO8859-5 KOI8-U"
countries="UA" />
<language name="zh"
family="Hans"
- encoding="GB18030 GB2312 GBK eucCN"
countries="CN" />
<language name="zh"
family="Hant"
countries="HK" />
<language name="zh"
family="Hant"
- encoding="Big5"
countries="TW" />
</languages>
diff --git a/tools/tools/locale/tools/cldr2def.pl b/tools/tools/locale/tools/cldr2def.pl
index fd475db714a0..70e0bdad525b 100755
--- a/tools/tools/locale/tools/cldr2def.pl
+++ b/tools/tools/locale/tools/cldr2def.pl
@@ -65,7 +65,6 @@ my %values = ();
my %hashtable = ();
my %languages = ();
my %translations = ();
-my %encodings = ();
my %alternativemonths = ();
get_languages();
@@ -74,7 +73,6 @@ $utfmap{'UTF-8'} = {};
$utfmap{'UTF-32'} = {};
get_utfmap("$UNIDIR/posix/$DEFENCODING.cm", $utfmap{'UTF-8'});
get_utfmap("$UNIDIR/posix/UTF-32.cm", $utfmap{'UTF-32'});
-get_encodings("$ETCDIR/charmaps");
my %keys = ();
tie(%keys, "Tie::IxHash");
@@ -384,44 +382,11 @@ sub resolve_enc_addition {
return $ret;
}
-sub get_encodings {
- my $dir = shift;
- foreach my $e (sort(keys(%encodings))) {
- if (!open(FIN, "$dir/$e.TXT")) {
- print "Cannot open charmap for $e\n";
- next;
-
- }
- $encodings{$e} = 1;
- my @lines = <FIN>;
- close(FIN);
- chomp(@lines);
- foreach my $l (@lines) {
- $l =~ s/\r//;
- next if ($l eq "");
-
- my @a = split(" ", $l);
- next if ($#a < 1);
- next if ($a[0] =~ /^\#/ or $a[1] =~ /^\#/);
- next if ($a[0] eq '' or $a[1] eq '');
-
- $a[0] = resolve_enc_addition($a[0]); # local
- $a[1] = resolve_enc_addition($a[1]); # UTF-32
- my $u32 = sprintf("%08X", hex($a[1]));
-# print STDERR "$a[1] => $u32\n";
-
- # Use UTF-32 as the indices.
- $convertors{$e}{$u32} = uc($a[0]);
- }
- }
-}
-
sub get_languages {
my %data = get_xmldata($ETCDIR);
%languages = %{$data{L}};
%translations = %{$data{T}};
%alternativemonths = %{$data{AM}};
- %encodings = %{$data{E}};
}
sub transform_ctypes {
diff --git a/tools/tools/locale/tools/finalize b/tools/tools/locale/tools/finalize
index 88dfcad0cb24..207b97ff3cb5 100755
--- a/tools/tools/locale/tools/finalize
+++ b/tools/tools/locale/tools/finalize
@@ -82,7 +82,6 @@ for i in *_*_*.*.src; do
nname=`echo $oldname | awk '{ split($0, a, "_"); print a[1]"_"a[3]"@"a[2];} '`
mv -f ${oldname}.src ${nname}.src
sed -i '' -e "s/${oldname}/${nname}/g" Makefile
- COLLATIONS_SPECIAL=$(echo ${COLLATIONS_SPECIAL} | sed -e "s/${oldname}/${nname}/g")
done
# For variable without @modifier ambiguity do not keep the @modifier
@@ -95,7 +94,6 @@ for i in *@*.src; do
if [ $(ls ${shortname}@* | wc -l) -eq 1 ] ; then
mv -f $i ${shortname}.src
sed -i '' -e "s/${oldname}/${shortname}/g" Makefile
- COLLATIONS_SPECIAL=$(echo ${COLLATIONS_SPECIAL} | sed -e "s/${oldname}/${shortname}/g")
fi
done
@@ -106,7 +104,6 @@ for i in *@Latn.src; do
fi
mv -f ${i} ${i%@*}@latin.src
sed -i '' -e "s/${i%.*}/${i%@*}@latin/g" Makefile
- COLLATIONS_SPECIAL=$(echo ${COLLATIONS_SPECIAL} | sed -e "s/${i%.*}/${i%@*}@latin/g")
done
for i in *@Cyrl.src; do
@@ -115,7 +112,6 @@ for i in *@Cyrl.src; do
fi
mv -f ${i} ${i%@*}@cyrillic.src
sed -i '' -e "s/${i%.*}/${i%@*}@cyrillic/g" Makefile
- COLLATIONS_SPECIAL=$(echo ${COLLATIONS_SPECIAL} | sed -e "s/${i%.*}/${i%@*}@cyrillic/g")
done
# On locales with multiple modifiers rename the "default" version without the @modifier
@@ -150,30 +146,6 @@ then
/usr/bin/sed -E -e 's/[ ]+/ /g' \
${UNIDIR}/posix/UTF-8.cm \
> ${ETCDIR}/final-maps/map.UTF-8
- /usr/bin/sed -E -e 's/[ ]+/ /g' \
- ${UNIDIR}/posix/eucCN.cm \
- > ${ETCDIR}/final-maps/map.eucCN
- /usr/bin/sed -E -e 's/[ ]+/ /g' \
- ${UNIDIR}/posix/eucCN.cm \
- > ${ETCDIR}/final-maps/map.GB2312
-
- # GB18030 and Big5 are pre-generated from CLDR data
- CHARMAPS="ARMSCII-8 CP1131 CP1251 \
- CP866 GBK ISCII-DEV ISO8859-1 \
- ISO8859-13 ISO8859-15 ISO8859-2 ISO8859-4 \
- ISO8859-5 ISO8859-7 ISO8859-9 KOI8-R KOI8-U \
- PT154 SJIS US-ASCII eucJP eucKR"
-
- for map in ${CHARMAPS}
- do
- encoding=${map}
- env ETCDIR="${ETCDIR}" \
- /usr/local/bin/perl ${TOOLSDIR}/convert_map.pl \
- ${ETCDIR}/charmaps/${map}.TXT ${encoding} \
- | /usr/bin/sed -E -e 's/ +/ /g' \
- > ${ETCDIR}/final-maps/map.${map}
- echo map ${map} converted.
- done
elif [ $1 = "colldef" ]
then
@@ -190,13 +162,9 @@ then
sed -i '' "/^SAME.*$line$/d" ${old}/Makefile
done
echo "" >> ${TEMP4}
- for enc in ${COLLATIONS_SPECIAL}; do
- sed -i '' "/^.*${enc}$/d" ${TEMP4}
- echo "LOCALES+= ${enc}" >> ${TEMP4}
- done
keep=$(cat ${TEMP} | awk '{ print $2 }')
- for original in ${keep} ${COLLATIONS_SPECIAL}
+ for original in ${keep}
do
cp ${old}/${original}.src ${new}/
done
More information about the dev-commits-src-all mailing list