svn commit: r367476 - in head: include/xlocale lib/libc/locale share/colldef tools/tools/locale tools/tools/locale/tools usr.bin/localedef usr.bin/localedef/bootstrap

Thomas Munro tmunro at FreeBSD.org
Sun Nov 8 02:50:37 UTC 2020


Author: tmunro
Date: Sun Nov  8 02:50:34 2020
New Revision: 367476
URL: https://svnweb.freebsd.org/changeset/base/367476

Log:
  Add collation version support to querylocale(3).
  
  Provide a way to ask for an opaque version string for a locale_t, so
  that potential changes in sort order can be detected.  Similar to
  ICU's ucol_getVersion() and Windows' GetNLSVersionEx(), this API is
  intended to allow databases to detect when text order-based indexes
  might need to be rebuilt.
  
  The CLDR version is extracted from CLDR source data by the Makefile
  under tools/tools/locale, written into the machine-generated Makefile
  under share/colldef, passed to localedef -V, and then written into
  LC_COLLATE file headers.  The initial version is 34.0.
  tools/tools/locale was recently updated to pull down 35.0, but the
  output hasn't been committed under share/colldef yet, so that will
  provide the first observable change when it happens.  Other versioning
  schemes are possible in future, because the format is unspecified.
  
  Reviewed by:	bapt, 0mp, kib, yuripv (albeit a long time ago)
  Differential Revision:	https://reviews.freebsd.org/D17166

Modified:
  head/include/xlocale/_locale.h
  head/lib/libc/locale/collate.c
  head/lib/libc/locale/collate.h
  head/lib/libc/locale/querylocale.3
  head/lib/libc/locale/xlocale.c
  head/lib/libc/locale/xlocale_private.h
  head/share/colldef/Makefile
  head/tools/tools/locale/Makefile
  head/tools/tools/locale/tools/cldr2def.pl
  head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h
  head/usr.bin/localedef/collate.c
  head/usr.bin/localedef/localedef.1
  head/usr.bin/localedef/localedef.c
  head/usr.bin/localedef/localedef.h

Modified: head/include/xlocale/_locale.h
==============================================================================
--- head/include/xlocale/_locale.h	Sun Nov  8 02:46:04 2020	(r367475)
+++ head/include/xlocale/_locale.h	Sun Nov  8 02:50:34 2020	(r367476)
@@ -43,6 +43,7 @@
 #define LC_MESSAGES_MASK (1<<5)
 #define LC_ALL_MASK      (LC_COLLATE_MASK | LC_CTYPE_MASK | LC_MESSAGES_MASK | \
 			  LC_MONETARY_MASK | LC_NUMERIC_MASK | LC_TIME_MASK)
+#define LC_VERSION_MASK  (1<<6)
 #define LC_GLOBAL_LOCALE ((locale_t)-1)
 
 #ifndef _LOCALE_T_DEFINED

Modified: head/lib/libc/locale/collate.c
==============================================================================
--- head/lib/libc/locale/collate.c	Sun Nov  8 02:46:04 2020	(r367475)
+++ head/lib/libc/locale/collate.c	Sun Nov  8 02:50:34 2020	(r367476)
@@ -140,7 +140,9 @@ __collate_load_tables_l(const char *encoding, struct x
 		(void) _close(fd);
 		return (_LDP_ERROR);
 	}
-	if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) {
+	if (sbuf.st_size < (COLLATE_FMT_VERSION_LEN +
+			    XLOCALE_DEF_VERSION_LEN +
+			    sizeof (info))) {
 		(void) _close(fd);
 		errno = EINVAL;
 		return (_LDP_ERROR);
@@ -151,12 +153,14 @@ __collate_load_tables_l(const char *encoding, struct x
 		return (_LDP_ERROR);
 	}
 
-	if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) {
+	if (strncmp(TMP, COLLATE_FMT_VERSION, COLLATE_FMT_VERSION_LEN) != 0) {
 		(void) munmap(map, sbuf.st_size);
 		errno = EINVAL;
 		return (_LDP_ERROR);
 	}
-	TMP += COLLATE_STR_LEN;
+	TMP += COLLATE_FMT_VERSION_LEN;
+	strlcat(table->header.version, TMP, sizeof (table->header.version));
+	TMP += XLOCALE_DEF_VERSION_LEN;
 
 	info = (void *)TMP;
 	TMP += sizeof (*info);

Modified: head/lib/libc/locale/collate.h
==============================================================================
--- head/lib/libc/locale/collate.h	Sun Nov  8 02:46:04 2020	(r367475)
+++ head/lib/libc/locale/collate.h	Sun Nov  8 02:50:34 2020	(r367476)
@@ -53,8 +53,10 @@
 #endif
 
 #define	COLLATE_STR_LEN		24		/* should be 64-bit multiple */
-#define	COLLATE_VERSION		"BSD 1.0\n"
 
+#define	COLLATE_FMT_VERSION_LEN	12
+#define	COLLATE_FMT_VERSION	"BSD 1.0\n"
+
 #define	COLLATE_MAX_PRIORITY	(0x7fffffff)	/* max signed value */
 #define	COLLATE_SUBST_PRIORITY	(0x40000000)	/* bit indicates subst table */
 
@@ -69,7 +71,8 @@
 /*
  * The collate file format is as follows:
  *
- * char		version[COLLATE_STR_LEN];	// must be COLLATE_VERSION
+ * char	fmt_version[COLLATE_FMT_VERSION_LEN];	// must be COLLATE_FMT_VERSION
+ * char	def_version[XLOCALE_DEF_VERSION_LEN];	// NUL-terminated, may be empty
  * collate_info_t	info;			// see below, includes padding
  * collate_char_pri_t	char_data[256];		// 8 bit char values
  * collate_subst_t	subst[*];		// 0 or more substitutions

Modified: head/lib/libc/locale/querylocale.3
==============================================================================
--- head/lib/libc/locale/querylocale.3	Sun Nov  8 02:46:04 2020	(r367475)
+++ head/lib/libc/locale/querylocale.3	Sun Nov  8 02:50:34 2020	(r367476)
@@ -27,12 +27,12 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd May 3, 2013
+.Dd November 8, 2020
 .Dt QUERYLOCALE 3
 .Os
 .Sh NAME
 .Nm querylocale
-.Nd Look up the locale name for a specified category
+.Nd Look up the locale name or version for a specified category
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
@@ -40,11 +40,22 @@
 .Ft const char *
 .Fn querylocale "int mask" "locale_t locale"
 .Sh DESCRIPTION
-Returns the name of the locale for the category specified by
+Returns the name or version of the locale for the category specified by
 .Fa mask .
-This possible values for the mask are the same as those in
-.Xr newlocale 3 .
-If more than one bit in the mask is set, the returned value is undefined.
+The possible values for the mask are the same as those in
+.Xr newlocale 3 ,
+when requesting the locale name.
+Specify the bitwise OR of
+.Fa LC_VERSION_MASK
+and another mask value to request a version string.
+Version strings can be compared to detect changes to the locale's definition.
+The structure of the version string is unspecified.
+Currently, version information is only available for
+.Fa LC_COLLATE_MASK ,
+and an empty string is returned for other categories.
+If more than one bit in the mask is set, not counting
+.Fa LC_VERSION_MASK ,
+the returned value is undefined.
 .Sh SEE ALSO
 .Xr duplocale 3 ,
 .Xr freelocale 3 ,
@@ -52,3 +63,12 @@ If more than one bit in the mask is set, the returned 
 .Xr newlocale 3 ,
 .Xr uselocale 3 ,
 .Xr xlocale 3
+.Sh HISTORY
+The
+.Fn querylocale
+function first appeared in
+.Fx 9.1 ,
+and is based on the function of the same name in Darwin.
+.Fa LC_VERSION_MASK
+first appeared in
+.Fx 13.0 .

Modified: head/lib/libc/locale/xlocale.c
==============================================================================
--- head/lib/libc/locale/xlocale.c	Sun Nov  8 02:46:04 2020	(r367475)
+++ head/lib/libc/locale/xlocale.c	Sun Nov  8 02:50:34 2020	(r367476)
@@ -231,6 +231,8 @@ static int dupcomponent(int type, locale_t base, local
 		if (new->components[type]) {
 			strncpy(new->components[type]->locale, src->locale,
 			    ENCODING_LEN);
+			strncpy(new->components[type]->version, src->version,
+			    XLOCALE_DEF_VERSION_LEN);
 		}
 	} else if (base->components[type]) {
 		new->components[type] = xlocale_retain(base->components[type]);
@@ -346,17 +348,24 @@ freelocale(locale_t loc)
 }
 
 /*
- * Returns the name of the locale for a particular component of a locale_t.
+ * Returns the name or version of the locale for a particular component of a
+ * locale_t.
  */
 const char *querylocale(int mask, locale_t loc)
 {
-	int type = ffs(mask) - 1;
+	int type = ffs(mask & ~LC_VERSION_MASK) - 1;
 	FIX_LOCALE(loc);
 	if (type >= XLC_LAST)
 		return (NULL);
-	if (loc->components[type])
-		return (loc->components[type]->locale);
-	return ("C");
+	if (mask & LC_VERSION_MASK) {
+		if (loc->components[type])
+			return (loc->components[type]->version);
+		return ("");
+	} else {
+		if (loc->components[type])
+			return (loc->components[type]->locale);
+		return ("C");
+	}
 }
 
 /*

Modified: head/lib/libc/locale/xlocale_private.h
==============================================================================
--- head/lib/libc/locale/xlocale_private.h	Sun Nov  8 02:46:04 2020	(r367475)
+++ head/lib/libc/locale/xlocale_private.h	Sun Nov  8 02:50:34 2020	(r367476)
@@ -91,6 +91,9 @@ struct xlocale_refcounted {
 	/** Function used to destroy this component, if one is required*/
 	void(*destructor)(void*);
 };
+
+#define XLOCALE_DEF_VERSION_LEN 12
+
 /**
  * Header for a locale component.  All locale components must begin with this
  * header.
@@ -99,6 +102,8 @@ struct xlocale_component {
 	struct xlocale_refcounted header;
 	/** Name of the locale used for this component. */
 	char locale[ENCODING_LEN+1];
+	/** Version of the definition for this component. */
+	char version[XLOCALE_DEF_VERSION_LEN];
 };
 
 /**

Modified: head/share/colldef/Makefile
==============================================================================
--- head/share/colldef/Makefile	Sun Nov  8 02:46:04 2020	(r367475)
+++ head/share/colldef/Makefile	Sun Nov  8 02:50:34 2020	(r367476)
@@ -7,10 +7,13 @@ FILESNAME=	LC_COLLATE
 .SUFFIXES:	.src .LC_COLLATE
 MAPLOC=		${.CURDIR}/../../tools/tools/locale/etc/final-maps
 
+CLDR_VERSION=	"34.0"
+
 .include <bsd.endian.mk>
 
 .src.LC_COLLATE:
 	localedef ${LOCALEDEF_ENDIAN} -D -U -i ${.IMPSRC} \
+	-V ${CLDR_VERSION} \
 	-f ${MAPLOC}/map.${.TARGET:T:R:E:C/@.*//} ${.OBJDIR}/${.IMPSRC:T:R}
 
 LOCALES+=	af_ZA.UTF-8
@@ -227,6 +230,7 @@ FILES+=	$t.LC_COLLATE
 FILESDIR_$t.LC_COLLATE=	${LOCALEDIR}/$t
 $t.LC_COLLATE: ${.CURDIR}/$f.src
 	localedef ${LOCALEDEF_ENDIAN} -D -U -i ${.ALLSRC} \
+	-V ${CLDR_VERSION} \
 		-f ${MAPLOC}/map.${.TARGET:T:R:E:C/@.*//} \
 		${.OBJDIR}/${.TARGET:T:R}
 .endfor

Modified: head/tools/tools/locale/Makefile
==============================================================================
--- head/tools/tools/locale/Makefile	Sun Nov  8 02:46:04 2020	(r367475)
+++ head/tools/tools/locale/Makefile	Sun Nov  8 02:50:34 2020	(r367476)
@@ -187,6 +187,8 @@ extract-${CLDRFILES_${N}:T}:: ${CLDRFILES_${N}:T} ${UN
 	cd ${UNIDIR} && unzip -o ../${CLDRFILES_${N}:T}
 extract: extract-${CLDRFILES_${N}:T}
 .endfor
+	grep 'name="version"' ${UNIDIR}/tools/build.xml | \
+		sed 's/.* value="//;s/".*//' > ${UNIDIR}/cldr-version
 patch::
 .if exists(${PATCHDIR})
 	cd ${UNIDIR} && cat ${PATCHDIR}/patch-* | patch

Modified: head/tools/tools/locale/tools/cldr2def.pl
==============================================================================
--- head/tools/tools/locale/tools/cldr2def.pl	Sun Nov  8 02:46:04 2020	(r367475)
+++ head/tools/tools/locale/tools/cldr2def.pl	Sun Nov  8 02:50:34 2020	(r367476)
@@ -50,6 +50,8 @@ my $UNIDIR = undef;
 my $ETCDIR = undef;
 my $TYPE = undef;
 
+my $CLDR_VERSION = undef;
+
 my $result = GetOptions (
 		"unidir=s"	=> \$UNIDIR,
 		"etc=s"		=> \$ETCDIR,
@@ -500,6 +502,12 @@ EOF
 
 
 sub transform_collation {
+	# Read the CLDR version
+	open(FIN, "$UNIDIR/cldr-version") or die "Cannot open cldr-version";
+	read FIN, $CLDR_VERSION, -s FIN;
+	close(FIN);
+	$CLDR_VERSION =~ s/\s*$//;
+
 	foreach my $l (sort keys(%languages)) {
 	foreach my $f (sort keys(%{$languages{$l}})) {
 	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
@@ -861,8 +869,11 @@ sub make_makefile {
 	my $SRCOUT4 = "";
 	my $MAPLOC;
 	if ($TYPE eq "colldef") {
+		# In future, we might want to try to put the CLDR version into
+		# the .src files with some new syntax, instead of the makefile.
 		$SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U " .
 			"-i \${.IMPSRC} \\\n" .
+			"\t-V \${CLDR_VERSION} \\\n" .
 			"\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " .
 			"\${.OBJDIR}/\${.IMPSRC:T:R}";
 		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
@@ -875,6 +886,7 @@ sub make_makefile {
 			"\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" .
 			"\tlocaledef \${LOCALEDEF_ENDIAN} -D -U " .
 			"-i \${.ALLSRC} \\\n" .
+			"\t-V \${CLDR_VERSION} \\\n" .
 			"\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" .
 			"\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" .
 			".endfor\n\n";
@@ -916,6 +928,13 @@ FILESNAME=	$FILESNAMES{$TYPE}
 .SUFFIXES:	.src .${SRCOUT2}
 ${MAPLOC}
 EOF
+
+	if ($TYPE eq "colldef") {
+		print FOUT <<EOF;
+CLDR_VERSION=	"${CLDR_VERSION}"
+
+EOF
+	}
 
 	if ($TYPE eq "colldef" || $TYPE eq "ctypedef") {
 		print FOUT <<EOF;

Modified: head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h
==============================================================================
--- head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h	Sun Nov  8 02:46:04 2020	(r367475)
+++ head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h	Sun Nov  8 02:50:34 2020	(r367476)
@@ -51,4 +51,7 @@ struct localedef_bootstrap_xlocale_component {
 	char unused;
 };
 
+/* This must agree with the definition in xlocale_private.h. */
+#define XLOCALE_DEF_VERSION_LEN 12
+
 #endif /* _LOCALDEF_BOOTSTRAP_XLOCALE_PRIVATE_H */

Modified: head/usr.bin/localedef/collate.c
==============================================================================
--- head/usr.bin/localedef/collate.c	Sun Nov  8 02:46:04 2020	(r367475)
+++ head/usr.bin/localedef/collate.c	Sun Nov  8 02:50:34 2020	(r367476)
@@ -1119,7 +1119,8 @@ dump_collate(void)
 	collelem_t		*ce;
 	collchar_t		*cc;
 	subst_t			*sb;
-	char			vers[COLLATE_STR_LEN];
+	char			fmt_version[COLLATE_FMT_VERSION_LEN];
+	char			def_version[XLOCALE_DEF_VERSION_LEN];
 	collate_char_t		chars[UCHAR_MAX + 1];
 	collate_large_t		*large;
 	collate_subst_t		*subst[COLL_WEIGHTS_MAX];
@@ -1160,8 +1161,11 @@ dump_collate(void)
 	}
 
 	(void) memset(&chars, 0, sizeof (chars));
-	(void) memset(vers, 0, COLLATE_STR_LEN);
-	(void) strlcpy(vers, COLLATE_VERSION, sizeof (vers));
+	(void) memset(fmt_version, 0, COLLATE_FMT_VERSION_LEN);
+	(void) strlcpy(fmt_version, COLLATE_FMT_VERSION, sizeof (fmt_version));
+	(void) memset(def_version, 0, XLOCALE_DEF_VERSION_LEN);
+	if (version)
+		(void) strlcpy(def_version, version, sizeof (def_version));
 
 	/*
 	 * We need to make sure we arrange for the UNDEFINED field
@@ -1301,7 +1305,8 @@ dump_collate(void)
 	collinfo.chain_count = htote(chain_count);
 	collinfo.large_count = htote(large_count);
 
-	if ((wr_category(vers, COLLATE_STR_LEN, f) < 0) ||
+	if ((wr_category(fmt_version, COLLATE_FMT_VERSION_LEN, f) < 0) ||
+	    (wr_category(def_version, XLOCALE_DEF_VERSION_LEN, f) < 0) ||
 	    (wr_category(&collinfo, sizeof (collinfo), f) < 0) ||
 	    (wr_category(&chars, sizeof (chars), f) < 0)) {
 		return;

Modified: head/usr.bin/localedef/localedef.1
==============================================================================
--- head/usr.bin/localedef/localedef.1	Sun Nov  8 02:46:04 2020	(r367475)
+++ head/usr.bin/localedef/localedef.1	Sun Nov  8 02:50:34 2020	(r367476)
@@ -33,7 +33,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 18, 2018
+.Dd November 8, 2020
 .Dt LOCALEDEF 1
 .Os
 .Sh NAME
@@ -135,6 +135,14 @@ If not supplied, then default screen widths will be as
 generally not account for East Asian encodings requiring more than a single
 character cell to display, nor for combining or accent marks that occupy
 no additional screen width.
+.It Fl V Ar version
+Specifies a version string describing the version of the locale definition.
+This string can be retrieved with
+.Xr querylocale 3 ,
+and is intended to allow applications to detect locale definition changes.
+Currently it is stored only for the
+.Sy LC_COLLATE
+category.
 .El
 .Pp
 The following operands are required:
@@ -198,6 +206,7 @@ If an error is detected, no permanent output will be c
 .Xr locale 1 ,
 .Xr iconv_open 3 ,
 .Xr nl_langinfo 3 ,
+.Xr querylocale 3 ,
 .Xr strftime 3 ,
 .Xr environ 7
 .Sh WARNINGS

Modified: head/usr.bin/localedef/localedef.c
==============================================================================
--- head/usr.bin/localedef/localedef.c	Sun Nov  8 02:46:04 2020	(r367475)
+++ head/usr.bin/localedef/localedef.c	Sun Nov  8 02:50:34 2020	(r367476)
@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
 #include <limits.h>
 #include <locale.h>
 #include <dirent.h>
+#include "collate.h"
 #include "localedef.h"
 #include "parser.h"
 
@@ -62,6 +63,7 @@ int undefok = 0;
 int warnok = 0;
 static char *locname = NULL;
 static char locpath[PATH_MAX];
+char *version = NULL;
 
 const char *
 category_name(void)
@@ -253,6 +255,7 @@ usage(void)
 	(void) fprintf(stderr, "  -u encoding : assume encoding\n");
 	(void) fprintf(stderr, "  -w widths   : use screen widths file\n");
 	(void) fprintf(stderr, "  -i locsrc   : source file for locale\n");
+	(void) fprintf(stderr, "  -V version  : version string for locale\n");
 	exit(4);
 }
 
@@ -279,7 +282,7 @@ main(int argc, char **argv)
 
 	(void) setlocale(LC_ALL, "");
 
-	while ((c = getopt(argc, argv, "blw:i:cf:u:vUD")) != -1) {
+	while ((c = getopt(argc, argv, "blw:i:cf:u:vUDV:")) != -1) {
 		switch (c) {
 		case 'D':
 			bsd = 1;
@@ -314,6 +317,9 @@ main(int argc, char **argv)
 		case '?':
 			usage();
 			break;
+		case 'V':
+			version = optarg;
+			break;
 		}
 	}
 
@@ -323,6 +329,11 @@ main(int argc, char **argv)
 	locname = argv[argc - 1];
 	if (verbose) {
 		(void) printf("Processing locale %s.\n", locname);
+	}
+
+	if (version && strlen(version) >= XLOCALE_DEF_VERSION_LEN) {
+		(void) fprintf(stderr, "Version string too long.\n");
+		exit(1);
 	}
 
 	if (cfname) {

Modified: head/usr.bin/localedef/localedef.h
==============================================================================
--- head/usr.bin/localedef/localedef.h	Sun Nov  8 02:46:04 2020	(r367475)
+++ head/usr.bin/localedef/localedef.h	Sun Nov  8 02:50:34 2020	(r367476)
@@ -55,6 +55,8 @@ extern int undefok;	/* mostly ignore undefined symbols
 extern int warnok;
 extern int warnings;
 
+extern char *version;
+
 int yylex(void);
 void yyerror(const char *);
 _Noreturn void errf(const char *, ...) __printflike(1, 2);


More information about the svn-src-head mailing list