socsvn commit: r269159 - in soc2014/ghostmansd/src/lib/libc: locale string

ghostmansd at FreeBSD.org ghostmansd at FreeBSD.org
Fri Jun 6 01:11:40 UTC 2014


Author: ghostmansd
Date: Fri Jun  6 01:11:38 2014
New Revision: 269159
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=269159

Log:
  continue working on collation support

Modified:
  soc2014/ghostmansd/src/lib/libc/locale/collate.c
  soc2014/ghostmansd/src/lib/libc/string/strcoll.c
  soc2014/ghostmansd/src/lib/libc/string/strxfrm.c

Modified: soc2014/ghostmansd/src/lib/libc/locale/collate.c
==============================================================================
--- soc2014/ghostmansd/src/lib/libc/locale/collate.c	Fri Jun  6 00:24:04 2014	(r269158)
+++ soc2014/ghostmansd/src/lib/libc/locale/collate.c	Fri Jun  6 01:11:38 2014	(r269159)
@@ -80,23 +80,26 @@
 destruct_collate(void *t)
 {
 	struct xlocale_collate *table = t;
-	if (__collate_chain_pri_table) {
+
+	if (__collate_chain_pri_table)
 		free(__collate_chain_pri_table);
-	}
 	free(t);
 }
 
 void *
-__collate_load(const char *encoding, locale_t unused)
+__collate_load(const char *encoding, locale_t locale)
 {
-	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
+	struct xlocale_collate *table = NULL;
+
+	(void) locale;
+	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0)
 		return &__xlocale_C_collate;
-	}
-	struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1);
+	table = calloc(sizeof(struct xlocale_collate), 1);
 	table->header.header.destructor = destruct_collate;
 	// FIXME: Make sure that _LDP_CACHE is never returned.  We should be doing
 	// the caching outside of this section
-	if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) {
+	if (__collate_load_tables_l(encoding, table) != _LDP_LOADED)
+	{
 		xlocale_release(table);
 		return NULL;
 	}
@@ -117,11 +120,16 @@
 int
 __collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
 {
-	FILE *fp;
-	int i, saverr, chains;
-	uint32_t u32;
-	char strbuf[STR_LEN], buf[PATH_MAX];
-	void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
+	int i = 0;
+	int error = 0;
+	int chains = 0;
+	FILE *fp = NULL;
+	uint32_t u32 = 0;
+	char buf[PATH_MAX] = {0};
+	char strbuf[STR_LEN] = {0};
+	void *TMP_substitute_table = NULL;
+	void *TMP_char_pri_table = NULL;
+	void *TMP_chain_pri_table = NULL;
 
 	/* 'encoding' must be already checked. */
 	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
@@ -131,17 +139,18 @@
 
 	/* 'PathLocale' must be already set & checked. */
 	/* Range checking not needed, encoding has fixed size */
-	(void)strcpy(buf, _PathLocale);
-	(void)strcat(buf, "/");
-	(void)strcat(buf, encoding);
-	(void)strcat(buf, "/LC_COLLATE");
+	(void) strcpy(buf, _PathLocale);
+	(void) strcat(buf, "/");
+	(void) strcat(buf, encoding);
+	(void) strcat(buf, "/LC_COLLATE");
 	if ((fp = fopen(buf, "re")) == NULL)
 		return (_LDP_ERROR);
 
-	if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
-		saverr = errno;
+	if (fread(strbuf, sizeof(strbuf), 1, fp) != 1)
+	{
+		error = errno;
 		(void)fclose(fp);
-		errno = saverr;
+		errno = error;
 		return (_LDP_ERROR);
 	}
 	chains = -1;
@@ -149,60 +158,56 @@
 		chains = 0;
 	else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0)
 		chains = 1;
-	if (chains < 0) {
+	if (chains < 0)
+	{
 		(void)fclose(fp);
 		errno = EFTYPE;
 		return (_LDP_ERROR);
 	}
-	if (chains) {
-		if (fread(&u32, sizeof(u32), 1, fp) != 1) {
-			saverr = errno;
+	if (chains)
+	{
+		if (fread(&u32, sizeof(u32), 1, fp) != 1)
+		{
+			error = errno;
 			(void)fclose(fp);
-			errno = saverr;
+			errno = error;
 			return (_LDP_ERROR);
 		}
-		if ((chains = (int)ntohl(u32)) < 1) {
+		if ((chains = (int)ntohl(u32)) < 1)
+		{
 			(void)fclose(fp);
 			errno = EFTYPE;
 			return (_LDP_ERROR);
 		}
-	} else
+	}
+	else
 		chains = TABLE_SIZE;
 
-	if ((TMP_substitute_table =
-	     malloc(sizeof(__collate_substitute_table))) == NULL) {
-		saverr = errno;
+	TMP_substitute_table = malloc(sizeof(__collate_substitute_table));
+	TMP_char_pri_table = malloc(sizeof(__collate_char_pri_table));
+	TMP_chain_pri_table = malloc(sizeof(*__collate_chain_pri_table) * chains);
+	if ((TMP_substitute_table == NULL)
+	|| (TMP_char_pri_table == NULL)
+	|| (TMP_chain_pri_table == NULL))
+	{
+		error = errno;
 		(void)fclose(fp);
-		errno = saverr;
-		return (_LDP_ERROR);
-	}
-	if ((TMP_char_pri_table =
-	     malloc(sizeof(__collate_char_pri_table))) == NULL) {
-		saverr = errno;
-		free(TMP_substitute_table);
-		(void)fclose(fp);
-		errno = saverr;
-		return (_LDP_ERROR);
-	}
-	if ((TMP_chain_pri_table =
-	     malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) {
-		saverr = errno;
 		free(TMP_substitute_table);
 		free(TMP_char_pri_table);
-		(void)fclose(fp);
-		errno = saverr;
+		free(TMP_chain_pri_table);
+		errno = error;
 		return (_LDP_ERROR);
 	}
 
 #define FREAD(a, b, c, d) \
 { \
 	if (fread(a, b, c, d) != c) { \
-		saverr = errno; \
+		error = errno; \
 		free(TMP_substitute_table); \
 		free(TMP_char_pri_table); \
 		free(TMP_chain_pri_table); \
 		(void)fclose(d); \
-		errno = saverr; \
+		errno = error; \
 		return (_LDP_ERROR); \
 	} \
 }
@@ -243,55 +248,179 @@
 		}
 	}
 	table->__collate_load_error = 0;
-
 	return (_LDP_LOADED);
 }
 
-u_char *
-__collate_substitute(struct xlocale_collate *table, const u_char *s)
+/* Length of s capped at len; the bound is tested before each read. */
+static int
+__collate_wcsnlen(const wchar_t *s, int len)
+{
+	int n = 0;
+
+	while (n < len && s[n] != L'\0')
+		n++;
+	return (n);
+}
+
+wchar_t *
+__collate_substitute(const wchar_t *s, int which, locale_t locale)
 {
-	int dest_len, len, nlen;
-	int delta = strlen(s);
-	u_char *dest_str = NULL;
+	int n = 0;
+	int len = 0;
+	int nlen = 0;
+	int delta = 0;
+	int nsubst = 0;
+	int dest_len = 0;
+	const wchar_t *fp = NULL;
+	wchar_t *dest_str = NULL;
+	struct __collate_st_subst *subst = NULL;
+	struct __collate_st_subst *match = NULL;
 
+	(void) locale;
 	if (s == NULL || *s == '\0')
-		return (__collate_strdup(""));
-	delta += delta / 8;
-	dest_str = malloc(dest_len = delta);
+		return __collate_wcsdup(L"");
+	dest_len = wcslen(s);
+	nsubst = __collate_info->subst_count[which];
+	if (nsubst <= 0)
+		return __collate_wcsdup(s);
+	subst = __collate_substitute_table[which];
+	delta = (dest_len / 4);
+	if (delta < 2)
+		delta = 2;
+	dest_str = (wchar_t *) malloc((dest_len += delta) * sizeof(wchar_t));
 	if (dest_str == NULL)
 		__collate_err(EX_OSERR, __func__);
 	len = 0;
-	while (*s) {
-		nlen = len + strlen(__collate_substitute_table[*s]);
-		if (dest_len <= nlen) {
-			dest_str = reallocf(dest_str, dest_len = nlen + delta);
+	while (*s)
+	{
+		if ((match = substsearch(*s, subst, nsubst)) != NULL)
+		{
+			fp = match->str;
+			n = __collate_wcsnlen(fp, STR_LEN);
+		}
+		else
+		{
+			fp = s;
+			n = 1;
+		}
+		nlen = len + n;
+		if (dest_len <= nlen)
+		{
+			dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t));
 			if (dest_str == NULL)
 				__collate_err(EX_OSERR, __func__);
 		}
-		(void)strcpy(dest_str + len, __collate_substitute_table[*s++]);
-		len = nlen;
+		wcsncpy(dest_str + len, fp, n);
+		len += n;
+		s++;
 	}
-	return (dest_str);
+	dest_str[len] = 0;
+	return dest_str;
+}
+
+/*
+ * Binary-search the chain table for an entry whose string is a prefix of
+ * key.  A hit stores that string's length in *len; a miss returns NULL.
+ */
+static struct __collate_st_chain_pri *
+chainsearch(const wchar_t *key, int *len, locale_t locale)
+{
+	struct __collate_st_chain_pri *base = __collate_chain_pri_table;
+	struct __collate_st_chain_pri *cand;
+	int lo = 0, hi = __collate_info->chain_count - 1;
+	int mid, diff, n;
+
+	while (lo <= hi) {
+		mid = (lo + hi) / 2;
+		cand = &base[mid];
+		diff = *key - *cand->str;
+		if (diff == 0) {
+			n = __collate_wcsnlen(cand->str, STR_LEN);
+			diff = wcsncmp(key, cand->str, n);
+			if (diff == 0) {
+				*len = n;
+				return (cand);
+			}
+		}
+		if (diff > 0)
+			lo = mid + 1;
+		else
+			hi = mid - 1;
+	}
+	return (NULL);
+}
+
+/* Binary-search the large-character table for key; NULL when absent. */
+static struct __collate_st_large_char_pri *
+largesearch(const wchar_t key, locale_t locale)
+{
+	struct __collate_st_large_char_pri *base = __collate_large_char_pri_table;
+	struct __collate_st_large_char_pri *cand;
+	int lo = 0, hi = __collate_info->large_pri_count - 1;
+	int mid, diff;
+
+	while (lo <= hi) {
+		mid = (lo + hi) / 2;
+		cand = &base[mid];
+		diff = key - cand->val;
+		if (diff > 0)
+			lo = mid + 1;
+		else if (diff < 0)
+			hi = mid - 1;
+		else
+			return (cand);
+	}
+	return (NULL);
+}
 
 void
-__collate_lookup(struct xlocale_collate *table, const u_char *t, int *len, int *prim, int *sec)
+__collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t locale)
 {
 	struct __collate_st_chain_pri *p2;
+	int l;
 
 	*len = 1;
 	*prim = *sec = 0;
-	for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
-		if (*t == p2->str[0] &&
-		    strncmp(t, p2->str, strlen(p2->str)) == 0) {
-			*len = strlen(p2->str);
-			*prim = p2->prim;
-			*sec = p2->sec;
+	FIX_LOCALE(locale);
+	p2 = chainsearch(t, &l, locale);
+	/* use the chain if prim >= 0 */
+	if (p2 && p2->pri[0] >= 0)
+	{
+		*len = l;
+		*prim = p2->pri[0];
+		*sec = p2->pri[1];
+		return;
+	}
+	if (*t <= UCHAR_MAX)
+	{
+		*prim = __collate_char_pri_table[*t].pri[0];
+		*sec = __collate_char_pri_table[*t].pri[1];
+		return;
+	}
+	if (__collate_info->large_pri_count > 0)
+	{
+		struct __collate_st_large_char_pri *match;
+		match = largesearch(*t, locale);
+		if (match)
+		{
+			*prim = match->pri.pri[0];
+			*sec = match->pri.pri[1];
 			return;
 		}
 	}
-	*prim = __collate_char_pri_table[*t].prim;
-	*sec = __collate_char_pri_table[*t].sec;
+	*prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l;
+	*sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l;
+}
+
+/* Current-locale lookup; a failed conversion yields len 1 and weights 0. */
+void
+__collate_lookup(const wchar_t *t, int *len, int *prim, int *sec)
+{
+	locale_t locale = __get_locale();
+	wchar_t *wcs = __collate_mbstowcs((const char *)t, locale);
+	if (wcs == NULL) { *len = 1; *prim = *sec = 0; return; }
+	__collate_lookup_l(wcs, len, prim, sec, locale);
+	free(wcs);
+}
 
 wchar_t *
@@ -316,6 +445,119 @@
 	return wcs;
 }
 
+/* wcsdup() wrapper: a failed allocation is reported via __collate_err(). */
+wchar_t *
+__collate_wcsdup(wchar_t *s)
+{
+	wchar_t *copy = wcsdup(s);
+	if (copy == NULL)
+		__collate_err(EX_OSERR, __func__);
+	return (copy);
+}
+
+/*
+ * Build the per-pass weight strings used to collate src.
+ *
+ *	src	0-terminated wide string to transform
+ *	xf	array of COLL_WEIGHTS_MAX slots; every slot is reset to NULL,
+ *		then xf[pass] receives a malloc()ed, 0-terminated weight array
+ *	locale	passed through to the substitute and lookup helpers
+ *
+ * For each of info->directive_count passes the source is run through
+ * __collate_substitute() (and reversed for DIRECTIVE_BACKWARD), then each
+ * element is resolved with __collate_lookup_which() and its priority
+ * appended to xf[pass].
+ *
+ * The function stops at the first failure: errno is EINVAL when a lookup
+ * yields a negative priority, and keeps its previous value when malloc()
+ * fails.  Slots filled so far stay allocated (the one being filled is not
+ * terminated); releasing them is left to the caller.
+ */
+void
+__collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t locale)
+{
+	struct __collate_st_info *info = __collate_info;
+	const wchar_t *t = NULL;
+	wchar_t *tt = NULL;	/* substituted copy of src */
+	wchar_t *tr = NULL;	/* reversed copy for backward passes */
+	wchar_t *xfp = NULL;	/* write cursor into xf[pass] */
+	wchar_t *bp, *fp, c;
+	size_t n = 0;
+	int direc = 0;
+	int error = 0;
+	int len = 0;
+	int pass = 0;
+	int pri = 0;
+
+	for (pass = 0; pass < COLL_WEIGHTS_MAX; pass++)
+		xf[pass] = NULL;
+	for (pass = 0; pass < info->directive_count; pass++) {
+		direc = info->directive[pass];
+		if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) {
+			error = errno;	/* keep errno intact across free() */
+			free(tt);
+			errno = error;
+			tt = __collate_substitute(src, pass, locale);
+		}
+		if (direc & DIRECTIVE_BACKWARD) {
+			error = errno;
+			free(tr);
+			errno = error;
+			tr = __collate_wcsdup(tt ? tt : src);
+			/*
+			 * Reverse in place.  Strings shorter than two
+			 * characters are left alone so that no pointer
+			 * before the start of the buffer is ever formed.
+			 */
+			n = wcslen(tr);
+			if (n > 1) {
+				bp = tr;
+				fp = tr + n - 1;
+				while (bp < fp) {
+					c = *bp;
+					*bp++ = *fp;
+					*fp-- = c;
+				}
+			}
+			t = tr;
+		} else if (tt != NULL)
+			t = tt;
+		else
+			t = src;
+
+		/* One slot per character of t plus the terminator. */
+		error = errno;
+		xf[pass] = malloc(sizeof(wchar_t) * (wcslen(t) + 1));
+		errno = error;
+		if (xf[pass] == NULL)
+			goto end;
+		xfp = xf[pass];
+		while (*t) {
+			__collate_lookup_which(t, &len, &pri, pass, locale);
+			t += len;
+			if (pri < 0) {
+				errno = EINVAL;
+				goto end;
+			}
+			if (pri == 0) {
+				/*
+				 * A zero priority is dropped, except in a
+				 * positional pass where it is recorded as
+				 * COLLATE_MAX_PRIORITY.
+				 */
+				if (!(direc & DIRECTIVE_POSITION))
+					continue;
+				pri = COLLATE_MAX_PRIORITY;
+			}
+			*xfp++ = pri;
+		}
+		*xfp = 0;
+	}
+end:
+	free(tt);
+	free(tr);
+}
+
 u_char *
 __collate_strdup(u_char *s)
 {
@@ -330,7 +572,7 @@
 __collate_err(int ex, const char *f)
 {
 	const char *s;
-	int serrno = errno;
+	int error = errno;
 
 	s = _getprogname();
 	_write(STDERR_FILENO, s, strlen(s));
@@ -338,7 +580,7 @@
 	s = f;
 	_write(STDERR_FILENO, s, strlen(s));
 	_write(STDERR_FILENO, ": ", 2);
-	s = strerror(serrno);
+	s = strerror(error);
 	_write(STDERR_FILENO, s, strlen(s));
 	_write(STDERR_FILENO, "\n", 1);
 	exit(ex);

Modified: soc2014/ghostmansd/src/lib/libc/string/strcoll.c
==============================================================================
--- soc2014/ghostmansd/src/lib/libc/string/strcoll.c	Fri Jun  6 00:24:04 2014	(r269158)
+++ soc2014/ghostmansd/src/lib/libc/string/strcoll.c	Fri Jun  6 01:11:38 2014	(r269159)
@@ -45,11 +45,10 @@
 	int ret = 0;
 	wchar_t *wcs1 = NULL;
 	wchar_t *wcs2 = NULL;
+	struct xlocale_collate *table = NULL;
 
 	FIX_LOCALE(locale);
-	struct xlocale_collate *table =
-		(struct xlocale_collate*)locale->components[XLC_COLLATE];
-
+	*table = (struct xlocale_collate*)locale->components[XLC_COLLATE];
 	if ((table->__collate_load_error)
 	||  (wcs1 = __collate_mbstowcs(mbs1, locale) == NULL)
 	||  (wcs2 = __collate_mbstowcs(mbs2, locale) == NULL))

Modified: soc2014/ghostmansd/src/lib/libc/string/strxfrm.c
==============================================================================
--- soc2014/ghostmansd/src/lib/libc/string/strxfrm.c	Fri Jun  6 00:24:04 2014	(r269158)
+++ soc2014/ghostmansd/src/lib/libc/string/strxfrm.c	Fri Jun  6 01:11:38 2014	(r269159)
@@ -37,12 +37,56 @@
 #include <string.h>
 #include "collate.h"
 
+/*
+ * In the non-POSIX case, each character is transformed into a string of
+ * characters representing its priority.  Since char is usually signed, only
+ * 7 bits per byte are available.  To avoid zero, XFRM_OFFSET is added, so a
+ * full 7 bits cannot be used; for simplicity, 6 bits per byte are used.
+ * Four bytes per character is a good compromise between maximum coverage
+ * and minimum size.  This gives 24 bits, or 16M priorities, so
+ * COLLATE_MAX_PRIORITY is chosen to be (2^24 - 1).  This can be increased
+ * if more is needed.
+ */
+
+#define	XFRM_BYTES	4
+#define	XFRM_OFFSET	('0')	/* make all printable characters */
+#define	XFRM_SHIFT	6
+#define	XFRM_MASK	((1 << XFRM_SHIFT) - 1)
+
+/*
+ * Encode pri into the XFRM_BYTES bytes at p, most significant
+ * XFRM_SHIFT-bit group first, each group biased by XFRM_OFFSET.
+ */
+static void
+xfrm(unsigned char *p, int pri)
+{
+	int i;
+
+	for (i = XFRM_BYTES - 1; i >= 0; i--, pri >>= XFRM_SHIFT)
+		p[i] = (pri & XFRM_MASK) + XFRM_OFFSET;
+}
+
 size_t
-strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, locale_t loc);
-size_t
-strxfrm(char * __restrict dest, const char * __restrict src, size_t len)
+strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, locale_t locale)
 {
-	return strxfrm_l(dest, src, len, __get_locale());
+	int error = 0;
+	size_t slen = 0;
+	wchar_t *wcs = NULL;
+	wchar_t *xf[2] = {NULL, NULL};
+	struct xlocale_collate *table = NULL;
+
+	if (!*src && dest)
+	{
+		if (len > 0)
+			*dest = '\0';
+		return 0;
+	}
+	FIX_LOCALE(locale);
+	*table = (struct xlocale_collate*)locale->components[XLC_COLLATE];
+	if (table->__collate_load_error
+	||  (wcs = __collate_mbstowcs(mbs1, locale) == NULL))
+		return strlcpy(dest, src, len);
+	__collate_xfrm(wcs, xf, loc);
 }
 
 size_t
@@ -87,3 +131,10 @@
 
 	return slen;
 }
+
+/* strxfrm_l() called with the locale that __get_locale() returns. */
+size_t
+strxfrm(char * __restrict dest, const char * __restrict src, size_t len)
+{
+	return (strxfrm_l(dest, src, len, __get_locale()));
+}


More information about the svn-soc-all mailing list