socsvn commit: r269159 - in soc2014/ghostmansd/src/lib/libc: locale string
ghostmansd at FreeBSD.org
ghostmansd at FreeBSD.org
Fri Jun 6 01:11:40 UTC 2014
Author: ghostmansd
Date: Fri Jun 6 01:11:38 2014
New Revision: 269159
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=269159
Log:
continue working on collation support
Modified:
soc2014/ghostmansd/src/lib/libc/locale/collate.c
soc2014/ghostmansd/src/lib/libc/string/strcoll.c
soc2014/ghostmansd/src/lib/libc/string/strxfrm.c
Modified: soc2014/ghostmansd/src/lib/libc/locale/collate.c
==============================================================================
--- soc2014/ghostmansd/src/lib/libc/locale/collate.c Fri Jun 6 00:24:04 2014 (r269158)
+++ soc2014/ghostmansd/src/lib/libc/locale/collate.c Fri Jun 6 01:11:38 2014 (r269159)
@@ -80,23 +80,26 @@
destruct_collate(void *t)
{
struct xlocale_collate *table = t;
- if (__collate_chain_pri_table) {
+
+ if (__collate_chain_pri_table)
free(__collate_chain_pri_table);
- }
free(t);
}
void *
-__collate_load(const char *encoding, locale_t unused)
+__collate_load(const char *encoding, locale_t locale)
{
- if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
+ struct xlocale_collate *table = NULL;
+
+ (void) locale;
+ if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0)
return &__xlocale_C_collate;
- }
- struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1);
+ table = calloc(sizeof(struct xlocale_collate), 1);
table->header.header.destructor = destruct_collate;
// FIXME: Make sure that _LDP_CACHE is never returned. We should be doing
// the caching outside of this section
- if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) {
+ if (__collate_load_tables_l(encoding, table) != _LDP_LOADED)
+ {
xlocale_release(table);
return NULL;
}
@@ -117,11 +120,16 @@
int
__collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
{
- FILE *fp;
- int i, saverr, chains;
- uint32_t u32;
- char strbuf[STR_LEN], buf[PATH_MAX];
- void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
+ int i = 0;
+ int error = 0;
+ int chains = 0;
+ FILE *fp = NULL;
+ uint32_t u32 = 0;
+ char buf[PATH_MAX] = {0};
+ char strbuf[STR_LEN] = {0};
+ void *TMP_substitute_table = NULL;
+ void *TMP_char_pri_table = NULL;
+ void *TMP_chain_pri_table = NULL;
/* 'encoding' must be already checked. */
if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
@@ -131,17 +139,18 @@
/* 'PathLocale' must be already set & checked. */
/* Range checking not needed, encoding has fixed size */
- (void)strcpy(buf, _PathLocale);
- (void)strcat(buf, "/");
- (void)strcat(buf, encoding);
- (void)strcat(buf, "/LC_COLLATE");
+ (void) strcpy(buf, _PathLocale);
+ (void) strcat(buf, "/");
+ (void) strcat(buf, encoding);
+ (void) strcat(buf, "/LC_COLLATE");
if ((fp = fopen(buf, "re")) == NULL)
return (_LDP_ERROR);
- if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
- saverr = errno;
+ if (fread(strbuf, sizeof(strbuf), 1, fp) != 1)
+ {
+ error = errno;
(void)fclose(fp);
- errno = saverr;
+ errno = error;
return (_LDP_ERROR);
}
chains = -1;
@@ -149,60 +158,56 @@
chains = 0;
else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0)
chains = 1;
- if (chains < 0) {
+ if (chains < 0)
+ {
(void)fclose(fp);
errno = EFTYPE;
return (_LDP_ERROR);
}
- if (chains) {
- if (fread(&u32, sizeof(u32), 1, fp) != 1) {
- saverr = errno;
+ if (chains)
+ {
+ if (fread(&u32, sizeof(u32), 1, fp) != 1)
+ {
+ error = errno;
(void)fclose(fp);
- errno = saverr;
+ errno = error;
return (_LDP_ERROR);
}
- if ((chains = (int)ntohl(u32)) < 1) {
+ if ((chains = (int)ntohl(u32)) < 1)
+ {
(void)fclose(fp);
errno = EFTYPE;
return (_LDP_ERROR);
}
- } else
+ }
+ else
chains = TABLE_SIZE;
- if ((TMP_substitute_table =
- malloc(sizeof(__collate_substitute_table))) == NULL) {
- saverr = errno;
+ TMP_substitute_table = malloc(sizeof(__collate_substitute_table));
+ TMP_char_pri_table = malloc(sizeof(__collate_char_pri_table));
+ TMP_chain_pri_table = malloc(sizeof(*__collate_chain_pri_table) * chains);
+ if ((TMP_substitute_table == NULL)
+ || (TMP_char_pri_table == NULL)
+ || (TMP_chain_pri_table == NULL))
+ {
+ error = errno;
(void)fclose(fp);
- errno = saverr;
- return (_LDP_ERROR);
- }
- if ((TMP_char_pri_table =
- malloc(sizeof(__collate_char_pri_table))) == NULL) {
- saverr = errno;
- free(TMP_substitute_table);
- (void)fclose(fp);
- errno = saverr;
- return (_LDP_ERROR);
- }
- if ((TMP_chain_pri_table =
- malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) {
- saverr = errno;
free(TMP_substitute_table);
free(TMP_char_pri_table);
- (void)fclose(fp);
- errno = saverr;
+ free(TMP_chain_pri_table);
+ errno = error;
return (_LDP_ERROR);
}
#define FREAD(a, b, c, d) \
{ \
if (fread(a, b, c, d) != c) { \
- saverr = errno; \
+ error = errno; \
free(TMP_substitute_table); \
free(TMP_char_pri_table); \
free(TMP_chain_pri_table); \
(void)fclose(d); \
- errno = saverr; \
+ errno = error; \
return (_LDP_ERROR); \
} \
}
@@ -243,55 +248,179 @@
}
}
table->__collate_load_error = 0;
-
return (_LDP_LOADED);
}
-u_char *
-__collate_substitute(struct xlocale_collate *table, const u_char *s)
+static int
+__collate_wcsnlen(const wchar_t *s, int len)
+{
+ int n = 0;
+ while (*s && n < len) {
+ s++;
+ n++;
+ }
+ return n;
+}
+
+wchar_t *
+__collate_substitute(const wchar_t *s, int which, locale_t locale)
{
- int dest_len, len, nlen;
- int delta = strlen(s);
- u_char *dest_str = NULL;
+ int n = 0;
+ int len = 0;
+ int nlen = 0;
+ int delta = 0;
+ int nsubst = 0;
+ int dest_len = 0;
+ const wchar_t *fp = NULL;
+ wchar_t *dest_str = NULL;
+ struct __collate_st_subst *subst = NULL;
+ struct __collate_st_subst *match = NULL;
+ (void) locale;
if (s == NULL || *s == '\0')
- return (__collate_strdup(""));
- delta += delta / 8;
- dest_str = malloc(dest_len = delta);
+ return __collate_wcsdup(L"");
+ dest_len = wcslen(s);
+ nsubst = __collate_info->subst_count[which];
+ if (nsubst <= 0)
+ return __collate_wcsdup(s);
+ subst = __collate_substitute_table[which];
+ delta = (dest_len / 4);
+ if (delta < 2)
+ delta = 2;
+ dest_str = (wchar_t *) malloc((dest_len += delta) * sizeof(wchar_t));
if (dest_str == NULL)
__collate_err(EX_OSERR, __func__);
len = 0;
- while (*s) {
- nlen = len + strlen(__collate_substitute_table[*s]);
- if (dest_len <= nlen) {
- dest_str = reallocf(dest_str, dest_len = nlen + delta);
+ while (*s)
+ {
+ if ((match = substsearch(*s, subst, nsubst)) != NULL)
+ {
+ fp = match->str;
+ n = __collate_wcsnlen(fp, STR_LEN);
+ }
+ else
+ {
+ fp = s;
+ n = 1;
+ }
+ nlen = len + n;
+ if (dest_len <= nlen)
+ {
+ dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t));
if (dest_str == NULL)
__collate_err(EX_OSERR, __func__);
}
- (void)strcpy(dest_str + len, __collate_substitute_table[*s++]);
- len = nlen;
+ wcsncpy(dest_str + len, fp, n);
+ len += n;
+ s++;
}
- return (dest_str);
+ dest_str[len] = 0;
+ return dest_str;
+}
+
+static struct __collate_st_chain_pri *
+chainsearch(const wchar_t *key, int *len, locale_t locale)
+{
+ int low = 0;
+ int high = __collate_info->chain_count - 1;
+ int next, compar, l;
+ struct __collate_st_chain_pri *p;
+ struct __collate_st_chain_pri *tab = __collate_chain_pri_table;
+
+ while (low <= high)
+ {
+ next = (low + high) / 2;
+ p = tab + next;
+ compar = *key - *p->str;
+ if (compar == 0)
+ {
+ l = __collate_wcsnlen(p->str, STR_LEN);
+ compar = wcsncmp(key, p->str, l);
+ if (compar == 0)
+ {
+ *len = l;
+ return p;
+ }
+ }
+ if (compar > 0)
+ low = next + 1;
+ else
+ high = next - 1;
+ }
+ return NULL;
+}
+
+static struct __collate_st_large_char_pri *
+largesearch(const wchar_t key, locale_t locale)
+{
+ int low = 0;
+ int high = __collate_info->large_pri_count - 1;
+ int next, compar;
+ struct __collate_st_large_char_pri *p;
+ struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table;
+
+ while (low <= high) {
+ next = (low + high) / 2;
+ p = tab + next;
+ compar = key - p->val;
+ if (compar == 0)
+ return p;
+ if (compar > 0)
+ low = next + 1;
+ else
+ high = next - 1;
+ }
+ return NULL;
}
void
-__collate_lookup(struct xlocale_collate *table, const u_char *t, int *len, int *prim, int *sec)
+__collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t locale)
{
struct __collate_st_chain_pri *p2;
+ int l;
*len = 1;
*prim = *sec = 0;
- for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
- if (*t == p2->str[0] &&
- strncmp(t, p2->str, strlen(p2->str)) == 0) {
- *len = strlen(p2->str);
- *prim = p2->prim;
- *sec = p2->sec;
+ FIX_LOCALE(locale);
+ p2 = chainsearch(t, &l, locale);
+ /* use the chain if prim >= 0 */
+ if (p2 && p2->pri[0] >= 0)
+ {
+ *len = l;
+ *prim = p2->pri[0];
+ *sec = p2->pri[1];
+ return;
+ }
+ if (*t <= UCHAR_MAX)
+ {
+ *prim = __collate_char_pri_table[*t].pri[0];
+ *sec = __collate_char_pri_table[*t].pri[1];
+ return;
+ }
+ if (__collate_info->large_pri_count > 0)
+ {
+ struct __collate_st_large_char_pri *match;
+ match = largesearch(*t, locale);
+ if (match)
+ {
+ *prim = match->pri.pri[0];
+ *sec = match->pri.pri[1];
return;
}
}
- *prim = __collate_char_pri_table[*t].prim;
- *sec = __collate_char_pri_table[*t].sec;
+ *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l;
+ *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l;
+}
+
+void
+__collate_lookup(const wchar_t *t, int *len, int *prim, int *sec)
+{
+ int error = 0;
+ locale_t locale = __get_locale();
+ wchar_t *wcs = __collate_mbstowcs((const char *)t, locale);
+
+ __collate_lookup_l(wcs, len, prim, sec, locale);
+ free(wcs);
}
wchar_t *
@@ -316,6 +445,119 @@
return wcs;
}
+wchar_t *
+__collate_wcsdup(wchar_t *s)
+{
+ wchar_t *t = wcsdup(s);
+
+ if (t == NULL)
+ __collate_err(EX_OSERR, __func__);
+ return t;
+}
+
+void
+__collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t locale)
+{
+ int pri = 0;
+ int len = 0;
+ int pass = 0;
+ int direc = 0;
+ int error = 0;
+ size_t slen = 0;
+ wchar_t *tt = NULL,
+ wchar_t *tr = NULL;
+ wchar_t *xfp = NULL;
+ const wchar_t *t = NULL;
+ struct __collate_st_info *info = __collate_info;
+
+ for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++)
+ xf[pass] = NULL;
+ for(pass = 0; pass < info->directive_count; pass++)
+ {
+ direc = info->directive[pass];
+ if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP))
+ {
+ error = errno;
+ free(tt);
+ errno = error;
+ tt = __collate_substitute(src, pass, locale);
+ }
+ if (direc & DIRECTIVE_BACKWARD)
+ {
+ wchar_t *bp, *fp, c;
+ error = errno;
+ free(tr);
+ errno = error;
+ tr = __collate_wcsdup(tt ? tt : src);
+ bp = tr;
+ fp = tr + wcslen(tr) - 1;
+ while(bp < fp)
+ {
+ c = *bp;
+ *bp++ = *fp;
+ *fp-- = c;
+ }
+ t = tr;
+ }
+ else if (tt)
+ t = tt;
+ else
+ t = src;
+ error = errno;
+ xf[pass] = (wchar_t *) malloc(sizeof(wchar_t) * (wcslen(t) + 1));
+ if (xf[pass] == NULL)
+ {
+ errno = error;
+ slen = 0;
+ goto end;
+ }
+ errno = error;
+ xfp = xf[pass];
+ if (direc & DIRECTIVE_POSITION)
+ {
+ while(*t)
+ {
+ __collate_lookup_which(t, &len, &pri, pass, locale);
+ t += len;
+ if (pri <= 0)
+ {
+ if (pri < 0)
+ {
+ errno = EINVAL;
+ slen = 0;
+ goto end;
+ }
+ pri = COLLATE_MAX_PRIORITY;
+ }
+ *xfp++ = pri;
+ }
+ }
+ else
+ {
+ while(*t)
+ {
+ __collate_lookup_which(t, &len, &pri, pass, locale);
+ t += len;
+ if (pri <= 0)
+ {
+ if (pri < 0)
+ {
+ errno = EINVAL;
+ slen = 0;
+ goto end;
+ }
+ continue;
+ }
+ *xfp++ = pri;
+ }
+ }
+ *xfp = 0;
+ }
+ end:
+ free(tt);
+ free(tr);
+}
+
u_char *
__collate_strdup(u_char *s)
{
@@ -330,7 +572,7 @@
__collate_err(int ex, const char *f)
{
const char *s;
- int serrno = errno;
+ int error = errno;
s = _getprogname();
_write(STDERR_FILENO, s, strlen(s));
@@ -338,7 +580,7 @@
s = f;
_write(STDERR_FILENO, s, strlen(s));
_write(STDERR_FILENO, ": ", 2);
- s = strerror(serrno);
+ s = strerror(error);
_write(STDERR_FILENO, s, strlen(s));
_write(STDERR_FILENO, "\n", 1);
exit(ex);
Modified: soc2014/ghostmansd/src/lib/libc/string/strcoll.c
==============================================================================
--- soc2014/ghostmansd/src/lib/libc/string/strcoll.c Fri Jun 6 00:24:04 2014 (r269158)
+++ soc2014/ghostmansd/src/lib/libc/string/strcoll.c Fri Jun 6 01:11:38 2014 (r269159)
@@ -45,11 +45,10 @@
int ret = 0;
wchar_t *wcs1 = NULL;
wchar_t *wcs2 = NULL;
+ struct xlocale_collate *table = NULL;
FIX_LOCALE(locale);
- struct xlocale_collate *table =
- (struct xlocale_collate*)locale->components[XLC_COLLATE];
-
+ *table = (struct xlocale_collate*)locale->components[XLC_COLLATE];
if ((table->__collate_load_error)
|| (wcs1 = __collate_mbstowcs(mbs1, locale) == NULL)
|| (wcs2 = __collate_mbstowcs(mbs2, locale) == NULL))
Modified: soc2014/ghostmansd/src/lib/libc/string/strxfrm.c
==============================================================================
--- soc2014/ghostmansd/src/lib/libc/string/strxfrm.c Fri Jun 6 00:24:04 2014 (r269158)
+++ soc2014/ghostmansd/src/lib/libc/string/strxfrm.c Fri Jun 6 01:11:38 2014 (r269159)
@@ -37,12 +37,56 @@
#include <string.h>
#include "collate.h"
+/*
+ * In the non-POSIX case, we transform each character into a string of
+ * characters representing the character's priority. Since char is usually
+ * signed, we are limited by 7 bits per byte. To avoid zero, we need to add
+ * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6
+ * bits per byte. We choose 4 bytes per character as a good compromise
+ * between maximum coverage and minimum size. This gives 24 bits, or 16M
+ * priorities. So we choose COLLATE_MAX_PRIORITY to be (2^24 - 1). This
+ * can be increased if more is needed.
+ */
+
+#define XFRM_BYTES 4
+#define XFRM_OFFSET ('0') /* make all printable characters */
+#define XFRM_SHIFT 6
+#define XFRM_MASK ((1 << XFRM_SHIFT) - 1)
+
+static void
+xfrm(unsigned char *p, int pri)
+{
+
+ p[3] = (pri & XFRM_MASK) + XFRM_OFFSET;
+ pri >>= XFRM_SHIFT;
+ p[2] = (pri & XFRM_MASK) + XFRM_OFFSET;
+ pri >>= XFRM_SHIFT;
+ p[1] = (pri & XFRM_MASK) + XFRM_OFFSET;
+ pri >>= XFRM_SHIFT;
+ p[0] = (pri & XFRM_MASK) + XFRM_OFFSET;
+}
+
size_t
-strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, locale_t loc);
-size_t
-strxfrm(char * __restrict dest, const char * __restrict src, size_t len)
+strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, locale_t locale)
{
- return strxfrm_l(dest, src, len, __get_locale());
+ int error = 0;
+ size_t slen = 0;
+ wchar_t *wcs = NULL;
+ wchar_t *xf[2] = {NULL, NULL};
+ struct xlocale_collate *table = NULL;
+
+ if (!*src && dest)
+ {
+ if (len > 0)
+ *dest = '\0';
+ return 0;
+ }
+ FIX_LOCALE(locale);
+ *table = (struct xlocale_collate*)locale->components[XLC_COLLATE];
+ if (table->__collate_load_error
+ || (wcs = __collate_mbstowcs(mbs1, locale) == NULL))
+ return strlcpy(dest, src, len);
+ __collate_xfrm(wcs, xf, loc);
}
size_t
@@ -87,3 +131,10 @@
return slen;
}
+
+size_t
+strxfrm(char * __restrict dest, const char * __restrict src, size_t len)
+{
+ return strxfrm_l(dest, src, len, __get_locale());
+}
+
More information about the svn-soc-all
mailing list