socsvn commit: r272063 - in soc2014/ghostmansd/head: lib/libc/locale lib/libc/string lib/libc/unicode lib/libcolldb share/colldb

ghostmansd at FreeBSD.org ghostmansd at FreeBSD.org
Fri Aug 8 00:20:12 UTC 2014


Author: ghostmansd
Date: Fri Aug  8 00:20:09 2014
New Revision: 272063
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=272063

Log:
  initial colldb adoption
  

Added:
  soc2014/ghostmansd/head/lib/libc/unicode/coll.h
  soc2014/ghostmansd/head/share/colldb/
  soc2014/ghostmansd/head/share/colldb/root.src
     - copied unchanged from r271972, soc2014/ghostmansd/head/lib/libcolldb/CLDR.src
Deleted:
  soc2014/ghostmansd/head/lib/libcolldb/CLDR.src
  soc2014/ghostmansd/head/lib/libcolldb/DUCET.src
Modified:
  soc2014/ghostmansd/head/lib/libc/locale/collate.c
  soc2014/ghostmansd/head/lib/libc/locale/xlocale_private.h
  soc2014/ghostmansd/head/lib/libc/string/wcscoll.c
  soc2014/ghostmansd/head/lib/libc/string/wcsxfrm.c
  soc2014/ghostmansd/head/lib/libc/unicode/ucscoll.c
  soc2014/ghostmansd/head/lib/libc/unicode/ucsxfrm.c
  soc2014/ghostmansd/head/lib/libcolldb/colldb.c
  soc2014/ghostmansd/head/lib/libcolldb/colldb.h
  soc2014/ghostmansd/head/lib/libcolldb/colldb.py

Modified: soc2014/ghostmansd/head/lib/libc/locale/collate.c
==============================================================================
--- soc2014/ghostmansd/head/lib/libc/locale/collate.c	Thu Aug  7 22:14:37 2014	(r272062)
+++ soc2014/ghostmansd/head/lib/libc/locale/collate.c	Fri Aug  8 00:20:09 2014	(r272063)
@@ -49,6 +49,316 @@
 
 #include "libc_private.h"
 
+
+#ifdef _UNICODE_SOURCE
+__colldb_t *
+__colldb_create(const char *path, int mode)
+{
+	/*
+	 * Creates (truncating any existing file) the hash database at `path`,
+	 * passing `mode` through to dbopen(), then stores the "TYPE" and
+	 * "VERSION" records in it.  Returns the new handle on success; on
+	 * failure everything acquired so far is released and NULL is
+	 * returned with errno set.
+	 */
+	__colldb_t *result = calloc(1, sizeof(*result));
+	uint32_t netversion;
+	DBT dbkey, dbval;
+	int saved;
+	DB *store;
+
+	if (result == NULL) {
+		errno = ENOMEM;
+		return (NULL);
+	}
+	store = dbopen(path, (O_RDWR | O_CREAT | O_TRUNC), mode, DB_HASH, NULL);
+	if (store == NULL) {
+		/* free() may clobber errno, so carry it across. */
+		saved = errno;
+		free(result);
+		errno = saved;
+		return (NULL);
+	}
+	result->version = __COLLATION_VERSION;
+
+	/* Type tag; the terminating NUL is part of both key and value. */
+	dbkey.data = "TYPE";
+	dbkey.size = sizeof("TYPE");
+	dbval.data = "COLLATION";
+	dbval.size = sizeof("COLLATION");
+	if (store->put(store, &dbkey, &dbval, 0) == -1)
+		goto failure;
+
+	/* Format version, stored in network byte order. */
+	netversion = htonl(result->version);
+	dbkey.data = "VERSION";
+	dbkey.size = sizeof("VERSION");
+	dbval.data = &netversion;
+	dbval.size = sizeof(result->version);
+	if (store->put(store, &dbkey, &dbval, 0) == -1)
+		goto failure;
+
+	result->handle = store;
+	return (result);
+
+failure:
+	saved = errno;
+	(void) store->close(store);
+	free(result);
+	errno = saved;
+	return (NULL);
+}
+
+
+__colldb_t *
+__colldb_open(const char *path)
+{
+	/*
+	 * Opens the collation database at `path` read-only and validates its
+	 * "TYPE" and "VERSION" records.  Returns the handle, or NULL with
+	 * errno set (EFTYPE when a record is missing or malformed).
+	 */
+	static const char type[] = "COLLATION";
+	__colldb_t *colldb = NULL;
+	uint32_t version = 0;
+	int error = 0, state = 0;
+	DBT key, value;
+	DB *db = NULL;
+
+	colldb = calloc(1, sizeof(*colldb));
+	if (colldb == NULL) {
+		errno = ENOMEM;
+		return (NULL);
+	}
+	db = dbopen(path, O_RDONLY, 0, DB_HASH, NULL);
+	if (db == NULL) {
+		error = errno;
+		free(colldb);
+		errno = error;
+		return (NULL);
+	}
+
+	key.data = "TYPE";
+	key.size = (strlen("TYPE") + 1);
+	state = db->get(db, &key, &value, 0);
+	if (state != 0) {
+		/* Negative: error with errno set.  Positive: no such key. */
+		error = ((state < 0) ? errno : EFTYPE);
+		goto failure;
+	}
+	/* Bounded compare: file data need not be NUL-terminated. */
+	if ((value.size < sizeof(type)) ||
+	    (memcmp(value.data, type, sizeof(type)) != 0))
+		goto badformat;
+
+	key.data = "VERSION";
+	key.size = (strlen("VERSION") + 1);
+	state = db->get(db, &key, &value, 0);
+	if (state != 0) {
+		error = ((state < 0) ? errno : EFTYPE);
+		goto failure;
+	}
+	/* Reject short records; memcpy() avoids an unaligned 32-bit load. */
+	if (value.size < sizeof(version))
+		goto badformat;
+	memcpy(&version, value.data, sizeof(version));
+	colldb->version = ntohl(version);
+
+	colldb->handle = db;
+	return (colldb);
+
+badformat:
+	error = EFTYPE;
+failure:
+	(void) db->close(db);
+	free(colldb);
+	errno = error;
+	return (NULL);
+}
+
+
+int
+__colldb_close(__colldb_t *colldb)
+{
+	/*
+	 * Closes the underlying database and frees the wrapper.  Returns 0 on
+	 * success or -1 with errno set; the wrapper is released even when the
+	 * close itself fails, but left untouched when rejected with EINVAL.
+	 */
+	DB *store;
+	int status;
+	int saved;
+
+	if ((colldb == NULL) || (colldb->handle == NULL)) {
+		errno = EINVAL;
+		return (-1);
+	}
+	store = colldb->handle;
+	status = ((store->close(store) == -1) ? -1 : 0);
+
+	/* free() may overwrite errno, so hold on to what close() reported. */
+	saved = errno;
+	free(colldb);
+	if (status == -1)
+		errno = saved;
+
+	return (status);
+}
+
+
+int
+__colldb_sync(__colldb_t *colldb)
+{
+	/*
+	 * Invokes the underlying database's sync method and returns its
+	 * result unchanged.  Fails with -1 and errno set to EINVAL when the
+	 * wrapper or its handle is missing.
+	 */
+	DB *store = NULL;
+
+	if (colldb != NULL)
+		store = colldb->handle;
+	if (store == NULL) {
+		errno = EINVAL;
+		return (-1);
+	}
+	return (store->sync(store, 0));
+}
+
+
+int
+__colldb_get(__colldb_t *colldb,
+		struct __colldb_key *key,
+		struct __colldb_value *value)
+{
+	/*
+	 * Looks up the weights for the code points in `key`.  value->count is
+	 * the capacity of value->weights on entry, the number written on
+	 * success.  Returns 0 if found, else the database's nonzero result,
+	 * or -1 with errno EINVAL, ENOMEM or ERANGE (array too small).
+	 */
+	const int swap = (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__);
+	DB *db = ((colldb != NULL) ? colldb->handle : NULL);
+	struct __colldb_weight *weights = NULL;
+	uint32_t *keybuf = NULL;
+	size_t count = 0, i = 0;
+	int state = 0, error = 0;
+	DBT dbkey, dbvalue;
+
+	if ((db == NULL) || (key == NULL) || (value == NULL) ||
+	    (key->chars == NULL) || (key->count == 0)) {
+		errno = EINVAL;
+		return (-1);
+	}
+	/* Keys are stored big-endian; other hosts need a converted copy. */
+	keybuf = key->chars;
+	if (swap) {
+		keybuf = malloc(key->count * sizeof(*key->chars));
+		if (keybuf == NULL) {
+			errno = ENOMEM;
+			return (-1);
+		}
+		for (i = 0; i < key->count; ++i)
+			keybuf[i] = htonl(key->chars[i]);
+	}
+
+	dbkey.data = keybuf;
+	dbkey.size = (key->count * sizeof(*key->chars));
+	state = db->get(db, &dbkey, &dbvalue, 0);
+	error = errno;
+	if (state != 0)
+		goto done;
+	weights = dbvalue.data;
+	count = (dbvalue.size / sizeof(*weights));
+	if (count > value->count) {
+		error = ERANGE;
+		state = -1;
+		goto done;
+	}
+	value->count = count;
+	for (i = 0; i < count; ++i) {
+		value->weights[i].alternate = weights[i].alternate;
+		value->weights[i].level1 = ntohl(weights[i].level1);
+		value->weights[i].level2 = ntohl(weights[i].level2);
+		value->weights[i].level3 = ntohl(weights[i].level3);
+		value->weights[i].level4 = ntohl(weights[i].level4);
+	}
+done:
+	/* Single exit: the converted key is released on every path. */
+	if (swap)
+		free(keybuf);
+	if (state == -1)
+		errno = error;
+	return (state);
+}
+
+
+int
+__colldb_put(__colldb_t *colldb,
+		struct __colldb_key *key,
+		struct __colldb_value *value)
+{
+	/*
+	 * Stores value->count weights under the code point sequence in `key`,
+	 * converting both to network byte order first.  Returns the database
+	 * put method's result, or -1 with errno set to EINVAL (bad arguments)
+	 * or ENOMEM (no room for the converted copies).
+	 */
+	const int swap = (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__);
+	DB *db = ((colldb != NULL) ? colldb->handle : NULL);
+	struct __colldb_weight *valuebuf = NULL;
+	uint32_t *keybuf = NULL;
+	int state = 0, error = 0;
+	DBT dbkey, dbvalue;
+	size_t i = 0;
+
+	if ((db == NULL) || (key == NULL) || (value == NULL) ||
+	    (key->chars == NULL) || (key->count == 0)) {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	keybuf = key->chars;
+	valuebuf = value->weights;
+	if (swap) {
+		keybuf = malloc(key->count * sizeof(*key->chars));
+		/* calloc() keeps struct padding from carrying heap bytes to disk. */
+		valuebuf = calloc(value->count, sizeof(*value->weights));
+		if ((keybuf == NULL) || (valuebuf == NULL)) {
+			/* Either allocation may have succeeded; drop both. */
+			free(keybuf);
+			free(valuebuf);
+			errno = ENOMEM;
+			return (-1);
+		}
+		for (i = 0; i < key->count; ++i)
+			keybuf[i] = htonl(key->chars[i]);
+		for (i = 0; i < value->count; ++i) {
+			valuebuf[i].alternate = value->weights[i].alternate;
+			valuebuf[i].level1 = htonl(value->weights[i].level1);
+			valuebuf[i].level2 = htonl(value->weights[i].level2);
+			valuebuf[i].level3 = htonl(value->weights[i].level3);
+			valuebuf[i].level4 = htonl(value->weights[i].level4);
+		}
+	}
+
+	dbkey.data = keybuf;
+	dbkey.size = (key->count * sizeof(*key->chars));
+	dbvalue.data = valuebuf;
+	dbvalue.size = (value->count * sizeof(*value->weights));
+	state = db->put(db, &dbkey, &dbvalue, 0);
+	if (swap) {
+		/* Keep put()'s errno across the frees. */
+		error = errno;
+		free(keybuf);
+		free(valuebuf);
+		errno = error;
+	}
+	return (state);
+}
+#endif
+
+
 /*
  * To avoid modifying the original (single-threaded) code too much, we'll just
  * define the old globals as fields inside the table.

Modified: soc2014/ghostmansd/head/lib/libc/locale/xlocale_private.h
==============================================================================
--- soc2014/ghostmansd/head/lib/libc/locale/xlocale_private.h	Thu Aug  7 22:14:37 2014	(r272062)
+++ soc2014/ghostmansd/head/lib/libc/locale/xlocale_private.h	Fri Aug  8 00:20:09 2014	(r272063)
@@ -42,7 +42,6 @@
 
 
 #ifdef _UNICODE_SOURCE
-#include <colldb.h>
 #include <stdint.h>
 #include <unistd.h>
 
@@ -57,8 +56,39 @@
 #define	__UC_NFKC	4
 size_t	__ucsnorm(uint32_t*, const uint32_t*, size_t, int);
 
-size_t	__ucsxfrm(uint32_t*, const uint32_t*, size_t, struct __collation*);
-int	__ucscoll(const uint32_t*, const uint32_t*, struct __collation*);
+#define	COLLDB_VERSION		0x00000001
+#define	COLLDB_WEIGHTS_MAX	10
+struct __colldb_weight {
+	uint8_t alternate;
+	uint32_t level1;
+	uint32_t level2;
+	uint32_t level3;
+	uint32_t level4;
+};
+struct __colldb_key {
+	size_t count;
+	uint32_t *chars;
+};
+/* __colldb_get(): `count` is capacity on input, length on output. */
+struct __colldb_value {
+	size_t count;
+	struct __colldb_weight *weights;
+};
+typedef struct __colldb __colldb_t;
+/* __colldb_root expands to a call, so the function is declared with it. */
+__colldb_t*	__colldb_root_handle(void);
+#define	__colldb_root (__colldb_root_handle())
+
+__colldb_t*	__colldb_create(const char*, int mode);
+__colldb_t*	__colldb_open(const char*);
+int	__colldb_close(__colldb_t*);
+int	__colldb_sync(__colldb_t*);
+int	__colldb_get(__colldb_t*, struct __colldb_key*, struct __colldb_value*);
+int	__colldb_put(__colldb_t*, struct __colldb_key*, struct __colldb_value*);
+
+/* Not const: the definitions take `__colldb_t *` for the lookup routines. */
+size_t	__ucsxfrm(uint32_t*, const uint32_t*, size_t, __colldb_t*);
+int	__ucscoll(const uint32_t*, const uint32_t*, __colldb_t*);
 #endif
 
 

Modified: soc2014/ghostmansd/head/lib/libc/string/wcscoll.c
==============================================================================
--- soc2014/ghostmansd/head/lib/libc/string/wcscoll.c	Thu Aug  7 22:14:37 2014	(r272062)
+++ soc2014/ghostmansd/head/lib/libc/string/wcscoll.c	Fri Aug  8 00:20:09 2014	(r272063)
@@ -70,11 +70,8 @@
 #ifdef _UNICODE_SOURCE
 	const uint32_t *ucs1 = (const uint32_t*)ws1;
 	const uint32_t *ucs2 = (const uint32_t*)ws2;
-	const struct __collation *colltable = 
-		(const struct __collation*)locale->colltable;
-	size_t collsize = locale->collsize;
 
-	return __ucscoll(ucs1, ucs2, colltable, collsize);
+	return __ucscoll(ucs1, ucs2, locale->colldb);
 
 #else /* !_UNICODE_SOURCE */
 	char *mbs1, *mbs2;

Modified: soc2014/ghostmansd/head/lib/libc/string/wcsxfrm.c
==============================================================================
--- soc2014/ghostmansd/head/lib/libc/string/wcsxfrm.c	Thu Aug  7 22:14:37 2014	(r272062)
+++ soc2014/ghostmansd/head/lib/libc/string/wcsxfrm.c	Fri Aug  8 00:20:09 2014	(r272063)
@@ -50,11 +50,8 @@
 #ifdef _UNICODE_SOURCE
 	uint32_t *udst = (uint32_t*)dest;
 	const uint32_t *usrc = (const uint32_t*)src;
-	const struct __collation *colltable = 
-		(const struct __collation*)locale->colltable;
-	size_t collsize = locale->collsize;
 
-	return __ucsxfrm(udst, usrc, len, colltable, collsize);
+	return __ucsxfrm(udst, usrc, len, locale->colldb);
 #else
 	int prim, sec, l;
 	size_t slen;

Added: soc2014/ghostmansd/head/lib/libc/unicode/coll.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ soc2014/ghostmansd/head/lib/libc/unicode/coll.h	Fri Aug  8 00:20:09 2014	(r272063)
@@ -0,0 +1,109 @@
+/*-
+ * Copyright (c) 2014 Dmitry Selyutin.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <pthread.h>
+#include "xlocale_private.h"
+static __colldb_t *handle = NULL;
+static pthread_once_t once = PTHREAD_ONCE_INIT;
+
+/* pthread_once() routine, hence void; `handle` stays NULL on failure. */
+static void
+__colldb_root_init(void)
+{
+	handle = __colldb_open("/usr/share/colldb/root.db");
+}
+
+/* Returns the root database, opened on first call; NULL if that failed. */
+__colldb_t *
+__colldb_root_handle(void)
+{
+	(void) pthread_once(&once, __colldb_root_init);
+	return (handle);
+}
+
+
+/* Probes `colldb` with the first 1..17 code points at `iter`, shortest first. */
+static int
+__coll_lookup(const uint32_t *iter, __colldb_t *colldb,
+	      struct __colldb_value *val, size_t *shift)
+{
+	struct __colldb_key key;
+	int state = 0;
+
+	for (*shift = 1; *shift != 18; ++(*shift)) {
+		if (iter[*shift - 1] == 0)
+			break;
+		/* The key is only read by __colldb_get(), hence the cast. */
+		key.chars = (uint32_t *)(uintptr_t)iter;
+		key.count = *shift;
+		state = __colldb_get(colldb, &key, val);
+		if ((state == -1) || (state == 0))
+			break;
+	}
+	return (state);
+}
+
+/*
+ * Finds the collation weights for the sequence starting at `iter`: first in
+ * `colldb`, then in the root database, and failing both builds two implicit
+ * weights from the code point in the caller's `default_weights` storage.
+ * Returns the number of code points consumed, or 0 if a lookup failed.
+ */
+static size_t
+__coll_iter(const uint32_t *iter, __colldb_t *colldb, struct __colldb_value *val,
+	    struct __colldb_weight (*default_weights)[2])
+{
+	const uint32_t c = *iter;
+	uint32_t base = 0xFBC0;
+	size_t shift = 0;
+	int state = 0;
+
+	state = __coll_lookup(iter, colldb, val, &shift);
+	if ((state != 0) && (state != -1) && (colldb != __colldb_root))
+		state = __coll_lookup(iter, __colldb_root, val, &shift);
+	if (state == -1)
+		return (0);
+	if (state == 0)
+		return (shift);
+
+	/* Only level1 is filled in; the caller owns the remaining fields. */
+	if (((0x4E00 <= c) && (c <= 0x9FCC)) ||
+	    (c == 0xFA0E) || (c == 0xFA0F) || (c == 0xFA11) ||
+	    (c == 0xFA13) || (c == 0xFA14) || (c == 0xFA1F) ||
+	    (c == 0xFA21) || (c == 0xFA23) || (c == 0xFA24) ||
+	    (c == 0xFA27) || (c == 0xFA28) || (c == 0xFA29))
+		base = 0xFB40;
+	else if (((0x3400 <= c) && (c <= 0x4DB5)) ||
+	    ((0x20000 <= c) && (c <= 0x2A6D6)) ||
+	    ((0x2A700 <= c) && (c <= 0x2B734)) ||
+	    ((0x2B740 <= c) && (c <= 0x2B81D)))
+		base = 0xFB80;
+	(*default_weights)[0].level1 = (base + (c >> 15));
+	(*default_weights)[1].level1 = ((c & 0x7FFF) | 0x8000);
+	val->weights = *default_weights;
+	val->count = 2;
+	return (1);
+}

Modified: soc2014/ghostmansd/head/lib/libc/unicode/ucscoll.c
==============================================================================
--- soc2014/ghostmansd/head/lib/libc/unicode/ucscoll.c	Thu Aug  7 22:14:37 2014	(r272062)
+++ soc2014/ghostmansd/head/lib/libc/unicode/ucscoll.c	Fri Aug  8 00:20:09 2014	(r272063)
@@ -30,33 +30,96 @@
 
 
 int
-__ucscoll(const uint32_t *lhs, const uint32_t *rhs,
-	  const struct __collation_data *colldata, size_t collsize)
+__ucscoll(const uint32_t *lstr, const uint32_t *rstr, __colldb_t *colldb)
 {
+	int cmp = 0;
+	size_t i = 0;
+	int state = 0;
 	int error = 0;
-	int result = 0;
-	size_t lsize = 0;
-	size_t rsize = 0;
-	uint32_t *lcoll = NULL;
-	uint32_t *rcoll = NULL;
+	size_t index = 0;
+	size_t size[2] = {0, 0};
+	size_t count[2] = {0, 0};
+	struct __colldb_value val[2];
+	uint32_t (*str)[2] = {NULL, NULL};
+	uint32_t (*norm)[2] = {NULL, NULL};
+	uint32_t (*iter)[2] = {NULL, NULL};
+	struct __colldb_weight default_weights[2][2];
+	struct __colldb_weight weights[2][__COLLATION_WEIGHTS_MAX];
+	const int init_error = errno;
 
-	error = errno;
-	lsize = __ucsxfrm(NULL, lhs, 0, colldata, collsize);
-	rsize = __ucsxfrm(NULL, rhs, 0, colldata, collsize);
-	errno = error;
-	lcoll = malloc(lsize * sizeof(uint32_t));
-	rcoll = malloc(rsize * sizeof(uint32_t));
-	if ((lcoll == NULL) || (rcoll == NULL))
+	if ((lstr == NULL) || (rstr == NULL))
 	{
-		free(lcoll);
-		free(rcoll);
-		errno = ENOMEM;
+		errno = EINVAL;
 		return (0);
 	}
-	result = wcscmp((const wchar_t*)lcoll, (const wchar_t*)rcoll);
-	error = errno;
-	free(lcoll);
-	free(rcoll);
-	errno = error;
-	return (result);
+	for (i = 0; i < 2; ++i)
+	{
+		size[i] = __ucsnorm(NULL, str[i], 0, __UC_NFD);
+		norm[i] = malloc(size[i] * sizeof(uint32_t));
+		if (__ucsnorm(norm[i], str[i], size[i], __UC_NFD) > lsize)
+		{
+			error = errno;
+			free(norm[0]);
+			free(norm[1]);
+			errno = error;
+			return (0);
+		}
+		iter[i] = norm[i];
+	}
+	memset(generic, 0, sizeof(generic));
+	generic[0].level2 = 0x20;
+	generic[0].level3 = 0x02;
+	generic[1].level2 = 0x02;
+	generic[1].level3 = 0x01;
+
+	if (colldb == NULL)
+		colldb = __colldb_root;
+	while (*iter[0] != 0)
+	{
+		if (*iter[1] == 0)
+		{
+			free(norm[0]);
+			free(norm[1]);
+			return (+1);
+		}
+		for (i = 0; i < 2; ++i)
+		{
+			val[i].weights = weights[i];
+			val[i].count = __COLLATION_WEIGHTS_MAX;
+			shift = __coll_iter(iter[0], colldb, &val[i],
+					    &default_weights[i]);
+			if (shift == 0)
+			{
+				error = errno;
+				free(norm[0]);
+				free(norm[1]);
+				errno = error;
+				return (0);
+			}
+		}
+		iter[i] += shift;
+		if (val[0].count < val[1].count)
+			cmp = val[0].count;
+		else
+			cmp = val[1].count;
+		for (i = 0; i < cmp; ++i)
+		{
+			state = memcmp(val[0].weights[i], val[1].weights[i],
+				       sizeof(struct __colldb_weight));
+			if (state != 0)
+			{
+				free(norm[0]);
+				free(norm[1]);
+				errno = init_error;
+				return (state);
+			}
+		}
+		if (val[0].count < val[1].count)
+			state = -1;
+		else if (val[0].count > val[1].count)
+			state = +1;
+	}
+	if (*iter[1] != 0)
+		return (-1);
+	return (0);
 }

Modified: soc2014/ghostmansd/head/lib/libc/unicode/ucsxfrm.c
==============================================================================
--- soc2014/ghostmansd/head/lib/libc/unicode/ucsxfrm.c	Thu Aug  7 22:14:37 2014	(r272062)
+++ soc2014/ghostmansd/head/lib/libc/unicode/ucsxfrm.c	Fri Aug  8 00:20:09 2014	(r272063)
@@ -26,159 +26,96 @@
 
 #include <errno.h>
 #include <string.h>
-#include "xlocale_private.h"
+#include "coll.h"
 
 
 size_t
-__ucsxfrm(uint32_t *buffer, const uint32_t *str, size_t size,
-	  const struct __collation *colldata, size_t collsize)
+__ucsxfrm(uint32_t *buffer, const uint32_t *str, size_t size, __colldb_t *colldb)
 {
+	size_t i = 0;
+	int state = 0;
 	int error = 0;
-	size_t mid = 0;
-	size_t low = 0;
-	size_t high = 0;
-	size_t count = 0;
 	size_t shift = 0;
-	uint32_t hash = 0;
+	size_t count = 0;
 	size_t reqsize = 0;
-	size_t normsize = 0;
-	uint32_t *normstr = NULL;
-	const uint32_t *iter = str;
+	uint32_t *iter = NULL;
+	uint32_t *elements = NULL;
+	struct __colldb_value value;
+	struct __colldb_weight default_weights[2];
+	struct __colldb_weight weights[__COLLATION_WEIGHTS_MAX];
 	const int init_error = errno;
-	const uint32_t *elements = NULL;
-	size_t seqmax = 18; /* maximal decomposition length */
-	uint32_t generic[6] = {0x00, 0x00, 0x20, 0x01, 0x02, 0x01};
 
 	if ((str == NULL) || ((buffer != NULL) && (size == 0)))
 	{
 		errno = EINVAL;
-		return 0;
-	}
-	if (colldata == NULL)
-	{
-		colldata = __DUCET_COLLDATA;
-		collsize = __DUCET_COLLSIZE;
-		seqmax = 3; /* max DUCET sequence length */
+		return (SIZE_MAX);
 	}
-	error = errno;
 	normsize = __ucsnorm(NULL, str, 0, __UC_NFD);
-	errno = error;
-	normstr = malloc(normsize * sizeof(uint32_t));
-	if (normstr == NULL)
+	norm = malloc(normsize * sizeof(uint32_t));
+	if (norm == NULL)
 	{
 		errno = ENOMEM;
 		return (SIZE_MAX);
 	}
-	if (__ucsnorm(normstr, str, normsize, __UC_NFD) > normsize)
+	if (__ucsnorm(norm, str, normsize, __UC_NFD) > normsize)
 	{
 		error = errno;
-		free(normstr);
+		free(norm);
 		errno = error;
 		return (SIZE_MAX);
 	}
-
+	memset(generic, 0, sizeof(generic));
+	generic[0].level2 = 0x20;
+	generic[0].level3 = 0x02;
+	generic[1].level2 = 0x02;
+	generic[1].level3 = 0x01;
+	memset(match, 0, sizeof(match));
+
+	iter = norm;
+	if (colldb == NULL)
+		colldb = __colldb_root;
 	while (*iter != 0)
 	{
-
-		/*
-		 * Try to determine if collation table contains a sequence
-		 * which consists from 1 to 18 characters.
-		 * If DUCET is used, maximal count of characters is 3.
-		 */
-		for (shift = 1; shift != seqmax; ++shift)
+		value.weights = weights;
+		value.count = __COLLATION_WEIGHTS_MAX;
+		shift = __coll_iter(iter, colldb, &value, &default_weights);
+		if (shift == 0)
 		{
-			low = 0;
-			elements = NULL;
-			high = collsize;
-			if (*(iter + shift - 1) == 0)
-				break;
-			hash = __uchash(iter, shift);
-			while (low <= high)
-			{
-				mid = (low + ((high - low) / 2));
-				if (hash < colldata[mid].hash)
-					high = (mid - 1);
-				else if (hash > colldata[mid].hash)
-					low = (mid + 1);
-				else
-				{
-					count = colldata[mid].count;
-					elements = colldata[mid].elements;
-					break;
-				}
-			}
-			if (elements != NULL)
-				break;
+			error = errno;
+			free(elements);
+			free(norm);
+			errno = error;
+			return (SIZE_MAX);
 		}
 
-		/*
-		 * If collation table does not contain a sequence and
-		 * current collation table is not DUCET, repeat the algorithm
-		 * for DUCET with limit up to 3 characters.
-		 */
-		if ((elements == NULL) && (colldata != __DUCET_COLLDATA))
+		free(elements);
+		count = (value.count * 4);
+		elements = malloc(count * sizeof(uint32_t));
+		if (elements == NULL)
 		{
-			for (shift = 1; shift != 3; ++shift)
-			{
-				low = 0;
-				elements = NULL;
-				collsize = __DUCET_COLLSIZE;
-				if (*(iter + shift - 1) == 0)
-					break;
-				hash = __uchash(iter, shift);
-				while (low <= high)
-				{
-					mid = (low + ((high - low) / 2));
-					if (hash < __DUCET_COLLDATA[mid].hash)
-						high = (mid - 1);
-					else if (hash > __DUCET_COLLDATA[mid].hash)
-						low = (mid + 1);
-					else
-					{
-						count = __DUCET_COLLDATA[mid].count;
-						elements = __DUCET_COLLDATA[mid].elements;
-						break;
-					}
-				}
-				if (elements != NULL)
-					break;
-			}
+			free(elements);
+			free(norm);
+			errno = ENOMEM;
+			return (SIZE_MAX);
 		}
-
-		/* If no sequence was matched, generate default collation. */
-		if (elements == NULL)
+		for (i = 0; i < value.count; ++i)
 		{
-			shift = 1;
-			generic[0] = 0xFBC0;
-			if (((0x4E00 <= *str) && (*str <= 0x9FCC)) ||
-			   (*str == 0xFA0E) || (*str == 0xFA0F) ||
-			   (*str == 0xFA11) || (*str == 0xFA13) ||
-			   (*str == 0xFA14) || (*str == 0xFA1F) ||
-			   (*str == 0xFA21) || (*str == 0xFA23) ||
-			   (*str == 0xFA24) || (*str == 0xFA27) ||
-			   (*str == 0xFA28) || (*str == 0xFA29))
-				generic[0] = 0xFB40;
-			else if (((0x3400 <= *str) && (*str <= 0x4DB5)) ||
-			    ((0x20000 <= *str) && (*str <= 0x2A6D6)) ||
-			    ((0x2A700 <= *str) && (*str <= 0x2B734)) ||
-			    ((0x2B740 <= *str) && (*str <= 0x2B81D)))
-				generic[0] = 0xFB80;
-			generic[0] = (generic[0] + (*str >> 15));
-			generic[1] = ((*str & 0x7FFF) | 0x8000);
-			elements = generic;
-			count = 6;
+			elements[(count * 0) + i] = value.weights[i].level1;
+			elements[(count * 1) + i] = value.weights[i].level2;
+			elements[(count * 2) + i] = value.weights[i].level3;
+			elements[(count * 3) + i] = value.weights[i].level4;
 		}
-		else
-			count *= 3;
+		for (i = 0; i < count; ++i)
+			elements[i] = (elements[i] ? elements[i] : 1);
 
-		/* Write found or generated sequence into the buffer. */
 		if (((reqsize + count) > size) && (buffer != NULL))
 		{
-			size = (size - reqsize - 1);
-			memcpy(buffer, elements, (size * sizeof(uint32_t)));
-			*(buffer + size) = 0;
-			free(normstr);
-			return __ucsxfrm(NULL, str, 0, colldata, collsize);
+			count = (size - reqsize - 1);
+			memcpy(buffer, elements, (count * sizeof(uint32_t)));
+			*(buffer + count) = 0;
+			free(elements);
+			free(norm);
+			return __ucsxfrm(NULL, str, 0, colldb);
 		}
 		if (buffer != NULL)
 		{
@@ -186,13 +123,14 @@
 			buffer += count;
 		}
 		reqsize += count;
+		iter += shift;
 	}
 
 	if (buffer == NULL)
 		++reqsize;
 	else
 		*buffer = 0;
-	free(normstr);
+	free(norm);
 	errno = init_error;
 	return (reqsize);
 }

Modified: soc2014/ghostmansd/head/lib/libcolldb/colldb.c
==============================================================================
--- soc2014/ghostmansd/head/lib/libcolldb/colldb.c	Thu Aug  7 22:14:37 2014	(r272062)
+++ soc2014/ghostmansd/head/lib/libcolldb/colldb.c	Fri Aug  8 00:20:09 2014	(r272063)
@@ -30,8 +30,8 @@
 #include "colldb.h"
 
 
-struct collation *
-collation_create(const char *path, int mode)
+colldb_t *
+colldb_create(const char *path, int mode)
 {
 	DBT key;
 	DBT value;
@@ -39,10 +39,10 @@
 	int error = 0;
 	uint32_t version = 0;
 	int flags = (O_RDWR | O_CREAT | O_TRUNC);
-	struct collation *collation = NULL;
+	colldb_t *colldb = NULL;
 
-	collation = calloc(1, sizeof(*collation));
-	if (collation == NULL)
+	colldb = calloc(1, sizeof(*colldb));
+	if (colldb == NULL)
 	{
 		errno = ENOMEM;
 		return (NULL);
@@ -51,11 +51,11 @@
 	if (db == NULL)
 	{
 		error = errno;
-		free(collation);
+		free(colldb);
 		errno = error;
 		return (NULL);
 	}
-	collation->version = __COLLATION_VERSION;
+	colldb->version = __COLLATION_VERSION;
 
 	key.data = "TYPE";
 	value.data = "COLLATION";
@@ -68,29 +68,29 @@
 	}
 
 	key.data = "VERSION";
-	version = htonl(collation->version);
+	version = htonl(colldb->version);
 	value.data = &version;
 	key.size = (strlen("VERSION") + 1);
-	value.size = sizeof(collation->version);
+	value.size = sizeof(colldb->version);
 	if (db->put(db, &key, &value, 0) == -1)
 	{
 		error = errno;
 		goto failure;
 	}
 
-	collation->handle = db;
-	return (collation);
+	colldb->handle = db;
+	return (colldb);
 
 failure:
 	(void) db->close(db);
-	free(collation);
+	free(colldb);
 	errno = error;
 	return (NULL);
 }
 
 
-struct collation *
-collation_open(const char *path)
+colldb_t *
+colldb_open(const char *path)
 {
 	DBT key;
 	DBT value;
@@ -98,10 +98,10 @@
 	int error = 0;
 	int state = 0;
 	int flags = O_RDONLY;
-	struct collation *collation = NULL;
+	colldb_t *colldb = NULL;
 
-	collation = calloc(1, sizeof(*collation));
-	if (collation == NULL)
+	colldb = calloc(1, sizeof(*colldb));
+	if (colldb == NULL)
 	{
 		errno = ENOMEM;
 		return (NULL);
@@ -110,7 +110,7 @@
 	if (db == NULL)
 	{
 		error = errno;
-		free(collation);
+		free(colldb);
 		errno = error;
 		return (NULL);
 	}
@@ -143,31 +143,31 @@
 			error = EFTYPE;
 		goto failure;
 	}
-	collation->version = ntohl(*(const uint32_t*)value.data);
+	colldb->version = ntohl(*(const uint32_t*)value.data);
 
-	collation->handle = db;
-	return (collation);
+	colldb->handle = db;
+	return (colldb);
 
 failure:
 	(void) db->close(db);
-	free(collation);
+	free(colldb);
 	errno = error;
 	return (NULL);
 }
 
 
 int
-collation_close(struct collation *collation)
+colldb_close(colldb_t *colldb)
 {
 	DB *db = NULL;
 	int error = 0;
 
-	if (collation == NULL)
+	if (colldb == NULL)
 	{

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-soc-all mailing list