socsvn commit: r269600 - soc2014/ghostmansd/normalize

ghostmansd at FreeBSD.org ghostmansd at FreeBSD.org
Sun Jun 15 21:55:24 UTC 2014


Author: ghostmansd
Date: Sun Jun 15 21:55:22 2014
New Revision: 269600
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=269600

Log:
  Unicode Normalization Algorithm: Hangul composition
  
  The Unicode Normalization Algorithm is reimplemented.
  The NFD and NFC algorithms are available as standalone functions.
  Hangul composition and decomposition are arithmetic-based.
  The hangul_syllable() function was added to quickly check the type
  of a Hangul syllable (lead, vowel, trail or combination).
  The normalization functions are almost finished; the last
  part is to implement database lookup, using the Unicode Database
  files (e.g. UnicodeData.txt).
  

Added:
  soc2014/ghostmansd/normalize/Makefile
  soc2014/ghostmansd/normalize/hangul.h
Modified:
  soc2014/ghostmansd/normalize/main.c
  soc2014/ghostmansd/normalize/strnorm.c
  soc2014/ghostmansd/normalize/wcsnorm.c

Added: soc2014/ghostmansd/normalize/Makefile
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ soc2014/ghostmansd/normalize/Makefile	Sun Jun 15 21:55:22 2014	(r269600)
@@ -0,0 +1,13 @@
+# Build the test driver `main` with strict C89 checks; every warning is an error.
+CFLAGS := -g -std=c89 -O0 -pedantic \
+-Werror -Wall -Wextra -Wundef -Wshadow -Waggregate-return -Wstrict-prototypes \
+-Wcast-qual -Wcast-align -Wswitch-default -Wswitch-enum -Wwrite-strings \
+-Wpointer-arith -Wno-long-long -Wno-format -Wno-unreachable-code \
+-Wno-unused-function
+
+# `all` does not name a file; declare it phony so that a stray file called
+# `all` in this directory can never make the target look up to date.
+.PHONY: all
+
+all:
+	$(CC) $(CFLAGS) -o main main.c wcsnorm.c

Added: soc2014/ghostmansd/normalize/hangul.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ soc2014/ghostmansd/normalize/hangul.h	Sun Jun 15 21:55:22 2014	(r269600)
@@ -0,0 +1,600 @@
+/*
+ * Copyright (c) 2014 Dmitry Selyutin <ghostmansd at FreeBSD.org>
+ *     at Lomonosov Moscow State University - www.msu.ru
+ *                   All rights reserved.
+ *
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ * Portions of this software were developed by David Chisnall
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _HANGUL_H_
+#define _HANGUL_H_
+
+
+#define HANGUL_LEAD_TYPE				1
+#define HANGUL_VOWEL_TYPE				2
+#define HANGUL_TRAIL_TYPE				3
+#define HANGUL_LEAD_VOWEL_TYPE			4
+#define HANGUL_LEAD_VOWEL_TRAIL_TYPE	5
+
+
+/*
+ * Classify a code point by Hangul syllable type.  The jamo ranges below
+ * follow HangulSyllableType.txt.
+ *
+ * Returns HANGUL_LEAD_TYPE, HANGUL_VOWEL_TYPE or HANGUL_TRAIL_TYPE for
+ * conjoining jamo, HANGUL_LEAD_VOWEL_TYPE or HANGUL_LEAD_VOWEL_TRAIL_TYPE
+ * for precomposed syllables, and 0 for any other code point.
+ */
+static int hangul_syllable(wchar_t code)
+{
+	if (((0x1100 <= code) && (code <= 0x115F))
+	||  ((0xA960 <= code) && (code <= 0xA97C)))
+		return HANGUL_LEAD_TYPE;
+
+	if (((0x1160 <= code) && (code <= 0x11A7))
+	||  ((0xD7B0 <= code) && (code <= 0xD7C6)))
+		return HANGUL_VOWEL_TYPE;
+
+	if (((0x11A8 <= code) && (code <= 0x11FF))
+	||  ((0xD7CB <= code) && (code <= 0xD7FB)))
+		return HANGUL_TRAIL_TYPE;
+
+	/*
+	 * Precomposed syllables fill U+AC00..U+D7A3 in consecutive groups of
+	 * 28 code points: the first code point of every group is a lead+vowel
+	 * syllable, the other 27 are lead+vowel+trail syllables.  A single
+	 * modulo therefore replaces the enumerated per-group comparisons.
+	 */
+	if ((0xAC00 <= code) && (code <= 0xD7A3))
+	{
+		/* The range check above guarantees the difference is non-negative. */
+		if ((((unsigned long) code - 0xAC00UL) % 28UL) == 0)
+			return HANGUL_LEAD_VOWEL_TYPE;
+		return HANGUL_LEAD_VOWEL_TRAIL_TYPE;
+	}
+
+	return 0;
+}
+
+
+#endif /* _HANGUL_H_ */

Modified: soc2014/ghostmansd/normalize/main.c
==============================================================================
--- soc2014/ghostmansd/normalize/main.c	Sun Jun 15 20:14:11 2014	(r269599)
+++ soc2014/ghostmansd/normalize/main.c	Sun Jun 15 21:55:22 2014	(r269600)
@@ -32,25 +32,52 @@
 
 #include "normalize.h"
 #include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
 
 
 int main(int argc, char const **argv)
 {
-	size_t req = 0;
-	size_t size = 100;
+	size_t index = 0;
+	size_t reqsize = 0;
 	wchar_t buffer[100] = {0};
-	wchar_t const *ptr = NULL;
-	wchar_t const nfd[] = {
+	wchar_t const nfc[] = {
 		0x1100, 0x1162, 0x11AC,
-		0x1100, 0x1162, 0x11AC, 0x0000};
+		0x1100, 0x1162, 0x11AC,
+		0x1100, 0x1162, 0x11AC,
+		0x0000};
+	wchar_t const nfd[] = {0xac21, 0xac21, 0xac21, 0x0000};
+	size_t const size = 10;
+
+	/* compose */
+	(void) argc;
+	(void) argv;
+	wmemset(buffer, 0, size);
+	reqsize = __wcsnorm(buffer, size, nfc, __NORM_NFC);
+	if ((reqsize == 0) || (reqsize > size))
+	{
+		perror("NORM_NFC");
+		printf("\tArguments: size=%lu, reqsize=%lu\n", size, reqsize);
+		return EXIT_FAILURE;
+	}
+	printf("NFC: ");
+	for (index = 0; index < reqsize; ++index)
+		printf("\\u%04X", buffer[index]);
+	printf("\n");
 
 	/* decompose */
-	req = __wcsnorm(buffer, size, nfd, __NORM_NFC);
-	printf("size=%lu\n", size);
-	printf("req=%lu\n", req);
-	printf("buffer=");
-	for (ptr = buffer; *ptr; ++ptr)
-		printf("0x%04x,", (unsigned int) *ptr);
+	wmemset(buffer, 0, size);
+	reqsize = __wcsnorm(buffer, size, nfd, __NORM_NFD);
+	if ((reqsize == 0) || (reqsize > size))
+	{
+		perror("NORM_NFD");
+		printf("\tArguments: size=%lu, reqsize=%lu\n", size, reqsize);
+		return EXIT_FAILURE;
+	}
+	printf("NFD: ");
+	for (index = 0; index < reqsize; ++index)
+		printf("\\u%04X", buffer[index]);
 	printf("\n");
-	return 0;
+
+	return EXIT_SUCCESS;
 }

Modified: soc2014/ghostmansd/normalize/strnorm.c
==============================================================================
--- soc2014/ghostmansd/normalize/strnorm.c	Sun Jun 15 20:14:11 2014	(r269599)
+++ soc2014/ghostmansd/normalize/strnorm.c	Sun Jun 15 21:55:22 2014	(r269600)
@@ -32,12 +32,107 @@
 
 #include "strnorm.h"
 
-size_t
-__strnorm(char *buffer, size_t size, char const *str, int form)
+
+static size_t __norm_encode(char *buffer, size_t size, wchar_t const *wstr,
+							locale_t locale)
+{
+	static const mbstate_t initial;
+	size_t length = 0;
+	char *mbs = NULL;
+	wchar_t const *wcs = wstr;
+
+	FIX_LOCALE(locale);
+	length = wcsrtombs_l(NULL, &wcs, 0, &state, locale);
+	if (length == ((size_t)-1))
+		return 0;
+	else if (length > size)
+	{
+		if (size == 0)
+			return length;
+		errno = ERANGE;
+		return 0;
+	}
+	return wcsrtombs_l(mbs, &wstr, length, &state, locale);
+}
+
+
+static wchar_t *__norm_decode(char const *str, locale_t locale)
+{
+	static const mbstate_t initial;
+	size_t length = 0;
+	wchar_t *wcs = NULL;
+	const char *mbs = str;
+	mbstate_t state = initial;
+
+	FIX_LOCALE(locale);
+	length = mbsrtowcs_l(NULL, &mbs, 0, &state, locale);
+	if (length == ((size_t)-1))
+		return NULL;
+	if ((wcs = malloc((length + 1) * sizeof(wchar_t))) == NULL)
+		__collate_err(EX_OSERR, __func__);
+	mbsrtowcs_l(wcs, &str, length, &state, loc);
+	wcs[length] = 0;
+	state = initial;
+	return wcs;
+}
+
+
+size_t __strnorm_l(char *buffer, size_t size, char const *str, int form,
+				   locale_t locale)
 {
 	size_t wsize = 0;
+	size_t reqsize = 0;
+	size_t wreqsize = 0;
 	wchar_t *wstr = NULL;
 	wchar_t *wbuffer = NULL;
 
-	return 0;
+#define __strnorm_failure() \
+do { \
+	free(wbuffer); \
+	free(wstr); \
+	return 0; \
+} while (0)
+
+	/* Check initial arguments. */
+	if ((str == NULL) || ((buffer != NULL) && (size == 0)))
+	{
+		errno = EINVAL;
+		return 0;
+	}
+	switch (form)
+	{
+		case __NORM_NFD:
+		case __NORM_NFC:
+		case __NORM_NFKD:
+		case __NORM_NFKC:
+			break;
+		default:
+			errno = EINVAL;
+			return 0;
+	}
+
+	/* Acquire __wcsnorm() arguments. */
+	FIX_LOCALE(locale);
+	if (locale->__collate_load_error)
+		__strnorm_failure();
+	if ((wstr = __strnorm_mbstowcs(str, locale)) == NULL)
+		__strnorm_failure();
+	if ((wsize = __wcsnorm(NULL, 0, wstr, form) == 0))
+		__strnorm_failure();
+	if ((wbuffer = malloc(wsize * sizeof(wchar_t))) == NULL)
+		__strnorm_failure();
+
+	/* Normalize the wide string. */
+	wreqsize = __wcsnorm(wbuffer, wsize, wstr, form);
+	if ((wreqsize == 0) || (wreqsize > wsize))
+		__strnorm_failure();
+
+	/* Check if byte buffer is large enough. */
+	reqsize = __norm_encode(buffer, size, wbuffer);
+	if (reqsize == ((size_t)-1))
+		__strnorm_failure();
 }
+
+
+size_t __strnorm(char *buffer, size_t size, char const *str, int form)
+{ return __strnorm_l(buffer, size, str, form, __get_current_locale()); }

Modified: soc2014/ghostmansd/normalize/wcsnorm.c
==============================================================================
--- soc2014/ghostmansd/normalize/wcsnorm.c	Sun Jun 15 20:14:11 2014	(r269599)
+++ soc2014/ghostmansd/normalize/wcsnorm.c	Sun Jun 15 21:55:22 2014	(r269600)
@@ -30,150 +30,197 @@
  * SUCH DAMAGE.
  */
 
-#include "normalize.h"
 #include <errno.h>
 #include <stdint.h>
 #include <stdio.h>
+#include "normalize.h"
+#include "hangul.h"
 
 
 #define HANGUL_MIN			0xAC00
 #define HANGUL_MAX			0xD7A4
-
-#define HANGUL_BASE			0xAC00
-#define HANGUL_LEAD_BASE	0x1100
-#define HANGUL_VOWEL_BASE	0x1161
-#define HANGUL_TRAIL_BASE	0x11A7
+#define HANGUL_BASE			HANGUL_MIN
 
 #define HANGUL_LEAD_COUNT	19
 #define HANGUL_VOWEL_COUNT	21
 #define HANGUL_TRAIL_COUNT	28
 
+#define HANGUL_LEAD_MIN		0x1100
+#define HANGUL_VOWEL_MIN	0x1161
+#define HANGUL_TRAIL_MIN	0x11A7
+
+#define HANGUL_LEAD_MAX		((HANGUL_LEAD_MIN + HANGUL_LEAD_COUNT) - 1)
+#define HANGUL_VOWEL_MAX	((HANGUL_VOWEL_MIN + HANGUL_VOWEL_COUNT) - 1)
+#define HANGUL_TRAIL_MAX	((HANGUL_TRAIL_MIN + HANGUL_TRAIL_COUNT) - 1)
+
 #define HANGUL_BASE_COUNT	588
 #define HANGUL_FULL_COUNT	11172
 
 
-size_t
-__wcsnorm(wchar_t *buffer, size_t size, wchar_t const *str, int form)
+static size_t compose(wchar_t *buffer, size_t size, wchar_t const *str)
 {
-	int error = 0;
-	size_t count = 0;
+	int32_t prev = 0;
+	int32_t curr = 0;
+	size_t index = 0;
+	size_t length = 0;
 	size_t reqsize = 0;
-	wchar_t curr = 0xFFFF;
-	wchar_t last = 0xFFFF;
-	int32_t lead = 0xFFFF;
-	int32_t vowel = 0xFFFF;
-	int32_t trail = 0xFFFF;
-	int32_t hangul = 0xFFFF;
-	wchar_t const *iter = str;
 
-	if (!str || (buffer && !size))
-	{
-		errno = EINVAL;
-		return 0;
-	}
-	switch (form)
+	prev = *str;
+	length = wcslen(str);
+	if (buffer != NULL)
+		*buffer = prev;
+	for (index = 1; index < length; ++index)
 	{
-		case __NORM_NFD:
-		case __NORM_NFC:
-		case __NORM_NFKD:
-		case __NORM_NFKC:
-			break;
-		default:
-			errno = EINVAL;
-			return 0;
-	}
+		if ((buffer != NULL) && (reqsize > size))
+			return compose(NULL, 0, str);
+		curr = str[index];
 
-	size -= 1;
-	last = 0x00;
-	do {
-		curr = *iter;
-		if (curr == L'\0')
-			break;
-		if (reqsize > size)
+		/* Text exclusively containing ASCII characters (U+0000..U+007F)
+		* is left unaffected by all of the Normalization Forms.
+		* Text exclusively containing Latin-1 characters (U+0000..U+00FF)
+		* is left unaffected by NFC. This is effectively the same as saying
+		* that all Latin-1 text is already normalized to NFC. */
+		if (prev < 0xFF)
 		{
 			if (buffer != NULL)
-				buffer[++size] = L'\0';
-			return __wcsnorm(NULL, 0, str, form);
+				*buffer++ = prev;
+			prev = curr;
+			++reqsize;
+			continue;
 		}
 
-		/* Text exclusively containing ASCII characters (U+0000..U+007F)
-		 * is left unaffected by all of the Normalization Forms.
-		 * Text exclusively containing Latin-1 characters (U+0000..U+00FF)
-		 * is left unaffected by NFC. This is effectively the same as saying
-		 * that all Latin-1 text is already normalized to NFC. */
-		if ((curr < 0x80) || ((curr <= 0xFF) && (form == __NORM_NFC)))
+		/* Hangul script composition normalization algorithm. */
+		else if (((HANGUL_LEAD_MIN <= prev) && (prev <= HANGUL_LEAD_MAX))
+		&&       ((HANGUL_VOWEL_MIN <= curr) && (curr <= HANGUL_VOWEL_MAX)))
+		{
+			prev = (HANGUL_BASE \
+				+ ((prev - HANGUL_LEAD_MIN) * HANGUL_BASE_COUNT)
+				+ ((curr - HANGUL_VOWEL_MIN) * HANGUL_TRAIL_COUNT));
+		}
+		else if ((hangul_syllable(prev) == HANGUL_LEAD_VOWEL_TYPE)
+		&&      ((HANGUL_TRAIL_MIN <= curr) && (curr <= HANGUL_TRAIL_MAX)))
+			prev += (curr - HANGUL_TRAIL_MIN);
+		else
 		{
 			if (buffer != NULL)
-				*buffer = curr;
-			count = 1;
+				*buffer++ = prev;
+			prev = curr;
+			++reqsize;
 		}
+	}
 
-		/* Hangul script uses a special normalization algorithm. */
-		if (((HANGUL_MIN <= curr) && (curr <= HANGUL_MAX))
-		&&  ((form == __NORM_NFD) || (form == __NORM_NFKD)))
+	if ((reqsize + 2) > size)
+	{
+		if (buffer != NULL)
+			*--buffer = L'\0';
+		return (reqsize + 2);
+	}
+	++reqsize;
+	if ((reqsize + 1) > size)
+		return ++reqsize;
+	if (buffer != NULL)
+	{
+		*buffer++ = prev;
+		*buffer++ = L'\0';
+	}
+	return reqsize;
+}
+
+
+static size_t decompose(wchar_t *buffer, size_t size, wchar_t const *str)
+{
+	size_t count = 0;
+	size_t reqsize = 0;
+	int32_t curr = 0xFFFF;
+	int32_t lead = 0xFFFF;
+	int32_t vowel = 0xFFFF;
+	int32_t trail = 0xFFFF;
+	wchar_t const *iter = str;
+
+	do {
+		curr = *iter;
+		if (curr == L'\0')
+			break;
+		if ((buffer != NULL) && (reqsize > size))
+			return decompose(NULL, 0, str);
+
+		/* Hangul script decomposition normalization algorithm. */
+		if (hangul_syllable(curr) != 0)
 		{
-			hangul = (curr - HANGUL_BASE);
-			lead = (HANGUL_LEAD_BASE + (hangul / HANGUL_BASE_COUNT));
-			vowel = (HANGUL_VOWEL_BASE + \
-				((hangul % HANGUL_BASE_COUNT) / HANGUL_TRAIL_COUNT));
-			trail = (HANGUL_TRAIL_BASE + (hangul % HANGUL_TRAIL_COUNT));
-			count = ((trail != HANGUL_TRAIL_BASE) ? 3 : 2);
+			curr = (curr - HANGUL_BASE);
+			lead = (HANGUL_LEAD_MIN + (curr / HANGUL_BASE_COUNT));
+			vowel = (HANGUL_VOWEL_MIN + \
+				((curr % HANGUL_BASE_COUNT) / HANGUL_TRAIL_COUNT));
+			trail = (HANGUL_TRAIL_MIN + (curr % HANGUL_TRAIL_COUNT));
+			count = ((trail != HANGUL_TRAIL_MIN) ? 3 : 2);
 			if ((reqsize + count) > size)
 				count = (size - reqsize);
 			if (buffer != NULL)
 			{
 				if (count >= 1)
-					buffer[0] = lead;
+					buffer[0] = (wchar_t) lead;
 				if (count >= 2)
-					buffer[1] = vowel;
-				if ((count >= 3) && (trail != HANGUL_TRAIL_BASE))
-					buffer[2] = trail;
+					buffer[1] = (wchar_t) vowel;
+				if ((count >= 3) && (trail != HANGUL_TRAIL_MIN))
+					buffer[2] = (wchar_t) trail;
 			}
-			count = ((curr != HANGUL_TRAIL_BASE) ? 3 : 2);
+			count = ((trail != HANGUL_TRAIL_MIN) ? 3 : 2);
 		}
-		else

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-soc-all mailing list