svn commit: r250883 - in head: include include/xlocale lib/libc/locale sys/sys tools/regression/lib/libc/locale

Ed Schouten ed at FreeBSD.org
Tue May 21 19:59:41 UTC 2013


Author: ed
Date: Tue May 21 19:59:37 2013
New Revision: 250883
URL: http://svnweb.freebsd.org/changeset/base/250883

Log:
  Add <uchar.h>.
  
  The <uchar.h> header, part of C11, adds a small number of utility
  functions for 16/32-bit "universal" characters, which may or may not be
  UTF-16/32. As our wchar_t is already ISO 10646, simply add light-weight
  wrappers around wcrtomb() and mbrtowc().
  
  While there, also add (not-yet-standard) _l functions, similar to the
  ones we already have for the other locale-dependent functions.
  
  Reviewed by:	theraven

Added:
  head/include/uchar.h   (contents, props changed)
  head/include/xlocale/_uchar.h   (contents, props changed)
  head/lib/libc/locale/c16rtomb.c   (contents, props changed)
  head/lib/libc/locale/c32rtomb.c   (contents, props changed)
  head/lib/libc/locale/mbrtoc16.c   (contents, props changed)
  head/lib/libc/locale/mbrtoc32.c   (contents, props changed)
  head/tools/regression/lib/libc/locale/test-c16rtomb.c   (contents, props changed)
  head/tools/regression/lib/libc/locale/test-mbrtoc16.c   (contents, props changed)
Modified:
  head/include/Makefile
  head/include/stdatomic.h
  head/include/xlocale/Makefile
  head/lib/libc/locale/Makefile.inc
  head/lib/libc/locale/Symbol.map
  head/lib/libc/locale/mbrtowc.3
  head/lib/libc/locale/wcrtomb.3
  head/lib/libc/locale/xlocale_private.h
  head/sys/sys/_types.h
  head/tools/regression/lib/libc/locale/Makefile

Modified: head/include/Makefile
==============================================================================
--- head/include/Makefile	Tue May 21 19:56:03 2013	(r250882)
+++ head/include/Makefile	Tue May 21 19:59:37 2013	(r250883)
@@ -23,7 +23,7 @@ INCS=	a.out.h ar.h assert.h bitstring.h 
 	stdnoreturn.h stdio.h stdlib.h string.h stringlist.h \
 	strings.h sysexits.h tar.h termios.h tgmath.h \
 	time.h timeconv.h timers.h ttyent.h \
-	ulimit.h unistd.h utime.h utmpx.h uuid.h varargs.h \
+	uchar.h ulimit.h unistd.h utime.h utmpx.h uuid.h varargs.h \
 	wchar.h wctype.h wordexp.h xlocale.h
 
 .PATH: ${.CURDIR}/../contrib/libc-vis

Modified: head/include/stdatomic.h
==============================================================================
--- head/include/stdatomic.h	Tue May 21 19:56:03 2013	(r250882)
+++ head/include/stdatomic.h	Tue May 21 19:59:37 2013	(r250883)
@@ -145,10 +145,8 @@ typedef _Atomic(long)			atomic_long;
 typedef _Atomic(unsigned long)		atomic_ulong;
 typedef _Atomic(long long)		atomic_llong;
 typedef _Atomic(unsigned long long)	atomic_ullong;
-#if 0
 typedef _Atomic(__char16_t)		atomic_char16_t;
 typedef _Atomic(__char32_t)		atomic_char32_t;
-#endif
 typedef _Atomic(__wchar_t)		atomic_wchar_t;
 typedef _Atomic(__int_least8_t)		atomic_int_least8_t;
 typedef _Atomic(__uint_least8_t)	atomic_uint_least8_t;

Added: head/include/uchar.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/include/uchar.h	Tue May 21 19:59:37 2013	(r250883)
@@ -0,0 +1,60 @@
+/*-
+ * Copyright (c) 2013 Ed Schouten <ed at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _UCHAR_H_
+#define	_UCHAR_H_
+
+#include <sys/cdefs.h>
+#include <sys/_types.h>
+
+#ifndef _MBSTATE_T_DECLARED
+typedef	__mbstate_t	mbstate_t;
+#define	_MBSTATE_T_DECLARED
+#endif
+
+#ifndef _SIZE_T_DECLARED
+typedef	__size_t	size_t;
+#define	_SIZE_T_DECLARED
+#endif
+
+typedef	__char16_t	char16_t;
+typedef	__char32_t	char32_t;
+
+__BEGIN_DECLS
+size_t	c16rtomb(char * __restrict, char16_t, mbstate_t * __restrict);
+size_t	c32rtomb(char * __restrict, char32_t, mbstate_t * __restrict);
+size_t	mbrtoc16(char16_t * __restrict, const char * __restrict, size_t,
+    mbstate_t * __restrict);
+size_t	mbrtoc32(char32_t * __restrict, const char * __restrict, size_t,
+    mbstate_t * __restrict);
+#if __BSD_VISIBLE || defined(_XLOCALE_H_)
+#include <xlocale/_uchar.h>
+#endif
+__END_DECLS
+
+#endif /* !_UCHAR_H_ */

Modified: head/include/xlocale/Makefile
==============================================================================
--- head/include/xlocale/Makefile	Tue May 21 19:56:03 2013	(r250882)
+++ head/include/xlocale/Makefile	Tue May 21 19:59:37 2013	(r250883)
@@ -2,7 +2,7 @@
 
 NO_OBJ=
 INCS=	_ctype.h _inttypes.h _langinfo.h _locale.h _monetary.h _stdio.h\
-	_stdlib.h _string.h _time.h _wchar.h
+	_stdlib.h _string.h _time.h _uchar.h _wchar.h
 INCSDIR=${INCLUDEDIR}/xlocale
 
 .include <bsd.prog.mk>

Added: head/include/xlocale/_uchar.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/include/xlocale/_uchar.h	Tue May 21 19:59:37 2013	(r250883)
@@ -0,0 +1,46 @@
+/*-
+ * Copyright (c) 2013 Ed Schouten <ed at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _LOCALE_T_DEFINED
+#define _LOCALE_T_DEFINED
+typedef struct	_xlocale *locale_t;
+#endif
+
+#ifndef _XLOCALE_UCHAR_H_
+#define _XLOCALE_UCHAR_H_
+
+size_t	c16rtomb_l(char * __restrict, char16_t, mbstate_t * __restrict,
+    locale_t);
+size_t	c32rtomb_l(char * __restrict, char32_t, mbstate_t * __restrict,
+    locale_t);
+size_t	mbrtoc16_l(char16_t * __restrict, const char * __restrict, size_t,
+    mbstate_t * __restrict, locale_t);
+size_t	mbrtoc32_l(char32_t * __restrict, const char * __restrict, size_t,
+    mbstate_t * __restrict, locale_t);
+
+#endif /* _XLOCALE_UCHAR_H_ */

Modified: head/lib/libc/locale/Makefile.inc
==============================================================================
--- head/lib/libc/locale/Makefile.inc	Tue May 21 19:56:03 2013	(r250882)
+++ head/lib/libc/locale/Makefile.inc	Tue May 21 19:59:37 2013	(r250883)
@@ -4,11 +4,11 @@
 # locale sources
 .PATH: ${.CURDIR}/${LIBC_ARCH}/locale ${.CURDIR}/locale
 
-SRCS+=	ascii.c big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \
-	gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \
+SRCS+=	ascii.c big5.c btowc.c c16rtomb.c c32rtomb.c collate.c collcmp.c euc.c \
+	fix_grouping.c gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \
 	ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \
 	mbrlen.c \
-	mbrtowc.c mbsinit.c mbsnrtowcs.c \
+	mbrtoc16.c mbrtoc32.c mbrtowc.c mbsinit.c mbsnrtowcs.c \
 	mbsrtowcs.c mbtowc.c mbstowcs.c \
 	mskanji.c nextwctype.c nl_langinfo.c nomacros.c none.c rpmatch.c \
 	rune.c \
@@ -72,7 +72,9 @@ MLINKS+=iswalnum_l.3 iswalpha_l.3 iswaln
 	iswalnum_l.3 iswspecial_l.3 iswalnum_l.3 nextwctype_l.3 \
 	iswalnum_l.3 towctrans_l.3 iswalnum_l.3 wctrans_l.3
 MLINKS+=isxdigit.3 ishexnumber.3
+MLINKS+=mbrtowc.3 mbrtoc16.3 mbrtowc.3 mbrtoc32.3
 MLINKS+=mbsrtowcs.3 mbsnrtowcs.3
+MLINKS+=wcrtomb.3 c16rtomb.3 wcrtomb.3 c32rtomb.3
 MLINKS+=wcsrtombs.3 wcsnrtombs.3
 MLINKS+=wcstod.3 wcstof.3 wcstod.3 wcstold.3
 MLINKS+=wcstol.3 wcstoul.3 wcstol.3 wcstoll.3 wcstol.3 wcstoull.3 \

Modified: head/lib/libc/locale/Symbol.map
==============================================================================
--- head/lib/libc/locale/Symbol.map	Tue May 21 19:56:03 2013	(r250882)
+++ head/lib/libc/locale/Symbol.map	Tue May 21 19:59:37 2013	(r250883)
@@ -199,6 +199,14 @@ FBSD_1.3 {
 	__istype_l;
 	__runes_for_locale;
 	_ThreadRuneLocale;
+	c16rtomb;
+	c16rtomb_l;
+	c32rtomb;
+	c32rtomb_l;
+	mbrtoc16;
+	mbrtoc16_l;
+	mbrtoc32;
+	mbrtoc32_l;
 };
 
 FBSDprivate_1.0 {

Added: head/lib/libc/locale/c16rtomb.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/lib/libc/locale/c16rtomb.c	Tue May 21 19:59:37 2013	(r250883)
@@ -0,0 +1,81 @@
+/*-
+ * Copyright (c) 2013 Ed Schouten <ed at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <errno.h>
+#include <uchar.h>
+#include "xlocale_private.h"
+
+typedef struct {
+	char16_t	lead_surrogate;	/* pending lead surrogate, else 0 */
+	mbstate_t	c32_mbstate;	/* state handed to c32rtomb_l() */
+} _Char16State;	/* NOTE(review): looks larger than the mbstate_t it overlays */
+
+size_t
+c16rtomb_l(char * __restrict s, char16_t c16, mbstate_t * __restrict ps,
+    locale_t locale)
+{
+	_Char16State *cs;
+	char32_t c32;
+
+	FIX_LOCALE(locale);
+	if (ps == NULL)
+		ps = &locale->c16rtomb;	/* per-locale fallback state */
+	cs = (_Char16State *)ps;	/* view the mbstate_t as our state */
+
+	/* A null s means c16 is ignored: convert a null character instead. */
+	if (s == NULL) {
+		c32 = 0;
+	} else if (cs->lead_surrogate >= 0xd800 &&
+	    cs->lead_surrogate <= 0xdbff) {
+		/* Lead surrogate pending: c16 must be the trail. */
+		if (c16 < 0xdc00 || c16 > 0xdfff) {
+			errno = EILSEQ;
+			return ((size_t)-1);
+		}
+		c32 = 0x10000 + ((cs->lead_surrogate & 0x3ff) << 10 |
+		    (c16 & 0x3ff));
+	} else if (c16 >= 0xd800 && c16 <= 0xdbff) {
+		/* Lead surrogate: remember it and emit nothing yet. */
+		cs->lead_surrogate = c16;
+		return (0);
+	} else {
+		/* Anything else is passed through as a single code point. */
+		c32 = c16;
+	}
+	cs->lead_surrogate = 0;	/* no surrogate pending any more */
+
+	return (c32rtomb_l(s, c32, &cs->c32_mbstate, locale));
+}
+
+size_t
+c16rtomb(char * __restrict s, char16_t c16, mbstate_t * __restrict ps)
+{
+
+	return (c16rtomb_l(s, c16, ps, __get_locale()));
+}

Added: head/lib/libc/locale/c32rtomb.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/lib/libc/locale/c32rtomb.c	Tue May 21 19:59:37 2013	(r250883)
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 2013 Ed Schouten <ed at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <errno.h>
+#include <uchar.h>
+#include <wchar.h>
+#include "xlocale_private.h"
+
+size_t
+c32rtomb_l(char * __restrict s, char32_t c32, mbstate_t * __restrict ps,
+    locale_t locale)
+{
+
+	/* Reject surrogates and values above U+10FFFF (Unicode 5.0, D90). */
+	if ((c32 >= 0xd800 && c32 <= 0xdfff) || c32 > 0x10ffff) {
+		errno = EILSEQ;
+		return ((size_t)-1);
+	}
+
+	FIX_LOCALE(locale);
+	if (ps == NULL)
+		ps = &locale->c32rtomb;	/* per-locale fallback state */
+
+	/* wchar_t is assumed to be UTF-32, so wcrtomb_l() does the rest. */
+	return (wcrtomb_l(s, c32, ps, locale));
+}
+
+size_t
+c32rtomb(char * __restrict s, char32_t c32, mbstate_t * __restrict ps)
+{
+
+	return (c32rtomb_l(s, c32, ps, __get_locale()));
+}

Added: head/lib/libc/locale/mbrtoc16.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/lib/libc/locale/mbrtoc16.c	Tue May 21 19:59:37 2013	(r250883)
@@ -0,0 +1,89 @@
+/*-
+ * Copyright (c) 2013 Ed Schouten <ed at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <uchar.h>
+#include "xlocale_private.h"
+
+typedef struct {
+	char16_t	trail_surrogate;	/* pending trail, else 0 */
+	mbstate_t	c32_mbstate;	/* state handed to mbrtoc32_l() */
+} _Char16State;	/* NOTE(review): looks larger than the mbstate_t it overlays */
+
+size_t
+mbrtoc16_l(char16_t * __restrict pc16, const char * __restrict s, size_t n,
+    mbstate_t * __restrict ps, locale_t locale)
+{
+	_Char16State *cs;
+	char32_t c32;
+	ssize_t len;	/* signed so that error returns compare < 0 */
+
+	FIX_LOCALE(locale);
+	if (ps == NULL)
+		ps = &locale->mbrtoc16;	/* per-locale fallback state */
+	cs = (_Char16State *)ps;	/* view the mbstate_t as our state */
+
+	/*
+	 * No character value is wanted, so drop any pending trail
+	 * surrogate and let mbrtoc32_l() do the work. Per the spec, a
+	 * null s means pc16 is ignored as well.
+	 */
+	if (pc16 == NULL || s == NULL) {
+		cs->trail_surrogate = 0;
+		return (mbrtoc32_l(NULL, s, n, &cs->c32_mbstate, locale));
+	}
+
+	/* Hand out the trail surrogate saved by the previous call. */
+	if (cs->trail_surrogate >= 0xdc00 && cs->trail_surrogate <= 0xdfff) {
+		*pc16 = cs->trail_surrogate;
+		cs->trail_surrogate = 0;
+		return ((size_t)-3);	/* no input consumed */
+	}
+
+	len = mbrtoc32_l(&c32, s, n, &cs->c32_mbstate, locale);
+	if (len >= 0) {	/* a character was completed */
+		if (c32 < 0x10000) {
+			/* Basic Multilingual Plane: one UTF-16 unit. */
+			*pc16 = c32;
+		} else {
+			/* Return the lead surrogate now, keep the trail. */
+			c32 -= 0x10000;
+			*pc16 = 0xd800 | (c32 >> 10);
+			cs->trail_surrogate = 0xdc00 | (c32 & 0x3ff);
+		}
+	}
+	return (len);
+}
+
+size_t
+mbrtoc16(char16_t * __restrict pc16, const char * __restrict s, size_t n,
+    mbstate_t * __restrict ps)
+{
+
+	return (mbrtoc16_l(pc16, s, n, ps, __get_locale()));
+}

Added: head/lib/libc/locale/mbrtoc32.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/lib/libc/locale/mbrtoc32.c	Tue May 21 19:59:37 2013	(r250883)
@@ -0,0 +1,53 @@
+/*-
+ * Copyright (c) 2013 Ed Schouten <ed at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <uchar.h>
+#include <wchar.h>
+#include "xlocale_private.h"
+
+size_t
+mbrtoc32_l(char32_t * __restrict pc32, const char * __restrict s, size_t n,
+    mbstate_t * __restrict ps, locale_t locale)
+{
+
+	FIX_LOCALE(locale);
+	if (ps == NULL)
+		ps = &locale->mbrtoc32;	/* per-locale fallback state */
+
+	/* wchar_t is assumed to be UTF-32, so mbrtowc_l() does the rest. */
+	return (mbrtowc_l(pc32, s, n, ps, locale));
+}
+
+size_t
+mbrtoc32(char32_t * __restrict pc32, const char * __restrict s, size_t n,
+    mbstate_t * __restrict ps)
+{
+
+	return (mbrtoc32_l(pc32, s, n, ps, __get_locale()));
+}

Modified: head/lib/libc/locale/mbrtowc.3
==============================================================================
--- head/lib/libc/locale/mbrtowc.3	Tue May 21 19:56:03 2013	(r250882)
+++ head/lib/libc/locale/mbrtowc.3	Tue May 21 19:59:37 2013	(r250883)
@@ -24,11 +24,13 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd April 8, 2004
+.Dd May 21, 2013
 .Dt MBRTOWC 3
 .Os
 .Sh NAME
-.Nm mbrtowc
+.Nm mbrtowc ,
+.Nm mbrtoc16 ,
+.Nm mbrtoc32
 .Nd "convert a character to a wide-character code (restartable)"
 .Sh LIBRARY
 .Lb libc
@@ -36,35 +38,51 @@
 .In wchar.h
 .Ft size_t
 .Fo mbrtowc
-.Fa "wchar_t * restrict pwc" "const char * restrict s" "size_t n"
+.Fa "wchar_t * restrict pc" "const char * restrict s" "size_t n"
+.Fa "mbstate_t * restrict ps"
+.Fc
+.In uchar.h
+.Ft size_t
+.Fo mbrtoc16
+.Fa "char16_t * restrict pc" "const char * restrict s" "size_t n"
+.Fa "mbstate_t * restrict ps"
+.Fc
+.Ft size_t
+.Fo mbrtoc32
+.Fa "char32_t * restrict pc" "const char * restrict s" "size_t n"
 .Fa "mbstate_t * restrict ps"
 .Fc
 .Sh DESCRIPTION
 The
-.Fn mbrtowc
-function inspects at most
+.Fn mbrtowc ,
+.Fn mbrtoc16
+and
+.Fn mbrtoc32
+functions inspect at most
 .Fa n
 bytes pointed to by
 .Fa s
 to determine the number of bytes needed to complete the next multibyte
 character.
 If a character can be completed, and
-.Fa pwc
+.Fa pc
 is not
 .Dv NULL ,
 the wide character which is represented by
 .Fa s
 is stored in the
-.Vt wchar_t
+.Vt wchar_t ,
+.Vt char16_t
+or
+.Vt char32_t
 it points to.
 .Pp
 If
 .Fa s
 is
 .Dv NULL ,
-.Fn mbrtowc
-behaves as if
-.Fa pwc
+these functions behave as if
+.Fa pc
 was
 .Dv NULL ,
 .Fa s
@@ -81,15 +99,24 @@ argument,
 is used to keep track of the shift state.
 If it is
 .Dv NULL ,
-.Fn mbrtowc
-uses an internal, static
+these functions use an internal, static
 .Vt mbstate_t
 object, which is initialized to the initial conversion state
 at program startup.
+.Pp
+As a single
+.Vt char16_t
+is not large enough to represent certain multibyte characters, the
+.Fn mbrtoc16
+function may need to be invoked multiple times to convert a single
+multibyte character sequence.
 .Sh RETURN VALUES
 The
-.Fn mbrtowc
-functions returns:
+.Fn mbrtowc ,
+.Fn mbrtoc16
+and
+.Fn mbrtoc32
+functions return:
 .Bl -tag -width indent
 .It 0
 The next
@@ -100,10 +127,13 @@ represent the null wide character
 .It >0
 The next
 .Fa n
-or fewer bytes
-represent a valid character,
-.Fn mbrtowc
-returns the number of bytes used to complete the multibyte character.
+or fewer bytes represent a valid character, these functions
+return the number of bytes used to complete the multibyte character.
+.It Po Vt size_t Pc Ns \-1
+An encoding error has occurred.
+The next
+.Fa n
+or fewer bytes do not contribute to a valid multibyte character.
 .It Po Vt size_t Pc Ns \-2
 The next
 .Fa n
@@ -111,16 +141,23 @@ contribute to, but do not complete, a va
 and all
 .Fa n
 bytes have been processed.
-.It Po Vt size_t Pc Ns \-1
-An encoding error has occurred.
-The next
-.Fa n
-or fewer bytes do not contribute to a valid multibyte character.
+.El
+.Pp
+The
+.Fn mbrtoc16
+function also returns:
+.Bl -tag -width indent
+.It Po Vt size_t Pc Ns \-3
+The next character resulting from a previous call has been stored.
+No bytes from the input have been consumed.
 .El
 .Sh ERRORS
 The
-.Fn mbrtowc
-function will fail if:
+.Fn mbrtowc ,
+.Fn mbrtoc16
+and
+.Fn mbrtoc32
+functions will fail if:
 .Bl -tag -width Er
 .It Bq Er EILSEQ
 An invalid multibyte sequence was detected.
@@ -134,6 +171,9 @@ The conversion state is invalid.
 .Xr wcrtomb 3
 .Sh STANDARDS
 The
-.Fn mbrtowc
-function conforms to
-.St -isoC-99 .
+.Fn mbrtowc ,
+.Fn mbrtoc16
+and
+.Fn mbrtoc32
+functions conform to
+.St -isoC-2011 .

Modified: head/lib/libc/locale/wcrtomb.3
==============================================================================
--- head/lib/libc/locale/wcrtomb.3	Tue May 21 19:56:03 2013	(r250882)
+++ head/lib/libc/locale/wcrtomb.3	Tue May 21 19:59:37 2013	(r250883)
@@ -24,24 +24,34 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd April 8, 2004
+.Dd May 21, 2013
 .Dt WCRTOMB 3
 .Os
 .Sh NAME
-.Nm wcrtomb
+.Nm wcrtomb ,
+.Nm c16rtomb ,
+.Nm c32rtomb
 .Nd "convert a wide-character code to a character (restartable)"
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
 .In wchar.h
 .Ft size_t
-.Fn wcrtomb "char * restrict s" "wchar_t wc" "mbstate_t * restrict ps"
+.Fn wcrtomb "char * restrict s" "wchar_t c" "mbstate_t * restrict ps"
+.In uchar.h
+.Ft size_t
+.Fn c16rtomb "char * restrict s" "char16_t c" "mbstate_t * restrict ps"
+.Ft size_t
+.Fn c32rtomb "char * restrict s" "char32_t c" "mbstate_t * restrict ps"
 .Sh DESCRIPTION
 The
-.Fn wcrtomb
-function stores a multibyte sequence representing the
+.Fn wcrtomb ,
+.Fn c16rtomb
+and
+.Fn c32rtomb
+functions store a multibyte sequence representing the
 wide character
-.Fa wc ,
+.Fa c ,
 including any necessary shift sequences, to the
 character array
 .Fa s ,
@@ -53,11 +63,10 @@ If
 .Fa s
 is
 .Dv NULL ,
-.Fn wcrtomb
-behaves as if
+these functions behave as if
 .Fa s
 pointed to an internal buffer and
-.Fa wc
+.Fa c
 was a null wide character (L'\e0').
 .Pp
 The
@@ -67,26 +76,32 @@ argument,
 is used to keep track of the shift state.
 If it is
 .Dv NULL ,
-.Fn wcrtomb
-uses an internal, static
+these functions use an internal, static
 .Vt mbstate_t
 object, which is initialized to the initial conversion state
 at program startup.
+.Pp
+As certain multibyte characters may only be represented by a series of
+16-bit characters, the
+.Fn c16rtomb
+function may need to be invoked multiple times before a multibyte
+sequence is returned.
 .Sh RETURN VALUES
-The
-.Fn wcrtomb
-functions returns the length (in bytes) of the multibyte sequence
+These functions return the length (in bytes) of the multibyte sequence
 needed to represent
-.Fa wc ,
+.Fa c ,
 or
 .Po Vt size_t Pc Ns \-1
 if
-.Fa wc
+.Fa c
 is not a valid wide character code.
 .Sh ERRORS
 The
-.Fn wcrtomb
-function will fail if:
+.Fn wcrtomb ,
+.Fn c16rtomb
+and
+.Fn c32rtomb
+functions will fail if:
 .Bl -tag -width Er
 .It Bq Er EILSEQ
 An invalid wide character code was specified.
@@ -100,6 +115,9 @@ The conversion state is invalid.
 .Xr wctomb 3
 .Sh STANDARDS
 The
-.Fn wcrtomb
-function conforms to
-.St -isoC-99 .
+.Fn wcrtomb ,
+.Fn c16rtomb
+and
+.Fn c32rtomb
+functions conform to
+.St -isoC-2011 .

Modified: head/lib/libc/locale/xlocale_private.h
==============================================================================
--- head/lib/libc/locale/xlocale_private.h	Tue May 21 19:56:03 2013	(r250882)
+++ head/lib/libc/locale/xlocale_private.h	Tue May 21 19:59:37 2013	(r250883)
@@ -109,6 +109,10 @@ struct _xlocale {
 	__mbstate_t mblen;
 	/** Persistent state used by mbrlen() calls. */
 	__mbstate_t mbrlen;
+	/** Persistent state used by mbrtoc16() calls. */
+	__mbstate_t mbrtoc16;
+	/** Persistent state used by mbrtoc32() calls. */
+	__mbstate_t mbrtoc32;
 	/** Persistent state used by mbrtowc() calls. */
 	__mbstate_t mbrtowc;
 	/** Persistent state used by mbsnrtowcs() calls. */
@@ -117,6 +121,10 @@ struct _xlocale {
 	__mbstate_t mbsrtowcs;
 	/** Persistent state used by mbtowc() calls. */
 	__mbstate_t mbtowc;
+	/** Persistent state used by c16rtomb() calls. */
+	__mbstate_t c16rtomb;
+	/** Persistent state used by c32rtomb() calls. */
+	__mbstate_t c32rtomb;
 	/** Persistent state used by wcrtomb() calls. */
 	__mbstate_t wcrtomb;
 	/** Persistent state used by wcsnrtombs() calls. */

Modified: head/sys/sys/_types.h
==============================================================================
--- head/sys/sys/_types.h	Tue May 21 19:56:03 2013	(r250882)
+++ head/sys/sys/_types.h	Tue May 21 19:59:37 2013	(r250883)
@@ -89,6 +89,12 @@ typedef	int		__ct_rune_t;	/* arg type fo
 typedef	__ct_rune_t	__rune_t;	/* rune_t (see above) */
 typedef	__ct_rune_t	__wint_t;	/* wint_t (see above) */
 
+/* Clang already provides these types as built-ins, but only in C++ mode. */
+#if !defined(__clang__) || !defined(__cplusplus)
+typedef	__uint_least16_t __char16_t;
+typedef	__uint_least32_t __char32_t;
+#endif
+
 typedef	__uint32_t	__dev_t;	/* device number */
 
 typedef	__uint32_t	__fixpt_t;	/* fixed point number */

Modified: head/tools/regression/lib/libc/locale/Makefile
==============================================================================
--- head/tools/regression/lib/libc/locale/Makefile	Tue May 21 19:56:03 2013	(r250882)
+++ head/tools/regression/lib/libc/locale/Makefile	Tue May 21 19:59:37 2013	(r250883)
@@ -14,7 +14,9 @@ TESTS=	test-mbrtowc	\
 	test-wcstombs	\
 	test-mblen	\
 	test-iswctype	\
-	test-towctrans
+	test-towctrans \
+	test-c16rtomb \
+	test-mbrtoc16
 
 .PHONY: tests
 tests: ${TESTS}

Added: head/tools/regression/lib/libc/locale/test-c16rtomb.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/tools/regression/lib/libc/locale/test-c16rtomb.c	Tue May 21 19:59:37 2013	(r250883)
@@ -0,0 +1,115 @@
+/*-
+ * Copyright (c) 2002 Tim J. Robbins
+ * All rights reserved.
+ *
+ * Copyright (c) 2013 Ed Schouten <ed at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Test program for c16rtomb() as specified by ISO/IEC 9899:2011.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <uchar.h>
+
+int
+main(int argc, char *argv[])
+{
+	mbstate_t s;
+	char buf[MB_LEN_MAX + 1];
+
+	/*
+	 * C/POSIX locale.
+	 */
+
+	printf("1..1\n");
+
+	/*
+	 * If the buffer argument is NULL, c16 is implicitly 0 and
+	 * c16rtomb() resets its internal state.
+	 */
+	assert(c16rtomb(NULL, L'\0', NULL) == 1);
+	assert(c16rtomb(NULL, 0xdc00, NULL) == 1);
+
+	/* Null wide character. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	assert(c16rtomb(buf, 0, &s) == 1);
+	assert((unsigned char)buf[0] == 0 && (unsigned char)buf[1] == 0xcc);
+
+	/* Latin letter A, internal state. */
+	assert(c16rtomb(NULL, L'\0', NULL) == 1);
+	assert(c16rtomb(NULL, L'A', NULL) == 1);
+
+	/* Latin letter A. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	assert(c16rtomb(buf, L'A', &s) == 1);
+	assert((unsigned char)buf[0] == 'A' && (unsigned char)buf[1] == 0xcc);
+
+	/* 'Pile of poo' (U+1F4A9) cannot be encoded in the C locale. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	assert(c16rtomb(buf, 0xd83d, &s) == 0);
+	assert(c16rtomb(buf, 0xdca9, &s) == (size_t)-1);
+	assert(errno == EILSEQ);
+
+	/*
+	 * UTF-8.
+	 */
+
+	assert(strcmp(setlocale(LC_CTYPE, "en_US.UTF-8"), "en_US.UTF-8") == 0);
+
+	/* Unicode character 'Pile of poo'. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	assert(c16rtomb(buf, 0xd83d, &s) == 0);
+	assert(c16rtomb(buf, 0xdca9, &s) == 4);
+	assert((unsigned char)buf[0] == 0xf0 && (unsigned char)buf[1] == 0x9f &&
+	    (unsigned char)buf[2] == 0x92 && (unsigned char)buf[3] == 0xa9 &&
+	    (unsigned char)buf[4] == 0xcc);
+
+	/* Invalid code; 'Pile of poo' without the trail surrogate. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	assert(c16rtomb(buf, 0xd83d, &s) == 0);
+	assert(c16rtomb(buf, L'A', &s) == (size_t)-1);
+	assert(errno == EILSEQ);
+
+	/* Invalid code; 'Pile of poo' without the lead surrogate. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	assert(c16rtomb(buf, 0xdca9, &s) == (size_t)-1);
+	assert(errno == EILSEQ);
+
+	printf("ok 1 - c16rtomb()\n");
+}

Added: head/tools/regression/lib/libc/locale/test-mbrtoc16.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/tools/regression/lib/libc/locale/test-mbrtoc16.c	Tue May 21 19:59:37 2013	(r250883)
@@ -0,0 +1,150 @@
+/*-
+ * Copyright (c) 2002 Tim J. Robbins
+ * All rights reserved.
+ *
+ * Copyright (c) 2013 Ed Schouten <ed at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Test program for mbrtoc16() as specified by ISO/IEC 9899:2011.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <uchar.h>
+
+int
+main(int argc, char *argv[])
+{
+	mbstate_t s;
+	size_t len;
+	char16_t c16;
+
+	/*
+	 * C/POSIX locale.
+	 */
+
+	printf("1..1\n");
+
+	/* Null wide character, internal state. */
+	assert(mbrtoc16(&c16, "", 1, NULL) == 0);
+	assert(c16 == 0);
+
+	/* Null wide character. */

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list