svn commit: r248302 - in head: contrib/libc-vis lib/libc/gen

Brooks Davis brooks at FreeBSD.org
Thu Mar 14 23:51:48 UTC 2013


Author: brooks
Date: Thu Mar 14 23:51:47 2013
New Revision: 248302
URL: http://svnweb.freebsd.org/changeset/base/248302

Log:
  Update to the latest (un)vis(3) sources from NetBSD.  This adds
  multibyte support[0] and the new functions strenvisx and strsenvisx.
  
  Add MLINKS for vis(3) functions added by this and the initial import from
  NetBSD[1].
  
  PR:		bin/166364, bin/175418
  Submitted by:	"J.R. Oldroyd" <fbsd at opal.com>[0]
  		stefanf[1]
  Obtained from:	NetBSD
  MFC after:	2 weeks

Modified:
  head/contrib/libc-vis/unvis.3
  head/contrib/libc-vis/unvis.c
  head/contrib/libc-vis/vis.3
  head/contrib/libc-vis/vis.c
  head/contrib/libc-vis/vis.h
  head/lib/libc/gen/Makefile.inc
  head/lib/libc/gen/Symbol.map
Directory Properties:
  head/contrib/libc-vis/   (props changed)

Modified: head/contrib/libc-vis/unvis.3
==============================================================================
--- head/contrib/libc-vis/unvis.3	Thu Mar 14 23:35:52 2013	(r248301)
+++ head/contrib/libc-vis/unvis.3	Thu Mar 14 23:51:47 2013	(r248302)
@@ -1,4 +1,4 @@
-.\"	$NetBSD: unvis.3,v 1.23 2011/03/17 14:06:29 wiz Exp $
+.\"	$NetBSD: unvis.3,v 1.27 2012/12/15 07:34:36 wiz Exp $
 .\"	$FreeBSD$
 .\"
 .\" Copyright (c) 1989, 1991, 1993
@@ -126,15 +126,17 @@ The
 function has several return codes that must be handled properly.
 They are:
 .Bl -tag -width UNVIS_VALIDPUSH
-.It Li \&0 (zero)
+.It Li \&0 No (zero)
 Another character is necessary; nothing has been recognized yet.
 .It Dv UNVIS_VALID
 A valid character has been recognized and is available at the location
-pointed to by cp.
+pointed to by
+.Fa cp .
 .It Dv UNVIS_VALIDPUSH
 A valid character has been recognized and is available at the location
-pointed to by cp; however, the character currently passed in should
-be passed in again.
+pointed to by
+.Fa cp ;
+however, the character currently passed in should be passed in again.
 .It Dv UNVIS_NOCHAR
 A valid sequence was detected, but no character was produced.
 This return code is necessary to indicate a logical break between characters.
@@ -150,7 +152,7 @@ one more time with flag set to
 to extract any remaining character (the character passed in is ignored).
 .Pp
 The
-.Ar flag
+.Fa flag
 argument is also used to specify the encoding style of the source.
 If set to
 .Dv VIS_HTTPSTYLE
@@ -161,7 +163,8 @@ will decode URI strings as specified in 
 If set to
 .Dv VIS_HTTP1866 ,
 .Fn unvis
-will decode URI strings as specified in RFC 1866.
+will decode entity references and numeric character references
+as specified in RFC 1866.
 If set to
 .Dv VIS_MIMESTYLE ,
 .Fn unvis
@@ -169,7 +172,9 @@ will decode MIME Quoted-Printable string
 If set to
 .Dv VIS_NOESCAPE ,
 .Fn unvis
-will not decode \e quoted characters.
+will not decode
+.Ql \e
+quoted characters.
 .Pp
 The following code fragment illustrates a proper use of
 .Fn unvis .
@@ -204,7 +209,7 @@ The functions
 and
 .Fn strnunvisx
 will return \-1 on error and set
-.Va errno 
+.Va errno
 to:
 .Bl -tag -width Er
 .It Bq Er EINVAL
@@ -212,7 +217,7 @@ An invalid escape sequence was detected,
 .El
 .Pp
 In addition the functions
-.Fn strnunvis 
+.Fn strnunvis
 and
 .Fn strnunvisx
 will can also set
@@ -244,4 +249,14 @@ and
 functions appeared in
 .Nx 6.0
 and
-.Fx 10.0 .
+.Fx 9.2 .
+.Sh BUGS
+The names
+.Dv VIS_HTTP1808
+and
+.Dv VIS_HTTP1866
+are wrong.
+Percent-encoding was defined in RFC 1738, the original RFC for URL.
+RFC 1866 defines HTML 2.0, an application of SGML, from which it
+inherits concepts of numeric character references and entity
+references.

Modified: head/contrib/libc-vis/unvis.c
==============================================================================
--- head/contrib/libc-vis/unvis.c	Thu Mar 14 23:35:52 2013	(r248301)
+++ head/contrib/libc-vis/unvis.c	Thu Mar 14 23:51:47 2013	(r248302)
@@ -1,4 +1,4 @@
-/*	$NetBSD: unvis.c,v 1.40 2012/12/14 21:31:01 christos Exp $	*/
+/*	$NetBSD: unvis.c,v 1.41 2012/12/15 04:29:53 matt Exp $	*/
 
 /*-
  * Copyright (c) 1989, 1993
@@ -34,7 +34,7 @@
 #if 0
 static char sccsid[] = "@(#)unvis.c	8.1 (Berkeley) 6/4/93";
 #else
-__RCSID("$NetBSD: unvis.c,v 1.40 2012/12/14 21:31:01 christos Exp $");
+__RCSID("$NetBSD: unvis.c,v 1.41 2012/12/15 04:29:53 matt Exp $");
 #endif
 #endif /* LIBC_SCCS and not lint */
 __FBSDID("$FreeBSD$");
@@ -90,7 +90,7 @@ __weak_alias(strnunvisx,_strnunvisx)
  * RFC 1866
  */
 static const struct nv {
-	const char name[7];
+	char name[7];
 	uint8_t value;
 } nv[] = {
 	{ "AElig",	198 }, /* capital AE diphthong (ligature)  */

Modified: head/contrib/libc-vis/vis.3
==============================================================================
--- head/contrib/libc-vis/vis.3	Thu Mar 14 23:35:52 2013	(r248301)
+++ head/contrib/libc-vis/vis.3	Thu Mar 14 23:51:47 2013	(r248302)
@@ -1,4 +1,4 @@
-.\"	$NetBSD: vis.3,v 1.29 2012/12/14 22:55:59 christos Exp $
+.\"	$NetBSD: vis.3,v 1.39 2013/02/20 20:05:26 christos Exp $
 .\"	$FreeBSD$
 .\"
 .\" Copyright (c) 1989, 1991, 1993
@@ -30,7 +30,7 @@
 .\"
 .\"     @(#)vis.3	8.1 (Berkeley) 6/9/93
 .\"
-.Dd December 14, 2012
+.Dd February 19, 2013
 .Dt VIS 3
 .Os
 .Sh NAME
@@ -40,12 +40,14 @@
 .Nm strnvis ,
 .Nm strvisx ,
 .Nm strnvisx ,
+.Nm strenvisx ,
 .Nm svis ,
 .Nm snvis ,
 .Nm strsvis ,
 .Nm strsnvis ,
-.Nm strsvisx
-.Nm strsnvisx
+.Nm strsvisx ,
+.Nm strsnvisx ,
+.Nm strsenvisx
 .Nd visually encode characters
 .Sh LIBRARY
 .Lb libc
@@ -63,6 +65,8 @@
 .Fn strvisx "char *dst" "const char *src" "size_t len" "int flag"
 .Ft int
 .Fn strnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag"
+.Ft int
+.Fn strenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "int *cerr_ptr"
 .Ft char *
 .Fn svis "char *dst" "int c" "int flag" "int nextc" "const char *extra"
 .Ft char *
@@ -75,6 +79,8 @@
 .Fn strsvisx "char *dst" "const char *src" "size_t len" "int flag" "const char *extra"
 .Ft int
 .Fn strsnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra"
+.Ft int
+.Fn strsenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra" "int *cerr_ptr"
 .Sh DESCRIPTION
 The
 .Fn vis
@@ -89,11 +95,11 @@ needs no encoding, it is copied in unalt
 The string is null terminated, and a pointer to the end of the string is
 returned.
 The maximum length of any encoding is four
-characters (not including the trailing
+bytes (not including the trailing
 .Dv NUL ) ;
 thus, when
 encoding a set of characters into a buffer, the size of the buffer should
-be four times the number of characters encoded, plus one for the trailing
+be four times the number of bytes encoded, plus one for the trailing
 .Dv NUL .
 The flag parameter is used for altering the default range of
 characters considered for encoding and for altering the visual
@@ -142,16 +148,17 @@ terminate
 The size of
 .Fa dst
 must be four times the number
-of characters encoded from
+of bytes encoded from
 .Fa src
 (plus one for the
 .Dv NUL ) .
 Both
-forms return the number of characters in dst (not including
-the trailing
+forms return the number of characters in
+.Fa dst
+(not including the trailing
 .Dv NUL ) .
 The
-.Dq n
+.Dq Nm n
 versions of the functions also take an additional argument
 .Fa dlen
 that indicates the length of the
@@ -159,7 +166,7 @@ that indicates the length of the
 buffer.
 If
 .Fa dlen
-is not large enough to fix the converted string then the
+is not large enough to fit the converted string then the
 .Fn strnvis
 and
 .Fn strnvisx
@@ -167,6 +174,14 @@ functions return \-1 and set
 .Va errno
 to
 .Dv ENOSPC .
+The
+.Fn strenvisx
+function takes an additional argument,
+.Fa cerr_ptr ,
+that is used to pass in and out a multibyte conversion error flag.
+This is useful when processing single characters at a time when
+it is possible that the locale may be set to something other
+than the locale of the characters in the input data.
 .Pp
 The functions
 .Fn svis ,
@@ -174,16 +189,18 @@ The functions
 .Fn strsvis ,
 .Fn strsnvis ,
 .Fn strsvisx ,
+.Fn strsnvisx ,
 and
-.Fn strsnvisx
+.Fn strsenvisx
 correspond to
 .Fn vis ,
 .Fn nvis ,
 .Fn strvis ,
 .Fn strnvis ,
 .Fn strvisx ,
+.Fn strnvisx ,
 and
-.Fn strnvisx
+.Fn strenvisx
 but have an additional argument
 .Fa extra ,
 pointing to a
@@ -214,14 +231,13 @@ and
 .Fn strnvisx ) ,
 and the type of representation used.
 By default, all non-graphic characters,
-except space, tab, and newline are encoded.
-(See
-.Xr isgraph 3 . )
+except space, tab, and newline are encoded (see
+.Xr isgraph 3 ) .
 The following flags
 alter this:
 .Bl -tag -width VIS_WHITEX
 .It Dv VIS_GLOB
-Also encode magic characters
+Also encode the magic characters
 .Ql ( * ,
 .Ql \&? ,
 .Ql \&[
@@ -243,11 +259,13 @@ Synonym for
 \&|
 .Dv VIS_NL .
 .It Dv VIS_SAFE
-Only encode "unsafe" characters.
+Only encode
+.Dq unsafe
+characters.
 Unsafe means control characters which may cause common terminals to perform
 unexpected functions.
 Currently this form allows space, tab, newline, backspace, bell, and
-return - in addition to all graphic characters - unencoded.
+return \(em in addition to all graphic characters \(em unencoded.
 .El
 .Pp
 (The above flags have no effect for
@@ -287,8 +305,8 @@ Use an
 to represent meta characters (characters with the 8th
 bit set), and use caret
 .Ql ^
-to represent control characters see
-.Pf ( Xr iscntrl 3 ) .
+to represent control characters (see
+.Xr iscntrl 3 ) .
 The following formats are used:
 .Bl -tag -width xxxxx
 .It Dv \e^C
@@ -335,19 +353,20 @@ Use C-style backslash sequences to repre
 characters.
 The following sequences are used to represent the indicated characters:
 .Bd -unfilled -offset indent
-.Li \ea Tn  - BEL No (007)
-.Li \eb Tn  - BS No (010)
-.Li \ef Tn  - NP No (014)
-.Li \en Tn  - NL No (012)
-.Li \er Tn  - CR No (015)
-.Li \es Tn  - SP No (040)
-.Li \et Tn  - HT No (011)
-.Li \ev Tn  - VT No (013)
-.Li \e0 Tn  - NUL No (000)
+.Li \ea Tn  \(em BEL No (007)
+.Li \eb Tn  \(em BS No (010)
+.Li \ef Tn  \(em NP No (014)
+.Li \en Tn  \(em NL No (012)
+.Li \er Tn  \(em CR No (015)
+.Li \es Tn  \(em SP No (040)
+.Li \et Tn  \(em HT No (011)
+.Li \ev Tn  \(em VT No (013)
+.Li \e0 Tn  \(em NUL No (000)
 .Ed
 .Pp
-When using this format, the nextc parameter is looked at to determine
-if a
+When using this format, the
+.Fa nextc
+parameter is looked at to determine if a
 .Dv NUL
 character can be encoded as
 .Ql \e0
@@ -374,8 +393,8 @@ represents a lower case hexadecimal digi
 .It Dv VIS_MIMESTYLE
 Use MIME Quoted-Printable encoding as described in RFC 2045, only don't
 break lines and don't handle CRLF.
-The form is:
-.Ql %XX
+The form is
+.Ql =XX
 where
 .Em X
 represents an upper case hexadecimal digit.
@@ -392,6 +411,41 @@ meta characters as
 .Ql M-C ) .
 With this flag set, the encoding is
 ambiguous and non-invertible.
+.Sh MULTIBYTE CHARACTER SUPPORT
+These functions support multibyte character input.
+The encoding conversion is influenced by the setting of the
+.Ev LC_CTYPE
+environment variable which defines the set of characters
+that can be copied without encoding.
+.Pp
+When 8-bit data is present in the input,
+.Ev LC_CTYPE
+must be set to the correct locale or to the C locale.
+If the locales of the data and the conversion are mismatched,
+multibyte character recognition may fail and encoding will be performed
+byte-by-byte instead.
+.Pp
+As noted above,
+.Fa dst
+must be four times the number of bytes processed from
+.Fa src .
+But note that each multibyte character can be up to
+.Dv MB_LEN_MAX
+bytes
+.\" (see
+.\" .Xr multibyte 3 )
+so in terms of multibyte characters,
+.Fa dst
+must be four times
+.Dv MB_LEN_MAX
+times the number of characters processed from
+.Fa src .
+.Sh ENVIRONMENT
+.Bl -tag -width ".Ev LC_CTYPE"
+.It Ev LC_CTYPE
+Specify the locale of the input data.
+Set to C if the input data locale is unknown.
+.El
 .Sh ERRORS
 The functions
 .Fn nvis
@@ -407,11 +461,11 @@ and
 .Fn strsnvisx ,
 will return \-1 when the
 .Fa dlen
-destination buffer length size is not enough to perform the conversion while
+destination buffer size is not enough to perform the conversion while
 setting
 .Va errno
 to:
-.Bl -tag -width Er
+.Bl -tag -width ".Bq Er ENOSPC"
 .It Bq Er ENOSPC
 The destination buffer size is not large enough to perform the conversion.
 .El
@@ -419,18 +473,23 @@ The destination buffer size is not large
 .Xr unvis 1 ,
 .Xr vis 1 ,
 .Xr glob 3 ,
+.\" .Xr multibyte 3 ,
 .Xr unvis 3
 .Rs
 .%A T. Berners-Lee
 .%T Uniform Resource Locators (URL)
-.%O RFC1738
+.%O "RFC 1738"
+.Re
+.Rs
+.%T "Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies"
+.%O "RFC 2045"
 .Re
 .Sh HISTORY
 The
 .Fn vis ,
 .Fn strvis ,
 and
-.Fa strvisx
+.Fn strvisx
 functions first appeared in
 .Bx 4.4 .
 The
@@ -441,7 +500,7 @@ and
 functions appeared in
 .Nx 1.5
 and
-.Fx 10.0 .
+.Fx 9.2 .
 The buffer size limited versions of the functions
 .Po Fn nvis ,
 .Fn strnvis ,
@@ -451,6 +510,9 @@ The buffer size limited versions of the 
 and
 .Fn strsnvisx Pc
 appeared in
-.Nx 6.0
 and
-.Fx 10.0 .
+.Fx 9.2 .
+Multibyte character support was added in
+.Nx 7.0
+and
+.Fx 9.2 .

Modified: head/contrib/libc-vis/vis.c
==============================================================================
--- head/contrib/libc-vis/vis.c	Thu Mar 14 23:35:52 2013	(r248301)
+++ head/contrib/libc-vis/vis.c	Thu Mar 14 23:51:47 2013	(r248302)
@@ -1,4 +1,4 @@
-/*	$NetBSD: vis.c,v 1.45 2012/12/14 21:38:18 christos Exp $	*/
+/*	$NetBSD: vis.c,v 1.60 2013/02/21 16:21:20 joerg Exp $	*/
 
 /*-
  * Copyright (c) 1989, 1993
@@ -57,19 +57,23 @@
 
 #include <sys/cdefs.h>
 #if defined(LIBC_SCCS) && !defined(lint)
-__RCSID("$NetBSD: vis.c,v 1.45 2012/12/14 21:38:18 christos Exp $");
+__RCSID("$NetBSD: vis.c,v 1.60 2013/02/21 16:21:20 joerg Exp $");
 #endif /* LIBC_SCCS and not lint */
+#ifdef __FBSDID
 __FBSDID("$FreeBSD$");
+#define	_DIAGASSERT(x)	assert(x)
+#endif
 
 #include "namespace.h"
 #include <sys/types.h>
+#include <sys/param.h>
 
 #include <assert.h>
 #include <vis.h>
 #include <errno.h>
 #include <stdlib.h>
-
-#define	_DIAGASSERT(x)	assert(x)
+#include <wchar.h>
+#include <wctype.h>
 
 #ifdef __weak_alias
 __weak_alias(strvisx,_strvisx)
@@ -81,65 +85,66 @@ __weak_alias(strvisx,_strvisx)
 #include <stdio.h>
 #include <string.h>
 
-static char *do_svis(char *, size_t *, int, int, int, const char *);
+/*
+ * The reason for going through the trouble to deal with character encodings
+ * in vis(3), is that we use this to safely encode output of commands. This
+ * safe encoding varies depending on the character set. For example if we
+ * display ps output in French, we don't want to display French characters
+ * as M-foo.
+ */
+
+static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *);
 
 #undef BELL
-#define BELL '\a'
+#define BELL L'\a'
+
+#define iswoctal(c)	(((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7')
+#define iswwhite(c)	(c == L' ' || c == L'\t' || c == L'\n')
+#define iswsafe(c)	(c == L'\b' || c == BELL || c == L'\r')
+#define xtoa(c)		L"0123456789abcdef"[c]
+#define XTOA(c)		L"0123456789ABCDEF"[c]
 
-#define isoctal(c)	(((u_char)(c)) >= '0' && ((u_char)(c)) <= '7')
-#define iswhite(c)	(c == ' ' || c == '\t' || c == '\n')
-#define issafe(c)	(c == '\b' || c == BELL || c == '\r')
-#define xtoa(c)		"0123456789abcdef"[c]
-#define XTOA(c)		"0123456789ABCDEF"[c]
-
-#define MAXEXTRAS	9
-
-#define MAKEEXTRALIST(flag, extra, orig_str)				      \
-do {									      \
-	const char *orig = orig_str;					      \
-	const char *o = orig;						      \
-	char *e;							      \
-	while (*o++)							      \
-		continue;						      \
-	extra = malloc((size_t)((o - orig) + MAXEXTRAS));		      \
-	if (!extra) break;						      \
-	for (o = orig, e = extra; (*e++ = *o++) != '\0';)		      \
-		continue;						      \
-	e--;								      \
-	if (flag & VIS_GLOB) {						      \
-		*e++ = '*';						      \
-		*e++ = '?';						      \
-		*e++ = '[';						      \
-		*e++ = '#';						      \
-	}								      \
-	if (flag & VIS_SP) *e++ = ' ';					      \
-	if (flag & VIS_TAB) *e++ = '\t';				      \
-	if (flag & VIS_NL) *e++ = '\n';					      \
-	if ((flag & VIS_NOSLASH) == 0) *e++ = '\\';			      \
-	*e = '\0';							      \
-} while (/*CONSTCOND*/0)
+#define MAXEXTRAS	10
+
+#if !HAVE_NBTOOL_CONFIG_H
+#ifndef __NetBSD__
+/*
+ * On NetBSD MB_LEN_MAX is currently 32, which does not fit in any
+ * integral type and is probably wrong, since currently the maximum
+ * number of bytes a character needs is 6. Until this is fixed, the
+ * loops below are using sizeof(uint64_t) - 1 instead of MB_LEN_MAX, and
+ * the assertion is commented out.
+ */
+#ifdef __FreeBSD__
+/*
+ * On FreeBSD including <sys/systm.h> for CTASSERT only works in kernel
+ * mode.
+ */
+#ifndef CTASSERT
+#define CTASSERT(x)             _CTASSERT(x, __LINE__)
+#define _CTASSERT(x, y)         __CTASSERT(x, y)
+#define __CTASSERT(x, y)        typedef char __assert ## y[(x) ? 1 : -1]
+#endif
+#endif /* __FreeBSD__ */
+CTASSERT(MB_LEN_MAX <= sizeof(uint64_t));
+#endif /* !__NetBSD__ */
+#endif
 
 /*
  * This is do_hvis, for HTTP style (RFC 1808)
  */
-static char *
-do_hvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
+static wchar_t *
+do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
 {
-
-	if ((isascii(c) && isalnum(c))
+	if (iswalnum(c)
 	    /* safe */
-	    || c == '$' || c == '-' || c == '_' || c == '.' || c == '+'
+	    || c == L'$' || c == L'-' || c == L'_' || c == L'.' || c == L'+'
 	    /* extra */
-	    || c == '!' || c == '*' || c == '\'' || c == '(' || c == ')'
-	    || c == ',') {
-		dst = do_svis(dst, dlen, c, flag, nextc, extra);
-	} else {
-		if (dlen) {
-			if (*dlen < 3)
-				return NULL;
-			*dlen -= 3;
-		}
-		*dst++ = '%';
+	    || c == L'!' || c == L'*' || c == L'\'' || c == L'(' || c == L')'
+	    || c == L',')
+		dst = do_svis(dst, c, flags, nextc, extra);
+	else {
+		*dst++ = L'%';
 		*dst++ = xtoa(((unsigned int)c >> 4) & 0xf);
 		*dst++ = xtoa((unsigned int)c & 0xf);
 	}
@@ -151,312 +156,448 @@ do_hvis(char *dst, size_t *dlen, int c, 
  * This is do_mvis, for Quoted-Printable MIME (RFC 2045)
  * NB: No handling of long lines or CRLF.
  */
-static char *
-do_mvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
+static wchar_t *
+do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
 {
-	if ((c != '\n') &&
+	if ((c != L'\n') &&
 	    /* Space at the end of the line */
-	    ((isspace(c) && (nextc == '\r' || nextc == '\n')) ||
+	    ((iswspace(c) && (nextc == L'\r' || nextc == L'\n')) ||
 	    /* Out of range */
-	    (!isspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) ||
-	    /* Specific char to be escaped */ 
-	    strchr("#$@[\\]^`{|}~", c) != NULL)) {
-		if (dlen) {
-			if (*dlen < 3)
-				return NULL;
-			*dlen -= 3;
-		}
-		*dst++ = '=';
+	    (!iswspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) ||
+	    /* Specific char to be escaped */
+	    wcschr(L"#$@[\\]^`{|}~", c) != NULL)) {
+		*dst++ = L'=';
 		*dst++ = XTOA(((unsigned int)c >> 4) & 0xf);
 		*dst++ = XTOA((unsigned int)c & 0xf);
-	} else {
-		dst = do_svis(dst, dlen, c, flag, nextc, extra);
-	}
+	} else
+		dst = do_svis(dst, c, flags, nextc, extra);
 	return dst;
 }
 
 /*
- * This is do_vis, the central code of vis.
- * dst:	      Pointer to the destination buffer
- * c:	      Character to encode
- * flag:      Flag word
- * nextc:     The character following 'c'
- * extra:     Pointer to the list of extra characters to be
- *	      backslash-protected.
+ * Output single byte of multibyte character.
  */
-static char *
-do_svis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
+static wchar_t *
+do_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra)
 {
-	int isextra;
-	size_t odlen = dlen ? *dlen : 0;
-
-	isextra = strchr(extra, c) != NULL;
-#define HAVE(x) \
-	do { \
-		if (dlen) { \
-			if (*dlen < (x)) \
-				goto out; \
-			*dlen -= (x); \
-		} \
-	} while (/*CONSTCOND*/0)
-	if (!isextra && isascii(c) && (isgraph(c) || iswhite(c) ||
-	    ((flag & VIS_SAFE) && issafe(c)))) {
-		HAVE(1);
-		*dst++ = c;
-		return dst;
-	}
-	if (flag & VIS_CSTYLE) {
-		HAVE(2);
+	if (flags & VIS_CSTYLE) {
 		switch (c) {
-		case '\n':
-			*dst++ = '\\'; *dst++ = 'n';
+		case L'\n':
+			*dst++ = L'\\'; *dst++ = L'n';
 			return dst;
-		case '\r':
-			*dst++ = '\\'; *dst++ = 'r';
+		case L'\r':
+			*dst++ = L'\\'; *dst++ = L'r';
 			return dst;
-		case '\b':
-			*dst++ = '\\'; *dst++ = 'b';
+		case L'\b':
+			*dst++ = L'\\'; *dst++ = L'b';
 			return dst;
 		case BELL:
-			*dst++ = '\\'; *dst++ = 'a';
+			*dst++ = L'\\'; *dst++ = L'a';
 			return dst;
-		case '\v':
-			*dst++ = '\\'; *dst++ = 'v';
+		case L'\v':
+			*dst++ = L'\\'; *dst++ = L'v';
 			return dst;
-		case '\t':
-			*dst++ = '\\'; *dst++ = 't';
+		case L'\t':
+			*dst++ = L'\\'; *dst++ = L't';
 			return dst;
-		case '\f':
-			*dst++ = '\\'; *dst++ = 'f';
+		case L'\f':
+			*dst++ = L'\\'; *dst++ = L'f';
 			return dst;
-		case ' ':
-			*dst++ = '\\'; *dst++ = 's';
+		case L' ':
+			*dst++ = L'\\'; *dst++ = L's';
 			return dst;
-		case '\0':
-			*dst++ = '\\'; *dst++ = '0';
-			if (isoctal(nextc)) {
-				HAVE(2);
-				*dst++ = '0';
-				*dst++ = '0';
+		case L'\0':
+			*dst++ = L'\\'; *dst++ = L'0';
+			if (iswoctal(nextc)) {
+				*dst++ = L'0';
+				*dst++ = L'0';
 			}
 			return dst;
 		default:
-			if (isgraph(c)) {
-				*dst++ = '\\'; *dst++ = c;
+			if (iswgraph(c)) {
+				*dst++ = L'\\';
+				*dst++ = c;
 				return dst;
 			}
-			if (dlen)
-				*dlen = odlen;
 		}
 	}
-	if (isextra || ((c & 0177) == ' ') || (flag & VIS_OCTAL)) {
-		HAVE(4);
-		*dst++ = '\\';
-		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + '0';
-		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + '0';
-		*dst++ =			     (c	      & 07) + '0';
+	if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) {
+		*dst++ = L'\\';
+		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
+		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
+		*dst++ =			     (c	      & 07) + L'0';
 	} else {
-		if ((flag & VIS_NOSLASH) == 0) {
-			HAVE(1);
-			*dst++ = '\\';
-		}
+		if ((flags & VIS_NOSLASH) == 0)
+			*dst++ = L'\\';
 
 		if (c & 0200) {
-			HAVE(1);
-			c &= 0177; *dst++ = 'M';
+			c &= 0177;
+			*dst++ = L'M';
 		}
 
-		if (iscntrl(c)) {
-			HAVE(2);
-			*dst++ = '^';
+		if (iswcntrl(c)) {
+			*dst++ = L'^';
 			if (c == 0177)
-				*dst++ = '?';
+				*dst++ = L'?';
 			else
-				*dst++ = c + '@';
+				*dst++ = c + L'@';
 		} else {
-			HAVE(2);
-			*dst++ = '-'; *dst++ = c;
+			*dst++ = L'-';
+			*dst++ = c;
 		}
 	}
+
+	return dst;
+}
+
+/*
+ * This is do_vis, the central code of vis.
+ * dst:	      Pointer to the destination buffer
+ * c:	      Character to encode
+ * flags:     Flags word
+ * nextc:     The character following 'c'
+ * extra:     Pointer to the list of extra characters to be
+ *	      backslash-protected.
+ */
+static wchar_t *
+do_svis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
+{
+	int iswextra, i, shft;
+	uint64_t bmsk, wmsk;
+
+	iswextra = wcschr(extra, c) != NULL;
+	if (!iswextra && (iswgraph(c) || iswwhite(c) ||
+	    ((flags & VIS_SAFE) && iswsafe(c)))) {
+		*dst++ = c;
+		return dst;
+	}
+
+	/* See comment in istrsenvisx() output loop, below. */
+	wmsk = 0;
+	for (i = sizeof(wmsk) - 1; i >= 0; i--) {
+		shft = i * NBBY;
+		bmsk = (uint64_t)0xffLL << shft;
+		wmsk |= bmsk;
+		if ((c & wmsk) || i == 0)
+			dst = do_mbyte(dst, (wint_t)(
+			    (uint64_t)(c & bmsk) >> shft),
+			    flags, nextc, iswextra);
+	}
+
 	return dst;
-out:
-	*dlen = odlen;
-	return NULL;
 }
 
-typedef char *(*visfun_t)(char *, size_t *, int, int, int, const char *);
+typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *);
 
 /*
  * Return the appropriate encoding function depending on the flags given.
  */
 static visfun_t
-getvisfun(int flag)
+getvisfun(int flags)
 {
-	if (flag & VIS_HTTPSTYLE)
+	if (flags & VIS_HTTPSTYLE)
 		return do_hvis;
-	if (flag & VIS_MIMESTYLE)
+	if (flags & VIS_MIMESTYLE)
 		return do_mvis;
 	return do_svis;
 }
 
 /*
- * isnvis - visually encode characters, also encoding the characters
- *	  pointed to by `extra'
+ * Expand the list of extra characters that are to be visually encoded.
  */
-static char *
-isnvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
+static wchar_t *
+makeextralist(int flags, const char *src)
 {
-	char *nextra = NULL;
-	visfun_t f;
+	wchar_t *dst, *d;
+	size_t len;
 
-	_DIAGASSERT(dst != NULL);
-	_DIAGASSERT(extra != NULL);
-	MAKEEXTRALIST(flag, nextra, extra);
-	if (!nextra) {
-		if (dlen && *dlen == 0) {
-			errno = ENOSPC;
-			return NULL;
-		}
-		*dst = '\0';		/* can't create nextra, return "" */
-		return dst;
-	}
-	f = getvisfun(flag);
-	dst = (*f)(dst, dlen, c, flag, nextc, nextra);
-	free(nextra);
-	if (dst == NULL || (dlen && *dlen == 0)) {
-		errno = ENOSPC;
+	len = strlen(src);
+	if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL)
 		return NULL;
-	}
-	*dst = '\0';
-	return dst;
-}
 
-char *
-svis(char *dst, int c, int flag, int nextc, const char *extra)
-{
-	return isnvis(dst, NULL, c, flag, nextc, extra);
-}
+	if (mbstowcs(dst, src, len) == (size_t)-1) {
+		size_t i;
+		for (i = 0; i < len; i++)
+			dst[i] = (wint_t)(u_char)src[i];
+		d = dst + len;
+	} else
+		d = dst + wcslen(dst);
+
+	if (flags & VIS_GLOB) {
+		*d++ = L'*';
+		*d++ = L'?';
+		*d++ = L'[';
+		*d++ = L'#';
+	}
+
+	if (flags & VIS_SP) *d++ = L' ';
+	if (flags & VIS_TAB) *d++ = L'\t';
+	if (flags & VIS_NL) *d++ = L'\n';
+	if ((flags & VIS_NOSLASH) == 0) *d++ = L'\\';
+	*d = L'\0';
 
-char *
-snvis(char *dst, size_t dlen, int c, int flag, int nextc, const char *extra)
-{
-	return isnvis(dst, &dlen, c, flag, nextc, extra);
+	return dst;
 }
 
-
 /*
- * strsvis, strsvisx - visually encode characters from src into dst
- *
- *	Extra is a pointer to a \0-terminated list of characters to
- *	be encoded, too. These functions are useful e. g. to
- *	encode strings in such a way so that they are not interpreted
- *	by a shell.
- *
- *	Dst must be 4 times the size of src to account for possible
- *	expansion.  The length of dst, not including the trailing NULL,
- *	is returned.
- *
- *	Strsvisx encodes exactly len bytes from src into dst.
- *	This is useful for encoding a block of data.
+ * istrsenvisx()
+ * 	The main internal function.
+ *	All user-visible functions call this one.
  */
 static int
-istrsnvis(char *dst, size_t *dlen, const char *csrc, int flag, const char *extra)
+istrsenvisx(char *mbdst, size_t *dlen, const char *mbsrc, size_t mblength,
+    int flags, const char *mbextra, int *cerr_ptr)
 {
-	int c;
-	char *start;
-	char *nextra = NULL;
-	const unsigned char *src = (const unsigned char *)csrc;
+	wchar_t *dst, *src, *pdst, *psrc, *start, *extra;
+	size_t len, olen;
+	uint64_t bmsk, wmsk;
+	wint_t c;
 	visfun_t f;
+	int clen = 0, cerr = 0, error = -1, i, shft;
+	ssize_t mbslength, maxolen;
 
-	_DIAGASSERT(dst != NULL);
-	_DIAGASSERT(src != NULL);
-	_DIAGASSERT(extra != NULL);
-	MAKEEXTRALIST(flag, nextra, extra);
-	if (!nextra) {
-		*dst = '\0';		/* can't create nextra, return "" */
-		return 0;
+	_DIAGASSERT(mbdst != NULL);
+	_DIAGASSERT(mbsrc != NULL);
+	_DIAGASSERT(mbextra != NULL);
+
+	/*
+	 * Input (mbsrc) is a char string considered to be multibyte
+	 * characters.  The input loop will read this string pulling
+	 * one character, possibly multiple bytes, from mbsrc and
+	 * converting each to wchar_t in src.
+	 *
+	 * The vis conversion will be done using the wide char
+	 * wchar_t string.
+	 *
+	 * This will then be converted back to a multibyte string to
+	 * return to the caller.
+	 */
+
+	/* Allocate space for the wide char strings */
+	psrc = pdst = extra = NULL;
+	if (!mblength)
+		mblength = strlen(mbsrc);
+	if ((psrc = calloc(mblength + 1, sizeof(*psrc))) == NULL)
+		return -1;
+	if ((pdst = calloc((4 * mblength) + 1, sizeof(*pdst))) == NULL)
+		goto out;
+	dst = pdst;
+	src = psrc;
+
+	/* Use caller's multibyte conversion error flag. */
+	if (cerr_ptr)
+		cerr = *cerr_ptr;
+
+	/*
+	 * Input loop.
+	 * Handle up to mblength characters (not bytes).  We do not
+	 * stop at NULs because we may be processing a block of data
+	 * that includes NULs.
+	 */
+	mbslength = (ssize_t)mblength;
+	/*
+	 * When inputting a single character, must also read in the
+	 * next character for nextc, the look-ahead character.
+	 */
+	if (mbslength == 1)

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list