PERFORCE change 127140 for review
Fredrik Lindberg
fli at FreeBSD.org
Wed Oct 3 13:59:01 PDT 2007
http://perforce.freebsd.org/chv.cgi?CH=127140
Change 127140 by fli at fli_nexus on 2007/10/03 20:58:34
- Fix off-by-one bug.
- Make utf8_tolower() return an ssize_t instead of int
- utf8_decode() now returns the required number of characters, as a
negative value (instead of -1), if the supplied buffer is too small.
- Whitespace fixes.
Affected files ...
.. //depot/projects/soc2007/fli-mdns_sd/mdnsd/utf8.c#3 edit
.. //depot/projects/soc2007/fli-mdns_sd/mdnsd/utf8.h#3 edit
Differences ...
==== //depot/projects/soc2007/fli-mdns_sd/mdnsd/utf8.c#3 (text+ko) ====
@@ -41,11 +41,11 @@
* src - Pointer to start of wide character string
* slen - Logical (not bytes) length of character string.
* dst - Destination where the encoded string should be placed.
- * dlen - Length of ``dst''
+ * dlen - Length of ``dst'' (including terminating \0)
* Return values
* The length of the encoded string is returned on success.
- * On failure a number less than zero is returned, this absoute value of
- * this number can be used as a hint if ``dst'' is too small.
+ * If the destination buffer is too small, the value required (excluding
+ * terminating \0) is returned as a negative value.
*
* This function is partially based on code from libarchive by Tim Kientzle
*/
@@ -62,15 +62,15 @@
wc = *wp;
if (wc <= 0x7f)
len++;
- else if (wc <= 0x7ff)
+ else if (wc <= 0x7ff)
len += 2;
- else if (wc <= 0xffff)
+ else if (wc <= 0xffff)
len += 3;
- else if (wc <= 0x10ffff)
+ else if (wc <= 0x10ffff)
len += 4;
}
- if (len > dlen)
+ if (len >= dlen)
return (-len);
for (wp = src, p = dst; *wp != L'\0'; wp++) {
@@ -104,11 +104,13 @@
* Decodes an UTF-8 byte sequence into a wide character string
* Arguments
* src - Pointer to start of UTF-8 string
- * slen - Length of UTF-8 byte sequence
+ * slen - Length of UTF-8 byte sequence (excluding terminating \0)
* dst - Destination where the decoded string should be placed
- * dlen - Size of ``dst'' (logical length, not byte length)
+ * dlen - Size of ``dst'' (logical length, not byte length), incl. \0
* Return values
- * Returns logical length of decoded string or -1 on failure
+ * Returns logical length of decoded string or a value less than 0 on failure.
+ * If the destination buffer is too small the required number of
+ * characters (excluding terminating \0) is returned as a negative value.
*/
ssize_t
utf8_decode(const char *src, size_t slen, wchar_t *dst, size_t dlen)
@@ -118,8 +120,8 @@
char c;
wchar_t *wp;
- if (dlen < slen)
- return (-1);
+ if (dlen <= slen)
+ return (-slen);
len = 0;
for (p = src, wp = dst; *p != '\0'; wp++) {
@@ -305,18 +307,18 @@
* Arguments
* src - Original string ('\0'-terminated)
* dst - Pointer to space where the new string is stored
- * dlen - Length of destination buffer
+ * dlen - Length of destination buffer (incl terminating \0)
*
* Returns the length of the converted lower case string or a value
* less than 0 if a failure occurs.
*/
-int
+ssize_t
utf8_tolower(const char *src, char *dst, size_t dlen)
{
const char *p, *pe;
char *q, *qe;
uint32_t val, nval;
- size_t slen, i;
+ size_t slen;
int l1, l2;
slen = strlen(src);
@@ -324,7 +326,7 @@
q = dst;
pe = src + slen;
qe = dst + dlen;
- for (i = 0; i < slen; i++) {
+ while (p != pe && q != (qe - 1)) {
l1 = chrdec(p, &val, pe);
nval = chrlcase(val);
l2 = chrenc(q, nval, qe);
@@ -333,6 +335,8 @@
p += l1;
q += l2;
}
+ if (p != pe)
+ return (-1);
*q = '\0';
return (q - dst);
}
==== //depot/projects/soc2007/fli-mdns_sd/mdnsd/utf8.h#3 (text+ko) ====
@@ -41,6 +41,6 @@
ssize_t utf8_encode(const wchar_t *, char *, size_t);
ssize_t utf8_decode(const char *, size_t, wchar_t *, size_t);
int utf8_casecmp(const char *, const char *);
-int utf8_tolower(const char *, char *, size_t);
+ssize_t utf8_tolower(const char *, char *, size_t);
#endif /* _UTF8_H_ */
More information about the p4-projects
mailing list