git: bae932999e0f - stable/14 - libc: Implement N2630.
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 07 Sep 2023 20:28:45 UTC
The branch stable/14 has been updated by des:
URL: https://cgit.FreeBSD.org/src/commit/?id=bae932999e0fa9e9a08d84ca992332c57e5b42be
commit bae932999e0fa9e9a08d84ca992332c57e5b42be
Author: Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2023-08-28 15:32:23 +0000
Commit: Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2023-09-07 20:28:30 +0000
libc: Implement N2630.
This adds formatted input/output of binary integer numbers to the printf(), scanf(), and strtol() families, including their wide-character counterparts.
Reviewed by: imp, emaste
Differential Revision: https://reviews.freebsd.org/D41511
(cherry picked from commit d9dc1603d6e48cca84cad3ebe859129131b8387c)
libc: Add unit tests for N2630 and possible collateral damage.
Reviewed by: imp, emaste
Differential Revision: https://reviews.freebsd.org/D41512
(cherry picked from commit b9385720f34b536ef2568a642e8b1fad0450056f)
libc: Document support for binary integers.
Reviewed by: debdrup, emaste
Differential Revision: https://reviews.freebsd.org/D41522
(cherry picked from commit 76edfabbecdec686a570b8e009d5ea4112f943e0)
libc: Fix fixed-width case in the new integer parser.
Fixes: d9dc1603d6e4
Differential Revision: https://reviews.freebsd.org/D41622
(cherry picked from commit aca3bd1602577591e5cd237c4bb0bb71b3be0c75)
libc: Add a wide version of snprintf_test.
Reviewed by: imp, emaste
Differential Revision: https://reviews.freebsd.org/D41726
(cherry picked from commit 4ec9ee9912765ac4ca57353999caa92a23283d8e)
libc: Suppress format checks on printf() / scanf() tests.
Reviewed by: jrtc27, markj, emaste
Differential Revision: https://reviews.freebsd.org/D41727
(cherry picked from commit 294bd2827e61a78041f6613f4b82235fcc454157)
Approved by: re (gjb)
---
contrib/netbsd-tests/lib/libc/stdlib/t_strtol.c | 6 +
lib/libc/iconv/_strtol.h | 7 +
lib/libc/iconv/_strtoul.h | 7 +
lib/libc/locale/wcstoimax.c | 7 +
lib/libc/locale/wcstol.c | 7 +
lib/libc/locale/wcstoll.c | 7 +
lib/libc/locale/wcstoul.c | 7 +
lib/libc/locale/wcstoull.c | 7 +
lib/libc/locale/wcstoumax.c | 7 +
lib/libc/stdio/printf.3 | 34 ++-
lib/libc/stdio/printfcommon.h | 14 ++
lib/libc/stdio/scanf.3 | 29 ++-
lib/libc/stdio/vfprintf.c | 13 ++
lib/libc/stdio/vfscanf.c | 267 +++++++++++++-----------
lib/libc/stdio/vfwprintf.c | 13 ++
lib/libc/stdio/vfwscanf.c | 263 +++++++++++++----------
lib/libc/stdlib/strtoimax.c | 7 +
lib/libc/stdlib/strtol.3 | 4 +-
lib/libc/stdlib/strtol.c | 7 +
lib/libc/stdlib/strtoll.c | 12 +-
lib/libc/stdlib/strtoul.3 | 4 +-
lib/libc/stdlib/strtoul.c | 7 +
lib/libc/stdlib/strtoull.c | 7 +
lib/libc/stdlib/strtoumax.c | 7 +
lib/libc/tests/stdio/Makefile | 12 +-
lib/libc/tests/stdio/snprintf_test.c | 139 ++++++++++++
lib/libc/tests/stdio/sscanf_test.c | 266 +++++++++++++++++++++++
lib/libc/tests/stdio/swprintf_test.c | 140 +++++++++++++
lib/libc/tests/stdio/swscanf_test.c | 267 ++++++++++++++++++++++++
29 files changed, 1315 insertions(+), 259 deletions(-)
diff --git a/contrib/netbsd-tests/lib/libc/stdlib/t_strtol.c b/contrib/netbsd-tests/lib/libc/stdlib/t_strtol.c
index 54e190760656..d1027fcc7bb1 100644
--- a/contrib/netbsd-tests/lib/libc/stdlib/t_strtol.c
+++ b/contrib/netbsd-tests/lib/libc/stdlib/t_strtol.c
@@ -94,6 +94,12 @@ ATF_TC_BODY(strtol_base, tc)
{ "01234567", 342391, 0, NULL },
{ "0123456789", 123456789, 10, NULL },
{ "0x75bcd15", 123456789, 0, NULL },
+#ifdef __FreeBSD__
+ { "0x", 0, 0, "x" },
+ { "0b111010110111100110100010101", 123456789, 0, NULL },
+ { "0b0123", 1, 0, "23" },
+ { "0b", 0, 0, "b" },
+#endif
};
long long int lli;
diff --git a/lib/libc/iconv/_strtol.h b/lib/libc/iconv/_strtol.h
index d183edbe8c3a..94a13c56db98 100644
--- a/lib/libc/iconv/_strtol.h
+++ b/lib/libc/iconv/_strtol.h
@@ -91,6 +91,13 @@ _FUNCNAME(const char *nptr, char **endptr, int base)
s += 2;
base = 16;
}
+ if ((base == 0 || base == 2) &&
+ c == '0' && (*s == 'b' || *s == 'B') &&
+ (s[1] >= '0' && s[1] <= '1')) {
+ c = s[1];
+ s += 2;
+ base = 2;
+ }
if (base == 0)
base = (c == '0' ? 8 : 10);
diff --git a/lib/libc/iconv/_strtoul.h b/lib/libc/iconv/_strtoul.h
index eade72e9c2e6..4944e1fb06e0 100644
--- a/lib/libc/iconv/_strtoul.h
+++ b/lib/libc/iconv/_strtoul.h
@@ -87,6 +87,13 @@ _FUNCNAME(const char *nptr, char **endptr, int base)
s += 2;
base = 16;
}
+ if ((base == 0 || base == 2) &&
+ c == '0' && (*s == 'b' || *s == 'B') &&
+ (s[1] >= '0' && s[1] <= '1')) {
+ c = s[1];
+ s += 2;
+ base = 2;
+ }
if (base == 0)
base = (c == '0' ? 8 : 10);
diff --git a/lib/libc/locale/wcstoimax.c b/lib/libc/locale/wcstoimax.c
index 259faa2b011c..5ed949cd0531 100644
--- a/lib/libc/locale/wcstoimax.c
+++ b/lib/libc/locale/wcstoimax.c
@@ -86,6 +86,13 @@ wcstoimax_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
s += 2;
base = 16;
}
+ if ((base == 0 || base == 2) &&
+ c == L'0' && (*s == L'b' || *s == L'B') &&
+ (s[1] >= L'0' && s[1] <= L'1')) {
+ c = s[1];
+ s += 2;
+ base = 2;
+ }
if (base == 0)
base = c == L'0' ? 8 : 10;
acc = any = 0;
diff --git a/lib/libc/locale/wcstol.c b/lib/libc/locale/wcstol.c
index b0b787384f39..1678b615ca1c 100644
--- a/lib/libc/locale/wcstol.c
+++ b/lib/libc/locale/wcstol.c
@@ -80,6 +80,13 @@ wcstol_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr, int
s += 2;
base = 16;
}
+ if ((base == 0 || base == 2) &&
+ c == L'0' && (*s == L'b' || *s == L'B') &&
+ (s[1] >= L'0' && s[1] <= L'1')) {
+ c = s[1];
+ s += 2;
+ base = 2;
+ }
if (base == 0)
base = c == L'0' ? 8 : 10;
acc = any = 0;
diff --git a/lib/libc/locale/wcstoll.c b/lib/libc/locale/wcstoll.c
index ac07d6c6adbf..ef1e6ef58861 100644
--- a/lib/libc/locale/wcstoll.c
+++ b/lib/libc/locale/wcstoll.c
@@ -86,6 +86,13 @@ wcstoll_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
s += 2;
base = 16;
}
+ if ((base == 0 || base == 2) &&
+ c == L'0' && (*s == L'b' || *s == L'B') &&
+ (s[1] >= L'0' && s[1] <= L'1')) {
+ c = s[1];
+ s += 2;
+ base = 2;
+ }
if (base == 0)
base = c == L'0' ? 8 : 10;
acc = any = 0;
diff --git a/lib/libc/locale/wcstoul.c b/lib/libc/locale/wcstoul.c
index 9f58db799c0e..2c9c8820b1f6 100644
--- a/lib/libc/locale/wcstoul.c
+++ b/lib/libc/locale/wcstoul.c
@@ -80,6 +80,13 @@ wcstoul_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
s += 2;
base = 16;
}
+ if ((base == 0 || base == 2) &&
+ c == L'0' && (*s == L'b' || *s == L'B') &&
+ (s[1] >= L'0' && s[1] <= L'1')) {
+ c = s[1];
+ s += 2;
+ base = 2;
+ }
if (base == 0)
base = c == L'0' ? 8 : 10;
acc = any = 0;
diff --git a/lib/libc/locale/wcstoull.c b/lib/libc/locale/wcstoull.c
index cbc7253f884d..692eb90eef6b 100644
--- a/lib/libc/locale/wcstoull.c
+++ b/lib/libc/locale/wcstoull.c
@@ -86,6 +86,13 @@ wcstoull_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
s += 2;
base = 16;
}
+ if ((base == 0 || base == 2) &&
+ c == L'0' && (*s == L'b' || *s == L'B') &&
+ (s[1] >= L'0' && s[1] <= L'1')) {
+ c = s[1];
+ s += 2;
+ base = 2;
+ }
if (base == 0)
base = c == L'0' ? 8 : 10;
acc = any = 0;
diff --git a/lib/libc/locale/wcstoumax.c b/lib/libc/locale/wcstoumax.c
index 4380cccf2424..c4f2ec3aaf41 100644
--- a/lib/libc/locale/wcstoumax.c
+++ b/lib/libc/locale/wcstoumax.c
@@ -86,6 +86,13 @@ wcstoumax_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
s += 2;
base = 16;
}
+ if ((base == 0 || base == 2) &&
+ c == L'0' && (*s == L'b' || *s == L'B') &&
+ (s[1] >= L'0' && s[1] <= L'1')) {
+ c = s[1];
+ s += 2;
+ base = 2;
+ }
if (base == 0)
base = c == L'0' ? 8 : 10;
acc = any = 0;
diff --git a/lib/libc/stdio/printf.3 b/lib/libc/stdio/printf.3
index 3e5c6ca23511..110851e2a421 100644
--- a/lib/libc/stdio/printf.3
+++ b/lib/libc/stdio/printf.3
@@ -31,7 +31,7 @@
.\"
.\" @(#)printf.3 8.1 (Berkeley) 6/4/93
.\"
-.Dd May 22, 2018
+.Dd August 21, 2023
.Dt PRINTF 3
.Os
.Sh NAME
@@ -212,6 +212,17 @@ and
.Cm u
conversions, this option has no effect.
For
+.Cm b
+and
+.Cm B
+conversions, a non-zero result has the string
+.Ql 0b
+(or
+.Ql 0B
+for
+.Cm B
+conversions) prepended to it.
+For
.Cm o
conversions, the precision of the number is increased to force the first
character of the output string to a zero.
@@ -245,7 +256,7 @@ For all conversions except
.Cm n ,
the converted value is padded on the left with zeros rather than blanks.
If a precision is given with a numeric conversion
-.Cm ( d , i , o , u , i , x ,
+.Cm ( b , B , d , i , o , u , i , x ,
and
.Cm X ) ,
the
@@ -301,7 +312,7 @@ followed by an
optional digit string.
If the digit string is omitted, the precision is taken as zero.
This gives the minimum number of digits to appear for
-.Cm d , i , o , u , x ,
+.Cm b , B , d , i , o , u , x ,
and
.Cm X
conversions, the number of digits to appear after the decimal-point for
@@ -319,12 +330,12 @@ conversions.
.It
An optional length modifier, that specifies the size of the argument.
The following length modifiers are valid for the
-.Cm d , i , n , o , u , x ,
+.Cm b , B , d , i , n , o , u , x ,
or
.Cm X
conversion:
.Bl -column ".Cm q Em (deprecated)" ".Vt signed char" ".Vt unsigned long long" ".Vt long long *"
-.It Sy Modifier Ta Cm d , i Ta Cm o , u , x , X Ta Cm n
+.It Sy Modifier Ta Cm d , i Ta Cm b , B , o , u , x , X Ta Cm n
.It Cm hh Ta Vt "signed char" Ta Vt "unsigned char" Ta Vt "signed char *"
.It Cm h Ta Vt short Ta Vt "unsigned short" Ta Vt "short *"
.It Cm l No (ell) Ta Vt long Ta Vt "unsigned long" Ta Vt "long *"
@@ -339,7 +350,7 @@ Note:
the
.Cm t
modifier, when applied to a
-.Cm o , u , x ,
+.Cm b , B , o , u , x ,
or
.Cm X
conversion, indicates that the argument is of an unsigned type
@@ -403,11 +414,16 @@ If a single format directive mixes positional
and non-positional arguments, the results are undefined.
.Pp
The conversion specifiers and their meanings are:
-.Bl -tag -width ".Cm diouxX"
-.It Cm diouxX
+.Bl -tag -width ".Cm bBdiouxX"
+.It Cm bBdiouxX
The
.Vt int
-(or appropriate variant) argument is converted to signed decimal
+(or appropriate variant) argument is converted to
+unsigned binary
+.Cm ( b
+and
+.Cm B ) ,
+signed decimal
.Cm ( d
and
.Cm i ) ,
diff --git a/lib/libc/stdio/printfcommon.h b/lib/libc/stdio/printfcommon.h
index ac5aed0a5fcd..411b778dc234 100644
--- a/lib/libc/stdio/printfcommon.h
+++ b/lib/libc/stdio/printfcommon.h
@@ -194,6 +194,13 @@ __ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs)
} while (sval != 0);
break;
+ case 2:
+ do {
+ *--cp = to_char(val & 1);
+ val >>= 1;
+ } while (val);
+ break;
+
case 8:
do {
*--cp = to_char(val & 7);
@@ -244,6 +251,13 @@ __ujtoa(uintmax_t val, CHAR *endp, int base, int octzero, const char *xdigs)
} while (sval != 0);
break;
+ case 2:
+ do {
+ *--cp = to_char(val & 1);
+ val >>= 1;
+ } while (val);
+ break;
+
case 8:
do {
*--cp = to_char(val & 7);
diff --git a/lib/libc/stdio/scanf.3 b/lib/libc/stdio/scanf.3
index b1c50e10a795..6cefdb133983 100644
--- a/lib/libc/stdio/scanf.3
+++ b/lib/libc/stdio/scanf.3
@@ -31,7 +31,7 @@
.\"
.\" @(#)scanf.3 8.2 (Berkeley) 12/11/93
.\"
-.Dd April 2, 2022
+.Dd August 21, 2023
.Dt SCANF 3
.Os
.Sh NAME
@@ -141,7 +141,7 @@ The conversion that follows occurs as usual, but no pointer is used;
the result of the conversion is simply discarded.
.It Cm hh
Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
or
.Cm n
and the next pointer is a pointer to a
@@ -150,7 +150,7 @@ and the next pointer is a pointer to a
.Vt int ) .
.It Cm h
Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
or
.Cm n
and the next pointer is a pointer to a
@@ -159,7 +159,7 @@ and the next pointer is a pointer to a
.Vt int ) .
.It Cm l No (ell)
Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
or
.Cm n
and the next pointer is a pointer to a
@@ -185,7 +185,7 @@ and the next pointer is a pointer to an array of
.Vt char ) .
.It Cm ll No (ell ell)
Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
or
.Cm n
and the next pointer is a pointer to a
@@ -201,7 +201,7 @@ and the next pointer is a pointer to
.Vt "long double" .
.It Cm j
Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
or
.Cm n
and the next pointer is a pointer to a
@@ -210,7 +210,7 @@ and the next pointer is a pointer to a
.Vt int ) .
.It Cm t
Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
or
.Cm n
and the next pointer is a pointer to a
@@ -219,7 +219,7 @@ and the next pointer is a pointer to a
.Vt int ) .
.It Cm z
Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
or
.Cm n
and the next pointer is a pointer to a
@@ -229,7 +229,7 @@ and the next pointer is a pointer to a
.It Cm q
(deprecated.)
Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
or
.Cm n
and the next pointer is a pointer to a
@@ -273,6 +273,10 @@ matches a single input
.Ql %
character.
No conversion is done, and assignment does not occur.
+.It Cm b , B
+Matches an optionally signed binary integer;
+the next pointer must be a pointer to
+.Vt "unsigned int" .
.It Cm d
Matches an optionally signed decimal integer;
the next pointer must be a pointer to
@@ -281,7 +285,12 @@ the next pointer must be a pointer to
Matches an optionally signed integer;
the next pointer must be a pointer to
.Vt int .
-The integer is read in base 16 if it begins
+The integer is read
+in base 2 if it begins with
+.Ql 0b
+or
+.Ql 0B ,
+in base 16 if it begins
with
.Ql 0x
or
diff --git a/lib/libc/stdio/vfprintf.c b/lib/libc/stdio/vfprintf.c
index ad655c5d78d4..5e5a9b5e31c1 100644
--- a/lib/libc/stdio/vfprintf.c
+++ b/lib/libc/stdio/vfprintf.c
@@ -613,6 +613,19 @@ reswitch: switch (ch) {
case 'z':
flags |= SIZET;
goto rflag;
+ case 'B':
+ case 'b':
+ if (flags & INTMAX_SIZE)
+ ujval = UJARG();
+ else
+ ulval = UARG();
+ base = 2;
+ /* leading 0b/B only if non-zero */
+ if (flags & ALT &&
+ (flags & INTMAX_SIZE ? ujval != 0 : ulval != 0))
+ ox[1] = ch;
+ goto nosign;
+ break;
case 'C':
flags |= LONGINT;
/*FALLTHROUGH*/
diff --git a/lib/libc/stdio/vfscanf.c b/lib/libc/stdio/vfscanf.c
index cc2e1e428321..9727c9e70c34 100644
--- a/lib/libc/stdio/vfscanf.c
+++ b/lib/libc/stdio/vfscanf.c
@@ -6,6 +6,8 @@
*
* Copyright (c) 2011 The FreeBSD Foundation
*
+ * Copyright (c) 2023 Dag-Erling Smørgrav
+ *
* Portions of this software were developed by David Chisnall
* under sponsorship from the FreeBSD Foundation.
*
@@ -80,16 +82,6 @@ static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93";
#define SHORTSHORT 0x4000 /* hh: char */
#define UNSIGNED 0x8000 /* %[oupxX] conversions */
-/*
- * The following are used in integral conversions only:
- * SIGNOK, NDIGITS, PFXOK, and NZDIGITS
- */
-#define SIGNOK 0x40 /* +/- is (still) legal */
-#define NDIGITS 0x80 /* no digits detected */
-#define PFXOK 0x100 /* 0x prefix is (still) legal */
-#define NZDIGITS 0x200 /* no zero digits detected */
-#define HAVESIGN 0x10000 /* sign detected */
-
/*
* Conversion types.
*/
@@ -307,129 +299,160 @@ convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
return (n);
}
+enum parseint_state {
+ begin,
+ havesign,
+ havezero,
+ haveprefix,
+ any,
+};
+
+static __inline int
+parseint_fsm(int c, enum parseint_state *state, int *base)
+{
+ switch (c) {
+ case '+':
+ case '-':
+ if (*state == begin) {
+ *state = havesign;
+ return 1;
+ }
+ break;
+ case '0':
+ if (*state == begin || *state == havesign) {
+ *state = havezero;
+ } else {
+ *state = any;
+ }
+ return 1;
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ if (*state == havezero && *base == 0) {
+ *base = 8;
+ }
+ /* FALL THROUGH */
+ case '8':
+ case '9':
+ if (*state == begin ||
+ *state == havesign) {
+ if (*base == 0) {
+ *base = 10;
+ }
+ }
+ if (*state == begin ||
+ *state == havesign ||
+ *state == havezero ||
+ *state == haveprefix ||
+ *state == any) {
+ if (*base > c - '0') {
+ *state = any;
+ return 1;
+ }
+ }
+ break;
+ case 'b':
+ if (*state == havezero) {
+ if (*base == 0 || *base == 2) {
+ *state = haveprefix;
+ *base = 2;
+ return 1;
+ }
+ }
+ /* FALL THROUGH */
+ case 'a':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ if (*state == begin ||
+ *state == havesign ||
+ *state == havezero ||
+ *state == haveprefix ||
+ *state == any) {
+ if (*base > c - 'a' + 10) {
+ *state = any;
+ return 1;
+ }
+ }
+ break;
+ case 'B':
+ if (*state == havezero) {
+ if (*base == 0 || *base == 2) {
+ *state = haveprefix;
+ *base = 2;
+ return 1;
+ }
+ }
+ /* FALL THROUGH */
+ case 'A':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ if (*state == begin ||
+ *state == havesign ||
+ *state == havezero ||
+ *state == haveprefix ||
+ *state == any) {
+ if (*base > c - 'A' + 10) {
+ *state = any;
+ return 1;
+ }
+ }
+ break;
+ case 'x':
+ case 'X':
+ if (*state == havezero) {
+ if (*base == 0 || *base == 16) {
+ *state = haveprefix;
+ *base = 16;
+ return 1;
+ }
+ }
+ break;
+ }
+ return 0;
+}
+
/*
- * Read an integer, storing it in buf. The only relevant bit in the
- * flags argument is PFXOK.
+ * Read an integer, storing it in buf.
*
* Return 0 on a match failure, and the number of characters read
* otherwise.
*/
static __inline int
-parseint(FILE *fp, char * __restrict buf, int width, int base, int flags)
+parseint(FILE *fp, char * __restrict buf, int width, int base)
{
- /* `basefix' is used to avoid `if' tests */
- static const short basefix[17] =
- { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+ enum parseint_state state = begin;
char *p;
int c;
- flags |= SIGNOK | NDIGITS | NZDIGITS;
for (p = buf; width; width--) {
- c = *fp->_p;
- /*
- * Switch on the character; `goto ok' if we accept it
- * as a part of number.
- */
- switch (c) {
-
- /*
- * The digit 0 is always legal, but is special. For
- * %i conversions, if no digits (zero or nonzero) have
- * been scanned (only signs), we will have base==0.
- * In that case, we should set it to 8 and enable 0x
- * prefixing. Also, if we have not scanned zero
- * digits before this, do not turn off prefixing
- * (someone else will turn it off if we have scanned
- * any nonzero digits).
- */
- case '0':
- if (base == 0) {
- base = 8;
- flags |= PFXOK;
- }
- if (flags & NZDIGITS)
- flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
- else
- flags &= ~(SIGNOK|PFXOK|NDIGITS);
- goto ok;
-
- /* 1 through 7 always legal */
- case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- base = basefix[base];
- flags &= ~(SIGNOK | PFXOK | NDIGITS);
- goto ok;
-
- /* digits 8 and 9 ok iff decimal or hex */
- case '8': case '9':
- base = basefix[base];
- if (base <= 8)
- break; /* not legal here */
- flags &= ~(SIGNOK | PFXOK | NDIGITS);
- goto ok;
-
- /* letters ok iff hex */
- case 'A': case 'B': case 'C':
- case 'D': case 'E': case 'F':
- case 'a': case 'b': case 'c':
- case 'd': case 'e': case 'f':
- /* no need to fix base here */
- if (base <= 10)
- break; /* not legal here */
- flags &= ~(SIGNOK | PFXOK | NDIGITS);
- goto ok;
-
- /* sign ok only as first character */
- case '+': case '-':
- if (flags & SIGNOK) {
- flags &= ~SIGNOK;
- flags |= HAVESIGN;
- goto ok;
- }
+ c = __sgetc(fp);
+ if (c == EOF)
break;
-
- /*
- * x ok iff flag still set & 2nd char (or 3rd char if
- * we have a sign).
- */
- case 'x': case 'X':
- if (flags & PFXOK && p ==
- buf + 1 + !!(flags & HAVESIGN)) {
- base = 16; /* if %i */
- flags &= ~PFXOK;
- goto ok;
- }
+ if (!parseint_fsm(c, &state, &base))
break;
- }
-
- /*
- * If we got here, c is not a legal character for a
- * number. Stop accumulating digits.
- */
- break;
- ok:
- /*
- * c is legal: store it and look at the next.
- */
*p++ = c;
- if (--fp->_r > 0)
- fp->_p++;
- else if (__srefill(fp))
- break; /* EOF */
}
/*
- * If we had only a sign, it is no good; push back the sign.
- * If the number ends in `x', it was [sign] '0' 'x', so push
- * back the x and treat it as [sign] '0'.
+ * If we only had a sign, push it back. If we only had a 0b or 0x
+ * prefix (possibly preceded by a sign), we view it as "0" and
+ * push back the letter. In all other cases, if we stopped
+ * because we read a non-number character, push it back.
*/
- if (flags & NDIGITS) {
- if (p > buf)
- (void) __ungetc(*(u_char *)--p, fp);
- return (0);
- }
- c = ((u_char *)p)[-1];
- if (c == 'x' || c == 'X') {
- --p;
+ if (state == havesign) {
+ p--;
+ (void) __ungetc(*(u_char *)p, fp);
+ } else if (state == haveprefix) {
+ p--;
+ (void) __ungetc(c, fp);
+ } else if (width && c != EOF) {
(void) __ungetc(c, fp);
}
return (p - buf);
@@ -554,6 +577,13 @@ literal:
/*
* Conversions.
*/
+ case 'B':
+ case 'b':
+ c = CT_INT;
+ flags |= UNSIGNED;
+ base = 2;
+ break;
+
case 'd':
c = CT_INT;
base = 10;
@@ -578,7 +608,6 @@ literal:
case 'X':
case 'x':
- flags |= PFXOK; /* enable 0x prefixing */
c = CT_INT;
flags |= UNSIGNED;
base = 16;
@@ -613,7 +642,7 @@ literal:
break;
case 'p': /* pointer format is like hex */
- flags |= POINTER | PFXOK;
+ flags |= POINTER;
c = CT_INT; /* assumes sizeof(uintmax_t) */
flags |= UNSIGNED; /* >= sizeof(uintptr_t) */
base = 16;
@@ -738,7 +767,7 @@ literal:
width = sizeof(buf) - 2;
width++;
#endif
- nr = parseint(fp, buf, width, base, flags);
+ nr = parseint(fp, buf, width, base);
if (nr == 0)
goto match_failure;
if ((flags & SUPPRESS) == 0) {
diff --git a/lib/libc/stdio/vfwprintf.c b/lib/libc/stdio/vfwprintf.c
index fc681e8d0575..259a86467ea7 100644
--- a/lib/libc/stdio/vfwprintf.c
+++ b/lib/libc/stdio/vfwprintf.c
@@ -684,6 +684,19 @@ reswitch: switch (ch) {
case 'z':
flags |= SIZET;
goto rflag;
+ case 'B':
+ case 'b':
+ if (flags & INTMAX_SIZE)
+ ujval = UJARG();
+ else
+ ulval = UARG();
+ base = 2;
+ /* leading 0b/B only if non-zero */
+ if (flags & ALT &&
+ (flags & INTMAX_SIZE ? ujval != 0 : ulval != 0))
+ ox[1] = ch;
+ goto nosign;
+ break;
case 'C':
flags |= LONGINT;
/*FALLTHROUGH*/
diff --git a/lib/libc/stdio/vfwscanf.c b/lib/libc/stdio/vfwscanf.c
index 1a28ff665247..b03c9dba0699 100644
--- a/lib/libc/stdio/vfwscanf.c
+++ b/lib/libc/stdio/vfwscanf.c
@@ -9,6 +9,8 @@
*
* Copyright (c) 2011 The FreeBSD Foundation
*
+ * Copyright (c) 2023 Dag-Erling Smørgrav
+ *
* Portions of this software were developed by David Chisnall
* under sponsorship from the FreeBSD Foundation.
*
@@ -78,16 +80,6 @@ static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93";
#define SHORTSHORT 0x4000 /* hh: char */
#define UNSIGNED 0x8000 /* %[oupxX] conversions */
-/*
- * The following are used in integral conversions only:
- * SIGNOK, NDIGITS, PFXOK, and NZDIGITS
- */
-#define SIGNOK 0x40 /* +/- is (still) legal */
-#define NDIGITS 0x80 /* no digits detected */
-#define PFXOK 0x100 /* 0x prefix is (still) legal */
-#define NZDIGITS 0x200 /* no zero digits detected */
-#define HAVESIGN 0x10000 /* sign detected */
-
/*
* Conversion types.
*/
@@ -289,128 +281,161 @@ convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
return (nread);
}
+enum parseint_state {
+ begin,
+ havesign,
+ havezero,
+ haveprefix,
+ any,
+};
+
+static __inline int
+parseint_fsm(wchar_t c, enum parseint_state *state, int *base)
+{
+ switch (c) {
+ case '+':
+ case '-':
+ if (*state == begin) {
+ *state = havesign;
+ return 1;
+ }
+ break;
+ case '0':
+ if (*state == begin || *state == havesign) {
+ *state = havezero;
+ } else {
+ *state = any;
+ }
+ return 1;
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ if (*state == havezero && *base == 0) {
+ *base = 8;
+ }
+ /* FALL THROUGH */
+ case '8':
+ case '9':
+ if (*state == begin ||
+ *state == havesign) {
+ if (*base == 0) {
+ *base = 10;
+ }
+ }
+ if (*state == begin ||
+ *state == havesign ||
+ *state == havezero ||
+ *state == haveprefix ||
+ *state == any) {
+ if (*base > c - '0') {
+ *state = any;
+ return 1;
+ }
+ }
+ break;
+ case 'b':
+ if (*state == havezero) {
+ if (*base == 0 || *base == 2) {
+ *state = haveprefix;
+ *base = 2;
+ return 1;
+ }
+ }
+ /* FALL THROUGH */
+ case 'a':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ if (*state == begin ||
+ *state == havesign ||
+ *state == havezero ||
+ *state == haveprefix ||
+ *state == any) {
+ if (*base > c - 'a' + 10) {
+ *state = any;
+ return 1;
+ }
+ }
+ break;
+ case 'B':
+ if (*state == havezero) {
+ if (*base == 0 || *base == 2) {
+ *state = haveprefix;
*** 1248 LINES SKIPPED ***