git: 05c9a0158f68 - main - libc: Add strverscmp(3) and versionsort(3)

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Thu, 25 Aug 2022 00:29:27 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=05c9a0158f6837bb3a3781e4ed75f66115f6415a

commit 05c9a0158f6837bb3a3781e4ed75f66115f6415a
Author:     Aymeric Wibo <obiwac@gmail.com>
AuthorDate: 2022-08-24 23:20:13 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2022-08-25 00:29:03 +0000

    libc: Add strverscmp(3) and versionsort(3)
    
    Add a strverscmp(3) function to libc, a GNU extension I implemented by
    reading its glibc manual page. It orders strings following a much more
    natural ordering (e.g. "ent1 < ent2 < ent10" as opposed to
    "ent1 < ent10 < ent2" with strcmp(3)'s lexicographic ordering).
    
    Also add versionsort(3) for use as scandir(3)'s compar argument.
    
    Update manual page for scandir(3) and add one for strverscmp(3).
    
    Reviewed by:    pstef, gbe, kib
    MFC after:      1 week
    Differential Revision: https://reviews.freebsd.org/D35807
---
 include/dirent.h                        |  1 +
 include/string.h                        |  1 +
 lib/libc/gen/Makefile.inc               |  3 +-
 lib/libc/gen/Symbol.map                 |  1 +
 lib/libc/gen/scandir.3                  | 22 +++++++-
 lib/libc/gen/scandir.c                  |  7 +++
 lib/libc/string/Makefile.inc            |  4 +-
 lib/libc/string/Symbol.map              |  1 +
 lib/libc/string/strverscmp.3            | 56 ++++++++++++++++++++
 lib/libc/string/strverscmp.c            | 91 ++++++++++++++++++++++++++++++++
 lib/libc/tests/string/Makefile          |  5 +-
 lib/libc/tests/string/strverscmp_test.c | 93 +++++++++++++++++++++++++++++++++
 12 files changed, 278 insertions(+), 7 deletions(-)

diff --git a/include/dirent.h b/include/dirent.h
index 047206258471..751a016838c0 100644
--- a/include/dirent.h
+++ b/include/dirent.h
@@ -108,6 +108,7 @@ int	 alphasort(const struct dirent **, const struct dirent **);
 int	 dirfd(DIR *);
 #endif
 #if __BSD_VISIBLE
+int	 versionsort(const struct dirent **, const struct dirent **);
 DIR	*__opendir2(const char *, int);
 int	 fdclosedir(DIR *);
 ssize_t	 getdents(int, char *, size_t);
diff --git a/include/string.h b/include/string.h
index 15d4dc7e9701..dc5756830208 100644
--- a/include/string.h
+++ b/include/string.h
@@ -81,6 +81,7 @@ char	*strcat(char * __restrict, const char * __restrict);
 char	*strchr(const char *, int) __pure;
 #if __BSD_VISIBLE
 char	*strchrnul(const char*, int) __pure;
+int	 strverscmp(const char *, const char *) __pure;
 #endif
 int	 strcmp(const char *, const char *) __pure;
 int	 strcoll(const char *, const char *);
diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc
index 6d0e542a1f7b..45977b6b9005 100644
--- a/lib/libc/gen/Makefile.inc
+++ b/lib/libc/gen/Makefile.inc
@@ -495,7 +495,8 @@ MLINKS+=rand48.3 _rand48.3 \
 MLINKS+=recv.2 recvmmsg.2
 MLINKS+=scandir.3 alphasort.3 \
 	scandir.3 scandirat.3 \
-	scandir.3 scandir_b.3
+	scandir.3 scandir_b.3 \
+	scandir.3 versionsort.3
 MLINKS+=sem_open.3 sem_close.3 \
 	sem_open.3 sem_unlink.3
 MLINKS+=sem_wait.3 sem_trywait.3
diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map
index 74676ffe2270..0a20a41d0e20 100644
--- a/lib/libc/gen/Symbol.map
+++ b/lib/libc/gen/Symbol.map
@@ -443,6 +443,7 @@ FBSD_1.7 {
 	 sched_getaffinity;
 	 sched_setaffinity;
 	 sched_getcpu;
+	 versionsort;
 	 __cpuset_alloc;
 	 __cpuset_free;
 };
diff --git a/lib/libc/gen/scandir.3 b/lib/libc/gen/scandir.3
index b533b33ce7a7..07d8074ae592 100644
--- a/lib/libc/gen/scandir.3
+++ b/lib/libc/gen/scandir.3
@@ -35,7 +35,8 @@
 .Nm scandir ,
 .Nm scandirat ,
 .Nm scandir_b ,
-.Nm alphasort
+.Nm alphasort ,
+.Nm versionsort
 .Nd scan a directory
 .Sh LIBRARY
 .Lb libc
@@ -65,6 +66,8 @@
 .Fc
 .Ft int
 .Fn alphasort "const struct dirent **d1" "const struct dirent **d2"
+.Ft int
+.Fn versionsort "const struct dirent **d1" "const struct dirent **d2"
 .Sh DESCRIPTION
 The
 .Fn scandir
@@ -106,6 +109,13 @@ is a routine which can be used for the
 argument to sort the array alphabetically using
 .Xr strcoll 3 .
 .Pp
+The
+.Fn versionsort
+function is a routine which can be used for the
+.Fa compar
+argument to sort the array naturally using
+.Xr strverscmp 3 .
+.Pp
 The memory allocated for the array can be deallocated with
 .Xr free 3 ,
 by freeing each pointer in the array and then the array itself.
@@ -161,7 +171,12 @@ cannot allocate enough memory to hold all the data structures.
 .Xr malloc 3 ,
 .Xr qsort 3 ,
 .Xr strcoll 3 ,
+.Xr strverscmp 3 ,
 .Xr dir 5
+.Sh STANDARDS
+The
+.Fn versionsort
+function is a GNU extension and conforms to no standard.
 .Sh HISTORY
 The
 .Fn scandir
@@ -171,5 +186,8 @@ functions appeared in
 .Bx 4.2 .
 The
 .Fn scandirat
-function was added in
+and
+.Fn
+versionsort
+functions were added in
 .Fx 14.0 .
diff --git a/lib/libc/gen/scandir.c b/lib/libc/gen/scandir.c
index 3a891b0ad3f2..8a260adcd2f3 100644
--- a/lib/libc/gen/scandir.c
+++ b/lib/libc/gen/scandir.c
@@ -191,6 +191,13 @@ alphasort(const struct dirent **d1, const struct dirent **d2)
 	return (strcoll((*d1)->d_name, (*d2)->d_name));
 }
 
+int
+versionsort(const struct dirent **d1, const struct dirent **d2)
+{
+
+	return (strverscmp((*d1)->d_name, (*d2)->d_name));
+}
+
 static int
 alphasort_thunk(void *thunk, const void *p1, const void *p2)
 {
diff --git a/lib/libc/string/Makefile.inc b/lib/libc/string/Makefile.inc
index 1df3d40e329f..afc113eeb867 100644
--- a/lib/libc/string/Makefile.inc
+++ b/lib/libc/string/Makefile.inc
@@ -16,7 +16,7 @@ MISRCS+=bcmp.c bcopy.c bzero.c explicit_bzero.c \
 	strcspn.c strdup.c strerror.c strlcat.c strlcpy.c strlen.c strmode.c \
 	strncat.c strncmp.c strncpy.c strndup.c strnlen.c strnstr.c \
 	strpbrk.c strrchr.c strsep.c strsignal.c strspn.c strstr.c strtok.c \
-	strxfrm.c swab.c \
+	strverscmp.c strxfrm.c swab.c \
 	timingsafe_bcmp.c \
 	timingsafe_memcmp.c \
 	wcpcpy.c wcpncpy.c wcscasecmp.c wcscat.c \
@@ -46,7 +46,7 @@ MAN+=	bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 index.3 memccpy.3 memchr.3 \
 	memcmp.3 memcpy.3 memmem.3 memmove.3 memset.3 strcasecmp.3 strcat.3 \
 	strchr.3 strcmp.3 strcoll.3 strcpy.3 strdup.3 strerror.3 \
 	string.3 strlcpy.3 strlen.3 strmode.3 strpbrk.3 strsep.3 \
-	strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 \
+	strspn.3 strstr.3 strtok.3 strverscmp.3 strxfrm.3 swab.3 \
 	timingsafe_bcmp.3 \
 	wcscoll.3 wcstok.3 \
 	wcswidth.3 wcsxfrm.3 wmemchr.3
diff --git a/lib/libc/string/Symbol.map b/lib/libc/string/Symbol.map
index ec45d7fd7ddb..4fcd194bafd8 100644
--- a/lib/libc/string/Symbol.map
+++ b/lib/libc/string/Symbol.map
@@ -116,6 +116,7 @@ FBSD_1.6 {
 
 FBSD_1.7 {
 	mempcpy;
+	strverscmp;
 	wmempcpy;
 };
 
diff --git a/lib/libc/string/strverscmp.3 b/lib/libc/string/strverscmp.3
new file mode 100644
index 000000000000..e4413fb96e36
--- /dev/null
+++ b/lib/libc/string/strverscmp.3
@@ -0,0 +1,56 @@
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\" Copyright (c) 2022 Aymeric Wibo <obiwac@gmail.com>
+.Dd July 11, 2022
+.Dt STRVERSCMP 3
+.Os
+.Sh NAME
+.Nm strverscmp
+.Nd compare strings according to natural order
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In string.h
+.Ft int
+.Fn strverscmp "const char *s1" "const char *s2"
+.Sh DESCRIPTION
+The
+.Fn strverscmp
+function
+compares the null-terminated strings
+.Fa s1
+and
+.Fa s2
+according to their natural order
+and returns an integer greater than, equal to, or less than 0,
+depending on whether
+.Fa s1
+is greater than, equal to, or less than
+.Fa s2 .
+.Pp
+More specifically, this natural order is found by iterating over both
+strings until a difference is found.
+If the difference is between non-decimal characters,
+.Fn strverscmp
+acts like
+.Xr strcmp 3
+(thus, the ordering would be "a", "b", "train").
+If a decimal digit is found, the whole number is read and compared
+(thus, the ordering would be "9", "10", "420" which is different to lexicographic order,
+what
+.Xr strcmp 3
+would have done).
+Numbers with leading zeroes are interpreted as fractional parts (even without a decimal point),
+and numbers with more leading zeroes are placed before numbers with fewer leading zeroes
+(thus, the ordering would be "000", "00", "01", "010", "09", "0", "1", "9", "10").
+.Sh SEE ALSO
+.Xr strcmp 3 ,
+.Xr versionsort 3
+.Sh STANDARDS
+The
+.Fn strverscmp
+function is a GNU extension and conforms to no standard.
+.Sh HISTORY
+The
+.Fn strverscmp
+function was added in
+.Fx 14.0 .
diff --git a/lib/libc/string/strverscmp.c b/lib/libc/string/strverscmp.c
new file mode 100644
index 000000000000..6051adb35499
--- /dev/null
+++ b/lib/libc/string/strverscmp.c
@@ -0,0 +1,91 @@
+/*-
+* SPDX-License-Identifier: BSD-2-Clause
+* Copyright (c) 2022 Aymeric Wibo <obiwac@gmail.com>
+*/
+
+#include <ctype.h>
+#include <stddef.h>
+
+int
+strverscmp(const char *s1, const char *s2)
+{
+	size_t digit_count_1, digit_count_2;
+	size_t zeros_count_1, zeros_count_2;
+	const unsigned char *num_1, *num_2;
+	const unsigned char *u1 = __DECONST(const unsigned char *, s1);
+	const unsigned char *u2 = __DECONST(const unsigned char *, s2);
+
+	/*
+	 * If pointers are the same, no need to go through to process of
+	 * comparing them.
+	 */
+	if (s1 == s2)
+		return (0);
+
+	while (*u1 != '\0' && *u2 != '\0') {
+		/* If either character is not a digit, act like strcmp(3). */
+
+		if (!isdigit(*u1) || !isdigit(*u2)) {
+			if (*u1 != *u2)
+				return (*u1 - *u2);
+			u1++;
+			u2++;
+			continue;
+		}
+		if (*u1 == '0' || *u2 == '0') {
+			/*
+			 * Treat leading zeros as if they were the fractional
+			 * part of a number, i.e. as if they had a decimal point
+			 * in front. First, count the leading zeros (more zeros
+			 * == smaller number).
+			 */
+			zeros_count_1 = 0;
+			zeros_count_2 = 0;
+			for (; *u1 == '0'; u1++)
+				zeros_count_1++;
+			for (; *u2 == '0'; u2++)
+				zeros_count_2++;
+			if (zeros_count_1 != zeros_count_2)
+				return (zeros_count_2 - zeros_count_1);
+
+			/* Handle the case where 0 < 09. */
+			if (!isdigit(*u1) && isdigit(*u2))
+				return (1);
+			if (!isdigit(*u2) && isdigit(*u1))
+				return (-1);
+		} else {
+			/*
+			 * No leading zeros; we're simply comparing two numbers.
+			 * It is necessary to first count how many digits there
+			 * are before going back to compare each digit, so that
+			 * e.g. 7 is not considered larger than 60.
+			 */
+			num_1 = u1;
+			num_2 = u2;
+
+			/* Count digits (more digits == larger number). */
+			for (; isdigit(*u1); u1++)
+				;
+			for (; isdigit(*u2); u2++)
+				;
+			digit_count_1 = u1 - num_1;
+			digit_count_2 = u2 - num_2;
+			if (digit_count_1 != digit_count_2)
+				return (digit_count_1 - digit_count_2);
+
+			/*
+			 * If there are the same number of digits, go back to
+			 * the start of the number.
+			 */
+			u1 = num_1;
+			u2 = num_2;
+		}
+
+		/* Compare each digit until there are none left. */
+		for (; isdigit(*u1) && isdigit(*u2); u1++, u2++) {
+			if (*u1 != *u2)
+				return (*u1 - *u2);
+		}
+	}
+	return (*u1 - *u2);
+}
diff --git a/lib/libc/tests/string/Makefile b/lib/libc/tests/string/Makefile
index c6a98572564d..eacf7e15c27c 100644
--- a/lib/libc/tests/string/Makefile
+++ b/lib/libc/tests/string/Makefile
@@ -4,10 +4,11 @@ ATF_TESTS_C+=		memcmp_test
 ATF_TESTS_C+=		memset_s_test
 ATF_TESTS_C+=		stpncpy_test
 ATF_TESTS_C+=		strerror2_test
-ATF_TESTS_C+=		wcscasecmp_test
-ATF_TESTS_C+=		wcsnlen_test
+ATF_TESTS_C+=		strverscmp_test
 ATF_TESTS_C+=		strxfrm_test
+ATF_TESTS_C+=		wcscasecmp_test
 ATF_TESTS_C+=		wcscoll_test
+ATF_TESTS_C+=		wcsnlen_test
 
 # TODO: popcount, stresep
 
diff --git a/lib/libc/tests/string/strverscmp_test.c b/lib/libc/tests/string/strverscmp_test.c
new file mode 100644
index 000000000000..fd6a2620cb48
--- /dev/null
+++ b/lib/libc/tests/string/strverscmp_test.c
@@ -0,0 +1,93 @@
+/*-
+* SPDX-License-Identifier: BSD-2-Clause
+* Copyright (c) 2022 Aymeric Wibo <obiwac@gmail.com>
+*/
+
+#include <atf-c.h>
+#include <string.h>
+
+static void
+check_all(size_t len, const char *ordered[len])
+{
+	const char *a, *b;
+
+	for (size_t i = 0; i < len; i++) {
+		for (size_t j = 0; j < len; j++) {
+			a = ordered[i];
+			b = ordered[j];
+
+			if (i == j)
+				ATF_CHECK_MSG(
+				    strverscmp(a, b) == 0,
+				    "strverscmp(\"%s\", \"%s\") == 0",
+				    a, b
+				);
+			else if (i < j)
+				ATF_CHECK_MSG(
+				    strverscmp(a, b) < 0,
+				    "strverscmp(\"%s\", \"%s\") < 0",
+				    a, b
+				);
+			else if (i > j)
+				ATF_CHECK_MSG(
+				    strverscmp(a, b) > 0,
+				    "strverscmp(\"%s\", \"%s\") > 0",
+				    a, b
+				);
+		}
+	}
+}
+
+#define	CHECK_ALL(...) do {                                     \
+	const char *ordered[] = { __VA_ARGS__ };                \
+	check_all(sizeof(ordered) / sizeof(*ordered), ordered); \
+} while (0)
+
+ATF_TC_WITHOUT_HEAD(strcmp_functionality);
+ATF_TC_BODY(strcmp_functionality, tc)
+{
+	CHECK_ALL("", "a", "b");
+}
+
+/* from Linux man page strverscmp(3) */
+
+ATF_TC_WITHOUT_HEAD(vers_ordering);
+ATF_TC_BODY(vers_ordering, tc)
+{
+	CHECK_ALL("000", "00", "01", "010", "09", "0", "1", "9", "10");
+}
+
+ATF_TC_WITHOUT_HEAD(natural_ordering);
+ATF_TC_BODY(natural_ordering, tc)
+{
+	CHECK_ALL("jan1", "jan2", "jan9", "jan10", "jan11", "jan19", "jan20");
+}
+
+/* https://sourceware.org/bugzilla/show_bug.cgi?id=9913 */
+
+ATF_TC_WITHOUT_HEAD(glibc_bug_9913);
+ATF_TC_BODY(glibc_bug_9913, tc)
+{
+	CHECK_ALL(
+	    "B0075022800016.gbp.corp.com",
+	    "B007502280067.gbp.corp.com",
+	    "B007502357019.GBP.CORP.COM"
+	);
+}
+
+ATF_TC_WITHOUT_HEAD(semver_ordering);
+ATF_TC_BODY(semver_ordering, tc)
+{
+	CHECK_ALL("2.6.20", "2.6.21");
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+	ATF_TP_ADD_TC(tp, strcmp_functionality);
+	ATF_TP_ADD_TC(tp, vers_ordering);
+	ATF_TP_ADD_TC(tp, natural_ordering);
+	ATF_TP_ADD_TC(tp, glibc_bug_9913);
+	ATF_TP_ADD_TC(tp, semver_ordering);
+
+	return (atf_no_error());
+}