git: b91003acffe7 - main - lib/libc/aarch64/string: add strspn optimized implementation
Date: Fri, 10 Jan 2025 15:03:49 UTC
The branch main has been updated by fuz:
URL: https://cgit.FreeBSD.org/src/commit/?id=b91003acffe7b50dd6506be15116c6b42fc512c6
commit b91003acffe7b50dd6506be15116c6b42fc512c6
Author: Getz Mikalsen <getz@FreeBSD.org>
AuthorDate: 2024-08-26 18:13:54 +0000
Commit: Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2025-01-10 15:02:39 +0000
lib/libc/aarch64/string: add strspn optimized implementation
This is a port of the scalar optimized variant of strspn for amd64
to aarch64.
It uses a lookup table (LUT) to speed up the function; a SIMD
variant is still under development.
See the Differential Revision for benchmark results.
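
For reference, a minimal sketch of the same lookup-table idea in C
(illustrative only; strspn_lut is a hypothetical name, not part of
the commit):

	#include <stddef.h>

	size_t
	strspn_lut(const char *s, const char *charset)
	{
		unsigned char table[256] = { 0 };
		size_t i;

		/* mark every byte value that occurs in the set */
		while (*charset != '\0')
			table[(unsigned char)*charset++] = 1;

		/*
		 * Advance while the current byte is in the set; the
		 * terminating NUL is never marked, so the scan stops
		 * at the end of the string at the latest.
		 */
		for (i = 0; table[(unsigned char)s[i]] != 0; i++)
			;

		return (i);
	}

The assembly below follows the same structure, but keeps the table
on the stack and unrolls both the table setup and the scan loop.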
Tested by: fuz (exprun)
Reviewed by: fuz, emaste
Sponsored by: Google LLC (GSoC 2024)
PR: 281175
Differential Revision: https://reviews.freebsd.org/D46396
---
lib/libc/aarch64/string/Makefile.inc | 4 +-
lib/libc/aarch64/string/strspn.S | 111 +++++++++++++++++++++++++++++++++++
2 files changed, 114 insertions(+), 1 deletion(-)
diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index ba0947511872..09bfaef963eb 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -21,7 +21,9 @@ AARCH64_STRING_FUNCS= \
# SIMD-enhanced routines not derived from Arm's code
MDSRCS+= \
- strcmp.S
+ strcmp.S \
+ strspn.S
+
#
# Add the above functions. Generate an asm file that includes the needed
# Arm Optimized Routines file defining the function name to the libc name.
diff --git a/lib/libc/aarch64/string/strspn.S b/lib/libc/aarch64/string/strspn.S
new file mode 100644
index 000000000000..0ef42c2b737e
--- /dev/null
+++ b/lib/libc/aarch64/string/strspn.S
@@ -0,0 +1,111 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Getz Mikalsen <getz@FreeBSD.org>
+ */
+
+#include <machine/asm.h>
+
+ .weak strspn
+ .set strspn, __strspn
+ .text
+
+ENTRY(__strspn)
+
+ /* check for special cases */
+ ldrb w4, [x1] // first character in set
+ cbz w4, .Lzero // empty set always returns 0
+
+ mov x15, #1 // preload register with 1 for stores
+
+ // set is only one character
+ ldrb w5, [x1, #1] // second character in the set
+ cbz w5, .Lsingle
+
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ sub sp, sp, #256 // allocate 256 bytes on the stack
+
+ /* no special case matches -- prepare lookup table */
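+ // the loop below zeroes the whole 256-byte table, 32 bytes per
+ // iteration (x9 = sp + 8*w3 for w3 = 28, 24, ..., 0)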
+ mov w3, #28
+0: add x9, sp, x3, lsl #3
+ stp xzr, xzr, [x9]
+ stp xzr, xzr, [x9, #16]
+ subs w3, w3, #4
+ b.cs 0b
+
+ strb w15, [sp, x4] // register first character in set
+ add x1, x1, #2
+
+ /* process remaining chars in set */
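+ // each byte is checked for NUL before its table store; the odd
+ // byte's store is deferred to the next loop top, so the
+ // terminator is never marked and table[0] stays 0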
+ .p2align 4
+
+0: ldrb w4, [x1] // next char in set
+ strb w15, [sp, x5] // register previous char
+ cbz w4, 1f // NUL encountered?
+
+ ldrb w5, [x1, #1]
+ add x1, x1, #2
+ strb w15, [sp, x4]
+ cbnz w5, 0b
+
+1: mov x5, x0 // stash a copy of src
+
+ /* find mismatch */
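+ // scan the string 4 bytes per iteration; stop at the first byte
+ // whose table entry is 0 (the terminating NUL always qualifies)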
+ .p2align 4
+0: ldrb w8, [x0]
+ ldrb w9, [sp, x8]
+ cbz w9, 2f
+
+ ldrb w8, [x0, #1]
+ ldrb w9, [sp, x8]
+ cbz w9, 3f
+
+ ldrb w8, [x0, #2]
+ ldrb w9, [sp, x8]
+ cbz w9, 4f
+
+ ldrb w8, [x0, #3]
+ add x0, x0, #4
+ ldrb w9, [sp, x8]
+ cbnz w9, 0b
+
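+ // fall-through: mismatch at byte 3 of the chunk; x0 was already
+ // advanced by 4, so step back to byte 1 and let labels 4 and 3
+ // below count the two remaining matched bytes, then label 2
+ // computes the span length as cursor minus saved start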
+ sub x0, x0, #3
+4: sub x5, x5, #1
+3: add x0, x0, #1
+2: sub x0, x0, x5
+ mov sp, x29
+ ldp x29, x30, [sp], #16
+ ret
+
+.Lzero:
+ mov x0, #0
+ ret
+
+.Lsingle:
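+ // set is a single character: compare four string bytes per
+ // unrolled pass; x5 is the index, 0 on entry (it holds the
+ // set's NUL second byte)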
+ ldrb w8, [x0, x5]
+ cmp w4, w8
+ b.ne 1f
+
+ add x5, x5, #1
+ ldrb w8, [x0, x5]
+ cmp w4, w8
+ b.ne 1f
+
+ add x5, x5, #1
+ ldrb w8, [x0, x5]
+ cmp w4, w8
+ b.ne 1f
+
+ add x5, x5, #1
+ ldrb w8, [x0, x5]
+ add x5, x5, #1
+ cmp w4, w8
+ b.eq .Lsingle
+
+ sub x5, x5, #1
+1: mov x0, x5
+ ret
+
+END(__strspn)