git: df21a004be23 - main - libc: scalar strrchr() in RISC-V assembly
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 31 Oct 2025 12:48:27 UTC
The branch main has been updated by fuz:
URL: https://cgit.FreeBSD.org/src/commit/?id=df21a004be237a1dccd03c7b47254625eea62fa9
commit df21a004be237a1dccd03c7b47254625eea62fa9
Author: Strahinja Stanišić <strajabot@FreeBSD.org>
AuthorDate: 2024-10-24 16:18:07 +0000
Commit: Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2025-10-31 12:47:57 +0000
libc: scalar strrchr() in RISC-V assembly
Implements strrchr in RISC-V assembly, leading to the following
improvements (performance measured on SiFive HF105-001)
os: FreeBSD
arch: riscv
│ strrchr_baseline │ strrchr_scalar │
│ sec/op │ sec/op vs base │
Short 837.2µ ± 1% 574.6µ ± 1% -31.37% (p=0.000 n=20+21)
Mid 639.7µ ± 0% 269.7µ ± 0% -57.84% (p=0.000 n=20+21)
Long 589.1µ ± 0% 176.7µ ± 0% -70.01% (p=0.000 n=20+21)
geomean 680.8µ 301.4µ -55.73%
│ strrchr_baseline │ strrchr_scalar │
│ MiB/s │ MiB/s vs base │
Short 149.3 ± 1% 217.6 ± 1% +45.71% (p=0.000 n=20+21)
Mid 195.4 ± 0% 463.6 ± 0% +137.22% (p=0.000 n=20+21)
Long 212.2 ± 0% 707.4 ± 0% +233.40% (p=0.000 n=20+21)
geomean 183.6 414.7 +125.88%
MFC after: 1 month
MFC to: stable/15
Approved by: mhorne, markj (mentor)
Sponsored by: Google LLC (GSoC 2024)
Differential Revision: https://reviews.freebsd.org/D47275
---
lib/libc/riscv/string/Makefile.inc | 2 +
lib/libc/riscv/string/strrchr.S | 124 +++++++++++++++++++++++++++++++++++++
2 files changed, 126 insertions(+)
diff --git a/lib/libc/riscv/string/Makefile.inc b/lib/libc/riscv/string/Makefile.inc
new file mode 100644
index 000000000000..a9cf8bf52481
--- /dev/null
+++ b/lib/libc/riscv/string/Makefile.inc
@@ -0,0 +1,2 @@
+MDSRCS+= \
+ strrchr.S
diff --git a/lib/libc/riscv/string/strrchr.S b/lib/libc/riscv/string/strrchr.S
new file mode 100644
index 000000000000..51f34ca21fac
--- /dev/null
+++ b/lib/libc/riscv/string/strrchr.S
@@ -0,0 +1,154 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
+ */
+
+#include <machine/asm.h>
+
+/*
+ * char *strrchr(const char *s, int c)
+ *
+ * a0 - const char *s
+ * a1 - int c
+ *
+ * Returns the address of the last byte of s equal to (char)c, the
+ * terminating NUL included, or NULL if there is no such byte.
+ *
+ * s is read as aligned 8-byte words.  Each word is tested for a NUL
+ * byte and for a byte equal to c with (x - REP8_0X01) & ~x & REP8_0X80;
+ * the last word holding a match at or before the terminator is saved
+ * and decoded once the terminator has been seen.
+ */
+ENTRY(strrchr)
+	/*
+	 * a0 - const char *ptr_align
+	 * a1 - temporary
+	 * a2 - temporary
+	 * a3 - temporary
+	 * a4 - temporary
+	 * a5 - const char[8] cccccccc
+	 * a6 - const uint64_t *save_align
+	 * a7 - const uint64_t save_iter
+	 * t0 - const uint64_t REP8_0X01
+	 * t1 - const uint64_t REP8_0X80
+	 */
+
+	/*
+	 * save_align = 0
+	 * save_iter = 0xFFFFFFFFFFFFFF00
+	 * REP8_0X01 = 0x0101010101010101
+	 * cccccccc = (char)c * REP8_0X01
+	 * REP8_0X80 = REP8_0X01 << 7
+	 * head = REP8_0X80 ^ (REP8_0X80 << ((str % 8) * 8))
+	 * ptr_align = str - str % 8
+	 */
+	li t0, 0x01010101
+	li a6, 0
+	slli a2, a0, 3			/* sll reads only bits 5:0 of a2 */
+	slli t1, t0, 32
+	li a7, 0xFFFFFFFFFFFFFF00
+	or t0, t0, t1
+	andi a1, a1, 0xFF
+	slli t1, t0, 7
+	andi a0, a0, ~0b111
+	mul a5, a1, t0
+	sll a3, t1, a2
+	xor a3, a3, t1			/* a3 -> head */
+
+	/*
+	 * First word: set the top bit of every byte in front of str, so
+	 * that those bytes are neither NUL nor equal to c.  Clearing
+	 * their bits in hz and hc afterwards is not sufficient: a 0x00
+	 * (or c) byte in front of str makes the subtraction borrow into
+	 * the byte holding str[0], which is then reported as NUL (or as
+	 * a match) when it is 0x01 (or c ^ 0x01).
+	 */
+	ld a1, 0(a0)			/* a1 -> data = *ptr_align */
+	or a1, a1, a3			/* data = data | head */
+	xor a2, a1, a5			/* a2 -> iter = data ^ cccccccc */
+	or a2, a2, a3			/* iter = iter | head */
+	j .Lscan
+
+.Lloop:					/* do { */
+	ld a1, 0(a0)			/* a1 -> data = *ptr_align */
+	xor a2, a1, a5			/* a2 -> iter = data ^ cccccccc */
+.Lscan:
+	not a3, a1			/* a3 -> nhz = ~data */
+	sub a1, a1, t0			/* a1 -> hz = data - REP8_0X01 */
+	not a4, a2			/* a4 -> nhc = ~iter */
+	and a1, a1, a3			/* hz = hz & nhz */
+	sub a3, a2, t0			/* a3 -> hc = iter - REP8_0X01 */
+	and a1, a1, t1			/* hz = hz & REP8_0X80 */
+	and a3, a3, a4			/* hc = hc & nhc */
+	addi a4, a1, -1			/* a4 -> mask_end = hz - 1 */
+	and a3, a3, t1			/* hc = hc & REP8_0X80 */
+	xor a4, a4, a1			/* mask_end = mask_end ^ hz */
+	addi a0, a0, 8			/* ptr_align = ptr_align + 8 */
+	and a3, a3, a4			/* hc = hc & mask_end */
+	not a4, a4			/* mask_end = ~mask_end */
+
+	beqz a3, .Lskip_save		/* if(!hc) goto skip_save */
+	or a2, a2, a4			/* iter = iter | mask_end */
+	addi a6, a0, -8			/* save_align = ptr_align - 8 */
+	mv a7, a2			/* save_iter = iter */
+
+.Lskip_save:
+	beqz a1, .Lloop			/* } while(!hz) */
+
+.Lfind_char:
+	/*
+	 * a1 -> iter = save_iter
+	 * a2 -> mask_iter = 0xFF00000000000000
+	 * a3 -> match_off = 7
+	 *
+	 * Test bytes 7 down to 1 of iter and stop at the first zero byte,
+	 * which is the last match.  If none of them is zero, match_off
+	 * ends up 0; with the initial save_align and save_iter it is NULL.
+	 */
+	li a2, 0xFF
+	mv a1, a7
+	slli a2, a2, 56
+	li a3, 7
+
+	and a0, a1, a2
+	srli a2, a2, 8
+	beqz a0, .Lret
+
+	addi a3, a3, -1
+	and a0, a1, a2
+	srli a2, a2, 8
+	beqz a0, .Lret
+
+	addi a3, a3, -1
+	and a0, a1, a2
+	srli a2, a2, 8
+	beqz a0, .Lret
+
+	addi a3, a3, -1
+	and a0, a1, a2
+	srli a2, a2, 8
+	beqz a0, .Lret
+
+	addi a3, a3, -1
+	and a0, a1, a2
+	srli a2, a2, 8
+	beqz a0, .Lret
+
+	addi a3, a3, -1
+	and a0, a1, a2
+	srli a2, a2, 8
+	beqz a0, .Lret
+
+	addi a3, a3, -1
+	and a0, a1, a2
+	srli a2, a2, 8
+	beqz a0, .Lret
+
+	addi a3, a3, -1
+
+.Lret:
+	/* return save_align + match_offset */
+	add a0, a6, a3
+	ret
+END(strrchr)