git: 30acc8427026 - main - libc/amd64: rewrite memrchr() scalar impl. to read the string from the back
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 09 Aug 2025 20:14:11 UTC
The branch main has been updated by fuz:
URL: https://cgit.FreeBSD.org/src/commit/?id=30acc84270266e41f66cf572f67c3290d923da2f
commit 30acc84270266e41f66cf572f67c3290d923da2f
Author: Robert Clausecker <fuz@FreeBSD.org>
AuthorDate: 2025-07-29 20:12:11 +0000
Commit: Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2025-08-09 20:13:27 +0000
libc/amd64: rewrite memrchr() scalar impl. to read the string from the back
This is a very simple implementation, as I don't have the patience right
now to write a full SWAR kernel. It should still do the trick if you wish
to opt out of SSE for some reason.
Reported by: Mikael Simonsson <m@mikaelsimonsson.com>
Reviewed by: strajabot
PR: 288321
MFC after: 1 month
---
lib/libc/amd64/string/memrchr.S | 72 +++++++++++++++++++----------------------
1 file changed, 34 insertions(+), 38 deletions(-)
diff --git a/lib/libc/amd64/string/memrchr.S b/lib/libc/amd64/string/memrchr.S
index f1ba48d6bb41..80fb306af2a3 100644
--- a/lib/libc/amd64/string/memrchr.S
+++ b/lib/libc/amd64/string/memrchr.S
@@ -16,58 +16,54 @@ ARCHFUNCS(memrchr)
ENDARCHFUNCS(memrchr)
ARCHENTRY(memrchr, scalar)
- xor %eax, %eax # prospective return value
- sub $4, %rdx # 4 bytes left to process?
- jb 1f
+ lea -1(%rdi, %rdx, 1), %rax # point to last char in buffer
+ sub $4, %rdx # 4 bytes left to process?
+ jb .Ltail
ALIGN_TEXT
-0: xor %r8, %r8
- lea 2(%rdi), %r10
- cmp %sil, 2(%rdi)
- cmovne %r8, %r10 # point to null if no match
+0: cmp %sil, (%rax) # match at last entry?
+ je 1f
- cmp %sil, (%rdi)
- cmove %rdi, %r8 # point to first char if match
+ cmp %sil, -1(%rax) # match at second to last entry?
+ je 2f
- lea 1(%rdi), %r9
- cmp %sil, 1(%rdi)
- cmovne %r8, %r9 # point to first result if no match in second
+ cmp %sil, -2(%rax) # match at third to last entry?
+ je 3f
- lea 3(%rdi), %r11
- cmp %sil, 3(%rdi)
- cmovne %r10, %r11
+ cmp %sil, -3(%rax) # match at fourth to last entry?
+ je 4f
- test %r11, %r11
- cmovz %r9, %r11 # take first pair match if none in second
+ sub $4, %rax
+ sub $4, %rdx
+ jae 0b
- test %r11, %r11
- cmovnz %r11, %rax # take match in current set if any
+.Ltail: cmp $-3, %edx # at least one character left to process?
+ jb .Lnotfound
- add $4, %rdi
- sub $4, %rdx
- jae 0b
+ cmp %sil, (%rax)
+ je 1f
-1: cmp $-3, %edx # a least one character left to process?
- jb 2f
+ cmp $-2, %edx # at least two characters left to process?
+ jb .Lnotfound
- cmp %sil, (%rdi)
- cmove %rdi, %rax
+ cmp %sil, -1(%rax)
+ je 2f
- lea 1(%rdi), %rcx
- cmp $-2, %edx # at least two characters left to process?
- jb 2f
+ cmp $-1, %edx # at least three characters left to process?
+ jb .Lnotfound
- cmp %sil, 1(%rdi)
- cmove %rcx, %rax
+ cmp %sil, -2(%rax)
+ je 3f
- lea 2(%rdi), %rcx
- cmp $-1, %edx # at least three character left to process?
- jb 2f
-
- cmp %sil, 2(%rdi)
- cmove %rcx, %rax
+.Lnotfound:
+ xor %eax, %eax
+ ret
-2: ret
+ /* match found -- adjust rax to point to matching byte */
+4: dec %rax
+3: dec %rax
+2: dec %rax
+1: ret
ARCHEND(memrchr, scalar)
ARCHENTRY(memrchr, baseline)