git: 30acc8427026 - main - libc/amd64: rewrite memrchr() scalar impl. to read the string from the back

From: Robert Clausecker <fuz_at_FreeBSD.org>
Date: Sat, 09 Aug 2025 20:14:11 UTC

The branch main has been updated by fuz:

URL: https://cgit.FreeBSD.org/src/commit/?id=30acc84270266e41f66cf572f67c3290d923da2f

commit 30acc84270266e41f66cf572f67c3290d923da2f
Author:     Robert Clausecker <fuz@FreeBSD.org>
AuthorDate: 2025-07-29 20:12:11 +0000
Commit:     Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2025-08-09 20:13:27 +0000

    libc/amd64: rewrite memrchr() scalar impl. to read the string from the back
    
    A very simple implementation as I don't have the patience right now
    to write a full SWAR kernel.  Should still do the trick if you wish
    to opt out of SSE for some reason.
    
    Reported by:    Mikael Simonsson <m@mikaelsimonsson.com>
    Reviewed by:    strajabot
    PR:             288321
    MFC after:      1 month
---
 lib/libc/amd64/string/memrchr.S | 72 +++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 38 deletions(-)

diff --git a/lib/libc/amd64/string/memrchr.S b/lib/libc/amd64/string/memrchr.S
index f1ba48d6bb41..80fb306af2a3 100644
--- a/lib/libc/amd64/string/memrchr.S
+++ b/lib/libc/amd64/string/memrchr.S
@@ -16,58 +16,54 @@ ARCHFUNCS(memrchr)
 ENDARCHFUNCS(memrchr)
 
 ARCHENTRY(memrchr, scalar)
-	xor	%eax, %eax		# prospective return value
-	sub	$4, %rdx		# 4 bytes left to process?
-	jb	1f
+	lea		-1(%rdi, %rdx, 1), %rax	# point to last char in buffer
+	sub		$4, %rdx		# 4 bytes left to process?
+	jb		.Ltail
 
 	ALIGN_TEXT
-0:	xor	%r8, %r8
-	lea	2(%rdi), %r10
-	cmp	%sil, 2(%rdi)
-	cmovne	%r8, %r10		# point to null if no match
+0:	cmp		%sil, (%rax)		# match at last entry?
+	je		1f
 
-	cmp	%sil, (%rdi)
-	cmove	%rdi, %r8		# point to first char if match
+	cmp		%sil, -1(%rax)		# match at second to last entry?
+	je		2f
 
-	lea	1(%rdi), %r9
-	cmp	%sil, 1(%rdi)
-	cmovne	%r8, %r9		# point to first result if no match in second
+	cmp		%sil, -2(%rax)		# match at third to last entry?
+	je		3f
 
-	lea	3(%rdi), %r11
-	cmp	%sil, 3(%rdi)
-	cmovne	%r10, %r11
+	cmp		%sil, -3(%rax)		# match at fourth to last entry?
+	je		4f
 
-	test	%r11, %r11
-	cmovz	%r9, %r11		# take first pair match if none in second
+	sub		$4, %rax
+	sub		$4, %rdx
+	jae		0b
 
-	test	%r11, %r11
-	cmovnz	%r11, %rax		# take match in current set if any
+.Ltail:	cmp		$-3, %edx		# at least one character left to process?
+	jb		.Lnotfound
 
-	add	$4, %rdi
-	sub	$4, %rdx
-	jae	0b
+	cmp		%sil, (%rax)
+	je		1f
 
-1:	cmp	$-3, %edx		# a least one character left to process?
-	jb	2f
+	cmp		$-2, %edx		# at least two characters left to process?
+	jb		.Lnotfound
 
-	cmp	%sil, (%rdi)
-	cmove	%rdi, %rax
+	cmp		%sil, -1(%rax)
+	je		2f
 
-	lea	1(%rdi), %rcx
-	cmp	$-2, %edx		# at least two characters left to process?
-	jb	2f
+	cmp		$-1, %edx		# at least three characters left to process?
+	jb		.Lnotfound
 
-	cmp	%sil, 1(%rdi)
-	cmove	%rcx, %rax
+	cmp		%sil, -2(%rax)
+	je		3f
 
-	lea	2(%rdi), %rcx
-	cmp	$-1, %edx		# at least three character left to process?
-	jb	2f
-
-	cmp	%sil, 2(%rdi)
-	cmove	%rcx, %rax
+.Lnotfound:
+	xor		%eax, %eax
+	ret
 
-2:	ret
+	/* match found -- adjust rax to point to matching byte */
+4:	dec		%rax
+3:	dec		%rax
+2:	dec		%rax
+1:	ret
 ARCHEND(memrchr, scalar)
 
 ARCHENTRY(memrchr, baseline)