git: 7a605ba8f799 - stable/14 - lib/libc/amd64/string/strcat.S: enable use of SIMD
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 24 Jan 2024 19:44:45 UTC
The branch stable/14 has been updated by fuz:
URL: https://cgit.FreeBSD.org/src/commit/?id=7a605ba8f7996f38ba7b353a0120d84bae48da0f
commit 7a605ba8f7996f38ba7b353a0120d84bae48da0f
Author: Robert Clausecker <fuz@FreeBSD.org>
AuthorDate: 2023-11-14 18:09:08 +0000
Commit: Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2024-01-24 19:39:28 +0000
lib/libc/amd64/string/strcat.S: enable use of SIMD
strcat has a bespoke scalar assembly implementation we
inherited from NetBSD. While it performs well, it is
better to call into our SIMD implementations if any SIMD
features are available at all. So do that and implement
strcat() by calling into strlen() and strcpy() if these
are available.
Sponsored by: The FreeBSD Foundation
Tested by: developers@, exp-run
Approved by: mjg
MFC after: 1 month
MFC to: stable/14
PR: 275785
Differential Revision: https://reviews.freebsd.org/D42600
(cherry picked from commit aff9143a242c0012b0195b3666e03fa3b7cd33e8)
---
lib/libc/amd64/string/strcat.S | 47 +++++++++++++++++++++++++++++++++++++-----
1 file changed, 42 insertions(+), 5 deletions(-)
diff --git a/lib/libc/amd64/string/strcat.S b/lib/libc/amd64/string/strcat.S
index 0834408acfb7..081e98840cee 100644
--- a/lib/libc/amd64/string/strcat.S
+++ b/lib/libc/amd64/string/strcat.S
@@ -1,6 +1,14 @@
-/*
- * Written by J.T. Conklin <jtc@acorntoolworks.com>
- * Public domain.
+/*-
+ * Copyright (c) 2023, The FreeBSD Foundation
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Portions of this software were developed by Robert Clausecker
+ * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
+ *
+ * Adapted from NetBSD's common/lib/libc/arch/x86_64/string/strcat.S
+ * written by J.T. Conklin <jtc@acorntoolworks.com>
+ * that was originally dedicated to the public domain
*/
#include <machine/asm.h>
@@ -8,7 +16,14 @@
RCSID("$NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $")
#endif
-ENTRY(strcat)
+#include "amd64_archlevel.h"
+
+ARCHFUNCS(strcat) /* opens the list of strcat variants; macro presumably from amd64_archlevel.h (TODO confirm) */
 ARCHFUNC(strcat, scalar) /* variant whose body is ARCHENTRY(strcat, scalar) */
 ARCHFUNC(strcat, baseline) /* variant whose body is ARCHENTRY(strcat, baseline) */
+ENDARCHFUNCS(strcat) /* closes the variant list */
+
+ARCHENTRY(strcat, scalar)
movq %rdi,%rax
movabsq $0x0101010101010101,%r8
movabsq $0x8080808080808080,%r9
@@ -161,6 +176,28 @@ ENTRY(strcat)
.Ldone:
ret
-END(strcat)
+ARCHEND(strcat, scalar)
+
+/*
+ * Call into strlen + strcpy if we have any SIMD at all.
+ * The scalar implementation above is better for the scalar
+ * case as it avoids the function call overhead, but pessimal
+ * if we could call SIMD routines instead.
+ */
+ARCHENTRY(strcat, baseline)
 push %rbp # set up a frame so leave can unwind it
 mov %rsp, %rbp
 push %rsi # spill src to -8(%rbp); it must survive the strlen call
 push %rbx # callee-saved; third push leaves rsp 16-byte aligned for the calls
 mov %rdi, %rbx # remember destination for later
 call CNAME(strlen) # strlen(dest)
 mov -8(%rbp), %rsi # reload src spilled above
 lea (%rbx, %rax, 1), %rdi # dest + strlen(dest)
 call CNAME(__stpcpy) # stpcpy(dest + strlen(dest), src)
 mov %rbx, %rax # return dest
 pop %rbx # restore caller value of rbx
 leave # rsp = rbp (drops the src slot), then pop rbp
 ret
+ARCHEND(strcat, baseline)
.section .note.GNU-stack,"",%progbits