git: aff9143a242c - main - lib/libc/amd64/string/strcat.S: enable use of SIMD
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 25 Dec 2023 14:25:51 UTC
The branch main has been updated by fuz: URL: https://cgit.FreeBSD.org/src/commit/?id=aff9143a242c0012b0195b3666e03fa3b7cd33e8 commit aff9143a242c0012b0195b3666e03fa3b7cd33e8 Author: Robert Clausecker <fuz@FreeBSD.org> AuthorDate: 2023-11-14 18:09:08 +0000 Commit: Robert Clausecker <fuz@FreeBSD.org> CommitDate: 2023-12-25 13:55:53 +0000 lib/libc/amd64/string/strcat.S: enable use of SIMD strcat has a bespoke scalar assembly implementation we inherited from NetBSD. While it performs well, it is better to call into our SIMD implementations if any SIMD features are available at all. So do that and implement strcat() by calling into strlen() and strcpy() if these are available. Sponsored by: The FreeBSD Foundation Tested by: developers@, exp-run Approved by: mjg MFC after: 1 month MFC to: stable/14 PR: 275785 Differential Reviison: https://reviews.freebsd.org/D42600 --- lib/libc/amd64/string/strcat.S | 47 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/lib/libc/amd64/string/strcat.S b/lib/libc/amd64/string/strcat.S index 0834408acfb7..081e98840cee 100644 --- a/lib/libc/amd64/string/strcat.S +++ b/lib/libc/amd64/string/strcat.S @@ -1,6 +1,14 @@ -/* - * Written by J.T. Conklin <jtc@acorntoolworks.com> - * Public domain. +/*- + * Copyright (c) 2023, The FreeBSD Foundation + * + * SPDX-License-Expression: BSD-2-Clause + * + * Portions of this software were developed by Robert Clausecker + * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation. + * + * Adapted from NetBSD's common/lib/libc/arch/x86_64/string/strcat.S + * written by J.T. Conklin <jtc@acorntoolworks.com> + * that was originally dedicated to the public domain */ #include <machine/asm.h> @@ -8,7 +16,14 @@ RCSID("$NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $") #endif -ENTRY(strcat) +#include "amd64_archlevel.h" + +ARCHFUNCS(strcat) + ARCHFUNC(strcat, scalar) + ARCHFUNC(strcat, baseline) +ENDARCHFUNCS(strcat) + +ARCHENTRY(strcat, scalar) movq %rdi,%rax movabsq $0x0101010101010101,%r8 movabsq $0x8080808080808080,%r9 @@ -161,6 +176,28 @@ ENTRY(strcat) .Ldone: ret -END(strcat) +ARCHEND(strcat, scalar) + +/* + * Call into strlen + strcpy if we have any SIMD at all. + * The scalar implementation above is better for the scalar + * case as it avoids the function call overhead, but pessimal + * if we could call SIMD routines instead. + */ +ARCHENTRY(strcat, baseline) + push %rbp + mov %rsp, %rbp + push %rsi + push %rbx + mov %rdi, %rbx # remember destination for later + call CNAME(strlen) # strlen(dest) + mov -8(%rbp), %rsi + lea (%rbx, %rax, 1), %rdi # dest + strlen(dest) + call CNAME(__stpcpy) # stpcpy(dest + strlen(dest), src) + mov %rbx, %rax # return dest + pop %rbx + leave + ret +ARCHEND(strcat, baseline) .section .note.GNU-stack,"",%progbits