From nobody Wed Sep 21 09:46:48 2022 X-Original-To: dev-commits-src-branches@mlmmj.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mlmmj.nyi.freebsd.org (Postfix) with ESMTP id 4MXYTj1Czlz4cttQ; Wed, 21 Sep 2022 09:46:49 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4MXYTh5h2qz3fHT; Wed, 21 Sep 2022 09:46:48 +0000 (UTC) (envelope-from git@FreeBSD.org) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=freebsd.org; s=dkim; t=1663753608; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=hHuv53OpVzwChOC/naJDhhWo7uxiCdvQtE67zd/MU88=; b=tCPk9AIN3Uxs8qZjS3NhBzXK1O3DksUc4sVPzJbs4cjtODgO3PNq4byIqGJPJy6Ll1YDs/ n7ujk3RSosPZ0xkmN37mMB2fvFx7gEDt2dDyQE5iktm0i0j1dXneE1Q8Id+lJgJ2+n+qja oc/ecVY0HGFIWj5TOX1e9xWOhjTLZeCMtUIscco/1ACNBcvdfFEMCw0bdkSCkbweZj24fy JOupHQMC1KlfmBsaNY4TPwSPtn5amSSHANaysMOqbBm+O+qCdVas91OICkOqDXUjOUeltq IllF0v3KkFCBiCnBcnd/E0biyQ+al72ydY4KkLJ6jlOc5Jt2H+0thBYuexsp6A== Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 4MXYTh3rCSz11vt; Wed, 21 Sep 2022 09:46:48 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 
28L9kmVf076202; Wed, 21 Sep 2022 09:46:48 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 28L9kmIl076201; Wed, 21 Sep 2022 09:46:48 GMT (envelope-from git) Date: Wed, 21 Sep 2022 09:46:48 GMT Message-Id: <202209210946.28L9kmIl076201@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org From: Andrew Turner Subject: git: 3af87126f68e - stable/13 - Import an optimized arm64 memcmp into the kernel List-Id: Commits to the stable branches of the FreeBSD src repository List-Archive: https://lists.freebsd.org/archives/dev-commits-src-branches List-Help: List-Post: List-Subscribe: List-Unsubscribe: Sender: owner-dev-commits-src-branches@freebsd.org X-BeenThere: dev-commits-src-branches@freebsd.org MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit X-Git-Committer: andrew X-Git-Repository: src X-Git-Refname: refs/heads/stable/13 X-Git-Reftype: branch X-Git-Commit: 3af87126f68e539453dc530925d7e297ee261c7f Auto-Submitted: auto-generated ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=freebsd.org; s=dkim; t=1663753608; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=hHuv53OpVzwChOC/naJDhhWo7uxiCdvQtE67zd/MU88=; b=TGGdZPKtuRRViio0nCV0ZDMKwb+uA6XBg8zuvnVzy7nPhqngHrJiDvB2lHsg+DpZz9tEbm y3AYx0c4uHSzpRLKqey6svjVYkGHQw5nf3nf0p26yU1wbyBn4qmrJIZENsEbMOyDxoXahK VavX/WiN1rugmYxHpCBfrwsA+B5aR/gBQffOoQ8ODhlw9uivrnCHa/5BTJxc/fgnfypGeL w3rurH/JrYePzReqprT7onKYRUyXqBtUpIiT5Ho/TGtqHZDOReIMlMSBZiZ7g1FgGxFxs8 OR+3Ahb4uXbguHvyYaU2n3ZBeomeqkhGLO1GRBr/iorv+Glhn886vRp2U3mNRA== ARC-Seal: i=1; s=dkim; d=freebsd.org; t=1663753608; a=rsa-sha256; cv=none; b=qw7Zxk9SYC/bZH80Y/WcBdSlOjCV1LQQ4+1u/0Lb2HMK+0vP6pVQHXS//6c7yWdujkN8bu 
JwHOR6SfjVzJ11vpabziY/QaaZkVxTDxStMt1QshaVkhTko2Kimrhl9hJXQaQR577JIUFs THYcYHeam2fYLmsWAYYIRjyFkrp5+F1E53GTH/VMGYhzh1xurcszhB4poNacc/bTgx2qAk vj7dlIVRITLLk/yUSbea1D+MAYqN1k6NXJ0u5tejX9G9sXXI0MLMWDHtz0q2hTNofaIId+ EsykYaX7DFf42bOpC/QR5SvcBlMFVCTnNRfO80hLHVoV4Ao0KZNTB/zXVEy13g== ARC-Authentication-Results: i=1; mx1.freebsd.org; none X-ThisMailContainsUnwantedMimeParts: N The branch stable/13 has been updated by andrew: URL: https://cgit.FreeBSD.org/src/commit/?id=3af87126f68e539453dc530925d7e297ee261c7f commit 3af87126f68e539453dc530925d7e297ee261c7f Author: Andrew Turner AuthorDate: 2022-09-07 11:12:30 +0000 Commit: Andrew Turner CommitDate: 2022-09-21 09:45:53 +0000 Import an optimized arm64 memcmp into the kernel Bring in a version of the Arm Optimized Routines memcmp from before the VFP registers were used. Imported with modification from: https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S Sponsored by: The FreeBSD Foundation (cherry picked from commit 51a1bf7ba7eb79c760161a2054c113978dce38cb) --- sys/arm64/arm64/memcmp.S | 136 +++++++++++++++++++++++++++++++++++++++++++++++ sys/conf/files.arm64 | 3 +- 2 files changed, 137 insertions(+), 2 deletions(-) diff --git a/sys/arm64/arm64/memcmp.S b/sys/arm64/arm64/memcmp.S new file mode 100644 index 000000000000..8517a181f3f3 --- /dev/null +++ b/sys/arm64/arm64/memcmp.S @@ -0,0 +1,136 @@ +/* memcmp - compare memory + * + * Copyright (c) 2013-2020, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +/* Assumptions: + * + * ARMv8-a, AArch64, unaligned accesses. + */ + +#include <machine/asm.h> + +#define L(l) .L ## l + +/* Parameters and result. */ +#define src1 x0 +#define src2 x1 +#define limit x2 +#define result w0 + +/* Internal variables. 
*/ +#define data1 x3 +#define data1w w3 +#define data1h x4 +#define data2 x5 +#define data2w w5 +#define data2h x6 +#define tmp1 x7 +#define tmp2 x8 + +ENTRY (memcmp) + subs limit, limit, 8 + b.lo L(less8) + + ldr data1, [src1], 8 + ldr data2, [src2], 8 + cmp data1, data2 + b.ne L(return) + + subs limit, limit, 8 + b.gt L(more16) + + ldr data1, [src1, limit] + ldr data2, [src2, limit] + b L(return) + +L(more16): + ldr data1, [src1], 8 + ldr data2, [src2], 8 + cmp data1, data2 + bne L(return) + + /* Jump directly to comparing the last 16 bytes for 32 byte (or less) + strings. */ + subs limit, limit, 16 + b.ls L(last_bytes) + + /* We overlap loads between 0-32 bytes at either side of SRC1 when we + try to align, so limit it only to strings larger than 128 bytes. */ + cmp limit, 96 + b.ls L(loop16) + + /* Align src1 and adjust src2 with bytes not yet done. */ + and tmp1, src1, 15 + add limit, limit, tmp1 + sub src1, src1, tmp1 + sub src2, src2, tmp1 + + /* Loop performing 16 bytes per iteration using aligned src1. + Limit is pre-decremented by 16 and must be larger than zero. + Exit if <= 16 bytes left to do or if the data is not equal. */ + .p2align 4 +L(loop16): + ldp data1, data1h, [src1], 16 + ldp data2, data2h, [src2], 16 + subs limit, limit, 16 + ccmp data1, data2, 0, hi + ccmp data1h, data2h, 0, eq + b.eq L(loop16) + + cmp data1, data2 + bne L(return) + mov data1, data1h + mov data2, data2h + cmp data1, data2 + bne L(return) + + /* Compare last 1-16 bytes using unaligned access. */ +L(last_bytes): + add src1, src1, limit + add src2, src2, limit + ldp data1, data1h, [src1] + ldp data2, data2h, [src2] + cmp data1, data2 + bne L(return) + mov data1, data1h + mov data2, data2h + cmp data1, data2 + + /* Compare data bytes and set return value to 0, -1 or 1. */ +L(return): +#ifndef __AARCH64EB__ + rev data1, data1 + rev data2, data2 +#endif + cmp data1, data2 +L(ret_eq): + cset result, ne + cneg result, result, lo + ret + + .p2align 4 + /* Compare up to 8 bytes. 
Limit is [-8..-1]. */ +L(less8): + adds limit, limit, 4 + b.lo L(less4) + ldr data1w, [src1], 4 + ldr data2w, [src2], 4 + cmp data1w, data2w + b.ne L(return) + sub limit, limit, 4 +L(less4): + adds limit, limit, 4 + beq L(ret_eq) +L(byte_loop): + ldrb data1w, [src1], 1 + ldrb data2w, [src2], 1 + subs limit, limit, 1 + ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */ + b.eq L(byte_loop) + sub result, data1w, data2w + ret + +END (memcmp) + diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index 86ada6e4c924..963ee0aef8f0 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -10,8 +10,6 @@ kern/subr_devmap.c standard kern/subr_intr.c optional intrng kern/subr_physmem.c standard libkern/bcmp.c standard -libkern/memcmp.c standard \ - compile-with "${NORMAL_C:N-fsanitize*}" libkern/memset.c standard \ compile-with "${NORMAL_C:N-fsanitize*}" libkern/strlen.c standard @@ -60,6 +58,7 @@ arm64/arm64/locore.S standard no-obj arm64/arm64/machdep.c standard arm64/arm64/machdep_boot.c standard arm64/arm64/mem.c standard +arm64/arm64/memcmp.S standard arm64/arm64/memcpy.S standard arm64/arm64/minidump_machdep.c standard arm64/arm64/mp_machdep.c optional smp