git: f2c98669fc1b - main - lib/libc/aarch64/string: add ASIMD-enhanced timingsafe_bcmp implementation
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 10 Jan 2025 15:04:04 UTC
The branch main has been updated by fuz:
URL: https://cgit.FreeBSD.org/src/commit/?id=f2c98669fc1b3fd2dbc7a7e3eedd098970a10dec
commit f2c98669fc1b3fd2dbc7a7e3eedd098970a10dec
Author: Robert Clausecker <fuz@FreeBSD.org>
AuthorDate: 2024-12-09 09:49:49 +0000
Commit: Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2025-01-10 15:02:41 +0000
lib/libc/aarch64/string: add ASIMD-enhanced timingsafe_bcmp implementation
A straightforward port of the amd64 implementation.
Approved by: security (cperciva)
Reviewed by: getz, cperciva
Event: EuroBSDcon 2024
Differential Revision: https://reviews.freebsd.org/D46757
---
lib/libc/aarch64/string/Makefile.inc | 1 +
lib/libc/aarch64/string/timingsafe_bcmp.S | 113 ++++++++++++++++++++++++++++++
2 files changed, 114 insertions(+)
diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 752cc6d9900b..8019ab4adafc 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -31,6 +31,7 @@ MDSRCS+= \
strncat.c \
strlcat.c \
strlen.S \
+ timingsafe_bcmp.S \
bcopy.c \
bzero.c
diff --git a/lib/libc/aarch64/string/timingsafe_bcmp.S b/lib/libc/aarch64/string/timingsafe_bcmp.S
new file mode 100644
index 000000000000..baa5c6f0940c
--- /dev/null
+++ b/lib/libc/aarch64/string/timingsafe_bcmp.S
@@ -0,0 +1,113 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Robert Clausecker
+ */
+
+#include <machine/asm.h>
+
+ENTRY(timingsafe_bcmp) /* in: x0, x1 = buffers, x2 = length in bytes; out: w0 == 0 iff all bytes match; every branch below tests only x2, never loaded data */
+ cmp x2, #32 // at least 33 bytes to process?
+ bhi .Lgt32 // yes: take the looping path
+
+ cmp x2, #16 // at least 17 bytes to process?
+ bhi .L1732 // yes: 17--32 bytes, two 16-byte loads per buffer
+
+ cmp x2, #8 // at least 9 bytes to process?
+ bhi .L0916 // yes: 9--16 bytes, two 8-byte loads per buffer
+
+ cmp x2, #4 // at least 5 bytes to process?
+ bhi .L0508 // yes: 5--8 bytes, two 4-byte loads per buffer
+
+ cmp x2, #2 // at least 3 bytes to process?
+ bhi .L0304 // yes: 3--4 bytes, two 2-byte loads per buffer
+
+ cbnz x2, .L0102 // length nonzero, i.e. 1 or 2 bytes? then compare them
+
+ mov w0, #0 // empty buffer always matches
+ ret
+
+.L0102: ldrb w3, [x0] // load first bytes
+ ldrb w4, [x1]
+ sub x2, x2, #1 // x2 = offset of the last byte (0 when length is 1)
+ ldrb w5, [x0, x2] // load last bytes (same as the first bytes if length is 1)
+ ldrb w6, [x1, x2]
+ eor w3, w3, w4 // w3 = differing bits in the first bytes
+ eor w5, w5, w6 // w5 = differing bits in the last bytes
+ orr w0, w3, w5 // nonzero iff either pair differs
+ ret
+
+.L0304: ldrh w3, [x0] // load first halfwords
+ ldrh w4, [x1]
+ sub x2, x2, #2 // x2 = offset of the last halfword (1 or 2)
+ ldrh w5, [x0, x2] // load last halfwords (overlap the first ones if length is 3)
+ ldrh w6, [x1, x2]
+ eor w3, w3, w4 // w3 = differing bits in the first halfwords
+ eor w5, w5, w6 // w5 = differing bits in the last halfwords
+ orr w0, w3, w5 // nonzero iff either pair differs
+ ret
+
+.L0508: ldr w3, [x0] // load first words
+ ldr w4, [x1]
+ sub x2, x2, #4 // x2 = offset of the last word (1 to 4)
+ ldr w5, [x0, x2] // load last words (overlap the first ones if length < 8)
+ ldr w6, [x1, x2]
+ eor w3, w3, w4 // w3 = differing bits in the first words
+ eor w5, w5, w6 // w5 = differing bits in the last words
+ orr w0, w3, w5 // nonzero iff either pair differs
+ ret
+
+.L0916: ldr x3, [x0] // load first doublewords
+ ldr x4, [x1]
+ sub x2, x2, #8 // x2 = offset of the last doubleword (1 to 8)
+ ldr x5, [x0, x2] // load last doublewords (overlap the first ones if length < 16)
+ ldr x6, [x1, x2]
+ eor x3, x3, x4 // x3 = differing bits in the first doublewords
+ eor x5, x5, x6 // x5 = differing bits in the last doublewords
+ orr x0, x3, x5 // 64-bit mask of all differing bits
+ orr x0, x0, x0, lsr #32 // ensure low 32 bits are nonzero iff mismatch
+ ret
+
+.L1732: ldr q0, [x0] // load first 16 bytes of each buffer
+ ldr q1, [x1]
+ sub x2, x2, #16 // x2 = offset of the last 16 bytes (1 to 16)
+ ldr q2, [x0, x2] // load last 16 bytes (overlap the first ones if length < 32)
+ ldr q3, [x1, x2]
+ eor v0.16b, v0.16b, v1.16b // v0 = differing bits in the first 16 bytes
+ eor v2.16b, v2.16b, v3.16b // v2 = differing bits in the last 16 bytes
+ orr v0.16b, v0.16b, v2.16b // v0 = all differing bits
+ umaxv s0, v0.4s // get a nonzero word if any
+ mov w0, v0.s[0] // return that word: zero iff no 32-bit lane differed
+ ret
+
+ /* more than 32 bytes: 32 bytes per step, v4 accumulates the OR of all differences */
+.Lgt32: ldp q0, q1, [x0], #32 // load first 32 bytes, post-increment x0 by 32
+ ldp q2, q3, [x1], #32 // same for the second buffer
+ eor v0.16b, v0.16b, v2.16b // differing bits, bytes 0--15
+ eor v1.16b, v1.16b, v3.16b // differing bits, bytes 16--31
+ orr v4.16b, v0.16b, v1.16b // initialise the accumulator v4
+ subs x2, x2, #64 // enough left for another iteration?
+ bls .Ltail // length <= 64: the tail covers the rest
+
+0: ldp q0, q1, [x0], #32 // load next 32 bytes, advance x0
+ ldp q2, q3, [x1], #32 // load next 32 bytes, advance x1
+ eor v0.16b, v0.16b, v2.16b // differing bits of this chunk
+ eor v1.16b, v1.16b, v3.16b
+ orr v0.16b, v0.16b, v1.16b // fold the two halves together
+ orr v4.16b, v4.16b, v0.16b // merge into the accumulator
+ subs x2, x2, #32 // x2 = bytes not yet loaded, minus 32
+ bhi 0b // loop while more than 32 bytes remain
+
+ /* process last 32 bytes; x2 is in -31..0 here, so this may re-read bytes already compared */
+.Ltail: add x0, x0, x2 // point to the last 32 bytes in the buffer
+ add x1, x1, x2 // same for the second buffer
+ ldp q0, q1, [x0] // load the final 32 bytes of each buffer
+ ldp q2, q3, [x1]
+ eor v0.16b, v0.16b, v2.16b // differing bits of the final chunk
+ eor v1.16b, v1.16b, v3.16b
+ orr v0.16b, v0.16b, v1.16b // fold the two halves together
+ orr v4.16b, v4.16b, v0.16b // merge into the accumulator
+ umaxv s0, v4.4s // get a nonzero word if any
+ mov w0, v0.s[0] // return that word: zero iff nothing differed
+ ret
+END(timingsafe_bcmp)