From nobody Thu Dec 28 17:20:16 2023 X-Original-To: dev-commits-src-branches@mlmmj.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mlmmj.nyi.freebsd.org (Postfix) with ESMTP id 4T1FdD53P5z56C9v; Thu, 28 Dec 2023 17:20:16 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4T1FdD2bXSz3Zr3; Thu, 28 Dec 2023 17:20:16 +0000 (UTC) (envelope-from git@FreeBSD.org) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=freebsd.org; s=dkim; t=1703784016; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=FTen5Uu2ukhUdWzOsje3nNDb4KAi9VfcdwDrT2jDNM0=; b=SSZCrWcClh4Kdhq0NRPVj1YST9FMc+mQRnGNFxqOQXMRZzPkkjrYnMc50G+aIqJNqXiEO4 MpNXIj4ibire5/JaIb33azR2o9AvZn5bpHLpHzypZsa8HRszQ63Rk8HtlIi38DKhJTuRtW AQ41H+EmetTcbl9DZ7/i1jRVbyCnXvGkC/h4xY+AmzJYz04HWJxaxbfNNo/SfvDbceJsKO vkZw7Lzk2VvHVo2jUUKIbIaS06c9dIwSozcfkeGkwVx8i7hzKiQ87VWsZNQ4ZVsQt3rxGp oyblCCRbBrp19q6Y7Y9esBblehIONZq5nLSWI0hr0yXthSMRxaeopAb46ZmiKA== ARC-Seal: i=1; s=dkim; d=freebsd.org; t=1703784016; a=rsa-sha256; cv=none; b=DNiCQzh5Be6BT4dQCDLv4OxsXQ11ojEtKi0dXCss0jAZ0r+BffN53cdzPD+wen31e1QtUl jWbZDIW0Dc/nayQZwRqQ60s8STqeMIU23gXQxKRkmT4j1wgEqpWfRzrGsqKHlauLES/4ue y+gClFLJ9ZRrJg/l0HUy+mHhcMac+WR/Be4ji7bgU2CGKGgj1r3U3Yzh6Z0TKJi9yXepl/ EDuCl2WrnIkTNf9eBloNllTvOesz8KwItjn1iDsPV+AvETfHWLJOJzD6LuSTW3ZZbjEXM9 b/mrYLUgFzMVU/H4A9pXbRpEosuBJCvbyO/ahMfV0sjt+iC6LpYYBSKQD7wozA== ARC-Authentication-Results: i=1; mx1.freebsd.org; none ARC-Message-Signature: i=1; 
a=rsa-sha256; c=relaxed/relaxed; d=freebsd.org; s=dkim; t=1703784016; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=FTen5Uu2ukhUdWzOsje3nNDb4KAi9VfcdwDrT2jDNM0=; b=Vps+jUpSpTfdXBCIe35zXI4Zm0lki0slr2qCkjOX09cCFdKyvrFMrJ87mTm7Ht27UOJq5a 9NgkxTPAbktgyWySVsNzc463QI1vGNityOjTY2l6EYSuxuiLlRjFe5UZs2rtLqxGgBBzID YVZB3IJi3Bts4jeGEW36NAGSwW61njea3gMMoW49+AsGZ9fOKgBRN7YqZ8rZqlrYg2AmOV XePId90XzyxFDCKBd+tgV5qej82AcdubzlbxEpJVal1ly2SBrmVM0XM7Z/wGwSFS0jVIYH MZ5EikgLr/0nb5ZXFuFFDQDHs+QYJ/ywbYbkBrnYPwUMlfAugolbxYfz0tBdVg== Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 4T1FdD1j08zWxY; Thu, 28 Dec 2023 17:20:16 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.17.1/8.17.1) with ESMTP id 3BSHKGmu081407; Thu, 28 Dec 2023 17:20:16 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.17.1/8.17.1/Submit) id 3BSHKGnS081402; Thu, 28 Dec 2023 17:20:16 GMT (envelope-from git) Date: Thu, 28 Dec 2023 17:20:16 GMT Message-Id: <202312281720.3BSHKGnS081402@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org From: Robert Clausecker Subject: git: 1347ec5d5845 - stable/14 - lib/libc/amd64/string: add timingsafe_bcmp(3) scalar, baseline implementations List-Id: Commits to the stable branches of the FreeBSD src repository List-Archive: https://lists.freebsd.org/archives/dev-commits-src-branches List-Help: List-Post: List-Subscribe: List-Unsubscribe: Sender: owner-dev-commits-src-branches@freebsd.org 
X-BeenThere: dev-commits-src-branches@freebsd.org MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit X-Git-Committer: fuz X-Git-Repository: src X-Git-Refname: refs/heads/stable/14 X-Git-Reftype: branch X-Git-Commit: 1347ec5d5845886a9da0903700f54110f0241b8a Auto-Submitted: auto-generated The branch stable/14 has been updated by fuz: URL: https://cgit.FreeBSD.org/src/commit/?id=1347ec5d5845886a9da0903700f54110f0241b8a commit 1347ec5d5845886a9da0903700f54110f0241b8a Author: Robert Clausecker AuthorDate: 2023-08-30 15:37:26 +0000 Commit: Robert Clausecker CommitDate: 2023-12-28 17:02:41 +0000 lib/libc/amd64/string: add timingsafe_bcmp(3) scalar, baseline implementations Very straightforward and similar to memcmp(3). The code has been written to use only instructions specified as having data operand independent timing by Intel. Sponsored by: The FreeBSD Foundation Approved by: security (cperciva) Differential Revision: https://reviews.freebsd.org/D41673 (cherry picked from commit 76c2b331bcd9f73c5c8c43a06e328fa0c7b8c39a) --- lib/libc/amd64/string/Makefile.inc | 3 +- lib/libc/amd64/string/timingsafe_bcmp.S | 232 ++++++++++++++++++++++++++++++++ 2 files changed, 234 insertions(+), 1 deletion(-) diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc index 73973a6d69de..fc420de0450e 100644 --- a/lib/libc/amd64/string/Makefile.inc +++ b/lib/libc/amd64/string/Makefile.inc @@ -15,4 +15,5 @@ MDSRCS+= \ strcspn.S \ strlen.S \ strnlen.c \ - strspn.S + strspn.S \ + timingsafe_bcmp.S diff --git a/lib/libc/amd64/string/timingsafe_bcmp.S b/lib/libc/amd64/string/timingsafe_bcmp.S new file mode 100644 index 000000000000..c003da2ea9a7 --- /dev/null +++ b/lib/libc/amd64/string/timingsafe_bcmp.S @@ -0,0 +1,232 @@ +/*- + * Copyright (c) 2023 The FreeBSD Foundation + * + * This software was developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+#include <machine/asm.h>
+
+#include "amd64_archlevel.h"
+
+#define ALIGN_TEXT	.p2align 4,0x90	/* 16-byte alignment, nop filled */
+
+/*
+ * timingsafe_bcmp(3): test two buffers of the same length for equality.
+ *
+ * In:	%rdi = first buffer
+ *	%rsi = second buffer
+ *	%rdx = length of both buffers in bytes
+ * Out:	%eax = 0 if all bytes are equal, nonzero otherwise
+ *
+ * Every conditional branch below tests the length only, never the
+ * buffer contents, and differences are accumulated with xor/or (or
+ * pcmpeqb/pand) instead of leaving at the first mismatch.
+ *
+ * Lengths up to the chunk size are handled without a loop by loading
+ * one item from the start of the buffer and one item ending at its
+ * last byte.  The two items may overlap, which is harmless as bytes
+ * compared twice cannot change the result.  The same trick is used
+ * for the final chunk of the loops.
+ *
+ * NOTE(review): ARCHFUNCS, ARCHFUNC, ENDARCHFUNCS, ARCHENTRY and
+ * ARCHEND are defined in amd64_archlevel.h; presumably they register
+ * the variants below for selection by CPU capability.  Confirm
+ * against that header.
+ */
+ARCHFUNCS(timingsafe_bcmp)
+	ARCHFUNC(timingsafe_bcmp, scalar)
+	ARCHFUNC(timingsafe_bcmp, baseline)
+ENDARCHFUNCS(timingsafe_bcmp)
+
+/*
+ * Scalar variant: general purpose registers only, 16 bytes per loop
+ * iteration.  Clobbers %rax, %rcx, %r8, %r9 and the flags.
+ *
+ * After the first comparison the length is known to be at most 16,
+ * so the remaining length checks use the 32 bit register %edx.
+ */
+ARCHENTRY(timingsafe_bcmp, scalar)
+	cmp	$16, %rdx		# at least 17 bytes to process?
+	ja	.Lgt16
+
+	cmp	$8, %edx		# at least 9 bytes to process?
+	ja	.L0916
+
+	cmp	$4, %edx		# at least 5 bytes to process?
+	ja	.L0508
+
+	cmp	$2, %edx		# at least 3 bytes to process?
+	ja	.L0304
+
+	test	%edx, %edx		# buffer empty?
+	jnz	.L0102
+
+	xor	%eax, %eax		# empty buffer always matches
+	ret
+
+	/* 1--2 bytes: compare first and last byte */
+.L0102:	movzbl	(%rdi), %eax		# load 1--2 bytes from first buffer
+	movzbl	-1(%rdi, %rdx, 1), %ecx
+	xor	(%rsi), %al		# xor in second buffer
+	xor	-1(%rsi, %rdx, 1), %cl
+	or	%ecx, %eax		# mismatch in any of the two?
+	ret
+
+	/* 3--4 bytes: compare first and last 2 bytes */
+.L0304:	movzwl	(%rdi), %eax
+	movzwl	-2(%rdi, %rdx, 1), %ecx
+	xor	(%rsi), %ax
+	xor	-2(%rsi, %rdx, 1), %cx
+	or	%ecx, %eax
+	ret
+
+	/* 5--8 bytes: compare first and last 4 bytes */
+.L0508:	mov	(%rdi), %eax
+	mov	-4(%rdi, %rdx, 1), %ecx
+	xor	(%rsi), %eax
+	xor	-4(%rsi, %rdx, 1), %ecx
+	or	%ecx, %eax
+	ret
+
+	/* 9--16 bytes: compare first and last 8 bytes */
+.L0916:	mov	(%rdi), %rax
+	mov	-8(%rdi, %rdx, 1), %rcx
+	xor	(%rsi), %rax
+	xor	-8(%rsi, %rdx, 1), %rcx
+	or	%rcx, %rax
+	setnz	%al			# ensure EAX nonzero even if only
+	ret				# high bits of RAX were set
+
+	/* more than 16 bytes: process buffer in a loop */
+.Lgt16:	mov	(%rdi), %rax		# process first 16 bytes
+	mov	8(%rdi), %r9
+	mov	$32, %ecx		# RCX: end offset of the next chunk
+	xor	(%rsi), %rax
+	xor	8(%rsi), %r9
+	or	%r9, %rax		# RAX: accumulated differences
+
+	cmp	%rdx, %rcx		# enough left for a full iteration?
+	jae	.Ltail
+
+	/* main loop processing 16 bytes per iteration */
+	ALIGN_TEXT
+0:	mov	-16(%rdi, %rcx, 1), %r8	# chunk covers offsets RCX-16 to RCX-1
+	mov	-8(%rdi, %rcx, 1), %r9
+	xor	-16(%rsi, %rcx, 1), %r8
+	xor	-8(%rsi, %rcx, 1), %r9
+	add	$16, %rcx
+	or	%r9, %r8
+	or	%r8, %rax
+
+	cmp	%rdx, %rcx		# another chunk ending before the end?
+	jb	0b
+
+	/* process last 16 bytes */
+.Ltail:	mov	-16(%rdi, %rdx, 1), %r8	# may overlap bytes already compared
+	mov	-8(%rdi, %rdx, 1), %r9
+	xor	-16(%rsi, %rdx, 1), %r8
+	xor	-8(%rsi, %rdx, 1), %r9
+	or	%r9, %r8
+	or	%r8, %rax
+	setnz	%al			# as above: make EAX reflect all of RAX
+	ret
+ARCHEND(timingsafe_bcmp, scalar)
+
+/*
+ * Baseline variant: same as the scalar variant for up to 16 bytes,
+ * then 128 bit XMM registers, 32 bytes per loop iteration.
+ * Clobbers %rax, %rcx, %xmm0--%xmm4 and the flags.
+ *
+ * pcmpeqb yields 0xff in each byte that is equal and 0x00 in each
+ * byte that differs, so the partial results are combined with pand.
+ * pmovmskb then gathers one bit per byte into the low 16 bits of
+ * EAX and the final xor inverts them so that a set bit indicates
+ * a mismatch.
+ */
+ARCHENTRY(timingsafe_bcmp, baseline)
+	cmp	$32, %rdx		# at least 33 bytes to process?
+	ja	.Lgt32b
+
+	cmp	$16, %edx		# at least 17 bytes to process?
+	ja	.L1732b
+
+	cmp	$8, %edx		# at least 9 bytes to process?
+	ja	.L0916b
+
+	cmp	$4, %edx		# at least 5 bytes to process?
+	ja	.L0508b
+
+	cmp	$2, %edx		# at least 3 bytes to process?
+	ja	.L0304b
+
+	test	%edx, %edx		# buffer empty?
+	jnz	.L0102b
+
+	xor	%eax, %eax		# empty buffer always matches
+	ret
+
+	/* 1--2 bytes: compare first and last byte */
+.L0102b:
+	movzbl	(%rdi), %eax		# load 1--2 bytes from first buffer
+	movzbl	-1(%rdi, %rdx, 1), %ecx
+	xor	(%rsi), %al		# xor in second buffer
+	xor	-1(%rsi, %rdx, 1), %cl
+	or	%ecx, %eax		# mismatch in any of the two?
+	ret
+
+	/* 3--4 bytes: compare first and last 2 bytes */
+.L0304b:
+	movzwl	(%rdi), %eax
+	movzwl	-2(%rdi, %rdx, 1), %ecx
+	xor	(%rsi), %ax
+	xor	-2(%rsi, %rdx, 1), %cx
+	or	%ecx, %eax
+	ret
+
+	/* 5--8 bytes: compare first and last 4 bytes */
+.L0508b:
+	mov	(%rdi), %eax
+	mov	-4(%rdi, %rdx, 1), %ecx
+	xor	(%rsi), %eax
+	xor	-4(%rsi, %rdx, 1), %ecx
+	or	%ecx, %eax
+	ret
+
+	/* 9--16 bytes: compare first and last 8 bytes */
+.L0916b:
+	mov	(%rdi), %rax
+	mov	-8(%rdi, %rdx, 1), %rcx
+	xor	(%rsi), %rax
+	xor	-8(%rsi, %rdx, 1), %rcx
+	or	%rcx, %rax
+	setnz	%al			# ensure EAX nonzero even if only
+	ret				# high bits of RAX were set
+
+	/* 17--32 bytes: compare first and last 16 bytes */
+.L1732b:
+	movdqu	(%rdi), %xmm0
+	movdqu	(%rsi), %xmm2
+	movdqu	-16(%rdi, %rdx, 1), %xmm1
+	movdqu	-16(%rsi, %rdx, 1), %xmm3
+	pcmpeqb	%xmm2, %xmm0
+	pcmpeqb	%xmm3, %xmm1
+	pand	%xmm1, %xmm0
+	pmovmskb %xmm0, %eax		# 1 where equal
+	xor	$0xffff, %eax		# 1 where not equal
+	ret
+
+	/* more than 32 bytes: process buffer in a loop */
+.Lgt32b:
+	movdqu	(%rdi), %xmm4		# process first 32 bytes
+	movdqu	(%rsi), %xmm2
+	movdqu	16(%rdi), %xmm1
+	movdqu	16(%rsi), %xmm3
+	mov	$64, %ecx		# RCX: end offset of the next chunk
+	pcmpeqb	%xmm2, %xmm4
+	pcmpeqb	%xmm3, %xmm1
+	pand	%xmm1, %xmm4		# XMM4: 0xff where all equal so far
+	cmp	%rdx, %rcx		# enough left for a full iteration?
+	jae	.Ltailb
+
+	/* main loop processing 32 bytes per iteration */
+	ALIGN_TEXT
+0:	movdqu	-32(%rdi, %rcx, 1), %xmm0	# chunk: offsets RCX-32 to RCX-1
+	movdqu	-32(%rsi, %rcx, 1), %xmm2
+	movdqu	-16(%rdi, %rcx, 1), %xmm1
+	movdqu	-16(%rsi, %rcx, 1), %xmm3
+	add	$32, %rcx
+	pcmpeqb	%xmm2, %xmm0
+	pcmpeqb	%xmm3, %xmm1
+	pand	%xmm1, %xmm0
+	pand	%xmm0, %xmm4
+	cmp	%rdx, %rcx		# another chunk ending before the end?
+	jb	0b
+
+	/* process last 32 bytes */
+.Ltailb:
+	movdqu	-32(%rdi, %rdx, 1), %xmm0	# may overlap bytes already compared
+	movdqu	-32(%rsi, %rdx, 1), %xmm2
+	movdqu	-16(%rdi, %rdx, 1), %xmm1
+	movdqu	-16(%rsi, %rdx, 1), %xmm3
+	pcmpeqb	%xmm2, %xmm0
+	pcmpeqb	%xmm3, %xmm1
+	pand	%xmm1, %xmm0
+	pand	%xmm4, %xmm0		# fold in results of head and loop
+	pmovmskb %xmm0, %eax		# 1 where equal
+	xor	$0xffff, %eax		# 1 where not equal
+	ret
+ARCHEND(timingsafe_bcmp, baseline)
+
+	.section .note.GNU-stack,"",%progbits