git: 6c8358cd7ffd - main - stand: back out most of the horrible aarch64 kludge

From: Warner Losh <imp_at_FreeBSD.org>
Date: Mon, 01 May 2023 21:04:24 UTC
The branch main has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=6c8358cd7ffd71acec74036429a8afb502720722

commit 6c8358cd7ffd71acec74036429a8afb502720722
Author:     Warner Losh <imp@FreeBSD.org>
AuthorDate: 2023-05-01 16:07:00 +0000
Commit:     Warner Losh <imp@FreeBSD.org>
CommitDate: 2023-05-01 21:02:54 +0000

    stand: back out most of the horrible aarch64 kludge
    
    Add one ifdef to the upstream code and stop compiling the horrible
    checked-in aarch64 assembler for the boot loader that the loader will
    never use. I'll attempt to upstream this and adjust as needed.
    
    Sponsored by:           Netflix
    Differential Revision:  https://reviews.freebsd.org/D39897
---
 stand/libsa/zfs/Makefile.inc                       | 33 ++++------------------
 stand/libsa/zfs/blake3_impl_hack.c                 | 23 ++-------------
 .../openzfs/module/icp/algs/blake3/blake3_impl.c   | 13 +++++----
 3 files changed, 15 insertions(+), 54 deletions(-)

diff --git a/stand/libsa/zfs/Makefile.inc b/stand/libsa/zfs/Makefile.inc
index 7660f4ab7baf..6eded5659602 100644
--- a/stand/libsa/zfs/Makefile.inc
+++ b/stand/libsa/zfs/Makefile.inc
@@ -19,11 +19,6 @@ ZSTD_SRC+=	zstd_common.c
 ZSTD_SRC+=	zstd_ddict.c zstd_decompress.c zstd_decompress_block.c
 ZSTD_SRC+=	zstd_double_fast.c zstd_fast.c zstd_lazy.c zstd_ldm.c
 
-# This is completely bogus: We should be able to omit this code completely.
-.if ${MACHINE_ARCH} == "aarch64"
-ZFS_SRC_AS =	b3_aarch64_sse2.S b3_aarch64_sse41.S
-.endif
-
 SRCS+=		${ZFS_SRC} ${ZSTD_SRC} ${ZFS_SRC_AS}
 
 #
@@ -52,9 +47,9 @@ CFLAGS.$i+=	-include ${ZFSOSINC}/spl/sys/ccompile.h -Wformat -Wall -I${OZFS}/inc
 	-DNEED_SOLARIS_BOOLEAN
 .endfor
 
-CFLAGS_EARLY.blake3.c+= ${ZFS_EARLY}
-CFLAGS_EARLY.blake3_generic.c+= ${ZFS_EARLY}
-CFLAGS_EARLY.blake3_impl_hack.c+= ${ZFS_EARLY}
+CFLAGS_EARLY.blake3.c+= ${ZFS_EARLY} -DOMIT_SIMD
+CFLAGS_EARLY.blake3_generic.c+= ${ZFS_EARLY} -DOMIT_SIMD
+CFLAGS_EARLY.blake3_impl_hack.c+= ${ZFS_EARLY} -DOMIT_SIMD
 CFLAGS_EARLY.list.c+= ${ZFS_EARLY}
 CFLAGS_EARLY.zfs_zstd.c+= ${ZFS_EARLY}
 CFLAGS_EARLY.nvlist.c+= ${ZFS_EARLY}
@@ -73,9 +68,6 @@ CFLAGS.zfs.c+=	-DHAS_ZSTD_ZFS					\
 		-I${SYSDIR}/crypto/skein			\
 		-I${SRCTOP}/sys/cddl/contrib/opensolaris/common/lz4
 
-.for i in ${ZFS_SRC_AS}
-CFLAGS.$i+=	-DLOCORE
-.endfor
 #
 # ZSTD coding style has some issues, so suppress clang's warnings. Also, zstd's
 # use of BMI instrucitons is broken in this environment, so avoid them.
@@ -86,24 +78,9 @@ CFLAGS.$i+=	-U__BMI__ ${NO_WBITWISE_INSTEAD_OF_LOGICAL}
 
 CFLAGS.zfs_zstd.c+= -DIN_BASE -DIN_LIBSA
 
+CFLAGS.blake3_impl_hack.c+= -I${OZFS}/module/icp/algs/blake3 -I${OZFS}/module/icp/include
+
 # Do not unroll skein loops, reduce code size
 CFLAGS.skein_block.c+=	-DSKEIN_LOOP=111
 
-# To find blake3_impl.c in OpenZFS tree for our somehat ugly blake3_impl_hack.c
-# that's needed until the necessary tweaks can be upstreamed.
-# XXX the last import gutted all this since upstream changes broke this hack.
-CFLAGS.blake3_impl_hack.c+= -I${OZFS}/module/icp/algs/blake3 -I${OZFS}/module/icp/include
-
 CWARNFLAGS.zfs.c+= ${NO_WDANGLING_POINTER}
-
-# Needing to remove the -mgeneral-regs-only is a red flag that this is not quite
-# right. But it's needed at the moment due to the muddled upstream.
-b3_aarch64_sse2.o: b3_aarch64_sse2.S
-	${CC} -c ${CFLAGS:N-mgeneral-regs-only} ${WERROR} ${.IMPSRC} \
-	    -o ${.TARGET}
-	${CTFCONVERT_CMD}
-
-b3_aarch64_sse41.o: b3_aarch64_sse41.S
-	${CC} -c ${CFLAGS:N-mgeneral-regs-only} ${WERROR} ${.IMPSRC} \
-	     -o ${.TARGET}
-	${CTFCONVERT_CMD}
diff --git a/stand/libsa/zfs/blake3_impl_hack.c b/stand/libsa/zfs/blake3_impl_hack.c
index 2be6cc54e774..789807714e2c 100644
--- a/stand/libsa/zfs/blake3_impl_hack.c
+++ b/stand/libsa/zfs/blake3_impl_hack.c
@@ -5,8 +5,8 @@
  */
 
 /*
- * Hack for aarch64... There's no way to tell it omit the SIMD
- * versions, so we fake it here.
+ * Hack for aarch64...  Not sure why isspace isn't defined, but it sure doesn't
+ * belong here.
  */
 #ifndef isspace
 static __inline int isspace(int c)
@@ -16,22 +16,3 @@ static __inline int isspace(int c)
 #endif
 
 #include "blake3_impl.c"
-
-/*
-static inline boolean_t blake3_is_not_supported(void)
-{
-	return (B_FALSE);
-}
-
-const blake3_ops_t blake3_sse2_impl = {
-	.is_supported = blake3_is_not_supported,
-	.degree = 4,
-	.name = "fakesse2"
-};
-
-const blake3_ops_t blake3_sse41_impl = {
-	.is_supported = blake3_is_not_supported,
-	.degree = 4,
-	.name = "fakesse41"
-};
-*/
diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
index f3f48c2dfa1a..5684b4ff1a97 100644
--- a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
+++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
@@ -30,10 +30,13 @@
 
 #include "blake3_impl.h"
 
-#if defined(__aarch64__) || \
+#if !defined(OMIT_SIMD) && (defined(__aarch64__) ||  \
 	(defined(__x86_64) && defined(HAVE_SSE2)) || \
-	(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+    (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)))
+#define USE_SIMD
+#endif
 
+#ifdef USE_SIMD
 extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
     uint64_t counter, uint8_t flags);
@@ -96,9 +99,7 @@ const blake3_ops_t blake3_sse2_impl = {
 };
 #endif
 
-#if defined(__aarch64__) || \
-	(defined(__x86_64) && defined(HAVE_SSE2)) || \
-	(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+#ifdef USE_SIMD
 
 extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
@@ -257,6 +258,7 @@ extern const blake3_ops_t blake3_generic_impl;
 
 static const blake3_ops_t *const blake3_impls[] = {
 	&blake3_generic_impl,
+#ifdef USE_SIMD
 #if defined(__aarch64__) || \
 	(defined(__x86_64) && defined(HAVE_SSE2)) || \
 	(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
@@ -273,6 +275,7 @@ static const blake3_ops_t *const blake3_impls[] = {
 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
 	&blake3_avx512_impl,
 #endif
+#endif
 };
 
 /* use the generic implementation functions */