git: 8ca78eb03fd4 - stable/13 - LinuxKPI: Make FPU sections thread-safe and use the NOCTX flag.

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Wed, 22 Mar 2023 16:32:56 UTC
The branch stable/13 has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=8ca78eb03fd4b3c9f514ea6c075fc44dc9c02d27

commit 8ca78eb03fd4b3c9f514ea6c075fc44dc9c02d27
Author:     Hans Petter Selasky <hselasky@FreeBSD.org>
AuthorDate: 2021-07-31 13:32:52 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2023-03-22 15:32:56 +0000

    LinuxKPI: Make FPU sections thread-safe and use the NOCTX flag.
    
    Reviewed by:    kib
    Submitted by:   greg@unrelenting.technology
    Differential Revision:  https://reviews.freebsd.org/D29921
    MFC after:      1 week
    Sponsored by:   NVIDIA Networking
    
    (cherry picked from commit 469884cf04a9b92677c7c83e229ca6b8814f8b0a)
---
 sys/compat/linuxkpi/common/include/asm/fpu/api.h | 40 ++++------------------
 sys/compat/linuxkpi/common/include/linux/sched.h |  3 +-
 sys/compat/linuxkpi/common/src/linux_fpu.c       | 43 ++++++++++++++++++------
 sys/conf/files.amd64                             |  3 --
 sys/conf/files.arm64                             |  4 ---
 sys/conf/files.i386                              |  3 --
 sys/modules/linuxkpi/Makefile                    |  3 +-
 7 files changed, 43 insertions(+), 56 deletions(-)

diff --git a/sys/compat/linuxkpi/common/include/asm/fpu/api.h b/sys/compat/linuxkpi/common/include/asm/fpu/api.h
index 4b598d88be03..133754abdc4b 100644
--- a/sys/compat/linuxkpi/common/include/asm/fpu/api.h
+++ b/sys/compat/linuxkpi/common/include/asm/fpu/api.h
@@ -28,41 +28,13 @@
 #ifndef	_LINUXKPI_ASM_FPU_API_H_
 #define	_LINUXKPI_ASM_FPU_API_H_
 
-#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
+#define	kernel_fpu_begin() \
+   lkpi_kernel_fpu_begin()
 
-#include <machine/fpu.h>
+#define	kernel_fpu_end() \
+   lkpi_kernel_fpu_end()
 
-extern struct fpu_kern_ctx *__lkpi_fpu_ctx;
-extern unsigned int __lkpi_fpu_ctx_level;
-
-static inline void
-kernel_fpu_begin()
-{
-	if (__lkpi_fpu_ctx_level++ == 0) {
-		fpu_kern_enter(curthread, __lkpi_fpu_ctx, FPU_KERN_NORMAL);
-	}
-}
-
-static inline void
-kernel_fpu_end()
-{
-	if (--__lkpi_fpu_ctx_level == 0) {
-		fpu_kern_leave(curthread, __lkpi_fpu_ctx);
-	}
-}
-
-#else
-
-static inline void
-kernel_fpu_begin()
-{
-}
-
-static inline void
-kernel_fpu_end()
-{
-}
-
-#endif
+extern void lkpi_kernel_fpu_begin(void);
+extern void lkpi_kernel_fpu_end(void);
 
 #endif /* _LINUXKPI_ASM_FPU_API_H_ */
diff --git a/sys/compat/linuxkpi/common/include/linux/sched.h b/sys/compat/linuxkpi/common/include/linux/sched.h
index 6e0a01aa8466..f895cba0d894 100644
--- a/sys/compat/linuxkpi/common/include/linux/sched.h
+++ b/sys/compat/linuxkpi/common/include/linux/sched.h
@@ -89,7 +89,8 @@ struct task_struct {
 	int bsd_interrupt_value;
 	struct work_struct *work;	/* current work struct, if set */
 	struct task_struct *group_leader;
-  	unsigned rcu_section[TS_RCU_TYPE_MAX];
+	unsigned rcu_section[TS_RCU_TYPE_MAX];
+	unsigned int fpu_ctx_level;
 };
 
 #define	current	({ \
diff --git a/sys/compat/linuxkpi/common/src/linux_fpu.c b/sys/compat/linuxkpi/common/src/linux_fpu.c
index 976e55e68ca1..08f7e075d827 100644
--- a/sys/compat/linuxkpi/common/src/linux_fpu.c
+++ b/sys/compat/linuxkpi/common/src/linux_fpu.c
@@ -30,21 +30,44 @@
 #include <sys/proc.h>
 #include <sys/kernel.h>
 
+#include <linux/sched.h>
+
+#include <asm/fpu/api.h>
+
+#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
+
 #include <machine/fpu.h>
 
-struct fpu_kern_ctx *__lkpi_fpu_ctx;
-unsigned int __lkpi_fpu_ctx_level = 0;
+/*
+ * Technically the Linux API, like FPU_KERN_NOCTX sections, isn't supposed
+ * to allow nesting, but currently used versions of GPU drivers rely on it
+ * working, so we only enter the section on the outermost level.
+ */
+
+void
+lkpi_kernel_fpu_begin(void)
+{
+	if ((current->fpu_ctx_level)++ == 0)
+		fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
+}
+
+void
+lkpi_kernel_fpu_end(void)
+{
+	if (--(current->fpu_ctx_level) == 0)
+		fpu_kern_leave(curthread, NULL);
+}
+
+#else
 
-static void
-linux_fpu_init(void *arg __unused)
+void
+lkpi_kernel_fpu_begin(void)
 {
-	__lkpi_fpu_ctx = fpu_kern_alloc_ctx(0);
 }
-SYSINIT(linux_fpu, SI_SUB_EVENTHANDLER, SI_ORDER_SECOND, linux_fpu_init, NULL);
 
-static void
-linux_fpu_uninit(void *arg __unused)
+void
+lkpi_kernel_fpu_end(void)
 {
-	fpu_kern_free_ctx(__lkpi_fpu_ctx);
 }
-SYSUNINIT(linux_fpu, SI_SUB_EVENTHANDLER, SI_ORDER_SECOND, linux_fpu_uninit, NULL);
+
+#endif
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index b967a3be2a4f..4ae6fc3f33a1 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -436,9 +436,6 @@ x86/xen/pv.c			optional	xenhvm
 x86/xen/pvcpu_enum.c		optional	xenhvm
 x86/xen/xen_pci_bus.c		optional	xenhvm
 
-compat/linuxkpi/common/src/linux_fpu.c		optional compat_linuxkpi \
-	compile-with "${LINUXKPI_C}"
-
 contrib/openzfs/module/zcommon/zfs_fletcher_avx512.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zcommon/zfs_fletcher_intel.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zcommon/zfs_fletcher_sse.c		optional zfs compile-with "${ZFS_C}"
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
index 6b96de28128d..3e91db3e22c6 100644
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -575,10 +575,6 @@ arm64/rockchip/clk/rk3399_pmucru.c		optional fdt soc_rockchip_rk3399
 # Xilinx
 arm/xilinx/uart_dev_cdnc.c			optional uart soc_xilinx_zynq
 
-# Linuxkpi
-compat/linuxkpi/common/src/linux_fpu.c		optional compat_linuxkpi \
-	compile-with "${LINUXKPI_C}"
-
 # Cloudabi
 arm64/cloudabi32/cloudabi32_sysvec.c		optional compat_cloudabi32
 arm64/cloudabi64/cloudabi64_sysvec.c		optional compat_cloudabi64
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index 4be98810b32e..89e1d47987c4 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -189,9 +189,6 @@ x86/x86/mptable.c		optional apic
 x86/x86/mptable_pci.c		optional apic pci
 x86/x86/msi.c			optional apic pci
 
-compat/linuxkpi/common/src/linux_fpu.c		optional compat_linuxkpi \
-	compile-with "${LINUXKPI_C}"
-
 # Clock calibration subroutine; uses floating-point arithmetic
 subr_clockcalib.o		standard				\
 	dependency	"$S/kern/subr_clockcalib.c"			\
diff --git a/sys/modules/linuxkpi/Makefile b/sys/modules/linuxkpi/Makefile
index 2ed6cda98b17..897c86f51733 100644
--- a/sys/modules/linuxkpi/Makefile
+++ b/sys/modules/linuxkpi/Makefile
@@ -8,6 +8,7 @@ SRCS=	linux_compat.c \
 	linux_dmi.c \
 	linux_domain.c \
 	linux_firmware.c \
+	linux_fpu.c \
 	linux_hrtimer.c \
 	linux_idr.c \
 	linux_interrupt.c \
@@ -35,7 +36,7 @@ SRCS=	linux_compat.c \
 
 .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \
     ${MACHINE_CPUARCH} == "i386"
-SRCS+=	opt_acpi.h acpi_if.h linux_acpi.c linux_fpu.c
+SRCS+=	opt_acpi.h acpi_if.h linux_acpi.c
 .endif
 
 SRCS+=	opt_ddb.h