git: fa6603d4a725 - stable/13 - Revert "LinuxKPI: Make FPU sections thread-safe and use the NOCTX flag."

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Sat, 27 May 2023 19:34:19 UTC
The branch stable/13 has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=fa6603d4a7250bef7bc51bd3a2805b0d2ead3541

commit fa6603d4a7250bef7bc51bd3a2805b0d2ead3541
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2023-05-27 19:23:20 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2023-05-27 19:23:37 +0000

    Revert "LinuxKPI: Make FPU sections thread-safe and use the NOCTX flag."
    
    This broke part of the KBI used by drm-kmod.
    
    This reverts commit 8ca78eb03fd4b3c9f514ea6c075fc44dc9c02d27.
    
    Reported by:    manu
---
 sys/compat/linuxkpi/common/include/asm/fpu/api.h | 40 ++++++++++++++++++----
 sys/compat/linuxkpi/common/include/linux/sched.h |  3 +-
 sys/compat/linuxkpi/common/src/linux_fpu.c       | 43 ++++++------------------
 sys/conf/files.amd64                             |  3 ++
 sys/conf/files.arm64                             |  4 +++
 sys/conf/files.i386                              |  3 ++
 sys/modules/linuxkpi/Makefile                    |  3 +-
 7 files changed, 56 insertions(+), 43 deletions(-)

diff --git a/sys/compat/linuxkpi/common/include/asm/fpu/api.h b/sys/compat/linuxkpi/common/include/asm/fpu/api.h
index 133754abdc4b..4b598d88be03 100644
--- a/sys/compat/linuxkpi/common/include/asm/fpu/api.h
+++ b/sys/compat/linuxkpi/common/include/asm/fpu/api.h
@@ -28,13 +28,41 @@
 #ifndef	_LINUXKPI_ASM_FPU_API_H_
 #define	_LINUXKPI_ASM_FPU_API_H_
 
-#define	kernel_fpu_begin() \
-   lkpi_kernel_fpu_begin()
+#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
 
-#define	kernel_fpu_end() \
-   lkpi_kernel_fpu_end()
+#include <machine/fpu.h>
 
-extern void lkpi_kernel_fpu_begin(void);
-extern void lkpi_kernel_fpu_end(void);
+extern struct fpu_kern_ctx *__lkpi_fpu_ctx;
+extern unsigned int __lkpi_fpu_ctx_level;
+
+static inline void
+kernel_fpu_begin()
+{
+	if (__lkpi_fpu_ctx_level++ == 0) {
+		fpu_kern_enter(curthread, __lkpi_fpu_ctx, FPU_KERN_NORMAL);
+	}
+}
+
+static inline void
+kernel_fpu_end()
+{
+	if (--__lkpi_fpu_ctx_level == 0) {
+		fpu_kern_leave(curthread, __lkpi_fpu_ctx);
+	}
+}
+
+#else
+
+static inline void
+kernel_fpu_begin()
+{
+}
+
+static inline void
+kernel_fpu_end()
+{
+}
+
+#endif
 
 #endif /* _LINUXKPI_ASM_FPU_API_H_ */
diff --git a/sys/compat/linuxkpi/common/include/linux/sched.h b/sys/compat/linuxkpi/common/include/linux/sched.h
index f895cba0d894..6e0a01aa8466 100644
--- a/sys/compat/linuxkpi/common/include/linux/sched.h
+++ b/sys/compat/linuxkpi/common/include/linux/sched.h
@@ -89,8 +89,7 @@ struct task_struct {
 	int bsd_interrupt_value;
 	struct work_struct *work;	/* current work struct, if set */
 	struct task_struct *group_leader;
-	unsigned rcu_section[TS_RCU_TYPE_MAX];
-	unsigned int fpu_ctx_level;
+  	unsigned rcu_section[TS_RCU_TYPE_MAX];
 };
 
 #define	current	({ \
diff --git a/sys/compat/linuxkpi/common/src/linux_fpu.c b/sys/compat/linuxkpi/common/src/linux_fpu.c
index 08f7e075d827..976e55e68ca1 100644
--- a/sys/compat/linuxkpi/common/src/linux_fpu.c
+++ b/sys/compat/linuxkpi/common/src/linux_fpu.c
@@ -30,44 +30,21 @@
 #include <sys/proc.h>
 #include <sys/kernel.h>
 
-#include <linux/sched.h>
-
-#include <asm/fpu/api.h>
-
-#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
-
 #include <machine/fpu.h>
 
-/*
- * Technically the Linux API isn't supposed to allow nesting sections
- * either, but currently used versions of GPU drivers rely on nesting
- * working, so we only enter the section on the outermost level.
- */
-
-void
-lkpi_kernel_fpu_begin(void)
-{
-	if ((current->fpu_ctx_level)++ == 0)
-		fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
-}
-
-void
-lkpi_kernel_fpu_end(void)
-{
-	if (--(current->fpu_ctx_level) == 0)
-		fpu_kern_leave(curthread, NULL);
-}
-
-#else
+struct fpu_kern_ctx *__lkpi_fpu_ctx;
+unsigned int __lkpi_fpu_ctx_level = 0;
 
-void
-lkpi_kernel_fpu_begin(void)
+static void
+linux_fpu_init(void *arg __unused)
 {
+	__lkpi_fpu_ctx = fpu_kern_alloc_ctx(0);
 }
+SYSINIT(linux_fpu, SI_SUB_EVENTHANDLER, SI_ORDER_SECOND, linux_fpu_init, NULL);
 
-void
-lkpi_kernel_fpu_end(void)
+static void
+linux_fpu_uninit(void *arg __unused)
 {
+	fpu_kern_free_ctx(__lkpi_fpu_ctx);
 }
-
-#endif
+SYSUNINIT(linux_fpu, SI_SUB_EVENTHANDLER, SI_ORDER_SECOND, linux_fpu_uninit, NULL);
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 4ae6fc3f33a1..b967a3be2a4f 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -436,6 +436,9 @@ x86/xen/pv.c			optional	xenhvm
 x86/xen/pvcpu_enum.c		optional	xenhvm
 x86/xen/xen_pci_bus.c		optional	xenhvm
 
+compat/linuxkpi/common/src/linux_fpu.c		optional compat_linuxkpi \
+	compile-with "${LINUXKPI_C}"
+
 contrib/openzfs/module/zcommon/zfs_fletcher_avx512.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zcommon/zfs_fletcher_intel.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zcommon/zfs_fletcher_sse.c		optional zfs compile-with "${ZFS_C}"
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
index 3e91db3e22c6..6b96de28128d 100644
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -575,6 +575,10 @@ arm64/rockchip/clk/rk3399_pmucru.c		optional fdt soc_rockchip_rk3399
 # Xilinx
 arm/xilinx/uart_dev_cdnc.c			optional uart soc_xilinx_zynq
 
+# Linuxkpi
+compat/linuxkpi/common/src/linux_fpu.c		optional compat_linuxkpi \
+	compile-with "${LINUXKPI_C}"
+
 # Cloudabi
 arm64/cloudabi32/cloudabi32_sysvec.c		optional compat_cloudabi32
 arm64/cloudabi64/cloudabi64_sysvec.c		optional compat_cloudabi64
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index 89e1d47987c4..4be98810b32e 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -189,6 +189,9 @@ x86/x86/mptable.c		optional apic
 x86/x86/mptable_pci.c		optional apic pci
 x86/x86/msi.c			optional apic pci
 
+compat/linuxkpi/common/src/linux_fpu.c		optional compat_linuxkpi \
+	compile-with "${LINUXKPI_C}"
+
 # Clock calibration subroutine; uses floating-point arithmetic
 subr_clockcalib.o		standard				\
 	dependency	"$S/kern/subr_clockcalib.c"			\
diff --git a/sys/modules/linuxkpi/Makefile b/sys/modules/linuxkpi/Makefile
index 897c86f51733..2ed6cda98b17 100644
--- a/sys/modules/linuxkpi/Makefile
+++ b/sys/modules/linuxkpi/Makefile
@@ -8,7 +8,6 @@ SRCS=	linux_compat.c \
 	linux_dmi.c \
 	linux_domain.c \
 	linux_firmware.c \
-	linux_fpu.c \
 	linux_hrtimer.c \
 	linux_idr.c \
 	linux_interrupt.c \
@@ -36,7 +35,7 @@ SRCS=	linux_compat.c \
 
 .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \
     ${MACHINE_CPUARCH} == "i386"
-SRCS+=	opt_acpi.h acpi_if.h linux_acpi.c
+SRCS+=	opt_acpi.h acpi_if.h linux_acpi.c linux_fpu.c
 .endif
 
 SRCS+=	opt_ddb.h