git: c0b8047bdc13 - main - LinuxKPI: Allow kmalloc to be called when FPU protection is enabled

From: Vladimir Kondratyev <wulf_at_FreeBSD.org>
Date: Sat, 10 Feb 2024 22:03:23 UTC
The branch main has been updated by wulf:

URL: https://cgit.FreeBSD.org/src/commit/?id=c0b8047bdc13040eafb162c4b7b5dba11034ff4b

commit c0b8047bdc13040eafb162c4b7b5dba11034ff4b
Author:     Vladimir Kondratyev <wulf@FreeBSD.org>
AuthorDate: 2024-02-10 22:01:50 +0000
Commit:     Vladimir Kondratyev <wulf@FreeBSD.org>
CommitDate: 2024-02-10 22:01:50 +0000

    LinuxKPI: Allow kmalloc to be called when FPU protection is enabled
    
    The amdgpu driver performs many memory allocations in FPU-protected
    sections of code for certain display cores, e.g. DCN30. This does not
    work on FreeBSD, as its malloc function cannot be run within a critical
    section. Check for this condition and temporarily exit the FPU-protected
    context to work around the issue and reduce source code patching.
    
    Sponsored by:   Serenity Cyber Security, LLC
    Reviewed by:    manu (previous version)
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D42822
---
 sys/compat/linuxkpi/common/include/linux/compat.h | 10 ++++++--
 sys/compat/linuxkpi/common/include/linux/slab.h   |  5 ++--
 sys/compat/linuxkpi/common/src/linux_fpu.c        | 28 ++++++++++++++++++++++-
 sys/compat/linuxkpi/common/src/linux_slab.c       | 24 +++++++++++++++++++
 4 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/sys/compat/linuxkpi/common/include/linux/compat.h b/sys/compat/linuxkpi/common/include/linux/compat.h
index d1a02f612f42..8a5a6918bb7c 100644
--- a/sys/compat/linuxkpi/common/include/linux/compat.h
+++ b/sys/compat/linuxkpi/common/include/linux/compat.h
@@ -41,17 +41,20 @@ extern int linux_alloc_current(struct thread *, int flags);
 extern void linux_free_current(struct task_struct *);
 extern struct domainset *linux_get_vm_domain_set(int node);
 
+#define	__current_unallocated(td)	\
+	__predict_false((td)->td_lkpi_task == NULL)
+
 static inline void
 linux_set_current(struct thread *td)
 {
-	if (__predict_false(td->td_lkpi_task == NULL))
+	if (__current_unallocated(td))
 		lkpi_alloc_current(td, M_WAITOK);
 }
 
 static inline int
 linux_set_current_flags(struct thread *td, int flags)
 {
-	if (__predict_false(td->td_lkpi_task == NULL))
+	if (__current_unallocated(td))
 		return (lkpi_alloc_current(td, flags));
 	return (0);
 }
@@ -59,4 +62,7 @@ linux_set_current_flags(struct thread *td, int flags)
 #define	compat_ptr(x)		((void *)(uintptr_t)x)
 #define	ptr_to_compat(x)	((uintptr_t)x)
 
+typedef void fpu_safe_exec_cb_t(void *ctx);
+void lkpi_fpu_safe_exec(fpu_safe_exec_cb_t func, void *ctx);
+
 #endif	/* _LINUXKPI_LINUX_COMPAT_H_ */
diff --git a/sys/compat/linuxkpi/common/include/linux/slab.h b/sys/compat/linuxkpi/common/include/linux/slab.h
index 8557f831bb60..298306b6ea05 100644
--- a/sys/compat/linuxkpi/common/include/linux/slab.h
+++ b/sys/compat/linuxkpi/common/include/linux/slab.h
@@ -41,6 +41,7 @@
 
 MALLOC_DECLARE(M_KMALLOC);
 
+#define	kmalloc(size, flags)		lkpi_kmalloc(size, flags)
 #define	kvmalloc(size, flags)		kmalloc(size, flags)
 #define	kvzalloc(size, flags)		kmalloc(size, (flags) | __GFP_ZERO)
 #define	kvcalloc(n, size, flags)	kvmalloc_array(n, size, (flags) | __GFP_ZERO)
@@ -53,7 +54,6 @@ MALLOC_DECLARE(M_KMALLOC);
 #define	vmalloc_node(size, node)	__vmalloc_node(size, GFP_KERNEL, node)
 #define	vmalloc_user(size)		__vmalloc(size, GFP_KERNEL | __GFP_ZERO, 0)
 #define	vmalloc(size)			__vmalloc(size, GFP_KERNEL, 0)
-#define	__kmalloc(...)			kmalloc(__VA_ARGS__)
 
 /*
  * Prefix some functions with linux_ to avoid namespace conflict
@@ -107,7 +107,7 @@ linux_check_m_flags(gfp_t flags)
 }
 
 static inline void *
-kmalloc(size_t size, gfp_t flags)
+__kmalloc(size_t size, gfp_t flags)
 {
 	return (malloc(MAX(size, sizeof(struct llist_node)), M_KMALLOC,
 	    linux_check_m_flags(flags)));
@@ -218,6 +218,7 @@ ksize(const void *ptr)
 	return (malloc_usable_size(ptr));
 }
 
+extern void *lkpi_kmalloc(size_t size, gfp_t flags);
 extern struct linux_kmem_cache *linux_kmem_cache_create(const char *name,
     size_t size, size_t align, unsigned flags, linux_kmem_ctor_t *ctor);
 extern void *lkpi_kmem_cache_alloc(struct linux_kmem_cache *, gfp_t);
diff --git a/sys/compat/linuxkpi/common/src/linux_fpu.c b/sys/compat/linuxkpi/common/src/linux_fpu.c
index b26dce98774b..4e40a2b004bb 100644
--- a/sys/compat/linuxkpi/common/src/linux_fpu.c
+++ b/sys/compat/linuxkpi/common/src/linux_fpu.c
@@ -30,11 +30,13 @@
 #include <sys/proc.h>
 #include <sys/kernel.h>
 
+#include <linux/compat.h>
 #include <linux/sched.h>
 
 #include <asm/fpu/api.h>
 
-#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
+#if defined(__aarch64__) || defined(__arm__) || defined(__amd64__) ||	\
+    defined(__i386__) || defined(__powerpc64__)
 
 #include <machine/fpu.h>
 
@@ -58,6 +60,24 @@ lkpi_kernel_fpu_end(void)
 		fpu_kern_leave(curthread, NULL);
 }
 
+void
+lkpi_fpu_safe_exec(fpu_safe_exec_cb_t func, void *ctx)
+{
+	unsigned int save_fpu_level;
+
+	save_fpu_level =
+	    __current_unallocated(curthread) ? 0 : current->fpu_ctx_level;
+	if (__predict_false(save_fpu_level != 0)) {
+		current->fpu_ctx_level = 1;
+		kernel_fpu_end();
+	}
+	func(ctx);
+	if (__predict_false(save_fpu_level != 0)) {
+		kernel_fpu_begin();
+		current->fpu_ctx_level = save_fpu_level;
+	}
+}
+
 #else
 
 void
@@ -70,4 +90,10 @@ lkpi_kernel_fpu_end(void)
 {
 }
 
+void
+lkpi_fpu_safe_exec(fpu_safe_exec_cb_t func, void *ctx)
+{
+	func(ctx);
+}
+
 #endif
diff --git a/sys/compat/linuxkpi/common/src/linux_slab.c b/sys/compat/linuxkpi/common/src/linux_slab.c
index 72097c55f94c..68117d1c9fa7 100644
--- a/sys/compat/linuxkpi/common/src/linux_slab.c
+++ b/sys/compat/linuxkpi/common/src/linux_slab.c
@@ -25,6 +25,7 @@
  */
 
 #include <sys/cdefs.h>
+#include <linux/compat.h>
 #include <linux/slab.h>
 #include <linux/rcupdate.h>
 #include <linux/kernel.h>
@@ -206,6 +207,29 @@ linux_kmem_cache_destroy(struct linux_kmem_cache *c)
 	free(c, M_KMALLOC);
 }
 
+struct lkpi_kmalloc_ctx {
+	size_t size;
+	gfp_t flags;
+	void *addr;
+};
+
+static void
+lkpi_kmalloc_cb(void *ctx)
+{
+	struct lkpi_kmalloc_ctx *lmc = ctx;
+
+	lmc->addr = __kmalloc(lmc->size, lmc->flags);
+}
+
+void *
+lkpi_kmalloc(size_t size, gfp_t flags)
+{
+	struct lkpi_kmalloc_ctx lmc = { .size = size, .flags = flags };
+
+	lkpi_fpu_safe_exec(&lkpi_kmalloc_cb, &lmc);
+	return(lmc.addr);
+}
+
 static void
 linux_kfree_async_fn(void *context, int pending)
 {