git: 165ba13fb806 - main - linuxkpi: guarantee allocations of task and mm for interrupt threads

Konstantin Belousov kib at FreeBSD.org
Mon Apr 19 18:38:18 UTC 2021


The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=165ba13fb806c8596f868981883631a5ee78d6c8

commit 165ba13fb806c8596f868981883631a5ee78d6c8
Author:     Konstantin Belousov <konstantinb at nvidia.com>
AuthorDate: 2021-03-11 06:34:42 +0000
Commit:     Konstantin Belousov <kib at FreeBSD.org>
CommitDate: 2021-04-19 18:34:08 +0000

    linuxkpi: guarantee allocations of task and mm for interrupt threads
    
    Create and use zones for task and mm.  Reserve items in zones based on the
    estimation of the max number of interrupts in the system.  Use M_USE_RESERVE
    to allow taking reserved items when an allocation occurs from the interrupt
    thread context.
    
    Of course, this would only work the first time we allocate the task for
    an interrupt thread. If an interrupt is deallocated and allocated anew,
    creating a new thread, it might be that the zone is depleted. It should
    still be good enough for practical uses.
    
    Reviewed by:    hselasky
    Sponsored by:   Mellanox Technologies/NVidia Networking
    MFC after:      1 week
---
 sys/compat/linuxkpi/common/src/linux_current.c | 66 ++++++++++++++++++++++----
 1 file changed, 58 insertions(+), 8 deletions(-)

diff --git a/sys/compat/linuxkpi/common/src/linux_current.c b/sys/compat/linuxkpi/common/src/linux_current.c
index 081eab8edab1..ef51acc1952e 100644
--- a/sys/compat/linuxkpi/common/src/linux_current.c
+++ b/sys/compat/linuxkpi/common/src/linux_current.c
@@ -31,14 +31,22 @@ __FBSDID("$FreeBSD$");
 #include <linux/completion.h>
 #include <linux/mm.h>
 #include <linux/kthread.h>
+#include <linux/moduleparam.h>
 
 #include <sys/kernel.h>
 #include <sys/eventhandler.h>
 #include <sys/malloc.h>
+#include <sys/sysctl.h>
+#include <vm/uma.h>
+
+#if defined(__i386__) || defined(__amd64__)
+extern u_int first_msi_irq, num_msi_irqs;
+#endif
 
 static eventhandler_tag linuxkpi_thread_dtor_tag;
 
-static MALLOC_DEFINE(M_LINUX_CURRENT, "linuxcurrent", "LinuxKPI task structure");
+static uma_zone_t linux_current_zone;
+static uma_zone_t linux_mm_zone;
 
 int
 linux_alloc_current(struct thread *td, int flags)
@@ -52,13 +60,23 @@ linux_alloc_current(struct thread *td, int flags)
 
 	MPASS(td->td_lkpi_task == NULL);
 
-	ts = malloc(sizeof(*ts), M_LINUX_CURRENT, flags | M_ZERO);
-	if (ts == NULL)
+	if ((td->td_pflags & TDP_ITHREAD) != 0 || !THREAD_CAN_SLEEP()) {
+		flags &= ~M_WAITOK;
+		flags |= M_NOWAIT | M_USE_RESERVE;
+	}
+
+	ts = uma_zalloc(linux_current_zone, flags | M_ZERO);
+	if (ts == NULL) {
+		if ((flags & (M_WAITOK | M_NOWAIT)) == M_WAITOK)
+			panic("linux_alloc_current: failed to allocate task");
 		return (ENOMEM);
+	}
 
-	mm = malloc(sizeof(*mm), M_LINUX_CURRENT, flags | M_ZERO);
+	mm = uma_zalloc(linux_mm_zone, flags | M_ZERO);
 	if (mm == NULL) {
-		free(ts, M_LINUX_CURRENT);
+		if ((flags & (M_WAITOK | M_NOWAIT)) == M_WAITOK)
+			panic("linux_alloc_current: failed to allocate mm");
+		uma_zfree(linux_current_zone, mm);
 		return (ENOMEM);
 	}
 
@@ -111,7 +129,7 @@ linux_alloc_current(struct thread *td, int flags)
 	PROC_UNLOCK(proc);
 
 	/* free mm_struct pointer, if any */
-	free(mm, M_LINUX_CURRENT);
+	uma_zfree(linux_mm_zone, mm);
 
 	return (0);
 }
@@ -132,14 +150,14 @@ linux_get_task_mm(struct task_struct *task)
 void
 linux_mm_dtor(struct mm_struct *mm)
 {
-	free(mm, M_LINUX_CURRENT);
+	uma_zfree(linux_mm_zone, mm);
 }
 
 void
 linux_free_current(struct task_struct *ts)
 {
 	mmput(ts->mm);
-	free(ts, M_LINUX_CURRENT);
+	uma_zfree(linux_current_zone, ts);
 }
 
 static void
@@ -229,12 +247,42 @@ linux_task_exiting(struct task_struct *task)
 	return (ret);
 }
 
+static int lkpi_task_resrv;
+SYSCTL_INT(_compat_linuxkpi, OID_AUTO, task_struct_reserve,
+    CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &lkpi_task_resrv, 0,
+    "Number of struct task and struct mm to reserve for non-sleepable "
+    "allocations");
+
 static void
 linux_current_init(void *arg __unused)
 {
 	lkpi_alloc_current = linux_alloc_current;
 	linuxkpi_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
 	    linuxkpi_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
+
+	TUNABLE_INT_FETCH("compat.linuxkpi.task_struct_reserve",
+	    &lkpi_task_resrv);
+	if (lkpi_task_resrv == 0) {
+#if defined(__i386__) || defined(__amd64__)
+		/*
+		 * Number of interrupt threads plus per-cpu callout
+		 * SWI threads.
+		 */
+		lkpi_task_resrv = first_msi_irq + num_msi_irqs + MAXCPU;
+#else
+		lkpi_task_resrv = 1024;		/* XXXKIB arbitrary */
+#endif
+	}
+	linux_current_zone = uma_zcreate("lkpicurr",
+	    sizeof(struct task_struct), NULL, NULL, NULL, NULL,
+	    UMA_ALIGN_PTR, 0);
+	uma_zone_reserve(linux_current_zone, lkpi_task_resrv);
+	uma_prealloc(linux_current_zone, lkpi_task_resrv);
+	linux_mm_zone = uma_zcreate("lkpimm",
+	    sizeof(struct task_struct), NULL, NULL, NULL, NULL,
+	    UMA_ALIGN_PTR, 0);
+	uma_zone_reserve(linux_mm_zone, lkpi_task_resrv);
+	uma_prealloc(linux_mm_zone, lkpi_task_resrv);
 }
 SYSINIT(linux_current, SI_SUB_EVENTHANDLER, SI_ORDER_SECOND,
     linux_current_init, NULL);
@@ -260,6 +308,8 @@ linux_current_uninit(void *arg __unused)
 	sx_sunlock(&allproc_lock);
 	EVENTHANDLER_DEREGISTER(thread_dtor, linuxkpi_thread_dtor_tag);
 	lkpi_alloc_current = linux_alloc_current_noop;
+	uma_zdestroy(linux_current_zone);
+	uma_zdestroy(linux_mm_zone);
 }
 SYSUNINIT(linux_current, SI_SUB_EVENTHANDLER, SI_ORDER_SECOND,
     linux_current_uninit, NULL);


More information about the dev-commits-src-all mailing list