git: b764a426534f - main - There is a window where threads are removed from the process list and where the thread destructor is invoked. Catch that window by waiting for all task_struct allocations to be returned before freeing the UMA zone in the LinuxKPI. Else UMA may fail to release the zone due to concurrent access and panic:

Hans Petter Selasky hselasky at FreeBSD.org
Fri May 21 11:21:17 UTC 2021


The branch main has been updated by hselasky:

URL: https://cgit.FreeBSD.org/src/commit/?id=b764a426534f2f5f86d6625288c74dafdbc94d2b

commit b764a426534f2f5f86d6625288c74dafdbc94d2b
Author:     Hans Petter Selasky <hselasky at FreeBSD.org>
AuthorDate: 2021-05-21 11:17:42 +0000
Commit:     Hans Petter Selasky <hselasky at FreeBSD.org>
CommitDate: 2021-05-21 11:18:41 +0000

    There is a window where threads are removed from the process list and where
    the thread destructor is invoked. Catch that window by waiting for all
    task_struct allocations to be returned before freeing the UMA zone in the
    LinuxKPI. Else UMA may fail to release the zone due to concurrent access
    and panic:
    
    panic() - Bad link element prev->next != elm
    zone_release()
    bucket_drain()
    bucket_free()
    zone_dtor()
    zone_free_item()
    uma_zdestroy()
    linux_current_uninit()
    
    This failure can be triggered by loading and unloading the LinuxKPI module
    in a loop:
    
    while true
    do
    kldload linuxkpi
    kldunload linuxkpi
    done
    
    Discussed with: kib@
    MFC after:      1 week
    Sponsored by:   Mellanox Technologies // NVIDIA Networking
---
 sys/compat/linuxkpi/common/src/linux_current.c | 35 ++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/sys/compat/linuxkpi/common/src/linux_current.c b/sys/compat/linuxkpi/common/src/linux_current.c
index 9bae7ee92e49..51e396081c04 100644
--- a/sys/compat/linuxkpi/common/src/linux_current.c
+++ b/sys/compat/linuxkpi/common/src/linux_current.c
@@ -45,6 +45,7 @@ extern u_int first_msi_irq, num_msi_irqs;
 
 static eventhandler_tag linuxkpi_thread_dtor_tag;
 
+static atomic_t linux_current_allocs;
 static uma_zone_t linux_current_zone;
 static uma_zone_t linux_mm_zone;
 
@@ -146,6 +147,10 @@ linux_alloc_current(struct thread *td, int flags)
 	/* free mm_struct pointer, if any */
 	uma_zfree(linux_mm_zone, mm);
 
+	/* keep track of number of allocations */
+	if (atomic_add_return(1, &linux_current_allocs) == INT_MAX)
+		panic("linux_alloc_current: Refcount too high!");
+
 	return (0);
 }
 
@@ -173,6 +178,10 @@ linux_free_current(struct task_struct *ts)
 {
 	mmput(ts->mm);
 	uma_zfree(linux_current_zone, ts);
+
+	/* keep track of number of allocations */
+	if (atomic_sub_return(1, &linux_current_allocs) < 0)
+		panic("linux_free_current: Negative refcount!");
 }
 
 static void
@@ -271,10 +280,6 @@ SYSCTL_INT(_compat_linuxkpi, OID_AUTO, task_struct_reserve,
 static void
 linux_current_init(void *arg __unused)
 {
-	lkpi_alloc_current = linux_alloc_current;
-	linuxkpi_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
-	    linuxkpi_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
-
 	TUNABLE_INT_FETCH("compat.linuxkpi.task_struct_reserve",
 	    &lkpi_task_resrv);
 	if (lkpi_task_resrv == 0) {
@@ -298,6 +303,12 @@ linux_current_init(void *arg __unused)
 	    UMA_ALIGN_PTR, 0);
 	uma_zone_reserve(linux_mm_zone, lkpi_task_resrv);
 	uma_prealloc(linux_mm_zone, lkpi_task_resrv);
+
+	atomic_thread_fence_seq_cst();
+
+	lkpi_alloc_current = linux_alloc_current;
+	linuxkpi_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
+	    linuxkpi_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
 }
 SYSINIT(linux_current, SI_SUB_EVENTHANDLER, SI_ORDER_SECOND,
     linux_current_init, NULL);
@@ -309,6 +320,10 @@ linux_current_uninit(void *arg __unused)
 	struct task_struct *ts;
 	struct thread *td;
 
+	lkpi_alloc_current = linux_alloc_current_noop;
+
+	atomic_thread_fence_seq_cst();
+
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		PROC_LOCK(p);
@@ -321,8 +336,18 @@ linux_current_uninit(void *arg __unused)
 		PROC_UNLOCK(p);
 	}
 	sx_sunlock(&allproc_lock);
+
+	/*
+	 * There is a window where threads are removed from the
+	 * process list and where the thread destructor is invoked.
+	 * Catch that window by waiting for all task_struct
+	 * allocations to be returned before freeing the UMA zone.
+	 */
+	while (atomic_read(&linux_current_allocs) != 0)
+		pause("W", 1);
+
 	EVENTHANDLER_DEREGISTER(thread_dtor, linuxkpi_thread_dtor_tag);
-	lkpi_alloc_current = linux_alloc_current_noop;
+	
 	uma_zdestroy(linux_current_zone);
 	uma_zdestroy(linux_mm_zone);
 }


More information about the dev-commits-src-all mailing list