git: 2842ec6d99ce - main - REAP_KILL_PROC: kill processes in the threaded taskqueue context

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Sat, 20 Aug 2022 17:35:01 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=2842ec6d99ce3590eabb34d23eff5b0fed24eb98

commit 2842ec6d99ce3590eabb34d23eff5b0fed24eb98
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2022-08-12 19:37:08 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2022-08-20 17:34:11 +0000

    REAP_KILL_PROC: kill processes in the threaded taskqueue context
    
    There is a problem still left after the fixes to REAP_KILL_PROC.  The
    handling of the stopping signals by sig_suspend_threads() can occur
    outside the stopping process context by tdsendsignal(), and it uses
    mostly the same mechanism of aborting sleeps as suspension.  In other
    words, it badly interacts with thread_single(SINGLE_ALLPROC).
    
    But unlike single threading from the process context, we cannot wait by
    sleep for other single threading requests to pass, because we own
    spinlock(s).
    
    Fix this by moving both the thread_single(p2, SINGLE_ALLPROC), and the
    signalling, to the threaded taskqueue which cannot be single-threaded
    itself.
    
    Reported and tested by: pho
    Reviewed by:    markj
    Sponsored by:   The FreeBSD Foundation
    MFC after:      2 weeks
    Differential revision:  https://reviews.freebsd.org/D36207
---
 sys/kern/kern_procctl.c | 183 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 124 insertions(+), 59 deletions(-)

diff --git a/sys/kern/kern_procctl.c b/sys/kern/kern_procctl.c
index 36372fd31bd4..1fb1183741d6 100644
--- a/sys/kern/kern_procctl.c
+++ b/sys/kern/kern_procctl.c
@@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sx.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysproto.h>
+#include <sys/taskqueue.h>
 #include <sys/wait.h>
 
 #include <vm/vm.h>
@@ -243,32 +244,29 @@ reap_getpids(struct thread *td, struct proc *p, void *data)
 	return (error);
 }
 
-static void
-reap_kill_proc_relock(struct proc *p, int xlocked)
-{
-	PROC_UNLOCK(p);
-	if (xlocked)
-		sx_xlock(&proctree_lock);
-	else
-		sx_slock(&proctree_lock);
-	PROC_LOCK(p);
-}
+struct reap_kill_proc_work {
+	struct ucred *cr;
+	struct proc *target;
+	ksiginfo_t *ksi;
+	struct procctl_reaper_kill *rk;
+	int *error;
+	struct task t;
+};
 
 static void
-reap_kill_proc_locked(struct thread *td, struct proc *p2,
-    ksiginfo_t *ksi, struct procctl_reaper_kill *rk, int *error)
+reap_kill_proc_locked(struct reap_kill_proc_work *w)
 {
-	int error1, r, xlocked;
+	int error1;
 	bool need_stop;
 
-	PROC_LOCK_ASSERT(p2, MA_OWNED);
-	PROC_ASSERT_HELD(p2);
+	PROC_LOCK_ASSERT(w->target, MA_OWNED);
+	PROC_ASSERT_HELD(w->target);
 
-	error1 = p_cansignal(td, p2, rk->rk_sig);
+	error1 = cr_cansignal(w->cr, w->target, w->rk->rk_sig);
 	if (error1 != 0) {
-		if (*error == ESRCH) {
-			rk->rk_fpid = p2->p_pid;
-			*error = error1;
+		if (*w->error == ESRCH) {
+			w->rk->rk_fpid = w->target->p_pid;
+			*w->error = error1;
 		}
 		return;
 	}
@@ -286,39 +284,34 @@ reap_kill_proc_locked(struct thread *td, struct proc *p2,
 	 * thread signals the whole subtree, it is an application
 	 * race.
 	 */
-	need_stop = p2 != td->td_proc &&
-	    (td->td_proc->p_flag2 & P2_WEXIT) == 0 &&
-	    (p2->p_flag & (P_KPROC | P_SYSTEM | P_STOPPED)) == 0 &&
-	    (rk->rk_flags & REAPER_KILL_CHILDREN) == 0;
-
-	if (need_stop) {
-		xlocked = sx_xlocked(&proctree_lock);
-		sx_unlock(&proctree_lock);
-		r = thread_single(p2, SINGLE_ALLPROC);
-		reap_kill_proc_relock(p2, xlocked);
-		if (r != 0)
-			need_stop = false;
-	}
+	if ((w->target->p_flag & (P_KPROC | P_SYSTEM | P_STOPPED)) == 0)
+		need_stop = thread_single(w->target, SINGLE_ALLPROC) == 0;
+	else
+		need_stop = false;
 
-	pksignal(p2, rk->rk_sig, ksi);
-	rk->rk_killed++;
-	*error = error1;
+	(void)pksignal(w->target, w->rk->rk_sig, w->ksi);
+	w->rk->rk_killed++;
+	*w->error = error1;
 
 	if (need_stop)
-		thread_single_end(p2, SINGLE_ALLPROC);
+		thread_single_end(w->target, SINGLE_ALLPROC);
 }
 
 static void
-reap_kill_proc(struct thread *td, struct proc *p2, ksiginfo_t *ksi,
-    struct procctl_reaper_kill *rk, int *error)
+reap_kill_proc_work(void *arg, int pending __unused)
 {
-	PROC_LOCK(p2);
-	if ((p2->p_flag2 & P2_WEXIT) == 0) {
-		_PHOLD_LITE(p2);
-		reap_kill_proc_locked(td, p2, ksi, rk, error);
-		_PRELE(p2);
-	}
-	PROC_UNLOCK(p2);
+	struct reap_kill_proc_work *w;
+
+	w = arg;
+	PROC_LOCK(w->target);
+	if ((w->target->p_flag2 & P2_WEXIT) == 0)
+		reap_kill_proc_locked(w);
+	PROC_UNLOCK(w->target);
+
+	sx_xlock(&proctree_lock);
+	w->target = NULL;
+	wakeup(&w->target);
+	sx_xunlock(&proctree_lock);
 }
 
 struct reap_kill_tracker {
@@ -357,25 +350,40 @@ reap_kill_children(struct thread *td, struct proc *reaper,
     struct procctl_reaper_kill *rk, ksiginfo_t *ksi, int *error)
 {
 	struct proc *p2;
+	int error1;
 
 	LIST_FOREACH(p2, &reaper->p_children, p_sibling) {
-		(void)reap_kill_proc(td, p2, ksi, rk, error);
-		/*
-		 * Do not end the loop on error, signal everything we
-		 * can.
-		 */
+		PROC_LOCK(p2);
+		if ((p2->p_flag2 & P2_WEXIT) == 0) {
+			error1 = p_cansignal(td, p2, rk->rk_sig);
+			if (error1 != 0) {
+				if (*error == ESRCH) {
+					rk->rk_fpid = p2->p_pid;
+					*error = error1;
+				}
+
+				/*
+				 * Do not end the loop on error,
+				 * signal everything we can.
+				 */
+			} else {
+				(void)pksignal(p2, rk->rk_sig, ksi);
+				rk->rk_killed++;
+			}
+		}
+		PROC_UNLOCK(p2);
 	}
 }
 
 static bool
 reap_kill_subtree_once(struct thread *td, struct proc *p, struct proc *reaper,
-    struct procctl_reaper_kill *rk, ksiginfo_t *ksi, int *error,
-    struct unrhdr *pids)
+    struct unrhdr *pids, struct reap_kill_proc_work *w)
 {
 	struct reap_kill_tracker_head tracker;
 	struct reap_kill_tracker *t;
 	struct proc *p2;
-	bool res;
+	int r, xlocked;
+	bool res, st;
 
 	res = false;
 	TAILQ_INIT(&tracker);
@@ -397,14 +405,54 @@ reap_kill_subtree_once(struct thread *td, struct proc *p, struct proc *reaper,
 
 		LIST_FOREACH(p2, &t->parent->p_reaplist, p_reapsibling) {
 			if (t->parent == reaper &&
-			    (rk->rk_flags & REAPER_KILL_SUBTREE) != 0 &&
-			    p2->p_reapsubtree != rk->rk_subtree)
+			    (w->rk->rk_flags & REAPER_KILL_SUBTREE) != 0 &&
+			    p2->p_reapsubtree != w->rk->rk_subtree)
 				continue;
 			if ((p2->p_treeflag & P_TREE_REAPER) != 0)
 				reap_kill_sched(&tracker, p2);
 			if (alloc_unr_specific(pids, p2->p_pid) != p2->p_pid)
 				continue;
-			reap_kill_proc(td, p2, ksi, rk, error);
+			if (p2 == td->td_proc) {
+				if ((p2->p_flag & P_HADTHREADS) != 0 &&
+				    (p2->p_flag2 & P2_WEXIT) == 0) {
+					xlocked = sx_xlocked(&proctree_lock);
+					sx_unlock(&proctree_lock);
+					st = true;
+				} else {
+					st = false;
+				}
+				PROC_LOCK(p2);
+				if (st)
+					r = thread_single(p2, SINGLE_NO_EXIT);
+				(void)pksignal(p2, w->rk->rk_sig, w->ksi);
+				w->rk->rk_killed++;
+				if (st && r == 0)
+					thread_single_end(p2, SINGLE_NO_EXIT);
+				PROC_UNLOCK(p2);
+				if (st) {
+					if (xlocked)
+						sx_xlock(&proctree_lock);
+					else
+						sx_slock(&proctree_lock);
+				}
+			} else {
+				PROC_LOCK(p2);
+				if ((p2->p_flag2 & P2_WEXIT) == 0) {
+					_PHOLD_LITE(p2);
+					PROC_UNLOCK(p2);
+					w->target = p2;
+					taskqueue_enqueue(taskqueue_thread,
+					    &w->t);
+					while (w->target != NULL) {
+						sx_sleep(&w->target,
+						    &proctree_lock, PWAIT,
+						    "reapst", 0);
+					}
+					PROC_LOCK(p2);
+					_PRELE(p2);
+				}
+				PROC_UNLOCK(p2);
+			}
 			res = true;
 		}
 		reap_kill_sched_free(t);
@@ -414,7 +462,7 @@ reap_kill_subtree_once(struct thread *td, struct proc *p, struct proc *reaper,
 
 static void
 reap_kill_subtree(struct thread *td, struct proc *p, struct proc *reaper,
-    struct procctl_reaper_kill *rk, ksiginfo_t *ksi, int *error)
+    struct reap_kill_proc_work *w)
 {
 	struct unrhdr pids;
 
@@ -431,7 +479,7 @@ reap_kill_subtree(struct thread *td, struct proc *p, struct proc *reaper,
 	}
 	td->td_proc->p_singlethr++;
 	PROC_UNLOCK(td->td_proc);
-	while (reap_kill_subtree_once(td, p, reaper, rk, ksi, error, &pids))
+	while (reap_kill_subtree_once(td, p, reaper, &pids, w))
 	       ;
 	PROC_LOCK(td->td_proc);
 	td->td_proc->p_singlethr--;
@@ -455,6 +503,7 @@ reap_kill_sapblk(struct thread *td __unused, void *data)
 static int
 reap_kill(struct thread *td, struct proc *p, void *data)
 {
+	struct reap_kill_proc_work w;
 	struct proc *reaper;
 	ksiginfo_t ksi;
 	struct procctl_reaper_kill *rk;
@@ -483,7 +532,23 @@ reap_kill(struct thread *td, struct proc *p, void *data)
 	if ((rk->rk_flags & REAPER_KILL_CHILDREN) != 0) {
 		reap_kill_children(td, reaper, rk, &ksi, &error);
 	} else {
-		reap_kill_subtree(td, p, reaper, rk, &ksi, &error);
+		w.cr = crhold(td->td_ucred);
+		w.ksi = &ksi;
+		w.rk = rk;
+		w.error = &error;
+		TASK_INIT(&w.t, 0, reap_kill_proc_work, &w);
+
+		/*
+		 * Prevent swapout, since w, ksi, and possibly rk, are
+		 * allocated on the stack.  We sleep in
+		 * reap_kill_subtree_once() waiting for task to
+		 * complete single-threading.
+		 */
+		PHOLD(td->td_proc);
+
+		reap_kill_subtree(td, p, reaper, &w);
+		PRELE(td->td_proc);
+		crfree(w.cr);
 	}
 	PROC_LOCK(p);
 	return (error);