git: 5ecb5444aa34 - main - jail: add process linkage

From: Mateusz Guzik <mjg_at_FreeBSD.org>
Date: Mon, 05 Sep 2022 11:56:14 UTC
The branch main has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=5ecb5444aa349246568dc6f9a615630e29191f7c

commit 5ecb5444aa349246568dc6f9a615630e29191f7c
Author:     Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2022-03-10 18:42:37 +0000
Commit:     Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2022-09-05 11:54:47 +0000

    jail: add process linkage
    
    It allows iteration over processes belonging to a given jail instead of
    having to walk the entire allproc list.
    
    Note the iteration can miss processes, which remains bug-compatible
    with the previous code.
    
    Reviewed by:    jamie (previous version), markj (previous version)
    Differential Revision:  https://reviews.freebsd.org/D34522
---
 sys/kern/kern_exit.c |   1 +
 sys/kern/kern_fork.c |   1 +
 sys/kern/kern_jail.c | 103 ++++++++++++++++++++++++++++++++++++++++++---------
 sys/sys/jail.h       |   5 +++
 sys/sys/proc.h       |   1 +
 5 files changed, 93 insertions(+), 18 deletions(-)

diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index e7a6f8d66c39..3f64343aea0e 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -473,6 +473,7 @@ exit1(struct thread *td, int rval, int signo)
 	 */
 	p->p_list.le_prev = NULL;
 #endif
+	prison_proc_unlink(p->p_ucred->cr_prison, p);
 	sx_xunlock(&allproc_lock);
 
 	sx_xlock(&proctree_lock);
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 58f386f20544..87c14670ae10 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -398,6 +398,7 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
 	sx_xlock(&allproc_lock);
 	LIST_INSERT_HEAD(&allproc, p2, p_list);
 	allproc_gen++;
+	prison_proc_link(p2->p_ucred->cr_prison, p2);
 	sx_xunlock(&allproc_lock);
 
 	sx_xlock(PIDHASHLOCK(p2->p_pid));
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 108432c53bca..853d5b1faf5f 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -147,6 +147,8 @@ static int prison_lock_xlock(struct prison *pr, int flags);
 static void prison_cleanup(struct prison *pr);
 static void prison_free_not_last(struct prison *pr);
 static void prison_proc_free_not_last(struct prison *pr);
+static void prison_proc_relink(struct prison *opr, struct prison *npr,
+    struct proc *p);
 static void prison_set_allow_locked(struct prison *pr, unsigned flag,
     int enable);
 static char *prison_path(struct prison *pr1, struct prison *pr2);
@@ -2648,6 +2650,7 @@ do_jail_attach(struct thread *td, struct prison *pr, int drflags)
 	rctl_proc_ucred_changed(p, newcred);
 	crfree(newcred);
 #endif
+	prison_proc_relink(oldcred->cr_prison, pr, p);
 	prison_deref(oldcred->cr_prison, drflags);
 	crfree(oldcred);
 
@@ -2919,6 +2922,32 @@ prison_proc_free_not_last(struct prison *pr)
 #endif
 }
 
+void
+prison_proc_link(struct prison *pr, struct proc *p)
+{
+
+	sx_assert(&allproc_lock, SA_XLOCKED);
+	LIST_INSERT_HEAD(&pr->pr_proclist, p, p_jaillist);
+}
+
+void
+prison_proc_unlink(struct prison *pr, struct proc *p)
+{
+
+	sx_assert(&allproc_lock, SA_XLOCKED);
+	LIST_REMOVE(p, p_jaillist);
+}
+
+static void
+prison_proc_relink(struct prison *opr, struct prison *npr, struct proc *p)
+{
+
+	sx_xlock(&allproc_lock);
+	prison_proc_unlink(opr, p);
+	prison_proc_link(npr, p);
+	sx_xunlock(&allproc_lock);
+}
+
 /*
  * Complete a call to either prison_free or prison_proc_free.
  */
@@ -2940,6 +2969,60 @@ prison_complete(void *context, int pending)
 	prison_deref(pr, drflags);
 }
 
+static void
+prison_kill_processes_cb(struct proc *p, void *arg __unused)
+{
+
+	kern_psignal(p, SIGKILL);
+}
+
+/*
+ * Note the iteration does not guarantee acting on all processes.
+ * Most notably there may be fork or jail_attach in progress.
+ */
+void
+prison_proc_iterate(struct prison *pr, void (*cb)(struct proc *, void *),
+    void *cbarg)
+{
+	struct prison *ppr;
+	struct proc *p;
+
+	if (atomic_load_int(&pr->pr_childcount) == 0) {
+		sx_slock(&allproc_lock);
+		LIST_FOREACH(p, &pr->pr_proclist, p_jaillist) {
+			if (p->p_state == PRS_NEW)
+				continue;
+			PROC_LOCK(p);
+			cb(p, cbarg);
+			PROC_UNLOCK(p);
+		}
+		sx_sunlock(&allproc_lock);
+		if (atomic_load_int(&pr->pr_childcount) == 0)
+			return;
+		/*
+		 * Some jails popped up during the iteration, fall through to a
+		 * system-wide search.
+		 */
+	}
+
+	sx_slock(&allproc_lock);
+	FOREACH_PROC_IN_SYSTEM(p) {
+		PROC_LOCK(p);
+		if (p->p_state != PRS_NEW && p->p_ucred != NULL) {
+			for (ppr = p->p_ucred->cr_prison;
+			    ppr != &prison0;
+			    ppr = ppr->pr_parent) {
+				if (ppr == pr) {
+					cb(p, cbarg);
+					break;
+				}
+			}
+		}
+		PROC_UNLOCK(p);
+	}
+	sx_sunlock(&allproc_lock);
+}
+
 /*
  * Remove a prison reference and/or user reference (usually).
  * This assumes context that allows sleeping (for allprison_lock),
@@ -2953,7 +3036,6 @@ prison_deref(struct prison *pr, int flags)
 {
 	struct prisonlist freeprison;
 	struct prison *killpr, *rpr, *ppr, *tpr;
-	struct proc *p;
 
 	killpr = NULL;
 	TAILQ_INIT(&freeprison);
@@ -3064,23 +3146,8 @@ prison_deref(struct prison *pr, int flags)
 		sx_xunlock(&allprison_lock);
 
 	/* Kill any processes attached to a killed prison. */
-	if (killpr != NULL) {
-		sx_slock(&allproc_lock);
-		FOREACH_PROC_IN_SYSTEM(p) {
-			PROC_LOCK(p);
-			if (p->p_state != PRS_NEW && p->p_ucred != NULL) {
-				for (ppr = p->p_ucred->cr_prison;
-				     ppr != &prison0;
-				     ppr = ppr->pr_parent)
-					if (ppr == killpr) {
-						kern_psignal(p, SIGKILL);
-						break;
-					}
-			}
-			PROC_UNLOCK(p);
-		}
-		sx_sunlock(&allproc_lock);
-	}
+	if (killpr != NULL)
+		prison_proc_iterate(killpr, prison_kill_processes_cb, NULL);
 
 	/*
 	 * Finish removing any unreferenced prisons, which couldn't happen
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index b7ecfc198b4c..743b413bc06d 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -159,6 +159,7 @@ typedef enum {
  *
  * Lock key:
  *   (a) allprison_lock
+ *   (A) allproc_lock
  *   (c) set only during creation before the structure is shared, no mutex
  *       required to read
  *   (m) locked by pr_mtx
@@ -176,6 +177,7 @@ struct prison {
 	volatile u_int	 pr_uref;			/* (r) user (alive) refcount */
 	unsigned	 pr_flags;			/* (p) PR_* flags */
 	LIST_HEAD(, prison) pr_children;		/* (a) list of child jails */
+	LIST_HEAD(, proc) pr_proclist;			/* (A) list of jailed processes */
 	LIST_ENTRY(prison) pr_sibling;			/* (a) next in parent's list */
 	struct prison	*pr_parent;			/* (c) containing jail */
 	struct mtx	 pr_mtx;
@@ -432,6 +434,9 @@ void prison_hold(struct prison *pr);
 void prison_hold_locked(struct prison *pr);
 void prison_proc_hold(struct prison *);
 void prison_proc_free(struct prison *);
+void prison_proc_link(struct prison *, struct proc *);
+void prison_proc_unlink(struct prison *, struct proc *);
+void prison_proc_iterate(struct prison *, void (*)(struct proc *, void *), void *);
 void prison_set_allow(struct ucred *cred, unsigned flag, int enable);
 int prison_ischild(struct prison *, struct prison *);
 bool prison_isalive(const struct prison *);
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index fb1553539b1c..08a74c7326cf 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -773,6 +773,7 @@ struct proc {
 	LIST_HEAD(, proc) p_orphans;	/* (e) Pointer to list of orphans. */
 
 	TAILQ_HEAD(, kq_timer_cb_data)	p_kqtim_stop;	/* (c) */
+	LIST_ENTRY(proc) p_jaillist;	/* (d) Jail process linkage. */
 };
 
 #define	p_session	p_pgrp->pg_session