git: 6754ae2572eb - main - jail: Use refcount(9) for prison references.

Jamie Gritton jamie at FreeBSD.org
Wed Jan 20 23:08:43 UTC 2021


The branch main has been updated by jamie:

URL: https://cgit.FreeBSD.org/src/commit/?id=6754ae2572eb20dbc23d2452b5e8fe4e07125f93

commit 6754ae2572eb20dbc23d2452b5e8fe4e07125f93
Author:     Jamie Gritton <jamie at FreeBSD.org>
AuthorDate: 2021-01-20 23:08:27 +0000
Commit:     Jamie Gritton <jamie at FreeBSD.org>
CommitDate: 2021-01-20 23:08:27 +0000

    jail: Use refcount(9) for prison references.
    
    Use refcount(9) for both pr_ref and pr_uref in struct prison.  This
    allows prisons to held and freed without requiring the prison mutex.
    An exception to this is that dropping the last reference will still
    lock the prison, to keep the guarantee that a locked prison remains
    valid and alive (provided it was at the time it was locked).
    
    Among other things, this honors the promise made in a comment in
    crcopy(9), that it will not block, which hasn't been true for two
    decades.
---
 sys/kern/kern_jail.c | 145 ++++++++++++++++++++++++++++++---------------------
 sys/sys/jail.h       |   9 ++--
 2 files changed, 91 insertions(+), 63 deletions(-)

diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 757b8bd06b89..e869bafc96b8 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -1116,7 +1116,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
  name_again:
 			deadpr = NULL;
 			FOREACH_PRISON_CHILD(ppr, tpr) {
-				if (tpr != pr && tpr->pr_ref > 0 &&
+				if (tpr != pr &&
 				    !strcmp(tpr->pr_name + pnamelen, namelc)) {
 					mtx_lock(&tpr->pr_mtx);
 					if (prison_isalive(tpr)) {
@@ -1199,8 +1199,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 			    prison_name(mypr, ppr));
 			goto done_deref;
 		}
-		ppr->pr_ref++;
-		ppr->pr_uref++;
+		prison_hold(ppr);
+		refcount_acquire(&ppr->pr_uref);
 		mtx_unlock(&ppr->pr_mtx);
 
 		if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) {
@@ -1315,7 +1315,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 		 * Grab a reference for existing prisons, to ensure they
 		 * continue to exist for the duration of the call.
 		 */
-		pr->pr_ref++;
+		prison_hold(pr);
 		drflags |= PD_DEREF;
 #if defined(VIMAGE) && (defined(INET) || defined(INET6))
 		if ((pr->pr_flags & PR_VNET) &&
@@ -1434,7 +1434,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 #ifdef VIMAGE
 			    (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
 #endif
-			    tpr->pr_uref == 0) {
+			    refcount_load(&tpr->pr_uref) == 0) {
 				descend = 0;
 				continue;
 			}
@@ -1502,7 +1502,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 #ifdef VIMAGE
 			    (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
 #endif
-			    tpr->pr_uref == 0) {
+			    refcount_load(&tpr->pr_uref) == 0) {
 				descend = 0;
 				continue;
 			}
@@ -1738,11 +1738,11 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 	born = !prison_isalive(pr);
 	if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
 		if (pr_flags & PR_PERSIST) {
-			pr->pr_ref++;
-			pr->pr_uref++;
+			prison_hold(pr);
+			refcount_acquire(&pr->pr_uref);
 		} else {
-			pr->pr_ref--;
-			pr->pr_uref--;
+			refcount_release(&pr->pr_uref);
+			refcount_release(&pr->pr_ref);
 		}
 	}
 	pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
@@ -1857,8 +1857,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 		if (pr_flags & PR_PERSIST) {
 			mtx_lock(&pr->pr_mtx);
 			drflags |= PD_LOCKED;
-			pr->pr_ref++;
-			pr->pr_uref++;
+			refcount_acquire(&pr->pr_ref);
+			refcount_acquire(&pr->pr_uref);
 		} else {
 			/* Non-persistent jails need no further changes. */
 			pr = NULL;
@@ -1933,7 +1933,8 @@ get_next_prid(struct prison **insprp)
 			TAILQ_FOREACH(inspr, &allprison, pr_list) {
 				if (inspr->pr_id < jid)
 					continue;
-				if (inspr->pr_id > jid || inspr->pr_ref == 0) {
+				if (inspr->pr_id > jid ||
+				    refcount_load(&inspr->pr_ref) == 0) {
 					/*
 					 * Found an opening.  This may be a gap
 					 * in the list, or a dead jail with the
@@ -2096,7 +2097,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
 
  found_prison:
 	/* Get the parameters of the prison. */
-	pr->pr_ref++;
+	prison_hold(pr);
 	drflags |= PD_DEREF;
 	td->td_retval[0] = pr->pr_id;
 	error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
@@ -2309,7 +2310,7 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
 	}
 
 	/* Remove all descendants of this prison, then remove this prison. */
-	pr->pr_ref++;
+	prison_hold(pr);
 	if (!LIST_EMPTY(&pr->pr_children)) {
 		mtx_unlock(&pr->pr_mtx);
 		lpr = NULL;
@@ -2317,7 +2318,7 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
 			mtx_lock(&cpr->pr_mtx);
 			if (prison_isvalid(cpr)) {
 				tpr = cpr;
-				cpr->pr_ref++;
+				prison_hold(cpr);
 			} else {
 				/* Already removed - do not do it again. */
 				tpr = NULL;
@@ -2351,18 +2352,19 @@ prison_remove_one(struct prison *pr)
 
 	/* If the prison was persistent, it is not anymore. */
 	if (pr->pr_flags & PR_PERSIST) {
-		pr->pr_ref--;
+		refcount_release(&pr->pr_ref);
 		drflags |= PD_DEUREF;
 		pr->pr_flags &= ~PR_PERSIST;
 	}
 
 	/*
 	 * jail_remove added a reference.  If that's the only one, remove
-	 * the prison now.
+	 * the prison now.  refcount(9) doesn't guarantee the cache coherence
+	 * of non-zero counters, so force it here.
 	 */
-	KASSERT(pr->pr_ref > 0,
+	KASSERT(refcount_load(&pr->pr_ref) > 0,
 	    ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id));
-	if (pr->pr_ref == 1) {
+	if (atomic_load_acq_int(&pr->pr_ref) == 1) {
 		prison_deref(pr, drflags);
 		return;
 	}
@@ -2440,8 +2442,8 @@ do_jail_attach(struct thread *td, struct prison *pr)
 	 * a process root from one prison, but attached to the jail
 	 * of another.
 	 */
-	pr->pr_ref++;
-	pr->pr_uref++;
+	refcount_acquire(&pr->pr_ref);
+	refcount_acquire(&pr->pr_uref);
 	mtx_unlock(&pr->pr_mtx);
 
 	/* Let modules do whatever they need to prepare for attaching. */
@@ -2614,19 +2616,21 @@ void
 prison_hold_locked(struct prison *pr)
 {
 
-	mtx_assert(&pr->pr_mtx, MA_OWNED);
-	KASSERT(pr->pr_ref > 0,
-	    ("Trying to hold dead prison %p (jid=%d).", pr, pr->pr_id));
-	pr->pr_ref++;
+	/* Locking is no longer required. */
+	prison_hold(pr);
 }
 
 void
 prison_hold(struct prison *pr)
 {
+#ifdef INVARIANTS
+	int was_valid = refcount_acquire_if_not_zero(&pr->pr_ref);
 
-	mtx_lock(&pr->pr_mtx);
-	prison_hold_locked(pr);
-	mtx_unlock(&pr->pr_mtx);
+	KASSERT(was_valid,
+	    ("Trying to hold dead prison %p (jid=%d).", pr, pr->pr_id));
+#else
+	refcount_acquire(&pr->pr_ref);
+#endif
 }
 
 /*
@@ -2637,14 +2641,17 @@ prison_hold(struct prison *pr)
 void
 prison_free_locked(struct prison *pr)
 {
-	int ref;
+	int lastref;
 
 	mtx_assert(&pr->pr_mtx, MA_OWNED);
-	ref = --pr->pr_ref;
+	KASSERT(refcount_load(&pr->pr_ref) > 0,
+	    ("Trying to free dead prison %p (jid=%d).",
+	     pr, pr->pr_id));
+	lastref = refcount_release(&pr->pr_ref);
 	mtx_unlock(&pr->pr_mtx);
-	if (ref == 0) {
+	if (lastref) {
 		/*
-		 * Don't remove the last reference in this context,
+		 * Don't remove the prison itself in this context,
 		 * in case there are locks held.
 		 */
 		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
@@ -2655,6 +2662,16 @@ void
 prison_free(struct prison *pr)
 {
 
+	/*
+	 * Locking is only required when releasing the last reference.
+	 * This allows assurance that a locked prison will remain valid
+	 * until it is unlocked.
+	 */
+	KASSERT(refcount_load(&pr->pr_ref) > 0,
+	    ("Trying to free dead prison %p (jid=%d).",
+	     pr, pr->pr_id));
+	if (refcount_release_if_not_last(&pr->pr_ref))
+		return;
 	mtx_lock(&pr->pr_mtx);
 	prison_free_locked(pr);
 }
@@ -2670,12 +2687,14 @@ prison_free(struct prison *pr)
 void
 prison_proc_hold(struct prison *pr)
 {
+#ifdef INVARIANTS
+	int was_alive = refcount_acquire_if_not_zero(&pr->pr_uref);
 
-	mtx_lock(&pr->pr_mtx);
-	KASSERT(pr->pr_uref > 0,
+	KASSERT(was_alive,
 	    ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id));
-	pr->pr_uref++;
-	mtx_unlock(&pr->pr_mtx);
+#else
+	refcount_acquire(&pr->pr_uref);
+#endif
 }
 
 /*
@@ -2686,20 +2705,27 @@ prison_proc_hold(struct prison *pr)
 void
 prison_proc_free(struct prison *pr)
 {
+	int lasturef;
 
-	mtx_lock(&pr->pr_mtx);
-	KASSERT(pr->pr_uref > 0,
+	/*
+	 * Locking is only required when releasing the last reference.
+	 * This allows assurance that a locked prison will remain alive
+	 * until it is unlocked.
+	 */
+	KASSERT(refcount_load(&pr->pr_uref) > 0,
 	    ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
-	if (pr->pr_uref > 1)
-		pr->pr_uref--;
-	else {
+	if (refcount_release_if_not_last(&pr->pr_uref))
+		return;
+	mtx_lock(&pr->pr_mtx);
+	lasturef = refcount_release(&pr->pr_uref);
+	if (lasturef) {
 		/*
 		 * Don't remove the last user reference in this context,
 		 * which is expected to be a process that is not only locked,
 		 * but also half dead.  Add a reference so any calls to
 		 * prison_free() won't re-submit the task.
 		 */
-		pr->pr_ref++;
+		refcount_acquire(&pr->pr_ref);
 		mtx_unlock(&pr->pr_mtx);
 		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
 		return;
@@ -2723,7 +2749,7 @@ prison_complete(void *context, int pending)
 	 * was added.  No references are expected if this is completing a call
 	 * to prison_free, but prison_deref is still called for the cleanup.
 	 */
-	prison_deref(pr, pr->pr_uref > 0
+	prison_deref(pr, refcount_load(&pr->pr_uref) > 0
 	    ? PD_DEREF | PD_DEUREF | PD_LOCKED | PD_LIST_XLOCKED
 	    : PD_LOCKED | PD_LIST_XLOCKED);
 }
@@ -2740,29 +2766,30 @@ static void
 prison_deref(struct prison *pr, int flags)
 {
 	struct prison *ppr, *tpr;
-	int ref, lasturef;
+	int lastref, lasturef;
 
 	if (!(flags & PD_LOCKED))
 		mtx_lock(&pr->pr_mtx);
 	for (;;) {
 		if (flags & PD_DEUREF) {
-			KASSERT(pr->pr_uref > 0,
+			KASSERT(refcount_load(&pr->pr_uref) > 0,
 			    ("prison_deref PD_DEUREF on a dead prison (jid=%d)",
 			     pr->pr_id));
-			pr->pr_uref--;
-			lasturef = pr->pr_uref == 0;
+			lasturef = refcount_release(&pr->pr_uref);
 			if (lasturef)
-				pr->pr_ref++;
-			KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0"));
+				refcount_acquire(&pr->pr_ref);
+			KASSERT(refcount_load(&prison0.pr_uref) > 0,
+			    ("prison0 pr_uref=0"));
 		} else
 			lasturef = 0;
 		if (flags & PD_DEREF) {
-			KASSERT(pr->pr_ref > 0,
+			KASSERT(refcount_load(&pr->pr_ref) > 0,
 			    ("prison_deref PD_DEREF on a dead prison (jid=%d)",
 			     pr->pr_id));
-			pr->pr_ref--;
+			lastref = refcount_release(&pr->pr_ref);
 		}
-		ref = pr->pr_ref;
+		else
+			lastref = refcount_load(&pr->pr_ref) == 0;
 		mtx_unlock(&pr->pr_mtx);
 
 		/*
@@ -2771,7 +2798,7 @@ prison_deref(struct prison *pr, int flags)
 		 */
 		if (lasturef) {
 			if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
-				if (ref > 1) {
+				if (atomic_load_acq_int(&pr->pr_ref) > 1) {
 					sx_slock(&allprison_lock);
 					flags |= PD_LIST_SLOCKED;
 				} else {
@@ -2781,12 +2808,12 @@ prison_deref(struct prison *pr, int flags)
 			}
 			(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
 			mtx_lock(&pr->pr_mtx);
-			ref = --pr->pr_ref;
+			lastref = refcount_release(&pr->pr_ref);
 			mtx_unlock(&pr->pr_mtx);
 		}
 
 		/* If the prison still has references, nothing else to do. */
-		if (ref > 0) {
+		if (!lastref) {
 			if (flags & PD_LIST_SLOCKED)
 				sx_sunlock(&allprison_lock);
 			else if (flags & PD_LIST_XLOCKED)
@@ -3008,9 +3035,9 @@ prison_isalive(struct prison *pr)
 {
 
 	mtx_assert(&pr->pr_mtx, MA_OWNED);
-	if (__predict_false(pr->pr_ref == 0))
+	if (__predict_false(refcount_load(&pr->pr_ref) == 0))
 		return (false);
-	if (__predict_false(pr->pr_uref == 0))
+	if (__predict_false(refcount_load(&pr->pr_uref) == 0))
 		return (false);
 	return (true);
 }
@@ -3025,7 +3052,7 @@ prison_isvalid(struct prison *pr)
 {
 
 	mtx_assert(&pr->pr_mtx, MA_OWNED);
-	if (__predict_false(pr->pr_ref == 0))
+	if (__predict_false(refcount_load(&pr->pr_ref) == 0))
 		return (false);
 	return (true);
 }
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index 67ef9347d093..2d1a26787b99 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -150,17 +150,18 @@ struct prison_racct;
  *
  * Lock key:
  *   (a) allprison_lock
+ *   (c) set only during creation before the structure is shared, no mutex
+ *       required to read
  *   (m) locked by pr_mtx
  *   (p) locked by pr_mtx, and also at least shared allprison_lock required
  *       to update
- *   (c) set only during creation before the structure is shared, no mutex
- *       required to read
+ *   (r) atomic via refcount(9), pr_mtx required to decrement to zero
  */
 struct prison {
 	TAILQ_ENTRY(prison) pr_list;			/* (a) all prisons */
 	int		 pr_id;				/* (c) prison id */
-	int		 pr_ref;			/* (m) refcount */
-	int		 pr_uref;			/* (m) user (alive) refcount */
+	volatile u_int	 pr_ref;			/* (r) refcount */
+	volatile u_int	 pr_uref;			/* (r) user (alive) refcount */
 	unsigned	 pr_flags;			/* (p) PR_* flags */
 	LIST_HEAD(, prison) pr_children;		/* (a) list of child jails */
 	LIST_ENTRY(prison) pr_sibling;			/* (a) next in parent's list */


More information about the dev-commits-src-all mailing list