svn commit: r367089 - head/sys/kern

Mateusz Guzik mjg at FreeBSD.org
Tue Oct 27 18:12:07 UTC 2020


Author: mjg
Date: Tue Oct 27 18:12:07 2020
New Revision: 367089
URL: https://svnweb.freebsd.org/changeset/base/367089

Log:
  vfs: fix vnode reclaim races against getnewvnode
  
  All vnodes allocated by UMA are present on the global list used by
  vnlru. getnewvnode modifies the state of the vnode (most notably
  altering v_holdcnt) but never locks it. Moreover, filesystems also
  modify it in arbitrary ways, sometimes before taking the vnode
  lock or setting any other indicator that the vnode can be used.
  
  Having vnlru pick up such a vnode would be problematic.
  
  To that end there are two fixes:
  - vlrureclaim, which does not recycle v_holdcnt == 0 vnodes, takes the
    interlock and verifies that v_mount has been set. It is an
    invariant that the vnode lock is held by that point, providing
    the necessary serialisation against locking after vhold.
  - vnlru_free_locked, which only wants to free v_holdcnt == 0 vnodes,
    now makes sure to only transition the count 0->1, while newly
    allocated vnodes start with v_holdcnt == VHOLD_NO_SMR. Only
    getnewvnode transitions VHOLD_NO_SMR->1, so the hold attempt fails
    for vnodes that are not fully constructed yet (see the sketch below).
  
  Tested by:	pho
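
The allocation side of the second fix is not visible in the diff below. As a
rough illustration, here is a minimal sketch (not the committed code; the
helper name is made up) of the claim the log describes: getnewvnode moves
v_holdcnt from VHOLD_NO_SMR to 1, and since this is the only place that
transition happens, vhold_recycle_free() (which only does 0->1) and
vhold_smr() (which bails out on VHOLD_NO_SMR) both back off from a vnode that
has not been handed out yet.

/*
 * Sketch only; the real logic lives in sys/kern/vfs_subr.c.  VNASSERT,
 * VHOLD_NO_SMR and struct vnode come from sys/vnode.h.
 */
static void
vn_claim_from_new(struct vnode *vp)	/* hypothetical helper name */
{

	VNASSERT(vp->v_holdcnt == VHOLD_NO_SMR, vp,
	    ("fresh vnode with unexpected hold count %d", vp->v_holdcnt));
	/*
	 * Nothing else may legally change the count away from VHOLD_NO_SMR,
	 * so a single atomic store publishes the vnode as held exactly once.
	 */
	atomic_store_int(&vp->v_holdcnt, 1);
}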

Modified:
  head/sys/kern/vfs_subr.c

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c	Tue Oct 27 18:11:11 2020	(r367088)
+++ head/sys/kern/vfs_subr.c	Tue Oct 27 18:12:07 2020	(r367089)
@@ -109,7 +109,7 @@ static void	syncer_shutdown(void *arg, int howto);
 static int	vtryrecycle(struct vnode *vp);
 static void	v_init_counters(struct vnode *);
 static void	vgonel(struct vnode *);
-static bool	vhold_recycle(struct vnode *);
+static bool	vhold_recycle_free(struct vnode *);
 static void	vfs_knllock(void *arg);
 static void	vfs_knlunlock(void *arg);
 static void	vfs_knl_assert_locked(void *arg);
@@ -561,6 +561,11 @@ vnode_init(void *mem, int size, int flags)
 
 	vp->v_dbatchcpu = NOCPU;
 
+	/*
+	 * Check vhold_recycle_free for an explanation.
+	 */
+	vp->v_holdcnt = VHOLD_NO_SMR;
+	vp->v_type = VNON;
 	mtx_lock(&vnode_list_mtx);
 	TAILQ_INSERT_BEFORE(vnode_list_free_marker, vp, v_vnodelist);
 	mtx_unlock(&vnode_list_mtx);
@@ -1127,8 +1132,25 @@ restart:
 			goto next_iter;
 		}
 
-		if (!vhold_recycle(vp))
+		/*
+		 * Handle races against vnode allocation. Filesystems lock the
+		 * vnode some time after it gets returned from getnewvnode,
+		 * despite type and hold count being manipulated earlier.
+		 * Resorting to checking v_mount restores guarantees present
+		 * before the global list was reworked to contain all vnodes.
+		 */
+		if (!VI_TRYLOCK(vp))
 			goto next_iter;
+		if (__predict_false(vp->v_type == VBAD || vp->v_type == VNON)) {
+			VI_UNLOCK(vp);
+			goto next_iter;
+		}
+		if (vp->v_mount == NULL) {
+			VI_UNLOCK(vp);
+			goto next_iter;
+		}
+		vholdl(vp);
+		VI_UNLOCK(vp);
 		TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
 		TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist);
 		mtx_unlock(&vnode_list_mtx);
@@ -1228,13 +1250,13 @@ restart:
 		    mp->mnt_op != mnt_op)) {
 			continue;
 		}
-		TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
-		TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist);
 		if (__predict_false(vp->v_type == VBAD || vp->v_type == VNON)) {
 			continue;
 		}
-		if (!vhold_recycle(vp))
+		if (!vhold_recycle_free(vp))
 			continue;
+		TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+		TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist);
 		count--;
 		mtx_unlock(&vnode_list_mtx);
 		vtryrecycle(vp);
@@ -3251,11 +3273,13 @@ vholdnz(struct vnode *vp)
  * However, while this is more performant, it hinders debugging by eliminating
  * the previously mentioned invariant.
  */
-static bool __always_inline
-_vhold_cond(struct vnode *vp)
+bool
+vhold_smr(struct vnode *vp)
 {
 	int count;
 
+	VFS_SMR_ASSERT_ENTERED();
+
 	count = atomic_load_int(&vp->v_holdcnt);
 	for (;;) {
 		if (count & VHOLD_NO_SMR) {
@@ -3263,7 +3287,6 @@ _vhold_cond(struct vnode *vp)
 			    ("non-zero hold count with flags %d\n", count));
 			return (false);
 		}
-
 		VNASSERT(count >= 0, vp, ("invalid hold count %d\n", count));
 		if (atomic_fcmpset_int(&vp->v_holdcnt, &count, count + 1)) {
 			if (count == 0)
@@ -3273,26 +3296,45 @@ _vhold_cond(struct vnode *vp)
 	}
 }
 
-bool
-vhold_smr(struct vnode *vp)
-{
-
-	VFS_SMR_ASSERT_ENTERED();
-	return (_vhold_cond(vp));
-}
-
 /*
- * Special case for vnode recycling.
+ * Hold a free vnode for recycling.
  *
- * Vnodes are present on the global list until UMA takes them out.
- * Attempts to recycle only need the relevant lock and have no use for SMR.
+ * Note: vnode_init references this comment.
+ *
+ * Attempts to recycle only need the global vnode list lock and have no use for
+ * SMR.
+ *
+ * However, vnodes get inserted into the global list before they get fully
+ * initialized and stay there until UMA decides to free the memory. This in
+ * particular means the target can be found before it becomes usable and after
+ * it becomes recycled. Picking up such vnodes is guarded with v_holdcnt set to
+ * VHOLD_NO_SMR.
+ *
+ * Note: the vnode may gain more references after we transition the count 0->1.
  */
 static bool
-vhold_recycle(struct vnode *vp)
+vhold_recycle_free(struct vnode *vp)
 {
+	int count;
 
 	mtx_assert(&vnode_list_mtx, MA_OWNED);
-	return (_vhold_cond(vp));
+
+	count = atomic_load_int(&vp->v_holdcnt);
+	for (;;) {
+		if (count & VHOLD_NO_SMR) {
+			VNASSERT((count & ~VHOLD_NO_SMR) == 0, vp,
+			    ("non-zero hold count with flags %d\n", count));
+			return (false);
+		}
+		VNASSERT(count >= 0, vp, ("invalid hold count %d\n", count));
+		if (count > 0) {
+			return (false);
+		}
+		if (atomic_fcmpset_int(&vp->v_holdcnt, &count, count + 1)) {
+			vn_freevnodes_dec();
+			return (true);
+		}
+	}
 }
 
 static void __noinline
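
As a usage note on the vhold_smr() change above (the VFS_SMR_ASSERT_ENTERED
check now lives in the function itself instead of a wrapper), the following
sketch shows the lockless pattern that assertion enforces.  The entry
structure, helper name and error handling are made up for illustration; only
vfs_smr_enter/vfs_smr_exit, atomic_load_ptr and vhold_smr are real interfaces.

/*
 * Usage sketch (not from the tree): pin a vnode found through an
 * SMR-protected pointer.  Kernel context assumed (sys/param.h,
 * sys/systm.h, sys/vnode.h); struct example_entry is hypothetical.
 */
struct example_entry {
	struct vnode	*e_vnode;
};

static int
example_pin_vnode(struct example_entry *entry, struct vnode **vpp)
{
	struct vnode *vp;

	vfs_smr_enter();
	vp = atomic_load_ptr(&entry->e_vnode);
	if (vp == NULL || !vhold_smr(vp)) {
		/*
		 * Nothing there, or v_holdcnt is VHOLD_NO_SMR: the vnode is
		 * either not fully constructed yet or about to be freed.
		 */
		vfs_smr_exit();
		return (EAGAIN);
	}
	vfs_smr_exit();
	/* The vnode is now held; lock and revalidate it as usual. */
	*vpp = vp;
	return (0);
}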

