svn commit: r363874 - in head/sys: kern sys

Mateusz Guzik mjg at FreeBSD.org
Tue Aug 4 23:07:43 UTC 2020


Author: mjg
Date: Tue Aug  4 23:07:42 2020
New Revision: 363874
URL: https://svnweb.freebsd.org/changeset/base/363874

Log:
  vfs: support lockless dotdot lookup
  
  Tested by:	pho

Modified:
  head/sys/kern/vfs_cache.c
  head/sys/kern/vfs_subr.c
  head/sys/sys/vnode.h

Modified: head/sys/kern/vfs_cache.c
==============================================================================
--- head/sys/kern/vfs_cache.c	Tue Aug  4 23:07:00 2020	(r363873)
+++ head/sys/kern/vfs_cache.c	Tue Aug  4 23:07:42 2020	(r363874)
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/fcntl.h>
+#include <sys/jail.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
@@ -957,16 +958,22 @@ cache_zap_locked(struct namecache *ncp)
 		SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp,
 		    ncp->nc_name, ncp->nc_vp);
 		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
-		if (ncp == ncp->nc_vp->v_cache_dd)
+		if (ncp == ncp->nc_vp->v_cache_dd) {
+			vn_seqc_write_begin_unheld(ncp->nc_vp);
 			ncp->nc_vp->v_cache_dd = NULL;
+			vn_seqc_write_end(ncp->nc_vp);
+		}
 	} else {
 		SDT_PROBE2(vfs, namecache, zap_negative, done, ncp->nc_dvp,
 		    ncp->nc_name);
 		cache_negative_remove(ncp);
 	}
 	if (ncp->nc_flag & NCF_ISDOTDOT) {
-		if (ncp == ncp->nc_dvp->v_cache_dd)
+		if (ncp == ncp->nc_dvp->v_cache_dd) {
+			vn_seqc_write_begin_unheld(ncp->nc_dvp);
 			ncp->nc_dvp->v_cache_dd = NULL;
+			vn_seqc_write_end(ncp->nc_dvp);
+		}
 	} else {
 		LIST_REMOVE(ncp, nc_src);
 		if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
@@ -1306,7 +1313,9 @@ retry_dotdot:
 				mtx_unlock(dvlp2);
 			cache_free(ncp);
 		} else {
+			vn_seqc_write_begin(dvp);
 			dvp->v_cache_dd = NULL;
+			vn_seqc_write_end(dvp);
 			mtx_unlock(dvlp);
 			if (dvlp2 != NULL)
 				mtx_unlock(dvlp2);
@@ -1817,6 +1826,7 @@ cache_enter_dotdot_prep(struct vnode *dvp, struct vnod
 	cache_celockstate_init(&cel);
 	hash = cache_get_hash(cnp->cn_nameptr, len, dvp);
 	cache_enter_lock_dd(&cel, dvp, vp, hash);
+	vn_seqc_write_begin(dvp);
 	ncp = dvp->v_cache_dd;
 	if (ncp != NULL && (ncp->nc_flag & NCF_ISDOTDOT)) {
 		KASSERT(ncp->nc_dvp == dvp, ("wrong isdotdot parent"));
@@ -1825,6 +1835,7 @@ cache_enter_dotdot_prep(struct vnode *dvp, struct vnod
 		ncp = NULL;
 	}
 	dvp->v_cache_dd = NULL;
+	vn_seqc_write_end(dvp);
 	cache_enter_unlock(&cel);
 	cache_free(ncp);
 }
@@ -1939,7 +1950,9 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, 
 			goto out_unlock_free;
 		KASSERT(vp == NULL || vp->v_type == VDIR,
 		    ("wrong vnode type %p", vp));
+		vn_seqc_write_begin(dvp);
 		dvp->v_cache_dd = ncp;
+		vn_seqc_write_end(dvp);
 	}
 
 	if (vp != NULL) {
@@ -1950,6 +1963,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, 
 				 * directory name in it and the name ".." for the
 				 * directory's parent.
 				 */
+				vn_seqc_write_begin(vp);
 				if ((ndd = vp->v_cache_dd) != NULL) {
 					if ((ndd->nc_flag & NCF_ISDOTDOT) != 0)
 						cache_zap_locked(ndd);
@@ -1957,9 +1971,14 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, 
 						ndd = NULL;
 				}
 				vp->v_cache_dd = ncp;
+				vn_seqc_write_end(vp);
 			}
 		} else {
-			vp->v_cache_dd = NULL;
+			if (vp->v_cache_dd != NULL) {
+				vn_seqc_write_begin(vp);
+				vp->v_cache_dd = NULL;
+				vn_seqc_write_end(vp);
+			}
 		}
 	}
 
@@ -3425,6 +3444,75 @@ cache_fplookup_dot(struct cache_fpl *fpl)
 	return (0);
 }
 
+static int __noinline
+cache_fplookup_dotdot(struct cache_fpl *fpl)
+{
+	struct nameidata *ndp;
+	struct componentname *cnp;
+	struct namecache *ncp;
+	struct vnode *dvp;
+	struct prison *pr;
+	u_char nc_flag;
+
+	ndp = fpl->ndp;
+	cnp = fpl->cnp;
+	dvp = fpl->dvp;
+
+	/*
+	 * XXX this is racy the same way regular lookup is
+	 */
+	for (pr = cnp->cn_cred->cr_prison; pr != NULL;
+	    pr = pr->pr_parent)
+		if (dvp == pr->pr_root)
+			break;
+
+	if (dvp == ndp->ni_rootdir ||
+	    dvp == ndp->ni_topdir ||
+	    dvp == rootvnode ||
+	    pr != NULL) {
+		fpl->tvp = dvp;
+		fpl->tvp_seqc = vn_seqc_read_any(dvp);
+		if (seqc_in_modify(fpl->tvp_seqc)) {
+			return (cache_fpl_aborted(fpl));
+		}
+		return (0);
+	}
+
+	if ((dvp->v_vflag & VV_ROOT) != 0) {
+		/*
+		 * TODO
+		 * The opposite of climb mount is needed here.
+		 */
+		return (cache_fpl_aborted(fpl));
+	}
+
+	ncp = atomic_load_ptr(&dvp->v_cache_dd);
+	if (ncp == NULL) {
+		return (cache_fpl_aborted(fpl));
+	}
+
+	nc_flag = atomic_load_char(&ncp->nc_flag);
+	if ((nc_flag & NCF_ISDOTDOT) != 0) {
+		if ((nc_flag & NCF_NEGATIVE) != 0)
+			return (cache_fpl_aborted(fpl));
+		fpl->tvp = ncp->nc_vp;
+	} else {
+		fpl->tvp = ncp->nc_dvp;
+	}
+
+	if (__predict_false(!cache_ncp_canuse(ncp))) {
+		return (cache_fpl_aborted(fpl));
+	}
+
+	fpl->tvp_seqc = vn_seqc_read_any(fpl->tvp);
+	if (seqc_in_modify(fpl->tvp_seqc)) {
+		return (cache_fpl_partial(fpl));
+	}
+
+	counter_u64_add(dotdothits, 1);
+	return (0);
+}
+
 static int
 cache_fplookup_next(struct cache_fpl *fpl)
 {
@@ -3782,11 +3870,6 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fp
 			break;
 		}
 
-		if (cnp->cn_flags & ISDOTDOT) {
-			error = cache_fpl_partial(fpl);
-			break;
-		}
-
 		VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp);
 
 		error = VOP_FPLOOKUP_VEXEC(fpl->dvp, cnp->cn_cred, cnp->cn_thread);
@@ -3795,17 +3878,24 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fp
 			break;
 		}
 
-		error = cache_fplookup_next(fpl);
-		if (__predict_false(error != 0)) {
-			break;
-		}
-
-		VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp);
-
-		if (cache_fplookup_need_climb_mount(fpl)) {
-			error = cache_fplookup_climb_mount(fpl);
+		if (__predict_false(cnp->cn_flags & ISDOTDOT)) {
+			error = cache_fplookup_dotdot(fpl);
 			if (__predict_false(error != 0)) {
 				break;
+			}
+		} else {
+			error = cache_fplookup_next(fpl);
+			if (__predict_false(error != 0)) {
+				break;
+			}
+
+			VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp);
+
+			if (cache_fplookup_need_climb_mount(fpl)) {
+				error = cache_fplookup_climb_mount(fpl);
+				if (__predict_false(error != 0)) {
+					break;
+				}
 			}
 		}
 

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c	Tue Aug  4 23:07:00 2020	(r363873)
+++ head/sys/kern/vfs_subr.c	Tue Aug  4 23:07:42 2020	(r363874)
@@ -6881,12 +6881,15 @@ vn_dir_check_exec(struct vnode *vp, struct componentna
 	return (VOP_ACCESS(vp, VEXEC, cnp->cn_cred, cnp->cn_thread));
 }
 
+/*
+ * Do not use this variant unless you have means other than the hold count
+ * to prevent the vnode from getting freed.
+ */
 void
-vn_seqc_write_begin_locked(struct vnode *vp)
+vn_seqc_write_begin_unheld_locked(struct vnode *vp)
 {
 
 	ASSERT_VI_LOCKED(vp, __func__);
-	VNPASS(vp->v_holdcnt > 0, vp);
 	VNPASS(vp->v_seqc_users >= 0, vp);
 	vp->v_seqc_users++;
 	if (vp->v_seqc_users == 1)
@@ -6894,11 +6897,29 @@ vn_seqc_write_begin_locked(struct vnode *vp)
 }
 
 void
+vn_seqc_write_begin_locked(struct vnode *vp)
+{
+
+	ASSERT_VI_LOCKED(vp, __func__);
+	VNPASS(vp->v_holdcnt > 0, vp);
+	vn_seqc_write_begin_unheld_locked(vp);
+}
+
+void
 vn_seqc_write_begin(struct vnode *vp)
 {
 
 	VI_LOCK(vp);
 	vn_seqc_write_begin_locked(vp);
+	VI_UNLOCK(vp);
+}
+
+void
+vn_seqc_write_begin_unheld(struct vnode *vp)
+{
+
+	VI_LOCK(vp);
+	vn_seqc_write_begin_unheld_locked(vp);
 	VI_UNLOCK(vp);
 }
 

Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h	Tue Aug  4 23:07:00 2020	(r363873)
+++ head/sys/sys/vnode.h	Tue Aug  4 23:07:42 2020	(r363874)
@@ -764,6 +764,8 @@ int	vn_io_fault_uiomove(char *data, int xfersize, stru
 int	vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize,
 	    struct uio *uio);
 
+void	vn_seqc_write_begin_unheld_locked(struct vnode *vp);
+void	vn_seqc_write_begin_unheld(struct vnode *vp);
 void	vn_seqc_write_begin_locked(struct vnode *vp);
 void	vn_seqc_write_begin(struct vnode *vp);
 void	vn_seqc_write_end_locked(struct vnode *vp);


More information about the svn-src-head mailing list