git: 002e18eb7f3e - main - vfs: add FAILIFEXISTS flag

Mateusz Guzik mjg at FreeBSD.org
Mon Dec 28 02:03:18 UTC 2020


The branch main has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=002e18eb7f3e1ae972827cb35705502e91c540e3

commit 002e18eb7f3e1ae972827cb35705502e91c540e3
Author:     Mateusz Guzik <mjg at FreeBSD.org>
AuthorDate: 2020-12-27 23:33:04 +0000
Commit:     Mateusz Guzik <mjg at FreeBSD.org>
CommitDate: 2020-12-28 01:53:27 +0000

    vfs: add FAILIFEXISTS flag
    
    Both FreeBSD and Linux mkdir -p walk the tree up ignoring any EEXIST on
    the way and both are used a lot when building respective kernels.
    
    This poses a problem as spurious locking avoidably interferes with
    concurrent operations like getdirentries on affected directories.
    
    Work around the problem by adding FAILIFEXISTS flag. In case of lockless
    lookup this manages to avoid any work to begin with, there is no speed
    up for the locked case but perhaps this can be augmented later on.
    
    For simplicity the only supported semantics are as used by mkdir.
    
    Reviewed by:    kib (previous version)
    Differential Revision:  https://reviews.freebsd.org/D27789
---
 sys/kern/vfs_cache.c    | 16 ++++++++++++++--
 sys/kern/vfs_lookup.c   | 33 ++++++++++++++++++++++++++++++++-
 sys/kern/vfs_syscalls.c | 18 +-----------------
 sys/sys/namei.h         |  2 +-
 4 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 22dfe01ac624..c9bc2074d5e6 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -3736,8 +3736,8 @@ cache_fpl_terminated(struct cache_fpl *fpl)
 
 #define CACHE_FPL_SUPPORTED_CN_FLAGS \
 	(NC_NOMAKEENTRY | NC_KEEPPOSENTRY | LOCKLEAF | LOCKPARENT | WANTPARENT | \
-	 FOLLOW | LOCKSHARED | SAVENAME | SAVESTART | WILLBEDIR | ISOPEN | \
-	 NOMACCHECK | AUDITVNODE1 | AUDITVNODE2 | NOCAPCHECK)
+	 FAILIFEXISTS | FOLLOW | LOCKSHARED | SAVENAME | SAVESTART | WILLBEDIR | \
+	 ISOPEN | NOMACCHECK | AUDITVNODE1 | AUDITVNODE2 | NOCAPCHECK)
 
 #define CACHE_FPL_INTERNAL_CN_FLAGS \
 	(ISDOTDOT | MAKEENTRY | ISLASTCN)
@@ -3990,6 +3990,11 @@ cache_fplookup_final_modifying(struct cache_fpl *fpl)
 		return (cache_fpl_handled(fpl, EROFS));
 	}
 
+	if (fpl->tvp != NULL && (cnp->cn_flags & FAILIFEXISTS) != 0) {
+		cache_fpl_smr_exit(fpl);
+		return (cache_fpl_handled(fpl, EEXIST));
+	}
+
 	/*
 	 * Secure access to dvp; check cache_fplookup_partial_setup for
 	 * reasoning.
@@ -4070,6 +4075,12 @@ cache_fplookup_final_modifying(struct cache_fpl *fpl)
 		return (cache_fpl_aborted(fpl));
 	}
 
+	if ((cnp->cn_flags & FAILIFEXISTS) != 0) {
+		vput(dvp);
+		vput(tvp);
+		return (cache_fpl_handled(fpl, EEXIST));
+	}
+
 	if ((cnp->cn_flags & LOCKLEAF) == 0) {
 		VOP_UNLOCK(tvp);
 	}
@@ -4221,6 +4232,7 @@ cache_fplookup_noentry(struct cache_fpl *fpl)
 	MPASS(!cache_fpl_isdotdot(cnp));
 
 	if (cnp->cn_nameiop != LOOKUP) {
+		fpl->tvp = NULL;
 		return (cache_fplookup_modifying(fpl));
 	}
 
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index 511efaa9d01a..da04dd09af40 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -545,6 +545,16 @@ namei(struct nameidata *ndp)
 	ndp->ni_debugflags |= NAMEI_DBG_CALLED;
 	if (ndp->ni_startdir != NULL)
 		ndp->ni_debugflags |= NAMEI_DBG_HADSTARTDIR;
+	if (cnp->cn_flags & FAILIFEXISTS) {
+		KASSERT(cnp->cn_nameiop == CREATE,
+		    ("%s: FAILIFEXISTS passed for op %d", __func__, cnp->cn_nameiop));
+		/*
+		 * The limitation below is to restrict hairy corner cases.
+		 */
+		KASSERT((cnp->cn_flags & (LOCKPARENT | LOCKLEAF)) == LOCKPARENT,
+		    ("%s: FAILIFEXISTS must be passed with LOCKPARENT and without LOCKLEAF",
+		    __func__));
+	}
 	/*
 	 * For NDVALIDATE.
 	 *
@@ -1295,8 +1305,11 @@ success:
 			goto bad2;
 		}
 	}
-	if (ndp->ni_vp != NULL && ndp->ni_vp->v_type == VDIR)
+	if (ndp->ni_vp != NULL) {
 		nameicap_tracker_add(ndp, ndp->ni_vp);
+		if ((cnp->cn_flags & (FAILIFEXISTS | ISSYMLINK)) == FAILIFEXISTS)
+			goto bad_eexist;
+	}
 	return (0);
 
 bad2:
@@ -1311,6 +1324,24 @@ bad:
 		vput(dp);
 	ndp->ni_vp = NULL;
 	return (error);
+bad_eexist:
+	/*
+	 * FAILIFEXISTS handling.
+	 *
+	 * XXX namei called with LOCKPARENT but not LOCKLEAF has the strange
+	 * behaviour of leaving the vnode unlocked if the target is the same
+	 * vnode as the parent.
+	 */
+	MPASS((cnp->cn_flags & ISSYMLINK) == 0);
+	if (ndp->ni_vp == ndp->ni_dvp)
+		vrele(ndp->ni_dvp);
+	else
+		vput(ndp->ni_dvp);
+	vrele(ndp->ni_vp);
+	ndp->ni_dvp = NULL;
+	ndp->ni_vp = NULL;
+	NDFREE(ndp, NDF_ONLY_PNBUF);
+	return (EEXIST);
 }
 
 /*
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index cbfff229540a..5a30b06e4e9e 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -3768,7 +3768,6 @@ kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg,
     int mode)
 {
 	struct mount *mp;
-	struct vnode *vp;
 	struct vattr vattr;
 	struct nameidata nd;
 	int error;
@@ -3777,26 +3776,11 @@ kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg,
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
-	    NC_NOMAKEENTRY | NC_KEEPPOSENTRY, segflg, path, fd,
+	    NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS, segflg, path, fd,
 	    &cap_mkdirat_rights, td);
 	nd.ni_cnd.cn_flags |= WILLBEDIR;
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	vp = nd.ni_vp;
-	if (vp != NULL) {
-		NDFREE(&nd, NDF_ONLY_PNBUF);
-		/*
-		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
-		 * the strange behaviour of leaving the vnode unlocked
-		 * if the target is the same vnode as the parent.
-		 */
-		if (vp == nd.ni_dvp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
-		vrele(vp);
-		return (EEXIST);
-	}
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		vput(nd.ni_dvp);
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
index dfb5e5e4cec3..ddba9cbd8912 100644
--- a/sys/sys/namei.h
+++ b/sys/sys/namei.h
@@ -143,7 +143,7 @@ int	cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
 #define	LOCKLEAF	0x0004	/* lock vnode on return */
 #define	LOCKPARENT	0x0008	/* want parent vnode returned locked */
 #define	WANTPARENT	0x0010	/* want parent vnode returned unlocked */
-/* UNUSED		0x0020 */
+#define	FAILIFEXISTS	0x0020	/* return EEXIST if found */
 #define	FOLLOW		0x0040	/* follow symbolic links */
 #define	BENEATH		0x0080	/* No escape from the start dir */
 #define	LOCKSHARED	0x0100	/* Shared lock leaf */


More information about the dev-commits-src-all mailing list