git: 002e18eb7f3e - main - vfs: add FAILIFEXISTS flag
Mateusz Guzik
mjg at FreeBSD.org
Mon Dec 28 02:03:18 UTC 2020
The branch main has been updated by mjg:
URL: https://cgit.FreeBSD.org/src/commit/?id=002e18eb7f3e1ae972827cb35705502e91c540e3
commit 002e18eb7f3e1ae972827cb35705502e91c540e3
Author: Mateusz Guzik <mjg at FreeBSD.org>
AuthorDate: 2020-12-27 23:33:04 +0000
Commit: Mateusz Guzik <mjg at FreeBSD.org>
CommitDate: 2020-12-28 01:53:27 +0000
vfs: add FAILIFEXISTS flag
Both FreeBSD and Linux mkdir -p walk the tree up ignoring any EEXIST on
the way and both are used a lot when building respective kernels.
This poses a problem as spurious locking avoidably interferes with
concurrent operations like getdirentries on affected directories.
Work around the problem by adding FAILIFEXISTS flag. In case of lockless
lookup this manages to avoid any work to begin with, there is no speed
up for the locked case but perhaps this can be augmented later on.
For simplicity the only supported semantics are as used by mkdir.
Reviewed by: kib (previous version)
Differential Revision: https://reviews.freebsd.org/D27789
---
sys/kern/vfs_cache.c | 16 ++++++++++++++--
sys/kern/vfs_lookup.c | 33 ++++++++++++++++++++++++++++++++-
sys/kern/vfs_syscalls.c | 18 +-----------------
sys/sys/namei.h | 2 +-
4 files changed, 48 insertions(+), 21 deletions(-)
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 22dfe01ac624..c9bc2074d5e6 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -3736,8 +3736,8 @@ cache_fpl_terminated(struct cache_fpl *fpl)
#define CACHE_FPL_SUPPORTED_CN_FLAGS \
(NC_NOMAKEENTRY | NC_KEEPPOSENTRY | LOCKLEAF | LOCKPARENT | WANTPARENT | \
- FOLLOW | LOCKSHARED | SAVENAME | SAVESTART | WILLBEDIR | ISOPEN | \
- NOMACCHECK | AUDITVNODE1 | AUDITVNODE2 | NOCAPCHECK)
+ FAILIFEXISTS | FOLLOW | LOCKSHARED | SAVENAME | SAVESTART | WILLBEDIR | \
+ ISOPEN | NOMACCHECK | AUDITVNODE1 | AUDITVNODE2 | NOCAPCHECK)
#define CACHE_FPL_INTERNAL_CN_FLAGS \
(ISDOTDOT | MAKEENTRY | ISLASTCN)
@@ -3990,6 +3990,11 @@ cache_fplookup_final_modifying(struct cache_fpl *fpl)
return (cache_fpl_handled(fpl, EROFS));
}
+ if (fpl->tvp != NULL && (cnp->cn_flags & FAILIFEXISTS) != 0) {
+ cache_fpl_smr_exit(fpl);
+ return (cache_fpl_handled(fpl, EEXIST));
+ }
+
/*
* Secure access to dvp; check cache_fplookup_partial_setup for
* reasoning.
@@ -4070,6 +4075,12 @@ cache_fplookup_final_modifying(struct cache_fpl *fpl)
return (cache_fpl_aborted(fpl));
}
+ if ((cnp->cn_flags & FAILIFEXISTS) != 0) {
+ vput(dvp);
+ vput(tvp);
+ return (cache_fpl_handled(fpl, EEXIST));
+ }
+
if ((cnp->cn_flags & LOCKLEAF) == 0) {
VOP_UNLOCK(tvp);
}
@@ -4221,6 +4232,7 @@ cache_fplookup_noentry(struct cache_fpl *fpl)
MPASS(!cache_fpl_isdotdot(cnp));
if (cnp->cn_nameiop != LOOKUP) {
+ fpl->tvp = NULL;
return (cache_fplookup_modifying(fpl));
}
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index 511efaa9d01a..da04dd09af40 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -545,6 +545,16 @@ namei(struct nameidata *ndp)
ndp->ni_debugflags |= NAMEI_DBG_CALLED;
if (ndp->ni_startdir != NULL)
ndp->ni_debugflags |= NAMEI_DBG_HADSTARTDIR;
+ if (cnp->cn_flags & FAILIFEXISTS) {
+ KASSERT(cnp->cn_nameiop == CREATE,
+ ("%s: FAILIFEXISTS passed for op %d", __func__, cnp->cn_nameiop));
+ /*
+ * The limitation below is to restrict hairy corner cases.
+ */
+ KASSERT((cnp->cn_flags & (LOCKPARENT | LOCKLEAF)) == LOCKPARENT,
+ ("%s: FAILIFEXISTS must be passed with LOCKPARENT and without LOCKLEAF",
+ __func__));
+ }
/*
* For NDVALIDATE.
*
@@ -1295,8 +1305,11 @@ success:
goto bad2;
}
}
- if (ndp->ni_vp != NULL && ndp->ni_vp->v_type == VDIR)
+ if (ndp->ni_vp != NULL) {
nameicap_tracker_add(ndp, ndp->ni_vp);
+ if ((cnp->cn_flags & (FAILIFEXISTS | ISSYMLINK)) == FAILIFEXISTS)
+ goto bad_eexist;
+ }
return (0);
bad2:
@@ -1311,6 +1324,24 @@ bad:
vput(dp);
ndp->ni_vp = NULL;
return (error);
+bad_eexist:
+ /*
+ * FAILIFEXISTS handling.
+ *
+ * XXX namei called with LOCKPARENT but not LOCKLEAF has the strange
+ * behaviour of leaving the vnode unlocked if the target is the same
+ * vnode as the parent.
+ */
+ MPASS((cnp->cn_flags & ISSYMLINK) == 0);
+ if (ndp->ni_vp == ndp->ni_dvp)
+ vrele(ndp->ni_dvp);
+ else
+ vput(ndp->ni_dvp);
+ vrele(ndp->ni_vp);
+ ndp->ni_dvp = NULL;
+ ndp->ni_vp = NULL;
+ NDFREE(ndp, NDF_ONLY_PNBUF);
+ return (EEXIST);
}
/*
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index cbfff229540a..5a30b06e4e9e 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -3768,7 +3768,6 @@ kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg,
int mode)
{
struct mount *mp;
- struct vnode *vp;
struct vattr vattr;
struct nameidata nd;
int error;
@@ -3777,26 +3776,11 @@ kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg,
restart:
bwillwrite();
NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
- NC_NOMAKEENTRY | NC_KEEPPOSENTRY, segflg, path, fd,
+ NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS, segflg, path, fd,
&cap_mkdirat_rights, td);
nd.ni_cnd.cn_flags |= WILLBEDIR;
if ((error = namei(&nd)) != 0)
return (error);
- vp = nd.ni_vp;
- if (vp != NULL) {
- NDFREE(&nd, NDF_ONLY_PNBUF);
- /*
- * XXX namei called with LOCKPARENT but not LOCKLEAF has
- * the strange behaviour of leaving the vnode unlocked
- * if the target is the same vnode as the parent.
- */
- if (vp == nd.ni_dvp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
- vrele(vp);
- return (EEXIST);
- }
if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
index dfb5e5e4cec3..ddba9cbd8912 100644
--- a/sys/sys/namei.h
+++ b/sys/sys/namei.h
@@ -143,7 +143,7 @@ int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
#define LOCKLEAF 0x0004 /* lock vnode on return */
#define LOCKPARENT 0x0008 /* want parent vnode returned locked */
#define WANTPARENT 0x0010 /* want parent vnode returned unlocked */
-/* UNUSED 0x0020 */
+#define FAILIFEXISTS 0x0020 /* return EEXIST if found */
#define FOLLOW 0x0040 /* follow symbolic links */
#define BENEATH 0x0080 /* No escape from the start dir */
#define LOCKSHARED 0x0100 /* Shared lock leaf */
More information about the dev-commits-src-all
mailing list