git: aeabf8d4b972 - main - nullfs: hash insertion without vnode lock upgrade
Date: Sat, 19 Mar 2022 10:50:34 UTC
The branch main has been updated by mjg:
URL: https://cgit.FreeBSD.org/src/commit/?id=aeabf8d4b9727201648887f8aa7dd1930cf0e154
commit aeabf8d4b9727201648887f8aa7dd1930cf0e154
Author: Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2022-03-07 11:43:43 +0000
Commit: Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2022-03-19 10:47:10 +0000
nullfs: hash insertion without vnode lock upgrade
Use the hash lock to serialize instead.
This enables shared-locked ".." lookups.
Reviewed by: markj
Tested by: pho (previous version)
Differential Revision: https://reviews.freebsd.org/D34466
---
sys/fs/nullfs/null_subr.c | 114 ++++++++++++++++++++------------------------
sys/fs/nullfs/null_vfsops.c | 2 +-
2 files changed, 53 insertions(+), 63 deletions(-)
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index 6b422410b9ec..b2b6e6837b2d 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -65,7 +65,7 @@ static u_long null_hash_mask;
static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
-static struct vnode * null_hashins(struct mount *, struct null_node *);
+static void null_hashins(struct mount *, struct null_node *);
/*
* Initialise cache headers
@@ -93,14 +93,15 @@ nullfs_uninit(struct vfsconf *vfsp)
* Return a VREF'ed alias for lower vnode if already exists, else 0.
* Lower vnode should be locked on entry and will be left locked on exit.
*/
-struct vnode *
-null_hashget(struct mount *mp, struct vnode *lowervp)
+static struct vnode *
+null_hashget_locked(struct mount *mp, struct vnode *lowervp)
{
struct null_node_hashhead *hd;
struct null_node *a;
struct vnode *vp;
ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+ rw_assert(&null_hash_lock, RA_LOCKED);
/*
* Find hash base, and then search the (two-way) linked
@@ -109,9 +110,6 @@ null_hashget(struct mount *mp, struct vnode *lowervp)
* reference count (but NOT the lower vnode's VREF counter).
*/
hd = NULL_NHASH(lowervp);
- if (LIST_EMPTY(hd))
- return (NULLVP);
- rw_rlock(&null_hash_lock);
LIST_FOREACH(a, hd, null_hash) {
if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
/*
@@ -122,43 +120,50 @@ null_hashget(struct mount *mp, struct vnode *lowervp)
*/
vp = NULLTOV(a);
vref(vp);
- rw_runlock(&null_hash_lock);
return (vp);
}
}
- rw_runlock(&null_hash_lock);
return (NULLVP);
}
-/*
- * Act like null_hashget, but add passed null_node to hash if no existing
- * node found.
- */
-static struct vnode *
+struct vnode *
+null_hashget(struct mount *mp, struct vnode *lowervp)
+{
+ struct null_node_hashhead *hd;
+ struct vnode *vp;
+
+ hd = NULL_NHASH(lowervp);
+ if (LIST_EMPTY(hd))
+ return (NULLVP);
+
+ rw_rlock(&null_hash_lock);
+ vp = null_hashget_locked(mp, lowervp);
+ rw_runlock(&null_hash_lock);
+
+ return (vp);
+}
+
+static void
null_hashins(struct mount *mp, struct null_node *xp)
{
struct null_node_hashhead *hd;
+#ifdef INVARIANTS
struct null_node *oxp;
- struct vnode *ovp;
+#endif
+
+ rw_assert(&null_hash_lock, RA_WLOCKED);
hd = NULL_NHASH(xp->null_lowervp);
- rw_wlock(&null_hash_lock);
+#ifdef INVARIANTS
LIST_FOREACH(oxp, hd, null_hash) {
if (oxp->null_lowervp == xp->null_lowervp &&
NULLTOV(oxp)->v_mount == mp) {
- /*
- * See null_hashget for a description of this
- * operation.
- */
- ovp = NULLTOV(oxp);
- vref(ovp);
- rw_wunlock(&null_hash_lock);
- return (ovp);
+ VNASSERT(0, NULLTOV(oxp),
+ ("vnode already in hash"));
}
}
+#endif
LIST_INSERT_HEAD(hd, xp, null_hash);
- rw_wunlock(&null_hash_lock);
- return (NULLVP);
}
static void
@@ -201,19 +206,6 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
return (0);
}
- /*
- * The insmntque1() call below requires the exclusive lock on
- * the nullfs vnode. Upgrade the lock now if hash failed to
- * provide ready to use vnode.
- */
- if (VOP_ISLOCKED(lowervp) != LK_EXCLUSIVE) {
- vn_lock(lowervp, LK_UPGRADE | LK_RETRY);
- if (VN_IS_DOOMED(lowervp)) {
- vput(lowervp);
- return (ENOENT);
- }
- }
-
/*
* We do not serialize vnode creation, instead we will check for
* duplicates later, when adding new vnode to hash.
@@ -229,20 +221,23 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
return (error);
}
+ VNPASS(vp->v_object == NULL, vp);
+ VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) == 0, vp);
+
+ rw_wlock(&null_hash_lock);
xp->null_vnode = vp;
xp->null_lowervp = lowervp;
xp->null_flags = 0;
vp->v_type = lowervp->v_type;
vp->v_data = xp;
vp->v_vnlock = lowervp->v_vnlock;
- error = insmntque1(vp, mp);
- if (error != 0) {
- vput(lowervp);
+ *vpp = null_hashget_locked(mp, lowervp);
+ if (*vpp != NULL) {
+ rw_wunlock(&null_hash_lock);
+ vrele(lowervp);
null_destroy_proto(vp, xp);
- return (error);
+ return (0);
}
- if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp)
- vp->v_vflag |= VV_ROOT;
/*
* We might miss the case where lower vnode sets VIRF_PGREAD
@@ -251,28 +246,23 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
*/
if ((vn_irflag_read(lowervp) & VIRF_PGREAD) != 0) {
MPASS(lowervp->v_object != NULL);
- if ((vn_irflag_read(vp) & VIRF_PGREAD) == 0) {
- if (vp->v_object == NULL)
- vp->v_object = lowervp->v_object;
- else
- MPASS(vp->v_object == lowervp->v_object);
- vn_irflag_set_cond(vp, VIRF_PGREAD);
- } else {
- MPASS(vp->v_object != NULL);
- }
+ vp->v_object = lowervp->v_object;
+ vn_irflag_set(vp, VIRF_PGREAD);
}
+ if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp)
+ vp->v_vflag |= VV_ROOT;
- /*
- * Atomically insert our new node into the hash or vget existing
- * if someone else has beaten us to it.
- */
- *vpp = null_hashins(mp, xp);
- if (*vpp != NULL) {
- vrele(lowervp);
- vp->v_object = NULL; /* in case VIRF_PGREAD set it */
+ error = insmntque1(vp, mp);
+ if (error != 0) {
+ rw_wunlock(&null_hash_lock);
+ vput(lowervp);
+ vp->v_object = NULL;
null_destroy_proto(vp, xp);
- return (0);
+ return (error);
}
+
+ null_hashins(mp, xp);
+ rw_wunlock(&null_hash_lock);
*vpp = vp;
return (0);
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
index 1dba5f51d619..21a28dd45570 100644
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -207,7 +207,7 @@ nullfs_mount(struct mount *mp)
(MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
MNTK_EXTENDED_SHARED);
}
- mp->mnt_kern_flag |= MNTK_LOOKUP_EXCL_DOTDOT | MNTK_NOMSYNC;
+ mp->mnt_kern_flag |= MNTK_NOMSYNC | MNTK_UNLOCKED_INSMNTQUE;
mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
(MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS);
MNT_IUNLOCK(mp);
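
For readers who want to play with the locking pattern outside the kernel, below is a minimal user-space sketch of the idea the commit describes: rather than upgrading a per-object lock to exclusive before insertion, the hash's own rwlock is taken for writing, the table is re-checked for a duplicate, and the new node is inserted, all under that single write hold. Every name here (demo_node, demo_hashins, and so on) is illustrative only; the real implementation is the null_hashget_locked()/null_hashins() pair in the diff above, where the same write hold also covers insmntque1(), which MNTK_UNLOCKED_INSMNTQUE now permits without the exclusive vnode lock.

/*
 * Sketch of "check then insert under one writer hold" using a
 * pthread rwlock and <sys/queue.h> lists.  Illustrative names only.
 */
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/queue.h>

struct demo_node {
	LIST_ENTRY(demo_node) hash_link;
	const void *lower;		/* key: identity of the lower object */
};

#define	DEMO_NBUCKETS	64
static LIST_HEAD(demo_bucket, demo_node) demo_hash[DEMO_NBUCKETS];
static pthread_rwlock_t demo_hash_lock = PTHREAD_RWLOCK_INITIALIZER;

static struct demo_bucket *
demo_bucket_of(const void *lower)
{

	return (&demo_hash[((uintptr_t)lower >> 4) % DEMO_NBUCKETS]);
}

/* Lookup; the caller must already hold the hash lock (read or write). */
static struct demo_node *
demo_hashget_locked(const void *lower)
{
	struct demo_bucket *bp;
	struct demo_node *np;

	bp = demo_bucket_of(lower);
	LIST_FOREACH(np, bp, hash_link) {
		if (np->lower == lower)
			return (np);
	}
	return (NULL);
}

/*
 * Insert 'newnp' unless an equivalent node already exists.  The duplicate
 * check and the insertion happen under one write hold of the hash lock,
 * so no per-object lock upgrade is needed to make the pair atomic.
 * Returns the winning node: 'newnp' on success, the existing node otherwise.
 */
static struct demo_node *
demo_hashins(struct demo_node *newnp)
{
	struct demo_node *oldnp;

	pthread_rwlock_wrlock(&demo_hash_lock);
	oldnp = demo_hashget_locked(newnp->lower);
	if (oldnp == NULL)
		LIST_INSERT_HEAD(demo_bucket_of(newnp->lower), newnp,
		    hash_link);
	pthread_rwlock_unlock(&demo_hash_lock);
	return (oldnp != NULL ? oldnp : newnp);
}

The losing side in demo_hashins() simply gets the pre-existing node back and can discard its own; in the kernel diff the analogous path is the early return from null_nodeget() when null_hashget_locked() finds a match, which vrele()s the lower vnode and destroys the freshly built prototype.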