atomic v_usecount and v_holdcnt
Mateusz Guzik
mjguzik at gmail.com
Sat Nov 22 00:28:17 UTC 2014
The idea is that we don't need an interlock as long as we don't
transition either counter 1->0 or 0->1.
The patch itself is more of a proof of concept, so I didn't rename vholdl
& friends just yet.
It helps during lookups of the same vnode, since the interlock — which
was taken twice — served as a serialization point; that effect is now
reduced.
There are other places which can avoid VI_LOCK + vget scheme.
The patch below survived make -j 40 buildworld, poudriere with 40 workers,
etc., on a 2 package(s) x 10 core(s) x 2 SMT threads machine, both with and
without debugging options (including DEBUG_VFS_LOCKS).
Perf difference:
In a crude microbenchmark of 40 threads each doing a stat on
/foo/bar/baz/quux${N}, where each thread stats a separate file, I got
over a 4x speedup on tmpfs.
Comments?
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
index 83f29c1..b587ebd 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
@@ -99,6 +99,6 @@ vn_rele_async(vnode_t *vp, taskq_t *taskq)
(task_func_t *)vn_rele_inactive, vp, TQ_SLEEP) != 0);
return;
}
- vp->v_usecount--;
+ refcount_release(&vp->v_usecount);
vdropl(vp);
}
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 55e3217..ec26d35 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -665,12 +665,12 @@ success:
ltype = VOP_ISLOCKED(dvp);
VOP_UNLOCK(dvp, 0);
}
- VI_LOCK(*vpp);
+ vholdl(*vpp);
if (wlocked)
CACHE_WUNLOCK();
else
CACHE_RUNLOCK();
- error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread);
+ error = vget_held(*vpp, cnp->cn_lkflags, cnp->cn_thread);
if (cnp->cn_flags & ISDOTDOT) {
vn_lock(dvp, ltype | LK_RETRY);
if (dvp->v_iflag & VI_DOOMED) {
@@ -1376,9 +1376,9 @@ vn_dir_dd_ino(struct vnode *vp)
if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
continue;
ddvp = ncp->nc_dvp;
- VI_LOCK(ddvp);
+ vholdl(ddvp);
CACHE_RUNLOCK();
- if (vget(ddvp, LK_INTERLOCK | LK_SHARED | LK_NOWAIT, curthread))
+ if (vget_held(ddvp, LK_SHARED | LK_NOWAIT, curthread))
return (NULL);
return (ddvp);
}
diff --git a/sys/kern/vfs_hash.c b/sys/kern/vfs_hash.c
index 0271e49..d2fdbba 100644
--- a/sys/kern/vfs_hash.c
+++ b/sys/kern/vfs_hash.c
@@ -83,9 +83,9 @@ vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td, s
continue;
if (fn != NULL && fn(vp, arg))
continue;
- VI_LOCK(vp);
+ vhold(vp);
mtx_unlock(&vfs_hash_mtx);
- error = vget(vp, flags | LK_INTERLOCK, td);
+ error = vget_held(vp, flags, td);
if (error == ENOENT && (flags & LK_NOWAIT) == 0)
break;
if (error)
@@ -127,9 +127,9 @@ vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td, stru
continue;
if (fn != NULL && fn(vp2, arg))
continue;
- VI_LOCK(vp2);
+ vhold(vp2);
mtx_unlock(&vfs_hash_mtx);
- error = vget(vp2, flags | LK_INTERLOCK, td);
+ error = vget_held(vp2, flags, td);
if (error == ENOENT && (flags & LK_NOWAIT) == 0)
break;
mtx_lock(&vfs_hash_mtx);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 345aad6..53bdf0d 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -854,7 +854,7 @@ vnlru_free(int count)
*/
freevnodes--;
vp->v_iflag &= ~VI_FREE;
- vp->v_holdcnt++;
+ refcount_acquire(&vp->v_holdcnt);
mtx_unlock(&vnode_free_list_mtx);
VI_UNLOCK(vp);
@@ -2052,12 +2052,7 @@ v_incr_usecount(struct vnode *vp)
CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
vholdl(vp);
- vp->v_usecount++;
- if (vp->v_type == VCHR && vp->v_rdev != NULL) {
- dev_lock();
- vp->v_rdev->si_usecount++;
- dev_unlock();
- }
+ v_upgrade_usecount(vp);
}
/*
@@ -2067,9 +2062,24 @@ v_incr_usecount(struct vnode *vp)
static void
v_upgrade_usecount(struct vnode *vp)
{
+ int old, locked;
CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
- vp->v_usecount++;
+retry:
+ old = vp->v_usecount;
+ if (old > 0) {
+ if (atomic_cmpset_int(&vp->v_usecount, old, old + 1))
+ goto dev;
+ goto retry;
+ }
+
+ locked = mtx_owned(VI_MTX(vp));
+ if (!locked)
+ VI_LOCK(vp);
+ refcount_acquire(&vp->v_usecount);
+ if (!locked)
+ VI_UNLOCK(vp);
+dev:
if (vp->v_type == VCHR && vp->v_rdev != NULL) {
dev_lock();
vp->v_rdev->si_usecount++;
@@ -2086,16 +2096,10 @@ static void
v_decr_usecount(struct vnode *vp)
{
- ASSERT_VI_LOCKED(vp, __FUNCTION__);
VNASSERT(vp->v_usecount > 0, vp,
("v_decr_usecount: negative usecount"));
CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
- vp->v_usecount--;
- if (vp->v_type == VCHR && vp->v_rdev != NULL) {
- dev_lock();
- vp->v_rdev->si_usecount--;
- dev_unlock();
- }
+ v_decr_useonly(vp);
vdropl(vp);
}
@@ -2108,12 +2112,27 @@ v_decr_usecount(struct vnode *vp)
static void
v_decr_useonly(struct vnode *vp)
{
+ int old, locked;
- ASSERT_VI_LOCKED(vp, __FUNCTION__);
VNASSERT(vp->v_usecount > 0, vp,
("v_decr_useonly: negative usecount"));
CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
- vp->v_usecount--;
+retry:
+ old = vp->v_usecount;
+ if (old > 1) {
+ if (atomic_cmpset_int(&vp->v_usecount, old, old - 1)) {
+ goto dev;
+ }
+ goto retry;
+ }
+
+ locked = mtx_owned(VI_MTX(vp));
+ if (!locked)
+ VI_LOCK(vp);
+ refcount_release(&vp->v_usecount);
+ if (!locked)
+ VI_UNLOCK(vp);
+dev:
if (vp->v_type == VCHR && vp->v_rdev != NULL) {
dev_lock();
vp->v_rdev->si_usecount--;
@@ -2129,19 +2148,15 @@ v_decr_useonly(struct vnode *vp)
* vput try to do it here.
*/
int
-vget(struct vnode *vp, int flags, struct thread *td)
+vget_held(struct vnode *vp, int flags, struct thread *td)
{
int error;
- error = 0;
VNASSERT((flags & LK_TYPE_MASK) != 0, vp,
("vget: invalid lock operation"));
CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags);
- if ((flags & LK_INTERLOCK) == 0)
- VI_LOCK(vp);
- vholdl(vp);
- if ((error = vn_lock(vp, flags | LK_INTERLOCK)) != 0) {
+ if ((error = vn_lock(vp, flags)) != 0) {
vdrop(vp);
CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__,
vp);
@@ -2149,7 +2164,6 @@ vget(struct vnode *vp, int flags, struct thread *td)
}
if (vp->v_iflag & VI_DOOMED && (flags & LK_RETRY) == 0)
panic("vget: vn_lock failed to return ENOENT\n");
- VI_LOCK(vp);
/* Upgrade our holdcnt to a usecount. */
v_upgrade_usecount(vp);
/*
@@ -2159,15 +2173,26 @@ vget(struct vnode *vp, int flags, struct thread *td)
* we don't succeed no harm is done.
*/
if (vp->v_iflag & VI_OWEINACT) {
- if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE &&
- (flags & LK_NOWAIT) == 0)
- vinactive(vp, td);
- vp->v_iflag &= ~VI_OWEINACT;
+ VI_LOCK(vp);
+ if (vp->v_iflag & VI_OWEINACT) {
+ if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE &&
+ (flags & LK_NOWAIT) == 0)
+ vinactive(vp, td);
+ vp->v_iflag &= ~VI_OWEINACT;
+ }
+ VI_UNLOCK(vp);
}
- VI_UNLOCK(vp);
return (0);
}
+int
+vget(struct vnode *vp, int flags, struct thread *td)
+{
+
+ vhold(vp);
+ return (vget_held(vp, flags, td));
+}
+
/*
* Increase the reference count of a vnode.
*/
@@ -2176,9 +2201,7 @@ vref(struct vnode *vp)
{
CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
- VI_LOCK(vp);
v_incr_usecount(vp);
- VI_UNLOCK(vp);
}
/*
@@ -2195,9 +2218,7 @@ vrefcnt(struct vnode *vp)
{
int usecnt;
- VI_LOCK(vp);
usecnt = vp->v_usecount;
- VI_UNLOCK(vp);
return (usecnt);
}
@@ -2210,6 +2231,7 @@ static void
vputx(struct vnode *vp, int func)
{
int error;
+ int old;
KASSERT(vp != NULL, ("vputx: null vp"));
if (func == VPUTX_VUNREF)
@@ -2219,11 +2241,26 @@ vputx(struct vnode *vp, int func)
else
KASSERT(func == VPUTX_VRELE, ("vputx: wrong func"));
CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
+
+retry:
+ old = vp->v_usecount;
+ if (old > 1) {
+ if (atomic_cmpset_int(&vp->v_usecount, old, old - 1)) {
+ if (func == VPUTX_VPUT)
+ VOP_UNLOCK(vp, 0);
+ if (vp->v_type == VCHR && vp->v_rdev != NULL) {
+ dev_lock();
+ vp->v_rdev->si_usecount--;
+ dev_unlock();
+ }
+ vdropl(vp);
+ return;
+ }
+ goto retry;
+ }
+
VI_LOCK(vp);
- /* Skip this v_writecount check if we're going to panic below. */
- VNASSERT(vp->v_writecount < vp->v_usecount || vp->v_usecount < 1, vp,
- ("vputx: missed vn_close"));
error = 0;
if (vp->v_usecount > 1 || ((vp->v_iflag & VI_DOINGINACT) &&
@@ -2320,9 +2357,7 @@ void
vhold(struct vnode *vp)
{
- VI_LOCK(vp);
vholdl(vp);
- VI_UNLOCK(vp);
}
/*
@@ -2332,20 +2367,30 @@ void
vholdl(struct vnode *vp)
{
struct mount *mp;
+ int old;
+ int locked;
CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-#ifdef INVARIANTS
- /* getnewvnode() calls v_incr_usecount() without holding interlock. */
- if (vp->v_type != VNON || vp->v_data != NULL) {
- ASSERT_VI_LOCKED(vp, "vholdl");
- VNASSERT(vp->v_holdcnt > 0 || (vp->v_iflag & VI_FREE) != 0,
- vp, ("vholdl: free vnode is held"));
+retry:
+ old = vp->v_holdcnt;
+ if (old > 0) {
+ if (atomic_cmpset_int(&vp->v_holdcnt, old, old + 1)) {
+ VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
+ ("vholdl: vnode with usecount is free"));
+ return;
+ }
+ goto retry;
}
-#endif
- vp->v_holdcnt++;
- if ((vp->v_iflag & VI_FREE) == 0)
+ locked = mtx_owned(VI_MTX(vp));
+ if (!locked)
+ VI_LOCK(vp);
+ if ((vp->v_iflag & VI_FREE) == 0) {
+ refcount_acquire(&vp->v_holdcnt);
+ if (!locked)
+ VI_UNLOCK(vp);
return;
- VNASSERT(vp->v_holdcnt == 1, vp, ("vholdl: wrong hold count"));
+ }
+ VNASSERT(vp->v_holdcnt == 0, vp, ("vholdl: wrong hold count"));
VNASSERT(vp->v_op != NULL, vp, ("vholdl: vnode already reclaimed."));
/*
* Remove a vnode from the free list, mark it as in use,
@@ -2362,6 +2407,9 @@ vholdl(struct vnode *vp)
TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
mp->mnt_activevnodelistsize++;
mtx_unlock(&vnode_free_list_mtx);
+ refcount_acquire(&vp->v_holdcnt);
+ if (!locked)
+ VI_UNLOCK(vp);
}
/*
@@ -2372,7 +2420,6 @@ void
vdrop(struct vnode *vp)
{
- VI_LOCK(vp);
vdropl(vp);
}
@@ -2387,15 +2434,27 @@ vdropl(struct vnode *vp)
struct bufobj *bo;
struct mount *mp;
int active;
+ int old;
+ int locked;
- ASSERT_VI_LOCKED(vp, "vdropl");
CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
if (vp->v_holdcnt <= 0)
panic("vdrop: holdcnt %d", vp->v_holdcnt);
- vp->v_holdcnt--;
- VNASSERT(vp->v_holdcnt >= vp->v_usecount, vp,
- ("hold count less than use count"));
- if (vp->v_holdcnt > 0) {
+ locked = mtx_owned(VI_MTX(vp));
+retry:
+ old = vp->v_holdcnt;
+ if (old > 1) {
+ if (atomic_cmpset_int(&vp->v_holdcnt, old, old -1)) {
+ if (locked)
+ VI_UNLOCK(vp);
+ return;
+ }
+ goto retry;
+ }
+
+ if (!locked)
+ VI_LOCK(vp);
+ if (refcount_release(&vp->v_holdcnt) == 0) {
VI_UNLOCK(vp);
return;
}
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index c78b9d1..87eee4a 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -651,6 +651,7 @@ void vdrop(struct vnode *);
void vdropl(struct vnode *);
int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td);
int vget(struct vnode *vp, int lockflag, struct thread *td);
+int vget_held(struct vnode *vp, int lockflag, struct thread *td);
void vgone(struct vnode *vp);
void vhold(struct vnode *);
void vholdl(struct vnode *);
More information about the freebsd-fs
mailing list