atomic v_usecount and v_holdcnt

Mateusz Guzik mjguzik at gmail.com
Sat Mar 14 22:52:33 UTC 2015


On Mon, Nov 24, 2014 at 11:52:52AM +0200, Konstantin Belousov wrote:
> On Sat, Nov 22, 2014 at 10:11:47PM +0100, Mateusz Guzik wrote:
> > On Sat, Nov 22, 2014 at 11:25:27AM +0200, Konstantin Belousov wrote:
> > > On Sat, Nov 22, 2014 at 01:28:12AM +0100, Mateusz Guzik wrote:
> > > > The idea is that we don't need an interlock as long as we don't
> > > > transition either counter 1->0 or 0->1.
> > > I already said that something along the lines of the patch should work.
> > > In fact, you need vnode lock when hold count changes between 0 and 1,
> > > and probably the same for use count.
> > > 
> > 
> > I don't see why this would be required (not that I'm an VFS expert).
> > vnode recycling seems to be protected with the interlock.
> > 
> > In fact I would argue that if this is really needed, current code is
> > buggy.
> Yes, it is already (somewhat) buggy.
> 
> Most need of the lock is for the case of counts coming from 1 to 0.
> The reason is the handling of the active vnode list, which is used
> for limiting the amount of vnode list walking in syncer.  When hold
> count is decremented to 0, vnode is removed from the active list.
> When use count is decremented to 0, vnode is supposedly inactivated,
> and vinactive() cleans the cached pages belonging to vnode.  In other
> words, VI_OWEINACT for dirty vnode is sort of bug.
> 

Modified the patch to no longer have the usecount + interlock dropped +
VI_OWEINACT set window.

Extended 0->1 hold count + vnode not locked window remains. I can fix
that if it is really necessary by having _vhold return with interlock
held if it did such transition.


> > > Similarly, I dislike vget_held().  Add a flag to vget(), see LK_EATTR_MASK
> > > in sys/lockmgr.h.
> > > 

Added LK_VNHELD.

==========================

Rationale for this project in the first place is vnode interlock contetion.

For performance considerations I did a simple test with 16 processes
opening + closing files of the form /1/2/3/file${proc} for 10 seconds.

I ran the test multiple times and it always got similar results.

sort -nk4 | tail -10:

       4        21      286087       59821      447061      0      0  0  61050  kern/subr_turnstile.c:551 (spin mutex:turnstile chain)
       3       202      150391       66590     2849045      0      0  0  38808  kern/vfs_subr.c:2294 (sleep mutex:vnode interlock)
       6         7     1811168       74290     8512307      0      0  0 164328  cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c:2126 (sx:zp->z_acl_lock)
      20        25     1419986       98319     2809304      0      0  0 148656  kern/vfs_subr.c:2539 (sleep mutex:vnode interlock)
      24         4     1109662      146941    14254913      0      0  0 180781  kern/subr_sleepqueue.c:251 (spin mutex:sleepq chain)
     480        39    92451121      996583     5674822     16      0  0 191116  kern/vfs_subr.c:2176 (lockmgr:zfs)
       5       386      630453     8320678     5675428      0      1  0 487331  kern/vfs_subr.c:2184 (sleep mutex:vnode interlock)
       8       440     3171462     9687352     5675274      0      1  0 800833  kern/vfs_cache.c:668 (sleep mutex:vnode interlock)
       8       381     1908209    41077673     2838476      0     14  0 1943683 kern/vfs_subr.c:2211 (sleep mutex:vnode interlock)
      16       380     4401208    52113492     8513892      0      6  0 2630199 kern/vfs_subr.c:2254 (sleep mutex:vnode interlock)

In comparison with atomic counters:

       4         3      417431       14942     2503166      0      0  0  29277  cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c:1373 (sx:arc_mfu->arcs_locks[i].arcs_lock)
       4         8      355527       41360     5125918      0      0  0  47753  kern/vfs_subr.c:2300 (sleep mutex:vnode interlock)
       6        10     7486379      993366    31423111      0      0  0 1644513 cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c:223 (sx:rrl->rr_lock)
       5        42     3122209     1591643    31423107      0      0  0 2369782 cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c:165 (sx:rrl->rr_lock)
       6       107      799818     2095611     3491446      0      0  0 1322296 kern/vfs_subr.c:2383 (sleep mutex:vnode_free_list)
       5        95      709042     2400191     3491446      0      0  0 1346013 kern/vfs_subr.c:2441 (sleep mutex:vnode_free_list)
      27        51     3727414     3382053    29383205      0      0  0 1066198 kern/subr_sleepqueue.c:251 (spin mutex:sleepq chain)
       4        44      809608     4020811     5837543      0      0  0 1115968 kern/vfs_subr.c:2190 (sleep mutex:vnode interlock)
     136       119    85703294     8917689    13965774      6      0  0 1215102 kern/vfs_subr.c:2169 (lockmgr:zfs)
      13       112     7178437    17880353    20948741      0      0  0 5322368 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c:2126 (sx:zp->z_acl_lock)

ops/s were ~140k with the interlock and ~345k with atomics. With such a big
(repeatble) difference I did not consider making enough runs to make ministat
happy.

==========================

Patch below would use some tidy ups, but I consider it correct from
semantic pov.

For convenience it contains refcount bits which are about to be reviewed
in another thread. Indeed *_if_greater funcs should likely be converted
to just deal with zero and non-zero count, but that does not change the
outcome.

diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
index 83f29c1..b587ebd 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
@@ -99,6 +99,6 @@ vn_rele_async(vnode_t *vp, taskq_t *taskq)
 		    (task_func_t *)vn_rele_inactive, vp, TQ_SLEEP) != 0);
 		return;
 	}
-	vp->v_usecount--;
+	refcount_release(&vp->v_usecount);
 	vdropl(vp);
 }
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index d57dc90..f3f8339 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -665,12 +665,12 @@ success:
 		ltype = VOP_ISLOCKED(dvp);
 		VOP_UNLOCK(dvp, 0);
 	}
-	VI_LOCK(*vpp);
+	vhold(*vpp);
 	if (wlocked)
 		CACHE_WUNLOCK();
 	else
 		CACHE_RUNLOCK();
-	error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread);
+	error = vget(*vpp, cnp->cn_lkflags | LK_VNHELD, cnp->cn_thread);
 	if (cnp->cn_flags & ISDOTDOT) {
 		vn_lock(dvp, ltype | LK_RETRY);
 		if (dvp->v_iflag & VI_DOOMED) {
@@ -1368,9 +1368,9 @@ vn_dir_dd_ino(struct vnode *vp)
 		if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
 			continue;
 		ddvp = ncp->nc_dvp;
-		VI_LOCK(ddvp);
+		vhold(ddvp);
 		CACHE_RUNLOCK();
-		if (vget(ddvp, LK_INTERLOCK | LK_SHARED | LK_NOWAIT, curthread))
+		if (vget(ddvp, LK_SHARED | LK_NOWAIT | LK_VNHELD, curthread))
 			return (NULL);
 		return (ddvp);
 	}
diff --git a/sys/kern/vfs_hash.c b/sys/kern/vfs_hash.c
index 930fca1..48601e7 100644
--- a/sys/kern/vfs_hash.c
+++ b/sys/kern/vfs_hash.c
@@ -84,9 +84,9 @@ vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td, s
 				continue;
 			if (fn != NULL && fn(vp, arg))
 				continue;
-			VI_LOCK(vp);
+			vhold(vp);
 			rw_runlock(&vfs_hash_lock);
-			error = vget(vp, flags | LK_INTERLOCK, td);
+			error = vget(vp, flags | LK_VNHELD, td);
 			if (error == ENOENT && (flags & LK_NOWAIT) == 0)
 				break;
 			if (error)
@@ -128,9 +128,9 @@ vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td, stru
 				continue;
 			if (fn != NULL && fn(vp2, arg))
 				continue;
-			VI_LOCK(vp2);
+			vhold(vp2);
 			rw_wunlock(&vfs_hash_lock);
-			error = vget(vp2, flags | LK_INTERLOCK, td);
+			error = vget(vp2, flags | LK_VNHELD, td);
 			if (error == ENOENT && (flags & LK_NOWAIT) == 0)
 				break;
 			rw_wlock(&vfs_hash_lock);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index fda80c9..85ff203 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -68,6 +68,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/pctrie.h>
 #include <sys/priv.h>
 #include <sys/reboot.h>
+#include <sys/refcount.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/sleepqueue.h>
@@ -102,9 +103,9 @@ static int	flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo,
 static void	syncer_shutdown(void *arg, int howto);
 static int	vtryrecycle(struct vnode *vp);
 static void	v_incr_usecount(struct vnode *);
-static void	v_decr_usecount(struct vnode *);
-static void	v_decr_useonly(struct vnode *);
 static void	v_upgrade_usecount(struct vnode *);
+static void	v_incr_devcount(struct vnode *);
+static void	v_decr_devcount(struct vnode *);
 static void	vnlru_free(int);
 static void	vgonel(struct vnode *);
 static void	vfs_knllock(void *arg);
@@ -863,7 +864,7 @@ vnlru_free(int count)
 		 */
 		freevnodes--;
 		vp->v_iflag &= ~VI_FREE;
-		vp->v_holdcnt++;
+		refcount_acquire(&vp->v_holdcnt);
 
 		mtx_unlock(&vnode_free_list_mtx);
 		VI_UNLOCK(vp);
@@ -2082,70 +2083,50 @@ static void
 v_incr_usecount(struct vnode *vp)
 {
 
+	ASSERT_VI_UNLOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	vholdl(vp);
-	vp->v_usecount++;
-	if (vp->v_type == VCHR && vp->v_rdev != NULL) {
-		dev_lock();
-		vp->v_rdev->si_usecount++;
-		dev_unlock();
-	}
+	vhold(vp);
+	v_upgrade_usecount(vp);
 }
 
 /*
- * Turn a holdcnt into a use+holdcnt such that only one call to
- * v_decr_usecount is needed.
+ * Turn a holdcnt into a use+holdcnt.
  */
 static void
 v_upgrade_usecount(struct vnode *vp)
 {
 
+	ASSERT_VI_UNLOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	vp->v_usecount++;
-	if (vp->v_type == VCHR && vp->v_rdev != NULL) {
-		dev_lock();
-		vp->v_rdev->si_usecount++;
-		dev_unlock();
+	if (!refcount_acquire_if_greater(&vp->v_usecount, 0)) {
+		VI_LOCK(vp);
+		refcount_acquire(&vp->v_usecount);
+		VI_UNLOCK(vp);
 	}
+	v_incr_devcount(vp);
 }
 
 /*
- * Decrement the vnode use and hold count along with the driver's usecount
- * if this is a chardev.  The vdropl() below releases the vnode interlock
- * as it may free the vnode.
+ * Increment si_usecount of the associated device, if any.
  */
 static void
-v_decr_usecount(struct vnode *vp)
+v_incr_devcount(struct vnode *vp)
 {
 
-	ASSERT_VI_LOCKED(vp, __FUNCTION__);
-	VNASSERT(vp->v_usecount > 0, vp,
-	    ("v_decr_usecount: negative usecount"));
-	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	vp->v_usecount--;
 	if (vp->v_type == VCHR && vp->v_rdev != NULL) {
 		dev_lock();
-		vp->v_rdev->si_usecount--;
+		vp->v_rdev->si_usecount++;
 		dev_unlock();
 	}
-	vdropl(vp);
 }
 
 /*
- * Decrement only the use count and driver use count.  This is intended to
- * be paired with a follow on vdropl() to release the remaining hold count.
- * In this way we may vgone() a vnode with a 0 usecount without risk of
- * having it end up on a free list because the hold count is kept above 0.
+ * Increment si_usecount of the associated device, if any.
  */
 static void
-v_decr_useonly(struct vnode *vp)
+v_decr_devcount(struct vnode *vp)
 {
 
-	ASSERT_VI_LOCKED(vp, __FUNCTION__);
-	VNASSERT(vp->v_usecount > 0, vp,
-	    ("v_decr_useonly: negative usecount"));
-	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	vp->v_usecount--;
 	if (vp->v_type == VCHR && vp->v_rdev != NULL) {
 		dev_lock();
 		vp->v_rdev->si_usecount--;
@@ -2163,17 +2144,29 @@ v_decr_useonly(struct vnode *vp)
 int
 vget(struct vnode *vp, int flags, struct thread *td)
 {
-	int error;
+	int error, oweinact;
 
-	error = 0;
 	VNASSERT((flags & LK_TYPE_MASK) != 0, vp,
 	    ("vget: invalid lock operation"));
+
+	ASSERT_VI_LOCK(vp, __func__, (flags & LK_INTERLOCK) != 0);
+	if ((flags & LK_VNHELD) != 0)
+		VNASSERT((vp->v_holdcnt > 0), vp,
+		    ("vget: LK_VNHELD passed but vnode not held"));
+#if 0
+	else {
+		ASSERT_VI_LOCKED(vp, __func__);
+		KASSERT((flags & LK_INTERLOCK) != 0,
+		    ("vget: neither LK_INTERLOCK nor LK_VNHELD passed"));
+	}
+#endif
+
 	CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags);
 
-	if ((flags & LK_INTERLOCK) == 0)
-		VI_LOCK(vp);
-	vholdl(vp);
-	if ((error = vn_lock(vp, flags | LK_INTERLOCK)) != 0) {
+	if ((flags & LK_VNHELD) == 0)
+		_vhold(vp, (flags & LK_INTERLOCK) != 0);
+
+	if ((error = vn_lock(vp, flags)) != 0) {
 		vdrop(vp);
 		CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__,
 		    vp);
@@ -2181,22 +2174,33 @@ vget(struct vnode *vp, int flags, struct thread *td)
 	}
 	if (vp->v_iflag & VI_DOOMED && (flags & LK_RETRY) == 0)
 		panic("vget: vn_lock failed to return ENOENT\n");
-	VI_LOCK(vp);
-	/* Upgrade our holdcnt to a usecount. */
-	v_upgrade_usecount(vp);
 	/*
 	 * We don't guarantee that any particular close will
 	 * trigger inactive processing so just make a best effort
 	 * here at preventing a reference to a removed file.  If
 	 * we don't succeed no harm is done.
+	 *
+	 * Upgrade our holdcnt to a usecount.
 	 */
-	if (vp->v_iflag & VI_OWEINACT) {
-		if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE &&
+	if (refcount_acquire_if_greater(&vp->v_usecount, 0)) {
+		v_incr_devcount(vp);
+		VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
+		    ("vget: vnode with VI_OWEINACT and usecount"));
+	} else {
+		VI_LOCK(vp);
+		if ((vp->v_iflag & VI_OWEINACT) == 0) {
+			oweinact = 0;
+		} else {
+			oweinact = 1;
+			vp->v_iflag &= ~VI_OWEINACT;
+		}
+		refcount_acquire(&vp->v_usecount);
+		v_incr_devcount(vp);
+		if (oweinact && VOP_ISLOCKED(vp) == LK_EXCLUSIVE &&
 		    (flags & LK_NOWAIT) == 0)
 			vinactive(vp, td);
-		vp->v_iflag &= ~VI_OWEINACT;
+		VI_UNLOCK(vp);
 	}
-	VI_UNLOCK(vp);
 	return (0);
 }
 
@@ -2208,9 +2212,7 @@ vref(struct vnode *vp)
 {
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	VI_LOCK(vp);
 	v_incr_usecount(vp);
-	VI_UNLOCK(vp);
 }
 
 /*
@@ -2225,13 +2227,8 @@ vref(struct vnode *vp)
 int
 vrefcnt(struct vnode *vp)
 {
-	int usecnt;
-
-	VI_LOCK(vp);
-	usecnt = vp->v_usecount;
-	VI_UNLOCK(vp);
 
-	return (usecnt);
+	return (vp->v_usecount);
 }
 
 #define	VPUTX_VRELE	1
@@ -2250,33 +2247,42 @@ vputx(struct vnode *vp, int func)
 		ASSERT_VOP_LOCKED(vp, "vput");
 	else
 		KASSERT(func == VPUTX_VRELE, ("vputx: wrong func"));
+	ASSERT_VI_UNLOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	VI_LOCK(vp);
-
-	/* Skip this v_writecount check if we're going to panic below. */
-	VNASSERT(vp->v_writecount < vp->v_usecount || vp->v_usecount < 1, vp,
-	    ("vputx: missed vn_close"));
-	error = 0;
 
-	if (vp->v_usecount > 1 || ((vp->v_iflag & VI_DOINGINACT) &&
-	    vp->v_usecount == 1)) {
+	if (refcount_release_if_greater(&vp->v_usecount, 1)) {
 		if (func == VPUTX_VPUT)
 			VOP_UNLOCK(vp, 0);
-		v_decr_usecount(vp);
+		v_decr_devcount(vp);
+		vdrop(vp);
 		return;
 	}
 
-	if (vp->v_usecount != 1) {
-		vprint("vputx: negative ref count", vp);
-		panic("vputx: negative ref cnt");
-	}
-	CTR2(KTR_VFS, "%s: return vnode %p to the freelist", __func__, vp);
+	VI_LOCK(vp);
+
 	/*
 	 * We want to hold the vnode until the inactive finishes to
 	 * prevent vgone() races.  We drop the use count here and the
 	 * hold count below when we're done.
 	 */
-	v_decr_useonly(vp);
+	if (!refcount_release(&vp->v_usecount) || (vp->v_iflag & VI_DOINGINACT)) {
+		if (func == VPUTX_VPUT)
+			VOP_UNLOCK(vp, 0);
+		v_decr_devcount(vp);
+		vdropl(vp);
+		return;
+	}
+
+	v_decr_devcount(vp);
+
+	error = 0;
+
+	if (vp->v_usecount != 0) {
+		vprint("vputx: negative ref count", vp);
+		panic("vputx: negative ref cnt");
+	}
+	CTR2(KTR_VFS, "%s: return vnode %p to the freelist", __func__, vp);
+
 	/*
 	 * We must call VOP_INACTIVE with the node locked. Mark
 	 * as VI_DOINGINACT to avoid recursion.
@@ -2346,38 +2352,29 @@ vunref(struct vnode *vp)
 }
 
 /*
- * Somebody doesn't want the vnode recycled.
- */
-void
-vhold(struct vnode *vp)
-{
-
-	VI_LOCK(vp);
-	vholdl(vp);
-	VI_UNLOCK(vp);
-}
-
-/*
  * Increase the hold count and activate if this is the first reference.
  */
 void
-vholdl(struct vnode *vp)
+_vhold(struct vnode *vp, bool locked)
 {
 	struct mount *mp;
 
+	ASSERT_VI_LOCK(vp, __func__, locked);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-#ifdef INVARIANTS
-	/* getnewvnode() calls v_incr_usecount() without holding interlock. */
-	if (vp->v_type != VNON || vp->v_data != NULL) {
-		ASSERT_VI_LOCKED(vp, "vholdl");
-		VNASSERT(vp->v_holdcnt > 0 || (vp->v_iflag & VI_FREE) != 0,
-		    vp, ("vholdl: free vnode is held"));
+	if (refcount_acquire_if_greater(&vp->v_holdcnt, 0)) {
+		VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
+		    ("_vhold: vnode with holdcnt is free"));
+		return;
 	}
-#endif
-	vp->v_holdcnt++;
-	if ((vp->v_iflag & VI_FREE) == 0)
+	if (!locked)
+		VI_LOCK(vp);
+	if ((vp->v_iflag & VI_FREE) == 0) {
+		refcount_acquire(&vp->v_holdcnt);
+		if (!locked)
+			VI_UNLOCK(vp);
 		return;
-	VNASSERT(vp->v_holdcnt == 1, vp, ("vholdl: wrong hold count"));
+	}
+	VNASSERT(vp->v_holdcnt == 0, vp, ("vholdl: wrong hold count"));
 	VNASSERT(vp->v_op != NULL, vp, ("vholdl: vnode already reclaimed."));
 	/*
 	 * Remove a vnode from the free list, mark it as in use,
@@ -2394,18 +2391,9 @@ vholdl(struct vnode *vp)
 	TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
 	mp->mnt_activevnodelistsize++;
 	mtx_unlock(&vnode_free_list_mtx);
-}
-
-/*
- * Note that there is one less who cares about this vnode.
- * vdrop() is the opposite of vhold().
- */
-void
-vdrop(struct vnode *vp)
-{
-
-	VI_LOCK(vp);
-	vdropl(vp);
+	refcount_acquire(&vp->v_holdcnt);
+	if (!locked)
+		VI_UNLOCK(vp);
 }
 
 /*
@@ -2414,20 +2402,25 @@ vdrop(struct vnode *vp)
  * (marked VI_DOOMED) in which case we will free it.
  */
 void
-vdropl(struct vnode *vp)
+_vdrop(struct vnode *vp, bool locked)
 {
 	struct bufobj *bo;
 	struct mount *mp;
 	int active;
 
-	ASSERT_VI_LOCKED(vp, "vdropl");
+	ASSERT_VI_LOCK(vp, __func__, locked);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	if (vp->v_holdcnt <= 0)
 		panic("vdrop: holdcnt %d", vp->v_holdcnt);
-	vp->v_holdcnt--;
-	VNASSERT(vp->v_holdcnt >= vp->v_usecount, vp,
-	    ("hold count less than use count"));
-	if (vp->v_holdcnt > 0) {
+	if (refcount_release_if_greater(&vp->v_holdcnt, 1)) {
+		if (locked)
+			VI_UNLOCK(vp);
+		return;
+	}
+
+	if (!locked)
+		VI_LOCK(vp);
+	if (refcount_release(&vp->v_holdcnt) == 0) {
 		VI_UNLOCK(vp);
 		return;
 	}
diff --git a/sys/sys/lockmgr.h b/sys/sys/lockmgr.h
index ff0473d..a74d5f5 100644
--- a/sys/sys/lockmgr.h
+++ b/sys/sys/lockmgr.h
@@ -159,6 +159,7 @@ _lockmgr_args_rw(struct lock *lk, u_int flags, struct rwlock *ilk,
 #define	LK_SLEEPFAIL	0x000800
 #define	LK_TIMELOCK	0x001000
 #define	LK_NODDLKTREAT	0x002000
+#define	LK_VNHELD	0x004000
 
 /*
  * Operations for lockmgr().
diff --git a/sys/sys/refcount.h b/sys/sys/refcount.h
index 4611664..b6ddb90 100644
--- a/sys/sys/refcount.h
+++ b/sys/sys/refcount.h
@@ -64,4 +64,64 @@ refcount_release(volatile u_int *count)
 	return (old == 1);
 }
 
+static __inline int
+refcount_acquire_if_greater(volatile u_int *count, int val)
+{
+	int old;
+
+	for (;;) {
+		old = *count;
+		if (old <= val)
+			return (0);
+		if (atomic_cmpset_int(count, old, old + 1))
+			return (1);
+	}
+}
+
+static __inline int
+refcount_release_if_greater(volatile u_int *count, int val)
+{
+	int old;
+
+	for (;;) {
+		old = *count;
+		if (old <= val)
+			return (0);
+		if (atomic_cmpset_int(count, old, old - 1))
+			return (1);
+	}
+}
+
+#define	_refcount_release_lock(count, lock, TYPE, LOCK_OP, UNLOCK_OP)	\
+({									\
+	TYPE *__lock;							\
+	volatile u_int *__cp;						\
+	int __ret;							\
+									\
+	__lock = (lock);						\
+	__cp = (count);							\
+									\
+	if (refcount_release_if_greater(__cp, 1)) {			\
+		__ret = 0;						\
+	} else {							\
+		LOCK_OP(__lock);					\
+		if (refcount_release(__cp)) {				\
+			__ret = 1;					\
+		} else {						\
+			UNLOCK_OP(__lock);				\
+			__ret = 0;					\
+		}							\
+	}								\
+	__ret;								\
+})
+
+#define	refcount_release_lock_mtx(count, lock) \
+	_refcount_release_lock(count, lock, struct mtx, mtx_lock, mtx_unlock)
+#define	refcount_release_lock_rmlock(count, lock) \
+	_refcount_release_lock(count, lock, struct rmlock, rm_wlock, rm_wunlock)
+#define	refcount_release_lock_rwlock(count, lock) \
+	_refcount_release_lock(count, lock, struct rwlock, rw_wlock, rw_wunlock)
+#define	refcount_release_lock_sx(count, lock) \
+	_refcount_release_lock(count, lock, struct sx, sx_xlock, sx_xunlock)
+
 #endif	/* ! __SYS_REFCOUNT_H__ */
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index e1f912e..c695b52 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -547,6 +547,15 @@ void	assert_vop_unlocked(struct vnode *vp, const char *str);
 #define	ASSERT_VOP_UNLOCKED(vp, str)	((void)0)
 #endif /* DEBUG_VFS_LOCKS */
 
+static __inline void
+ASSERT_VI_LOCK(struct vnode *vp, const char *str, bool locked)
+{
+
+	if (locked)
+		ASSERT_VI_LOCKED(vp, str);
+	else
+		ASSERT_VI_UNLOCKED(vp, str);
+}
 
 /*
  * This call works for vnodes in the kernel.
@@ -649,13 +658,15 @@ int	vaccess_acl_posix1e(enum vtype type, uid_t file_uid,
 	    struct ucred *cred, int *privused);
 void	vattr_null(struct vattr *vap);
 int	vcount(struct vnode *vp);
-void	vdrop(struct vnode *);
-void	vdropl(struct vnode *);
+#define	vdrop(vp)	_vdrop((vp), 0)
+#define	vdropl(vp)	_vdrop((vp), 1)
+void	_vdrop(struct vnode *, bool);
 int	vflush(struct mount *mp, int rootrefs, int flags, struct thread *td);
 int	vget(struct vnode *vp, int lockflag, struct thread *td);
 void	vgone(struct vnode *vp);
-void	vhold(struct vnode *);
-void	vholdl(struct vnode *);
+#define	vhold(vp)	_vhold((vp), 0)
+#define	vholdl(vp)	_vhold((vp), 1)
+void	_vhold(struct vnode *, bool);
 void	vinactive(struct vnode *, struct thread *);
 int	vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo);
 int	vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length,

-- 
Mateusz Guzik <mjguzik gmail.com>


More information about the freebsd-fs mailing list