PERFORCE change 150582 for review

Nick Barkas snb at FreeBSD.org
Sat Sep 27 22:35:36 UTC 2008


http://perforce.freebsd.org/chv.cgi?CH=150582

Change 150582 by snb at snb_toro on 2008/09/27 22:35:21

	IFC

Affected files ...

.. //depot/projects/soc2008/snb-dirhash/sys-ufs-ufs/dirhash.h#5 integrate
.. //depot/projects/soc2008/snb-dirhash/sys-ufs-ufs/ufs_dirhash.c#11 integrate

Differences ...

==== //depot/projects/soc2008/snb-dirhash/sys-ufs-ufs/dirhash.h#5 (text+ko) ====

@@ -28,6 +28,9 @@
 #ifndef _UFS_UFS_DIRHASH_H_
 #define _UFS_UFS_DIRHASH_H_
 
+#include <sys/_lock.h>
+#include <sys/_sx.h>
+
 /*
  * For fast operations on large directories, we maintain a hash
  * that maps the file name to the offset of the directory entry within
@@ -80,7 +83,8 @@
     ((dh)->dh_hash[(slot) >> DH_BLKOFFSHIFT][(slot) & DH_BLKOFFMASK])
 
 struct dirhash {
-	struct lock dh_lock;	/* protects all fields except list & score */
+	struct sx dh_lock;	/* protects all fields except list & score */
+	int	dh_refcount;
 
 	doff_t	**dh_hash;	/* the hash array (2-level) */
 	int	dh_narrays;	/* number of entries in dh_hash */

==== //depot/projects/soc2008/snb-dirhash/sys-ufs-ufs/ufs_dirhash.c#11 (text+ko) ====

@@ -38,7 +38,6 @@
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
-#include <sys/lockmgr.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/fnv_hash.h>
@@ -47,7 +46,9 @@
 #include <sys/buf.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
+#include <sys/refcount.h>
 #include <sys/sysctl.h>
+#include <sys/sx.h>
 #include <sys/eventhandler.h>
 #include <sys/time.h>
 #include <vm/uma.h>
@@ -109,7 +110,7 @@
 #define DIRHASH_BLKALLOC_WAITOK() 	uma_zalloc(ufsdirhash_zone, M_WAITOK)
 #define DIRHASH_BLKFREE(ptr) 		uma_zfree(ufsdirhash_zone, (ptr))
 #define	DIRHASH_ASSERT_LOCKED(dh)					\
-    lockmgr_assert(&(dh)->dh_lock, KA_LOCKED)
+    sx_assert(&(dh)->dh_lock, SA_LOCKED)
 
 /* Dirhash list; recently-used entries are near the tail. */
 static TAILQ_HEAD(, dirhash) ufsdirhash_list;
@@ -123,7 +124,12 @@
  *
  * The relationship between inode and dirhash is protected either by an
  * exclusive vnode lock or the vnode interlock where a shared vnode lock
- * may be used.  The dirhash_mtx is acquired after the dirhash lock.
+ * may be used.  The dirhash_mtx is acquired after the dirhash lock.  To
+ * handle teardown races, code wishing to lock the dirhash for an inode
+ * when using a shared vnode lock must obtain a private reference on the
+ * dirhash while holding the vnode interlock.  They can drop it once they
+ * have obtained the dirhash lock and verified that the dirhash wasn't
+ * recycled while they waited for the dirhash lock.
  *
  * ufsdirhash_build() acquires a shared lock on the dirhash when it is
  * successful.  This lock is released after a call to ufsdirhash_lookup().
@@ -134,6 +140,23 @@
  * The dirhash lock may be held across io operations.
  */
 
+static void
+ufsdirhash_hold(struct dirhash *dh)
+{
+
+	refcount_acquire(&dh->dh_refcount);
+}
+
+static void
+ufsdirhash_drop(struct dirhash *dh)
+{
+
+	if (refcount_release(&dh->dh_refcount)) {
+		sx_destroy(&dh->dh_lock);
+		free(dh, M_DIRHASH);
+	}
+}
+
 /*
  * Release the lock on a dirhash.
  */
@@ -141,7 +164,7 @@
 ufsdirhash_release(struct dirhash *dh)
 {
 
-	lockmgr(&dh->dh_lock, LK_RELEASE, 0);
+	sx_unlock(&dh->dh_lock);
 }
 
 /*
@@ -169,8 +192,9 @@
 			    M_NOWAIT | M_ZERO);
 			if (ndh == NULL)
 				return (NULL);
-			lockinit(&ndh->dh_lock, PRIBIO, "dirhash", 0, 0);
-			lockmgr(&ndh->dh_lock, LK_EXCLUSIVE, NULL);
+			refcount_init(&ndh->dh_refcount, 1);
+			sx_init(&ndh->dh_lock, "dirhash");
+			sx_xlock(&ndh->dh_lock);
 		}
 		/*
 		 * Check i_dirhash.  If it's NULL just try to use a
@@ -185,31 +209,39 @@
 				continue;
 			return (ndh);
 		}
-		/* Try to acquire shared on existing hashes. */
-		if (lockmgr(&dh->dh_lock, LK_SHARED | LK_INTERLOCK,
-		    VI_MTX(vp)))
-			continue;
+		ufsdirhash_hold(dh);
+		VI_UNLOCK(vp);
+
+		/* Acquire a shared lock on existing hashes. */
+		sx_slock(&dh->dh_lock);
+
 		/* The hash could've been recycled while we were waiting. */
+		VI_LOCK(vp);
 		if (ip->i_dirhash != dh) {
+			VI_UNLOCK(vp);
 			ufsdirhash_release(dh);
+			ufsdirhash_drop(dh);
 			continue;
 		}
+		VI_UNLOCK(vp);
+		ufsdirhash_drop(dh);
+
 		/* If the hash is still valid we've succeeded. */
 		if (dh->dh_hash != NULL)
 			break;
 		/*
 		 * If the hash is NULL it has been recycled.  Try to upgrade
-		 * so we can recreate it.  If we fail the upgrade another
-		 * thread must've already exclusively locked it.
+		 * so we can recreate it.  If we fail the upgrade, drop our
+		 * lock and try again.
 		 */
-		if (lockmgr(&dh->dh_lock, LK_UPGRADE | LK_SLEEPFAIL, NULL) == 0)
+		if (sx_try_upgrade(&dh->dh_lock))
 			break;
+		sx_sunlock(&dh->dh_lock);
 	}
 	/* Free the preallocated structure if it was not necessary. */
 	if (ndh) {
-		lockmgr(&ndh->dh_lock, LK_RELEASE, NULL);
-		lockdestroy(&ndh->dh_lock);
-		FREE(ndh, M_DIRHASH);
+		ufsdirhash_release(ndh);
+		ufsdirhash_drop(ndh);
 	}
 	return (dh);
 }
@@ -231,7 +263,7 @@
 	dh = ip->i_dirhash;
 	if (dh == NULL)
 		return (NULL);
-	lockmgr(&dh->dh_lock, LK_EXCLUSIVE, 0);
+	sx_xlock(&dh->dh_lock);
 	if (dh->dh_hash != NULL)
 		return (dh);
 	ufsdirhash_free_locked(ip);
@@ -250,18 +282,34 @@
 
 	vp = ip->i_vnode;
 	for (;;) {
+		/* Grab a reference on this inode's dirhash if it has one. */
 		VI_LOCK(vp);
 		dh = ip->i_dirhash;
 		if (dh == NULL) {
 			VI_UNLOCK(vp);
 			return;
 		}
-		if (lockmgr(&dh->dh_lock, LK_EXCLUSIVE | LK_INTERLOCK,
-		    VI_MTX(vp)))
-			continue;
-		if (ip->i_dirhash == dh)
+		ufsdirhash_hold(dh);
+		VI_UNLOCK(vp);
+
+		/* Exclusively lock the dirhash. */
+		sx_xlock(&dh->dh_lock);
+
+		/* If this dirhash still belongs to this inode, then free it. */
+		VI_LOCK(vp);
+		if (ip->i_dirhash == dh) {
+			VI_UNLOCK(vp);
+			ufsdirhash_drop(dh);
 			break;
+		}
+		VI_UNLOCK(vp);
+
+		/*
+		 * This inode's dirhash has changed while we were
+		 * waiting for the dirhash lock, so try again.
+		 */
 		ufsdirhash_release(dh);
+		ufsdirhash_drop(dh);
 	}
 	ufsdirhash_free_locked(ip);
 }
@@ -395,7 +443,7 @@
 	TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
 	dh->dh_onlist = 1;
 	DIRHASHLIST_UNLOCK();
-	lockmgr(&dh->dh_lock, LK_DOWNGRADE, 0);
+	sx_downgrade(&dh->dh_lock);
 	return (0);
 
 fail:
@@ -414,6 +462,7 @@
 	int i;
 
 	DIRHASH_ASSERT_LOCKED(ip->i_dirhash);
+
 	/*
 	 * Clear the pointer in the inode to prevent new threads from
 	 * finding the dead structure.
@@ -423,12 +472,25 @@
 	dh = ip->i_dirhash;
 	ip->i_dirhash = NULL;
 	VI_UNLOCK(vp);
+
+	/*
+	 * Remove the hash from the list since we are going to free its
+	 * memory.
+	 */
+	DIRHASHLIST_LOCK();
+	if (dh->dh_onlist)
+		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
+	ufs_dirhashmem -= dh->dh_memreq;
+	DIRHASHLIST_UNLOCK();
+
 	/*
-	 * Drain waiters.  They will abort when they see that ip->i_dirhash
-	 * is NULL after locking.
+	 * At this point, any waiters for the lock should hold their
+	 * own reference on the dirhash structure.  They will drop
+	 * that reference once they grab the vnode interlock and see
+	 * that ip->i_dirhash is NULL.
 	 */
-	lockmgr(&dh->dh_lock, LK_RELEASE, 0);
-	lockmgr(&dh->dh_lock, LK_DRAIN, 0);
+	sx_xunlock(&dh->dh_lock);
+
 	/*
 	 * Handle partially recycled as well as fully constructed hashes.
 	 */
@@ -440,19 +502,11 @@
 		if (dh->dh_blkfree != NULL)
 			FREE(dh->dh_blkfree, M_DIRHASH);
 	}
-	DIRHASHLIST_LOCK();
-	if (dh->dh_onlist)
-		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
-	ufs_dirhashmem -= dh->dh_memreq;
-	DIRHASHLIST_UNLOCK();
+
 	/*
-	 * Release the lock and reclaim datastructure memory.
+	 * Drop the inode's reference to the data structure.
 	 */
-	lockmgr(&dh->dh_lock, LK_RELEASE, 0);
-	lockdestroy(&dh->dh_lock);
-	FREE(dh, M_DIRHASH);
-
-	return;
+	ufsdirhash_drop(dh);
 }
 
 /*
@@ -1155,7 +1209,7 @@
 		 * If we can't lock it it's in use and we don't want to
 		 * recycle it anyway.
 		 */
-		if (lockmgr(&dh->dh_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
+		if (!sx_try_xlock(&dh->dh_lock)) {
 			dh = TAILQ_NEXT(dh, dh_list);
 			continue;
 		}
@@ -1189,13 +1243,13 @@
 	 */
 	for (dh = TAILQ_FIRST(&ufsdirhash_list); dh != NULL; dh = 
 	     TAILQ_NEXT(dh, dh_list)) {
-		if (lockmgr(&dh->dh_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL))
+		if (!sx_try_xlock(&dh->dh_lock))
 			continue;
 		if (time_second - dh->dh_lastused > ufs_dirhashreclaimage)
 			memfreed += ufsdirhash_destroy(dh);
 		/* Unlock if we didn't delete the dirhash */
 		else
-			lockmgr(&dh->dh_lock, LK_RELEASE, 0);
+			ufsdirhash_release(dh);
 	}
 	
 	/* 
@@ -1205,7 +1259,7 @@
 	 */
 	for (dh = TAILQ_FIRST(&ufsdirhash_list); memfreed < memwanted &&
 	     dh !=NULL; dh = TAILQ_NEXT(dh, dh_list)) {
-		if (lockmgr(&dh->dh_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL))
+		if (!sx_try_xlock(&dh->dh_lock))
 			continue;
 		memfreed += ufsdirhash_destroy(dh);
 	}


More information about the p4-projects mailing list