PERFORCE change 180811 for review

Gleb Kurtsou gk at FreeBSD.org
Mon Jul 12 12:11:37 UTC 2010


http://p4web.freebsd.org/@@180811?ac=10

Change 180811 by gk at gk_h1 on 2010/07/12 12:10:54

	Add pool task queue used to clear unused and invalid lists
	Allow unused list to grow over max size, shrink it to max size later
	Add unused and invalid lists clear statistics
	Add per-mount inode list of dircache entries
	Add NO_DIRCACHE option for performance testing

Affected files ...

.. //depot/projects/soc2010/gk_namecache/sys/fs/tmpfs/tmpfs_subr.c#4 edit
.. //depot/projects/soc2010/gk_namecache/sys/fs/tmpfs/tmpfs_vfsops.c#3 edit
.. //depot/projects/soc2010/gk_namecache/sys/fs/tmpfs/tmpfs_vnops.c#4 edit
.. //depot/projects/soc2010/gk_namecache/sys/kern/vfs_dircache.c#5 edit
.. //depot/projects/soc2010/gk_namecache/sys/modules/tmpfs/Makefile#3 edit
.. //depot/projects/soc2010/gk_namecache/sys/sys/dircache.h#5 edit
.. //depot/projects/soc2010/gk_namecache/sys/sys/mount.h#3 edit

Differences ...

==== //depot/projects/soc2010/gk_namecache/sys/fs/tmpfs/tmpfs_subr.c#4 (text+ko) ====

@@ -401,8 +401,10 @@
 	error = insmntque(vp, mp);
 	if (error)
 		vp = NULL;
+#ifndef NO_DIRCACHE
 	else
 		dircache_allocvnode(vp, node->tn_id);
+#endif
 
 unlock:
 	TMPFS_NODE_LOCK(node);
@@ -522,7 +524,9 @@
 	 * insert the new node into the directory, an operation that
 	 * cannot fail. */
 	tmpfs_dir_attach(dvp, de);
+#ifndef NO_DIRCACHE
 	dircache_add(dvp, *vpp, cnp, DT_STRONG, node->tn_id);
+#endif
 
 out:
 

==== //depot/projects/soc2010/gk_namecache/sys/fs/tmpfs/tmpfs_vfsops.c#3 (text+ko) ====

@@ -254,7 +254,9 @@
 	vfs_getnewfsid(mp);
 	vfs_mountedfrom(mp, "tmpfs");
 
+#ifndef NO_DIRCACHE
 	dircache_init(mp, root->tn_id);
+#endif
 
 	return 0;
 }
@@ -325,7 +327,9 @@
 	mp->mnt_flag &= ~MNT_LOCAL;
 	MNT_IUNLOCK(mp);
 
+#ifndef NO_DIRCACHE
 	dircache_uninit(mp);
+#endif
 
 	return 0;
 }

==== //depot/projects/soc2010/gk_namecache/sys/fs/tmpfs/tmpfs_vnops.c#4 (text+ko) ====

@@ -185,8 +185,11 @@
 	 * request was for creation, as it does not improve timings on
 	 * empirical tests. */
 	if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) {
+#ifndef NO_DIRCACHE
 		dircache_enter(dvp, *vpp, cnp);
+#else
 		cache_enter(dvp, *vpp, cnp);
+#endif
 	}
 
 out:
@@ -841,7 +844,9 @@
 	/* Remove the entry from the directory; as it is a file, we do not
 	 * have to change the number of hard links of the directory. */
 	tmpfs_dir_detach(dvp, de);
+#ifndef NO_DIRCACHE
 	dircache_remove(dvp, vp, v->a_cnp);
+#endif
 
 	/* Free the directory entry we just deleted.  Note that the node
 	 * referred by it will not be removed until the vnode is really
@@ -913,7 +918,9 @@
 
 	/* Insert the new directory entry into the appropriate directory. */
 	tmpfs_dir_attach(dvp, de);
+#ifndef NO_DIRCACHE
 	dircache_add(dvp, vp, cnp, DT_STRONG, node->tn_id);
+#endif
 
 	/* vp link count has changed, so update node times. */
 	node->tn_status |= TMPFS_NODE_CHANGED;
@@ -1138,7 +1145,9 @@
 		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de, TRUE);
 	}
 
+#ifndef NO_DIRCACHE
 	dircache_rename(fdvp, fcnp, tdvp, tcnp);
+#endif
 
 	error = 0;
 
@@ -1234,7 +1243,9 @@
 
 	/* Detach the directory entry from the directory (dnode). */
 	tmpfs_dir_detach(dvp, de);
+#ifndef NO_DIRCACHE
 	dircache_remove(dvp, vp, v->a_cnp);
+#endif
 
 	/* No vnode should be allocated for this entry from this point */
 	TMPFS_NODE_LOCK(node);
@@ -1457,7 +1468,9 @@
 
 	MPASS(vp->v_data == NULL);
 
+#ifndef NO_DIRCACHE
 	dircache_reclaimvnode(vp);
+#endif
 
 	return 0;
 }
@@ -1563,7 +1576,11 @@
  */
 struct vop_vector tmpfs_vnodeop_entries = {
 	.vop_default =			&default_vnodeops,
+#ifndef NO_DIRCACHE
 	.vop_lookup =			vfs_dircache_lookup,
+#else
+	.vop_lookup =			vfs_cache_lookup,
+#endif
 	.vop_cachedlookup =		tmpfs_lookup,
 	.vop_create =			tmpfs_create,
 	.vop_mknod =			tmpfs_mknod,

==== //depot/projects/soc2010/gk_namecache/sys/kern/vfs_dircache.c#5 (text+ko) ====

@@ -45,8 +45,8 @@
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
+#include <sys/tree.h>
 #include <sys/uio.h>
-#include <sys/tree.h>
 #include <sys/vnode.h>
 
 #include <sys/dircache.h>
@@ -55,29 +55,65 @@
 
 #define	DC_OP_VLOCK			0x00000001
 
+#define DP_UNUSED_MIN			512
+#define DP_THRESHOLD_DFLT		256
+
 #define	DCDEBUG(format, args...)					\
 	do {								\
 		if (dircache_debug != 0)				\
 			printf(format ,## args);			\
 	} while (0)
 
-#define DC_STAT_DEFINE(name, descr)					\
-static void __CONCAT(name, _add_proc) (void *dummy __unused)		\
-{									\
-	SYSCTL_ADD_PROC(NULL,						\
-	SYSCTL_STATIC_CHILDREN(_vfs_dircache_stats), OID_AUTO,		\
-	#name, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,		\
-	&DPCPU_NAME(dc_stats[__CONCAT(ds_, name)]), 0,			\
-	sysctl_dpcpu_long, "LU", descr);				\
-}									\
-SYSINIT(name, SI_SUB_VFS, SI_ORDER_SECOND, __CONCAT(name, _add_proc), NULL);
+struct dircache_pool {
+	struct mtx dp_mtx;
+	TAILQ_HEAD(, dircache) dp_unused;
+	TAILQ_HEAD(, dircache) dp_invalid;
+	struct task dp_task;
+	u_long dp_unused_cnt;
+	u_long dp_unused_limit;
+	u_long dp_unused_threshold;
+	u_long dp_unused_max;
+	u_long dp_invalid_cnt;
+	u_long dp_invalid_threshold;
+	u_long dp_invalid_limit;
+};
+
+struct dircache_mount {
+	struct mtx dm_mtx;
+	LIST_HEAD(, dircache) dm_inodehead;
+	struct dircache *dm_entry;
+};
+
+static void dp_unused_insert(struct dircache *dc);
+static void dp_unused_remove(struct dircache *dc);
+static void dp_unused_lazyclear(void);
+static void dp_invalid_lazyclear(void);
+
+static int dircache_debug = 1;
+static struct dircache_pool pool;
+static struct taskqueue *dc_tq;
+static task_fn_t dp_taskfunc;
+
+static MALLOC_DEFINE(M_DIRCACHE, "dircache buf", "dircache buffers");
+static uma_zone_t dircache_zone;
+
+static SYSCTL_NODE(_vfs, OID_AUTO, dircache, CTLFLAG_RW, 0, "Dircache");
+static SYSCTL_NODE(_vfs_dircache, OID_AUTO, stats, CTLFLAG_RD, 0,
+    "Dircache stats");
+
+SYSCTL_INT(_vfs_dircache, OID_AUTO, debug, CTLFLAG_RW, &dircache_debug, 0,
+    "Enable debug");
+SYSCTL_ULONG(_vfs_dircache, OID_AUTO, invalid_threshold, CTLFLAG_RD,
+    &pool.dp_invalid_threshold, 0, "Invalid entries threshold");
+SYSCTL_ULONG(_vfs_dircache, OID_AUTO, unused_max, CTLFLAG_RD,
+    &pool.dp_unused_max, 0, "Maximum number of unused entries");
+SYSCTL_ULONG(_vfs_dircache, OID_AUTO, unused_threshold, CTLFLAG_RD,
+    &pool.dp_unused_threshold, 0, "Unused entries threshold");
 
-#define DC_STAT_INC(ind)						\
-	do {								\
-		sched_pin();						\
-		DPCPU_GET(dc_stats[(ind)])++;				\
-		sched_unpin();						\
-	} while (0)
+SYSCTL_ULONG(_vfs_dircache_stats, OID_AUTO, invalid, CTLFLAG_RD,
+    &pool.dp_invalid_cnt, 0, "Invalid entries");
+SYSCTL_ULONG(_vfs_dircache_stats, OID_AUTO, unused, CTLFLAG_RD,
+    &pool.dp_unused_cnt, 0, "Unused entries");
 
 enum {
 	ds_hit,
@@ -102,35 +138,32 @@
 	ds_vinterlock_restart,
 	ds_lookup_restart,
 	ds_insert_restart,
+	ds_clearunused,
+	ds_clearunused_restart,
+	ds_clearinvalid,
 	ds_count
 };
 
-struct dircache_root {
-	struct mount *dr_mnt;
-	struct dircache *dr_entry;
-};
+static DPCPU_DEFINE(long, dc_stats[ds_count]);
 
-struct dircache_pool {
-	struct mtx dp_mtx;
-	TAILQ_HEAD(, dircache) dp_unused;
-	TAILQ_HEAD(, dircache) dp_invalid;
-	u_long dp_unusedcnt;
-	u_long dp_invalidcnt;
-};
+#define DC_STAT_DEFINE(name, descr)					\
+static void __CONCAT(name, _add_proc) (void *dummy __unused)		\
+{									\
+	SYSCTL_ADD_PROC(NULL,						\
+	SYSCTL_STATIC_CHILDREN(_vfs_dircache_stats), OID_AUTO,		\
+	#name, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,		\
+	&DPCPU_NAME(dc_stats[__CONCAT(ds_, name)]), 0,			\
+	sysctl_dpcpu_long, "LU", descr);				\
+}									\
+SYSINIT(name, SI_SUB_VFS, SI_ORDER_SECOND, __CONCAT(name, _add_proc), NULL);
 
-static struct dircache_pool pool;
-
-static SYSCTL_NODE(_vfs, OID_AUTO, dircache, CTLFLAG_RW, 0, "Dircache");
-static SYSCTL_NODE(_vfs_dircache, OID_AUTO, stats, CTLFLAG_RD, 0,
-    "Dircache stats");
-static MALLOC_DEFINE(M_DIRCACHE, "dircache buf", "dircache buffers");
-static uma_zone_t dircache_zone;
-
-static int dircache_debug = 1;
-SYSCTL_UINT(_vfs_dircache, OID_AUTO, debug, CTLFLAG_RW, &dircache_debug, 0,
-    "Enable debug");
+#define DC_STAT_INC(ind)						\
+	do {								\
+		sched_pin();						\
+		DPCPU_GET(dc_stats[(ind)])++;				\
+		sched_unpin();						\
+	} while (0)
 
-static DPCPU_DEFINE(long, dc_stats[ds_count]);
 DC_STAT_DEFINE(hit, "");
 DC_STAT_DEFINE(hit_negative, "");
 DC_STAT_DEFINE(miss, "");
@@ -153,24 +186,33 @@
 DC_STAT_DEFINE(vinterlock_restart, "vnode interlock restarts");
 DC_STAT_DEFINE(lookup_restart, "lookup restarts");
 DC_STAT_DEFINE(insert_restart, "insert restarts");
-
-SYSCTL_UINT(_vfs_dircache_stats, OID_AUTO, invalid, CTLFLAG_RD,
-    &pool.dp_invalidcnt, 0, "Invalid entries");
-SYSCTL_UINT(_vfs_dircache_stats, OID_AUTO, unused, CTLFLAG_RD,
-    &pool.dp_unusedcnt, 0, "Unused entries");
+DC_STAT_DEFINE(clearunused, "");
+DC_STAT_DEFINE(clearunused_restart, "");
+DC_STAT_DEFINE(clearinvalid, "");
 
-static void dp_insertunused(struct dircache *dc);
-static void dp_removeunused(struct dircache *dc);
-
 static void
 dircache_sysinit(void *arg __unused)
 {
 	mtx_init(&pool.dp_mtx, "dircache pool", NULL, MTX_DEF | MTX_DUPOK);
 	TAILQ_INIT(&pool.dp_unused);
 	TAILQ_INIT(&pool.dp_invalid);
+	pool.dp_unused_max = MAX(DP_UNUSED_MIN, desiredvnodes / 2);
+	pool.dp_unused_threshold = MAX(DP_THRESHOLD_DFLT,
+	    pool.dp_unused_max / 50);
+	pool.dp_unused_limit = pool.dp_unused_max + pool.dp_unused_threshold;
+	pool.dp_invalid_threshold = MIN(DP_THRESHOLD_DFLT,
+	    pool.dp_unused_max / 100);
+	pool.dp_invalid_limit = pool.dp_invalid_threshold;
+	TASK_INIT(&pool.dp_task, 0, dp_taskfunc, &pool);
+
 	dircache_zone = uma_zcreate("dircache",
 	    sizeof(struct dircache), NULL, NULL, NULL, NULL,
             UMA_ALIGN_PTR, 0);
+
+	dc_tq = taskqueue_create("dircache tq", M_WAITOK,
+	    taskqueue_thread_enqueue, &dc_tq);
+	taskqueue_start_threads(&dc_tq, 1, PWAIT, "dircache taskq");
+
 }
 SYSINIT(dircache, SI_SUB_VFS, SI_ORDER_SECOND, dircache_sysinit, NULL);
 
@@ -184,6 +226,16 @@
 }
 SYSUNINIT(dircache, SI_SUB_VFS, SI_ORDER_SECOND, dircache_sysuninit, NULL);
 
+#define dm_lock(dm)		mtx_lock(&(dm)->dm_mtx)
+
+#define dm_unlock(dm)		mtx_unlock(&(dm)->dm_mtx)
+
+static __inline struct dircache_mount *
+dm_get(struct vnode *vp)
+{
+	return (vp->v_mount->mnt_dircache);
+}
+
 static __inline int
 dc_cmpname(struct dircache *dc, char *name, u_int namelen)
 {
@@ -213,8 +265,6 @@
 
 RB_GENERATE_STATIC(dircache_tree, dircache, dc_tree, dc_cmp);
 
-#define DC_MTX(dc)		(&(dc)->dc_pool->dp_mtx)
-
 #define dc_lock(dc)		mtx_lock(&(dc)->dc_mtx)
 
 #define dc_trylock(dc)		mtx_trylock(&(dc)->dc_mtx)
@@ -284,13 +334,15 @@
 }
 
 static struct dircache *
-dc_alloc(enum dircache_type type, char *name, u_int namelen)
+dc_alloc(struct dircache_mount *dm, enum dircache_type type,
+    char *name, u_int namelen)
 {
 	struct dircache *dc;
 
 	dc = uma_zalloc(dircache_zone, M_WAITOK | M_ZERO);
 	DCDEBUG("alloc: %p %s\n", dc, name);
 
+	dc->dc_mount = dm;
 	dc->dc_type = type;
 	refcount_init(&dc->dc_holdcnt, 1);
 	mtx_init(&dc->dc_mtx, "dircache entry", NULL, MTX_DEF | MTX_DUPOK);
@@ -372,7 +424,7 @@
 	dc_hold(dc);
 	if (dc->dc_usecnt == 0) {
 		MPASS(dc->dc_vnode == NULL);
-		dp_removeunused(dc);
+		dp_unused_remove(dc);
 	}
 	dc->dc_usecnt++;
 	DCDEBUG("ref: %p usecnt=%d holdcnt=%d\n", dc, dc->dc_usecnt,
@@ -400,7 +452,7 @@
 	}
 
 	MPASS(dc->dc_vnode == NULL);
-	dp_insertunused(dc);
+	dp_unused_insert(dc);
 	dc_droplocked(dc);
 
 	return (1);
@@ -432,8 +484,9 @@
 		mtx_lock(&pool.dp_mtx);
 		TAILQ_REMOVE(&pool.dp_unused, dc, dc_list);
 		TAILQ_INSERT_HEAD(&pool.dp_invalid, dc, dc_list);
-		pool.dp_unusedcnt--;
-		pool.dp_invalidcnt++;
+		pool.dp_unused_cnt--;
+		pool.dp_invalid_cnt++;
+		dp_invalid_lazyclear();
 		mtx_unlock(&pool.dp_mtx);
 	}
 }
@@ -473,6 +526,38 @@
 	dc_rele(dc);
 }
 
+static void
+dc_setinode(struct dircache *dc, ino_t inode)
+{
+	struct dircache_mount *dm = dc->dc_mount;
+
+	dc_assertlock(dc, MA_OWNED);
+	MPASS(inode != 0);
+	MPASS(dm != NULL);
+	MPASS(dc->dc_inode == 0);
+
+	dm_lock(dm);
+	LIST_INSERT_HEAD(&dm->dm_inodehead, dc, dc_inodelist);
+	dm_unlock(dm);
+	dc->dc_inode = inode;
+}
+
+static void
+dc_zeroinode(struct dircache *dc)
+{
+	struct dircache_mount *dm = dc->dc_mount;
+
+	dc_assertlock(dc, MA_OWNED);
+	MPASS(dm != NULL);
+
+	if (dc->dc_inode == 0)
+		return;
+	dm_lock(dm);
+	LIST_REMOVE(dc, dc_inodelist);
+	dm_unlock(dm);
+	dc->dc_inode = 0;
+}
+
 static int
 dc_vinterlock(struct vnode *vp, struct dircache *dc)
 {
@@ -518,7 +603,7 @@
 	dc = TAILQ_FIRST(&vp->v_dircache);
 	if (dc == NULL) {
 		if ((vp->v_vflag & VV_ROOT) != 0) {
-			dc = vp->v_mount->mnt_dircache;
+			dc = dm_get(vp)->dm_entry;
 			VI_UNLOCK(vp);
 			DCDEBUG("getentry: root %p vp=%p\n", dc, vp);
 			MPASS(dc != NULL);
@@ -646,8 +731,8 @@
 }
 
 static struct dircache *
-dc_insert(struct dircache *pdc, struct dircache *dc, struct vnode *vp,
-    ino_t inode)
+dc_insert(struct dircache *pdc, struct dircache *dc,
+    struct vnode *vp, ino_t inode)
 {
 	struct dircache *col;
 
@@ -683,7 +768,7 @@
 				dc_lock(pdc);
 			}
 			col->dc_type = dc->dc_type;
-			col->dc_inode = inode;
+			dc_setinode(col, inode);
 			dc_unlock(pdc);
 			dc_drop(dc);
 			dc = col;
@@ -691,7 +776,7 @@
 			panic("dircache: insert: invalid entry: %d %s\n",
 			    dc->dc_type, dc->dc_name);
 	} else {
-		dp_insertunused(dc);
+		dp_unused_insert(dc);
 		dc->dc_parent = pdc;
 		dc_ref(pdc);
 		dc_hold(dc);
@@ -729,7 +814,9 @@
 	parent = dc->dc_parent;
 	dc->dc_parent = NULL;
 	RB_REMOVE(dircache_tree, &parent->dc_children, dc);
-	dc_invalidate(dc);
+	if (dc->dc_type != DT_INVALID)
+		dc_invalidate(dc);
+	dc_zeroinode(dc);
 	haschildren = !RB_EMPTY(&dc->dc_children);
 	dc_rele(parent);
 	if (dc->dc_vnode != NULL) {
@@ -757,7 +844,7 @@
 	dc_updategen(dc->dc_parent);
 	dc->dc_type = DT_NEGATIVE;
 	dc_unlock(dc->dc_parent);
-	dc->dc_inode = 0;
+	dc_zeroinode(dc);
 	haschildren = !RB_EMPTY(&dc->dc_children);
 	if (haschildren != 0)
 		dc_hold(dc);
@@ -793,44 +880,97 @@
 }
 
 static void
-dp_insertunused(struct dircache *dc)
+dp_unused_insert(struct dircache *dc)
 {
 	dc_assertlock(dc, MA_OWNED);
 
 	mtx_lock(&pool.dp_mtx);
 	if (dc->dc_type == DT_INVALID) {
 		TAILQ_INSERT_HEAD(&pool.dp_invalid, dc, dc_list);
-		pool.dp_invalidcnt++;
+		pool.dp_invalid_cnt++;
 		DCDEBUG("insert unused: %p -> invalid list: type=%d\n",
 		    dc, dc->dc_type);
+		dp_invalid_lazyclear();
 	} else {
 		TAILQ_INSERT_TAIL(&pool.dp_unused, dc, dc_list);
-		pool.dp_unusedcnt++;
+		pool.dp_unused_cnt++;
 		DCDEBUG("insert unused: %p -> unused list: type=%d\n",
 		    dc, dc->dc_type);
+		dp_unused_lazyclear();
 	}
 	mtx_unlock(&pool.dp_mtx);
 }
 
 static void
-dp_removeunused(struct dircache *dc)
+dp_unused_remove(struct dircache *dc)
 {
+	dc_assertlock(dc, MA_OWNED);
+
 	mtx_lock(&pool.dp_mtx);
 	TAILQ_REMOVE(&pool.dp_unused, dc, dc_list);
-	pool.dp_unusedcnt--;
+	pool.dp_unused_cnt--;
 	mtx_unlock(&pool.dp_mtx);
 }
 
 static void
-dp_clearinvalid(void)
+dp_unused_clear(void)
+{
+	struct dircache *dc;
+	u_long shift = 0;
+
+	DC_STAT_INC(ds_clearunused);
+	dc = TAILQ_FIRST(&pool.dp_unused);
+	while (dc != NULL && pool.dp_unused_cnt > pool.dp_unused_max) {
+		if (dc_trylock(dc) != 0) {
+			dc = TAILQ_NEXT(dc, dc_list);
+			shift++;
+			if (shift >= pool.dp_unused_threshold)
+				break;
+			DC_STAT_INC(ds_clearunused_restart);
+			continue;
+		}
+		if (dc->dc_parent != NULL) {
+			if (dc_trylock(dc->dc_parent) != 0) {
+				dc_unlock(dc);
+				dc = TAILQ_NEXT(dc, dc_list);
+				shift++;
+				if (shift >= pool.dp_unused_threshold)
+					break;
+				DC_STAT_INC(ds_clearunused_restart);
+				continue;
+			}
+		}
+		mtx_unlock(&pool.dp_mtx);
+		MPASS(RB_EMPTY(&dc->dc_children));
+		dc_removeentry(dc);
+		mtx_lock(&pool.dp_mtx);
+		dc = TAILQ_FIRST(&pool.dp_unused);
+		shift = 0;
+	}
+	pool.dp_unused_limit = pool.dp_unused_cnt + pool.dp_unused_threshold;
+}
+
+static __inline void
+dp_unused_lazyclear(void)
+{
+	if (pool.dp_unused_cnt < pool.dp_unused_limit)
+		return;
+	pool.dp_unused_limit += pool.dp_unused_threshold;
+	taskqueue_enqueue(dc_tq, &pool.dp_task);
+}
+
+static void
+dp_invalid_clear(void)
 {
 	struct dircache *dc;
 
-	mtx_lock(&pool.dp_mtx);
+	mtx_assert(&pool.dp_mtx, MA_OWNED);
+
+	DC_STAT_INC(ds_clearinvalid);
 	while (!TAILQ_EMPTY(&pool.dp_invalid)) {
 		dc = TAILQ_FIRST(&pool.dp_invalid);
 		TAILQ_REMOVE(&pool.dp_invalid, dc, dc_list);
-		pool.dp_invalidcnt--;
+		pool.dp_invalid_cnt--;
 		mtx_unlock(&pool.dp_mtx);
 		dc_lock(dc);
 		if (dc_dropsafe(dc) == 0) {
@@ -841,12 +981,33 @@
 				dc_lock(dc->dc_parent);
 				dc_removeentry(dc);
 				dc_lock(dc);
-			}
-			dc_removechildren(dc);
+			} else
+				dc_removechildren(dc);
 			dc_drop(dc);
 		}
 		mtx_lock(&pool.dp_mtx);
 	}
+	MPASS(pool.dp_invalid_cnt == 0);
+	pool.dp_invalid_limit = pool.dp_invalid_threshold;
+}
+
+static __inline void
+dp_invalid_lazyclear(void)
+{
+	if (pool.dp_invalid_cnt < pool.dp_invalid_limit ||
+	    (pool.dp_task.ta_flags & TA_FLAGS_RUNNING) != 0)
+		return;
+	pool.dp_invalid_limit += pool.dp_invalid_threshold;
+	taskqueue_enqueue(dc_tq, &pool.dp_task);
+}
+
+static void
+dp_taskfunc(void *context, int pending)
+{
+	mtx_lock(&pool.dp_mtx);
+	dp_invalid_clear();
+	if (pool.dp_unused_cnt > pool.dp_unused_max)
+		dp_unused_clear();
 	mtx_unlock(&pool.dp_mtx);
 }
 
@@ -854,14 +1015,23 @@
 dircache_init(struct mount *mp, ino_t inode)
 {
 	struct dircache *dc;
+	struct dircache_mount *dm;
 
+	dm = malloc(sizeof(struct dircache_mount), M_DIRCACHE,
+	    M_WAITOK | M_ZERO);
+	mtx_init(&dm->dm_mtx, "dircache root", NULL, MTX_DEF);
+	LIST_INIT(&dm->dm_inodehead);
+
 	MPASS(mp->mnt_dircache == NULL);
-	dc = dc_alloc(DT_ROOT, NULL, 0);
+	dc = dc_alloc(dm, DT_ROOT, NULL, 0);
 	dc_lock(dc);
-	dp_insertunused(dc);
-	dc->dc_inode = inode;
-	mp->mnt_dircache = dc_ref(dc);
+	dp_unused_insert(dc);
+	dc_setinode(dc, inode);
+	dm->dm_entry = dc_ref(dc);
 	dc_unlock(dc);
+	MNT_ILOCK(mp);
+	mp->mnt_dircache = dm;
+	MNT_IUNLOCK(mp);
 	DCDEBUG("init: root=%p %d\n", dc, inode);
 }
 
@@ -869,15 +1039,23 @@
 dircache_uninit(struct mount *mp)
 {
 	struct dircache *dc, *child;
+	struct dircache_mount *dm;
 	int dropped, dchold;
 
 	MPASS(mp->mnt_dircache != NULL);
 
-	dp_clearinvalid();
+	mtx_lock(&pool.dp_mtx);
+	dp_invalid_clear();
+	mtx_unlock(&pool.dp_mtx);
+
+	MNT_ILOCK(mp);
+	dm = mp->mnt_dircache;
+	mp->mnt_dircache = NULL;
+	MNT_IUNLOCK(mp);
 
 restart:
-	DCDEBUG("uninit: root=%p\n", mp->mnt_dircache);
-	dc = mp->mnt_dircache;
+	DCDEBUG("uninit: root=%p\n", dm->dm_entry);
+	dc = dm->dm_entry;
 	dc_lock(dc);
 
 	while (dc != NULL && !RB_EMPTY(&dc->dc_children)) {
@@ -916,18 +1094,22 @@
 	}
 
 	if (dc == NULL) {
-		dc = mp->mnt_dircache;
+		dc = dm->dm_entry;
 		dc_lock(dc);
 	}
 
 	MPASS(RB_EMPTY(&dc->dc_children));
 
-	mp->mnt_dircache = NULL;
 	dc_invalidate(dc);
 	dropped = dc_rele(dc);
 	MPASS(dropped != 0);
 
-	dp_clearinvalid();
+	mtx_lock(&pool.dp_mtx);
+	dp_invalid_clear();
+	mtx_unlock(&pool.dp_mtx);
+
+	MPASS(LIST_EMPTY(&dm->dm_inodehead));
+	free(dm, M_DIRCACHE);
 }
 
 void
@@ -1098,7 +1280,7 @@
 	MPASS(type == DT_STRONG || type == DT_WEAK);
 
 	DCDEBUG("add: inode=%d %s; vp=%p\n", inode, cnp->cn_nameptr, vp);
-	ndc = dc_alloc(type, cnp->cn_nameptr, cnp->cn_namelen);
+	ndc = dc_alloc(dm_get(dvp), type, cnp->cn_nameptr, cnp->cn_namelen);
 	dc_lock(ndc);
 	pdc = dc_getentry(dvp, NULL, NULL);
 	if (pdc == NULL) {
@@ -1249,7 +1431,8 @@
 	    (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.')))
 		panic("dircache: set negative for '.' or '..'");
 
-	ndc = dc_alloc(DT_NEGATIVE, cnp->cn_nameptr, cnp->cn_namelen);
+	ndc = dc_alloc(dm_get(dvp), DT_NEGATIVE, cnp->cn_nameptr,
+	    cnp->cn_namelen);
 	dc_lock(ndc);
 	pdc = dc_getentry(dvp, NULL, NULL);
 	if (pdc == NULL) {

==== //depot/projects/soc2010/gk_namecache/sys/modules/tmpfs/Makefile#3 (text+ko) ====

@@ -7,4 +7,8 @@
 SRCS=	vnode_if.h \
 	tmpfs_vnops.c tmpfs_fifoops.c tmpfs_vfsops.c tmpfs_subr.c vfs_dircache.c
 
+.if defined(NO_DIRCACHE)
+CFLAGS+= -DNO_DIRCACHE
+.endif
+
 .include <bsd.kmod.mk>

==== //depot/projects/soc2010/gk_namecache/sys/sys/dircache.h#5 (text+ko) ====

@@ -40,6 +40,8 @@
 struct mount;
 struct vnode;
 
+struct dircache_mount;
+
 RB_HEAD(dircache_tree, dircache);
 
 struct dircache {
@@ -51,7 +53,9 @@
 	} dc_e;
 	struct dircache *dc_parent;
 	struct vnode *dc_vnode;
+	struct dircache_mount *dc_mount;
 	TAILQ_ENTRY(dircache) dc_list;
+	LIST_ENTRY(dircache) dc_inodelist;
 	char *dc_name;
 	volatile int dc_holdcnt;
 	int dc_usecnt;
@@ -65,12 +69,6 @@
 #define	dc_tree				dc_e.dce_tree
 #define	dc_tmplist			dc_e.dce_tmplist
 
-struct dircache_cursor {
-	struct dircache *dcr_parent;
-	int dcr_flags;
-	u_long dcr_gen;
-};
-
 void dircache_init(struct mount *mp, ino_t inode);
 void dircache_uninit(struct mount *mp);
 void dircache_purge_negative(struct vnode *dvp);

==== //depot/projects/soc2010/gk_namecache/sys/sys/mount.h#3 (text+ko) ====

@@ -127,7 +127,7 @@
 	long	f_spare[2];		/* unused spare */
 };
 
-struct dircache;
+struct dircache_mount;
 
 TAILQ_HEAD(vnodelst, vnode);
 
@@ -186,7 +186,7 @@
 	int		mnt_secondary_writes;   /* (i) # of secondary writes */
 	int		mnt_secondary_accwrites;/* (i) secondary wr. starts */
 	struct thread	*mnt_susp_owner;	/* (i) thread owning suspension */
-	struct dircache	*mnt_dircache;		/* dircache root node */
+	struct dircache_mount *mnt_dircache;	/* per-mount dircache data */
 #define	mnt_endzero	mnt_gjprovider
 	char		*mnt_gjprovider;	/* gjournal provider name */
 	struct lock	mnt_explock;		/* vfs_export walkers lock */
@@ -329,6 +329,7 @@
 #define MNTK_EXTENDED_SHARED	0x00000040 /* Allow shared locking for more ops */
 #define	MNTK_SHARED_WRITES	0x00000080 /* Allow shared locking for writes */
 #define	MNTK_SUJ	0x00000100	/* Softdep journaling enabled */
+#define	MNTK_DIRCACHE	0x00000200	/* dircache enabled */
 #define MNTK_UNMOUNT	0x01000000	/* unmount in progress */
 #define	MNTK_MWAIT	0x02000000	/* waiting for unmount to finish */
 #define	MNTK_SUSPEND	0x08000000	/* request write suspension */


More information about the p4-projects mailing list