svn commit: r291460 - head/sys/kern
Kirk McKusick
mckusick at FreeBSD.org
Sun Nov 29 21:42:27 UTC 2015
Author: mckusick
Date: Sun Nov 29 21:42:26 2015
New Revision: 291460
URL: https://svnweb.freebsd.org/changeset/base/291460
Log:
As the kernel allocates and frees vnodes, it fully initializes them
on every allocation and fully releases them on every free. These
are not trivial costs: it starts by zeroing a large structure then
initializes a mutex, a lock manager lock, an rw lock, four lists,
and six pointers. And looking at vfs.vnodes_created, these operations
are being done millions of times an hour on a busy machine.
As a performance optimization, this code update uses the uma_init
and uma_fini routines to do these initializations and cleanups only
as the vnodes enter and leave the vnode_zone. With this change the
initializations are only done kern.maxvnodes times at system startup
and then only rarely again. The frees are done only if the vnode_zone
shrinks which never happens in practice. For those curious about the
avoided work, look at the vnode_init() and vnode_fini() functions in
kern/vfs_subr.c to see the code that has been removed from the main
vnode allocation/free path.
Reviewed by: kib
Tested by: Peter Holm
Modified:
head/sys/kern/vfs_subr.c
Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c Sun Nov 29 21:01:02 2015 (r291459)
+++ head/sys/kern/vfs_subr.c Sun Nov 29 21:42:26 2015 (r291460)
@@ -346,6 +346,66 @@ PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_tr
#ifndef MAXVNODES_MAX
#define MAXVNODES_MAX (512 * 1024 * 1024 / 64) /* 8M */
#endif
+
+/*
+ * Initialize a vnode as it first enters the zone.
+ */
+static int
+vnode_init(void *mem, int size, int flags)
+{
+ struct vnode *vp;
+ struct bufobj *bo;
+
+ vp = mem;
+ bzero(vp, size);
+ /*
+ * Setup locks.
+ */
+ vp->v_vnlock = &vp->v_lock;
+ mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF);
+ /*
+ * By default, don't allow shared locks unless filesystems opt-in.
+ */
+ lockinit(vp->v_vnlock, PVFS, "vnode", VLKTIMEOUT,
+ LK_NOSHARE | LK_IS_VNODE);
+ /*
+ * Initialize bufobj.
+ */
+ bo = &vp->v_bufobj;
+ bo->__bo_vnode = vp;
+ rw_init(BO_LOCKPTR(bo), "bufobj interlock");
+ bo->bo_private = vp;
+ TAILQ_INIT(&bo->bo_clean.bv_hd);
+ TAILQ_INIT(&bo->bo_dirty.bv_hd);
+ /*
+ * Initialize namecache.
+ */
+ LIST_INIT(&vp->v_cache_src);
+ TAILQ_INIT(&vp->v_cache_dst);
+ /*
+ * Initialize rangelocks.
+ */
+ rangelock_init(&vp->v_rl);
+ return (0);
+}
+
+/*
+ * Free a vnode when it is cleared from the zone.
+ */
+static void
+vnode_fini(void *mem, int size)
+{
+ struct vnode *vp;
+ struct bufobj *bo;
+
+ vp = mem;
+ rangelock_destroy(&vp->v_rl);
+ lockdestroy(vp->v_vnlock);
+ mtx_destroy(&vp->v_interlock);
+ bo = &vp->v_bufobj;
+ rw_destroy(BO_LOCKPTR(bo));
+}
+
static void
vntblinit(void *dummy __unused)
{
@@ -379,7 +439,7 @@ vntblinit(void *dummy __unused)
TAILQ_INIT(&vnode_free_list);
mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF);
vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, 0);
+ vnode_init, vnode_fini, UMA_ALIGN_PTR, 0);
vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
/*
@@ -1223,8 +1283,8 @@ getnewvnode(const char *tag, struct moun
struct vnode **vpp)
{
struct vnode *vp;
- struct bufobj *bo;
struct thread *td;
+ struct lock_object *lo;
static int cyclecount;
int error;
@@ -1271,40 +1331,42 @@ getnewvnode(const char *tag, struct moun
mtx_unlock(&vnode_free_list_mtx);
alloc:
atomic_add_long(&vnodes_created, 1);
- vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK|M_ZERO);
+ vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK);
/*
- * Setup locks.
- */
- vp->v_vnlock = &vp->v_lock;
- mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF);
- /*
- * By default, don't allow shared locks unless filesystems
- * opt-in.
- */
- lockinit(vp->v_vnlock, PVFS, tag, VLKTIMEOUT, LK_NOSHARE | LK_IS_VNODE);
- /*
- * Initialize bufobj.
+ * Locks are given the generic name "vnode" when created.
+ * Follow the historic practice of using the filesystem
+ * name when they are allocated, e.g., "zfs", "ufs", "nfs", etc.
+ *
+ * Locks live in a witness group keyed on their name. Thus,
+ * when a lock is renamed, it must also move from the witness
+ * group of its old name to the witness group of its new name.
+ *
+ * The change only needs to be made when the vnode moves
+ * from one filesystem type to another. We ensure that each
+ * filesystem uses a single static name pointer for its tag so
+ * that we can compare pointers rather than doing a strcmp().
*/
- bo = &vp->v_bufobj;
- bo->__bo_vnode = vp;
- rw_init(BO_LOCKPTR(bo), "bufobj interlock");
- bo->bo_ops = &buf_ops_bio;
- bo->bo_private = vp;
- TAILQ_INIT(&bo->bo_clean.bv_hd);
- TAILQ_INIT(&bo->bo_dirty.bv_hd);
+ lo = &vp->v_vnlock->lock_object;
+ if (lo->lo_name != tag) {
+ lo->lo_name = tag;
+ WITNESS_DESTROY(lo);
+ WITNESS_INIT(lo, tag);
+ }
/*
- * Initialize namecache.
+ * By default, don't allow shared locks unless filesystems opt-in.
*/
- LIST_INIT(&vp->v_cache_src);
- TAILQ_INIT(&vp->v_cache_dst);
+ vp->v_vnlock->lock_object.lo_flags |= LK_NOSHARE;
/*
* Finalize various vnode identity bits.
*/
+ KASSERT(vp->v_object == NULL, ("stale v_object %p", vp));
+ KASSERT(vp->v_lockf == NULL, ("stale v_lockf %p", vp));
+ KASSERT(vp->v_pollinfo == NULL, ("stale v_pollinfo %p", vp));
vp->v_type = VNON;
vp->v_tag = tag;
vp->v_op = vops;
v_init_counters(vp);
- vp->v_data = NULL;
+ vp->v_bufobj.bo_ops = &buf_ops_bio;
#ifdef MAC
mac_vnode_init(vp);
if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0)
@@ -1313,11 +1375,10 @@ alloc:
printf("NULL mp in getnewvnode()\n");
#endif
if (mp != NULL) {
- bo->bo_bsize = mp->mnt_stat.f_iosize;
+ vp->v_bufobj.bo_bsize = mp->mnt_stat.f_iosize;
if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0)
vp->v_vflag |= VV_NOKNOTE;
}
- rangelock_init(&vp->v_rl);
/*
* For the filesystems which do not use vfs_hash_insert(),
@@ -2683,6 +2744,12 @@ _vdrop(struct vnode *vp, bool locked)
}
/*
* The vnode has been marked for destruction, so free it.
+ *
+ * The vnode will be returned to the zone where it will
+ * normally remain until it is needed for another vnode. We
+ * need to cleanup (or verify that the cleanup has already
+ * been done) any residual data left from its current use
+ * so as not to contaminate the freshly allocated vnode.
*/
CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp);
atomic_subtract_long(&numvnodes, 1);
@@ -2707,16 +2774,17 @@ _vdrop(struct vnode *vp, bool locked)
#ifdef MAC
mac_vnode_destroy(vp);
#endif
- if (vp->v_pollinfo != NULL)
+ if (vp->v_pollinfo != NULL) {
destroy_vpollinfo(vp->v_pollinfo);
+ vp->v_pollinfo = NULL;
+ }
#ifdef INVARIANTS
/* XXX Elsewhere we detect an already freed vnode via NULL v_op. */
vp->v_op = NULL;
#endif
- rangelock_destroy(&vp->v_rl);
- lockdestroy(vp->v_vnlock);
- mtx_destroy(&vp->v_interlock);
- rw_destroy(BO_LOCKPTR(bo));
+ vp->v_iflag = 0;
+ vp->v_vflag = 0;
+ bo->bo_flag = 0;
uma_zfree(vnode_zone, vp);
}
@@ -3081,6 +3149,7 @@ vgonel(struct vnode *vp)
* Clear the advisory locks and wake up waiting threads.
*/
(void)VOP_ADVLOCKPURGE(vp);
+ vp->v_lockf = NULL;
/*
* Delete from old mount point vnode list.
*/
More information about the svn-src-all
mailing list