svn commit: r200049 - in user/kmacy/releng_8_fcs_buf_xen: cddl/lib/libzpool sys/cddl/contrib/opensolaris/uts/common/fs/zfs sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys

Kip Macy kmacy at FreeBSD.org
Thu Dec 3 00:27:17 UTC 2009


Author: kmacy
Date: Thu Dec  3 00:27:16 2009
New Revision: 200049
URL: http://svn.freebsd.org/changeset/base/200049

Log:
  - Minimize ARC churn by moving functions interfacing with the buffer cache to
    a separate file
  
  - consolidate I/O cache synchronization in zio_create

Added:
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bio.h   (contents, props changed)
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_bio.c   (contents, props changed)
Modified:
  user/kmacy/releng_8_fcs_buf_xen/cddl/lib/libzpool/Makefile
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c

Modified: user/kmacy/releng_8_fcs_buf_xen/cddl/lib/libzpool/Makefile
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/cddl/lib/libzpool/Makefile	Wed Dec  2 21:58:34 2009	(r200048)
+++ user/kmacy/releng_8_fcs_buf_xen/cddl/lib/libzpool/Makefile	Thu Dec  3 00:27:16 2009	(r200049)
@@ -23,7 +23,7 @@ ATOMIC_SRCS=	opensolaris_atomic.c
 
 LIB=		zpool
 
-ZFS_COMMON_SRCS= ${ZFS_COMMON_OBJS:C/.o$/.c/} vdev_file.c
+ZFS_COMMON_SRCS= ${ZFS_COMMON_OBJS:C/.o$/.c/} vdev_file.c zfs_bio.c
 ZFS_SHARED_SRCS= ${ZFS_SHARED_OBJS:C/.o$/.c/}
 KERNEL_SRCS=	kernel.c taskq.c util.c
 LIST_SRCS=	list.c

Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Wed Dec  2 21:58:34 2009	(r200048)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Thu Dec  3 00:27:16 2009	(r200049)
@@ -122,12 +122,12 @@
 #include <sys/zio_checksum.h>
 #include <sys/zfs_context.h>
 #include <sys/arc.h>
+#include <sys/zfs_bio.h>
 #include <sys/refcount.h>
 #include <sys/vdev.h>
 #ifdef _KERNEL
 #include <sys/dnlc.h>
 #endif
-#include <sys/ktr.h>
 #include <sys/callb.h>
 #include <sys/kstat.h>
 #include <sys/sdt.h>
@@ -187,11 +187,6 @@ SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_min,
 SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN,
     &zfs_mdcomp_disable, 0, "Disable metadata compression");
 
-static int zfs_page_cache_disable = 0;
-TUNABLE_INT("vfs.zfs.page_cache_disable", &zfs_page_cache_disable);
-SYSCTL_INT(_vfs_zfs, OID_AUTO, page_cache_disable, CTLFLAG_RDTUN,
-    &zfs_page_cache_disable, 0, "Disable backing ARC with page cache ");
-
 #ifdef ZIO_USE_UMA
 extern kmem_cache_t	*zio_buf_cache[];
 extern kmem_cache_t	*zio_data_buf_cache[];
@@ -263,8 +258,8 @@ static arc_state_t ARC_mfu_ghost;
 static arc_state_t ARC_l2c_only;
 
 typedef struct arc_stats {
-	kstat_named_t arcstat_hits;
 	kstat_named_t arcstat_page_cache_hits;
+	kstat_named_t arcstat_hits;
 	kstat_named_t arcstat_misses;
 	kstat_named_t arcstat_demand_data_hits;
 	kstat_named_t arcstat_demand_data_misses;
@@ -453,28 +448,33 @@ struct arc_write_callback {
 	arc_buf_t	*awcb_buf;
 };
 
+/*
+ * Keep initial ordering in-sync with zbio_buf_hdr
+ */
+
 struct arc_buf_hdr {
 	/* protected by hash lock */
 	dva_t			b_dva;
 	uint64_t		b_birth;
-	uint64_t		b_cksum0;
+	uint32_t		b_flags;
+	uint32_t		b_datacnt;
 
+	/* immutable */
+	arc_buf_contents_t	b_type;
+	uint64_t		b_size;
+	spa_t			*b_spa;
+
+	/* protected by hash lock */
 	kmutex_t		b_freeze_lock;
 	zio_cksum_t		*b_freeze_cksum;
 
 	arc_buf_hdr_t		*b_hash_next;
 	arc_buf_t		*b_buf;
-	uint32_t		b_flags;
-	uint32_t		b_datacnt;
+	uint64_t		b_cksum0;
 
 	arc_callback_t		*b_acb;
 	kcondvar_t		b_cv;
 
-	/* immutable */
-	arc_buf_contents_t	b_type;
-	uint64_t		b_size;
-	spa_t			*b_spa;
-
 	/* protected by arc state mutex */
 	arc_state_t		*b_state;
 	list_node_t		b_arc_node;
@@ -520,7 +520,6 @@ static void arc_evict_ghost(arc_state_t 
 #define	ARC_L2_EVICTED		(1 << 17)	/* evicted during I/O */
 #define	ARC_L2_WRITE_HEAD	(1 << 18)	/* head of write list */
 #define	ARC_STORED		(1 << 19)	/* has been store()d to */
-#define	ARC_BUF_CLONING		(1 << 21)	/* is being cloned */
 
 #define	HDR_IN_HASH_TABLE(hdr)	((hdr)->b_flags & ARC_IN_HASH_TABLE)
 #define	HDR_IO_IN_PROGRESS(hdr)	((hdr)->b_flags & ARC_IO_IN_PROGRESS)
@@ -642,9 +641,8 @@ struct l2arc_buf_hdr {
 typedef struct l2arc_data_free {
 	/* protected by l2arc_free_on_write_mtx */
 	arc_buf_t	*l2df_buf;
-	void		*l2df_data;
 	size_t		l2df_size;
-	void		(*l2df_func)(arc_buf_t *, void *, size_t);
+	void		(*l2df_func)(arc_buf_t *, size_t);
 	list_node_t	l2df_list_node;
 } l2arc_data_free_t;
 
@@ -1260,7 +1258,7 @@ arc_buf_clone(arc_buf_t *from)
 	buf->b_private = NULL;
 	buf->b_next = hdr->b_buf;
 	hdr->b_buf = buf;
-	hdr->b_flags |= ARC_BUF_CLONING;
+	hdr->b_flags |= ZBIO_BUF_CLONING;
 	arc_get_data_buf(buf);
 	bcopy(from->b_data, buf->b_data, size);
 	hdr->b_datacnt += 1;
@@ -1299,259 +1297,18 @@ arc_buf_add_ref(arc_buf_t *buf, void* ta
 	    data, metadata, hits);
 }
 
-#ifdef _KERNEL
-void
-arc_binval(spa_t *spa, dva_t *dva, uint64_t size)
-{
-	uint64_t blkno, blkno_lookup;
-	struct vnode *vp;
-	struct bufobj *bo;
-	struct buf *bp;
-	vm_pindex_t start, end;
-	vm_object_t object;
-	vm_page_t m;
-	int i;
-
-	if (zfs_page_cache_disable)
-		return;
-
-	if (dva == NULL || spa == NULL || blkno == 0 || size == 0)
-		return;
-
-	blkno_lookup = blkno = dva->dva_word[1] & ~(1ULL<<63);
-	vp = spa_get_vnode(spa);
-	bo = &vp->v_bufobj;
-
-	BO_LOCK(bo);
-retry:
-	bp = gbincore(bo, blkno_lookup);
-	if (bp != NULL) {
-		BUF_LOCK(bp, LK_EXCLUSIVE | LK_INTERLOCK, BO_MTX(bo));
-		CTR3(KTR_SPARE2, "arc_binval() bp=%p blkno %ld npages %d",
-		   bp, blkno, bp->b_npages);
-		bremfree(bp);
-		KASSERT(bp->b_flags & B_VMIO, ("buf found, VMIO not set"));		
-		bp->b_flags |= B_INVAL;
-		bp->b_birth = 0;
-		brelse(bp);
-	} else if (blkno_lookup & 0x7) {
-		blkno_lookup &= ~0x7;
-		goto retry;
-	} else {
-		CTR2(KTR_SPARE2, "arc_binval() blkno %ld npages %d",
-		    blkno, OFF_TO_IDX(size));
-		BO_UNLOCK(bo);
-	}
-	start = OFF_TO_IDX((blkno_lookup << 9));
-	end = start + OFF_TO_IDX(size + PAGE_MASK);
-	object = vp->v_object;
-
-	VM_OBJECT_LOCK(object);
-	vm_page_cache_free(object, start, end);
-	vm_object_page_remove(object, start, end, FALSE);
-#ifdef INVARIANTS
-	for (i = 0; i < OFF_TO_IDX(size); i++) {
-		KASSERT(vm_page_lookup(object, start + i) == NULL,
-		    ("found page at %ld blkno %ld blkno_lookup %ld",
-			start + i, blkno, blkno_lookup));
-	}
-#endif	
-	VM_OBJECT_UNLOCK(object);
-}
-
-static void
-arc_pcache(struct vnode *vp, struct buf *bp, uint64_t blkno)
-{
-	vm_pindex_t start = OFF_TO_IDX((blkno << 9));
-	vm_object_t object = vp->v_object;
-	struct bufobj *bo = &vp->v_bufobj;
-	vm_page_t m;
-	int i;
-
-	BO_LOCK(bo);
-	bgetvp(vp, bp);
-	BO_UNLOCK(bo);
-
-	CTR3(KTR_SPARE2, "arc_pcache() bp=%p blkno %ld npages %d",
-		   bp, blkno, bp->b_npages);
-	VM_OBJECT_LOCK(object);
-	for (i = 0; i < bp->b_npages; i++) {
-		m = bp->b_pages[i];
-		vm_page_insert(m, object, start + i);
-	}
-	VM_OBJECT_UNLOCK(object);
-	bp->b_flags |= B_VMIO;
-}
-
-static void
-arc_bcache(arc_buf_t *buf)
-{	
-	uint64_t blkno = buf->b_hdr->b_dva.dva_word[1] & ~(1ULL<<63);
-	struct buf *newbp, *bp = buf->b_bp;
-	struct vnode *vp = spa_get_vnode(buf->b_hdr->b_spa);
-	struct bufobj *bo = &vp->v_bufobj;
-	arc_buf_hdr_t *hdr = buf->b_hdr;
-	int cachebuf;
-
-	if (zfs_page_cache_disable)
-		return;
-
-	if (blkno == 0 || hdr->b_birth == 0)
-		return;
-
-	newbp = buf->b_bp;
-	newbp->b_birth = hdr->b_birth;
-	newbp->b_blkno = newbp->b_lblkno = blkno;
-	newbp->b_offset = (blkno << 9);
-	cachebuf = ((hdr->b_datacnt == 1) &&
-	    !(hdr->b_flags & ARC_IO_ERROR) &&
-	    ((newbp->b_flags & (B_INVAL|B_CACHE)) == B_CACHE) &&
-	    (blkno & 0x7) == 0);
-
-	arc_binval(hdr->b_spa, &hdr->b_dva, hdr->b_size);
-	if (cachebuf) 
-		arc_pcache(vp, newbp, blkno);	
-}
-#else
-void
-arc_binval(spa_t *spa, dva_t *dva, uint64_t size)
-{
-}
-#endif
-
-
-static void
-arc_getblk(arc_buf_t *buf)
-{
-	uint64_t		size = buf->b_hdr->b_size;
-	arc_buf_contents_t	type = buf->b_hdr->b_type;
-	spa_t			*spa = buf->b_hdr->b_spa;
-	uint64_t blkno = buf->b_hdr->b_dva.dva_word[1] & ~(1ULL<<63);
-	void *data;
-	arc_buf_t *tbuf;
-	struct vnode *vp;
-	int i, flags = 0;
-#ifdef _KERNEL	
-	struct buf *newbp, *bp;
-	struct bufobj *bo;
-	vm_pindex_t start, end;
-	vm_object_t object;
-#endif
-	if (type == ARC_BUFC_METADATA) {
-		arc_space_consume(size);
-	} else {
-		ASSERT(type == ARC_BUFC_DATA);
-#ifdef _KERNEL
-		flags = GB_NODUMP;
-#endif		
-		atomic_add_64(&arc_size, size);
-	}
-
-#ifdef 	_KERNEL
-	vp = spa_get_vnode(spa);
-	bo = &vp->v_bufobj;
-	newbp = NULL;
-#endif
-	if (size < PAGE_SIZE) {
-		data = zio_buf_alloc(size);
-	}
-#ifdef _KERNEL
-	else if ((buf->b_hdr->b_flags & ARC_BUF_CLONING) ||
-	    BUF_EMPTY(buf->b_hdr) ||
-	    (blkno == 0)) {
-		newbp = geteblk(size, flags);
-		data = newbp->b_data;
-		buf->b_hdr->b_flags &= ~ARC_BUF_CLONING;
-	} else {
-		newbp = getblk(vp, blkno, size, 0, 0, flags | GB_LOCK_NOWAIT);
-		if (newbp == NULL)
-			newbp = geteblk(size, flags);
-		else {
-			vm_object_t object = vp->v_object;
-			vm_page_t m;
-
-			/*
-			 * Strip the buffers pages from the object
-			 */
-			VM_OBJECT_LOCK(object);
-			vm_page_lock_queues();
-			for (i = 0; i < newbp->b_npages; i++){
-				m = newbp->b_pages[i];
-				vm_page_remove(m);
-			}
-			vm_page_unlock_queues();
-			VM_OBJECT_UNLOCK(object);
-			brelvp(newbp);
-			newbp->b_flags &= ~B_VMIO;
-		}
-		data = newbp->b_data;
-	}
-
-	if (newbp != NULL) {
-		BUF_KERNPROC(newbp);
-
-		CTR4(KTR_SPARE2, "arc_getblk() bp=%p flags %X blkno %ld npages %d",
-		    newbp, newbp->b_flags, blkno, newbp->b_npages);
-#ifdef INVARIANTS
-		for (i = 0; i < newbp->b_npages; i++)
-			KASSERT(newbp->b_pages[i]->object == NULL,
-			    ("newbp page not removed"));
-#endif	
-	}
-	buf->b_bp = newbp;
-#endif	
-	buf->b_data = data;
-}
-
-static void
-arc_brelse(arc_buf_t *buf, void *data, size_t size)
-{
-	struct buf *bp = buf->b_bp;
-	arc_buf_hdr_t *hdr = buf->b_hdr;
-#ifdef INVARIANTS
-	int i;
-#endif
-	
-	if (bp == NULL) {
-		zio_buf_free(buf->b_data, size);
-		return;
-	}
-#ifdef _KERNEL
-#ifdef INVARIANTS
-	for (i = 0; i < bp->b_npages; i++)
-		KASSERT(bp->b_pages[i]->object == NULL,
-		    ("newbp page not removed"));
-#endif	
-	arc_bcache(buf);
-
-
-	if (bp->b_vp == NULL)
-		KASSERT((bp->b_flags & B_VMIO) == 0, ("no vp but VMIO set!"));
-	else {
-		KASSERT((bp->b_flags & B_VMIO), ("vp but VMIO not set!"));
-		CTR4(KTR_SPARE2, "arc_brelse() bp=%p flags %X"
-		    " size %ld blkno=%ld",
-		    bp, bp->b_flags, size, bp->b_blkno);
-	}
-
-	bp->b_flags |= B_ZFS;
-	brelse(bp);
-#endif
-}
-
 /*
  * Free the arc data buffer.  If it is an l2arc write in progress,
  * the buffer is placed on l2arc_free_on_write to be freed later.
  */
 static void
-arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(arc_buf_t *, void *, size_t),
-    arc_buf_t *buf, void *data, size_t size)
+arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(arc_buf_t *, size_t),
+    arc_buf_t *buf, size_t size)
 {
 	if (HDR_L2_WRITING(hdr)) {
 		l2arc_data_free_t *df;
 		df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
 		df->l2df_buf = buf;
-		df->l2df_data = data;
 		df->l2df_size = size;
 		df->l2df_func = free_func;
 		mutex_enter(&l2arc_free_on_write_mtx);
@@ -1559,7 +1316,7 @@ arc_buf_data_free(arc_buf_hdr_t *hdr, vo
 		mutex_exit(&l2arc_free_on_write_mtx);
 		ARCSTAT_BUMP(arcstat_l2_free_on_write);
 	} else {
-		free_func(buf, data, size);
+		free_func(buf, size);
 	}
 }
 
@@ -1577,13 +1334,13 @@ arc_buf_destroy(arc_buf_t *buf, boolean_
 		arc_cksum_verify(buf);
 		if (!recycle) {
 			if (type == ARC_BUFC_METADATA) {
-				arc_buf_data_free(buf->b_hdr, arc_brelse,
-				    buf, buf->b_data, size);
+				arc_buf_data_free(buf->b_hdr, zbio_relse,
+				    buf, size);
 				arc_space_return(size);
 			} else {
 				ASSERT(type == ARC_BUFC_DATA);
-				arc_buf_data_free(buf->b_hdr, arc_brelse,
-				    buf, buf->b_data, size);
+				arc_buf_data_free(buf->b_hdr,
+				    zbio_relse, buf, size);
 				atomic_add_64(&arc_size, -size);
 			}
 		}
@@ -1802,12 +1559,14 @@ arc_evict(arc_state_t *state, spa_t *spa
 
 	evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;
 
+#ifdef _KERNEL
 	/*
 	 * don't recycle page cache bufs
 	 *
 	 */
 	if (recycle && (bytes >= PAGE_SIZE))
 		recycle = FALSE;
+#endif
 	if (type == ARC_BUFC_METADATA) {
 		offset = 0;
 		list_count = ARC_BUFC_NUMMETADATALISTS;
@@ -1822,9 +1581,7 @@ arc_evict(arc_state_t *state, spa_t *spa
 		list_count = ARC_BUFC_NUMDATALISTS;
 		idx = evict_data_offset;
 	}
-	for (bytes_remaining = 0, i = 0; i < list_count; i++) 
-                bytes_remaining += evicted_state->arcs_lsize[i + offset]; 
-
+	bytes_remaining = evicted_state->arcs_lsize[type];
 	count = 0;
 	
 evict_start:
@@ -2422,7 +2179,7 @@ arc_reclaim_thread(void *dummy __unused)
 static void
 arc_adapt(int bytes, arc_state_t *state)
 {
-	int mult, divisor;
+	int mult;
 
 	if (state == arc_l2c_only)
 		return;
@@ -2437,15 +2194,13 @@ arc_adapt(int bytes, arc_state_t *state)
 	 *	  target size of the MRU list.
 	 */
 	if (state == arc_mru_ghost) {
-		divisor = MAX(arc_mru_ghost->arcs_size, 1);
 		mult = ((arc_mru_ghost->arcs_size >= arc_mfu_ghost->arcs_size) ?
 		    1 : (arc_mfu_ghost->arcs_size/arc_mru_ghost->arcs_size));
 
 		arc_p = MIN(arc_c, arc_p + bytes * mult);
 	} else if (state == arc_mfu_ghost) {
-		divisor = MAX(arc_mfu_ghost->arcs_size, 1);		
 		mult = ((arc_mfu_ghost->arcs_size >= arc_mru_ghost->arcs_size) ?
-		    1 : (arc_mru_ghost->arcs_size/divisor));
+		    1 : (arc_mru_ghost->arcs_size/arc_mfu_ghost->arcs_size));
 
 		arc_p = MAX(0, (int64_t)arc_p - bytes * mult);
 	}
@@ -2545,7 +2300,14 @@ arc_get_data_buf(arc_buf_t *buf)
 	 * just allocate a new buffer.
 	 */
 	if (!arc_evict_needed(type)) {
-		arc_getblk(buf);
+		if (type == ARC_BUFC_METADATA) {
+			zbio_getblk(buf);
+			arc_space_consume(size);
+		} else {
+			ASSERT(type == ARC_BUFC_DATA);
+			zbio_data_getblk(buf);
+			atomic_add_64(&arc_size, size);
+		}
 		goto out;
 	}
 
@@ -2569,10 +2331,18 @@ arc_get_data_buf(arc_buf_t *buf)
 		    mfu_space > arc_mfu->arcs_size) ? arc_mru : arc_mfu;
 	}
 	if ((buf->b_data = arc_evict(state, NULL, size, TRUE, type)) == NULL) {
-		arc_getblk(buf);
-		ASSERT(buf->b_data != NULL);
+		if (type == ARC_BUFC_METADATA) {
+			zbio_getblk(buf);
+			arc_space_consume(size);
+		} else {
+			ASSERT(type == ARC_BUFC_DATA);
+			zbio_data_getblk(buf);
+			atomic_add_64(&arc_size, size);
+		}
+		if (size < PAGE_SIZE)
+			ARCSTAT_BUMP(arcstat_recycle_miss);
 	}
-
+	ASSERT(buf->b_data != NULL);
 out:
 	/*
 	 * Update the state size.  Note that ghost states have a
@@ -2818,18 +2588,7 @@ arc_read_done(zio_t *zio)
 			buf_hash_remove(hdr);
 		freeable = refcount_is_zero(&hdr->b_refcnt);
 	}
-#ifdef _KERNEL
-	else if (buf->b_bp != NULL) {
-#ifdef INVARIANTS
-		int i;
-		for (i = 0; i < buf->b_bp->b_npages; i++)
-			KASSERT(buf->b_bp->b_pages[i]->object == NULL,
-			    ("bp page not removed"));
-#endif	
-		buf->b_bp->b_flags |= B_CACHE;
-		buf->b_bp->b_flags &= ~B_INVAL;
-	}
-#endif
+
 	/*
 	 * Broadcast before we drop the hash_lock to avoid the possibility
 	 * that the hdr (and hence the cv) might be freed before we get to
@@ -3535,12 +3294,6 @@ arc_write_done(zio_t *zio)
 			exists = buf_hash_insert(hdr, &hash_lock);
 			ASSERT3P(exists, ==, NULL);
 		}
-#ifdef _KERNEL
-		else if (buf->b_bp != NULL) {
-			buf->b_bp->b_flags |= B_CACHE;
-			buf->b_bp->b_flags &= ~B_INVAL;
-		}
-#endif
 		hdr->b_flags &= ~ARC_IO_IN_PROGRESS;
 		/* if it's not anon, we are doing a scrub */
 		if (hdr->b_state == arc_anon)
@@ -3832,7 +3585,6 @@ arc_tempreserve_space(uint64_t reserve, 
 static kmutex_t arc_lowmem_lock;
 #ifdef _KERNEL
 static eventhandler_tag arc_event_lowmem = NULL;
-static eventhandler_tag arc_event_shutdown = NULL;
 
 static void
 arc_lowmem(void *arg __unused, int howto __unused)
@@ -3846,44 +3598,6 @@ arc_lowmem(void *arg __unused, int howto
 		tsleep(&needfree, 0, "zfs:lowmem", hz / 5);
 	mutex_exit(&arc_lowmem_lock);
 }
-void
-arc_shutdown(void *arg __unused, int howto __unused)
-{
-	struct mount *mp, *tmpmp;
-	int error;
-
-	/*
-	 * unmount all ZFS file systems - freeing any buffers
-	 * then free all space allocator resources
-	 */
-	TAILQ_FOREACH_SAFE(mp, &mountlist, mnt_list, tmpmp) {
-		if (strcmp(mp->mnt_vfc->vfc_name, "zfs") == 0) {
-			error = dounmount(mp, MNT_FORCE, curthread);
-			if (error) {
-				TAILQ_REMOVE(&mountlist, mp, mnt_list);
-				printf("unmount of %s failed (",
-				    mp->mnt_stat.f_mntonname);
-				if (error == EBUSY)
-					printf("BUSY)\n");
-				else
-					printf("%d)\n", error);
-			}
-		}
-		
-	}
-	arc_flush(NULL);
-
-#ifdef NOTYET
-	/*
-	 * need corresponding includes
-	 */
-	zfsdev_fini();
-	zvol_fini();
-	zfs_fini();
-#endif	
-	spa_fini();
-}
-
 #endif
 
 void
@@ -4009,8 +3723,6 @@ arc_init(void)
 #ifdef _KERNEL
 	arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
 	    EVENTHANDLER_PRI_FIRST);
-	arc_event_shutdown = EVENTHANDLER_REGISTER(shutdown_pre_sync,
-	    arc_shutdown, NULL, EVENTHANDLER_PRI_FIRST);
 #endif
 
 	arc_dead = FALSE;
@@ -4105,8 +3817,6 @@ arc_fini(void)
 #ifdef _KERNEL
 	if (arc_event_lowmem != NULL)
 		EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
-	if (arc_event_shutdown != NULL)
-		EVENTHANDLER_DEREGISTER(shutdown_pre_sync, arc_event_shutdown);
 #endif
 }
 
@@ -4326,9 +4036,8 @@ l2arc_do_free_on_write()
 
 	for (df = list_tail(buflist); df; df = df_prev) {
 		df_prev = list_prev(buflist, df);
-		ASSERT(df->l2df_data != NULL);
 		ASSERT(df->l2df_func != NULL);
-		df->l2df_func(df->l2df_buf, df->l2df_data, df->l2df_size);
+		df->l2df_func(df->l2df_buf, df->l2df_size);
 		list_remove(buflist, df);
 		kmem_free(df, sizeof (l2arc_data_free_t));
 	}

Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h	Wed Dec  2 21:58:34 2009	(r200048)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h	Thu Dec  3 00:27:16 2009	(r200049)
@@ -52,7 +52,9 @@ struct arc_buf {
 	void			*b_data;
 	arc_evict_func_t	*b_efunc;
 	void			*b_private;
+#ifdef _KERNEL	
 	struct buf 		*b_bp;
+#endif
 };
 
 typedef enum arc_buf_contents {
@@ -83,6 +85,7 @@ int arc_released(arc_buf_t *buf);
 int arc_has_callback(arc_buf_t *buf);
 void arc_buf_freeze(arc_buf_t *buf);
 void arc_buf_thaw(arc_buf_t *buf);
+	
 #ifdef ZFS_DEBUG
 int arc_referenced(arc_buf_t *buf);
 #endif
@@ -112,7 +115,6 @@ int arc_tryread(spa_t *spa, blkptr_t *bp
 void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
 int arc_buf_evict(arc_buf_t *buf);
 
-void arc_binval(spa_t *spa, dva_t *dva, uint64_t size);
 void arc_flush(spa_t *spa);
 void arc_tempreserve_clear(uint64_t reserve);
 int arc_tempreserve_space(uint64_t reserve, uint64_t txg);

Added: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bio.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bio.h	Thu Dec  3 00:27:16 2009	(r200049)
@@ -0,0 +1,60 @@
+/**************************************************************************
+
+Copyright (c) 2009, Kip Macy, BitGravity Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the BitGravity Corporation nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+$FreeBSD$
+
+***************************************************************************/
+
+#ifndef	_SYS_ZFS_BIO_H
+#define	_SYS_ZFS_BIO_H
+
+#define	ZBIO_BUF_CLONING	(1 << 30)	/* is being cloned */
+
+void zbio_sync_cache(spa_t *spa, blkptr_t *bp, uint64_t txg, uint64_t size);
+void zbio_getblk(arc_buf_t *buf);
+void zbio_data_getblk(arc_buf_t *buf);
+void zbio_relse(arc_buf_t *buf, size_t size);
+
+typedef struct zbio_buf_hdr zbio_buf_hdr_t;
+struct zbio_buf_hdr {
+	/* protected by hash lock */
+	dva_t			b_dva;
+	uint64_t		b_birth;
+	uint32_t		b_flags;
+	uint32_t		b_datacnt;
+
+	/* immutable */
+	arc_buf_contents_t	b_type;
+	uint64_t		b_size;
+	spa_t			*b_spa;
+};
+
+#ifdef _KERNEL
+void zbio_init(void);
+void zbio_fini(void);
+#endif
+#endif

Added: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_bio.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_bio.c	Thu Dec  3 00:27:16 2009	(r200049)
@@ -0,0 +1,321 @@
+/**************************************************************************
+
+Copyright (c) 2009, Kip Macy, BitGravity Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the BitGravity Corporation nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/spa.h>
+#include <sys/zio.h>
+#include <sys/zio_checksum.h>
+#include <sys/zfs_context.h>
+#include <sys/arc.h>
+#include <sys/zfs_bio.h>
+#include <sys/refcount.h>
+#include <sys/vdev.h>
+#include <sys/callb.h>
+#include <sys/kstat.h>
+#include <sys/sdt.h>
+
+#include <vm/vm_pageout.h>
+
+#ifdef _KERNEL
+
+#define	BUF_EMPTY(buf)						\
+	((buf)->b_dva.dva_word[0] == 0 &&			\
+	(buf)->b_dva.dva_word[1] == 0 &&			\
+	(buf)->b_birth == 0)
+
+SYSCTL_DECL(_vfs_zfs);
+static int zfs_page_cache_disable = 1;
+TUNABLE_INT("vfs.zfs.page_cache_disable", &zfs_page_cache_disable);
+SYSCTL_INT(_vfs_zfs, OID_AUTO, page_cache_disable, CTLFLAG_RDTUN,
+    &zfs_page_cache_disable, 0, "Disable backing ARC with page cache ");
+
+static eventhandler_tag zbio_event_shutdown = NULL;
+
+void
+zbio_data_getblk(arc_buf_t *buf)
+{
+
+	zbio_getblk(buf);
+}
+
+void
+zbio_getblk(arc_buf_t *buf)
+{
+	zbio_buf_hdr_t		*hdr = (zbio_buf_hdr_t *)buf->b_hdr;
+	uint64_t		size = hdr->b_size;
+	arc_buf_contents_t	type = hdr->b_type;
+	spa_t			*spa = hdr->b_spa;
+	uint64_t blkno = hdr->b_dva.dva_word[1] & ~(1ULL<<63);
+	void *data;
+	arc_buf_t *tbuf;
+	struct vnode *vp;
+	int i, flags = 0;
+	struct buf *newbp;
+	struct bufobj *bo;
+	vm_pindex_t start, end;
+	vm_object_t object;
+
+	vp = spa_get_vnode(spa);
+	bo = &vp->v_bufobj;
+	newbp = NULL;
+	if ((size < PAGE_SIZE) || (hdr->b_flags & ZBIO_BUF_CLONING) ||
+	    zfs_page_cache_disable) {
+		data = zio_buf_alloc(size);
+		hdr->b_flags &= ~ZBIO_BUF_CLONING;
+	} else if (BUF_EMPTY(hdr)) {
+		newbp = geteblk(size, flags);
+		data = newbp->b_data;
+	} else {
+		newbp = getblk(vp, blkno, size, 0, 0, flags | GB_LOCK_NOWAIT);
+		if (newbp == NULL)
+			newbp = geteblk(size, flags);
+		else
+			brelvp(newbp);
+		data = newbp->b_data;
+	}
+
+	if (newbp != NULL) {
+		BUF_KERNPROC(newbp);
+		newbp->b_bufobj = bo;
+		CTR4(KTR_SPARE2, "arc_getblk() bp=%p flags %X "
+		    "blkno %ld npages %d",
+		    newbp, newbp->b_flags, blkno, newbp->b_npages);
+	}
+
+	buf->b_bp = newbp;
+	buf->b_data = data;
+}
+
+void
+zbio_relse(arc_buf_t *buf, size_t size)
+{
+	struct buf *bp = buf->b_bp;
+	void * data = buf->b_data;
+
+	if (bp == NULL) {
+		zio_buf_free(data, size);
+		return;
+	}
+
+	CTR4(KTR_SPARE2, "arc_brelse() bp=%p flags %X"
+	    " size %ld blkno=%ld",
+	    bp, bp->b_flags, size, bp->b_blkno);
+
+	bp->b_flags |= B_ZFS;
+	brelse(bp);
+}
+
+void
+zbio_sync_cache(spa_t *spa, blkptr_t *bp, uint64_t txg, uint64_t size)
+{
+#ifdef notyet
+	uint64_t blkno, blkno_lookup;
+	struct vnode *vp;
+	struct bufobj *bo;
+	struct buf *bp;
+	vm_pindex_t start, end;
+	vm_object_t object;
+	vm_page_t m;
+	int i;
+
+	if (zfs_page_cache_disable)
+		return;
+	blkno_lookup = blkno = dva->dva_word[1] & ~(1ULL<<63);
+	vp = spa_get_vnode(spa);
+	bo = &vp->v_bufobj;
+
+	if (dva == NULL || spa == NULL || blkno == 0 || size == 0) 
+		return;
+
+	start = OFF_TO_IDX((blkno_lookup << 9));
+	end = start + OFF_TO_IDX(size + PAGE_MASK);
+	object = vp->v_object;
+
+	VM_OBJECT_LOCK(object);
+	vm_page_cache_free(object, start, end);
+	vm_object_page_remove(object, start, end, FALSE);
+#ifdef INVARIANTS
+	for (i = 0; i < OFF_TO_IDX(size); i++) {
+		KASSERT(vm_page_lookup(object, start + i) == NULL,
+		    ("found page at %ld blkno %ld blkno_lookup %ld",
+			start + i, blkno, blkno_lookup));
+	}
+#endif	
+	VM_OBJECT_UNLOCK(object);
+#endif
+}
+
+#if 0
+static void
+arc_pcache(struct vnode *vp, struct buf *bp, uint64_t blkno)
+{
+	vm_pindex_t start = OFF_TO_IDX((blkno << 9));
+	vm_object_t object = vp->v_object;
+	struct bufobj *bo = &vp->v_bufobj;
+	vm_page_t m;
+	int i;
+
+	CTR3(KTR_SPARE2, "arc_pcache() bp=%p blkno %ld npages %d",
+		   bp, blkno, bp->b_npages);
+	VM_OBJECT_LOCK(object);
+	vm_page_lock_queues();
+	for (i = 0; i < bp->b_npages; i++) {
+		m = bp->b_pages[i];
+		m->valid = VM_PAGE_BITS_ALL;
+		vm_page_insert(m, object, start + i);
+		m->flags &= ~PG_UNMANAGED;
+		vm_page_enqueue(PQ_INACTIVE, m);
+		vdrop(vp);
+	}
+	vm_page_unlock_queues();
+	VM_OBJECT_UNLOCK(object);
+	bp->b_bufobj = bo;
+	bp->b_flags |= B_VMIO;
+}
+
+static void
+arc_bcache(arc_buf_t *buf)
+{	
+	uint64_t blkno = buf->b_hdr->b_dva.dva_word[1] & ~(1ULL<<63);
+	struct buf *bp;
+	struct vnode *vp = spa_get_vnode(buf->b_hdr->b_spa);
+	arc_buf_hdr_t *hdr = buf->b_hdr;
+	int cachebuf;
+
+	if (zfs_page_cache_disable)
+		return;
+
+	if (blkno == 0 || hdr->b_birth == 0)
+		return;
+
+	bp = buf->b_bp;
+	bp->b_birth = hdr->b_birth;
+	bp->b_blkno = bp->b_lblkno = blkno;
+	bp->b_offset = (blkno << 9);
+	cachebuf = ((hdr->b_datacnt == 1) &&
+	    !(hdr->b_flags & ARC_IO_ERROR) &&
+	    ((bp->b_flags & (B_INVAL|B_CACHE)) == B_CACHE) &&
+	    (blkno & 0x7) == 0);
+
+	arc_binval(hdr->b_spa, &hdr->b_dva, hdr->b_size);
+	if (cachebuf) 
+		arc_pcache(vp, bp, blkno);	
+}
+#endif
+
+static void
+zbio_shutdown(void *arg __unused, int howto __unused)
+{
+	struct mount *mp, *tmpmp;
+	int error;
+
+	/*
+	 * unmount all ZFS file systems - freeing any buffers
+	 * then free all space allocator resources
+	 */
+	TAILQ_FOREACH_SAFE(mp, &mountlist, mnt_list, tmpmp) {
+		if (strcmp(mp->mnt_vfc->vfc_name, "zfs") == 0) {
+			error = dounmount(mp, MNT_FORCE, curthread);
+			if (error) {
+				TAILQ_REMOVE(&mountlist, mp, mnt_list);
+				printf("unmount of %s failed (",
+				    mp->mnt_stat.f_mntonname);
+				if (error == EBUSY)
+					printf("BUSY)\n");
+				else
+					printf("%d)\n", error);
+			}
+		}
+		
+	}
+	arc_flush(NULL);
+
+#ifdef NOTYET
+	/*
+	 * need corresponding includes
+	 */
+	zfsdev_fini();
+	zvol_fini();
+	zfs_fini();
+#endif	
+	spa_fini();
+}
+
+void
+zbio_init(void)
+{
+
+	zbio_event_shutdown = EVENTHANDLER_REGISTER(shutdown_pre_sync,
+	    zbio_shutdown, NULL, EVENTHANDLER_PRI_FIRST);
+}
+
+void
+zbio_fini(void)
+{
+	if (zbio_event_shutdown != NULL)
+		EVENTHANDLER_DEREGISTER(shutdown_pre_sync, zbio_event_shutdown);
+}
+#else
+
+void
+zbio_getblk(arc_buf_t *buf)
+{
+	zbio_buf_hdr_t		*hdr = (zbio_buf_hdr_t *)buf->b_hdr;
+	uint64_t		size = hdr->b_size;
+

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-user mailing list