svn commit: r199776 - in user/kmacy/releng_8_fcs_buf_xen: cddl/contrib/opensolaris/cmd/ztest cddl/contrib/opensolaris/lib/libzpool/common/sys sys/amd64/amd64 sys/amd64/include sys/cddl/compat/opens...

Kip Macy kmacy at FreeBSD.org
Wed Nov 25 02:10:08 UTC 2009


Author: kmacy
Date: Wed Nov 25 02:10:07 2009
New Revision: 199776
URL: http://svn.freebsd.org/changeset/base/199776

Log:
  merge releng_8_fcs changes

Modified:
  user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/cmd/ztest/ztest.c
  user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
  user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/minidump_machdep.c
  user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/pmap.c
  user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/uma_machdep.c
  user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/md_var.h
  user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/vmparam.h
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/kmem.h
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/mutex.h
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/rwlock.h
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
  user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
  user/kmacy/releng_8_fcs_buf_xen/sys/compat/freebsd32/freebsd32_misc.c
  user/kmacy/releng_8_fcs_buf_xen/sys/conf/files
  user/kmacy/releng_8_fcs_buf_xen/sys/conf/files.amd64
  user/kmacy/releng_8_fcs_buf_xen/sys/conf/files.i386
  user/kmacy/releng_8_fcs_buf_xen/sys/conf/kern.pre.mk
  user/kmacy/releng_8_fcs_buf_xen/sys/conf/options
  user/kmacy/releng_8_fcs_buf_xen/sys/kern/kern_resource.c
  user/kmacy/releng_8_fcs_buf_xen/sys/kern/subr_witness.c
  user/kmacy/releng_8_fcs_buf_xen/sys/kern/uipc_sockbuf.c
  user/kmacy/releng_8_fcs_buf_xen/sys/kern/uipc_socket.c
  user/kmacy/releng_8_fcs_buf_xen/sys/kern/uipc_syscalls.c
  user/kmacy/releng_8_fcs_buf_xen/sys/kern/vfs_bio.c
  user/kmacy/releng_8_fcs_buf_xen/sys/kern/vfs_subr.c
  user/kmacy/releng_8_fcs_buf_xen/sys/modules/zfs/Makefile
  user/kmacy/releng_8_fcs_buf_xen/sys/netinet/in_pcb.c
  user/kmacy/releng_8_fcs_buf_xen/sys/netinet/in_pcb.h
  user/kmacy/releng_8_fcs_buf_xen/sys/netinet/ip_output.c
  user/kmacy/releng_8_fcs_buf_xen/sys/netinet/tcp_input.c
  user/kmacy/releng_8_fcs_buf_xen/sys/netinet/tcp_usrreq.c
  user/kmacy/releng_8_fcs_buf_xen/sys/sys/buf.h
  user/kmacy/releng_8_fcs_buf_xen/sys/sys/file.h
  user/kmacy/releng_8_fcs_buf_xen/sys/sys/malloc.h
  user/kmacy/releng_8_fcs_buf_xen/sys/sys/param.h
  user/kmacy/releng_8_fcs_buf_xen/sys/sys/sockbuf.h
  user/kmacy/releng_8_fcs_buf_xen/sys/sys/socket.h
  user/kmacy/releng_8_fcs_buf_xen/sys/sys/socketvar.h
  user/kmacy/releng_8_fcs_buf_xen/sys/sys/sockstate.h
  user/kmacy/releng_8_fcs_buf_xen/sys/sys/syscallsubr.h
  user/kmacy/releng_8_fcs_buf_xen/sys/vm/pmap.h
  user/kmacy/releng_8_fcs_buf_xen/sys/vm/uma.h
  user/kmacy/releng_8_fcs_buf_xen/sys/vm/uma_core.c
  user/kmacy/releng_8_fcs_buf_xen/sys/vm/vm.h
  user/kmacy/releng_8_fcs_buf_xen/sys/vm/vm_contig.c
  user/kmacy/releng_8_fcs_buf_xen/sys/vm/vm_glue.c
  user/kmacy/releng_8_fcs_buf_xen/sys/vm/vm_kern.c
  user/kmacy/releng_8_fcs_buf_xen/sys/vm/vm_page.c
  user/kmacy/releng_8_fcs_buf_xen/sys/vm/vnode_pager.c

Modified: user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/cmd/ztest/ztest.c
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/cmd/ztest/ztest.c	Wed Nov 25 02:00:09 2009	(r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/cmd/ztest/ztest.c	Wed Nov 25 02:10:07 2009	(r199776)
@@ -1304,7 +1304,7 @@ ztest_dmu_objset_create_destroy(ztest_ar
 	if (ztest_random(2) == 0 &&
 	    dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os) == 0) {
 		zr.zr_os = os;
-		zil_replay(os, &zr, &zr.zr_assign, ztest_replay_vector, NULL);
+		zil_replay(os, &zr, ztest_replay_vector);
 		dmu_objset_close(os);
 	}
 
@@ -3321,8 +3321,7 @@ ztest_run(char *pool)
 			if (test_future)
 				ztest_dmu_check_future_leak(&za[t]);
 			zr.zr_os = za[d].za_os;
-			zil_replay(zr.zr_os, &zr, &zr.zr_assign,
-			    ztest_replay_vector, NULL);
+			zil_replay(zr.zr_os, &zr, ztest_replay_vector);
 			za[d].za_zilog = zil_open(za[d].za_os, NULL);
 		}
 

Modified: user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h	Wed Nov 25 02:00:09 2009	(r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h	Wed Nov 25 02:10:07 2009	(r199776)
@@ -305,6 +305,8 @@ extern void cv_broadcast(kcondvar_t *cv)
 #define	KM_PUSHPAGE		KM_SLEEP
 #define	KM_NOSLEEP		UMEM_DEFAULT
 #define	KMC_NODEBUG		UMC_NODEBUG
+#define	KM_NODEBUG		KMC_NODEBUG
+
 #define	kmem_alloc(_s, _f)	umem_alloc(_s, _f)
 #define	kmem_zalloc(_s, _f)	umem_zalloc(_s, _f)
 #define	kmem_free(_b, _s)	umem_free(_b, _s)

Modified: user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/minidump_machdep.c
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/minidump_machdep.c	Wed Nov 25 02:00:09 2009	(r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/minidump_machdep.c	Wed Nov 25 02:10:07 2009	(r199776)
@@ -56,6 +56,7 @@ CTASSERT(sizeof(struct kerneldumpheader)
 extern uint64_t KPDPphys;
 
 uint64_t *vm_page_dump;
+uint64_t *vm_page_dump_exclude;
 int vm_page_dump_size;
 
 static struct kerneldumpheader kdh;
@@ -71,10 +72,16 @@ CTASSERT(sizeof(*vm_page_dump) == 8);
 static int
 is_dumpable(vm_paddr_t pa)
 {
-	int i;
+	int i, idx, bit, isdata;
+	uint64_t pfn = pa;
+
+	pfn >>= PAGE_SHIFT;
+	idx = pfn >> 6;		/* 2^6 = 64 */
+	bit = pfn & 63;
+	isdata = ((vm_page_dump_exclude[idx] & (1ul << bit)) == 0);
 
 	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
-		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
+		if (pa >= dump_avail[i] && pa < dump_avail[i + 1] && isdata)
 			return (1);
 	}
 	return (0);
@@ -226,6 +233,7 @@ minidumpsys(struct dumperinfo *di)
 	dumpsize = ptesize;
 	dumpsize += round_page(msgbufp->msg_size);
 	dumpsize += round_page(vm_page_dump_size);
+	printf("dumpsize: ");
 	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
 		bits = vm_page_dump[i];
 		while (bits) {
@@ -238,10 +246,13 @@ minidumpsys(struct dumperinfo *di)
 				dump_drop_page(pa);
 			}
 			bits &= ~(1ul << bit);
+			if ((dumpsize % (1<<29)) == 0)
+				printf("%ldMB ", (dumpsize>>20));
 		}
 	}
 	dumpsize += PAGE_SIZE;
 
+	printf("\n");
 	/* Determine dump offset on device. */
 	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
 		error = ENOSPC;
@@ -273,6 +284,7 @@ minidumpsys(struct dumperinfo *di)
 		goto fail;
 	dumplo += sizeof(kdh);
 
+	printf("write header\n");
 	/* Dump my header */
 	bzero(&fakept, sizeof(fakept));
 	bcopy(&mdhdr, &fakept, sizeof(mdhdr));
@@ -280,16 +292,19 @@ minidumpsys(struct dumperinfo *di)
 	if (error)
 		goto fail;
 
+	printf("write msgbuf\n");
 	/* Dump msgbuf up front */
 	error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
 	if (error)
 		goto fail;
 
+	printf("write bitmap\n");
 	/* Dump bitmap */
 	error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
 	if (error)
 		goto fail;
 
+	printf("\nDump kernel page table pages\n");
 	/* Dump kernel page table pages */
 	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
 	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR,
@@ -343,8 +358,10 @@ minidumpsys(struct dumperinfo *di)
 
 	/* Dump memory chunks */
 	/* XXX cluster it up and use blk_dump() */
-	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
-		bits = vm_page_dump[i];
+	printf("\nclustering memory chunks\n");
+	for (i = 0;
+	     i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+		bits = vm_page_dump[i] & ~(vm_page_dump_exclude[i]);
 		while (bits) {
 			bit = bsfq(bits);
 			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
@@ -354,7 +371,6 @@ minidumpsys(struct dumperinfo *di)
 			bits &= ~(1ul << bit);
 		}
 	}
-
 	error = blk_flush(di);
 	if (error)
 		goto fail;
@@ -365,6 +381,7 @@ minidumpsys(struct dumperinfo *di)
 		goto fail;
 	dumplo += sizeof(kdh);
 
+	printf("\nstarting dump\n");
 	/* Signal completion, signoff and exit stage left. */
 	dump_write(di, NULL, 0, 0, 0);
 	printf("\nDump complete\n");
@@ -403,3 +420,25 @@ dump_drop_page(vm_paddr_t pa)
 	bit = pa & 63;
 	atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
 }
+
+void
+dump_exclude_page(vm_paddr_t pa)
+{
+	int idx, bit;
+
+	pa >>= PAGE_SHIFT;
+	idx = pa >> 6;		/* 2^6 = 64 */
+	bit = pa & 63;
+	atomic_set_long(&vm_page_dump_exclude[idx], 1ul << bit);
+}
+
+void
+dump_unexclude_page(vm_paddr_t pa)
+{
+	int idx, bit;
+
+	pa >>= PAGE_SHIFT;
+	idx = pa >> 6;		/* 2^6 = 64 */
+	bit = pa & 63;
+	atomic_clear_long(&vm_page_dump_exclude[idx], 1ul << bit);
+}
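
dump_exclude_page() and dump_unexclude_page() use the same one-bit-per-page
layout as the existing vm_page_dump bitmap. A minimal userland sketch of that
index math, assuming 4 KB pages and 64-bit bitmap words (constants are
stand-ins, not the kernel's definitions):

#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SHIFT	12	/* 4 KB pages assumed */

static void
pa_to_bitmap_slot(uint64_t pa, uint64_t *idx, int *bit)
{
	uint64_t pfn = pa >> SKETCH_PAGE_SHIFT;	/* page frame number */

	*idx = pfn >> 6;	/* 64 bits per bitmap word: 2^6 = 64 */
	*bit = pfn & 63;	/* bit position within that word */
}

int
main(void)
{
	uint64_t idx;
	int bit;

	pa_to_bitmap_slot(0x12345000UL, &idx, &bit);
	printf("word %ju, bit %d\n", (uintmax_t)idx, bit);
	return (0);
}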

Modified: user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/pmap.c
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/pmap.c	Wed Nov 25 02:00:09 2009	(r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/pmap.c	Wed Nov 25 02:10:07 2009	(r199776)
@@ -1137,10 +1137,16 @@ pmap_map(vm_offset_t *virt, vm_paddr_t s
  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 void
-pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
+pmap_qenter_prot(vm_offset_t sva, vm_page_t *ma, int count, vm_prot_t prot)
 {
 	pt_entry_t *endpte, oldpte, *pte;
+	uint64_t flags = PG_V;
 
+	if (prot & VM_PROT_WRITE)
+		flags |= PG_RW;
+	if ((prot & VM_PROT_EXECUTE) == 0)
+		flags |= PG_NX;
+	
 	oldpte = 0;
 	pte = vtopte(sva);
 	endpte = pte + count;
@@ -1148,6 +1154,9 @@ pmap_qenter(vm_offset_t sva, vm_page_t *
 		oldpte |= *pte;
 		pte_store(pte, VM_PAGE_TO_PHYS(*ma) | PG_G |
 		    pmap_cache_bits((*ma)->md.pat_mode, 0) | PG_RW | PG_V);
+		pte_store(pte, VM_PAGE_TO_PHYS(*ma) | PG_G | flags);
+		if (prot & VM_PROT_EXCLUDE)
+			dump_exclude_page(VM_PAGE_TO_PHYS(*ma));
 		pte++;
 		ma++;
 	}
@@ -1156,6 +1165,16 @@ pmap_qenter(vm_offset_t sva, vm_page_t *
 		    PAGE_SIZE);
 }
 
+void
+pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
+{
+
+	pmap_qenter_prot(sva, ma, count,
+	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
+
+}
+
+
 /*
  * This routine tears out page mappings from the
  * kernel -- it is meant only for temporary mappings.
@@ -1168,6 +1187,7 @@ pmap_qremove(vm_offset_t sva, int count)
 
 	va = sva;
 	while (count-- > 0) {
+		dump_unexclude_page(pmap_kextract(va));
 		pmap_kremove(va);
 		va += PAGE_SIZE;
 	}
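
The translation in pmap_qenter_prot() maps the requested protection onto PTE
bits: writable mappings get PG_RW, non-executable mappings get PG_NX. A hedged
userland sketch of just that translation (the constants below are illustrative,
not the amd64 definitions):

#include <stdint.h>
#include <stdio.h>

#define SKETCH_PG_V		0x001UL		/* valid */
#define SKETCH_PG_RW		0x002UL		/* writable */
#define SKETCH_PG_NX		(1UL << 63)	/* no-execute */

#define SKETCH_PROT_WRITE	0x2
#define SKETCH_PROT_EXECUTE	0x4

static uint64_t
prot_to_pte_flags(int prot)
{
	uint64_t flags = SKETCH_PG_V;

	if (prot & SKETCH_PROT_WRITE)
		flags |= SKETCH_PG_RW;
	if ((prot & SKETCH_PROT_EXECUTE) == 0)
		flags |= SKETCH_PG_NX;
	return (flags);
}

int
main(void)
{
	/* a read-only, non-executable mapping: expect PG_V | PG_NX */
	printf("flags = %#jx\n", (uintmax_t)prot_to_pte_flags(0));
	return (0);
}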

Modified: user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/uma_machdep.c
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/uma_machdep.c	Wed Nov 25 02:00:09 2009	(r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/uma_machdep.c	Wed Nov 25 02:10:07 2009	(r199776)
@@ -66,7 +66,8 @@ uma_small_alloc(uma_zone_t zone, int byt
 			break;
 	}
 	pa = m->phys_addr;
-	dump_add_page(pa);
+	if ((wait & M_NODUMP) == 0)
+		dump_add_page(pa);
 	va = (void *)PHYS_TO_DMAP(pa);
 	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
 		pagezero(va);
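
uma_small_alloc() now registers a page in the dump bitmap only when the caller
did not pass M_NODUMP. A minimal sketch of that gating, with a stand-in flag
value and a callback in place of dump_add_page():

#include <stdint.h>
#include <stdio.h>

#define SKETCH_M_NODUMP	0x0800	/* illustrative value, not malloc(9)'s */

static void
dump_add_stub(uint64_t pa)
{
	printf("page at %#jx will appear in minidumps\n", (uintmax_t)pa);
}

static void
register_for_dump(uint64_t pa, int wait, void (*dump_add_cb)(uint64_t))
{
	if ((wait & SKETCH_M_NODUMP) == 0)
		dump_add_cb(pa);	/* default: page appears in dumps */
}

int
main(void)
{
	register_for_dump(0x1000, 0, dump_add_stub);		/* added */
	register_for_dump(0x2000, SKETCH_M_NODUMP, dump_add_stub); /* skipped */
	return (0);
}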

Modified: user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/md_var.h
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/md_var.h	Wed Nov 25 02:00:09 2009	(r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/md_var.h	Wed Nov 25 02:10:07 2009	(r199776)
@@ -60,6 +60,7 @@ extern	char	kstack[];
 extern	char	sigcode[];
 extern	int	szsigcode;
 extern	uint64_t *vm_page_dump;
+extern	uint64_t *vm_page_dump_exclude;
 extern	int	vm_page_dump_size;
 extern	int	_udatasel;
 extern	int	_ucodesel;
@@ -88,6 +89,8 @@ void	fs_load_fault(void) __asm(__STRING(
 void	gs_load_fault(void) __asm(__STRING(gs_load_fault));
 void	dump_add_page(vm_paddr_t);
 void	dump_drop_page(vm_paddr_t);
+void	dump_exclude_page(vm_paddr_t);
+void	dump_unexclude_page(vm_paddr_t);
 void	initializecpu(void);
 void	initializecpucache(void);
 void	fillw(int /*u_short*/ pat, void *base, size_t cnt);

Modified: user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/vmparam.h
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/vmparam.h	Wed Nov 25 02:00:09 2009	(r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/vmparam.h	Wed Nov 25 02:10:07 2009	(r199776)
@@ -88,6 +88,11 @@
 #define	UMA_MD_SMALL_ALLOC
 
 /*
+ * We use a machine-specific sparse kernel dump.
+ */
+#define	VM_MD_MINIDUMP
+
+/*
  * The physical address space is densely populated.
  */
 #define	VM_PHYSSEG_DENSE

Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/kmem.h
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/kmem.h	Wed Nov 25 02:00:09 2009	(r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/kmem.h	Wed Nov 25 02:10:07 2009	(r199776)
@@ -40,7 +40,8 @@
 #define	KM_SLEEP		M_WAITOK
 #define	KM_PUSHPAGE		M_WAITOK
 #define	KM_NOSLEEP		M_NOWAIT
-#define	KMC_NODEBUG		0
+#define	KMC_NODEBUG		UMA_ZONE_NODUMP
+#define	KM_NODEBUG		M_NODUMP
 
 typedef struct kmem_cache {
 	char		kc_name[32];

Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/mutex.h
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/mutex.h	Wed Nov 25 02:00:09 2009	(r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/mutex.h	Wed Nov 25 02:10:07 2009	(r199776)
@@ -46,11 +46,7 @@ typedef enum {
 
 typedef struct sx	kmutex_t;
 
-#ifndef DEBUG
-#define	MUTEX_FLAGS	(SX_DUPOK | SX_NOWITNESS)
-#else
 #define	MUTEX_FLAGS	(SX_DUPOK)
-#endif
 
 #define	mutex_init(lock, desc, type, arg)	do {			\
 	const char *_name;						\

Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/rwlock.h
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/rwlock.h	Wed Nov 25 02:00:09 2009	(r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/rwlock.h	Wed Nov 25 02:10:07 2009	(r199776)
@@ -48,11 +48,7 @@ typedef enum {
 
 typedef	struct sx	krwlock_t;
 
-#ifndef DEBUG
-#define	RW_FLAGS	(SX_DUPOK | SX_NOWITNESS)
-#else
 #define	RW_FLAGS	(SX_DUPOK)
-#endif
 
 #define	RW_READ_HELD(x)		(rw_read_held((x)))
 #define	RW_WRITE_HELD(x)	(rw_write_held((x)))

Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Wed Nov 25 02:00:09 2009	(r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Wed Nov 25 02:10:07 2009	(r199776)
@@ -127,6 +127,7 @@
 #ifdef _KERNEL
 #include <sys/dnlc.h>
 #endif
+#include <sys/ktr.h>
 #include <sys/callb.h>
 #include <sys/kstat.h>
 #include <sys/sdt.h>
@@ -186,6 +187,16 @@ SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_min,
 SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN,
     &zfs_mdcomp_disable, 0, "Disable metadata compression");
 
+static int zfs_page_cache_disable = 0;
+TUNABLE_INT("vfs.zfs.page_cache_disable", &zfs_page_cache_disable);
+SYSCTL_INT(_vfs_zfs, OID_AUTO, page_cache_disable, CTLFLAG_RDTUN,
+    &zfs_page_cache_disable, 0, "Disable backing ARC with page cache ");
+
+#ifdef ZIO_USE_UMA
+extern kmem_cache_t	*zio_buf_cache[];
+extern kmem_cache_t	*zio_data_buf_cache[];
+#endif
+
 /*
  * Note that buffers can be in one of 6 states:
  *	ARC_anon	- anonymous (discussed below)
@@ -218,13 +229,31 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_di
  * second level ARC benefit from these fast lookups.
  */
 
+#define	ARCS_LOCK_PAD		128
+struct arcs_lock {
+	kmutex_t	arcs_lock;
+#ifdef _KERNEL
+	unsigned char	pad[(ARCS_LOCK_PAD - sizeof (kmutex_t))];
+#endif
+};
+
+/*
+ * must be power of two for mask use to work
+ *
+ */
+#define ARC_BUFC_NUMDATALISTS		16
+#define ARC_BUFC_NUMMETADATALISTS	16
+#define ARC_BUFC_NUMLISTS	(ARC_BUFC_NUMMETADATALISTS+ARC_BUFC_NUMDATALISTS)
+
 typedef struct arc_state {
-	list_t	arcs_list[ARC_BUFC_NUMTYPES];	/* list of evictable buffers */
 	uint64_t arcs_lsize[ARC_BUFC_NUMTYPES];	/* amount of evictable data */
 	uint64_t arcs_size;	/* total amount of data in this state */
-	kmutex_t arcs_mtx;
+	list_t	arcs_lists[ARC_BUFC_NUMLISTS]; /* list of evictable buffers */
+	struct arcs_lock arcs_locks[ARC_BUFC_NUMLISTS] __aligned(128);
 } arc_state_t;
 
+#define ARCS_LOCK(s, i) &((s)->arcs_locks[(i)].arcs_lock)
+
 /* The 6 states: */
 static arc_state_t ARC_anon;
 static arc_state_t ARC_mru;
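
The single arcs_mtx is replaced by per-list locks padded to ARCS_LOCK_PAD bytes
so adjacent locks do not share a cache line, and get_buf_info() later hashes a
buffer onto one of the 16 metadata or 16 data lists. A hedged userland sketch
of that lock-striping idea (pthread mutexes and the sizes here are stand-ins;
the pad assumes the mutex fits in LOCK_PAD bytes):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_LISTS	16	/* power of two, so a mask can replace modulo */
#define LOCK_PAD	128	/* assumed larger than sizeof(pthread_mutex_t) */

struct striped_lock {
	pthread_mutex_t	lock;
	unsigned char	pad[LOCK_PAD - sizeof(pthread_mutex_t)];
};

static struct striped_lock stripe[NUM_LISTS];

static unsigned
stripe_index(uint64_t hash)
{
	return ((unsigned)(hash & (NUM_LISTS - 1)));
}

int
main(void)
{
	unsigned i = stripe_index(0xdeadbeefULL);

	pthread_mutex_init(&stripe[i].lock, NULL);
	pthread_mutex_lock(&stripe[i].lock);	/* only this stripe is held */
	printf("hashed onto stripe %u\n", i);
	pthread_mutex_unlock(&stripe[i].lock);
	return (0);
}
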
@@ -235,6 +264,7 @@ static arc_state_t ARC_l2c_only;
 
 typedef struct arc_stats {
 	kstat_named_t arcstat_hits;
+	kstat_named_t arcstat_page_cache_hits;
 	kstat_named_t arcstat_misses;
 	kstat_named_t arcstat_demand_data_hits;
 	kstat_named_t arcstat_demand_data_misses;
@@ -284,6 +314,7 @@ typedef struct arc_stats {
 
 static arc_stats_t arc_stats = {
 	{ "hits",			KSTAT_DATA_UINT64 },
+	{ "page_cache_hits",		KSTAT_DATA_UINT64 },
 	{ "misses",			KSTAT_DATA_UINT64 },
 	{ "demand_data_hits",		KSTAT_DATA_UINT64 },
 	{ "demand_data_misses",		KSTAT_DATA_UINT64 },
@@ -489,6 +520,7 @@ static void arc_evict_ghost(arc_state_t 
 #define	ARC_L2_EVICTED		(1 << 17)	/* evicted during I/O */
 #define	ARC_L2_WRITE_HEAD	(1 << 18)	/* head of write list */
 #define	ARC_STORED		(1 << 19)	/* has been store()d to */
+#define	ARC_BUF_CLONING		(1 << 21)	/* is being cloned */
 
 #define	HDR_IN_HASH_TABLE(hdr)	((hdr)->b_flags & ARC_IN_HASH_TABLE)
 #define	HDR_IO_IN_PROGRESS(hdr)	((hdr)->b_flags & ARC_IO_IN_PROGRESS)
@@ -609,9 +641,10 @@ struct l2arc_buf_hdr {
 
 typedef struct l2arc_data_free {
 	/* protected by l2arc_free_on_write_mtx */
+	arc_buf_t	*l2df_buf;
 	void		*l2df_data;
 	size_t		l2df_size;
-	void		(*l2df_func)(void *, size_t);
+	void		(*l2df_func)(arc_buf_t *, void *, size_t);
 	list_node_t	l2df_list_node;
 } l2arc_data_free_t;
 
@@ -953,20 +986,42 @@ arc_buf_freeze(arc_buf_t *buf)
 }
 
 static void
+get_buf_info(arc_buf_hdr_t *ab, arc_state_t *state, list_t **list, kmutex_t **lock)
+{
+	uint64_t buf_hashid = buf_hash(ab->b_spa, &ab->b_dva, ab->b_birth);
+
+	if (ab->b_type == ARC_BUFC_METADATA) 
+		buf_hashid &= (ARC_BUFC_NUMMETADATALISTS-1);
+	else {
+		buf_hashid &= (ARC_BUFC_NUMDATALISTS-1);
+		buf_hashid += ARC_BUFC_NUMMETADATALISTS;
+	}
+
+	*list = &state->arcs_lists[buf_hashid];
+	*lock = ARCS_LOCK(state, buf_hashid);
+}
+
+
+static void
 add_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag)
 {
+
 	ASSERT(MUTEX_HELD(hash_lock));
 
 	if ((refcount_add(&ab->b_refcnt, tag) == 1) &&
 	    (ab->b_state != arc_anon)) {
+		list_t *list;
+		kmutex_t *lock;
 		uint64_t delta = ab->b_size * ab->b_datacnt;
-		list_t *list = &ab->b_state->arcs_list[ab->b_type];
 		uint64_t *size = &ab->b_state->arcs_lsize[ab->b_type];
 
-		ASSERT(!MUTEX_HELD(&ab->b_state->arcs_mtx));
-		mutex_enter(&ab->b_state->arcs_mtx);
+		get_buf_info(ab, ab->b_state, &list, &lock);
+		ASSERT(!MUTEX_HELD(lock));
+		mutex_enter(lock);
 		ASSERT(list_link_active(&ab->b_arc_node));
 		list_remove(list, ab);
+		mutex_exit(lock);
+
 		if (GHOST_STATE(ab->b_state)) {
 			ASSERT3U(ab->b_datacnt, ==, 0);
 			ASSERT3P(ab->b_buf, ==, NULL);
@@ -975,7 +1030,6 @@ add_reference(arc_buf_hdr_t *ab, kmutex_
 		ASSERT(delta > 0);
 		ASSERT3U(*size, >=, delta);
 		atomic_add_64(size, -delta);
-		mutex_exit(&ab->b_state->arcs_mtx);
 		/* remove the prefetch flag if we get a reference */
 		if (ab->b_flags & ARC_PREFETCH)
 			ab->b_flags &= ~ARC_PREFETCH;
@@ -994,14 +1048,19 @@ remove_reference(arc_buf_hdr_t *ab, kmut
 	if (((cnt = refcount_remove(&ab->b_refcnt, tag)) == 0) &&
 	    (state != arc_anon)) {
 		uint64_t *size = &state->arcs_lsize[ab->b_type];
+		list_t *list;
+		kmutex_t *lock;
 
-		ASSERT(!MUTEX_HELD(&state->arcs_mtx));
-		mutex_enter(&state->arcs_mtx);
+		get_buf_info(ab, state, &list, &lock);
+		
+		ASSERT(!MUTEX_HELD(lock));
+		mutex_enter(lock);
 		ASSERT(!list_link_active(&ab->b_arc_node));
-		list_insert_head(&state->arcs_list[ab->b_type], ab);
+		list_insert_head(list, ab);
+		mutex_exit(lock);
+
 		ASSERT(ab->b_datacnt > 0);
 		atomic_add_64(size, ab->b_size * ab->b_datacnt);
-		mutex_exit(&state->arcs_mtx);
 	}
 	return (cnt);
 }
@@ -1016,6 +1075,8 @@ arc_change_state(arc_state_t *new_state,
 	arc_state_t *old_state = ab->b_state;
 	int64_t refcnt = refcount_count(&ab->b_refcnt);
 	uint64_t from_delta, to_delta;
+	list_t *list;
+	kmutex_t *lock;
 
 	ASSERT(MUTEX_HELD(hash_lock));
 	ASSERT(new_state != old_state);
@@ -1030,14 +1091,17 @@ arc_change_state(arc_state_t *new_state,
 	 */
 	if (refcnt == 0) {
 		if (old_state != arc_anon) {
-			int use_mutex = !MUTEX_HELD(&old_state->arcs_mtx);
+			int use_mutex;
 			uint64_t *size = &old_state->arcs_lsize[ab->b_type];
 
+			get_buf_info(ab, old_state, &list, &lock);
+			use_mutex = !MUTEX_HELD(lock);
+
 			if (use_mutex)
-				mutex_enter(&old_state->arcs_mtx);
+				mutex_enter(lock);
 
 			ASSERT(list_link_active(&ab->b_arc_node));
-			list_remove(&old_state->arcs_list[ab->b_type], ab);
+			list_remove(list, ab);
 
 			/*
 			 * If prefetching out of the ghost cache,
@@ -1052,16 +1116,20 @@ arc_change_state(arc_state_t *new_state,
 			atomic_add_64(size, -from_delta);
 
 			if (use_mutex)
-				mutex_exit(&old_state->arcs_mtx);
+				mutex_exit(lock);
 		}
 		if (new_state != arc_anon) {
-			int use_mutex = !MUTEX_HELD(&new_state->arcs_mtx);
+			int use_mutex; 
 			uint64_t *size = &new_state->arcs_lsize[ab->b_type];
 
+			get_buf_info(ab, new_state, &list, &lock);
+			use_mutex = !MUTEX_HELD(lock);
+			
+			
 			if (use_mutex)
-				mutex_enter(&new_state->arcs_mtx);
+				mutex_enter(lock);
 
-			list_insert_head(&new_state->arcs_list[ab->b_type], ab);
+			list_insert_head(list, ab);
 
 			/* ghost elements have a ghost size */
 			if (GHOST_STATE(new_state)) {
@@ -1072,7 +1140,7 @@ arc_change_state(arc_state_t *new_state,
 			atomic_add_64(size, to_delta);
 
 			if (use_mutex)
-				mutex_exit(&new_state->arcs_mtx);
+				mutex_exit(lock);
 		}
 	}
 
@@ -1132,8 +1200,9 @@ arc_data_buf_free(void *buf, uint64_t si
 	atomic_add_64(&arc_size, -size);
 }
 
-arc_buf_t *
-arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
+static arc_buf_t *
+_arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type,
+	blkptr_t *bp)
 {
 	arc_buf_hdr_t *hdr;
 	arc_buf_t *buf;
@@ -1143,6 +1212,14 @@ arc_buf_alloc(spa_t *spa, int size, void
 	ASSERT(BUF_EMPTY(hdr));
 	hdr->b_size = size;
 	hdr->b_type = type;
+	if (bp != NULL) {
+		hdr->b_dva = *BP_IDENTITY(bp);
+		hdr->b_birth = bp->blk_birth;
+	} else {
+		hdr->b_dva.dva_word[0] = 0;
+		hdr->b_dva.dva_word[1] = 0;
+		hdr->b_birth = 0;
+	}
 	hdr->b_spa = spa;
 	hdr->b_state = arc_anon;
 	hdr->b_arc_access = 0;
@@ -1162,6 +1239,13 @@ arc_buf_alloc(spa_t *spa, int size, void
 	return (buf);
 }
 
+arc_buf_t *
+arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
+{
+
+	return (_arc_buf_alloc(spa, size, tag, type, NULL));
+}
+
 static arc_buf_t *
 arc_buf_clone(arc_buf_t *from)
 {
@@ -1176,6 +1260,7 @@ arc_buf_clone(arc_buf_t *from)
 	buf->b_private = NULL;
 	buf->b_next = hdr->b_buf;
 	hdr->b_buf = buf;
+	hdr->b_flags |= ARC_BUF_CLONING;
 	arc_get_data_buf(buf);
 	bcopy(from->b_data, buf->b_data, size);
 	hdr->b_datacnt += 1;
@@ -1214,17 +1299,242 @@ arc_buf_add_ref(arc_buf_t *buf, void* ta
 	    data, metadata, hits);
 }
 
+void
+arc_binval(spa_t *spa, dva_t *dva, uint64_t size)
+{
+	uint64_t blkno, blkno_lookup;
+	struct vnode *vp;
+	struct bufobj *bo;
+	struct buf *bp;
+	vm_pindex_t start, end;
+	vm_object_t object;
+	vm_page_t m;
+	int i;
+
+	if (zfs_page_cache_disable)
+		return;
+
+	if (dva == NULL || spa == NULL || blkno == 0 || size == 0)
+		return;
+
+	blkno_lookup = blkno = dva->dva_word[1] & ~(1UL<<63);
+	vp = spa_get_vnode(spa);
+	bo = &vp->v_bufobj;
+
+	BO_LOCK(bo);
+retry:
+	bp = gbincore(bo, blkno_lookup);
+	if (bp != NULL) {
+		BUF_LOCK(bp, LK_EXCLUSIVE | LK_INTERLOCK, BO_MTX(bo));
+		bremfree(bp);
+		bp->b_flags |= B_INVAL;
+		bp->b_birth = 0;
+		brelse(bp);
+	} else if (blkno_lookup & 0x7) {
+		blkno_lookup &= ~0x7;
+		goto retry;
+	} else
+		BO_UNLOCK(bo);
+
+	start = OFF_TO_IDX((blkno_lookup << 9));
+	end = start + OFF_TO_IDX(size + PAGE_MASK);
+	object = vp->v_object;
+
+	VM_OBJECT_LOCK(object);
+	vm_page_cache_free(object, start, end);
+	vm_object_page_remove(object, start, end, FALSE);
+#ifdef INVARIANTS
+	for (i = 0; i < OFF_TO_IDX(size); i++) {
+		KASSERT(vm_page_lookup(object, start + i) == NULL,
+		    ("found page at %ld blkno %ld blkno_lookup %ld",
+			start + i, blkno, blkno_lookup));
+	}
+#endif	
+	VM_OBJECT_UNLOCK(object);
+}
+
+static void
+arc_pcache(struct vnode *vp, struct buf *bp, uint64_t blkno)
+{
+	vm_pindex_t start = OFF_TO_IDX((blkno << 9));
+	vm_object_t object = vp->v_object;
+	struct bufobj *bo = &vp->v_bufobj;
+	vm_page_t m;
+	int i;
+
+	BO_LOCK(bo);
+	bgetvp(vp, bp);
+	BO_UNLOCK(bo);
+
+	VM_OBJECT_LOCK(object);
+	for (i = 0; i < bp->b_npages; i++) {
+		m = bp->b_pages[i];
+		vm_page_insert(m, object, start + i);
+	}
+	VM_OBJECT_UNLOCK(object);
+	bp->b_flags |= B_VMIO;
+}
+
+static void
+arc_bcache(arc_buf_t *buf)
+{	
+	uint64_t blkno = buf->b_hdr->b_dva.dva_word[1] & ~(1UL<<63);
+	struct buf *newbp, *bp = buf->b_bp;
+	struct vnode *vp = spa_get_vnode(buf->b_hdr->b_spa);
+	struct bufobj *bo = &vp->v_bufobj;
+	arc_buf_hdr_t *hdr = buf->b_hdr;
+	int cachebuf;
+
+	if (zfs_page_cache_disable)
+		return;
+
+	if (blkno == 0 || hdr->b_birth == 0)
+		return;
+
+	newbp = buf->b_bp;
+	newbp->b_birth = hdr->b_birth;
+	newbp->b_blkno = newbp->b_lblkno = blkno;
+	newbp->b_offset = (blkno << 9);
+	cachebuf = ((hdr->b_datacnt == 1) &&
+	    !(hdr->b_flags & ARC_IO_ERROR) &&
+	    ((newbp->b_flags & (B_INVAL|B_CACHE)) == B_CACHE) &&
+	    (blkno & 0x7) == 0);
+
+	arc_binval(hdr->b_spa, &hdr->b_dva, hdr->b_size);	
+	if (cachebuf) 
+		arc_pcache(vp, newbp, blkno);
+
+}
+
+static void
+arc_getblk(arc_buf_t *buf)
+{
+	uint64_t		size = buf->b_hdr->b_size;
+	arc_buf_contents_t	type = buf->b_hdr->b_type;
+	spa_t			*spa = buf->b_hdr->b_spa;
+	uint64_t blkno = buf->b_hdr->b_dva.dva_word[1] & ~(1UL<<63);
+	void *data;
+	struct buf *newbp, *bp;
+	arc_buf_t *tbuf;
+	struct vnode *vp;
+	struct bufobj *bo;
+	int i, flags = 0;
+	vm_pindex_t start, end;
+	vm_object_t object;
+
+	if (type == ARC_BUFC_METADATA) {
+		arc_space_consume(size);
+	} else {
+		ASSERT(type == ARC_BUFC_DATA);
+		flags = GB_NODUMP;
+		atomic_add_64(&arc_size, size);
+	}
+
+	vp = spa_get_vnode(spa);
+	bo = &vp->v_bufobj;
+	newbp = NULL;
+
+	if (size < PAGE_SIZE) {
+		data = zio_buf_alloc(size);
+	} else if ((buf->b_hdr->b_flags & ARC_BUF_CLONING) ||
+	    BUF_EMPTY(buf->b_hdr) ||
+	    (blkno == 0)) {
+		newbp = geteblk(size, flags);
+		data = newbp->b_data;
+		buf->b_hdr->b_flags &= ~ARC_BUF_CLONING;
+	} else {
+		newbp = getblk(vp, blkno, size, 0, 0, flags | GB_LOCK_NOWAIT);
+		if (newbp == NULL)
+			newbp = geteblk(size, flags);
+		else {
+			vm_object_t object = vp->v_object;
+			vm_page_t m;
+
+			/*
+			 * Strip the buffers pages from the object
+			 */
+			VM_OBJECT_LOCK(object);
+			vm_page_lock_queues();
+			for (i = 0; i < newbp->b_npages; i++){
+				m = newbp->b_pages[i];
+				vm_page_remove(m);
+			}
+			vm_page_unlock_queues();
+			VM_OBJECT_UNLOCK(object);
+			brelvp(newbp);
+			newbp->b_flags &= ~B_VMIO;
+		}
+		data = newbp->b_data;
+	}
+
+#ifdef LOGALL
+	/*
+	 * not useful for tracking down collisions
+	 *
+	 */
+	CTR2(KTR_SPARE2, "arc_getblk() bp=%p flags %X",
+	    newbp, newbp->b_flags);
+#endif
+
+	if (newbp != NULL) {
+		BUF_KERNPROC(newbp);
+#ifdef INVARIANTS
+		for (i = 0; i < newbp->b_npages; i++)
+			KASSERT(newbp->b_pages[i]->object == NULL,
+			    ("newbp page not removed"));
+#endif	
+	}
+	buf->b_bp = newbp;
+	buf->b_data = data;
+}
+
+void
+arc_brelse(arc_buf_t *buf, void *data, size_t size);
+
+void
+arc_brelse(arc_buf_t *buf, void *data, size_t size)
+{
+	struct buf *bp = buf->b_bp;
+	arc_buf_hdr_t *hdr = buf->b_hdr;
+#ifdef INVARIANTS
+	int i;
+#endif
+	
+	if (bp == NULL) {
+		zio_buf_free(buf->b_data, size);
+		return;
+	}
+#ifdef INVARIANTS
+	for (i = 0; i < bp->b_npages; i++)
+		KASSERT(bp->b_pages[i]->object == NULL,
+		    ("newbp page not removed"));
+#endif	
+	arc_bcache(buf);
+
+
+	if (bp->b_vp == NULL)
+		KASSERT((bp->b_flags & B_VMIO) == 0, ("no vp but VMIO set!"));
+	else
+		CTR4(KTR_SPARE2, "arc_brelse() bp=%p flags %X"
+		    " size %ld blkno=%ld",
+		    bp, bp->b_flags, size, bp->b_blkno);
+
+	bp->b_flags |= B_ZFS;
+	brelse(bp);
+}
+
 /*
  * Free the arc data buffer.  If it is an l2arc write in progress,
  * the buffer is placed on l2arc_free_on_write to be freed later.
  */
 static void
-arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(void *, size_t),
-    void *data, size_t size)
+arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(arc_buf_t *, void *, size_t),
+    arc_buf_t *buf, void *data, size_t size)
 {
 	if (HDR_L2_WRITING(hdr)) {
 		l2arc_data_free_t *df;
 		df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
+		df->l2df_buf = buf;
 		df->l2df_data = data;
 		df->l2df_size = size;
 		df->l2df_func = free_func;
@@ -1233,7 +1543,7 @@ arc_buf_data_free(arc_buf_hdr_t *hdr, vo
 		mutex_exit(&l2arc_free_on_write_mtx);
 		ARCSTAT_BUMP(arcstat_l2_free_on_write);
 	} else {
-		free_func(data, size);
+		free_func(buf, data, size);
 	}
 }
 
@@ -1251,13 +1561,13 @@ arc_buf_destroy(arc_buf_t *buf, boolean_
 		arc_cksum_verify(buf);
 		if (!recycle) {
 			if (type == ARC_BUFC_METADATA) {
-				arc_buf_data_free(buf->b_hdr, zio_buf_free,
-				    buf->b_data, size);
+				arc_buf_data_free(buf->b_hdr, arc_brelse,
+				    buf, buf->b_data, size);
 				arc_space_return(size);
 			} else {
 				ASSERT(type == ARC_BUFC_DATA);
-				arc_buf_data_free(buf->b_hdr,
-				    zio_data_buf_free, buf->b_data, size);
+				arc_buf_data_free(buf->b_hdr, arc_brelse,
+				    buf, buf->b_data, size);
 				atomic_add_64(&arc_size, -size);
 			}
 		}
@@ -1462,21 +1772,57 @@ arc_evict(arc_state_t *state, spa_t *spa
 {
 	arc_state_t *evicted_state;
 	uint64_t bytes_evicted = 0, skipped = 0, missed = 0;
+	int64_t bytes_remaining;
 	arc_buf_hdr_t *ab, *ab_prev = NULL;
-	list_t *list = &state->arcs_list[type];
+	list_t *evicted_list, *list, *evicted_list_start, *list_start;
+	kmutex_t *lock, *evicted_lock;
 	kmutex_t *hash_lock;
 	boolean_t have_lock;
 	void *stolen = NULL;
+	static int evict_metadata_offset, evict_data_offset;
+	int i, idx, offset, list_count, count;
 
 	ASSERT(state == arc_mru || state == arc_mfu);
 
 	evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;
 
-	mutex_enter(&state->arcs_mtx);
-	mutex_enter(&evicted_state->arcs_mtx);
+	/*
+	 * don't recycle page cache bufs
+	 *
+	 */
+	if (recycle && (bytes >= PAGE_SIZE))
+		recycle = FALSE;
+	if (type == ARC_BUFC_METADATA) {
+		offset = 0;
+		list_count = ARC_BUFC_NUMMETADATALISTS;
+		list_start = &state->arcs_lists[0];
+		evicted_list_start = &evicted_state->arcs_lists[0];
+		idx = evict_metadata_offset;
+	} else {
+		offset = ARC_BUFC_NUMMETADATALISTS;
+
+		list_start = &state->arcs_lists[offset];
+		evicted_list_start = &evicted_state->arcs_lists[offset];
+		list_count = ARC_BUFC_NUMDATALISTS;
+		idx = evict_data_offset;
+	}
+	for (bytes_remaining = 0, i = 0; i < list_count; i++) 
+                bytes_remaining += evicted_state->arcs_lsize[i + offset]; 
+
+	count = 0;
+	
+evict_start:
+	list = &list_start[idx];
+	evicted_list = &evicted_list_start[idx];
+	lock = ARCS_LOCK(state, (offset + idx));
+	evicted_lock = ARCS_LOCK(evicted_state, (offset + idx)); 
+
+	mutex_enter(lock);
+	mutex_enter(evicted_lock);
 
 	for (ab = list_tail(list); ab; ab = ab_prev) {
 		ab_prev = list_prev(list, ab);
+		bytes_remaining -= (ab->b_size * ab->b_datacnt);
 		/* prefetch buffers have a minimum lifespan */
 		if (HDR_IO_IN_PROGRESS(ab) ||
 		    (spa && ab->b_spa != spa) ||
@@ -1536,18 +1882,36 @@ arc_evict(arc_state_t *state, spa_t *spa
 				mutex_exit(hash_lock);
 			if (bytes >= 0 && bytes_evicted >= bytes)
 				break;
+			if (bytes_remaining > 0) {
+				mutex_exit(evicted_lock);
+				mutex_exit(lock);
+				idx  = ((idx + 1)&(list_count-1));
+				count++;
+				goto evict_start;
+			}
 		} else {
 			missed += 1;
 		}
 	}
 
-	mutex_exit(&evicted_state->arcs_mtx);
-	mutex_exit(&state->arcs_mtx);
-
-	if (bytes_evicted < bytes)
-		dprintf("only evicted %lld bytes from %x",
-		    (longlong_t)bytes_evicted, state);
+	mutex_exit(evicted_lock);
+	mutex_exit(lock);
+	
+	idx  = ((idx + 1)&(list_count-1));
+	count++;
 
+	if (bytes_evicted < bytes) {
+		if (count < list_count)
+			goto evict_start;
+		else
+			dprintf("only evicted %lld bytes from %x",
+			    (longlong_t)bytes_evicted, state);
+	}
+	if (type == ARC_BUFC_METADATA) 
+		evict_metadata_offset = idx;
+	else
+		evict_data_offset = idx;
+		
 	if (skipped)
 		ARCSTAT_INCR(arcstat_evict_skip, skipped);
 
@@ -1586,14 +1950,28 @@ static void
 arc_evict_ghost(arc_state_t *state, spa_t *spa, int64_t bytes)
 {
 	arc_buf_hdr_t *ab, *ab_prev;
-	list_t *list = &state->arcs_list[ARC_BUFC_DATA];
-	kmutex_t *hash_lock;
+	list_t *list, *list_start;
+	kmutex_t *hash_lock, *lock;
 	uint64_t bytes_deleted = 0;
 	uint64_t bufs_skipped = 0;
+	static int evict_offset;
+	int list_count, idx = evict_offset;
+	int offset, count = 0;
 
 	ASSERT(GHOST_STATE(state));
-top:
-	mutex_enter(&state->arcs_mtx);
+
+	/*
+	 * data lists come after metadata lists
+	 */
+	list_start = &state->arcs_lists[ARC_BUFC_NUMMETADATALISTS];
+	list_count = ARC_BUFC_NUMDATALISTS;
+	offset = ARC_BUFC_NUMMETADATALISTS;
+	
+evict_start:
+	list = &list_start[idx];
+	lock = ARCS_LOCK(state, idx + offset);
+
+	mutex_enter(lock);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
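
The arc_evict() and arc_evict_ghost() hunks above walk the striped lists
round-robin, saving the last position in a static offset and wrapping the index
with a mask. A minimal standalone sketch of that wrap-around advance, assuming
a power-of-two list count:

#include <stdio.h>

#define LIST_COUNT	16	/* must be a power of two for the mask */

int
main(void)
{
	int idx = 13;	/* e.g. the saved evict_data_offset */
	int visited;

	/* visit every list exactly once, wrapping without a division */
	for (visited = 0; visited < LIST_COUNT; visited++) {
		printf("scanning list %d\n", idx);
		idx = (idx + 1) & (LIST_COUNT - 1);
	}
	return (0);
}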

