svn commit: r199776 - in user/kmacy/releng_8_fcs_buf_xen:
cddl/contrib/opensolaris/cmd/ztest
cddl/contrib/opensolaris/lib/libzpool/common/sys
sys/amd64/amd64 sys/amd64/include sys/cddl/compat/opens...
Kip Macy
kmacy at FreeBSD.org
Wed Nov 25 02:10:08 UTC 2009
Author: kmacy
Date: Wed Nov 25 02:10:07 2009
New Revision: 199776
URL: http://svn.freebsd.org/changeset/base/199776
Log:
merge releng_8_fcs changes
Modified:
user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/cmd/ztest/ztest.c
user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/minidump_machdep.c
user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/pmap.c
user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/uma_machdep.c
user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/md_var.h
user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/vmparam.h
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/kmem.h
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/mutex.h
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/rwlock.h
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
user/kmacy/releng_8_fcs_buf_xen/sys/compat/freebsd32/freebsd32_misc.c
user/kmacy/releng_8_fcs_buf_xen/sys/conf/files
user/kmacy/releng_8_fcs_buf_xen/sys/conf/files.amd64
user/kmacy/releng_8_fcs_buf_xen/sys/conf/files.i386
user/kmacy/releng_8_fcs_buf_xen/sys/conf/kern.pre.mk
user/kmacy/releng_8_fcs_buf_xen/sys/conf/options
user/kmacy/releng_8_fcs_buf_xen/sys/kern/kern_resource.c
user/kmacy/releng_8_fcs_buf_xen/sys/kern/subr_witness.c
user/kmacy/releng_8_fcs_buf_xen/sys/kern/uipc_sockbuf.c
user/kmacy/releng_8_fcs_buf_xen/sys/kern/uipc_socket.c
user/kmacy/releng_8_fcs_buf_xen/sys/kern/uipc_syscalls.c
user/kmacy/releng_8_fcs_buf_xen/sys/kern/vfs_bio.c
user/kmacy/releng_8_fcs_buf_xen/sys/kern/vfs_subr.c
user/kmacy/releng_8_fcs_buf_xen/sys/modules/zfs/Makefile
user/kmacy/releng_8_fcs_buf_xen/sys/netinet/in_pcb.c
user/kmacy/releng_8_fcs_buf_xen/sys/netinet/in_pcb.h
user/kmacy/releng_8_fcs_buf_xen/sys/netinet/ip_output.c
user/kmacy/releng_8_fcs_buf_xen/sys/netinet/tcp_input.c
user/kmacy/releng_8_fcs_buf_xen/sys/netinet/tcp_usrreq.c
user/kmacy/releng_8_fcs_buf_xen/sys/sys/buf.h
user/kmacy/releng_8_fcs_buf_xen/sys/sys/file.h
user/kmacy/releng_8_fcs_buf_xen/sys/sys/malloc.h
user/kmacy/releng_8_fcs_buf_xen/sys/sys/param.h
user/kmacy/releng_8_fcs_buf_xen/sys/sys/sockbuf.h
user/kmacy/releng_8_fcs_buf_xen/sys/sys/socket.h
user/kmacy/releng_8_fcs_buf_xen/sys/sys/socketvar.h
user/kmacy/releng_8_fcs_buf_xen/sys/sys/sockstate.h
user/kmacy/releng_8_fcs_buf_xen/sys/sys/syscallsubr.h
user/kmacy/releng_8_fcs_buf_xen/sys/vm/pmap.h
user/kmacy/releng_8_fcs_buf_xen/sys/vm/uma.h
user/kmacy/releng_8_fcs_buf_xen/sys/vm/uma_core.c
user/kmacy/releng_8_fcs_buf_xen/sys/vm/vm.h
user/kmacy/releng_8_fcs_buf_xen/sys/vm/vm_contig.c
user/kmacy/releng_8_fcs_buf_xen/sys/vm/vm_glue.c
user/kmacy/releng_8_fcs_buf_xen/sys/vm/vm_kern.c
user/kmacy/releng_8_fcs_buf_xen/sys/vm/vm_page.c
user/kmacy/releng_8_fcs_buf_xen/sys/vm/vnode_pager.c
Modified: user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/cmd/ztest/ztest.c
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/cmd/ztest/ztest.c Wed Nov 25 02:00:09 2009 (r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/cmd/ztest/ztest.c Wed Nov 25 02:10:07 2009 (r199776)
@@ -1304,7 +1304,7 @@ ztest_dmu_objset_create_destroy(ztest_ar
if (ztest_random(2) == 0 &&
dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os) == 0) {
zr.zr_os = os;
- zil_replay(os, &zr, &zr.zr_assign, ztest_replay_vector, NULL);
+ zil_replay(os, &zr, ztest_replay_vector);
dmu_objset_close(os);
}
@@ -3321,8 +3321,7 @@ ztest_run(char *pool)
if (test_future)
ztest_dmu_check_future_leak(&za[t]);
zr.zr_os = za[d].za_os;
- zil_replay(zr.zr_os, &zr, &zr.zr_assign,
- ztest_replay_vector, NULL);
+ zil_replay(zr.zr_os, &zr, ztest_replay_vector);
za[d].za_zilog = zil_open(za[d].za_os, NULL);
}
Modified: user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Wed Nov 25 02:00:09 2009 (r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Wed Nov 25 02:10:07 2009 (r199776)
@@ -305,6 +305,8 @@ extern void cv_broadcast(kcondvar_t *cv)
#define KM_PUSHPAGE KM_SLEEP
#define KM_NOSLEEP UMEM_DEFAULT
#define KMC_NODEBUG UMC_NODEBUG
+#define KM_NODEBUG KMC_NODEBUG
+
#define kmem_alloc(_s, _f) umem_alloc(_s, _f)
#define kmem_zalloc(_s, _f) umem_zalloc(_s, _f)
#define kmem_free(_b, _s) umem_free(_b, _s)
Modified: user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/minidump_machdep.c
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/minidump_machdep.c Wed Nov 25 02:00:09 2009 (r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/minidump_machdep.c Wed Nov 25 02:10:07 2009 (r199776)
@@ -56,6 +56,7 @@ CTASSERT(sizeof(struct kerneldumpheader)
extern uint64_t KPDPphys;
uint64_t *vm_page_dump;
+uint64_t *vm_page_dump_exclude;
int vm_page_dump_size;
static struct kerneldumpheader kdh;
@@ -71,10 +72,16 @@ CTASSERT(sizeof(*vm_page_dump) == 8);
static int
is_dumpable(vm_paddr_t pa)
{
- int i;
+ int i, idx, bit, isdata;
+ uint64_t pfn = pa;
+
+ pfn >>= PAGE_SHIFT;
+ idx = pfn >> 6; /* 2^6 = 64 */
+ bit = pfn & 63;
+ isdata = ((vm_page_dump_exclude[idx] & (1ul << bit)) == 0);
for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
- if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
+ if (pa >= dump_avail[i] && pa < dump_avail[i + 1] && isdata)
return (1);
}
return (0);
@@ -226,6 +233,7 @@ minidumpsys(struct dumperinfo *di)
dumpsize = ptesize;
dumpsize += round_page(msgbufp->msg_size);
dumpsize += round_page(vm_page_dump_size);
+ printf("dumpsize: ");
for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
bits = vm_page_dump[i];
while (bits) {
@@ -238,10 +246,13 @@ minidumpsys(struct dumperinfo *di)
dump_drop_page(pa);
}
bits &= ~(1ul << bit);
+ if ((dumpsize % (1<<29)) == 0)
+ printf("%ldMB ", (dumpsize>>20));
}
}
dumpsize += PAGE_SIZE;
+ printf("\n");
/* Determine dump offset on device. */
if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
error = ENOSPC;
@@ -273,6 +284,7 @@ minidumpsys(struct dumperinfo *di)
goto fail;
dumplo += sizeof(kdh);
+ printf("write header\n");
/* Dump my header */
bzero(&fakept, sizeof(fakept));
bcopy(&mdhdr, &fakept, sizeof(mdhdr));
@@ -280,16 +292,19 @@ minidumpsys(struct dumperinfo *di)
if (error)
goto fail;
+ printf("write msgbuf\n");
/* Dump msgbuf up front */
error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
if (error)
goto fail;
+ printf("write bitmap\n");
/* Dump bitmap */
error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
if (error)
goto fail;
+ printf("\nDump kernel page table pages\n");
/* Dump kernel page table pages */
pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR,
@@ -343,8 +358,10 @@ minidumpsys(struct dumperinfo *di)
/* Dump memory chunks */
/* XXX cluster it up and use blk_dump() */
- for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
- bits = vm_page_dump[i];
+ printf("\nclustering memory chunks\n");
+ for (i = 0;
+ i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+ bits = vm_page_dump[i] & ~(vm_page_dump_exclude[i]);
while (bits) {
bit = bsfq(bits);
pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
@@ -354,7 +371,6 @@ minidumpsys(struct dumperinfo *di)
bits &= ~(1ul << bit);
}
}
-
error = blk_flush(di);
if (error)
goto fail;
@@ -365,6 +381,7 @@ minidumpsys(struct dumperinfo *di)
goto fail;
dumplo += sizeof(kdh);
+ printf("\nstarting dump\n");
/* Signal completion, signoff and exit stage left. */
dump_write(di, NULL, 0, 0, 0);
printf("\nDump complete\n");
@@ -403,3 +420,25 @@ dump_drop_page(vm_paddr_t pa)
bit = pa & 63;
atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
}
+
+void
+dump_exclude_page(vm_paddr_t pa)
+{
+ int idx, bit;
+
+ pa >>= PAGE_SHIFT;
+ idx = pa >> 6; /* 2^6 = 64 */
+ bit = pa & 63;
+ atomic_set_long(&vm_page_dump_exclude[idx], 1ul << bit);
+}
+
+void
+dump_unexclude_page(vm_paddr_t pa)
+{
+ int idx, bit;
+
+ pa >>= PAGE_SHIFT;
+ idx = pa >> 6; /* 2^6 = 64 */
+ bit = pa & 63;
+ atomic_clear_long(&vm_page_dump_exclude[idx], 1ul << bit);
+}
Modified: user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/pmap.c
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/pmap.c Wed Nov 25 02:00:09 2009 (r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/pmap.c Wed Nov 25 02:10:07 2009 (r199776)
@@ -1137,10 +1137,16 @@ pmap_map(vm_offset_t *virt, vm_paddr_t s
* Note: SMP coherent. Uses a ranged shootdown IPI.
*/
void
-pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
+pmap_qenter_prot(vm_offset_t sva, vm_page_t *ma, int count, vm_prot_t prot)
{
pt_entry_t *endpte, oldpte, *pte;
+ uint64_t flags = PG_V;
+ if (prot & VM_PROT_WRITE)
+ flags |= PG_RW;
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ flags |= PG_NX;
+
oldpte = 0;
pte = vtopte(sva);
endpte = pte + count;
@@ -1148,6 +1154,9 @@ pmap_qenter(vm_offset_t sva, vm_page_t *
oldpte |= *pte;
pte_store(pte, VM_PAGE_TO_PHYS(*ma) | PG_G |
pmap_cache_bits((*ma)->md.pat_mode, 0) | PG_RW | PG_V);
+ pte_store(pte, VM_PAGE_TO_PHYS(*ma) | PG_G | flags);
+ if (prot & VM_PROT_EXCLUDE)
+ dump_exclude_page(VM_PAGE_TO_PHYS(*ma));
pte++;
ma++;
}
@@ -1156,6 +1165,16 @@ pmap_qenter(vm_offset_t sva, vm_page_t *
PAGE_SIZE);
}
+void
+pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
+{
+
+ pmap_qenter_prot(sva, ma, count,
+ VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
+
+}
+
+
/*
* This routine tears out page mappings from the
* kernel -- it is meant only for temporary mappings.
@@ -1168,6 +1187,7 @@ pmap_qremove(vm_offset_t sva, int count)
va = sva;
while (count-- > 0) {
+ dump_unexclude_page(pmap_kextract(va));
pmap_kremove(va);
va += PAGE_SIZE;
}
Modified: user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/uma_machdep.c
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/uma_machdep.c Wed Nov 25 02:00:09 2009 (r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/amd64/amd64/uma_machdep.c Wed Nov 25 02:10:07 2009 (r199776)
@@ -66,7 +66,8 @@ uma_small_alloc(uma_zone_t zone, int byt
break;
}
pa = m->phys_addr;
- dump_add_page(pa);
+ if ((wait & M_NODUMP) == 0)
+ dump_add_page(pa);
va = (void *)PHYS_TO_DMAP(pa);
if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
pagezero(va);
Modified: user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/md_var.h
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/md_var.h Wed Nov 25 02:00:09 2009 (r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/md_var.h Wed Nov 25 02:10:07 2009 (r199776)
@@ -60,6 +60,7 @@ extern char kstack[];
extern char sigcode[];
extern int szsigcode;
extern uint64_t *vm_page_dump;
+extern uint64_t *vm_page_dump_exclude;
extern int vm_page_dump_size;
extern int _udatasel;
extern int _ucodesel;
@@ -88,6 +89,8 @@ void fs_load_fault(void) __asm(__STRING(
void gs_load_fault(void) __asm(__STRING(gs_load_fault));
void dump_add_page(vm_paddr_t);
void dump_drop_page(vm_paddr_t);
+void dump_exclude_page(vm_paddr_t);
+void dump_unexclude_page(vm_paddr_t);
void initializecpu(void);
void initializecpucache(void);
void fillw(int /*u_short*/ pat, void *base, size_t cnt);
Modified: user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/vmparam.h
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/vmparam.h Wed Nov 25 02:00:09 2009 (r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/amd64/include/vmparam.h Wed Nov 25 02:10:07 2009 (r199776)
@@ -88,6 +88,11 @@
#define UMA_MD_SMALL_ALLOC
/*
+ * We use a machine-specific sparse kernel dump
+ */
+#define VM_MD_MINIDUMP
+
+/*
* The physical address space is densely populated.
*/
#define VM_PHYSSEG_DENSE
Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/kmem.h
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/kmem.h Wed Nov 25 02:00:09 2009 (r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/kmem.h Wed Nov 25 02:10:07 2009 (r199776)
@@ -40,7 +40,8 @@
#define KM_SLEEP M_WAITOK
#define KM_PUSHPAGE M_WAITOK
#define KM_NOSLEEP M_NOWAIT
-#define KMC_NODEBUG 0
+#define KMC_NODEBUG UMA_ZONE_NODUMP
+#define KM_NODEBUG M_NODUMP
typedef struct kmem_cache {
char kc_name[32];
Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/mutex.h
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/mutex.h Wed Nov 25 02:00:09 2009 (r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/mutex.h Wed Nov 25 02:10:07 2009 (r199776)
@@ -46,11 +46,7 @@ typedef enum {
typedef struct sx kmutex_t;
-#ifndef DEBUG
-#define MUTEX_FLAGS (SX_DUPOK | SX_NOWITNESS)
-#else
#define MUTEX_FLAGS (SX_DUPOK)
-#endif
#define mutex_init(lock, desc, type, arg) do { \
const char *_name; \
Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/rwlock.h
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/rwlock.h Wed Nov 25 02:00:09 2009 (r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/compat/opensolaris/sys/rwlock.h Wed Nov 25 02:10:07 2009 (r199776)
@@ -48,11 +48,7 @@ typedef enum {
typedef struct sx krwlock_t;
-#ifndef DEBUG
-#define RW_FLAGS (SX_DUPOK | SX_NOWITNESS)
-#else
#define RW_FLAGS (SX_DUPOK)
-#endif
#define RW_READ_HELD(x) (rw_read_held((x)))
#define RW_WRITE_HELD(x) (rw_write_held((x)))
Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Wed Nov 25 02:00:09 2009 (r199775)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Wed Nov 25 02:10:07 2009 (r199776)
@@ -127,6 +127,7 @@
#ifdef _KERNEL
#include <sys/dnlc.h>
#endif
+#include <sys/ktr.h>
#include <sys/callb.h>
#include <sys/kstat.h>
#include <sys/sdt.h>
@@ -186,6 +187,16 @@ SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_min,
SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN,
&zfs_mdcomp_disable, 0, "Disable metadata compression");
+static int zfs_page_cache_disable = 0;
+TUNABLE_INT("vfs.zfs.page_cache_disable", &zfs_page_cache_disable);
+SYSCTL_INT(_vfs_zfs, OID_AUTO, page_cache_disable, CTLFLAG_RDTUN,
+ &zfs_page_cache_disable, 0, "Disable backing ARC with page cache ");
+
+#ifdef ZIO_USE_UMA
+extern kmem_cache_t *zio_buf_cache[];
+extern kmem_cache_t *zio_data_buf_cache[];
+#endif
+
/*
* Note that buffers can be in one of 6 states:
* ARC_anon - anonymous (discussed below)
@@ -218,13 +229,31 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_di
* second level ARC benefit from these fast lookups.
*/
+#define ARCS_LOCK_PAD 128
+struct arcs_lock {
+ kmutex_t arcs_lock;
+#ifdef _KERNEL
+ unsigned char pad[(ARCS_LOCK_PAD - sizeof (kmutex_t))];
+#endif
+};
+
+/*
+ * must be power of two for mask use to work
+ *
+ */
+#define ARC_BUFC_NUMDATALISTS 16
+#define ARC_BUFC_NUMMETADATALISTS 16
+#define ARC_BUFC_NUMLISTS (ARC_BUFC_NUMMETADATALISTS+ARC_BUFC_NUMDATALISTS)
+
typedef struct arc_state {
- list_t arcs_list[ARC_BUFC_NUMTYPES]; /* list of evictable buffers */
uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */
uint64_t arcs_size; /* total amount of data in this state */
- kmutex_t arcs_mtx;
+ list_t arcs_lists[ARC_BUFC_NUMLISTS]; /* list of evictable buffers */
+ struct arcs_lock arcs_locks[ARC_BUFC_NUMLISTS] __aligned(128);
} arc_state_t;
+#define ARCS_LOCK(s, i) &((s)->arcs_locks[(i)].arcs_lock)
+
/* The 6 states: */
static arc_state_t ARC_anon;
static arc_state_t ARC_mru;
@@ -235,6 +264,7 @@ static arc_state_t ARC_l2c_only;
typedef struct arc_stats {
kstat_named_t arcstat_hits;
+ kstat_named_t arcstat_page_cache_hits;
kstat_named_t arcstat_misses;
kstat_named_t arcstat_demand_data_hits;
kstat_named_t arcstat_demand_data_misses;
@@ -284,6 +314,7 @@ typedef struct arc_stats {
static arc_stats_t arc_stats = {
{ "hits", KSTAT_DATA_UINT64 },
+ { "page_cache_hits", KSTAT_DATA_UINT64 },
{ "misses", KSTAT_DATA_UINT64 },
{ "demand_data_hits", KSTAT_DATA_UINT64 },
{ "demand_data_misses", KSTAT_DATA_UINT64 },
@@ -489,6 +520,7 @@ static void arc_evict_ghost(arc_state_t
#define ARC_L2_EVICTED (1 << 17) /* evicted during I/O */
#define ARC_L2_WRITE_HEAD (1 << 18) /* head of write list */
#define ARC_STORED (1 << 19) /* has been store()d to */
+#define ARC_BUF_CLONING (1 << 21) /* is being cloned */
#define HDR_IN_HASH_TABLE(hdr) ((hdr)->b_flags & ARC_IN_HASH_TABLE)
#define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_IO_IN_PROGRESS)
@@ -609,9 +641,10 @@ struct l2arc_buf_hdr {
typedef struct l2arc_data_free {
/* protected by l2arc_free_on_write_mtx */
+ arc_buf_t *l2df_buf;
void *l2df_data;
size_t l2df_size;
- void (*l2df_func)(void *, size_t);
+ void (*l2df_func)(arc_buf_t *, void *, size_t);
list_node_t l2df_list_node;
} l2arc_data_free_t;
@@ -953,20 +986,42 @@ arc_buf_freeze(arc_buf_t *buf)
}
static void
+get_buf_info(arc_buf_hdr_t *ab, arc_state_t *state, list_t **list, kmutex_t **lock)
+{
+ uint64_t buf_hashid = buf_hash(ab->b_spa, &ab->b_dva, ab->b_birth);
+
+ if (ab->b_type == ARC_BUFC_METADATA)
+ buf_hashid &= (ARC_BUFC_NUMMETADATALISTS-1);
+ else {
+ buf_hashid &= (ARC_BUFC_NUMDATALISTS-1);
+ buf_hashid += ARC_BUFC_NUMMETADATALISTS;
+ }
+
+ *list = &state->arcs_lists[buf_hashid];
+ *lock = ARCS_LOCK(state, buf_hashid);
+}
+
+
+static void
add_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag)
{
+
ASSERT(MUTEX_HELD(hash_lock));
if ((refcount_add(&ab->b_refcnt, tag) == 1) &&
(ab->b_state != arc_anon)) {
+ list_t *list;
+ kmutex_t *lock;
uint64_t delta = ab->b_size * ab->b_datacnt;
- list_t *list = &ab->b_state->arcs_list[ab->b_type];
uint64_t *size = &ab->b_state->arcs_lsize[ab->b_type];
- ASSERT(!MUTEX_HELD(&ab->b_state->arcs_mtx));
- mutex_enter(&ab->b_state->arcs_mtx);
+ get_buf_info(ab, ab->b_state, &list, &lock);
+ ASSERT(!MUTEX_HELD(lock));
+ mutex_enter(lock);
ASSERT(list_link_active(&ab->b_arc_node));
list_remove(list, ab);
+ mutex_exit(lock);
+
if (GHOST_STATE(ab->b_state)) {
ASSERT3U(ab->b_datacnt, ==, 0);
ASSERT3P(ab->b_buf, ==, NULL);
@@ -975,7 +1030,6 @@ add_reference(arc_buf_hdr_t *ab, kmutex_
ASSERT(delta > 0);
ASSERT3U(*size, >=, delta);
atomic_add_64(size, -delta);
- mutex_exit(&ab->b_state->arcs_mtx);
/* remove the prefetch flag if we get a reference */
if (ab->b_flags & ARC_PREFETCH)
ab->b_flags &= ~ARC_PREFETCH;
@@ -994,14 +1048,19 @@ remove_reference(arc_buf_hdr_t *ab, kmut
if (((cnt = refcount_remove(&ab->b_refcnt, tag)) == 0) &&
(state != arc_anon)) {
uint64_t *size = &state->arcs_lsize[ab->b_type];
+ list_t *list;
+ kmutex_t *lock;
- ASSERT(!MUTEX_HELD(&state->arcs_mtx));
- mutex_enter(&state->arcs_mtx);
+ get_buf_info(ab, state, &list, &lock);
+
+ ASSERT(!MUTEX_HELD(lock));
+ mutex_enter(lock);
ASSERT(!list_link_active(&ab->b_arc_node));
- list_insert_head(&state->arcs_list[ab->b_type], ab);
+ list_insert_head(list, ab);
+ mutex_exit(lock);
+
ASSERT(ab->b_datacnt > 0);
atomic_add_64(size, ab->b_size * ab->b_datacnt);
- mutex_exit(&state->arcs_mtx);
}
return (cnt);
}
@@ -1016,6 +1075,8 @@ arc_change_state(arc_state_t *new_state,
arc_state_t *old_state = ab->b_state;
int64_t refcnt = refcount_count(&ab->b_refcnt);
uint64_t from_delta, to_delta;
+ list_t *list;
+ kmutex_t *lock;
ASSERT(MUTEX_HELD(hash_lock));
ASSERT(new_state != old_state);
@@ -1030,14 +1091,17 @@ arc_change_state(arc_state_t *new_state,
*/
if (refcnt == 0) {
if (old_state != arc_anon) {
- int use_mutex = !MUTEX_HELD(&old_state->arcs_mtx);
+ int use_mutex;
uint64_t *size = &old_state->arcs_lsize[ab->b_type];
+ get_buf_info(ab, old_state, &list, &lock);
+ use_mutex = !MUTEX_HELD(lock);
+
if (use_mutex)
- mutex_enter(&old_state->arcs_mtx);
+ mutex_enter(lock);
ASSERT(list_link_active(&ab->b_arc_node));
- list_remove(&old_state->arcs_list[ab->b_type], ab);
+ list_remove(list, ab);
/*
* If prefetching out of the ghost cache,
@@ -1052,16 +1116,20 @@ arc_change_state(arc_state_t *new_state,
atomic_add_64(size, -from_delta);
if (use_mutex)
- mutex_exit(&old_state->arcs_mtx);
+ mutex_exit(lock);
}
if (new_state != arc_anon) {
- int use_mutex = !MUTEX_HELD(&new_state->arcs_mtx);
+ int use_mutex;
uint64_t *size = &new_state->arcs_lsize[ab->b_type];
+ get_buf_info(ab, new_state, &list, &lock);
+ use_mutex = !MUTEX_HELD(lock);
+
+
if (use_mutex)
- mutex_enter(&new_state->arcs_mtx);
+ mutex_enter(lock);
- list_insert_head(&new_state->arcs_list[ab->b_type], ab);
+ list_insert_head(list, ab);
/* ghost elements have a ghost size */
if (GHOST_STATE(new_state)) {
@@ -1072,7 +1140,7 @@ arc_change_state(arc_state_t *new_state,
atomic_add_64(size, to_delta);
if (use_mutex)
- mutex_exit(&new_state->arcs_mtx);
+ mutex_exit(lock);
}
}
@@ -1132,8 +1200,9 @@ arc_data_buf_free(void *buf, uint64_t si
atomic_add_64(&arc_size, -size);
}
-arc_buf_t *
-arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
+static arc_buf_t *
+_arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type,
+ blkptr_t *bp)
{
arc_buf_hdr_t *hdr;
arc_buf_t *buf;
@@ -1143,6 +1212,14 @@ arc_buf_alloc(spa_t *spa, int size, void
ASSERT(BUF_EMPTY(hdr));
hdr->b_size = size;
hdr->b_type = type;
+ if (bp != NULL) {
+ hdr->b_dva = *BP_IDENTITY(bp);
+ hdr->b_birth = bp->blk_birth;
+ } else {
+ hdr->b_dva.dva_word[0] = 0;
+ hdr->b_dva.dva_word[1] = 0;
+ hdr->b_birth = 0;
+ }
hdr->b_spa = spa;
hdr->b_state = arc_anon;
hdr->b_arc_access = 0;
@@ -1162,6 +1239,13 @@ arc_buf_alloc(spa_t *spa, int size, void
return (buf);
}
+arc_buf_t *
+arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
+{
+
+ return (_arc_buf_alloc(spa, size, tag, type, NULL));
+}
+
static arc_buf_t *
arc_buf_clone(arc_buf_t *from)
{
@@ -1176,6 +1260,7 @@ arc_buf_clone(arc_buf_t *from)
buf->b_private = NULL;
buf->b_next = hdr->b_buf;
hdr->b_buf = buf;
+ hdr->b_flags |= ARC_BUF_CLONING;
arc_get_data_buf(buf);
bcopy(from->b_data, buf->b_data, size);
hdr->b_datacnt += 1;
@@ -1214,17 +1299,242 @@ arc_buf_add_ref(arc_buf_t *buf, void* ta
data, metadata, hits);
}
+void
+arc_binval(spa_t *spa, dva_t *dva, uint64_t size)
+{
+ uint64_t blkno, blkno_lookup;
+ struct vnode *vp;
+ struct bufobj *bo;
+ struct buf *bp;
+ vm_pindex_t start, end;
+ vm_object_t object;
+ vm_page_t m;
+ int i;
+
+ if (zfs_page_cache_disable)
+ return;
+
+ if (dva == NULL || spa == NULL || blkno == 0 || size == 0)
+ return;
+
+ blkno_lookup = blkno = dva->dva_word[1] & ~(1UL<<63);
+ vp = spa_get_vnode(spa);
+ bo = &vp->v_bufobj;
+
+ BO_LOCK(bo);
+retry:
+ bp = gbincore(bo, blkno_lookup);
+ if (bp != NULL) {
+ BUF_LOCK(bp, LK_EXCLUSIVE | LK_INTERLOCK, BO_MTX(bo));
+ bremfree(bp);
+ bp->b_flags |= B_INVAL;
+ bp->b_birth = 0;
+ brelse(bp);
+ } else if (blkno_lookup & 0x7) {
+ blkno_lookup &= ~0x7;
+ goto retry;
+ } else
+ BO_UNLOCK(bo);
+
+ start = OFF_TO_IDX((blkno_lookup << 9));
+ end = start + OFF_TO_IDX(size + PAGE_MASK);
+ object = vp->v_object;
+
+ VM_OBJECT_LOCK(object);
+ vm_page_cache_free(object, start, end);
+ vm_object_page_remove(object, start, end, FALSE);
+#ifdef INVARIANTS
+ for (i = 0; i < OFF_TO_IDX(size); i++) {
+ KASSERT(vm_page_lookup(object, start + i) == NULL,
+ ("found page at %ld blkno %ld blkno_lookup %ld",
+ start + i, blkno, blkno_lookup));
+ }
+#endif
+ VM_OBJECT_UNLOCK(object);
+}
+
+static void
+arc_pcache(struct vnode *vp, struct buf *bp, uint64_t blkno)
+{
+ vm_pindex_t start = OFF_TO_IDX((blkno << 9));
+ vm_object_t object = vp->v_object;
+ struct bufobj *bo = &vp->v_bufobj;
+ vm_page_t m;
+ int i;
+
+ BO_LOCK(bo);
+ bgetvp(vp, bp);
+ BO_UNLOCK(bo);
+
+ VM_OBJECT_LOCK(object);
+ for (i = 0; i < bp->b_npages; i++) {
+ m = bp->b_pages[i];
+ vm_page_insert(m, object, start + i);
+ }
+ VM_OBJECT_UNLOCK(object);
+ bp->b_flags |= B_VMIO;
+}
+
+static void
+arc_bcache(arc_buf_t *buf)
+{
+ uint64_t blkno = buf->b_hdr->b_dva.dva_word[1] & ~(1UL<<63);
+ struct buf *newbp, *bp = buf->b_bp;
+ struct vnode *vp = spa_get_vnode(buf->b_hdr->b_spa);
+ struct bufobj *bo = &vp->v_bufobj;
+ arc_buf_hdr_t *hdr = buf->b_hdr;
+ int cachebuf;
+
+ if (zfs_page_cache_disable)
+ return;
+
+ if (blkno == 0 || hdr->b_birth == 0)
+ return;
+
+ newbp = buf->b_bp;
+ newbp->b_birth = hdr->b_birth;
+ newbp->b_blkno = newbp->b_lblkno = blkno;
+ newbp->b_offset = (blkno << 9);
+ cachebuf = ((hdr->b_datacnt == 1) &&
+ !(hdr->b_flags & ARC_IO_ERROR) &&
+ ((newbp->b_flags & (B_INVAL|B_CACHE)) == B_CACHE) &&
+ (blkno & 0x7) == 0);
+
+ arc_binval(hdr->b_spa, &hdr->b_dva, hdr->b_size);
+ if (cachebuf)
+ arc_pcache(vp, newbp, blkno);
+
+}
+
+static void
+arc_getblk(arc_buf_t *buf)
+{
+ uint64_t size = buf->b_hdr->b_size;
+ arc_buf_contents_t type = buf->b_hdr->b_type;
+ spa_t *spa = buf->b_hdr->b_spa;
+ uint64_t blkno = buf->b_hdr->b_dva.dva_word[1] & ~(1UL<<63);
+ void *data;
+ struct buf *newbp, *bp;
+ arc_buf_t *tbuf;
+ struct vnode *vp;
+ struct bufobj *bo;
+ int i, flags = 0;
+ vm_pindex_t start, end;
+ vm_object_t object;
+
+ if (type == ARC_BUFC_METADATA) {
+ arc_space_consume(size);
+ } else {
+ ASSERT(type == ARC_BUFC_DATA);
+ flags = GB_NODUMP;
+ atomic_add_64(&arc_size, size);
+ }
+
+ vp = spa_get_vnode(spa);
+ bo = &vp->v_bufobj;
+ newbp = NULL;
+
+ if (size < PAGE_SIZE) {
+ data = zio_buf_alloc(size);
+ } else if ((buf->b_hdr->b_flags & ARC_BUF_CLONING) ||
+ BUF_EMPTY(buf->b_hdr) ||
+ (blkno == 0)) {
+ newbp = geteblk(size, flags);
+ data = newbp->b_data;
+ buf->b_hdr->b_flags &= ~ARC_BUF_CLONING;
+ } else {
+ newbp = getblk(vp, blkno, size, 0, 0, flags | GB_LOCK_NOWAIT);
+ if (newbp == NULL)
+ newbp = geteblk(size, flags);
+ else {
+ vm_object_t object = vp->v_object;
+ vm_page_t m;
+
+ /*
+ * Strip the buffer's pages from the object
+ */
+ VM_OBJECT_LOCK(object);
+ vm_page_lock_queues();
+ for (i = 0; i < newbp->b_npages; i++){
+ m = newbp->b_pages[i];
+ vm_page_remove(m);
+ }
+ vm_page_unlock_queues();
+ VM_OBJECT_UNLOCK(object);
+ brelvp(newbp);
+ newbp->b_flags &= ~B_VMIO;
+ }
+ data = newbp->b_data;
+ }
+
+#ifdef LOGALL
+ /*
+ * not useful for tracking down collisions
+ *
+ */
+ CTR2(KTR_SPARE2, "arc_getblk() bp=%p flags %X",
+ newbp, newbp->b_flags);
+#endif
+
+ if (newbp != NULL) {
+ BUF_KERNPROC(newbp);
+#ifdef INVARIANTS
+ for (i = 0; i < newbp->b_npages; i++)
+ KASSERT(newbp->b_pages[i]->object == NULL,
+ ("newbp page not removed"));
+#endif
+ }
+ buf->b_bp = newbp;
+ buf->b_data = data;
+}
+
+void
+arc_brelse(arc_buf_t *buf, void *data, size_t size);
+
+void
+arc_brelse(arc_buf_t *buf, void *data, size_t size)
+{
+ struct buf *bp = buf->b_bp;
+ arc_buf_hdr_t *hdr = buf->b_hdr;
+#ifdef INVARIANTS
+ int i;
+#endif
+
+ if (bp == NULL) {
+ zio_buf_free(buf->b_data, size);
+ return;
+ }
+#ifdef INVARIANTS
+ for (i = 0; i < bp->b_npages; i++)
+ KASSERT(bp->b_pages[i]->object == NULL,
+ ("newbp page not removed"));
+#endif
+ arc_bcache(buf);
+
+
+ if (bp->b_vp == NULL)
+ KASSERT((bp->b_flags & B_VMIO) == 0, ("no vp but VMIO set!"));
+ else
+ CTR4(KTR_SPARE2, "arc_brelse() bp=%p flags %X"
+ " size %ld blkno=%ld",
+ bp, bp->b_flags, size, bp->b_blkno);
+
+ bp->b_flags |= B_ZFS;
+ brelse(bp);
+}
+
/*
* Free the arc data buffer. If it is an l2arc write in progress,
* the buffer is placed on l2arc_free_on_write to be freed later.
*/
static void
-arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(void *, size_t),
- void *data, size_t size)
+arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(arc_buf_t *, void *, size_t),
+ arc_buf_t *buf, void *data, size_t size)
{
if (HDR_L2_WRITING(hdr)) {
l2arc_data_free_t *df;
df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
+ df->l2df_buf = buf;
df->l2df_data = data;
df->l2df_size = size;
df->l2df_func = free_func;
@@ -1233,7 +1543,7 @@ arc_buf_data_free(arc_buf_hdr_t *hdr, vo
mutex_exit(&l2arc_free_on_write_mtx);
ARCSTAT_BUMP(arcstat_l2_free_on_write);
} else {
- free_func(data, size);
+ free_func(buf, data, size);
}
}
@@ -1251,13 +1561,13 @@ arc_buf_destroy(arc_buf_t *buf, boolean_
arc_cksum_verify(buf);
if (!recycle) {
if (type == ARC_BUFC_METADATA) {
- arc_buf_data_free(buf->b_hdr, zio_buf_free,
- buf->b_data, size);
+ arc_buf_data_free(buf->b_hdr, arc_brelse,
+ buf, buf->b_data, size);
arc_space_return(size);
} else {
ASSERT(type == ARC_BUFC_DATA);
- arc_buf_data_free(buf->b_hdr,
- zio_data_buf_free, buf->b_data, size);
+ arc_buf_data_free(buf->b_hdr, arc_brelse,
+ buf, buf->b_data, size);
atomic_add_64(&arc_size, -size);
}
}
@@ -1462,21 +1772,57 @@ arc_evict(arc_state_t *state, spa_t *spa
{
arc_state_t *evicted_state;
uint64_t bytes_evicted = 0, skipped = 0, missed = 0;
+ int64_t bytes_remaining;
arc_buf_hdr_t *ab, *ab_prev = NULL;
- list_t *list = &state->arcs_list[type];
+ list_t *evicted_list, *list, *evicted_list_start, *list_start;
+ kmutex_t *lock, *evicted_lock;
kmutex_t *hash_lock;
boolean_t have_lock;
void *stolen = NULL;
+ static int evict_metadata_offset, evict_data_offset;
+ int i, idx, offset, list_count, count;
ASSERT(state == arc_mru || state == arc_mfu);
evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;
- mutex_enter(&state->arcs_mtx);
- mutex_enter(&evicted_state->arcs_mtx);
+ /*
+ * don't recycle page cache bufs
+ *
+ */
+ if (recycle && (bytes >= PAGE_SIZE))
+ recycle = FALSE;
+ if (type == ARC_BUFC_METADATA) {
+ offset = 0;
+ list_count = ARC_BUFC_NUMMETADATALISTS;
+ list_start = &state->arcs_lists[0];
+ evicted_list_start = &evicted_state->arcs_lists[0];
+ idx = evict_metadata_offset;
+ } else {
+ offset = ARC_BUFC_NUMMETADATALISTS;
+
+ list_start = &state->arcs_lists[offset];
+ evicted_list_start = &evicted_state->arcs_lists[offset];
+ list_count = ARC_BUFC_NUMDATALISTS;
+ idx = evict_data_offset;
+ }
+ for (bytes_remaining = 0, i = 0; i < list_count; i++)
+ bytes_remaining += evicted_state->arcs_lsize[i + offset];
+
+ count = 0;
+
+evict_start:
+ list = &list_start[idx];
+ evicted_list = &evicted_list_start[idx];
+ lock = ARCS_LOCK(state, (offset + idx));
+ evicted_lock = ARCS_LOCK(evicted_state, (offset + idx));
+
+ mutex_enter(lock);
+ mutex_enter(evicted_lock);
for (ab = list_tail(list); ab; ab = ab_prev) {
ab_prev = list_prev(list, ab);
+ bytes_remaining -= (ab->b_size * ab->b_datacnt);
/* prefetch buffers have a minimum lifespan */
if (HDR_IO_IN_PROGRESS(ab) ||
(spa && ab->b_spa != spa) ||
@@ -1536,18 +1882,36 @@ arc_evict(arc_state_t *state, spa_t *spa
mutex_exit(hash_lock);
if (bytes >= 0 && bytes_evicted >= bytes)
break;
+ if (bytes_remaining > 0) {
+ mutex_exit(evicted_lock);
+ mutex_exit(lock);
+ idx = ((idx + 1)&(list_count-1));
+ count++;
+ goto evict_start;
+ }
} else {
missed += 1;
}
}
- mutex_exit(&evicted_state->arcs_mtx);
- mutex_exit(&state->arcs_mtx);
-
- if (bytes_evicted < bytes)
- dprintf("only evicted %lld bytes from %x",
- (longlong_t)bytes_evicted, state);
+ mutex_exit(evicted_lock);
+ mutex_exit(lock);
+
+ idx = ((idx + 1)&(list_count-1));
+ count++;
+ if (bytes_evicted < bytes) {
+ if (count < list_count)
+ goto evict_start;
+ else
+ dprintf("only evicted %lld bytes from %x",
+ (longlong_t)bytes_evicted, state);
+ }
+ if (type == ARC_BUFC_METADATA)
+ evict_metadata_offset = idx;
+ else
+ evict_data_offset = idx;
+
if (skipped)
ARCSTAT_INCR(arcstat_evict_skip, skipped);
@@ -1586,14 +1950,28 @@ static void
arc_evict_ghost(arc_state_t *state, spa_t *spa, int64_t bytes)
{
arc_buf_hdr_t *ab, *ab_prev;
- list_t *list = &state->arcs_list[ARC_BUFC_DATA];
- kmutex_t *hash_lock;
+ list_t *list, *list_start;
+ kmutex_t *hash_lock, *lock;
uint64_t bytes_deleted = 0;
uint64_t bufs_skipped = 0;
+ static int evict_offset;
+ int list_count, idx = evict_offset;
+ int offset, count = 0;
ASSERT(GHOST_STATE(state));
-top:
- mutex_enter(&state->arcs_mtx);
+
+ /*
+ * data lists come after metadata lists
+ */
+ list_start = &state->arcs_lists[ARC_BUFC_NUMMETADATALISTS];
+ list_count = ARC_BUFC_NUMDATALISTS;
+ offset = ARC_BUFC_NUMMETADATALISTS;
+
+evict_start:
+ list = &list_start[idx];
+ lock = ARCS_LOCK(state, idx + offset);
+
+ mutex_enter(lock);
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-user
mailing list