svn commit: r208047 - in head: cddl/contrib/opensolaris/cmd/zdb cddl/contrib/opensolaris/cmd/ztest cddl/contrib/opensolaris/lib/libzpool/common cddl/contrib/opensolaris/lib/libzpool/common/sys sys/...

Martin Matuska mm at FreeBSD.org
Thu May 13 20:32:56 UTC 2010


Author: mm
Date: Thu May 13 20:32:56 2010
New Revision: 208047
URL: http://svn.freebsd.org/changeset/base/208047

Log:
  Import OpenSolaris revision 7837:001de5627df3
  It includes the following changes:
  - parallel reads in traversal code (Bug ID 6333409)
  - faster traversal for zfs send (Bug ID 6418042)
  - traversal code cleanup (Bug ID 6725675)
  - fixes for two scrub-related bugs (Bug IDs 6729696, 6730101)
  - fix assertion in dbuf_verify (Bug ID 6752226)
  - fix panic during zfs send with i/o errors (Bug ID 6577985)
  - replace P2CROSS with P2BOUNDARY (Bug ID 6725680)
  
  List of OpenSolaris Bug IDs:
  6333409, 6418042, 6757112, 6725668, 6725675, 6725680,
  6725698, 6729696, 6730101, 6752226, 6577985, 6755042
  
  Approved by:	pjd, delphij (mentor)
  Obtained from:	OpenSolaris (multiple Bug IDs)
  MFC after:	1 week

Modified:
  head/cddl/contrib/opensolaris/cmd/zdb/zdb.c
  head/cddl/contrib/opensolaris/cmd/ztest/ztest.c
  head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
  head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
  head/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c
  head/sys/cddl/boot/zfs/zfsimpl.h
  head/sys/cddl/compat/opensolaris/sys/sysmacros.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c

Modified: head/cddl/contrib/opensolaris/cmd/zdb/zdb.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zdb/zdb.c	Thu May 13 20:31:24 2010	(r208046)
+++ head/cddl/contrib/opensolaris/cmd/zdb/zdb.c	Thu May 13 20:32:56 2010	(r208047)
@@ -50,6 +50,7 @@
 #include <sys/zio_checksum.h>
 #include <sys/zio_compress.h>
 #include <sys/zfs_fuid.h>
+#include <sys/arc.h>
 #undef ZFS_MAXNAMELEN
 #undef verify
 #include <libzfs.h>
@@ -62,8 +63,6 @@ typedef void object_viewer_t(objset_t *,
 extern void dump_intent_log(zilog_t *);
 uint64_t *zopt_object = NULL;
 int zopt_objects = 0;
-int zdb_advance = ADVANCE_PRE;
-zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 };
 libzfs_handle_t *g_zfs;
 boolean_t zdb_sig_user_data = B_TRUE;
 int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
@@ -88,8 +87,8 @@ static void
 usage(void)
 {
 	(void) fprintf(stderr,
-	    "Usage: %s [-udibcsvL] [-U cachefile_path] [-O order] "
-	    "[-B os:obj:level:blkid] [-S user:cksumalg] "
+	    "Usage: %s [-udibcsv] [-U cachefile_path] "
+	    "[-S user:cksumalg] "
 	    "dataset [object...]\n"
 	    "       %s -C [pool]\n"
 	    "       %s -l dev\n"
@@ -109,13 +108,8 @@ usage(void)
 	    "dump blkptr signatures\n");
 	(void) fprintf(stderr, "	-v verbose (applies to all others)\n");
 	(void) fprintf(stderr, "        -l dump label contents\n");
-	(void) fprintf(stderr, "	-L live pool (allows some errors)\n");
-	(void) fprintf(stderr, "	-O [!]<pre|post|prune|data|holes> "
-	    "visitation order\n");
 	(void) fprintf(stderr, "	-U cachefile_path -- use alternate "
 	    "cachefile\n");
-	(void) fprintf(stderr, "	-B objset:object:level:blkid -- "
-	    "simulate bad block\n");
 	(void) fprintf(stderr, "        -R read and display block from a "
 	    "device\n");
 	(void) fprintf(stderr, "        -e Pool is exported/destroyed/"
@@ -138,7 +132,7 @@ fatal(const char *fmt, ...)
 	va_end(ap);
 	(void) fprintf(stderr, "\n");
 
-	exit(1);
+	abort();
 }
 
 static void
@@ -571,7 +565,7 @@ dump_dnode(objset_t *os, uint64_t object
 }
 
 static uint64_t
-blkid2offset(dnode_phys_t *dnp, int level, uint64_t blkid)
+blkid2offset(const dnode_phys_t *dnp, int level, uint64_t blkid)
 {
 	if (level < 0)
 		return (blkid);
@@ -602,115 +596,104 @@ sprintf_blkptr_compact(char *blkbuf, blk
 	    (u_longlong_t)bp->blk_birth);
 }
 
-/* ARGSUSED */
-static int
-zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a)
+static void
+print_indirect(blkptr_t *bp, const zbookmark_t *zb,
+    const dnode_phys_t *dnp)
 {
-	zbookmark_t *zb = &bc->bc_bookmark;
-	blkptr_t *bp = &bc->bc_blkptr;
-	void *data = bc->bc_data;
-	dnode_phys_t *dnp = bc->bc_dnode;
-	char blkbuf[BP_SPRINTF_LEN + 80];
+	char blkbuf[BP_SPRINTF_LEN];
 	int l;
 
-	if (bc->bc_errno) {
-		(void) sprintf(blkbuf,
-		    "Error %d reading <%llu, %llu, %lld, %llu>: ",
-		    bc->bc_errno,
-		    (u_longlong_t)zb->zb_objset,
-		    (u_longlong_t)zb->zb_object,
-		    (u_longlong_t)zb->zb_level,
-		    (u_longlong_t)zb->zb_blkid);
-		goto out;
-	}
-
-	if (zb->zb_level == -1) {
-		ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
-		ASSERT3U(BP_GET_LEVEL(bp), ==, 0);
-	} else {
-		ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
-		ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
-	}
-
-	if (zb->zb_level > 0) {
-		uint64_t fill = 0;
-		blkptr_t *bpx, *bpend;
-
-		for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx);
-		    bpx < bpend; bpx++) {
-			if (bpx->blk_birth != 0) {
-				fill += bpx->blk_fill;
-			} else {
-				ASSERT(bpx->blk_fill == 0);
-			}
-		}
-		ASSERT3U(fill, ==, bp->blk_fill);
-	}
+	ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
+	ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
 
-	if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) {
-		uint64_t fill = 0;
-		dnode_phys_t *dnx, *dnend;
-
-		for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT);
-		    dnx < dnend; dnx++) {
-			if (dnx->dn_type != DMU_OT_NONE)
-				fill++;
-		}
-		ASSERT3U(fill, ==, bp->blk_fill);
-	}
-
-	(void) sprintf(blkbuf, "%16llx ",
+	(void) printf("%16llx ",
 	    (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
 
 	ASSERT(zb->zb_level >= 0);
 
 	for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
 		if (l == zb->zb_level) {
-			(void) sprintf(blkbuf + strlen(blkbuf), "L%llx",
-			    (u_longlong_t)zb->zb_level);
+			(void) printf("L%llx", (u_longlong_t)zb->zb_level);
 		} else {
-			(void) sprintf(blkbuf + strlen(blkbuf), " ");
+			(void) printf(" ");
 		}
 	}
 
-out:
-	if (bp->blk_birth == 0) {
-		(void) sprintf(blkbuf + strlen(blkbuf), "<hole>");
-		(void) printf("%s\n", blkbuf);
-	} else {
-		sprintf_blkptr_compact(blkbuf + strlen(blkbuf), bp,
-		    dump_opt['d'] > 5 ? 1 : 0);
-		(void) printf("%s\n", blkbuf);
+	sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
+	(void) printf("%s\n", blkbuf);
+}
+
+#define	SET_BOOKMARK(zb, objset, object, level, blkid)  \
+{                                                       \
+	(zb)->zb_objset = objset;                       \
+	(zb)->zb_object = object;                       \
+	(zb)->zb_level = level;                         \
+	(zb)->zb_blkid = blkid;                         \
+}
+
+static int
+visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
+    blkptr_t *bp, const zbookmark_t *zb)
+{
+	int err;
+
+	if (bp->blk_birth == 0)
+		return (0);
+
+	print_indirect(bp, zb, dnp);
+
+	if (BP_GET_LEVEL(bp) > 0) {
+		uint32_t flags = ARC_WAIT;
+		int i;
+		blkptr_t *cbp;
+		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
+		arc_buf_t *buf;
+		uint64_t fill = 0;
+
+		err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf,
+		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
+		if (err)
+			return (err);
+
+		/* recursively visit blocks below this */
+		cbp = buf->b_data;
+		for (i = 0; i < epb; i++, cbp++) {
+			zbookmark_t czb;
+
+			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
+			    zb->zb_level - 1,
+			    zb->zb_blkid * epb + i);
+			err = visit_indirect(spa, dnp, cbp, &czb);
+			if (err)
+				break;
+			fill += cbp->blk_fill;
+		}
+		ASSERT3U(fill, ==, bp->blk_fill);
+		(void) arc_buf_remove_ref(buf, &buf);
 	}
 
-	return (bc->bc_errno ? ERESTART : 0);
+	return (err);
 }
 
 /*ARGSUSED*/
 static void
-dump_indirect(objset_t *os, uint64_t object, void *data, size_t size)
+dump_indirect(dnode_t *dn)
 {
-	traverse_handle_t *th;
-	uint64_t objset = dmu_objset_id(os);
-	int advance = zdb_advance;
+	dnode_phys_t *dnp = dn->dn_phys;
+	int j;
+	zbookmark_t czb;
 
 	(void) printf("Indirect blocks:\n");
 
-	if (object == 0)
-		advance |= ADVANCE_DATA;
-
-	th = traverse_init(dmu_objset_spa(os), zdb_indirect_cb, NULL, advance,
-	    ZIO_FLAG_CANFAIL);
-	th->th_noread = zdb_noread;
-
-	traverse_add_dnode(th, 0, -1ULL, objset, object);
-
-	while (traverse_more(th) == EAGAIN)
-		continue;
+	SET_BOOKMARK(&czb, dmu_objset_id(&dn->dn_objset->os),
+	    dn->dn_object, dnp->dn_nlevels - 1, 0);
+	for (j = 0; j < dnp->dn_nblkptr; j++) {
+		czb.zb_blkid = j;
+		(void) visit_indirect(dmu_objset_spa(&dn->dn_objset->os), dnp,
+		    &dnp->dn_blkptr[j], &czb);
+	}
 
 	(void) printf("\n");
-
-	traverse_fini(th);
 }
 
 /*ARGSUSED*/
@@ -1093,7 +1076,7 @@ dump_object(objset_t *os, uint64_t objec
 	}
 
 	if (verbosity >= 5)
-		dump_indirect(os, object, NULL, 0);
+		dump_indirect(dn);
 
 	if (verbosity >= 5) {
 		/*
@@ -1458,18 +1441,17 @@ typedef struct zdb_blkstats {
 #define	DMU_OT_DEFERRED	DMU_OT_NONE
 #define	DMU_OT_TOTAL	DMU_OT_NUMTYPES
 
-#define	ZB_TOTAL	ZB_MAXLEVEL
+#define	ZB_TOTAL	DN_MAX_LEVELS
 
 typedef struct zdb_cb {
 	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1];
 	uint64_t	zcb_errors[256];
-	traverse_blk_cache_t *zcb_cache;
 	int		zcb_readfails;
 	int		zcb_haderrors;
 } zdb_cb_t;
 
 static void
-zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type)
+zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
 {
 	for (int i = 0; i < 4; i++) {
 		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
@@ -1485,7 +1467,7 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zc
 	if (dump_opt['S']) {
 		boolean_t print_sig;
 
-		print_sig  = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
+		print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
 		    BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS);
 
 		if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg)
@@ -1507,56 +1489,55 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zc
 		}
 	}
 
-	if (!dump_opt['L'])
-		VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
-		    NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
+	VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
+	    NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
 }
 
 static int
-zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
+zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
+    const dnode_phys_t *dnp, void *arg)
 {
-	zbookmark_t *zb = &bc->bc_bookmark;
 	zdb_cb_t *zcb = arg;
-	blkptr_t *bp = &bc->bc_blkptr;
-	dmu_object_type_t type = BP_GET_TYPE(bp);
 	char blkbuf[BP_SPRINTF_LEN];
-	int error = 0;
 
-	ASSERT(!BP_IS_HOLE(bp));
+	if (bp == NULL)
+		return (0);
 
-	zdb_count_block(spa, zcb, bp, type);
+	zdb_count_block(spa, zcb, bp, BP_GET_TYPE(bp));
 
-	if (bc->bc_errno) {
-		if (zcb->zcb_readfails++ < 10 && dump_opt['L']) {
-			uberblock_t ub;
-			vdev_uberblock_load(NULL, spa->spa_root_vdev, &ub);
-			if (ub.ub_txg != 0)
-				spa->spa_ubsync = ub;
-			error = EAGAIN;
-		} else {
+	if (dump_opt['c'] || dump_opt['S']) {
+		int ioerr, size;
+		void *data;
+
+		size = BP_GET_LSIZE(bp);
+		data = malloc(size);
+		ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
+		    NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
+		    ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB, zb));
+		free(data);
+
+		/* We expect io errors on intent log */
+		if (ioerr && BP_GET_TYPE(bp) != DMU_OT_INTENT_LOG) {
 			zcb->zcb_haderrors = 1;
-			zcb->zcb_errors[bc->bc_errno]++;
-			error = ERESTART;
-		}
+			zcb->zcb_errors[ioerr]++;
 
-		if (dump_opt['b'] >= 3 || (dump_opt['b'] >= 2 && bc->bc_errno))
-			sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
-		else
-			blkbuf[0] = '\0';
+			if (dump_opt['b'] >= 2)
+				sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
+			else
+				blkbuf[0] = '\0';
 
-		if (!dump_opt['S']) {
-			(void) printf("zdb_blkptr_cb: Got error %d reading "
-			    "<%llu, %llu, %lld, %llx> %s -- %s\n",
-			    bc->bc_errno,
-			    (u_longlong_t)zb->zb_objset,
-			    (u_longlong_t)zb->zb_object,
-			    (u_longlong_t)zb->zb_level,
-			    (u_longlong_t)zb->zb_blkid,
-			    blkbuf,
-			    error == EAGAIN ? "retrying" : "skipping");
+			if (!dump_opt['S']) {
+				(void) printf("zdb_blkptr_cb: "
+				    "Got error %d reading "
+				    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
+				    ioerr,
+				    (u_longlong_t)zb->zb_objset,
+				    (u_longlong_t)zb->zb_object,
+				    (u_longlong_t)zb->zb_level,
+				    (u_longlong_t)zb->zb_blkid,
+				    blkbuf);
+			}
 		}
-
-		return (error);
 	}
 
 	zcb->zcb_readfails = 0;
@@ -1566,8 +1547,8 @@ zdb_blkptr_cb(traverse_blk_cache_t *bc, 
 		(void) printf("objset %llu object %llu offset 0x%llx %s\n",
 		    (u_longlong_t)zb->zb_objset,
 		    (u_longlong_t)zb->zb_object,
-		    (u_longlong_t)blkid2offset(bc->bc_dnode,
-		    zb->zb_level, zb->zb_blkid), blkbuf);
+		    (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid),
+		    blkbuf);
 	}
 
 	return (0);
@@ -1576,22 +1557,12 @@ zdb_blkptr_cb(traverse_blk_cache_t *bc, 
 static int
 dump_block_stats(spa_t *spa)
 {
-	traverse_handle_t *th;
 	zdb_cb_t zcb = { 0 };
-	traverse_blk_cache_t dummy_cache = { 0 };
 	zdb_blkstats_t *zb, *tzb;
 	uint64_t alloc, space, logalloc;
 	vdev_t *rvd = spa->spa_root_vdev;
 	int leaks = 0;
-	int advance = zdb_advance;
-	int c, e, flags;
-
-	zcb.zcb_cache = &dummy_cache;
-
-	if (dump_opt['c'] || dump_opt['S'])
-		advance |= ADVANCE_DATA;
-
-	advance |= ADVANCE_PRUNE | ADVANCE_ZIL;
+	int c, e;
 
 	if (!dump_opt['S']) {
 		(void) printf("\nTraversing all blocks to %sverify"
@@ -1607,8 +1578,7 @@ dump_block_stats(spa_t *spa)
 	 * it's not part of any space map) is a double allocation,
 	 * reference to a freed block, or an unclaimed log block.
 	 */
-	if (!dump_opt['L'])
-		zdb_leak_init(spa);
+	zdb_leak_init(spa);
 
 	/*
 	 * If there's a deferred-free bplist, process that first.
@@ -1634,22 +1604,7 @@ dump_block_stats(spa_t *spa)
 		bplist_close(bpl);
 	}
 
-	/*
-	 * Now traverse the pool.  If we're reading all data to verify
-	 * checksums, do a scrubbing read so that we validate all copies.
-	 */
-	flags = ZIO_FLAG_CANFAIL;
-	if (advance & ADVANCE_DATA)
-		flags |= ZIO_FLAG_SCRUB;
-	th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags);
-	th->th_noread = zdb_noread;
-
-	traverse_add_pool(th, 0, spa_first_txg(spa) + TXG_CONCURRENT_STATES);
-
-	while (traverse_more(th) == EAGAIN)
-		continue;
-
-	traverse_fini(th);
+	zcb.zcb_haderrors |= traverse_pool(spa, zdb_blkptr_cb, &zcb);
 
 	if (zcb.zcb_haderrors && !dump_opt['S']) {
 		(void) printf("\nError counts:\n\n");
@@ -1665,8 +1620,7 @@ dump_block_stats(spa_t *spa)
 	/*
 	 * Report any leaked segments.
 	 */
-	if (!dump_opt['L'])
-		zdb_leak_fini(spa);
+	zdb_leak_fini(spa);
 
 	/*
 	 * If we're interested in printing out the blkptr signatures,
@@ -1676,10 +1630,6 @@ dump_block_stats(spa_t *spa)
 	if (dump_opt['S'])
 		return (zcb.zcb_haderrors ? 3 : 0);
 
-	if (dump_opt['L'])
-		(void) printf("\n\n *** Live pool traversal; "
-		    "block counts are only approximate ***\n\n");
-
 	alloc = spa_get_alloc(spa);
 	space = spa_get_space(spa);
 
@@ -2285,7 +2235,6 @@ main(int argc, char **argv)
 	int dump_all = 1;
 	int verbose = 0;
 	int error;
-	int flag, set;
 	int exported = 0;
 	char *vdev_dir = NULL;
 
@@ -2294,7 +2243,7 @@ main(int argc, char **argv)
 
 	dprintf_setup(&argc, argv);
 
-	while ((c = getopt(argc, argv, "udibcsvCLO:B:S:U:lRep:")) != -1) {
+	while ((c = getopt(argc, argv, "udibcsvCS:U:lRep:")) != -1) {
 		switch (c) {
 		case 'u':
 		case 'd':
@@ -2308,49 +2257,6 @@ main(int argc, char **argv)
 			dump_opt[c]++;
 			dump_all = 0;
 			break;
-		case 'L':
-			dump_opt[c]++;
-			break;
-		case 'O':
-			endstr = optarg;
-			if (endstr[0] == '!') {
-				endstr++;
-				set = 0;
-			} else {
-				set = 1;
-			}
-			if (strcmp(endstr, "post") == 0) {
-				flag = ADVANCE_PRE;
-				set = !set;
-			} else if (strcmp(endstr, "pre") == 0) {
-				flag = ADVANCE_PRE;
-			} else if (strcmp(endstr, "prune") == 0) {
-				flag = ADVANCE_PRUNE;
-			} else if (strcmp(endstr, "data") == 0) {
-				flag = ADVANCE_DATA;
-			} else if (strcmp(endstr, "holes") == 0) {
-				flag = ADVANCE_HOLES;
-			} else {
-				usage();
-			}
-			if (set)
-				zdb_advance |= flag;
-			else
-				zdb_advance &= ~flag;
-			break;
-		case 'B':
-			endstr = optarg - 1;
-			zdb_noread.zb_objset = strtoull(endstr + 1, &endstr, 0);
-			zdb_noread.zb_object = strtoull(endstr + 1, &endstr, 0);
-			zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0);
-			zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16);
-			(void) printf("simulating bad block "
-			    "<%llu, %llu, %lld, %llx>\n",
-			    (u_longlong_t)zdb_noread.zb_objset,
-			    (u_longlong_t)zdb_noread.zb_object,
-			    (u_longlong_t)zdb_noread.zb_level,
-			    (u_longlong_t)zdb_noread.zb_blkid);
-			break;
 		case 'v':
 			verbose++;
 			break;
@@ -2387,21 +2293,17 @@ main(int argc, char **argv)
 		}
 	}
 
-	if (vdev_dir != NULL && exported == 0)
-		(void) fatal("-p option requires use of -e\n");
+	if (vdev_dir != NULL && exported == 0) {
+		(void) fprintf(stderr, "-p option requires use of -e\n");
+		usage();
+	}
 
 	kernel_init(FREAD);
 	g_zfs = libzfs_init();
 	ASSERT(g_zfs != NULL);
 
-	/*
-	 * Disable vdev caching.  If we don't do this, live pool traversal
-	 * won't make progress because it will never see disk updates.
-	 */
-	zfs_vdev_cache_size = 0;
-
 	for (c = 0; c < 256; c++) {
-		if (dump_all && c != 'L' && c != 'l' && c != 'R')
+		if (dump_all && c != 'l' && c != 'R')
 			dump_opt[c] = 1;
 		if (dump_opt[c])
 			dump_opt[c] += verbose;

Modified: head/cddl/contrib/opensolaris/cmd/ztest/ztest.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/ztest/ztest.c	Thu May 13 20:31:24 2010	(r208046)
+++ head/cddl/contrib/opensolaris/cmd/ztest/ztest.c	Thu May 13 20:32:56 2010	(r208047)
@@ -77,7 +77,6 @@
 #include <sys/dmu.h>
 #include <sys/txg.h>
 #include <sys/zap.h>
-#include <sys/dmu_traverse.h>
 #include <sys/dmu_objset.h>
 #include <sys/poll.h>
 #include <sys/stat.h>
@@ -151,7 +150,6 @@ typedef struct ztest_args {
 	hrtime_t	za_start;
 	hrtime_t	za_stop;
 	hrtime_t	za_kill;
-	traverse_handle_t *za_th;
 	/*
 	 * Thread-local variables can go here to aid debugging.
 	 */
@@ -206,7 +204,6 @@ ztest_info_t ztest_info[] = {
 	{ ztest_dmu_object_alloc_free,		1,	&zopt_always	},
 	{ ztest_zap,				30,	&zopt_always	},
 	{ ztest_zap_parallel,			100,	&zopt_always	},
-	{ ztest_traverse,			1,	&zopt_often	},
 	{ ztest_dsl_prop_get_set,		1,	&zopt_sometimes	},
 	{ ztest_dmu_objset_create_destroy,	1,	&zopt_sometimes },
 	{ ztest_dmu_snapshot_create_destroy,	1,	&zopt_sometimes },
@@ -1447,152 +1444,6 @@ ztest_dmu_snapshot_create_destroy(ztest_
 	(void) rw_unlock(&ztest_shared->zs_name_lock);
 }
 
-#define	ZTEST_TRAVERSE_BLOCKS	1000
-
-static int
-ztest_blk_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
-{
-	ztest_args_t *za = arg;
-	zbookmark_t *zb = &bc->bc_bookmark;
-	blkptr_t *bp = &bc->bc_blkptr;
-	dnode_phys_t *dnp = bc->bc_dnode;
-	traverse_handle_t *th = za->za_th;
-	uint64_t size = BP_GET_LSIZE(bp);
-
-	/*
-	 * Level -1 indicates the objset_phys_t or something in its intent log.
-	 */
-	if (zb->zb_level == -1) {
-		if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
-			ASSERT3U(zb->zb_object, ==, 0);
-			ASSERT3U(zb->zb_blkid, ==, 0);
-			ASSERT3U(size, ==, sizeof (objset_phys_t));
-			za->za_zil_seq = 0;
-		} else if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) {
-			ASSERT3U(zb->zb_object, ==, 0);
-			ASSERT3U(zb->zb_blkid, >, za->za_zil_seq);
-			za->za_zil_seq = zb->zb_blkid;
-		} else {
-			ASSERT3U(zb->zb_object, !=, 0);	/* lr_write_t */
-		}
-
-		return (0);
-	}
-
-	ASSERT(dnp != NULL);
-
-	if (bc->bc_errno)
-		return (ERESTART);
-
-	/*
-	 * Once in a while, abort the traverse.   We only do this to odd
-	 * instance numbers to ensure that even ones can run to completion.
-	 */
-	if ((za->za_instance & 1) && ztest_random(10000) == 0)
-		return (EINTR);
-
-	if (bp->blk_birth == 0) {
-		ASSERT(th->th_advance & ADVANCE_HOLES);
-		return (0);
-	}
-
-	if (zb->zb_level == 0 && !(th->th_advance & ADVANCE_DATA) &&
-	    bc == &th->th_cache[ZB_DN_CACHE][0]) {
-		ASSERT(bc->bc_data == NULL);
-		return (0);
-	}
-
-	ASSERT(bc->bc_data != NULL);
-
-	/*
-	 * This is an expensive question, so don't ask it too often.
-	 */
-	if (((za->za_random ^ th->th_callbacks) & 0xff) == 0) {
-		void *xbuf = umem_alloc(size, UMEM_NOFAIL);
-		if (arc_tryread(spa, bp, xbuf) == 0) {
-			ASSERT(bcmp(bc->bc_data, xbuf, size) == 0);
-		}
-		umem_free(xbuf, size);
-	}
-
-	if (zb->zb_level > 0) {
-		ASSERT3U(size, ==, 1ULL << dnp->dn_indblkshift);
-		return (0);
-	}
-
-	ASSERT(zb->zb_level == 0);
-	ASSERT3U(size, ==, dnp->dn_datablkszsec << DEV_BSHIFT);
-
-	return (0);
-}
-
-/*
- * Verify that live pool traversal works.
- */
-void
-ztest_traverse(ztest_args_t *za)
-{
-	spa_t *spa = za->za_spa;
-	traverse_handle_t *th = za->za_th;
-	int rc, advance;
-	uint64_t cbstart, cblimit;
-
-	if (th == NULL) {
-		advance = 0;
-
-		if (ztest_random(2) == 0)
-			advance |= ADVANCE_PRE;
-
-		if (ztest_random(2) == 0)
-			advance |= ADVANCE_PRUNE;
-
-		if (ztest_random(2) == 0)
-			advance |= ADVANCE_DATA;
-
-		if (ztest_random(2) == 0)
-			advance |= ADVANCE_HOLES;
-
-		if (ztest_random(2) == 0)
-			advance |= ADVANCE_ZIL;
-
-		th = za->za_th = traverse_init(spa, ztest_blk_cb, za, advance,
-		    ZIO_FLAG_CANFAIL);
-
-		traverse_add_pool(th, 0, -1ULL);
-	}
-
-	advance = th->th_advance;
-	cbstart = th->th_callbacks;
-	cblimit = cbstart + ((advance & ADVANCE_DATA) ? 100 : 1000);
-
-	while ((rc = traverse_more(th)) == EAGAIN && th->th_callbacks < cblimit)
-		continue;
-
-	if (zopt_verbose >= 5)
-		(void) printf("traverse %s%s%s%s %llu blocks to "
-		    "<%llu, %llu, %lld, %llx>%s\n",
-		    (advance & ADVANCE_PRE) ? "pre" : "post",
-		    (advance & ADVANCE_PRUNE) ? "|prune" : "",
-		    (advance & ADVANCE_DATA) ? "|data" : "",
-		    (advance & ADVANCE_HOLES) ? "|holes" : "",
-		    (u_longlong_t)(th->th_callbacks - cbstart),
-		    (u_longlong_t)th->th_lastcb.zb_objset,
-		    (u_longlong_t)th->th_lastcb.zb_object,
-		    (u_longlong_t)th->th_lastcb.zb_level,
-		    (u_longlong_t)th->th_lastcb.zb_blkid,
-		    rc == 0 ? " [done]" :
-		    rc == EINTR ? " [aborted]" :
-		    rc == EAGAIN ? "" :
-		    strerror(rc));
-
-	if (rc != EAGAIN) {
-		if (rc != 0 && rc != EINTR)
-			fatal(0, "traverse_more(%p) = %d", th, rc);
-		traverse_fini(th);
-		za->za_th = NULL;
-	}
-}
-
 /*
  * Verify dsl_dataset_promote handles EBUSY
  */
@@ -3067,12 +2918,12 @@ ztest_verify_blocks(char *pool)
 	isa = strdup(isa);
 	/* LINTED */
 	(void) sprintf(bin,
-	    "/usr/sbin%.*s/zdb -bc%s%s -U /tmp/zpool.cache -O %s %s",
+	    "/usr/sbin%.*s/zdb -bc%s%s -U /tmp/zpool.cache %s",
 	    isalen,
 	    isa,
 	    zopt_verbose >= 3 ? "s" : "",
 	    zopt_verbose >= 4 ? "v" : "",
-	    ztest_random(2) == 0 ? "pre" : "post", pool);
+	    pool);
 	free(isa);
 
 	if (zopt_verbose >= 5)
@@ -3438,8 +3289,6 @@ ztest_run(char *pool)
 
 	while (--t >= 0) {
 		VERIFY(thr_join(za[t].za_thread, NULL, NULL) == 0);
-		if (za[t].za_th)
-			traverse_fini(za[t].za_th);
 		if (t < zopt_datasets) {
 			zil_close(za[t].za_zilog);
 			dmu_objset_close(za[t].za_os);

Modified: head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c	Thu May 13 20:31:24 2010	(r208046)
+++ head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c	Thu May 13 20:32:56 2010	(r208047)
@@ -23,8 +23,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <assert.h>
 #include <fcntl.h>
 #include <poll.h>
@@ -842,6 +840,8 @@ kernel_init(int mode)
 	VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
 	VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
 
+	system_taskq_init();
+
 	spa_init(mode);
 }
 

Modified: head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h	Thu May 13 20:31:24 2010	(r208046)
+++ head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h	Thu May 13 20:32:56 2010	(r208047)
@@ -334,11 +334,14 @@ typedef void (task_func_t)(void *);
 #define	TQ_NOSLEEP	KM_NOSLEEP	/* cannot block for memory; may fail */
 #define	TQ_NOQUEUE	0x02	/* Do not enqueue if can't dispatch */
 
+extern taskq_t *system_taskq;
+
 extern taskq_t	*taskq_create(const char *, int, pri_t, int, int, uint_t);
 extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
 extern void	taskq_destroy(taskq_t *);
 extern void	taskq_wait(taskq_t *);
 extern int	taskq_member(taskq_t *, void *);
+extern void	system_taskq_init(void);
 
 #define	XVA_MAPSIZE	3
 #define	XVA_MAGIC	0x78766174

Modified: head/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c	Thu May 13 20:31:24 2010	(r208046)
+++ head/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c	Thu May 13 20:32:56 2010	(r208047)
@@ -19,15 +19,14 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/zfs_context.h>
 
 int taskq_now;
+taskq_t *system_taskq;
 
 typedef struct task {
 	struct task	*task_next;
@@ -253,3 +252,10 @@ taskq_member(taskq_t *tq, void *t)
 
 	return (0);
 }
+
+void
+system_taskq_init(void)
+{
+	system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512,
+	    TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
+}

Modified: head/sys/cddl/boot/zfs/zfsimpl.h
==============================================================================
--- head/sys/cddl/boot/zfs/zfsimpl.h	Thu May 13 20:31:24 2010	(r208046)
+++ head/sys/cddl/boot/zfs/zfsimpl.h	Thu May 13 20:32:56 2010	(r208047)
@@ -66,7 +66,7 @@
 #define	P2ROUNDUP(x, align)		(-(-(x) & -(align)))
 #define	P2END(x, align)			(-(~(x) & -(align)))
 #define	P2PHASEUP(x, align, phase)	((phase) - (((phase) - (x)) & -(align)))
-#define	P2CROSS(x, y, align)		(((x) ^ (y)) > (align) - 1)
+#define	P2BOUNDARY(off, len, align)	(((off) ^ ((off) + (len) - 1)) > (align) - 1)
 
 /*
  * General-purpose 32-bit and 64-bit bitfield encodings.

Modified: head/sys/cddl/compat/opensolaris/sys/sysmacros.h
==============================================================================
--- head/sys/cddl/compat/opensolaris/sys/sysmacros.h	Thu May 13 20:31:24 2010	(r208046)
+++ head/sys/cddl/compat/opensolaris/sys/sysmacros.h	Thu May 13 20:32:56 2010	(r208047)
@@ -43,6 +43,10 @@ extern "C" {
 #define	ABS(a)	((a) < 0 ? -(a) : (a))
 #endif
 
+#ifndef	SIGNOF
+#define	SIGNOF(a)	((a) < 0 ? -1 : (a) > 0)
+#endif
+
 /*
  * Macro for checking power of 2 address alignment.
  */
@@ -63,7 +67,7 @@ extern "C" {
 #define	P2ROUNDUP(x, align)		(-(-(x) & -(align)))
 #define	P2END(x, align)			(-(~(x) & -(align)))
 #define	P2PHASEUP(x, align, phase)	((phase) - (((phase) - (x)) & -(align)))
-#define	P2CROSS(x, y, align)		(((x) ^ (y)) > (align) - 1)
+#define	P2BOUNDARY(off, len, align)	(((off) ^ ((off) + (len) - 1)) > (align) - 1)
 /*
  * Determine whether two numbers have the same high-order bit.
  */

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Thu May 13 20:31:24 2010	(r208046)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Thu May 13 20:32:56 2010	(r208047)
@@ -308,20 +308,18 @@ dbuf_verify(dmu_buf_impl_t *db)
 		ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
 	}
 
-	if (db->db_level == 0) {
-		/* we can be momentarily larger in dnode_set_blksz() */
-		if (db->db_blkid != DB_BONUS_BLKID && dn) {
-			ASSERT3U(db->db.db_size, >=, dn->dn_datablksz);
-		}
-		if (db->db.db_object == DMU_META_DNODE_OBJECT) {
-			dbuf_dirty_record_t *dr = db->db_data_pending;
-			/*
-			 * it should only be modified in syncing
-			 * context, so make sure we only have
-			 * one copy of the data.
-			 */
-			ASSERT(dr == NULL || dr->dt.dl.dr_data == db->db_buf);
-		}
+	/*
+	 * We can't assert that db_size matches dn_datablksz because it
+	 * can be momentarily different when another thread is doing
+	 * dnode_set_blksz().
+	 */
+	if (db->db_level == 0 && db->db.db_object == DMU_META_DNODE_OBJECT) {
+		dbuf_dirty_record_t *dr = db->db_data_pending;
+		/*
+		 * It should only be modified in syncing context, so
+		 * make sure we only have one copy of the data.
+		 */
+		ASSERT(dr == NULL || dr->dt.dl.dr_data == db->db_buf);
 	}
 
 	/* verify db->db_blkptr */

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c	Thu May 13 20:31:24 2010	(r208046)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c	Thu May 13 20:32:56 2010	(r208047)
@@ -23,8 +23,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/dmu.h>
 #include <sys/dmu_impl.h>
 #include <sys/dmu_tx.h>
@@ -172,66 +170,59 @@ dump_dnode(struct backuparg *ba, uint64_
 	(level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)))
 
 static int
-backup_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
+backup_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
+    const dnode_phys_t *dnp, void *arg)
 {
 	struct backuparg *ba = arg;
-	uint64_t object = bc->bc_bookmark.zb_object;
-	int level = bc->bc_bookmark.zb_level;
-	uint64_t blkid = bc->bc_bookmark.zb_blkid;
-	blkptr_t *bp = bc->bc_blkptr.blk_birth ? &bc->bc_blkptr : NULL;
 	dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
-	void *data = bc->bc_data;
 	int err = 0;
 
 	if (issig(JUSTLOOKING) && issig(FORREAL))
 		return (EINTR);
 
-	ASSERT(data || bp == NULL);
-
-	if (bp == NULL && object == 0) {
-		uint64_t span = BP_SPAN(bc->bc_dnode, level);
-		uint64_t dnobj = (blkid * span) >> DNODE_SHIFT;
+	if (bp == NULL && zb->zb_object == 0) {
+		uint64_t span = BP_SPAN(dnp, zb->zb_level);
+		uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
 		err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT);
 	} else if (bp == NULL) {
-		uint64_t span = BP_SPAN(bc->bc_dnode, level);
-		err = dump_free(ba, object, blkid * span, span);
-	} else if (data && level == 0 && type == DMU_OT_DNODE) {
-		dnode_phys_t *blk = data;
+		uint64_t span = BP_SPAN(dnp, zb->zb_level);
+		err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span);
+	} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
+		return (0);
+	} else if (type == DMU_OT_DNODE) {
+		dnode_phys_t *blk;
 		int i;
 		int blksz = BP_GET_LSIZE(bp);
+		uint32_t aflags = ARC_WAIT;
+		arc_buf_t *abuf;
 
+		if (arc_read_nolock(NULL, spa, bp,
+		    arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
+		    ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
+			return (EIO);
+
+		blk = abuf->b_data;
 		for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
-			uint64_t dnobj =
-			    (blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
+			uint64_t dnobj = (zb->zb_blkid <<
+			    (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
 			err = dump_dnode(ba, dnobj, blk+i);
 			if (err)
 				break;
 		}
-	} else if (level == 0 &&
-	    type != DMU_OT_DNODE && type != DMU_OT_OBJSET) {
+		(void) arc_buf_remove_ref(abuf, &abuf);
+	} else { /* it's a level-0 block of a regular object */
+		uint32_t aflags = ARC_WAIT;
+		arc_buf_t *abuf;
 		int blksz = BP_GET_LSIZE(bp);
-		if (data == NULL) {
-			uint32_t aflags = ARC_WAIT;
-			arc_buf_t *abuf;
-			zbookmark_t zb;
-
-			zb.zb_objset = ba->os->os->os_dsl_dataset->ds_object;
-			zb.zb_object = object;
-			zb.zb_level = level;
-			zb.zb_blkid = blkid;
-			(void) arc_read_nolock(NULL, spa, bp,
-			    arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
-			    ZIO_FLAG_MUSTSUCCEED, &aflags, &zb);
-
-			if (abuf) {
-				err = dump_data(ba, type, object, blkid * blksz,
-				    blksz, abuf->b_data);
-				(void) arc_buf_remove_ref(abuf, &abuf);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list