svn commit: r213197 - in head: cddl/contrib/opensolaris/cmd/zpool cddl/contrib/opensolaris/lib/libzfs/common sys/cddl/contrib/opensolaris/uts/common/fs/zfs sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys

Martin Matuska mm at FreeBSD.org
Mon Sep 27 09:05:51 UTC 2010


Author: mm
Date: Mon Sep 27 09:05:51 2010
New Revision: 213197
URL: http://svn.freebsd.org/changeset/base/213197

Log:
  Enable offlining of log devices.
  
  OpenSolaris revision and Bug IDs:
  
  9701:cc5b64682e64
  6803605	should be able to offline log devices
  6726045	vdev_deflate_ratio is not set when offlining a log device
  6599442	zpool import has faults in the display
  
  Approved by:	delphij (mentor)
  Obtained from:	OpenSolaris (Bug ID 6803605, 6726045, 6599442)
  MFC after:	3 weeks

Modified:
  head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
  head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
  head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
  head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c

Modified: head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c	Mon Sep 27 06:04:17 2010	(r213196)
+++ head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c	Mon Sep 27 09:05:51 2010	(r213197)
@@ -980,14 +980,189 @@ max_width(zpool_handle_t *zhp, nvlist_t 
 	return (max);
 }
 
+typedef struct spare_cbdata {
+	uint64_t	cb_guid;
+	zpool_handle_t	*cb_zhp;
+} spare_cbdata_t;
+
+static boolean_t
+find_vdev(nvlist_t *nv, uint64_t search)
+{
+	uint64_t guid;
+	nvlist_t **child;
+	uint_t c, children;
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
+	    search == guid)
+		return (B_TRUE);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++)
+			if (find_vdev(child[c], search))
+				return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+static int
+find_spare(zpool_handle_t *zhp, void *data)
+{
+	spare_cbdata_t *cbp = data;
+	nvlist_t *config, *nvroot;
+
+	config = zpool_get_config(zhp, NULL);
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+
+	if (find_vdev(nvroot, cbp->cb_guid)) {
+		cbp->cb_zhp = zhp;
+		return (1);
+	}
+
+	zpool_close(zhp);
+	return (0);
+}
+
+/*
+ * Print out configuration state as requested by status_callback.
+ */
+void
+print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
+    int namewidth, int depth, boolean_t isspare)
+{
+	nvlist_t **child;
+	uint_t c, children;
+	vdev_stat_t *vs;
+	char rbuf[6], wbuf[6], cbuf[6], repaired[7];
+	char *vname;
+	uint64_t notpresent;
+	spare_cbdata_t cb;
+	char *state;
+
+	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&vs, &c) == 0);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0)
+		children = 0;
+
+	state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
+	if (isspare) {
+		/*
+		 * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
+		 * online drives.
+		 */
+		if (vs->vs_aux == VDEV_AUX_SPARED)
+			state = "INUSE";
+		else if (vs->vs_state == VDEV_STATE_HEALTHY)
+			state = "AVAIL";
+	}
+
+	(void) printf("\t%*s%-*s  %-8s", depth, "", namewidth - depth,
+	    name, state);
+
+	if (!isspare) {
+		zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
+		zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
+		zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
+		(void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
+	}
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
+	    &notpresent) == 0) {
+		char *path;
+		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
+		(void) printf("  was %s", path);
+	} else if (vs->vs_aux != 0) {
+		(void) printf("  ");
+
+		switch (vs->vs_aux) {
+		case VDEV_AUX_OPEN_FAILED:
+			(void) printf(gettext("cannot open"));
+			break;
+
+		case VDEV_AUX_BAD_GUID_SUM:
+			(void) printf(gettext("missing device"));
+			break;
+
+		case VDEV_AUX_NO_REPLICAS:
+			(void) printf(gettext("insufficient replicas"));
+			break;
+
+		case VDEV_AUX_VERSION_NEWER:
+			(void) printf(gettext("newer version"));
+			break;
+
+		case VDEV_AUX_SPARED:
+			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
+			    &cb.cb_guid) == 0);
+			if (zpool_iter(g_zfs, find_spare, &cb) == 1) {
+				if (strcmp(zpool_get_name(cb.cb_zhp),
+				    zpool_get_name(zhp)) == 0)
+					(void) printf(gettext("currently in "
+					    "use"));
+				else
+					(void) printf(gettext("in use by "
+					    "pool '%s'"),
+					    zpool_get_name(cb.cb_zhp));
+				zpool_close(cb.cb_zhp);
+			} else {
+				(void) printf(gettext("currently in use"));
+			}
+			break;
+
+		case VDEV_AUX_ERR_EXCEEDED:
+			(void) printf(gettext("too many errors"));
+			break;
+
+		case VDEV_AUX_IO_FAILURE:
+			(void) printf(gettext("experienced I/O failures"));
+			break;
+
+		case VDEV_AUX_BAD_LOG:
+			(void) printf(gettext("bad intent log"));
+			break;
+
+		default:
+			(void) printf(gettext("corrupted data"));
+			break;
+		}
+	} else if (vs->vs_scrub_repaired != 0 && children == 0) {
+		/*
+		 * Report bytes resilvered/repaired on leaf devices.
+		 */
+		zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired));
+		(void) printf(gettext("  %s %s"), repaired,
+		    (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
+		    "resilvered" : "repaired");
+	}
+
+	(void) printf("\n");
+
+	for (c = 0; c < children; c++) {
+		uint64_t is_log = B_FALSE;
+
+		/* Don't print logs here */
+		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+		    &is_log);
+		if (is_log)
+			continue;
+		vname = zpool_vdev_name(g_zfs, zhp, child[c]);
+		print_status_config(zhp, vname, child[c],
+		    namewidth, depth + 2, isspare);
+		free(vname);
+	}
+}
+
 
 /*
  * Print the configuration of an exported pool.  Iterate over all vdevs in the
  * pool, printing out the name and status for each one.
  */
 void
-print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth,
-    boolean_t print_logs)
+print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
 {
 	nvlist_t **child;
 	uint_t c, children;
@@ -1044,12 +1219,11 @@ print_import_config(const char *name, nv
 
 		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
 		    &is_log);
-		if ((is_log && !print_logs) || (!is_log && print_logs))
+		if (is_log)
 			continue;
 
 		vname = zpool_vdev_name(g_zfs, NULL, child[c]);
-		print_import_config(vname, child[c],
-		    namewidth, depth + 2, B_FALSE);
+		print_import_config(vname, child[c], namewidth, depth + 2);
 		free(vname);
 	}
 
@@ -1075,6 +1249,43 @@ print_import_config(const char *name, nv
 }
 
 /*
+ * Print log vdevs.
+ * Logs are recorded as top level vdevs in the main pool child array
+ * but with "is_log" set to 1. We use either print_status_config() or
+ * print_import_config() to print the top level logs then any log
+ * children (eg mirrored slogs) are printed recursively - which
+ * works because only the top level vdev is marked "is_log"
+ */
+static void
+print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth, boolean_t verbose)
+{
+	uint_t c, children;
+	nvlist_t **child;
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child,
+	    &children) != 0)
+		return;
+
+	(void) printf(gettext("\tlogs\n"));
+
+	for (c = 0; c < children; c++) {
+		uint64_t is_log = B_FALSE;
+		char *name;
+
+		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+		    &is_log);
+		if (!is_log)
+			continue;
+		name = zpool_vdev_name(g_zfs, zhp, child[c]);
+		if (verbose)
+			print_status_config(zhp, name, child[c], namewidth,
+			    2, B_FALSE);
+		else
+			print_import_config(name, child[c], namewidth, 2);
+		free(name);
+	}
+}
+/*
  * Display the status for the given pool.
  */
 static void
@@ -1242,11 +1453,9 @@ show_import(nvlist_t *config)
 	if (namewidth < 10)
 		namewidth = 10;
 
-	print_import_config(name, nvroot, namewidth, 0, B_FALSE);
-	if (num_logs(nvroot) > 0) {
-		(void) printf(gettext("\tlogs\n"));
-		print_import_config(name, nvroot, namewidth, 0, B_TRUE);
-	}
+	print_import_config(name, nvroot, namewidth, 0);
+	if (num_logs(nvroot) > 0)
+		print_logs(NULL, nvroot, namewidth, B_FALSE);
 
 	if (reason == ZPOOL_STATUS_BAD_GUID_SUM) {
 		(void) printf(gettext("\n\tAdditional devices are known to "
@@ -2717,182 +2926,6 @@ print_scrub_status(nvlist_t *nvroot)
 	    (u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60));
 }
 
-typedef struct spare_cbdata {
-	uint64_t	cb_guid;
-	zpool_handle_t	*cb_zhp;
-} spare_cbdata_t;
-
-static boolean_t
-find_vdev(nvlist_t *nv, uint64_t search)
-{
-	uint64_t guid;
-	nvlist_t **child;
-	uint_t c, children;
-
-	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
-	    search == guid)
-		return (B_TRUE);
-
-	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
-	    &child, &children) == 0) {
-		for (c = 0; c < children; c++)
-			if (find_vdev(child[c], search))
-				return (B_TRUE);
-	}
-
-	return (B_FALSE);
-}
-
-static int
-find_spare(zpool_handle_t *zhp, void *data)
-{
-	spare_cbdata_t *cbp = data;
-	nvlist_t *config, *nvroot;
-
-	config = zpool_get_config(zhp, NULL);
-	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
-	    &nvroot) == 0);
-
-	if (find_vdev(nvroot, cbp->cb_guid)) {
-		cbp->cb_zhp = zhp;
-		return (1);
-	}
-
-	zpool_close(zhp);
-	return (0);
-}
-
-/*
- * Print out configuration state as requested by status_callback.
- */
-void
-print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
-    int namewidth, int depth, boolean_t isspare)
-{
-	nvlist_t **child;
-	uint_t c, children;
-	vdev_stat_t *vs;
-	char rbuf[6], wbuf[6], cbuf[6], repaired[7];
-	char *vname;
-	uint64_t notpresent;
-	spare_cbdata_t cb;
-	char *state;
-
-	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
-	    (uint64_t **)&vs, &c) == 0);
-
-	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
-	    &child, &children) != 0)
-		children = 0;
-
-	state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
-	if (isspare) {
-		/*
-		 * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
-		 * online drives.
-		 */
-		if (vs->vs_aux == VDEV_AUX_SPARED)
-			state = "INUSE";
-		else if (vs->vs_state == VDEV_STATE_HEALTHY)
-			state = "AVAIL";
-	}
-
-	(void) printf("\t%*s%-*s  %-8s", depth, "", namewidth - depth,
-	    name, state);
-
-	if (!isspare) {
-		zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
-		zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
-		zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
-		(void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
-	}
-
-	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
-	    &notpresent) == 0) {
-		char *path;
-		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
-		(void) printf("  was %s", path);
-	} else if (vs->vs_aux != 0) {
-		(void) printf("  ");
-
-		switch (vs->vs_aux) {
-		case VDEV_AUX_OPEN_FAILED:
-			(void) printf(gettext("cannot open"));
-			break;
-
-		case VDEV_AUX_BAD_GUID_SUM:
-			(void) printf(gettext("missing device"));
-			break;
-
-		case VDEV_AUX_NO_REPLICAS:
-			(void) printf(gettext("insufficient replicas"));
-			break;
-
-		case VDEV_AUX_VERSION_NEWER:
-			(void) printf(gettext("newer version"));
-			break;
-
-		case VDEV_AUX_SPARED:
-			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
-			    &cb.cb_guid) == 0);
-			if (zpool_iter(g_zfs, find_spare, &cb) == 1) {
-				if (strcmp(zpool_get_name(cb.cb_zhp),
-				    zpool_get_name(zhp)) == 0)
-					(void) printf(gettext("currently in "
-					    "use"));
-				else
-					(void) printf(gettext("in use by "
-					    "pool '%s'"),
-					    zpool_get_name(cb.cb_zhp));
-				zpool_close(cb.cb_zhp);
-			} else {
-				(void) printf(gettext("currently in use"));
-			}
-			break;
-
-		case VDEV_AUX_ERR_EXCEEDED:
-			(void) printf(gettext("too many errors"));
-			break;
-
-		case VDEV_AUX_IO_FAILURE:
-			(void) printf(gettext("experienced I/O failures"));
-			break;
-
-		case VDEV_AUX_BAD_LOG:
-			(void) printf(gettext("bad intent log"));
-			break;
-
-		default:
-			(void) printf(gettext("corrupted data"));
-			break;
-		}
-	} else if (vs->vs_scrub_repaired != 0 && children == 0) {
-		/*
-		 * Report bytes resilvered/repaired on leaf devices.
-		 */
-		zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired));
-		(void) printf(gettext("  %s %s"), repaired,
-		    (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
-		    "resilvered" : "repaired");
-	}
-
-	(void) printf("\n");
-
-	for (c = 0; c < children; c++) {
-		uint64_t is_log = B_FALSE;
-
-		/* Don't print logs here */
-		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
-		    &is_log);
-		if (is_log)
-			continue;
-		vname = zpool_vdev_name(g_zfs, zhp, child[c]);
-		print_status_config(zhp, vname, child[c],
-		    namewidth, depth + 2, isspare);
-		free(vname);
-	}
-}
-
 static void
 print_error_log(zpool_handle_t *zhp)
 {
@@ -2969,39 +3002,6 @@ print_l2cache(zpool_handle_t *zhp, nvlis
 }
 
 /*
- * Print log vdevs.
- * Logs are recorded as top level vdevs in the main pool child array but with
- * "is_log" set to 1. We use print_status_config() to print the top level logs
- * then any log children (eg mirrored slogs) are printed recursively - which
- * works because only the top level vdev is marked "is_log"
- */
-static void
-print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth)
-{
-	uint_t c, children;
-	nvlist_t **child;
-
-	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child,
-	    &children) != 0)
-		return;
-
-	(void) printf(gettext("\tlogs\n"));
-
-	for (c = 0; c < children; c++) {
-		uint64_t is_log = B_FALSE;
-		char *name;
-
-		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
-		    &is_log);
-		if (!is_log)
-			continue;
-		name = zpool_vdev_name(g_zfs, zhp, child[c]);
-		print_status_config(zhp, name, child[c], namewidth, 2, B_FALSE);
-		free(name);
-	}
-}
-
-/*
  * Display a summary of pool status.  Displays a summary such as:
  *
  *        pool: tank
@@ -3229,7 +3229,7 @@ status_callback(zpool_handle_t *zhp, voi
 		    namewidth, 0, B_FALSE);
 
 		if (num_logs(nvroot) > 0)
-			print_logs(zhp, nvroot, namewidth);
+			print_logs(zhp, nvroot, namewidth, B_TRUE);
 		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
 		    &l2cache, &nl2cache) == 0)
 			print_l2cache(zhp, l2cache, nl2cache, namewidth);

Modified: head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h	Mon Sep 27 06:04:17 2010	(r213196)
+++ head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h	Mon Sep 27 09:05:51 2010	(r213197)
@@ -116,6 +116,7 @@ enum {
 	EZFS_VDEVNOTSUP,	/* unsupported vdev type */
 	EZFS_NOTSUP,		/* ops not supported on this dataset */
 	EZFS_ACTIVE_SPARE,	/* pool has active shared spare devices */
+	EZFS_UNPLAYED_LOGS,	/* log device has unplayed logs */
 	EZFS_UNKNOWN
 };
 

Modified: head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c	Mon Sep 27 06:04:17 2010	(r213196)
+++ head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c	Mon Sep 27 09:05:51 2010	(r213197)
@@ -1720,6 +1720,12 @@ zpool_vdev_fault(zpool_handle_t *zhp, ui
 		 */
 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
 
+	case EEXIST:
+		/*
+		 * The log device has unplayed logs
+		 */
+		return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
+
 	default:
 		return (zpool_standard_error(hdl, errno, msg));
 	}

Modified: head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c	Mon Sep 27 06:04:17 2010	(r213196)
+++ head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c	Mon Sep 27 09:05:51 2010	(r213197)
@@ -210,6 +210,9 @@ libzfs_error_description(libzfs_handle_t
 	case EZFS_ACTIVE_SPARE:
 		return (dgettext(TEXT_DOMAIN, "pool has active shared spare "
 		    "device"));
+	case EZFS_UNPLAYED_LOGS:
+		return (dgettext(TEXT_DOMAIN, "log device has unplayed intent "
+		    "logs"));
 	case EZFS_UNKNOWN:
 		return (dgettext(TEXT_DOMAIN, "unknown error"));
 	default:

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c	Mon Sep 27 06:04:17 2010	(r213196)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c	Mon Sep 27 09:05:51 2010	(r213197)
@@ -351,7 +351,7 @@ traverse_zil_block(zilog_t *zilog, blkpt
 		return;
 
 	/*
-	 * One block ("stumpy") can be allocated a long time ago; we
+	 * One block ("stubby") can be allocated a long time ago; we
 	 * want to visit that one because it has been allocated
 	 * (on-disk) even if it hasn't been claimed (even though for
 	 * plain scrub there's nothing to do to it).

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Mon Sep 27 06:04:17 2010	(r213196)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Mon Sep 27 09:05:51 2010	(r213197)
@@ -1110,6 +1110,33 @@ spa_check_removed(vdev_t *vd)
 }
 
 /*
+ * Load the slog device state from the config object since it's possible
+ * that the label does not contain the most up-to-date information.
+ */
+void
+spa_load_log_state(spa_t *spa)
+{
+	nvlist_t *nv, *nvroot, **child;
+	uint64_t is_log;
+	uint_t children, c;
+	vdev_t *rvd = spa->spa_root_vdev;
+
+	VERIFY(load_nvlist(spa, spa->spa_config_object, &nv) == 0);
+	VERIFY(nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+	VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0);
+
+	for (c = 0; c < children; c++) {
+		vdev_t *tvd = rvd->vdev_child[c];
+
+		if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+		    &is_log) == 0 && is_log)
+			vdev_load_log_state(tvd, child[c]);
+	}
+	nvlist_free(nv);
+}
+
+/*
  * Check for missing log devices
  */
 int
@@ -1125,13 +1152,7 @@ spa_check_logs(spa_t *spa)
 			return (1);
 		}
 		break;
-
-	case SPA_LOG_CLEAR:
-		(void) dmu_objset_find(spa->spa_name, zil_clear_log_chain, NULL,
-		    DS_FIND_CHILDREN);
-		break;
 	}
-	spa->spa_log_state = SPA_LOG_GOOD;
 	return (0);
 }
 
@@ -1455,6 +1476,8 @@ spa_load(spa_t *spa, nvlist_t *config, s
 		spa_config_exit(spa, SCL_ALL, FTAG);
 	}
 
+	spa_load_log_state(spa);
+
 	if (spa_check_logs(spa)) {
 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
 		    VDEV_AUX_BAD_LOG);
@@ -1542,6 +1565,7 @@ spa_load(spa_t *spa, nvlist_t *config, s
 		    zil_claim, tx, DS_FIND_CHILDREN);
 		dmu_tx_commit(tx);
 
+		spa->spa_log_state = SPA_LOG_GOOD;
 		spa->spa_sync_on = B_TRUE;
 		txg_sync_start(spa->spa_dsl_pool);
 

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h	Mon Sep 27 06:04:17 2010	(r213196)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h	Mon Sep 27 09:05:51 2010	(r213197)
@@ -259,6 +259,7 @@ extern void vdev_remove_parent(vdev_t *c
 /*
  * vdev sync load and sync
  */
+extern void vdev_load_log_state(vdev_t *vd, nvlist_t *nv);
 extern void vdev_load(vdev_t *vd);
 extern void vdev_sync(vdev_t *vd, uint64_t txg);
 extern void vdev_sync_done(vdev_t *vd, uint64_t txg);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h	Mon Sep 27 06:04:17 2010	(r213196)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h	Mon Sep 27 09:05:51 2010	(r213197)
@@ -366,9 +366,9 @@ extern uint64_t zil_itx_assign(zilog_t *
 
 extern void	zil_commit(zilog_t *zilog, uint64_t seq, uint64_t oid);
 
+extern int	zil_vdev_offline(char *osname, void *txarg);
 extern int	zil_claim(char *osname, void *txarg);
 extern int	zil_check_log_chain(char *osname, void *txarg);
-extern int	zil_clear_log_chain(char *osname, void *txarg);
 extern void	zil_sync(zilog_t *zilog, dmu_tx_t *tx);
 extern void	zil_clean(zilog_t *zilog);
 extern int	zil_is_committed(zilog_t *zilog);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c	Mon Sep 27 06:04:17 2010	(r213196)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c	Mon Sep 27 09:05:51 2010	(r213197)
@@ -39,6 +39,7 @@
 #include <sys/zap.h>
 #include <sys/fs/zfs.h>
 #include <sys/arc.h>
+#include <sys/zil.h>
 
 SYSCTL_DECL(_vfs_zfs);
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
@@ -765,6 +766,15 @@ vdev_metaslab_init(vdev_t *vd, uint64_t 
 	if (vd->vdev_ms_shift == 0)	/* not being allocated from yet */
 		return (0);
 
+	/*
+	 * Compute the raidz-deflation ratio.  Note, we hard-code
+	 * in 128k (1 << 17) because it is the current "typical" blocksize.
+	 * Even if SPA_MAXBLOCKSIZE changes, this algorithm must never change,
+	 * or we will inconsistently account for existing bp's.
+	 */
+	vd->vdev_deflate_ratio = (1 << 17) /
+	    (vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT);
+
 	ASSERT(oldc <= newc);
 
 	if (vd->vdev_islog)
@@ -998,6 +1008,8 @@ vdev_open(vdev_t *vd)
 	    vd->vdev_state == VDEV_STATE_OFFLINE);
 
 	vd->vdev_stat.vs_aux = VDEV_AUX_NONE;
+	vd->vdev_cant_read = B_FALSE;
+	vd->vdev_cant_write = B_FALSE;
 
 	if (!vd->vdev_removed && vd->vdev_faulted) {
 		ASSERT(vd->vdev_children == 0);
@@ -1113,18 +1125,6 @@ vdev_open(vdev_t *vd)
 	}
 
 	/*
-	 * If this is a top-level vdev, compute the raidz-deflation
-	 * ratio.  Note, we hard-code in 128k (1<<17) because it is the
-	 * current "typical" blocksize.  Even if SPA_MAXBLOCKSIZE
-	 * changes, this algorithm must never change, or we will
-	 * inconsistently account for existing bp's.
-	 */
-	if (vd->vdev_top == vd) {
-		vd->vdev_deflate_ratio = (1<<17) /
-		    (vdev_psize_to_asize(vd, 1<<17) >> SPA_MINBLOCKSHIFT);
-	}
-
-	/*
 	 * If a leaf vdev has a DTL, and seems healthy, then kick off a
 	 * resilver.  But don't do this if we are doing a reopen for a scrub,
 	 * since this would just restart the scrub we are already doing.
@@ -1937,7 +1937,8 @@ vdev_online(spa_t *spa, uint64_t guid, u
 int
 vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags)
 {
-	vdev_t *vd;
+	vdev_t *vd, *tvd;
+	int error;
 
 	spa_vdev_state_enter(spa);
 
@@ -1947,34 +1948,58 @@ vdev_offline(spa_t *spa, uint64_t guid, 
 	if (!vd->vdev_ops->vdev_op_leaf)
 		return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
 
+	tvd = vd->vdev_top;
+
 	/*
 	 * If the device isn't already offline, try to offline it.
 	 */
 	if (!vd->vdev_offline) {
 		/*
 		 * If this device has the only valid copy of some data,
-		 * don't allow it to be offlined.
+		 * don't allow it to be offlined. Log devices are always
+		 * expendable.
 		 */
-		if (vd->vdev_aux == NULL && vdev_dtl_required(vd))
+		if (!tvd->vdev_islog && vd->vdev_aux == NULL &&
+		    vdev_dtl_required(vd))
 			return (spa_vdev_state_exit(spa, NULL, EBUSY));
 
 		/*
 		 * Offline this device and reopen its top-level vdev.
-		 * If this action results in the top-level vdev becoming
-		 * unusable, undo it and fail the request.
+		 * If the top-level vdev is a log device then just offline
+		 * it. Otherwise, if this action results in the top-level
+		 * vdev becoming unusable, undo it and fail the request.
 		 */
 		vd->vdev_offline = B_TRUE;
-		vdev_reopen(vd->vdev_top);
-		if (vd->vdev_aux == NULL && vdev_is_dead(vd->vdev_top)) {
+		vdev_reopen(tvd);
+
+		if (!tvd->vdev_islog && vd->vdev_aux == NULL &&
+		    vdev_is_dead(tvd)) {
 			vd->vdev_offline = B_FALSE;
-			vdev_reopen(vd->vdev_top);
+			vdev_reopen(tvd);
 			return (spa_vdev_state_exit(spa, NULL, EBUSY));
 		}
 	}
 
 	vd->vdev_tmpoffline = !!(flags & ZFS_OFFLINE_TEMPORARY);
 
-	return (spa_vdev_state_exit(spa, vd, 0));
+	if (!tvd->vdev_islog || !vdev_is_dead(tvd))
+		return (spa_vdev_state_exit(spa, vd, 0));
+
+	(void) spa_vdev_state_exit(spa, vd, 0);
+
+	error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
+	    NULL, DS_FIND_CHILDREN);
+	if (error) {
+		(void) vdev_online(spa, guid, 0, NULL);
+		return (error);
+	}
+	/*
+	 * If we successfully offlined the log device then we need to
+	 * sync out the current txg so that the "stubby" block can be
+	 * removed by zil_sync().
+	 */
+	txg_wait_synced(spa->spa_dsl_pool, 0);
+	return (0);
 }
 
 /*
@@ -2279,6 +2304,7 @@ vdev_space_update(vdev_t *vd, int64_t sp
 	 * childrens', thus not accurate enough for us.
 	 */
 	ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0);
+	ASSERT(vd->vdev_deflate_ratio != 0 || vd->vdev_isl2cache);
 	dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) *
 	    vd->vdev_deflate_ratio;
 
@@ -2631,11 +2657,7 @@ vdev_set_state(vdev_t *vd, boolean_t iso
 boolean_t
 vdev_is_bootable(vdev_t *vd)
 {
-#ifdef __FreeBSD_version
-	return (B_TRUE);
-#else
-	int c;
-
+#ifdef sun
 	if (!vd->vdev_ops->vdev_op_leaf) {
 		char *vdev_type = vd->vdev_ops->vdev_op_type;
 
@@ -2654,6 +2676,35 @@ vdev_is_bootable(vdev_t *vd)
 		if (!vdev_is_bootable(vd->vdev_child[c]))
 			return (B_FALSE);
 	}
+#endif	/* sun */
 	return (B_TRUE);
-#endif
+}
+
+void
+vdev_load_log_state(vdev_t *vd, nvlist_t *nv)
+{
+	uint_t c, children;
+	nvlist_t **child;
+	uint64_t val;
+	spa_t *spa = vd->vdev_spa;
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++)
+			vdev_load_log_state(vd->vdev_child[c], child[c]);
+	}
+
+	if (vd->vdev_ops->vdev_op_leaf && nvlist_lookup_uint64(nv,
+	    ZPOOL_CONFIG_OFFLINE, &val) == 0 && val) {
+
+		/*
+		 * It would be nice to call vdev_offline()
+		 * directly but the pool isn't fully loaded and
+		 * the txg threads have not been started yet.
+		 */
+		spa_config_enter(spa, SCL_STATE_ALL, FTAG, RW_WRITER);
+		vd->vdev_offline = val;
+		vdev_reopen(vd->vdev_top);
+		spa_config_exit(spa, SCL_STATE_ALL, FTAG);
+	}
 }

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c	Mon Sep 27 06:04:17 2010	(r213196)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c	Mon Sep 27 09:05:51 2010	(r213197)
@@ -25,6 +25,7 @@
 
 #include <sys/zfs_context.h>
 #include <sys/spa.h>
+#include <sys/spa_impl.h>
 #include <sys/dmu.h>
 #include <sys/zap.h>
 #include <sys/arc.h>
@@ -515,6 +516,13 @@ zil_claim(char *osname, void *txarg)
 	zilog = dmu_objset_zil(os);
 	zh = zil_header_in_syncing_context(zilog);
 
+	if (zilog->zl_spa->spa_log_state == SPA_LOG_CLEAR) {
+		if (!BP_IS_HOLE(&zh->zh_log))
+			zio_free_blk(zilog->zl_spa, &zh->zh_log, first_txg);
+		BP_ZERO(&zh->zh_log);
+		dsl_dataset_dirty(dmu_objset_ds(os), tx);
+	}
+
 	/*
 	 * Record here whether the zil has any records to replay.
 	 * If the header block pointer is null or the block points
@@ -527,8 +535,10 @@ zil_claim(char *osname, void *txarg)
 	 * Note, the intent log can be empty but still need the
 	 * stubby to be claimed.
 	 */
-	if (!zil_empty(zilog))
+	if (!zil_empty(zilog)) {
 		zh->zh_flags |= ZIL_REPLAY_NEEDED;
+		dsl_dataset_dirty(dmu_objset_ds(os), tx);
+	}
 
 	/*
 	 * Claim all log blocks if we haven't already done so, and remember
@@ -597,36 +607,6 @@ zil_check_log_chain(char *osname, void *
 	return (error);
 }
 
-/*
- * Clear a log chain
- */
-/* ARGSUSED */
-int
-zil_clear_log_chain(char *osname, void *txarg)
-{
-	zilog_t *zilog;
-	zil_header_t *zh;
-	objset_t *os;
-	dmu_tx_t *tx;
-	int error;
-
-	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
-	if (error) {
-		cmn_err(CE_WARN, "can't open objset for %s", osname);
-		return (0);
-	}
-
-	zilog = dmu_objset_zil(os);
-	tx = dmu_tx_create(zilog->zl_os);
-	(void) dmu_tx_assign(tx, TXG_WAIT);
-	zh = zil_header_in_syncing_context(zilog);
-	BP_ZERO(&zh->zh_log);
-	dsl_dataset_dirty(dmu_objset_ds(os), tx);
-	dmu_tx_commit(tx);
-	dmu_objset_close(os);
-	return (0);
-}
-
 static int
 zil_vdev_compare(const void *x1, const void *x2)
 {
@@ -771,9 +751,9 @@ zil_lwb_write_init(zilog_t *zilog, lwb_t
 	}
 	if (lwb->lwb_zio == NULL) {
 		lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa,
-		    0, &lwb->lwb_blk, lwb->lwb_buf,
-		    lwb->lwb_sz, zil_lwb_write_done, lwb,
-		    ZIO_PRIORITY_LOG_WRITE, ZIO_FLAG_CANFAIL, &zb);
+		    0, &lwb->lwb_blk, lwb->lwb_buf, lwb->lwb_sz,
+		    zil_lwb_write_done, lwb, ZIO_PRIORITY_LOG_WRITE,
+		    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &zb);
 	}
 }
 
@@ -1270,12 +1250,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
 		}
 	}
 
-	for (;;) {
-		lwb = list_head(&zilog->zl_lwb_list);
-		if (lwb == NULL) {
-			mutex_exit(&zilog->zl_lock);
-			return;
-		}
+	while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
 		zh->zh_log = lwb->lwb_blk;
 		if (lwb->lwb_buf != NULL || lwb->lwb_max_txg > txg)
 			break;
@@ -1692,3 +1667,24 @@ out:
 	mutex_exit(&zilog->zl_lock);
 	return (ret);
 }
+
+/* ARGSUSED */
+int
+zil_vdev_offline(char *osname, void *arg)
+{
+	objset_t *os;
+	zilog_t *zilog;
+	int error;
+
+	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
+	if (error)
+		return (error);
+
+	zilog = dmu_objset_zil(os);
+	if (zil_suspend(zilog) != 0)
+		error = EEXIST;
+	else
+		zil_resume(zilog);
+	dmu_objset_close(os);
+	return (error);
+}


More information about the svn-src-head mailing list