svn commit: r213198 - in head: cddl/contrib/opensolaris/cmd/zinject sys/cddl/contrib/opensolaris/uts/common/fs/zfs sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys

Martin Matuska mm at FreeBSD.org
Mon Sep 27 09:42:32 UTC 2010


Author: mm
Date: Mon Sep 27 09:42:31 2010
New Revision: 213198
URL: http://svn.freebsd.org/changeset/base/213198

Log:
  Properly handle IO with B_FAILFAST.
  Retry IO once with ZIO_FLAG_TRYHARD before declaring a pool faulted.
  
  OpenSolaris revision and Bug IDs:
  
  9725:0bf7402e8022
  6843014 ZFS B_FAILFAST handling is broken
  
  Approved by:	delphij (mentor)
  Obtained from:	OpenSolaris (Bug ID 6843014)
  MFC after:	3 weeks

Modified:
  head/cddl/contrib/opensolaris/cmd/zinject/zinject.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c

Modified: head/cddl/contrib/opensolaris/cmd/zinject/zinject.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zinject/zinject.c	Mon Sep 27 09:05:51 2010	(r213197)
+++ head/cddl/contrib/opensolaris/cmd/zinject/zinject.c	Mon Sep 27 09:42:31 2010	(r213198)
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * ZFS Fault Injector
  *
@@ -227,7 +225,7 @@ usage(void)
 	    "\t\tClear the particular record (if given a numeric ID), or\n"
 	    "\t\tall records if 'all' is specificed.\n"
 	    "\n"
-	    "\tzinject -d device [-e errno] [-L <nvlist|uber>] pool\n"
+	    "\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F] pool\n"
 	    "\t\tInject a fault into a particular device or the device's\n"
 	    "\t\tlabel.  Label injection can either be 'nvlist' or 'uber'.\n"
 	    "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
@@ -519,7 +517,7 @@ main(int argc, char **argv)
 		return (0);
 	}
 
-	while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:uL:")) != -1) {
+	while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) {
 		switch (c) {
 		case 'a':
 			flags |= ZINJECT_FLUSH_ARC;
@@ -556,6 +554,9 @@ main(int argc, char **argv)
 				return (1);
 			}
 			break;
+		case 'F':
+			record.zi_failfast = B_TRUE;
+			break;
 		case 'h':
 			usage();
 			return (0);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Mon Sep 27 09:05:51 2010	(r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Mon Sep 27 09:42:31 2010	(r213198)
@@ -4252,10 +4252,16 @@ spa_sync(spa_t *spa, uint64_t txg)
 				if (svdcount == SPA_DVAS_PER_BP)
 					break;
 			}
-			error = vdev_config_sync(svd, svdcount, txg);
+			error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
+			if (error != 0)
+				error = vdev_config_sync(svd, svdcount, txg,
+				    B_TRUE);
 		} else {
 			error = vdev_config_sync(rvd->vdev_child,
-			    rvd->vdev_children, txg);
+			    rvd->vdev_children, txg, B_FALSE);
+			if (error != 0)
+				error = vdev_config_sync(rvd->vdev_child,
+				    rvd->vdev_children, txg, B_TRUE);
 		}
 
 		spa_config_exit(spa, SCL_STATE, FTAG);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h	Mon Sep 27 09:05:51 2010	(r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h	Mon Sep 27 09:42:31 2010	(r213198)
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -113,7 +113,8 @@ extern void vdev_queue_io_done(zio_t *zi
 
 extern void vdev_config_dirty(vdev_t *vd);
 extern void vdev_config_clean(vdev_t *vd);
-extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
+extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg,
+    boolean_t);
 
 extern void vdev_state_dirty(vdev_t *vd);
 extern void vdev_state_clean(vdev_t *vd);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h	Mon Sep 27 09:05:51 2010	(r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h	Mon Sep 27 09:42:31 2010	(r213198)
@@ -118,7 +118,7 @@ typedef struct zinject_record {
 	uint32_t	zi_error;
 	uint64_t	zi_type;
 	uint32_t	zi_freq;
-	uint32_t	zi_pad;	/* pad out to 64 bit alignment */
+	uint32_t	zi_failfast;
 } zinject_record_t;
 
 #define	ZINJECT_NULL		0x1

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	Mon Sep 27 09:05:51 2010	(r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	Mon Sep 27 09:42:31 2010	(r213198)
@@ -117,31 +117,33 @@ enum zio_compress {
 #define	ZIO_PRIORITY_SCRUB		(zio_priority_table[10])
 #define	ZIO_PRIORITY_TABLE_SIZE		11
 
-#define	ZIO_FLAG_MUSTSUCCEED		0x00000
-#define	ZIO_FLAG_CANFAIL		0x00001
-#define	ZIO_FLAG_SPECULATIVE		0x00002
-#define	ZIO_FLAG_CONFIG_WRITER		0x00004
-#define	ZIO_FLAG_DONT_RETRY		0x00008
-
-#define	ZIO_FLAG_DONT_CACHE		0x00010
-#define	ZIO_FLAG_DONT_QUEUE		0x00020
-#define	ZIO_FLAG_DONT_AGGREGATE		0x00040
-#define	ZIO_FLAG_DONT_PROPAGATE		0x00080
-
-#define	ZIO_FLAG_IO_BYPASS		0x00100
-#define	ZIO_FLAG_IO_REPAIR		0x00200
-#define	ZIO_FLAG_IO_RETRY		0x00400
-#define	ZIO_FLAG_IO_REWRITE		0x00800
-
-#define	ZIO_FLAG_SELF_HEAL		0x01000
-#define	ZIO_FLAG_RESILVER		0x02000
-#define	ZIO_FLAG_SCRUB			0x04000
-#define	ZIO_FLAG_SCRUB_THREAD		0x08000
-
-#define	ZIO_FLAG_PROBE			0x10000
-#define	ZIO_FLAG_GANG_CHILD		0x20000
-#define	ZIO_FLAG_RAW			0x40000
-#define	ZIO_FLAG_GODFATHER		0x80000
+#define	ZIO_FLAG_MUSTSUCCEED		0x000000
+#define	ZIO_FLAG_CANFAIL		0x000001
+#define	ZIO_FLAG_SPECULATIVE		0x000002
+#define	ZIO_FLAG_CONFIG_WRITER		0x000004
+#define	ZIO_FLAG_DONT_RETRY		0x000008
+
+#define	ZIO_FLAG_DONT_CACHE		0x000010
+#define	ZIO_FLAG_DONT_QUEUE		0x000020
+#define	ZIO_FLAG_DONT_AGGREGATE		0x000040
+#define	ZIO_FLAG_DONT_PROPAGATE		0x000080
+
+#define	ZIO_FLAG_IO_BYPASS		0x000100
+#define	ZIO_FLAG_IO_REPAIR		0x000200
+#define	ZIO_FLAG_IO_RETRY		0x000400
+#define	ZIO_FLAG_IO_REWRITE		0x000800
+
+#define	ZIO_FLAG_SELF_HEAL		0x001000
+#define	ZIO_FLAG_RESILVER		0x002000
+#define	ZIO_FLAG_SCRUB			0x004000
+#define	ZIO_FLAG_SCRUB_THREAD		0x008000
+
+#define	ZIO_FLAG_PROBE			0x010000
+#define	ZIO_FLAG_GANG_CHILD		0x020000
+#define	ZIO_FLAG_RAW			0x040000
+#define	ZIO_FLAG_GODFATHER		0x080000
+
+#define	ZIO_FLAG_TRYHARD		0x100000
 
 #define	ZIO_FLAG_GANG_INHERIT		\
 	(ZIO_FLAG_CANFAIL |		\
@@ -159,7 +161,8 @@ enum zio_compress {
 	(ZIO_FLAG_GANG_INHERIT |	\
 	ZIO_FLAG_IO_REPAIR |		\
 	ZIO_FLAG_IO_RETRY |		\
-	ZIO_FLAG_PROBE)
+	ZIO_FLAG_PROBE |		\
+	ZIO_FLAG_TRYHARD)
 
 #define	ZIO_FLAG_AGG_INHERIT		\
 	(ZIO_FLAG_DONT_AGGREGATE |	\
@@ -440,7 +443,7 @@ extern int zio_inject_list_next(int *id,
     struct zinject_record *record);
 extern int zio_clear_fault(int id);
 extern int zio_handle_fault_injection(zio_t *zio, int error);
-extern int zio_handle_device_injection(vdev_t *vd, int error);
+extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
 extern int zio_handle_label_injection(zio_t *zio, int error);
 
 #ifdef	__cplusplus

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c	Mon Sep 27 09:05:51 2010	(r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c	Mon Sep 27 09:42:31 2010	(r213198)
@@ -928,7 +928,7 @@ vdev_probe(vdev_t *vd, zio_t *zio)
 
 		vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
 		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
-		    ZIO_FLAG_DONT_RETRY;
+		    ZIO_FLAG_TRYHARD;
 
 		if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
 			/*
@@ -1025,7 +1025,7 @@ vdev_open(vdev_t *vd)
 	error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift);
 
 	if (zio_injection_enabled && error == 0)
-		error = zio_handle_device_injection(vd, ENXIO);
+		error = zio_handle_device_injection(vd, NULL, ENXIO);
 
 	if (error) {
 		if (vd->vdev_removed &&
@@ -2207,6 +2207,16 @@ vdev_stat_update(zio_t *zio, uint64_t ps
 	if (flags & ZIO_FLAG_SPECULATIVE)
 		return;
 
+	/*
+	 * If this is an I/O error that is going to be retried, then ignore the
+	 * error.  Otherwise, the user may interpret B_FAILFAST I/O errors as
+	 * hard errors, when in reality they can happen for any number of
+	 * innocuous reasons (bus resets, MPxIO link failure, etc).
+	 */
+	if (zio->io_error == EIO &&
+	    !(zio->io_flags & ZIO_FLAG_IO_RETRY))
+		return;
+
 	mutex_enter(&vd->vdev_stat_lock);
 	if (type == ZIO_TYPE_READ && !vdev_is_dead(vd)) {
 		if (zio->io_error == ECKSUM)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c	Mon Sep 27 09:05:51 2010	(r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c	Mon Sep 27 09:42:31 2010	(r213198)
@@ -401,8 +401,9 @@ vdev_disk_io_start(zio_t *zio)
 
 	bioinit(bp);
 	bp->b_flags = B_BUSY | B_NOCACHE |
-	    (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE) |
-	    ((zio->io_flags & ZIO_FLAG_IO_RETRY) ? 0 : B_FAILFAST);
+	    (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
+	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
+		bp->b_flags |= B_FAILFAST;
 	bp->b_bcount = zio->io_size;
 	bp->b_un.b_addr = zio->io_data;
 	bp->b_lblkno = lbtodb(zio->io_offset);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c	Mon Sep 27 09:05:51 2010	(r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c	Mon Sep 27 09:42:31 2010	(r213198)
@@ -339,8 +339,8 @@ vdev_label_read_config(vdev_t *vd)
 	nvlist_t *config = NULL;
 	vdev_phys_t *vp;
 	zio_t *zio;
-	int flags =
-	    ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
+	int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+	    ZIO_FLAG_SPECULATIVE;
 
 	ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
 
@@ -349,6 +349,7 @@ vdev_label_read_config(vdev_t *vd)
 
 	vp = zio_buf_alloc(sizeof (vdev_phys_t));
 
+retry:
 	for (int l = 0; l < VDEV_LABELS; l++) {
 
 		zio = zio_root(spa, NULL, NULL, flags);
@@ -368,6 +369,11 @@ vdev_label_read_config(vdev_t *vd)
 		}
 	}
 
+	if (config == NULL && !(flags & ZIO_FLAG_TRYHARD)) {
+		flags |= ZIO_FLAG_TRYHARD;
+		goto retry;
+	}
+
 	zio_buf_free(vp, sizeof (vdev_phys_t));
 
 	return (config);
@@ -648,6 +654,7 @@ vdev_label_init(vdev_t *vd, uint64_t crt
 	/*
 	 * Write everything in parallel.
 	 */
+retry:
 	zio = zio_root(spa, NULL, NULL, flags);
 
 	for (int l = 0; l < VDEV_LABELS; l++) {
@@ -674,6 +681,11 @@ vdev_label_init(vdev_t *vd, uint64_t crt
 
 	error = zio_wait(zio);
 
+	if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) {
+		flags |= ZIO_FLAG_TRYHARD;
+		goto retry;
+	}
+
 	nvlist_free(label);
 	zio_buf_free(pad2, VDEV_PAD_SIZE);
 	zio_buf_free(ub, VDEV_UBERBLOCK_SIZE(vd));
@@ -760,8 +772,8 @@ vdev_uberblock_load(zio_t *zio, vdev_t *
 {
 	spa_t *spa = vd->vdev_spa;
 	vdev_t *rvd = spa->spa_root_vdev;
-	int flags =
-	    ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
+	int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+	    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
 
 	if (vd == rvd) {
 		ASSERT(zio == NULL);
@@ -999,7 +1011,7 @@ vdev_label_sync_list(spa_t *spa, int l, 
  * at any time, you can just call it again, and it will resume its work.
  */
 int
-vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
+vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard)
 {
 	spa_t *spa = svd[0]->vdev_spa;
 	uberblock_t *ub = &spa->spa_uberblock;
@@ -1008,6 +1020,16 @@ vdev_config_sync(vdev_t **svd, int svdco
 	int error;
 	int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
 
+	/*
+	 * Normally, we don't want to try too hard to write every label and
+	 * uberblock.  If there is a flaky disk, we don't want the rest of the
+	 * sync process to block while we retry.  But if we can't write a
+	 * single label out, we should retry with ZIO_FLAG_TRYHARD before
+	 * bailing out and declaring the pool faulted.
+	 */
+	if (tryhard)
+		flags |= ZIO_FLAG_TRYHARD;
+
 	ASSERT(ub->ub_txg <= txg);
 
 	/*

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c	Mon Sep 27 09:05:51 2010	(r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c	Mon Sep 27 09:42:31 2010	(r213198)
@@ -134,6 +134,15 @@ zfs_ereport_post(const char *subclass, s
 		if (zio->io_flags & ZIO_FLAG_SPECULATIVE)
 			return;
 
+		/*
+		 * If this I/O is not a retry I/O, don't post an ereport.
+		 * Otherwise, we risk making bad diagnoses based on B_FAILFAST
+		 * I/Os.
+		 */
+		if (zio->io_error == EIO &&
+		    !(zio->io_flags & ZIO_FLAG_IO_RETRY))
+			return;
+
 		if (vd != NULL) {
 			/*
 			 * If the vdev has already been marked as failing due

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Mon Sep 27 09:05:51 2010	(r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Mon Sep 27 09:42:31 2010	(r213198)
@@ -1870,7 +1870,8 @@ zio_vdev_io_done(zio_t *zio)
 			vdev_cache_write(zio);
 
 		if (zio_injection_enabled && zio->io_error == 0)
-			zio->io_error = zio_handle_device_injection(vd, EIO);
+			zio->io_error = zio_handle_device_injection(vd,
+			    zio, EIO);
 
 		if (zio_injection_enabled && zio->io_error == 0)
 			zio->io_error = zio_handle_label_injection(zio, EIO);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c	Mon Sep 27 09:05:51 2010	(r213197)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c	Mon Sep 27 09:42:31 2010	(r213198)
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -195,7 +195,7 @@ zio_handle_label_injection(zio_t *zio, i
 
 
 int
-zio_handle_device_injection(vdev_t *vd, int error)
+zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
 {
 	inject_handler_t *handler;
 	int ret = 0;
@@ -210,6 +210,12 @@ zio_handle_device_injection(vdev_t *vd, 
 			continue;
 
 		if (vd->vdev_guid == handler->zi_record.zi_guid) {
+			if (handler->zi_record.zi_failfast &&
+			    (zio == NULL || (zio->io_flags &
+			    (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) {
+				continue;
+			}
+
 			if (handler->zi_record.zi_error == error) {
 				/*
 				 * For a failed open, pretend like the device


More information about the svn-src-all mailing list