svn commit: r265152 - in head/sys/cddl: compat/opensolaris/sys contrib/opensolaris/uts/common/fs/zfs contrib/opensolaris/uts/common/fs/zfs/sys

Steven Hartland smh at FreeBSD.org
Wed Apr 30 17:46:31 UTC 2014


Author: smh
Date: Wed Apr 30 17:46:29 2014
New Revision: 265152
URL: http://svnweb.freebsd.org/changeset/base/265152

Log:
  Reintroduce priority for the TRIM ZIOs instead of using the "NOW" priority
  
  This changes how TRIM requests are generated: they now use ZIO_TYPE_FREE
  plus a priority instead of ZIO_TYPE_IOCTL until they are processed by
  vdev_geom, and only then are they translated into the required GEOM
  values. This reduces the number of changes required for FREE requests to
  be supported by the new I/O scheduler, and it also eliminates the need
  for a specific DKIOCTRIM ioctl.
  
  Also fix FREE vdev child I/Os so that they no longer run
  ZIO_STAGE_VDEV_IO_DONE as part of their pipeline.
  
  Because the new I/O scheduler can cause a request to execute one type of
  I/O to actually run a different type of I/O, zio_trim requests must be
  processed without holding the trim map lock (tm->tm_lock): executing the
  free request may run a write request, which triggers a
  trim_map_write_start call that takes the trim map lock and would
  therefore recurse on a non-recursive sx lock.
  
  This is based on avg's original work, so credit to him.
  
  MFC after:	1 month
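  
  For illustration only (not part of this commit), a minimal sketch of the
  lock-drop pattern described above, assuming the loop shape used by
  trim_map_vdev_commit() in the diff below:
  
  	mutex_enter(&tm->tm_lock);
  	ts = trim_map_first(tm, txgtarget, txgsafe, timelimit);
  	while (ts != NULL) {
  		/* Capture the segment bounds while still holding the lock. */
  		offset = ts->ts_start;
  		size = ts->ts_end - ts->ts_start;
  		/*
  		 * Drop tm_lock before issuing the TRIM: the I/O scheduler may
  		 * run a write instead, and trim_map_write_start() takes
  		 * tm_lock, which would otherwise recurse on the lock.
  		 */
  		mutex_exit(&tm->tm_lock);
  		zio_nowait(zio_trim(zio, spa, vd, offset, size));
  		mutex_enter(&tm->tm_lock);
  		ts = trim_map_first(tm, txgtarget, txgsafe, timelimit);
  	}
  	mutex_exit(&tm->tm_lock);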

Modified:
  head/sys/cddl/compat/opensolaris/sys/dkio.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c

Modified: head/sys/cddl/compat/opensolaris/sys/dkio.h
==============================================================================
--- head/sys/cddl/compat/opensolaris/sys/dkio.h	Wed Apr 30 17:43:49 2014	(r265151)
+++ head/sys/cddl/compat/opensolaris/sys/dkio.h	Wed Apr 30 17:46:29 2014	(r265152)
@@ -75,8 +75,6 @@ extern "C" {
  */
 #define	DKIOCFLUSHWRITECACHE	(DKIOC|34)	/* flush cache to phys medium */
 
-#define	DKIOCTRIM		(DKIOC|35)	/* TRIM a block */
-
 struct dk_callback {
 	void (*dkc_callback)(void *dkc_cookie, int error);
 	void *dkc_cookie;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	Wed Apr 30 17:43:49 2014	(r265151)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	Wed Apr 30 17:46:29 2014	(r265152)
@@ -135,9 +135,10 @@ typedef enum zio_priority {
 	ZIO_PRIORITY_ASYNC_READ,	/* prefetch */
 	ZIO_PRIORITY_ASYNC_WRITE,	/* spa_sync() */
 	ZIO_PRIORITY_SCRUB,		/* asynchronous scrub/resilver reads */
+	ZIO_PRIORITY_TRIM,		/* free requests used for TRIM */
 	ZIO_PRIORITY_NUM_QUEUEABLE,
 
-	ZIO_PRIORITY_NOW		/* non-queued i/os (e.g. free) */
+	ZIO_PRIORITY_NOW		/* non-queued I/Os (e.g. ioctl) */
 } zio_priority_t;
 
 #define	ZIO_PIPELINE_CONTINUE		0x100
@@ -508,7 +509,7 @@ extern zio_t *zio_claim(zio_t *pio, spa_
 
 extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
     uint64_t offset, uint64_t size, zio_done_func_t *done, void *priv,
-    enum zio_flag flags);
+    zio_priority_t priority, enum zio_flag flags);
 
 extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
     uint64_t size, void *data, int checksum,

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h	Wed Apr 30 17:43:49 2014	(r265151)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h	Wed Apr 30 17:46:29 2014	(r265152)
@@ -215,6 +215,10 @@ enum zio_stage {
 	ZIO_STAGE_FREE_BP_INIT |		\
 	ZIO_STAGE_DVA_FREE)
 
+#define	ZIO_FREE_PHYS_PIPELINE			\
+	(ZIO_INTERLOCK_STAGES |			\
+	ZIO_VDEV_IO_STAGES)
+
 #define	ZIO_DDT_FREE_PIPELINE			\
 	(ZIO_INTERLOCK_STAGES |			\
 	ZIO_STAGE_FREE_BP_INIT |		\

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c	Wed Apr 30 17:43:49 2014	(r265151)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c	Wed Apr 30 17:46:29 2014	(r265152)
@@ -449,7 +449,7 @@ trim_map_vdev_commit(spa_t *spa, zio_t *
 {
 	trim_map_t *tm = vd->vdev_trimmap;
 	trim_seg_t *ts;
-	uint64_t size, txgtarget, txgsafe;
+	uint64_t size, offset, txgtarget, txgsafe;
 	hrtime_t timelimit;
 
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
@@ -477,9 +477,20 @@ trim_map_vdev_commit(spa_t *spa, zio_t *
 		avl_remove(&tm->tm_queued_frees, ts);
 		avl_add(&tm->tm_inflight_frees, ts);
 		size = ts->ts_end - ts->ts_start;
-		zio_nowait(zio_trim(zio, spa, vd, ts->ts_start, size));
+		offset = ts->ts_start;
 		TRIM_MAP_SDEC(tm, size);
 		TRIM_MAP_QDEC(tm);
+		/*
+		 * We drop the lock while we call zio_nowait as the IO
+		 * scheduler can result in a different IO being run e.g.
+		 * a write which would result in a recursive lock.
+		 */
+		mutex_exit(&tm->tm_lock);
+
+		zio_nowait(zio_trim(zio, spa, vd, offset, size));
+
+		mutex_enter(&tm->tm_lock);
+		ts = trim_map_first(tm, txgtarget, txgsafe, timelimit);
 	}
 	mutex_exit(&tm->tm_lock);
 }

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	Wed Apr 30 17:43:49 2014	(r265151)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	Wed Apr 30 17:46:29 2014	(r265152)
@@ -800,10 +800,11 @@ vdev_geom_io_start(zio_t *zio)
 
 	vd = zio->io_vd;
 
-	if (zio->io_type == ZIO_TYPE_IOCTL) {
+	switch (zio->io_type) {
+	case ZIO_TYPE_IOCTL:
 		/* XXPOLICY */
 		if (!vdev_readable(vd)) {
-			zio->io_error = ENXIO;
+			zio->io_error = SET_ERROR(ENXIO);
 			return (ZIO_PIPELINE_CONTINUE);
 		}
 
@@ -812,28 +813,28 @@ vdev_geom_io_start(zio_t *zio)
 			if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
 				break;
 			if (vd->vdev_nowritecache) {
-				zio->io_error = ENOTSUP;
-				break;
-			}
-			goto sendreq;
-		case DKIOCTRIM:
-			if (vdev_geom_bio_delete_disable)
-				break;
-			if (vd->vdev_notrim) {
-				zio->io_error = ENOTSUP;
+				zio->io_error = SET_ERROR(ENOTSUP);
 				break;
 			}
 			goto sendreq;
 		default:
-			zio->io_error = ENOTSUP;
+			zio->io_error = SET_ERROR(ENOTSUP);
 		}
 
 		return (ZIO_PIPELINE_CONTINUE);
+	case ZIO_TYPE_FREE:
+		if (vdev_geom_bio_delete_disable)
+			return (ZIO_PIPELINE_CONTINUE);
+
+		if (vd->vdev_notrim) {
+			zio->io_error = SET_ERROR(ENOTSUP);
+			return (ZIO_PIPELINE_CONTINUE);
+		}
 	}
 sendreq:
 	cp = vd->vdev_tsd;
 	if (cp == NULL) {
-		zio->io_error = ENXIO;
+		zio->io_error = SET_ERROR(ENXIO);
 		return (ZIO_PIPELINE_CONTINUE);
 	}
 	bp = g_alloc_bio();
@@ -846,21 +847,20 @@ sendreq:
 		bp->bio_offset = zio->io_offset;
 		bp->bio_length = zio->io_size;
 		break;
+	case ZIO_TYPE_FREE:
+		bp->bio_cmd = BIO_DELETE;
+		bp->bio_data = NULL;
+		bp->bio_offset = zio->io_offset;
+		bp->bio_length = zio->io_size;
+		break;
 	case ZIO_TYPE_IOCTL:
-		switch (zio->io_cmd) {
-		case DKIOCFLUSHWRITECACHE:
+		if (zio->io_cmd == DKIOCFLUSHWRITECACHE) {
 			bp->bio_cmd = BIO_FLUSH;
 			bp->bio_flags |= BIO_ORDERED;
 			bp->bio_data = NULL;
 			bp->bio_offset = cp->provider->mediasize;
 			bp->bio_length = 0;
 			break;
-		case DKIOCTRIM:
-			bp->bio_cmd = BIO_DELETE;
-			bp->bio_data = NULL;
-			bp->bio_offset = zio->io_offset;
-			bp->bio_length = zio->io_size;
-			break;
 		}
 		break;
 	}

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c	Wed Apr 30 17:43:49 2014	(r265151)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c	Wed Apr 30 17:46:29 2014	(r265152)
@@ -40,9 +40,9 @@
  *
  * ZFS issues I/O operations to leaf vdevs to satisfy and complete zios.  The
  * I/O scheduler determines when and in what order those operations are
- * issued.  The I/O scheduler divides operations into five I/O classes
+ * issued.  The I/O scheduler divides operations into six I/O classes
  * prioritized in the following order: sync read, sync write, async read,
- * async write, and scrub/resilver.  Each queue defines the minimum and
+ * async write, scrub/resilver and trim.  Each queue defines the minimum and
  * maximum number of concurrent operations that may be issued to the device.
  * In addition, the device has an aggregate maximum. Note that the sum of the
  * per-queue minimums must not exceed the aggregate maximum, and if the
@@ -61,7 +61,7 @@
  * done in the order specified above. No further operations are issued if the
  * aggregate maximum number of concurrent operations has been hit or if there
  * are no operations queued for an I/O class that has not hit its maximum.
- * Every time an i/o is queued or an operation completes, the I/O scheduler
+ * Every time an I/O is queued or an operation completes, the I/O scheduler
  * looks for new operations to issue.
  *
  * All I/O classes have a fixed maximum number of outstanding operations
@@ -70,7 +70,7 @@
  * transaction groups (see txg.c). Transaction groups enter the syncing state
  * periodically so the number of queued async writes will quickly burst up and
  * then bleed down to zero. Rather than servicing them as quickly as possible,
- * the I/O scheduler changes the maximum number of active async write i/os
+ * the I/O scheduler changes the maximum number of active async write I/Os
  * according to the amount of dirty data in the pool (see dsl_pool.c). Since
  * both throughput and latency typically increase with the number of
  * concurrent operations issued to physical devices, reducing the burstiness
@@ -113,14 +113,14 @@
  */
 
 /*
- * The maximum number of i/os active to each device.  Ideally, this will be >=
+ * The maximum number of I/Os active to each device.  Ideally, this will be >=
  * the sum of each queue's max_active.  It must be at least the sum of each
  * queue's min_active.
  */
 uint32_t zfs_vdev_max_active = 1000;
 
 /*
- * Per-queue limits on the number of i/os active to each device.  If the
+ * Per-queue limits on the number of I/Os active to each device.  If the
  * sum of the queue's max_active is < zfs_vdev_max_active, then the
  * min_active comes into play.  We will send min_active from each queue,
  * and then select from queues in the order defined by zio_priority_t.
@@ -145,6 +145,14 @@ uint32_t zfs_vdev_async_write_min_active
 uint32_t zfs_vdev_async_write_max_active = 10;
 uint32_t zfs_vdev_scrub_min_active = 1;
 uint32_t zfs_vdev_scrub_max_active = 2;
+uint32_t zfs_vdev_trim_min_active = 1;
+/*
+ * TRIM max active is large in comparison to the other values due to the fact
+ * that TRIM IOs are coalesced at the device layer. This value is set such
+ * that a typical SSD can process the queued IOs in a single request.
+ */
+uint32_t zfs_vdev_trim_max_active = 64;
+
 
 /*
  * When the pool has less than zfs_vdev_async_write_active_min_dirty_percent
@@ -171,7 +179,7 @@ SYSCTL_DECL(_vfs_zfs_vdev);
 TUNABLE_INT("vfs.zfs.vdev.max_active", &zfs_vdev_max_active);
 SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, max_active, CTLFLAG_RW,
     &zfs_vdev_max_active, 0,
-    "The maximum number of i/os of all types active for each device.");
+    "The maximum number of I/Os of all types active for each device.");
 
 #define ZFS_VDEV_QUEUE_KNOB_MIN(name)					\
 TUNABLE_INT("vfs.zfs.vdev." #name "_min_active",			\
@@ -199,6 +207,8 @@ ZFS_VDEV_QUEUE_KNOB_MIN(async_write);
 ZFS_VDEV_QUEUE_KNOB_MAX(async_write);
 ZFS_VDEV_QUEUE_KNOB_MIN(scrub);
 ZFS_VDEV_QUEUE_KNOB_MAX(scrub);
+ZFS_VDEV_QUEUE_KNOB_MIN(trim);
+ZFS_VDEV_QUEUE_KNOB_MAX(trim);
 
 #undef ZFS_VDEV_QUEUE_KNOB
 
@@ -299,6 +309,7 @@ static void
 vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
 {
 	spa_t *spa = zio->io_spa;
+	ASSERT(MUTEX_HELD(&vq->vq_lock));
 	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
 	avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
 
@@ -315,6 +326,7 @@ static void
 vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
 {
 	spa_t *spa = zio->io_spa;
+	ASSERT(MUTEX_HELD(&vq->vq_lock));
 	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
 	avl_remove(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
 
@@ -403,6 +415,8 @@ vdev_queue_class_min_active(zio_priority
 		return (zfs_vdev_async_write_min_active);
 	case ZIO_PRIORITY_SCRUB:
 		return (zfs_vdev_scrub_min_active);
+	case ZIO_PRIORITY_TRIM:
+		return (zfs_vdev_trim_min_active);
 	default:
 		panic("invalid priority %u", p);
 		return (0);
@@ -454,6 +468,8 @@ vdev_queue_class_max_active(spa_t *spa, 
 		    spa->spa_dsl_pool->dp_dirty_total));
 	case ZIO_PRIORITY_SCRUB:
 		return (zfs_vdev_scrub_max_active);
+	case ZIO_PRIORITY_TRIM:
+		return (zfs_vdev_trim_max_active);
 	default:
 		panic("invalid priority %u", p);
 		return (0);
@@ -470,6 +486,8 @@ vdev_queue_class_to_issue(vdev_queue_t *
 	spa_t *spa = vq->vq_vdev->vdev_spa;
 	zio_priority_t p;
 
+	ASSERT(MUTEX_HELD(&vq->vq_lock));
+
 	if (avl_numnodes(&vq->vq_active_tree) >= zfs_vdev_max_active)
 		return (ZIO_PRIORITY_NUM_QUEUEABLE);
 
@@ -511,10 +529,11 @@ vdev_queue_aggregate(vdev_queue_t *vq, z
 	zio_t *first, *last, *aio, *dio, *mandatory, *nio;
 	uint64_t maxgap = 0;
 	uint64_t size;
-	boolean_t stretch = B_FALSE;
-	vdev_queue_class_t *vqc = &vq->vq_class[zio->io_priority];
-	avl_tree_t *t = &vqc->vqc_queued_tree;
-	enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
+	boolean_t stretch;
+	avl_tree_t *t;
+	enum zio_flag flags;
+
+	ASSERT(MUTEX_HELD(&vq->vq_lock));
 
 	if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE)
 		return (NULL);
@@ -552,6 +571,8 @@ vdev_queue_aggregate(vdev_queue_t *vq, z
 	 * Walk backwards through sufficiently contiguous I/Os
 	 * recording the last non-option I/O.
 	 */
+	flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
+	t = &vq->vq_class[zio->io_priority].vqc_queued_tree;
 	while ((dio = AVL_PREV(t, first)) != NULL &&
 	    (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
 	    IO_SPAN(dio, last) <= zfs_vdev_aggregation_limit &&
@@ -591,6 +612,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, z
 	 * non-optional I/O is close enough to make aggregation
 	 * worthwhile.
 	 */
+	stretch = B_FALSE;
 	if (zio->io_type == ZIO_TYPE_WRITE && mandatory != NULL) {
 		zio_t *nio = last;
 		while ((dio = AVL_NEXT(t, nio)) != NULL &&
@@ -731,11 +753,13 @@ vdev_queue_io(zio_t *zio)
 		    zio->io_priority != ZIO_PRIORITY_ASYNC_READ &&
 		    zio->io_priority != ZIO_PRIORITY_SCRUB)
 			zio->io_priority = ZIO_PRIORITY_ASYNC_READ;
-	} else {
-		ASSERT(zio->io_type == ZIO_TYPE_WRITE);
+	} else if (zio->io_type == ZIO_TYPE_WRITE) {
 		if (zio->io_priority != ZIO_PRIORITY_SYNC_WRITE &&
 		    zio->io_priority != ZIO_PRIORITY_ASYNC_WRITE)
 			zio->io_priority = ZIO_PRIORITY_ASYNC_WRITE;
+	} else {
+		ASSERT(zio->io_type == ZIO_TYPE_FREE);
+		zio->io_priority = ZIO_PRIORITY_TRIM;
 	}
 
 	zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Wed Apr 30 17:43:49 2014	(r265151)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Wed Apr 30 17:46:29 2014	(r265152)
@@ -788,6 +788,8 @@ zio_free_sync(zio_t *pio, spa_t *spa, ui
 	else if (BP_IS_GANG(bp) || BP_GET_DEDUP(bp))
 		stage |= ZIO_STAGE_ISSUE_ASYNC;
 
+	flags |= ZIO_FLAG_DONT_QUEUE;
+
 	zio = zio_create(pio, spa, txg, bp, NULL, size,
 	    NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_NOW, flags,
 	    NULL, 0, NULL, ZIO_STAGE_OPEN, stage);
@@ -827,14 +829,14 @@ zio_claim(zio_t *pio, spa_t *spa, uint64
 zio_t *
 zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, uint64_t offset,
     uint64_t size, zio_done_func_t *done, void *private,
-    enum zio_flag flags)
+    zio_priority_t priority, enum zio_flag flags)
 {
 	zio_t *zio;
 	int c;
 
 	if (vd->vdev_children == 0) {
 		zio = zio_create(pio, spa, 0, NULL, NULL, size, done, private,
-		    ZIO_TYPE_IOCTL, ZIO_PRIORITY_NOW, flags, vd, offset, NULL,
+		    ZIO_TYPE_IOCTL, priority, flags, vd, offset, NULL,
 		    ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE);
 
 		zio->io_cmd = cmd;
@@ -843,7 +845,7 @@ zio_ioctl(zio_t *pio, spa_t *spa, vdev_t
 
 		for (c = 0; c < vd->vdev_children; c++)
 			zio_nowait(zio_ioctl(zio, spa, vd->vdev_child[c], cmd,
-			    offset, size, done, private, flags));
+			    offset, size, done, private, priority, flags));
 	}
 
 	return (zio);
@@ -928,6 +930,10 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *
 		pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY;
 	}
 
+	/* Not all IO types require vdev io done stage e.g. free */
+	if (!(pio->io_pipeline & ZIO_STAGE_VDEV_IO_DONE))
+		pipeline &= ~ZIO_STAGE_VDEV_IO_DONE;
+
 	if (vd->vdev_children == 0)
 		offset += VDEV_LABEL_START_SIZE;
 
@@ -973,7 +979,7 @@ void
 zio_flush(zio_t *zio, vdev_t *vd)
 {
 	zio_nowait(zio_ioctl(zio, zio->io_spa, vd, DKIOCFLUSHWRITECACHE, 0, 0,
-	    NULL, NULL,
+	    NULL, NULL, ZIO_PRIORITY_NOW,
 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY));
 }
 
@@ -983,9 +989,10 @@ zio_trim(zio_t *zio, spa_t *spa, vdev_t 
 
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
 
-	return zio_ioctl(zio, spa, vd, DKIOCTRIM, offset, size,
-	    NULL, NULL,
-	    ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY);
+	return (zio_create(zio, spa, 0, NULL, NULL, size, NULL, NULL,
+	    ZIO_TYPE_FREE, ZIO_PRIORITY_TRIM, ZIO_FLAG_DONT_AGGREGATE |
+	    ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY,
+	    vd, offset, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PHYS_PIPELINE));
 }
 
 void
@@ -2530,7 +2537,8 @@ zio_vdev_io_start(zio_t **ziop)
 		return (vdev_mirror_ops.vdev_op_io_start(zio));
 	}
 
-	if (vd->vdev_ops->vdev_op_leaf && zio->io_type == ZIO_TYPE_FREE) {
+	if (vd->vdev_ops->vdev_op_leaf && zio->io_type == ZIO_TYPE_FREE &&
+	    zio->io_priority == ZIO_PRIORITY_NOW) {
 		trim_map_free(vd, zio->io_offset, zio->io_size, zio->io_txg);
 		return (ZIO_PIPELINE_CONTINUE);
 	}
@@ -2598,31 +2606,33 @@ zio_vdev_io_start(zio_t **ziop)
 		return (ZIO_PIPELINE_CONTINUE);
 	}
 
-	if (vd->vdev_ops->vdev_op_leaf &&
-	    (zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE)) {
+	if (vd->vdev_ops->vdev_op_leaf) {
+		switch (zio->io_type) {
+		case ZIO_TYPE_READ:
+			if (vdev_cache_read(zio))
+				return (ZIO_PIPELINE_CONTINUE);
+			/* FALLTHROUGH */
+		case ZIO_TYPE_WRITE:
+		case ZIO_TYPE_FREE:
+			if ((zio = vdev_queue_io(zio)) == NULL)
+				return (ZIO_PIPELINE_STOP);
+			*ziop = zio;
 
-		if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio))
-			return (ZIO_PIPELINE_CONTINUE);
-
-		if ((zio = vdev_queue_io(zio)) == NULL)
-			return (ZIO_PIPELINE_STOP);
-		*ziop = zio;
-
-		if (!vdev_accessible(vd, zio)) {
-			zio->io_error = SET_ERROR(ENXIO);
-			zio_interrupt(zio);
-			return (ZIO_PIPELINE_STOP);
+			if (!vdev_accessible(vd, zio)) {
+				zio->io_error = SET_ERROR(ENXIO);
+				zio_interrupt(zio);
+				return (ZIO_PIPELINE_STOP);
+			}
+			break;
 		}
-	}
-
-	/*
-	 * Note that we ignore repair writes for TRIM because they can conflict
-	 * with normal writes. This isn't an issue because, by definition, we
-	 * only repair blocks that aren't freed.
-	 */
-	if (vd->vdev_ops->vdev_op_leaf && zio->io_type == ZIO_TYPE_WRITE &&
-	    !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) {
-		if (!trim_map_write_start(zio))
+		/*
+		 * Note that we ignore repair writes for TRIM because they can
+		 * conflict with normal writes. This isn't an issue because, by
+		 * definition, we only repair blocks that aren't freed.
+		 */
+		if (zio->io_type == ZIO_TYPE_WRITE &&
+		    !(zio->io_flags & ZIO_FLAG_IO_REPAIR) &&
+		    !trim_map_write_start(zio))
 			return (ZIO_PIPELINE_STOP);
 	}
 
@@ -2644,7 +2654,8 @@ zio_vdev_io_done(zio_t **ziop)
 	    zio->io_type == ZIO_TYPE_WRITE || zio->io_type == ZIO_TYPE_FREE);
 
 	if (vd != NULL && vd->vdev_ops->vdev_op_leaf &&
-	    (zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE)) {
+	    (zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE ||
+	    zio->io_type == ZIO_TYPE_FREE)) {
 
 		if (zio->io_type == ZIO_TYPE_WRITE &&
 		    !(zio->io_flags & ZIO_FLAG_IO_REPAIR))
@@ -2725,7 +2736,8 @@ zio_vdev_io_assess(zio_t **ziop)
 	if (zio_injection_enabled && zio->io_error == 0)
 		zio->io_error = zio_handle_fault_injection(zio, EIO);
 
-	if (zio->io_type == ZIO_TYPE_IOCTL && zio->io_cmd == DKIOCTRIM)
+	if (zio->io_type == ZIO_TYPE_FREE &&
+	    zio->io_priority != ZIO_PRIORITY_NOW) {
 		switch (zio->io_error) {
 		case 0:
 			ZIO_TRIM_STAT_INCR(bytes, zio->io_size);
@@ -2738,6 +2750,7 @@ zio_vdev_io_assess(zio_t **ziop)
 			ZIO_TRIM_STAT_BUMP(failed);
 			break;
 		}
+	}
 
 	/*
 	 * If the I/O failed, determine whether we should attempt to retry it.

