svn commit: r323107 - vendor-sys/illumos/dist/uts/common/fs/zfs vendor-sys/illumos/dist/uts/common/fs/zfs/sys vendor-sys/illumos/dist/uts/common/sys/fs vendor/illumos/dist/cmd/zpool vendor/illumos/...

Andriy Gapon avg at FreeBSD.org
Fri Sep 1 17:43:10 UTC 2017


Author: avg
Date: Fri Sep  1 17:43:08 2017
New Revision: 323107
URL: https://svnweb.freebsd.org/changeset/base/323107

Log:
  8414 Implemented zpool scrub pause/resume
  
  illumos/illumos-gate at 1702cce751c5cb7ead878d0205a6c90b027e3de8
  https://github.com/illumos/illumos-gate/commit/1702cce751c5cb7ead878d0205a6c90b027e3de8
  
  https://www.illumos.org/issues/8414
    This issue tracks the port of scrub pause from ZoL: https://github.com/zfsonlinux/zfs/pull/6167
    Currently, there is no way to pause a scrub. Pausing may be useful when
   the pool is busy with other I/O to preserve bandwidth.
  
    Description
  
    This patch adds the ability to pause and resume scrubbing.  This is achieved
    by maintaining a persistent on-disk scrub state.  While the state is 'paused'
    we do not scrub any more blocks.  We do however perform regular scan
    housekeeping such as freeing async destroyed and deadlist blocks while paused.
  
    If you're testing this change, you probably want to include the patch from #6164
  
    Motivation and Context
  
    Scrub pausing can be an I/O intensive operation and people have been asking
    for the ability to pause a scrub for a while. This allows one to preserve scrub
    progress while freeing up bandwidth for other I/O.
  
    How Has This Been Tested?
  
    Unit testing and zfs-tests.  to the pool.  This patch will also include the
    patch from https://github.com/zfsonlinux/zfs/ pull/6164 In certain cases
    (dsl_scan_sync() is one), we may end up calling
  
  Reviewed by: George Melikov <mail at gmelikov.ru>
  Reviewed by: Brian Behlendorf <behlendorf1 at llnl.gov>
  Reviewed by: Brad Lewis <brad.lewis at delphix.com>
  Reviewed by: Serapheim Dimitropoulos <serapheim at delphix.com>
  Reviewed by: Matt Ahrens <mahrens at delphix.com>
  Approved by: Dan McDonald <danmcd at joyent.com>
  Author: Alek Pinchuk <apinchuk at datto.com>

Modified:
  vendor-sys/illumos/dist/uts/common/fs/zfs/bpobj.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_scan.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dsl_scan.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c
  vendor-sys/illumos/dist/uts/common/sys/fs/zfs.h

Changes in other areas also in this revision:
Modified:
  vendor/illumos/dist/cmd/zpool/zpool_main.c
  vendor/illumos/dist/lib/libzfs/common/libzfs.h
  vendor/illumos/dist/lib/libzfs/common/libzfs_pool.c
  vendor/illumos/dist/lib/libzfs/common/libzfs_util.c
  vendor/illumos/dist/man/man1m/zpool.1m

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/bpobj.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/bpobj.c	Fri Sep  1 17:38:49 2017	(r323106)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/bpobj.c	Fri Sep  1 17:43:08 2017	(r323107)
@@ -22,6 +22,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
+ * Copyright (c) 2017 Datto Inc.
  */
 
 #include <sys/bpobj.h>
@@ -211,6 +212,9 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, vo
 	dmu_buf_t *dbuf = NULL;
 
 	mutex_enter(&bpo->bpo_lock);
+
+	if (!bpobj_hasentries(bpo))
+		goto out;
 
 	if (free)
 		dmu_buf_will_dirty(bpo->bpo_dbuf, tx);

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_scan.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_scan.c	Fri Sep  1 17:38:49 2017	(r323106)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_scan.c	Fri Sep  1 17:43:08 2017	(r323107)
@@ -23,6 +23,7 @@
  * Copyright 2016 Gary Mills
  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  * Copyright 2017 Joyent, Inc.
+ * Copyright (c) 2017 Datto Inc.
  */
 
 #include <sys/dsl_scan.h>
@@ -285,6 +286,8 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu
 		scn->scn_phys.scn_queue_obj = 0;
 	}
 
+	scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED;
+
 	/*
 	 * If we were "restarted" from a stopped state, don't bother
 	 * with anything else.
@@ -369,6 +372,91 @@ dsl_scan_cancel(dsl_pool_t *dp)
 	    dsl_scan_cancel_sync, NULL, 3, ZFS_SPACE_CHECK_RESERVED));
 }
 
+boolean_t
+dsl_scan_is_paused_scrub(const dsl_scan_t *scn)
+{
+	if (dsl_scan_scrubbing(scn->scn_dp) &&
+	    scn->scn_phys.scn_flags & DSF_SCRUB_PAUSED)
+		return (B_TRUE);
+
+	return (B_FALSE);
+}
+
+static int
+dsl_scrub_pause_resume_check(void *arg, dmu_tx_t *tx)
+{
+	pool_scrub_cmd_t *cmd = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_scan_t *scn = dp->dp_scan;
+
+	if (*cmd == POOL_SCRUB_PAUSE) {
+		/* can't pause a scrub when there is no in-progress scrub */
+		if (!dsl_scan_scrubbing(dp))
+			return (SET_ERROR(ENOENT));
+
+		/* can't pause a paused scrub */
+		if (dsl_scan_is_paused_scrub(scn))
+			return (SET_ERROR(EBUSY));
+	} else if (*cmd != POOL_SCRUB_NORMAL) {
+		return (SET_ERROR(ENOTSUP));
+	}
+
+	return (0);
+}
+
+static void
+dsl_scrub_pause_resume_sync(void *arg, dmu_tx_t *tx)
+{
+	pool_scrub_cmd_t *cmd = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	spa_t *spa = dp->dp_spa;
+	dsl_scan_t *scn = dp->dp_scan;
+
+	if (*cmd == POOL_SCRUB_PAUSE) {
+		/* can't pause a scrub when there is no in-progress scrub */
+		spa->spa_scan_pass_scrub_pause = gethrestime_sec();
+		scn->scn_phys.scn_flags |= DSF_SCRUB_PAUSED;
+		dsl_scan_sync_state(scn, tx);
+	} else {
+		ASSERT3U(*cmd, ==, POOL_SCRUB_NORMAL);
+		if (dsl_scan_is_paused_scrub(scn)) {
+			/*
+			 * We need to keep track of how much time we spend
+			 * paused per pass so that we can adjust the scrub rate
+			 * shown in the output of 'zpool status'
+			 */
+			spa->spa_scan_pass_scrub_spent_paused +=
+			    gethrestime_sec() - spa->spa_scan_pass_scrub_pause;
+			spa->spa_scan_pass_scrub_pause = 0;
+			scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED;
+			dsl_scan_sync_state(scn, tx);
+		}
+	}
+}
+
+/*
+ * Set scrub pause/resume state if it makes sense to do so
+ */
+int
+dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd)
+{
+	return (dsl_sync_task(spa_name(dp->dp_spa),
+	    dsl_scrub_pause_resume_check, dsl_scrub_pause_resume_sync, &cmd, 3,
+	    ZFS_SPACE_CHECK_RESERVED));
+}
+
+boolean_t
+dsl_scan_scrubbing(const dsl_pool_t *dp)
+{
+	dsl_scan_t *scn = dp->dp_scan;
+
+	if (scn->scn_phys.scn_state == DSS_SCANNING &&
+	    scn->scn_phys.scn_func == POOL_SCAN_SCRUB)
+		return (B_TRUE);
+
+	return (B_FALSE);
+}
+
 static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
     dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn,
     dmu_objset_type_t ostype, dmu_tx_t *tx);
@@ -410,14 +498,14 @@ dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
 extern int zfs_vdev_async_write_active_min_dirty_percent;
 
 static boolean_t
-dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
+dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)
 {
 	/* we never skip user/group accounting objects */
 	if (zb && (int64_t)zb->zb_object < 0)
 		return (B_FALSE);
 
-	if (scn->scn_pausing)
-		return (B_TRUE); /* we're already pausing */
+	if (scn->scn_suspending)
+		return (B_TRUE); /* we're already suspending */
 
 	if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark))
 		return (B_FALSE); /* we're resuming */
@@ -427,7 +515,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_
 		return (B_FALSE);
 
 	/*
-	 * We pause if:
+	 * We suspend if:
 	 *  - we have scanned for the maximum time: an entire txg
 	 *    timeout (default 5 sec)
 	 *  or
@@ -450,19 +538,19 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_
 	    dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent)) ||
 	    spa_shutting_down(scn->scn_dp->dp_spa)) {
 		if (zb) {
-			dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n",
+			dprintf("suspending at bookmark %llx/%llx/%llx/%llx\n",
 			    (longlong_t)zb->zb_objset,
 			    (longlong_t)zb->zb_object,
 			    (longlong_t)zb->zb_level,
 			    (longlong_t)zb->zb_blkid);
 			scn->scn_phys.scn_bookmark = *zb;
 		}
-		dprintf("pausing at DDT bookmark %llx/%llx/%llx/%llx\n",
+		dprintf("suspending at DDT bookmark %llx/%llx/%llx/%llx\n",
 		    (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class,
 		    (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type,
 		    (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum,
 		    (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor);
-		scn->scn_pausing = B_TRUE;
+		scn->scn_suspending = B_TRUE;
 		return (B_TRUE);
 	}
 	return (B_FALSE);
@@ -600,7 +688,7 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phy
 		/*
 		 * If we found the block we're trying to resume from, or
 		 * we went past it to a different object, zero it out to
-		 * indicate that it's OK to start checking for pausing
+		 * indicate that it's OK to start checking for suspending
 		 * again.
 		 */
 		if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 ||
@@ -703,7 +791,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, d
 			/*
 			 * We also always visit user/group accounting
 			 * objects, and never skip them, even if we are
-			 * pausing.  This is necessary so that the space
+			 * suspending.  This is necessary so that the space
 			 * deltas from this txg get integrated.
 			 */
 			dsl_scan_visitdnode(scn, ds, osp->os_type,
@@ -759,7 +847,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t 
 
 	/* ASSERT(pbuf == NULL || arc_released(pbuf)); */
 
-	if (dsl_scan_check_pause(scn, zb))
+	if (dsl_scan_check_suspend(scn, zb))
 		return;
 
 	if (dsl_scan_check_resume(scn, dnp, zb))
@@ -1096,14 +1184,14 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_
 	char *dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 	dsl_dataset_name(ds, dsname);
 	zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; "
-	    "pausing=%u",
+	    "suspending=%u",
 	    (longlong_t)dsobj, dsname,
 	    (longlong_t)scn->scn_phys.scn_cur_min_txg,
 	    (longlong_t)scn->scn_phys.scn_cur_max_txg,
-	    (int)scn->scn_pausing);
+	    (int)scn->scn_suspending);
 	kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN);
 
-	if (scn->scn_pausing)
+	if (scn->scn_suspending)
 		goto out;
 
 	/*
@@ -1267,13 +1355,13 @@ dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx)
 		dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx);
 		n++;
 
-		if (dsl_scan_check_pause(scn, NULL))
+		if (dsl_scan_check_suspend(scn, NULL))
 			break;
 	}
 
-	zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; pausing=%u",
-	    (longlong_t)n, (int)scn->scn_phys.scn_ddt_class_max,
-	    (int)scn->scn_pausing);
+	zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; "
+	    "suspending=%u", (longlong_t)n,
+	    (int)scn->scn_phys.scn_ddt_class_max, (int)scn->scn_suspending);
 
 	ASSERT(error == 0 || error == ENOENT);
 	ASSERT(error != ENOENT ||
@@ -1316,7 +1404,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
 		scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg;
 		scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg;
 		dsl_scan_ddt(scn, tx);
-		if (scn->scn_pausing)
+		if (scn->scn_suspending)
 			return;
 	}
 
@@ -1328,7 +1416,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
 		dsl_scan_visit_rootbp(scn, NULL,
 		    &dp->dp_meta_rootbp, tx);
 		spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp);
-		if (scn->scn_pausing)
+		if (scn->scn_suspending)
 			return;
 
 		if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
@@ -1338,22 +1426,22 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
 			dsl_scan_visitds(scn,
 			    dp->dp_origin_snap->ds_object, tx);
 		}
-		ASSERT(!scn->scn_pausing);
+		ASSERT(!scn->scn_suspending);
 	} else if (scn->scn_phys.scn_bookmark.zb_objset !=
 	    ZB_DESTROYED_OBJSET) {
 		/*
-		 * If we were paused, continue from here.  Note if the
-		 * ds we were paused on was deleted, the zb_objset may
+		 * If we were suspended, continue from here.  Note if the
+		 * ds we were suspended on was deleted, the zb_objset may
 		 * be -1, so we will skip this and find a new objset
 		 * below.
 		 */
 		dsl_scan_visitds(scn, scn->scn_phys.scn_bookmark.zb_objset, tx);
-		if (scn->scn_pausing)
+		if (scn->scn_suspending)
 			return;
 	}
 
 	/*
-	 * In case we were paused right at the end of the ds, zero the
+	 * In case we were suspended right at the end of the ds, zero the
 	 * bookmark so we don't think that we're still trying to resume.
 	 */
 	bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_phys_t));
@@ -1385,14 +1473,14 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
 
 		dsl_scan_visitds(scn, dsobj, tx);
 		zap_cursor_fini(&zc);
-		if (scn->scn_pausing)
+		if (scn->scn_suspending)
 			return;
 	}
 	zap_cursor_fini(&zc);
 }
 
 static boolean_t
-dsl_scan_free_should_pause(dsl_scan_t *scn)
+dsl_scan_free_should_suspend(dsl_scan_t *scn)
 {
 	uint64_t elapsed_nanosecs;
 
@@ -1416,7 +1504,7 @@ dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, 
 
 	if (!scn->scn_is_bptree ||
 	    (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) {
-		if (dsl_scan_free_should_pause(scn))
+		if (dsl_scan_free_should_suspend(scn))
 			return (SET_ERROR(ERESTART));
 	}
 
@@ -1439,7 +1527,8 @@ dsl_scan_active(dsl_scan_t *scn)
 		return (B_FALSE);
 	if (spa_shutting_down(spa))
 		return (B_FALSE);
-	if (scn->scn_phys.scn_state == DSS_SCANNING ||
+	if ((scn->scn_phys.scn_state == DSS_SCANNING &&
+	    !dsl_scan_is_paused_scrub(scn)) ||
 	    (scn->scn_async_destroying && !scn->scn_async_stalled))
 		return (B_TRUE);
 
@@ -1494,12 +1583,12 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 		return;
 
 	scn->scn_visited_this_txg = 0;
-	scn->scn_pausing = B_FALSE;
+	scn->scn_suspending = B_FALSE;
 	scn->scn_sync_start_time = gethrtime();
 	spa->spa_scrub_active = B_TRUE;
 
 	/*
-	 * First process the async destroys.  If we pause, don't do
+	 * First process the async destroys.  If we suspend, don't do
 	 * any scrubbing or resilvering.  This ensures that there are no
 	 * async destroys while we are scanning, so the scan code doesn't
 	 * have to worry about traversing it.  It is also faster to free the
@@ -1616,7 +1705,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 		return;
 
 	if (scn->scn_done_txg == tx->tx_txg) {
-		ASSERT(!scn->scn_pausing);
+		ASSERT(!scn->scn_suspending);
 		/* finished with scan. */
 		zfs_dbgmsg("txg %llu scan complete", tx->tx_txg);
 		dsl_scan_done(scn, B_TRUE, tx);
@@ -1625,6 +1714,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 		return;
 	}
 
+	if (dsl_scan_is_paused_scrub(scn))
+		return;
+
 	if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
 	    scn->scn_phys.scn_ddt_class_max) {
 		zfs_dbgmsg("doing scan sync txg %llu; "
@@ -1659,7 +1751,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 	    (longlong_t)scn->scn_visited_this_txg,
 	    (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time));
 
-	if (!scn->scn_pausing) {
+	if (!scn->scn_suspending) {
 		scn->scn_done_txg = tx->tx_txg + 1;
 		zfs_dbgmsg("txg %llu traversal complete, waiting till txg %llu",
 		    tx->tx_txg, scn->scn_done_txg);
@@ -1867,11 +1959,15 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
 	return (0);
 }
 
-/* Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver */
+/*
+ * Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver.
+ * Can also be called to resume a paused scrub.
+ */
 int
 dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
 {
 	spa_t *spa = dp->dp_spa;
+	dsl_scan_t *scn = dp->dp_scan;
 
 	/*
 	 * Purge all vdev caches and probe all devices.  We do this here
@@ -1885,6 +1981,16 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
 	vdev_reopen(spa->spa_root_vdev);
 	spa->spa_scrub_reopen = B_FALSE;
 	(void) spa_vdev_state_exit(spa, NULL, 0);
+
+	if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) {
+		/* got scrub start cmd, resume paused scrub */
+		int err = dsl_scrub_set_pause_resume(scn->scn_dp,
+		    POOL_SCRUB_NORMAL);
+		if (err == 0)
+			return (ECANCELED);
+
+		return (SET_ERROR(err));
+	}
 
 	return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check,
 	    dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_NONE));

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c	Fri Sep  1 17:38:49 2017	(r323106)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c	Fri Sep  1 17:43:08 2017	(r323107)
@@ -28,6 +28,7 @@
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2016 Toomas Soome <tsoome at me.com>
  * Copyright 2017 Joyent, Inc.
+ * Copyright (c) 2017 Datto Inc.
  */
 
 /*
@@ -5746,6 +5747,16 @@ spa_vdev_setfru(spa_t *spa, uint64_t guid, const char 
  * SPA Scanning
  * ==========================================================================
  */
+int
+spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd)
+{
+	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
+
+	if (dsl_scan_resilvering(spa->spa_dsl_pool))
+		return (SET_ERROR(EBUSY));
+
+	return (dsl_scrub_set_pause_resume(spa->spa_dsl_pool, cmd));
+}
 
 int
 spa_scan_stop(spa_t *spa)

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c	Fri Sep  1 17:38:49 2017	(r323106)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c	Fri Sep  1 17:43:08 2017	(r323107)
@@ -25,6 +25,7 @@
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright 2013 Saso Kiselkov. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
+ * Copyright (c) 2017 Datto Inc.
  */
 
 #include <sys/zfs_context.h>
@@ -2008,6 +2009,11 @@ spa_scan_stat_init(spa_t *spa)
 {
 	/* data not stored on disk */
 	spa->spa_scan_pass_start = gethrestime_sec();
+	if (dsl_scan_is_paused_scrub(spa->spa_dsl_pool->dp_scan))
+		spa->spa_scan_pass_scrub_pause = spa->spa_scan_pass_start;
+	else
+		spa->spa_scan_pass_scrub_pause = 0;
+	spa->spa_scan_pass_scrub_spent_paused = 0;
 	spa->spa_scan_pass_exam = 0;
 	vdev_scan_stat_init(spa->spa_root_vdev);
 }
@@ -2038,6 +2044,8 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps)
 	/* data not stored on disk */
 	ps->pss_pass_start = spa->spa_scan_pass_start;
 	ps->pss_pass_exam = spa->spa_scan_pass_exam;
+	ps->pss_pass_scrub_pause = spa->spa_scan_pass_scrub_pause;
+	ps->pss_pass_scrub_spent_paused = spa->spa_scan_pass_scrub_spent_paused;
 
 	return (0);
 }

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dsl_scan.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dsl_scan.h	Fri Sep  1 17:38:49 2017	(r323106)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dsl_scan.h	Fri Sep  1 17:43:08 2017	(r323107)
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
  */
 
 #ifndef	_SYS_DSL_SCAN_H
@@ -70,6 +71,7 @@ typedef struct dsl_scan_phys {
 
 typedef enum dsl_scan_flags {
 	DSF_VISIT_DS_AGAIN = 1<<0,
+	DSF_SCRUB_PAUSED = 1<<1,
 } dsl_scan_flags_t;
 
 /*
@@ -82,8 +84,8 @@ typedef enum dsl_scan_flags {
  *
  * The following members of this structure direct the behavior of the scan:
  *
- * scn_pausing -	a scan that cannot be completed in a single txg or
- *			has exceeded its allotted time will need to pause.
+ * scn_suspending -	a scan that cannot be completed in a single txg or
+ *			has exceeded its allotted time will need to suspend.
  *			When this flag is set the scanner will stop traversing
  *			the pool and write out the current state to disk.
  *
@@ -105,7 +107,7 @@ typedef enum dsl_scan_flags {
 typedef struct dsl_scan {
 	struct dsl_pool *scn_dp;
 
-	boolean_t scn_pausing;
+	boolean_t scn_suspending;
 	uint64_t scn_restart_txg;
 	uint64_t scn_done_txg;
 	uint64_t scn_sync_start_time;
@@ -115,8 +117,6 @@ typedef struct dsl_scan {
 	boolean_t scn_is_bptree;
 	boolean_t scn_async_destroying;
 	boolean_t scn_async_stalled;
-
-	/* for debugging / information */
 	uint64_t scn_visited_this_txg;
 
 	dsl_scan_phys_t scn_phys;
@@ -127,6 +127,8 @@ void dsl_scan_fini(struct dsl_pool *dp);
 void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
 int dsl_scan_cancel(struct dsl_pool *);
 int dsl_scan(struct dsl_pool *, pool_scan_func_t);
+boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
+int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
 void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
 boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
 boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
@@ -137,6 +139,7 @@ void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, s
 void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
     struct dmu_tx *tx);
 boolean_t dsl_scan_active(dsl_scan_t *scn);
+boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);
 
 #ifdef	__cplusplus
 }

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h	Fri Sep  1 17:38:49 2017	(r323106)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h	Fri Sep  1 17:43:08 2017	(r323107)
@@ -26,6 +26,7 @@
  * Copyright 2013 Saso Kiselkov. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2017 Joyent, Inc.
+ * Copyright (c) 2017 Datto Inc.
  */
 
 #ifndef _SYS_SPA_H
@@ -676,6 +677,7 @@ extern void spa_l2cache_drop(spa_t *spa);
 /* scanning */
 extern int spa_scan(spa_t *spa, pool_scan_func_t func);
 extern int spa_scan_stop(spa_t *spa);
+extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
 
 /* spa syncing */
 extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h	Fri Sep  1 17:38:49 2017	(r323106)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h	Fri Sep  1 17:43:08 2017	(r323107)
@@ -24,6 +24,7 @@
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright 2013 Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
  */
 
 #ifndef _SYS_SPA_IMPL_H
@@ -192,6 +193,8 @@ struct spa {
 	uint8_t		spa_scrub_started;	/* started since last boot */
 	uint8_t		spa_scrub_reopen;	/* scrub doing vdev_reopen */
 	uint64_t	spa_scan_pass_start;	/* start time per pass/reboot */
+	uint64_t	spa_scan_pass_scrub_pause; /* scrub pause time */
+	uint64_t	spa_scan_pass_scrub_spent_paused; /* total paused */
 	uint64_t	spa_scan_pass_exam;	/* examined bytes per pass */
 	kmutex_t	spa_async_lock;		/* protect async state */
 	kthread_t	*spa_async_thread;	/* thread doing async task */

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c	Fri Sep  1 17:38:49 2017	(r323106)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c	Fri Sep  1 17:43:08 2017	(r323107)
@@ -32,6 +32,7 @@
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2016 Toomas Soome <tsoome at me.com>
  * Copyright 2017 RackTop Systems.
+ * Copyright (c) 2017 Datto Inc.
  */
 
 /*
@@ -1671,6 +1672,7 @@ zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
  * inputs:
  * zc_name              name of the pool
  * zc_cookie            scan func (pool_scan_func_t)
+ * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
  */
 static int
 zfs_ioc_pool_scan(zfs_cmd_t *zc)
@@ -1681,7 +1683,12 @@ zfs_ioc_pool_scan(zfs_cmd_t *zc)
 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
 		return (error);
 
-	if (zc->zc_cookie == POOL_SCAN_NONE)
+	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
+		return (SET_ERROR(EINVAL));
+
+	if (zc->zc_flags == POOL_SCRUB_PAUSE)
+		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
+	else if (zc->zc_cookie == POOL_SCAN_NONE)
 		error = spa_scan_stop(spa);
 	else
 		error = spa_scan(spa, zc->zc_cookie);

Modified: vendor-sys/illumos/dist/uts/common/sys/fs/zfs.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/sys/fs/zfs.h	Fri Sep  1 17:38:49 2017	(r323106)
+++ vendor-sys/illumos/dist/uts/common/sys/fs/zfs.h	Fri Sep  1 17:43:08 2017	(r323107)
@@ -25,6 +25,7 @@
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2017 Joyent, Inc.
+ * Copyright (c) 2017 Datto Inc.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -687,6 +688,16 @@ typedef enum pool_scan_func {
 } pool_scan_func_t;
 
 /*
+ * Used to control scrub pause and resume.
+ */
+typedef enum pool_scrub_cmd {
+	POOL_SCRUB_NORMAL = 0,
+	POOL_SCRUB_PAUSE,
+	POOL_SCRUB_FLAGS_END
+} pool_scrub_cmd_t;
+
+
+/*
  * ZIO types.  Needed to interpret vdev statistics below.
  */
 typedef enum zio_type {
@@ -718,6 +729,9 @@ typedef struct pool_scan_stat {
 	/* values not stored on disk */
 	uint64_t	pss_pass_exam;	/* examined bytes per scan pass */
 	uint64_t	pss_pass_start;	/* start time of a scan pass */
+	uint64_t	pss_pass_scrub_pause; /* pause time of a scurb pass */
+	/* cumulative time scrub spent paused, needed for rate calculation */
+	uint64_t	pss_pass_scrub_spent_paused;
 } pool_scan_stat_t;
 
 typedef enum dsl_scan_state {


More information about the svn-src-all mailing list