svn commit: r275738 - head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs

Sat Dec 13 01:39:25 UTC 2014

Author: delphij
Date: Sat Dec 13 01:39:24 2014
New Revision: 275738
URL: https://svnweb.freebsd.org/changeset/base/275738

Log:
  MFV r275546:
  
  Reduce scrub activity when the system has enough dirty data, namely when
  dirty data reaches zfs_vdev_async_write_active_min_dirty_percent (the point
  at which we start to increase the number of concurrent async writes).
  
  While there, also correct a rounding error which made scrub run for
  (zfs_txg_timeout + 1) seconds before pausing instead of the desired
  zfs_txg_timeout seconds.
  
  Illumos issue:
      5351 scrub goes for an extra second each txg
      5352 scrub should pause when there is some dirty data
  
  MFC after:	2 weeks

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
Directory Properties:
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
==============================================================================

--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c	Sat Dec 13 01:26:06 2014	(r275737)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c	Sat Dec 13 01:39:24 2014	(r275738)
@@ -414,12 +414,11 @@ dsl_scan_sync_state(dsl_scan_t *scn, dmu
 	    &scn->scn_phys, tx));
 }
 
+extern int zfs_vdev_async_write_active_min_dirty_percent;
+
 static boolean_t
 dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
 {
-	uint64_t elapsed_nanosecs;
-	unsigned int mintime;
-
 	/* we never skip user/group accounting objects */
 	if (zb && (int64_t)zb->zb_object < 0)
 		return (B_FALSE);
@@ -434,12 +433,28 @@ dsl_scan_check_pause(dsl_scan_t *scn, co
 	if (zb && zb->zb_level != 0)
 		return (B_FALSE);
 
-	mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ?
+	/*
+	 * We pause if:
+	 *  - we have scanned for the maximum time: an entire txg
+	 *    timeout (default 5 sec)
+	 *  or
+	 *  - we have scanned for at least the minimum time (default 1 sec
+	 *    for scrub, 3 sec for resilver), and either we have sufficient
+	 *    dirty data that we are starting to write more quickly
+	 *    (default 30%), or someone is explicitly waiting for this txg
+	 *    to complete.
+	 *  or
+	 *  - the spa is shutting down because this pool is being exported
+	 *    or the machine is rebooting.
+	 */
+	int mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ?
 	    zfs_resilver_min_time_ms : zfs_scan_min_time_ms;
-	elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
-	if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
+	uint64_t elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
+	int dirty_pct = scn->scn_dp->dp_dirty_total * 100 / zfs_dirty_data_max;
+	if (elapsed_nanosecs / NANOSEC >= zfs_txg_timeout ||
 	    (NSEC2MSEC(elapsed_nanosecs) > mintime &&
-	    txg_sync_waiting(scn->scn_dp)) ||
+	    (txg_sync_waiting(scn->scn_dp) ||
+	    dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent)) ||
 	    spa_shutting_down(scn->scn_dp->dp_spa)) {
 		if (zb) {
 			dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n",