svn commit: r337212 - in vendor-sys/illumos/dist/uts/common/fs/zfs: . sys

Alexander Motin mav at FreeBSD.org
Fri Aug 3 00:11:08 UTC 2018


Author: mav
Date: Fri Aug  3 00:11:06 2018
New Revision: 337212
URL: https://svnweb.freebsd.org/changeset/base/337212

Log:
  9465 ARC check for 'anon_size > arc_c/2' can stall the system
  
  illumos/illumos-gate at abe1fd01ce5a83718c5a840daeab4abdaec1c104
  
  Reviewed by: Sebastien Roy <sebastien.roy at delphix.com>
  Reviewed by: Matt Ahrens <matt at delphix.com>
  Reviewed by: Prashanth Sreenivasa <pks at delphix.com>
  Approved by: Robert Mustacchi <rm at joyent.com>
  Author:     Don Brady <don.brady at delphix.com>

Modified:
  vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/arc.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c	Fri Aug  3 00:01:48 2018	(r337211)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c	Fri Aug  3 00:11:06 2018	(r337212)
@@ -379,6 +379,13 @@ int zfs_arc_shrink_shift = 0;
 int zfs_arc_p_min_shift = 0;
 int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
 
+/*
+ * ARC dirty data constraints for arc_tempreserve_space() throttle
+ */
+uint_t zfs_arc_dirty_limit_percent = 50;	/* total dirty data limit */
+uint_t zfs_arc_anon_limit_percent = 25;		/* anon block dirty limit */
+uint_t zfs_arc_pool_dirty_percent = 20;		/* each pool's anon allowance */
+
 boolean_t zfs_compressed_arc_enabled = B_TRUE;
 
 /*
@@ -5839,12 +5846,10 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr
 }
 
 static int
-arc_memory_throttle(uint64_t reserve, uint64_t txg)
+arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)
 {
 #ifdef _KERNEL
 	uint64_t available_memory = ptob(freemem);
-	static uint64_t page_load = 0;
-	static uint64_t last_txg = 0;
 
 #if defined(__i386)
 	available_memory =
@@ -5854,9 +5859,9 @@ arc_memory_throttle(uint64_t reserve, uint64_t txg)
 	if (freemem > physmem * arc_lotsfree_percent / 100)
 		return (0);
 
-	if (txg > last_txg) {
-		last_txg = txg;
-		page_load = 0;
+	if (txg > spa->spa_lowmem_last_txg) {
+		spa->spa_lowmem_last_txg = txg;
+		spa->spa_lowmem_page_load = 0;
 	}
 	/*
 	 * If we are in pageout, we know that memory is already tight,
@@ -5864,18 +5869,19 @@ arc_memory_throttle(uint64_t reserve, uint64_t txg)
 	 * continue to let page writes occur as quickly as possible.
 	 */
 	if (curproc == proc_pageout) {
-		if (page_load > MAX(ptob(minfree), available_memory) / 4)
+		if (spa->spa_lowmem_page_load >
+		    MAX(ptob(minfree), available_memory) / 4)
 			return (SET_ERROR(ERESTART));
 		/* Note: reserve is inflated, so we deflate */
-		page_load += reserve / 8;
+		atomic_add_64(&spa->spa_lowmem_page_load, reserve / 8);
 		return (0);
-	} else if (page_load > 0 && arc_reclaim_needed()) {
+	} else if (spa->spa_lowmem_page_load > 0 && arc_reclaim_needed()) {
 		/* memory is low, delay before restarting */
 		ARCSTAT_INCR(arcstat_memory_throttle_count, 1);
 		return (SET_ERROR(EAGAIN));
 	}
-	page_load = 0;
-#endif
+	spa->spa_lowmem_page_load = 0;
+#endif /* _KERNEL */
 	return (0);
 }
 
@@ -5887,7 +5893,7 @@ arc_tempreserve_clear(uint64_t reserve)
 }
 
 int
-arc_tempreserve_space(uint64_t reserve, uint64_t txg)
+arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg)
 {
 	int error;
 	uint64_t anon_size;
@@ -5914,7 +5920,7 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
 	 * in order to compress/encrypt/etc the data.  We therefore need to
 	 * make sure that there is sufficient available memory for this.
 	 */
-	error = arc_memory_throttle(reserve, txg);
+	error = arc_memory_throttle(spa, reserve, txg);
 	if (error != 0)
 		return (error);
 
@@ -5922,12 +5928,24 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
 	 * Throttle writes when the amount of dirty data in the cache
 	 * gets too large.  We try to keep the cache less than half full
 	 * of dirty blocks so that our sync times don't grow too large.
+	 *
+	 * In the case of one pool being built on another pool, we want
+	 * to make sure we don't end up throttling the lower (backing)
+	 * pool when the upper pool is the majority contributor to dirty
+	 * data. To ensure we make forward progress during throttling, we
+	 * also check the current pool's net dirty data and only throttle
+	 * if it exceeds zfs_arc_pool_dirty_percent of the anonymous dirty
+	 * data in the cache.
+	 *
 	 * Note: if two requests come in concurrently, we might let them
 	 * both succeed, when one of them should fail.  Not a huge deal.
 	 */
+	uint64_t total_dirty = reserve + arc_tempreserve + anon_size;
+	uint64_t spa_dirty_anon = spa_dirty_data(spa);
 
-	if (reserve + arc_tempreserve + anon_size > arc_c / 2 &&
-	    anon_size > arc_c / 4) {
+	if (total_dirty > arc_c * zfs_arc_dirty_limit_percent / 100 &&
+	    anon_size > arc_c * zfs_arc_anon_limit_percent / 100 &&
+	    spa_dirty_anon > anon_size * zfs_arc_pool_dirty_percent / 100) {
 		uint64_t meta_esize =
 		    refcount_count(&arc_anon->arcs_esize[ARC_BUFC_METADATA]);
 		uint64_t data_esize =

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c	Fri Aug  3 00:01:48 2018	(r337211)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c	Fri Aug  3 00:11:06 2018	(r337212)
@@ -1379,7 +1379,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsiz
 	    offsetof(struct tempreserve, tr_node));
 	ASSERT3S(asize, >, 0);
 
-	err = arc_tempreserve_space(lsize, tx->tx_txg);
+	err = arc_tempreserve_space(dd->dd_pool->dp_spa, lsize, tx->tx_txg);
 	if (err == 0) {
 		struct tempreserve *tr;
 

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c	Fri Aug  3 00:01:48 2018	(r337211)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c	Fri Aug  3 00:11:06 2018	(r337212)
@@ -1910,6 +1910,12 @@ bp_get_dsize(spa_t *spa, const blkptr_t *bp)
 	return (dsize);
 }
 
+uint64_t
+spa_dirty_data(spa_t *spa)
+{
+	return (spa->spa_dsl_pool->dp_dirty_total);
+}
+
 /*
  * ==========================================================================
  * Initialization and Termination

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/arc.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/arc.h	Fri Aug  3 00:01:48 2018	(r337211)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/arc.h	Fri Aug  3 00:11:06 2018	(r337212)
@@ -190,7 +190,7 @@ void arc_freed(spa_t *spa, const blkptr_t *bp);
 
 void arc_flush(spa_t *spa, boolean_t retry);
 void arc_tempreserve_clear(uint64_t reserve);
-int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
+int arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg);
 
 uint64_t arc_max_bytes(void);
 void arc_init(void);

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h	Fri Aug  3 00:01:48 2018	(r337211)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h	Fri Aug  3 00:11:06 2018	(r337212)
@@ -813,6 +813,7 @@ extern uint64_t spa_bootfs(spa_t *spa);
 extern uint64_t spa_delegation(spa_t *spa);
 extern objset_t *spa_meta_objset(spa_t *spa);
 extern uint64_t spa_deadman_synctime(spa_t *spa);
+extern uint64_t spa_dirty_data(spa_t *spa);
 
 /* Miscellaneous support routines */
 extern void spa_load_failed(spa_t *spa, const char *fmt, ...);

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h	Fri Aug  3 00:01:48 2018	(r337211)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h	Fri Aug  3 00:11:06 2018	(r337212)
@@ -373,6 +373,10 @@ struct spa {
 		int spa_queued;
 	} spa_queue_stats[ZIO_PRIORITY_NUM_QUEUEABLE];
 
+	/* arc_memory_throttle() parameters during low memory condition */
+	uint64_t	spa_lowmem_page_load;	/* memory load during txg */
+	uint64_t	spa_lowmem_last_txg;	/* txg window start */
+
 	hrtime_t	spa_ccw_fail_time;	/* Conf cache write fail time */
 
 	/*


More information about the svn-src-all mailing list