svn commit: r248575 - in head/sys/cddl: compat/opensolaris/sys contrib/opensolaris/uts/common/fs/zfs

Steven Hartland smh at FreeBSD.org
Thu Mar 21 10:29:06 UTC 2013


Author: smh
Date: Thu Mar 21 10:29:05 2013
New Revision: 248575
URL: http://svnweb.freebsd.org/changeset/base/248575

Log:
  TRIM cache devices based on time instead of TXGs.
  Currently, the trim module uses the same algorithm for data and cache
  devices when deciding to issue TRIM requests, based on how far in the
  past the TXG is.
  
  Unfortunately, this is not ideal for cache devices, because the L2ARC
  doesn't use the concept of TXGs at all. In fact, when using a pool for
  reading only, the L2ARC is written but the TXG counter doesn't
  increase, and so no new TRIM requests are issued to the cache device.
  
  This patch fixes the issue by using time instead of the TXG number as
  the criterion for trimming on cache devices. The basic delay principle
  stays the same, but parameters are expressed in seconds instead of
  TXGs. The new parameters are named trim_l2arc_limit and
  trim_l2arc_batch, and both default to 30 seconds.
  
  Reviewed by:	pjd (mentor)
  Approved by:	pjd (mentor)
  Obtained from:	https://github.com/dechamps/zfs/commit/17122c31ac7f82875e837019205c21651c05f8cd
  MFC after:	2 weeks

Modified:
  head/sys/cddl/compat/opensolaris/sys/time.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c

Modified: head/sys/cddl/compat/opensolaris/sys/time.h
==============================================================================
--- head/sys/cddl/compat/opensolaris/sys/time.h	Thu Mar 21 10:16:10 2013	(r248574)
+++ head/sys/cddl/compat/opensolaris/sys/time.h	Thu Mar 21 10:29:05 2013	(r248575)
@@ -35,6 +35,7 @@
 #define MILLISEC	1000
 #define MICROSEC	1000000
 #define NANOSEC		1000000000
+#define TIME_MAX	LLONG_MAX
 
 typedef longlong_t	hrtime_t;
 

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c	Thu Mar 21 10:16:10 2013	(r248574)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c	Thu Mar 21 10:29:05 2013	(r248575)
@@ -27,6 +27,7 @@
 #include <sys/spa_impl.h>
 #include <sys/vdev_impl.h>
 #include <sys/trim_map.h>
+#include <sys/time.h>
 
 /*
  * Calculate the zio end, upgrading based on ashift which would be
@@ -54,6 +55,7 @@ typedef struct trim_seg {
 	uint64_t	ts_start;	/* Starting offset of this segment. */
 	uint64_t	ts_end;		/* Ending offset (non-inclusive). */
 	uint64_t	ts_txg;		/* Segment creation txg. */
+	hrtime_t	ts_time;	/* Segment creation time. */
 } trim_seg_t;
 
 extern boolean_t zfs_notrim;
@@ -65,6 +67,11 @@ TUNABLE_INT("vfs.zfs.trim_txg_limit", &t
 SYSCTL_INT(_vfs_zfs, OID_AUTO, trim_txg_limit, CTLFLAG_RW, &trim_txg_limit, 0,
     "Delay TRIMs by that many TXGs.");
 
+static int trim_l2arc_limit = 30;
+TUNABLE_INT("vfs.zfs.trim_l2arc_limit", &trim_l2arc_limit);
+SYSCTL_INT(_vfs_zfs, OID_AUTO, trim_l2arc_limit, CTLFLAG_RWTUN, &trim_l2arc_limit, 0,
+    "Delay TRIMs by this many seconds for cache devices.");
+
 static void trim_map_vdev_commit_done(spa_t *spa, vdev_t *vd);
 
 static int
@@ -176,10 +183,12 @@ trim_map_segment_add(trim_map_t *tm, uin
 	avl_index_t where;
 	trim_seg_t tsearch, *ts_before, *ts_after, *ts;
 	boolean_t merge_before, merge_after;
+	hrtime_t time;
 
 	ASSERT(MUTEX_HELD(&tm->tm_lock));
 	VERIFY(start < end);
 
+	time = gethrtime();
 	tsearch.ts_start = start;
 	tsearch.ts_end = end;
 
@@ -214,6 +223,7 @@ trim_map_segment_add(trim_map_t *tm, uin
 		ts->ts_start = start;
 		ts->ts_end = end;
 		ts->ts_txg = txg;
+		ts->ts_time = time;
 		avl_insert(&tm->tm_queued_frees, ts, where);
 		list_insert_tail(&tm->tm_head, ts);
 	}
@@ -236,6 +246,7 @@ trim_map_segment_remove(trim_map_t *tm, 
 		nts->ts_start = end;
 		nts->ts_end = ts->ts_end;
 		nts->ts_txg = ts->ts_txg;
+		nts->ts_time = ts->ts_time;
 		ts->ts_end = start;
 		avl_insert_here(&tm->tm_queued_frees, nts, ts, AVL_AFTER);
 		list_insert_after(&tm->tm_head, ts, nts);
@@ -359,17 +370,18 @@ trim_map_write_done(zio_t *zio)
 /*
  * Return the oldest segment (the one with the lowest txg) or false if
  * the list is empty or the first element's txg is greater than txg given
- * as function argument.
+ * as function argument, or the first element's time is greater than time
+ * given as function argument
  */
 static trim_seg_t *
-trim_map_first(trim_map_t *tm, uint64_t txg)
+trim_map_first(trim_map_t *tm, uint64_t txg, hrtime_t time)
 {
 	trim_seg_t *ts;
 
 	ASSERT(MUTEX_HELD(&tm->tm_lock));
 
 	ts = list_head(&tm->tm_head);
-	if (ts != NULL && ts->ts_txg <= txg)
+	if (ts != NULL && ts->ts_txg <= txg && ts->ts_time <= time)
 		return (ts);
 	return (NULL);
 }
@@ -380,20 +392,28 @@ trim_map_vdev_commit(spa_t *spa, zio_t *
 	trim_map_t *tm = vd->vdev_trimmap;
 	trim_seg_t *ts;
 	uint64_t start, size, txglimit;
+	hrtime_t timelimit;
 
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
 
 	if (tm == NULL)
 		return;
 
-	txglimit = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa)) -
-	    trim_txg_limit;
+	if (vd->vdev_isl2cache) {
+		timelimit = gethrtime() - trim_l2arc_limit * NANOSEC;
+		txglimit = UINT64_MAX;
+	} else {
+		timelimit = TIME_MAX;
+		txglimit = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa)) -
+		    trim_txg_limit;
+	}
 
 	mutex_enter(&tm->tm_lock);
 	/*
-	 * Loop until we send all frees up to the txglimit.
+	 * Loop until we send all frees up to the txglimit
+	 * or time limit if this is a cache device.
 	 */
-	while ((ts = trim_map_first(tm, txglimit)) != NULL) {
+	while ((ts = trim_map_first(tm, txglimit, timelimit)) != NULL) {
 		list_remove(&tm->tm_head, ts);
 		avl_remove(&tm->tm_queued_frees, ts);
 		avl_add(&tm->tm_inflight_frees, ts);


More information about the svn-src-all mailing list