svn commit: r204154 -
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
Kip Macy
kmacy at FreeBSD.org
Sat Feb 20 23:48:04 UTC 2010
Author: kmacy
Date: Sat Feb 20 23:48:04 2010
New Revision: 204154
URL: http://svn.freebsd.org/changeset/base/204154
Log:
- import latest L2ARC changes from OpenSolaris
- export L2ARC performance tunables as sysctls to permit run-time tuning
Modified:
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Sat Feb 20 23:42:24 2010 (r204153)
+++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Sat Feb 20 23:48:04 2010 (r204154)
@@ -189,6 +189,8 @@ extern kmem_cache_t *zio_buf_cache[];
extern kmem_cache_t *zio_data_buf_cache[];
#endif
+#define ddi_get_lbolt() (LBOLT)
+
/*
* Note that buffers can be in one of 6 states:
* ARC_anon - anonymous (discussed below)
@@ -567,8 +569,9 @@ uint64_t zfs_crc64_table[256];
*/
#define L2ARC_WRITE_SIZE (8 * 1024 * 1024) /* initial write max */
-#define L2ARC_HEADROOM 4 /* num of writes */
-#define L2ARC_FEED_SECS 1 /* caching interval */
+#define L2ARC_HEADROOM 2 /* num of writes */
+#define L2ARC_FEED_SECS 1 /* caching interval secs */
+#define L2ARC_FEED_MIN_MS 200 /* min caching interval ms */
#define l2arc_writes_sent ARCSTAT(arcstat_l2_writes_sent)
#define l2arc_writes_done ARCSTAT(arcstat_l2_writes_done)
@@ -580,7 +583,29 @@ uint64_t l2arc_write_max = L2ARC_WRITE_S
uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra write during warmup */
uint64_t l2arc_headroom = L2ARC_HEADROOM; /* number of dev writes */
uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */
+uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval milliseconds */
boolean_t l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */
+boolean_t l2arc_feed_again = B_TRUE; /* turbo warmup */
+boolean_t l2arc_norw = B_TRUE; /* no reads during writes */
+
+/*
+ * Export the L2ARC tunables under the _vfs_zfs sysctl node, read-write
+ * (CTLFLAG_RW), so they can be adjusted at run time.  The 64-bit tunables
+ * use SYSCTL_QUAD; the boolean_t flags are exported through SYSCTL_INT.
+ */
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, l2arc_write_max, CTLFLAG_RW,
+    &l2arc_write_max, 0, "max write size");
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost, CTLFLAG_RW,
+    &l2arc_write_boost, 0, "extra write during warmup");
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, l2arc_headroom, CTLFLAG_RW,
+    &l2arc_headroom, 0, "number of dev writes");
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs, CTLFLAG_RW,
+    &l2arc_feed_secs, 0, "interval seconds");
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms, CTLFLAG_RW,
+    &l2arc_feed_min_ms, 0, "min interval milliseconds");
+
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch, CTLFLAG_RW,
+    &l2arc_noprefetch, 0, "don't cache prefetch bufs");
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again, CTLFLAG_RW,
+    &l2arc_feed_again, 0, "turbo warmup");
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw, CTLFLAG_RW,
+    &l2arc_norw, 0, "no reads during writes");
+
/*
* L2ARC Internals
@@ -3891,8 +3916,70 @@ arc_fini(void)
*
* Tunables may be removed or added as future performance improvements are
* integrated, and also may become zpool properties.
+ *
+ * There are three key functions that control how the L2ARC warms up:
+ *
+ * l2arc_write_eligible() check if a buffer is eligible to cache
+ * l2arc_write_size() calculate how much to write
+ * l2arc_write_interval() calculate sleep delay between writes
+ *
+ * These three functions determine what to write, how much, and how quickly
+ * to send writes.
*/
+/*
+ * Decide whether the header `ab' may be written to the L2ARC on behalf of
+ * the pool `spa'.  Returns B_TRUE when the buffer is eligible and B_FALSE
+ * otherwise.  The checks run in the order listed and stop at the first
+ * one that disqualifies the buffer.
+ */
+static boolean_t
+l2arc_write_eligible(spa_t *spa, arc_buf_hdr_t *ab)
+{
+	/* 1. The buffer belongs to a different spa. */
+	if (ab->b_spa != spa)
+		return (B_FALSE);
+
+	/* 2. The buffer is already cached on the L2ARC. */
+	if (ab->b_l2hdr != NULL)
+		return (B_FALSE);
+
+	/* 3. An I/O is in progress (it may be an incomplete read). */
+	if (HDR_IO_IN_PROGRESS(ab))
+		return (B_FALSE);
+
+	/* 4. The buffer is flagged not eligible (zfs property). */
+	if (!HDR_L2CACHE(ab))
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Compute how many bytes to write to `dev' in this feed cycle: the
+ * device's l2ad_write value, plus its l2ad_boost value for as long as
+ * arc_warm is still B_FALSE.
+ */
+static uint64_t
+l2arc_write_size(l2arc_dev_t *dev)
+{
+	uint64_t boost;
+
+	/* The boost applies only while the ARC has not yet warmed up. */
+	boost = (arc_warm == B_FALSE) ? dev->l2ad_boost : 0;
+
+	return (dev->l2ad_write + boost);
+}
+
+/*
+ * Work out when the next L2ARC write should be issued.
+ *
+ *	began	tick value taken when the current write cycle started
+ *	wanted	number of bytes the cycle was asked to write
+ *	wrote	number of bytes the cycle actually wrote
+ *
+ * Returns a tick value on the same clock as ddi_get_lbolt(); the result
+ * is never earlier than the current tick.
+ */
+static clock_t
+l2arc_write_interval(clock_t began, uint64_t wanted, uint64_t wrote)
+{
+	clock_t delay, due, now;
+
+	/*
+	 * A cycle that wrote more than half of what it wanted means the
+	 * ARC lists are busy, so (when l2arc_feed_again is set) come back
+	 * after only l2arc_feed_min_ms milliseconds.  Otherwise the lists
+	 * are stale and the regular l2arc_feed_secs pause is used.
+	 */
+	delay = (l2arc_feed_again && wrote > (wanted / 2)) ?
+	    (hz * l2arc_feed_min_ms) / 1000 : hz * l2arc_feed_secs;
+
+	/*
+	 * Measure the delay from whichever of `began' and the current
+	 * tick is earlier, then clamp so the result is not in the past.
+	 */
+	now = ddi_get_lbolt();
+	due = MIN(now + delay, began + delay);
+
+	return (MAX(now, due));
+}
+
static void
l2arc_hdr_stat_add(void)
{
@@ -4313,7 +4400,7 @@ top:
* An ARC_L2_WRITING flag is set so that the L2ARC buffers are not valid
* for reading until they have completed writing.
*/
-static void
+static uint64_t
l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
{
arc_buf_hdr_t *ab, *ab_prev, *head;
@@ -4379,20 +4466,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
break;
}
- if (ab->b_spa != spa) {
- mutex_exit(hash_lock);
- continue;
- }
-
- if (ab->b_l2hdr != NULL) {
- /*
- * Already in L2ARC.
- */
- mutex_exit(hash_lock);
- continue;
- }
-
- if (HDR_IO_IN_PROGRESS(ab) || !HDR_L2CACHE(ab)) {
+ if (!l2arc_write_eligible(spa, ab)) {
mutex_exit(hash_lock);
continue;
}
@@ -4403,12 +4477,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
break;
}
- if (ab->b_buf == NULL) {
- DTRACE_PROBE1(l2arc__buf__null, void *, ab);
- mutex_exit(hash_lock);
- continue;
- }
-
if (pio == NULL) {
/*
* Insert a dummy header on the buflist so
@@ -4475,7 +4543,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
if (pio == NULL) {
ASSERT3U(write_sz, ==, 0);
kmem_cache_free(hdr_cache, head);
- return;
+ return (0);
}
ASSERT3U(write_sz, <=, target_sz);
@@ -4496,6 +4564,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
}
(void) zio_wait(pio);
+
+ return (write_sz);
}
/*
@@ -4508,20 +4578,19 @@ l2arc_feed_thread(void *dummy __unused)
callb_cpr_t cpr;
l2arc_dev_t *dev;
spa_t *spa;
- uint64_t size;
+ uint64_t size, wrote;
+ clock_t begin, next = ddi_get_lbolt();
CALLB_CPR_INIT(&cpr, &l2arc_feed_thr_lock, callb_generic_cpr, FTAG);
mutex_enter(&l2arc_feed_thr_lock);
while (l2arc_thread_exit == 0) {
- /*
- * Pause for l2arc_feed_secs seconds between writes.
- */
CALLB_CPR_SAFE_BEGIN(&cpr);
(void) cv_timedwait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock,
- hz * l2arc_feed_secs);
+ next);
CALLB_CPR_SAFE_END(&cpr, &l2arc_feed_thr_lock);
+ next = ddi_get_lbolt() + hz;
/*
* Quick check for L2ARC devices.
@@ -4532,6 +4601,7 @@ l2arc_feed_thread(void *dummy __unused)
continue;
}
mutex_exit(&l2arc_dev_mtx);
+ begin = ddi_get_lbolt();
/*
* This selects the next l2arc device to write to, and in
@@ -4560,9 +4630,7 @@ l2arc_feed_thread(void *dummy __unused)
ARCSTAT_BUMP(arcstat_l2_feeds);
- size = dev->l2ad_write;
- if (arc_warm == B_FALSE)
- size += dev->l2ad_boost;
+ size = l2arc_write_size(dev);
/*
* Evict L2ARC buffers that will be overwritten.
@@ -4572,7 +4640,12 @@ l2arc_feed_thread(void *dummy __unused)
/*
* Write ARC buffers.
*/
- l2arc_write_buffers(spa, dev, size);
+ wrote = l2arc_write_buffers(spa, dev, size);
+
+ /*
+ * Calculate interval between writes.
+ */
+ next = l2arc_write_interval(begin, size, wrote);
spa_config_exit(spa, SCL_L2ARC, dev);
}
More information about the svn-src-user
mailing list