[CFR] Tunables for scrub and resilver
Martin Matuska
mm at FreeBSD.org
Mon Jul 2 06:48:37 UTC 2012
On 1.7.2012 18:04, Pawel Jakub Dawidek wrote:
> On Sun, Jul 01, 2012 at 05:48:16PM +0200, Martin Matuska wrote:
>> Hi,
>>
>> I would like to hear your opinion on the attached patch to add scrub and
>> resilver tunables.
>> This way users can add more priority to scrub and resilver (make it
>> faster) at the cost of other I/O etc.
>>
>> On-line version of the patch:
>> http://people.freebsd.org/~mm/patches/zfs/dsl_scan.patch
>>
>> The patch adds tuning for all of the dsl_scan.c tunables, as available
>> in illumos.
>> zfs_resilver_delay and zfs_scrub_delay (resulting in scan_delay) need to
>> be non-negative, otherwise we trigger a kernel assert in pause().
>> Other values are used for timer comparisons and should be safe even if
>> negative (resulting behavior equals a value of zero).
> I had similar patch for some time now:
>
> http://people.freebsd.org/~pjd/patches/dsl_scan.c.patch
>
> The only reason I haven't committed it was that I wasn't sure which
> variables can be safely modified at run-time. If you did the audit and
> you are sure we can make them RW, then I'm fine with the patch (except
> for style issues mentioned below).
>
>> +SYSCTL_INT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RW,
>> + &zfs_top_maxinflight, 0, "Maximum I/Os per top-level vdev");
> Should be four spaces instead of tab.
>
>> - delay(scan_delay);
>> + delay(MAX(scan_delay,0));
> Missing space before comma.
>
> Although maybe we should make it unsigned and use SYSCTL_UINT()?
>
I updated my patch to use unsigned integers, as negative values really
make no sense here.
From my practical tests, zfs_top_maxinflight should never be zero (or
negative), as this makes the zpool command hang (scrub runs in terms of
bytes/sec).
Changing all other variables to any value at run-time is fine.
Please check the updated patch:
http://people.freebsd.org/~mm/patches/zfs/dsl_scan.patch
--
Martin Matuska
FreeBSD committer
http://blog.vx.sk
-------------- next part --------------
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c (revision 237745)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c (working copy)
@@ -58,16 +58,47 @@
static dsl_syncfunc_t dsl_scan_cancel_sync;
static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
-int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */
-int zfs_resilver_delay = 2; /* number of ticks to delay resilver */
-int zfs_scrub_delay = 4; /* number of ticks to delay scrub */
-int zfs_scan_idle = 50; /* idle window in clock ticks */
+unsigned int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */
+unsigned int zfs_resilver_delay = 2; /* number of ticks to delay resilver */
+unsigned int zfs_scrub_delay = 4; /* number of ticks to delay scrub */
+unsigned int zfs_scan_idle = 50; /* idle window in clock ticks */
-int zfs_scan_min_time_ms = 1000; /* min millisecs to scrub per txg */
-int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */
-int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver per txg */
+unsigned int zfs_scan_min_time_ms = 1000; /* min millisecs to scrub per txg */
+unsigned int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */
+unsigned int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver
+ per txg */
boolean_t zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */
boolean_t zfs_no_scrub_prefetch = B_FALSE; /* set to disable srub prefetching */
+
+SYSCTL_DECL(_vfs_zfs);
+TUNABLE_INT("vfs.zfs.top_maxinflight", &zfs_top_maxinflight);
+SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RW,
+ &zfs_top_maxinflight, 0, "Maximum I/Os per top-level vdev");
+TUNABLE_INT("vfs.zfs.resilver_delay", &zfs_resilver_delay);
+SYSCTL_UINT(_vfs_zfs, OID_AUTO, resilver_delay, CTLFLAG_RW,
+ &zfs_resilver_delay, 0, "Number of ticks to delay resilver");
+TUNABLE_INT("vfs.zfs.scrub_delay", &zfs_scrub_delay);
+SYSCTL_UINT(_vfs_zfs, OID_AUTO, scrub_delay, CTLFLAG_RW,
+ &zfs_scrub_delay, 0, "Number of ticks to delay scrub");
+TUNABLE_INT("vfs.zfs.scan_idle", &zfs_scan_idle);
+SYSCTL_UINT(_vfs_zfs, OID_AUTO, scan_idle, CTLFLAG_RW,
+ &zfs_scan_idle, 0, "Idle scan window in clock ticks");
+TUNABLE_INT("vfs.zfs.scan_min_time_ms", &zfs_scan_min_time_ms);
+SYSCTL_UINT(_vfs_zfs, OID_AUTO, scan_min_time_ms, CTLFLAG_RW,
+ &zfs_scan_min_time_ms, 0, "Min millisecs to scrub per txg");
+TUNABLE_INT("vfs.zfs.free_min_time_ms", &zfs_free_min_time_ms);
+SYSCTL_UINT(_vfs_zfs, OID_AUTO, free_min_time_ms, CTLFLAG_RW,
+ &zfs_free_min_time_ms, 0, "Min millisecs to free per txg");
+TUNABLE_INT("vfs.zfs.resilver_min_time_ms", &zfs_resilver_min_time_ms);
+SYSCTL_UINT(_vfs_zfs, OID_AUTO, resilver_min_time_ms, CTLFLAG_RW,
+ &zfs_resilver_min_time_ms, 0, "Min millisecs to resilver per txg");
+TUNABLE_INT("vfs.zfs.no_scrub_io", &zfs_no_scrub_io);
+SYSCTL_INT(_vfs_zfs, OID_AUTO, no_scrub_io, CTLFLAG_RW,
+ &zfs_no_scrub_io, 0, "Disable scrub I/O");
+TUNABLE_INT("vfs.zfs.no_scrub_prefetch", &zfs_no_scrub_prefetch);
+SYSCTL_INT(_vfs_zfs, OID_AUTO, no_scrub_prefetch, CTLFLAG_RW,
+ &zfs_no_scrub_prefetch, 0, "Disable scrub prefetching");
+
enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
#define DSL_SCAN_IS_SCRUB_RESILVER(scn) \
@@ -405,7 +436,7 @@
dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_t *zb)
{
uint64_t elapsed_nanosecs;
- int mintime;
+ unsigned int mintime;
/* we never skip user/group accounting objects */
if (zb && (int64_t)zb->zb_object < 0)
@@ -1638,7 +1669,7 @@
boolean_t needs_io;
int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL;
int zio_priority;
- int scan_delay = 0;
+ unsigned int scan_delay = 0;
if (phys_birth <= scn->scn_phys.scn_min_txg ||
phys_birth >= scn->scn_phys.scn_max_txg)
@@ -1695,7 +1726,8 @@
if (needs_io && !zfs_no_scrub_io) {
vdev_t *rvd = spa->spa_root_vdev;
- uint64_t maxinflight = rvd->vdev_children * zfs_top_maxinflight;
+ uint64_t maxinflight = rvd->vdev_children *
+ MAX(zfs_top_maxinflight, 1);
void *data = zio_data_buf_alloc(size);
mutex_enter(&spa->spa_scrub_lock);
@@ -1709,7 +1741,7 @@
* then throttle our workload to limit the impact of a scan.
*/
if (ddi_get_lbolt64() - spa->spa_last_io <= zfs_scan_idle)
- delay(scan_delay);
+ delay(MAX((int)scan_delay, 0));
zio_nowait(zio_read(NULL, spa, bp, data, size,
dsl_scan_scrub_done, NULL, zio_priority,
More information about the zfs-devel
mailing list