svn commit: r315290 - in vendor/illumos/dist: cmd/zdb cmd/ztest lib/libzpool/common lib/libzpool/common/sys
Josh Paetzel
jpaetzel at FreeBSD.org
Wed Mar 15 04:16:09 UTC 2017
Author: jpaetzel
Date: Wed Mar 15 04:16:08 2017
New Revision: 315290
URL: https://svnweb.freebsd.org/changeset/base/315290
Log:
7303 dynamic metaslab selection
illumos/illumos-gate at 8363e80ae72609660f6090766ca8c2c18aa53f0c
https://github.com/illumos/illumos-gate/commit/8363e80ae72609660f6090766ca8c2c18aa53f0
https://www.illumos.org/issues/7303
This change introduces a new weighting algorithm to improve metaslab selection.
The new weighting algorithm relies on the SPACEMAP_HISTOGRAM feature. As a result,
the metaslab weight now encodes the type of weighting algorithm used
(size-based vs segment-based).
This also introduce a new allocation tracing facility and two new dcmds to help
debug allocation problems. Each zio now contains a zio_alloc_list_t structure
that is populated as the zio goes through the allocations stage. Here's an
example of how to use the tracing facility:
> c5ec000::print zio_t io_alloc_list | ::walk list | ::metaslab_trace
MSID DVA ASIZE WEIGHT RESULT VDEV
- 0 400 0 NOT_ALLOCATABLE ztest.0a
- 0 400 0 NOT_ALLOCATABLE ztest.0a
- 0 400 0 ENOSPC ztest.0a
- 0 200 0 NOT_ALLOCATABLE ztest.0a
- 0 200 0 NOT_ALLOCATABLE ztest.0a
- 0 200 0 ENOSPC ztest.0a
1 0 400 1 x 8M 17b1a00 ztest.0a
> 1ff2400::print zio_t io_alloc_list | ::walk list | ::metaslab_trace
MSID DVA ASIZE WEIGHT RESULT VDEV
- 0 200 0 NOT_ALLOCATABLE mirror-2
- 0 200 0 NOT_ALLOCATABLE mirror-0
1 0 200 1 x 4M 112ae00 mirror-1
- 1 200 0 NOT_ALLOCATABLE mirror-2
- 1 200 0 NOT_ALLOCATABLE mirror-0
1 1 200 1 x 4M 112b000 mirror-1
- 2 200 0 NOT_ALLOCATABLE mirror-2
If the metaslab is using segment-based weighting then the WEIGHT column will
display the number of segments available in the bucket where the allocation
attempt was made.
Author: George Wilson <george.wilson at delphix.com>
Reviewed by: Alex Reece <alex at delphix.com>
Reviewed by: Chris Siden <christopher.siden at delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel at delphix.com>
Reviewed by: Matthew Ahrens <mahrens at delphix.com>
Reviewed by: Paul Dagnelie <paul.dagnelie at delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov at delphix.com>
Reviewed by: Prakash Surya <prakash.surya at delphix.com>
Reviewed by: Don Brady <don.brady at intel.com>
Approved by: Richard Lowe <richlowe at richlowe.net>
Modified:
vendor/illumos/dist/cmd/zdb/zdb.c
vendor/illumos/dist/cmd/ztest/ztest.c
vendor/illumos/dist/lib/libzpool/common/kernel.c
vendor/illumos/dist/lib/libzpool/common/sys/zfs_context.h
Modified: vendor/illumos/dist/cmd/zdb/zdb.c
==============================================================================
--- vendor/illumos/dist/cmd/zdb/zdb.c Wed Mar 15 00:29:27 2017 (r315289)
+++ vendor/illumos/dist/cmd/zdb/zdb.c Wed Mar 15 04:16:08 2017 (r315290)
@@ -2562,10 +2562,21 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
if (!dump_opt['L']) {
vdev_t *rvd = spa->spa_root_vdev;
+
+ /*
+ * We are going to be changing the meaning of the metaslab's
+ * ms_tree. Ensure that the allocator doesn't try to
+ * use the tree.
+ */
+ spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
+ spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
+
for (uint64_t c = 0; c < rvd->vdev_children; c++) {
vdev_t *vd = rvd->vdev_child[c];
+ metaslab_group_t *mg = vd->vdev_mg;
for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
metaslab_t *msp = vd->vdev_ms[m];
+ ASSERT3P(msp->ms_group, ==, mg);
mutex_enter(&msp->ms_lock);
metaslab_unload(msp);
@@ -2586,8 +2597,6 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
(longlong_t)m,
(longlong_t)vd->vdev_ms_count);
- msp->ms_ops = &zdb_metaslab_ops;
-
/*
* We don't want to spend the CPU
* manipulating the size-ordered
@@ -2597,7 +2606,10 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
msp->ms_tree->rt_ops = NULL;
VERIFY0(space_map_load(msp->ms_sm,
msp->ms_tree, SM_ALLOC));
- msp->ms_loaded = B_TRUE;
+
+ if (!msp->ms_loaded) {
+ msp->ms_loaded = B_TRUE;
+ }
}
mutex_exit(&msp->ms_lock);
}
@@ -2619,8 +2631,10 @@ zdb_leak_fini(spa_t *spa)
vdev_t *rvd = spa->spa_root_vdev;
for (int c = 0; c < rvd->vdev_children; c++) {
vdev_t *vd = rvd->vdev_child[c];
+ metaslab_group_t *mg = vd->vdev_mg;
for (int m = 0; m < vd->vdev_ms_count; m++) {
metaslab_t *msp = vd->vdev_ms[m];
+ ASSERT3P(mg, ==, msp->ms_group);
mutex_enter(&msp->ms_lock);
/*
@@ -2634,7 +2648,10 @@ zdb_leak_fini(spa_t *spa)
* from the ms_tree.
*/
range_tree_vacate(msp->ms_tree, zdb_leak, vd);
- msp->ms_loaded = B_FALSE;
+
+ if (msp->ms_loaded) {
+ msp->ms_loaded = B_FALSE;
+ }
mutex_exit(&msp->ms_lock);
}
Modified: vendor/illumos/dist/cmd/ztest/ztest.c
==============================================================================
--- vendor/illumos/dist/cmd/ztest/ztest.c Wed Mar 15 00:29:27 2017 (r315289)
+++ vendor/illumos/dist/cmd/ztest/ztest.c Wed Mar 15 04:16:08 2017 (r315290)
@@ -171,7 +171,7 @@ static const ztest_shared_opts_t ztest_o
.zo_mirrors = 2,
.zo_raidz = 4,
.zo_raidz_parity = 1,
- .zo_vdev_size = SPA_MINDEVSIZE * 2,
+ .zo_vdev_size = SPA_MINDEVSIZE * 4, /* 256m default size */
.zo_datasets = 7,
.zo_threads = 23,
.zo_passtime = 60, /* 60 seconds */
Modified: vendor/illumos/dist/lib/libzpool/common/kernel.c
==============================================================================
--- vendor/illumos/dist/lib/libzpool/common/kernel.c Wed Mar 15 00:29:27 2017 (r315289)
+++ vendor/illumos/dist/lib/libzpool/common/kernel.c Wed Mar 15 04:16:08 2017 (r315290)
@@ -94,6 +94,11 @@ kstat_create(const char *module, int ins
/*ARGSUSED*/
void
+kstat_named_init(kstat_named_t *knp, const char *name, uchar_t type)
+{}
+
+/*ARGSUSED*/
+void
kstat_install(kstat_t *ksp)
{}
Modified: vendor/illumos/dist/lib/libzpool/common/sys/zfs_context.h
==============================================================================
--- vendor/illumos/dist/lib/libzpool/common/sys/zfs_context.h Wed Mar 15 00:29:27 2017 (r315289)
+++ vendor/illumos/dist/lib/libzpool/common/sys/zfs_context.h Wed Mar 15 04:16:08 2017 (r315290)
@@ -301,6 +301,7 @@ extern void cv_broadcast(kcondvar_t *cv)
*/
extern kstat_t *kstat_create(const char *, int,
const char *, const char *, uchar_t, ulong_t, uchar_t);
+extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
extern void kstat_install(kstat_t *);
extern void kstat_delete(kstat_t *);
extern void kstat_waitq_enter(kstat_io_t *);
More information about the svn-src-all
mailing list