svn commit: r354941 - in head: cddl/contrib/opensolaris/cmd/zdb cddl/contrib/opensolaris/cmd/zfs cddl/contrib/opensolaris/cmd/zpool cddl/contrib/opensolaris/cmd/ztest cddl/contrib/opensolaris/lib/l...
Andriy Gapon
avg at FreeBSD.org
Thu Nov 21 08:20:07 UTC 2019
Author: avg
Date: Thu Nov 21 08:20:05 2019
New Revision: 354941
URL: https://svnweb.freebsd.org/changeset/base/354941
Log:
MFV r354382,r354385: 10601 10757 Pool allocation classes
illumos/illumos-gate@663207adb1669640c01c5ec6949ce78fd806efae
https://github.com/illumos/illumos-gate/commit/663207adb1669640c01c5ec6949ce78fd806efae
10601 Pool allocation classes
https://www.illumos.org/issues/10601
illumos port of ZoL Pool allocation classes. Includes at least these two
commits:
441709695 Pool allocation classes misplacing small file blocks
cc99f275a Pool allocation classes
10757 Add -gLp to zpool subcommands for alt vdev names
https://www.illumos.org/issues/10757
Port from ZoL of
d2f3e292d Add -gLp to zpool subcommands for alt vdev names
Note that a subsequent ZoL commit changed -p to -P
a77f29f93 Change full path subcommand flag from -p to -P
Portions contributed by: Jerry Jelinek <jerry.jelinek at joyent.com>
Portions contributed by: Håkan Johansson <f96hajo at chalmers.se>
Portions contributed by: Richard Yao <ryao at gentoo.org>
Portions contributed by: Chunwei Chen <david.chen at nutanix.com>
Portions contributed by: loli10K <ezomori.nozomu at gmail.com>
Author: Don Brady <don.brady at delphix.com>
11541 allocation_classes feature must be enabled to add log device
illumos/illumos-gate@c1064fd7ce62fe763a4475e9988ffea3b22137de
https://github.com/illumos/illumos-gate/commit/c1064fd7ce62fe763a4475e9988ffea3b22137de
https://www.illumos.org/issues/11541
After the allocation_classes feature was integrated, one can no longer add a
log device to a pool unless that feature is enabled. There is an explicit check
for this, but it is unnecessary in the case of log devices, so we should handle
this better instead of forcing the feature to be enabled.
Author: Jerry Jelinek <jerry.jelinek at joyent.com>
FreeBSD notes.
I faithfully added the new -g, -L, -P flags, but only -g does something:
vdev GUIDs are displayed instead of device names. -L, resolve symlinks,
and -P, display full disk paths, do nothing at the moment.
The use of special vdevs is backward compatible for read-only access, so
root pools should be bootable, but exercise caution.
MFC after: 4 weeks
Modified:
head/cddl/contrib/opensolaris/cmd/zdb/zdb.c
head/cddl/contrib/opensolaris/cmd/zfs/zfs.8
head/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7
head/cddl/contrib/opensolaris/cmd/zpool/zpool.8
head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
head/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
head/cddl/contrib/opensolaris/cmd/ztest/ztest.c
head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
head/cddl/contrib/opensolaris/lib/libzpool/common/util.c
head/stand/libsa/zfs/zfsimpl.c
head/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c
head/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h
head/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_removal.c
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
head/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
Directory Properties:
head/cddl/contrib/opensolaris/ (props changed)
head/cddl/contrib/opensolaris/cmd/zdb/ (props changed)
head/cddl/contrib/opensolaris/lib/libzfs/ (props changed)
head/sys/cddl/contrib/opensolaris/ (props changed)
Modified: head/cddl/contrib/opensolaris/cmd/zdb/zdb.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zdb/zdb.c Thu Nov 21 07:48:03 2019 (r354940)
+++ head/cddl/contrib/opensolaris/cmd/zdb/zdb.c Thu Nov 21 08:20:05 2019 (r354941)
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2017 Nexenta Systems, Inc.
* Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
@@ -931,13 +931,23 @@ dump_metaslab(metaslab_t *msp)
static void
print_vdev_metaslab_header(vdev_t *vd)
{
- (void) printf("\tvdev %10llu\n\t%-10s%5llu %-19s %-15s %-10s\n",
- (u_longlong_t)vd->vdev_id,
+ vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias;
+ const char *bias_str;
+
+ bias_str = (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) ?
+ VDEV_ALLOC_BIAS_LOG :
+ (alloc_bias == VDEV_BIAS_SPECIAL) ? VDEV_ALLOC_BIAS_SPECIAL :
+ (alloc_bias == VDEV_BIAS_DEDUP) ? VDEV_ALLOC_BIAS_DEDUP :
+ vd->vdev_islog ? "log" : "";
+
+ (void) printf("\tvdev %10llu %s\n"
+ "\t%-10s%5llu %-19s %-15s %-12s\n",
+ (u_longlong_t)vd->vdev_id, bias_str,
"metaslabs", (u_longlong_t)vd->vdev_ms_count,
"offset", "spacemap", "free");
- (void) printf("\t%15s %19s %15s %10s\n",
+ (void) printf("\t%15s %19s %15s %12s\n",
"---------------", "-------------------",
- "---------------", "-------------");
+ "---------------", "------------");
}
static void
@@ -953,7 +963,7 @@ dump_metaslab_groups(spa_t *spa)
vdev_t *tvd = rvd->vdev_child[c];
metaslab_group_t *mg = tvd->vdev_mg;
- if (mg->mg_class != mc)
+ if (mg == NULL || mg->mg_class != mc)
continue;
metaslab_group_histogram_verify(mg);
@@ -2807,6 +2817,7 @@ typedef struct zdb_blkstats {
uint64_t zb_count;
uint64_t zb_gangs;
uint64_t zb_ditto_samevdev;
+ uint64_t zb_ditto_same_ms;
uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
} zdb_blkstats_t;
@@ -2846,6 +2857,16 @@ typedef struct zdb_cb {
uint32_t **zcb_vd_obsolete_counts;
} zdb_cb_t;
+/* test if two DVA offsets from same vdev are within the same metaslab */
+static boolean_t
+same_metaslab(spa_t *spa, uint64_t vdev, uint64_t off1, uint64_t off2)
+{
+ vdev_t *vd = vdev_lookup_top(spa, vdev);
+ uint64_t ms_shift = vd->vdev_ms_shift;
+
+ return ((off1 >> ms_shift) == (off2 >> ms_shift));
+}
+
static void
zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
dmu_object_type_t type)
@@ -2857,6 +2878,8 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const b
if (zilog && zil_bp_tree_add(zilog, bp) != 0)
return;
+ spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER);
+
for (int i = 0; i < 4; i++) {
int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
int t = (i & 1) ? type : ZDB_OT_TOTAL;
@@ -2882,8 +2905,15 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const b
switch (BP_GET_NDVAS(bp)) {
case 2:
if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
- DVA_GET_VDEV(&bp->blk_dva[1]))
+ DVA_GET_VDEV(&bp->blk_dva[1])) {
zb->zb_ditto_samevdev++;
+
+ if (same_metaslab(zcb->zcb_spa,
+ DVA_GET_VDEV(&bp->blk_dva[0]),
+ DVA_GET_OFFSET(&bp->blk_dva[0]),
+ DVA_GET_OFFSET(&bp->blk_dva[1])))
+ zb->zb_ditto_same_ms++;
+ }
break;
case 3:
equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
@@ -2892,13 +2922,37 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const b
DVA_GET_VDEV(&bp->blk_dva[2])) +
(DVA_GET_VDEV(&bp->blk_dva[1]) ==
DVA_GET_VDEV(&bp->blk_dva[2]));
- if (equal != 0)
+ if (equal != 0) {
zb->zb_ditto_samevdev++;
+
+ if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
+ DVA_GET_VDEV(&bp->blk_dva[1]) &&
+ same_metaslab(zcb->zcb_spa,
+ DVA_GET_VDEV(&bp->blk_dva[0]),
+ DVA_GET_OFFSET(&bp->blk_dva[0]),
+ DVA_GET_OFFSET(&bp->blk_dva[1])))
+ zb->zb_ditto_same_ms++;
+ else if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
+ DVA_GET_VDEV(&bp->blk_dva[2]) &&
+ same_metaslab(zcb->zcb_spa,
+ DVA_GET_VDEV(&bp->blk_dva[0]),
+ DVA_GET_OFFSET(&bp->blk_dva[0]),
+ DVA_GET_OFFSET(&bp->blk_dva[2])))
+ zb->zb_ditto_same_ms++;
+ else if (DVA_GET_VDEV(&bp->blk_dva[1]) ==
+ DVA_GET_VDEV(&bp->blk_dva[2]) &&
+ same_metaslab(zcb->zcb_spa,
+ DVA_GET_VDEV(&bp->blk_dva[1]),
+ DVA_GET_OFFSET(&bp->blk_dva[1]),
+ DVA_GET_OFFSET(&bp->blk_dva[2])))
+ zb->zb_ditto_same_ms++;
+ }
break;
}
-
}
+ spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG);
+
if (BP_IS_EMBEDDED(bp)) {
zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
@@ -3709,6 +3763,7 @@ dump_block_stats(spa_t *spa)
uint64_t norm_alloc, norm_space, total_alloc, total_found;
int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
boolean_t leaks = B_FALSE;
+ int err;
bzero(&zcb, sizeof (zcb));
(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
@@ -3751,8 +3806,10 @@ dump_block_stats(spa_t *spa)
flags |= TRAVERSE_PREFETCH_DATA;
zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
+ zcb.zcb_totalasize += metaslab_class_get_alloc(spa_special_class(spa));
+ zcb.zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa));
zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
- zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
+ err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
/*
* If we've traversed the data blocks then we need to wait for those
@@ -3768,6 +3825,12 @@ dump_block_stats(spa_t *spa)
}
}
+ /*
+ * Done after zio_wait() since zcb_haderrors is modified in
+ * zdb_blkptr_done()
+ */
+ zcb.zcb_haderrors |= err;
+
if (zcb.zcb_haderrors) {
(void) printf("\nError counts:\n\n");
(void) printf("\t%5s %s\n", "errno", "count");
@@ -3789,7 +3852,10 @@ dump_block_stats(spa_t *spa)
norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
norm_space = metaslab_class_get_space(spa_normal_class(spa));
- total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
+ total_alloc = norm_alloc +
+ metaslab_class_get_alloc(spa_log_class(spa)) +
+ metaslab_class_get_alloc(spa_special_class(spa)) +
+ metaslab_class_get_alloc(spa_dedup_class(spa));
total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
zcb.zcb_removing_size + zcb.zcb_checkpoint_size;
@@ -3811,31 +3877,50 @@ dump_block_stats(spa_t *spa)
return (2);
(void) printf("\n");
- (void) printf("\tbp count: %10llu\n",
+ (void) printf("\t%-16s %14llu\n", "bp count:",
(u_longlong_t)tzb->zb_count);
- (void) printf("\tganged count: %10llu\n",
+ (void) printf("\t%-16s %14llu\n", "ganged count:",
(longlong_t)tzb->zb_gangs);
- (void) printf("\tbp logical: %10llu avg: %6llu\n",
+ (void) printf("\t%-16s %14llu avg: %6llu\n", "bp logical:",
(u_longlong_t)tzb->zb_lsize,
(u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
- (void) printf("\tbp physical: %10llu avg:"
- " %6llu compression: %6.2f\n",
- (u_longlong_t)tzb->zb_psize,
+ (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n",
+ "bp physical:", (u_longlong_t)tzb->zb_psize,
(u_longlong_t)(tzb->zb_psize / tzb->zb_count),
(double)tzb->zb_lsize / tzb->zb_psize);
- (void) printf("\tbp allocated: %10llu avg:"
- " %6llu compression: %6.2f\n",
- (u_longlong_t)tzb->zb_asize,
+ (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n",
+ "bp allocated:", (u_longlong_t)tzb->zb_asize,
(u_longlong_t)(tzb->zb_asize / tzb->zb_count),
(double)tzb->zb_lsize / tzb->zb_asize);
- (void) printf("\tbp deduped: %10llu ref>1:"
- " %6llu deduplication: %6.2f\n",
- (u_longlong_t)zcb.zcb_dedup_asize,
+ (void) printf("\t%-16s %14llu ref>1: %6llu deduplication: %6.2f\n",
+ "bp deduped:", (u_longlong_t)zcb.zcb_dedup_asize,
(u_longlong_t)zcb.zcb_dedup_blocks,
(double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
- (void) printf("\tSPA allocated: %10llu used: %5.2f%%\n",
+ (void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:",
(u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
+ if (spa_special_class(spa)->mc_rotor != NULL) {
+ uint64_t alloc = metaslab_class_get_alloc(
+ spa_special_class(spa));
+ uint64_t space = metaslab_class_get_space(
+ spa_special_class(spa));
+
+ (void) printf("\t%-16s %14llu used: %5.2f%%\n",
+ "Special class", (u_longlong_t)alloc,
+ 100.0 * alloc / space);
+ }
+
+ if (spa_dedup_class(spa)->mc_rotor != NULL) {
+ uint64_t alloc = metaslab_class_get_alloc(
+ spa_dedup_class(spa));
+ uint64_t space = metaslab_class_get_space(
+ spa_dedup_class(spa));
+
+ (void) printf("\t%-16s %14llu used: %5.2f%%\n",
+ "Dedup class", (u_longlong_t)alloc,
+ 100.0 * alloc / space);
+ }
+
for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
if (zcb.zcb_embedded_blocks[i] == 0)
continue;
@@ -3856,6 +3941,10 @@ dump_block_stats(spa_t *spa)
if (tzb->zb_ditto_samevdev != 0) {
(void) printf("\tDittoed blocks on same vdev: %llu\n",
(longlong_t)tzb->zb_ditto_samevdev);
+ }
+ if (tzb->zb_ditto_same_ms != 0) {
+ (void) printf("\tDittoed blocks in same metaslab: %llu\n",
+ (longlong_t)tzb->zb_ditto_same_ms);
}
for (uint64_t v = 0; v < spa->spa_root_vdev->vdev_children; v++) {
Modified: head/cddl/contrib/opensolaris/cmd/zfs/zfs.8
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zfs/zfs.8 Thu Nov 21 07:48:03 2019 (r354940)
+++ head/cddl/contrib/opensolaris/cmd/zfs/zfs.8 Thu Nov 21 08:20:05 2019 (r354941)
@@ -1134,8 +1134,23 @@ This feature must be enabled to be used
.Po see
.Xr zpool-features 7
.Pc .
+.It Sy special_small_blocks Ns = Ns Ar size
+This value represents the threshold block size for including small file
+blocks into the special allocation class.
+Blocks smaller than or equal to this value will be assigned to the special
+allocation class while greater blocks will be assigned to the regular class.
+Valid values are zero or a power of two from 512B up to 128K.
+The default size is 0 which means no small file blocks will be allocated in
+the special class.
+.Pp
+Before setting this property, a special class vdev must be added to the
+pool.
+See
+.Xr zpool 8
+for more details on the special allocation class.
.It Sy mountpoint Ns = Ns Ar path | Cm none | legacy
-Controls the mount point used for this file system. See the
+Controls the mount point used for this file system.
+See the
.Qq Sx Mount Points
section for more information on how this property is used.
.Pp
@@ -3023,7 +3038,7 @@ property of the filesystem or volume which is received
To use this flag, the storage pool must have the
.Sy extensible_dataset
feature enabled. See
-.Xr zpool-features 5
+.Xr zpool-features 7
for details on ZFS feature flags.
.El
.It Xo
Modified: head/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 Thu Nov 21 07:48:03 2019 (r354940)
+++ head/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 Thu Nov 21 08:20:05 2019 (r354941)
@@ -632,6 +632,25 @@ and will return to being
once all filesystems that have ever had their checksum set to
.Sy skein
are destroyed.
+.It Sy allocation_classes
+.Bl -column "READ\-ONLY COMPATIBLE" "com.intel:allocation_classes"
+.It GUID Ta com.intel:allocation_classes
+.It READ\-ONLY COMPATIBLE Ta yes
+.It DEPENDENCIES Ta none
+.El
+.Pp
+This feature enables support for separate allocation classes.
+.Pp
+This feature becomes
+.Sy active
+when a dedicated allocation class vdev
+(dedup or special) is created with
+.Dq zpool create
+or
+.Dq zpool add .
+With device removal, it can be returned to the
+.Sy enabled
+state if all the top-level vdevs from an allocation class are removed.
.El
.Sh SEE ALSO
.Xr zpool 8
Modified: head/cddl/contrib/opensolaris/cmd/zpool/zpool.8
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zpool/zpool.8 Thu Nov 21 07:48:03 2019 (r354940)
+++ head/cddl/contrib/opensolaris/cmd/zpool/zpool.8 Thu Nov 21 08:20:05 2019 (r354941)
@@ -24,6 +24,8 @@
.\" Copyright (c) 2012, 2017 by Delphix. All Rights Reserved.
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2017 George Melikov. All Rights Reserved.
+.\" Copyright 2019 Joyent, Inc.
.\"
.\" $FreeBSD$
.\"
@@ -38,7 +40,7 @@
.Op Fl \&?
.Nm
.Cm add
-.Op Fl fn
+.Op Fl fgLnP
.Ar pool vdev ...
.Nm
.Cm attach
@@ -127,17 +129,19 @@
.Op Ar device Ns ...
.Nm
.Cm iostat
-.Op Fl T Cm d Ns | Ns Cm u
.Op Fl v
+.Op Fl T Cm d Ns | Ns Cm u
+.Op Fl gLP
.Op Ar pool
.Ar ...
+.Op Ar interval Op Ar count
.Nm
.Cm labelclear
.Op Fl f
.Ar device
.Nm
.Cm list
-.Op Fl Hpv
+.Op Fl HgLpPv
.Op Fl o Ar property Ns Op , Ns Ar ...
.Op Fl T Cm d Ns | Ns Cm u
.Op Ar pool
@@ -179,7 +183,7 @@
.Ar property Ns = Ns Ar value pool
.Nm
.Cm split
-.Op Fl n
+.Op Fl gLnP
.Op Fl R Ar altroot
.Op Fl o Ar mntopts
.Op Fl o Ar property Ns = Ns Ar value
@@ -187,7 +191,7 @@
.Op Ar device ...
.Nm
.Cm status
-.Op Fl Dvx
+.Op Fl DgLPvx
.Op Fl T Cm d Ns | Ns Cm u
.Op Ar pool
.Ar ...
@@ -320,11 +324,27 @@ types are not supported for the intent log. For more i
see the
.Qq Sx Intent Log
section.
+.It Sy dedup
+A device dedicated solely for allocating dedup data.
+The redundancy of this device should match the redundancy of the other normal
+devices in the pool.
+If more than one dedup device is specified, then allocations are load-balanced
+between devices.
+.It Sy special
+A device dedicated solely for allocating various kinds of internal metadata,
+and optionally small file data.
+The redundancy of this device should match the redundancy of the other normal
+devices in the pool.
+If more than one special device is specified, then allocations are
+load-balanced between devices.
+.Pp
+For more information on special allocations, see the
+.Sx Special Allocation Class
+section.
.It Sy cache
-A device used to cache storage pool data. A cache device cannot be configured
-as a mirror or
-.No raidz
-group. For more information, see the
+A device used to cache storage pool data.
+A cache device cannot be configured as a mirror or raidz group.
+For more information, see the
.Qq Sx Cache Devices
section.
.El
@@ -602,6 +622,31 @@ zfs properties) may be unenforceable while a checkpoin
checkpoint is allowed to consume the dataset's reservation.
Finally, data that is part of the checkpoint but has been freed in the
current state of the pool won't be scanned during a scrub.
+.Ss Special Allocation Class
+The allocations in the special class are dedicated to specific block types.
+By default this includes all metadata, the indirect blocks of user data, and
+any dedup data.
+The class can also be provisioned to accept a limited percentage of small file
+data blocks.
+.Pp
+A pool must always have at least one general (non-specified) vdev before
+other devices can be assigned to the special class.
+If the special class becomes full, then allocations intended for it will spill
+back into the normal class.
+.Pp
+Dedup data can be excluded from the special class by setting the
+.Sy vfs.zfs.ddt_data_is_special
+sysctl to false (0).
+.Pp
+Inclusion of small file blocks in the special class is opt-in.
+Each dataset can control the size of small file blocks allowed in the special
+class by setting the
+.Sy special_small_blocks
+dataset property.
+It defaults to zero so you must opt-in by setting it to a non-zero value.
+See
+.Xr zfs 8
+for more info on setting this property.
.Ss Properties
Each pool has several properties associated with it. Some properties are
read-only statistics while others are configurable and change the behavior of
@@ -872,7 +917,7 @@ Displays a help message.
.It Xo
.Nm
.Cm add
-.Op Fl fn
+.Op Fl fgLnP
.Ar pool vdev ...
.Xc
.Pp
@@ -891,11 +936,30 @@ Forces use of
.Ar vdev ,
even if they appear in use or specify a conflicting replication level.
Not all devices can be overridden in this manner.
+.It Fl g
+Display
+.Ar vdev
+GUIDs instead of the normal device names.
+These GUIDs can be used in place of
+device names for the zpool detach/offline/remove/replace commands.
+.It Fl L
+Display real paths for
+.Ar vdev Ns s
+resolving all symbolic links.
+This can be used to look up the current block
+device name regardless of the /dev/disk/ path used to open it.
.It Fl n
Displays the configuration that would be used without actually adding the
.Ar vdev Ns s.
-The actual pool creation can still fail due to insufficient privileges or device
-sharing.
+The actual pool creation can still fail due to insufficient privileges or
+device sharing.
+.It Fl P
+Display full paths for
+.Ar vdev Ns s
+instead of only the last component of the path.
+This can be used in conjunction with the
+.Fl L
+flag.
.El
.It Xo
.Nm
@@ -1512,7 +1576,7 @@ with no flags on the relevant target devices.
.Nm
.Cm iostat
.Op Fl T Cm d Ns | Ns Cm u
-.Op Fl v
+.Op Fl gLPv
.Op Ar pool
.Ar ...
.Op Ar interval Op Ar count
@@ -1544,10 +1608,25 @@ Use modifier
.Cm u
for unixtime
.Pq equals Qq Ic date +%s .
+.It Fl g
+Display vdev GUIDs instead of the normal device names.
+These GUIDs can be used in place of device names for the zpool
+detach/offline/remove/replace commands.
+.It Fl L
+Display real paths for vdevs resolving all symbolic links.
+This can be used to look up the current block device name regardless of the
+.Pa /dev/disk/
+path used to open it.
+.It Fl P
+Display full paths for vdevs instead of only the last component of
+the path.
+This can be used in conjunction with the
+.Fl L
+flag.
.It Fl v
-Verbose statistics. Reports usage statistics for individual
-.No vdev Ns s
-within the pool, in addition to the pool-wide statistics.
+Verbose statistics.
+Reports usage statistics for individual vdevs within the
+pool, in addition to the pool-wide statistics.
.El
.It Xo
.Nm
@@ -1570,7 +1649,7 @@ Treat exported or foreign devices as inactive.
.It Xo
.Nm
.Cm list
-.Op Fl Hpv
+.Op Fl HgLpPv
.Op Fl o Ar property Ns Op , Ns Ar ...
.Op Fl T Cm d Ns | Ns Cm u
.Op Ar pool
@@ -1603,11 +1682,27 @@ Use modifier
.Cm u
for unixtime
.Pq equals Qq Ic date +%s .
+.It Fl g
+Display vdev GUIDs instead of the normal device names.
+These GUIDs can be used in place of device names for the zpool
+detach/offline/remove/replace commands.
.It Fl H
Scripted mode. Do not display headers, and separate fields by a single tab
instead of arbitrary space.
+.It Fl L
+Display real paths for vdevs resolving all symbolic links.
+This can be used to look up the current block device name regardless of the
+/dev/disk/ path used to open it.
.It Fl p
-Display numbers in parsable (exact) values.
+Display numbers in parsable
+.Pq exact
+values.
+.It Fl P
+Display full paths for vdevs instead of only the last component of
+the path.
+This can be used in conjunction with the
+.Fl L
+flag.
.It Fl v
Verbose statistics. Reports usage statistics for individual
.Em vdevs
@@ -1702,7 +1797,7 @@ the background.
The removal progress can be monitored with
.Nm zpool Cm status.
This feature must be enabled to be used, see
-.Xr zpool-features 5
+.Xr zpool-features 7
.Pp
A mirrored top-level device (log or data) can be removed by specifying the
top-level mirror for the same.
@@ -1844,7 +1939,7 @@ values.
.It Xo
.Nm
.Cm split
-.Op Fl n
+.Op Fl gLnP
.Op Fl R Ar altroot
.Op Fl o Ar mntopts
.Op Fl o Ar property Ns = Ns Ar value
@@ -1884,6 +1979,15 @@ parameter for the new pool's alternate root. See the
description in the
.Qq Sx Properties
section, above.
+.It Fl g
+Display vdev GUIDs instead of the normal device names.
+These GUIDs can be used in place of device names for the zpool
+detach/offline/remove/replace commands.
+.It Fl L
+Display real paths for vdevs resolving all symbolic links.
+This can be used to look up the current block device name regardless of the
+.Pa /dev/disk/
+path used to open it.
.It Fl n
Displays the configuration that would be created without actually splitting the
pool. The actual pool split could still fail due to insufficient privileges or
@@ -1900,11 +2004,17 @@ option.
Sets the specified property on the new pool. See the
.Qq Sx Properties
section, above, for more information on the available pool properties.
+.It Fl P
+Display full paths for vdevs instead of only the last component of
+the path.
+This can be used in conjunction with the
+.Fl L
+flag.
.El
.It Xo
.Nm
.Cm status
-.Op Fl Dvx
+.Op Fl DgLPvx
.Op Fl T Cm d Ns | Ns Cm u
.Op Ar pool
.Ar ...
@@ -1939,6 +2049,21 @@ Display a histogram of deduplication statistics, showi
and referenced
.Pq logically referenced in the pool
block counts and sizes by reference count.
+.It Fl g
+Display vdev GUIDs instead of the normal device names.
+These GUIDs can be used in place of device names for the zpool
+detach/offline/remove/replace commands.
+.It Fl L
+Display real paths for vdevs resolving all symbolic links.
+This can be used to look up the current block device name regardless of the
+.Pa /dev/disk/
+path used to open it.
+.It Fl P
+Display full paths for vdevs instead of only the last component of
+the path.
+This can be used in conjunction with the
+.Fl L
+flag.
.It Fl T Cm d Ns | Ns Cm u
Print a timestamp.
.Pp
@@ -2029,6 +2154,30 @@ Successful completion.
An error occurred.
.It 2
Invalid command line options were specified.
+.El
+.Sh ENVIRONMENT VARIABLES
+.Bl -tag -width "ZPOOL_VDEV_NAME_FOLLOW_LINKS"
+.It Ev ZPOOL_VDEV_NAME_GUID
+Cause
+.Nm zpool
+subcommands to output vdev guids by default.
+This behavior is identical to the
+.Nm zpool status -g
+command line option.
+.It Ev ZPOOL_VDEV_NAME_FOLLOW_LINKS
+Cause
+.Nm zpool
+subcommands to follow links for vdev names by default.
+This behavior is identical to the
+.Nm zpool status -L
+command line option.
+.It Ev ZPOOL_VDEV_NAME_PATH
+Cause
+.Nm zpool
+subcommands to output full vdev path names by default.
+This behavior is identical to the
+.Nm zpool status -P
+command line option.
.El
.Sh EXAMPLES
.Bl -tag -width 0n
Modified: head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c Thu Nov 21 07:48:03 2019 (r354940)
+++ head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c Thu Nov 21 08:20:05 2019 (r354941)
@@ -28,6 +28,7 @@
* Copyright 2016 Igor Kozhukhov <ikozhukhov at gmail.com>.
* Copyright 2016 Nexenta Systems, Inc.
* Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2017, Intel Corporation.
*/
#include <solaris.h>
@@ -209,6 +210,8 @@ static zpool_command_t command_table[] = {
#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
+#define VDEV_ALLOC_CLASS_LOGS "logs"
+
static zpool_command_t *current_command;
static char history_str[HIS_MAX_RECORD_LEN];
static boolean_t log_history = B_TRUE;
@@ -219,7 +222,7 @@ get_usage(zpool_help_t idx)
{
switch (idx) {
case HELP_ADD:
- return (gettext("\tadd [-fn] <pool> <vdev> ...\n"));
+ return (gettext("\tadd [-fgLnP] <pool> <vdev> ...\n"));
case HELP_ATTACH:
return (gettext("\tattach [-f] <pool> <device> "
"<new-device>\n"));
@@ -251,12 +254,12 @@ get_usage(zpool_help_t idx)
"[-R root] [-F [-n]] [-t]\n"
"\t [--rewind-to-checkpoint] <pool | id> [newpool]\n"));
case HELP_IOSTAT:
- return (gettext("\tiostat [-v] [-T d|u] [pool] ... [interval "
- "[count]]\n"));
+ return (gettext("\tiostat [-gLPv] [-T d|u] [pool] ... "
+ "[interval [count]]\n"));
case HELP_LABELCLEAR:
return (gettext("\tlabelclear [-f] <vdev>\n"));
case HELP_LIST:
- return (gettext("\tlist [-Hpv] [-o property[,...]] "
+ return (gettext("\tlist [-gHLpPv] [-o property[,...]] "
"[-T d|u] [pool] ... [interval [count]]\n"));
case HELP_OFFLINE:
return (gettext("\toffline [-t] <pool> <device> ...\n"));
@@ -274,8 +277,8 @@ get_usage(zpool_help_t idx)
case HELP_SCRUB:
return (gettext("\tscrub [-s | -p] <pool> ...\n"));
case HELP_STATUS:
- return (gettext("\tstatus [-vx] [-T d|u] [pool] ... [interval "
- "[count]]\n"));
+ return (gettext("\tstatus [-DgLPvx] [-T d|u] [pool] ... "
+ "[interval [count]]\n"));
case HELP_UPGRADE:
return (gettext("\tupgrade [-v]\n"
"\tupgrade [-V version] <-a | pool ...>\n"));
@@ -285,7 +288,7 @@ get_usage(zpool_help_t idx)
case HELP_SET:
return (gettext("\tset <property=value> <pool> \n"));
case HELP_SPLIT:
- return (gettext("\tsplit [-n] [-R altroot] [-o mntopts]\n"
+ return (gettext("\tsplit [-gLnP] [-R altroot] [-o mntopts]\n"
"\t [-o property=value] <pool> <newpool> "
"[<device> ...]\n"));
case HELP_REGUID:
@@ -307,7 +310,7 @@ print_prop_cb(int prop, void *cb)
{
FILE *fp = cb;
- (void) fprintf(fp, "\t%-15s ", zpool_prop_to_name(prop));
+ (void) fprintf(fp, "\t%-19s ", zpool_prop_to_name(prop));
if (zpool_prop_readonly(prop))
(void) fprintf(fp, " NO ");
@@ -359,14 +362,14 @@ usage(boolean_t requested)
(void) fprintf(fp,
gettext("\nthe following properties are supported:\n"));
- (void) fprintf(fp, "\n\t%-15s %s %s\n\n",
+ (void) fprintf(fp, "\n\t%-19s %s %s\n\n",
"PROPERTY", "EDIT", "VALUES");
/* Iterate over all properties */
(void) zprop_iter(print_prop_cb, fp, B_FALSE, B_TRUE,
ZFS_TYPE_POOL);
- (void) fprintf(fp, "\t%-15s ", "feature@...");
+ (void) fprintf(fp, "\t%-19s ", "feature@...");
(void) fprintf(fp, "YES disabled | enabled | active\n");
(void) fprintf(fp, gettext("\nThe feature@ properties must be "
@@ -384,32 +387,45 @@ usage(boolean_t requested)
exit(requested ? 0 : 2);
}
-void
+/*
+ * print a pool vdev config for dry runs
+ */
+static void
print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent,
- boolean_t print_logs)
+ const char *match, int name_flags)
{
nvlist_t **child;
uint_t c, children;
char *vname;
+ boolean_t printed = B_FALSE;
- if (name != NULL)
- (void) printf("\t%*s%s\n", indent, "", name);
-
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
- &child, &children) != 0)
+ &child, &children) != 0) {
+ if (name != NULL)
+ (void) printf("\t%*s%s\n", indent, "", name);
return;
+ }
for (c = 0; c < children; c++) {
uint64_t is_log = B_FALSE;
+ char *class = "";
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
&is_log);
- if ((is_log && !print_logs) || (!is_log && print_logs))
+ if (is_log)
+ class = VDEV_ALLOC_BIAS_LOG;
+ (void) nvlist_lookup_string(child[c],
+ ZPOOL_CONFIG_ALLOCATION_BIAS, &class);
+ if (strcmp(match, class) != 0)
continue;
- vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE);
- print_vdev_tree(zhp, vname, child[c], indent + 2,
- B_FALSE);
+ if (!printed && name != NULL) {
+ (void) printf("\t%*s%s\n", indent, "", name);
+ printed = B_TRUE;
+ }
+ vname = zpool_vdev_name(g_zfs, zhp, child[c], name_flags);
+ print_vdev_tree(zhp, vname, child[c], indent + 2, "",
+ name_flags);
free(vname);
}
}
@@ -517,11 +533,14 @@ add_prop_list_default(const char *propname, char *prop
}
/*
- * zpool add [-fn] <pool> <vdev> ...
+ * zpool add [-fgLnP] [-o property=value] <pool> <vdev> ...
*
* -f Force addition of devices, even if they appear in use
+ * -g Display guid for individual vdev name.
+ * -L Follow links when resolving vdev path name.
* -n Do not add the devices, but display the resulting layout if
* they were to be added.
+ * -P Display full path for vdev name.
*
* Adds the given vdevs to 'pool'. As with create, the bulk of this work is
* handled by get_vdev_spec(), which constructs the nvlist needed to pass to
@@ -532,6 +551,7 @@ zpool_do_add(int argc, char **argv)
{
boolean_t force = B_FALSE;
boolean_t dryrun = B_FALSE;
+ int name_flags = 0;
int c;
nvlist_t *nvroot;
char *poolname;
@@ -542,14 +562,23 @@ zpool_do_add(int argc, char **argv)
nvlist_t *config;
/* check options */
- while ((c = getopt(argc, argv, "fn")) != -1) {
+ while ((c = getopt(argc, argv, "fgLnP")) != -1) {
switch (c) {
case 'f':
force = B_TRUE;
break;
+ case 'g':
+ name_flags |= VDEV_NAME_GUID;
+ break;
+ case 'L':
+ name_flags |= VDEV_NAME_FOLLOW_LINKS;
+ break;
case 'n':
dryrun = B_TRUE;
break;
+ case 'P':
+ name_flags |= VDEV_NAME_PATH;
+ break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
@@ -609,17 +638,26 @@ zpool_do_add(int argc, char **argv)
"configuration:\n"), zpool_get_name(zhp));
/* print original main pool and new tree */
- print_vdev_tree(zhp, poolname, poolnvroot, 0, B_FALSE);
- print_vdev_tree(zhp, NULL, nvroot, 0, B_FALSE);
+ print_vdev_tree(zhp, poolname, poolnvroot, 0, "",
+ name_flags | VDEV_NAME_TYPE_ID);
+ print_vdev_tree(zhp, NULL, nvroot, 0, "", name_flags);
- /* Do the same for the logs */
- if (num_logs(poolnvroot) > 0) {
- print_vdev_tree(zhp, "logs", poolnvroot, 0, B_TRUE);
- print_vdev_tree(zhp, NULL, nvroot, 0, B_TRUE);
- } else if (num_logs(nvroot) > 0) {
- print_vdev_tree(zhp, "logs", nvroot, 0, B_TRUE);
- }
+ /* print other classes: 'dedup', 'special', and 'log' */
+ print_vdev_tree(zhp, "dedup", poolnvroot, 0,
+ VDEV_ALLOC_BIAS_DEDUP, name_flags);
+ print_vdev_tree(zhp, NULL, nvroot, 0, VDEV_ALLOC_BIAS_DEDUP,
+ name_flags);
+ print_vdev_tree(zhp, "special", poolnvroot, 0,
+ VDEV_ALLOC_BIAS_SPECIAL, name_flags);
+ print_vdev_tree(zhp, NULL, nvroot, 0, VDEV_ALLOC_BIAS_SPECIAL,
+ name_flags);
+
+ print_vdev_tree(zhp, "logs", poolnvroot, 0, VDEV_ALLOC_BIAS_LOG,
+ name_flags);
+ print_vdev_tree(zhp, NULL, nvroot, 0, VDEV_ALLOC_BIAS_LOG,
+ name_flags);
+
ret = 0;
} else {
ret = (zpool_add(zhp, nvroot) != 0);
@@ -1217,9 +1255,13 @@ zpool_do_create(int argc, char **argv)
(void) printf(gettext("would create '%s' with the "
"following layout:\n\n"), poolname);
- print_vdev_tree(NULL, poolname, nvroot, 0, B_FALSE);
- if (num_logs(nvroot) > 0)
- print_vdev_tree(NULL, "logs", nvroot, 0, B_TRUE);
+ print_vdev_tree(NULL, poolname, nvroot, 0, "", 0);
+ print_vdev_tree(NULL, "dedup", nvroot, 0,
+ VDEV_ALLOC_BIAS_DEDUP, 0);
+ print_vdev_tree(NULL, "special", nvroot, 0,
+ VDEV_ALLOC_BIAS_SPECIAL, 0);
+ print_vdev_tree(NULL, "logs", nvroot, 0,
+ VDEV_ALLOC_BIAS_LOG, 0);
ret = 0;
} else {
@@ -1426,13 +1468,15 @@ zpool_do_export(int argc, char **argv)
* name column.
*/
static int
-max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max)
+max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max,
+ int name_flags)
{
- char *name = zpool_vdev_name(g_zfs, zhp, nv, B_TRUE);
+ char *name;
nvlist_t **child;
uint_t c, children;
int ret;
+ name = zpool_vdev_name(g_zfs, zhp, nv, name_flags | VDEV_NAME_TYPE_ID);
if (strlen(name) + depth > max)
max = strlen(name) + depth;
@@ -1442,7 +1486,7 @@ max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth
&child, &children) == 0) {
for (c = 0; c < children; c++)
if ((ret = max_width(zhp, child[c], depth + 2,
- max)) > max)
+ max, name_flags)) > max)
max = ret;
}
@@ -1450,7 +1494,7 @@ max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth
&child, &children) == 0) {
for (c = 0; c < children; c++)
if ((ret = max_width(zhp, child[c], depth + 2,
- max)) > max)
+ max, name_flags)) > max)
max = ret;
}
@@ -1458,11 +1502,10 @@ max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth
&child, &children) == 0) {
for (c = 0; c < children; c++)
if ((ret = max_width(zhp, child[c], depth + 2,
- max)) > max)
+ max, name_flags)) > max)
max = ret;
}
-
return (max);
}
@@ -1511,12 +1554,24 @@ find_spare(zpool_handle_t *zhp, void *data)
return (0);
}
+typedef struct status_cbdata {
+ int cb_count;
+ int cb_name_flags;
+ int cb_namewidth;
+ boolean_t cb_allpools;
+ boolean_t cb_verbose;
+ boolean_t cb_explain;
+ boolean_t cb_first;
+ boolean_t cb_dedup_stats;
+ boolean_t cb_print_status;
+} status_cbdata_t;
+
/*
* Print out configuration state as requested by status_callback.
*/
-void
-print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
- int namewidth, int depth, boolean_t isspare)
+static void
+print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
+ nvlist_t *nv, int depth, boolean_t isspare)
{
nvlist_t **child;
uint_t c, vsc, children;
@@ -1526,7 +1581,7 @@ print_status_config(zpool_handle_t *zhp, const char *n
char *vname;
uint64_t notpresent;
uint64_t ashift;
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all mailing list