git: d8fbbd371ca1 - main - zfs: merge openzfs/zfs@1644e2ffd
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sun, 19 Apr 2026 07:53:08 UTC
The branch main has been updated by mm:
URL: https://cgit.FreeBSD.org/src/commit/?id=d8fbbd371ca11d9ad4b29b9d3a316885a5da0b15
commit d8fbbd371ca11d9ad4b29b9d3a316885a5da0b15
Merge: 1c50cb1d7562 1644e2ffd264
Author: Martin Matuska <mm@FreeBSD.org>
AuthorDate: 2026-04-18 22:21:01 +0000
Commit: Martin Matuska <mm@FreeBSD.org>
CommitDate: 2026-04-18 22:22:45 +0000
zfs: merge openzfs/zfs@1644e2ffd
Notable upstream pull request merges:
#18148 d1b0a6982 draid: add failure domains support
#18167 f203fedde Add zoned_uid property with additive least privilege
authorization
#18191 -multiple FreeBSD: Fix a couple of races involving zvol creation
and teardown
#18213 33ed68fc2 zpool create: report which device caused failure
#18235 931deb290 Prevent range tree corruption race by updating
dnode_sync()
#18282 b44a3ecf4 zpool: Change zpool offline spares policy
#18310 -multiple Fix s_active leak in zfsvfs_hold() when z_unmounted is
true
#18351 ce837a28e Bridge speculative and prescient prefetchers
#18380 fc659bd6d draid: fix import failure after disks replacements
#18385 16858492e FreeBSD: Implement relatime property
#18390 a22b3f670 abd: Fix stats asymmetry in case of Direct I/O
#18399 7b1682a82 Add support for POSIX_FADV_DONTNEED
#18403 5cb95ad89 fix memleak in spa_errlog.c
#18405 0752cf067 draid: allow seq resilver reads from degraded vdevs
#18407 e635d27eb Add ability to set user properties while changing
encryption key
#18414 2abf469be draid: fix cksum errors after rebuild with degraded disks
#18415 -multiple Fix snapshot automount deadlock during concurrent zfs recv
#18421 1644e2ffd Fix read corruption after block clone after truncate
Obtained from: OpenZFS
OpenZFS commit: 1644e2ffd2640fa3e2c191ceaf048a5fc8399493
.../openzfs/.github/workflows/checkstyle.yaml | 7 +-
sys/contrib/openzfs/.github/workflows/codeql.yml | 2 +-
.../.github/workflows/scripts/generate-ci-type.py | 33 +-
.../.github/workflows/scripts/qemu-1-setup.sh | 21 +
.../.github/workflows/scripts/qemu-2-start.sh | 8 +-
.../.github/workflows/scripts/qemu-3-deps-vm.sh | 22 +-
.../.github/workflows/scripts/qemu-4-build-vm.sh | 11 +-
.../.github/workflows/scripts/qemu-7-prepare.sh | 22 +-
.../workflows/scripts/qemu-9-summary-page.sh | 4 +-
sys/contrib/openzfs/.github/workflows/smatch.yml | 4 +-
sys/contrib/openzfs/.github/workflows/zfs-arm.yml | 40 ++
.../.github/workflows/zfs-qemu-packages.yml | 23 +-
sys/contrib/openzfs/.github/workflows/zfs-qemu.yml | 42 +-
sys/contrib/openzfs/.github/workflows/zloop.yml | 7 +-
sys/contrib/openzfs/AUTHORS | 1 +
sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c | 4 +-
sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c | 146 +++++-
sys/contrib/openzfs/cmd/zfs/zfs_main.c | 7 +-
sys/contrib/openzfs/cmd/zinject/translate.c | 35 +-
sys/contrib/openzfs/cmd/zinject/zinject.c | 4 +-
sys/contrib/openzfs/cmd/zpool/zpool_main.c | 21 +-
sys/contrib/openzfs/cmd/zpool/zpool_vdev.c | 234 ++++++++--
sys/contrib/openzfs/cmd/ztest.c | 11 +-
sys/contrib/openzfs/config/deb.am | 20 +-
.../config/kernel-copy-from-user-inatomic.m4 | 30 --
sys/contrib/openzfs/config/kernel.m4 | 2 -
.../contrib/debian/openzfs-zfsutils.install | 1 +
.../contrib/pyzfs/libzfs_core/_constants.py | 4 +
sys/contrib/openzfs/contrib/pyzfs/setup.py.in | 4 +-
sys/contrib/openzfs/include/libzfs.h | 2 +
.../openzfs/include/os/freebsd/spl/sys/zone.h | 73 +++
.../include/os/freebsd/zfs/sys/zfs_vfsops_os.h | 1 +
.../include/os/freebsd/zfs/sys/zfs_znode_impl.h | 4 +-
.../openzfs/include/os/linux/spl/sys/zone.h | 58 +++
.../include/os/linux/zfs/sys/zfs_vfsops_os.h | 14 +-
sys/contrib/openzfs/include/sys/dbuf.h | 2 +
sys/contrib/openzfs/include/sys/dmu.h | 6 +
sys/contrib/openzfs/include/sys/dmu_zfetch.h | 1 +
sys/contrib/openzfs/include/sys/dnode.h | 13 +
sys/contrib/openzfs/include/sys/dsl_crypt.h | 3 +-
sys/contrib/openzfs/include/sys/fs/zfs.h | 8 +
sys/contrib/openzfs/include/sys/spa.h | 2 +-
sys/contrib/openzfs/include/sys/spa_impl.h | 1 +
sys/contrib/openzfs/include/sys/vdev_draid.h | 7 +-
sys/contrib/openzfs/include/sys/vdev_raidz_impl.h | 1 +
sys/contrib/openzfs/include/zfeature_common.h | 1 +
sys/contrib/openzfs/lib/libzfs/libzfs.abi | 43 +-
sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c | 52 ++-
sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c | 10 +-
sys/contrib/openzfs/lib/libzfs/libzfs_pool.c | 170 +++++--
sys/contrib/openzfs/lib/libzfs/libzfs_status.c | 43 +-
sys/contrib/openzfs/lib/libzfs/libzfs_util.c | 6 +
.../openzfs/lib/libzfs/os/linux/libzfs_pool_os.c | 19 +-
sys/contrib/openzfs/man/Makefile.am | 16 +-
sys/contrib/openzfs/man/man1/dbufstat.1 | 233 ++++++++++
sys/contrib/openzfs/man/man4/zfs.4 | 11 +
sys/contrib/openzfs/man/man7/vdevprops.7 | 26 +-
sys/contrib/openzfs/man/man7/zfsprops.7 | 92 ++++
sys/contrib/openzfs/man/man7/zpool-features.7 | 27 ++
sys/contrib/openzfs/man/man7/zpoolconcepts.7 | 36 +-
sys/contrib/openzfs/man/man8/zfs-load-key.8 | 9 +-
sys/contrib/openzfs/man/man8/zfs-set.8 | 20 +-
sys/contrib/openzfs/man/man8/zfs-zone.8 | 15 +-
sys/contrib/openzfs/man/man8/zinject.8 | 10 +-
sys/contrib/openzfs/man/man8/zpool-create.8 | 35 ++
sys/contrib/openzfs/man/man8/zpool-list.8 | 4 +-
sys/contrib/openzfs/man/man8/zpool-offline.8 | 7 +-
sys/contrib/openzfs/man/man8/zpool-resilver.8 | 3 +
sys/contrib/openzfs/man/man8/zpool-scrub.8 | 4 +-
sys/contrib/openzfs/man/man8/zpool-status.8 | 4 +-
.../openzfs/module/os/freebsd/zfs/zfs_vfsops.c | 8 +
.../openzfs/module/os/freebsd/zfs/zfs_vnops_os.c | 4 +-
.../openzfs/module/os/freebsd/zfs/zfs_znode_os.c | 43 ++
.../openzfs/module/os/freebsd/zfs/zvol_os.c | 84 ++--
sys/contrib/openzfs/module/os/linux/spl/spl-zone.c | 413 +++++++++++++++--
.../openzfs/module/os/linux/zfs/spa_misc_os.c | 50 +-
.../openzfs/module/os/linux/zfs/zfs_ctldir.c | 57 ++-
.../openzfs/module/os/linux/zfs/zfs_ioctl_os.c | 4 +
.../openzfs/module/os/linux/zfs/zfs_vfsops.c | 240 +---------
sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c | 39 +-
.../openzfs/module/os/linux/zfs/zpl_super.c | 509 ++++++++++++++++++++-
sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c | 2 +-
.../openzfs/module/zcommon/zfeature_common.c | 13 +
sys/contrib/openzfs/module/zcommon/zfs_prop.c | 15 +-
sys/contrib/openzfs/module/zcommon/zpool_prop.c | 6 +
sys/contrib/openzfs/module/zfs/abd.c | 3 +-
sys/contrib/openzfs/module/zfs/dbuf.c | 78 +++-
sys/contrib/openzfs/module/zfs/ddt_log.c | 3 +-
sys/contrib/openzfs/module/zfs/dmu.c | 79 ++++
sys/contrib/openzfs/module/zfs/dmu_zfetch.c | 69 ++-
sys/contrib/openzfs/module/zfs/dnode.c | 2 +
sys/contrib/openzfs/module/zfs/dnode_sync.c | 105 +++--
sys/contrib/openzfs/module/zfs/dsl_crypt.c | 15 +-
sys/contrib/openzfs/module/zfs/dsl_deleg.c | 13 +-
sys/contrib/openzfs/module/zfs/spa.c | 58 ++-
sys/contrib/openzfs/module/zfs/spa_errlog.c | 2 +-
sys/contrib/openzfs/module/zfs/spa_log_spacemap.c | 8 +-
sys/contrib/openzfs/module/zfs/space_map.c | 3 +-
sys/contrib/openzfs/module/zfs/vdev.c | 68 ++-
sys/contrib/openzfs/module/zfs/vdev_draid.c | 423 +++++++++++++----
sys/contrib/openzfs/module/zfs/vdev_label.c | 23 +-
sys/contrib/openzfs/module/zfs/vdev_mirror.c | 13 +-
sys/contrib/openzfs/module/zfs/vdev_raidz.c | 65 ++-
sys/contrib/openzfs/module/zfs/zfs_ioctl.c | 318 ++++++++++++-
sys/contrib/openzfs/module/zfs/zio.c | 6 +-
sys/contrib/openzfs/module/zfs/zvol.c | 51 ++-
sys/contrib/openzfs/rpm/generic/zfs.spec.in | 6 +
sys/contrib/openzfs/scripts/spdxcheck.pl | 1 -
sys/contrib/openzfs/tests/runfiles/common.run | 40 +-
sys/contrib/openzfs/tests/runfiles/linux.run | 7 +-
sys/contrib/openzfs/tests/runfiles/sanity.run | 6 +-
.../openzfs/tests/test-runner/bin/zts-report.py.in | 6 +-
sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore | 1 +
.../openzfs/tests/zfs-tests/cmd/Makefile.am | 2 +
.../tests/zfs-tests/cmd/clone_after_trunc.c | 117 +++++
.../openzfs/tests/zfs-tests/include/commands.cfg | 2 +
.../openzfs/tests/zfs-tests/tests/Makefile.am | 46 ++
.../block_cloning/block_cloning_after_trunc.ksh | 31 ++
.../zfs_change-key/zfs_change-key_userprop.ksh | 72 +++
.../zpool_create/zpool_create_draid_005_pos.ksh | 149 ++++++
.../zpool_create/zpool_create_errinfo_001_neg.ksh | 103 +++++
.../functional/cli_root/zpool_get/vdev_get.cfg | 2 +
.../functional/cli_root/zpool_get/zpool_get.cfg | 1 +
.../cli_root/zpool_offline/zpool_offline_spare.ksh | 84 ++++
.../tests/functional/fadvise/fadvise_dontneed.ksh | 63 +++
.../functional/fault/auto_offline_001_pos.ksh | 5 +-
.../functional/fault/suspend_draid_fgroups.ksh | 163 +++++++
.../tests/functional/redundancy/redundancy.kshlib | 65 ++-
.../redundancy/redundancy_draid_degraded1.ksh | 141 ++++++
.../redundancy/redundancy_draid_degraded2.ksh | 157 +++++++
.../redundancy/redundancy_draid_spare4.ksh | 152 ++++++
.../redundancy/redundancy_draid_width.ksh | 91 ++++
.../tests/functional/rsend/send_raw_ashift.ksh | 3 -
.../tests/functional/zoned_uid/cleanup.ksh | 46 ++
.../zfs-tests/tests/functional/zoned_uid/setup.ksh | 99 ++++
.../tests/functional/zoned_uid/zoned_uid.cfg | 33 ++
.../functional/zoned_uid/zoned_uid_001_pos.ksh | 85 ++++
.../functional/zoned_uid/zoned_uid_002_pos.ksh | 83 ++++
.../functional/zoned_uid/zoned_uid_003_pos.ksh | 100 ++++
.../functional/zoned_uid/zoned_uid_004_pos.ksh | 91 ++++
.../functional/zoned_uid/zoned_uid_005_neg.ksh | 72 +++
.../functional/zoned_uid/zoned_uid_006_pos.ksh | 109 +++++
.../functional/zoned_uid/zoned_uid_007_pos.ksh | 110 +++++
.../functional/zoned_uid/zoned_uid_008_pos.ksh | 128 ++++++
.../functional/zoned_uid/zoned_uid_009_pos.ksh | 149 ++++++
.../functional/zoned_uid/zoned_uid_010_pos.ksh | 157 +++++++
.../functional/zoned_uid/zoned_uid_011_neg.ksh | 153 +++++++
.../functional/zoned_uid/zoned_uid_012_pos.ksh | 120 +++++
.../functional/zoned_uid/zoned_uid_013_pos.ksh | 122 +++++
.../functional/zoned_uid/zoned_uid_014_pos.ksh | 116 +++++
.../functional/zoned_uid/zoned_uid_015_pos.ksh | 114 +++++
.../functional/zoned_uid/zoned_uid_016_pos.ksh | 132 ++++++
.../functional/zoned_uid/zoned_uid_017_neg.ksh | 125 +++++
.../functional/zoned_uid/zoned_uid_018_pos.ksh | 129 ++++++
.../functional/zoned_uid/zoned_uid_019_neg.ksh | 141 ++++++
.../functional/zoned_uid/zoned_uid_020_neg.ksh | 171 +++++++
.../functional/zoned_uid/zoned_uid_021_neg.ksh | 109 +++++
.../functional/zoned_uid/zoned_uid_022_neg.ksh | 154 +++++++
.../functional/zoned_uid/zoned_uid_023_pos.ksh | 131 ++++++
.../functional/zoned_uid/zoned_uid_024_neg.ksh | 144 ++++++
.../functional/zoned_uid/zoned_uid_025_pos.ksh | 102 +++++
.../functional/zoned_uid/zoned_uid_026_pos.ksh | 112 +++++
.../functional/zoned_uid/zoned_uid_027_pos.ksh | 103 +++++
.../functional/zoned_uid/zoned_uid_028_neg.ksh | 103 +++++
.../functional/zoned_uid/zoned_uid_029_neg.ksh | 120 +++++
.../functional/zoned_uid/zoned_uid_030_pos.ksh | 183 ++++++++
.../functional/zoned_uid/zoned_uid_031_pos.ksh | 110 +++++
.../functional/zoned_uid/zoned_uid_common.kshlib | 237 ++++++++++
sys/modules/zfs/zfs_config.h | 4 +-
sys/modules/zfs/zfs_gitrev.h | 2 +-
170 files changed, 9623 insertions(+), 846 deletions(-)
diff --cc sys/contrib/openzfs/.github/workflows/zfs-arm.yml
index 000000000000,6039e4736c42..6039e4736c42
mode 000000,100644..100644
--- a/sys/contrib/openzfs/.github/workflows/zfs-arm.yml
+++ b/sys/contrib/openzfs/.github/workflows/zfs-arm.yml
diff --cc sys/contrib/openzfs/include/os/freebsd/spl/sys/zone.h
index cfe63946706b,000000000000..12c80b39dfac
mode 100644,000000..100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/zone.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/zone.h
@@@ -1,68 -1,0 +1,141 @@@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_ZONE_H_
+#define _OPENSOLARIS_SYS_ZONE_H_
+
+#include <sys/jail.h>
++#include <sys/errno.h>
+
+/*
+ * Macros to help with zone visibility restrictions.
+ */
+
+#define GLOBAL_ZONEID 0
+
+/*
+ * Is proc in the global zone?
+ */
+#define INGLOBALZONE(proc) (!jailed((proc)->p_ucred))
+
+/*
+ * Attach the given dataset to the given jail.
+ */
+extern int zone_dataset_attach(struct ucred *, const char *, int);
+
+/*
+ * Detach the given dataset from the given jail.
+ */
+extern int zone_dataset_detach(struct ucred *, const char *, int);
+
+/*
+ * Returns true if the named pool/dataset is visible in the current zone.
+ */
+extern int zone_dataset_visible(const char *, int *);
+
+/*
+ * Safely get the hostid of the specified zone (defaults to machine's hostid
+ * if the specified zone doesn't emulate a hostid). Passing NULL retrieves
+ * the global zone's (i.e., physical system's) hostid.
+ */
+extern uint32_t zone_get_hostid(void *);
+
++/*
++ * Operations that can be authorized via zoned_uid delegation.
++ * Shared with Linux; on FreeBSD these are defined but the check
++ * always returns NOT_APPLICABLE (no user namespace support).
++ */
++typedef enum zone_uid_op {
++ ZONE_OP_CREATE,
++ ZONE_OP_SNAPSHOT,
++ ZONE_OP_CLONE,
++ ZONE_OP_DESTROY,
++ ZONE_OP_RENAME,
++ ZONE_OP_SETPROP
++} zone_uid_op_t;
++
++typedef enum zone_admin_result {
++ ZONE_ADMIN_NOT_APPLICABLE,
++ ZONE_ADMIN_ALLOWED,
++ ZONE_ADMIN_DENIED
++} zone_admin_result_t;
++
++/*
++ * FreeBSD stub: zoned_uid delegation is not applicable (no user namespaces).
++ * Always returns NOT_APPLICABLE so callers fall through to existing
++ * jail-based permission checks.
++ */
++static inline zone_admin_result_t
++zone_dataset_admin_check(const char *dataset, zone_uid_op_t op,
++ const char *aux_dataset)
++{
++ (void) dataset, (void) op, (void) aux_dataset;
++ return (ZONE_ADMIN_NOT_APPLICABLE);
++}
++
++/*
++ * Callback type for looking up zoned_uid property.
++ */
++typedef uid_t (*zone_get_zoned_uid_fn_t)(const char *dataset,
++ char *root_out, size_t root_size);
++
++/*
++ * FreeBSD stubs: zoned_uid attach/detach require user namespaces
++ * which FreeBSD does not have. Return ENXIO (consistent with the
++ * Linux fallback when CONFIG_USER_NS is not defined).
++ */
++static inline int
++zone_dataset_attach_uid(struct ucred *cred, const char *dataset, uid_t uid)
++{
++ (void) cred, (void) dataset, (void) uid;
++ return (ENXIO);
++}
++
++static inline int
++zone_dataset_detach_uid(struct ucred *cred, const char *dataset, uid_t uid)
++{
++ (void) cred, (void) dataset, (void) uid;
++ return (ENXIO);
++}
++
++/*
++ * FreeBSD stubs: no-op since zoned_uid delegation requires user namespaces.
++ */
++static inline void
++zone_register_zoned_uid_callback(zone_get_zoned_uid_fn_t fn)
++{
++ (void) fn;
++}
++
++static inline void
++zone_unregister_zoned_uid_callback(void)
++{
++}
++
+#endif /* !_OPENSOLARIS_SYS_ZONE_H_ */
diff --cc sys/contrib/openzfs/man/man1/dbufstat.1
index 000000000000,311af5e76a98..311af5e76a98
mode 000000,100644..100644
--- a/sys/contrib/openzfs/man/man1/dbufstat.1
+++ b/sys/contrib/openzfs/man/man1/dbufstat.1
diff --cc sys/contrib/openzfs/module/zfs/vdev.c
index 9def59b06727,000000000000..30639d7f4c7f
mode 100644,000000..100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@@ -1,6866 -1,0 +1,6922 @@@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2021 by Delphix. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2016 Toomas Soome <tsoome@me.com>
+ * Copyright 2017 Joyent, Inc.
+ * Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, Datto Inc. All rights reserved.
+ * Copyright (c) 2021, 2025, Klara, Inc.
+ * Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP.
+ * Copyright (c) 2026, Seagate Technology, LLC.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/fm/fs/zfs.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/bpobj.h>
+#include <sys/dmu.h>
+#include <sys/dmu_tx.h>
+#include <sys/dsl_dir.h>
+#include <sys/vdev_impl.h>
+#include <sys/vdev_rebuild.h>
+#include <sys/vdev_draid.h>
+#include <sys/uberblock_impl.h>
+#include <sys/metaslab.h>
+#include <sys/metaslab_impl.h>
+#include <sys/space_map.h>
+#include <sys/space_reftree.h>
+#include <sys/zio.h>
+#include <sys/zap.h>
+#include <sys/fs/zfs.h>
+#include <sys/arc.h>
+#include <sys/zil.h>
+#include <sys/dsl_scan.h>
+#include <sys/vdev_raidz.h>
+#include <sys/abd.h>
+#include <sys/vdev_initialize.h>
+#include <sys/vdev_trim.h>
+#include <sys/vdev_raidz.h>
+#include <sys/zvol.h>
+#include <sys/zfs_ratelimit.h>
+#include "zfs_prop.h"
+
+/*
+ * One metaslab from each (normal-class) vdev is used by the ZIL. These are
+ * called "embedded slog metaslabs", are referenced by vdev_log_mg, and are
+ * part of the spa_embedded_log_class. The metaslab with the most free space
+ * in each vdev is selected for this purpose when the pool is opened (or a
+ * vdev is added). See vdev_metaslab_init().
+ *
+ * Log blocks can be allocated from the following locations. Each one is tried
+ * in order until the allocation succeeds:
+ * 1. dedicated log vdevs, aka "slog" (spa_log_class)
+ * 2. embedded slog metaslabs (spa_embedded_log_class)
+ * 3. other metaslabs in normal vdevs (spa_normal_class)
+ *
+ * zfs_embedded_slog_min_ms disables the embedded slog if there are fewer
+ * than this number of metaslabs in the vdev. This ensures that we don't set
+ * aside an unreasonable amount of space for the ZIL. If set to less than
+ * 1 << (spa_slop_shift + 1), on small pools the usable space may be reduced
+ * (by more than 1<<spa_slop_shift) due to the embedded slog metaslab.
+ */
+static uint_t zfs_embedded_slog_min_ms = 64;
+
+/* default target for number of metaslabs per top-level vdev */
+static uint_t zfs_vdev_default_ms_count = 200;
+
+/* minimum number of metaslabs per top-level vdev */
+static uint_t zfs_vdev_min_ms_count = 16;
+
+/* practical upper limit of total metaslabs per top-level vdev */
+static uint_t zfs_vdev_ms_count_limit = 1ULL << 17;
+
+/* lower limit for metaslab size (512M) */
+static uint_t zfs_vdev_default_ms_shift = 29;
+
+/* upper limit for metaslab size (16G) */
+static uint_t zfs_vdev_max_ms_shift = 34;
+
+int vdev_validate_skip = B_FALSE;
+
+/*
+ * Since the DTL space map of a vdev is not expected to have a lot of
+ * entries, we default its block size to 4K.
+ */
+int zfs_vdev_dtl_sm_blksz = (1 << 12);
+
+/*
+ * Rate limit slow IO (delay) events to this many per second.
+ */
+static unsigned int zfs_slow_io_events_per_second = 20;
+
+/*
+ * Rate limit deadman "hung IO" events to this many per second.
+ */
+static unsigned int zfs_deadman_events_per_second = 1;
+
+/*
+ * Rate limit direct write IO verify failures to this many per second.
+ */
+static unsigned int zfs_dio_write_verify_events_per_second = 20;
+
+/*
+ * Rate limit checksum events after this many checksum errors per second.
+ */
+static unsigned int zfs_checksum_events_per_second = 20;
+
+/*
+ * Ignore errors during scrub/resilver. Allows to work around resilver
+ * upon import when there are pool errors.
+ */
+static int zfs_scan_ignore_errors = 0;
+
+/*
+ * vdev-wide space maps that have lots of entries written to them at
+ * the end of each transaction can benefit from a higher I/O bandwidth
+ * (e.g. vdev_obsolete_sm), thus we default their block size to 128K.
+ */
+int zfs_vdev_standard_sm_blksz = (1 << 17);
+
+/*
+ * Tunable parameter for debugging or performance analysis. Setting this
+ * will cause pool corruption on power loss if a volatile out-of-order
+ * write cache is enabled.
+ */
+int zfs_nocacheflush = 0;
+
+/*
+ * Maximum and minimum ashift values that can be automatically set based on
+ * vdev's physical ashift (disk's physical sector size). While ASHIFT_MAX
+ * is higher than the maximum value, it is intentionally limited here to not
+ * excessively impact pool space efficiency. Higher ashift values may still
+ * be forced by vdev logical ashift or by user via ashift property, but won't
+ * be set automatically as a performance optimization.
+ */
+uint_t zfs_vdev_max_auto_ashift = 14;
+uint_t zfs_vdev_min_auto_ashift = ASHIFT_MIN;
+
+/*
+ * VDEV checksum verification for Direct I/O writes. This is necessary for
+ * Linux, because anonymous pages can not be placed under write protection
+ * during Direct I/O writes.
+ */
+#if !defined(__FreeBSD__)
+uint_t zfs_vdev_direct_write_verify = 1;
+#else
+uint_t zfs_vdev_direct_write_verify = 0;
+#endif
+
+void
+vdev_dbgmsg(vdev_t *vd, const char *fmt, ...)
+{
+ va_list adx;
+ char buf[256];
+
+ va_start(adx, fmt);
+ (void) vsnprintf(buf, sizeof (buf), fmt, adx);
+ va_end(adx);
+
+ if (vd->vdev_path != NULL) {
+ zfs_dbgmsg("%s vdev '%s': %s", vd->vdev_ops->vdev_op_type,
+ vd->vdev_path, buf);
+ } else {
+ zfs_dbgmsg("%s-%llu vdev (guid %llu): %s",
+ vd->vdev_ops->vdev_op_type,
+ (u_longlong_t)vd->vdev_id,
+ (u_longlong_t)vd->vdev_guid, buf);
+ }
+}
+
+void
+vdev_dbgmsg_print_tree(vdev_t *vd, int indent)
+{
+ char state[20];
+
+ if (vd->vdev_ishole || vd->vdev_ops == &vdev_missing_ops) {
+ zfs_dbgmsg("%*svdev %llu: %s", indent, "",
+ (u_longlong_t)vd->vdev_id,
+ vd->vdev_ops->vdev_op_type);
+ return;
+ }
+
+ switch (vd->vdev_state) {
+ case VDEV_STATE_UNKNOWN:
+ (void) snprintf(state, sizeof (state), "unknown");
+ break;
+ case VDEV_STATE_CLOSED:
+ (void) snprintf(state, sizeof (state), "closed");
+ break;
+ case VDEV_STATE_OFFLINE:
+ (void) snprintf(state, sizeof (state), "offline");
+ break;
+ case VDEV_STATE_REMOVED:
+ (void) snprintf(state, sizeof (state), "removed");
+ break;
+ case VDEV_STATE_CANT_OPEN:
+ (void) snprintf(state, sizeof (state), "can't open");
+ break;
+ case VDEV_STATE_FAULTED:
+ (void) snprintf(state, sizeof (state), "faulted");
+ break;
+ case VDEV_STATE_DEGRADED:
+ (void) snprintf(state, sizeof (state), "degraded");
+ break;
+ case VDEV_STATE_HEALTHY:
+ (void) snprintf(state, sizeof (state), "healthy");
+ break;
+ default:
+ (void) snprintf(state, sizeof (state), "<state %u>",
+ (uint_t)vd->vdev_state);
+ }
+
+ zfs_dbgmsg("%*svdev %u: %s%s, guid: %llu, path: %s, %s", indent,
+ "", (int)vd->vdev_id, vd->vdev_ops->vdev_op_type,
+ vd->vdev_islog ? " (log)" : "",
+ (u_longlong_t)vd->vdev_guid,
+ vd->vdev_path ? vd->vdev_path : "N/A", state);
+
+ for (uint64_t i = 0; i < vd->vdev_children; i++)
+ vdev_dbgmsg_print_tree(vd->vdev_child[i], indent + 2);
+}
+
+char *
+vdev_rt_name(vdev_t *vd, const char *name)
+{
+ return (kmem_asprintf("{spa=%s vdev_guid=%llu %s}",
+ spa_name(vd->vdev_spa),
+ (u_longlong_t)vd->vdev_guid,
+ name));
+}
+
+static char *
+vdev_rt_name_dtl(vdev_t *vd, const char *name, vdev_dtl_type_t dtl_type)
+{
+ return (kmem_asprintf("{spa=%s vdev_guid=%llu %s[%d]}",
+ spa_name(vd->vdev_spa),
+ (u_longlong_t)vd->vdev_guid,
+ name,
+ dtl_type));
+}
+
+/*
+ * Virtual device management.
+ */
+
+static vdev_ops_t *const vdev_ops_table[] = {
+ &vdev_root_ops,
+ &vdev_raidz_ops,
+ &vdev_draid_ops,
+ &vdev_draid_spare_ops,
+ &vdev_mirror_ops,
+ &vdev_replacing_ops,
+ &vdev_spare_ops,
+ &vdev_disk_ops,
+ &vdev_file_ops,
+ &vdev_missing_ops,
+ &vdev_hole_ops,
+ &vdev_indirect_ops,
+ NULL
+};
+
+/*
+ * Given a vdev type, return the appropriate ops vector.
+ */
+static vdev_ops_t *
+vdev_getops(const char *type)
+{
+ vdev_ops_t *ops, *const *opspp;
+
+ for (opspp = vdev_ops_table; (ops = *opspp) != NULL; opspp++)
+ if (strcmp(ops->vdev_op_type, type) == 0)
+ break;
+
+ return (ops);
+}
+
+/*
+ * Given a vdev and a metaslab class, find which metaslab group we're
+ * interested in. All vdevs may belong to two different metaslab classes.
+ * Dedicated slog devices use only the primary metaslab group, rather than a
+ * separate log group. For embedded slogs, vdev_log_mg will be non-NULL and
+ * will point to a metaslab group of either embedded_log_class (for normal
+ * vdevs) or special_embedded_log_class (for special vdevs).
+ */
+metaslab_group_t *
+vdev_get_mg(vdev_t *vd, metaslab_class_t *mc)
+{
+ if ((mc == spa_embedded_log_class(vd->vdev_spa) ||
+ mc == spa_special_embedded_log_class(vd->vdev_spa)) &&
+ vd->vdev_log_mg != NULL)
+ return (vd->vdev_log_mg);
+ else
+ return (vd->vdev_mg);
+}
+
+void
+vdev_default_xlate(vdev_t *vd, const zfs_range_seg64_t *logical_rs,
+ zfs_range_seg64_t *physical_rs, zfs_range_seg64_t *remain_rs)
+{
+ (void) vd, (void) remain_rs;
+
+ physical_rs->rs_start = logical_rs->rs_start;
+ physical_rs->rs_end = logical_rs->rs_end;
+}
+
+/*
+ * Derive the enumerated allocation bias from string input.
+ * String origin is either the per-vdev zap or zpool(8).
+ */
+static vdev_alloc_bias_t
+vdev_derive_alloc_bias(const char *bias)
+{
+ vdev_alloc_bias_t alloc_bias = VDEV_BIAS_NONE;
+
+ if (strcmp(bias, VDEV_ALLOC_BIAS_LOG) == 0)
+ alloc_bias = VDEV_BIAS_LOG;
+ else if (strcmp(bias, VDEV_ALLOC_BIAS_SPECIAL) == 0)
+ alloc_bias = VDEV_BIAS_SPECIAL;
+ else if (strcmp(bias, VDEV_ALLOC_BIAS_DEDUP) == 0)
+ alloc_bias = VDEV_BIAS_DEDUP;
+
+ return (alloc_bias);
+}
+
+uint64_t
+vdev_default_psize(vdev_t *vd, uint64_t asize, uint64_t txg)
+{
+ ASSERT0(asize % (1ULL << vd->vdev_top->vdev_ashift));
+ uint64_t csize, psize = asize;
+ for (int c = 0; c < vd->vdev_children; c++) {
+ csize = vdev_asize_to_psize_txg(vd->vdev_child[c], asize, txg);
+ psize = MIN(psize, csize);
+ }
+
+ return (psize);
+}
+
+/*
+ * Default asize function: return the MAX of psize with the asize of
+ * all children. This is what's used by anything other than RAID-Z.
+ */
+uint64_t
+vdev_default_asize(vdev_t *vd, uint64_t psize, uint64_t txg)
+{
+ uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_top->vdev_ashift);
+ uint64_t csize;
+
+ for (int c = 0; c < vd->vdev_children; c++) {
+ csize = vdev_psize_to_asize_txg(vd->vdev_child[c], psize, txg);
+ asize = MAX(asize, csize);
+ }
+
+ return (asize);
+}
+
+uint64_t
+vdev_default_min_asize(vdev_t *vd)
+{
+ return (vd->vdev_min_asize);
+}
+
+/*
+ * Get the minimum allocatable size. We define the allocatable size as
+ * the vdev's asize rounded to the nearest metaslab. This allows us to
+ * replace or attach devices which don't have the same physical size but
+ * can still satisfy the same number of allocations.
+ */
+uint64_t
+vdev_get_min_asize(vdev_t *vd)
+{
+ vdev_t *pvd = vd->vdev_parent;
+
+ /*
+ * If our parent is NULL (inactive spare or cache) or is the root,
+ * just return our own asize.
+ */
+ if (pvd == NULL)
+ return (vd->vdev_asize);
+
+ /*
+ * The top-level vdev just returns the allocatable size rounded
+ * to the nearest metaslab.
+ */
+ if (vd == vd->vdev_top)
+ return (P2ALIGN_TYPED(vd->vdev_asize, 1ULL << vd->vdev_ms_shift,
+ uint64_t));
+
+ return (pvd->vdev_ops->vdev_op_min_asize(pvd));
+}
+
+void
+vdev_set_min_asize(vdev_t *vd)
+{
+ vd->vdev_min_asize = vdev_get_min_asize(vd);
+
+ for (int c = 0; c < vd->vdev_children; c++)
+ vdev_set_min_asize(vd->vdev_child[c]);
+}
+
+/*
+ * Get the minimal allocation size for the top-level vdev.
+ */
+uint64_t
+vdev_get_min_alloc(vdev_t *vd)
+{
+ uint64_t min_alloc = 1ULL << vd->vdev_ashift;
+
+ if (vd->vdev_ops->vdev_op_min_alloc != NULL)
+ min_alloc = vd->vdev_ops->vdev_op_min_alloc(vd);
+
+ return (min_alloc);
+}
+
+/*
+ * Get the parity level for a top-level vdev.
+ */
+uint64_t
+vdev_get_nparity(vdev_t *vd)
+{
+ uint64_t nparity = 0;
+
+ if (vd->vdev_ops->vdev_op_nparity != NULL)
+ nparity = vd->vdev_ops->vdev_op_nparity(vd);
+
+ return (nparity);
+}
+
+static int
+vdev_prop_get_objid(vdev_t *vd, uint64_t *objid)
+{
+
+ if (vd->vdev_root_zap != 0) {
+ *objid = vd->vdev_root_zap;
+ } else if (vd->vdev_top_zap != 0) {
+ *objid = vd->vdev_top_zap;
+ } else if (vd->vdev_leaf_zap != 0) {
+ *objid = vd->vdev_leaf_zap;
+ } else {
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+static int
+vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
+{
+ spa_t *spa = vd->vdev_spa;
+ objset_t *mos = spa->spa_meta_objset;
+ uint64_t objid;
+ int err;
+
+ if (vdev_prop_get_objid(vd, &objid) != 0)
+ return (EINVAL);
+
+ err = zap_lookup(mos, objid, vdev_prop_to_name(prop),
+ sizeof (uint64_t), 1, value);
+ if (err == ENOENT)
+ *value = vdev_prop_default_numeric(prop);
+
+ return (err);
+}
+
+static int
+vdev_prop_get_bool(vdev_t *vd, vdev_prop_t prop, boolean_t *bvalue)
+{
+ int err;
+ uint64_t ivalue;
+
+ err = vdev_prop_get_int(vd, prop, &ivalue);
+ *bvalue = ivalue != 0;
+
+ return (err);
+}
+
+/*
+ * Get the number of data disks for a top-level vdev.
+ */
+uint64_t
+vdev_get_ndisks(vdev_t *vd)
+{
+ uint64_t ndisks = 1;
+
+ if (vd->vdev_ops->vdev_op_ndisks != NULL)
+ ndisks = vd->vdev_ops->vdev_op_ndisks(vd);
+
+ return (ndisks);
+}
+
+vdev_t *
+vdev_lookup_top(spa_t *spa, uint64_t vdev)
+{
+ vdev_t *rvd = spa->spa_root_vdev;
+
+ ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
+
+ if (vdev < rvd->vdev_children) {
+ ASSERT(rvd->vdev_child[vdev] != NULL);
+ return (rvd->vdev_child[vdev]);
+ }
+
+ return (NULL);
+}
+
+vdev_t *
+vdev_lookup_by_guid(vdev_t *vd, uint64_t guid)
+{
+ vdev_t *mvd;
+
+ if (vd->vdev_guid == guid)
+ return (vd);
+
+ for (int c = 0; c < vd->vdev_children; c++)
+ if ((mvd = vdev_lookup_by_guid(vd->vdev_child[c], guid)) !=
+ NULL)
+ return (mvd);
+
+ return (NULL);
+}
+
+static int
+vdev_count_leaves_impl(vdev_t *vd)
+{
+ int n = 0;
+
+ if (vd->vdev_ops->vdev_op_leaf)
+ return (1);
+
+ for (int c = 0; c < vd->vdev_children; c++)
+ n += vdev_count_leaves_impl(vd->vdev_child[c]);
+
+ return (n);
+}
+
+int
+vdev_count_leaves(spa_t *spa)
+{
+ int rc;
+
+ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
+ rc = vdev_count_leaves_impl(spa->spa_root_vdev);
+ spa_config_exit(spa, SCL_VDEV, FTAG);
+
+ return (rc);
+}
+
+void
+vdev_add_child(vdev_t *pvd, vdev_t *cvd)
+{
+ size_t oldsize, newsize;
+ uint64_t id = cvd->vdev_id;
+ vdev_t **newchild;
+
+ ASSERT(spa_config_held(cvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL);
+ ASSERT0P(cvd->vdev_parent);
+
+ cvd->vdev_parent = pvd;
+
+ if (pvd == NULL)
+ return;
+
+ ASSERT(id >= pvd->vdev_children || pvd->vdev_child[id] == NULL);
+
+ oldsize = pvd->vdev_children * sizeof (vdev_t *);
+ pvd->vdev_children = MAX(pvd->vdev_children, id + 1);
+ newsize = pvd->vdev_children * sizeof (vdev_t *);
+
+ newchild = kmem_alloc(newsize, KM_SLEEP);
+ if (pvd->vdev_child != NULL) {
+ memcpy(newchild, pvd->vdev_child, oldsize);
+ kmem_free(pvd->vdev_child, oldsize);
+ }
+
+ pvd->vdev_child = newchild;
+ pvd->vdev_child[id] = cvd;
+ pvd->vdev_nonrot &= cvd->vdev_nonrot;
+
+ cvd->vdev_top = (pvd->vdev_top ? pvd->vdev_top: cvd);
+ ASSERT0P(cvd->vdev_top->vdev_parent->vdev_parent);
+
+ /*
+ * Walk up all ancestors to update guid sum.
+ */
+ for (; pvd != NULL; pvd = pvd->vdev_parent)
+ pvd->vdev_guid_sum += cvd->vdev_guid_sum;
+
+ if (cvd->vdev_ops->vdev_op_leaf) {
*** 7530 LINES SKIPPED ***