git: d411c1d696ef - main - zfs: merge openzfs/zfs@d96e29576

From: Martin Matuska <mm_at_FreeBSD.org>
Date: Wed, 03 May 2023 10:25:56 UTC
The branch main has been updated by mm:

URL: https://cgit.FreeBSD.org/src/commit/?id=d411c1d696ef35d60f8c3564e5eef7aeafa2fece

commit d411c1d696ef35d60f8c3564e5eef7aeafa2fece
Merge: 16303d2ba6b0 d96e29576c89
Author:     Martin Matuska <mm@FreeBSD.org>
AuthorDate: 2023-05-03 10:04:55 +0000
Commit:     Martin Matuska <mm@FreeBSD.org>
CommitDate: 2023-05-03 10:04:55 +0000

    zfs: merge openzfs/zfs@d96e29576
    
    Notable upstream pull request merges:
    
      #11680 Add support for zpool user properties
      #14145 Storage device expansion "silently" fails on degraded vdev
      #14405 Create zap for root vdev
      #14659 Allow MMP to bypass waiting for other threads
      #14674 Miscellaneous FreBSD compilation bugfixes
      #14692 Fix some signedness issues in arc_evict()
      #14702 Fix typo in check_clones()
      #14715 module: small fixes for FreeBSD/aarch64
      #14716 Trim needless zeroes from checksum events
      #14719 vdev: expose zfs_vdev_max_ms_shift as a module parameter
      #14722 Fix "Detach spare vdev in case if resilvering does not happen"
      #14723 freebsd clone range fixes
      #14728 Fix BLAKE3 aarch64 assembly for FreeBSD and macOS
      #14735 Fix in check_filesystem()
      #14739 Fix data corruption when cloning embedded blocks
      #14758 Fix VERIFY(!zil_replaying(zilog, tx)) panic
      #14761 Revert "ZFS_IOC_COUNT_FILLED does unnecessary txg_wait_synced()"
      #14774 FreeBSD .zfs fixups
      #14776 FreeBSD: make zfs_vfs_held() definition consistent with declaration
      #14779 powerpc64: Support ELFv2 asm on Big Endian
      #14788 FreeBSD: add missing vop_fplookup assignments
      #14789 PAM: support the authentication facility
      #14790 Revert "Fix data race between zil_commit() and zil_suspend()"
      #14795 Fix positive ABD size assertion in abd_verify()
      #14798 Mark TX_COMMIT transaction with TXG_NOTHROTTLE
      #14804 Correct ABD size for split block ZIOs
      #14806 Use correct block pointer in block cloning case.
      #14808 blake3: fix up bogus checksums in face of cpu migration
    
    Obtained from:  OpenZFS
    OpenZFS commit: d96e29576c89e6e547cb82b477651d2b85ea0fed

 lib/libbe/be.c                                     |  8 +-
 lib/libbe/be_access.c                              |  6 +-
 lib/libbe/be_info.c                                |  4 +-
 sys/contrib/openzfs/META                           |  2 +-
 sys/contrib/openzfs/cmd/zdb/zdb.c                  | 39 ++++++++-
 sys/contrib/openzfs/cmd/zfs/zfs_iter.c             |  6 +-
 sys/contrib/openzfs/cmd/zfs/zfs_main.c             | 47 ++++++----
 sys/contrib/openzfs/cmd/zpool/zpool_main.c         | 99 ++++++++++++++++------
 sys/contrib/openzfs/config/kernel-acl.m4           | 34 ++++++--
 .../openzfs/config/kernel-generic_fillattr.m4      | 33 ++++++--
 sys/contrib/openzfs/config/kernel-inode-create.m4  | 41 +++++++--
 sys/contrib/openzfs/config/kernel-inode-getattr.m4 | 63 ++++++++++----
 .../openzfs/config/kernel-inode-permission.m4      | 35 ++++++--
 sys/contrib/openzfs/config/kernel-inode-setattr.m4 | 87 +++++++++++++++++++
 .../openzfs/config/kernel-is_owner_or_cap.m4       | 25 +++++-
 sys/contrib/openzfs/config/kernel-mkdir.m4         | 55 +++++++++---
 sys/contrib/openzfs/config/kernel-mknod.m4         | 34 +++++++-
 sys/contrib/openzfs/config/kernel-rename.m4        | 56 ++++++++----
 .../openzfs/config/kernel-setattr-prepare.m4       | 44 +++++++---
 sys/contrib/openzfs/config/kernel-symlink.m4       | 33 ++++++--
 sys/contrib/openzfs/config/kernel-tmpfile.m4       | 33 ++++++--
 sys/contrib/openzfs/config/kernel-writepage_t.m4   | 26 ++++++
 sys/contrib/openzfs/config/kernel-xattr-handler.m4 | 91 ++++++++++++--------
 sys/contrib/openzfs/config/kernel.m4               |  8 +-
 .../openzfs/contrib/debian/control.modules.in      |  4 +-
 .../contrib/dracut/90zfs/module-setup.sh.in        |  3 +
 .../contrib/dracut/90zfs/zfs-env-bootfs.service.in | 15 +++-
 .../contrib/dracut/90zfs/zfs-generator.sh.in       | 67 ++-------------
 .../openzfs/contrib/dracut/90zfs/zfs-lib.sh.in     |  2 +-
 .../90zfs/zfs-nonroot-necessities.service.in       | 20 +++++
 .../dracut/90zfs/zfs-rollback-bootfs.service.in    |  3 +-
 .../dracut/90zfs/zfs-snapshot-bootfs.service.in    |  3 +-
 sys/contrib/openzfs/contrib/dracut/Makefile.am     |  1 +
 sys/contrib/openzfs/contrib/initramfs/README.md    | 25 +++++-
 sys/contrib/openzfs/contrib/initramfs/hooks/zfs.in |  3 +
 sys/contrib/openzfs/contrib/initramfs/scripts/zfs  | 10 +++
 .../openzfs/contrib/pam_zfs_key/pam_zfs_key.c      | 67 +++++++++++----
 sys/contrib/openzfs/include/libzfs.h               | 26 ++++--
 sys/contrib/openzfs/include/libzutil.h             |  1 +
 .../include/os/freebsd/spl/sys/simd_aarch64.h      |  6 +-
 .../openzfs/include/os/freebsd/spl/sys/types.h     |  2 +-
 .../include/os/freebsd/zfs/sys/zfs_vnops_os.h      | 10 +--
 .../include/os/linux/kernel/linux/blkdev_compat.h  |  2 +-
 .../include/os/linux/kernel/linux/vfs_compat.h     | 21 ++++-
 .../include/os/linux/kernel/linux/xattr_compat.h   | 17 +++-
 .../openzfs/include/os/linux/spl/sys/cred.h        | 30 ++++---
 .../openzfs/include/os/linux/spl/sys/isa_defs.h    | 18 +++-
 .../openzfs/include/os/linux/spl/sys/types.h       | 15 +++-
 .../openzfs/include/os/linux/zfs/sys/policy.h      |  6 +-
 .../openzfs/include/os/linux/zfs/sys/trace_zil.h   |  8 ++
 .../include/os/linux/zfs/sys/zfs_vnops_os.h        | 15 ++--
 sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h | 13 ++-
 sys/contrib/openzfs/include/sys/dmu_objset.h       |  7 ++
 sys/contrib/openzfs/include/sys/dnode.h            | 10 ++-
 sys/contrib/openzfs/include/sys/fs/zfs.h           |  1 +
 sys/contrib/openzfs/include/sys/spa.h              |  3 +
 sys/contrib/openzfs/include/sys/vdev_impl.h        |  1 +
 sys/contrib/openzfs/include/sys/zfs_acl.h          | 10 +--
 sys/contrib/openzfs/include/zfeature_common.h      |  1 +
 .../openzfs/lib/libspl/include/sys/isa_defs.h      | 18 +++-
 sys/contrib/openzfs/lib/libzfs/libzfs.abi          | 81 +++++++++++++++---
 sys/contrib/openzfs/lib/libzfs/libzfs_changelist.c |  7 +-
 sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c     |  2 +-
 sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c    | 18 ++--
 sys/contrib/openzfs/lib/libzfs/libzfs_iter.c       | 75 +++++++++++++---
 sys/contrib/openzfs/lib/libzfs/libzfs_mount.c      |  4 +-
 sys/contrib/openzfs/lib/libzfs/libzfs_pool.c       |  1 +
 sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c   | 21 ++---
 sys/contrib/openzfs/lib/libzfs/libzfs_util.c       |  2 +-
 sys/contrib/openzfs/lib/libzutil/zutil_import.c    |  5 +-
 sys/contrib/openzfs/man/man4/zfs.4                 | 10 ++-
 sys/contrib/openzfs/man/man7/dracut.zfs.7          | 16 ++--
 sys/contrib/openzfs/man/man7/zfsprops.7            | 12 ++-
 sys/contrib/openzfs/man/man7/zpool-features.7      | 16 ++++
 sys/contrib/openzfs/man/man7/zpoolconcepts.7       | 64 +++++++-------
 sys/contrib/openzfs/module/Makefile.bsd            | 10 +++
 sys/contrib/openzfs/module/lua/ldo.c               |  2 +
 sys/contrib/openzfs/module/lua/setjmp/setjmp.S     |  2 +
 .../openzfs/module/lua/setjmp/setjmp_loongarch64.S | 82 ++++++++++++++++++
 .../openzfs/module/os/freebsd/zfs/sysctl_os.c      |  8 --
 .../openzfs/module/os/freebsd/zfs/zfs_acl.c        | 10 +--
 .../openzfs/module/os/freebsd/zfs/zfs_vnops_os.c   | 15 ++--
 sys/contrib/openzfs/module/os/linux/spl/spl-cred.c | 12 +++
 sys/contrib/openzfs/module/os/linux/zfs/policy.c   | 13 +--
 sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c  | 36 ++++----
 sys/contrib/openzfs/module/os/linux/zfs/zfs_dir.c  |  4 +-
 .../openzfs/module/os/linux/zfs/zfs_ioctl_os.c     |  4 +
 .../openzfs/module/os/linux/zfs/zfs_vfsops.c       |  4 +-
 .../openzfs/module/os/linux/zfs/zfs_vnops_os.c     | 35 ++++----
 .../openzfs/module/os/linux/zfs/zfs_znode.c        |  2 +-
 .../openzfs/module/os/linux/zfs/zpl_ctldir.c       | 53 +++++++++---
 sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c | 38 +++++++--
 .../openzfs/module/os/linux/zfs/zpl_inode.c        | 77 ++++++++++++-----
 .../openzfs/module/os/linux/zfs/zpl_xattr.c        | 25 +++---
 .../openzfs/module/zcommon/zfeature_common.c       |  6 ++
 sys/contrib/openzfs/module/zfs/arc.c               |  8 +-
 sys/contrib/openzfs/module/zfs/dbuf.c              |  3 +-
 sys/contrib/openzfs/module/zfs/mmp.c               |  2 +-
 sys/contrib/openzfs/module/zfs/spa.c               | 16 +++-
 sys/contrib/openzfs/module/zfs/spa_errlog.c        | 12 +--
 sys/contrib/openzfs/module/zfs/spa_misc.c          | 29 ++++++-
 sys/contrib/openzfs/module/zfs/vdev.c              | 50 +++++++++--
 sys/contrib/openzfs/module/zfs/vdev_indirect.c     |  7 +-
 sys/contrib/openzfs/module/zfs/vdev_label.c        |  6 ++
 sys/contrib/openzfs/module/zfs/vdev_queue.c        |  3 +
 sys/contrib/openzfs/module/zfs/zfs_fm.c            | 10 +--
 sys/contrib/openzfs/module/zfs/zfs_replay.c        | 14 +--
 sys/contrib/openzfs/module/zfs/zfs_vnops.c         |  4 +-
 sys/contrib/openzfs/module/zfs/zil.c               |  9 +-
 sys/contrib/openzfs/tests/runfiles/common.run      | 19 +++--
 sys/contrib/openzfs/tests/runfiles/linux.run       |  6 +-
 .../tests/test-runner/bin/test-runner.py.in        | 34 ++++----
 .../openzfs/tests/zfs-tests/tests/Makefile.am      |  4 +
 .../functional/cli_root/zdb/zdb_encrypted.ksh      |  4 +-
 .../functional/cli_root/zpool_get/vdev_get.cfg     | 73 ++++++++++++++++
 .../cli_root/zpool_get/vdev_get_001_pos.ksh        | 62 ++++++++++++++
 .../functional/cli_root/zpool_get/zpool_get.cfg    |  1 +
 .../cli_root/zpool_set/vdev_set_001_pos.ksh        | 52 ++++++++++++
 .../tests/functional/rsend/send-c_volume.ksh       |  9 +-
 .../tests/functional/vdev_zaps/vdev_zaps.kshlib    | 13 +++
 .../functional/vdev_zaps/vdev_zaps_001_pos.ksh     |  3 +-
 .../functional/vdev_zaps/vdev_zaps_002_pos.ksh     |  1 +
 .../functional/vdev_zaps/vdev_zaps_003_pos.ksh     |  1 +
 .../functional/vdev_zaps/vdev_zaps_004_pos.ksh     |  1 +
 .../functional/vdev_zaps/vdev_zaps_005_pos.ksh     |  1 +
 .../functional/vdev_zaps/vdev_zaps_006_pos.ksh     |  1 +
 .../functional/vdev_zaps/vdev_zaps_007_pos.ksh     |  1 +
 .../functional/zvol/zvol_misc/zvol_misc_trim.ksh   |  4 +-
 sys/modules/zfs/zfs_config.h                       | 64 ++++++++++++--
 sys/modules/zfs/zfs_gitrev.h                       |  2 +-
 130 files changed, 2034 insertions(+), 640 deletions(-)

diff --cc lib/libbe/be.c
index a5e053cb82df,000000000000..a27025684305
mode 100644,000000..100644
--- a/lib/libbe/be.c
+++ b/lib/libbe/be.c
@@@ -1,1309 -1,0 +1,1309 @@@
 +/*-
 + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 + *
 + * Copyright (c) 2017 Kyle J. Kneitinger <kyle@kneit.in>
 + *
 + * Redistribution and use in source and binary forms, with or without
 + * modification, are permitted provided that the following conditions
 + * are met:
 + * 1. Redistributions of source code must retain the above copyright
 + *    notice, this list of conditions and the following disclaimer.
 + * 2. Redistributions in binary form must reproduce the above copyright
 + *    notice, this list of conditions and the following disclaimer in the
 + *    documentation and/or other materials provided with the distribution.
 + *
 + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 + * SUCH DAMAGE.
 + */
 +
 +#include <sys/cdefs.h>
 +__FBSDID("$FreeBSD$");
 +
 +#include <sys/param.h>
 +#include <sys/mount.h>
 +#include <sys/stat.h>
 +#include <sys/ucred.h>
 +#include <sys/queue.h>
 +#include <sys/zfs_context.h>
 +#include <sys/mntent.h>
 +#include <sys/zfs_ioctl.h>
 +
 +#include <libzutil.h>
 +#include <ctype.h>
 +#include <libgen.h>
 +#include <libzfs_core.h>
 +#include <libzfs_impl.h>
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <time.h>
 +#include <unistd.h>
 +#include <libzfsbootenv.h>
 +
 +#include "be.h"
 +#include "be_impl.h"
 +
 +/*
 + * One node of the list of clones queued for promotion.  name holds the
 + * dataset name of the clone, copied in by be_dependent_clone_cb().
 + */
 +struct promote_entry {
 +	char				name[BE_MAXPATHLEN];
 +	SLIST_ENTRY(promote_entry)	link;
 +};
 +
 +/*
 + * State handed to the destroy and promotion callbacks.
 + */
 +struct be_destroy_data {
 +	/* libbe handle the destroy was requested on */
 +	libbe_handle_t			*lbh;
 +	/* Name of the dataset being destroyed, with a trailing '/' appended */
 +	char				target_name[BE_MAXPATHLEN];
 +	/* Snapshot component when destroying a snapshot, NULL otherwise */
 +	char				*snapname;
 +	/* Clones collected for promotion before the destroy proceeds */
 +	SLIST_HEAD(, promote_entry)	promotelist;
 +};
 +
 +/*
 + * Prototypes that are only compiled in when SOON evaluates to non-zero.
 + */
 +#if SOON
 +static int be_create_child_noent(libbe_handle_t *lbh, const char *active,
 +    const char *child_path);
 +static int be_create_child_cloned(libbe_handle_t *lbh, const char *active);
 +#endif
 +
 +/*
 + * Upper bound on the serial suffixes be_setup_snapshot_name() tries while
 + * looking for an unused snapshot name.  Arbitrary... should tune
 + */
 +#define	BE_SNAP_SERIAL_MAX	1024
 +
 +/*
 + * Records the name of the ZFS dataset mounted at "/" in lbh->rootfs.
 + * Returns 0 on success and 1 if "/" cannot be examined, is not ZFS, or
 + * cannot be opened as a filesystem dataset.
 + */
 +static int
 +be_locate_rootfs(libbe_handle_t *lbh)
 +{
 +	struct statfs rootstat;
 +	struct mnttab rootmnt;
 +	zfs_handle_t *roothdl;
 +
 +	/*
 +	 * zfs_path_to_zhandle will emit to stderr if / isn't actually a ZFS
 +	 * filesystem, so rule that case out quietly before calling it.
 +	 */
 +	if (statfs("/", &rootstat) != 0)
 +		return (1);
 +	statfs2mnttab(&rootstat, &rootmnt);
 +	if (strcmp(rootmnt.mnt_fstype, MNTTYPE_ZFS) != 0)
 +		return (1);
 +
 +	roothdl = zfs_path_to_zhandle(lbh->lzh, "/", ZFS_TYPE_FILESYSTEM);
 +	if (roothdl == NULL)
 +		return (1);
 +
 +	strlcpy(lbh->rootfs, zfs_get_name(roothdl), sizeof(lbh->rootfs));
 +	zfs_close(roothdl);
 +	return (0);
 +}
 +
 +/*
 + * Creates a libbe handle that operates on the boot environment root
 + * dataset, for example, zroot/ROOT.  When root is NULL the BE root is
 + * derived from the dataset mounted at "/".  Returns NULL on any failure.
 + */
 +libbe_handle_t *
 +libbe_init(const char *root)
 +{
 +	char altroot[MAXPATHLEN];
 +	libbe_handle_t *hdl;
 +	char *pool, *slash;
 +	int poollen;
 +
 +	pool = NULL;
 +
 +	hdl = calloc(1, sizeof(libbe_handle_t));
 +	if (hdl == NULL)
 +		goto err;
 +
 +	hdl->lzh = libzfs_init();
 +	if (hdl->lzh == NULL)
 +		goto err;
 +
 +	/*
 +	 * Failing to find the rootfs is only fatal when the caller did not
 +	 * hand us a BE root to use instead.
 +	 */
 +	if (be_locate_rootfs(hdl) != 0) {
 +		if (root == NULL)
 +			goto err;
 +		*hdl->rootfs = '\0';
 +	}
 +
 +	if (root != NULL) {
 +		strlcpy(hdl->root, root, sizeof(hdl->root));
 +	} else {
 +		/* The BE root is the rootfs minus its last component */
 +		strlcpy(hdl->root, hdl->rootfs, sizeof(hdl->root));
 +		slash = strrchr(hdl->root, '/');
 +		if (slash == NULL)
 +			goto err;
 +		*slash = '\0';
 +	}
 +
 +	/* Everything before the first slash is the pool name */
 +	slash = strchr(hdl->root, '/');
 +	if (slash == NULL)
 +		goto err;
 +
 +	poollen = slash - hdl->root;
 +	pool = malloc(poollen + 1);
 +	if (pool == NULL)
 +		goto err;
 +	strlcpy(pool, hdl->root, poollen + 1);
 +
 +	hdl->active_phandle = zpool_open(hdl->lzh, pool);
 +	if (hdl->active_phandle == NULL)
 +		goto err;
 +	free(pool);
 +	pool = NULL;
 +
 +	if (zpool_get_prop(hdl->active_phandle, ZPOOL_PROP_BOOTFS, hdl->bootfs,
 +	    sizeof(hdl->bootfs), NULL, true) != 0)
 +		goto err;
 +
 +	/* An altroot of "-" is treated the same as no altroot */
 +	if (zpool_get_prop(hdl->active_phandle, ZPOOL_PROP_ALTROOT,
 +	    altroot, sizeof(altroot), NULL, true) == 0 &&
 +	    strcmp(altroot, "-") != 0)
 +		hdl->altroot_len = strlen(altroot);
 +
 +	return (hdl);
 +
 +err:
 +	if (hdl != NULL) {
 +		if (hdl->active_phandle != NULL)
 +			zpool_close(hdl->active_phandle);
 +		if (hdl->lzh != NULL)
 +			libzfs_fini(hdl->lzh);
 +		free(hdl);
 +	}
 +	free(pool);
 +	return (NULL);
 +}
 +
 +
 +/*
 + * Tears down a handle obtained from libbe_init(): closes the pool handle
 + * if one is attached, finalizes libzfs and frees the handle itself.
 + */
 +void
 +libbe_close(libbe_handle_t *lbh)
 +{
 +	if (lbh->active_phandle != NULL)
 +		zpool_close(lbh->active_phandle);
 +
 +	libzfs_fini(lbh->lzh);
 +	free(lbh);
 +}
 +
 +/*
 + * Thin wrapper: forwards num, buf and buflen unchanged to libzfs'
 + * zfs_nicenum().
 + */
 +void
 +be_nicenum(uint64_t num, char *buf, size_t buflen)
 +{
 +	zfs_nicenum(num, buf, buflen);
 +}
 +
 +/*
 + * Decides whether the clones of zfs_hdl are candidates for promotion.
 + * Only snapshots qualify.  When a single snapshot name is being destroyed
 + * (bdd->snapname != NULL), only snapshots carrying that name qualify.
 + */
 +static bool
 +be_should_promote_clones(zfs_handle_t *zfs_hdl, struct be_destroy_data *bdd)
 +{
 +	const char *snapcomp;
 +
 +	if (zfs_get_type(zfs_hdl) != ZFS_TYPE_SNAPSHOT)
 +		return (false);
 +
 +	/* Destroying a whole BE: every snapshot underneath it is going away */
 +	if (bdd->snapname == NULL)
 +		return (true);
 +
 +	/*
 +	 * Destroying one snapshot name: leave clones of snapshots we're not
 +	 * touching alone, so that we don't error out unless we really need
 +	 * to.
 +	 */
 +	snapcomp = strchr(zfs_get_name(zfs_hdl), '@') + 1;
 +	return (strcmp(snapcomp, bdd->snapname) == 0);
 +}
 +
 +/*
 + * Callback run by be_promote_dependent_clones() through
 + * zfs_iter_dependents().  For a qualifying snapshot it walks the
 + * snapshot's clones and queues every clone that does not live underneath
 + * the destroy target on bdd->promotelist.  The handle is always closed
 + * before returning.  Returns 0, ENOMEM or ENAMETOOLONG.
 + */
 +static int
 +be_dependent_clone_cb(zfs_handle_t *zfs_hdl, void *data)
 +{
 +	struct be_destroy_data *bdd;
 +	struct promote_entry *known, *fresh;
 +	struct nvlist *clones;
 +	struct nvpair *pair;
 +	const char *clonename;
 +	size_t targetlen;
 +	int rc;
 +
 +	bdd = (struct be_destroy_data *)data;
 +	rc = 0;
 +
 +	if (!be_should_promote_clones(zfs_hdl, bdd) ||
 +	    (clones = zfs_get_clones_nvl(zfs_hdl)) == NULL) {
 +		zfs_close(zfs_hdl);
 +		return (rc);
 +	}
 +
 +	targetlen = strlen(bdd->target_name);
 +	for (pair = nvlist_next_nvpair(clones, NULL); pair != NULL;
 +	    pair = nvlist_next_nvpair(clones, pair)) {
 +		clonename = nvpair_name(pair);
 +
 +		/* Clones whose name starts with the target are skipped */
 +		if (strncmp(clonename, bdd->target_name, targetlen) == 0)
 +			continue;
 +
 +		/* SLIST_FOREACH leaves known NULL unless we broke out early */
 +		SLIST_FOREACH(known, &bdd->promotelist, link) {
 +			if (strcmp(known->name, clonename) == 0)
 +				break;
 +		}
 +		if (known != NULL)
 +			continue;
 +
 +		fresh = malloc(sizeof(struct promote_entry));
 +		if (fresh == NULL) {
 +			rc = ENOMEM;
 +			break;
 +		}
 +
 +		if (strlcpy(fresh->name, clonename, sizeof(fresh->name)) >=
 +		    sizeof(fresh->name)) {
 +			/* Shouldn't happen. */
 +			free(fresh);
 +			rc = ENAMETOOLONG;
 +			break;
 +		}
 +
 +		/*
 +		 * Promotion is deferred by collecting names on a list: doing
 +		 * it while the tree is still being walked could show us the
 +		 * wrong state, e.g. promoting a BE and then effectively
 +		 * demoting it again.  The list also fixes the order.
 +		 */
 +		SLIST_INSERT_HEAD(&bdd->promotelist, fresh, link);
 +	}
 +
 +	nvlist_free(clones);
 +	zfs_close(zfs_hdl);
 +	return (rc);
 +}
 +
 +/*
 + * This is called before a destroy, so that any datasets(environments) that are
 + * dependent on this one get promoted before destroying the target.
 + *
 + * Returns 0 on success, BE_ERR_HASCLONES when a snapshot-only destroy found
 + * clones to promote, BE_ERR_DESTROYMNT when a promotion failed, or the
 + * error reported by zfs_iter_dependents().  The promote list is always
 + * emptied and freed before returning.
 + */
 +static int
 +be_promote_dependent_clones(zfs_handle_t *zfs_hdl, struct be_destroy_data *bdd)
 +{
 +	int err;
 +	zfs_handle_t *clone;
 +	struct promote_entry *entry;
 +
 +	/* The trailing slash makes target_name a prefix of descendants only */
 +	snprintf(bdd->target_name, BE_MAXPATHLEN, "%s/", zfs_get_name(zfs_hdl));
- 	err = zfs_iter_dependents(zfs_hdl, 0, true, be_dependent_clone_cb, bdd);
++	err = zfs_iter_dependents(zfs_hdl, true, be_dependent_clone_cb, bdd);
 +
 +	/*
 +	 * Drain the list and walk away from it if we're only deleting a
 +	 * snapshot.
 +	 */
 +	if (bdd->snapname != NULL && !SLIST_EMPTY(&bdd->promotelist))
 +		err = BE_ERR_HASCLONES;
 +	while (!SLIST_EMPTY(&bdd->promotelist)) {
 +		entry = SLIST_FIRST(&bdd->promotelist);
 +		SLIST_REMOVE_HEAD(&bdd->promotelist, link);
 +
 +#define	ZFS_GRAB_CLONE()	\
 +	zfs_open(bdd->lbh->lzh, entry->name, ZFS_TYPE_FILESYSTEM)
 +		/*
 +		 * Just skip this part on error, we still want to clean up the
 +		 * promotion list after the first error.  We'll then preserve it
 +		 * all the way back.
 +		 */
 +		/* A clone that fails to open is skipped without an error */
 +		if (err == 0 && (clone = ZFS_GRAB_CLONE()) != NULL) {
 +			err = zfs_promote(clone);
 +			if (err != 0)
 +				err = BE_ERR_DESTROYMNT;
 +			zfs_close(clone);
 +		}
 +#undef ZFS_GRAB_CLONE
 +		free(entry);
 +	}
 +
 +	return (err);
 +}
 +
 +/*
 + * Destroys zfs_hdl's subtree (when bdd->snapname is NULL) or the snapshot
 + * named bdd->snapname on zfs_hdl and on each filesystem below it.  Used
 + * both directly by be_destroy_internal() and as the iteration callback for
 + * the recursive walk.  data is a struct be_destroy_data *.  zfs_hdl itself
 + * is not closed here.  Returns 0 on success or the first error reported by
 + * the iteration or by zfs_destroy() of a dataset.
 + */
 +static int
 +be_destroy_cb(zfs_handle_t *zfs_hdl, void *data)
 +{
 +	char path[BE_MAXPATHLEN];
 +	struct be_destroy_data *bdd;
 +	zfs_handle_t *snap;
 +	int err;
 +
 +	bdd = (struct be_destroy_data *)data;
 +	if (bdd->snapname == NULL) {
- 		err = zfs_iter_children(zfs_hdl, 0, be_destroy_cb, data);
++		err = zfs_iter_children(zfs_hdl, be_destroy_cb, data);
 +		if (err != 0)
 +			return (err);
 +		return (zfs_destroy(zfs_hdl, false));
 +	}
 +	/* If we're dealing with snapshots instead, delete that one alone */
- 	err = zfs_iter_filesystems(zfs_hdl, 0, be_destroy_cb, data);
++	err = zfs_iter_filesystems(zfs_hdl, be_destroy_cb, data);
 +	if (err != 0)
 +		return (err);
 +	/*
 +	 * This part is intentionally glossing over any potential errors,
 +	 * because there's a lot less potential for errors when we're cleaning
 +	 * up snapshots rather than a full deep BE.  The primary error case
 +	 * here being if the snapshot doesn't exist in the first place, which
 +	 * the caller will likely deem insignificant as long as it doesn't
 +	 * exist after the call.  Thus, such a missing snapshot shouldn't jam
 +	 * up the destruction.
 +	 */
 +	snprintf(path, sizeof(path), "%s@%s", zfs_get_name(zfs_hdl),
 +	    bdd->snapname);
 +	if (!zfs_dataset_exists(bdd->lbh->lzh, path, ZFS_TYPE_SNAPSHOT))
 +		return (0);
 +	snap = zfs_open(bdd->lbh->lzh, path, ZFS_TYPE_SNAPSHOT);
 +	if (snap != NULL) {
 +		zfs_destroy(snap, false);
 +		/* The handle outlives the destroy; release it explicitly */
 +		zfs_close(snap);
 +	}
 +	return (0);
 +}
 +
 +#define	BE_DESTROY_WANTORIGIN	(BE_DESTROY_ORIGIN | BE_DESTROY_AUTOORIGIN)
 +/*
 + * Destroy the boot environment or snapshot specified by the name
 + * parameter. Options are or'd together with the possible values:
 + * BE_DESTROY_FORCE : forces operation on mounted datasets
 + * BE_DESTROY_ORIGIN: destroy the origin snapshot as well
 + * BE_DESTROY_AUTOORIGIN: destroy the origin snapshot only when its name
 + *     passes be_is_auto_snapshot_name()
 + *
 + * odestroyer is true when this call is the recursive step that destroys
 + * the origin of a dataset destroyed by the outer call.
 + *
 + * Returns 0 on success; failures are reported through set_error().  The
 + * dataset handle and the duplicated snapshot name are released on every
 + * return path.
 + */
 +static int
 +be_destroy_internal(libbe_handle_t *lbh, const char *name, int options,
 +    bool odestroyer)
 +{
 +	struct be_destroy_data bdd;
 +	char origin[BE_MAXPATHLEN], path[BE_MAXPATHLEN];
 +	zfs_handle_t *fs;
 +	char *snapdelim;
 +	int err, force;
 +	size_t rootlen;
 +
 +	bdd.lbh = lbh;
 +	bdd.snapname = NULL;
 +	SLIST_INIT(&bdd.promotelist);
 +	force = options & BE_DESTROY_FORCE;
 +	*origin = '\0';
 +
 +	be_root_concat(lbh, name, path);
 +
 +	if ((snapdelim = strchr(path, '@')) == NULL) {
 +		if (!zfs_dataset_exists(lbh->lzh, path, ZFS_TYPE_FILESYSTEM))
 +			return (set_error(lbh, BE_ERR_NOENT));
 +
 +		/* Refuse to destroy the running or the next-boot BE */
 +		if (strcmp(path, lbh->rootfs) == 0 ||
 +		    strcmp(path, lbh->bootfs) == 0)
 +			return (set_error(lbh, BE_ERR_DESTROYACT));
 +
 +		fs = zfs_open(lbh->lzh, path, ZFS_TYPE_FILESYSTEM);
 +		if (fs == NULL)
 +			return (set_error(lbh, BE_ERR_ZFSOPEN));
 +
 +		/* Don't destroy a mounted dataset unless force is specified */
 +		if (zfs_is_mounted(fs, NULL) != 0) {
 +			if (force) {
 +				zfs_unmount(fs, NULL, 0);
 +			} else {
 +				zfs_close(fs);
 +				return (set_error(lbh, BE_ERR_DESTROYMNT));
 +			}
 +		}
 +	} else {
 +		/*
 +		 * If we're initially destroying a snapshot, origin options do
 +		 * not make sense.  If we're destroying the origin snapshot of
 +		 * a BE, we want to maintain the options in case we need to
 +		 * fake success after failing to promote.
 +		 */
 +		if (!odestroyer)
 +			options &= ~BE_DESTROY_WANTORIGIN;
 +		if (!zfs_dataset_exists(lbh->lzh, path, ZFS_TYPE_SNAPSHOT))
 +			return (set_error(lbh, BE_ERR_NOENT));
 +
 +		bdd.snapname = strdup(snapdelim + 1);
 +		if (bdd.snapname == NULL)
 +			return (set_error(lbh, BE_ERR_NOMEM));
 +		/* Cut path down to the dataset that owns the snapshot */
 +		*snapdelim = '\0';
 +		fs = zfs_open(lbh->lzh, path, ZFS_TYPE_DATASET);
 +		if (fs == NULL) {
 +			free(bdd.snapname);
 +			return (set_error(lbh, BE_ERR_ZFSOPEN));
 +		}
 +	}
 +
 +	/*
 +	 * Whether we're destroying a BE or a single snapshot, we need to walk
 +	 * the tree of what we're going to destroy and promote everything in our
 +	 * path so that we can make it happen.
 +	 */
 +	if ((err = be_promote_dependent_clones(fs, &bdd)) != 0) {
 +		zfs_close(fs);
 +		free(bdd.snapname);
 +
 +		/*
 +		 * If we're just destroying the origin of some other dataset
 +		 * we were invoked to destroy, then we just ignore
 +		 * BE_ERR_HASCLONES and return success unless the caller wanted
 +		 * to force the issue.
 +		 */
 +		if (odestroyer && err == BE_ERR_HASCLONES &&
 +		    (options & BE_DESTROY_AUTOORIGIN) != 0)
 +			return (0);
 +		return (set_error(lbh, err));
 +	}
 +
 +	/*
 +	 * This was deferred until after we promote all of the derivatives so
 +	 * that we grab the new origin after everything's settled down.
 +	 */
 +	if ((options & BE_DESTROY_WANTORIGIN) != 0 &&
 +	    zfs_prop_get(fs, ZFS_PROP_ORIGIN, origin, sizeof(origin),
 +	    NULL, NULL, 0, 1) != 0 &&
 +	    (options & BE_DESTROY_ORIGIN) != 0) {
 +		zfs_close(fs);
 +		free(bdd.snapname);
 +		return (set_error(lbh, BE_ERR_NOORIGIN));
 +	}
 +
 +	/*
 +	 * If the caller wants auto-origin destruction and the origin
 +	 * name matches one of our automatically created snapshot names
 +	 * (i.e. strftime("%F-%T") with a serial at the end), then
 +	 * we'll set the DESTROY_ORIGIN flag and nuke it
 +	 * be_is_auto_snapshot_name is exported from libbe(3) so that
 +	 * the caller can determine if it needs to warn about the origin
 +	 * not being destroyed or not.
 +	 */
 +	if ((options & BE_DESTROY_AUTOORIGIN) != 0 && *origin != '\0' &&
 +	    be_is_auto_snapshot_name(lbh, origin))
 +		options |= BE_DESTROY_ORIGIN;
 +
 +	err = be_destroy_cb(fs, &bdd);
 +	zfs_close(fs);
 +	free(bdd.snapname);
 +	if (err != 0) {
 +		/* Children are still present or the mount is referenced */
 +		if (err == EBUSY)
 +			return (set_error(lbh, BE_ERR_DESTROYMNT));
 +		return (set_error(lbh, BE_ERR_UNKNOWN));
 +	}
 +
 +	if ((options & BE_DESTROY_ORIGIN) == 0)
 +		return (0);
 +
 +	/* The origin can't possibly be shorter than the BE root */
 +	rootlen = strlen(lbh->root);
 +	if (*origin == '\0' || strlen(origin) <= rootlen + 1)
 +		return (set_error(lbh, BE_ERR_INVORIGIN));
 +
 +	/*
 +	 * We'll be chopping off the BE root and running this back through
 +	 * be_destroy, so that we properly handle the origin snapshot whether
 +	 * it be that of a deep BE or not.
 +	 */
 +	if (strncmp(origin, lbh->root, rootlen) != 0 || origin[rootlen] != '/')
 +		return (0);
 +
 +	return (be_destroy_internal(lbh, origin + rootlen + 1,
 +	    options & ~BE_DESTROY_ORIGIN, true));
 +}
 +
 +/*
 + * Public entry point for destroying a boot environment or snapshot.
 + * Requesting BE_DESTROY_ORIGIN and BE_DESTROY_AUTOORIGIN together is
 + * rejected with BE_ERR_UNKNOWN; everything else is handed to
 + * be_destroy_internal().
 + */
 +int
 +be_destroy(libbe_handle_t *lbh, const char *name, int options)
 +{
 +	int originbits;
 +
 +	/*
 +	 * BE_DESTROY_ORIGIN means the origin must be destroyed or we
 +	 * complain; internally it is derived from BE_DESTROY_AUTOORIGIN, so
 +	 * a consumer must never pass both at once.
 +	 */
 +	originbits = options & BE_DESTROY_WANTORIGIN;
 +	if (originbits != BE_DESTROY_WANTORIGIN)
 +		return (be_destroy_internal(lbh, name, options, false));
 +
 +	return (set_error(lbh, BE_ERR_UNKNOWN));
 +}
 +
 +/*
 + * Appends "@<date>-<time>-<serial>" to the dataset name already in buf.
 + * Serials are tried from 0 up to BE_SNAP_SERIAL_MAX - 1 and the first one
 + * that does not name an existing snapshot is kept.  If every serial is
 + * taken, buf is left holding the last candidate.
 + */
 +static void
 +be_setup_snapshot_name(libbe_handle_t *lbh, char *buf, size_t buflen)
 +{
 +	time_t now;
 +	int off, seq;
 +
 +	time(&now);
 +	off = strlen(buf);
 +	off += strftime(buf + off, buflen - off, "@%F-%T", localtime(&now));
 +
 +	/* No room for serial... caller will do its best */
 +	if (buflen - off < 2)
 +		return;
 +
 +	seq = 0;
 +	while (seq < BE_SNAP_SERIAL_MAX) {
 +		snprintf(buf + off, buflen - off, "-%d", seq);
 +		if (!zfs_dataset_exists(lbh->lzh, buf, ZFS_TYPE_SNAPSHOT))
 +			return;
 +		++seq;
 +	}
 +}
 +
 +/*
 + * Reports whether the snapshot component of name (the part after '@')
 + * looks like a name generated by be_setup_snapshot_name(), i.e.
 + * YYYY-MM-DD-HH:MM:SS-serial with each field in a plausible range.
 + * lbh is unused.
 + */
 +bool
 +be_is_auto_snapshot_name(libbe_handle_t *lbh __unused, const char *name)
 +{
 +	int year, month, day, hour, minute, second, serial;
 +	const char *stamp;
 +	int nfields;
 +
 +	stamp = strchr(name, '@');
 +	if (stamp == NULL)
 +		return (false);
 +	stamp++;
 +
 +	/* All seven numeric fields must be present */
 +	nfields = sscanf(stamp, "%d-%d-%d-%d:%d:%d-%d", &year, &month, &day,
 +	    &hour, &minute, &second, &serial);
 +	if (nfields != 7)
 +		return (false);
 +
 +	/* Light validation only; a second of 60 is accepted */
 +	if (year < 1970)
 +		return (false);
 +	if (month < 1 || month > 12)
 +		return (false);
 +	if (day < 1 || day > 31)
 +		return (false);
 +	if (hour < 0 || hour > 23)
 +		return (false);
 +	if (minute < 0 || minute > 59)
 +		return (false);
 +	if (second < 0 || second > 60)
 +		return (false);
 +	return (serial >= 0);
 +}
 +
 +/*
 + * Takes a snapshot of the boot environment named by source.  When
 + * snap_name is NULL a timestamped name is generated.  If result is not
 + * NULL it receives "<be>@<snapshot>" relative to the BE root.  Returns
 + * BE_ERR_SUCCESS or an error passed through set_error().
 + */
 +int
 +be_snapshot(libbe_handle_t *lbh, const char *source, const char *snap_name,
 +    bool recursive, char *result)
 +{
 +	char snappath[BE_MAXPATHLEN];
 +	int rc;
 +
 +	be_root_concat(lbh, source, snappath);
 +
 +	rc = be_exists(lbh, snappath);
 +	if (rc != 0)
 +		return (set_error(lbh, rc));
 +
 +	if (snap_name == NULL) {
 +		be_setup_snapshot_name(lbh, snappath, sizeof(snappath));
 +
 +		/* Report everything after the last slash back to the caller */
 +		if (result != NULL &&
 +		    strlcpy(result, strrchr(snappath, '/') + 1,
 +		    sizeof(snappath)) >= sizeof(snappath))
 +			return (set_error(lbh, BE_ERR_INVALIDNAME));
 +	} else {
 +		if (strlcat(snappath, "@", sizeof(snappath)) >=
 +		    sizeof(snappath) ||
 +		    strlcat(snappath, snap_name, sizeof(snappath)) >=
 +		    sizeof(snappath))
 +			return (set_error(lbh, BE_ERR_INVALIDNAME));
 +
 +		if (result != NULL)
 +			snprintf(result, BE_MAXPATHLEN, "%s@%s", source,
 +			    snap_name);
 +	}
 +
 +	rc = zfs_snapshot(lbh->lzh, snappath, recursive, NULL);
 +	if (rc == 0)
 +		return (BE_ERR_SUCCESS);
 +	if (rc == EZFS_INVALIDNAME)
 +		return (set_error(lbh, BE_ERR_INVALIDNAME));
 +
 +	/*
 +	 * Anything else shouldn't happen if we've set things up properly;
 +	 * apart from ENOTSUP it is reported as UNKNOWN because it will need
 +	 * further triage.
 +	 */
 +	if (errno == ENOTSUP)
 +		return (set_error(lbh, BE_ERR_NOPOOL));
 +	return (set_error(lbh, BE_ERR_UNKNOWN));
 +}
 +
 +
 +/*
 + * Creates the boot environment called name from the path returned by
 + * be_active_path(), and passes the outcome through set_error().
 + */
 +int
 +be_create(libbe_handle_t *lbh, const char *name)
 +{
 +	return (set_error(lbh,
 +	    be_create_from_existing(lbh, name, be_active_path(lbh))));
 +}
 +
 +/*
 + * zprop_iter() callback: copies one property of the dataset in the
 + * struct libbe_dccb passed as cb into its props nvlist.  canmount,
 + * read-only properties, unreadable properties and properties whose source
 + * is neither local nor received are skipped.  Always returns ZPROP_CONT
 + * so the iteration keeps going.
 + */
 +static int
 +be_deep_clone_prop(int prop, void *cb)
 +{
 +	struct libbe_dccb *ctx;
 +	zprop_source_t origin;
 +	char value[BE_MAXPATHLEN];
 +	char srcname[BE_MAXPATHLEN];
 +	char *final;
 +
 +	ctx = cb;
 +
 +	/* canmount is not copied here; read-only properties can't be set */
 +	if (prop == ZFS_PROP_CANMOUNT || zfs_prop_readonly(prop))
 +		return (ZPROP_CONT);
 +
 +	/* Just continue if we fail to read a property */
 +	if (zfs_prop_get(ctx->zhp, prop, value, sizeof(value), &origin,
 +	    srcname, sizeof(srcname), false) != 0)
 +		return (ZPROP_CONT);
 +
 +	/* Only locally defined or received values are carried over */
 +	if (origin != ZPROP_SRC_LOCAL && origin != ZPROP_SRC_RECEIVED)
 +		return (ZPROP_CONT);
 +
 +	/* Augment mountpoint with altroot, if needed */
 +	final = value;
 +	if (prop == ZFS_PROP_MOUNTPOINT)
 +		final = be_mountpoint_augmented(ctx->lbh, final);
 +
 +	nvlist_add_string(ctx->props, zfs_prop_to_name(prop), final);
 +	return (ZPROP_CONT);
 +}
 +
 +/*
 + * Maps a dataset path onto the matching path inside the boot environment
 + * being created and writes it to 'result' (at most result_size bytes).
 + *
 + * example: with a new boot environment named 'bootenv', the dataset
 + *          'zroot/ROOT/default/data/set' maps to
 + *          'zroot/ROOT/bootenv/data/set'.
 + *
 + * Returns BE_ERR_BADPATH if dspath does not contain the BE root,
 + * BE_ERR_SUCCESS otherwise.
 + */
 +static int
 +be_get_path(struct libbe_deep_clone *ldc, const char *dspath, char *result,
 +    int result_size)
 +{
 +	const char *beroot, *match, *parent, *child;
 +
 +	beroot = ldc->lbh->root;
 +
 +	/* no match, different pools? */
 +	match = strstr(dspath, beroot);
 +	if (match == NULL)
 +		return (BE_ERR_BADPATH);
 +
 +	/* start from the root path of the new boot environment */
 +	snprintf(result, result_size, "%s/%s", beroot, ldc->bename);
 +
 +	/* step over the BE root and the slash that follows it */
 +	parent = match + strlen(beroot) + 1;
 +
 +	/* whatever follows the parent dataset is appended unchanged */
 +	child = strchr(parent, '/');
 +	if (child != NULL)
 +		strlcat(result, child, result_size);
 +
 +	return (BE_ERR_SUCCESS);
 +}
 +
 +/*
 + * Clones the snapshot ldc->snapname of dataset ds into the corresponding
 + * path of the new boot environment, copying eligible properties, then
 + * recurses into ds' child filesystems while the depth limit allows.
 + * data is a struct libbe_deep_clone *.
 + *
 + * Returns 0 on success (including when ds has no such snapshot), a
 + * BE_ERR_* value, -1 if property iteration fails, or the result of the
 + * recursive iteration.  The property list and the snapshot handle are
 + * released on every path; ds itself is not closed here.
 + */
 +static int
 +be_clone_cb(zfs_handle_t *ds, void *data)
 +{
 +	int err;
 +	char be_path[BE_MAXPATHLEN];
 +	char snap_path[BE_MAXPATHLEN];
 +	const char *dspath;
 +	zfs_handle_t *snap_hdl;
 +	nvlist_t *props;
 +	struct libbe_deep_clone *ldc;
 +	struct libbe_dccb dccb;
 +
 +	ldc = (struct libbe_deep_clone *)data;
 +	dspath = zfs_get_name(ds);
 +
 +	snprintf(snap_path, sizeof(snap_path), "%s@%s", dspath, ldc->snapname);
 +
 +	/* construct the boot environment path from the dataset we're cloning */
 +	if (be_get_path(ldc, dspath, be_path, sizeof(be_path)) != BE_ERR_SUCCESS)
 +		return (BE_ERR_UNKNOWN);
 +
 +	/* the dataset to be created (i.e. the boot environment) already exists */
 +	if (zfs_dataset_exists(ldc->lbh->lzh, be_path, ZFS_TYPE_DATASET))
 +		return (BE_ERR_EXISTS);
 +
 +	/* no snapshot found for this dataset, silently skip it */
 +	if (!zfs_dataset_exists(ldc->lbh->lzh, snap_path, ZFS_TYPE_SNAPSHOT))
 +		return (0);
 +
 +	if ((snap_hdl =
 +	    zfs_open(ldc->lbh->lzh, snap_path, ZFS_TYPE_SNAPSHOT)) == NULL)
 +		return (BE_ERR_ZFSOPEN);
 +
 +	if (nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP) != 0) {
 +		zfs_close(snap_hdl);
 +		return (BE_ERR_NOMEM);
 +	}
 +	/* The clone must not be mounted automatically */
 +	nvlist_add_string(props, "canmount", "noauto");
 +
 +	dccb.lbh = ldc->lbh;
 +	dccb.zhp = ds;
 +	dccb.props = props;
 +	if (zprop_iter(be_deep_clone_prop, &dccb, B_FALSE, B_FALSE,
 +	    ZFS_TYPE_FILESYSTEM) == ZPROP_INVAL) {
 +		nvlist_free(props);
 +		zfs_close(snap_hdl);
 +		return (-1);
 +	}
 +
 +	/* Neither resource is needed once the clone has been attempted */
 +	err = zfs_clone(snap_hdl, be_path, props);
 +	nvlist_free(props);
 +	zfs_close(snap_hdl);
 +	if (err != 0)
 +		return (BE_ERR_ZFSCLONE);
 +
 +	/* A depth_limit of -1 means unlimited recursion */
 +	if (ldc->depth_limit == -1 || ldc->depth < ldc->depth_limit) {
 +		ldc->depth++;
- 		err = zfs_iter_filesystems(ds, 0, be_clone_cb, ldc);
++		err = zfs_iter_filesystems(ds, be_clone_cb, ldc);
 +		ldc->depth--;
 +	}
 +
 +	return (err);
 +}
 +
 +/*
 + * Create a boot environment with a given name from a given snapshot.
 + * Snapshots can be in the format 'zroot/ROOT/default@snapshot' or
 + * 'default@snapshot'. In the latter case, 'default@snapshot' will be prepended
 + * with the root path that libbe was initialized with.
 +*/
 +static int
 +be_clone(libbe_handle_t *lbh, const char *bename, const char *snapshot, int depth)
 +{
 +	int err;
 +	char snap_path[BE_MAXPATHLEN];
 +	char *parentname, *snapname;
*** 2351 LINES SKIPPED ***