git: 240afd8c1fcc - main - makefs: Add ZFS support
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 05 Aug 2022 17:43:37 UTC
The branch main has been updated by markj:
URL: https://cgit.FreeBSD.org/src/commit/?id=240afd8c1fcc8c5f29dbd4ff0c915795d414405d
commit 240afd8c1fcc8c5f29dbd4ff0c915795d414405d
Author: Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2022-08-05 17:07:54 +0000
Commit: Mark Johnston <markj@FreeBSD.org>
CommitDate: 2022-08-05 17:42:29 +0000
makefs: Add ZFS support
This allows one to take a staged directory tree and create a file
consisting of a ZFS pool with one or more datasets that contain the
contents of the directory tree. This is useful for creating virtual
machine images without using the kernel to create a pool; "zpool create"
requires root privileges and currently is not permitted in jails.
makefs -t zfs also provides reproducible images by using a fixed seed
for pseudo-random number generation, used for generating GUIDs and hash
salts. makefs -t zfs requires relatively little by way of machine
resources.
The "zpool_reguid" rc.conf setting can be used to ask a FreeBSD guest to
generate a unique pool GUID upon first boot.
A small number of pool and dataset properties are supported. The pool
is backed by a single disk vdev. Data is always checksummed using
Fletcher-4, no redundant copies are made, and no compression is used.
The manual page documents supported pool and filesystem properties.
The implementation uses a few pieces of ZFS support from within the boot
loader, especially definitions for various on-disk structures, but is
otherwise standalone and in particular doesn't depend on OpenZFS.
This feature should be treated as experimental for now, i.e., important
data shouldn't be trusted to a makefs-created pool, and the command-line
interface is subject to change.
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D35248
---
usr.sbin/makefs/Makefile | 11 +
usr.sbin/makefs/makefs.8 | 97 ++-
usr.sbin/makefs/makefs.c | 3 +
usr.sbin/makefs/makefs.h | 5 +
usr.sbin/makefs/tests/Makefile | 1 +
usr.sbin/makefs/tests/makefs_zfs_tests.sh | 634 +++++++++++++++++++
usr.sbin/makefs/zfs.c | 758 +++++++++++++++++++++++
usr.sbin/makefs/zfs/Makefile.inc | 12 +
usr.sbin/makefs/zfs/dsl.c | 598 ++++++++++++++++++
usr.sbin/makefs/zfs/fs.c | 981 ++++++++++++++++++++++++++++++
usr.sbin/makefs/zfs/objset.c | 259 ++++++++
usr.sbin/makefs/zfs/vdev.c | 435 +++++++++++++
usr.sbin/makefs/zfs/zap.c | 551 +++++++++++++++++
usr.sbin/makefs/zfs/zfs.h | 167 +++++
14 files changed, 4509 insertions(+), 3 deletions(-)
diff --git a/usr.sbin/makefs/Makefile b/usr.sbin/makefs/Makefile
index 3fea648f9383..fe472d7e7309 100644
--- a/usr.sbin/makefs/Makefile
+++ b/usr.sbin/makefs/Makefile
@@ -19,6 +19,17 @@ MAN= makefs.8
NO_WCAST_ALIGN=
CSTD= c11
+.if ${MK_ZFS} != "no"
+SRCS+= zfs.c
+CFLAGS+=-I${SRCDIR}/zfs \
+ -I${SRCTOP}/stand/libsa \
+ -I${SRCTOP}/sys/cddl/boot
+
+CFLAGS+= -DHAVE_ZFS
+
+.include "${SRCDIR}/zfs/Makefile.inc"
+.endif
+
.include "${SRCDIR}/cd9660/Makefile.inc"
.include "${SRCDIR}/ffs/Makefile.inc"
.include "${SRCDIR}/msdos/Makefile.inc"
diff --git a/usr.sbin/makefs/makefs.8 b/usr.sbin/makefs/makefs.8
index fdf8d532b69f..464583eab3a1 100644
--- a/usr.sbin/makefs/makefs.8
+++ b/usr.sbin/makefs/makefs.8
@@ -35,7 +35,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd September 17, 2020
+.Dd August 5, 2022
.Dt MAKEFS 8
.Os
.Sh NAME
@@ -266,6 +266,8 @@ BSD fast file system (default).
ISO 9660 file system.
.It Sy msdos
FAT12, FAT16, or FAT32 file system.
+.It Sy zfs
+ZFS pool containing one or more file systems.
.El
.It Fl x
Exclude file system nodes not explicitly listed in the specfile.
@@ -494,10 +496,97 @@ Volume ID.
.It Cm volume_label
Volume Label.
.El
+.Ss zfs-specific options
+Note: ZFS support is currently considered experimental.
+Do not use it for anything critical.
+.Pp
+The image created by
+.Nm
+contains a ZFS pool with a single vdev of type
+.Ql disk .
+The root dataset is always created implicitly and contains the entire input
+directory tree unless additional datasets are specified using the options
+described below.
+.Pp
+The arguments consist of a keyword, an equal sign
+.Pq Ql = ,
+and a value.
+The following keywords are supported:
+.Pp
+.Bl -tag -width omit-trailing-period -offset indent -compact
+.It ashift
+The base-2 logarithm of the minimum block size.
+Typical values are 9 (512B blocks) and 12 (4KB blocks).
+The default value is 12.
+.It bootfs
+The name of the bootable dataset for the pool.
+Specifying this option causes the
+.Ql bootfs
+property to be set in the created pool.
+.It mssize
+The size of metaslabs in the created pool.
+By default,
+.Nm
+allocates large (up to 512MB) metaslabs with the expectation that
+the image will be auto-expanded upon first use.
+This option allows the default heuristic to be overridden.
+.It poolname
+The name of the ZFS pool.
+This option must be specified.
+.It rootpath
+An implicit path prefix added to dataset mountpoints.
+By default it is
+.Pa /<poolname> .
+For creating bootable pools, the
+.Va rootpath
+should be set to
+.Pa / .
+At least one dataset must have a mountpoint equal to
+.Va rootpath .
+.It fs
+Create an additional dataset.
+This option may be specified multiple times.
+The argument value must be of the form
+.Ar <dataset>[;<prop1=v1>[;<prop2=v2>[;...]]] ,
+where
+.Ar dataset
+is the name of the dataset and must belong to the pool's namespace.
+For example, with a pool name of
+.Ql test
+all dataset names must be prefixed by
+.Ql test/ .
+A dataset must exist at each level of the pool's namespace.
+For example, to create
+.Ql test/foo/bar ,
+.Ql test/foo
+must be created as well.
+.Pp
+The dataset mountpoints determine how the datasets are populated with
+files from the staged directory tree.
+Conceptually, all datasets are mounted before any are populated with files.
+The root of the staged directory tree is mapped to
+.Va rootpath .
+.Pp
+Dataset properties, as described in
+.Xr zfsprops 8 ,
+may be specified following the dataset name.
+The following properties may be set for a dataset:
+.Pp
+.Bl -tag -compact -offset indent
+.It atime
+.It canmount
+.It exec
+.It mountpoint
+.It setuid
+.El
+.El
.Sh SEE ALSO
.Xr mtree 5 ,
.Xr mtree 8 ,
-.Xr newfs 8
+.Xr newfs 8 ,
+.Xr zfsconcepts 8 ,
+.Xr zfsprops 8 ,
+.Xr zpoolprops 8
.Sh HISTORY
The
.Nm
@@ -518,4 +607,6 @@ and first appeared in
.An Ram Vedam
(cd9660 support),
.An Christos Zoulas
-(msdos support).
+(msdos support),
+.An Mark Johnston
+(zfs support).
diff --git a/usr.sbin/makefs/makefs.c b/usr.sbin/makefs/makefs.c
index 888a2b3edea7..2a50768d3152 100644
--- a/usr.sbin/makefs/makefs.c
+++ b/usr.sbin/makefs/makefs.c
@@ -77,6 +77,9 @@ static fstype_t fstypes[] = {
ENTRY(cd9660),
ENTRY(ffs),
ENTRY(msdos),
+#ifdef HAVE_ZFS
+ ENTRY(zfs),
+#endif
{ .type = NULL },
};
diff --git a/usr.sbin/makefs/makefs.h b/usr.sbin/makefs/makefs.h
index 68dc0362dd21..e88313e8366d 100644
--- a/usr.sbin/makefs/makefs.h
+++ b/usr.sbin/makefs/makefs.h
@@ -78,12 +78,14 @@ enum fi_flags {
FI_SIZED = 1<<0, /* inode sized */
FI_ALLOCATED = 1<<1, /* fsinode->ino allocated */
FI_WRITTEN = 1<<2, /* inode written */
+ FI_ROOT = 1<<3, /* root of a ZFS dataset */
};
typedef struct {
uint32_t ino; /* inode number used on target fs */
uint32_t nlink; /* number of links to this entry */
enum fi_flags flags; /* flags used by fs specific code */
+ void *param; /* for use by individual fs impls */
struct stat st; /* stat entry */
} fsinode;
@@ -186,6 +188,9 @@ void fs ## _makefs(const char *, const char *, fsnode *, fsinfo_t *)
DECLARE_FUN(cd9660);
DECLARE_FUN(ffs);
DECLARE_FUN(msdos);
+#ifdef HAVE_ZFS
+DECLARE_FUN(zfs);
+#endif
extern u_int debug;
extern int dupsok;
diff --git a/usr.sbin/makefs/tests/Makefile b/usr.sbin/makefs/tests/Makefile
index 85e4b233aea7..c2c9f6bea5b6 100644
--- a/usr.sbin/makefs/tests/Makefile
+++ b/usr.sbin/makefs/tests/Makefile
@@ -2,6 +2,7 @@
ATF_TESTS_SH+= makefs_cd9660_tests
ATF_TESTS_SH+= makefs_ffs_tests
+ATF_TESTS_SH+= makefs_zfs_tests
BINDIR= ${TESTSDIR}
diff --git a/usr.sbin/makefs/tests/makefs_zfs_tests.sh b/usr.sbin/makefs/tests/makefs_zfs_tests.sh
new file mode 100644
index 000000000000..8cd79966c49a
--- /dev/null
+++ b/usr.sbin/makefs/tests/makefs_zfs_tests.sh
@@ -0,0 +1,634 @@
+#-
+# SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+#
+# Copyright (c) 2022 The FreeBSD Foundation
+#
+# This software was developed by Mark Johnston under sponsorship from
+# the FreeBSD Foundation.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+
+MAKEFS="makefs -t zfs -o nowarn=true"
+ZFS_POOL_NAME="makefstest$$"
+TEST_ZFS_POOL_NAME="$TMPDIR/poolname"
+
+. "$(dirname "$0")/makefs_tests_common.sh"
+
+common_cleanup()
+{
+ local pool md
+
+ # Try to force a TXG, this can help catch bugs by triggering a panic.
+ sync
+
+ pool=$(cat $TEST_ZFS_POOL_NAME)
+ if zpool list "$pool" >/dev/null; then
+ zpool destroy "$pool"
+ fi
+
+ md=$(cat $TEST_MD_DEVICE_FILE)
+ if [ -c /dev/"$md" ]; then
+ mdconfig -d -u "$md"
+ fi
+}
+
+import_image()
+{
+ atf_check -e empty -o save:$TEST_MD_DEVICE_FILE -s exit:0 \
+ mdconfig -a -f $TEST_IMAGE
+ atf_check zpool import -R $TEST_MOUNT_DIR $ZFS_POOL_NAME
+ echo "$ZFS_POOL_NAME" > $TEST_ZFS_POOL_NAME
+}
+
+#
+# Test autoexpansion of the vdev.
+#
+# The pool is initially 10GB, so we get 10GB minus one metaslab's worth of
+# usable space for data. Then the pool is expanded to 50GB, and the amount of
+# usable space is 50GB minus one metaslab.
+#
+atf_test_case autoexpand cleanup
+autoexpand_body()
+{
+ local mssize poolsize poolsize1 newpoolsize
+
+ create_test_inputs
+
+ mssize=$((128 * 1024 * 1024))
+ poolsize=$((10 * 1024 * 1024 * 1024))
+ atf_check $MAKEFS -s $poolsize -o mssize=$mssize -o rootpath=/ \
+ -o poolname=$ZFS_POOL_NAME \
+ $TEST_IMAGE $TEST_INPUTS_DIR
+
+ newpoolsize=$((50 * 1024 * 1024 * 1024))
+ truncate -s $newpoolsize $TEST_IMAGE
+
+ import_image
+
+ check_image_contents
+
+ poolsize1=$(zpool list -Hp -o size $ZFS_POOL_NAME)
+ atf_check [ $((poolsize1 + $mssize)) -eq $poolsize ]
+
+ atf_check zpool online -e $ZFS_POOL_NAME /dev/$(cat $TEST_MD_DEVICE_FILE)
+
+ check_image_contents
+
+ poolsize1=$(zpool list -Hp -o size $ZFS_POOL_NAME)
+ atf_check [ $((poolsize1 + $mssize)) -eq $newpoolsize ]
+}
+autoexpand_cleanup()
+{
+ common_cleanup
+}
+
+#
+# Test with some default layout defined by the common code.
+#
+atf_test_case basic cleanup
+basic_body()
+{
+ create_test_inputs
+
+ atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ $TEST_IMAGE $TEST_INPUTS_DIR
+
+ import_image
+
+ check_image_contents
+}
+basic_cleanup()
+{
+ common_cleanup
+}
+
+atf_test_case dataset_removal cleanup
+dataset_removal_body()
+{
+ create_test_dirs
+
+ cd $TEST_INPUTS_DIR
+ mkdir dir
+ cd -
+
+ atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ -o fs=${ZFS_POOL_NAME}/dir \
+ $TEST_IMAGE $TEST_INPUTS_DIR
+
+ import_image
+
+ check_image_contents
+
+ atf_check zfs destroy ${ZFS_POOL_NAME}/dir
+}
+dataset_removal_cleanup()
+{
+ common_cleanup
+}
+
+#
+# Make sure that we can create and remove an empty directory.
+#
+atf_test_case empty_dir cleanup
+empty_dir_body()
+{
+ create_test_dirs
+
+ cd $TEST_INPUTS_DIR
+ mkdir dir
+ cd -
+
+ atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ $TEST_IMAGE $TEST_INPUTS_DIR
+
+ import_image
+
+ check_image_contents
+
+ atf_check rmdir ${TEST_MOUNT_DIR}/dir
+}
+empty_dir_cleanup()
+{
+ common_cleanup
+}
+
+atf_test_case empty_fs cleanup
+empty_fs_body()
+{
+ create_test_dirs
+
+ atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ $TEST_IMAGE $TEST_INPUTS_DIR
+
+ import_image
+
+ check_image_contents
+}
+empty_fs_cleanup()
+{
+ common_cleanup
+}
+
+atf_test_case file_sizes cleanup
+file_sizes_body()
+{
+ local i
+
+ create_test_dirs
+ cd $TEST_INPUTS_DIR
+
+ i=1
+ while [ $i -lt $((1 << 20)) ]; do
+ truncate -s $i ${i}.1
+ truncate -s $(($i - 1)) ${i}.2
+ truncate -s $(($i + 1)) ${i}.3
+ i=$(($i << 1))
+ done
+
+ cd -
+
+ # XXXMJ this creates sparse files, make sure makefs doesn't
+ # preserve the sparseness.
+ # XXXMJ need to test with larger files (at least 128MB for L2 indirs)
+ atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ $TEST_IMAGE $TEST_INPUTS_DIR
+
+ import_image
+
+ check_image_contents
+}
+file_sizes_cleanup()
+{
+ common_cleanup
+}
+
+atf_test_case hard_links cleanup
+hard_links_body()
+{
+ local f
+
+ create_test_dirs
+ cd $TEST_INPUTS_DIR
+
+ mkdir dir
+ echo "hello" > 1
+ ln 1 2
+ ln 1 dir/1
+
+ echo "goodbye" > dir/a
+ ln dir/a dir/b
+ ln dir/a a
+
+ cd -
+
+ atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ $TEST_IMAGE $TEST_INPUTS_DIR
+
+ import_image
+
+ check_image_contents
+
+ stat -f '%i' ${TEST_MOUNT_DIR}/1 > ./ino
+ stat -f '%l' ${TEST_MOUNT_DIR}/1 > ./nlink
+ for f in 1 2 dir/1; do
+ atf_check -o file:./nlink -e empty -s exit:0 \
+ stat -f '%l' ${TEST_MOUNT_DIR}/${f}
+ atf_check -o file:./ino -e empty -s exit:0 \
+ stat -f '%i' ${TEST_MOUNT_DIR}/${f}
+ atf_check cmp -s ${TEST_INPUTS_DIR}/1 ${TEST_MOUNT_DIR}/${f}
+ done
+
+ stat -f '%i' ${TEST_MOUNT_DIR}/dir/a > ./ino
+ stat -f '%l' ${TEST_MOUNT_DIR}/dir/a > ./nlink
+ for f in dir/a dir/b a; do
+ atf_check -o file:./nlink -e empty -s exit:0 \
+ stat -f '%l' ${TEST_MOUNT_DIR}/${f}
+ atf_check -o file:./ino -e empty -s exit:0 \
+ stat -f '%i' ${TEST_MOUNT_DIR}/${f}
+ atf_check cmp -s ${TEST_INPUTS_DIR}/dir/a ${TEST_MOUNT_DIR}/${f}
+ done
+}
+hard_links_cleanup()
+{
+ common_cleanup
+}
+
+# Allocate enough dnodes from an object set that the meta dnode needs to use
+# indirect blocks.
+atf_test_case indirect_dnode_array cleanup
+indirect_dnode_array_body()
+{
+ local count i
+
+ # How many dnodes do we need to allocate? Well, the data block size
+ # for meta dnodes is always 16KB, so with a dnode size of 512B we get
+ # 32 dnodes per direct block. The maximum indirect block size is 128KB
+ # and that can fit 1024 block pointers, so we need at least 32 * 1024
+ # files to force the use of two levels of indirection.
+ #
+ # Unfortunately that number of files makes the test run quite slowly,
+ # so we settle for a single indirect block for now...
+ count=$(jot -r 1 32 1024)
+
+ create_test_dirs
+ cd $TEST_INPUTS_DIR
+ for i in $(seq 1 $count); do
+ touch $i
+ done
+ cd -
+
+ atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ $TEST_IMAGE $TEST_INPUTS_DIR
+
+ import_image
+
+ check_image_contents
+}
+indirect_dnode_array_cleanup()
+{
+ common_cleanup
+}
+
+#
+# Create some files with long names, so as to test fat ZAP handling.
+#
+atf_test_case long_file_name cleanup
+long_file_name_body()
+{
+ local dir i
+
+ create_test_dirs
+ cd $TEST_INPUTS_DIR
+
+ # micro ZAP keys can be at most 50 bytes.
+ for i in $(seq 1 60); do
+ touch $(jot -s '' $i 1 1)
+ done
+ dir=$(jot -s '' 61 1 1)
+ mkdir $dir
+ for i in $(seq 1 60); do
+ touch ${dir}/$(jot -s '' $i 1 1)
+ done
+
+ cd -
+
+ atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ $TEST_IMAGE $TEST_INPUTS_DIR
+
+ import_image
+
+ check_image_contents
+
+ # Add a directory entry in the hope that OpenZFS might catch a bug
+ # in makefs' fat ZAP encoding.
+ touch ${TEST_MOUNT_DIR}/foo
+}
+long_file_name_cleanup()
+{
+ common_cleanup
+}
+
+#
+# Exercise handling of multiple datasets.
+#
+atf_test_case multi_dataset_1 cleanup
+multi_dataset_1_body()
+{
+ create_test_dirs
+ cd $TEST_INPUTS_DIR
+
+ mkdir dir1
+ echo a > dir1/a
+ mkdir dir2
+ echo b > dir2/b
+
+ cd -
+
+ atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ -o fs=${ZFS_POOL_NAME}/dir1 -o fs=${ZFS_POOL_NAME}/dir2 \
+ $TEST_IMAGE $TEST_INPUTS_DIR
+
+ import_image
+
+ check_image_contents
+
+ # Make sure that we have three datasets with the expected mount points.
+ atf_check -o inline:${ZFS_POOL_NAME}\\n -e empty -s exit:0 \
+ zfs list -H -o name ${ZFS_POOL_NAME}
+ atf_check -o inline:${TEST_MOUNT_DIR}\\n -e empty -s exit:0 \
+ zfs list -H -o mountpoint ${ZFS_POOL_NAME}
+
+ atf_check -o inline:${ZFS_POOL_NAME}/dir1\\n -e empty -s exit:0 \
+ zfs list -H -o name ${ZFS_POOL_NAME}/dir1
+ atf_check -o inline:${TEST_MOUNT_DIR}/dir1\\n -e empty -s exit:0 \
+ zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir1
+
+ atf_check -o inline:${ZFS_POOL_NAME}/dir2\\n -e empty -s exit:0 \
+ zfs list -H -o name ${ZFS_POOL_NAME}/dir2
+ atf_check -o inline:${TEST_MOUNT_DIR}/dir2\\n -e empty -s exit:0 \
+ zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir2
+}
+multi_dataset_1_cleanup()
+{
+ common_cleanup
+}
+
+#
+# Create a pool with two datasets, where the root dataset is mounted below
+# the child dataset.
+#
+atf_test_case multi_dataset_2 cleanup
+multi_dataset_2_body()
+{
+ create_test_dirs
+ cd $TEST_INPUTS_DIR
+
+ mkdir dir1
+ echo a > dir1/a
+ mkdir dir2
+ echo b > dir2/b
+
+ cd -
+
+ atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ -o fs=${ZFS_POOL_NAME}/dir1\;mountpoint=/ \
+ -o fs=${ZFS_POOL_NAME}\;mountpoint=/dir1 \
+ $TEST_IMAGE $TEST_INPUTS_DIR
+
+ import_image
+
+ check_image_contents
+}
+multi_dataset_2_cleanup()
+{
+ common_cleanup
+}
+
+#
+# Create a dataset with a non-existent mount point.
+#
+atf_test_case multi_dataset_3 cleanup
+multi_dataset_3_body()
+{
+ create_test_dirs
+ cd $TEST_INPUTS_DIR
+
+ mkdir dir1
+ echo a > dir1/a
+
+ cd -
+
+ atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ -o fs=${ZFS_POOL_NAME}/dir1 \
+ -o fs=${ZFS_POOL_NAME}/dir2 \
+ $TEST_IMAGE $TEST_INPUTS_DIR
+
+ import_image
+
+ atf_check -o inline:${TEST_MOUNT_DIR}/dir2\\n -e empty -s exit:0 \
+ zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir2
+
+ # Mounting dir2 should have created a directory called dir2. Go
+ # back and create it in the staging tree before comparing.
+ atf_check mkdir ${TEST_INPUTS_DIR}/dir2
+
+ check_image_contents
+}
+multi_dataset_3_cleanup()
+{
+ common_cleanup
+}
+
+#
+# Create an unmounted dataset.
+#
+atf_test_case multi_dataset_4 cleanup
+multi_dataset_4_body()
+{
+	create_test_dirs
+	cd $TEST_INPUTS_DIR
+
+	mkdir dir1
+	echo a > dir1/a
+
+	cd -
+
+	# The dir1 dataset is created without a mountpoint, so the staged
+	# dir1/a cannot be placed in it.
+	atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    -o fs=${ZFS_POOL_NAME}/dir1\;canmount=noauto\;mountpoint=none \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	atf_check -o inline:none\\n -e empty -s exit:0 \
+	    zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir1
+
+	check_image_contents
+
+	# Now give the dataset a mountpoint and mount it by hand.
+	atf_check zfs set mountpoint=/dir1 ${ZFS_POOL_NAME}/dir1
+	atf_check zfs mount ${ZFS_POOL_NAME}/dir1
+	atf_check -o inline:${TEST_MOUNT_DIR}/dir1\\n -e empty -s exit:0 \
+	    zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir1
+
+	# dir1/a should be part of the root dataset, not dir1, so it must no
+	# longer be visible once the dir1 dataset is mounted on top.  The path
+	# needs the "/" separator; without it the stat fails unconditionally
+	# and the check proves nothing.
+	atf_check -s not-exit:0 -e not-empty stat ${TEST_MOUNT_DIR}/dir1/a
+}
+multi_dataset_4_cleanup()
+{
+ common_cleanup
+}
+
+#
+# Rudimentary test to verify that two ZFS images created using the same
+# parameters and input hierarchy are byte-identical. In particular, makefs(8)
+# does not preserve file access times.
+#
+atf_test_case reproducible cleanup
+reproducible_body()
+{
+ create_test_inputs
+
+ atf_check $MAKEFS -s 512m -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ ${TEST_IMAGE}.1 $TEST_INPUTS_DIR
+
+ atf_check $MAKEFS -s 512m -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ ${TEST_IMAGE}.2 $TEST_INPUTS_DIR
+
+ # XXX-MJ cmp(1) is really slow
+ atf_check cmp ${TEST_IMAGE}.1 ${TEST_IMAGE}.2
+}
+reproducible_cleanup()
+{
+}
+
+#
+# Verify that we can take a snapshot of a generated dataset.
+#
+atf_test_case snapshot cleanup
+snapshot_body()
+{
+ create_test_dirs
+ cd $TEST_INPUTS_DIR
+
+ mkdir dir
+ echo "hello" > dir/hello
+ echo "goodbye" > goodbye
+
+ cd -
+
+ atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ $TEST_IMAGE $TEST_INPUTS_DIR
+
+ import_image
+
+ atf_check zfs snapshot ${ZFS_POOL_NAME}@1
+}
+snapshot_cleanup()
+{
+ common_cleanup
+}
+
+#
+# Check handling of symbolic links.
+#
+atf_test_case soft_links cleanup
+soft_links_body()
+{
+	create_test_dirs
+	cd $TEST_INPUTS_DIR
+
+	mkdir dir
+	# A link that points at itself.
+	ln -s a a
+	# A target containing "..".  It needs its own name: "a" already exists,
+	# so reusing it makes ln fail and the case is never staged.
+	ln -s dir/../a d
+	# A dangling link; dir/b is never created.
+	ln -s dir/b b
+	# A link to a regular file.  The file has to be created inside dir;
+	# redirecting onto the directory itself fails and leaves "c" dangling.
+	echo 'c' > dir/c
+	ln -s dir/c c
+	# XXX-MJ overflows bonus buffer ln -s $(jot -s '' 320 1 1) 1
+
+	cd -
+
+	atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	check_image_contents
+}
+soft_links_cleanup()
+{
+ common_cleanup
+}
+
+#
+# Verify that we can set properties on the root dataset.
+#
+atf_test_case root_props cleanup
+root_props_body()
+{
+ create_test_inputs
+
+ atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+ -o fs=${ZFS_POOL_NAME}\;atime=off\;setuid=off \
+ $TEST_IMAGE $TEST_INPUTS_DIR
+
+ import_image
+
+ check_image_contents
+
+ atf_check -o inline:off\\n -e empty -s exit:0 \
+ zfs get -H -o value atime $ZFS_POOL_NAME
+ atf_check -o inline:local\\n -e empty -s exit:0 \
+ zfs get -H -o source atime $ZFS_POOL_NAME
+ atf_check -o inline:off\\n -e empty -s exit:0 \
+ zfs get -H -o value setuid $ZFS_POOL_NAME
+ atf_check -o inline:local\\n -e empty -s exit:0 \
+ zfs get -H -o source setuid $ZFS_POOL_NAME
+}
+root_props_cleanup()
+{
+ common_cleanup
+}
+
+atf_init_test_cases()
+{
+ atf_add_test_case autoexpand
+ atf_add_test_case basic
+ atf_add_test_case dataset_removal
+ atf_add_test_case empty_dir
+ atf_add_test_case empty_fs
+ atf_add_test_case file_sizes
+ atf_add_test_case hard_links
+ atf_add_test_case indirect_dnode_array
+ atf_add_test_case long_file_name
+ atf_add_test_case multi_dataset_1
+ atf_add_test_case multi_dataset_2
+ atf_add_test_case multi_dataset_3
+ atf_add_test_case multi_dataset_4
+ atf_add_test_case reproducible
+ atf_add_test_case snapshot
+ atf_add_test_case soft_links
+ atf_add_test_case root_props
+
+ # XXXMJ tests:
+ # - test with different ashifts (at least, 9 and 12), different image sizes
+ # - create datasets in imported pool
+}
diff --git a/usr.sbin/makefs/zfs.c b/usr.sbin/makefs/zfs.c
new file mode 100644
index 000000000000..08689a558870
--- /dev/null
+++ b/usr.sbin/makefs/zfs.c
@@ -0,0 +1,758 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * This software was developed by Mark Johnston under sponsorship from
+ * the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/queue.h>
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <util.h>
+
+#include "makefs.h"
+#include "zfs.h"
+
+#define VDEV_LABEL_SPACE \
+ ((off_t)(VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE))
+_Static_assert(VDEV_LABEL_SPACE <= MINDEVSIZE, "");
+
+#define MINMSSIZE ((off_t)1 << 24) /* 16MB */
+#define DFLTMSSIZE ((off_t)1 << 29) /* 512MB */
+#define MAXMSSIZE ((off_t)1 << 34) /* 16GB */
+
+#define INDIR_LEVELS 6
+/* Indirect blocks are always 128KB. */
+#define BLKPTR_PER_INDIR (MAXBLOCKSIZE / sizeof(blkptr_t))
+
+struct dnode_cursor {
+ char inddir[INDIR_LEVELS][MAXBLOCKSIZE];
+ off_t indloc;
+ off_t indspace;
+ dnode_phys_t *dnode;
+ off_t dataoff;
+ off_t datablksz;
+};
+
+void
+zfs_prep_opts(fsinfo_t *fsopts)
+{
+ zfs_opt_t *zfs = ecalloc(1, sizeof(*zfs));
+
+ const option_t zfs_options[] = {
+ { '\0', "bootfs", &zfs->bootfs, OPT_STRPTR,
+ 0, 0, "Bootable dataset" },
+ { '\0', "mssize", &zfs->mssize, OPT_INT64,
+ MINMSSIZE, MAXMSSIZE, "Metaslab size" },
*** 3725 LINES SKIPPED ***