git: f5be20afc356 - releng/13.0 - Fix lseek(SEEK_DATA/SEEK_HOLE) mmap consistency

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Tue, 15 Mar 2022 18:14:11 UTC
The branch releng/13.0 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=f5be20afc3568876c44269420a542627238e4da0

commit f5be20afc3568876c44269420a542627238e4da0
Author:     Brian Behlendorf <behlendorf1@llnl.gov>
AuthorDate: 2021-11-07 21:27:44 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2022-03-15 18:09:52 +0000

    Fix lseek(SEEK_DATA/SEEK_HOLE) mmap consistency
    
    When using lseek(2) to report data/holes memory mapped regions of
    the file were ignored.  This could result in incorrect results.
    To handle this zfs_holey_common() was updated to asynchronously
    writeback any dirty mmap(2) regions prior to reporting holes.
    
    Additionally, while not strictly required, the dn_struct_rwlock is
    now held over the dirty check to prevent the dnode structure from
    changing.  This ensures that a clean dnode can't be dirtied before
    the data/hole is located.  The range lock is now also taken to
    ensure the call cannot race with zfs_write().
    
    Furthermore, the code was refactored to provide a dnode_is_dirty()
    helper function which checks the dnode for any dirty records to
    determine its dirtiness.
    
    Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
    Reviewed-by: Tony Hutter <hutter2@llnl.gov>
    Reviewed-by: Rich Ercolani <rincebrain@gmail.com>
    Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
    Issue #11900
    Closes #12724
    (cherry picked from commit 664d487a5dbd758216ac613934a4080fcc1de347)
    
    Approved by:    so
    Security:       FreeBSD-EN-22:11.zfs
---
 sys/contrib/openzfs/configure.ac                   |   1 +
 .../openzfs/include/os/freebsd/spl/sys/vnode.h     |  18 +++
 .../include/os/freebsd/zfs/sys/zfs_znode_impl.h    |   1 +
 .../include/os/linux/zfs/sys/zfs_znode_impl.h      |   1 +
 sys/contrib/openzfs/include/sys/dnode.h            |   1 +
 sys/contrib/openzfs/module/zfs/dmu.c               |  53 ++++----
 sys/contrib/openzfs/module/zfs/dnode.c             |  20 +++
 sys/contrib/openzfs/module/zfs/zfs_vnops.c         |   9 +-
 sys/contrib/openzfs/tests/runfiles/common.run      |   2 +-
 .../openzfs/tests/zfs-tests/cmd/Makefile.am        |   1 +
 .../openzfs/tests/zfs-tests/include/commands.cfg   |   1 +
 .../openzfs/tests/zfs-tests/include/tunables.cfg   |   1 +
 .../zfs-tests/tests/functional/mmap/Makefile.am    |   3 +-
 .../tests/functional/mmap/mmap_seek_001_pos.ksh    |  67 ++++++++++
 tests/zfs-tests/cmd/mmap_seek/.gitignore           |   1 +
 tests/zfs-tests/cmd/mmap_seek/Makefile.am          |   6 +
 tests/zfs-tests/cmd/mmap_seek/mmap_seek.c          | 147 +++++++++++++++++++++
 17 files changed, 303 insertions(+), 30 deletions(-)

diff --git a/sys/contrib/openzfs/configure.ac b/sys/contrib/openzfs/configure.ac
index 4520a290a9a5..0134cc47e314 100644
--- a/sys/contrib/openzfs/configure.ac
+++ b/sys/contrib/openzfs/configure.ac
@@ -223,6 +223,7 @@ AC_CONFIG_FILES([
 	tests/zfs-tests/cmd/mktree/Makefile
 	tests/zfs-tests/cmd/mmap_exec/Makefile
 	tests/zfs-tests/cmd/mmap_libaio/Makefile
+	tests/zfs-tests/cmd/mmap_seek/Makefile
 	tests/zfs-tests/cmd/mmapwrite/Makefile
 	tests/zfs-tests/cmd/nvlist_to_lua/Makefile
 	tests/zfs-tests/cmd/randfree_file/Makefile
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode.h
index fa7bbd88c6c8..55491da54604 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode.h
@@ -59,6 +59,8 @@ enum symfollow { NO_FOLLOW = NOFOLLOW };
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/syscallsubr.h>
+#include <sys/vm.h>
+#include <vm/vm_object.h>
 
 typedef	struct vop_vector	vnodeops_t;
 #define	VOP_FID		VOP_VPTOFH
@@ -88,6 +90,22 @@ vn_is_readonly(vnode_t *vp)
 #define	vn_has_cached_data(vp)	\
 	((vp)->v_object != NULL && \
 	(vp)->v_object->resident_page_count > 0)
+
+static __inline void
+vn_flush_cached_data(vnode_t *vp, boolean_t sync)
+{
+#if __FreeBSD_version > 1300054
+	if (vm_object_mightbedirty(vp->v_object)) {
+#else
+	if (vp->v_object->flags & OBJ_MIGHTBEDIRTY) {
+#endif
+		int flags = sync ? OBJPC_SYNC : 0;
+		zfs_vmobject_wlock(vp->v_object);
+		vm_object_page_clean(vp->v_object, 0, 0, flags);
+		zfs_vmobject_wunlock(vp->v_object);
+	}
+}
+
 #define	vn_exists(vp)		do { } while (0)
 #define	vn_invalid(vp)		do { } while (0)
 #define	vn_renamepath(tdvp, svp, tnm, lentnm)	do { } while (0)
diff --git a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h
index 091186f23174..aa6efaf53337 100644
--- a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h
+++ b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h
@@ -118,6 +118,7 @@ extern minor_t zfsdev_minor_alloc(void);
 #define	Z_ISDIR(type) ((type) == VDIR)
 
 #define	zn_has_cached_data(zp)	vn_has_cached_data(ZTOV(zp))
+#define	zn_flush_cached_data(zp, sync)	vn_flush_cached_data(ZTOV(zp), sync)
 #define	zn_rlimit_fsize(zp, uio, td)	vn_rlimit_fsize(ZTOV(zp), (uio), (td))
 
 /* Called on entry to each ZFS vnode and vfs operation  */
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h
index 13e5fb653f5b..c22a599bfe42 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h
@@ -71,6 +71,7 @@ extern "C" {
 #define	Z_ISDIR(type)	S_ISDIR(type)
 
 #define	zn_has_cached_data(zp)	((zp)->z_is_mapped)
+#define	zn_flush_cached_data(zp, sync)	write_inode_now(ZTOI(zp), sync)
 #define	zn_rlimit_fsize(zp, uio, td)	(0)
 
 #define	zhold(zp)	igrab(ZTOI((zp)))
diff --git a/sys/contrib/openzfs/include/sys/dnode.h b/sys/contrib/openzfs/include/sys/dnode.h
index 3208b60f0e7b..4c075a805603 100644
--- a/sys/contrib/openzfs/include/sys/dnode.h
+++ b/sys/contrib/openzfs/include/sys/dnode.h
@@ -425,6 +425,7 @@ boolean_t dnode_add_ref(dnode_t *dn, void *ref);
 void dnode_rele(dnode_t *dn, void *ref);
 void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting);
 int dnode_try_claim(objset_t *os, uint64_t object, int slots);
+boolean_t dnode_is_dirty(dnode_t *dn);
 void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
 void dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, void *tag);
 void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
index 5d6e98d245da..1e6d5d67c014 100644
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -2082,42 +2082,41 @@ int
 dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
 {
 	dnode_t *dn;
-	int i, err;
-	boolean_t clean = B_TRUE;
+	int err;
 
+restart:
 	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 
-	/*
-	 * Check if dnode is dirty
-	 */
-	for (i = 0; i < TXG_SIZE; i++) {
-		if (multilist_link_active(&dn->dn_dirty_link[i])) {
-			clean = B_FALSE;
-			break;
-		}
-	}
+	rw_enter(&dn->dn_struct_rwlock, RW_READER);
 
-	/*
-	 * If compatibility option is on, sync any current changes before
-	 * we go trundling through the block pointers.
-	 */
-	if (!clean && zfs_dmu_offset_next_sync) {
-		clean = B_TRUE;
-		dnode_rele(dn, FTAG);
-		txg_wait_synced(dmu_objset_pool(os), 0);
-		err = dnode_hold(os, object, FTAG, &dn);
-		if (err)
-			return (err);
-	}
+	if (dnode_is_dirty(dn)) {
+		/*
+		 * If the zfs_dmu_offset_next_sync module option is enabled
+		 * then strict hole reporting has been requested.  Dirty
+		 * dnodes must be synced to disk to accurately report all
+		 * holes.  When disabled (the default) dirty dnodes are
+		 * reported to not have any holes which is always safe.
+		 *
+		 * When called by zfs_holey_common() the zp->z_rangelock
+		 * is held to prevent zfs_write() and mmap writeback from
+		 * re-dirtying the dnode after txg_wait_synced().
+		 */
+		if (zfs_dmu_offset_next_sync) {
+			rw_exit(&dn->dn_struct_rwlock);
+			dnode_rele(dn, FTAG);
+			txg_wait_synced(dmu_objset_pool(os), 0);
+			goto restart;
+		}
 
-	if (clean)
-		err = dnode_next_offset(dn,
-		    (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
-	else
 		err = SET_ERROR(EBUSY);
+	} else {
+		err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK |
+		    (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
+	}
 
+	rw_exit(&dn->dn_struct_rwlock);
 	dnode_rele(dn, FTAG);
 
 	return (err);
diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c
index eaba9c0c0e7f..8592c5f8c3a9 100644
--- a/sys/contrib/openzfs/module/zfs/dnode.c
+++ b/sys/contrib/openzfs/module/zfs/dnode.c
@@ -1652,6 +1652,26 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots)
 	    slots, NULL, NULL));
 }
 
+/*
+ * Checks if the dnode contains any uncommitted dirty records.
+ */
+boolean_t
+dnode_is_dirty(dnode_t *dn)
+{
+	mutex_enter(&dn->dn_mtx);
+
+	for (int i = 0; i < TXG_SIZE; i++) {
+		if (list_head(&dn->dn_dirty_records[i]) != NULL) {
+			mutex_exit(&dn->dn_mtx);
+			return (B_TRUE);
+		}
+	}
+
+	mutex_exit(&dn->dn_mtx);
+
+	return (B_FALSE);
+}
+
 void
 dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
 {
diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
index 79128ed4b89f..2fbc6adbcc9a 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@@ -85,6 +85,7 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
 static int
 zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
 {
+	zfs_locked_range_t *lr;
 	uint64_t noff = (uint64_t)*off; /* new offset */
 	uint64_t file_sz;
 	int error;
@@ -100,12 +101,18 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
 	else
 		hole = B_FALSE;
 
+	/* Flush any mmap()'d data to disk */
+	if (zn_has_cached_data(zp))
+		zn_flush_cached_data(zp, B_FALSE);
+
+	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, file_sz, RL_READER);
 	error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
+	zfs_rangelock_exit(lr);
 
 	if (error == ESRCH)
 		return (SET_ERROR(ENXIO));
 
-	/* file was dirty, so fall back to using generic logic */
+	/* File was dirty, so fall back to using generic logic */
 	if (error == EBUSY) {
 		if (hole)
 			*off = file_sz;
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
index 290b9ffba65c..71ca15c179b0 100644
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -650,7 +650,7 @@ tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos',
 tags = ['functional', 'migration']
 
 [tests/functional/mmap]
-tests = ['mmap_write_001_pos', 'mmap_read_001_pos']
+tests = ['mmap_write_001_pos', 'mmap_read_001_pos', 'mmap_seek_001_pos']
 tags = ['functional', 'mmap']
 
 [tests/functional/mount]
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am
index 7fe9a2c571f8..5efc896bf639 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am
@@ -19,6 +19,7 @@ SUBDIRS = \
 	mktree \
 	mmap_exec \
 	mmap_libaio \
+	mmap_seek \
 	mmapwrite \
 	nvlist_to_lua \
 	randwritecomp \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
index 299653547759..e8134b37eb35 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
@@ -210,6 +210,7 @@ export ZFSTEST_FILES='badsend
     mktree
     mmap_exec
     mmap_libaio
+    mmap_seek
     mmapwrite
     nvlist_to_lua
     randfree_file
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
index e93e299ea25a..738a6ffe28fc 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
@@ -32,6 +32,7 @@ DEADMAN_FAILMODE		deadman_failmode		zfs_deadman_failmode
 DEADMAN_SYNCTIME_MS		deadman_synctime_ms		zfs_deadman_synctime_ms
 DEADMAN_ZIOTIME_MS		deadman_ziotime_ms		zfs_deadman_ziotime_ms
 DISABLE_IVSET_GUID_CHECK	disable_ivset_guid_check	zfs_disable_ivset_guid_check
+DMU_OFFSET_NEXT_SYNC		dmu_offset_next_sync		zfs_dmu_offset_next_sync
 INITIALIZE_CHUNK_SIZE		initialize_chunk_size		zfs_initialize_chunk_size
 INITIALIZE_VALUE		initialize_value		zfs_initialize_value
 KEEP_LOG_SPACEMAPS_AT_EXPORT	keep_log_spacemaps_at_export	zfs_keep_log_spacemaps_at_export
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mmap/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mmap/Makefile.am
index 2adc398b8c09..b26791ee7ce0 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mmap/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mmap/Makefile.am
@@ -4,7 +4,8 @@ dist_pkgdata_SCRIPTS = \
 	cleanup.ksh \
 	mmap_read_001_pos.ksh \
 	mmap_write_001_pos.ksh \
-	mmap_libaio_001_pos.ksh
+	mmap_libaio_001_pos.ksh \
+	mmap_seek_001_pos.ksh
 
 dist_pkgdata_DATA = \
 	mmap.cfg
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mmap/mmap_seek_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mmap/mmap_seek_001_pos.ksh
new file mode 100755
index 000000000000..6188549ad8d2
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mmap/mmap_seek_001_pos.ksh
@@ -0,0 +1,67 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/mmap/mmap.cfg
+
+#
+# DESCRIPTION:
+# lseek() data/holes for an mmap()'d file.
+#
+# STRATEGY:
+# 1. Enable compression and hole reporting for dirty files.
+# 2. Call mmap_seek binary test case for various record sizes.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	log_must zfs set compression=off $TESTPOOL/$TESTFS
+	log_must zfs set recordsize=128k $TESTPOOL/$TESTFS
+	log_must rm -f $TESTDIR/test-mmap-file
+	log_must set_tunable64 DMU_OFFSET_NEXT_SYNC $dmu_offset_next_sync
+}
+
+log_assert "lseek() data/holes for an mmap()'d file."
+
+log_onexit cleanup
+
+# Enable hole reporting for dirty files.
+typeset dmu_offset_next_sync=$(get_tunable DMU_OFFSET_NEXT_SYNC)
+log_must set_tunable64 DMU_OFFSET_NEXT_SYNC 1
+
+# Compression must be enabled to convert zero'd blocks to holes.
+# This behavior is checked by the mmap_seek test.
+log_must zfs set compression=on $TESTPOOL/$TESTFS
+
+for bs in 4096 8192 16384 32768 65536 131072; do
+	log_must zfs set recordsize=$bs $TESTPOOL/$TESTFS
+	log_must mmap_seek $TESTDIR/test-mmap-file $((1024*1024)) $bs
+	log_must rm $TESTDIR/test-mmap-file
+done
+
+log_pass "lseek() data/holes for an mmap()'d file succeeded."
diff --git a/tests/zfs-tests/cmd/mmap_seek/.gitignore b/tests/zfs-tests/cmd/mmap_seek/.gitignore
new file mode 100644
index 000000000000..6b05a7917500
--- /dev/null
+++ b/tests/zfs-tests/cmd/mmap_seek/.gitignore
@@ -0,0 +1 @@
+/mmap_seek
diff --git a/tests/zfs-tests/cmd/mmap_seek/Makefile.am b/tests/zfs-tests/cmd/mmap_seek/Makefile.am
new file mode 100644
index 000000000000..b938931125f5
--- /dev/null
+++ b/tests/zfs-tests/cmd/mmap_seek/Makefile.am
@@ -0,0 +1,6 @@
+include $(top_srcdir)/config/Rules.am
+
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
+
+pkgexec_PROGRAMS = mmap_seek
+mmap_seek_SOURCES = mmap_seek.c
diff --git a/tests/zfs-tests/cmd/mmap_seek/mmap_seek.c b/tests/zfs-tests/cmd/mmap_seek/mmap_seek.c
new file mode 100644
index 000000000000..f476e1dba9a4
--- /dev/null
+++ b/tests/zfs-tests/cmd/mmap_seek/mmap_seek.c
@@ -0,0 +1,147 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <errno.h>
+
+static void
+seek_data(int fd, off_t offset, off_t expected)
+{
+	off_t data_offset = lseek(fd, offset, SEEK_DATA);
+	if (data_offset != expected) {
+		fprintf(stderr, "lseek(fd, %d, SEEK_DATA) = %d (expected %d)\n",
+		    (int)offset, (int)data_offset, (int)expected);
+		exit(2);
+	}
+}
+
+static void
+seek_hole(int fd, off_t offset, off_t expected)
+{
+	off_t hole_offset = lseek(fd, offset, SEEK_HOLE);
+	if (hole_offset != expected) {
+		fprintf(stderr, "lseek(fd, %d, SEEK_HOLE) = %d (expected %d)\n",
+		    (int)offset, (int)hole_offset, (int)expected);
+		exit(2);
+	}
+}
+
+int
+main(int argc, char **argv)
+{
+	char *execname = argv[0];
+	char *file_path = argv[1];
+	char *buf = NULL;
+	int err;
+
+	if (argc != 4) {
+		(void) printf("usage: %s <file name> <file size> "
+		    "<block size>\n", argv[0]);
+		exit(1);
+	}
+
+	int fd = open(file_path, O_RDWR | O_CREAT, 0666);
+	if (fd == -1) {
+		(void) fprintf(stderr, "%s: %s: ", execname, file_path);
+		perror("open");
+		exit(2);
+	}
+
+	off_t file_size = atoi(argv[2]);
+	off_t block_size = atoi(argv[3]);
+
+	if (block_size * 2 > file_size) {
+		(void) fprintf(stderr, "file size must be at least "
+		    "double the block size\n");
+		exit(2);
+	}
+
+	err = ftruncate(fd, file_size);
+	if (err == -1) {
+		perror("ftruncate");
+		exit(2);
+	}
+
+	if ((buf = mmap(NULL, file_size, PROT_READ | PROT_WRITE,
+	    MAP_SHARED, fd, 0)) == MAP_FAILED) {
+		perror("mmap");
+		exit(2);
+	}
+
+	/* Verify the file is sparse and reports no data. */
+	seek_data(fd, 0, -1);
+
+	/* Verify the file is reported as a hole. */
+	seek_hole(fd, 0, 0);
+
+	/* Verify search beyond end of file is an error. */
+	seek_data(fd, 2 * file_size, -1);
+	seek_hole(fd, 2 * file_size, -1);
+
+	/* Dirty the first byte. */
+	memset(buf, 'a', 1);
+	seek_data(fd, 0, 0);
+	seek_data(fd, block_size, -1);
+	seek_hole(fd, 0, block_size);
+	seek_hole(fd, block_size, block_size);
+
+	/* Dirty the first half of the file. */
+	memset(buf, 'b', file_size / 2);
+	seek_data(fd, 0, 0);
+	seek_data(fd, block_size, block_size);
+	seek_hole(fd, 0, P2ROUNDUP(file_size / 2, block_size));
+	seek_hole(fd, block_size, P2ROUNDUP(file_size / 2, block_size));
+
+	/* Dirty the whole file. */
+	memset(buf, 'c', file_size);
+	seek_data(fd, 0, 0);
+	seek_data(fd, file_size * 3 / 4,
+	    P2ROUNDUP(file_size * 3 / 4, block_size));
+	seek_hole(fd, 0, file_size);
+	seek_hole(fd, file_size / 2, file_size);
+
+	/* Punch a hole (required compression be enabled). */
+	memset(buf + block_size, 0, block_size);
+	seek_data(fd, 0, 0);
+	seek_data(fd, block_size, 2 * block_size);
+	seek_hole(fd, 0, block_size);
+	seek_hole(fd, block_size, block_size);
+	seek_hole(fd, 2 * block_size, file_size);
+
+	err = munmap(buf, file_size);
+	if (err == -1) {
+		perror("munmap");
+		exit(2);
+	}
+
+	close(fd);
+
+	return (0);
+}