git: 89d57b94d776 - main - fusefs: implement VOP_DEALLOCATE

From: Alan Somers <asomers_at_FreeBSD.org>
Date: Wed, 19 Jan 2022 04:25:41 UTC
The branch main has been updated by asomers:

URL: https://cgit.FreeBSD.org/src/commit/?id=89d57b94d776877f77cc04752e449dac57a14618

commit 89d57b94d776877f77cc04752e449dac57a14618
Author:     Alan Somers <asomers@FreeBSD.org>
AuthorDate: 2022-01-09 02:35:05 +0000
Commit:     Alan Somers <asomers@FreeBSD.org>
CommitDate: 2022-01-19 04:13:02 +0000

    fusefs: implement VOP_DEALLOCATE
    
    MFC after:      Never
    Reviewed by:    khng
    Differential Revision: https://reviews.freebsd.org/D33800
---
 sys/fs/fuse/fuse_kernel.h                  |   6 +
 sys/fs/fuse/fuse_vnops.c                   | 115 +++++++-
 tests/sys/fs/fusefs/default_permissions.cc | 101 ++++++-
 tests/sys/fs/fusefs/fallocate.cc           | 457 ++++++++++++++++++++++++++++-
 4 files changed, 660 insertions(+), 19 deletions(-)

diff --git a/sys/fs/fuse/fuse_kernel.h b/sys/fs/fuse/fuse_kernel.h
index 51445637b9a8..7d13545643ea 100644
--- a/sys/fs/fuse/fuse_kernel.h
+++ b/sys/fs/fuse/fuse_kernel.h
@@ -400,6 +400,12 @@ struct fuse_file_lock {
  */
 #define FUSE_FSYNC_FDATASYNC	(1 << 0)
 
+/**
+ * Fallocate flags.
+ */
+#define FUSE_FALLOC_FL_KEEP_SIZE	0x1
+#define FUSE_FALLOC_FL_PUNCH_HOLE	0x2
+
 enum fuse_opcode {
 	FUSE_LOOKUP		= 1,
 	FUSE_FORGET		= 2,  /* no reply */
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index 31398596bc17..3384b7b84b3a 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -133,6 +133,7 @@ static vop_close_t fuse_fifo_close;
 static vop_close_t fuse_vnop_close;
 static vop_copy_file_range_t fuse_vnop_copy_file_range;
 static vop_create_t fuse_vnop_create;
+static vop_deallocate_t fuse_vnop_deallocate;
 static vop_deleteextattr_t fuse_vnop_deleteextattr;
 static vop_fdatasync_t fuse_vnop_fdatasync;
 static vop_fsync_t fuse_vnop_fsync;
@@ -189,6 +190,7 @@ struct vop_vector fuse_vnops = {
 	.vop_close = fuse_vnop_close,
 	.vop_copy_file_range = fuse_vnop_copy_file_range,
 	.vop_create = fuse_vnop_create,
+	.vop_deallocate = fuse_vnop_deallocate,
 	.vop_deleteextattr = fuse_vnop_deleteextattr,
 	.vop_fsync = fuse_vnop_fsync,
 	.vop_fdatasync = fuse_vnop_fdatasync,
@@ -621,11 +623,8 @@ fuse_vnop_allocate(struct vop_allocate_args *ap)
 	} else if (err == EOPNOTSUPP) {
 		/*
 		 * The file system server does not support FUSE_FALLOCATE with
-		 * the supplied mode.  That's effectively the same thing as
-		 * ENOSYS since we only ever issue mode=0.
-		 * TODO: revise this section once we support fspacectl.
+		 * the supplied mode for this particular file.
 		 */
-		fsess_set_notimpl(mp, FUSE_FALLOCATE);
 		err = EINVAL;
 	} else if (!err) {
 		*offset += *len;
@@ -2900,6 +2899,114 @@ out:
 	return (err);
 }
 
+/* Punch a hole with FUSE_FALLOCATE, falling back to vop_stddeallocate.
+    struct vop_deallocate_args {
+	struct vop_generic_args a_gen;
+	struct vnode *a_vp;
+	off_t *a_offset;
+	off_t *a_len;
+	int a_flags;
+	int a_ioflag;
+        struct ucred *a_cred;
+    };
+*/
+static int
+fuse_vnop_deallocate(struct vop_deallocate_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct mount *mp = vnode_mount(vp);
+	struct fuse_filehandle *fufh;
+	struct fuse_dispatcher fdi;
+	struct fuse_fallocate_in *ffi;
+	struct ucred *cred = ap->a_cred;
+	pid_t pid = curthread->td_proc->p_pid;
+	off_t *len = ap->a_len;
+	off_t *offset = ap->a_offset;
+	int ioflag = ap->a_ioflag;
+	off_t filesize;
+	int err;
+	bool closefufh = false;
+
+	if (fuse_isdeadfs(vp))
+		return (ENXIO);
+
+	if (vfs_isrdonly(mp))
+		return (EROFS);
+
+	if (fsess_not_impl(mp, FUSE_FALLOCATE))
+		goto fallback;
+
+	err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
+	if (err == EBADF && (mp->mnt_flag & MNT_EXPORTED)) {
+		/*
+		 * nfsd will do I/O without first doing VOP_OPEN.  Open the
+		 * file implicitly here, and close it again before returning.
+		 */
+		err = fuse_filehandle_open(vp, FWRITE, &fufh, curthread, cred);
+		closefufh = true;
+	}
+	if (err)
+		return (err);
+
+	fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE);
+
+	err = fuse_vnode_size(vp, &filesize, cred, curthread);
+	if (err)
+		goto out;
+	fuse_inval_buf_range(vp, filesize, *offset, *offset + *len);
+
+	fdisp_init(&fdi, sizeof(*ffi));
+	fdisp_make_vp(&fdi, FUSE_FALLOCATE, vp, curthread, cred);
+	ffi = fdi.indata;
+	ffi->fh = fufh->fh_id;
+	ffi->offset = *offset;
+	ffi->length = *len;
+	/*
+	 * FreeBSD's fspacectl is equivalent to Linux's fallocate with
+	 * mode == FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE
+	 */
+	ffi->mode = FUSE_FALLOC_FL_PUNCH_HOLE | FUSE_FALLOC_FL_KEEP_SIZE;
+	err = fdisp_wait_answ(&fdi);
+	fdisp_destroy(&fdi);	/* release fdi on every exit path */
+	if (err == ENOSYS) {
+		fsess_set_notimpl(mp, FUSE_FALLOCATE);
+		goto fallback;
+	} else if (err == EOPNOTSUPP) {
+		/*
+		 * The file system server does not support FUSE_FALLOCATE with
+		 * the supplied mode for this particular file.
+		 */
+		goto fallback;
+	} else if (!err) {
+		/*
+		 * Clip the returned offset to EoF.  Do it here rather than
+		 * before FUSE_FALLOCATE just in case the kernel's cached file
+		 * size is out of date.  Unfortunately, FUSE does not return
+		 * any information about filesize from that operation.
+		 */
+		*offset = MIN(*offset + *len, filesize);
+		*len = 0;
+		fuse_vnode_undirty_cached_timestamps(vp, false);
+		fuse_internal_clear_suid_on_write(vp, cred, curthread);
+
+		if (ioflag & IO_SYNC)
+			err = fuse_internal_fsync(vp, curthread, MNT_WAIT,
+			    false);
+	}
+
+out:
+	if (closefufh)
+		fuse_filehandle_close(vp, fufh, curthread, cred);
+
+	return (err);
+
+fallback:
+	if (closefufh)
+		fuse_filehandle_close(vp, fufh, curthread, cred);
+
+	return (vop_stddeallocate(ap));
+}
+
 /*
     struct vop_deleteextattr_args {
 	struct vop_generic_args a_gen;
diff --git a/tests/sys/fs/fusefs/default_permissions.cc b/tests/sys/fs/fusefs/default_permissions.cc
index 0739ad48f1e2..a84f366bd736 100644
--- a/tests/sys/fs/fusefs/default_permissions.cc
+++ b/tests/sys/fs/fusefs/default_permissions.cc
@@ -161,6 +161,7 @@ class Access: public DefaultPermissions {};
 class Chown: public DefaultPermissions {};
 class Chgrp: public DefaultPermissions {};
 class CopyFileRange: public DefaultPermissions {};
+class Fspacectl: public DefaultPermissions {};
 class Lookup: public DefaultPermissions {};
 class Open: public DefaultPermissions {};
 class PosixFallocate: public DefaultPermissions {};
@@ -835,6 +836,104 @@ TEST_F(Listextattr, system)
 	ASSERT_EQ(EPERM, errno);
 }
 
+/* A write by a non-owner should clear a file's SGID bit */
+TEST_F(Fspacectl, clear_sgid)
+{
+	const char FULLPATH[] = "mountpoint/file.txt";
+	const char RELPATH[] = "file.txt";
+	struct stat sb;
+	struct spacectl_range rqsr;
+	uint64_t ino = 42;
+	mode_t oldmode = 02777;
+	mode_t newmode = 0777;
+	off_t fsize = 16;
+	off_t off = 8;
+	off_t len = 8;
+	int fd;
+
+	expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1);
+	FuseTest::expect_lookup(RELPATH, ino, S_IFREG | oldmode, fsize,
+	    1, UINT64_MAX, 0, 0);
+	expect_open(ino, 0, 1);
+	expect_fallocate(ino, off, len,
+		FUSE_FALLOC_FL_KEEP_SIZE | FUSE_FALLOC_FL_PUNCH_HOLE, 0);
+	expect_chmod(ino, newmode, fsize);
+
+	fd = open(FULLPATH, O_WRONLY);
+	ASSERT_LE(0, fd) << strerror(errno);
+	rqsr.r_len = len;
+	rqsr.r_offset = off;
+	EXPECT_EQ(0, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, NULL));
+	ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno);
+	EXPECT_EQ(S_IFREG | newmode, sb.st_mode);
+
+	leak(fd);
+}
+
+/* A write by a non-owner should clear a file's SUID bit */
+TEST_F(Fspacectl, clear_suid)
+{
+	const char FULLPATH[] = "mountpoint/file.txt";
+	const char RELPATH[] = "file.txt";
+	struct stat sb;
+	struct spacectl_range rqsr;
+	uint64_t ino = 42;
+	mode_t oldmode = 04777;
+	mode_t newmode = 0777;
+	off_t fsize = 16;
+	off_t off = 8;
+	off_t len = 8;
+	int fd;
+
+	expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1);
+	FuseTest::expect_lookup(RELPATH, ino, S_IFREG | oldmode, fsize,
+	    1, UINT64_MAX, 0, 0);
+	expect_open(ino, 0, 1);
+	expect_fallocate(ino, off, len,
+		FUSE_FALLOC_FL_KEEP_SIZE | FUSE_FALLOC_FL_PUNCH_HOLE, 0);
+	expect_chmod(ino, newmode, fsize);
+
+	fd = open(FULLPATH, O_WRONLY);
+	ASSERT_LE(0, fd) << strerror(errno);
+	rqsr.r_len = len;
+	rqsr.r_offset = off;
+	EXPECT_EQ(0, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, NULL));
+	ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno);
+	EXPECT_EQ(S_IFREG | newmode, sb.st_mode);
+
+	leak(fd);
+}
+
+/*
+ * fspacectl() of a file without writable permissions should succeed as
+ * long as the file descriptor is writable.  This is important when combined
+ * with O_CREAT
+ */
+TEST_F(Fspacectl, posix_fallocate_of_newly_created_file)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	struct spacectl_range rqsr;
+	const uint64_t ino = 42;
+	off_t off = 8;
+	off_t len = 8;
+	int fd;
+
+	expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0777, UINT64_MAX, 1);
+	EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH)
+		.WillOnce(Invoke(ReturnErrno(ENOENT)));
+	expect_create(RELPATH, ino);
+	expect_fallocate(ino, off, len,
+		FUSE_FALLOC_FL_KEEP_SIZE | FUSE_FALLOC_FL_PUNCH_HOLE, 0);
+
+	fd = open(FULLPATH, O_CREAT | O_RDWR, 0);
+	ASSERT_LE(0, fd) << strerror(errno);
+	rqsr.r_len = len;
+	rqsr.r_offset = off;
+	EXPECT_EQ(0, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, NULL));
+	leak(fd);
+}
+
 /* A component of the search path lacks execute permissions */
 TEST_F(Lookup, eacces)
 {
@@ -939,7 +1038,7 @@ TEST_F(PosixFallocate, clear_suid)
 }
 
 /*
- * posix_fallcoate() of a file without writable permissions should succeed as
+ * posix_fallocate() of a file without writable permissions should succeed as
  * long as the file descriptor is writable.  This is important when combined
  * with O_CREAT
  */
diff --git a/tests/sys/fs/fusefs/fallocate.cc b/tests/sys/fs/fusefs/fallocate.cc
index 386a3ac746ea..7ad2644980cf 100644
--- a/tests/sys/fs/fusefs/fallocate.cc
+++ b/tests/sys/fs/fusefs/fallocate.cc
@@ -45,7 +45,93 @@ extern "C" {
 
 using namespace testing;
 
-class Fallocate: public FuseTest{};
+/* Is buf all zero?  An empty buffer (size == 0) counts as all zero. */
+static bool
+is_zero(const char *buf, uint64_t size)
+{
+    return size == 0 || (buf[0] == 0 && !memcmp(buf, buf + 1, size - 1));
+}
+
+class Fallocate: public FuseTest {
+public:
+/*
+ * expect VOP_DEALLOCATE to be implemented by vop_stddeallocate.
+ */
+void expect_vop_stddeallocate(uint64_t ino, uint64_t off, uint64_t length)
+{
+	/* XXX read offset and size may depend on cache mode */
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_READ &&
+				in.header.nodeid == ino &&
+				in.body.read.offset <= off &&
+				in.body.read.offset + in.body.read.size >=
+					off + length);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnImmediate([=](auto in, auto& out) {
+		out.header.len = sizeof(struct fuse_out_header) +
+			in.body.read.size;
+		memset(out.body.bytes, 'X', in.body.read.size);
+	}))).RetiresOnSaturation();
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			const char *buf = (const char*)in.body.bytes +
+				sizeof(struct fuse_write_in);
+
+			return (in.header.opcode == FUSE_WRITE &&
+				in.header.nodeid == ino &&
+				in.body.write.offset == off  &&
+				in.body.write.size == length &&
+				is_zero(buf, length));
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, write);
+		out.body.write.size = length;
+	})));
+}
+};
+
+class Fspacectl: public Fallocate {};
+
+class Fspacectl_7_18: public Fspacectl {
+public:
+virtual void SetUp() {
+	m_kernel_minor_version = 18;
+	Fspacectl::SetUp();
+}
+};
+
+class FspacectlCache: public Fspacectl, public WithParamInterface<cache_mode> {
+public:
+bool m_direct_io;
+
+FspacectlCache(): m_direct_io(false) {};
+
+virtual void SetUp() {
+	int cache_mode = GetParam();
+	switch (cache_mode) {
+		case Uncached:
+			m_direct_io = true;
+			break;
+		case WritebackAsync:
+			m_async = true;
+			/* FALLTHROUGH */
+		case Writeback:
+			m_init_flags |= FUSE_WRITEBACK_CACHE;
+			/* FALLTHROUGH */
+		case Writethrough:
+			break;
+		default:
+			FAIL() << "Unknown cache mode";
+	}
+
+	FuseTest::SetUp();
+	if (IsSkipped())
+		return;
+}
+};
 
 class PosixFallocate: public Fallocate {
 public:
@@ -83,6 +169,331 @@ virtual void SetUp() {
 };
 
 
+/*
+ * If the server returns ENOSYS, it indicates that the server does not support
+ * FUSE_FALLOCATE.  This and future calls should fall back to vop_stddeallocate.
+ */
+TEST_F(Fspacectl, enosys)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	off_t fsize = 1 << 20;
+	off_t off0 = 100;
+	off_t len0 = 500;
+	struct spacectl_range rqsr = { .r_offset = off0, .r_len = len0 };
+	uint64_t ino = 42;
+	uint64_t off1 = fsize;
+	uint64_t len1 = 1000;
+	off_t off2 = fsize / 2;
+	off_t len2 = 500;
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	expect_fallocate(ino, off0, len0,
+		FUSE_FALLOC_FL_KEEP_SIZE | FUSE_FALLOC_FL_PUNCH_HOLE, ENOSYS);
+	expect_vop_stddeallocate(ino, off0, len0);
+	expect_vop_stddeallocate(ino, off2, len2);
+
+	fd = open(FULLPATH, O_RDWR);
+	ASSERT_LE(0, fd) << strerror(errno);
+	EXPECT_EQ(0, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, NULL));
+
+	/* Subsequent calls shouldn't query the daemon either */
+	rqsr.r_offset = off2;
+	rqsr.r_len = len2;
+	EXPECT_EQ(0, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, NULL));
+
+	/* Neither should posix_fallocate query the daemon */
+	EXPECT_EQ(EINVAL, posix_fallocate(fd, off1, len1));
+
+	leak(fd);
+}
+
+/*
+ * EOPNOTSUPP means "the file system does not support fallocate with the
+ * supplied mode on this particular file".  So we should fall back, but not
+ * assume anything about whether the operation will fail on a different file or
+ * with a different mode.
+ */
+TEST_F(Fspacectl, eopnotsupp)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	struct spacectl_range rqsr;
+	uint64_t ino = 42;
+	uint64_t fsize = 1 << 20;
+	uint64_t off0 = 500;
+	uint64_t len = 1000;
+	uint64_t off1 = fsize / 2;
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	expect_fallocate(ino, off0, len,
+		FUSE_FALLOC_FL_KEEP_SIZE | FUSE_FALLOC_FL_PUNCH_HOLE,
+	                EOPNOTSUPP);
+	expect_vop_stddeallocate(ino, off0, len);
+	expect_fallocate(ino, off1, len,
+		FUSE_FALLOC_FL_KEEP_SIZE | FUSE_FALLOC_FL_PUNCH_HOLE,
+	                EOPNOTSUPP);
+	expect_vop_stddeallocate(ino, off1, len);
+	expect_fallocate(ino, fsize, len, 0, 0);
+
+	fd = open(FULLPATH, O_RDWR);
+	ASSERT_LE(0, fd) << strerror(errno);
+
+	/*
+	 * Though the FUSE daemon will reject the call, the kernel should fall
+	 * back to a read-modify-write approach.
+	 */
+	rqsr.r_offset = off0;
+	rqsr.r_len = len;
+	EXPECT_EQ(0, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, NULL));
+
+	/* Subsequent calls should still query the daemon */
+	rqsr.r_offset = off1;
+	rqsr.r_len = len;
+	EXPECT_EQ(0, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, NULL));
+
+	/* And subsequent posix_fallocate calls should query the daemon too */
+	EXPECT_EQ(0, posix_fallocate(fd, fsize, len));
+
+	leak(fd);
+}
+
+TEST_F(Fspacectl, erofs)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	struct statfs statbuf;
+	uint64_t fsize = 2000;
+	struct spacectl_range rqsr = { .r_offset = 0, .r_len = 1 };
+	struct iovec *iov = NULL;
+	int iovlen = 0;
+	uint64_t ino = 42;
+	int fd;
+	int newflags;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([](auto in) {
+			return (in.header.opcode == FUSE_STATFS);
+		}, Eq(true)),
+		_)
+	).WillRepeatedly(Invoke(ReturnImmediate([=](auto in __unused, auto& out)
+	{
+		/*
+		 * All of the fields except f_flags are don't care, and f_flags
+		 * is set by the VFS
+		 */
+		SET_OUT_HEADER_LEN(out, statfs);
+	})));
+
+	fd = open(FULLPATH, O_RDWR);
+	ASSERT_LE(0, fd) << strerror(errno);
+
+	/* Remount read-only */
+	ASSERT_EQ(0, statfs("mountpoint", &statbuf)) << strerror(errno);
+	newflags = statbuf.f_flags | MNT_UPDATE | MNT_RDONLY;
+	build_iovec(&iov, &iovlen, "fstype", (void*)statbuf.f_fstypename, -1);
+	build_iovec(&iov, &iovlen, "fspath", (void*)statbuf.f_mntonname, -1);
+	build_iovec(&iov, &iovlen, "from", __DECONST(void *, "/dev/fuse"), -1);
+	ASSERT_EQ(0, nmount(iov, iovlen, newflags)) << strerror(errno);
+
+	EXPECT_EQ(-1, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, NULL));
+	EXPECT_EQ(EROFS, errno);
+
+	leak(fd);
+}
+
+TEST_F(Fspacectl, ok)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	struct spacectl_range rqsr, rmsr;
+	struct stat sb0, sb1;
+	uint64_t ino = 42;
+	uint64_t fsize = 2000;
+	uint64_t offset = 500;
+	uint64_t length = 1000;
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	expect_fallocate(ino, offset, length,
+		FUSE_FALLOC_FL_KEEP_SIZE | FUSE_FALLOC_FL_PUNCH_HOLE, 0);
+
+	fd = open(FULLPATH, O_RDWR);
+	ASSERT_LE(0, fd) << strerror(errno);
+	ASSERT_EQ(0, fstat(fd, &sb0)) << strerror(errno);
+	rqsr.r_offset = offset;
+	rqsr.r_len = length;
+	EXPECT_EQ(0, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, &rmsr));
+	EXPECT_EQ(0, rmsr.r_len);
+	EXPECT_EQ((off_t)(offset + length), rmsr.r_offset);
+
+	/*
+	 * The file's attributes should not have been invalidated, so this fstat
+	 * will not requery the daemon.
+	 */
+	EXPECT_EQ(0, fstat(fd, &sb1));
+	EXPECT_EQ(fsize, (uint64_t)sb1.st_size);
+
+	/* mtime and ctime should be updated */
+	EXPECT_EQ(sb0.st_atime, sb1.st_atime);
+	EXPECT_NE(sb0.st_mtime, sb1.st_mtime);
+	EXPECT_NE(sb0.st_ctime, sb1.st_ctime);
+
+	leak(fd);
+}
+
+/* The returned rmsr.r_offset should be clipped at EoF */
+TEST_F(Fspacectl, past_eof)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	struct spacectl_range rqsr, rmsr;
+	uint64_t ino = 42;
+	uint64_t fsize = 1000;
+	uint64_t offset = 1500;
+	uint64_t length = 1000;
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	expect_fallocate(ino, offset, length,
+		FUSE_FALLOC_FL_KEEP_SIZE | FUSE_FALLOC_FL_PUNCH_HOLE, 0);
+
+	fd = open(FULLPATH, O_RDWR);
+	ASSERT_LE(0, fd) << strerror(errno);
+	rqsr.r_offset = offset;
+	rqsr.r_len = length;
+	EXPECT_EQ(0, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, &rmsr));
+	EXPECT_EQ(0, rmsr.r_len);
+	EXPECT_EQ((off_t)fsize, rmsr.r_offset);
+
+	leak(fd);
+}
+
+/* The returned rmsr.r_offset should be clipped at EoF */
+TEST_F(Fspacectl, spans_eof)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	struct spacectl_range rqsr, rmsr;
+	uint64_t ino = 42;
+	uint64_t fsize = 1000;
+	uint64_t offset = 500;
+	uint64_t length = 1000;
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	expect_fallocate(ino, offset, length,
+		FUSE_FALLOC_FL_KEEP_SIZE | FUSE_FALLOC_FL_PUNCH_HOLE, 0);
+
+	fd = open(FULLPATH, O_RDWR);
+	ASSERT_LE(0, fd) << strerror(errno);
+	rqsr.r_offset = offset;
+	rqsr.r_len = length;
+	EXPECT_EQ(0, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, &rmsr));
+	EXPECT_EQ(0, rmsr.r_len);
+	EXPECT_EQ((off_t)fsize, rmsr.r_offset);
+
+	leak(fd);
+}
+
+/*
+ * With older servers, no FUSE_FALLOCATE should be attempted.  The kernel
+ * should fall back to vop_stddeallocate.
+ */
+TEST_F(Fspacectl_7_18, ok)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	struct spacectl_range rqsr, rmsr;
+	void *buf;
+	uint64_t ino = 42;
+	uint64_t fsize = 2000;
+	uint64_t offset = 500;
+	uint64_t length = 1000;
+	int fd;
+
+	buf = malloc(length);
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	expect_vop_stddeallocate(ino, offset, length);
+
+	fd = open(FULLPATH, O_RDWR);
+	ASSERT_LE(0, fd) << strerror(errno);
+	rqsr.r_offset = offset;
+	rqsr.r_len = length;
+	EXPECT_EQ(0, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, &rmsr));
+	EXPECT_EQ(0, rmsr.r_len);
+	EXPECT_EQ((off_t)(offset + length), rmsr.r_offset);
+
+	leak(fd);
+	free(buf);
+}
+
+/*
+ * A successful fspacectl should clear the zeroed data from the kernel cache.
+ */
+TEST_P(FspacectlCache, clears_cache)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const char *CONTENTS = "abcdefghijklmnopqrstuvwxyz";
+	struct spacectl_range rqsr, rmsr;
+	uint64_t ino = 42;
+	ssize_t bufsize = strlen(CONTENTS);
+	uint64_t fsize = bufsize;
+	uint8_t buf[bufsize];
+	char zbuf[bufsize];
+	uint64_t offset = 0;
+	uint64_t length = bufsize;
+	int fd;
+
+	bzero(zbuf, bufsize);
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	/* NB: expectations are applied in LIFO order */
+	expect_read(ino, 0, fsize, fsize, zbuf);
+	expect_read(ino, 0, fsize, fsize, CONTENTS);
+	expect_fallocate(ino, offset, length,
+		FUSE_FALLOC_FL_KEEP_SIZE | FUSE_FALLOC_FL_PUNCH_HOLE, 0);
+
+	fd = open(FULLPATH, O_RDWR);
+	ASSERT_LE(0, fd) << strerror(errno);
+
+	/* Populate the cache */
+	ASSERT_EQ(fsize, (uint64_t)pread(fd, buf, bufsize, 0))
+		<< strerror(errno);
+	ASSERT_EQ(0, memcmp(buf, CONTENTS, fsize));
+
+	/* Zero the file */
+	rqsr.r_offset = offset;
+	rqsr.r_len = length;
+	EXPECT_EQ(0, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, &rmsr));
+	EXPECT_EQ(0, rmsr.r_len);
+	EXPECT_EQ((off_t)(offset + length), rmsr.r_offset);
+
+	/* Read again.  This should query the daemon */
+	ASSERT_EQ(fsize, (uint64_t)pread(fd, buf, bufsize, 0))
+		<< strerror(errno);
+	ASSERT_EQ(0, memcmp(buf, zbuf, fsize));
+
+	leak(fd);
+}
+
+INSTANTIATE_TEST_CASE_P(FspacectlCache, FspacectlCache,
+	Values(Uncached, Writethrough, Writeback),
+);
+
 /*
  * If the server returns ENOSYS, it indicates that the server does not support
  * FUSE_FALLOCATE.  This and future calls should return EINVAL.
@@ -92,50 +503,68 @@ TEST_F(PosixFallocate, enosys)
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	uint64_t ino = 42;
-	uint64_t offset = 0;
-	uint64_t length = 1000;
+	uint64_t off0 = 0;
+	uint64_t len0 = 1000;
+	off_t off1 = 100;
+	off_t len1 = 200;
+	uint64_t fsize = 500;
+	struct spacectl_range rqsr = { .r_offset = off1, .r_len = len1 };
 	int fd;
 
-	expect_lookup(RELPATH, ino, S_IFREG | 0644, 0, 1);
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
 	expect_open(ino, 0, 1);
-	expect_fallocate(ino, offset, length, 0, ENOSYS);
+	expect_fallocate(ino, off0, len0, 0, ENOSYS);
+	expect_vop_stddeallocate(ino, off1, len1);
 
 	fd = open(FULLPATH, O_RDWR);
 	ASSERT_LE(0, fd) << strerror(errno);
-	EXPECT_EQ(EINVAL, posix_fallocate(fd, offset, length));
+	EXPECT_EQ(EINVAL, posix_fallocate(fd, off0, len0));
 
 	/* Subsequent calls shouldn't query the daemon*/
-	EXPECT_EQ(EINVAL, posix_fallocate(fd, offset, length));
+	EXPECT_EQ(EINVAL, posix_fallocate(fd, off0, len0));
+
+	/* Neither should VOP_DEALLOCATE query the daemon */
+	EXPECT_EQ(0, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, NULL));
 
 	leak(fd);
 }
 
 /*
- * EOPNOTSUPP means either "the file system does not support fallocate" or "the
- * file system does not support fallocate with the supplied mode".  fusefs
- * should conservatively assume the latter, and not issue any more fallocate
- * operations with the same mode.
+ * EOPNOTSUPP means "the file system does not support fallocate with the
+ * supplied mode on this particular file".  So we should fall back, but not
+ * assume anything about whether the operation will fail on a different file or
+ * with a different mode.
  */
 TEST_F(PosixFallocate, eopnotsupp)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
+	struct spacectl_range rqsr;
 	uint64_t ino = 42;
+	uint64_t fsize = 2000;
 	uint64_t offset = 0;
 	uint64_t length = 1000;
 	int fd;
 
-	expect_lookup(RELPATH, ino, S_IFREG | 0644, 0, 1);
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
 	expect_open(ino, 0, 1);
+	expect_fallocate(ino, fsize, length, 0, EOPNOTSUPP);
 	expect_fallocate(ino, offset, length, 0, EOPNOTSUPP);
+	expect_fallocate(ino, offset, length,
+		FUSE_FALLOC_FL_KEEP_SIZE | FUSE_FALLOC_FL_PUNCH_HOLE, 0);
 
 	fd = open(FULLPATH, O_RDWR);
 	ASSERT_LE(0, fd) << strerror(errno);
-	EXPECT_EQ(EINVAL, posix_fallocate(fd, offset, length));
+	EXPECT_EQ(EINVAL, posix_fallocate(fd, fsize, length));
 
-	/* Subsequent calls shouldn't query the daemon*/
+	/* Subsequent calls should still query the daemon*/
 	EXPECT_EQ(EINVAL, posix_fallocate(fd, offset, length));
 
+	/* And subsequent VOP_DEALLOCATE calls should also query the daemon */
+	rqsr.r_len = length;
+	rqsr.r_offset = offset;
+	EXPECT_EQ(0, fspacectl(fd, SPACECTL_DEALLOC, &rqsr, 0, NULL));
+
 	leak(fd);
 }