git: 37df9d3bba85 - main - fusefs: update FUSE protocol to 7.24 and implement FUSE_LSEEK

Alan Somers asomers at FreeBSD.org
Thu Dec 31 15:52:09 UTC 2020


The branch main has been updated by asomers:

URL: https://cgit.FreeBSD.org/src/commit/?id=37df9d3bba8577fcdd63382ff5a4a5cbb4aa55b4

commit 37df9d3bba8577fcdd63382ff5a4a5cbb4aa55b4
Author:     Alan Somers <asomers at FreeBSD.org>
AuthorDate: 2020-12-29 00:26:31 +0000
Commit:     Alan Somers <asomers at FreeBSD.org>
CommitDate: 2020-12-31 15:51:47 +0000

    fusefs: update FUSE protocol to 7.24 and implement FUSE_LSEEK
    
    FUSE_LSEEK reports holes on fuse file systems, and is used for example
    by bsdtar.
    
    MFC after:      2 weeks
    Relnotes:       yes
    Reviewed by:    cem
    Differential Revision: https://reviews.freebsd.org/D27804
---
 sys/fs/fuse/fuse_internal.c   |   9 +-
 sys/fs/fuse/fuse_ipc.c        |   8 +-
 sys/fs/fuse/fuse_ipc.h        |  37 ++++-
 sys/fs/fuse/fuse_kernel.h     |  17 +-
 sys/fs/fuse/fuse_vfsops.c     |   2 +-
 sys/fs/fuse/fuse_vnops.c      | 141 +++++++++++++++--
 tests/sys/fs/fusefs/Makefile  |   1 +
 tests/sys/fs/fusefs/lseek.cc  | 360 ++++++++++++++++++++++++++++++++++++++++++
 tests/sys/fs/fusefs/mockfs.cc |  35 +++-
 tests/sys/fs/fusefs/mockfs.hh |   2 +
 10 files changed, 587 insertions(+), 25 deletions(-)

diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c
index 79365b8802df..2faad7cd8651 100644
--- a/sys/fs/fuse/fuse_internal.c
+++ b/sys/fs/fuse/fuse_internal.c
@@ -219,7 +219,7 @@ fuse_internal_access(struct vnode *vp,
 		SDT_PROBE0(fusefs, , internal, access_vadmin);
 	}
 
-	if (!fsess_isimpl(mp, FUSE_ACCESS))
+	if (fsess_not_impl(mp, FUSE_ACCESS))
 		return 0;
 
 	if ((mode & (VWRITE | VAPPEND)) != 0)
@@ -337,14 +337,14 @@ fuse_internal_fsync(struct vnode *vp,
 	int op = FUSE_FSYNC;
 	int err = 0;
 
-	if (!fsess_isimpl(vnode_mount(vp),
+	if (fsess_not_impl(vnode_mount(vp),
 	    (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) {
 		return 0;
 	}
 	if (vnode_isdir(vp))
 		op = FUSE_FSYNCDIR;
 
-	if (!fsess_isimpl(mp, op))
+	if (fsess_not_impl(mp, op))
 		return 0;
 
 	fdisp_init(&fdi, sizeof(*ffsi));
@@ -1051,6 +1051,9 @@ fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
 	else
 		data->cache_mode = FUSE_CACHE_WT;
 
+	if (!fuse_libabi_geq(data, 7, 24))
+		fsess_set_notimpl(data->mp, FUSE_LSEEK);
+
 out:
 	if (err) {
 		fdata_set_dead(data);
diff --git a/sys/fs/fuse/fuse_ipc.c b/sys/fs/fuse/fuse_ipc.c
index 776a4ecd11d7..d3738da26b34 100644
--- a/sys/fs/fuse/fuse_ipc.c
+++ b/sys/fs/fuse/fuse_ipc.c
@@ -230,7 +230,7 @@ fuse_interrupt_send(struct fuse_ticket *otick, int err)
 		 * If the fuse daemon doesn't support interrupts, then there's
 		 * nothing more that we can do
 		 */
-		if (!fsess_isimpl(data->mp, FUSE_INTERRUPT))
+		if (fsess_not_impl(data->mp, FUSE_INTERRUPT))
 			return;
 
 		/* 
@@ -423,7 +423,7 @@ fticket_wait_answer(struct fuse_ticket *ftick)
 	struct fuse_data *data = ftick->tk_data;
 	bool interrupted = false;
 
-	if (fsess_isimpl(ftick->tk_data->mp, FUSE_INTERRUPT) &&
+	if (fsess_maybe_impl(ftick->tk_data->mp, FUSE_INTERRUPT) &&
 	    data->dataflags & FSESS_INTR) {
 		SIGEMPTYSET(blockedset);
 	} else {
@@ -851,6 +851,10 @@ fuse_body_audit(struct fuse_ticket *ftick, size_t blen)
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
+	case FUSE_LSEEK:
+		err = (blen == sizeof(struct fuse_lseek_out)) ? 0 : EINVAL;
+		break;
+
 	default:
 		panic("FUSE: opcodes out of sync (%d)\n", opcode);
 	}
diff --git a/sys/fs/fuse/fuse_ipc.h b/sys/fs/fuse/fuse_ipc.h
index c8a665abded7..2ed75b3319e3 100644
--- a/sys/fs/fuse/fuse_ipc.h
+++ b/sys/fs/fuse/fuse_ipc.h
@@ -212,7 +212,15 @@ struct fuse_data {
 	int				daemon_timeout;
 	int				linux_errnos;
 	unsigned			time_gran;
+	/* A bitmask of FUSE RPCs that are not implemented by the server */
 	uint64_t			notimpl;
+	/*
+	 * A bitmask of FUSE RPCs that are implemented by the server.
+	 * If an operation is not present in either notimpl or isimpl, then it
+	 * may be implemented by the server, but the kernel doesn't know for
+	 * sure.
+	 */
+	uint64_t			isimpl;
 	uint64_t			mnt_flag;
 	enum fuse_data_cache_mode	cache_mode;
 };
@@ -240,13 +248,40 @@ fuse_get_mpdata(struct mount *mp)
 }
 
 static inline bool
-fsess_isimpl(struct mount *mp, int opcode)
+fsess_is_impl(struct mount *mp, int opcode)
+{
+	struct fuse_data *data = fuse_get_mpdata(mp);
+
+	return ((data->isimpl & (1ULL << opcode)) != 0);
+
+}
+
+static inline bool
+fsess_maybe_impl(struct mount *mp, int opcode)
 {
 	struct fuse_data *data = fuse_get_mpdata(mp);
 
 	return ((data->notimpl & (1ULL << opcode)) == 0);
 
 }
+
+static inline bool
+fsess_not_impl(struct mount *mp, int opcode)
+{
+	struct fuse_data *data = fuse_get_mpdata(mp);
+
+	return ((data->notimpl & (1ULL << opcode)) != 0);
+
+}
+
+static inline void
+fsess_set_impl(struct mount *mp, int opcode)
+{
+	struct fuse_data *data = fuse_get_mpdata(mp);
+
+	data->isimpl |= (1ULL << opcode);
+}
+
 static inline void
 fsess_set_notimpl(struct mount *mp, int opcode)
 {
diff --git a/sys/fs/fuse/fuse_kernel.h b/sys/fs/fuse/fuse_kernel.h
index fa3c63417f19..6e97b04a733f 100644
--- a/sys/fs/fuse/fuse_kernel.h
+++ b/sys/fs/fuse/fuse_kernel.h
@@ -102,6 +102,9 @@
  *  - add ctime and ctimensec to fuse_setattr_in
  *  - add FUSE_RENAME2 request
  *  - add FUSE_NO_OPEN_SUPPORT flag
+ *
+ * 7.24
+ *  - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support
  */
 
 #ifndef _FUSE_FUSE_KERNEL_H
@@ -117,7 +120,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 23
+#define FUSE_KERNEL_MINOR_VERSION 24
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -340,6 +343,7 @@ enum fuse_opcode {
 	FUSE_FALLOCATE     = 43,
 	FUSE_READDIRPLUS   = 44,
 	FUSE_RENAME2       = 45,
+	FUSE_LSEEK         = 46,
 
 #ifdef linux
 	/* CUSE specific operations */
@@ -751,4 +755,15 @@ struct fuse_notify_retrieve_in {
 	uint64_t	dummy4;
 };
 
+struct fuse_lseek_in {
+       uint64_t        fh;
+       uint64_t        offset;
+       uint32_t        whence;
+       uint32_t        padding;
+};
+
+struct fuse_lseek_out {
+       uint64_t        offset;
+};
+
 #endif /* _FUSE_FUSE_KERNEL_H */
diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c
index 04d273127ade..7f47f8800994 100644
--- a/sys/fs/fuse/fuse_vfsops.c
+++ b/sys/fs/fuse/fuse_vfsops.c
@@ -501,7 +501,7 @@ fuse_vfsop_unmount(struct mount *mp, int mntflags)
 	if (fdata_get_dead(data)) {
 		goto alreadydead;
 	}
-	if (fsess_isimpl(mp, FUSE_DESTROY)) {
+	if (fsess_maybe_impl(mp, FUSE_DESTROY)) {
 		fdisp_init(&fdi, 0);
 		fdisp_make(&fdi, FUSE_DESTROY, mp, 0, td, NULL);
 
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index fde673d1f5f3..efac7e041cf6 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
+#include <sys/filio.h>
 #include <sys/uio.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
@@ -136,6 +137,7 @@ static vop_fsync_t fuse_vnop_fsync;
 static vop_getattr_t fuse_vnop_getattr;
 static vop_getextattr_t fuse_vnop_getextattr;
 static vop_inactive_t fuse_vnop_inactive;
+static vop_ioctl_t fuse_vnop_ioctl;
 static vop_link_t fuse_vnop_link;
 static vop_listextattr_t fuse_vnop_listextattr;
 static vop_lookup_t fuse_vnop_lookup;
@@ -190,11 +192,7 @@ struct vop_vector fuse_vnops = {
 	.vop_getattr = fuse_vnop_getattr,
 	.vop_getextattr = fuse_vnop_getextattr,
 	.vop_inactive = fuse_vnop_inactive,
-	/*
-	 * TODO: implement vop_ioctl after upgrading to protocol 7.16.
-	 * FUSE_IOCTL was added in 7.11, but 32-bit compat is broken until
-	 * 7.16.
-	 */
+	.vop_ioctl = fuse_vnop_ioctl,
 	.vop_link = fuse_vnop_link,
 	.vop_listextattr = fuse_vnop_listextattr,
 	.vop_lookup = fuse_vnop_lookup,
@@ -284,7 +282,7 @@ fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag)
 	struct mount *mp = vnode_mount(vp);
 	int err;
 
-	if (!fsess_isimpl(vnode_mount(vp), FUSE_FLUSH))
+	if (fsess_not_impl(vnode_mount(vp), FUSE_FLUSH))
 		return 0;
 
 	err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid);
@@ -318,6 +316,42 @@ fuse_fifo_close(struct vop_close_args *ap)
 	return (fifo_specops.vop_close(ap));
 }
 
+/* Send FUSE_LSEEK for this node; on success, store the new offset in *offp */
+static int
+fuse_vnop_do_lseek(struct vnode *vp, struct thread *td, struct ucred *cred,
+	pid_t pid, off_t *offp, int whence)
+{
+	struct fuse_dispatcher fdi;
+	struct fuse_filehandle *fufh;
+	struct fuse_lseek_in *flsi;
+	struct mount *mp = vnode_mount(vp);
+	int err;
+
+	MPASS(VOP_ISLOCKED(vp));
+
+	err = fuse_filehandle_getrw(vp, FREAD, &fufh, cred, pid);
+	if (err)
+		return (err);
+	fdisp_init(&fdi, sizeof(*flsi));
+	fdisp_make_vp(&fdi, FUSE_LSEEK, vp, td, cred);
+	flsi = fdi.indata;
+	flsi->fh = fufh->fh_id;
+	flsi->offset = *offp;
+	flsi->whence = whence;
+	err = fdisp_wait_answ(&fdi);
+	if (err == ENOSYS) {
+		fsess_set_notimpl(mp, FUSE_LSEEK);
+	} else if (err == 0 || err == ENXIO) {
+		/* ENXIO: no more holes/data before EOF; op is implemented */
+		fsess_set_impl(mp, FUSE_LSEEK);
+		if (err == 0)
+			*offp = ((struct fuse_lseek_out *)fdi.answ)->offset;
+	}
+	fdisp_destroy(&fdi);
+
+	return (err);
+}
+
 /*
     struct vnop_access_args {
 	struct vnode *a_vp;
@@ -516,7 +550,7 @@ fuse_vnop_bmap(struct vop_bmap_args *ap)
 			*runp = 0;
 	}
 
-	if (fsess_isimpl(mp, FUSE_BMAP)) {
+	if (fsess_maybe_impl(mp, FUSE_BMAP)) {
 		fdisp_init(&fdi, sizeof(*fbi));
 		fdisp_make_vp(&fdi, FUSE_BMAP, vp, td, td->td_ucred);
 		fbi = fdi.indata;
@@ -652,7 +686,7 @@ fuse_vnop_create(struct vop_create_args *ap)
 	if (vap->va_type != VREG)
 		return (EINVAL);
 
-	if (!fsess_isimpl(mp, FUSE_CREATE) || vap->va_type == VSOCK) {
+	if (fsess_not_impl(mp, FUSE_CREATE) || vap->va_type == VSOCK) {
 		/* Fallback to FUSE_MKNOD/FUSE_OPEN */
 		fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td,
 			cred, mode, &op);
@@ -883,6 +917,56 @@ fuse_vnop_inactive(struct vop_inactive_args *ap)
 	return 0;
 }
 
+/*
+    struct vnop_ioctl_args {
+	struct vnode *a_vp;
+	u_long a_command;
+	caddr_t a_data;
+	int a_fflag;
+	struct ucred *a_cred;
+	struct thread *a_td;
+    };
+*/
+static int
+fuse_vnop_ioctl(struct vop_ioctl_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct mount *mp = vnode_mount(vp);
+	struct ucred *cred = ap->a_cred;
+	off_t *offp;
+	pid_t pid = ap->a_td->td_proc->p_pid;
+	int err;
+
+	switch (ap->a_command) {
+	case FIOSEEKDATA:
+	case FIOSEEKHOLE:
+		/* Call FUSE_LSEEK, if we can, or fall back to vop_stdioctl */
+		if (fsess_maybe_impl(mp, FUSE_LSEEK)) {
+			int whence;
+
+			offp = ap->a_data;
+			if (ap->a_command == FIOSEEKDATA)
+				whence = SEEK_DATA;
+			else
+				whence = SEEK_HOLE;
+
+			vn_lock(vp, LK_SHARED | LK_RETRY);
+			err = fuse_vnop_do_lseek(vp, ap->a_td, cred, pid, offp,
+			    whence);
+			VOP_UNLOCK(vp);
+		}
+		if (fsess_not_impl(mp, FUSE_LSEEK))
+			err = vop_stdioctl(ap);
+		break;
+	default:
+		/* TODO: implement FUSE_IOCTL */
+		err = ENOTTY;
+		break;
+	}
+	return (err);
+}
+
+
 /*
     struct vnop_link_args {
 	struct vnode *a_tdvp;
@@ -1337,6 +1421,8 @@ fuse_vnop_open(struct vop_open_args *ap)
 static int
 fuse_vnop_pathconf(struct vop_pathconf_args *ap)
 {
+	struct vnode *vp = ap->a_vp;
+	struct mount *mp;
 
 	switch (ap->a_name) {
 	case _PC_FILESIZEBITS:
@@ -1354,6 +1440,35 @@ fuse_vnop_pathconf(struct vop_pathconf_args *ap)
 	case _PC_NO_TRUNC:
 		*ap->a_retval = 1;
 		return (0);
+	case _PC_MIN_HOLE_SIZE:
+		/*
+		 * The FUSE protocol provides no mechanism for a server to
+		 * report _PC_MIN_HOLE_SIZE.  It's a protocol bug.  Instead,
+		 * return EINVAL if the server does not support FUSE_LSEEK, or
+		 * 1 if it does.
+		 */
+		mp = vnode_mount(vp);
+		if (!fsess_is_impl(mp, FUSE_LSEEK) &&
+		    !fsess_not_impl(mp, FUSE_LSEEK)) {
+			off_t offset = 0;
+
+			/* Issue a FUSE_LSEEK to find out if it's implemented */
+			fuse_vnop_do_lseek(vp, curthread, curthread->td_ucred,
+			    curthread->td_proc->p_pid, &offset, SEEK_DATA);
+		}
+
+		if (fsess_is_impl(mp, FUSE_LSEEK)) {
+			*ap->a_retval = 1;
+			return (0);
+		} else {
+			/*
+			 * Probably FUSE_LSEEK is not implemented.  It might
+			 * be, if the FUSE_LSEEK above returned an error like
+			 * EACCES, but in that case we can't tell, so it's
+			 * safest to report EINVAL anyway.
+			 */
+			return (EINVAL);
+		}
 	default:
 		return (vop_stdpathconf(ap));
 	}
@@ -2035,7 +2150,7 @@ fuse_vnop_getextattr(struct vop_getextattr_args *ap)
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
-	if (!fsess_isimpl(mp, FUSE_GETXATTR))
+	if (fsess_not_impl(mp, FUSE_GETXATTR))
 		return EOPNOTSUPP;
 
 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
@@ -2121,7 +2236,7 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap)
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
-	if (!fsess_isimpl(mp, FUSE_SETXATTR))
+	if (fsess_not_impl(mp, FUSE_SETXATTR))
 		return EOPNOTSUPP;
 
 	if (vfs_isrdonly(mp))
@@ -2133,7 +2248,7 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap)
 		 * If we got here as fallback from VOP_DELETEEXTATTR, then
 		 * return EOPNOTSUPP.
 		 */
-		if (!fsess_isimpl(mp, FUSE_REMOVEXATTR))
+		if (fsess_not_impl(mp, FUSE_REMOVEXATTR))
 			return (EOPNOTSUPP);
 		else
 			return (EINVAL);
@@ -2286,7 +2401,7 @@ fuse_vnop_listextattr(struct vop_listextattr_args *ap)
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
-	if (!fsess_isimpl(mp, FUSE_LISTXATTR))
+	if (fsess_not_impl(mp, FUSE_LISTXATTR))
 		return EOPNOTSUPP;
 
 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
@@ -2409,7 +2524,7 @@ fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap)
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
-	if (!fsess_isimpl(mp, FUSE_REMOVEXATTR))
+	if (fsess_not_impl(mp, FUSE_REMOVEXATTR))
 		return EOPNOTSUPP;
 
 	if (vfs_isrdonly(mp))
diff --git a/tests/sys/fs/fusefs/Makefile b/tests/sys/fs/fusefs/Makefile
index 253078f8b1a7..8d199a53c074 100644
--- a/tests/sys/fs/fusefs/Makefile
+++ b/tests/sys/fs/fusefs/Makefile
@@ -29,6 +29,7 @@ GTESTS+=	io
 GTESTS+=	link
 GTESTS+=	locks
 GTESTS+=	lookup
+GTESTS+=	lseek
 GTESTS+=	mkdir
 GTESTS+=	mknod
 GTESTS+=	mount
diff --git a/tests/sys/fs/fusefs/lseek.cc b/tests/sys/fs/fusefs/lseek.cc
new file mode 100644
index 000000000000..089b0f86a7f6
--- /dev/null
+++ b/tests/sys/fs/fusefs/lseek.cc
@@ -0,0 +1,360 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alan Somers
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+extern "C" {
+#include <sys/param.h>
+
+#include <fcntl.h>
+}
+
+#include "mockfs.hh"
+#include "utils.hh"
+
+using namespace testing;
+
+class Lseek: public FuseTest {};
+class LseekPathconf: public Lseek {};
+class LseekPathconf_7_23: public LseekPathconf {
+public:
+virtual void SetUp() {
+	m_kernel_minor_version = 23;
+	FuseTest::SetUp();
+}
+};
+class LseekSeekHole: public Lseek {};
+class LseekSeekData: public Lseek {};
+
+/*
+ * If a previous lseek operation has already returned ENOSYS, then pathconf can
+ * return EINVAL immediately.
+ */
+TEST_F(LseekPathconf, already_enosys)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const uint64_t ino = 42;
+	off_t fsize = 1 << 30;	/* 1 GiB */
+	off_t offset_in = 1 << 28;
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_LSEEK);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnErrno(ENOSYS)));
+
+	fd = open(FULLPATH, O_RDONLY);
+
+	EXPECT_EQ(offset_in, lseek(fd, offset_in, SEEK_DATA));
+	EXPECT_EQ(-1, fpathconf(fd, _PC_MIN_HOLE_SIZE));
+	EXPECT_EQ(EINVAL, errno);
+}
+
+/*
+ * If a previous lseek operation has already returned successfully, then
+ * pathconf can return 1 immediately.  1 means "holes are reported, but size is
+ * not specified".
+ */
+TEST_F(LseekPathconf, already_seeked)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const uint64_t ino = 42;
+	off_t fsize = 1 << 30;	/* 1 GiB */
+	off_t offset = 1 << 28;
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_LSEEK);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnImmediate([=](auto i, auto& out) {
+		SET_OUT_HEADER_LEN(out, lseek);
+		out.body.lseek.offset = i.body.lseek.offset;
+	})));
+	fd = open(FULLPATH, O_RDONLY);
+	EXPECT_EQ(offset, lseek(fd, offset, SEEK_DATA));
+
+	EXPECT_EQ(1, fpathconf(fd, _PC_MIN_HOLE_SIZE));
+}
+
+/*
+ * If no FUSE_LSEEK operation has been attempted since mount, try once as soon
+ * as a pathconf request comes in.
+ */
+TEST_F(LseekPathconf, enosys_now)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const uint64_t ino = 42;
+	off_t fsize = 1 << 30;	/* 1 GiB */
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_LSEEK);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnErrno(ENOSYS)));
+
+	fd = open(FULLPATH, O_RDONLY);
+
+	EXPECT_EQ(-1, fpathconf(fd, _PC_MIN_HOLE_SIZE));
+	EXPECT_EQ(EINVAL, errno);
+}
+
+/*
+ * If no FUSE_LSEEK operation has been attempted since mount, try one as soon
+ * as a pathconf request comes in.  This is the typical pattern of bsdtar.  It
+ * will only try SEEK_HOLE/SEEK_DATA if fpathconf says they're supported.
+ */
+TEST_F(LseekPathconf, seek_now)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const uint64_t ino = 42;
+	off_t fsize = 1 << 30;	/* 1 GiB */
+	off_t offset_initial = 1 << 27;
+	off_t offset_out = 1 << 29;
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_LSEEK);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnImmediate([=](auto i __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, lseek);
+		out.body.lseek.offset = offset_out;
+	})));
+
+	fd = open(FULLPATH, O_RDONLY);
+	EXPECT_EQ(offset_initial, lseek(fd, offset_initial, SEEK_SET));
+	EXPECT_EQ(1, fpathconf(fd, _PC_MIN_HOLE_SIZE));
+	/* And check that the file pointer hasn't changed */
+	EXPECT_EQ(offset_initial, lseek(fd, 0, SEEK_CUR));
+}
+
+/*
+ * For servers using older protocol versions, no FUSE_LSEEK should be attempted
+ */
+TEST_F(LseekPathconf_7_23, already_enosys)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const uint64_t ino = 42;
+	off_t fsize = 1 << 30;	/* 1 GiB */
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_LSEEK);
+		}, Eq(true)),
+		_)
+	).Times(0);
+
+	fd = open(FULLPATH, O_RDONLY);
+	EXPECT_EQ(-1, fpathconf(fd, _PC_MIN_HOLE_SIZE));
+	EXPECT_EQ(EINVAL, errno);
+}
+
+TEST_F(LseekSeekData, ok)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const uint64_t ino = 42;
+	off_t fsize = 1 << 30;	/* 1 GiB */
+	off_t offset_in = 1 << 28;
+	off_t offset_out = 1 << 29;
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_LSEEK &&
+				in.header.nodeid == ino &&
+				in.body.lseek.fh == FH &&
+				(off_t)in.body.lseek.offset == offset_in &&
+				in.body.lseek.whence == SEEK_DATA);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnImmediate([=](auto i __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, lseek);
+		out.body.lseek.offset = offset_out;
+	})));
+	fd = open(FULLPATH, O_RDONLY);
+	EXPECT_EQ(offset_out, lseek(fd, offset_in, SEEK_DATA));
+	EXPECT_EQ(offset_out, lseek(fd, 0, SEEK_CUR));
+}
+
+/*
+ * If the server returns ENOSYS, fusefs should fall back to the default
+ * behavior, and never query the server again.
+ */
+TEST_F(LseekSeekData, enosys)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const uint64_t ino = 42;
+	off_t fsize = 1 << 30;	/* 1 GiB */
+	off_t offset_in = 1 << 28;
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_LSEEK &&
+				in.header.nodeid == ino &&
+				in.body.lseek.fh == FH &&
+				(off_t)in.body.lseek.offset == offset_in &&
+				in.body.lseek.whence == SEEK_DATA);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnErrno(ENOSYS)));
+	fd = open(FULLPATH, O_RDONLY);
+
+	/*
+	 * Default SEEK_DATA behavior: ENXIO if offset is < 0 or >= fsize,
+	 * offset otherwise.
+	 */
+	EXPECT_EQ(offset_in, lseek(fd, offset_in, SEEK_DATA));
+	EXPECT_EQ(-1, lseek(fd, -1, SEEK_DATA));
+	EXPECT_EQ(ENXIO, errno);
+	EXPECT_EQ(-1, lseek(fd, fsize, SEEK_DATA));
+	EXPECT_EQ(ENXIO, errno);
+}
+
+TEST_F(LseekSeekHole, ok)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const uint64_t ino = 42;
+	off_t fsize = 1 << 30;	/* 1 GiB */
+	off_t offset_in = 1 << 28;
+	off_t offset_out = 1 << 29;
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_LSEEK &&
+				in.header.nodeid == ino &&
+				in.body.lseek.fh == FH &&
+				(off_t)in.body.lseek.offset == offset_in &&
+				in.body.lseek.whence == SEEK_HOLE);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnImmediate([=](auto i __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, lseek);
+		out.body.lseek.offset = offset_out;
+	})));
+	fd = open(FULLPATH, O_RDONLY);
+	EXPECT_EQ(offset_out, lseek(fd, offset_in, SEEK_HOLE));
+	EXPECT_EQ(offset_out, lseek(fd, 0, SEEK_CUR));
+}
+
+/*
+ * If the server returns ENOSYS, fusefs should fall back to the default
+ * behavior, and never query the server again.
+ */
+TEST_F(LseekSeekHole, enosys)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const uint64_t ino = 42;
+	off_t fsize = 1 << 30;	/* 1 GiB */
+	off_t offset_in = 1 << 28;
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_LSEEK &&
+				in.header.nodeid == ino &&
+				in.body.lseek.fh == FH &&
+				(off_t)in.body.lseek.offset == offset_in &&
+				in.body.lseek.whence == SEEK_HOLE);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnErrno(ENOSYS)));
+	fd = open(FULLPATH, O_RDONLY);
+
+	/*
+	 * Default behavior: ENXIO if offset is < 0 or >= fsize, fsize
+	 * otherwise.
+	 */
+	EXPECT_EQ(fsize, lseek(fd, offset_in, SEEK_HOLE));
+	EXPECT_EQ(-1, lseek(fd, -1, SEEK_HOLE));
+	EXPECT_EQ(ENXIO, errno);
+	EXPECT_EQ(-1, lseek(fd, fsize, SEEK_HOLE));
+	EXPECT_EQ(ENXIO, errno);
+}
+
+/* lseek should return ENXIO when offset points to EOF */
+TEST_F(LseekSeekHole, enxio)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const uint64_t ino = 42;
+	off_t fsize = 1 << 30;	/* 1 GiB */
+	off_t offset_in = fsize;
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_LSEEK &&
+				in.header.nodeid == ino &&
+				in.body.lseek.fh == FH &&
+				(off_t)in.body.lseek.offset == offset_in &&
+				in.body.lseek.whence == SEEK_HOLE);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnErrno(ENXIO)));
+	fd = open(FULLPATH, O_RDONLY);
+	EXPECT_EQ(-1, lseek(fd, offset_in, SEEK_HOLE));
+	EXPECT_EQ(ENXIO, errno);
+}
diff --git a/tests/sys/fs/fusefs/mockfs.cc b/tests/sys/fs/fusefs/mockfs.cc
index f977c705331f..32d8fc7f6129 100644
--- a/tests/sys/fs/fusefs/mockfs.cc
+++ b/tests/sys/fs/fusefs/mockfs.cc
@@ -62,8 +62,7 @@ int verbosity = 0;
 
 const char* opcode2opname(uint32_t opcode)
 {
-	const int NUM_OPS = 39;
-	const char* table[NUM_OPS] = {
+	const char* table[] = {
 		"Unknown (opcode 0)",
 		"LOOKUP",
 		"FORGET",
@@ -102,9 +101,17 @@ const char* opcode2opname(uint32_t opcode)
 		"CREATE",
 		"INTERRUPT",
 		"BMAP",
-		"DESTROY"
+		"DESTROY",
+		"IOCTL",
+		"POLL",
+		"NOTIFY_REPLY",
+		"BATCH_FORGET",
+		"FALLOCATE",
+		"READDIRPLUS",
+		"RENAME2",
+		"LSEEK",
 	};
-	if (opcode >= NUM_OPS)
+	if (opcode >= nitems(table))
 		return ("Unknown (opcode > max)");
 	else
 		return (table[opcode]);
@@ -211,6 +218,22 @@ void MockFS::debug_request(const mockfs_buf_in &in, ssize_t buflen)
 		case FUSE_LOOKUP:
 			printf(" %s", in.body.lookup);
 			break;
+		case FUSE_LSEEK:
+			switch (in.body.lseek.whence) {
+			case SEEK_HOLE:
+				printf(" SEEK_HOLE offset=%ld",
+				    in.body.lseek.offset);
+				break;
+			case SEEK_DATA:
+				printf(" SEEK_DATA offset=%ld",
+				    in.body.lseek.offset);
+				break;
+			default:
+				printf(" whence=%u offset=%ld",
+				    in.body.lseek.whence, in.body.lseek.offset);
+				break;
+			}
+			break;
 		case FUSE_MKDIR:
 			name = (const char*)in.body.bytes +
 				sizeof(fuse_mkdir_in);
@@ -636,6 +659,10 @@ void MockFS::audit_request(const mockfs_buf_in &in, ssize_t buflen) {
 		EXPECT_EQ(inlen, fih + sizeof(in.body.bmap));
 		EXPECT_EQ((size_t)buflen, inlen);
 		break;
+	case FUSE_LSEEK:
+		EXPECT_EQ(inlen, fih + sizeof(in.body.lseek));
+		EXPECT_EQ((size_t)buflen, inlen);
+		break;
 	case FUSE_NOTIFY_REPLY:
 	case FUSE_BATCH_FORGET:
 	case FUSE_FALLOCATE:
diff --git a/tests/sys/fs/fusefs/mockfs.hh b/tests/sys/fs/fusefs/mockfs.hh
index 138c125649fd..6fb15089bc23 100644
--- a/tests/sys/fs/fusefs/mockfs.hh
+++ b/tests/sys/fs/fusefs/mockfs.hh
@@ -170,6 +170,7 @@ union fuse_payloads_in {
 	fuse_link_in	link;
 	fuse_listxattr_in listxattr;
 	char		lookup[0];
+	fuse_lseek_in	lseek;
 	fuse_mkdir_in	mkdir;
 	fuse_mknod_in	mknod;
 	fuse_open_in	open;
@@ -210,6 +211,7 @@ union fuse_payloads_out {
 	fuse_getxattr_out	getxattr;
 	fuse_init_out		init;
 	fuse_init_out_7_22	init_7_22;
+	fuse_lseek_out		lseek;
 	/* The inval_entry structure should be followed by the entry's name */
 	fuse_notify_inval_entry_out	inval_entry;
 	fuse_notify_inval_inode_out	inval_inode;


More information about the dev-commits-src-all mailing list