git: 49e8190c76c1 - stable/13 - fusefs: fix an interaction between copy_file_range and mmap

From: Alan Somers <asomers_at_FreeBSD.org>
Date: Fri, 02 Feb 2024 21:31:27 UTC
The branch stable/13 has been updated by asomers:

URL: https://cgit.FreeBSD.org/src/commit/?id=49e8190c76c1525c55ae94b33832cbb09563ae1e

commit 49e8190c76c1525c55ae94b33832cbb09563ae1e
Author:     Alan Somers <asomers@FreeBSD.org>
AuthorDate: 2023-12-31 14:31:16 +0000
Commit:     Alan Somers <asomers@FreeBSD.org>
CommitDate: 2024-02-02 21:31:19 +0000

    fusefs: fix an interaction between copy_file_range and mmap
    
    If a copy_file_range operation tries to read from a page that was
    previously written via mmap, that page must be flushed first.
    
    Reviewed by:    kib
    Differential Revision: https://reviews.freebsd.org/D43451
    
    (cherry picked from commit 1c909c300b92601f7690610097ac98126caff835)
---
 sys/fs/fuse/fuse_vnops.c               |  1 +
 tests/sys/fs/fusefs/copy_file_range.cc | 68 +++++++++++++++++++++++++
 tests/sys/fs/fusefs/io.cc              | 90 ++++++++++++++++++++++++++++++++--
 3 files changed, 154 insertions(+), 5 deletions(-)

diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index 6de23cf71cf3..75b83c648d38 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -908,6 +908,7 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
 	if (err)
 		goto unlock;
 
+	vnode_pager_clean_sync(invp);
 	err = fuse_inval_buf_range(outvp, outfilesize, *ap->a_outoffp,
 		*ap->a_outoffp + io.uio_resid);
 	if (err)
diff --git a/tests/sys/fs/fusefs/copy_file_range.cc b/tests/sys/fs/fusefs/copy_file_range.cc
index 8640780c0b58..17b21b888736 100644
--- a/tests/sys/fs/fusefs/copy_file_range.cc
+++ b/tests/sys/fs/fusefs/copy_file_range.cc
@@ -27,6 +27,7 @@
 
 extern "C" {
 #include <sys/param.h>
+#include <sys/mman.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 
@@ -320,6 +321,73 @@ TEST_F(CopyFileRange, fallback)
 	ASSERT_EQ(len, copy_file_range(fd1, &start1, fd2, &start2, len, 0));
 }
 
+/*
+ * Writes via mmap should not conflict with using copy_file_range.  Any dirty
+ * pages that overlap with copy_file_range's input should be flushed before
+ * FUSE_COPY_FILE_RANGE is sent.
+ */
+TEST_F(CopyFileRange, mmap_write)
+{
+	const char FULLPATH[] = "mountpoint/src.txt";
+	const char RELPATH[] = "src.txt";
+	uint8_t *wbuf, *fbuf;
+	void *p;
+	size_t fsize = 0x6000;
+	size_t wsize = 0x3000;
+	ssize_t r;
+	off_t offset2_in = 0;
+	off_t offset2_out = wsize;
+	size_t copysize = wsize;
+	const uint64_t ino = 42;
+	const uint64_t fh = 0xdeadbeef1a7ebabe;
+	int fd;
+	const mode_t mode = 0644;
+
+	fbuf = (uint8_t*)calloc(1, fsize);
+	wbuf = (uint8_t*)malloc(wsize);
+	memset(wbuf, 1, wsize);
+
+	expect_lookup(RELPATH, ino, S_IFREG | mode, fsize, 1);
+	expect_open(ino, 0, 1, fh);
+	/* This read is initiated by the mmap write */
+	expect_read(ino, 0, fsize, fsize, fbuf, -1, fh);
+	/* This write flushes the buffer filled by the mmap write */
+	expect_write(ino, 0, wsize, wsize, wbuf);
+
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_COPY_FILE_RANGE &&
+				(off_t)in.body.copy_file_range.off_in == offset2_in &&
+				(off_t)in.body.copy_file_range.off_out == offset2_out &&
+				in.body.copy_file_range.len == copysize
+			);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnImmediate([&](auto in __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, write);
+		out.body.write.size = copysize;
+	})));
+
+	fd = open(FULLPATH, O_RDWR);
+
+	/* First, write some data via mmap */
+	p = mmap(NULL, wsize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	ASSERT_NE(MAP_FAILED, p) << strerror(errno);
+	memmove((uint8_t*)p, wbuf, wsize);
+	ASSERT_EQ(0, munmap(p, wsize)) << strerror(errno);
+
+	/*
+	 * Then copy it around the file via copy_file_range.  This should
+	 * trigger a FUSE_WRITE to flush the pages written by mmap.
+	 */
+	r = copy_file_range(fd, &offset2_in, fd, &offset2_out, copysize, 0);
+	ASSERT_EQ(copysize, (size_t)r) << strerror(errno);
+
+	free(wbuf);
+	free(fbuf);
+}
+
+
 /*
  * copy_file_range should send SIGXFSZ and return EFBIG when the operation
  * would exceed the limit imposed by RLIMIT_FSIZE.
diff --git a/tests/sys/fs/fusefs/io.cc b/tests/sys/fs/fusefs/io.cc
index 3c14621f0b66..fc648f44d755 100644
--- a/tests/sys/fs/fusefs/io.cc
+++ b/tests/sys/fs/fusefs/io.cc
@@ -73,7 +73,7 @@ static void compare(const void *tbuf, const void *controlbuf, off_t baseofs,
 	}
 }
 
-typedef tuple<bool, uint32_t, cache_mode> IoParam;
+typedef tuple<bool, uint32_t, cache_mode, uint32_t> IoParam;
 
 class Io: public FuseTest, public WithParamInterface<IoParam> {
 public:
@@ -112,6 +112,7 @@ void SetUp()
 		default:
 			FAIL() << "Unknown cache mode";
 	}
+	m_kernel_minor_version = get<3>(GetParam());
 	m_noatime = true;	// To prevent SETATTR for atime on close
 
 	FuseTest::SetUp();
@@ -120,9 +121,10 @@ void SetUp()
 
 	if (verbosity > 0) {
 		printf("Test Parameters: init_flags=%#x maxwrite=%#x "
-		    "%sasync cache=%s\n",
+		    "%sasync cache=%s kernel_minor_version=%d\n",
 		    m_init_flags, m_maxwrite, m_async? "" : "no",
-		    cache_mode_to_s(get<2>(GetParam())));
+		    cache_mode_to_s(get<2>(GetParam())),
+		    m_kernel_minor_version);
 	}
 
 	expect_lookup(RELPATH, ino, S_IFREG | 0644, 0, 1);
@@ -195,6 +197,30 @@ void SetUp()
 		}, Eq(true)),
 		_)
 	).WillRepeatedly(Invoke(ReturnErrno(0)));
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_COPY_FILE_RANGE &&
+				in.header.nodeid == ino &&
+				in.body.copy_file_range.nodeid_out == ino &&
+				in.body.copy_file_range.flags == 0);
+		}, Eq(true)),
+		_)
+	).WillRepeatedly(Invoke(ReturnImmediate([=](auto in, auto& out) {
+		off_t off_in = in.body.copy_file_range.off_in;
+		off_t off_out = in.body.copy_file_range.off_out;
+		ASSERT_EQ((ssize_t)in.body.copy_file_range.len,
+		    copy_file_range(m_backing_fd, &off_in, m_backing_fd,
+			    &off_out, in.body.copy_file_range.len, 0));
+		SET_OUT_HEADER_LEN(out, write);
+		out.body.write.size = in.body.copy_file_range.len;
+	})));
+	/* Claim that we don't support FUSE_LSEEK */
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_LSEEK);
+		}, Eq(true)),
+		_)
+	).WillRepeatedly(Invoke(ReturnErrno(ENOSYS)));
 
 	m_test_fd = open(FULLPATH, O_RDWR );
 	EXPECT_LE(0, m_test_fd) << strerror(errno);
@@ -223,6 +249,31 @@ void do_closeopen()
 	ASSERT_LE(0, m_control_fd) << strerror(errno);
 }
 
+void do_copy_file_range(off_t off_in, off_t off_out, size_t size)
+{
+	ssize_t r;
+	off_t test_off_in = off_in;
+	off_t test_off_out = off_out;
+	off_t test_size = size;
+	off_t control_off_in = off_in;
+	off_t control_off_out = off_out;
+	off_t control_size = size;
+
+	while (test_size > 0) {
+		r = copy_file_range(m_test_fd, &test_off_in, m_test_fd,
+				&test_off_out, test_size, 0);
+		ASSERT_GT(r, 0) << strerror(errno);
+		test_size -= r;
+	}
+	while (control_size > 0) {
+		r = copy_file_range(m_control_fd, &control_off_in, m_control_fd,
+				&control_off_out, control_size, 0);
+		ASSERT_GT(r, 0) << strerror(errno);
+		control_size -= r;
+	}
+	m_filesize = std::max(m_filesize, off_out + (off_t)size);
+}
+
 void do_ftruncate(off_t offs)
 {
 	ASSERT_EQ(0, ftruncate(m_test_fd, offs)) << strerror(errno);
@@ -345,6 +396,13 @@ virtual void SetUp() {
 }
 };
 
+class IoCopyFileRange: public Io {
+public:
+virtual void SetUp() {
+	Io::SetUp();
+}
+};
+
 /*
  * Extend a file with dirty data in the last page of the last block.
  *
@@ -517,16 +575,38 @@ TEST_P(IoCacheable, vnode_pager_generic_putpage_clean_block_at_eof)
 	do_mapwrite(0x1bbc3, 0x3b4e0);
 }
 
+/*
+ * A copy_file_range that follows an mmap write to the input area needs to
+ * flush the mmap buffer first.
+ */
+TEST_P(IoCopyFileRange, copy_file_range_from_mapped_write)
+{
+	do_mapwrite(0x1000, 0);
+	do_copy_file_range(0, 0x1000, 0x1000);
+	do_read(0x1000, 0x1000);
+}
+
+
 INSTANTIATE_TEST_CASE_P(Io, Io,
 	Combine(Bool(),					/* async read */
 		Values(0x1000, 0x10000, 0x20000),	/* m_maxwrite */
-		Values(Uncached, Writethrough, Writeback, WritebackAsync)
+		Values(Uncached, Writethrough, Writeback, WritebackAsync),
+		Values(28)				/* kernel_minor_vers */
 	)
 );
 
 INSTANTIATE_TEST_CASE_P(Io, IoCacheable,
 	Combine(Bool(),					/* async read */
 		Values(0x1000, 0x10000, 0x20000),	/* m_maxwrite */
-		Values(Writethrough, Writeback, WritebackAsync)
+		Values(Writethrough, Writeback, WritebackAsync),
+		Values(28)				/* kernel_minor_vers */
+	)
+);
+
+INSTANTIATE_TEST_CASE_P(Io, IoCopyFileRange,
+	Combine(Bool(),					/* async read */
+		Values(0x10000),			/* m_maxwrite */
+		Values(Writethrough, Writeback, WritebackAsync),
+		Values(27, 28)				/* kernel_minor_vers */
 	)
 );