svn commit: r350388 - in projects/fuse2: sys/fs/fuse tests/sys/fs/fusefs

Alan Somers asomers at FreeBSD.org
Sun Jul 28 15:17:34 UTC 2019


Author: asomers
Date: Sun Jul 28 15:17:32 2019
New Revision: 350388
URL: https://svnweb.freebsd.org/changeset/base/350388

Log:
  fusefs: fix panic when writing with O_DIRECT and using writeback cache
  
  When a fusefs file system is mounted using the writeback cache, the cache
  may still be bypassed by opening a file with O_DIRECT.  When writing with
  O_DIRECT, the cache must be invalidated for the affected portion of the
  file.  Fix some panics caused by inadvertently invalidating too much.
  
  Sponsored by:	The FreeBSD Foundation

Modified:
  projects/fuse2/sys/fs/fuse/fuse_io.c
  projects/fuse2/tests/sys/fs/fusefs/write.cc

Modified: projects/fuse2/sys/fs/fuse/fuse_io.c
==============================================================================
--- projects/fuse2/sys/fs/fuse/fuse_io.c	Sun Jul 28 04:02:22 2019	(r350387)
+++ projects/fuse2/sys/fs/fuse/fuse_io.c	Sun Jul 28 15:17:32 2019	(r350388)
@@ -119,9 +119,11 @@ SDT_PROVIDER_DECLARE(fusefs);
  */
 SDT_PROBE_DEFINE2(fusefs, , io, trace, "int", "char*");
 
+static int
+fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end);
 static void
 fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred,
-	struct thread *td);
+    struct thread *td);
 static int 
 fuse_read_directbackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh);
@@ -136,6 +138,58 @@ static int 
 fuse_write_biobackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid);
 
+/* Invalidate a range of cached data, whether dirty of not */
+static int
+fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end)
+{
+	struct buf *bp;
+	daddr_t left_lbn, end_lbn, right_lbn;
+	off_t new_filesize;
+	int iosize, left_on, right_on, right_blksize;
+
+	iosize = fuse_iosize(vp);
+	left_lbn = start / iosize;
+	end_lbn = howmany(end, iosize);
+	left_on = start & (iosize - 1);
+	if (left_on != 0) {
+		bp = getblk(vp, left_lbn, iosize, PCATCH, 0, 0);
+		if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyend >= left_on) {
+			/* 
+			 * Flush the dirty buffer, because we don't have a
+			 * byte-granular way to record which parts of the
+			 * buffer are valid.
+			 */
+			bwrite(bp);
+			if (bp->b_error)
+				return (bp->b_error);
+		} else {
+			brelse(bp);
+		}
+	}
+	right_on = end & (iosize - 1);
+	if (right_on != 0) {
+		right_lbn = end / iosize;
+		new_filesize = MAX(filesize, end);
+		right_blksize = MIN(iosize, new_filesize - iosize * right_lbn);
+		bp = getblk(vp, right_lbn, right_blksize, PCATCH, 0, 0);
+		if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyoff < right_on) {
+			/* 
+			 * Flush the dirty buffer, because we don't have a
+			 * byte-granular way to record which parts of the
+			 * buffer are valid.
+			 */
+			bwrite(bp);
+			if (bp->b_error)
+				return (bp->b_error);
+		} else {
+			brelse(bp);
+		}
+	}
+
+	v_inval_buf_range(vp, left_lbn, end_lbn, iosize);
+	return (0);
+}
+
 /*
  * FreeBSD clears the SUID and SGID bits on any write by a non-root user.
  */
@@ -236,7 +290,6 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, in
 	case UIO_WRITE:
 		fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE);
 		if (directio) {
-			const int iosize = fuse_iosize(vp);
 			off_t start, end, filesize;
 
 			SDT_PROBE2(fusefs, , io, trace, 1,
@@ -252,7 +305,9 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, in
 				(IO_VMIO | IO_DIRECT),
 			    ("IO_DIRECT used for a cache flush?"));
 			/* Invalidate the write cache when writing directly */
-			v_inval_buf_range(vp, start, end, iosize);
+			err = fuse_inval_buf_range(vp, filesize, start, end);
+			if (err)
+				return (err);
 			err = fuse_write_directbackend(vp, uio, cred, fufh,
 				filesize, ioflag, false);
 		} else {

Modified: projects/fuse2/tests/sys/fs/fusefs/write.cc
==============================================================================
--- projects/fuse2/tests/sys/fs/fusefs/write.cc	Sun Jul 28 04:02:22 2019	(r350387)
+++ projects/fuse2/tests/sys/fs/fusefs/write.cc	Sun Jul 28 15:17:32 2019	(r350388)
@@ -951,6 +951,132 @@ TEST_F(WriteBackAsync, delay)
 }
 
 /*
+ * A direct write should not evict dirty cached data from outside of its own
+ * byte range.
+ */
+TEST_F(WriteBackAsync, direct_io_ignores_unrelated_cached)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const char CONTENTS0[] = "abcdefgh";
+	const char CONTENTS1[] = "ijklmnop";
+	uint64_t ino = 42;
+	int fd;
+	ssize_t bufsize = strlen(CONTENTS0) + 1;
+	ssize_t fsize = 2 * m_maxbcachebuf;
+	char readbuf[bufsize];
+	void *zeros;
+
+	zeros = calloc(1, m_maxbcachebuf);
+	ASSERT_NE(nullptr, zeros);
+
+	expect_lookup(RELPATH, ino, fsize);
+	expect_open(ino, 0, 1);
+	expect_read(ino, 0, m_maxbcachebuf, m_maxbcachebuf, zeros);
+	FuseTest::expect_write(ino, m_maxbcachebuf, bufsize, bufsize, 0, 0,
+		CONTENTS1);
+
+	fd = open(FULLPATH, O_RDWR);
+	EXPECT_LE(0, fd) << strerror(errno);
+
+	// Cache first block with dirty data.  This will entail first reading
+	// the existing data.
+	ASSERT_EQ(bufsize, pwrite(fd, CONTENTS0, bufsize, 0))
+		<< strerror(errno);
+
+	// Write directly to second block
+	ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno);
+	ASSERT_EQ(bufsize, pwrite(fd, CONTENTS1, bufsize, m_maxbcachebuf))
+		<< strerror(errno);
+
+	// Read from the first block again.  Should be serviced by cache.
+	ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno);
+	ASSERT_EQ(bufsize, pread(fd, readbuf, bufsize, 0)) << strerror(errno);
+	ASSERT_STREQ(readbuf, CONTENTS0);
+
+	leak(fd);
+	free(zeros);
+}
+
+/*
+ * If a direct io write partially overlaps one or two blocks of dirty cached
+ * data, No dirty data should be lost.  Admittedly this is a weird test,
+ * because it would be unusual to use O_DIRECT and the writeback cache.
+ */
+TEST_F(WriteBackAsync, direct_io_partially_overlaps_cached_block)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	uint64_t ino = 42;
+	int fd;
+	off_t bs = m_maxbcachebuf;
+	ssize_t fsize = 3 * bs;
+	void *readbuf, *zeros, *ones, *zeroones, *onezeros;
+
+	readbuf = malloc(bs);
+	ASSERT_NE(nullptr, readbuf) << strerror(errno);
+	zeros = calloc(1, 3 * bs);
+	ASSERT_NE(nullptr, zeros);
+	ones = calloc(1, 2 * bs);
+	ASSERT_NE(nullptr, ones);
+	memset(ones, 1, 2 * bs);
+	zeroones = calloc(1, bs);
+	ASSERT_NE(nullptr, zeroones);
+	memset((uint8_t*)zeroones + bs / 2, 1, bs / 2);
+	onezeros = calloc(1, bs);
+	ASSERT_NE(nullptr, onezeros);
+	memset(onezeros, 1, bs / 2);
+
+	expect_lookup(RELPATH, ino, fsize);
+	expect_open(ino, 0, 1);
+
+	fd = open(FULLPATH, O_RDWR);
+	EXPECT_LE(0, fd) << strerror(errno);
+
+	/* Cache first and third blocks with dirty data.  */
+	ASSERT_EQ(3 * bs, pwrite(fd, zeros, 3 * bs, 0)) << strerror(errno);
+
+	/*
+	 * Write directly to all three blocks.  The partially written blocks
+	 * will be flushed because they're dirty.
+	 */
+	FuseTest::expect_write(ino, 0, bs, bs, 0, 0, zeros);
+	FuseTest::expect_write(ino, 2 * bs, bs, bs, 0, 0, zeros);
+	/* The direct write is split in two because of the m_maxwrite value */
+	FuseTest::expect_write(ino,     bs / 2, bs, bs, 0, 0, ones);
+	FuseTest::expect_write(ino, 3 * bs / 2, bs, bs, 0, 0, ones);
+	ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno);
+	ASSERT_EQ(2 * bs, pwrite(fd, ones, 2 * bs, bs / 2)) << strerror(errno);
+
+	/*
+	 * Read from both the valid and invalid portions of the first and third
+	 * blocks again.  This will entail FUSE_READ operations because these
+	 * blocks were invalidated by the direct write.
+	 */
+	expect_read(ino, 0, bs, bs, zeroones);
+	expect_read(ino, 2 * bs, bs, bs, onezeros);
+	ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno);
+	ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 0)) << strerror(errno);
+	EXPECT_EQ(0, memcmp(zeros, readbuf, bs / 2));
+	ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 5 * bs / 2))
+		<< strerror(errno);
+	EXPECT_EQ(0, memcmp(zeros, readbuf, bs / 2));
+	ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, bs / 2))
+		<< strerror(errno);
+	EXPECT_EQ(0, memcmp(ones, readbuf, bs / 2));
+	ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 2 * bs))
+		<< strerror(errno);
+	EXPECT_EQ(0, memcmp(ones, readbuf, bs / 2));
+
+	leak(fd);
+	free(zeroones);
+	free(onezeros);
+	free(ones);
+	free(zeros);
+	free(readbuf);
+}
+
+/*
  * In WriteBack mode, writes may be cached beyond what the server thinks is the
  * EOF.  In this case, a short read at EOF should _not_ cause fusefs to update
  * the file's size.


More information about the svn-src-projects mailing list