svn commit: r346162 - in projects/fuse2: sys/fs/fuse sys/kern sys/sys tests/sys/fs/fusefs

Alan Somers asomers at FreeBSD.org
Fri Apr 12 19:05:08 UTC 2019


Author: asomers
Date: Fri Apr 12 19:05:06 2019
New Revision: 346162
URL: https://svnweb.freebsd.org/changeset/base/346162

Log:
  fusefs: evict invalidated cache contents during write-through
  
  fusefs's default cache mode is "writethrough", although it currently works
  more like "write-around": writes bypass the cache completely, leaving
  stale previously-read data in the cache.  This commit invalidates that
  stale data.  It also adds a new global v_inval_buf_range method, like
  vtruncbuf but for an arbitrary byte range of a file.
  
  PR:		235774
  Reported by:	cem
  Sponsored by:	The FreeBSD Foundation
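
For illustration, a minimal userspace model of the failure mode (not part
of the commit; the toy cache and all names below are hypothetical): a
write that goes around the cache must also invalidate any overlapping
cached data, or a later read will return the stale copy.

	#include <stdio.h>
	#include <string.h>
	#include <stdbool.h>

	static char backing[16] = "abcdefgh";	/* the FUSE server's copy */
	static char cache[16];			/* a one-block client cache */
	static bool cache_valid = false;

	static void
	cached_read(char *dst)
	{
		if (!cache_valid) {		/* miss: fill from the server */
			memcpy(cache, backing, sizeof(cache));
			cache_valid = true;
		}
		memcpy(dst, cache, sizeof(cache));	/* serve from cache */
	}

	static void
	writearound_write(const char *src, bool invalidate)
	{
		memcpy(backing, src, strlen(src) + 1);	/* bypasses cache */
		if (invalidate)
			cache_valid = false;	/* what this commit adds */
	}

	int
	main(void)
	{
		char buf[16];

		cached_read(buf);			/* prime the cache */
		writearound_write("ijklmnop", false);	/* old behavior */
		cached_read(buf);
		printf("without invalidation: %s (stale)\n", buf);

		writearound_write("qrstuvwx", true);	/* new behavior */
		cached_read(buf);
		printf("with invalidation:    %s\n", buf);
		return (0);
	}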

Modified:
  projects/fuse2/sys/fs/fuse/fuse_io.c
  projects/fuse2/sys/kern/vfs_subr.c
  projects/fuse2/sys/sys/vnode.h
  projects/fuse2/tests/sys/fs/fusefs/write.cc

Modified: projects/fuse2/sys/fs/fuse/fuse_io.c
==============================================================================
--- projects/fuse2/sys/fs/fuse/fuse_io.c	Fri Apr 12 18:54:09 2019	(r346161)
+++ projects/fuse2/sys/fs/fuse/fuse_io.c	Fri Apr 12 19:05:06 2019	(r346162)
@@ -171,8 +171,13 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, in
 		 * cached.
 		 */
 		if (directio || fuse_data_cache_mode == FUSE_CACHE_WT) {
+			off_t start, end;
+
 			SDT_PROBE2(fuse, , io, trace, 1,
 				"direct write of vnode");
+			start = uio->uio_offset;
+			end = start + uio->uio_resid;
+			v_inval_buf_range(vp, start, end, fuse_iosize(vp));
 			err = fuse_write_directbackend(vp, uio, cred, fufh,
 				ioflag);
 		} else {
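
One subtlety in the hunk above: the invalidation range is captured before
fuse_write_directbackend() runs, because the backend consumes the uio as
it transfers data (uio_offset advances while uio_resid drops toward zero).
A minimal userspace sketch of that ordering constraint (the toy_uio type
and all names are hypothetical, not part of the commit):

	#include <stdio.h>

	struct toy_uio {		/* models struct uio's cursor */
		long uio_offset;	/* offset of next byte to move */
		long uio_resid;		/* bytes remaining to move */
	};

	static void
	toy_write_backend(struct toy_uio *uio)
	{
		/* a real backend advances the cursor as it writes */
		uio->uio_offset += uio->uio_resid;
		uio->uio_resid = 0;
	}

	int
	main(void)
	{
		struct toy_uio uio = { .uio_offset = 100, .uio_resid = 4900 };
		long start, end;

		start = uio.uio_offset;		/* capture the range... */
		end = start + uio.uio_resid;	/* ...before the write */
		toy_write_backend(&uio);	/* uio is now consumed */
		printf("invalidate bytes [%ld, %ld)\n", start, end);
		return (0);
	}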

Modified: projects/fuse2/sys/kern/vfs_subr.c
==============================================================================
--- projects/fuse2/sys/kern/vfs_subr.c	Fri Apr 12 18:54:09 2019	(r346161)
+++ projects/fuse2/sys/kern/vfs_subr.c	Fri Apr 12 19:05:06 2019	(r346162)
@@ -116,6 +116,8 @@ static void	vfs_knl_assert_locked(void *arg);
 static void	vfs_knl_assert_unlocked(void *arg);
 static void	vnlru_return_batches(struct vfsops *mnt_op);
 static void	destroy_vpollinfo(struct vpollinfo *vi);
+static int	v_inval_buf_range1(struct vnode *vp, struct bufobj *bo,
+		    daddr_t startlbn, daddr_t endlbn);
 
 /*
  * These fences are intended for cases where some synchronization is
@@ -1865,9 +1867,8 @@ int
 vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize)
 {
 	struct buf *bp, *nbp;
-	int anyfreed;
-	daddr_t trunclbn;
 	struct bufobj *bo;
+	daddr_t startlbn;
 
 	CTR5(KTR_VFS, "%s: vp %p with cred %p and block %d:%ju", __func__,
 	    vp, cred, blksize, (uintmax_t)length);
@@ -1875,22 +1876,111 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, off_t 
 	/*
 	 * Round up to the *next* lbn.
 	 */
-	trunclbn = howmany(length, blksize);
+	startlbn = howmany(length, blksize);
 
 	ASSERT_VOP_LOCKED(vp, "vtruncbuf");
+
 restart:
 	bo = &vp->v_bufobj;
 	BO_LOCK(bo);
+	if (v_inval_buf_range1(vp, bo, startlbn, INT64_MAX) == EAGAIN)
+		goto restart;
+
+	if (length > 0) {
+restartsync:
+		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
+			if (bp->b_lblkno > 0)
+				continue;
+			/*
+			 * Since we hold the vnode lock this should only
+			 * fail if we're racing with the buf daemon.
+			 */
+			if (BUF_LOCK(bp,
+			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
+			    BO_LOCKPTR(bo)) == ENOLCK) {
+				goto restart;
+			}
+			VNASSERT((bp->b_flags & B_DELWRI), vp,
+			    ("buf(%p) on dirty queue without DELWRI", bp));
+
+			bremfree(bp);
+			bawrite(bp);
+			BO_LOCK(bo);
+			goto restartsync;
+		}
+	}
+
+	bufobj_wwait(bo, 0, 0);
+	BO_UNLOCK(bo);
+	vnode_pager_setsize(vp, length);
+
+	return (0);
+}
+
+/*
+ * Invalidate the cached pages of a file's buffer within the byte range
+ * [start, end).  Every buffer that overlaps that range will be
+ * invalidated.  This must not result in any dirty data being lost.
+ */
+void
+v_inval_buf_range(struct vnode *vp, off_t start, off_t end, int blksize)
+{
+	struct bufobj *bo;
+	daddr_t startlbn, endlbn;
+
+	/* Round "outwards" */
+	startlbn = start / blksize;
+	endlbn = howmany(end, blksize);
+
+	ASSERT_VOP_LOCKED(vp, "v_inval_buf_range");
+
+restart:
+	bo = &vp->v_bufobj;
+	BO_LOCK(bo);
+
+#ifdef INVARIANTS
+	struct buf *bp, *nbp;
+
+	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
+		/* 
+		 * Disallow invalidating dirty data outside of the requested
+		 * offsets.  Assume that data within the requested offsets is
+		 * being invalidated for a good reason.
+		 */
+		off_t blkstart, blkend;
+
+		blkstart = bp->b_offset;
+		blkend = bp->b_offset + bp->b_bcount;
+		KASSERT(blkstart >= start && blkend <= end,
+			("Invalidating extra dirty data!"));
+	}
+#endif
+
+	if (v_inval_buf_range1(vp, bo, startlbn, endlbn) == EAGAIN)
+		goto restart;
+
+	BO_UNLOCK(bo);
+	vn_pages_remove(vp, OFF_TO_IDX(start), OFF_TO_IDX(end));
+}
+
+/* Like v_inval_buf_range, but takes lbns and expects a locked bufobj */
+static int
+v_inval_buf_range1(struct vnode *vp, struct bufobj *bo,
+    daddr_t startlbn, daddr_t endlbn)
+{
+	struct buf *bp, *nbp;
+	int anyfreed;
+
 	anyfreed = 1;
 	for (;anyfreed;) {
 		anyfreed = 0;
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
-			if (bp->b_lblkno < trunclbn)
+			if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn)
 				continue;
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_LOCKPTR(bo)) == ENOLCK)
-				goto restart;
+				return (EAGAIN);
 
 			bremfree(bp);
 			bp->b_flags |= (B_INVAL | B_RELBUF);
@@ -1904,17 +1994,17 @@ restart:
 			    (nbp->b_vp != vp) ||
 			    (nbp->b_flags & B_DELWRI))) {
 				BO_UNLOCK(bo);
-				goto restart;
+				return (EAGAIN);
 			}
 		}
 
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
-			if (bp->b_lblkno < trunclbn)
+			if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn)
 				continue;
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_LOCKPTR(bo)) == ENOLCK)
-				goto restart;
+				return (EAGAIN);
 			bremfree(bp);
 			bp->b_flags |= (B_INVAL | B_RELBUF);
 			bp->b_flags &= ~B_ASYNC;
@@ -1927,40 +2017,11 @@ restart:
 			    (nbp->b_vp != vp) ||
 			    (nbp->b_flags & B_DELWRI) == 0)) {
 				BO_UNLOCK(bo);
-				goto restart;
+				return (EAGAIN);
 			}
 		}
 	}
-
-	if (length > 0) {
-restartsync:
-		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
-			if (bp->b_lblkno > 0)
-				continue;
-			/*
-			 * Since we hold the vnode lock this should only
-			 * fail if we're racing with the buf daemon.
-			 */
-			if (BUF_LOCK(bp,
-			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
-			    BO_LOCKPTR(bo)) == ENOLCK) {
-				goto restart;
-			}
-			VNASSERT((bp->b_flags & B_DELWRI), vp,
-			    ("buf(%p) on dirty queue without DELWRI", bp));
-
-			bremfree(bp);
-			bawrite(bp);
-			BO_LOCK(bo);
-			goto restartsync;
-		}
-	}
-
-	bufobj_wwait(bo, 0, 0);
-	BO_UNLOCK(bo);
-	vnode_pager_setsize(vp, length);
-
-	return (0);
+	return (0);
 }
 
 static void
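
A quick standalone check of the 'Round "outwards"' arithmetic above: the
byte range is widened to whole logical blocks so that every buffer
overlapping [start, end) is caught, including partially overlapped ones.
(howmany() is shown with its sys/param.h definition; the values are just
an example.)

	#include <stdio.h>

	/* same definition as in sys/param.h */
	#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

	int
	main(void)
	{
		long blksize = 4096;
		long start = 100, end = 5000;	/* byte range of a write */

		/* round down to the block containing start... */
		long startlbn = start / blksize;	/* -> lbn 0 */
		/* ...and up past the block containing end - 1 */
		long endlbn = howmany(end, blksize);	/* -> lbn 2 */

		printf("invalidate lbns [%ld, %ld)\n", startlbn, endlbn);
		return (0);
	}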

Modified: projects/fuse2/sys/sys/vnode.h
==============================================================================
--- projects/fuse2/sys/sys/vnode.h	Fri Apr 12 18:54:09 2019	(r346161)
+++ projects/fuse2/sys/sys/vnode.h	Fri Apr 12 19:05:06 2019	(r346162)
@@ -660,6 +660,8 @@ void	vinactive(struct vnode *, struct thread *);
 int	vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo);
 int	vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length,
 	    int blksize);
+void	v_inval_buf_range(struct vnode *vp, off_t start, off_t end,
+	    int blksize);
 void	vunref(struct vnode *);
 void	vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3);
 int	vrecycle(struct vnode *vp);

Modified: projects/fuse2/tests/sys/fs/fusefs/write.cc
==============================================================================
--- projects/fuse2/tests/sys/fs/fusefs/write.cc	Fri Apr 12 18:54:09 2019	(r346161)
+++ projects/fuse2/tests/sys/fs/fusefs/write.cc	Fri Apr 12 19:05:06 2019	(r346162)
@@ -228,8 +228,7 @@ TEST_F(Write, append_direct_io)
 }
 
 /* A direct write should evict any overlapping cached data */
-/* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=235774 */
-TEST_F(Write, DISABLED_direct_io_evicts_cache)
+TEST_F(Write, direct_io_evicts_cache)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
@@ -407,6 +406,42 @@ TEST_F(Write, DISABLED_mmap)
 
 	free(expected);
 	free(zeros);
+}
+
+/* In WriteThrough mode, a write should evict overlapping cached data */
+TEST_F(WriteThrough, evicts_read_cache)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const char CONTENTS0[] = "abcdefgh";
+	const char CONTENTS1[] = "ijklmnop";
+	uint64_t ino = 42;
+	int fd;
+	ssize_t bufsize = strlen(CONTENTS0) + 1;
+	char readbuf[bufsize];
+
+	expect_lookup(RELPATH, ino, bufsize);
+	expect_open(ino, 0, 1);
+	expect_read(ino, 0, bufsize, bufsize, CONTENTS0);
+	expect_write(ino, 0, bufsize, bufsize, 0, CONTENTS1);
+
+	fd = open(FULLPATH, O_RDWR);
+	EXPECT_LE(0, fd) << strerror(errno);
+
+	// Prime cache
+	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
+
+	// Write directly, evicting cache
+	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
+	ASSERT_EQ(bufsize, write(fd, CONTENTS1, bufsize)) << strerror(errno);
+
+	// Read again.  The cached data was evicted, so expect another FUSE_READ
+	expect_read(ino, 0, bufsize, bufsize, CONTENTS1);
+	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
+	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
+	ASSERT_STREQ(readbuf, CONTENTS1);
+
+	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 TEST_F(WriteThrough, pwrite)

