git: ebea872f8915 - stable/13 - nfscl: Always invalidate buffers for append writes

From: Rick Macklem <rmacklem_at_FreeBSD.org>
Date: Thu, 09 Jun 2022 04:22:05 UTC
The branch stable/13 has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=ebea872f891567ea8610cb8e00acc443b59dbe0d

commit ebea872f891567ea8610cb8e00acc443b59dbe0d
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2022-01-06 22:18:36 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2022-06-09 04:20:42 +0000

    nfscl: Always invalidate buffers for append writes
    
    kib@ reported a problem which was resolved by
    reverting commit 867c27c23a5c, which changed the NFS
    client to use direct RPCs to the server for
    IO_APPEND writes.  He also spotted that the
    code only invalidated buffer cache buffers
    when they were marked NMODIFIED (had been
    written into).
    
    This patch modifies the NFS VOP_WRITE() to
    always invalidate the buffer cache buffers
    and pages for the file when IO_APPEND is
    specified.  It also includes some cleanup
    suggested by kib@.
    
    (cherry picked from commit e4df1036f66dc360606fea053ec04ccabb69df14)
---
 sys/fs/nfsclient/nfs_clbio.c | 67 +++++++++++++++++++++++++++-----------------
 1 file changed, 41 insertions(+), 26 deletions(-)

diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c
index 908f59855730..081aff6c0e7c 100644
--- a/sys/fs/nfsclient/nfs_clbio.c
+++ b/sys/fs/nfsclient/nfs_clbio.c
@@ -947,27 +947,33 @@ ncl_write(struct vop_write_args *ap)
 	 * Synchronously flush pending buffers if we are in synchronous
 	 * mode or if we are appending.
 	 */
-	if (ioflag & (IO_APPEND | IO_SYNC)) {
-		NFSLOCKNODE(np);
-		if (np->n_flag & NMODIFIED) {
-			NFSUNLOCKNODE(np);
-#ifdef notyet /* Needs matching nonblock semantics elsewhere, too. */
-			/*
-			 * Require non-blocking, synchronous writes to
-			 * dirty files to inform the program it needs
-			 * to fsync(2) explicitly.
-			 */
-			if (ioflag & IO_NDELAY)
-				return (EAGAIN);
-#endif
-			np->n_attrstamp = 0;
-			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
-			error = ncl_vinvalbuf(vp, V_SAVE | ((ioflag &
-			    IO_VMIO) != 0 ? V_VMIO : 0), td, 1);
-			if (error != 0)
-				return (error);
-		} else
-			NFSUNLOCKNODE(np);
+	if ((ioflag & IO_APPEND) || ((ioflag & IO_SYNC) && (np->n_flag &
+	    NMODIFIED))) {
+		/*
+		 * For the case where IO_APPEND is being done using a
+		 * direct output (to the NFS server) RPC and
+		 * newnfs_directio_enable is 0, all buffer cache buffers,
+		 * including ones not modified, must be invalidated.
+		 * This ensures that stale data is not read out of the
+		 * buffer cache.  The call also invalidates all mapped
+		 * pages and, since the exclusive lock is held on the vnode,
+		 * new pages cannot be faulted in.
+		 *
+		 * For the case where newnfs_directio_enable is set
+		 * (which is not the default), it is not obvious that
+		 * stale data should be left in the buffer cache, but
+		 * the code has been this way for over a decade without
+		 * complaints.  Note that, unlike doing IO_APPEND via
+		 * a direct write RPC when newnfs_directio_enable is not set,
+		 * when newnfs_directio_enable is set, reading is done via
+		 * direct to NFS server RPCs as well.
+		 */
+		np->n_attrstamp = 0;
+		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
+		error = ncl_vinvalbuf(vp, V_SAVE | ((ioflag &
+		    IO_VMIO) != 0 ? V_VMIO : 0), td, 1);
+		if (error != 0)
+			return (error);
 	}
 
 	orig_resid = uio->uio_resid;
@@ -998,11 +1004,20 @@ ncl_write(struct vop_write_args *ap)
 	if (uio->uio_resid == 0)
 		return (0);
 
-	if (vp->v_type == VREG && ((newnfs_directio_enable && (ioflag &
-	    IO_DIRECT)) || (ioflag & IO_APPEND))) {
-		if ((ioflag & IO_APPEND) != 0)
-			ioflag |= IO_SYNC;
-		return nfs_directio_write(vp, uio, cred, ioflag);
+	/*
+	 * Do IO_APPEND writing via a synchronous direct write.
+	 * This can result in a significant performance improvement.
+	 */
+	if ((newnfs_directio_enable && (ioflag & IO_DIRECT)) ||
+	    (ioflag & IO_APPEND)) {
+		/*
+		 * Direct writes to the server must be done NFSWRITE_FILESYNC,
+		 * because the write data is not cached and, therefore, the
+		 * write cannot be redone after a server reboot.
+		 * Set IO_SYNC to make this happen.
+		 */
+		ioflag |= IO_SYNC;
+		return (nfs_directio_write(vp, uio, cred, ioflag));
 	}
 
 	/*