git: 867c27c23a5c - main - nfscl: Change IO_APPEND writes to direct I/O

From: Rick Macklem <rmacklem@FreeBSD.org>
Date: Wed, 15 Dec 2021 16:39:28 UTC
The branch main has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=867c27c23a5c469b27611cf53cc2390b5a193fa5

commit 867c27c23a5c469b27611cf53cc2390b5a193fa5
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2021-12-15 16:35:48 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2021-12-15 16:35:48 +0000

    nfscl: Change IO_APPEND writes to direct I/O
    
    IO_APPEND writes have always been very slow over NFS, due to
    the need to acquire an up-to-date file size after flushing
    all writes to the NFS server.
    
    This patch switches IO_APPEND writes to direct I/O,
    bypassing the buffer cache.  As a result, flushing of
    buffered writes normally only occurs when the
    open(..O_APPEND..) is done.  It does imply that all
    IO_APPEND writes must be done synchronously and must be
    committed to stable storage on the file server
    (NFSWRITE_FILESYNC).
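    
    As a condensed illustration only (the authoritative change is
    the diff below), the new check in ncl_write() amounts to:
    
        if (vp->v_type == VREG &&
            ((newnfs_directio_enable && (ioflag & IO_DIRECT)) ||
             (ioflag & IO_APPEND))) {
                if ((ioflag & IO_APPEND) != 0)
                        ioflag |= IO_SYNC; /* committed NFSWRITE_FILESYNC */
                return nfs_directio_write(vp, uio, cred, ioflag);
        }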
    
    For a simple test program that does 10,000 IO_APPEND writes
    in a loop, performance improved significantly with this patch.
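    
    A minimal sketch of such a test program (not the actual test
    used; the file path and write size here are assumptions):
    
        #include <fcntl.h>
        #include <unistd.h>
    
        int
        main(void)
        {
                static char buf[512];   /* write size is an assumption */
                int fd, i;
    
                /* Open for append on an NFS mount (path assumed). */
                fd = open("/mnt/nfs/appendfile",
                    O_WRONLY | O_CREAT | O_APPEND, 0644);
                if (fd < 0)
                        return (1);
                /* Do 10,000 IO_APPEND writes in a loop. */
                for (i = 0; i < 10000; i++)
                        if (write(fd, buf, sizeof(buf)) != sizeof(buf))
                                return (1);
                close(fd);
                return (0);
        }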
    
    For a UFS-exported file system, the test ran 12x faster.
    This drops to 3x faster when an open(2)/close(2) pair is
    done for each loop iteration.
    For a ZFS-exported file system, the test ran 40% faster.
    
    The much smaller improvement may have been because the ZFS
    file system I tested against does not have a ZIL log and
    does have "sync" enabled.
    
    Note that IO_APPEND write performance is still much slower
    than it is on local file systems.
    
    Although this is a simple patch, it does result in a
    significant change in semantics, so I have given it a
    long MFC time.
    
    Tested by:      otis
    MFC after:      3 months
---
 sys/fs/nfsclient/nfs_clbio.c   | 6 +++++-
 sys/fs/nfsclient/nfs_clvnops.c | 8 ++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c
index 29bc66669dfb..c3339617acce 100644
--- a/sys/fs/nfsclient/nfs_clbio.c
+++ b/sys/fs/nfsclient/nfs_clbio.c
@@ -1001,8 +1001,12 @@ ncl_write(struct vop_write_args *ap)
 	if (uio->uio_resid == 0)
 		return (0);
 
-	if (newnfs_directio_enable && (ioflag & IO_DIRECT) && vp->v_type == VREG)
+	if (vp->v_type == VREG && ((newnfs_directio_enable && (ioflag &
+	    IO_DIRECT)) || (ioflag & IO_APPEND))) {
+		if ((ioflag & IO_APPEND) != 0)
+			ioflag |= IO_SYNC;
 		return nfs_directio_write(vp, uio, cred, ioflag);
+	}
 
 	/*
 	 * Maybe this should be above the vnode op call, but so long as
diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c
index ba2ba27b8fb6..db2ef26a77c6 100644
--- a/sys/fs/nfsclient/nfs_clvnops.c
+++ b/sys/fs/nfsclient/nfs_clvnops.c
@@ -709,8 +709,8 @@ nfs_open(struct vop_open_args *ap)
 	/*
 	 * If the object has >= 1 O_DIRECT active opens, we disable caching.
 	 */
-	if (newnfs_directio_enable && (fmode & O_DIRECT) &&
-	    (vp->v_type == VREG)) {
+	if (vp->v_type == VREG && ((newnfs_directio_enable && (fmode &
+	    O_DIRECT)) || (fmode & O_APPEND))) {
 		if (np->n_directio_opens == 0) {
 			NFSUNLOCKNODE(np);
 			if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
@@ -967,11 +967,11 @@ nfs_close(struct vop_close_args *ap)
 			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
 			    (gid_t)0);
 	}
-	if (newnfs_directio_enable)
+	if (vp->v_type == VREG && ((newnfs_directio_enable && (fmode &
+	    O_DIRECT)) || (fmode & O_APPEND))) {
 		KASSERT((np->n_directio_asyncwr == 0),
 			("nfs_close: dirty unflushed (%d) directio buffers\n",
 			 np->n_directio_asyncwr));
-	if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
 		NFSLOCKNODE(np);
 		KASSERT((np->n_directio_opens > 0), 
 			("nfs_close: unexpectedly value (0) of n_directio_opens\n"));