git: ca826694e3b0 - stable/12 - nfscl: Fix a race between Setattr of size and Lookup

From: Rick Macklem <rmacklem_at_FreeBSD.org>
Date: Thu, 18 Nov 2021 00:21:30 UTC
The branch stable/12 has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=ca826694e3b08133b9e16744e1178863cfc4c16b

commit ca826694e3b08133b9e16744e1178863cfc4c16b
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2021-10-30 23:35:02 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2021-11-17 23:07:11 +0000

    nfscl: Fix a race between Setattr of size and Lookup
    
    PR#259071 provides a test program that fails for the NFS client.
    Testing with it, there appears to be a race between Lookup
    and VOPs like Setattr-of-size, where Lookup ends up loading
    stale attributes (including what might be the wrong file size)
    into the NFS vnode's attribute cache.
    
    The race occurs when the modifying VOP (which holds a lock
    on the vnode), blocks the acquisition of the vnode in Lookup,
    after the RPC (with now potentially stale attributes).
    
    Here's what seems to happen:
    Child                                Parent
    
    does stat(), which does
    VOP_LOOKUP(), doing the Lookup
    RPC with the directory vnode
    locked, acquiring file attributes
    valid at this point in time
    
    blocks waiting for locked file       does ftruncate(), which
    vnode                                does VOP_SETATTR() of Size,
                                         changing the file's size
                                         while holding an exclusive
                                         lock on the file's vnode
                                         releases the vnode lock
    acquires file vnode and fills in
    now stale attributes including
    the old wrong Size
                                         does a read() which returns
                                         wrong data size
    
    This patch fixes the problem by saving a timestamp in the NFS vnode
    in the VOPs that modify the file (Setattr-of-size, Allocate).
    Then lookup/readdirplus compares that timestamp with the time just
    before starting the RPC after it has acquired the file's vnode.
    If the modifying RPC occurred during the Lookup, the attributes
    in the RPC reply are discarded, since they might be stale.
    
    With this patch the test program works as expected.
    
    Note that the test program does not fail on a July stable/12,
    although this race is in the NFS client code.  I suspect a
    fairly recent change to the name caching code exposed this
    bug.
    
    PR:     259071
    (cherry picked from commit 2be417843a04f25e631e99d5188eb2652b13d80d)
---
 sys/fs/nfsclient/nfs_clrpcops.c | 29 ++++++++++++++--
 sys/fs/nfsclient/nfs_clvnops.c  | 73 +++++++++++++++++++++++++++++++++++++----
 sys/fs/nfsclient/nfsnode.h      |  1 +
 3 files changed, 93 insertions(+), 10 deletions(-)

diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c
index 50f26a5d70c3..d6072bf2523e 100644
--- a/sys/fs/nfsclient/nfs_clrpcops.c
+++ b/sys/fs/nfsclient/nfs_clrpcops.c
@@ -3308,7 +3308,8 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 	nfsattrbit_t attrbits, dattrbits;
 	size_t tresid;
 	u_int32_t *tl2 = NULL, rderr;
-	struct timespec dctime;
+	struct timespec dctime, ts;
+	bool attr_ok;
 
 	KASSERT(uiop->uio_iovcnt == 1 &&
 	    (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
@@ -3488,6 +3489,7 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
 			(void) nfsrv_putattrbit(nd, &dattrbits);
 		}
+		nanouptime(&ts);
 		error = nfscl_request(nd, vp, p, cred, stuff);
 		if (error)
 			return (error);
@@ -3641,6 +3643,7 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 				ncookie.lval[1];
 
 			    if (nfhp != NULL) {
+				attr_ok = true;
 				if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
 				    dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
 				    VREF(vp);
@@ -3670,12 +3673,32 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 				    if (!error) {
 					newvp = NFSTOV(np);
 					unlocknewvp = 1;
+					/*
+					 * If n_localmodtime >= time before RPC,
+					 * then a file modification operation,
+					 * such as VOP_SETATTR() of size, has
+					 * occurred while the Lookup RPC and
+					 * acquisition of the vnode happened. As
+					 * such, the attributes might be stale,
+					 * with possibly an incorrect size.
+					 */
+					NFSLOCKNODE(np);
+					if (timespecisset(
+					    &np->n_localmodtime) &&
+					    timespeccmp(&np->n_localmodtime,
+					    &ts, >=)) {
+					    NFSCL_DEBUG(4, "nfsrpc_readdirplus:"
+						" localmod stale attributes\n");
+					    attr_ok = false;
+					}
+					NFSUNLOCKNODE(np);
 				    }
 				}
 				nfhp = NULL;
 				if (newvp != NULLVP) {
-				    error = nfscl_loadattrcache(&newvp,
-					&nfsva, NULL, NULL, 0, 0);
+				    if (attr_ok)
+					error = nfscl_loadattrcache(&newvp,
+					    &nfsva, NULL, NULL, 0, 0);
 				    if (error) {
 					if (unlocknewvp)
 					    vput(newvp);
diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c
index ffa8e41ec578..b4cbde91c664 100644
--- a/sys/fs/nfsclient/nfs_clvnops.c
+++ b/sys/fs/nfsclient/nfs_clvnops.c
@@ -962,6 +962,7 @@ nfs_setattr(struct vop_setattr_args *ap)
 	struct vattr *vap = ap->a_vap;
 	int error = 0;
 	u_quad_t tsize;
+	struct timespec ts;
 
 #ifndef nolint
 	tsize = (u_quad_t)0;
@@ -1053,11 +1054,18 @@ nfs_setattr(struct vop_setattr_args *ap)
 			NFSUNLOCKNODE(np);
 	}
 	error = nfs_setattrrpc(vp, vap, ap->a_cred, td);
-	if (error && vap->va_size != VNOVAL) {
-		NFSLOCKNODE(np);
-		np->n_size = np->n_vattr.na_size = tsize;
-		vnode_pager_setsize(vp, tsize);
-		NFSUNLOCKNODE(np);
+	if (vap->va_size != VNOVAL) {
+		if (error == 0) {
+			nanouptime(&ts);
+			NFSLOCKNODE(np);
+			np->n_localmodtime = ts;
+			NFSUNLOCKNODE(np);
+		} else {
+			NFSLOCKNODE(np);
+			np->n_size = np->n_vattr.na_size = tsize;
+			vnode_pager_setsize(vp, tsize);
+			NFSUNLOCKNODE(np);
+		}
 	}
 	return (error);
 }
@@ -1114,8 +1122,8 @@ nfs_lookup(struct vop_lookup_args *ap)
 	struct nfsfh *nfhp;
 	struct nfsvattr dnfsva, nfsva;
 	struct vattr vattr;
-	struct timespec nctime;
-	
+	struct timespec nctime, ts;
+
 	*vpp = NULLVP;
 	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
@@ -1219,6 +1227,7 @@ nfs_lookup(struct vop_lookup_args *ap)
 	error = 0;
 	newvp = NULLVP;
 	NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses);
+	nanouptime(&ts);
 	error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
 	    NULL);
@@ -1285,6 +1294,22 @@ nfs_lookup(struct vop_lookup_args *ap)
 		if (error)
 			return (error);
 		newvp = NFSTOV(np);
+		/*
+		 * If n_localmodtime >= time before RPC, then
+		 * a file modification operation, such as
+		 * VOP_SETATTR() of size, has occurred while
+		 * the Lookup RPC and acquisition of the vnode
+		 * happened.  As such, the attributes might
+		 * be stale, with possibly an incorrect size.
+		 */
+		NFSLOCKNODE(np);
+		if (timespecisset(&np->n_localmodtime) &&
+		    timespeccmp(&np->n_localmodtime, &ts, >=)) {
+			NFSCL_DEBUG(4, "nfs_lookup: rename localmod "
+			    "stale attributes\n");
+			attrflag = 0;
+		}
+		NFSUNLOCKNODE(np);
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
@@ -1344,6 +1369,22 @@ nfs_lookup(struct vop_lookup_args *ap)
 		if (error)
 			return (error);
 		newvp = NFSTOV(np);
+		/*
+		 * If n_localmodtime >= time before RPC, then
+		 * a file modification operation, such as
+		 * VOP_SETATTR() of size, has occurred while
+		 * the Lookup RPC and acquisition of the vnode
+		 * happened.  As such, the attributes might
+		 * be stale, with possibly an incorrect size.
+		 */
+		NFSLOCKNODE(np);
+		if (timespecisset(&np->n_localmodtime) &&
+		    timespeccmp(&np->n_localmodtime, &ts, >=)) {
+			NFSCL_DEBUG(4, "nfs_lookup: localmod "
+			    "stale attributes\n");
+			attrflag = 0;
+		}
+		NFSUNLOCKNODE(np);
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
@@ -2561,7 +2602,9 @@ nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred,
 	struct componentname cn;
 	int error = 0, attrflag, dattrflag;
 	u_int hash;
+	struct timespec ts;
 
+	nanouptime(&ts);
 	error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva,
 	    &nfhp, &attrflag, &dattrflag, NULL);
 	if (dattrflag)
@@ -2622,6 +2665,22 @@ printf("replace=%s\n",nnn);
 		    if (error)
 			return (error);
 		    newvp = NFSTOV(np);
+		    /*
+		     * If n_localmodtime >= time before RPC, then
+		     * a file modification operation, such as
+		     * VOP_SETATTR() of size, has occurred while
+		     * the Lookup RPC and acquisition of the vnode
+		     * happened.  As such, the attributes might
+		     * be stale, with possibly an incorrect size.
+		     */
+		    NFSLOCKNODE(np);
+		    if (timespecisset(&np->n_localmodtime) &&
+			timespeccmp(&np->n_localmodtime, &ts, >=)) {
+			NFSCL_DEBUG(4, "nfs_lookitup: localmod "
+			    "stale attributes\n");
+			attrflag = 0;
+		    }
+		    NFSUNLOCKNODE(np);
 		}
 		if (!attrflag && *npp == NULL) {
 			if (newvp == dvp)
diff --git a/sys/fs/nfsclient/nfsnode.h b/sys/fs/nfsclient/nfsnode.h
index 66a2de31551a..c3ddcafa9d6f 100644
--- a/sys/fs/nfsclient/nfsnode.h
+++ b/sys/fs/nfsclient/nfsnode.h
@@ -128,6 +128,7 @@ struct nfsnode {
 	u_int64_t		 n_change;	/* old Change attribute */
 	struct nfsv4node	*n_v4;		/* extra V4 stuff */
 	struct ucred		*n_writecred;	/* Cred. for putpages */
+	struct timespec		n_localmodtime;	/* Last local modify */
 };
 
 #define	n_atim		n_un1.nf_atim