git: 701eb03cc0dc - stable/13 - nfscl: Fix a deadlock related to the NFSv4 clientID lock

From: Rick Macklem <rmacklem_at_FreeBSD.org>
Date: Wed, 20 Oct 2021 01:42:14 UTC
The branch stable/13 has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=701eb03cc0dca907de872a41407b8487c38429fc

commit 701eb03cc0dca907de872a41407b8487c38429fc
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2021-10-12 04:58:24 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2021-10-20 01:36:56 +0000

    nfscl: Fix a deadlock related to the NFSv4 clientID lock
    
    Without this patch, it is possible for a process doing an NFSv4
    Open/create of a file to block to allow another process
    to acquire the exclusive lock on the clientID when holding
    a shared lock on the clientID.  As such, both processes
    deadlock, with one wanting the exclusive lock, while the
    other holds the shared lock.  This deadlock is unlikely to occur
    unless delegations are in use on the NFSv4 mount.
    
    This patch fixes the problem by not deferring to the process
    waiting for the exclusive lock when a shared lock (reference cnt)
    is already held by the process.
    
    This problem was detected during a recent NFSv4 interoperability
    testing event held by the IETF working group.
    
    (cherry picked from commit 120b20bdf49630cf2a7dbc5f93b9e985e1f4f198)
---
 sys/fs/nfs/nfs_var.h            |  4 ++--
 sys/fs/nfsclient/nfs_clrpcops.c | 12 ++++++------
 sys/fs/nfsclient/nfs_clstate.c  | 29 ++++++++++++++++-------------
 sys/fs/nfsclient/nfs_clvfsops.c |  2 +-
 4 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h
index 9cbaeae361a6..846ab2503981 100644
--- a/sys/fs/nfs/nfs_var.h
+++ b/sys/fs/nfs/nfs_var.h
@@ -569,12 +569,12 @@ void nfsrpc_bindconnsess(CLIENT *, void *, struct ucred *);
 /* nfs_clstate.c */
 int nfscl_open(vnode_t, u_int8_t *, int, u_int32_t, int,
     struct ucred *, NFSPROC_T *, struct nfsclowner **, struct nfsclopen **,
-    int *, int *, int);
+    int *, int *, int, bool);
 int nfscl_getstateid(vnode_t, u_int8_t *, int, u_int32_t, int, struct ucred *,
     NFSPROC_T *, nfsv4stateid_t *, void **);
 void nfscl_ownerrelease(struct nfsmount *, struct nfsclowner *, int, int, int);
 void nfscl_openrelease(struct nfsmount *, struct nfsclopen *, int, int);
-int nfscl_getcl(struct mount *, struct ucred *, NFSPROC_T *, bool,
+int nfscl_getcl(struct mount *, struct ucred *, NFSPROC_T *, bool, bool,
     struct nfsclclient **);
 struct nfsclclient *nfscl_findcl(struct nfsmount *);
 void nfscl_clientrelease(struct nfsclclient *);
diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c
index 984a63484777..dbbfdb3b2976 100644
--- a/sys/fs/nfsclient/nfs_clrpcops.c
+++ b/sys/fs/nfsclient/nfs_clrpcops.c
@@ -390,7 +390,7 @@ else printf(" fhl=0\n");
 	do {
 	    dp = NULL;
 	    error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
-		cred, p, NULL, &op, &newone, &ret, 1);
+		cred, p, NULL, &op, &newone, &ret, 1, true);
 	    if (error) {
 		return (error);
 	    }
@@ -2061,7 +2061,7 @@ nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
 		dp = NULL;
 		error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
 		    NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
-		    NULL, 1);
+		    NULL, 1, true);
 		if (error)
 			return (error);
 		if (nmp->nm_clp != NULL)
@@ -2347,7 +2347,7 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
 		 */
 		error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
 		    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
-		    cred, p, NULL, &op, &newone, NULL, 0);
+		    cred, p, NULL, &op, &newone, NULL, 0, false);
 		if (error)
 			goto nfsmout;
 		op->nfso_stateid = stateid;
@@ -3968,7 +3968,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
 	do {
 	    nd->nd_repstat = 0;
 	    if (op == F_GETLK) {
-		error = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
+		error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
 		if (error)
 			return (error);
 		error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
@@ -3985,7 +3985,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
 		 * We must loop around for all lockowner cases.
 		 */
 		callcnt = 0;
-		error = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
+		error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
 		if (error)
 			return (error);
 		do {
@@ -7931,7 +7931,7 @@ nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
 			 */
 			error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
 			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
-			    cred, p, NULL, &op, &newone, NULL, 0);
+			    cred, p, NULL, &op, &newone, NULL, 0, false);
 			if (error != 0)
 				goto nfsmout;
 			op->nfso_stateid = stateid;
diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c
index 8d1c4de0ba8e..5aebd68dd9e1 100644
--- a/sys/fs/nfsclient/nfs_clstate.c
+++ b/sys/fs/nfsclient/nfs_clstate.c
@@ -218,7 +218,7 @@ static short *nfscl_cberrmap[] = {
 int
 nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
     struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp,
-    struct nfsclopen **opp, int *newonep, int *retp, int lockit)
+    struct nfsclopen **opp, int *newonep, int *retp, int lockit, bool firstref)
 {
 	struct nfsclclient *clp;
 	struct nfsclowner *owp, *nowp;
@@ -246,7 +246,7 @@ nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
 		fhlen - 1, M_NFSCLOPEN, M_WAITOK);
 	    nop->nfso_hash.le_prev = NULL;
 	}
-	ret = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
+	ret = nfscl_getcl(vp->v_mount, cred, p, false, firstref, &clp);
 	if (ret != 0) {
 		free(nowp, M_NFSCLOWNER);
 		if (nop != NULL)
@@ -865,7 +865,7 @@ nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error,
  */
 int
 nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
-    bool tryminvers, struct nfsclclient **clpp)
+    bool tryminvers, bool firstref, struct nfsclclient **clpp)
 {
 	struct nfsclclient *clp;
 	struct nfsclclient *newclp = NULL;
@@ -934,14 +934,16 @@ nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
 		    NFSCLSTATEMUTEXPTR, mp);
 	if (igotlock == 0) {
 		/*
-		 * Call nfsv4_lock() with "iwantlock == 0" so that it will
-		 * wait for a pending exclusive lock request.  This gives the
-		 * exclusive lock request priority over this shared lock
-		 * request.
+		 * Call nfsv4_lock() with "iwantlock == 0" on the firstref so
+		 * that it will wait for a pending exclusive lock request.
+		 * This gives the exclusive lock request priority over this
+		 * shared lock request.
 		 * An exclusive lock on nfsc_lock is used mainly for server
-		 * crash recoveries.
+		 * crash recoveries and delegation recalls.
 		 */
-		nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR, mp);
+		if (firstref)
+			nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR,
+			    mp);
 		nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
 	}
 	if (igotlock == 0 && NFSCL_FORCEDISM(mp)) {
@@ -1123,7 +1125,8 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
 		if (recovery)
 			clp = rclp;
 		else
-			error = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
+			error = nfscl_getcl(vp->v_mount, cred, p, false, true,
+			    &clp);
 	}
 	if (error) {
 		free(nlp, M_NFSCLLOCKOWNER);
@@ -1454,7 +1457,7 @@ nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
 		end = NFS64BITSSET;
 	}
 
-	error = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
+	error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
 	if (error)
 		return (1);
 	nfscl_filllockowner(id, own, flags);
@@ -3247,7 +3250,7 @@ nfscl_getclose(vnode_t vp, struct nfsclclient **clpp)
 	struct nfsfh *nfhp;
 	int error, notdecr;
 
-	error = nfscl_getcl(vp->v_mount, NULL, NULL, false, &clp);
+	error = nfscl_getcl(vp->v_mount, NULL, NULL, false, true, &clp);
 	if (error)
 		return (error);
 	*clpp = clp;
@@ -3321,7 +3324,7 @@ nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p)
 	struct nfsclrecalllayout *recallp;
 	int error;
 
-	error = nfscl_getcl(vp->v_mount, NULL, NULL, false, &clp);
+	error = nfscl_getcl(vp->v_mount, NULL, NULL, false, true, &clp);
 	if (error)
 		return (error);
 	*clpp = clp;
diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c
index d6ec1f9a3825..57ac7f59f38d 100644
--- a/sys/fs/nfsclient/nfs_clvfsops.c
+++ b/sys/fs/nfsclient/nfs_clvfsops.c
@@ -1611,7 +1611,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
 	/* For NFSv4, get the clientid now. */
 	if ((argp->flags & NFSMNT_NFSV4) != 0) {
 		NFSCL_DEBUG(3, "at getcl\n");
-		error = nfscl_getcl(mp, cred, td, tryminvers, &clp);
+		error = nfscl_getcl(mp, cred, td, tryminvers, true, &clp);
 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
 		if (error != 0)
 			goto bad;