git: 11d1d20e3721 - stable/12 - nfscl: Make nfscl_getlayout() acquire the correct pNFS layout

From: Rick Macklem <rmacklem_at_FreeBSD.org>
Date: Fri, 19 Nov 2021 02:02:42 UTC
The branch stable/12 has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=11d1d20e372173288f9ddb35d049277b3fe20cd7

commit 11d1d20e372173288f9ddb35d049277b3fe20cd7
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2021-10-13 22:48:54 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2021-11-19 01:59:02 +0000

    nfscl: Make nfscl_getlayout() acquire the correct pNFS layout
    
    Without this patch, if a pNFS read layout has already been acquired
    for a file, writes would be redirected to the Metadata Server (MDS),
    because nfscl_getlayout() would not acquire a read/write layout for
    the file.  This happened because there was no "mode" argument to
    nfscl_getlayout() to indicate whether reading or writing was being done.
    Since doing I/O through the Metadata Server is not encouraged for some
    pNFS servers, it is preferable to get a read/write layout for writes
    instead of redirecting the write to the MDS.
    
    This patch adds a access mode argument to nfscl_getlayout() and
    nfsrpc_getlayout(), so that nfscl_getlayout() knows to acquire a read/write
    layout for writing, even if a read layout has already been acquired.
    This patch only affects NFSv4.1/4.2 client behaviour when pNFS ("pnfs" mount
    option against a server that supports pNFS) is in use.
    
    This problem was detected during a recent NFSv4 interoperability
    testing event held by the IETF working group.
    
    (cherry picked from commit 24af0fcdfc4983fd3cef10f9d949aca79476c2c2)
---
 sys/fs/nfs/nfs_var.h            |  2 +-
 sys/fs/nfsclient/nfs_clrpcops.c | 14 +++++++-------
 sys/fs/nfsclient/nfs_clstate.c  |  7 ++++---
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h
index 91914314970d..e93d6e9bdf27 100644
--- a/sys/fs/nfs/nfs_var.h
+++ b/sys/fs/nfs/nfs_var.h
@@ -602,7 +602,7 @@ int nfscl_layout(struct nfsmount *, vnode_t, u_int8_t *, int, nfsv4stateid_t *,
     int, int, struct nfsclflayouthead *, struct nfscllayout **, struct ucred *,
     NFSPROC_T *);
 struct nfscllayout *nfscl_getlayout(struct nfsclclient *, uint8_t *, int,
-    uint64_t, struct nfsclflayout **, int *);
+    uint64_t, uint32_t, struct nfsclflayout **, int *);
 void nfscl_dserr(uint32_t, uint32_t, struct nfscldevinfo *,
     struct nfscllayout *, struct nfsclds *);
 void nfscl_cancelreqs(struct nfsclds *);
diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c
index f2eb3af5f101..992b8ad8bdcd 100644
--- a/sys/fs/nfsclient/nfs_clrpcops.c
+++ b/sys/fs/nfsclient/nfs_clrpcops.c
@@ -138,7 +138,7 @@ static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
 static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
     struct acl *, nfsv4stateid_t *, void *);
 static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
-    uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
+    uint32_t, uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
     struct ucred *, NFSPROC_T *);
 static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
     struct sockaddr_in6 *, sa_family_t, int, struct nfsclds **, NFSPROC_T *);
@@ -5345,8 +5345,8 @@ nfsmout:
  */
 static int
 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
-    int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off,
-    struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
+    int iomode, uint32_t rw, uint32_t *notifybitsp, nfsv4stateid_t *stateidp,
+    uint64_t off, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
 {
 	struct nfscllayout *lyp;
 	struct nfsclflayout *flp;
@@ -5366,7 +5366,7 @@ nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
 	 * flp == NULL.
 	 */
 	lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
-	    off, &flp, &recalled);
+	    off, rw, &flp, &recalled);
 	islocked = 0;
 	if (lyp == NULL || flp == NULL) {
 		if (recalled != 0)
@@ -5690,7 +5690,7 @@ nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
 	/* Search for a layout for this file. */
 	off = uiop->uio_offset;
 	layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
-	    np->n_fhp->nfh_len, off, &rflp, &recalled);
+	    np->n_fhp->nfh_len, off, rwaccess, &rflp, &recalled);
 	if (layp == NULL || rflp == NULL) {
 		if (recalled != 0) {
 			NFSFREECRED(newcred);
@@ -5710,7 +5710,7 @@ nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
 		else
 			iolaymode = NFSLAYOUTIOMODE_READ;
 		error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
-		    NULL, &stateid, off, &layp, newcred, p);
+		    rwaccess, NULL, &stateid, off, &layp, newcred, p);
 		if (error != 0) {
 			NFSLOCKNODE(np);
 			np->n_flag |= NNOLAYOUT;
@@ -7124,7 +7124,7 @@ nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
 	 * flp == NULL.
 	 */
-	lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, &flp,
+	lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, mode, &flp,
 	    &recalled);
 	NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
 	if (lyp == NULL)
diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c
index 279f7e38c346..9eeb2ca10865 100644
--- a/sys/fs/nfsclient/nfs_clstate.c
+++ b/sys/fs/nfsclient/nfs_clstate.c
@@ -5030,7 +5030,8 @@ nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
  */
 struct nfscllayout *
 nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen,
-    uint64_t off, struct nfsclflayout **retflpp, int *recalledp)
+    uint64_t off, uint32_t rwaccess, struct nfsclflayout **retflpp,
+    int *recalledp)
 {
 	struct nfscllayout *lyp;
 	mount_t mp;
@@ -5046,8 +5047,8 @@ nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen,
 			TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
 			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
 			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
-			error = nfscl_findlayoutforio(lyp, off,
-			    NFSV4OPEN_ACCESSREAD, retflpp);
+			error = nfscl_findlayoutforio(lyp, off, rwaccess,
+			    retflpp);
 			if (error == 0)
 				nfsv4_getref(&lyp->nfsly_lock, NULL,
 				    NFSCLSTATEMUTEXPTR, mp);