git: f8dc06303bac - main - nfsd: Fix the NFSv4.2 pNFS MDS server for NFSERR_NOSPC via LayoutError

From: Rick Macklem <rmacklem_at_FreeBSD.org>
Date: Tue, 09 Nov 2021 00:01:47 UTC
The branch main has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=f8dc06303bac39be53872de7429aa54694b3f86a

commit f8dc06303bac39be53872de7429aa54694b3f86a
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2021-11-08 23:58:00 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2021-11-08 23:58:00 +0000

    nfsd: Fix the NFSv4.2 pNFS MDS server for NFSERR_NOSPC via LayoutError
    
    If a pNFS server's DS runs out of disk space, it replies
    NFSERR_NOSPC to the client doing writing.  For the Linux
    client, it then sends a LayoutError RPC to the MDS server to
    tell it about the error and keeps retrying, doing repeated
    LayoutGets to the MDS and Write RPCs to the DS.  The Linux client is
    "stuck" until disk space on the DS is free'd up unless
    a subsequent LayoutGet request is sent a NFSERR_NOSPC
    reply.
    The looping problem still occurs for NFSv4.1 mounts, but no
    fix for this is known at this time.
    
    This patch changes the pNFS MDS server to reply to LayoutGet
    operations with NFSERR_NOSPC once a LayoutError reports the
    problem, until the DS has available space.  This keeps the Linux
    NFSv4.2 from looping.
    
    Found during recent testing because of issues w.r.t. a DS
    being out of space found during a recent IEFT NFSv4 working
    group testing event.
    
    MFC after:      2 weeks
---
 sys/fs/nfs/nfs_var.h              |  2 ++
 sys/fs/nfs/nfsrvstate.h           |  3 +++
 sys/fs/nfsserver/nfs_nfsdport.c   | 53 +++++++++++++++++++++++++++++++++++++++
 sys/fs/nfsserver/nfs_nfsdserv.c   |  4 +++
 sys/fs/nfsserver/nfs_nfsdsocket.c |  3 +++
 sys/fs/nfsserver/nfs_nfsdstate.c  | 53 +++++++++++++++++++++++++++++++++++++--
 6 files changed, 116 insertions(+), 2 deletions(-)

diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h
index 7e4136a6ff67..8fbf72345167 100644
--- a/sys/fs/nfs/nfs_var.h
+++ b/sys/fs/nfs/nfs_var.h
@@ -170,6 +170,7 @@ int nfsrv_copymr(vnode_t, vnode_t, vnode_t, struct nfsdevice *,
 int nfsrv_mdscopymr(char *, char *, char *, char *, int *, char *, NFSPROC_T *,
     struct vnode **, struct vnode **, struct pnfsdsfile **, struct nfsdevice **,
     struct nfsdevice **);
+void nfsrv_marknospc(char *, bool);
 
 /* nfs_nfsdserv.c */
 int nfsrvd_access(struct nfsrv_descript *, int,
@@ -770,6 +771,7 @@ int nfsvno_listxattr(struct vnode *, uint64_t, struct ucred *, struct thread *,
 void nfsm_trimtrailing(struct nfsrv_descript *, struct mbuf *, char *, int,
     int);
 bool nfsrv_checkwrongsec(struct nfsrv_descript *, int, enum vtype);
+void nfsrv_checknospc(void);
 
 /* nfs_commonkrpc.c */
 int newnfs_nmcancelreqs(struct nfsmount *);
diff --git a/sys/fs/nfs/nfsrvstate.h b/sys/fs/nfs/nfsrvstate.h
index 427d5b132281..9eebeece9727 100644
--- a/sys/fs/nfs/nfsrvstate.h
+++ b/sys/fs/nfs/nfsrvstate.h
@@ -132,6 +132,7 @@ struct nfslayout {
 	nfsv4stateid_t		lay_stateid;
 	nfsquad_t		lay_clientid;
 	fhandle_t		lay_fh;
+	char			lay_deviceid[NFSX_V4DEVICEID];
 	fsid_t			lay_fsid;
 	uint32_t		lay_layoutlen;
 	uint16_t		lay_mirrorcnt;
@@ -147,6 +148,7 @@ struct nfslayout {
 #define	NFSLAY_RECALL	0x0004
 #define	NFSLAY_RETURNED	0x0008
 #define	NFSLAY_CALLB	0x0010
+#define	NFSLAY_NOSPC	0x0020
 
 /*
  * Structure for an NFSv4.1 session.
@@ -353,6 +355,7 @@ struct nfsdevice {
 	char			*nfsdev_host;
 	fsid_t			nfsdev_mdsfsid;
 	uint32_t		nfsdev_nextdir;
+	bool			nfsdev_nospc;
 	vnode_t			nfsdev_dsdir[0];
 };
 
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index 806f4a8545b8..add13aca1d90 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -6949,6 +6949,59 @@ nfsrv_checkwrongsec(struct nfsrv_descript *nd, int nextop, enum vtype vtyp)
 	return (true);
 }
 
+/*
+ * Check DSs marked no space.
+ */
+void
+nfsrv_checknospc(void)
+{
+	struct statfs *tsf;
+	struct nfsdevice *ds;
+	struct vnode **dvpp, **tdvpp, *dvp;
+	char *devid, *tdevid;
+	int cnt, error = 0, i;
+
+	if (nfsrv_devidcnt <= 0)
+		return;
+	dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK);
+	devid = malloc(nfsrv_devidcnt * NFSX_V4DEVICEID, M_TEMP, M_WAITOK);
+	tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK);
+
+	/* Get an array of the dvps for the DSs. */
+	tdvpp = dvpp;
+	tdevid = devid;
+	i = 0;
+	NFSDDSLOCK();
+	/* First, search for matches for same file system. */
+	TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
+		if (ds->nfsdev_nmp != NULL && ds->nfsdev_nospc) {
+			if (++i > nfsrv_devidcnt)
+				break;
+			*tdvpp++ = ds->nfsdev_dvp;
+			NFSBCOPY(ds->nfsdev_deviceid, tdevid, NFSX_V4DEVICEID);
+			tdevid += NFSX_V4DEVICEID;
+		}
+	}
+	NFSDDSUNLOCK();
+
+	/* Do a VFS_STATFS() for each of the DSs and clear no space. */
+	cnt = i;
+	tdvpp = dvpp;
+	tdevid = devid;
+	for (i = 0; i < cnt && error == 0; i++) {
+		dvp = *tdvpp++;
+		error = VFS_STATFS(dvp->v_mount, tsf);
+		if (error == 0 && tsf->f_bavail > 0) {
+			NFSD_DEBUG(1, "nfsrv_checknospc: reset nospc\n");
+			nfsrv_marknospc(tdevid, false);
+		}
+		tdevid += NFSX_V4DEVICEID;
+	}
+	free(tsf, M_TEMP);
+	free(dvpp, M_TEMP);
+	free(devid, M_TEMP);
+}
+
 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
 
 /*
diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c
index 4d92cb61614d..a590c599518f 100644
--- a/sys/fs/nfsserver/nfs_nfsdserv.c
+++ b/sys/fs/nfsserver/nfs_nfsdserv.c
@@ -5052,6 +5052,10 @@ nfsrvd_layouterror(struct nfsrv_descript *nd, __unused int isdgram,
 		if (stat != NFSERR_ACCES && stat != NFSERR_STALE &&
 		    stat != NFSERR_NOSPC)
 			nfsrv_delds(devid, curthread);
+
+		/* For NFSERR_NOSPC, mark all deviceids and layouts. */
+		if (stat == NFSERR_NOSPC)
+			nfsrv_marknospc(devid, true);
 	}
 nfsmout:
 	vput(vp);
diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c b/sys/fs/nfsserver/nfs_nfsdsocket.c
index fdd46b6290e9..997b7f5b88e2 100644
--- a/sys/fs/nfsserver/nfs_nfsdsocket.c
+++ b/sys/fs/nfsserver/nfs_nfsdsocket.c
@@ -722,6 +722,9 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag,
 
 	p = curthread;
 
+	/* Check for and optionally clear the no space flags for DSs. */
+	nfsrv_checknospc();
+
 	NFSVNO_EXINIT(&vpnes);
 	NFSVNO_EXINIT(&savevpnes);
 	/*
diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c
index 797b9b0a466e..e6a919093738 100644
--- a/sys/fs/nfsserver/nfs_nfsdstate.c
+++ b/sys/fs/nfsserver/nfs_nfsdstate.c
@@ -6817,9 +6817,14 @@ nfsrv_layoutget(struct nfsrv_descript *nd, vnode_t vp, struct nfsexstuff *exp,
 			NFSD_DEBUG(1, "ret layout too small\n");
 			return (NFSERR_TOOSMALL);
 		}
-		if (*iomode == NFSLAYOUTIOMODE_RW)
+		if (*iomode == NFSLAYOUTIOMODE_RW) {
+			if ((lyp->lay_flags & NFSLAY_NOSPC) != 0) {
+				NFSUNLOCKLAYOUT(lhyp);
+				NFSD_DEBUG(1, "ret layout nospace\n");
+				return (NFSERR_NOSPC);
+			}
 			lyp->lay_flags |= NFSLAY_RW;
-		else
+		} else
 			lyp->lay_flags |= NFSLAY_READ;
 		NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
 		*layoutlenp = lyp->lay_layoutlen;
@@ -6892,6 +6897,7 @@ nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp,
 	NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
 	lyp->lay_clientid.qval = nd->nd_clientid.qval;
 	lyp->lay_fsid = fs;
+	NFSBCOPY(devid, lyp->lay_deviceid, NFSX_V4DEVICEID);
 
 	/* Fill in the xdr for the files layout. */
 	tl = (uint32_t *)lyp->lay_xdr;
@@ -6941,6 +6947,7 @@ nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt,
 	lyp->lay_clientid.qval = nd->nd_clientid.qval;
 	lyp->lay_fsid = fs;
 	lyp->lay_mirrorcnt = mirrorcnt;
+	NFSBCOPY(devid, lyp->lay_deviceid, NFSX_V4DEVICEID);
 
 	/* Fill in the xdr for the files layout. */
 	tl = (uint32_t *)lyp->lay_xdr;
@@ -7015,6 +7022,10 @@ nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt,
 			if (stat != NFSERR_ACCES && stat != NFSERR_STALE &&
 			    stat != NFSERR_NOSPC)
 				nfsrv_delds(devid, p);
+
+			/* For NFSERR_NOSPC, mark all devids and layouts. */
+			if (stat == NFSERR_NOSPC)
+				nfsrv_marknospc(devid, true);
 		}
 	}
 }
@@ -8773,3 +8784,41 @@ nfsrv_findmirroredds(struct nfsmount *nmp)
 	}
 	return (fndds);
 }
+
+/*
+ * Mark the appropriate devid and all associated layout as "out of space".
+ */
+void
+nfsrv_marknospc(char *devid, bool setit)
+{
+	struct nfsdevice *ds;
+	struct nfslayout *lyp;
+	struct nfslayouthash *lhyp;
+	int i;
+
+	NFSDDSLOCK();
+	TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
+		if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0) {
+			NFSD_DEBUG(1, "nfsrv_marknospc: devid %d\n", setit);
+			ds->nfsdev_nospc = setit;
+		}
+	}
+	NFSDDSUNLOCK();
+
+	for (i = 0; i < nfsrv_layouthashsize; i++) {
+		lhyp = &nfslayouthash[i];
+		NFSLOCKLAYOUT(lhyp);
+		TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
+			if (NFSBCMP(lyp->lay_deviceid, devid,
+			    NFSX_V4DEVICEID) == 0) {
+				NFSD_DEBUG(1, "nfsrv_marknospc: layout %d\n",
+				    setit);
+				if (setit)
+					lyp->lay_flags |= NFSLAY_NOSPC;
+				else
+					lyp->lay_flags &= ~NFSLAY_NOSPC;
+			}
+		}
+		NFSUNLOCKLAYOUT(lhyp);
+	}
+}