git: 3dfbbe8586bb - main - nfsd: Add per-MDS fs striping configuration

From: Rick Macklem <rmacklem_at_FreeBSD.org>
Date: Fri, 05 Jun 2026 15:01:41 UTC
The branch main has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=3dfbbe8586bbd489ffc67941f09f94c2d8152946

commit 3dfbbe8586bbd489ffc67941f09f94c2d8152946
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2026-06-05 14:59:42 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2026-06-05 14:59:42 +0000

    nfsd: Add per-MDS fs striping configuration
    
    Commit 72e57bc26417 added support for striping to the pNFS
    server when using the Flexible File layout.
    It is configured globally via the following sysctls:
    vfs.nfsd.pnfsstripeunit - Size (in bytes) of a stripe
    vfs.nfsd.pnfsstripecnt - # of DSs to stripe across
    
    This patch allows the above settings to be overridden
    on a per-MDS exported file system basis.  With this
    patch, a stripeunit can optionally be specified for
    the MDS file system, when one is listed after
    a '#' in the "-p" nfsd argument.  This is done by
    appending "@NNN", where NNN is the stripeunit, in bytes.
    
    The current syntax implies that neither a '#' nor '@'
    can be in the MDS mount point's directory path.
    
    This patch does not affect current pNFS server
    configurations (of which there appear to be very
    few, anyhow).
---
 sys/fs/nfs/nfsrvstate.h          |  1 +
 sys/fs/nfsserver/nfs_nfsdport.c  | 18 ++++++++++++------
 sys/fs/nfsserver/nfs_nfsdstate.c | 21 ++++++++++++++++++++-
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/sys/fs/nfs/nfsrvstate.h b/sys/fs/nfs/nfsrvstate.h
index 91365f66bde3..350190272f53 100644
--- a/sys/fs/nfs/nfsrvstate.h
+++ b/sys/fs/nfs/nfsrvstate.h
@@ -357,6 +357,7 @@ struct nfsdevice {
 	char			*nfsdev_flexaddr;
 	char			*nfsdev_host;
 	fsid_t			nfsdev_mdsfsid;
+	uint64_t		nfsdev_mdsstripesiz;
 	uint32_t		nfsdev_nextdir;
 	bool			nfsdev_nospc;
 	vnode_t			nfsdev_dsdir[0];
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index 7fc8c426ba64..5927146c78d6 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -71,7 +71,7 @@ extern int nfs_bufpackets;
 extern u_long sb_max_adj;
 extern struct nfsv4lock nfsv4rootfs_lock;
 
-static uint64_t nfsrv_stripesiz = 0;
+uint64_t nfsrv_stripesiz = 0;
 static int nfsrv_maxstripecnt = 1;
 
 VNET_DECLARE(int, nfsrv_numnfsd);
@@ -4761,6 +4761,7 @@ nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
 	struct ucred *tcred;
 	int *dsdir, error, i, j, mirrorcnt, ret, stripecnt;
 	int failpos, timo;
+	uint64_t stripesiz;
 
 	/* Get a DS server directory in a round-robin order. */
 	mirrorcnt = 1;
@@ -4771,6 +4772,7 @@ nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
 	    M_TEMP, M_WAITOK);
 	dsdir = malloc(sizeof(*dsdir) * nfsrv_maxpnfsmirror *
 	    nfsrv_maxstripecnt, M_TEMP, M_WAITOK);
+	stripesiz = nfsrv_stripesiz;
 	NFSDDSLOCK();
 	/*
 	 * Search for the first entry that handles this MDS fs, but use the
@@ -4783,9 +4785,13 @@ nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
 				ds = tds;
 			else if (tds->nfsdev_mdsisset != 0 && fsidcmp(
 			    &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) {
-				if (j == 0)
+				if (j == 0) {
 					ds = fds = tds;
-				if (nfsrv_stripesiz == 0)
+					if (nfsrv_maxstripecnt > 1)
+						stripesiz =
+						    tds->nfsdev_mdsstripesiz;
+				}
+				if (stripesiz == 0)
 					break;
 				j++;
 			}
@@ -4804,7 +4810,7 @@ nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
 	 * of devices devided by the number of mirrors.
 	 */
 	stripecnt = 0;
-	if (nfsrv_stripesiz > 0) {
+	if (stripesiz > 0) {
 		if (j > 0)
 			stripecnt = j / nfsrv_maxpnfsmirror;
 		else
@@ -4886,7 +4892,7 @@ nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
 	failpos = -1;
 	for (i = 0; i < j - 1 && error == 0; i++, tpf++, tdsc++) {
 		tpf->dsf_stripecnt = stripecnt;
-		tpf->dsf_stripesiz = nfsrv_stripesiz;
+		tpf->dsf_stripesiz = stripesiz;
 		tpf->dsf_dir = dsdir[i];
 		tdsc->tcred = tcred;
 		tdsc->p = p;
@@ -4918,7 +4924,7 @@ nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
 	}
 	if (error == 0) {
 		tpf->dsf_stripecnt = stripecnt;
-		tpf->dsf_stripesiz = nfsrv_stripesiz;
+		tpf->dsf_stripesiz = stripesiz;
 		tpf->dsf_dir = dsdir[j - 1];
 		error = nfsrv_dscreate(dvp[j - 1], vap, &va, &fh, tpf,
 		    &dsattr, NULL, tcred, p, NULL);
diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c
index 17b31867fdcf..965fdcc01ebe 100644
--- a/sys/fs/nfsserver/nfs_nfsdstate.c
+++ b/sys/fs/nfsserver/nfs_nfsdstate.c
@@ -63,6 +63,7 @@ extern struct nfsdontlisthead nfsrv_dontlisthead;
 extern volatile int nfsrv_devidcnt;
 extern struct nfslayouthead nfsrv_recalllisthead;
 extern char *nfsrv_zeropnfsdat;
+extern uint64_t nfsrv_stripesiz;
 
 SYSCTL_DECL(_vfs_nfsd);
 int	nfsrv_statehashsize = NFSSTATEHASHSIZE;
@@ -7537,8 +7538,9 @@ nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
 	struct nfsdevice *ds;
 	struct mount *mp;
 	int error, i;
-	char *dsdirpath;
+	char *cp, *dsdirpath, *endcp;
 	size_t dsdirsize;
+	u_quad_t stripesiz;
 
 	NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp);
 	*dsp = NULL;
@@ -7576,6 +7578,7 @@ nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
 	    M_NFSDSTATE, M_WAITOK | M_ZERO);
 	ds->nfsdev_dvp = nd.ni_vp;
 	ds->nfsdev_nmp = VFSTONFS(nd.ni_vp->v_mount);
+	ds->nfsdev_mdsstripesiz = nfsrv_stripesiz;
 	NFSVOPUNLOCK(nd.ni_vp);
 
 	dsdirsize = strlen(dspathp) + 16;
@@ -7608,6 +7611,9 @@ nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
 	free(dsdirpath, M_TEMP);
 
 	if (strlen(mdspathp) > 0) {
+		cp = strchr(mdspathp, '@');
+		if (cp != NULL)
+			*cp = '\0';
 		/*
 		 * This DS stores file for a specific MDS exported file
 		 * system.
@@ -7635,6 +7641,19 @@ nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
 		ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid;
 		ds->nfsdev_mdsisset = 1;
 		vput(nd.ni_vp);
+		if (cp != NULL) {
+			/* There is a stripesiz specified. */
+			endcp = NULL;
+			if (*(cp + 1) != '\0')
+				stripesiz = strtouq(cp + 1, &endcp, 10);
+			if (endcp == NULL || *endcp != '\0') {
+				error = ENXIO;
+				NFSD_DEBUG(4, "mds stripesiz invalid\n");
+				goto out;
+			}
+			ds->nfsdev_mdsstripesiz = stripesiz;
+			*cp = '@';
+		}
 	}
 
 out: