svn commit: r335870 - in head/sys/fs: nfs nfsserver
Rick Macklem
rmacklem at FreeBSD.org
Mon Jul 2 19:21:36 UTC 2018
Author: rmacklem
Date: Mon Jul 2 19:21:33 2018
New Revision: 335870
URL: https://svnweb.freebsd.org/changeset/base/335870
Log:
Add an optional feature to the pNFS server.
Without this patch, the pNFS server distributes the data storage files across
all of the specified DSs.
A tester noted that it would be nice if a system administrator could control
which DSs are used to store the file data for a given exported MDS file system.
This patch adds the kernel support to do this. It also makes a slight semantic
change to nfsv4_findmirror(), since some uses of it no longer require that
the DS being searched for have a current mirror.
A patch that will be committed in a few minutes will modify the nfsd daemon
to support this feature.
The patch should only affect sites using the pNFS server (specified via the
"-p" command line option for nfsd).
Suggested by: james.rose at framestore.com
Modified:
head/sys/fs/nfs/nfs.h
head/sys/fs/nfs/nfs_commonsubs.c
head/sys/fs/nfs/nfsrvstate.h
head/sys/fs/nfsserver/nfs_nfsdport.c
head/sys/fs/nfsserver/nfs_nfsdstate.c
Modified: head/sys/fs/nfs/nfs.h
==============================================================================
--- head/sys/fs/nfs/nfs.h Mon Jul 2 18:23:43 2018 (r335869)
+++ head/sys/fs/nfs/nfs.h Mon Jul 2 19:21:33 2018 (r335870)
@@ -185,6 +185,8 @@ struct nfsd_nfsd_args {
int dnshostlen; /* Length of DNS names */
char *dspath; /* DS Mount path on MDS */
int dspathlen; /* Length of DS Mount path on MDS */
+ char *mdspath; /* MDS mount for DS path on MDS */
+ int mdspathlen; /* Length of MDS mount for DS path on MDS */
int mirrorcnt; /* Number of mirrors to create on DSs */
};
Modified: head/sys/fs/nfs/nfs_commonsubs.c
==============================================================================
--- head/sys/fs/nfs/nfs_commonsubs.c Mon Jul 2 18:23:43 2018 (r335869)
+++ head/sys/fs/nfs/nfs_commonsubs.c Mon Jul 2 19:21:33 2018 (r335870)
@@ -4692,37 +4692,26 @@ nfsv4_freeslot(struct nfsclsession *sep, int slot)
}
/*
- * Search for a matching pnfsd mirror device structure, base on the nmp arg.
+ * Search for a matching pnfsd DS, based on the nmp arg.
* Return one if found, NULL otherwise.
*/
struct nfsdevice *
nfsv4_findmirror(struct nfsmount *nmp)
{
- struct nfsdevice *ds, *fndds;
- int fndmirror;
+ struct nfsdevice *ds;
mtx_assert(NFSDDSMUTEXPTR, MA_OWNED);
/*
* Search the DS server list for a match with nmp.
- * Remove the DS entry if found and there is a mirror.
*/
- fndds = NULL;
- fndmirror = 0;
if (nfsrv_devidcnt == 0)
- return (fndds);
+ return (NULL);
TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
if (ds->nfsdev_nmp == nmp) {
- NFSCL_DEBUG(4, "fnd main ds\n");
- fndds = ds;
- } else if (ds->nfsdev_nmp != NULL)
- fndmirror = 1;
- if (fndds != NULL && fndmirror != 0)
+ NFSCL_DEBUG(4, "nfsv4_findmirror: fnd main ds\n");
break;
+ }
}
- if (fndmirror == 0) {
- NFSCL_DEBUG(4, "no mirror for DS\n");
- return (NULL);
- }
- return (fndds);
+ return (ds);
}
Modified: head/sys/fs/nfs/nfsrvstate.h
==============================================================================
--- head/sys/fs/nfs/nfsrvstate.h Mon Jul 2 18:23:43 2018 (r335869)
+++ head/sys/fs/nfs/nfsrvstate.h Mon Jul 2 19:21:33 2018 (r335870)
@@ -345,9 +345,11 @@ struct nfsdevice {
uint16_t nfsdev_hostnamelen;
uint16_t nfsdev_fileaddrlen;
uint16_t nfsdev_flexaddrlen;
+ uint16_t nfsdev_mdsisset;
char *nfsdev_fileaddr;
char *nfsdev_flexaddr;
char *nfsdev_host;
+ fsid_t nfsdev_mdsfsid;
uint32_t nfsdev_nextdir;
vnode_t nfsdev_dsdir[0];
};
Modified: head/sys/fs/nfsserver/nfs_nfsdport.c
==============================================================================
--- head/sys/fs/nfsserver/nfs_nfsdport.c Mon Jul 2 18:23:43 2018 (r335869)
+++ head/sys/fs/nfsserver/nfs_nfsdport.c Mon Jul 2 19:21:33 2018 (r335870)
@@ -3355,6 +3355,10 @@ nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap
nfsdarg.addrlen = 0;
nfsdarg.dnshost = NULL;
nfsdarg.dnshostlen = 0;
+ nfsdarg.dspath = NULL;
+ nfsdarg.dspathlen = 0;
+ nfsdarg.mdspath = NULL;
+ nfsdarg.mdspathlen = 0;
nfsdarg.mirrorcnt = 1;
}
} else
@@ -3364,14 +3368,15 @@ nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap
if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 &&
nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 &&
nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 &&
+ nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 &&
nfsdarg.mirrorcnt >= 1 &&
nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS &&
nfsdarg.addr != NULL && nfsdarg.dnshost != NULL &&
- nfsdarg.dspath != NULL) {
+ nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) {
NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d"
- " mirrorcnt=%d\n", nfsdarg.addrlen,
+ " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen,
nfsdarg.dspathlen, nfsdarg.dnshostlen,
- nfsdarg.mirrorcnt);
+ nfsdarg.mdspathlen, nfsdarg.mirrorcnt);
cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK);
error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen);
if (error != 0) {
@@ -3399,6 +3404,17 @@ nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap
}
cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */
nfsdarg.dspath = cp;
+ cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK);
+ error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen);
+ if (error != 0) {
+ free(nfsdarg.addr, M_TEMP);
+ free(nfsdarg.dnshost, M_TEMP);
+ free(nfsdarg.dspath, M_TEMP);
+ free(cp, M_TEMP);
+ goto out;
+ }
+ cp[nfsdarg.mdspathlen] = '\0'; /* Ensure nul term. */
+ nfsdarg.mdspath = cp;
} else {
nfsdarg.addr = NULL;
nfsdarg.addrlen = 0;
@@ -3406,12 +3422,15 @@ nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap
nfsdarg.dnshostlen = 0;
nfsdarg.dspath = NULL;
nfsdarg.dspathlen = 0;
+ nfsdarg.mdspath = NULL;
+ nfsdarg.mdspathlen = 0;
nfsdarg.mirrorcnt = 1;
}
error = nfsrvd_nfsd(td, &nfsdarg);
free(nfsdarg.addr, M_TEMP);
free(nfsdarg.dnshost, M_TEMP);
free(nfsdarg.dspath, M_TEMP);
+ free(nfsdarg.mdspath, M_TEMP);
} else if (uap->flag & NFSSVC_PNFSDS) {
error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg));
if (error == 0 && pnfsdarg.op == PNFSDOP_DELDSSERVER) {
@@ -3846,9 +3865,12 @@ nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap,
/* Get a DS server directory in a round-robin order. */
mirrorcnt = 1;
+ mp = vp->v_mount;
NFSDDSLOCK();
TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
- if (ds->nfsdev_nmp != NULL)
+ if (ds->nfsdev_nmp != NULL && (ds->nfsdev_mdsisset == 0 ||
+ (mp->mnt_stat.f_fsid.val[0] == ds->nfsdev_mdsfsid.val[0] &&
+ mp->mnt_stat.f_fsid.val[1] == ds->nfsdev_mdsfsid.val[1])))
break;
}
if (ds == NULL) {
@@ -3862,7 +3884,12 @@ nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap,
mds = TAILQ_NEXT(ds, nfsdev_list);
if (nfsrv_maxpnfsmirror > 1 && mds != NULL) {
TAILQ_FOREACH_FROM(mds, &nfsrv_devidhead, nfsdev_list) {
- if (mds->nfsdev_nmp != NULL) {
+ if (mds->nfsdev_nmp != NULL &&
+ (mds->nfsdev_mdsisset == 0 ||
+ (mp->mnt_stat.f_fsid.val[0] ==
+ mds->nfsdev_mdsfsid.val[0] &&
+ mp->mnt_stat.f_fsid.val[1] ==
+ mds->nfsdev_mdsfsid.val[1]))) {
dsdir[mirrorcnt] = i;
dvp[mirrorcnt] = mds->nfsdev_dsdir[i];
mirrorcnt++;
@@ -4464,6 +4491,7 @@ nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char
struct nfsmount *curnmp, int *ippos, int *dsdirp)
{
struct vnode *dvp, *nvp, **tdvpp;
+ struct mount *mp;
struct nfsmount *nmp, *newnmp;
struct sockaddr *sad;
struct sockaddr_in *sin;
@@ -4485,6 +4513,7 @@ nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char
newnmp = *newnmpp;
else
newnmp = NULL;
+ mp = vp->v_mount;
error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
"pnfsd.dsfile", buflenp, buf, p);
mirrorcnt = *buflenp / sizeof(*pf);
@@ -4545,7 +4574,13 @@ nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char
fndds = ds;
else if (newnmpp != NULL &&
newnmp == NULL &&
- (*newnmpp == NULL || fndds == NULL))
+ (*newnmpp == NULL ||
+ fndds == NULL) &&
+ (ds->nfsdev_mdsisset == 0 ||
+ (ds->nfsdev_mdsfsid.val[0] ==
+ mp->mnt_stat.f_fsid.val[0] &&
+ ds->nfsdev_mdsfsid.val[1] ==
+ mp->mnt_stat.f_fsid.val[1])))
/*
* Return a destination for the
* copy in newnmpp. Choose the
Modified: head/sys/fs/nfsserver/nfs_nfsdstate.c
==============================================================================
--- head/sys/fs/nfsserver/nfs_nfsdstate.c Mon Jul 2 18:23:43 2018 (r335869)
+++ head/sys/fs/nfsserver/nfs_nfsdstate.c Mon Jul 2 19:21:33 2018 (r335870)
@@ -210,7 +210,7 @@ static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t
int iomode);
static void nfsrv_freealllayouts(void);
static void nfsrv_freedevid(struct nfsdevice *ds);
-static int nfsrv_setdsserver(char *dspathp, NFSPROC_T *p,
+static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
struct nfsdevice **dsp);
static int nfsrv_delds(char *devid, NFSPROC_T *p);
static void nfsrv_deleteds(struct nfsdevice *fndds);
@@ -232,6 +232,7 @@ static int nfsrv_dontlayout(fhandle_t *fhp);
static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
vnode_t *tvpp);
+static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp);
/*
* Scan the client list for a match and either return the current one,
@@ -7369,10 +7370,12 @@ nfsrv_freealllayouts(void)
* Look up the mount path for the DS server.
*/
static int
-nfsrv_setdsserver(char *dspathp, NFSPROC_T *p, struct nfsdevice **dsp)
+nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
+ struct nfsdevice **dsp)
{
struct nameidata nd;
struct nfsdevice *ds;
+ struct mount *mp;
int error, i;
char *dsdirpath;
size_t dsdirsize;
@@ -7400,6 +7403,9 @@ nfsrv_setdsserver(char *dspathp, NFSPROC_T *p, struct
* Allocate a DS server structure with the NFS mounted directory
* vnode reference counted, so that a non-forced dismount will
* fail with EBUSY.
+ * This structure is always linked into the list, even if an error
+ * is being returned. The caller will free the entire list upon
+ * an error return.
*/
*dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t),
M_NFSDSTATE, M_WAITOK | M_ZERO);
@@ -7435,6 +7441,36 @@ nfsrv_setdsserver(char *dspathp, NFSPROC_T *p, struct
}
free(dsdirpath, M_TEMP);
+ if (strlen(mdspathp) > 0) {
+ /*
+ * This DS stores file for a specific MDS exported file
+ * system.
+ */
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
+ UIO_SYSSPACE, mdspathp, p);
+ error = namei(&nd);
+ NFSD_DEBUG(4, "mds lookup=%d\n", error);
+ if (error != 0)
+ goto out;
+ if (nd.ni_vp->v_type != VDIR) {
+ vput(nd.ni_vp);
+ error = ENOTDIR;
+ NFSD_DEBUG(4, "mdspath not dir\n");
+ goto out;
+ }
+ mp = nd.ni_vp->v_mount;
+ if ((mp->mnt_flag & MNT_EXPORTED) == 0) {
+ vput(nd.ni_vp);
+ error = ENXIO;
+ NFSD_DEBUG(4, "mdspath not an exported fs\n");
+ goto out;
+ }
+ ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid;
+ ds->nfsdev_mdsisset = 1;
+ vput(nd.ni_vp);
+ }
+
+out:
TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
atomic_add_int(&nfsrv_devidcnt, 1);
return (error);
@@ -7514,11 +7550,7 @@ nfsrv_deldsnmp(struct nfsmount *nmp, NFSPROC_T *p)
NFSD_DEBUG(4, "deldsdvp\n");
NFSDDSLOCK();
- if (nfsrv_faildscnt <= 0) {
- NFSDDSUNLOCK();
- return (NULL);
- }
- fndds = nfsv4_findmirror(nmp);
+ fndds = nfsrv_findmirroredds(nmp);
if (fndds != NULL)
nfsrv_deleteds(fndds);
NFSDDSUNLOCK();
@@ -7551,21 +7583,35 @@ nfsrv_delds(char *devid, NFSPROC_T *p)
nmp = NULL;
fndmirror = 0;
NFSDDSLOCK();
- if (nfsrv_faildscnt <= 0) {
- NFSDDSUNLOCK();
- return (ENXIO);
- }
TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 &&
ds->nfsdev_nmp != NULL) {
NFSD_DEBUG(4, "fnd main ds\n");
fndds = ds;
- } else if (ds->nfsdev_nmp != NULL)
- fndmirror = 1;
- if (fndds != NULL && fndmirror != 0)
break;
+ }
}
- if (fndds != NULL && fndmirror != 0) {
+ if (fndds == NULL) {
+ NFSDDSUNLOCK();
+ return (ENXIO);
+ }
+ if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
+ fndmirror = 1;
+ else {
+ /* For the fsid is set case, search for a mirror. */
+ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
+ if (ds != fndds && ds->nfsdev_nmp != NULL &&
+ ds->nfsdev_mdsisset != 0 &&
+ ds->nfsdev_mdsfsid.val[0] ==
+ fndds->nfsdev_mdsfsid.val[0] &&
+ ds->nfsdev_mdsfsid.val[1] ==
+ fndds->nfsdev_mdsfsid.val[1]) {
+ fndmirror = 1;
+ break;
+ }
+ }
+ }
+ if (fndmirror != 0) {
nmp = fndds->nfsdev_nmp;
NFSLOCKMNT(nmp);
if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
@@ -7579,7 +7625,7 @@ nfsrv_delds(char *devid, NFSPROC_T *p)
}
}
NFSDDSUNLOCK();
- if (fndds != NULL && nmp != NULL) {
+ if (nmp != NULL) {
nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
nfsrv_killrpcs(nmp);
@@ -7601,7 +7647,8 @@ nfsrv_deleteds(struct nfsdevice *fndds)
NFSD_DEBUG(4, "deleteds: deleting a mirror\n");
fndds->nfsdev_nmp = NULL;
- nfsrv_faildscnt--;
+ if (fndds->nfsdev_mdsisset == 0)
+ nfsrv_faildscnt--;
}
/*
@@ -7687,24 +7734,27 @@ int
nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p)
{
struct nfsdevice *ds;
- char *addrp, *dnshostp, *dspathp;
+ char *addrp, *dnshostp, *dspathp, *mdspathp;
int error, i;
addrp = args->addr;
dnshostp = args->dnshost;
dspathp = args->dspath;
+ mdspathp = args->mdspath;
nfsrv_maxpnfsmirror = args->mirrorcnt;
- if (addrp == NULL || dnshostp == NULL || dspathp == NULL)
+ if (addrp == NULL || dnshostp == NULL || dspathp == NULL ||
+ mdspathp == NULL)
return (0);
/*
* Loop around for each nul-terminated string in args->addr,
- * args->dnshost and args->dnspath.
+ * args->dnshost, args->dnspath and args->mdspath.
*/
while (addrp < (args->addr + args->addrlen) &&
dnshostp < (args->dnshost + args->dnshostlen) &&
- dspathp < (args->dspath + args->dspathlen)) {
- error = nfsrv_setdsserver(dspathp, p, &ds);
+ dspathp < (args->dspath + args->dspathlen) &&
+ mdspathp < (args->mdspath + args->mdspathlen)) {
+ error = nfsrv_setdsserver(dspathp, mdspathp, p, &ds);
if (error != 0) {
/* Free all DS servers. */
nfsrv_freealldevids();
@@ -7715,6 +7765,7 @@ nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPRO
addrp += (strlen(addrp) + 1);
dnshostp += (strlen(dnshostp) + 1);
dspathp += (strlen(dspathp) + 1);
+ mdspathp += (strlen(mdspathp) + 1);
}
if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) {
/* Free all DS servers. */
@@ -8299,9 +8350,15 @@ nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *c
}
nmp = VFSTONFS(nd.ni_vp->v_mount);
- /* Search the nfsdev list for a match. */
+ /*
+ * Search the nfsdevice list for a match. If curnmp == NULL,
+ * this is a recovery and there must be a mirror.
+ */
NFSDDSLOCK();
- *dsp = nfsv4_findmirror(nmp);
+ if (curnmp == NULL)
+ *dsp = nfsrv_findmirroredds(nmp);
+ else
+ *dsp = nfsv4_findmirror(nmp);
NFSDDSUNLOCK();
if (*dsp == NULL) {
vput(nd.ni_vp);
@@ -8331,7 +8388,7 @@ nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *c
if (error == 0 && nmp != NULL) {
/* Search the nfsdev list for a match. */
NFSDDSLOCK();
- *dsp = nfsv4_findmirror(nmp);
+ *dsp = nfsrv_findmirroredds(nmp);
NFSDDSUNLOCK();
}
if (error == 0 && (nmp == NULL || *dsp == NULL)) {
@@ -8374,5 +8431,56 @@ nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *c
} else
vput(vp);
return (error);
+}
+
+/*
+ * Search for a matching pnfsd mirror device structure, base on the nmp arg.
+ * Return one if found, NULL otherwise.
+ */
+static struct nfsdevice *
+nfsrv_findmirroredds(struct nfsmount *nmp)
+{
+ struct nfsdevice *ds, *fndds;
+ int fndmirror;
+
+ mtx_assert(NFSDDSMUTEXPTR, MA_OWNED);
+ /*
+ * Search the DS server list for a match with nmp.
+ * Remove the DS entry if found and there is a mirror.
+ */
+ fndds = NULL;
+ fndmirror = 0;
+ if (nfsrv_devidcnt == 0)
+ return (fndds);
+ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
+ if (ds->nfsdev_nmp == nmp) {
+ NFSD_DEBUG(4, "nfsrv_findmirroredds: fnd main ds\n");
+ fndds = ds;
+ break;
+ }
+ }
+ if (fndds == NULL)
+ return (fndds);
+ if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
+ fndmirror = 1;
+ else {
+ /* For the fsid is set case, search for a mirror. */
+ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
+ if (ds != fndds && ds->nfsdev_nmp != NULL &&
+ ds->nfsdev_mdsisset != 0 &&
+ ds->nfsdev_mdsfsid.val[0] ==
+ fndds->nfsdev_mdsfsid.val[0] &&
+ ds->nfsdev_mdsfsid.val[1] ==
+ fndds->nfsdev_mdsfsid.val[1]) {
+ fndmirror = 1;
+ break;
+ }
+ }
+ }
+ if (fndmirror == 0) {
+ NFSD_DEBUG(4, "nfsrv_findmirroredds: no mirror for DS\n");
+ return (NULL);
+ }
+ return (fndds);
}
More information about the svn-src-head
mailing list