svn commit: r324336 - in head/sys/fs: nfs nfsclient

Rick Macklem rmacklem at FreeBSD.org
Thu Oct 5 20:10:41 UTC 2017


Author: rmacklem
Date: Thu Oct  5 20:10:40 2017
New Revision: 324336
URL: https://svnweb.freebsd.org/changeset/base/324336

Log:
  Add Flex File Layout support to the NFSv4.1 pNFS client.
  
  This patch adds support for the Flexible File Layout to the pNFS client.
  Although the patch is rather large, it should only affect NFS mounts
  using the "pnfs" option against pNFS servers that do not support File
  Layout.
  There are still a couple of things missing from the Flexible File Layout
  client implementation:
  - The code does not yet do a LayoutReturn with I/O error stats when
    I/O error(s) occur while attempting to do I/O on a DS.
    This will be fixed in a future commit, since it is important for the
    MDS to know that I/O on a DS is failing.
  - The current code does writes and commits to mirror DSs serially.
    Making them happen concurrently will be done in a future commit,
    after discussion on freebsd-current@ on the best way to do this.
  - The code does not handle NFSv4.0 DSs. Since there is no extant pNFS
    server that implements NFSv4.0 DSs and NFSv4.1 DSs make more sense
    now, I don't intend to implement this until there is a need for it.
    There is support for NFSv4.1 and NFSv3 DSs.

Modified:
  head/sys/fs/nfs/nfs_var.h
  head/sys/fs/nfsclient/nfs_clrpcops.c
  head/sys/fs/nfsclient/nfs_clstate.c

Modified: head/sys/fs/nfs/nfs_var.h
==============================================================================
--- head/sys/fs/nfs/nfs_var.h	Thu Oct  5 19:18:02 2017	(r324335)
+++ head/sys/fs/nfs/nfs_var.h	Thu Oct  5 20:10:40 2017	(r324336)
@@ -555,7 +555,7 @@ int nfscl_tryclose(struct nfsclopen *, struct ucred *,
     struct nfsmount *, NFSPROC_T *);
 void nfscl_cleanup(NFSPROC_T *);
 int nfscl_layout(struct nfsmount *, vnode_t, u_int8_t *, int, nfsv4stateid_t *,
-    int, struct nfsclflayouthead *, struct nfscllayout **, struct ucred *,
+    int, int, struct nfsclflayouthead *, struct nfscllayout **, struct ucred *,
     NFSPROC_T *);
 struct nfscllayout *nfscl_getlayout(struct nfsclclient *, uint8_t *, int,
     uint64_t, struct nfsclflayout **, int *);

Modified: head/sys/fs/nfsclient/nfs_clrpcops.c
==============================================================================
--- head/sys/fs/nfsclient/nfs_clrpcops.c	Thu Oct  5 19:18:02 2017	(r324335)
+++ head/sys/fs/nfsclient/nfs_clrpcops.c	Thu Oct  5 20:10:40 2017	(r324336)
@@ -116,18 +116,28 @@ static int nfscl_doflayoutio(vnode_t, struct uio *, in
     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
     struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
     NFSPROC_T *);
+static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
+    nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
+    struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
+    struct ucred *, NFSPROC_T *);
+static struct mbuf *nfsm_copym(struct mbuf *, int, int);
 static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
-    struct nfsclds *, uint64_t, int, struct nfsfh *, struct ucred *,
-    NFSPROC_T *);
+    struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
+    struct ucred *, NFSPROC_T *);
 static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
     nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
-    struct nfsfh *, int, struct ucred *, NFSPROC_T *);
+    struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
+static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
+    struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
+    struct ucred *, NFSPROC_T *);
 static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
     struct nfsclds *, struct nfsclds **);
 static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
-    struct nfsfh *, struct ucred *, NFSPROC_T *);
+    struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
 static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
-    uint64_t, uint64_t, nfsv4stateid_t *, int, int);
+    uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
+static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
+    NFSPROC_T *);
 static int nfsrv_parselayoutget(struct nfsrv_descript *, nfsv4stateid_t *,
     int *, struct nfsclflayouthead *);
 static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
@@ -139,19 +149,19 @@ static int nfsrpc_getcreatelayout(vnode_t, char *, int
     struct nfsfh **, int *, int *, void *, int *);
 static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
-    struct nfscldeleg **, nfsv4stateid_t *, int, int, int *,
+    struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
     struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
 static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
     struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *,
-    int, int, int *, struct nfsclflayouthead *, int *);
+    int, int, int, int *, struct nfsclflayouthead *, int *);
 static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
-    uint64_t, uint64_t, int, nfsv4stateid_t *, int *, struct nfsclflayouthead *,
-    struct ucred *, NFSPROC_T *, void *);
+    uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
+    struct nfsclflayouthead *, struct ucred *, NFSPROC_T *, void *);
 static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
     int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
-    struct nfsclflayouthead *, int, int *, struct ucred *, NFSPROC_T *);
+    struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);
 
 /*
  * nfs null call from vfs.
@@ -4849,9 +4859,10 @@ nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsc
  */
 static int
 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
-    uint64_t offset, uint64_t len, uint64_t minlen, int layoutlen,
-    nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp,
-    struct ucred *cred, NFSPROC_T *p, void *stuff)
+    uint64_t offset, uint64_t len, uint64_t minlen, int layouttype,
+    int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep,
+    struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p,
+    void *stuff)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	int error;
@@ -4859,7 +4870,7 @@ nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, i
 	nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0,
 	    0);
 	nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
-	    layoutlen, 0);
+	    layouttype, layoutlen, 0);
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
@@ -4882,7 +4893,7 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *de
     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
     NFSPROC_T *p)
 {
-	uint32_t cnt, *tl;
+	uint32_t cnt, *tl, vers, minorvers;
 	struct nfsrv_descript nfsd;
 	struct nfsrv_descript *nd = &nfsd;
 	struct sockaddr_in sin, ssin;
@@ -4915,51 +4926,68 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *de
 	if (error != 0)
 		return (error);
 	if (nd->nd_repstat == 0) {
-		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
-		if (layouttype != fxdr_unsigned(int, *tl++))
+		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+		if (layouttype != fxdr_unsigned(int, *tl))
 			printf("EEK! devinfo layout type not same!\n");
-		stripecnt = fxdr_unsigned(int, *++tl);
-		NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
-		if (stripecnt < 1 || stripecnt > 4096) {
-			printf("NFS devinfo stripecnt %d: out of range\n",
-			    stripecnt);
-			error = NFSERR_BADXDR;
-			goto nfsmout;
-		}
-		NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) * NFSX_UNSIGNED);
-		addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
-		NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
-		if (addrcnt < 1 || addrcnt > 128) {
-			printf("NFS devinfo addrcnt %d: out of range\n",
-			    addrcnt);
-			error = NFSERR_BADXDR;
-			goto nfsmout;
-		}
-
-		/*
-		 * Now we know how many stripe indices and addresses, so
-		 * we can allocate the structure the correct size.
-		 */
-		i = (stripecnt * sizeof(uint8_t)) / sizeof(struct nfsclds *)
-		    + 1;
-		NFSCL_DEBUG(4, "stripeindices=%d\n", i);
-		ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
-		    sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO);
-		NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID);
-		ndi->nfsdi_refcnt = 0;
-		ndi->nfsdi_stripecnt = stripecnt;
-		ndi->nfsdi_addrcnt = addrcnt;
-		/* Fill in the stripe indices. */
-		for (i = 0; i < stripecnt; i++) {
-			stripeindex = fxdr_unsigned(uint8_t, *tl++);
-			NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
-			if (stripeindex >= addrcnt) {
-				printf("NFS devinfo stripeindex %d: too big\n",
-				    (int)stripeindex);
+		if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			stripecnt = fxdr_unsigned(int, *tl);
+			NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
+			if (stripecnt < 1 || stripecnt > 4096) {
+				printf("pNFS File layout devinfo stripecnt %d:"
+				    " out of range\n", stripecnt);
 				error = NFSERR_BADXDR;
 				goto nfsmout;
 			}
-			nfsfldi_setstripeindex(ndi, i, stripeindex);
+			NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) *
+			    NFSX_UNSIGNED);
+			addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
+			NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
+			if (addrcnt < 1 || addrcnt > 128) {
+				printf("NFS devinfo addrcnt %d: out of range\n",
+				    addrcnt);
+				error = NFSERR_BADXDR;
+				goto nfsmout;
+			}
+	
+			/*
+			 * Now we know how many stripe indices and addresses, so
+			 * we can allocate the structure the correct size.
+			 */
+			i = (stripecnt * sizeof(uint8_t)) /
+			    sizeof(struct nfsclds *) + 1;
+			NFSCL_DEBUG(4, "stripeindices=%d\n", i);
+			ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
+			    sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK |
+			    M_ZERO);
+			NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
+			    NFSX_V4DEVICEID);
+			ndi->nfsdi_refcnt = 0;
+			ndi->nfsdi_flags = NFSDI_FILELAYOUT;
+			ndi->nfsdi_stripecnt = stripecnt;
+			ndi->nfsdi_addrcnt = addrcnt;
+			/* Fill in the stripe indices. */
+			for (i = 0; i < stripecnt; i++) {
+				stripeindex = fxdr_unsigned(uint8_t, *tl++);
+				NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
+				if (stripeindex >= addrcnt) {
+					printf("pNFS File Layout devinfo"
+					    " stripeindex %d: too big\n",
+					    (int)stripeindex);
+					error = NFSERR_BADXDR;
+					goto nfsmout;
+				}
+				nfsfldi_setstripeindex(ndi, i, stripeindex);
+			}
+		} else if (layouttype == NFSLAYOUT_FLEXFILE) {
+			/* For Flex File, we only get one address list. */
+			ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *),
+			    M_NFSDEVINFO, M_WAITOK | M_ZERO);
+			NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
+			    NFSX_V4DEVICEID);
+			ndi->nfsdi_refcnt = 0;
+			ndi->nfsdi_flags = NFSDI_FLEXFILE;
+			addrcnt = ndi->nfsdi_addrcnt = 1;
 		}
 
 		/* Now, dissect the server address(es). */
@@ -5005,6 +5033,46 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *de
 		}
 
 		gotvers = NFS_VER4;	/* Always NFSv4 for File Layout. */
+		/* For Flex File, we will take one of the versions to use. */
+		if (layouttype == NFSLAYOUT_FLEXFILE) {
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			j = fxdr_unsigned(int, *tl);
+			if (j < 1 || j > NFSDEV_MAXVERS) {
+				printf("pNFS: too many versions\n");
+				error = NFSERR_BADXDR;
+				goto nfsmout;
+			}
+			gotvers = 0;
+			for (i = 0; i < j; i++) {
+				NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED);
+				vers = fxdr_unsigned(uint32_t, *tl++);
+				minorvers = fxdr_unsigned(uint32_t, *tl++);
+				if ((vers == NFS_VER4 && minorvers ==
+				    NFSV41_MINORVERSION) || (vers == NFS_VER3 &&
+				    gotvers == 0)) {
+					gotvers = vers;
+					/* We'll take this one. */
+					ndi->nfsdi_versindex = i;
+					ndi->nfsdi_vers = vers;
+					ndi->nfsdi_minorvers = minorvers;
+					ndi->nfsdi_rsize = fxdr_unsigned(
+					    uint32_t, *tl++);
+					ndi->nfsdi_wsize = fxdr_unsigned(
+					    uint32_t, *tl++);
+					if (*tl == newnfs_true)
+						ndi->nfsdi_flags |=
+						    NFSDI_TIGHTCOUPLED;
+					else
+						ndi->nfsdi_flags &=
+						    ~NFSDI_TIGHTCOUPLED;
+				}
+			}
+			if (gotvers == 0) {
+				printf("pNFS: no NFSv3 or NFSv4.1\n");
+				error = NFSERR_BADXDR;
+				goto nfsmout;
+			}
+		}
 
 		/* And the notify bits. */
 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
@@ -5033,7 +5101,7 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *de
 			*gotdspp = dsp;
 		}
 	}
-	if (nd->nd_repstat != 0)
+	if (nd->nd_repstat != 0 && error == 0)
 		error = nd->nd_repstat;
 nfsmout:
 	if (error != 0 && ndi != NULL)
@@ -5126,7 +5194,15 @@ nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh,
 		*tl++ = stateidp->other[0];
 		*tl++ = stateidp->other[1];
 		*tl++ = stateidp->other[2];
-		*tl = txdr_unsigned(0);
+		if (layouttype == NFSLAYOUT_NFSV4_1_FILES)
+			*tl = txdr_unsigned(0);
+		else if (layouttype == NFSLAYOUT_FLEXFILE) {
+			*tl = txdr_unsigned(2 * NFSX_UNSIGNED);
+			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+			/* No ioerrs or stats yet. */
+			*tl++ = 0;
+			*tl = 0;
+		}
 	}
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
@@ -5162,11 +5238,15 @@ nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, str
 	struct nfscllayout *lyp;
 	struct nfsclflayout *flp;
 	struct nfsclflayouthead flh;
-	int error = 0, islocked, layoutlen, recalled, retonclose;
+	int error = 0, islocked, layoutlen, layouttype, recalled, retonclose;
 	nfsv4stateid_t stateid;
 	struct nfsclsession *tsep;
 
 	*lypp = NULL;
+	if (NFSHASFLEXFILE(nmp))
+		layouttype = NFSLAYOUT_FLEXFILE;
+	else
+		layouttype = NFSLAYOUT_NFSV4_1_FILES;
 	/*
 	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
@@ -5189,8 +5269,8 @@ nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, str
 			stateid.other[2] = stateidp->other[2];
 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
 			    nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
-			    (uint64_t)0, layoutlen, &stateid, &retonclose,
-			    &flh, cred, p, NULL);
+			    (uint64_t)0, layouttype, layoutlen, &stateid,
+			    &retonclose, &flh, cred, p, NULL);
 		} else {
 			islocked = 1;
 			stateid.seqid = lyp->nfsly_stateid.seqid;
@@ -5199,12 +5279,12 @@ nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, str
 			stateid.other[2] = lyp->nfsly_stateid.other[2];
 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
 			    nfhp->nfh_len, iomode, off, UINT64_MAX,
-			    (uint64_t)0, layoutlen, &stateid, &retonclose,
-			    &flh, cred, p, NULL);
+			    (uint64_t)0, layouttype, layoutlen, &stateid,
+			    &retonclose, &flh, cred, p, NULL);
 		}
 		error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
 		    nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
-		    &flh, error, NULL, cred, p);
+		    &flh, layouttype, error, NULL, cred, p);
 		if (error == 0)
 			*lypp = lyp;
 		else if (islocked != 0)
@@ -5336,38 +5416,53 @@ nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in
 	error = newnfs_connect(nmp, nrp, NULL, p, 0);
 	NFSCL_DEBUG(3, "DS connect=%d\n", error);
 
+	dsp = NULL;
 	/* Now, do the exchangeid and create session. */
 	if (error == 0) {
-		error = nfsrpc_exchangeid(nmp, clp, nrp, NFSV4EXCH_USEPNFSDS,
-		    &dsp, nrp->nr_cred, p);
-		NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
-		if (error != 0)
-			newnfs_disconnect(nrp);
+		if (vers == NFS_VER4) {
+			error = nfsrpc_exchangeid(nmp, clp, nrp,
+			    NFSV4EXCH_USEPNFSDS, &dsp, nrp->nr_cred, p);
+			NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
+			if (error != 0)
+				newnfs_disconnect(nrp);
+		} else {
+			dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
+			    M_WAITOK | M_ZERO);
+			dsp->nfsclds_flags |= NFSCLDS_DS;
+			dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
+			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
+			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
+			    NULL, MTX_DEF);
+		}
 	}
 	if (error == 0) {
 		dsp->nfsclds_sockp = nrp;
-		NFSLOCKMNT(nmp);
-		retv = nfscl_getsameserver(nmp, dsp, &tdsp);
-		NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
-		if (retv == NFSDSP_USETHISSESSION) {
+		if (vers == NFS_VER4) {
+			NFSLOCKMNT(nmp);
+			retv = nfscl_getsameserver(nmp, dsp, &tdsp);
+			NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
+			if (retv == NFSDSP_USETHISSESSION) {
+				NFSUNLOCKMNT(nmp);
+				/*
+				 * If there is already a session for this
+				 * server, use it.
+				 */
+				(void)newnfs_disconnect(nrp);
+				nfscl_freenfsclds(dsp);
+				*dspp = tdsp;
+				return (0);
+			}
+			if (retv == NFSDSP_SEQTHISSESSION)
+				sequenceid =
+				    tdsp->nfsclds_sess.nfsess_sequenceid;
+			else
+				sequenceid =
+				    dsp->nfsclds_sess.nfsess_sequenceid;
 			NFSUNLOCKMNT(nmp);
-			/*
-			 * If there is already a session for this server,
-			 * use it.
-			 */
-			(void)newnfs_disconnect(nrp);
-			nfscl_freenfsclds(dsp);
-			*dspp = tdsp;
-			return (0);
+			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
+			    nrp, sequenceid, 0, nrp->nr_cred, p);
+			NFSCL_DEBUG(3, "DS createsess=%d\n", error);
 		}
-		if (retv == NFSDSP_SEQTHISSESSION)
-			sequenceid = tdsp->nfsclds_sess.nfsess_sequenceid;
-		else
-			sequenceid = dsp->nfsclds_sess.nfsess_sequenceid;
-		NFSUNLOCKMNT(nmp);
-		error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
-		    nrp, sequenceid, 0, nrp->nr_cred, p);
-		NFSCL_DEBUG(3, "DS createsess=%d\n", error);
 	} else {
 		NFSFREECRED(nrp->nr_cred);
 		NFSFREEMUTEX(&nrp->nr_mtx);
@@ -5448,11 +5543,17 @@ nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode
 	struct nfscllayout *layp;
 	struct nfscldevinfo *dip;
 	struct nfsclflayout *rflp;
+	struct mbuf *m;
 	nfsv4stateid_t stateid;
 	struct ucred *newcred;
 	uint64_t lastbyte, len, off, oresid, xfer;
-	int eof, error, iolaymode, recalled;
+	int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled;
 	void *lckp;
+	uint8_t *dev;
+	void *iovbase;
+	size_t iovlen;
+	off_t offs;
+	ssize_t resid;
 
 	if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
 	    (np->n_flag & NNOLAYOUT) != 0)
@@ -5520,30 +5621,93 @@ nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode
 			oresid = xfer = (uint64_t)uiop->uio_resid;
 			if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
 				xfer = rflp->nfsfl_end - rflp->nfsfl_off;
-			dip = nfscl_getdevinfo(nmp->nm_clp, rflp->nfsfl_dev,
-			    rflp->nfsfl_devp);
-			if (dip != NULL) {
-				error = nfscl_doflayoutio(vp, uiop, iomode,
-				    must_commit, &eof, &stateid, rwaccess, dip,
-				    layp, rflp, off, xfer, docommit, newcred,
-				    p);
-				nfscl_reldevinfo(dip);
-				lastbyte = off + xfer - 1;
-				if (error == 0) {
+			/*
+			 * For Flex File layout with mirrored DSs, select one
+			 * of them at random for reads. For writes and commits,
+			 * do all mirrors.
+			 */
+			m = NULL;
+			firstmirror = 0;
+			mirrorcnt = 1;
+			if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
+			    (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) {
+				if (rwaccess == NFSV4OPEN_ACCESSREAD) {
+					firstmirror = arc4random() % mirrorcnt;
+					mirrorcnt = firstmirror + 1;
+				} else if (docommit == 0) {
+					/*
+					 * Save values, so uiop can be rolled
+					 * back upon a write error.
+					 */
+					offs = uiop->uio_offset;
+					resid = uiop->uio_resid;
+					iovbase = uiop->uio_iov->iov_base;
+					iovlen = uiop->uio_iov->iov_len;
+					m = nfsm_uiombuflist(uiop, len, NULL,
+					    NULL);
+				}
+			}
+			for (i = firstmirror; i < mirrorcnt && error == 0; i++){
+				if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0)
+					dev = rflp->nfsfl_ffm[i].dev;
+				else
+					dev = rflp->nfsfl_dev;
+				dip = nfscl_getdevinfo(nmp->nm_clp, dev,
+				    rflp->nfsfl_devp);
+				if (dip != NULL) {
+					if ((rflp->nfsfl_flags & NFSFL_FLEXFILE)
+					    != 0)
+						error = nfscl_dofflayoutio(vp,
+						    uiop, iomode, must_commit,
+						    &eof, &stateid, rwaccess,
+						    dip, layp, rflp, off, xfer,
+						    i, docommit, m, newcred,
+						    p);
+					else
+						error = nfscl_doflayoutio(vp,
+						    uiop, iomode, must_commit,
+						    &eof, &stateid, rwaccess,
+						    dip, layp, rflp, off, xfer,
+						    docommit, newcred, p);
+					nfscl_reldevinfo(dip);
+				} else
+					error = EIO;
+			}
+			if (m != NULL)
+				m_freem(m);
+			if (error == 0) {
+				if (mirrorcnt > 1 && rwaccess ==
+				    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
 					NFSLOCKCLSTATE();
-					if (lastbyte > layp->nfsly_lastbyte)
-						layp->nfsly_lastbyte = lastbyte;
+					layp->nfsly_flags |= NFSLY_WRITTEN;
 					NFSUNLOCKCLSTATE();
-				} else if (error == NFSERR_OPENMODE &&
-				    rwaccess == NFSV4OPEN_ACCESSREAD) {
-					NFSLOCKMNT(nmp);
-					nmp->nm_state |= NFSSTA_OPENMODE;
-					NFSUNLOCKMNT(nmp);
 				}
+				lastbyte = off + xfer - 1;
+				NFSLOCKCLSTATE();
+				if (lastbyte > layp->nfsly_lastbyte)
+					layp->nfsly_lastbyte = lastbyte;
+				NFSUNLOCKCLSTATE();
+			} else if (error == NFSERR_OPENMODE &&
+			    rwaccess == NFSV4OPEN_ACCESSREAD) {
+				NFSLOCKMNT(nmp);
+				nmp->nm_state |= NFSSTA_OPENMODE;
+				NFSUNLOCKMNT(nmp);
 			} else
 				error = EIO;
 			if (error == 0)
 				len -= (oresid - (uint64_t)uiop->uio_resid);
+			else if (mirrorcnt > 1 && rwaccess ==
+			    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
+				/*
+				 * In case the rpc gets retried, roll the
+				 * uio fields changed by nfsm_uiombuflist()
+				 * back.
+				 */
+				uiop->uio_offset = offs;
+				uiop->uio_resid = resid;
+				uiop->uio_iov->iov_base = iovbase;
+				uiop->uio_iov->iov_len = iovlen;
+			}
 		}
 	}
 	if (lckp != NULL)
@@ -5555,6 +5719,38 @@ nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode
 }
 
 /*
+ * Make a copy of the mbuf chain and add an mbuf for null padding, as required.
+ */
+static struct mbuf *
+nfsm_copym(struct mbuf *m, int off, int xfer)
+{
+	struct mbuf *m2, *m3, *m4;
+	uint32_t *tl;
+	int rem;
+
+	m2 = m_copym(m, off, xfer, M_WAITOK);
+	rem = NFSM_RNDUP(xfer) - xfer;
+	if (rem > 0) {
+		/*
+		 * The zero padding to a multiple of 4 bytes is required by
+		 * the XDR. So that the mbufs copied by reference aren't
+		 * modified, add an mbuf with the zero'd bytes to the list.
+		 * rem will be a maximum of 3, so one zero'd uint32_t is
+		 * sufficient.
+		 */
+		m3 = m2;
+		while (m3->m_next != NULL)
+			m3 = m3->m_next;
+		NFSMGET(m4);
+		tl = NFSMTOD(m4, uint32_t *);
+		*tl = 0;
+		mbuf_setlen(m4, rem);
+		mbuf_setnext(m3, m4);
+	}
+	return (m2);
+}
+
+/*
  * Find a file layout that will handle the first bytes of the requested
  * range and return the information from it needed to to the I/O operation.
  */
@@ -5659,7 +5855,7 @@ nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *i
 		if (docommit != 0) {
 			if (error == 0)
 				error = nfsrpc_commitds(vp, io_off, xfer,
-				    *dspp, fhp, cred, p);
+				    *dspp, fhp, 0, 0, cred, p);
 			if (error == 0) {
 				/*
 				 * Set both eof and uio_resid = 0 to end any
@@ -5674,11 +5870,11 @@ nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *i
 			}
 		} else if (rwflag == NFSV4OPEN_ACCESSREAD)
 			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
-			    io_off, xfer, fhp, cred, p);
+			    io_off, xfer, fhp, 0, 0, 0, cred, p);
 		else {
 			error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
 			    stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
-			    cred, p);
+			    0, 0, 0, cred, p);
 			if (error == 0) {
 				NFSLOCKCLSTATE();
 				lyp->nfsly_flags |= NFSLY_WRITTEN;
@@ -5696,42 +5892,174 @@ nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *i
 }
 
 /*
+ * Do I/O using an NFSv4.1 flex file layout.
+ */
+static int
+nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
+    int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
+    struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
+    uint64_t len, int mirror, int docommit, struct mbuf *mp, struct ucred *cred,
+    NFSPROC_T *p)
+{
+	uint64_t transfer, xfer;
+	int error, rel_off;
+	struct nfsnode *np;
+	struct nfsfh *fhp;
+	struct nfsclds **dspp;
+	struct ucred *tcred;
+	struct mbuf *m;
+
+	np = VTONFS(vp);
+	error = 0;
+	rel_off = 0;
+	NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off,
+	    (uintmax_t)len);
+	/* Loop around, doing I/O for each stripe unit. */
+	while (len > 0 && error == 0) {
+		dspp = nfsfldi_addr(dp, 0);
+		fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex];
+		stateidp = &flp->nfsfl_ffm[mirror].st;
+		NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n",
+		    mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid);
+		if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) {
+			tcred = NFSNEWCRED(cred);
+			tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
+			tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group;
+			tcred->cr_ngroups = 1;
+		} else
+			tcred = cred;
+		if (rwflag == NFSV4OPEN_ACCESSREAD)
+			transfer = dp->nfsdi_rsize;
+		else
+			transfer = dp->nfsdi_wsize;
+		mtx_lock(&np->n_mtx);
+		np->n_flag |= NDSCOMMIT;
+		mtx_unlock(&np->n_mtx);
+		if (len > transfer && docommit == 0)
+			xfer = transfer;
+		else
+			xfer = len;
+		if (docommit != 0) {
+			if (error == 0)
+				error = nfsrpc_commitds(vp, off, xfer, *dspp,
+				    fhp, dp->nfsdi_vers, dp->nfsdi_minorvers,
+				    tcred, p);
+			NFSCL_DEBUG(4, "aft nfsrpc_commitds=%d\n", error);
+			if (error == 0) {
+				/*
+				 * Set both eof and uio_resid = 0 to end any
+				 * loops.
+				 */
+				*eofp = 1;
+				uiop->uio_resid = 0;
+			} else {
+				mtx_lock(&np->n_mtx);
+				np->n_flag &= ~NDSCOMMIT;
+				mtx_unlock(&np->n_mtx);
+			}
+		} else if (rwflag == NFSV4OPEN_ACCESSREAD)
+			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
+			    off, xfer, fhp, 1, dp->nfsdi_vers,
+			    dp->nfsdi_minorvers, tcred, p);
+		else {
+			if (flp->nfsfl_mirrorcnt == 1) {
+				error = nfsrpc_writeds(vp, uiop, iomode,
+				    must_commit, stateidp, *dspp, off, xfer,
+				    fhp, 0, 1, dp->nfsdi_vers,
+				    dp->nfsdi_minorvers, tcred, p);
+				if (error == 0) {
+					NFSLOCKCLSTATE();
+					lyp->nfsly_flags |= NFSLY_WRITTEN;
+					NFSUNLOCKCLSTATE();
+				}
+			} else {
+				m = nfsm_copym(mp, rel_off, xfer);
+				NFSCL_DEBUG(4, "mcopy reloff=%d xfer=%jd\n",
+				    rel_off, (uintmax_t)xfer);
+				error = nfsrpc_writedsmir(vp, iomode,
+				    must_commit, stateidp, *dspp, off, xfer,
+				    fhp, m, dp->nfsdi_vers, dp->nfsdi_minorvers,
+				    tcred, p);
+				NFSCL_DEBUG(4, "nfsrpc_writedsmir=%d\n", error);
+			}
+		}
+		NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
+		if (error == 0) {
+			len -= xfer;
+			off += xfer;
+			rel_off += xfer;
+		}
+		if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0)
+			NFSFREECRED(tcred);
+	}
+	NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error);
+	return (error);
+}
+
+/*
  * The actual read RPC done to a DS.
  */
 static int
 nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
-    struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp,
-    struct ucred *cred, NFSPROC_T *p)
+    struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex,
+    int vers, int minorvers, struct ucred *cred, NFSPROC_T *p)
 {
 	uint32_t *tl;
-	int error, retlen;
+	int attrflag, error, retlen;
 	struct nfsrv_descript nfsd;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	struct nfsrv_descript *nd = &nfsd;
 	struct nfssockreq *nrp;
+	struct nfsvattr na;
 
 	nd->nd_mrep = NULL;
-	nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh, fhp->nfh_len,
-	    NULL, &dsp->nfsclds_sess, 0, 0);
-	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
+	if (vers == 0 || vers == NFS_VER4) {
+		nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh,
+		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
+		vers = NFS_VER4;
+		NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers);
+		if (flex != 0)
+			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
+		else
+			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
+	} else {
+		nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh,
+		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
+		NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n");
+	}
 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
 	txdr_hyper(io_off, tl);
 	*(tl + 2) = txdr_unsigned(len);
 	nrp = dsp->nfsclds_sockp;
+	NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp);
 	if (nrp == NULL)
 		/* If NULL, use the MDS socket. */
 		nrp = &nmp->nm_sockreq;
 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
-	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
+	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
+	NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat,
+	    error);
 	if (error != 0)
 		return (error);
+	if (vers == NFS_VER3) {
+		error = nfscl_postop_attr(nd, &na, &attrflag, NULL);
+		NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error);
+		if (error != 0)
+			goto nfsmout;
+	}
 	if (nd->nd_repstat != 0) {
 		error = nd->nd_repstat;
 		goto nfsmout;
 	}
-	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
-	*eofp = fxdr_unsigned(int, *tl);
+	if (vers == NFS_VER3) {
+		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+		*eofp = fxdr_unsigned(int, *(tl + 1));
+	} else {
+		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+		*eofp = fxdr_unsigned(int, *tl);
+	}
 	NFSM_STRSIZ(retlen, len);
+	NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp);
 	error = nfsm_mbufuio(nd, uiop, retlen);
 nfsmout:
 	if (nd->nd_mrep != NULL)
@@ -5745,24 +6073,40 @@ nfsmout:
 static int
 nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
-    struct nfsfh *fhp, int commit_thru_mds, struct ucred *cred, NFSPROC_T *p)
+    struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers,
+    struct ucred *cred, NFSPROC_T *p)
 {
 	uint32_t *tl;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
-	int error, rlen, commit, committed = NFSWRITE_FILESYNC;
+	int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC;
 	int32_t backup;
 	struct nfsrv_descript nfsd;
 	struct nfsrv_descript *nd = &nfsd;
 	struct nfssockreq *nrp;
+	struct nfsvattr na;
 
 	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
 	nd->nd_mrep = NULL;
-	nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len,
-	    NULL, &dsp->nfsclds_sess, 0, 0);
-	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
-	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
+	if (vers == 0 || vers == NFS_VER4) {
+		nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
+		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
+		NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers);
+		vers = NFS_VER4;
+		if (flex != 0)
+			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
+		else
+			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
+		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
+	} else {
+		nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
+		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
+		NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n");
+		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
+	}
 	txdr_hyper(io_off, tl);
 	tl += 2;
+	if (vers == NFS_VER3)
+		*tl++ = txdr_unsigned(len);
 	*tl++ = txdr_unsigned(*iomode);
 	*tl = txdr_unsigned(len);
 	nfsm_uiombuf(nd, uiop, len);
@@ -5771,7 +6115,9 @@ nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomo
 		/* If NULL, use the MDS socket. */
 		nrp = &nmp->nm_sockreq;
 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
-	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
+	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
+	NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error,
+	    nd->nd_repstat);
 	if (error != 0)
 		return (error);
 	if (nd->nd_repstat != 0) {
@@ -5786,8 +6132,16 @@ nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomo
 		uio_iov_len_add(uiop, len);
 		error = nd->nd_repstat;
 	} else {
+		if (vers == NFS_VER3) {
+			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
+			    NULL);
+			NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error);
+			if (error != 0)
+				goto nfsmout;
+		}
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
 		rlen = fxdr_unsigned(int, *tl++);
+		NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
 		if (rlen == 0) {
 			error = NFSERR_IO;
 			goto nfsmout;
@@ -5842,6 +6196,117 @@ nfsmout:
 }
 
 /*
+ * Do one write RPC of len bytes, already in mbuf chain m, at io_off on a DS.
+ * This variant is called from a separate kernel process for mirrors.
+ * Any short write (reply length != len) is considered an IO error.
+ */
+static int
+nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit,
+    nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
+    struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
+    struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl;
+	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
+	int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	struct nfssockreq *nrp;
+	struct nfsvattr na;
+
+	nd->nd_mrep = NULL;
+	if (vers == 0 || vers == NFS_VER4) {	/* vers 0 is handled as NFSv4 */
+		nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
+		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
+		vers = NFS_VER4;
+		NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n",
+		    minorvers);
+		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
+		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
+	} else {
+		nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
+		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
+		NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n");
+		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
+	}
+	txdr_hyper(io_off, tl);
+	tl += 2;
+	if (vers == NFS_VER3)
+		*tl++ = txdr_unsigned(len);	/* extra length word, v3 only */
+	*tl++ = txdr_unsigned(*iomode);
+	*tl = txdr_unsigned(len);
+	if (len > 0) {
+		/* Append the caller's data chain m to the request. */
+		nd->nd_mb->m_next = m;
+		/* Set nd_mb and nd_bpos to end of data. */
+		while (m->m_next != NULL)
+			m = m->m_next;
+		nd->nd_mb = m;
+		nd->nd_bpos = mtod(m, char *) + m->m_len;
+		NFSCL_DEBUG(4, "nfsrpc_writedsmir: lastmb len=%d\n", m->m_len);
+	}
+	nrp = dsp->nfsclds_sockp;
+	if (nrp == NULL)
+		/* If NULL, use the MDS socket. */
+		nrp = &nmp->nm_sockreq;
+	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
+	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
+	NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error,
+	    nd->nd_repstat);
+	if (error != 0)
+		return (error);	/* *iomode is not updated on this path */
+	if (nd->nd_repstat != 0)
+		error = nd->nd_repstat;
+	else {
+		if (vers == NFS_VER3) {	/* v3 reply: wcc_data is parsed first */
+			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
+			    NULL);
+			NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n",
+			    error);
+			if (error != 0)
+				goto nfsmout;
+		}
+		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
+		rlen = fxdr_unsigned(int, *tl++);	/* length from the reply */
+		NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len,
+		    rlen);
+		if (rlen != len) {	/* short write */
+			error = NFSERR_IO;
+			NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n",
+			    len, rlen);	/* NOTE(review): repeats the debug line above */
+			goto nfsmout;
+		}
+		commit = fxdr_unsigned(int, *tl++);
+
+		/*
+		 * Keeps the lowest commitment level; committed is still
+		 * FILESYNC here, so this always ends up as committed = commit.
+		 */
+		if (committed == NFSWRITE_FILESYNC)
+			committed = commit;
+		else if (committed == NFSWRITE_DATASYNC &&
+		    commit == NFSWRITE_UNSTABLE)
+			committed = commit;
+		NFSLOCKDS(dsp);
+		if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
+			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
+			dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
+		} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
+			*must_commit = 1;	/* verifier differs from the saved one */
+			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
+		}
+		NFSUNLOCKDS(dsp);
+	}
+nfsmout:
+	if (nd->nd_mrep != NULL)
+		mbuf_freem(nd->nd_mrep);
+	*iomode = committed;	/* reply's commit level, else FILESYNC */
+	if (nd->nd_repstat != 0 && error == 0)
+		error = nd->nd_repstat;
+	return (error);
+}
+
+/*
  * Free up the nfsclds structure.
  */
 void
@@ -5913,17 +6378,26 @@ nfscl_getsameserver(struct nfsmount *nmp, struct nfscl
  */
 static int
 nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
-    struct nfsfh *fhp, struct ucred *cred, NFSPROC_T *p)
+    struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred,
+    NFSPROC_T *p)
 {
 	uint32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	struct nfssockreq *nrp;
-	int error;
+	struct nfsvattr na;
+	int attrflag, error;
 	
 	nd->nd_mrep = NULL;
-	nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh, fhp->nfh_len,
-	    NULL, &dsp->nfsclds_sess, 0, 0);
+	if (vers == 0 || vers == NFS_VER4) {
+		nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh,

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list