git: bb958dcf3d8a - main - nfsd: Add support for the NFSv4.2 Deallocate operation

Rick Macklem rmacklem at FreeBSD.org
Fri Aug 27 01:19:45 UTC 2021


The branch main has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=bb958dcf3d8af3a033dacbf8133681c9b0c73b2f

commit bb958dcf3d8af3a033dacbf8133681c9b0c73b2f
Author:     Rick Macklem <rmacklem at FreeBSD.org>
AuthorDate: 2021-08-27 01:14:11 +0000
Commit:     Rick Macklem <rmacklem at FreeBSD.org>
CommitDate: 2021-08-27 01:14:11 +0000

    nfsd: Add support for the NFSv4.2 Deallocate operation
    
    The recently added VOP_DEALLOCATE(9) VOP call allows
    implementation of the Deallocate NFSv4.2 operation.
    
    Since the Deallocate operation is a single succeed/fail
    operation, the call to VOP_DEALLOCATE(9) loops so long
    as progress is being made.  It calls maybe_yield()
    between loop iterations to allow other processes
    to preempt it.
    
    Where RFC 7862 underspecifies behaviour, the code
    is written to be Linux NFSv4.2 server compatible.
    
    Reviewed by:    khng
    Differential Revision:  https://reviews.freebsd.org/D31624
---
 sys/fs/nfs/nfs_var.h              |   4 +
 sys/fs/nfsserver/nfs_nfsdport.c   | 201 ++++++++++++++++++++++++++++++++++++++
 sys/fs/nfsserver/nfs_nfsdserv.c   | 105 ++++++++++++++++++++
 sys/fs/nfsserver/nfs_nfsdsocket.c |   2 +-
 4 files changed, 311 insertions(+), 1 deletion(-)

diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h
index b4c21c6f7029..5ddae345f906 100644
--- a/sys/fs/nfs/nfs_var.h
+++ b/sys/fs/nfs/nfs_var.h
@@ -282,6 +282,8 @@ int nfsrvd_teststateid(struct nfsrv_descript *, int,
     vnode_t, struct nfsexstuff *);
 int nfsrvd_allocate(struct nfsrv_descript *, int,
     vnode_t, struct nfsexstuff *);
+int nfsrvd_deallocate(struct nfsrv_descript *, int,
+    vnode_t, struct nfsexstuff *);
 int nfsrvd_copy_file_range(struct nfsrv_descript *, int,
     vnode_t, vnode_t, struct nfsexstuff *, struct nfsexstuff *);
 int nfsrvd_seek(struct nfsrv_descript *, int,
@@ -752,6 +754,8 @@ int nfsrv_setacl(struct vnode *, NFSACL_T *, struct ucred *, NFSPROC_T *);
 int nfsvno_seek(struct nfsrv_descript *, struct vnode *, u_long, off_t *, int,
     bool *, struct ucred *, NFSPROC_T *);
 int nfsvno_allocate(struct vnode *, off_t, off_t, struct ucred *, NFSPROC_T *);
+int nfsvno_deallocate(struct vnode *, off_t, off_t, struct ucred *,
+    NFSPROC_T *);
 int nfsvno_getxattr(struct vnode *, char *, uint32_t, struct ucred *,
     uint64_t, int, struct thread *, struct mbuf **, struct mbuf **, int *);
 int nfsvno_setxattr(struct vnode *, char *, int, struct mbuf *, char *,
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index efe9aac7a136..d93c547c5530 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -132,6 +132,8 @@ static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *,
     char *, int *);
 static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
     NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
+static int nfsrv_deallocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
+    NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
 static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
     struct vnode *, struct nfsmount **, int, struct acl *, int *);
 static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
@@ -4898,6 +4900,9 @@ tryagain:
 		} else if (ioproc == NFSPROC_ALLOCATE)
 			error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp,
 			    &nmp[0], mirrorcnt, &failpos);
+		else if (ioproc == NFSPROC_DEALLOCATE)
+			error = nfsrv_deallocatedsrpc(fh, off, *offp, cred, p,
+			    vp, &nmp[0], mirrorcnt, &failpos);
 		else {
 			error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p,
 			    vp, nmp[mirrorcnt - 1], nap);
@@ -5679,6 +5684,166 @@ nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred,
 	return (error);
 }
 
+/*
+ * Arguments for one deallocate RPC on a DS data file, kept in a structure
+ * so that nfsrv_deallocatedsdorpc() can be executed by a separate kernel
+ * process; the outcome is handed back through the err and done fields.
+ */
+struct nfsrvdeallocatedsdorpc {
+	int			done;	/* Set to 1 once the RPC returns. */
+	int			inprog;	/* Waiter sleeps only while != 0. */
+	struct task		tsk;	/* Used as the tsleep() channel. */
+	fhandle_t		fh;	/* Copy of the DS file handle. */
+	off_t			off;	/* Offset given to the RPC. */
+	off_t			len;	/* Length given to the RPC. */
+	struct nfsmount		*nmp;	/* DS mount given to the RPC. */
+	struct ucred		*cred;	/* Credentials given to the RPC. */
+	NFSPROC_T		*p;	/* NFSPROC_T given to the RPC. */
+	int			err;	/* Return value of the RPC. */
+};
+
+static int
+nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
+    off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl;
+	struct nfsrv_descript *nd;
+	nfsattrbit_t attrbits;
+	nfsv4stateid_t st;
+	int error;
+
+	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
+	nfscl_reqstart(nd, NFSPROC_DEALLOCATE, nmp, (u_int8_t *)fhp,
+	    sizeof(fhandle_t), NULL, NULL, 0, 0);
+
+	/*
+	 * Use a stateid where other is an alternating 01010 pattern and
+	 * seqid is 0xffffffff.  This value is not defined as special by
+	 * the RFC and is used by the FreeBSD NFS server to indicate an
+	 * MDS->DS proxy operation.
+	 */
+	st.other[0] = 0x55555555;
+	st.other[1] = 0x55555555;
+	st.other[2] = 0x55555555;
+	st.seqid = 0xffffffff;
+	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
+	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
+	txdr_hyper(off, tl); tl += 2;
+	txdr_hyper(len, tl); tl += 2;
+	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: len=%jd\n", (intmax_t)len);
+
+	*tl = txdr_unsigned(NFSV4OP_GETATTR);
+	NFSGETATTR_ATTRBIT(&attrbits);
+	nfsrv_putattrbit(nd, &attrbits);
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
+	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0) {
+		free(nd, M_TEMP);
+		return (error);
+	}
+	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft allocaterpc=%d\n",
+	    nd->nd_repstat);
+	if (nd->nd_repstat == 0) {
+		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
+		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+	} else
+		error = nd->nd_repstat;
+	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft loadattr=%d\n", error);
+nfsmout:
+	m_freem(nd->nd_mrep);
+	free(nd, M_TEMP);
+	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc error=%d\n", error);
+	return (error);
+}
+
+/*
+ * Task function given to nfs_pnfsio(); it runs nfsrv_deallocatedsdorpc().
+ */
+static void
+start_deallocatedsdorpc(void *arg, int pending)	/* pending is unused */
+{
+	struct nfsrvdeallocatedsdorpc *drpc;
+
+	drpc = (struct nfsrvdeallocatedsdorpc *)arg;
+	drpc->err = nfsrv_deallocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
+	    drpc->len, NULL, drpc->cred, drpc->p);	/* NULL: no nap */
+	drpc->done = 1;		/* Polled by the waiter in nfsrv_deallocatedsrpc(). */
+	NFSD_DEBUG(4, "start_deallocatedsdorpc: err=%d\n", drpc->err);
+}
+
+static int
+nfsrv_deallocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred,
+    NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
+    int *failposp)
+{
+	struct nfsrvdeallocatedsdorpc *drpc, *tdrpc = NULL;
+	struct nfsvattr na;
+	int error, i, ret, timo;
+
+	NFSD_DEBUG(4, "in nfsrv_deallocatedsrpc\n");
+	drpc = NULL;
+	if (mirrorcnt > 1)
+		tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
+		    M_WAITOK);
+
+	/*
+	 * Do the deallocate RPC for every DS, using a separate kernel process
+	 * for every DS except the last one.
+	 */
+	error = 0;
+	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
+		tdrpc->done = 0;
+		NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
+		tdrpc->off = off;
+		tdrpc->len = len;
+		tdrpc->nmp = *nmpp;
+		tdrpc->cred = cred;
+		tdrpc->p = p;
+		tdrpc->inprog = 0;
+		tdrpc->err = 0;
+		ret = EIO;
+		if (nfs_pnfsiothreads != 0) {
+			ret = nfs_pnfsio(start_deallocatedsdorpc, tdrpc);
+			NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: nfs_pnfsio=%d\n",
+			    ret);
+		}
+		if (ret != 0) {
+			ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len,
+			    NULL, cred, p);
+			if (nfsds_failerr(ret) && *failposp == -1)
+				*failposp = i;
+			else if (error == 0 && ret != 0)
+				error = ret;
+		}
+		nmpp++;
+		fhp++;
+	}
+	ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p);
+	if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
+		*failposp = mirrorcnt - 1;
+	else if (error == 0 && ret != 0)
+		error = ret;
+	if (error == 0)
+		error = nfsrv_setextattr(vp, &na, p);
+	NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: aft setextat=%d\n", error);
+	tdrpc = drpc;
+	timo = hz / 50;		/* Wait for 20msec. */
+	if (timo < 1)
+		timo = 1;
+	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
+		/* Wait for RPCs on separate threads to complete. */
+		while (tdrpc->inprog != 0 && tdrpc->done == 0)
+			tsleep(&tdrpc->tsk, PVFS, "srvalds", timo);
+		if (nfsds_failerr(tdrpc->err) && *failposp == -1)
+			*failposp = i;
+		else if (error == 0 && tdrpc->err != 0)
+			error = tdrpc->err;
+	}
+	free(drpc, M_TEMP);
+	return (error);
+}
+
 static int
 nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
     struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap,
@@ -6425,6 +6590,42 @@ nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred,
 	return (error);
 }
 
+/*
+ * Deallocate vnode op call: tries the DS file first, then VOP_DEALLOCATE().
+ */
+int
+nfsvno_deallocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred,
+    NFSPROC_T *p)
+{
+	int error;
+	off_t olen;
+
+	ASSERT_VOP_ELOCKED(vp, "nfsvno_deallocate vp");
+	/*
+	 * Attempt to deallocate on a DS file.  ENOENT implies there is no DS
+	 * file, so fall through; any other result (0 included) is returned.
+	 */
+	error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_DEALLOCATE, NULL,
+	    NULL, NULL, NULL, NULL, &len, 0, NULL);
+	if (error != ENOENT)
+		return (error);
+
+	/*
+	 * Do the actual VOP_DEALLOCATE(), looping while len keeps shrinking
+	 * (olen > len) and yielding between calls; a stall is NFSERR_IO.
+	 */
+	do {
+		olen = len;
+		error = VOP_DEALLOCATE(vp, &off, &len, 0, IO_SYNC, cred);
+		if (error == 0 && len > 0 && olen > len)
+			maybe_yield();
+	} while (error == 0 && len > 0 && olen > len);
+	if (error == 0 && len > 0)	/* No progress with len left over. */
+		error = NFSERR_IO;
+	NFSEXITCODE(error);
+	return (error);
+}
+
 /*
  * Get Extended Atribute vnode op into an mbuf list.
  */
diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c
index 12181d04f1fa..2d4635c75204 100644
--- a/sys/fs/nfsserver/nfs_nfsdserv.c
+++ b/sys/fs/nfsserver/nfs_nfsdserv.c
@@ -5389,6 +5389,111 @@ nfsmout:
 	return (error);
 }
 
+/*
+ * nfs deallocate service: parse the args, check access/state, deallocate.
+ */
+int
+nfsrvd_deallocate(struct nfsrv_descript *nd, __unused int isdgram,
+    vnode_t vp, struct nfsexstuff *exp)
+{
+	uint32_t *tl;
+	struct nfsvattr forat;
+	int error = 0, forat_ret = 1, gotproxystateid;
+	off_t off, len;
+	struct nfsstate st, *stp = &st;
+	struct nfslock lo, *lop = &lo;
+	nfsv4stateid_t stateid;
+	nfsquad_t clientid;
+	nfsattrbit_t attrbits;
+
+	gotproxystateid = 0;
+	NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER);
+	stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
+	lop->lo_flags = NFSLCK_WRITE;
+	stp->ls_ownerlen = 0;
+	stp->ls_op = NULL;
+	stp->ls_uid = nd->nd_cred->cr_uid;
+	stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
+	clientid.lval[0] = stp->ls_stateid.other[0] = *tl++;
+	clientid.lval[1] = stp->ls_stateid.other[1] = *tl++;
+	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			clientid.qval = nd->nd_clientid.qval;
+		else if (nd->nd_clientid.qval != clientid.qval)
+			printf("EEK2 multiple clids\n");
+	} else {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			printf("EEK! no clientid from session\n");
+		nd->nd_flag |= ND_IMPLIEDCLID;
+		nd->nd_clientid.qval = clientid.qval;
+	}
+	stp->ls_stateid.other[2] = *tl++;
+	/*
+	 * Don't allow this to be done for a DS.
+	 */
+	if ((nd->nd_flag & ND_DSSERVER) != 0)
+		nd->nd_repstat = NFSERR_NOTSUPP;
+	/* However, the proxy stateid skips nfsrv_lockctrl() below. */
+	if (stp->ls_stateid.seqid == 0xffffffff &&
+	    stp->ls_stateid.other[0] == 0x55555555 &&
+	    stp->ls_stateid.other[1] == 0x55555555 &&
+	    stp->ls_stateid.other[2] == 0x55555555)
+		gotproxystateid = 1;
+	off = fxdr_hyper(tl); tl += 2;
+	lop->lo_first = off;
+	len = fxdr_hyper(tl);
+	if (len < 0)
+		len = OFF_MAX;	/* Negative as an off_t: clamp it. */
+	NFSD_DEBUG(4, "dealloc: off=%jd len=%jd\n", (intmax_t)off,
+	    (intmax_t)len);
+	lop->lo_end = lop->lo_first + len;
+	/*
+	 * Sanity check the offset and length.
+	 * off and len are off_t (signed int64_t) whereas
+	 * lo_first and lo_end are uint64_t and, as such,
+	 * if off >= 0 && len > 0, lo_end cannot overflow
+	 * unless off_t is changed to something other than
+	 * int64_t.  Check lo_end < lo_first in case that
+	 * is someday the case.
+	 * The error to return is not specified by RFC 7862 so I
+	 * made this compatible with the Linux knfsd.
+	 */
+	if (nd->nd_repstat == 0) {
+		if (off < 0 || lop->lo_end > NFSRV_MAXFILESIZE)
+			nd->nd_repstat = NFSERR_FBIG;
+		else if (len == 0 || lop->lo_end < lop->lo_first)
+			nd->nd_repstat = NFSERR_INVAL;
+	}
+
+	if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG)
+		nd->nd_repstat = NFSERR_WRONGTYPE;
+	NFSZERO_ATTRBIT(&attrbits);
+	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER);
+	forat_ret = nfsvno_getattr(vp, &forat, nd, curthread, 1, &attrbits);
+	if (nd->nd_repstat == 0)
+		nd->nd_repstat = forat_ret;
+	if (nd->nd_repstat == 0 && (forat.na_uid != nd->nd_cred->cr_uid ||
+	     NFSVNO_EXSTRICTACCESS(exp)))	/* else owner is exempt */
+		nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp,
+		    curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED,
+		    NULL);
+	if (nd->nd_repstat == 0 && gotproxystateid == 0)
+		nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
+		    &stateid, exp, nd, curthread);
+
+	if (nd->nd_repstat == 0)
+		nd->nd_repstat = nfsvno_deallocate(vp, off, len, nd->nd_cred,
+		    curthread);
+	vput(vp);	/* vp is released on both exit paths. */
+	NFSD_DEBUG(4, "eo deallocate=%d\n", nd->nd_repstat);
+	NFSEXITCODE2(0, nd);
+	return (0);
+nfsmout:	/* presumably where NFSM_DISSECT() jumps on failure */
+	vput(vp);
+	NFSEXITCODE2(error, nd);
+	return (error);
+}
+
 /*
  * nfs copy service
  */
diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c b/sys/fs/nfsserver/nfs_nfsdsocket.c
index 85771974be2f..fdd46b6290e9 100644
--- a/sys/fs/nfsserver/nfs_nfsdsocket.c
+++ b/sys/fs/nfsserver/nfs_nfsdsocket.c
@@ -198,7 +198,7 @@ int (*nfsrv4_ops0[NFSV42_NOPS])(struct nfsrv_descript *,
 	nfsrvd_allocate,
 	(int (*)(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *))0,
 	nfsrvd_notsupp,
-	nfsrvd_notsupp,
+	nfsrvd_deallocate,
 	nfsrvd_ioadvise,
 	nfsrvd_layouterror,
 	nfsrvd_layoutstats,


More information about the dev-commits-src-all mailing list