git: bb958dcf3d8a - main - nfsd: Add support for the NFSv4.2 Deallocate operation
Rick Macklem
rmacklem at FreeBSD.org
Fri Aug 27 01:19:45 UTC 2021
The branch main has been updated by rmacklem:
URL: https://cgit.FreeBSD.org/src/commit/?id=bb958dcf3d8af3a033dacbf8133681c9b0c73b2f
commit bb958dcf3d8af3a033dacbf8133681c9b0c73b2f
Author: Rick Macklem <rmacklem at FreeBSD.org>
AuthorDate: 2021-08-27 01:14:11 +0000
Commit: Rick Macklem <rmacklem at FreeBSD.org>
CommitDate: 2021-08-27 01:14:11 +0000
nfsd: Add support for the NFSv4.2 Deallocate operation
The recently added VOP_DEALLOCATE(9) VOP call allows
implementation of the Deallocate NFSv4.2 operation.
Since the Deallocate operation is a single succeed/fail
operation, the call to VOP_DEALLOCATE(9) loops so long
as progress is being made. It calls maybe_yield()
between loop iterations to allow other processes
to preempt it.
Where RFC 7862 underspecifies behaviour, the code
is written to be Linux NFSv4.2 server compatible.
Reviewed by: khng
Differential Revision: https://reviews.freebsd.org/D31624
---
sys/fs/nfs/nfs_var.h | 4 +
sys/fs/nfsserver/nfs_nfsdport.c | 201 ++++++++++++++++++++++++++++++++++++++
sys/fs/nfsserver/nfs_nfsdserv.c | 105 ++++++++++++++++++++
sys/fs/nfsserver/nfs_nfsdsocket.c | 2 +-
4 files changed, 311 insertions(+), 1 deletion(-)
diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h
index b4c21c6f7029..5ddae345f906 100644
--- a/sys/fs/nfs/nfs_var.h
+++ b/sys/fs/nfs/nfs_var.h
@@ -282,6 +282,8 @@ int nfsrvd_teststateid(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_allocate(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
+int nfsrvd_deallocate(struct nfsrv_descript *, int,
+ vnode_t, struct nfsexstuff *);
int nfsrvd_copy_file_range(struct nfsrv_descript *, int,
vnode_t, vnode_t, struct nfsexstuff *, struct nfsexstuff *);
int nfsrvd_seek(struct nfsrv_descript *, int,
@@ -752,6 +754,8 @@ int nfsrv_setacl(struct vnode *, NFSACL_T *, struct ucred *, NFSPROC_T *);
int nfsvno_seek(struct nfsrv_descript *, struct vnode *, u_long, off_t *, int,
bool *, struct ucred *, NFSPROC_T *);
int nfsvno_allocate(struct vnode *, off_t, off_t, struct ucred *, NFSPROC_T *);
+int nfsvno_deallocate(struct vnode *, off_t, off_t, struct ucred *,
+ NFSPROC_T *);
int nfsvno_getxattr(struct vnode *, char *, uint32_t, struct ucred *,
uint64_t, int, struct thread *, struct mbuf **, struct mbuf **, int *);
int nfsvno_setxattr(struct vnode *, char *, int, struct mbuf *, char *,
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index efe9aac7a136..d93c547c5530 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -132,6 +132,8 @@ static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *,
char *, int *);
static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
+static int nfsrv_deallocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
+ NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
struct vnode *, struct nfsmount **, int, struct acl *, int *);
static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
@@ -4898,6 +4900,9 @@ tryagain:
} else if (ioproc == NFSPROC_ALLOCATE)
error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp,
&nmp[0], mirrorcnt, &failpos);
+ else if (ioproc == NFSPROC_DEALLOCATE)
+ error = nfsrv_deallocatedsrpc(fh, off, *offp, cred, p,
+ vp, &nmp[0], mirrorcnt, &failpos);
else {
error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p,
vp, nmp[mirrorcnt - 1], nap);
@@ -5679,6 +5684,166 @@ nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred,
return (error);
}
+/*
+ * Do a deallocate RPC on a DS data file, using this structure for the
+ * arguments, so that this function can be executed by a separate kernel
+ * process.
+ *
+ * nfsrv_deallocatedsrpc() fills in one of these for each mirror except
+ * the last and may hand it to nfs_pnfsio() along with
+ * start_deallocatedsdorpc(), which reads the arguments from it and
+ * writes the outcome back into err and done.
+ */
+struct nfsrvdeallocatedsdorpc {
+ int done; /* Set to 1 by start_deallocatedsdorpc() after err is stored. */
+ int inprog; /* Zeroed before queueing; the waiter sleeps only while non-zero.
+ * Presumably set by nfs_pnfsio(), which is not visible here. */
+ struct task tsk; /* Its address is the tsleep() channel used by the waiter. */
+ fhandle_t fh; /* Copy of the file handle for this mirror. */
+ off_t off; /* Offset argument of the deallocate. */
+ off_t len; /* Length argument of the deallocate. */
+ struct nfsmount *nmp; /* Mount the RPC is sent on. */
+ struct ucred *cred; /* Passed through to nfsrv_deallocatedsdorpc(). */
+ NFSPROC_T *p; /* Passed through to nfsrv_deallocatedsdorpc(). */
+ int err; /* Return value of nfsrv_deallocatedsdorpc(). */
+};
+/*
+ * Send one Deallocate compound to a single DS and return its outcome.
+ *
+ * The request is started with nfscl_reqstart() for NFSPROC_DEALLOCATE on
+ * the file handle fhp, followed by the proxy stateid, off, len and a
+ * GETATTR operation. When the reply status is 0, nfsv4_loadattr() is
+ * called with nap to decode the returned attributes; callers in this
+ * file pass NULL for nap when they do not use them.
+ *
+ * Returns the newnfs_request() error if that failed, else the reply
+ * status (nd_repstat) if it is non-zero, else the nfsv4_loadattr()
+ * result.
+ */
+static int
+nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
+ off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p)
+{
+ uint32_t *tl;
+ struct nfsrv_descript *nd;
+ nfsattrbit_t attrbits;
+ nfsv4stateid_t st;
+ int error;
+ /* nd is allocated zeroed and is freed on both return paths below. */
+ nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
+ nfscl_reqstart(nd, NFSPROC_DEALLOCATE, nmp, (u_int8_t *)fhp,
+ sizeof(fhandle_t), NULL, NULL, 0, 0);
+
+ /*
+ * Use a stateid where other is an alternating 01010 pattern and
+ * seqid is 0xffffffff. This value is not defined as special by
+ * the RFC and is used by the FreeBSD NFS server to indicate an
+ * MDS->DS proxy operation.
+ */
+ st.other[0] = 0x55555555;
+ st.other[1] = 0x55555555;
+ st.other[2] = 0x55555555;
+ st.seqid = 0xffffffff;
+ nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
+ NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
+ txdr_hyper(off, tl); tl += 2;
+ txdr_hyper(len, tl); tl += 2;
+ NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: len=%jd\n", (intmax_t)len);
+ /* The final NFSX_UNSIGNED reserved by NFSM_BUILD() above holds the GETATTR op. */
+ *tl = txdr_unsigned(NFSV4OP_GETATTR);
+ NFSGETATTR_ATTRBIT(&attrbits);
+ nfsrv_putattrbit(nd, &attrbits);
+ error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
+ cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+ if (error != 0) { /* Request failed: only nd itself is freed on this path. */
+ free(nd, M_TEMP);
+ return (error);
+ }
+ NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft allocaterpc=%d\n",
+ nd->nd_repstat); /* NOTE(review): text says "allocaterpc" in the deallocate path; likely copy/paste. */
+ if (nd->nd_repstat == 0) {
+ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); /* tl is not read afterwards; presumably may branch to nfsmout - confirm in the macro. */
+ error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
+ NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+ } else
+ error = nd->nd_repstat;
+ NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft loadattr=%d\n", error);
+nfsmout: /* Common exit once a reply exists: release the reply mbufs and nd. */
+ m_freem(nd->nd_mrep);
+ free(nd, M_TEMP);
+ NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc error=%d\n", error);
+ return (error);
+}
+
+/*
+ * Start up the thread that will execute nfsrv_deallocatedsdorpc().
+ *
+ * This is the function nfsrv_deallocatedsrpc() passes to nfs_pnfsio().
+ * arg is the struct nfsrvdeallocatedsdorpc for one mirror; pending is
+ * not used. The RPC is done with a NULL attribute pointer, its result
+ * is stored in err and only then is done set, which is the flag the
+ * wait loop in nfsrv_deallocatedsrpc() tests.
+ */
+static void
+start_deallocatedsdorpc(void *arg, int pending)
+{
+ struct nfsrvdeallocatedsdorpc *drpc;
+
+ drpc = (struct nfsrvdeallocatedsdorpc *)arg;
+ drpc->err = nfsrv_deallocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
+ drpc->len, NULL, drpc->cred, drpc->p);
+ drpc->done = 1; /* Set after err so a waiter that sees done reads a valid err. */
+ NFSD_DEBUG(4, "start_deallocatedsdorpc: err=%d\n", drpc->err);
+}
+/*
+ * Do the deallocate RPC on every mirror of a DS data file.
+ *
+ * fhp and nmpp are stepped together, one entry per mirror, mirrorcnt
+ * entries in total. Each of the first mirrorcnt - 1 mirrors is queued
+ * via nfs_pnfsio() when nfs_pnfsiothreads is non-zero; if it was not
+ * queued, the RPC is done synchronously instead. The last mirror is
+ * always done directly by this function and is the only one whose
+ * attributes are requested (into na) and passed to nfsrv_setextattr().
+ *
+ * For each result, if nfsds_failerr() is true and *failposp is still
+ * -1, the mirror's index is stored in *failposp (for the last mirror
+ * only when mirrorcnt > 1); otherwise the first non-zero result
+ * becomes the return value.
+ */
+static int
+nfsrv_deallocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred,
+ NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
+ int *failposp)
+{
+ struct nfsrvdeallocatedsdorpc *drpc, *tdrpc = NULL;
+ struct nfsvattr na;
+ int error, i, ret, timo;
+
+ NFSD_DEBUG(4, "in nfsrv_deallocatedsrpc\n");
+ drpc = NULL; /* Stays NULL for a single mirror; free() below is then given NULL. */
+ if (mirrorcnt > 1)
+ tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
+ M_WAITOK);
+
+ /*
+ * Do the deallocate RPC for every DS, using a separate kernel process
+ * for every DS except the last one.
+ */
+ error = 0;
+ for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
+ tdrpc->done = 0;
+ NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
+ tdrpc->off = off;
+ tdrpc->len = len;
+ tdrpc->nmp = *nmpp;
+ tdrpc->cred = cred;
+ tdrpc->p = p;
+ tdrpc->inprog = 0;
+ tdrpc->err = 0;
+ ret = EIO; /* Non-zero so the synchronous fallback runs when nfs_pnfsiothreads is 0. */
+ if (nfs_pnfsiothreads != 0) {
+ ret = nfs_pnfsio(start_deallocatedsdorpc, tdrpc);
+ NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: nfs_pnfsio=%d\n",
+ ret);
+ }
+ if (ret != 0) { /* Not queued: do this mirror's RPC right here. */
+ ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len,
+ NULL, cred, p);
+ if (nfsds_failerr(ret) && *failposp == -1)
+ *failposp = i;
+ else if (error == 0 && ret != 0)
+ error = ret;
+ }
+ nmpp++;
+ fhp++;
+ }
+ ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p);
+ if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
+ *failposp = mirrorcnt - 1;
+ else if (error == 0 && ret != 0)
+ error = ret;
+ if (error == 0) /* NOTE(review): error is also 0 when the RPC above failed but
+ * only recorded *failposp; confirm na is valid in that case. */
+ error = nfsrv_setextattr(vp, &na, p);
+ NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: aft setextat=%d\n", error);
+ tdrpc = drpc;
+ timo = hz / 50; /* Wait for 20msec. */
+ if (timo < 1)
+ timo = 1;
+ for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
+ /* Wait for RPCs on separate threads to complete. */
+ while (tdrpc->inprog != 0 && tdrpc->done == 0)
+ tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); /* Re-checks the flags after each timo-tick sleep. */
+ if (nfsds_failerr(tdrpc->err) && *failposp == -1)
+ *failposp = i;
+ else if (error == 0 && tdrpc->err != 0)
+ error = tdrpc->err;
+ }
+ free(drpc, M_TEMP);
+ return (error);
+}
+
static int
nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap,
@@ -6425,6 +6590,42 @@ nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred,
return (error);
}
+/*
+ * Deallocate vnode op call.
+ *
+ * vp must be exclusively locked (asserted below). nfsrv_proxyds() is
+ * tried first with NFSPROC_DEALLOCATE; any result other than ENOENT,
+ * including success, is returned as is. Otherwise VOP_DEALLOCATE() is
+ * called repeatedly for as long as it succeeds, leaves len > 0 and has
+ * reduced len since the previous call.
+ *
+ * Returns 0 once len has reached 0, the VOP_DEALLOCATE() error if one
+ * occurred, or NFSERR_IO when a call succeeded without reducing a
+ * still positive len. p is only passed to nfsrv_proxyds().
+ */
+int
+nfsvno_deallocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred,
+ NFSPROC_T *p)
+{
+ int error;
+ off_t olen;
+
+ ASSERT_VOP_ELOCKED(vp, "nfsvno_deallocate vp");
+ /*
+ * Attempt to deallocate on a DS file. A return of ENOENT implies
+ * there is no DS file to deallocate on.
+ */
+ error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_DEALLOCATE, NULL,
+ NULL, NULL, NULL, NULL, &len, 0, NULL);
+ if (error != ENOENT)
+ return (error); /* This early return does not pass through NFSEXITCODE(). */
+
+ /*
+ * Do the actual VOP_DEALLOCATE(), looping so long as
+ * progress is being made, to achieve completion.
+ */
+ do {
+ olen = len; /* Saved so the checks below can tell whether len shrank. */
+ error = VOP_DEALLOCATE(vp, &off, &len, 0, IO_SYNC, cred); /* off and len are passed by pointer; presumably updated to the part still to do. */
+ if (error == 0 && len > 0 && olen > len)
+ maybe_yield(); /* Another pass is coming: give other threads a chance first. */
+ } while (error == 0 && len > 0 && olen > len);
+ if (error == 0 && len > 0)
+ error = NFSERR_IO; /* Succeeded but stopped making progress with len remaining. */
+ NFSEXITCODE(error);
+ return (error);
+}
+
/*
* Get Extended Atribute vnode op into an mbuf list.
*/
diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c
index 12181d04f1fa..2d4635c75204 100644
--- a/sys/fs/nfsserver/nfs_nfsdserv.c
+++ b/sys/fs/nfsserver/nfs_nfsdserv.c
@@ -5389,6 +5389,111 @@ nfsmout:
return (error);
}
+/*
+ * nfs deallocate service
+ *
+ * Dissects the stateid, offset and length from the request, range
+ * checks them, requires vp to be a regular file, does the owner/access
+ * check and (unless the proxy stateid was supplied) the
+ * nfsrv_lockctrl() check, then calls nfsvno_deallocate(). Each check
+ * only runs while nd->nd_repstat is still 0 and the final status is
+ * left in nd->nd_repstat.
+ *
+ * vp is released with vput() on both exits. The normal exit returns 0;
+ * the nfsmout exit returns error, which is not assigned explicitly in
+ * this function (presumably NFSM_DISSECT() sets it - confirm in the
+ * macro).
+ */
+int
+nfsrvd_deallocate(struct nfsrv_descript *nd, __unused int isdgram,
+ vnode_t vp, struct nfsexstuff *exp)
+{
+ uint32_t *tl;
+ struct nfsvattr forat;
+ int error = 0, forat_ret = 1, gotproxystateid;
+ off_t off, len;
+ struct nfsstate st, *stp = &st;
+ struct nfslock lo, *lop = &lo;
+ nfsv4stateid_t stateid;
+ nfsquad_t clientid;
+ nfsattrbit_t attrbits;
+
+ gotproxystateid = 0;
+ NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER); /* stateid, then offset, then length */
+ stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
+ lop->lo_flags = NFSLCK_WRITE;
+ stp->ls_ownerlen = 0;
+ stp->ls_op = NULL;
+ stp->ls_uid = nd->nd_cred->cr_uid;
+ stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
+ clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; /* First two words of other double as the clientid. */
+ clientid.lval[1] = stp->ls_stateid.other[1] = *tl++;
+ if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+ if ((nd->nd_flag & ND_NFSV41) != 0)
+ clientid.qval = nd->nd_clientid.qval;
+ else if (nd->nd_clientid.qval != clientid.qval)
+ printf("EEK2 multiple clids\n");
+ } else {
+ if ((nd->nd_flag & ND_NFSV41) != 0)
+ printf("EEK! no clientid from session\n");
+ nd->nd_flag |= ND_IMPLIEDCLID;
+ nd->nd_clientid.qval = clientid.qval;
+ }
+ stp->ls_stateid.other[2] = *tl++;
+ /*
+ * Don't allow this to be done for a DS.
+ */
+ if ((nd->nd_flag & ND_DSSERVER) != 0)
+ nd->nd_repstat = NFSERR_NOTSUPP;
+ /* However, allow the proxy stateid. */
+ if (stp->ls_stateid.seqid == 0xffffffff &&
+ stp->ls_stateid.other[0] == 0x55555555 &&
+ stp->ls_stateid.other[1] == 0x55555555 &&
+ stp->ls_stateid.other[2] == 0x55555555)
+ gotproxystateid = 1; /* Only skips nfsrv_lockctrl() below; nd_repstat is left unchanged here. */
+ off = fxdr_hyper(tl); tl += 2;
+ lop->lo_first = off;
+ len = fxdr_hyper(tl);
+ if (len < 0) /* A length that decodes as negative is clamped to OFF_MAX. */
+ len = OFF_MAX;
+ NFSD_DEBUG(4, "dealloc: off=%jd len=%jd\n", (intmax_t)off,
+ (intmax_t)len);
+ lop->lo_end = lop->lo_first + len;
+ /*
+ * Sanity check the offset and length.
+ * off and len are off_t (signed int64_t) whereas
+ * lo_first and lo_end are uint64_t and, as such,
+ * if off >= 0 && len > 0, lo_end cannot overflow
+ * unless off_t is changed to something other than
+ * int64_t. Check lo_end < lo_first in case that
+ * is someday the case.
+ * The error to return is not specified by RFC 7862 so I
+ * made this compatible with the Linux knfsd.
+ */
+ if (nd->nd_repstat == 0) {
+ if (off < 0 || lop->lo_end > NFSRV_MAXFILESIZE)
+ nd->nd_repstat = NFSERR_FBIG;
+ else if (len == 0 || lop->lo_end < lop->lo_first)
+ nd->nd_repstat = NFSERR_INVAL;
+ }
+ /* Anything other than a regular file is rejected with NFSERR_WRONGTYPE. */
+ if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG)
+ nd->nd_repstat = NFSERR_WRONGTYPE;
+ NFSZERO_ATTRBIT(&attrbits);
+ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER);
+ forat_ret = nfsvno_getattr(vp, &forat, nd, curthread, 1, &attrbits); /* Called even if nd_repstat is already set; only the owner bit is requested. */
+ if (nd->nd_repstat == 0)
+ nd->nd_repstat = forat_ret;
+ if (nd->nd_repstat == 0 && (forat.na_uid != nd->nd_cred->cr_uid ||
+ NFSVNO_EXSTRICTACCESS(exp))) /* Write access check is skipped for the owner unless strict access applies. */
+ nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp,
+ curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED,
+ NULL);
+ if (nd->nd_repstat == 0 && gotproxystateid == 0)
+ nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
+ &stateid, exp, nd, curthread);
+ /* Every check passed: do the deallocation itself. */
+ if (nd->nd_repstat == 0)
+ nd->nd_repstat = nfsvno_deallocate(vp, off, len, nd->nd_cred,
+ curthread);
+ vput(vp);
+ NFSD_DEBUG(4, "eo deallocate=%d\n", nd->nd_repstat);
+ NFSEXITCODE2(0, nd);
+ return (0);
+nfsmout: /* No explicit goto targets this label; presumably reached from NFSM_DISSECT(). */
+ vput(vp);
+ NFSEXITCODE2(error, nd);
+ return (error);
+}
+
/*
* nfs copy service
*/
diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c b/sys/fs/nfsserver/nfs_nfsdsocket.c
index 85771974be2f..fdd46b6290e9 100644
--- a/sys/fs/nfsserver/nfs_nfsdsocket.c
+++ b/sys/fs/nfsserver/nfs_nfsdsocket.c
@@ -198,7 +198,7 @@ int (*nfsrv4_ops0[NFSV42_NOPS])(struct nfsrv_descript *,
nfsrvd_allocate,
(int (*)(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *))0,
nfsrvd_notsupp,
- nfsrvd_notsupp,
+ nfsrvd_deallocate,
nfsrvd_ioadvise,
nfsrvd_layouterror,
nfsrvd_layoutstats,
More information about the dev-commits-src-main
mailing list