git: 00134a07898f - main - fusefs: require FUSE_NO_OPENDIR_SUPPORT for NFS exporting
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 04 Feb 2022 23:31:43 UTC
The branch main has been updated by asomers:
URL: https://cgit.FreeBSD.org/src/commit/?id=00134a07898fa807b8a1fcb2596f0e3644143f69
commit 00134a07898fa807b8a1fcb2596f0e3644143f69
Author: Alan Somers <asomers@FreeBSD.org>
AuthorDate: 2022-01-03 00:16:09 +0000
Commit: Alan Somers <asomers@FreeBSD.org>
CommitDate: 2022-02-04 23:31:05 +0000
fusefs: require FUSE_NO_OPENDIR_SUPPORT for NFS exporting
FUSE file systems that do not set FUSE_NO_OPENDIR_SUPPORT do not
guarantee that d_off will be valid after closing and reopening a
directory. That conflicts with NFS's statelessness, which results in
unresolvable bugs when NFS reads large directories, if:
* The file system _does_ change the d_off field for the last directory
entry previously returned by VOP_READDIR, or
* The file system deletes the last directory entry previously seen by
NFS.
Rather than doing a poor job of exporting such file systems, it's better
just to refuse.
Even though this is technically a breaking change, 13.0-RELEASE's
NFS-FUSE support was bad enough that an MFC should be allowed.
MFC after: 3 weeks.
Reviewed by: rmacklem
Differential Revision: https://reviews.freebsd.org/D33726
---
sys/fs/fuse/fuse_internal.c | 84 +++++++++++++++------------------------------
sys/fs/fuse/fuse_internal.h | 8 ++---
sys/fs/fuse/fuse_vnops.c | 50 +++++++++++++++++----------
3 files changed, 64 insertions(+), 78 deletions(-)
diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c
index eb8f1f87d90f..8f5cbb2d86ae 100644
--- a/sys/fs/fuse/fuse_internal.c
+++ b/sys/fs/fuse/fuse_internal.c
@@ -553,7 +553,6 @@ fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp,
int
fuse_internal_readdir(struct vnode *vp,
struct uio *uio,
- off_t startoff,
struct fuse_filehandle *fufh,
struct fuse_iov *cookediov,
int *ncookies,
@@ -562,7 +561,6 @@ fuse_internal_readdir(struct vnode *vp,
int err = 0;
struct fuse_dispatcher fdi;
struct fuse_read_in *fri = NULL;
- int fnd_start;
if (uio_resid(uio) == 0)
return 0;
@@ -572,18 +570,6 @@ fuse_internal_readdir(struct vnode *vp,
* Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p
* I/O).
*/
-
- /*
- * fnd_start is set non-zero once the offset in the directory gets
- * to the startoff. This is done because directories must be read
- * from the beginning (offset == 0) when fuse_vnop_readdir() needs
- * to do an open of the directory.
- * If it is not set non-zero here, it will be set non-zero in
- * fuse_internal_readdir_processdata() when uio_offset == startoff.
- */
- fnd_start = 0;
- if (uio->uio_offset == startoff)
- fnd_start = 1;
while (uio_resid(uio) > 0) {
fdi.iosize = sizeof(*fri);
fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
@@ -595,9 +581,8 @@ fuse_internal_readdir(struct vnode *vp,
if ((err = fdisp_wait_answ(&fdi)))
break;
- if ((err = fuse_internal_readdir_processdata(uio, startoff,
- &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov,
- ncookies, &cookies)))
+ if ((err = fuse_internal_readdir_processdata(uio, fri->size,
+ fdi.answ, fdi.iosize, cookediov, ncookies, &cookies)))
break;
}
@@ -612,8 +597,6 @@ fuse_internal_readdir(struct vnode *vp,
*/
int
fuse_internal_readdir_processdata(struct uio *uio,
- off_t startoff,
- int *fnd_start,
size_t reqsize,
void *buf,
size_t bufsize,
@@ -624,8 +607,6 @@ fuse_internal_readdir_processdata(struct uio *uio,
int err = 0;
int oreclen;
size_t freclen;
- int ents_copied = 0;
- int ents_seen = 0;
struct dirent *de;
struct fuse_dirent *fudge;
@@ -636,7 +617,7 @@ fuse_internal_readdir_processdata(struct uio *uio,
return -1;
for (;;) {
if (bufsize < FUSE_NAME_OFFSET) {
- err = (ents_seen == 0 || ents_copied > 0) ? -1 : 0;
+ err = -1;
break;
}
fudge = (struct fuse_dirent *)buf;
@@ -647,7 +628,7 @@ fuse_internal_readdir_processdata(struct uio *uio,
* This indicates a partial directory entry at the
* end of the directory data.
*/
- err = (ents_seen == 0 || ents_copied > 0) ? -1 : 0;
+ err = -1;
break;
}
#ifdef ZERO_PAD_INCOMPLETE_BUFS
@@ -669,41 +650,32 @@ fuse_internal_readdir_processdata(struct uio *uio,
err = -1;
break;
}
- ents_seen++;
- /*
- * Don't start to copy the directory entries out until
- * the requested offset in the directory is found.
- */
- if (*fnd_start != 0) {
- fiov_adjust(cookediov, oreclen);
- bzero(cookediov->base, oreclen);
-
- de = (struct dirent *)cookediov->base;
- de->d_fileno = fudge->ino;
- de->d_off = fudge->off;
- de->d_reclen = oreclen;
- de->d_type = fudge->type;
- de->d_namlen = fudge->namelen;
- memcpy((char *)cookediov->base + sizeof(struct dirent) -
- MAXNAMLEN - 1,
- (char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
- dirent_terminate(de);
-
- err = uiomove(cookediov->base, cookediov->len, uio);
- if (err)
+ fiov_adjust(cookediov, oreclen);
+ bzero(cookediov->base, oreclen);
+
+ de = (struct dirent *)cookediov->base;
+ de->d_fileno = fudge->ino;
+ de->d_off = fudge->off;
+ de->d_reclen = oreclen;
+ de->d_type = fudge->type;
+ de->d_namlen = fudge->namelen;
+ memcpy((char *)cookediov->base + sizeof(struct dirent) -
+ MAXNAMLEN - 1,
+ (char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
+ dirent_terminate(de);
+
+ err = uiomove(cookediov->base, cookediov->len, uio);
+ if (err)
+ break;
+ if (cookies != NULL) {
+ if (*ncookies == 0) {
+ err = -1;
break;
- if (cookies != NULL) {
- if (*ncookies == 0) {
- err = -1;
- break;
- }
- *cookies = fudge->off;
- cookies++;
- (*ncookies)--;
}
- ents_copied++;
- } else if (startoff == fudge->off)
- *fnd_start = 1;
+ *cookies = fudge->off;
+ cookies++;
+ (*ncookies)--;
+ }
buf = (char *)buf + freclen;
bufsize -= freclen;
uio_setoffset(uio, fudge->off);
diff --git a/sys/fs/fuse/fuse_internal.h b/sys/fs/fuse/fuse_internal.h
index c17eff2acac3..5d852b420366 100644
--- a/sys/fs/fuse/fuse_internal.h
+++ b/sys/fs/fuse/fuse_internal.h
@@ -250,12 +250,12 @@ int fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp,
struct pseudo_dirent {
uint32_t d_namlen;
};
-int fuse_internal_readdir(struct vnode *vp, struct uio *uio, off_t startoff,
+int fuse_internal_readdir(struct vnode *vp, struct uio *uio,
struct fuse_filehandle *fufh, struct fuse_iov *cookediov, int *ncookies,
uint64_t *cookies);
-int fuse_internal_readdir_processdata(struct uio *uio, off_t startoff,
- int *fnd_start, size_t reqsize, void *buf, size_t bufsize,
- struct fuse_iov *cookediov, int *ncookies, uint64_t **cookiesp);
+int fuse_internal_readdir_processdata(struct uio *uio, size_t reqsize,
+ void *buf, size_t bufsize, struct fuse_iov *cookediov, int *ncookies,
+ uint64_t **cookiesp);
/* remove */
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index da925b5dcbb5..10d64390123d 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -1846,11 +1846,10 @@ fuse_vnop_readdir(struct vop_readdir_args *ap)
struct uio *uio = ap->a_uio;
struct ucred *cred = ap->a_cred;
struct fuse_filehandle *fufh = NULL;
- struct fuse_data *mpdata = fuse_get_mpdata(vnode_mount(vp));
+ struct mount *mp = vnode_mount(vp);
struct fuse_iov cookediov;
int err = 0;
uint64_t *cookies;
- off_t startoff;
ssize_t tresid;
int ncookies;
bool closefufh = false;
@@ -1867,25 +1866,18 @@ fuse_vnop_readdir(struct vop_readdir_args *ap)
}
tresid = uio->uio_resid;
- startoff = uio->uio_offset;
err = fuse_filehandle_get_dir(vp, &fufh, cred, pid);
- if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
+ if (err == EBADF && mp->mnt_flag & MNT_EXPORTED) {
+ struct fuse_data *data = fuse_get_mpdata(mp);
+
+ KASSERT(data->dataflags & FSESS_NO_OPENDIR_SUPPORT,
+ ("FUSE file systems that don't set "
+ "FUSE_NO_OPENDIR_SUPPORT should not be exported"));
/*
* nfsd will do VOP_READDIR without first doing VOP_OPEN. We
- * must implicitly open the directory here
+ * must implicitly open the directory here.
*/
err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred);
- if (err == 0 && !(mpdata->dataflags & FSESS_NO_OPEN_SUPPORT)) {
- /*
- * FUSE does not require a directory entry's d_off
- * field to be valid outside of the lifetime of the
- * directory's FUSE file handle. So we must read the
- * directory from the beginning. However, if the file
- * system sets FUSE_NO_OPENDIR_SUPPORT, then the d_off
- * field will be valid for the lifetime of the dirent.
- */
- uio->uio_offset = 0;
- }
closefufh = true;
}
if (err)
@@ -1903,7 +1895,7 @@ fuse_vnop_readdir(struct vop_readdir_args *ap)
#define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1)
fiov_init(&cookediov, DIRCOOKEDSIZE);
- err = fuse_internal_readdir(vp, uio, startoff, fufh, &cookediov,
+ err = fuse_internal_readdir(vp, uio, fufh, &cookediov,
&ncookies, cookies);
fiov_teardown(&cookediov);
@@ -3108,8 +3100,30 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap)
struct vattr va;
int err;
- if (!(data->dataflags & FSESS_EXPORT_SUPPORT))
+ if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) {
+ /* NFS requires lookups for "." and ".." */
+ SDT_PROBE2(fusefs, , vnops, trace, 1,
+ "VOP_VPTOFH without FUSE_EXPORT_SUPPORT");
return EOPNOTSUPP;
+ }
+ if ((mp->mnt_flag & MNT_EXPORTED) &&
+ !(data->dataflags & FSESS_NO_OPENDIR_SUPPORT))
+ {
+ /*
+ * NFS is stateless, so nfsd must reopen a directory on every
+ * call to VOP_READDIR, passing in the d_off field from the
+ * final dirent of the previous invocation. But without
+ * FUSE_NO_OPENDIR_SUPPORT, the FUSE protocol does not
+ * guarantee that d_off will be valid after a directory is
+ * closed and reopened. So prohibit exporting FUSE file
+ * systems that don't set that flag.
+ *
+ * But userspace NFS servers don't have this problem.
+ */
+ SDT_PROBE2(fusefs, , vnops, trace, 1,
+ "VOP_VPTOFH without FUSE_NO_OPENDIR_SUPPORT");
+ return EOPNOTSUPP;
+ }
err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread);
if (err)