git: 7e68af7ce2c1 - main - fusefs: redo vnode attribute locking
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 12 Mar 2026 16:11:55 UTC
The branch main has been updated by asomers:
URL: https://cgit.FreeBSD.org/src/commit/?id=7e68af7ce2c1b892954df415774fe59fd2f1b62f
commit 7e68af7ce2c1b892954df415774fe59fd2f1b62f
Author: Alan Somers <asomers@FreeBSD.org>
AuthorDate: 2026-01-23 21:23:51 +0000
Commit: Alan Somers <asomers@FreeBSD.org>
CommitDate: 2026-03-12 16:11:25 +0000
fusefs: redo vnode attribute locking
Previously most fields in fuse_vnode_data were protected by the vnode
lock. But because DEBUG_VFS_LOCKS was never enabled by default until
stable/15 the assertions were never checked, and many were wrong.
Others were missing. This led to panics in stable/15 and 16.0-CURRENT,
when a vnode was expected to be exclusively locked but wasn't, for fuse
file systems that mount with "-o async".
In some places it isn't possible to exclusively lock the vnode when
accessing these fields. So protect them with a new mutex instead. This
fixes panics and unprotected field accesses in VOP_READ,
VOP_COPY_FILE_RANGE, VOP_GETATTR, VOP_BMAP, and FUSE_NOTIFY_INVAL_ENTRY.
Add assertions everywhere the protected fields are accessed.
Lock the vnode exclusively when handling FUSE_NOTIFY_INVAL_INODE.
During fuse_vnode_setsize, if the vnode isn't already exclusively
locked, use the vn_delayed_setsize mechanism. This fixes panics during
VOP_READ or VOP_GETATTR.
Also, ensure that fuse_vnop_rename locks the "from" vnode.
Finally, reorder elements in struct fuse_vnode_data to reduce the
structure size.
Fixes: 283391
Reported by: kargl, markj, vishwin, Abdelkader Boudih, groenveld@acm.org
MFC after: 2 weeks
Sponsored by: ConnectWise
Reviewed by: kib
Differential Revision: https://reviews.freebsd.org/D55230
---
sys/fs/fuse/fuse_file.c | 8 +-
sys/fs/fuse/fuse_internal.c | 42 +++++----
sys/fs/fuse/fuse_io.c | 21 ++++-
sys/fs/fuse/fuse_node.c | 89 ++++++++++++++++----
sys/fs/fuse/fuse_node.h | 91 +++++++++++++++++---
sys/fs/fuse/fuse_vfsops.c | 2 +
sys/fs/fuse/fuse_vnops.c | 81 ++++++++++++++++--
tests/sys/fs/fusefs/bmap.cc | 34 +++++---
tests/sys/fs/fusefs/notify.cc | 38 ++++++---
tests/sys/fs/fusefs/read.cc | 192 ++++++++++++++++++++++++++++++++++++++++++
tests/sys/fs/fusefs/rename.cc | 90 ++++++++++++++++++++
11 files changed, 609 insertions(+), 79 deletions(-)
diff --git a/sys/fs/fuse/fuse_file.c b/sys/fs/fuse/fuse_file.c
index 5f5819c2ccae..2cb8ef84e511 100644
--- a/sys/fs/fuse/fuse_file.c
+++ b/sys/fs/fuse/fuse_file.c
@@ -194,6 +194,8 @@ fuse_filehandle_close(struct vnode *vp, struct fuse_filehandle *fufh,
int err = 0;
int op = FUSE_RELEASE;
+ ASSERT_VOP_ELOCKED(vp, __func__);
+
if (fuse_isdeadfs(vp)) {
goto out;
}
@@ -381,7 +383,11 @@ fuse_filehandle_init(struct vnode *vp, fufh_type_t fufh_type,
} else {
if ((foo->open_flags & FOPEN_KEEP_CACHE) == 0)
fuse_io_invalbuf(vp, td);
- VTOFUD(vp)->flag &= ~FN_DIRECTIO;
+ /*
+ * XXX Update the flag without the lock for now. See
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+ */
+ VTOFUD(vp)->flag &= ~FN_DIRECTIO;
}
}
diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c
index a3590060f44a..914deb416c08 100644
--- a/sys/fs/fuse/fuse_internal.c
+++ b/sys/fs/fuse/fuse_internal.c
@@ -262,7 +262,7 @@ fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr,
fvdat = VTOFUD(vp);
data = fuse_get_mpdata(mp);
- ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs");
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
fuse_validity_2_bintime(attr_valid, attr_valid_nsec,
&fvdat->attr_cache_timeout);
@@ -478,7 +478,9 @@ fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio)
cn.cn_namelen = fnieo.namelen;
err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
MPASS(err == 0);
+ CACHED_ATTR_LOCK(dvp);
fuse_vnode_clear_attr_cache(dvp);
+ CACHED_ATTR_UNLOCK(dvp);
vput(dvp);
return (0);
}
@@ -498,8 +500,8 @@ fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio)
if (fniio.ino == FUSE_ROOT_ID)
err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
else
- err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
- &vp);
+ err = fuse_internal_get_cached_vnode(mp, fniio.ino,
+ LK_EXCLUSIVE, &vp);
SDT_PROBE2(fusefs, , internal, invalidate_inode, vp, &fniio);
if (err != 0 || vp == NULL)
return (err);
@@ -694,6 +696,8 @@ fuse_internal_remove(struct vnode *dvp,
nlink_t nlink;
int err = 0;
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
fdisp_init(&fdi, cnp->cn_namelen + 1);
fdisp_make_vp(&fdi, op, dvp, curthread, cnp->cn_cred);
@@ -891,15 +895,9 @@ fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap,
struct fuse_vnode_data *fvdat = VTOFUD(vp);
struct fuse_getattr_in *fgai;
struct fuse_attr_out *fao;
- off_t old_filesize = fvdat->cached_attrs.va_size;
- struct timespec old_atime = fvdat->cached_attrs.va_atime;
- struct timespec old_ctime = fvdat->cached_attrs.va_ctime;
- struct timespec old_mtime = fvdat->cached_attrs.va_mtime;
__enum_uint8(vtype) vtyp;
int err;
- ASSERT_VOP_LOCKED(vp, __func__);
-
fdisp_init(&fdi, sizeof(*fgai));
fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
fgai = fdi.indata;
@@ -917,22 +915,27 @@ fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap,
fao = (struct fuse_attr_out *)fdi.answ;
vtyp = IFTOVT(fao->attr.mode);
+
+ CACHED_ATTR_LOCK(vp);
if (fvdat->flag & FN_SIZECHANGE)
- fao->attr.size = old_filesize;
+ fao->attr.size = fvdat->cached_attrs.va_size;
if (fvdat->flag & FN_ATIMECHANGE) {
- fao->attr.atime = old_atime.tv_sec;
- fao->attr.atimensec = old_atime.tv_nsec;
+ fao->attr.atime = fvdat->cached_attrs.va_atime.tv_sec;
+ fao->attr.atimensec = fvdat->cached_attrs.va_atime.tv_nsec;
}
if (fvdat->flag & FN_CTIMECHANGE) {
- fao->attr.ctime = old_ctime.tv_sec;
- fao->attr.ctimensec = old_ctime.tv_nsec;
+ fao->attr.ctime = fvdat->cached_attrs.va_ctime.tv_sec;
+ fao->attr.ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec;
}
if (fvdat->flag & FN_MTIMECHANGE) {
- fao->attr.mtime = old_mtime.tv_sec;
- fao->attr.mtimensec = old_mtime.tv_nsec;
+ fao->attr.mtime = fvdat->cached_attrs.va_mtime.tv_sec;
+ fao->attr.mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec;
}
+
fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
fao->attr_valid_nsec, vap, true);
+
+ CACHED_ATTR_UNLOCK(vp);
if (vtyp != vnode_vtype(vp)) {
fuse_internal_vnode_disappear(vp);
err = ENOENT;
@@ -950,10 +953,13 @@ fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred,
{
struct vattr *attrs;
+ CACHED_ATTR_LOCK(vp);
if ((attrs = VTOVA(vp)) != NULL) {
*vap = *attrs; /* struct copy */
+ CACHED_ATTR_UNLOCK(vp);
return 0;
- }
+ } else
+ CACHED_ATTR_UNLOCK(vp);
return fuse_internal_do_getattr(vp, vap, cred, td);
}
@@ -1140,7 +1146,7 @@ int fuse_internal_setattr(struct vnode *vp, struct vattr *vap,
int err = 0;
__enum_uint8(vtype) vtyp;
- ASSERT_VOP_ELOCKED(vp, __func__);
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
mp = vnode_mount(vp);
fvdat = VTOFUD(vp);
diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c
index 0760d7641c7d..9f864e48effc 100644
--- a/sys/fs/fuse/fuse_io.c
+++ b/sys/fs/fuse/fuse_io.c
@@ -401,6 +401,7 @@ retry:
fuse_warn(data, FSESS_WARN_WROTE_LONG,
"wrote more data than we provided it.");
/* This is bonkers. Clear attr cache. */
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
fvdat->flag &= ~FN_SIZECHANGE;
fuse_vnode_clear_attr_cache(vp);
err = EINVAL;
@@ -416,8 +417,10 @@ retry:
fuse_vnode_setsize(vp, as_written_offset, false);
getnanouptime(&fvdat->last_local_modify);
}
- if (as_written_offset - diff >= filesize)
+ if (as_written_offset - diff >= filesize) {
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
fvdat->flag &= ~FN_SIZECHANGE;
+ }
if (diff > 0) {
/* Short write */
@@ -454,8 +457,11 @@ retry:
fdisp_destroy(&fdi);
- if (wrote_anything)
+ if (wrote_anything) {
+ CACHED_ATTR_LOCK(vp);
fuse_vnode_undirty_cached_timestamps(vp, false);
+ CACHED_ATTR_UNLOCK(vp);
+ }
vn_rlimit_fsizex_res(uio, r);
return (err);
@@ -556,6 +562,7 @@ again:
err = fuse_vnode_setsize(vp, uio->uio_offset + n, false);
filesize = uio->uio_offset + n;
getnanouptime(&fvdat->last_local_modify);
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
fvdat->flag |= FN_SIZECHANGE;
if (err) {
brelse(bp);
@@ -806,6 +813,7 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
left = uiop->uio_resid;
bzero((char *)bp->b_data + nread, left);
+ CACHED_ATTR_LOCK(vp);
if ((fvdat->flag & FN_SIZECHANGE) == 0) {
/*
* A short read with no error, when not using
@@ -838,6 +846,7 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
"Short read of a dirty file");
uiop->uio_resid = 0;
}
+ CACHED_ATTR_UNLOCK(vp);
}
if (error) {
bp->b_ioflags |= BIO_ERROR;
@@ -855,10 +864,18 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
* anything about it. In particular, we can't invalidate any
* part of the file's buffers because VOP_STRATEGY is called
* with them already locked.
+ *
+ * Normally the vnode should be exclusively locked at this
+ * point. However, if clustered reads are in use, then in a
+ * mixed read-write workload getblkx may need to flush a
+ * partially written buffer to disk during a read. In such a
+ * case, the vnode may only have a shared lock at this point.
*/
+ CACHED_ATTR_LOCK(vp);
filesize = fvdat->cached_attrs.va_size;
/* filesize must've been cached by fuse_vnop_open. */
KASSERT(filesize != VNOVAL, ("filesize should've been cached"));
+ CACHED_ATTR_UNLOCK(vp);
if ((off_t)bp->b_lblkno * biosize + bp->b_dirtyend > filesize)
bp->b_dirtyend = filesize -
diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c
index 742dc66bcafc..f4fb993a7ca1 100644
--- a/sys/fs/fuse/fuse_node.c
+++ b/sys/fs/fuse/fuse_node.c
@@ -157,6 +157,8 @@ fuse_vnode_init(struct vnode *vp, struct fuse_vnode_data *fvdat,
fvdat->nid = nodeid;
LIST_INIT(&fvdat->handles);
+ mtx_init(&fvdat->cached_attr_mtx, "fuse attr cache mutex", NULL,
+ MTX_DEF);
vattr_null(&fvdat->cached_attrs);
fvdat->cached_attrs.va_birthtime.tv_sec = -1;
fvdat->cached_attrs.va_birthtime.tv_nsec = 0;
@@ -181,6 +183,7 @@ fuse_vnode_destroy(struct vnode *vp)
struct fuse_vnode_data *fvdat = vp->v_data;
vp->v_data = NULL;
+ mtx_destroy(&fvdat->cached_attr_mtx);
KASSERT(LIST_EMPTY(&fvdat->handles),
("Destroying fuse vnode with open files!"));
free(fvdat, M_FUSEVN);
@@ -386,7 +389,8 @@ fuse_vnode_savesize(struct vnode *vp, struct ucred *cred, pid_t pid)
struct fuse_setattr_in *fsai;
int err = 0;
- ASSERT_VOP_ELOCKED(vp, "fuse_io_extend");
+ ASSERT_VOP_ELOCKED(vp, __func__); /* For flag and last_local_modify */
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
if (fuse_isdeadfs(vp)) {
return EBADF;
@@ -439,10 +443,10 @@ fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server)
struct vattr *attrs;
off_t oldsize;
size_t iosize;
- struct buf *bp = NULL;
int err = 0;
- ASSERT_VOP_ELOCKED(vp, "fuse_vnode_setsize");
+ ASSERT_VOP_LOCKED(vp, __func__);
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
iosize = fuse_iosize(vp);
oldsize = fvdat->cached_attrs.va_size;
@@ -450,7 +454,45 @@ fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server)
if ((attrs = VTOVA(vp)) != NULL)
attrs->va_size = newsize;
- if (newsize < oldsize) {
+ if (from_server && newsize > oldsize && oldsize != VNOVAL) {
+ /*
+ * The FUSE server changed the file size behind our back. We
+ * should invalidate the entire cache.
+ */
+ daddr_t end_lbn;
+
+ end_lbn = howmany(newsize, iosize);
+ v_inval_buf_range(vp, 0, end_lbn, iosize);
+ }
+
+ if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
+ err = fuse_vnode_setsize_immediate(vp, newsize < oldsize);
+ } else {
+ /* Without an exclusive vnode lock, we must defer the operation */
+ fvdat->flag |= FN_DELAYED_TRUNCATE;
+ vn_delayed_setsize(vp);
+ }
+
+ return err;
+}
+
+/* Immediately set the vnode's size in the pager */
+int
+fuse_vnode_setsize_immediate(struct vnode *vp, bool shrink)
+{
+ struct fuse_vnode_data *fvdat = VTOFUD(vp);
+ struct buf *bp = NULL;
+ size_t iosize;
+ off_t newsize;
+ int err = 0;
+
+ MPASS(fvdat);
+ ASSERT_VOP_ELOCKED(vp, __func__);
+
+ iosize = fuse_iosize(vp);
+ newsize = fvdat->cached_attrs.va_size;
+
+ if (shrink) {
daddr_t lbn;
err = vtruncbuf(vp, newsize, fuse_iosize(vp));
@@ -474,21 +516,13 @@ fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server)
MPASS(bp->b_flags & B_VMIO);
vfs_bio_clrbuf(bp);
bp->b_dirtyend = MIN(bp->b_dirtyend, newsize - lbn * iosize);
- } else if (from_server && newsize > oldsize && oldsize != VNOVAL) {
- /*
- * The FUSE server changed the file size behind our back. We
- * should invalidate the entire cache.
- */
- daddr_t end_lbn;
-
- end_lbn = howmany(newsize, iosize);
- v_inval_buf_range(vp, 0, end_lbn, iosize);
}
out:
if (bp)
brelse(bp);
vnode_pager_setsize(vp, newsize);
- return err;
+
+ return (err);
}
/* Get the current, possibly dirty, size of the file */
@@ -497,15 +531,28 @@ fuse_vnode_size(struct vnode *vp, off_t *filesize, struct ucred *cred,
struct thread *td)
{
struct fuse_vnode_data *fvdat = VTOFUD(vp);
+ struct vattr va;
int error = 0;
+ ASSERT_VOP_LOCKED(vp, __func__);
+
+ CACHED_ATTR_LOCK(vp);
if (!(fvdat->flag & FN_SIZECHANGE) &&
(!fuse_vnode_attr_cache_valid(vp) ||
- fvdat->cached_attrs.va_size == VNOVAL))
- error = fuse_internal_do_getattr(vp, NULL, cred, td);
-
- if (!error)
+ fvdat->cached_attrs.va_size == VNOVAL)) {
+ CACHED_ATTR_UNLOCK(vp);
+ /*
+ * It incurs a large struct copy, but we supply &va so we don't
+ * have to acquire the lock a second time after
+ * fuse_internal_do_getattr returns.
+ */
+ error = fuse_internal_do_getattr(vp, &va, cred, td);
+ if (!error)
+ *filesize = va.va_size;
+ } else {
*filesize = fvdat->cached_attrs.va_size;
+ CACHED_ATTR_UNLOCK(vp);
+ }
return error;
}
@@ -515,6 +562,8 @@ fuse_vnode_undirty_cached_timestamps(struct vnode *vp, bool atime)
{
struct fuse_vnode_data *fvdat = VTOFUD(vp);
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
fvdat->flag &= ~(FN_MTIMECHANGE | FN_CTIMECHANGE);
if (atime)
fvdat->flag &= ~FN_ATIMECHANGE;
@@ -537,6 +586,8 @@ fuse_vnode_update(struct vnode *vp, int flags)
if (mp->mnt_flag & MNT_NOATIME)
flags &= ~FN_ATIMECHANGE;
+ CACHED_ATTR_LOCK(vp);
+
if (flags & FN_ATIMECHANGE)
fvdat->cached_attrs.va_atime = ts;
if (flags & FN_MTIMECHANGE)
@@ -545,6 +596,8 @@ fuse_vnode_update(struct vnode *vp, int flags)
fvdat->cached_attrs.va_ctime = ts;
fvdat->flag |= flags;
+
+ CACHED_ATTR_UNLOCK(vp);
}
void
diff --git a/sys/fs/fuse/fuse_node.h b/sys/fs/fuse/fuse_node.h
index 97774de9eeb5..b6e388d01702 100644
--- a/sys/fs/fuse/fuse_node.h
+++ b/sys/fs/fuse/fuse_node.h
@@ -79,6 +79,11 @@
* cache_attrs.va_size field does not time out.
*/
#define FN_SIZECHANGE 0x00000100
+/*
+ * Whether I/O to this vnode should bypass the cache.
+ * XXX BUG: this should be part of the file handle, not the vnode data.
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+ */
#define FN_DIRECTIO 0x00000200
/* Indicates that parent_nid is valid */
#define FN_PARENT_NID 0x00000400
@@ -92,38 +97,81 @@
#define FN_CTIMECHANGE 0x00001000
#define FN_ATIMECHANGE 0x00002000
+/* vop_delayed_setsize should truncate the file */
+#define FN_DELAYED_TRUNCATE 0x00004000
+
+#define CACHED_ATTR_LOCK(vp) \
+do { \
+ ASSERT_VOP_LOCKED(vp, __func__); \
+ if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) \
+ mtx_lock(&VTOFUD(vp)->cached_attr_mtx); \
+} while(0)
+
+#define CACHED_ATTR_UNLOCK(vp) \
+do { \
+ ASSERT_VOP_LOCKED(vp, __func__); \
+ if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) \
+ mtx_unlock(&VTOFUD(vp)->cached_attr_mtx); \
+} while(0)
+
struct fuse_vnode_data {
- /** self **/
+ /* self's node id, similar to an inode number. Immutable. */
uint64_t nid;
+ /*
+ * Generation number. Distinguishes files with same nid but that don't
+ * overlap in time. Immutable.
+ */
uint64_t generation;
- /** parent **/
+ /* parent's node id. Protected by the vnode lock. */
uint64_t parent_nid;
/** I/O **/
- /* List of file handles for all of the vnode's open file descriptors */
+ /*
+ * List of file handles for all of the vnode's open file descriptors.
+ * Protected by the vnode lock.
+ */
LIST_HEAD(, fuse_filehandle) handles;
- /** flags **/
- uint32_t flag;
+ /* Protects flag, attr_cache_timeout and cached_attrs */
+ struct mtx cached_attr_mtx;
- /** meta **/
- /* The monotonic time after which the attr cache is invalid */
+ /*
+ * The monotonic time after which the attr cache is invalid
+ * Protected by an exclusive vnode lock or the cached_attr_mtx
+ */
struct bintime attr_cache_timeout;
- /*
+
+ /*
* Monotonic time after which the entry is invalid. Used for lookups
- * by nodeid instead of pathname.
+ * by nodeid instead of pathname. Protected by the vnode lock.
*/
struct bintime entry_cache_timeout;
+
/*
* Monotonic time of the last FUSE operation that modified the file
* size. Used to avoid races between mutator ops like VOP_SETATTR and
- * unlocked accessor ops like VOP_LOOKUP.
+ * unlocked accessor ops like VOP_LOOKUP. Protected by the vnode lock.
*/
struct timespec last_local_modify;
+
+ /* Protected by an exclusive vnode lock or the cached_attr_mtx */
struct vattr cached_attrs;
+
+ /* Number of FUSE_LOOKUPs minus FUSE_FORGETs. Protected by vnode lock */
uint64_t nlookup;
+
+ /*
+ * Misc flags. Protected by an exclusive vnode lock or the
+ * cached_attr_mtx, because some of the flags reflect the contents of
+ * cached_attrs.
+ */
+ uint32_t flag;
+
+ /* Vnode type. Immutable */
__enum_uint8(vtype) vtype;
+
+ /* State for clustered writes. Protected by vnode lock */
struct vn_clusterw clusterw;
};
@@ -141,18 +189,32 @@ struct fuse_fid {
#define VTOFUD(vp) \
((struct fuse_vnode_data *)((vp)->v_data))
#define VTOI(vp) (VTOFUD(vp)->nid)
+
+#define ASSERT_CACHED_ATTRS_LOCKED(vp) \
+do { \
+ ASSERT_VOP_LOCKED(vp, __func__); \
+ VNASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE || \
+ mtx_owned(&VTOFUD(vp)->cached_attr_mtx), vp, \
+ ("cached attrs not locked")); \
+} while(0)
+
static inline bool
fuse_vnode_attr_cache_valid(struct vnode *vp)
{
+ struct fuse_vnode_data *fvdat = VTOFUD(vp);
struct bintime now;
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
getbinuptime(&now);
- return (bintime_cmp(&(VTOFUD(vp)->attr_cache_timeout), &now, >));
+ return (bintime_cmp(&fvdat->attr_cache_timeout, &now, >));
}
static inline struct vattr*
VTOVA(struct vnode *vp)
{
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
if (fuse_vnode_attr_cache_valid(vp))
return &(VTOFUD(vp)->cached_attrs);
else
@@ -162,6 +224,8 @@ VTOVA(struct vnode *vp)
static inline void
fuse_vnode_clear_attr_cache(struct vnode *vp)
{
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
bintime_clear(&VTOFUD(vp)->attr_cache_timeout);
}
@@ -184,10 +248,14 @@ static inline void
fuse_vnode_setparent(struct vnode *vp, struct vnode *dvp)
{
if (dvp != NULL && vp->v_type == VDIR) {
+ ASSERT_VOP_ELOCKED(vp, __func__); /* for parent_nid */
+
MPASS(dvp->v_type == VDIR);
VTOFUD(vp)->parent_nid = VTOI(dvp);
VTOFUD(vp)->flag |= FN_PARENT_NID;
} else {
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
VTOFUD(vp)->flag &= ~FN_PARENT_NID;
}
}
@@ -207,6 +275,7 @@ void fuse_vnode_open(struct vnode *vp, int32_t fuse_open_flags,
int fuse_vnode_savesize(struct vnode *vp, struct ucred *cred, pid_t pid);
int fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server);
+int fuse_vnode_setsize_immediate(struct vnode *vp, bool shrink);
void fuse_vnode_undirty_cached_timestamps(struct vnode *vp, bool atime);
diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c
index a5118aa7675f..7b6ad86e4b2b 100644
--- a/sys/fs/fuse/fuse_vfsops.c
+++ b/sys/fs/fuse/fuse_vfsops.c
@@ -601,6 +601,8 @@ fuse_vfsop_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
error = fuse_vnode_get(mp, feo, nodeid, NULL, vpp, NULL, vtyp);
if (error)
goto out;
+ /* for last_local_modify and fuse_internal_cache_attrs */
+ ASSERT_VOP_ELOCKED(*vpp, __func__);
fvdat = VTOFUD(*vpp);
if (timespeccmp(&now, &fvdat->last_local_modify, >)) {
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index c66d3bcf01e0..ad7a2dcf84ef 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -134,6 +134,7 @@ static vop_close_t fuse_vnop_close;
static vop_copy_file_range_t fuse_vnop_copy_file_range;
static vop_create_t fuse_vnop_create;
static vop_deallocate_t fuse_vnop_deallocate;
+static vop_delayed_setsize_t fuse_vnop_delayed_setsize;
static vop_deleteextattr_t fuse_vnop_deleteextattr;
static vop_fdatasync_t fuse_vnop_fdatasync;
static vop_fsync_t fuse_vnop_fsync;
@@ -191,6 +192,7 @@ struct vop_vector fuse_vnops = {
.vop_copy_file_range = fuse_vnop_copy_file_range,
.vop_create = fuse_vnop_create,
.vop_deallocate = fuse_vnop_deallocate,
+ .vop_delayed_setsize = fuse_vnop_delayed_setsize,
.vop_deleteextattr = fuse_vnop_deleteextattr,
.vop_fsync = fuse_vnop_fsync,
.vop_fdatasync = fuse_vnop_fdatasync,
@@ -707,6 +709,8 @@ fuse_vnop_allocate(struct vop_allocate_args *ap)
return (EXTERROR(EOPNOTSUPP, "This server does not implement "
"FUSE_FALLOCATE"));
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
io.uio_offset = *offset;
io.uio_resid = *len;
err = vn_rlimit_fsize(vp, &io, curthread);
@@ -779,7 +783,7 @@ fuse_vnop_bmap(struct vop_bmap_args *ap)
struct fuse_data *data;
struct fuse_vnode_data *fvdat = VTOFUD(vp);
uint64_t biosize;
- off_t fsize;
+ off_t fsize = VNOVAL;
daddr_t lbn = ap->a_bn;
daddr_t *pbn = ap->a_bnp;
int *runp = ap->a_runp;
@@ -822,9 +826,10 @@ fuse_vnop_bmap(struct vop_bmap_args *ap)
* and the risk of getting it wrong is not worth the cost of
* another upcall.
*/
- if (fvdat->cached_attrs.va_size != VNOVAL)
- fsize = fvdat->cached_attrs.va_size;
- else
+ CACHED_ATTR_LOCK(vp);
+ fsize = fvdat->cached_attrs.va_size;
+ CACHED_ATTR_UNLOCK(vp);
+ if (fsize == VNOVAL)
error = fuse_vnode_size(vp, &fsize, td->td_ucred, td);
if (error == 0)
*runp = MIN(MAX(0, fsize / (off_t)biosize - lbn - 1),
@@ -894,6 +899,7 @@ fuse_vnop_close(struct vop_close_args *ap)
cred = td->td_ucred;
err = fuse_flush(vp, cred, pid, fflag);
+ ASSERT_CACHED_ATTRS_LOCKED(vp); /* For fvdat->flag */
if (err == 0 && (fvdat->flag & FN_ATIMECHANGE) && !vfs_isrdonly(mp)) {
struct vattr vap;
struct fuse_data *data;
@@ -911,6 +917,7 @@ fuse_vnop_close(struct vop_close_args *ap)
}
if (access_e == 0) {
VATTR_NULL(&vap);
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
vap.va_atime = fvdat->cached_attrs.va_atime;
/*
* Ignore errors setting when setting atime. That
@@ -1035,6 +1042,7 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
*ap->a_inoffp += fwo->size;
*ap->a_outoffp += fwo->size;
fuse_internal_clear_suid_on_write(outvp, outcred, td);
+ ASSERT_CACHED_ATTRS_LOCKED(outvp);
if (*ap->a_outoffp > outfvdat->cached_attrs.va_size) {
fuse_vnode_setsize(outvp, *ap->a_outoffp, false);
getnanouptime(&outfvdat->last_local_modify);
@@ -1337,6 +1345,7 @@ fuse_vnop_inactive(struct vop_inactive_args *ap)
int need_flush = 1;
+ ASSERT_CACHED_ATTRS_LOCKED(vp); /* For fvdat->flag */
LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
if (need_flush && vp->v_type == VREG) {
if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
@@ -1557,6 +1566,7 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
else if ((err = fuse_internal_access(dvp, VEXEC, td, cred)))
return err;
+ ASSERT_CACHED_ATTRS_LOCKED(dvp); /* For flag */
is_dot = cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.';
if (isdotdot && !(data->dataflags & FSESS_EXPORT_SUPPORT)) {
if (!(VTOFUD(dvp)->flag & FN_PARENT_NID)) {
@@ -1960,6 +1970,10 @@ fuse_vnop_read(struct vop_read_args *ap)
"to be closed"));
}
+ /*
+ * XXX Check this flag without the lock. See
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+ */
if (VTOFUD(vp)->flag & FN_DIRECTIO) {
ioflag |= IO_DIRECT;
}
@@ -2220,6 +2234,8 @@ fuse_vnop_remove(struct vop_remove_args *ap)
return err;
}
+SDT_PROBE_DEFINE4(fusefs, , vnops, erelookup, "struct vnode*",
+ "struct vnode*", "struct vnode*", "struct vnode*");
/*
struct vnop_rename_args {
struct vnode *a_fdvp;
@@ -2242,6 +2258,7 @@ fuse_vnop_rename(struct vop_rename_args *ap)
struct fuse_data *data;
bool newparent = fdvp != tdvp;
bool isdir = fvp->v_type == VDIR;
+ int locktype;
int err = 0;
if (fuse_isdeadfs(fdvp)) {
@@ -2270,11 +2287,32 @@ fuse_vnop_rename(struct vop_rename_args *ap)
* have write permission to it, so ".." can be modified.
*/
data = fuse_get_mpdata(vnode_mount(tdvp));
+
+ if (tdvp != fdvp)
+ locktype = LK_EXCLUSIVE; /* for fuse_vnode_setparent */
+ else
+ locktype = LK_SHARED;
+
+ /*
+ * Must use LK_NOWAIT to prevent LORs between fvp and tdvp or
+ * tvp
+ */
+ if (vn_lock(fvp, locktype | LK_NOWAIT) != 0) {
+ /*
+ * Can't release tdvp or tvp to try avoiding the LOR.
+ * Must return instead.
+ */
+ SDT_PROBE4(fusefs, , vnops, erelookup, fdvp, fvp, tdvp,
+ tvp);
+ err = ERELOOKUP;
+ goto out;
+ }
+
if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) {
err = fuse_internal_access(fvp, VWRITE,
curthread, tcnp->cn_cred);
if (err)
- goto out;
+ goto unlock;
}
err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp);
if (err == 0) {
@@ -2293,6 +2331,8 @@ fuse_vnop_rename(struct vop_rename_args *ap)
}
cache_purge(fdvp);
}
+unlock:
+ VOP_UNLOCK(fvp);
out:
if (tdvp == tvp) {
vrele(tdvp);
@@ -2568,6 +2608,7 @@ static int
fuse_vnop_write(struct vop_write_args *ap)
{
struct vnode *vp = ap->a_vp;
+ struct fuse_vnode_data *fvdat = VTOFUD(vp);
struct uio *uio = ap->a_uio;
int ioflag = ap->a_ioflag;
struct ucred *cred = ap->a_cred;
@@ -2583,9 +2624,12 @@ fuse_vnop_write(struct vop_write_args *ap)
"to be closed"));
}
- if (VTOFUD(vp)->flag & FN_DIRECTIO) {
+ /*
+ * XXX Check this flag without the lock. See
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+ */
+ if (fvdat->flag & FN_DIRECTIO)
ioflag |= IO_DIRECT;
- }
err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
@@ -3219,6 +3263,29 @@ fallback:
return (vop_stddeallocate(ap));
}
+/*
+ struct vop_delayed_setsize_args {
+ struct vop_generic_args a_gen;
+ struct vnode *a_vp;
+ };
+ */
+static int
+fuse_vnop_delayed_setsize(struct vop_delayed_setsize_args *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp);
+ bool shrink = (fvdat->flag & FN_DELAYED_TRUNCATE) != 0;
+ int err;
+
+ if (!fvdat)
+ return (0);
+
+ err = fuse_vnode_setsize_immediate(vp, shrink);
+ fvdat->flag &= ~FN_DELAYED_TRUNCATE;
+
+ return (err);
+}
+
/*
struct vop_deleteextattr_args {
struct vop_generic_args a_gen;
diff --git a/tests/sys/fs/fusefs/bmap.cc b/tests/sys/fs/fusefs/bmap.cc
index e61dadb6d79e..622a3c3debcc 100644
--- a/tests/sys/fs/fusefs/bmap.cc
+++ b/tests/sys/fs/fusefs/bmap.cc
@@ -44,10 +44,13 @@ using namespace testing;
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
-class Bmap: public FuseTest {
+class Bmap: public FuseTest,
+ public WithParamInterface<tuple<int, int>>
+{
public:
virtual void SetUp() {
m_maxreadahead = UINT32_MAX;
+ m_init_flags |= get<0>(GetParam());
FuseTest::SetUp();
}
void expect_bmap(uint64_t ino, uint64_t lbn, uint32_t blocksize, uint64_t pbn)
@@ -73,12 +76,12 @@ void expect_lookup(const char *relpath, uint64_t ino, off_t size)
}
};
-class BmapEof: public Bmap, public WithParamInterface<int> {};
+class BmapEof: public Bmap {};
/*
* Test FUSE_BMAP
*/
-TEST_F(Bmap, bmap)
+TEST_P(Bmap, bmap)
{
struct fiobmap2_arg arg;
/*
@@ -124,7 +127,7 @@ TEST_F(Bmap, bmap)
* If the daemon does not implement VOP_BMAP, fusefs should return sensible
* defaults.
*/
-TEST_F(Bmap, default_)
+TEST_P(Bmap, default_)
{
struct fiobmap2_arg arg;
const off_t filesize = 1 << 30;
@@ -181,7 +184,7 @@ TEST_F(Bmap, default_)
* The server returns an error for some reason for FUSE_BMAP. fusefs should
* faithfully report that error up to the caller.
*/
-TEST_F(Bmap, einval)
+TEST_P(Bmap, einval)
{
struct fiobmap2_arg arg;
const off_t filesize = 1 << 30;
@@ -217,7 +220,7 @@ TEST_F(Bmap, einval)
* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=264196 . The bug did not
* lie in fusefs, but this is a convenient place for a regression test.
*/
-TEST_F(Bmap, spurious_einval)
+TEST_P(Bmap, spurious_einval)
{
const off_t filesize = 4ull << 30;
const ino_t ino = 42;
@@ -288,7 +291,7 @@ TEST_P(BmapEof, eof)
int fd;
int ngetattrs;
- ngetattrs = GetParam();
+ ngetattrs = get<1>(GetParam());
FuseTest::expect_lookup(RELPATH, ino, mode, filesize, 1, 0);
expect_open(ino, 0, 1);
// Depending on ngetattrs, FUSE_READ could be called with either
@@ -348,6 +351,17 @@ TEST_P(BmapEof, eof)
leak(fd);
}
-INSTANTIATE_TEST_SUITE_P(BE, BmapEof,
- Values(1, 2, 3)
-);
+/*
+ * Try with and without async reads, because it affects the type of vnode lock
+ * on entry to fuse_vnop_bmap.
+ */
+INSTANTIATE_TEST_SUITE_P(B, Bmap, Values(
+ tuple(0, 0),
+ tuple(FUSE_ASYNC_READ, 0)
+));
+
+INSTANTIATE_TEST_SUITE_P(BE, BmapEof, Values(
+ tuple(0, 1),
+ tuple(0, 2),
+ tuple(0, 3)
+));
diff --git a/tests/sys/fs/fusefs/notify.cc b/tests/sys/fs/fusefs/notify.cc
index d370a1e6e706..69742fb2a54b 100644
--- a/tests/sys/fs/fusefs/notify.cc
+++ b/tests/sys/fs/fusefs/notify.cc
@@ -47,8 +47,15 @@ using namespace testing;
* invalidation. This file tests our client's handling of those messages.
*/
-class Notify: public FuseTest {
+class Notify: public FuseTest,
+ public WithParamInterface<int>
+{
public:
+virtual void SetUp() {
+ m_init_flags |= GetParam();
+ FuseTest::SetUp();
+}
+
/* Ignore an optional FUSE_FSYNC */
void maybe_expect_fsync(uint64_t ino)
{
@@ -154,7 +161,7 @@ static void* store(void* arg) {
}
/* Invalidate a nonexistent entry */
-TEST_F(Notify, inval_entry_nonexistent)
+TEST_P(Notify, inval_entry_nonexistent)
{
const static char *name = "foo";
struct inval_entry_args iea;
@@ -173,7 +180,7 @@ TEST_F(Notify, inval_entry_nonexistent)
}
/* Invalidate a cached entry */
-TEST_F(Notify, inval_entry)
+TEST_P(Notify, inval_entry)
{
const static char FULLPATH[] = "mountpoint/foo";
*** 404 LINES SKIPPED ***