git: 45117ffcd533 - main - vfs: add VOP_DELAYED_SETSIZE() and related infrastructure
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 05 Mar 2026 23:48:27 UTC
The branch main has been updated by kib:
URL: https://cgit.FreeBSD.org/src/commit/?id=45117ffcd533ddf995f654db60b10899ae8370ec
commit 45117ffcd533ddf995f654db60b10899ae8370ec
Author: Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2026-02-28 16:11:58 +0000
Commit: Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2026-03-05 23:46:54 +0000
vfs: add VOP_DELAYED_SETSIZE() and related infrastructure
The change generalizes code that was initially developed for the nfs client
to handle filesystems that need to call vnode_pager_setsize() while
owning the vnode lock only shared. Since the vnode pager might need to trim
or extend the vnode vm_object's page queue, the vnode lock for the call
must be owned exclusive. This is typical for filesystems with a remote
authoritative source of file attributes, like nfs/p9/fuse.
Handle the conflict by delaying the vnode_pager_setsize() to the next
vnode locking, to avoid a relock. But if the next locking request is in
shared mode, lock it exclusively instead, perform the delayed
vnode_pager_setsize() call by doing VOP_DELAYED_SETSIZE(), and then
downgrade to shared.
Filesystems that opt into the feature must provide the implementation of
VOP_DELAYED_SETSIZE() that actually calls vnode_pager_setsize(), and use
vn_delay_setsize() helper to mark the vnode as requiring the delay call.
Reviewed by: rmacklem
Tested by: pho
Sponsored by: The FreeBSD Foundation
MFC after: 1 week
Differential revision: https://reviews.freebsd.org/D55595
---
sys/fs/deadfs/dead_vnops.c | 1 +
sys/kern/vfs_default.c | 1 +
sys/kern/vfs_vnops.c | 74 +++++++++++++++++++++++++++++++++++++++++++++-
sys/kern/vnode_if.src | 8 +++++
sys/sys/vnode.h | 31 +++++++++++++++++++
5 files changed, 114 insertions(+), 1 deletion(-)
diff --git a/sys/fs/deadfs/dead_vnops.c b/sys/fs/deadfs/dead_vnops.c
index 137c86b65058..b6d6fa55d221 100644
--- a/sys/fs/deadfs/dead_vnops.c
+++ b/sys/fs/deadfs/dead_vnops.c
@@ -80,6 +80,7 @@ struct vop_vector dead_vnodeops = {
.vop_write = dead_write,
.vop_fplookup_vexec = VOP_EOPNOTSUPP,
.vop_fplookup_symlink = VOP_EOPNOTSUPP,
+ .vop_delayed_setsize = VOP_NULL,
};
VFS_VOP_VECTOR_REGISTER(dead_vnodeops);
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 468d5d18b02b..3151c69d1912 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -147,6 +147,7 @@ struct vop_vector default_vnodeops = {
.vop_add_writecount = vop_stdadd_writecount,
.vop_copy_file_range = vop_stdcopy_file_range,
.vop_vput_pair = vop_stdvput_pair,
+ .vop_delayed_setsize = VOP_PANIC,
};
VFS_VOP_VECTOR_REGISTER(default_vnodeops);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index a53df50c06bd..24efdf4ac0d5 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1959,9 +1959,74 @@ _vn_lock_fallback(struct vnode *vp, int flags, const char *file, int line,
return (0);
}
+static int
+vn_lock_delayed_setsize(struct vop_lock1_args *ap)
+{
+ struct vnode *vp;
+ int error, lktype;
+ bool onfault;
+
+ vp = ap->a_vp;
+ lktype = ap->a_flags & LK_TYPE_MASK;
+ if (vp->v_op == &dead_vnodeops)
+ return (0);
+ VI_LOCK(vp);
+ if ((vp->v_iflag & VI_DELAYEDSSZ) == 0 || (lktype != LK_SHARED &&
+ lktype != LK_EXCLUSIVE && lktype != LK_UPGRADE &&
+ lktype != LK_TRYUPGRADE)) {
+ VI_UNLOCK(vp);
+ return (0);
+ }
+ onfault = (ap->a_flags & LK_EATTR_MASK) == LK_NOWAIT &&
+ (ap->a_flags & LK_INIT_MASK) == LK_CANRECURSE &&
+ (lktype == LK_SHARED || lktype == LK_EXCLUSIVE);
+ if (onfault && vp->v_vnlock->lk_recurse == 0) {
+ /*
+ * Force retry in vm_fault(), to make the lock request
+ * sleepable, which allows us to piggy-back the
+ * sleepable call to vnode_pager_setsize().
+ */
+ VI_UNLOCK(vp);
+ VOP_UNLOCK(vp);
+ return (EBUSY);
+ }
+ if ((ap->a_flags & LK_NOWAIT) != 0 ||
+ (lktype == LK_SHARED && vp->v_vnlock->lk_recurse > 0)) {
+ VI_UNLOCK(vp);
+ return (0);
+ }
+ if (lktype == LK_SHARED) {
+ VOP_UNLOCK(vp);
+ ap->a_flags &= ~LK_TYPE_MASK;
+ ap->a_flags |= LK_EXCLUSIVE | LK_INTERLOCK;
+ error = VOP_LOCK1_APV(&default_vnodeops, ap);
+ if (error != 0 || vp->v_op == &dead_vnodeops)
+ return (error);
+ if (vp->v_data == NULL)
+ goto downgrade;
+ MPASS(vp->v_data != NULL);
+ VI_LOCK(vp);
+ if ((vp->v_iflag & VI_DELAYEDSSZ) == 0) {
+ VI_UNLOCK(vp);
+ goto downgrade;
+ }
+ }
+ vp->v_iflag &= ~VI_DELAYEDSSZ;
+ VI_UNLOCK(vp);
+ VOP_DELAYED_SETSIZE(vp);
+downgrade:
+ if (lktype == LK_SHARED) {
+ ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK);
+ ap->a_flags |= LK_DOWNGRADE;
+ (void)VOP_LOCK1_APV(&default_vnodeops, ap);
+ }
+ return (0);
+}
+
int
_vn_lock(struct vnode *vp, int flags, const char *file, int line)
{
+ struct vop_lock1_args ap;
int error;
VNASSERT((flags & LK_TYPE_MASK) != 0, vp,
@@ -1970,7 +2035,14 @@ _vn_lock(struct vnode *vp, int flags, const char *file, int line)
error = VOP_LOCK1(vp, flags, file, line);
if (__predict_false(error != 0 || VN_IS_DOOMED(vp)))
return (_vn_lock_fallback(vp, flags, file, line, error));
- return (0);
+ if (__predict_false((vp->v_iflag & VI_DELAYEDSSZ) == 0))
+ return (0);
+ ap.a_gen.a_desc = &vop_lock1_desc;
+ ap.a_vp = vp;
+ ap.a_flags = flags;
+ ap.a_file = file;
+ ap.a_line = line;
+ return (vn_lock_delayed_setsize(&ap));
}
/*
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index 6b7448d9f1df..78ba1aa7afda 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -847,6 +847,14 @@ vop_inotify_add_watch {
IN struct thread *td;
};
+
+%% delayed_setsize vp E E E
+
+vop_delayed_setsize {
+ IN struct vnode *vp;
+};
+
+
# The VOPs below are spares at the end of the table to allow new VOPs to be
# added in stable branches without breaking the KBI. New VOPs in HEAD should
# be added above these spares. When merging a new VOP to a stable branch,
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 1a267e0e272c..36e10fd8d8b7 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -268,6 +268,7 @@ _Static_assert(sizeof(struct vnode) <= 448, "vnode size crosses 448 bytes");
#define VI_DEFINACT 0x0010 /* deferred inactive */
#define VI_FOPENING 0x0020 /* In open, with opening process having the
first right to advlock file */
+#define VI_DELAYEDSSZ 0x0040 /* Delayed setsize */
#define VV_ROOT 0x0001 /* root of its filesystem */
#define VV_ISTTY 0x0002 /* vnode represents a tty */
@@ -1251,6 +1252,36 @@ vn_get_state(struct vnode *vp)
atomic_load_consume_ptr(&(_vp)->v_data);\
})
+static inline void
+vn_delay_setsize_locked(struct vnode *vp)
+{
+ ASSERT_VI_LOCKED(vp, "delayed_setsize");
+ vp->v_iflag |= VI_DELAYEDSSZ;
+}
+
+static inline void
+vn_delay_setsize(struct vnode *vp)
+{
+ VI_LOCK(vp);
+ vn_delay_setsize_locked(vp);
+ VI_UNLOCK(vp);
+}
+
+static inline void
+vn_clear_delayed_setsize_locked(struct vnode *vp)
+{
+ ASSERT_VI_LOCKED(vp, "delayed_setsize");
+ vp->v_iflag &= ~VI_DELAYEDSSZ;
+}
+
+static inline void
+vn_clear_delayed_setsize(struct vnode *vp)
+{
+ VI_LOCK(vp);
+ vn_clear_delayed_setsize_locked(vp);
+ VI_UNLOCK(vp);
+}
+
#endif /* _KERNEL */
#endif /* !_SYS_VNODE_H_ */