git: 1f0f120183db - stable/13 - vfs: make skipping LRU requeue optional
Date: Wed, 10 Jul 2024 22:07:06 UTC
The branch stable/13 has been updated by mjg:
URL: https://cgit.FreeBSD.org/src/commit/?id=1f0f120183db12e680107a0553a8de2d854aa757
commit 1f0f120183db12e680107a0553a8de2d854aa757
Author: Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2024-07-08 12:24:41 +0000
Commit: Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2024-07-10 22:06:15 +0000
vfs: make skipping LRU requeue optional
As explained in the comment in the code, the LRU requeue is a bottleneck in
certain workloads. On the other hand it does not need to be skipped in most
cases, while transiently running into a contended lock happens a lot.
(cherry picked from commit 0a9aa6fdf58468945240e86bf16c268acc8c1776)
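For illustration, a minimal userland sketch of turning the new knob on via
sysctlbyname(3). The OID name vfs.vnode.param.can_skip_requeue is inferred
from the SYSCTL_BOOL declaration in the diff below; the knob is off by
default (zero-initialized static bool).

#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdbool.h>

int
main(void)
{
	/* Sketch only: OID name inferred from the diff, verify on a live system. */
	bool enable = true;

	if (sysctlbyname("vfs.vnode.param.can_skip_requeue", NULL, NULL,
	    &enable, sizeof(enable)) == -1)
		err(1, "sysctlbyname");
	return (0);
}

The same toggle should be reachable with sysctl(8), e.g.
"sysctl vfs.vnode.param.can_skip_requeue=1", again assuming that OID name.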
---
sys/kern/vfs_subr.c | 54 +++++++++++++++++++++++++++++++++--------------------
1 file changed, 34 insertions(+), 20 deletions(-)
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 33232987705e..a1b4779b6d3f 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -223,6 +223,10 @@ static counter_u64_t vnode_skipped_requeues;
 SYSCTL_COUNTER_U64(_vfs_vnode_stats, OID_AUTO, skipped_requeues, CTLFLAG_RD, &vnode_skipped_requeues,
     "Number of times LRU requeue was skipped due to lock contention");
 
+static __read_mostly bool vnode_can_skip_requeue;
+SYSCTL_BOOL(_vfs_vnode_param, OID_AUTO, can_skip_requeue, CTLFLAG_RW,
+    &vnode_can_skip_requeue, 0, "Is LRU requeue skippable");
+
 static u_long deferred_inact;
 SYSCTL_ULONG(_vfs, OID_AUTO, deferred_inact, CTLFLAG_RD,
     &deferred_inact, 0, "Number of times inactive processing was deferred");
@@ -3795,31 +3799,41 @@ vdbatch_process(struct vdbatch *vd)
 	 * lock contention, where vnode_list_mtx becomes the primary bottleneck
 	 * if multiple CPUs get here (one real-world example is highly parallel
 	 * do-nothing make , which will stat *tons* of vnodes). Since it is
-	 * quasi-LRU (read: not that great even if fully honoured) just dodge
-	 * the problem. Parties which don't like it are welcome to implement
-	 * something better.
+	 * quasi-LRU (read: not that great even if fully honoured) provide an
+	 * option to just dodge the problem. Parties which don't like it are
+	 * welcome to implement something better.
 	 */
-	critical_enter();
-	if (mtx_trylock(&vnode_list_mtx)) {
-		for (i = 0; i < VDBATCH_SIZE; i++) {
-			vp = vd->tab[i];
-			vd->tab[i] = NULL;
-			TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
-			TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
-			MPASS(vp->v_dbatchcpu != NOCPU);
-			vp->v_dbatchcpu = NOCPU;
+	if (vnode_can_skip_requeue) {
+		if (!mtx_trylock(&vnode_list_mtx)) {
+			counter_u64_add(vnode_skipped_requeues, 1);
+			critical_enter();
+			for (i = 0; i < VDBATCH_SIZE; i++) {
+				vp = vd->tab[i];
+				vd->tab[i] = NULL;
+				MPASS(vp->v_dbatchcpu != NOCPU);
+				vp->v_dbatchcpu = NOCPU;
+			}
+			vd->index = 0;
+			critical_exit();
+			return;
+
 		}
-		mtx_unlock(&vnode_list_mtx);
+		/* fallthrough to locked processing */
 	} else {
-		counter_u64_add(vnode_skipped_requeues, 1);
+		mtx_lock(&vnode_list_mtx);
+	}
 
-		for (i = 0; i < VDBATCH_SIZE; i++) {
-			vp = vd->tab[i];
-			vd->tab[i] = NULL;
-			MPASS(vp->v_dbatchcpu != NOCPU);
-			vp->v_dbatchcpu = NOCPU;
-		}
+	mtx_assert(&vnode_list_mtx, MA_OWNED);
+	critical_enter();
+	for (i = 0; i < VDBATCH_SIZE; i++) {
+		vp = vd->tab[i];
+		vd->tab[i] = NULL;
+		TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
+		TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
+		MPASS(vp->v_dbatchcpu != NOCPU);
+		vp->v_dbatchcpu = NOCPU;
 	}
+	mtx_unlock(&vnode_list_mtx);
 	vd->index = 0;
 	critical_exit();
 }
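A companion sketch: polling the existing skip counter, exported above as
vfs.vnode.stats.skipped_requeues (name inferred from the SYSCTL_COUNTER_U64
declaration), to see how often the requeue actually gets skipped once the
knob is enabled.

#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	uint64_t prev, cur;
	size_t len;

	prev = 0;
	for (;;) {
		len = sizeof(cur);
		/* counter_u64 sysctls export a single uint64_t value */
		if (sysctlbyname("vfs.vnode.stats.skipped_requeues", &cur,
		    &len, NULL, 0) == -1)
			err(1, "sysctlbyname");
		printf("skipped requeues: %ju (+%ju)\n", (uintmax_t)cur,
		    (uintmax_t)(cur - prev));
		prev = cur;
		sleep(1);
	}
}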