git: 21a42a7c22c9 - stable/13 - vfs: drop one vnode list lock trip during vnlru free recycle
Date: Wed, 04 Oct 2023 12:08:49 UTC
The branch stable/13 has been updated by mjg:
URL: https://cgit.FreeBSD.org/src/commit/?id=21a42a7c22c9bf921ba9d250c81bd41e70c63ea9
commit 21a42a7c22c9bf921ba9d250c81bd41e70c63ea9
Author: Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2023-09-14 14:35:40 +0000
Commit: Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2023-10-04 12:04:15 +0000
vfs: drop one vnode list lock trip during vnlru free recycle
vnlru_free_impl would take the vnode list lock prior to returning, even
though its most frequent caller does not need it.
Unsurprisingly, the vnode_list mtx is the primary bottleneck when
recycling, and avoiding the useless lock trip helps.
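For illustration, a minimal sketch of the pattern being removed and the
one replacing it. recycle_before() and recycle_after() are hypothetical
stand-ins; vnode_list_mtx, vtryrecycle() and the assertions mirror the
diff below, but this is a simplification, not the actual kernel code:

/*
 * Before: the loop reacquired vnode_list_mtx after every recycle,
 * including the last one, only so it could return with the lock held
 * for a caller that would usually drop it right away.
 */
static int
recycle_before(int count)
{
	mtx_assert(&vnode_list_mtx, MA_OWNED);
	while (count > 0) {
		/* pick a candidate vnode under the lock ... */
		mtx_unlock(&vnode_list_mtx);
		/* vtryrecycle(vp); */
		count--;
		mtx_lock(&vnode_list_mtx);	/* useless on the final pass */
	}
	return (0);	/* returns with vnode_list_mtx held */
}

/*
 * After: once the requested count is reached the function returns
 * without relocking; the rare caller that still needs the lock
 * reacquires it itself.
 */
static int
recycle_after(int count)
{
	mtx_assert(&vnode_list_mtx, MA_OWNED);
	if (count == 0) {
		mtx_unlock(&vnode_list_mtx);
		return (0);
	}
	for (;;) {
		/* pick a candidate vnode under the lock ... */
		mtx_unlock(&vnode_list_mtx);
		/* vtryrecycle(vp); */
		count--;
		if (count == 0)
			break;		/* skip the final lock trip */
		mtx_lock(&vnode_list_mtx);
	}
	mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
	return (0);	/* returns with vnode_list_mtx dropped */
}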
Setting maxvnodes to 400000 and running 20 parallel finds, each against
a dedicated directory tree, with 1 million vnodes in total:
before: 4.50s user 1225.71s system 1979% cpu 1:02.14 total
after: 4.20s user 806.23s system 1973% cpu 41.059 total
That's a 34% reduction in total real time.
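Spelled out: 1:02.14 of wall time is 62.14s, and (62.14 - 41.06) /
62.14 ≈ 0.34; system time falls by about the same fraction, from
1225.71s to 806.23s.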
With this the lock remains the primary bottleneck when running on
ZFS.
(cherry picked from commit 74be676d87745eb727642f6f8329236c848929d5)
---
sys/kern/vfs_subr.c | 43 +++++++++++++++++++++++++++++++++++--------
1 file changed, 35 insertions(+), 8 deletions(-)
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 209d6ac37cda..40f7a6d33c20 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1278,13 +1278,14 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
mtx_assert(&vnode_list_mtx, MA_OWNED);
if (count > max_vnlru_free)
count = max_vnlru_free;
+ if (count == 0) {
+ mtx_unlock(&vnode_list_mtx);
+ return (0);
+ }
ocount = count;
retried = false;
vp = mvp;
for (;;) {
- if (count == 0) {
- break;
- }
vp = TAILQ_NEXT(vp, v_vnodelist);
if (__predict_false(vp == NULL)) {
/*
@@ -1307,6 +1308,7 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
*/
TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
TAILQ_INSERT_TAIL(&vnode_list, mvp, v_vnodelist);
+ mtx_unlock(&vnode_list_mtx);
break;
}
if (__predict_false(vp->v_type == VMARKER))
@@ -1354,18 +1356,28 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
*/
vtryrecycle(vp);
count--;
+ if (count == 0) {
+ break;
+ }
mtx_lock(&vnode_list_mtx);
vp = mvp;
}
+ mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
return (ocount - count);
}
+/*
+ * XXX: returns without vnode_list_mtx locked!
+ */
static int
vnlru_free_locked(int count)
{
+ int ret;
mtx_assert(&vnode_list_mtx, MA_OWNED);
- return (vnlru_free_impl(count, NULL, vnode_list_free_marker));
+ ret = vnlru_free_impl(count, NULL, vnode_list_free_marker);
+ mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
+ return (ret);
}
void
@@ -1377,7 +1389,7 @@ vnlru_free_vfsops(int count, struct vfsops *mnt_op, struct vnode *mvp)
VNPASS(mvp->v_type == VMARKER, mvp);
mtx_lock(&vnode_list_mtx);
vnlru_free_impl(count, mnt_op, mvp);
- mtx_unlock(&vnode_list_mtx);
+ mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
}
/*
@@ -1549,7 +1561,7 @@ vnlru_under_unlocked(u_long rnumvnodes, u_long limit)
}
static void
-vnlru_kick(void)
+vnlru_kick_locked(void)
{
mtx_assert(&vnode_list_mtx, MA_OWNED);
@@ -1559,6 +1571,15 @@ vnlru_kick(void)
}
}
+static void
+vnlru_kick(void)
+{
+
+ mtx_lock(&vnode_list_mtx);
+ vnlru_kick_locked();
+ mtx_unlock(&vnode_list_mtx);
+}
+
static void
vnlru_proc(void)
{
@@ -1589,6 +1610,7 @@ vnlru_proc(void)
*/
if (rnumvnodes > desiredvnodes) {
vnlru_free_locked(rnumvnodes - desiredvnodes);
+ mtx_lock(&vnode_list_mtx);
rnumvnodes = atomic_load_long(&numvnodes);
}
/*
@@ -1767,6 +1789,7 @@ vn_alloc_hard(struct mount *mp)
rnumvnodes = atomic_load_long(&numvnodes);
if (rnumvnodes + 1 < desiredvnodes) {
vn_alloc_cyclecount = 0;
+ mtx_unlock(&vnode_list_mtx);
goto alloc;
}
rfreevnodes = vnlru_read_freevnodes();
@@ -1786,22 +1809,26 @@ vn_alloc_hard(struct mount *mp)
*/
if (vnlru_free_locked(1) > 0)
goto alloc;
+ mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
/*
* Wait for space for a new vnode.
*/
- vnlru_kick();
+ mtx_lock(&vnode_list_mtx);
+ vnlru_kick_locked();
vn_alloc_sleeps++;
msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS, "vlruwk", hz);
if (atomic_load_long(&numvnodes) + 1 > desiredvnodes &&
vnlru_read_freevnodes() > 1)
vnlru_free_locked(1);
+ else
+ mtx_unlock(&vnode_list_mtx);
}
alloc:
+ mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1;
if (vnlru_under(rnumvnodes, vlowat))
vnlru_kick();
- mtx_unlock(&vnode_list_mtx);
return (uma_zalloc_smr(vnode_zone, M_WAITOK));
}