git: 21a42a7c22c9 - stable/13 - vfs: drop one vnode list lock trip during vnlru free recycle
Date: Wed, 04 Oct 2023 12:08:49 UTC
The branch stable/13 has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=21a42a7c22c9bf921ba9d250c81bd41e70c63ea9

commit 21a42a7c22c9bf921ba9d250c81bd41e70c63ea9
Author:     Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2023-09-14 14:35:40 +0000
Commit:     Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2023-10-04 12:04:15 +0000

    vfs: drop one vnode list lock trip during vnlru free recycle

    vnlru_free_impl would take the lock prior to returning even though the
    most frequent caller does not need it.

    Unsurprisingly vnode_list mtx is the primary bottleneck when recycling
    and avoiding the useless lock trip helps.

    Setting maxvnodes to 400000 and running 20 parallel finds each with a
    dedicated directory tree of 1 million vnodes in total:

    before: 4.50s user 1225.71s system 1979% cpu 1:02.14 total
    after:  4.20s user 806.23s system 1973% cpu 41.059 total

    That's a 34% reduction in total real time.

    With this the lock *remains* the primary bottleneck when running on ZFS.

    (cherry picked from commit 74be676d87745eb727642f6f8329236c848929d5)
---
 sys/kern/vfs_subr.c | 43 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 209d6ac37cda..40f7a6d33c20 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1278,13 +1278,14 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
 	mtx_assert(&vnode_list_mtx, MA_OWNED);
 	if (count > max_vnlru_free)
 		count = max_vnlru_free;
+	if (count == 0) {
+		mtx_unlock(&vnode_list_mtx);
+		return (0);
+	}
 	ocount = count;
 	retried = false;
 	vp = mvp;
 	for (;;) {
-		if (count == 0) {
-			break;
-		}
 		vp = TAILQ_NEXT(vp, v_vnodelist);
 		if (__predict_false(vp == NULL)) {
 			/*
@@ -1307,6 +1308,7 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
 			 */
 			TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
 			TAILQ_INSERT_TAIL(&vnode_list, mvp, v_vnodelist);
+			mtx_unlock(&vnode_list_mtx);
 			break;
 		}
 		if (__predict_false(vp->v_type == VMARKER))
@@ -1354,18 +1356,28 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
 		 */
 		vtryrecycle(vp);
 		count--;
+		if (count == 0) {
+			break;
+		}
 		mtx_lock(&vnode_list_mtx);
 		vp = mvp;
 	}
+	mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
 	return (ocount - count);
 }
 
+/*
+ * XXX: returns without vnode_list_mtx locked!
+ */
 static int
 vnlru_free_locked(int count)
 {
+	int ret;
 
 	mtx_assert(&vnode_list_mtx, MA_OWNED);
-	return (vnlru_free_impl(count, NULL, vnode_list_free_marker));
+	ret = vnlru_free_impl(count, NULL, vnode_list_free_marker);
+	mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
+	return (ret);
 }
 
 void
@@ -1377,7 +1389,7 @@ vnlru_free_vfsops(int count, struct vfsops *mnt_op, struct vnode *mvp)
 	VNPASS(mvp->v_type == VMARKER, mvp);
 	mtx_lock(&vnode_list_mtx);
 	vnlru_free_impl(count, mnt_op, mvp);
-	mtx_unlock(&vnode_list_mtx);
+	mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
 }
 
 /*
@@ -1549,7 +1561,7 @@ vnlru_under_unlocked(u_long rnumvnodes, u_long limit)
 }
 
 static void
-vnlru_kick(void)
+vnlru_kick_locked(void)
 {
 
 	mtx_assert(&vnode_list_mtx, MA_OWNED);
@@ -1559,6 +1571,15 @@ vnlru_kick(void)
 	}
 }
 
+static void
+vnlru_kick(void)
+{
+
+	mtx_lock(&vnode_list_mtx);
+	vnlru_kick_locked();
+	mtx_unlock(&vnode_list_mtx);
+}
+
 static void
 vnlru_proc(void)
 {
@@ -1589,6 +1610,7 @@ vnlru_proc(void)
 		 */
 		if (rnumvnodes > desiredvnodes) {
 			vnlru_free_locked(rnumvnodes - desiredvnodes);
+			mtx_lock(&vnode_list_mtx);
 			rnumvnodes = atomic_load_long(&numvnodes);
 		}
 		/*
@@ -1767,6 +1789,7 @@ vn_alloc_hard(struct mount *mp)
 	rnumvnodes = atomic_load_long(&numvnodes);
 	if (rnumvnodes + 1 < desiredvnodes) {
 		vn_alloc_cyclecount = 0;
+		mtx_unlock(&vnode_list_mtx);
 		goto alloc;
 	}
 	rfreevnodes = vnlru_read_freevnodes();
@@ -1786,22 +1809,26 @@ vn_alloc_hard(struct mount *mp)
 	 */
 	if (vnlru_free_locked(1) > 0)
 		goto alloc;
+	mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
 	if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
 		/*
 		 * Wait for space for a new vnode.
 		 */
-		vnlru_kick();
+		mtx_lock(&vnode_list_mtx);
+		vnlru_kick_locked();
 		vn_alloc_sleeps++;
 		msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS, "vlruwk", hz);
 		if (atomic_load_long(&numvnodes) + 1 > desiredvnodes &&
 		    vnlru_read_freevnodes() > 1)
 			vnlru_free_locked(1);
+		else
+			mtx_unlock(&vnode_list_mtx);
 	}
 alloc:
+	mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
	rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1;
 	if (vnlru_under(rnumvnodes, vlowat))
 		vnlru_kick();
-	mtx_unlock(&vnode_list_mtx);
 	return (uma_zalloc_smr(vnode_zone, M_WAITOK));
 }
 
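As a rough illustration of the locking contract the commit adopts, below is a minimal standalone userspace sketch. It uses pthreads and sys/queue.h rather than the kernel's mtx(9) and the real vnode list, and every name in it (freelist_reclaim_locked and friends) is hypothetical; it only mimics the idea that the reclaim routine is entered with the list lock held and returns with it released, so the most frequent caller is spared an unlock/lock round trip it does not need.

/*
 * Userspace sketch (NOT FreeBSD kernel code) of a "_locked" routine that
 * is called with the list lock held and returns with it released.
 */
#include <sys/queue.h>

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	TAILQ_ENTRY(node) link;
	int id;
};

static TAILQ_HEAD(, node) freelist = TAILQ_HEAD_INITIALIZER(freelist);
static pthread_mutex_t freelist_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Reclaim up to 'count' nodes.  Entered with freelist_lock held; returns
 * with it released in all cases, so the caller must not unlock again.
 */
static int
freelist_reclaim_locked(int count)
{
	struct node *n;
	int done;

	done = 0;
	if (count == 0) {
		pthread_mutex_unlock(&freelist_lock);
		return (0);
	}
	while ((n = TAILQ_FIRST(&freelist)) != NULL) {
		TAILQ_REMOVE(&freelist, n, link);
		free(n);
		if (++done == count)
			break;
	}
	pthread_mutex_unlock(&freelist_lock);
	return (done);
}

int
main(void)
{
	struct node *n;
	int i, reclaimed;

	pthread_mutex_lock(&freelist_lock);
	for (i = 0; i < 10; i++) {
		n = malloc(sizeof(*n));
		n->id = i;
		TAILQ_INSERT_TAIL(&freelist, n, link);
	}
	/*
	 * The common caller only wants nodes reclaimed and has no further
	 * use for the lock, so the callee dropping it saves one trip.
	 */
	reclaimed = freelist_reclaim_locked(4);
	printf("reclaimed %d nodes\n", reclaimed);
	return (0);
}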