git: b7ff445ffa38 - main - Reduce bufdaemon/bufspacedaemon shutdown time.

From: Alexander Motin <mav_at_FreeBSD.org>
Date: Wed, 19 Jan 2022 00:48:32 UTC
The branch main has been updated by mav:

URL: https://cgit.FreeBSD.org/src/commit/?id=b7ff445ffa38282daeab36ce82681ba3f54c8851

commit b7ff445ffa38282daeab36ce82681ba3f54c8851
Author:     Alexander Motin <mav@FreeBSD.org>
AuthorDate: 2022-01-19 00:26:16 +0000
Commit:     Alexander Motin <mav@FreeBSD.org>
CommitDate: 2022-01-19 00:26:16 +0000

    Reduce bufdaemon/bufspacedaemon shutdown time.
    
    Before this change bufdaemon and bufspacedaemon threads used
    kthread_shutdown() to stop activity on system shutdown.  The problem is
    that kthread_shutdown() has no idea about the wait channel and lock used
    by specific thread to wake them up reliably.  As result, up to 9 threads
    could consume up to 9 seconds to shutdown for no good reason.
    
    This change introduces specific shutdown functions, knowing how to
    properly wake up specific threads, reducing wait for those threads on
    shutdown/reboot from average 4 seconds to effectively zero.
    
    MFC after:      2 weeks
    Reviewed by:    kib, markj
    Differential Revision:  https://reviews.freebsd.org/D33936
---
 sys/kern/vfs_bio.c | 108 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 71 insertions(+), 37 deletions(-)

diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 8e79943e3fee..ce7f860f1ffe 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -133,6 +133,7 @@ struct bufdomain {
 	int		bd_lim;
 	/* atomics */
 	int		bd_wanted;
+	bool		bd_shutdown;
 	int __aligned(CACHE_LINE_SIZE)	bd_numdirtybuffers;
 	int __aligned(CACHE_LINE_SIZE)	bd_running;
 	long __aligned(CACHE_LINE_SIZE) bd_bufspace;
@@ -340,6 +341,11 @@ static struct mtx_padalign __exclusive_cache_line rbreqlock;
  */
 static struct mtx_padalign __exclusive_cache_line bdirtylock;
 
+/*
+ * bufdaemon shutdown request and sleep channel.
+ */
+static bool bd_shutdown;
+
 /*
  * Wakeup point for bufdaemon, as well as indicator of whether it is already
  * active.  Set to 1 when the bufdaemon is already "on" the queue, 0 when it
@@ -628,33 +634,6 @@ bufspace_daemon_wakeup(struct bufdomain *bd)
 	}
 }
 
-/*
- *	bufspace_daemon_wait:
- *
- *	Sleep until the domain falls below a limit or one second passes.
- */
-static void
-bufspace_daemon_wait(struct bufdomain *bd)
-{
-	/*
-	 * Re-check our limits and sleep.  bd_running must be
-	 * cleared prior to checking the limits to avoid missed
-	 * wakeups.  The waker will adjust one of bufspace or
-	 * freebuffers prior to checking bd_running.
-	 */
-	BD_RUN_LOCK(bd);
-	atomic_store_int(&bd->bd_running, 0);
-	if (bd->bd_bufspace < bd->bd_bufspacethresh &&
-	    bd->bd_freebuffers > bd->bd_lofreebuffers) {
-		msleep(&bd->bd_running, BD_RUN_LOCKPTR(bd), PRIBIO|PDROP,
-		    "-", hz);
-	} else {
-		/* Avoid spurious wakeups while running. */
-		atomic_store_int(&bd->bd_running, 1);
-		BD_RUN_UNLOCK(bd);
-	}
-}
-
 /*
  *	bufspace_adjust:
  *
@@ -785,6 +764,22 @@ bufspace_wait(struct bufdomain *bd, struct vnode *vp, int gbflags,
 	BD_UNLOCK(bd);
 }
 
+static void
+bufspace_daemon_shutdown(void *arg, int howto __unused)
+{
+	struct bufdomain *bd = arg;
+	int error;
+
+	BD_RUN_LOCK(bd);
+	bd->bd_shutdown = true;
+	wakeup(&bd->bd_running);
+	error = msleep(&bd->bd_shutdown, BD_RUN_LOCKPTR(bd), 0,
+	    "bufspace_shutdown", 60 * hz);
+	BD_RUN_UNLOCK(bd);
+	if (error != 0)
+		printf("bufspacedaemon wait error: %d\n", error);
+}
+
 /*
  *	bufspace_daemon:
  *
@@ -795,14 +790,14 @@ bufspace_wait(struct bufdomain *bd, struct vnode *vp, int gbflags,
 static void
 bufspace_daemon(void *arg)
 {
-	struct bufdomain *bd;
+	struct bufdomain *bd = arg;
 
-	EVENTHANDLER_REGISTER(shutdown_pre_sync, kthread_shutdown, curthread,
+	EVENTHANDLER_REGISTER(shutdown_pre_sync, bufspace_daemon_shutdown, bd,
 	    SHUTDOWN_PRI_LAST + 100);
 
-	bd = arg;
-	for (;;) {
-		kthread_suspend_check();
+	BD_RUN_LOCK(bd);
+	while (!bd->bd_shutdown) {
+		BD_RUN_UNLOCK(bd);
 
 		/*
 		 * Free buffers from the clean queue until we meet our
@@ -852,8 +847,29 @@ bufspace_daemon(void *arg)
 			}
 			maybe_yield();
 		}
-		bufspace_daemon_wait(bd);
+
+		/*
+		 * Re-check our limits and sleep.  bd_running must be
+		 * cleared prior to checking the limits to avoid missed
+		 * wakeups.  The waker will adjust one of bufspace or
+		 * freebuffers prior to checking bd_running.
+		 */
+		BD_RUN_LOCK(bd);
+		if (bd->bd_shutdown)
+			break;
+		atomic_store_int(&bd->bd_running, 0);
+		if (bd->bd_bufspace < bd->bd_bufspacethresh &&
+		    bd->bd_freebuffers > bd->bd_lofreebuffers) {
+			msleep(&bd->bd_running, BD_RUN_LOCKPTR(bd),
+			    PRIBIO, "-", hz);
+		} else {
+			/* Avoid spurious wakeups while running. */
+			atomic_store_int(&bd->bd_running, 1);
+		}
 	}
+	wakeup(&bd->bd_shutdown);
+	BD_RUN_UNLOCK(bd);
+	kthread_exit();
 }
 
 /*
@@ -3391,6 +3407,21 @@ buf_flush(struct vnode *vp, struct bufdomain *bd, int target)
 	return (flushed);
 }
 
+static void
+buf_daemon_shutdown(void *arg __unused, int howto __unused)
+{
+	int error;
+
+	mtx_lock(&bdlock);
+	bd_shutdown = true;
+	wakeup(&bd_request);
+	error = msleep(&bd_shutdown, &bdlock, 0, "buf_daemon_shutdown",
+	    60 * hz);
+	mtx_unlock(&bdlock);
+	if (error != 0)
+		printf("bufdaemon wait error: %d\n", error);
+}
+
 static void
 buf_daemon()
 {
@@ -3402,7 +3433,7 @@ buf_daemon()
 	/*
 	 * This process needs to be suspended prior to shutdown sync.
 	 */
-	EVENTHANDLER_REGISTER(shutdown_pre_sync, kthread_shutdown, curthread,
+	EVENTHANDLER_REGISTER(shutdown_pre_sync, buf_daemon_shutdown, NULL,
 	    SHUTDOWN_PRI_LAST + 100);
 
 	/*
@@ -3422,12 +3453,10 @@ buf_daemon()
 	 */
 	curthread->td_pflags |= TDP_NORUNNINGBUF | TDP_BUFNEED;
 	mtx_lock(&bdlock);
-	for (;;) {
+	while (!bd_shutdown) {
 		bd_request = 0;
 		mtx_unlock(&bdlock);
 
-		kthread_suspend_check();
-
 		/*
 		 * Save speedupreq for this pass and reset to capture new
 		 * requests.
@@ -3464,6 +3493,8 @@ buf_daemon()
 		 * to avoid endless loops on unlockable buffers.
 		 */
 		mtx_lock(&bdlock);
+		if (bd_shutdown)
+			break;
 		if (BIT_EMPTY(BUF_DOMAINS, &bdlodirty)) {
 			/*
 			 * We reached our low water mark, reset the
@@ -3489,6 +3520,9 @@ buf_daemon()
 			msleep(&bd_request, &bdlock, PVM, "qsleep", hz / 10);
 		}
 	}
+	wakeup(&bd_shutdown);
+	mtx_unlock(&bdlock);
+	kthread_exit();
 }
 
 /*