git: 7fdef9cdb7ee - stable/14 - Optimize HPTS so that little work is done until we have a hpts thread that is over the connection threshold

From: Michael Tuexen <tuexen_at_FreeBSD.org>
Date: Wed, 24 Apr 2024 20:41:17 UTC
The branch stable/14 has been updated by tuexen:

URL: https://cgit.FreeBSD.org/src/commit/?id=7fdef9cdb7ee3ff9b7bc7629ee2db7c808e8f1a5

commit 7fdef9cdb7ee3ff9b7bc7629ee2db7c808e8f1a5
Author:     Randall Stewart <rrs@FreeBSD.org>
AuthorDate: 2024-03-28 12:12:37 +0000
Commit:     Michael Tuexen <tuexen@FreeBSD.org>
CommitDate: 2024-04-24 20:37:40 +0000

    Optimize HPTS so that little work is done until we have a hpts thread that is over the connection threshold
    
    HPTS inserts a softclock for system call return that optimizes performance. However, when
    no HPTS threads need the help (i.e. when they have fewer than 100 or so connections), then
    there should be little work done, i.e. check the counter and return instead of running through
    all the threads getting locks, etc. Optimize HPTS so that little work is done until we have a hpts
    thread that is over the connection threshold.
    
    Reported by:    eduardo
    Reviewed by:    gallatin, glebius, tuexen
    Tested by:      gallatin
    Differential Revision: https://reviews.freebsd.org/D44420
    
    (cherry picked from commit b7b78c1c169dd2213b4cb3e14e19c045b2c5e5af)
---
 sys/kern/subr_trap.c   |  2 ++
 sys/netinet/tcp_hpts.c | 10 +++++++++-
 sys/sys/systm.h        |  4 +++-
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 42ce9e7988b5..5a89ba8b6500 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -76,6 +76,8 @@
 #include <sys/epoch.h>
 #endif
 
+volatile uint32_t __read_frequently hpts_that_need_softclock = 0;
+
 void	(*tcp_hpts_softclock)(void);
 
 /*
diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c
index 07c5f04b907e..85341cab0750 100644
--- a/sys/netinet/tcp_hpts.c
+++ b/sys/netinet/tcp_hpts.c
@@ -193,7 +193,8 @@ struct tcp_hpts_entry {
 	uint8_t p_direct_wake :1, /* boolean */
 		p_on_min_sleep:1, /* boolean */
 		p_hpts_wake_scheduled:1, /* boolean */
-		p_avail:5;
+		hit_callout_thresh:1,
+		p_avail:4;
 	uint8_t p_fill[3];	  /* Fill to 32 bits */
 	/* Cache line 0x40 */
 	struct hptsh {
@@ -1684,6 +1685,13 @@ tcp_hpts_thread(void *ctx)
 	ticks_ran = tcp_hptsi(hpts, 1);
 	tv.tv_sec = 0;
 	tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_TICKS_PER_SLOT;
+	if ((hpts->p_on_queue_cnt > conn_cnt_thresh) && (hpts->hit_callout_thresh == 0)) {
+		hpts->hit_callout_thresh = 1;
+		atomic_add_int(&hpts_that_need_softclock, 1);
+	} else if ((hpts->p_on_queue_cnt <= conn_cnt_thresh) && (hpts->hit_callout_thresh == 1)) {
+		hpts->hit_callout_thresh = 0;
+		atomic_subtract_int(&hpts_that_need_softclock, 1);
+	}
 	if (hpts->p_on_queue_cnt >= conn_cnt_thresh) {
 		if(hpts->p_direct_wake == 0) {
 			/*
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 3721db649a3d..eb690b1cf4b4 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -393,8 +393,10 @@ extern int	cpu_disable_c2_sleep;
 extern int	cpu_disable_c3_sleep;
 
 extern void	(*tcp_hpts_softclock)(void);
+extern volatile uint32_t __read_frequently hpts_that_need_softclock;
+
 #define	tcp_hpts_softclock()	do {					\
-		if (tcp_hpts_softclock != NULL)				\
+		if (hpts_that_need_softclock > 0)			\
 			tcp_hpts_softclock();				\
 } while (0)