git: b7b78c1c169d - main - Optimize HPTS so that little work is done until we have a hpts thread that is over the connection threshold

From: Randall Stewart <rrs_at_FreeBSD.org>
Date: Thu, 28 Mar 2024 12:13:17 UTC
The branch main has been updated by rrs:

URL: https://cgit.FreeBSD.org/src/commit/?id=b7b78c1c169dd2213b4cb3e14e19c045b2c5e5af

commit b7b78c1c169dd2213b4cb3e14e19c045b2c5e5af
Author:     Randall Stewart <rrs@FreeBSD.org>
AuthorDate: 2024-03-28 12:12:37 +0000
Commit:     Randall Stewart <rrs@FreeBSD.org>
CommitDate: 2024-03-28 12:12:37 +0000

    Optimize HPTS so that little work is done until we have a hpts thread that is over the connection threshold
    
    HPTS inserts a softclock for system call return that optimizes performance. However, when
    no HPTS threads need the help (i.e. when they have fewer than 100 or so connections), then
    there should be little work done, i.e. check the counter and return instead of running through
    all the threads getting locks etc. Optimize HPTS so that little work is done until we have a hpts
    thread that is over the connection threshold.
    
    Reported by:    eduardo
    Reviewed by:    gallatin, glebius, tuexen
    Tested by:      gallatin
    Differential Revision: https://reviews.freebsd.org/D44420
---
 sys/kern/subr_trap.c   |  2 ++
 sys/netinet/tcp_hpts.c | 10 +++++++++-
 sys/sys/systm.h        |  4 +++-
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index e9a16cd0b36e..18388ae5f232 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -74,6 +74,8 @@
 #include <sys/epoch.h>
 #endif
 
+volatile uint32_t __read_frequently hpts_that_need_softclock = 0;
+
 void	(*tcp_hpts_softclock)(void);
 
 /*
diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c
index 8c4d2d41a3eb..222f8e0229dd 100644
--- a/sys/netinet/tcp_hpts.c
+++ b/sys/netinet/tcp_hpts.c
@@ -193,7 +193,8 @@ struct tcp_hpts_entry {
 	uint8_t p_direct_wake :1, /* boolean */
 		p_on_min_sleep:1, /* boolean */
 		p_hpts_wake_scheduled:1, /* boolean */
-		p_avail:5;
+		hit_callout_thresh:1,
+		p_avail:4;
 	uint8_t p_fill[3];	  /* Fill to 32 bits */
 	/* Cache line 0x40 */
 	struct hptsh {
@@ -1683,6 +1684,13 @@ tcp_hpts_thread(void *ctx)
 	ticks_ran = tcp_hptsi(hpts, 1);
 	tv.tv_sec = 0;
 	tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_TICKS_PER_SLOT;
+	if ((hpts->p_on_queue_cnt > conn_cnt_thresh) && (hpts->hit_callout_thresh == 0)) {
+		hpts->hit_callout_thresh = 1;
+		atomic_add_int(&hpts_that_need_softclock, 1);
+	} else if ((hpts->p_on_queue_cnt <= conn_cnt_thresh) && (hpts->hit_callout_thresh == 1)) {
+		hpts->hit_callout_thresh = 0;
+		atomic_subtract_int(&hpts_that_need_softclock, 1);
+	}
 	if (hpts->p_on_queue_cnt >= conn_cnt_thresh) {
 		if(hpts->p_direct_wake == 0) {
 			/*
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 435e6b61c904..6c2b466c4cfe 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -400,8 +400,10 @@ extern int	cpu_disable_c2_sleep;
 extern int	cpu_disable_c3_sleep;
 
 extern void	(*tcp_hpts_softclock)(void);
+extern volatile uint32_t __read_frequently hpts_that_need_softclock;
+
 #define	tcp_hpts_softclock()	do {					\
-		if (tcp_hpts_softclock != NULL)				\
+		if (hpts_that_need_softclock > 0)			\
 			tcp_hpts_softclock();				\
 } while (0)