git: e3cbc572f154 - main - kern/subr_trap.c: repair the HPTS performance hack in userret()

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Mon, 04 Dec 2023 18:59:31 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=e3cbc572f1541fdc18be9971d23e210d5018e662

commit e3cbc572f1541fdc18be9971d23e210d5018e662
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2023-12-04 18:19:46 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2023-12-04 18:19:46 +0000

    kern/subr_trap.c: repair the HPTS performance hack in userret()
    
    The hack wasn't functional because subr_trap.c doesn't include
    opt_inet.h, so the code under #ifdef TCPHPTS was never compiled in.
    Replace the old comment with a better one provided by gallatin@.  The
    idea is to use userret() as a cheap place to call a soft clock.  This
    approach saves CPU on busy machines and saves power on idle machines.
    An alternative would be to constantly schedule callouts.  Running with
    neither callouts nor the soft clock ruins HPTS precision.
    
    Reviewed by:            tuexen, rrs
    Differential Revision:  https://reviews.freebsd.org/D42860
---
 sys/kern/subr_trap.c   | 20 ++++++++++++--------
 sys/netinet/tcp_hpts.h |  1 -
 sys/netinet/tcp_lro.c  |  4 +---
 sys/sys/systm.h        |  6 ++++++
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 8720d9f71c1c..e9a16cd0b36e 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -74,6 +74,8 @@
 #include <sys/epoch.h>
 #endif
 
+void	(*tcp_hpts_softclock)(void);
+
 /*
  * Define the code needed before returning to user mode, for trap and
  * syscall.
@@ -125,16 +127,18 @@ userret(struct thread *td, struct trapframe *frame)
 	if (PMC_THREAD_HAS_SAMPLES(td))
 		PMC_CALL_HOOK(td, PMC_FN_THR_USERRET, NULL);
 #endif
-#ifdef TCPHPTS
 	/*
-	 * @gallatin is adament that this needs to go here, I
-	 * am not so sure. Running hpts is a lot like
-	 * a lro_flush() that happens while a user process
-	 * is running. But he may know best so I will go
-	 * with his view of accounting. :-)
+	 * Calling tcp_hpts_softclock() here allows us to avoid frequent,
+	 * expensive callouts that trash the cache and lead to a much higher
+	 * number of interrupts and context switches.  Testing on busy web
+	 * servers at Netflix has shown that this improves CPU use by 7% over
+	 * relying only on callouts to drive HPTS, and also results in idle
+	 * power savings on mostly idle servers.
+	 * This was inspired by the paper "Soft Timers: Efficient Microsecond
+	 * Software Timer Support for Network Processing"
+	 * by Mohit Aron and Peter Druschel.
 	 */
-	tcp_run_hpts();
-#endif
+	tcp_hpts_softclock();
 	/*
 	 * Let the scheduler adjust our priority etc.
 	 */
diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h
index 8ca21daf60de..7eb1b2e08cb4 100644
--- a/sys/netinet/tcp_hpts.h
+++ b/sys/netinet/tcp_hpts.h
@@ -152,7 +152,6 @@ void __tcp_set_hpts(struct tcpcb *tp, int32_t line);
 
 void tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason);
 
-extern void (*tcp_hpts_softclock)(void);
 void tcp_lro_hpts_init(void);
 
 extern int32_t tcp_min_hptsi_time;
diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c
index 255e543ae21d..921d28f82517 100644
--- a/sys/netinet/tcp_lro.c
+++ b/sys/netinet/tcp_lro.c
@@ -89,7 +89,6 @@ SYSCTL_NODE(_net_inet_tcp, OID_AUTO, lro,  CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
 
 long tcplro_stacks_wanting_mbufq;
 int	(*tcp_lro_flush_tcphpts)(struct lro_ctrl *lc, struct lro_entry *le);
-void	(*tcp_hpts_softclock)(void);
 
 counter_u64_t tcp_inp_lro_direct_queue;
 counter_u64_t tcp_inp_lro_wokeup_queue;
@@ -1262,8 +1261,7 @@ tcp_lro_flush_all(struct lro_ctrl *lc)
 done:
 	/* flush active streams */
 	tcp_lro_rx_done(lc);
-	if (tcp_hpts_softclock != NULL)
-		tcp_hpts_softclock();
+	tcp_hpts_softclock();
 	lc->lro_mbuf_count = 0;
 }
 
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 2532bc3d9926..06d40481375f 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -378,6 +378,12 @@ void	cpu_et_frequency(struct eventtimer *et, uint64_t newfreq);
 extern int	cpu_disable_c2_sleep;
 extern int	cpu_disable_c3_sleep;
 
+extern void	(*tcp_hpts_softclock)(void);
+#define	tcp_hpts_softclock()	do {					\
+		if (tcp_hpts_softclock != NULL)				\
+			tcp_hpts_softclock();				\
+} while (0)
+
 char	*kern_getenv(const char *name);
 void	freeenv(char *env);
 int	getenv_int(const char *name, int *data);