git: e3cbc572f154 - main - kern/subr_trap.c: repair the HPTS performance hack in userret()
Date: Mon, 04 Dec 2023 18:59:31 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=e3cbc572f1541fdc18be9971d23e210d5018e662

commit e3cbc572f1541fdc18be9971d23e210d5018e662
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2023-12-04 18:19:46 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2023-12-04 18:19:46 +0000

    kern/subr_trap.c: repair the HPTS performance hack in userret()

    It wasn't functional as subr_trap.c doesn't include opt_inet.h.

    Put a better comment provided by gallatin@ in place of the old one.
    The idea is to use userret() as a cheap place to call a soft clock.
    This approach saves CPU on busy machines and saves power on idle
    machines. An alternative would be to constantly schedule callouts.
    Running with neither callouts nor the soft clock ruins HPTS precision.

    Reviewed by: tuexen, rrs
    Differential Revision: https://reviews.freebsd.org/D42860
---
 sys/kern/subr_trap.c   | 20 ++++++++++++--------
 sys/netinet/tcp_hpts.h |  1 -
 sys/netinet/tcp_lro.c  |  4 +---
 sys/sys/systm.h        |  6 ++++++
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 8720d9f71c1c..e9a16cd0b36e 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -74,6 +74,8 @@
 #include <sys/epoch.h>
 #endif
 
+void (*tcp_hpts_softclock)(void);
+
 /*
  * Define the code needed before returning to user mode, for trap and
  * syscall.
@@ -125,16 +127,18 @@ userret(struct thread *td, struct trapframe *frame)
 	if (PMC_THREAD_HAS_SAMPLES(td))
 		PMC_CALL_HOOK(td, PMC_FN_THR_USERRET, NULL);
 #endif
-#ifdef TCPHPTS
 	/*
-	 * @gallatin is adament that this needs to go here, I
-	 * am not so sure. Running hpts is a lot like
-	 * a lro_flush() that happens while a user process
-	 * is running. But he may know best so I will go
-	 * with his view of accounting. :-)
+	 * Calling tcp_hpts_softclock() here allows us to avoid frequent,
+	 * expensive callouts that trash the cache and lead to a much higher
+	 * number of interrupts and context switches. Testing on busy web
+	 * servers at Netflix has shown that this improves CPU use by 7% over
+	 * relying only on callouts to drive HPTS, and also results in idle
+	 * power savings on mostly idle servers.
+	 * This was inspired by the paper "Soft Timers: Efficient Microsecond
+	 * Software Timer Support for Network Processing"
+	 * by Mohit Aron and Peter Druschel.
 	 */
-	tcp_run_hpts();
-#endif
+	tcp_hpts_softclock();
 	/*
 	 * Let the scheduler adjust our priority etc.
 	 */
diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h
index 8ca21daf60de..7eb1b2e08cb4 100644
--- a/sys/netinet/tcp_hpts.h
+++ b/sys/netinet/tcp_hpts.h
@@ -152,7 +152,6 @@ void __tcp_set_hpts(struct tcpcb *tp, int32_t line);
 
 void tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason);
 
-extern void (*tcp_hpts_softclock)(void);
 void tcp_lro_hpts_init(void);
 
 extern int32_t tcp_min_hptsi_time;
diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c
index 255e543ae21d..921d28f82517 100644
--- a/sys/netinet/tcp_lro.c
+++ b/sys/netinet/tcp_lro.c
@@ -89,7 +89,6 @@ SYSCTL_NODE(_net_inet_tcp, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
 
 long tcplro_stacks_wanting_mbufq;
 int (*tcp_lro_flush_tcphpts)(struct lro_ctrl *lc, struct lro_entry *le);
-void (*tcp_hpts_softclock)(void);
 
 counter_u64_t tcp_inp_lro_direct_queue;
 counter_u64_t tcp_inp_lro_wokeup_queue;
@@ -1262,8 +1261,7 @@ tcp_lro_flush_all(struct lro_ctrl *lc)
 done:
 	/* flush active streams */
 	tcp_lro_rx_done(lc);
-	if (tcp_hpts_softclock != NULL)
-		tcp_hpts_softclock();
+	tcp_hpts_softclock();
 	lc->lro_mbuf_count = 0;
 }
 
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 2532bc3d9926..06d40481375f 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -378,6 +378,12 @@ void cpu_et_frequency(struct eventtimer *et, uint64_t newfreq);
 extern int cpu_disable_c2_sleep;
 extern int cpu_disable_c3_sleep;
 
+extern void (*tcp_hpts_softclock)(void);
+#define tcp_hpts_softclock() do { \
+	if (tcp_hpts_softclock != NULL) \
+		tcp_hpts_softclock(); \
+} while (0)
+
 char *kern_getenv(const char *name);
 void freeenv(char *env);
 int getenv_int(const char *name, int *data);