git: 48b55a7c7be4 - main - tcp_hpts: make the module unloadable

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Tue, 19 Dec 2023 18:22:52 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=48b55a7c7be4175998f9b26dfbec5a561acbd936

commit 48b55a7c7be4175998f9b26dfbec5a561acbd936
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2023-12-19 18:21:56 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2023-12-19 18:21:56 +0000

    tcp_hpts: make the module unloadable
    
    Although the HPTS subsystem wasn't initially designed as a loadable
    module, it now is one.  Make it possible to unload it as well, but for
    safety reasons allow that only via 'kldunload -f'.
    
    Reviewed by:            tuexen
    Differential Revision:  https://reviews.freebsd.org/D43092
---
 sys/netinet/tcp_hpts.c     | 82 ++++++++++++++++++++++++++++++++++++++++++----
 sys/netinet/tcp_hpts.h     |  1 +
 sys/netinet/tcp_lro_hpts.c |  6 ++++
 3 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c
index 50a30a2cff03..a6fa79a26949 100644
--- a/sys/netinet/tcp_hpts.c
+++ b/sys/netinet/tcp_hpts.c
@@ -229,7 +229,7 @@ static struct tcp_hptsi {
 	uint32_t rp_num_hptss;	/* Number of hpts threads */
 } tcp_pace;
 
-MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts");
+static MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts");
 #ifdef RSS
 static int tcp_bind_threads = 1;
 #else
@@ -240,7 +240,6 @@ static int hpts_does_tp_logging = 0;
 
 static int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout);
 static void tcp_hpts_thread(void *ctx);
-static void tcp_init_hptsi(void *st);
 
 int32_t tcp_min_hptsi_time = DEFAULT_MIN_SLEEP;
 static int conn_cnt_thresh = DEFAULT_CONNECTION_THESHOLD;
@@ -1794,7 +1793,7 @@ hpts_gather_grps(struct cpu_group **grps, int32_t *at, int32_t max, struct cpu_g
 }
 
 static void
-tcp_init_hptsi(void *st)
+tcp_hpts_mod_load(void)
 {
 	struct cpu_group *cpu_top;
 	int32_t error __diagused;
@@ -2005,10 +2004,81 @@ tcp_init_hptsi(void *st)
 	printf("TCP Hpts created %d swi interrupt threads and bound %d to %s\n",
 	    created, bound,
 	    tcp_bind_threads == 2 ? "NUMA domains" : "cpus");
-#ifdef INVARIANTS
-	printf("HPTS is in INVARIANT mode!!\n");
+}
+
+static void
+tcp_hpts_mod_unload(void)
+{
+	int rv __diagused;
+
+	tcp_lro_hpts_uninit();
+	atomic_store_ptr(&tcp_hpts_softclock, NULL);
+
+	for (int i = 0; i < tcp_pace.rp_num_hptss; i++) {
+		struct tcp_hpts_entry *hpts = tcp_pace.rp_ent[i];
+
+		rv = callout_drain(&hpts->co);
+		MPASS(rv != 0);
+
+		rv = swi_remove(hpts->ie_cookie);
+		MPASS(rv == 0);
+
+		rv = sysctl_ctx_free(&hpts->hpts_ctx);
+		MPASS(rv == 0);
+
+		mtx_destroy(&hpts->p_mtx);
+		free(hpts->p_hptss, M_TCPHPTS);
+		free(hpts, M_TCPHPTS);
+	}
+
+	free(tcp_pace.rp_ent, M_TCPHPTS);
+	free(tcp_pace.cts_last_ran, M_TCPHPTS);
+#ifdef SMP
+	free(tcp_pace.grps, M_TCPHPTS);
 #endif
+
+	counter_u64_free(hpts_hopelessly_behind);
+	counter_u64_free(hpts_loops);
+	counter_u64_free(back_tosleep);
+	counter_u64_free(combined_wheel_wrap);
+	counter_u64_free(wheel_wrap);
+	counter_u64_free(hpts_wake_timeout);
+	counter_u64_free(hpts_direct_awakening);
+	counter_u64_free(hpts_back_tosleep);
+	counter_u64_free(hpts_direct_call);
+	counter_u64_free(cpu_uses_flowid);
+	counter_u64_free(cpu_uses_random);
+}
+
+static int
+tcp_hpts_modevent(module_t mod, int what, void *arg)
+{
+
+	switch (what) {
+	case MOD_LOAD:
+		tcp_hpts_mod_load();
+		return (0);
+	case MOD_QUIESCE:
+		/*
+		 * Since we are a dependency of the TCP stack modules, they
+		 * should already be unloaded, and the HPTS ring is empty.
+		 * However, the function pointer manipulations aren't 100%
+		 * safe: although tcp_hpts_mod_unload() uses atomic(9),
+		 * userret() doesn't.  Thus, allow only forced unload of HPTS.
+		 */
+		return (EBUSY);
+	case MOD_UNLOAD:
+		tcp_hpts_mod_unload();
+		return (0);
+	default:
+		return (EINVAL);
+	};
 }
 
-SYSINIT(tcphptsi, SI_SUB_SOFTINTR, SI_ORDER_ANY, tcp_init_hptsi, NULL);
+static moduledata_t tcp_hpts_module = {
+	.name = "tcphpts",
+	.evhand = tcp_hpts_modevent,
+};
+
+DECLARE_MODULE(tcphpts, tcp_hpts_module, SI_SUB_SOFTINTR, SI_ORDER_ANY);
 MODULE_VERSION(tcphpts, 1);
diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h
index 7eb1b2e08cb4..0c5cfac28a6c 100644
--- a/sys/netinet/tcp_hpts.h
+++ b/sys/netinet/tcp_hpts.h
@@ -153,6 +153,7 @@ void __tcp_set_hpts(struct tcpcb *tp, int32_t line);
 void tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason);
 
 void tcp_lro_hpts_init(void);
+void tcp_lro_hpts_uninit(void);
 
 extern int32_t tcp_min_hptsi_time;
 
diff --git a/sys/netinet/tcp_lro_hpts.c b/sys/netinet/tcp_lro_hpts.c
index 769c82a32391..7724e727d5ce 100644
--- a/sys/netinet/tcp_lro_hpts.c
+++ b/sys/netinet/tcp_lro_hpts.c
@@ -584,3 +584,9 @@ tcp_lro_hpts_init(void)
 {
 	tcp_lro_flush_tcphpts = _tcp_lro_flush_tcphpts;
 }
+
+void
+tcp_lro_hpts_uninit(void)
+{
+	atomic_store_ptr(&tcp_lro_flush_tcphpts, NULL);
+}