git: 26bdd35c39d8 - main - rack and bbr not loading if TCP_RATELIMIT is not configured.

From: Randall Stewart <rrs_at_FreeBSD.org>
Date: Thu, 05 Jan 2023 17:01:39 UTC
The branch main has been updated by rrs:

URL: https://cgit.FreeBSD.org/src/commit/?id=26bdd35c39d8de2672fac8903a161699ffe38a82

commit 26bdd35c39d8de2672fac8903a161699ffe38a82
Author:     Randall Stewart <rrs@FreeBSD.org>
AuthorDate: 2023-01-05 16:59:21 +0000
Commit:     Randall Stewart <rrs@FreeBSD.org>
CommitDate: 2023-01-05 16:59:52 +0000

    rack and bbr not loading if TCP_RATELIMIT is not configured.
    
    It turns out that rack and bbr still will not load without TCP_RATELIMIT. Fix
    this, and at the same time bring tcp_ratelimit up to date so that the
    transports can set a divisor for the burst-size calculation (while keeping a
    default path that uses the default divisor of 1000).
    
    Reviewed by: tuexen, gallatin
    Sponsored by: Netflix Inc
    Differential Revision: https://reviews.freebsd.org/D37954
---
 sys/netinet/tcp_ratelimit.c | 42 ++++++++++++++++++++++-----
 sys/netinet/tcp_ratelimit.h | 71 +++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 101 insertions(+), 12 deletions(-)
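
For illustration, here is a minimal userland sketch of the burst-size calculation
described in the commit message, with the divisor made a parameter. It mirrors the
logic added to tcp_get_pacing_burst_size_w_divisor() in the diff below, but it is
not the kernel code itself: burst_size_sketch() and its main() driver are invented
for this example, and the RL_* constants are copied from the tcp_ratelimit.h hunk.

#include <stdint.h>
#include <stdio.h>

#define RL_MIN_DIVISOR		50	/* values from the tcp_ratelimit.h hunk below */
#define RL_DEFAULT_DIVISOR	1000

static uint32_t
burst_size_sketch(uint64_t bw, uint32_t segsiz, int can_use_1mss, int divisor)
{
	uint64_t bytes;
	uint32_t new_tso, min_tso_segs;

	/* Fall back to the default divisor if the caller's value is unusable. */
	if (divisor < RL_MIN_DIVISOR)
		bytes = bw / RL_DEFAULT_DIVISOR;
	else
		bytes = bw / divisor;
	/* A single TSO burst can never exceed 64k. */
	if (bytes > 0xffff)
		bytes = 0xffff;
	/* Round up to a whole number of segments. */
	new_tso = (bytes + segsiz - 1) / segsiz;
	min_tso_segs = can_use_1mss ? 1 : 2;
	if (new_tso < min_tso_segs)
		new_tso = min_tso_segs;
	return (new_tso * segsiz);
}

int
main(void)
{
	/* 100 Mbit/s (12,500,000 bytes/s), 1448-byte segments, default divisor. */
	printf("%u\n", (unsigned)burst_size_sketch(12500000ULL, 1448, 0,
	    RL_DEFAULT_DIVISOR));
	return (0);
}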

diff --git a/sys/netinet/tcp_ratelimit.c b/sys/netinet/tcp_ratelimit.c
index dc207d10311c..ca619e66c07d 100644
--- a/sys/netinet/tcp_ratelimit.c
+++ b/sys/netinet/tcp_ratelimit.c
@@ -255,6 +255,10 @@ static uint32_t wait_time_floor = 8000;	/* 8 ms */
 static uint32_t rs_hw_floor_mss = 16;
 static uint32_t num_of_waits_allowed = 1; /* How many time blocks are we willing to wait */
 
+static uint32_t mss_divisor = RL_DEFAULT_DIVISOR;
+static uint32_t even_num_segs = 1;
+static uint32_t even_threshold = 4;
+
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, rl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "TCP Ratelimit stats");
 SYSCTL_UINT(_net_inet_tcp_rl, OID_AUTO, alive, CTLFLAG_RW,
@@ -277,6 +281,15 @@ SYSCTL_UINT(_net_inet_tcp_rl, OID_AUTO, hw_floor_mss, CTLFLAG_RW,
     &rs_hw_floor_mss, 16,
     "Number of mss that are a minum for hardware pacing?");
 
+SYSCTL_INT(_net_inet_tcp_rl, OID_AUTO, divisor, CTLFLAG_RW,
+    &mss_divisor, RL_DEFAULT_DIVISOR,
+    "The value divided into bytes per second to help establish mss size");
+SYSCTL_INT(_net_inet_tcp_rl, OID_AUTO, even, CTLFLAG_RW,
+    &even_num_segs, 1,
+    "Do we round mss size up to an even number of segments for delayed ack");
+SYSCTL_INT(_net_inet_tcp_rl, OID_AUTO, eventhresh, CTLFLAG_RW,
+    &even_threshold, 4,
+    "At what number of mss do we start rounding up to an even number of mss?");
 
 static void
 rl_add_syctl_entries(struct sysctl_oid *rl_sysctl_root, struct tcp_rate_set *rs)
@@ -1583,8 +1596,8 @@ tcp_log_pacing_size(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, uint32_t new
 }
 
 uint32_t
-tcp_get_pacing_burst_size (struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss,
-   const struct tcp_hwrate_limit_table *te, int *err)
+tcp_get_pacing_burst_size_w_divisor(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss,
+   const struct tcp_hwrate_limit_table *te, int *err, int divisor)
 {
 	/*
 	 * We use the google formula to calculate the
@@ -1592,20 +1605,35 @@ tcp_get_pacing_burst_size (struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int c
 	 * bw < 24Meg
 	 *   tso = 2mss
 	 * else
-	 *   tso = min(bw/1000, 64k)
+	 *   tso = min(bw/(div=1000), 64k)
 	 *
 	 * Note for these calculations we ignore the
 	 * packet overhead (enet hdr, ip hdr and tcp hdr).
+	 * We only get the google formula when we have
+	 * divisor = 1000, which is the default for now.
 	 */
 	uint64_t lentim, res, bytes;
 	uint32_t new_tso, min_tso_segs;
 
-	bytes = bw / 1000;
-	if (bytes > (64 * 1000))
-		bytes = 64 * 1000;
+	/* It can't be zero */
+	if ((divisor == 0) ||
+	    (divisor < RL_MIN_DIVISOR)) {
+		if (mss_divisor)
+			bytes = bw / mss_divisor;
+		else
+			bytes = bw / 1000;
+	} else
+		bytes = bw / divisor;
+	/* We can't ever send more than 65k in a TSO */
+	if (bytes > 0xffff) {
+		bytes = 0xffff;
+	}
 	/* Round up */
 	new_tso = (bytes + segsiz - 1) / segsiz;
-	if (can_use_1mss && (bw < ONE_POINT_TWO_MEG))
+	/* Are we enforcing even boundaries? */
+	if (even_num_segs && (new_tso & 1) && (new_tso > even_threshold))
+		new_tso++;
+	if (can_use_1mss)
 		min_tso_segs = 1;
 	else
 		min_tso_segs = 2;
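
The hunk above also adds three sysctls: net.inet.tcp.rl.divisor, net.inet.tcp.rl.even
and net.inet.tcp.rl.eventhresh. Below is a small standalone sketch of the even-segment
rounding that the latter two control; it is not the kernel code itself, and
round_to_even_segs() with its main() driver are invented for the example.

#include <stdint.h>
#include <stdio.h>

static uint32_t
round_to_even_segs(uint32_t new_tso, uint32_t even_num_segs,
    uint32_t even_threshold)
{
	/*
	 * If rounding is enabled and the burst is an odd number of
	 * segments above the threshold, bump it by one segment so the
	 * peer's delayed-ack logic sees an even segment count.
	 */
	if (even_num_segs && (new_tso & 1) && (new_tso > even_threshold))
		new_tso++;
	return (new_tso);
}

int
main(void)
{
	/* 9 segments with the default threshold of 4 becomes 10. */
	printf("%u\n", (unsigned)round_to_even_segs(9, 1, 4));
	return (0);
}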
diff --git a/sys/netinet/tcp_ratelimit.h b/sys/netinet/tcp_ratelimit.h
index b689c9127493..f5d8f1ec6b1a 100644
--- a/sys/netinet/tcp_ratelimit.h
+++ b/sys/netinet/tcp_ratelimit.h
@@ -36,6 +36,9 @@
 
 struct m_snd_tag;
 
+#define RL_MIN_DIVISOR 50
+#define RL_DEFAULT_DIVISOR 1000
+
 /* Flags on an individual rate */
 #define HDWRPACE_INITED 	0x0001
 #define HDWRPACE_TAGPRESENT	0x0002
@@ -121,6 +124,14 @@ tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte,
 void
 tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte,
     struct tcpcb *tp);
+
+uint32_t
+tcp_get_pacing_burst_size_w_divisor(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss,
+    const struct tcp_hwrate_limit_table *te, int *err, int divisor);
+
+void
+tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte);
+
 #else
 static inline const struct tcp_hwrate_limit_table *
 tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp,
@@ -160,8 +171,56 @@ tcp_hw_highest_rate_ifp(struct ifnet *ifp, struct inpcb *inp)
 	return (0);
 }
 
+static inline uint32_t
+tcp_get_pacing_burst_size_w_divisor(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss,
+   const struct tcp_hwrate_limit_table *te, int *err, int divisor)
+{
+	/*
+	 * We use the google formula to calculate the
+	 * TSO size. I.E.
+	 * bw < 24Meg
+	 *   tso = 2mss
+	 * else
+	 *   tso = min(bw/(div=1000), 64k)
+	 *
+	 * Note for these calculations we ignore the
+	 * packet overhead (enet hdr, ip hdr and tcp hdr).
+	 * We only get the google formula when we have
+	 * divisor = 1000, which is the default for now.
+	 */
+	uint64_t bytes;
+	uint32_t new_tso, min_tso_segs;
+
+	/* It can't be zero */
+	if ((divisor == 0) ||
+	    (divisor < RL_MIN_DIVISOR)) {
+		bytes = bw / RL_DEFAULT_DIVISOR;
+	} else
+		bytes = bw / divisor;
+	/* We can't ever send more than 65k in a TSO */
+	if (bytes > 0xffff) {
+		bytes = 0xffff;
+	}
+	/* Round up */
+	new_tso = (bytes + segsiz - 1) / segsiz;
+	if (can_use_1mss)
+		min_tso_segs = 1;
+	else
+		min_tso_segs = 2;
+	if (new_tso < min_tso_segs)
+		new_tso = min_tso_segs;
+	new_tso *= segsiz;
+	return (new_tso);
+}
+
+/* Do nothing if RATELIMIT is not defined */
+static void
+tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte)
+{
+}
 
 #endif
+
 /*
  * Given a b/w and a segsiz, and optional hardware
  * rate limit, return the ideal size to burst
@@ -170,13 +229,15 @@ tcp_hw_highest_rate_ifp(struct ifnet *ifp, struct inpcb *inp)
  * limit, if not it will bottom out at 2mss (think
  * delayed ack).
  */
-uint32_t
+static inline uint32_t
 tcp_get_pacing_burst_size(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss,
-   const struct tcp_hwrate_limit_table *te, int *err);
-
+			  const struct tcp_hwrate_limit_table *te, int *err)
+{
 
-void
-tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte);
+	return (tcp_get_pacing_burst_size_w_divisor(tp, bw, segsiz,
+						    can_use_1mss,
+						    te, err, 0));
+}
 
 #endif
 #endif
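
Finally, a hypothetical call-site fragment showing how a transport such as rack might
use the new interface. It is not compilable on its own; tp, bw, segsiz, crte and err
are placeholder variables, not code from this commit.

uint32_t burst;
int err = 0;

/*
 * Existing callers are unchanged: the inline wrapper passes divisor 0,
 * which selects the default divisor (the net.inet.tcp.rl.divisor sysctl,
 * 1000 by default, when TCP_RATELIMIT is configured).
 */
burst = tcp_get_pacing_burst_size(tp, bw, segsiz, 0, crte, &err);

/*
 * A transport that wants smaller bursts can now pass its own divisor,
 * e.g. 2000 here; anything below RL_MIN_DIVISOR (50) falls back to the
 * default path.
 */
burst = tcp_get_pacing_burst_size_w_divisor(tp, bw, segsiz, 0, crte, &err, 2000);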