git: a3aa6f652904 - main - cc_cubic: Use units of microseconds (usecs) instead of ticks in rtt.

From: Cheng Cui <cc@FreeBSD.org>
Date: Thu, 01 Jun 2023 15:55:58 UTC
The branch main has been updated by cc:

URL: https://cgit.FreeBSD.org/src/commit/?id=a3aa6f65290482cedf4aeda1d0875ca6433c7f04

commit a3aa6f65290482cedf4aeda1d0875ca6433c7f04
Author:     Cheng Cui <cc@FreeBSD.org>
AuthorDate: 2023-06-01 11:48:07 +0000
Commit:     Cheng Cui <cc@FreeBSD.org>
CommitDate: 2023-06-01 11:55:01 +0000

    cc_cubic: Use units of microseconds (usecs) instead of ticks in rtt.
    
    This improves the TCP-friendly cwnd on low-latency, high-drop-rate
    networks. Tests show +42% and +37% better performance in the 1 Gbps
    and 10 Gbps cases, respectively.
    
    Reported by: Bhaskar Pardeshi (VMware)
    Reviewed by: rscheff, tuexen
    Approved by: rscheff (mentor), tuexen (mentor)
---
 sys/netinet/cc/cc_cubic.c | 60 +++++++++++++++++++++++++----------------------
 sys/netinet/cc/cc_cubic.h | 33 ++++++++++++++------------
 2 files changed, 50 insertions(+), 43 deletions(-)
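
At the heart of the patch is a unit conversion: the elapsed time since the
last congestion event, previously kept in scheduler ticks, is now expressed
in microseconds by multiplying the tick delta with the kernel global `tick`
(microseconds per tick, i.e. 1000000 / hz). Below is a minimal userland
sketch of that arithmetic, assuming hz = 1000; the variable names are
illustrative, not the committed code:

#include <limits.h>
#include <stdio.h>

int main(void)
{
	const int hz = 1000;           /* assumed kern.hz */
	const int tick = 1000000 / hz; /* usecs per tick, as in the kernel */
	int ticks_since_cong = 250;    /* example: 250 ticks since last loss */
	int usecs_since_cong = ticks_since_cong * tick;

	printf("%d ticks -> %d usecs\n", ticks_since_cong, usecs_since_cong);

	/* Wrap guard mirroring the cubic_ack_received() hunk below. */
	if (usecs_since_cong < 0)
		usecs_since_cong = INT_MAX;
	return (0);
}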

diff --git a/sys/netinet/cc/cc_cubic.c b/sys/netinet/cc/cc_cubic.c
index 8992b9beba13..be9bd9859122 100644
--- a/sys/netinet/cc/cc_cubic.c
+++ b/sys/netinet/cc/cc_cubic.c
@@ -240,7 +240,7 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
 {
 	struct cubic *cubic_data;
 	unsigned long w_tf, w_cubic_next;
-	int ticks_since_cong;
+	int usecs_since_cong;
 
 	cubic_data = ccv->cc_data;
 	cubic_record_rtt(ccv);
@@ -253,7 +253,7 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
 	    (ccv->flags & CCF_CWND_LIMITED)) {
 		 /* Use the logic in NewReno ack_received() for slow start. */
 		if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) ||
-		    cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) {
+		    cubic_data->min_rtt_usecs == TCPTV_SRTTBASE) {
 			cubic_does_slow_start(ccv, cubic_data);
 		} else {
 			if (cubic_data->flags & CUBICFLAG_HYSTART_IN_CSS) {
@@ -282,12 +282,12 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
 				cubic_data->K = cubic_k(cubic_data->max_cwnd /
 							CCV(ccv, t_maxseg));
 			}
-			if ((ticks_since_cong =
-			    ticks - cubic_data->t_last_cong) < 0) {
+			usecs_since_cong = (ticks - cubic_data->t_last_cong) * tick;
+			if (usecs_since_cong < 0) {
 				/*
 				 * dragging t_last_cong along
 				 */
-				ticks_since_cong = INT_MAX;
+				usecs_since_cong = INT_MAX;
 				cubic_data->t_last_cong = ticks - INT_MAX;
 			}
 			/*
@@ -297,13 +297,14 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
 			 * RTT is dominated by network buffering rather than
 			 * propagation delay.
 			 */
-			w_tf = tf_cwnd(ticks_since_cong,
-			    cubic_data->mean_rtt_ticks, cubic_data->max_cwnd,
-			    CCV(ccv, t_maxseg));
+			w_tf = tf_cwnd(usecs_since_cong, cubic_data->mean_rtt_usecs,
+				       cubic_data->max_cwnd, CCV(ccv, t_maxseg));
 
-			w_cubic_next = cubic_cwnd(ticks_since_cong +
-			    cubic_data->mean_rtt_ticks, cubic_data->max_cwnd,
-			    CCV(ccv, t_maxseg), cubic_data->K);
+			w_cubic_next = cubic_cwnd(usecs_since_cong +
+						  cubic_data->mean_rtt_usecs,
+						  cubic_data->max_cwnd,
+						  CCV(ccv, t_maxseg),
+						  cubic_data->K);
 
 			ccv->flags &= ~CCF_ABC_SENTAWND;
 
@@ -397,8 +398,8 @@ cubic_cb_init(struct cc_var *ccv, void *ptr)
 
 	/* Init some key variables with sensible defaults. */
 	cubic_data->t_last_cong = ticks;
-	cubic_data->min_rtt_ticks = TCPTV_SRTTBASE;
-	cubic_data->mean_rtt_ticks = 1;
+	cubic_data->min_rtt_usecs = TCPTV_SRTTBASE;
+	cubic_data->mean_rtt_usecs = 1;
 
 	ccv->cc_data = cubic_data;
 	cubic_data->flags = CUBICFLAG_HYSTART_ENABLED;
@@ -549,13 +550,13 @@ cubic_post_recovery(struct cc_var *ccv)
 
 	/* Calculate the average RTT between congestion epochs. */
 	if (cubic_data->epoch_ack_count > 0 &&
-	    cubic_data->sum_rtt_ticks >= cubic_data->epoch_ack_count) {
-		cubic_data->mean_rtt_ticks = (int)(cubic_data->sum_rtt_ticks /
+	    cubic_data->sum_rtt_usecs >= cubic_data->epoch_ack_count) {
+		cubic_data->mean_rtt_usecs = (int)(cubic_data->sum_rtt_usecs /
 		    cubic_data->epoch_ack_count);
 	}
 
 	cubic_data->epoch_ack_count = 0;
-	cubic_data->sum_rtt_ticks = 0;
+	cubic_data->sum_rtt_usecs = 0;
 }
 
 /*
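
The cubic_post_recovery() hunk above averages the per-ACK SRTT samples
accumulated in sum_rtt_usecs over one congestion epoch. A standalone sketch
of that bookkeeping, with hypothetical sample values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical per-ACK SRTT samples (usecs) for one epoch. */
	const uint32_t samples[] = { 480, 510, 495, 505 };
	int64_t sum_rtt_usecs = 0;
	int epoch_ack_count = 0;
	unsigned i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		sum_rtt_usecs += samples[i];	/* as in cubic_record_rtt() */
		epoch_ack_count++;
	}
	/* As in cubic_post_recovery(): average once per epoch. */
	if (epoch_ack_count > 0 && sum_rtt_usecs >= epoch_ack_count)
		printf("mean_rtt_usecs = %d\n",
		    (int)(sum_rtt_usecs / epoch_ack_count));
	return (0);
}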
@@ -565,13 +566,13 @@ static void
 cubic_record_rtt(struct cc_var *ccv)
 {
 	struct cubic *cubic_data;
-	int t_srtt_ticks;
+	uint32_t t_srtt_usecs;
 
 	/* Ignore srtt until a min number of samples have been taken. */
 	if (CCV(ccv, t_rttupdated) >= CUBIC_MIN_RTT_SAMPLES) {
 		cubic_data = ccv->cc_data;
-		t_srtt_ticks = tcp_get_srtt(ccv->ccvc.tcp,
-					    TCP_TMR_GRANULARITY_TICKS);
+		t_srtt_usecs = tcp_get_srtt(ccv->ccvc.tcp,
+					    TCP_TMR_GRANULARITY_USEC);
 		/*
 		 * Record the current SRTT as our minrtt if it's the smallest
 		 * we've seen or minrtt is currently equal to its initialised
@@ -579,24 +580,27 @@ cubic_record_rtt(struct cc_var *ccv)
 		 *
 		 * XXXLAS: Should there be some hysteresis for minrtt?
 		 */
-		if ((t_srtt_ticks < cubic_data->min_rtt_ticks ||
-		    cubic_data->min_rtt_ticks == TCPTV_SRTTBASE)) {
-			cubic_data->min_rtt_ticks = max(1, t_srtt_ticks);
+		if ((t_srtt_usecs < cubic_data->min_rtt_usecs ||
+		    cubic_data->min_rtt_usecs == TCPTV_SRTTBASE)) {
+			/* A minimal rtt is a single unshifted tick of a ticks
+			 * timer. */
+			cubic_data->min_rtt_usecs = max(tick >> TCP_RTT_SHIFT,
+							t_srtt_usecs);
 
 			/*
 			 * If the connection is within its first congestion
-			 * epoch, ensure we prime mean_rtt_ticks with a
+			 * epoch, ensure we prime mean_rtt_usecs with a
 			 * reasonable value until the epoch average RTT is
 			 * calculated in cubic_post_recovery().
 			 */
-			if (cubic_data->min_rtt_ticks >
-			    cubic_data->mean_rtt_ticks)
-				cubic_data->mean_rtt_ticks =
-				    cubic_data->min_rtt_ticks;
+			if (cubic_data->min_rtt_usecs >
+			    cubic_data->mean_rtt_usecs)
+				cubic_data->mean_rtt_usecs =
+				    cubic_data->min_rtt_usecs;
 		}
 
 		/* Sum samples for epoch average RTT calculation. */
-		cubic_data->sum_rtt_ticks += t_srtt_ticks;
+		cubic_data->sum_rtt_usecs += t_srtt_usecs;
 		cubic_data->epoch_ack_count++;
 	}
 }
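
The floor applied to min_rtt_usecs above replaces the old max(1,
t_srtt_ticks): since SRTT carries TCP_RTT_SHIFT bits of fractional
precision, the smallest sample a ticks-based timer can represent is one
unshifted tick, i.e. tick >> TCP_RTT_SHIFT microseconds. A sketch of the
clamp, assuming hz = 1000 (where the floor works out to 31 usecs):

#include <stdio.h>

#define TCP_RTT_SHIFT	5	/* as in <netinet/tcp_var.h> */

int main(void)
{
	const int hz = 1000;			/* assumed kern.hz */
	const int tick = 1000000 / hz;		/* 1000 usecs per tick */
	const int floor_usecs = tick >> TCP_RTT_SHIFT;	/* 31 usecs here */
	int t_srtt_usecs = 12;			/* hypothetical tiny sample */
	int min_rtt_usecs;

	/* Mirrors: max(tick >> TCP_RTT_SHIFT, t_srtt_usecs) above. */
	min_rtt_usecs = (t_srtt_usecs > floor_usecs) ?
	    t_srtt_usecs : floor_usecs;
	printf("min_rtt_usecs = %d\n", min_rtt_usecs);
	return (0);
}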
diff --git a/sys/netinet/cc/cc_cubic.h b/sys/netinet/cc/cc_cubic.h
index 0749a9ebbc1a..3d408154c1a5 100644
--- a/sys/netinet/cc/cc_cubic.h
+++ b/sys/netinet/cc/cc_cubic.h
@@ -91,8 +91,8 @@
 struct cubic {
 	/* CUBIC K in fixed point form with CUBIC_SHIFT worth of precision. */
 	int64_t		K;
-	/* Sum of RTT samples across an epoch in ticks. */
-	int64_t		sum_rtt_ticks;
+	/* Sum of RTT samples across an epoch in usecs. */
+	int64_t		sum_rtt_usecs;
 	/* cwnd at the most recent congestion event. */
 	unsigned long	max_cwnd;
 	/* cwnd at the previous congestion event. */
@@ -101,10 +101,10 @@ struct cubic {
 	unsigned long	prev_max_cwnd_cp;
 	/* various flags */
 	uint32_t	flags;
-	/* Minimum observed rtt in ticks. */
-	int		min_rtt_ticks;
+	/* Minimum observed rtt in usecs. */
+	int		min_rtt_usecs;
 	/* Mean observed rtt between congestion epochs. */
-	int		mean_rtt_ticks;
+	int		mean_rtt_usecs;
 	/* ACKs since last congestion event. */
 	int		epoch_ack_count;
 	/* Timestamp (in ticks) of arriving in congestion avoidance from last
@@ -222,14 +222,15 @@ cubic_k(unsigned long wmax_pkts)
  * XXXLAS: Characterise bounds for overflow.
  */
 static __inline unsigned long
-cubic_cwnd(int ticks_since_cong, unsigned long wmax, uint32_t smss, int64_t K)
+cubic_cwnd(int usecs_since_cong, unsigned long wmax, uint32_t smss, int64_t K)
 {
 	int64_t cwnd;
 
 	/* K is in fixed point form with CUBIC_SHIFT worth of precision. */
 
 	/* t - K, with CUBIC_SHIFT worth of precision. */
-	cwnd = (((int64_t)ticks_since_cong << CUBIC_SHIFT) - (K * hz)) / hz;
+	cwnd = (((int64_t)usecs_since_cong << CUBIC_SHIFT) - (K * hz * tick)) /
+	       (hz * tick);
 
 	if (cwnd > CUBED_ROOT_MAX_ULONG)
 		return INT_MAX;
@@ -255,15 +256,17 @@ cubic_cwnd(int ticks_since_cong, unsigned long wmax, uint32_t smss, int64_t K)
 }
 
 /*
- * Compute an approximation of the NewReno cwnd some number of ticks after a
+ * Compute an approximation of the NewReno cwnd some number of usecs after a
  * congestion event. RTT should be the average RTT estimate for the path
  * measured over the previous congestion epoch and wmax is the value of cwnd at
  * the last congestion event. The "TCP friendly" concept in the CUBIC I-D is
  * rather tricky to understand and it turns out this function is not required.
  * It is left here for reference.
+ *
+ * XXX: Not used
  */
 static __inline unsigned long
-reno_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax,
+reno_cwnd(int usecs_since_cong, int rtt_usecs, unsigned long wmax,
     uint32_t smss)
 {
 
@@ -272,26 +275,26 @@ reno_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax,
 	 * W_tcp(t) deals with cwnd/wmax in pkts, so because our cwnd is in
 	 * bytes, we have to multiply by smss.
 	 */
-	return (((wmax * RENO_BETA) + (((ticks_since_cong * smss)
-	    << CUBIC_SHIFT) / rtt_ticks)) >> CUBIC_SHIFT);
+	return (((wmax * RENO_BETA) + (((usecs_since_cong * smss)
+	    << CUBIC_SHIFT) / rtt_usecs)) >> CUBIC_SHIFT);
 }
 
 /*
- * Compute an approximation of the "TCP friendly" cwnd some number of ticks
+ * Compute an approximation of the "TCP friendly" cwnd some number of usecs
  * after a congestion event that is designed to yield the same average cwnd as
  * NewReno while using CUBIC's beta of 0.7. RTT should be the average RTT
  * estimate for the path measured over the previous congestion epoch and wmax is
  * the value of cwnd at the last congestion event.
  */
 static __inline unsigned long
-tf_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax,
+tf_cwnd(int usecs_since_cong, int rtt_usecs, unsigned long wmax,
     uint32_t smss)
 {
 
 	/* Equation 4 of I-D. */
 	return (((wmax * CUBIC_BETA) +
-	    (((THREE_X_PT3 * (unsigned long)ticks_since_cong *
-	    (unsigned long)smss) << CUBIC_SHIFT) / (TWO_SUB_PT3 * rtt_ticks)))
+	    (((THREE_X_PT3 * (unsigned long)usecs_since_cong *
+	    (unsigned long)smss) << CUBIC_SHIFT) / (TWO_SUB_PT3 * rtt_usecs)))
 	    >> CUBIC_SHIFT);
 }
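
With time in microseconds, the cubic_cwnd() divisor changes from hz (ticks
per second) to hz * tick, which is 1000000 (microseconds per second) for
any hz, so (t - K) is still computed in seconds with CUBIC_SHIFT fixed-point
precision and the cubic function W(t) = C * (t - K)^3 + wmax is unchanged.
A minimal sketch of that first step, assuming CUBIC_SHIFT is 8 as defined
in cc_cubic.h and a hypothetical K of 2.0 seconds:

#include <stdint.h>
#include <stdio.h>

#define CUBIC_SHIFT	8	/* as in cc_cubic.h */

int main(void)
{
	const int64_t usecs_per_sec = 1000000;	/* hz * tick, for any hz */
	int64_t K = 2 << CUBIC_SHIFT;		/* K = 2.0 s in fixed point */
	int64_t t_usecs = 2 * usecs_per_sec;	/* evaluate at t == K */
	int64_t t_minus_K;

	/* First step of cubic_cwnd() in the new units. */
	t_minus_K = ((t_usecs << CUBIC_SHIFT) - K * usecs_per_sec) /
	    usecs_per_sec;
	/* At t == K the cubic term vanishes: cwnd has grown back to wmax. */
	printf("t - K = %lld (CUBIC_SHIFT fixed point)\n",
	    (long long)t_minus_K);
	return (0);
}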