git: 81560c55823b - main - TCP: Rack ends up sending all that is outstanding every timeout.

From: Randall Stewart <rrs_at_FreeBSD.org>
Date: Fri, 09 Sep 2022 13:00:48 UTC
The branch main has been updated by rrs:

URL: https://cgit.FreeBSD.org/src/commit/?id=81560c55823b2c4193ab293b210ba9d681b6e53f

commit 81560c55823b2c4193ab293b210ba9d681b6e53f
Author:     Randall Stewart <rrs@FreeBSD.org>
AuthorDate: 2022-09-09 12:59:21 +0000
Commit:     Randall Stewart <rrs@FreeBSD.org>
CommitDate: 2022-09-09 12:59:21 +0000

    TCP: Rack ends up sending all that is outstanding every timeout.
    
    In doing some testing for a different problem, I have found rack retransmitting
    all outstanding data every time a timeout occurs. The outstanding is sent 1ms
    apart between each packet, and then the timeout runs off again. This causes
    extra retransmissions when we should be waiting for an ack after sending the
    very first segment.
    
    Reviewed by: tuexen
    Sponsored by: Netflix Inc
    Differential Revision: https://reviews.freebsd.org/D36494
---
 sys/netinet/tcp_stacks/rack.c     | 21 +++++++++++++++++++++
 sys/netinet/tcp_stacks/tcp_rack.h |  3 ++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index a97f82a02b85..3ab4494865a5 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -6685,6 +6685,7 @@ rack_timeout_rxt(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
 	}
 	rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_RXT;
 	rack->r_ctl.retran_during_recovery = 0;
+	rack->rc_ack_required = 1;
 	rack->r_ctl.dsack_byte_cnt = 0;
 	if (IN_FASTRECOVERY(tp->t_flags))
 		tp->t_flags |= TF_WASFRECOVERY;
@@ -14068,6 +14069,11 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 */
 	rack = (struct tcp_rack *)tp->t_fb_ptr;
 	if (m->m_flags & M_ACKCMP) {
+		/*
+		 * All compressed ack's are ack's by definition so
+		 * remove any ack required flag and then do the processing.
+		 */
+		rack->rc_ack_required = 0;
 		return (rack_do_compressed_ack_processing(tp, so, m, nxt_pkt, tv));
 	}
 	if (m->m_flags & M_ACKCMP) {
@@ -14238,6 +14244,9 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		TCP_LOG_EVENTP(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_IN, 0,
 		    tlen, &log, true, &ltv);
 	}
+	/* Remove ack required flag if set, we have one  */
+	if (thflags & TH_ACK)
+		rack->rc_ack_required = 0;
 	if ((thflags & TH_SYN) && (thflags & TH_FIN) && V_drop_synfin) {
 		way_out = 4;
 		retval = 0;
@@ -16798,6 +16807,18 @@ rack_output(struct tcpcb *tp)
 		}
 #ifdef TCP_ACCOUNTING
 		sched_unpin();
+#endif
+		return (0);
+	}
+	if ((rack->rc_ack_required == 1) &&
+	    (rack->r_timer_override == 0)){
+		/* A timeout occurred and no ack has arrived */
+		if (tcp_in_hpts(rack->rc_inp) == 0) {
+			/* Timer is not running */
+			rack_start_hpts_timer(rack, tp, cts, 0, 0, 0);
+		}
+#ifdef TCP_ACCOUNTING
+		sched_unpin();
 #endif
 		return (0);
 	}
diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h
index 798a1ba4364f..6f447d5ea470 100644
--- a/sys/netinet/tcp_stacks/tcp_rack.h
+++ b/sys/netinet/tcp_stacks/tcp_rack.h
@@ -549,7 +549,7 @@ struct tcp_rack {
 	struct inpcb *rc_inp;	/* The inpcb Lock(a) */
 	uint8_t rc_free_cnt;	/* Number of free entries on the rc_free list
 				 * Lock(a) */
-	uint8_t client_bufferlvl : 4, /* Expected range [0,5]: 0=unset, 1=low/empty */
+	uint8_t client_bufferlvl : 3, /* Expected range [0,5]: 0=unset, 1=low/empty */
 		rack_deferred_inited : 1,
 	        /* ******************************************************************** */
 	        /* Note for details of next two fields see rack_init_retransmit_rate()  */
@@ -557,6 +557,7 @@ struct tcp_rack {
 		full_size_rxt: 1,
 		shape_rxt_to_pacing_min : 1,
 	        /* ******************************************************************** */
+		rc_ack_required: 1,
 		spare : 1;
 	uint8_t no_prr_addback : 1,
 		gp_ready : 1,