git: e2c6a6d29b16 - main - tcp: include RFC6675 IsLost() in pipe calculation

From: Richard Scheffenegger <rscheff_at_FreeBSD.org>
Date: Mon, 09 Oct 2023 10:39:49 UTC
The branch main has been updated by rscheff:

URL: https://cgit.FreeBSD.org/src/commit/?id=e2c6a6d29b163f936c6f9a8020021f711768808d

commit e2c6a6d29b163f936c6f9a8020021f711768808d
Author:     Richard Scheffenegger <rscheff@FreeBSD.org>
AuthorDate: 2023-10-09 09:55:14 +0000
Commit:     Richard Scheffenegger <rscheff@FreeBSD.org>
CommitDate: 2023-10-09 10:37:20 +0000

    tcp: include RFC6675 IsLost() in pipe calculation
    
    Add more accounting while processing SACK data, to
    keep track of when a packet is deemed lost using
    the RFC6675 guidance.
    
    Together with PRR (RFC6937) this allows a sender to
    retransmit presumed lost packets faster, and loss
    recovery to complete earlier.
    
    Reviewed By: cc, rrs, #transport
    Sponsored by: NetApp, Inc.
    Differential Revision: https://reviews.freebsd.org/D39299
---
 sys/netinet/tcp_input.c |  3 +-
 sys/netinet/tcp_sack.c  | 77 ++++++++++++++++++++++++++++++++++++++++++++-----
 sys/netinet/tcp_var.h   |  2 ++
 3 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 11ec7d2d74d2..ba11503955dc 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -4048,7 +4048,8 @@ tcp_compute_pipe(struct tcpcb *tp)
 	if (tp->t_fb->tfb_compute_pipe == NULL) {
 		return (tp->snd_max - tp->snd_una +
 			tp->sackhint.sack_bytes_rexmit -
-			tp->sackhint.sacked_bytes);
+			tp->sackhint.sacked_bytes -
+			tp->sackhint.lost_bytes);
 	} else {
 		return((*tp->t_fb->tfb_compute_pipe)(tp));
 	}
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
index fca4b99ee83b..9a01ff684cbb 100644
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -555,6 +555,11 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 	int i, j, num_sack_blks, sack_changed;
 	int delivered_data, left_edge_delta;
 
+	tcp_seq loss_hiack = 0;
+	int loss_thresh = 0;
+	int loss_sblks = 0;
+	int notlost_bytes = 0;
+
 	INP_WLOCK_ASSERT(tptoinpcb(tp));
 
 	num_sack_blks = 0;
@@ -637,6 +642,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 		 */
 		tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
 		tp->sackhint.sacked_bytes = 0;	/* reset */
+		tp->sackhint.hole_bytes = 0;
 	}
 	/*
 	 * In the while-loop below, incoming SACK blocks (sack_blocks[]) and
@@ -661,13 +667,17 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 		 */
 		if (((temp = TAILQ_LAST(&tp->snd_holes, sackhole_head)) != NULL) &&
 		    SEQ_LEQ(tp->snd_fack,temp->end)) {
+			tp->sackhint.hole_bytes -= temp->end - temp->start;
 			temp->start = SEQ_MAX(tp->snd_fack, SEQ_MAX(tp->snd_una, th_ack));
 			temp->end = sblkp->start;
 			temp->rxmit = temp->start;
 			delivered_data += sblkp->end - sblkp->start;
+			tp->sackhint.hole_bytes += temp->end - temp->start;
+			KASSERT(tp->sackhint.hole_bytes >= 0,
+			    ("sackhint hole bytes >= 0"));
 			tp->snd_fack = sblkp->end;
-			sblkp--;
 			sack_changed = 1;
+			sblkp--;
 		} else {
 			/*
 			 * Append a new SACK hole at the tail.  If the
@@ -678,10 +688,11 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 			temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL);
 			if (temp != NULL) {
 				delivered_data += sblkp->end - sblkp->start;
+				tp->sackhint.hole_bytes += temp->end - temp->start;
 				tp->snd_fack = sblkp->end;
+				sack_changed = 1;
 				/* Go to the previous sack block. */
 				sblkp--;
-				sack_changed = 1;
 			} else {
 				/*
 				 * We failed to add a new hole based on the current
@@ -709,11 +720,29 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 		sack_changed = 1;
 	}
 	cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */
+	loss_hiack = tp->snd_fack;
+
 	/*
 	 * Since the incoming sack blocks are sorted, we can process them
 	 * making one sweep of the scoreboard.
 	 */
-	while (sblkp >= sack_blocks  && cur != NULL) {
+	while (cur != NULL) {
+		if (!(sblkp >= sack_blocks)) {
+			if (((loss_sblks >= tcprexmtthresh) ||
+			    (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg))) 
+				break;
+			loss_thresh += loss_hiack - cur->end;
+			loss_hiack = cur->start;
+			loss_sblks++;
+			if (!((loss_sblks >= tcprexmtthresh) ||
+			    (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg))) {
+				notlost_bytes += cur->end - cur->start;
+			} else {
+				break;
+			}
+			cur = TAILQ_PREV(cur, sackhole_head, scblink);
+			continue;
+		}
 		if (SEQ_GEQ(sblkp->start, cur->end)) {
 			/*
 			 * SACKs data beyond the current hole.  Go to the
@@ -727,6 +756,12 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 			 * SACKs data before the current hole.  Go to the
 			 * previous hole.
 			 */
+			loss_thresh += loss_hiack - cur->end;
+			loss_hiack = cur->start;
+			loss_sblks++;
+			if (!((loss_sblks >= tcprexmtthresh) ||
+			    (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg)))
+				notlost_bytes += cur->end - cur->start;
 			cur = TAILQ_PREV(cur, sackhole_head, scblink);
 			continue;
 		}
@@ -742,6 +777,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 				delivered_data += (cur->end - cur->start);
 				temp = cur;
 				cur = TAILQ_PREV(cur, sackhole_head, scblink);
+				tp->sackhint.hole_bytes -= temp->end - temp->start;
 				tcp_sackhole_remove(tp, temp);
 				/*
 				 * The sack block may ack all or part of the
@@ -752,6 +788,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 			} else {
 				/* Move start of hole forward. */
 				delivered_data += (sblkp->end - cur->start);
+				tp->sackhint.hole_bytes -= sblkp->end - cur->start;
 				cur->start = sblkp->end;
 				cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
 			}
@@ -760,6 +797,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 			if (SEQ_GEQ(sblkp->end, cur->end)) {
 				/* Move end of hole backward. */
 				delivered_data += (cur->end - sblkp->start);
+				tp->sackhint.hole_bytes -= cur->end - sblkp->start;
 				cur->end = sblkp->start;
 				cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
 				if ((tp->t_flags & TF_LRD) && SEQ_GEQ(cur->rxmit, cur->end))
@@ -778,6 +816,13 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 						    (SEQ_MIN(temp->rxmit,
 						    temp->end) - temp->start);
 					}
+					tp->sackhint.hole_bytes -= sblkp->end - sblkp->start;
+					loss_thresh += loss_hiack - temp->end;
+					loss_hiack = temp->start;
+					loss_sblks++;
+					if (!((loss_sblks >= tcprexmtthresh) ||
+					    (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg)))
+						notlost_bytes += temp->end - temp->start;
 					cur->end = sblkp->start;
 					cur->rxmit = SEQ_MIN(cur->rxmit,
 					    cur->end);
@@ -794,11 +839,25 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 		 * we're done with the sack block or the sack hole.
 		 * Accordingly, we advance one or the other.
 		 */
-		if (SEQ_LEQ(sblkp->start, cur->start))
+		if (SEQ_LEQ(sblkp->start, cur->start)) {
+			loss_thresh += loss_hiack - cur->end;
+			loss_hiack = cur->start;
+			loss_sblks++;
+			if (!((loss_sblks >= tcprexmtthresh) ||
+			    (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg)))
+				notlost_bytes += cur->end - cur->start;
 			cur = TAILQ_PREV(cur, sackhole_head, scblink);
-		else
+		} else {
 			sblkp--;
+		}
 	}
+
+	KASSERT(!(TAILQ_EMPTY(&tp->snd_holes) && (tp->sackhint.hole_bytes != 0)),
+	    ("SACK scoreboard empty, but accounting non-zero\n"));
+
+	KASSERT(notlost_bytes <= tp->sackhint.hole_bytes,
+	    ("SACK: more bytes marked notlost than in scoreboard holes"));
+
 	if (!(to->to_flags & TOF_SACK))
 		/*
 		 * If this ACK did not contain any
@@ -811,6 +870,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 		sack_changed = 0;
 	tp->sackhint.delivered_data = delivered_data;
 	tp->sackhint.sacked_bytes += delivered_data - left_edge_delta;
+	tp->sackhint.lost_bytes = tp->sackhint.hole_bytes - notlost_bytes;
 	KASSERT((delivered_data >= 0), ("delivered_data < 0"));
 	KASSERT((tp->sackhint.sacked_bytes >= 0), ("sacked_bytes < 0"));
 	return (sack_changed);
@@ -845,6 +905,7 @@ tcp_free_sackholes(struct tcpcb *tp)
 void
 tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
 {
+	struct sackhole *temp;
 	int num_segs = 1;
 	u_int maxseg = tcp_maxseg(tp);
 
@@ -891,8 +952,10 @@ tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
 		highdata = SEQ_MIN(highdata, tp->snd_recover);
 		if (th->th_ack != highdata) {
 			tp->snd_fack = th->th_ack;
-			(void)tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack,
-			    highdata - maxseg), highdata, NULL);
+			if ((temp = tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack,
+			    highdata - maxseg), highdata, NULL)) != NULL)
+				tp->sackhint.hole_bytes += temp->end -
+								temp->start;
 		}
 	}
 	(void) tcp_output(tp);
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index cd5712132522..11509a87c6e7 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -128,6 +128,8 @@ struct sackhint {
 	uint32_t	recover_fs;	/* Flight Size at the start of Loss recovery */
 	uint32_t	prr_delivered;	/* Total bytes delivered using PRR */
 	uint32_t	prr_out;	/* Bytes sent during IN_RECOVERY */
+	uint32_t	hole_bytes;	/* current number of bytes in scoreboard holes */
+	uint32_t	lost_bytes;	/* number of rfc6675 IsLost() bytes */
 };
 
 #define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)