git: b8ffda0a37e1 - releng/13.0 - tcp: various improvements and fixes to PRR - Ensure cwnd doesn't shrink to zero with PRR - use accurate rfc6675_pipe when enabled - Address two incorrect calculations and enhance readability - address second instance of cwnd potentially becoming zero - fix sublte bug due to implicit int to uint typecase in max() - fix bug due to typo in hand-coded CEILING() function by using howmany() macro - use int instead of long, and add a missing long typecast - replace if conditionals with easier to read imax/imin (as in pseudocode) - Avoid accounting left-edge twice in partial ACK. - Include new data sent in PRR calculation - Improve PRR initial transmission timing - Fix prr_out when pipe < ssthresh

Richard Scheffenegger rscheff at FreeBSD.org
Thu Mar 4 12:00:51 UTC 2021


The branch releng/13.0 has been updated by rscheff:

URL: https://cgit.FreeBSD.org/src/commit/?id=b8ffda0a37e126623069bce49e8096dbdb4bfde3

commit b8ffda0a37e126623069bce49e8096dbdb4bfde3
Author:     Richard Scheffenegger <rscheff at FreeBSD.org>
AuthorDate: 2021-02-19 12:52:06 +0000
Commit:     Richard Scheffenegger <rscheff at FreeBSD.org>
CommitDate: 2021-03-04 11:56:35 +0000

    tcp: various improvements and fixes to PRR
    - Ensure cwnd doesn't shrink to zero with PRR
    - use accurate rfc6675_pipe when enabled
    - Address two incorrect calculations and enhance readability
    - address second instance of cwnd potentially becoming zero
    - fix sublte bug due to implicit int to uint typecase in max()
    - fix bug due to typo in hand-coded CEILING() function by using howmany() macro
    - use int instead of long, and add a missing long typecast
    - replace if conditionals with easier to read imax/imin (as in pseudocode)
    - Avoid accounting left-edge twice in partial ACK.
    - Include new data sent in PRR calculation
    - Improve PRR initial transmission timing
    - Fix prr_out when pipe < ssthresh
    
    Reviewed By:    #transport, kbowling, tuexen
    Reported by:    Liang Tian
    Approved by:    re (gjb)
    MFC after:      3 days
    Sponsored by:   NetApp, Inc.
    Differential Revision:  https://reviews.freebsd.org/D28780
    Differential Revision:  https://reviews.freebsd.org/D28816
    Differential Revision:  https://reviews.freebsd.org/D28813
    Differential Revision:  https://reviews.freebsd.org/D28819
    Differential Revision:  https://reviews.freebsd.org/D28941
    Differential Revision:  https://reviews.freebsd.org/D28953
    Differential Revision:  https://reviews.freebsd.org/D28998
    
    (cherry picked from commit 853fd7a2e39802e46bd3d6476529796ac22412d9)
    (cherry picked from commit c3aa4ba5dfc084e40e4151b25a0d5f8d24a036ba)
    
    (cherry picked from commit a8e431e1537d056a3f9e466eaceec28c399c220b)
    (cherry picked from commit 32ed0ef06b8326271c4665406cac81fa47d0d29c)
    
    (cherry picked from commit 48396dc77922c68377ecac0ead2f8b0b5453c451)
    (cherry picked from commit ffbf1b809ef5080afa130c11fa4afce9fef7e7fe)
    
    (cherry picked from commit 31d7a27c6e88c3d5bd0907774ec70176a92da5bb)
    (cherry picked from commit 25fb4c363b299c26c35158fa059379af4a007253)
    
    (cherry picked from commit 9e83a6a556ed8d9a2821de5d5f5c7d4b1292c694)
    (cherry picked from commit 9596751563dc1819d177fa3ec6660a3bdc18021c)
    
    (cherry picked from commit e9071000c9a04e3f685579500e24da9848944bb1)
    (cherry picked from commit 2b43cd2ea26f758d23e8c009cf7521c4ddec7353)
    
    (cherry picked from commit 0b0f8b359d0b94b09cfec35e5d5de01b23c7fbf1)
    (cherry picked from commit 05e742af6f548364909ed671f0b3774e54dc77d1)
---
 sys/netinet/tcp_input.c | 95 ++++++++++++++++++++++++-------------------------
 1 file changed, 47 insertions(+), 48 deletions(-)

diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 459b78cd444a..f2edbecbb079 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -2577,8 +2577,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 					if (V_tcp_do_prr &&
 					    IN_FASTRECOVERY(tp->t_flags) &&
 					    (tp->t_flags & TF_SACK_PERMIT)) {
-						long snd_cnt = 0, limit = 0;
-						long del_data = 0, pipe = 0;
+						int snd_cnt = 0, limit = 0;
+						int del_data = 0, pipe = 0;
 						/*
 						 * In a duplicate ACK del_data is only the
 						 * diff_in_sack. If no SACK is used del_data
@@ -2586,45 +2586,41 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 						 * estimate to be in the network.
 						 */
 						del_data = tp->sackhint.delivered_data;
-						pipe = (tp->snd_nxt - tp->snd_fack) +
-							tp->sackhint.sack_bytes_rexmit;
+						if (V_tcp_do_rfc6675_pipe)
+							pipe = tcp_compute_pipe(tp);
+						else
+							pipe = (tp->snd_nxt - tp->snd_fack) +
+								tp->sackhint.sack_bytes_rexmit;
 						tp->sackhint.prr_delivered += del_data;
-						if (pipe > tp->snd_ssthresh) {
+						if (pipe >= tp->snd_ssthresh) {
 							if (tp->sackhint.recover_fs == 0)
 								tp->sackhint.recover_fs =
-								    max(1, tp->snd_nxt - tp->snd_una);
-							snd_cnt = (tp->sackhint.prr_delivered *
-							    tp->snd_ssthresh /
-							    tp->sackhint.recover_fs) +
-							    1 - tp->sackhint.sack_bytes_rexmit;
+								    imax(1, tp->snd_nxt - tp->snd_una);
+							snd_cnt = howmany((long)tp->sackhint.prr_delivered *
+							    tp->snd_ssthresh, tp->sackhint.recover_fs) -
+							    (tp->sackhint.sack_bytes_rexmit +
+							    (tp->snd_nxt - tp->snd_recover));
 						} else {
 							if (V_tcp_do_prr_conservative)
 								limit = tp->sackhint.prr_delivered -
-									tp->sackhint.sack_bytes_rexmit;
+									(tp->sackhint.sack_bytes_rexmit +
+									(tp->snd_nxt - tp->snd_recover));
 							else
-								if ((tp->sackhint.prr_delivered -
-								    tp->sackhint.sack_bytes_rexmit) >
-								    del_data)
-									limit = tp->sackhint.prr_delivered -
-									    tp->sackhint.sack_bytes_rexmit +
-									    maxseg;
-								else
-									limit = del_data + maxseg;
-							if ((tp->snd_ssthresh - pipe) < limit)
-								snd_cnt = tp->snd_ssthresh - pipe;
-							else
-								snd_cnt = limit;
+								limit = imax(tp->sackhint.prr_delivered -
+									    (tp->sackhint.sack_bytes_rexmit +
+									    (tp->snd_nxt - tp->snd_recover)),
+									    del_data) + maxseg;
+							snd_cnt = imin(tp->snd_ssthresh - pipe, limit);
 						}
-						snd_cnt = max((snd_cnt / maxseg), 0);
+						snd_cnt = imax(snd_cnt, 0) / maxseg;
 						/*
 						 * Send snd_cnt new data into the network in
 						 * response to this ACK. If there is a going
 						 * to be a SACK retransmission, adjust snd_cwnd
 						 * accordingly.
 						 */
-						tp->snd_cwnd = tp->snd_nxt - tp->snd_recover +
-						    tp->sackhint.sack_bytes_rexmit +
-						    (snd_cnt * maxseg);
+						tp->snd_cwnd = imax(maxseg, tp->snd_nxt - tp->snd_recover +
+						    tp->sackhint.sack_bytes_rexmit + (snd_cnt * maxseg));
 					} else if ((tp->t_flags & TF_SACK_PERMIT) &&
 					    IN_FASTRECOVERY(tp->t_flags)) {
 						int awnd;
@@ -2684,7 +2680,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 						 * snd_ssthresh is already updated by
 						 * cc_cong_signal.
 						 */
-						tp->sackhint.prr_delivered = 0;
+						tp->sackhint.prr_delivered =
+						    tp->sackhint.sacked_bytes;
 						tp->sackhint.sack_bytes_rexmit = 0;
 						tp->sackhint.recover_fs = max(1,
 						    tp->snd_nxt - tp->snd_una);
@@ -3927,7 +3924,7 @@ tcp_mssopt(struct in_conninfo *inc)
 void
 tcp_prr_partialack(struct tcpcb *tp, struct tcphdr *th)
 {
-	long snd_cnt = 0, limit = 0, del_data = 0, pipe = 0;
+	int snd_cnt = 0, limit = 0, del_data = 0, pipe = 0;
 	int maxseg = tcp_maxseg(tp);
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -3939,41 +3936,43 @@ tcp_prr_partialack(struct tcpcb *tp, struct tcphdr *th)
 	 * (del_data) and an estimate of how many bytes are in the
 	 * network.
 	 */
-	if (SEQ_GEQ(th->th_ack, tp->snd_una))
-		del_data = BYTES_THIS_ACK(tp, th);
-	del_data += tp->sackhint.delivered_data;
-	pipe = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit;
+	del_data = tp->sackhint.delivered_data;
+	if (V_tcp_do_rfc6675_pipe)
+		pipe = tcp_compute_pipe(tp);
+	else
+		pipe = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit;
 	tp->sackhint.prr_delivered += del_data;
 	/*
 	 * Proportional Rate Reduction
 	 */
-	if (pipe > tp->snd_ssthresh) {
+	if (pipe >= tp->snd_ssthresh) {
 		if (tp->sackhint.recover_fs == 0)
 			tp->sackhint.recover_fs =
-			    max(1, tp->snd_nxt - tp->snd_una);
-		snd_cnt = (tp->sackhint.prr_delivered * tp->snd_ssthresh /
-		    tp->sackhint.recover_fs) - tp->sackhint.sack_bytes_rexmit;
+			    imax(1, tp->snd_nxt - tp->snd_una);
+		snd_cnt = howmany((long)tp->sackhint.prr_delivered *
+			    tp->snd_ssthresh, tp->sackhint.recover_fs) -
+			    (tp->sackhint.sack_bytes_rexmit +
+			    (tp->snd_nxt - tp->snd_recover));
 	} else {
 		if (V_tcp_do_prr_conservative)
 			limit = tp->sackhint.prr_delivered -
-			    tp->sackhint.sack_bytes_rexmit;
+			    (tp->sackhint.sack_bytes_rexmit +
+			    (tp->snd_nxt - tp->snd_recover));
 		else
-			if ((tp->sackhint.prr_delivered -
-			    tp->sackhint.sack_bytes_rexmit) > del_data)
-				limit = tp->sackhint.prr_delivered -
-				    tp->sackhint.sack_bytes_rexmit + maxseg;
-			else
-				limit = del_data + maxseg;
-		snd_cnt = min((tp->snd_ssthresh - pipe), limit);
+			limit = imax(tp->sackhint.prr_delivered -
+				    (tp->sackhint.sack_bytes_rexmit +
+				    (tp->snd_nxt - tp->snd_recover)),
+				    del_data) + maxseg;
+		snd_cnt = imin((tp->snd_ssthresh - pipe), limit);
 	}
-	snd_cnt = max((snd_cnt / maxseg), 0);
+	snd_cnt = imax(snd_cnt, 0) / maxseg;
 	/*
 	 * Send snd_cnt new data into the network in response to this ack.
 	 * If there is going to be a SACK retransmission, adjust snd_cwnd
 	 * accordingly.
 	 */
-	tp->snd_cwnd = tp->snd_nxt - tp->snd_recover +
-		tp->sackhint.sack_bytes_rexmit + (snd_cnt * maxseg);
+	tp->snd_cwnd = imax(maxseg, tp->snd_nxt - tp->snd_recover +
+		tp->sackhint.sack_bytes_rexmit + (snd_cnt * maxseg));
 	tp->t_flags |= TF_ACKNOW;
 	(void) tcp_output(tp);
 }


More information about the dev-commits-src-all mailing list