git: e2c6a6d29b16 - main - tcp: include RFC6675 IsLost() in pipe calculation
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 09 Oct 2023 10:39:49 UTC
The branch main has been updated by rscheff: URL: https://cgit.FreeBSD.org/src/commit/?id=e2c6a6d29b163f936c6f9a8020021f711768808d commit e2c6a6d29b163f936c6f9a8020021f711768808d Author: Richard Scheffenegger <rscheff@FreeBSD.org> AuthorDate: 2023-10-09 09:55:14 +0000 Commit: Richard Scheffenegger <rscheff@FreeBSD.org> CommitDate: 2023-10-09 10:37:20 +0000 tcp: include RFC6675 IsLost() in pipe calculation Add more accounting while processing SACK data, to keep track of when a packet is deemed lost using the RFC6675 guidance. Together with PRR (RFC6937) this allows a sender to retransmit presumed lost packets faster, and loss recovery to complete earlier. Reviewed By: cc, rrs, #transport Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D39299 --- sys/netinet/tcp_input.c | 3 +- sys/netinet/tcp_sack.c | 77 ++++++++++++++++++++++++++++++++++++++++++++----- sys/netinet/tcp_var.h | 2 ++ 3 files changed, 74 insertions(+), 8 deletions(-) diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 11ec7d2d74d2..ba11503955dc 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -4048,7 +4048,8 @@ tcp_compute_pipe(struct tcpcb *tp) if (tp->t_fb->tfb_compute_pipe == NULL) { return (tp->snd_max - tp->snd_una + tp->sackhint.sack_bytes_rexmit - - tp->sackhint.sacked_bytes); + tp->sackhint.sacked_bytes - + tp->sackhint.lost_bytes); } else { return((*tp->t_fb->tfb_compute_pipe)(tp)); } diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c index fca4b99ee83b..9a01ff684cbb 100644 --- a/sys/netinet/tcp_sack.c +++ b/sys/netinet/tcp_sack.c @@ -555,6 +555,11 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) int i, j, num_sack_blks, sack_changed; int delivered_data, left_edge_delta; + tcp_seq loss_hiack = 0; + int loss_thresh = 0; + int loss_sblks = 0; + int notlost_bytes = 0; + INP_WLOCK_ASSERT(tptoinpcb(tp)); num_sack_blks = 0; @@ -637,6 +642,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq 
th_ack) */ tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack); tp->sackhint.sacked_bytes = 0; /* reset */ + tp->sackhint.hole_bytes = 0; } /* * In the while-loop below, incoming SACK blocks (sack_blocks[]) and @@ -661,13 +667,17 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) */ if (((temp = TAILQ_LAST(&tp->snd_holes, sackhole_head)) != NULL) && SEQ_LEQ(tp->snd_fack,temp->end)) { + tp->sackhint.hole_bytes -= temp->end - temp->start; temp->start = SEQ_MAX(tp->snd_fack, SEQ_MAX(tp->snd_una, th_ack)); temp->end = sblkp->start; temp->rxmit = temp->start; delivered_data += sblkp->end - sblkp->start; + tp->sackhint.hole_bytes += temp->end - temp->start; + KASSERT(tp->sackhint.hole_bytes >= 0, + ("sackhint hole bytes >= 0")); tp->snd_fack = sblkp->end; - sblkp--; sack_changed = 1; + sblkp--; } else { /* * Append a new SACK hole at the tail. If the @@ -678,10 +688,11 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL); if (temp != NULL) { delivered_data += sblkp->end - sblkp->start; + tp->sackhint.hole_bytes += temp->end - temp->start; tp->snd_fack = sblkp->end; + sack_changed = 1; /* Go to the previous sack block. */ sblkp--; - sack_changed = 1; } else { /* * We failed to add a new hole based on the current @@ -709,11 +720,29 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) sack_changed = 1; } cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */ + loss_hiack = tp->snd_fack; + /* * Since the incoming sack blocks are sorted, we can process them * making one sweep of the scoreboard. 
*/ - while (sblkp >= sack_blocks && cur != NULL) { + while (cur != NULL) { + if (!(sblkp >= sack_blocks)) { + if (((loss_sblks >= tcprexmtthresh) || + (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg))) + break; + loss_thresh += loss_hiack - cur->end; + loss_hiack = cur->start; + loss_sblks++; + if (!((loss_sblks >= tcprexmtthresh) || + (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg))) { + notlost_bytes += cur->end - cur->start; + } else { + break; + } + cur = TAILQ_PREV(cur, sackhole_head, scblink); + continue; + } if (SEQ_GEQ(sblkp->start, cur->end)) { /* * SACKs data beyond the current hole. Go to the @@ -727,6 +756,12 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) * SACKs data before the current hole. Go to the * previous hole. */ + loss_thresh += loss_hiack - cur->end; + loss_hiack = cur->start; + loss_sblks++; + if (!((loss_sblks >= tcprexmtthresh) || + (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg))) + notlost_bytes += cur->end - cur->start; cur = TAILQ_PREV(cur, sackhole_head, scblink); continue; } @@ -742,6 +777,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) delivered_data += (cur->end - cur->start); temp = cur; cur = TAILQ_PREV(cur, sackhole_head, scblink); + tp->sackhint.hole_bytes -= temp->end - temp->start; tcp_sackhole_remove(tp, temp); /* * The sack block may ack all or part of the @@ -752,6 +788,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) } else { /* Move start of hole forward. */ delivered_data += (sblkp->end - cur->start); + tp->sackhint.hole_bytes -= sblkp->end - cur->start; cur->start = sblkp->end; cur->rxmit = SEQ_MAX(cur->rxmit, cur->start); } @@ -760,6 +797,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) if (SEQ_GEQ(sblkp->end, cur->end)) { /* Move end of hole backward. 
*/ delivered_data += (cur->end - sblkp->start); + tp->sackhint.hole_bytes -= cur->end - sblkp->start; cur->end = sblkp->start; cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); if ((tp->t_flags & TF_LRD) && SEQ_GEQ(cur->rxmit, cur->end)) @@ -778,6 +816,13 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) (SEQ_MIN(temp->rxmit, temp->end) - temp->start); } + tp->sackhint.hole_bytes -= sblkp->end - sblkp->start; + loss_thresh += loss_hiack - temp->end; + loss_hiack = temp->start; + loss_sblks++; + if (!((loss_sblks >= tcprexmtthresh) || + (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg))) + notlost_bytes += temp->end - temp->start; cur->end = sblkp->start; cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); @@ -794,11 +839,25 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) * we're done with the sack block or the sack hole. * Accordingly, we advance one or the other. */ - if (SEQ_LEQ(sblkp->start, cur->start)) + if (SEQ_LEQ(sblkp->start, cur->start)) { + loss_thresh += loss_hiack - cur->end; + loss_hiack = cur->start; + loss_sblks++; + if (!((loss_sblks >= tcprexmtthresh) || + (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg))) + notlost_bytes += cur->end - cur->start; cur = TAILQ_PREV(cur, sackhole_head, scblink); - else + } else { sblkp--; + } } + + KASSERT(!(TAILQ_EMPTY(&tp->snd_holes) && (tp->sackhint.hole_bytes != 0)), + ("SACK scoreboard empty, but accounting non-zero\n")); + + KASSERT(notlost_bytes <= tp->sackhint.hole_bytes, + ("SACK: more bytes marked notlost than in scoreboard holes")); + if (!(to->to_flags & TOF_SACK)) /* * If this ACK did not contain any @@ -811,6 +870,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) sack_changed = 0; tp->sackhint.delivered_data = delivered_data; tp->sackhint.sacked_bytes += delivered_data - left_edge_delta; + tp->sackhint.lost_bytes = tp->sackhint.hole_bytes - notlost_bytes; KASSERT((delivered_data >= 0), ("delivered_data < 0")); KASSERT((tp->sackhint.sacked_bytes >= 0), 
("sacked_bytes < 0")); return (sack_changed); @@ -845,6 +905,7 @@ tcp_free_sackholes(struct tcpcb *tp) void tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th) { + struct sackhole *temp; int num_segs = 1; u_int maxseg = tcp_maxseg(tp); @@ -891,8 +952,10 @@ tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th) highdata = SEQ_MIN(highdata, tp->snd_recover); if (th->th_ack != highdata) { tp->snd_fack = th->th_ack; - (void)tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack, - highdata - maxseg), highdata, NULL); + if ((temp = tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack, + highdata - maxseg), highdata, NULL)) != NULL) + tp->sackhint.hole_bytes += temp->end - + temp->start; } } (void) tcp_output(tp); diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index cd5712132522..11509a87c6e7 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -128,6 +128,8 @@ struct sackhint { uint32_t recover_fs; /* Flight Size at the start of Loss recovery */ uint32_t prr_delivered; /* Total bytes delivered using PRR */ uint32_t prr_out; /* Bytes sent during IN_RECOVERY */ + uint32_t hole_bytes; /* current number of bytes in scoreboard holes */ + uint32_t lost_bytes; /* number of rfc6675 IsLost() bytes */ }; #define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)