git: f3bba8cd62f2 - main - TCP without LRO doing static pacing does not always pace as expected.
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sun, 15 Jun 2025 09:05:00 UTC
The branch main has been updated by rrs: URL: https://cgit.FreeBSD.org/src/commit/?id=f3bba8cd62f2ddcacba7d9c36a2cc21b1fe11968 commit f3bba8cd62f2ddcacba7d9c36a2cc21b1fe11968 Author: Randall Stewart <rrs@FreeBSD.org> AuthorDate: 2025-06-15 09:02:39 +0000 Commit: Randall Stewart <rrs@FreeBSD.org> CommitDate: 2025-06-15 09:02:39 +0000 TCP without LRO doing static pacing does not always pace as expected. There are two bugs: one that can cause a surprise extra packet, i.e. the burst limit being overstepped, and another one where the burst limit is understepped. This patch fixes those errors. Reported by: tuexen Reviewed by: tuexen Differential Revision: <https://reviews.freebsd.org/D50858> --- sys/netinet/tcp_stacks/rack.c | 56 +++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index cb4e22401c12..1d4bc3124058 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -17178,6 +17178,12 @@ rack_log_pacing_delay_calc (struct tcp_rack *rack, uint32_t len, uint32_t slot, log.u_bbr.cwnd_gain |= rack->rc_gp_saw_ss; log.u_bbr.cwnd_gain <<= 1; log.u_bbr.cwnd_gain |= rack->rc_gp_saw_ca; + log.u_bbr.cwnd_gain <<= 1; + log.u_bbr.cwnd_gain |= rack->use_fixed_rate; + log.u_bbr.cwnd_gain <<= 1; + log.u_bbr.cwnd_gain |= rack->rc_always_pace; + log.u_bbr.cwnd_gain <<= 1; + log.u_bbr.cwnd_gain |= rack->gp_ready; log.u_bbr.bbr_substate = quality; log.u_bbr.bbr_state = rack->dgp_on; log.u_bbr.bbr_state <<= 1; @@ -17539,8 +17545,8 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str rack->r_ctl.rc_last_us_rtt, 88, __LINE__, NULL, gain); } - if ((bw_est == 0) || (rate_wanted == 0) || - ((rack->gp_ready == 0) && (rack->use_fixed_rate == 0))) { + if (((bw_est == 0) || (rate_wanted == 0) || (rack->gp_ready == 0)) && + (rack->use_fixed_rate == 0)) { /* * No way yet to make a b/w estimate or * our raise is set incorrectly. 
@@ -19039,7 +19045,7 @@ rack_sndbuf_autoscale(struct tcp_rack *rack) static int rack_fast_output(struct tcpcb *tp, struct tcp_rack *rack, uint64_t ts_val, - uint32_t cts, uint32_t ms_cts, struct timeval *tv, long tot_len, int *send_err) + uint32_t cts, uint32_t ms_cts, struct timeval *tv, long *tot_len, int *send_err, int line) { /* * Enter to do fast output. We are given that the sched_pin is @@ -19212,7 +19218,7 @@ again: } if (rack->r_ctl.fsb.rfo_apply_push && (len == rack->r_ctl.fsb.left_to_send)) { - tcp_set_flags(th, flags | TH_PUSH); + flags |= TH_PUSH; add_flag |= RACK_HAD_PUSH; } if ((m->m_next == NULL) || (len <= 0)){ @@ -19391,11 +19397,11 @@ again: log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); log.u_bbr.flex5 = log.u_bbr.inflight; log.u_bbr.lt_epoch = rack->r_ctl.cwnd_to_use; - log.u_bbr.delivered = 0; + log.u_bbr.delivered = rack->r_ctl.fsb.left_to_send; log.u_bbr.rttProp = 0; log.u_bbr.delRate = rack->r_must_retran; log.u_bbr.delRate <<= 1; - log.u_bbr.pkt_epoch = __LINE__; + log.u_bbr.pkt_epoch = line; /* For fast output no retrans so just inflight and how many mss we send */ log.u_bbr.flex5 = log.u_bbr.inflight; log.u_bbr.bbr_substate = (uint8_t)((len + segsiz - 1)/segsiz); @@ -19468,7 +19474,7 @@ again: tcp_account_for_send(tp, len, 0, 0, rack->r_ctl.fsb.hw_tls); rack->forced_ack = 0; /* If we send something zap the FA flag */ - tot_len += len; + *tot_len += len; if ((tp->t_flags & TF_GPUTINPROG) == 0) rack_start_gp_measurement(tp, rack, tp->snd_max, sb_offset); tp->snd_max += len; @@ -19504,6 +19510,7 @@ again: } if ((rack->r_ctl.fsb.left_to_send >= segsiz) && (max_val > len) && + (*tot_len < rack->r_ctl.rc_pace_max_segs) && (tso == 0)) { max_val -= len; len = segsiz; @@ -19515,14 +19522,14 @@ again: } tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); counter_u64_add(rack_fto_send, 1); - slot = rack_get_pacing_delay(rack, tp, tot_len, NULL, segsiz, __LINE__); - rack_start_hpts_timer(rack, tp, cts, slot, tot_len, 0); + slot = 
rack_get_pacing_delay(rack, tp, *tot_len, NULL, segsiz, __LINE__); + rack_start_hpts_timer(rack, tp, cts, slot, *tot_len, 0); #ifdef TCP_ACCOUNTING crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_DATA] += cnt_thru; tp->tcp_proc_time[SND_OUT_DATA] += (crtsc - ts_val); - tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((tot_len + segsiz - 1) / segsiz); + tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((*tot_len + segsiz - 1) / segsiz); } sched_unpin(); #endif @@ -19884,20 +19891,36 @@ rack_output(struct tcpcb *tp) TCPS_HAVEESTABLISHED(tp->t_state)) { rack_set_state(tp, rack); } + segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs); + minseg = segsiz; + if (rack->r_ctl.rc_pace_max_segs == 0) + pace_max_seg = rack->rc_user_set_max_segs * segsiz; + else + pace_max_seg = rack->r_ctl.rc_pace_max_segs; if ((rack->r_fast_output) && (doing_tlp == 0) && (tp->rcv_numsacks == 0)) { int ret; error = 0; - ret = rack_fast_output(tp, rack, ts_val, cts, ms_cts, &tv, tot_len_this_send, &error); - if (ret >= 0) + ret = rack_fast_output(tp, rack, ts_val, cts, ms_cts, &tv, &tot_len_this_send, &error, __LINE__); + if (ret > 0) return(ret); else if (error) { inp = rack->rc_inp; so = inp->inp_socket; sb = &so->so_snd; goto nomore; + } else { + /* Return == 0, if there is more we can send tot_len wise fall through and send */ + if (tot_len_this_send >= pace_max_seg) + return (ret); +#ifdef TCP_ACCOUNTING + /* We need to re-pin since fast_output un-pined */ + sched_pin(); + ts_val = get_cyclecount(); +#endif + /* Fall back out so we can send any more that may bring us to pace_max_seg */ } } inp = rack->rc_inp; @@ -20004,12 +20027,6 @@ again: ms_cts = tcp_tv_to_mssectick(&tv); tso = 0; mtu = 0; - segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs); - minseg = segsiz; - if (rack->r_ctl.rc_pace_max_segs == 0) - pace_max_seg = rack->rc_user_set_max_segs * segsiz; - else - pace_max_seg = rack->r_ctl.rc_pace_max_segs; if 
(TCPS_HAVEESTABLISHED(tp->t_state) && (rack->r_ctl.pcm_max_seg == 0)) { /* @@ -21593,7 +21610,6 @@ send: flags |= TH_PUSH; add_flag |= RACK_HAD_PUSH; } - SOCK_SENDBUF_UNLOCK(so); } else { SOCK_SENDBUF_UNLOCK(so); @@ -22536,7 +22552,7 @@ enobufs: segsiz, pace_max_seg, hw_tls, flags); if (rack->r_fast_output) { error = 0; - ret = rack_fast_output(tp, rack, ts_val, cts, ms_cts, &tv, tot_len_this_send, &error); + ret = rack_fast_output(tp, rack, ts_val, cts, ms_cts, &tv, &tot_len_this_send, &error, __LINE__); if (ret >= 0) return (ret); else if (error)