git: c112243f6bd3 - main - Revert "Update to bring the rack stack with all its fixes in."
Date: Mon, 11 Mar 2024 20:28:53 UTC
The branch main has been updated by brooks:
URL: https://cgit.FreeBSD.org/src/commit/?id=c112243f6bd3738ab1ca0fc56ba1accef66c9d7a
commit c112243f6bd3738ab1ca0fc56ba1accef66c9d7a
Author: Brooks Davis <brooks@FreeBSD.org>
AuthorDate: 2024-03-11 20:15:20 +0000
Commit: Brooks Davis <brooks@FreeBSD.org>
CommitDate: 2024-03-11 20:28:24 +0000
Revert "Update to bring the rack stack with all its fixes in."
This commit was incomplete and breaks LINT kernels. The tree has been
broken for 8+ hours.
This reverts commit f6d489f402c320f1a6eaa473491a0b8c3878113e.
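(For anyone retracing this locally: the revert itself is the stock git workflow. A minimal sketch, assuming a checkout of the FreeBSD src repository; the LINT build step is illustrative, assumes an amd64 tree, and presumes the toolchain is already built.)

  # Create a commit that undoes the incomplete change.
  git revert f6d489f402c320f1a6eaa473491a0b8c3878113e

  # Verify before pushing: generate the LINT config from NOTES and
  # build it, since LINT is the configuration this change broke.
  make -C sys/amd64/conf LINT
  make buildkernel KERNCONF=LINT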
---
sys/modules/tcp/rack/Makefile | 2 +-
sys/netinet/tcp.h | 38 +-
sys/netinet/tcp_log_buf.h | 9 +-
sys/netinet/tcp_stacks/bbr.c | 4 +-
sys/netinet/tcp_stacks/rack.c | 4447 +++++++++-------------------------
sys/netinet/tcp_stacks/rack_pcm.c | 0
sys/netinet/tcp_stacks/sack_filter.h | 5 -
sys/netinet/tcp_stacks/tailq_hash.c | 33 +-
sys/netinet/tcp_stacks/tailq_hash.h | 8 +-
sys/netinet/tcp_stacks/tcp_rack.h | 135 +-
sys/netinet/tcp_subr.c | 57 +-
sys/netinet/tcp_syncache.c | 5 +-
sys/netinet/tcp_usrreq.c | 7 -
sys/netinet/tcp_var.h | 12 +-
14 files changed, 1173 insertions(+), 3589 deletions(-)
diff --git a/sys/modules/tcp/rack/Makefile b/sys/modules/tcp/rack/Makefile
index d5f3ba170f68..c5bb20602337 100644
--- a/sys/modules/tcp/rack/Makefile
+++ b/sys/modules/tcp/rack/Makefile
@@ -5,7 +5,7 @@
STACKNAME= rack
KMOD= tcp_${STACKNAME}
-SRCS= rack.c sack_filter.c rack_bbr_common.c tailq_hash.c rack_pcm.c
+SRCS= rack.c sack_filter.c rack_bbr_common.c tailq_hash.c
SRCS+= opt_inet.h opt_inet6.h opt_ipsec.h
SRCS+= opt_kern_tls.h
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index a8259fa30a3a..f9e561f6ce35 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -334,22 +334,9 @@ __tcp_set_flags(struct tcphdr *th, uint16_t flags)
#define TCP_RACK_PACING_DIVISOR 1146 /* Pacing divisor given to rate-limit code for burst sizing */
#define TCP_RACK_PACE_MIN_SEG 1147 /* Pacing min seg size rack will use */
#define TCP_RACK_DGP_IN_REC 1148 /* Do we use full DGP in recovery? */
-#define TCP_POLICER_DETECT 1149 /* Do we apply thresholds to rack to detect and compensate for policers? */
-#define TCP_RXT_CLAMP TCP_POLICER_DETECT
+#define TCP_RXT_CLAMP 1149 /* Do we apply a threshold to rack so excess rxt clamps cwnd? */
#define TCP_HYBRID_PACING 1150 /* Hybrid pacing enablement */
#define TCP_PACING_DND 1151 /* When pacing with rr_config=3 can sacks disturb us */
-#define TCP_SS_EEXIT 1152 /* Do we do early exit from slow start if no b/w growth */
-#define TCP_DGP_UPPER_BOUNDS 1153 /* SS and CA upper bound in percentage */
-#define TCP_NO_TIMELY 1154 /* Disable/enable Timely */
-#define TCP_HONOR_HPTS_MIN 1155 /* Do we honor hpts min to */
-#define TCP_REC_IS_DYN 1156 /* Do we allow timely to change recovery multiplier? */
-#define TCP_SIDECHAN_DIS 1157 /* Disable/enable the side-channel */
-#define TCP_FILLCW_RATE_CAP 1158 /* Set a cap for DGP's fillcw */
-#define TCP_POLICER_MSS 1159 /* Policer MSS requirement */
-#define TCP_STACK_SPEC_INFO 1160 /* Get stack specific information (if present) */
-#define RACK_CSPR_IS_FCC 1161
-#define TCP_GP_USE_LTBW 1162 /* how we use lt_bw 0=not, 1=min, 2=max */
-
/* Start of reserved space for third-party user-settable options. */
#define TCP_VENDOR SO_VENDOR
@@ -460,7 +447,6 @@ struct tcp_info {
u_int32_t tcpi_rcv_adv; /* Peer advertised window */
u_int32_t tcpi_dupacks; /* Consecutive dup ACKs recvd */
- u_int32_t tcpi_rttmin; /* Min observed RTT */
/* Padding to grow without breaking ABI. */
u_int32_t __tcpi_pad[14]; /* Padding. */
};
@@ -477,20 +463,6 @@ struct tcp_fastopen {
#define TCP_FUNCTION_NAME_LEN_MAX 32
-struct stack_specific_info {
- char stack_name[TCP_FUNCTION_NAME_LEN_MAX];
- uint64_t policer_last_bw; /* Only valid if detection enabled and policer detected */
- uint64_t bytes_transmitted;
- uint64_t bytes_retransmitted;
- uint32_t policer_detection_enabled: 1,
- policer_detected : 1, /* transport thinks a policer is on path */
- highly_buffered : 1, /* transport considers the path highly buffered */
- spare : 29;
- uint32_t policer_bucket_size; /* Only valid if detection enabled and policer detected */
- uint32_t current_round;
- uint32_t _rack_i_pad[18];
-};
-
struct tcp_function_set {
char function_set_name[TCP_FUNCTION_NAME_LEN_MAX];
uint32_t pcbcnt;
@@ -516,7 +488,6 @@ struct tcp_snd_req {
uint64_t start;
uint64_t end;
uint32_t flags;
- uint32_t playout_ms;
};
union tcp_log_userdata {
@@ -547,12 +518,9 @@ struct tcp_log_user {
#define TCP_HYBRID_PACING_H_MS 0x0008 /* A client hint for maxseg is present */
#define TCP_HYBRID_PACING_ENABLE 0x0010 /* We are enabling hybrid pacing else disable */
#define TCP_HYBRID_PACING_S_MSS 0x0020 /* Client wants us to set the mss overriding gp est in CU */
-#define TCP_HAS_PLAYOUT_MS 0x0040 /* The client included the chunk playout milliseconds: deprecate */
-/* the below are internal only flags */
-#define TCP_HYBRID_PACING_USER_MASK 0x0FFF /* Non-internal flags mask */
-#define TCP_HYBRID_PACING_SETMSS 0x1000 /* Internal flag that tells us we set the mss on this entry */
+#define TCP_HYBRID_PACING_SETMSS 0x1000 /* Internal flag that tells us we set the mss on this entry */
#define TCP_HYBRID_PACING_WASSET 0x2000 /* We init to this to know if a hybrid command was issued */
-#define TCP_HYBRID_PACING_SENDTIME 0x4000 /* Duplicate tm to last, use sendtime for catch up mode */
+
struct tcp_hybrid_req {
struct tcp_snd_req req;
diff --git a/sys/netinet/tcp_log_buf.h b/sys/netinet/tcp_log_buf.h
index 2e91d9cbdf3c..1f5b7cf9b54f 100644
--- a/sys/netinet/tcp_log_buf.h
+++ b/sys/netinet/tcp_log_buf.h
@@ -267,9 +267,7 @@ enum tcp_log_events {
TCP_RACK_TP_TRIGGERED, /* A rack tracepoint is triggered 68 */
TCP_HYBRID_PACING_LOG, /* Hybrid pacing log 69 */
TCP_LOG_PRU, /* TCP protocol user request 70 */
- TCP_POLICER_DET, /* TCP Policer detection 71 */
- TCP_PCM_MEASURE, /* TCP Path Capacity Measurement 72 */
- TCP_LOG_END /* End (keep at end) 72 */
+ TCP_LOG_END /* End (keep at end) 71 */
};
enum tcp_log_states {
@@ -373,11 +371,10 @@ struct tcp_log_dev_log_queue {
#define TCP_TP_COLLAPSED_RXT 0x00000004 /* When we actually retransmit a collapsed window rsm */
#define TCP_TP_REQ_LOG_FAIL 0x00000005 /* We tried to allocate a Request log but had no space */
#define TCP_TP_RESET_RCV 0x00000006 /* Triggers when we receive a RST */
-#define TCP_TP_POLICER_DET 0x00000007 /* When we detect a policer */
-#define TCP_TP_EXCESS_RXT TCP_TP_POLICER_DET /* alias */
+#define TCP_TP_EXCESS_RXT 0x00000007 /* When we get excess RXT's clamping the cwnd */
#define TCP_TP_SAD_TRIGGERED 0x00000008 /* Sack Attack Detection triggers */
+
#define TCP_TP_SAD_SUSPECT 0x0000000a /* A sack has suspicious information in it */
-#define TCP_TP_PACED_BOTTOM 0x0000000b /* We have paced at the bottom */
#ifdef _KERNEL
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 934b35bd22d7..931beba7a262 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -11529,9 +11529,7 @@ bbr_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
bbr_set_pktepoch(bbr, cts, __LINE__);
bbr_check_bbr_for_state(bbr, cts, __LINE__, (bbr->r_ctl.rc_lost - lost));
if (nxt_pkt == 0) {
- if ((bbr->r_wanted_output != 0) ||
- (tp->t_flags & TF_ACKNOW)) {
-
+ if (bbr->r_wanted_output != 0) {
bbr->rc_output_starts_timer = 0;
did_out = 1;
if (tcp_output(tp) < 0)
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 1fe07fa8d641..49d946dbb63b 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -142,12 +142,9 @@ VNET_DECLARE(uint32_t, newreno_beta_ecn);
#define V_newreno_beta VNET(newreno_beta)
#define V_newreno_beta_ecn VNET(newreno_beta_ecn)
-#define M_TCPFSB __CONCAT(M_TCPFSB, STACKNAME)
-#define M_TCPDO __CONCAT(M_TCPDO, STACKNAME)
-MALLOC_DEFINE(M_TCPFSB, "tcp_fsb_" __XSTRING(STACKNAME), "TCP fast send block");
-MALLOC_DEFINE(M_TCPDO, "tcp_do_" __XSTRING(STACKNAME), "TCP deferred options");
-MALLOC_DEFINE(M_TCPPCM, "tcp_pcm_" __XSTRING(STACKNAME), "TCP PCM measurement information");
+MALLOC_DEFINE(M_TCPFSB, "tcp_fsb", "TCP fast send block");
+MALLOC_DEFINE(M_TCPDO, "tcp_do", "TCP deferred options");
struct sysctl_ctx_list rack_sysctl_ctx;
struct sysctl_oid *rack_sysctl_root;
@@ -193,24 +190,12 @@ static int32_t rack_tlp_use_greater = 1;
static int32_t rack_reorder_thresh = 2;
static int32_t rack_reorder_fade = 60000000; /* 0 - never fade, def 60,000,000
* - 60 seconds */
-static uint16_t rack_policer_rxt_thresh = 0; /* 499 = 49.9%, 0 is off */
-static uint8_t rack_policer_avg_thresh = 0; /* 3.2 */
-static uint8_t rack_policer_med_thresh = 0; /* 1 - 16 */
-static uint16_t rack_policer_bucket_reserve = 20; /* How much % is reserved in the bucket */
-static uint64_t rack_pol_min_bw = 125000; /* 1mbps in Bytes per sec */
-static uint32_t rack_policer_data_thresh = 64000; /* 64,000 bytes must be sent before we engage */
-static uint32_t rack_policing_do_bw_comp = 1;
-static uint32_t rack_pcm_every_n_rounds = 100;
-static uint32_t rack_pcm_blast = 0;
-static uint32_t rack_pcm_is_enabled = 1;
-static uint8_t rack_req_del_mss = 18; /* How many segments need to be sent in a recovery episode to do policer_detection */
-static uint8_t rack_ssthresh_rest_rto_rec = 0; /* Do we restore ssthresh when we have rec -> rto -> rec */
-
-static uint32_t rack_gp_gain_req = 1200; /* Amount percent wise required to gain to record a round as "gaining" */
-static uint32_t rack_rnd_cnt_req = 0x10005; /* Default number of rounds if we are below rack_gp_gain_req where we exit ss */
-
-
-static int32_t rack_rxt_scoreboard_clear_thresh = 2;
+static uint32_t rack_clamp_ss_upper = 110;
+static uint32_t rack_clamp_ca_upper = 105;
+static uint32_t rack_rxt_min_rnds = 10; /* Min rounds if drastic rxt clamp is in place */
+static uint32_t rack_unclamp_round_thresh = 100; /* number of perfect rounds before we unclamp */
+static uint32_t rack_unclamp_rxt_thresh = 5; /* .5% and under */
+static uint64_t rack_rxt_clamp_thresh = 0; /* Do we do the rxt clamp thing */
static int32_t rack_dnd_default = 0; /* For rr_conf = 3, what is the default for dnd */
static int32_t rack_rxt_controls = 0;
static int32_t rack_fill_cw_state = 0;
@@ -232,8 +217,9 @@ static int32_t rack_do_hystart = 0;
static int32_t rack_apply_rtt_with_reduced_conf = 0;
static int32_t rack_hibeta_setting = 0;
static int32_t rack_default_pacing_divisor = 250;
+static int32_t rack_uses_full_dgp_in_rec = 1;
static uint16_t rack_pacing_min_seg = 0;
-static int32_t rack_timely_off = 0;
+
static uint32_t sad_seg_size_per = 800; /* 80.0 % */
static int32_t rack_pkt_delay = 1000;
@@ -249,7 +235,7 @@ static int32_t rack_use_rsm_rfo = 1;
static int32_t rack_max_abc_post_recovery = 2;
static int32_t rack_client_low_buf = 0;
static int32_t rack_dsack_std_based = 0x3; /* bit field bit 1 sets rc_rack_tmr_std_based and bit 2 sets rc_rack_use_dsack */
-static int32_t rack_bw_multipler = 0; /* Limit on fill cw's jump up to be this x gp_est */
+static int32_t rack_bw_multipler = 2; /* Limit on fill cw's jump up to be this x gp_est */
#ifdef TCP_ACCOUNTING
static int32_t rack_tcp_accounting = 0;
#endif
@@ -261,9 +247,8 @@ static int32_t use_rack_rr = 1;
static int32_t rack_non_rxt_use_cr = 0; /* does a non-rxt in recovery use the configured rate (ss/ca)? */
static int32_t rack_persist_min = 250000; /* 250usec */
static int32_t rack_persist_max = 2000000; /* 2 Second in usec's */
-static int32_t rack_honors_hpts_min_to = 1; /* Do we honor the hpts minimum time out for pacing timers */
-static uint32_t rack_max_reduce = 10; /* Percent we can reduce slot by */
static int32_t rack_sack_not_required = 1; /* set to one to allow non-sack to use rack */
+static int32_t rack_default_init_window = 0; /* Use system default */
static int32_t rack_limit_time_with_srtt = 0;
static int32_t rack_autosndbuf_inc = 20; /* In percentage form */
static int32_t rack_enobuf_hw_boost_mult = 0; /* How many times the hw rate we boost slot using time_between */
@@ -297,6 +282,7 @@ static int32_t rack_rwnd_block_ends_measure = 0;
static int32_t rack_def_profile = 0;
static int32_t rack_lower_cwnd_at_tlp = 0;
+static int32_t rack_limited_retran = 0;
static int32_t rack_always_send_oldest = 0;
static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE;
@@ -370,7 +356,6 @@ static int32_t rack_timely_no_stopping = 0;
static int32_t rack_down_raise_thresh = 100;
static int32_t rack_req_segs = 1;
static uint64_t rack_bw_rate_cap = 0;
-static uint64_t rack_fillcw_bw_cap = 3750000; /* Cap fillcw at 30Mbps */
/* Rack specific counters */
@@ -392,7 +377,6 @@ counter_u64_t rack_tlp_retran;
counter_u64_t rack_tlp_retran_bytes;
counter_u64_t rack_to_tot;
counter_u64_t rack_hot_alloc;
-counter_u64_t tcp_policer_detected;
counter_u64_t rack_to_alloc;
counter_u64_t rack_to_alloc_hard;
counter_u64_t rack_to_alloc_emerg;
@@ -456,7 +440,7 @@ rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick,
static int
rack_process_ack(struct mbuf *m, struct tcphdr *th,
struct socket *so, struct tcpcb *tp, struct tcpopt *to,
- uint32_t tiwin, int32_t tlen, int32_t * ofia, int32_t thflags, int32_t * ret_val, int32_t orig_tlen);
+ uint32_t tiwin, int32_t tlen, int32_t * ofia, int32_t thflags, int32_t * ret_val);
static int
rack_process_data(struct mbuf *m, struct tcphdr *th,
struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen,
@@ -470,8 +454,6 @@ static struct rack_sendmap *rack_alloc_limit(struct tcp_rack *rack,
static struct rack_sendmap *
rack_check_recovery_mode(struct tcpcb *tp,
uint32_t tsused);
-static uint32_t
-rack_grab_rtt(struct tcpcb *tp, struct tcp_rack *rack);
static void
rack_cong_signal(struct tcpcb *tp,
uint32_t type, uint32_t ack, int );
@@ -522,14 +504,13 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to,
static void
rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len,
uint32_t seq_out, uint16_t th_flags, int32_t err, uint64_t ts,
- struct rack_sendmap *hintrsm, uint32_t add_flags, struct mbuf *s_mb, uint32_t s_moff, int hw_tls, int segsiz);
+ struct rack_sendmap *hintrsm, uint16_t add_flags, struct mbuf *s_mb, uint32_t s_moff, int hw_tls, int segsiz);
static uint64_t rack_get_gp_est(struct tcp_rack *rack);
-
static void
rack_log_sack_passed(struct tcpcb *tp, struct tcp_rack *rack,
- struct rack_sendmap *rsm, uint32_t cts);
+ struct rack_sendmap *rsm);
static void rack_log_to_event(struct tcp_rack *rack, int32_t to_num, struct rack_sendmap *rsm);
static int32_t rack_output(struct tcpcb *tp);
@@ -545,10 +526,10 @@ static int32_t rack_stopall(struct tcpcb *tp);
static void rack_timer_cancel(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, int line);
static uint32_t
rack_update_entry(struct tcpcb *tp, struct tcp_rack *rack,
- struct rack_sendmap *rsm, uint64_t ts, int32_t * lenp, uint32_t add_flag, int segsiz);
+ struct rack_sendmap *rsm, uint64_t ts, int32_t * lenp, uint16_t add_flag, int segsiz);
static void
rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack,
- struct rack_sendmap *rsm, uint64_t ts, uint32_t add_flag, int segsiz);
+ struct rack_sendmap *rsm, uint64_t ts, uint16_t add_flag, int segsiz);
static int
rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack,
struct rack_sendmap *rsm, struct tcpopt *to, uint32_t cts, int32_t ack_type, tcp_seq th_ack);
@@ -557,10 +538,6 @@ static int
rack_do_close_wait(struct mbuf *m, struct tcphdr *th,
struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos);
-
-static void
-rack_peg_rxt(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t segsiz);
-
static int
rack_do_closing(struct mbuf *m, struct tcphdr *th,
struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
@@ -743,22 +720,6 @@ rack_undo_cc_pacing(struct tcp_rack *rack)
rack_swap_beta_values(rack, 4);
}
-static void
-rack_remove_pacing(struct tcp_rack *rack)
-{
- if (rack->rc_pacing_cc_set)
- rack_undo_cc_pacing(rack);
- if (rack->r_ctl.pacing_method & RACK_REG_PACING)
- tcp_decrement_paced_conn();
- if (rack->r_ctl.pacing_method & RACK_DGP_PACING)
- tcp_dec_dgp_pacing_cnt();
- rack->rc_always_pace = 0;
- rack->r_ctl.pacing_method = RACK_PACING_NONE;
- rack->dgp_on = 0;
- rack->rc_hybrid_mode = 0;
- rack->use_fixed_rate = 0;
-}
-
static void
rack_log_gpset(struct tcp_rack *rack, uint32_t seq_end, uint32_t ack_end_t,
uint32_t send_end_t, int line, uint8_t mode, struct rack_sendmap *rsm)
@@ -781,8 +742,6 @@ rack_log_gpset(struct tcp_rack *rack, uint32_t seq_end, uint32_t ack_end_t,
log.u_bbr.pkts_out = line;
log.u_bbr.cwnd_gain = rack->app_limited_needs_set;
log.u_bbr.pkt_epoch = rack->r_ctl.rc_app_limited_cnt;
- log.u_bbr.epoch = rack->r_ctl.current_round;
- log.u_bbr.lt_epoch = rack->r_ctl.rc_considered_lost;
if (rsm != NULL) {
log.u_bbr.applimited = rsm->r_start;
log.u_bbr.delivered = rsm->r_end;
@@ -898,7 +857,6 @@ rack_init_sysctls(void)
struct sysctl_oid *rack_measure;
struct sysctl_oid *rack_probertt;
struct sysctl_oid *rack_hw_pacing;
- struct sysctl_oid *rack_policing;
rack_attack = SYSCTL_ADD_NODE(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_sysctl_root),
@@ -1036,36 +994,11 @@ rack_init_sysctls(void)
"pacing",
CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"Pacing related Controls");
- SYSCTL_ADD_U32(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_pacing),
- OID_AUTO, "pcm_enabled", CTLFLAG_RW,
- &rack_pcm_is_enabled, 1,
- "Do we by default do PCM measurements?");
- SYSCTL_ADD_U32(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_pacing),
- OID_AUTO, "pcm_rnds", CTLFLAG_RW,
- &rack_pcm_every_n_rounds, 100,
- "How many rounds before we need to do a PCM measurement");
- SYSCTL_ADD_U32(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_pacing),
- OID_AUTO, "pcm_blast", CTLFLAG_RW,
- &rack_pcm_blast, 0,
- "Blast out the full cwnd/rwnd when doing a PCM measurement");
- SYSCTL_ADD_U32(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_pacing),
- OID_AUTO, "rnd_gp_gain", CTLFLAG_RW,
- &rack_gp_gain_req, 1200,
- "How much do we have to increase the GP to record the round 1200 = 120.0");
- SYSCTL_ADD_U32(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_pacing),
- OID_AUTO, "dgp_out_of_ss_at", CTLFLAG_RW,
- &rack_rnd_cnt_req, 0x10005,
- "How many rounds less than rnd_gp_gain will drop us out of SS");
SYSCTL_ADD_S32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_pacing),
- OID_AUTO, "no_timely", CTLFLAG_RW,
- &rack_timely_off, 0,
- "Do we not use timely in DGP?");
+ OID_AUTO, "fulldgpinrec", CTLFLAG_RW,
+ &rack_uses_full_dgp_in_rec, 1,
+ "Do we use all DGP features in recovery (fillcw, timely et.al.)?");
SYSCTL_ADD_S32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_pacing),
OID_AUTO, "fullbufdisc", CTLFLAG_RW,
@@ -1084,13 +1017,13 @@ rack_init_sysctls(void)
SYSCTL_ADD_S32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_pacing),
OID_AUTO, "divisor", CTLFLAG_RW,
- &rack_default_pacing_divisor, 250,
+ &rack_default_pacing_divisor, 4,
"What is the default divisor given to the rl code?");
SYSCTL_ADD_S32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_pacing),
OID_AUTO, "fillcw_max_mult", CTLFLAG_RW,
- &rack_bw_multipler, 0,
- "What is the limit multiplier of the current gp_est that fillcw can increase the b/w too, 200 == 200% (0 = off)?");
+ &rack_bw_multipler, 2,
+ "What is the multiplier of the current gp_est that fillcw can increase the b/w too?");
SYSCTL_ADD_S32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_pacing),
OID_AUTO, "max_pace_over", CTLFLAG_RW,
@@ -1106,6 +1039,11 @@ rack_init_sysctls(void)
OID_AUTO, "limit_wsrtt", CTLFLAG_RW,
&rack_limit_time_with_srtt, 0,
"Do we limit pacing time based on srtt");
+ SYSCTL_ADD_S32(&rack_sysctl_ctx,
+ SYSCTL_CHILDREN(rack_pacing),
+ OID_AUTO, "init_win", CTLFLAG_RW,
+ &rack_default_init_window, 0,
+ "Do we have a rack initial window 0 = system default");
SYSCTL_ADD_U16(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_pacing),
OID_AUTO, "gp_per_ss", CTLFLAG_RW,
@@ -1141,11 +1079,6 @@ rack_init_sysctls(void)
OID_AUTO, "rate_cap", CTLFLAG_RW,
&rack_bw_rate_cap, 0,
"If set we apply this value to the absolute rate cap used by pacing");
- SYSCTL_ADD_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_pacing),
- OID_AUTO, "fillcw_cap", CTLFLAG_RW,
- &rack_fillcw_bw_cap, 3750000,
- "Do we have an absolute cap on the amount of b/w fillcw can specify (0 = no)?");
SYSCTL_ADD_U8(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_sysctl_root),
OID_AUTO, "req_measure_cnt", CTLFLAG_RW,
@@ -1384,6 +1317,11 @@ rack_init_sysctls(void)
OID_AUTO, "send_oldest", CTLFLAG_RW,
&rack_always_send_oldest, 0,
"Should we always send the oldest TLP and RACK-TLP");
+ SYSCTL_ADD_S32(&rack_sysctl_ctx,
+ SYSCTL_CHILDREN(rack_tlp),
+ OID_AUTO, "rack_tlimit", CTLFLAG_RW,
+ &rack_limited_retran, 0,
+ "How many times can a rack timeout drive out sends");
SYSCTL_ADD_S32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_tlp),
OID_AUTO, "tlp_cwnd_flag", CTLFLAG_RW,
@@ -1417,26 +1355,6 @@ rack_init_sysctls(void)
"timers",
CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"Timer related controls");
- SYSCTL_ADD_U8(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_timers),
- OID_AUTO, "reset_ssth_rec_rto", CTLFLAG_RW,
- &rack_ssthresh_rest_rto_rec, 0,
- "When doing recovery -> rto -> recovery do we reset SSthresh?");
- SYSCTL_ADD_U32(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_timers),
- OID_AUTO, "scoreboard_thresh", CTLFLAG_RW,
- &rack_rxt_scoreboard_clear_thresh, 2,
- "How many RTO's are allowed before we clear the scoreboard");
- SYSCTL_ADD_U32(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_timers),
- OID_AUTO, "honor_hpts_min", CTLFLAG_RW,
- &rack_honors_hpts_min_to, 1,
- "Do rack pacing timers honor hpts min timeout");
- SYSCTL_ADD_U32(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_timers),
- OID_AUTO, "hpts_max_reduce", CTLFLAG_RW,
- &rack_max_reduce, 10,
- "Max percentage we will reduce slot by for pacing when we are behind");
SYSCTL_ADD_U32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_timers),
OID_AUTO, "persmin", CTLFLAG_RW,
@@ -1516,6 +1434,11 @@ rack_init_sysctls(void)
"features",
CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"Feature controls");
+ SYSCTL_ADD_U64(&rack_sysctl_ctx,
+ SYSCTL_CHILDREN(rack_features),
+ OID_AUTO, "rxt_clamp_thresh", CTLFLAG_RW,
+ &rack_rxt_clamp_thresh, 0,
+ "Bit encoded clamping setup bits CCCC CCCCC UUUU UULF PPPP PPPP PPPP PPPP");
SYSCTL_ADD_S32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_features),
OID_AUTO, "hybrid_set_maxseg", CTLFLAG_RW,
@@ -1551,53 +1474,6 @@ rack_init_sysctls(void)
OID_AUTO, "hystartplusplus", CTLFLAG_RW,
&rack_do_hystart, 0,
"Should RACK enable HyStart++ on connections?");
- /* Policer detection */
- rack_policing = SYSCTL_ADD_NODE(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_sysctl_root),
- OID_AUTO,
- "policing",
- CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
- "policer detection");
- SYSCTL_ADD_U16(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_policing),
- OID_AUTO, "rxt_thresh", CTLFLAG_RW,
- &rack_policer_rxt_thresh, 0,
- "Percentage of retransmits we need to be a possible policer (499 = 49.9 percent)");
- SYSCTL_ADD_U8(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_policing),
- OID_AUTO, "avg_thresh", CTLFLAG_RW,
- &rack_policer_avg_thresh, 0,
- "What threshold of average retransmits needed to recover a lost packet (1 - 169 aka 21 = 2.1)?");
- SYSCTL_ADD_U8(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_policing),
- OID_AUTO, "med_thresh", CTLFLAG_RW,
- &rack_policer_med_thresh, 0,
- "What threshold of Median retransmits needed to recover a lost packet (1 - 16)?");
- SYSCTL_ADD_U32(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_policing),
- OID_AUTO, "data_thresh", CTLFLAG_RW,
- &rack_policer_data_thresh, 64000,
- "How many bytes must have gotten through before we can start doing policer detection?");
- SYSCTL_ADD_U32(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_policing),
- OID_AUTO, "bwcomp", CTLFLAG_RW,
- &rack_policing_do_bw_comp, 1,
- "Do we raise up low b/w so that at least pace_max_seg can be sent in the srtt?");
- SYSCTL_ADD_U8(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_policing),
- OID_AUTO, "recmss", CTLFLAG_RW,
- &rack_req_del_mss, 18,
- "How many MSS must be delivered during recovery to engage policer detection?");
- SYSCTL_ADD_U16(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_policing),
- OID_AUTO, "res_div", CTLFLAG_RW,
- &rack_policer_bucket_reserve, 20,
- "What percentage is reserved in the policer bucket?");
- SYSCTL_ADD_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_policing),
- OID_AUTO, "min_comp_bw", CTLFLAG_RW,
- &rack_pol_min_bw, 125000,
- "Do we have a min b/w for b/w compensation (0 = no)?");
/* Misc rack controls */
rack_misc = SYSCTL_ADD_NODE(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_sysctl_root),
@@ -1702,8 +1578,31 @@ rack_init_sysctls(void)
OID_AUTO, "autoscale", CTLFLAG_RW,
&rack_autosndbuf_inc, 20,
"What percentage should rack scale up its snd buffer by?");
-
-
+ SYSCTL_ADD_U32(&rack_sysctl_ctx,
+ SYSCTL_CHILDREN(rack_misc),
+ OID_AUTO, "rnds_for_rxt_clamp", CTLFLAG_RW,
+ &rack_rxt_min_rnds, 10,
+ "Number of rounds needed between RTT clamps due to high loss rates");
+ SYSCTL_ADD_U32(&rack_sysctl_ctx,
+ SYSCTL_CHILDREN(rack_misc),
+ OID_AUTO, "rnds_for_unclamp", CTLFLAG_RW,
+ &rack_unclamp_round_thresh, 100,
+ "Number of rounds needed with no loss to unclamp");
+ SYSCTL_ADD_U32(&rack_sysctl_ctx,
+ SYSCTL_CHILDREN(rack_misc),
+ OID_AUTO, "rxt_threshs_for_unclamp", CTLFLAG_RW,
+ &rack_unclamp_rxt_thresh, 5,
+ "Percentage of retransmits we need to be under to unclamp (5 = .5 percent)\n");
+ SYSCTL_ADD_U32(&rack_sysctl_ctx,
+ SYSCTL_CHILDREN(rack_misc),
+ OID_AUTO, "clamp_ss_upper", CTLFLAG_RW,
+ &rack_clamp_ss_upper, 110,
+ "Clamp percentage ceiling in SS?");
+ SYSCTL_ADD_U32(&rack_sysctl_ctx,
+ SYSCTL_CHILDREN(rack_misc),
+ OID_AUTO, "clamp_ca_upper", CTLFLAG_RW,
+ &rack_clamp_ca_upper, 110,
+ "Clamp percentage ceiling in CA?");
/* Sack Attacker detection stuff */
SYSCTL_ADD_U32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_attack),
@@ -1880,13 +1779,6 @@ rack_init_sysctls(void)
OID_AUTO, "alloc_hot", CTLFLAG_RD,
&rack_hot_alloc,
"Total allocations from the top of our list");
- tcp_policer_detected = counter_u64_alloc(M_WAITOK);
- SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_counters),
- OID_AUTO, "policer_detected", CTLFLAG_RD,
- &tcp_policer_detected,
- "Total policer_detections");
-
rack_to_alloc = counter_u64_alloc(M_WAITOK);
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_counters),
@@ -2065,8 +1957,17 @@ rack_init_sysctls(void)
static uint32_t
rc_init_window(struct tcp_rack *rack)
{
- return (tcp_compute_initwnd(tcp_maxseg(rack->rc_tp)));
+ uint32_t win;
+ if (rack->rc_init_win == 0) {
+ /*
+ * Nothing set by the user, use the system stack
+ * default.
+ */
+ return (tcp_compute_initwnd(tcp_maxseg(rack->rc_tp)));
+ }
+ win = ctf_fixed_maxseg(rack->rc_tp) * rack->rc_init_win;
+ return (win);
}
static uint64_t
@@ -2170,7 +2071,6 @@ rack_log_hybrid_bw(struct tcp_rack *rack, uint32_t seq, uint64_t cbw, uint64_t t
off = (uint64_t)(cur) - (uint64_t)(&rack->rc_tp->t_tcpreq_info[0]);
log.u_bbr.bbr_substate = (uint8_t)(off / sizeof(struct tcp_sendfile_track));
#endif
- log.u_bbr.inhpts = 1;
log.u_bbr.flex4 = (uint32_t)(rack->rc_tp->t_sndbytes - cur->sent_at_fs);
log.u_bbr.flex5 = (uint32_t)(rack->rc_tp->t_snd_rxt_bytes - cur->rxt_at_fs);
log.u_bbr.flex7 = (uint16_t)cur->hybrid_flags;
@@ -2216,24 +2116,9 @@ rack_log_hybrid_sends(struct tcp_rack *rack, struct tcp_sendfile_track *cur, int
memset(&log, 0, sizeof(log));
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
+ log.u_bbr.cur_del_rate = rack->rc_tp->t_sndbytes;
log.u_bbr.delRate = cur->sent_at_fs;
-
- if ((cur->flags & TCP_TRK_TRACK_FLG_LSND) == 0) {
- /*
- * We did not get a new Rules Applied to set so
- * no overlapping send occured, this means the
- * current byte counts are correct.
- */
- log.u_bbr.cur_del_rate = rack->rc_tp->t_sndbytes;
- log.u_bbr.rttProp = rack->rc_tp->t_snd_rxt_bytes;
- } else {
- /*
- * Overlapping send case, we switched to a new
- * send and did a rules applied.
- */
- log.u_bbr.cur_del_rate = cur->sent_at_ls;
- log.u_bbr.rttProp = cur->rxt_at_ls;
- }
+ log.u_bbr.rttProp = rack->rc_tp->t_snd_rxt_bytes;
log.u_bbr.bw_inuse = cur->rxt_at_fs;
log.u_bbr.cwnd_gain = line;
off = (uint64_t)(cur) - (uint64_t)(&rack->rc_tp->t_tcpreq_info[0]);
@@ -2253,7 +2138,6 @@ rack_log_hybrid_sends(struct tcp_rack *rack, struct tcp_sendfile_track *cur, int
log.u_bbr.lt_epoch = (uint32_t)((cur->timestamp >> 32) & 0x00000000ffffffff);
/* now set all the flags in */
log.u_bbr.pkts_out = cur->hybrid_flags;
- log.u_bbr.lost = cur->playout_ms;
log.u_bbr.flex6 = cur->flags;
/*
* Last send time = <flex5 | pkt_epoch> note we do not distinguish cases
@@ -2262,20 +2146,6 @@ rack_log_hybrid_sends(struct tcp_rack *rack, struct tcp_sendfile_track *cur, int
*/
log.u_bbr.pkt_epoch = (uint32_t)(rack->r_ctl.last_tmit_time_acked & 0x00000000ffffffff);
log.u_bbr.flex5 = (uint32_t)((rack->r_ctl.last_tmit_time_acked >> 32) & 0x00000000ffffffff);
- /*
- * Compose bbr_state to be a bit wise 0000ADHF
- * where A is the always_pace flag
- * where D is the dgp_on flag
- * where H is the hybrid_mode on flag
- * where F is the use_fixed_rate flag.
- */
- log.u_bbr.bbr_state = rack->rc_always_pace;
- log.u_bbr.bbr_state <<= 1;
- log.u_bbr.bbr_state |= rack->dgp_on;
- log.u_bbr.bbr_state <<= 1;
- log.u_bbr.bbr_state |= rack->rc_hybrid_mode;
- log.u_bbr.bbr_state <<= 1;
- log.u_bbr.bbr_state |= rack->use_fixed_rate;
log.u_bbr.flex8 = HYBRID_LOG_SENT_LOST;
tcp_log_event(rack->rc_tp, NULL,
@@ -2429,7 +2299,6 @@ normal_ratecap:
#ifdef TCP_REQUEST_TRK
if (rack->rc_hybrid_mode &&
rack->rc_catch_up &&
- (rack->r_ctl.rc_last_sft != NULL) &&
(rack->r_ctl.rc_last_sft->hybrid_flags & TCP_HYBRID_PACING_S_MSS) &&
(rack_hybrid_allow_set_maxseg == 1) &&
((rack->r_ctl.rc_last_sft->hybrid_flags & TCP_HYBRID_PACING_SETMSS) == 0)) {
@@ -2469,10 +2338,7 @@ rack_get_gp_est(struct tcp_rack *rack)
*/
uint64_t srtt;
- if (rack->dis_lt_bw == 1)
- lt_bw = 0;
- else
- lt_bw = rack_get_lt_bw(rack);
+ lt_bw = rack_get_lt_bw(rack);
if (lt_bw) {
/*
* No goodput bw but a long-term b/w does exist
@@ -2508,22 +2374,19 @@ rack_get_gp_est(struct tcp_rack *rack)
/* Still doing initial average must calculate */
bw = rack->r_ctl.gp_bw / max(rack->r_ctl.num_measurements, 1);
}
- if (rack->dis_lt_bw) {
- /* We are not using lt-bw */
- ret_bw = bw;
- goto compensate;
- }
lt_bw = rack_get_lt_bw(rack);
if (lt_bw == 0) {
/* If we don't have one then equate it to the gp_bw */
lt_bw = rack->r_ctl.gp_bw;
}
- if (rack->use_lesser_lt_bw) {
+ if ((rack->r_cwnd_was_clamped == 1) && (rack->r_clamped_gets_lower > 0)){
+ /* if clamped take the lowest */
if (lt_bw < bw)
ret_bw = lt_bw;
else
ret_bw = bw;
} else {
+ /* If not set for clamped to get lowest, take the highest */
if (lt_bw > bw)
ret_bw = lt_bw;
else
@@ -2624,8 +2487,6 @@ rack_log_dsack_event(struct tcp_rack *rack, uint8_t mod, uint32_t flex4, uint32_
log.u_bbr.flex7 = rack->r_ctl.dsack_persist;
log.u_bbr.flex8 = mod;
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
- log.u_bbr.epoch = rack->r_ctl.current_round;
- log.u_bbr.lt_epoch = rack->r_ctl.rc_considered_lost;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -2674,8 +2535,6 @@ rack_log_hdwr_pacing(struct tcp_rack *rack,
else
log.u_bbr.cur_del_rate = 0;
log.u_bbr.rttProp = rack->r_ctl.last_hw_bw_req;
- log.u_bbr.epoch = rack->r_ctl.current_round;
- log.u_bbr.lt_epoch = rack->r_ctl.rc_considered_lost;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -2693,9 +2552,28 @@ rack_get_output_bw(struct tcp_rack *rack, uint64_t bw, struct rack_sendmap *rsm,
uint64_t bw_est, high_rate;
uint64_t gain;
- gain = (uint64_t)rack_get_output_gain(rack, rsm);
- bw_est = bw * gain;
- bw_est /= (uint64_t)100;
+ if ((rack->r_pacing_discount == 0) ||
+ (rack_full_buffer_discount == 0)) {
+ /*
+ * No buffer level based discount from client buffer
+ * level is enabled or the feature is disabled.
+ */
+ gain = (uint64_t)rack_get_output_gain(rack, rsm);
+ bw_est = bw * gain;
+ bw_est /= (uint64_t)100;
+ } else {
+ /*
+ * We have a discount in place apply it with
+ * just a 100% gain (we get no boost if the buffer
+ * is full).
+ */
+ uint64_t discount;
+
+ discount = bw * (uint64_t)(rack_full_buffer_discount * rack->r_ctl.pacing_discount_amm);
+ discount /= 100;
+ /* What %% of the b/w do we discount */
+ bw_est = bw - discount;
+ }
/* Never fall below the minimum (def 64kbps) */
if (bw_est < RACK_MIN_BW)
bw_est = RACK_MIN_BW;
@@ -2781,8 +2659,6 @@ log_anyway:
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
log.u_bbr.pacing_gain = rack->r_must_retran;
- log.u_bbr.epoch = rack->r_ctl.current_round;
- log.u_bbr.lt_epoch = rack->r_ctl.rc_considered_lost;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -2822,10 +2698,6 @@ rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t slot
log.u_bbr.lt_epoch = rack->rc_tp->t_rxtshift;
log.u_bbr.lost = rack_rto_min;
log.u_bbr.epoch = rack->r_ctl.roundends;
- log.u_bbr.bw_inuse = rack->r_ctl.current_round;
- log.u_bbr.bw_inuse <<= 32;
- log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
- log.u_bbr.applimited = rack->rc_tp->t_flags2;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -2859,9 +2731,6 @@ rack_log_to_event(struct tcp_rack *rack, int32_t to_num, struct rack_sendmap *rs
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
log.u_bbr.pacing_gain = rack->r_must_retran;
- log.u_bbr.bw_inuse = rack->r_ctl.current_round;
- log.u_bbr.bw_inuse <<= 32;
- log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -2911,9 +2780,6 @@ rack_log_map_chg(struct tcpcb *tp, struct tcp_rack *rack,
log.u_bbr.lost = 0;
else
log.u_bbr.lost = rack->r_ctl.rc_prr_sndcnt;
- log.u_bbr.bw_inuse = rack->r_ctl.current_round;
- log.u_bbr.bw_inuse <<= 32;
- log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -3061,9 +2927,6 @@ rack_log_rtt_sample_calc(struct tcp_rack *rack, uint32_t rtt, uint32_t send_time
log.u_bbr.flex4 = where;
log.u_bbr.flex7 = 2;
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
- log.u_bbr.bw_inuse = rack->r_ctl.current_round;
- log.u_bbr.bw_inuse <<= 32;
- log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -3076,7 +2939,7 @@ rack_log_rtt_sample_calc(struct tcp_rack *rack, uint32_t rtt, uint32_t send_time
static void
rack_log_rtt_sendmap(struct tcp_rack *rack, uint32_t idx, uint64_t tsv, uint32_t tsecho)
{
- if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
+ if (tcp_bblogging_on(rack->rc_tp)) {
union tcp_log_stackspecific log;
struct timeval tv;
@@ -3088,9 +2951,6 @@ rack_log_rtt_sendmap(struct tcp_rack *rack, uint32_t idx, uint64_t tsv, uint32_t
log.u_bbr.flex7 = 3;
log.u_bbr.rttProp = tsv;
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
- log.u_bbr.bw_inuse = rack->r_ctl.current_round;
- log.u_bbr.bw_inuse <<= 32;
- log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -3119,9 +2979,6 @@ rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick,
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
log.u_bbr.pacing_gain = rack->r_must_retran;
- log.u_bbr.bw_inuse = rack->r_ctl.current_round;
- log.u_bbr.bw_inuse <<= 32;
- log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
TCP_LOG_EVENTP(tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -3194,13 +3051,6 @@ rack_log_doseg_done(struct tcp_rack *rack, uint32_t cts, int32_t nxt_pkt, int32_
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
log.u_bbr.pacing_gain = rack->r_must_retran;
- log.u_bbr.bw_inuse = rack->r_ctl.current_round;
- log.u_bbr.bw_inuse <<= 32;
- log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
- log.u_bbr.epoch = rack->rc_inp->inp_socket->so_snd.sb_hiwat;
- log.u_bbr.lt_epoch = rack->rc_inp->inp_socket->so_rcv.sb_hiwat;
- log.u_bbr.lost = rack->rc_tp->t_srtt;
- log.u_bbr.pkt_epoch = rack->rc_tp->rfbuf_cnt;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -3262,9 +3112,6 @@ rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, ui
log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
log.u_bbr.pacing_gain = rack->r_must_retran;
log.u_bbr.cwnd_gain = rack->rc_has_collapsed;
- log.u_bbr.bw_inuse = rack->r_ctl.current_round;
- log.u_bbr.bw_inuse <<= 32;
- log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -3299,9 +3146,6 @@ rack_log_to_cancel(struct tcp_rack *rack, int32_t hpts_removed, int line, uint32
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
log.u_bbr.pacing_gain = rack->r_must_retran;
- log.u_bbr.bw_inuse = rack->r_ctl.current_round;
- log.u_bbr.bw_inuse <<= 32;
- log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -3470,7 +3314,6 @@ rack_counter_destroy(void)
counter_u64_free(rack_saw_enobuf_hw);
counter_u64_free(rack_saw_enetunreach);
counter_u64_free(rack_hot_alloc);
- counter_u64_free(tcp_policer_detected);
counter_u64_free(rack_to_alloc);
counter_u64_free(rack_to_alloc_hard);
counter_u64_free(rack_to_alloc_emerg);
@@ -3632,8 +3475,6 @@ rack_free(struct tcp_rack *rack, struct rack_sendmap *rsm)
rack->r_ctl.rc_num_split_allocs--;
}
if (rsm == rack->r_ctl.rc_first_appl) {
- rack->r_ctl.cleared_app_ack_seq = rsm->r_start + (rsm->r_end - rsm->r_start);
- rack->r_ctl.cleared_app_ack = 1;
if (rack->r_ctl.rc_app_limited_cnt == 0)
rack->r_ctl.rc_first_appl = NULL;
else
@@ -3649,7 +3490,7 @@ rack_free(struct tcp_rack *rack, struct rack_sendmap *rsm)
rack->r_ctl.rc_sacklast = NULL;
memset(rsm, 0, sizeof(struct rack_sendmap));
/* Make sure we are not going to overrun our count limit of 0xff */
- if ((rack->rc_free_cnt + 1) > RACK_FREE_CNT_MAX) {
+ if ((rack->rc_free_cnt + 1) > 0xff) {
rack_free_trim(rack);
}
TAILQ_INSERT_HEAD(&rack->r_ctl.rc_free, rsm, r_tnext);
@@ -3965,8 +3806,6 @@ rack_increase_bw_mul(struct tcp_rack *rack, int timely_says, uint64_t cur_bw, ui
logged = 0;
- if (rack->rc_skip_timely)
- return;
if (override) {
/*
* override is passed when we are
@@ -4137,8 +3976,6 @@ rack_decrease_bw_mul(struct tcp_rack *rack, int timely_says, uint32_t rtt, int32
uint64_t logvar, logvar2, logvar3;
uint32_t logged, new_per, ss_red, ca_red, rec_red, alt, val;
- if (rack->rc_skip_timely)
- return;
if (rack->rc_gp_incr) {
/* Turn off increment counting */
rack->rc_gp_incr = 0;
@@ -4340,7 +4177,6 @@ rack_enter_probertt(struct tcp_rack *rack, uint32_t us_cts)
*/
uint32_t segsiz;
- rack->r_ctl.rc_lower_rtt_us_cts = us_cts;
if (rack->rc_gp_dyn_mul == 0)
return;
@@ -4367,6 +4203,7 @@ rack_enter_probertt(struct tcp_rack *rack, uint32_t us_cts)
rack->r_ctl.rc_pace_min_segs);
rack->in_probe_rtt = 1;
rack->measure_saw_probe_rtt = 1;
+ rack->r_ctl.rc_lower_rtt_us_cts = us_cts;
rack->r_ctl.rc_time_probertt_starts = 0;
rack->r_ctl.rc_entry_gp_rtt = rack->r_ctl.rc_gp_srtt;
if (rack_probertt_use_min_rtt_entry)
@@ -4550,7 +4387,6 @@ static void
rack_check_probe_rtt(struct tcp_rack *rack, uint32_t us_cts)
{
/* Check in on probe-rtt */
-
if (rack->rc_gp_filled == 0) {
/* We do not do p-rtt unless we have gp measurements */
return;
@@ -4595,10 +4431,7 @@ no_exit:
if (calc) {
/* Maybe */
calc *= rack_per_of_gp_probertt_reduce;
- if (calc > rack_per_of_gp_probertt)
- rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_lowthresh;
- else
- rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_probertt - calc;
+ rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_probertt - calc;
/* Limit it too */
if (rack->r_ctl.rack_per_of_gp_probertt < rack_per_of_gp_lowthresh)
rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_lowthresh;
@@ -4639,9 +4472,7 @@ no_exit:
rack_exit_probertt(rack, us_cts);
}
- } else if ((rack->rc_skip_timely == 0) &&
- (TSTMP_GT(us_cts, rack->r_ctl.rc_lower_rtt_us_cts)) &&
*** 6977 LINES SKIPPED ***