git: 43b117f88f30 - main - tcp: make the maximum number of retransmissions tunable per VNET
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 06 Jun 2023 21:02:40 UTC
The branch main has been updated by rscheff:
URL: https://cgit.FreeBSD.org/src/commit/?id=43b117f88f3044d5f08e70b0daf0bb964f9ecb4b
commit 43b117f88f3044d5f08e70b0daf0bb964f9ecb4b
Author: Richard Scheffenegger <rscheff@FreeBSD.org>
AuthorDate: 2023-06-06 20:56:44 +0000
Commit: Richard Scheffenegger <rscheff@FreeBSD.org>
CommitDate: 2023-06-06 20:58:54 +0000
tcp: make the maximum number of retransmissions tunable per VNET
Both Windows (TcpMaxDataRetransmissions) and Linux (tcp_retries2)
allow restricting the maximum number of consecutive timer-based
retransmissions. Add that same capability on a per-VNET basis to
FreeBSD.
Reviewed By: cc, tuexen, #transport
Sponsored by: NetApp, Inc.
Differential Revision: https://reviews.freebsd.org/D40424
---
share/man/man4/tcp.4 | 5 ++++-
sys/netinet/tcp_output.c | 2 +-
sys/netinet/tcp_stacks/bbr.c | 8 ++++----
sys/netinet/tcp_stacks/rack.c | 8 ++++----
sys/netinet/tcp_timer.c | 30 ++++++++++++++++++++++++++----
sys/netinet/tcp_var.h | 2 ++
6 files changed, 41 insertions(+), 14 deletions(-)
diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
index ce27705f7eda..382e39a4355d 100644
--- a/share/man/man4/tcp.4
+++ b/share/man/man4/tcp.4
@@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd February 3, 2023
+.Dd June 6, 2023
.Dt TCP 4
.Os
.Sh NAME
@@ -843,6 +843,9 @@ Maximum size of automatic receive buffer.
Initial
.Tn TCP
receive window (buffer size).
+.It Va retries
+Maximum number of consecutive timer based retransmits sent after a data
+segment is lost (default and maximum is 12).
.It Va rexmit_drop_options
Drop TCP options from third and later retransmitted SYN segments
of a connection.
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index abfab1a62176..800480413586 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -1766,7 +1766,7 @@ tcp_setpersist(struct tcpcb *tp)
tt = maxunacktime;
}
tcp_timer_activate(tp, TT_PERSIST, tt);
- if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
+ if (tp->t_rxtshift < V_tcp_retries)
tp->t_rxtshift++;
}
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 5ecb558dadb3..1e8053afc45c 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -4763,7 +4763,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
* the idle time (no responses to probes) reaches the maximum
* backoff that we would use if retransmitting.
*/
- if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
+ if (tp->t_rxtshift >= V_tcp_retries &&
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
KMOD_TCPSTAT_INC(tcps_persistdrop);
@@ -4796,7 +4796,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
tp->t_flags &= ~TF_DELACK;
free(t_template, M_TEMP);
}
- if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
+ if (tp->t_rxtshift < V_tcp_retries)
tp->t_rxtshift++;
bbr_start_hpts_timer(bbr, tp, cts, 3, 0, 0);
out:
@@ -4990,8 +4990,8 @@ bbr_timeout_rxt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
*/
tp->t_rxtshift++;
}
- if (tp->t_rxtshift > TCP_MAXRXTSHIFT) {
- tp->t_rxtshift = TCP_MAXRXTSHIFT;
+ if (tp->t_rxtshift > V_tcp_retries) {
+ tp->t_rxtshift = V_tcp_retries;
KMOD_TCPSTAT_INC(tcps_timeoutdrop);
tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
/* XXXGL: previously t_softerror was casted to uint16_t */
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index c9b5b937cc46..36fd5daf07dd 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -7445,7 +7445,7 @@ rack_timeout_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
* the idle time (no responses to probes) reaches the maximum
* backoff that we would use if retransmitting.
*/
- if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
+ if (tp->t_rxtshift >= V_tcp_retries &&
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
TICKS_2_USEC(ticks - tp->t_rcvtime) >= RACK_REXMTVAL(tp) * tcp_totbackoff)) {
KMOD_TCPSTAT_INC(tcps_persistdrop);
@@ -7491,7 +7491,7 @@ rack_timeout_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
tp->t_flags &= ~TF_DELACK;
free(t_template, M_TEMP);
}
- if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
+ if (tp->t_rxtshift < V_tcp_retries)
tp->t_rxtshift++;
out:
rack_log_to_event(rack, RACK_TO_FRM_PERSIST, NULL);
@@ -7783,10 +7783,10 @@ rack_timeout_rxt(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
*/
tp->t_rxtshift++;
}
- if (tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+ if (tp->t_rxtshift > V_tcp_retries) {
tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
drop_it:
- tp->t_rxtshift = TCP_MAXRXTSHIFT;
+ tp->t_rxtshift = V_tcp_retries;
KMOD_TCPSTAT_INC(tcps_timeoutdrop);
/* XXXGL: previously t_softerror was casted to uint16_t */
MPASS(tp->t_softerror >= 0);
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index d1301c18d54f..6126d85c7565 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -201,6 +201,28 @@ static int per_cpu_timers = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
&per_cpu_timers , 0, "run tcp timers on all cpus");
+static int
+sysctl_net_inet_tcp_retries(SYSCTL_HANDLER_ARGS)
+{
+ int error, new;
+
+ new = V_tcp_retries;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ if ((new < 1) || (new > TCP_MAXRXTSHIFT))
+ error = EINVAL;
+ else
+ V_tcp_retries = new;
+ }
+ return (error);
+}
+
+VNET_DEFINE(int, tcp_retries) = TCP_MAXRXTSHIFT;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, retries,
+ CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(tcp_retries), 0, sysctl_net_inet_tcp_retries, "I",
+ "maximum number of consecutive timer based retransmissions");
+
/*
* Map the given inp to a CPU id.
*
@@ -492,7 +514,7 @@ tcp_timer_persist(struct tcpcb *tp)
* progress.
*/
progdrop = tcp_maxunacktime_check(tp);
- if (progdrop || (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
+ if (progdrop || (tp->t_rxtshift >= V_tcp_retries &&
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) {
if (!progdrop)
@@ -555,10 +577,10 @@ tcp_timer_rexmt(struct tcpcb *tp)
* or we've gone long enough without making progress, then drop
* the session.
*/
- if (++tp->t_rxtshift > TCP_MAXRXTSHIFT || tcp_maxunacktime_check(tp)) {
- if (tp->t_rxtshift > TCP_MAXRXTSHIFT)
+ if (++tp->t_rxtshift > V_tcp_retries || tcp_maxunacktime_check(tp)) {
+ if (tp->t_rxtshift > V_tcp_retries)
TCPSTAT_INC(tcps_timeoutdrop);
- tp->t_rxtshift = TCP_MAXRXTSHIFT;
+ tp->t_rxtshift = V_tcp_retries;
tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
NET_EPOCH_ENTER(et);
tp = tcp_drop(tp, ETIMEDOUT);
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 86345b2aa630..587998331fbf 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -1289,6 +1289,7 @@ VNET_DECLARE(int, tcp_perconn_stats_dflt_tpl);
VNET_DECLARE(int, tcp_perconn_stats_enable);
#endif /* STATS */
VNET_DECLARE(int, tcp_recvspace);
+VNET_DECLARE(int, tcp_retries);
VNET_DECLARE(int, tcp_sack_globalholes);
VNET_DECLARE(int, tcp_sack_globalmaxholes);
VNET_DECLARE(int, tcp_sack_maxholes);
@@ -1335,6 +1336,7 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable)
#endif /* STATS */
#define V_tcp_recvspace VNET(tcp_recvspace)
+#define V_tcp_retries VNET(tcp_retries)
#define V_tcp_sack_globalholes VNET(tcp_sack_globalholes)
#define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes)
#define V_tcp_sack_maxholes VNET(tcp_sack_maxholes)