git: 97e28f0f58cd - main - tcp: Rack ack war with a mis-behaving firewall or nat with resets.

From: Randall Stewart <rrs_at_FreeBSD.org>
Date: Wed, 17 Nov 2021 14:47:32 UTC
The branch main has been updated by rrs:

URL: https://cgit.FreeBSD.org/src/commit/?id=97e28f0f58cda4488ccd299470f308b0135a1821

commit 97e28f0f58cda4488ccd299470f308b0135a1821
Author:     Randall Stewart <rrs@FreeBSD.org>
AuthorDate: 2021-11-17 14:45:51 +0000
Commit:     Randall Stewart <rrs@FreeBSD.org>
CommitDate: 2021-11-17 14:45:51 +0000

    tcp: Rack ack war with a mis-behaving firewall or nat with resets.
    
    Previously we added ack-war prevention for misbehaving firewalls. That is
    the case where the f/w or nat messes up its sequence numbers and causes an
    ack-war. There is yet another type of ack war, similar to that one, that we
    have found in the wild. Basically the f/w or nat gets an ack (keep-alive
    probe or such) and, instead of turning the ack/seq around and adding a
    TH_RST, it does something really stupid and sends a new packet with seq=0.
    This of course triggers the challenge-ack path in the reset processing,
    which then sends a challenge ack (if the seq=0 is within the range of
    possible sequence numbers allowed by the challenge), and then we
    rinse-repeat.
    
    This adds the needed tweaks (similar to the previous ack-war prevention, and
    using the same sysctls and counters) to prevent it, allowing, say, 5
    challenge acks per second by default.
    
    Reviewed by: Michael Tuexen
    Sponsored by: Netflix Inc.
    Differential Revision: https://reviews.freebsd.org/D32938
---
 sys/netinet/tcp_stacks/rack.c            | 32 ++++++++++++++++++-------
 sys/netinet/tcp_stacks/rack_bbr_common.c | 40 ++++++++++++++++++++++++++++----
 sys/netinet/tcp_stacks/rack_bbr_common.h |  5 ++--
 3 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 7769aa1272c0..30c8e18f2adb 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -11564,9 +11564,12 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so,
 			return (0);
 		}
 	}
+
 	if ((thflags & TH_RST) ||
 	    (tp->t_fin_is_rst && (thflags & TH_FIN)))
-		return (ctf_process_rst(m, th, so, tp));
+		return (__ctf_process_rst(m, th, so, tp,
+					  &rack->r_ctl.challenge_ack_ts,
+					  &rack->r_ctl.challenge_ack_cnt));
 	/*
 	 * RFC 1323 PAWS: If we have a timestamp reply on this segment and
 	 * it's less than ts_recent, drop it.
@@ -11775,7 +11778,9 @@ rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so,
 
 	if ((thflags & TH_RST) ||
 	    (tp->t_fin_is_rst && (thflags & TH_FIN)))
-		return (ctf_process_rst(m, th, so, tp));
+		return (__ctf_process_rst(m, th, so, tp,
+					  &rack->r_ctl.challenge_ack_ts,
+					  &rack->r_ctl.challenge_ack_cnt));
 
 	/*
 	 * RFC5961 Section 4.2 Send challenge ACK for any SYN in
@@ -11875,7 +11880,9 @@ rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	ctf_calc_rwin(so, tp);
 	if ((thflags & TH_RST) ||
 	    (tp->t_fin_is_rst && (thflags & TH_FIN)))
-		return (ctf_process_rst(m, th, so, tp));
+		return (__ctf_process_rst(m, th, so, tp,
+					  &rack->r_ctl.challenge_ack_ts,
+					  &rack->r_ctl.challenge_ack_cnt));
 	/*
 	 * RFC5961 Section 4.2 Send challenge ACK for any SYN in
 	 * synchronized state.
@@ -12004,7 +12011,9 @@ rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so,
 
 	if ((thflags & TH_RST) ||
 	    (tp->t_fin_is_rst && (thflags & TH_FIN)))
-		return (ctf_process_rst(m, th, so, tp));
+		return (__ctf_process_rst(m, th, so, tp,
+					  &rack->r_ctl.challenge_ack_ts,
+					  &rack->r_ctl.challenge_ack_cnt));
 	/*
 	 * RFC5961 Section 4.2 Send challenge ACK for any SYN in
 	 * synchronized state.
@@ -12131,7 +12140,9 @@ rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so,
 
 	if ((thflags & TH_RST) ||
 	    (tp->t_fin_is_rst && (thflags & TH_FIN)))
-		return (ctf_process_rst(m, th, so, tp));
+		return (__ctf_process_rst(m, th, so, tp,
+					  &rack->r_ctl.challenge_ack_ts,
+					  &rack->r_ctl.challenge_ack_cnt));
 	/*
 	 * RFC5961 Section 4.2 Send challenge ACK for any SYN in
 	 * synchronized state.
@@ -12244,7 +12255,9 @@ rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 
 	if ((thflags & TH_RST) ||
 	    (tp->t_fin_is_rst && (thflags & TH_FIN)))
-		return (ctf_process_rst(m, th, so, tp));
+		return (__ctf_process_rst(m, th, so, tp,
+					  &rack->r_ctl.challenge_ack_ts,
+					  &rack->r_ctl.challenge_ack_cnt));
 	/*
 	 * RFC5961 Section 4.2 Send challenge ACK for any SYN in
 	 * synchronized state.
@@ -12358,7 +12371,9 @@ rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	/* Reset receive buffer auto scaling when not in bulk receive mode. */
 	if ((thflags & TH_RST) ||
 	    (tp->t_fin_is_rst && (thflags & TH_FIN)))
-		return (ctf_process_rst(m, th, so, tp));
+		return (__ctf_process_rst(m, th, so, tp,
+					  &rack->r_ctl.challenge_ack_ts,
+					  &rack->r_ctl.challenge_ack_cnt));
 	/*
 	 * RFC5961 Section 4.2 Send challenge ACK for any SYN in
 	 * synchronized state.
@@ -14566,7 +14581,8 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		rack_check_probe_rtt(rack, us_cts);
 	}
 	rack_clear_rate_sample(rack);
-	if (rack->forced_ack) {
+	if ((rack->forced_ack) &&
+	    ((th->th_flags & TH_RST) == 0)) {
 		rack_handle_probe_response(rack, tiwin, us_cts);
 	}
 	/*
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c
index bf93359368f9..88e028109c95 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.c
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.c
@@ -764,7 +764,8 @@ ctf_do_drop(struct mbuf *m, struct tcpcb *tp)
 }
 
 int
-ctf_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp)
+__ctf_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so,
+		struct tcpcb *tp, uint32_t *ts, uint32_t *cnt)
 {
 	/*
 	 * RFC5961 Section 3.2
@@ -811,11 +812,40 @@ ctf_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcp
 			dropped = 1;
 			ctf_do_drop(m, tp);
 		} else {
+			int send_challenge;
+
 			KMOD_TCPSTAT_INC(tcps_badrst);
-			/* Send challenge ACK. */
-			tcp_respond(tp, mtod(m, void *), th, m,
-			    tp->rcv_nxt, tp->snd_nxt, TH_ACK);
-			tp->last_ack_sent = tp->rcv_nxt;
+			if ((ts != NULL) && (cnt != NULL) &&
+			    (tcp_ack_war_time_window > 0) &&
+			    (tcp_ack_war_cnt > 0)) {
+				/* Possibly rate-limit challenge ACKs to prevent an ack-rst war */
+				uint32_t cts;
+
+				/*
+				 * We use a msec tick here which gives us
+				 * roughly 49 days. We don't need the
+				 * precision of a microsecond timestamp which
+				 * would only give us hours.
+				 */
+				cts = tcp_ts_getticks();
+				if (TSTMP_LT((*ts), cts)) {
+					/* Timestamp is in the past */
+					*cnt = 0;
+					*ts = (cts + tcp_ack_war_time_window);
+				}
+				if (*cnt < tcp_ack_war_cnt) {
+					*cnt = (*cnt + 1);
+					send_challenge = 1;
+				} else
+					send_challenge = 0;
+			} else
+				send_challenge = 1;
+			if (send_challenge) {
+				/* Send challenge ACK. */
+				tcp_respond(tp, mtod(m, void *), th, m,
+					    tp->rcv_nxt, tp->snd_nxt, TH_ACK);
+				tp->last_ack_sent = tp->rcv_nxt;
+			}
 		}
 	} else {
 		m_freem(m);
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.h b/sys/netinet/tcp_stacks/rack_bbr_common.h
index 49ec5c9ce4fa..9a711253d978 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.h
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.h
@@ -119,8 +119,9 @@ void
 ctf_do_drop(struct mbuf *m, struct tcpcb *tp);
 
 int
-ctf_process_rst(struct mbuf *m, struct tcphdr *th,
-    struct socket *so, struct tcpcb *tp);
+__ctf_process_rst(struct mbuf *m, struct tcphdr *th,
+      struct socket *so, struct tcpcb *tp, uint32_t *ts, uint32_t *cnt);
+#define ctf_process_rst(m, t, s, p) __ctf_process_rst(m, t, s, p, NULL, NULL)
 
 void
 ctf_challenge_ack(struct mbuf *m, struct tcphdr *th,