git: 83c1ec92e454 - main - tcp: ECN preparations for ECN++, AccECN (tcp_respond)

From: Richard Scheffenegger <rscheff_at_FreeBSD.org>
Date: Thu, 20 Oct 2022 19:49:52 UTC
The branch main has been updated by rscheff:

URL: https://cgit.FreeBSD.org/src/commit/?id=83c1ec92e454a7592dd15b15b738d18ae36575e0

commit 83c1ec92e454a7592dd15b15b738d18ae36575e0
Author:     Richard Scheffenegger <rscheff@FreeBSD.org>
AuthorDate: 2022-10-20 19:20:13 +0000
Commit:     Richard Scheffenegger <rscheff@FreeBSD.org>
CommitDate: 2022-10-20 19:48:27 +0000

    tcp: ECN preparations for ECN++, AccECN (tcp_respond)
    
    tcp_respond() is another function that quickly builds a TCP
    control packet. With ECN++ and AccECN, both the ECN field in
    the IP header and the TCP ECN flags are supposed to reflect
    the correct state.
    
    Also ensure that, when multiple ECN SYN-ACKs are received, the
    responses they trigger reflect the latest state.
    
    Reviewed By:            tuexen, #transport
    Sponsored by:           NetApp, Inc.
    Differential Revision:  https://reviews.freebsd.org/D36973
---
 sys/netinet/tcp_ecn.c                    | 11 +++++++++++
 sys/netinet/tcp_input.c                  |  1 +
 sys/netinet/tcp_stacks/bbr.c             | 12 ++++++------
 sys/netinet/tcp_stacks/rack.c            | 12 ++++++------
 sys/netinet/tcp_stacks/rack_bbr_common.c |  4 +++-
 sys/netinet/tcp_stacks/rack_bbr_common.h |  2 +-
 sys/netinet/tcp_subr.c                   |  8 ++++++--
 sys/netinet/tcp_var.h                    |  2 +-
 8 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c
index 28a176dfad2a..c74f4fa7c514 100644
--- a/sys/netinet/tcp_ecn.c
+++ b/sys/netinet/tcp_ecn.c
@@ -116,6 +116,7 @@ tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
 		/* RFC3168 ECN handling */
 		if ((thflags & (TH_CWR | TH_ECE)) == (0 | TH_ECE)) {
 			tp->t_flags2 |= TF2_ECN_PERMIT;
+			tp->t_flags2 &= ~TF2_ACE_PERMIT;
 			TCPSTAT_INC(tcps_ecn_shs);
 		}
 	} else
@@ -133,11 +134,13 @@ tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
 		/* RFC3168 SYN */
 		case (0|0|TH_ECE):
 			tp->t_flags2 |= TF2_ECN_PERMIT;
+			tp->t_flags2 &= ~TF2_ACE_PERMIT;
 			TCPSTAT_INC(tcps_ecn_shs);
 			break;
 		/* non-ECT SYN */
 		case (0|TH_CWR|0):
 			tp->t_flags2 |= TF2_ACE_PERMIT;
+			tp->t_flags2 &= ~TF2_ECN_PERMIT;
 			tp->t_scep = 5;
 			TCPSTAT_INC(tcps_ecn_shs);
 			TCPSTAT_INC(tcps_ace_nect);
@@ -145,6 +148,7 @@ tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
 		/* ECT0 SYN */
 		case (TH_AE|0|0):
 			tp->t_flags2 |= TF2_ACE_PERMIT;
+			tp->t_flags2 &= ~TF2_ECN_PERMIT;
 			tp->t_scep = 5;
 			TCPSTAT_INC(tcps_ecn_shs);
 			TCPSTAT_INC(tcps_ace_ect0);
@@ -152,6 +156,7 @@ tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
 		/* ECT1 SYN */
 		case (0|TH_CWR|TH_ECE):
 			tp->t_flags2 |= TF2_ACE_PERMIT;
+			tp->t_flags2 &= ~TF2_ECN_PERMIT;
 			tp->t_scep = 5;
 			TCPSTAT_INC(tcps_ecn_shs);
 			TCPSTAT_INC(tcps_ace_ect1);
@@ -159,6 +164,7 @@ tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
 		/* CE SYN */
 		case (TH_AE|TH_CWR|0):
 			tp->t_flags2 |= TF2_ACE_PERMIT;
+			tp->t_flags2 &= ~TF2_ECN_PERMIT;
 			tp->t_scep = 6;
 			/*
 			 * reduce the IW to 2 MSS (to
@@ -170,6 +176,7 @@ tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
 			TCPSTAT_INC(tcps_ace_nect);
 			break;
 		default:
+			tp->t_flags2 &= ~(TF2_ECN_PERMIT | TF2_ACE_PERMIT);
 			break;
 		}
 		/*
@@ -211,6 +218,7 @@ tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
 		/* RFC3168 ECN handling */
 		if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
 			tp->t_flags2 |= TF2_ECN_PERMIT;
+			tp->t_flags2 &= ~TF2_ACE_PERMIT;
 			tp->t_flags2 |= TF2_ECN_SND_ECE;
 			TCPSTAT_INC(tcps_ecn_shs);
 		}
@@ -221,14 +229,17 @@ tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
 		switch (thflags & (TH_AE | TH_CWR | TH_ECE)) {
 		default:
 		case (0|0|0):
+			tp->t_flags2 &= ~(TF2_ECN_PERMIT | TF2_ACE_PERMIT);
 			break;
 		case (0|TH_CWR|TH_ECE):
 			tp->t_flags2 |= TF2_ECN_PERMIT;
+			tp->t_flags2 &= ~TF2_ACE_PERMIT;
 			tp->t_flags2 |= TF2_ECN_SND_ECE;
 			TCPSTAT_INC(tcps_ecn_shs);
 			break;
 		case (TH_AE|TH_CWR|TH_ECE):
 			tp->t_flags2 |= TF2_ACE_PERMIT;
+			tp->t_flags2 &= ~TF2_ECN_PERMIT;
 			TCPSTAT_INC(tcps_ecn_shs);
 			/*
 			 * Set the AccECN Codepoints on
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index c1e1f58e315c..672776e4e882 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -2243,6 +2243,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 			tp = tcp_drop(tp, ECONNRESET);
 			rstreason = BANDLIM_UNLIMITED;
 		} else {
+			tcp_ecn_input_syn_sent(tp, thflags, iptos);
 			/* Send challenge ACK. */
 			tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt,
 			    tp->snd_nxt, TH_ACK);
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 40d3a40dbe38..4a768487b9f6 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -9259,7 +9259,7 @@ bbr_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 * synchronized state.
 	 */
 	if (thflags & TH_SYN) {
-		ctf_challenge_ack(m, th, tp, &ret_val);
+		ctf_challenge_ack(m, th, tp, iptos, &ret_val);
 		return (ret_val);
 	}
 	/*
@@ -9355,7 +9355,7 @@ bbr_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 * synchronized state.
 	 */
 	if (thflags & TH_SYN) {
-		ctf_challenge_ack(m, th, tp, &ret_val);
+		ctf_challenge_ack(m, th, tp, iptos, &ret_val);
 		return (ret_val);
 	}
 	/*
@@ -9476,7 +9476,7 @@ bbr_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 * synchronized state.
 	 */
 	if (thflags & TH_SYN) {
-		ctf_challenge_ack(m, th, tp, &ret_val);
+		ctf_challenge_ack(m, th, tp, iptos, &ret_val);
 		return (ret_val);
 	}
 	/*
@@ -9600,7 +9600,7 @@ bbr_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 * synchronized state.
 	 */
 	if (thflags & TH_SYN) {
-		ctf_challenge_ack(m, th, tp, &ret_val);
+		ctf_challenge_ack(m, th, tp, iptos, &ret_val);
 		return (ret_val);
 	}
 	/*
@@ -9710,7 +9710,7 @@ bbr_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 * synchronized state.
 	 */
 	if (thflags & TH_SYN) {
-		ctf_challenge_ack(m, th, tp, &ret_val);
+		ctf_challenge_ack(m, th, tp, iptos, &ret_val);
 		return (ret_val);
 	}
 	/*
@@ -9822,7 +9822,7 @@ bbr_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 * synchronized state.
 	 */
 	if (thflags & TH_SYN) {
-		ctf_challenge_ack(m, th, tp, &ret_val);
+		ctf_challenge_ack(m, th, tp, iptos, &ret_val);
 		return (ret_val);
 	}
 	INP_WLOCK_ASSERT(tp->t_inpcb);
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 74503bc8a1b2..997e032e4a08 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -11679,7 +11679,7 @@ rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 * synchronized state.
 	 */
 	if (thflags & TH_SYN) {
-		ctf_challenge_ack(m, th, tp, &ret_val);
+		ctf_challenge_ack(m, th, tp, iptos, &ret_val);
 		return (ret_val);
 	}
 	/*
@@ -11779,7 +11779,7 @@ rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 * synchronized state.
 	 */
 	if (thflags & TH_SYN) {
-		ctf_challenge_ack(m, th, tp, &ret_val);
+		ctf_challenge_ack(m, th, tp, iptos, &ret_val);
 		return (ret_val);
 	}
 	/*
@@ -11909,7 +11909,7 @@ rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 * synchronized state.
 	 */
 	if (thflags & TH_SYN) {
-		ctf_challenge_ack(m, th, tp, &ret_val);
+		ctf_challenge_ack(m, th, tp, iptos, &ret_val);
 		return (ret_val);
 	}
 	/*
@@ -12036,7 +12036,7 @@ rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 * synchronized state.
 	 */
 	if (thflags & TH_SYN) {
-		ctf_challenge_ack(m, th, tp, &ret_val);
+		ctf_challenge_ack(m, th, tp, iptos, &ret_val);
 		return (ret_val);
 	}
 	/*
@@ -12149,7 +12149,7 @@ rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 * synchronized state.
 	 */
 	if (thflags & TH_SYN) {
-		ctf_challenge_ack(m, th, tp, &ret_val);
+		ctf_challenge_ack(m, th, tp, iptos, &ret_val);
 		return (ret_val);
 	}
 	/*
@@ -12263,7 +12263,7 @@ rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 * synchronized state.
 	 */
 	if (thflags & TH_SYN) {
-		ctf_challenge_ack(m, th, tp, &ret_val);
+		ctf_challenge_ack(m, th, tp, iptos, &ret_val);
 		return (ret_val);
 	}
 	/*
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c
index d1150c4e4047..467e615fe250 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.c
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.c
@@ -95,6 +95,7 @@ __FBSDID("$FreeBSD$");
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcpip.h>
+#include <netinet/tcp_ecn.h>
 #include <netinet/tcp_hpts.h>
 #include <netinet/tcp_lro.h>
 #include <netinet/cc/cc.h>
@@ -860,7 +861,7 @@ __ctf_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so,
  * and valid.
  */
 void
-ctf_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ret_val)
+ctf_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, uint8_t iptos, int32_t * ret_val)
 {
 
 	NET_EPOCH_ASSERT();
@@ -873,6 +874,7 @@ ctf_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t *
 		*ret_val = 1;
 		ctf_do_drop(m, tp);
 	} else {
+		tcp_ecn_input_syn_sent(tp, tcp_get_flags(th), iptos);
 		/* Send challenge ACK. */
 		tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt,
 		    tp->snd_nxt, TH_ACK);
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.h b/sys/netinet/tcp_stacks/rack_bbr_common.h
index 9a711253d978..688c64dd92c3 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.h
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.h
@@ -125,7 +125,7 @@ __ctf_process_rst(struct mbuf *m, struct tcphdr *th,
 
 void
 ctf_challenge_ack(struct mbuf *m, struct tcphdr *th,
-    struct tcpcb *tp, int32_t * ret_val);
+    struct tcpcb *tp, uint8_t iptos, int32_t * ret_val);
 
 int
 ctf_ts_check(struct mbuf *m, struct tcphdr *th,
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 8fdaab35fb19..b78967a0f20c 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -105,6 +105,7 @@ __FBSDID("$FreeBSD$");
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
+#include <netinet/tcp_ecn.h>
 #include <netinet/tcp_log_buf.h>
 #include <netinet/tcp_syncache.h>
 #include <netinet/tcp_hpts.h>
@@ -1778,7 +1779,7 @@ tcpip_maketemplate(struct inpcb *inp)
  */
 void
 tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
-    tcp_seq ack, tcp_seq seq, int flags)
+    tcp_seq ack, tcp_seq seq, uint16_t flags)
 {
 	struct tcpopt to;
 	struct inpcb *inp;
@@ -1793,6 +1794,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
 	int isipv6;
 #endif /* INET6 */
 	int optlen, tlen, win, ulen;
+	int ect = 0;
 	bool incl_opts;
 	uint16_t port;
 	int output_ret;
@@ -1980,6 +1982,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
 	m->m_len = tlen;
 	to.to_flags = 0;
 	if (incl_opts) {
+		ect = tcp_ecn_output_established(tp, &flags, 0, false);
 		/* Make sure we have room. */
 		if (M_TRAILINGSPACE(m) < TCP_MAXOLEN) {
 			m->m_next = m_get(M_NOWAIT, MT_DATA);
@@ -2018,7 +2021,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
 			ulen = tlen - sizeof(struct ip6_hdr);
 			uh->uh_ulen = htons(ulen);
 		}
-		ip6->ip6_flow = 0;
+		ip6->ip6_flow = htonl(ect << 20);
 		ip6->ip6_vfc = IPV6_VERSION;
 		if (port)
 			ip6->ip6_nxt = IPPROTO_UDP;
@@ -2036,6 +2039,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
 			ulen = tlen - sizeof(struct ip);
 			uh->uh_ulen = htons(ulen);
 		}
+		ip->ip_tos = ect;
 		ip->ip_len = htons(tlen);
 		ip->ip_ttl = V_ip_defttl;
 		if (port) {
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 1c6e0bbccbab..1514e016ee13 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -1157,7 +1157,7 @@ struct tcpcb *
 int	 tcp_default_output(struct tcpcb *);
 void	 tcp_state_change(struct tcpcb *, int);
 void	 tcp_respond(struct tcpcb *, void *,
-	    struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
+	    struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, uint16_t);
 bool	 tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *,
 	    struct mbuf *, int);
 void	 tcp_setpersist(struct tcpcb *);