git: f7220c486c1b - main - tcp: move ECN handling code to a common file

From: Richard Scheffenegger <rscheff_at_FreeBSD.org>
Date: Sat, 05 Feb 2022 14:05:23 UTC
The branch main has been updated by rscheff:

URL: https://cgit.FreeBSD.org/src/commit/?id=f7220c486c1bf858e97c2d8e5c4c9dac4947d50a

commit f7220c486c1bf858e97c2d8e5c4c9dac4947d50a
Author:     Richard Scheffenegger <rscheff@FreeBSD.org>
AuthorDate: 2022-02-05 13:59:00 +0000
Commit:     Richard Scheffenegger <rscheff@FreeBSD.org>
CommitDate: 2022-02-05 14:04:42 +0000

    tcp: move ECN handling code to a common file
    
    Reduce the burden to maintain correct and
    extensible ECN related code across multiple
    stacks and codepaths.
    
    Formally no functional change.
    
    Incidentally this establishes correct
    ECN operation in one instance.
    
    Reviewed By: rrs, #transport
    Sponsored by:        NetApp, Inc.
    Differential Revision: https://reviews.freebsd.org/D34162
---
 sys/conf/files                |   1 +
 sys/netinet/tcp_ecn.c         | 297 ++++++++++++++++++++++++++++++++++++++++++
 sys/netinet/tcp_ecn.h         |  55 ++++++++
 sys/netinet/tcp_input.c       |  46 +------
 sys/netinet/tcp_output.c      |  66 +++-------
 sys/netinet/tcp_stacks/rack.c | 198 +++++++++-------------------
 sys/netinet/tcp_syncache.c    |  15 +--
 7 files changed, 451 insertions(+), 227 deletions(-)

diff --git a/sys/conf/files b/sys/conf/files
index 78921d2c9fa0..148bd9f4f7b4 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4364,6 +4364,7 @@ netinet/sctp_usrreq.c		optional inet sctp | inet6 sctp
 netinet/sctputil.c		optional inet sctp | inet6 sctp
 netinet/siftr.c			optional inet siftr alq | inet6 siftr alq
 netinet/tcp_debug.c		optional tcpdebug
+netinet/tcp_ecn.c		optional inet | inet6
 netinet/tcp_fastopen.c		optional inet tcp_rfc7413 | inet6 tcp_rfc7413
 netinet/tcp_hostcache.c		optional inet | inet6
 netinet/tcp_input.c		optional inet | inet6
diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c
new file mode 100644
index 000000000000..2ef552af93bf
--- /dev/null
+++ b/sys/netinet/tcp_ecn.c
@@ -0,0 +1,297 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
+ *      The Regents of the University of California.  All rights reserved.
+ * Copyright (c) 2007-2008,2010
+ *      Swinburne University of Technology, Melbourne, Australia.
+ * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
+ * Copyright (c) 2010 The FreeBSD Foundation
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com>
+ * All rights reserved.
+ *
+ * Portions of this software were developed at the Centre for Advanced Internet
+ * Architectures, Swinburne University of Technology, by Lawrence Stewart,
+ * James Healy and David Hayes, made possible in part by a grant from the Cisco
+ * University Research Program Fund at Community Foundation Silicon Valley.
+ *
+ * Portions of this software were developed at the Centre for Advanced
+ * Internet Architectures, Swinburne University of Technology, Melbourne,
+ * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
+ *
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *      @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95
+ */
+
+/*
+ * Utility functions to deal with Explicit Congestion Notification in TCP
+ * implementing the essential parts of the Accurate ECN extension
+ * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <machine/cpu.h>
+
+#include <vm/uma.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/route.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet6/nd6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_pcb.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
+#include <netinet/tcp_timer.h>
+#include <netinet6/tcp6_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_ecn.h>
+
+
+/*
+ * Process incoming <SYN,ACK>: ECE without CWR permits ECN; iptos is unused.
+ */
+void
+tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
+{
+	thflags &= (TH_CWR|TH_ECE);
+
+	if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
+	    V_tcp_do_ecn) {
+		tp->t_flags2 |= TF2_ECN_PERMIT;
+		KMOD_TCPSTAT_INC(tcps_ecn_shs);
+	}
+}
+
+/*
+ * Handle parallel SYN for ECN (a SYN without ACK); iptos is unused.
+ */
+void
+tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
+{
+	if (thflags & TH_ACK)
+		return;
+	if (V_tcp_do_ecn == 0)
+		return;
+	if ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2)) {
+		/* RFC3168: ECE and CWR both set permits ECN and sets SND_ECE */
+		if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
+			tp->t_flags2 |= TF2_ECN_PERMIT;
+			tp->t_flags2 |= TF2_ECN_SND_ECE;
+			KMOD_TCPSTAT_INC(tcps_ecn_shs);
+		}
+	}
+}
+
+/*
+ * TCP ECN input processing; returns 1 if ECE was seen with ECN permitted.
+ */
+int
+tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos)
+{
+	int delta_ace = 0;
+
+	if (tp->t_flags2 & TF2_ECN_PERMIT) {
+		switch (iptos & IPTOS_ECN_MASK) {
+		case IPTOS_ECN_CE:
+			KMOD_TCPSTAT_INC(tcps_ecn_ce);
+			break;
+		case IPTOS_ECN_ECT0:
+			KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+			break;
+		case IPTOS_ECN_ECT1:
+			KMOD_TCPSTAT_INC(tcps_ecn_ect1);
+			break;
+		}
+
+		/* RFC3168: CWR clears SND_ECE, then a CE mark sets it */
+		if (thflags & TH_ECE)
+			delta_ace = 1;
+		if (thflags & TH_CWR) {
+			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+			tp->t_flags |= TF_ACKNOW;
+		}
+		if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+			tp->t_flags2 |= TF2_ECN_SND_ECE;
+
+		/* Process a packet differently from RFC3168. */
+		cc_ecnpkt_handler_flags(tp, thflags, iptos);
+	}
+
+	return delta_ace;
+}
+
+/*
+ * Return the ECN setup <SYN> header flags (ECE|CWR), or 0 for no ECN.
+ */
+uint16_t
+tcp_ecn_output_syn_sent(struct tcpcb *tp)
+{
+	uint16_t thflags = 0;
+
+	if (V_tcp_do_ecn == 1) {
+		/* RFC3168 ECN setup <SYN>; dropped past V_tcp_ecn_maxretries */
+		if (tp->t_rxtshift >= 1) {
+			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
+				thflags = TH_ECE|TH_CWR;
+		} else
+			thflags = TH_ECE|TH_CWR;
+	}
+
+	return thflags;
+}
+
+/*
+ * Output processing of the ECN feature: ORs CWR/ECE into *thflags and
+ * returns the IP ECN codepoint. TF2_ECN_PERMIT is left to the caller.
+ */
+int
+tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len)
+{
+	int ipecn = IPTOS_ECN_NOTECT;
+	bool newdata;
+
+	/*
+	 * Mark new data with ECN capable transmission (ECT0). Pure control
+	 * packets, snd_nxt < snd_max retransmissions and one-byte forced
+	 * window probes stay Not-ECT. NOTE(review): no sack_rxmit input,
+	 * unlike the tcp_output.c code this replaces -- confirm intent.
+	 */
+	newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
+		    !((tp->t_flags & TF_FORCEDATA) && len == 1));
+	if (newdata) {
+		ipecn = IPTOS_ECN_ECT0;
+		KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+	}
+	/*
+	 * CWR goes out once, on new data only; ECE whenever SND_ECE is set.
+	 */
+	if (newdata &&
+	    (tp->t_flags2 & TF2_ECN_SND_CWR)) {
+		*thflags |= TH_CWR;
+		tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+	}
+	if (tp->t_flags2 & TF2_ECN_SND_ECE)
+		*thflags |= TH_ECE;
+
+	return ipecn;
+}
+
+/*
+ * Set up the ECN related tcpcb fields from a syncache entry:
+ * SCF_ECN in sc_flags sets TF2_ECN_PERMIT, nothing else is touched.
+ */
+void
+tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc)
+{
+	if (sc->sc_flags & SCF_ECN) {
+		switch (sc->sc_flags & SCF_ECN) {
+		case SCF_ECN:
+			tp->t_flags2 |= TF2_ECN_PERMIT;
+			break;
+		/* undefined SCF codepoint: leave tp unchanged */
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * Process a <SYN> packet's ECN information and return the SCF_* flags
+ * for the syncache: SCF_ECN or 0. iptos is not used here.
+ */
+int
+tcp_ecn_syncache_add(uint16_t thflags, int iptos)
+{
+	int scflags = 0;
+
+	switch (thflags & (TH_CWR|TH_ECE)) {
+	/* no ECN: neither CWR nor ECE set */
+	case (0|0):
+		break;
+	/* legacy ECN: both CWR and ECE set */
+	case (TH_CWR|TH_ECE):
+		scflags = SCF_ECN;
+		break;
+	default:
+		break;
+	}
+	return scflags;
+}
+
+/*
+ * Set up the ECN information for the <SYN,ACK> from syncache information:
+ * returns thflags with ECE added when TH_SYN is set and sc has SCF_ECN.
+ */
+uint16_t
+tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
+{
+	if ((thflags & TH_SYN) &&
+	    (sc->sc_flags & SCF_ECN)) {
+		switch (sc->sc_flags & SCF_ECN) {
+		case SCF_ECN:
+			thflags |= (0 | TH_ECE);
+			KMOD_TCPSTAT_INC(tcps_ecn_shs);
+			break;
+		/* undefined SCF codepoint: thflags returned unchanged */
+		default:
+			break;
+		}
+	}
+	return thflags;
+}
diff --git a/sys/netinet/tcp_ecn.h b/sys/netinet/tcp_ecn.h
new file mode 100644
index 000000000000..b8a52bac83b2
--- /dev/null
+++ b/sys/netinet/tcp_ecn.h
@@ -0,0 +1,55 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1982, 1986, 1993, 1994, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_ecn.h	8.4 (Berkeley) 5/24/95
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_ECN_H_
+#define _NETINET_TCP_ECN_H_
+
+#ifdef _KERNEL
+
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
+
+void	 tcp_ecn_input_syn_sent(struct tcpcb *, uint16_t, int);
+void	 tcp_ecn_input_parallel_syn(struct tcpcb *, uint16_t, int);
+int	 tcp_ecn_input_segment(struct tcpcb *, uint16_t, int);
+uint16_t tcp_ecn_output_syn_sent(struct tcpcb *);
+int	 tcp_ecn_output_established(struct tcpcb *, uint16_t *, int);
+void	 tcp_ecn_syncache_socket(struct tcpcb *, struct syncache *);
+int	 tcp_ecn_syncache_add(uint16_t, int);
+uint16_t tcp_ecn_syncache_respond(uint16_t, struct syncache *);
+
+#endif /* _KERNEL */
+
+#endif /* _NETINET_TCP_ECN_H_ */
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 9a1f3ace2541..695cd5a916db 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -123,6 +123,7 @@ __FBSDID("$FreeBSD$");
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
+#include <netinet/tcp_ecn.h>
 #include <netinet/udp.h>
 
 #include <netipsec/ipsec_support.h>
@@ -1517,7 +1518,8 @@ void
 tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
     struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
 {
-	int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
+	uint16_t thflags;
+	int acked, ourfinisacked, needoutput = 0, sack_changed;
 	int rstreason, todrop, win, incforsyn = 0;
 	uint32_t tiwin;
 	uint16_t nsegs;
@@ -1597,32 +1599,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	/*
 	 * TCP ECN processing.
 	 */
-	if (tp->t_flags2 & TF2_ECN_PERMIT) {
-		if (thflags & TH_CWR) {
-			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
-			tp->t_flags |= TF_ACKNOW;
-		}
-		switch (iptos & IPTOS_ECN_MASK) {
-		case IPTOS_ECN_CE:
-			tp->t_flags2 |= TF2_ECN_SND_ECE;
-			TCPSTAT_INC(tcps_ecn_ce);
-			break;
-		case IPTOS_ECN_ECT0:
-			TCPSTAT_INC(tcps_ecn_ect0);
-			break;
-		case IPTOS_ECN_ECT1:
-			TCPSTAT_INC(tcps_ecn_ect1);
-			break;
-		}
-
-		/* Process a packet differently from RFC3168. */
-		cc_ecnpkt_handler(tp, th, iptos);
-
-		/* Congestion experienced. */
-		if (thflags & TH_ECE) {
-			cc_cong_signal(tp, th, CC_ECN);
-		}
-	}
+	if (tcp_ecn_input_segment(tp, thflags, iptos))
+		cc_cong_signal(tp, th, CC_ECN);
 
 	/*
 	 * Parse options on any incoming segment.
@@ -1663,13 +1641,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 */
 	if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
 		/* Handle parallel SYN for ECN */
-		if (!(thflags & TH_ACK) &&
-		    ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) &&
-		    ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
-			tp->t_flags2 |= TF2_ECN_PERMIT;
-			tp->t_flags2 |= TF2_ECN_SND_ECE;
-			TCPSTAT_INC(tcps_ecn_shs);
-		}
+		tcp_ecn_input_parallel_syn(tp, thflags, iptos);
 		if ((to.to_flags & TOF_SCALE) &&
 		    (tp->t_flags & TF_REQ_SCALE) &&
 		    !(tp->t_flags & TF_NOOPT)) {
@@ -2075,11 +2047,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 			else
 				tp->t_flags |= TF_ACKNOW;
 
-			if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
-			    (V_tcp_do_ecn == 1)) {
-				tp->t_flags2 |= TF2_ECN_PERMIT;
-				TCPSTAT_INC(tcps_ecn_shs);
-			}
+			tcp_ecn_input_syn_sent(tp, thflags, iptos);
 
 			/*
 			 * Received <SYN,ACK> in SYN_SENT[*] state.
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index ce6d9b86e73f..0817997c72d3 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -84,8 +84,9 @@ __FBSDID("$FreeBSD$");
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_log_buf.h>
 #include <netinet/tcp_seq.h>
-#include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
+#include <netinet/tcp_timer.h>
 #include <netinet/tcpip.h>
 #include <netinet/cc/cc.h>
 #include <netinet/tcp_fastopen.h>
@@ -98,6 +99,7 @@ __FBSDID("$FreeBSD$");
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
+#include <netinet/tcp_ecn.h>
 
 #include <netipsec/ipsec_support.h>
 
@@ -199,7 +201,8 @@ tcp_default_output(struct tcpcb *tp)
 	struct socket *so = tp->t_inpcb->inp_socket;
 	int32_t len;
 	uint32_t recwin, sendwin;
-	int off, flags, error = 0;	/* Keep compiler happy */
+	uint16_t flags;
+	int off, error = 0;	/* Keep compiler happy */
 	u_int if_hw_tsomaxsegcount = 0;
 	u_int if_hw_tsomaxsegsize = 0;
 	struct mbuf *m;
@@ -1197,54 +1200,27 @@ send:
 	 * resend those bits a number of times as per
 	 * RFC 3168.
 	 */
-	if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) {
-		if (tp->t_rxtshift >= 1) {
-			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
-				flags |= TH_ECE|TH_CWR;
-		} else
-			flags |= TH_ECE|TH_CWR;
-	}
-	/* Handle parallel SYN for ECN */
-	if ((tp->t_state == TCPS_SYN_RECEIVED) &&
-	    (tp->t_flags2 & TF2_ECN_SND_ECE)) {
-			flags |= TH_ECE;
-			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+	if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+		flags |= tcp_ecn_output_syn_sent(tp);
 	}
-
-	if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+	/* Also handle parallel SYN for ECN */
+	if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
 	    (tp->t_flags2 & TF2_ECN_PERMIT)) {
-		/*
-		 * If the peer has ECN, mark data packets with
-		 * ECN capable transmission (ECT).
-		 * Ignore pure ack packets, retransmissions and window probes.
-		 */
-		if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
-		    (sack_rxmit == 0) &&
-		    !((tp->t_flags & TF_FORCEDATA) && len == 1 &&
-		    SEQ_LT(tp->snd_una, tp->snd_max))) {
+		int ect = tcp_ecn_output_established(tp, &flags, len);
+		if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+		    (tp->t_flags2 & TF2_ECN_SND_ECE))
+			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
 #ifdef INET6
-			if (isipv6) {
-				ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
-				ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
-			}
-			else
+		if (isipv6) {
+			ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+			ip6->ip6_flow |= htonl(ect << 20);
+		}
+		else
 #endif
-			{
-				ip->ip_tos &= ~IPTOS_ECN_MASK;
-				ip->ip_tos |= IPTOS_ECN_ECT0;
-			}
-			TCPSTAT_INC(tcps_ecn_ect0);
-			/*
-			 * Reply with proper ECN notifications.
-			 * Only set CWR on new data segments.
-			 */
-			if (tp->t_flags2 & TF2_ECN_SND_CWR) {
-				flags |= TH_CWR;
-				tp->t_flags2 &= ~TF2_ECN_SND_CWR;
-			}
+		{
+			ip->ip_tos &= ~IPTOS_ECN_MASK;
+			ip->ip_tos |= ect;
 		}
-		if (tp->t_flags2 & TF2_ECN_SND_ECE)
-			flags |= TH_ECE;
 	}
 
 	/*
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 6d5b3f2133a6..4ad6a12ee43b 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -93,6 +93,7 @@ __FBSDID("$FreeBSD$");
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
 #include <netinet/tcp_hpts.h>
 #include <netinet/tcp_ratelimit.h>
 #include <netinet/tcp_accounting.h>
@@ -113,6 +114,7 @@ __FBSDID("$FreeBSD$");
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
+#include <netinet/tcp_ecn.h>
 
 #include <netipsec/ipsec_support.h>
 
@@ -11406,11 +11408,9 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so,
 			tp->t_flags |= TF_ACKNOW;
 			rack->rc_dack_toggle = 0;
 		}
-		if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
-		    (V_tcp_do_ecn == 1)) {
-			tp->t_flags2 |= TF2_ECN_PERMIT;
-			KMOD_TCPSTAT_INC(tcps_ecn_shs);
-		}
+
+		tcp_ecn_input_syn_sent(tp, thflags, iptos);
+
 		if (SEQ_GT(th->th_ack, tp->snd_una)) {
 			/*
 			 * We advance snd_una for the
@@ -13683,31 +13683,8 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb
 		}
 		tp->t_rcvtime = ticks;
 		/* Now what about ECN? */
-		if (tp->t_flags2 & TF2_ECN_PERMIT) {
-			if (ae->flags & TH_CWR) {
-				tp->t_flags2 &= ~TF2_ECN_SND_ECE;
-				tp->t_flags |= TF_ACKNOW;
-			}
-			switch (ae->codepoint & IPTOS_ECN_MASK) {
-			case IPTOS_ECN_CE:
-				tp->t_flags2 |= TF2_ECN_SND_ECE;
-				KMOD_TCPSTAT_INC(tcps_ecn_ce);
-				break;
-			case IPTOS_ECN_ECT0:
-				KMOD_TCPSTAT_INC(tcps_ecn_ect0);
-				break;
-			case IPTOS_ECN_ECT1:
-				KMOD_TCPSTAT_INC(tcps_ecn_ect1);
-				break;
-			}
-
-			/* Process a packet differently from RFC3168. */
-			cc_ecnpkt_handler_flags(tp, ae->flags, ae->codepoint);
-			/* Congestion experienced. */
-			if (ae->flags & TH_ECE) {
-				rack_cong_signal(tp,  CC_ECN, ae->ack);
-			}
-		}
+		if (tcp_ecn_input_segment(tp, ae->flags, ae->codepoint))
+			rack_cong_signal(tp, CC_ECN, ae->ack);
 #ifdef TCP_ACCOUNTING
 		/* Count for the specific type of ack in */
 		counter_u64_add(tcp_cnt_counters[ae->ack_val_set], 1);
@@ -14457,32 +14434,8 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 * TCP ECN processing. XXXJTL: If we ever use ECN, we need to move
 	 * this to occur after we've validated the segment.
 	 */
-	if (tp->t_flags2 & TF2_ECN_PERMIT) {
-		if (thflags & TH_CWR) {
-			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
-			tp->t_flags |= TF_ACKNOW;
-		}
-		switch (iptos & IPTOS_ECN_MASK) {
-		case IPTOS_ECN_CE:
-			tp->t_flags2 |= TF2_ECN_SND_ECE;
-			KMOD_TCPSTAT_INC(tcps_ecn_ce);
-			break;
-		case IPTOS_ECN_ECT0:
-			KMOD_TCPSTAT_INC(tcps_ecn_ect0);
-			break;
-		case IPTOS_ECN_ECT1:
-			KMOD_TCPSTAT_INC(tcps_ecn_ect1);
-			break;
-		}
-
-		/* Process a packet differently from RFC3168. */
-		cc_ecnpkt_handler(tp, th, iptos);
-
-		/* Congestion experienced. */
-		if (thflags & TH_ECE) {
-			rack_cong_signal(tp, CC_ECN, th->th_ack);
-		}
-	}
+	if (tcp_ecn_input_segment(tp, thflags, iptos))
+		rack_cong_signal(tp, CC_ECN, th->th_ack);
 
 	/*
 	 * If echoed timestamp is later than the current time, fall back to
@@ -14516,13 +14469,7 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		 */
 		if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
 			/* Handle parallel SYN for ECN */
-			if (!(thflags & TH_ACK) &&
-			    ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) &&
-			    ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
-				tp->t_flags2 |= TF2_ECN_PERMIT;
-				tp->t_flags2 |= TF2_ECN_SND_ECE;
-				TCPSTAT_INC(tcps_ecn_shs);
-			}
+			tcp_ecn_input_parallel_syn(tp, thflags, iptos);
 			if ((to.to_flags & TOF_SCALE) &&
 			    (tp->t_flags & TF_REQ_SCALE)) {
 				tp->t_flags |= TF_RCVD_SCALE;
@@ -15879,7 +15826,8 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
 	struct tcpopt to;
 	u_char opt[TCP_MAXOLEN];
 	uint32_t hdrlen, optlen;
-	int32_t slot, segsiz, max_val, tso = 0, error, flags, ulen = 0;
+	int32_t slot, segsiz, max_val, tso = 0, error, ulen = 0;
+	uint16_t flags;
 	uint32_t if_hw_tsomaxsegcount = 0, startseq;
 	uint32_t if_hw_tsomaxsegsize;
 
@@ -16006,7 +15954,6 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
 	if ((rsm->r_flags & RACK_HAD_PUSH) &&
 	    (len == (rsm->r_end - rsm->r_start)))
 		flags |= TH_PUSH;
-	tcp_set_flags(th, flags);
 	th->th_win = htons((u_short)(rack->r_ctl.fsb.recwin >> tp->rcv_scale));
 	if (th->th_win == 0) {
 		tp->t_sndzerowin++;
@@ -16056,6 +16003,25 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
 		udp->uh_ulen = htons(ulen);
 	}
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
+	if (TCPS_HAVERCVDSYN(tp->t_state) &&
+	    (tp->t_flags2 & TF2_ECN_PERMIT)) {
+		int ect = tcp_ecn_output_established(tp, &flags, len);
+		if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+		    (tp->t_flags2 & TF2_ECN_SND_ECE))
+		    tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+#ifdef INET6
+		if (rack->r_is_v6) {
+		    ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+		    ip6->ip6_flow |= htonl(ect << 20);
+		}
+		else
+#endif
+		{
+		    ip->ip_tos &= ~IPTOS_ECN_MASK;
+		    ip->ip_tos |= ect;
+		}
+	}
+	tcp_set_flags(th, flags);
 	m->m_pkthdr.len = hdrlen + len;	/* in6_cksum() need this */
 #ifdef INET6
 	if (rack->r_is_v6) {
@@ -16379,7 +16345,8 @@ rack_fast_output(struct tcpcb *tp, struct tcp_rack *rack, uint64_t ts_val,
 	u_char opt[TCP_MAXOLEN];
 	uint32_t hdrlen, optlen;
 	int cnt_thru = 1;
-	int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, flags, ulen = 0;
+	int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0;
+	uint16_t flags;
 	uint32_t s_soff;
 	uint32_t if_hw_tsomaxsegcount = 0, startseq;
 	uint32_t if_hw_tsomaxsegsize;
@@ -16528,37 +16495,23 @@ again:
 		udp->uh_ulen = htons(ulen);
 	}
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
-	if (tp->t_state == TCPS_ESTABLISHED &&
+	if (TCPS_HAVERCVDSYN(tp->t_state) &&
 	    (tp->t_flags2 & TF2_ECN_PERMIT)) {
-		/*
-		 * If the peer has ECN, mark data packets with ECN capable
-		 * transmission (ECT). Ignore pure ack packets,
-		 * retransmissions.
-		 */
-		if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max)) {
+		int ect = tcp_ecn_output_established(tp, &flags, len);
+		if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+		    (tp->t_flags2 & TF2_ECN_SND_ECE))
+			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
 #ifdef INET6
-			if (rack->r_is_v6) {
-				ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
-				ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
-			}
-			else
+		if (rack->r_is_v6) {
+			ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+			ip6->ip6_flow |= htonl(ect << 20);
+		}
+		else
 #endif
-			{
-				ip->ip_tos &= ~IPTOS_ECN_MASK;
-				ip->ip_tos |= IPTOS_ECN_ECT0;
-			}
-			KMOD_TCPSTAT_INC(tcps_ecn_ect0);
-			/*
-			 * Reply with proper ECN notifications.
-			 * Only set CWR on new data segments.
-			 */
-			if (tp->t_flags2 & TF2_ECN_SND_CWR) {
-				flags |= TH_CWR;
-				tp->t_flags2 &= ~TF2_ECN_SND_CWR;
-			}
+		{
+			ip->ip_tos &= ~IPTOS_ECN_MASK;
+			ip->ip_tos |= ect;
 		}
-		if (tp->t_flags2 & TF2_ECN_SND_ECE)
-			flags |= TH_ECE;
 	}
 	m->m_pkthdr.len = hdrlen + len;	/* in6_cksum() need this */
 #ifdef INET6
@@ -16786,7 +16739,8 @@ rack_output(struct tcpcb *tp)
 	struct socket *so;
 	uint32_t recwin;
 	uint32_t sb_offset, s_moff = 0;
-	int32_t len, flags, error = 0;
+	int32_t len, error = 0;
+	uint16_t flags;
 	struct mbuf *m, *s_mb = NULL;
 	struct mbuf *mb;
 	uint32_t if_hw_tsomaxsegcount = 0;
@@ -18596,51 +18550,27 @@ send:
 	 * are on a retransmit, we may resend those bits a number of times
 	 * as per RFC 3168.
 	 */
-	if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) {
-		if (tp->t_rxtshift >= 1) {
-			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
-				flags |= TH_ECE | TH_CWR;
-		} else
-			flags |= TH_ECE | TH_CWR;
+	if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+		flags |= tcp_ecn_output_syn_sent(tp);
 	}
-	/* Handle parallel SYN for ECN */
-	if ((tp->t_state == TCPS_SYN_RECEIVED) &&
-	    (tp->t_flags2 & TF2_ECN_SND_ECE)) {
-		flags |= TH_ECE;
-		tp->t_flags2 &= ~TF2_ECN_SND_ECE;
-	}
-	if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+	/* Also handle parallel SYN for ECN */
+	if (TCPS_HAVERCVDSYN(tp->t_state) &&
 	    (tp->t_flags2 & TF2_ECN_PERMIT)) {
-		/*
-		 * If the peer has ECN, mark data packets with ECN capable
-		 * transmission (ECT). Ignore pure ack packets,
-		 * retransmissions.
-		 */
-		if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
-		    (sack_rxmit == 0)) {
+		int ect = tcp_ecn_output_established(tp, &flags, len);
+		if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+		    (tp->t_flags2 & TF2_ECN_SND_ECE))
+			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
 #ifdef INET6
-			if (isipv6) {
-				ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
-				ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
-			}
-			else
+		if (isipv6) {
+			ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+			ip6->ip6_flow |= htonl(ect << 20);
+		}
+		else
 #endif
-			{
-				ip->ip_tos &= ~IPTOS_ECN_MASK;
-				ip->ip_tos |= IPTOS_ECN_ECT0;
-			}
-			KMOD_TCPSTAT_INC(tcps_ecn_ect0);
-			/*
-			 * Reply with proper ECN notifications.
-			 * Only set CWR on new data segments.
-			 */
-			if (tp->t_flags2 & TF2_ECN_SND_CWR) {
-				flags |= TH_CWR;
-				tp->t_flags2 &= ~TF2_ECN_SND_CWR;
-			}
+		{
+			ip->ip_tos &= ~IPTOS_ECN_MASK;
+			ip->ip_tos |= ect;
 		}
-		if (tp->t_flags2 & TF2_ECN_SND_ECE)
-			flags |= TH_ECE;
 	}
 	/*
 	 * If we are doing retransmissions, then snd_nxt will not reflect
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 5fcafa44cc97..ed4adda59c22 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -89,6 +89,7 @@ __FBSDID("$FreeBSD$");
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
+#include <netinet/tcp_ecn.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
@@ -1027,8 +1028,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
 			tp->t_flags |= TF_SACK_PERMIT;
 	}
 
-	if (sc->sc_flags & SCF_ECN)
-		tp->t_flags2 |= TF2_ECN_PERMIT;
+	tcp_ecn_syncache_socket(tp, sc);
 
 	/*
 	 * Set up MSS and get cached values from tcp_hostcache.
@@ -1743,9 +1743,9 @@ skip_alloc:
 		sc->sc_peer_mss = to->to_mss;	/* peer mss may be zero */
 	if (ltflags & TF_NOOPT)
 		sc->sc_flags |= SCF_NOOPT;
-	if (((tcp_get_flags(th) & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) &&
-	    V_tcp_do_ecn)
-		sc->sc_flags |= SCF_ECN;
+	/* ECN Handshake */
+	if (V_tcp_do_ecn)
+		sc->sc_flags |= tcp_ecn_syncache_add(tcp_get_flags(th), iptos);
 
 	if (V_tcp_syncookies)
 		sc->sc_iss = syncookie_generate(sch, sc);
@@ -1938,10 +1938,7 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
 	th->th_win = htons(sc->sc_wnd);
 	th->th_urp = 0;
 
-	if ((flags & TH_SYN) && (sc->sc_flags & SCF_ECN)) {
-		flags |= TH_ECE;
-		TCPSTAT_INC(tcps_ecn_shs);
-	}
+	flags = tcp_ecn_syncache_respond(flags, sc);
 	tcp_set_flags(th, flags);
 
 	/* Tack on the TCP options. */