git: 7994ef3c394d - main - Revert "tcp: move ECN handling code to a common file"

From: Richard Scheffenegger <rscheff_at_FreeBSD.org>
Date: Sat, 05 Feb 2022 02:28:49 UTC
The branch main has been updated by rscheff:

URL: https://cgit.FreeBSD.org/src/commit/?id=7994ef3c394d16e37af7a4848e58d01c28b81fbc

commit 7994ef3c394d16e37af7a4848e58d01c28b81fbc
Author:     Richard Scheffenegger <rscheff@FreeBSD.org>
AuthorDate: 2022-02-05 00:07:51 +0000
Commit:     Richard Scheffenegger <rscheff@FreeBSD.org>
CommitDate: 2022-02-05 00:07:51 +0000

    Revert "tcp: move ECN handling code to a common file"
    
    This reverts commit 0c424c90eaa6602e07bca7836b1d178b91f2a88a.
---
 sys/conf/files                |   1 -
 sys/netinet/tcp_ecn.c         | 296 ------------------------------------------
 sys/netinet/tcp_ecn.h         |  55 --------
 sys/netinet/tcp_input.c       |  46 ++++++-
 sys/netinet/tcp_output.c      |  63 ++++++---
 sys/netinet/tcp_stacks/rack.c | 192 ++++++++++++++++++---------
 sys/netinet/tcp_syncache.c    |  15 ++-
 7 files changed, 224 insertions(+), 444 deletions(-)

diff --git a/sys/conf/files b/sys/conf/files
index 148bd9f4f7b4..78921d2c9fa0 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4364,7 +4364,6 @@ netinet/sctp_usrreq.c		optional inet sctp | inet6 sctp
 netinet/sctputil.c		optional inet sctp | inet6 sctp
 netinet/siftr.c			optional inet siftr alq | inet6 siftr alq
 netinet/tcp_debug.c		optional tcpdebug
-netinet/tcp_ecn.c		optional inet | inet6
 netinet/tcp_fastopen.c		optional inet tcp_rfc7413 | inet6 tcp_rfc7413
 netinet/tcp_hostcache.c		optional inet | inet6
 netinet/tcp_input.c		optional inet | inet6
diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c
deleted file mode 100644
index cf29431ea5d2..000000000000
--- a/sys/netinet/tcp_ecn.c
+++ /dev/null
@@ -1,296 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-3-Clause
- *
- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
- *      The Regents of the University of California.  All rights reserved.
- * Copyright (c) 2007-2008,2010
- *      Swinburne University of Technology, Melbourne, Australia.
- * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
- * Copyright (c) 2010 The FreeBSD Foundation
- * Copyright (c) 2010-2011 Juniper Networks, Inc.
- * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com>
- * All rights reserved.
- *
- * Portions of this software were developed at the Centre for Advanced Internet
- * Architectures, Swinburne University of Technology, by Lawrence Stewart,
- * James Healy and David Hayes, made possible in part by a grant from the Cisco
- * University Research Program Fund at Community Foundation Silicon Valley.
- *
- * Portions of this software were developed at the Centre for Advanced
- * Internet Architectures, Swinburne University of Technology, Melbourne,
- * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
- *
- * Portions of this software were developed by Robert N. M. Watson under
- * contract to Juniper Networks, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *      @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95
- */
-
-/*
- * Utility functions to deal with Explicit Congestion Notification in TCP
- * implementing the essential parts of the Accurate ECN extension
- * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include "opt_inet.h"
-#include "opt_inet6.h"
-#include "opt_tcpdebug.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-
-#include <machine/cpu.h>
-
-#include <vm/uma.h>
-
-#include <net/if.h>
-#include <net/if_var.h>
-#include <net/route.h>
-#include <net/vnet.h>
-
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/ip.h>
-#include <netinet/in_var.h>
-#include <netinet/in_pcb.h>
-#include <netinet/ip_var.h>
-#include <netinet/ip6.h>
-#include <netinet/icmp6.h>
-#include <netinet6/nd6.h>
-#include <netinet6/ip6_var.h>
-#include <netinet6/in6_pcb.h>
-#include <netinet/tcp.h>
-#include <netinet/tcp_fsm.h>
-#include <netinet/tcp_seq.h>
-#include <netinet/tcp_timer.h>
-#include <netinet/tcp_var.h>
-#include <netinet6/tcp6_var.h>
-#include <netinet/tcpip.h>
-#include <netinet/tcp_ecn.h>
-
-
-/*
- * Process incoming SYN,ACK packet
- */
-void
-tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
-{
-	thflags &= (TH_CWR|TH_ECE);
-
-	if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
-	    V_tcp_do_ecn) {
-		tp->t_flags2 |= TF2_ECN_PERMIT;
-		KMOD_TCPSTAT_INC(tcps_ecn_shs);
-	}
-}
-
-/*
- * Handle parallel SYN for ECN
- */
-void
-tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
-{
-	if (thflags & TH_ACK)
-		return;
-	if (V_tcp_do_ecn == 0)
-		return;
-	if ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2)) {
-		/* RFC3168 ECN handling */
-		if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
-			tp->t_flags2 |= TF2_ECN_PERMIT;
-			tp->t_flags2 |= TF2_ECN_SND_ECE;
-			KMOD_TCPSTAT_INC(tcps_ecn_shs);
-		}
-	}
-}
-
-/*
- * TCP ECN processing.
- */
-int
-tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos)
-{
-	int delta_ace = 0;
-
-	if (tp->t_flags2 & TF2_ECN_PERMIT) {
-		switch (iptos & IPTOS_ECN_MASK) {
-		case IPTOS_ECN_CE:
-			KMOD_TCPSTAT_INC(tcps_ecn_ce);
-			break;
-		case IPTOS_ECN_ECT0:
-			KMOD_TCPSTAT_INC(tcps_ecn_ect0);
-			break;
-		case IPTOS_ECN_ECT1:
-			KMOD_TCPSTAT_INC(tcps_ecn_ect1);
-			break;
-		}
-
-		/* RFC3168 ECN handling */
-		if (thflags & TH_ECE)
-			delta_ace = 1;
-		if (thflags & TH_CWR) {
-			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
-			tp->t_flags |= TF_ACKNOW;
-		}
-		if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
-			tp->t_flags2 |= TF2_ECN_SND_ECE;
-
-		/* Process a packet differently from RFC3168. */
-		cc_ecnpkt_handler_flags(tp, thflags, iptos);
-	}
-
-	return delta_ace;
-}
-
-/*
- * Send ECN setup <SYN> packet header flags
- */
-uint16_t
-tcp_ecn_output_syn_sent(struct tcpcb *tp)
-{
-	uint16_t thflags = 0;
-
-	if (V_tcp_do_ecn == 1) {
-		/* Send a RFC3168 ECN setup <SYN> packet */
-		if (tp->t_rxtshift >= 1) {
-			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
-				thflags = TH_ECE|TH_CWR;
-		} else
-			thflags = TH_ECE|TH_CWR;
-	}
-
-	return thflags;
-}
-
-/*
- * output processing of ECN feature
- * returning IP ECN header codepoint
- */
-int
-tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len)
-{
-	int ipecn = IPTOS_ECN_NOTECT;
-	bool newdata;
-
-	/*
-	 * If the peer has ECN, mark data packets with
-	 * ECN capable transmission (ECT).
-	 * Ignore pure control packets, retransmissions
-	 * and window probes.
-	 */
-	newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
-		    !((tp->t_flags & TF_FORCEDATA) && len == 1));
-	if (newdata) {
-		ipecn = IPTOS_ECN_ECT0;
-		KMOD_TCPSTAT_INC(tcps_ecn_ect0);
-	}
-	/*
-	 * Reply with proper ECN notifications.
-	 */
-	if (newdata &&
-	    (tp->t_flags2 & TF2_ECN_SND_CWR)) {
-		*thflags |= TH_CWR;
-		tp->t_flags2 &= ~TF2_ECN_SND_CWR;
-	}
-	if (tp->t_flags2 & TF2_ECN_SND_ECE)
-		*thflags |= TH_ECE;
-
-	return ipecn;
-}
-
-/*
- * Set up the ECN related tcpcb fields from
- * a syncache entry
- */
-void
-tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc)
-{
-	if (sc->sc_flags & SCF_ECN) {
-		switch (sc->sc_flags & SCF_ECN) {
-		case SCF_ECN:
-			tp->t_flags2 |= TF2_ECN_PERMIT;
-			break;
-		/* undefined SCF codepoint */
-		default:
-			break;
-		}
-	}
-}
-
-/*
- * Process a <SYN> packets ECN information, and provide the
- * syncache with the relevant information.
- */
-int
-tcp_ecn_syncache_add(uint16_t thflags, int iptos)
-{
-	int scflags = 0;
-
-	switch (thflags & (TH_CWR|TH_ECE)) {
-	/* no ECN */
-	case (0|0):
-		break;
-	/* legacy ECN */
-	case (TH_CWR|TH_ECE):
-		scflags = SCF_ECN;
-		break;
-	default:
-		break;
-	}
-	return scflags;
-}
-
-/*
- * Set up the ECN information for the <SYN,ACK> from
- * syncache information.
- */
-uint16_t
-tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
-{
-	if ((thflags & TH_SYN) &&
-	    (sc->sc_flags & SCF_ECN)) {
-		switch (sc->sc_flags & SCF_ECN) {
-		case SCF_ECN:
-			thflags |= (0 | TH_ECE);
-			KMOD_TCPSTAT_INC(tcps_ecn_shs);
-			break;
-		/* undefined SCF codepoint */
-		default:
-			break;
-		}
-	}
-	return thflags;
-}
diff --git a/sys/netinet/tcp_ecn.h b/sys/netinet/tcp_ecn.h
deleted file mode 100644
index 5ee49ce53a7a..000000000000
--- a/sys/netinet/tcp_ecn.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-3-Clause
- *
- * Copyright (c) 1982, 1986, 1993, 1994, 1995
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)tcp_ecn.h	8.4 (Berkeley) 5/24/95
- * $FreeBSD$
- */
-
-#ifndef _NETINET_TCP_ECN_H_
-#define _NETINET_TCP_ECN_H_
-
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_syncache.h>
-
-#ifdef _KERNEL
-
-void	 tcp_ecn_input_syn_sent(struct tcpcb *, uint16_t, int);
-void	 tcp_ecn_input_parallel_syn(struct tcpcb *, uint16_t, int);
-int	 tcp_ecn_input_segment(struct tcpcb *, uint16_t, int);
-uint16_t tcp_ecn_output_syn_sent(struct tcpcb *);
-int	 tcp_ecn_output_established(struct tcpcb *, uint16_t *, int);
-void	 tcp_ecn_syncache_socket(struct tcpcb *, struct syncache *);
-int	 tcp_ecn_syncache_add(uint16_t, int);
-uint16_t tcp_ecn_syncache_respond(uint16_t, struct syncache *);
-
-#endif /* _KERNEL */
-
-#endif /* _NETINET_TCP_ECN_H_ */
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index d0b323723e6b..9a1f3ace2541 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -104,7 +104,6 @@ __FBSDID("$FreeBSD$");
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/tcp.h>
-#include <netinet/tcp_ecn.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_log_buf.h>
 #include <netinet/tcp_seq.h>
@@ -1518,8 +1517,7 @@ void
 tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
     struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
 {
-	uint16_t thflags;
-	int acked, ourfinisacked, needoutput = 0, sack_changed;
+	int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
 	int rstreason, todrop, win, incforsyn = 0;
 	uint32_t tiwin;
 	uint16_t nsegs;
@@ -1599,8 +1597,32 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	/*
 	 * TCP ECN processing.
 	 */
-	if (tcp_ecn_input_segment(tp, thflags, iptos))
-		cc_cong_signal(tp, th, CC_ECN);
+	if (tp->t_flags2 & TF2_ECN_PERMIT) {
+		if (thflags & TH_CWR) {
+			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+			tp->t_flags |= TF_ACKNOW;
+		}
+		switch (iptos & IPTOS_ECN_MASK) {
+		case IPTOS_ECN_CE:
+			tp->t_flags2 |= TF2_ECN_SND_ECE;
+			TCPSTAT_INC(tcps_ecn_ce);
+			break;
+		case IPTOS_ECN_ECT0:
+			TCPSTAT_INC(tcps_ecn_ect0);
+			break;
+		case IPTOS_ECN_ECT1:
+			TCPSTAT_INC(tcps_ecn_ect1);
+			break;
+		}
+
+		/* Process a packet differently from RFC3168. */
+		cc_ecnpkt_handler(tp, th, iptos);
+
+		/* Congestion experienced. */
+		if (thflags & TH_ECE) {
+			cc_cong_signal(tp, th, CC_ECN);
+		}
+	}
 
 	/*
 	 * Parse options on any incoming segment.
@@ -1641,7 +1663,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 */
 	if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
 		/* Handle parallel SYN for ECN */
-		tcp_ecn_input_parallel_syn(tp, thflags, iptos);
+		if (!(thflags & TH_ACK) &&
+		    ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) &&
+		    ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
+			tp->t_flags2 |= TF2_ECN_PERMIT;
+			tp->t_flags2 |= TF2_ECN_SND_ECE;
+			TCPSTAT_INC(tcps_ecn_shs);
+		}
 		if ((to.to_flags & TOF_SCALE) &&
 		    (tp->t_flags & TF_REQ_SCALE) &&
 		    !(tp->t_flags & TF_NOOPT)) {
@@ -2047,7 +2075,11 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 			else
 				tp->t_flags |= TF_ACKNOW;
 
-			tcp_ecn_input_syn_sent(tp, thflags, iptos);
+			if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
+			    (V_tcp_do_ecn == 1)) {
+				tp->t_flags2 |= TF2_ECN_PERMIT;
+				TCPSTAT_INC(tcps_ecn_shs);
+			}
 
 			/*
 			 * Received <SYN,ACK> in SYN_SENT[*] state.
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index df9ce167b7d5..ce6d9b86e73f 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -98,7 +98,6 @@ __FBSDID("$FreeBSD$");
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
-#include <netinet/tcp_ecn.h>
 
 #include <netipsec/ipsec_support.h>
 
@@ -200,8 +199,7 @@ tcp_default_output(struct tcpcb *tp)
 	struct socket *so = tp->t_inpcb->inp_socket;
 	int32_t len;
 	uint32_t recwin, sendwin;
-	uint16_t flags;
-	int off, error = 0;	/* Keep compiler happy */
+	int off, flags, error = 0;	/* Keep compiler happy */
 	u_int if_hw_tsomaxsegcount = 0;
 	u_int if_hw_tsomaxsegsize = 0;
 	struct mbuf *m;
@@ -1199,27 +1197,54 @@ send:
 	 * resend those bits a number of times as per
 	 * RFC 3168.
 	 */
-	if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
-		flags |= tcp_ecn_output_syn_sent(tp);
+	if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) {
+		if (tp->t_rxtshift >= 1) {
+			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
+				flags |= TH_ECE|TH_CWR;
+		} else
+			flags |= TH_ECE|TH_CWR;
 	}
-	/* Also handle parallel SYN for ECN */
-	if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
-	    (tp->t_flags2 & TF2_ECN_PERMIT)) {
-		int ect = tcp_ecn_output_established(tp, &flags, len);
-		if ((tp->t_state == TCPS_SYN_RECEIVED) &&
-		    (tp->t_flags2 & TF2_ECN_SND_ECE))
+	/* Handle parallel SYN for ECN */
+	if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+	    (tp->t_flags2 & TF2_ECN_SND_ECE)) {
+			flags |= TH_ECE;
 			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+	}
+
+	if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+	    (tp->t_flags2 & TF2_ECN_PERMIT)) {
+		/*
+		 * If the peer has ECN, mark data packets with
+		 * ECN capable transmission (ECT).
+		 * Ignore pure ack packets, retransmissions and window probes.
+		 */
+		if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
+		    (sack_rxmit == 0) &&
+		    !((tp->t_flags & TF_FORCEDATA) && len == 1 &&
+		    SEQ_LT(tp->snd_una, tp->snd_max))) {
 #ifdef INET6
-		if (isipv6) {
-			ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
-			ip6->ip6_flow |= htonl(ect << 20);
-		}
-		else
+			if (isipv6) {
+				ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+				ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+			}
+			else
 #endif
-		{
-			ip->ip_tos &= ~IPTOS_ECN_MASK;
-			ip->ip_tos |= ect;
+			{
+				ip->ip_tos &= ~IPTOS_ECN_MASK;
+				ip->ip_tos |= IPTOS_ECN_ECT0;
+			}
+			TCPSTAT_INC(tcps_ecn_ect0);
+			/*
+			 * Reply with proper ECN notifications.
+			 * Only set CWR on new data segments.
+			 */
+			if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+				flags |= TH_CWR;
+				tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+			}
 		}
+		if (tp->t_flags2 & TF2_ECN_SND_ECE)
+			flags |= TH_ECE;
 	}
 
 	/*
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 7bc37a9552a7..6d5b3f2133a6 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -113,7 +113,6 @@ __FBSDID("$FreeBSD$");
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
-#include <netinet/tcp_ecn.h>
 
 #include <netipsec/ipsec_support.h>
 
@@ -11407,9 +11406,11 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so,
 			tp->t_flags |= TF_ACKNOW;
 			rack->rc_dack_toggle = 0;
 		}
-
-		tcp_ecn_input_syn_sent(tp, thflags, iptos);
-
+		if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
+		    (V_tcp_do_ecn == 1)) {
+			tp->t_flags2 |= TF2_ECN_PERMIT;
+			KMOD_TCPSTAT_INC(tcps_ecn_shs);
+		}
 		if (SEQ_GT(th->th_ack, tp->snd_una)) {
 			/*
 			 * We advance snd_una for the
@@ -13682,8 +13683,31 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb
 		}
 		tp->t_rcvtime = ticks;
 		/* Now what about ECN? */
-		if (tcp_ecn_input_segment(tp, ae->flags, ae->codepoint))
-			rack_cong_signal(tp, CC_ECN, ae->ack);
+		if (tp->t_flags2 & TF2_ECN_PERMIT) {
+			if (ae->flags & TH_CWR) {
+				tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+				tp->t_flags |= TF_ACKNOW;
+			}
+			switch (ae->codepoint & IPTOS_ECN_MASK) {
+			case IPTOS_ECN_CE:
+				tp->t_flags2 |= TF2_ECN_SND_ECE;
+				KMOD_TCPSTAT_INC(tcps_ecn_ce);
+				break;
+			case IPTOS_ECN_ECT0:
+				KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+				break;
+			case IPTOS_ECN_ECT1:
+				KMOD_TCPSTAT_INC(tcps_ecn_ect1);
+				break;
+			}
+
+			/* Process a packet differently from RFC3168. */
+			cc_ecnpkt_handler_flags(tp, ae->flags, ae->codepoint);
+			/* Congestion experienced. */
+			if (ae->flags & TH_ECE) {
+				rack_cong_signal(tp,  CC_ECN, ae->ack);
+			}
+		}
 #ifdef TCP_ACCOUNTING
 		/* Count for the specific type of ack in */
 		counter_u64_add(tcp_cnt_counters[ae->ack_val_set], 1);
@@ -14433,8 +14457,32 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	 * TCP ECN processing. XXXJTL: If we ever use ECN, we need to move
 	 * this to occur after we've validated the segment.
 	 */
-	if (tcp_ecn_input_segment(tp, thflags, iptos))
-		rack_cong_signal(tp, CC_ECN, th->th_ack);
+	if (tp->t_flags2 & TF2_ECN_PERMIT) {
+		if (thflags & TH_CWR) {
+			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+			tp->t_flags |= TF_ACKNOW;
+		}
+		switch (iptos & IPTOS_ECN_MASK) {
+		case IPTOS_ECN_CE:
+			tp->t_flags2 |= TF2_ECN_SND_ECE;
+			KMOD_TCPSTAT_INC(tcps_ecn_ce);
+			break;
+		case IPTOS_ECN_ECT0:
+			KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+			break;
+		case IPTOS_ECN_ECT1:
+			KMOD_TCPSTAT_INC(tcps_ecn_ect1);
+			break;
+		}
+
+		/* Process a packet differently from RFC3168. */
+		cc_ecnpkt_handler(tp, th, iptos);
+
+		/* Congestion experienced. */
+		if (thflags & TH_ECE) {
+			rack_cong_signal(tp, CC_ECN, th->th_ack);
+		}
+	}
 
 	/*
 	 * If echoed timestamp is later than the current time, fall back to
@@ -14468,7 +14516,13 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		 */
 		if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
 			/* Handle parallel SYN for ECN */
-			tcp_ecn_input_parallel_syn(tp, thflags, iptos);
+			if (!(thflags & TH_ACK) &&
+			    ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) &&
+			    ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
+				tp->t_flags2 |= TF2_ECN_PERMIT;
+				tp->t_flags2 |= TF2_ECN_SND_ECE;
+				TCPSTAT_INC(tcps_ecn_shs);
+			}
 			if ((to.to_flags & TOF_SCALE) &&
 			    (tp->t_flags & TF_REQ_SCALE)) {
 				tp->t_flags |= TF_RCVD_SCALE;
@@ -16002,24 +16056,6 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
 		udp->uh_ulen = htons(ulen);
 	}
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
-	if (TCPS_HAVERCVDSYN(tp->t_state) &&
-	    (tp->t_flags2 & TF2_ECN_PERMIT)) {
-		int ect = tcp_ecn_output_established(tp, &flags, len);
-		if ((tp->t_state == TCPS_SYN_RECEIVED) &&
-		    (tp->t_flags2 & TF2_ECN_SND_ECE))
-		    tp->t_flags2 &= ~TF2_ECN_SND_ECE;
-#ifdef INET6
-		if (rack->r_is_v6) {
-		    ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
-		    ip6->ip6_flow |= htonl(ect << 20);
-		}
-		else
-#endif
-		{
-		    ip->ip_tos &= ~IPTOS_ECN_MASK;
-		    ip->ip_tos |= ect;
-		}
-	}
 	m->m_pkthdr.len = hdrlen + len;	/* in6_cksum() need this */
 #ifdef INET6
 	if (rack->r_is_v6) {
@@ -16343,8 +16379,7 @@ rack_fast_output(struct tcpcb *tp, struct tcp_rack *rack, uint64_t ts_val,
 	u_char opt[TCP_MAXOLEN];
 	uint32_t hdrlen, optlen;
 	int cnt_thru = 1;
-	int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0;
-	uint16_t flags;
+	int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, flags, ulen = 0;
 	uint32_t s_soff;
 	uint32_t if_hw_tsomaxsegcount = 0, startseq;
 	uint32_t if_hw_tsomaxsegsize;
@@ -16493,23 +16528,37 @@ again:
 		udp->uh_ulen = htons(ulen);
 	}
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
-	if (TCPS_HAVERCVDSYN(tp->t_state) &&
+	if (tp->t_state == TCPS_ESTABLISHED &&
 	    (tp->t_flags2 & TF2_ECN_PERMIT)) {
-		int ect = tcp_ecn_output_established(tp, &flags, len);
-		if ((tp->t_state == TCPS_SYN_RECEIVED) &&
-		    (tp->t_flags2 & TF2_ECN_SND_ECE))
-			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+		/*
+		 * If the peer has ECN, mark data packets with ECN capable
+		 * transmission (ECT). Ignore pure ack packets,
+		 * retransmissions.
+		 */
+		if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max)) {
 #ifdef INET6
-		if (rack->r_is_v6) {
-			ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
-			ip6->ip6_flow |= htonl(ect << 20);
-		}
-		else
+			if (rack->r_is_v6) {
+				ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+				ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+			}
+			else
 #endif
-		{
-			ip->ip_tos &= ~IPTOS_ECN_MASK;
-			ip->ip_tos |= ect;
+			{
+				ip->ip_tos &= ~IPTOS_ECN_MASK;
+				ip->ip_tos |= IPTOS_ECN_ECT0;
+			}
+			KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+			/*
+			 * Reply with proper ECN notifications.
+			 * Only set CWR on new data segments.
+			 */
+			if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+				flags |= TH_CWR;
+				tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+			}
 		}
+		if (tp->t_flags2 & TF2_ECN_SND_ECE)
+			flags |= TH_ECE;
 	}
 	m->m_pkthdr.len = hdrlen + len;	/* in6_cksum() need this */
 #ifdef INET6
@@ -16737,8 +16786,7 @@ rack_output(struct tcpcb *tp)
 	struct socket *so;
 	uint32_t recwin;
 	uint32_t sb_offset, s_moff = 0;
-	int32_t len, error = 0;
-	uint16_t flags;
+	int32_t len, flags, error = 0;
 	struct mbuf *m, *s_mb = NULL;
 	struct mbuf *mb;
 	uint32_t if_hw_tsomaxsegcount = 0;
@@ -18548,27 +18596,51 @@ send:
 	 * are on a retransmit, we may resend those bits a number of times
 	 * as per RFC 3168.
 	 */
-	if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
-		flags |= tcp_ecn_output_syn_sent(tp);
+	if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) {
+		if (tp->t_rxtshift >= 1) {
+			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
+				flags |= TH_ECE | TH_CWR;
+		} else
+			flags |= TH_ECE | TH_CWR;
 	}
-	/* Also handle parallel SYN for ECN */
-	if (TCPS_HAVERCVDSYN(tp->t_state) &&
+	/* Handle parallel SYN for ECN */
+	if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+	    (tp->t_flags2 & TF2_ECN_SND_ECE)) {
+		flags |= TH_ECE;
+		tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+	}
+	if (TCPS_HAVEESTABLISHED(tp->t_state) &&
 	    (tp->t_flags2 & TF2_ECN_PERMIT)) {
-		int ect = tcp_ecn_output_established(tp, &flags, len);
-		if ((tp->t_state == TCPS_SYN_RECEIVED) &&
-		    (tp->t_flags2 & TF2_ECN_SND_ECE))
-			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+		/*
+		 * If the peer has ECN, mark data packets with ECN capable
+		 * transmission (ECT). Ignore pure ack packets,
+		 * retransmissions.
+		 */
+		if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
+		    (sack_rxmit == 0)) {
 #ifdef INET6
-		if (isipv6) {
-			ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
-			ip6->ip6_flow |= htonl(ect << 20);
-		}
-		else
+			if (isipv6) {
+				ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+				ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+			}
+			else
 #endif
-		{
-			ip->ip_tos &= ~IPTOS_ECN_MASK;
-			ip->ip_tos |= ect;
+			{
+				ip->ip_tos &= ~IPTOS_ECN_MASK;
+				ip->ip_tos |= IPTOS_ECN_ECT0;
+			}
+			KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+			/*
+			 * Reply with proper ECN notifications.
+			 * Only set CWR on new data segments.
+			 */
+			if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+				flags |= TH_CWR;
+				tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+			}
 		}
+		if (tp->t_flags2 & TF2_ECN_SND_ECE)
+			flags |= TH_ECE;
 	}
 	/*
 	 * If we are doing retransmissions, then snd_nxt will not reflect
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index ed4adda59c22..5fcafa44cc97 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -89,7 +89,6 @@ __FBSDID("$FreeBSD$");
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
-#include <netinet/tcp_ecn.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
@@ -1028,7 +1027,8 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
 			tp->t_flags |= TF_SACK_PERMIT;
 	}
 
-	tcp_ecn_syncache_socket(tp, sc);
+	if (sc->sc_flags & SCF_ECN)
+		tp->t_flags2 |= TF2_ECN_PERMIT;
 
 	/*
 	 * Set up MSS and get cached values from tcp_hostcache.
@@ -1743,9 +1743,9 @@ skip_alloc:
 		sc->sc_peer_mss = to->to_mss;	/* peer mss may be zero */
 	if (ltflags & TF_NOOPT)
 		sc->sc_flags |= SCF_NOOPT;
-	/* ECN Handshake */
-	if (V_tcp_do_ecn)
-		sc->sc_flags |= tcp_ecn_syncache_add(tcp_get_flags(th), iptos);
+	if (((tcp_get_flags(th) & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) &&
+	    V_tcp_do_ecn)
+		sc->sc_flags |= SCF_ECN;
 
 	if (V_tcp_syncookies)
 		sc->sc_iss = syncookie_generate(sch, sc);
@@ -1938,7 +1938,10 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
 	th->th_win = htons(sc->sc_wnd);
 	th->th_urp = 0;
 
-	flags = tcp_ecn_syncache_respond(flags, sc);
+	if ((flags & TH_SYN) && (sc->sc_flags & SCF_ECN)) {
+		flags |= TH_ECE;
+		TCPSTAT_INC(tcps_ecn_shs);
+	}
 	tcp_set_flags(th, flags);
 
 	/* Tack on the TCP options. */