git: 7994ef3c394d - main - Revert "tcp: move ECN handling code to a common file"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 05 Feb 2022 02:28:49 UTC
The branch main has been updated by rscheff:
URL: https://cgit.FreeBSD.org/src/commit/?id=7994ef3c394d16e37af7a4848e58d01c28b81fbc
commit 7994ef3c394d16e37af7a4848e58d01c28b81fbc
Author: Richard Scheffenegger <rscheff@FreeBSD.org>
AuthorDate: 2022-02-05 00:07:51 +0000
Commit: Richard Scheffenegger <rscheff@FreeBSD.org>
CommitDate: 2022-02-05 00:07:51 +0000
Revert "tcp: move ECN handling code to a common file"
This reverts commit 0c424c90eaa6602e07bca7836b1d178b91f2a88a.
---
sys/conf/files | 1 -
sys/netinet/tcp_ecn.c | 296 ------------------------------------------
sys/netinet/tcp_ecn.h | 55 --------
sys/netinet/tcp_input.c | 46 ++++++-
sys/netinet/tcp_output.c | 63 ++++++---
sys/netinet/tcp_stacks/rack.c | 192 ++++++++++++++++++---------
sys/netinet/tcp_syncache.c | 15 ++-
7 files changed, 224 insertions(+), 444 deletions(-)
diff --git a/sys/conf/files b/sys/conf/files
index 148bd9f4f7b4..78921d2c9fa0 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4364,7 +4364,6 @@ netinet/sctp_usrreq.c optional inet sctp | inet6 sctp
netinet/sctputil.c optional inet sctp | inet6 sctp
netinet/siftr.c optional inet siftr alq | inet6 siftr alq
netinet/tcp_debug.c optional tcpdebug
-netinet/tcp_ecn.c optional inet | inet6
netinet/tcp_fastopen.c optional inet tcp_rfc7413 | inet6 tcp_rfc7413
netinet/tcp_hostcache.c optional inet | inet6
netinet/tcp_input.c optional inet | inet6
diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c
deleted file mode 100644
index cf29431ea5d2..000000000000
--- a/sys/netinet/tcp_ecn.c
+++ /dev/null
@@ -1,296 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-3-Clause
- *
- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
- * Copyright (c) 2007-2008,2010
- * Swinburne University of Technology, Melbourne, Australia.
- * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
- * Copyright (c) 2010 The FreeBSD Foundation
- * Copyright (c) 2010-2011 Juniper Networks, Inc.
- * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com>
- * All rights reserved.
- *
- * Portions of this software were developed at the Centre for Advanced Internet
- * Architectures, Swinburne University of Technology, by Lawrence Stewart,
- * James Healy and David Hayes, made possible in part by a grant from the Cisco
- * University Research Program Fund at Community Foundation Silicon Valley.
- *
- * Portions of this software were developed at the Centre for Advanced
- * Internet Architectures, Swinburne University of Technology, Melbourne,
- * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
- *
- * Portions of this software were developed by Robert N. M. Watson under
- * contract to Juniper Networks, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95
- */
-
-/*
- * Utility functions to deal with Explicit Congestion Notification in TCP
- * implementing the essential parts of the Accurate ECN extension
- * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include "opt_inet.h"
-#include "opt_inet6.h"
-#include "opt_tcpdebug.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-
-#include <machine/cpu.h>
-
-#include <vm/uma.h>
-
-#include <net/if.h>
-#include <net/if_var.h>
-#include <net/route.h>
-#include <net/vnet.h>
-
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/ip.h>
-#include <netinet/in_var.h>
-#include <netinet/in_pcb.h>
-#include <netinet/ip_var.h>
-#include <netinet/ip6.h>
-#include <netinet/icmp6.h>
-#include <netinet6/nd6.h>
-#include <netinet6/ip6_var.h>
-#include <netinet6/in6_pcb.h>
-#include <netinet/tcp.h>
-#include <netinet/tcp_fsm.h>
-#include <netinet/tcp_seq.h>
-#include <netinet/tcp_timer.h>
-#include <netinet/tcp_var.h>
-#include <netinet6/tcp6_var.h>
-#include <netinet/tcpip.h>
-#include <netinet/tcp_ecn.h>
-
-
-/*
- * Process incoming SYN,ACK packet
- *
- * When the segment carries TH_ECE without TH_CWR and V_tcp_do_ecn is
- * nonzero, set TF2_ECN_PERMIT in tp->t_flags2 and increment the
- * tcps_ecn_shs counter.  In every other case tp is left unchanged.
- *
- * tp:      control block whose t_flags2 may be updated.
- * thflags: TCP header flags of the received segment; only TH_CWR and
- *          TH_ECE are examined.
- * iptos:   accepted but not read by this function.
- */
-void
-tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
-{
- thflags &= (TH_CWR|TH_ECE); /* keep only the two ECN header flags */
-
- if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
- V_tcp_do_ecn) {
- tp->t_flags2 |= TF2_ECN_PERMIT;
- KMOD_TCPSTAT_INC(tcps_ecn_shs);
- }
-}
-
-/*
- * Handle parallel SYN for ECN
- *
- * Returns without touching tp when the segment has TH_ACK set or when
- * V_tcp_do_ecn is 0.  When V_tcp_do_ecn is 1 or 2 and the segment has
- * both TH_CWR and TH_ECE set, set TF2_ECN_PERMIT and TF2_ECN_SND_ECE in
- * tp->t_flags2 and increment the tcps_ecn_shs counter.  Any other value
- * of V_tcp_do_ecn results in no action.
- *
- * The call sites visible in this patch invoke it for a segment with
- * TH_SYN received while tp is in TCPS_SYN_SENT.
- *
- * tp:      control block whose t_flags2 may be updated.
- * thflags: TCP header flags of the received segment.
- * iptos:   accepted but not read by this function.
- */
-void
-tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
-{
- if (thflags & TH_ACK)
- return;
- if (V_tcp_do_ecn == 0)
- return;
- if ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2)) {
- /* RFC3168 ECN handling */
- if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
- tp->t_flags2 |= TF2_ECN_PERMIT;
- tp->t_flags2 |= TF2_ECN_SND_ECE;
- KMOD_TCPSTAT_INC(tcps_ecn_shs);
- }
- }
-}
-
-/*
- * TCP ECN processing.
- *
- * Does nothing and returns 0 unless TF2_ECN_PERMIT is set in
- * tp->t_flags2.  Otherwise, for the received segment:
- *  - increments the tcps_ecn_ce / tcps_ecn_ect0 / tcps_ecn_ect1 counter
- *    matching the ECN codepoint found in iptos (none for any other value);
- *  - on TH_CWR, clears TF2_ECN_SND_ECE and sets TF_ACKNOW;
- *  - on a CE codepoint, sets TF2_ECN_SND_ECE (this runs after the TH_CWR
- *    step, so a segment with both ends with TF2_ECN_SND_ECE set);
- *  - calls cc_ecnpkt_handler_flags() with the same flags and iptos.
- *
- * tp:      control block that is read and updated.
- * thflags: TCP header flags of the received segment.
- * iptos:   IP TOS value of the segment; only the IPTOS_ECN_MASK bits are used
- *          here, the full value is handed to cc_ecnpkt_handler_flags().
- *
- * Returns 1 when TH_ECE was set on the segment, 0 otherwise.  The callers
- * shown in this patch raise a CC_ECN congestion signal on a nonzero return.
- */
-int
-tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos)
-{
- int delta_ace = 0;
-
- if (tp->t_flags2 & TF2_ECN_PERMIT) {
- switch (iptos & IPTOS_ECN_MASK) {
- case IPTOS_ECN_CE:
- KMOD_TCPSTAT_INC(tcps_ecn_ce);
- break;
- case IPTOS_ECN_ECT0:
- KMOD_TCPSTAT_INC(tcps_ecn_ect0);
- break;
- case IPTOS_ECN_ECT1:
- KMOD_TCPSTAT_INC(tcps_ecn_ect1);
- break;
- }
-
- /* RFC3168 ECN handling */
- if (thflags & TH_ECE)
- delta_ace = 1;
- if (thflags & TH_CWR) {
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
- tp->t_flags |= TF_ACKNOW;
- }
- if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
- tp->t_flags2 |= TF2_ECN_SND_ECE;
-
- /* Process a packet differently from RFC3168. */
- cc_ecnpkt_handler_flags(tp, thflags, iptos);
- }
-
- return delta_ace;
-}
-
-/*
- * Send ECN setup <SYN> packet header flags
- *
- * Computes the ECN flags to OR into an outgoing <SYN>; tp is only read.
- *
- * tp: control block; t_rxtshift is compared against V_tcp_ecn_maxretries.
- *
- * Returns TH_ECE|TH_CWR when V_tcp_do_ecn is exactly 1 and t_rxtshift is
- * either below 1 or no greater than V_tcp_ecn_maxretries; returns 0 in
- * every other case, including any other nonzero V_tcp_do_ecn value.
- */
-uint16_t
-tcp_ecn_output_syn_sent(struct tcpcb *tp)
-{
- uint16_t thflags = 0;
-
- if (V_tcp_do_ecn == 1) {
- /* Send a RFC3168 ECN setup <SYN> packet */
- if (tp->t_rxtshift >= 1) {
- if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
- thflags = TH_ECE|TH_CWR;
- } else
- thflags = TH_ECE|TH_CWR;
- }
-
- return thflags;
-}
-
-/*
- * output processing of ECN feature
- * returning IP ECN header codepoint
- *
- * This function does not test TF2_ECN_PERMIT itself; the call sites
- * shown in this patch check it before calling.
- *
- * tp:      control block; TF2_ECN_SND_CWR may be cleared in t_flags2.
- * thflags: in/out TCP header flags of the segment being built; TH_CWR
- *          and/or TH_ECE may be ORed in.
- * len:     length compared against 0 and 1 below (presumably the payload
- *          length of the segment -- confirm against the callers).
- *
- * Returns IPTOS_ECN_ECT0 when the segment counts as new data (see the
- * newdata expression below), IPTOS_ECN_NOTECT otherwise.
- */
-int
-tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len)
-{
- int ipecn = IPTOS_ECN_NOTECT;
- bool newdata;
-
- /*
- * If the peer has ECN, mark data packets with
- * ECN capable transmission (ECT).
- * Ignore pure control packets, retransmissions
- * and window probes.
- *
- * "New data" here means: len > 0, snd_nxt is at or beyond snd_max,
- * and the segment is not a one-byte send with TF_FORCEDATA set.
- */
- newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
- !((tp->t_flags & TF_FORCEDATA) && len == 1));
- if (newdata) {
- ipecn = IPTOS_ECN_ECT0;
- KMOD_TCPSTAT_INC(tcps_ecn_ect0);
- }
- /*
- * Reply with proper ECN notifications.
- *
- * TH_CWR is only added on new data, and the pending
- * TF2_ECN_SND_CWR flag is consumed when it is sent.
- * TH_ECE is added whenever TF2_ECN_SND_ECE is set,
- * whether or not the segment is new data.
- */
- if (newdata &&
- (tp->t_flags2 & TF2_ECN_SND_CWR)) {
- *thflags |= TH_CWR;
- tp->t_flags2 &= ~TF2_ECN_SND_CWR;
- }
- if (tp->t_flags2 & TF2_ECN_SND_ECE)
- *thflags |= TH_ECE;
-
- return ipecn;
-}
-
-/*
- * Set up the ECN related tcpcb fields from
- * a syncache entry
- *
- * Sets TF2_ECN_PERMIT in tp->t_flags2 when (sc->sc_flags & SCF_ECN)
- * equals SCF_ECN; any other masked value leaves tp unchanged.
- *
- * tp: control block being initialised; only t_flags2 is written.
- * sc: syncache entry; only sc_flags is read.
- */
-void
-tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc)
-{
- if (sc->sc_flags & SCF_ECN) {
- switch (sc->sc_flags & SCF_ECN) {
- case SCF_ECN:
- tp->t_flags2 |= TF2_ECN_PERMIT;
- break;
- /* undefined SCF codepoint */
- default:
- break;
- }
- }
-}
-
-/*
- * Process a <SYN> packet's ECN information, and provide the
- * syncache with the relevant information.
- *
- * V_tcp_do_ecn is not consulted here; the call site shown in this
- * patch checks it before calling.
- *
- * thflags: TCP header flags of the received <SYN>; only TH_CWR and
- *          TH_ECE are examined.
- * iptos:   accepted but not read by this function.
- *
- * Returns SCF_ECN when both TH_CWR and TH_ECE are set, 0 for every
- * other combination.
- */
-int
-tcp_ecn_syncache_add(uint16_t thflags, int iptos)
-{
- int scflags = 0;
-
- switch (thflags & (TH_CWR|TH_ECE)) {
- /* no ECN */
- case (0|0):
- break;
- /* legacy ECN */
- case (TH_CWR|TH_ECE):
- scflags = SCF_ECN;
- break;
- default:
- break;
- }
- return scflags;
-}
-
-/*
- * Set up the ECN information for the <SYN,ACK> from
- * syncache information.
- *
- * When thflags contains TH_SYN and (sc->sc_flags & SCF_ECN) equals
- * SCF_ECN, TH_ECE is added to the flags and the tcps_ecn_shs counter
- * is incremented.  Otherwise the flags pass through unchanged.
- *
- * thflags: TCP header flags prepared so far for the reply.
- * sc:      syncache entry; only sc_flags is read.
- *
- * Returns the (possibly updated) header flags.
- */
-uint16_t
-tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
-{
- if ((thflags & TH_SYN) &&
- (sc->sc_flags & SCF_ECN)) {
- switch (sc->sc_flags & SCF_ECN) {
- case SCF_ECN:
- thflags |= (0 | TH_ECE);
- KMOD_TCPSTAT_INC(tcps_ecn_shs);
- break;
- /* undefined SCF codepoint */
- default:
- break;
- }
- }
- return thflags;
-}
diff --git a/sys/netinet/tcp_ecn.h b/sys/netinet/tcp_ecn.h
deleted file mode 100644
index 5ee49ce53a7a..000000000000
--- a/sys/netinet/tcp_ecn.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-3-Clause
- *
- * Copyright (c) 1982, 1986, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)tcp_ecn.h 8.4 (Berkeley) 5/24/95
- * $FreeBSD$
- */
-
-#ifndef _NETINET_TCP_ECN_H_
-#define _NETINET_TCP_ECN_H_
-
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_syncache.h>
-
-#ifdef _KERNEL
-
-void tcp_ecn_input_syn_sent(struct tcpcb *, uint16_t, int); /* tp, TCP header flags, IP TOS */
-void tcp_ecn_input_parallel_syn(struct tcpcb *, uint16_t, int); /* tp, TCP header flags, IP TOS */
-int tcp_ecn_input_segment(struct tcpcb *, uint16_t, int); /* tp, TCP header flags, IP TOS; 1 if TH_ECE was set */
-uint16_t tcp_ecn_output_syn_sent(struct tcpcb *); /* returns ECN flags for an outgoing <SYN> */
-int tcp_ecn_output_established(struct tcpcb *, uint16_t *, int); /* tp, in/out header flags, len; returns IP ECN codepoint */
-void tcp_ecn_syncache_socket(struct tcpcb *, struct syncache *); /* copies ECN state from syncache entry to tp */
-int tcp_ecn_syncache_add(uint16_t, int); /* TCP header flags, IP TOS; returns SCF_ECN or 0 */
-uint16_t tcp_ecn_syncache_respond(uint16_t, struct syncache *); /* returns header flags for the <SYN,ACK> */
-
-#endif /* _KERNEL */
-
-#endif /* _NETINET_TCP_ECN_H_ */
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index d0b323723e6b..9a1f3ace2541 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -104,7 +104,6 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#include <netinet/tcp.h>
-#include <netinet/tcp_ecn.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_log_buf.h>
#include <netinet/tcp_seq.h>
@@ -1518,8 +1517,7 @@ void
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
{
- uint16_t thflags;
- int acked, ourfinisacked, needoutput = 0, sack_changed;
+ int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
int rstreason, todrop, win, incforsyn = 0;
uint32_t tiwin;
uint16_t nsegs;
@@ -1599,8 +1597,32 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
/*
* TCP ECN processing.
*/
- if (tcp_ecn_input_segment(tp, thflags, iptos))
- cc_cong_signal(tp, th, CC_ECN);
+ if (tp->t_flags2 & TF2_ECN_PERMIT) {
+ if (thflags & TH_CWR) {
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ tp->t_flags |= TF_ACKNOW;
+ }
+ switch (iptos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ tp->t_flags2 |= TF2_ECN_SND_ECE;
+ TCPSTAT_INC(tcps_ecn_ce);
+ break;
+ case IPTOS_ECN_ECT0:
+ TCPSTAT_INC(tcps_ecn_ect0);
+ break;
+ case IPTOS_ECN_ECT1:
+ TCPSTAT_INC(tcps_ecn_ect1);
+ break;
+ }
+
+ /* Process a packet differently from RFC3168. */
+ cc_ecnpkt_handler(tp, th, iptos);
+
+ /* Congestion experienced. */
+ if (thflags & TH_ECE) {
+ cc_cong_signal(tp, th, CC_ECN);
+ }
+ }
/*
* Parse options on any incoming segment.
@@ -1641,7 +1663,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
/* Handle parallel SYN for ECN */
- tcp_ecn_input_parallel_syn(tp, thflags, iptos);
+ if (!(thflags & TH_ACK) &&
+ ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) &&
+ ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
+ tp->t_flags2 |= TF2_ECN_PERMIT;
+ tp->t_flags2 |= TF2_ECN_SND_ECE;
+ TCPSTAT_INC(tcps_ecn_shs);
+ }
if ((to.to_flags & TOF_SCALE) &&
(tp->t_flags & TF_REQ_SCALE) &&
!(tp->t_flags & TF_NOOPT)) {
@@ -2047,7 +2075,11 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
else
tp->t_flags |= TF_ACKNOW;
- tcp_ecn_input_syn_sent(tp, thflags, iptos);
+ if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
+ (V_tcp_do_ecn == 1)) {
+ tp->t_flags2 |= TF2_ECN_PERMIT;
+ TCPSTAT_INC(tcps_ecn_shs);
+ }
/*
* Received <SYN,ACK> in SYN_SENT[*] state.
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index df9ce167b7d5..ce6d9b86e73f 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -98,7 +98,6 @@ __FBSDID("$FreeBSD$");
#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
#endif
-#include <netinet/tcp_ecn.h>
#include <netipsec/ipsec_support.h>
@@ -200,8 +199,7 @@ tcp_default_output(struct tcpcb *tp)
struct socket *so = tp->t_inpcb->inp_socket;
int32_t len;
uint32_t recwin, sendwin;
- uint16_t flags;
- int off, error = 0; /* Keep compiler happy */
+ int off, flags, error = 0; /* Keep compiler happy */
u_int if_hw_tsomaxsegcount = 0;
u_int if_hw_tsomaxsegsize = 0;
struct mbuf *m;
@@ -1199,27 +1197,54 @@ send:
* resend those bits a number of times as per
* RFC 3168.
*/
- if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
- flags |= tcp_ecn_output_syn_sent(tp);
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) {
+ if (tp->t_rxtshift >= 1) {
+ if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
+ flags |= TH_ECE|TH_CWR;
+ } else
+ flags |= TH_ECE|TH_CWR;
}
- /* Also handle parallel SYN for ECN */
- if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
- (tp->t_flags2 & TF2_ECN_PERMIT)) {
- int ect = tcp_ecn_output_established(tp, &flags, len);
- if ((tp->t_state == TCPS_SYN_RECEIVED) &&
- (tp->t_flags2 & TF2_ECN_SND_ECE))
+ /* Handle parallel SYN for ECN */
+ if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_flags2 & TF2_ECN_SND_ECE)) {
+ flags |= TH_ECE;
tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ }
+
+ if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+ (tp->t_flags2 & TF2_ECN_PERMIT)) {
+ /*
+ * If the peer has ECN, mark data packets with
+ * ECN capable transmission (ECT).
+ * Ignore pure ack packets, retransmissions and window probes.
+ */
+ if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
+ (sack_rxmit == 0) &&
+ !((tp->t_flags & TF_FORCEDATA) && len == 1 &&
+ SEQ_LT(tp->snd_una, tp->snd_max))) {
#ifdef INET6
- if (isipv6) {
- ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
- ip6->ip6_flow |= htonl(ect << 20);
- }
- else
+ if (isipv6) {
+ ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+ ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+ }
+ else
#endif
- {
- ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= ect;
+ {
+ ip->ip_tos &= ~IPTOS_ECN_MASK;
+ ip->ip_tos |= IPTOS_ECN_ECT0;
+ }
+ TCPSTAT_INC(tcps_ecn_ect0);
+ /*
+ * Reply with proper ECN notifications.
+ * Only set CWR on new data segments.
+ */
+ if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+ flags |= TH_CWR;
+ tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+ }
}
+ if (tp->t_flags2 & TF2_ECN_SND_ECE)
+ flags |= TH_ECE;
}
/*
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 7bc37a9552a7..6d5b3f2133a6 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -113,7 +113,6 @@ __FBSDID("$FreeBSD$");
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
-#include <netinet/tcp_ecn.h>
#include <netipsec/ipsec_support.h>
@@ -11407,9 +11406,11 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->t_flags |= TF_ACKNOW;
rack->rc_dack_toggle = 0;
}
-
- tcp_ecn_input_syn_sent(tp, thflags, iptos);
-
+ if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
+ (V_tcp_do_ecn == 1)) {
+ tp->t_flags2 |= TF2_ECN_PERMIT;
+ KMOD_TCPSTAT_INC(tcps_ecn_shs);
+ }
if (SEQ_GT(th->th_ack, tp->snd_una)) {
/*
* We advance snd_una for the
@@ -13682,8 +13683,31 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb
}
tp->t_rcvtime = ticks;
/* Now what about ECN? */
- if (tcp_ecn_input_segment(tp, ae->flags, ae->codepoint))
- rack_cong_signal(tp, CC_ECN, ae->ack);
+ if (tp->t_flags2 & TF2_ECN_PERMIT) {
+ if (ae->flags & TH_CWR) {
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ tp->t_flags |= TF_ACKNOW;
+ }
+ switch (ae->codepoint & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ tp->t_flags2 |= TF2_ECN_SND_ECE;
+ KMOD_TCPSTAT_INC(tcps_ecn_ce);
+ break;
+ case IPTOS_ECN_ECT0:
+ KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+ break;
+ case IPTOS_ECN_ECT1:
+ KMOD_TCPSTAT_INC(tcps_ecn_ect1);
+ break;
+ }
+
+ /* Process a packet differently from RFC3168. */
+ cc_ecnpkt_handler_flags(tp, ae->flags, ae->codepoint);
+ /* Congestion experienced. */
+ if (ae->flags & TH_ECE) {
+ rack_cong_signal(tp, CC_ECN, ae->ack);
+ }
+ }
#ifdef TCP_ACCOUNTING
/* Count for the specific type of ack in */
counter_u64_add(tcp_cnt_counters[ae->ack_val_set], 1);
@@ -14433,8 +14457,32 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
* TCP ECN processing. XXXJTL: If we ever use ECN, we need to move
* this to occur after we've validated the segment.
*/
- if (tcp_ecn_input_segment(tp, thflags, iptos))
- rack_cong_signal(tp, CC_ECN, th->th_ack);
+ if (tp->t_flags2 & TF2_ECN_PERMIT) {
+ if (thflags & TH_CWR) {
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ tp->t_flags |= TF_ACKNOW;
+ }
+ switch (iptos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ tp->t_flags2 |= TF2_ECN_SND_ECE;
+ KMOD_TCPSTAT_INC(tcps_ecn_ce);
+ break;
+ case IPTOS_ECN_ECT0:
+ KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+ break;
+ case IPTOS_ECN_ECT1:
+ KMOD_TCPSTAT_INC(tcps_ecn_ect1);
+ break;
+ }
+
+ /* Process a packet differently from RFC3168. */
+ cc_ecnpkt_handler(tp, th, iptos);
+
+ /* Congestion experienced. */
+ if (thflags & TH_ECE) {
+ rack_cong_signal(tp, CC_ECN, th->th_ack);
+ }
+ }
/*
* If echoed timestamp is later than the current time, fall back to
@@ -14468,7 +14516,13 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
/* Handle parallel SYN for ECN */
- tcp_ecn_input_parallel_syn(tp, thflags, iptos);
+ if (!(thflags & TH_ACK) &&
+ ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) &&
+ ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
+ tp->t_flags2 |= TF2_ECN_PERMIT;
+ tp->t_flags2 |= TF2_ECN_SND_ECE;
+ TCPSTAT_INC(tcps_ecn_shs);
+ }
if ((to.to_flags & TOF_SCALE) &&
(tp->t_flags & TF_REQ_SCALE)) {
tp->t_flags |= TF_RCVD_SCALE;
@@ -16002,24 +16056,6 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
udp->uh_ulen = htons(ulen);
}
m->m_pkthdr.rcvif = (struct ifnet *)0;
- if (TCPS_HAVERCVDSYN(tp->t_state) &&
- (tp->t_flags2 & TF2_ECN_PERMIT)) {
- int ect = tcp_ecn_output_established(tp, &flags, len);
- if ((tp->t_state == TCPS_SYN_RECEIVED) &&
- (tp->t_flags2 & TF2_ECN_SND_ECE))
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
-#ifdef INET6
- if (rack->r_is_v6) {
- ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
- ip6->ip6_flow |= htonl(ect << 20);
- }
- else
-#endif
- {
- ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= ect;
- }
- }
m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
#ifdef INET6
if (rack->r_is_v6) {
@@ -16343,8 +16379,7 @@ rack_fast_output(struct tcpcb *tp, struct tcp_rack *rack, uint64_t ts_val,
u_char opt[TCP_MAXOLEN];
uint32_t hdrlen, optlen;
int cnt_thru = 1;
- int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0;
- uint16_t flags;
+ int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, flags, ulen = 0;
uint32_t s_soff;
uint32_t if_hw_tsomaxsegcount = 0, startseq;
uint32_t if_hw_tsomaxsegsize;
@@ -16493,23 +16528,37 @@ again:
udp->uh_ulen = htons(ulen);
}
m->m_pkthdr.rcvif = (struct ifnet *)0;
- if (TCPS_HAVERCVDSYN(tp->t_state) &&
+ if (tp->t_state == TCPS_ESTABLISHED &&
(tp->t_flags2 & TF2_ECN_PERMIT)) {
- int ect = tcp_ecn_output_established(tp, &flags, len);
- if ((tp->t_state == TCPS_SYN_RECEIVED) &&
- (tp->t_flags2 & TF2_ECN_SND_ECE))
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ /*
+ * If the peer has ECN, mark data packets with ECN capable
+ * transmission (ECT). Ignore pure ack packets,
+ * retransmissions.
+ */
+ if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max)) {
#ifdef INET6
- if (rack->r_is_v6) {
- ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
- ip6->ip6_flow |= htonl(ect << 20);
- }
- else
+ if (rack->r_is_v6) {
+ ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+ ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+ }
+ else
#endif
- {
- ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= ect;
+ {
+ ip->ip_tos &= ~IPTOS_ECN_MASK;
+ ip->ip_tos |= IPTOS_ECN_ECT0;
+ }
+ KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+ /*
+ * Reply with proper ECN notifications.
+ * Only set CWR on new data segments.
+ */
+ if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+ flags |= TH_CWR;
+ tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+ }
}
+ if (tp->t_flags2 & TF2_ECN_SND_ECE)
+ flags |= TH_ECE;
}
m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
#ifdef INET6
@@ -16737,8 +16786,7 @@ rack_output(struct tcpcb *tp)
struct socket *so;
uint32_t recwin;
uint32_t sb_offset, s_moff = 0;
- int32_t len, error = 0;
- uint16_t flags;
+ int32_t len, flags, error = 0;
struct mbuf *m, *s_mb = NULL;
struct mbuf *mb;
uint32_t if_hw_tsomaxsegcount = 0;
@@ -18548,27 +18596,51 @@ send:
* are on a retransmit, we may resend those bits a number of times
* as per RFC 3168.
*/
- if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
- flags |= tcp_ecn_output_syn_sent(tp);
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) {
+ if (tp->t_rxtshift >= 1) {
+ if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
+ flags |= TH_ECE | TH_CWR;
+ } else
+ flags |= TH_ECE | TH_CWR;
}
- /* Also handle parallel SYN for ECN */
- if (TCPS_HAVERCVDSYN(tp->t_state) &&
+ /* Handle parallel SYN for ECN */
+ if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_flags2 & TF2_ECN_SND_ECE)) {
+ flags |= TH_ECE;
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ }
+ if (TCPS_HAVEESTABLISHED(tp->t_state) &&
(tp->t_flags2 & TF2_ECN_PERMIT)) {
- int ect = tcp_ecn_output_established(tp, &flags, len);
- if ((tp->t_state == TCPS_SYN_RECEIVED) &&
- (tp->t_flags2 & TF2_ECN_SND_ECE))
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ /*
+ * If the peer has ECN, mark data packets with ECN capable
+ * transmission (ECT). Ignore pure ack packets,
+ * retransmissions.
+ */
+ if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
+ (sack_rxmit == 0)) {
#ifdef INET6
- if (isipv6) {
- ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
- ip6->ip6_flow |= htonl(ect << 20);
- }
- else
+ if (isipv6) {
+ ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+ ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+ }
+ else
#endif
- {
- ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= ect;
+ {
+ ip->ip_tos &= ~IPTOS_ECN_MASK;
+ ip->ip_tos |= IPTOS_ECN_ECT0;
+ }
+ KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+ /*
+ * Reply with proper ECN notifications.
+ * Only set CWR on new data segments.
+ */
+ if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+ flags |= TH_CWR;
+ tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+ }
}
+ if (tp->t_flags2 & TF2_ECN_SND_ECE)
+ flags |= TH_ECE;
}
/*
* If we are doing retransmissions, then snd_nxt will not reflect
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index ed4adda59c22..5fcafa44cc97 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -89,7 +89,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
-#include <netinet/tcp_ecn.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
@@ -1028,7 +1027,8 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
tp->t_flags |= TF_SACK_PERMIT;
}
- tcp_ecn_syncache_socket(tp, sc);
+ if (sc->sc_flags & SCF_ECN)
+ tp->t_flags2 |= TF2_ECN_PERMIT;
/*
* Set up MSS and get cached values from tcp_hostcache.
@@ -1743,9 +1743,9 @@ skip_alloc:
sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */
if (ltflags & TF_NOOPT)
sc->sc_flags |= SCF_NOOPT;
- /* ECN Handshake */
- if (V_tcp_do_ecn)
- sc->sc_flags |= tcp_ecn_syncache_add(tcp_get_flags(th), iptos);
+ if (((tcp_get_flags(th) & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) &&
+ V_tcp_do_ecn)
+ sc->sc_flags |= SCF_ECN;
if (V_tcp_syncookies)
sc->sc_iss = syncookie_generate(sch, sc);
@@ -1938,7 +1938,10 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
th->th_win = htons(sc->sc_wnd);
th->th_urp = 0;
- flags = tcp_ecn_syncache_respond(flags, sc);
+ if ((flags & TH_SYN) && (sc->sc_flags & SCF_ECN)) {
+ flags |= TH_ECE;
+ TCPSTAT_INC(tcps_ecn_shs);
+ }
tcp_set_flags(th, flags);
/* Tack on the TCP options. */