git: dc9daa04fb68 - main - tcp: allow packets to be marked as ECT1 instead of ECT0

From: Richard Scheffenegger <rscheff_at_FreeBSD.org>
Date: Tue, 08 Nov 2022 18:10:01 UTC
The branch main has been updated by rscheff:

URL: https://cgit.FreeBSD.org/src/commit/?id=dc9daa04fb6813c5229470b71201d0645c95c206

commit dc9daa04fb6813c5229470b71201d0645c95c206
Author:     Richard Scheffenegger <rscheff@FreeBSD.org>
AuthorDate: 2022-11-08 17:35:58 +0000
Commit:     Richard Scheffenegger <rscheff@FreeBSD.org>
CommitDate: 2022-11-08 17:36:38 +0000

    tcp: allow packets to be marked as ECT1 instead of ECT0
    
    This adds the capability for a modular congestion control
    to select which variant of ECN-capable-transport it wants to use
    when sending out eligible segments. As an initial CC to utilize
    this, DCTCP was selected.
    
    Event:                  IETF 115 Hackathon
    Reviewed By:            tuexen, #transport
    Sponsored by:           NetApp, Inc.
    Differential Revision:  https://reviews.freebsd.org/D24869
---
 share/man/man4/cc_dctcp.4 |  7 ++++++-
 sys/netinet/cc/cc_dctcp.c | 12 +++++++++++-
 sys/netinet/tcp_ecn.c     |  9 +++++++--
 sys/netinet/tcp_var.h     |  4 +++-
 4 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/share/man/man4/cc_dctcp.4 b/share/man/man4/cc_dctcp.4
index 8c23f2eaa134..c4c8eb7da623 100644
--- a/share/man/man4/cc_dctcp.4
+++ b/share/man/man4/cc_dctcp.4
@@ -28,7 +28,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd May 13, 2021
+.Dd November 8, 2022
 .Dt CC_DCTCP 4
 .Os
 .Sh NAME
@@ -91,6 +91,11 @@ gain of 1 / ( 2 ^
 .It Va slowstart
 A flag if the congestion window should be reduced by one half after slow start.
 Valid settings 0 and 1, default 0.
+.It Va ect1
+Controls whether a DCTCP session marks outgoing segments with IP ECT(0)
+(the default), or with ECT(1) in order to make use of L4S infrastructure.
+Changes to this setting only affect new sessions; existing sessions
+retain their previous marking value.
 .El
 .Sh SEE ALSO
 .Xr cc_cdg 4 ,
diff --git a/sys/netinet/cc/cc_dctcp.c b/sys/netinet/cc/cc_dctcp.c
index 5e4a01649f57..ce11e611a0e5 100644
--- a/sys/netinet/cc/cc_dctcp.c
+++ b/sys/netinet/cc/cc_dctcp.c
@@ -68,6 +68,8 @@ VNET_DEFINE_STATIC(uint32_t, dctcp_shift_g) = 4;
 #define	V_dctcp_shift_g	    VNET(dctcp_shift_g)
 VNET_DEFINE_STATIC(uint32_t, dctcp_slowstart) = 0;
 #define	V_dctcp_slowstart   VNET(dctcp_slowstart)
+VNET_DEFINE_STATIC(uint32_t, dctcp_ect1) = 0;
+#define	V_dctcp_ect1	    VNET(dctcp_ect1)
 
 struct dctcp {
 	uint32_t bytes_ecn;	  /* # of marked bytes during a RTT */
@@ -313,8 +315,11 @@ dctcp_conn_init(struct cc_var *ccv)
 
 	dctcp_data = ccv->cc_data;
 
-	if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT)
+	if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT) {
 		dctcp_data->save_sndnxt = CCV(ccv, snd_nxt);
+		if (V_dctcp_ect1)
+			CCV(ccv, t_flags2) |= TF2_ECN_USE_ECT1;
+	}
 }
 
 /*
@@ -478,5 +483,10 @@ SYSCTL_PROC(_net_inet_tcp_cc_dctcp, OID_AUTO, slowstart,
     &VNET_NAME(dctcp_slowstart), 0, &dctcp_slowstart_handler, "IU",
     "half CWND reduction after the first slow start");
 
+SYSCTL_UINT(_net_inet_tcp_cc_dctcp, OID_AUTO, ect1,
+    CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+    &VNET_NAME(dctcp_ect1), 0,
+    "Send DCTCP segments with IP ECT(0) or ECT(1)");
+
 DECLARE_CC_MODULE(dctcp, &dctcp_cc_algo);
 MODULE_VERSION(dctcp, 2);
diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c
index 3cbc1c53dad7..8e3e38ddabfc 100644
--- a/sys/netinet/tcp_ecn.c
+++ b/sys/netinet/tcp_ecn.c
@@ -411,8 +411,13 @@ tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len, bool rx
 		    !((tp->t_flags & TF_FORCEDATA) && len == 1));
 	/* RFC3168 ECN marking, only new data segments */
 	if (newdata) {
-		ipecn = IPTOS_ECN_ECT0;
-		TCPSTAT_INC(tcps_ecn_ect0);
+		if (tp->t_flags2 & TF2_ECN_USE_ECT1) {
+			ipecn = IPTOS_ECN_ECT1;
+			TCPSTAT_INC(tcps_ecn_ect1);
+		} else {
+			ipecn = IPTOS_ECN_ECT0;
+			TCPSTAT_INC(tcps_ecn_ect0);
+		}
 	}
 	/*
 	 * Reply with proper ECN notifications.
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 3bcba227d5da..0655eb85aae0 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -577,7 +577,9 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack)
 #define	TF2_ECN_SND_CWR		0x00000040 /* ECN CWR in queue */
 #define	TF2_ECN_SND_ECE		0x00000080 /* ECN ECE in queue */
 #define	TF2_ACE_PERMIT		0x00000100 /* Accurate ECN mode */
-#define TF2_FBYTES_COMPLETE	0x00000400 /* We have first bytes in and out */
+#define	TF2_FBYTES_COMPLETE	0x00000400 /* We have first bytes in and out */
+#define	TF2_ECN_USE_ECT1	0x00000800 /* Use ECT(1) marking on session */
+
 /*
  * Structure to hold TCP options that are only used during segment
  * processing (in tcp_input), but not held in the tcpcb.