git: 01d74fe1ffc3 - main - Path MTU discovery hooks for offloaded TCP connections.

Navdeep Parhar np at FreeBSD.org
Wed Apr 21 20:01:48 UTC 2021


The branch main has been updated by np:

URL: https://cgit.FreeBSD.org/src/commit/?id=01d74fe1ffc32dc7f42dc0fb0c4861276a6b2bd2

commit 01d74fe1ffc32dc7f42dc0fb0c4861276a6b2bd2
Author:     Navdeep Parhar <np at FreeBSD.org>
AuthorDate: 2021-04-13 00:25:22 +0000
Commit:     Navdeep Parhar <np at FreeBSD.org>
CommitDate: 2021-04-21 20:00:16 +0000

    Path MTU discovery hooks for offloaded TCP connections.
    
    Notify the TOE driver when an ICMP type 3 code 4 (Fragmentation
    needed and DF set) message is received for an offloaded connection.
    This gives the driver an opportunity to lower the path MTU for the
    connection and resume transmission, much like what the kernel does for
    the connections that it handles.
    
    Reviewed by:    glebius@
    Sponsored by:   Chelsio Communications
    Differential Revision:  https://reviews.freebsd.org/D29755
---
 sys/netinet/tcp_offload.c | 11 +++++++
 sys/netinet/tcp_offload.h |  3 ++
 sys/netinet/tcp_subr.c    | 80 ++++++++++++++++++++++++++++++++---------------
 sys/netinet/toecore.c     |  9 ++++++
 sys/netinet/toecore.h     |  4 +++
 5 files changed, 81 insertions(+), 26 deletions(-)

diff --git a/sys/netinet/tcp_offload.c b/sys/netinet/tcp_offload.c
index ba190f0303f1..84a4bc3c31a3 100644
--- a/sys/netinet/tcp_offload.c
+++ b/sys/netinet/tcp_offload.c
@@ -219,3 +219,14 @@ tcp_offload_detach(struct tcpcb *tp)
 
 	tod->tod_pcb_detach(tod, tp);
 }
+
+void
+tcp_offload_pmtu_update(struct tcpcb *tp, tcp_seq seq, int mtu)
+{
+	struct toedev *tod = tp->tod;
+
+	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	tod->tod_pmtu_update(tod, tp, seq, mtu);
+}
diff --git a/sys/netinet/tcp_offload.h b/sys/netinet/tcp_offload.h
index 19c120ccdd7d..8f3786e9f7eb 100644
--- a/sys/netinet/tcp_offload.h
+++ b/sys/netinet/tcp_offload.h
@@ -36,6 +36,8 @@
 #error "no user-serviceable parts inside"
 #endif
 
+#include <netinet/tcp.h>
+
 extern int registered_toedevs;
 
 int  tcp_offload_connect(struct socket *, struct sockaddr *);
@@ -48,5 +50,6 @@ void tcp_offload_ctloutput(struct tcpcb *, int, int);
 void tcp_offload_tcp_info(struct tcpcb *, struct tcp_info *);
 int  tcp_offload_alloc_tls_session(struct tcpcb *, struct ktls_session *, int);
 void tcp_offload_detach(struct tcpcb *);
+void tcp_offload_pmtu_update(struct tcpcb *, tcp_seq, int);
 
 #endif
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 1ce7a5b1fcf3..b5ecdc6f2307 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -2791,6 +2791,21 @@ SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred,
 #endif /* INET6 */
 
 #ifdef INET
+/* Path MTU to try next when a fragmentation-needed message is received. */
+static inline int
+tcp_next_pmtu(const struct icmp *icp, const struct ip *ip)
+{
+	int mtu = ntohs(icp->icmp_nextmtu);
+
+	/* If no alternative MTU was proposed, try the next smaller one. */
+	if (!mtu)
+		mtu = ip_next_mtu(ntohs(ip->ip_len), 1);
+	if (mtu < V_tcp_minmss + sizeof(struct tcpiphdr))
+		mtu = V_tcp_minmss + sizeof(struct tcpiphdr);
+
+	return (mtu);
+}
+
 static void
 tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port)
 {
@@ -2846,6 +2861,17 @@ tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port)
 		    !(inp->inp_flags & INP_DROPPED) &&
 		    !(inp->inp_socket == NULL)) {
 			tp = intotcpcb(inp);
+#ifdef TCP_OFFLOAD
+			if (tp->t_flags & TF_TOE && cmd == PRC_MSGSIZE) {
+				/*
+				 * MTU discovery for offloaded connections.  Let
+				 * the TOE driver verify seq# and process it.
+				 */
+				mtu = tcp_next_pmtu(icp, ip);
+				tcp_offload_pmtu_update(tp, icmp_tcp_seq, mtu);
+				goto out;
+			}
+#endif
 			if (tp->t_port != port) {
 				goto out;
 			}
@@ -2853,24 +2879,11 @@ tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port)
 			    SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) {
 				if (cmd == PRC_MSGSIZE) {
 					/*
-					 * MTU discovery:
-					 * If we got a needfrag set the MTU
-					 * in the route to the suggested new
-					 * value (if given) and then notify.
+					 * MTU discovery: we got a needfrag and
+					 * will potentially try a lower MTU.
 					 */
-					mtu = ntohs(icp->icmp_nextmtu);
-					/*
-					 * If no alternative MTU was
-					 * proposed, try the next smaller
-					 * one.
-					 */
-					if (!mtu)
-						mtu = ip_next_mtu(
-						    ntohs(ip->ip_len), 1);
-					if (mtu < V_tcp_minmss +
-					    sizeof(struct tcpiphdr))
-						mtu = V_tcp_minmss +
-						    sizeof(struct tcpiphdr);
+					mtu = tcp_next_pmtu(icp, ip);
+
 					/*
 					 * Only process the offered MTU if it
 					 * is smaller than the current one.
@@ -2948,6 +2961,20 @@ tcp_ctlinput_viaudp(int cmd, struct sockaddr *sa, void *vip, void *unused)
 #endif /* INET */
 
 #ifdef INET6
+static inline int
+tcp6_next_pmtu(const struct icmp6_hdr *icmp6)
+{
+	int mtu = ntohl(icmp6->icmp6_mtu);
+
+	/*
+	 * If no alternative MTU was proposed, or the proposed MTU was too
+	 * small, set to the min.
+	 */
+	if (mtu < IPV6_MMTU)
+		mtu = IPV6_MMTU - 8;	/* XXXNP: what is the adjustment for? */
+	return (mtu);
+}
+
 static void
 tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port)
 {
@@ -3039,6 +3066,14 @@ tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port)
 		    !(inp->inp_flags & INP_DROPPED) &&
 		    !(inp->inp_socket == NULL)) {
 			tp = intotcpcb(inp);
+#ifdef TCP_OFFLOAD
+			if (tp->t_flags & TF_TOE && cmd == PRC_MSGSIZE) {
+				/* MTU discovery for offloaded connections. */
+				mtu = tcp6_next_pmtu(icmp6);
+				tcp_offload_pmtu_update(tp, icmp_tcp_seq, mtu);
+				goto out;
+			}
+#endif
 			if (tp->t_port != port) {
 				goto out;
 			}
@@ -3051,15 +3086,8 @@ tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port)
 					 * in the route to the suggested new
 					 * value (if given) and then notify.
 					 */
-					mtu = ntohl(icmp6->icmp6_mtu);
-					/*
-					 * If no alternative MTU was
-					 * proposed, or the proposed
-					 * MTU was too small, set to
-					 * the min.
-					 */
-					if (mtu < IPV6_MMTU)
-						mtu = IPV6_MMTU - 8;
+					mtu = tcp6_next_pmtu(icmp6);
+
 					bzero(&inc, sizeof(inc));
 					inc.inc_fibnum = M_GETFIB(m);
 					inc.inc_flags |= INC_ISIPV6;
diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c
index d8d499a6fde3..5792298d2883 100644
--- a/sys/netinet/toecore.c
+++ b/sys/netinet/toecore.c
@@ -199,6 +199,14 @@ toedev_alloc_tls_session(struct toedev *tod __unused, struct tcpcb *tp __unused,
 	return (EINVAL);
 }
 
+static void
+toedev_pmtu_update(struct toedev *tod __unused, struct tcpcb *tp __unused,
+    tcp_seq seq __unused, int mtu __unused)
+{
+
+	return;
+}
+
 /*
  * Inform one or more TOE devices about a listening socket.
  */
@@ -290,6 +298,7 @@ init_toedev(struct toedev *tod)
 	tod->tod_ctloutput = toedev_ctloutput;
 	tod->tod_tcp_info = toedev_tcp_info;
 	tod->tod_alloc_tls_session = toedev_alloc_tls_session;
+	tod->tod_pmtu_update = toedev_pmtu_update;
 }
 
 /*
diff --git a/sys/netinet/toecore.h b/sys/netinet/toecore.h
index 36493abf7149..ce796ab54dc5 100644
--- a/sys/netinet/toecore.h
+++ b/sys/netinet/toecore.h
@@ -35,6 +35,7 @@
 #error "no user-serviceable parts inside"
 #endif
 
+#include <netinet/tcp.h>
 #include <sys/_eventhandler.h>
 
 struct tcpopt;
@@ -114,6 +115,9 @@ struct toedev {
 	/* Create a TLS session */
 	int (*tod_alloc_tls_session)(struct toedev *, struct tcpcb *,
 	    struct ktls_session *, int);
+
+	/* ICMP fragmentation-needed received, adjust PMTU. */
+	void (*tod_pmtu_update)(struct toedev *, struct tcpcb *, tcp_seq, int);
 };
 
 typedef	void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);


More information about the dev-commits-src-all mailing list