svn commit: r367123 - in head: sbin/ifconfig sys/dev/mlx5/mlx5_en sys/kern sys/net sys/netinet sys/sys

John Baldwin jhb at FreeBSD.org
Thu Oct 29 00:23:19 UTC 2020


Author: jhb
Date: Thu Oct 29 00:23:16 2020
New Revision: 367123
URL: https://svnweb.freebsd.org/changeset/base/367123

Log:
  Support hardware rate limiting (pacing) with TLS offload.
  
  - Add a new send tag type for a send tag that supports both rate
    limiting (packet pacing) and TLS offload (mostly similar to D22669
    but adds a separate structure when allocating the new tag type).
  
  - When allocating a send tag for TLS offload, check to see if the
    connection already has a pacing rate.  If so, allocate a tag that
    supports both rate limiting and TLS offload rather than a plain TLS
    offload tag.
  
  - When setting an initial rate on an existing ifnet KTLS connection,
    set the rate in the TCP control block inp and then reset the TLS
    send tag (via ktls_output_eagain) to reallocate a TLS + ratelimit
    send tag.  This allocates the TLS send tag asynchronously from a
    task queue, so the TLS rate limit tag allocation is always sleepable.
  
  - When modifying a rate on a connection using KTLS, look for a TLS
    send tag.  If the send tag is only a plain TLS send tag, assume we
    failed to allocate a TLS ratelimit tag (either during the
    TCP_TXTLS_ENABLE socket option, or during the send tag reset
    triggered by ktls_output_eagain) and ignore the new rate.  If the
    send tag is a ratelimit TLS send tag, change the rate on the TLS tag
    and leave the inp tag alone.
  
  - Lock the inp lock when setting sb_tls_info for a socket send buffer
    so that the routines in tcp_ratelimit can safely dereference the
    pointer without needing to grab the socket buffer lock.
  
  - Add an IFCAP_TXTLS_RTLMT capability flag and associated
    administrative controls in ifconfig(8).  TLS rate limit tags are
    only allocated if this capability is enabled.  Note that TLS offload
    (whether unlimited or rate limited) always requires IFCAP_TXTLS4 or
    IFCAP_TXTLS6.
  
  Reviewed by:	gallatin, hselasky
  Relnotes:	yes
  Sponsored by:	Netflix
  Differential Revision:	https://reviews.freebsd.org/D26691

Modified:
  head/sbin/ifconfig/ifconfig.8
  head/sbin/ifconfig/ifconfig.c
  head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
  head/sys/kern/uipc_ktls.c
  head/sys/net/if.h
  head/sys/net/if_var.h
  head/sys/net/if_vlan.c
  head/sys/netinet/tcp_ratelimit.c
  head/sys/sys/ktls.h

Modified: head/sbin/ifconfig/ifconfig.8
==============================================================================
--- head/sbin/ifconfig/ifconfig.8	Thu Oct 29 00:03:19 2020	(r367122)
+++ head/sbin/ifconfig/ifconfig.8	Thu Oct 29 00:23:16 2020	(r367123)
@@ -28,7 +28,7 @@
 .\"     From: @(#)ifconfig.8	8.3 (Berkeley) 1/5/94
 .\" $FreeBSD$
 .\"
-.Dd October 25, 2020
+.Dd October 28, 2020
 .Dt IFCONFIG 8
 .Os
 .Sh NAME
@@ -561,6 +561,10 @@ It will always disable TLS for
 .Xr ip 4
 and
 .Xr ip6 4 .
+.It Cm txtlsrtlmt
+Enable use of rate limiting (packet pacing) for TLS offload.
+.It Fl txtlsrtlmt
+Disable use of rate limiting for TLS offload.
 .It Cm nomap
 If the driver supports unmapped network buffers,
 enable them on the interface.

Modified: head/sbin/ifconfig/ifconfig.c
==============================================================================
--- head/sbin/ifconfig/ifconfig.c	Thu Oct 29 00:03:19 2020	(r367122)
+++ head/sbin/ifconfig/ifconfig.c	Thu Oct 29 00:23:16 2020	(r367123)
@@ -1345,7 +1345,7 @@ unsetifdescr(const char *val, int value, int s, const 
 "\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \
 "\17TOE4\20TOE6\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \
 "\26RXCSUM_IPV6\27TXCSUM_IPV6\31TXRTLMT\32HWRXTSTMP\33NOMAP\34TXTLS4\35TXTLS6" \
-"\36VXLAN_HWCSUM\37VXLAN_HWTSO"
+"\36VXLAN_HWCSUM\37VXLAN_HWTSO\40TXTLS_RTLMT"
 
 /*
  * Print the status of the interface.  If an address family was
@@ -1685,6 +1685,8 @@ static struct cmd basic_cmds[] = {
 	DEF_CMD("-wol_magic",	-IFCAP_WOL_MAGIC,	setifcap),
 	DEF_CMD("txrtlmt",	IFCAP_TXRTLMT,	setifcap),
 	DEF_CMD("-txrtlmt",	-IFCAP_TXRTLMT,	setifcap),
+	DEF_CMD("txtlsrtlmt",	IFCAP_TXTLS_RTLMT,	setifcap),
+	DEF_CMD("-txtlsrtlmt",	-IFCAP_TXTLS_RTLMT,	setifcap),
 	DEF_CMD("hwrxtstmp",	IFCAP_HWRXTSTMP,	setifcap),
 	DEF_CMD("-hwrxtstmp",	-IFCAP_HWRXTSTMP,	setifcap),
 	DEF_CMD("normal",	-IFF_LINK0,	setifflags),

Modified: head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
==============================================================================
--- head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c	Thu Oct 29 00:03:19 2020	(r367122)
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c	Thu Oct 29 00:23:16 2020	(r367123)
@@ -3349,6 +3349,10 @@ mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t
 			ifp->if_capenable ^= IFCAP_TXTLS4;
 		if (mask & IFCAP_TXTLS6)
 			ifp->if_capenable ^= IFCAP_TXTLS6;
+#ifdef RATELIMIT
+		if (mask & IFCAP_TXTLS_RTLMT)
+			ifp->if_capenable ^= IFCAP_TXTLS_RTLMT;
+#endif
 		if (mask & IFCAP_RXCSUM)
 			ifp->if_capenable ^= IFCAP_RXCSUM;
 		if (mask & IFCAP_RXCSUM_IPV6)
@@ -4320,7 +4324,9 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
 	ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP;
 	ifp->if_capabilities |= IFCAP_NOMAP;
 	ifp->if_capabilities |= IFCAP_TXTLS4 | IFCAP_TXTLS6;
-	ifp->if_capabilities |= IFCAP_TXRTLMT;
+#ifdef RATELIMIT
+	ifp->if_capabilities |= IFCAP_TXRTLMT | IFCAP_TXTLS_RTLMT;
+#endif
 	ifp->if_snd_tag_alloc = mlx5e_snd_tag_alloc;
 	ifp->if_snd_tag_free = mlx5e_snd_tag_free;
 	ifp->if_snd_tag_modify = mlx5e_snd_tag_modify;

Modified: head/sys/kern/uipc_ktls.c
==============================================================================
--- head/sys/kern/uipc_ktls.c	Thu Oct 29 00:03:19 2020	(r367122)
+++ head/sys/kern/uipc_ktls.c	Thu Oct 29 00:23:16 2020	(r367123)
@@ -814,12 +814,24 @@ ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_sess
 	ifp = nh->nh_ifp;
 	if_ref(ifp);
 
-	params.hdr.type = IF_SND_TAG_TYPE_TLS;
+	/*
+	 * Allocate a TLS + ratelimit tag if the connection has an
+	 * existing pacing rate.
+	 */
+	if (tp->t_pacing_rate != -1 &&
+	    (ifp->if_capenable & IFCAP_TXTLS_RTLMT) != 0) {
+		params.hdr.type = IF_SND_TAG_TYPE_TLS_RATE_LIMIT;
+		params.tls_rate_limit.inp = inp;
+		params.tls_rate_limit.tls = tls;
+		params.tls_rate_limit.max_rate = tp->t_pacing_rate;
+	} else {
+		params.hdr.type = IF_SND_TAG_TYPE_TLS;
+		params.tls.inp = inp;
+		params.tls.tls = tls;
+	}
 	params.hdr.flowid = inp->inp_flowid;
 	params.hdr.flowtype = inp->inp_flowtype;
 	params.hdr.numa_domain = inp->inp_numa_domain;
-	params.tls.inp = inp;
-	params.tls.tls = tls;
 	INP_RUNLOCK(inp);
 
 	if (ifp->if_snd_tag_alloc == NULL) {
@@ -1034,6 +1046,7 @@ int
 ktls_enable_tx(struct socket *so, struct tls_enable *en)
 {
 	struct ktls_session *tls;
+	struct inpcb *inp;
 	int error;
 
 	if (!ktls_offload_enable)
@@ -1086,12 +1099,20 @@ ktls_enable_tx(struct socket *so, struct tls_enable *e
 		return (error);
 	}
 
+	/*
+	 * Write lock the INP when setting sb_tls_info so that
+	 * routines in tcp_ratelimit.c can read sb_tls_info while
+	 * holding the INP lock.
+	 */
+	inp = so->so_pcb;
+	INP_WLOCK(inp);
 	SOCKBUF_LOCK(&so->so_snd);
 	so->so_snd.sb_tls_seqno = be64dec(en->rec_seq);
 	so->so_snd.sb_tls_info = tls;
 	if (tls->mode != TCP_TLS_MODE_SW)
 		so->so_snd.sb_flags |= SB_TLS_IFNET;
 	SOCKBUF_UNLOCK(&so->so_snd);
+	INP_WUNLOCK(inp);
 	sbunlock(&so->so_snd);
 
 	counter_u64_add(ktls_offload_total, 1);
@@ -1344,6 +1365,42 @@ ktls_output_eagain(struct inpcb *inp, struct ktls_sess
 	mtx_pool_unlock(mtxpool_sleep, tls);
 	return (ENOBUFS);
 }
+
+#ifdef RATELIMIT
+int
+ktls_modify_txrtlmt(struct ktls_session *tls, uint64_t max_pacing_rate)
+{
+	union if_snd_tag_modify_params params = {
+		.rate_limit.max_rate = max_pacing_rate,
+		.rate_limit.flags = M_NOWAIT,
+	};
+	struct m_snd_tag *mst;
+	struct ifnet *ifp;
+	int error;
+
+	/* Can't get to the inp, but it should be locked. */
+	/* INP_LOCK_ASSERT(inp); */
+
+	MPASS(tls->mode == TCP_TLS_MODE_IFNET);
+
+	if (tls->snd_tag == NULL) {
+		/*
+		 * Resetting send tag, ignore this change.  The
+		 * pending reset may or may not see this updated rate
+		 * in the tcpcb.  If it doesn't, we will just lose
+		 * this rate change.
+		 */
+		return (0);
+	}
+
+	MPASS(tls->snd_tag != NULL);
+	MPASS(tls->snd_tag->type == IF_SND_TAG_TYPE_TLS_RATE_LIMIT);
+
+	mst = tls->snd_tag;
+	ifp = mst->ifp;
+	return (ifp->if_snd_tag_modify(mst, &params));
+}
+#endif
 #endif
 
 void

Modified: head/sys/net/if.h
==============================================================================
--- head/sys/net/if.h	Thu Oct 29 00:03:19 2020	(r367122)
+++ head/sys/net/if.h	Thu Oct 29 00:23:16 2020	(r367123)
@@ -250,6 +250,7 @@ struct if_data {
 #define	IFCAP_TXTLS6		0x10000000 /* can do TLS encryption and segmentation for TCP6 */
 #define	IFCAP_VXLAN_HWCSUM	0x20000000 /* can do IFCAN_HWCSUM on VXLANs */
 #define	IFCAP_VXLAN_HWTSO	0x40000000 /* can do IFCAP_TSO on VXLANs */
+#define	IFCAP_TXTLS_RTLMT	0x80000000 /* can do TLS with rate limiting */
 
 #define IFCAP_HWCSUM_IPV6	(IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6)
 

Modified: head/sys/net/if_var.h
==============================================================================
--- head/sys/net/if_var.h	Thu Oct 29 00:03:19 2020	(r367122)
+++ head/sys/net/if_var.h	Thu Oct 29 00:23:16 2020	(r367123)
@@ -191,7 +191,8 @@ struct m_snd_tag;
 #define	IF_SND_TAG_TYPE_RATE_LIMIT 0
 #define	IF_SND_TAG_TYPE_UNLIMITED 1
 #define	IF_SND_TAG_TYPE_TLS 2
-#define	IF_SND_TAG_TYPE_MAX 3
+#define	IF_SND_TAG_TYPE_TLS_RATE_LIMIT 3
+#define	IF_SND_TAG_TYPE_MAX 4
 
 struct if_snd_tag_alloc_header {
 	uint32_t type;		/* send tag type, see IF_SND_TAG_XXX */
@@ -213,6 +214,13 @@ struct if_snd_tag_alloc_tls {
 	const struct ktls_session *tls;
 };
 
+struct if_snd_tag_alloc_tls_rate_limit {
+	struct if_snd_tag_alloc_header hdr;
+	struct inpcb *inp;
+	const struct ktls_session *tls;
+	uint64_t max_rate;	/* in bytes/s */
+};
+
 struct if_snd_tag_rate_limit_params {
 	uint64_t max_rate;	/* in bytes/s */
 	uint32_t queue_level;	/* 0 (empty) .. 65535 (full) */
@@ -226,16 +234,19 @@ union if_snd_tag_alloc_params {
 	struct if_snd_tag_alloc_rate_limit rate_limit;
 	struct if_snd_tag_alloc_rate_limit unlimited;
 	struct if_snd_tag_alloc_tls tls;
+	struct if_snd_tag_alloc_tls_rate_limit tls_rate_limit;
 };
 
 union if_snd_tag_modify_params {
 	struct if_snd_tag_rate_limit_params rate_limit;
 	struct if_snd_tag_rate_limit_params unlimited;
+	struct if_snd_tag_rate_limit_params tls_rate_limit;
 };
 
 union if_snd_tag_query_params {
 	struct if_snd_tag_rate_limit_params rate_limit;
 	struct if_snd_tag_rate_limit_params unlimited;
+	struct if_snd_tag_rate_limit_params tls_rate_limit;
 };
 
 /* Query return flags */

Modified: head/sys/net/if_vlan.c
==============================================================================
--- head/sys/net/if_vlan.c	Thu Oct 29 00:03:19 2020	(r367122)
+++ head/sys/net/if_vlan.c	Thu Oct 29 00:23:16 2020	(r367123)
@@ -1782,10 +1782,10 @@ vlan_capabilities(struct ifvlan *ifv)
 	 * this ever changes, then a new IFCAP_VLAN_TXTLS can be
 	 * defined.
 	 */
-	if (p->if_capabilities & IFCAP_TXTLS)
-		cap |= p->if_capabilities & IFCAP_TXTLS;
-	if (p->if_capenable & IFCAP_TXTLS)
-		ena |= mena & IFCAP_TXTLS;
+	if (p->if_capabilities & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT))
+		cap |= p->if_capabilities & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT);
+	if (p->if_capenable & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT))
+		ena |= mena & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT);
 
 	ifp->if_capabilities = cap;
 	ifp->if_capenable = ena;

Modified: head/sys/netinet/tcp_ratelimit.c
==============================================================================
--- head/sys/netinet/tcp_ratelimit.c	Thu Oct 29 00:03:19 2020	(r367122)
+++ head/sys/netinet/tcp_ratelimit.c	Thu Oct 29 00:23:16 2020	(r367123)
@@ -1219,6 +1219,9 @@ tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *if
     uint64_t bytes_per_sec, int flags, int *error)
 {
 	const struct tcp_hwrate_limit_table *rte;
+#ifdef KERN_TLS
+	struct ktls_session *tls;
+#endif
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
@@ -1233,17 +1236,30 @@ tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *if
 			return (NULL);
 		}
 #ifdef KERN_TLS
+		tls = NULL;
 		if (tp->t_inpcb->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) {
+			tls = tp->t_inpcb->inp_socket->so_snd.sb_tls_info;
+
+			if ((ifp->if_capenable & IFCAP_TXTLS_RTLMT) == 0 ||
+			    tls->mode != TCP_TLS_MODE_IFNET) {
+				if (error)
+					*error = ENODEV;
+				return (NULL);
+			}
+		}
+#endif
+		rte = rt_setup_rate(tp->t_inpcb, ifp, bytes_per_sec, flags, error);
+#ifdef KERN_TLS
+		if (rte != NULL && tls != NULL && tls->snd_tag != NULL) {
 			/*
-			 * We currently can't do both TLS and hardware
-			 * pacing
+			 * Fake a route change error to reset the TLS
+			 * send tag.  This will convert the existing
+			 * tag to a TLS ratelimit tag.
 			 */
-			if (error)
-				*error = EINVAL;
-			return (NULL);
+			MPASS(tls->snd_tag->type == IF_SND_TAG_TYPE_TLS);
+			ktls_output_eagain(tp->t_inpcb, tls);
 		}
 #endif
-		rte = rt_setup_rate(tp->t_inpcb, ifp, bytes_per_sec, flags, error);
 	} else {
 		/*
 		 * We are modifying a rate, wrong interface?
@@ -1264,18 +1280,44 @@ tcp_chg_pacing_rate(const struct tcp_hwrate_limit_tabl
 {
 	const struct tcp_hwrate_limit_table *nrte;
 	const struct tcp_rate_set *rs;
+#ifdef KERN_TLS
+	struct ktls_session *tls = NULL;
+#endif
 	int is_indirect = 0;
 	int err;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
-	if ((tp->t_inpcb->inp_snd_tag == NULL) ||
-	    (crte == NULL)) {
+	if (crte == NULL) {
 		/* Wrong interface */
 		if (error)
 			*error = EINVAL;
 		return (NULL);
 	}
+
+#ifdef KERN_TLS
+	if (tp->t_inpcb->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) {
+		tls = tp->t_inpcb->inp_socket->so_snd.sb_tls_info;
+		MPASS(tls->mode == TCP_TLS_MODE_IFNET);
+		if (tls->snd_tag != NULL &&
+		    tls->snd_tag->type != IF_SND_TAG_TYPE_TLS_RATE_LIMIT) {
+			/*
+			 * NIC probably doesn't support ratelimit TLS
+			 * tags if it didn't allocate one when an
+			 * existing rate was present, so ignore.
+			 */
+			if (error)
+				*error = EOPNOTSUPP;
+			return (NULL);
+		}
+	}
+#endif
+	if (tp->t_inpcb->inp_snd_tag == NULL) {
+		/* Wrong interface */
+		if (error)
+			*error = EINVAL;
+		return (NULL);
+	}
 	rs = crte->ptbl;
 	if ((rs->rs_flags & RS_IS_DEAD) ||
 	    (crte->flags & HDWRPACE_IFPDEPARTED)) {
@@ -1327,7 +1369,12 @@ re_rate:
 		return (NULL);
 	}
 	/* Change rates to our new entry */
-	err = in_pcbmodify_txrtlmt(tp->t_inpcb, nrte->rate);
+#ifdef KERN_TLS
+	if (tls != NULL)
+		err = ktls_modify_txrtlmt(tls, nrte->rate);
+	else
+#endif
+		err = in_pcbmodify_txrtlmt(tp->t_inpcb, nrte->rate);
 	if (err) {
 		if (error)
 			*error = err;
@@ -1365,6 +1412,13 @@ tcp_rel_pacing_rate(const struct tcp_hwrate_limit_tabl
 			rs_defer_destroy(rs);
 		mtx_unlock(&rs_mtx);
 	}
+
+	/*
+	 * XXX: If this connection is using ifnet TLS, should we
+	 * switch it to using an unlimited rate, or perhaps use
+	 * ktls_output_eagain() to reset the send tag to a plain
+	 * TLS tag?
+	 */
 	in_pcbdetach_txrtlmt(tp->t_inpcb);
 }
 

Modified: head/sys/sys/ktls.h
==============================================================================
--- head/sys/sys/ktls.h	Thu Oct 29 00:03:19 2020	(r367122)
+++ head/sys/sys/ktls.h	Thu Oct 29 00:23:16 2020	(r367123)
@@ -222,6 +222,9 @@ int ktls_get_rx_mode(struct socket *so);
 int ktls_set_tx_mode(struct socket *so, int mode);
 int ktls_get_tx_mode(struct socket *so);
 int ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls);
+#ifdef RATELIMIT
+int ktls_modify_txrtlmt(struct ktls_session *tls, uint64_t max_pacing_rate);
+#endif
 
 static inline struct ktls_session *
 ktls_hold(struct ktls_session *tls)


More information about the svn-src-head mailing list