svn commit: r271504 - in head/sys: dev/oce dev/vmware/vmxnet3 dev/xen/netfront net netinet ofed/drivers/net/mlx4

Hans Petter Selasky hselasky at FreeBSD.org
Sat Sep 13 08:26:12 UTC 2014


Author: hselasky
Date: Sat Sep 13 08:26:09 2014
New Revision: 271504
URL: http://svnweb.freebsd.org/changeset/base/271504

Log:
  Improve transmit sending offload, TSO, algorithm in general.
  
  The current TSO limitation feature only takes the total number of
  bytes in an mbuf chain into account and does not limit by the number
  of mbufs in a chain. Some kinds of hardware is limited by two
  factors. One is the fragment length and the second is the fragment
  count. Both of these limits need to be taken into account when doing
  TSO. Else some kinds of hardware might have to drop completely valid
  mbuf chains because they cannot loaded into the given hardware's DMA
  engine. The new way of doing TSO limitation has been made backwards
  compatible as input from other FreeBSD developers and will use
  defaults for values not set.
  
  MFC after:	1 week
  Sponsored by:	Mellanox Technologies

Modified:
  head/sys/dev/oce/oce_if.c
  head/sys/dev/oce/oce_if.h
  head/sys/dev/vmware/vmxnet3/if_vmx.c
  head/sys/dev/vmware/vmxnet3/if_vmxvar.h
  head/sys/dev/xen/netfront/netfront.c
  head/sys/net/if.c
  head/sys/net/if_lagg.c
  head/sys/net/if_var.h
  head/sys/net/if_vlan.c
  head/sys/netinet/tcp_output.c
  head/sys/ofed/drivers/net/mlx4/en_netdev.c

Modified: head/sys/dev/oce/oce_if.c
==============================================================================
--- head/sys/dev/oce/oce_if.c	Sat Sep 13 07:45:03 2014	(r271503)
+++ head/sys/dev/oce/oce_if.c	Sat Sep 13 08:26:09 2014	(r271504)
@@ -1731,7 +1731,10 @@ oce_attach_ifp(POCE_SOFTC sc)
 	sc->ifp->if_baudrate = IF_Gbps(10);
 
 #if __FreeBSD_version >= 1000000
-	sc->ifp->if_hw_tsomax = OCE_MAX_TSO_SIZE;
+	sc->ifp->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE(
+	    65535 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) /* bytes */,
+	    OCE_MAX_TX_ELEMENTS /* maximum frag count */,
+	    12 /* 4K frag size */);
 #endif
 
 	ether_ifattach(sc->ifp, sc->macaddr.mac_addr);

Modified: head/sys/dev/oce/oce_if.h
==============================================================================
--- head/sys/dev/oce/oce_if.h	Sat Sep 13 07:45:03 2014	(r271503)
+++ head/sys/dev/oce/oce_if.h	Sat Sep 13 08:26:09 2014	(r271504)
@@ -152,7 +152,6 @@ extern int mp_ncpus;			/* system's total
 #define OCE_MAX_TX_ELEMENTS		29
 #define OCE_MAX_TX_DESC			1024
 #define OCE_MAX_TX_SIZE			65535
-#define OCE_MAX_TSO_SIZE		(65535 - ETHER_HDR_LEN)
 #define OCE_MAX_RX_SIZE			4096
 #define OCE_MAX_RQ_POSTS		255
 #define OCE_DEFAULT_PROMISCUOUS		0

Modified: head/sys/dev/vmware/vmxnet3/if_vmx.c
==============================================================================
--- head/sys/dev/vmware/vmxnet3/if_vmx.c	Sat Sep 13 07:45:03 2014	(r271503)
+++ head/sys/dev/vmware/vmxnet3/if_vmx.c	Sat Sep 13 08:26:09 2014	(r271504)
@@ -1722,7 +1722,11 @@ vmxnet3_setup_interface(struct vmxnet3_s
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_init = vmxnet3_init;
 	ifp->if_ioctl = vmxnet3_ioctl;
-	ifp->if_hw_tsomax = VMXNET3_TSO_MAXSIZE;
+
+	ifp->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE(
+	    65535 - sizeof(struct ether_vlan_header) /* bytes */,
+	    VMXNET3_TX_MAXSEGS /* maximum frag count */,
+	    VMXNET3_TX_MAXSEGSHIFT /* frag size */);
 
 #ifdef VMXNET3_LEGACY_TX
 	ifp->if_start = vmxnet3_start;

Modified: head/sys/dev/vmware/vmxnet3/if_vmxvar.h
==============================================================================
--- head/sys/dev/vmware/vmxnet3/if_vmxvar.h	Sat Sep 13 07:45:03 2014	(r271503)
+++ head/sys/dev/vmware/vmxnet3/if_vmxvar.h	Sat Sep 13 08:26:09 2014	(r271504)
@@ -277,14 +277,13 @@ struct vmxnet3_softc {
  */
 #define VMXNET3_TX_MAXSEGS		32
 #define VMXNET3_TX_MAXSIZE		(VMXNET3_TX_MAXSEGS * MCLBYTES)
-#define VMXNET3_TSO_MAXSIZE \
-    (VMXNET3_TX_MAXSIZE - sizeof(struct ether_vlan_header))
 
 /*
  * Maximum support Tx segments size. The length field in the
  * Tx descriptor is 14 bits.
  */
-#define VMXNET3_TX_MAXSEGSIZE		(1 << 14)
+#define VMXNET3_TX_MAXSEGSHIFT		14
+#define VMXNET3_TX_MAXSEGSIZE		(1 << VMXNET3_TX_MAXSEGSHIFT)
 
 /*
  * The maximum number of Rx segments we accept. When LRO is enabled,

Modified: head/sys/dev/xen/netfront/netfront.c
==============================================================================
--- head/sys/dev/xen/netfront/netfront.c	Sat Sep 13 07:45:03 2014	(r271503)
+++ head/sys/dev/xen/netfront/netfront.c	Sat Sep 13 08:26:09 2014	(r271504)
@@ -134,7 +134,6 @@ static const int MODPARM_rx_flip = 0;
  * to mirror the Linux MAX_SKB_FRAGS constant.
  */
 #define	MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2)
-#define	NF_TSO_MAXBURST ((IP_MAXPACKET / PAGE_SIZE) * MCLBYTES)
 
 #define RX_COPY_THRESHOLD 256
 
@@ -2102,7 +2101,10 @@ create_netdev(device_t dev)
 	
     	ifp->if_hwassist = XN_CSUM_FEATURES;
     	ifp->if_capabilities = IFCAP_HWCSUM;
-	ifp->if_hw_tsomax = NF_TSO_MAXBURST;
+	ifp->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE(
+	    65535 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) /* bytes */,
+	    MAX_TX_REQ_FRAGS /* maximum frag count */,
+	    PAGE_SHIFT /* PAGE_SIZE frag size */);
 	
     	ether_ifattach(ifp, np->mac);
     	callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE);

Modified: head/sys/net/if.c
==============================================================================
--- head/sys/net/if.c	Sat Sep 13 07:45:03 2014	(r271503)
+++ head/sys/net/if.c	Sat Sep 13 08:26:09 2014	(r271504)
@@ -423,6 +423,52 @@ if_grow(void)
 }
 
 /*
+ * Compute the least common value of two "if_hw_tsomax" values:
+ */
+u_int
+if_hw_tsomax_common(u_int a, u_int b)
+{
+	u_int a_bytes = IF_HW_TSOMAX_GET_BYTES(a);
+	u_int a_frag_count = IF_HW_TSOMAX_GET_FRAG_COUNT(a);
+	u_int a_frag_size = IF_HW_TSOMAX_GET_FRAG_SIZE(a);
+	u_int b_bytes = IF_HW_TSOMAX_GET_BYTES(b);
+	u_int b_frag_count = IF_HW_TSOMAX_GET_FRAG_COUNT(b);
+	u_int b_frag_size = IF_HW_TSOMAX_GET_FRAG_SIZE(b);
+
+	return (IF_HW_TSOMAX_BUILD_VALUE(min(a_bytes, b_bytes),
+	    min(a_frag_count, b_frag_count),
+	    min(a_frag_size, b_frag_size)));
+}
+
+/*
+ * Range check the "if_hw_tsomax" value:
+ */
+u_int
+if_hw_tsomax_range_check(u_int a)
+{
+	u_int a_bytes = IF_HW_TSOMAX_GET_BYTES(a);
+	u_int a_frag_count = IF_HW_TSOMAX_GET_FRAG_COUNT(a);
+	u_int a_frag_size = IF_HW_TSOMAX_GET_FRAG_SIZE(a);
+
+	/* round down to nearest 4 bytes */
+	a_bytes &= 0xFFFC;
+
+	/* use default, if zero */
+	if (a_bytes == 0)
+		a_bytes = IF_HW_TSOMAX_DEFAULT_BYTES;
+
+	/* use default, if zero */
+	if (a_frag_count == 0)
+		a_frag_count = IF_HW_TSOMAX_DEFAULT_FRAG_COUNT;
+
+	/* use default, if zero */
+	if (a_frag_size == 0)
+		a_frag_size = IF_HW_TSOMAX_DEFAULT_FRAG_SIZE;
+
+	return (IF_HW_TSOMAX_BUILD_VALUE(a_bytes, a_frag_count, a_frag_size));
+}
+
+/*
  * Allocate a struct ifnet and an index for an interface.  A layer 2
  * common structure will also be allocated if an allocation routine is
  * registered for the passed type.
@@ -445,6 +491,7 @@ if_alloc(u_char type)
 	ifp->if_index = idx;
 	ifp->if_type = type;
 	ifp->if_alloctype = type;
+	ifp->if_hw_tsomax = IF_HW_TSOMAX_DEFAULT_VALUE();
 	if (if_com_alloc[type] != NULL) {
 		ifp->if_l2com = if_com_alloc[type](type, ifp);
 		if (ifp->if_l2com == NULL) {
@@ -657,16 +704,9 @@ if_attach_internal(struct ifnet *ifp, in
 		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
 		/* Reliably crash if used uninitialized. */
 		ifp->if_broadcastaddr = NULL;
-
-#if defined(INET) || defined(INET6)
-		/* Initialize to max value. */
-		if (ifp->if_hw_tsomax == 0)
-			ifp->if_hw_tsomax = min(IP_MAXPACKET, 32 * MCLBYTES -
-			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
-		KASSERT(ifp->if_hw_tsomax <= IP_MAXPACKET &&
-		    ifp->if_hw_tsomax >= IP_MAXPACKET / 8,
-		    ("%s: tsomax outside of range", __func__));
-#endif
+		/* range check TSO value */
+		ifp->if_hw_tsomax =
+		    if_hw_tsomax_range_check(ifp->if_hw_tsomax);
 	}
 #ifdef VIMAGE
 	else {

Modified: head/sys/net/if_lagg.c
==============================================================================
--- head/sys/net/if_lagg.c	Sat Sep 13 07:45:03 2014	(r271503)
+++ head/sys/net/if_lagg.c	Sat Sep 13 08:26:09 2014	(r271504)
@@ -445,11 +445,7 @@ lagg_capabilities(struct lagg_softc *sc)
 	struct lagg_port *lp;
 	int cap = ~0, ena = ~0;
 	u_long hwa = ~0UL;
-#if defined(INET) || defined(INET6)
-	u_int hw_tsomax = IP_MAXPACKET;	/* Initialize to the maximum value. */
-#else
-	u_int hw_tsomax = ~0;	/* if_hw_tsomax is only for INET/INET6, but.. */
-#endif
+	u_int hw_tsomax = IF_HW_TSOMAX_DEFAULT_VALUE();
 
 	LAGG_WLOCK_ASSERT(sc);
 
@@ -458,10 +454,9 @@ lagg_capabilities(struct lagg_softc *sc)
 		cap &= lp->lp_ifp->if_capabilities;
 		ena &= lp->lp_ifp->if_capenable;
 		hwa &= lp->lp_ifp->if_hwassist;
-		/* Set to the minimum value of the lagg ports. */
-		if (lp->lp_ifp->if_hw_tsomax < hw_tsomax &&
-		    lp->lp_ifp->if_hw_tsomax > 0)
-			hw_tsomax = lp->lp_ifp->if_hw_tsomax;
+		/* Set to the common value of the lagg ports. */
+		hw_tsomax = if_hw_tsomax_common(hw_tsomax,
+		    lp->lp_ifp->if_hw_tsomax);
 	}
 	cap = (cap == ~0 ? 0 : cap);
 	ena = (ena == ~0 ? 0 : ena);

Modified: head/sys/net/if_var.h
==============================================================================
--- head/sys/net/if_var.h	Sat Sep 13 07:45:03 2014	(r271503)
+++ head/sys/net/if_var.h	Sat Sep 13 08:26:09 2014	(r271504)
@@ -120,6 +120,43 @@ typedef int (*if_transmit_fn_t)(if_t, st
 typedef	uint64_t (*if_get_counter_t)(if_t, ifnet_counter);
 
 /*
+ * Macros defining how to decode the "if_hw_tsomax" field:
+ */
+#define	IF_HW_TSOMAX_GET_BYTES(x)		\
+    ((uint16_t)(x))	/* 32..65535 */
+
+#define	IF_HW_TSOMAX_GET_FRAG_COUNT(x)		\
+    ((uint8_t)((x) >> 16))	/* 1..255 */
+
+#define	IF_HW_TSOMAX_GET_FRAG_SIZE(x)		\
+    ((uint8_t)((x) >> 24))	/* 12..16 */
+
+/*
+ * The following macro defines how to build the "if_hw_tsomax"
+ * field. The "bytes" field has unit 1 bytes and declares the maximum
+ * number of bytes which can be transferred by a single transmit
+ * offload, TSO, job. The "bytes" field is rounded down to the neares
+ * 4 bytes to avoid having the hardware do unaligned memory
+ * accesses. The "frag_count" field has unit 1 fragment and declares
+ * the maximum number of fragments a TSO job can contain. The
+ * "frag_size" field has unit logarithm in base 2 of the actual value
+ * in bytes and declares the maximum size of a fragment.
+ */
+#define	IF_HW_TSOMAX_BUILD_VALUE(bytes, frag_count, frag_size)	\
+    (((bytes) & 0xFFFC) | (((frag_count) & 0xFF) << 16) |	\
+    (((frag_size) & 0xFF) << 24))
+
+#define	IF_HW_TSOMAX_DEFAULT_BYTES (65536 - 4)
+#define	IF_HW_TSOMAX_DEFAULT_FRAG_COUNT 255
+#define	IF_HW_TSOMAX_DEFAULT_FRAG_SIZE 16
+
+#define	IF_HW_TSOMAX_DEFAULT_VALUE()		\
+    IF_HW_TSOMAX_BUILD_VALUE(			\
+    IF_HW_TSOMAX_DEFAULT_BYTES,			\
+    IF_HW_TSOMAX_DEFAULT_FRAG_COUNT,		\
+    IF_HW_TSOMAX_DEFAULT_FRAG_SIZE)
+
+/*
  * Structure defining a network interface.
  *
  * Size ILP32:  592 (approx)
@@ -222,8 +259,7 @@ struct ifnet {
 	if_get_counter_t if_get_counter; /* get counter values */
 
 	/* Stuff that's only temporary and doesn't belong here. */
-	u_int	if_hw_tsomax;		/* tso burst length limit, the minimum
-					 * is (IP_MAXPACKET / 8).
+	u_int	if_hw_tsomax;		/* TSO burst length limits.
 					 * XXXAO: Have to find a better place
 					 * for it eventually. */
 	/*
@@ -608,6 +644,10 @@ void if_setioctlfn(if_t ifp, int (*)(if_
 void if_setstartfn(if_t ifp, void (*)(if_t));
 void if_settransmitfn(if_t ifp, if_transmit_fn_t);
 void if_setqflushfn(if_t ifp, if_qflush_fn_t);
+
+/* "if_hw_tsomax" related functions */
+u_int if_hw_tsomax_common(u_int, u_int);
+u_int if_hw_tsomax_range_check(u_int);
  
 /* Revisit the below. These are inline functions originally */
 int drbr_inuse_drv(if_t ifp, struct buf_ring *br);

Modified: head/sys/net/if_vlan.c
==============================================================================
--- head/sys/net/if_vlan.c	Sat Sep 13 07:45:03 2014	(r271503)
+++ head/sys/net/if_vlan.c	Sat Sep 13 08:26:09 2014	(r271504)
@@ -1511,8 +1511,8 @@ vlan_capabilities(struct ifvlan *ifv)
 	 * propagate the hardware-assisted flag. TSO on VLANs
 	 * does not necessarily require hardware VLAN tagging.
 	 */
-	if (p->if_hw_tsomax > 0)
-		ifp->if_hw_tsomax = p->if_hw_tsomax;
+	ifp->if_hw_tsomax = if_hw_tsomax_common(ifp->if_hw_tsomax,
+	    p->if_hw_tsomax);
 	if (p->if_capabilities & IFCAP_VLAN_HWTSO)
 		ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO;
 	if (p->if_capenable & IFCAP_VLAN_HWTSO) {

Modified: head/sys/netinet/tcp_output.c
==============================================================================
--- head/sys/netinet/tcp_output.c	Sat Sep 13 07:45:03 2014	(r271503)
+++ head/sys/netinet/tcp_output.c	Sat Sep 13 08:26:09 2014	(r271504)
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 
 #include <net/if.h>
+#include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
@@ -767,18 +768,88 @@ send:
 		flags &= ~TH_FIN;
 
 		if (tso) {
+			u_int if_hw_tsomax_bytes;
+			u_int if_hw_tsomax_frag_count;
+			u_int if_hw_tsomax_frag_size;
+			struct mbuf *mb;
+			u_int moff;
+			int max_len;
+
+			/* extract TSO information */
+			if_hw_tsomax_bytes =
+			    IF_HW_TSOMAX_GET_BYTES(tp->t_tsomax);
+			if_hw_tsomax_frag_count =
+			    IF_HW_TSOMAX_GET_FRAG_COUNT(tp->t_tsomax);
+			if_hw_tsomax_frag_size =
+			    IF_HW_TSOMAX_GET_FRAG_SIZE(tp->t_tsomax);
+
+			/* compute maximum TSO length */
+			max_len = (if_hw_tsomax_bytes - hdrlen);
+
+			/* clamp maximum length value */
+			if (max_len > IP_MAXPACKET)
+				max_len = IP_MAXPACKET;
+			else if (max_len < 0)
+				max_len = 0;
+
+			/* get smallest length */
+			if (len > (u_int)max_len) {
+				if (max_len != 0)
+					sendalot = 1;
+				len = (u_int)max_len;
+			}
+
 			KASSERT(ipoptlen == 0,
 			    ("%s: TSO can't do IP options", __func__));
 
+			max_len = 0;
+			mb = sbsndptr(&so->so_snd, off, len, &moff);
+
+			/* now make sure the number of fragments fit too */
+			while (mb != NULL && (u_int)max_len < len) {
+				u_int cur_length;
+				u_int cur_frags;
+
+				/*
+				 * Get length of mbuf fragment and how
+				 * many hardware frags, rounded up, it
+				 * would use:
+				 */
+				cur_length = (mb->m_len - moff);
+				cur_frags = (cur_length +
+				    (1 << if_hw_tsomax_frag_size) - 1) >>
+				    if_hw_tsomax_frag_size;
+
+				/* Handle special case: Zero Length Mbuf */
+				if (cur_frags == 0)
+					cur_frags = 1;
+
+				/*
+				 * Check if the fragment limit will be
+				 * reached or exceeded:
+				 */
+				if (cur_frags >= if_hw_tsomax_frag_count) {
+					max_len += min(cur_length,
+					    if_hw_tsomax_frag_count <<
+					    if_hw_tsomax_frag_size);
+					break;
+				}
+				max_len += cur_length;
+				if_hw_tsomax_frag_count -= cur_frags;
+				moff = 0;
+				mb = mb->m_next;
+			}
+
 			/*
 			 * Limit a burst to t_tsomax minus IP,
 			 * TCP and options length to keep ip->ip_len
 			 * from overflowing or exceeding the maximum
 			 * length allowed by the network interface.
 			 */
-			if (len > tp->t_tsomax - hdrlen) {
-				len = tp->t_tsomax - hdrlen;
-				sendalot = 1;
+			if (len > (u_int)max_len) {
+				if (max_len != 0)
+					sendalot = 1;
+				len = (u_int)max_len;
 			}
 
 			/*

Modified: head/sys/ofed/drivers/net/mlx4/en_netdev.c
==============================================================================
--- head/sys/ofed/drivers/net/mlx4/en_netdev.c	Sat Sep 13 07:45:03 2014	(r271503)
+++ head/sys/ofed/drivers/net/mlx4/en_netdev.c	Sat Sep 13 08:26:09 2014	(r271504)
@@ -673,6 +673,12 @@ int mlx4_en_do_start_port(struct net_dev
 	else
 		priv->rx_csum = 0;
 
+	/* set TSO limits so that we don't have to drop TX packets */
+	dev->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE(
+	    65535 - sizeof(struct ether_vlan_header) /* bytes */,
+	    16 /* maximum frag count */,
+	    16 /* can do up to 4GByte */);
+
 	err = mlx4_wol_read(priv->mdev->dev, &config, priv->port);
 	if (err) {
 		en_err(priv, "Failed to get WoL info, unable to modify\n");


More information about the svn-src-all mailing list