[please review] TSO mbuf chain length limiting patch
    Colin Percival 
    cperciva at freebsd.org
       
    Sun Jun  3 20:59:38 UTC 2012
    
    
  
On 06/03/12 12:05, Andrew Gallatin wrote:
> On 06/03/12 12:51, Colin Percival wrote:
>> I've attached a new patch which:
>> 1. adds a IFCAP_TSO_MSS "capability" and a if_tx_tso_mss field to struct ifnet,
>> 2. sets these in netfront when the IFCAP_TSO4 flag is set,
>> 3. extends tcp_maxmtu to read this value,
>> 4. adds a tx_tso_mss field to struct tcpcb,
>> 5. makes tcp_mss_update set tx_tso_mss using tcp_maxmtu, and
>> 6. limits TSO lengths to tx_tso_mss in tcp_output.
> 
> Looks good to me.  I don't pretend to understand the bowels of
> the TCP stack, so I can't comment on the "sendalot" stuff to
> force segmentation.
The "sendalot" bit is rather confusing, and I'm not at all certain
that it's doing what it's supposed to be doing, but my reading of
the code is that it really means "there's more data buffered than
what we're sending right now so after we issue this write we need
to go back to the top and do another".
> I assume it works as well as your
> previous patch to solve the problems you were seeing with Xen?
Yes.
> One minor nit is that I envision this limit to always be
> 64K or less, so you could probably get away with stealing
> 16 bits from ifcspare.
With IPv6 I could imagine larger limits happening, so I figured
it was better to take one of the ints.
> The other trivial nit is that I would have made these new
> if_tx_tso_mss and  t_tx_tso_mss fields unsigned.
Oops, quite right.  I was trying to make sure I didn't break ABI,
but of course int and u_int should have the same size no matter
what platform we're on.
> Thanks so much for doing this!
Thanks for reviewing.  Updated patch attached with s/int/u_int/ and
some other minor cleanups.
Can anyone review this for ABI safety?
-- 
Colin Percival
Security Officer Emeritus, FreeBSD | The power to serve
Founder, Tarsnap | www.tarsnap.com | Online backups for the truly paranoid
-------------- next part --------------
Index: netinet/tcp_input.c
===================================================================
--- netinet/tcp_input.c	(revision 235780)
+++ netinet/tcp_input.c	(working copy)
@@ -3298,6 +3298,7 @@
 {
 	int mss = 0;
 	u_long maxmtu = 0;
+	u_int tx_tso_mss = 0;
 	struct inpcb *inp = tp->t_inpcb;
 	struct hc_metrics_lite metrics;
 	int origoffer;
@@ -3321,8 +3322,10 @@
 	/* Initialize. */
 #ifdef INET6
 	if (isipv6) {
-		maxmtu = tcp_maxmtu6(&inp->inp_inc, mtuflags);
+		maxmtu = tcp_maxmtu6_ext(&inp->inp_inc, mtuflags,
+		    &tx_tso_mss);
 		tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt;
+		tp->t_tx_tso_mss = tx_tso_mss;
 	}
 #endif
 #if defined(INET) && defined(INET6)
@@ -3330,8 +3333,10 @@
 #endif
 #ifdef INET
 	{
-		maxmtu = tcp_maxmtu(&inp->inp_inc, mtuflags);
+		maxmtu = tcp_maxmtu_ext(&inp->inp_inc, mtuflags,
+		    &tx_tso_mss);
 		tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt;
+		tp->t_tx_tso_mss = tx_tso_mss;
 	}
 #endif
 
Index: netinet/tcp_subr.c
===================================================================
--- netinet/tcp_subr.c	(revision 235780)
+++ netinet/tcp_subr.c	(working copy)
@@ -1708,7 +1708,7 @@
  * tcp_mss_update to get the peer/interface MTU.
  */
 u_long
-tcp_maxmtu(struct in_conninfo *inc, int *flags)
+tcp_maxmtu_ext(struct in_conninfo *inc, int *flags, u_int * tx_tso_mss)
 {
 	struct route sro;
 	struct sockaddr_in *dst;
@@ -1738,15 +1738,26 @@
 			    ifp->if_hwassist & CSUM_TSO)
 				*flags |= CSUM_TSO;
 		}
+		if (tx_tso_mss != NULL) {
+			if (ifp->if_capabilities & IFCAP_TSO_MSS)
+				*tx_tso_mss = ifp->if_tx_tso_mss;
+		}
 		RTFREE(sro.ro_rt);
 	}
 	return (maxmtu);
 }
+
+u_long
+tcp_maxmtu(struct in_conninfo *inc, int *flags)
+{
+
+	return (tcp_maxmtu_ext(inc, flags, NULL));
+}
 #endif /* INET */
 
 #ifdef INET6
 u_long
-tcp_maxmtu6(struct in_conninfo *inc, int *flags)
+tcp_maxmtu6_ext(struct in_conninfo *inc, int *flags, u_int * tx_tso_mss)
 {
 	struct route_in6 sro6;
 	struct ifnet *ifp;
@@ -1775,11 +1786,22 @@
 			    ifp->if_hwassist & CSUM_TSO)
 				*flags |= CSUM_TSO;
 		}
+		if (tx_tso_mss != NULL) {
+			if (ifp->if_capabilities & IFCAP_TSO_MSS)
+				*tx_tso_mss = ifp->if_tx_tso_mss;
+		}
 		RTFREE(sro6.ro_rt);
 	}
 
 	return (maxmtu);
 }
+
+u_long
+tcp_maxmtu6(struct in_conninfo *inc, int *flags)
+{
+
+	return (tcp_maxmtu6_ext(inc, flags, NULL));
+}
 #endif /* INET6 */
 
 #ifdef IPSEC
Index: netinet/tcp_var.h
===================================================================
--- netinet/tcp_var.h	(revision 235780)
+++ netinet/tcp_var.h	(working copy)
@@ -208,7 +208,9 @@
 	u_int	t_keepintvl;		/* interval between keepalives */
 	u_int	t_keepcnt;		/* number of keepalives before close */
 
-	uint32_t t_ispare[8];		/* 5 UTO, 3 TBD */
+	uint32_t t_ispare[5];		/* 5 UTO */
+	uint32_t t_tx_tso_mss;		/* max segment size for TSO offload */
+	uint32_t t_ispare2[2];		/* 2 TBD */
 	void	*t_pspare2[4];		/* 4 TBD */
 	uint64_t _pad[6];		/* 6 TBD (1-2 CC/RTT?) */
 };
@@ -674,7 +676,9 @@
 #endif
 void	 tcp_input(struct mbuf *, int);
 u_long	 tcp_maxmtu(struct in_conninfo *, int *);
+u_long	 tcp_maxmtu_ext(struct in_conninfo *, int *, u_int *);
 u_long	 tcp_maxmtu6(struct in_conninfo *, int *);
+u_long	 tcp_maxmtu6_ext(struct in_conninfo *, int *, u_int *);
 void	 tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
 	    int *);
 void	 tcp_mss(struct tcpcb *, int);
Index: netinet/tcp_output.c
===================================================================
--- netinet/tcp_output.c	(revision 235780)
+++ netinet/tcp_output.c	(working copy)
@@ -748,6 +748,15 @@
 			}
 
 			/*
+			 * Some drivers want an even shorter limit to the
+			 * length sent via TSO; respect their wishes.
+			 */
+			if (tp->t_tx_tso_mss != 0 && len > tp->t_tx_tso_mss) {
+				len = tp->t_tx_tso_mss;
+				sendalot = 1;
+			}
+
+			/*
 			 * Prevent the last segment from being
 			 * fractional unless the send sockbuf can
 			 * be emptied.
Index: dev/xen/netfront/netfront.c
===================================================================
--- dev/xen/netfront/netfront.c	(revision 235780)
+++ dev/xen/netfront/netfront.c	(working copy)
@@ -135,6 +135,7 @@
  * to mirror the Linux MAX_SKB_FRAGS constant.
  */
 #define	MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2)
+#define	TX_TSO_MSS (MAX_TX_REQ_FRAGS - 2) * MCLBYTES
 
 #define RX_COPY_THRESHOLD 256
 
@@ -2040,6 +2041,9 @@
 	if (val) {
 		np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO;
 		printf(" feature-gso-tcp4");
+
+		np->xn_ifp->if_capabilities |= IFCAP_TSO_MSS;
+		np->xn_ifp->if_tx_tso_mss = TX_TSO_MSS;
 	}
 
 	printf("\n");
Index: net/if.h
===================================================================
--- net/if.h	(revision 235780)
+++ net/if.h	(working copy)
@@ -230,6 +230,7 @@
 #define	IFCAP_VLAN_HWTSO	0x40000 /* can do IFCAP_TSO on VLANs */
 #define	IFCAP_LINKSTATE		0x80000 /* the runtime link state is dynamic */
 #define	IFCAP_NETMAP		0x100000 /* netmap mode supported/enabled */
+#define	IFCAP_TSO_MSS		0x200000 /* TSO size is limited */
 
 #define IFCAP_HWCSUM	(IFCAP_RXCSUM | IFCAP_TXCSUM)
 #define	IFCAP_TSO	(IFCAP_TSO4 | IFCAP_TSO6)
Index: net/if_var.h
===================================================================
--- net/if_var.h	(revision 235780)
+++ net/if_var.h	(working copy)
@@ -205,7 +205,8 @@
 	 * be used with care where binary compatibility is required.
 	 */
 	char	if_cspare[3];
-	int	if_ispare[4];
+	u_int	if_tx_tso_mss;		/* if IFCAP_TSO_MSS */
+	int	if_ispare[3];
 	void	*if_pspare[8];		/* 1 netmap, 7 TDB */
 };
 
    
    
More information about the freebsd-net
mailing list