PERFORCE change 105799 for review

Matt Jacob mjacob at FreeBSD.org
Thu Sep 7 16:55:52 UTC 2006


http://perforce.freebsd.org/chv.cgi?CH=105799

Change 105799 by mjacob at newisp on 2006/09/07 16:55:22

	IFC.

Affected files ...

.. //depot/projects/newisp/amd64/amd64/machdep.c#2 integrate
.. //depot/projects/newisp/i386/i386/machdep.c#2 integrate
.. //depot/projects/newisp/netinet/tcp_input.c#3 integrate
.. //depot/projects/newisp/netinet/tcp_output.c#2 integrate
.. //depot/projects/newisp/netinet/tcp_subr.c#4 integrate
.. //depot/projects/newisp/netinet/tcp_timer.c#3 integrate
.. //depot/projects/newisp/netinet/tcp_timer.h#2 integrate
.. //depot/projects/newisp/netinet/tcp_var.h#3 integrate

Differences ...

==== //depot/projects/newisp/amd64/amd64/machdep.c#2 (text+ko) ====

@@ -39,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/amd64/amd64/machdep.c,v 1.651 2006/07/27 19:47:22 jhb Exp $");
+__FBSDID("$FreeBSD: src/sys/amd64/amd64/machdep.c,v 1.652 2006/09/07 15:03:02 jhb Exp $");
 
 #include "opt_atalk.h"
 #include "opt_atpic.h"
@@ -160,8 +160,10 @@
 long Maxmem = 0;
 long realmem = 0;
 
-vm_paddr_t phys_avail[20];
-vm_paddr_t dump_avail[20];
+#define PHYSMAP_SIZE	(2 * 30)
+
+vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
+vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];
 
 /* must be 2 less so 0 0 can signal end of chunks */
 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
@@ -832,8 +834,6 @@
 }
 #endif
 
-#define PHYSMAP_SIZE	(2 * 20)
-
 u_int basemem;
 
 /*

==== //depot/projects/newisp/i386/i386/machdep.c#2 (text+ko) ====

@@ -38,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/machdep.c,v 1.633 2006/08/09 23:37:30 imp Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/machdep.c,v 1.634 2006/09/07 15:03:02 jhb Exp $");
 
 #include "opt_apic.h"
 #include "opt_atalk.h"
@@ -188,8 +188,10 @@
 long Maxmem = 0;
 long realmem = 0;
 
-vm_paddr_t phys_avail[10];
-vm_paddr_t dump_avail[10];
+#define PHYSMAP_SIZE	(2 * 16)
+
+vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
+vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];
 
 /* must be 2 less so 0 0 can signal end of chunks */
 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
@@ -1614,8 +1616,6 @@
 	ssd->ssd_gran  = sd->sd_gran;
 }
 
-#define PHYSMAP_SIZE	(2 * 8)
-
 /*
  * Populate the (physmap) array with base/bound pairs describing the
  * available physical memory in the system, then test this memory and

==== //depot/projects/newisp/netinet/tcp_input.c#3 (text+ko) ====

@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.305 2006/09/06 21:51:58 andre Exp $
+ * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.306 2006/09/07 13:06:00 ru Exp $
  */
 
 #include "opt_ipfw.h"		/* for ipfw_fwd		*/
@@ -3187,7 +3187,7 @@
 	const int isipv6 = 0;
 #endif
 
-	/* tcbinfo lock required for tcp_twclose(), tcp_2msl_reset. */
+	/* tcbinfo lock required for tcp_twclose(), tcp_timer_2msl_reset(). */
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 	INP_LOCK_ASSERT(inp);
 
@@ -3256,7 +3256,7 @@
 	if (thflags & TH_FIN) {
 		seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
 		if (seq + 1 == tw->rcv_nxt)
-			tcp_timer_2msl_reset(tw, 2 * tcp_msl, 1);
+			tcp_timer_2msl_reset(tw, 1);
 	}
 
 	/*

==== //depot/projects/newisp/netinet/tcp_output.c#2 (text+ko) ====

@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_output.c	8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.115 2006/02/23 21:14:34 qingli Exp $
+ * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.116 2006/09/07 12:53:01 andre Exp $
  */
 
 #include "opt_inet.h"
@@ -105,6 +105,10 @@
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno,
 	0, "Enable NewReno Algorithms");
 
+int	tcp_do_tso = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
+	&tcp_do_tso, 0, "Enable TCP Segmentation Offload");
+
 /*
  * Tcp output routine: figure out what should be sent and send it.
  */
@@ -127,6 +131,7 @@
 	int i, sack_rxmit;
 	int sack_bytes_rxmt;
 	struct sackhole *p;
+	int tso = 0;
 #if 0
 	int maxburst = TCP_MAXBURST;
 #endif
@@ -376,12 +381,34 @@
 
 	/*
 	 * len will be >= 0 after this point.  Truncate to the maximum
-	 * segment length and ensure that FIN is removed if the length
-	 * no longer contains the last data byte.
+	 * segment length or enable TCP Segmentation Offloading (if supported
+	 * by hardware) and ensure that FIN is removed if the length no longer
+	 * contains the last data byte.
+	 *
+	 * TSO may only be used if we are in a pure bulk sending state.  The
+	 * presence of TCP-MD5, SACK retransmits, SACK advertisements and
+	 * IP options prevent using TSO.  With TSO the TCP header is the same
+	 * (except for the sequence number) for all generated packets.  This
+	 * makes it impossible to transmit any options which vary per generated
+	 * segment or packet.
+	 *
+	 * The length of TSO bursts is limited to TCP_MAXWIN.  That limit and
+	 * removal of FIN (if not already caught here) are handled later after
+	 * the exact length of the TCP options is known.
 	 */
 	if (len > tp->t_maxseg) {
-		len = tp->t_maxseg;
-		sendalot = 1;
+		if ((tp->t_flags & TF_TSO) && tcp_do_tso &&
+		    ((tp->t_flags & TF_SIGNATURE) == 0) &&
+		    tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
+		    tp->t_inpcb->inp_options == NULL &&
+		    tp->t_inpcb->in6p_options == NULL &&
+		    tp->t_inpcb->inp_sp == NULL) {
+			tso = 1;
+		} else {
+			len = tp->t_maxseg;
+			sendalot = 1;
+			tso = 0;
+		}
 	}
 	if (sack_rxmit) {
 		if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc))
@@ -397,7 +424,7 @@
 	 * Sender silly window avoidance.   We transmit under the following
 	 * conditions when len is non-zero:
 	 *
-	 *	- We have a full segment
+	 *	- We have a full segment (or more with TSO)
 	 *	- This is the last buffer in a write()/send() and we are
 	 *	  either idle or running NODELAY
 	 *	- we've timed out (e.g. persist timer)
@@ -406,7 +433,7 @@
 	 *	- we need to retransmit
 	 */
 	if (len) {
-		if (len == tp->t_maxseg)
+		if (len >= tp->t_maxseg)
 			goto send;
 		/*
 		 * NOTE! on localhost connections an 'ack' from the remote
@@ -702,14 +729,24 @@
 	 * bump the packet length beyond the t_maxopd length.
 	 * Clear the FIN bit because we cut off the tail of
 	 * the segment.
+	 *
+	 * When doing TSO limit a burst to TCP_MAXWIN and set the
+	 * flag to continue sending and prevent the last segment
+	 * from being fractional thus making them all equal sized.
 	 */
 	if (len + optlen + ipoptlen > tp->t_maxopd) {
-		/*
-		 * If there is still more to send, don't close the connection.
-		 */
 		flags &= ~TH_FIN;
-		len = tp->t_maxopd - optlen - ipoptlen;
-		sendalot = 1;
+		if (tso) {
+			if (len > TCP_MAXWIN) {
+				len = TCP_MAXWIN - TCP_MAXWIN %
+					(tp->t_maxopd - optlen);
+				sendalot = 1;
+			} else if (tp->t_flags & TF_NEEDFIN)
+				sendalot = 1;
+		} else {
+			len = tp->t_maxopd - optlen - ipoptlen;
+			sendalot = 1;
+		}
 	}
 
 /*#ifdef DIAGNOSTIC*/
@@ -947,6 +984,16 @@
 	}
 
 	/*
+	 * Enable TSO and specify the size of the segments.
+	 * The TCP pseudo header checksum is always provided.
+	 * XXX: Fixme: This is currently not the case for IPv6.
+	 */
+	if (tso) {
+		m->m_pkthdr.csum_flags = CSUM_TSO;
+		m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen;
+	}
+
+	/*
 	 * In transmit state, time the transmission and arrange for
 	 * the retransmit.  In persist state, just set snd_max.
 	 */
@@ -1119,11 +1166,22 @@
 		}
 		if (error == EMSGSIZE) {
 			/*
-			 * ip_output() will have already fixed the route
-			 * for us.  tcp_mtudisc() will, as its last action,
-			 * initiate retransmission, so it is important to
-			 * not do so here.
+			 * For some reason the interface we used initially
+			 * to send segments changed to another or lowered
+			 * its MTU.
+			 *
+			 * tcp_mtudisc() will find out the new MTU and as
+			 * its last action, initiate retransmission, so it
+			 * is important to not do so here.
+			 *
+			 * If TSO was active we either got an interface
+			 * without TSO capabilities or TSO was turned off.
+			 * Disable it for this connection as well and
+			 * immediately retry with MSS sized segments generated
+			 * by this function.
 			 */
+			if (tso)
+				tp->t_flags &= ~TF_TSO;
 			tcp_mtudisc(tp->t_inpcb, 0);
 			return 0;
 		}

==== //depot/projects/newisp/netinet/tcp_subr.c#4 (text+ko) ====

@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.260 2006/09/06 21:51:58 andre Exp $
+ * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.261 2006/09/07 13:06:00 ru Exp $
  */
 
 #include "opt_compat.h"
@@ -1736,7 +1736,7 @@
 {
 	struct tcptw *tw;
 	struct inpcb *inp;
-	int tw_time, acknow;
+	int acknow;
 	struct socket *so;
 
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);	/* tcp_timer_2msl_reset(). */
@@ -1781,7 +1781,6 @@
  * be used for fin-wait-2 state also, then we may need
  * a ts_recent from the last segment.
  */
-	tw_time = 2 * tcp_msl;
 	acknow = tp->t_flags & TF_ACKNOW;
 
 	/*
@@ -1803,7 +1802,7 @@
 		tcp_twrespond(tw, TH_ACK);
 	inp->inp_ppcb = tw;
 	inp->inp_vflag |= INP_TIMEWAIT;
-	tcp_timer_2msl_reset(tw, tw_time, 0);
+	tcp_timer_2msl_reset(tw, 0);
 
 	/*
 	 * If the inpcb owns the sole reference to the socket, then we can

==== //depot/projects/newisp/netinet/tcp_timer.c#3 (text+ko) ====

@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.83 2006/09/06 13:56:35 glebius Exp $
+ * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.85 2006/09/07 13:06:00 ru Exp $
  */
 
 #include "opt_inet6.h"
@@ -230,46 +230,30 @@
 }
 
 /*
- * The timed wait lists contain references to each of the TCP sessions
- * currently TIME_WAIT state.  The list pointers, including the list pointers
- * in each tcptw structure, are protected using the global tcbinfo lock,
- * which must be held over list iteration and modification.
+ * The timed wait queue contains references to each of the TCP sessions
+ * currently in the TIME_WAIT state.  The queue pointers, including the
+ * queue pointers in each tcptw structure, are protected using the global
+ * tcbinfo lock, which must be held over queue iteration and modification.
  */
-struct twlist {
-	LIST_HEAD(, tcptw)	tw_list;
-	struct tcptw	tw_tail;
-};
-#define TWLIST_NLISTS	2
-static struct twlist twl_2msl[TWLIST_NLISTS];
-static struct twlist *tw_2msl_list[] = { &twl_2msl[0], &twl_2msl[1], NULL };
+static TAILQ_HEAD(, tcptw)	twq_2msl;
 
 void
 tcp_timer_init(void)
 {
-	int i;
-	struct twlist *twl;
 
-	for (i = 0; i < TWLIST_NLISTS; i++) {
-		twl = &twl_2msl[i];
-		LIST_INIT(&twl->tw_list);
-		LIST_INSERT_HEAD(&twl->tw_list, &twl->tw_tail, tw_2msl);
-	}
+	TAILQ_INIT(&twq_2msl);
 }
 
 void
-tcp_timer_2msl_reset(struct tcptw *tw, int timeo, int rearm)
+tcp_timer_2msl_reset(struct tcptw *tw, int rearm)
 {
-	int i;
-	struct tcptw *tw_tail;
 
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 	INP_LOCK_ASSERT(tw->tw_inpcb);
 	if (rearm)
-		LIST_REMOVE(tw, tw_2msl);
-	tw->tw_time = timeo + ticks;
-	i = timeo > tcp_msl ? 1 : 0;
-	tw_tail = &twl_2msl[i].tw_tail;
-	LIST_INSERT_BEFORE(tw_tail, tw, tw_2msl);
+		TAILQ_REMOVE(&twq_2msl, tw, tw_2msl);
+	tw->tw_time = ticks + 2 * tcp_msl;
+	TAILQ_INSERT_TAIL(&twq_2msl, tw, tw_2msl);
 }
 
 void
@@ -277,31 +261,23 @@
 {
 
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
-	LIST_REMOVE(tw, tw_2msl);
+	TAILQ_REMOVE(&twq_2msl, tw, tw_2msl);
 }
 
 struct tcptw *
 tcp_timer_2msl_tw(int reuse)
 {
-	struct tcptw *tw, *tw_tail;
-	struct twlist *twl;
-	int i;
+	struct tcptw *tw;
 
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
-	for (i = 0; i < TWLIST_NLISTS; i++) {
-		twl = tw_2msl_list[i];
-		tw_tail = &twl->tw_tail;
-
-		for (;;) {
-			tw = LIST_FIRST(&twl->tw_list);
-			if (tw == tw_tail || (!reuse && tw->tw_time > ticks))
-				break;
-			INP_LOCK(tw->tw_inpcb);
-			tcp_twclose(tw, reuse);
-			if (reuse)
-				return (tw);
-		}
-
+	for (;;) {
+		tw = TAILQ_FIRST(&twq_2msl);
+		if (tw == NULL || (!reuse && tw->tw_time > ticks))
+			break;
+		INP_LOCK(tw->tw_inpcb);
+		tcp_twclose(tw, reuse);
+		if (reuse)
+			return (tw);
 	}
 	return (NULL);
 }

==== //depot/projects/newisp/netinet/tcp_timer.h#2 (text+ko) ====

@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_timer.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.29 2006/08/11 21:15:23 mohans Exp $
+ * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.30 2006/09/07 13:06:00 ru Exp $
  */
 
 #ifndef _NETINET_TCP_TIMER_H_
@@ -156,7 +156,7 @@
 void	tcp_timer_2msl(void *xtp);
 struct tcptw *
 	tcp_timer_2msl_tw(int _reuse);		/* XXX temporary */
-void	tcp_timer_2msl_reset(struct tcptw *_tw, int _timeo, int rearm);
+void	tcp_timer_2msl_reset(struct tcptw *_tw, int rearm);
 void	tcp_timer_2msl_stop(struct tcptw *_tw);
 void	tcp_timer_keep(void *xtp);
 void	tcp_timer_persist(void *xtp);

==== //depot/projects/newisp/netinet/tcp_var.h#3 (text+ko) ====

@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_var.h	8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.135 2006/09/06 21:51:58 andre Exp $
+ * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.136 2006/09/07 13:06:00 ru Exp $
  */
 
 #ifndef _NETINET_TCP_VAR_H_
@@ -276,7 +276,7 @@
 	u_long		t_recent;
 	u_long		t_starttime;
 	int		tw_time;
-	LIST_ENTRY(tcptw) tw_2msl;
+	TAILQ_ENTRY(tcptw) tw_2msl;
 };
 
 #define	intotcpcb(ip)	((struct tcpcb *)(ip)->inp_ppcb)


More information about the p4-projects mailing list