PERFORCE change 105799 for review
Matt Jacob
mjacob at FreeBSD.org
Thu Sep 7 16:55:52 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=105799
Change 105799 by mjacob at newisp on 2006/09/07 16:55:22
IFC.
Affected files ...
.. //depot/projects/newisp/amd64/amd64/machdep.c#2 integrate
.. //depot/projects/newisp/i386/i386/machdep.c#2 integrate
.. //depot/projects/newisp/netinet/tcp_input.c#3 integrate
.. //depot/projects/newisp/netinet/tcp_output.c#2 integrate
.. //depot/projects/newisp/netinet/tcp_subr.c#4 integrate
.. //depot/projects/newisp/netinet/tcp_timer.c#3 integrate
.. //depot/projects/newisp/netinet/tcp_timer.h#2 integrate
.. //depot/projects/newisp/netinet/tcp_var.h#3 integrate
Differences ...
==== //depot/projects/newisp/amd64/amd64/machdep.c#2 (text+ko) ====
@@ -39,7 +39,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/amd64/amd64/machdep.c,v 1.651 2006/07/27 19:47:22 jhb Exp $");
+__FBSDID("$FreeBSD: src/sys/amd64/amd64/machdep.c,v 1.652 2006/09/07 15:03:02 jhb Exp $");
#include "opt_atalk.h"
#include "opt_atpic.h"
@@ -160,8 +160,10 @@
long Maxmem = 0;
long realmem = 0;
-vm_paddr_t phys_avail[20];
-vm_paddr_t dump_avail[20];
+#define PHYSMAP_SIZE (2 * 30)
+
+vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
+vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];
/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
@@ -832,8 +834,6 @@
}
#endif
-#define PHYSMAP_SIZE (2 * 20)
-
u_int basemem;
/*
==== //depot/projects/newisp/i386/i386/machdep.c#2 (text+ko) ====
@@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/machdep.c,v 1.633 2006/08/09 23:37:30 imp Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/machdep.c,v 1.634 2006/09/07 15:03:02 jhb Exp $");
#include "opt_apic.h"
#include "opt_atalk.h"
@@ -188,8 +188,10 @@
long Maxmem = 0;
long realmem = 0;
-vm_paddr_t phys_avail[10];
-vm_paddr_t dump_avail[10];
+#define PHYSMAP_SIZE (2 * 16)
+
+vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
+vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];
/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
@@ -1614,8 +1616,6 @@
ssd->ssd_gran = sd->sd_gran;
}
-#define PHYSMAP_SIZE (2 * 8)
-
/*
* Populate the (physmap) array with base/bound pairs describing the
* available physical memory in the system, then test this memory and
==== //depot/projects/newisp/netinet/tcp_input.c#3 (text+ko) ====
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.305 2006/09/06 21:51:58 andre Exp $
+ * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.306 2006/09/07 13:06:00 ru Exp $
*/
#include "opt_ipfw.h" /* for ipfw_fwd */
@@ -3187,7 +3187,7 @@
const int isipv6 = 0;
#endif
- /* tcbinfo lock required for tcp_twclose(), tcp_2msl_reset. */
+ /* tcbinfo lock required for tcp_twclose(), tcp_timer_2msl_reset(). */
INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(inp);
@@ -3256,7 +3256,7 @@
if (thflags & TH_FIN) {
seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
if (seq + 1 == tw->rcv_nxt)
- tcp_timer_2msl_reset(tw, 2 * tcp_msl, 1);
+ tcp_timer_2msl_reset(tw, 1);
}
/*
==== //depot/projects/newisp/netinet/tcp_output.c#2 (text+ko) ====
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_output.c 8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.115 2006/02/23 21:14:34 qingli Exp $
+ * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.116 2006/09/07 12:53:01 andre Exp $
*/
#include "opt_inet.h"
@@ -105,6 +105,10 @@
SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno,
0, "Enable NewReno Algorithms");
+int tcp_do_tso = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
+ &tcp_do_tso, 0, "Enable TCP Segmentation Offload");
+
/*
* Tcp output routine: figure out what should be sent and send it.
*/
@@ -127,6 +131,7 @@
int i, sack_rxmit;
int sack_bytes_rxmt;
struct sackhole *p;
+ int tso = 0;
#if 0
int maxburst = TCP_MAXBURST;
#endif
@@ -376,12 +381,34 @@
/*
* len will be >= 0 after this point. Truncate to the maximum
- * segment length and ensure that FIN is removed if the length
- * no longer contains the last data byte.
+ * segment length or enable TCP Segmentation Offloading (if supported
+ * by hardware) and ensure that FIN is removed if the length no longer
+ * contains the last data byte.
+ *
+ * TSO may only be used if we are in a pure bulk sending state. The
+ * presence of TCP-MD5, SACK retransmits, SACK advertisements and
+ * IP options prevent using TSO. With TSO the TCP header is the same
+ * (except for the sequence number) for all generated packets. This
+ * makes it impossible to transmit any options which vary per generated
+ * segment or packet.
+ *
+ * The length of TSO bursts is limited to TCP_MAXWIN. That limit and
+ * removal of FIN (if not already caught here) are handled later after
+ * the exact length of the TCP options is known.
*/
if (len > tp->t_maxseg) {
- len = tp->t_maxseg;
- sendalot = 1;
+ if ((tp->t_flags & TF_TSO) && tcp_do_tso &&
+ ((tp->t_flags & TF_SIGNATURE) == 0) &&
+ tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
+ tp->t_inpcb->inp_options == NULL &&
+ tp->t_inpcb->in6p_options == NULL &&
+ tp->t_inpcb->inp_sp == NULL) {
+ tso = 1;
+ } else {
+ len = tp->t_maxseg;
+ sendalot = 1;
+ tso = 0;
+ }
}
if (sack_rxmit) {
if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc))
@@ -397,7 +424,7 @@
* Sender silly window avoidance. We transmit under the following
* conditions when len is non-zero:
*
- * - We have a full segment
+ * - We have a full segment (or more with TSO)
* - This is the last buffer in a write()/send() and we are
* either idle or running NODELAY
* - we've timed out (e.g. persist timer)
@@ -406,7 +433,7 @@
* - we need to retransmit
*/
if (len) {
- if (len == tp->t_maxseg)
+ if (len >= tp->t_maxseg)
goto send;
/*
* NOTE! on localhost connections an 'ack' from the remote
@@ -702,14 +729,24 @@
* bump the packet length beyond the t_maxopd length.
* Clear the FIN bit because we cut off the tail of
* the segment.
+ *
+ * When doing TSO limit a burst to TCP_MAXWIN and set the
+ * flag to continue sending and prevent the last segment
+ * from being fractional thus making them all equal sized.
*/
if (len + optlen + ipoptlen > tp->t_maxopd) {
- /*
- * If there is still more to send, don't close the connection.
- */
flags &= ~TH_FIN;
- len = tp->t_maxopd - optlen - ipoptlen;
- sendalot = 1;
+ if (tso) {
+ if (len > TCP_MAXWIN) {
+ len = TCP_MAXWIN - TCP_MAXWIN %
+ (tp->t_maxopd - optlen);
+ sendalot = 1;
+ } else if (tp->t_flags & TF_NEEDFIN)
+ sendalot = 1;
+ } else {
+ len = tp->t_maxopd - optlen - ipoptlen;
+ sendalot = 1;
+ }
}
/*#ifdef DIAGNOSTIC*/
@@ -947,6 +984,16 @@
}
/*
+ * Enable TSO and specify the size of the segments.
+ * The TCP pseudo header checksum is always provided.
+ * XXX: Fixme: This is currently not the case for IPv6.
+ */
+ if (tso) {
+ m->m_pkthdr.csum_flags = CSUM_TSO;
+ m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen;
+ }
+
+ /*
* In transmit state, time the transmission and arrange for
* the retransmit. In persist state, just set snd_max.
*/
@@ -1119,11 +1166,22 @@
}
if (error == EMSGSIZE) {
/*
- * ip_output() will have already fixed the route
- * for us. tcp_mtudisc() will, as its last action,
- * initiate retransmission, so it is important to
- * not do so here.
+ * For some reason the interface we used initially
+ * to send segments changed to another or lowered
+ * its MTU.
+ *
+ * tcp_mtudisc() will find out the new MTU and as
+ * its last action, initiate retransmission, so it
+ * is important to not do so here.
+ *
+ * If TSO was active we either got an interface
+ * without TSO capabilities or TSO was turned off.
+ * Disable it for this connection as well and
+ * immediately retry with MSS sized segments generated
+ * by this function.
*/
+ if (tso)
+ tp->t_flags &= ~TF_TSO;
tcp_mtudisc(tp->t_inpcb, 0);
return 0;
}
==== //depot/projects/newisp/netinet/tcp_subr.c#4 (text+ko) ====
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.260 2006/09/06 21:51:58 andre Exp $
+ * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.261 2006/09/07 13:06:00 ru Exp $
*/
#include "opt_compat.h"
@@ -1736,7 +1736,7 @@
{
struct tcptw *tw;
struct inpcb *inp;
- int tw_time, acknow;
+ int acknow;
struct socket *so;
INP_INFO_WLOCK_ASSERT(&tcbinfo); /* tcp_timer_2msl_reset(). */
@@ -1781,7 +1781,6 @@
* be used for fin-wait-2 state also, then we may need
* a ts_recent from the last segment.
*/
- tw_time = 2 * tcp_msl;
acknow = tp->t_flags & TF_ACKNOW;
/*
@@ -1803,7 +1802,7 @@
tcp_twrespond(tw, TH_ACK);
inp->inp_ppcb = tw;
inp->inp_vflag |= INP_TIMEWAIT;
- tcp_timer_2msl_reset(tw, tw_time, 0);
+ tcp_timer_2msl_reset(tw, 0);
/*
* If the inpcb owns the sole reference to the socket, then we can
==== //depot/projects/newisp/netinet/tcp_timer.c#3 (text+ko) ====
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.83 2006/09/06 13:56:35 glebius Exp $
+ * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.85 2006/09/07 13:06:00 ru Exp $
*/
#include "opt_inet6.h"
@@ -230,46 +230,30 @@
}
/*
- * The timed wait lists contain references to each of the TCP sessions
- * currently TIME_WAIT state. The list pointers, including the list pointers
- * in each tcptw structure, are protected using the global tcbinfo lock,
- * which must be held over list iteration and modification.
+ * The timed wait queue contains references to each of the TCP sessions
+ * currently in the TIME_WAIT state. The queue pointers, including the
+ * queue pointers in each tcptw structure, are protected using the global
+ * tcbinfo lock, which must be held over queue iteration and modification.
*/
-struct twlist {
- LIST_HEAD(, tcptw) tw_list;
- struct tcptw tw_tail;
-};
-#define TWLIST_NLISTS 2
-static struct twlist twl_2msl[TWLIST_NLISTS];
-static struct twlist *tw_2msl_list[] = { &twl_2msl[0], &twl_2msl[1], NULL };
+static TAILQ_HEAD(, tcptw) twq_2msl;
void
tcp_timer_init(void)
{
- int i;
- struct twlist *twl;
- for (i = 0; i < TWLIST_NLISTS; i++) {
- twl = &twl_2msl[i];
- LIST_INIT(&twl->tw_list);
- LIST_INSERT_HEAD(&twl->tw_list, &twl->tw_tail, tw_2msl);
- }
+ TAILQ_INIT(&twq_2msl);
}
void
-tcp_timer_2msl_reset(struct tcptw *tw, int timeo, int rearm)
+tcp_timer_2msl_reset(struct tcptw *tw, int rearm)
{
- int i;
- struct tcptw *tw_tail;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(tw->tw_inpcb);
if (rearm)
- LIST_REMOVE(tw, tw_2msl);
- tw->tw_time = timeo + ticks;
- i = timeo > tcp_msl ? 1 : 0;
- tw_tail = &twl_2msl[i].tw_tail;
- LIST_INSERT_BEFORE(tw_tail, tw, tw_2msl);
+ TAILQ_REMOVE(&twq_2msl, tw, tw_2msl);
+ tw->tw_time = ticks + 2 * tcp_msl;
+ TAILQ_INSERT_TAIL(&twq_2msl, tw, tw_2msl);
}
void
@@ -277,31 +261,23 @@
{
INP_INFO_WLOCK_ASSERT(&tcbinfo);
- LIST_REMOVE(tw, tw_2msl);
+ TAILQ_REMOVE(&twq_2msl, tw, tw_2msl);
}
struct tcptw *
tcp_timer_2msl_tw(int reuse)
{
- struct tcptw *tw, *tw_tail;
- struct twlist *twl;
- int i;
+ struct tcptw *tw;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
- for (i = 0; i < TWLIST_NLISTS; i++) {
- twl = tw_2msl_list[i];
- tw_tail = &twl->tw_tail;
-
- for (;;) {
- tw = LIST_FIRST(&twl->tw_list);
- if (tw == tw_tail || (!reuse && tw->tw_time > ticks))
- break;
- INP_LOCK(tw->tw_inpcb);
- tcp_twclose(tw, reuse);
- if (reuse)
- return (tw);
- }
-
+ for (;;) {
+ tw = TAILQ_FIRST(&twq_2msl);
+ if (tw == NULL || (!reuse && tw->tw_time > ticks))
+ break;
+ INP_LOCK(tw->tw_inpcb);
+ tcp_twclose(tw, reuse);
+ if (reuse)
+ return (tw);
}
return (NULL);
}
==== //depot/projects/newisp/netinet/tcp_timer.h#2 (text+ko) ====
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.29 2006/08/11 21:15:23 mohans Exp $
+ * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.30 2006/09/07 13:06:00 ru Exp $
*/
#ifndef _NETINET_TCP_TIMER_H_
@@ -156,7 +156,7 @@
void tcp_timer_2msl(void *xtp);
struct tcptw *
tcp_timer_2msl_tw(int _reuse); /* XXX temporary */
-void tcp_timer_2msl_reset(struct tcptw *_tw, int _timeo, int rearm);
+void tcp_timer_2msl_reset(struct tcptw *_tw, int rearm);
void tcp_timer_2msl_stop(struct tcptw *_tw);
void tcp_timer_keep(void *xtp);
void tcp_timer_persist(void *xtp);
==== //depot/projects/newisp/netinet/tcp_var.h#3 (text+ko) ====
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.135 2006/09/06 21:51:58 andre Exp $
+ * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.136 2006/09/07 13:06:00 ru Exp $
*/
#ifndef _NETINET_TCP_VAR_H_
@@ -276,7 +276,7 @@
u_long t_recent;
u_long t_starttime;
int tw_time;
- LIST_ENTRY(tcptw) tw_2msl;
+ TAILQ_ENTRY(tcptw) tw_2msl;
};
#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)
More information about the p4-projects
mailing list