PERFORCE change 113893 for review
Roman Divacky
rdivacky at FreeBSD.org
Fri Feb 2 09:19:43 UTC 2007
http://perforce.freebsd.org/chv.cgi?CH=113893
Change 113893 by rdivacky at rdivacky_witten on 2007/02/02 09:18:56
IFC
Affected files ...
.. //depot/projects/linuxolator/src/sys/arm/xscale/ixp425/avila_machdep.c#3 integrate
.. //depot/projects/linuxolator/src/sys/compat/linux/linux_emul.c#32 integrate
.. //depot/projects/linuxolator/src/sys/dev/iwi/if_iwi.c#6 integrate
.. //depot/projects/linuxolator/src/sys/kern/sched_4bsd.c#10 integrate
.. //depot/projects/linuxolator/src/sys/kern/subr_witness.c#6 integrate
.. //depot/projects/linuxolator/src/sys/kern/uipc_socket.c#10 integrate
.. //depot/projects/linuxolator/src/sys/net80211/_ieee80211.h#4 integrate
.. //depot/projects/linuxolator/src/sys/netinet/tcp_input.c#7 integrate
.. //depot/projects/linuxolator/src/sys/netinet/tcp_output.c#5 integrate
.. //depot/projects/linuxolator/src/sys/netinet/tcp_syncache.c#5 integrate
.. //depot/projects/linuxolator/src/sys/netinet/tcp_usrreq.c#4 integrate
.. //depot/projects/linuxolator/src/sys/netinet/tcp_var.h#3 integrate
.. //depot/projects/linuxolator/src/sys/sun4v/include/intr_machdep.h#3 integrate
.. //depot/projects/linuxolator/src/sys/sun4v/include/smp.h#4 integrate
.. //depot/projects/linuxolator/src/sys/sun4v/sun4v/intr_machdep.c#4 integrate
.. //depot/projects/linuxolator/src/sys/sun4v/sun4v/mp_machdep.c#5 integrate
.. //depot/projects/linuxolator/src/sys/sun4v/sun4v/tte.c#4 integrate
.. //depot/projects/linuxolator/src/sys/sys/socketvar.h#2 integrate
Differences ...
==== //depot/projects/linuxolator/src/sys/arm/xscale/ixp425/avila_machdep.c#3 (text+ko) ====
@@ -49,7 +49,7 @@
#include "opt_ddb.h"
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/arm/xscale/ixp425/avila_machdep.c,v 1.2 2006/12/06 06:34:54 julian Exp $");
+__FBSDID("$FreeBSD: src/sys/arm/xscale/ixp425/avila_machdep.c,v 1.3 2007/02/02 05:14:21 kevlo Exp $");
#define _ARM32_BUS_DMA_PRIVATE
#include <sys/param.h>
@@ -274,7 +274,7 @@
#ifdef DDB
vm_offset_t zstart = 0, zend = 0;
#endif
- int i = 0;
+ int i;
uint32_t fake_preload[35];
uint32_t memsize;
==== //depot/projects/linuxolator/src/sys/compat/linux/linux_emul.c#32 (text+ko) ====
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/compat/linux/linux_emul.c,v 1.14 2007/02/01 13:29:27 kib Exp $");
+__FBSDID("$FreeBSD: src/sys/compat/linux/linux_emul.c,v 1.15 2007/02/02 08:58:16 kib Exp $");
#include "opt_compat.h"
==== //depot/projects/linuxolator/src/sys/dev/iwi/if_iwi.c#6 (text+ko) ====
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/dev/iwi/if_iwi.c,v 1.44 2006/12/07 15:24:38 kevlo Exp $");
+__FBSDID("$FreeBSD: src/sys/dev/iwi/if_iwi.c,v 1.45 2007/02/02 05:17:18 kevlo Exp $");
/*-
* Intel(R) PRO/Wireless 2200BG/2225BG/2915ABG driver
@@ -545,9 +545,10 @@
ring->queued = 0;
ring->cur = ring->next = 0;
- error = bus_dma_tag_create(NULL, 4, 0, BUS_SPACE_MAXADDR_32BIT,
- BUS_SPACE_MAXADDR, NULL, NULL, count * IWI_CMD_DESC_SIZE, 1,
- count * IWI_CMD_DESC_SIZE, 0, NULL, NULL, &ring->desc_dmat);
+ error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 4, 0,
+ BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
+ count * IWI_CMD_DESC_SIZE, 1, count * IWI_CMD_DESC_SIZE, 0,
+ NULL, NULL, &ring->desc_dmat);
if (error != 0) {
device_printf(sc->sc_dev, "could not create desc DMA tag\n");
goto fail;
@@ -606,9 +607,10 @@
ring->csr_ridx = csr_ridx;
ring->csr_widx = csr_widx;
- error = bus_dma_tag_create(NULL, 4, 0, BUS_SPACE_MAXADDR_32BIT,
- BUS_SPACE_MAXADDR, NULL, NULL, count * IWI_TX_DESC_SIZE, 1,
- count * IWI_TX_DESC_SIZE, 0, NULL, NULL, &ring->desc_dmat);
+ error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 4, 0,
+ BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
+ count * IWI_TX_DESC_SIZE, 1, count * IWI_TX_DESC_SIZE, 0, NULL,
+ NULL, &ring->desc_dmat);
if (error != 0) {
device_printf(sc->sc_dev, "could not create desc DMA tag\n");
goto fail;
@@ -636,9 +638,9 @@
goto fail;
}
- error = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR_32BIT,
- BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, IWI_MAX_NSEG,
- MCLBYTES, 0, NULL, NULL, &ring->data_dmat);
+ error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0,
+ BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES,
+ IWI_MAX_NSEG, MCLBYTES, 0, NULL, NULL, &ring->data_dmat);
if (error != 0) {
device_printf(sc->sc_dev, "could not create data DMA tag\n");
goto fail;
@@ -744,9 +746,9 @@
goto fail;
}
- error = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR_32BIT,
- BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, NULL,
- NULL, &ring->data_dmat);
+ error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0,
+ BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES,
+ 1, MCLBYTES, 0, NULL, NULL, &ring->data_dmat);
if (error != 0) {
device_printf(sc->sc_dev, "could not create data DMA tag\n");
goto fail;
@@ -3111,9 +3113,10 @@
if (sc->fw_uc.size > sc->fw_dma_size)
sc->fw_dma_size = sc->fw_uc.size;
- if (bus_dma_tag_create(NULL, 4, 0, BUS_SPACE_MAXADDR_32BIT,
- BUS_SPACE_MAXADDR, NULL, NULL, sc->fw_dma_size, 1, sc->fw_dma_size,
- 0, NULL, NULL, &sc->fw_dmat) != 0) {
+ if (bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 4, 0,
+ BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
+ sc->fw_dma_size, 1, sc->fw_dma_size, 0, NULL, NULL,
+ &sc->fw_dmat) != 0) {
device_printf(sc->sc_dev,
"could not create firmware DMA tag\n");
IWI_LOCK(sc);
==== //depot/projects/linuxolator/src/sys/kern/sched_4bsd.c#10 (text+ko) ====
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.95 2007/01/23 08:46:50 jeff Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.96 2007/02/02 05:14:21 julian Exp $");
#include "opt_hwpmc_hooks.h"
@@ -866,9 +866,12 @@
* or stopped or any thing else similar. We never put the idle
* threads on the run queue, however.
*/
- if (td == PCPU_GET(idlethread))
+ if (td->td_flags & TDF_IDLETD) {
TD_SET_CAN_RUN(td);
- else {
+#ifdef SMP
+ idle_cpus_mask &= ~PCPU_GET(cpumask);
+#endif
+ } else {
if (TD_IS_RUNNING(td)) {
/* Put us back on the run queue. */
sched_add(td, (flags & SW_PREEMPT) ?
@@ -901,13 +904,33 @@
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
+ /* I feel sleepy */
cpu_switch(td, newtd);
+ /*
+ * Where am I? What year is it?
+ * We are in the same thread that went to sleep above,
+ * but any amount of time may have passed. All our context
+ * will still be available as will local variables.
+ * PCPU values however may have changed as we may have
+ * changed CPU so don't trust cached values of them.
+ * New threads will go to fork_exit() instead of here
+ * so if you change things here you may need to change
+ * things there too.
+ * If the thread above was exiting it will never wake
+ * up again here, so either it has saved everything it
+ * needed to, or the thread_wait() or wait() will
+ * need to reap it.
+ */
#ifdef HWPMC_HOOKS
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
}
+#ifdef SMP
+ if (td->td_flags & TDF_IDLETD)
+ idle_cpus_mask |= PCPU_GET(cpumask);
+#endif
sched_lock.mtx_lock = (uintptr_t)td;
td->td_oncpu = PCPU_GET(cpuid);
}
@@ -1326,18 +1349,9 @@
{
struct proc *p;
struct thread *td;
-#ifdef SMP
- cpumask_t mycpu;
-#endif
td = curthread;
p = td->td_proc;
-#ifdef SMP
- mycpu = PCPU_GET(cpumask);
- mtx_lock_spin(&sched_lock);
- idle_cpus_mask |= mycpu;
- mtx_unlock_spin(&sched_lock);
-#endif
for (;;) {
mtx_assert(&Giant, MA_NOTOWNED);
@@ -1345,13 +1359,7 @@
cpu_idle();
mtx_lock_spin(&sched_lock);
-#ifdef SMP
- idle_cpus_mask &= ~mycpu;
-#endif
mi_switch(SW_VOL, NULL);
-#ifdef SMP
- idle_cpus_mask |= mycpu;
-#endif
mtx_unlock_spin(&sched_lock);
}
}
==== //depot/projects/linuxolator/src/sys/kern/subr_witness.c#6 (text+ko) ====
@@ -82,7 +82,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/subr_witness.c,v 1.221 2007/01/16 22:56:28 ssouhlal Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/subr_witness.c,v 1.222 2007/02/02 09:02:18 kib Exp $");
#include "opt_ddb.h"
#include "opt_witness.h"
@@ -370,6 +370,13 @@
{ "cdev", &lock_class_mtx_sleep },
{ NULL, NULL },
/*
+ * kqueue/VFS interaction
+ */
+ { "kqueue", &lock_class_mtx_sleep },
+ { "struct mount mtx", &lock_class_mtx_sleep },
+ { "vnode interlock", &lock_class_mtx_sleep },
+ { NULL, NULL },
+ /*
* spin locks
*/
#ifdef SMP
==== //depot/projects/linuxolator/src/sys/kern/uipc_socket.c#10 (text+ko) ====
@@ -95,7 +95,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/uipc_socket.c,v 1.289 2007/01/22 14:50:28 andre Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/uipc_socket.c,v 1.290 2007/02/01 17:53:40 andre Exp $");
#include "opt_inet.h"
#include "opt_mac.h"
@@ -368,6 +368,10 @@
knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd),
NULL, NULL, NULL);
so->so_count = 1;
+ /*
+ * Auto-sizing of socket buffers is managed by the protocols and
+ * the appropriate flags must be set in the pru_attach function.
+ */
error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
if (error) {
KASSERT(so->so_count == 1, ("socreate: so_count %d",
@@ -442,6 +446,8 @@
so->so_snd.sb_lowat = head->so_snd.sb_lowat;
so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
so->so_snd.sb_timeo = head->so_snd.sb_timeo;
+ so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
+ so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
so->so_state |= connstatus;
ACCEPT_LOCK();
if (connstatus) {
@@ -2116,6 +2122,8 @@
error = ENOBUFS;
goto bad;
}
+ (sopt->sopt_name == SO_SNDBUF ? &so->so_snd :
+ &so->so_rcv)->sb_flags &= ~SB_AUTOSIZE;
break;
/*
==== //depot/projects/linuxolator/src/sys/net80211/_ieee80211.h#4 (text+ko) ====
@@ -29,7 +29,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * $FreeBSD: src/sys/net80211/_ieee80211.h,v 1.6 2007/01/15 01:12:28 sam Exp $
+ * $FreeBSD: src/sys/net80211/_ieee80211.h,v 1.7 2007/02/02 02:45:33 sam Exp $
*/
#ifndef _NET80211__IEEE80211_H_
#define _NET80211__IEEE80211_H_
@@ -186,6 +186,8 @@
(((_c)->ic_flags & (IEEE80211_CHAN_QUARTER | IEEE80211_CHAN_HALF)) == 0)
#define IEEE80211_IS_CHAN_GSM(_c) \
(((_c)->ic_flags & IEEE80211_CHAN_GSM) != 0)
+#define IEEE80211_IS_CHAN_PASSIVE(_c) \
+ (((_c)->ic_flags & IEEE80211_CHAN_PASSIVE) != 0)
/* ni_chan encoding for FH phy */
#define IEEE80211_FH_CHANMOD 80
==== //depot/projects/linuxolator/src/sys/netinet/tcp_input.c#7 (text+ko) ====
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.311 2006/12/12 12:17:56 bz Exp $
+ * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.312 2007/02/01 18:32:13 andre Exp $
*/
#include "opt_ipfw.h" /* for ipfw_fwd */
@@ -161,6 +161,18 @@
&tcp_reass_overflows, 0,
"Global number of TCP Segment Reassembly Queue Overflows");
+int tcp_do_autorcvbuf = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_RW,
+ &tcp_do_autorcvbuf, 0, "Enable automatic receive buffer sizing");
+
+int tcp_autorcvbuf_inc = 16*1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_RW,
+ &tcp_autorcvbuf_inc, 0, "Incrementor step size of automatic receive buffer");
+
+int tcp_autorcvbuf_max = 256*1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW,
+ &tcp_autorcvbuf_max, 0, "Max size of automatic receive buffer");
+
struct inpcbhead tcb;
#define tcb6 tcb /* for KAME src sync over BSD*'s */
struct inpcbinfo tcbinfo;
@@ -1295,6 +1307,8 @@
} else if (th->th_ack == tp->snd_una &&
LIST_EMPTY(&tp->t_segq) &&
tlen <= sbspace(&so->so_rcv)) {
+ int newsize = 0; /* automatic sockbuf scaling */
+
KASSERT(headlocked, ("headlocked"));
INP_INFO_WUNLOCK(&tcbinfo);
headlocked = 0;
@@ -1321,18 +1335,78 @@
tcpstat.tcps_rcvpack++;
tcpstat.tcps_rcvbyte += tlen;
ND6_HINT(tp); /* some progress has been done */
- /*
#ifdef TCPDEBUG
if (so->so_options & SO_DEBUG)
tcp_trace(TA_INPUT, ostate, tp,
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
- * Add data to socket buffer.
- */
+ /*
+ * Automatic sizing of receive socket buffer. Often the send
+ * buffer size is not optimally adjusted to the actual network
+ * conditions at hand (delay bandwidth product). Setting the
+ * buffer size too small limits throughput on links with high
+ * bandwidth and high delay (eg. trans-continental/oceanic links).
+ *
+ * On the receive side the socket buffer memory is only rarely
+ * used to any significant extent. This allows us to be much
+ * more aggressive in scaling the receive socket buffer. For
+ * the case that the buffer space is actually used to a large
+ * extent and we run out of kernel memory we can simply drop
+ * the new segments; TCP on the sender will just retransmit it
+ * later. Setting the buffer size too big may only consume too
+ * much kernel memory if the application doesn't read() from
+ * the socket or packet loss or reordering makes use of the
+ * reassembly queue.
+ *
+ * The criteria to step up the receive buffer one notch are:
+ * 1. the number of bytes received during the time it takes
+ * one timestamp to be reflected back to us (the RTT);
+ * 2. received bytes per RTT is within seven eighth of the
+ * current socket buffer size;
+ * 3. receive buffer size has not hit maximal automatic size;
+ *
+ * This algorithm does one step per RTT at most and only if
+ * we receive a bulk stream w/o packet losses or reorderings.
+ * Shrinking the buffer during idle times is not necessary as
+ * it doesn't consume any memory when idle.
+ *
+ * TODO: Only step up if the application is actually serving
+ * the buffer to better manage the socket buffer resources.
+ */
+ if (tcp_do_autorcvbuf &&
+ to.to_tsecr &&
+ (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
+ if (to.to_tsecr > tp->rfbuf_ts &&
+ to.to_tsecr - tp->rfbuf_ts < hz) {
+ if (tp->rfbuf_cnt >
+ (so->so_rcv.sb_hiwat / 8 * 7) &&
+ so->so_rcv.sb_hiwat <
+ tcp_autorcvbuf_max) {
+ newsize =
+ min(so->so_rcv.sb_hiwat +
+ tcp_autorcvbuf_inc,
+ tcp_autorcvbuf_max);
+ }
+ /* Start over with next RTT. */
+ tp->rfbuf_ts = 0;
+ tp->rfbuf_cnt = 0;
+ } else
+ tp->rfbuf_cnt += tlen; /* add up */
+ }
+
+ /* Add data to socket buffer. */
SOCKBUF_LOCK(&so->so_rcv);
if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
m_freem(m);
} else {
+ /*
+ * Set new socket buffer size.
+ * Give up when limit is reached.
+ */
+ if (newsize)
+ if (!sbreserve_locked(&so->so_rcv,
+ newsize, so, curthread))
+ so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
m_adj(m, drop_hdrlen); /* delayed header drop */
sbappendstream_locked(&so->so_rcv, m);
}
@@ -1361,6 +1435,10 @@
tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
}
+ /* Reset receive buffer auto scaling when not in bulk receive mode. */
+ tp->rfbuf_ts = 0;
+ tp->rfbuf_cnt = 0;
+
switch (tp->t_state) {
/*
==== //depot/projects/linuxolator/src/sys/netinet/tcp_output.c#5 (text+ko) ====
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_output.c 8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.121 2006/10/22 11:52:16 rwatson Exp $
+ * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.122 2007/02/01 18:32:13 andre Exp $
*/
#include "opt_inet.h"
@@ -110,6 +110,19 @@
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
&tcp_do_tso, 0, "Enable TCP Segmentation Offload");
+int tcp_do_autosndbuf = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_RW,
+ &tcp_do_autosndbuf, 0, "Enable automatic send buffer sizing");
+
+int tcp_autosndbuf_inc = 8*1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_RW,
+ &tcp_autosndbuf_inc, 0, "Incrementor step size of automatic send buffer");
+
+int tcp_autosndbuf_max = 256*1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_RW,
+ &tcp_autosndbuf_max, 0, "Max size of automatic send buffer");
+
+
/*
* Tcp output routine: figure out what should be sent and send it.
*/
@@ -380,11 +393,60 @@
}
}
+ /* len will be >= 0 after this point. */
+ KASSERT(len >= 0, ("%s: len < 0", __func__));
+
+ /*
+ * Automatic sizing of send socket buffer. Often the send buffer
+ * size is not optimally adjusted to the actual network conditions
+ * at hand (delay bandwidth product). Setting the buffer size too
+ * small limits throughput on links with high bandwidth and high
+ * delay (eg. trans-continental/oceanic links). Setting the
+ * buffer size too big consumes too much real kernel memory,
+ * especially with many connections on busy servers.
+ *
+ * The criteria to step up the send buffer one notch are:
+ * 1. receive window of remote host is larger than send buffer
+ * (with a fudge factor of 5/4th);
+ * 2. send buffer is filled to 7/8th with data (so we actually
+ * have data to make use of it);
+ * 3. send buffer fill has not hit maximal automatic size;
+ * 4. our send window (slow start and congestion controlled) is
+ * larger than sent but unacknowledged data in send buffer.
+ *
+ * The remote host receive window scaling factor may limit the
+ * growing of the send buffer before it reaches its allowed
+ * maximum.
+ *
+ * It scales directly with slow start or congestion window
+ * and does at most one step per received ACK. This fast
+ * scaling has the drawback of growing the send buffer beyond
+ * what is strictly necessary to make full use of a given
+ * delay*bandwidth product. However testing has shown this not
+ * to be much of a problem. At worst we are trading wasting
+ * of available bandwidth (the non-use of it) for wasting some
+ * socket buffer memory.
+ *
+ * TODO: Shrink send buffer during idle periods together
+ * with congestion window. Requires another timer. Has to
+ * wait for upcoming tcp timer rewrite.
+ */
+ if (tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) {
+ if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat &&
+ so->so_snd.sb_cc >= (so->so_snd.sb_hiwat / 8 * 7) &&
+ so->so_snd.sb_cc < tcp_autosndbuf_max &&
+ sendwin >= (so->so_snd.sb_cc - (tp->snd_nxt - tp->snd_una))) {
+ if (!sbreserve_locked(&so->so_snd,
+ min(so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
+ tcp_autosndbuf_max), so, curthread))
+ so->so_snd.sb_flags &= ~SB_AUTOSIZE;
+ }
+ }
+
/*
- * len will be >= 0 after this point. Truncate to the maximum
- * segment length or enable TCP Segmentation Offloading (if supported
- * by hardware) and ensure that FIN is removed if the length no longer
- * contains the last data byte.
+ * Truncate to the maximum segment length or enable TCP Segmentation
+ * Offloading (if supported by hardware) and ensure that FIN is removed
+ * if the length no longer contains the last data byte.
*
* TSO may only be used if we are in a pure bulk sending state. The
* presence of TCP-MD5, SACK retransmits, SACK advertizements and
@@ -606,6 +668,10 @@
optlen += TCPOLEN_TSTAMP_APPA;
}
+ /* Set receive buffer autosizing timestamp. */
+ if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE))
+ tp->rfbuf_ts = ticks;
+
#ifdef TCP_SIGNATURE
#ifdef INET6
if (!isipv6)
==== //depot/projects/linuxolator/src/sys/netinet/tcp_syncache.c#5 (text+ko) ====
@@ -29,7 +29,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet/tcp_syncache.c,v 1.103 2006/12/13 06:00:56 csjp Exp $
+ * $FreeBSD: src/sys/netinet/tcp_syncache.c,v 1.104 2007/02/01 17:39:18 andre Exp $
*/
#include "opt_inet.h"
@@ -1014,9 +1014,15 @@
if (to->to_flags & TOF_SCALE) {
int wscale = 0;
- /* Compute proper scaling value from buffer space */
+ /*
+ * Compute proper scaling value from buffer space.
+ * Leave enough room for the socket buffer to grow
+ * with auto sizing. This allows us to scale the
+ * receive buffer over a wide range while not losing
+ * any efficiency or fine granularity.
+ */
while (wscale < TCP_MAX_WINSHIFT &&
- (TCP_MAXWIN << wscale) < sb_hiwat)
+ (0x1 << wscale) < tcp_minmss)
wscale++;
sc->sc_requested_r_scale = wscale;
sc->sc_requested_s_scale = to->to_requested_s_scale;
==== //depot/projects/linuxolator/src/sys/netinet/tcp_usrreq.c#4 (text+ko) ====
@@ -29,7 +29,7 @@
* SUCH DAMAGE.
*
* From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
- * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.142 2006/11/22 17:16:54 sam Exp $
+ * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.144 2007/02/01 18:32:13 andre Exp $
*/
#include "opt_inet.h"
@@ -1131,9 +1131,14 @@
inp->inp_laddr = laddr;
in_pcbrehash(inp);
- /* Compute window scaling to request. */
+ /*
+ * Compute window scaling to request:
+ * Scale to fit into sweet spot. See tcp_syncache.c.
+ * XXX: This should move to tcp_output().
+ * XXX: This should be based on the actual MSS.
+ */
while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
- (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
+ (0x1 << tp->request_r_scale) < tcp_minmss)
tp->request_r_scale++;
soisconnecting(so);
@@ -1441,6 +1446,8 @@
if (error)
return (error);
}
+ so->so_rcv.sb_flags |= SB_AUTOSIZE;
+ so->so_snd.sb_flags |= SB_AUTOSIZE;
INP_INFO_WLOCK(&tcbinfo);
error = in_pcballoc(so, &tcbinfo);
if (error) {
==== //depot/projects/linuxolator/src/sys/netinet/tcp_var.h#3 (text+ko) ====
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.137 2006/09/13 13:08:27 andre Exp $
+ * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.138 2007/02/01 18:32:13 andre Exp $
*/
#ifndef _NETINET_TCP_VAR_H_
@@ -202,6 +202,8 @@
episode starts at this seq number */
struct sackhint sackhint; /* SACK scoreboard hint */
int t_rttlow; /* smallest observerved RTT */
+ u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */
+ int rfbuf_cnt; /* recv buffer autoscaling byte count */
};
#define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY)
==== //depot/projects/linuxolator/src/sys/sun4v/include/intr_machdep.h#3 (text+ko) ====
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/sun4v/include/intr_machdep.h,v 1.2 2007/01/19 11:15:33 marius Exp $
+ * $FreeBSD: src/sys/sun4v/include/intr_machdep.h,v 1.3 2007/02/02 05:00:21 kmacy Exp $
*/
#ifndef _MACHINE_INTR_MACHDEP_H_
@@ -46,6 +46,7 @@
#define PIL_RENDEZVOUS 3 /* smp rendezvous ipi */
#define PIL_AST 4 /* ast ipi */
#define PIL_STOP 5 /* stop cpu ipi */
+#define PIL_PREEMPT 6 /* preempt idle thread cpu ipi */
#define PIL_FAST 13 /* fast interrupts */
#define PIL_TICK 14
==== //depot/projects/linuxolator/src/sys/sun4v/include/smp.h#4 (text+ko) ====
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/sun4v/include/smp.h,v 1.3 2006/12/25 02:05:52 kmacy Exp $
+ * $FreeBSD: src/sys/sun4v/include/smp.h,v 1.4 2007/02/02 05:00:21 kmacy Exp $
*/
#ifndef _MACHINE_SMP_H_
@@ -44,7 +44,9 @@
#define IPI_AST PIL_AST
#define IPI_RENDEZVOUS PIL_RENDEZVOUS
#define IPI_STOP PIL_STOP
+#define IPI_PREEMPT PIL_PREEMPT
+
#define IPI_RETRIES 5000
struct cpu_start_args {
@@ -79,6 +81,7 @@
void cpu_ipi_ast(struct trapframe *tf);
void cpu_ipi_stop(struct trapframe *tf);
+void cpu_ipi_preempt(struct trapframe *tf);
void ipi_selected(u_int cpus, u_int ipi);
void ipi_all(u_int ipi);
==== //depot/projects/linuxolator/src/sys/sun4v/sun4v/intr_machdep.c#4 (text+ko) ====
@@ -59,7 +59,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/sun4v/sun4v/intr_machdep.c,v 1.3 2006/11/24 05:27:49 kmacy Exp $");
+__FBSDID("$FreeBSD: src/sys/sun4v/sun4v/intr_machdep.c,v 1.4 2007/02/02 05:00:21 kmacy Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -114,7 +114,8 @@
"rndzvs", /* PIL_RENDEZVOUS */
"ast", /* PIL_AST */
"stop", /* PIL_STOP */
- "stray", "stray", "stray", "stray", "stray", "stray", "stray",
+ "preempt", /* PIL_PREEMPT */
+ "stray", "stray", "stray", "stray", "stray", "stray",
"fast", /* PIL_FAST */
"tick", /* PIL_TICK */
};
@@ -266,6 +267,7 @@
intr_handlers[PIL_AST] = cpu_ipi_ast;
intr_handlers[PIL_RENDEZVOUS] = (ih_func_t *)smp_rendezvous_action;
intr_handlers[PIL_STOP]= cpu_ipi_stop;
+ intr_handlers[PIL_PREEMPT]= cpu_ipi_preempt;
#endif
mtx_init(&intr_table_lock, "intr table", NULL, MTX_SPIN);
cpu_intrq_alloc();
==== //depot/projects/linuxolator/src/sys/sun4v/sun4v/mp_machdep.c#5 (text+ko) ====
@@ -55,7 +55,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/sun4v/sun4v/mp_machdep.c,v 1.5 2006/12/17 01:31:56 kmacy Exp $");
+__FBSDID("$FreeBSD: src/sys/sun4v/sun4v/mp_machdep.c,v 1.6 2007/02/02 05:00:21 kmacy Exp $");
#include "opt_trap_trace.h"
@@ -456,6 +456,20 @@
}
void
+cpu_ipi_preempt(struct trapframe *tf)
+{
+ struct thread *running_thread = curthread;
+
+ mtx_lock_spin(&sched_lock);
+ if (running_thread->td_critnest > 1)
+ running_thread->td_owepreempt = 1;
+ else
+ mi_switch(SW_INVOL | SW_PREEMPT, NULL);
+ mtx_unlock_spin(&sched_lock);
+
+}
+
+void
cpu_ipi_selected(int cpu_count, uint16_t *cpulist, u_long d0, u_long d1, u_long d2, uint64_t *ackmask)
{
==== //depot/projects/linuxolator/src/sys/sun4v/sun4v/tte.c#4 (text+ko) ====
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/sun4v/sun4v/tte.c,v 1.3 2006/12/24 08:03:27 kmacy Exp $");
+__FBSDID("$FreeBSD: src/sys/sun4v/sun4v/tte.c,v 1.4 2007/02/02 04:57:11 kmacy Exp $");
#include "opt_ddb.h"
#include "opt_pmap.h"
@@ -74,7 +74,7 @@
PMAP_LOCK(pmap);
otte_data = tte_hash_clear_bits(pmap->pm_hash, pv->pv_va, flags);
if ((matchbits = (otte_data & active_flags)) != 0) {
- if (matchbits == VTD_W)
+ if ((otte_data & (VTD_SW_W|VTD_W)) == (VTD_SW_W|VTD_W))
vm_page_dirty(m);
pmap_invalidate_page(pmap, pv->pv_va, TRUE);
}
==== //depot/projects/linuxolator/src/sys/sys/socketvar.h#2 (text+ko) ====
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)socketvar.h 8.3 (Berkeley) 2/19/95
- * $FreeBSD: src/sys/sys/socketvar.h,v 1.154 2006/08/01 10:30:26 rwatson Exp $
+ * $FreeBSD: src/sys/sys/socketvar.h,v 1.155 2007/02/01 17:53:41 andre Exp $
*/
#ifndef _SYS_SOCKETVAR_H_
@@ -128,6 +128,7 @@
#define SB_NOINTR 0x40 /* operations not interruptible */
#define SB_AIO 0x80 /* AIO operations queued */
#define SB_KNOTE 0x100 /* kernel note attached */
+#define SB_AUTOSIZE 0x800 /* automatically size socket buffer */
void (*so_upcall)(struct socket *, void *, int);
void *so_upcallarg;
More information about the p4-projects
mailing list