PERFORCE change 109067 for review
Paolo Pisati
piso at FreeBSD.org
Thu Nov 2 19:38:20 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=109067
Change 109067 by piso at piso_newluxor on 2006/11/02 19:37:58
IFC at 109062
Affected files ...
.. //depot/projects/soc2005/libalias/sbin/ipfw/ipfw2.c#11 edit
.. //depot/projects/soc2005/libalias/sys/boot/common/load_elf.c#3 integrate
.. //depot/projects/soc2005/libalias/sys/boot/i386/boot2/boot2.c#3 integrate
.. //depot/projects/soc2005/libalias/sys/boot/i386/libi386/elf32_freebsd.c#3 integrate
.. //depot/projects/soc2005/libalias/sys/boot/pc98/boot2/boot.c#3 integrate
.. //depot/projects/soc2005/libalias/sys/ddb/db_command.c#4 integrate
.. //depot/projects/soc2005/libalias/sys/geom/eli/g_eli.c#6 integrate
.. //depot/projects/soc2005/libalias/sys/geom/journal/g_journal.c#3 integrate
.. //depot/projects/soc2005/libalias/sys/kern/uipc_mbuf.c#4 integrate
.. //depot/projects/soc2005/libalias/sys/kern/uipc_socket.c#8 integrate
.. //depot/projects/soc2005/libalias/sys/kern/uipc_syscalls.c#6 integrate
.. //depot/projects/soc2005/libalias/sys/net/bridgestp.c#7 integrate
.. //depot/projects/soc2005/libalias/sys/net/if_tap.c#4 integrate
.. //depot/projects/soc2005/libalias/sys/net/if_tun.c#5 integrate
.. //depot/projects/soc2005/libalias/sys/net/ppp_tty.c#2 integrate
.. //depot/projects/soc2005/libalias/sys/netgraph/ng_device.c#2 integrate
.. //depot/projects/soc2005/libalias/sys/sys/libkern.h#4 integrate
.. //depot/projects/soc2005/libalias/sys/sys/mbuf.h#6 integrate
.. //depot/projects/soc2005/libalias/sys/sys/socket.h#3 integrate
Differences ...
==== //depot/projects/soc2005/libalias/sbin/ipfw/ipfw2.c#11 (text+ko) ====
@@ -526,8 +526,9 @@
if (optname == IP_FW_GET || optname == IP_DUMMYNET_GET ||
optname == IP_FW_ADD || optname == IP_FW_TABLE_LIST ||
- optname == IP_FW_TABLE_GETSIZE || optname == IP_FW_NAT_GET_CONFIG ||
- optname == IP_FW_NAT_GET_LOG)
+ optname == IP_FW_TABLE_GETSIZE ||
+ optname == IP_FW_NAT_GET_CONFIG ||
+ optname == IP_FW_NAT_GET_LOG)
i = getsockopt(s, IPPROTO_IP, optname, optval,
(socklen_t *)optlen);
else
==== //depot/projects/soc2005/libalias/sys/boot/common/load_elf.c#3 (text+ko) ====
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/boot/common/load_elf.c,v 1.35 2006/10/29 14:50:57 ru Exp $");
+__FBSDID("$FreeBSD: src/sys/boot/common/load_elf.c,v 1.36 2006/11/02 17:28:37 ru Exp $");
#include <sys/param.h>
#include <sys/exec.h>
@@ -263,7 +263,7 @@
#if __ELF_WORD_SIZE == 64
off = - (off & 0xffffffffff000000ull);/* x86_64 relocates after locore */
#else
- off = - (off & 0xc0000000u); /* i386 relocates after locore */
+ off = - (off & 0xff000000u); /* i386 relocates after locore */
#endif
#else
off = 0; /* other archs use direct mapped kernels */
==== //depot/projects/soc2005/libalias/sys/boot/i386/boot2/boot2.c#3 (text+ko) ====
@@ -14,7 +14,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/boot/i386/boot2/boot2.c,v 1.82 2006/10/29 14:50:57 ru Exp $");
+__FBSDID("$FreeBSD: src/sys/boot/i386/boot2/boot2.c,v 1.83 2006/11/02 17:28:38 ru Exp $");
#include <sys/param.h>
#include <sys/disklabel.h>
@@ -334,7 +334,7 @@
return;
}
if (fmt == 0) {
- addr = hdr.ex.a_entry & 0x3fffffff;
+ addr = hdr.ex.a_entry & 0xffffff;
p = PTOV(addr);
fs_off = PAGE_SIZE;
if (xfsread(ino, p, hdr.ex.a_text))
@@ -368,7 +368,7 @@
j++;
}
for (i = 0; i < 2; i++) {
- p = PTOV(ep[i].p_paddr & 0x3fffffff);
+ p = PTOV(ep[i].p_paddr & 0xffffff);
fs_off = ep[i].p_offset;
if (xfsread(ino, p, ep[i].p_filesz))
return;
@@ -389,7 +389,7 @@
p += es[i].sh_size;
}
}
- addr = hdr.eh.e_entry & 0x3fffffff;
+ addr = hdr.eh.e_entry & 0xffffff;
}
bootinfo.bi_esymtab = VTOP(p);
bootinfo.bi_kernelname = VTOP(kname);
==== //depot/projects/soc2005/libalias/sys/boot/i386/libi386/elf32_freebsd.c#3 (text+ko) ====
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/boot/i386/libi386/elf32_freebsd.c,v 1.16 2006/10/29 14:50:58 ru Exp $");
+__FBSDID("$FreeBSD: src/sys/boot/i386/libi386/elf32_freebsd.c,v 1.17 2006/11/02 17:28:38 ru Exp $");
#include <sys/param.h>
#include <sys/exec.h>
@@ -65,7 +65,7 @@
err = bi_load32(fp->f_args, &boothowto, &bootdev, &bootinfop, &modulep, &kernend);
if (err != 0)
return(err);
- entry = ehdr->e_entry & 0x3fffffff;
+ entry = ehdr->e_entry & 0xffffff;
#ifdef DEBUG
printf("Start @ 0x%lx ...\n", entry);
==== //depot/projects/soc2005/libalias/sys/boot/pc98/boot2/boot.c#3 (text+ko) ====
@@ -49,7 +49,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/boot/pc98/boot2/boot.c,v 1.15 2006/10/29 14:50:58 ru Exp $");
+__FBSDID("$FreeBSD: src/sys/boot/pc98/boot2/boot.c,v 1.16 2006/11/02 17:28:38 ru Exp $");
#include "boot.h"
#include <a.out.h>
@@ -199,9 +199,9 @@
/*
* We assume that the entry address is the same as the lowest text
* address and that the kernel startup code handles relocation by
- * this address rounded down to a multiple of 1G.
+ * this address rounded down to a multiple of 16M.
*/
- startaddr = head.a_entry & 0x3FFFFFFF;
+ startaddr = head.a_entry & 0x00FFFFFF;
addr = startaddr;
printf("Booting %d:%s(%d,%c)%s @ 0x%x\n"
, dosdev & 0x0f
==== //depot/projects/soc2005/libalias/sys/ddb/db_command.c#4 (text+ko) ====
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/ddb/db_command.c,v 1.71 2006/10/10 07:26:53 bde Exp $");
+__FBSDID("$FreeBSD: src/sys/ddb/db_command.c,v 1.72 2006/11/02 11:47:38 kib Exp $");
#include <sys/param.h>
#include <sys/linker_set.h>
@@ -690,14 +690,22 @@
{
struct proc *p;
struct thread *td;
+ jmp_buf jb;
+ void *prev_jb;
LIST_FOREACH(p, &allproc, p_list) {
- FOREACH_THREAD_IN_PROC(p, td) {
- db_printf("\nTracing command %s pid %d tid %ld td %p\n",
- p->p_comm, p->p_pid, (long)td->td_tid, td);
- db_trace_thread(td, -1);
- if (db_pager_quit)
- return;
+ prev_jb = kdb_jmpbuf(jb);
+ if (setjmp(jb) == 0) {
+ FOREACH_THREAD_IN_PROC(p, td) {
+ db_printf("\nTracing command %s pid %d tid %ld td %p\n",
+ p->p_comm, p->p_pid, (long)td->td_tid, td);
+ db_trace_thread(td, -1);
+ if (db_pager_quit) {
+ kdb_jmpbuf(prev_jb);
+ return;
+ }
+ }
}
+ kdb_jmpbuf(prev_jb);
}
}
==== //depot/projects/soc2005/libalias/sys/geom/eli/g_eli.c#6 (text+ko) ====
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/eli/g_eli.c,v 1.32 2006/11/01 16:05:06 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/eli/g_eli.c,v 1.33 2006/11/02 09:01:34 pjd Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -642,7 +642,7 @@
for (i = 0; i < threads; i++) {
if (g_eli_cpu_is_disabled(i)) {
G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.",
- bpp->name, threads);
+ bpp->name, i);
continue;
}
wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO);
==== //depot/projects/soc2005/libalias/sys/geom/journal/g_journal.c#3 (text+ko) ====
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/journal/g_journal.c,v 1.6 2006/11/02 00:37:39 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/journal/g_journal.c,v 1.8 2006/11/02 16:24:18 pjd Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -600,11 +600,9 @@
&error);
g_topology_lock();
g_access(cp, -1, 0, 0);
- if (error != 0) {
+ if (buf == NULL) {
GJ_DEBUG(1, "Cannot read metadata from %s (error=%d).",
cp->provider->name, error);
- if (buf != NULL)
- g_free(buf);
return (error);
}
@@ -1622,7 +1620,7 @@
}
if (bp != NULL) {
if (bp->bio_data == NULL) {
- nbp = g_clone_bio(pbp);
+ nbp = g_duplicate_bio(pbp);
nbp->bio_cflags = GJ_BIO_READ;
nbp->bio_data =
pbp->bio_data + cstart - pbp->bio_offset;
@@ -1646,7 +1644,7 @@
* Its time for asking data provider.
*/
GJ_DEBUG(3, "READ(data): (%jd, %jd)", ostart, oend);
- nbp = g_clone_bio(pbp);
+ nbp = g_duplicate_bio(pbp);
nbp->bio_cflags = GJ_BIO_READ;
nbp->bio_data = pbp->bio_data + ostart - pbp->bio_offset;
nbp->bio_offset = ostart;
@@ -2239,6 +2237,8 @@
struct g_consumer *cp;
int error;
+ sc = NULL; /* gcc */
+
g_topology_assert();
/*
* There are two possibilities:
==== //depot/projects/soc2005/libalias/sys/kern/uipc_mbuf.c#4 (text+ko) ====
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.168 2006/10/22 11:52:13 rwatson Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.169 2006/11/02 17:37:21 andre Exp $");
#include "opt_mac.h"
#include "opt_param.h"
@@ -94,61 +94,61 @@
* chain.
*/
struct mbuf *
-m_getm(struct mbuf *m, int len, int how, short type)
+m_getm2(struct mbuf *m, int len, int how, short type, int flags)
{
- struct mbuf *mb, *top, *cur, *mtail;
- int num, rem;
- int i;
+ struct mbuf *mb, *nm = NULL, *mtail = NULL;
+
+ KASSERT(len >= 0, ("%s: len is < 0", __func__));
+
+ /* Validate flags. */
+ flags &= (M_PKTHDR | M_EOR);
- KASSERT(len >= 0, ("m_getm(): len is < 0"));
+ /* Packet header mbuf must be first in chain. */
+ if ((flags & M_PKTHDR) && m != NULL)
+ flags &= ~M_PKTHDR;
- /* If m != NULL, we will append to the end of that chain. */
- if (m != NULL)
- for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next);
- else
- mtail = NULL;
+ /* Loop and append maximum sized mbufs to the chain tail. */
+ while (len > 0) {
+ if (len > MCLBYTES)
+ mb = m_getjcl(how, type, (flags & M_PKTHDR),
+ MJUMPAGESIZE);
+ else if (len >= MINCLSIZE)
+ mb = m_getcl(how, type, (flags & M_PKTHDR));
+ else if (flags & M_PKTHDR)
+ mb = m_gethdr(how, type);
+ else
+ mb = m_get(how, type);
- /*
- * Calculate how many mbufs+clusters ("packets") we need and how much
- * leftover there is after that and allocate the first mbuf+cluster
- * if required.
- */
- num = len / MCLBYTES;
- rem = len % MCLBYTES;
- top = cur = NULL;
- if (num > 0) {
- if ((top = cur = m_getcl(how, type, 0)) == NULL)
- goto failed;
- top->m_len = 0;
- }
- num--;
+ /* Fail the whole operation if one mbuf can't be allocated. */
+ if (mb == NULL) {
+ if (nm != NULL)
+ m_freem(nm);
+ return (NULL);
+ }
- for (i = 0; i < num; i++) {
- mb = m_getcl(how, type, 0);
- if (mb == NULL)
- goto failed;
- mb->m_len = 0;
- cur = (cur->m_next = mb);
- }
- if (rem > 0) {
- mb = (rem >= MINCLSIZE) ?
- m_getcl(how, type, 0) : m_get(how, type);
- if (mb == NULL)
- goto failed;
- mb->m_len = 0;
- if (cur == NULL)
- top = mb;
+ /* Book keeping. */
+ len -= (mb->m_flags & M_EXT) ? mb->m_ext.ext_size :
+ ((mb->m_flags & M_PKTHDR) ? MHLEN : MLEN);
+ if (mtail != NULL)
+ mtail->m_next = mb;
else
- cur->m_next = mb;
+ nm = mb;
+ mtail = mb;
+ flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */
}
+ if (flags & M_EOR)
+ mtail->m_flags |= M_EOR; /* Only valid on the last mbuf. */
+
+ /* If mbuf was supplied, append new chain to the end of it. */
+ if (m != NULL) {
+ for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next)
+ ;
+ mtail->m_next = nm;
+ mtail->m_flags &= ~M_EOR;
+ } else
+ m = nm;
- if (mtail != NULL)
- mtail->m_next = top;
- return top;
-failed:
- if (top != NULL)
- m_freem(top);
- return NULL;
+ return (m);
}
/*
@@ -1610,55 +1610,58 @@
#endif
+/*
+ * Copy the contents of uio into a properly sized mbuf chain.
+ */
struct mbuf *
-m_uiotombuf(struct uio *uio, int how, int len, int align)
+m_uiotombuf(struct uio *uio, int how, int len, int align, int flags)
{
- struct mbuf *m_new = NULL, *m_final = NULL;
- int progress = 0, error = 0, length, total;
+ struct mbuf *m, *mb;
+ int error, length, total;
+ int progress = 0;
+ /*
+ * len can be zero or an arbitrary large value bound by
+ * the total data supplied by the uio.
+ */
if (len > 0)
total = min(uio->uio_resid, len);
else
total = uio->uio_resid;
+
+ /*
+ * The smallest unit returned by m_getm2() is a single mbuf
+ * with pkthdr. We can't align past it. Align align itself.
+ */
+ if (align)
+ align &= ~(sizeof(long) - 1);
if (align >= MHLEN)
- goto nospace;
- if (total + align > MHLEN)
- m_final = m_getcl(how, MT_DATA, M_PKTHDR);
- else
- m_final = m_gethdr(how, MT_DATA);
- if (m_final == NULL)
- goto nospace;
- m_final->m_data += align;
- m_new = m_final;
- while (progress < total) {
- length = total - progress;
- if (length > MCLBYTES)
- length = MCLBYTES;
- if (m_new == NULL) {
- if (length > MLEN)
- m_new = m_getcl(how, MT_DATA, 0);
- else
- m_new = m_get(how, MT_DATA);
- if (m_new == NULL)
- goto nospace;
+ return (NULL);
+
+ /* Give us all or nothing. */
+ m = m_getm2(NULL, total + align, how, MT_DATA, flags);
+ if (m == NULL)
+ return (NULL);
+ m->m_data += align;
+
+ /* Fill all mbufs with uio data and update header information. */
+ for (mb = m; mb != NULL; mb = mb->m_next) {
+ length = min(M_TRAILINGSPACE(mb), total - progress);
+
+ error = uiomove(mtod(mb, void *), length, uio);
+ if (error) {
+ m_freem(m);
+ return (NULL);
}
- error = uiomove(mtod(m_new, void *), length, uio);
- if (error)
- goto nospace;
+
+ mb->m_len = length;
progress += length;
- m_new->m_len = length;
- if (m_new != m_final)
- m_cat(m_final, m_new);
- m_new = NULL;
+ if (flags & M_PKTHDR)
+ m->m_pkthdr.len += length;
}
- m_fixhdr(m_final);
- return (m_final);
-nospace:
- if (m_new)
- m_free(m_new);
- if (m_final)
- m_freem(m_final);
- return (NULL);
+ KASSERT(progress == total, ("%s: progress != total", __func__));
+
+ return (m);
}
/*
==== //depot/projects/soc2005/libalias/sys/kern/uipc_socket.c#8 (text+ko) ====
@@ -94,7 +94,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/uipc_socket.c,v 1.284 2006/10/22 11:52:14 rwatson Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/uipc_socket.c,v 1.285 2006/11/02 17:45:28 andre Exp $");
#include "opt_inet.h"
#include "opt_mac.h"
@@ -813,9 +813,11 @@
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
-#endif /*ZERO_COPY_SOCKETS*/
/*
+ * sosend_copyin() is only used if zero copy sockets are enabled. Otherwise
+ * sosend_dgram() and sosend_generic() use m_uiotombuf().
+ *
* sosend_copyin() accepts a uio and prepares an mbuf chain holding part or
* all of the data referenced by the uio. If desired, it uses zero-copy.
* *space will be updated to reflect data copied in.
@@ -939,6 +941,7 @@
*retmp = top;
return (error);
}
+#endif /*ZERO_COPY_SOCKETS*/
#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
@@ -954,7 +957,9 @@
{
long space, resid;
int clen = 0, error, dontroute;
+#ifdef ZERO_COPY_SOCKETS
int atomic = sosendallatonce(so) || top;
+#endif
KASSERT(so->so_type == SOCK_DGRAM, ("sodgram_send: !SOCK_DGRAM"));
KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
@@ -1040,9 +1045,19 @@
if (flags & MSG_EOR)
top->m_flags |= M_EOR;
} else {
+#ifdef ZERO_COPY_SOCKETS
error = sosend_copyin(uio, &top, atomic, &space, flags);
if (error)
goto out;
+#else
+ top = m_uiotombuf(uio, M_WAITOK, space, max_hdr,
+ (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0)));
+ if (top == NULL) {
+ error = EFAULT; /* only possible error */
+ goto out;
+ }
+ space -= resid - uio->uio_resid;
+#endif
resid = uio->uio_resid;
}
KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
@@ -1202,12 +1217,25 @@
if (flags & MSG_EOR)
top->m_flags |= M_EOR;
} else {
+#ifdef ZERO_COPY_SOCKETS
error = sosend_copyin(uio, &top, atomic,
&space, flags);
if (error != 0) {
SOCKBUF_LOCK(&so->so_snd);
goto release;
}
+#else
+ top = m_uiotombuf(uio, M_WAITOK, space,
+ (atomic ? max_hdr : 0),
+ (atomic ? M_PKTHDR : 0) |
+ ((flags & MSG_EOR) ? M_EOR : 0));
+ if (top == NULL) {
+ SOCKBUF_LOCK(&so->so_snd);
+ error = EFAULT; /* only possible error */
+ goto release;
+ }
+ space -= resid - uio->uio_resid;
+#endif
resid = uio->uio_resid;
}
if (dontroute) {
==== //depot/projects/soc2005/libalias/sys/kern/uipc_syscalls.c#6 (text+ko) ====
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.239 2006/10/22 11:52:14 rwatson Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.241 2006/11/02 17:37:21 andre Exp $");
#include "opt_compat.h"
#include "opt_ktrace.h"
@@ -1882,19 +1882,20 @@
struct vnode *vp;
struct vm_object *obj = NULL;
struct socket *so = NULL;
- struct mbuf *m, *m_header = NULL;
+ struct mbuf *m = NULL;
struct sf_buf *sf;
struct vm_page *pg;
- off_t off, xfsize, hdtr_size, sbytes = 0;
- int error, headersize = 0, headersent = 0;
+ off_t off, xfsize, hdtr_size = 0, sbytes = 0, rem = 0;
+ int error, headersize = 0, headersent = 0, mnw = 0;
int vfslocked;
NET_LOCK_GIANT();
- hdtr_size = 0;
-
/*
- * The descriptor must be a regular file and have a backing VM object.
+ * The file descriptor must be a regular file and have a
+ * backing VM object.
+ * File offset must be positive. If it goes beyond EOF
+ * we send only the header/trailer and no payload data.
*/
if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
goto done;
@@ -1922,7 +1923,17 @@
error = EINVAL;
goto done;
}
- if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, NULL)) != 0)
+ if (uap->offset < 0) {
+ error = EINVAL;
+ goto done;
+ }
+
+ /*
+ * The socket must be a stream socket and connected.
+ * Remember if it a blocking or non-blocking socket.
+ */
+ if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
+ NULL)) != 0)
goto done;
so = sock_fp->f_data;
if (so->so_type != SOCK_STREAM) {
@@ -1933,10 +1944,13 @@
error = ENOTCONN;
goto done;
}
- if (uap->offset < 0) {
- error = EINVAL;
- goto done;
- }
+ /*
+ * Do not wait on memory allocations but return ENOMEM for
+ * caller to retry later.
+ * XXX: Experimental.
+ */
+ if (uap->flags & SF_MNOWAIT)
+ mnw = 1;
#ifdef MAC
SOCK_LOCK(so);
@@ -1946,290 +1960,307 @@
goto done;
#endif
- /*
- * If specified, get the pointer to the sf_hdtr struct for
- * any headers/trailers.
- */
+ /* If headers are specified copy them into mbufs. */
if (hdr_uio != NULL) {
hdr_uio->uio_td = td;
hdr_uio->uio_rw = UIO_WRITE;
if (hdr_uio->uio_resid > 0) {
- m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0);
- if (m_header == NULL)
+ m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
+ 0, 0, 0);
+ if (m == NULL) {
+ error = mnw ? EAGAIN : ENOBUFS;
goto done;
- headersize = m_header->m_pkthdr.len;
+ }
+ headersize = hdr_uio->uio_resid;
if (compat)
sbytes += headersize;
}
}
- /*
- * Protect against multiple writers to the socket.
- */
+ /* Protect against multiple writers to the socket. */
SOCKBUF_LOCK(&so->so_snd);
(void) sblock(&so->so_snd, M_WAITOK);
SOCKBUF_UNLOCK(&so->so_snd);
/*
- * Loop through the pages in the file, starting with the requested
+ * Loop through the pages of the file, starting with the requested
* offset. Get a file page (do I/O if necessary), map the file page
* into an sf_buf, attach an mbuf header to the sf_buf, and queue
* it on the socket.
+ * This is done in two loops. The inner loop turns as many pages
+ * as it can, up to available socket buffer space, without blocking
+ * into mbufs to have it bulk delivered into the socket send buffer.
+ * The outer loop checks the state and available space of the socket
+ * and takes care of the overall progress.
*/
- for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
- vm_pindex_t pindex;
- vm_offset_t pgoff;
+ for (off = uap->offset; ; ) {
+ int loopbytes = 0;
+ int space = 0;
+ int done = 0;
- pindex = OFF_TO_IDX(off);
- VM_OBJECT_LOCK(obj);
-retry_lookup:
/*
- * Calculate the amount to transfer. Not to exceed a page,
- * the EOF, or the passed in nbytes.
- */
- xfsize = obj->un_pager.vnp.vnp_size - off;
- VM_OBJECT_UNLOCK(obj);
- if (xfsize > PAGE_SIZE)
- xfsize = PAGE_SIZE;
- pgoff = (vm_offset_t)(off & PAGE_MASK);
- if (PAGE_SIZE - pgoff < xfsize)
- xfsize = PAGE_SIZE - pgoff;
- if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
- xfsize = uap->nbytes - sbytes;
- if (xfsize <= 0) {
- if (m_header != NULL) {
- m = m_header;
- m_header = NULL;
- SOCKBUF_LOCK(&so->so_snd);
- goto retry_space;
- } else
- break;
- }
- /*
- * Optimize the non-blocking case by looking at the socket space
- * before going to the extra work of constituting the sf_buf.
+ * Check the socket state for ongoing connection,
+ * no errors and space in socket buffer.
+ * If space is low allow for the remainder of the
+ * file to be processed if it fits the socket buffer.
+ * Otherwise block in waiting for sufficient space
+ * to proceed, or if the socket is nonblocking, return
+ * to userland with EAGAIN while reporting how far
+ * we've come.
+ * We wait until the socket buffer has significant free
+ * space to do bulk sends. This makes good use of file
+ * system read ahead and allows packet segmentation
+ * offloading hardware to take over lots of work. If
+ * we were not careful here we would send off only one
+ * sfbuf at a time.
*/
SOCKBUF_LOCK(&so->so_snd);
- if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
- if (so->so_snd.sb_state & SBS_CANTSENDMORE)
- error = EPIPE;
- else
- error = EAGAIN;
- sbunlock(&so->so_snd);
+ if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
+ so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
+retry_space:
+ if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
+ error = EPIPE;
+ SOCKBUF_UNLOCK(&so->so_snd);
+ goto done;
+ } else if (so->so_error) {
+ error = so->so_error;
+ so->so_error = 0;
SOCKBUF_UNLOCK(&so->so_snd);
goto done;
}
- SOCKBUF_UNLOCK(&so->so_snd);
- VM_OBJECT_LOCK(obj);
- /*
- * Attempt to look up the page.
- *
- * Allocate if not found
- *
- * Wait and loop if busy.
- */
- pg = vm_page_lookup(obj, pindex);
-
- if (pg == NULL) {
- pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY |
- VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
- if (pg == NULL) {
- VM_OBJECT_UNLOCK(obj);
- VM_WAIT;
- VM_OBJECT_LOCK(obj);
- goto retry_lookup;
+ space = sbspace(&so->so_snd);
+ if (space < rem &&
+ (space <= 0 ||
+ space < so->so_snd.sb_lowat)) {
+ if (so->so_state & SS_NBIO) {
+ SOCKBUF_UNLOCK(&so->so_snd);
+ error = EAGAIN;
+ goto done;
}
- } else if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
- goto retry_lookup;
- else {
+ /*
+ * sbwait drops the lock while sleeping.
+ * When we loop back to retry_space the
+ * state may have changed and we retest
+ * for it.
+ */
+ error = sbwait(&so->so_snd);
/*
- * Wire the page so it does not get ripped out from
- * under us.
+ * An error from sbwait usually indicates that we've
+ * been interrupted by a signal. If we've sent anything
+ * then return bytes sent, otherwise return the error.
*/
- vm_page_lock_queues();
- vm_page_wire(pg);
- vm_page_unlock_queues();
+ if (error) {
+ SOCKBUF_UNLOCK(&so->so_snd);
+ goto done;
+ }
+ goto retry_space;
}
+ SOCKBUF_UNLOCK(&so->so_snd);
/*
- * If page is not valid for what we need, initiate I/O
+ * Loop and construct maximum sized mbuf chain to be bulk
+ * dumped into socket buffer.
*/
+ while(space > loopbytes) {
+ vm_pindex_t pindex;
+ vm_offset_t pgoff;
+ struct mbuf *m0;
- if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
- VM_OBJECT_UNLOCK(obj);
- } else if (uap->flags & SF_NODISKIO) {
- error = EBUSY;
- } else {
- int bsize, resid;
-
+ VM_OBJECT_LOCK(obj);
+ /*
+ * Calculate the amount to transfer.
+ * Not to exceed a page, the EOF,
+ * or the passed in nbytes.
+ */
+ pgoff = (vm_offset_t)(off & PAGE_MASK);
+ xfsize = omin(PAGE_SIZE - pgoff,
+ obj->un_pager.vnp.vnp_size - off -
+ sbytes - loopbytes);
+ if (uap->nbytes)
+ rem = (uap->nbytes - sbytes - loopbytes);
+ else
+ rem = obj->un_pager.vnp.vnp_size - off -
+ sbytes - loopbytes;
+ xfsize = omin(rem, xfsize);
+ if (xfsize <= 0) {
+ VM_OBJECT_UNLOCK(obj);
+ done = 1; /* all data sent */
+ break;
+ }
/*
- * Ensure that our page is still around when the I/O
- * completes.
+ * Don't overflow the send buffer.
+ * Stop here and send out what we've
+ * already got.
*/
- vm_page_io_start(pg);
- VM_OBJECT_UNLOCK(obj);
-
+ if (space < loopbytes + xfsize) {
+ VM_OBJECT_UNLOCK(obj);
+ break;
+ }
+retry_lookup:
/*
- * Get the page from backing store.
+ * Attempt to look up the page.
+ * Allocate if not found or
+ * wait and loop if busy.
*/
- bsize = vp->v_mount->mnt_stat.f_iosize;
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- vn_lock(vp, LK_SHARED | LK_RETRY, td);
+ pindex = OFF_TO_IDX(off);
+ pg = vm_page_lookup(obj, pindex);
+ if (pg == NULL) {
+ pg = vm_page_alloc(obj, pindex,
+ VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL |
+ VM_ALLOC_WIRED);
+ if (pg == NULL) {
+ VM_OBJECT_UNLOCK(obj);
+ VM_WAIT;
+ VM_OBJECT_LOCK(obj);
+ goto retry_lookup;
+ }
+ } else if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
+ goto retry_lookup;
+ else {
+ /*
+ * Wire the page so it does not get
+ * ripped out from under us.
+ */
+ vm_page_lock_queues();
+ vm_page_wire(pg);
+ vm_page_unlock_queues();
+ }
+
/*
- * XXXMAC: Because we don't have fp->f_cred here,
- * we pass in NOCRED. This is probably wrong, but
- * is consistent with our original implementation.
+ * Check if page is valid for what we need,
+ * otherwise initiate I/O.
+ * If we already turned some pages into mbufs,
+ * send them off before we come here again and
+ * block.
*/
- error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
- trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
- IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
- td->td_ucred, NOCRED, &resid, td);
- VOP_UNLOCK(vp, 0, td);
- VFS_UNLOCK_GIANT(vfslocked);
- VM_OBJECT_LOCK(obj);
- vm_page_io_finish(pg);
- if (!error)
+ if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
+ VM_OBJECT_UNLOCK(obj);
+ else if (m != NULL)
+ error = EAGAIN; /* send what we already got */
+ else if (uap->flags & SF_NODISKIO)
+ error = EBUSY;
+ else {
+ int bsize, resid;
+
+ /*
+ * Ensure that our page is still around
+ * when the I/O completes.
+ */
+ vm_page_io_start(pg);
+ VM_OBJECT_UNLOCK(obj);
+
+ /*
+ * Get the page from backing store.
+ */
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ vn_lock(vp, LK_SHARED | LK_RETRY, td);
+
+ /*
+ * XXXMAC: Because we don't have fp->f_cred
+ * here, we pass in NOCRED. This is probably
+ * wrong, but is consistent with our original
+ * implementation.
+ */
+ error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
+ trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
+ IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
+ td->td_ucred, NOCRED, &resid, td);
+ VOP_UNLOCK(vp, 0, td);
+ VFS_UNLOCK_GIANT(vfslocked);
+ VM_OBJECT_LOCK(obj);
+ vm_page_io_finish(pg);
+ if (!error)
+ VM_OBJECT_UNLOCK(obj);
+ mbstat.sf_iocnt++;
+ }
+ if (error) {
+ vm_page_lock_queues();
+ vm_page_unwire(pg, 0);
+ /*
+ * See if anyone else might know about
+ * this page. If not and it is not valid,
+ * then free it.
+ */
+ if (pg->wire_count == 0 && pg->valid == 0 &&
+ pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
+ pg->hold_count == 0) {
+ vm_page_free(pg);
+ }
+ vm_page_unlock_queues();
VM_OBJECT_UNLOCK(obj);
- mbstat.sf_iocnt++;
- }
-
- if (error) {
- vm_page_lock_queues();
- vm_page_unwire(pg, 0);
+ if (error == EAGAIN)
+ error = 0; /* not a real error */
+ break;
+ }
+
/*
- * See if anyone else might know about this page.
- * If not and it is not valid, then free it.
+ * Get a sendfile buf. We usually wait as long
+ * as necessary, but this wait can be interrupted.
*/
- if (pg->wire_count == 0 && pg->valid == 0 &&
- pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
- pg->hold_count == 0) {
>>> TRUNCATED FOR MAIL (1000 lines) <<<
More information about the p4-projects
mailing list