PERFORCE change 113714 for review
Robert Watson
rwatson at FreeBSD.org
Tue Jan 30 18:48:36 UTC 2007
http://perforce.freebsd.org/chv.cgi?CH=113714
Change 113714 by rwatson at rwatson_cinnamon on 2007/01/30 18:48:29
First experimentation with zero-copy (i.e., one-copy) BPF:
- Break out the current BPF buffering model into bpf_buffer.c.
- Add a new bpf_zerocopy.c that implements an alternative buffering
model based on pages of memory "donated" by a user process.
The basic model is that the BPF consumer will select a buffering
mode before attaching to an interface; if none is selected, then
the default is the current behavior providing complete
compatibility. If zero-copy buffering is selected, an ioctl() is used to
identify two page-aligned buffers, each an integer multiple of the page
size, that will be wired by the kernel and used in place of malloc(9)-
allocated buffers. These buffers will be written to directly by
the BPF tap routines, and are laid out identically to buffers
read from user space. For now, an explicit acknowledgement via an
ioctl is required to rotate the buffers when user space has
completed reading the current finished buffer (hbuf) in similar
style to what occurs when bpfread() finishes currently. It would
be desirable to trigger rotation via the shared memory also,
perhaps via a header with flags indicating if user space is ready for
the buffer to be re-used.
sf_bufs are used to map the user pages into kernel, which requires
a fair amount of bookkeeping as they must be individually managed
and are not contiguously mapped. However, this is significantly
more functional than trying to do the same with an mmap(2) mapping of the
BPF device due to serious limitations in the device pager.
I have not yet updated any applications (and especially libpcap)
or performed any performance benchmarking or serious debugging.
This is simply a code snapshot as a starting point for discussion.
Affected files ...
.. //depot/projects/zcopybpf/src/sys/conf/files#3 edit
.. //depot/projects/zcopybpf/src/sys/conf/options#2 edit
.. //depot/projects/zcopybpf/src/sys/net/bpf.c#2 edit
.. //depot/projects/zcopybpf/src/sys/net/bpf.h#2 edit
.. //depot/projects/zcopybpf/src/sys/net/bpf_buffer.c#3 edit
.. //depot/projects/zcopybpf/src/sys/net/bpf_buffer.h#1 add
.. //depot/projects/zcopybpf/src/sys/net/bpf_zerocopy.c#1 add
.. //depot/projects/zcopybpf/src/sys/net/bpf_zerocopy.h#1 add
.. //depot/projects/zcopybpf/src/sys/net/bpfdesc.h#3 edit
Differences ...
==== //depot/projects/zcopybpf/src/sys/conf/files#3 (text+ko) ====
@@ -1510,8 +1510,10 @@
libkern/strtouq.c standard
libkern/strvalid.c standard
net/bpf.c standard
+net/bpf_buffer.c optional bpf
net/bpf_jitter.c optional bpf_jitter
net/bpf_filter.c optional bpf | netgraph_bpf
+net/bpf_zerocopy.c optional bpf_zerocopy
net/bridgestp.c optional if_bridge
net/bsd_comp.c optional ppp_bsdcomp
net/if.c standard
==== //depot/projects/zcopybpf/src/sys/conf/options#2 (text+ko) ====
@@ -492,6 +492,7 @@
# DRM options
DRM_DEBUG opt_drm.h
+BPF_ZEROCOPY opt_bpf.h
ZERO_COPY_SOCKETS opt_zero.h
TI_PRIVATE_JUMBOS opt_ti.h
TI_JUMBO_HDRSPLIT opt_ti.h
==== //depot/projects/zcopybpf/src/sys/net/bpf.c#2 (text+ko) ====
@@ -65,9 +65,13 @@
#include <net/if.h>
#include <net/bpf.h>
+#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
+#ifdef BPF_ZEROCOPY
+#include <net/bpf_zerocopy.h>
+#endif
#include <net/bpfdesc.h>
#include <netinet/in.h>
@@ -79,7 +83,7 @@
#include <security/mac/mac_framework.h>
-static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
+MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
@@ -95,19 +99,17 @@
static struct mtx bpf_mtx; /* bpf global lock */
static int bpf_bpfd_cnt;
-static void bpf_allocbufs(struct bpf_d *);
static void bpf_attachd(struct bpf_d *, struct bpf_if *);
static void bpf_detachd(struct bpf_d *);
static void bpf_freed(struct bpf_d *);
-static void bpf_mcopy(const void *, void *, size_t);
static int bpf_movein(struct uio *, int, int,
struct mbuf **, struct sockaddr *, struct bpf_insn *);
static int bpf_setif(struct bpf_d *, struct ifreq *);
static void bpf_timed_out(void *);
static __inline void
bpf_wakeup(struct bpf_d *);
-static void catchpacket(struct bpf_d *, u_char *, u_int,
- u_int, void (*)(const void *, void *, size_t),
+static void catchpacket(struct bpf_d *, u_char *, u_int, u_int,
+ void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
struct timeval *);
static void reset_d(struct bpf_d *);
static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
@@ -123,12 +125,6 @@
* The default read buffer size is patchable.
*/
SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
-static int bpf_bufsize = 4096;
-SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW,
- &bpf_bufsize, 0, "");
-static int bpf_maxbufsize = BPF_MAXBUFSIZE;
-SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
- &bpf_maxbufsize, 0, "");
static int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
&bpf_maxinsns, 0, "Maximum bpf program instructions");
@@ -159,7 +155,163 @@
static struct filterops bpfread_filtops =
{ 1, NULL, filt_bpfdetach, filt_bpfread };
+/*
+ * Wrapper functions for various buffering methods. If the set of buffer
+ * modes expands, we will probably want to introduce a switch data structure
+ * similar to protosw, etc.
+ */
+static void
+bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
+ u_int len)
+{
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_BUFFER:
+ return (bpf_buffer_append_bytes(d, buf, offset, src, len));
+
+#ifdef BPF_ZEROCOPY
+ case BPF_BUFMODE_ZBUF:
+ return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
+#endif
+
+ default:
+ panic("bpf_buf_append_bytes");
+ }
+}
+
+static void
+bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
+ u_int len)
+{
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_BUFFER:
+ return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
+
+#ifdef BPF_ZEROCOPY
+ case BPF_BUFMODE_ZBUF:
+ return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
+#endif
+
+ default:
+ panic("bpf_buf_append_mbuf");
+ }
+}
+
+static void
+bpf_free(struct bpf_d *d)
+{
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_BUFFER:
+ return (bpf_buffer_free(d));
+
+#ifdef BPF_ZEROCOPY
+ case BPF_BUFMODE_ZBUF:
+ return (bpf_zerocopy_free(d));
+#endif
+
+ default:
+ panic("bpf_buf_free");
+ }
+}
+
+static int
+bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
+{
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_BUFFER:
+ return (bpf_buffer_uiomove(d, buf, len, uio));
+
+#ifdef BPF_ZEROCOPY
+ case BPF_BUFMODE_ZBUF:
+ return (bpf_zerocopy_uiomove(d, buf, len, uio));
+#endif
+
+ default:
+ panic("bpf_buf_uiomove");
+ }
+}
+
static int
+bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
+{
+
+ if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
+ return (EOPNOTSUPP);
+ return (bpf_buffer_ioctl_sblen(d, i));
+}
+
+static int
+bpf_ioctl_ackzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
+{
+
+ if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
+ return (EOPNOTSUPP);
+#ifdef BPF_ZEROCOPY
+ return (bpf_zerocopy_ioctl_ackzbuf(td, d, bz));
+#else
+ panic("bpf_ioctl_getznext");
+#endif
+}
+
+static int
+bpf_ioctl_getzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
+{
+
+ if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
+ return (EOPNOTSUPP);
+#ifdef BPF_ZEROCOPY
+ return (bpf_zerocopy_ioctl_getzbuf(td, d, bz));
+#else
+ panic("bpf_ioctl_getznext");
+#endif
+}
+
+static int
+bpf_ioctl_getznext(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
+{
+
+ if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
+ return (EOPNOTSUPP);
+#ifdef BPF_ZEROCOPY
+ return (bpf_zerocopy_ioctl_getznext(td, d, bz));
+#else
+ panic("bpf_ioctl_getznext");
+#endif
+}
+
+static int
+bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, u_int *i)
+{
+
+ if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
+ return (EOPNOTSUPP);
+#ifdef BPF_ZEROCOPY
+ return (bpf_zerocopy_ioctl_getzmax(td, d, i));
+#else
+ return (ENOTTY);
+#endif
+}
+
+static int
+bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
+{
+
+ if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
+ return (EOPNOTSUPP);
+#ifdef BPF_ZEROCOPY
+ return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
+#else
+ return (ENOTTY);
+#endif
+}
+
+/*
+ * General BPF functions.
+ */
+static int
bpf_movein(struct uio *uio, int linktype, int mtu, struct mbuf **mp,
struct sockaddr *sockp, struct bpf_insn *wfilter)
{
@@ -404,7 +556,14 @@
"bpf%d", dev2unit(dev));
MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
dev->si_drv1 = d;
- d->bd_bufsize = bpf_bufsize;
+
+ /*
+ * XXXRW: For historical reasons, perform a one-time initialization
+ * call to the buffer routines, even though we're not yet committed
+ * to a particular buffer method.
+ */
+ bpf_buffer_init(d);
+ d->bd_bufmode = BPF_BUFMODE_DEFAULT;
d->bd_sig = SIGIO;
d->bd_seesent = 1;
d->bd_pid = td->td_proc->p_pid;
@@ -451,18 +610,19 @@
return (0);
}
-
/*
- * Rotate the packet buffers in descriptor d. Move the store buffer
- * into the hold slot, and the free buffer into the store slot.
- * Zero the length of the new store buffer.
+ * Rotate the packet buffers in descriptor d. Move the store buffer into
+ * the hold slot, and the free buffer into the store slot. Zero the length of
+ * the new store buffer. Descriptor lock should be held.
*/
-#define ROTATE_BUFFERS(d) \
- (d)->bd_hbuf = (d)->bd_sbuf; \
- (d)->bd_hlen = (d)->bd_slen; \
- (d)->bd_sbuf = (d)->bd_fbuf; \
- (d)->bd_slen = 0; \
- (d)->bd_fbuf = NULL;
+#define ROTATE_BUFFERS(d) do { \
+ (d)->bd_hbuf = (d)->bd_sbuf; \
+ (d)->bd_hlen = (d)->bd_slen; \
+ (d)->bd_sbuf = (d)->bd_fbuf; \
+ (d)->bd_slen = 0; \
+ (d)->bd_fbuf = NULL; \
+} while (0)
+
/*
* bpfread - read next chunk of packets from buffers
*/
@@ -553,8 +713,12 @@
* Move data from hold buffer into user space.
* We know the entire buffer is transferred since
* we checked above that the read buffer is bpf_bufsize bytes.
+ *
+ * XXXRW: More synchronization needed here: what if a second thread
+ * issues a read on the same fd at the same time? Don't want this
+ * getting invalidated.
*/
- error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
+ error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
BPFD_LOCK(d);
d->bd_fbuf = d->bd_hbuf;
@@ -565,7 +729,6 @@
return (error);
}
-
/*
* If there are processes sleeping on this descriptor, wake them up.
*/
@@ -685,6 +848,10 @@
* BIOCGSEESENT Get "see packets sent" flag
* BIOCSSEESENT Set "see packets sent" flag
* BIOCLOCK Set "locked" flag
+ * BIOCGETZBUF Query current zero-copy buffer locations.
+ * BIOCSETZBUF Set current zero-copy buffer locations.
+ * BIOCACKZBUF Acknowledge reading zero-copy buffers.
+ * BIOCGETZMAX Get maximum zero-copy buffer size.
*/
/* ARGSUSED */
static int
@@ -721,6 +888,8 @@
case BIOCSRTIMEOUT:
case BIOCIMMEDIATE:
case TIOCGPGRP:
+ case BIOCACKZBUF:
+ case BIOCGETZBUF:
break;
default:
return (EPERM);
@@ -773,17 +942,7 @@
* Set buffer length.
*/
case BIOCSBLEN:
- if (d->bd_bif != NULL)
- error = EINVAL;
- else {
- u_int size = *(u_int *)addr;
-
- if (size > bpf_maxbufsize)
- *(u_int *)addr = size = bpf_maxbufsize;
- else if (size < BPF_MINBUFSIZE)
- *(u_int *)addr = size = BPF_MINBUFSIZE;
- d->bd_bufsize = size;
- }
+ error = bpf_ioctl_sblen(d, (u_int *)addr);
break;
/*
@@ -1002,6 +1161,59 @@
case BIOCGRSIG:
*(u_int *)addr = d->bd_sig;
break;
+
+ case BIOCGETBUFMODE:
+ *(u_int *)addr = d->bd_bufmode;
+ break;
+
+ case BIOCSETBUFMODE:
+ /*
+ * Allow the buffering mode to be changed as long as we
+ * haven't yet committed to a particular mode. Our
+ * definition of commitment, for now, is whether or not a
+ * buffer has been allocated or an interface attached, since
+ * that's the point where things get tricky.
+ *
+ * XXXRW: This will need some refinement. Is checking both
+ * for buffers and interface binding redundant?
+ */
+ switch (*(u_int *)addr) {
+ case BPF_BUFMODE_BUFFER:
+ break;
+
+#ifdef BPF_ZEROCOPY
+ case BPF_BUFMODE_ZBUF:
+ break;
+#endif
+
+ default:
+ return (EINVAL);
+ }
+
+ BPFD_LOCK(d);
+ if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
+ d->bd_fbuf != NULL || d->bd_bif != NULL) {
+ BPFD_UNLOCK(d);
+ return (EBUSY);
+ }
+ d->bd_bufmode = *(u_int *)addr;
+ BPFD_UNLOCK(d);
+ break;
+
+ case BIOCACKZBUF:
+ return (bpf_ioctl_ackzbuf(td, d, (struct bpf_zbuf *)addr));
+
+ case BIOCGETZBUF:
+ return (bpf_ioctl_getzbuf(td, d, (struct bpf_zbuf *)addr));
+
+ case BIOCGETZMAX:
+ return (bpf_ioctl_getzmax(td, d, (u_int *)addr));
+
+ case BIOCGETZNEXT:
+ return (bpf_ioctl_getznext(td, d, (struct bpf_zbuf *)addr));
+
+ case BIOCSETZBUF:
+ return (bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr));
}
return (error);
}
@@ -1102,13 +1314,30 @@
return (ENXIO);
bp = theywant->if_bpf;
+
/*
- * Allocate the packet buffers if we need to.
- * If we're already attached to requested interface,
- * just flush the buffer.
+ * Behavior here depends on the buffering model. If we're using
+ * kernel memory buffers, then we can allocate them here. If we're
+ * using zero-copy, then the user process must have registered
+ * buffers by the time we get here. If not, return an error.
+ *
+ * XXXRW: Could this be better abstracted?
*/
- if (d->bd_sbuf == NULL)
- bpf_allocbufs(d);
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_BUFFER:
+ if (d->bd_sbuf == NULL)
+ bpf_buffer_alloc(d);
+ KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
+ break;
+
+ case BPF_BUFMODE_ZBUF:
+ if (d->bd_sbuf == NULL)
+ return (EINVAL);
+ break;
+
+ default:
+ panic("bpf_setif: bufmode %d", d->bd_bufmode);
+ }
if (bp != d->bd_bif) {
if (d->bd_bif)
/*
@@ -1252,7 +1481,8 @@
#ifdef MAC
if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
- catchpacket(d, pkt, pktlen, slen, bcopy, &tv);
+ catchpacket(d, pkt, pktlen, slen,
+ bpf_append_bytes, &tv);
}
BPFD_UNLOCK(d);
}
@@ -1260,30 +1490,6 @@
}
/*
- * Copy data from an mbuf chain into a buffer. This code is derived
- * from m_copydata in sys/uipc_mbuf.c.
- */
-static void
-bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
-{
- const struct mbuf *m;
- u_int count;
- u_char *dst;
-
- m = src_arg;
- dst = dst_arg;
- while (len > 0) {
- if (m == NULL)
- panic("bpf_mcopy");
- count = min(m->m_len, len);
- bcopy(mtod(m, void *), dst, count);
- m = m->m_next;
- dst += count;
- len -= count;
- }
-}
-
-/*
* Incoming linkage from device drivers, when packet is in an mbuf chain.
*/
void
@@ -1323,7 +1529,7 @@
if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
catchpacket(d, (u_char *)m, pktlen, slen,
- bpf_mcopy, &tv);
+ bpf_append_mbuf, &tv);
}
BPFD_UNLOCK(d);
}
@@ -1373,7 +1579,7 @@
if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
catchpacket(d, (u_char *)&mb, pktlen, slen,
- bpf_mcopy, &tv);
+ bpf_append_mbuf, &tv);
}
BPFD_UNLOCK(d);
}
@@ -1384,14 +1590,15 @@
* Move the packet data from interface memory (pkt) into the
* store buffer. "cpfn" is the routine called to do the actual data
* transfer. bcopy is passed in to copy contiguous chunks, while
- * bpf_mcopy is passed in to copy mbuf chains. In the latter case,
+ * bpf_append_mbuf is passed in to copy mbuf chains. In the latter case,
* pkt is really an mbuf.
*/
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
- void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
+ void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
+ struct timeval *tv)
{
- struct bpf_hdr *hp;
+ struct bpf_hdr hdr;
int totlen, curlen;
int hdrlen = d->bd_bif->bif_hdrlen;
int do_wakeup = 0;
@@ -1438,16 +1645,20 @@
do_wakeup = 1;
/*
- * Append the bpf header.
+ * Append the bpf header. Note we append the actual header size, but
+ * move forward the length of the header plus padding.
*/
- hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
- hp->bh_tstamp = *tv;
- hp->bh_datalen = pktlen;
- hp->bh_hdrlen = hdrlen;
+ bzero(&hdr, sizeof(hdr));
+ hdr.bh_tstamp = *tv;
+ hdr.bh_datalen = pktlen;
+ hdr.bh_hdrlen = hdrlen;
+ hdr.bh_caplen = totlen - hdrlen;
+ bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));
+
/*
* Copy the packet data into the store buffer and update its length.
*/
- (*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
+ (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen);
d->bd_slen = curlen + totlen;
if (do_wakeup)
@@ -1455,41 +1666,19 @@
}
/*
- * Initialize all nonzero fields of a descriptor.
- */
-static void
-bpf_allocbufs(struct bpf_d *d)
-{
-
- KASSERT(d->bd_fbuf == NULL, ("bpf_allocbufs: bd_fbuf != NULL"));
- KASSERT(d->bd_sbuf == NULL, ("bpf_allocbufs: bd_sbuf != NULL"));
- KASSERT(d->bd_hbuf == NULL, ("bpf_allocbufs: bd_hbuf != NULL"));
-
- d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
- d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
- d->bd_slen = 0;
- d->bd_hlen = 0;
-}
-
-/*
* Free buffers currently in use by a descriptor.
* Called on close.
*/
static void
bpf_freed(struct bpf_d *d)
{
+
/*
* We don't need to lock out interrupts since this descriptor has
* been detached from its interface and it yet hasn't been marked
* free.
*/
- if (d->bd_sbuf != NULL) {
- free(d->bd_sbuf, M_BPF);
- if (d->bd_hbuf != NULL)
- free(d->bd_hbuf, M_BPF);
- if (d->bd_fbuf != NULL)
- free(d->bd_fbuf, M_BPF);
- }
+ bpf_free(d);
if (d->bd_rfilter) {
free((caddr_t)d->bd_rfilter, M_BPF);
#ifdef BPF_JITTER
==== //depot/projects/zcopybpf/src/sys/net/bpf.h#2 (text+ko) ====
@@ -92,6 +92,38 @@
#define BPF_MAJOR_VERSION 1
#define BPF_MINOR_VERSION 1
+/*
+ * Historically, BPF has supported a single buffering model, first using mbuf
+ * clusters in kernel, and later using malloc(9) buffers in kernel. We now
+ * support multiple buffering modes, which may be queried and set using
+ * BIOCGETBUFMODE and BIOCSETBUFMODE. So as to avoid handling the complexity
+ * of changing modes while sniffing packets, the mode becomes fixed once an
+ * interface has been attached to the BPF descriptor.
+ */
+#define BPF_BUFMODE_BUFFER 1 /* Kernel buffers with read(). */
+#define BPF_BUFMODE_ZBUF 2 /* Zero-copy buffers. */
+
+#define BPF_BUFMODE_DEFAULT BPF_BUFMODE_BUFFER /* Default. */
+
+/*
+ * Struct used by BIOCACKZBUF, BIOCGETZNEXT, BIOCGETZBUF, BIOCSETZBUF:
+ * describes up to two zero-copy buffers as used by BPF.
+ *
+ * BIOCACKZBUF Acknowledge read of stored zero-copy buffer (rotate).
+ * BIOCGETZBUF Query current zero-copy buffer locations.
+ * BIOCGETZNEXT Query next stored buffer, if available.
+ * BIOCSETZBUF Set current zero-copy buffer locations (once only).
+ *
+ * Pointers may be set to NULL to indicate a buffer is not configured, should
+ * be freed, or is not being acknowledged.
+ */
+struct bpf_zbuf {
+ void *bz_bufa; /* Location of 'a' zero-copy buffer. */
+ size_t bz_bufalen; /* Size of 'a' zero-copy buffer. */
+ void *bz_bufb; /* Location of 'b' zero-copy buffer. */
+ size_t bz_bufblen; /* Size of 'b' zero-copy buffer. */
+};
+
#define BIOCGBLEN _IOR('B',102, u_int)
#define BIOCSBLEN _IOWR('B',102, u_int)
#define BIOCSETF _IOW('B',103, struct bpf_program)
@@ -115,6 +147,13 @@
#define BIOCGDLTLIST _IOWR('B',121, struct bpf_dltlist)
#define BIOCLOCK _IO('B', 122)
#define BIOCSETWF _IOW('B',123, struct bpf_program)
+#define BIOCGETBUFMODE _IOR('B', 124, u_int)
+#define BIOCSETBUFMODE _IOW('B', 125, u_int)
+#define BIOCACKZBUF _IOW('B', 126, struct bpf_zbuf)
+#define BIOCGETZBUF _IOR('B', 127, struct bpf_zbuf)
+#define BIOCGETZMAX _IOR('B', 128, u_int)
+#define BIOCGETZNEXT _IOR('B', 129, struct bpf_zbuf)
+#define BIOCSETZBUF _IOW('B', 130, struct bpf_zbuf)
/*
* Structure prepended to each packet.
@@ -615,6 +654,15 @@
};
#ifdef _KERNEL
+
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_BPF);
+#endif
+
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_net_bpf);
+#endif
+
/*
* Descriptor associated with each attached hardware interface.
*/
==== //depot/projects/zcopybpf/src/sys/net/bpf_buffer.c#3 (text+ko) ====
@@ -1,4 +1,31 @@
/*-
+ * Copyright (c) 2007 Seccuris Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract to
+ * Seccuris Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
* Copyright (c) 1990, 1991, 1993
* The Regents of the University of California. All rights reserved.
*
@@ -37,180 +64,81 @@
*/
#include "opt_bpf.h"
-#include "opt_mac.h"
-#include "opt_netgraph.h"
-#include <sys/types.h>
#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/conf.h>
-#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
-#include <sys/time.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/signalvar.h>
-#include <sys/filio.h>
-#include <sys/sockio.h>
-#include <sys/ttycom.h>
+#include <sys/socket.h>
#include <sys/uio.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
-#include <sys/event.h>
-#include <sys/file.h>
-#include <sys/poll.h>
-#include <sys/proc.h>
-
-#include <sys/socket.h>
-
#include <net/if.h>
#include <net/bpf.h>
-#ifdef BPF_JITTER
-#include <net/bpf_jitter.h>
-#endif
+#include <net/bpf_buffer.h>
#include <net/bpfdesc.h>
-#include <netinet/in.h>
-#include <netinet/if_ether.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
+/*
+ * Implement historical kernel memory buffering model for BPF: two malloc(9)
+ * kernel buffers are hung off of the descriptor. The size is fixed prior to
+ * attaching to an ifnet, and cannot be changed after that. read(2) simply
+ * copies the data to user space using uiomove(9).
+ */
-#include <net80211/ieee80211_freebsd.h>
+static int bpf_bufsize = 4096;
+SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW,
+ &bpf_bufsize, 0, "");
+static int bpf_maxbufsize = BPF_MAXBUFSIZE;
+SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
+ &bpf_maxbufsize, 0, "");
-#include <security/mac/mac_framework.h>
+void
+bpf_buffer_alloc(struct bpf_d *d)
+{
-static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
+ KASSERT(d->bd_fbuf == NULL, ("bpf_buffer_alloc: bd_fbuf != NULL"));
+ KASSERT(d->bd_sbuf == NULL, ("bpf_buffer_alloc: bd_sbuf != NULL"));
+ KASSERT(d->bd_hbuf == NULL, ("bpf_buffer_alloc: bd_hbuf != NULL"));
-#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
+ // printf("bpf_buffer_alloc size %d\n", d->bd_bufsize);
-#define PRINET 26 /* interruptible */
+ d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
+ d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
+ d->bd_hbuf = NULL;
+ d->bd_slen = 0;
+ d->bd_hlen = 0;
+}
/*
- * Rotate the packet buffers in descriptor d. Move the store buffer
- * into the hold slot, and the free buffer into the store slot.
- * Zero the length of the new store buffer.
+ * Simple data copy to the current kernel buffer.
*/
-#define ROTATE_BUFFERS(d) \
- (d)->bd_hbuf = (d)->bd_sbuf; \
- (d)->bd_hlen = (d)->bd_slen; \
- (d)->bd_sbuf = (d)->bd_fbuf; \
- (d)->bd_slen = 0; \
- (d)->bd_fbuf = NULL;
-/*
- * bpfread - read next chunk of packets from buffers
- */
-static int
-bpfread(struct cdev *dev, struct uio *uio, int ioflag)
+void
+bpf_buffer_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
+ void *src, u_int len)
{
- struct bpf_d *d = dev->si_drv1;
- int timed_out;
- int error;
+ u_char *src_bytes;
- /*
- * Restrict application to use a buffer the same size as
- * as kernel buffers.
- */
- if (uio->uio_resid != d->bd_bufsize)
- return (EINVAL);
+ // printf("bpf_buffer_append_bytes size %d\n", len);
- BPFD_LOCK(d);
- if (d->bd_state == BPF_WAITING)
- callout_stop(&d->bd_callout);
- timed_out = (d->bd_state == BPF_TIMED_OUT);
- d->bd_state = BPF_IDLE;
- /*
- * If the hold buffer is empty, then do a timed sleep, which
- * ends when the timeout expires or when enough packets
- * have arrived to fill the store buffer.
- */
- while (d->bd_hbuf == NULL) {
- if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
- /*
- * A packet(s) either arrived since the previous
- * read or arrived while we were asleep.
- * Rotate the buffers and return what's here.
- */
- ROTATE_BUFFERS(d);
- break;
- }
-
- /*
- * No data is available, check to see if the bpf device
- * is still pointed at a real interface. If not, return
- * ENXIO so that the userland process knows to rebind
- * it before using it again.
- */
- if (d->bd_bif == NULL) {
- BPFD_UNLOCK(d);
- return (ENXIO);
- }
-
- if (ioflag & O_NONBLOCK) {
- BPFD_UNLOCK(d);
- return (EWOULDBLOCK);
- }
- error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
- "bpf", d->bd_rtout);
- if (error == EINTR || error == ERESTART) {
- BPFD_UNLOCK(d);
- return (error);
- }
- if (error == EWOULDBLOCK) {
- /*
- * On a timeout, return what's in the buffer,
- * which may be nothing. If there is something
- * in the store buffer, we can rotate the buffers.
- */
- if (d->bd_hbuf)
- /*
- * We filled up the buffer in between
- * getting the timeout and arriving
- * here, so we don't need to rotate.
- */
- break;
-
- if (d->bd_slen == 0) {
- BPFD_UNLOCK(d);
- return (0);
- }
- ROTATE_BUFFERS(d);
- break;
- }
- }
- /*
- * At this point, we know we have something in the hold slot.
- */
- BPFD_UNLOCK(d);
-
- /*
- * Move data from hold buffer into user space.
- * We know the entire buffer is transferred since
- * we checked above that the read buffer is bpf_bufsize bytes.
- */
- error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
-
- BPFD_LOCK(d);
- d->bd_fbuf = d->bd_hbuf;
- d->bd_hbuf = NULL;
- d->bd_hlen = 0;
- BPFD_UNLOCK(d);
-
- return (error);
+ src_bytes = (u_char *)src;
+ bcopy(src_bytes, buf + offset, len);
}
/*
- * Copy data from an mbuf chain into a buffer. This code is derived
- * from m_copydata in sys/uipc_mbuf.c.
+ * Scatter-gather data copy from an mbuf chain to the current kernel buffer.
*/
-static void
-bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
+void
+bpf_buffer_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
+ u_int len)
{
const struct mbuf *m;
+ u_char *dst;
u_int count;
- u_char *dst;
+
+ // printf("bpf_buffer_append_mbuf size %d\n", len);
- m = src_arg;
- dst = dst_arg;
+ m = (struct mbuf *)src;
+ dst = (u_char *)buf + offset;
while (len > 0) {
if (m == NULL)
panic("bpf_mcopy");
@@ -223,18 +151,76 @@
}
/*
- * Initialize all nonzero fields of a descriptor.
+ * Free BPF kernel buffers on device close.
+ */
+void
+bpf_buffer_free(struct bpf_d *d)
+{
+
+ // printf("bpf_buffer_free(sbuf: %p, hbuf: %p, fbuf: %p)\n",
+ // d->bd_sbuf, d->bd_hbuf, d->bd_fbuf);
+
+ if (d->bd_sbuf != NULL)
+ free(d->bd_sbuf, M_BPF);
>>> TRUNCATED FOR MAIL (1000 lines) <<<
More information about the p4-projects
mailing list