PERFORCE change 113839 for review
Robert Watson
rwatson at FreeBSD.org
Thu Feb 1 15:25:38 UTC 2007
http://perforce.freebsd.org/chv.cgi?CH=113839
Change 113839 by rwatson at rwatson_cinnamon on 2007/02/01 15:25:28
Further chicken scratchings at teaching libpcap about shared memory
headers for zero-copy BPF. This sort of works; the lack of
timeouts is still an issue, and there appears to be a problem under
high use leading to a crash (likely a pointer/buffer bug somewhere
in the code I've added).
Affected files ...
... //depot/projects/zcopybpf/src/contrib/libpcap/pcap-bpf.c#3 edit
... //depot/projects/zcopybpf/src/contrib/libpcap/pcap-int.h#3 edit
Differences ...
==== //depot/projects/zcopybpf/src/contrib/libpcap/pcap-bpf.c#3 (text+ko) ====
@@ -141,7 +141,98 @@
return (0);
}
+#ifdef BIOCGETBUFMODE
+/*
+ * Selection routine for zero-copy BPF: identify the next completed buffer,
+ * if any. Try shared memory first, and if that doesn't work, make a system
+ * call, which may dislodge a buffer.
+ *
+ * Return (1) if the buffer is found, (0) if a retry is required, and (-1) if
+ * there is an unrecoverable error.
+ *
+ * XXXRW: Check to make sure the version comparison we're doing here is
+ * really the right thing -- maybe use serial number arithmetic?
+ */
static int
+pcap_next_zbuf(pcap_t *p, u_int *cc)
+{
+ struct bpf_zbuf_header *bzh;
+ struct pollfd pollfd;
+ struct bpf_zbuf bz;
+ int i;
+
+ p->bzh = NULL;
+
+ /*
+ * First try directly accessing the zero-copy buffer headers.
+ */
+ bzh = (struct bpf_zbuf_header *)p->zbuf1;
+ if (bzh->bzh_kernel_gen > bzh->bzh_user_gen) {
+ printf("pcap_next_zbuf: zbuf1 gen\n");
+ goto found;
+ }
+ bzh = (struct bpf_zbuf_header *)p->zbuf2;
+ if (bzh->bzh_kernel_gen > bzh->bzh_user_gen) {
+ printf("pcap_next_zbuf: zbuf2 gen\n");
+ goto found;
+ }
+
+ /*
+ * Next, try asking the kernel, which may dislodge a buffer in
+ * immediate mode.
+ */
+ bzero(&bz, sizeof(bz));
+ if (ioctl(p->fd, BIOCGETZNEXT, &bz) < 0) {
+ snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "BIOCGETZNEXT: %s",
+ pcap_strerror(errno));
+ return (-1);
+ }
+ bzh = bz.bz_bufa;
+ if (bzh != NULL) {
+ printf("pcap_next_zbuf getznext\n");
+ goto found;
+ }
+
+ printf("poll timeout %d\n", p->timeout);
+ bzero(&pollfd, sizeof(pollfd));
+ pollfd.fd = p->fd;
+ pollfd.events = POLLIN;
+ i = poll(&pollfd, 1, p->timeout == 0 ? INFTIM : p->timeout);
+ if (i < 0 && errno != EINTR) {
+ snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "poll: %s",
+ pcap_strerror(errno));
+ return (-1);
+ }
+ return (0);
+found:
+ p->bzh = bzh;
+ *cc = bzh->bzh_kernel_len;
+ p->buffer = (u_char *)(bzh + 1);
+ return (1);
+}
+
+static int
+pcap_ack_zbuf(pcap_t *p)
+{
+ struct bpf_zbuf bz;
+
+ p->bzh->bzh_user_gen++;
+#if 0
+ bzero(&bz, sizeof(bz));
+ bz.bz_bufa = (u_char *)p->bzh;
+ if (ioctl(p->fd, BIOCACKZBUF, &bz) < 0) {
+ snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "BIOCACKZBUF: %s",
+ pcap_strerror(errno));
+ return (-1);
+ }
+#endif
+ p->bzh = NULL;
+ p->buffer = NULL;
+ return (0);
+}
+#endif
+
+static int
pcap_read_bpf(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
{
int cc;
@@ -149,13 +240,12 @@
register u_char *bp, *ep;
u_char *datap;
struct bpf_insn *fcode;
+#ifdef BIOCSETBUFMODE
+ int i;
+#endif
#ifdef PCAP_FDDIPAD
register int pad;
#endif
-#ifdef BIOCSETBUFMODE
- struct pollfd pollfd;
- struct bpf_zbuf bz;
-#endif
fcode = p->md.use_bpf ? NULL : p->fcode.bf_insns;
again:
@@ -174,51 +264,14 @@
cc = p->cc;
if (p->cc == 0) {
#ifdef BIOCSETBUFMODE
- /*
- * XXXRW: All of this could use serious revision.
- */
if (p->zbuf1 != NULL) {
- if (p->buffer != NULL) {
- bzero(&bz, sizeof(bz));
- bz.bz_bufa = p->buffer;
- bz.bz_buflen = p->bufsize;
- if (ioctl(p->fd, BIOCACKZBUF, &bz) < 0) {
- snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
- "BIOCGETZNEXT: %s",
- pcap_strerror(errno));
- return (-1);
- }
- p->buffer = NULL;
- }
- bzero(&bz, sizeof(bz));
- if (ioctl(p->fd, BIOCGETZNEXT, &bz) < 0) {
- snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
- "BIOCGETZNEXT: %s",
- pcap_strerror(errno));
+ if (p->buffer != NULL)
+ pcap_ack_zbuf(p);
+ i = pcap_next_zbuf(p, &cc);
+ if (i == 0)
+ goto again;
+ if (i < 0)
return (-1);
- }
- printf("getznext returned %p\n", bz.bz_bufa);
- if (bz.bz_bufa != NULL) {
- p->buffer = bz.bz_bufa;
- cc = bz.bz_buflen;
- } else {
- /*
- * XXXRW: Need to implement non-blocking
- * operation -- query fd with fcntl?
- */
- bzero(&pollfd, sizeof(pollfd));
- pollfd.fd = p->fd;
- pollfd.events = POLLIN;
- printf("poll returned %d\n",
- poll(&pollfd, 1, p->timeout == 0 ? INFTIM
- : p->timeout));
- printf("pollfd.revents = 0x%x\n",
- pollfd.revents);
-
- /* XXXRW: Should force buffer rotation here. */
-
- goto again;
- }
} else
#endif
cc = read(p->fd, (char *)p->buffer, p->bufsize);
@@ -727,21 +780,21 @@
/*
* XXXRW: This logic should be revisited.
*/
- v = 32768;
- if (v % getpagesize() != 0)
- v = getpagesize();
- if (v > zbufmax)
- v = zbufmax;
+ p->zbufsize = 32768;
+ if (p->zbufsize % getpagesize() != 0)
+ p->zbufsize = getpagesize();
+ if (p->zbufsize > zbufmax)
+ p->zbufsize = zbufmax;
- p->zbuf1 = mmap(NULL, v, PROT_READ | PROT_WRITE, MAP_ANON,
- -1, 0);
- p->zbuf2 = mmap(NULL, v, PROT_READ | PROT_WRITE, MAP_ANON,
- -1, 0);
+ p->zbuf1 = mmap(NULL, p->zbufsize, PROT_READ | PROT_WRITE,
+ MAP_ANON, -1, 0);
+ p->zbuf2 = mmap(NULL, p->zbufsize, PROT_READ | PROT_WRITE,
+ MAP_ANON, -1, 0);
if (p->zbuf1 == MAP_FAILED || p->zbuf2 == MAP_FAILED) {
if (p->zbuf1 != MAP_FAILED)
- munmap(p->zbuf1, v);
+ munmap(p->zbuf1, p->zbufsize);
if (p->zbuf2 != MAP_FAILED)
- munmap(p->zbuf1, v);
+ munmap(p->zbuf1, p->zbufsize);
snprintf(ebuf, PCAP_ERRBUF_SIZE, "mmap: %s",
pcap_strerror(errno));
}
@@ -749,7 +802,7 @@
bzero(&bz, sizeof(bz));
bz.bz_bufa = p->zbuf1;
bz.bz_bufb = p->zbuf2;
- bz.bz_buflen = v;
+ bz.bz_buflen = p->zbufsize;
if (ioctl(fd, BIOCSETZBUF, (caddr_t)&bz) < 0) {
snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCSETZBUF: %s",
@@ -763,6 +816,8 @@
device, pcap_strerror(errno));
goto bad;
}
+
+ v = p->zbufsize - sizeof(struct bpf_zbuf_header);
} else {
#endif
==== //depot/projects/zcopybpf/src/contrib/libpcap/pcap-int.h#3 (text+ko) ====
@@ -162,11 +162,22 @@
*
* Zero-copy read buffer -- for zero-copy BPF. 'buffer' above will
* alternative between these two actual mmap'd buffers as required.
+ * As there is a header at the front of the mmap'd buffer, only
+ * some of the buffer is exposed to libpcap as a whole via bufsize;
+ * zbufsize is the true size.
*/
u_char *zbuf1, *zbuf2;
+ u_int zbufsize;
u_int timeout;
/*
+ * If there's currently a buffer being actively processed, then it is
+ * referenced here; 'buffer' is also pointed at it, but offset by the
+ * size of the header.
+ */
+ struct bpf_zbuf_header *bzh;
+
+ /*
* Place holder for pcap_next().
*/
u_char *pkt;
More information about the p4-projects
mailing list