PERFORCE change 113774 for review

Robert Watson rwatson at FreeBSD.org
Wed Jan 31 16:38:22 UTC 2007


http://perforce.freebsd.org/chv.cgi?CH=113774

Change 113774 by rwatson at rwatson_cinnamon on 2007/01/31 16:37:37

	Chicken scratchings on libpcap to add zero-copy BPF support to it.
	It does correctly capture packets, but there are issues with the
	event model: we currently can't fully support BPF read timeouts
	with semantics identical to those used with bpfread().  This issue
	is being worked on.

Affected files ...

.. //depot/projects/zcopybpf/src/contrib/libpcap/pcap-bpf.c#2 edit
.. //depot/projects/zcopybpf/src/contrib/libpcap/pcap-int.h#2 edit
.. //depot/projects/zcopybpf/src/lib/libpcap/Makefile#2 edit

Differences ...

==== //depot/projects/zcopybpf/src/contrib/libpcap/pcap-bpf.c#2 (text+ko) ====

@@ -30,6 +30,8 @@
 #endif
 
 #include <sys/param.h>			/* optionally get BSD define */
+#include <sys/mman.h>
+#include <sys/poll.h>
 #include <sys/time.h>
 #include <sys/timeb.h>
 #include <sys/socket.h>
@@ -150,6 +152,10 @@
 #ifdef PCAP_FDDIPAD
 	register int pad;
 #endif
+#ifdef BIOCSETBUFMODE
+	struct pollfd pollfd;
+	struct bpf_zbuf bz;
+#endif
 
 	fcode = p->md.use_bpf ? NULL : p->fcode.bf_insns;
  again:
@@ -167,7 +173,56 @@
 	}
 	cc = p->cc;
 	if (p->cc == 0) {
-		cc = read(p->fd, (char *)p->buffer, p->bufsize);
+#ifdef BIOCSETBUFMODE
+		/*
+		 * XXXRW: All of this could use serious revision.
+		 */
+		if (p->zbuf1 != NULL) {
+			if (p->buffer != NULL) {
+				bzero(&bz, sizeof(bz));
+				bz.bz_bufa = p->buffer;
+				bz.bz_buflen = p->bufsize;
+				if (ioctl(p->fd, BIOCACKZBUF, &bz) < 0) {
+					snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
+					    "BIOCGETZNEXT: %s",
+					    pcap_strerror(errno));
+					return (-1);
+				}
+				p->buffer = NULL;
+			}
+			bzero(&bz, sizeof(bz));
+			if (ioctl(p->fd, BIOCGETZNEXT, &bz) < 0) {
+				snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
+				    "BIOCGETZNEXT: %s",
+				    pcap_strerror(errno));
+				return (-1);
+			}
+			printf("getznext returned %p\n", bz.bz_bufa);
+			if (bz.bz_bufa != NULL) {
+				p->buffer = bz.bz_bufa;
+				cc = bz.bz_buflen;
+			} else {
+				/*
+				 * XXXRW: Need to implement non-blocking
+				 * operation -- query fd with fcntl?
+				 */
+				bzero(&pollfd, sizeof(pollfd));
+				pollfd.fd = p->fd;
+				pollfd.events = POLLIN;
+				printf("poll returned %d\n",
+				    poll(&pollfd, 1, p->timeout == 0 ? INFTIM
+				    : p->timeout));
+				printf("pollfd.revents = 0x%x\n",
+				    pollfd.revents);
+
+				/* XXXRW: Should force buffer rotation here. */
+
+				goto again;
+			}
+		} else
+#endif
+			cc = read(p->fd, (char *)p->buffer, p->bufsize);
+
 		if (cc < 0) {
 			/* Don't choke when we get ptraced */
 			switch (errno) {
@@ -580,6 +635,13 @@
 #define DLT_DOCSIS	143
 #endif
 
+/*
+ * XXXRW: The following is an evil global hack to control whether zero-copy
+ * BPF is used or not.  It should be replaced with something real, if it is
+ * worth keeping such a frob.
+ */
+int bpf_zerocopy = 1;
+
 pcap_t *
 pcap_open_live(const char *device, int snaplen, int promisc, int to_ms,
     char *ebuf)
@@ -598,6 +660,10 @@
 	struct bpf_insn total_insn;
 	struct bpf_program total_prog;
 	struct utsname osinfo;
+#ifdef BIOCSETBUFMODE
+	struct bpf_zbuf bz;
+	u_int bufmode, zbufmax;
+#endif
 
 #ifdef HAVE_DAG_API
 	if (strstr(device, "dag")) {
@@ -636,6 +702,71 @@
 	}
 
 	/*
+	 * XXXRW: Depending on the availability of zero-copy BPF, we take one
+	 * of two strategies here: if it is available and usable, we go ahead
+	 * and set it up; otherwise we play the song-and-dance to try to
+	 * probe an acceptable read buffer size.  Zero-copy BPF requires that
+	 * buffers be mapped into memory before selecting the interface to
+	 * attach to, so we do that here also.
+	 */
+#ifdef BIOCSETBUFMODE
+	if (bpf_zerocopy) {
+		bufmode = BPF_BUFMODE_ZBUF;
+		if (ioctl(fd, BIOCSETBUFMODE, (caddr_t)&bufmode) < 0) {
+			snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCSETBUFMODE: %s",
+			    pcap_strerror(errno));
+			goto bad;
+		}
+
+		if (ioctl(fd, BIOCGETZMAX, (caddr_t)&zbufmax) < 0) {
+			snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCGETZMAX: %s",
+			    pcap_strerror(errno));
+			goto bad;
+		}
+
+		/*
+		 * XXXRW: This logic should be revisited.
+		 */
+		v = 32768;
+		if (v % getpagesize() != 0)
+			v = getpagesize();
+		if (v > zbufmax)
+			v = zbufmax;
+
+		p->zbuf1 = mmap(NULL, v, PROT_READ | PROT_WRITE, MAP_ANON,
+		    -1, 0);
+		p->zbuf2 = mmap(NULL, v, PROT_READ | PROT_WRITE, MAP_ANON,
+		    -1, 0);
+		if (p->zbuf1 == MAP_FAILED || p->zbuf2 == MAP_FAILED) {
+			if (p->zbuf1 != MAP_FAILED)
+				munmap(p->zbuf1, v);
+			if (p->zbuf2 != MAP_FAILED)
+				munmap(p->zbuf1, v);
+			snprintf(ebuf, PCAP_ERRBUF_SIZE, "mmap: %s",
+			    pcap_strerror(errno));
+		}
+
+		bzero(&bz, sizeof(bz));
+		bz.bz_bufa = p->zbuf1;
+		bz.bz_bufb = p->zbuf2;
+		bz.bz_buflen = v;
+
+		if (ioctl(fd, BIOCSETZBUF, (caddr_t)&bz) < 0) {
+			snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCSETZBUF: %s",
+			    pcap_strerror(errno));
+			goto bad;
+		}
+
+		(void)strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
+		if (ioctl(fd, BIOCSETIF, (caddr_t)&ifr) < 0) {
+			snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCSETIF: %s: %s",
+			    device, pcap_strerror(errno));
+			goto bad;
+		}
+	} else {
+#endif
+
+	/*
 	 * Try finding a good size for the buffer; 32768 may be too
 	 * big, so keep cutting it in half until we find a size
 	 * that works, or run out of sizes to try.  If the default
@@ -670,6 +801,9 @@
 			 "BIOCSBLEN: %s: No buffer size worked", device);
 		goto bad;
 	}
+#ifdef BIOCSETBUFMODE
+	}
+#endif
 
 	/* Get the data link layer type. */
 	if (ioctl(fd, BIOCGDLT, (caddr_t)&v) < 0) {
@@ -859,6 +993,9 @@
 			goto bad;
 		}
 	}
+#ifdef BIOCSETBUFMODE
+	p->timeout = to_ms;
+#endif
 
 #ifdef _AIX
 #ifdef	BIOCIMMEDIATE
@@ -931,16 +1068,22 @@
 		goto bad;
 	}
 	p->bufsize = v;
-	p->buffer = (u_char *)malloc(p->bufsize);
-	if (p->buffer == NULL) {
-		snprintf(ebuf, PCAP_ERRBUF_SIZE, "malloc: %s",
-		    pcap_strerror(errno));
-		goto bad;
+#ifdef BIOCSETBUFMODE
+	if (p->zbuf1 == NULL) {
+#endif
+		p->buffer = (u_char *)malloc(p->bufsize);
+		if (p->buffer == NULL) {
+			snprintf(ebuf, PCAP_ERRBUF_SIZE, "malloc: %s",
+			    pcap_strerror(errno));
+			goto bad;
+		}
+#ifdef _AIX
+		/* For some strange reason this seems to prevent the EFAULT 
+		 * problems we have experienced from AIX BPF. */
+		memset(p->buffer, 0x0, p->bufsize);
+#endif
+#ifdef BIOCSETBUFMODE
 	}
-#ifdef _AIX
-	/* For some strange reason this seems to prevent the EFAULT 
-	 * problems we have experienced from AIX BPF. */
-	memset(p->buffer, 0x0, p->bufsize);
 #endif
 
 	/*
@@ -1025,7 +1168,16 @@
 
 	return (p);
  bad:
+
 	(void)close(fd);
+#ifdef BIOCSETBUFMODE
+	if (p->zbuf1 != NULL)
+		munmap(p->zbuf1, v);
+	if (p->zbuf2 != NULL)
+		munmap(p->zbuf2, v);
+#endif
+	if (p->buffer != NULL)
+		free(p->buffer);
 	if (p->dlt_list != NULL)
 		free(p->dlt_list);
 	free(p);

==== //depot/projects/zcopybpf/src/contrib/libpcap/pcap-int.h#2 (text+ko) ====

@@ -148,7 +148,7 @@
 	struct pcap_md md;
 
 	/*
-	 * Read buffer.
+	 * Read buffer -- for file descriptor read buffer model.
 	 */
 	int bufsize;
 	u_char *buffer;
@@ -156,6 +156,17 @@
 	int cc;
 
 	/*
+	 * XXXRW: Exactly how to handle ifdefs, etc, is not something I've
+	 * worked out yet.  Presumably we need to add a configure check for
+	 * zero-copy BPF.
+	 *
+	 * Zero-copy read buffer -- for zero-copy BPF.  'buffer' above will
+	 * alternate between these two actual mmap'd buffers as required.
+	 */
+	u_char *zbuf1, *zbuf2;
+	u_int timeout;
+
+	/*
 	 * Place holder for pcap_next().
 	 */
 	u_char *pkt;

==== //depot/projects/zcopybpf/src/lib/libpcap/Makefile#2 (text+ko) ====

@@ -16,6 +16,7 @@
 
 YFLAGS+=-p pcapyy
 LFLAGS+=-Ppcapyy
+CFLAGS+=-I../../sys -g
 CFLAGS+=-DHAVE_CONFIG_H -Dyylval=pcapyylval -I${.CURDIR} -I.
 CFLAGS+=-D_U_="__attribute__((unused))"
 CFLAGS+=-DHAVE_SNPRINTF -DHAVE_VSNPRINTF


More information about the p4-projects mailing list