PERFORCE change 113839 for review

Robert Watson rwatson at FreeBSD.org
Thu Feb 1 15:25:38 UTC 2007


http://perforce.freebsd.org/chv.cgi?CH=113839

Change 113839 by rwatson at rwatson_cinnamon on 2007/02/01 15:25:28

	Further chicken scratchings at teaching libpcap about shared memory
	headers for zero-copy BPF.  This sort of works; the lack of
	timeouts is still an issue, and there appears to be a problem under
	high use leading to a crash (likely a pointer/buffer bug somewhere
	in the code I've added).

Affected files ...

.. //depot/projects/zcopybpf/src/contrib/libpcap/pcap-bpf.c#3 edit
.. //depot/projects/zcopybpf/src/contrib/libpcap/pcap-int.h#3 edit

Differences ...

==== //depot/projects/zcopybpf/src/contrib/libpcap/pcap-bpf.c#3 (text+ko) ====

@@ -141,7 +141,98 @@
 	return (0);
 }
 
+#ifdef BIOCGETBUFMODE
+/*
+ * Selection routine for zero-copy BPF: identify the next completed buffer,
+ * if any.  Try shared memory first, and if that doesn't work, make a system
+ * call, which may dislodge a buffer.
+ *
+ * Return (1) if the buffer is found, (0) if a retry is required, and (-1) if
+ * there is an unrecoverable error.
+ *
+ * XXXRW: Check to make sure the generation comparison we're doing here is
+ * really the right thing -- maybe use serial number arithmetic?
+ */
 static int
+pcap_next_zbuf(pcap_t *p, u_int *cc)
+{
+	struct bpf_zbuf_header *bzh;
+	struct pollfd pollfd;
+	struct bpf_zbuf bz;
+	int i;
+
+	p->bzh = NULL;
+
+	/*
+	 * First try directly accessing the zero-copy buffer headers.
+	 */
+	bzh = (struct bpf_zbuf_header *)p->zbuf1;
+	if (bzh->bzh_kernel_gen > bzh->bzh_user_gen) {
+		printf("pcap_next_zbuf: zbuf1 gen\n");
+		goto found;
+	}
+	bzh = (struct bpf_zbuf_header *)p->zbuf2;
+	if (bzh->bzh_kernel_gen > bzh->bzh_user_gen) {
+		printf("pcap_next_zbuf: zbuf2 gen\n");
+		goto found;
+	}
+
+	/*
+	 * Next, try asking the kernel, which may dislodge a buffer in
+	 * immediate mode.
+	 */
+	bzero(&bz, sizeof(bz));
+	if (ioctl(p->fd, BIOCGETZNEXT, &bz) < 0) {
+		snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "BIOCGETZNEXT: %s",
+		    pcap_strerror(errno));
+		return (-1);
+	}
+	bzh = bz.bz_bufa;
+	if (bzh != NULL) {
+		printf("pcap_next_zbuf getznext\n");
+		goto found;
+	}
+
+	printf("poll timeout %d\n", p->timeout);
+	bzero(&pollfd, sizeof(pollfd));
+	pollfd.fd = p->fd;
+	pollfd.events = POLLIN;
+	i = poll(&pollfd, 1, p->timeout == 0 ? INFTIM : p->timeout);
+	if (i < 0 && errno != EINTR) {
+		snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "poll: %s",
+		    pcap_strerror(errno));
+		return (-1);
+	}
+	return (0);
+found:
+	p->bzh = bzh;
+	*cc = bzh->bzh_kernel_len;
+	p->buffer = (u_char *)(bzh + 1);
+	return (1);
+}
+
+static int
+pcap_ack_zbuf(pcap_t *p)
+{
+	struct bpf_zbuf bz;
+
+	p->bzh->bzh_user_gen++;
+#if 0
+	bzero(&bz, sizeof(bz));
+	bz.bz_bufa = (u_char *)p->bzh;
+	if (ioctl(p->fd, BIOCACKZBUF, &bz) < 0) {
+		snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "BIOCACKZBUF: %s",
+		    pcap_strerror(errno));
+		return (-1);
+	}
+#endif
+	p->bzh = NULL;
+	p->buffer = NULL;
+	return (0);
+}
+#endif
+
+static int
 pcap_read_bpf(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
 {
 	int cc;
@@ -149,13 +240,12 @@
 	register u_char *bp, *ep;
 	u_char *datap;
 	struct bpf_insn *fcode;
+#ifdef BIOCSETBUFMODE
+	int i;
+#endif
 #ifdef PCAP_FDDIPAD
 	register int pad;
 #endif
-#ifdef BIOCSETBUFMODE
-	struct pollfd pollfd;
-	struct bpf_zbuf bz;
-#endif
 
 	fcode = p->md.use_bpf ? NULL : p->fcode.bf_insns;
  again:
@@ -174,51 +264,14 @@
 	cc = p->cc;
 	if (p->cc == 0) {
 #ifdef BIOCSETBUFMODE
-		/*
-		 * XXXRW: All of this could use serious revision.
-		 */
 		if (p->zbuf1 != NULL) {
-			if (p->buffer != NULL) {
-				bzero(&bz, sizeof(bz));
-				bz.bz_bufa = p->buffer;
-				bz.bz_buflen = p->bufsize;
-				if (ioctl(p->fd, BIOCACKZBUF, &bz) < 0) {
-					snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
-					    "BIOCGETZNEXT: %s",
-					    pcap_strerror(errno));
-					return (-1);
-				}
-				p->buffer = NULL;
-			}
-			bzero(&bz, sizeof(bz));
-			if (ioctl(p->fd, BIOCGETZNEXT, &bz) < 0) {
-				snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
-				    "BIOCGETZNEXT: %s",
-				    pcap_strerror(errno));
+			if (p->buffer != NULL)
+				pcap_ack_zbuf(p);
+			i = pcap_next_zbuf(p, &cc);
+			if (i == 0)
+				goto again;
+			if (i < 0)
 				return (-1);
-			}
-			printf("getznext returned %p\n", bz.bz_bufa);
-			if (bz.bz_bufa != NULL) {
-				p->buffer = bz.bz_bufa;
-				cc = bz.bz_buflen;
-			} else {
-				/*
-				 * XXXRW: Need to implement non-blocking
-				 * operation -- query fd with fcntl?
-				 */
-				bzero(&pollfd, sizeof(pollfd));
-				pollfd.fd = p->fd;
-				pollfd.events = POLLIN;
-				printf("poll returned %d\n",
-				    poll(&pollfd, 1, p->timeout == 0 ? INFTIM
-				    : p->timeout));
-				printf("pollfd.revents = 0x%x\n",
-				    pollfd.revents);
-
-				/* XXXRW: Should force buffer rotation here. */
-
-				goto again;
-			}
 		} else
 #endif
 			cc = read(p->fd, (char *)p->buffer, p->bufsize);
@@ -727,21 +780,21 @@
 		/*
 		 * XXXRW: This logic should be revisited.
 		 */
-		v = 32768;
-		if (v % getpagesize() != 0)
-			v = getpagesize();
-		if (v > zbufmax)
-			v = zbufmax;
+		p->zbufsize = 32768;
+		if (p->zbufsize % getpagesize() != 0)
+			p->zbufsize = getpagesize();
+		if (p->zbufsize > zbufmax)
+			p->zbufsize = zbufmax;
 
-		p->zbuf1 = mmap(NULL, v, PROT_READ | PROT_WRITE, MAP_ANON,
-		    -1, 0);
-		p->zbuf2 = mmap(NULL, v, PROT_READ | PROT_WRITE, MAP_ANON,
-		    -1, 0);
+		p->zbuf1 = mmap(NULL, p->zbufsize, PROT_READ | PROT_WRITE,
+		    MAP_ANON, -1, 0);
+		p->zbuf2 = mmap(NULL, p->zbufsize, PROT_READ | PROT_WRITE,
+		    MAP_ANON, -1, 0);
 		if (p->zbuf1 == MAP_FAILED || p->zbuf2 == MAP_FAILED) {
 			if (p->zbuf1 != MAP_FAILED)
-				munmap(p->zbuf1, v);
+				munmap(p->zbuf1, p->zbufsize);
 			if (p->zbuf2 != MAP_FAILED)
-				munmap(p->zbuf1, v);
+				munmap(p->zbuf1, p->zbufsize);
 			snprintf(ebuf, PCAP_ERRBUF_SIZE, "mmap: %s",
 			    pcap_strerror(errno));
 		}
@@ -749,7 +802,7 @@
 		bzero(&bz, sizeof(bz));
 		bz.bz_bufa = p->zbuf1;
 		bz.bz_bufb = p->zbuf2;
-		bz.bz_buflen = v;
+		bz.bz_buflen = p->zbufsize;
 
 		if (ioctl(fd, BIOCSETZBUF, (caddr_t)&bz) < 0) {
 			snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCSETZBUF: %s",
@@ -763,6 +816,8 @@
 			    device, pcap_strerror(errno));
 			goto bad;
 		}
+
+		v = p->zbufsize - sizeof(struct bpf_zbuf_header);
 	} else {
 #endif
 

==== //depot/projects/zcopybpf/src/contrib/libpcap/pcap-int.h#3 (text+ko) ====

@@ -162,11 +162,22 @@
 	 *
 	 * Zero-copy read buffer -- for zero-copy BPF.  'buffer' above will
 	 * alternative between these two actual mmap'd buffers as required.
+	 * As there is a header at the front of the mmap'd buffer, only
+	 * some of the buffer is exposed to libpcap as a whole via bufsize;
+	 * zbufsize is the true size.
 	 */
 	u_char *zbuf1, *zbuf2;
+	u_int zbufsize;
 	u_int timeout;
 
 	/*
+	 * If there's currently a buffer being actively processed, then it is
+	 * referenced here; 'buffer' is also pointed at it, but offset by the
+	 * size of the header.
+	 */
+	struct bpf_zbuf_header *bzh;
+
+	/*
 	 * Place holder for pcap_next().
 	 */
 	u_char *pkt;


More information about the p4-projects mailing list