PERFORCE change 114125 for review

Christian S.J. Peron csjp at FreeBSD.org
Tue Feb 6 22:02:34 UTC 2007


http://perforce.freebsd.org/chv.cgi?CH=114125

Change 114125 by csjp at csjp_rnd01 on 2007/02/06 22:01:39

	Somewhat refactor the pcap_next_zbuf() code:
	
	- Copy the read timeout from pcap_open_live() into the
	  pcap object
	- Implement the read timeout in the form of a select(2) timeout
	- When select returns perform the following:
	
	        (1) Check to see if we woke up as a result of a timeout
	        (2) Check to see if there is data in the store buffer
	        (3) Compare user/kernel generation numbers
	        (4) If no packet data is ready, call BIOCROTZBUF to see
	            if the wakeup was a result of BIOCIMMEDIATE
	        (5) If no packet is available, retry
	
	These changes should roughly emulate read timeout behaviour which
	results in users getting fresh packet data every second.
	
	Finally, add an environment variable, BPF_ZERO_COPY, which, if
	set, enables the zero-copy functionality; otherwise, the
	regular buffer mode is used (the default).
	
	This makes libpcap work with zero copy. I've tested this with  
	tcpdump so far.
	
	It's a start :)

Affected files ...

.. //depot/projects/zcopybpf/src/contrib/libpcap/pcap-bpf.c#4 edit
.. //depot/projects/zcopybpf/src/contrib/libpcap/pcap-int.h#4 edit

Differences ...

==== //depot/projects/zcopybpf/src/contrib/libpcap/pcap-bpf.c#4 (text+ko) ====

@@ -157,57 +157,90 @@
 pcap_next_zbuf(pcap_t *p, u_int *cc)
 {
 	struct bpf_zbuf_header *bzh;
-	struct pollfd pollfd;
 	struct bpf_zbuf bz;
-	int i;
+	struct timeval tv;
+	fd_set r_set;
+	int r;
 
+	tv.tv_sec = 1;
+	tv.tv_usec = 0;
+	FD_ZERO(&r_set);
+	FD_SET(p->fd, &r_set);
 	p->bzh = NULL;
+	p->buffer = NULL;
 
+	if (p->to_ms != 0) {
+		tv.tv_sec = p->to_ms / 1000;
+		tv.tv_usec = (p->to_ms * 1000) % 1000000;
+	}
+	r = select(p->fd + 1, &r_set, NULL, NULL, &tv);
+	if (r < 0 && errno == EINTR)
+		return (0);
+	else if (r < 0) {
+		(void) snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
+		    "select: %s", strerror(errno));
+		return (-1);
+	}
 	/*
-	 * First try directly accessing the zero-copy buffer headers.
+	 * Handle timeouts here
+	 */
+	if (r == 0) {
+		if (ioctl(p->fd, BIOCROTZBUF, &bz) < 0) {
+			(void) snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
+			    "BIOCROTZBUF: %s", strerror(errno));
+			return (-1);
+		}
+		/*
+		 * select(2) woke us up due to a timeout, and there was no
+		 * data to be processed in the store buffer.  Tell pcap to
+		 * to wait again.
+		 */
+		if (bz.bz_bufa == NULL)
+			return (0);
+	}
+	/* XXXCSJP should we check FD_ISSET()? */
+	/*
+	 * If we have made it this far, chances are select(2) returned because
+	 * there is data ready to be processed in the hold buffer.  Compare the
+	 * user generation numbers against the kernels.  If there are any
+	 * differences, process the packet data.
 	 */
 	bzh = (struct bpf_zbuf_header *)p->zbuf1;
 	if (bzh->bzh_kernel_gen > bzh->bzh_user_gen) {
-		printf("pcap_next_zbuf: zbuf1 gen\n");
-		goto found;
+		p->bzh = bzh;
+		p->buffer = (u_char *)p->zbuf1;
+		p->buffer += sizeof(*bzh);
+		*cc = bzh->bzh_kernel_len;
+		return (1);
 	}
 	bzh = (struct bpf_zbuf_header *)p->zbuf2;
 	if (bzh->bzh_kernel_gen > bzh->bzh_user_gen) {
-		printf("pcap_next_zbuf: zbuf2 gen\n");
-		goto found;
+		p->bzh = bzh;
+		p->buffer = (u_char *)p->zbuf2;
+		p->buffer += sizeof(*bzh);
+		*cc = bzh->bzh_kernel_len;
+		return (1);
 	}
-
 	/*
-	 * Next, try asking the kernel, which may dislodge a buffer in
-	 * immediate mode.
+	 * If the generation numbers were the same for both buffers, then it
+	 * is possible that we woke up because of BIOCIMMEDIATE.  In either
+	 * case, manually rotate the buffers.
 	 */
-	bzero(&bz, sizeof(bz));
-	if (ioctl(p->fd, BIOCGETZNEXT, &bz) < 0) {
-		snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "BIOCGETZNEXT: %s",
-		    pcap_strerror(errno));
+	if (ioctl(p->fd, BIOCROTZBUF, &bz) < 0) {
+		(void) snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
+		    "BIOCROTZBUF: %s", strerror(errno));
 		return (-1);
 	}
-	bzh = bz.bz_bufa;
-	if (bzh != NULL) {
-		printf("pcap_next_zbuf getznext\n");
-		goto found;
-	}
-
-	printf("poll timeout %d\n", p->timeout);
-	bzero(&pollfd, sizeof(pollfd));
-	pollfd.fd = p->fd;
-	pollfd.events = POLLIN;
-	i = poll(&pollfd, 1, p->timeout == 0 ? INFTIM : p->timeout);
-	if (i < 0 && errno != EINTR) {
-		snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "poll: %s",
-		    pcap_strerror(errno));
-		return (-1);
-	}
-	return (0);
-found:
-	p->bzh = bzh;
-	*cc = bzh->bzh_kernel_len;
-	p->buffer = (u_char *)(bzh + 1);
+	/*
+	 * It's possible that we were unable to rotate the buffer because the
+	 * user generation numbers have not been modified, in which case retry.
+	 */
+	if (bz.bz_bufa == NULL)
+		return (0);
+	p->bzh = (struct bpf_zbuf_header *)bz.bz_bufa;
+	p->buffer = (u_char *)bz.bz_bufa;
+	p->buffer += sizeof(*bzh);
+	*cc = bz.bz_buflen;
 	return (1);
 }
 
@@ -217,15 +250,6 @@
 	struct bpf_zbuf bz;
 
 	p->bzh->bzh_user_gen++;
-#if 0
-	bzero(&bz, sizeof(bz));
-	bz.bz_bufa = (u_char *)p->bzh;
-	if (ioctl(p->fd, BIOCACKZBUF, &bz) < 0) {
-		snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "BIOCACKZBUF: %s",
-		    pcap_strerror(errno));
-		return (-1);
-	}
-#endif
 	p->bzh = NULL;
 	p->buffer = NULL;
 	return (0);
@@ -688,13 +712,6 @@
 #define DLT_DOCSIS	143
 #endif
 
-/*
- * XXXRW: The following is an evil global hack to control whether zero-copy
- * BPF is used or not.  It should be replaced with something real, if it is
- * worth keeping such a frob.
- */
-int bpf_zerocopy = 1;
-
 pcap_t *
 pcap_open_live(const char *device, int snaplen, int promisc, int to_ms,
     char *ebuf)
@@ -763,7 +780,7 @@
 	 * attach to, so we do that here also.
 	 */
 #ifdef BIOCSETBUFMODE
-	if (bpf_zerocopy) {
+	if (getenv("BPF_ZERO_COPY")) {
 		bufmode = BPF_BUFMODE_ZBUF;
 		if (ioctl(fd, BIOCSETBUFMODE, (caddr_t)&bufmode) < 0) {
 			snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCSETBUFMODE: %s",
@@ -1033,6 +1050,7 @@
 	}
 #endif
 	/* set timeout */
+	p->to_ms = to_ms;
 	if (to_ms != 0) {
 		/*
 		 * XXX - is this seconds/nanoseconds in AIX?

==== //depot/projects/zcopybpf/src/contrib/libpcap/pcap-int.h#4 (text+ko) ====

@@ -154,6 +154,7 @@
 	u_char *buffer;
 	u_char *bp;
 	int cc;
+	int to_ms;
 
 	/*
 	 * XXXRW: Exactly how to handle ifdefs, etc, is not something I've


More information about the p4-projects mailing list