svn commit: r294294 - stable/10/usr.sbin/bhyve

George V. Neville-Neil gnn at FreeBSD.org
Mon Jan 18 21:24:30 UTC 2016


Author: gnn
Date: Mon Jan 18 21:24:28 2016
New Revision: 294294
URL: https://svnweb.freebsd.org/changeset/base/294294

Log:
  MFC: 293459,293643
  
  Add netmap support for bhyve

Modified:
  stable/10/usr.sbin/bhyve/pci_virtio_net.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/usr.sbin/bhyve/pci_virtio_net.c
==============================================================================
--- stable/10/usr.sbin/bhyve/pci_virtio_net.c	Mon Jan 18 20:47:04 2016	(r294293)
+++ stable/10/usr.sbin/bhyve/pci_virtio_net.c	Mon Jan 18 21:24:28 2016	(r294294)
@@ -36,6 +36,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/ioctl.h>
 #include <machine/atomic.h>
 #include <net/ethernet.h>
+#ifndef NETMAP_WITH_LIBS
+#define NETMAP_WITH_LIBS
+#endif
+#include <net/netmap_user.h>
 
 #include <errno.h>
 #include <fcntl.h>
@@ -133,6 +137,8 @@ struct pci_vtnet_softc {
 	struct mevent	*vsc_mevp;
 
 	int		vsc_tapfd;
+	struct nm_desc	*vsc_nmd;
+
 	int		vsc_rx_ready;
 	volatile int	resetting;	/* set and checked outside lock */
 
@@ -149,6 +155,10 @@ struct pci_vtnet_softc {
 	pthread_mutex_t	tx_mtx;
 	pthread_cond_t	tx_cond;
 	int		tx_in_progress;
+
+	void (*pci_vtnet_rx)(struct pci_vtnet_softc *sc);
+	void (*pci_vtnet_tx)(struct pci_vtnet_softc *sc, struct iovec *iov,
+			     int iovcnt, int len);
 };
 
 static void pci_vtnet_reset(void *);
@@ -371,14 +381,208 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc 
 	vq_endchains(vq, 1);
 }
 
+/*
+ * Gather the segments of 'iov' into one slot of a netmap TX ring and
+ * ask the kernel to transmit it.
+ *
+ * The search for a ring with a free slot starts at nmd->cur_tx_ring and
+ * wraps within [first_tx_ring, last_tx_ring]; once it arrives back at
+ * the starting ring every TX ring is full and nothing is sent.
+ * Segments that would not fit in the slot's buffer (ring->nr_buf_size
+ * bytes) are not copied, so an oversized frame is truncated rather than
+ * written past the end of the buffer.
+ *
+ * Returns the number of bytes placed in the slot, or 0 if no ring had
+ * room.
+ */
+static int
+pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+{
+	int r, i;
+	int len = 0;
+
+	for (r = nmd->cur_tx_ring; ; ) {
+		struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r);
+		uint32_t cur, idx;
+		char *buf;
+
+		if (nm_ring_empty(ring)) {
+			/* No free slot here; move on to the next TX ring. */
+			r++;
+			if (r > nmd->last_tx_ring)
+				r = nmd->first_tx_ring;
+			/* Back at the ring we started from: all are full. */
+			if (r == nmd->cur_tx_ring)
+				break;
+			continue;
+		}
+		cur = ring->cur;
+		idx = ring->slot[cur].buf_idx;
+		buf = NETMAP_BUF(ring, idx);
+
+		for (i = 0; i < iovcnt; i++) {
+			/* Never write past the end of the slot's buffer. */
+			if ((size_t)len + iov[i].iov_len > ring->nr_buf_size)
+				break;
+			memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len);
+			len += iov[i].iov_len;
+		}
+		ring->slot[cur].len = len;
+		/* Hand the filled slot to the kernel and remember the ring. */
+		ring->head = ring->cur = nm_ring_next(ring, cur);
+		nmd->cur_tx_ring = r;
+		ioctl(nmd->fd, NIOCTXSYNC, NULL);
+		break;
+	}
+
+	return (len);
+}
+
+/*
+ * Pull one packet out of the netmap RX rings and scatter it across the
+ * segments of 'iov'.
+ *
+ * Rings are probed starting at nmd->cur_rx_ring, wrapping within
+ * [first_rx_ring, last_rx_ring]; the first non-empty ring supplies the
+ * packet.  Each segment's iov_len is rewritten to the number of bytes
+ * actually stored in it (0 for segments that were not used), and any
+ * part of the packet that does not fit in 'iov' is discarded.
+ *
+ * Returns the number of bytes copied, or 0 if nothing was copied (for
+ * instance when every ring was empty).
+ */
+static inline int
+pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+{
+	struct netmap_ring *ring = NULL;
+	int ringno = nmd->cur_rx_ring;
+	int total = 0;
+	int seg = 0;
+
+	/* Locate the first ring holding a packet, giving up after one lap. */
+	for (;;) {
+		struct netmap_ring *cand = NETMAP_RXRING(nmd->nifp, ringno);
+
+		if (!nm_ring_empty(cand)) {
+			ring = cand;
+			break;
+		}
+		if (++ringno > nmd->last_rx_ring)
+			ringno = nmd->first_rx_ring;
+		if (ringno == nmd->cur_rx_ring)
+			break;
+	}
+
+	if (ring != NULL) {
+		uint32_t slot = ring->cur;
+		char *pkt = NETMAP_BUF(ring, ring->slot[slot].buf_idx);
+		size_t remaining = ring->slot[slot].len;
+
+		while (seg < iovcnt && remaining > 0) {
+			struct iovec *v = &iov[seg];
+
+			/* Shrink the segment to what is left of the packet. */
+			if (v->iov_len > remaining)
+				v->iov_len = remaining;
+			memcpy(v->iov_base, pkt + total, v->iov_len);
+			total += v->iov_len;
+			remaining -= v->iov_len;
+			seg++;
+		}
+
+		/* Give the slot back to the kernel and remember the ring. */
+		ring->head = ring->cur = nm_ring_next(ring, slot);
+		nmd->cur_rx_ring = ringno;
+		ioctl(nmd->fd, NIOCRXSYNC, NULL);
+	}
+
+	/* Segments that received no data are reported as empty. */
+	while (seg < iovcnt)
+		iov[seg++].iov_len = 0;
+
+	return (total);
+}
+
+/*
+ * Transmit handler for the netmap backend: pass a guest buffer chain
+ * of 'len' bytes, described by 'iov'/'iovcnt', to the netmap port.
+ * Nothing is done when no netmap descriptor is open.
+ */
+static void
+pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
+		    int len)
+{
+	enum { MIN_FRAME_LEN = 60 };
+	static char zeropad[MIN_FRAME_LEN]; /* static storage, so all zero */
+	struct nm_desc *nmd = sc->vsc_nmd;
+
+	if (nmd != NULL) {
+		/*
+		 * Frames shorter than 60 bytes are padded up to that
+		 * size with a trailing zero-filled segment.  The caller
+		 * guarantees that one spare iov entry is always
+		 * available for it.
+		 */
+		if (len < MIN_FRAME_LEN) {
+			struct iovec *extra = &iov[iovcnt++];
+
+			extra->iov_base = zeropad;
+			extra->iov_len = MIN_FRAME_LEN - len;
+		}
+		(void) pci_vtnet_netmap_writev(nmd, iov, iovcnt);
+	}
+}
+
+static void
+pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc)
+{
+	struct iovec iov[VTNET_MAXSEGS], *riov;
+	struct vqueue_info *vq;
+	void *vrx;
+	int len, n;
+	uint16_t idx;
+
+	/*
+	 * Receive handler for the netmap backend: copies packets from
+	 * the netmap descriptor into the guest's rx virtqueue, one
+	 * packet per descriptor chain, until either the packets or the
+	 * available chains run out.
+	 *
+	 * Should never be called without a valid netmap descriptor
+	 */
+	assert(sc->vsc_nmd != NULL);
+
+	/*
+	 * But, will be called when the rx ring hasn't yet
+	 * been set up or the guest is resetting the device.
+	 */
+	if (!sc->vsc_rx_ready || sc->resetting) {
+		/*
+		 * Drop the packet and try later.
+		 *
+		 * NOTE(review): dummybuf is defined outside this hunk and
+		 * is passed here as nm_nextpkt()'s second argument; the
+		 * returned pointer is ignored.  Confirm this is the
+		 * intended way to discard a packet.
+		 */
+		(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+		return;
+	}
+
+	/*
+	 * Check for available rx buffers
+	 */
+	vq = &sc->vsc_queues[VTNET_RXQ];
+	if (!vq_has_descs(vq)) {
+		/*
+		 * Drop the packet and try later.  Interrupt on
+		 * empty, if that's negotiated.
+		 */
+		(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+		vq_endchains(vq, 1);
+		return;
+	}
+
+	do {
+		/*
+		 * Get descriptor chain.
+		 */
+		n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
+		assert(n >= 1 && n <= VTNET_MAXSEGS);
+
+		/*
+		 * Get a pointer to the rx header, and use the
+		 * data immediately following it for the packet buffer.
+		 * vrx is saved first because rx_iov_trim() may return an
+		 * iov array that no longer starts at the header
+		 * (presumably it skips rx_vhdrlen bytes; the helper is
+		 * defined outside this hunk).
+		 */
+		vrx = iov[0].iov_base;
+		riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
+
+		len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n);
+
+		if (len == 0) {
+			/*
+			 * No more packets, but still some avail ring
+			 * entries.  Interrupt if needed/appropriate.
+			 *
+			 * NOTE(review): the chain fetched by vq_getchain()
+			 * above is neither released nor handed back on
+			 * this path -- confirm that this is intended.
+			 */
+			vq_endchains(vq, 0);
+			return;
+		}
+
+		/*
+		 * The only valid field in the rx packet header is the
+		 * number of buffers if merged rx bufs were negotiated.
+		 */
+		memset(vrx, 0, sc->rx_vhdrlen);
+
+		if (sc->rx_merge) {
+			struct virtio_net_rxhdr *vrxh;
+
+			vrxh = vrx;
+			vrxh->vrh_bufs = 1;
+		}
+
+		/*
+		 * Release this chain and handle more chains.
+		 * The reported length covers the header plus the packet.
+		 */
+		vq_relchain(vq, idx, len + sc->rx_vhdrlen);
+	} while (vq_has_descs(vq));
+
+	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
+	vq_endchains(vq, 1);
+}
+
 static void
-pci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
+pci_vtnet_rx_callback(int fd, enum ev_type type, void *param)
 {
 	struct pci_vtnet_softc *sc = param;
 
 	pthread_mutex_lock(&sc->rx_mtx);
 	sc->rx_in_progress = 1;
-	pci_vtnet_tap_rx(sc);
+	sc->pci_vtnet_rx(sc);
 	sc->rx_in_progress = 0;
 	pthread_mutex_unlock(&sc->rx_mtx);
 
@@ -421,7 +625,7 @@ pci_vtnet_proctx(struct pci_vtnet_softc 
 	}
 
 	DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n));
-	pci_vtnet_tap_tx(sc, &iov[1], n - 1, plen);
+	sc->pci_vtnet_tx(sc, &iov[1], n - 1, plen);
 
 	/* chain is processed, release it and set tlen */
 	vq_relchain(vq, idx, tlen);
@@ -532,6 +736,67 @@ pci_vtnet_parsemac(char *mac_str, uint8_
         return (0);
 }
 
+/*
+ * Attach the device to the character device /dev/<devname>.
+ *
+ * Installs the tap rx/tx handlers, opens the device, switches it to
+ * non-blocking mode and registers pci_vtnet_rx_callback for read
+ * events.  On any failure a warning is printed and sc->vsc_tapfd is
+ * left at -1; no read event is registered for a descriptor that has
+ * already been closed.
+ */
+static void
+pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname)
+{
+	char tbuf[80];
+	int opt = 1;
+
+	strcpy(tbuf, "/dev/");
+	strlcat(tbuf, devname, sizeof(tbuf));
+
+	sc->pci_vtnet_rx = pci_vtnet_tap_rx;
+	sc->pci_vtnet_tx = pci_vtnet_tap_tx;
+
+	sc->vsc_tapfd = open(tbuf, O_RDWR);
+	if (sc->vsc_tapfd == -1) {
+		WPRINTF(("open of tap device %s failed\n", tbuf));
+		return;
+	}
+
+	/*
+	 * Set non-blocking and register for read
+	 * notifications with the event loop
+	 */
+	if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
+		WPRINTF(("tap device O_NONBLOCK failed\n"));
+		close(sc->vsc_tapfd);
+		sc->vsc_tapfd = -1;
+		/* Do not register an event for the fd we just closed. */
+		return;
+	}
+
+	sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
+				  EVF_READ,
+				  pci_vtnet_rx_callback,
+				  sc);
+	if (sc->vsc_mevp == NULL) {
+		WPRINTF(("Could not register event\n"));
+		close(sc->vsc_tapfd);
+		sc->vsc_tapfd = -1;
+	}
+}
+
+/*
+ * Attach the device to the netmap port named by 'ifname'.
+ *
+ * Installs the netmap rx/tx handlers, opens the port with nm_open()
+ * and registers pci_vtnet_rx_callback for read events on its file
+ * descriptor.  On failure a warning is printed and sc->vsc_nmd is
+ * left NULL.
+ */
+static void
+pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname)
+{
+	struct nm_desc *nmd;
+
+	sc->pci_vtnet_rx = pci_vtnet_netmap_rx;
+	sc->pci_vtnet_tx = pci_vtnet_netmap_tx;
+
+	nmd = nm_open(ifname, NULL, 0, 0);
+	sc->vsc_nmd = nmd;
+	if (nmd == NULL) {
+		WPRINTF(("open of netmap device %s failed\n", ifname));
+		return;
+	}
+
+	sc->vsc_mevp = mevent_add(nmd->fd, EVF_READ, pci_vtnet_rx_callback,
+				  sc);
+	if (sc->vsc_mevp != NULL)
+		return;
+
+	/* Event registration failed: undo the open. */
+	WPRINTF(("Could not register event\n"));
+	nm_close(nmd);
+	sc->vsc_nmd = NULL;
+}
 
 static int
 pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
@@ -567,8 +832,8 @@ pci_vtnet_init(struct vmctx *ctx, struct
 	 */
 	mac_provided = 0;
 	sc->vsc_tapfd = -1;
+	sc->vsc_nmd = NULL;
 	if (opts != NULL) {
-		char tbuf[80];
 		int err;
 
 		devname = vtopts = strdup(opts);
@@ -583,36 +848,13 @@ pci_vtnet_init(struct vmctx *ctx, struct
 			mac_provided = 1;
 		}
 
-		strcpy(tbuf, "/dev/");
-		strlcat(tbuf, devname, sizeof(tbuf));
+		if (strncmp(devname, "vale", 4) == 0)
+			pci_vtnet_netmap_setup(sc, devname);
+		if ((strncmp(devname, "tap", 3) == 0) ||
+		    (strncmp(devname, "vmmnet", 5) == 0))
+			pci_vtnet_tap_setup(sc, devname);
 
 		free(devname);
-
-		sc->vsc_tapfd = open(tbuf, O_RDWR);
-		if (sc->vsc_tapfd == -1) {
-			WPRINTF(("open of tap device %s failed\n", tbuf));
-		} else {
-			/*
-			 * Set non-blocking and register for read
-			 * notifications with the event loop
-			 */
-			int opt = 1;
-			if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
-				WPRINTF(("tap device O_NONBLOCK failed\n"));
-				close(sc->vsc_tapfd);
-				sc->vsc_tapfd = -1;
-			}
-
-			sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
-						  EVF_READ,
-						  pci_vtnet_tap_callback,
-						  sc);
-			if (sc->vsc_mevp == NULL) {
-				WPRINTF(("Could not register event\n"));
-				close(sc->vsc_tapfd);
-				sc->vsc_tapfd = -1;
-			}
-		}		
 	}
 
 	/*


More information about the svn-src-all mailing list