socsvn commit: r288453 - soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve

stefano at FreeBSD.org stefano at FreeBSD.org
Wed Jul 15 18:56:15 UTC 2015


Author: stefano
Date: Wed Jul 15 18:56:13 2015
New Revision: 288453
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=288453

Log:
  add net-backend in virtio-net to use multiple backend
  (netmap, tap)

Added:
  soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve/net_backends.c
  soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve/net_backends.h
Modified:
  soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve/Makefile
  soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve/pci_virtio_net.c

Modified: soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve/Makefile
==============================================================================
--- soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve/Makefile	Wed Jul 15 17:43:13 2015	(r288452)
+++ soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve/Makefile	Wed Jul 15 18:56:13 2015	(r288453)
@@ -20,6 +20,7 @@
 	mem.c			\
 	mevent.c		\
 	mptbl.c			\
+	net_backends.c		\
 	pci_ahci.c		\
 	pci_emul.c		\
 	pci_hostbridge.c	\

Added: soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve/net_backends.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve/net_backends.c	Wed Jul 15 18:56:13 2015	(r288453)
@@ -0,0 +1,850 @@
+/*-
+ * Copyright (c) 2014 Vincenzo Maffione <v.maffione at gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/uio.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/types.h>		/* u_short etc */
+#include <net/ethernet.h>	/* ETHER_ADDR_LEN */
+#include <net/if.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <poll.h>
+#include <assert.h>
+
+#include "mevent.h"
+#include "dev/virtio/network/virtio_net.h"
+#include "net_backends.h"
+
+#include <sys/linker_set.h>
+
+#define NETMAP_WITH_LIBS
+#include <net/netmap_user.h>
+#if (NETMAP_API < 11)
+#error "Netmap API version must be >= 11"
+#endif
+
+/*
+ * The API for network backends. This might need to be exposed
+ * if we implement them in separate files.
+ */
+struct net_backend {
+	/*
+	 * Name of the backend. It is matched against the user supplied
+	 * device name as a '|'-separated list of prefixes.
+	 */
+	const char *name;	/* name of the backend */
+	/*
+	 * The init and cleanup functions are used internally,
+	 * virtio-net should never call them directly.
+	 * init returns 0 on success and a nonzero value on failure.
+	 */
+	int (*init)(struct net_backend *be, const char *devname,
+				net_backend_cb_t cb, void *param);
+	void (*cleanup)(struct net_backend *be);
+
+
+	/*
+	 * Called to serve a guest transmit request. The scatter-gather
+	 * vector provided by the caller has 'iovcnt' elements and contains
+	 * the packet to send. 'len' is the length of the whole packet in
+	 * bytes. A nonzero 'more' tells the backend that further packets
+	 * will follow shortly: the netmap backend uses it to postpone the
+	 * TXSYNC ioctl, the tap backend ignores it.
+	 */
+	int (*send)(struct net_backend *be, struct iovec *iov,
+			int iovcnt, int len, int more);
+
+	/*
+	 * Called to serve a guest receive request. When the function
+	 * returns a positive value, the scatter-gather vector
+	 * provided by the caller (having 'iovcnt' elements in it) will
+	 * contain a chunk of the received packet. The 'more' flag will
+	 * be set to a nonzero value if the returned chunk was NOT the
+	 * last one for the current packet, and to 0 otherwise. The
+	 * function returns the chunk size in bytes, or 0 if the backend
+	 * doesn't have a new packet to receive.
+	 * Note that it may be necessary to call this callback many
+	 * times to receive a single packet, depending on how big the
+	 * buffers you provide are.
+	 */
+	int (*recv)(struct net_backend *be, struct iovec *iov,
+		    int iovcnt, int *more);
+
+	/*
+	 * Ask the backend for the virtio-net features it is able to
+	 * support. Possible features are TSO, UFO and checksum offloading
+	 * in both rx and tx direction and for both IPv4 and IPv6.
+	 */
+	uint64_t (*get_features)(struct net_backend *be);
+
+	/*
+	 * Tell the backend to enable/disable the specified virtio-net
+	 * features. All the implementations in this file return 0.
+	 */
+	uint64_t (*set_features)(struct net_backend *be, uint64_t features);
+
+	struct pci_vtnet_softc *sc;	/* set by netbe_init() from 'param' */
+	int fd;		/* backend file descriptor, -1 when none is open */
+	void *priv;	/* Pointer to backend-specific data. */
+};
+
+
+SET_DECLARE(net_backend_set, struct net_backend);
+
+#define WPRINTF(params) printf params
+
+/* the null backend */
+/*
+ * Initialize the null backend: there is no device to open, so only
+ * mark the descriptor as absent. Always succeeds.
+ */
+static int
+netbe_null_init(struct net_backend *be, const char *devname,
+			net_backend_cb_t cb, void *param)
+{
+	D("initializing null backend");
+
+	be->fd = -1;	/* no file descriptor behind this backend */
+
+	return 0;
+}
+
+/* Nothing to release for the null backend; only emits a debug trace. */
+static void
+netbe_null_cleanup(struct net_backend *be)
+{
+	D("");
+	return;
+}
+
+/* The null backend advertises no virtio-net features. */
+static uint64_t
+netbe_null_get_features(struct net_backend *be)
+{
+	const uint64_t no_features = 0;
+
+	D("");
+	return no_features;
+}
+
+/* Log the requested feature mask and report success (0); nothing is stored. */
+static uint64_t
+netbe_null_set_features(struct net_backend *be, uint64_t features)
+{
+	const uint64_t ok = 0;
+
+	D("setting 0x%lx", features);
+	return ok;
+}
+
+/* Silently discard the packet and report success. */
+static int
+netbe_null_send(struct net_backend *be, struct iovec *iov,
+	int iovcnt, int len, int more)
+{
+	const int sent_ok = 0;	/* pretend we send */
+
+	return sent_ok;
+}
+
+/*
+ * The null backend never produces packets. This is not expected to be
+ * reached, so complain on stderr and return -1.
+ */
+static int
+netbe_null_recv(struct net_backend *be, struct iovec *iov,
+	int iovcnt, int *more)
+{
+	const int unexpected = -1;	/* never called, i believe */
+
+	fprintf(stderr, "netbe_null_recv called ?\n");
+	return unexpected;
+}
+
+/* Method table of the null backend, selected by the "null" name prefix. */
+static struct net_backend null_backend = {
+	.name		= "null",
+
+	/* lifecycle */
+	.init		= netbe_null_init,
+	.cleanup	= netbe_null_cleanup,
+
+	/* feature negotiation */
+	.get_features	= netbe_null_get_features,
+	.set_features	= netbe_null_set_features,
+
+	/* datapath */
+	.send		= netbe_null_send,
+	.recv		= netbe_null_recv,
+};
+
+DATA_SET(net_backend_set, null_backend);
+
+
+/* the tap backend */
+
+/* Private state of a tap backend instance. */
+struct tap_priv {
+	struct mevent *mevp;	/* read event returned by mevent_add() */
+};
+
+
+/*
+ * Release the resources of a tap backend: close the device descriptor
+ * (if any) and free the private data, leaving 'be' with fd == -1 and
+ * priv == NULL. The registered mevent is not removed here.
+ */
+static void
+tap_cleanup(struct net_backend *be)
+{
+	// XXX destroy priv->mevp ?
+	if (be->fd != -1)
+		close(be->fd);
+	be->fd = -1;
+
+	free(be->priv);		/* free(NULL) is a no-op */
+	be->priv = NULL;
+}
+
+
+
+/*
+ * Open the tap device "/dev/<devname>", switch it to non-blocking mode
+ * and register 'cb' (with argument 'param') for read events on it.
+ *
+ * Returns 0 on success. On failure returns -1 after releasing everything
+ * acquired so far, leaving be->fd == -1 and be->priv == NULL.
+ */
+static int
+tap_init(struct net_backend *be, const char *devname,
+			net_backend_cb_t cb, void *param)
+{
+	char tbuf[80];
+	int fd;
+	int opt = 1;
+	struct tap_priv *priv;
+
+	priv = calloc(1, sizeof(*priv));
+	if (priv == NULL) {
+		WPRINTF(("tap_priv alloc failed\n"));
+		return -1;
+	}
+	/*
+	 * Hand each resource over to 'be' as soon as it is acquired, so
+	 * that tap_cleanup() on the error path really releases it.
+	 */
+	be->priv = priv;
+
+	strcpy(tbuf, "/dev/");
+	strlcat(tbuf, devname, sizeof(tbuf));
+
+	fd = open(tbuf, O_RDWR);
+	if (fd == -1) {
+		WPRINTF(("open of tap device %s failed\n", tbuf));
+		goto error;
+	}
+	be->fd = fd;
+
+	/*
+	 * Set non-blocking and register for read
+	 * notifications with the event loop
+	 */
+	if (ioctl(fd, FIONBIO, &opt) < 0) {
+		WPRINTF(("tap device O_NONBLOCK failed\n"));
+		goto error;
+	}
+
+	priv->mevp = mevent_add(fd,
+			EVF_READ,
+			cb,
+			param);
+	if (priv->mevp == NULL) {
+		WPRINTF(("Could not register event\n"));
+		goto error;
+	}
+
+	return 0;
+
+error:
+	/* Closes be->fd (if open) and frees be->priv. */
+	tap_cleanup(be);
+	return -1;
+}
+
+
+/*
+ * Transmit one guest packet on the tap device.
+ *
+ * iov[0] is expected to hold the virtio-net header and is not written
+ * to the device. 'more' is unused. Returns the result of writev().
+ */
+static int
+tap_send(struct net_backend *be, struct iovec *iov, int iovcnt, int len,
+	int more)
+{
+	static char pad[60]; /* all zero bytes */
+	struct iovec *payload = &iov[1];	/* segments after the header */
+	int nseg = iovcnt - 1;
+
+	/* Payload length, i.e. without the virtio-net header descriptor. */
+	len -= iov[0].iov_len;
+
+	/*
+	 * Frames shorter than 60 bytes are padded with zeroes by
+	 * appending one more segment. The caller guarantees that a
+	 * spare iovec entry is always available for this.
+	 */
+	if (len < 60) {
+		payload[nseg].iov_base = pad;
+		payload[nseg].iov_len = 60 - len;
+		nseg++;
+	}
+
+	return writev(be->fd, payload, nseg);
+}
+
+/*
+ * Read one packet from the tap device into iov[0].
+ *
+ * The packet is stored right after an all-zero struct
+ * virtio_net_hdr_mrg_rxbuf placed at the beginning of iov[0]; only the
+ * first iovec element is used. '*more' is always set to 0.
+ *
+ * Returns the header size plus the number of bytes read, 0 if no packet
+ * is pending (EWOULDBLOCK), or -1 if read() failed for any other reason.
+ */
+static int
+tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt, int *more)
+{
+	struct virtio_net_hdr_mrg_rxbuf *vrx;
+	uint8_t *buf;
+	int ret, len = sizeof(*vrx);
+
+	/* Should never be called without a valid tap fd */
+	assert(be->fd != -1);
+	*more = 0;
+
+	/*
+	 * Get a pointer to the rx header, and use the
+	 * data immediately following it for the packet buffer.
+	 */
+	vrx = iov[0].iov_base;
+	buf = (uint8_t *)(vrx + 1);
+
+	ret = read(be->fd, buf, iov[0].iov_len - len);
+	if (ret < 0) {
+		/*
+		 * Do not fall through on a real error: adding the header
+		 * length to -1 would report a bogus, positive packet size.
+		 */
+		return (errno == EWOULDBLOCK) ? 0 : -1;
+	}
+
+	/* Insert an empty rx packet header. */
+	memset(vrx, 0, len);
+	ret += len;
+
+	return ret;
+}
+
+/* The tap backend offers no offload features. */
+static uint64_t
+tap_get_features(struct net_backend *be)
+{
+	const uint64_t extra = 0;	// nothing extra
+
+	return extra;
+}
+
+/*
+ * Accept any feature set without acting on it and return 0.
+ * The handling of a non-mergeable rx header is still to be written
+ * (see the disabled code below).
+ */
+static uint64_t
+tap_set_features(struct net_backend *be, uint64_t features)
+{
+	const uint64_t success = 0;
+
+#if 0 // XXX todo
+	 if (!(features & VIRTIO_NET_F_MRG_RXBUF)) {
+		sc->rx_merge = 0;
+		/* non-merge rx header is 2 bytes shorter */
+		sc->rx_vhdrlen -= 2;
+	}
+#endif
+	return success;
+}
+
+/* Method table of the tap backend, selected by the "tap" name prefix. */
+static struct net_backend tap_backend = {
+	.name		= "tap",
+
+	/* lifecycle */
+	.init		= tap_init,
+	.cleanup	= tap_cleanup,
+
+	/* feature negotiation */
+	.get_features	= tap_get_features,
+	.set_features	= tap_set_features,
+
+	/* datapath */
+	.send		= tap_send,
+	.recv		= tap_recv,
+};
+
+DATA_SET(net_backend_set, tap_backend);
+
+/*
+ * The netmap backend
+ */
+
+
+/* The virtio-net features supported by netmap. */
+#define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
+		VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
+		VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
+		VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
+
+#define NETMAP_POLLMASK (POLLIN | POLLRDNORM | POLLRDBAND)
+
+/* Private state of a netmap backend instance. */
+struct netmap_priv {
+	char ifname[IFNAMSIZ];		/* device name, truncated to fit */
+	struct nm_desc *nmd;		/* descriptor returned by nm_open() */
+	struct netmap_ring *rx;		/* receive ring 0 */
+	struct netmap_ring *tx;		/* transmit ring 0 */
+	pthread_t evloop_tid;		/* thread polling the netmap fd */
+	net_backend_cb_t cb;		/* called when the fd is readable */
+	void *cb_param;			/* last argument passed to cb */
+
+	/*
+	 * Support for split receives: state kept across calls to
+	 * netmap_receive() while a packet is only partially delivered.
+	 */
+	int rx_continue;	/* nonzero while a packet is in progress */
+	int rx_idx;		/* index of the rx slot being consumed */
+	uint8_t *rx_buf;	/* next byte to copy from that slot */
+	int rx_avail;		/* bytes still to copy from that slot */
+	int rx_morefrag;	/* NS_MOREFRAG flag of that slot */
+	int rx_avail_slots;	/* rx slots left, from nm_ring_space() */
+};
+
+/*
+ * Body of the thread created by netmap_init(); 'param' is the
+ * struct net_backend it serves. It blocks in poll() on the backend
+ * descriptor and invokes the registered callback with EVF_READ every
+ * time the descriptor becomes readable.
+ *
+ * The loop never terminates, so the final return is not reached.
+ */
+static void *
+netmap_evloop_thread(void *param)
+{
+	struct net_backend *be = param;
+	struct netmap_priv *priv = be->priv;
+	struct pollfd pfd;
+	int ret;
+
+	for (;;) {
+		/* be->fd is read again at every iteration. */
+		pfd.fd = be->fd;
+		pfd.events = NETMAP_POLLMASK;
+		ret = poll(&pfd, 1, INFTIM);	/* no timeout */
+		if (ret == -1 && errno != EINTR) {
+			/*
+			 * NOTE(review): the error is only reported and the
+			 * loop polls again right away; a persistent failure
+			 * would presumably make this thread spin - confirm.
+			 */
+			WPRINTF(("netmap poll failed, %d\n", errno));
+		} else if (ret == 1 && (pfd.revents & NETMAP_POLLMASK)) {
+			priv->cb(pfd.fd, EVF_READ, priv->cb_param);
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Ask netmap, through a NETMAP_BDG_VNET_HDR request issued on the
+ * backend descriptor, to use a virtio-net header of 'vnet_hdr_len'
+ * bytes on the interface. A failure is only reported with a message.
+ */
+static void
+netmap_set_vnet_hdr_len(struct net_backend *be,
+				   int vnet_hdr_len)
+{
+	int err;
+	struct nmreq req;
+	struct netmap_priv *priv = be->priv;
+
+	memset(&req, 0, sizeof(req));
+	/* Bounded copy: never write past the request's name field. */
+	strlcpy(req.nr_name, priv->ifname, sizeof(req.nr_name));
+	req.nr_version = NETMAP_API;
+	req.nr_cmd = NETMAP_BDG_VNET_HDR;
+	req.nr_arg1 = vnet_hdr_len;
+	err = ioctl(be->fd, NIOCREGIF, &req);
+	if (err) {
+		WPRINTF(("Unable to set vnet header length %d\n",
+				vnet_hdr_len));
+	}
+}
+
+/* Report the offload features listed in NETMAP_FEATURES. */
+static uint64_t
+netmap_get_features(struct net_backend *be)
+{
+	const uint64_t supported = NETMAP_FEATURES;
+
+	return supported;
+}
+
+/*
+ * Apply the negotiated features: when at least one of the offloads in
+ * NETMAP_FEATURES is enabled netmap is told to expect a virtio-net
+ * header in front of each packet, otherwise the header length is set
+ * to 0. Always returns 0.
+ */
+static uint64_t
+netmap_set_features(struct net_backend *be, uint64_t features)
+{
+	const int wants_hdr = (features & NETMAP_FEATURES) != 0;
+	int hdr_len;
+
+	if (wants_hdr)
+		hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	else
+		hdr_len = 0;
+
+	netmap_set_vnet_hdr_len(be, hdr_len);
+
+	return 0;
+}
+
+/*
+ * Open the netmap port 'devname', bind ring 0 in both directions and
+ * start a thread that calls 'cb' (with argument 'param') every time
+ * the port becomes readable.
+ *
+ * Returns 0 on success. On failure returns -1 after releasing what was
+ * acquired, leaving be->fd == -1 and be->priv == NULL.
+ */
+static int
+netmap_init(struct net_backend *be, const char *devname,
+			net_backend_cb_t cb, void *param)
+{
+	const char *ndname = "/dev/netmap";
+	struct netmap_priv *priv = NULL;
+	char tname[40];
+	int err;
+
+	priv = calloc(1, sizeof(*priv));
+	if (priv == NULL) {
+		WPRINTF(("Unable alloc netmap private data\n"));
+		return -1;
+	}
+
+	strncpy(priv->ifname, devname, sizeof(priv->ifname));
+	priv->ifname[sizeof(priv->ifname) - 1] = '\0';
+
+	priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
+	if (priv->nmd == NULL) {
+		WPRINTF(("Unable to nm_open(): device '%s', "
+				"interface '%s', errno (%s)\n",
+				ndname, devname, strerror(errno)));
+		goto err_open;
+	}
+
+	priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
+	priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
+
+	priv->cb = cb;
+	priv->cb_param = param;
+	priv->rx_continue = 0;
+
+	/* The event loop thread reads both fields, so set them first. */
+	be->fd = priv->nmd->fd;
+	be->priv = priv;
+
+	/* Create a thread for netmap poll. */
+	err = pthread_create(&priv->evloop_tid, NULL, netmap_evloop_thread,
+	    (void *)be);
+	if (err != 0) {
+		/*
+		 * Without the thread no receive notification would ever
+		 * be delivered, and evloop_tid is not valid: give up.
+		 * pthread_create() returns the error number directly.
+		 */
+		WPRINTF(("Unable to create netmap event loop thread (%s)\n",
+				strerror(err)));
+		goto err_thread;
+	}
+	snprintf(tname, sizeof(tname), "netmap-evloop-%p", priv);
+	pthread_set_name_np(priv->evloop_tid, tname);
+
+	return 0;
+
+err_thread:
+	nm_close(priv->nmd);
+	be->fd = -1;
+	be->priv = NULL;
+err_open:
+	free(priv);
+
+	return -1;
+}
+
+/*
+ * Close the netmap port of the backend, if one was set up, and mark
+ * the backend descriptor as absent.
+ *
+ * NOTE(review): the private data is not freed and be->priv is left
+ * untouched here, and nothing in this function stops the thread
+ * started by netmap_init(), which keeps using 'be' and 'priv'.
+ * Presumably the thread must be stopped before the memory can be
+ * released - confirm.
+ */
+static void
+netmap_cleanup(struct net_backend *be)
+{
+	struct netmap_priv *priv = be->priv;
+
+	if (priv) {
+		nm_close(priv->nmd);
+	}
+	be->fd = -1;
+}
+
+/*
+ * A fast copy routine only for multiples of 64 bytes, non overlapped.
+ *
+ * Lengths of 1024 bytes or more are handed to bcopy() and copied
+ * exactly. Shorter lengths are copied 64 bytes (eight 64-bit words) at
+ * a time for as long as 'l' is positive, which means that a length
+ * that is not a multiple of 64 is rounded up to the next one: both
+ * buffers must be accessible up to that rounded size.
+ *
+ * NOTE(review): the buffers are accessed through uint64_t pointers,
+ * so they are presumably expected to be suitably aligned - confirm.
+ */
+static inline void
+pkt_copy(const void *_src, void *_dst, int l)
+{
+    const uint64_t *src = _src;
+    uint64_t *dst = _dst;
+    if (l >= 1024) {
+        bcopy(src, dst, l);
+        return;
+    }
+    /* One iteration moves 8 * 8 = 64 bytes. */
+    for (; l > 0; l -= 64) {
+        *dst++ = *src++;
+        *dst++ = *src++;
+        *dst++ = *src++;
+        *dst++ = *src++;
+        *dst++ = *src++;
+        *dst++ = *src++;
+        *dst++ = *src++;
+        *dst++ = *src++;
+    }
+}
+
+/*
+ * Queue one guest packet, described by the 'iovcnt' elements of 'iov',
+ * on the netmap transmit ring. Every iovec fragment is copied into one
+ * or more netmap slots chained with NS_MOREFRAG. 'size' is not used.
+ *
+ * If the ring does not have enough free slots the packet is dropped:
+ * nothing is published to netmap. Unless 'more' is nonzero a TXSYNC is
+ * issued before returning, also when nothing was queued (e.g. when
+ * iovcnt <= 0). Always returns 0.
+ */
+static int
+netmap_send(struct net_backend *be, struct iovec *iov,
+			int iovcnt, int size, int more)
+{
+	struct netmap_priv *priv = be->priv;
+	struct netmap_ring *ring;
+	uint32_t last;
+	uint32_t idx;
+	uint8_t *dst;
+	int j;
+	uint32_t i;
+
+	if (iovcnt <= 0)
+		goto txsync;
+
+	ring = priv->tx;
+	last = i = ring->cur;
+
+	if (nm_ring_space(ring) < iovcnt) {
+		static int c;
+		c++;
+		RD(5, "no space, txsync %d", c);
+		/* Not enough netmap slots. */
+		goto txsync;
+	}
+
+	for (j = 0; j < iovcnt; j++) {
+		int iov_frag_size = iov[j].iov_len;
+		int offset = 0;
+		int nm_frag_size;
+
+		/* Split each iovec fragment over more netmap slots, if
+		   necessary. */
+		while (iov_frag_size) {
+			nm_frag_size = iov_frag_size;
+			if (nm_frag_size > ring->nr_buf_size) {
+				nm_frag_size = ring->nr_buf_size;
+			}
+
+			/*
+			 * ring->cur is only advanced after the loop, so
+			 * the running index must be compared with the
+			 * tail: nm_ring_empty() would keep looking at the
+			 * initial position and never notice that the
+			 * slots are over.
+			 */
+			if (i == ring->tail) {
+				/* We run out of netmap slots while splitting the
+				   iovec fragments. */
+				goto txsync;
+			}
+
+			idx = ring->slot[i].buf_idx;
+			dst = (uint8_t *)NETMAP_BUF(ring, idx);
+
+			ring->slot[i].len = nm_frag_size;
+// #define USE_INDIRECT_BUFFERS
+#ifdef USE_INDIRECT_BUFFERS
+			ring->slot[i].flags = NS_MOREFRAG | NS_INDIRECT;
+			ring->slot[i].ptr = (uintptr_t)(iov[j].iov_base + offset);
+#else	/* !USE_INDIRECT_BUFFERS */
+			ring->slot[i].flags = NS_MOREFRAG;
+			/*
+			 * NOTE(review): pkt_copy() works in 64-byte units,
+			 * so it may touch bytes past nm_frag_size in both
+			 * buffers - confirm this is acceptable here.
+			 */
+			pkt_copy(iov[j].iov_base + offset, dst, nm_frag_size);
+#endif	/* !USE_INDIRECT_BUFFERS */
+
+			last = i;
+			i = nm_ring_next(ring, i);
+
+			offset += nm_frag_size;
+			iov_frag_size -= nm_frag_size;
+		}
+	}
+	/* The last slot must not have NS_MOREFRAG set. */
+	ring->slot[last].flags &= ~NS_MOREFRAG;
+
+	/* Now publish the new slots: update ring->cur and ring->head. */
+	ring->cur = ring->head = i;
+
+txsync:
+	if (!more) {// || nm_ring_space(ring) < 64) {
+		// IFRATE(vq->vq_vs->rate.cur.var2[vq->vq_num]++);
+		// netmap_ioctl_counter++;
+		ioctl(be->fd, NIOCTXSYNC, NULL);
+	}
+
+	return 0;
+}
+
+/*
+ * Copy (part of) the next received packet from the netmap receive ring
+ * into the 'iovcnt' buffers described by 'iov'.
+ *
+ * A packet may span several netmap slots (chained with NS_MOREFRAG) and
+ * may be larger than the buffers supplied by the caller. The position
+ * reached inside the packet is kept in the private data, so that the
+ * copy resumes from there at the next call.
+ *
+ * Returns the number of bytes copied, 0 if the ring has no packet.
+ * '*more' is set to a nonzero value when the packet has not been
+ * completely consumed yet and the function must be called again, and
+ * to 0 otherwise.
+ */
+static int
+netmap_receive(struct net_backend *be, struct iovec *iov,
+			  int iovcnt, int *more)
+{
+	struct netmap_priv *priv = be->priv;
+	struct netmap_ring *ring;
+	int tot = 0;
+	int copylen;
+	int iov_avail;
+	uint8_t *iov_buf;
+
+	assert(iovcnt);
+
+	ring = priv->rx;
+
+	/* Init iovec pointers. */
+	iov_buf = iov->iov_base;
+	iov_avail = iov->iov_len;
+
+	if (!priv->rx_continue) {
+		/* Init netmap pointers: a new packet starts at ring->cur. */
+		priv->rx_idx = ring->cur;
+		priv->rx_avail_slots = nm_ring_space(ring);
+		priv->rx_buf = NETMAP_BUF(ring,
+				ring->slot[priv->rx_idx].buf_idx);
+		priv->rx_avail = ring->slot[priv->rx_idx].len;
+		priv->rx_morefrag = (ring->slot[priv->rx_idx].flags
+					& NS_MOREFRAG);
+
+		/* Nothing to receive: the values read above are unused. */
+		if (!priv->rx_avail_slots) {
+			goto out;
+		}
+		priv->rx_continue = 1;
+	}
+
+	for (;;) {
+		/* Copy as much as fits in both the slot and the iovec. */
+		copylen = priv->rx_avail;
+		if (copylen > iov_avail) {
+			copylen = iov_avail;
+		}
+
+		/* Copy and update pointers. */
+		bcopy(priv->rx_buf, iov_buf, copylen);
+		iov_buf += copylen;
+		iov_avail -= copylen;
+		priv->rx_buf += copylen;
+		priv->rx_avail -= copylen;
+		tot += copylen;
+
+		if (!priv->rx_avail) {
+			/*
+			 * The current slot is exhausted. The packet ends
+			 * here if the slot was its last fragment, or if
+			 * no more slots are available in the ring.
+			 */
+			priv->rx_avail_slots--;
+			if (!priv->rx_morefrag || !priv->rx_avail_slots) {
+				priv->rx_continue = 0;
+				break;
+			}
+			/* Go to the next netmap slot. */
+			priv->rx_idx = nm_ring_next(ring, priv->rx_idx);
+			priv->rx_buf = NETMAP_BUF(ring,
+					ring->slot[priv->rx_idx].buf_idx);
+			priv->rx_avail = ring->slot[priv->rx_idx].len;
+			priv->rx_morefrag =
+				(ring->slot[priv->rx_idx].flags
+					& NS_MOREFRAG);
+		}
+
+		if (!iov_avail) {
+			/*
+			 * The current iovec is full. When it was the last
+			 * one, stop with rx_continue still set so that the
+			 * next call resumes inside the same packet.
+			 */
+			iovcnt--;
+			if (!iovcnt) {
+				break;
+			}
+			/* Go to the next iovec descriptor. */
+			iov++;
+			iov_buf = iov->iov_base;
+			iov_avail = iov->iov_len;
+		}
+	}
+
+	if (!priv->rx_continue) {
+		/* End of reception: Update the ring now. */
+		ring->cur = ring->head = nm_ring_next(ring, priv->rx_idx);
+	}
+out:
+	*more = priv->rx_continue;
+
+	return tot;
+}
+
+/*
+ * Method table of the netmap backend, selected by device names that
+ * start with "netmap" or "vale".
+ */
+static struct net_backend netmap_backend = {
+	.name		= "netmap|vale",
+
+	/* lifecycle */
+	.init		= netmap_init,
+	.cleanup	= netmap_cleanup,
+
+	/* feature negotiation */
+	.get_features	= netmap_get_features,
+	.set_features	= netmap_set_features,
+
+	/* datapath */
+	.send		= netmap_send,
+	.recv		= netmap_receive,
+};
+
+DATA_SET(net_backend_set, netmap_backend);
+
+
+/*
+ * make sure a backend is properly initialized
+ */
+/*
+ * Fill every method a backend left unset with the corresponding null
+ * backend implementation, reporting each missing one on stderr.
+ */
+#define	NETBE_FILL(field, fallback) do {				\
+	if (be->field == NULL) {					\
+		fprintf(stderr, "missing " #field " for %p %s\n",	\
+			be, be->name);					\
+		be->field = (fallback);					\
+	}								\
+} while (0)
+
+static void
+netbe_fix(struct net_backend *be)
+{
+	if (be == NULL)
+		return;
+
+	/* The name comes first: the other messages print it. */
+	if (be->name == NULL) {
+		fprintf(stderr, "missing name for %p\n", be);
+		be->name = "unnamed netbe";
+	}
+
+	NETBE_FILL(init, netbe_null_init);
+	NETBE_FILL(cleanup, netbe_null_cleanup);
+	NETBE_FILL(send, netbe_null_send);
+	NETBE_FILL(recv, netbe_null_recv);
+	NETBE_FILL(get_features, netbe_null_get_features);
+	NETBE_FILL(set_features, netbe_null_set_features);
+}
+
+#undef NETBE_FILL
+
+/*
+ * keys is a set of prefixes separated by '|'.
+ * If the leftmost part of name matches one of the prefixes, return a
+ * non-NULL pointer to the beginning of that prefix inside keys;
+ * return NULL when no prefix matches or when an argument is NULL.
+ */
+static const char *
+netbe_name_match(const char *keys, const char *name)
+{
+	/*
+	 * 'good' points to the prefix under examination for as long as
+	 * it keeps matching, and is NULL once a mismatch is found.
+	 * 'n' is the current position inside name.
+	 */
+	const char *n = name, *good = keys;
+	char c;
+
+	if (!keys || !name)
+		return NULL;
+	while ( (c = *keys++) ) {
+		if (c == '|') { /* reached the separator */
+			if (good)	/* the whole prefix matched */
+				break;
+			/* prepare for new round */
+			n = name;
+			good = keys;
+		} else if (good && c != *n++) {
+			/* 'n' only advances while the prefix is matching */
+			good = NULL; /* drop till next keyword */
+		}
+	}
+	/* Also reached when the last prefix is matched up to its end. */
+	return good;
+}
+
+/*
+ * Create and initialize a backend instance for the device 'devname'.
+ *
+ * The backend is chosen among the registered ones by matching the
+ * beginning of 'devname' against their names. 'cb' and 'param' are
+ * handed to the init method of the backend; 'param' is also stored in
+ * the 'sc' field of the instance.
+ *
+ * Returns the new instance, to be released with netbe_cleanup(), or
+ * NULL if no backend matches, memory is exhausted or the backend
+ * initialization fails.
+ */
+struct net_backend *
+netbe_init(const char *devname, net_backend_cb_t cb, void *param)
+{
+	/*
+	 * Choose the network backend depending on the user
+	 * provided device name.
+	 */
+	struct net_backend **pbe, *ret, *be = NULL;
+	int err;
+
+	SET_FOREACH(pbe, net_backend_set) {
+		netbe_fix(*pbe); /* make sure we have all fields */
+		if (netbe_name_match((*pbe)->name, devname)) {
+			be = *pbe;
+			break;
+		}
+	}
+	if (be == NULL)
+		return NULL; /* or null backend ? */
+
+	ret = calloc(1, sizeof(*ret));
+	if (ret == NULL) {
+		WPRINTF(("Unable to alloc net backend\n"));
+		return NULL;
+	}
+
+	/* The instance starts as a copy of the backend template. */
+	*ret = *be;
+	ret->fd = -1;
+	ret->priv = NULL;
+	ret->sc = param;
+
+	err = be->init(ret, devname, cb, param);
+	if (err) {
+		free(ret);
+		ret = NULL;
+	}
+	return ret;
+}
+
+
+/*
+ * Release a backend instance: run its cleanup method, then free the
+ * instance itself. A NULL 'be' is ignored.
+ */
+void
+netbe_cleanup(struct net_backend *be)
+{
+	if (be != NULL) {
+		be->cleanup(be);
+		free(be);
+	}
+}
+
+
+/*
+ * Return the virtio-net features supported by the backend, or 0 when
+ * 'be' is NULL.
+ */
+uint64_t
+netbe_get_features(struct net_backend *be)
+{
+	return (be != NULL) ? be->get_features(be) : 0;
+}
+
+
+/*
+ * Pass the negotiated virtio-net features to the backend and return
+ * what its set_features method returns, or 0 when 'be' is NULL.
+ */
+uint64_t
+netbe_set_features(struct net_backend *be, uint64_t features)
+{
+	return (be != NULL) ? be->set_features(be, features) : 0;
+}
+
+
+/*
+ * Send a packet through the backend. Returns what the send method of
+ * the backend returns, or -1 when 'be' is NULL.
+ */
+int
+netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt, int len,
+	int more)
+{
+	int rv = -1;
+
+	if (be != NULL) {
+#if 0
+		int i;
+		D("sending iovcnt %d len %d iovec %p", iovcnt, len, iov);
+		for (i=0; i < iovcnt; i++)
+			D("   %3d: %4d %p", i, (int)iov[i].iov_len, iov[i].iov_base);
+#endif
+		rv = be->send(be, iov, iovcnt, len, more);
+	}
+
+	return rv;
+}
+
+
+/*
+ * Receive (a chunk of) a packet from the backend. Returns what the recv
+ * method of the backend returns, or -1 when 'be' is NULL.
+ */
+// XXX sc->rx_vhdrlen is the negotiated length
+int
+netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt, int *more)
+{
+	int rv = -1;
+
+	if (be != NULL)
+		rv = be->recv(be, iov, iovcnt, more);
+
+	return rv;
+}

Added: soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve/net_backends.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve/net_backends.h	Wed Jul 15 18:56:13 2015	(r288453)
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 2014 Vincenzo Maffione <v.maffione at gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __NET_BACKENDS_H__
+#define __NET_BACKENDS_H__
+
+#include <stdint.h>
+
+extern int netmap_ioctl_counter;
+
+/*
+ * Callback invoked by a backend when its file descriptor is readable.
+ * It receives the descriptor, the event type and the opaque pointer
+ * that was passed to netbe_init().
+ */
+typedef void (*net_backend_cb_t)(int, enum ev_type, void *param);
+
+/*
+ * Interface between virtio-net and the network backend.
+ * struct net_backend is opaque to the users of this header.
+ */
+struct net_backend;
+
+/*
+ * Create the backend matching 'devname'. Returns NULL on failure.
+ */
+struct net_backend *netbe_init(const char *devname,
+			net_backend_cb_t cb, void *param);
+/* Release a backend created by netbe_init(); NULL is ignored. */
+void	netbe_cleanup(struct net_backend *be);
+/* Both return 0 when 'be' is NULL. */
+uint64_t netbe_get_features(struct net_backend *be);
+uint64_t netbe_set_features(struct net_backend *be, uint64_t features);
+/* Both return -1 when 'be' is NULL. */
+int	netbe_send(struct net_backend *be, struct iovec *iov,
+                        int iovcnt, int len, int more);
+int	netbe_recv(struct net_backend *be, struct iovec *iov,
+		    int iovcnt, int *more);
+
+#endif /* __NET_BACKENDS_H__ */

Modified: soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve/pci_virtio_net.c
==============================================================================
--- soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve/pci_virtio_net.c	Wed Jul 15 17:43:13 2015	(r288452)
+++ soc2015/stefano/ptnetmap/stable/10/usr.sbin/bhyve/pci_virtio_net.c	Wed Jul 15 18:56:13 2015	(r288453)
@@ -54,6 +54,7 @@
 #include "pci_emul.h"
 #include "mevent.h"
 #include "virtio.h"
+#include "net_backends.h"
 
 #define VTNET_RINGSZ	1024
 
@@ -130,9 +131,8 @@
 	struct virtio_softc vsc_vs;
 	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
 	pthread_mutex_t vsc_mtx;
-	struct mevent	*vsc_mevp;
 
-	int		vsc_tapfd;
+	struct net_backend *vsc_be;
 	int		vsc_rx_ready;
 	volatile int	resetting;	/* set and checked outside lock */
 
@@ -228,73 +228,38 @@
 }
 
 /*
- * Called to send a buffer chain out to the tap device
- */
-static void
-pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
-		 int len)
-{
-	static char pad[60]; /* all zero bytes */
-
-	if (sc->vsc_tapfd == -1)
-		return;
-
-	/*
-	 * If the length is < 60, pad out to that and add the
-	 * extra zero'd segment to the iov. It is guaranteed that
-	 * there is always an extra iov available by the caller.
-	 */
-	if (len < 60) {
-		iov[iovcnt].iov_base = pad;
-		iov[iovcnt].iov_len = 60 - len;
-		iovcnt++;
-	}
-	(void) writev(sc->vsc_tapfd, iov, iovcnt);
-}
-
-/*
  *  Called when there is read activity on the tap file descriptor.
  * Each buffer posted by the guest is assumed to be able to contain
  * an entire ethernet frame + rx header.
  *  MP note: the dummybuf is only used for discarding frames, so there
  * is no need for it to be per-vtnet or locked.
  */
-static uint8_t dummybuf[2048];
-
-static __inline struct iovec *
-rx_iov_trim(struct iovec *iov, int *niov, int tlen)
+void
+pci_vtnet_rx_discard(struct pci_vtnet_softc *sc, struct iovec *iov)
 {
-	struct iovec *riov;
-
-	/* XXX short-cut: assume first segment is >= tlen */
-	assert(iov[0].iov_len >= tlen);
+	int more;
 
-	iov[0].iov_len -= tlen;
-	if (iov[0].iov_len == 0) {
-		assert(*niov > 1);
-		*niov -= 1;
-		riov = &iov[1];
-	} else {
-		iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen);
-		riov = &iov[0];
-	}
+	/*
+	 * MP note: the dummybuf is only used to discard frames,
+	 * so there is no need for it to be per-vtnet or locked.
+	 * We only make it large enough for TSO-sized segment.
+	 */
+	static uint8_t dummybuf[65536+64];
 

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-soc-all mailing list