git: 40993deee7d6 - stable/14 - bhyve: Split backends into separate files

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Thu, 14 Dec 2023 14:43:29 UTC
The branch stable/14 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=40993deee7d624ccea318881b60c779ee4306c2d

commit 40993deee7d624ccea318881b60c779ee4306c2d
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2023-11-22 19:10:27 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2023-12-14 14:43:02 +0000

    bhyve: Split backends into separate files
    
    Currently the net_backend structure definition is private to
    net_backends.c, so all of the backend definitions are there.  While
    adding a new backend to use libslirp, it was noted that this file is
    somewhat cluttered.  Move the netmap and netgraph backends to their own
    files and clean up includes a bit.  No functional change intended.
    
    Reviewed by:    corvink, jhb
    MFC after:      3 weeks
    Sponsored by:   Innovate UK
    Differential Revision:  https://reviews.freebsd.org/D42689
    
    (cherry picked from commit be74aede49fb480792448bf563c5079998de7cbd)
---
 usr.sbin/bhyve/Makefile               |   5 +-
 usr.sbin/bhyve/net_backend_netgraph.c | 191 ++++++++++
 usr.sbin/bhyve/net_backend_netmap.c   | 384 ++++++++++++++++++++
 usr.sbin/bhyve/net_backends.c         | 661 +---------------------------------
 usr.sbin/bhyve/net_backends.h         |   7 +-
 usr.sbin/bhyve/net_backends_priv.h    | 152 ++++++++
 6 files changed, 756 insertions(+), 644 deletions(-)

diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
index de8e87d2ad49..6ce7f6c7ba62 100644
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -33,6 +33,7 @@ SRCS=	\
 	iov.c			\
 	mem.c			\
 	mevent.c		\
+	net_backend_netmap.c	\
 	net_backends.c		\
 	net_utils.c		\
 	pci_emul.c		\
@@ -92,8 +93,8 @@ CFLAGS+=-DINET
 CFLAGS+=-DINET6
 .endif
 .if ${MK_NETGRAPH_SUPPORT} != "no"
-CFLAGS+=-DNETGRAPH
-LIBADD+=    netgraph
+SRCS+=	net_backend_netgraph.c
+LIBADD+=	netgraph
 .endif
 .if ${MK_OPENSSL} == "no"
 CFLAGS+=-DNO_OPENSSL
diff --git a/usr.sbin/bhyve/net_backend_netgraph.c b/usr.sbin/bhyve/net_backend_netgraph.c
new file mode 100644
index 000000000000..7d1659d611e3
--- /dev/null
+++ b/usr.sbin/bhyve/net_backend_netgraph.c
@@ -0,0 +1,191 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <err.h>
+#include <netgraph.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include "config.h"
+#include "debug.h"
+#include "net_backends.h"
+#include "net_backends_priv.h"
+
+#define NG_SBUF_MAX_SIZE (4 * 1024 * 1024)
+
+/*
+ * Initialize a netgraph backend instance.
+ *
+ * Reads the "path", "hook" (default "vmlink"), "peerhook" and "socket"
+ * values from 'nvl', creates a netgraph socket node and asks it to connect
+ * its hook to the configured peer.  The node's data socket becomes be->fd:
+ * it is switched to non-blocking mode, its send and receive buffers are
+ * enlarged, and it is registered (disabled) with mevent so that 'cb' is
+ * invoked with 'param' on read events.
+ *
+ * Returns 0 on success and -1 on failure.  A Capsicum rights-limit failure
+ * is fatal (errx()).
+ */
+static int
+ng_init(struct net_backend *be, const char *devname __unused,
+	 nvlist_t *nvl, net_be_rxeof_t cb, void *param)
+{
+	struct tap_priv *p = NET_BE_PRIV(be);
+	struct ngm_connect ngc;
+	const char *value, *nodename;
+	int sbsz;
+	int ctrl_sock;
+	int flags;
+	unsigned long maxsbsz;
+	size_t msbsz;
+#ifndef WITHOUT_CAPSICUM
+	cap_rights_t rights;
+#endif
+
+	if (cb == NULL) {
+		EPRINTLN("Netgraph backend requires non-NULL callback");
+		return (-1);
+	}
+
+	be->fd = -1;
+
+	/*
+	 * ngc is zeroed up front and each strncpy() below copies at most
+	 * "size - 1" bytes, so the strings stay NUL-terminated (over-long
+	 * values are silently truncated).
+	 */
+	memset(&ngc, 0, sizeof(ngc));
+
+	/* Nothing has been allocated yet, so early failures return directly. */
+	value = get_config_value_node(nvl, "path");
+	if (value == NULL) {
+		EPRINTLN("path must be provided");
+		return (-1);
+	}
+	strncpy(ngc.path, value, NG_PATHSIZ - 1);
+
+	value = get_config_value_node(nvl, "hook");
+	if (value == NULL)
+		value = "vmlink";
+	strncpy(ngc.ourhook, value, NG_HOOKSIZ - 1);
+
+	value = get_config_value_node(nvl, "peerhook");
+	if (value == NULL) {
+		EPRINTLN("peer hook must be provided");
+		return (-1);
+	}
+	strncpy(ngc.peerhook, value, NG_HOOKSIZ - 1);
+
+	/*
+	 * "socket" is optional: nodename is passed on unchecked and may be
+	 * NULL.  NOTE(review): presumably NgMkSockNode() then creates an
+	 * unnamed node -- confirm against netgraph(3).
+	 */
+	nodename = get_config_value_node(nvl, "socket");
+	if (NgMkSockNode(nodename,
+		&ctrl_sock, &be->fd) < 0) {
+		EPRINTLN("can't get Netgraph sockets");
+		return (-1);
+	}
+
+	/* Send NGM_CONNECT to our own node (".") to attach to the peer. */
+	if (NgSendMsg(ctrl_sock, ".",
+		NGM_GENERIC_COOKIE,
+		NGM_CONNECT, &ngc, sizeof(ngc)) < 0) {
+		EPRINTLN("can't connect to node");
+		close(ctrl_sock);
+		goto error;
+	}
+
+	/* The control socket is not used past this point. */
+	close(ctrl_sock);
+
+	flags = fcntl(be->fd, F_GETFL);
+
+	if (flags < 0) {
+		EPRINTLN("can't get socket flags");
+		goto error;
+	}
+
+	if (fcntl(be->fd, F_SETFL, flags | O_NONBLOCK) < 0) {
+		EPRINTLN("can't set O_NONBLOCK flag");
+		goto error;
+	}
+
+	/*
+	 * The default ng_socket(4) buffer's size is too low.
+	 * Calculate the minimum value between NG_SBUF_MAX_SIZE
+	 * and kern.ipc.maxsockbuf.
+	 */
+	msbsz = sizeof(maxsbsz);
+	if (sysctlbyname("kern.ipc.maxsockbuf", &maxsbsz, &msbsz,
+		NULL, 0) < 0) {
+		EPRINTLN("can't get 'kern.ipc.maxsockbuf' value");
+		goto error;
+	}
+
+	/*
+	 * We can't set the socket buffer size to kern.ipc.maxsockbuf value,
+	 * as it takes into account the mbuf(9) overhead.
+	 */
+	maxsbsz = maxsbsz * MCLBYTES / (MSIZE + MCLBYTES);
+
+	sbsz = MIN(NG_SBUF_MAX_SIZE, maxsbsz);
+
+	/* The same size is applied to both the send and receive buffers. */
+	if (setsockopt(be->fd, SOL_SOCKET, SO_SNDBUF, &sbsz,
+		sizeof(sbsz)) < 0) {
+		EPRINTLN("can't set TX buffer size");
+		goto error;
+	}
+
+	if (setsockopt(be->fd, SOL_SOCKET, SO_RCVBUF, &sbsz,
+		sizeof(sbsz)) < 0) {
+		EPRINTLN("can't set RX buffer size");
+		goto error;
+	}
+
+#ifndef WITHOUT_CAPSICUM
+	/* Restrict the data socket to event, read and write rights. */
+	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
+	if (caph_rights_limit(be->fd, &rights) == -1)
+		errx(EX_OSERR, "Unable to apply rights for sandbox");
+#endif
+
+	/* Start with an empty bounce buffer. */
+	memset(p->bbuf, 0, sizeof(p->bbuf));
+	p->bbuflen = 0;
+
+	p->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
+	if (p->mevp == NULL) {
+		EPRINTLN("Could not register event");
+		goto error;
+	}
+
+	return (0);
+
+error:
+	/* The private data is a struct tap_priv, so the tap cleanup is reused. */
+	tap_cleanup(be);
+	return (-1);
+}
+
+/*
+ * Backend descriptor for the "netgraph" prefix.  Only the init routine is
+ * netgraph-specific; the private data is a struct tap_priv and every other
+ * callback is the tap implementation.
+ */
+static struct net_backend ng_backend = {
+	.prefix = "netgraph",
+	.priv_size = sizeof(struct tap_priv),
+	.init = ng_init,
+	.cleanup = tap_cleanup,
+	.send = tap_send,
+	.peek_recvlen = tap_peek_recvlen,
+	.recv = tap_recv,
+	.recv_enable = tap_recv_enable,
+	.recv_disable = tap_recv_disable,
+	.get_cap = tap_get_cap,
+	.set_cap = tap_set_cap,
+};
+
+/* Add the descriptor to the net_backend_set linker set. */
+DATA_SET(net_backend_set, ng_backend);
diff --git a/usr.sbin/bhyve/net_backend_netmap.c b/usr.sbin/bhyve/net_backend_netmap.c
new file mode 100644
index 000000000000..5ba11b96797c
--- /dev/null
+++ b/usr.sbin/bhyve/net_backend_netmap.c
@@ -0,0 +1,384 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <net/if.h>
+#include <net/netmap.h>
+#include <net/netmap_virt.h>
+#define NETMAP_WITH_LIBS
+#include <net/netmap_user.h>
+
+#include <assert.h>
+
+#include "debug.h"
+#include "iov.h"
+#include "mevent.h"
+#include "net_backends.h"
+#include "net_backends_priv.h"
+
+/* The virtio-net features supported by netmap. */
+#define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
+		VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
+		VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
+		VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
+
+/* Private state of a netmap/VALE backend instance. */
+struct netmap_priv {
+	char ifname[IFNAMSIZ];		/* port name, copied from devname */
+	struct nm_desc *nmd;		/* descriptor returned by nm_open() */
+	uint16_t memid;			/* nr_arg2 of the descriptor's request */
+	struct netmap_ring *rx;		/* RX ring 0 of the port */
+	struct netmap_ring *tx;		/* TX ring 0 of the port */
+	struct mevent *mevp;		/* read event registered on the fd */
+	net_be_rxeof_t cb;		/* callback passed to the init routine */
+	void *cb_param;			/* argument passed along with cb */
+};
+
+/*
+ * Prepare 'req' for a netmap ioctl: zero it, then fill in the port name
+ * 'ifname' and the NETMAP_API version.  The caller sets the command fields.
+ */
+static void
+nmreq_init(struct nmreq *req, char *ifname)
+{
+
+	memset(req, 0, sizeof(*req));
+	strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
+	req->nr_version = NETMAP_API;
+}
+
+/*
+ * Ask netmap to use a virtio-net header of 'vnet_hdr_len' bytes on this
+ * port, by issuing a NETMAP_BDG_VNET_HDR command through NIOCREGIF.
+ *
+ * On success the new length is recorded in be->be_vnet_hdr_len and 0 is
+ * returned; otherwise the non-zero ioctl() result is returned and the
+ * recorded length is left unchanged.
+ */
+static int
+netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
+{
+	int err;
+	struct nmreq req;
+	struct netmap_priv *priv = NET_BE_PRIV(be);
+
+	nmreq_init(&req, priv->ifname);
+	req.nr_cmd = NETMAP_BDG_VNET_HDR;
+	req.nr_arg1 = vnet_hdr_len;
+	err = ioctl(be->fd, NIOCREGIF, &req);
+	if (err) {
+		EPRINTLN("Unable to set vnet header length %d", vnet_hdr_len);
+		return (err);
+	}
+
+	be->be_vnet_hdr_len = vnet_hdr_len;
+
+	return (0);
+}
+
+/*
+ * Probe whether the port accepts a virtio-net header of 'vnet_hdr_len'
+ * bytes.  The probe works by actually setting that length and then
+ * switching back to the previously recorded one.
+ *
+ * Returns 1 if the length is supported (or is already in use), 0 otherwise.
+ */
+static int
+netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
+{
+	unsigned prev_hdr_len = be->be_vnet_hdr_len;
+	int ret;
+
+	if (vnet_hdr_len == prev_hdr_len) {
+		return (1);
+	}
+
+	ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
+	if (ret) {
+		return (0);
+	}
+
+	/* Restore the previous length; the result of this call is not checked. */
+	netmap_set_vnet_hdr_len(be, prev_hdr_len);
+
+	return (1);
+}
+
+/*
+ * Report the virtio-net features this backend can offer: NETMAP_FEATURES
+ * if the port accepts a header of VNET_HDR_LEN bytes, otherwise none.
+ */
+static uint64_t
+netmap_get_cap(struct net_backend *be)
+{
+
+	return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
+	    NETMAP_FEATURES : 0);
+}
+
+/*
+ * Apply the negotiated capabilities.  The feature bits are ignored here;
+ * only the virtio-net header length is pushed to the port.  Returns the
+ * result of netmap_set_vnet_hdr_len().
+ */
+static int
+netmap_set_cap(struct net_backend *be, uint64_t features __unused,
+    unsigned vnet_hdr_len)
+{
+
+	return (netmap_set_vnet_hdr_len(be, vnet_hdr_len));
+}
+
+/*
+ * Initialize a netmap (or VALE) backend instance on port 'devname'.
+ *
+ * Opens the port with nm_open(), caches its first TX and RX rings and the
+ * descriptor's fd (as be->fd), and registers a disabled read event that
+ * invokes 'cb' with 'param'.
+ *
+ * Returns 0 on success and -1 on failure.  On failure everything acquired
+ * here has been released again, matching the error paths of the tap and
+ * netgraph init routines.
+ */
+static int
+netmap_init(struct net_backend *be, const char *devname,
+    nvlist_t *nvl __unused, net_be_rxeof_t cb, void *param)
+{
+	struct netmap_priv *priv = NET_BE_PRIV(be);
+
+	/* strlcpy() always NUL-terminates the (possibly truncated) copy. */
+	strlcpy(priv->ifname, devname, sizeof(priv->ifname));
+
+	priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
+	if (priv->nmd == NULL) {
+		EPRINTLN("Unable to nm_open(): interface '%s', errno (%s)",
+		    devname, strerror(errno));
+		return (-1);
+	}
+
+	priv->memid = priv->nmd->req.nr_arg2;
+	priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
+	priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
+	priv->cb = cb;
+	priv->cb_param = param;
+	be->fd = priv->nmd->fd;
+
+	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
+	if (priv->mevp == NULL) {
+		EPRINTLN("Could not register event");
+		/*
+		 * Undo nm_open() so the descriptor is not leaked, and clear
+		 * the references to it so that a later cleanup is a no-op.
+		 */
+		nm_close(priv->nmd);
+		priv->nmd = NULL;
+		be->fd = -1;
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Release the resources of a netmap backend instance: delete the read
+ * event and close the netmap descriptor, each only if present, then mark
+ * be->fd as invalid.
+ */
+static void
+netmap_cleanup(struct net_backend *be)
+{
+	struct netmap_priv *priv = NET_BE_PRIV(be);
+
+	if (priv->mevp) {
+		mevent_delete(priv->mevp);
+	}
+	if (priv->nmd) {
+		nm_close(priv->nmd);
+	}
+	be->fd = -1;
+}
+
+/*
+ * Transmit the packet described by 'iov'/'iovcnt' on the netmap TX ring.
+ *
+ * The data is copied into consecutive TX slots starting at ring->head; a
+ * new slot is started when the current netmap buffer is full and data
+ * remains, and every slot but the last is flagged NS_MOREFRAG.  If the ring
+ * has no free slot (head reaches tail) the packet is dropped: ring->head is
+ * left untouched, so slots filled so far are not handed over to netmap.
+ *
+ * NIOCTXSYNC is issued before returning in either case.  The return value
+ * is the sum of the lengths of the iovec entries visited: the full packet
+ * length on success, and 0 or a partial count when the packet was dropped.
+ */
+static ssize_t
+netmap_send(struct net_backend *be, const struct iovec *iov,
+	    int iovcnt)
+{
+	struct netmap_priv *priv = NET_BE_PRIV(be);
+	struct netmap_ring *ring;
+	ssize_t totlen = 0;
+	int nm_buf_size;
+	int nm_buf_len;
+	uint32_t head;
+	uint8_t *nm_buf;
+	int j;
+
+	ring = priv->tx;
+	head = ring->head;
+	if (head == ring->tail) {
+		EPRINTLN("No space, drop %zu bytes", count_iov(iov, iovcnt));
+		goto txsync;
+	}
+	/*
+	 * nm_buf/nm_buf_size track the write position and the space left in
+	 * the current slot's buffer; nm_buf_len is the amount stored in it.
+	 */
+	nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
+	nm_buf_size = ring->nr_buf_size;
+	nm_buf_len = 0;
+
+	for (j = 0; j < iovcnt; j++) {
+		uint8_t *iov_frag_buf = iov[j].iov_base;
+		int iov_frag_size = iov[j].iov_len;
+
+		totlen += iov_frag_size;
+
+		/*
+		 * Split each iovec fragment over more netmap slots, if
+		 * necessary.
+		 */
+		for (;;) {
+			int copylen;
+
+			copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
+			memcpy(nm_buf, iov_frag_buf, copylen);
+
+			iov_frag_buf += copylen;
+			iov_frag_size -= copylen;
+			nm_buf += copylen;
+			nm_buf_size -= copylen;
+			nm_buf_len += copylen;
+
+			if (iov_frag_size == 0) {
+				break;
+			}
+
+			/*
+			 * Fragment data remains, so the current buffer is
+			 * full: close this slot and move to the next one.
+			 */
+			ring->slot[head].len = nm_buf_len;
+			ring->slot[head].flags = NS_MOREFRAG;
+			head = nm_ring_next(ring, head);
+			if (head == ring->tail) {
+				/*
+				 * We ran out of netmap slots while
+				 * splitting the iovec fragments.
+				 */
+				EPRINTLN("No space, drop %zu bytes",
+				    count_iov(iov, iovcnt));
+				goto txsync;
+			}
+			nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
+			nm_buf_size = ring->nr_buf_size;
+			nm_buf_len = 0;
+		}
+	}
+
+	/* Complete the last slot, which must not have NS_MOREFRAG set. */
+	ring->slot[head].len = nm_buf_len;
+	ring->slot[head].flags = 0;
+	head = nm_ring_next(ring, head);
+
+	/* Now update ring->head and ring->cur. */
+	ring->head = ring->cur = head;
+txsync:
+	/* Reached on both the success and the drop paths; result not checked. */
+	ioctl(be->fd, NIOCTXSYNC, NULL);
+
+	return (totlen);
+}
+
+/*
+ * Return the length of the next packet waiting on the RX ring without
+ * consuming it: the slot lengths are summed starting at ring->head, up to
+ * and including the first slot without NS_MOREFRAG (or until the tail is
+ * reached).  Returns 0 if the ring is empty.
+ */
+static ssize_t
+netmap_peek_recvlen(struct net_backend *be)
+{
+	struct netmap_priv *priv = NET_BE_PRIV(be);
+	struct netmap_ring *ring = priv->rx;
+	uint32_t head = ring->head;
+	ssize_t totlen = 0;
+
+	while (head != ring->tail) {
+		struct netmap_slot *slot = ring->slot + head;
+
+		totlen += slot->len;
+		if ((slot->flags & NS_MOREFRAG) == 0)
+			break;
+		head = nm_ring_next(ring, head);
+	}
+
+	return (totlen);
+}
+
+/*
+ * Receive the next packet from the netmap RX ring into 'iov'/'iovcnt'.
+ *
+ * Slots are read starting at ring->head, following NS_MOREFRAG up to the
+ * last slot of the packet, and their contents are copied into the iovec
+ * entries in order.
+ *
+ * Returns the number of bytes copied on success, 0 if the ring runs out of
+ * slots (including in the middle of a fragmented packet), or -ENOSPC if the
+ * iovec is too short for the packet.  ring->head and ring->cur are advanced
+ * only on success; in the other cases no slot is released.
+ */
+static ssize_t
+netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
+{
+	struct netmap_priv *priv = NET_BE_PRIV(be);
+	struct netmap_slot *slot = NULL;
+	struct netmap_ring *ring;
+	uint8_t *iov_frag_buf;
+	int iov_frag_size;
+	ssize_t totlen = 0;
+	uint32_t head;
+
+	assert(iovcnt);
+
+	ring = priv->rx;
+	head = ring->head;
+	iov_frag_buf = iov->iov_base;
+	iov_frag_size = iov->iov_len;
+
+	do {
+		uint8_t *nm_buf;
+		int nm_buf_len;
+
+		if (head == ring->tail) {
+			return (0);
+		}
+
+		slot = ring->slot + head;
+		nm_buf = NETMAP_BUF(ring, slot->buf_idx);
+		nm_buf_len = slot->len;
+
+		/* Spread this slot's data over as many iovec entries as needed. */
+		for (;;) {
+			int copylen = nm_buf_len < iov_frag_size ?
+			    nm_buf_len : iov_frag_size;
+
+			memcpy(iov_frag_buf, nm_buf, copylen);
+			nm_buf += copylen;
+			nm_buf_len -= copylen;
+			iov_frag_buf += copylen;
+			iov_frag_size -= copylen;
+			totlen += copylen;
+
+			if (nm_buf_len == 0) {
+				break;
+			}
+
+			/* Slot data remains, so the current iovec entry is full. */
+			iov++;
+			iovcnt--;
+			if (iovcnt == 0) {
+				/* No space to receive. */
+				EPRINTLN("Short iov, drop %zd bytes",
+				    totlen);
+				return (-ENOSPC);
+			}
+			iov_frag_buf = iov->iov_base;
+			iov_frag_size = iov->iov_len;
+		}
+
+		head = nm_ring_next(ring, head);
+
+	} while (slot->flags & NS_MOREFRAG);
+
+	/* Release slots to netmap. */
+	ring->head = ring->cur = head;
+
+	return (totlen);
+}
+
+/* Enable the read event registered for this backend's netmap fd. */
+static void
+netmap_recv_enable(struct net_backend *be)
+{
+	struct netmap_priv *priv = NET_BE_PRIV(be);
+
+	mevent_enable(priv->mevp);
+}
+
+/* Disable the read event registered for this backend's netmap fd. */
+static void
+netmap_recv_disable(struct net_backend *be)
+{
+	struct netmap_priv *priv = NET_BE_PRIV(be);
+
+	mevent_disable(priv->mevp);
+}
+
+/*
+ * Backend descriptor for the "netmap" prefix; the private data is a
+ * struct netmap_priv.
+ */
+static struct net_backend netmap_backend = {
+	.prefix = "netmap",
+	.priv_size = sizeof(struct netmap_priv),
+	.init = netmap_init,
+	.cleanup = netmap_cleanup,
+	.send = netmap_send,
+	.peek_recvlen = netmap_peek_recvlen,
+	.recv = netmap_recv,
+	.recv_enable = netmap_recv_enable,
+	.recv_disable = netmap_recv_disable,
+	.get_cap = netmap_get_cap,
+	.set_cap = netmap_set_cap,
+};
+
+/*
+ * A clone of the netmap backend, with a different prefix ("vale"): the
+ * callbacks and private data size are the same as in netmap_backend.
+ */
+static struct net_backend vale_backend = {
+	.prefix = "vale",
+	.priv_size = sizeof(struct netmap_priv),
+	.init = netmap_init,
+	.cleanup = netmap_cleanup,
+	.send = netmap_send,
+	.peek_recvlen = netmap_peek_recvlen,
+	.recv = netmap_recv,
+	.recv_enable = netmap_recv_enable,
+	.recv_disable = netmap_recv_disable,
+	.get_cap = netmap_get_cap,
+	.set_cap = netmap_set_cap,
+};
+
+/* Add both descriptors to the net_backend_set linker set. */
+DATA_SET(net_backend_set, netmap_backend);
+DATA_SET(net_backend_set, vale_backend);
diff --git a/usr.sbin/bhyve/net_backends.c b/usr.sbin/bhyve/net_backends.c
index de6afab53854..2d11c45f217a 100644
--- a/usr.sbin/bhyve/net_backends.c
+++ b/usr.sbin/bhyve/net_backends.c
@@ -32,8 +32,7 @@
  * features) is exported by net_backends.h.
  */
 
-#include <sys/cdefs.h>
-#include <sys/types.h>		/* u_short etc */
+#include <sys/types.h>
 #ifndef WITHOUT_CAPSICUM
 #include <sys/capsicum.h>
 #endif
@@ -43,153 +42,35 @@
 
 #include <net/if.h>
 #include <net/if_tap.h>
-#include <net/netmap.h>
-#include <net/netmap_virt.h>
-#define NETMAP_WITH_LIBS
-#include <net/netmap_user.h>
 
+#include <assert.h>
 #ifndef WITHOUT_CAPSICUM
 #include <capsicum_helpers.h>
 #endif
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <poll.h>
+#include <pthread.h>
+#include <pthread_np.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
-#include <unistd.h>
 #include <sysexits.h>
-#include <assert.h>
-#include <pthread.h>
-#include <pthread_np.h>
-#include <poll.h>
-#include <assert.h>
-
-#ifdef NETGRAPH
-#include <sys/param.h>
-#include <sys/sysctl.h>
-#include <netgraph.h>
-#endif
+#include <unistd.h>
 
 #include "config.h"
 #include "debug.h"
 #include "iov.h"
 #include "mevent.h"
 #include "net_backends.h"
+#include "net_backends_priv.h"
 #include "pci_emul.h"
 
-#include <sys/linker_set.h>
-
-/*
- * Each network backend registers a set of function pointers that are
- * used to implement the net backends API.
- * This might need to be exposed if we implement backends in separate files.
- */
-struct net_backend {
-	const char *prefix;	/* prefix matching this backend */
-
-	/*
-	 * Routines used to initialize and cleanup the resources needed
-	 * by a backend. The cleanup function is used internally,
-	 * and should not be called by the frontend.
-	 */
-	int (*init)(struct net_backend *be, const char *devname,
-	    nvlist_t *nvl, net_be_rxeof_t cb, void *param);
-	void (*cleanup)(struct net_backend *be);
-
-	/*
-	 * Called to serve a guest transmit request. The scatter-gather
-	 * vector provided by the caller has 'iovcnt' elements and contains
-	 * the packet to send.
-	 */
-	ssize_t (*send)(struct net_backend *be, const struct iovec *iov,
-	    int iovcnt);
-
-	/*
-	 * Get the length of the next packet that can be received from
-	 * the backend. If no packets are currently available, this
-	 * function returns 0.
-	 */
-	ssize_t (*peek_recvlen)(struct net_backend *be);
-
-	/*
-	 * Called to receive a packet from the backend. When the function
-	 * returns a positive value 'len', the scatter-gather vector
-	 * provided by the caller contains a packet with such length.
-	 * The function returns 0 if the backend doesn't have a new packet to
-	 * receive.
-	 */
-	ssize_t (*recv)(struct net_backend *be, const struct iovec *iov,
-	    int iovcnt);
-
-	/*
-	 * Ask the backend to enable or disable receive operation in the
-	 * backend. On return from a disable operation, it is guaranteed
-	 * that the receive callback won't be called until receive is
-	 * enabled again. Note however that it is up to the caller to make
-	 * sure that netbe_recv() is not currently being executed by another
-	 * thread.
-	 */
-	void (*recv_enable)(struct net_backend *be);
-	void (*recv_disable)(struct net_backend *be);
-
-	/*
-	 * Ask the backend for the virtio-net features it is able to
-	 * support. Possible features are TSO, UFO and checksum offloading
-	 * in both rx and tx direction and for both IPv4 and IPv6.
-	 */
-	uint64_t (*get_cap)(struct net_backend *be);
-
-	/*
-	 * Tell the backend to enable/disable the specified virtio-net
-	 * features (capabilities).
-	 */
-	int (*set_cap)(struct net_backend *be, uint64_t features,
-	    unsigned int vnet_hdr_len);
-
-	struct pci_vtnet_softc *sc;
-	int fd;
-
-	/*
-	 * Length of the virtio-net header used by the backend and the
-	 * frontend, respectively. A zero value means that the header
-	 * is not used.
-	 */
-	unsigned int be_vnet_hdr_len;
-	unsigned int fe_vnet_hdr_len;
-
-	/* Size of backend-specific private data. */
-	size_t priv_size;
-
-	/* Backend-specific private data follows. */
-};
-
-#define	NET_BE_PRIV(be)		((void *)((be) + 1))
 #define	NET_BE_SIZE(be)		(sizeof(*be) + (be)->priv_size)
 
-SET_DECLARE(net_backend_set, struct net_backend);
-
-#define VNET_HDR_LEN	sizeof(struct virtio_net_rxhdr)
-
-#define WPRINTF(params) PRINTLN params
-
-/*
- * The tap backend
- */
-
-struct tap_priv {
-	struct mevent *mevp;
-	/*
-	 * A bounce buffer that allows us to implement the peek_recvlen
-	 * callback. In the future we may get the same information from
-	 * the kevent data.
-	 */
-	char bbuf[1 << 16];
-	ssize_t bbuflen;
-};
-
-static void
+void
 tap_cleanup(struct net_backend *be)
 {
 	struct tap_priv *priv = NET_BE_PRIV(be);
@@ -216,7 +97,7 @@ tap_init(struct net_backend *be, const char *devname,
 #endif
 
 	if (cb == NULL) {
-		WPRINTF(("TAP backend requires non-NULL callback"));
+		EPRINTLN("TAP backend requires non-NULL callback");
 		return (-1);
 	}
 
@@ -225,7 +106,7 @@ tap_init(struct net_backend *be, const char *devname,
 
 	be->fd = open(tbuf, O_RDWR);
 	if (be->fd == -1) {
-		WPRINTF(("open of tap device %s failed", tbuf));
+		EPRINTLN("open of tap device %s failed", tbuf);
 		goto error;
 	}
 
@@ -234,12 +115,12 @@ tap_init(struct net_backend *be, const char *devname,
 	 * notifications with the event loop
 	 */
 	if (ioctl(be->fd, FIONBIO, &opt) < 0) {
-		WPRINTF(("tap device O_NONBLOCK failed"));
+		EPRINTLN("tap device O_NONBLOCK failed");
 		goto error;
 	}
 
 	if (ioctl(be->fd, VMIO_SIOCSIFFLAGS, up)) {
-		WPRINTF(("tap device link up failed"));
+		EPRINTLN("tap device link up failed");
 		goto error;
 	}
 
@@ -254,7 +135,7 @@ tap_init(struct net_backend *be, const char *devname,
 
 	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
 	if (priv->mevp == NULL) {
-		WPRINTF(("Could not register event"));
+		EPRINTLN("Could not register event");
 		goto error;
 	}
 
@@ -268,13 +149,13 @@ error:
 /*
  * Called to send a buffer chain out to the tap device
  */
-static ssize_t
+ssize_t
 tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
 {
 	return (writev(be->fd, iov, iovcnt));
 }
 
-static ssize_t
+ssize_t
 tap_peek_recvlen(struct net_backend *be)
 {
 	struct tap_priv *priv = NET_BE_PRIV(be);
@@ -304,7 +185,7 @@ tap_peek_recvlen(struct net_backend *be)
 	return (ret);
 }
 
-static ssize_t
+ssize_t
 tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
 {
 	struct tap_priv *priv = NET_BE_PRIV(be);
@@ -332,7 +213,7 @@ tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
 	return (ret);
 }
 
-static void
+void
 tap_recv_enable(struct net_backend *be)
 {
 	struct tap_priv *priv = NET_BE_PRIV(be);
@@ -340,7 +221,7 @@ tap_recv_enable(struct net_backend *be)
 	mevent_enable(priv->mevp);
 }
 
-static void
+void
 tap_recv_disable(struct net_backend *be)
 {
 	struct tap_priv *priv = NET_BE_PRIV(be);
@@ -348,14 +229,14 @@ tap_recv_disable(struct net_backend *be)
 	mevent_disable(priv->mevp);
 }
 
-static uint64_t
+uint64_t
 tap_get_cap(struct net_backend *be __unused)
 {
 
 	return (0); /* no capabilities for now */
 }
 
-static int
+int
 tap_set_cap(struct net_backend *be __unused, uint64_t features,
     unsigned vnet_hdr_len)
 {
@@ -395,508 +276,6 @@ static struct net_backend vmnet_backend = {
 DATA_SET(net_backend_set, tap_backend);
 DATA_SET(net_backend_set, vmnet_backend);
 
-#ifdef NETGRAPH
-
-/*
- * Netgraph backend
- */
-
-#define NG_SBUF_MAX_SIZE (4 * 1024 * 1024)
-
-static int
-ng_init(struct net_backend *be, const char *devname __unused,
-	 nvlist_t *nvl, net_be_rxeof_t cb, void *param)
-{
-	struct tap_priv *p = NET_BE_PRIV(be);
-	struct ngm_connect ngc;
-	const char *value, *nodename;
-	int sbsz;
-	int ctrl_sock;
-	int flags;
-	unsigned long maxsbsz;
-	size_t msbsz;
-#ifndef WITHOUT_CAPSICUM
-	cap_rights_t rights;
-#endif
-
-	if (cb == NULL) {
-		WPRINTF(("Netgraph backend requires non-NULL callback"));
-		return (-1);
-	}
-
-	be->fd = -1;
-
-	memset(&ngc, 0, sizeof(ngc));
-
-	value = get_config_value_node(nvl, "path");
-	if (value == NULL) {
-		WPRINTF(("path must be provided"));
-		return (-1);
-	}
-	strncpy(ngc.path, value, NG_PATHSIZ - 1);
-
-	value = get_config_value_node(nvl, "hook");
-	if (value == NULL)
-		value = "vmlink";
-	strncpy(ngc.ourhook, value, NG_HOOKSIZ - 1);
-
-	value = get_config_value_node(nvl, "peerhook");
-	if (value == NULL) {
-		WPRINTF(("peer hook must be provided"));
-		return (-1);
-	}
-	strncpy(ngc.peerhook, value, NG_HOOKSIZ - 1);
-
-	nodename = get_config_value_node(nvl, "socket");
-	if (NgMkSockNode(nodename,
-		&ctrl_sock, &be->fd) < 0) {
-		WPRINTF(("can't get Netgraph sockets"));
-		return (-1);
-	}
-
-	if (NgSendMsg(ctrl_sock, ".",
-		NGM_GENERIC_COOKIE,
-		NGM_CONNECT, &ngc, sizeof(ngc)) < 0) {
*** 625 LINES SKIPPED ***