svn commit: r354552 - head/usr.sbin/bhyve

Vincenzo Maffione <vmaffione@FreeBSD.org>
Fri Nov 8 17:57:05 UTC 2019


Author: vmaffione
Date: Fri Nov  8 17:57:03 2019
New Revision: 354552
URL: https://svnweb.freebsd.org/changeset/base/354552

Log:
  bhyve: add support for virtio-net mergeable rx buffers
  
  Mergeable rx buffers is a virtio-net feature that allows the hypervisor
  to use multiple RX descriptor chains to receive a single packet.
  Without this feature, a TSO-enabled guest can only publish 64KB-long
  (or 32KB-long) chains, and each of these large buffers is consumed to
  receive a single packet, even a very short one. This is a waste of
  memory, as an RX queue has room for 256 chains, which means up to 16MB
  of buffer memory for each (single-queue) vtnet device. With the
  feature on, the guest can publish 2KB-long chains, and the hypervisor
  will merge them as needed.
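
  For reference, the feature extends the virtio-net receive header with
  a buffer count that the hypervisor fills in with the number of chains
  used by each packet. A sketch of the header layout, per the legacy
  virtio-net spec (field names follow bhyve's struct virtio_net_rxhdr;
  only vrh_bufs appears in the diff below):

	struct virtio_net_rxhdr {
		uint8_t		vrh_flags;
		uint8_t		vrh_gso_type;
		uint16_t	vrh_hdr_len;
		uint16_t	vrh_gso_size;
		uint16_t	vrh_csum_start;
		uint16_t	vrh_csum_offset;
		uint16_t	vrh_bufs;	/* chain count; MRG_RXBUF only */
	} __packed;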
  
  This change also enables the feature in the netmap backend, which
  supports virtio-net offloads. We plan to add support for the
  tap backend too.
  Note that, unlike QEMU/KVM, we implement one-copy receive here: the
  packet is copied only once, from the backend into the guest buffers,
  while QEMU uses two copies.
  
  Reviewed by:    jhb
  MFC after:      3 weeks
  Differential Revision:	https://reviews.freebsd.org/D21007

Modified:
  head/usr.sbin/bhyve/net_backends.c
  head/usr.sbin/bhyve/pci_virtio_console.c
  head/usr.sbin/bhyve/pci_virtio_net.c
  head/usr.sbin/bhyve/virtio.c
  head/usr.sbin/bhyve/virtio.h

Modified: head/usr.sbin/bhyve/net_backends.c
==============================================================================
--- head/usr.sbin/bhyve/net_backends.c	Fri Nov  8 17:33:42 2019	(r354551)
+++ head/usr.sbin/bhyve/net_backends.c	Fri Nov  8 17:57:03 2019	(r354552)
@@ -328,7 +328,8 @@ DATA_SET(net_backend_set, vmnet_backend);
 #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
 		VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
 		VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
-		VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
+		VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO | \
+		VIRTIO_NET_F_MRG_RXBUF)
 
 struct netmap_priv {
 	char ifname[IFNAMSIZ];

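The capability mask above is what the netmap backend advertises to the
vtnet device. A hedged sketch of how the new bit reaches the guest,
assuming (as the unchanged VTNET_S_HOSTCAPS in pci_virtio_net.c below
suggests) that backend capabilities are OR-ed into the device's base
host capabilities before feature negotiation; the helper name is
hypothetical:

	#include <stdint.h>

	#define VIRTIO_NET_F_MRG_RXBUF	(1 << 15)	/* feature bit 15 */

	static uint64_t
	vtnet_offered_caps(uint64_t base_caps, uint64_t backend_caps)
	{

		return (base_caps | backend_caps);
	}
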
Modified: head/usr.sbin/bhyve/pci_virtio_console.c
==============================================================================
--- head/usr.sbin/bhyve/pci_virtio_console.c	Fri Nov  8 17:33:42 2019	(r354551)
+++ head/usr.sbin/bhyve/pci_virtio_console.c	Fri Nov  8 17:57:03 2019	(r354552)
@@ -423,7 +423,7 @@ pci_vtcon_sock_rx(int fd __unused, enum ev_type t __un
 		len = readv(sock->vss_conn_fd, &iov, n);
 
 		if (len == 0 || (len < 0 && errno == EWOULDBLOCK)) {
-			vq_retchain(vq);
+			vq_retchains(vq, 1);
 			vq_endchains(vq, 0);
 			if (len == 0)
 				goto close;

Modified: head/usr.sbin/bhyve/pci_virtio_net.c
==============================================================================
--- head/usr.sbin/bhyve/pci_virtio_net.c	Fri Nov  8 17:33:42 2019	(r354551)
+++ head/usr.sbin/bhyve/pci_virtio_net.c	Fri Nov  8 17:57:03 2019	(r354552)
@@ -58,11 +58,14 @@ __FBSDID("$FreeBSD$");
 #include "virtio.h"
 #include "net_utils.h"
 #include "net_backends.h"
+#include "iov.h"
 
 #define VTNET_RINGSZ	1024
 
 #define VTNET_MAXSEGS	256
 
+#define VTNET_MAX_PKT_LEN	(65536 + 64)
+
 #define VTNET_S_HOSTCAPS      \
   ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
     VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)
@@ -170,59 +173,119 @@ pci_vtnet_reset(void *vsc)
 	pthread_mutex_unlock(&sc->rx_mtx);
 }
 
+struct virtio_mrg_rxbuf_info {
+	uint16_t idx;
+	uint16_t pad;
+	uint32_t len;
+};
+
 static void
 pci_vtnet_rx(struct pci_vtnet_softc *sc)
 {
+	struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS];
 	struct iovec iov[VTNET_MAXSEGS + 1];
 	struct vqueue_info *vq;
-	int len, n;
-	uint16_t idx;
+	uint32_t cur_iov_bytes;
+	struct iovec *cur_iov;
+	uint16_t cur_iov_len;
+	uint32_t ulen;
+	int n_chains;
+	int len;
 
 	vq = &sc->vsc_queues[VTNET_RXQ];
 	for (;;) {
 		/*
-		 * Check for available rx buffers.
+		 * Get a descriptor chain to store the next ingress
+		 * packet. In case of mergeable rx buffers, get as
+		 * many chains as necessary in order to make room
+		 * for a maximum sized LRO packet.
 		 */
-		if (!vq_has_descs(vq)) {
-			/* No rx buffers. Enable RX kicks and double check. */
-			vq_kick_enable(vq);
-			if (!vq_has_descs(vq)) {
+		cur_iov_bytes = 0;
+		cur_iov_len = 0;
+		cur_iov = iov;
+		n_chains = 0;
+		do {
+			int n = vq_getchain(vq, &info[n_chains].idx, cur_iov,
+			    VTNET_MAXSEGS - cur_iov_len, NULL);
+
+			if (n == 0) {
 				/*
-				 * Still no buffers. Interrupt if needed
-				 * (including for NOTIFY_ON_EMPTY), and
-				 * disable the backend until the next kick.
+				 * No rx buffers. Enable RX kicks and double
+				 * check.
 				 */
-				vq_endchains(vq, /*used_all_avail=*/1);
-				netbe_rx_disable(sc->vsc_be);
-				return;
+				vq_kick_enable(vq);
+				if (!vq_has_descs(vq)) {
+					/*
+					 * Still no buffers. Return the unused
+					 * chains (if any), interrupt if needed
+					 * (including for NOTIFY_ON_EMPTY), and
+					 * disable the backend until the next
+					 * kick.
+					 */
+					vq_retchains(vq, n_chains);
+					vq_endchains(vq, /*used_all_avail=*/1);
+					netbe_rx_disable(sc->vsc_be);
+					return;
+				}
+
+				/* More rx buffers found, so keep going. */
+				vq_kick_disable(vq);
+				continue;
 			}
+			assert(n >= 1 && cur_iov_len + n <= VTNET_MAXSEGS);
+			cur_iov_len += n;
+			if (!sc->rx_merge) {
+				n_chains = 1;
+				break;
+			}
+			info[n_chains].len = (uint32_t)count_iov(cur_iov, n);
+			cur_iov_bytes += info[n_chains].len;
+			cur_iov += n;
+			n_chains++;
+		} while (cur_iov_bytes < VTNET_MAX_PKT_LEN &&
+			    cur_iov_len < VTNET_MAXSEGS);
 
-			/* More rx buffers found, so keep going. */
-			vq_kick_disable(vq);
-		}
+		len = netbe_recv(sc->vsc_be, iov, cur_iov_len);
 
-		/*
-		 * Get descriptor chain.
-		 */
-		n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
-		assert(n >= 1 && n <= VTNET_MAXSEGS);
-
-		len = netbe_recv(sc->vsc_be, iov, n);
-
 		if (len <= 0) {
 			/*
 			 * No more packets (len == 0), or backend errored
 			 * (err < 0). Return unused available buffers
 			 * and stop.
 			 */
-			vq_retchain(vq);
+			vq_retchains(vq, n_chains);
 			/* Interrupt if needed/appropriate and stop. */
 			vq_endchains(vq, /*used_all_avail=*/0);
 			return;
 		}
 
-		/* Publish the info to the guest */
-		vq_relchain(vq, idx, (uint32_t)len);
+		ulen = (uint32_t)len; /* avoid too many casts below */
+
+		/* Publish the used buffers to the guest. */
+		if (!sc->rx_merge) {
+			vq_relchain(vq, info[0].idx, ulen);
+		} else {
+			struct virtio_net_rxhdr *hdr = iov[0].iov_base;
+			uint32_t iolen;
+			int i = 0;
+
+			assert(iov[0].iov_len >= sizeof(*hdr));
+
+			do {
+				iolen = info[i].len;
+				if (iolen > ulen) {
+					iolen = ulen;
+				}
+				vq_relchain_prepare(vq, info[i].idx, iolen);
+				ulen -= iolen;
+				i++;
+				assert(i <= n_chains);
+			} while (ulen > 0);
+
+			hdr->vrh_bufs = i;
+			vq_relchain_publish(vq);
+			vq_retchains(vq, n_chains - i);
+		}
 	}
 
 }

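The merge loop above caps each chain's reported length at the bytes
remaining in the packet and stores the chain count in the header's
vrh_bufs field. As a concrete check of that accounting, a
self-contained sketch (buffer sizes and packet length are illustrative,
not taken from the diff):

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint32_t chain_len[4] = { 2048, 2048, 2048, 2048 };
		uint32_t ulen = 5000;	/* as returned by netbe_recv() */
		int i = 0;

		do {
			/* Stands in for vq_relchain_prepare(vq, info[i].idx, iolen). */
			uint32_t iolen = chain_len[i] < ulen ? chain_len[i] : ulen;

			ulen -= iolen;
			i++;
		} while (ulen > 0);

		/* Stands in for hdr->vrh_bufs = i; prints "vrh_bufs = 3". */
		printf("vrh_bufs = %d\n", i);
		return (0);
	}
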
Modified: head/usr.sbin/bhyve/virtio.c
==============================================================================
--- head/usr.sbin/bhyve/virtio.c	Fri Nov  8 17:33:42 2019	(r354551)
+++ head/usr.sbin/bhyve/virtio.c	Fri Nov  8 17:57:03 2019	(r354552)
@@ -102,6 +102,7 @@ vi_reset_dev(struct virtio_softc *vs)
 	for (vq = vs->vs_queues, i = 0; i < nvq; vq++, i++) {
 		vq->vq_flags = 0;
 		vq->vq_last_avail = 0;
+		vq->vq_next_used = 0;
 		vq->vq_save_used = 0;
 		vq->vq_pfn = 0;
 		vq->vq_msix_idx = VIRTIO_MSI_NO_VECTOR;
@@ -199,6 +200,7 @@ vi_vq_init(struct virtio_softc *vs, uint32_t pfn)
 	/* Mark queue as allocated, and start at 0 when we use it. */
 	vq->vq_flags = VQ_ALLOC;
 	vq->vq_last_avail = 0;
+	vq->vq_next_used = 0;
 	vq->vq_save_used = 0;
 }
 
@@ -279,7 +281,7 @@ vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
          * the guest has written are valid (including all their
          * vd_next fields and vd_flags).
 	 *
-	 * Compute (last_avail - va_idx) in integers mod 2**16.  This is
+	 * Compute (va_idx - last_avail) in integers mod 2**16.  This is
 	 * the number of descriptors the device has made available
 	 * since the last time we updated vq->vq_last_avail.
 	 *
@@ -382,38 +384,30 @@ loopy:
 }
 
 /*
- * Return the currently-first request chain back to the available queue.
+ * Return the first n_chain request chains back to the available queue.
  *
- * (This chain is the one you handled when you called vq_getchain()
+ * (These chains are the ones you handled when you called vq_getchain()
  * and used its positive return value.)
  */
 void
-vq_retchain(struct vqueue_info *vq)
+vq_retchains(struct vqueue_info *vq, uint16_t n_chains)
 {
 
-	vq->vq_last_avail--;
+	vq->vq_last_avail -= n_chains;
 }
 
-/*
- * Return specified request chain to the guest, setting its I/O length
- * to the provided value.
- *
- * (This chain is the one you handled when you called vq_getchain()
- * and used its positive return value.)
- */
 void
-vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
+vq_relchain_prepare(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
 {
-	uint16_t uidx, mask;
 	volatile struct vring_used *vuh;
 	volatile struct virtio_used *vue;
+	uint16_t mask;
 
 	/*
 	 * Notes:
 	 *  - mask is N-1 where N is a power of 2 so computes x % N
 	 *  - vuh points to the "used" data shared with guest
 	 *  - vue points to the "used" ring entry we want to update
-	 *  - head is the same value we compute in vq_iovecs().
 	 *
 	 * (I apologize for the two fields named vu_idx; the
 	 * virtio spec calls the one that vue points to, "id"...)
@@ -421,18 +415,35 @@ vq_relchain(struct vqueue_info *vq, uint16_t idx, uint
 	mask = vq->vq_qsize - 1;
 	vuh = vq->vq_used;
 
-	uidx = vuh->vu_idx;
-	vue = &vuh->vu_ring[uidx++ & mask];
+	vue = &vuh->vu_ring[vq->vq_next_used++ & mask];
 	vue->vu_idx = idx;
 	vue->vu_tlen = iolen;
+}
 
+void
+vq_relchain_publish(struct vqueue_info *vq)
+{
 	/*
 	 * Ensure the used descriptor is visible before updating the index.
 	 * This is necessary on ISAs with memory ordering less strict than x86
 	 * (and even on x86 to act as a compiler barrier).
 	 */
 	atomic_thread_fence_rel();
-	vuh->vu_idx = uidx;
+	vq->vq_used->vu_idx = vq->vq_next_used;
+}
+
+/*
+ * Return specified request chain to the guest, setting its I/O length
+ * to the provided value.
+ *
+ * (This chain is the one you handled when you called vq_getchain()
+ * and used its positive return value.)
+ */
+void
+vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
+{
+	vq_relchain_prepare(vq, idx, iolen);
+	vq_relchain_publish(vq);
 }
 
 /*

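With vq_relchain() now split into a prepare and a publish step, a
device can stage several used-ring entries and expose them to the
guest with a single release fence and index update. A minimal usage
sketch mirroring the rx loop in pci_virtio_net.c above (helper and
variable names are hypothetical):

	static void
	publish_packet(struct vqueue_info *vq, const uint16_t *idx,
	    const uint32_t *len, int n)
	{
		int i;

		/* Stage used-ring entries; not yet visible to the guest. */
		for (i = 0; i < n; i++)
			vq_relchain_prepare(vq, idx[i], len[i]);

		/* One fence and one vu_idx update cover all entries. */
		vq_relchain_publish(vq);
	}
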
Modified: head/usr.sbin/bhyve/virtio.h
==============================================================================
--- head/usr.sbin/bhyve/virtio.h	Fri Nov  8 17:33:42 2019	(r354551)
+++ head/usr.sbin/bhyve/virtio.h	Fri Nov  8 17:57:03 2019	(r354552)
@@ -392,6 +392,7 @@ struct vqueue_info {
 
 	uint16_t vq_flags;	/* flags (see above) */
 	uint16_t vq_last_avail;	/* a recent value of vq_avail->va_idx */
+	uint16_t vq_next_used;	/* index of the next used slot to be filled */
 	uint16_t vq_save_used;	/* saved vq_used->vu_idx; see vq_endchains */
 	uint16_t vq_msix_idx;	/* MSI-X index, or VIRTIO_MSI_NO_VECTOR */
 
@@ -479,7 +480,10 @@ void	vi_set_io_bar(struct virtio_softc *, int);
 
 int	vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
 		    struct iovec *iov, int n_iov, uint16_t *flags);
-void	vq_retchain(struct vqueue_info *vq);
+void	vq_retchains(struct vqueue_info *vq, uint16_t n_chains);
+void	vq_relchain_prepare(struct vqueue_info *vq, uint16_t idx,
+			    uint32_t iolen);
+void	vq_relchain_publish(struct vqueue_info *vq);
 void	vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen);
 void	vq_endchains(struct vqueue_info *vq, int used_all_avail);
 