svn commit: r344272 - stable/11/sys/dev/vmware/vmxnet3

Vincenzo Maffione vmaffione at FreeBSD.org
Tue Feb 19 10:07:49 UTC 2019


Author: vmaffione
Date: Tue Feb 19 10:07:48 2019
New Revision: 344272
URL: https://svnweb.freebsd.org/changeset/base/344272

Log:
  vmx(4): add native netmap support
  
  This change adds native netmap support to the vmx(4) adapter
  (vmxnet3). Native support comes for free in FreeBSD 12, where the
  driver has been ported to iflib. To keep this change minimally
  intrusive, native support is only enabled if the vmxnet3.netmap_native
  tunable is set at boot (e.g., in loader.conf), as sketched below.
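  
  For reference, a minimal loader.conf sketch that would enable the
  native mode at boot (the tunable is read via getenv_int() in
  vmxnet3_netmap_attach(), and the hooks are only compiled in on
  kernels built with netmap support, i.e. DEV_NETMAP):
  
      vmxnet3.netmap_native="1"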
  
  Tested on stable/11 running inside vmplayer.
  
  Submitted by:	Giuseppe Lettieri <g.lettieri at iet.unipi.it>
  Reviewed by:	vmaffione, bryanv
  Sponsored by:	Sunny Valley Networks
  Differential Revision:	https://reviews.freebsd.org/D19104

Added:
  stable/11/sys/dev/vmware/vmxnet3/vmx_netmap.h   (contents, props changed)
Modified:
  stable/11/sys/dev/vmware/vmxnet3/if_vmx.c

Modified: stable/11/sys/dev/vmware/vmxnet3/if_vmx.c
==============================================================================
--- stable/11/sys/dev/vmware/vmxnet3/if_vmx.c	Tue Feb 19 03:46:32 2019	(r344271)
+++ stable/11/sys/dev/vmware/vmxnet3/if_vmx.c	Tue Feb 19 10:07:48 2019	(r344272)
@@ -239,6 +239,10 @@ typedef enum {
 
 static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
 
+#ifdef DEV_NETMAP
+#include "vmx_netmap.h"
+#endif
+
 /* Tunables. */
 static int vmxnet3_mq_disable = 0;
 TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
@@ -270,6 +274,9 @@ DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devcla
 
 MODULE_DEPEND(vmx, pci, 1, 1, 1);
 MODULE_DEPEND(vmx, ether, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(vmx, netmap, 1, 1, 1);
+#endif
 
 #define VMXNET3_VMWARE_VENDOR_ID	0x15AD
 #define VMXNET3_VMWARE_DEVICE_ID	0x07B0
@@ -347,6 +354,10 @@ vmxnet3_attach(device_t dev)
 	vmxnet3_start_taskqueue(sc);
 #endif
 
+#ifdef DEV_NETMAP
+	vmxnet3_netmap_attach(sc);
+#endif
+
 fail:
 	if (error)
 		vmxnet3_detach(dev);
@@ -390,6 +401,10 @@ vmxnet3_detach(device_t dev)
 #endif
 	vmxnet3_free_interrupts(sc);
 
+#ifdef DEV_NETMAP
+	netmap_detach(ifp);
+#endif
+
 	if (ifp != NULL) {
 		if_free(ifp);
 		sc->vmx_ifp = NULL;
@@ -1846,6 +1861,11 @@ vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
 	txr = &txq->vxtxq_cmd_ring;
 	txc = &txq->vxtxq_comp_ring;
 
+#ifdef DEV_NETMAP
+	if (netmap_tx_irq(sc->vmx_ifp, txq - sc->vmx_txq) != NM_IRQ_PASS)
+		return;
+#endif
+
 	VMXNET3_TXQ_LOCK_ASSERT(txq);
 
 	for (;;) {
@@ -2111,6 +2131,15 @@ vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
 	ifp = sc->vmx_ifp;
 	rxc = &rxq->vxrxq_comp_ring;
 
+#ifdef DEV_NETMAP
+	{
+		int dummy;
+		if (netmap_rx_irq(ifp, rxq - sc->vmx_rxq, &dummy) !=
+		    NM_IRQ_PASS)
+			return;
+	}
+#endif
+
 	VMXNET3_RXQ_LOCK_ASSERT(rxq);
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
@@ -2401,6 +2430,10 @@ vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
 	struct vmxnet3_txqueue *txq;
 	int i;
 
+#ifdef DEV_NETMAP
+	netmap_disable_all_rings(sc->vmx_ifp);
+#endif
+
 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
 		rxq = &sc->vmx_rxq[i];
 		VMXNET3_RXQ_LOCK(rxq);
@@ -2454,6 +2487,10 @@ vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet
 	bzero(txr->vxtxr_txd,
 	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
 
+#ifdef DEV_NETMAP
+	vmxnet3_netmap_txq_init(sc, txq);
+#endif
+
 	txc = &txq->vxtxq_comp_ring;
 	txc->vxcr_next = 0;
 	txc->vxcr_gen = VMXNET3_INIT_GEN;
@@ -2468,6 +2505,10 @@ vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet
 	struct vmxnet3_rxring *rxr;
 	struct vmxnet3_comp_ring *rxc;
 	int i, populate, idx, frame_size, error;
+#ifdef DEV_NETMAP
+	struct netmap_adapter *na;
+	struct netmap_slot *slot;
+#endif
 
 	ifp = sc->vmx_ifp;
 	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
@@ -2498,12 +2539,24 @@ vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet
 	else
 		populate = VMXNET3_RXRINGS_PERQ;
 
+#ifdef DEV_NETMAP
+	na = NA(ifp);
+	slot = netmap_reset(na, NR_RX, rxq - sc->vmx_rxq, 0);
+#endif
+
 	for (i = 0; i < populate; i++) {
 		rxr = &rxq->vxrxq_cmd_ring[i];
 		rxr->vxrxr_fill = 0;
 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
 		bzero(rxr->vxrxr_rxd,
 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
+#ifdef DEV_NETMAP
+		if (slot != NULL) {
+			vmxnet3_netmap_rxq_init(sc, rxq, rxr, slot);
+			i = populate;
+			break;
+		}
+#endif
 
 		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
 			error = vmxnet3_newbuf(sc, rxr);
@@ -2625,6 +2678,10 @@ vmxnet3_init_locked(struct vmxnet3_softc *sc)
 
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	vmxnet3_link_status(sc);
+
+#ifdef DEV_NETMAP
+	netmap_enable_all_rings(ifp);
+#endif
 
 	vmxnet3_enable_all_intrs(sc);
 	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);

Added: stable/11/sys/dev/vmware/vmxnet3/vmx_netmap.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/11/sys/dev/vmware/vmxnet3/vmx_netmap.h	Tue Feb 19 10:07:48 2019	(r344272)
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 2019 Universita` di Pisa.
+ * Sponsored by Sunny Valley Networks.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* $FreeBSD$ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+
+static int
+vmxnet3_netmap_reg(struct netmap_adapter *na, int onoff)
+{
+	struct ifnet *ifp = na->ifp;
+	struct vmxnet3_softc *sc = ifp->if_softc;
+
+	VMXNET3_CORE_LOCK(sc);
+	vmxnet3_stop(sc);
+	if (onoff) {
+		nm_set_native_flags(na);
+	} else {
+		nm_clear_native_flags(na);
+	}
+	vmxnet3_init_locked(sc);
+	VMXNET3_CORE_UNLOCK(sc);
+	return 0;
+}
+
+static void
+vmxnet3_netmap_rxq_init(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq,
+		struct vmxnet3_rxring *rxr, struct netmap_slot *slot)
+{
+	struct ifnet *ifp = sc->vmx_ifp;
+	struct netmap_adapter *na = NA(ifp);
+	struct vmxnet3_rxdesc *rxd;
+	int q, i;
+
+	q = rxq - sc->vmx_rxq;
+
+	for (i = 0; ; i++) {
+		int idx = rxr->vxrxr_fill;
+		int si = netmap_idx_n2k(na->rx_rings[q], idx);
+		struct vmxnet3_rxbuf  *rxb = &rxr->vxrxr_rxbuf[idx];
+		uint64_t paddr;
+		void *addr;
+
+		addr = PNMB(na, slot +  si, &paddr);
+		netmap_load_map(na, rxr->vxrxr_rxtag, rxb->vrxb_dmamap, addr);
+
+		rxd = &rxr->vxrxr_rxd[idx];
+		rxd->addr = paddr;
+		rxd->len = NETMAP_BUF_SIZE(na);
+		rxd->gen = rxr->vxrxr_gen ^ 1;
+		rxd->btype = VMXNET3_BTYPE_HEAD;
+		nm_prdis("%d: addr %lx len %u btype %u gen %u",
+			idx, rxd->addr, rxd->len, rxd->btype, rxd->gen);
+
+		if (i == rxr->vxrxr_ndesc -1)
+			break;
+
+		rxd->gen ^= 1;
+		vmxnet3_rxr_increment_fill(rxr);
+	}
+}
+
+static void
+vmxnet3_netmap_txq_init(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
+{
+	struct ifnet *ifp = sc->vmx_ifp;
+	struct netmap_adapter *na;
+	struct netmap_slot *slot;
+	struct vmxnet3_txring *txr;
+	int i, gen, q;
+
+	q = txq - sc->vmx_txq;
+
+	na = NA(ifp);
+
+	slot = netmap_reset(na, NR_TX, q, 0);
+	if (slot == NULL)
+		return;
+
+	txr = &txq->vxtxq_cmd_ring;
+	gen = txr->vxtxr_gen ^ 1;
+
+	for (i = 0; i < txr->vxtxr_ndesc; i++) {
+		int si = netmap_idx_n2k(na->tx_rings[q], i);
+		struct vmxnet3_txdesc *txd = &txr->vxtxr_txd[i];
+		uint64_t paddr;
+		void *addr;
+
+		addr = PNMB(na, slot +  si, &paddr);
+
+		txd->addr = paddr;
+		txd->len = 0;
+		txd->gen = gen;
+		txd->dtype = 0;
+		txd->offload_mode = VMXNET3_OM_NONE;
+		txd->offload_pos = 0;
+		txd->hlen = 0;
+		txd->eop = 0;
+		txd->compreq = 0;
+		txd->vtag_mode = 0;
+		txd->vtag = 0;
+
+		netmap_load_map(na, txr->vxtxr_txtag,
+				txr->vxtxr_txbuf[i].vtxb_dmamap, addr);
+	}
+}
+
+static int
+vmxnet3_netmap_txsync(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
+	struct netmap_ring *ring = kring->ring;
+	u_int nm_i;
+	u_int nic_i;
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
+
+	/*
+	 * interrupts on every tx packet are expensive so request
+	 * them every half ring, or where NS_REPORT is set
+	 */
+	u_int report_frequency = kring->nkr_num_slots >> 1;
+	/* device specific */
+	struct vmxnet3_softc *sc = ifp->if_softc;
+	struct vmxnet3_txqueue *txq = &sc->vmx_txq[kring->ring_id];
+	struct vmxnet3_txring *txr = &txq->vxtxq_cmd_ring;
+	struct vmxnet3_comp_ring *txc = &txq->vxtxq_comp_ring;
+	struct vmxnet3_txcompdesc *txcd = txc->vxcr_u.txcd;
+	int gen = txr->vxtxr_gen;
+
+	/* no need to dma-sync the ring; memory barriers are sufficient */
+
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		for (n = 0; nm_i != head; n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			u_int len = slot->len;
+			uint64_t paddr;
+			void *addr = PNMB(na, slot, &paddr);
+			int compreq = !!(slot->flags & NS_REPORT ||
+				nic_i == 0 || nic_i == report_frequency);
+
+			/* device specific */
+			struct vmxnet3_txdesc *curr = &txr->vxtxr_txd[nic_i];
+			struct vmxnet3_txbuf *txbuf = &txr->vxtxr_txbuf[nic_i];
+
+			NM_CHECK_ADDR_LEN(na, addr, len);
+
+			/* fill the slot in the NIC ring */
+			curr->len = len;
+			curr->eop = 1; /* NS_MOREFRAG not supported */
+			curr->compreq = compreq;
+
+			if (slot->flags & NS_BUF_CHANGED) {
+				curr->addr = paddr;
+				netmap_reload_map(na, txr->vxtxr_txtag,
+						txbuf->vtxb_dmamap, addr);
+			}
+			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+
+			/* make sure changes to the buffer are synced */
+			bus_dmamap_sync(txr->vxtxr_txtag, txbuf->vtxb_dmamap,
+					BUS_DMASYNC_PREWRITE);
+
+			/* pass ownership */
+			vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
+			curr->gen = gen;
+
+			nm_i = nm_next(nm_i, lim);
+			nic_i++;
+			if (unlikely(nic_i == lim + 1)) {
+				nic_i = 0;
+				gen = txr->vxtxr_gen ^= 1;
+			}
+		}
+
+		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), nic_i);
+	}
+	kring->nr_hwcur = nm_i;
+
+	/* reclaim completed packets */
+	for (;;) {
+		u_int sop;
+		struct vmxnet3_txbuf *txb;
+
+		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
+		if (txcd->gen != txc->vxcr_gen)
+			break;
+
+		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
+
+		if (++txc->vxcr_next == txc->vxcr_ndesc) {
+			txc->vxcr_next = 0;
+			txc->vxcr_gen ^= 1;
+		}
+
+		sop = txr->vxtxr_next;
+		txb = &txr->vxtxr_txbuf[sop];
+
+		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
+		   BUS_DMASYNC_POSTWRITE);
+
+		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
+	}
+	kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, txr->vxtxr_next), lim);
+
+	return 0;
+}
+
+static int
+vmxnet3_netmap_rxsync(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct netmap_ring *ring = kring->ring;
+	u_int nm_i;
+	u_int nic_i;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
+	int force_update = (flags & NAF_FORCE_READ);
+
+	struct ifnet *ifp = na->ifp;
+	struct vmxnet3_softc *sc = ifp->if_softc;
+	struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[kring->ring_id];
+	struct vmxnet3_rxring *rxr;
+	struct vmxnet3_comp_ring *rxc;
+
+	if (head > lim)
+		return netmap_ring_reinit(kring);
+
+	rxr = &rxq->vxrxq_cmd_ring[0];
+
+	/* no need to dma-sync the ring; memory barriers are sufficient */
+
+	/* first part: import newly received packets */
+	if (netmap_no_pendintr || force_update) {
+		rxc = &rxq->vxrxq_comp_ring;
+		nm_i = kring->nr_hwtail;
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		for (;;) {
+			struct vmxnet3_rxcompdesc *rxcd;
+			struct vmxnet3_rxbuf *rxb;
+
+			rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
+
+			if (rxcd->gen != rxc->vxcr_gen)
+				break;
+			vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
+
+			while (__predict_false(rxcd->rxd_idx != nic_i)) {
+				nm_prlim(1, "%u skipped! idx %u", nic_i, rxcd->rxd_idx);
+				/* to shelter the application from this  we
+				 * would need to rotate the kernel-owned
+				 * portion of the netmap and nic rings. We
+				 * return len=0 for now and hope for the best.
+				 */
+				ring->slot[nm_i].len = 0;
+				nic_i = nm_next(nm_i, lim);
+				nm_i = nm_next(nm_i, lim);
+			}
+
+			rxb = &rxr->vxrxr_rxbuf[nic_i];
+
+			ring->slot[nm_i].len = rxcd->len;
+			ring->slot[nm_i].flags = 0;
+
+			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
+					BUS_DMASYNC_POSTREAD);
+
+			nic_i = nm_next(nm_i, lim);
+			nm_i = nm_next(nm_i, lim);
+
+			rxc->vxcr_next++;
+			if (__predict_false(rxc->vxcr_next == rxc->vxcr_ndesc)) {
+				rxc->vxcr_next = 0;
+				rxc->vxcr_gen ^= 1;
+			}
+		}
+		kring->nr_hwtail = nm_i;
+	}
+	/* second part: skip past packets that userspace has released */
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		while (nm_i != head) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			struct vmxnet3_rxdesc *rxd_fill;
+			struct vmxnet3_rxbuf *rxbuf;
+
+			if (slot->flags & NS_BUF_CHANGED) {
+				uint64_t paddr;
+				void *addr = PNMB(na, slot, &paddr);
+				struct vmxnet3_rxdesc *rxd = &rxr->vxrxr_rxd[nic_i];
+
+
+				if (addr == NETMAP_BUF_BASE(na))
+					return netmap_ring_reinit(kring);
+
+				rxd->addr = paddr;
+				rxbuf = &rxr->vxrxr_rxbuf[nic_i];
+				netmap_reload_map(na, rxr->vxrxr_rxtag,
+						rxbuf->vrxb_dmamap, addr);
+				slot->flags &= ~NS_BUF_CHANGED;
+				vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
+			}
+
+			rxd_fill = &rxr->vxrxr_rxd[rxr->vxrxr_fill];
+			rxbuf = &rxr->vxrxr_rxbuf[rxr->vxrxr_fill];
+
+			bus_dmamap_sync(rxr->vxrxr_rxtag, rxbuf->vrxb_dmamap,
+					BUS_DMASYNC_PREREAD);
+
+			rxd_fill->gen = rxr->vxrxr_gen;
+			vmxnet3_rxr_increment_fill(rxr);
+
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
+		}
+		kring->nr_hwcur = head;
+		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
+			vmxnet3_write_bar0(sc,
+				VMXNET3_BAR0_RXH1(kring->ring_id), rxr->vxrxr_fill);
+		}
+	}
+	return 0;
+}
+
+static void
+vmxnet3_netmap_attach(struct vmxnet3_softc *sc)
+{
+	struct netmap_adapter na;
+	int enable = 0;
+
+	if (getenv_int("vmxnet3.netmap_native", &enable) < 0 || !enable) {
+		return;
+	}
+
+	bzero(&na, sizeof(na));
+
+	na.ifp = sc->vmx_ifp;
+	na.na_flags = NAF_BDG_MAYSLEEP;
+	na.num_tx_desc = sc->vmx_ntxdescs;
+	na.num_rx_desc = sc->vmx_nrxdescs;
+	na.num_tx_rings = sc->vmx_ntxqueues;
+	na.num_rx_rings = sc->vmx_nrxqueues;
+	na.nm_register = vmxnet3_netmap_reg;
+	na.nm_txsync = vmxnet3_netmap_txsync;
+	na.nm_rxsync = vmxnet3_netmap_rxsync;
+	netmap_attach(&na);
+}

