svn commit: r261909 - in head: sys/conf sys/dev/netmap sys/modules/netmap sys/net tools/tools/netmap

Luigi Rizzo luigi at FreeBSD.org
Sat Feb 15 04:53:05 UTC 2014


Author: luigi
Date: Sat Feb 15 04:53:04 2014
New Revision: 261909
URL: http://svnweb.freebsd.org/changeset/base/261909

Log:
  This new version of netmap brings you the following:
  
  - netmap pipes, providing bidirectional blocking I/O while moving
    100+ Mpps between processes using shared memory channels
      (no mistake: over one hundred million. But mind you, I said
    *moving* not *processing*);
  
  - kqueue support (BHyVe needs it);
  
  - improved user library. Just the interface name lets you select a NIC,
    host port, VALE switch port, netmap pipe, and individual queues.
    The upcoming netmap-enabled libpcap will use this feature.
  
  - optional extra buffers associated to netmap ports, for applications
    that need to buffer data yet don't want to make copies.
  
  - segmentation offloading for the VALE switch, useful between VMs.
  
  and a number of bug fixes and performance improvements.
  
  My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
  amount of work on these features so we owe them a big thanks.
  
  There are some external repositories that can be of interest:
  
      https://code.google.com/p/netmap
          our public repository for netmap/VALE code, including
          linux versions and other stuff that does not belong here,
          such as python bindings.
  
      https://code.google.com/p/netmap-libpcap
          a clone of the libpcap repository with netmap support.
            With this, any libpcap client has access to most netmap
            features with no recompilation. E.g. tcpdump can filter
            packets at 10-15 Mpps.
  
      https://code.google.com/p/netmap-ipfw
          a userspace version of ipfw+dummynet which uses netmap
          to send/receive packets. Speed is up in the 7-10 Mpps
          range per core for simple rulesets.
  
  Both netmap-libpcap and netmap-ipfw will be merged upstream at some
  point, but until that happens it is useful to have access to them.
  
  And yes, this code will be merged to the stable branches soon. It is
  infinitely better than the version currently in 10 and 9.
  
  MFC after:	3 days

Added:
  head/sys/dev/netmap/netmap_offloadings.c   (contents, props changed)
  head/sys/dev/netmap/netmap_pipe.c   (contents, props changed)
Deleted:
  head/tools/tools/netmap/click-test.cfg
  head/tools/tools/netmap/nm_util.c
  head/tools/tools/netmap/nm_util.h
  head/tools/tools/netmap/pcap.c
Modified:
  head/sys/conf/files
  head/sys/dev/netmap/netmap.c
  head/sys/dev/netmap/netmap_freebsd.c
  head/sys/dev/netmap/netmap_generic.c
  head/sys/dev/netmap/netmap_kern.h
  head/sys/dev/netmap/netmap_mem2.c
  head/sys/dev/netmap/netmap_mem2.h
  head/sys/dev/netmap/netmap_vale.c
  head/sys/modules/netmap/Makefile
  head/sys/net/netmap.h
  head/sys/net/netmap_user.h
  head/tools/tools/netmap/Makefile
  head/tools/tools/netmap/README
  head/tools/tools/netmap/bridge.c
  head/tools/tools/netmap/pkt-gen.c
  head/tools/tools/netmap/vale-ctl.c

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Sat Feb 15 02:30:34 2014	(r261908)
+++ head/sys/conf/files	Sat Feb 15 04:53:04 2014	(r261909)
@@ -1875,8 +1875,10 @@ dev/ncv/ncr53c500_pccard.c	optional ncv 
 dev/netmap/netmap.c		optional netmap
 dev/netmap/netmap_freebsd.c	optional netmap
 dev/netmap/netmap_generic.c	optional netmap
-dev/netmap/netmap_mbq.c	optional netmap
+dev/netmap/netmap_mbq.c		optional netmap
 dev/netmap/netmap_mem2.c	optional netmap
+dev/netmap/netmap_offloadings.c	optional netmap
+dev/netmap/netmap_pipe.c	optional netmap
 dev/netmap/netmap_vale.c	optional netmap
 # compile-with "${NORMAL_C} -Wconversion -Wextra"
 dev/nge/if_nge.c		optional nge

Modified: head/sys/dev/netmap/netmap.c
==============================================================================
--- head/sys/dev/netmap/netmap.c	Sat Feb 15 02:30:34 2014	(r261908)
+++ head/sys/dev/netmap/netmap.c	Sat Feb 15 04:53:04 2014	(r261909)
@@ -156,9 +156,11 @@ ports attached to the switch)
 
 
 /* reduce conditional code */
-#define init_waitqueue_head(x)	// only needed in linux
-
+// linux API, use for the knlist in FreeBSD
+#define init_waitqueue_head(x)	knlist_init_mtx(&(x)->si_note, NULL)
 
+void freebsd_selwakeup(struct selinfo *si, int pri);
+#define OS_selwakeup(a, b)	freebsd_selwakeup(a, b)
 
 #elif defined(linux)
 
@@ -231,6 +233,7 @@ static int netmap_admode = NETMAP_ADMODE
 
 int netmap_generic_mit = 100*1000;   /* Generic mitigation interval in nanoseconds. */
 int netmap_generic_ringsize = 1024;   /* Generic ringsize. */
+int netmap_generic_rings = 1;   /* number of queues in generic. */
 
 SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
 SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
@@ -238,6 +241,7 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_u
 SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0 , "");
 SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0 , "");
 SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0 , "");
 
 NMG_LOCK_T	netmap_global_lock;
 
@@ -270,28 +274,30 @@ netmap_set_all_rings(struct ifnet *ifp, 
 {
 	struct netmap_adapter *na;
 	int i;
+	u_int ntx, nrx;
 
 	if (!(ifp->if_capenable & IFCAP_NETMAP))
 		return;
 
 	na = NA(ifp);
 
-	for (i = 0; i <= na->num_tx_rings; i++) {
+	ntx = netmap_real_tx_rings(na);
+	nrx = netmap_real_rx_rings(na);
+
+	for (i = 0; i < ntx; i++) {
 		if (stopped)
 			netmap_disable_ring(na->tx_rings + i);
 		else
 			na->tx_rings[i].nkr_stopped = 0;
-		na->nm_notify(na, i, NR_TX, NAF_DISABLE_NOTIFY |
-			(i == na->num_tx_rings ? NAF_GLOBAL_NOTIFY: 0));
+		na->nm_notify(na, i, NR_TX, NAF_DISABLE_NOTIFY);
 	}
 
-	for (i = 0; i <= na->num_rx_rings; i++) {
+	for (i = 0; i < nrx; i++) {
 		if (stopped)
 			netmap_disable_ring(na->rx_rings + i);
 		else
 			na->rx_rings[i].nkr_stopped = 0;
-		na->nm_notify(na, i, NR_RX, NAF_DISABLE_NOTIFY |
-			(i == na->num_rx_rings ? NAF_GLOBAL_NOTIFY: 0));
+		na->nm_notify(na, i, NR_RX, NAF_DISABLE_NOTIFY);
 	}
 }
 
@@ -426,14 +432,73 @@ netmap_update_config(struct netmap_adapt
 	return 1;
 }
 
+static int
+netmap_txsync_compat(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	return na->nm_txsync(na, kring->ring_id, flags);
+}
+
+static int
+netmap_rxsync_compat(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	return na->nm_rxsync(na, kring->ring_id, flags);
+}
+
+static int
+netmap_txsync_to_host_compat(struct netmap_kring *kring, int flags)
+{
+	(void)flags;
+	netmap_txsync_to_host(kring->na);
+	return 0;
+}
+
+static int
+netmap_rxsync_from_host_compat(struct netmap_kring *kring, int flags)
+{
+	(void)flags;
+	netmap_rxsync_from_host(kring->na, NULL, NULL);
+	return 0;
+}
+
+
 
+/* create the krings array and initialize the fields common to all adapters.
+ * The array layout is this:
+ *
+ *                    +----------+
+ * na->tx_rings ----->|          | \
+ *                    |          |  } na->num_tx_ring
+ *                    |          | /
+ *                    +----------+
+ *                    |          |    host tx kring
+ * na->rx_rings ----> +----------+
+ *                    |          | \
+ *                    |          |  } na->num_rx_rings
+ *                    |          | /
+ *                    +----------+
+ *                    |          |    host rx kring
+ *                    +----------+
+ * na->tailroom ----->|          | \
+ *                    |          |  } tailroom bytes
+ *                    |          | /
+ *                    +----------+
+ *
+ * Note: for compatibility, host krings are created even when not needed.
+ * The tailroom space is currently used by vale ports for allocating leases.
+ */
 int
-netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tailroom)
+netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
 {
 	u_int i, len, ndesc;
 	struct netmap_kring *kring;
+	u_int ntx, nrx;
+
+	/* account for the (possibly fake) host rings */
+	ntx = na->num_tx_rings + 1;
+	nrx = na->num_rx_rings + 1;
 
-	// XXX additional space for extra rings ?
 	len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom;
 
 	na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
@@ -454,12 +519,19 @@ netmap_krings_create(struct netmap_adapt
 		kring->na = na;
 		kring->ring_id = i;
 		kring->nkr_num_slots = ndesc;
+		if (i < na->num_tx_rings) {
+			kring->nm_sync = netmap_txsync_compat; // XXX
+		} else if (i == na->num_tx_rings) {
+			kring->nm_sync = netmap_txsync_to_host_compat;
+		}
 		/*
 		 * IMPORTANT: Always keep one slot empty.
 		 */
 		kring->rhead = kring->rcur = kring->nr_hwcur = 0;
 		kring->rtail = kring->nr_hwtail = ndesc - 1;
 		snprintf(kring->name, sizeof(kring->name) - 1, "%s TX%d", NM_IFPNAME(na->ifp), i);
+		ND("ktx %s h %d c %d t %d",
+			kring->name, kring->rhead, kring->rcur, kring->rtail);
 		mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF);
 		init_waitqueue_head(&kring->si);
 	}
@@ -471,9 +543,16 @@ netmap_krings_create(struct netmap_adapt
 		kring->na = na;
 		kring->ring_id = i;
 		kring->nkr_num_slots = ndesc;
+		if (i < na->num_rx_rings) {
+			kring->nm_sync = netmap_rxsync_compat; // XXX
+		} else if (i == na->num_rx_rings) {
+			kring->nm_sync = netmap_rxsync_from_host_compat;
+		}
 		kring->rhead = kring->rcur = kring->nr_hwcur = 0;
 		kring->rtail = kring->nr_hwtail = 0;
 		snprintf(kring->name, sizeof(kring->name) - 1, "%s RX%d", NM_IFPNAME(na->ifp), i);
+		ND("krx %s h %d c %d t %d",
+			kring->name, kring->rhead, kring->rcur, kring->rtail);
 		mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF);
 		init_waitqueue_head(&kring->si);
 	}
@@ -486,17 +565,15 @@ netmap_krings_create(struct netmap_adapt
 }
 
 
-/* XXX check boundaries */
+/* undo the actions performed by netmap_krings_create */
 void
 netmap_krings_delete(struct netmap_adapter *na)
 {
-	int i;
+	struct netmap_kring *kring = na->tx_rings;
 
-	for (i = 0; i < na->num_tx_rings + 1; i++) {
-		mtx_destroy(&na->tx_rings[i].q_lock);
-	}
-	for (i = 0; i < na->num_rx_rings + 1; i++) {
-		mtx_destroy(&na->rx_rings[i].q_lock);
+	/* we rely on the krings layout described above */
+	for ( ; kring != na->tailroom; kring++) {
+		mtx_destroy(&kring->q_lock);
 	}
 	free(na->tx_rings, M_DEVBUF);
 	na->tx_rings = na->rx_rings = na->tailroom = NULL;
@@ -677,6 +754,20 @@ netmap_do_unregif(struct netmap_priv_d *
 	netmap_mem_if_delete(na, nifp);
 }
 
+static __inline int
+nm_tx_si_user(struct netmap_priv_d *priv)
+{
+	return (priv->np_na != NULL &&
+		(priv->np_txqlast - priv->np_txqfirst > 1));
+}
+
+static __inline int
+nm_rx_si_user(struct netmap_priv_d *priv)
+{
+	return (priv->np_na != NULL &&
+		(priv->np_rxqlast - priv->np_rxqfirst > 1));
+}
+
 
 /*
  * returns 1 if this is the last instance and we can free priv
@@ -702,6 +793,10 @@ netmap_dtor_locked(struct netmap_priv_d 
 	priv->np_nifp = NULL;
 	netmap_drop_memory_locked(priv);
 	if (priv->np_na) {
+		if (nm_tx_si_user(priv))
+			na->tx_si_users--;
+		if (nm_rx_si_user(priv))
+			na->rx_si_users--;
 		netmap_adapter_put(na);
 		priv->np_na = NULL;
 	}
@@ -864,22 +959,8 @@ netmap_txsync_to_host(struct netmap_adap
 	struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
 	struct netmap_ring *ring = kring->ring;
 	u_int const lim = kring->nkr_num_slots - 1;
-	u_int const head = nm_txsync_prologue(kring);
+	u_int const head = kring->rhead;
 	struct mbq q;
-	int error;
-
-	error = nm_kr_tryget(kring);
-	if (error) {
-		if (error == NM_KR_BUSY)
-			D("ring %p busy (user error)", kring);
-		return;
-	}
-	if (head > lim) {
-		D("invalid ring index in stack TX kring %p", kring);
-		netmap_ring_reinit(kring);
-		nm_kr_put(kring);
-		return;
-	}
 
 	/* Take packets from hwcur to head and pass them up.
 	 * force head = cur since netmap_grab_packets() stops at head
@@ -896,7 +977,6 @@ netmap_txsync_to_host(struct netmap_adap
 		kring->nr_hwtail -= lim + 1;
 	nm_txsync_finalize(kring);
 
-	nm_kr_put(kring);
 	netmap_send_up(na->ifp, &q);
 }
 
@@ -921,27 +1001,15 @@ netmap_rxsync_from_host(struct netmap_ad
 	struct netmap_ring *ring = kring->ring;
 	u_int nm_i, n;
 	u_int const lim = kring->nkr_num_slots - 1;
-	u_int const head = nm_rxsync_prologue(kring);
+	u_int const head = kring->rhead;
 	int ret = 0;
 	struct mbq *q = &kring->rx_queue;
 
 	(void)pwait;	/* disable unused warnings */
-
-	if (head > lim) {
-		netmap_ring_reinit(kring);
-		return EINVAL;
-	}
-
-	if (kring->nkr_stopped) /* check a first time without lock */
-		return EBUSY;
+	(void)td;
 
 	mtx_lock(&q->lock);
 
-	if (kring->nkr_stopped) {  /* check again with lock held */
-		ret = EBUSY;
-		goto unlock_out;
-	}
-
 	/* First part: import newly received packets */
 	n = mbq_len(q);
 	if (n) { /* grab packets from the queue */
@@ -982,8 +1050,6 @@ netmap_rxsync_from_host(struct netmap_ad
 	if (kring->rcur == kring->rtail && td) /* no bufs available */
 		selrecord(td, &kring->si);
 
-unlock_out:
-
 	mtx_unlock(&q->lock);
 	return ret;
 }
@@ -1107,19 +1173,26 @@ netmap_get_hw_na(struct ifnet *ifp, stru
 int
 netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
 {
-	struct ifnet *ifp;
+	struct ifnet *ifp = NULL;
 	int error = 0;
-	struct netmap_adapter *ret;
+	struct netmap_adapter *ret = NULL;
 
 	*na = NULL;     /* default return value */
 
 	/* first try to see if this is a bridge port. */
 	NMG_LOCK_ASSERT();
 
+	error = netmap_get_pipe_na(nmr, na, create);
+	if (error || *na != NULL)
+		return error;
+
 	error = netmap_get_bdg_na(nmr, na, create);
-	if (error || *na != NULL) /* valid match in netmap_get_bdg_na() */
+	if (error)
 		return error;
 
+	if (*na != NULL) /* valid match in netmap_get_bdg_na() */
+		goto pipes;
+
 	ifp = ifunit_ref(nmr->nr_name);
 	if (ifp == NULL) {
 	        return ENXIO;
@@ -1129,18 +1202,23 @@ netmap_get_na(struct nmreq *nmr, struct 
 	if (error)
 		goto out;
 
-	if (ret != NULL) {
-		/* Users cannot use the NIC attached to a bridge directly */
-		if (NETMAP_OWNED_BY_KERN(ret)) {
-			error = EBUSY;
-			goto out;
-		}
-		error = 0;
-		*na = ret;
-		netmap_adapter_get(ret);
+	/* Users cannot use the NIC attached to a bridge directly */
+	if (NETMAP_OWNED_BY_KERN(ret)) {
+		error = EBUSY;
+		goto out;
 	}
+	*na = ret;
+	netmap_adapter_get(ret);
+
+pipes:
+	error = netmap_pipe_alloc(*na, nmr);
+
 out:
-	if_rele(ifp);
+	if (error && ret != NULL)
+		netmap_adapter_put(ret);
+
+	if (ifp)
+		if_rele(ifp);
 
 	return error;
 }
@@ -1365,45 +1443,88 @@ netmap_ring_reinit(struct netmap_kring *
  * for all rings is the same as a single ring.
  */
 static int
-netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
+netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
 {
 	struct netmap_adapter *na = priv->np_na;
-	struct ifnet *ifp = na->ifp;
-	u_int i = ringid & NETMAP_RING_MASK;
-	/* initially (np_qfirst == np_qlast) we don't want to lock */
-	u_int lim = na->num_rx_rings;
-
-	if (na->num_tx_rings > lim)
-		lim = na->num_tx_rings;
-	if ( (ringid & NETMAP_HW_RING) && i >= lim) {
-		D("invalid ring id %d", i);
-		return (EINVAL);
-	}
-	priv->np_ringid = ringid;
-	if (ringid & NETMAP_SW_RING) {
-		priv->np_qfirst = NETMAP_SW_RING;
-		priv->np_qlast = 0;
-	} else if (ringid & NETMAP_HW_RING) {
-		priv->np_qfirst = i;
-		priv->np_qlast = i + 1;
-	} else {
-		priv->np_qfirst = 0;
-		priv->np_qlast = NETMAP_HW_RING ;
+	u_int j, i = ringid & NETMAP_RING_MASK;
+	u_int reg = flags & NR_REG_MASK;
+
+	if (reg == NR_REG_DEFAULT) {
+		/* convert from old ringid to flags */
+		if (ringid & NETMAP_SW_RING) {
+			reg = NR_REG_SW;
+		} else if (ringid & NETMAP_HW_RING) {
+			reg = NR_REG_ONE_NIC;
+		} else {
+			reg = NR_REG_ALL_NIC;
+		}
+		D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg);
+	}
+	switch (reg) {
+	case NR_REG_ALL_NIC:
+	case NR_REG_PIPE_MASTER:
+	case NR_REG_PIPE_SLAVE:
+		priv->np_txqfirst = 0;
+		priv->np_txqlast = na->num_tx_rings;
+		priv->np_rxqfirst = 0;
+		priv->np_rxqlast = na->num_rx_rings;
+		ND("%s %d %d", "ALL/PIPE",
+			priv->np_rxqfirst, priv->np_rxqlast);
+		break;
+	case NR_REG_SW:
+	case NR_REG_NIC_SW:
+		if (!(na->na_flags & NAF_HOST_RINGS)) {
+			D("host rings not supported");
+			return EINVAL;
+		}
+		priv->np_txqfirst = (reg == NR_REG_SW ?
+			na->num_tx_rings : 0);
+		priv->np_txqlast = na->num_tx_rings + 1;
+		priv->np_rxqfirst = (reg == NR_REG_SW ?
+			na->num_rx_rings : 0);
+		priv->np_rxqlast = na->num_rx_rings + 1;
+		ND("%s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW",
+			priv->np_rxqfirst, priv->np_rxqlast);
+		break;
+	case NR_REG_ONE_NIC:
+		if (i >= na->num_tx_rings && i >= na->num_rx_rings) {
+			D("invalid ring id %d", i);
+			return EINVAL;
+		}
+		/* if not enough rings, use the first one */
+		j = i;
+		if (j >= na->num_tx_rings)
+			j = 0;
+		priv->np_txqfirst = j;
+		priv->np_txqlast = j + 1;
+		j = i;
+		if (j >= na->num_rx_rings)
+			j = 0;
+		priv->np_rxqfirst = j;
+		priv->np_rxqlast = j + 1;
+		break;
+	default:
+		D("invalid regif type %d", reg);
+		return EINVAL;
 	}
 	priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
-    if (netmap_verbose) {
-	if (ringid & NETMAP_SW_RING)
-		D("ringid %s set to SW RING", NM_IFPNAME(ifp));
-	else if (ringid & NETMAP_HW_RING)
-		D("ringid %s set to HW RING %d", NM_IFPNAME(ifp),
-			priv->np_qfirst);
-	else
-		D("ringid %s set to all %d HW RINGS", NM_IFPNAME(ifp), lim);
-    }
+	priv->np_flags = (flags & ~NR_REG_MASK) | reg;
+	if (nm_tx_si_user(priv))
+		na->tx_si_users++;
+	if (nm_rx_si_user(priv))
+		na->rx_si_users++;
+	if (netmap_verbose) {
+		D("%s: tx [%d,%d) rx [%d,%d) id %d", 
+			NM_IFPNAME(na->ifp),
+			priv->np_txqfirst,
+			priv->np_txqlast,
+			priv->np_rxqfirst,
+			priv->np_rxqlast,
+			i);
+	}
 	return 0;
 }
 
-
 /*
  * possibly move the interface to netmap-mode.
  * If success it returns a pointer to netmap_if, otherwise NULL.
@@ -1411,7 +1532,7 @@ netmap_set_ringid(struct netmap_priv_d *
  */
 struct netmap_if *
 netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
-	uint16_t ringid, int *err)
+	uint16_t ringid, uint32_t flags, int *err)
 {
 	struct ifnet *ifp = na->ifp;
 	struct netmap_if *nifp = NULL;
@@ -1421,7 +1542,7 @@ netmap_do_regif(struct netmap_priv_d *pr
 	/* ring configuration may have changed, fetch from the card */
 	netmap_update_config(na);
 	priv->np_na = na;     /* store the reference */
-	error = netmap_set_ringid(priv, ringid);
+	error = netmap_set_ringid(priv, ringid, flags);
 	if (error)
 		goto out;
 	/* ensure allocators are ready */
@@ -1501,26 +1622,12 @@ netmap_ioctl(struct cdev *dev, u_long cm
 	struct nmreq *nmr = (struct nmreq *) data;
 	struct netmap_adapter *na = NULL;
 	int error;
-	u_int i, lim;
+	u_int i, qfirst, qlast;
 	struct netmap_if *nifp;
 	struct netmap_kring *krings;
 
 	(void)dev;	/* UNUSED */
 	(void)fflag;	/* UNUSED */
-#ifdef linux
-#define devfs_get_cdevpriv(pp)				\
-	({ *(struct netmap_priv_d **)pp = ((struct file *)td)->private_data; 	\
-		(*pp ? 0 : ENOENT); })
-
-/* devfs_set_cdevpriv cannot fail on linux */
-#define devfs_set_cdevpriv(p, fn)				\
-	({ ((struct file *)td)->private_data = p; (p ? 0 : EINVAL); })
-
-
-#define devfs_clear_cdevpriv()	do {				\
-		netmap_dtor(priv); ((struct file *)td)->private_data = 0;	\
-	} while (0)
-#endif /* linux */
 
 	if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
 		/* truncate name */
@@ -1530,6 +1637,9 @@ netmap_ioctl(struct cdev *dev, u_long cm
 				nmr->nr_name,
 				nmr->nr_version, NETMAP_API);
 			nmr->nr_version = NETMAP_API;
+		}
+		if (nmr->nr_version < NETMAP_MIN_API ||
+		    nmr->nr_version > NETMAP_MAX_API) {
 			return EINVAL;
 		}
 	}
@@ -1564,7 +1674,8 @@ netmap_ioctl(struct cdev *dev, u_long cm
 				nmd = na->nm_mem; /* get memory allocator */
 			}
 
-			error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags);
+			error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags,
+				&nmr->nr_arg2);
 			if (error)
 				break;
 			if (na == NULL) /* only memory info */
@@ -1576,8 +1687,6 @@ netmap_ioctl(struct cdev *dev, u_long cm
 			nmr->nr_tx_rings = na->num_tx_rings;
 			nmr->nr_rx_slots = na->num_rx_desc;
 			nmr->nr_tx_slots = na->num_tx_desc;
-			if (memflags & NETMAP_MEM_PRIVATE)
-				nmr->nr_ringid |= NETMAP_PRIV_MEM;
 			netmap_adapter_put(na);
 		} while (0);
 		NMG_UNLOCK();
@@ -1587,7 +1696,7 @@ netmap_ioctl(struct cdev *dev, u_long cm
 		/* possibly attach/detach NIC and VALE switch */
 		i = nmr->nr_cmd;
 		if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
-				|| i == NETMAP_BDG_OFFSET) {
+				|| i == NETMAP_BDG_VNET_HDR) {
 			error = netmap_bdg_ctl(nmr, NULL);
 			break;
 		} else if (i != 0) {
@@ -1602,7 +1711,7 @@ netmap_ioctl(struct cdev *dev, u_long cm
 			u_int memflags;
 
 			if (priv->np_na != NULL) {	/* thread already registered */
-				error = netmap_set_ringid(priv, nmr->nr_ringid);
+				error = EBUSY;
 				break;
 			}
 			/* find the interface and a reference */
@@ -1615,27 +1724,39 @@ netmap_ioctl(struct cdev *dev, u_long cm
 				error = EBUSY;
 				break;
 			}
-			nifp = netmap_do_regif(priv, na, nmr->nr_ringid, &error);
+			nifp = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags, &error);
 			if (!nifp) {    /* reg. failed, release priv and ref */
 				netmap_adapter_put(na);
 				priv->np_nifp = NULL;
 				break;
 			}
+			priv->np_td = td; // XXX kqueue, debugging only
 
 			/* return the offset of the netmap_if object */
 			nmr->nr_rx_rings = na->num_rx_rings;
 			nmr->nr_tx_rings = na->num_tx_rings;
 			nmr->nr_rx_slots = na->num_rx_desc;
 			nmr->nr_tx_slots = na->num_tx_desc;
-			error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags);
+			error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags,
+				&nmr->nr_arg2);
 			if (error) {
 				netmap_adapter_put(na);
 				break;
 			}
 			if (memflags & NETMAP_MEM_PRIVATE) {
-				nmr->nr_ringid |= NETMAP_PRIV_MEM;
 				*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
 			}
+			priv->np_txsi = (priv->np_txqlast - priv->np_txqfirst > 1) ?
+				&na->tx_si : &na->tx_rings[priv->np_txqfirst].si;
+			priv->np_rxsi = (priv->np_rxqlast - priv->np_rxqfirst > 1) ?
+				&na->rx_si : &na->rx_rings[priv->np_rxqfirst].si;
+
+			if (nmr->nr_arg3) {
+				D("requested %d extra buffers", nmr->nr_arg3);
+				nmr->nr_arg3 = netmap_extra_alloc(na,
+					&nifp->ni_bufs_head, nmr->nr_arg3);
+				D("got %d extra buffers", nmr->nr_arg3);
+			}
 			nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
 		} while (0);
 		NMG_UNLOCK();
@@ -1666,21 +1787,17 @@ netmap_ioctl(struct cdev *dev, u_long cm
 			break;
 		}
 
-		if (priv->np_qfirst == NETMAP_SW_RING) { /* host rings */
-			if (cmd == NIOCTXSYNC)
-				netmap_txsync_to_host(na);
-			else
-				netmap_rxsync_from_host(na, NULL, NULL);
-			break;
+		if (cmd == NIOCTXSYNC) {
+			krings = na->tx_rings;
+			qfirst = priv->np_txqfirst;
+			qlast = priv->np_txqlast;
+		} else {
+			krings = na->rx_rings;
+			qfirst = priv->np_rxqfirst;
+			qlast = priv->np_rxqlast;
 		}
-		/* find the last ring to scan */
-		lim = priv->np_qlast;
-		if (lim == NETMAP_HW_RING)
-			lim = (cmd == NIOCTXSYNC) ?
-			    na->num_tx_rings : na->num_rx_rings;
 
-		krings = (cmd == NIOCTXSYNC) ? na->tx_rings : na->rx_rings;
-		for (i = priv->np_qfirst; i < lim; i++) {
+		for (i = qfirst; i < qlast; i++) {
 			struct netmap_kring *kring = krings + i;
 			if (nm_kr_tryget(kring)) {
 				error = EBUSY;
@@ -1694,14 +1811,14 @@ netmap_ioctl(struct cdev *dev, u_long cm
 				if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
 					netmap_ring_reinit(kring);
 				} else {
-					na->nm_txsync(na, i, NAF_FORCE_RECLAIM);
+					kring->nm_sync(kring, NAF_FORCE_RECLAIM);
 				}
 				if (netmap_verbose & NM_VERB_TXSYNC)
 					D("post txsync ring %d cur %d hwcur %d",
 					    i, kring->ring->cur,
 					    kring->nr_hwcur);
 			} else {
-				na->nm_rxsync(na, i, NAF_FORCE_READ);
+				kring->nm_sync(kring, NAF_FORCE_READ);
 				microtime(&na->rx_rings[i].ring->ts);
 			}
 			nm_kr_put(kring);
@@ -1772,9 +1889,9 @@ netmap_poll(struct cdev *dev, int events
 	struct ifnet *ifp;
 	struct netmap_kring *kring;
 	u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0;
-	u_int lim_tx, lim_rx;
 	struct mbq q;		/* packets from hw queues to host stack */
 	void *pwait = dev;	/* linux compatibility */
+	int is_kevent = 0;
 
 	/*
 	 * In order to avoid nested locks, we need to "double check"
@@ -1786,7 +1903,19 @@ netmap_poll(struct cdev *dev, int events
 	(void)pwait;
 	mbq_init(&q);
 
-	if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
+	/*
+	 * XXX kevent has curthread->tp_fop == NULL,
+	 * so devfs_get_cdevpriv() fails. We circumvent this by passing
+	 * priv as the first argument, which is also useful to avoid
+	 * the selrecord() which are not necessary in that case.
+	 */
+	if (devfs_get_cdevpriv((void **)&priv) != 0) {
+		is_kevent = 1;
+		if (netmap_verbose)
+			D("called from kevent");
+		priv = (struct netmap_priv_d *)dev;
+	}
+	if (priv == NULL)
 		return POLLERR;
 
 	if (priv->np_nifp == NULL) {
@@ -1811,28 +1940,6 @@ netmap_poll(struct cdev *dev, int events
 	want_tx = events & (POLLOUT | POLLWRNORM);
 	want_rx = events & (POLLIN | POLLRDNORM);
 
-	lim_tx = na->num_tx_rings;
-	lim_rx = na->num_rx_rings;
-
-	if (priv->np_qfirst == NETMAP_SW_RING) {
-		// XXX locking ?
-		/* handle the host stack ring */
-		if (priv->np_txpoll || want_tx) {
-			/* push any packets up, then we are always ready */
-			netmap_txsync_to_host(na);
-			revents |= want_tx;
-		}
-		if (want_rx) {
-			kring = &na->rx_rings[lim_rx];
-			/* XXX replace with rxprologue etc. */
-			if (nm_ring_empty(kring->ring))
-				netmap_rxsync_from_host(na, td, dev);
-			if (!nm_ring_empty(kring->ring))
-				revents |= want_rx;
-		}
-		return (revents);
-	}
-
 
 	/*
 	 * check_all_{tx|rx} are set if the card has more than one queue AND
@@ -1847,19 +1954,15 @@ netmap_poll(struct cdev *dev, int events
 	 * there are pending packets to send. The latter can be disabled
 	 * passing NETMAP_NO_TX_POLL in the NIOCREG call.
 	 */
-	check_all_tx = (priv->np_qlast == NETMAP_HW_RING) && (lim_tx > 1);
-	check_all_rx = (priv->np_qlast == NETMAP_HW_RING) && (lim_rx > 1);
-
-	if (priv->np_qlast != NETMAP_HW_RING) {
-		lim_tx = lim_rx = priv->np_qlast;
-	}
+	check_all_tx = nm_tx_si_user(priv);
+	check_all_rx = nm_rx_si_user(priv);
 
 	/*
 	 * We start with a lock free round which is cheap if we have
 	 * slots available. If this fails, then lock and call the sync
 	 * routines.
 	 */
-	for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) {
+	for (i = priv->np_rxqfirst; want_rx && i < priv->np_rxqlast; i++) {
 		kring = &na->rx_rings[i];
 		/* XXX compare ring->cur and kring->tail */
 		if (!nm_ring_empty(kring->ring)) {
@@ -1867,7 +1970,7 @@ netmap_poll(struct cdev *dev, int events
 			want_rx = 0;	/* also breaks the loop */
 		}
 	}
-	for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) {
+	for (i = priv->np_txqfirst; want_tx && i < priv->np_txqlast; i++) {
 		kring = &na->tx_rings[i];
 		/* XXX compare ring->cur and kring->tail */
 		if (!nm_ring_empty(kring->ring)) {
@@ -1891,7 +1994,7 @@ netmap_poll(struct cdev *dev, int events
 		 * used to skip rings with no pending transmissions.
 		 */
 flush_tx:
-		for (i = priv->np_qfirst; i < lim_tx; i++) {
+		for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
 			int found = 0;
 
 			kring = &na->tx_rings[i];
@@ -1906,7 +2009,7 @@ flush_tx:
 				netmap_ring_reinit(kring);
 				revents |= POLLERR;
 			} else {
-				if (na->nm_txsync(na, i, 0))
+				if (kring->nm_sync(kring, 0))
 					revents |= POLLERR;
 			}
 
@@ -1921,12 +2024,12 @@ flush_tx:
 			if (found) { /* notify other listeners */
 				revents |= want_tx;
 				want_tx = 0;
-				na->nm_notify(na, i, NR_TX, NAF_GLOBAL_NOTIFY);
+				na->nm_notify(na, i, NR_TX, 0);
 			}
 		}
-		if (want_tx && retry_tx) {
+		if (want_tx && retry_tx && !is_kevent) {
 			selrecord(td, check_all_tx ?
-			    &na->tx_si : &na->tx_rings[priv->np_qfirst].si);
+			    &na->tx_si : &na->tx_rings[priv->np_txqfirst].si);
 			retry_tx = 0;
 			goto flush_tx;
 		}
@@ -1940,7 +2043,7 @@ flush_tx:
 		int send_down = 0; /* transparent mode */
 		/* two rounds here to for race avoidance */
 do_retry_rx:
-		for (i = priv->np_qfirst; i < lim_rx; i++) {
+		for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
 			int found = 0;
 
 			kring = &na->rx_rings[i];
@@ -1962,7 +2065,7 @@ do_retry_rx:
 				netmap_grab_packets(kring, &q, netmap_fwd);
 			}
 
-			if (na->nm_rxsync(na, i, 0))
+			if (kring->nm_sync(kring, 0))
 				revents |= POLLERR;
 			if (netmap_no_timestamp == 0 ||
 					kring->ring->flags & NR_TIMESTAMP) {
@@ -1974,24 +2077,26 @@ do_retry_rx:
 			if (found) {
 				revents |= want_rx;
 				retry_rx = 0;
-				na->nm_notify(na, i, NR_RX, NAF_GLOBAL_NOTIFY);
+				na->nm_notify(na, i, NR_RX, 0);
 			}
 		}
 
 		/* transparent mode XXX only during first pass ? */
-		kring = &na->rx_rings[lim_rx];
-		if (check_all_rx
-		    && (netmap_fwd || kring->ring->flags & NR_FORWARD)) {
-			/* XXX fix to use kring fields */
-			if (nm_ring_empty(kring->ring))
-				send_down = netmap_rxsync_from_host(na, td, dev);
-			if (!nm_ring_empty(kring->ring))
-				revents |= want_rx;
+		if (na->na_flags & NAF_HOST_RINGS) {
+			kring = &na->rx_rings[na->num_rx_rings];
+			if (check_all_rx
+			    && (netmap_fwd || kring->ring->flags & NR_FORWARD)) {
+				/* XXX fix to use kring fields */
+				if (nm_ring_empty(kring->ring))
+					send_down = netmap_rxsync_from_host(na, td, dev);
+				if (!nm_ring_empty(kring->ring))
+					revents |= want_rx;
+			}
 		}
 
-		if (retry_rx)
+		if (retry_rx && !is_kevent)
 			selrecord(td, check_all_rx ?
-			    &na->rx_si : &na->rx_rings[priv->np_qfirst].si);
+			    &na->rx_si : &na->rx_rings[priv->np_rxqfirst].si);
 		if (send_down > 0 || retry_rx) {
 			retry_rx = 0;
 			if (send_down)
@@ -2032,14 +2137,14 @@ netmap_notify(struct netmap_adapter *na,
 
 	if (tx == NR_TX) {
 		kring = na->tx_rings + n_ring;
-		selwakeuppri(&kring->si, PI_NET);
-		if (flags & NAF_GLOBAL_NOTIFY)
-			selwakeuppri(&na->tx_si, PI_NET);
+		OS_selwakeup(&kring->si, PI_NET);
+		if (na->tx_si_users > 0)
+			OS_selwakeup(&na->tx_si, PI_NET);
 	} else {
 		kring = na->rx_rings + n_ring;
-		selwakeuppri(&kring->si, PI_NET);
-		if (flags & NAF_GLOBAL_NOTIFY)
-			selwakeuppri(&na->rx_si, PI_NET);
+		OS_selwakeup(&kring->si, PI_NET);
+		if (na->rx_si_users > 0)
+			OS_selwakeup(&na->rx_si, PI_NET);
 	}
 	return 0;
 }
@@ -2090,6 +2195,7 @@ netmap_detach_common(struct netmap_adapt
 		D("freeing leftover tx_rings");
 		na->nm_krings_delete(na);
 	}
+	netmap_pipe_dealloc(na);
 	if (na->na_flags & NAF_MEM_OWNER)
 		netmap_mem_private_delete(na->nm_mem);
 	bzero(na, sizeof(*na));
@@ -2120,6 +2226,7 @@ netmap_attach(struct netmap_adapter *arg
 	if (hwna == NULL)
 		goto fail;
 	hwna->up = *arg;
+	hwna->up.na_flags |= NAF_HOST_RINGS;
 	if (netmap_attach_common(&hwna->up)) {
 		free(hwna, M_DEVBUF);
 		goto fail;
@@ -2177,12 +2284,10 @@ NM_DBG(netmap_adapter_put)(struct netmap
 	return 1;
 }
 
-
 int
 netmap_hw_krings_create(struct netmap_adapter *na)
 {
-	int ret = netmap_krings_create(na,
-		na->num_tx_rings + 1, na->num_rx_rings + 1, 0);
+	int ret = netmap_krings_create(na, 0);
 	if (ret == 0) {
 		/* initialize the mbq for the sw rx ring */
 		mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue);
@@ -2370,7 +2475,7 @@ netmap_reset(struct netmap_adapter *na, 
 	 * We do the wakeup here, but the ring is not yet reconfigured.
 	 * However, we are under lock so there are no races.
 	 */
-	na->nm_notify(na, n, tx, NAF_GLOBAL_NOTIFY);
+	na->nm_notify(na, n, tx, 0);
 	return kring->ring->slot;
 }
 
@@ -2405,15 +2510,13 @@ netmap_common_irq(struct ifnet *ifp, u_i
 			return;	// not a physical queue
 		kring = na->rx_rings + q;
 		kring->nr_kflags |= NKR_PENDINTR;	// XXX atomic ?
-		na->nm_notify(na, q, NR_RX,
-			(na->num_rx_rings > 1 ? NAF_GLOBAL_NOTIFY : 0));
+		na->nm_notify(na, q, NR_RX, 0);
 		*work_done = 1; /* do not fire napi again */
 	} else { /* TX path */
 		if (q >= na->num_tx_rings)
 			return;	// not a physical queue
 		kring = na->tx_rings + q;
-		na->nm_notify(na, q, NR_TX,
-			(na->num_tx_rings > 1 ? NAF_GLOBAL_NOTIFY : 0));
+		na->nm_notify(na, q, NR_TX, 0);
 	}
 }
 

Modified: head/sys/dev/netmap/netmap_freebsd.c
==============================================================================
--- head/sys/dev/netmap/netmap_freebsd.c	Sat Feb 15 02:30:34 2014	(r261908)
+++ head/sys/dev/netmap/netmap_freebsd.c	Sat Feb 15 04:53:04 2014	(r261909)
@@ -29,8 +29,10 @@
 #include <sys/module.h>
 #include <sys/errno.h>
 #include <sys/param.h>  /* defines used in kernel.h */
+#include <sys/poll.h>  /* POLLIN, POLLOUT */
 #include <sys/kernel.h> /* types used in module initialization */
 #include <sys/conf.h>	/* DEV_MODULE */
+#include <sys/endian.h>
 
 #include <sys/rwlock.h>
 
@@ -49,6 +51,8 @@
 #include <net/if.h>
 #include <net/if_var.h>
 #include <machine/bus.h>        /* bus_dmamap_* */
+#include <netinet/in.h>		/* in6_cksum_pseudo() */
+#include <machine/in_cksum.h>  /* in_pseudo(), in_cksum_hdr() */
 
 #include <net/netmap.h>
 #include <dev/netmap/netmap_kern.h>
@@ -57,6 +61,73 @@
 
 /* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
 
+rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
+{
+	/* TODO XXX please use the FreeBSD implementation for this. */
+	uint16_t *words = (uint16_t *)data;
+	int nw = len / 2;
+	int i;
+
+	for (i = 0; i < nw; i++)
+		cur_sum += be16toh(words[i]);
+
+	if (len & 1)
+		cur_sum += (data[len-1] << 8);
+
+	return cur_sum;
+}
+
+/* Fold a raw checksum: 'cur_sum' is in host byte order, while the
+ * return value is in network byte order.
+ */
+uint16_t nm_csum_fold(rawsum_t cur_sum)
+{
+	/* TODO XXX please use the FreeBSD implementation for this. */

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list