svn commit: r368034 - stable/12/tools/tools/netmap

Vincenzo Maffione vmaffione at FreeBSD.org
Wed Nov 25 21:23:14 UTC 2020


Author: vmaffione
Date: Wed Nov 25 21:23:13 2020
New Revision: 368034
URL: https://svnweb.freebsd.org/changeset/base/368034

Log:
  MFC r367933
  
  netmap: lb: switch to libnetmap
  
  Use the newer libnetmap (included in base) rather than the older
  nm_open()/nm_close() defined in netmap_user.h

Modified:
  stable/12/tools/tools/netmap/lb.c
  stable/12/tools/tools/netmap/pkt_hash.c
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/tools/tools/netmap/lb.c
==============================================================================
--- stable/12/tools/tools/netmap/lb.c	Wed Nov 25 21:22:16 2020	(r368033)
+++ stable/12/tools/tools/netmap/lb.c	Wed Nov 25 21:23:13 2020	(r368034)
@@ -23,21 +23,22 @@
  * SUCH DAMAGE.
  */
 /* $FreeBSD$ */
-#include <stdio.h>
-#include <string.h>
 #include <ctype.h>
-#include <stdbool.h>
+#include <errno.h>
 #include <inttypes.h>
+#include <libnetmap.h>
+#include <netinet/in.h>		/* htonl */
+#include <pthread.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 #include <syslog.h>
-
-#define NETMAP_WITH_LIBS
-#include <net/netmap_user.h>
+#include <sys/ioctl.h>
 #include <sys/poll.h>
+#include <unistd.h>
 
-#include <netinet/in.h>		/* htonl */
-
-#include <pthread.h>
-
 #include "pkt_hash.h"
 #include "ctrs.h"
 
@@ -86,12 +87,12 @@ struct compact_ipv6_hdr {
 #define DEF_BATCH	2048
 #define DEF_WAIT_LINK	2
 #define DEF_STATS_INT	600
-#define BUF_REVOKE	100
+#define BUF_REVOKE	150
 #define STAT_MSG_MAXSIZE 1024
 
 static struct {
-	char ifname[MAX_IFNAMELEN];
-	char base_name[MAX_IFNAMELEN];
+	char ifname[MAX_IFNAMELEN + 1];
+	char base_name[MAX_IFNAMELEN + 1];
 	int netmap_fd;
 	uint16_t output_rings;
 	uint16_t num_groups;
@@ -173,7 +174,7 @@ struct port_des {
 	unsigned int last_sync;
 	uint32_t last_tail;
 	struct overflow_queue *oq;
-	struct nm_desc *nmd;
+	struct nmport_d *nmd;
 	struct netmap_ring *ring;
 	struct group_des *group;
 };
@@ -375,7 +376,7 @@ free_buffers(void)
 	D("added %d buffers to netmap free list", tot);
 
 	for (i = 0; i < glob_arg.output_rings + 1; ++i) {
-		nm_close(ports[i].nmd);
+		nmport_close(ports[i].nmd);
 	}
 }
 
@@ -480,6 +481,28 @@ init_groups(void)
 	g->last = 1;
 }
 
+
+/* To support packets that span multiple slots (NS_MOREFRAG) we
+ * need to make sure of the following:
+ *
+ * - all fragments of the same packet must go to the same output pipe
+ * - when dropping, all fragments of the same packet must be dropped
+ *
+ * For the former point we remember and reuse the last hash computed
+ * in each input ring, and only update it when NS_MOREFRAG was not
+ * set in the last received slot (this marks the start of a new packet).
+ *
+ * For the latter point, we only update the output ring head pointer
+ * when an entire packet has been forwarded. We keep a shadow_head
+ * pointer to know where to put the next partial fragment and,
+ * when the need to drop arises, we roll it back to head.
+ */
+struct morefrag {
+	uint16_t last_flag;	/* for intput rings */
+	uint32_t last_hash;	/* for input rings */
+	uint32_t shadow_head;	/* for output rings */
+};
+
 /* push the packet described by slot rs to the group g.
  * This may cause other buffers to be pushed down the
  * chain headed by g.
@@ -493,21 +516,28 @@ forward_packet(struct group_des *g, struct netmap_slot
 	struct port_des *port = &g->ports[output_port];
 	struct netmap_ring *ring = port->ring;
 	struct overflow_queue *q = port->oq;
+	struct morefrag *mf = (struct morefrag *)ring->sem;
+	uint16_t curmf = rs->flags & NS_MOREFRAG;
 
 	/* Move the packet to the output pipe, unless there is
 	 * either no space left on the ring, or there is some
 	 * packet still in the overflow queue (since those must
 	 * take precedence over the new one)
 	*/
-	if (ring->head != ring->tail && (q == NULL || oq_empty(q))) {
-		struct netmap_slot *ts = &ring->slot[ring->head];
+	if (mf->shadow_head != ring->tail && (q == NULL || oq_empty(q))) {
+		struct netmap_slot *ts = &ring->slot[mf->shadow_head];
 		struct netmap_slot old_slot = *ts;
 
 		ts->buf_idx = rs->buf_idx;
 		ts->len = rs->len;
-		ts->flags |= NS_BUF_CHANGED;
+		ts->flags = rs->flags | NS_BUF_CHANGED;
 		ts->ptr = rs->ptr;
-		ring->head = nm_ring_next(ring, ring->head);
+		mf->shadow_head = nm_ring_next(ring, mf->shadow_head);
+		if (!curmf) {
+			ring->head = mf->shadow_head;
+		}
+		ND("curmf %2x ts->flags %2x shadow_head %3u head %3u tail %3u",
+				curmf, ts->flags, mf->shadow_head, ring->head, ring->tail);
 		port->ctr.bytes += rs->len;
 		port->ctr.pkts++;
 		forwarded++;
@@ -516,9 +546,20 @@ forward_packet(struct group_des *g, struct netmap_slot
 
 	/* use the overflow queue, if available */
 	if (q == NULL || oq_full(q)) {
+		uint32_t scan;
 		/* no space left on the ring and no overflow queue
 		 * available: we are forced to drop the packet
 		 */
+
+		/* drop previous fragments, if any */
+		for (scan = ring->head; scan != mf->shadow_head;
+				scan = nm_ring_next(ring, scan)) {
+			struct netmap_slot *ts = &ring->slot[scan];
+			dropped++;
+			port->ctr.drop_bytes += ts->len;
+		}
+		mf->shadow_head = ring->head;
+
 		dropped++;
 		port->ctr.drop++;
 		port->ctr.drop_bytes += rs->len;
@@ -550,9 +591,12 @@ forward_packet(struct group_des *g, struct netmap_slot
 
 		/* move the oldest BUF_REVOKE buffers from the
 		 * lp queue to the free queue
+		 *
+		 * We cannot revoke a partially received packet.
+		 * To make thinks simple we make sure to leave
+		 * at least NETMAP_MAX_FRAGS slots in the queue.
 		 */
-		// XXX optimize this cycle
-		for (j = 0; lp->oq->n && j < BUF_REVOKE; j++) {
+		for (j = 0; lp->oq->n > NETMAP_MAX_FRAGS && j < BUF_REVOKE; j++) {
 			struct netmap_slot tmp = oq_deq(lp->oq);
 
 			dropped++;
@@ -651,14 +695,6 @@ int main(int argc, char **argv)
 		return 1;
 	}
 
-	/* extract the base name */
-	char *nscan = strncmp(glob_arg.ifname, "netmap:", 7) ?
-			glob_arg.ifname : glob_arg.ifname + 7;
-	strncpy(glob_arg.base_name, nscan, MAX_IFNAMELEN - 1);
-	for (nscan = glob_arg.base_name; *nscan && !index("-*^{}/@", *nscan); nscan++)
-		;
-	*nscan = '\0';
-
 	if (glob_arg.num_groups == 0)
 		parse_pipes("");
 
@@ -678,6 +714,15 @@ int main(int argc, char **argv)
 		return 1;
 	}
 	struct port_des *rxport = &ports[npipes];
+
+	rxport->nmd = nmport_prepare(glob_arg.ifname);
+	if (rxport->nmd == NULL) {
+		D("cannot parse %s", glob_arg.ifname);
+		return (1);
+	}
+	/* extract the base name */
+	strncpy(glob_arg.base_name, rxport->nmd->hdr.nr_name, MAX_IFNAMELEN);
+
 	init_groups();
 
 	memset(&counters_buf, 0, sizeof(counters_buf));
@@ -687,24 +732,15 @@ int main(int argc, char **argv)
 		return 1;
 	}
 
-	/* we need base_req to specify pipes and extra bufs */
-	struct nmreq base_req;
-	memset(&base_req, 0, sizeof(base_req));
+	rxport->nmd->reg.nr_extra_bufs = glob_arg.extra_bufs;
 
-	base_req.nr_arg1 = npipes;
-	base_req.nr_arg3 = glob_arg.extra_bufs;
-
-	rxport->nmd = nm_open(glob_arg.ifname, &base_req, 0, NULL);
-
-	if (rxport->nmd == NULL) {
+	if (nmport_open_desc(rxport->nmd) < 0) {
 		D("cannot open %s", glob_arg.ifname);
 		return (1);
-	} else {
-		D("successfully opened %s (tx rings: %u)", glob_arg.ifname,
-		  rxport->nmd->req.nr_tx_slots);
 	}
+	D("successfully opened %s", glob_arg.ifname);
 
-	uint32_t extra_bufs = rxport->nmd->req.nr_arg3;
+	uint32_t extra_bufs = rxport->nmd->reg.nr_extra_bufs;
 	struct overflow_queue *oq = NULL;
 	/* reference ring to access the buffers */
 	rxport->ring = NETMAP_RXRING(rxport->nmd->nifp, 0);
@@ -772,25 +808,31 @@ run:
 			snprintf(p->interface, MAX_PORTNAMELEN, "%s%s{%d/xT@%d",
 					(strncmp(g->pipename, "vale", 4) ? "netmap:" : ""),
 					g->pipename, g->first_id + k,
-					rxport->nmd->req.nr_arg2);
+					rxport->nmd->reg.nr_mem_id);
 			D("opening pipe named %s", p->interface);
 
-			p->nmd = nm_open(p->interface, NULL, 0, rxport->nmd);
+			p->nmd = nmport_open(p->interface);
 
 			if (p->nmd == NULL) {
 				D("cannot open %s", p->interface);
 				return (1);
-			} else if (p->nmd->req.nr_arg2 != rxport->nmd->req.nr_arg2) {
+			} else if (p->nmd->mem != rxport->nmd->mem) {
 				D("failed to open pipe #%d in zero-copy mode, "
 					"please close any application that uses either pipe %s}%d, "
 				        "or %s{%d, and retry",
 					k + 1, g->pipename, g->first_id + k, g->pipename, g->first_id + k);
 				return (1);
 			} else {
+				struct morefrag *mf;
+
 				D("successfully opened pipe #%d %s (tx slots: %d)",
-				  k + 1, p->interface, p->nmd->req.nr_tx_slots);
+				  k + 1, p->interface, p->nmd->reg.nr_tx_slots);
 				p->ring = NETMAP_TXRING(p->nmd->nifp, 0);
 				p->last_tail = nm_ring_next(p->ring, p->ring->tail);
+				mf = (struct morefrag *)p->ring->sem;
+				mf->last_flag = 0;	/* unused */
+				mf->last_hash = 0;	/* unused */
+				mf->shadow_head = p->ring->head;
 			}
 			D("zerocopy %s",
 			  (rxport->nmd->mem == p->nmd->mem) ? "enabled" : "disabled");
@@ -843,6 +885,16 @@ run:
 	if (glob_arg.stdout_interval > 0 && glob_arg.stdout_interval < poll_timeout)
 		poll_timeout = glob_arg.stdout_interval;
 
+	/* initialize the morefrag structures for the input rings */
+	for (i = rxport->nmd->first_rx_ring; i <= rxport->nmd->last_rx_ring; i++) {
+		struct netmap_ring *rxring = NETMAP_RXRING(rxport->nmd->nifp, i);
+		struct morefrag *mf = (struct morefrag *)rxring->sem;
+
+		mf->last_flag = 0;
+		mf->last_hash = 0;
+		mf->shadow_head = 0; /* unused */
+	}
+
 	while (!do_abort) {
 		u_int polli = 0;
 		iter++;
@@ -871,7 +923,7 @@ run:
 		pollfd[polli].revents = 0;
 		++polli;
 
-		//RD(5, "polling %d file descriptors", polli+1);
+		ND(5, "polling %d file descriptors", polli);
 		rv = poll(pollfd, polli, poll_timeout);
 		if (rv <= 0) {
 			if (rv < 0 && errno != EAGAIN && errno != EINTR)
@@ -902,7 +954,7 @@ run:
 					struct netmap_slot *rs = &ring->slot[last];
 					// XXX less aggressive?
 					rs->buf_idx = forward_packet(g + 1, rs);
-					rs->flags |= NS_BUF_CHANGED;
+					rs->flags = NS_BUF_CHANGED;
 					rs->ptr = 0;
 				}
 				p->last_tail = last;
@@ -918,27 +970,34 @@ run:
 			for (i = 0; i < npipes; i++) {
 				struct port_des *p = &ports[i];
 				struct overflow_queue *q = p->oq;
-				uint32_t k, lim;
+				uint32_t k;
+				int64_t lim;
 				struct netmap_ring *ring;
 				struct netmap_slot *slot;
+				struct morefrag *mf;
 
 				if (oq_empty(q))
 					continue;
 				ring = p->ring;
-				lim = nm_ring_space(ring);
+				mf = (struct morefrag *)ring->sem;
+				lim = ring->tail - mf->shadow_head;
 				if (!lim)
 					continue;
+				if (lim < 0)
+					lim += ring->num_slots;
 				if (q->n < lim)
 					lim = q->n;
 				for (k = 0; k < lim; k++) {
 					struct netmap_slot s = oq_deq(q), tmp;
 					tmp.ptr = 0;
-					slot = &ring->slot[ring->head];
+					slot = &ring->slot[mf->shadow_head];
 					tmp.buf_idx = slot->buf_idx;
 					oq_enq(freeq, &tmp);
 					*slot = s;
 					slot->flags |= NS_BUF_CHANGED;
-					ring->head = nm_ring_next(ring, ring->head);
+					mf->shadow_head = nm_ring_next(ring, mf->shadow_head);
+					if (!(slot->flags & NS_MOREFRAG))
+						ring->head = mf->shadow_head;
 				}
 			}
 		}
@@ -947,6 +1006,7 @@ run:
 		int batch = 0;
 		for (i = rxport->nmd->first_rx_ring; i <= rxport->nmd->last_rx_ring; i++) {
 			struct netmap_ring *rxring = NETMAP_RXRING(rxport->nmd->nifp, i);
+			struct morefrag *mf = (struct morefrag *)rxring->sem;
 
 			//D("prepare to scan rings");
 			int next_head = rxring->head;
@@ -959,7 +1019,15 @@ run:
 				received_bytes += rs->len;
 
 				// CHOOSE THE CORRECT OUTPUT PIPE
-				rs->ptr = pkt_hdr_hash((const unsigned char *)next_buf, 4, 'B');
+				// If the previous slot had NS_MOREFRAG set, this is another
+				// fragment of the last packet and it should go to the same
+				// output pipe as before.
+				if (!mf->last_flag) {
+					// 'B' is just a hashing seed
+					mf->last_hash = pkt_hdr_hash((const unsigned char *)next_buf, 4, 'B');
+				}
+				mf->last_flag = rs->flags & NS_MOREFRAG;
+				rs->ptr = mf->last_hash;
 				if (rs->ptr == 0) {
 					non_ip++; // XXX ??
 				}
@@ -968,9 +1036,8 @@ run:
 				next_slot = &rxring->slot[next_head];
 				next_buf = NETMAP_BUF(rxring, next_slot->buf_idx);
 				__builtin_prefetch(next_buf);
-				// 'B' is just a hashing seed
 				rs->buf_idx = forward_packet(g, rs);
-				rs->flags |= NS_BUF_CHANGED;
+				rs->flags = NS_BUF_CHANGED;
 				rxring->head = rxring->cur = next_head;
 
 				batch++;

Modified: stable/12/tools/tools/netmap/pkt_hash.c
==============================================================================
--- stable/12/tools/tools/netmap/pkt_hash.c	Wed Nov 25 21:22:16 2020	(r368033)
+++ stable/12/tools/tools/netmap/pkt_hash.c	Wed Nov 25 21:23:13 2020	(r368034)
@@ -26,6 +26,7 @@
  ** POSSIBILITY OF SUCH DAMAGE.
  **/
 /* $FreeBSD$ */
+
 /* for func prototypes */
 #include "pkt_hash.h"
 


More information about the svn-src-all mailing list