svn commit: r343689 - in head/sys: dev/netmap net

Vincenzo Maffione vmaffione at FreeBSD.org
Sat Feb 2 22:39:31 UTC 2019


Author: vmaffione
Date: Sat Feb  2 22:39:29 2019
New Revision: 343689
URL: https://svnweb.freebsd.org/changeset/base/343689

Log:
  netmap: upgrade sync-kloop support
  
  Add the SYNC_KLOOP_MODE option, and add support for direct mode, where the
  application executes TXSYNC and RXSYNC in the context of the ioeventfd
  wake-up callback.
  
  MFC after:	5 days
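
  For context, a minimal user-space sketch (not part of this commit) of how
  the new option could be requested. The nmreq_* structures, the
  NM_OPT_SYNC_KLOOP_DIRECT_* flags and NETMAP_REQ_OPT_SYNC_KLOOP_MODE come
  from the net/netmap.h changes below; the start_kloop_direct() helper is
  hypothetical, and the CSB and eventfds options are assumed to be already
  chained on 'hdr':

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <net/netmap.h>

	/* Hypothetical helper: ask the sync kloop to run TXSYNC and RXSYNC
	 * directly in the ioeventfd wake-up callbacks. */
	static int
	start_kloop_direct(int nmfd, struct nmreq_header *hdr)
	{
		struct nmreq_opt_sync_kloop_mode mode;

		memset(&mode, 0, sizeof(mode));
		mode.nro_opt.nro_reqtype = NETMAP_REQ_OPT_SYNC_KLOOP_MODE;
		mode.mode = NM_OPT_SYNC_KLOOP_DIRECT_TX |
		    NM_OPT_SYNC_KLOOP_DIRECT_RX;

		/* Prepend the option to the request's option list. */
		mode.nro_opt.nro_next = hdr->nr_options;
		hdr->nr_options = (uintptr_t)&mode;

		hdr->nr_reqtype = NETMAP_REQ_SYNC_KLOOP_START;
		/* Blocks until NETMAP_REQ_SYNC_KLOOP_STOP is issued. */
		return (ioctl(nmfd, NIOCCTRL, hdr));
	}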

Modified:
  head/sys/dev/netmap/netmap.c
  head/sys/dev/netmap/netmap_kern.h
  head/sys/dev/netmap/netmap_kloop.c
  head/sys/net/netmap.h

Modified: head/sys/dev/netmap/netmap.c
==============================================================================
--- head/sys/dev/netmap/netmap.c	Sat Feb  2 21:14:53 2019	(r343688)
+++ head/sys/dev/netmap/netmap.c	Sat Feb  2 22:39:29 2019	(r343689)
@@ -2971,6 +2971,9 @@ nmreq_opt_size_by_type(uint32_t nro_reqtype, uint64_t 
 	case NETMAP_REQ_OPT_CSB:
 		rv = sizeof(struct nmreq_opt_csb);
 		break;
+	case NETMAP_REQ_OPT_SYNC_KLOOP_MODE:
+		rv = sizeof(struct nmreq_opt_sync_kloop_mode);
+		break;
 	}
 	/* subtract the common header */
 	return rv - sizeof(struct nmreq_option);
@@ -3287,10 +3290,8 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM
 	 * there are pending packets to send. The latter can be disabled
 	 * passing NETMAP_NO_TX_POLL in the NIOCREG call.
 	 */
-	si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] :
-				&na->rx_rings[priv->np_qfirst[NR_RX]]->si;
-	si[NR_TX] = nm_si_user(priv, NR_TX) ? &na->si[NR_TX] :
-				&na->tx_rings[priv->np_qfirst[NR_TX]]->si;
+	si[NR_RX] = priv->np_si[NR_RX];
+	si[NR_TX] = priv->np_si[NR_TX];
 
 #ifdef __FreeBSD__
 	/*

Modified: head/sys/dev/netmap/netmap_kern.h
==============================================================================
--- head/sys/dev/netmap/netmap_kern.h	Sat Feb  2 21:14:53 2019	(r343688)
+++ head/sys/dev/netmap/netmap_kern.h	Sat Feb  2 22:39:29 2019	(r343689)
@@ -1169,6 +1169,15 @@ nm_kr_txempty(struct netmap_kring *kring)
  * rxsync_prologue */
 #define nm_kr_rxempty(_k)	nm_kr_txempty(_k)
 
+/* True if the application needs to wait for more space on the ring
+ * (more received packets or more free tx slots).
+ * Only valid after *xsync_prologue. */
+static inline int
+nm_kr_wouldblock(struct netmap_kring *kring)
+{
+	return kring->rcur == kring->nr_hwtail;
+}
+
 /*
  * protect against multiple threads using the same ring.
  * also check that the ring has not been stopped or locked

Modified: head/sys/dev/netmap/netmap_kloop.c
==============================================================================
--- head/sys/dev/netmap/netmap_kloop.c	Sat Feb  2 21:14:53 2019	(r343688)
+++ head/sys/dev/netmap/netmap_kloop.c	Sat Feb  2 22:39:29 2019	(r343689)
@@ -141,6 +141,9 @@ sync_kloop_kring_dump(const char *title, const struct 
 		kring->rcur, kring->rtail, kring->nr_hwtail);
 }
 
+/* Arguments for netmap_sync_kloop_tx_ring() and
+ * netmap_sync_kloop_rx_ring().
+ */
 struct sync_kloop_ring_args {
 	struct netmap_kring *kring;
 	struct nm_csb_atok *csb_atok;
@@ -148,6 +151,10 @@ struct sync_kloop_ring_args {
 #ifdef SYNC_KLOOP_POLL
 	struct eventfd_ctx *irq_ctx;
 #endif /* SYNC_KLOOP_POLL */
+	/* Are we busy waiting rather than using a schedule() loop ? */
+	bool busy_wait;
+	/* Are we processing in the context of VM exit ? */
+	bool direct;
 };
 
 static void
@@ -161,10 +168,16 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring
 	uint32_t num_slots;
 	int batch;
 
+	if (unlikely(nm_kr_tryget(kring, 1, NULL))) {
+		return;
+	}
+
 	num_slots = kring->nkr_num_slots;
 
 	/* Disable application --> kernel notifications. */
-	csb_ktoa_kick_enable(csb_ktoa, 0);
+	if (!a->direct) {
+		csb_ktoa_kick_enable(csb_ktoa, 0);
+	}
 	/* Copy the application kring pointers from the CSB */
 	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
 
@@ -197,7 +210,9 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring
 		if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) {
 			/* Reinit ring and enable notifications. */
 			netmap_ring_reinit(kring);
-			csb_ktoa_kick_enable(csb_ktoa, 1);
+			if (!a->busy_wait) {
+				csb_ktoa_kick_enable(csb_ktoa, 1);
+			}
 			break;
 		}
 
@@ -206,8 +221,10 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring
 		}
 
 		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
-			/* Reenable notifications. */
-			csb_ktoa_kick_enable(csb_ktoa, 1);
+			if (!a->busy_wait) {
+				/* Reenable notifications. */
+				csb_ktoa_kick_enable(csb_ktoa, 1);
+			}
 			nm_prerr("txsync() failed");
 			break;
 		}
@@ -232,7 +249,8 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring
 		/* Interrupt the application if needed. */
 #ifdef SYNC_KLOOP_POLL
 		if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
-			/* Disable application kick to avoid sending unnecessary kicks */
+			/* We could disable kernel --> application kicks here,
+			 * to avoid spurious interrupts. */
 			eventfd_signal(a->irq_ctx, 1);
 			more_txspace = false;
 		}
@@ -241,6 +259,9 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring
 		/* Read CSB to see if there is more work to do. */
 		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
 		if (shadow_ring.head == kring->rhead) {
+			if (a->busy_wait) {
+				break;
+			}
 			/*
 			 * No more packets to transmit. We enable notifications and
 			 * go to sleep, waiting for a kick from the application when new
@@ -268,6 +289,8 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring
 		}
 	}
 
+	nm_kr_put(kring);
+
 #ifdef SYNC_KLOOP_POLL
 	if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
 		eventfd_signal(a->irq_ctx, 1);
@@ -297,13 +320,19 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring
 	bool some_recvd = false;
 	uint32_t num_slots;
 
+	if (unlikely(nm_kr_tryget(kring, 1, NULL))) {
+		return;
+	}
+
 	num_slots = kring->nkr_num_slots;
 
 	/* Get RX csb_atok and csb_ktoa pointers from the CSB. */
 	num_slots = kring->nkr_num_slots;
 
 	/* Disable notifications. */
-	csb_ktoa_kick_enable(csb_ktoa, 0);
+	if (!a->direct) {
+		csb_ktoa_kick_enable(csb_ktoa, 0);
+	}
 	/* Copy the application kring pointers from the CSB */
 	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
 
@@ -315,7 +344,9 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring
 		if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) {
 			/* Reinit ring and enable notifications. */
 			netmap_ring_reinit(kring);
-			csb_ktoa_kick_enable(csb_ktoa, 1);
+			if (!a->busy_wait) {
+				csb_ktoa_kick_enable(csb_ktoa, 1);
+			}
 			break;
 		}
 
@@ -324,8 +355,10 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring
 		}
 
 		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
-			/* Reenable notifications. */
-			csb_ktoa_kick_enable(csb_ktoa, 1);
+			if (!a->busy_wait) {
+				/* Reenable notifications. */
+				csb_ktoa_kick_enable(csb_ktoa, 1);
+			}
 			nm_prerr("rxsync() failed");
 			break;
 		}
@@ -351,7 +384,8 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring
 #ifdef SYNC_KLOOP_POLL
 		/* Interrupt the application if needed. */
 		if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
-			/* Disable application kick to avoid sending unnecessary kicks */
+			/* We could disable kernel --> application kicks here,
+			 * to avoid spurious interrupts. */
 			eventfd_signal(a->irq_ctx, 1);
 			some_recvd = false;
 		}
@@ -360,6 +394,9 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring
 		/* Read CSB to see if there is more work to do. */
 		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
 		if (sync_kloop_norxslots(kring, shadow_ring.head)) {
+			if (a->busy_wait) {
+				break;
+			}
 			/*
 			 * No more slots available for reception. We enable notification and
 			 * go to sleep, waiting for a kick from the application when new receive
@@ -401,6 +438,7 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring
 }
 
 #ifdef SYNC_KLOOP_POLL
+struct sync_kloop_poll_ctx;
 struct sync_kloop_poll_entry {
 	/* Support for receiving notifications from
 	 * a netmap ring or from the application. */
@@ -411,12 +449,24 @@ struct sync_kloop_poll_entry {
 	/* Support for sending notifications to the application. */
 	struct eventfd_ctx *irq_ctx;
 	struct file *irq_filp;
+
+	/* Arguments for the ring processing function. Useful
+	 * in case of custom wake-up function. */
+	struct sync_kloop_ring_args *args;
+	struct sync_kloop_poll_ctx *parent;
+
 };
 
 struct sync_kloop_poll_ctx {
 	poll_table wait_table;
 	unsigned int next_entry;
+	int (*next_wake_fun)(wait_queue_t *, unsigned, int, void *);
 	unsigned int num_entries;
+	unsigned int num_tx_rings;
+	unsigned int num_rings;
+	/* First num_tx_rings entries are for the TX kicks.
+	 * Then the RX kicks entries follow. The last two
+	 * entries are for TX irq, and RX irq. */
 	struct sync_kloop_poll_entry entries[0];
 };
 
@@ -433,10 +483,78 @@ sync_kloop_poll_table_queue_proc(struct file *file, wa
 	entry->wqh = wqh;
 	entry->filp = file;
 	/* Use the default wake up function. */
-	init_waitqueue_entry(&entry->wait, current);
+	if (poll_ctx->next_wake_fun == NULL) {
+		init_waitqueue_entry(&entry->wait, current);
+	} else {
+		init_waitqueue_func_entry(&entry->wait,
+		    poll_ctx->next_wake_fun);
+	}
 	add_wait_queue(wqh, &entry->wait);
-	poll_ctx->next_entry++;
 }
+
+static int
+sync_kloop_tx_kick_wake_fun(wait_queue_t *wait, unsigned mode,
+    int wake_flags, void *key)
+{
+	struct sync_kloop_poll_entry *entry =
+	    container_of(wait, struct sync_kloop_poll_entry, wait);
+
+	netmap_sync_kloop_tx_ring(entry->args);
+
+	return 0;
+}
+
+static int
+sync_kloop_tx_irq_wake_fun(wait_queue_t *wait, unsigned mode,
+    int wake_flags, void *key)
+{
+	struct sync_kloop_poll_entry *entry =
+	    container_of(wait, struct sync_kloop_poll_entry, wait);
+	struct sync_kloop_poll_ctx *poll_ctx = entry->parent;
+	int i;
+
+	for (i = 0; i < poll_ctx->num_tx_rings; i++) {
+		struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx;
+
+		if (irq_ctx) {
+			eventfd_signal(irq_ctx, 1);
+		}
+	}
+
+	return 0;
+}
+
+static int
+sync_kloop_rx_kick_wake_fun(wait_queue_t *wait, unsigned mode,
+    int wake_flags, void *key)
+{
+	struct sync_kloop_poll_entry *entry =
+	    container_of(wait, struct sync_kloop_poll_entry, wait);
+
+	netmap_sync_kloop_rx_ring(entry->args);
+
+	return 0;
+}
+
+static int
+sync_kloop_rx_irq_wake_fun(wait_queue_t *wait, unsigned mode,
+    int wake_flags, void *key)
+{
+	struct sync_kloop_poll_entry *entry =
+	    container_of(wait, struct sync_kloop_poll_entry, wait);
+	struct sync_kloop_poll_ctx *poll_ctx = entry->parent;
+	int i;
+
+	for (i = poll_ctx->num_tx_rings; i < poll_ctx->num_rings; i++) {
+		struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx;
+
+		if (irq_ctx) {
+			eventfd_signal(irq_ctx, 1);
+		}
+	}
+
+	return 0;
+}
 #endif  /* SYNC_KLOOP_POLL */
 
 int
@@ -455,6 +573,10 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct n
 	struct nm_csb_ktoa* csb_ktoa_base;
 	struct netmap_adapter *na;
 	struct nmreq_option *opt;
+	bool na_could_sleep = false;
+	bool busy_wait = true;
+	bool direct_tx = false;
+	bool direct_rx = false;
 	int err = 0;
 	int i;
 
@@ -505,8 +627,44 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct n
 		goto out;
 	}
 
+	/* Prepare the arguments for netmap_sync_kloop_tx_ring()
+	 * and netmap_sync_kloop_rx_ring(). */
+	for (i = 0; i < num_tx_rings; i++) {
+		struct sync_kloop_ring_args *a = args + i;
+
+		a->kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]];
+		a->csb_atok = csb_atok_base + i;
+		a->csb_ktoa = csb_ktoa_base + i;
+		a->busy_wait = busy_wait;
+		a->direct = direct_tx;
+	}
+	for (i = 0; i < num_rx_rings; i++) {
+		struct sync_kloop_ring_args *a = args + num_tx_rings + i;
+
+		a->kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]];
+		a->csb_atok = csb_atok_base + num_tx_rings + i;
+		a->csb_ktoa = csb_ktoa_base + num_tx_rings + i;
+		a->busy_wait = busy_wait;
+		a->direct = direct_rx;
+	}
+
 	/* Validate notification options. */
 	opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
+				NETMAP_REQ_OPT_SYNC_KLOOP_MODE);
+	if (opt != NULL) {
+		struct nmreq_opt_sync_kloop_mode *mode_opt =
+		    (struct nmreq_opt_sync_kloop_mode *)opt;
+
+		direct_tx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_TX);
+		direct_rx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_RX);
+		if (mode_opt->mode & ~(NM_OPT_SYNC_KLOOP_DIRECT_TX |
+		    NM_OPT_SYNC_KLOOP_DIRECT_RX)) {
+			opt->nro_status = err = EINVAL;
+			goto out;
+		}
+		opt->nro_status = 0;
+	}
+	opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
 				NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS);
 	if (opt != NULL) {
 		err = nmreq_checkduplicate(opt);
@@ -524,54 +682,132 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct n
 #ifdef SYNC_KLOOP_POLL
 		eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt;
 		opt->nro_status = 0;
+
+		/* Check if some ioeventfd entry is not defined, and force sleep
+		 * synchronization in that case. */
+		busy_wait = false;
+		for (i = 0; i < num_rings; i++) {
+			if (eventfds_opt->eventfds[i].ioeventfd < 0) {
+				busy_wait = true;
+				break;
+			}
+		}
+
+		if (busy_wait && (direct_tx || direct_rx)) {
+			/* For direct processing we need all the
+			 * ioeventfds to be valid. */
+			opt->nro_status = err = EINVAL;
+			goto out;
+		}
+
 		/* We need 2 poll entries for TX and RX notifications coming
 		 * from the netmap adapter, plus one entries per ring for the
 		 * notifications coming from the application. */
 		poll_ctx = nm_os_malloc(sizeof(*poll_ctx) +
-				(2 + num_rings) * sizeof(poll_ctx->entries[0]));
+				(num_rings + 2) * sizeof(poll_ctx->entries[0]));
 		init_poll_funcptr(&poll_ctx->wait_table,
 					sync_kloop_poll_table_queue_proc);
 		poll_ctx->num_entries = 2 + num_rings;
+		poll_ctx->num_tx_rings = num_tx_rings;
+		poll_ctx->num_rings = num_rings;
 		poll_ctx->next_entry = 0;
+		poll_ctx->next_wake_fun = NULL;
+
+		if (direct_tx && (na->na_flags & NAF_BDG_MAYSLEEP)) {
+			/* In direct mode, VALE txsync is called from
+			 * wake-up context, where it is not possible
+			 * to sleep.
+			 */
+			na->na_flags &= ~NAF_BDG_MAYSLEEP;
+			na_could_sleep = true;
+		}
+
+		for (i = 0; i < num_rings + 2; i++) {
+			poll_ctx->entries[i].args = args + i;
+			poll_ctx->entries[i].parent = poll_ctx;
+		}
+
 		/* Poll for notifications coming from the applications through
-		 * eventfds . */
-		for (i = 0; i < num_rings; i++) {
-			struct eventfd_ctx *irq;
-			struct file *filp;
+		 * eventfds. */
+		for (i = 0; i < num_rings; i++, poll_ctx->next_entry++) {
+			struct eventfd_ctx *irq = NULL;
+			struct file *filp = NULL;
 			unsigned long mask;
+			bool tx_ring = (i < num_tx_rings);
 
-			filp = eventfd_fget(eventfds_opt->eventfds[i].ioeventfd);
-			if (IS_ERR(filp)) {
-				err = PTR_ERR(filp);
-				goto out;
+			if (eventfds_opt->eventfds[i].irqfd >= 0) {
+				filp = eventfd_fget(
+				    eventfds_opt->eventfds[i].irqfd);
+				if (IS_ERR(filp)) {
+					err = PTR_ERR(filp);
+					goto out;
+				}
+				irq = eventfd_ctx_fileget(filp);
+				if (IS_ERR(irq)) {
+					err = PTR_ERR(irq);
+					goto out;
+				}
 			}
-			mask = filp->f_op->poll(filp, &poll_ctx->wait_table);
-			if (mask & POLLERR) {
-				err = EINVAL;
-				goto out;
-			}
-
-			filp = eventfd_fget(eventfds_opt->eventfds[i].irqfd);
-			if (IS_ERR(filp)) {
-				err = PTR_ERR(filp);
-				goto out;
-			}
 			poll_ctx->entries[i].irq_filp = filp;
-			irq = eventfd_ctx_fileget(filp);
-			if (IS_ERR(irq)) {
-				err = PTR_ERR(irq);
-				goto out;
-			}
 			poll_ctx->entries[i].irq_ctx = irq;
+			poll_ctx->entries[i].args->busy_wait = busy_wait;
+			/* Don't let netmap_sync_kloop_*x_ring() use
+			 * IRQs in direct mode. */
+			poll_ctx->entries[i].args->irq_ctx =
+			    ((tx_ring && direct_tx) ||
+			    (!tx_ring && direct_rx)) ? NULL :
+			    poll_ctx->entries[i].irq_ctx;
+			poll_ctx->entries[i].args->direct =
+			    (tx_ring ? direct_tx : direct_rx);
+
+			if (!busy_wait) {
+				filp = eventfd_fget(
+				    eventfds_opt->eventfds[i].ioeventfd);
+				if (IS_ERR(filp)) {
+					err = PTR_ERR(filp);
+					goto out;
+				}
+				if (tx_ring && direct_tx) {
+					/* Override the wake up function
+					 * so that it can directly call
+					 * netmap_sync_kloop_tx_ring().
+					 */
+					poll_ctx->next_wake_fun =
+					    sync_kloop_tx_kick_wake_fun;
+				} else if (!tx_ring && direct_rx) {
+					/* Same for direct RX. */
+					poll_ctx->next_wake_fun =
+					    sync_kloop_rx_kick_wake_fun;
+				} else {
+					poll_ctx->next_wake_fun = NULL;
+				}
+				mask = filp->f_op->poll(filp,
+				    &poll_ctx->wait_table);
+				if (mask & POLLERR) {
+					err = EINVAL;
+					goto out;
+				}
+			}
 		}
+
 		/* Poll for notifications coming from the netmap rings bound to
 		 * this file descriptor. */
-		{
+		if (!busy_wait) {
 			NMG_LOCK();
+			/* In direct mode, override the wake up function so
+			 * that it can forward the netmap_tx_irq() to the
+			 * guest. */
+			poll_ctx->next_wake_fun = direct_tx ?
+			    sync_kloop_tx_irq_wake_fun : NULL;
 			poll_wait(priv->np_filp, priv->np_si[NR_TX],
 			    &poll_ctx->wait_table);
+			poll_ctx->next_entry++;
+
+			poll_ctx->next_wake_fun = direct_rx ?
+			    sync_kloop_rx_irq_wake_fun : NULL;
 			poll_wait(priv->np_filp, priv->np_si[NR_RX],
 			    &poll_ctx->wait_table);
+			poll_ctx->next_entry++;
 			NMG_UNLOCK();
 		}
 #else   /* SYNC_KLOOP_POLL */
@@ -580,31 +816,10 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct n
 #endif  /* SYNC_KLOOP_POLL */
 	}
 
-	/* Prepare the arguments for netmap_sync_kloop_tx_ring()
-	 * and netmap_sync_kloop_rx_ring(). */
-	for (i = 0; i < num_tx_rings; i++) {
-		struct sync_kloop_ring_args *a = args + i;
+	nm_prinf("kloop busy_wait %u, direct_tx %u, direct_rx %u, "
+	    "na_could_sleep %u", busy_wait, direct_tx, direct_rx,
+	    na_could_sleep);
 
-		a->kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]];
-		a->csb_atok = csb_atok_base + i;
-		a->csb_ktoa = csb_ktoa_base + i;
-#ifdef SYNC_KLOOP_POLL
-		if (poll_ctx)
-			a->irq_ctx = poll_ctx->entries[i].irq_ctx;
-#endif /* SYNC_KLOOP_POLL */
-	}
-	for (i = 0; i < num_rx_rings; i++) {
-		struct sync_kloop_ring_args *a = args + num_tx_rings + i;
-
-		a->kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]];
-		a->csb_atok = csb_atok_base + num_tx_rings + i;
-		a->csb_ktoa = csb_ktoa_base + num_tx_rings + i;
-#ifdef SYNC_KLOOP_POLL
-		if (poll_ctx)
-			a->irq_ctx = poll_ctx->entries[num_tx_rings + i].irq_ctx;
-#endif /* SYNC_KLOOP_POLL */
-	}
-
 	/* Main loop. */
 	for (;;) {
 		if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) {
@@ -612,7 +827,7 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct n
 		}
 
 #ifdef SYNC_KLOOP_POLL
-		if (poll_ctx) {
+		if (!busy_wait) {
 			/* It is important to set the task state as
 			 * interruptible before processing any TX/RX ring,
 			 * so that if a notification on ring Y comes after
@@ -627,46 +842,37 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct n
 #endif  /* SYNC_KLOOP_POLL */
 
 		/* Process all the TX rings bound to this file descriptor. */
-		for (i = 0; i < num_tx_rings; i++) {
+		for (i = 0; !direct_tx && i < num_tx_rings; i++) {
 			struct sync_kloop_ring_args *a = args + i;
-
-			if (unlikely(nm_kr_tryget(a->kring, 1, NULL))) {
-				continue;
-			}
 			netmap_sync_kloop_tx_ring(a);
-			nm_kr_put(a->kring);
 		}
 
 		/* Process all the RX rings bound to this file descriptor. */
-		for (i = 0; i < num_rx_rings; i++) {
+		for (i = 0; !direct_rx && i < num_rx_rings; i++) {
 			struct sync_kloop_ring_args *a = args + num_tx_rings + i;
-
-			if (unlikely(nm_kr_tryget(a->kring, 1, NULL))) {
-				continue;
-			}
 			netmap_sync_kloop_rx_ring(a);
-			nm_kr_put(a->kring);
 		}
 
-#ifdef SYNC_KLOOP_POLL
-		if (poll_ctx) {
-			/* If a poll context is present, yield to the scheduler
-			 * waiting for a notification to come either from
-			 * netmap or the application. */
-			schedule_timeout(msecs_to_jiffies(3000));
-		} else
-#endif /* SYNC_KLOOP_POLL */
-		{
+		if (busy_wait) {
 			/* Default synchronization method: sleep for a while. */
 			usleep_range(sleep_us, sleep_us);
 		}
+#ifdef SYNC_KLOOP_POLL
+		else {
+			/* Yield to the scheduler waiting for a notification
+			 * to come either from netmap or the application. */
+			schedule_timeout(msecs_to_jiffies(3000));
+		}
+#endif /* SYNC_KLOOP_POLL */
 	}
 out:
 #ifdef SYNC_KLOOP_POLL
 	if (poll_ctx) {
 		/* Stop polling from netmap and the eventfds, and deallocate
 		 * the poll context. */
-		__set_current_state(TASK_RUNNING);
+		if (!busy_wait) {
+			__set_current_state(TASK_RUNNING);
+		}
 		for (i = 0; i < poll_ctx->next_entry; i++) {
 			struct sync_kloop_poll_entry *entry =
 						poll_ctx->entries + i;
@@ -696,6 +902,9 @@ out:
 	/* Reset the kloop state. */
 	NMG_LOCK();
 	priv->np_kloop_state = 0;
+	if (na_could_sleep) {
+		na->na_flags |= NAF_BDG_MAYSLEEP;
+	}
 	NMG_UNLOCK();
 
 	return err;
@@ -770,14 +979,14 @@ netmap_pt_guest_txsync(struct nm_csb_atok *atok, struc
 	atok->appl_need_kick = 0;
 
 	/*
-	 * First part: tell the host (updating the CSB) to process the new
-	 * packets.
+	 * First part: tell the host to process the new packets,
+	 * updating the CSB.
 	 */
 	kring->nr_hwcur = ktoa->hwcur;
 	nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);
 
         /* Ask for a kick from a guest to the host if needed. */
-	if (((kring->rhead != kring->nr_hwcur || nm_kr_txempty(kring))
+	if (((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring))
 		&& NM_ACCESS_ONCE(ktoa->kern_need_kick)) ||
 			(flags & NAF_FORCE_RECLAIM)) {
 		atok->sync_flags = flags;
@@ -787,7 +996,7 @@ netmap_pt_guest_txsync(struct nm_csb_atok *atok, struc
 	/*
 	 * Second part: reclaim buffers for completed transmissions.
 	 */
-	if (nm_kr_txempty(kring) || (flags & NAF_FORCE_RECLAIM)) {
+	if (nm_kr_wouldblock(kring) || (flags & NAF_FORCE_RECLAIM)) {
 		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
 					&kring->nr_hwcur);
 	}
@@ -797,7 +1006,7 @@ netmap_pt_guest_txsync(struct nm_csb_atok *atok, struc
 	 * go to sleep and we need to be notified by the host when more free
 	 * space is available.
          */
-	if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
+	if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
 		/* Reenable notifications. */
 		atok->appl_need_kick = 1;
                 /* Double check, with store-load memory barrier. */
@@ -805,7 +1014,7 @@ netmap_pt_guest_txsync(struct nm_csb_atok *atok, struc
 		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
 					&kring->nr_hwcur);
                 /* If there is new free space, disable notifications */
-		if (unlikely(!nm_kr_txempty(kring))) {
+		if (unlikely(!nm_kr_wouldblock(kring))) {
 			atok->appl_need_kick = 0;
 		}
 	}
@@ -851,11 +1060,6 @@ netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struc
 	 */
 	if (kring->rhead != kring->nr_hwcur) {
 		nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);
-                /* Ask for a kick from the guest to the host if needed. */
-		if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
-			atok->sync_flags = flags;
-			notify = true;
-		}
 	}
 
         /*
@@ -863,7 +1067,7 @@ netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struc
 	 * we need to be notified by the host when more RX slots have been
 	 * completed.
          */
-	if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
+	if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
 		/* Reenable notifications. */
                 atok->appl_need_kick = 1;
                 /* Double check, with store-load memory barrier. */
@@ -871,10 +1075,17 @@ netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struc
 		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
 					&kring->nr_hwcur);
                 /* If there are new slots, disable notifications. */
-		if (!nm_kr_rxempty(kring)) {
+		if (!nm_kr_wouldblock(kring)) {
                         atok->appl_need_kick = 0;
                 }
         }
+
+	/* Ask for a kick from the guest to the host if needed. */
+	if ((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring))
+		&& NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
+		atok->sync_flags = flags;
+		notify = true;
+	}
 
 	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
 		kring->name, atok->head, atok->cur, ktoa->hwtail,

Modified: head/sys/net/netmap.h
==============================================================================
--- head/sys/net/netmap.h	Sat Feb  2 21:14:53 2019	(r343688)
+++ head/sys/net/netmap.h	Sat Feb  2 22:39:29 2019	(r343689)
@@ -540,7 +540,8 @@ enum {
 
 enum {
 	/* On NETMAP_REQ_REGISTER, ask netmap to use memory allocated
-	 * from user-space allocated memory pools (e.g. hugepages). */
+	 * from user-space allocated memory pools (e.g. hugepages).
+	 */
 	NETMAP_REQ_OPT_EXTMEM = 1,
 
 	/* ON NETMAP_REQ_SYNC_KLOOP_START, ask netmap to use eventfd-based
@@ -551,8 +552,15 @@ enum {
 	/* On NETMAP_REQ_REGISTER, ask netmap to work in CSB mode, where
 	 * head, cur and tail pointers are not exchanged through the
 	 * struct netmap_ring header, but rather using an user-provided
-	 * memory area (see struct nm_csb_atok and struct nm_csb_ktoa). */
+	 * memory area (see struct nm_csb_atok and struct nm_csb_ktoa).
+	 */
 	NETMAP_REQ_OPT_CSB,
+
+	/* An extension to NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS, which specifies
+	 * if the TX and/or RX rings are synced in the context of the VM exit.
+	 * This requires the 'ioeventfd' fields to be valid (cannot be < 0).
+	 */
+	NETMAP_REQ_OPT_SYNC_KLOOP_MODE,
 };
 
 /*
@@ -877,6 +885,12 @@ struct nmreq_opt_sync_kloop_eventfds {
 	 * their order must agree with the CSB arrays passed in the
 	 * NETMAP_REQ_OPT_CSB option. Each entry contains a file descriptor
 	 * backed by an eventfd.
+	 *
+	 * If any of the 'ioeventfd' entries is < 0, the event loop uses
+	 * the sleeping synchronization strategy (according to sleep_us),
+	 * and keeps kern_need_kick always disabled.
+	 * Each 'irqfd' can be < 0, and in that case the corresponding queue
+	 * is never notified.
 	 */
 	struct {
 		/* Notifier for the application --> kernel loop direction. */
@@ -884,6 +898,13 @@ struct nmreq_opt_sync_kloop_eventfds {
 		/* Notifier for the kernel loop --> application direction. */
 		int32_t irqfd;
 	} eventfds[0];
+};
+
+struct nmreq_opt_sync_kloop_mode {
+	struct nmreq_option	nro_opt;	/* common header */
+#define NM_OPT_SYNC_KLOOP_DIRECT_TX (1 << 0)
+#define NM_OPT_SYNC_KLOOP_DIRECT_RX (1 << 1)
+	uint32_t mode;
 };
 
 struct nmreq_opt_extmem {

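As an illustration of the nmreq_opt_sync_kloop_eventfds comments added above,
a hedged user-space sketch (not part of this commit; it assumes a Linux host
with eventfd(2), a hypothetical num_rings count, and that this variable-size
option advertises its total size in nro_opt.nro_size) of setting up the
per-ring notifiers so that direct mode can be used:

	#include <stdlib.h>
	#include <sys/eventfd.h>
	#include <net/netmap.h>

	struct nmreq_opt_sync_kloop_eventfds *efds;
	size_t sz = sizeof(*efds) + num_rings * sizeof(efds->eventfds[0]);
	unsigned int i;

	efds = calloc(1, sz);
	efds->nro_opt.nro_reqtype = NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS;
	efds->nro_opt.nro_size = sz;	/* option carries a per-ring array */
	for (i = 0; i < num_rings; i++) {
		/* Application --> kloop kick. Must be valid (>= 0) on every
		 * ring if NM_OPT_SYNC_KLOOP_DIRECT_TX/RX are requested;
		 * setting any of them to -1 forces the sleeping strategy. */
		efds->eventfds[i].ioeventfd = eventfd(0, 0);
		/* Kloop --> application kick; -1 leaves the ring unnotified. */
		efds->eventfds[i].irqfd = eventfd(0, 0);
	}
	/* Chain efds on hdr->nr_options together with the CSB and
	 * sync-kloop-mode options before NETMAP_REQ_SYNC_KLOOP_START. */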