svn commit: r270334 - stable/10/sys/dev/virtio/network

Fri Aug 22 15:12:21 UTC 2014

Author: bryanv
Date: Fri Aug 22 15:12:20 2014
New Revision: 270334
URL: http://svnweb.freebsd.org/changeset/base/270334

Log:
  MFC r268481:
  
    Rework when the Tx queue completion interrupt is enabled
  
    The Tx interrupt is now kept disabled in the common case, only
    enabled when the number of free descriptors in the queue falls
    below a threshold. Transmitted frames are cleared from the VQ
    before subsequent transmit, or in the watchdog timer.
  
    This was a very big performance improvement for an experimental
    Netmap bhyve backend.

Modified:
  stable/10/sys/dev/virtio/network/if_vtnet.c
  stable/10/sys/dev/virtio/network/if_vtnetvar.h
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/dev/virtio/network/if_vtnet.c
==============================================================================

--- stable/10/sys/dev/virtio/network/if_vtnet.c	Fri Aug 22 15:10:26 2014	(r270333)
+++ stable/10/sys/dev/virtio/network/if_vtnet.c	Fri Aug 22 15:12:20 2014	(r270334)
@@ -126,6 +126,8 @@ static int	vtnet_rxq_eof(struct vtnet_rx
 static void	vtnet_rx_vq_intr(void *);
 static void	vtnet_rxq_tq_intr(void *, int);
 
+static int	vtnet_txq_below_threshold(struct vtnet_txq *);
+static int	vtnet_txq_notify(struct vtnet_txq *);
 static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
 static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
 		    int *, int *, int *);
@@ -147,7 +149,7 @@ static void	vtnet_txq_tq_deferred(void *
 #endif
 static void	vtnet_txq_start(struct vtnet_txq *);
 static void	vtnet_txq_tq_intr(void *, int);
-static void	vtnet_txq_eof(struct vtnet_txq *);
+static int	vtnet_txq_eof(struct vtnet_txq *);
 static void	vtnet_tx_vq_intr(void *);
 static void	vtnet_tx_start_all(struct vtnet_softc *);
 
@@ -204,6 +206,8 @@ static void	vtnet_ifmedia_sts(struct ifn
 static void	vtnet_get_hwaddr(struct vtnet_softc *);
 static void	vtnet_set_hwaddr(struct vtnet_softc *);
 static void	vtnet_vlan_tag_remove(struct mbuf *);
+static void	vtnet_set_rx_process_limit(struct vtnet_softc *);
+static void	vtnet_set_tx_intr_threshold(struct vtnet_softc *);
 
 static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
 		    struct sysctl_oid_list *, struct vtnet_rxq *);
@@ -239,19 +243,6 @@ TUNABLE_INT("hw.vtnet.mq_max_pairs", &vt
 static int vtnet_rx_process_limit = 512;
 TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
 
-/*
- * Reducing the number of transmit completed interrupts can improve
- * performance. To do so, the define below keeps the Tx vq interrupt
- * disabled and adds calls to vtnet_txeof() in the start and watchdog
- * paths. The price to pay for this is the m_free'ing of transmitted
- * mbufs may be delayed until the watchdog fires.
- *
- * BMV: Reintroduce this later as a run-time option, if it makes
- * sense after the EVENT_IDX feature is supported.
- *
- * #define VTNET_TX_INTR_MODERATION
- */
-
 static uma_zone_t vtnet_tx_header_zone;
 
 static struct virtio_feature_desc vtnet_feature_desc[] = {
@@ -901,7 +892,6 @@ vtnet_setup_interface(struct vtnet_softc
 {
 	device_t dev;
 	struct ifnet *ifp;
-	int limit;
 
 	dev = sc->vtnet_dev;
 
@@ -1000,11 +990,8 @@ vtnet_setup_interface(struct vtnet_softc
 		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
 	}
 
-	limit = vtnet_tunable_int(sc, "rx_process_limit",
-	    vtnet_rx_process_limit);
-	if (limit < 0)
-		limit = INT_MAX;
-	sc->vtnet_rx_process_limit = limit;
+	vtnet_set_rx_process_limit(sc);
+	vtnet_set_tx_intr_threshold(sc);
 
 	return (0);
 }
@@ -1895,6 +1882,44 @@ vtnet_rxq_tq_intr(void *xrxq, int pendin
 	VTNET_RXQ_UNLOCK(rxq);
 }
 
+static int
+vtnet_txq_below_threshold(struct vtnet_txq *txq)
+{
+	struct vtnet_softc *sc;
+	struct virtqueue *vq;
+
+	sc = txq->vtntx_sc;
+	vq = txq->vtntx_vq;
+
+	return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh);
+}
+
+static int
+vtnet_txq_notify(struct vtnet_txq *txq)
+{
+	struct virtqueue *vq;
+
+	vq = txq->vtntx_vq;
+
+	txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+	virtqueue_notify(vq);
+
+	if (vtnet_txq_enable_intr(txq) == 0)
+		return (0);
+
+	/*
+	 * Drain frames that were completed since last checked. If this
+	 * causes the queue to go above the threshold, the caller should
+	 * continue transmitting.
+	 */
+	if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
+		virtqueue_disable_intr(vq);
+		return (1);
+	}
+
+	return (0);
+}
+
 static void
 vtnet_txq_free_mbufs(struct vtnet_txq *txq)
 {
@@ -2169,11 +2194,11 @@ vtnet_start_locked(struct vtnet_txq *txq
 	struct vtnet_softc *sc;
 	struct virtqueue *vq;
 	struct mbuf *m0;
-	int enq;
+	int tries, enq;
 
 	sc = txq->vtntx_sc;
 	vq = txq->vtntx_vq;
-	enq = 0;
+	tries = 0;
 
 	VTNET_TXQ_LOCK_ASSERT(txq);
 
@@ -2183,6 +2208,9 @@ vtnet_start_locked(struct vtnet_txq *txq
 
 	vtnet_txq_eof(txq);
 
+again:
+	enq = 0;
+
 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
 		if (virtqueue_full(vq))
 			break;
@@ -2201,9 +2229,12 @@ vtnet_start_locked(struct vtnet_txq *txq
 		ETHER_BPF_MTAP(ifp, m0);
 	}
 
-	if (enq > 0) {
-		virtqueue_notify(vq);
-		txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
+		if (tries++ < VTNET_NOTIFY_RETRIES)
+			goto again;
+
+		txq->vtntx_stats.vtxs_rescheduled++;
+		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
 	}
 }
 
@@ -2230,13 +2261,13 @@ vtnet_txq_mq_start_locked(struct vtnet_t
 	struct virtqueue *vq;
 	struct buf_ring *br;
 	struct ifnet *ifp;
-	int enq, error;
+	int enq, tries, error;
 
 	sc = txq->vtntx_sc;
 	vq = txq->vtntx_vq;
 	br = txq->vtntx_br;
 	ifp = sc->vtnet_ifp;
-	enq = 0;
+	tries = 0;
 	error = 0;
 
 	VTNET_TXQ_LOCK_ASSERT(txq);
@@ -2256,14 +2287,16 @@ vtnet_txq_mq_start_locked(struct vtnet_t
 
 	vtnet_txq_eof(txq);
 
+again:
+	enq = 0;
+
 	while ((m = drbr_peek(ifp, br)) != NULL) {
 		if (virtqueue_full(vq)) {
 			drbr_putback(ifp, br, m);
 			break;
 		}
 
-		error = vtnet_txq_encap(txq, &m);
-		if (error) {
+		if (vtnet_txq_encap(txq, &m) != 0) {
 			if (m != NULL)
 				drbr_putback(ifp, br, m);
 			else
@@ -2276,9 +2309,12 @@ vtnet_txq_mq_start_locked(struct vtnet_t
 		ETHER_BPF_MTAP(ifp, m);
 	}
 
-	if (enq > 0) {
-		virtqueue_notify(vq);
-		txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
+		if (tries++ < VTNET_NOTIFY_RETRIES)
+			goto again;
+
+		txq->vtntx_stats.vtxs_rescheduled++;
+		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
 	}
 
 	return (0);
@@ -2366,30 +2402,26 @@ vtnet_txq_tq_intr(void *xtxq, int pendin
 	}
 
 	vtnet_txq_eof(txq);
-
 	vtnet_txq_start(txq);
 
-	if (vtnet_txq_enable_intr(txq) != 0) {
-		vtnet_txq_disable_intr(txq);
-		txq->vtntx_stats.vtxs_rescheduled++;
-		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
-	}
-
 	VTNET_TXQ_UNLOCK(txq);
 }
 
-static void
+static int
 vtnet_txq_eof(struct vtnet_txq *txq)
 {
 	struct virtqueue *vq;
 	struct vtnet_tx_header *txhdr;
 	struct mbuf *m;
+	int deq;
 
 	vq = txq->vtntx_vq;
+	deq = 0;
 	VTNET_TXQ_LOCK_ASSERT(txq);
 
 	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
 		m = txhdr->vth_mbuf;
+		deq++;
 
 		txq->vtntx_stats.vtxs_opackets++;
 		txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
@@ -2402,6 +2434,8 @@ vtnet_txq_eof(struct vtnet_txq *txq)
 
 	if (virtqueue_empty(vq))
 		txq->vtntx_watchdog = 0;
+
+	return (deq);
 }
 
 static void
@@ -2410,12 +2444,10 @@ vtnet_tx_vq_intr(void *xtxq)
 	struct vtnet_softc *sc;
 	struct vtnet_txq *txq;
 	struct ifnet *ifp;
-	int tries;
 
 	txq = xtxq;
 	sc = txq->vtntx_sc;
 	ifp = sc->vtnet_ifp;
-	tries = 0;
 
 	if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
 		/*
@@ -2430,30 +2462,15 @@ vtnet_tx_vq_intr(void *xtxq)
 
 	VTNET_TXQ_LOCK(txq);
 
-again:
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		VTNET_TXQ_UNLOCK(txq);
 		return;
 	}
 
 	vtnet_txq_eof(txq);
-
 	vtnet_txq_start(txq);
 
-	if (vtnet_txq_enable_intr(txq) != 0) {
-		vtnet_txq_disable_intr(txq);
-		/*
-		 * This is an occasional race, so retry a few times
-		 * before scheduling the taskqueue.
-		 */
-		if (tries++ < VTNET_INTR_DISABLE_RETRIES)
-			goto again;
-
-		VTNET_TXQ_UNLOCK(txq);
-		txq->vtntx_stats.vtxs_rescheduled++;
-		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
-	} else
-		VTNET_TXQ_UNLOCK(txq);
+	VTNET_TXQ_UNLOCK(txq);
 }
 
 static void
@@ -2500,21 +2517,31 @@ vtnet_qflush(struct ifnet *ifp)
 static int
 vtnet_watchdog(struct vtnet_txq *txq)
 {
-	struct vtnet_softc *sc;
+	struct ifnet *ifp;
 
-	sc = txq->vtntx_sc;
+	ifp = txq->vtntx_sc->vtnet_ifp;
 
 	VTNET_TXQ_LOCK(txq);
-	if (sc->vtnet_flags & VTNET_FLAG_EVENT_IDX)
-		vtnet_txq_eof(txq);
+	if (txq->vtntx_watchdog == 1) {
+		/*
+		 * Only drain completed frames if the watchdog is about to
+		 * expire. If any frames were drained, there may be enough
+		 * free descriptors now available to transmit queued frames.
+		 * In that case, the timer will immediately be decremented
+		 * below, but the timeout is generous enough that should not
+		 * be a problem.
+		 */
+		if (vtnet_txq_eof(txq) != 0)
+			vtnet_txq_start(txq);
+	}
+
 	if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
 		VTNET_TXQ_UNLOCK(txq);
 		return (0);
 	}
 	VTNET_TXQ_UNLOCK(txq);
 
-	if_printf(sc->vtnet_ifp, "watchdog timeout on queue %d\n",
-	    txq->vtntx_id);
+	if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
 	return (1);
 }
 
@@ -3564,6 +3591,50 @@ vtnet_vlan_tag_remove(struct mbuf *m)
 }
 
 static void
+vtnet_set_rx_process_limit(struct vtnet_softc *sc)
+{
+	int limit;
+
+	limit = vtnet_tunable_int(sc, "rx_process_limit",
+	    vtnet_rx_process_limit);
+	if (limit < 0)
+		limit = INT_MAX;
+	sc->vtnet_rx_process_limit = limit;
+}
+
+static void
+vtnet_set_tx_intr_threshold(struct vtnet_softc *sc)
+{
+	device_t dev;
+	int size, thresh;
+
+	dev = sc->vtnet_dev;
+	size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq);
+
+	/*
+	 * The Tx interrupt is disabled until the queue free count falls
+	 * below our threshold. Completed frames are drained from the Tx
+	 * virtqueue before transmitting new frames and in the watchdog
+	 * callout, so the frequency of Tx interrupts is greatly reduced,
+	 * at the cost of not freeing mbufs as quickly as they otherwise
+	 * would be.
+	 *
+	 * N.B. We assume all the Tx queues are the same size.
+	 */
+	thresh = size / 4;
+
+	/*
+	 * Without indirect descriptors, leave enough room for the most
+	 * segments we handle.
+	 */
+	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0 &&
+	    thresh < sc->vtnet_tx_nsegs)
+		thresh = sc->vtnet_tx_nsegs;
+
+	sc->vtnet_tx_intr_thresh = thresh;
+}
+
+static void
 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
     struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
 {
@@ -3758,8 +3829,18 @@ vtnet_rxq_disable_intr(struct vtnet_rxq 
 static int
 vtnet_txq_enable_intr(struct vtnet_txq *txq)
 {
+	struct virtqueue *vq;
+
+	vq = txq->vtntx_vq;
+
+	if (vtnet_txq_below_threshold(txq) != 0)
+		return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
 
-	return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG));
+	/*
+	 * The free count is above our threshold. Keep the Tx interrupt
+	 * disabled until the queue is fuller.
+	 */
+	return (0);
 }
 
 static void

Modified: stable/10/sys/dev/virtio/network/if_vtnetvar.h
==============================================================================
--- stable/10/sys/dev/virtio/network/if_vtnetvar.h	Fri Aug 22 15:10:26 2014	(r270333)
+++ stable/10/sys/dev/virtio/network/if_vtnetvar.h	Fri Aug 22 15:12:20 2014	(r270334)
@@ -149,6 +149,7 @@ struct vtnet_softc {
 	int			 vtnet_rx_nmbufs;
 	int			 vtnet_rx_clsize;
 	int			 vtnet_rx_new_clsize;
+	int			 vtnet_tx_intr_thresh;
 	int			 vtnet_tx_nsegs;
 	int			 vtnet_if_flags;
 	int			 vtnet_act_vq_pairs;
@@ -183,6 +184,14 @@ struct vtnet_softc {
 #define VTNET_INTR_DISABLE_RETRIES	4
 
 /*
+ * Similarly, additional completed entries can appear in a virtqueue
+ * between when lasted checked and before notifying the host. Number
+ * of times to retry before scheduling the taskqueue to process the
+ * queue.
+ */
+#define VTNET_NOTIFY_RETRIES		4
+
+/*
  * Fake the media type. The host does not provide us with any real media
  * information.
  */