svn commit: r301197 - head/sys/dev/xen/netfront

Roger Pau Monné royger at FreeBSD.org
Thu Jun 2 11:14:27 UTC 2016


Author: royger
Date: Thu Jun  2 11:14:26 2016
New Revision: 301197
URL: https://svnweb.freebsd.org/changeset/base/301197

Log:
  xen-netfront: always keep the Rx ring full of requests
  
  This is based on Linux commit 1f3c2eba1e2d866ef99bb9b10ade4096e3d7607c from
  David Vrabel:
  
  A full Rx ring only requires 1 MiB of memory.  This is too little memory
  for it to be worthwhile to dynamically scale the number of Rx requests in
  the ring based on traffic rates, because:
  
  a) Even the full 1 MiB is a tiny fraction of the memory in a typical
     modern Linux VM (for example, the AWS micro instance still has 1 GiB
     of memory).
  
  b) Netfront would have used up to 1 MiB already even with moderate
     data rates (there was no adjustment of target based on memory
     pressure).
  
  c) Small VMs will typically have one VCPU and hence only one queue.
  
  Keeping the ring full of Rx requests handles bursty traffic better than
  trying to converge on an optimal number of requests to keep filled.
  
  Reviewed by:	Wei Liu <wei.liu2 at citrix.com>
  Sponsored by:	Citrix Systems R&D
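
As a quick sanity check of the 1 MiB figure in the log above, here is a minimal
stand-alone sketch of the arithmetic.  It assumes 4 KiB pages, so that
NET_RX_RING_SIZE works out to 256 slots and MJUMPAGESIZE equals PAGE_SIZE; the
constants below are assumptions restated from those headers, not values taken
from the commit itself.

	#include <stdio.h>

	int
	main(void)
	{
		/* Assumed values for a 4 KiB page size. */
		const unsigned long ring_slots = 256;	/* NET_RX_RING_SIZE */
		const unsigned long cluster_sz = 4096;	/* MJUMPAGESIZE == PAGE_SIZE */

		/* One granted cluster per slot: 256 * 4 KiB = 1024 KiB = 1 MiB. */
		printf("full Rx ring pins %lu KiB\n", ring_slots * cluster_sz / 1024);
		return (0);
	}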

Modified:
  head/sys/dev/xen/netfront/netfront.c

Modified: head/sys/dev/xen/netfront/netfront.c
==============================================================================
--- head/sys/dev/xen/netfront/netfront.c	Thu Jun  2 11:12:11 2016	(r301196)
+++ head/sys/dev/xen/netfront/netfront.c	Thu Jun  2 11:14:26 2016	(r301197)
@@ -77,6 +77,8 @@ __FBSDID("$FreeBSD$");
 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
 
+#define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1)
+
 /*
  * Should the driver do LRO on the RX end
  *  this can be toggled on the fly, but the
@@ -114,6 +116,7 @@ struct netfront_rx_info;
 static void xn_txeof(struct netfront_txq *);
 static void xn_rxeof(struct netfront_rxq *);
 static void xn_alloc_rx_buffers(struct netfront_rxq *);
+static void xn_alloc_rx_buffers_callout(void *arg);
 
 static void xn_release_rx_bufs(struct netfront_rxq *);
 static void xn_release_tx_bufs(struct netfront_txq *);
@@ -182,16 +185,14 @@ struct netfront_rxq {
 	grant_ref_t 		grant_ref[NET_TX_RING_SIZE + 1];
 
 	struct mbuf		*mbufs[NET_RX_RING_SIZE + 1];
-	struct mbufq		batch;		/* batch queue */
-	int                     target;
-
-	xen_pfn_t		pfn_array[NET_RX_RING_SIZE];
 
 	struct lro_ctrl		lro;
 
 	struct taskqueue 	*tq;
 	struct task		intrtask;
 
+	struct callout		rx_refill;
+
 	struct xn_rx_stats	stats;
 };
 
@@ -233,12 +234,6 @@ struct netfront_info {
 	u_int			carrier;
 	u_int			maxfrags;
 
-	/* Receive-ring batched refills. */
-#define RX_MIN_TARGET 32
-#define RX_MAX_TARGET NET_RX_RING_SIZE
-	int			rx_min_target;
-	int			rx_max_target;
-
 	device_t		xbdev;
 	uint8_t			mac[ETHER_ADDR_LEN];
 
@@ -687,6 +682,7 @@ static void
 destroy_rxq(struct netfront_rxq *rxq)
 {
 
+	callout_drain(&rxq->rx_refill);
 	free(rxq->ring.sring, M_DEVBUF);
 	taskqueue_drain_all(rxq->tq);
 	taskqueue_free(rxq->tq);
@@ -721,7 +717,6 @@ setup_rxqs(device_t dev, struct netfront
 
 		rxq->id = q;
 		rxq->info = info;
-		rxq->target = RX_MIN_TARGET;
 		rxq->ring_ref = GRANT_REF_INVALID;
 		rxq->ring.sring = NULL;
 		snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q);
@@ -733,11 +728,9 @@ setup_rxqs(device_t dev, struct netfront
 			rxq->grant_ref[i] = GRANT_REF_INVALID;
 		}
 
-		mbufq_init(&rxq->batch, INT_MAX);
-
 		/* Start resources allocation */
 
-		if (gnttab_alloc_grant_references(RX_MAX_TARGET,
+		if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
 		    &rxq->gref_head) != 0) {
 			device_printf(dev, "allocating rx gref");
 			error = ENOMEM;
@@ -760,6 +753,8 @@ setup_rxqs(device_t dev, struct netfront
 		rxq->tq = taskqueue_create_fast(rxq->name, M_WAITOK,
 		    taskqueue_thread_enqueue, &rxq->tq);
 
+		callout_init(&rxq->rx_refill, 1);
+
 		error = taskqueue_start_threads(&rxq->tq, 1, PI_NET,
 		    "%s rxq %d", device_get_nameunit(dev), rxq->id);
 		if (error != 0) {
@@ -1058,119 +1053,88 @@ xn_release_tx_bufs(struct netfront_txq *
 	}
 }
 
-static void
-xn_alloc_rx_buffers(struct netfront_rxq *rxq)
+static struct mbuf *
+xn_alloc_one_rx_buffer(struct netfront_rxq *rxq)
 {
-	struct netfront_info *np = rxq->info;
-	int otherend_id = xenbus_get_otherend_id(np->xbdev);
-	unsigned short id;
-	struct mbuf *m_new;
-	int i, batch_target, notify;
-	RING_IDX req_prod;
-	grant_ref_t ref;
-	netif_rx_request_t *req;
-	vm_offset_t vaddr;
-	u_long pfn;
+	struct mbuf *m;
 
-	req_prod = rxq->ring.req_prod_pvt;
+	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
+	if (m == NULL)
+		return NULL;
+	m->m_len = m->m_pkthdr.len = MJUMPAGESIZE;
 
-	if (__predict_false(np->carrier == 0))
-		return;
+	return (m);
+}
 
-	/*
-	 * Allocate mbufs greedily, even though we batch updates to the
-	 * receive ring. This creates a less bursty demand on the memory
-	 * allocator, and so should reduce the chance of failed allocation
-	 * requests both for ourself and for other kernel subsystems.
-	 *
-	 * Here we attempt to maintain rx_target buffers in flight, counting
-	 * buffers that we have yet to process in the receive ring.
-	 */
-	batch_target = rxq->target - (req_prod - rxq->ring.rsp_cons);
-	for (i = mbufq_len(&rxq->batch); i < batch_target; i++) {
-		m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
-		if (m_new == NULL) {
-			if (i != 0)
-				goto refill;
-			/* XXX set timer */
-			break;
-		}
-		m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE;
+static void
+xn_alloc_rx_buffers(struct netfront_rxq *rxq)
+{
+	RING_IDX req_prod;
+	int notify;
 
-		/* queue the mbufs allocated */
-		mbufq_enqueue(&rxq->batch, m_new);
-	}
+	XN_RX_LOCK_ASSERT(rxq);
 
-	/*
-	 * If we've allocated at least half of our target number of entries,
-	 * submit them to the backend - we have enough to make the overhead
-	 * of submission worthwhile.  Otherwise wait for more mbufs and
-	 * request entries to become available.
-	 */
-	if (i < (rxq->target/2)) {
-		if (req_prod > rxq->ring.sring->req_prod)
-			goto push;
+	if (__predict_false(rxq->info->carrier == 0))
 		return;
-	}
 
-	/*
-	 * Double floating fill target if we risked having the backend
-	 * run out of empty buffers for receive traffic.  We define "running
-	 * low" as having less than a fourth of our target buffers free
-	 * at the time we refilled the queue.
-	 */
-	if ((req_prod - rxq->ring.sring->rsp_prod) < (rxq->target / 4)) {
-		rxq->target *= 2;
-		if (rxq->target > np->rx_max_target)
-			rxq->target = np->rx_max_target;
-	}
+	for (req_prod = rxq->ring.req_prod_pvt;
+	     req_prod - rxq->ring.rsp_cons < NET_RX_RING_SIZE;
+	     req_prod++) {
+		struct mbuf *m;
+		unsigned short id;
+		grant_ref_t ref;
+		struct netif_rx_request *req;
+		unsigned long pfn;
 
-refill:
-	for (i = 0; ; i++) {
-		if ((m_new = mbufq_dequeue(&rxq->batch)) == NULL)
+		m = xn_alloc_one_rx_buffer(rxq);
+		if (m == NULL)
 			break;
 
-		m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)(
-				vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT);
-
-		id = xn_rxidx(req_prod + i);
+		id = xn_rxidx(req_prod);
 
 		KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain"));
-		rxq->mbufs[id] = m_new;
+		rxq->mbufs[id] = m;
 
 		ref = gnttab_claim_grant_reference(&rxq->gref_head);
 		KASSERT(ref != GNTTAB_LIST_END,
-			("reserved grant references exhuasted"));
+		    ("reserved grant references exhuasted"));
 		rxq->grant_ref[id] = ref;
 
-		vaddr = mtod(m_new, vm_offset_t);
-		pfn = vtophys(vaddr) >> PAGE_SHIFT;
-		req = RING_GET_REQUEST(&rxq->ring, req_prod + i);
+		pfn = atop(vtophys(mtod(m, vm_offset_t)));
+		req = RING_GET_REQUEST(&rxq->ring, req_prod);
 
-		gnttab_grant_foreign_access_ref(ref, otherend_id, pfn, 0);
+		gnttab_grant_foreign_access_ref(ref,
+		    xenbus_get_otherend_id(rxq->info->xbdev), pfn, 0);
 		req->id = id;
 		req->gref = ref;
+	}
 
-		rxq->pfn_array[i] =
-		    vtophys(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT;
+	rxq->ring.req_prod_pvt = req_prod;
+
+	/* Not enough requests? Try again later. */
+	if (req_prod - rxq->ring.rsp_cons < NET_RX_SLOTS_MIN) {
+		callout_reset(&rxq->rx_refill, hz/10, xn_alloc_rx_buffers_callout,
+		    rxq);
+		return;
 	}
 
-	KASSERT(i, ("no mbufs processed")); /* should have returned earlier */
-	KASSERT(mbufq_len(&rxq->batch) == 0, ("not all mbufs processed"));
-	/*
-	 * We may have allocated buffers which have entries outstanding
-	 * in the page * update queue -- make sure we flush those first!
-	 */
-	wmb();
+	wmb();		/* barrier so backend seens requests */
 
-	/* Above is a suitable barrier to ensure backend will see requests. */
-	rxq->ring.req_prod_pvt = req_prod + i;
-push:
 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify);
 	if (notify)
 		xen_intr_signal(rxq->xen_intr_handle);
 }
 
+static void xn_alloc_rx_buffers_callout(void *arg)
+{
+	struct netfront_rxq *rxq;
+
+	rxq = (struct netfront_rxq *)arg;
+	XN_RX_LOCK(rxq);
+	xn_alloc_rx_buffers(rxq);
+	XN_RX_UNLOCK(rxq);
+}
+
 static void
 xn_release_rx_bufs(struct netfront_rxq *rxq)
 {
@@ -1792,8 +1756,12 @@ xn_ifinit_locked(struct netfront_info *n
 
 	for (i = 0; i < np->num_queues; i++) {
 		rxq = &np->rxq[i];
+		XN_RX_LOCK(rxq);
 		xn_alloc_rx_buffers(rxq);
 		rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1;
+		if (RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring))
+			taskqueue_enqueue(rxq->tq, &rxq->intrtask);
+		XN_RX_UNLOCK(rxq);
 	}
 
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
@@ -2009,7 +1977,9 @@ xn_connect(struct netfront_info *np)
 		XN_TX_LOCK(txq);
 		xn_txeof(txq);
 		XN_TX_UNLOCK(txq);
+		XN_RX_LOCK(rxq);
 		xn_alloc_rx_buffers(rxq);
+		XN_RX_UNLOCK(rxq);
 	}
 
 	return (0);
@@ -2225,9 +2195,6 @@ create_netdev(device_t dev)
 	ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL);
 	ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL);
 
-	np->rx_min_target = RX_MIN_TARGET;
-	np->rx_max_target = RX_MAX_TARGET;
-
 	err = xen_net_read_mac(dev, np->mac);
 	if (err != 0)
 		goto error;
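
For readers skimming the diff, here is a condensed, pseudocode-level sketch of
the new refill path above.  It reuses identifiers from the patch
(xn_alloc_one_rx_buffer, NET_RX_SLOTS_MIN, the rx_refill callout), elides the
mbuf and grant-reference bookkeeping, and uses a hypothetical function name, so
it is an illustration of the flow rather than a drop-in replacement for
xn_alloc_rx_buffers():

	/* Sketch only: called with the Rx queue lock held. */
	static void
	xn_refill_sketch(struct netfront_rxq *rxq)
	{
		RING_IDX prod;
		int notify;

		/* Post a request for every free slot until the ring is full. */
		for (prod = rxq->ring.req_prod_pvt;
		     prod - rxq->ring.rsp_cons < NET_RX_RING_SIZE; prod++) {
			struct mbuf *m = xn_alloc_one_rx_buffer(rxq);

			if (m == NULL)
				break;	/* Out of clusters; retry below. */
			/* ... store the mbuf and grant its page to the backend ... */
		}
		rxq->ring.req_prod_pvt = prod;

		/* Still below the minimum?  Retry in ~100 ms via the callout. */
		if (prod - rxq->ring.rsp_cons < NET_RX_SLOTS_MIN) {
			callout_reset(&rxq->rx_refill, hz / 10,
			    xn_alloc_rx_buffers_callout, rxq);
			return;
		}

		/* Publish the new requests and kick the backend if needed. */
		wmb();
		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify);
		if (notify)
			xen_intr_signal(rxq->xen_intr_handle);
	}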

