svn commit: r325576 - head/sys/dev/ena

Marcin Wojtas mw at FreeBSD.org
Thu Nov 9 11:48:23 UTC 2017


Author: mw
Date: Thu Nov  9 11:48:22 2017
New Revision: 325576
URL: https://svnweb.freebsd.org/changeset/base/325576

Log:
  Check the Rx ring state to prevent a stall in the ENA driver
  
  If the Rx ring is full and the driver fails to allocate Rx mbufs,
  the ring could stall.
  
  The keep-alive (timer service) routine checks the Rx ring state every second;
  if a ring stays full for two consecutive cycles, the rx_cleanup routine is
  triggered in another thread (a minimal sketch of this pattern follows the
  log entry below).
  
  Submitted by: Michal Krawczyk <mk at semihalf.com>
  Reviewed by: byenduri_gmail.com
  Obtained from: Semihalf
  Sponsored by: Amazon, Inc.
  Differential Revision: https://reviews.freebsd.org/D12856
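
For readers unfamiliar with the pattern, the following is a minimal sketch of
the deferred Rx cleanup scheme described in the log, built from standard
FreeBSD taskqueue(9) and mutex(9) primitives. The structure, helper names, and
the CLEAN_BUDGET value are simplified stand-ins for illustration only, not the
driver's actual definitions; rx_cleanup_sketch() is a stub standing in for
ena_rx_cleanup():

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priority.h>
#include <sys/taskqueue.h>

#define CLEAN_BUDGET	256	/* illustrative loop bound */
#define EMPTY_RX_REFILL	2	/* consecutive detections before triggering */

struct rx_ring_sketch {
	struct mtx		ring_mtx;	/* serializes MSI-X vs. deferred cleanup */
	struct task		cmpl_task;	/* deferred completion task */
	struct taskqueue	*cmpl_tq;	/* single-threaded taskqueue */
	int			empty_rx_queue;	/* consecutive "ring starved" ticks */
};

/* Stand-in for ena_rx_cleanup(); the real one refills mbufs and returns work done. */
static int
rx_cleanup_sketch(struct rx_ring_sketch *rx_ring)
{
	return (0);
}

/* Deferred handler: drain the ring under the lock, bounded by a budget. */
static void
deferred_rx_cleanup_sketch(void *arg, int pending)
{
	struct rx_ring_sketch *rx_ring = arg;
	int budget = CLEAN_BUDGET;

	mtx_lock(&rx_ring->ring_mtx);
	while (budget-- > 0) {
		if (rx_cleanup_sketch(rx_ring) == 0)
			break;
	}
	mtx_unlock(&rx_ring->ring_mtx);
}

/* Setup: one fast taskqueue with a single thread per Rx ring. */
static void
rx_ring_task_init_sketch(struct rx_ring_sketch *rx_ring)
{
	mtx_init(&rx_ring->ring_mtx, "rx ring", NULL, MTX_DEF);
	TASK_INIT(&rx_ring->cmpl_task, 0, deferred_rx_cleanup_sketch, rx_ring);
	rx_ring->cmpl_tq = taskqueue_create_fast("rx cmpl", M_WAITOK,
	    taskqueue_thread_enqueue, &rx_ring->cmpl_tq);
	taskqueue_start_threads(&rx_ring->cmpl_tq, 1, PI_NET, "rx cmpl");
}

/* Interrupt path: skip the ring if the deferred task already holds the lock. */
static void
msix_rx_sketch(struct rx_ring_sketch *rx_ring)
{
	if (mtx_trylock(&rx_ring->ring_mtx)) {
		(void)rx_cleanup_sketch(rx_ring);
		mtx_unlock(&rx_ring->ring_mtx);
	}
}

/* Timer path: after two consecutive starved observations, enqueue the task. */
static void
check_ring_sketch(struct rx_ring_sketch *rx_ring, int free_descs, int ring_size)
{
	if (free_descs == ring_size - 1) {
		if (++rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
			taskqueue_enqueue(rx_ring->cmpl_tq, &rx_ring->cmpl_task);
			rx_ring->empty_rx_queue = 0;
		}
	} else
		rx_ring->empty_rx_queue = 0;
}

The key design point, mirrored in the patch below, is that the interrupt
handler uses a trylock so it never blocks behind the deferred task, while the
timer-driven path can always make progress from thread context even when no
further interrupts will arrive.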

Modified:
  head/sys/dev/ena/ena.c
  head/sys/dev/ena/ena.h
  head/sys/dev/ena/ena_sysctl.c

Modified: head/sys/dev/ena/ena.c
==============================================================================
--- head/sys/dev/ena/ena.c	Thu Nov  9 11:47:47 2017	(r325575)
+++ head/sys/dev/ena/ena.c	Thu Nov  9 11:48:22 2017	(r325576)
@@ -122,6 +122,7 @@ static void	ena_destroy_all_rx_queues(struct ena_adapt
 static void	ena_destroy_all_io_queues(struct ena_adapter *);
 static int	ena_create_io_queues(struct ena_adapter *);
 static int	ena_tx_cleanup(struct ena_ring *);
+static void	ena_deferred_rx_cleanup(void *, int);
 static int	ena_rx_cleanup(struct ena_ring *);
 static inline int validate_tx_req_id(struct ena_ring *, uint16_t);
 static void	ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
@@ -471,6 +472,7 @@ ena_init_io_rings(struct ena_adapter *adapter)
 		    device_get_nameunit(adapter->pdev), i);
 
 		mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF);
+		mtx_init(&rxr->ring_mtx, rxr->mtx_name, NULL, MTX_DEF);
 
 		que = &adapter->que[i];
 		que->adapter = adapter;
@@ -480,6 +482,8 @@ ena_init_io_rings(struct ena_adapter *adapter)
 
 		txr->que = que;
 		rxr->que = que;
+
+		rxr->empty_rx_queue = 0;
 	}
 }
 
@@ -495,6 +499,7 @@ ena_free_io_ring_resources(struct ena_adapter *adapter
 	    sizeof(rxr->rx_stats));
 
 	mtx_destroy(&txr->ring_mtx);
+	mtx_destroy(&rxr->ring_mtx);
 
 	drbr_free(txr->br, M_DEVBUF);
 
@@ -847,6 +852,22 @@ ena_setup_rx_resources(struct ena_adapter *adapter, un
 		}
 	}
 
+	/* Allocate taskqueues */
+	TASK_INIT(&rx_ring->cmpl_task, 0, ena_deferred_rx_cleanup, rx_ring);
+	rx_ring->cmpl_tq = taskqueue_create_fast("ena RX completion", M_WAITOK,
+	    taskqueue_thread_enqueue, &rx_ring->cmpl_tq);
+
+	/* RSS set cpu for thread */
+#ifdef RSS
+	CPU_SETOF(que->cpu, &cpu_mask);
+	taskqueue_start_threads_cpuset(&rx_ring->cmpl_tq, 1, PI_NET, &cpu_mask,
+	    "%s rx_ring cmpl (bucket %d)",
+	    device_get_nameunit(adapter->pdev), que->cpu);
+#else
+	taskqueue_start_threads(&rx_ring->cmpl_tq, 1, PI_NET,
+	    "%s rx_ring cmpl %d", device_get_nameunit(adapter->pdev), que->cpu);
+#endif
+
 	return (0);
 
 err_rx_dma:
@@ -877,6 +898,11 @@ ena_free_rx_resources(struct ena_adapter *adapter, uns
 
 	ena_trace(ENA_INFO, "%s qid %d\n", __func__, qid);
 
+	while (taskqueue_cancel(rx_ring->cmpl_tq, &rx_ring->cmpl_task, NULL) != 0)
+		taskqueue_drain(rx_ring->cmpl_tq, &rx_ring->cmpl_task);
+
+	taskqueue_free(rx_ring->cmpl_tq);
+
 	/* Free buffer DMA maps, */
 	for (int i = 0; i < rx_ring->ring_size; i++) {
 		m_freem(rx_ring->rx_buffer_info[i].mbuf);
@@ -1565,6 +1591,24 @@ ena_rx_checksum(struct ena_ring *rx_ring, struct ena_c
 	return;
 }
 
+static void
+ena_deferred_rx_cleanup(void *arg, int pending)
+{
+	struct ena_ring *rx_ring = arg;
+	int budget = CLEAN_BUDGET;
+
+	ENA_RING_MTX_LOCK(rx_ring);
+	/*
+	 * If deferred task was executed, perform cleanup of all awaiting
+	 * descs (or until given budget is depleted to avoid infinite loop).
+	 */
+	while (budget--) {
+		if (ena_rx_cleanup(rx_ring) == 0)
+			break;
+	}
+	ENA_RING_MTX_UNLOCK(rx_ring);
+}
+
 /**
  * ena_rx_cleanup - handle rx irq
  * @arg: ring for which irq is being handled
@@ -1736,7 +1780,17 @@ ena_handle_msix(void *arg)
 	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
 
 	for (i = 0; i < CLEAN_BUDGET; ++i) {
-		rxc = ena_rx_cleanup(rx_ring);
+		/*
+		 * If lock cannot be acquired, then deferred cleanup task was
+		 * being executed and rx ring is being cleaned up in
+		 * another thread.
+		 */
+		if (ENA_RING_MTX_TRYLOCK(rx_ring)) {
+			rxc = ena_rx_cleanup(rx_ring);
+			ENA_RING_MTX_UNLOCK(rx_ring);
+		} else {
+			rxc = 0;
+		}
 
 		/* Protection from calling ena_tx_cleanup from ena_start_xmit */
 		ENA_RING_MTX_LOCK(tx_ring);
@@ -3374,7 +3428,54 @@ static void check_for_missing_tx_completions(struct en
 	adapter->next_monitored_tx_qid = i % adapter->num_queues;
 }
 
+/* trigger deferred rx cleanup after 2 consecutive detections */
+#define EMPTY_RX_REFILL 2
+/* For the rare case where the device runs out of Rx descriptors and the
+ * msix handler failed to refill new Rx descriptors (due to a lack of memory
+ * for example).
+ * This case will lead to a deadlock:
+ * The device won't send interrupts since all the new Rx packets will be dropped
+ * The msix handler won't allocate new Rx descriptors so the device won't be
+ * able to send new packets.
+ *
+ * When such a situation is detected - execute rx cleanup task in another thread
+ */
+static void
+check_for_empty_rx_ring(struct ena_adapter *adapter)
+{
+	struct ena_ring *rx_ring;
+	int i, refill_required;
 
+	if (!adapter->up)
+		return;
+
+	if (adapter->trigger_reset)
+		return;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		rx_ring = &adapter->rx_ring[i];
+
+		refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq);
+		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
+			rx_ring->empty_rx_queue++;
+
+			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL)	{
+				counter_u64_add(rx_ring->rx_stats.empty_rx_ring,
+				    1);
+
+				device_printf(adapter->pdev,
+				    "trigger refill for ring %d\n", i);
+
+				taskqueue_enqueue(rx_ring->cmpl_tq,
+				    &rx_ring->cmpl_task);
+				rx_ring->empty_rx_queue = 0;
+			}
+		} else {
+			rx_ring->empty_rx_queue = 0;
+		}
+	}
+}
+
 static void
 ena_timer_service(void *data)
 {
@@ -3387,6 +3488,8 @@ ena_timer_service(void *data)
 	check_for_admin_com_state(adapter);
 
 	check_for_missing_tx_completions(adapter);
+
+	check_for_empty_rx_ring(adapter);
 
 	if (host_info)
 		ena_update_host_info(host_info, adapter->ifp);

Modified: head/sys/dev/ena/ena.h
==============================================================================
--- head/sys/dev/ena/ena.h	Thu Nov  9 11:47:47 2017	(r325575)
+++ head/sys/dev/ena/ena.h	Thu Nov  9 11:48:22 2017	(r325576)
@@ -284,15 +284,23 @@ struct ena_ring {
 	struct buf_ring *br; /* only for TX */
 	struct mtx ring_mtx;
 	char mtx_name[16];
-	struct task enqueue_task;
-	struct taskqueue *enqueue_tq;
-	struct task cmpl_task;
-	struct taskqueue *cmpl_tq;
+	union {
+		struct {
+			struct task enqueue_task;
+			struct taskqueue *enqueue_tq;
+		};
+		struct {
+			struct task cmpl_task;
+			struct taskqueue *cmpl_tq;
+		};
+	};
 
 	union {
 		struct ena_stats_tx tx_stats;
 		struct ena_stats_rx rx_stats;
 	};
+
+	int empty_rx_queue;
 
 } __aligned(CACHE_LINE_SIZE);
 

Modified: head/sys/dev/ena/ena_sysctl.c
==============================================================================
--- head/sys/dev/ena/ena_sysctl.c	Thu Nov  9 11:47:47 2017	(r325575)
+++ head/sys/dev/ena/ena_sysctl.c	Thu Nov  9 11:48:22 2017	(r325576)
@@ -200,6 +200,9 @@ ena_sysctl_add_stats(struct ena_adapter *adapter)
 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
 		    "bad_req_id", CTLFLAG_RD,
 		    &rx_stats->bad_req_id, "Bad request id count");
+		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
+		    "empty_rx_ring", CTLFLAG_RD,
+		    &rx_stats->empty_rx_ring, "RX descriptors depletion count");
 	}
 
 	/* Stats read from device */

