svn commit: r360777 - head/sys/dev/ena

Marcin Wojtas mw at FreeBSD.org
Thu May 7 11:28:41 UTC 2020


Author: mw
Date: Thu May  7 11:28:39 2020
New Revision: 360777
URL: https://svnweb.freebsd.org/changeset/base/360777

Log:
  Optimize ENA Rx refill for low memory conditions
  
  Sometimes, especially when there is not much memory in the system left,
  allocating mbuf jumbo clusters (like 9KB or 16KB) can take a lot of time
  and it is not guaranteed that it'll succeed. In that situation, the
  fallback will work, but if the refill needs to take a place for a lot of
  descriptors at once, the time spent in m_getjcl looking for memory can
  cause system unresponsiveness due to high priority of the Rx task. This
  can also lead to driver reset, because Tx cleanup routine is being
  blocked and timer service could detect that Tx packets aren't cleaned
  up. The reset routine can further create another unresponsiveness - Rx
  rings are being refilled there, so m_getjcl will again burn the CPU.
  This was causing NVMe driver timeouts and resets, because network driver
  is having higher priority.
  
  Instead of 16KB jumbo clusters for the Rx buffers, 9KB clusters are
  enough - ENA MTU is being set to 9K anyway, so it's very unlikely that
  more space than 9KB will be needed.
  
  However, 9KB jumbo clusters can still cause issues, so by default the
  page size mbuf cluster will be used for the Rx descriptors. This can have a
  small (~2%) impact on the throughput of the device, so to restore
  original behavior, one must change sysctl "hw.ena.enable_9k_mbufs" to
  "1" in "/boot/loader.conf" file.
  
  As a part of this patch (important fix), the version of the driver
  was updated to v2.1.2.
  
  Submitted by:   cperciva
  Reviewed by:    Michal Krawczyk <mk at semihalf.com>
  Reviewed by:    Ido Segev <idose at amazon.com>
  Reviewed by:    Guy Tzalik <gtzalik at amazon.com>
  MFC after:      3 days
  PR:             225791, 234838, 235856, 236989, 243531
  Differential Revision: https://reviews.freebsd.org/D24546

Modified:
  head/sys/dev/ena/ena.c
  head/sys/dev/ena/ena.h
  head/sys/dev/ena/ena_sysctl.c
  head/sys/dev/ena/ena_sysctl.h

Modified: head/sys/dev/ena/ena.c
==============================================================================
--- head/sys/dev/ena/ena.c	Thu May  7 10:46:02 2020	(r360776)
+++ head/sys/dev/ena/ena.c	Thu May  7 11:28:39 2020	(r360777)
@@ -368,6 +368,7 @@ ena_init_io_rings_common(struct ena_adapter *adapter, 
 	ring->ena_dev = adapter->ena_dev;
 	ring->first_interrupt = false;
 	ring->no_interrupt_event_cnt = 0;
+	ring->rx_mbuf_sz = ena_mbuf_sz;
 }
 
 static void
@@ -508,9 +509,9 @@ ena_setup_rx_dma_tag(struct ena_adapter *adapter)
 	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
 	    BUS_SPACE_MAXADDR, 			  /* highaddr of excl window */
 	    NULL, NULL,				  /* filter, filterarg 	     */
-	    MJUM16BYTES,			  /* maxsize 		     */
+	    ena_mbuf_sz,			  /* maxsize 		     */
 	    adapter->max_rx_sgl_size,		  /* nsegments 		     */
-	    MJUM16BYTES,			  /* maxsegsize 	     */
+	    ena_mbuf_sz,			  /* maxsegsize 	     */
 	    0,					  /* flags 		     */
 	    NULL,				  /* lockfunc 		     */
 	    NULL,				  /* lockarg 		     */
@@ -963,7 +964,8 @@ ena_alloc_rx_mbuf(struct ena_adapter *adapter,
 		return (0);
 
 	/* Get mbuf using UMA allocator */
-	rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM16BYTES);
+	rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
+	    rx_ring->rx_mbuf_sz);
 
 	if (unlikely(rx_info->mbuf == NULL)) {
 		counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
@@ -974,7 +976,7 @@ ena_alloc_rx_mbuf(struct ena_adapter *adapter,
 		}
 		mlen = MCLBYTES;
 	} else {
-		mlen = MJUM16BYTES;
+		mlen = rx_ring->rx_mbuf_sz;
 	}
 	/* Set mbuf length*/
 	rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;

Modified: head/sys/dev/ena/ena.h
==============================================================================
--- head/sys/dev/ena/ena.h	Thu May  7 10:46:02 2020	(r360776)
+++ head/sys/dev/ena/ena.h	Thu May  7 11:28:39 2020	(r360777)
@@ -41,7 +41,7 @@
 
 #define DRV_MODULE_VER_MAJOR	2
 #define DRV_MODULE_VER_MINOR	1
-#define DRV_MODULE_VER_SUBMINOR 1
+#define DRV_MODULE_VER_SUBMINOR 2
 
 #define DRV_MODULE_NAME		"ena"
 
@@ -307,8 +307,13 @@ struct ena_ring {
 
 	/* Determines if device will use LLQ or normal mode for TX */
 	enum ena_admin_placement_policy_type tx_mem_queue_type;
-	/* The maximum length the driver can push to the device (For LLQ) */
-	uint8_t tx_max_header_size;
+	union {
+		/* The maximum length the driver can push to the device (For LLQ) */
+		uint8_t tx_max_header_size;
+		/* The maximum (and default) mbuf size for the Rx descriptor. */
+		uint16_t rx_mbuf_sz;
+
+	};
 
 	bool first_interrupt;
 	uint16_t no_interrupt_event_cnt;

Modified: head/sys/dev/ena/ena_sysctl.c
==============================================================================
--- head/sys/dev/ena/ena_sysctl.c	Thu May  7 10:46:02 2020	(r360776)
+++ head/sys/dev/ena/ena_sysctl.c	Thu May  7 11:28:39 2020	(r360777)
@@ -48,6 +48,17 @@ int ena_log_level = ENA_ALERT | ENA_WARNING;
 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN,
     &ena_log_level, 0, "Logging level indicating verbosity of the logs");
 
+/*
+ * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead).
+ * Using 9k mbufs in low memory conditions might cause allocation to take a lot
+ * of time and lead to the OS instability as it needs to look for the contiguous
+ * pages.
+ * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if
+ * the network performance is the priority, the 9k mbufs can be used.
+ */
+int ena_enable_9k_mbufs = 0;
+SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
+    &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");
 
 void
 ena_sysctl_add_nodes(struct ena_adapter *adapter)

Modified: head/sys/dev/ena/ena_sysctl.h
==============================================================================
--- head/sys/dev/ena/ena_sysctl.h	Thu May  7 10:46:02 2020	(r360776)
+++ head/sys/dev/ena/ena_sysctl.h	Thu May  7 11:28:39 2020	(r360777)
@@ -41,4 +41,7 @@
 
 void	ena_sysctl_add_nodes(struct ena_adapter *);
 
+extern int ena_enable_9k_mbufs;
+#define ena_mbuf_sz (ena_enable_9k_mbufs ? MJUM9BYTES : MJUMPAGESIZE)
+
 #endif /* !(ENA_SYSCTL_H) */


More information about the svn-src-head mailing list