git: 6d1ef2abd330 - main - ena: Implement full RSS reconfiguration

Marcin Wojtas mw at FreeBSD.org
Wed Sep 1 23:07:43 UTC 2021


The branch main has been updated by mw:

URL: https://cgit.FreeBSD.org/src/commit/?id=6d1ef2abd330fac4057f092abbbdc28a568b4327

commit 6d1ef2abd330fac4057f092abbbdc28a568b4327
Author:     Artur Rojek <ar at semihalf.com>
AuthorDate: 2021-08-12 08:34:29 +0000
Commit:     Marcin Wojtas <mw at FreeBSD.org>
CommitDate: 2021-09-01 23:06:53 +0000

    ena: Implement full RSS reconfiguration
    
    Bind RX/TX queues and MSI-X vectors to matching CPUs based on the RSS
    bucket entries.
    
    Introduce sysctls for the following RSS functionality:
    - rss.indir_table:      indirection table mapping
    - rss.indir_table_size: indirection table size
    - rss.key:              RSS hash key (if Toeplitz used)
    
    Said sysctls are only available when compiled without `option RSS`, as
    kernel-side RSS support currently doesn't offer RSS reconfiguration.
    
    Migrate the hash algorithm from CRC32 to Toeplitz and change the initial
    hash value to 0x0 in order to match the standard Toeplitz implementation.
    Provide helpers for hash key inversion required for HW operations.
    
    Obtained from: Semihalf
    MFC after: 2 weeks
    Sponsored by: Amazon, Inc.
---
 share/man/man4/ena.4       |  51 +++++++++
 sys/dev/ena/ena.c          |  62 ++++++++---
 sys/dev/ena/ena.h          |  10 +-
 sys/dev/ena/ena_datapath.c |  14 ++-
 sys/dev/ena/ena_rss.c      | 128 ++++++++++++++++++++++-
 sys/dev/ena/ena_rss.h      |  24 +++++
 sys/dev/ena/ena_sysctl.c   | 256 +++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 525 insertions(+), 20 deletions(-)

diff --git a/share/man/man4/ena.4 b/share/man/man4/ena.4
index cd98fe2c84ba..aacf7956c9f8 100644
--- a/share/man/man4/ena.4
+++ b/share/man/man4/ena.4
@@ -269,6 +269,57 @@ command should be used:
 .Bd -literal -offset indent
 sysctl dev.ena.1.eni_metrics.sample_interval=10
 .Ed
+.It Va dev.ena.X.rss.indir_table_size
+RSS indirection table size.
+The default is 128.
+Returns the number of entries in the RSS indirection table.
+.Pp
+Example:
+To read the RSS indirection table size, the following command should be used:
+.Bd -literal -offset indent
+sysctl dev.ena.0.rss.indir_table_size
+.Ed
+.It Va dev.ena.X.rss.indir_table
+RSS indirection table mapping.
+The default is a list of x:y pairs, one per indirection table entry.
+Updates selected indices of the RSS indirection table.
+.Pp
+The entry string consists of one or more x:y pairs, where x stands for
+the table index and y for its new value. Table indices that don't need to be
+updated can be omitted from the string and will retain their existing values.
+.Pp
+If an index is entered more than once, the last value is used.
+.Pp
+Example:
+To update two selected indices in the RSS indirection table, e.g. setting index
+0 to queue 5 and then index 5 to queue 0, the following command should be used:
+.Bd -literal -offset indent
+sysctl dev.ena.0.rss.indir_table="0:5 5:0"
+.Ed
+.It Va dev.ena.X.rss.key
+RSS hash key.
+The default is a 40-byte, randomly generated hash key.
+Controls the RSS Toeplitz hash algorithm key value.
+.Pp
+Only available when the driver is compiled without kernel-side RSS support.
+.Pp
+Example:
+To change the RSS hash key value to
+.Pp
+0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+.br
+0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+.br
+0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+.br
+0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+.br
+0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
+.Pp
+the following command should be used:
+.Bd -literal -offset indent
+sysctl dev.ena.0.rss.key=6d5a56da255b0ec24167253d43a38fb0d0ca2bcbae7b30b477cb2da38030f20c6a42b73bbeac01fa
+.Ed
 .El
 .Sh DIAGNOSTICS
 .Ss Device initialization phase
diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c
index 91ddc7733b9f..84ef234cd937 100644
--- a/sys/dev/ena/ena.c
+++ b/sys/dev/ena/ena.c
@@ -601,8 +601,10 @@ static int
 ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
 {
 	device_t pdev = adapter->pdev;
+	char thread_name[MAXCOMLEN + 1];
 	struct ena_que *que = &adapter->que[qid];
 	struct ena_ring *tx_ring = que->tx_ring;
+	cpuset_t *cpu_mask = NULL;
 	int size, i, err;
 #ifdef DEV_NETMAP
 	bus_dmamap_t *map;
@@ -686,8 +688,16 @@ ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
 
 	tx_ring->running = true;
 
-	taskqueue_start_threads(&tx_ring->enqueue_tq, 1, PI_NET,
-	    "%s txeq %d", device_get_nameunit(adapter->pdev), que->cpu);
+#ifdef RSS
+	cpu_mask = &que->cpu_mask;
+	snprintf(thread_name, sizeof(thread_name), "%s txeq %d",
+	    device_get_nameunit(adapter->pdev), que->cpu);
+#else
+	snprintf(thread_name, sizeof(thread_name), "%s txeq %d",
+	    device_get_nameunit(adapter->pdev), que->id);
+#endif
+	taskqueue_start_threads_cpuset(&tx_ring->enqueue_tq, 1, PI_NET,
+	    cpu_mask, "%s", thread_name);
 
 	return (0);
 
@@ -1423,6 +1433,7 @@ ena_create_io_queues(struct ena_adapter *adapter)
 	struct ena_que *queue;
 	uint16_t ena_qid;
 	uint32_t msix_vector;
+	cpuset_t *cpu_mask = NULL;
 	int rc, i;
 
 	/* Create TX queues */
@@ -1489,7 +1500,11 @@ ena_create_io_queues(struct ena_adapter *adapter)
 		queue->cleanup_tq = taskqueue_create_fast("ena cleanup",
 		    M_WAITOK, taskqueue_thread_enqueue, &queue->cleanup_tq);
 
-		taskqueue_start_threads(&queue->cleanup_tq, 1, PI_NET,
+#ifdef RSS
+		cpu_mask = &queue->cpu_mask;
+#endif
+		taskqueue_start_threads_cpuset(&queue->cleanup_tq, 1, PI_NET,
+		    cpu_mask,
 		    "%s queue %d cleanup",
 		    device_get_nameunit(adapter->pdev), i);
 	}
@@ -1628,7 +1643,10 @@ ena_setup_mgmnt_intr(struct ena_adapter *adapter)
 static int
 ena_setup_io_intr(struct ena_adapter *adapter)
 {
-	static int last_bind_cpu = -1;
+#ifdef RSS
+	int num_buckets = rss_getnumbuckets();
+	static int last_bind = 0;
+#endif
 	int irq_idx;
 
 	if (adapter->msix_entries == NULL)
@@ -1646,15 +1664,12 @@ ena_setup_io_intr(struct ena_adapter *adapter)
 		ena_log(adapter->pdev, DBG, "ena_setup_io_intr vector: %d\n",
 		    adapter->msix_entries[irq_idx].vector);
 
-		/*
-		 * We want to bind rings to the corresponding cpu
-		 * using something similar to the RSS round-robin technique.
-		 */
-		if (unlikely(last_bind_cpu < 0))
-			last_bind_cpu = CPU_FIRST();
+#ifdef RSS
 		adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
-		    last_bind_cpu;
-		last_bind_cpu = CPU_NEXT(last_bind_cpu);
+		    rss_getcpu(last_bind);
+		last_bind = (last_bind + 1) % num_buckets;
+		CPU_SETOF(adapter->que[i].cpu, &adapter->que[i].cpu_mask);
+#endif
 	}
 
 	return (0);
@@ -1746,6 +1761,19 @@ ena_request_io_irq(struct ena_adapter *adapter)
 			goto err;
 		}
 		irq->requested = true;
+
+#ifdef RSS
+		rc = bus_bind_intr(adapter->pdev, irq->res, irq->cpu);
+		if (unlikely(rc != 0)) {
+			ena_log(pdev, ERR, "failed to bind "
+			    "interrupt handler for irq %ju to cpu %d: %d\n",
+			    rman_get_start(irq->res), irq->cpu, rc);
+			goto err;
+		}
+
+		ena_log(pdev, INFO, "queue %d - cpu %d\n",
+		    i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
+#endif
 	}
 
 	return (rc);
@@ -2464,6 +2492,10 @@ ena_calc_max_io_queue_num(device_t pdev, struct ena_com_dev *ena_dev,
 	/* 1 IRQ for for mgmnt and 1 IRQ for each TX/RX pair */
 	max_num_io_queues = min_t(uint32_t, max_num_io_queues,
 	    pci_msix_count(pdev) - 1);
+#ifdef RSS
+	max_num_io_queues = min_t(uint32_t, max_num_io_queues,
+	    rss_getnumbuckets());
+#endif
 
 	return (max_num_io_queues);
 }
@@ -2692,7 +2724,8 @@ ena_config_host_info(struct ena_com_dev *ena_dev, device_t dev)
 		(DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
 	host_info->num_cpus = mp_ncpus;
 	host_info->driver_supported_features =
-	    ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK;
+	    ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
+	    ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;
 
 	rc = ena_com_set_host_attributes(ena_dev);
 	if (unlikely(rc != 0)) {
@@ -3709,6 +3742,9 @@ ena_detach(device_t pdev)
 
 	ena_free_pci_resources(adapter);
 
+	if (adapter->rss_indir != NULL)
+		free(adapter->rss_indir, M_DEVBUF);
+
 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter)))
 		ena_com_rss_destroy(ena_dev);
 
diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h
index bc0f59a6f711..50467ffe0707 100644
--- a/sys/dev/ena/ena.h
+++ b/sys/dev/ena/ena.h
@@ -34,6 +34,7 @@
 #ifndef ENA_H
 #define ENA_H
 
+#include "opt_rss.h"
 
 #include "ena-com/ena_com.h"
 #include "ena-com/ena_eth_com.h"
@@ -122,6 +123,8 @@
 
 #define	ENA_IO_TXQ_IDX(q)		(2 * (q))
 #define	ENA_IO_RXQ_IDX(q)		(2 * (q) + 1)
+#define	ENA_IO_TXQ_IDX_TO_COMBINED_IDX(q)	((q) / 2)
+#define	ENA_IO_RXQ_IDX_TO_COMBINED_IDX(q)	(((q) - 1) / 2)
 
 #define	ENA_MGMNT_IRQ_IDX		0
 #define	ENA_IO_IRQ_FIRST_IDX		1
@@ -200,7 +203,9 @@ struct ena_irq {
 	void *cookie;
 	unsigned int vector;
 	bool requested;
+#ifdef RSS
 	int cpu;
+#endif
 	char name[ENA_IRQNAME_SIZE];
 };
 
@@ -213,7 +218,10 @@ struct ena_que {
 	struct taskqueue *cleanup_tq;
 
 	uint32_t id;
+#ifdef RSS
 	int cpu;
+	cpuset_t cpu_mask;
+#endif
 	struct sysctl_oid *oid;
 };
 
@@ -431,7 +439,7 @@ struct ena_adapter {
 	uint32_t buf_ring_size;
 
 	/* RSS*/
-	uint8_t	rss_ind_tbl[ENA_RX_RSS_TABLE_SIZE];
+	struct ena_indir *rss_indir;
 
 	uint8_t mac_addr[ETHER_ADDR_LEN];
 	/* mdio and phy*/
diff --git a/sys/dev/ena/ena_datapath.c b/sys/dev/ena/ena_datapath.c
index e1993a714fa0..0e6a6fe82038 100644
--- a/sys/dev/ena/ena_datapath.c
+++ b/sys/dev/ena/ena_datapath.c
@@ -36,6 +36,9 @@ __FBSDID("$FreeBSD$");
 #ifdef DEV_NETMAP
 #include "ena_netmap.h"
 #endif /* DEV_NETMAP */
+#ifdef RSS
+#include <net/rss_config.h>
+#endif /* RSS */
 
 /*********************************************************************
  *  Static functions prototypes
@@ -129,6 +132,9 @@ ena_mq_start(if_t ifp, struct mbuf *m)
 	struct ena_ring *tx_ring;
 	int ret, is_drbr_empty;
 	uint32_t i;
+#ifdef RSS
+	uint32_t bucket_id;
+#endif
 
 	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
 		return (ENODEV);
@@ -140,7 +146,13 @@ ena_mq_start(if_t ifp, struct mbuf *m)
 	 * It should improve performance.
 	 */
 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
-		i = m->m_pkthdr.flowid % adapter->num_io_queues;
+#ifdef RSS
+		if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
+		    &bucket_id) == 0)
+			i = bucket_id % adapter->num_io_queues;
+		else
+#endif
+			i = m->m_pkthdr.flowid % adapter->num_io_queues;
 	} else {
 		i = curcpu % adapter->num_io_queues;
 	}
diff --git a/sys/dev/ena/ena_rss.c b/sys/dev/ena/ena_rss.c
index f314905559d2..116eaa425b01 100644
--- a/sys/dev/ena/ena_rss.c
+++ b/sys/dev/ena/ena_rss.c
@@ -59,6 +59,45 @@ ena_rss_key_fill(void *key, size_t size)
 	memcpy(key, default_key, size);
 }
 
+/*
+ * ENA HW expects the key to be in reverse-byte order.
+ * Copies key_size bytes from key into reordered_key, last byte first.
+ * Indexing is used so that no pointer is ever moved before key[0].
+ */
+static void
+ena_rss_reorder_hash_key(u8 *reordered_key, const u8 *key, size_t key_size)
+{
+	size_t i;
+
+	for (i = 0; i < key_size; ++i)
+		reordered_key[i] = key[key_size - 1 - i];
+}
+
+int ena_rss_set_hash(struct ena_com_dev *ena_dev, const u8 *key)
+{
+	enum ena_admin_hash_functions ena_func = ENA_ADMIN_TOEPLITZ;
+	u8 hw_key[ENA_HASH_KEY_SIZE];
+
+	ena_rss_reorder_hash_key(hw_key, key, ENA_HASH_KEY_SIZE);
+
+	return (ena_com_fill_hash_function(ena_dev, ena_func, hw_key,
+	    ENA_HASH_KEY_SIZE, 0x0));
+}
+
+int ena_rss_get_hash_key(struct ena_com_dev *ena_dev, u8 *key)
+{
+	u8 hw_key[ENA_HASH_KEY_SIZE];
+	int rc;
+
+	rc = ena_com_get_hash_key(ena_dev, hw_key);
+	if (rc != 0)
+		return (rc);
+	/* HW keeps the key byte-reversed; restore the caller's byte order. */
+	ena_rss_reorder_hash_key(key, hw_key, ENA_HASH_KEY_SIZE);
+
+	return (0);
+}
+
 static int
 ena_rss_init_default(struct ena_adapter *adapter)
 {
@@ -73,7 +112,11 @@ ena_rss_init_default(struct ena_adapter *adapter)
 	}
 
 	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
+#ifdef RSS
+		qid = rss_get_indirection_to_bucket(i) % adapter->num_io_queues;
+#else
 		qid = i % adapter->num_io_queues;
+#endif
 		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
 		    ENA_IO_RXQ_IDX(qid));
 		if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
@@ -89,12 +132,11 @@ ena_rss_init_default(struct ena_adapter *adapter)
 		uint8_t hash_key[RSS_KEYSIZE];
 
 		rss_getkey(hash_key);
-		rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ,
-		    hash_key, RSS_KEYSIZE, 0xFFFFFFFF);
+		rc = ena_rss_set_hash(ena_dev, hash_key);
 	} else
 #endif
-	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
-	    ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
+	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL,
+	    ENA_HASH_KEY_SIZE, 0x0);
 	if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
 		ena_log(dev, ERR, "Cannot fill hash function\n");
 		goto err_rss_destroy;
@@ -106,7 +148,9 @@ ena_rss_init_default(struct ena_adapter *adapter)
 		goto err_rss_destroy;
 	}
 
-	return (0);
+	rc = ena_rss_indir_init(adapter);
+
+	return (rc == EOPNOTSUPP ? 0 : rc);
 
 err_rss_destroy:
 	ena_com_rss_destroy(ena_dev);
@@ -180,3 +224,77 @@ ena_rss_init_default_deferred(void *arg)
 	}
 }
 SYSINIT(ena_rss_init, SI_SUB_KICK_SCHEDULER, SI_ORDER_SECOND, ena_rss_init_default_deferred, NULL);
+
+int
+ena_rss_indir_get(struct ena_adapter *adapter, uint32_t *table)
+{
+	int rc, i;
+
+	rc = ena_com_indirect_table_get(adapter->ena_dev, table);
+	if (rc != 0) {
+		if (rc == EOPNOTSUPP)
+			device_printf(adapter->pdev,
+			    "Reading from indirection table not supported\n");
+		else
+			device_printf(adapter->pdev,
+			    "Unable to get indirection table\n");
+		return (rc);
+	}
+
+	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; ++i)
+		table[i] = ENA_IO_RXQ_IDX_TO_COMBINED_IDX(table[i]);
+
+	return (0);
+}
+
+int
+ena_rss_indir_set(struct ena_adapter *adapter, uint32_t *table)
+{
+	int rc, i;
+
+	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; ++i) {
+		rc = ena_com_indirect_table_fill_entry(adapter->ena_dev, i,
+		    ENA_IO_RXQ_IDX(table[i]));
+		if (rc != 0) {
+			device_printf(adapter->pdev,
+			    "Cannot fill indirection table entry %d\n", i);
+			return (rc);
+		}
+	}
+
+	rc = ena_com_indirect_table_set(adapter->ena_dev);
+	if (rc == EOPNOTSUPP)
+		device_printf(adapter->pdev,
+		    "Writing to indirection table not supported\n");
+	else if (rc != 0)
+		device_printf(adapter->pdev,
+		    "Cannot set indirection table\n");
+
+	return (rc);
+}
+
+int
+ena_rss_indir_init(struct ena_adapter *adapter)
+{
+	struct ena_indir *indir = adapter->rss_indir;
+	int rc;
+
+	if (indir == NULL) {
+		adapter->rss_indir = indir = malloc(sizeof(struct ena_indir),
+		    M_DEVBUF, M_WAITOK | M_ZERO);
+		if (indir == NULL)
+			return (ENOMEM);
+	}
+
+	rc = ena_rss_indir_get(adapter, indir->table);
+	if (rc != 0) {
+		free(adapter->rss_indir, M_DEVBUF);
+		adapter->rss_indir = NULL;
+
+		return (rc);
+	}
+
+	ena_rss_copy_indir_buf(indir->sysctl_buf, indir->table);
+
+	return (0);
+}
diff --git a/sys/dev/ena/ena_rss.h b/sys/dev/ena/ena_rss.h
index 14e686a5c045..42bec6fb2aa6 100644
--- a/sys/dev/ena/ena_rss.h
+++ b/sys/dev/ena/ena_rss.h
@@ -44,6 +44,30 @@
 
 #include "ena.h"
 
+#define ENA_RX_RSS_MSG_RECORD_SZ	8
+
+struct ena_indir {
+	uint32_t table[ENA_RX_RSS_TABLE_SIZE];
+	/* This is the buffer wired to `rss.indir_table` sysctl. */
+	char sysctl_buf[ENA_RX_RSS_TABLE_SIZE * ENA_RX_RSS_MSG_RECORD_SZ];
+};
+
+int	ena_rss_set_hash(struct ena_com_dev *ena_dev, const u8 *key);
+int	ena_rss_get_hash_key(struct ena_com_dev *ena_dev, u8 *key);
 int	ena_rss_configure(struct ena_adapter *);
+int	ena_rss_indir_get(struct ena_adapter *adapter, uint32_t *table);
+int	ena_rss_indir_set(struct ena_adapter *adapter, uint32_t *table);
+int	ena_rss_indir_init(struct ena_adapter *adapter);
+
+static inline void
+ena_rss_copy_indir_buf(char *buf, uint32_t *table)
+{
+	int i;
+
+	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; ++i) {
+		buf += snprintf(buf, ENA_RX_RSS_MSG_RECORD_SZ + 1,
+		    "%s%d:%d", i == 0 ? "" : " ", i, table[i]);
+	}
+}
 
 #endif /* !(ENA_RSS_H) */
diff --git a/sys/dev/ena/ena_sysctl.c b/sys/dev/ena/ena_sysctl.c
index 91bd0f74b341..db3eb69cd369 100644
--- a/sys/dev/ena/ena_sysctl.c
+++ b/sys/dev/ena/ena_sysctl.c
@@ -31,19 +31,31 @@
 #include <sys/param.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_rss.h"
+
 #include "ena_sysctl.h"
+#include "ena_rss.h"
 
 static void	ena_sysctl_add_wd(struct ena_adapter *);
 static void	ena_sysctl_add_stats(struct ena_adapter *);
 static void	ena_sysctl_add_eni_metrics(struct ena_adapter *);
 static void	ena_sysctl_add_tuneables(struct ena_adapter *);
+/* Kernel option RSS prevents manipulation of key hash and indirection table. */
+#ifndef RSS
+static void	ena_sysctl_add_rss(struct ena_adapter *);
+#endif
 static int	ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
 static int	ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
 static int	ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
 static int	ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS);
+#ifndef RSS
+static int	ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS);
+static int	ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS);
+#endif
 
 /* Limit max ENI sample rate to be an hour. */
 #define ENI_METRICS_MAX_SAMPLE_INTERVAL 3600
+#define ENA_HASH_KEY_MSG_SIZE		(ENA_HASH_KEY_SIZE * 2 + 1)
 
 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
     "ENA driver parameters");
@@ -83,6 +95,8 @@ SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN,
     &ena_force_large_llq_header, 0,
     "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n");
 
+int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE;
+
 void
 ena_sysctl_add_nodes(struct ena_adapter *adapter)
 {
@@ -90,6 +104,9 @@ ena_sysctl_add_nodes(struct ena_adapter *adapter)
 	ena_sysctl_add_stats(adapter);
 	ena_sysctl_add_eni_metrics(adapter);
 	ena_sysctl_add_tuneables(adapter);
+#ifndef RSS
+	ena_sysctl_add_rss(adapter);
+#endif
 }
 
 static void
@@ -405,6 +422,45 @@ ena_sysctl_add_tuneables(struct ena_adapter *adapter)
 	    ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
 }
 
+/* Kernel option RSS prevents manipulation of key hash and indirection table. */
+#ifndef RSS
+static void
+ena_sysctl_add_rss(struct ena_adapter *adapter)
+{
+	device_t dev;
+
+	struct sysctl_ctx_list *ctx;
+	struct sysctl_oid *tree;
+	struct sysctl_oid_list *child;
+
+	dev = adapter->pdev;
+
+	ctx = device_get_sysctl_ctx(dev);
+	tree = device_get_sysctl_tree(dev);
+	child = SYSCTL_CHILDREN(tree);
+
+	/* RSS options */
+	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss",
+	    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options.");
+	child = SYSCTL_CHILDREN(tree);
+
+	/* RSS hash key */
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key",
+	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
+	    ena_sysctl_rss_key, "A", "RSS key.");
+
+	/* Tuneable RSS indirection table */
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table",
+	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
+	    ena_sysctl_rss_indir_table, "A", "RSS indirection table.");
+
+	/* RSS indirection table size */
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size",
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0,
+	    "RSS indirection table size.");
+}
+#endif /* RSS */
+
 
 /*
  * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes.
@@ -662,3 +718,203 @@ unlock:
 
 	return (0);
 }
+
+#ifndef RSS
+/*
+ * Change the Receive Side Scaling hash key.
+ */
+static int
+ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS)
+{
+	struct ena_adapter *adapter = arg1;
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	enum ena_admin_hash_functions ena_func;
+	char msg[ENA_HASH_KEY_MSG_SIZE];
+	char elem[3] = { 0 };
+	char *endp;
+	u8 rss_key[ENA_HASH_KEY_SIZE];
+	int error, i;
+
+	ENA_LOCK_LOCK();
+	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
+		error = EINVAL;
+		goto unlock;
+	}
+
+	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
+		error = ENOTSUP;
+		goto unlock;
+	}
+
+	error = sysctl_wire_old_buffer(req, sizeof(msg));
+	if (error != 0)
+		goto unlock;
+
+	error = ena_com_get_hash_function(adapter->ena_dev, &ena_func);
+	if (error != 0) {
+		device_printf(adapter->pdev, "Cannot get hash function\n");
+		goto unlock;
+	}
+
+	if (ena_func != ENA_ADMIN_TOEPLITZ) {
+		error = EINVAL;
+		device_printf(adapter->pdev, "Unsupported hash algorithm\n");
+		goto unlock;
+	}
+
+	error = ena_rss_get_hash_key(ena_dev, rss_key);
+	if (error != 0) {
+		device_printf(adapter->pdev, "Cannot get hash key\n");
+		goto unlock;
+	}
+
+	for (i = 0; i < ENA_HASH_KEY_SIZE; ++i)
+		snprintf(&msg[i * 2], 3, "%02x", rss_key[i]);
+
+	error = sysctl_handle_string(oidp, msg, sizeof(msg), req);
+	if (error != 0 || req->newptr == NULL)
+		goto unlock;
+
+	if (strlen(msg) != sizeof(msg) - 1) {
+		error = EINVAL;
+		device_printf(adapter->pdev, "Invalid key size\n");
+		goto unlock;
+	}
+
+	for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) {
+		strncpy(elem, &msg[i * 2], 2);
+		rss_key[i] = strtol(elem, &endp, 16);
+
+		/* Both hex nibbles in the string must be valid to continue. */
+		if (endp == elem || *endp != '\0' || rss_key[i] < 0) {
+			error = EINVAL;
+			device_printf(adapter->pdev,
+			    "Invalid key hex value: '%c'\n", *endp);
+			goto unlock;
+		}
+	}
+
+	error = ena_rss_set_hash(ena_dev, rss_key);
+	if (error != 0)
+		device_printf(adapter->pdev, "Cannot fill hash key\n");
+
+unlock:
+	ENA_LOCK_UNLOCK();
+
+	return (error);
+}
+
+/*
+ * Read or change the Receive Side Scaling indirection table.
+ *
+ * The sysctl entry string consists of one or more `x:y` pairs, where
+ * x stands for the table index and y for its new value.
+ * Table indices that don't need to be updated can be omitted from the string
+ * and will retain their existing values. If an index is entered more than once,
+ * the last value is used.
+ *
+ * Example:
+ * To update two selected indices in the RSS indirection table, e.g. setting
+ * index 0 to queue 5 and then index 5 to queue 0, the below command should be
+ * used:
+ *   sysctl dev.ena.0.rss.indir_table="0:5 5:0"
+ */
+static int
+ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS)
+{
+	int num_queues, error;
+	struct ena_adapter *adapter = arg1;
+	struct ena_com_dev *ena_dev;
+	struct ena_indir *indir;
+	char *msg, *buf, *endp;
+	uint32_t idx, value;
+
+	ENA_LOCK_LOCK();
+	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
+		error = EINVAL;
+		goto unlock;
+	}
+
+	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
+		error = ENOTSUP;
+		goto unlock;
+	}
+
+	ena_dev = adapter->ena_dev;
+	indir = adapter->rss_indir;
+	/* Validate indir before forming a pointer into its sysctl buffer. */
+	if (unlikely(indir == NULL)) {
+		error = ENOTSUP;
+		goto unlock;
+	}
+	msg = indir->sysctl_buf;
+
+	error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req);
+	if (error != 0 || req->newptr == NULL)
+		goto unlock;
+
+	num_queues = adapter->num_io_queues;
+
+	/*
+	 * This sysctl expects msg to be a list of `x:y` record pairs,
+	 * where x is the indirection table index and y is its value.
+	 */
+	for (buf = msg; *buf != '\0'; buf = endp) {
+		idx = strtol(buf, &endp, 10);
+
+		if (endp == buf || idx < 0) {
+			device_printf(adapter->pdev, "Invalid index: %s\n",
+			    buf);
+			error = EINVAL;
+			break;
+		}
+
+		if (idx >= ENA_RX_RSS_TABLE_SIZE) {
+			device_printf(adapter->pdev, "Index %d out of range\n",
+			    idx);
+			error = ERANGE;
+			break;
+		}
+
+		buf = endp;
+
+		if (*buf++ != ':') {
+			device_printf(adapter->pdev, "Missing ':' separator\n");
+			error = EINVAL;
+			break;
+		}
+
+		value = strtol(buf, &endp, 10);
+
+		if (endp == buf || value < 0) {
+			device_printf(adapter->pdev, "Invalid value: %s\n",
+			    buf);
+			error = EINVAL;
+			break;
+		}
+
+		if (value >= num_queues) {
+			device_printf(adapter->pdev, "Value %d out of range\n",
+			    value);
+			error = ERANGE;
+			break;
+		}
+
+		indir->table[idx] = value;
+	}
+
+	if (error != 0) /* Reload indirection table with last good data. */
+		ena_rss_indir_get(adapter, indir->table);
+
+	/* At this point msg has been clobbered by sysctl_handle_string. */
+	ena_rss_copy_indir_buf(msg, indir->table);
+
+	if (error == 0)
+		error = ena_rss_indir_set(adapter, indir->table);
+
+unlock:
+	ENA_LOCK_UNLOCK();
+
+	return (error);
+}
+#endif /* RSS */


More information about the dev-commits-src-all mailing list