git: b065af196fdc - main - aq(4): interrupt model and queue-count correctness
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 20 Jun 2026 19:10:35 UTC
The branch main has been updated by adrian:
URL: https://cgit.FreeBSD.org/src/commit/?id=b065af196fdcab18d36caae214c905467723b3f1
commit b065af196fdcab18d36caae214c905467723b3f1
Author: Nick Price <nick@spun.io>
AuthorDate: 2026-06-20 19:02:34 +0000
Commit: Adrian Chadd <adrian@FreeBSD.org>
CommitDate: 2026-06-20 19:10:15 +0000
aq(4): interrupt model and queue-count correctness
Rework the MSI-X and queue-count handling to use the standard iflib
interrupt model and to keep every ring serviced.
- Cap isc_n{tx,rx}qsets_max at the RSS indirection-table size
(HW_ATL_RSS_INDIRECTION_QUEUES_MAX, 8) instead of HW_ATL_B0_RINGS_MAX.
RSS only steers RX traffic to eight rings, so on hosts with more CPUs
the surplus TX rings never make progress: iflib flowid-steers TCP
flows across every TX ring, and a flow landing on a surplus ring has
its segments queued but never transmitted, hanging the connection.
- Add a TX-specific ifdi_tx_queue_intr_enable that reads
tx_rings[txqid]->msix. It was wired to the RX handler, which indexes
rx_rings[] with the qid; safe only while tx_rings_count ==
rx_rings_count, otherwise the lookup walks past rx_rings[] and feeds a
garbage msix value into the IRQ mask register.
- Fix three MSI-X / admin-IRQ bugs: the TX softirq was attached to
rx_rings[i]->irq (overwriting the RX handle and leaving the TX handle
uninitialized); the admin-IRQ failure path dereferenced
rx_rings[rx_rings_count], one past the end; and aq_linkstat_isr cleared
the admin interrupt by writing the raw vector number instead of
BIT(vector).
- Allocate one IFLIB_INTR_RXTX vector per RX/TX queue pair like every
other in-tree iflib driver (em/ix/igc, vmxnet3) instead of an
IFLIB_INTR_RX vector per RX ring plus a hand-wrapped IFLIB_INTR_TX
softirq per TX ring. iflib's iflib_fast_intr_rxtx() then services TX
completions on the shared vector through isc_txd_credits_update().
Reviewed by: adrian
Differential Revision: https://reviews.freebsd.org/D57434
---
sys/dev/aq/aq_hw.c | 37 +++++++++++++++++++++----------------
sys/dev/aq/aq_hw.h | 7 +++++++
sys/dev/aq/aq_irq.c | 2 +-
sys/dev/aq/aq_main.c | 36 +++++++++++++++++++++++++++---------
4 files changed, 56 insertions(+), 26 deletions(-)
diff --git a/sys/dev/aq/aq_hw.c b/sys/dev/aq/aq_hw.c
index 5a45e61041c3..3e3e4a9d2f47 100644
--- a/sys/dev/aq/aq_hw.c
+++ b/sys/dev/aq/aq_hw.c
@@ -394,6 +394,7 @@ aq_hw_qos_set(struct aq_hw *hw)
{
uint32_t tc = 0U;
uint32_t buff_size = 0U;
+ uint32_t n_tcs;
unsigned int i_priority = 0U;
int err = 0;
@@ -409,19 +410,23 @@ aq_hw_qos_set(struct aq_hw *hw)
tps_tx_pkt_shed_desc_tc_arb_mode_set(hw, 0U);
tps_tx_pkt_shed_data_arb_mode_set(hw, 0U);
- tps_tx_pkt_shed_tc_data_max_credit_set(hw, 0xFFF, 0U);
- tps_tx_pkt_shed_tc_data_weight_set(hw, 0x64, 0U);
- tps_tx_pkt_shed_desc_tc_max_credit_set(hw, 0x50, 0U);
- tps_tx_pkt_shed_desc_tc_weight_set(hw, 0x1E, 0U);
-
- /* Tx buf size */
- buff_size = AQ_HW_TXBUF_MAX;
-
- tpb_tx_pkt_buff_size_per_tc_set(hw, buff_size, tc);
- tpb_tx_buff_hi_threshold_per_tc_set(hw,
- (buff_size * (1024 / 32U) * 66U) / 100U, tc);
- tpb_tx_buff_lo_threshold_per_tc_set(hw,
- (buff_size * (1024 / 32U) * 50U) / 100U, tc);
+ /* One TC per active 8-ring group; share the buffer across them. */
+ n_tcs = howmany(hw->tx_rings_count, HW_ATL_B0_RINGS_PER_TC);
+ n_tcs = MIN(MAX(n_tcs, 1U), HW_ATL_B0_TCS_MAX);
+ buff_size = AQ_HW_TXBUF_MAX / n_tcs;
+
+ for (tc = 0; tc < n_tcs; tc++) {
+ tps_tx_pkt_shed_tc_data_max_credit_set(hw, 0xFFF, tc);
+ tps_tx_pkt_shed_tc_data_weight_set(hw, 0x64, tc);
+ tps_tx_pkt_shed_desc_tc_max_credit_set(hw, 0x50, tc);
+ tps_tx_pkt_shed_desc_tc_weight_set(hw, 0x1E, tc);
+
+ tpb_tx_pkt_buff_size_per_tc_set(hw, buff_size, tc);
+ tpb_tx_buff_hi_threshold_per_tc_set(hw,
+ AQ_BUF_THRESHOLD(buff_size, 66U), tc);
+ tpb_tx_buff_lo_threshold_per_tc_set(hw,
+ AQ_BUF_THRESHOLD(buff_size, 50U), tc);
+ }
/* QoS Rx buf size per TC */
tc = 0;
@@ -429,13 +434,13 @@ aq_hw_qos_set(struct aq_hw *hw)
rpb_rx_pkt_buff_size_per_tc_set(hw, buff_size, tc);
rpb_rx_buff_hi_threshold_per_tc_set(hw,
- (buff_size * (1024U / 32U) * 66U) / 100U, tc);
+ AQ_BUF_THRESHOLD(buff_size, 66U), tc);
rpb_rx_buff_lo_threshold_per_tc_set(hw,
- (buff_size * (1024U / 32U) * 50U) / 100U, tc);
+ AQ_BUF_THRESHOLD(buff_size, 50U), tc);
/* QoS 802.1p priority -> TC mapping */
for (i_priority = 8U; i_priority--;)
- rpf_rpb_user_priority_tc_map_set(hw, i_priority, 0U);
+ rpf_rpb_user_priority_tc_map_set(hw, i_priority, 0U);
err = aq_hw_err_from_flags(hw);
AQ_DBG_EXIT(err);
diff --git a/sys/dev/aq/aq_hw.h b/sys/dev/aq/aq_hw.h
index bdd79871fd76..dd214aa3673b 100644
--- a/sys/dev/aq/aq_hw.h
+++ b/sys/dev/aq/aq_hw.h
@@ -169,6 +169,8 @@ struct aq_hw {
uint32_t mbox_addr;
struct aq_hw_fw_mbox mbox;
+
+ uint32_t tx_rings_count;
};
#define aq_hw_s aq_hw
@@ -185,6 +187,8 @@ struct aq_hw {
#define HW_ATL_B0_MTU_JUMBO 16352U
#define HW_ATL_B0_TSO_SIZE (160*1024)
#define HW_ATL_B0_RINGS_MAX 32U
+#define HW_ATL_B0_TCS_MAX 4U /* 4-TC mode */
+#define HW_ATL_B0_RINGS_PER_TC (HW_ATL_B0_RINGS_MAX / HW_ATL_B0_TCS_MAX)
#define HW_ATL_B0_LRO_RXD_MAX 16U
#define AQ_HW_FW_SM_RAM 0x2U
@@ -208,6 +212,9 @@ struct aq_hw {
#define AQ_HW_TXBUF_MAX 160U
#define AQ_HW_RXBUF_MAX 320U
+/* pct% of a kb-KB packet buffer, in 32-byte threshold units */
+#define AQ_BUF_THRESHOLD(kb, pct) ((kb) * (1024U / 32U) * (pct) / 100U)
+
#define L2_FILTER_ACTION_DISCARD (0x0)
#define L2_FILTER_ACTION_HOST (0x1)
diff --git a/sys/dev/aq/aq_irq.c b/sys/dev/aq/aq_irq.c
index 6338a7777dee..21f319b87fbd 100644
--- a/sys/dev/aq/aq_irq.c
+++ b/sys/dev/aq/aq_irq.c
@@ -179,7 +179,7 @@ aq_linkstat_isr(void *arg)
struct aq_hw *hw = &aq_dev->hw;
/* clear interrupt status */
- itr_irq_status_clearlsw_set(hw, aq_dev->msix);
+ itr_irq_status_clearlsw_set(hw, BIT(aq_dev->msix));
iflib_admin_intr_deferred(aq_dev->ctx);
diff --git a/sys/dev/aq/aq_main.c b/sys/dev/aq/aq_main.c
index e893bb7f73b3..ebe8b13da795 100644
--- a/sys/dev/aq/aq_main.c
+++ b/sys/dev/aq/aq_main.c
@@ -182,6 +182,7 @@ static void aq_add_stats_sysctls(struct aq_dev *softc);
static void aq_if_enable_intr(if_ctx_t ctx);
static void aq_if_disable_intr(if_ctx_t ctx);
static int aq_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
+static int aq_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
static int aq_if_msix_intr_assign(if_ctx_t ctx, int msix);
/* VLAN support */
@@ -253,7 +254,7 @@ static device_method_t aq_if_methods[] = {
DEVMETHOD(ifdi_intr_enable, aq_if_enable_intr),
DEVMETHOD(ifdi_intr_disable, aq_if_disable_intr),
DEVMETHOD(ifdi_rx_queue_intr_enable, aq_if_rx_queue_intr_enable),
- DEVMETHOD(ifdi_tx_queue_intr_enable, aq_if_rx_queue_intr_enable),
+ DEVMETHOD(ifdi_tx_queue_intr_enable, aq_if_tx_queue_intr_enable),
DEVMETHOD(ifdi_msix_intr_assign, aq_if_msix_intr_assign),
/* VLAN support */
@@ -411,7 +412,7 @@ aq_if_attach_pre(if_ctx_t ctx)
scctx->isc_rxqsizes[0] = sizeof(aq_rx_desc_t) * scctx->isc_nrxd[0];
scctx->isc_ntxqsets_max = HW_ATL_B0_RINGS_MAX;
- scctx->isc_nrxqsets_max = HW_ATL_B0_RINGS_MAX;
+ scctx->isc_nrxqsets_max = HW_ATL_RSS_INDIRECTION_QUEUES_MAX;
/* iflib will map and release this bar */
scctx->isc_msix_bar = pci_msix_table_bar(softc->dev);
@@ -667,6 +668,8 @@ aq_if_init(if_ctx_t ctx)
softc = iflib_get_softc(ctx);
hw = &softc->hw;
+ hw->tx_rings_count = softc->tx_rings_count;
+
err = aq_hw_init(&softc->hw, softc->hw.mac_addr, softc->msix,
softc->scctx->isc_intr == IFLIB_INTR_MSIX);
if (err != EOK) {
@@ -965,6 +968,20 @@ aq_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
return (0);
}
+static int
+aq_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
+{
+ struct aq_dev *softc = iflib_get_softc(ctx);
+ struct aq_hw *hw = &softc->hw;
+
+ AQ_DBG_ENTER();
+
+ itr_irq_msk_setlsw_set(hw, BIT(softc->tx_rings[txqid]->msix));
+
+ AQ_DBG_EXIT(0);
+ return (0);
+}
+
static int
aq_if_msix_intr_assign(if_ctx_t ctx, int msix)
{
@@ -979,7 +996,7 @@ aq_if_msix_intr_assign(if_ctx_t ctx, int msix)
for (i = 0; i < softc->rx_rings_count; i++, vector++) {
snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
rc = iflib_irq_alloc_generic(ctx, &softc->rx_rings[i]->irq,
- vector + 1, IFLIB_INTR_RX, aq_isr_rx, softc->rx_rings[i],
+ vector + 1, IFLIB_INTR_RXTX, aq_isr_rx, softc->rx_rings[i],
softc->rx_rings[i]->index, irq_name);
device_printf(softc->dev, "Assign IRQ %u to rx ring %u\n",
vector, softc->rx_rings[i]->index);
@@ -995,12 +1012,13 @@ aq_if_msix_intr_assign(if_ctx_t ctx, int msix)
rx_vectors = vector;
- for (i = 0; i < softc->tx_rings_count; i++, vector++) {
+ for (i = 0; i < softc->tx_rings_count; i++) {
snprintf(irq_name, sizeof(irq_name), "txq%d", i);
- iflib_softirq_alloc_generic(ctx, &softc->rx_rings[i]->irq,
- IFLIB_INTR_TX, softc->tx_rings[i], i, irq_name);
-
- softc->tx_rings[i]->msix = (vector % softc->rx_rings_count);
+ softc->tx_rings[i]->msix = (i % softc->rx_rings_count);
+ iflib_softirq_alloc_generic(ctx,
+ &softc->rx_rings[softc->tx_rings[i]->msix]->irq,
+ IFLIB_INTR_TX, softc->tx_rings[i],
+ softc->tx_rings[i]->index, irq_name);
device_printf(softc->dev, "Assign IRQ %u to tx ring %u\n",
softc->tx_rings[i]->msix, softc->tx_rings[i]->index);
}
@@ -1013,7 +1031,7 @@ aq_if_msix_intr_assign(if_ctx_t ctx, int msix)
if (rc) {
device_printf(iflib_get_dev(ctx),
"Failed to register admin handler");
- i = softc->rx_rings_count;
+ i = softc->rx_rings_count - 1;
goto fail;
}
AQ_DBG_EXIT(0);