git: 619934a5c165 - main - aq(4): RX/TX and HW-path correctness and hardening
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 20 Jun 2026 19:10:37 UTC
The branch main has been updated by adrian:
URL: https://cgit.FreeBSD.org/src/commit/?id=619934a5c165869604810ece91c2f2cb734fc3eb
commit 619934a5c165869604810ece91c2f2cb734fc3eb
Author: Nick Pricenull <nick@spun.ionull>
AuthorDate: 2026-06-20 19:02:52 +0000
Commit: Adrian Chadd <adrian@FreeBSD.org>
CommitDate: 2026-06-20 19:10:15 +0000
aq(4): RX/TX and HW-path correctness and hardening
Independent correctness fixes, plus robustness against a non-responding
device, malformed descriptor writeback, and torn MMIO reads, and the move
to the FreeBSD bus_space(9) register abstraction.
Correctness:
- aq_hw_ver_match returned true if any of major/minor/build was >=
expected; compare lexicographically so e.g. 2.0.1 is correctly seen as
older than 2.1.0.
- The VLAN hardware-filter iteration used the vlan tag directly as the
bitstring index; use vlan_tag + 1 so the active-VLAN bookkeeping lines
up with the table.
- aq_initmedia only registered IFM_AUTO in full-duplex/pause variants, so
a bare "ifconfig aq0 media autoselect" matched no entry and returned
ENXIO. Add the bare IFM_ETHER|IFM_AUTO entry, matching ix/em/igc/ixv.
- Convert the per-ring diagnostic counters to counter(9): per-CPU,
tear-free, no atomics on the increment path, fixing a data race and a
32-bit torn read against the locklessly-read sysctls. Drop three
counters that were never populated (jumbo_pkts, tx_drops, tx_queue_full).
Hardening and modernization:
- Implement aq_hw_err_from_flags (previously a stub returning 0 that left
~24 call sites as no-ops): detect a non-responding device in the
register read path (all-ones, confirmed against reg 0x10) and latch a
sticky AQ_HW_FLAG_ERR_UNPLUG -> ENXIO; aq_if_init clears it so a
transient detection cannot permanently wedge the device.
- Bound the RX fragment loop in aq_isc_rxd_pkt_get (EBADMSG once i reaches
isc_rx_nsegments) so a malformed never-EOP stream cannot write past the
fragment array.
- Bound the TX completion count in aq_isc_txd_credits_update against the
raw HW head pointer (reject head >= tx_size) so a glitched head cannot
make iflib free still-in-flight TX mbufs.
- Remove the dead hardware-RSC branch in aq_isc_rxd_available; it followed
wb.next_desp, a raw device value used as an index, but RSC is never
enabled so rsc_cnt is always 0. Advance sequentially like the other
in-tree iflib drivers; this drops the last raw-hardware-pointer
dereference in the RX path.
- Flush MMIO after interrupt-status acks (AQ_HW_FLUSH) so the write lands
before the vector is re-armed under auto-mask-clear; retry the high word
in read64_ against a torn lo/hi pair; document the non-atomic IMR
read-modify-write in itr_irq_map_en_{rx,tx}_set.
- Replace the raw-pointer MMIO (the Linux readl/writel idiom) with
bus_space(9): store the BAR tag and handle in struct aq_hw and route
AQ_READ_REG/AQ_WRITE_REG through bus_space_read_4/write_4. No functional
change on amd64.
Reviewed by: adrian
Differential Revision: https://reviews.freebsd.org/D57436
---
sys/dev/aq/aq_fw2x.c | 11 ++++++--
sys/dev/aq/aq_hw.c | 30 ++++++++++++++------
sys/dev/aq/aq_hw.h | 16 +++++++++--
sys/dev/aq/aq_irq.c | 6 ++--
sys/dev/aq/aq_main.c | 77 +++++++++++++++++++++++++++++++++++++++++----------
sys/dev/aq/aq_media.c | 2 +-
sys/dev/aq/aq_ring.c | 35 +++++++++++++----------
sys/dev/aq/aq_ring.h | 19 ++++++-------
8 files changed, 139 insertions(+), 57 deletions(-)
diff --git a/sys/dev/aq/aq_fw2x.c b/sys/dev/aq/aq_fw2x.c
index aee7d80d7013..0a6d433a48fc 100644
--- a/sys/dev/aq/aq_fw2x.c
+++ b/sys/dev/aq/aq_fw2x.c
@@ -207,8 +207,15 @@ int fw2x_get_stats(struct aq_hw* hw, struct aq_hw_stats_s* stats);
static uint64_t
read64_(struct aq_hw* hw, uint32_t addr)
{
- uint64_t lo = AQ_READ_REG(hw, addr);
- uint64_t hi = AQ_READ_REG(hw, addr + 4);
+ uint64_t lo, hi, hi2;
+
+ hi = AQ_READ_REG(hw, addr + 4);
+ do {
+ hi2 = hi;
+ lo = AQ_READ_REG(hw, addr);
+ hi = AQ_READ_REG(hw, addr + 4);
+ } while (hi != hi2);
+
return (lo | (hi << 32));
}
diff --git a/sys/dev/aq/aq_hw.c b/sys/dev/aq/aq_hw.c
index f3f47c7ecb1e..882fdf1807f2 100644
--- a/sys/dev/aq/aq_hw.c
+++ b/sys/dev/aq/aq_hw.c
@@ -51,9 +51,24 @@
int
aq_hw_err_from_flags(struct aq_hw *hw)
{
+ if (atomic_load_acq_long(&hw->flags) & AQ_HW_FLAG_ERR_UNPLUG)
+ return (ENXIO);
return (0);
}
+inline uint32_t
+aq_hw_read_reg(struct aq_hw *hw, uint32_t reg)
+{
+ uint32_t val = le32toh(bus_space_read_4(hw->hw_tag, hw->hw_handle, reg));
+
+ if (__predict_false(val == 0xFFFFFFFFU) &&
+ le32toh(bus_space_read_4(hw->hw_tag, hw->hw_handle, 0x10)) ==
+ 0xFFFFFFFFU)
+ atomic_set_rel_long(&hw->flags, AQ_HW_FLAG_ERR_UNPLUG);
+
+ return (val);
+}
+
static void
aq_hw_chip_features_init(struct aq_hw *hw, uint32_t *p)
{
@@ -123,14 +138,11 @@ aq_hw_ver_match(const aq_hw_fw_version* ver_expected,
{
AQ_DBG_ENTER();
- if (ver_actual->major_version >= ver_expected->major_version)
- return (true);
- if (ver_actual->minor_version >= ver_expected->minor_version)
- return (true);
- if (ver_actual->build_number >= ver_expected->build_number)
- return (true);
-
- return (false);
+ if (ver_actual->major_version != ver_expected->major_version)
+ return (ver_actual->major_version > ver_expected->major_version);
+ if (ver_actual->minor_version != ver_expected->minor_version)
+ return (ver_actual->minor_version > ver_expected->minor_version);
+ return (ver_actual->build_number >= ver_expected->build_number);
}
static int
@@ -660,7 +672,7 @@ aq_hw_init(struct aq_hw *hw, uint8_t *mac_addr, uint8_t adm_irq, bool msix)
reg_gen_irq_map_set(hw, 0x80 | adm_irq, 3);
- aq_hw_offload_set(hw);
+ err = aq_hw_offload_set(hw);
err_exit:
AQ_DBG_EXIT(err);
diff --git a/sys/dev/aq/aq_hw.h b/sys/dev/aq/aq_hw.h
index dd214aa3673b..f954f6055a4c 100644
--- a/sys/dev/aq/aq_hw.h
+++ b/sys/dev/aq/aq_hw.h
@@ -37,14 +37,21 @@
#include <sys/types.h>
#include <sys/cdefs.h>
+#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/bus.h>
#include <sys/endian.h>
#include <net/ethernet.h>
#include "aq_common.h"
-#define AQ_WRITE_REG(hw, reg, value) writel(((hw)->hw_addr + (reg)), htole32(value))
+#define AQ_WRITE_REG(hw, reg, value) \
+ bus_space_write_4((hw)->hw_tag, (hw)->hw_handle, (reg), htole32(value))
-#define AQ_READ_REG(hw, reg) le32toh(readl((hw)->hw_addr + reg))
+#define AQ_HW_FLAG_ERR_UNPLUG 0x1UL
+
+struct aq_hw;
+extern uint32_t aq_hw_read_reg(struct aq_hw *hw, uint32_t reg);
+#define AQ_READ_REG(hw, reg) aq_hw_read_reg((hw), (reg))
#define AQ_WRITE_REG_BIT(hw, reg, msk, shift, value) do { \
@@ -137,7 +144,8 @@ struct aq_hw_fc_info {
struct aq_hw {
void *aq_dev;
- uint8_t *hw_addr;
+ bus_space_tag_t hw_tag;
+ bus_space_handle_t hw_handle;
uint32_t regs_size;
uint8_t mac_addr[ETHER_ADDR_LEN];
@@ -170,6 +178,8 @@ struct aq_hw {
uint32_t mbox_addr;
struct aq_hw_fw_mbox mbox;
+ u_long flags;
+
uint32_t tx_rings_count;
};
diff --git a/sys/dev/aq/aq_irq.c b/sys/dev/aq/aq_irq.c
index 21f319b87fbd..9f8138868eac 100644
--- a/sys/dev/aq/aq_irq.c
+++ b/sys/dev/aq/aq_irq.c
@@ -163,9 +163,9 @@ aq_isr_rx(void *arg)
struct aq_dev *aq_dev = ring->dev;
struct aq_hw *hw = &aq_dev->hw;
- /* clear interrupt status */
itr_irq_status_clearlsw_set(hw, BIT(ring->msix));
- ring->stats.irq++;
+ AQ_HW_FLUSH();
+ counter_u64_add(ring->stats.irq, 1);
return (FILTER_SCHEDULE_THREAD);
}
@@ -178,8 +178,8 @@ aq_linkstat_isr(void *arg)
aq_dev_t *aq_dev = arg;
struct aq_hw *hw = &aq_dev->hw;
- /* clear interrupt status */
itr_irq_status_clearlsw_set(hw, BIT(aq_dev->msix));
+ AQ_HW_FLUSH();
iflib_admin_intr_deferred(aq_dev->ctx);
diff --git a/sys/dev/aq/aq_main.c b/sys/dev/aq/aq_main.c
index fd278826bd2d..c7701cc4f6d8 100644
--- a/sys/dev/aq/aq_main.c
+++ b/sys/dev/aq/aq_main.c
@@ -353,7 +353,8 @@ aq_if_attach_pre(if_ctx_t ctx)
softc->mmio_tag = rman_get_bustag(softc->mmio_res);
softc->mmio_handle = rman_get_bushandle(softc->mmio_res);
softc->mmio_size = rman_get_size(softc->mmio_res);
- softc->hw.hw_addr = (uint8_t*) softc->mmio_handle;
+ softc->hw.hw_tag = softc->mmio_tag;
+ softc->hw.hw_handle = softc->mmio_handle;
hw = &softc->hw;
hw->link_rate = aq_fw_speed_auto;
hw->itr = -1;
@@ -542,6 +543,42 @@ aq_if_resume(if_ctx_t ctx)
return (0);
}
+_Static_assert(sizeof(struct aq_ring_stats) % sizeof(counter_u64_t) == 0,
+ "aq_ring_stats must contain only counter_u64_t fields");
+
+static int
+aq_ring_stats_alloc(struct aq_ring *ring)
+{
+ counter_u64_t *c = (counter_u64_t *)&ring->stats;
+ int i, n = sizeof(ring->stats) / sizeof(counter_u64_t);
+
+ for (i = 0; i < n; i++) {
+ c[i] = counter_u64_alloc(M_NOWAIT);
+ if (c[i] == NULL) {
+ while (i-- > 0) {
+ counter_u64_free(c[i]);
+ c[i] = NULL;
+ }
+ return (ENOMEM);
+ }
+ }
+ return (0);
+}
+
+static void
+aq_ring_stats_free(struct aq_ring *ring)
+{
+ counter_u64_t *c = (counter_u64_t *)&ring->stats;
+ int i, n = sizeof(ring->stats) / sizeof(counter_u64_t);
+
+ for (i = 0; i < n; i++) {
+ if (c[i] != NULL) {
+ counter_u64_free(c[i]);
+ c[i] = NULL;
+ }
+ }
+}
+
/* Soft queue setup and teardown */
static int
aq_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
@@ -571,6 +608,13 @@ aq_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
ring->dev = softc;
softc->tx_rings_count++;
+
+ rc = aq_ring_stats_alloc(ring);
+ if (rc != 0) {
+ device_printf(softc->dev,
+ "atlantic: tx_ring stats alloc fail\n");
+ goto fail;
+ }
}
AQ_DBG_EXIT(rc);
@@ -621,6 +665,13 @@ aq_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
}
softc->rx_rings_count++;
+
+ rc = aq_ring_stats_alloc(ring);
+ if (rc != 0) {
+ device_printf(softc->dev,
+ "atlantic: rx_ring stats alloc fail\n");
+ goto fail;
+ }
}
AQ_DBG_EXIT(rc);
@@ -643,6 +694,7 @@ aq_if_queues_free(if_ctx_t ctx)
for (i = 0; i < softc->tx_rings_count; i++) {
if (softc->tx_rings[i]) {
+ aq_ring_stats_free(softc->tx_rings[i]);
free(softc->tx_rings[i], M_AQ);
softc->tx_rings[i] = NULL;
}
@@ -650,6 +702,7 @@ aq_if_queues_free(if_ctx_t ctx)
softc->tx_rings_count = 0;
for (i = 0; i < softc->rx_rings_count; i++) {
if (softc->rx_rings[i]){
+ aq_ring_stats_free(softc->rx_rings[i]);
free(softc->rx_rings[i], M_AQ);
softc->rx_rings[i] = NULL;
}
@@ -673,6 +726,8 @@ aq_if_init(if_ctx_t ctx)
softc = iflib_get_softc(ctx);
hw = &softc->hw;
+ atomic_store_rel_long(&hw->flags, 0);
+
hw->tx_rings_count = softc->tx_rings_count;
err = aq_hw_init(&softc->hw, softc->hw.mac_addr, softc->msix,
@@ -1080,7 +1135,7 @@ aq_update_vlan_filters(struct aq_dev *softc)
aq_vlans[i].location = i;
aq_vlans[i].queue = 0xFF;
aq_vlans[i].vlan_id = vlan_tag;
- bit_pos = vlan_tag;
+ bit_pos = vlan_tag + 1;
} else {
aq_vlans[i].enable = false;
}
@@ -1319,14 +1374,10 @@ aq_add_stats_sysctls(struct aq_dev *softc)
CTLFLAG_RD, NULL, "Queue Name");
queue_list = SYSCTL_CHILDREN(queue_node);
- SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_pkts",
+ SYSCTL_ADD_COUNTER_U64(ctx, queue_list, OID_AUTO, "tx_pkts",
CTLFLAG_RD, &(ring->stats.tx_pkts), "TX Packets");
- SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_bytes",
+ SYSCTL_ADD_COUNTER_U64(ctx, queue_list, OID_AUTO, "tx_bytes",
CTLFLAG_RD, &(ring->stats.tx_bytes), "TX Octets");
- SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_drops",
- CTLFLAG_RD, &(ring->stats.tx_drops), "TX Drops");
- SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_queue_full",
- CTLFLAG_RD, &(ring->stats.tx_queue_full), "TX Queue Full");
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "tx_head",
CTLTYPE_UINT | CTLFLAG_RD, ring, 0,
aq_sysctl_print_tx_head, "IU", "ring head pointer");
@@ -1342,15 +1393,13 @@ aq_add_stats_sysctls(struct aq_dev *softc)
CTLFLAG_RD, NULL, "Queue Name");
queue_list = SYSCTL_CHILDREN(queue_node);
- SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_pkts",
+ SYSCTL_ADD_COUNTER_U64(ctx, queue_list, OID_AUTO, "rx_pkts",
CTLFLAG_RD, &(ring->stats.rx_pkts), "RX Packets");
- SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
+ SYSCTL_ADD_COUNTER_U64(ctx, queue_list, OID_AUTO, "rx_bytes",
CTLFLAG_RD, &(ring->stats.rx_bytes), "TX Octets");
- SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "jumbo_pkts",
- CTLFLAG_RD, &(ring->stats.jumbo_pkts), "Jumbo Packets");
- SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_err",
+ SYSCTL_ADD_COUNTER_U64(ctx, queue_list, OID_AUTO, "rx_err",
CTLFLAG_RD, &(ring->stats.rx_err), "RX Errors");
- SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irq",
+ SYSCTL_ADD_COUNTER_U64(ctx, queue_list, OID_AUTO, "irq",
CTLFLAG_RD, &(ring->stats.irq), "RX interrupts");
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rx_head",
CTLTYPE_UINT | CTLFLAG_RD, ring, 0,
diff --git a/sys/dev/aq/aq_media.c b/sys/dev/aq/aq_media.c
index f961f3bb5f0f..655b060e7bd2 100644
--- a/sys/dev/aq/aq_media.c
+++ b/sys/dev/aq/aq_media.c
@@ -203,7 +203,7 @@ aq_initmedia(aq_dev_t *aq_dev)
// ifconfig eth0 none
ifmedia_add(aq_dev->media, IFM_ETHER | IFM_NONE, 0, NULL);
- // ifconfig eth0 auto
+ ifmedia_add(aq_dev->media, IFM_ETHER | IFM_AUTO, 0, NULL);
aq_add_media_types(aq_dev, IFM_AUTO);
if (AQ_HW_SUPPORT_SPEED(aq_dev, AQ_LINK_100M))
diff --git a/sys/dev/aq/aq_ring.c b/sys/dev/aq/aq_ring.c
index 51014ae0a9d7..900061437ddb 100644
--- a/sys/dev/aq/aq_ring.c
+++ b/sys/dev/aq/aq_ring.c
@@ -292,16 +292,10 @@ aq_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
cnt++;
} else {
- /* LRO/Jumbo: wait for whole packet be in the ring */
- if (rx_desc[i].wb.rsc_cnt) {
- i = rx_desc[i].wb.next_desp;
- iter++;
- continue;
- } else {
- iter++;
- i = aq_next(i, ring->rx_size - 1);
- continue;
- }
+ /* Jumbo frame spans descriptors; advance. */
+ iter++;
+ i = aq_next(i, ring->rx_size - 1);
+ continue;
}
}
@@ -356,13 +350,19 @@ aq_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
i = 0;
do {
+ if (i >= aq_dev->sctx->isc_rx_nsegments) {
+ counter_u64_add(ring->stats.rx_err, 1);
+ rc = (EBADMSG);
+ goto exit;
+ }
+
rx_desc = (aq_rx_desc_t *) &ring->rx_descs[cidx];
trace_aq_rx_descr(ring->index, cidx,
(volatile uint64_t *)rx_desc);
if ((rx_desc->wb.rx_stat & BIT(0)) != 0) {
- ring->stats.rx_err++;
+ counter_u64_add(ring->stats.rx_err, 1);
rc = (EBADMSG);
goto exit;
}
@@ -397,8 +397,8 @@ aq_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
ri->iri_len = total_len;
ri->iri_nfrags = i;
- ring->stats.rx_bytes += total_len;
- ring->stats.rx_pkts++;
+ counter_u64_add(ring->stats.rx_bytes, total_len);
+ counter_u64_add(ring->stats.rx_pkts, 1);
exit:
AQ_DBG_EXIT(rc);
@@ -551,8 +551,8 @@ aq_isc_txd_encap(void *arg, if_pkt_info_t pi)
trace_aq_tx_descr(ring->index, pidx, (volatile void*)txd);
ring->tx_tail = pidx;
- ring->stats.tx_pkts++;
- ring->stats.tx_bytes += pay_len;
+ counter_u64_add(ring->stats.tx_pkts, 1);
+ counter_u64_add(ring->stats.tx_bytes, pay_len);
pi->ipi_new_pidx = pidx;
@@ -592,6 +592,11 @@ aq_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
head = tdm_tx_desc_head_ptr_get(&aq_dev->hw, ring->index);
AQ_DBG_PRINT("swhead %d hwhead %d", ring->tx_head, head);
+ if (head >= ring->tx_size) {
+ /* Malformed HW head; report no completions. */
+ goto done;
+ }
+
if (ring->tx_head == head) {
avail = 0; // ring->tx_size;
goto done;
diff --git a/sys/dev/aq/aq_ring.h b/sys/dev/aq/aq_ring.h
index 2d3740802213..2f33fbf75d55 100644
--- a/sys/dev/aq/aq_ring.h
+++ b/sys/dev/aq/aq_ring.h
@@ -35,6 +35,8 @@
#ifndef _AQ_RING_H_
#define _AQ_RING_H_
+#include <sys/counter.h>
+
#include "aq_hw.h"
#define REFILL_THRESHOLD 128
@@ -130,16 +132,13 @@ typedef volatile union {
} __attribute__((__packed__)) aq_txc_desc_t;
struct aq_ring_stats {
- uint64_t rx_pkts;
- uint64_t rx_bytes;
- uint64_t jumbo_pkts;
- uint64_t rx_err;
- uint64_t irq;
-
- uint64_t tx_pkts;
- uint64_t tx_bytes;
- uint64_t tx_drops;
- uint64_t tx_queue_full;
+ counter_u64_t rx_pkts;
+ counter_u64_t rx_bytes;
+ counter_u64_t rx_err;
+ counter_u64_t irq;
+
+ counter_u64_t tx_pkts;
+ counter_u64_t tx_bytes;
};
struct aq_dev;