git: 619934a5c165 - main - aq(4): RX/TX and HW-path correctness and hardening

From: Adrian Chadd <adrian_at_FreeBSD.org>
Date: Sat, 20 Jun 2026 19:10:37 UTC
The branch main has been updated by adrian:

URL: https://cgit.FreeBSD.org/src/commit/?id=619934a5c165869604810ece91c2f2cb734fc3eb

commit 619934a5c165869604810ece91c2f2cb734fc3eb
Author:     Nick Pricenull <nick@spun.ionull>
AuthorDate: 2026-06-20 19:02:52 +0000
Commit:     Adrian Chadd <adrian@FreeBSD.org>
CommitDate: 2026-06-20 19:10:15 +0000

    aq(4): RX/TX and HW-path correctness and hardening
    
    Independent correctness fixes, plus robustness against a non-responding
    device, malformed descriptor writeback, and torn MMIO reads, and the move
    to the FreeBSD bus_space(9) register abstraction.
    
    Correctness:
    - aq_hw_ver_match returned true if any of major/minor/build was >=
      expected; compare lexicographically so e.g. 2.0.1 is correctly seen as
      older than 2.1.0.
    - The VLAN hardware-filter iteration used the vlan tag directly as the
      bitstring index; use vlan_tag + 1 so the active-VLAN bookkeeping lines
      up with the table.
    - aq_initmedia only registered IFM_AUTO in full-duplex/pause variants, so
      a bare "ifconfig aq0 media autoselect" matched no entry and returned
      ENXIO.  Add the bare IFM_ETHER|IFM_AUTO entry, matching ix/em/igc/ixv.
    - Convert the per-ring diagnostic counters to counter(9): per-CPU,
      tear-free, no atomics on the increment path, fixing a data race and a
      32-bit torn read against the locklessly-read sysctls.  Drop three
      counters that were never populated (jumbo_pkts, tx_drops, tx_queue_full).
    
    Hardening and modernization:
    - Implement aq_hw_err_from_flags (previously a stub returning 0 that left
      ~24 call sites as no-ops): detect a non-responding device in the
      register read path (all-ones, confirmed against reg 0x10) and latch a
      sticky AQ_HW_FLAG_ERR_UNPLUG -> ENXIO; aq_if_init clears it so a
      transient detection cannot permanently wedge the device.
    - Bound the RX fragment loop in aq_isc_rxd_pkt_get (EBADMSG once i reaches
      isc_rx_nsegments) so a malformed never-EOP stream cannot write past the
      fragment array.
    - Bound the TX completion count in aq_isc_txd_credits_update against the
      raw HW head pointer (reject head >= tx_size) so a glitched head cannot
      make iflib free still-in-flight TX mbufs.
    - Remove the dead hardware-RSC branch in aq_isc_rxd_available; it followed
      wb.next_desp, a raw device value used as an index, but RSC is never
      enabled so rsc_cnt is always 0.  Advance sequentially like the other
      in-tree iflib drivers; this drops the last raw-hardware-pointer
      dereference in the RX path.
    - Flush MMIO after interrupt-status acks (AQ_HW_FLUSH) so the write lands
      before the vector is re-armed under auto-mask-clear; retry the high word
      in read64_ against a torn lo/hi pair; document the non-atomic IMR
      read-modify-write in itr_irq_map_en_{rx,tx}_set.
    - Replace the raw-pointer MMIO (the Linux readl/writel idiom) with
      bus_space(9): store the BAR tag and handle in struct aq_hw and route
      AQ_READ_REG/AQ_WRITE_REG through bus_space_read_4/write_4.  No functional
      change on amd64.
    
    Reviewed by:    adrian
    Differential Revision:  https://reviews.freebsd.org/D57436
---
 sys/dev/aq/aq_fw2x.c  | 11 ++++++--
 sys/dev/aq/aq_hw.c    | 30 ++++++++++++++------
 sys/dev/aq/aq_hw.h    | 16 +++++++++--
 sys/dev/aq/aq_irq.c   |  6 ++--
 sys/dev/aq/aq_main.c  | 77 +++++++++++++++++++++++++++++++++++++++++----------
 sys/dev/aq/aq_media.c |  2 +-
 sys/dev/aq/aq_ring.c  | 35 +++++++++++++----------
 sys/dev/aq/aq_ring.h  | 19 ++++++-------
 8 files changed, 139 insertions(+), 57 deletions(-)

diff --git a/sys/dev/aq/aq_fw2x.c b/sys/dev/aq/aq_fw2x.c
index aee7d80d7013..0a6d433a48fc 100644
--- a/sys/dev/aq/aq_fw2x.c
+++ b/sys/dev/aq/aq_fw2x.c
@@ -207,8 +207,15 @@ int fw2x_get_stats(struct aq_hw* hw, struct aq_hw_stats_s* stats);
 static uint64_t
 read64_(struct aq_hw* hw, uint32_t addr)
 {
-	uint64_t lo = AQ_READ_REG(hw, addr);
-	uint64_t hi = AQ_READ_REG(hw, addr + 4);
+	uint64_t lo, hi, hi2;
+
+	hi = AQ_READ_REG(hw, addr + 4);
+	do {
+		hi2 = hi;
+		lo = AQ_READ_REG(hw, addr);
+		hi = AQ_READ_REG(hw, addr + 4);
+	} while (hi != hi2);
+
 	return (lo | (hi << 32));
 }
 
diff --git a/sys/dev/aq/aq_hw.c b/sys/dev/aq/aq_hw.c
index f3f47c7ecb1e..882fdf1807f2 100644
--- a/sys/dev/aq/aq_hw.c
+++ b/sys/dev/aq/aq_hw.c
@@ -51,9 +51,24 @@
 int
 aq_hw_err_from_flags(struct aq_hw *hw)
 {
+	if (atomic_load_acq_long(&hw->flags) & AQ_HW_FLAG_ERR_UNPLUG)
+		return (ENXIO);
 	return (0);
 }
 
+inline uint32_t
+aq_hw_read_reg(struct aq_hw *hw, uint32_t reg)
+{
+	uint32_t val = le32toh(bus_space_read_4(hw->hw_tag, hw->hw_handle, reg));
+
+	if (__predict_false(val == 0xFFFFFFFFU) &&
+	    le32toh(bus_space_read_4(hw->hw_tag, hw->hw_handle, 0x10)) ==
+	    0xFFFFFFFFU)
+		atomic_set_rel_long(&hw->flags, AQ_HW_FLAG_ERR_UNPLUG);
+
+	return (val);
+}
+
 static void
 aq_hw_chip_features_init(struct aq_hw *hw, uint32_t *p)
 {
@@ -123,14 +138,11 @@ aq_hw_ver_match(const aq_hw_fw_version* ver_expected,
 {
 	AQ_DBG_ENTER();
 
-	if (ver_actual->major_version >= ver_expected->major_version)
-		return (true);
-	if (ver_actual->minor_version >= ver_expected->minor_version)
-		return (true);
-	if (ver_actual->build_number >= ver_expected->build_number)
-		return (true);
-
-	return (false);
+	if (ver_actual->major_version != ver_expected->major_version)
+		return (ver_actual->major_version > ver_expected->major_version);
+	if (ver_actual->minor_version != ver_expected->minor_version)
+		return (ver_actual->minor_version > ver_expected->minor_version);
+	return (ver_actual->build_number >= ver_expected->build_number);
 }
 
 static int
@@ -660,7 +672,7 @@ aq_hw_init(struct aq_hw *hw, uint8_t *mac_addr, uint8_t adm_irq, bool msix)
 
 	reg_gen_irq_map_set(hw, 0x80 | adm_irq, 3);
 
-	aq_hw_offload_set(hw);
+	err = aq_hw_offload_set(hw);
 
 err_exit:
 	AQ_DBG_EXIT(err);
diff --git a/sys/dev/aq/aq_hw.h b/sys/dev/aq/aq_hw.h
index dd214aa3673b..f954f6055a4c 100644
--- a/sys/dev/aq/aq_hw.h
+++ b/sys/dev/aq/aq_hw.h
@@ -37,14 +37,21 @@
 
 #include <sys/types.h>
 #include <sys/cdefs.h>
+#include <machine/atomic.h>
 #include <machine/cpufunc.h>
+#include <machine/bus.h>
 #include <sys/endian.h>
 #include <net/ethernet.h>
 #include "aq_common.h"
 
-#define AQ_WRITE_REG(hw, reg, value) writel(((hw)->hw_addr + (reg)), htole32(value))
+#define AQ_WRITE_REG(hw, reg, value) \
+	bus_space_write_4((hw)->hw_tag, (hw)->hw_handle, (reg), htole32(value))
 
-#define AQ_READ_REG(hw, reg) le32toh(readl((hw)->hw_addr + reg))
+#define AQ_HW_FLAG_ERR_UNPLUG	0x1UL
+
+struct aq_hw;
+extern uint32_t aq_hw_read_reg(struct aq_hw *hw, uint32_t reg);
+#define AQ_READ_REG(hw, reg) aq_hw_read_reg((hw), (reg))
 
 
 #define AQ_WRITE_REG_BIT(hw, reg, msk, shift, value) do { \
@@ -137,7 +144,8 @@ struct aq_hw_fc_info {
 
 struct aq_hw {
 	void *aq_dev;
-	uint8_t *hw_addr;
+	bus_space_tag_t hw_tag;
+	bus_space_handle_t hw_handle;
 	uint32_t regs_size;
 
 	uint8_t mac_addr[ETHER_ADDR_LEN];
@@ -170,6 +178,8 @@ struct aq_hw {
 	uint32_t mbox_addr;
 	struct aq_hw_fw_mbox mbox;
 
+	u_long flags;
+
 	uint32_t tx_rings_count;
 };
 
diff --git a/sys/dev/aq/aq_irq.c b/sys/dev/aq/aq_irq.c
index 21f319b87fbd..9f8138868eac 100644
--- a/sys/dev/aq/aq_irq.c
+++ b/sys/dev/aq/aq_irq.c
@@ -163,9 +163,9 @@ aq_isr_rx(void *arg)
 	struct aq_dev   *aq_dev = ring->dev;
 	struct aq_hw    *hw = &aq_dev->hw;
 
-	/* clear interrupt status */
 	itr_irq_status_clearlsw_set(hw, BIT(ring->msix));
-	ring->stats.irq++;
+	AQ_HW_FLUSH();
+	counter_u64_add(ring->stats.irq, 1);
 	return (FILTER_SCHEDULE_THREAD);
 }
 
@@ -178,8 +178,8 @@ aq_linkstat_isr(void *arg)
 	aq_dev_t              *aq_dev = arg;
 	struct aq_hw          *hw = &aq_dev->hw;
 
-	/* clear interrupt status */
 	itr_irq_status_clearlsw_set(hw, BIT(aq_dev->msix));
+	AQ_HW_FLUSH();
 
 	iflib_admin_intr_deferred(aq_dev->ctx);
 
diff --git a/sys/dev/aq/aq_main.c b/sys/dev/aq/aq_main.c
index fd278826bd2d..c7701cc4f6d8 100644
--- a/sys/dev/aq/aq_main.c
+++ b/sys/dev/aq/aq_main.c
@@ -353,7 +353,8 @@ aq_if_attach_pre(if_ctx_t ctx)
 	softc->mmio_tag = rman_get_bustag(softc->mmio_res);
 	softc->mmio_handle = rman_get_bushandle(softc->mmio_res);
 	softc->mmio_size = rman_get_size(softc->mmio_res);
-	softc->hw.hw_addr = (uint8_t*) softc->mmio_handle;
+	softc->hw.hw_tag = softc->mmio_tag;
+	softc->hw.hw_handle = softc->mmio_handle;
 	hw = &softc->hw;
 	hw->link_rate = aq_fw_speed_auto;
 	hw->itr = -1;
@@ -542,6 +543,42 @@ aq_if_resume(if_ctx_t ctx)
 	return (0);
 }
 
+_Static_assert(sizeof(struct aq_ring_stats) % sizeof(counter_u64_t) == 0,
+    "aq_ring_stats must contain only counter_u64_t fields");
+
+static int
+aq_ring_stats_alloc(struct aq_ring *ring)
+{
+	counter_u64_t *c = (counter_u64_t *)&ring->stats;
+	int i, n = sizeof(ring->stats) / sizeof(counter_u64_t);
+
+	for (i = 0; i < n; i++) {
+		c[i] = counter_u64_alloc(M_NOWAIT);
+		if (c[i] == NULL) {
+			while (i-- > 0) {
+				counter_u64_free(c[i]);
+				c[i] = NULL;
+			}
+			return (ENOMEM);
+		}
+	}
+	return (0);
+}
+
+static void
+aq_ring_stats_free(struct aq_ring *ring)
+{
+	counter_u64_t *c = (counter_u64_t *)&ring->stats;
+	int i, n = sizeof(ring->stats) / sizeof(counter_u64_t);
+
+	for (i = 0; i < n; i++) {
+		if (c[i] != NULL) {
+			counter_u64_free(c[i]);
+			c[i] = NULL;
+		}
+	}
+}
+
 /* Soft queue setup and teardown */
 static int
 aq_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
@@ -571,6 +608,13 @@ aq_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
 		ring->dev = softc;
 
 		softc->tx_rings_count++;
+
+		rc = aq_ring_stats_alloc(ring);
+		if (rc != 0) {
+			device_printf(softc->dev,
+			    "atlantic: tx_ring stats alloc fail\n");
+			goto fail;
+		}
 	}
 
 	AQ_DBG_EXIT(rc);
@@ -621,6 +665,13 @@ aq_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
 		}
 
 		softc->rx_rings_count++;
+
+		rc = aq_ring_stats_alloc(ring);
+		if (rc != 0) {
+			device_printf(softc->dev,
+			    "atlantic: rx_ring stats alloc fail\n");
+			goto fail;
+		}
 	}
 
 	AQ_DBG_EXIT(rc);
@@ -643,6 +694,7 @@ aq_if_queues_free(if_ctx_t ctx)
 
 	for (i = 0; i < softc->tx_rings_count; i++) {
 		if (softc->tx_rings[i]) {
+			aq_ring_stats_free(softc->tx_rings[i]);
 			free(softc->tx_rings[i], M_AQ);
 			softc->tx_rings[i] = NULL;
 		}
@@ -650,6 +702,7 @@ aq_if_queues_free(if_ctx_t ctx)
 	softc->tx_rings_count = 0;
 	for (i = 0; i < softc->rx_rings_count; i++) {
 		if (softc->rx_rings[i]){
+			aq_ring_stats_free(softc->rx_rings[i]);
 			free(softc->rx_rings[i], M_AQ);
 			softc->rx_rings[i] = NULL;
 		}
@@ -673,6 +726,8 @@ aq_if_init(if_ctx_t ctx)
 	softc = iflib_get_softc(ctx);
 	hw = &softc->hw;
 
+	atomic_store_rel_long(&hw->flags, 0);
+
 	hw->tx_rings_count = softc->tx_rings_count;
 
 	err = aq_hw_init(&softc->hw, softc->hw.mac_addr, softc->msix,
@@ -1080,7 +1135,7 @@ aq_update_vlan_filters(struct aq_dev *softc)
 			aq_vlans[i].location = i;
 			aq_vlans[i].queue = 0xFF;
 			aq_vlans[i].vlan_id = vlan_tag;
-			bit_pos = vlan_tag;
+			bit_pos = vlan_tag + 1;
 		} else {
 			aq_vlans[i].enable = false;
 		}
@@ -1319,14 +1374,10 @@ aq_add_stats_sysctls(struct aq_dev *softc)
 		    CTLFLAG_RD, NULL, "Queue Name");
 		queue_list = SYSCTL_CHILDREN(queue_node);
 
-		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_pkts",
+		SYSCTL_ADD_COUNTER_U64(ctx, queue_list, OID_AUTO, "tx_pkts",
 		    CTLFLAG_RD, &(ring->stats.tx_pkts), "TX Packets");
-		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_bytes",
+		SYSCTL_ADD_COUNTER_U64(ctx, queue_list, OID_AUTO, "tx_bytes",
 		    CTLFLAG_RD, &(ring->stats.tx_bytes), "TX Octets");
-		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_drops",
-		    CTLFLAG_RD, &(ring->stats.tx_drops), "TX Drops");
-		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_queue_full",
-		    CTLFLAG_RD, &(ring->stats.tx_queue_full), "TX Queue Full");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "tx_head",
 		    CTLTYPE_UINT | CTLFLAG_RD, ring, 0,
 		    aq_sysctl_print_tx_head, "IU", "ring head pointer");
@@ -1342,15 +1393,13 @@ aq_add_stats_sysctls(struct aq_dev *softc)
 		    CTLFLAG_RD, NULL, "Queue Name");
 		queue_list = SYSCTL_CHILDREN(queue_node);
 
-		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_pkts",
+		SYSCTL_ADD_COUNTER_U64(ctx, queue_list, OID_AUTO, "rx_pkts",
 		    CTLFLAG_RD, &(ring->stats.rx_pkts), "RX Packets");
-		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
+		SYSCTL_ADD_COUNTER_U64(ctx, queue_list, OID_AUTO, "rx_bytes",
 		    CTLFLAG_RD, &(ring->stats.rx_bytes), "TX Octets");
-		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "jumbo_pkts",
-		    CTLFLAG_RD, &(ring->stats.jumbo_pkts), "Jumbo Packets");
-		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_err",
+		SYSCTL_ADD_COUNTER_U64(ctx, queue_list, OID_AUTO, "rx_err",
 		    CTLFLAG_RD, &(ring->stats.rx_err), "RX Errors");
-		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irq",
+		SYSCTL_ADD_COUNTER_U64(ctx, queue_list, OID_AUTO, "irq",
 		    CTLFLAG_RD, &(ring->stats.irq), "RX interrupts");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rx_head",
 		    CTLTYPE_UINT | CTLFLAG_RD, ring, 0,
diff --git a/sys/dev/aq/aq_media.c b/sys/dev/aq/aq_media.c
index f961f3bb5f0f..655b060e7bd2 100644
--- a/sys/dev/aq/aq_media.c
+++ b/sys/dev/aq/aq_media.c
@@ -203,7 +203,7 @@ aq_initmedia(aq_dev_t *aq_dev)
 	// ifconfig eth0 none
 	ifmedia_add(aq_dev->media, IFM_ETHER | IFM_NONE, 0, NULL);
 
-	// ifconfig eth0 auto
+	ifmedia_add(aq_dev->media, IFM_ETHER | IFM_AUTO, 0, NULL);
 	aq_add_media_types(aq_dev, IFM_AUTO);
 
 	if (AQ_HW_SUPPORT_SPEED(aq_dev, AQ_LINK_100M))
diff --git a/sys/dev/aq/aq_ring.c b/sys/dev/aq/aq_ring.c
index 51014ae0a9d7..900061437ddb 100644
--- a/sys/dev/aq/aq_ring.c
+++ b/sys/dev/aq/aq_ring.c
@@ -292,16 +292,10 @@ aq_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
 
 			cnt++;
 		} else {
-			/* LRO/Jumbo: wait for whole packet be in the ring */
-			if (rx_desc[i].wb.rsc_cnt) {
-				i = rx_desc[i].wb.next_desp;
-				iter++;
-				continue;
-			} else {
-				iter++;
-				i = aq_next(i, ring->rx_size - 1);
-				continue;
-			}
+			/* Jumbo frame spans descriptors; advance. */
+			iter++;
+			i = aq_next(i, ring->rx_size - 1);
+			continue;
 		}
 	}
 
@@ -356,13 +350,19 @@ aq_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
 	i = 0;
 
 	do {
+		if (i >= aq_dev->sctx->isc_rx_nsegments) {
+			counter_u64_add(ring->stats.rx_err, 1);
+			rc = (EBADMSG);
+			goto exit;
+		}
+
 		rx_desc = (aq_rx_desc_t *) &ring->rx_descs[cidx];
 
 		trace_aq_rx_descr(ring->index, cidx,
 		    (volatile uint64_t *)rx_desc);
 
 		if ((rx_desc->wb.rx_stat & BIT(0)) != 0) {
-			ring->stats.rx_err++;
+			counter_u64_add(ring->stats.rx_err, 1);
 			rc = (EBADMSG);
 			goto exit;
 		}
@@ -397,8 +397,8 @@ aq_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
 	ri->iri_len = total_len;
 	ri->iri_nfrags = i;
 
-	ring->stats.rx_bytes += total_len;
-	ring->stats.rx_pkts++;
+	counter_u64_add(ring->stats.rx_bytes, total_len);
+	counter_u64_add(ring->stats.rx_pkts, 1);
 
 exit:
 	AQ_DBG_EXIT(rc);
@@ -551,8 +551,8 @@ aq_isc_txd_encap(void *arg, if_pkt_info_t pi)
 	trace_aq_tx_descr(ring->index, pidx, (volatile void*)txd);
 	ring->tx_tail = pidx;
 
-	ring->stats.tx_pkts++;
-	ring->stats.tx_bytes += pay_len;
+	counter_u64_add(ring->stats.tx_pkts, 1);
+	counter_u64_add(ring->stats.tx_bytes, pay_len);
 
 	pi->ipi_new_pidx = pidx;
 
@@ -592,6 +592,11 @@ aq_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
 	head = tdm_tx_desc_head_ptr_get(&aq_dev->hw, ring->index);
 	AQ_DBG_PRINT("swhead %d hwhead %d", ring->tx_head, head);
 
+	if (head >= ring->tx_size) {
+		/* Malformed HW head; report no completions. */
+		goto done;
+	}
+
 	if (ring->tx_head == head) {
 		avail = 0; // ring->tx_size;
 		goto done;
diff --git a/sys/dev/aq/aq_ring.h b/sys/dev/aq/aq_ring.h
index 2d3740802213..2f33fbf75d55 100644
--- a/sys/dev/aq/aq_ring.h
+++ b/sys/dev/aq/aq_ring.h
@@ -35,6 +35,8 @@
 #ifndef _AQ_RING_H_
 #define _AQ_RING_H_
 
+#include <sys/counter.h>
+
 #include "aq_hw.h"
 
 #define REFILL_THRESHOLD 128
@@ -130,16 +132,13 @@ typedef volatile union {
 } __attribute__((__packed__)) aq_txc_desc_t;
 
 struct aq_ring_stats {
-	uint64_t rx_pkts;
-	uint64_t rx_bytes;
-	uint64_t jumbo_pkts;
-	uint64_t rx_err;
-	uint64_t irq;
-
-	uint64_t tx_pkts;
-	uint64_t tx_bytes;
-	uint64_t tx_drops;
-	uint64_t tx_queue_full;
+	counter_u64_t rx_pkts;
+	counter_u64_t rx_bytes;
+	counter_u64_t rx_err;
+	counter_u64_t irq;
+
+	counter_u64_t tx_pkts;
+	counter_u64_t tx_bytes;
 };
 
 struct aq_dev;