git: d2b96f654a67 - main - iflib: Fix panic observed while doing sysctl -a with if_bnxt unload
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 14 Apr 2026 09:14:18 UTC
The branch main has been updated by ssaxena:
URL: https://cgit.FreeBSD.org/src/commit/?id=d2b96f654a672f6059c5c623c276dcd76841ed12
commit d2b96f654a672f6059c5c623c276dcd76841ed12
Author: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
AuthorDate: 2026-04-13 06:28:08 +0000
Commit: Sumit Saxena <ssaxena@FreeBSD.org>
CommitDate: 2026-04-14 09:13:34 +0000
iflib: Fix panic observed while doing sysctl -a with if_bnxt unload
Observed the kernel panic call trace below when running sysctl -a
while the if_bnxt driver was being unloaded:
Fatal trap 9: general protection fault while in kernel mode
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe02a7569940
vpanic() at vpanic+0x136/frame 0xfffffe02a7569a70
panic() at panic+0x43/frame 0xfffffe02a7569ad0
trap_fatal() at trap_fatal+0x68/frame 0xfffffe02a7569af0
calltrap() at calltrap+0x8/frame 0xfffffe02a7569af0
trap 0x9, rip = 0xffffffff80c0b411, rsp = 0xfffffe02a7569bc0, rbp = 0xfffffe02a7569be0 ---
sysctl_handle_counter_u64() at sysctl_handle_counter_u64+0x61/frame 0xfffffe02a7569be0
sysctl_root_handler_locked() at sysctl_root_handler_locked+0x9c/frame 0xfffffe02a7569c30
sysctl_root() at sysctl_root+0x22f/frame 0xfffffe02a7569cb0
userland_sysctl() at userland_sysctl+0x196/frame 0xfffffe02a7569d50
sys___sysctl() at sys___sysctl+0x65/frame 0xfffffe02a7569e00
amd64_syscall() at amd64_syscall+0x169/frame 0xfffffe02a7569f30
fast_syscall_common() at fast_syscall_common+0xf8/frame 0xfffffe02a7569f30
Root Cause:
iflib adds per-device sysctl nodes under the device tree using the device
sysctl context. Some of those nodes are counter sysctl that point at fields
inside txq->ift_br. When the if_bnxt driver is unloaded, iflib_device_deregister
runs and calls iflib_tx_structures_free, which frees the txq's ift_br. The device
sysctl tree is only freed when the device is destroyed. If sysctl -a runs during
unload, it can still traverse the device tree and call sysctl_handle_counter_u64
for those nodes. The handler does counter_u64_fetch(*(counter_u64_t *)arg1).
By then arg1 can point into freed memory, which leads to a use-after-free kernel panic.
Fix:
iflib now uses its own sysctl context for all iflib-related nodes
instead of using the device's context. The iflib sysctl context is now
removed before any queue/ring memory is freed.
MFC after: 2 weeks
Reviewed by: gallatin, ssaxena, #iflib
Differential Revision: https://reviews.freebsd.org/D55981
---
sys/net/iflib.c | 45 ++++++++++++++++++++++++---------------------
1 file changed, 24 insertions(+), 21 deletions(-)
diff --git a/sys/net/iflib.c b/sys/net/iflib.c
index f9d0b1af0f83..186c41d9f839 100644
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -190,6 +190,7 @@ struct iflib_ctx {
struct ifmedia ifc_media;
struct ifmedia *ifc_mediap;
+ struct sysctl_ctx_list ifc_sysctl_ctx;
struct sysctl_oid *ifc_sysctl_node;
uint16_t ifc_sysctl_ntxqs;
uint16_t ifc_sysctl_nrxqs;
@@ -5293,6 +5294,8 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
fail_detach:
ether_ifdetach(ctx->ifc_ifp);
fail_queues:
+ sysctl_ctx_free(&ctx->ifc_sysctl_ctx);
+ ctx->ifc_sysctl_node = NULL;
taskqueue_free(ctx->ifc_tq);
iflib_tqg_detach(ctx);
iflib_tx_structures_free(ctx);
@@ -5332,6 +5335,9 @@ iflib_device_deregister(if_ctx_t ctx)
if_t ifp = ctx->ifc_ifp;
device_t dev = ctx->ifc_dev;
+ sysctl_ctx_free(&ctx->ifc_sysctl_ctx);
+ ctx->ifc_sysctl_node = NULL;
+
/* Make sure VLANS are not using driver */
if (if_vlantrunkinuse(ifp)) {
device_printf(dev, "Vlan in use, detach first\n");
@@ -6787,62 +6793,61 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx)
{
device_t dev = iflib_get_dev(ctx);
struct sysctl_oid_list *child, *oid_list;
- struct sysctl_ctx_list *ctx_list;
struct sysctl_oid *node;
- ctx_list = device_get_sysctl_ctx(dev);
+ sysctl_ctx_init(&ctx->ifc_sysctl_ctx);
child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
- ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child,
+ ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(&ctx->ifc_sysctl_ctx, child,
OID_AUTO, "iflib", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
"IFLIB fields");
oid_list = SYSCTL_CHILDREN(node);
- SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version",
+ SYSCTL_ADD_CONST_STRING(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "driver_version",
CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, "driver version");
- SYSCTL_ADD_BOOL(ctx_list, oid_list, OID_AUTO, "simple_tx",
+ SYSCTL_ADD_BOOL(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "simple_tx",
CTLFLAG_RDTUN, &ctx->ifc_sysctl_simple_tx, 0,
"use simple tx ring");
- SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs",
+ SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_ntxqs",
CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0,
"# of txqs to use, 0 => use default #");
- SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs",
+ SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_nrxqs",
CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0,
"# of rxqs to use, 0 => use default #");
- SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable",
+ SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_qs_enable",
CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0,
"permit #txq != #rxq");
- SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix",
+ SYSCTL_ADD_INT(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "disable_msix",
CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0,
"disable MSI-X (default 0)");
- SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget",
+ SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "rx_budget",
CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, "set the RX budget");
- SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate",
+ SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "tx_abdicate",
CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0,
"cause TX to abdicate instead of running to completion");
ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED;
- SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset",
+ SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "core_offset",
CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0,
"offset to start using cores at");
- SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx",
+ SYSCTL_ADD_U8(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "separate_txrx",
CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0,
"use separate cores for TX and RX");
- SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "use_logical_cores",
+ SYSCTL_ADD_U8(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "use_logical_cores",
CTLFLAG_RDTUN, &ctx->ifc_sysctl_use_logical_cores, 0,
"try to make use of logical cores for TX and RX");
- SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "use_extra_msix_vectors",
+ SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "use_extra_msix_vectors",
CTLFLAG_RDTUN, &ctx->ifc_sysctl_extra_msix_vectors, 0,
"attempt to reserve the given number of extra MSI-X vectors during driver load for the creation of additional interfaces later");
- SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "allocated_msix_vectors",
+ SYSCTL_ADD_INT(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "allocated_msix_vectors",
CTLFLAG_RDTUN, &ctx->ifc_softc_ctx.isc_vectors, 0,
"total # of MSI-X vectors allocated by driver");
/* XXX change for per-queue sizes */
- SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds",
+ SYSCTL_ADD_PROC(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_ntxds",
CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx,
IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A",
"list of # of TX descriptors to use, 0 = use default #");
- SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds",
+ SYSCTL_ADD_PROC(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_nrxds",
CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx,
IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A",
"list of # of RX descriptors to use, 0 = use default #");
@@ -6853,9 +6858,8 @@ iflib_add_device_sysctl_post(if_ctx_t ctx)
{
if_shared_ctx_t sctx = ctx->ifc_sctx;
if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
- device_t dev = iflib_get_dev(ctx);
struct sysctl_oid_list *child;
- struct sysctl_ctx_list *ctx_list;
+ struct sysctl_ctx_list *ctx_list = &ctx->ifc_sysctl_ctx;
iflib_fl_t fl;
iflib_txq_t txq;
iflib_rxq_t rxq;
@@ -6864,7 +6868,6 @@ iflib_add_device_sysctl_post(if_ctx_t ctx)
char *qfmt;
struct sysctl_oid *queue_node, *fl_node, *node;
struct sysctl_oid_list *queue_list, *fl_list;
- ctx_list = device_get_sysctl_ctx(dev);
node = ctx->ifc_sysctl_node;
child = SYSCTL_CHILDREN(node);