git: a727d9531afb - main - cxgbe(4): Fix bad races between sysctl and driver detach.

From: Navdeep Parhar <np_at_FreeBSD.org>
Date: Sat, 15 Jan 2022 00:47:48 UTC
The branch main has been updated by np:

URL: https://cgit.FreeBSD.org/src/commit/?id=a727d9531afbd58e304acc27e3717031e78bff90

commit a727d9531afbd58e304acc27e3717031e78bff90
Author:     Navdeep Parhar <np@FreeBSD.org>
AuthorDate: 2022-01-13 22:21:49 +0000
Commit:     Navdeep Parhar <np@FreeBSD.org>
CommitDate: 2022-01-15 00:44:57 +0000

    cxgbe(4): Fix bad races between sysctl and driver detach.
    
    The default sysctl context setup by newbus for a device is eventually
    freed by device_sysctl_fini, which runs after the device driver's detach
    routine.  sysctl nodes associated with this context must not use any
    resources (like driver locks, hardware access, counters, etc.) that are
    released by driver detach.
    
    There are a lot of sysctl nodes like this in cxgbe(4) and the fix is to
    hang them off a context that is explicitly freed by the driver before it
    releases any resource that might be used by a sysctl.
    
    This fixes panics when running "sysctl dev.t6nex dev.cc" in a tight loop
    and loading/unloading the driver in parallel.
    
    Reported by:    Suhas Lokesha
    MFC after:      1 week
    Sponsored by:   Chelsio Communications
---
 sys/dev/cxgbe/adapter.h          |  6 +++--
 sys/dev/cxgbe/crypto/t4_crypto.c |  8 ++++---
 sys/dev/cxgbe/t4_main.c          | 48 +++++++++++-----------------------------
 sys/dev/cxgbe/t4_vf.c            |  1 +
 4 files changed, 23 insertions(+), 40 deletions(-)

diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index a7139b3f7b0e..34c85a3eeaad 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -159,7 +159,7 @@ enum {
 	FW_OK		= (1 << 1),
 	CHK_MBOX_ACCESS	= (1 << 2),
 	MASTER_PF	= (1 << 3),
-	ADAP_SYSCTL_CTX	= (1 << 4),
+	/* 1 << 4 is unused, was ADAP_SYSCTL_CTX */
 	ADAP_ERR	= (1 << 5),
 	BUF_PACKING_OK	= (1 << 6),
 	IS_VF		= (1 << 7),
@@ -174,7 +174,7 @@ enum {
 	/* VI flags */
 	DOOMED		= (1 << 0),
 	VI_INIT_DONE	= (1 << 1),
-	VI_SYSCTL_CTX	= (1 << 2),
+	/* 1 << 2 is unused, was VI_SYSCTL_CTX */
 	TX_USES_VM_WR 	= (1 << 3),
 	VI_SKIP_STATS 	= (1 << 4),
 
@@ -332,6 +332,8 @@ struct port_info {
 	u_int tx_parse_error;
 	int fcs_reg;
 	uint64_t fcs_base;
+
+	struct sysctl_ctx_list ctx;
 };
 
 #define	IS_MAIN_VI(vi)		((vi) == &((vi)->pi->vi[0]))
diff --git a/sys/dev/cxgbe/crypto/t4_crypto.c b/sys/dev/cxgbe/crypto/t4_crypto.c
index c9ac54c8219c..d7e48e9cfdce 100644
--- a/sys/dev/cxgbe/crypto/t4_crypto.c
+++ b/sys/dev/cxgbe/crypto/t4_crypto.c
@@ -249,6 +249,8 @@ struct ccr_softc {
 	counter_u64_t stats_sglist_error;
 	counter_u64_t stats_process_error;
 	counter_u64_t stats_sw_fallback;
+
+	struct sysctl_ctx_list ctx;
 };
 
 /*
@@ -1819,14 +1821,12 @@ ccr_probe(device_t dev)
 static void
 ccr_sysctls(struct ccr_softc *sc)
 {
-	struct sysctl_ctx_list *ctx;
+	struct sysctl_ctx_list *ctx = &sc->ctx;
 	struct sysctl_oid *oid, *port_oid;
 	struct sysctl_oid_list *children;
 	char buf[16];
 	int i;
 
-	ctx = device_get_sysctl_ctx(sc->dev);
-
 	/*
 	 * dev.ccr.X.
 	 */
@@ -1952,6 +1952,7 @@ ccr_attach(device_t dev)
 
 	sc = device_get_softc(dev);
 	sc->dev = dev;
+	sysctl_ctx_init(&sc->ctx);
 	sc->adapter = device_get_softc(device_get_parent(dev));
 	for_each_port(sc->adapter, i) {
 		ccr_init_port(sc, i);
@@ -2018,6 +2019,7 @@ ccr_detach(device_t dev)
 
 	crypto_unregister_all(sc->cid);
 
+	sysctl_ctx_free(&sc->ctx);
 	mtx_destroy(&sc->lock);
 	counter_u64_free(sc->stats_cipher_encrypt);
 	counter_u64_free(sc->stats_cipher_decrypt);
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 58b064e78402..e7f6c239266b 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -1122,6 +1122,7 @@ t4_attach(device_t dev)
 
 	sc = device_get_softc(dev);
 	sc->dev = dev;
+	sysctl_ctx_init(&sc->ctx);
 	TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags);
 
 	if ((pci_get_device(dev) & 0xff00) == 0x5400)
@@ -1175,10 +1176,10 @@ t4_attach(device_t dev)
 
 	TASK_INIT(&sc->reset_task, 0, reset_adapter, sc);
 
-	sc->ctrlq_oid = SYSCTL_ADD_NODE(device_get_sysctl_ctx(sc->dev),
+	sc->ctrlq_oid = SYSCTL_ADD_NODE(&sc->ctx,
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO, "ctrlq",
 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "control queues");
-	sc->fwq_oid = SYSCTL_ADD_NODE(device_get_sysctl_ctx(sc->dev),
+	sc->fwq_oid = SYSCTL_ADD_NODE(&sc->ctx,
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO, "fwq",
 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "firmware event queue");
 
@@ -1732,6 +1733,7 @@ t4_detach_common(device_t dev)
 	}
 
 	device_delete_children(dev);
+	sysctl_ctx_free(&sc->ctx);
 	adapter_full_uninit(sc);
 
 	if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
@@ -2413,12 +2415,12 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
 {
 	struct ifnet *ifp;
 	struct sbuf *sb;
-	struct sysctl_ctx_list *ctx;
+	struct sysctl_ctx_list *ctx = &vi->ctx;
 	struct sysctl_oid_list *children;
 	struct pfil_head_args pa;
 	struct adapter *sc = vi->adapter;
 
-	ctx = device_get_sysctl_ctx(vi->dev);
+	sysctl_ctx_init(ctx);
 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(vi->dev));
 	vi->rxq_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rxq",
 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "NIC rx queues");
@@ -2565,6 +2567,8 @@ cxgbe_attach(device_t dev)
 	struct vi_info *vi;
 	int i, rc;
 
+	sysctl_ctx_init(&pi->ctx);
+
 	rc = cxgbe_vi_attach(dev, &pi->vi[0]);
 	if (rc)
 		return (rc);
@@ -2606,6 +2610,7 @@ cxgbe_vi_detach(struct vi_info *vi)
 #endif
 	cxgbe_uninit_synchronized(vi);
 	callout_drain(&vi->tick);
+	sysctl_ctx_free(&vi->ctx);
 	vi_full_uninit(vi);
 
 	if_free(vi->ifp);
@@ -2625,6 +2630,7 @@ cxgbe_detach(device_t dev)
 		return (rc);
 	device_delete_children(dev);
 
+	sysctl_ctx_free(&pi->ctx);
 	doom_vi(sc, &pi->vi[0]);
 
 	if (pi->flags & HAS_TRACEQ) {
@@ -6469,11 +6475,6 @@ adapter_full_init(struct adapter *sc)
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 
-	if (!(sc->flags & ADAP_SYSCTL_CTX)) {
-		sysctl_ctx_init(&sc->ctx);
-		sc->flags |= ADAP_SYSCTL_CTX;
-	}
-
 	/*
 	 * queues that belong to the adapter (not any particular port).
 	 */
@@ -6528,12 +6529,6 @@ adapter_full_uninit(struct adapter *sc)
 {
 	int i;
 
-	/* Do this before freeing the adapter queues. */
-	if (sc->flags & ADAP_SYSCTL_CTX) {
-		sysctl_ctx_free(&sc->ctx);
-		sc->flags &= ~ADAP_SYSCTL_CTX;
-	}
-
 	t4_teardown_adapter_queues(sc);
 
 	for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) {
@@ -6626,11 +6621,6 @@ vi_full_init(struct vi_info *vi)
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 
-	if (!(vi->flags & VI_SYSCTL_CTX)) {
-		sysctl_ctx_init(&vi->ctx);
-		vi->flags |= VI_SYSCTL_CTX;
-	}
-
 	/*
 	 * Allocate tx/rx/fl queues for this VI.
 	 */
@@ -6764,12 +6754,6 @@ vi_full_uninit(struct vi_info *vi)
 		free(vi->nm_rss, M_CXGBE);
 	}
 
-	/* Do this before freeing the VI queues. */
-	if (vi->flags & VI_SYSCTL_CTX) {
-		sysctl_ctx_free(&vi->ctx);
-		vi->flags &= ~VI_SYSCTL_CTX;
-	}
-
 	t4_teardown_vi_queues(vi);
 	vi->flags &= ~VI_INIT_DONE;
 }
@@ -7132,13 +7116,11 @@ static char *caps_decoder[] = {
 void
 t4_sysctls(struct adapter *sc)
 {
-	struct sysctl_ctx_list *ctx;
+	struct sysctl_ctx_list *ctx = &sc->ctx;
 	struct sysctl_oid *oid;
 	struct sysctl_oid_list *children, *c0;
 	static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"};
 
-	ctx = device_get_sysctl_ctx(sc->dev);
-
 	/*
 	 * dev.t4nex.X.
 	 */
@@ -7649,12 +7631,10 @@ t4_sysctls(struct adapter *sc)
 void
 vi_sysctls(struct vi_info *vi)
 {
-	struct sysctl_ctx_list *ctx;
+	struct sysctl_ctx_list *ctx = &vi->ctx;
 	struct sysctl_oid *oid;
 	struct sysctl_oid_list *children;
 
-	ctx = device_get_sysctl_ctx(vi->dev);
-
 	/*
 	 * dev.v?(cxgbe|cxl).X.
 	 */
@@ -7754,7 +7734,7 @@ vi_sysctls(struct vi_info *vi)
 static void
 cxgbe_sysctls(struct port_info *pi)
 {
-	struct sysctl_ctx_list *ctx;
+	struct sysctl_ctx_list *ctx = &pi->ctx;
 	struct sysctl_oid *oid;
 	struct sysctl_oid_list *children, *children2;
 	struct adapter *sc = pi->adapter;
@@ -7762,8 +7742,6 @@ cxgbe_sysctls(struct port_info *pi)
 	char name[16];
 	static char *tc_flags = {"\20\1USER"};
 
-	ctx = device_get_sysctl_ctx(pi->dev);
-
 	/*
 	 * dev.cxgbe.X.
 	 */
diff --git a/sys/dev/cxgbe/t4_vf.c b/sys/dev/cxgbe/t4_vf.c
index 4ad5e9d7839d..95b984f3e3be 100644
--- a/sys/dev/cxgbe/t4_vf.c
+++ b/sys/dev/cxgbe/t4_vf.c
@@ -489,6 +489,7 @@ t4vf_attach(device_t dev)
 
 	sc = device_get_softc(dev);
 	sc->dev = dev;
+	sysctl_ctx_init(&sc->ctx);
 	pci_enable_busmaster(dev);
 	pci_set_max_read_req(dev, 4096);
 	sc->params.pci.mps = pci_get_max_payload(dev);