git: dca645cd3112 - stable/14 - nvme: Add handling for bar5

From: Alexander Ziaee <ziaee_at_FreeBSD.org>
Date: Tue, 04 Nov 2025 18:16:17 UTC
The branch stable/14 has been updated by ziaee:

URL: https://cgit.FreeBSD.org/src/commit/?id=dca645cd3112feefee2dce8cab029b1b779823b8

commit dca645cd3112feefee2dce8cab029b1b779823b8
Author:     Jasper Tran O'Leary <jtranoleary@google.com>
AuthorDate: 2025-10-28 20:43:35 +0000
Commit:     Alexander Ziaee <ziaee@FreeBSD.org>
CommitDate: 2025-11-04 18:15:17 +0000

    nvme: Add handling for bar5
    
    The NVMe spec allows the Table BIR (TBIR) and PBA BIR (PBIR) to
    be 0, 4, or 5. The existing NVMe driver basically only has support
    for 4, perhaps under the assumption that BAR4 is 64-bit and also
    occupies BAR5.
    
    This change adds support for BAR5, covering the case where BAR4
    and BAR5 might both be present and 32-bit, where the Table BIR
    might be 4 and the PBA BIR might be 5, or vice versa.
    
    The NVMe spec (in the SR-IOV section) also permits VFs to use BIR=2,
    so I haven't added stricter checks on which BIR will be permitted
    by the driver.
    
    This enables FreeBSD on Google Compute Engine C4 Machines.
    
    MFC after:              3 days
    Reviewed by:            imp
    Sponsored by:           Google
    Co-authored-by:         Matt Delco <delco@google.com>
    Signed-off-by:          Jasper Tran O'Leary <jtranoleary@google.com>
    Differential Revision:  https://reviews.freebsd.org/D53140
    
    (cherry picked from commit 7b32f4f0a7fe9b1b2f5a3905ca15f656713255ad)
---
 sys/dev/nvme/nvme_ctrlr.c   |  9 +++++++--
 sys/dev/nvme/nvme_pci.c     | 48 ++++++++++++++++++++++++++++++++++-----------
 sys/dev/nvme/nvme_private.h |  6 ++++--
 3 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 6f5d6ae74add..ce203e2869fd 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -1584,9 +1584,14 @@ noadminq:
 		bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
 		    rman_get_rid(ctrlr->res), ctrlr->res);
 
-	if (ctrlr->bar4_resource != NULL) {
+	if (ctrlr->msix_table_resource != NULL) {
 		bus_release_resource(dev, SYS_RES_MEMORY,
-		    ctrlr->bar4_resource_id, ctrlr->bar4_resource);
+		    ctrlr->msix_table_resource_id, ctrlr->msix_table_resource);
+	}
+
+	if (ctrlr->msix_pba_resource != NULL) {
+		bus_release_resource(dev, SYS_RES_MEMORY,
+		    ctrlr->msix_pba_resource_id, ctrlr->msix_pba_resource);
 	}
 
 	bus_release_resource(dev, SYS_RES_MEMORY,
diff --git a/sys/dev/nvme/nvme_pci.c b/sys/dev/nvme/nvme_pci.c
index a78327ba0e8b..9c40c3d9f5c7 100644
--- a/sys/dev/nvme/nvme_pci.c
+++ b/sys/dev/nvme/nvme_pci.c
@@ -154,11 +154,15 @@ nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
 {
 
 	ctrlr->resource_id = PCIR_BAR(0);
+	ctrlr->msix_table_resource_id = -1;
+	ctrlr->msix_table_resource = NULL;
+	ctrlr->msix_pba_resource_id = -1;
+	ctrlr->msix_pba_resource = NULL;
 
 	ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
 	    &ctrlr->resource_id, RF_ACTIVE);
 
-	if(ctrlr->resource == NULL) {
+	if (ctrlr->resource == NULL) {
 		nvme_printf(ctrlr, "unable to allocate pci resource\n");
 		return (ENOMEM);
 	}
@@ -168,15 +172,32 @@ nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
 	ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;
 
 	/*
-	 * The NVMe spec allows for the MSI-X table to be placed behind
-	 *  BAR 4/5, separate from the control/doorbell registers.  Always
-	 *  try to map this bar, because it must be mapped prior to calling
-	 *  pci_alloc_msix().  If the table isn't behind BAR 4/5,
-	 *  bus_alloc_resource() will just return NULL which is OK.
+	 * The NVMe spec allows for the MSI-X tables to be placed behind
+	 *  BAR 4 and/or 5, separate from the control/doorbell registers.
 	 */
-	ctrlr->bar4_resource_id = PCIR_BAR(4);
-	ctrlr->bar4_resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
-	    &ctrlr->bar4_resource_id, RF_ACTIVE);
+
+	ctrlr->msix_table_resource_id = pci_msix_table_bar(ctrlr->dev);
+	ctrlr->msix_pba_resource_id = pci_msix_pba_bar(ctrlr->dev);
+
+	if (ctrlr->msix_table_resource_id >= 0 &&
+	    ctrlr->msix_table_resource_id != ctrlr->resource_id) {
+		ctrlr->msix_table_resource = bus_alloc_resource_any(ctrlr->dev,
+		    SYS_RES_MEMORY, &ctrlr->msix_table_resource_id, RF_ACTIVE);
+		if (ctrlr->msix_table_resource == NULL) {
+			nvme_printf(ctrlr, "unable to allocate msi-x table resource\n");
+			return (ENOMEM);
+		}
+	}
+	if (ctrlr->msix_pba_resource_id >= 0 &&
+	    ctrlr->msix_pba_resource_id != ctrlr->resource_id &&
+	    ctrlr->msix_pba_resource_id != ctrlr->msix_table_resource_id) {
+		ctrlr->msix_pba_resource = bus_alloc_resource_any(ctrlr->dev,
+		    SYS_RES_MEMORY, &ctrlr->msix_pba_resource_id, RF_ACTIVE);
+		if (ctrlr->msix_pba_resource == NULL) {
+			nvme_printf(ctrlr, "unable to allocate msi-x pba resource\n");
+			return (ENOMEM);
+		}
+	}
 
 	return (0);
 }
@@ -202,9 +223,14 @@ bad:
 		    ctrlr->resource_id, ctrlr->resource);
 	}
 
-	if (ctrlr->bar4_resource != NULL) {
+	if (ctrlr->msix_table_resource != NULL) {
+		bus_release_resource(dev, SYS_RES_MEMORY,
+		    ctrlr->msix_table_resource_id, ctrlr->msix_table_resource);
+	}
+
+	if (ctrlr->msix_pba_resource != NULL) {
 		bus_release_resource(dev, SYS_RES_MEMORY,
-		    ctrlr->bar4_resource_id, ctrlr->bar4_resource);
+		    ctrlr->msix_pba_resource_id, ctrlr->msix_pba_resource);
 	}
 
 	if (ctrlr->tag)
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 36e04ceb7f31..93833672674a 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -233,8 +233,10 @@ struct nvme_controller {
 	 *  separate from the control registers which are in BAR 0/1.  These
 	 *  members track the mapping of BAR 4/5 for that reason.
 	 */
-	int			bar4_resource_id;
-	struct resource		*bar4_resource;
+	int			msix_table_resource_id;
+	struct resource		*msix_table_resource;
+	int			msix_pba_resource_id;
+	struct resource		*msix_pba_resource;
 
 	int			msi_count;
 	uint32_t		enable_aborts;