git: 900b7ac652b3 - stable/12 - nvme(4): Add MSI and single MSI-X support.

Alexander Motin mav at FreeBSD.org
Tue Sep 7 01:29:21 UTC 2021


The branch stable/12 has been updated by mav:

URL: https://cgit.FreeBSD.org/src/commit/?id=900b7ac652b3f5c8bcf42621aa07b12ae0d883ea

commit 900b7ac652b3f5c8bcf42621aa07b12ae0d883ea
Author:     Alexander Motin <mav at FreeBSD.org>
AuthorDate: 2021-08-31 17:34:48 +0000
Commit:     Alexander Motin <mav at FreeBSD.org>
CommitDate: 2021-09-07 01:25:34 +0000

    nvme(4): Add MSI and single MSI-X support.
    
    If we can't allocate more MSI-X vectors, accept using single shared.
    If we can't allocate any MSI-X, try to allocate 2 MSI vectors, but
    accept single shared.  If still no luck, fall back to shared INTx.
    
    This provides maximal flexibility in some limited scenarios.  For
    example, vmd(4) does not support INTx and can handle only limited
    number of MSI/MSI-X vectors without sharing.
    
    MFC after:      1 week
    
    (cherry picked from commit e3bdf3da769a55f0944d9c337bb4d91b6435f02c)
---
 sys/dev/nvme/nvme_ahci.c    |  9 ++---
 sys/dev/nvme/nvme_ctrlr.c   |  2 +-
 sys/dev/nvme/nvme_pci.c     | 92 ++++++++++++++++++++++++++++-----------------
 sys/dev/nvme/nvme_private.h |  4 +-
 sys/dev/nvme/nvme_qpair.c   | 14 ++++---
 5 files changed, 73 insertions(+), 48 deletions(-)

diff --git a/sys/dev/nvme/nvme_ahci.c b/sys/dev/nvme/nvme_ahci.c
index 1037fab66664..8542f6f55246 100644
--- a/sys/dev/nvme/nvme_ahci.c
+++ b/sys/dev/nvme/nvme_ahci.c
@@ -87,19 +87,18 @@ nvme_ahci_attach(device_t dev)
 	ctrlr->rid = 0;
 	ctrlr->res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
 	    &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
-
 	if (ctrlr->res == NULL) {
-		nvme_printf(ctrlr, "unable to allocate shared IRQ\n");
+		nvme_printf(ctrlr, "unable to allocate shared interrupt\n");
 		ret = ENOMEM;
 		goto bad;
 	}
 
-	ctrlr->msix_enabled = 0;
+	ctrlr->msi_count = 0;
 	ctrlr->num_io_queues = 1;
 	if (bus_setup_intr(dev, ctrlr->res,
-	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
+	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_shared_handler,
 	    ctrlr, &ctrlr->tag) != 0) {
-		nvme_printf(ctrlr, "unable to setup intx handler\n");
+		nvme_printf(ctrlr, "unable to setup shared interrupt\n");
 		ret = ENOMEM;
 		goto bad;
 	}
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 9893862f0183..b09c68c75dbf 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -1204,7 +1204,7 @@ nvme_ctrlr_poll(struct nvme_controller *ctrlr)
  * interrupts in the controller.
  */
 void
-nvme_ctrlr_intx_handler(void *arg)
+nvme_ctrlr_shared_handler(void *arg)
 {
 	struct nvme_controller *ctrlr = arg;
 
diff --git a/sys/dev/nvme/nvme_pci.c b/sys/dev/nvme/nvme_pci.c
index af06e579ad47..42a3dc38e85f 100644
--- a/sys/dev/nvme/nvme_pci.c
+++ b/sys/dev/nvme/nvme_pci.c
@@ -47,7 +47,7 @@ static int    nvme_pci_detach(device_t);
 static int    nvme_pci_suspend(device_t);
 static int    nvme_pci_resume(device_t);
 
-static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr);
+static int nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr);
 
 static device_method_t nvme_pci_methods[] = {
 	/* Device interface */
@@ -189,7 +189,9 @@ nvme_pci_attach(device_t dev)
 	if (status != 0)
 		goto bad;
 	pci_enable_busmaster(dev);
-	nvme_ctrlr_setup_interrupts(ctrlr);
+	status = nvme_ctrlr_setup_interrupts(ctrlr);
+	if (status != 0)
+		goto bad;
 	return nvme_attach(dev);
 bad:
 	if (ctrlr->resource != NULL) {
@@ -209,7 +211,7 @@ bad:
 		bus_release_resource(dev, SYS_RES_IRQ,
 		    rman_get_rid(ctrlr->res), ctrlr->res);
 
-	if (ctrlr->msix_enabled)
+	if (ctrlr->msi_count > 0)
 		pci_release_msi(dev);
 
 	return status;
@@ -222,54 +224,60 @@ nvme_pci_detach(device_t dev)
 	int rv;
 
 	rv = nvme_detach(dev);
-	if (ctrlr->msix_enabled)
+	if (ctrlr->msi_count > 0)
 		pci_release_msi(dev);
 	pci_disable_busmaster(dev);
 	return (rv);
 }
 
 static int
-nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
+nvme_ctrlr_setup_shared(struct nvme_controller *ctrlr, int rid)
 {
+	int error;
 
-	ctrlr->msix_enabled = 0;
 	ctrlr->num_io_queues = 1;
-	ctrlr->rid = 0;
+	ctrlr->rid = rid;
 	ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
 	    &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
-
 	if (ctrlr->res == NULL) {
-		nvme_printf(ctrlr, "unable to allocate shared IRQ\n");
+		nvme_printf(ctrlr, "unable to allocate shared interrupt\n");
 		return (ENOMEM);
 	}
 
-	if (bus_setup_intr(ctrlr->dev, ctrlr->res,
-	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
-	    ctrlr, &ctrlr->tag) != 0) {
-		nvme_printf(ctrlr, "unable to setup intx handler\n");
-		return (ENOMEM);
+	error = bus_setup_intr(ctrlr->dev, ctrlr->res,
+	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_shared_handler,
+	    ctrlr, &ctrlr->tag);
+	if (error) {
+		nvme_printf(ctrlr, "unable to setup shared interrupt\n");
+		return (error);
 	}
 
 	return (0);
 }
 
-static void
+static int
 nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
 {
 	device_t	dev;
 	int		force_intx, num_io_queues, per_cpu_io_queues;
 	int		min_cpus_per_ioq;
-	int		num_vectors_requested, num_vectors_allocated;
+	int		num_vectors_requested;
 
 	dev = ctrlr->dev;
 
 	force_intx = 0;
 	TUNABLE_INT_FETCH("hw.nvme.force_intx", &force_intx);
-	if (force_intx || pci_msix_count(dev) < 2) {
-		nvme_ctrlr_configure_intx(ctrlr);
-		return;
-	}
+	if (force_intx)
+		return (nvme_ctrlr_setup_shared(ctrlr, 0));
 
+	if (pci_msix_count(dev) == 0)
+		goto msi;
+
+	/*
+	 * Try to allocate one MSI-X per core for I/O queues, plus one
+	 * for admin queue, but accept single shared MSI-X if have to.
+	 * Fall back to MSI if can't get any MSI-X.
+	 */
 	num_io_queues = mp_ncpus;
 	TUNABLE_INT_FETCH("hw.nvme.num_io_queues", &num_io_queues);
 	if (num_io_queues < 1 || num_io_queues > mp_ncpus)
@@ -287,31 +295,45 @@ nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
 		    max(1, mp_ncpus / min_cpus_per_ioq));
 	}
 
-	num_io_queues = min(num_io_queues, pci_msix_count(dev) - 1);
+	num_io_queues = min(num_io_queues, max(1, pci_msix_count(dev) - 1));
 
 again:
 	if (num_io_queues > vm_ndomains)
 		num_io_queues -= num_io_queues % vm_ndomains;
-	/* One vector for per core I/O queue, plus one vector for admin queue. */
-	num_vectors_requested = num_io_queues + 1;
-	num_vectors_allocated = num_vectors_requested;
-	if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) {
-		nvme_ctrlr_configure_intx(ctrlr);
-		return;
-	}
-	if (num_vectors_allocated < 2) {
-		pci_release_msi(dev);
-		nvme_ctrlr_configure_intx(ctrlr);
-		return;
+	num_vectors_requested = min(num_io_queues + 1, pci_msix_count(dev));
+	ctrlr->msi_count = num_vectors_requested;
+	if (pci_alloc_msix(dev, &ctrlr->msi_count) != 0) {
+		nvme_printf(ctrlr, "unable to allocate MSI-X\n");
+		ctrlr->msi_count = 0;
+		goto msi;
 	}
-	if (num_vectors_allocated != num_vectors_requested) {
+	if (ctrlr->msi_count == 1)
+		return (nvme_ctrlr_setup_shared(ctrlr, 1));
+	if (ctrlr->msi_count != num_vectors_requested) {
 		pci_release_msi(dev);
-		num_io_queues = num_vectors_allocated - 1;
+		num_io_queues = ctrlr->msi_count - 1;
 		goto again;
 	}
 
-	ctrlr->msix_enabled = 1;
 	ctrlr->num_io_queues = num_io_queues;
+	return (0);
+
+msi:
+	/*
+	 * Try to allocate 2 MSIs (admin and I/O queues), but accept single
+	 * shared if have to.  Fall back to INTx if can't get any MSI.
+	 */
+	ctrlr->msi_count = min(pci_msi_count(dev), 2);
+	if (ctrlr->msi_count > 0) {
+		if (pci_alloc_msi(dev, &ctrlr->msi_count) != 0) {
+			nvme_printf(ctrlr, "unable to allocate MSI\n");
+			ctrlr->msi_count = 0;
+		} else if (ctrlr->msi_count == 2) {
+			ctrlr->num_io_queues = 1;
+			return (0);
+		}
+	}
+	return (nvme_ctrlr_setup_shared(ctrlr, ctrlr->msi_count > 0 ? 1 : 0));
 }
 
 static int
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 001d45f2acc4..46a75a0fddcf 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -251,7 +251,7 @@ struct nvme_controller {
 	int			bar4_resource_id;
 	struct resource		*bar4_resource;
 
-	uint32_t		msix_enabled;
+	int			msi_count;
 	uint32_t		enable_aborts;
 
 	uint32_t		num_io_queues;
@@ -560,7 +560,7 @@ void	nvme_notify_fail_consumers(struct nvme_controller *ctrlr);
 void	nvme_notify_new_controller(struct nvme_controller *ctrlr);
 void	nvme_notify_ns(struct nvme_controller *ctrlr, int nsid);
 
-void	nvme_ctrlr_intx_handler(void *arg);
+void	nvme_ctrlr_shared_handler(void *arg);
 void	nvme_ctrlr_poll(struct nvme_controller *ctrlr);
 
 int	nvme_ctrlr_suspend(struct nvme_controller *ctrlr);
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
index d126b9dbeb18..b2609145ef8f 100644
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -655,7 +655,7 @@ nvme_qpair_process_completions(struct nvme_qpair *qpair)
 }
 
 static void
-nvme_qpair_msix_handler(void *arg)
+nvme_qpair_msi_handler(void *arg)
 {
 	struct nvme_qpair *qpair = arg;
 
@@ -673,7 +673,7 @@ nvme_qpair_construct(struct nvme_qpair *qpair,
 	uint8_t			*queuemem, *prpmem, *prp_list;
 	int			i, err;
 
-	qpair->vector = ctrlr->msix_enabled ? qpair->id : 0;
+	qpair->vector = ctrlr->msi_count > 1 ? qpair->id : 0;
 	qpair->num_entries = num_entries;
 	qpair->num_trackers = num_trackers;
 	qpair->ctrlr = ctrlr;
@@ -799,7 +799,7 @@ nvme_qpair_construct(struct nvme_qpair *qpair,
 	    qpair->num_entries, M_NVME, DOMAINSET_PREF(qpair->domain),
 	    M_ZERO | M_WAITOK);
 
-	if (ctrlr->msix_enabled) {
+	if (ctrlr->msi_count > 1) {
 		/*
 		 * MSI-X vector resource IDs start at 1, so we add one to
 		 *  the queue's vector to get the corresponding rid to use.
@@ -808,10 +808,14 @@ nvme_qpair_construct(struct nvme_qpair *qpair,
 
 		qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
 		    &qpair->rid, RF_ACTIVE);
+		if (qpair->res == NULL) {
+			nvme_printf(ctrlr, "unable to allocate MSI\n");
+			goto out;
+		}
 		if (bus_setup_intr(ctrlr->dev, qpair->res,
 		    INTR_TYPE_MISC | INTR_MPSAFE, NULL,
-		    nvme_qpair_msix_handler, qpair, &qpair->tag) != 0) {
-			nvme_printf(ctrlr, "unable to setup intx handler\n");
+		    nvme_qpair_msi_handler, qpair, &qpair->tag) != 0) {
+			nvme_printf(ctrlr, "unable to setup MSI\n");
 			goto out;
 		}
 		if (qpair->id == 0) {


More information about the dev-commits-src-all mailing list