svn commit: r351355 - in head/sys: conf dev/nvme modules/nvme

Warner Losh imp at FreeBSD.org
Wed Aug 21 22:17:56 UTC 2019


Author: imp
Date: Wed Aug 21 22:17:55 2019
New Revision: 351355
URL: https://svnweb.freebsd.org/changeset/base/351355

Log:
  Separate the pci attachment from the rest of nvme
  
NVMe drives can be attached in a number of different ways. Separate out the PCI
attachment so that we can have other attachment types, like AHCI and various
types of NVMeoF.
  
  Submitted by: cognet@

Added:
  head/sys/dev/nvme/nvme_pci.c   (contents, props changed)
Modified:
  head/sys/conf/files
  head/sys/dev/nvme/nvme.c
  head/sys/dev/nvme/nvme_ctrlr.c
  head/sys/dev/nvme/nvme_private.h
  head/sys/modules/nvme/Makefile

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Wed Aug 21 21:05:15 2019	(r351354)
+++ head/sys/conf/files	Wed Aug 21 22:17:55 2019	(r351355)
@@ -2483,6 +2483,7 @@ dev/nvme/nvme_ctrlr.c		optional nvme
 dev/nvme/nvme_ctrlr_cmd.c	optional nvme
 dev/nvme/nvme_ns.c		optional nvme
 dev/nvme/nvme_ns_cmd.c		optional nvme
+dev/nvme/nvme_pci.c		optional nvme pci
 dev/nvme/nvme_qpair.c		optional nvme
 dev/nvme/nvme_sim.c		optional nvme scbus
 dev/nvme/nvme_sysctl.c		optional nvme

Modified: head/sys/dev/nvme/nvme.c
==============================================================================
--- head/sys/dev/nvme/nvme.c	Wed Aug 21 21:05:15 2019	(r351354)
+++ head/sys/dev/nvme/nvme.c	Wed Aug 21 22:17:55 2019	(r351355)
@@ -36,9 +36,6 @@ __FBSDID("$FreeBSD$");
 
 #include <vm/uma.h>
 
-#include <dev/pci/pcireg.h>
-#include <dev/pci/pcivar.h>
-
 #include "nvme_private.h"
 
 struct nvme_consumer {
@@ -58,107 +55,8 @@ int32_t		nvme_retry_count;
 
 MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
 
-static int    nvme_probe(device_t);
-static int    nvme_attach(device_t);
-static int    nvme_detach(device_t);
-static int    nvme_shutdown(device_t);
+devclass_t nvme_devclass;
 
-static devclass_t nvme_devclass;
-
-static device_method_t nvme_pci_methods[] = {
-	/* Device interface */
-	DEVMETHOD(device_probe,     nvme_probe),
-	DEVMETHOD(device_attach,    nvme_attach),
-	DEVMETHOD(device_detach,    nvme_detach),
-	DEVMETHOD(device_shutdown,  nvme_shutdown),
-	{ 0, 0 }
-};
-
-static driver_t nvme_pci_driver = {
-	"nvme",
-	nvme_pci_methods,
-	sizeof(struct nvme_controller),
-};
-
-DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, NULL, NULL);
-MODULE_VERSION(nvme, 1);
-MODULE_DEPEND(nvme, cam, 1, 1, 1);
-
-static struct _pcsid
-{
-	uint32_t	devid;
-	int		match_subdevice;
-	uint16_t	subdevice;
-	const char	*desc;
-	uint32_t	quirks;
-} pci_ids[] = {
-	{ 0x01118086,		0, 0, "NVMe Controller"  },
-	{ IDT32_PCI_ID,		0, 0, "IDT NVMe Controller (32 channel)"  },
-	{ IDT8_PCI_ID,		0, 0, "IDT NVMe Controller (8 channel)" },
-	{ 0x09538086,		1, 0x3702, "DC P3700 SSD" },
-	{ 0x09538086,		1, 0x3703, "DC P3700 SSD [2.5\" SFF]" },
-	{ 0x09538086,		1, 0x3704, "DC P3500 SSD [Add-in Card]" },
-	{ 0x09538086,		1, 0x3705, "DC P3500 SSD [2.5\" SFF]" },
-	{ 0x09538086,		1, 0x3709, "DC P3600 SSD [Add-in Card]" },
-	{ 0x09538086,		1, 0x370a, "DC P3600 SSD [2.5\" SFF]" },
-	{ 0x00031c58,		0, 0, "HGST SN100",	QUIRK_DELAY_B4_CHK_RDY },
-	{ 0x00231c58,		0, 0, "WDC SN200",	QUIRK_DELAY_B4_CHK_RDY },
-	{ 0x05401c5f,		0, 0, "Memblaze Pblaze4", QUIRK_DELAY_B4_CHK_RDY },
-	{ 0xa821144d,		0, 0, "Samsung PM1725", QUIRK_DELAY_B4_CHK_RDY },
-	{ 0xa822144d,		0, 0, "Samsung PM1725a", QUIRK_DELAY_B4_CHK_RDY },
-	{ 0x01161179,		0, 0, "Toshiba XG5", QUIRK_DISABLE_TIMEOUT },
-	{ 0x00000000,		0, 0, NULL  }
-};
-
-static int
-nvme_match(uint32_t devid, uint16_t subdevice, struct _pcsid *ep)
-{
-	if (devid != ep->devid)
-		return 0;
-
-	if (!ep->match_subdevice)
-		return 1;
-
-	if (subdevice == ep->subdevice)
-		return 1;
-	else
-		return 0;
-}
-
-static int
-nvme_probe (device_t device)
-{
-	struct _pcsid	*ep;
-	uint32_t	devid;
-	uint16_t	subdevice;
-
-	devid = pci_get_devid(device);
-	subdevice = pci_get_subdevice(device);
-	ep = pci_ids;
-
-	while (ep->devid) {
-		if (nvme_match(devid, subdevice, ep))
-			break;
-		++ep;
-	}
-
-	if (ep->desc) {
-		device_set_desc(device, ep->desc);
-		return (BUS_PROBE_DEFAULT);
-	}
-
-#if defined(PCIS_STORAGE_NVM)
-	if (pci_get_class(device)    == PCIC_STORAGE &&
-	    pci_get_subclass(device) == PCIS_STORAGE_NVM &&
-	    pci_get_progif(device)   == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
-		device_set_desc(device, "Generic NVMe Device");
-		return (BUS_PROBE_GENERIC);
-	}
-#endif
-
-	return (ENXIO);
-}
-
 static void
 nvme_init(void)
 {
@@ -181,7 +79,7 @@ nvme_uninit(void)
 
 SYSUNINIT(nvme_unregister, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_uninit, NULL);
 
-static int
+int
 nvme_shutdown(device_t dev)
 {
 	struct nvme_controller	*ctrlr;
@@ -225,25 +123,12 @@ nvme_dump_completion(struct nvme_completion *cpl)
 	    cpl->cid, p, sc, sct, m, dnr);
 }
 
-static int
+int
 nvme_attach(device_t dev)
 {
 	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
 	int			status;
-	struct _pcsid		*ep;
-	uint32_t		devid;
-	uint16_t		subdevice;
 
-	devid = pci_get_devid(dev);
-	subdevice = pci_get_subdevice(dev);
-	ep = pci_ids;
-	while (ep->devid) {
-		if (nvme_match(devid, subdevice, ep))
-			break;
-		++ep;
-	}
-	ctrlr->quirks = ep->quirks;
-
 	status = nvme_ctrlr_construct(ctrlr, dev);
 
 	if (status != 0) {
@@ -252,32 +137,8 @@ nvme_attach(device_t dev)
 	}
 
 	/*
-	 * Some drives do not implement the completion timeout feature
-	 * correctly. There's a WAR from the manufacturer to just disable it.
-	 * The driver wouldn't respond correctly to a timeout anyway.
-	 */
-	if (ep->quirks & QUIRK_DISABLE_TIMEOUT) {
-		int ptr;
-		uint16_t devctl2;
-
-		status = pci_find_cap(dev, PCIY_EXPRESS, &ptr);
-		if (status) {
-			device_printf(dev, "Can't locate PCIe capability?");
-			return (status);
-		}
-		devctl2 = pci_read_config(dev, ptr + PCIER_DEVICE_CTL2, sizeof(devctl2));
-		devctl2 |= PCIEM_CTL2_COMP_TIMO_DISABLE;
-		pci_write_config(dev, ptr + PCIER_DEVICE_CTL2, devctl2, sizeof(devctl2));
-	}
-
-	/*
-	 * Enable busmastering so the completion status messages can
-	 * be busmastered back to the host.
-	 */
-	pci_enable_busmaster(dev);
-
-	/*
 	 * Reset controller twice to ensure we do a transition from cc.en==1
+	 * Reset controller twice to ensure we do a transition from cc.en==1
 	 *  to cc.en==0.  This is because we don't really know what status
 	 *  the controller was left in when boot handed off to OS.
 	 */
@@ -301,13 +162,12 @@ nvme_attach(device_t dev)
 	return (0);
 }
 
-static int
+int
 nvme_detach (device_t dev)
 {
 	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
 
 	nvme_ctrlr_destruct(ctrlr, dev);
-	pci_disable_busmaster(dev);
 	return (0);
 }
 

Modified: head/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- head/sys/dev/nvme/nvme_ctrlr.c	Wed Aug 21 21:05:15 2019	(r351354)
+++ head/sys/dev/nvme/nvme_ctrlr.c	Wed Aug 21 22:17:55 2019	(r351355)
@@ -42,50 +42,14 @@ __FBSDID("$FreeBSD$");
 #include <sys/uio.h>
 #include <sys/endian.h>
 
-#include <dev/pci/pcireg.h>
-#include <dev/pci/pcivar.h>
-
 #include "nvme_private.h"
 
 #define B4_CHK_RDY_DELAY_MS	2300		/* work around controller bug */
 
 static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
 						struct nvme_async_event_request *aer);
-static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr);
 
 static int
-nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
-{
-
-	ctrlr->resource_id = PCIR_BAR(0);
-
-	ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
-	    &ctrlr->resource_id, RF_ACTIVE);
-
-	if(ctrlr->resource == NULL) {
-		nvme_printf(ctrlr, "unable to allocate pci resource\n");
-		return (ENOMEM);
-	}
-
-	ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
-	ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
-	ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;
-
-	/*
-	 * The NVMe spec allows for the MSI-X table to be placed behind
-	 *  BAR 4/5, separate from the control/doorbell registers.  Always
-	 *  try to map this bar, because it must be mapped prior to calling
-	 *  pci_alloc_msix().  If the table isn't behind BAR 4/5,
-	 *  bus_alloc_resource() will just return NULL which is OK.
-	 */
-	ctrlr->bar4_resource_id = PCIR_BAR(4);
-	ctrlr->bar4_resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
-	    &ctrlr->bar4_resource_id, RF_ACTIVE);
-
-	return (0);
-}
-
-static int
 nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
 {
 	struct nvme_qpair	*qpair;
@@ -876,9 +840,8 @@ nvme_ctrlr_start(void *ctrlr_arg)
 	 *  the number of I/O queues supported, so cannot reset
 	 *  the adminq again here.
 	 */
-	if (ctrlr->is_resetting) {
+	if (ctrlr->is_resetting)
 		nvme_qpair_reset(&ctrlr->adminq);
-	}
 
 	for (i = 0; i < ctrlr->num_io_queues; i++)
 		nvme_qpair_reset(&ctrlr->ioq[i]);
@@ -1004,34 +967,6 @@ nvme_ctrlr_intx_handler(void *arg)
 	nvme_mmio_write_4(ctrlr, intmc, 1);
 }
 
-static int
-nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
-{
-
-	ctrlr->msix_enabled = 0;
-	ctrlr->num_io_queues = 1;
-	ctrlr->num_cpus_per_ioq = mp_ncpus;
-	ctrlr->rid = 0;
-	ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
-	    &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
-
-	if (ctrlr->res == NULL) {
-		nvme_printf(ctrlr, "unable to allocate shared IRQ\n");
-		return (ENOMEM);
-	}
-
-	bus_setup_intr(ctrlr->dev, ctrlr->res,
-	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
-	    ctrlr, &ctrlr->tag);
-
-	if (ctrlr->tag == NULL) {
-		nvme_printf(ctrlr, "unable to setup intx handler\n");
-		return (ENOMEM);
-	}
-
-	return (0);
-}
-
 static void
 nvme_pt_done(void *arg, const struct nvme_completion *cpl)
 {
@@ -1177,88 +1112,6 @@ static struct cdevsw nvme_ctrlr_cdevsw = {
 	.d_ioctl =	nvme_ctrlr_ioctl
 };
 
-static void
-nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
-{
-	device_t	dev;
-	int		per_cpu_io_queues;
-	int		min_cpus_per_ioq;
-	int		num_vectors_requested, num_vectors_allocated;
-	int		num_vectors_available;
-
-	dev = ctrlr->dev;
-	min_cpus_per_ioq = 1;
-	TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq);
-
-	if (min_cpus_per_ioq < 1) {
-		min_cpus_per_ioq = 1;
-	} else if (min_cpus_per_ioq > mp_ncpus) {
-		min_cpus_per_ioq = mp_ncpus;
-	}
-
-	per_cpu_io_queues = 1;
-	TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
-
-	if (per_cpu_io_queues == 0) {
-		min_cpus_per_ioq = mp_ncpus;
-	}
-
-	ctrlr->force_intx = 0;
-	TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
-
-	/*
-	 * FreeBSD currently cannot allocate more than about 190 vectors at
-	 *  boot, meaning that systems with high core count and many devices
-	 *  requesting per-CPU interrupt vectors will not get their full
-	 *  allotment.  So first, try to allocate as many as we may need to
-	 *  understand what is available, then immediately release them.
-	 *  Then figure out how many of those we will actually use, based on
-	 *  assigning an equal number of cores to each I/O queue.
-	 */
-
-	/* One vector for per core I/O queue, plus one vector for admin queue. */
-	num_vectors_available = min(pci_msix_count(dev), mp_ncpus + 1);
-	if (pci_alloc_msix(dev, &num_vectors_available) != 0) {
-		num_vectors_available = 0;
-	}
-	pci_release_msi(dev);
-
-	if (ctrlr->force_intx || num_vectors_available < 2) {
-		nvme_ctrlr_configure_intx(ctrlr);
-		return;
-	}
-
-	/*
-	 * Do not use all vectors for I/O queues - one must be saved for the
-	 *  admin queue.
-	 */
-	ctrlr->num_cpus_per_ioq = max(min_cpus_per_ioq,
-	    howmany(mp_ncpus, num_vectors_available - 1));
-
-	ctrlr->num_io_queues = howmany(mp_ncpus, ctrlr->num_cpus_per_ioq);
-	num_vectors_requested = ctrlr->num_io_queues + 1;
-	num_vectors_allocated = num_vectors_requested;
-
-	/*
-	 * Now just allocate the number of vectors we need.  This should
-	 *  succeed, since we previously called pci_alloc_msix()
-	 *  successfully returning at least this many vectors, but just to
-	 *  be safe, if something goes wrong just revert to INTx.
-	 */
-	if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) {
-		nvme_ctrlr_configure_intx(ctrlr);
-		return;
-	}
-
-	if (num_vectors_allocated < num_vectors_requested) {
-		pci_release_msi(dev);
-		nvme_ctrlr_configure_intx(ctrlr);
-		return;
-	}
-
-	ctrlr->msix_enabled = 1;
-}
-
 int
 nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
 {
@@ -1274,11 +1127,6 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, de
 
 	mtx_init(&ctrlr->lock, "nvme ctrlr lock", NULL, MTX_DEF);
 
-	status = nvme_ctrlr_allocate_bar(ctrlr);
-
-	if (status != 0)
-		return (status);
-
 	/*
 	 * Software emulators may set the doorbell stride to something
 	 *  other than zero, but this driver is not set up to handle that.
@@ -1308,8 +1156,6 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, de
 	ctrlr->enable_aborts = 0;
 	TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts);
 
-	nvme_ctrlr_setup_interrupts(ctrlr);
-
 	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
 	if (nvme_ctrlr_construct_admin_qpair(ctrlr) != 0)
 		return (ENXIO);
@@ -1394,9 +1240,6 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, dev
 	if (ctrlr->res)
 		bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
 		    rman_get_rid(ctrlr->res), ctrlr->res);
-
-	if (ctrlr->msix_enabled)
-		pci_release_msi(dev);
 
 	if (ctrlr->bar4_resource != NULL) {
 		bus_release_resource(dev, SYS_RES_MEMORY,

Added: head/sys/dev/nvme/nvme_pci.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/dev/nvme/nvme_pci.c	Wed Aug 21 22:17:55 2019	(r351355)
@@ -0,0 +1,333 @@
+/*-
+ * Copyright (C) 2012-2016 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include "nvme_private.h"
+
+static int    nvme_pci_probe(device_t);
+static int    nvme_pci_attach(device_t);
+static int    nvme_pci_detach(device_t);
+
+static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr);
+
+static device_method_t nvme_pci_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,     nvme_pci_probe),
+	DEVMETHOD(device_attach,    nvme_pci_attach),
+	DEVMETHOD(device_detach,    nvme_pci_detach),
+	DEVMETHOD(device_shutdown,  nvme_shutdown),
+	{ 0, 0 }
+};
+
+static driver_t nvme_pci_driver = {
+	"nvme",
+	nvme_pci_methods,
+	sizeof(struct nvme_controller),
+};
+
+DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, NULL, 0);
+MODULE_VERSION(nvme_pci, 1);
+
+static struct _pcsid
+{
+	uint32_t	devid;
+	int		match_subdevice;
+	uint16_t	subdevice;
+	const char	*desc;
+	uint32_t	quirks;
+} pci_ids[] = {
+	{ 0x01118086,		0, 0, "NVMe Controller"  },
+	{ IDT32_PCI_ID,		0, 0, "IDT NVMe Controller (32 channel)"  },
+	{ IDT8_PCI_ID,		0, 0, "IDT NVMe Controller (8 channel)" },
+	{ 0x09538086,		1, 0x3702, "DC P3700 SSD" },
+	{ 0x09538086,		1, 0x3703, "DC P3700 SSD [2.5\" SFF]" },
+	{ 0x09538086,		1, 0x3704, "DC P3500 SSD [Add-in Card]" },
+	{ 0x09538086,		1, 0x3705, "DC P3500 SSD [2.5\" SFF]" },
+	{ 0x09538086,		1, 0x3709, "DC P3600 SSD [Add-in Card]" },
+	{ 0x09538086,		1, 0x370a, "DC P3600 SSD [2.5\" SFF]" },
+	{ 0x00031c58,		0, 0, "HGST SN100",	QUIRK_DELAY_B4_CHK_RDY },
+	{ 0x00231c58,		0, 0, "WDC SN200",	QUIRK_DELAY_B4_CHK_RDY },
+	{ 0x05401c5f,		0, 0, "Memblaze Pblaze4", QUIRK_DELAY_B4_CHK_RDY },
+	{ 0xa821144d,		0, 0, "Samsung PM1725", QUIRK_DELAY_B4_CHK_RDY },
+	{ 0xa822144d,		0, 0, "Samsung PM1725a", QUIRK_DELAY_B4_CHK_RDY },
+	{ 0x00000000,		0, 0, NULL  }
+};
+
+
+static int
+nvme_match(uint32_t devid, uint16_t subdevice, struct _pcsid *ep)
+{
+	if (devid != ep->devid)
+		return 0;
+
+	if (!ep->match_subdevice)
+		return 1;
+
+	if (subdevice == ep->subdevice)
+		return 1;
+	else
+		return 0;
+}
+
+static int
+nvme_pci_probe (device_t device)
+{
+	struct nvme_controller *ctrlr = DEVICE2SOFTC(device);
+	struct _pcsid	*ep;
+	uint32_t	devid;
+	uint16_t	subdevice;
+
+	devid = pci_get_devid(device);
+	subdevice = pci_get_subdevice(device);
+	ep = pci_ids;
+
+	while (ep->devid) {
+		if (nvme_match(devid, subdevice, ep))
+			break;
+		++ep;
+	}
+	if (ep->devid)
+		ctrlr->quirks = ep->quirks;
+
+	if (ep->desc) {
+		device_set_desc(device, ep->desc);
+		return (BUS_PROBE_DEFAULT);
+	}
+
+#if defined(PCIS_STORAGE_NVM)
+	if (pci_get_class(device)    == PCIC_STORAGE &&
+	    pci_get_subclass(device) == PCIS_STORAGE_NVM &&
+	    pci_get_progif(device)   == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
+		device_set_desc(device, "Generic NVMe Device");
+		return (BUS_PROBE_GENERIC);
+	}
+#endif
+
+	return (ENXIO);
+}
+
+static int
+nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
+{
+
+	ctrlr->resource_id = PCIR_BAR(0);
+
+	ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
+	    &ctrlr->resource_id, RF_ACTIVE);
+
+	if(ctrlr->resource == NULL) {
+		nvme_printf(ctrlr, "unable to allocate pci resource\n");
+		return (ENOMEM);
+	}
+
+	ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
+	ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
+	ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;
+
+	/*
+	 * The NVMe spec allows for the MSI-X table to be placed behind
+	 *  BAR 4/5, separate from the control/doorbell registers.  Always
+	 *  try to map this bar, because it must be mapped prior to calling
+	 *  pci_alloc_msix().  If the table isn't behind BAR 4/5,
+	 *  bus_alloc_resource() will just return NULL which is OK.
+	 */
+	ctrlr->bar4_resource_id = PCIR_BAR(4);
+	ctrlr->bar4_resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
+	    &ctrlr->bar4_resource_id, RF_ACTIVE);
+
+	return (0);
+}
+
+static int
+nvme_pci_attach(device_t dev)
+{
+	struct nvme_controller*ctrlr = DEVICE2SOFTC(dev);
+	int status;
+
+	ctrlr->dev = dev;
+	status = nvme_ctrlr_allocate_bar(ctrlr);
+	if (status != 0)
+		goto bad;
+	pci_enable_busmaster(dev);
+	nvme_ctrlr_setup_interrupts(ctrlr);
+	return nvme_attach(dev);
+bad:
+	if (ctrlr->resource != NULL) {
+		bus_release_resource(dev, SYS_RES_MEMORY,
+		    ctrlr->resource_id, ctrlr->resource);
+	}
+
+	if (ctrlr->bar4_resource != NULL) {
+		bus_release_resource(dev, SYS_RES_MEMORY,
+		    ctrlr->bar4_resource_id, ctrlr->bar4_resource);
+	}
+
+	if (ctrlr->tag)
+		bus_teardown_intr(dev, ctrlr->res, ctrlr->tag);
+
+	if (ctrlr->res)
+		bus_release_resource(dev, SYS_RES_IRQ,
+		    rman_get_rid(ctrlr->res), ctrlr->res);
+
+	if (ctrlr->msix_enabled)
+		pci_release_msi(dev);
+
+	return status;
+}
+
+static int
+nvme_pci_detach(device_t dev)
+{
+	struct nvme_controller*ctrlr = DEVICE2SOFTC(dev);
+
+	if (ctrlr->msix_enabled)
+		pci_release_msi(dev);
+	pci_disable_busmaster(dev);
+	return (nvme_detach(dev));
+}
+
+static int
+nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
+{
+
+	ctrlr->msix_enabled = 0;
+	ctrlr->num_io_queues = 1;
+	ctrlr->num_cpus_per_ioq = mp_ncpus;
+	ctrlr->rid = 0;
+	ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
+	    &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
+
+	if (ctrlr->res == NULL) {
+		nvme_printf(ctrlr, "unable to allocate shared IRQ\n");
+		return (ENOMEM);
+	}
+
+	bus_setup_intr(ctrlr->dev, ctrlr->res,
+	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
+	    ctrlr, &ctrlr->tag);
+
+	if (ctrlr->tag == NULL) {
+		nvme_printf(ctrlr, "unable to setup intx handler\n");
+		return (ENOMEM);
+	}
+
+	return (0);
+}
+
+static void
+nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
+{
+	device_t	dev;
+	int		per_cpu_io_queues;
+	int		min_cpus_per_ioq;
+	int		num_vectors_requested, num_vectors_allocated;
+	int		num_vectors_available;
+
+	dev = ctrlr->dev;
+	min_cpus_per_ioq = 1;
+	TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq);
+
+	if (min_cpus_per_ioq < 1) {
+		min_cpus_per_ioq = 1;
+	} else if (min_cpus_per_ioq > mp_ncpus) {
+		min_cpus_per_ioq = mp_ncpus;
+	}
+
+	per_cpu_io_queues = 1;
+	TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
+
+	if (per_cpu_io_queues == 0) {
+		min_cpus_per_ioq = mp_ncpus;
+	}
+
+	ctrlr->force_intx = 0;
+	TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
+
+	/*
+	 * FreeBSD currently cannot allocate more than about 190 vectors at
+	 *  boot, meaning that systems with high core count and many devices
+	 *  requesting per-CPU interrupt vectors will not get their full
+	 *  allotment.  So first, try to allocate as many as we may need to
+	 *  understand what is available, then immediately release them.
+	 *  Then figure out how many of those we will actually use, based on
+	 *  assigning an equal number of cores to each I/O queue.
+	 */
+
+	/* One vector for per core I/O queue, plus one vector for admin queue. */
+	num_vectors_available = min(pci_msix_count(dev), mp_ncpus + 1);
+	if (pci_alloc_msix(dev, &num_vectors_available) != 0) {
+		num_vectors_available = 0;
+	}
+	pci_release_msi(dev);
+
+	if (ctrlr->force_intx || num_vectors_available < 2) {
+		nvme_ctrlr_configure_intx(ctrlr);
+		return;
+	}
+
+	/*
+	 * Do not use all vectors for I/O queues - one must be saved for the
+	 *  admin queue.
+	 */
+	ctrlr->num_cpus_per_ioq = max(min_cpus_per_ioq,
+	    howmany(mp_ncpus, num_vectors_available - 1));
+
+	ctrlr->num_io_queues = howmany(mp_ncpus, ctrlr->num_cpus_per_ioq);
+	num_vectors_requested = ctrlr->num_io_queues + 1;
+	num_vectors_allocated = num_vectors_requested;
+
+	/*
+	 * Now just allocate the number of vectors we need.  This should
+	 *  succeed, since we previously called pci_alloc_msix()
+	 *  successfully returning at least this many vectors, but just to
+	 *  be safe, if something goes wrong just revert to INTx.
+	 */
+	if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) {
+		nvme_ctrlr_configure_intx(ctrlr);
+		return;
+	}
+
+	if (num_vectors_allocated < num_vectors_requested) {
+		pci_release_msi(dev);
+		nvme_ctrlr_configure_intx(ctrlr);
+		return;
+	}
+
+	ctrlr->msix_enabled = 1;
+}

Modified: head/sys/dev/nvme/nvme_private.h
==============================================================================
--- head/sys/dev/nvme/nvme_private.h	Wed Aug 21 21:05:15 2019	(r351354)
+++ head/sys/dev/nvme/nvme_private.h	Wed Aug 21 22:17:55 2019	(r351355)
@@ -37,6 +37,7 @@
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
+#include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/rman.h>
 #include <sys/systm.h>
@@ -122,6 +123,8 @@ struct nvme_completion_poll_status {
 	int			done;
 };
 
+extern devclass_t nvme_devclass;
+
 #define NVME_REQUEST_VADDR	1
 #define NVME_REQUEST_NULL	2 /* For requests with no payload. */
 #define NVME_REQUEST_UIO	3
@@ -438,6 +441,10 @@ void	nvme_sysctl_initialize_ctrlr(struct nvme_controll
 
 void	nvme_dump_command(struct nvme_command *cmd);
 void	nvme_dump_completion(struct nvme_completion *cpl);
+
+int	nvme_attach(device_t dev);
+int	nvme_shutdown(device_t dev);
+int	nvme_detach(device_t dev);
 
 static __inline void
 nvme_single_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)

Modified: head/sys/modules/nvme/Makefile
==============================================================================
--- head/sys/modules/nvme/Makefile	Wed Aug 21 21:05:15 2019	(r351354)
+++ head/sys/modules/nvme/Makefile	Wed Aug 21 22:17:55 2019	(r351355)
@@ -9,6 +9,7 @@ SRCS =	nvme.c			\
 	nvme_ctrlr_cmd.c	\
 	nvme_ns.c		\
 	nvme_ns_cmd.c		\
+	nvme_pci.c		\
 	nvme_qpair.c		\
 	nvme_sim.c		\
 	nvme_sysctl.c		\


More information about the svn-src-all mailing list