git: aed44717a160 - main - nvd: Connect nvme_if methods
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 10 Dec 2025 22:53:54 UTC
The branch main has been updated by imp:
URL: https://cgit.FreeBSD.org/src/commit/?id=aed44717a1606e4c5c79f7c8831de49cba64d7e6
commit aed44717a1606e4c5c79f7c8831de49cba64d7e6
Author: Warner Losh <imp@FreeBSD.org>
AuthorDate: 2025-12-10 22:52:38 +0000
Commit: Warner Losh <imp@FreeBSD.org>
CommitDate: 2025-12-10 22:52:38 +0000
nvd: Connect nvme_if methods
Conenct methods to manage namespaces explicitly to replace the old
consumer interface.
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D51388
---
sys/dev/nvd/nvd.c | 237 +++++++++++++++++++++++++-------------------
sys/dev/nvme/nvme.c | 36 -------
sys/dev/nvme/nvme_ctrlr.c | 45 ++++++---
sys/dev/nvme/nvme_if.m | 2 +-
sys/dev/nvme/nvme_private.h | 2 -
sys/modules/nvd/Makefile | 2 +-
6 files changed, 167 insertions(+), 157 deletions(-)
diff --git a/sys/dev/nvd/nvd.c b/sys/dev/nvd/nvd.c
index fef3d1a1bc92..d96a752a12d9 100644
--- a/sys/dev/nvd/nvd.c
+++ b/sys/dev/nvd/nvd.c
@@ -29,6 +29,7 @@
#include <sys/param.h>
#include <sys/bio.h>
+#include <sys/bus.h>
#include <sys/devicestat.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
@@ -47,6 +48,8 @@
#include <dev/pci/pcivar.h>
+#include "nvme_if.h"
+
#define NVD_STR "nvd"
struct nvd_disk;
@@ -385,17 +388,96 @@ nvd_bioq_process(void *arg, int pending)
}
}
-static void
-nvd_new_disk(struct nvme_namespace *ns, struct nvd_controller *ctrlr)
+static int
+nvdc_controller_failed(device_t dev)
+{
+ struct nvd_controller *nvd_ctrlr = device_get_softc(dev);
+ struct nvd_disk *ndisk;
+
+ mtx_lock(&nvd_lock);
+ TAILQ_REMOVE(&ctrlr_head, nvd_ctrlr, tailq);
+ TAILQ_FOREACH(ndisk, &nvd_ctrlr->disk_head, ctrlr_tailq)
+ nvd_gone(ndisk);
+ while (!TAILQ_EMPTY(&nvd_ctrlr->disk_head))
+ msleep(&nvd_ctrlr->disk_head, &nvd_lock, 0, "nvd_fail", 0);
+ mtx_unlock(&nvd_lock);
+ return (0);
+}
+
+static int
+nvdc_probe(device_t dev)
+{
+ if (!nvme_use_nvd)
+ return (ENXIO);
+
+ device_set_desc(dev, "nvme storage namespace");
+ return (BUS_PROBE_DEFAULT);
+}
+
+static int
+nvdc_attach(device_t dev)
+{
+ struct nvd_controller *nvd_ctrlr = device_get_softc(dev);
+ struct nvme_controller *ctrlr = device_get_ivars(dev);
+
+ nvd_ctrlr->ctrlr = ctrlr;
+ TAILQ_INIT(&nvd_ctrlr->disk_head);
+ mtx_lock(&nvd_lock);
+ TAILQ_INSERT_TAIL(&ctrlr_head, nvd_ctrlr, tailq);
+ mtx_unlock(&nvd_lock);
+
+ return (0);
+}
+
+static int
+nvdc_detach(device_t dev)
+{
+ return (nvdc_controller_failed(dev));
+}
+
+static struct nvd_disk *
+nvd_nsid_to_disk(struct nvd_controller *nvd_ctrlr, uint32_t nsid)
+{
+ struct nvd_disk *ndisk;
+
+ mtx_lock(&nvd_lock);
+ TAILQ_FOREACH(ndisk, &nvd_ctrlr->disk_head, ctrlr_tailq) {
+ if (ndisk->ns->id != nsid)
+ continue;
+ break;
+ }
+ mtx_unlock(&nvd_lock);
+ return ndisk;
+}
+
+static struct nvd_disk *
+nvd_ns_to_disk(struct nvd_controller *nvd_ctrlr, struct nvme_namespace *ns)
{
+ struct nvd_disk *ndisk;
+
+ mtx_lock(&nvd_lock);
+ TAILQ_FOREACH(ndisk, &nvd_ctrlr->disk_head, ctrlr_tailq) {
+ if (ndisk->ns != ns)
+ continue;
+ break;
+ }
+ mtx_unlock(&nvd_lock);
+ return ndisk;
+}
+
+static int
+nvdc_ns_added(device_t dev, struct nvme_namespace *ns)
+{
+ struct nvd_controller *nvd_ctrlr = device_get_softc(dev);
+ struct nvd_disk *ndisk;
uint8_t descr[NVME_MODEL_NUMBER_LENGTH+1];
- struct nvd_disk *ndisk, *tnd;
+ struct nvd_disk *tnd;
struct disk *disk;
- device_t dev = ctrlr->ctrlr->dev;
- int unit;
+ device_t pdev = nvd_ctrlr->ctrlr->dev;
+ int unit;
ndisk = malloc(sizeof(struct nvd_disk), M_NVD, M_ZERO | M_WAITOK);
- ndisk->ctrlr = ctrlr;
+ ndisk->ctrlr = nvd_ctrlr;
ndisk->ns = ns;
ndisk->cur_depth = 0;
ndisk->ordered_in_flight = 0;
@@ -415,7 +497,7 @@ nvd_new_disk(struct nvme_namespace *ns, struct nvd_controller *ctrlr)
TAILQ_INSERT_BEFORE(tnd, ndisk, global_tailq);
else
TAILQ_INSERT_TAIL(&disk_head, ndisk, global_tailq);
- TAILQ_INSERT_TAIL(&ctrlr->disk_head, ndisk, ctrlr_tailq);
+ TAILQ_INSERT_TAIL(&nvd_ctrlr->disk_head, ndisk, ctrlr_tailq);
mtx_unlock(&nvd_lock);
ndisk->tq = taskqueue_create("nvd_taskq", M_WAITOK,
@@ -464,14 +546,14 @@ nvd_new_disk(struct nvme_namespace *ns, struct nvd_controller *ctrlr)
* which has no access to the config space for this controller, report
* the AHCI controller's data.
*/
- if (ctrlr->ctrlr->quirks & QUIRK_AHCI)
- dev = device_get_parent(dev);
- disk->d_hba_vendor = pci_get_vendor(dev);
- disk->d_hba_device = pci_get_device(dev);
- disk->d_hba_subvendor = pci_get_subvendor(dev);
- disk->d_hba_subdevice = pci_get_subdevice(dev);
+ if (nvd_ctrlr->ctrlr->quirks & QUIRK_AHCI)
+ pdev = device_get_parent(pdev);
+ disk->d_hba_vendor = pci_get_vendor(pdev);
+ disk->d_hba_device = pci_get_device(pdev);
+ disk->d_hba_subvendor = pci_get_subvendor(pdev);
+ disk->d_hba_subdevice = pci_get_subdevice(pdev);
disk->d_rotation_rate = DISK_RR_NON_ROTATING;
- strlcpy(disk->d_attachment, device_get_nameunit(dev),
+ strlcpy(disk->d_attachment, device_get_nameunit(pdev),
sizeof(disk->d_attachment));
disk_create(disk, DISK_VERSION);
@@ -481,14 +563,35 @@ nvd_new_disk(struct nvme_namespace *ns, struct nvd_controller *ctrlr)
(uintmax_t)disk->d_mediasize / (1024*1024),
(uintmax_t)disk->d_mediasize / disk->d_sectorsize,
disk->d_sectorsize);
+
+ return (0);
}
-#if 0
-static void
-nvd_resize(struct nvd_disk *ndisk)
+static int
+nvdc_ns_removed(device_t dev, struct nvme_namespace *ns)
{
- struct disk *disk = ndisk->disk;
- struct nvme_namespace *ns = ndisk->ns;
+ struct nvd_controller *nvd_ctrlr = device_get_softc(dev);
+ struct nvd_disk *ndisk = nvd_ns_to_disk(nvd_ctrlr, ns);
+
+ if (ndisk == NULL)
+ panic("nvdc: no namespace found for ns %p", ns);
+ nvd_gone(ndisk);
+ /* gonecb removes it from the list -- no need to wait */
+ return (0);
+}
+
+static int
+nvdc_ns_changed(device_t dev, uint32_t nsid)
+{
+ struct nvd_controller *nvd_ctrlr = device_get_softc(dev);
+ struct nvd_disk *ndisk = nvd_nsid_to_disk(nvd_ctrlr, nsid);
+ struct disk *disk;
+ struct nvme_namespace *ns;
+
+ if (ndisk == NULL)
+ panic("nvdc: no namespace found for %d", nsid);
+ disk = ndisk->disk;
+ ns = ndisk->ns;
disk->d_sectorsize = nvme_ns_get_sector_size(ns);
disk->d_mediasize = (off_t)nvme_ns_get_size(ns);
@@ -504,98 +607,28 @@ nvd_resize(struct nvd_disk *ndisk)
(uintmax_t)disk->d_mediasize / (1024*1024),
(uintmax_t)disk->d_mediasize / disk->d_sectorsize,
disk->d_sectorsize);
-}
-#endif
-
-static int
-nvdc_fail(device_t dev)
-{
- return ENXIO;
-}
-
-#if 0
-static void *
-nvd_ns_changed(struct nvme_namespace *ns, void *ctrlr_arg)
-{
- struct nvd_disk *ndisk;
- struct nvd_controller *ctrlr = ctrlr_arg;
-
- if ((ns->flags & NVME_NS_DELTA) == 0) {
- nvd_new_disk(ns, ctrlr_arg);
- return (ctrlr_arg);
- }
-
- mtx_lock(&nvd_lock);
- TAILQ_FOREACH(ndisk, &ctrlr->disk_head, ctrlr_tailq) {
- if (ndisk->ns->id != ns->id)
- continue;
- nvd_resize(ndisk);
- break;
- }
- mtx_unlock(&nvd_lock);
- return (ctrlr_arg);
-}
-
-static void
-nvd_controller_fail(void *ctrlr_arg)
-{
- struct nvd_controller *ctrlr = ctrlr_arg;
- struct nvd_disk *ndisk;
-
- mtx_lock(&nvd_lock);
- TAILQ_REMOVE(&ctrlr_head, ctrlr, tailq);
- TAILQ_FOREACH(ndisk, &ctrlr->disk_head, ctrlr_tailq)
- nvd_gone(ndisk);
- while (!TAILQ_EMPTY(&ctrlr->disk_head))
- msleep(&ctrlr->disk_head, &nvd_lock, 0, "nvd_fail", 0);
- mtx_unlock(&nvd_lock);
-}
-#endif
-
-static int
-nvdc_probe(device_t dev)
-{
- if (!nvme_use_nvd)
- return (ENXIO);
-
- device_set_desc(dev, "nvme storage namespace");
- return (BUS_PROBE_DEFAULT);
-}
-
-static int
-nvdc_attach(device_t dev)
-{
- struct nvd_controller *nvd_ctrlr = device_get_softc(dev);
- struct nvme_controller *ctrlr = device_get_ivars(dev);
-
- nvd_ctrlr->ctrlr = ctrlr;
- TAILQ_INIT(&nvd_ctrlr->disk_head);
- mtx_lock(&nvd_lock);
- TAILQ_INSERT_TAIL(&ctrlr_head, nvd_ctrlr, tailq);
- mtx_unlock(&nvd_lock);
-
- for (int i = 0; i < min(ctrlr->cdata.nn, NVME_MAX_NAMESPACES); i++) {
- struct nvme_namespace *ns = &ctrlr->ns[i];
-
- if (ns->data.nsze == 0)
- continue;
- nvd_new_disk(ns, nvd_ctrlr);
- }
-
return (0);
}
static int
-nvdc_detach(device_t dev)
+nvdc_handle_aen(device_t dev, const struct nvme_completion *cpl,
+ uint32_t pg_nr, void *page, uint32_t page_len)
{
- return (nvdc_fail(dev));
+ /* Do nothing */
+ return (0);
}
static device_method_t nvdc_methods[] = {
/* Device interface */
- DEVMETHOD(device_probe, nvdc_probe),
- DEVMETHOD(device_attach, nvdc_attach),
- DEVMETHOD(device_detach, nvdc_detach),
+ DEVMETHOD(device_probe, nvdc_probe),
+ DEVMETHOD(device_attach, nvdc_attach),
+ DEVMETHOD(device_detach, nvdc_detach),
+ /* Nvme controller messages */
+ DEVMETHOD(nvme_ns_added, nvdc_ns_added),
+ DEVMETHOD(nvme_ns_removed, nvdc_ns_removed),
+ DEVMETHOD(nvme_ns_changed, nvdc_ns_changed),
+ DEVMETHOD(nvme_controller_failed, nvdc_controller_failed),
+ DEVMETHOD(nvme_handle_aen, nvdc_handle_aen),
{ 0, 0 }
};
diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c
index 7788889ae591..3ff64c781884 100644
--- a/sys/dev/nvme/nvme.c
+++ b/sys/dev/nvme/nvme.c
@@ -170,18 +170,6 @@ nvme_notify(struct nvme_consumer *cons,
}
}
-void
-nvme_notify_new_controller(struct nvme_controller *ctrlr)
-{
- int i;
-
- for (i = 0; i < NVME_MAX_CONSUMERS; i++) {
- if (nvme_consumer[i].id != INVALID_CONSUMER_ID) {
- nvme_notify(&nvme_consumer[i], ctrlr);
- }
- }
-}
-
static void
nvme_notify_new_consumer(struct nvme_consumer *cons)
{
@@ -247,30 +235,6 @@ nvme_notify_fail_consumers(struct nvme_controller *ctrlr)
}
}
-void
-nvme_notify_ns(struct nvme_controller *ctrlr, int nsid)
-{
- struct nvme_consumer *cons;
- struct nvme_namespace *ns;
- void *ctrlr_cookie;
- uint32_t i;
-
- KASSERT(nsid <= NVME_MAX_NAMESPACES,
- ("%s: Namespace notification to nsid %d exceeds range\n",
- device_get_nameunit(ctrlr->dev), nsid));
-
- if (!ctrlr->is_initialized)
- return;
-
- ns = &ctrlr->ns[nsid - 1];
- for (i = 0; i < NVME_MAX_CONSUMERS; i++) {
- cons = &nvme_consumer[i];
- if (cons->id != INVALID_CONSUMER_ID && cons->ns_fn != NULL &&
- (ctrlr_cookie = ctrlr->cons_cookie[i]) != NULL)
- ns->cons_cookie[i] = (*cons->ns_fn)(ns, ctrlr_cookie);
- }
-}
-
struct nvme_consumer *
nvme_register_consumer(nvme_cons_ns_fn_t ns_fn, nvme_cons_ctrlr_fn_t ctrlr_fn,
nvme_cons_async_fn_t async_fn,
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 50753f06c4e2..7a449fdc4727 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -48,6 +48,8 @@
#include "nvme_private.h"
#include "nvme_linux.h"
+#include "nvme_if.h"
+
#define B4_CHK_RDY_DELAY_MS 2300 /* work around controller bug */
static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
@@ -1082,10 +1084,20 @@ nvme_ctrlr_start_config_hook(void *arg)
device_t child;
ctrlr->is_initialized = true;
- nvme_notify_new_controller(ctrlr);
child = device_add_child(ctrlr->dev, NULL, DEVICE_UNIT_ANY);
device_set_ivars(child, ctrlr);
bus_attach_children(ctrlr->dev);
+
+ /*
+ * Now notify the child of all the known namepsaces
+ */
+ for (int i = 0; i < min(ctrlr->cdata.nn, NVME_MAX_NAMESPACES); i++) {
+ struct nvme_namespace *ns = &ctrlr->ns[i];
+
+ if (ns->data.nsze == 0)
+ continue;
+ NVME_NS_ADDED(child, ns);
+ }
}
TSEXIT();
}
@@ -1223,23 +1235,26 @@ nvme_ctrlr_aer_task(void *arg, int pending)
nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
aer->ctrlr->async_event_config, NULL, NULL);
} else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE) {
- struct nvme_ns_list *nsl =
- (struct nvme_ns_list *)aer->log_page_buffer;
- struct nvme_controller *ctrlr = aer->ctrlr;
+ device_t *children;
+ int n_children;
+ struct nvme_ns_list *nsl;
+ if (device_get_children(aer->ctrlr->dev, &children, &n_children) != 0) {
+ children = NULL;
+ n_children = 0;
+ }
+ nsl = (struct nvme_ns_list *)aer->log_page_buffer;
for (int i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) {
- struct nvme_namespace *ns;
- uint32_t id = nsl->ns[i];
-
- if (nsl->ns[i] > NVME_MAX_NAMESPACES)
- break;
-
- ns = &ctrlr->ns[id - 1];
- ns->flags |= NVME_NS_DELTA;
- nvme_ns_construct(ns, id, ctrlr);
- nvme_notify_ns(ctrlr, id);
- ns->flags &= ~NVME_NS_DELTA;
+ /*
+ * I think we need to query the name space here and see
+ * if it went away, arrived, or changed in size and call
+ * the nuanced routine (after constructing or before
+ * destructing the namespace). XXX needs more work XXX.
+ */
+ for (int j = 0; j < n_children; j++)
+ NVME_NS_CHANGED(children[j], nsl->ns[i]);
}
+ free(children, M_TEMP);
}
/*
diff --git a/sys/dev/nvme/nvme_if.m b/sys/dev/nvme/nvme_if.m
index a89381d165f7..5a06ff112f4c 100644
--- a/sys/dev/nvme/nvme_if.m
+++ b/sys/dev/nvme/nvme_if.m
@@ -33,7 +33,7 @@ METHOD int ns_removed {
#
METHOD int ns_changed {
device_t dev; /* nvme device */
- struct nvme_namespace *ns; /* information about the namespace */
+ uint32_t nsid; /* nsid that just changed */
};
#
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index a425a6a5ad62..cd0ef444150f 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -561,8 +561,6 @@ void nvme_notify_async_consumers(struct nvme_controller *ctrlr,
uint32_t log_page_id, void *log_page_buffer,
uint32_t log_page_size);
void nvme_notify_fail_consumers(struct nvme_controller *ctrlr);
-void nvme_notify_new_controller(struct nvme_controller *ctrlr);
-void nvme_notify_ns(struct nvme_controller *ctrlr, int nsid);
void nvme_ctrlr_shared_handler(void *arg);
void nvme_ctrlr_poll(struct nvme_controller *ctrlr);
diff --git a/sys/modules/nvd/Makefile b/sys/modules/nvd/Makefile
index 08b2d61859e9..84bffa71942e 100644
--- a/sys/modules/nvd/Makefile
+++ b/sys/modules/nvd/Makefile
@@ -1,6 +1,6 @@
.PATH: ${SRCTOP}/sys/dev/nvd
KMOD= nvd
-SRCS= nvd.c opt_geom.h device_if.h bus_if.h pci_if.h
+SRCS= nvd.c opt_geom.h device_if.h bus_if.h nvme_if.h pci_if.h
.include <bsd.kmod.mk>