git: f46d4971b5af - main - nvmf: Handle shutdowns more gracefully

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Wed, 05 Jun 2024 20:04:04 UTC
The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=f46d4971b5afdfecef3ae5979d7c96e9817aedee

commit f46d4971b5afdfecef3ae5979d7c96e9817aedee
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2024-06-05 19:59:28 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2024-06-05 19:59:28 +0000

    nvmf: Handle shutdowns more gracefully
    
    If an association is disconnected during a clean shutdown, abort all
    pending and future I/O requests with an error to avoid hangs due to
    either filesystem unmounts or a stuck GEOM event.
    
    If an association is connected during a clean shutdown, gracefully
    disconnect from the remote controller and close the open queues.
    
    Reviewed by:    imp
    Sponsored by:   Chelsio Communications
    Differential Revision:  https://reviews.freebsd.org/D45462
---
 sys/dev/nvmf/host/nvmf.c     | 71 ++++++++++++++++++++++++++++++++++++++++++--
 sys/dev/nvmf/host/nvmf_ns.c  | 27 +++++++++++++++--
 sys/dev/nvmf/host/nvmf_sim.c | 16 ++++++++--
 sys/dev/nvmf/host/nvmf_var.h |  7 +++++
 4 files changed, 114 insertions(+), 7 deletions(-)

diff --git a/sys/dev/nvmf/host/nvmf.c b/sys/dev/nvmf/host/nvmf.c
index c309836ed8a8..47cdbe7e47fd 100644
--- a/sys/dev/nvmf/host/nvmf.c
+++ b/sys/dev/nvmf/host/nvmf.c
@@ -8,12 +8,14 @@
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
+#include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/memdesc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
+#include <sys/reboot.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
@@ -31,6 +33,8 @@ SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
 MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");
 
 static void	nvmf_disconnect_task(void *arg, int pending);
+static void	nvmf_shutdown_pre_sync(void *arg, int howto);
+static void	nvmf_shutdown_post_sync(void *arg, int howto);
 
 void
 nvmf_complete(void *arg, const struct nvme_completion *cqe)
@@ -528,6 +532,11 @@ nvmf_attach(device_t dev)
 		goto out;
 	}
 
+	sc->shutdown_pre_sync_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync,
+	    nvmf_shutdown_pre_sync, sc, SHUTDOWN_PRI_FIRST);
+	sc->shutdown_post_sync_eh = EVENTHANDLER_REGISTER(shutdown_post_sync,
+	    nvmf_shutdown_post_sync, sc, SHUTDOWN_PRI_FIRST);
+
 	return (0);
 out:
 	if (sc->ns != NULL) {
@@ -698,6 +707,62 @@ out:
 	return (error);
 }
 
+/* shutdown_pre_sync handler: fail I/O if the association is disconnected. */
+static void
+nvmf_shutdown_pre_sync(void *arg, int howto)
+{
+	struct nvmf_softc *sc = arg;
+	struct nvmf_namespace *ns;
+
+	/* Only a shutdown that syncs with a running scheduler is handled. */
+	if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
+		return;
+
+	sx_xlock(&sc->connection_lock);
+	if (sc->admin == NULL && !sc->detaching) {
+		/*
+		 * Disconnected and not detaching: abort pending requests
+		 * with an error so filesystems can unmount without hanging.
+		 */
+		for (u_int nsidx = 0; nsidx < sc->cdata->nn; nsidx++) {
+			ns = sc->ns[nsidx];
+			if (ns != NULL)
+				nvmf_shutdown_ns(ns);
+		}
+		nvmf_shutdown_sim(sc);
+	}
+	sx_xunlock(&sc->connection_lock);
+}
+
+/* shutdown_post_sync handler: gracefully disconnect a live association. */
+static void
+nvmf_shutdown_post_sync(void *arg, int howto)
+{
+	struct nvmf_softc *sc = arg;
+	u_int qid;
+
+	/* Only a shutdown that syncs with a running scheduler is handled. */
+	if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
+		return;
+
+	sx_xlock(&sc->connection_lock);
+	/* Nothing to do if already disconnected or a detach is in progress. */
+	if (sc->admin == NULL || sc->detaching)
+		goto unlock;
+
+	/* Wait for the ka_tx/ka_rx callouts to finish before teardown. */
+	callout_drain(&sc->ka_tx_timer);
+	callout_drain(&sc->ka_rx_timer);
+	/* Shut down the controller, then destroy the I/O and admin queues. */
+	nvmf_shutdown_controller(sc);
+	for (qid = 0; qid < sc->num_io_queues; qid++)
+		nvmf_destroy_qp(sc->io[qid]);
+	nvmf_destroy_qp(sc->admin);
+	sc->admin = NULL;
+unlock:
+	sx_xunlock(&sc->connection_lock);
+}
+
 static int
 nvmf_detach(device_t dev)
 {
@@ -710,6 +775,9 @@ nvmf_detach(device_t dev)
 	sc->detaching = true;
 	sx_xunlock(&sc->connection_lock);
 
+	EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->shutdown_pre_sync_eh);
+	EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->shutdown_post_sync_eh);
+
 	nvmf_destroy_sim(sc);
 	for (i = 0; i < sc->cdata->nn; i++) {
 		if (sc->ns[i] != NULL)
@@ -1006,9 +1074,6 @@ static device_method_t nvmf_methods[] = {
 	DEVMETHOD(device_probe,     nvmf_probe),
 	DEVMETHOD(device_attach,    nvmf_attach),
 	DEVMETHOD(device_detach,    nvmf_detach),
-#if 0
-	DEVMETHOD(device_shutdown,  nvmf_shutdown),
-#endif
 	DEVMETHOD_END
 };
 
diff --git a/sys/dev/nvmf/host/nvmf_ns.c b/sys/dev/nvmf/host/nvmf_ns.c
index 8381cc4aec54..87cb4fa68001 100644
--- a/sys/dev/nvmf/host/nvmf_ns.c
+++ b/sys/dev/nvmf/host/nvmf_ns.c
@@ -29,6 +29,7 @@ struct nvmf_namespace {
 	u_int	flags;
 	uint32_t lba_size;
 	bool disconnected;
+	bool shutdown;
 
 	TAILQ_HEAD(, bio) pending_bios;
 	struct mtx lock;
@@ -89,7 +90,7 @@ nvmf_ns_biodone(struct bio *bio)
 		bio->bio_driver2 = 0;
 		mtx_lock(&ns->lock);
 		if (ns->disconnected) {
-			if (nvmf_fail_disconnect) {
+			if (nvmf_fail_disconnect || ns->shutdown) {
 				mtx_unlock(&ns->lock);
 				bio->bio_error = ECONNABORTED;
 				bio->bio_flags |= BIO_ERROR;
@@ -211,7 +212,7 @@ nvmf_ns_submit_bio(struct nvmf_namespace *ns, struct bio *bio)
 
 	mtx_lock(&ns->lock);
 	if (ns->disconnected) {
-		if (nvmf_fail_disconnect) {
+		if (nvmf_fail_disconnect || ns->shutdown) {
 			error = ECONNABORTED;
 		} else {
 			TAILQ_INSERT_TAIL(&ns->pending_bios, bio, bio_queue);
@@ -429,6 +430,28 @@ nvmf_reconnect_ns(struct nvmf_namespace *ns)
 	}
 }
 
+/* Flag the namespace as shut down and fail every pending bio. */
+void
+nvmf_shutdown_ns(struct nvmf_namespace *ns)
+{
+	TAILQ_HEAD(, bio) aborted;
+	struct bio *bp;
+
+	TAILQ_INIT(&aborted);
+	mtx_lock(&ns->lock);
+	ns->shutdown = true;
+	TAILQ_CONCAT(&aborted, &ns->pending_bios, bio_queue);
+	mtx_unlock(&ns->lock);
+
+	while ((bp = TAILQ_FIRST(&aborted)) != NULL) {
+		TAILQ_REMOVE(&aborted, bp, bio_queue);
+		bp->bio_resid = bp->bio_bcount;
+		bp->bio_flags |= BIO_ERROR;
+		bp->bio_error = ECONNABORTED;
+		biodone(bp);
+	}
+}
+
 void
 nvmf_destroy_ns(struct nvmf_namespace *ns)
 {
diff --git a/sys/dev/nvmf/host/nvmf_sim.c b/sys/dev/nvmf/host/nvmf_sim.c
index 71bb71dd4063..4bf68553cb49 100644
--- a/sys/dev/nvmf/host/nvmf_sim.c
+++ b/sys/dev/nvmf/host/nvmf_sim.c
@@ -40,7 +40,10 @@ nvmf_ccb_done(union ccb *ccb)
 		return;
 
 	if (nvmf_cqe_aborted(&ccb->nvmeio.cpl)) {
-		if (nvmf_fail_disconnect)
+		struct cam_sim *sim = xpt_path_sim(ccb->ccb_h.path);
+		struct nvmf_softc *sc = cam_sim_softc(sim);
+
+		if (nvmf_fail_disconnect || sc->sim_shutdown)
 			ccb->ccb_h.status = CAM_DEV_NOT_THERE;
 		else
 			ccb->ccb_h.status = CAM_REQUEUE_REQ;
@@ -109,7 +112,7 @@ nvmf_sim_io(struct nvmf_softc *sc, union ccb *ccb)
 	mtx_lock(&sc->sim_mtx);
 	if (sc->sim_disconnected) {
 		mtx_unlock(&sc->sim_mtx);
-		if (nvmf_fail_disconnect)
+		if (nvmf_fail_disconnect || sc->sim_shutdown)
 			nvmeio->ccb_h.status = CAM_DEV_NOT_THERE;
 		else
 			nvmeio->ccb_h.status = CAM_REQUEUE_REQ;
@@ -325,6 +328,15 @@ nvmf_reconnect_sim(struct nvmf_softc *sc)
 	xpt_release_simq(sc->sim, 1);
 }
 
+void
+nvmf_shutdown_sim(struct nvmf_softc *sc)
+{
+	mtx_lock(&sc->sim_mtx);
+	sc->sim_shutdown = true;	/* checked by nvmf_ccb_done/nvmf_sim_io */
+	mtx_unlock(&sc->sim_mtx);
+	xpt_release_simq(sc->sim, 1);	/* presumably undoes a disconnect freeze */
+}
+
 void
 nvmf_destroy_sim(struct nvmf_softc *sc)
 {
diff --git a/sys/dev/nvmf/host/nvmf_var.h b/sys/dev/nvmf/host/nvmf_var.h
index cf88d2f7b01e..adf6d8bde3d9 100644
--- a/sys/dev/nvmf/host/nvmf_var.h
+++ b/sys/dev/nvmf/host/nvmf_var.h
@@ -9,6 +9,7 @@
 #define	__NVMF_VAR_H__
 
 #include <sys/_callout.h>
+#include <sys/_eventhandler.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_sx.h>
@@ -42,6 +43,7 @@ struct nvmf_softc {
 	struct cam_path *path;
 	struct mtx sim_mtx;
 	bool sim_disconnected;
+	bool sim_shutdown;
 
 	struct nvmf_namespace **ns;
 
@@ -82,6 +84,9 @@ struct nvmf_softc {
 
 	u_int num_aer;
 	struct nvmf_aer *aer;
+
+	eventhandler_tag shutdown_pre_sync_eh;
+	eventhandler_tag shutdown_post_sync_eh;
 };
 
 struct nvmf_request {
@@ -187,6 +192,7 @@ struct nvmf_namespace *nvmf_init_ns(struct nvmf_softc *sc, uint32_t id,
     const struct nvme_namespace_data *data);
 void	nvmf_disconnect_ns(struct nvmf_namespace *ns);
 void	nvmf_reconnect_ns(struct nvmf_namespace *ns);
+void	nvmf_shutdown_ns(struct nvmf_namespace *ns);
 void	nvmf_destroy_ns(struct nvmf_namespace *ns);
 bool	nvmf_update_ns(struct nvmf_namespace *ns,
     const struct nvme_namespace_data *data);
@@ -206,6 +212,7 @@ void	nvmf_free_request(struct nvmf_request *req);
 int	nvmf_init_sim(struct nvmf_softc *sc);
 void	nvmf_disconnect_sim(struct nvmf_softc *sc);
 void	nvmf_reconnect_sim(struct nvmf_softc *sc);
+void	nvmf_shutdown_sim(struct nvmf_softc *sc);
 void	nvmf_destroy_sim(struct nvmf_softc *sc);
 void	nvmf_sim_rescan_ns(struct nvmf_softc *sc, uint32_t id);