git: 2a6b7055a980 - main - nvme: Timeout expired transactions
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 25 Aug 2023 16:20:27 UTC
The branch main has been updated by imp:
URL: https://cgit.FreeBSD.org/src/commit/?id=2a6b7055a980f7e7543dfdbda4aa0c356133b77d
commit 2a6b7055a980f7e7543dfdbda4aa0c356133b77d
Author: Warner Losh <imp@FreeBSD.org>
AuthorDate: 2023-08-25 16:09:50 +0000
Commit: Warner Losh <imp@FreeBSD.org>
CommitDate: 2023-08-25 16:10:44 +0000
nvme: Timeout expired transactions
When we went to having a shared timeout routine, failing the timed-out
transaction code was inadvertently dropped. Reinstate it.
Fixes: 502dc84a8b670
Sponsored by: Netflix
MFC After: 2 weeks
Reviewed by: chuck, jhb
Differential Revision: https://reviews.freebsd.org/D36921
---
sys/dev/nvme/nvme_qpair.c | 60 ++++++++++++++++++++++++++++++++++++-----------
1 file changed, 46 insertions(+), 14 deletions(-)
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
index 45a999c17d89..0ad0b7cbe17f 100644
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -944,6 +944,30 @@ nvme_io_qpair_destroy(struct nvme_qpair *qpair)
nvme_qpair_destroy(qpair);
}
+static void
+nvme_abort_complete(void *arg, const struct nvme_completion *status)
+{
+ struct nvme_tracker *tr = arg;
+
+ /*
+ * If cdw0 == 1, the controller was not able to abort the command
+ * we requested. We still need to check the active tracker array,
+ * to cover race where I/O timed out at same time controller was
+ * completing the I/O.
+ */
+ if (status->cdw0 == 1 && tr->qpair->act_tr[tr->cid] != NULL) {
+ /*
+ * An I/O has timed out, and the controller was unable to
+ * abort it for some reason. Construct a fake completion
+ * status, and then complete the I/O's tracker manually.
+ */
+ nvme_printf(tr->qpair->ctrlr,
+ "abort command failed, aborting command manually\n");
+ nvme_qpair_manual_complete_tracker(tr,
+ NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, 0, ERROR_PRINT_ALL);
+ }
+}
+
static void
nvme_qpair_timeout(void *arg)
{
@@ -952,36 +976,44 @@ nvme_qpair_timeout(void *arg)
struct nvme_tracker *tr;
sbintime_t now;
bool idle;
+ bool expired;
uint32_t csts;
uint8_t cfs;
mtx_lock(&qpair->lock);
idle = TAILQ_EMPTY(&qpair->outstanding_tr);
+
again:
switch (qpair->recovery_state) {
case RECOVERY_NONE:
- if (idle)
- break;
+ /*
+ * Check to see if we need to timeout any commands. If we do, then
+ * we also enter a recovery phase.
+ */
now = getsbinuptime();
- idle = true;
+ expired = false;
TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq) {
if (tr->deadline == SBT_MAX)
continue;
idle = false;
if (now > tr->deadline) {
- /*
- * We're now passed our earliest deadline. We
- * need to do expensive things to cope, but next
- * time. Flag that and close the door to any
- * further processing.
- */
- qpair->recovery_state = RECOVERY_START;
- nvme_printf(ctrlr, "RECOVERY_START %jd vs %jd\n",
- (uintmax_t)now, (uintmax_t)tr->deadline);
- break;
+ expired = true;
+ nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
+ nvme_abort_complete, tr);
}
}
- break;
+ if (!expired)
+ break;
+
+ /*
+ * We're now passed our earliest deadline. We need to do
+ * expensive things to cope, but next time. Flag that
+ * and close the door to any further processing.
+ */
+ qpair->recovery_state = RECOVERY_START;
+ nvme_printf(ctrlr, "RECOVERY_START %jd vs %jd\n",
+ (uintmax_t)now, (uintmax_t)tr->deadline);
+ /* FALLTHROUGH */
case RECOVERY_START:
/*
* Read csts to get value of cfs - controller fatal status.