svn commit: r248754 - head/sys/dev/nvme

Jim Harris jimharris at FreeBSD.org
Tue Mar 26 20:32:58 UTC 2013


Author: jimharris
Date: Tue Mar 26 20:32:57 2013
New Revision: 248754
URL: http://svnweb.freebsd.org/changeset/base/248754

Log:
  By default, always escalate to controller reset when an I/O times out.
  
  While aborts are typically cleaner than a full controller reset, many times
  an I/O timeout indicates other controller-level issues where aborts may not
  work.  NVMe drivers for other operating systems are also defaulting to
  controller reset rather than aborts for timed out I/O.
  
  Sponsored by:	Intel
  Reviewed by:	carl

Modified:
  head/sys/dev/nvme/nvme_ctrlr.c
  head/sys/dev/nvme/nvme_private.h
  head/sys/dev/nvme/nvme_qpair.c

Modified: head/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- head/sys/dev/nvme/nvme_ctrlr.c	Tue Mar 26 20:32:46 2013	(r248753)
+++ head/sys/dev/nvme/nvme_ctrlr.c	Tue Mar 26 20:32:57 2013	(r248754)
@@ -422,12 +422,8 @@ nvme_ctrlr_hw_reset(struct nvme_controll
 void
 nvme_ctrlr_reset(struct nvme_controller *ctrlr)
 {
-	int status;
 
-	status = nvme_ctrlr_hw_reset(ctrlr);
-	DELAY(100*1000);
-	if (status == 0)
-		taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->restart_task);
+	taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
 }
 
 static int
@@ -686,11 +682,24 @@ err:
 }
 
 static void
-nvme_ctrlr_restart_task(void *arg, int pending)
+nvme_ctrlr_reset_task(void *arg, int pending)
 {
-	struct nvme_controller *ctrlr = arg;
+	struct nvme_controller	*ctrlr = arg;
+	int			status;
 
-	nvme_ctrlr_start(ctrlr);
+	device_printf(ctrlr->dev, "resetting controller");
+	status = nvme_ctrlr_hw_reset(ctrlr);
+	/*
+	 * Use pause instead of DELAY, so that we yield to any nvme interrupt
+	 *  handlers on this CPU that were blocked on a qpair lock. We want
+	 *  all nvme interrupts completed before proceeding with restarting the
+	 *  controller.
+	 *
+	 * XXX - any way to guarantee the interrupt handlers have quiesced?
+	 */
+	pause("nvmereset", hz / 10);
+	if (status == 0)
+		nvme_ctrlr_start(ctrlr);
 }
 
 static void
@@ -841,6 +850,9 @@ nvme_ctrlr_construct(struct nvme_control
 	ctrlr->force_intx = 0;
 	TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
 
+	ctrlr->enable_aborts = 0;
+	TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts);
+
 	ctrlr->msix_enabled = 1;
 
 	if (ctrlr->force_intx) {
@@ -879,7 +891,7 @@ intx:
 
 	ctrlr->cdev->si_drv1 = (void *)ctrlr;
 
-	TASK_INIT(&ctrlr->restart_task, 0, nvme_ctrlr_restart_task, ctrlr);
+	TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
 	ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
 	    taskqueue_thread_enqueue, &ctrlr->taskqueue);
 	taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");

Modified: head/sys/dev/nvme/nvme_private.h
==============================================================================
--- head/sys/dev/nvme/nvme_private.h	Tue Mar 26 20:32:46 2013	(r248753)
+++ head/sys/dev/nvme/nvme_private.h	Tue Mar 26 20:32:57 2013	(r248754)
@@ -230,6 +230,7 @@ struct nvme_controller {
 
 	uint32_t		msix_enabled;
 	uint32_t		force_intx;
+	uint32_t		enable_aborts;
 
 	uint32_t		num_io_queues;
 	boolean_t		per_cpu_io_queues;
@@ -239,7 +240,7 @@ struct nvme_controller {
 	uint32_t		ns_identified;
 	uint32_t		queues_created;
 	uint32_t		num_start_attempts;
-	struct task		restart_task;
+	struct task		reset_task;
 	struct taskqueue	*taskqueue;
 
 	/* For shared legacy interrupt. */

Modified: head/sys/dev/nvme/nvme_qpair.c
==============================================================================
--- head/sys/dev/nvme/nvme_qpair.c	Tue Mar 26 20:32:46 2013	(r248753)
+++ head/sys/dev/nvme/nvme_qpair.c	Tue Mar 26 20:32:57 2013	(r248754)
@@ -460,21 +460,20 @@ nvme_timeout(void *arg)
 	struct nvme_controller	*ctrlr = qpair->ctrlr;
 	union csts_register	csts;
 
+	/* Read csts to get value of cfs - controller fatal status. */
 	csts.raw = nvme_mmio_read_4(ctrlr, csts);
-	if (csts.bits.cfs == 1) {
+	device_printf(ctrlr->dev, "i/o timeout, csts.cfs=%d\n", csts.bits.cfs);
+	nvme_dump_command(&tr->req->cmd);
+
+	if (ctrlr->enable_aborts && csts.bits.cfs == 0) {
 		/*
-		 * The controller is reporting fatal status.  Don't bother
-		 *  trying to abort the timed out command - proceed
-		 *  immediately to a controller-level reset.
+		 * If aborts are enabled, only use them if the controller is
+		 *  not reporting fatal status.
 		 */
-		device_printf(ctrlr->dev,
-		    "controller reports fatal status, resetting...\n");
+		nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
+		    nvme_abort_complete, tr);
+	} else
 		nvme_ctrlr_reset(ctrlr);
-		return;
-	}
-
-	nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
-	    nvme_abort_complete, tr);
 }
 
 void


More information about the svn-src-all mailing list