svn commit: r359366 - head/usr.sbin/bhyve

Chuck Tuffli <chuck@FreeBSD.org>
Fri Mar 27 15:28:43 UTC 2020


Author: chuck
Date: Fri Mar 27 15:28:22 2020
New Revision: 359366
URL: https://svnweb.freebsd.org/changeset/base/359366

Log:
  bhyve: fix NVMe emulation missed interrupts
  
  The bhyve NVMe emulation has a race in the logic which generates command
  completion interrupts. On FreeBSD guests, this manifests as kernel log
  messages similar to:
      nvme0: Missing interrupt
  
  The NVMe emulation code sets a per-submission queue "busy" flag while
  processing the submission queue, and only generates an interrupt when
  the submission queue is not busy.
  
  Aside from running counter to the NVMe design (interrupt properties
  belong to the completion queue, not the submission queue) and adding
  complexity (e.g. exceptions to the "don't interrupt while busy" rule),
  this scheme races under the following conditions (sketched after the
  list):
   - guest OS has no outstanding interrupts
   - guest OS submits a single NVMe IO command
   - bhyve emulation processes the SQ and sets the "busy" flag
   - bhyve emulation submits the asynchronous IO to the backing storage
   - the IO request to the backing storage completes before the SQ
     processing loop exits and skips the interrupt because the SQ is
     still "busy"
   - bhyve emulation finishes processing the SQ and clears the "busy"
     flag, but no interrupt is ever generated for the completed command
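  
  In rough outline, the old completion path gated the interrupt on the
  submission queue state (a simplified sketch of the code removed in the
  diff below; locking and queue bookkeeping omitted):
  
      /*
       * An IO that completes while the SQ processing loop still holds
       * "busy" skips the MSI-X here, and nothing generates it later,
       * so the guest reports a missing interrupt.
       */
      if (ignore_busy || !atomic_load_acq_int(&sq->busy))
              if (do_intr)
                      pci_generate_msix(sc->nsc_pi, cq->intr_vec);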
  
  The fix is to remove the "busy" flag and instead generate an interrupt
  whenever the CQ head and tail pointers do not match (i.e. the CQ holds
  completions the guest has not yet consumed).
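  
  In outline, the completion path now keys the interrupt off the state of
  the completion queue itself (a simplified sketch of the change in the
  diff below; CQ locking and tail update omitted):
  
      /*
       * Fire MSI-X whenever the CQ holds entries the guest has not yet
       * consumed and interrupts are enabled for that CQ.
       */
      if (cq->head != cq->tail) {
              if (cq->intr_en & NVME_CQ_INTEN)
                      pci_generate_msix(sc->nsc_pi, cq->intr_vec);
      }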
  
  Reported by:	khng300
  Reviewed by:	jhb, imp
  Approved by:	jhb (maintainer)
  MFC after:	2 weeks
  Differential Revision: https://reviews.freebsd.org/D24082

Modified:
  head/usr.sbin/bhyve/pci_nvme.c

Modified: head/usr.sbin/bhyve/pci_nvme.c
==============================================================================
--- head/usr.sbin/bhyve/pci_nvme.c	Fri Mar 27 15:28:16 2020	(r359365)
+++ head/usr.sbin/bhyve/pci_nvme.c	Fri Mar 27 15:28:22 2020	(r359366)
@@ -1082,12 +1082,12 @@ pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, u
 	struct nvme_command *cmd;
 	struct nvme_submission_queue *sq;
 	struct nvme_completion_queue *cq;
-	int do_intr = 0;
 	uint16_t sqhead;
 
 	DPRINTF(("%s index %u", __func__, (uint32_t)value));
 
 	sq = &sc->submit_queues[0];
+	cq = &sc->compl_queues[0];
 
 	sqhead = atomic_load_acq_short(&sq->head);
 
@@ -1107,44 +1107,44 @@ pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, u
 		switch (cmd->opc) {
 		case NVME_OPC_DELETE_IO_SQ:
 			DPRINTF(("%s command DELETE_IO_SQ", __func__));
-			do_intr |= nvme_opc_delete_io_sq(sc, cmd, &compl);
+			nvme_opc_delete_io_sq(sc, cmd, &compl);
 			break;
 		case NVME_OPC_CREATE_IO_SQ:
 			DPRINTF(("%s command CREATE_IO_SQ", __func__));
-			do_intr |= nvme_opc_create_io_sq(sc, cmd, &compl);
+			nvme_opc_create_io_sq(sc, cmd, &compl);
 			break;
 		case NVME_OPC_DELETE_IO_CQ:
 			DPRINTF(("%s command DELETE_IO_CQ", __func__));
-			do_intr |= nvme_opc_delete_io_cq(sc, cmd, &compl);
+			nvme_opc_delete_io_cq(sc, cmd, &compl);
 			break;
 		case NVME_OPC_CREATE_IO_CQ:
 			DPRINTF(("%s command CREATE_IO_CQ", __func__));
-			do_intr |= nvme_opc_create_io_cq(sc, cmd, &compl);
+			nvme_opc_create_io_cq(sc, cmd, &compl);
 			break;
 		case NVME_OPC_GET_LOG_PAGE:
 			DPRINTF(("%s command GET_LOG_PAGE", __func__));
-			do_intr |= nvme_opc_get_log_page(sc, cmd, &compl);
+			nvme_opc_get_log_page(sc, cmd, &compl);
 			break;
 		case NVME_OPC_IDENTIFY:
 			DPRINTF(("%s command IDENTIFY", __func__));
-			do_intr |= nvme_opc_identify(sc, cmd, &compl);
+			nvme_opc_identify(sc, cmd, &compl);
 			break;
 		case NVME_OPC_ABORT:
 			DPRINTF(("%s command ABORT", __func__));
-			do_intr |= nvme_opc_abort(sc, cmd, &compl);
+			nvme_opc_abort(sc, cmd, &compl);
 			break;
 		case NVME_OPC_SET_FEATURES:
 			DPRINTF(("%s command SET_FEATURES", __func__));
-			do_intr |= nvme_opc_set_features(sc, cmd, &compl);
+			nvme_opc_set_features(sc, cmd, &compl);
 			break;
 		case NVME_OPC_GET_FEATURES:
 			DPRINTF(("%s command GET_FEATURES", __func__));
-			do_intr |= nvme_opc_get_features(sc, cmd, &compl);
+			nvme_opc_get_features(sc, cmd, &compl);
 			break;
 		case NVME_OPC_ASYNC_EVENT_REQUEST:
 			DPRINTF(("%s command ASYNC_EVENT_REQ", __func__));
 			/* XXX dont care, unhandled for now
-			do_intr |= nvme_opc_async_event_req(sc, cmd, &compl);
+			nvme_opc_async_event_req(sc, cmd, &compl);
 			*/
 			compl.status = NVME_NO_STATUS;
 			break;
@@ -1152,15 +1152,12 @@ pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, u
 			WPRINTF(("0x%x command is not implemented",
 			    cmd->opc));
 			pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_OPCODE);
-			do_intr |= 1;
 		}
 	
 		if (NVME_COMPLETION_VALID(compl)) {
 			struct nvme_completion *cp;
 			int phase;
 
-			cq = &sc->compl_queues[0];
-
 			cp = &(cq->qbase)[cq->tail];
 			cp->cdw0 = compl.cdw0;
 			cp->sqid = 0;
@@ -1180,7 +1177,7 @@ pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, u
 	atomic_store_short(&sq->head, sqhead);
 	atomic_store_int(&sq->busy, 0);
 
-	if (do_intr)
+	if (cq->head != cq->tail)
 		pci_generate_msix(sc->nsc_pi, 0);
 
 }
@@ -1276,7 +1273,6 @@ pci_nvme_set_completion(struct pci_nvme_softc *sc,
 {
 	struct nvme_completion_queue *cq = &sc->compl_queues[sq->cqid];
 	struct nvme_completion *compl;
-	int do_intr = 0;
 	int phase;
 
 	DPRINTF(("%s sqid %d cqid %u cid %u status: 0x%x 0x%x",
@@ -1300,14 +1296,16 @@ pci_nvme_set_completion(struct pci_nvme_softc *sc,
 
 	cq->tail = (cq->tail + 1) % cq->size;
 
-	if (cq->intr_en & NVME_CQ_INTEN)
-		do_intr = 1;
-
 	pthread_mutex_unlock(&cq->mtx);
 
-	if (ignore_busy || !atomic_load_acq_int(&sq->busy))
-		if (do_intr)
+	if (cq->head != cq->tail) {
+		if (cq->intr_en & NVME_CQ_INTEN) {
 			pci_generate_msix(sc->nsc_pi, cq->intr_vec);
+		} else {
+			DPRINTF(("%s: CQ%u interrupt disabled\n",
+						__func__, sq->cqid));
+		}
+	}
 }
 
 static void

