svn commit: r362745 - head/usr.sbin/bhyve

Chuck Tuffli chuck at FreeBSD.org
Mon Jun 29 00:31:15 UTC 2020


Author: chuck
Date: Mon Jun 29 00:31:14 2020
New Revision: 362745
URL: https://svnweb.freebsd.org/changeset/base/362745

Log:
  bhyve: refactor NVMe IO command handling
  
  This refactors the NVMe I/O command processing function to make adding
  new commands easier. The main change is to move command-specific
  processing (e.g., Read/Write) to separate functions for each NVMe I/O
  command and leave the common per-command processing in the existing
  pci_nvme_handle_io_cmd() function.
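  
  In outline, the refactored submission loop dispatches each command to a
  per-opcode handler and posts the completion itself unless the handler
  reports the request as still pending on the block backend. This is a
  condensed sketch of the committed code in the diff below, not an
  additional change:
  
      pending = false;
      switch (cmd->opc) {
      case NVME_OPC_FLUSH:
      case NVME_OPC_WRITE_ZEROES:     /* zeroing is still a TODO; reports OK */
              pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
              break;
      case NVME_OPC_WRITE:
      case NVME_OPC_READ:
              pending = nvme_opc_write_read(sc, cmd, &sc->nvstore,
                  req, &status);
              break;
      case NVME_OPC_DATASET_MANAGEMENT:
              pending = nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore,
                  req, &status);
              break;
      default:
              pci_nvme_status_genc(&status, NVME_SC_INVALID_OPCODE);
      }
      if (!pending) {
              /* completed inline: post the status and recycle the ioreq */
              pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, status, 1);
              if (req != NULL)
                      pci_nvme_release_ioreq(sc, req);
      }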
  
  While here, add checks for some common errors (invalid Namespace ID,
  invalid opcode, LBA out of range).
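  
  The new LBA range check, in essence (condensed from the
  nvme_opc_write_read() hunk in the diff below; the starting LBA comes
  from CDW11:CDW10 and the zero-based block count from CDW12):
  
      lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10;
      nblocks = (cmd->cdw12 & 0xFFFF) + 1;    /* NLB field is zero-based */
      offset = lba * nvstore->sectsz;
      bytes  = nblocks * nvstore->sectsz;
      if ((offset + bytes) > nvstore->size) {
              pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE);
              return (false);    /* completes inline with an error status */
      }
  
  An invalid Namespace ID (zero, or greater than the controller's Number
  of Namespaces) completes with NVME_SC_INVALID_NAMESPACE_OR_FORMAT plus
  the Do Not Retry bit, and an unrecognized opcode now completes with
  NVME_SC_INVALID_OPCODE rather than a generic data-transfer error.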
  
  Add myself to the copyright holders.
  
  Reviewed by:	imp
  Tested by:	Jason Tubnor
  MFC after:	2 weeks
  Differential Revision: https://reviews.freebsd.org/D24879

Modified:
  head/usr.sbin/bhyve/pci_nvme.c

Modified: head/usr.sbin/bhyve/pci_nvme.c
==============================================================================
--- head/usr.sbin/bhyve/pci_nvme.c	Mon Jun 29 00:31:11 2020	(r362744)
+++ head/usr.sbin/bhyve/pci_nvme.c	Mon Jun 29 00:31:14 2020	(r362745)
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2017 Shunsuke Mie
  * Copyright (c) 2018 Leon Dang
+ * Copyright (c) 2020 Chuck Tuffli
  *
  * Function crc16 Copyright (c) 2017, Fedor Uporov 
  *     Obtained from function ext2_crc16() in sys/fs/ext2fs/ext2_csum.c
@@ -1386,6 +1387,122 @@ pci_nvme_io_partial(struct blockif_req *br, int err)
 	pthread_cond_signal(&req->cv);
 }
 
+static bool
+nvme_opc_write_read(struct pci_nvme_softc *sc,
+    struct nvme_command *cmd,
+    struct pci_nvme_blockstore *nvstore,
+    struct pci_nvme_ioreq *req,
+    uint16_t *status)
+{
+	uint64_t lba, nblocks, bytes;
+	size_t offset;
+	bool is_write = cmd->opc == NVME_OPC_WRITE;
+	bool pending = false;
+
+	lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10;
+	nblocks = (cmd->cdw12 & 0xFFFF) + 1;
+
+	offset = lba * nvstore->sectsz;
+	bytes  = nblocks * nvstore->sectsz;
+
+	if ((offset + bytes) > nvstore->size) {
+		WPRINTF("%s command would exceed LBA range", __func__);
+		pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE);
+		goto out;
+	}
+
+	req->io_req.br_offset = lba;
+
+	/* PRP bits 1:0 must be zero */
+	cmd->prp1 &= ~0x3UL;
+	cmd->prp2 &= ~0x3UL;
+
+	if (nvstore->type == NVME_STOR_RAM) {
+		uint8_t *buf = nvstore->ctx;
+		enum nvme_copy_dir dir;
+
+		if (is_write)
+			dir = NVME_COPY_TO_PRP;
+		else
+			dir = NVME_COPY_FROM_PRP;
+
+		if (nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2,
+		    buf + offset, bytes, dir))
+			pci_nvme_status_genc(status,
+			    NVME_SC_DATA_TRANSFER_ERROR);
+		else
+			pci_nvme_status_genc(status, NVME_SC_SUCCESS);
+	} else {
+		uint64_t size;
+		int err;
+
+		size = MIN(PAGE_SIZE - (cmd->prp1 % PAGE_SIZE), bytes);
+		if (pci_nvme_append_iov_req(sc, req, cmd->prp1,
+		    size, is_write, offset)) {
+			pci_nvme_status_genc(status,
+			    NVME_SC_DATA_TRANSFER_ERROR);
+			goto out;
+		}
+
+		offset += size;
+		bytes  -= size;
+
+		if (bytes == 0) {
+			;
+		} else if (bytes <= PAGE_SIZE) {
+			size = bytes;
+			if (pci_nvme_append_iov_req(sc, req, cmd->prp2,
+			    size, is_write, offset)) {
+				pci_nvme_status_genc(status,
+				    NVME_SC_DATA_TRANSFER_ERROR);
+				goto out;
+			}
+		} else {
+			void *vmctx = sc->nsc_pi->pi_vmctx;
+			uint64_t *prp_list = &cmd->prp2;
+			uint64_t *last = prp_list;
+
+			/* PRP2 is pointer to a physical region page list */
+			while (bytes) {
+				/* Last entry in list points to the next list */
+				if (prp_list == last) {
+					uint64_t prp = *prp_list;
+
+					prp_list = paddr_guest2host(vmctx, prp,
+					    PAGE_SIZE - (prp % PAGE_SIZE));
+					last = prp_list + (NVME_PRP2_ITEMS - 1);
+				}
+
+				size = MIN(bytes, PAGE_SIZE);
+
+				if (pci_nvme_append_iov_req(sc, req, *prp_list,
+				    size, is_write, offset)) {
+					pci_nvme_status_genc(status,
+					    NVME_SC_DATA_TRANSFER_ERROR);
+					goto out;
+				}
+
+				offset += size;
+				bytes  -= size;
+
+				prp_list++;
+			}
+		}
+		req->io_req.br_callback = pci_nvme_io_done;
+		if (is_write)
+			err = blockif_write(nvstore->ctx, &req->io_req);
+		else
+			err = blockif_read(nvstore->ctx, &req->io_req);
+
+		if (err)
+			pci_nvme_status_genc(status, NVME_SC_DATA_TRANSFER_ERROR);
+		else
+			pending = true;
+	}
+out:
+	return (pending);
+}
+
 static void
 pci_nvme_dealloc_sm(struct blockif_req *br, int err)
 {
@@ -1421,14 +1538,15 @@ pci_nvme_dealloc_sm(struct blockif_req *br, int err)
 	}
 }
 
-static int
+static bool
 nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
     struct nvme_command *cmd,
     struct pci_nvme_blockstore *nvstore,
     struct pci_nvme_ioreq *req,
     uint16_t *status)
 {
-	int err = -1;
+	int err;
+	bool pending = false;
 
 	if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) {
 		pci_nvme_status_genc(status, NVME_SC_INVALID_OPCODE);
@@ -1463,9 +1581,6 @@ nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
 		nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2,
 		    (uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP);
 
-		req->opc = cmd->opc;
-		req->cid = cmd->cid;
-		req->nsid = cmd->nsid;
 		/*
 		 * If the request is for more than a single range, store
 		 * the ranges in the br_iov. Optimize for the common case
@@ -1501,11 +1616,13 @@ nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
 		err = blockif_delete(nvstore->ctx, &req->io_req);
 		if (err)
 			pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
+		else
+			pending = true;
 
 		free(range);
 	}
 out:
-	return (err);
+	return (pending);
 }
 
 static void
@@ -1514,7 +1631,6 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint
 	struct nvme_submission_queue *sq;
 	uint16_t status;
 	uint16_t sqhead;
-	int err;
 
 	/* handle all submissions up to sq->tail index */
 	sq = &sc->submit_queues[idx];
@@ -1531,189 +1647,69 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint
 
 	while (sqhead != atomic_load_acq_short(&sq->tail)) {
 		struct nvme_command *cmd;
-		struct pci_nvme_ioreq *req = NULL;
-		uint64_t lba;
-		uint64_t nblocks, bytes, size, cpsz;
+		struct pci_nvme_ioreq *req;
+		uint32_t nsid;
+		bool pending;
 
-		/* TODO: support scatter gather list handling */
+		pending = false;
+		req = NULL;
+		status = 0;
 
 		cmd = &sq->qbase[sqhead];
 		sqhead = (sqhead + 1) % sq->size;
 
-		lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10;
+		nsid = le32toh(cmd->nsid);
+		if ((nsid == 0) || (nsid > sc->ctrldata.nn)) {
+			pci_nvme_status_genc(&status,
+			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
+			status |=
+			    NVME_STATUS_DNR_MASK << NVME_STATUS_DNR_SHIFT;
+			goto complete;
+ 		}
 
-		if (cmd->opc == NVME_OPC_FLUSH) {
-			pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
-			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
-			                        status, 1);
-
-			continue;
-		} else if (cmd->opc == 0x08) {
-			/* TODO: write zeroes */
-			WPRINTF("%s write zeroes lba 0x%lx blocks %u",
-			        __func__, lba, cmd->cdw12 & 0xFFFF);
-			pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
-			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
-			                        status, 1);
-
-			continue;
+		req = pci_nvme_get_ioreq(sc);
+		if (req == NULL) {
+			pci_nvme_status_genc(&status,
+			    NVME_SC_INTERNAL_DEVICE_ERROR);
+			WPRINTF("%s: unable to allocate IO req", __func__);
+			goto complete;
 		}
+		req->nvme_sq = sq;
+		req->sqid = idx;
+		req->opc = cmd->opc;
+		req->cid = cmd->cid;
+		req->nsid = cmd->nsid;
 
-		if (sc->nvstore.type == NVME_STOR_BLOCKIF) {
-			req = pci_nvme_get_ioreq(sc);
-			req->nvme_sq = sq;
-			req->sqid = idx;
-		}
-
-		if (cmd->opc == NVME_OPC_DATASET_MANAGEMENT) {
-			if (nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore, req,
-			    &status)) {
-				pci_nvme_set_completion(sc, sq, idx, cmd->cid,
-				    0, status, 1);
-				if (req)
-					pci_nvme_release_ioreq(sc, req);
-			}
-			continue;
-		}
-
-		nblocks = (cmd->cdw12 & 0xFFFF) + 1;
-
-		bytes = nblocks * sc->nvstore.sectsz;
-
-		/*
-		 * If data starts mid-page and flows into the next page, then
-		 * increase page count
-		 */
-
-		DPRINTF("[h%u:t%u:n%u] %s starting LBA 0x%lx blocks %lu "
-		         "(%lu-bytes)",
-		         sqhead==0 ? sq->size-1 : sqhead-1, sq->tail, sq->size,
-		         cmd->opc == NVME_OPC_WRITE ?
-			     "WRITE" : "READ",
-		         lba, nblocks, bytes);
-
-		cmd->prp1 &= ~(0x03UL);
-		cmd->prp2 &= ~(0x03UL);
-
-		DPRINTF(" prp1 0x%lx prp2 0x%lx", cmd->prp1, cmd->prp2);
-
-		size = bytes;
-		lba *= sc->nvstore.sectsz;
-
-		cpsz = PAGE_SIZE - (cmd->prp1 % PAGE_SIZE);
-
-		if (cpsz > bytes)
-			cpsz = bytes;
-
-		if (req != NULL) {
-			req->io_req.br_offset = ((uint64_t)cmd->cdw11 << 32) |
-			                        cmd->cdw10;
-			req->opc = cmd->opc;
-			req->cid = cmd->cid;
-			req->nsid = cmd->nsid;
-		}
-
-		err = pci_nvme_append_iov_req(sc, req, cmd->prp1, cpsz,
-		    cmd->opc == NVME_OPC_WRITE, lba);
-		lba += cpsz;
-		size -= cpsz;
-
-		if (size == 0)
-			goto iodone;
-
-		if (size <= PAGE_SIZE) {
-			/* prp2 is second (and final) page in transfer */
-
-			err = pci_nvme_append_iov_req(sc, req, cmd->prp2,
-			    size,
-			    cmd->opc == NVME_OPC_WRITE,
-			    lba);
-		} else {
-			uint64_t *prp_list;
-			int i;
-
-			/* prp2 is pointer to a physical region page list */
-			prp_list = paddr_guest2host(sc->nsc_pi->pi_vmctx,
-			                            cmd->prp2, PAGE_SIZE);
-
-			i = 0;
-			while (size != 0) {
-				cpsz = MIN(size, PAGE_SIZE);
-
-				/*
-				 * Move to linked physical region page list
-				 * in last item.
-				 */ 
-				if (i == (NVME_PRP2_ITEMS-1) &&
-				    size > PAGE_SIZE) {
-					assert((prp_list[i] & (PAGE_SIZE-1)) == 0);
-					prp_list = paddr_guest2host(
-					              sc->nsc_pi->pi_vmctx,
-					              prp_list[i], PAGE_SIZE);
-					i = 0;
-				}
-				if (prp_list[i] == 0) {
-					WPRINTF("PRP2[%d] = 0 !!!", i);
-					err = 1;
-					break;
-				}
-
-				err = pci_nvme_append_iov_req(sc, req,
-				    prp_list[i], cpsz,
-				    cmd->opc == NVME_OPC_WRITE, lba);
-				if (err)
-					break;
-
-				lba += cpsz;
-				size -= cpsz;
-				i++;
-			}
-		}
-
-iodone:
-		if (sc->nvstore.type == NVME_STOR_RAM) {
-			uint16_t code, status;
-
-			code = err ? NVME_SC_LBA_OUT_OF_RANGE :
-			    NVME_SC_SUCCESS;
-			pci_nvme_status_genc(&status, code);
-
-			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
-			                        status, 1);
-
-			continue;
-		}
-
-
-		if (err)
-			goto do_error;
-
-		req->io_req.br_callback = pci_nvme_io_done;
-
-		err = 0;
 		switch (cmd->opc) {
+		case NVME_OPC_FLUSH:
+			pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
+ 			break;
+		case NVME_OPC_WRITE:
 		case NVME_OPC_READ:
-			err = blockif_read(sc->nvstore.ctx, &req->io_req);
+			pending = nvme_opc_write_read(sc, cmd, &sc->nvstore,
+			    req, &status);
 			break;
-		case NVME_OPC_WRITE:
-			err = blockif_write(sc->nvstore.ctx, &req->io_req);
+		case NVME_OPC_WRITE_ZEROES:
+			/* TODO: write zeroes
+			WPRINTF("%s write zeroes lba 0x%lx blocks %u",
+			        __func__, lba, cmd->cdw12 & 0xFFFF); */
+			pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
 			break;
-		default:
-			WPRINTF("%s unhandled io command 0x%x",
-				 __func__, cmd->opc);
-			err = 1;
+		case NVME_OPC_DATASET_MANAGEMENT:
+ 			pending = nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore,
+			    req, &status);
+			break;
+ 		default:
+ 			WPRINTF("%s unhandled io command 0x%x",
+			    __func__, cmd->opc);
+			pci_nvme_status_genc(&status, NVME_SC_INVALID_OPCODE);
 		}
-
-do_error:
-		if (err) {
-			uint16_t status;
-
-			pci_nvme_status_genc(&status,
-			    NVME_SC_DATA_TRANSFER_ERROR);
-
+complete:
+		if (!pending) {
 			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
-			                        status, 1);
-			pci_nvme_release_ioreq(sc, req);
+			    status, 1);
+			if (req != NULL)
+				pci_nvme_release_ioreq(sc, req);
 		}
 	}
 
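
For reference, the contract shared by the new per-opcode handlers, as
established by the diff above: each handler fills in *status and returns
true only if the request remains pending on the blockif backend (the
completion is then posted from the request's br_callback); otherwise
pci_nvme_handle_io_cmd() posts the completion and releases the ioreq
itself. All the handlers follow the same signature shape:

    static bool
    nvme_opc_write_read(struct pci_nvme_softc *sc,
        struct nvme_command *cmd,
        struct pci_nvme_blockstore *nvstore,
        struct pci_nvme_ioreq *req,
        uint16_t *status);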

