svn commit: r362745 - head/usr.sbin/bhyve
Chuck Tuffli
chuck at FreeBSD.org
Mon Jun 29 00:31:15 UTC 2020
Author: chuck
Date: Mon Jun 29 00:31:14 2020
New Revision: 362745
URL: https://svnweb.freebsd.org/changeset/base/362745
Log:
bhyve: refactor NVMe IO command handling
This refactors the NVMe I/O command processing function to make adding
new commands easier. The main change is to move command-specific
processing (i.e. Read/Write) to separate functions for each NVMe I/O
command and leave the common per-command processing in the existing
pci_nvme_handle_io_cmd() function.
While here, add checks for some common errors (invalid Namespace ID,
invalid opcode, LBA out of range).
Add myself to the copyright holders.
Reviewed by: imp
Tested by: Jason Tubnor
MFC after: 2 weeks
Differential Revision: https://reviews.freebsd.org/D24879
Modified:
head/usr.sbin/bhyve/pci_nvme.c
Modified: head/usr.sbin/bhyve/pci_nvme.c
==============================================================================
--- head/usr.sbin/bhyve/pci_nvme.c Mon Jun 29 00:31:11 2020 (r362744)
+++ head/usr.sbin/bhyve/pci_nvme.c Mon Jun 29 00:31:14 2020 (r362745)
@@ -3,6 +3,7 @@
*
* Copyright (c) 2017 Shunsuke Mie
* Copyright (c) 2018 Leon Dang
+ * Copyright (c) 2020 Chuck Tuffli
*
* Function crc16 Copyright (c) 2017, Fedor Uporov
* Obtained from function ext2_crc16() in sys/fs/ext2fs/ext2_csum.c
@@ -1386,6 +1387,122 @@ pci_nvme_io_partial(struct blockif_req *br, int err)
pthread_cond_signal(&req->cv);
}
+static bool
+nvme_opc_write_read(struct pci_nvme_softc *sc,
+ struct nvme_command *cmd,
+ struct pci_nvme_blockstore *nvstore,
+ struct pci_nvme_ioreq *req,
+ uint16_t *status)
+{
+ uint64_t lba, nblocks, bytes;
+ size_t offset;
+ bool is_write = cmd->opc == NVME_OPC_WRITE;
+ bool pending = false;
+
+ lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10;
+ nblocks = (cmd->cdw12 & 0xFFFF) + 1;
+
+ offset = lba * nvstore->sectsz;
+ bytes = nblocks * nvstore->sectsz;
+
+ if ((offset + bytes) > nvstore->size) {
+ WPRINTF("%s command would exceed LBA range", __func__);
+ pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE);
+ goto out;
+ }
+
+ req->io_req.br_offset = lba;
+
+ /* PRP bits 1:0 must be zero */
+ cmd->prp1 &= ~0x3UL;
+ cmd->prp2 &= ~0x3UL;
+
+ if (nvstore->type == NVME_STOR_RAM) {
+ uint8_t *buf = nvstore->ctx;
+ enum nvme_copy_dir dir;
+
+ if (is_write)
+ dir = NVME_COPY_TO_PRP;
+ else
+ dir = NVME_COPY_FROM_PRP;
+
+ if (nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2,
+ buf + offset, bytes, dir))
+ pci_nvme_status_genc(status,
+ NVME_SC_DATA_TRANSFER_ERROR);
+ else
+ pci_nvme_status_genc(status, NVME_SC_SUCCESS);
+ } else {
+ uint64_t size;
+ int err;
+
+ size = MIN(PAGE_SIZE - (cmd->prp1 % PAGE_SIZE), bytes);
+ if (pci_nvme_append_iov_req(sc, req, cmd->prp1,
+ size, is_write, offset)) {
+ pci_nvme_status_genc(status,
+ NVME_SC_DATA_TRANSFER_ERROR);
+ goto out;
+ }
+
+ offset += size;
+ bytes -= size;
+
+ if (bytes == 0) {
+ ;
+ } else if (bytes <= PAGE_SIZE) {
+ size = bytes;
+ if (pci_nvme_append_iov_req(sc, req, cmd->prp2,
+ size, is_write, offset)) {
+ pci_nvme_status_genc(status,
+ NVME_SC_DATA_TRANSFER_ERROR);
+ goto out;
+ }
+ } else {
+ void *vmctx = sc->nsc_pi->pi_vmctx;
+ uint64_t *prp_list = &cmd->prp2;
+ uint64_t *last = prp_list;
+
+ /* PRP2 is pointer to a physical region page list */
+ while (bytes) {
+ /* Last entry in list points to the next list */
+ if (prp_list == last) {
+ uint64_t prp = *prp_list;
+
+ prp_list = paddr_guest2host(vmctx, prp,
+ PAGE_SIZE - (prp % PAGE_SIZE));
+ last = prp_list + (NVME_PRP2_ITEMS - 1);
+ }
+
+ size = MIN(bytes, PAGE_SIZE);
+
+ if (pci_nvme_append_iov_req(sc, req, *prp_list,
+ size, is_write, offset)) {
+ pci_nvme_status_genc(status,
+ NVME_SC_DATA_TRANSFER_ERROR);
+ goto out;
+ }
+
+ offset += size;
+ bytes -= size;
+
+ prp_list++;
+ }
+ }
+ req->io_req.br_callback = pci_nvme_io_done;
+ if (is_write)
+ err = blockif_write(nvstore->ctx, &req->io_req);
+ else
+ err = blockif_read(nvstore->ctx, &req->io_req);
+
+ if (err)
+ pci_nvme_status_genc(status, NVME_SC_DATA_TRANSFER_ERROR);
+ else
+ pending = true;
+ }
+out:
+ return (pending);
+}
+
static void
pci_nvme_dealloc_sm(struct blockif_req *br, int err)
{
@@ -1421,14 +1538,15 @@ pci_nvme_dealloc_sm(struct blockif_req *br, int err)
}
}
-static int
+static bool
nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
struct nvme_command *cmd,
struct pci_nvme_blockstore *nvstore,
struct pci_nvme_ioreq *req,
uint16_t *status)
{
- int err = -1;
+ int err;
+ bool pending = false;
if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) {
pci_nvme_status_genc(status, NVME_SC_INVALID_OPCODE);
@@ -1463,9 +1581,6 @@ nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2,
(uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP);
- req->opc = cmd->opc;
- req->cid = cmd->cid;
- req->nsid = cmd->nsid;
/*
* If the request is for more than a single range, store
* the ranges in the br_iov. Optimize for the common case
@@ -1501,11 +1616,13 @@ nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
err = blockif_delete(nvstore->ctx, &req->io_req);
if (err)
pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
+ else
+ pending = true;
free(range);
}
out:
- return (err);
+ return (pending);
}
static void
@@ -1514,7 +1631,6 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint
struct nvme_submission_queue *sq;
uint16_t status;
uint16_t sqhead;
- int err;
/* handle all submissions up to sq->tail index */
sq = &sc->submit_queues[idx];
@@ -1531,189 +1647,69 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint
while (sqhead != atomic_load_acq_short(&sq->tail)) {
struct nvme_command *cmd;
- struct pci_nvme_ioreq *req = NULL;
- uint64_t lba;
- uint64_t nblocks, bytes, size, cpsz;
+ struct pci_nvme_ioreq *req;
+ uint32_t nsid;
+ bool pending;
- /* TODO: support scatter gather list handling */
+ pending = false;
+ req = NULL;
+ status = 0;
cmd = &sq->qbase[sqhead];
sqhead = (sqhead + 1) % sq->size;
- lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10;
+ nsid = le32toh(cmd->nsid);
+ if ((nsid == 0) || (nsid > sc->ctrldata.nn)) {
+ pci_nvme_status_genc(&status,
+ NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
+ status |=
+ NVME_STATUS_DNR_MASK << NVME_STATUS_DNR_SHIFT;
+ goto complete;
+ }
- if (cmd->opc == NVME_OPC_FLUSH) {
- pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
- pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
- status, 1);
-
- continue;
- } else if (cmd->opc == 0x08) {
- /* TODO: write zeroes */
- WPRINTF("%s write zeroes lba 0x%lx blocks %u",
- __func__, lba, cmd->cdw12 & 0xFFFF);
- pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
- pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
- status, 1);
-
- continue;
+ req = pci_nvme_get_ioreq(sc);
+ if (req == NULL) {
+ pci_nvme_status_genc(&status,
+ NVME_SC_INTERNAL_DEVICE_ERROR);
+ WPRINTF("%s: unable to allocate IO req", __func__);
+ goto complete;
}
+ req->nvme_sq = sq;
+ req->sqid = idx;
+ req->opc = cmd->opc;
+ req->cid = cmd->cid;
+ req->nsid = cmd->nsid;
- if (sc->nvstore.type == NVME_STOR_BLOCKIF) {
- req = pci_nvme_get_ioreq(sc);
- req->nvme_sq = sq;
- req->sqid = idx;
- }
-
- if (cmd->opc == NVME_OPC_DATASET_MANAGEMENT) {
- if (nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore, req,
- &status)) {
- pci_nvme_set_completion(sc, sq, idx, cmd->cid,
- 0, status, 1);
- if (req)
- pci_nvme_release_ioreq(sc, req);
- }
- continue;
- }
-
- nblocks = (cmd->cdw12 & 0xFFFF) + 1;
-
- bytes = nblocks * sc->nvstore.sectsz;
-
- /*
- * If data starts mid-page and flows into the next page, then
- * increase page count
- */
-
- DPRINTF("[h%u:t%u:n%u] %s starting LBA 0x%lx blocks %lu "
- "(%lu-bytes)",
- sqhead==0 ? sq->size-1 : sqhead-1, sq->tail, sq->size,
- cmd->opc == NVME_OPC_WRITE ?
- "WRITE" : "READ",
- lba, nblocks, bytes);
-
- cmd->prp1 &= ~(0x03UL);
- cmd->prp2 &= ~(0x03UL);
-
- DPRINTF(" prp1 0x%lx prp2 0x%lx", cmd->prp1, cmd->prp2);
-
- size = bytes;
- lba *= sc->nvstore.sectsz;
-
- cpsz = PAGE_SIZE - (cmd->prp1 % PAGE_SIZE);
-
- if (cpsz > bytes)
- cpsz = bytes;
-
- if (req != NULL) {
- req->io_req.br_offset = ((uint64_t)cmd->cdw11 << 32) |
- cmd->cdw10;
- req->opc = cmd->opc;
- req->cid = cmd->cid;
- req->nsid = cmd->nsid;
- }
-
- err = pci_nvme_append_iov_req(sc, req, cmd->prp1, cpsz,
- cmd->opc == NVME_OPC_WRITE, lba);
- lba += cpsz;
- size -= cpsz;
-
- if (size == 0)
- goto iodone;
-
- if (size <= PAGE_SIZE) {
- /* prp2 is second (and final) page in transfer */
-
- err = pci_nvme_append_iov_req(sc, req, cmd->prp2,
- size,
- cmd->opc == NVME_OPC_WRITE,
- lba);
- } else {
- uint64_t *prp_list;
- int i;
-
- /* prp2 is pointer to a physical region page list */
- prp_list = paddr_guest2host(sc->nsc_pi->pi_vmctx,
- cmd->prp2, PAGE_SIZE);
-
- i = 0;
- while (size != 0) {
- cpsz = MIN(size, PAGE_SIZE);
-
- /*
- * Move to linked physical region page list
- * in last item.
- */
- if (i == (NVME_PRP2_ITEMS-1) &&
- size > PAGE_SIZE) {
- assert((prp_list[i] & (PAGE_SIZE-1)) == 0);
- prp_list = paddr_guest2host(
- sc->nsc_pi->pi_vmctx,
- prp_list[i], PAGE_SIZE);
- i = 0;
- }
- if (prp_list[i] == 0) {
- WPRINTF("PRP2[%d] = 0 !!!", i);
- err = 1;
- break;
- }
-
- err = pci_nvme_append_iov_req(sc, req,
- prp_list[i], cpsz,
- cmd->opc == NVME_OPC_WRITE, lba);
- if (err)
- break;
-
- lba += cpsz;
- size -= cpsz;
- i++;
- }
- }
-
-iodone:
- if (sc->nvstore.type == NVME_STOR_RAM) {
- uint16_t code, status;
-
- code = err ? NVME_SC_LBA_OUT_OF_RANGE :
- NVME_SC_SUCCESS;
- pci_nvme_status_genc(&status, code);
-
- pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
- status, 1);
-
- continue;
- }
-
-
- if (err)
- goto do_error;
-
- req->io_req.br_callback = pci_nvme_io_done;
-
- err = 0;
switch (cmd->opc) {
+ case NVME_OPC_FLUSH:
+ pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
+ break;
+ case NVME_OPC_WRITE:
case NVME_OPC_READ:
- err = blockif_read(sc->nvstore.ctx, &req->io_req);
+ pending = nvme_opc_write_read(sc, cmd, &sc->nvstore,
+ req, &status);
break;
- case NVME_OPC_WRITE:
- err = blockif_write(sc->nvstore.ctx, &req->io_req);
+ case NVME_OPC_WRITE_ZEROES:
+ /* TODO: write zeroes
+ WPRINTF("%s write zeroes lba 0x%lx blocks %u",
+ __func__, lba, cmd->cdw12 & 0xFFFF); */
+ pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
break;
- default:
- WPRINTF("%s unhandled io command 0x%x",
- __func__, cmd->opc);
- err = 1;
+ case NVME_OPC_DATASET_MANAGEMENT:
+ pending = nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore,
+ req, &status);
+ break;
+ default:
+ WPRINTF("%s unhandled io command 0x%x",
+ __func__, cmd->opc);
+ pci_nvme_status_genc(&status, NVME_SC_INVALID_OPCODE);
}
-
-do_error:
- if (err) {
- uint16_t status;
-
- pci_nvme_status_genc(&status,
- NVME_SC_DATA_TRANSFER_ERROR);
-
+complete:
+ if (!pending) {
pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
- status, 1);
- pci_nvme_release_ioreq(sc, req);
+ status, 1);
+ if (req != NULL)
+ pci_nvme_release_ioreq(sc, req);
}
}
More information about the svn-src-all
mailing list