svn commit: r359831 - stable/12/usr.sbin/bhyve

Chuck Tuffli chuck at FreeBSD.org
Sun Apr 12 19:14:28 UTC 2020


Author: chuck
Date: Sun Apr 12 19:14:27 2020
New Revision: 359831
URL: https://svnweb.freebsd.org/changeset/base/359831

Log:
  MFC r359364
  
  bhyve: implement NVMe deallocate command

Modified:
  stable/12/usr.sbin/bhyve/pci_nvme.c
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/usr.sbin/bhyve/pci_nvme.c
==============================================================================
--- stable/12/usr.sbin/bhyve/pci_nvme.c	Sun Apr 12 19:02:34 2020	(r359830)
+++ stable/12/usr.sbin/bhyve/pci_nvme.c	Sun Apr 12 19:14:27 2020	(r359831)
@@ -180,6 +180,7 @@ struct pci_nvme_blockstore {
 	uint32_t	sectsz;
 	uint32_t	sectsz_bits;
 	uint64_t	eui64;
+	uint32_t	deallocate:1;
 };
 
 struct pci_nvme_ioreq {
@@ -209,6 +210,15 @@ struct pci_nvme_ioreq {
 	struct iovec	iovpadding[NVME_MAX_BLOCKIOVS-BLOCKIF_IOV_MAX];
 };
 
+enum nvme_dsm_type {
+	/* ONCS DSM bit follows nvstore.deallocate ("dsm" option "auto", default) */
+	NVME_DATASET_MANAGEMENT_AUTO,
+	/* Always set the ONCS DSM bit ("dsm" option value "enable") */
+	NVME_DATASET_MANAGEMENT_ENABLE,
+	/* Always leave the ONCS DSM bit clear ("dsm" option value "disable") */
+	NVME_DATASET_MANAGEMENT_DISABLE,
+};
+
 struct pci_nvme_softc {
 	struct pci_devinst *nsc_pi;
 
@@ -246,6 +256,8 @@ struct pci_nvme_softc {
 	uint32_t	intr_coales_aggr_time;   /* 0x08: uS to delay intr */
 	uint32_t	intr_coales_aggr_thresh; /* 0x08: compl-Q entries */
 	uint32_t	async_ev_config;         /* 0x0B: async event config */
+
+	enum nvme_dsm_type dataset_management;
 };
 
 
@@ -285,6 +297,9 @@ static void pci_nvme_io_partial(struct blockif_req *br
 	((NVME_STATUS_SCT_MASK << NVME_STATUS_SCT_SHIFT) |\
 	 (NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT))
 
+#define NVME_ONCS_DSM	(NVME_CTRLR_DATA_ONCS_DSM_MASK << \
+	NVME_CTRLR_DATA_ONCS_DSM_SHIFT)	/* Dataset Management bit in ONCS */
+
 static __inline void
 cpywithpad(char *dst, size_t dst_size, const char *src, char pad)
 {
@@ -363,6 +378,19 @@ pci_nvme_init_ctrldata(struct pci_nvme_softc *sc)
 	    (4 << NVME_CTRLR_DATA_CQES_MIN_SHIFT);
 	cd->nn = 1;	/* number of namespaces */
 
+	cd->oncs = 0;
+	switch (sc->dataset_management) {
+	case NVME_DATASET_MANAGEMENT_AUTO:
+		if (sc->nvstore.deallocate)
+			cd->oncs |= NVME_ONCS_DSM;
+		break;
+	case NVME_DATASET_MANAGEMENT_ENABLE:
+		cd->oncs |= NVME_ONCS_DSM;
+		break;
+	default:
+		break;
+	}
+
 	cd->fna = 0x03;
 
 	cd->power_state[0].mp = 10;
@@ -429,6 +457,9 @@ pci_nvme_init_nsdata(struct pci_nvme_softc *sc,
 	nd->ncap = nd->nsze;
 	nd->nuse = nd->nsze;
 
+	if (nvstore->type == NVME_STOR_BLOCKIF)
+		nvstore->deallocate = blockif_candelete(nvstore->ctx);
+
 	nd->nlbaf = 0; /* NLBAF is a 0's based value (i.e. 1 LBA Format) */
 	nd->flbas = 0;
 
@@ -1339,7 +1370,7 @@ pci_nvme_io_done(struct blockif_req *br, int err)
 	uint16_t code, status;
 
 	DPRINTF(("%s error %d %s", __func__, err, strerror(err)));
-	
+
 	/* TODO return correct error */
 	code = err ? NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS;
 	pci_nvme_status_genc(&status, code);
@@ -1358,7 +1389,128 @@ pci_nvme_io_partial(struct blockif_req *br, int err)
 	pthread_cond_signal(&req->cv);
 }
 
+/*
+ * Completion callback driving a multi-range deallocate: br_iov[] holds one
+ * byte offset/length pair per range, prev_gpaddr indexes the range that just
+ * finished and prev_size is the range count. Completes the NVMe command
+ * after the last range or on the first error.
+ */
+static void
+pci_nvme_dealloc_sm(struct blockif_req *br, int err)
+{
+	struct pci_nvme_ioreq *req = br->br_param;
+	struct pci_nvme_softc *sc = req->sc;
+	struct iovec *next;
+	uint16_t status;
 
+	if (err) {
+		pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR);
+	} else if ((req->prev_gpaddr + 1) == (req->prev_size)) {
+		pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
+	} else {
+		/* Step to the next range; iov_* values are already in bytes */
+		req->prev_gpaddr++;
+		next = req->io_req.br_iov + req->prev_gpaddr;
+		req->io_req.br_offset = (off_t)next->iov_base;
+		req->io_req.br_resid = next->iov_len;
+		/* Resubmitted: the command is not complete yet */
+		if (blockif_delete(sc->nvstore.ctx, &req->io_req) == 0)
+			return;
+		pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR);
+	}
+
+	pci_nvme_set_completion(sc, req->nvme_sq, req->sqid, req->cid, 0,
+	    status, 0);
+	pci_nvme_release_ioreq(sc, req);
+}
+
+/*
+ * Handle the NVMe Dataset Management command; only the Deallocate attribute
+ * is acted on. Returns 0 once a blockif delete has been queued (req's
+ * callback then posts the completion). Otherwise returns non-zero with
+ * *status set so the caller can post the completion itself.
+ */
+static int
+nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
+    struct nvme_command *cmd,
+    struct pci_nvme_blockstore *nvstore,
+    struct pci_nvme_ioreq *req,
+    uint16_t *status)
+{
+	struct nvme_dsm_range *range;
+	struct iovec *iov;
+	/* 64-bit so "length * sectsz" below is not truncated to 32 bits */
+	uint64_t sectsz = sc->nvstore.sectsz;
+	uint32_t nr, r;
+	int err = -1;
+
+	if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) {
+		pci_nvme_status_genc(status, NVME_SC_INVALID_OPCODE);
+		goto out;
+	}
+
+	/*
+	 * DSM calls are advisory only, and compliant controllers may choose
+	 * to take no actions (i.e. return Success). Do so when Deallocate is
+	 * not requested, so *status is never left unset, or when the backing
+	 * store cannot deallocate.
+	 */
+	if ((cmd->cdw11 & NVME_DSM_ATTR_DEALLOCATE) == 0 ||
+	    !nvstore->deallocate) {
+		pci_nvme_status_genc(status, NVME_SC_SUCCESS);
+		goto out;
+	}
+
+	if (req == NULL) {
+		pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
+		goto out;
+	}
+
+	/* copy locally because a range entry could straddle PRPs */
+	range = calloc(1, NVME_MAX_DSM_TRIM);
+	if (range == NULL) {
+		pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
+		goto out;
+	}
+	nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2,
+	    (uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP);
+
+	req->opc = cmd->opc;
+	req->cid = cmd->cid;
+	req->nsid = cmd->nsid;
+	/*
+	 * Multiple ranges are stashed in br_iov; a single range (the common
+	 * case) needs no list. NVMe Number of Ranges is a zero based value.
+	 */
+	nr = cmd->cdw10 & 0xff;
+	req->io_req.br_iovcnt = 0;
+	req->io_req.br_offset = range[0].starting_lba * sectsz;
+	req->io_req.br_resid = range[0].length * sectsz;
+
+	if (nr == 0) {
+		req->io_req.br_callback = pci_nvme_io_done;
+	} else {
+		iov = req->io_req.br_iov;
+		for (r = 0; r <= nr; r++) {
+			iov[r].iov_base = (void *)(uintptr_t)(range[r].starting_lba * sectsz);
+			iov[r].iov_len = range[r].length * sectsz;
+		}
+		req->io_req.br_callback = pci_nvme_dealloc_sm;
+
+		/* prev_gpaddr tracks the current entry, prev_size the entry count */
+		req->prev_gpaddr = 0;
+		req->prev_size = r;
+	}
+
+	err = blockif_delete(nvstore->ctx, &req->io_req);
+	if (err)
+		pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
+
+	free(range);
+out:
+	return (err);
+}
+
 static void
 pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx)
 {
@@ -1410,16 +1562,27 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint
 			continue;
 		}
 
-		nblocks = (cmd->cdw12 & 0xFFFF) + 1;
-
-		bytes = nblocks * sc->nvstore.sectsz;
-
 		if (sc->nvstore.type == NVME_STOR_BLOCKIF) {
 			req = pci_nvme_get_ioreq(sc);
 			req->nvme_sq = sq;
 			req->sqid = idx;
 		}
 
+		if (cmd->opc == NVME_OPC_DATASET_MANAGEMENT) {
+			if (nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore, req,
+			    &status)) {
+				pci_nvme_set_completion(sc, sq, idx, cmd->cid,
+				    0, status, 1);
+				if (req)
+					pci_nvme_release_ioreq(sc, req);
+			}
+			continue;
+		}
+
+		nblocks = (cmd->cdw12 & 0xFFFF) + 1;
+
+		bytes = nblocks * sc->nvstore.sectsz;
+
 		/*
 		 * If data starts mid-page and flows into the next page, then
 		 * increase page count
@@ -1868,6 +2031,7 @@ pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *o
 	sc->ioslots = NVME_IOSLOTS;
 	sc->num_squeues = sc->max_queues;
 	sc->num_cqueues = sc->max_queues;
+	sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO;
 	sectsz = 0;
 
 	uopt = strdup(opts);
@@ -1912,6 +2076,13 @@ pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *o
 			}
 		} else if (!strcmp("eui64", xopts)) {
 			sc->nvstore.eui64 = htobe64(strtoull(config, NULL, 0));
+		} else if (!strcmp("dsm", xopts)) {
+			if (!strcmp("auto", config))
+				sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO;
+			else if (!strcmp("enable", config))
+				sc->dataset_management = NVME_DATASET_MANAGEMENT_ENABLE;
+			else if (!strcmp("disable", config))
+				sc->dataset_management = NVME_DATASET_MANAGEMENT_DISABLE;
 		} else if (optidx == 0) {
 			snprintf(bident, sizeof(bident), "%d:%d",
 			         sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
@@ -2031,8 +2202,12 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *p
 	sem_init(&sc->iosemlock, 0, sc->ioslots);
 
 	pci_nvme_reset(sc);
-	pci_nvme_init_ctrldata(sc);
+	/*
+	 * Controller data depends on Namespace data so initialize Namespace
+	 * data first.
+	 */
 	pci_nvme_init_nsdata(sc, &sc->nsdata, 1, &sc->nvstore);
+	pci_nvme_init_ctrldata(sc);
 	pci_nvme_init_logpages(sc);
 
 	pci_lintr_request(pi);


More information about the svn-src-all mailing list