svn commit: r356961 - in stable/12: share/man/man4 sys/dev/nvme

Alexander Motin <mav@FreeBSD.org>
Wed Jan 22 01:08:29 UTC 2020


Author: mav
Date: Wed Jan 22 01:08:27 2020
New Revision: 356961
URL: https://svnweb.freebsd.org/changeset/base/356961

Log:
  MFC r356474, r356480, r356482, r356506:
  Add Host Memory Buffer support to nvme(4).
  
  This allows the cheapest DRAM-less NVMe SSDs to use some of the host's RAM
  (about 1MB per 1GB of capacity on the devices I have) for their metadata
  cache, significantly improving random I/O performance.  The device reports
  the minimum and preferred sizes of the buffer.  The code limits it to 5%
  of physical RAM by default.  If the buffer cannot be allocated, or would
  be below the minimum size, the device will just have to work without it.
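
  For example, to cap the buffer at 256MB per device, the tunable can be
  set in /boot/loader.conf (value in bytes; 256MB is purely illustrative):

      hw.nvme.hmb_max="268435456"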

Modified:
  stable/12/share/man/man4/nvme.4
  stable/12/sys/dev/nvme/nvme.h
  stable/12/sys/dev/nvme/nvme_ctrlr.c
  stable/12/sys/dev/nvme/nvme_ctrlr_cmd.c
  stable/12/sys/dev/nvme/nvme_private.h
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/share/man/man4/nvme.4
==============================================================================
--- stable/12/share/man/man4/nvme.4	Wed Jan 22 01:04:26 2020	(r356960)
+++ stable/12/share/man/man4/nvme.4	Wed Jan 22 01:08:27 2020	(r356961)
@@ -33,7 +33,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd August 21, 2019
+.Dd January 6, 2020
 .Dt NVME 4
 .Os
 .Sh NAME
@@ -119,6 +119,14 @@ hw.nvme.force_intx=1
 .Ed
 .Pp
 Note that use of INTx implies disabling of per-CPU I/O queue pairs.
+.Pp
+To control the maximum amount of system RAM in bytes to use as the Host
+Memory Buffer for capable devices, set the following tunable:
+.Bd -literal -offset indent
+hw.nvme.hmb_max
+.Ed
+.Pp
+The default value is 5% of the physical memory size per device.
 .Pp
 The
 .Xr nvd 4

Modified: stable/12/sys/dev/nvme/nvme.h
==============================================================================
--- stable/12/sys/dev/nvme/nvme.h	Wed Jan 22 01:04:26 2020	(r356960)
+++ stable/12/sys/dev/nvme/nvme.h	Wed Jan 22 01:08:27 2020	(r356961)
@@ -1531,6 +1531,12 @@ struct nvme_get_nsid {
 	uint32_t	nsid;
 };
 
+struct nvme_hmb_desc {
+	uint64_t	addr;
+	uint32_t	size;
+	uint32_t	reserved;
+};
+
 #define nvme_completion_is_error(cpl)					\
 	(NVME_STATUS_GET_SC((cpl)->status) != 0 || NVME_STATUS_GET_SCT((cpl)->status) != 0)
 
@@ -1566,6 +1572,8 @@ int	nvme_ctrlr_passthrough_cmd(struct nvme_controller 
 /* Admin functions */
 void	nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr,
 				   uint8_t feature, uint32_t cdw11,
+				   uint32_t cdw12, uint32_t cdw13,
+				   uint32_t cdw14, uint32_t cdw15,
 				   void *payload, uint32_t payload_size,
 				   nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr,

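The new nvme_hmb_desc structure mirrors the Host Memory Buffer descriptor
entry defined by the NVMe specification: a 16-byte record carrying the
physical address of one buffer chunk and its size, both little-endian on
the wire, with the size expressed in units of the 4KB host memory page
size the driver programs.  A minimal sketch of filling one entry, where
chunk_paddr and chunk_bytes are hypothetical stand-ins for a real DMA
allocation:

	/* Hypothetical chunk from a prior bus_dmamem_alloc()/load. */
	uint64_t chunk_paddr = 0x100000;	/* 4KB-aligned physical address */
	size_t chunk_bytes = 2 * 1024 * 1024;	/* chunk length in bytes */
	struct nvme_hmb_desc desc;

	desc.addr = htole64(chunk_paddr);	 /* buffer address */
	desc.size = htole32(chunk_bytes / 4096); /* size in 4KB units */
	desc.reserved = 0;
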
Modified: stable/12/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- stable/12/sys/dev/nvme/nvme_ctrlr.c	Wed Jan 22 01:04:26 2020	(r356960)
+++ stable/12/sys/dev/nvme/nvme_ctrlr.c	Wed Jan 22 01:08:27 2020	(r356961)
@@ -841,6 +841,169 @@ nvme_ctrlr_configure_int_coalescing(struct nvme_contro
 }
 
 static void
+nvme_ctrlr_hmb_free(struct nvme_controller *ctrlr)
+{
+	struct nvme_hmb_chunk *hmbc;
+	int i;
+
+	if (ctrlr->hmb_desc_paddr) {
+		bus_dmamap_unload(ctrlr->hmb_desc_tag, ctrlr->hmb_desc_map);
+		bus_dmamem_free(ctrlr->hmb_desc_tag, ctrlr->hmb_desc_vaddr,
+		    ctrlr->hmb_desc_map);
+		ctrlr->hmb_desc_paddr = 0;
+	}
+	if (ctrlr->hmb_desc_tag) {
+		bus_dma_tag_destroy(ctrlr->hmb_desc_tag);
+		ctrlr->hmb_desc_tag = NULL;
+	}
+	for (i = 0; i < ctrlr->hmb_nchunks; i++) {
+		hmbc = &ctrlr->hmb_chunks[i];
+		bus_dmamap_unload(ctrlr->hmb_tag, hmbc->hmbc_map);
+		bus_dmamem_free(ctrlr->hmb_tag, hmbc->hmbc_vaddr,
+		    hmbc->hmbc_map);
+	}
+	ctrlr->hmb_nchunks = 0;
+	if (ctrlr->hmb_tag) {
+		bus_dma_tag_destroy(ctrlr->hmb_tag);
+		ctrlr->hmb_tag = NULL;
+	}
+	if (ctrlr->hmb_chunks) {
+		free(ctrlr->hmb_chunks, M_NVME);
+		ctrlr->hmb_chunks = NULL;
+	}
+}
+
+static void
+nvme_ctrlr_hmb_alloc(struct nvme_controller *ctrlr)
+{
+	struct nvme_hmb_chunk *hmbc;
+	size_t pref, min, minc, size;
+	int err, i;
+	uint64_t max;
+
+	/* Limit HMB to 5% of RAM size per device by default. */
+	max = (uint64_t)physmem * PAGE_SIZE / 20;
+	TUNABLE_UINT64_FETCH("hw.nvme.hmb_max", &max);
+
+	min = (long long unsigned)ctrlr->cdata.hmmin * 4096;
+	if (max == 0 || max < min)
+		return;
+	pref = MIN((long long unsigned)ctrlr->cdata.hmpre * 4096, max);
+	minc = MAX(ctrlr->cdata.hmminds * 4096, PAGE_SIZE);
+	if (min > 0 && ctrlr->cdata.hmmaxd > 0)
+		minc = MAX(minc, min / ctrlr->cdata.hmmaxd);
+	ctrlr->hmb_chunk = pref;
+
+again:
+	ctrlr->hmb_chunk = roundup2(ctrlr->hmb_chunk, PAGE_SIZE);
+	ctrlr->hmb_nchunks = howmany(pref, ctrlr->hmb_chunk);
+	if (ctrlr->cdata.hmmaxd > 0 && ctrlr->hmb_nchunks > ctrlr->cdata.hmmaxd)
+		ctrlr->hmb_nchunks = ctrlr->cdata.hmmaxd;
+	ctrlr->hmb_chunks = malloc(sizeof(struct nvme_hmb_chunk) *
+	    ctrlr->hmb_nchunks, M_NVME, M_WAITOK);
+	err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
+	    PAGE_SIZE, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
+	    ctrlr->hmb_chunk, 1, ctrlr->hmb_chunk, 0, NULL, NULL, &ctrlr->hmb_tag);
+	if (err != 0) {
+		nvme_printf(ctrlr, "HMB tag create failed %d\n", err);
+		nvme_ctrlr_hmb_free(ctrlr);
+		return;
+	}
+
+	for (i = 0; i < ctrlr->hmb_nchunks; i++) {
+		hmbc = &ctrlr->hmb_chunks[i];
+		if (bus_dmamem_alloc(ctrlr->hmb_tag,
+		    (void **)&hmbc->hmbc_vaddr, BUS_DMA_NOWAIT,
+		    &hmbc->hmbc_map)) {
+			nvme_printf(ctrlr, "failed to alloc HMB\n");
+			break;
+		}
+		if (bus_dmamap_load(ctrlr->hmb_tag, hmbc->hmbc_map,
+		    hmbc->hmbc_vaddr, ctrlr->hmb_chunk, nvme_single_map,
+		    &hmbc->hmbc_paddr, BUS_DMA_NOWAIT) != 0) {
+			bus_dmamem_free(ctrlr->hmb_tag, hmbc->hmbc_vaddr,
+			    hmbc->hmbc_map);
+			nvme_printf(ctrlr, "failed to load HMB\n");
+			break;
+		}
+		bus_dmamap_sync(ctrlr->hmb_tag, hmbc->hmbc_map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+	}
+
+	if (i < ctrlr->hmb_nchunks && i * ctrlr->hmb_chunk < min &&
+	    ctrlr->hmb_chunk / 2 >= minc) {
+		ctrlr->hmb_nchunks = i;
+		nvme_ctrlr_hmb_free(ctrlr);
+		ctrlr->hmb_chunk /= 2;
+		goto again;
+	}
+	ctrlr->hmb_nchunks = i;
+	if (ctrlr->hmb_nchunks * ctrlr->hmb_chunk < min) {
+		nvme_ctrlr_hmb_free(ctrlr);
+		return;
+	}
+
+	size = sizeof(struct nvme_hmb_desc) * ctrlr->hmb_nchunks;
+	err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
+	    16, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
+	    size, 1, size, 0, NULL, NULL, &ctrlr->hmb_desc_tag);
+	if (err != 0) {
+		nvme_printf(ctrlr, "HMB desc tag create failed %d\n", err);
+		nvme_ctrlr_hmb_free(ctrlr);
+		return;
+	}
+	if (bus_dmamem_alloc(ctrlr->hmb_desc_tag,
+	    (void **)&ctrlr->hmb_desc_vaddr, BUS_DMA_WAITOK,
+	    &ctrlr->hmb_desc_map)) {
+		nvme_printf(ctrlr, "failed to alloc HMB desc\n");
+		nvme_ctrlr_hmb_free(ctrlr);
+		return;
+	}
+	if (bus_dmamap_load(ctrlr->hmb_desc_tag, ctrlr->hmb_desc_map,
+	    ctrlr->hmb_desc_vaddr, size, nvme_single_map,
+	    &ctrlr->hmb_desc_paddr, BUS_DMA_NOWAIT) != 0) {
+		bus_dmamem_free(ctrlr->hmb_desc_tag, ctrlr->hmb_desc_vaddr,
+		    ctrlr->hmb_desc_map);
+		nvme_printf(ctrlr, "failed to load HMB desc\n");
+		nvme_ctrlr_hmb_free(ctrlr);
+		return;
+	}
+
+	for (i = 0; i < ctrlr->hmb_nchunks; i++) {
+		ctrlr->hmb_desc_vaddr[i].addr =
+		    htole64(ctrlr->hmb_chunks[i].hmbc_paddr);
+		ctrlr->hmb_desc_vaddr[i].size = htole32(ctrlr->hmb_chunk / 4096);
+	}
+	bus_dmamap_sync(ctrlr->hmb_desc_tag, ctrlr->hmb_desc_map,
+	    BUS_DMASYNC_PREWRITE);
+
+	nvme_printf(ctrlr, "Allocated %lluMB host memory buffer\n",
+	    (long long unsigned)ctrlr->hmb_nchunks * ctrlr->hmb_chunk
+	    / 1024 / 1024);
+}
+
+static void
+nvme_ctrlr_hmb_enable(struct nvme_controller *ctrlr, bool enable, bool memret)
+{
+	struct nvme_completion_poll_status	status;
+	uint32_t cdw11;
+
+	cdw11 = 0;
+	if (enable)
+		cdw11 |= 1;
+	if (memret)
+		cdw11 |= 2;
+	status.done = 0;
+	nvme_ctrlr_cmd_set_feature(ctrlr, NVME_FEAT_HOST_MEMORY_BUFFER, cdw11,
+	    ctrlr->hmb_nchunks * ctrlr->hmb_chunk / 4096, ctrlr->hmb_desc_paddr,
+	    ctrlr->hmb_desc_paddr >> 32, ctrlr->hmb_nchunks, NULL, 0,
+	    nvme_completion_poll_cb, &status);
+	nvme_completion_poll(&status);
+	if (nvme_completion_is_error(&status.cpl))
+		nvme_printf(ctrlr, "nvme_ctrlr_hmb_enable failed!\n");
+}
+
+static void
 nvme_ctrlr_start(void *ctrlr_arg, bool resetting)
 {
 	struct nvme_controller *ctrlr = ctrlr_arg;
@@ -888,6 +1051,13 @@ nvme_ctrlr_start(void *ctrlr_arg, bool resetting)
 		}
 	}
 
+	if (ctrlr->cdata.hmpre > 0 && ctrlr->hmb_nchunks == 0) {
+		nvme_ctrlr_hmb_alloc(ctrlr);
+		if (ctrlr->hmb_nchunks > 0)
+			nvme_ctrlr_hmb_enable(ctrlr, true, false);
+	} else if (ctrlr->hmb_nchunks > 0)
+		nvme_ctrlr_hmb_enable(ctrlr, true, true);
+
 	if (nvme_ctrlr_create_qpairs(ctrlr) != 0) {
 		nvme_ctrlr_fail(ctrlr);
 		return;
@@ -1240,11 +1410,15 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, dev
 		destroy_dev(ctrlr->cdev);
 
 	if (ctrlr->is_initialized) {
-		if (!gone)
+		if (!gone) {
+			if (ctrlr->hmb_nchunks > 0)
+				nvme_ctrlr_hmb_enable(ctrlr, false, false);
 			nvme_ctrlr_delete_qpairs(ctrlr);
+		}
 		for (i = 0; i < ctrlr->num_io_queues; i++)
 			nvme_io_qpair_destroy(&ctrlr->ioq[i]);
 		free(ctrlr->ioq, M_NVME);
+		nvme_ctrlr_hmb_free(ctrlr);
 		nvme_admin_qpair_destroy(&ctrlr->adminq);
 	}
 
@@ -1367,6 +1541,9 @@ nvme_ctrlr_suspend(struct nvme_controller *ctrlr)
 		    "Competing reset task didn't finish. Try again later.\n");
 		return (EWOULDBLOCK);
 	}
+
+	if (ctrlr->hmb_nchunks > 0)
+		nvme_ctrlr_hmb_enable(ctrlr, false, false);
 
 	/*
 	 * Per Section 7.6.2 of NVMe spec 1.4, to properly suspend, we need to

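In nvme_ctrlr_hmb_enable() above, the widened nvme_ctrlr_cmd_set_feature()
maps its arguments onto the command words of the Host Memory Buffer
feature: cdw11 carries the enable (EHM) and memory-return (MR) bits,
cdw12 the total buffer size in 4KB units (HSIZE), cdw13 and cdw14 the
low and high halves of the descriptor list address (HMDLLA/HMDLUA), and
cdw15 the descriptor entry count (HMDLEC).  A sketch of the mapping,
where hmb_bytes, desc_paddr and nchunks are hypothetical placeholders:

	uint32_t cdw11 = 0x1 | 0x2;		  /* bit 0: EHM, bit 1: MR */
	uint32_t cdw12 = hmb_bytes / 4096;	  /* HSIZE: size in 4KB units */
	uint32_t cdw13 = (uint32_t)desc_paddr;	  /* HMDLLA: low 32 bits */
	uint32_t cdw14 = desc_paddr >> 32;	  /* HMDLUA: high 32 bits */
	uint32_t cdw15 = nchunks;		  /* HMDLEC: entry count */
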
Modified: stable/12/sys/dev/nvme/nvme_ctrlr_cmd.c
==============================================================================
--- stable/12/sys/dev/nvme/nvme_ctrlr_cmd.c	Wed Jan 22 01:04:26 2020	(r356960)
+++ stable/12/sys/dev/nvme/nvme_ctrlr_cmd.c	Wed Jan 22 01:08:27 2020	(r356961)
@@ -166,7 +166,8 @@ nvme_ctrlr_cmd_delete_io_sq(struct nvme_controller *ct
 
 void
 nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr, uint8_t feature,
-    uint32_t cdw11, void *payload, uint32_t payload_size,
+    uint32_t cdw11, uint32_t cdw12, uint32_t cdw13, uint32_t cdw14,
+    uint32_t cdw15, void *payload, uint32_t payload_size,
     nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *req;
@@ -178,6 +179,10 @@ nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctr
 	cmd->opc = NVME_OPC_SET_FEATURES;
 	cmd->cdw10 = htole32(feature);
 	cmd->cdw11 = htole32(cdw11);
+	cmd->cdw12 = htole32(cdw12);
+	cmd->cdw13 = htole32(cdw13);
+	cmd->cdw14 = htole32(cdw14);
+	cmd->cdw15 = htole32(cdw15);
 
 	nvme_ctrlr_submit_admin_request(ctrlr, req);
 }
@@ -208,7 +213,7 @@ nvme_ctrlr_cmd_set_num_queues(struct nvme_controller *
 
 	cdw11 = ((num_queues - 1) << 16) | (num_queues - 1);
 	nvme_ctrlr_cmd_set_feature(ctrlr, NVME_FEAT_NUMBER_OF_QUEUES, cdw11,
-	    NULL, 0, cb_fn, cb_arg);
+	    0, 0, 0, 0, NULL, 0, cb_fn, cb_arg);
 }
 
 void
@@ -219,8 +224,8 @@ nvme_ctrlr_cmd_set_async_event_config(struct nvme_cont
 
 	cdw11 = state;
 	nvme_ctrlr_cmd_set_feature(ctrlr,
-	    NVME_FEAT_ASYNC_EVENT_CONFIGURATION, cdw11, NULL, 0, cb_fn,
-	    cb_arg);
+	    NVME_FEAT_ASYNC_EVENT_CONFIGURATION, cdw11, 0, 0, 0, 0, NULL, 0,
+	    cb_fn, cb_arg);
 }
 
 void
@@ -245,7 +250,7 @@ nvme_ctrlr_cmd_set_interrupt_coalescing(struct nvme_co
 
 	cdw11 = ((microseconds/100) << 8) | threshold;
 	nvme_ctrlr_cmd_set_feature(ctrlr, NVME_FEAT_INTERRUPT_COALESCING, cdw11,
-	    NULL, 0, cb_fn, cb_arg);
+	    0, 0, 0, 0, NULL, 0, cb_fn, cb_arg);
 }
 
 void

Modified: stable/12/sys/dev/nvme/nvme_private.h
==============================================================================
--- stable/12/sys/dev/nvme/nvme_private.h	Wed Jan 22 01:04:26 2020	(r356960)
+++ stable/12/sys/dev/nvme/nvme_private.h	Wed Jan 22 01:08:27 2020	(r356961)
@@ -279,9 +279,6 @@ struct nvme_controller {
 	struct resource		*res;
 	void			*tag;
 
-	bus_dma_tag_t		hw_desc_tag;
-	bus_dmamap_t		hw_desc_map;
-
 	/** maximum i/o size in bytes */
 	uint32_t		max_xfer_size;
 
@@ -324,6 +321,20 @@ struct nvme_controller {
 
 	bool				is_failed;
 	STAILQ_HEAD(, nvme_request)	fail_req;
+
+	/* Host Memory Buffer */
+	int				hmb_nchunks;
+	size_t				hmb_chunk;
+	bus_dma_tag_t			hmb_tag;
+	struct nvme_hmb_chunk {
+		bus_dmamap_t		hmbc_map;
+		void			*hmbc_vaddr;
+		uint64_t		hmbc_paddr;
+	} *hmb_chunks;
+	bus_dma_tag_t			hmb_desc_tag;
+	bus_dmamap_t			hmb_desc_map;
+	struct nvme_hmb_desc		*hmb_desc_vaddr;
+	uint64_t			hmb_desc_paddr;
 };
 
 #define nvme_mmio_offsetof(reg)						       \

