svn commit: r368132 - head/sys/dev/nvme

Alexander Motin mav at FreeBSD.org
Sun Nov 29 00:20:32 UTC 2020


Author: mav
Date: Sun Nov 29 00:20:31 2020
New Revision: 368132
URL: https://svnweb.freebsd.org/changeset/base/368132

Log:
  Increase nvme(4) maximum transfer size from 1MB to 2MB.
  
  With a 4KB page size, 2MB is the maximum we can address with a single-page
  PRP list.  Going further would require chaining, which would add more
  complexity.
  
  On the other hand, to reduce memory consumption, allocate the PRP memory
  according to the maximum transfer size reported in the controller identify
  data.  Many NVMe devices support much smaller values, starting from 128KB.
  To do that we have to change the initialization sequence to pull the data
  earlier, before setting up the I/O queue pairs.  The admin queue pair is
  still allocated for the full MIN(maxphys, 2MB) size, but it is not a big deal,
  since there is only one such queue with only 16 trackers.
  
  Reviewed by:	imp
  MFC after:	2 weeks
  Sponsored by:	iXsystems, Inc.

Modified:
  head/sys/dev/nvme/nvme.h
  head/sys/dev/nvme/nvme_ctrlr.c
  head/sys/dev/nvme/nvme_private.h
  head/sys/dev/nvme/nvme_qpair.c

Modified: head/sys/dev/nvme/nvme.h
==============================================================================
--- head/sys/dev/nvme/nvme.h	Sat Nov 28 23:24:19 2020	(r368131)
+++ head/sys/dev/nvme/nvme.h	Sun Nov 29 00:20:31 2020	(r368132)
@@ -59,8 +59,8 @@
  */
 #define NVME_GLOBAL_NAMESPACE_TAG	((uint32_t)0xFFFFFFFF)
 
-/* Cap nvme to 1MB transfers driver explodes with larger sizes */
-#define NVME_MAX_XFER_SIZE		(maxphys < (1<<20) ? maxphys : (1<<20))
+/* Cap transfers by the maximum addressable by page-sized PRP (4KB -> 2MB). */
+#define NVME_MAX_XFER_SIZE		MIN(maxphys, (PAGE_SIZE/8*PAGE_SIZE))
 
 /* Register field definitions */
 #define NVME_CAP_LO_REG_MQES_SHIFT			(0)

Modified: head/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- head/sys/dev/nvme/nvme_ctrlr.c	Sat Nov 28 23:24:19 2020	(r368131)
+++ head/sys/dev/nvme/nvme_ctrlr.c	Sun Nov 29 00:20:31 2020	(r368132)
@@ -1053,16 +1053,16 @@ nvme_ctrlr_start(void *ctrlr_arg, bool resetting)
 	 *  the number of I/O queues supported, so cannot reset
 	 *  the adminq again here.
 	 */
-	if (resetting)
+	if (resetting) {
 		nvme_qpair_reset(&ctrlr->adminq);
+		nvme_admin_qpair_enable(&ctrlr->adminq);
+	}
 
 	if (ctrlr->ioq != NULL) {
 		for (i = 0; i < ctrlr->num_io_queues; i++)
 			nvme_qpair_reset(&ctrlr->ioq[i]);
 	}
 
-	nvme_admin_qpair_enable(&ctrlr->adminq);
-
 	/*
 	 * If it was a reset on initialization command timeout, just
 	 * return here, letting initialization code fail gracefully.
@@ -1070,7 +1070,7 @@ nvme_ctrlr_start(void *ctrlr_arg, bool resetting)
 	if (resetting && !ctrlr->is_initialized)
 		return;
 
-	if (nvme_ctrlr_identify(ctrlr) != 0) {
+	if (resetting && nvme_ctrlr_identify(ctrlr) != 0) {
 		nvme_ctrlr_fail(ctrlr);
 		return;
 	}
@@ -1145,7 +1145,8 @@ fail:
 	nvme_qpair_reset(&ctrlr->adminq);
 	nvme_admin_qpair_enable(&ctrlr->adminq);
 
-	if (nvme_ctrlr_set_num_qpairs(ctrlr) == 0 &&
+	if (nvme_ctrlr_identify(ctrlr) == 0 &&
+	    nvme_ctrlr_set_num_qpairs(ctrlr) == 0 &&
 	    nvme_ctrlr_construct_io_qpairs(ctrlr) == 0)
 		nvme_ctrlr_start(ctrlr, false);
 	else

Modified: head/sys/dev/nvme/nvme_private.h
==============================================================================
--- head/sys/dev/nvme/nvme_private.h	Sat Nov 28 23:24:19 2020	(r368131)
+++ head/sys/dev/nvme/nvme_private.h	Sun Nov 29 00:20:31 2020	(r368132)
@@ -56,15 +56,6 @@ MALLOC_DECLARE(M_NVME);
 #define IDT32_PCI_ID		0x80d0111d /* 32 channel board */
 #define IDT8_PCI_ID		0x80d2111d /* 8 channel board */
 
-/*
- * For commands requiring more than 2 PRP entries, one PRP will be
- *  embedded in the command (prp1), and the rest of the PRP entries
- *  will be in a list pointed to by the command (prp2).  This means
- *  that real max number of PRP entries we support is 32+1, which
- *  results in a max xfer size of 32*PAGE_SIZE.
- */
-#define NVME_MAX_PRP_LIST_ENTRIES	(NVME_MAX_XFER_SIZE / PAGE_SIZE)
-
 #define NVME_ADMIN_TRACKERS	(16)
 #define NVME_ADMIN_ENTRIES	(128)
 /* min and max are defined in admin queue attributes section of spec */

Modified: head/sys/dev/nvme/nvme_qpair.c
==============================================================================
--- head/sys/dev/nvme/nvme_qpair.c	Sat Nov 28 23:24:19 2020	(r368131)
+++ head/sys/dev/nvme/nvme_qpair.c	Sun Nov 29 00:20:31 2020	(r368132)
@@ -687,8 +687,8 @@ nvme_qpair_construct(struct nvme_qpair *qpair,
 	/* Note: NVMe PRP format is restricted to 4-byte alignment. */
 	err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
 	    4, PAGE_SIZE, BUS_SPACE_MAXADDR,
-	    BUS_SPACE_MAXADDR, NULL, NULL, NVME_MAX_XFER_SIZE,
-	    (NVME_MAX_XFER_SIZE/PAGE_SIZE)+1, PAGE_SIZE, 0,
+	    BUS_SPACE_MAXADDR, NULL, NULL, ctrlr->max_xfer_size,
+	    btoc(ctrlr->max_xfer_size) + 1, PAGE_SIZE, 0,
 	    NULL, NULL, &qpair->dma_tag_payload);
 	if (err != 0) {
 		nvme_printf(ctrlr, "payload tag create failed %d\n", err);
@@ -703,7 +703,12 @@ nvme_qpair_construct(struct nvme_qpair *qpair,
 	cmdsz = roundup2(cmdsz, PAGE_SIZE);
 	cplsz = qpair->num_entries * sizeof(struct nvme_completion);
 	cplsz = roundup2(cplsz, PAGE_SIZE);
-	prpsz = sizeof(uint64_t) * NVME_MAX_PRP_LIST_ENTRIES;
+	/*
+	 * For commands requiring more than 2 PRP entries, one PRP will be
+	 * embedded in the command (prp1), and the rest of the PRP entries
+	 * will be in a list pointed to by the command (prp2).
+	 */
+	prpsz = sizeof(uint64_t) * btoc(ctrlr->max_xfer_size);
 	prpmemsz = qpair->num_trackers * prpsz;
 	allocsz = cmdsz + cplsz + prpmemsz;
 


More information about the svn-src-all mailing list