git: 0fd4cd405bbf - main - nvme: Use controller's page size instead of PAGE_SIZE to create qpair

From: Warner Losh <imp_at_FreeBSD.org>
Date: Fri, 15 Apr 2022 20:46:51 UTC
The branch main has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=0fd4cd405bbf6b9dcc3d4fc3ddf37c876ba97c11

commit 0fd4cd405bbf6b9dcc3d4fc3ddf37c876ba97c11
Author:     Warner Losh <imp@FreeBSD.org>
AuthorDate: 2022-04-15 20:41:59 +0000
Commit:     Warner Losh <imp@FreeBSD.org>
CommitDate: 2022-04-15 20:46:19 +0000

    nvme: Use controller's page size instead of PAGE_SIZE to create qpair
    
    When constructing qpair, use the controller's notion of page size rather
    than the host's PAGE_SIZE. Currently, these are both 4k, but the arm 16k
    page size support requires decoupling.
    
    There's a "hidden" PAGE_SIZE in btoc, so we must change btoc(x) to
    howmany(x, ctrlr->page_size) to properly count the number of pages (in
    the drive's world view) needed for various calculations.
    
    With these changes, the nvme driver operates at production level load
    for both host 4k and host 16k page sizes.
    
    Sponsored by:           Netflix
    Differential Revision:  https://reviews.freebsd.org/D34873
---
 sys/dev/nvme/nvme_qpair.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
index 175975b2b3ac..3b20a7e209fa 100644
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -702,9 +702,10 @@ nvme_qpair_construct(struct nvme_qpair *qpair,
 
 	/* Note: NVMe PRP format is restricted to 4-byte alignment. */
 	err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
-	    4, PAGE_SIZE, BUS_SPACE_MAXADDR,
+	    4, ctrlr->page_size, BUS_SPACE_MAXADDR,
 	    BUS_SPACE_MAXADDR, NULL, NULL, ctrlr->max_xfer_size,
-	    btoc(ctrlr->max_xfer_size) + 1, PAGE_SIZE, 0,
+	    howmany(ctrlr->max_xfer_size, ctrlr->page_size) + 1,
+	    ctrlr->page_size, 0,
 	    NULL, NULL, &qpair->dma_tag_payload);
 	if (err != 0) {
 		nvme_printf(ctrlr, "payload tag create failed %d\n", err);
@@ -716,20 +717,21 @@ nvme_qpair_construct(struct nvme_qpair *qpair,
 	 * cannot cross a page boundary.
 	 */
 	cmdsz = qpair->num_entries * sizeof(struct nvme_command);
-	cmdsz = roundup2(cmdsz, PAGE_SIZE);
+	cmdsz = roundup2(cmdsz, ctrlr->page_size);
 	cplsz = qpair->num_entries * sizeof(struct nvme_completion);
-	cplsz = roundup2(cplsz, PAGE_SIZE);
+	cplsz = roundup2(cplsz, ctrlr->page_size);
 	/*
 	 * For commands requiring more than 2 PRP entries, one PRP will be
 	 * embedded in the command (prp1), and the rest of the PRP entries
 	 * will be in a list pointed to by the command (prp2).
 	 */
-	prpsz = sizeof(uint64_t) * btoc(ctrlr->max_xfer_size);
+	prpsz = sizeof(uint64_t) *
+	    howmany(ctrlr->max_xfer_size, ctrlr->page_size);
 	prpmemsz = qpair->num_trackers * prpsz;
 	allocsz = cmdsz + cplsz + prpmemsz;
 
 	err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
-	    PAGE_SIZE, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
+	    ctrlr->page_size, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 	    allocsz, 1, allocsz, 0, NULL, NULL, &qpair->dma_tag);
 	if (err != 0) {
 		nvme_printf(ctrlr, "tag create failed %d\n", err);
@@ -791,13 +793,13 @@ nvme_qpair_construct(struct nvme_qpair *qpair,
 
 		/*
 		 * Make sure that the PRP list for this tracker doesn't
-		 * overflow to another page.
+		 * overflow to another nvme page.
 		 */
 		if (trunc_page(list_phys) !=
 		    trunc_page(list_phys + prpsz - 1)) {
-			list_phys = roundup2(list_phys, PAGE_SIZE);
+			list_phys = roundup2(list_phys, ctrlr->page_size);
 			prp_list =
-			    (uint8_t *)roundup2((uintptr_t)prp_list, PAGE_SIZE);
+			    (uint8_t *)roundup2((uintptr_t)prp_list, ctrlr->page_size);
 		}
 
 		tr = malloc_domainset(sizeof(*tr), M_NVME,
@@ -1101,10 +1103,9 @@ nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
 	}
 
 	/*
-	 * Note that we specified PAGE_SIZE for alignment and max
-	 *  segment size when creating the bus dma tags.  So here
-	 *  we can safely just transfer each segment to its
-	 *  associated PRP entry.
+	 * Note that we specified ctrlr->page_size for alignment and max
+	 * segment size when creating the bus dma tags.  So here we can safely
+	 * just transfer each segment to its associated PRP entry.
 	 */
 	tr->req->cmd.prp1 = htole64(seg[0].ds_addr);