svn commit: r309556 - stable/10/sys/dev/cxgbe/tom

John Baldwin jhb at FreeBSD.org
Mon Dec 5 19:16:48 UTC 2016


Author: jhb
Date: Mon Dec  5 19:16:46 2016
New Revision: 309556
URL: https://svnweb.freebsd.org/changeset/base/309556

Log:
  MFC 303688,303750,305166,305167: Centralize and rework page pod handling.
  
  Note that the TOE DDP code in 10 differs from that in 11 and later
  and had to be updated directly.
  
  303688:
  cxgbe/t4_tom: Read the chip's DDP page sizes and save them in a
  per-adapter data structure.  This replaces a global array with hardcoded
  page sizes.
  
  303750:
  cxgbe/t4_tom: The page pod arena allocates from pod address space and
  not index space.  The minimum valid allocation out of this arena is the
  size of a single page pod.
  
  305166:
  cxgbe/t4_tom: Add general purpose routines to deal with page pod regions
  and allocations within them.  Switch to these routines to manage the TOE
  DDP region.
  
  305167:
  cxgbe/t4_tom: Two new routines to allocate and write page pods for a
  buffer in the kernel's address space (usage is sketched below).
  
  Sponsored by:	Chelsio Communications
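
  For reference, a minimal usage sketch of the reservation API added by
  305166/305167, built only from the prototypes in t4_tom.h below.  The
  context variables (sc, td, toep, buf, buflen) are assumed from the
  surrounding TOM code and error handling is abbreviated:

	struct ppod_reservation prsv;
	int rc;

	/* Reserve page pods in the TDDP region for a kernel buffer. */
	rc = t4_alloc_page_pods_for_buf(&td->pr, (vm_offset_t)buf,
	    buflen, &prsv);
	if (rc != 0)
		return (rc);

	/* Program the reservation with the buffer's physical pages. */
	rc = t4_write_page_pods_for_buf(sc, toep->ctrlq, toep->tid,
	    &prsv, (vm_offset_t)buf, buflen);
	if (rc != 0) {
		t4_free_page_pods(&prsv);
		return (rc);
	}

	/* ... hardware may now place data using prsv.prsv_tag ... */

	t4_free_page_pods(&prsv);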

Modified:
  stable/10/sys/dev/cxgbe/tom/t4_ddp.c
  stable/10/sys/dev/cxgbe/tom/t4_tom.c
  stable/10/sys/dev/cxgbe/tom/t4_tom.h
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/dev/cxgbe/tom/t4_ddp.c
==============================================================================
--- stable/10/sys/dev/cxgbe/tom/t4_ddp.c	Mon Dec  5 19:15:33 2016	(r309555)
+++ stable/10/sys/dev/cxgbe/tom/t4_ddp.c	Mon Dec  5 19:16:46 2016	(r309556)
@@ -74,46 +74,29 @@ VNET_DECLARE(int, tcp_autorcvbuf_max);
 
 static struct mbuf *get_ddp_mbuf(int len);
 
-#define PPOD_SZ(n)	((n) * sizeof(struct pagepod))
-#define PPOD_SIZE	(PPOD_SZ(1))
-
-/* XXX: must match A_ULP_RX_TDDP_PSZ */
-static int t4_ddp_pgsz[] = {4096, 4096 << 2, 4096 << 4, 4096 << 6};
-
 #define MAX_DDP_BUFFER_SIZE		(M_TCB_RX_DDP_BUF0_LEN)
-static int
-alloc_ppods(struct tom_data *td, int n, u_int *ppod_addr)
-{
-	vmem_addr_t v;
-	int rc;
-
-	MPASS(n > 0);
-
-	rc = vmem_alloc(td->ppod_arena, PPOD_SZ(n), M_NOWAIT | M_FIRSTFIT, &v);
-	*ppod_addr = (u_int)v;
 
-	return (rc);
-}
-
-static void
-free_ppods(struct tom_data *td, u_int ppod_addr, int n)
+static struct ddp_buffer *
+alloc_ddp_buffer(vm_page_t *pages, int npages, int offset, int len)
 {
+	struct ddp_buffer *db;
 
-	MPASS(n > 0);
-
-	vmem_free(td->ppod_arena, (vmem_addr_t)ppod_addr, PPOD_SZ(n));
-}
+	db = malloc(sizeof(*db), M_CXGBE, M_NOWAIT | M_ZERO);
+	if (db == NULL) {
+		CTR1(KTR_CXGBE, "%s: malloc failed.", __func__);
+		return (NULL);
+	}
 
-static inline int
-pages_to_nppods(int npages, int ddp_pgsz)
-{
-	int nsegs = npages * PAGE_SIZE / ddp_pgsz;
+	db->npages = npages;
+	db->pages = pages;
+	db->offset = offset;
+	db->len = len;
 
-	return (howmany(nsegs, PPOD_PAGES));
+	return (db);
 }
 
 static void
-free_ddp_buffer(struct tom_data *td, struct ddp_buffer *db)
+free_ddp_buffer(struct ddp_buffer *db)
 {
 
 	if (db == NULL)
@@ -122,8 +105,8 @@ free_ddp_buffer(struct tom_data *td, str
 	if (db->pages)
 		free(db->pages, M_CXGBE);
 
-	if (db->nppods > 0)
-		free_ppods(td, db->ppod_addr, db->nppods);
+	if (db->prsv.prsv_nppods > 0)
+		t4_free_page_pods(&db->prsv);
 
 	free(db, M_CXGBE);
 }
@@ -135,7 +118,7 @@ release_ddp_resources(struct toepcb *toe
 
 	for (i = 0; i < nitems(toep->db); i++) {
 		if (toep->db[i] != NULL) {
-			free_ddp_buffer(toep->td, toep->db[i]);
+			free_ddp_buffer(toep->db[i]);
 			toep->db[i] = NULL;
 		}
 	}
@@ -302,7 +285,7 @@ mk_update_tcb_for_ddp(struct adapter *sc
 	ulpmc = mk_set_tcb_field_ulp(ulpmc, toep,
 	    W_TCB_RX_DDP_BUF0_TAG + db_idx,
 	    V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
-	    V_TCB_RX_DDP_BUF0_TAG(db->tag));
+	    V_TCB_RX_DDP_BUF0_TAG(db->prsv.prsv_tag));
 
 	/* Update the current offset in the DDP buffer and its total length */
 	if (db_idx == 0)
@@ -651,12 +634,52 @@ calculate_hcf(int n1, int n2)
 	return (b);
 }
 
-static struct ddp_buffer *
-alloc_ddp_buffer(struct tom_data *td, vm_page_t *pages, int npages, int offset,
-    int len)
+static inline int
+pages_to_nppods(int npages, int ddp_page_shift)
 {
-	int i, hcf, seglen, idx, ppod, nppods;
-	struct ddp_buffer *db;
+
+	MPASS(ddp_page_shift >= PAGE_SHIFT);
+
+	return (howmany(npages >> (ddp_page_shift - PAGE_SHIFT), PPOD_PAGES));
+}
+
+static int
+alloc_page_pods(struct ppod_region *pr, u_int nppods, u_int pgsz_idx,
+    struct ppod_reservation *prsv)
+{
+	vmem_addr_t addr;       /* relative to start of region */
+
+	if (vmem_alloc(pr->pr_arena, PPOD_SZ(nppods), M_NOWAIT | M_FIRSTFIT,
+	    &addr) != 0)
+		return (ENOMEM);
+
+	CTR5(KTR_CXGBE, "%-17s arena %p, addr 0x%08x, nppods %d, pgsz %d",
+	    __func__, pr->pr_arena, (uint32_t)addr & pr->pr_tag_mask,
+	    nppods, 1 << pr->pr_page_shift[pgsz_idx]);
+
+	/*
+	 * The hardware tagmask includes an extra invalid bit but the arena was
+	 * seeded with valid values only.  An allocation out of this arena will
+	 * fit inside the tagmask but won't have the invalid bit set.
+	 */
+	MPASS((addr & pr->pr_tag_mask) == addr);
+	MPASS((addr & pr->pr_invalid_bit) == 0);
+
+	prsv->prsv_pr = pr;
+	prsv->prsv_tag = V_PPOD_PGSZ(pgsz_idx) | addr;
+	prsv->prsv_nppods = nppods;
+
+	return (0);
+}
+
+int
+t4_alloc_page_pods_for_db(struct ppod_region *pr, struct ddp_buffer *db)
+{
+	int i, hcf, seglen, idx, nppods;
+	struct ppod_reservation *prsv = &db->prsv;
+
+	KASSERT(prsv->prsv_nppods == 0,
+	    ("%s: page pods already allocated", __func__));
 
 	/*
 	 * The DDP page size is unrelated to the VM page size.  We combine
@@ -666,71 +689,123 @@ alloc_ddp_buffer(struct tom_data *td, vm
 	 * the page list.
 	 */
 	hcf = 0;
-	for (i = 0; i < npages; i++) {
+	for (i = 0; i < db->npages; i++) {
 		seglen = PAGE_SIZE;
-		while (i < npages - 1 &&
-		    pages[i]->phys_addr + PAGE_SIZE == pages[i + 1]->phys_addr) {
+		while (i < db->npages - 1 &&
+		    db->pages[i]->phys_addr + PAGE_SIZE ==
+		    db->pages[i + 1]->phys_addr) {
 			seglen += PAGE_SIZE;
 			i++;
 		}
 
 		hcf = calculate_hcf(hcf, seglen);
-		if (hcf < t4_ddp_pgsz[1]) {
+		if (hcf < (1 << pr->pr_page_shift[1])) {
 			idx = 0;
 			goto have_pgsz;	/* give up, short circuit */
 		}
 	}
 
-	if (hcf % t4_ddp_pgsz[0] != 0) {
-		/* hmmm.  This could only happen when PAGE_SIZE < 4K */
-		KASSERT(PAGE_SIZE < 4096,
-		    ("%s: PAGE_SIZE %d, hcf %d", __func__, PAGE_SIZE, hcf));
-		CTR3(KTR_CXGBE, "%s: PAGE_SIZE %d, hcf %d",
-		    __func__, PAGE_SIZE, hcf);
-		return (NULL);
-	}
-
-	for (idx = nitems(t4_ddp_pgsz) - 1; idx > 0; idx--) {
-		if (hcf % t4_ddp_pgsz[idx] == 0)
+#define PR_PAGE_MASK(x) ((1 << pr->pr_page_shift[(x)]) - 1)
+	MPASS((hcf & PR_PAGE_MASK(0)) == 0); /* PAGE_SIZE is >= 4K everywhere */
+	for (idx = nitems(pr->pr_page_shift) - 1; idx > 0; idx--) {
+		if ((hcf & PR_PAGE_MASK(idx)) == 0)
 			break;
 	}
+#undef PR_PAGE_MASK
+
 have_pgsz:
 	MPASS(idx <= M_PPOD_PGSZ);
 
-	db = malloc(sizeof(*db), M_CXGBE, M_NOWAIT);
-	if (db == NULL) {
-		CTR1(KTR_CXGBE, "%s: malloc failed.", __func__);
-		return (NULL);
+	nppods = pages_to_nppods(db->npages, pr->pr_page_shift[idx]);
+	if (alloc_page_pods(pr, nppods, idx, prsv) != 0)
+		return (0);
+	MPASS(prsv->prsv_nppods > 0);
+
+	return (1);
+}
+
+int
+t4_alloc_page_pods_for_buf(struct ppod_region *pr, vm_offset_t buf, int len,
+    struct ppod_reservation *prsv)
+{
+	int hcf, seglen, idx, npages, nppods;
+	uintptr_t start_pva, end_pva, pva, p1;
+
+	MPASS(buf > 0);
+	MPASS(len > 0);
+
+	/*
+	 * The DDP page size is unrelated to the VM page size.  We combine
+	 * contiguous physical pages into larger segments to get the best DDP
+	 * page size possible.  This is the largest of the four sizes in
+	 * A_ULP_RX_ISCSI_PSZ that evenly divides the HCF of the segment sizes
+	 * in the page list.
+	 */
+	hcf = 0;
+	start_pva = trunc_page(buf);
+	end_pva = trunc_page(buf + len - 1);
+	pva = start_pva;
+	while (pva <= end_pva) {
+		seglen = PAGE_SIZE;
+		p1 = pmap_kextract(pva);
+		pva += PAGE_SIZE;
+		while (pva <= end_pva && p1 + seglen == pmap_kextract(pva)) {
+			seglen += PAGE_SIZE;
+			pva += PAGE_SIZE;
+		}
+
+		hcf = calculate_hcf(hcf, seglen);
+		if (hcf < (1 << pr->pr_page_shift[1])) {
+			idx = 0;
+			goto have_pgsz;	/* give up, short circuit */
+		}
 	}
 
-	nppods = pages_to_nppods(npages, t4_ddp_pgsz[idx]);
-	if (alloc_ppods(td, nppods, &db->ppod_addr) != 0) {
-		free(db, M_CXGBE);
-		CTR4(KTR_CXGBE, "%s: no pods, nppods %d, resid %d, pgsz %d",
-		    __func__, nppods, len, t4_ddp_pgsz[idx]);
-		return (NULL);
+#define PR_PAGE_MASK(x) ((1 << pr->pr_page_shift[(x)]) - 1)
+	MPASS((hcf & PR_PAGE_MASK(0)) == 0); /* PAGE_SIZE is >= 4K everywhere */
+	for (idx = nitems(pr->pr_page_shift) - 1; idx > 0; idx--) {
+		if ((hcf & PR_PAGE_MASK(idx)) == 0)
+			break;
 	}
-	ppod = (db->ppod_addr - td->ppod_start) / PPOD_SIZE;
+#undef PR_PAGE_MASK
 
-	db->tag = V_PPOD_PGSZ(idx) | V_PPOD_TAG(ppod);
-	db->nppods = nppods;
-	db->npages = npages;
-	db->pages = pages;
-	db->offset = offset;
-	db->len = len;
+have_pgsz:
+	MPASS(idx <= M_PPOD_PGSZ);
 
-	CTR6(KTR_CXGBE, "New DDP buffer.  "
-	    "ddp_pgsz %d, ppod 0x%x, npages %d, nppods %d, offset %d, len %d",
-	    t4_ddp_pgsz[idx], ppod, db->npages, db->nppods, db->offset,
-	    db->len);
+	npages = 1;
+	npages += (end_pva - start_pva) >> pr->pr_page_shift[idx];
+	nppods = howmany(npages, PPOD_PAGES);
+	if (alloc_page_pods(pr, nppods, idx, prsv) != 0)
+		return (ENOMEM);
+	MPASS(prsv->prsv_nppods > 0);
 
-	return (db);
+	return (0);
+}
+
+void
+t4_free_page_pods(struct ppod_reservation *prsv)
+{
+	struct ppod_region *pr = prsv->prsv_pr;
+	vmem_addr_t addr;
+
+	MPASS(prsv != NULL);
+	MPASS(prsv->prsv_nppods != 0);
+
+	addr = prsv->prsv_tag & pr->pr_tag_mask;
+	MPASS((addr & pr->pr_invalid_bit) == 0);
+
+	CTR4(KTR_CXGBE, "%-17s arena %p, addr 0x%08x, nppods %d", __func__,
+	    pr->pr_arena, addr, prsv->prsv_nppods);
+
+	vmem_free(pr->pr_arena, addr, PPOD_SZ(prsv->prsv_nppods));
+	prsv->prsv_nppods = 0;
 }
 
 #define NUM_ULP_TX_SC_IMM_PPODS (256 / PPOD_SIZE)
 
-static int
-write_page_pods(struct adapter *sc, struct toepcb *toep, struct ddp_buffer *db)
+int
+t4_write_page_pods_for_db(struct adapter *sc, struct sge_wrq *wrq, int tid,
+    struct ddp_buffer *db)
 {
 	struct wrqe *wr;
 	struct ulp_mem_io *ulpmc;
@@ -739,22 +814,26 @@ write_page_pods(struct adapter *sc, stru
 	int i, j, k, n, chunk, len, ddp_pgsz, idx;
 	u_int ppod_addr;
 	uint32_t cmd;
+	struct ppod_reservation *prsv = &db->prsv;
+	struct ppod_region *pr = prsv->prsv_pr;
+
+	MPASS(prsv->prsv_nppods > 0);
 
 	cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE));
 	if (is_t4(sc))
 		cmd |= htobe32(F_ULP_MEMIO_ORDER);
 	else
 		cmd |= htobe32(F_T5_ULP_MEMIO_IMM);
-	ddp_pgsz = t4_ddp_pgsz[G_PPOD_PGSZ(db->tag)];
-	ppod_addr = db->ppod_addr;
-	for (i = 0; i < db->nppods; ppod_addr += chunk) {
+	ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)];
+	ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask);
+	for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) {
 
 		/* How many page pods are we writing in this cycle */
-		n = min(db->nppods - i, NUM_ULP_TX_SC_IMM_PPODS);
+		n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS);
 		chunk = PPOD_SZ(n);
 		len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16);
 
-		wr = alloc_wrqe(len, toep->ctrlq);
+		wr = alloc_wrqe(len, wrq);
 		if (wr == NULL)
 			return (ENOMEM);	/* ok to just bail out */
 		ulpmc = wrtod(wr);
@@ -772,7 +851,7 @@ write_page_pods(struct adapter *sc, stru
 		ppod = (struct pagepod *)(ulpsc + 1);
 		for (j = 0; j < n; i++, j++, ppod++) {
 			ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID |
-			    V_PPOD_TID(toep->tid) | db->tag);
+			    V_PPOD_TID(tid) | prsv->prsv_tag);
 			ppod->len_offset = htobe64(V_PPOD_LEN(db->len) |
 			    V_PPOD_OFST(db->offset));
 			ppod->rsvd = 0;
@@ -800,6 +879,94 @@ write_page_pods(struct adapter *sc, stru
 	return (0);
 }
 
+int
+t4_write_page_pods_for_buf(struct adapter *sc, struct sge_wrq *wrq, int tid,
+    struct ppod_reservation *prsv, vm_offset_t buf, int buflen)
+{
+	struct wrqe *wr;
+	struct ulp_mem_io *ulpmc;
+	struct ulptx_idata *ulpsc;
+	struct pagepod *ppod;
+	int i, j, k, n, chunk, len, ddp_pgsz;
+	u_int ppod_addr, offset;
+	uint32_t cmd;
+	struct ppod_region *pr = prsv->prsv_pr;
+	uintptr_t end_pva, pva, pa;
+
+	cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE));
+	if (is_t4(sc))
+		cmd |= htobe32(F_ULP_MEMIO_ORDER);
+	else
+		cmd |= htobe32(F_T5_ULP_MEMIO_IMM);
+	ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)];
+	offset = buf & PAGE_MASK;
+	ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask);
+	pva = trunc_page(buf);
+	end_pva = trunc_page(buf + buflen - 1);
+	for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) {
+
+		/* How many page pods are we writing in this cycle */
+		n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS);
+		MPASS(n > 0);
+		chunk = PPOD_SZ(n);
+		len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16);
+
+		wr = alloc_wrqe(len, wrq);
+		if (wr == NULL)
+			return (ENOMEM);	/* ok to just bail out */
+		ulpmc = wrtod(wr);
+
+		INIT_ULPTX_WR(ulpmc, len, 0, 0);
+		ulpmc->cmd = cmd;
+		ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
+		ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
+		ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));
+
+		ulpsc = (struct ulptx_idata *)(ulpmc + 1);
+		ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
+		ulpsc->len = htobe32(chunk);
+
+		ppod = (struct pagepod *)(ulpsc + 1);
+		for (j = 0; j < n; i++, j++, ppod++) {
+			ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID |
+			    V_PPOD_TID(tid) |
+			    (prsv->prsv_tag & ~V_PPOD_PGSZ(M_PPOD_PGSZ)));
+			ppod->len_offset = htobe64(V_PPOD_LEN(buflen) |
+			    V_PPOD_OFST(offset));
+			ppod->rsvd = 0;
+
+			for (k = 0; k < nitems(ppod->addr); k++) {
+				if (pva > end_pva)
+					ppod->addr[k] = 0;
+				else {
+					pa = pmap_kextract(pva);
+					ppod->addr[k] = htobe64(pa);
+					pva += ddp_pgsz;
+				}
+#if 0
+				CTR5(KTR_CXGBE,
+				    "%s: tid %d ppod[%d]->addr[%d] = %p",
+				    __func__, tid, i, k,
+				    htobe64(ppod->addr[k]));
+#endif
+			}
+
+			/*
+			 * Walk back 1 segment so that the first address in the
+			 * next pod is the same as the last one in the current
+			 * pod.
+			 */
+			pva -= ddp_pgsz;
+		}
+
+		t4_wrq_tx(sc, wr);
+	}
+
+	MPASS(pva <= end_pva);
+
+	return (0);
+}
+
 /*
  * Reuse, or allocate (and program the page pods for) a new DDP buffer.  The
  * "pages" array is handed over to this function and should not be used in any
@@ -823,27 +990,32 @@ select_ddp_buffer(struct adapter *sc, st
 	}
 
 	/* Allocate new buffer, write its page pods. */
-	db = alloc_ddp_buffer(td, pages, npages, db_off, db_len);
+	db = alloc_ddp_buffer(pages, npages, db_off, db_len);
 	if (db == NULL) {
 		vm_page_unhold_pages(pages, npages);
 		free(pages, M_CXGBE);
 		return (-1);
 	}
-	if (write_page_pods(sc, toep, db) != 0) {
+	if (t4_alloc_page_pods_for_db(&td->pr, db)) {
 		vm_page_unhold_pages(pages, npages);
-		free_ddp_buffer(td, db);
+		free_ddp_buffer(db);
+		return (-1);
+	}
+	if (t4_write_page_pods_for_db(sc, toep->ctrlq, toep->tid, db) != 0) {
+		vm_page_unhold_pages(pages, npages);
+		free_ddp_buffer(db);
 		return (-1);
 	}
 
 	i = empty_slot;
 	if (i < 0) {
 		i = arc4random() % nitems(toep->db);
-		free_ddp_buffer(td, toep->db[i]);
+		free_ddp_buffer(toep->db[i]);
 	}
 	toep->db[i] = db;
 
 	CTR5(KTR_CXGBE, "%s: tid %d, DDP buffer[%d] = %p (tag 0x%x)",
-	    __func__, toep->tid, i, db, db->tag);
+	    __func__, toep->tid, i, db, db->prsv.prsv_tag);
 
 	return (i);
 }
@@ -975,23 +1147,52 @@ no_ddp:
 	return (0);
 }
 
-void
-t4_init_ddp(struct adapter *sc, struct tom_data *td)
+int
+t4_init_ppod_region(struct ppod_region *pr, struct t4_range *r, u_int psz,
+    const char *name)
 {
+	int i;
+
+	MPASS(pr != NULL);
+	MPASS(r->size > 0);
 
-	td->ppod_start = sc->vres.ddp.start;
-	td->ppod_arena = vmem_create("DDP page pods", sc->vres.ddp.start,
-	    sc->vres.ddp.size, 1, 32, M_FIRSTFIT | M_NOWAIT);
+	pr->pr_start = r->start;
+	pr->pr_len = r->size;
+	pr->pr_page_shift[0] = 12 + G_HPZ0(psz);
+	pr->pr_page_shift[1] = 12 + G_HPZ1(psz);
+	pr->pr_page_shift[2] = 12 + G_HPZ2(psz);
+	pr->pr_page_shift[3] = 12 + G_HPZ3(psz);
+
+	/* The SGL -> page pod algorithm requires the sizes to be in order. */
+	for (i = 1; i < nitems(pr->pr_page_shift); i++) {
+		if (pr->pr_page_shift[i] <= pr->pr_page_shift[i - 1])
+			return (ENXIO);
+	}
+
+	pr->pr_tag_mask = ((1 << fls(r->size)) - 1) & V_PPOD_TAG(M_PPOD_TAG);
+	pr->pr_alias_mask = V_PPOD_TAG(M_PPOD_TAG) & ~pr->pr_tag_mask;
+	if (pr->pr_tag_mask == 0 || pr->pr_alias_mask == 0)
+		return (ENXIO);
+	pr->pr_alias_shift = fls(pr->pr_tag_mask);
+	pr->pr_invalid_bit = 1 << (pr->pr_alias_shift - 1);
+
+	pr->pr_arena = vmem_create(name, 0, pr->pr_len, PPOD_SIZE, 0,
+	    M_FIRSTFIT | M_NOWAIT);
+	if (pr->pr_arena == NULL)
+		return (ENOMEM);
+
+	return (0);
 }
 
 void
-t4_uninit_ddp(struct adapter *sc __unused, struct tom_data *td)
+t4_free_ppod_region(struct ppod_region *pr)
 {
 
-	if (td->ppod_arena != NULL) {
-		vmem_destroy(td->ppod_arena);
-		td->ppod_arena = NULL;
-	}
+	MPASS(pr != NULL);
+
+	if (pr->pr_arena)
+		vmem_destroy(pr->pr_arena);
+	bzero(pr, sizeof(*pr));
 }
 
 #define	VNET_SO_ASSERT(so)						\
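
A worked example of the pod accounting above (assuming PAGE_SHIFT is 12
and PPOD_PAGES is 4, as on amd64): a buffer of 64 VM pages laid out as
four physically contiguous 64 KB runs has an HCF of 64 KB, so the 64 KB
DDP page size (shift 16) is selected:

	npages         = 64                      (4 KB VM pages)
	ddp_page_shift = 16                      (64 KB DDP pages)
	DDP pages      = 64 >> (16 - 12)         = 4
	nppods         = howmany(4, PPOD_PAGES)  = howmany(4, 4) = 1

Each pod carries one more address slot than PPOD_PAGES, which is why
the writer walks back one segment at the end of each pod: the first
address of the next pod repeats the last address of the current one.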

Modified: stable/10/sys/dev/cxgbe/tom/t4_tom.c
==============================================================================
--- stable/10/sys/dev/cxgbe/tom/t4_tom.c	Mon Dec  5 19:15:33 2016	(r309555)
+++ stable/10/sys/dev/cxgbe/tom/t4_tom.c	Mon Dec  5 19:16:46 2016	(r309556)
@@ -942,7 +942,7 @@ free_tom_data(struct adapter *sc, struct
 	KASSERT(td->lctx_count == 0,
 	    ("%s: lctx hash table is not empty.", __func__));
 
-	t4_uninit_ddp(sc, td);
+	t4_free_ppod_region(&td->pr);
 	destroy_clip_table(sc, td);
 
 	if (td->listen_mask != 0)
@@ -1036,8 +1036,12 @@ t4_tom_activate(struct adapter *sc)
 	if (rc != 0)
 		goto done;
 
-	/* DDP page pods and CPL handlers */
-	t4_init_ddp(sc, td);
+	rc = t4_init_ppod_region(&td->pr, &sc->vres.ddp,
+	    t4_read_reg(sc, A_ULP_RX_TDDP_PSZ), "TDDP page pods");
+	if (rc != 0)
+		goto done;
+	t4_set_reg_field(sc, A_ULP_RX_TDDP_TAGMASK,
+	    V_TDDPTAGMASK(M_TDDPTAGMASK), td->pr.pr_tag_mask);
 
 	/* CLIP table for IPv6 offload */
 	init_clip_table(sc, td);
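
With HPZ encodings of 0/2/4/6 (an assumption; the actual values come
from the chip configuration), t4_init_ppod_region decodes the same four
sizes the removed hardcoded table provided:

	pr_page_shift[0] = 12 + 0 -> 4 KB    (t4_ddp_pgsz[0] = 4096)
	pr_page_shift[1] = 12 + 2 -> 16 KB   (t4_ddp_pgsz[1] = 4096 << 2)
	pr_page_shift[2] = 12 + 4 -> 64 KB   (t4_ddp_pgsz[2] = 4096 << 4)
	pr_page_shift[3] = 12 + 6 -> 256 KB  (t4_ddp_pgsz[3] = 4096 << 6)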

Modified: stable/10/sys/dev/cxgbe/tom/t4_tom.h
==============================================================================
--- stable/10/sys/dev/cxgbe/tom/t4_tom.h	Mon Dec  5 19:15:33 2016	(r309555)
+++ stable/10/sys/dev/cxgbe/tom/t4_tom.h	Mon Dec  5 19:16:46 2016	(r309556)
@@ -60,6 +60,9 @@ struct mbufq {
 
 #define USE_DDP_RX_FLOW_CONTROL
 
+#define PPOD_SZ(n)	((n) * sizeof(struct pagepod))
+#define PPOD_SIZE	(PPOD_SZ(1))
+
 /* TOE PCB flags */
 enum {
 	TPF_ATTACHED	   = (1 << 0),	/* a tcpcb refers to this toepcb */
@@ -90,14 +93,29 @@ struct ofld_tx_sdesc {
 	uint8_t tx_credits;	/* firmware tx credits (unit is 16B) */
 };
 
+struct ppod_region {
+	u_int pr_start;
+	u_int pr_len;
+	u_int pr_page_shift[4];
+	uint32_t pr_tag_mask;		/* hardware tagmask for this region. */
+	uint32_t pr_invalid_bit;	/* OR with this to invalidate tag. */
+	uint32_t pr_alias_mask;		/* AND with tag to get alias bits. */
+	u_int pr_alias_shift;		/* shift this much for first alias bit. */
+	vmem_t *pr_arena;
+};
+
+struct ppod_reservation {
+	struct ppod_region *prsv_pr;
+	uint32_t prsv_tag;		/* Full tag: pgsz, alias, tag, color */
+	u_int prsv_nppods;
+};
+
 struct ddp_buffer {
-	uint32_t tag;	/* includes color, page pod addr, and DDP page size */
-	u_int ppod_addr;
-	int nppods;
 	int offset;
 	int len;
 	int npages;
 	vm_page_t *pages;
+	struct ppod_reservation prsv;
 };
 
 struct toepcb {
@@ -206,8 +224,7 @@ struct tom_data {
 	u_long listen_mask;
 	int lctx_count;		/* # of lctx in the hash table */
 
-	u_int ppod_start;
-	vmem_t *ppod_arena;
+	struct ppod_region pr;
 
 	struct mtx clip_table_lock;
 	struct clip_head clip_table;
@@ -314,8 +331,17 @@ void t4_push_pdus(struct adapter *sc, st
 int do_set_tcb_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *);
 
 /* t4_ddp.c */
-void t4_init_ddp(struct adapter *, struct tom_data *);
-void t4_uninit_ddp(struct adapter *, struct tom_data *);
+int t4_init_ppod_region(struct ppod_region *, struct t4_range *, u_int,
+    const char *);
+void t4_free_ppod_region(struct ppod_region *);
+int t4_alloc_page_pods_for_db(struct ppod_region *, struct ddp_buffer *);
+int t4_alloc_page_pods_for_buf(struct ppod_region *, vm_offset_t, int,
+    struct ppod_reservation *);
+int t4_write_page_pods_for_db(struct adapter *, struct sge_wrq *, int,
+    struct ddp_buffer *);
+int t4_write_page_pods_for_buf(struct adapter *, struct sge_wrq *, int tid,
+    struct ppod_reservation *, vm_offset_t, int);
+void t4_free_page_pods(struct ppod_reservation *);
 int t4_soreceive_ddp(struct socket *, struct sockaddr **, struct uio *,
     struct mbuf **, struct mbuf **, int *);
 int t4_ddp_mod_load(void);
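
A worked example of the tag layout (a hypothetical 16 MB DDP region;
the clamp to V_PPOD_TAG(M_PPOD_TAG) is ignored here for simplicity):

	r->size        = 16 MB
	pr_tag_mask    = (1 << fls(16 MB)) - 1  = bits 0..24
	pr_alias_shift = fls(pr_tag_mask)       = 25
	pr_invalid_bit = 1 << (25 - 1)          = bit 24

Valid arena addresses lie in [0, 16 MB), so bit 24 is never set in a
live reservation; OR-ing pr_invalid_bit into a tag yields a value that
fits the hardware tagmask but can never match a valid allocation.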

