From nobody Fri Oct 29 23:58:02 2021 X-Original-To: dev-commits-src-branches@mlmmj.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mlmmj.nyi.freebsd.org (Postfix) with ESMTP id 61523181CD8D; Fri, 29 Oct 2021 23:58:02 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4Hgzrp1d0bz4V2h; Fri, 29 Oct 2021 23:58:02 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 10E39205AE; Fri, 29 Oct 2021 23:58:02 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 19TNw2Rd003208; Fri, 29 Oct 2021 23:58:02 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 19TNw2Xp003207; Fri, 29 Oct 2021 23:58:02 GMT (envelope-from git) Date: Fri, 29 Oct 2021 23:58:02 GMT Message-Id: <202110292358.19TNw2Xp003207@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org From: John Baldwin Subject: git: 8e90bce009bb - stable/13 - cxgbe tom: Set the tid in the work requests to program page pods for iSCSI. List-Id: Commits to the stable branches of the FreeBSD src repository List-Archive: https://lists.freebsd.org/archives/dev-commits-src-branches List-Help: List-Post: List-Subscribe: List-Unsubscribe: Sender: owner-dev-commits-src-branches@freebsd.org X-BeenThere: dev-commits-src-branches@freebsd.org MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit X-Git-Committer: jhb X-Git-Repository: src X-Git-Refname: refs/heads/stable/13 X-Git-Reftype: branch X-Git-Commit: 8e90bce009bb6d931e7e259fd25cbd277d863fc5 Auto-Submitted: auto-generated X-ThisMailContainsUnwantedMimeParts: N The branch stable/13 has been updated by jhb: URL: https://cgit.FreeBSD.org/src/commit/?id=8e90bce009bb6d931e7e259fd25cbd277d863fc5 commit 8e90bce009bb6d931e7e259fd25cbd277d863fc5 Author: John Baldwin AuthorDate: 2021-05-14 19:16:40 +0000 Commit: John Baldwin CommitDate: 2021-10-29 22:40:55 +0000 cxgbe tom: Set the tid in the work requests to program page pods for iSCSI. As a result, CPL_FW4_ACK now returns credits for these work requests. To support this, page pod work requests are now constructed in special mbufs similar to "raw" mbufs used for NIC TLS in plain TX queues. These special mbufs are stored in the ulp_pduq and dispatched in order with PDU work requests. Sponsored by: Chelsio Communications Discussed with: np Differential Revision: https://reviews.freebsd.org/D29904 (cherry picked from commit 4427ac3675f91df039d54a23518132e0e0fede86) --- sys/dev/cxgbe/cxgbei/icl_cxgbei.c | 7 +- sys/dev/cxgbe/tom/t4_cpl_io.c | 243 ++++++++++++++++++++++---------------- sys/dev/cxgbe/tom/t4_ddp.c | 48 ++++++-- sys/dev/cxgbe/tom/t4_tom.h | 18 ++- 4 files changed, 198 insertions(+), 118 deletions(-) diff --git a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c index 961acfb31987..4e168a33e2ca 100644 --- a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c +++ b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c @@ -844,8 +844,8 @@ no_ddp: goto no_ddp; } - rc = t4_write_page_pods_for_buf(sc, &toep->ofld_txq->wrq, toep->tid, - prsv, (vm_offset_t)csio->data_ptr, csio->dxfer_len); + rc = t4_write_page_pods_for_buf(sc, toep, prsv, + (vm_offset_t)csio->data_ptr, csio->dxfer_len); if (rc != 0) { t4_free_page_pods(prsv); uma_zfree(prsv_zone, prsv); @@ -959,8 +959,7 @@ no_ddp: goto no_ddp; } - rc = t4_write_page_pods_for_buf(sc, &toep->ofld_txq->wrq, - toep->tid, prsv, buf, xferlen); + rc = t4_write_page_pods_for_buf(sc, toep, prsv, buf, xferlen); if (rc != 0) { t4_free_page_pods(prsv); uma_zfree(prsv_zone, prsv); diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c index 9168547494f1..b2150558165d 100644 --- a/sys/dev/cxgbe/tom/t4_cpl_io.c +++ b/sys/dev/cxgbe/tom/t4_cpl_io.c @@ -927,10 +927,10 @@ rqdrop_locked(struct mbufq *q, int plen) } } -void -t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop) +static struct wrqe * +write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr) { - struct mbuf *sndptr, *m; + struct mbuf *m; struct fw_ofld_tx_data_wr *txwr; struct wrqe *wr; u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; @@ -938,9 +938,131 @@ t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop) struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); int tx_credits, shove; + static const u_int ulp_extra_len[] = {0, 4, 4, 8}; + + M_ASSERTPKTHDR(sndptr); + + tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); + if (mbuf_raw_wr(sndptr)) { + plen = sndptr->m_pkthdr.len; + KASSERT(plen <= SGE_MAX_WR_LEN, + ("raw WR len %u is greater than max WR len", plen)); + if (plen > tx_credits * 16) + return (NULL); + + wr = alloc_wrqe(roundup2(plen, 16), &toep->ofld_txq->wrq); + if (__predict_false(wr == NULL)) + return (NULL); + + m_copydata(sndptr, 0, plen, wrtod(wr)); + return (wr); + } + + max_imm = max_imm_payload(tx_credits); + max_nsegs = max_dsgl_nsegs(tx_credits); + + plen = 0; + nsegs = 0; + max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ + for (m = sndptr; m != NULL; m = m->m_next) { + int n = sglist_count(mtod(m, void *), m->m_len); + + nsegs += n; + plen += m->m_len; + + /* + * This mbuf would send us _over_ the nsegs limit. + * Suspend tx because the PDU can't be sent out. + */ + if (plen > max_imm && nsegs > max_nsegs) + return (NULL); + + if (max_nsegs_1mbuf < n) + max_nsegs_1mbuf = n; + } + + if (__predict_false(toep->flags & TPF_FIN_SENT)) + panic("%s: excess tx.", __func__); + + /* + * We have a PDU to send. All of it goes out in one WR so 'm' + * is NULL. A PDU's length is always a multiple of 4. + */ + MPASS(m == NULL); + MPASS((plen & 3) == 0); + MPASS(sndptr->m_pkthdr.len == plen); + + shove = !(tp->t_flags & TF_MORETOCOME); + ulp_submode = mbuf_ulp_submode(sndptr); + MPASS(ulp_submode < nitems(ulp_extra_len)); + + /* + * plen doesn't include header and data digests, which are + * generated and inserted in the right places by the TOE, but + * they do occupy TCP sequence space and need to be accounted + * for. + */ + adjusted_plen = plen + ulp_extra_len[ulp_submode]; + if (plen <= max_imm) { + + /* Immediate data tx */ + + wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), + &toep->ofld_txq->wrq); + if (wr == NULL) { + /* XXX: how will we recover from this? */ + return (NULL); + } + txwr = wrtod(wr); + credits = howmany(wr->wr_len, 16); + write_tx_wr(txwr, toep, plen, adjusted_plen, credits, + shove, ulp_submode); + m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); + nsegs = 0; + } else { + int wr_len; + + /* DSGL tx */ + wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; + wr = alloc_wrqe(roundup2(wr_len, 16), + &toep->ofld_txq->wrq); + if (wr == NULL) { + /* XXX: how will we recover from this? */ + return (NULL); + } + txwr = wrtod(wr); + credits = howmany(wr_len, 16); + write_tx_wr(txwr, toep, 0, adjusted_plen, credits, + shove, ulp_submode); + write_tx_sgl(txwr + 1, sndptr, m, nsegs, max_nsegs_1mbuf); + if (wr_len & 0xf) { + uint64_t *pad = (uint64_t *)((uintptr_t)txwr + wr_len); + *pad = 0; + } + } + + tp->snd_nxt += adjusted_plen; + tp->snd_max += adjusted_plen; + + counter_u64_add(toep->ofld_txq->tx_iscsi_pdus, 1); + counter_u64_add(toep->ofld_txq->tx_iscsi_octets, plen); + + return (wr); +} + +void +t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop) +{ + struct mbuf *sndptr, *m; + struct fw_wr_hdr *wrhdr; + struct wrqe *wr; + u_int plen, credits; +#ifdef INVARIANTS + struct inpcb *inp = toep->inp; +#endif struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; struct mbufq *pduq = &toep->ulp_pduq; - static const u_int ulp_extra_len[] = {0, 4, 4, 8}; INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, @@ -965,99 +1087,14 @@ t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop) rqdrop_locked(&toep->ulp_pdu_reclaimq, drop); while ((sndptr = mbufq_first(pduq)) != NULL) { - M_ASSERTPKTHDR(sndptr); - - tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); - max_imm = max_imm_payload(tx_credits); - max_nsegs = max_dsgl_nsegs(tx_credits); - - plen = 0; - nsegs = 0; - max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ - for (m = sndptr; m != NULL; m = m->m_next) { - int n = sglist_count(mtod(m, void *), m->m_len); - - nsegs += n; - plen += m->m_len; - - /* - * This mbuf would send us _over_ the nsegs limit. - * Suspend tx because the PDU can't be sent out. - */ - if (plen > max_imm && nsegs > max_nsegs) { - toep->flags |= TPF_TX_SUSPENDED; - return; - } - - if (max_nsegs_1mbuf < n) - max_nsegs_1mbuf = n; - } - - if (__predict_false(toep->flags & TPF_FIN_SENT)) - panic("%s: excess tx.", __func__); - - /* - * We have a PDU to send. All of it goes out in one WR so 'm' - * is NULL. A PDU's length is always a multiple of 4. - */ - MPASS(m == NULL); - MPASS((plen & 3) == 0); - MPASS(sndptr->m_pkthdr.len == plen); - - shove = !(tp->t_flags & TF_MORETOCOME); - ulp_submode = mbuf_ulp_submode(sndptr); - MPASS(ulp_submode < nitems(ulp_extra_len)); - - /* - * plen doesn't include header and data digests, which are - * generated and inserted in the right places by the TOE, but - * they do occupy TCP sequence space and need to be accounted - * for. - */ - adjusted_plen = plen + ulp_extra_len[ulp_submode]; - if (plen <= max_imm) { - - /* Immediate data tx */ - - wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), - &toep->ofld_txq->wrq); - if (wr == NULL) { - /* XXX: how will we recover from this? */ - toep->flags |= TPF_TX_SUSPENDED; - return; - } - txwr = wrtod(wr); - credits = howmany(wr->wr_len, 16); - write_tx_wr(txwr, toep, plen, adjusted_plen, credits, - shove, ulp_submode); - m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); - nsegs = 0; - } else { - int wr_len; - - /* DSGL tx */ - wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + - ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; - wr = alloc_wrqe(roundup2(wr_len, 16), - &toep->ofld_txq->wrq); - if (wr == NULL) { - /* XXX: how will we recover from this? */ - toep->flags |= TPF_TX_SUSPENDED; - return; - } - txwr = wrtod(wr); - credits = howmany(wr_len, 16); - write_tx_wr(txwr, toep, 0, adjusted_plen, credits, - shove, ulp_submode); - write_tx_sgl(txwr + 1, sndptr, m, nsegs, - max_nsegs_1mbuf); - if (wr_len & 0xf) { - uint64_t *pad = (uint64_t *) - ((uintptr_t)txwr + wr_len); - *pad = 0; - } + wr = write_iscsi_mbuf_wr(toep, sndptr); + if (wr == NULL) { + toep->flags |= TPF_TX_SUSPENDED; + return; } + plen = sndptr->m_pkthdr.len; + credits = howmany(wr->wr_len, 16); KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); @@ -1068,16 +1105,19 @@ t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop) toep->tx_credits -= credits; toep->tx_nocompl += credits; toep->plen_nocompl += plen; - if (toep->tx_credits <= toep->tx_total * 3 / 8 && + + /* + * Ensure there are enough credits for a full-sized WR + * as page pod WRs can be full-sized. + */ + if (toep->tx_credits <= SGE_MAX_WR_LEN * 5 / 4 && toep->tx_nocompl >= toep->tx_total / 4) { - txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); + wrhdr = wrtod(wr); + wrhdr->hi |= htobe32(F_FW_WR_COMPL); toep->tx_nocompl = 0; toep->plen_nocompl = 0; } - tp->snd_nxt += adjusted_plen; - tp->snd_max += adjusted_plen; - toep->flags |= TPF_TX_DATA_SENT; if (toep->tx_credits < MIN_OFLD_TX_CREDITS) toep->flags |= TPF_TX_SUSPENDED; @@ -1092,9 +1132,6 @@ t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop) } toep->txsd_avail--; - counter_u64_add(toep->ofld_txq->tx_iscsi_pdus, 1); - counter_u64_add(toep->ofld_txq->tx_iscsi_octets, plen); - t4_l2t_send(sc, wr, toep->l2te); } diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c index b0d53dd63997..e87d013a0453 100644 --- a/sys/dev/cxgbe/tom/t4_ddp.c +++ b/sys/dev/cxgbe/tom/t4_ddp.c @@ -1081,11 +1081,30 @@ t4_write_page_pods_for_ps(struct adapter *sc, struct sge_wrq *wrq, int tid, return (0); } +static struct mbuf * +alloc_raw_wr_mbuf(int len) +{ + struct mbuf *m; + + if (len <= MHLEN) + m = m_gethdr(M_NOWAIT, MT_DATA); + else if (len <= MCLBYTES) + m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); + else + m = NULL; + if (m == NULL) + return (NULL); + m->m_pkthdr.len = len; + m->m_len = len; + set_mbuf_raw_wr(m, true); + return (m); +} + int -t4_write_page_pods_for_buf(struct adapter *sc, struct sge_wrq *wrq, int tid, +t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep, struct ppod_reservation *prsv, vm_offset_t buf, int buflen) { - struct wrqe *wr; + struct inpcb *inp = toep->inp; struct ulp_mem_io *ulpmc; struct ulptx_idata *ulpsc; struct pagepod *ppod; @@ -1094,6 +1113,8 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct sge_wrq *wrq, int tid, uint32_t cmd; struct ppod_region *pr = prsv->prsv_pr; uintptr_t end_pva, pva, pa; + struct mbuf *m; + struct mbufq wrq; cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE)); if (is_t4(sc)) @@ -1105,6 +1126,7 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct sge_wrq *wrq, int tid, ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask); pva = trunc_page(buf); end_pva = trunc_page(buf + buflen - 1); + mbufq_init(&wrq, INT_MAX); for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) { /* How many page pods are we writing in this cycle */ @@ -1113,12 +1135,14 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct sge_wrq *wrq, int tid, chunk = PPOD_SZ(n); len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16); - wr = alloc_wrqe(len, wrq); - if (wr == NULL) - return (ENOMEM); /* ok to just bail out */ - ulpmc = wrtod(wr); + m = alloc_raw_wr_mbuf(len); + if (m == NULL) { + mbufq_drain(&wrq); + return (ENOMEM); + } + ulpmc = mtod(m, struct ulp_mem_io *); - INIT_ULPTX_WR(ulpmc, len, 0, 0); + INIT_ULPTX_WR(ulpmc, len, 0, toep->tid); ulpmc->cmd = cmd; ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32)); ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16)); @@ -1131,7 +1155,7 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct sge_wrq *wrq, int tid, ppod = (struct pagepod *)(ulpsc + 1); for (j = 0; j < n; i++, j++, ppod++) { ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID | - V_PPOD_TID(tid) | + V_PPOD_TID(toep->tid) | (prsv->prsv_tag & ~V_PPOD_PGSZ(M_PPOD_PGSZ))); ppod->len_offset = htobe64(V_PPOD_LEN(buflen) | V_PPOD_OFST(offset)); @@ -1148,7 +1172,7 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct sge_wrq *wrq, int tid, #if 0 CTR5(KTR_CXGBE, "%s: tid %d ppod[%d]->addr[%d] = %p", - __func__, tid, i, k, + __func__, toep->tid, i, k, htobe64(ppod->addr[k])); #endif } @@ -1161,9 +1185,13 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct sge_wrq *wrq, int tid, pva -= ddp_pgsz; } - t4_wrq_tx(sc, wr); + mbufq_enqueue(&wrq, m); } + INP_WLOCK(inp); + mbufq_concat(&toep->ulp_pduq, &wrq); + INP_WUNLOCK(inp); + MPASS(pva <= end_pva); return (0); diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h index 68b3d29295f8..f1129b47cbcf 100644 --- a/sys/dev/cxgbe/tom/t4_tom.h +++ b/sys/dev/cxgbe/tom/t4_tom.h @@ -330,6 +330,22 @@ td_adapter(struct tom_data *td) return (td->tod.tod_softc); } +static inline void +set_mbuf_raw_wr(struct mbuf *m, bool raw) +{ + + M_ASSERTPKTHDR(m); + m->m_pkthdr.PH_per.eight[6] = raw; +} + +static inline bool +mbuf_raw_wr(struct mbuf *m) +{ + + M_ASSERTPKTHDR(m); + return (m->m_pkthdr.PH_per.eight[6]); +} + static inline void set_mbuf_ulp_submode(struct mbuf *m, uint8_t ulp_submode) { @@ -423,7 +439,7 @@ int t4_alloc_page_pods_for_buf(struct ppod_region *, vm_offset_t, int, struct ppod_reservation *); int t4_write_page_pods_for_ps(struct adapter *, struct sge_wrq *, int, struct pageset *); -int t4_write_page_pods_for_buf(struct adapter *, struct sge_wrq *, int, +int t4_write_page_pods_for_buf(struct adapter *, struct toepcb *, struct ppod_reservation *, vm_offset_t, int); void t4_free_page_pods(struct ppod_reservation *); int t4_soreceive_ddp(struct socket *, struct sockaddr **, struct uio *,