From nobody Fri Oct 29 23:58:20 2021 X-Original-To: dev-commits-src-all@mlmmj.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mlmmj.nyi.freebsd.org (Postfix) with ESMTP id 5BFD1181D241; Fri, 29 Oct 2021 23:58:24 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4HgzsC4HqJz4V8C; Fri, 29 Oct 2021 23:58:23 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 24F3820240; Fri, 29 Oct 2021 23:58:20 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 19TNwKlj003615; Fri, 29 Oct 2021 23:58:20 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 19TNwK66003614; Fri, 29 Oct 2021 23:58:20 GMT (envelope-from git) Date: Fri, 29 Oct 2021 23:58:20 GMT Message-Id: <202110292358.19TNwK66003614@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org From: John Baldwin Subject: git: 0695b57a9875 - stable/13 - cxgbei: Fix a race between transfer setup and a peer reset. List-Id: Commit messages for all branches of the src repository List-Archive: https://lists.freebsd.org/archives/dev-commits-src-all List-Help: List-Post: List-Subscribe: List-Unsubscribe: Sender: owner-dev-commits-src-all@freebsd.org X-BeenThere: dev-commits-src-all@freebsd.org MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit X-Git-Committer: jhb X-Git-Repository: src X-Git-Refname: refs/heads/stable/13 X-Git-Reftype: branch X-Git-Commit: 0695b57a987591b7b5e5ac0e78365c69faf11216 Auto-Submitted: auto-generated X-ThisMailContainsUnwantedMimeParts: N The branch stable/13 has been updated by jhb: URL: https://cgit.FreeBSD.org/src/commit/?id=0695b57a987591b7b5e5ac0e78365c69faf11216 commit 0695b57a987591b7b5e5ac0e78365c69faf11216 Author: John Baldwin AuthorDate: 2021-05-20 23:03:19 +0000 Commit: John Baldwin CommitDate: 2021-10-29 23:08:09 +0000 cxgbei: Fix a race between transfer setup and a peer reset. In 4427ac3675f9, the TOM driver stopped sending work requests to program iSCSI page pods directly and instead queued them to be written asynchronously with iSCSI PDUs. The queue of mbufs to send is protected by the inp lock. However, the inp cannot be safely obtained from the toep since a RST from the remote peer might have cleared toep->inp asynchronously in an ithread. To fix, obtain the inp from the socket as is already done in icl_cxgbei_conn_pdu_queue_cb() and fail the new transfer setup with ECONNRESET if the connection has been reset. To avoid passing sockets or inps into the page pod routines, pull the mbufq out of the two relevant page pod routines such that the routines queue new work request mbufs to a caller-supplied mbufq. Reported by: Jithesh Arakkan @ Chelsio Fixes: 4427ac3675f91df039d54a23518132e0e0fede86 (cherry picked from commit f949967c8eb3ab5e5a965e3cf07a726dfdc81263) --- sys/dev/cxgbe/cxgbei/icl_cxgbei.c | 44 +++++++++++++++++++++++++++++++++++++-- sys/dev/cxgbe/tom/t4_ddp.c | 31 +++++++-------------------- sys/dev/cxgbe/tom/t4_tom.h | 4 ++-- 3 files changed, 51 insertions(+), 28 deletions(-) diff --git a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c index b9f7c6355b6f..01759d929c0e 100644 --- a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c +++ b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c @@ -983,6 +983,8 @@ icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, struct ppod_region *pr = &ci->pr; struct cxgbei_ddp_state *ddp; struct ppod_reservation *prsv; + struct inpcb *inp; + struct mbufq mq; uint32_t itt; int rc = 0; @@ -1028,14 +1030,32 @@ no_ddp: goto no_ddp; } + mbufq_init(&mq, INT_MAX); rc = t4_write_page_pods_for_buf(sc, toep, prsv, - (vm_offset_t)csio->data_ptr, csio->dxfer_len); + (vm_offset_t)csio->data_ptr, csio->dxfer_len, &mq); if (__predict_false(rc != 0)) { + mbufq_drain(&mq); t4_free_page_pods(prsv); free(ddp, M_CXGBEI); goto no_ddp; } + /* + * Do not get inp from toep->inp as the toepcb might have + * detached already. + */ + inp = sotoinpcb(ic->ic_socket); + INP_WLOCK(inp); + if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) != 0) { + INP_WUNLOCK(inp); + mbufq_drain(&mq); + t4_free_page_pods(prsv); + free(ddp, M_CXGBEI); + return (ECONNRESET); + } + mbufq_concat(&toep->ulp_pduq, &mq); + INP_WUNLOCK(inp); + ddp->cmp.last_datasn = -1; cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag); *ittp = htobe32(prsv->prsv_tag); @@ -1096,6 +1116,8 @@ icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, struct cxgbei_ddp_state *ddp; struct ppod_reservation *prsv; struct ctl_sg_entry *sgl, sg_entry; + struct inpcb *inp; + struct mbufq mq; int sg_entries = ctsio->kern_sg_entries; uint32_t ttt; int xferlen, rc = 0, alias; @@ -1173,14 +1195,32 @@ no_ddp: goto no_ddp; } + mbufq_init(&mq, INT_MAX); rc = t4_write_page_pods_for_sgl(sc, toep, prsv, sgl, sg_entries, - xferlen); + xferlen, &mq); if (__predict_false(rc != 0)) { + mbufq_drain(&mq); t4_free_page_pods(prsv); free(ddp, M_CXGBEI); goto no_ddp; } + /* + * Do not get inp from toep->inp as the toepcb might + * have detached already. + */ + inp = sotoinpcb(ic->ic_socket); + INP_WLOCK(inp); + if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) != 0) { + INP_WUNLOCK(inp); + mbufq_drain(&mq); + t4_free_page_pods(prsv); + free(ddp, M_CXGBEI); + return (ECONNRESET); + } + mbufq_concat(&toep->ulp_pduq, &mq); + INP_WUNLOCK(inp); + ddp->cmp.next_buffer_offset = ctsio->kern_rel_offset + first_burst; ddp->cmp.last_datasn = -1; diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c index 34c01674659a..2b58cb60d4fd 100644 --- a/sys/dev/cxgbe/tom/t4_ddp.c +++ b/sys/dev/cxgbe/tom/t4_ddp.c @@ -1175,9 +1175,9 @@ alloc_raw_wr_mbuf(int len) int t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep, - struct ppod_reservation *prsv, vm_offset_t buf, int buflen) + struct ppod_reservation *prsv, vm_offset_t buf, int buflen, + struct mbufq *wrq) { - struct inpcb *inp = toep->inp; struct ulp_mem_io *ulpmc; struct ulptx_idata *ulpsc; struct pagepod *ppod; @@ -1187,7 +1187,6 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep, struct ppod_region *pr = prsv->prsv_pr; uintptr_t end_pva, pva, pa; struct mbuf *m; - struct mbufq wrq; cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE)); if (is_t4(sc)) @@ -1199,7 +1198,6 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep, ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask); pva = trunc_page(buf); end_pva = trunc_page(buf + buflen - 1); - mbufq_init(&wrq, INT_MAX); for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) { /* How many page pods are we writing in this cycle */ @@ -1209,10 +1207,8 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep, len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16); m = alloc_raw_wr_mbuf(len); - if (m == NULL) { - mbufq_drain(&wrq); + if (m == NULL) return (ENOMEM); - } ulpmc = mtod(m, struct ulp_mem_io *); INIT_ULPTX_WR(ulpmc, len, 0, toep->tid); @@ -1258,13 +1254,9 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep, pva -= ddp_pgsz; } - mbufq_enqueue(&wrq, m); + mbufq_enqueue(wrq, m); } - INP_WLOCK(inp); - mbufq_concat(&toep->ulp_pduq, &wrq); - INP_WUNLOCK(inp); - MPASS(pva <= end_pva); return (0); @@ -1273,9 +1265,8 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep, int t4_write_page_pods_for_sgl(struct adapter *sc, struct toepcb *toep, struct ppod_reservation *prsv, struct ctl_sg_entry *sgl, int entries, - int xferlen) + int xferlen, struct mbufq *wrq) { - struct inpcb *inp = toep->inp; struct ulp_mem_io *ulpmc; struct ulptx_idata *ulpsc; struct pagepod *ppod; @@ -1285,7 +1276,6 @@ t4_write_page_pods_for_sgl(struct adapter *sc, struct toepcb *toep, struct ppod_region *pr = prsv->prsv_pr; uintptr_t pva, pa; struct mbuf *m; - struct mbufq wrq; MPASS(sgl != NULL); MPASS(entries > 0); @@ -1298,7 +1288,6 @@ t4_write_page_pods_for_sgl(struct adapter *sc, struct toepcb *toep, offset = (vm_offset_t)sgl->addr & PAGE_MASK; ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask); pva = trunc_page((vm_offset_t)sgl->addr); - mbufq_init(&wrq, INT_MAX); for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) { /* How many page pods are we writing in this cycle */ @@ -1308,10 +1297,8 @@ t4_write_page_pods_for_sgl(struct adapter *sc, struct toepcb *toep, len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16); m = alloc_raw_wr_mbuf(len); - if (m == NULL) { - mbufq_drain(&wrq); + if (m == NULL) return (ENOMEM); - } ulpmc = mtod(m, struct ulp_mem_io *); INIT_ULPTX_WR(ulpmc, len, 0, toep->tid); @@ -1378,13 +1365,9 @@ t4_write_page_pods_for_sgl(struct adapter *sc, struct toepcb *toep, } } - mbufq_enqueue(&wrq, m); + mbufq_enqueue(wrq, m); } - INP_WLOCK(inp); - mbufq_concat(&toep->ulp_pduq, &wrq); - INP_WUNLOCK(inp); - return (0); } diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h index c7984f838735..21cfb1df6e16 100644 --- a/sys/dev/cxgbe/tom/t4_tom.h +++ b/sys/dev/cxgbe/tom/t4_tom.h @@ -443,9 +443,9 @@ int t4_alloc_page_pods_for_sgl(struct ppod_region *, struct ctl_sg_entry *, int, int t4_write_page_pods_for_ps(struct adapter *, struct sge_wrq *, int, struct pageset *); int t4_write_page_pods_for_buf(struct adapter *, struct toepcb *, - struct ppod_reservation *, vm_offset_t, int); + struct ppod_reservation *, vm_offset_t, int, struct mbufq *); int t4_write_page_pods_for_sgl(struct adapter *, struct toepcb *, - struct ppod_reservation *, struct ctl_sg_entry *, int, int); + struct ppod_reservation *, struct ctl_sg_entry *, int, int, struct mbufq *); void t4_free_page_pods(struct ppod_reservation *); int t4_soreceive_ddp(struct socket *, struct sockaddr **, struct uio *, struct mbuf **, struct mbuf **, int *);