git: 51983604daa4 - stable/13 - iscsi: Teach the iSCSI stack about "large" received PDUs.

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Fri, 29 Oct 2021 23:58:36 UTC
The branch stable/13 has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=51983604daa4c263b8e2824f13aedc2c8d75f8cd

commit 51983604daa4c263b8e2824f13aedc2c8d75f8cd
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2021-08-18 17:56:28 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2021-10-29 23:30:51 +0000

    iscsi: Teach the iSCSI stack about "large" received PDUs.
    
    When using iSCSI PDU offload (cxgbei) on T6 adapters, a burst of
    received PDUs can be reported via a single message to the driver.
    
    Previously the driver passed these multi-PDU bursts up to the iSCSI
    stack as a single "large" PDU by rewriting the buffer offset, data
    segment length, and DataSN fields in the iSCSI header.  The DataSN
    field in particular was rewritten so that each of the "large" PDUs
    used consecutively increasing values.  While this worked, the forged
    DataSN values did not match the ExpDataSN value in the subsequent SCSI
    Response PDU.  The initiator does not currently verify this value, but
    the forged DataSN values prevent adding a check.
    
    To avoid this, allow a logical iSCSI PDU (struct icl_pdu) to describe
    a burst of PDUs via a new 'ip_additional_pdus' field.  Normally this
    field is set to zero when 'struct icl_pdu' represents a single PDU.
    If a logical PDU represents a burst of on-the-wire PDUs, then
    'ip_additional_pdus' contains the count of additional on-the-wire PDUs.
    The header of this
    "large" PDU is still modified, but the DataSN field now contains the
    DataSN value of the first on-the-wire PDU in the burst.
    
    Reviewed by:    mav
    Sponsored by:   Chelsio Communications
    Differential Revision:  https://reviews.freebsd.org/D31577
    
    (cherry picked from commit c261b6ea4e2ef1fc6a446443ee594ad76f392350)
---
 sys/cam/ctl/ctl_frontend_iscsi.c  |  2 +-
 sys/dev/cxgbe/cxgbei/cxgbei.c     | 18 +++++++++++-------
 sys/dev/cxgbe/cxgbei/cxgbei.h     |  1 -
 sys/dev/cxgbe/cxgbei/icl_cxgbei.c |  1 -
 sys/dev/iscsi/icl.h               |  8 ++++++++
 5 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/sys/cam/ctl/ctl_frontend_iscsi.c b/sys/cam/ctl/ctl_frontend_iscsi.c
index b8fafcea69ed..0cbe3bcefc73 100644
--- a/sys/cam/ctl/ctl_frontend_iscsi.c
+++ b/sys/cam/ctl/ctl_frontend_iscsi.c
@@ -917,7 +917,7 @@ cfiscsi_pdu_handle_data_out(struct icl_pdu *request)
 		cfiscsi_session_terminate(cs);
 		return;
 	}
-	cdw->cdw_datasn++;
+	cdw->cdw_datasn += request->ip_additional_pdus + 1;
 
 	io = cdw->cdw_ctl_io;
 	KASSERT((io->io_hdr.flags & CTL_FLAG_DATA_MASK) != CTL_FLAG_DATA_IN,
diff --git a/sys/dev/cxgbe/cxgbei/cxgbei.c b/sys/dev/cxgbe/cxgbei/cxgbei.c
index 8dc580d65770..6af7043460fb 100644
--- a/sys/dev/cxgbe/cxgbei/cxgbei.c
+++ b/sys/dev/cxgbe/cxgbei/cxgbei.c
@@ -583,10 +583,12 @@ do_rx_iscsi_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 		    cmp->next_buffer_offset;
 
 		if (prev_seg_len != 0) {
+			uint32_t orig_datasn;
+
 			/*
-			 * Since cfiscsi doesn't know about previous
-			 * headers, pretend that the entire r2t data
-			 * length was received in this single segment.
+			 * Return a "large" PDU representing the burst
+			 * of PDUs.  Adjust the offset and length of
+			 * this PDU to represent the entire burst.
 			 */
 			ip->ip_data_len += prev_seg_len;
 			bhsdo->bhsdo_data_segment_len[2] = ip->ip_data_len;
@@ -595,17 +597,19 @@ do_rx_iscsi_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 			bhsdo->bhsdo_buffer_offset =
 			    htobe32(cmp->next_buffer_offset);
 
-			npdus = htobe32(bhsdo->bhsdo_datasn) - cmp->last_datasn;
+			orig_datasn = htobe32(bhsdo->bhsdo_datasn);
+			npdus = orig_datasn - cmp->last_datasn;
+			bhsdo->bhsdo_datasn = htobe32(cmp->last_datasn + 1);
+			cmp->last_datasn = orig_datasn;
+			ip->ip_additional_pdus = npdus - 1;
 		} else {
 			MPASS(htobe32(bhsdo->bhsdo_datasn) ==
 			    cmp->last_datasn + 1);
 			npdus = 1;
+			cmp->last_datasn = htobe32(bhsdo->bhsdo_datasn);
 		}
 
 		cmp->next_buffer_offset += ip->ip_data_len;
-		cmp->last_datasn = htobe32(bhsdo->bhsdo_datasn);
-		bhsdo->bhsdo_datasn = htobe32(cmp->next_datasn);
-		cmp->next_datasn++;
 		toep->ofld_rxq->rx_iscsi_ddp_pdus += npdus;
 		toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
 	} else {
diff --git a/sys/dev/cxgbe/cxgbei/cxgbei.h b/sys/dev/cxgbe/cxgbei/cxgbei.h
index 20754fa893a8..09d556988091 100644
--- a/sys/dev/cxgbe/cxgbei/cxgbei.h
+++ b/sys/dev/cxgbe/cxgbei/cxgbei.h
@@ -58,7 +58,6 @@ struct cxgbei_cmp {
 
 	uint32_t tt;		/* Transfer tag. */
 
-	uint32_t next_datasn;
 	uint32_t next_buffer_offset;
 	uint32_t last_datasn;
 };
diff --git a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
index a6e7f8b95815..687fc545cebd 100644
--- a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
+++ b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
@@ -1346,7 +1346,6 @@ no_ddp:
 	prsv->prsv_tag &= ~pr->pr_alias_mask;
 	prsv->prsv_tag |= alias << pr->pr_alias_shift & pr->pr_alias_mask;
 
-	ddp->cmp.next_datasn = 0;
 	ddp->cmp.last_datasn = -1;
 	cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
 	*tttp = htobe32(prsv->prsv_tag);
diff --git a/sys/dev/iscsi/icl.h b/sys/dev/iscsi/icl.h
index bd12569a8957..07dcbbf2a0b5 100644
--- a/sys/dev/iscsi/icl.h
+++ b/sys/dev/iscsi/icl.h
@@ -75,6 +75,14 @@ struct icl_pdu {
 	size_t			ip_data_len;
 	struct mbuf		*ip_data_mbuf;
 
+	/*
+	 * When a "large" received PDU represents multiple on-the-wire
+	 * PDUs, this is the count of additional on-the-wire PDUs.
+	 * For PDUs that match on-the-wire PDUs, this should be set to
+	 * zero.
+	 */
+	u_int			ip_additional_pdus;
+
 	/*
 	 * User (initiator or provider) private fields.
 	 */