git: df275ae50900 - main - cxgbe(4): Add another setting to the knob that controls congestion.

From: Navdeep Parhar <np_at_FreeBSD.org>
Date: Mon, 12 Sep 2022 18:48:15 UTC
The branch main has been updated by np:

URL: https://cgit.FreeBSD.org/src/commit/?id=df275ae5090048fb7a8c082d509fae57e891957a

commit df275ae5090048fb7a8c082d509fae57e891957a
Author:     Navdeep Parhar <np@FreeBSD.org>
AuthorDate: 2022-09-09 22:16:22 +0000
Commit:     Navdeep Parhar <np@FreeBSD.org>
CommitDate: 2022-09-12 18:40:29 +0000

    cxgbe(4): Add another setting to the knob that controls congestion.
    
    hw.cxgbe.cong_drop=2 will generate backpressure *and* drop frames for
    queues that are congested.
    
    MFC after:      2 weeks
    Sponsored by:   Chelsio Communications
---
 share/man/man4/cxgbe.4    |   3 +-
 sys/dev/cxgbe/adapter.h   |   4 +-
 sys/dev/cxgbe/t4_netmap.c |  41 ++++++------------
 sys/dev/cxgbe/t4_sge.c    | 106 +++++++++++++++++++++++++++++-----------------
 4 files changed, 84 insertions(+), 70 deletions(-)

diff --git a/share/man/man4/cxgbe.4 b/share/man/man4/cxgbe.4
index 5f959299d2ad..b0f8af93b3b6 100644
--- a/share/man/man4/cxgbe.4
+++ b/share/man/man4/cxgbe.4
@@ -31,7 +31,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd November 10, 2021
+.Dd September 12, 2022
 .Dt CXGBE 4
 .Os
 .Sh NAME
@@ -277,6 +277,7 @@ Controls the hardware response to congestion.
 0 instructs the hardware to backpressure its pipeline on congestion.
 This usually results in the port emitting PAUSE frames.
 1 instructs the hardware to drop frames destined for congested queues.
+2 instructs the hardware to both backpressure the pipeline and drop frames.
 .It Va hw.cxgbe.pause_settings
 PAUSE frame settings.
 Bit 0 is rx_pause, bit 1 is tx_pause, bit 2 is pause_autoneg.
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index 41d8e2879b20..f002f77fdd31 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -428,7 +428,7 @@ struct sge_iq {
 	int8_t   intr_pktc_idx;	/* packet count threshold index */
 	uint8_t  gen;		/* generation bit */
 	uint8_t  intr_params;	/* interrupt holdoff parameters */
-	int8_t   cong;		/* congestion settings */
+	int8_t   cong_drop;	/* congestion drop settings for the queue */
 	uint16_t qsize;		/* size (# of entries) of the queue */
 	uint16_t sidx;		/* index of the entry with the status page */
 	uint16_t cidx;		/* consumer index */
@@ -1384,7 +1384,7 @@ struct mbuf *alloc_wr_mbuf(int, int);
 int parse_pkt(struct mbuf **, bool);
 void *start_wrq_wr(struct sge_wrq *, int, struct wrq_cookie *);
 void commit_wrq_wr(struct sge_wrq *, void *, struct wrq_cookie *);
-int tnl_cong(struct port_info *, int);
+int t4_sge_set_conm_context(struct adapter *, int, int, int);
 void t4_register_an_handler(an_handler_t);
 void t4_register_fw_msg_handler(int, fw_msg_handler_t);
 void t4_register_cpl_handler(int, cpl_handler_t);
diff --git a/sys/dev/cxgbe/t4_netmap.c b/sys/dev/cxgbe/t4_netmap.c
index a847dca3ddbd..0f9aee784aa5 100644
--- a/sys/dev/cxgbe/t4_netmap.c
+++ b/sys/dev/cxgbe/t4_netmap.c
@@ -276,14 +276,17 @@ free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq)
 }
 
 static int
-alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int cong)
+alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq)
 {
-	int rc, cntxt_id, i;
+	int rc, cntxt_id;
 	__be32 v;
 	struct adapter *sc = vi->adapter;
+	struct port_info *pi = vi->pi;
 	struct sge_params *sp = &sc->params.sge;
 	struct netmap_adapter *na = NA(vi->ifp);
 	struct fw_iq_cmd c;
+	const int cong_drop = nm_cong_drop;
+	const int cong_map = pi->rx_e_chan_map;
 
 	MPASS(na != NULL);
 	MPASS(nm_rxq->iq_desc != NULL);
@@ -313,15 +316,15 @@ alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int cong)
 	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
 	    V_FW_IQ_CMD_VIID(vi->viid) |
 	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
-	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(vi->pi->tx_chan) |
+	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
 	    F_FW_IQ_CMD_IQGTSMODE |
 	    V_FW_IQ_CMD_IQINTCNTTHRESH(0) |
 	    V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4));
 	c.iqsize = htobe16(vi->qsize_rxq);
 	c.iqaddr = htobe64(nm_rxq->iq_ba);
-	if (cong >= 0) {
+	if (cong_drop != -1) {
 		c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN |
-		    V_FW_IQ_CMD_FL0CNGCHMAP(cong) | F_FW_IQ_CMD_FL0CONGCIF |
+		    V_FW_IQ_CMD_FL0CNGCHMAP(cong_map) | F_FW_IQ_CMD_FL0CONGCIF |
 		    F_FW_IQ_CMD_FL0CONGEN);
 	}
 	c.iqns_to_fl0congen |=
@@ -373,29 +376,9 @@ alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int cong)
 	nm_rxq->fl_db_val = V_QID(nm_rxq->fl_cntxt_id) |
 	    sc->chip_params->sge_fl_db;
 
-	if (chip_id(sc) >= CHELSIO_T5 && cong >= 0) {
-		uint32_t param, val;
-
-		param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
-		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
-		    V_FW_PARAMS_PARAM_YZ(nm_rxq->iq_cntxt_id);
-		if (cong == 0)
-			val = 1 << 19;
-		else {
-			val = 2 << 19;
-			for (i = 0; i < 4; i++) {
-				if (cong & (1 << i))
-					val |= 1 << (i << 2);
-			}
-		}
-
-		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
-		if (rc != 0) {
-			/* report error but carry on */
-			device_printf(sc->dev,
-			    "failed to set congestion manager context for "
-			    "ingress queue %d: %d\n", nm_rxq->iq_cntxt_id, rc);
-		}
+	if (chip_id(sc) >= CHELSIO_T5 && cong_drop != -1) {
+		t4_sge_set_conm_context(sc, nm_rxq->iq_cntxt_id, cong_drop,
+		    cong_map);
 	}
 
 	t4_write_reg(sc, sc->sge_gts_reg,
@@ -749,7 +732,7 @@ cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp,
 		if (!nm_kring_pending_on(kring))
 			continue;
 
-		alloc_nm_rxq_hwq(vi, nm_rxq, tnl_cong(vi->pi, nm_cong_drop));
+		alloc_nm_rxq_hwq(vi, nm_rxq);
 		nm_rxq->fl_hwidx = hwidx;
 		slot = netmap_reset(na, NR_RX, i, 0);
 		MPASS(slot != NULL);	/* XXXNM: error check, not assert */
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index 43bb7d9e559a..9e75d1d96bad 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -124,10 +124,11 @@ SYSCTL_INT(_hw_cxgbe, OID_AUTO, spg_len, CTLFLAG_RDTUN, &spg_len, 0,
  * -1: no congestion feedback (not recommended).
  *  0: backpressure the channel instead of dropping packets right away.
  *  1: no backpressure, drop packets for the congested queue immediately.
+ *  2: both backpressure and drop.
  */
 static int cong_drop = 0;
 SYSCTL_INT(_hw_cxgbe, OID_AUTO, cong_drop, CTLFLAG_RDTUN, &cong_drop, 0,
-    "Congestion control for RX queues (0 = backpressure, 1 = drop");
+    "Congestion control for NIC RX queues (0 = backpressure, 1 = drop, 2 = both");
 
 /*
  * Deliver multiple frames in the same free list buffer if they fit.
@@ -554,7 +555,7 @@ t4_sge_modload(void)
 		spg_len = len;
 	}
 
-	if (cong_drop < -1 || cong_drop > 1) {
+	if (cong_drop < -1 || cong_drop > 2) {
 		printf("Invalid hw.cxgbe.cong_drop value (%d),"
 		    " using 0 instead.\n", cong_drop);
 		cong_drop = 0;
@@ -3382,7 +3383,7 @@ init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
 	iq->qsize = roundup2(qsize, 16);	/* See FW_IQ_CMD/iqsize */
 	iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE;
 	iq->intr_idx = intr_idx;
-	iq->cong = cong;
+	iq->cong_drop = cong;
 }
 
 static inline void
@@ -3548,9 +3549,10 @@ free_iq_fl(struct adapter *sc, struct sge_iq *iq, struct sge_fl *fl)
 static int
 alloc_iq_fl_hwq(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl)
 {
-	int rc, i, cntxt_id;
+	int rc, cntxt_id, cong_map;
 	struct fw_iq_cmd c;
 	struct adapter *sc = vi->adapter;
+	struct port_info *pi = vi->pi;
 	__be32 v = 0;
 
 	MPASS (!(iq->flags & IQ_HW_ALLOCATED));
@@ -3582,15 +3584,17 @@ alloc_iq_fl_hwq(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl)
 	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
 	    V_FW_IQ_CMD_VIID(vi->viid) |
 	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
-	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(vi->pi->tx_chan) |
+	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
 	    F_FW_IQ_CMD_IQGTSMODE |
 	    V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
 	    V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4));
 	c.iqsize = htobe16(iq->qsize);
 	c.iqaddr = htobe64(iq->ba);
 	c.iqns_to_fl0congen = htobe32(V_FW_IQ_CMD_IQTYPE(iq->qtype));
-	if (iq->cong >= 0)
+	if (iq->cong_drop != -1) {
+		cong_map = iq->qtype == IQ_ETH ? pi->rx_e_chan_map : 0;
 		c.iqns_to_fl0congen |= htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);
+	}
 
 	if (fl) {
 		bzero(fl->desc, fl->sidx * EQ_ESIZE + sc->params.sge.spg_len);
@@ -3600,9 +3604,9 @@ alloc_iq_fl_hwq(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl)
 			(fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) |
 			(fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN :
 			    0));
-		if (iq->cong >= 0) {
+		if (iq->cong_drop != -1) {
 			c.iqns_to_fl0congen |=
-				htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(iq->cong) |
+				htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong_map) |
 				    F_FW_IQ_CMD_FL0CONGCIF |
 				    F_FW_IQ_CMD_FL0CONGEN);
 		}
@@ -3636,6 +3640,8 @@ alloc_iq_fl_hwq(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl)
 	if (fl) {
 		u_int qid;
 #ifdef INVARIANTS
+		int i;
+
 		MPASS(!(fl->flags & FL_BUF_RESUME));
 		for (i = 0; i < fl->sidx * 8; i++)
 			MPASS(fl->sdesc[i].cl == NULL);
@@ -3675,28 +3681,10 @@ alloc_iq_fl_hwq(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl)
 		FL_UNLOCK(fl);
 	}
 
-	if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) && iq->cong >= 0) {
-		uint32_t param, val;
-
-		param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
-		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
-		    V_FW_PARAMS_PARAM_YZ(iq->cntxt_id);
-		if (iq->cong == 0)
-			val = 1 << 19;
-		else {
-			val = 2 << 19;
-			for (i = 0; i < 4; i++) {
-				if (iq->cong & (1 << i))
-					val |= 1 << (i << 2);
-			}
-		}
-
-		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
-		if (rc != 0) {
-			/* report error but carry on */
-			CH_ERR(sc, "failed to set congestion manager context "
-			    "for ingress queue %d: %d\n", iq->cntxt_id, rc);
-		}
+	if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) &&
+	    iq->cong_drop != -1) {
+		t4_sge_set_conm_context(sc, iq->cntxt_id, iq->cong_drop,
+		    cong_map);
 	}
 
 	/* Enable IQ interrupts */
@@ -3920,15 +3908,57 @@ free_ctrlq(struct adapter *sc, int idx)
 }
 
 int
-tnl_cong(struct port_info *pi, int drop)
+t4_sge_set_conm_context(struct adapter *sc, int cntxt_id, int cong_drop,
+    int cong_map)
 {
+	const int cng_ch_bits_log = sc->chip_params->cng_ch_bits_log;
+	uint32_t param, val;
+	uint16_t ch_map;
+	int cong_mode, rc, i;
 
-	if (drop == -1)
-		return (-1);
-	else if (drop == 1)
-		return (0);
-	else
-		return (pi->rx_e_chan_map);
+	if (chip_id(sc) < CHELSIO_T5)
+		return (ENOTSUP);
+
+	/* Convert the driver knob to the mode understood by the firmware. */
+	switch (cong_drop) {
+	case -1:
+		cong_mode = X_CONMCTXT_CNGTPMODE_DISABLE;
+		break;
+	case 0:
+		cong_mode = X_CONMCTXT_CNGTPMODE_CHANNEL;
+		break;
+	case 1:
+		cong_mode = X_CONMCTXT_CNGTPMODE_QUEUE;
+		break;
+	case 2:
+		cong_mode = X_CONMCTXT_CNGTPMODE_BOTH;
+		break;
+	default:
+		MPASS(0);
+		CH_ERR(sc, "cong_drop = %d is invalid (ingress queue %d).\n",
+		    cong_drop, cntxt_id);
+		return (EINVAL);
+	}
+
+	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
+	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
+	    V_FW_PARAMS_PARAM_YZ(cntxt_id);
+	val = V_CONMCTXT_CNGTPMODE(cong_mode);
+	if (cong_mode == X_CONMCTXT_CNGTPMODE_CHANNEL ||
+	    cong_mode == X_CONMCTXT_CNGTPMODE_BOTH) {
+		for (i = 0, ch_map = 0; i < 4; i++) {
+			if (cong_map & (1 << i))
+				ch_map |= 1 << (i << cng_ch_bits_log);
+		}
+		val |= V_CONMCTXT_CNGCHMAP(ch_map);
+	}
+	rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
+	if (rc != 0) {
+		CH_ERR(sc, "failed to set congestion manager context "
+		    "for ingress queue %d: %d\n", cntxt_id, rc);
+	}
+
+	return (rc);
 }
 
 /*
@@ -3960,7 +3990,7 @@ alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int idx, int intr_idx,
 		    "rx queue");
 
 		init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq,
-		    intr_idx, tnl_cong(vi->pi, cong_drop), IQ_ETH);
+		    intr_idx, cong_drop, IQ_ETH);
 #if defined(INET) || defined(INET6)
 		if (ifp->if_capenable & IFCAP_LRO)
 			rxq->iq.flags |= IQ_LRO_ENABLED;