git: 33cd6211052c - stable/14 - mana: Fix TX CQE error handling

From: Wei Hu <whu_at_FreeBSD.org>
Date: Wed, 24 Jan 2024 12:36:54 UTC
The branch stable/14 has been updated by whu:

URL: https://cgit.FreeBSD.org/src/commit/?id=33cd6211052c5add833150b2eff1bc0729651d3a

commit 33cd6211052c5add833150b2eff1bc0729651d3a
Author:     Wei Hu <whu@FreeBSD.org>
AuthorDate: 2024-01-17 09:19:35 +0000
Commit:     Wei Hu <whu@FreeBSD.org>
CommitDate: 2024-01-24 12:36:13 +0000

    mana: Fix TX CQE error handling
    
    For an unknown TX CQE error type (probably from newer hardware),
    still free the mbuf, update the queue tail, etc.; otherwise the
    accounting will be wrong.
    
    Also, TX errors can be triggered by injecting corrupted packets, so
    replace the mana_err logging with mana_dbg.
    
    Reported by:    NetApp
    MFC after:      1 week
    Sponsored by:   Microsoft
    
    (cherry picked from commit 516b5059705b6b8bbba28821dbe05964c128f9a9)
---
 sys/dev/mana/mana.h        |  2 ++
 sys/dev/mana/mana_en.c     | 14 ++++++++------
 sys/dev/mana/mana_sysctl.c |  6 ++++++
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/sys/dev/mana/mana.h b/sys/dev/mana/mana.h
index ab394f3203ad..906b28eb56b6 100644
--- a/sys/dev/mana/mana.h
+++ b/sys/dev/mana/mana.h
@@ -137,6 +137,8 @@ struct mana_stats {
 	counter_u64_t			mbuf_alloc_fail;	/* rx */
 	counter_u64_t			alt_chg;		/* tx */
 	counter_u64_t			alt_reset;		/* tx */
+	counter_u64_t			cqe_err;		/* tx */
+	counter_u64_t			cqe_unknown_type;	/* tx */
 };
 
 struct mana_txq {
diff --git a/sys/dev/mana/mana_en.c b/sys/dev/mana/mana_en.c
index 812a42e51dfc..09de49a11010 100644
--- a/sys/dev/mana/mana_en.c
+++ b/sys/dev/mana/mana_en.c
@@ -1487,21 +1487,23 @@ mana_poll_tx_cq(struct mana_cq *cq)
 		case CQE_TX_VPORT_DISABLED:
 		case CQE_TX_VLAN_TAGGING_VIOLATION:
 			sa_drop ++;
-			mana_err(NULL,
+			mana_dbg(NULL,
 			    "TX: txq %d CQE error %d, ntc = %d, "
 			    "pending sends = %d: err ignored.\n",
 			    txq_idx, cqe_oob->cqe_hdr.cqe_type,
 			    next_to_complete, txq->pending_sends);
+			counter_u64_add(txq->stats.cqe_err, 1);
 			break;
 
 		default:
-			/* If the CQE type is unexpected, log an error,
-			 * and go through the error path.
+			/* If the CQE type is unknown, log a debug msg,
+			 * and still free the mbuf, etc.
 			 */
-			mana_err(NULL,
-			    "ERROR: TX: Unexpected CQE type %d: HW BUG?\n",
+			mana_dbg(NULL,
+			    "ERROR: TX: Unknown CQE type %d\n",
 			    cqe_oob->cqe_hdr.cqe_type);
-			return;
+			counter_u64_add(txq->stats.cqe_unknown_type, 1);
+			break;
 		}
 		if (txq->gdma_txq_id != completions[i].wq_num) {
 			mana_dbg(NULL,
diff --git a/sys/dev/mana/mana_sysctl.c b/sys/dev/mana/mana_sysctl.c
index d03ebe44ab31..844a05040595 100644
--- a/sys/dev/mana/mana_sysctl.c
+++ b/sys/dev/mana/mana_sysctl.c
@@ -292,6 +292,12 @@ mana_sysctl_add_queues(struct mana_port_context *apc)
 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
 		    "alt_reset", CTLFLAG_RD,
 		    &tx_stats->alt_reset, "Reset to self txq");
+		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
+		    "cqe_err", CTLFLAG_RD,
+		    &tx_stats->cqe_err, "Error CQE count");
+		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
+		    "cqe_unknown_type", CTLFLAG_RD,
+		    &tx_stats->cqe_unknown_type, "Unknown CQE count");
 
 		/* RX stats */
 		rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,