git: bc531a1faa99 - main - mlx5en: Improve CQE error debugging.
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 17 Feb 2022 12:13:51 UTC
The branch main has been updated by hselasky:
URL: https://cgit.FreeBSD.org/src/commit/?id=bc531a1faa99b94b7b7761f1640304dd815eec5d
commit bc531a1faa99b94b7b7761f1640304dd815eec5d
Author: Hans Petter Selasky <hselasky@FreeBSD.org>
AuthorDate: 2022-02-17 11:50:22 +0000
Commit: Hans Petter Selasky <hselasky@FreeBSD.org>
CommitDate: 2022-02-17 12:13:09 +0000
mlx5en: Improve CQE error debugging.
MFC after: 1 week
Sponsored by: NVIDIA Networking
---
sys/dev/mlx5/mlx5_en/en.h | 2 ++
sys/dev/mlx5/mlx5_en/mlx5_en_rx.c | 1 +
sys/dev/mlx5/mlx5_en/mlx5_en_tx.c | 4 +++-
sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c | 25 +++++++++++++++++++++++++
4 files changed, 31 insertions(+), 1 deletion(-)
diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
index fa355c68831e..36a55ff5c4d0 100644
--- a/sys/dev/mlx5/mlx5_en/en.h
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -1205,6 +1205,8 @@ int mlx5e_open_locked(struct ifnet *);
int mlx5e_close_locked(struct ifnet *);
void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event);
+void mlx5e_dump_err_cqe(struct mlx5e_cq *, u32, const struct mlx5_err_cqe *);
+
mlx5e_cq_comp_t mlx5e_rx_cq_comp;
mlx5e_cq_comp_t mlx5e_tx_cq_comp;
struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
index 0e3a3b3917f4..3c8813190f76 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
@@ -495,6 +495,7 @@ mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget)
BUS_DMASYNC_POSTREAD);
if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ mlx5e_dump_err_cqe(&rq->cq, rq->rqn, (const void *)cqe);
rq->stats.wqe_err++;
goto wq_ll_pop;
}
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
index 9e0837a76393..78458ab69f13 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
@@ -1045,8 +1045,10 @@ mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
mlx5_cqwq_pop(&sq->cq.wq);
/* check if the completion event indicates an error */
- if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+ mlx5e_dump_err_cqe(&sq->cq, sq->sqn, (const void *)cqe);
sq->stats.cqe_err++;
+ }
/* setup local variables */
sqcc_this = be16toh(cqe->wqe_counter);
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c
index 9f5e17ad864e..aff247f5aea2 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c
@@ -28,6 +28,8 @@
#include "opt_rss.h"
#include "opt_ratelimit.h"
+#include <linux/printk.h>
+
#include <dev/mlx5/mlx5_en/en.h>
struct mlx5_cqe64 *
@@ -54,3 +56,26 @@ mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event)
mlx5_en_err(cq->priv->ifp, "cqn=0x%.6x event=0x%.2x\n",
mcq->cqn, event);
}
+
+void
+mlx5e_dump_err_cqe(struct mlx5e_cq *cq, u32 qn, const struct mlx5_err_cqe *err_cqe)
+{
+ u32 ci;
+
+ /* Stay silent for "flushed in error" completions (vendor syndrome 0xf9, syndrome 0x05). */
+ if (err_cqe->vendor_err_synd == 0xf9 && err_cqe->syndrome == 0x05)
+ return;
+ /* Stay silent when software itself set the queue to error state (vendor syndrome 0xf5, syndrome 0x05). */
+ if (err_cqe->vendor_err_synd == 0xf5 && err_cqe->syndrome == 0x05)
+ return;
+
+ ci = (cq->wq.cc - 1) & cq->wq.sz_m1; /* (cc - 1) masked with sz_m1; reported as "CI" below */
+
+ mlx5_en_err(cq->priv->ifp,
+ "Error CQE on CQN 0x%x, CI 0x%x, QN 0x%x, OPCODE 0x%x, SYNDROME 0x%x, VENDOR SYNDROME 0x%x\n",
+ cq->mcq.cqn, ci, qn, err_cqe->op_own >> 4,
+ err_cqe->syndrome, err_cqe->vendor_err_synd);
+
+ print_hex_dump(NULL, NULL, DUMP_PREFIX_OFFSET,
+ 16, 1, err_cqe, sizeof(*err_cqe), false);
+}