git: 4b5dd427cb63 - stable/13 - mlx5en: Improve CQE error debugging.
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 24 Feb 2022 10:04:21 UTC
The branch stable/13 has been updated by hselasky:

URL: https://cgit.FreeBSD.org/src/commit/?id=4b5dd427cb63a2f3965d64afe4cc07f35189639a

commit 4b5dd427cb63a2f3965d64afe4cc07f35189639a
Author:     Hans Petter Selasky <hselasky@FreeBSD.org>
AuthorDate: 2022-02-24 09:58:54 +0000
Commit:     Hans Petter Selasky <hselasky@FreeBSD.org>
CommitDate: 2022-02-24 09:58:54 +0000

    mlx5en: Improve CQE error debugging.

    Sponsored by: NVIDIA Networking

    (cherry picked from commit bc531a1faa99b94b7b7761f1640304dd815eec5d)
---
 sys/dev/mlx5/mlx5_en/en.h           |  2 ++
 sys/dev/mlx5/mlx5_en/mlx5_en_rx.c   |  1 +
 sys/dev/mlx5/mlx5_en/mlx5_en_tx.c   |  4 +++-
 sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c | 25 +++++++++++++++++++++++++
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
index 405fe6390cfb..28928aa18fa1 100644
--- a/sys/dev/mlx5/mlx5_en/en.h
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -1198,6 +1198,8 @@ int mlx5e_open_locked(struct ifnet *);
 int mlx5e_close_locked(struct ifnet *);
 
 void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event);
+void mlx5e_dump_err_cqe(struct mlx5e_cq *, u32, const struct mlx5_err_cqe *);
+
 mlx5e_cq_comp_t mlx5e_rx_cq_comp;
 mlx5e_cq_comp_t mlx5e_tx_cq_comp;
 struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
index 9e58a9bd7904..9de80265f078 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
@@ -482,6 +482,7 @@ mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget)
 		    BUS_DMASYNC_POSTREAD);
 
 		if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+			mlx5e_dump_err_cqe(&rq->cq, rq->rqn, (const void *)cqe);
 			rq->stats.wqe_err++;
 			goto wq_ll_pop;
 		}
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
index 24899ef0b881..4cc25e1d5b51 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
@@ -1042,8 +1042,10 @@ mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
 		mlx5_cqwq_pop(&sq->cq.wq);
 
 		/* check if the completion event indicates an error */
-		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
+		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+			mlx5e_dump_err_cqe(&sq->cq, sq->sqn, (const void *)cqe);
 			sq->stats.cqe_err++;
+		}
 
 		/* setup local variables */
 		sqcc_this = be16toh(cqe->wqe_counter);
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c
index 9f5e17ad864e..aff247f5aea2 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c
@@ -28,6 +28,8 @@
 #include "opt_rss.h"
 #include "opt_ratelimit.h"
 
+#include <linux/printk.h>
+
 #include <dev/mlx5/mlx5_en/en.h>
 
 struct mlx5_cqe64 *
@@ -54,3 +56,26 @@ mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event)
 	mlx5_en_err(cq->priv->ifp, "cqn=0x%.6x event=0x%.2x\n",
 	    mcq->cqn, event);
 }
+
+void
+mlx5e_dump_err_cqe(struct mlx5e_cq *cq, u32 qn, const struct mlx5_err_cqe *err_cqe)
+{
+	u32 ci;
+
+	/* Don't print flushed in error syndromes. */
+	if (err_cqe->vendor_err_synd == 0xf9 && err_cqe->syndrome == 0x05)
+		return;
+	/* Don't print when the queue is set to error state by software. */
+	if (err_cqe->vendor_err_synd == 0xf5 && err_cqe->syndrome == 0x05)
+		return;
+
+	ci = (cq->wq.cc - 1) & cq->wq.sz_m1;
+
+	mlx5_en_err(cq->priv->ifp,
+	    "Error CQE on CQN 0x%x, CI 0x%x, QN 0x%x, OPCODE 0x%x, SYNDROME 0x%x, VENDOR SYNDROME 0x%x\n",
+	    cq->mcq.cqn, ci, qn, err_cqe->op_own >> 4,
+	    err_cqe->syndrome, err_cqe->vendor_err_synd);
+
+	print_hex_dump(NULL, NULL, DUMP_PREFIX_OFFSET,
+	    16, 1, err_cqe, sizeof(*err_cqe), false);
+}