git: 274319acb484 - main - ena: Add reset reason for missing admin interrupt
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 15 Oct 2024 17:43:08 UTC
The branch main has been updated by osamaabb:
URL: https://cgit.FreeBSD.org/src/commit/?id=274319acb48424958242d55e1b0c7d4528da7f70
commit 274319acb48424958242d55e1b0c7d4528da7f70
Author: Osama Abboud <osamaabb@amazon.com>
AuthorDate: 2024-08-07 06:24:19 +0000
Commit: Osama Abboud <osamaabb@FreeBSD.org>
CommitDate: 2024-10-15 17:38:31 +0000
ena: Add reset reason for missing admin interrupt
In some cases the driver triggers a reset because an admin interrupt
is missing.
To identify this case specifically,
this commit adds a new reset reason.
Approved by: cperciva (mentor)
MFC after: 2 weeks
Sponsored by: Amazon, Inc.
---
sys/dev/ena/ena.c | 13 +++++++++++--
sys/dev/ena/ena.h | 5 ++++-
sys/dev/ena/ena_sysctl.c | 4 ++++
3 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c
index 3f3a4946ccca..36e9ac15e8ff 100644
--- a/sys/dev/ena/ena.c
+++ b/sys/dev/ena/ena.c
@@ -3029,6 +3029,7 @@ static void
check_for_missing_keep_alive(struct ena_adapter *adapter)
{
sbintime_t timestamp, time;
+ enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
if (adapter->wd_active == 0)
return;
@@ -3040,7 +3041,10 @@ check_for_missing_keep_alive(struct ena_adapter *adapter)
time = getsbinuptime() - timestamp;
if (unlikely(time > adapter->keep_alive_timeout)) {
ena_log(adapter->pdev, ERR, "Keep alive watchdog timeout.\n");
- ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
+ if (ena_com_aenq_has_keep_alive(adapter->ena_dev))
+ reset_reason = ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT;
+
+ ena_trigger_reset(adapter, reset_reason);
}
}
@@ -3048,10 +3052,15 @@ check_for_missing_keep_alive(struct ena_adapter *adapter)
static void
check_for_admin_com_state(struct ena_adapter *adapter)
{
+ enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_ADMIN_TO;
if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) == false)) {
ena_log(adapter->pdev, ERR,
"ENA admin queue is not in running state!\n");
- ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO);
+ counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
+ if (ena_com_get_missing_admin_interrupt(adapter->ena_dev))
+ reset_reason = ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT;
+
+ ena_trigger_reset(adapter, reset_reason);
}
}
diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h
index b747736224d8..1a436a702ba1 100644
--- a/sys/dev/ena/ena.h
+++ b/sys/dev/ena/ena.h
@@ -391,6 +391,8 @@ struct ena_stats_dev {
counter_u64_t missing_intr;
counter_u64_t tx_desc_malformed;
counter_u64_t rx_desc_malformed;
+ counter_u64_t missing_admin_interrupt;
+ counter_u64_t admin_to;
};
struct ena_hw_stats {
@@ -542,7 +544,7 @@ struct ena_reset_stats_offset {
static const struct ena_reset_stats_offset resets_to_stats_offset_map[ENA_REGS_RESET_LAST] = {
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_KEEP_ALIVE_TO, wd_expired),
- ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_ADMIN_TO, admin_q_pause),
+ ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_ADMIN_TO, admin_to),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_OS_TRIGGER, os_trigger),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_MISS_TX_CMPL, missing_tx_cmpl),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_INV_RX_REQ_ID, bad_rx_req_id),
@@ -552,6 +554,7 @@ static const struct ena_reset_stats_offset resets_to_stats_offset_map[ENA_REGS_R
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_MISS_INTERRUPT, missing_intr),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_TX_DESCRIPTOR_MALFORMED, tx_desc_malformed),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_RX_DESCRIPTOR_MALFORMED, rx_desc_malformed),
+ ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT, missing_admin_interrupt),
};
int ena_up(struct ena_adapter *adapter);
diff --git a/sys/dev/ena/ena_sysctl.c b/sys/dev/ena/ena_sysctl.c
index 79c167221a0f..b9c880e2e8e4 100644
--- a/sys/dev/ena/ena_sysctl.c
+++ b/sys/dev/ena/ena_sysctl.c
@@ -298,6 +298,10 @@ ena_sysctl_add_stats(struct ena_adapter *adapter)
&dev_stats->tx_desc_malformed, "TX descriptors malformed count");
SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "rx_desc_malformed", CTLFLAG_RD,
&dev_stats->rx_desc_malformed, "RX descriptors malformed count");
+ SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_admin_interrupt", CTLFLAG_RD,
+ &dev_stats->missing_admin_interrupt, "Missing admin interrupts count");
+ SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_to", CTLFLAG_RD,
+ &dev_stats->admin_to, "Admin queue timeouts count");
SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "total_resets", CTLFLAG_RD,
&dev_stats->total_resets, "Total resets count");