git: e82644e59ece - main - cam/iosched: Add a counter of I/Os that take too long
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 20 Jul 2024 02:59:06 UTC
The branch main has been updated by imp:
URL: https://cgit.FreeBSD.org/src/commit/?id=e82644e59ece5cdc67250262508e81fa22deea90
commit e82644e59ece5cdc67250262508e81fa22deea90
Author: Warner Losh <imp@FreeBSD.org>
AuthorDate: 2024-07-20 02:52:40 +0000
Commit: Warner Losh <imp@FreeBSD.org>
CommitDate: 2024-07-20 02:53:37 +0000
cam/iosched: Add a counter of I/Os that take too long
Add kern.cam.DEV.UNIT.iosched.too_long (to count I/Os taking too long)
and kern.cam.DEV.UNIT.iosched.bad_latency (to set this threshold, defaults to
500ms). Each class of I/O (read, write, trim) has its own counters and
thresholds.
Sponsored by: Netflix
Reviewed by: jhb
Differential Revision: https://reviews.freebsd.org/D46033
---
sys/cam/cam_iosched.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/sys/cam/cam_iosched.c b/sys/cam/cam_iosched.c
index 471e6c355d69..022eb23cb621 100644
--- a/sys/cam/cam_iosched.c
+++ b/sys/cam/cam_iosched.c
@@ -271,6 +271,9 @@ struct iop_stats {
sbintime_t emvar;
sbintime_t sd; /* Last computed sd */
+ uint64_t too_long; /* Number of I/Os greater than bad lat threshold */
+ sbintime_t bad_latency; /* Latency threshold */
+
uint32_t state_flags;
#define IOP_RATE_LIMITED 1u
@@ -856,6 +859,7 @@ cam_iosched_iop_stats_init(struct cam_iosched_softc *isc, struct iop_stats *ios)
ios->total = 0;
ios->ema = 0;
ios->emvar = 0;
+ ios->bad_latency = SBT_1S / 2; /* Default to 500ms */
ios->softc = isc;
cam_iosched_limiter_init(ios);
}
@@ -1046,6 +1050,15 @@ cam_iosched_iop_stats_sysctl_init(struct cam_iosched_softc *isc, struct iop_stat
OID_AUTO, "errs", CTLFLAG_RD,
&ios->errs, 0,
"# of transactions completed with an error");
+ SYSCTL_ADD_U64(ctx, n,
+ OID_AUTO, "too_long", CTLFLAG_RD,
+ &ios->too_long, 0,
+ "# of transactions completed took too long");
+ SYSCTL_ADD_PROC(ctx, n,
+ OID_AUTO, "bad_latency",
+ CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ &ios->bad_latency, 0, cam_iosched_sbintime_sysctl, "A",
+ "Threshold for counting transactions that took too long (in us)");
SYSCTL_ADD_PROC(ctx, n,
OID_AUTO, "limiter",
@@ -1916,6 +1929,14 @@ cam_iosched_update(struct iop_stats *iop, sbintime_t sim_latency)
sbintime_t y, deltasq, delta;
int i;
+ /*
+ * Simple threshold: count the number of events that exceed the
+ * configured threshold.
+ */
+ if (sim_latency > iop->bad_latency) {
+ iop->too_long++;
+ }
+
/*
* Keep counts for latency. We do it by power of two buckets.
* This helps us spot outlier behavior obscured by averages.