git: 22054f88914b - main - Report I/O stats from the CAM_IOSCHED_DYNAMIC extension

From: Warner Losh <imp_at_FreeBSD.org>
Date: Mon, 28 Feb 2022 17:50:40 UTC
The branch main has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=22054f88914b51113f77f6eccc11353a891f9f3e

commit 22054f88914b51113f77f6eccc11353a891f9f3e
Author:     Warner Losh <imp@FreeBSD.org>
AuthorDate: 2022-02-28 17:17:06 +0000
Commit:     Warner Losh <imp@FreeBSD.org>
CommitDate: 2022-02-28 17:44:47 +0000

    Report I/O stats from the CAM_IOSCHED_DYNAMIC extension
    
    Report, on a periodic basis, the I/O latencies the CAM I/O scheduler
    computes. These times are only for the hardware portion of the I/O as
    measured from the time the operation is scheduled with the SIM using
    xpt_action() until the SIM reports it has completed with xpt_dine(). Any
    time the I/O operation spends in a software queue is no included.
    
    The P50 (median), P90, P99 and P99.9 statistics about the latency of
    each of the read, write and trim operations that completed during the
    polling interval are reported. If there are fewer than 2, 10, 100 or
    1000 operations during the polling interval, no statistic is reported
    and a single dash '-' is displayed.
    
    The read, write and trim commands (either on the command line or at run
    time) toggle display of these operations. The color command toggles
    color (it defaults to on, like gstat). When color is enabled, unknown
    statistics are reported in blue, high latency for a statistics is
    reported in red, medium in magenta and low in green (as with gstat). The
    med= and hi= commands can set these latency thresholds.
    
    Limitations: The entire sysctl space for all the devices is walked for
    each polling period. This should be optimized to remember the OIDs and
    only do such polling with the xpt generation changes. There is also no
    way to filter devices displayed. This command only works on physical
    devies that are connected to SCSI, ATA or NVME sims as those are the
    only ones that are instrumented in the CAM I/O scheduler (the
    CAM_IOSCHED_DYNAMIC option must be in the kernel, and the dynamic
    scheduler can't be disabled).
    
    MFC After:              1 month
    Relnotes:               yes
    Sponsored by:           Netflix
    Reviewed by:            pauamma_gundo.com, chs
    Differential Revision:  https://reviews.freebsd.org/D34259
---
 usr.bin/systat/Makefile |   2 +-
 usr.bin/systat/cmdtab.c |   3 +
 usr.bin/systat/extern.h |   2 +
 usr.bin/systat/iolat.c  | 520 ++++++++++++++++++++++++++++++++++++++++++++++++
 usr.bin/systat/main.c   |   8 +
 usr.bin/systat/systat.1 |  60 ++++++
 6 files changed, 594 insertions(+), 1 deletion(-)

diff --git a/usr.bin/systat/Makefile b/usr.bin/systat/Makefile
index e0575ff0d15a..a0abf22177a8 100644
--- a/usr.bin/systat/Makefile
+++ b/usr.bin/systat/Makefile
@@ -4,7 +4,7 @@
 .include <src.opts.mk>
 
 PROG=	systat
-SRCS=	cmds.c cmdtab.c devs.c fetch.c iostat.c keyboard.c main.c sysput.c \
+SRCS=	cmds.c cmdtab.c devs.c fetch.c iolat.c iostat.c keyboard.c main.c sysput.c \
 	netcmds.c netstat.c pigs.c proc.c swap.c icmp.c \
 	mode.c ip.c sctp.c tcp.c zarc.c \
 	vmstat.c convtbl.c ifcmds.c ifstat.c
diff --git a/usr.bin/systat/cmdtab.c b/usr.bin/systat/cmdtab.c
index 52329948748b..068823c73874 100644
--- a/usr.bin/systat/cmdtab.c
+++ b/usr.bin/systat/cmdtab.c
@@ -83,6 +83,9 @@ struct	cmdtab cmdtab[] = {
 	{ "zarc",	showzarc,	fetchzarc,	labelzarc,
 	  initzarc,	openzarc,	closezarc,	0,
 	  resetzarc,	CF_ZFSARC },
+	{ "iolat",	showiolat,	fetchiolat,	labeliolat,
+	  initiolat,	openiolat,	closeiolat,	cmdiolat,
+	  0,		CF_LOADAV },
 	{ NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0 }
 };
 struct  cmdtab *curcmd = &cmdtab[0];
diff --git a/usr.bin/systat/extern.h b/usr.bin/systat/extern.h
index 3b5597598b8b..fc58b7a2af9c 100644
--- a/usr.bin/systat/extern.h
+++ b/usr.bin/systat/extern.h
@@ -88,6 +88,7 @@ void	 closepigs(WINDOW *);
 void	 closeswap(WINDOW *);
 void	 closetcp(WINDOW *);
 int	 cmdifstat(const char *, const char *);
+int	 cmdiolat(const char *, const char *);
 int	 cmdiostat(const char *, const char *);
 int	 cmdkre(const char *, const char *);
 int	 cmdnetstat(const char *, const char *);
@@ -188,3 +189,4 @@ void	 sysputwuint64(WINDOW* , int, int, int, uint64_t, int);
 
 SYSTAT_CMD( zarc );
 SYSTAT_CMD( sctp );
+SYSTAT_CMD( iolat );
diff --git a/usr.bin/systat/iolat.c b/usr.bin/systat/iolat.c
new file mode 100644
index 000000000000..9535297df3ee
--- /dev/null
+++ b/usr.bin/systat/iolat.c
@@ -0,0 +1,520 @@
+/*
+ * Copyright (c) 2021 Netflix, Inc
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <sys/resource.h>
+
+#include <devstat.h>
+#include <err.h>
+#include <errno.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+
+#include "systat.h"
+#include "extern.h"
+#include "devs.h"
+
+#define CAM_BASE "kern.cam"
+#define LATENCY ".latencies"
+#define CAM_IOSCHED_BASE "kern.cam.iosched.bucket_base_us"
+
+#define DEV_NAMSIZE	32
+#define OP_NAMSIZE	16
+#define MAX_LATS	32
+
+static double high_thresh = 500;
+static double med_thresh = 300;
+static bool docolor = true;
+
+static int ndevs;
+static SLIST_HEAD(, iosched_stat)	curlist;
+
+struct iosched_op_stat {
+	int		nlats;
+	uint64_t	lats[MAX_LATS];
+	uint64_t	prev_lats[MAX_LATS];
+};
+
+enum { OP_READ = 0, OP_WRITE, OP_TRIM, NUM_OPS };
+static const char *ops[NUM_OPS] = { "read", "write", "trim" };
+#define OP_READ_MASK (1 << OP_READ)
+#define OP_WRITE_MASK (1 << OP_WRITE)
+#define OP_TRIM_MASK (1 << OP_TRIM)
+
+static uint32_t flags = OP_READ_MASK | OP_WRITE_MASK | OP_TRIM_MASK;
+
+struct iosched_stat {
+	SLIST_ENTRY(iosched_stat)	 link;
+	char		dev_name[DEV_NAMSIZE];
+	int		unit;
+	struct iosched_op_stat op_stats[NUM_OPS];
+};
+
+static int	name2oid(const char *, int *);
+static int	walk_sysctl(int *, size_t);
+
+static int
+name2oid(const char *name, int *oidp)
+{
+	int oid[2];
+	int i;
+	size_t j;
+
+	oid[0] = CTL_SYSCTL;
+	oid[1] = CTL_SYSCTL_NAME2OID;
+
+	j = CTL_MAXNAME * sizeof(int);
+	i = sysctl(oid, 2, oidp, &j, name, strlen(name));
+	if (i < 0)
+		return (i);
+	j /= sizeof(int);
+	return (j);
+}
+
+static size_t /* Includes the trailing NUL */
+oid2name(int *oid, size_t nlen, char *name, size_t namlen)
+{
+	int qoid[CTL_MAXNAME + 2];
+	int i;
+	size_t j;
+
+	bzero(name, namlen);
+	qoid[0] = CTL_SYSCTL;
+	qoid[1] = CTL_SYSCTL_NAME;
+	memcpy(qoid + 2, oid, nlen * sizeof(int));
+	j = namlen;
+	i = sysctl(qoid, nlen + 2, name, &j, 0, 0);
+	if (i || !j)
+		err(1, "sysctl name %d %zu %d", i, j, errno);
+	return (j);
+}
+
+static int
+oidfmt(int *oid, int len, u_int *kind)
+{
+	int qoid[CTL_MAXNAME+2];
+	u_char buf[BUFSIZ];
+	int i;
+	size_t j;
+
+	qoid[0] = CTL_SYSCTL;
+	qoid[1] = CTL_SYSCTL_OIDFMT;
+	memcpy(qoid + 2, oid, len * sizeof(int));
+
+	j = sizeof(buf);
+	i = sysctl(qoid, len + 2, buf, &j, 0, 0);
+	if (i)
+		err(1, "sysctl fmt %d %zu %d", i, j, errno);
+	*kind = *(u_int *)buf;
+	return (0);
+}
+
+static int
+split_u64(char *str, const char *delim, uint64_t *buckets, int *nbuckets)
+{
+	int n = *nbuckets, i;
+	char *v;
+
+	memset(buckets, 0, n * sizeof(buckets[0]));
+	for (i = 0; (v = strsep(&str, delim)) != NULL && i < n; i++) {
+		buckets[i] = strtoull(v, NULL, 10);
+	}
+	if (i < n)
+		*nbuckets = i;
+	return (i < n);
+}
+
+static double baselat = 0.000020;
+
+static float
+pest(int permill, uint64_t *lats, int nlat)
+{
+	uint64_t tot, samp;
+	int i;
+	float b1, b2;
+
+	for (tot = 0, i = 0; i < nlat; i++)
+		tot += lats[i];
+	if (tot == 0)
+		return -nanf("");
+	if (tot < (uint64_t)2000 / (1000 - permill))
+		return nanf("");
+	samp = tot * permill / 1000;
+	if (samp < lats[0])
+		return baselat * (float)samp / lats[0]; /* linear interpolation 0 and baselat */
+	for (tot = 0, i = 0; samp >= tot && i < nlat; i++)
+		tot += lats[i];
+	i--;
+	b1 = baselat * (1 << (i - 1));
+	b2 = baselat * (1 << i);
+	/* Should expoentially interpolate between buckets -- doing linear instead */
+	return b1 + (b2 - b1) * (float)(lats[i] - (tot - samp)) / lats[i];
+}
+
+static int
+op2num(const char *op)
+{
+	for (int i = 0; i < NUM_OPS; i++)
+		if (strcmp(op, ops[i]) == 0)
+			return i;
+	return -1;
+}
+
+static struct iosched_op_stat *
+find_dev(const char *dev, int unit, int op)
+{
+	struct iosched_stat *isp;
+	struct iosched_op_stat *iosp;
+
+	SLIST_FOREACH(isp, &curlist, link) {
+		if (strcmp(isp->dev_name, dev) != 0 || isp->unit != unit)
+			continue;
+		iosp = &isp->op_stats[op];
+		return iosp;
+	}
+	return NULL;
+}
+
+static struct iosched_op_stat *
+alloc_dev(const char *dev, int unit, int op)
+{
+	struct iosched_stat *isp;
+	struct iosched_op_stat *iosp;
+
+	isp = malloc(sizeof(*isp));
+	if (isp == NULL)
+		return NULL;
+	strlcpy(isp->dev_name, dev, sizeof(isp->dev_name));
+	isp->unit = unit;
+	SLIST_INSERT_HEAD(&curlist, isp, link);
+	ndevs++;
+	iosp = &isp->op_stats[op];
+	return iosp;
+}
+
+#define E3 1000.0
+static void
+update_dev(const char *dev, int unit, int op, uint64_t *lats, int nlat)
+{
+	struct iosched_op_stat *iosp;
+
+	iosp = find_dev(dev, unit, op);
+	if (iosp == NULL)
+		iosp = alloc_dev(dev, unit, op);
+	if (iosp == NULL)
+		return;
+	iosp->nlats = nlat;
+	memcpy(iosp->prev_lats, iosp->lats, iosp->nlats * sizeof(uint64_t));
+	memcpy(iosp->lats, lats, iosp->nlats * sizeof(uint64_t));
+//	printf("%s%d: %-6s %.3f %.3f %.3f %.3f\r\n",
+//	    dev, unit, operation, E3 * pest(500, lats, nlat), E3 * pest(900, lats, nlat),
+//	    E3 * pest(990, lats, nlat), E3 * pest(999, lats, nlat));
+}
+
+static int
+walk_sysctl(int *base_oid, size_t len)
+{
+	int qoid[CTL_MAXNAME + 2], oid[CTL_MAXNAME];
+	size_t l1, l2;
+	char name[BUFSIZ];
+
+	if (len > CTL_MAXNAME)
+		err(1, "Length %zd too long", len);
+
+	qoid[0] = CTL_SYSCTL;
+	qoid[1] = CTL_SYSCTL_NEXT;
+	l1 = 2;
+	memcpy(qoid + 2, base_oid, len * sizeof(int));
+	l1 += len;
+	for (;;) {
+		/*
+		 * Get the next one or return when we get to the end of the
+		 * sysctls in the kernel.
+		 */
+		l2 = sizeof(oid);
+		if (sysctl(qoid, l1, oid, &l2, 0, 0) != 0) {
+			if (errno == ENOENT)
+				return (0);
+			err(1, "sysctl(getnext) %zu", l2);
+		}
+
+		l2 /= sizeof(int);
+
+		/*
+		 * Bail if we're seeing OIDs that don't have the
+		 * same prefix or can't have the same prefix.
+		 */
+		if (l2 < len ||
+		    memcmp(oid, base_oid, len * sizeof(int)) != 0)
+			return (0);
+
+		/*
+		 * Get the name, validate it's one we're looking for,
+		 * parse the latency and add to list.
+		 */
+		do {
+			int nlat;
+			size_t l3;
+			char val[BUFSIZ];
+			char *walker, *dev, *opstr;
+			uint64_t latvals[MAX_LATS];
+			u_int kind;
+			int unit, op;
+
+			l1 = oid2name(oid, l2, name, sizeof(name));
+			if (strcmp(name + l1 - strlen(LATENCY) - 1, LATENCY) != 0)
+				break;
+			if (oidfmt(oid, l2, &kind) != 0)
+				err(1, "oidfmt");
+			if ((kind & CTLTYPE) != CTLTYPE_STRING)
+				errx(1, "string");
+			l3 = sizeof(val);
+			if (sysctl(oid, l2, val, &l3, 0, 0) != 0)
+				err(1, "sysctl");
+			val[l3] = '\0';
+			nlat = nitems(latvals);
+			if (split_u64(val, ",", latvals, &nlat) == 0)
+				break;
+			walker = name + strlen(CAM_BASE) + 1;
+			dev = strsep(&walker, ".");
+			unit = (int)strtol(strsep(&walker, "."), NULL, 10);
+			strsep(&walker, ".");
+			opstr = strsep(&walker, ".");
+			op = op2num(opstr);
+			if (op < 0)
+				break;
+			update_dev(dev, unit, op, latvals, nlat);
+		} while (false);
+
+		memcpy(qoid + 2, oid, l2 * sizeof(int));
+		l1 = 2 + l2;
+	}
+}
+
+void
+closeiolat(WINDOW *w)
+{
+	if (w == NULL)
+		return;
+	wclear(w);
+	wrefresh(w);
+	delwin(w);
+}
+
+static void
+doublecmd(const char *cmd, double *v)
+{
+	const char *p;
+	double tv;
+
+	p = strchr(cmd, '=');
+	if (p == NULL)
+		return;	/* XXX Tell the user something? */
+	if (sscanf(p + 1, "%lf", &tv) != 1)
+		return;	/* XXX Tell the user something? */
+	*v = tv;
+}
+
+int
+cmdiolat(const char *cmd __unused, const char *args __unused)
+{
+	fprintf(stderr, "CMD IS '%s'\n\n", cmd);
+	if (prefix(cmd, "trim"))
+		flags ^= OP_TRIM_MASK;
+	else if (prefix(cmd, "read"))
+		flags ^= OP_READ_MASK;
+	else if (prefix(cmd, "write"))
+		flags ^= OP_WRITE_MASK;
+	else if (prefix(cmd, "color"))
+		docolor = !docolor;
+	else if (prefix("high", cmd))
+		doublecmd(cmd, &high_thresh);
+	else if (prefix("med", cmd))
+		doublecmd(cmd, &med_thresh);
+	else
+		return (0);
+	wclear(wnd);
+	labeliolat();
+	refresh();
+	return (1);
+}
+
+int
+initiolat(void)
+{
+	int cam[CTL_MAXNAME];
+	uint64_t sbt_base;
+	size_t len = sizeof(sbt_base);
+
+	SLIST_INIT(&curlist);
+
+	baselat = 1e-3;		/* old default */
+	if (sysctlbyname(CAM_IOSCHED_BASE, &sbt_base, &len, NULL, 0) == 0)
+		baselat = sbt_base * 1e-6;	/* Convert to microseconds */
+
+	name2oid(CAM_BASE, cam);
+	walk_sysctl(cam, 2);
+	return (1);
+}
+
+void
+fetchiolat(void)
+{
+	int cam[CTL_MAXNAME];
+
+	name2oid(CAM_BASE, cam);
+	walk_sysctl(cam, 2);
+}
+
+#define	INSET	10
+
+void
+labeliolat(void)
+{
+	int _col, ndrives, lpr, row, j;
+	int regions __unused;
+	struct iosched_stat *isp;
+	char tmpstr[32];
+#define COLWIDTH	29
+#define DRIVESPERLINE	((getmaxx(wnd) - 1 - INSET) / COLWIDTH)
+	ndrives = ndevs; // XXX FILTER XXX
+	regions = howmany(ndrives, DRIVESPERLINE);
+	lpr = 2; /* for headers */
+	for (int i = 0; i < NUM_OPS; i++) {
+		if (flags & (1 << i))
+			lpr++;
+	}
+	row = 0;
+	_col = INSET;
+	j = 2;
+	if (flags & OP_READ_MASK)
+		mvwaddstr(wnd, row + j++, 1, "read");
+	if (flags & OP_WRITE_MASK)
+		mvwaddstr(wnd, row + j++, 1, "write");
+	if (flags & OP_TRIM_MASK)
+		mvwaddstr(wnd, row + j++, 1, "trim");
+	SLIST_FOREACH(isp, &curlist, link) {
+		if (_col + COLWIDTH >= getmaxx(wnd) - 1 - INSET) {
+			_col = INSET;
+			row += lpr + 1;
+			if (row > getmaxy(wnd) - 1 - (lpr + 1))
+				break;
+			j = 2;
+			if (flags & OP_READ_MASK)
+				mvwaddstr(wnd, row + j++, 1, "read");
+			if (flags & OP_WRITE_MASK)
+				mvwaddstr(wnd, row + j++, 1, "write");
+			if (flags & OP_TRIM_MASK)
+				mvwaddstr(wnd, row + j++, 1, "trim");
+		}
+		snprintf(tmpstr, sizeof(tmpstr), "%s%d", isp->dev_name, isp->unit);
+		mvwaddstr(wnd, row, _col + (COLWIDTH - strlen(tmpstr)) / 2, tmpstr);
+		mvwaddstr(wnd, row + 1, _col, "   p50    p90    p99  p99.9");
+		_col += COLWIDTH;
+	}
+}
+
+WINDOW *
+openiolat(void)
+{
+	return (subwin(stdscr, LINES-3-1, 0, MAINWIN_ROW, 0));
+}
+
+static void
+fmt(float f, char *buf, size_t len)
+{
+	if (isnan(f))
+		strlcpy(buf, "   -  ", len);
+	else if (f >= 1000.0)
+		snprintf(buf, len, "%6d", (int)f);
+	else if (f >= 100.0)
+		snprintf(buf, len, "%6.1f", f);
+	else if (f >= 10.0)
+		snprintf(buf, len, "%6.2f", f);
+	else
+		snprintf(buf, len, "%6.3f", f);
+}
+
+static void
+latout(double lat, int y, int x)
+{
+	int i;
+	char tmpstr[32];
+
+	fmt(lat, tmpstr, sizeof(tmpstr));
+	if (isnan(lat))
+		i = 4;
+	else if (lat > high_thresh)
+		i = 3;
+	else if (lat > med_thresh)
+		i = 2;
+	else
+		i = 1;
+	if (docolor)
+		wattron(wnd, COLOR_PAIR(i));
+	mvwaddstr(wnd, y, x, tmpstr);
+	if (docolor)
+		wattroff(wnd, COLOR_PAIR(i));
+}
+
+void
+showiolat(void)
+{
+	int _col, ndrives, lpr, row, k;
+	int regions __unused;
+	struct iosched_stat *isp;
+	struct iosched_op_stat *iosp;
+#define COLWIDTH	29
+#define DRIVESPERLINE	((getmaxx(wnd) - 1 - INSET) / COLWIDTH)
+	ndrives = ndevs; // XXX FILTER XXX
+	regions = howmany(ndrives, DRIVESPERLINE);
+	lpr = 2; /* XXX */
+	for (int i = 0; i < NUM_OPS; i++) {
+		if (flags & (1 << i))
+			lpr++;
+	}
+	row = 0;
+	_col = INSET;
+	SLIST_FOREACH(isp, &curlist, link) {
+		if (_col + COLWIDTH >= getmaxx(wnd) - 1 - INSET) {
+			_col = INSET;
+			row += lpr + 1;
+			if (row > getmaxy(wnd) - 1 - (lpr + 1))
+				break;
+		}
+		k = 2;
+		for (int i = 0; i < NUM_OPS; i++) {
+			uint64_t lats[MAX_LATS];
+			int nlats;
+			float p50, p90, p99, p999;
+
+			if ((flags & (1 << i)) == 0)
+				continue;
+			iosp = &isp->op_stats[i];
+			nlats = iosp->nlats;
+			memset(lats, 0, sizeof(lats));
+			for (int j = 0; j < iosp->nlats; j++)
+				lats[j] = iosp->lats[j] - iosp->prev_lats[j];
+			p50 = pest(500, lats, nlats) * E3;
+			p90 = pest(900, lats, nlats) * E3;
+			p99 = pest(990, lats, nlats) * E3;
+			p999 = pest(999, lats, nlats) * E3;
+			latout(p50, row + k, _col);
+			latout(p90, row + k, _col + 7);
+			latout(p99, row + k, _col + 14);
+			latout(p999, row + k, _col + 21);
+			k++;
+		}
+		_col += COLWIDTH;
+	}
+}
diff --git a/usr.bin/systat/main.c b/usr.bin/systat/main.c
index 3266fa187714..a52fbd7ebc4d 100644
--- a/usr.bin/systat/main.c
+++ b/usr.bin/systat/main.c
@@ -155,6 +155,7 @@ main(int argc, char **argv)
 	char errbuf[_POSIX2_LINE_MAX], dummy;
 	size_t	size;
 	struct cmdentry *cmd = NULL;
+	short cf, cb;
 
 	(void) setlocale(LC_ALL, "");
 
@@ -214,6 +215,13 @@ main(int argc, char **argv)
 	 * routines to minimize update work by curses.
 	 */
 	initscr();
+	start_color();
+	use_default_colors();
+	pair_content(0, &cf, &cb);
+	init_pair(1, COLOR_GREEN, cb);
+	init_pair(2, COLOR_MAGENTA, cb);
+	init_pair(3, COLOR_RED, cb);
+	init_pair(4, COLOR_BLUE, cb);
 	CMDLINE = LINES - 1;
 	wnd = (*curcmd->c_open)();
 	if (wnd == NULL) {
diff --git a/usr.bin/systat/systat.1 b/usr.bin/systat/systat.1
index 1f6ddaeafb1a..9f866727bd89 100644
--- a/usr.bin/systat/systat.1
+++ b/usr.bin/systat/systat.1
@@ -88,6 +88,7 @@ to be one of:
 .Ic icmp ,
 .Ic icmp6 ,
 .Ic ifstat ,
+.Ic iolat ,
 .Ic iostat ,
 .Ic ip ,
 .Ic ip6 ,
@@ -244,6 +245,59 @@ but with SCTP statistics.
 Like
 .Ic icmp ,
 but with TCP statistics.
+.It Ic iolat
+Display statistics describing the hardware latencies of I/O operations as
+computed by the
+.Va CAM_IOSCHED_DYNAMIC
+option.
+This option must be in the kernel config file of the running kernel for this
+display to work.
+All devices are displayed as there is currently no way to filter them.
+The statistics displayed for the I/O latencies are the percentiles with
+sufficient data during the polling interval to compute.
+If a value cannot be estimated ``-'' is displayed.
+The P50 (also known as the median), P90, P99 and P99.9 values are computed if
+more than 2, 10, 100 or 1000 operations occurred during the polling interval.
+The latency is the hardware latency values, and does not include any software
+queueing time.
+The latencies are estimated based on histogram data computed by the CAM I/O
+scheduler and represent estimates of the actual value that are only good to
+two or three significant digits.
+The display of latency changes based on the scale of the latency to reflect
+the precision of the estimates and to fit on the available screen space.
+All latencies are reported in milliseconds.
+When color is enabled
+.Bl -bullet
+.It Values below the medium latency threshold are displayed in green.
+.It Values between the minimum latency and high latency thresholds are displayed
+in magenta.
+.It Values above the high latency thresholds are displayed in red.
+.Pp
+When color is disabled, the default foreground and background colors are always
+used.
+.Pp
+The following commands are specific to the
+.Ic iolat
+display; the minimum unambiguous prefix may be supplied.
+.Pp
+.Bl -tag -width Fl -compact
+.It Cm color
+Toggle the use of color in the display.
+The default is on.
+.It Cm hi=XXX
+Set the high latency threshold to XXX milliseconds.
+.It Cm med=XXX
+Set the medium latency threshold to XXX milliseconds.
+.It Cm read
+Toggle the display of statistics about read operations.
+The default is on.
+.It Cm write
+Toggle the display of statistics about write operations.
+The default is on.
+.It Cm trim
+Toggle the display of statistics about trim operations.
+The default is on.
+.El
 .It Ic iostat
 Display, in the lower window, statistics about processor use
 and disk throughput.
@@ -682,3 +736,9 @@ The
 .Ic vmstat
 display looks out of place because it is (it was added in as
 a separate display rather than created as a new program).
+The
+.Ic iolat
+command doesn't implement the common device commands including
+filtering, as it doesn't use the
+.Xr devstat 3
+mechanism to obtain its statistics.