svn commit: r315735 - head/sys/cam
Warner Losh
imp at FreeBSD.org
Wed Mar 22 19:18:48 UTC 2017
Author: imp
Date: Wed Mar 22 19:18:47 2017
New Revision: 315735
URL: https://svnweb.freebsd.org/changeset/base/315735
Log:
Implement moving SD.
From the paper "Incremental calculation of weighted mean and variance"
by Tony Finch, February 2009, retrieved from
http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf
and converted to use shifting.
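For reference, the paper's recurrences look roughly like the floating-point
sketch below (an illustration only, not part of the change; ema_observe and its
arguments are made-up names, with alpha_bits playing the same role as the
kern.cam.iosched_alpha_bits tunable). The change below replaces the
multiplications by alpha with shifts by alpha_bits and works in sbintime_t
fixed point instead of doubles:

    /*
     * Textbook exponentially weighted moving mean/variance update,
     * per Finch 2009.
     */
    struct ema_state {
            double  ema;    /* exponentially weighted moving average */
            double  emvar;  /* exponentially weighted moving variance */
    };

    static void
    ema_observe(struct ema_state *st, double y, int alpha_bits)
    {
            double alpha = 1.0 / (double)(1 << alpha_bits);
            double delta = y - st->ema;     /* deviation from the old mean */

            st->ema += alpha * delta;
            st->emvar = (1.0 - alpha) * (st->emvar + alpha * delta * delta);
            /* The standard deviation estimate is sqrt(st->emvar). */
    }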
Modified:
head/sys/cam/cam_iosched.c
Modified: head/sys/cam/cam_iosched.c
==============================================================================
--- head/sys/cam/cam_iosched.c Wed Mar 22 19:17:13 2017 (r315734)
+++ head/sys/cam/cam_iosched.c Wed Mar 22 19:18:47 2017 (r315735)
@@ -93,6 +93,7 @@ SYSCTL_INT(_kern_cam, OID_AUTO, do_dynam
* For a brief intro: https://en.wikipedia.org/wiki/Moving_average
*
* [*] Steal from the load average code and many other places.
+ * Note: See computation of EMA and EMVAR for acceptable ranges of alpha.
*/
static int alpha_bits = 9;
TUNABLE_INT("kern.cam.iosched_alpha_bits", &alpha_bits);
@@ -226,7 +227,7 @@ struct iop_stats {
*/
/* Exp Moving Average, see alpha_bits for more details */
sbintime_t ema;
- sbintime_t emss; /* Exp Moving sum of the squares */
+ sbintime_t emvar; /* Exp Moving Variance */
sbintime_t sd; /* Last computed sd */
uint32_t state_flags;
@@ -755,8 +756,7 @@ cam_iosched_iop_stats_init(struct cam_io
ios->queued = 0;
ios->total = 0;
ios->ema = 0;
- ios->emss = 0;
- ios->sd = 0;
+ ios->emvar = 0;
ios->softc = isc;
}
@@ -897,13 +897,9 @@ cam_iosched_iop_stats_sysctl_init(struct
&ios->ema,
"Fast Exponentially Weighted Moving Average");
SYSCTL_ADD_UQUAD(ctx, n,
- OID_AUTO, "emss", CTLFLAG_RD,
- &ios->emss,
- "Fast Exponentially Weighted Moving Sum of Squares (maybe wrong)");
- SYSCTL_ADD_UQUAD(ctx, n,
- OID_AUTO, "sd", CTLFLAG_RD,
- &ios->sd,
- "Estimated SD for fast ema (may be wrong)");
+ OID_AUTO, "emvar", CTLFLAG_RD,
+ &ios->emvar,
+ "Fast Exponentially Weighted Moving Variance");
SYSCTL_ADD_INT(ctx, n,
OID_AUTO, "pending", CTLFLAG_RD,
@@ -1523,35 +1519,6 @@ isqrt64(uint64_t val)
return res;
}
-/*
- * a and b are 32.32 fixed point stored in a 64-bit word.
- * Let al and bl be the .32 part of a and b.
- * Let ah and bh be the 32 part of a and b.
- * R is the radix and is 1 << 32
- *
- * a * b
- * (ah + al / R) * (bh + bl / R)
- * ah * bh + (al * bh + ah * bl) / R + al * bl / R^2
- *
- * After multiplication, we have to renormalize by multiplying by
- * R, so we wind up with
- * ah * bh * R + al * bh + ah * bl + al * bl / R
- * which turns out to be a very nice way to compute this value
- * so long as ah and bh are < 65536 there's no loss of high bits
- * and the low order bits are below the threshold of caring for
- * this application.
- */
-static uint64_t
-mul(uint64_t a, uint64_t b)
-{
- uint64_t al, ah, bl, bh;
- al = a & 0xffffffff;
- ah = a >> 32;
- bl = b & 0xffffffff;
- bh = b >> 32;
- return ((ah * bh) << 32) + al * bh + ah * bl + ((al * bl) >> 32);
-}
-
static sbintime_t latencies[] = {
SBT_1MS << 0,
SBT_1MS << 1,
@@ -1569,8 +1536,7 @@ static sbintime_t latencies[] = {
static void
cam_iosched_update(struct iop_stats *iop, sbintime_t sim_latency)
{
- sbintime_t y, yy;
- uint64_t var;
+ sbintime_t y, deltasq, delta;
int i;
/*
@@ -1591,42 +1557,61 @@ cam_iosched_update(struct iop_stats *iop
* (2 ^ -alpha_bits). For more info see the NIST statistical
* handbook.
*
- * ema_t = y_t * alpha + ema_t-1 * (1 - alpha)
+ * ema_t = y_t * alpha + ema_t-1 * (1 - alpha) [nist]
+ * ema_t = y_t * alpha + ema_t-1 - alpha * ema_t-1
+ * ema_t = alpha * y_t - alpha * ema_t-1 + ema_t-1
* alpha = 1 / (1 << alpha_bits)
+ * sub e == ema_t-1, b == 1/alpha (== 1 << alpha_bits), d == y_t - ema_t-1
+ * = y_t/b - e/b + be/b
+ * = (y_t - e + be) / b
+ * = (be + d) / b
*
* Since alpha is a power of two, we can compute this w/o any mult or
* division.
+ *
+ * Variance can also be computed. Usually, it would be expressed as follows:
+ * delta_t = y_t - ema_t-1
+ * emvar_t = (1 - alpha) * (emvar_t-1 + delta_t^2 * alpha)
+ * = emvar_t-1 - alpha * emvar_t-1 + delta_t^2 * alpha - (delta_t * alpha)^2
+ * sub b == 1/alpha (== 1 << alpha_bits), e == emvar_t-1, d == delta_t (so dd == delta_t^2)
+ * = e - e/b + dd/b + dd/bb
+ * = (bbe - be + bdd + dd) / bb
+ * = (bbe + b(dd - e) + dd) / bb (which is what is computed below; bb == 1 << (2 * alpha_bits))
+ */
+ /*
+ * XXX possible numeric issues
+ * o We assume right shifts of negative integers behave arithmetically, even though
+ * that's implementation defined. You can change the right shifts to / (1LL << alpha_bits).
+ * o alpha_bits = 9 gives a ceiling of 23 bits of seconds for the ema and 14 bits
+ * for emvar. This puts a ceiling of 13 bits on alpha_bits since we need a
+ * few tens of seconds of representation.
+ * o We mitigate these alpha issues by never setting alpha_bits too high.
*/
y = sim_latency;
- iop->ema = (y + (iop->ema << alpha_bits) - iop->ema) >> alpha_bits;
-
- yy = mul(y, y);
- iop->emss = (yy + (iop->emss << alpha_bits) - iop->emss) >> alpha_bits;
+ delta = (y - iop->ema); /* d */
+ iop->ema = ((iop->ema << alpha_bits) + delta) >> alpha_bits;
/*
- * s_1 = sum of data
- * s_2 = sum of data * data
- * ema ~ mean (or s_1 / N)
- * emss ~ s_2 / N
- *
- * sd = sqrt((N * s_2 - s_1 ^ 2) / (N * (N - 1)))
- * sd = sqrt((N * s_2 / N * (N - 1)) - (s_1 ^ 2 / (N * (N - 1))))
- *
- * N ~ 2 / alpha - 1
- * alpha < 1 / 16 (typically much less)
- * N > 31 --> N large so N * (N - 1) is approx N * N
- *
- * substituting and rearranging:
- * sd ~ sqrt(s_2 / N - (s_1 / N) ^ 2)
- * ~ sqrt(emss - ema ^ 2);
- * which is the formula used here to get a decent estimate of sd which
- * we use to detect outliers. Note that when first starting up, it
- * takes a while for emss sum of squares estimator to converge on a
- * good value. during this time, it can be less than ema^2. We
- * compute a sd of 0 in that case, and ignore outliers.
- */
- var = iop->emss - mul(iop->ema, iop->ema);
- iop->sd = (int64_t)var < 0 ? 0 : isqrt64(var);
+ * Were we to naively plow ahead at this point, we wind up with many numerical
+ * issues making any SD > ~3ms unreliable. So, we shift right by 12. This leaves
+ * us with microsecond-level precision in the input, so the same in the
+ * output. It means we can't overflow deltasq unless delta > 4k seconds. It
+ * also means that emvar can be up to 46 bits, 40 of which are fraction, which
+ * gives us a way to measure up to ~8s in the SD before the computation goes
+ * unstable. Even the worst hard disk rarely has > 1s service time in the
+ * drive. It does mean we have to shift left 12 bits after taking the
+ * square root to compute the actual standard deviation estimate. This loss of
+ * precision is preferable to needing int128 types to work. The above numbers
+ * assume alpha_bits = 9. 10 or 11 are OK, but we start to run into issues at 12,
+ * so 12 or 13 are OK for the EMA, but EMVAR and SD will be wrong in those cases.
+ */
+ delta >>= 12;
+ deltasq = delta * delta; /* dd */
+ iop->emvar = ((iop->emvar << (2 * alpha_bits)) + /* bbe */
+ ((deltasq - iop->emvar) << alpha_bits) + /* b(dd-e) */
+ deltasq) /* dd */
+ >> (2 * alpha_bits); /* div bb */
+ iop->sd = (sbintime_t)isqrt64((uint64_t)iop->emvar) << 12;
}
static void
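For anyone who wants to poke at the numerics from the derivation and scaling
notes above outside the kernel, a userland sketch along the following lines
exercises the same shift-based update and compares it against the
double-precision recurrence from the paper. It is an illustration only:
ALPHA_BITS, the synthetic latencies, and the local isqrt64 are stand-ins
invented for the example; only the ema/emvar/sd arithmetic mirrors what
cam_iosched_update() now does.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <math.h>

    #define ALPHA_BITS      9       /* same default as kern.cam.iosched_alpha_bits */

    /* Bit-by-bit integer square root, standing in for the kernel's isqrt64(). */
    static uint64_t
    isqrt64(uint64_t val)
    {
            uint64_t res = 0;
            uint64_t bit = 1ULL << 62;

            while (bit > val)
                    bit >>= 2;
            while (bit != 0) {
                    if (val >= res + bit) {
                            val -= res + bit;
                            res = (res >> 1) + bit;
                    } else
                            res >>= 1;
                    bit >>= 2;
            }
            return (res);
    }

    int
    main(void)
    {
            int64_t ema = 0, emvar = 0, sd = 0;
            double m = 0.0, v = 0.0, alpha = 1.0 / (1 << ALPHA_BITS);
            int i;

            for (i = 0; i < 200000; i++) {
                    /* Synthetic ~1ms latency plus jitter, in 32.32 sbintime-style units. */
                    int64_t y = (((int64_t)1 << 32) / 1000) + (rand() % (1 << 20));
                    int64_t delta, deltasq;

                    /*
                     * Shift-based update, as in cam_iosched_update(); assumes
                     * arithmetic right shifts of negative values, like the driver.
                     */
                    delta = y - ema;                                /* d */
                    ema = ((ema << ALPHA_BITS) + delta) >> ALPHA_BITS;
                    delta >>= 12;                                   /* scale to keep deltasq in range */
                    deltasq = delta * delta;                        /* dd */
                    emvar = ((emvar << (2 * ALPHA_BITS)) +          /* bbe */
                        ((deltasq - emvar) << ALPHA_BITS) +         /* b(dd-e) */
                        deltasq) >> (2 * ALPHA_BITS);               /* div bb */
                    sd = (int64_t)isqrt64((uint64_t)emvar) << 12;   /* undo the scaling */

                    /* Double-precision reference recurrence from the paper. */
                    double d = (double)y - m;
                    m += alpha * d;
                    v = (1.0 - alpha) * (v + alpha * d * d);
            }
            printf("ema %jd vs %.0f, sd %jd vs %.0f\n",
                (intmax_t)ema, m, (intmax_t)sd, sqrt(v));
            return (0);
    }

The two columns should land close to each other; the remaining gap is the price
of the >> 12 scaling and of approximating alpha by a power of two.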