PERFORCE change 165689 for review
Fabio Checconi
fabio at FreeBSD.org
Mon Jul 6 14:38:03 UTC 2009
http://perforce.freebsd.org/chv.cgi?CH=165689
Change 165689 by fabio at fabio_granpasso on 2009/07/06 14:37:41
Snapshot of private repo: add seekiness and thinktime heuristics,
improve async writeout accounting, update the bio classification
code to use the hooks committed in HEAD (when configured).
Affected files ...
.. //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/g_sched.c#2 edit
.. //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/gs_rr.c#2 edit
.. //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/gs_scheduler.h#2 edit
Differences ...
==== //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/g_sched.c#2 (text+ko) ====
@@ -114,6 +114,7 @@
#include "gs_scheduler.h"
#include "g_sched.h" /* geom hooks */
+#define HAVE_BIO_CLASSIFIER
/*
* Size of the per-geom hash table storing traffic classes.
* We may decide to change it at a later time, it has no ABI
@@ -178,14 +179,6 @@
.gs_expire_secs = 10,
};
-/*
- * What kind of classifier we want to use ?
- * (not supported yet)
- */
-#define G_CLASS_PID 0
-
-static const int g_sched_classifier = G_CLASS_PID;
-
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, sched, CTLFLAG_RW, 0,
"GEOM_SCHED stuff");
@@ -321,31 +314,24 @@
* so we do not make assumptions on the return value which for
* us is just an opaque identifier.
*/
-static inline struct thread *
-g_sched_issuer(struct bio *bp)
+#ifndef HAVE_BIO_CLASSIFIER
+static inline u_long
+g_sched_classify(struct bio *bp)
{
while (bp->bio_parent != NULL)
bp = bp->bio_parent;
- return (bp->bio_caller1);
+ return ((u_long)bp->bio_caller1);
}
-
-/*
- * Fetch the actual field used for classification, among the
- * ones available in the credentials associated with the bio.
- * Not much to do so far.
- */
-static u_long
-g_sched_classify(struct thread *tp)
+#else
+static inline u_long
+g_sched_classify(struct bio *bp)
{
- switch (g_sched_classifier) {
- case G_CLASS_PID:
- default:
- return (tp->td_tid);
- }
+ return ((u_long)bp->bio_classifier1);
}
+#endif
/* Return the hash chain for the given key. */
static inline struct g_hash *
@@ -369,12 +355,10 @@
struct g_sched_class *gsc;
struct g_gsched *gsp;
struct g_hash *bucket;
- struct thread *tp;
u_long key;
sc = gp->softc;
- tp = g_sched_issuer(bp);
- key = g_sched_classify(tp);
+ key = g_sched_classify(bp);
bucket = g_sched_hash(sc, key);
LIST_FOREACH(gsc, bucket, gsc_clist) {
if (key == gsc->gsc_key) {
@@ -389,7 +373,7 @@
if (!gsc)
return (NULL);
- if (gsp->gs_init_class(sc->sc_data, gsc->gsc_priv, tp)) {
+ if (gsp->gs_init_class(sc->sc_data, gsc->gsc_priv)) {
free(gsc, M_GEOM_SCHED);
return (NULL);
}
@@ -1258,6 +1242,7 @@
* code in g_ioreq_patch() for the details.
*/
+#ifndef HAVE_BIO_CLASSIFIER
#if defined(__i386__)
#define CODE_SIZE 29
#define STORE_SIZE 5
@@ -1374,6 +1359,47 @@
}
}
+static inline void
+g_classifier_ini(void)
+{
+ /*
+  * Variant built when HAVE_BIO_CLASSIFIER is not defined: no
+  * classifier hooks are used, so fall back to g_ioreq_patch().
+  * According to the comment at the call site in g_sched_init(),
+  * that patches g_io_request to store classification info in the bio.
+  */
+ g_ioreq_patch();
+}
+
+static inline void
+g_classifier_fini(void)
+{
+ /*
+  * Counterpart of g_classifier_ini() for the build without
+  * HAVE_BIO_CLASSIFIER; called from g_sched_fini().
+  * NOTE(review): presumably g_ioreq_restore() reverts what
+  * g_ioreq_patch() did - confirm against its definition.
+  */
+ g_ioreq_restore();
+}
+#else /* HAVE_BIO_CLASSIFIER */
+static int
+g_sched_tag(void *arg, struct bio *bp)
+{
+ /*
+  * Classifier callback (installed through g_sched_classifier.func):
+  * tag the bio with the currently running thread.  The stored value
+  * is what g_sched_classify() later reads back from bio_classifier1
+  * and uses as an opaque key.  The arg parameter is not used.
+  * NOTE(review): the meaning of the constant return value 1 is
+  * defined by the g_classifier_hook API, which is not visible
+  * here - confirm.
+  */
+ bp->bio_classifier1 = curthread;
+ return (1);
+}
+
+static struct g_classifier_hook g_sched_classifier = { /* Passed to g_register_classifier()/g_unregister_classifier(). */
+ .func = g_sched_tag, /* Callback that tags each bio with curthread. */
+};
+
+static inline void
+g_classifier_ini(void)
+{
+ /*
+  * Variant built when HAVE_BIO_CLASSIFIER is defined: instead of
+  * patching g_io_request, register the g_sched_classifier hook,
+  * whose callback is g_sched_tag().  Called from g_sched_init().
+  */
+ g_register_classifier(&g_sched_classifier);
+}
+
+static inline void
+g_classifier_fini(void)
+{
+ /*
+  * Counterpart of g_classifier_ini() for the HAVE_BIO_CLASSIFIER
+  * build: unregister the hook that was registered at init time.
+  * Called from g_sched_fini().
+  */
+ g_unregister_classifier(&g_sched_classifier);
+}
+#endif
+
static void
g_sched_init(struct g_class *mp)
{
@@ -1384,14 +1410,14 @@
mp, &g_sched_class);
/* Patch g_io_request to store classification info in the bio. */
- g_ioreq_patch();
+ g_classifier_ini();
}
static void
g_sched_fini(struct g_class *mp)
{
- g_ioreq_restore();
+ g_classifier_fini();
G_SCHED_DEBUG(0, "Unloading...");
==== //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/gs_rr.c#2 (text+ko) ====
@@ -65,6 +65,11 @@
G_QUEUE_IDLING /* Waiting for a new request. */
};
+/*
+ * Possible queue flags.  Values are used as bits of the q_flags field
+ * of struct g_rr_queue: they are tested with '&', set with '|=' and
+ * cleared with '&= ~'.
+ */
+enum g_rr_flags {
+ G_FLAG_COMPLETED = 1, /* Completed a req. in the current budget. */
+};
+
struct g_rr_softc;
/*
@@ -79,6 +84,7 @@
enum g_rr_state q_status;
unsigned int q_service; /* service received so far */
int q_slice_end; /* actual slice end in ticks */
+ enum g_rr_flags q_flags; /* queue flags */
struct bio_queue_head q_bioq;
/* Scheduling parameters */
@@ -86,6 +92,13 @@
unsigned int q_slice_duration; /* slice size in ticks */
unsigned int q_wait_ticks; /* wait time for anticipation */
+ /* Stats to drive the various heuristics. */
+ struct g_savg q_thinktime; /* Thinktime average. */
+ struct g_savg q_seekdist; /* Seek distance average. */
+
+ off_t q_lastoff; /* Last submitted req. offset. */
+ int q_lastsub; /* Last submitted req. time. */
+
/* Expiration deadline for an empty queue. */
int q_expire;
@@ -289,7 +302,7 @@
}
static int
-g_rr_init_class(void *data, void *priv, struct thread *tp)
+g_rr_init_class(void *data, void *priv)
{
struct g_rr_softc *sc = data;
struct g_rr_queue *qp = priv;
@@ -339,8 +352,33 @@
g_rr_queue_expired(struct g_rr_queue *qp)
{
- return (qp->q_service >= qp->q_budget ||
- ticks - qp->q_slice_end >= 0);
+ if (qp->q_service >= qp->q_budget)
+ return (1);
+
+ if ((qp->q_flags & G_FLAG_COMPLETED) &&
+ ticks - qp->q_slice_end >= 0)
+ return (1);
+
+ return (0);
+}
+
+static inline int
+g_rr_should_anticipate(struct g_rr_queue *qp, struct bio *bp)
+{
+ int wait = get_bounded(&me.wait_ms, 2);
+ /*
+  * Decide whether the scheduler should anticipate (keep the queue
+  * busy waiting for a further request) after bp, based on the
+  * per-queue statistics.  Returns 1 to anticipate, 0 otherwise.
+  *
+  * First check: if me.w_anticipate is zero, never anticipate after
+  * a write.
+  */
+ if (!me.w_anticipate && (bp->bio_cmd & BIO_WRITE))
+ return (0);
+ /*
+  * The thinktime average is only trusted once it is valid; if it
+  * exceeds the configured wait there is no point in idling.
+  * NOTE(review): thinktime samples are tick deltas, so this assumes
+  * get_bounded() returns the wait in ticks - confirm.
+  */
+ if (g_savg_valid(&qp->q_thinktime) &&
+ g_savg_read(&qp->q_thinktime) > wait)
+ return (0);
+ /*
+  * Likewise give up when the (valid) average seek distance is above
+  * a fixed threshold.  NOTE(review): 2048 is in the same units as
+  * the bio_offset differences sampled by g_rr_update_seekdist();
+  * confirm that this is the intended unit.
+  */
+ if (g_savg_valid(&qp->q_seekdist) &&
+ g_savg_read(&qp->q_seekdist) > 2048)
+ return (0);
+ /* No heuristic argued against it (or no valid history yet). */
+ return (1);
}
/*
@@ -389,9 +427,7 @@
TAILQ_REMOVE(&sc->sc_rr_tailq, qp, q_tailq);
sc->sc_active = qp;
qp->q_service = 0;
- /* in case we want to make the slice adaptive */
- qp->q_slice_duration = get_bounded(&me.quantum_ms, 2);
- qp->q_slice_end = ticks + qp->q_slice_duration;
+ qp->q_flags &= ~G_FLAG_COMPLETED;
}
bp = gs_bioq_takefirst(&qp->q_bioq); /* surely not NULL */
@@ -412,7 +448,7 @@
* on read or writes (e.g., anticipate only on reads).
*/
expired = g_rr_queue_expired(qp); /* are we expired ? */
- next = gs_bioq_first(&qp->q_bioq); /* do we have one more ? */
+ next = gs_bioq_first(&qp->q_bioq); /* do we have one more ? */
if (expired) {
sc->sc_active = NULL;
/* Either requeue or release reference. */
@@ -423,7 +459,7 @@
} else if (next != NULL) {
qp->q_status = G_QUEUE_READY;
} else {
- if (!force && (me.w_anticipate || bp->bio_cmd & BIO_READ)) {
+ if (!force && g_rr_should_anticipate(qp, bp)) {
/* anticipate */
qp->q_status = G_QUEUE_BUSY;
} else {
@@ -439,6 +475,30 @@
return (bp);
}
+static inline void
+g_rr_update_thinktime(struct g_rr_queue *qp)
+{
+ int delta = ticks - qp->q_lastsub, wait = get_bounded(&me.wait_ms, 2);
+ /*
+  * Add one sample to the queue's thinktime average: the number of
+  * ticks elapsed since the previous call for this queue (tracked in
+  * q_lastsub, which is refreshed here).  Each sample is capped at
+  * 2 * wait, presumably so that one long pause cannot dominate the
+  * average.
+  */
+ qp->q_lastsub = ticks;
+ delta = (delta > 2 * wait) ? 2 * wait : delta;
+ g_savg_add_sample(&qp->q_thinktime, delta);
+}
+
+static inline void
+g_rr_update_seekdist(struct g_rr_queue *qp, struct bio *bp)
+{
+ off_t dist;
+ /*
+  * Add one sample to the queue's seek distance average: the
+  * absolute difference between the offset of bp and q_lastoff,
+  * i.e. the end of the previous request seen by this function.
+  */
+ if (qp->q_lastoff > bp->bio_offset)
+ dist = qp->q_lastoff - bp->bio_offset;
+ else
+ dist = bp->bio_offset - qp->q_lastoff;
+ /*
+  * Remember where this request ends.  While the average holds no
+  * samples yet (gs_smpl == 0) there is no meaningful previous
+  * offset, so 0 is recorded instead of dist.
+  */
+ qp->q_lastoff = bp->bio_offset + bp->bio_length;
+ g_savg_add_sample(&qp->q_seekdist, qp->q_seekdist.gs_smpl ? dist : 0);
+}
+
/*
* Called when a real request for disk I/O arrives.
* Locate the queue associated with the client.
@@ -476,6 +536,9 @@
}
}
+ g_rr_update_thinktime(qp);
+ g_rr_update_seekdist(qp, bp);
+
/* Inherit the reference returned by g_rr_queue_get(). */
bp->bio_caller1 = qp;
gs_bioq_disksort(&qp->q_bioq, bp);
@@ -559,6 +622,13 @@
qp = bp->bio_caller1;
if (qp == sc->sc_active && qp->q_status == G_QUEUE_BUSY) {
+ if (!(qp->q_flags & G_FLAG_COMPLETED)) {
+ qp->q_flags |= G_FLAG_COMPLETED;
+ /* in case we want to make the slice adaptive */
+ qp->q_slice_duration = get_bounded(&me.quantum_ms, 2);
+ qp->q_slice_end = ticks + qp->q_slice_duration;
+ }
+
/* The queue is trying anticipation, start the timer. */
qp->q_status = G_QUEUE_IDLING;
/* may make this adaptive */
==== //depot/projects/soc2009/fabio_gsched/geom_sched/sys/geom/sched/gs_scheduler.h#2 (text+ko) ====
@@ -90,7 +90,7 @@
typedef int gs_start_t (void *data, struct bio *bio);
typedef void gs_done_t (void *data, struct bio *bio);
typedef struct bio *gs_next_t (void *data, int force);
-typedef int gs_init_class_t (void *data, void *priv, struct thread *tp);
+typedef int gs_init_class_t (void *data, void *priv);
typedef void gs_fini_class_t (void *data, void *priv);
struct g_gsched {
@@ -175,6 +175,40 @@
void g_sched_dispatch(struct g_geom *geom);
/*
+ * Simple gathering of statistical data, used by schedulers to collect
+ * info on process history. Just keep an exponential average of the
+ * samples, with some extra bits of precision.
+ */
+struct g_savg {
+ uint64_t gs_avg; /* Decayed sum of the samples; divided by gs_smpl on read. */
+ unsigned int gs_smpl; /* Decayed sample count; starting from 0 it grows to 8 and stays there. */
+};
+
+static inline void
+g_savg_add_sample(struct g_savg *ss, uint64_t sample)
+{
+
+ /*
+  * EMA with alpha = 0.125, fixed point, 3 bits of precision.
+  * Both the accumulated sum and the sample counter lose 1/8 of
+  * their value (x >> 3) at every update before the new sample,
+  * respectively 1, is added; g_savg_read() divides the former by
+  * the latter.
+  */
+ ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3);
+ ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3);
+}
+
+static inline int
+g_savg_valid(struct g_savg *ss)
+{
+
+ /*
+  * We want at least 8 samples to deem an average as valid.
+  * Starting from zero, gs_smpl grows by one per added sample until
+  * it reaches 8, so the test below is true from the 8th sample on.
+  * Returns nonzero if valid, 0 otherwise.
+  */
+ return (ss->gs_smpl > 7);
+}
+
+static inline uint64_t
+g_savg_read(struct g_savg *ss)
+{
+ /*
+  * Return the current average: decayed sum over decayed count.
+  * There is no guard on the divisor: if no sample was ever added
+  * (gs_smpl == 0) this divides by zero, so callers must check
+  * g_savg_valid() (or otherwise ensure gs_smpl != 0) first.
+  */
+ return (ss->gs_avg / ss->gs_smpl);
+}
+
+/*
* Declaration of a scheduler module.
*/
int g_gsched_modevent(module_t mod, int cmd, void *arg);
More information about the p4-projects
mailing list