svn commit: r186888 - in user/luigi/geom_sched/sys: conf dev/ata
geom geom/sched modules/geom modules/geom/geom_sched
modules/geom/geom_sched/geom_sched
modules/geom/geom_sched/gs_as modules/geom/g...
Luigi Rizzo
luigi at FreeBSD.org
Thu Jan 8 01:24:20 PST 2009
Author: luigi
Date: Thu Jan 8 09:24:18 2009
New Revision: 186888
URL: http://svn.freebsd.org/changeset/base/186888
Log:
initial snapshot of geom disk schedulers by Fabio Checconi.
Added:
user/luigi/geom_sched/sys/geom/sched/
user/luigi/geom_sched/sys/geom/sched/g_as.c
user/luigi/geom_sched/sys/geom/sched/g_gsched.h
user/luigi/geom_sched/sys/geom/sched/g_rr.c
user/luigi/geom_sched/sys/geom/sched/g_sched.c
user/luigi/geom_sched/sys/geom/sched/g_sched.h
user/luigi/geom_sched/sys/geom/sched/gs_as.c
user/luigi/geom_sched/sys/geom/sched/gs_rr.c
user/luigi/geom_sched/sys/modules/geom/geom_sched/
user/luigi/geom_sched/sys/modules/geom/geom_sched/Makefile
user/luigi/geom_sched/sys/modules/geom/geom_sched/geom_sched/
user/luigi/geom_sched/sys/modules/geom/geom_sched/geom_sched/Makefile
user/luigi/geom_sched/sys/modules/geom/geom_sched/gs_as/
user/luigi/geom_sched/sys/modules/geom/geom_sched/gs_as/Makefile
user/luigi/geom_sched/sys/modules/geom/geom_sched/gs_rr/
user/luigi/geom_sched/sys/modules/geom/geom_sched/gs_rr/Makefile
user/luigi/geom_sched/sys/modules/geom/geom_sched/gsched_as/
user/luigi/geom_sched/sys/modules/geom/geom_sched/gsched_as/Makefile
user/luigi/geom_sched/sys/modules/geom/geom_sched/gsched_rr/
user/luigi/geom_sched/sys/modules/geom/geom_sched/gsched_rr/Makefile
Modified:
user/luigi/geom_sched/sys/conf/NOTES
user/luigi/geom_sched/sys/conf/files
user/luigi/geom_sched/sys/conf/options
user/luigi/geom_sched/sys/dev/ata/ata-all.h
user/luigi/geom_sched/sys/dev/ata/ata-disk.c
user/luigi/geom_sched/sys/dev/ata/ata-queue.c
user/luigi/geom_sched/sys/geom/geom_disk.c
user/luigi/geom_sched/sys/geom/geom_disk.h
user/luigi/geom_sched/sys/geom/geom_io.c
user/luigi/geom_sched/sys/modules/geom/Makefile
user/luigi/geom_sched/sys/sys/bio.h
Modified: user/luigi/geom_sched/sys/conf/NOTES
==============================================================================
--- user/luigi/geom_sched/sys/conf/NOTES Thu Jan 8 06:38:06 2009 (r186887)
+++ user/luigi/geom_sched/sys/conf/NOTES Thu Jan 8 09:24:18 2009 (r186888)
@@ -153,6 +153,10 @@ options GEOM_PART_MBR # MBR partitioni
options GEOM_PART_VTOC8 # SMI VTOC8 disk label
options GEOM_PC98 # NEC PC9800 partitioning
options GEOM_RAID3 # RAID3 functionality.
+options GEOM_SCHED # Disk scheduling in GEOM.
+options GEOM_GSCHED_AS # Geom-based anticipatory.
+options GEOM_GS_AS # Driver-based anticipatory.
+options GEOM_GS_RR # Driver-based round-robin.
options GEOM_SHSEC # Shared secret.
options GEOM_STRIPE # Disk striping.
options GEOM_SUNLABEL # Sun/Solaris partitioning
Modified: user/luigi/geom_sched/sys/conf/files
==============================================================================
--- user/luigi/geom_sched/sys/conf/files Thu Jan 8 06:38:06 2009 (r186887)
+++ user/luigi/geom_sched/sys/conf/files Thu Jan 8 09:24:18 2009 (r186888)
@@ -1283,6 +1283,7 @@ geom/geom_mbr_enc.c optional geom_mbr
geom/geom_pc98.c optional geom_pc98
geom/geom_pc98_enc.c optional geom_pc98
geom/geom_slice.c standard
+geom/geom_sched.c standard
geom/geom_subr.c standard
geom/geom_sunlabel.c optional geom_sunlabel
geom/geom_sunlabel_enc.c optional geom_sunlabel
@@ -1312,6 +1313,11 @@ geom/part/g_part_vtoc8.c optional geom_p
geom/raid3/g_raid3.c optional geom_raid3
geom/raid3/g_raid3_ctl.c optional geom_raid3
geom/shsec/g_shsec.c optional geom_shsec
+geom/sched/g_sched.c optional geom_sched
+geom/sched/g_as.c optional geom_gsched_as
+geom/sched/g_rr.c optional geom_gsched_rr
+geom/sched/gs_as.c optional geom_gs_as
+geom/sched/gs_rr.c optional geom_gs_rr
geom/stripe/g_stripe.c optional geom_stripe
geom/uzip/g_uzip.c optional geom_uzip
geom/virstor/binstream.c optional geom_virstor
Modified: user/luigi/geom_sched/sys/conf/options
==============================================================================
--- user/luigi/geom_sched/sys/conf/options Thu Jan 8 06:38:06 2009 (r186887)
+++ user/luigi/geom_sched/sys/conf/options Thu Jan 8 09:24:18 2009 (r186888)
@@ -98,6 +98,10 @@ GEOM_PART_MBR opt_geom.h
GEOM_PART_VTOC8 opt_geom.h
GEOM_PC98 opt_geom.h
GEOM_RAID3 opt_geom.h
+GEOM_SCHED opt_geom.h
+GEOM_GSCHED_AS opt_geom.h
+GEOM_GS_AS opt_geom.h
+GEOM_GS_RR opt_geom.h
GEOM_SHSEC opt_geom.h
GEOM_STRIPE opt_geom.h
GEOM_SUNLABEL opt_geom.h
Modified: user/luigi/geom_sched/sys/dev/ata/ata-all.h
==============================================================================
--- user/luigi/geom_sched/sys/dev/ata/ata-all.h Thu Jan 8 06:38:06 2009 (r186887)
+++ user/luigi/geom_sched/sys/dev/ata/ata-all.h Thu Jan 8 09:24:18 2009 (r186888)
@@ -510,6 +510,7 @@ struct ata_channel {
TAILQ_HEAD(, ata_request) ata_queue; /* head of ATA queue */
struct ata_request *freezepoint; /* composite freezepoint */
struct ata_request *running; /* currently running request */
+ struct disk *disks[2]; /* disks, if any */
};
/* disk bay/enclosure related */
@@ -546,6 +547,9 @@ int ata_wmode(struct ata_params *ap);
int ata_umode(struct ata_params *ap);
int ata_limit_mode(device_t dev, int mode, int maxmode);
+/* ata-disk.c */
+struct ata_request *ata_create_request(struct bio *bp, int full);
+
/* ata-queue.c: */
int ata_controlcmd(device_t dev, u_int8_t command, u_int16_t feature, u_int64_t lba, u_int16_t count);
int ata_atapicmd(device_t dev, u_int8_t *ccb, caddr_t data, int count, int flags, int timeout);
Modified: user/luigi/geom_sched/sys/dev/ata/ata-disk.c
==============================================================================
--- user/luigi/geom_sched/sys/dev/ata/ata-disk.c Thu Jan 8 06:38:06 2009 (r186887)
+++ user/luigi/geom_sched/sys/dev/ata/ata-disk.c Thu Jan 8 09:24:18 2009 (r186888)
@@ -60,6 +60,7 @@ static void ad_describe(device_t dev);
static int ad_version(u_int16_t);
static disk_strategy_t ad_strategy;
static disk_ioctl_t ad_ioctl;
+static disk_kick_t ad_kick;
static dumper_t ad_dump;
/*
@@ -148,6 +149,7 @@ ad_attach(device_t dev)
adp->disk = disk_alloc();
adp->disk->d_strategy = ad_strategy;
adp->disk->d_ioctl = ad_ioctl;
+ adp->disk->d_kick = ad_kick;
adp->disk->d_dump = ad_dump;
adp->disk->d_name = "ad";
adp->disk->d_drv1 = dev;
@@ -168,6 +170,7 @@ ad_attach(device_t dev)
snprintf(adp->disk->d_ident, sizeof(adp->disk->d_ident), "ad:%s",
atadev->param.serial);
disk_create(adp->disk, DISK_VERSION);
+ ch->disks[atadev->unit == ATA_SLAVE] = adp->disk;
device_add_child(dev, "subdisk", device_get_unit(dev));
ad_firmware_geom_adjust(dev, adp->disk);
bus_generic_attach(dev);
@@ -179,6 +182,7 @@ ad_attach(device_t dev)
static int
ad_detach(device_t dev)
{
+ struct ata_channel *ch = device_get_softc(device_get_parent(dev));
struct ad_softc *adp = device_get_ivars(dev);
struct ata_device *atadev = device_get_softc(dev);
device_t *children;
@@ -199,6 +203,8 @@ ad_detach(device_t dev)
free(children, M_TEMP);
}
+ ch->disks[atadev->unit == ATA_SLAVE] = NULL;
+
/* detroy disk from the system so we dont get any further requests */
disk_destroy(adp->disk);
@@ -266,13 +272,13 @@ ad_spindown(void *priv)
ata_queue_request(request);
}
-
-static void
-ad_strategy(struct bio *bp)
+struct ata_request *
+ata_create_request(struct bio *bp, int full)
{
- device_t dev = bp->bio_disk->d_drv1;
+ device_t dev = bp->bio_disk->d_drv1;
struct ata_device *atadev = device_get_softc(dev);
struct ata_request *request;
+ struct ata_channel *ch;
if (atadev->spindown != 0)
callout_reset(&atadev->spindown_timer, hz * atadev->spindown,
@@ -281,7 +287,7 @@ ad_strategy(struct bio *bp)
if (!(request = ata_alloc_request())) {
device_printf(dev, "FAILURE - out of memory in start\n");
biofinish(bp, NULL, ENOMEM);
- return;
+ return NULL;
}
/* setup request */
@@ -344,10 +350,32 @@ ad_strategy(struct bio *bp)
device_printf(dev, "FAILURE - unknown BIO operation\n");
ata_free_request(request);
biofinish(bp, NULL, EIO);
- return;
+ return NULL;
}
request->flags |= ATA_R_ORDERED;
- ata_queue_request(request);
+
+ if (full != 0) {
+ if ((request->parent = device_get_parent(dev)) == NULL) {
+ ata_free_request(request);
+ biofinish(bp, NULL, ENXIO);
+ return NULL;
+ }
+
+ ch = device_get_softc(request->parent);
+ callout_init_mtx(&request->callout, &ch->state_mtx,
+ CALLOUT_RETURNUNLOCKED);
+ }
+
+ return request;
+}
+
+static void
+ad_strategy(struct bio *bp)
+{
+ struct ata_request *request;
+
+ if ((request = ata_create_request(bp, 0)) != NULL)
+ ata_queue_request(request);
}
static void
@@ -369,6 +397,18 @@ ad_ioctl(struct disk *disk, u_long cmd,
return ata_device_ioctl(disk->d_drv1, cmd, data);
}
+static void
+ad_kick(struct disk *disk)
+{
+ device_t dev;
+ struct ata_channel *ch;
+
+ dev = disk->d_drv1;
+ ch = device_get_softc(device_get_parent(dev));
+ if (ch != NULL)
+ ata_start(ch->dev);
+}
+
static int
ad_dump(void *arg, void *virtual, vm_offset_t physical,
off_t offset, size_t length)
Modified: user/luigi/geom_sched/sys/dev/ata/ata-queue.c
==============================================================================
--- user/luigi/geom_sched/sys/dev/ata/ata-queue.c Thu Jan 8 06:38:06 2009 (r186887)
+++ user/luigi/geom_sched/sys/dev/ata/ata-queue.c Thu Jan 8 09:24:18 2009 (r186888)
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include <sys/rman.h>
#include <dev/ata/ata-all.h>
#include <ata_if.h>
+#include <geom/geom_sched.h>
/* prototypes */
static void ata_completed(void *, int);
@@ -171,10 +172,25 @@ ata_start(device_t dev)
struct ata_channel *ch = device_get_softc(dev);
struct ata_request *request;
struct ata_composite *cptr;
- int dependencies = 0;
+ struct disk *dp;
+ struct bio *bp;
+ int dependencies = 0, i;
- /* if we have a request on the queue try to get it running */
mtx_lock(&ch->queue_mtx);
+ if (TAILQ_FIRST(&ch->ata_queue) == NULL) {
+ for (i = 0; i < 2; i++) {
+ dp = ch->disks[i];
+ while (dp != NULL && (bp = g_sched_next(dp)) != NULL) {
+ request = ata_create_request(bp, 1);
+ if (request != NULL) {
+ ata_sort_queue(ch, request);
+ break;
+ }
+ }
+ }
+ }
+
+ /* if we have a request on the queue try to get it running */
if ((request = TAILQ_FIRST(&ch->ata_queue))) {
/* we need the locking function to get the lock for this channel */
Modified: user/luigi/geom_sched/sys/geom/geom_disk.c
==============================================================================
--- user/luigi/geom_sched/sys/geom/geom_disk.c Thu Jan 8 06:38:06 2009 (r186887)
+++ user/luigi/geom_sched/sys/geom/geom_disk.c Thu Jan 8 09:24:18 2009 (r186888)
@@ -55,9 +55,11 @@ __FBSDID("$FreeBSD$");
#include <geom/geom.h>
#include <geom/geom_disk.h>
#include <geom/geom_int.h>
+#include <geom/geom_sched.h>
static struct mtx g_disk_done_mtx;
+static g_ctl_req_t g_disk_ctlreq;
static g_access_t g_disk_access;
static g_init_t g_disk_init;
static g_fini_t g_disk_fini;
@@ -68,6 +70,7 @@ static g_dumpconf_t g_disk_dumpconf;
static struct g_class g_disk_class = {
.name = "DISK",
.version = G_VERSION,
+ .ctlreq = g_disk_ctlreq,
.init = g_disk_init,
.fini = g_disk_fini,
.start = g_disk_start,
@@ -81,16 +84,19 @@ g_disk_init(struct g_class *mp __unused)
{
mtx_init(&g_disk_done_mtx, "g_disk_done", NULL, MTX_DEF);
+ g_sched_init();
}
static void
g_disk_fini(struct g_class *mp __unused)
{
+ g_sched_fini();
mtx_destroy(&g_disk_done_mtx);
}
DECLARE_GEOM_CLASS(g_disk_class, g_disk);
+MODULE_VERSION(g_disk, 0);
static void __inline
g_disk_lock_giant(struct disk *dp)
@@ -106,6 +112,83 @@ g_disk_unlock_giant(struct disk *dp)
mtx_unlock(&Giant);
}
+static void
+g_disk_configure(struct gctl_req *req, struct g_class *mp)
+{
+ struct disk *dp;
+ struct g_provider *pp;
+ const char *sched, *name;
+ char param[16];
+ int i, *nargs;
+
+ g_topology_assert();
+
+ nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
+ if (nargs == NULL) {
+ gctl_error(req, "No '%s' argument.", "nargs");
+ return;
+ }
+
+ if (*nargs <= 0) {
+ gctl_error(req, "Missing devices.");
+ return;
+ }
+
+ sched = gctl_get_asciiparam(req, "iosched");
+ if (sched == NULL) {
+ gctl_error(req, "No '%s' argument.", "iosched");
+ return;
+ }
+
+ for (i = 0; i < *nargs; i++) {
+ snprintf(param, sizeof(param), "arg%d", i);
+ name = gctl_get_asciiparam(req, param);
+ if (name == NULL) {
+ gctl_error(req, "No '%s' argument.", param);
+ return;
+ }
+
+ if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
+ name += strlen("/dev/");
+
+ pp = g_provider_by_name(name);
+ if (pp == NULL || pp->geom->class != mp) {
+ gctl_error(req, "Provider %s is invalid.", name);
+ return;
+ }
+
+ dp = pp->geom->softc;
+ if (g_sched_configure(dp, sched) != 0) {
+ gctl_error(req, "Could not set scheduler %s.", sched);
+ return;
+ }
+ }
+}
+
+static void
+g_disk_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb)
+{
+ uint32_t *version;
+
+ g_topology_assert();
+
+ version = gctl_get_paraml(req, "version", sizeof(*version));
+ if (version == NULL) {
+ gctl_error(req, "No '%s' argument.", "version");
+ return;
+ }
+
+ if (*version != G_VERSION) {
+ gctl_error(req, "Userland and kernel parts are out of sync.");
+ return;
+ }
+
+ if (strcmp(verb, "configure") == 0)
+ g_disk_configure(req, mp);
+ else
+ gctl_error(req, "Unknown verb.");
+}
+
static int
g_disk_access(struct g_provider *pp, int r, int w, int e)
{
@@ -198,6 +281,8 @@ g_disk_done(struct bio *bp)
mtx_lock(&g_disk_done_mtx);
bp->bio_completed = bp->bio_length - bp->bio_resid;
+ g_sched_done(bp);
+
bp2 = bp->bio_parent;
if (bp2->bio_error == 0)
bp2->bio_error = bp->bio_error;
@@ -288,7 +373,7 @@ g_disk_start(struct bio *bp)
bp2->bio_disk = dp;
devstat_start_transaction_bio(dp->d_devstat, bp2);
g_disk_lock_giant(dp);
- dp->d_strategy(bp2);
+ g_sched_start(dp, bp2);
g_disk_unlock_giant(dp);
bp2 = bp3;
bp3 = NULL;
@@ -466,6 +551,7 @@ disk_create(struct disk *dp, int version
dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED,
DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
dp->d_geom = NULL;
+ g_sched_disk_init(dp);
g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident));
g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL);
}
@@ -476,6 +562,7 @@ disk_destroy(struct disk *dp)
g_cancel_event(dp);
dp->d_destroyed = 1;
+ g_sched_disk_fini(dp);
if (dp->d_devstat != NULL)
devstat_remove_entry(dp->d_devstat);
g_post_event(g_disk_destroy, dp, M_WAITOK, NULL);
@@ -487,6 +574,8 @@ disk_gone(struct disk *dp)
struct g_geom *gp;
struct g_provider *pp;
+ g_sched_disk_gone(dp);
+
gp = dp->d_geom;
if (gp != NULL)
LIST_FOREACH(pp, &gp->provider, provider)
Modified: user/luigi/geom_sched/sys/geom/geom_disk.h
==============================================================================
--- user/luigi/geom_sched/sys/geom/geom_disk.h Thu Jan 8 06:38:06 2009 (r186887)
+++ user/luigi/geom_sched/sys/geom/geom_disk.h Thu Jan 8 09:24:18 2009 (r186888)
@@ -53,7 +53,10 @@ typedef int disk_ioctl_t(struct disk *,
int fflag, struct thread *td);
/* NB: disk_ioctl_t SHALL be cast'able to d_ioctl_t */
+typedef void disk_kick_t(struct disk *);
+
struct g_geom;
+struct g_sched;
struct devstat;
struct disk {
@@ -74,6 +77,7 @@ struct disk {
disk_close_t *d_close;
disk_strategy_t *d_strategy;
disk_ioctl_t *d_ioctl;
+ disk_kick_t *d_kick;
dumper_t *d_dump;
/* Info fields from driver to geom_disk.c. Valid when open */
@@ -86,6 +90,13 @@ struct disk {
u_int d_stripesize;
char d_ident[DISK_IDENT_SIZE];
+ /* Scheduler fields */
+ struct mtx d_sched_lock;
+ u_int d_sched_flags;
+ u_int d_nr_sorted;
+ struct g_sched *d_sched;
+ void *d_sched_data;
+
/* Fields private to the driver */
void *d_drv1;
};
Modified: user/luigi/geom_sched/sys/geom/geom_io.c
==============================================================================
--- user/luigi/geom_sched/sys/geom/geom_io.c Thu Jan 8 06:38:06 2009 (r186887)
+++ user/luigi/geom_sched/sys/geom/geom_io.c Thu Jan 8 09:24:18 2009 (r186888)
@@ -172,6 +172,7 @@ g_clone_bio(struct bio *bp)
bp2->bio_offset = bp->bio_offset;
bp2->bio_data = bp->bio_data;
bp2->bio_attribute = bp->bio_attribute;
+ bp2->bio_thread = bp->bio_thread;
bp->bio_children++;
}
#ifdef KTR
@@ -369,6 +370,10 @@ g_io_request(struct bio *bp, struct g_co
bp->bio_error = 0;
bp->bio_completed = 0;
+ /* Pass down the thread that issued the bio. */
+ if (bp->bio_thread == NULL)
+ bp->bio_thread = curthread;
+
KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
("Bio already on queue bp=%p", bp));
bp->bio_flags |= BIO_ONQUEUE;
Added: user/luigi/geom_sched/sys/geom/sched/g_as.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/luigi/geom_sched/sys/geom/sched/g_as.c Thu Jan 8 09:24:18 2009 (r186888)
@@ -0,0 +1,204 @@
+/*-
+ * Copyright (c) 2007 Fabio Checconi <fabio at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/bio.h>
+#include <sys/callout.h>
+#include <sys/proc.h>
+#include <sys/taskqueue.h>
+
+#include <geom/geom.h>
+#include <geom/sched/g_gsched.h>
+
+/*
+ * Status values for AS.
+ */
+#define G_AS_NOWAIT 0 /* Not waiting at all. */
+#define G_AS_WAITREQ 1 /* Waiting for a request to complete. */
+#define G_AS_WAITING 2 /* Waiting for a new request. */
+
+struct g_as_softc {
+ struct g_geom *sc_geom;
+ struct thread *sc_curthread;
+ int sc_status;
+ long sc_batch;
+
+ struct callout sc_wait;
+ struct bio_queue_head sc_bioq;
+};
+
+#define G_AS_WAIT_EXPIRE (hz/200 > 0 ? hz/200 : 2)
+#define G_AS_MAX_BATCH 0x00800000
+
+/*
+ * Dispatch the first queued request. Here we also update the status
+ * according to the dispatched request.
+ */
+static void
+g_as_dispatch(struct g_as_softc *sc)
+{
+ struct bio *bio;
+
+ /*
+ * Batching simply means not serving too many requests while
+ * waiting for sequential ones; it is not really coupled with the
+ * threads being served. Its only purpose is to keep the
+ * scheduler from starving other threads while an aggressive one
+ * is continuously issuing new requests.
+ */
+ sc->sc_curthread = NULL;
+
+ bio = bioq_takefirst(&sc->sc_bioq);
+ if (bio != NULL) {
+ sc->sc_batch += bio->bio_length;
+ if (sc->sc_batch > G_AS_MAX_BATCH) {
+ /*
+ * Too many requests served here, don't wait
+ * for the next.
+ */
+ sc->sc_batch = 0;
+ sc->sc_status = G_AS_NOWAIT;
+ } else {
+ /*
+ * Once this request has been served we'll wait
+ * for a new one from the same thread.
+ * Of course we are anticipating everything
+ * here, even writes or asynchronous requests,
+ * but this is only a prototype.
+ */
+ sc->sc_status = G_AS_WAITREQ;
+ }
+ g_io_request(bio, LIST_FIRST(&sc->sc_geom->consumer));
+ } else
+ sc->sc_status = G_AS_NOWAIT;
+}
+
+static void
+g_as_wait_timeout(void *data)
+{
+ struct g_as_softc *sc = data;
+
+ g_sched_lock(sc->sc_geom);
+ /*
+ * We were waiting for a new request for curthread, it did
+ * not come, just dispatch the next one.
+ */
+ if (sc->sc_status == G_AS_WAITING)
+ g_as_dispatch(sc);
+ g_sched_unlock(sc->sc_geom);
+}
+
+static void
+g_as_start(void *data, struct bio *bio)
+{
+ struct g_as_softc *sc = data;
+
+ bioq_disksort(&sc->sc_bioq, bio);
+
+ /*
+ * If we are not waiting, or this request comes from the thread we
+ * were waiting for, stop the timer and dispatch; otherwise do nothing.
+ */
+ if (sc->sc_status == G_AS_NOWAIT ||
+ bio->bio_thread == sc->sc_curthread) {
+ callout_stop(&sc->sc_wait);
+ g_as_dispatch(sc);
+ }
+}
+
+static void
+g_as_done(void *data, struct bio *bio)
+{
+ struct g_as_softc *sc = data;
+ struct bio *bp2;
+
+ bp2 = bio->bio_parent;
+
+ /* Don't wait when fragments are completed. */
+ if (bp2->bio_children != bp2->bio_inbed + 1)
+ return;
+
+ if (sc->sc_status == G_AS_WAITREQ) {
+ /*
+ * Start waiting for a new request from curthread.
+ */
+ sc->sc_curthread = bio->bio_thread;
+ sc->sc_status = G_AS_WAITING;
+ callout_reset(&sc->sc_wait, G_AS_WAIT_EXPIRE,
+ g_as_wait_timeout, sc);
+ } else {
+ /*
+ * Since we don't have to wait for anything, just dispatch
+ * the next request.
+ */
+ g_as_dispatch(sc);
+ }
+}
+
+static void *
+g_as_init(struct g_geom *geom)
+{
+ struct g_as_softc *sc;
+
+ sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
+ sc->sc_geom = geom;
+ sc->sc_curthread = NULL;
+ sc->sc_status = G_AS_NOWAIT;
+
+ callout_init(&sc->sc_wait, CALLOUT_MPSAFE);
+ bioq_init(&sc->sc_bioq);
+
+ return sc;
+}
+
+static void
+g_as_fini(void *data)
+{
+ struct g_as_softc *sc = data;
+
+ KASSERT(bioq_first(&sc->sc_bioq) == NULL,
+ ("Still requests pending."));
+ callout_drain(&sc->sc_wait);
+
+ g_free(sc);
+}
+
+static struct g_gsched g_as = {
+ .gs_name = "as",
+ .gs_init = g_as_init,
+ .gs_fini = g_as_fini,
+ .gs_start = g_as_start,
+ .gs_done = g_as_done,
+};
+
+DECLARE_GSCHED_MODULE(as, &g_as);
Added: user/luigi/geom_sched/sys/geom/sched/g_gsched.h
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/luigi/geom_sched/sys/geom/sched/g_gsched.h Thu Jan 8 09:24:18 2009 (r186888)
@@ -0,0 +1,88 @@
+/*-
+ * Copyright (c) 2008 Fabio Checconi <fabio at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _G_GSCHED_H_
+#define _G_GSCHED_H_
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/queue.h>
+#include <geom/sched/g_sched.h>
+
+/*
+ * This is the interface exported to scheduling modules.
+ */
+/*
+ * Geom I/O scheduler descriptor.
+ */
+struct g_geom;
+
+typedef void *gs_init_t (struct g_geom *geom);
+typedef void gs_fini_t (void *data);
+typedef void gs_start_t (void *data, struct bio *bio);
+typedef void gs_done_t (void *data, struct bio *bio);
+
+struct g_gsched {
+ const char *gs_name;
+ int gs_refs;
+
+ gs_init_t *gs_init;
+ gs_fini_t *gs_fini;
+ gs_start_t *gs_start;
+ gs_done_t *gs_done;
+
+ LIST_ENTRY(g_gsched) glist;
+};
+
+/*
+ * Locking interface. When each operation registered with the
+ * scheduler is invoked, a per-instance lock is taken to protect
+ * the data associated with it. If the scheduler needs something
+ * else to access the same data (e.g., a callout) it must use
+ * these functions.
+ */
+void g_sched_lock(struct g_geom *gp);
+void g_sched_unlock(struct g_geom *gp);
+
+/*
+ * Declaration of a scheduler module.
+ */
+int g_gsched_modevent(module_t mod, int cmd, void *arg);
+
+#define DECLARE_GSCHED_MODULE(name, gsched) \
+ static moduledata_t name##_mod = { \
+ #name, \
+ g_gsched_modevent, \
+ gsched, \
+ }; \
+ DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_ANY); \
+ MODULE_DEPEND(name, g_sched, 0, 0, 0);
+
+#endif /* _KERNEL */
+
+#endif /* _G_GSCHED_H_ */
Added: user/luigi/geom_sched/sys/geom/sched/g_rr.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/luigi/geom_sched/sys/geom/sched/g_rr.c Thu Jan 8 09:24:18 2009 (r186888)
@@ -0,0 +1,355 @@
+/*-
+ * Copyright (c) 2008 Fabio Checconi
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/bio.h>
+#include <sys/callout.h>
+#include <sys/hash.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <geom/geom.h>
+#include <geom/sched/g_gsched.h>
+
+/*
+ * Trivial round robin disk scheduler, with per-thread queues, always
+ * anticipating requests from the last served thread.
+ */
+
+/* Timeout for anticipation. */
+#define G_RR_WAIT_EXPIRE (hz/200 > 0 ? hz/200 : 2)
+
+#define G_QUEUE_NOWAIT 0 /* Ready to dispatch. */
+#define G_QUEUE_WAITREQ 1 /* Waiting for a completion. */
+#define G_QUEUE_WAITING 2 /* Waiting for a new request. */
+
+/*
+ * Per process (thread) queue structure. Each process (thread) in the
+ * system that accesses the disk managed by an instance of this scheduler
+ * has an associated queue.
+ */
+struct g_rr_queue {
+ int q_refs;
+ int q_status;
+ u_long q_key;
+ struct proc *q_proc;
+
+ struct bio_queue_head q_bioq;
+ unsigned int q_service;
+ unsigned int q_budget;
+
+ LIST_ENTRY(g_rr_queue) q_hash;
+ TAILQ_ENTRY(g_rr_queue) q_tailq;
+};
+
+/* List types. */
+TAILQ_HEAD(g_rr_tailq, g_rr_queue);
+LIST_HEAD(g_hash, g_rr_queue);
+
+/* Size of the per-device hash table storing threads. */
+#define G_RR_HASH_SIZE 32
+
+/* Default slice for RR between queues. */
+#define G_RR_DEFAULT_BUDGET 0x00800000
+
+/*
+ * Per device descriptor. It holds the RR list of queues accessing
+ * the disk.
+ */
+struct g_rr_softc {
+ struct g_geom *sc_geom;
+
+ struct g_rr_queue *sc_active;
+ struct g_rr_tailq sc_rr_tailq;
+
+ struct g_hash *sc_hash;
+ u_long sc_hash_mask;
+
+ struct callout sc_wait;
+};
+
+static inline u_long
+g_rr_key(struct thread *tp)
+{
+
+ return (tp != NULL ? tp->td_tid : 0);
+}
+
+/* Return the hash chain for the given key. */
+static inline struct g_hash *
+g_rr_hash(struct g_rr_softc *sc, u_long key)
+{
+
+ return (&sc->sc_hash[key & sc->sc_hash_mask]);
+}
+
+/*
+ * Get a reference to the queue that holds requests for tp, allocating
+ * it if necessary.
+ */
+static struct g_rr_queue *
+g_rr_queue_get(struct g_rr_softc *sc, struct thread *tp)
+{
+ struct g_hash *bucket;
+ struct g_rr_queue *qp;
+ u_long key;
+
+ key = g_rr_key(tp);
+ bucket = g_rr_hash(sc, key);
+ LIST_FOREACH(qp, bucket, q_hash) {
+ if (qp->q_key == key) {
+ qp->q_refs++;
+ return (qp);
+ }
+ }
+
+ qp = g_malloc(sizeof *qp, M_NOWAIT | M_ZERO);
+
+ if (qp != NULL) {
+ /* One for the hash table, one for the caller. */
+ qp->q_refs = 2;
+
+ qp->q_key = key;
+ qp->q_proc = tp->td_proc;
+ bioq_init(&qp->q_bioq);
+ qp->q_budget = G_RR_DEFAULT_BUDGET;
+ LIST_INSERT_HEAD(bucket, qp, q_hash);
+ }
+
+ return (qp);
+}
+
+/*
+ * Release a reference to the queue.
+ */
+static void
+g_rr_queue_put(struct g_rr_queue *qp)
+{
+
+ if (--qp->q_refs > 0)
+ return;
+
+ LIST_REMOVE(qp, q_hash);
+ KASSERT(bioq_first(&qp->q_bioq) == NULL, ("released nonempty queue"));
+
+ g_free(qp);
+}
+
+static void *
+g_rr_init(struct g_geom *geom)
+{
+ struct g_rr_softc *sc;
+
+ sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO);
+ sc->sc_geom = geom;
+ TAILQ_INIT(&sc->sc_rr_tailq);
+ sc->sc_hash = hashinit(G_RR_HASH_SIZE, M_GEOM, &sc->sc_hash_mask);
+ callout_init(&sc->sc_wait, CALLOUT_MPSAFE);
+
+ return (sc);
+}
+
+static void
+g_rr_fini(void *data)
+{
+ struct g_rr_softc *sc;
+ struct g_rr_queue *qp, *qp2;
+ int i;
+
+ sc = data;
+ callout_drain(&sc->sc_wait);
+ KASSERT(sc->sc_active == NULL, ("still a queue under service"));
+ KASSERT(TAILQ_EMPTY(&sc->sc_rr_tailq), ("still scheduled queues"));
+ for (i = 0; i < G_RR_HASH_SIZE; i++) {
+ LIST_FOREACH_SAFE(qp, &sc->sc_hash[i], q_hash, qp2) {
+ LIST_REMOVE(qp, q_hash);
+ g_rr_queue_put(qp);
+ }
+ }
+ hashdestroy(sc->sc_hash, M_GEOM, sc->sc_hash_mask);
+ g_free(sc);
+}
+
+/*
+ * Activate a queue, inserting it into the RR list and preparing it
+ * to be served.
+ */
+static inline void
+g_rr_activate(struct g_rr_softc *sc, struct g_rr_queue *qp)
+{
+
+ qp->q_service = 0;
+ TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq);
+}
+
+static void
+g_rr_dispatch(struct g_rr_softc *sc)
+{
+ struct g_rr_queue *qp;
+ struct bio *bp, *next;
+
+ /* Try with the queue under service first. */
+ qp = sc->sc_active;
+ if (qp == NULL) {
+ /* No queue under service, look for the first in RR order. */
+ qp = TAILQ_FIRST(&sc->sc_rr_tailq);
+ if (qp == NULL) {
+ /* No queue at all, just return. */
+ return;
+ }
+ /* Select the new queue for service. */
+ TAILQ_REMOVE(&sc->sc_rr_tailq, qp, q_tailq);
+ sc->sc_active = qp;
+ } else if (qp->q_status != G_QUEUE_NOWAIT) {
+ /* Queue is anticipating, stop dispatching. */
+ return;
+ }
+
+ bp = bioq_takefirst(&qp->q_bioq);
+ qp->q_service += bp->bio_length;
+ next = bioq_first(&qp->q_bioq);
+ if (qp->q_service > qp->q_budget) {
+ /* Queue exhausted its budget. */
+ sc->sc_active = NULL;
+ if (next != NULL) {
+ /* If it has more requests requeue it. */
+ qp->q_status = G_QUEUE_NOWAIT;
+ g_rr_activate(sc, qp);
+ } else {
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-user
mailing list