svn commit: r186888 - in user/luigi/geom_sched/sys: conf dev/ata geom geom/sched modules/geom modules/geom/geom_sched modules/geom/geom_sched/geom_sched modules/geom/geom_sched/gs_as modules/geom/g...

Thu Jan 8 01:24:20 PST 2009

Author: luigi
Date: Thu Jan  8 09:24:18 2009
New Revision: 186888
URL: http://svn.freebsd.org/changeset/base/186888

Log:
  initial snapshot of geom disk schedulers by Fabio Checconi.

Added:
  user/luigi/geom_sched/sys/geom/sched/
  user/luigi/geom_sched/sys/geom/sched/g_as.c
  user/luigi/geom_sched/sys/geom/sched/g_gsched.h
  user/luigi/geom_sched/sys/geom/sched/g_rr.c
  user/luigi/geom_sched/sys/geom/sched/g_sched.c
  user/luigi/geom_sched/sys/geom/sched/g_sched.h
  user/luigi/geom_sched/sys/geom/sched/gs_as.c
  user/luigi/geom_sched/sys/geom/sched/gs_rr.c
  user/luigi/geom_sched/sys/modules/geom/geom_sched/
  user/luigi/geom_sched/sys/modules/geom/geom_sched/Makefile
  user/luigi/geom_sched/sys/modules/geom/geom_sched/geom_sched/
  user/luigi/geom_sched/sys/modules/geom/geom_sched/geom_sched/Makefile
  user/luigi/geom_sched/sys/modules/geom/geom_sched/gs_as/
  user/luigi/geom_sched/sys/modules/geom/geom_sched/gs_as/Makefile
  user/luigi/geom_sched/sys/modules/geom/geom_sched/gs_rr/
  user/luigi/geom_sched/sys/modules/geom/geom_sched/gs_rr/Makefile
  user/luigi/geom_sched/sys/modules/geom/geom_sched/gsched_as/
  user/luigi/geom_sched/sys/modules/geom/geom_sched/gsched_as/Makefile
  user/luigi/geom_sched/sys/modules/geom/geom_sched/gsched_rr/
  user/luigi/geom_sched/sys/modules/geom/geom_sched/gsched_rr/Makefile
Modified:
  user/luigi/geom_sched/sys/conf/NOTES
  user/luigi/geom_sched/sys/conf/files
  user/luigi/geom_sched/sys/conf/options
  user/luigi/geom_sched/sys/dev/ata/ata-all.h
  user/luigi/geom_sched/sys/dev/ata/ata-disk.c
  user/luigi/geom_sched/sys/dev/ata/ata-queue.c
  user/luigi/geom_sched/sys/geom/geom_disk.c
  user/luigi/geom_sched/sys/geom/geom_disk.h
  user/luigi/geom_sched/sys/geom/geom_io.c
  user/luigi/geom_sched/sys/modules/geom/Makefile
  user/luigi/geom_sched/sys/sys/bio.h

Modified: user/luigi/geom_sched/sys/conf/NOTES
==============================================================================

--- user/luigi/geom_sched/sys/conf/NOTES	Thu Jan  8 06:38:06 2009	(r186887)
+++ user/luigi/geom_sched/sys/conf/NOTES	Thu Jan  8 09:24:18 2009	(r186888)
@@ -153,6 +153,10 @@ options 	GEOM_PART_MBR		# MBR partitioni
 options 	GEOM_PART_VTOC8		# SMI VTOC8 disk label
 options 	GEOM_PC98		# NEC PC9800 partitioning
 options 	GEOM_RAID3		# RAID3 functionality.
+options 	GEOM_SCHED		# Disk scheduling in GEOM.
+options 	GEOM_GSCHED_AS		# Geom-based anticipatory.
+options 	GEOM_GS_AS		# Driver-based anticipatory.
+options 	GEOM_GS_RR		# Driver-based round-robin.
 options 	GEOM_SHSEC		# Shared secret.
 options 	GEOM_STRIPE		# Disk striping.
 options 	GEOM_SUNLABEL		# Sun/Solaris partitioning

Modified: user/luigi/geom_sched/sys/conf/files
==============================================================================
--- user/luigi/geom_sched/sys/conf/files	Thu Jan  8 06:38:06 2009	(r186887)
+++ user/luigi/geom_sched/sys/conf/files	Thu Jan  8 09:24:18 2009	(r186888)
@@ -1283,6 +1283,7 @@ geom/geom_mbr_enc.c		optional geom_mbr
 geom/geom_pc98.c		optional geom_pc98
 geom/geom_pc98_enc.c		optional geom_pc98
 geom/geom_slice.c		standard
+geom/geom_sched.c		standard
 geom/geom_subr.c		standard
 geom/geom_sunlabel.c		optional geom_sunlabel
 geom/geom_sunlabel_enc.c	optional geom_sunlabel
@@ -1312,6 +1313,11 @@ geom/part/g_part_vtoc8.c	optional geom_p
 geom/raid3/g_raid3.c		optional geom_raid3
 geom/raid3/g_raid3_ctl.c	optional geom_raid3
 geom/shsec/g_shsec.c		optional geom_shsec
+geom/sched/g_sched.c		optional geom_sched
+geom/sched/g_as.c		optional geom_gsched_as
+geom/sched/g_rr.c		optional geom_gsched_rr
+geom/sched/gs_as.c		optional geom_gs_as
+geom/sched/gs_rr.c		optional geom_gs_rr
 geom/stripe/g_stripe.c		optional geom_stripe
 geom/uzip/g_uzip.c		optional geom_uzip
 geom/virstor/binstream.c	optional geom_virstor

Modified: user/luigi/geom_sched/sys/conf/options
==============================================================================
--- user/luigi/geom_sched/sys/conf/options	Thu Jan  8 06:38:06 2009	(r186887)
+++ user/luigi/geom_sched/sys/conf/options	Thu Jan  8 09:24:18 2009	(r186888)
@@ -98,6 +98,10 @@ GEOM_PART_MBR	opt_geom.h
 GEOM_PART_VTOC8	opt_geom.h
 GEOM_PC98	opt_geom.h
 GEOM_RAID3	opt_geom.h
+GEOM_SCHED	opt_geom.h
+GEOM_GSCHED_AS	opt_geom.h
+GEOM_GS_AS	opt_geom.h
+GEOM_GS_RR	opt_geom.h
 GEOM_SHSEC	opt_geom.h
 GEOM_STRIPE	opt_geom.h
 GEOM_SUNLABEL	opt_geom.h

Modified: user/luigi/geom_sched/sys/dev/ata/ata-all.h
==============================================================================
--- user/luigi/geom_sched/sys/dev/ata/ata-all.h	Thu Jan  8 06:38:06 2009	(r186887)
+++ user/luigi/geom_sched/sys/dev/ata/ata-all.h	Thu Jan  8 09:24:18 2009	(r186888)
@@ -510,6 +510,7 @@ struct ata_channel {
     TAILQ_HEAD(, ata_request)   ata_queue;      /* head of ATA queue */
     struct ata_request          *freezepoint;   /* composite freezepoint */
     struct ata_request          *running;       /* currently running request */
+    struct disk			*disks[2];	/* disks, if any */
 };
 
 /* disk bay/enclosure related */
@@ -546,6 +547,9 @@ int ata_wmode(struct ata_params *ap);
 int ata_umode(struct ata_params *ap);
 int ata_limit_mode(device_t dev, int mode, int maxmode);
 
+/* ata-disk.c */
+struct ata_request *ata_create_request(struct bio *bp, int full);
+
 /* ata-queue.c: */
 int ata_controlcmd(device_t dev, u_int8_t command, u_int16_t feature, u_int64_t lba, u_int16_t count);
 int ata_atapicmd(device_t dev, u_int8_t *ccb, caddr_t data, int count, int flags, int timeout);

Modified: user/luigi/geom_sched/sys/dev/ata/ata-disk.c
==============================================================================
--- user/luigi/geom_sched/sys/dev/ata/ata-disk.c	Thu Jan  8 06:38:06 2009	(r186887)
+++ user/luigi/geom_sched/sys/dev/ata/ata-disk.c	Thu Jan  8 09:24:18 2009	(r186888)
@@ -60,6 +60,7 @@ static void ad_describe(device_t dev);
 static int ad_version(u_int16_t);
 static disk_strategy_t ad_strategy;
 static disk_ioctl_t ad_ioctl;
+static disk_kick_t ad_kick;
 static dumper_t ad_dump;
 
 /*
@@ -148,6 +149,7 @@ ad_attach(device_t dev)
     adp->disk = disk_alloc();
     adp->disk->d_strategy = ad_strategy;
     adp->disk->d_ioctl = ad_ioctl;
+    adp->disk->d_kick = ad_kick;
     adp->disk->d_dump = ad_dump;
     adp->disk->d_name = "ad";
     adp->disk->d_drv1 = dev;
@@ -168,6 +170,7 @@ ad_attach(device_t dev)
     snprintf(adp->disk->d_ident, sizeof(adp->disk->d_ident), "ad:%s",
 	atadev->param.serial);
     disk_create(adp->disk, DISK_VERSION);
+    ch->disks[atadev->unit == ATA_SLAVE] = adp->disk;
     device_add_child(dev, "subdisk", device_get_unit(dev));
     ad_firmware_geom_adjust(dev, adp->disk);
     bus_generic_attach(dev);
@@ -179,6 +182,7 @@ ad_attach(device_t dev)
 static int
 ad_detach(device_t dev)
 {
+    struct ata_channel *ch = device_get_softc(device_get_parent(dev));
     struct ad_softc *adp = device_get_ivars(dev);
     struct ata_device *atadev = device_get_softc(dev);
     device_t *children;
@@ -199,6 +203,8 @@ ad_detach(device_t dev)
 	free(children, M_TEMP);
     }
 
+    ch->disks[atadev->unit == ATA_SLAVE] = NULL;
+
     /* detroy disk from the system so we dont get any further requests */
     disk_destroy(adp->disk);
 
@@ -266,13 +272,13 @@ ad_spindown(void *priv)
     ata_queue_request(request);
 }
 
-
-static void 
-ad_strategy(struct bio *bp)
+struct ata_request * 
+ata_create_request(struct bio *bp, int full)
 {
-    device_t dev =  bp->bio_disk->d_drv1;
+    device_t dev = bp->bio_disk->d_drv1;
     struct ata_device *atadev = device_get_softc(dev);
     struct ata_request *request;
+    struct ata_channel *ch;
 
     if (atadev->spindown != 0)
 	callout_reset(&atadev->spindown_timer, hz * atadev->spindown,
@@ -281,7 +287,7 @@ ad_strategy(struct bio *bp)
     if (!(request = ata_alloc_request())) {
 	device_printf(dev, "FAILURE - out of memory in start\n");
 	biofinish(bp, NULL, ENOMEM);
-	return;
+	return NULL;
     }
 
     /* setup request */
@@ -344,10 +350,32 @@ ad_strategy(struct bio *bp)
 	device_printf(dev, "FAILURE - unknown BIO operation\n");
 	ata_free_request(request);
 	biofinish(bp, NULL, EIO);
-	return;
+	return NULL;
     }
     request->flags |= ATA_R_ORDERED;
-    ata_queue_request(request);
+
+    if (full != 0) {
+	if ((request->parent = device_get_parent(dev)) == NULL) {
+	    ata_free_request(request);
+	    biofinish(bp, NULL, ENXIO);
+	    return NULL;
+	}
+
+	ch = device_get_softc(request->parent);
+	callout_init_mtx(&request->callout, &ch->state_mtx,
+			 CALLOUT_RETURNUNLOCKED);
+    }
+
+    return request;
+}
+
+static void
+ad_strategy(struct bio *bp)
+{
+    struct ata_request *request;
+
+    if ((request = ata_create_request(bp, 0)) != NULL)
+	ata_queue_request(request);
 }
 
 static void
@@ -369,6 +397,18 @@ ad_ioctl(struct disk *disk, u_long cmd, 
     return ata_device_ioctl(disk->d_drv1, cmd, data);
 }
 
+static void
+ad_kick(struct disk *disk)	/* d_kick hook installed by ad_attach(): restart the channel that owns this disk */
+{
+    device_t dev;
+    struct ata_channel *ch;
+
+    dev = disk->d_drv1;		/* d_drv1 is the device_t stored by ad_attach() */
+    ch = device_get_softc(device_get_parent(dev));	/* softc of the parent device, used as the ATA channel */
+    if (ch != NULL)
+	ata_start(ch->dev);	/* ata_start() asks g_sched_next() for work when the channel queue is empty */
+}
+
 static int
 ad_dump(void *arg, void *virtual, vm_offset_t physical,
 	off_t offset, size_t length)

Modified: user/luigi/geom_sched/sys/dev/ata/ata-queue.c
==============================================================================
--- user/luigi/geom_sched/sys/dev/ata/ata-queue.c	Thu Jan  8 06:38:06 2009	(r186887)
+++ user/luigi/geom_sched/sys/dev/ata/ata-queue.c	Thu Jan  8 09:24:18 2009	(r186888)
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/rman.h>
 #include <dev/ata/ata-all.h>
 #include <ata_if.h>
+#include <geom/geom_sched.h>
 
 /* prototypes */
 static void ata_completed(void *, int);
@@ -171,10 +172,25 @@ ata_start(device_t dev)
     struct ata_channel *ch = device_get_softc(dev);
     struct ata_request *request;
     struct ata_composite *cptr;
-    int dependencies = 0;
+    struct disk *dp;
+    struct bio *bp;
+    int dependencies = 0, i;
 
-    /* if we have a request on the queue try to get it running */
     mtx_lock(&ch->queue_mtx);
+    if (TAILQ_FIRST(&ch->ata_queue) == NULL) {
+	for (i = 0; i < 2; i++) {
+	    dp = ch->disks[i];
+	    while (dp != NULL && (bp = g_sched_next(dp)) != NULL) {
+		request = ata_create_request(bp, 1);
+		if (request != NULL) {
+		    ata_sort_queue(ch, request);
+		    break;
+		}
+	    }
+	}
+    }
+
+    /* if we have a request on the queue try to get it running */
     if ((request = TAILQ_FIRST(&ch->ata_queue))) {
 
 	/* we need the locking function to get the lock for this channel */

Modified: user/luigi/geom_sched/sys/geom/geom_disk.c
==============================================================================
--- user/luigi/geom_sched/sys/geom/geom_disk.c	Thu Jan  8 06:38:06 2009	(r186887)
+++ user/luigi/geom_sched/sys/geom/geom_disk.c	Thu Jan  8 09:24:18 2009	(r186888)
@@ -55,9 +55,11 @@ __FBSDID("$FreeBSD$");
 #include <geom/geom.h>
 #include <geom/geom_disk.h>
 #include <geom/geom_int.h>
+#include <geom/geom_sched.h>
 
 static struct mtx g_disk_done_mtx;
 
+static g_ctl_req_t g_disk_ctlreq;
 static g_access_t g_disk_access;
 static g_init_t g_disk_init;
 static g_fini_t g_disk_fini;
@@ -68,6 +70,7 @@ static g_dumpconf_t g_disk_dumpconf;
 static struct g_class g_disk_class = {
 	.name = "DISK",
 	.version = G_VERSION,
+	.ctlreq = g_disk_ctlreq,
 	.init = g_disk_init,
 	.fini = g_disk_fini,
 	.start = g_disk_start,
@@ -81,16 +84,19 @@ g_disk_init(struct g_class *mp __unused)
 {
 
 	mtx_init(&g_disk_done_mtx, "g_disk_done", NULL, MTX_DEF);
+	g_sched_init();
 }
 
 static void
 g_disk_fini(struct g_class *mp __unused)
 {
 
+	g_sched_fini();
 	mtx_destroy(&g_disk_done_mtx);
 }
 
 DECLARE_GEOM_CLASS(g_disk_class, g_disk);
+MODULE_VERSION(g_disk, 0);
 
 static void __inline
 g_disk_lock_giant(struct disk *dp)
@@ -106,6 +112,83 @@ g_disk_unlock_giant(struct disk *dp)
 		mtx_unlock(&Giant);
 }
 
+static void
+g_disk_configure(struct gctl_req *req, struct g_class *mp)	/* "configure" verb: set scheduler "iosched" on providers arg0..arg(nargs-1) */
+{
+	struct disk *dp;
+	struct g_provider *pp;
+	const char *sched, *name;
+	char param[16];		/* holds "arg<N>" */
+	int i, *nargs;
+
+	g_topology_assert();
+
+	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
+	if (nargs == NULL) {
+		gctl_error(req, "No '%s' argument.", "nargs");
+		return;
+	}
+
+	if (*nargs <= 0) {	/* at least one device name is required */
+		gctl_error(req, "Missing devices.");
+		return;
+	}
+
+	sched = gctl_get_asciiparam(req, "iosched");	/* scheduler name, passed verbatim to g_sched_configure() */
+	if (sched == NULL) {
+		gctl_error(req, "No '%s' argument.", "iosched");
+		return;
+	}
+
+	for (i = 0; i < *nargs; i++) {
+		snprintf(param, sizeof(param), "arg%d", i);
+		name = gctl_get_asciiparam(req, param);
+		if (name == NULL) {
+			gctl_error(req, "No '%s' argument.", param);
+			return;
+		}
+
+		if (strncmp(name, "/dev/", strlen("/dev/")) == 0)	/* accept names with or without "/dev/" */
+			name += strlen("/dev/");
+
+		pp = g_provider_by_name(name);
+		if (pp == NULL || pp->geom->class != mp) {	/* must exist and belong to this class */
+			gctl_error(req, "Provider %s is invalid.", name);
+			return;
+		}
+
+		dp = pp->geom->softc;	/* NOTE(review): assumes softc is a non-NULL struct disk here - confirm */
+		if (g_sched_configure(dp, sched) != 0) {
+			gctl_error(req, "Could not set scheduler %s.", sched);
+			return;		/* devices already processed are not rolled back */
+		}
+	}
+}
+
+static void
+g_disk_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb)	/* .ctlreq handler of g_disk_class */
+{
+	uint32_t *version;
+
+	g_topology_assert();
+
+	version = gctl_get_paraml(req, "version", sizeof(*version));
+	if (version == NULL) {
+		gctl_error(req, "No '%s' argument.", "version");
+		return;
+	}
+
+	if (*version != G_VERSION) {	/* the request must carry the same G_VERSION as the kernel */
+		gctl_error(req, "Userland and kernel parts are out of sync.");
+		return;
+	}
+
+	if (strcmp(verb, "configure") == 0)	/* the only verb handled */
+		g_disk_configure(req, mp);
+	else
+		gctl_error(req, "Unknown verb.");
+}
+
 static int
 g_disk_access(struct g_provider *pp, int r, int w, int e)
 {
@@ -198,6 +281,8 @@ g_disk_done(struct bio *bp)
 	mtx_lock(&g_disk_done_mtx);
 	bp->bio_completed = bp->bio_length - bp->bio_resid;
 
+	g_sched_done(bp);
+
 	bp2 = bp->bio_parent;
 	if (bp2->bio_error == 0)
 		bp2->bio_error = bp->bio_error;
@@ -288,7 +373,7 @@ g_disk_start(struct bio *bp)
 			bp2->bio_disk = dp;
 			devstat_start_transaction_bio(dp->d_devstat, bp2);
 			g_disk_lock_giant(dp);
-			dp->d_strategy(bp2);
+			g_sched_start(dp, bp2);
 			g_disk_unlock_giant(dp);
 			bp2 = bp3;
 			bp3 = NULL;
@@ -466,6 +551,7 @@ disk_create(struct disk *dp, int version
 		    dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED,
 		    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
 	dp->d_geom = NULL;
+	g_sched_disk_init(dp);
 	g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident));
 	g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL);
 }
@@ -476,6 +562,7 @@ disk_destroy(struct disk *dp)
 
 	g_cancel_event(dp);
 	dp->d_destroyed = 1;
+	g_sched_disk_fini(dp);
 	if (dp->d_devstat != NULL)
 		devstat_remove_entry(dp->d_devstat);
 	g_post_event(g_disk_destroy, dp, M_WAITOK, NULL);
@@ -487,6 +574,8 @@ disk_gone(struct disk *dp)
 	struct g_geom *gp;
 	struct g_provider *pp;
 
+	g_sched_disk_gone(dp);
+
 	gp = dp->d_geom;
 	if (gp != NULL)
 		LIST_FOREACH(pp, &gp->provider, provider)

Modified: user/luigi/geom_sched/sys/geom/geom_disk.h
==============================================================================
--- user/luigi/geom_sched/sys/geom/geom_disk.h	Thu Jan  8 06:38:06 2009	(r186887)
+++ user/luigi/geom_sched/sys/geom/geom_disk.h	Thu Jan  8 09:24:18 2009	(r186888)
@@ -53,7 +53,10 @@ typedef	int	disk_ioctl_t(struct disk *, 
 			int fflag, struct thread *td);
 		/* NB: disk_ioctl_t SHALL be cast'able to d_ioctl_t */
 
+typedef void	disk_kick_t(struct disk *);
+
 struct g_geom;
+struct g_sched;
 struct devstat;
 
 struct disk {
@@ -74,6 +77,7 @@ struct disk {
 	disk_close_t		*d_close;
 	disk_strategy_t		*d_strategy;
 	disk_ioctl_t		*d_ioctl;
+	disk_kick_t		*d_kick;
 	dumper_t		*d_dump;
 
 	/* Info fields from driver to geom_disk.c. Valid when open */
@@ -86,6 +90,13 @@ struct disk {
 	u_int			d_stripesize;
 	char			d_ident[DISK_IDENT_SIZE];
 
+	/* Scheduler fields */
+	struct mtx		d_sched_lock;
+	u_int			d_sched_flags;
+	u_int			d_nr_sorted;
+	struct g_sched		*d_sched;
+	void			*d_sched_data;
+
 	/* Fields private to the driver */
 	void			*d_drv1;
 };

Modified: user/luigi/geom_sched/sys/geom/geom_io.c
==============================================================================
--- user/luigi/geom_sched/sys/geom/geom_io.c	Thu Jan  8 06:38:06 2009	(r186887)
+++ user/luigi/geom_sched/sys/geom/geom_io.c	Thu Jan  8 09:24:18 2009	(r186888)
@@ -172,6 +172,7 @@ g_clone_bio(struct bio *bp)
 		bp2->bio_offset = bp->bio_offset;
 		bp2->bio_data = bp->bio_data;
 		bp2->bio_attribute = bp->bio_attribute;
+		bp2->bio_thread = bp->bio_thread;
 		bp->bio_children++;
 	}
 #ifdef KTR
@@ -369,6 +370,10 @@ g_io_request(struct bio *bp, struct g_co
 	bp->bio_error = 0;
 	bp->bio_completed = 0;
 
+	/* Pass down the thread that issued the bio. */
+	if (bp->bio_thread == NULL)
+		bp->bio_thread = curthread;
+
 	KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
 	    ("Bio already on queue bp=%p", bp));
 	bp->bio_flags |= BIO_ONQUEUE;

Added: user/luigi/geom_sched/sys/geom/sched/g_as.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/luigi/geom_sched/sys/geom/sched/g_as.c	Thu Jan  8 09:24:18 2009	(r186888)
@@ -0,0 +1,204 @@
+/*-
+ * Copyright (c) 2007 Fabio Checconi <fabio at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/bio.h>
+#include <sys/callout.h>
+#include <sys/proc.h>
+#include <sys/taskqueue.h>
+
+#include <geom/geom.h>
+#include <geom/sched/g_gsched.h>
+
+/*
+ * Status values for AS (stored in g_as_softc.sc_status).
+ */
+#define	G_AS_NOWAIT		0	/* Not waiting at all. */
+#define	G_AS_WAITREQ		1	/* Waiting for a request to complete. */
+#define	G_AS_WAITING		2	/* Waiting for a new request. */
+
+struct g_as_softc {		/* per-instance state, allocated by g_as_init() */
+	struct g_geom		*sc_geom;	/* geom passed to g_as_init() */
+	struct thread		*sc_curthread;	/* thread being anticipated, or NULL */
+	int			sc_status;	/* one of the G_AS_* values */
+	long			sc_batch;	/* sum of bio_length dispatched since last reset */
+
+	struct callout		sc_wait;	/* anticipation timer, runs g_as_wait_timeout() */
+	struct bio_queue_head	sc_bioq;	/* pending requests, inserted with bioq_disksort() */
+};
+
+#define	G_AS_WAIT_EXPIRE	(hz/200 > 0 ? hz/200 : 2)	/* callout delay: hz/200 ticks, or 2 if that is 0 */
+#define	G_AS_MAX_BATCH		0x00800000	/* sc_batch limit; beyond it g_as_dispatch() skips anticipation */
+
+/*
+ * Dispatch the first queued request (if any) to the geom's first consumer
+ * and update sc_status accordingly; sc_curthread is always cleared.
+ */
+static void
+g_as_dispatch(struct g_as_softc *sc)
+{
+	struct bio *bio;
+
+	/*
+	 * Batching just bounds how much is served while anticipating
+	 * sequential requests; it is not really coupled with the
+	 * threads being served.  Its only purpose is to keep the
+	 * scheduler from starving other threads while an aggressive
+	 * one keeps issuing new requests.
+	 */
+	sc->sc_curthread = NULL;
+
+	bio = bioq_takefirst(&sc->sc_bioq);
+	if (bio != NULL) {
+		sc->sc_batch += bio->bio_length;
+		if (sc->sc_batch > G_AS_MAX_BATCH) {
+			/*
+			 * Too much served in this batch, don't wait
+			 * for the next request.
+			 */
+			sc->sc_batch = 0;
+			sc->sc_status = G_AS_NOWAIT;
+		} else {
+			/*
+			 * Once this request has been served we'll wait
+			 * for a new one from the same thread.
+			 * Of course we are anticipating everything
+			 * here, even writes or asynchronous requests,
+			 * but this is only a prototype.
+			 */
+			sc->sc_status = G_AS_WAITREQ;
+		}
+		g_io_request(bio, LIST_FIRST(&sc->sc_geom->consumer));
+	} else
+		sc->sc_status = G_AS_NOWAIT;	/* nothing queued: go idle */
+}
+
+static void
+g_as_wait_timeout(void *data)	/* callout handler armed by g_as_done(); data is the g_as_softc */
+{
+	struct g_as_softc *sc = data;
+
+	g_sched_lock(sc->sc_geom);	/* callouts must take the per-instance lock themselves */
+	/*
+	 * We were waiting for a new request for curthread, it did
+	 * not come, just dispatch the next one.
+	 */
+	if (sc->sc_status == G_AS_WAITING)	/* otherwise the wait already ended; do nothing */
+		g_as_dispatch(sc);
+	g_sched_unlock(sc->sc_geom);
+}
+
+static void
+g_as_start(void *data, struct bio *bio)	/* gs_start hook: queue bio, dispatch if allowed */
+{
+	struct g_as_softc *sc = data;
+
+	bioq_disksort(&sc->sc_bioq, bio);
+
+	/*
+	 * Dispatch (and stop the timer) if we are idle or this request is
+	 * from the anticipated thread; otherwise it just stays queued.
+	 */
+	if (sc->sc_status == G_AS_NOWAIT ||
+	    bio->bio_thread == sc->sc_curthread) {
+		callout_stop(&sc->sc_wait);
+		g_as_dispatch(sc);	/* takes the head of sc_bioq, which need not be this bio */
+	}
+}
+
+static void
+g_as_done(void *data, struct bio *bio)	/* gs_done hook: start anticipating or dispatch the next request */
+{
+	struct g_as_softc *sc = data;
+	struct bio *bp2;
+
+	bp2 = bio->bio_parent;
+
+	/* Act only when bio_children == bio_inbed + 1; earlier fragments are ignored. */
+	if (bp2->bio_children != bp2->bio_inbed + 1)
+		return;
+
+	if (sc->sc_status == G_AS_WAITREQ) {
+		/*
+		 * Start waiting for a new request from the thread of this bio.
+		 */
+		sc->sc_curthread = bio->bio_thread;
+		sc->sc_status = G_AS_WAITING;
+		callout_reset(&sc->sc_wait, G_AS_WAIT_EXPIRE,
+		    g_as_wait_timeout, sc);
+	} else {
+		/*
+		 * Since we don't have to wait for anything just dispatch
+		 * the next request.
+		 */
+		g_as_dispatch(sc);
+	}
+}
+
+static void *
+g_as_init(struct g_geom *geom)	/* gs_init hook: allocate and return the per-instance g_as_softc */
+{
+	struct g_as_softc *sc;
+
+	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);	/* zeroed, so sc_batch starts at 0 */
+	sc->sc_geom = geom;
+	sc->sc_curthread = NULL;
+	sc->sc_status = G_AS_NOWAIT;	/* start idle: the first request is dispatched at once */
+
+	callout_init(&sc->sc_wait, CALLOUT_MPSAFE);
+	bioq_init(&sc->sc_bioq);
+
+	return sc;
+}
+
+static void
+g_as_fini(void *data)	/* gs_fini hook: release the softc returned by g_as_init() */
+{
+	struct g_as_softc *sc = data;
+
+	KASSERT(bioq_first(&sc->sc_bioq) == NULL,	/* the queue is expected to be empty by now */
+	    ("Still requests pending."));
+	callout_drain(&sc->sc_wait);	/* drain the timer before sc is freed */
+
+	g_free(sc);
+}
+
+static struct g_gsched g_as = {	/* descriptor for the scheduler named "as" */
+	.gs_name = "as",
+	.gs_init = g_as_init,
+	.gs_fini = g_as_fini,
+	.gs_start = g_as_start,
+	.gs_done = g_as_done,
+};
+
+DECLARE_GSCHED_MODULE(as, &g_as);	/* module "as"; &g_as becomes the argument of g_gsched_modevent() */

Added: user/luigi/geom_sched/sys/geom/sched/g_gsched.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/luigi/geom_sched/sys/geom/sched/g_gsched.h	Thu Jan  8 09:24:18 2009	(r186888)
@@ -0,0 +1,88 @@
+/*-
+ * Copyright (c) 2008 Fabio Checconi <fabio at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef	_G_GSCHED_H_
+#define	_G_GSCHED_H_
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/queue.h>
+#include <geom/sched/g_sched.h>
+
+/*
+ * This is the interface exported to scheduling modules.
+ */
+/*
+ * Geom I/O scheduler descriptor (struct g_gsched) and its method types.
+ */
+struct g_geom;
+
+typedef void *gs_init_t (struct g_geom *geom);	/* returns the per-instance data passed to the other hooks */
+typedef void gs_fini_t (void *data);	/* releases that per-instance data */
+typedef void gs_start_t (void *data, struct bio *bio);	/* a request is being submitted */
+typedef void gs_done_t (void *data, struct bio *bio);	/* presumably called when a request completes - confirm in g_sched.c */
+
+struct g_gsched {
+	const char	*gs_name;	/* scheduler name, e.g. "as" */
+	int		gs_refs;	/* presumably a use count kept by the framework - confirm */
+
+	gs_init_t	*gs_init;
+	gs_fini_t	*gs_fini;
+	gs_start_t	*gs_start;
+	gs_done_t	*gs_done;
+
+	LIST_ENTRY(g_gsched) glist;	/* list linkage; not touched by the scheduler modules shown here */
+};
+
+/*
+ * Locking interface.  Whenever an operation registered with the
+ * scheduler is invoked, a per-instance lock is held to protect
+ * the data associated with it.  If the scheduler needs anything
+ * else to access the same data (e.g., a callout), that code must
+ * bracket the access with these functions.
+ */
+void g_sched_lock(struct g_geom *gp);
+void g_sched_unlock(struct g_geom *gp);
+
+/*
+ * Declaration of a scheduler module: defines name##_mod with g_gsched_modevent as handler and gsched as its argument.
+ */
+int g_gsched_modevent(module_t mod, int cmd, void *arg);	/* shared event handler for all scheduler modules */
+
+#define	DECLARE_GSCHED_MODULE(name, gsched)				\
+	static moduledata_t name##_mod = {				\
+		#name,							\
+		g_gsched_modevent,					\
+		gsched,							\
+	};								\
+	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_ANY);	\
+	MODULE_DEPEND(name, g_sched, 0, 0, 0);
+
+#endif	/* _KERNEL */
+
+#endif	/* _G_GSCHED_H_ */

Added: user/luigi/geom_sched/sys/geom/sched/g_rr.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/luigi/geom_sched/sys/geom/sched/g_rr.c	Thu Jan  8 09:24:18 2009	(r186888)
@@ -0,0 +1,355 @@
+/*-
+ * Copyright (c) 2008 Fabio Checconi
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/bio.h>
+#include <sys/callout.h>
+#include <sys/hash.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <geom/geom.h>
+#include <geom/sched/g_gsched.h>
+
+/*
+ * Trivial round robin disk scheduler, with per-thread queues, always
+ * anticipating requests from the last served thread.
+ */
+
+/* Timeout for anticipation: hz/200 ticks, or 2 if that evaluates to 0. */
+#define	G_RR_WAIT_EXPIRE	(hz/200 > 0 ? hz/200 : 2)
+
+#define	G_QUEUE_NOWAIT		0	/* Ready to dispatch. */
+#define	G_QUEUE_WAITREQ		1	/* Waiting for a completion. */
+#define G_QUEUE_WAITING		2	/* Waiting for a new request. */
+
+/*
+ * Per process (thread) queue structure.  Each process (thread) in the
+ * system that accesses the disk managed by an instance of this scheduler
+ * has an associated queue, created on demand by g_rr_queue_get().
+ */
+struct g_rr_queue {
+	int		q_refs;		/* reference count: hash table plus callers */
+	int		q_status;	/* one of the G_QUEUE_* values */
+	u_long		q_key;		/* g_rr_key() of the owning thread */
+	struct proc	*q_proc;	/* td_proc of the thread that created the queue */
+
+	struct bio_queue_head q_bioq;	/* pending requests of this thread */
+	unsigned int	q_service;	/* bio_length served since g_rr_activate() */
+	unsigned int	q_budget;	/* service limit, G_RR_DEFAULT_BUDGET at creation */
+
+	LIST_ENTRY(g_rr_queue) q_hash;	/* linkage in the softc hash chain */
+	TAILQ_ENTRY(g_rr_queue) q_tailq;	/* linkage in sc_rr_tailq */
+};
+
+/* List types: g_rr_tailq is the round-robin list, g_hash one hash chain. */
+TAILQ_HEAD(g_rr_tailq, g_rr_queue);
+LIST_HEAD(g_hash, g_rr_queue);
+
+/* Size requested from hashinit() for the per-device table of thread queues. */
+#define	G_RR_HASH_SIZE		32
+
+/* Default slice for RR between queues (initial q_budget, compared with q_service). */
+#define	G_RR_DEFAULT_BUDGET	0x00800000
+
+/*
+ * Per device descriptor.  It holds the RR list of queues accessing
+ * the disk and the hash table used to find a thread's queue.
+ */
+struct g_rr_softc {
+	struct g_geom	*sc_geom;	/* geom passed to g_rr_init() */
+
+	struct g_rr_queue *sc_active;	/* queue under service, or NULL */
+	struct g_rr_tailq sc_rr_tailq;	/* queues waiting for service, in RR order */
+
+	struct g_hash	*sc_hash;	/* table returned by hashinit() */
+	u_long		sc_hash_mask;	/* mask returned by hashinit() */
+
+	struct callout	sc_wait;	/* timer; initialised in g_rr_init() */
+};
+
+static inline u_long
+g_rr_key(struct thread *tp)	/* hash key for a thread: its td_tid, or 0 when tp is NULL */
+{
+
+	return (tp != NULL ? tp->td_tid : 0);
+}
+
+/* Return the hash chain for the given key (bucket index is key & sc_hash_mask). */
+static inline struct g_hash *
+g_rr_hash(struct g_rr_softc *sc, u_long key)
+{
+
+	return (&sc->sc_hash[key & sc->sc_hash_mask]);
+}
+
+/*
+ * Get a reference to the queue that holds requests for tp, allocating
+ * it if necessary.
+ *
+ * The queue is looked up in sc's hash table under g_rr_key(tp).  On a
+ * hit its reference count is incremented.  On a miss a new queue is
+ * allocated with M_NOWAIT, linked into the hash chain and returned
+ * with two references: one owned by the table, one by the caller.
+ *
+ * tp may be NULL: g_rr_key() maps it to key 0, and a queue created
+ * for it gets a NULL q_proc.
+ *
+ * Returns the queue, or NULL if the allocation failed.  The caller's
+ * reference is released with g_rr_queue_put().
+ */
+static struct g_rr_queue *
+g_rr_queue_get(struct g_rr_softc *sc, struct thread *tp)
+{
+	struct g_hash *bucket;
+	struct g_rr_queue *qp;
+	u_long key;
+
+	key = g_rr_key(tp);
+	bucket = g_rr_hash(sc, key);
+	LIST_FOREACH(qp, bucket, q_hash) {
+		if (qp->q_key == key) {
+			qp->q_refs++;
+			return (qp);
+		}
+	}
+
+	qp = g_malloc(sizeof *qp, M_NOWAIT | M_ZERO);
+
+	if (qp != NULL) {
+		/* One for the hash table, one for the caller. */
+		qp->q_refs = 2;
+
+		qp->q_key = key;
+		/* g_rr_key() accepts a NULL thread, so this must too. */
+		qp->q_proc = (tp != NULL) ? tp->td_proc : NULL;
+		bioq_init(&qp->q_bioq);
+		qp->q_budget = G_RR_DEFAULT_BUDGET;
+		LIST_INSERT_HEAD(bucket, qp, q_hash);
+	}
+
+	return (qp);
+}
+
+/*
+ * Release a reference to the queue; the last release unlinks and frees it.
+ */
+static void
+g_rr_queue_put(struct g_rr_queue *qp)
+{
+
+	if (--qp->q_refs > 0)	/* still referenced elsewhere */
+		return;
+
+	LIST_REMOVE(qp, q_hash);	/* unlink from its hash chain */
+	KASSERT(bioq_first(&qp->q_bioq) == NULL, ("released nonempty queue"));
+
+	g_free(qp);
+}
+
+static void *
+g_rr_init(struct g_geom *geom)	/* allocate and return the per-device g_rr_softc */
+{
+	struct g_rr_softc *sc;
+
+	sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO);	/* zeroed, so sc_active starts NULL */
+	sc->sc_geom = geom;
+	TAILQ_INIT(&sc->sc_rr_tailq);
+	sc->sc_hash = hashinit(G_RR_HASH_SIZE, M_GEOM, &sc->sc_hash_mask);	/* also stores the mask used by g_rr_hash() */
+	callout_init(&sc->sc_wait, CALLOUT_MPSAFE);
+
+	return (sc);
+}
+
+/*
+ * Tear down a scheduler instance created by g_rr_init().
+ *
+ * The timer is drained first.  By then no queue may be under service
+ * or scheduled (both are asserted).  Every queue still in the hash
+ * table then has the table's reference dropped, and finally the table
+ * and the softc are freed.
+ */
+static void
+g_rr_fini(void *data)
+{
+	struct g_rr_softc *sc;
+	struct g_rr_queue *qp, *qp2;
+	u_long i;
+
+	sc = data;
+	callout_drain(&sc->sc_wait);
+	KASSERT(sc->sc_active == NULL, ("still a queue under service"));
+	KASSERT(TAILQ_EMPTY(&sc->sc_rr_tailq), ("still scheduled queues"));
+	/*
+	 * Walk the buckets hashinit() actually created (0..sc_hash_mask)
+	 * rather than assuming their number equals G_RR_HASH_SIZE.
+	 */
+	for (i = 0; i <= sc->sc_hash_mask; i++) {
+		LIST_FOREACH_SAFE(qp, &sc->sc_hash[i], q_hash, qp2) {
+			/*
+			 * Drop the hash table's reference.  g_rr_queue_put()
+			 * unlinks the queue itself when the count reaches
+			 * zero, so unlinking it here as well would remove
+			 * the same element from the list twice.
+			 */
+			KASSERT(qp->q_refs == 1, ("queue still referenced"));
+			g_rr_queue_put(qp);
+		}
+	}
+	hashdestroy(sc->sc_hash, M_GEOM, sc->sc_hash_mask);
+	g_free(sc);
+}
+
+/*
+ * Activate a queue: reset its service counter and append it to the
+ * tail of the RR list, so that it is served after the queues already there.
+ */
+static inline void
+g_rr_activate(struct g_rr_softc *sc, struct g_rr_queue *qp)
+{
+
+	qp->q_service = 0;	/* a fresh slice: nothing served yet */
+	TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq);
+}
+
+static void
+g_rr_dispatch(struct g_rr_softc *sc)
+{
+	struct g_rr_queue *qp;
+	struct bio *bp, *next;
+
+	/* Try with the queue under service first. */
+	qp = sc->sc_active;
+	if (qp == NULL) {
+		/* No queue under service, look for the first in RR order. */
+		qp = TAILQ_FIRST(&sc->sc_rr_tailq);
+		if (qp == NULL) {
+			/* No queue at all, just return. */
+			return;
+		}
+		/* Select the new queue for service. */
+		TAILQ_REMOVE(&sc->sc_rr_tailq, qp, q_tailq);
+		sc->sc_active = qp;
+	} else if (qp->q_status != G_QUEUE_NOWAIT) {
+		/* Queue is anticipating, stop dispatching. */
+		return;
+	}
+
+	bp = bioq_takefirst(&qp->q_bioq);
+	qp->q_service += bp->bio_length;
+	next = bioq_first(&qp->q_bioq);
+ 	if (qp->q_service > qp->q_budget) {
+		/* Queue exhausted its budget. */
+		sc->sc_active = NULL;
+		if (next != NULL) {
+			/* If it has more requests requeue it. */
+			qp->q_status = G_QUEUE_NOWAIT;
+			g_rr_activate(sc, qp);
+		} else {

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***