kern/169403: CAM layer, I/O starvation, no fairness
jerry Toung
jtoung at opnet.com
Mon Jun 25 17:50:07 UTC 2012
>Number: 169403
>Category: kern
>Synopsis: CAM layer, I/O starvation, no fairness
>Confidential: no
>Severity: serious
>Priority: low
>Responsible: freebsd-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: sw-bug
>Submitter-Id: current-users
>Arrival-Date: Mon Jun 25 17:50:06 UTC 2012
>Closed-Date:
>Last-Modified:
>Originator: jerry Toung
>Release: FreeBSD 8.1-RELEASE #0
>Organization:
Opnet
>Environment:
FreeBSD dev8 8.1-RELEASE FreeBSD 8.1-RELEASE #0: Mon Jul 19 02:36:49 UTC 2010 root at mason.cse.buffalo.edu:/usr/obj/usr/src/sys/GENERIC amd64
>Description:
I am convinced that there is a bug in the CAM code that leads to I/O starvation.
I have already discussed this privately with some. I am now bringing this up to
the general audience to get more feedback.
My setup is that I have 1 RAID controller with 2 arrays connected to
it, da0 and da1.
The controller supports 252 tags. After boot up, camcontrol tags on
da0 and da1 shows that both devices have 252 openings each. A process
P0 writing on da0 is dormant most of the time, but would wake up with
burst of I/Os, 5000-6000 ops as reported by gstat.
A process P1 writing on da1 has a fixed data rate to da1 as reported by gstat.
The issue: When P0 generates that burst of 5000-6000 ops, the write
rate of P1 on da1 goes to 0 MB/sec for up to 8-9sec,
vfs.hirunningspace starts climbing and we get into waithirunning() or
getblk() sleep channel. BTW, raising hirunningspace has no effect on
the 0 MB/s behavior.
The first problem that I see here, is that if the sim's devq has 252
alloc_queue and send_queue, the struct cam_ed representing da0 and da1 should each have 126 openings and not 252.
The second problem is that clearly, there is no I/O fairness in CAM as seen
in gstat output and da0 exclusively takes a hold of the sim/controller
until it has processed all it's I/Os (8-9 seconds). The code that does
this is at
cam/cam_xpt.c:3030
3030 && (devq->alloc_openings > 0)
and
cam/cam_xpt.c:3091
3091 && (devq->send_openings > 0)
After you've split the openings to 126 each, the tests above will always be true
I have a patch and it fixes those problems.
da0 and da1 now both automatically get 126 openings and based on that,
extra logic implements fairness in cam/cam_xpt.c. No more 0 MB/s on
da1. This is on 8.1-RELEASE FreeBSD.
>How-To-Repeat:
>Fix:
Patch attached with submission follows:
diff -rup cam/cam_queue.c cam/cam_queue.c
--- cam/cam_queue.c 2010-06-13 19:09:06.000000000 -0700
+++ cam/cam_queue.c 2012-03-22 10:50:50.000000000 -0700
@@ -412,7 +412,7 @@ heap_down(cam_pinfo **queue_array, int i
{
int child;
int parent;
-
+
parent = index;
child = parent << 1;
for (; child <= num_entries; child = parent << 1) {
diff -rup cam/cam_sim.c cam/cam_sim.c
--- cam/cam_sim.c 2010-06-13 19:09:06.000000000 -0700
+++ cam/cam_sim.c 2012-03-19 13:05:10.000000000 -0700
@@ -87,6 +87,7 @@ cam_sim_alloc(sim_action_func sim_action
sim->refcount = 1;
sim->devq = queue;
sim->max_ccbs = 8; /* Reserve for management purposes. */
+ sim->dev_count = 0;
sim->mtx = mtx;
if (mtx == &Giant) {
sim->flags |= 0;
diff -rup cam/cam_sim.h cam/cam_sim.h
--- cam/cam_sim.h 2010-06-13 19:09:06.000000000 -0700
+++ cam/cam_sim.h 2012-03-19 15:34:17.000000000 -0700
@@ -118,6 +118,8 @@ struct cam_sim {
u_int max_ccbs;
/* Current count of allocated ccbs */
u_int ccb_count;
+ /* Number of peripheral drivers mapped to this sim */
+ u_int dev_count;
};
diff -rup cam/cam_xpt.c cam/cam_xpt.c
--- cam/cam_xpt.c 2010-06-13 19:09:06.000000000 -0700
+++ cam/cam_xpt.c 2012-03-29 11:41:51.000000000 -0700
@@ -303,7 +303,7 @@ xpt_schedule_dev_allocq(struct cam_eb *b
int retval;
if ((dev->drvq.entries > 0) &&
- (dev->ccbq.devq_openings > 0) &&
+ (dev->runs_token < dev->ccbq.queue.array_size) &&
(cam_ccbq_frozen(&dev->ccbq, CAM_PRIORITY_TO_RL(
CAMQ_GET_PRIO(&dev->drvq))) == 0)) {
/*
@@ -327,7 +327,7 @@ xpt_schedule_dev_sendq(struct cam_eb *bu
int retval;
if ((dev->ccbq.queue.entries > 0) &&
- (dev->ccbq.dev_openings > 0) &&
+ (dev->runs_token < dev->ccbq.queue.array_size) &&
(cam_ccbq_frozen_top(&dev->ccbq) == 0)) {
/*
* The priority of a device waiting for controller
@@ -973,6 +973,9 @@ xpt_add_periph(struct cam_periph *periph
struct cam_ed *device;
int32_t status;
struct periph_list *periph_head;
+ struct cam_eb *bus;
+ struct cam_et *target;
+ struct cam_ed *devptr;
mtx_assert(periph->sim->mtx, MA_OWNED);
@@ -991,6 +994,8 @@ xpt_add_periph(struct cam_periph *periph
status = camq_resize(&device->drvq,
device->drvq.array_size + 1);
+ if (periph->periph_name != NULL && strncmp(periph->periph_name, "da",2) ==0 )
+ device->sim->dev_count++;
device->generation++;
SLIST_INSERT_HEAD(periph_head, periph, periph_links);
@@ -998,6 +1003,24 @@ xpt_add_periph(struct cam_periph *periph
mtx_lock(&xsoftc.xpt_topo_lock);
xsoftc.xpt_generation++;
+
+ if (device != NULL && device->sim->dev_count > 1 &&
+ (device->sim->max_dev_openings > device->sim->dev_count)) {
+ TAILQ_FOREACH(bus, &xsoftc.xpt_busses, links) {
+ if (bus->sim != device->sim)
+ continue;
+ TAILQ_FOREACH(target, &bus->et_entries, links) {
+ TAILQ_FOREACH(devptr, &target->ed_entries, links) {
+ /*
+ * The number of openings/tags supported by the sim (i.e controller)
+ * is evenly distributed between all devices that share this sim.
+ */
+ cam_ccbq_resize(&devptr->ccbq,
+ (devptr->sim->max_dev_openings/devptr->sim->dev_count));
+ }
+ }
+ }
+ }
mtx_unlock(&xsoftc.xpt_topo_lock);
return (status);
@@ -3072,6 +3095,11 @@ xpt_run_dev_allocq(struct cam_eb *bus)
}
/* We may have more work. Attempt to reschedule. */
+ device->runs_token++;
+ if (device->runs_token >= device->ccbq.queue.array_size) {
+ device->runs_token = 0;
+ break;
+ }
xpt_schedule_dev_allocq(bus, device);
}
devq->alloc_queue.qfrozen_cnt[0]--;
@@ -3139,7 +3167,6 @@ xpt_run_dev_sendq(struct cam_eb *bus)
devq->send_openings--;
devq->send_active++;
- xpt_schedule_dev_sendq(bus, device);
if (work_ccb && (work_ccb->ccb_h.flags & CAM_DEV_QFREEZE) != 0){
/*
@@ -3170,6 +3197,13 @@ xpt_run_dev_sendq(struct cam_eb *bus)
*/
sim = work_ccb->ccb_h.path->bus->sim;
(*(sim->sim_action))(sim, work_ccb);
+
+ device->runs_token++;
+ if (device->runs_token >= device->ccbq.queue.array_size) {
+ device->runs_token = 0;
+ break;
+ }
+ xpt_schedule_dev_sendq(bus, device);
}
devq->send_queue.qfrozen_cnt[0]--;
}
@@ -4285,6 +4319,7 @@ xpt_alloc_device(struct cam_eb *bus, str
device->tag_delay_count = 0;
device->tag_saved_openings = 0;
device->refcount = 1;
+ device->runs_token = 0;
callout_init_mtx(&device->callout, bus->sim->mtx, 0);
/*
diff -rup cam/cam_xpt_internal.h cam/cam_xpt_internal.h
--- cam/cam_xpt_internal.h 2010-06-13 19:09:06.000000000 -0700
+++ cam/cam_xpt_internal.h 2012-03-21 13:57:45.000000000 -0700
@@ -118,6 +118,7 @@ struct cam_ed {
#define CAM_TAG_DELAY_COUNT 5
u_int32_t tag_saved_openings;
u_int32_t refcount;
+ u_int32_t runs_token;
struct callout callout;
};
diff -rup cam/scsi/scsi_da.c cam/scsi/scsi_da.c
--- cam/scsi/scsi_da.c 2010-06-13 19:09:06.000000000 -0700
+++ cam/scsi/scsi_da.c 2012-03-21 14:16:00.000000000 -0700
@@ -56,7 +56,13 @@ __FBSDID("$FreeBSD: src/sys/cam/scsi/scs
#include <cam/cam_ccb.h>
#include <cam/cam_periph.h>
#include <cam/cam_xpt_periph.h>
+#include <cam/cam_queue.h>
#include <cam/cam_sim.h>
+#include <cam/cam_xpt.h>
+#include <cam/cam_xpt_sim.h>
+#include <cam/cam_xpt_periph.h>
+#include <cam/cam_xpt_internal.h>
+#include <cam/cam_debug.h>
#include <cam/scsi/scsi_message.h>
@@ -1102,6 +1108,26 @@ dasysctlinit(void *context, int pending)
&softc->minimum_cmd_size, 0, dacmdsizesysctl, "I",
"Minimum CDB size");
+ SYSCTL_ADD_INT(&softc->sysctl_ctx,SYSCTL_CHILDREN(softc->sysctl_tree),
+ OID_AUTO, "outstanding_cmds", CTLTYPE_INT | CTLFLAG_RD,
+ &softc->outstanding_cmds, 0, "Outstanding CDB Cmds");
+
+ SYSCTL_ADD_INT(&softc->sysctl_ctx,SYSCTL_CHILDREN(softc->sysctl_tree),
+ OID_AUTO, "ccbq_devq_openings", CTLTYPE_INT | CTLFLAG_RD,
+ &periph->path->device->ccbq.devq_openings, 0, "CCBQ Dev Openings");
+
+ SYSCTL_ADD_INT(&softc->sysctl_ctx,SYSCTL_CHILDREN(softc->sysctl_tree),
+ OID_AUTO, "ccbq_array_size", CTLTYPE_INT | CTLFLAG_RW,
+ &periph->path->device->ccbq.queue.array_size, 0, "CCBQ Array Size");
+
+ SYSCTL_ADD_INT(&softc->sysctl_ctx,SYSCTL_CHILDREN(softc->sysctl_tree),
+ OID_AUTO, "sim_ccb_count", CTLTYPE_INT | CTLFLAG_RD,
+ &periph->sim->ccb_count, 0, "SIM CCB COUNT");
+
+ SYSCTL_ADD_INT(&softc->sysctl_ctx,SYSCTL_CHILDREN(softc->sysctl_tree),
+ OID_AUTO, "sim_devq_alloc_openings", CTLTYPE_INT | CTLFLAG_RD,
+ &periph->sim->devq->alloc_openings, 0, "SIM Devq Alloc Openings");
+
cam_periph_release(periph);
}
>Release-Note:
>Audit-Trail:
>Unformatted:
More information about the freebsd-bugs
mailing list