mptutil(8) segfault on IBM xSeries 3550
John Baldwin
jhb at freebsd.org
Thu Feb 18 15:24:04 UTC 2010
On Monday 15 February 2010 5:25:15 pm Charles Owens wrote:
> Charles Owens wrote:
> > Howdy,
> >
> > We're working with IBM hardware (xSeries 3550) that has an
> > mpt-based RAID controller... after initial success with testing the
> > mptutil utility, now operations other than "show adapter" and "show
> > volume" are resulting in segfaults.
> >
> > While it was working properly we created and removed volumes several
> > times, force-failed drives, and just generally put it through its
> > paces... and all seemed fine. Then, after a reboot, it suddenly started
> > failing with segfault as described, and nothing we do has helped to get
> > it out of this state (including trying to use the LSI in-BIOS manager to
> > create/delete volumes -- which in and of itself works fine).
> >
> > We found a recent thread
> > http://docs.freebsd.org/cgi/mid.cgi?4B56CD4C.80503 and hoped that it
> > might somehow relate... and even tried the patch that John Baldwin
> > posted, but to no avail.
> >
> > Has anyone seen this behavior and/or have a suggested fix or workaround?
> >
> >
> > Here's the output of "mptutil show adapter":
> >
> > mpt0 Adapter:
> > Board Name: SR-BR10i
> > Board Assembly: L3-25116-01H
> > Chip Name: C1068E
> > Chip Revision: UNUSED
> > RAID Levels: RAID0, RAID1, RAID1E
> > RAID0 Stripes: 64K
> > RAID1E Stripes: 64K
> > RAID0 Drives/Vol: 1-10
> > RAID1 Drives/Vol: 2
> > RAID1E Drives/Vol: 3-10
> >
> >
> > This work is being done using FreeBSD 8.0-RELEASE-p2 + PAE.
> >
>
>
> I should add that the RAID controller in question is the IBM
> ServeRAID-BR10i SAS/SATA Controller which is based on the LSI 1068E
> processor, as described here:
> http://www-01.ibm.com/common/ssi/rep_ca/4/872/ENUSAG09-0104/index.html
Try this updated patch. It should fix the problems with 'mptutil show drives'
displaying all daX devices in the system rather than just the ones for the
mptX bus. I had incorrectly assumed that multiple XPT match patterns are ANDed
together rather than ORed. This changes the code to first do a lookup for the logical
"path" (SCSI bus) for mptX devices and then do a second lookup to fetch any
daX devices on that path. I tested it on a machine with an mpt controller and
a USB disk. Unfortunately I wasn't able to test any of the RAID stuff, just
'show drives'. This mpt(4) controller doesn't support RAID either, so I was
also able to verify the fix you had already tested for cleaning up 'show
adapter' output in that case.
Index: mpt_cam.c
===================================================================
--- mpt_cam.c (revision 204004)
+++ mpt_cam.c (working copy)
@@ -56,15 +56,75 @@
return (xptfd);
}
+/* Fetch the path id of bus 0 for the opened mpt controller. */
+static int
+fetch_path_id(path_id_t *path_id)
+{
+ struct bus_match_pattern *b;
+ union ccb ccb;
+ size_t bufsize;
+
+ if (xpt_open() < 0)
+ return (ENXIO);
+
+ /* First, find the path id of bus 0 for this mpt controller. */
+ bzero(&ccb, sizeof(ccb));
+
+ ccb.ccb_h.func_code = XPT_DEV_MATCH;
+
+ bufsize = sizeof(struct dev_match_result) * 1;
+ ccb.cdm.num_matches = 0;
+ ccb.cdm.match_buf_len = bufsize;
+ ccb.cdm.matches = calloc(1, bufsize);
+
+ bufsize = sizeof(struct dev_match_pattern) * 1;
+ ccb.cdm.num_patterns = 1;
+ ccb.cdm.pattern_buf_len = bufsize;
+ ccb.cdm.patterns = calloc(1, bufsize);
+
+ /* Match mptX bus 0. */
+ ccb.cdm.patterns[0].type = DEV_MATCH_BUS;
+ b = &ccb.cdm.patterns[0].pattern.bus_pattern;
+ snprintf(b->dev_name, sizeof(b->dev_name), "mpt");
+ b->unit_number = mpt_unit;
+ b->bus_id = 0;
+ b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID;
+
+ if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) {
+ free(ccb.cdm.matches);
+ free(ccb.cdm.patterns);
+ return (errno);
+ }
+ free(ccb.cdm.patterns);
+
+ if (((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) ||
+ (ccb.cdm.status != CAM_DEV_MATCH_LAST)) {
+ warnx("fetch_path_id got CAM error %#x, CDM error %d\n",
+ ccb.ccb_h.status, ccb.cdm.status);
+ free(ccb.cdm.matches);
+ return (EIO);
+ }
+
+ /* We should have exactly 1 match for the bus. */
+ if (ccb.cdm.num_matches != 1 ||
+ ccb.cdm.matches[0].type != DEV_MATCH_BUS) {
+ free(ccb.cdm.matches);
+ return (ENOENT);
+ }
+ *path_id = ccb.cdm.matches[0].result.bus_result.path_id;
+ free(ccb.cdm.matches);
+ return (0);
+}
+
int
mpt_query_disk(U8 VolumeBus, U8 VolumeID, struct mpt_query_disk *qd)
{
- struct bus_match_pattern *b;
struct periph_match_pattern *p;
struct periph_match_result *r;
union ccb ccb;
+ path_id_t path_id;
size_t bufsize;
- int i;
+ int error, i;
/* mpt(4) only handles devices on bus 0. */
if (VolumeBus != 0)
@@ -73,6 +133,11 @@
if (xpt_open() < 0)
return (ENXIO);
+ /* Find the path ID of bus 0. */
+ error = fetch_path_id(&path_id);
+ if (error)
+ return (error);
+
bzero(&ccb, sizeof(ccb));
ccb.ccb_h.func_code = XPT_DEV_MATCH;
@@ -85,25 +150,18 @@
ccb.cdm.match_buf_len = bufsize;
ccb.cdm.matches = calloc(1, bufsize);
- bufsize = sizeof(struct dev_match_pattern) * 2;
- ccb.cdm.num_patterns = 2;
+ bufsize = sizeof(struct dev_match_pattern) * 1;
+ ccb.cdm.num_patterns = 1;
ccb.cdm.pattern_buf_len = bufsize;
ccb.cdm.patterns = calloc(1, bufsize);
- /* Match mptX bus 0. */
- ccb.cdm.patterns[0].type = DEV_MATCH_BUS;
- b = &ccb.cdm.patterns[0].pattern.bus_pattern;
- snprintf(b->dev_name, sizeof(b->dev_name), "mpt");
- b->unit_number = mpt_unit;
- b->bus_id = 0;
- b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID;
-
/* Look for a "da" device at the specified target and lun. */
- ccb.cdm.patterns[1].type = DEV_MATCH_PERIPH;
- p = &ccb.cdm.patterns[1].pattern.periph_pattern;
+ ccb.cdm.patterns[0].type = DEV_MATCH_PERIPH;
+ p = &ccb.cdm.patterns[0].pattern.periph_pattern;
+ p->path_id = path_id;
snprintf(p->periph_name, sizeof(p->periph_name), "da");
p->target_id = VolumeID;
- p->flags = PERIPH_MATCH_NAME | PERIPH_MATCH_TARGET;
+ p->flags = PERIPH_MATCH_PATH | PERIPH_MATCH_NAME | PERIPH_MATCH_TARGET;
if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) {
i = errno;
@@ -122,25 +180,22 @@
}
/*
- * We should have exactly 2 matches, 1 for the bus and 1 for
- * the peripheral. However, if we only have 1 match and it is
- * for the bus, don't print an error message and return
- * ENOENT.
+ * We should have exactly 1 match for the peripheral.
+ * However, if we don't get a match, don't print an error
+ * message and return ENOENT.
*/
- if (ccb.cdm.num_matches == 1 &&
- ccb.cdm.matches[0].type == DEV_MATCH_BUS) {
+ if (ccb.cdm.num_matches == 0) {
free(ccb.cdm.matches);
return (ENOENT);
}
- if (ccb.cdm.num_matches != 2) {
- warnx("mpt_query_disk got %d matches, expected 2",
+ if (ccb.cdm.num_matches != 1) {
+ warnx("mpt_query_disk got %d matches, expected 1",
ccb.cdm.num_matches);
free(ccb.cdm.matches);
return (EIO);
}
- if (ccb.cdm.matches[0].type != DEV_MATCH_BUS ||
- ccb.cdm.matches[1].type != DEV_MATCH_PERIPH) {
- warnx("mpt_query_disk got wrong CAM matches");
+ if (ccb.cdm.matches[0].type != DEV_MATCH_PERIPH) {
+ warnx("mpt_query_disk got wrong CAM match");
free(ccb.cdm.matches);
return (EIO);
}
@@ -336,47 +391,44 @@
{
CONFIG_PAGE_IOC_2 *ioc2;
struct mpt_standalone_disk *disks;
- struct bus_match_pattern *b;
struct periph_match_pattern *p;
struct periph_match_result *r;
struct cam_device *dev;
union ccb ccb;
+ path_id_t path_id;
size_t bufsize;
u_int i;
- int count;
+ int count, error;
if (xpt_open() < 0)
return (ENXIO);
+ error = fetch_path_id(&path_id);
+ if (error)
+ return (error);
+
for (count = 100;; count+= 100) {
/* Try to fetch 'count' disks in one go. */
bzero(&ccb, sizeof(ccb));
ccb.ccb_h.func_code = XPT_DEV_MATCH;
- bufsize = sizeof(struct dev_match_result) * (count + 2);
+ bufsize = sizeof(struct dev_match_result) * (count + 1);
ccb.cdm.num_matches = 0;
ccb.cdm.match_buf_len = bufsize;
ccb.cdm.matches = calloc(1, bufsize);
- bufsize = sizeof(struct dev_match_pattern) * 2;
- ccb.cdm.num_patterns = 2;
+ bufsize = sizeof(struct dev_match_pattern) * 1;
+ ccb.cdm.num_patterns = 1;
ccb.cdm.pattern_buf_len = bufsize;
ccb.cdm.patterns = calloc(1, bufsize);
- /* Match mptX bus 0. */
- ccb.cdm.patterns[0].type = DEV_MATCH_BUS;
- b = &ccb.cdm.patterns[0].pattern.bus_pattern;
- snprintf(b->dev_name, sizeof(b->dev_name), "mpt");
- b->unit_number = mpt_unit;
- b->bus_id = 0;
- b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID;
-
/* Match any "da" peripherals. */
- ccb.cdm.patterns[1].type = DEV_MATCH_PERIPH;
- p = &ccb.cdm.patterns[1].pattern.periph_pattern;
+ ccb.cdm.patterns[0].type = DEV_MATCH_PERIPH;
+ p = &ccb.cdm.patterns[0].pattern.periph_pattern;
+ p->path_id = path_id;
snprintf(p->periph_name, sizeof(p->periph_name), "da");
- p->flags = PERIPH_MATCH_NAME;
+ p->flags = PERIPH_MATCH_PATH | PERIPH_MATCH_NAME;
if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) {
i = errno;
@@ -406,21 +458,16 @@
break;
}
- /*
- * We should have N + 1 matches, 1 for the bus and 1 for each
- * "da" device.
- */
- if (ccb.cdm.num_matches < 1) {
- warnx("mpt_fetch_disks didn't get any matches");
+ /* Shortcut if we don't have any "da" devices. */
+ if (ccb.cdm.num_matches == 0) {
free(ccb.cdm.matches);
- return (EIO);
+ *ndisks = 0;
+ *disksp = NULL;
+ return (0);
}
- if (ccb.cdm.matches[0].type != DEV_MATCH_BUS) {
- warnx("mpt_fetch_disks got wrong CAM matches");
- free(ccb.cdm.matches);
- return (EIO);
- }
- for (i = 1; i < ccb.cdm.num_matches; i++) {
+
+ /* We should have N matches, 1 for each "da" device. */
+ for (i = 0; i < ccb.cdm.num_matches; i++) {
if (ccb.cdm.matches[i].type != DEV_MATCH_PERIPH) {
warnx("mpt_fetch_disks got wrong CAM matches");
free(ccb.cdm.matches);
@@ -428,14 +475,6 @@
}
}
- /* Shortcut if we don't have any "da" devices. */
- if (ccb.cdm.num_matches == 1) {
- free(ccb.cdm.matches);
- *ndisks = 0;
- *disksp = NULL;
- return (0);
- }
-
/*
* Some of the "da" peripherals may be for RAID volumes, so
* fetch the IOC 2 page (list of RAID volumes) so we can
@@ -444,7 +483,7 @@
ioc2 = mpt_read_ioc_page(fd, 2, NULL);
disks = calloc(ccb.cdm.num_matches, sizeof(*disks));
count = 0;
- for (i = 1; i < ccb.cdm.num_matches; i++) {
+ for (i = 0; i < ccb.cdm.num_matches; i++) {
r = &ccb.cdm.matches[i].result.periph_result;
if (periph_is_volume(ioc2, r))
continue;
@@ -480,10 +519,9 @@
int
mpt_rescan_bus(int bus, int id)
{
- struct bus_match_pattern *b;
union ccb ccb;
path_id_t path_id;
- size_t bufsize;
+ int error;
/* mpt(4) only handles devices on bus 0. */
if (bus != -1 && bus != 0)
@@ -492,54 +530,12 @@
if (xpt_open() < 0)
return (ENXIO);
- /* First, find the path id of bus 0 for this mpt controller. */
- bzero(&ccb, sizeof(ccb));
+ error = fetch_path_id(&path_id);
+ if (error)
+ return (error);
- ccb.ccb_h.func_code = XPT_DEV_MATCH;
-
- bufsize = sizeof(struct dev_match_result) * 1;
- ccb.cdm.num_matches = 0;
- ccb.cdm.match_buf_len = bufsize;
- ccb.cdm.matches = calloc(1, bufsize);
-
- bufsize = sizeof(struct dev_match_pattern) * 1;
- ccb.cdm.num_patterns = 1;
- ccb.cdm.pattern_buf_len = bufsize;
- ccb.cdm.patterns = calloc(1, bufsize);
-
- /* Match mptX bus 0. */
- ccb.cdm.patterns[0].type = DEV_MATCH_BUS;
- b = &ccb.cdm.patterns[0].pattern.bus_pattern;
- snprintf(b->dev_name, sizeof(b->dev_name), "mpt");
- b->unit_number = mpt_unit;
- b->bus_id = 0;
- b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT | BUS_MATCH_BUS_ID;
-
- if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) {
- free(ccb.cdm.matches);
- free(ccb.cdm.patterns);
- return (errno);
- }
- free(ccb.cdm.patterns);
-
- if (((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) ||
- (ccb.cdm.status != CAM_DEV_MATCH_LAST)) {
- warnx("mpt_rescan_bus got CAM error %#x, CDM error %d\n",
- ccb.ccb_h.status, ccb.cdm.status);
- free(ccb.cdm.matches);
- return (EIO);
- }
-
- /* We should have exactly 1 match for the bus. */
- if (ccb.cdm.num_matches != 1 ||
- ccb.cdm.matches[0].type != DEV_MATCH_BUS) {
- free(ccb.cdm.matches);
- return (ENOENT);
- }
- path_id = ccb.cdm.matches[0].result.bus_result.path_id;
- free(ccb.cdm.matches);
-
- /* Now perform the actual rescan. */
+ /* Perform the actual rescan. */
+ bzero(&ccb, sizeof(ccb));
ccb.ccb_h.path_id = path_id;
if (id == -1) {
ccb.ccb_h.func_code = XPT_SCAN_BUS;
Index: mpt_show.c
===================================================================
--- mpt_show.c (revision 204004)
+++ mpt_show.c (working copy)
@@ -78,6 +78,7 @@
CONFIG_PAGE_MANUFACTURING_0 *man0;
CONFIG_PAGE_IOC_2 *ioc2;
CONFIG_PAGE_IOC_6 *ioc6;
+ U16 IOCStatus;
int fd, comma;
if (ac != 1) {
@@ -108,7 +109,7 @@
free(man0);
- ioc2 = mpt_read_ioc_page(fd, 2, NULL);
+ ioc2 = mpt_read_ioc_page(fd, 2, &IOCStatus);
if (ioc2 != NULL) {
printf(" RAID Levels:");
comma = 0;
@@ -151,9 +152,11 @@
printf(" none");
printf("\n");
free(ioc2);
- }
+ } else if ((IOCStatus & MPI_IOCSTATUS_MASK) !=
+ MPI_IOCSTATUS_CONFIG_INVALID_PAGE)
+ warnx("mpt_read_ioc_page(2): %s", mpt_ioc_status(IOCStatus));
- ioc6 = mpt_read_ioc_page(fd, 6, NULL);
+ ioc6 = mpt_read_ioc_page(fd, 6, &IOCStatus);
if (ioc6 != NULL) {
display_stripe_map(" RAID0 Stripes",
ioc6->SupportedStripeSizeMapIS);
@@ -172,7 +175,9 @@
printf("-%u", ioc6->MaxDrivesIME);
printf("\n");
free(ioc6);
- }
+ } else if ((IOCStatus & MPI_IOCSTATUS_MASK) !=
+ MPI_IOCSTATUS_CONFIG_INVALID_PAGE)
+ warnx("mpt_read_ioc_page(6): %s", mpt_ioc_status(IOCStatus));
/* TODO: Add an ioctl to fetch IOC_FACTS and print firmware version. */
@@ -541,7 +546,8 @@
for (i = 0; i <= 0xff; i++) {
pinfo = mpt_pd_info(fd, i, &IOCStatus);
if (pinfo == NULL) {
- if (IOCStatus != MPI_IOCSTATUS_CONFIG_INVALID_PAGE)
+ if ((IOCStatus & MPI_IOCSTATUS_MASK) !=
+ MPI_IOCSTATUS_CONFIG_INVALID_PAGE)
warnx("mpt_pd_info(%d): %s", i,
mpt_ioc_status(IOCStatus));
continue;
--
John Baldwin
More information about the freebsd-hardware
mailing list