mfi(4) endless loop kernel output on attach

pluknet pluknet at gmail.com
Thu Oct 22 14:30:25 UTC 2009


2009/10/15 John Baldwin <jhb at freebsd.org>:
> On Thursday 15 October 2009 5:51:19 am pluknet wrote:
>> Hi.
>>
>> This is 7.2-R. Seen on IBM x3650M2.
>>
>> During boot I get these endlessly looping kernel messages during the
>> mfi(4) attach phase.
>> This is all the more odd since 7.2 booted and worked fine on exactly this
>> server model
>> months ago (on a different box, though). Any hints?
>
> We just had some boxes die like this (but spewing a different loop of messages
> on boot related to continuously scheduling patrol reads and consistency
> checks that finished immediately) at work.  We fixed them by swapping out the
> controller.  We might try sticking them in a different box and reflashing them
> using mfiutil(8) to see if it's some sort of corrupted state that flashing
> the adapter fixes.
>
> In your case it looks like the firmware keeps crashing and restarting.
>

Some more thoughts..

I ran into a problem with the 'MegaCli -AdpBbuCmd -BbuLearn -aall' command.
On 6.2-R the process slept on the mfiwait wchan:

db> bt 14734
Tracing pid 14734 tid 100135 td 0xc93f8190
sched_switch(c93f8190,0,1) at sched_switch+0x143
mi_switch(1,0,c93f8190,f9a32acc,c06a43a4,...) at mi_switch+0x1ba
sleepq_switch(c8c6b0d0) at sleepq_switch+0x87
sleepq_wait(c8c6b0d0,0,c93f8190,c8c6b0d0,c8c25800,...) at sleepq_wait+0x5c
msleep(c8c6b0d0,c8c25954,4c,c090acbc,0) at msleep+0x269
mfi_wait_command(c8c25800,c8c6b0d0,0,0,cc382460,...) at mfi_wait_command+0xa8
mfi_ioctl(c8c31300,c1144d01,cc870a00,1,c93f8190,...) at mfi_ioctl+0x485
devfs_ioctl_f(c90a2750,c1144d01,cc870a00,c9048000,c93f8190) at
devfs_ioctl_f+0xaf
ioctl(c93f8190,f9a32d04) at ioctl+0x445
syscall(3b,3b,3b,0,bfbfedc0,...) at syscall+0x2bf
Xint0x80_syscall() at Xint0x80_syscall+0x1f
--- syscall (54, FreeBSD ELF32, ioctl), eip = 0x8177207, esp =
0xbfbfe88c, ebp = 0xbfbfe8b8 ---

Then:
mfi0: COMMAND 0xc8c6b0d0 TIMEOUT AFTER 51 SECONDS
mfi0: COMMAND 0xc8c61d50 TIMEOUT AFTER 49 SECONDS
mfi0: COMMAND 0xc8c61850 TIMEOUT AFTER 49 SECONDS


On 6.4-R MegaCli triggers a kernel page fault due to a NULL dereference
of cm->cm_sg in mfi_data_cb() (see below).

There was a backport after 6.4 whose description mentions
"fix some bugs in the API for the management ioctl."
With this patch I no longer get the panic and/or the hangs.

Thanks to LSI now on 7.2-R (and on patched 6.4-R) it returns an error:
# ./MegaCli -AdpBbuCmd -BbuLearn -aall

Adapter 0: BBU Learn Failed

Exit Code: 0x32


db> bt
Tracing pid 43059 tid 101363 td 0xcf46e680
mfi_data_cb(c9cfae00,c9cc3e00,1,0) at mfi_data_cb+0x5e
bus_dmamap_load(c9cd7c80,0,caf86270,0,c0597240,c9cfae00,0) at
bus_dmamap_load+0x4a1
mfi_mapcmd(c9cc3800,c9cfae00) at mfi_mapcmd+0x31
mfi_startio(c9cc3800) at mfi_startio+0x9b
mfi_wait_command(c9cc3800,c9cfae00,0,0,caf86270,...) at mfi_wait_command+0x89
mfi_ioctl(c9cf7200,c1144d01,d3fb6200,1,cf46e680,...) at mfi_ioctl+0x52a
devfs_ioctl_f(d1a551b0,c1144d01,d3fb6200,cbf52c80,cf46e680) at
devfs_ioctl_f+0xaf
ioctl(cf46e680,fbd91d04) at ioctl+0x445
syscall(3b,3b,3b,0,bfbfedc0,...) at syscall+0x2bf
Xint0x80_syscall() at Xint0x80_syscall+0x1f
--- syscall (54, FreeBSD ELF32, ioctl), eip = 0x8177207, esp =
0xbfbfe88c, ebp = 0xbfbfe8b8

#9  0xc08cbb1a in calltrap () at /usr/src/sys/i386/i386/exception.s:139
#10 0xc059729e in mfi_data_cb (arg=0xc8a744b0, segs=0xc8a49e00, nsegs=1,
---Type <return> to continue, or q <return> to quit---
    error=0) at /usr/src/sys/dev/mfi/mfi.c:1488
#11 0xc08c7afd in bus_dmamap_load (dmat=0xc8a6f100, map=0xac89e000,
    buf=0xc8a5ac60, buflen=0, callback=0xc0597240 <mfi_data_cb>,
    callback_arg=0xc8a744b0, flags=0)
    at /usr/src/sys/i386/i386/busdma_machdep.c:733
#12 0xc059721d in mfi_mapcmd (sc=0xc8a49800, cm=0xc8a49e00)
    at /usr/src/sys/dev/mfi/mfi.c:1452
#13 0xc0597177 in mfi_startio (sc=0xc8a49800)
    at /usr/src/sys/dev/mfi/mfi.c:1436
#14 0xc0595f09 in mfi_wait_command (sc=0xc8a49800, cm=0xc8a744b0)
    at /usr/src/sys/dev/mfi/mfi.c:822
#15 0xc059840a in mfi_ioctl (dev=0xac89e000, cmd=0, arg=0xc8de8800 "", flag=1,
    td=0xc8a5ac60) at /usr/src/sys/dev/mfi/mfi.c:2061
#16 0xc06598b7 in devfs_ioctl_f (fp=0xc902dc18, com=3239333121,
    data=0xc8de8800, cred=0xc9052980, td=0xc8e2dd00)
    at /usr/src/sys/fs/devfs/devfs_vnops.c:480
#17 0xc06d3a11 in ioctl (td=0xc8e2dd00, uap=0xeb37bd04) at file.h:265

(kgdb) f 10
#10 0xc059729e in mfi_data_cb (arg=0xc8a744b0, segs=0xc8a49e00, nsegs=1,
    error=0) at /usr/src/sys/dev/mfi/mfi.c:1488
1488                            sgl->sg32[i].addr = segs[i].ds_addr;
(kgdb) list
1483                    return;
1484            }
1485
1486            if ((sc->mfi_flags & MFI_FLAGS_SG64) == 0) {
1487                    for (i = 0; i < nsegs; i++) {
1488                            sgl->sg32[i].addr = segs[i].ds_addr;
1489                            sgl->sg32[i].len = segs[i].ds_len;
1490                    }
1491            } else {
1492                    for (i = 0; i < nsegs; i++) {
(kgdb) p i
$1 = 0
(kgdb) p *segs
$3 = {ds_addr = 2457600, ds_len = 65536}
(kgdb) p sgl
$4 = (union mfi_sgl *) 0x0
(kgdb) p *cm
$6 = {cm_link = {tqe_next = 0x0, tqe_prev = 0xc8a49814}, cm_timestamp = 0,
  cm_sc = 0xc8a49800, cm_frame = 0xe8fee680, cm_frame_busaddr = 3748513408,
  cm_sense = 0xe904c780, cm_sense_busaddr = 3749103488, cm_dmamap = 0x0,
  cm_sg = 0x0, cm_data = 0xc8a5ac60, cm_len = 0, cm_total_frame_size = 0,
  cm_extra_frames = 0, cm_flags = 6, cm_aen_abort = 0, cm_complete = 0,
  cm_private = 0x0, cm_index = 15, cm_error = 0}


-- 
wbr,
pluknet


More information about the freebsd-stable mailing list