kern/124670: large file operation on RAID cause many GEOM errors - crash

Chameeya Software Services Ltd. chameeyass at hotmail.com
Fri Jun 20 11:31:15 UTC 2008







Okay, I managed to build a debug kernel... it took two hours!

I then started kgdb, and this is what I got:

[<CODE>]
GEOM_MIRROR: Device dat: rebuilding provider ad4 stopped.
Waiting (max 60 seconds) for system process `vnlru' to stop...done
Waiting (max 60 seconds) for system process `bufdaemon' to stop...GEOM_MIRROR: Device dat: provider mirror/dat destroyed.


Fatal trap 12: page fault while in kernel mode
cpuid = 0; apic id = 00
fault virtual address   = 0xf000fff0
fault code              = supervisor read, page not present
instruction pointer     = 0x20:0xc0747f04
stack pointer           = 0x28:0xd92dfc3c
frame pointer           = 0x28:0xd92dfc54
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, def32 1, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 3 (g_up)
trap number             = 12
panic: page fault
cpuid = 0
Uptime: 8m38s
Physical memory: 627 MB
Dumping 35 MB: 20 4

#0  doadump () at pcpu.h:195
195             __asm __volatile("movl %%fs:0,%0" : "=r" (td));
(kgdb) backtrace
#0  doadump () at pcpu.h:195
#1  0xc0754457 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:409
#2  0xc0754719 in panic (fmt=Variable "fmt" is not available.
) at /usr/src/sys/kern/kern_shutdown.c:563
#3  0xc0a4905c in trap_fatal (frame=0xd92dfbfc, eva=4026597360)
    at /usr/src/sys/i386/i386/trap.c:899
#4  0xc0a492e0 in trap_pfault (frame=0xd92dfbfc, usermode=0, eva=4026597360)
    at /usr/src/sys/i386/i386/trap.c:812
#5  0xc0a49c8c in trap (frame=0xd92dfbfc) at /usr/src/sys/i386/i386/trap.c:490
#6  0xc0a2fc0b in calltrap () at /usr/src/sys/i386/i386/exception.s:139
#7  0xc0747f04 in _mtx_lock_sleep (m=0x58, tid=3275608064, opts=0, 
    file=0xc0d18eda "/usr/src/sys/modules/geom/geom_mirror/../../../geom/mirror/g_mirror.c", line=1010) at /usr/src/sys/kern/kern_mutex.c:335
#8  0xc0748392 in _mtx_lock_flags (m=0x58, opts=0, 
    file=0xc0d18eda "/usr/src/sys/modules/geom/geom_mirror/../../../geom/mirror/g_mirror.c", line=1010) at /usr/src/sys/kern/kern_mutex.c:186
#9  0xc0d0efca in ?? ()
#10 0x00000058 in ?? ()
#11 0x00000000 in ?? ()
#12 0xc0d18eda in ?? ()
#13 0x000003f2 in ?? ()
#14 0xc0d0ef80 in ?? ()
#15 0xc3665210 in ?? ()
#16 0xc33dd000 in ?? ()
---Type <return> to continue, or q <return> to quit---
#17 0xd92dfcb8 in ?? ()
#18 0xc07b3c9d in biodone (bp=0x58) at /usr/src/sys/kern/vfs_bio.c:3009
Previous frame identical to this frame (corrupt stack?)
(kgdb) 
(kgdb) list *0xc0747f04
0xc0747f04 is in _mtx_lock_sleep (/usr/src/sys/kern/kern_mutex.c:337).
332                      */
333                     v = m->mtx_lock;
334                     if (v != MTX_UNOWNED) {
335                             owner = (struct thread *)(v & ~MTX_FLAGMASK);
336     #ifdef ADAPTIVE_GIANT
337                             if (TD_IS_RUNNING(owner)) {
338     #else
339                             if (m != &Giant && TD_IS_RUNNING(owner)) {
340     #endif
341                                     if (LOCK_LOG_TEST(&m->lock_object, 0))
(kgdb) 
[</CODE>]

That was one core, and this is the other:
[<CODE>]
Unread portion of the kernel message buffer:


Fatal trap 12: page fault while in kernel mode
cpuid = 0; apic id = 00
fault virtual address   = 0xc0
fault code              = supervisor read, page not present
instruction pointer     = 0x20:0xc06e9331
stack pointer           = 0x28:0xdaab0a5c
frame pointer           = 0x28:0xdaab0a5c
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, def32 1, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 1065 (Thunar)
trap number             = 12
panic: page fault
cpuid = 0
Uptime: 1h24m0s
Physical memory: 627 MB
Dumping 62 MB: 47 31 15

#0  doadump () at pcpu.h:195
195             __asm __volatile("movl %%fs:0,%0" : "=r" (td));
(kgdb) backtrace
#0  doadump () at pcpu.h:195
#1  0xc0754457 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:409
#2  0xc0754719 in panic (fmt=Variable "fmt" is not available.
) at /usr/src/sys/kern/kern_shutdown.c:563
#3  0xc0a4905c in trap_fatal (frame=0xdaab0a1c, eva=192)
    at /usr/src/sys/i386/i386/trap.c:899
#4  0xc0a492e0 in trap_pfault (frame=0xdaab0a1c, usermode=0, eva=192)
    at /usr/src/sys/i386/i386/trap.c:812
#5  0xc0a49c8c in trap (frame=0xdaab0a1c) at /usr/src/sys/i386/i386/trap.c:490
#6  0xc0a2fc0b in calltrap () at /usr/src/sys/i386/i386/exception.s:139
#7  0xc06e9331 in dev2udev (x=0xc3653200)
    at /usr/src/sys/fs/devfs/devfs_vnops.c:1325
#8  0xc095e1fb in ufs_getattr (ap=0xdaab0aa0)
    at /usr/src/sys/ufs/ufs/ufs_vnops.c:401
#9  0xc0a5d8e2 in VOP_GETATTR_APV (vop=0xc0b93c60, a=0xdaab0aa0)
    at vnode_if.c:530
#10 0xc07d9b69 in vn_stat (vp=0xc3b4cbb0, sb=0xdaab0b94, 
    active_cred=0xc3a40e00, file_cred=0x0, td=0xc3adfc60) at vnode_if.h:286
#11 0xc07d08d5 in kern_lstat (td=0xc3adfc60, 
    path=0x29c21440 <Address 0x29c21440 out of bounds>, pathseg=UIO_USERSPACE, 
    sbp=0xdaab0c18) at /usr/src/sys/kern/vfs_syscalls.c:2165
#12 0xc07d099f in lstat (td=0xc3adfc60, uap=0xdaab0cfc)
    at /usr/src/sys/kern/vfs_syscalls.c:2144
#13 0xc0a49635 in syscall (frame=0xdaab0d38)
---Type <return> to continue, or q <return> to quit---
    at /usr/src/sys/i386/i386/trap.c:1035
#14 0xc0a2fc70 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:196
#15 0x00000033 in ?? ()
Previous frame inner to this frame (corrupt stack?)
(kgdb) 
(kgdb) list *0xc06e9331
0xc06e9331 is in dev2udev (/usr/src/sys/fs/devfs/devfs_vnops.c:1325).
1320    dev_t
1321    dev2udev(struct cdev *x)
1322    {
1323            if (x == NULL)
1324                    return (NODEV);
1325            return (x->si_priv->cdp_inode);
1326    }
1327
1328    static struct fileops devfs_ops_f = {
1329            .fo_read =      devfs_read_f,
[</CODE>]

Here is a third core:
[<CODE>]
Unread portion of the kernel message buffer:
ad4: FAILURE - device detached
subdisk4: detached
ad4: detached
GEOM_MIRROR: Device dat: provider ad4 disconnected.
GEOM_MIRROR: Device dat: provider mirror/dat destroyed.
GEOM_MIRROR: Device dat destroyed.
g_vfs_done():mirror/dats1d[WRITE(offset=3862298624, length=16384)]error = 6
g_vfs_done():mirror/dats1d[WRITE(offset=4432019456, length=16384)]error = 6
panic: softdep_move_dependencies: need merge code
cpuid = 0
Uptime: 5m34s
Physical memory: 627 MB
Dumping 111 MB: 96 80 64 48 32 16

#0  doadump () at pcpu.h:195
195             __asm __volatile("movl %%fs:0,%0" : "=r" (td));
(kgdb) backtrace
#0  doadump () at pcpu.h:195
#1  0xc0754457 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:409
#2  0xc0754719 in panic (fmt=Variable "fmt" is not available.
) at /usr/src/sys/kern/kern_shutdown.c:563
#3  0xc0944e44 in softdep_move_dependencies (oldbp=0xd01b6da8, 
    newbp=0xd0178730) at /usr/src/sys/ufs/ffs/ffs_softdep.c:997
#4  0xc094c816 in ffs_backgroundwritedone (bp=0xd01b6da8)
    at /usr/src/sys/ufs/ffs/ffs_vfsops.c:1669
#5  0xc07b9b77 in bufdone (bp=0xd01b6da8) at /usr/src/sys/kern/vfs_bio.c:3167
#6  0xc0708928 in g_vfs_done (bip=0xc39dfd68)
    at /usr/src/sys/geom/geom_vfs.c:87
#7  0xc07b3c9d in biodone (bp=0xc39dfd68) at /usr/src/sys/kern/vfs_bio.c:3009
#8  0xc0704ddf in g_io_schedule_up (tp=0xc33dd000)
    at /usr/src/sys/geom/geom_io.c:587
#9  0xc070512e in g_up_procbody () at /usr/src/sys/geom/geom_kern.c:95
#10 0xc0734479 in fork_exit (callout=0xc07050c0 <g_up_procbody>, arg=0x0, 
    frame=0xd92dfd38) at /usr/src/sys/kern/kern_fork.c:781
#11 0xc0a2fc80 in fork_trampoline () at /usr/src/sys/i386/i386/exception.s:205
(kgdb) 
[</CODE>]

I hope that helps someone out.

I think I may have another core as well, showing similar stuff.



Salik.
_________________________________________________________________
Great deals on almost anything at eBay.co.uk. Search, bid, find and win on eBay today!
http://clk.atdmt.com/UKM/go/msnnkmgl0010000004ukm/direct/01/


More information about the freebsd-bugs mailing list