Still crashes in swapgeom_strategy and also sometimes in propagate_priority

Thu Sep 18 00:23:23 PDT 2003

Hello.

I'm experiencing frequent crashes on my FreeBSD box here. It's running 
FreeBSD 5.x CURRENT (CVSUP'ed yesterday (the 17th) around 1800 CET).

eivind at vimes:~ > uname -a
FreeBSD vimes.eivind 5.1-CURRENT FreeBSD 5.1-CURRENT #0: Thu Sep 18 
00:22:50 CEST 2003     root at vimes.eivind:/usr/obj/usr/src/sys/VIMES  i386
eivind at vimes:~ >

Here's the difference between the GENERIC kernel and my kernel:

eivind at vimes:/usr/src/sys/i386/conf > diff GENERIC VIMES
19c19
< # $FreeBSD: src/sys/i386/conf/GENERIC,v 1.391 2003/09/10 18:54:58 obrien Exp $
---
> # $FreeBSD: src/sys/i386/conf/GENERIC,v 1.390 2003/09/09 18:17:23 wpaul Exp $
25c25
< ident         GENERIC
---
> ident         VIMES
63,66c63,66
< options       INVARIANTS              #Enable calls of extra sanity checking
< options       INVARIANT_SUPPORT       #Extra sanity checks of internal structures, required by INVARIANTS
< options       WITNESS                 #Enable checks to detect deadlocks and cycles
< options       WITNESS_SKIPSPIN        #Don't run witness on spinlocks for speed
---
> #options      INVARIANTS              #Enable calls of extra sanity checking
> #options      INVARIANT_SUPPORT       #Extra sanity checks of internal structures, required by INVARIANTS
> #options      WITNESS                 #Enable checks to detect deadlocks and cycles
> #options      WITNESS_SKIPSPIN        #Don't run witness on spinlocks for speed
191d190
< device                bge             # Broadcom BCM570xx Gigabit Ethernet
206a206
> device                bge             # Broadcom BCM570xx Gigabit Ethernet
268a269,274
>
> # These options are a subset of the IPFILTER options.
> options       IPFILTER                #ipfilter support
> options       IPFILTER_LOG            #ipfilter logging
> options       IPFILTER_DEFAULT_BLOCK  #block all packets by default
>
eivind at vimes:/usr/src/sys/i386/conf >

I have compiled the kernel with debug info but I'm unable to produce a 
crash dump. Here's what I see on the screen when it crashes in 
swapgeom_strategy:

-START-
Fatal trap 12: page fault while in kernel mode
fault virtual address   = 0x40
fault code              = supervisor write, page not present
instruction pointer     = 0x8:0xc04ad20c
stack pointer           = 0x10:0xcaf23a08
frame pointer           = 0x10:0xcaf23a20
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, def32 1, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 6 (pagedaemon)
kernel: type 12 trap, code=0
Stopped at      swapgeom_strategy+0x3c: movl   %edi,0x40(%eax)
db> show reg
cs                 0x8
ds                0x10
es                0x10
fs                0x18
ss                0x10
eax                  0
ecx                  0
edx                0x4
ebx         0xc1f8eac0
esp         0xcaf23a08
ebp         0xcaf23a20
esi         0xc5ab4228
edi         0xc5ab4228
eip         0xc04ad20c  swapgeom_strategy+0x3c
efl            0x10246
dr0                  0
dr1                  0
dr2                  0
dr3                  0
dr4         0xffff0ff0
dr5              0x400
dr6         0xffff0ff0
dr7              0x400
swapgeom_strategy+0x3c: movl    %edi,0x40(%eax)
db> trace
swapgeom_strategy(c5ab4228,c1f8eac0,0,0,f) at swapgeom_strategy+0x3c
swp_pager_strategy(c5ab4228,200,0,3c6f,0) at swp_pager_strategy+0xc5
swap_pager_putpages(c0838c24,caf23b44,10,0,caf23ac0) at
swap_pager_putpages+0x452
vm_pageout_flush(caf23b44,10,0,1,1) at vm_pageout_flush+0x18b
vm_pageout_clean(c0b78ea8,0,0,0,0) at vm_pageout_clean+0x2cd
vm_pageout_scan(0,c0636420,44,c0569dbc,1f4) at vm_pageout_scan+0x73f
vm_pageout(0,caf23d48,0,0,0) at vm_pageout+0x368
fork_exit(c04c2fa0,0,caf23d48) at fork_exit+0xb1
fork_trampoline() at fork_trampoline+0x8
--- trap 0x1, eip = 0, esp = 0xcaf23d7c, ebp = 0 ---
db> panic
panic: from debugger
Debugger("panic")

Fatal trap 3: breakpoint instruction fault while in kernel mode
instruction pointer     = 0x8:0xc04ee154
stack pointer           = 0x10:0xcaf23780
frame pointer           = 0x10:0xcaf2378c
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, def32 1, gran 1
processor eflags        = IOPL = 0
current process         = 6 (pagedaemon)
Stopped at      swapgeom_strategy+0x3c: movl    %edi,0x40(%eax)
db> panic
panic: from debugger
Uptime: .......
All mbufs or mbuf clusters exhausted, please see tuning(7).
All mbufs or mbuf clusters exhausted, please see tuning(7).
All mbufs or mbuf clusters exhausted, please see tuning(7).
...etc...
-STOP-

I've only seen this "All mbufs or mbuf clusters..." message one time - 
normally it just hangs there after a message telling me it's going to dump 
192MB of RAM to disk (which it never does).

I've loaded the debug kernel into gdb and run "l *swapgeom_strategy+0x3c" 
(I'm no C programmer/kernel guru, so if anyone knows how I can get more 
information, they'll have to tell me how. :)

eivind at vimes:~/tmp/debug/20030918 > gdb -k kernel.debug
GNU gdb 5.2.1 (FreeBSD)
Copyright 2002 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain 
conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "i386-undermydesk-freebsd"...
(kgdb) l *swapgeom_strategy+0x3c
0xc04ad20c is in swapgeom_strategy (/usr/src/sys/vm/swap_pager.c:2388).
2383                    bp->b_ioflags |= BIO_ERROR;
2384                    bufdone(bp);
2385                    return;
2386            }
2387            bio = g_clone_bio(&bp->b_io);
2388            bio->bio_caller2 = bp;
2389            bio->bio_offset = (bp->b_blkno - sp->sw_first) * PAGE_SIZE;
2390            bio->bio_length = bp->b_bcount;
2391            bio->bio_done = swapgeom_done;
2392            g_io_request(bio, cp);
(kgdb)

Here's the crash in propagate_priority:

-START-
Fatal trap 12: page fault while in kernel mode
fault virtual address   = 0x24
fault code              = supervisor read, page not present
instruction pointer     = 0x8:0xc0346b2b
stack pointer           = 0x10:0xcaec4c38
frame pointer           = 0x10:0xcaec4c4c
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, def32 1, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 12 (swi8: tty:sio clock)
kernel: type 12 trap, code=0
Stopped at      propagate_priority+0x8b:    cmpl    0x24(%ebx),%ecx
db> show reg
cs                 0x8
ds          0xc0d30010
es          0xc0350010  getrusage+0x150
fs          0xcaec0018
ss                0x10
eax               0x24
ecx         0xc0d3eab0
edx         0xc0566311
ebx                  0
esp         0xcaec4c38
ebp         0xcaec4c4c
esi               0x24
edi                  0
eip         0xc0346b2b  propagate_priority+0x8b
efl            0x10293
dr0                  0
dr1                  0
dr2                  0
dr3                  0
dr4         0xffff0ff0
dr5              0x400
dr6         0xffff0ff0
dr7              0x400
propagate_priority+0x8b:    cmpl    0x24(%ebx),%ecx
db> trace
propagate_priority(c0d37980,c0627ce0,c0d39600,0,c0d379a0) at
propagate_priority+0x8b
_mtx_lock_sleep(c0625260,0,0,0,c0420ae0) at _mtx_lock_sleep+0x259
softclock(0,0,0,0,c0d36974) at softclock+0x250
ithread_loop(c0d35280,caec4d48,0,0,0) at ithread_loop+0x1d8
fork_exit(c033b850,c0d35280,caec4d48) at fork_exit+0xb1
fork_trampoline() at fork_trampoline+0x8
--- trap 0x1, eip = 0, esp = 0xcaec4d7c, ebp = 0 ---
db> panic
panic: from debugger
Debugger("panic")

Fatal trap 3: breakpoint instruction fault while in kernel mode
instruction pointer     = 0x8:0xc04ee154
stack pointer           = 0x10:0xcaec49ec
frame pointer           = 0x10:0xcaec49f8
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, def32 1, gran 1
processor eflags        = IOPL = 0
current process         = 12 (swi8: tty:sio clock)
Stopped at      propagate_priority+0x8b:    cmpl    0x24(%ebx),%ecx
db>
-STOP-

And here's the output from gdb:

(kgdb) l *propagate_priority+0x8b
0xc0346b2b is in propagate_priority (/usr/src/sys/kern/kern_mutex.c:178).
173
174                     /*
175                      * Check if the thread needs to be moved up on
176                      * the blocked chain
177                      */
178                     if (td == TAILQ_FIRST(&m->mtx_blocked)) {
179                             continue;
180                     }
181
182                     td1 = TAILQ_PREV(td, threadqueue, td_lockq);
(kgdb)

Does anyone have any suggestions as to what the problem might be? For the 
record, I've seen the exact same crashes with kernel+world built from 
source around the 7th and 15th of September as well.

-- 
Eivind Olsen
eivind at aminor.no