diskio / filesystem related deadlock on SMP 6.0-STABLE machine.

Niki Denev nike_d at cytexbg.com
Thu Jan 26 00:38:53 PST 2006


As per Don Lewis's advice i rebuilt my kernel
with  DEBUG_LOCKS, DEBUG_VFS_LOCKS,
WITNESS and INVARIANTS.

I was hoping to use this to extract more info about
the problem, but this time the machine panics on boot while
trying to enable the quotas.
I will try later with quotas disabled, maybe the problem is in
the QUOTA support?

Here is what i was able to get with the debugger on the serial
console : 

[...boot messages...]
Enabling quotas:KDB: stack backtrace:
vfs_badlock() at vfs_badlock+0x95
assert_vop_locked() at assert_vop_locked+0x77
quotaon() at quotaon+0x181
ufs_quotactl() at ufs_quotactl+0x150
quotactl() at quotactl+0xfa
syscall() at syscall+0x4b2
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (148, FreeBSD ELF64, quotactl), rip = 0x80068657c, rsp = 
0x7fffffffeda8, rbp = 0x1 ---
quotaon: 0xffffff00c33a1b18 is not locked but should be
KDB: enter: lock violation
[thread pid 398 tid 100116 ]
Stopped at      kdb_enter+0x2f: nop

db> bt
Tracing pid 398 tid 100116 td 0xffffff00c371c720
kdb_enter() at kdb_enter+0x2f
assert_vop_locked() at assert_vop_locked+0x77
quotaon() at quotaon+0x181
ufs_quotactl() at ufs_quotactl+0x150
quotactl() at quotactl+0xfa
syscall() at syscall+0x4b2
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (148, FreeBSD ELF64, quotactl), rip = 0x80068657c, rsp = 
0x7fffffffeda8, rbp = 0 ---

db> ps
  pid   proc     uid  ppid  pgrp  flag   stat  wmesg    wchan  cmd
  398 ffffff00c39bd000    0   392    73 0004002 [CPU 1] quotaon
  392 ffffff006b68a9c0    0    73    73 0000002 [SLPQ wait 0xffffff006b68a9c0]
[SLP] sh
  347 ffffff011e647680    0     1   347 0000000 [RUNQ] syslogd
  309 ffffff00c3cba680    0     1   309 0000000 [SLPQ select 
0xffffffff808d7e70][SLP] devd
  176 ffffff00c3cba9c0    0     1   176 0000000 [SLPQ pause 
0xffffff00c3cbaa28][SLP] adjkerntz
   73 ffffff006bb6f9c0    0     1    73 0004002 [SLPQ wait 0xffffff006bb6f9c0]
[SLP] sh
   72 ffffff006bcd0000    0     0     0 0000204 [SLPQ m:w1 0xffffff008dd13200]
[SLP] g_mirror gm2
   71 ffffff006bcd0340    0     0     0 0000204 [SLPQ m:w1 0xffffff011d117600]
[SLP] g_mirror gm1
   70 ffffff006bcd0680    0     0     0 0000204 [SLPQ m:w2 0xffffff000235e400]
[SLP] g_mirror gm0
   69 ffffff011ecc9000    0     0     0 0000204 [SLPQ - 0xffffffffb5ec6be4]
[SLP] schedcpu
   68 ffffff011ea5c000    0     0     0 0000204 [SLPQ - 0xffffffff808e3138]
[SLP] nfsiod 3
   67 ffffff011ea5c340    0     0     0 0000204 [SLPQ - 0xffffffff808e3130]
[SLP] nfsiod 2
   66 ffffff011ea5c680    0     0     0 0000204 [SLPQ - 0xffffffff808e3128]
[SLP] nfsiod 1
   65 ffffff011ea5c9c0    0     0     0 0000204 [SLPQ - 0xffffffff808e3120]
[SLP] nfsiod 0
   64 ffffff011ebbf000    0     0     0 0000204 [SLPQ syncer 
0xffffffff8084dea0][SLP] syncer
   63 ffffff011ebbf340    0     0     0 0000204 [SLPQ vlruwt 
0xffffff011ebbf340][SLP] vnlru
   62 ffffff011ebbf680    0     0     0 0000204 [SLPQ psleep 
0xffffffff808d8738][SLP] bufdaemon
   61 ffffff011ebbf9c0    0     0     0 000020c [SLPQ pgzero 
0xffffffff808eb5e0][SLP] pagezero
   60 ffffff011eba0000    0     0     0 0000204 [SLPQ psleep 
0xffffffff808eacac][SLP] vmdaemon
   59 ffffff011eba0340    0     0     0 0000204 [SLPQ psleep 
0xffffffff808eac5c][SLP] pagedaemon
   58 ffffff011eba0680    0     0     0 0000204 [SLPQ - 0xffffff011d723248]
[SLP] fdc0
   57 ffffff011eba09c0    0     0     0 0000204 [IWAIT] swi0: sio
   56 ffffff011f599340    0     0     0 0000204 [SLPQ idle 0xffffffff885f4000]
[SLP] aic_recovery1
   55 ffffff011f599680    0     0     0 0000204 [SLPQ idle 0xffffffff885f0000]
[SLP] aic_recovery0
   54 ffffff011f5999c0    0     0     0 0000204 [SLPQ usbevt 
0xffffffff885ee420][SLP] usb1
   53 ffffff011f5de000    0     0     0 0000204 [SLPQ usbtsk 
0xffffffff80848fd0][SLP] usbtask
   52 ffffff011f5de340    0     0     0 0000204 [SLPQ usbevt 
0xffffffff885ec420][SLP] usb0
   51 ffffff011f5de680    0     0     0 0000204 [IWAIT] swi5:+
    9 ffffff011f5de9c0    0     0     0 0000204 [SLPQ - 0xffffff0000d3e600]
[SLP] kqueue taskq
   50 ffffff011f5df000    0     0     0 0000204 [IWAIT] swi2: cambio
    8 ffffff011f5df340    0     0     0 0000204 [SLPQ - 0xffffff0000d3e900]
[SLP] acpi_task2
    7 ffffff011f5df680    0     0     0 0000204 [SLPQ - 0xffffff0000d3e900]
[SLP] acpi_task1
    6 ffffff011f5df9c0    0     0     0 0000204 [SLPQ - 0xffffff0000d3e900]
[SLP] acpi_task0
   49 ffffff011f5879c0    0     0     0 0000204 [IWAIT] swi6: task queue
   48 ffffff011f557000    0     0     0 0000204 [IWAIT] swi6:+
    5 ffffff011f557340    0     0     0 0000204 [SLPQ - 0xffffff0000d3ee00]
[SLP] thread taskq
   47 ffffff011f557680    0     0     0 0000204 [SLPQ - 0xffffffff80846bc0]
[SLP] yarrow
    4 ffffff011f5579c0    0     0     0 0000204 [SLPQ - 0xffffffff80849908]
[SLP] g_down
    3 ffffff011f558000    0     0     0 0000204 [SLPQ - 0xffffffff80849900]
[SLP] g_up
    2 ffffff011f558340    0     0     0 0000204 [SLPQ - 0xffffffff808498f0]
[SLP] g_event
   46 ffffff011f558680    0     0     0 0000204 [IWAIT] swi1: net
   45 ffffff011f5589c0    0     0     0 0000204 [IWAIT] swi3: vm
   44 ffffff011f599000    0     0     0 000020c [CPU 0] swi4: clock sio
   43 ffffff011f577680    0     0     0 0000204 [IWAIT] irq31:
   42 ffffff011f5779c0    0     0     0 0000204 [IWAIT] irq30:
   41 ffffff011f564000    0     0     0 0000204 [IWAIT] irq29:
   40 ffffff011f564340    0     0     0 0000204 [IWAIT] irq28:
   39 ffffff011f564680    0     0     0 0000204 [IWAIT] irq27: bge1
   38 ffffff011f5649c0    0     0     0 0000204 [IWAIT] irq26: bge0
   37 ffffff011f587000    0     0     0 0000204 [IWAIT] irq25: ahd1
   36 ffffff011f587340    0     0     0 0000204 [IWAIT] irq24: ahd0
   35 ffffff011f587680    0     0     0 0000204 [IWAIT] irq23:
   34 ffffff011f584680    0     0     0 0000204 [IWAIT] irq22:
   33 ffffff011f5849c0    0     0     0 0000204 [IWAIT] irq21:
   32 ffffff011f595000    0     0     0 0000204 [IWAIT] irq20:
   31 ffffff011f595340    0     0     0 0000204 [IWAIT] irq19: ohci0 ohci1
   30 ffffff011f595680    0     0     0 0000204 [IWAIT] irq18:
   29 ffffff011f5959c0    0     0     0 0000204 [IWAIT] irq17:
   28 ffffff011f577000    0     0     0 0000204 [IWAIT] irq16:
   27 ffffff011f577340    0     0     0 0000204 [IWAIT] irq15: ata1
   26 ffffff011f5da9c0    0     0     0 0000204 [IWAIT] irq14: ata0
   25 ffffff011f582000    0     0     0 0000204 [IWAIT] irq13:
   24 ffffff011f582340    0     0     0 0000204 [IWAIT] irq12:
   23 ffffff011f582680    0     0     0 0000204 [IWAIT] irq11:
   22 ffffff011f5829c0    0     0     0 0000204 [IWAIT] irq10:
   21 ffffff011f584000    0     0     0 0000204 [IWAIT] irq9: acpi0
   20 ffffff011f584340    0     0     0 0000204 [IWAIT] irq8:
   19 ffffff011f596340    0     0     0 0000204 [IWAIT] irq7: ppc0
   18 ffffff011f596680    0     0     0 0000204 [IWAIT] irq6: fdc0
   17 ffffff011f5969c0    0     0     0 0000204 [IWAIT] irq5:
   16 ffffff011f5da000    0     0     0 0000204 [IWAIT] irq4: sio0
   15 ffffff011f5da340    0     0     0 0000204 [IWAIT] irq3:
   14 ffffff011f5da680    0     0     0 0000204 [IWAIT] irq0:
   13 ffffff011f5cf000    0     0     0 0000204 [IWAIT] irq1: atkbd0
   12 ffffff011f5cf340    0     0     0 000020c [Can run] idle: cpu0
   11 ffffff011f5cf680    0     0     0 000020c [Can run] idle: cpu1
    1 ffffff011f5cf9c0    0     0     1 0004200 [SLPQ wait 0xffffff011f5cf9c0]
[SLP] init
   10 ffffff011f596000    0     0     0 0000204 [SLPQ ktrace 
0xffffffff8084aa40][SLP] ktrace
    0 ffffffff80849a60    0     0     0 0000200 [IWAIT] swapper

db> show lockedvnods
Locked vnodes

db> show lockedbufs
db>

db> show locks
exclusive sleep mutex Giant r = 0 (0xffffffff8084d5c0) locked 
@ /usr/src/sys/kern/vfs_lookup.c:197

db> alltrace
Tracing command quotaon pid 398 tid 100116 td 0xffffff00c371c720
kdb_enter() at kdb_enter+0x2f
assert_vop_locked() at assert_vop_locked+0x77
quotaon() at quotaon+0x181
ufs_quotactl() at ufs_quotactl+0x150
quotactl() at quotactl+0xfa
syscall() at syscall+0x4b2
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (148, FreeBSD ELF64, quotactl), rip = 0x80068657c, rsp = 
0x7fffffffeda8, rbp = 0 ---

Tracing command sh pid 392 tid 100087 td 0xffffff006b84cbe0
sched_switch() at sched_switch+0x13a
mi_switch() at mi_switch+0x1d2
sleepq_switch() at sleepq_switch+0xdd
sleepq_wait_sig() at sleepq_wait_sig+0xe
msleep() at msleep+0x35c
kern_wait() at kern_wait+0x353
wait4() at wait4+0x38
syscall() at syscall+0x4b2
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (7, FreeBSD ELF64, wait4), rip = 0x80093e02c, rsp = 
0x7fffffffcc08, rbp = 0x49 ---

Tracing command syslogd pid 347 tid 100074 td 0xffffff011e666260
sched_switch() at sched_switch+0x13a
mi_switch() at mi_switch+0x1d2
critical_exit() at critical_exit+0xb0
lapic_handle_timer() at lapic_handle_timer+0xba
Xtimerint() at Xtimerint+0x76
_mtx_lock_flags() at _mtx_lock_flags+0xa3
giant_read() at giant_read+0x37
devfs_read_f() at devfs_read_f+0x6c
dofileread() at dofileread+0x94
kern_readv() at kern_readv+0x60
read() at read+0x4a
syscall() at syscall+0x4b2
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (3, FreeBSD ELF64, read), rip = 0x800827a6c, rsp = 0x7fffffffd568, 
rbp = 0x511070 ---

Tracing command devd pid 309 tid 100102 td 0xffffff00c2658720
sched_switch() at sched_switch+0x13a
mi_switch() at mi_switch+0x1d2
sleepq_switch() at sleepq_switch+0xdd
sleepq_wait_sig() at sleepq_wait_sig+0xe
cv_wait_sig() at cv_wait_sig+0x1bc
kern_select() at kern_select+0x96d
select() at select+0x3e
syscall() at syscall+0x4b2
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (93, FreeBSD ELF64, select), rip = 0x43be6c, rsp = 0x7fffffffe958, 
rbp = 0x7fffffffe980 ---

Tracing command adjkerntz pid 176 tid 100103 td 0xffffff00c26584c0
sched_switch() at sched_switch+0x13a
mi_switch() at mi_switch+0x1d2
sleepq_switch() at sleepq_switch+0xdd
sleepq_wait_sig() at sleepq_wait_sig+0xe
msleep() at msleep+0x35c
kern_sigsuspend() at kern_sigsuspend+0xa0
sigsuspend() at sigsuspend+0x40
syscall() at syscall+0x4b2
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (341, FreeBSD ELF64, sigsuspend), rip = 0x800684b3c, rsp = 
0x7fffffffed28, rbp = 0xffffffffffffe3e0 ---
(null)() at 0x800684b3c
*** error reading from address ffffffffffffe3e8 ***


I hope this is sufficient.

P.S. Sorry no crashdump, because i'm running on gmirror-ed gstripe,
maybe i can try later to attach another disk to the machine to use it 
only for crashdumps.

Regards,
Niki Denev.


More information about the freebsd-stable mailing list