diskio / filesystem related deadlock on SMP 6.0-STABLE machine.
Niki Denev
nike_d at cytexbg.com
Thu Jan 26 00:38:53 PST 2006
As per Don Lewis's advice i rebuilt my kernel
with DEBUG_LOCKS, DEBUG_VFS_LOCKS,
WITNESS and INVARIANTS.
I was hoping to use this to extract more info about
the problem, but this time the machine panics on boot while
trying to enable the quotas.
I will try later with quotas disabled, maybe the problem is in
the QUOTA support?
Here is what i was able to get with the debugger on the serial
console :
[...boot messages...]
Enabling quotas:KDB: stack backtrace:
vfs_badlock() at vfs_badlock+0x95
assert_vop_locked() at assert_vop_locked+0x77
quotaon() at quotaon+0x181
ufs_quotactl() at ufs_quotactl+0x150
quotactl() at quotactl+0xfa
syscall() at syscall+0x4b2
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (148, FreeBSD ELF64, quotactl), rip = 0x80068657c, rsp =
0x7fffffffeda8, rbp = 0x1 ---
quotaon: 0xffffff00c33a1b18 is not locked but should be
KDB: enter: lock violation
[thread pid 398 tid 100116 ]
Stopped at kdb_enter+0x2f: nop
db> bt
Tracing pid 398 tid 100116 td 0xffffff00c371c720
kdb_enter() at kdb_enter+0x2f
assert_vop_locked() at assert_vop_locked+0x77
quotaon() at quotaon+0x181
ufs_quotactl() at ufs_quotactl+0x150
quotactl() at quotactl+0xfa
syscall() at syscall+0x4b2
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (148, FreeBSD ELF64, quotactl), rip = 0x80068657c, rsp =
0x7fffffffeda8, rbp = 0 ---
db> ps
pid proc uid ppid pgrp flag stat wmesg wchan cmd
398 ffffff00c39bd000 0 392 73 0004002 [CPU 1] quotaon
392 ffffff006b68a9c0 0 73 73 0000002 [SLPQ wait 0xffffff006b68a9c0]
[SLP] sh
347 ffffff011e647680 0 1 347 0000000 [RUNQ] syslogd
309 ffffff00c3cba680 0 1 309 0000000 [SLPQ select
0xffffffff808d7e70][SLP] devd
176 ffffff00c3cba9c0 0 1 176 0000000 [SLPQ pause
0xffffff00c3cbaa28][SLP] adjkerntz
73 ffffff006bb6f9c0 0 1 73 0004002 [SLPQ wait 0xffffff006bb6f9c0]
[SLP] sh
72 ffffff006bcd0000 0 0 0 0000204 [SLPQ m:w1 0xffffff008dd13200]
[SLP] g_mirror gm2
71 ffffff006bcd0340 0 0 0 0000204 [SLPQ m:w1 0xffffff011d117600]
[SLP] g_mirror gm1
70 ffffff006bcd0680 0 0 0 0000204 [SLPQ m:w2 0xffffff000235e400]
[SLP] g_mirror gm0
69 ffffff011ecc9000 0 0 0 0000204 [SLPQ - 0xffffffffb5ec6be4]
[SLP] schedcpu
68 ffffff011ea5c000 0 0 0 0000204 [SLPQ - 0xffffffff808e3138]
[SLP] nfsiod 3
67 ffffff011ea5c340 0 0 0 0000204 [SLPQ - 0xffffffff808e3130]
[SLP] nfsiod 2
66 ffffff011ea5c680 0 0 0 0000204 [SLPQ - 0xffffffff808e3128]
[SLP] nfsiod 1
65 ffffff011ea5c9c0 0 0 0 0000204 [SLPQ - 0xffffffff808e3120]
[SLP] nfsiod 0
64 ffffff011ebbf000 0 0 0 0000204 [SLPQ syncer
0xffffffff8084dea0][SLP] syncer
63 ffffff011ebbf340 0 0 0 0000204 [SLPQ vlruwt
0xffffff011ebbf340][SLP] vnlru
62 ffffff011ebbf680 0 0 0 0000204 [SLPQ psleep
0xffffffff808d8738][SLP] bufdaemon
61 ffffff011ebbf9c0 0 0 0 000020c [SLPQ pgzero
0xffffffff808eb5e0][SLP] pagezero
60 ffffff011eba0000 0 0 0 0000204 [SLPQ psleep
0xffffffff808eacac][SLP] vmdaemon
59 ffffff011eba0340 0 0 0 0000204 [SLPQ psleep
0xffffffff808eac5c][SLP] pagedaemon
58 ffffff011eba0680 0 0 0 0000204 [SLPQ - 0xffffff011d723248]
[SLP] fdc0
57 ffffff011eba09c0 0 0 0 0000204 [IWAIT] swi0: sio
56 ffffff011f599340 0 0 0 0000204 [SLPQ idle 0xffffffff885f4000]
[SLP] aic_recovery1
55 ffffff011f599680 0 0 0 0000204 [SLPQ idle 0xffffffff885f0000]
[SLP] aic_recovery0
54 ffffff011f5999c0 0 0 0 0000204 [SLPQ usbevt
0xffffffff885ee420][SLP] usb1
53 ffffff011f5de000 0 0 0 0000204 [SLPQ usbtsk
0xffffffff80848fd0][SLP] usbtask
52 ffffff011f5de340 0 0 0 0000204 [SLPQ usbevt
0xffffffff885ec420][SLP] usb0
51 ffffff011f5de680 0 0 0 0000204 [IWAIT] swi5:+
9 ffffff011f5de9c0 0 0 0 0000204 [SLPQ - 0xffffff0000d3e600]
[SLP] kqueue taskq
50 ffffff011f5df000 0 0 0 0000204 [IWAIT] swi2: cambio
8 ffffff011f5df340 0 0 0 0000204 [SLPQ - 0xffffff0000d3e900]
[SLP] acpi_task2
7 ffffff011f5df680 0 0 0 0000204 [SLPQ - 0xffffff0000d3e900]
[SLP] acpi_task1
6 ffffff011f5df9c0 0 0 0 0000204 [SLPQ - 0xffffff0000d3e900]
[SLP] acpi_task0
49 ffffff011f5879c0 0 0 0 0000204 [IWAIT] swi6: task queue
48 ffffff011f557000 0 0 0 0000204 [IWAIT] swi6:+
5 ffffff011f557340 0 0 0 0000204 [SLPQ - 0xffffff0000d3ee00]
[SLP] thread taskq
47 ffffff011f557680 0 0 0 0000204 [SLPQ - 0xffffffff80846bc0]
[SLP] yarrow
4 ffffff011f5579c0 0 0 0 0000204 [SLPQ - 0xffffffff80849908]
[SLP] g_down
3 ffffff011f558000 0 0 0 0000204 [SLPQ - 0xffffffff80849900]
[SLP] g_up
2 ffffff011f558340 0 0 0 0000204 [SLPQ - 0xffffffff808498f0]
[SLP] g_event
46 ffffff011f558680 0 0 0 0000204 [IWAIT] swi1: net
45 ffffff011f5589c0 0 0 0 0000204 [IWAIT] swi3: vm
44 ffffff011f599000 0 0 0 000020c [CPU 0] swi4: clock sio
43 ffffff011f577680 0 0 0 0000204 [IWAIT] irq31:
42 ffffff011f5779c0 0 0 0 0000204 [IWAIT] irq30:
41 ffffff011f564000 0 0 0 0000204 [IWAIT] irq29:
40 ffffff011f564340 0 0 0 0000204 [IWAIT] irq28:
39 ffffff011f564680 0 0 0 0000204 [IWAIT] irq27: bge1
38 ffffff011f5649c0 0 0 0 0000204 [IWAIT] irq26: bge0
37 ffffff011f587000 0 0 0 0000204 [IWAIT] irq25: ahd1
36 ffffff011f587340 0 0 0 0000204 [IWAIT] irq24: ahd0
35 ffffff011f587680 0 0 0 0000204 [IWAIT] irq23:
34 ffffff011f584680 0 0 0 0000204 [IWAIT] irq22:
33 ffffff011f5849c0 0 0 0 0000204 [IWAIT] irq21:
32 ffffff011f595000 0 0 0 0000204 [IWAIT] irq20:
31 ffffff011f595340 0 0 0 0000204 [IWAIT] irq19: ohci0 ohci1
30 ffffff011f595680 0 0 0 0000204 [IWAIT] irq18:
29 ffffff011f5959c0 0 0 0 0000204 [IWAIT] irq17:
28 ffffff011f577000 0 0 0 0000204 [IWAIT] irq16:
27 ffffff011f577340 0 0 0 0000204 [IWAIT] irq15: ata1
26 ffffff011f5da9c0 0 0 0 0000204 [IWAIT] irq14: ata0
25 ffffff011f582000 0 0 0 0000204 [IWAIT] irq13:
24 ffffff011f582340 0 0 0 0000204 [IWAIT] irq12:
23 ffffff011f582680 0 0 0 0000204 [IWAIT] irq11:
22 ffffff011f5829c0 0 0 0 0000204 [IWAIT] irq10:
21 ffffff011f584000 0 0 0 0000204 [IWAIT] irq9: acpi0
20 ffffff011f584340 0 0 0 0000204 [IWAIT] irq8:
19 ffffff011f596340 0 0 0 0000204 [IWAIT] irq7: ppc0
18 ffffff011f596680 0 0 0 0000204 [IWAIT] irq6: fdc0
17 ffffff011f5969c0 0 0 0 0000204 [IWAIT] irq5:
16 ffffff011f5da000 0 0 0 0000204 [IWAIT] irq4: sio0
15 ffffff011f5da340 0 0 0 0000204 [IWAIT] irq3:
14 ffffff011f5da680 0 0 0 0000204 [IWAIT] irq0:
13 ffffff011f5cf000 0 0 0 0000204 [IWAIT] irq1: atkbd0
12 ffffff011f5cf340 0 0 0 000020c [Can run] idle: cpu0
11 ffffff011f5cf680 0 0 0 000020c [Can run] idle: cpu1
1 ffffff011f5cf9c0 0 0 1 0004200 [SLPQ wait 0xffffff011f5cf9c0]
[SLP] init
10 ffffff011f596000 0 0 0 0000204 [SLPQ ktrace
0xffffffff8084aa40][SLP] ktrace
0 ffffffff80849a60 0 0 0 0000200 [IWAIT] swapper
db> show lockedvnods
Locked vnodes
db> show lockedbufs
db>
db> show locks
exclusive sleep mutex Giant r = 0 (0xffffffff8084d5c0) locked
@ /usr/src/sys/kern/vfs_lookup.c:197
db> alltrace
Tracing command quotaon pid 398 tid 100116 td 0xffffff00c371c720
kdb_enter() at kdb_enter+0x2f
assert_vop_locked() at assert_vop_locked+0x77
quotaon() at quotaon+0x181
ufs_quotactl() at ufs_quotactl+0x150
quotactl() at quotactl+0xfa
syscall() at syscall+0x4b2
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (148, FreeBSD ELF64, quotactl), rip = 0x80068657c, rsp =
0x7fffffffeda8, rbp = 0 ---
Tracing command sh pid 392 tid 100087 td 0xffffff006b84cbe0
sched_switch() at sched_switch+0x13a
mi_switch() at mi_switch+0x1d2
sleepq_switch() at sleepq_switch+0xdd
sleepq_wait_sig() at sleepq_wait_sig+0xe
msleep() at msleep+0x35c
kern_wait() at kern_wait+0x353
wait4() at wait4+0x38
syscall() at syscall+0x4b2
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (7, FreeBSD ELF64, wait4), rip = 0x80093e02c, rsp =
0x7fffffffcc08, rbp = 0x49 ---
Tracing command syslogd pid 347 tid 100074 td 0xffffff011e666260
sched_switch() at sched_switch+0x13a
mi_switch() at mi_switch+0x1d2
critical_exit() at critical_exit+0xb0
lapic_handle_timer() at lapic_handle_timer+0xba
Xtimerint() at Xtimerint+0x76
_mtx_lock_flags() at _mtx_lock_flags+0xa3
giant_read() at giant_read+0x37
devfs_read_f() at devfs_read_f+0x6c
dofileread() at dofileread+0x94
kern_readv() at kern_readv+0x60
read() at read+0x4a
syscall() at syscall+0x4b2
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (3, FreeBSD ELF64, read), rip = 0x800827a6c, rsp = 0x7fffffffd568,
rbp = 0x511070 ---
Tracing command devd pid 309 tid 100102 td 0xffffff00c2658720
sched_switch() at sched_switch+0x13a
mi_switch() at mi_switch+0x1d2
sleepq_switch() at sleepq_switch+0xdd
sleepq_wait_sig() at sleepq_wait_sig+0xe
cv_wait_sig() at cv_wait_sig+0x1bc
kern_select() at kern_select+0x96d
select() at select+0x3e
syscall() at syscall+0x4b2
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (93, FreeBSD ELF64, select), rip = 0x43be6c, rsp = 0x7fffffffe958,
rbp = 0x7fffffffe980 ---
Tracing command adjkerntz pid 176 tid 100103 td 0xffffff00c26584c0
sched_switch() at sched_switch+0x13a
mi_switch() at mi_switch+0x1d2
sleepq_switch() at sleepq_switch+0xdd
sleepq_wait_sig() at sleepq_wait_sig+0xe
msleep() at msleep+0x35c
kern_sigsuspend() at kern_sigsuspend+0xa0
sigsuspend() at sigsuspend+0x40
syscall() at syscall+0x4b2
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (341, FreeBSD ELF64, sigsuspend), rip = 0x800684b3c, rsp =
0x7fffffffed28, rbp = 0xffffffffffffe3e0 ---
(null)() at 0x800684b3c
*** error reading from address ffffffffffffe3e8 ***
I hope this is sufficient.
P.S. Sorry no crashdump, because i'm running on gmirror-ed gstripe,
maybe i can try later to attach another disk to the machine to use it
only for crashdumps.
Regards,
Niki Denev.
More information about the freebsd-stable
mailing list