6.2 amd64 hang - deadlock?

Guy Helmer ghelmer at palisadesys.com
Wed Feb 7 16:44:10 UTC 2007


Here is the info I've grabbed from a Woodcrest Xeon machine that 
deadlocked under our stress test this morning.  Oddly, even though it is 
built as an SMP kernel, it's only running one CPU of the four available 
cores - is that a result of having to set hint.apic.0.disabled="1" to 
avoid an interrupt problem?

Anyway, our stress test has a lot of process churn (fork/exec) that has 
already exposed one race in procfs for which Kostik Belousov just 
commited a fix.  What follows is a by-hand copy of what appears to be 
most relevant.  I have a (large - 4GB) core dump that I can refer back 
to if necessary.

Thanks in advance for any help!

Guy

db> show allpcpu
Current CPU: 0

cpuid        = 0
curthread    = 0xffffff0x12f46b720: pid 17 "swi6: Giant taskq"
curpcb       = 0xffffffffb395ad10
fpcurthread  = none
idlethread   = 0xffffff012f46a720: pid 10 "idle: cpu0"
spin locks held:

db> show alllocks
Process 87741 (perform_ca) thread 0xffffff00acd90260 (100350)
exclusive sx allproc r = 0 (0xffffffffff8070dd60) locked @ kern/kern_exit.c:674
exclusive sx proctree r = 0 (0xffffffffff8070dde0) locked @ kern/kern_exit.c:602
Process 83677 (ph) thread 0xffffff0065f15000 (100076)
exclusive sx user map r = 0 (0xffffff00af53d6d0) locked @ vm/vm_map.c:3074
Process 813 (sendmail) thread 0xffffff012efa3000 (1000036)
exclusive sx sysctl lock r = 0 (0xffffffff8070e580) locked @ kern/kern_sysctl.c:1375
Process 17 (swi6: Giant taskq) thread 0xffffff012f46b720 (1000012)
exclusive sleep mutex Giant r = 0 (0xffffffff8070d300) locked @ kern/kern_intr.c:681
db> show sleepchain 87741
thread 100350 (pid 87741, perform_ca) is on a run queue
db> show sleepchain 83677
thread 100299 (pid 83677, ph) sleeping on 0xffffff0065f15720 "umtx"
db> show sleepchain 813
thread 1000036 (pid 813, sendmail) blocked on lk "allproc" EXCL (count 0)
db> where 87741
Tracing pid 87741 tid 100350 td 0xffffff00acd90260
sched_switch() at sched_switch+0x13a
mi_switch() at mi_switch_0x1d2
maybe_preempt() at maybe_preempt+0x132
sched_add() at sched_add+x17e
intr_event_schedule_thread() at intr_event_schedule_thread+0xbc
atpic_handle_intr() at atpic_handle_intr+0x5b
Xatpic_intr10() at Xatpic_intr10+0x6b
--- interrupt, rip = 0xffffffff80325b88, rsp = 0xffffffffb63469d0, rbp = 0xffffffffb646a00 ---
_mtx_lock_flags() at _mtx_lock_flags+0x8
_sx_asset() at _sx_assert+0x8f
_sx_xunlock() at _sx_xunlock+0x33
kern_wait() at kern_wait+0x597
wait4() at wait4+0x38
syscall() at syscall+0x4d1
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (7, FreeBSD ELF64, wait4), rip = 0x4114fc, rsp = 0x7fffffffb9c8, rbp = 0x156c4 ---
db> where 83677
Tracing pid 83677 tid 100299 td 0xffffff0065f15720
sched_switch() at sched_switch+0x13a
mi_switch() at mi_switch+0x1d2
sleepq_switch() at sleepq_switch+0xc2
sleepq_wait_sig() at sleepq_wait_sig+0x28
msleep() at msleep+0x2d8
_do_lock() at _do_lock+0x598
_umtx_op() at _umtx_op+0x4d1
syscall() at syscall+0x4d1
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (454, FreeBSD ELF64, _umtx_op), rip = 0x800da0e3c, rsp = 0x7ffffedf63c8, rbp = 0xfa0180 ---
db> where 813
Tracing pid 813 tid 1000036 td 0xffffff012efa3000
sched_switch() at sched_switch+0x13a
mi_switch() at mi_switch+0x1d2
sleepq_switch() at sleepq_switch+0xc2
sleepq_wait() at sleepq_wait+0x40
cv_wait() at cv_wait+0x16f
_sx_slock() at _sx_slock+0xb8
pfind() at pfind+0x22
sysctl_kern_proc_args() at sysctl_kern_proc_args+0x2f
sysctl_root() at sysctl_root+0xa0
userland_sysctl() at userland_sysctl+0x11c
__sysctl() at __sysctl+0xcd
syscall() at syscall+0x4d1
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (202, FreeBSD ELF64, __sysctl), rip = 0x800d16afc, rsp = 0x7fffffffb048, rbp = 0x483d47 ---
db> where 17
Tracing pid 17 tid 100012 td 0xffffff012f46b720
kdb_enter() at kdb_enter+0x2f
scgetc() at scgetc+0x43e
sckbdevent() at sckbdevent+0x83
kbdmux_intr() at kbdmux_intr+0x4d
kbdmux_kbd_intr() at kbdmux_kbd_intr+0x20
taskqueue_run() at taskqueue_run+0x135
ithread_loop() at ithread_loop+0x14c
fork_exit() at fork_exit+0xbb
fork_trampoline() at fork_trampoline+0xe
--- trap 0, rip = 0, rsp = 0xfffffffffb395ad00, rbp = 0 ---
db> allt ....
db> where 87429
Tracing command perform_ca pid 87429 tid 100145 0xffffff00b5861000
sched_switch() at sched_switch+0x13a
mi_switch() at mi_switch+0x1d2
sleepq_switch() at sleepq_switch+0xc2
sleepq_wait() at sleepq_wait+0x40
cv_wait() at cv_wait+0x16f
_sx_unlock() at _sx_unlock+0x71
_vm_map_lock() at _vm_map_lock+0x3e
vm_map_remove() at vm_map_remove+0x2e
pipe_free_kmem() at pipe_free_kmem+0x72
pipeclose() at pipeclose+0x272
pipe_close() at pipe_close+0x31
fdrop_locked() at fdrop_locked+0xa1
closef() at closef+0x35f
close() at close+0x172
syscall() at syscall+0x4d1
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (6, FreeBSD ELF64, close), rip = 0x42425c, rsp = 0x7fffffffb9a8, rbp = 0 ---
db> show sleepchain 819
thread 100055 (pid 819, sendmail) blocked on sx "sysctl lock" XLOCK
thread 100036 (pid 813, sendmail) blocked on lk "allproc" EXCL (count 0)
db> where 819
Tracing pid 819 tid 100055 td 0xffffff012efa3980
sched_switch() at sched_switch+0x13a
mi_switch() at mi_switch+0x1d2
sleepq_switch() at sleepq_switch+0xc2
sleepq_wait() at sleepq_wait+0x40
cv_wait() at cv_wait+0x16f
_sx_xlock() at _sx_xlock+0x71
userland_sysctl() at userland_sysctl+0xf5
__sysctl() at __sysctl+0xcd
syscall() at syscall+0x4d1
Xfast_syscall() at Xfast_syscall+0xa8
--- syscall (202, FreeBSD ELF64, __sysctl), rip = 0x800d16afc, rsp = 0x7fffffffcde8, rbp = 0x3 ---
db>



More information about the freebsd-stable mailing list