6.2 sporadically locks up

pluknet pluknet at gmail.com
Tue Jun 16 10:23:49 UTC 2009


Hi all.

This is one of livelocks we have on a weekly basis.
Yes, we do still use ULE scheduler on 6.2 and not moved to 7 yet.
Any thought?

db> ps
 pid  ppid  pgrp   uid   state   wmesg     wchan    cmd
70304 69700 69670     0  R                           sh
70303 70292 93818  3572  RL      CPU 2               chrsh
70302 70294 93818  3572  R                           crond
70299 93818 93818     0  R       CPU 1               crond
70298 93818 93818     0  R                           crond
70294 93818 93818  3572  S       piperd   0xd1d8d330 crond
70292 93818 93818  3572  R                           crond
70284 70279 70040 10229  S       biord    0xdbe2e4e8 perl5.8.8
70283 70278 93818 10229  SL      biord    0xdbd70710 exim-4.63-0
70279 70040 70040 10229  S       wait     0xc9005860 sh
70278 69996 93818 10229  S       wait     0xcaf4ac90 sh
70191  4680  4680  9738  S       select   0xc0a12944 httpd
70190  4796  4796 10008  R                           httpd
70188  5043  5043 30532  RL                          httpd
70043 69999 70043  3572  Ss      select   0xc0a12944 wget
70042 70000 70042  3572  Ss      select   0xc0a12944 wget
70041 70001 70041  3572  Ss      select   0xc0a12944 wget
70040 69996 70040 10229  Ss      piperd   0xca35e990 perl5.8.8
70039 70002 70039  3572  Ss      select   0xc0a12944 wget
db> kill 9 70284
db> kill 9 70283
db> c

telnet> send break
KDB: enter: Line break on console
[thread pid 70299 tid 101189 ]
Stopped at      kdb_enter+0x2b: nop    db> ps
 pid  ppid  pgrp   uid   state   wmesg     wchan    cmd
70304 69700 69670     0  R                           sh
70303 70292 93818  3572  RL      CPU 2               chrsh
70302 70294 93818  3572  R                           crond
70299 93818 93818     0  R       CPU 1               crond
70298 93818 93818     0  R                           crond
70294 93818 93818  3572  S       piperd   0xd1d8d330 crond
70292 93818 93818  3572  R                           crond
70284 70279 70040 10229  S       biord    0xdbe2e4e8 perl5.8.8
70283 70278 93818 10229  SL      biord    0xdbd70710 exim-4.63-0
70279 70040 70040 10229  S       wait     0xc9005860 sh
70278 69996 93818 10229  S       wait     0xcaf4ac90 sh
70191  4680  4680  9738  S       select   0xc0a12944 httpd
70190  4796  4796 10008  R                           httpd
70188  5043  5043 30532  RL                          httpd
70043 69999 70043  3572  Ss      select   0xc0a12944 wget
70042 70000 70042  3572  Ss      select   0xc0a12944 wget
70041 70001 70041  3572  Ss      select   0xc0a12944 wget
70040 69996 70040 10229  Ss      piperd   0xca35e990 perl5.8.8
70039 70002 70039  3572  Ss      select   0xc0a12944 wget
db> bt
Tracing pid 70299 tid 101189 td 0xc99f9960
kdb_enter(c094016e) at kdb_enter+0x2b
siointr1(c7f93000) at siointr1+0xce
siointr(c7f93000) at siointr+0x5e
intr_execute_handlers(c7cf24c8,eee33ad4,4,eee33b24,c0899013,...) at intr_execute
_handlers+0xe1
lapic_handle_intr(37) at lapic_handle_intr+0x2e
Xapic_isr1() at Xapic_isr1+0x33
--- interrupt, eip = 0xc067a3ce, esp = 0xeee33b18, ebp = 0xeee33b24 ---
_mtx_lock_sleep(c0a06c60,c99f9960,0,0,0) at _mtx_lock_sleep+0xc6
namei(eee33c00) at namei+0x1ec
kern_stat(c99f9960,2827dc14,0,eee33c74) at kern_stat+0x35
stat(c99f9960,eee33d04) at stat+0x1b
syscall(3b,805003b,bfbf003b,0,1,...) at syscall+0x2bf
Xint0x80_syscall() at Xint0x80_syscall+0x1f
--- syscall (188, FreeBSD ELF32, stat), eip = 0x28268453, esp = 0xbfbfd70c, ebp
= 0xbfbfd7e8 ---
db> bt 70283
Tracing pid 70283 tid 101809 td 0xcb250e10
sched_switch(cb250e10,0,1) at sched_switch+0x143
mi_switch(1,0,cb250e10,ef6c6a08,c06a43a4,...) at mi_switch+0x1ba

sleepq_switch(dbd70710) at sleepq_switch+0x87
sleepq_wait(dbd70710,0,cb250e10,4c,dbd70710,...) at sleepq_wait+0x5c
msleep(dbd70710,c0a12f80,4c,c0928ea1,0) at msleep+0x269
bwait(dbd70710,4c,c0928ea1) at bwait+0x5f
bufwait(dbd70710) at bufwait+0x1a
ufs_bmaparray(c8a22990,d,0,ef6c6b24,0,...) at ufs_bmaparray+0x656
ufs_bmap(ef6c6b70) at ufs_bmap+0x4b
VOP_BMAP_APV(c09d5720,ef6c6b70) at VOP_BMAP_APV+0x41
vnode_pager_haspage(c8a166b4,34,0,ef6c6bd8,ef6c6bd4) at vnode_pager_haspage+0x19
7
vm_fault_additional_pages(c1351850,0,0,ef6c6c84,ef6c6c48,...) at vm_fault_additi
onal_pages+0x59
vm_fault(cff46128,2838a000,2,8) at vm_fault+0xaeb
trap_pfault(ef6c6d38,1,2838a3a9,2838a3a9,0,...) at trap_pfault+0x123
trap(3b,3b,3b,2838a3a9,c57,...) at trap+0x1eb
calltrap() at calltrap+0x5
--- trap 0xc, eip = 0x2810b8c8, esp = 0xbfbfe960, ebp = 0xbfbfea08 ---
db> show lockchain 70283
thread 101809 (pid 70283, exim-4.63-0) inhibited
db> show lockchain 70284
thread 101825 (pid 70284, perl5.8.8) inhibited
db> show lockchain Giant
thread -3420549 (pid 434, ) ??? (0xc099cb0c)
db> show allpcpu
Current CPU: 1

cpuid        = 0
curthread    = 0xc7cfec80: pid 18 "swi4: clock sio"
curpcb       = 0xe6892d90
fpcurthread  = none
idlethread   = 0xc7cfeaf0: pid 17 "idle: cpu0"
APIC ID      = 0
currentldt   = 0x50

cpuid        = 1
curthread    = 0xc99f9960: pid 70299 "crond"
curpcb       = 0xeee33d90
fpcurthread  = none
idlethread   = 0xc7cfe000: pid 16 "idle: cpu1"
APIC ID      = 1
currentldt   = 0x50

cpuid        = 2
curthread    = 0xc99f9af0: pid 70303 "chrsh"
curpcb       = 0xeee36d90
fpcurthread  = none
idlethread   = 0xc7cfde10: pid 15 "idle: cpu2"
APIC ID      = 2
currentldt   = 0x50

cpuid        = 3
curthread    = 0xd087d320: pid 69700 "sh"
curpcb       = 0xf0179d90
fpcurthread  = none
idlethread   = 0xc7cfdc80: pid 14 "idle: cpu3"
APIC ID      = 3
currentldt   = 0x50

cpuid        = 4
curthread    = 0xc98f84b0: pid 69604 "httpd"
curpcb       = 0xeedadd90
fpcurthread  = none
idlethread   = 0xc7cfdaf0: pid 13 "idle: cpu4"
APIC ID      = 4
currentldt   = 0x50

cpuid        = 5
curthread    = 0xcaebe190: pid 69598 "httpd"
curpcb       = 0xf160fd90
fpcurthread  = none
idlethread   = 0xc7cfd960: pid 12 "idle: cpu5"
APIC ID      = 5
currentldt   = 0x50

cpuid        = 6
curthread    = 0xc7cfe960: pid 27 "irq17: bce1 aacu0"
curpcb       = 0xe688cd90
fpcurthread  = none
idlethread   = 0xc7cfd7d0: pid 11 "idle: cpu6"
APIC ID      = 6
currentldt   = 0x50

cpuid        = 7
curthread    = 0xc837fe10: pid 69711 "arcconf"
curpcb       = 0xf0a7ad90
fpcurthread  = none
idlethread   = 0xc7cfd640: pid 10 "idle: cpu7"
APIC ID      = 7
currentldt   = 0x50

db> bt 69711
Tracing pid 69711 tid 101066 td 0xc837fe10
sched_switch(7fffffff,1,0,c7e63020,c,...) at sched_switch+0x143
db> show lockchain 69711
thread 101066 (pid 69711, arcconf) running on CPU 7
db> bt 27
Tracing pid 27 tid 100017 td 0xc7cfe960
sched_switch(181cd81,8515a000,c7f65000,1,c7f55c00,...) at sched_switch+0x143
_end(c7fbbc00,c7f57c08,31656362,0,0,...) at 0xc7f5aa00
_end(1,c7f5ae40,0,0,0,...) at 0xc7f60480
_end(c7fbbc00,c7f57c08,31656362,0,0,...) at 0xc7e32300
_end(1,c7f5ae40,0,0,0,...) at 0xc7f60480
_end(c7fbbc00,c7f57c08,31656362,0,0,...) at 0xc7e32300
_end(1,c7f5ae40,0,0,0,...) at 0xc7f60480
_end(c7fbbc00,c7f57c08,31656362,0,0,...) at 0xc7e32300
_end(1,c7f5ae40,0,0,0,...) at 0xc7f60480
_end(c7fbbc00,c7f57c08,31656362,0,0,...) at 0xc7e32300
_end(1,c7f5ae40,0,0,0,...) at 0xc7f60480
_end(c7fbbc00,c7f57c08,31656362,0,0,...) at 0xc7e32300
_end(1,c7f5ae40,0,0,0,...) at 0xc7f60480
_end(c7fbbc00,c7f57c08,31656362,0,0,...) at 0xc7e32300
_end(1,c7f5ae40,0,0,0,...) at 0xc7f60480
_end(c7fbbc00,c7f57c08,31656362,0,0,...) at 0xc7e32300
_end(1,c7f5ae40,0,0,0,...) at 0xc7f60480
_end(c7fbbc00,c7f57c08,31656362,0,0,...) at 0xc7e32300
_end(1,c7f5ae40,0,0,0,...) at 0xc7f60480
_end(c7fbbc00,c7f57c08,31656362,0,0,...) at 0xc7e32300
db> show lockchain 27
thread 100017 (pid 27, irq17: bce1 aacu0) running on CPU 6
db> bt 18
Tracing pid 18 tid 100015 td 0xc7cfec80
sched_switch(c0a21e40,c08639e4,0,0,0,...) at sched_switch+0x143
db> show lockchain 18
thread 100015 (pid 18, swi4: clock sio) running on CPU 0
db> bt 70304
Tracing pid 70304 tid 101836 td 0xcc4064b0
fork_trampoline() at fork_trampoline
db> bt 70303
Tracing pid 70303 tid 101552 td 0xc99f9af0
fork_trampoline() at fork_trampoline
db> bt 70292
Tracing pid 70292 tid 100415 td 0xc98f7320
sched_switch(c98f7320,0,2) at sched_switch+0x143
mi_switch(2,0,c98f7320,a1) at mi_switch+0x1ba
ast(eed8cd38) at ast+0x3fe
doreti_ast() at doreti_ast+0x17
db> bt 70040
Tracing pid 70040 tid 101893 td 0xd0df1320
sched_switch(d0df1320,0,1) at sched_switch+0x143
mi_switch(1,0,ca35e990,efe1abec,c06a43f5,...) at mi_switch+0x1ba
sleepq_switch(ca35e990) at sleepq_switch+0x87
sleepq_wait_sig(ca35e990) at sleepq_wait_sig+0x1d
msleep(ca35e990,ca35eb00,14c,c0927184,0,...) at msleep+0x25a
pipe_read(ca9f33a8,efe1acbc,cd96de80,0,d0df1320) at pipe_read+0x42d
dofileread(d0df1320,4,ca9f33a8,efe1acbc,ffffffff,...) at dofileread+0x85
kern_readv(d0df1320,4,efe1acbc,8077000,1000,...) at kern_readv+0x36
read(d0df1320,efe1ad04) at read+0x45
syscall(804003b,3b,bfbf003b,0,28296da0,...) at syscall+0x2bf
Xint0x80_syscall() at Xint0x80_syscall+0x1f
--- syscall (3, FreeBSD ELF32, read), eip = 0x2827234f, esp = 0xbfbfc89c, ebp =
0xbfbfc8b8 ---
db> show intr
irq1: atkbd0 (pid 41)
irq4: sio0 (no thread)
irq9: acpi0 (pid 26)
irq14: ata0 (pid 38) {ENTROPY}
irq15: ata1 (pid 39) {ENTROPY}
irq16: bce0 (pid 29)
irq17: bce1 aacu0 (pid 27)
irq22: uhci1 uhci3 (pid 33)
irq23: uhci0 uhci+ (pid 30)
swi4: clock sio (pid 18) {SOFT, NEED}
swi3: vm (pid 19) {SOFT}
swi1: net (pid 20) {SOFT, NEED}
swi6: Giant taskq (pid 22) {SOFT}
swi5: + (pid 23) {SOFT}
swi2: cambio (pid 24) {SOFT}
swi6: task queue (pid 25) {SOFT}
swi0: sio (pid 40) {SOFT}
db> show lockedbufs
buf at 0xdbd65ca8
b_flags = 0x30000000<vmio,ram>
b_error = 0, b_bufsize = 16384, b_bcount = 16384, b_resid = 0
b_bufobj = (0xc8a49e90), b_data = 0xdc1e7000, b_blkno = 1513488416
b_npages = 4, pages(OBJ, IDX, PA): (0xc8da2c60, 0x234,
0x42fe8000),(0xc8da2c60, 0x235, 0xa0129000),(0xc8da2c60, 0x236,
0x7c26a000),(0xc8da2c60, 0x237, 0x52bab000)
lock type bufwait: EXCL (count 1) by thread 0xd01c54b0 (pid 1029)
buf at 0xdbd66830
b_flags = 0x20000000<vmio>
b_error = 0, b_bufsize = 16384, b_bcount = 16384, b_resid = 0
b_bufobj = (0xc8254d80), b_data = 0xdc20b000, b_blkno = 2171944672
b_npages = 4, pages(OBJ, IDX, PA): (0xc8257a50, 0x102ea7dc,
0x756a8000),(0xc8257a50, 0x102ea7dd, 0x7849000),(0xc8257a50,
0x102ea7de, 0x5560a000),(0xc8257a50, 0x102ea7df, 0xb6f4b000)
db> show lockedvnods
Locked vnodes

0xca4d7110: tag ufs, type VDIR
   usecount 2, writecount 0, refcount 4 mountedhere 0
   flags ()
   v_object 0xccdb6d68 ref 0 pages 5
    lock type ufs: EXCL (count 1) by thread 0xc9216190 (pid 69696)
       ino 13, on dev aacdu0s1e

0xc8a22990: tag ufs, type VREG
   usecount 11, writecount 0, refcount 14 mountedhere 0
   flags ()
   v_object 0xc8a166b4 ref 9 pages 40
    lock type ufs: SHARED (count 1)
       ino 1768379, on dev aacdu0s1f

0xc8f92110: tag ufs, type VDIR
   usecount 1, writecount 0, refcount 4 mountedhere 0
   flags ()
   v_object 0xcec50420 ref 0 pages 1
    lock type ufs: EXCL (count 1) by thread 0xca3ece10 (pid 64525)
       ino 14291128, on dev aacdu0s1g

0xc85bd110: tag ufs, type VREG
   usecount 1, writecount 1, refcount 423 mountedhere 0
   flags ()
   v_object 0xc85946b4 ref 0 pages 1970
    lock type ufs: SHARED (count 1)
       ino 189127858, on dev aacdu0s1g

0xc8a49dd0: tag ufs, type VREG
   usecount 2, writecount 2, refcount 85 mountedhere 0
   flags ()
   v_object 0xc8da2c60 ref 0 pages 328
    lock type ufs: SHARED (count 1)
       ino 189127816, on dev aacdu0s1g

0xc8df3770: tag ufs, type VDIR
   usecount 10, writecount 0, refcount 13 mountedhere 0
   flags ()
   v_object 0xc8da2210 ref 0 pages 1
    lock type ufs: EXCL (count 1) by thread 0xcb25daf0 (pid 69722)
       ino 183912960, on dev aacdu0s1g

0xccf1b990: tag ufs, type VNON
   usecount 1, writecount 0, refcount 1 mountedhere 0
   flags ()
    lock type ufs: EXCL (count 1) by thread 0xcb25daf0 (pid 69722)
       ino 183913000, on dev aacdu0s1g

0xd032f550: tag ufs, type VREG
   usecount 1, writecount 0, refcount 3 mountedhere 0
   flags ()
   v_object 0xd08c7318 ref 0 pages 1
    lock type ufs: SHARED (count 1)
       ino 142034973, on dev aacdu0s1g
db> bt 69722
Tracing pid 69722 tid 100908 td 0xcb25daf0
sched_switch(3408255728,0,1) at sched_switch+323
mi_switch(1,0,3408255728,4017571924,3228189604,...) at mi_switch+442
sleepq_switch(3689066592) at sleepq_switch+135
sleepq_wait(3689066592,0,3408255728,76,3689066592,...) at sleepq_wait+92
msleep(3689066592,3231788928,76,3230830241,0) at msleep+617
bwait(3689066592,76,3230830241) at bwait+95
bufwait(3689066592,1,0,0,0,...) at bufwait+26
breadn(3357887680,1471303904,0,16384,0,...) at breadn+486
bread(3357887680,1471303904,0,16384,0,4017572212) at bread+32
ffs_vget(3357590832,183913000,2,4017572316) at ffs_vget+816
ufs_lookup(4017572476) at ufs_lookup+2722
VOP_CACHEDLOOKUP_APV(3231536928,4017572476) at VOP_CACHEDLOOKUP_APV+56
vfs_cache_lookup(4017572632) at vfs_cache_lookup+178
VOP_LOOKUP_APV(3231536928,4017572632) at VOP_LOOKUP_APV+67
lookup(4017572768) at lookup+1217
namei(4017572768) at namei+922
kern_lstat(3408255728,134647808,0,4017572980) at kern_lstat+71
lstat(3408255728,4017573124) at lstat+27
syscall(59,59,59,134647845,1,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (190, FreeBSD ELF32, lstat), eip = 672752691, esp =
3217024908, ebp = 3217025080 ---
db> bt 64525
Tracing pid 64525 tid 102165 td 0xca3ece10
sched_switch(3393113616,0,1) at sched_switch+323
mi_switch(1,0,3393113616,4011194528,3228189604,...) at mi_switch+442
sleepq_switch(3689874456) at sleepq_switch+135
sleepq_wait(3689874456,0,3393113616,76,3689874456,...) at sleepq_wait+92
msleep(3689874456,3231788928,76,3230830241,0) at msleep+617
bwait(3689874456,76,3230830241) at bwait+95
bufwait(3689874456,1,0,0,0,...) at bufwait+26
breadn(3371770128,0,0,2048,0,...) at breadn+486
bread(3371770128,0,0,2048,0,4011194740) at bread+32
ffs_blkatoff(3371770128,0,0,0,4011194848) at ffs_blkatoff+158
ufs_lookup(4011195004) at ufs_lookup+717
VOP_CACHEDLOOKUP_APV(3231536928,4011195004) at VOP_CACHEDLOOKUP_APV+56
vfs_cache_lookup(4011195160) at vfs_cache_lookup+178
VOP_LOOKUP_APV(3231536928,4011195160) at VOP_LOOKUP_APV+67
lookup(4011195296) at lookup+1217
namei(4011195296) at namei+922
kern_lstat(3393113616,3217013088,0,4011195508) at kern_lstat+71
lstat(3393113616,4011195652) at lstat+27
syscall(3230203963,59,2097211,672879355,3217010833,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (190, FreeBSD ELF32, lstat), eip = 672797747, esp =
3217009740, ebp = 3217013000 ---
db> bt 69696
Tracing pid 69696 tid 100386 td 0xc9216190
sched_switch(3374408080,0,1) at sched_switch+323
mi_switch(1,0,3374408080,4004567636,3228189604,...) at mi_switch+442
sleepq_switch(3688649048) at sleepq_switch+135
sleepq_wait(3688649048,0,3374408080,76,3688649048,...) at sleepq_wait+92
msleep(3688649048,3231788928,76,3230830241,0) at msleep+617
bwait(3688649048,76,3230830241) at bwait+95
bufwait(3688649048,4096,0,4004567784,3227891026,...) at bufwait+26
cluster_read(3394072848,26112,0,1,0,...) at cluster_read+1617
ffs_read(4004568076) at ffs_read+607
VOP_READ_APV(3231536928,4004568076) at VOP_READ_APV+56
ufs_readdir(4004568208) at ufs_readdir+209
VOP_READDIR_APV(3231536928,4004568208) at VOP_READDIR_APV+56
getdirentries(3374408080,4004568324) at getdirentries+347
syscall(59,59,59,134693312,134688816,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (196, FreeBSD ELF32, getdirentries), eip = 672506011, esp
= 3217023708, ebp = 3217023752 ---

-- 
wbr,
pluknet


More information about the freebsd-stable mailing list