hald/geom(?) lock up

pluknet pluknet at gmail.com
Tue Apr 7 22:36:21 UTC 2009


[redirected from -current]

Seeing a similar locking issue on stable/7 as of April 5 now
(I guess some pieces merged from -current caused that).

2009/2/10 pluknet <pluknet at gmail.com>:
> Hi all.
>
> On recent -current after inserting a CD my system locks up in a few minutes.
> Any commands I tried then lock on sysctl lock; i.e. ctrl+t returns:
> load: 0.04 cmd: sudo 1008 [sysctl lock] 0.00u 0.00s 0% 312k
>
> FreeBSD 8.0-CURRENT #19: Tue Feb 10 00:24:21 UTC 2009
>
> Captured ddb (partially transcribed as ddb capture does not return some data):
>
> db> show alllocks
>
> Process 924 (sshd) thread 0xc5d606c0 (100107)
> exclusive sx so_rcv_sx ... uipc_sockbuf.c:148
> Process 890 (hald-addon-storage) thread 0xc5afd240 (100091)
> exclusive sx GEOM topology ... geom_dev.c:171
> Process 880 (hald) thread 0xc5d60900 (100106)
> exclusive sx sysctl lock ... kern_sysctl.c:1510
> Process 12 (intr) thread 0xc5663000 (100023)
> exclusive sleep mutex Giant ... kbdmux.c:1044
> db> bt 890
>
> Tracing pid 890 tid 100091 td 0xc5afd240
> sched_switch(c5afd240,0,104,18d,ae7e8399,...) at sched_switch+0x437
> mi_switch(104,0,c0805c50,1d2,0,...) at mi_switch+0x200
> sleepq_switch(c5afd240,0,c0805c50,26a,2,...) at sleepq_switch+0x15f
> sleepq_timedwait(c089b424,0,c07f05e9,2,0,...) at sleepq_timedwait+0x6b
> _sleep(c089b424,0,0,c07f05e9,1f4,...) at _sleep+0x329
> pause(c07f05e9,1f4,10,c5e170d0,c5986200,...) at pause+0x47
> acd_geom_access(c5986200,1,0,0,0,...) at acd_geom_access+0xeb
> g_access(c5989340,1,0,0,2000,...) at g_access+0x20b
> g_dev_open(c59a0500,1,2000,c5afd240,c052a7b4,...) at g_dev_open+0x104
> devfs_open(e7f1cacc,c5afd240,e7f1cba8,0,e7f1caf4,...) at devfs_open+0xe6
> VOP_OPEN_APV(c0847e80,e7f1cacc,c07fd022,c07fa9c9,c5decb84,...) at
> VOP_OPEN_APV+0xa5
> vn_open_cred(e7f1cba8,e7f1cc5c,0,c5512900,c5de47a8,...) at vn_open_cred+0x429
> vn_open(e7f1cba8,e7f1cc5c,0,c5de47a8,3,...) at vn_open+0x33
> kern_openat(c5afd240,ffffff9c,bfbfe490,0,1,...) at kern_openat+0x110
> kern_open(c5afd240,bfbfe490,0,0,0,...) at kern_open+0x35
> open(c5afd240,e7f1ccf8,c,c0818fe7,c084b2d8,...) at open+0x30
> syscall(e7f1cd38) at syscall+0x2a3
> Xint0x80_syscall() at Xint0x80_syscall+0x20
> --- syscall (5, FreeBSD ELF32, open), eip = 0x281c5fd3, esp =
> 0xbfbfe19c, ebp = 0xbfbfe1c8 ---
> db> bt 880
>
> Tracing pid 880 tid 100106 td 0xc5d60900
> sched_switch(c5d60900,0,104,18d,dc8c7829,...) at sched_switch+0x437
> mi_switch(104,0,c0805c50,1d2,4c,...) at mi_switch+0x200
> sleepq_switch(c5d60900,0,c0805c50,26a,0,...) at sleepq_switch+0x15f
> sleepq_timedwait(c5c8e200,4c,c07f97cd,0,0,...) at sleepq_timedwait+0x6b
> _sleep(c5c8e200,0,4c,c07f97cd,3e8,...) at _sleep+0x329
> g_waitfor_event(c0542cb0,c59847e0,2,0,0,...) at g_waitfor_event+0x9c
> sysctl_kern_geom_conftxt(c08493c0,0,0,e7f72ba4,e7f72ba4,...) at
> sysctl_kern_geom_conftxt+0x58
> sysctl_root(e7f72ba4,0,c0802669,5e6,c5d60900,...) at sysctl_root+0x199
> userland_sysctl(c5d60900,e7f72c10,3,0,bfbfe9c4,...) at userland_sysctl+0x115
> __sysctl(c5d60900,e7f72cf8,18,c08087f9,c084c550,...) at __sysctl+0x94
> syscall(e7f72d38) at syscall+0x2a3
> Xint0x80_syscall() at Xint0x80_syscall+0x20
> --- syscall (202, FreeBSD ELF32, __sysctl), eip = 0x28350b3f, esp =
> 0xbfbfe8bc, ebp = 0xbfbfe8e8 ---
> db> c
>
> [thread pid 12 tid 100023 ]
> Stopped at      kdb_enter+0x3a: movl    $0,kdb_why
> db> ps
>
>  pid  ppid  pgrp   uid   state   wmesg     wchan    cmd
>  1113   932  1113  1001  S+      sysctl l 0xc089b464 ls
>  1042     1  1042     0  Ss      select   0xc5e3aaa4 sshd
>  981   884   880     0  S       select   0xc5c950a4 initial thread
>  932   868   932  1001  S+      wait     0xc5af7a90 bash
>  929   928   929  1001  Ss+     ttyin    0xc575e270 bash
>  928   924   924  1001  S       select   0xc5799624 sshd
>  924     1   924     0  Ss      sbwait   0xc5df7238 sshd
>  890   884   880     0  S       acdld    0xc089b424 initial thread
>  884   880   880     0  S       select   0xc5989de4 initial thread
>  883     1   883     0  Ss      (threaded)          console-kit-daemon
> 100111                   S       waitvt   0xc0896f84 console-kit-daemon
> 100125                   S       waitvt   0xc0896fbc console-kit-daemon
> 100124                   S       waitvt   0xc0896fb8 console-kit-daemon
> 100123                   S       waitvt   0xc0896fb4 console-kit-daemon
> 100122                   S       waitvt   0xc0896fb0 console-kit-daemon
> 100121                   S       waitvt   0xc0896fac console-kit-daemon
> 100120                   S       waitvt   0xc0896fa8 console-kit-daemon
> 100119                   S       waitvt   0xc0896fa4 console-kit-daemon
> 100118                   S       waitvt   0xc0896fa0 console-kit-daemon
> db> bt 1113
>
> Tracing pid 1113 tid 100126 td 0xc5d5f6c0
> sched_switch(c5d5f6c0,0,104,18d,6d4b9014,...) at sched_switch+0x437
> mi_switch(104,0,c0805c50,1d2,0,...) at mi_switch+0x200
> sleepq_switch(c5d5f6c0,0,c0805c50,247,c089b464,...) at sleepq_switch+0x15f
> sleepq_wait(c089b464,0,c0802757,3,0,...) at sleepq_wait+0x63
> _sx_xlock_hard(c089b464,c5d5f6c0,0,c0802669,5e6,...) at _sx_xlock_hard+0x286
> _sx_xlock(c089b464,0,c0802669,5e6,c5d5f6c0,...) at _sx_xlock+0xc0
> userland_sysctl(c5d5f6c0,e7faec10,2,bfbfeacc,bfbfead0,...) at
> userland_sysctl+0xf1
> __sysctl(c5d5f6c0,e7faecf8,18,c,c084c550,...) at __sysctl+0x94
> syscall(e7faed38) at syscall+0x2a3
> Xint0x80_syscall() at Xint0x80_syscall+0x20
> --- syscall (202, FreeBSD ELF32, __sysctl), eip = 0x2806147b, esp =
> 0xbfbfea6c, ebp = 0xbfbfea98 ---
> db> c
>

^^^
I experienced a similar lockup when inserted a flash drive.
Sorry, not too much info saved here:
o exclusive sx lock @ kern_sysctl.c:1501
o exclusive sx GEOM topology @ g_part.c:1514

After a half hour or something like that the system went in its normal state.

Then the system locked up again after I tried a dumb `mount /dev/cd1 /mnt`,
^T showed that mount locked at [cgticb] wait channel.

db> show alllocks

Process 66819 (mount) thread 0xc467c460 (100130)
exclusive sx GEOM topology .. locked @ .. ffs_vfsops.c:633
Process 1479 (sshd) thread 0xc467caf0 (100127)
Process 1156 (hald) thread 0xc4201d20 (100078)
exclusive sx sysctl lock ... locked @ .. kern_sysctl.c:1501
Process 898 (hcsecd) thread 0xc4163af0 (100071)
Process 15 (swi6: Giant taskq) thread 0xc3c95000 (100010)
db> show lockedvnods

Locked vnodes
db> bt 66819

Tracing pid 66819 tid 100130 td 0xc467c460
sched_switch(c467c460,0,1,180,66eaa085,...) at sched_switch+0x253
mi_switch(1,0,c07bc615,1cb,c467c460,...) at mi_switch+0x243
sleepq_switch(c467c460,0,c07bc615,243,4c,...) at sleepq_switch+0x182
sleepq_wait(c6f17b3c,0,c07b95b9,dc,0,...) at sleepq_wait+0x60
_sleep(c6f17b3c,0,4c,c079982f,0,...) at _sleep+0x399
cam_periph_getccb(c6f17b00,1,c0799bcf,e66ba82c,c055eb94,1) at
cam_periph_getccb+0xcd
cdgetccb(c5975800,e66ba844,c044368a,c082b4ec,0,...) at cdgetccb+0xd8
cdcheckmedia(c6f17b00,14c,c07a0e6c,b7,0,...) at cdcheckmedia+0x5d
cdopen(c71ca200,4,c07b09ca,75,1,...) at cdopen+0xed
g_disk_access(c6edc280,1,1,1,1,...) at g_disk_access+0x11d
g_access(c4e7cbc0,1,1,1,c6ed9980,...) at g_access+0x229
g_vfs_open(c64088a0,e66baa30,c07c3923,1,c467c460,...) at g_vfs_open+0xae
ffs_mount(c3ffab40,c467c460,c07c3c78,3f0,0,...) at ffs_mount+0x17a2
vfs_donmount(c467c460,0,c437a580,c437a580,804b907,...) at vfs_donmount+0x135d
nmount(c467c460,e66bacfc,c,e66bad38,c08033b0,...) at nmount+0xab
syscall(e66bad38) at syscall+0x2d3
Xint0x80_syscall() at Xint0x80_syscall+0x20
--- syscall (378, FreeBSD ELF32, nmount), eip = 0x280dc6c7, esp =
0xbfbfdbcc, ebp = 0xbfbfe128 ---
db> bt 1156

Tracing pid 1156 tid 100078 td 0xc4201d20
sched_switch(c4201d20,0,1,180,b08e54c,...) at sched_switch+0x253
mi_switch(1,0,c07bc615,1cb,4c,...) at mi_switch+0x243
sleepq_switch(c4201d20,0,c07bc615,266,4c,...) at sleepq_switch+0x182
sleepq_timedwait(c6ede200,3e8,c07b0fa6,0,0,...) at sleepq_timedwait+0x68
_sleep(c6ede200,0,4c,c07b0fa6,3e8,...) at _sleep+0x375
g_waitfor_event(c0517400,c4b86d80,2,0,51f,...) at g_waitfor_event+0x9c
sysctl_kern_geom_conftxt(c07ff820,0,0,e65f8ba4,e65f8ba4,...) at
sysctl_kern_geom_conftxt+0x58
sysctl_root(e65f8ba4,0,c07b978b,5dd,c4201d20,...) at sysctl_root+0x1b5
userland_sysctl(c4201d20,e65f8c14,3,0,bfbfe9c4,...) at userland_sysctl+0x131
__sysctl(c4201d20,e65f8cfc,18,c07beafb,c0802330,...) at __sysctl+0x98
syscall(e65f8d38) at syscall+0x2d3
Xint0x80_syscall() at Xint0x80_syscall+0x20
--- syscall (202, FreeBSD ELF32, __sysctl), eip = 0x28338763, esp =
0xbfbfe8bc, ebp = 0xbfbfe8e8 ---
db> bt 898

Tracing pid 898 tid 100071 td 0xc4163af0
sched_switch(c4163af0,0,1,180,a2924776,...) at sched_switch+0x253
mi_switch(1,0,c07bc615,1cb,c4160588,...) at mi_switch+0x243
sleepq_switch(c4160588,0,c07bc615,19e,c415e8dc,...) at sleepq_switch+0x182
sleepq_catch_signals(c055e864,c0847ec0,4,c07b782d,58,...) at
sleepq_catch_signals+0x26d
sleepq_wait_sig(c415e8dc,0,c07b95b9,dc,0,...) at sleepq_wait_sig+0x14
_sleep(c415e8dc,c415e894,158,c07c13a1,0) at _sleep+0x389
sbwait(c415e870,4,c07c1456,5b0,c415e894,...) at sbwait+0x76
soreceive_generic(c415e820,e65e2be8,e65e2bf4,0,0,...) at soreceive_generic+0x400
soreceive(c415e820,e65e2be8,e65e2bf4,0,0,...) at soreceive+0x38
kern_recvit(c4163af0,3,e65e2c60,0,0,...) at kern_recvit+0x153
recvit(bfbfee74,e65e2c64,4,bfbfec2e,200,bfbfee2e,22,e65e2c58,1,0,2817b878,0)
at recvit+0x31
recvfrom(c4163af0,e65e2cfc,18,c07d2c3a,c08012f8,...) at recvfrom+0x76
syscall(e65e2d38) at syscall+0x2d3
Xint0x80_syscall() at Xint0x80_syscall+0x20
--- syscall (29, FreeBSD ELF32, recvfrom), eip = 0x281233c7, esp =
0xbfbfebfc, ebp = 0xbfbfee88 ---
db> capture off


After some minutes the system console spammed with a number of
calcru: runtime went backwards....
and one
g_vfs_done():cd1[READ(offset=65536, length=8192)]error=5
continuing to stay in some lockup state.

Then I tasted ddb again:

db> show alllocks

Process 898 (hcsecd) thread 0xc4163af0 (100071)
Process 15 (swi6: Giant taskq) thread 0xc3c95000 (100010)
Process 3 (g_event) thread 0xc3c50230 (100006)
exclusive sx GEOM topology .. locked @ .. geom_event.c:185
db> bt 3

Tracing pid 3 tid 100006 td 0xc3c50230
sched_switch(c3c50230,0,1,180,ecfc9041,...) at sched_switch+0x253
mi_switch(1,0,c07bc615,1cb,c3c50230,...) at mi_switch+0x243
sleepq_switch(c3c50230,0,c07bc615,243,4c,...) at sleepq_switch+0x182
sleepq_wait(c3e11c28,0,c0799806,0,0,...) at sleepq_wait+0x60
_sleep(c3e11c28,0,4c,c0799806,0) at _sleep+0x399
cam_periph_ccbwait(c3e11c00,2,3,376,c0843730,...) at cam_periph_ccbwait+0x59
cam_periph_runccb(c3e11c00,c044df30,2,3,c3fc7b40,...) at cam_periph_runccb+0x78
cdrunccb(3,1,c0452e80,20,0,...) at cdrunccb+0x4b
cdprevent(c587a650,c07efa40,c0452e80,20,c587a650,...) at cdprevent+0xaf
cdcheckmedia(c6f17b00,14c,c07a0e6c,b7,0,...) at cdcheckmedia+0x170
cdopen(c71ca200,4,c07b09ca,75,0,...) at cdopen+0xed
g_disk_access(c6edc280,1,0,0,0,...) at g_disk_access+0x11d
g_access(c4a57d40,1,0,0,c6edc2d8,...) at g_access+0x229
g_part_taste(c07ffd00,c6edc280,0,20f,c7168b80,...) at g_part_taste+0xb1
g_new_provider_event(c6edc280,0,c07b0ed1,d2,0,...) at g_new_provider_event+0xa9
g_run_events(c0841800,0,4c,c07af7a5,64,...) at g_run_events+0x31e
g_event_procbody(0,c3a88d38,c07b4f38,322,c3c8d570,...) at g_event_procbody+0x95
fork_exit(c0519660,0,c3a88d38) at fork_exit+0xc5
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc3a88d70, ebp = 0 ---

db> capture off

^T showed now that mount waits in the [g_waitidle] channel.

Then it locked down finally after about 30 minutes or so of wait in
[g_waitidle].

I was able to ping / use ssh/nfs during that lock up
(well, sometimes it refused to connect, I cannot say when definitely).

-- 
wbr,
pluknet


More information about the freebsd-stable mailing list