UFS deadlocks on FreeBSD 6.2

Andrew Edwards aedwards at sandvine.com
Sat May 26 01:01:09 UTC 2007


I guess I feel much like Gore Jarold did in his posting about being
frustrated with UFS.  I have a serious problem that is preventing me from
upgrading my production systems to FreeBSD 6+.  I can reproduce the
problem easily, but I can't seem to get anyone to assist me.  At the
suggestion of one of our internal developers I have enabled MEMGUARD to
help find the cause of the panic, and I'm posting the current backtrace
and related output as recommended by the Developers' Handbook chapter on
debugging deadlocks.  If someone can help me it would be greatly appreciated.
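
For reference, the handbook procedure assumes a kernel built with the
lock-debugging options.  A sketch of the sort of configuration involved is
below; this is illustrative rather than a verbatim copy of my kernel config,
and the DEBUG_MEMGUARD / vm.memguard.desc lines are how I understand MEMGUARD
gets pointed at a particular malloc(9) type:

options         KDB               # kernel debugger framework
options         DDB               # interactive in-kernel debugger (db> prompt)
options         INVARIANTS        # run-time consistency checks
options         INVARIANT_SUPPORT
options         WITNESS           # lock-order verification
options         DEBUG_LOCKS       # record lockmgr lock holders
options         DEBUG_VFS_LOCKS   # vnode locking assertions (vfs_badlock etc.)
options         DEBUG_MEMGUARD    # MEMGUARD use-after-free detection

# at runtime, tell MEMGUARD which malloc type to watch:
# sysctl vm.memguard.desc=<malloc_type>

The DDB session captured after the hang follows.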

db> bt
Tracing pid 26543 tid 105117 td 0xd41c6a80
kdb_enter(c0785f13) at kdb_enter+0x2b
vfs_badlock(c0785f2c,c0786051,cd3dc414) at vfs_badlock+0x47
assert_vop_locked(cd3dc414,c0786051) at assert_vop_locked+0x4a
vop_lock_post(fa048bec,0,1002,cd3dc414,fa048c08,...) at vop_lock_post+0x2a
VOP_LOCK_APV(c07dc2e0,fa048bec) at VOP_LOCK_APV+0xa0
vn_lock(cd3dc414,1002,d41c6a80,0,0,...) at vn_lock+0xac
vn_statfile(cd3f0870,fa048c74,cd39f280,d41c6a80) at vn_statfile+0x63
kern_fstat(d41c6a80,3,fa048c74) at kern_fstat+0x35
fstat(d41c6a80,fa048d04) at fstat+0x19
syscall(3b,805003b,bfbf003b,8054000,8054000,...) at syscall+0x25b
Xint0x80_syscall() at Xint0x80_syscall+0x1f
--- syscall (189, FreeBSD ELF32, fstat), eip = 0x6814b08f, esp = 0xbfbfec2c, ebp = 0xbfbfeca8 ---
db> show pcpu
cpuid        = 1
curthread    = 0xd41c6a80: pid 26543 "cron"
curpcb       = 0xfa048d90
fpcurthread  = none
idlethread   = 0xccb5c600: pid 10 "idle: cpu1"
APIC ID      = 6
currentldt   = 0x50
spin locks held:
db> show allpcpu
Current CPU: 1

cpuid        = 0
curthread    = 0xcd01ec00: pid 898 "cron"
curpcb       = 0xf5ad7d90
fpcurthread  = none
idlethread   = 0xccb5c780: pid 11 "idle: cpu0"
APIC ID      = 0
currentldt   = 0x50
spin locks held:

cpuid        = 1
curthread    = 0xd41c6a80: pid 26543 "cron"
curpcb       = 0xfa048d90
fpcurthread  = none
idlethread   = 0xccb5c600: pid 10 "idle: cpu1"
APIC ID      = 6
currentldt   = 0x50
spin locks held:

db> show locks
exclusive sx user map r = 0 (0xd4135734) locked @ /usr/src/sys/vm/vm_map.c:3074
db> show alllocks
Process 26543 (cron) thread 0xd41c6a80 (105117)
exclusive sx user map r = 0 (0xd4135734) locked @ /usr/src/sys/vm/vm_map.c:3074
db> show lockedvnods
Locked vnodes

0xce00cc3c: tag ufs, type VREG
    usecount 1, writecount 1, refcount 11 mountedhere 0
    flags ()
    v_object 0xcdae9b58 ref 0 pages 96
     lock type ufs: EXCL (count 1) by thread 0xccffb300 (pid 18081) with 1 pending
#0 0xc0593f0d at lockmgr+0x4ed
#1 0xc06b8e0e at ffs_lock+0x76
#2 0xc0739787 at VOP_LOCK_APV+0x87
#3 0xc0601c28 at vn_lock+0xac
#4 0xc06015c4 at vn_write+0x138
#5 0xc05c4544 at dofilewrite+0x7c
#6 0xc05c43e3 at kern_writev+0x3b
#7 0xc05c4309 at write+0x45
#8 0xc0723e2b at syscall+0x25b
#9 0xc070ee0f at Xint0x80_syscall+0x1f

        ino 494730, on dev amrd0s1d

0xccf936cc: tag ufs, type VDIR
    usecount 1, writecount 0, refcount 2494 mountedhere 0
    flags (VV_ROOT)
    v_object 0xd18598c4 ref 0 pages 0
     lock type ufs: EXCL (count 1) by thread 0xcdff7000 (pid 19300) with 2492 pending
#0 0xc0593f0d at lockmgr+0x4ed
#1 0xc06b8e0e at ffs_lock+0x76
#2 0xc0739787 at VOP_LOCK_APV+0x87
#3 0xc0601c28 at vn_lock+0xac
#4 0xc05f5d12 at vget+0xbe
#5 0xc05ed9f9 at vfs_hash_get+0x8d
#6 0xc06b7b8f at ffs_vget+0x27
#7 0xc06c1435 at ufs_root+0x19
#8 0xc05eef1c at lookup+0x7c8
#9 0xc05ee4b2 at namei+0x39a
#10 0xc0600a13 at vn_open_cred+0x5b
#11 0xc06009b6 at vn_open+0x1e
#12 0xc05fa126 at kern_open+0xb6
#13 0xc05fa03a at open+0x1a
#14 0xc0723e2b at syscall+0x25b
#15 0xc070ee0f at Xint0x80_syscall+0x1f

        ino 2, on dev amrd0s1d

0xcd00a000: tag ufs, type VDIR
    usecount 1, writecount 0, refcount 3 mountedhere 0
    flags ()
    v_object 0xce083210 ref 0 pages 0
     lock type ufs: EXCL (count 1) by thread 0xcd574d80 (pid 89705) with 1 pending
#0 0xc0593f0d at lockmgr+0x4ed
#1 0xc06b8e0e at ffs_lock+0x76
#2 0xc0739787 at VOP_LOCK_APV+0x87
#3 0xc0601c28 at vn_lock+0xac
#4 0xc05f5d12 at vget+0xbe
#5 0xc05ea48e at cache_lookup+0x34a
#6 0xc05ea9c2 at vfs_cache_lookup+0x92
#7 0xc0737847 at VOP_LOOKUP_APV+0x87
#8 0xc05eebf8 at lookup+0x4a4
#9 0xc05ee4b2 at namei+0x39a
#10 0xc0600a13 at vn_open_cred+0x5b
#11 0xc06009b6 at vn_open+0x1e
#12 0xc05fa126 at kern_open+0xb6
#13 0xc05fa03a at open+0x1a
#14 0xc0723e2b at syscall+0x25b
#15 0xc070ee0f at Xint0x80_syscall+0x1f

        ino 494592, on dev amrd0s1d

0xcd23f15c: tag ufs, type VREG
    usecount 1, writecount 1, refcount 4 mountedhere 0
    flags ()
    v_object 0xcd16ec60 ref 0 pages 12
     lock type ufs: EXCL (count 1) by thread 0xccff8300 (pid 713)
#0 0xc0593f0d at lockmgr+0x4ed
#1 0xc06b8e0e at ffs_lock+0x76
#2 0xc0739787 at VOP_LOCK_APV+0x87
#3 0xc0601c28 at vn_lock+0xac
#4 0xc06015c4 at vn_write+0x138
#5 0xc05c4544 at dofilewrite+0x7c
#6 0xc05c43e3 at kern_writev+0x3b
#7 0xc05c4309 at write+0x45
#8 0xc0723e2b at syscall+0x25b
#9 0xc070ee0f at Xint0x80_syscall+0x1f

        ino 494620, on dev amrd0s1d

0xd02a0984: tag ufs, type VREG
    usecount 1, writecount 1, refcount 3 mountedhere 0
    flags ()
    v_object 0xcd89fad4 ref 0 pages 3
     lock type ufs: EXCL (count 1) by thread 0xccffb000 (pid 603)
#0 0xc0593f0d at lockmgr+0x4ed
#1 0xc06b8e0e at ffs_lock+0x76
#2 0xc0739787 at VOP_LOCK_APV+0x87
#3 0xc0601c28 at vn_lock+0xac
#4 0xc05fd950 at fsync+0x9c
#5 0xc0723e2b at syscall+0x25b
#6 0xc070ee0f at Xint0x80_syscall+0x1f

        ino 188453, on dev amrd0s1d

0xd0b442b8: tag ufs, type VREG
    usecount 1, writecount 1, refcount 2 mountedhere 0
    flags ()
    v_object 0xcffc88c4 ref 0 pages 1
     lock type ufs: EXCL (count 1) by thread 0xcd590480 (pid 18304)
#0 0xc0593f0d at lockmgr+0x4ed
#1 0xc06b8e0e at ffs_lock+0x76
#2 0xc0739787 at VOP_LOCK_APV+0x87
#3 0xc0601c28 at vn_lock+0xac
#4 0xc05fd950 at fsync+0x9c
#5 0xc0723e2b at syscall+0x25b
#6 0xc070ee0f at Xint0x80_syscall+0x1f

        ino 424403, on dev amrd0s1d

0xd24d3c3c: tag ufs, type VREG
    usecount 1, writecount 1, refcount 2 mountedhere 0
    flags ()
    v_object 0xd13b3ce4 ref 0 pages 1
     lock type ufs: EXCL (count 1) by thread 0xcd59ec00 (pid 18735)
#0 0xc0593f0d at lockmgr+0x4ed
#1 0xc06b8e0e at ffs_lock+0x76
#2 0xc0739787 at VOP_LOCK_APV+0x87
#3 0xc0601c28 at vn_lock+0xac
#4 0xc05fd950 at fsync+0x9c
#5 0xc0723e2b at syscall+0x25b
#6 0xc070ee0f at Xint0x80_syscall+0x1f

        ino 424405, on dev amrd0s1d

0xd09d6414: tag ufs, type VREG
    usecount 1, writecount 1, refcount 3 mountedhere 0
    flags ()
    v_object 0xcfd71b58 ref 0 pages 1
     lock type ufs: EXCL (count 1) by thread 0xcd6cfc00 (pid 20083)
#0 0xc0593f0d at lockmgr+0x4ed
#1 0xc06b8e0e at ffs_lock+0x76
#2 0xc0739787 at VOP_LOCK_APV+0x87
#3 0xc0601c28 at vn_lock+0xac
#4 0xc05fd950 at fsync+0x9c
#5 0xc0723e2b at syscall+0x25b
#6 0xc070ee0f at Xint0x80_syscall+0x1f

        ino 424406, on dev amrd0s1d

0xce0762b8: tag ufs, type VREG
    usecount 1, writecount 1, refcount 3 mountedhere 0
    flags ()
    v_object 0xd14fcdec ref 0 pages 1
     lock type ufs: EXCL (count 1) by thread 0xcd01e000 (pid 17274)
#0 0xc0593f0d at lockmgr+0x4ed
#1 0xc06b8e0e at ffs_lock+0x76
#2 0xc0739787 at VOP_LOCK_APV+0x87
#3 0xc0601c28 at vn_lock+0xac
#4 0xc05fd950 at fsync+0x9c
#5 0xc0723e2b at syscall+0x25b
#6 0xc070ee0f at Xint0x80_syscall+0x1f

        ino 424408, on dev amrd0s1d

0xce91f414: tag ufs, type VREG
    usecount 1, writecount 1, refcount 3 mountedhere 0
    flags ()
    v_object 0xd24af9cc ref 0 pages 1
     lock type ufs: EXCL (count 1) by thread 0xce03d780 (pid 20575)
#0 0xc0593f0d at lockmgr+0x4ed
#1 0xc06b8e0e at ffs_lock+0x76
#2 0xc0739787 at VOP_LOCK_APV+0x87
#3 0xc0601c28 at vn_lock+0xac
#4 0xc05fd950 at fsync+0x9c
#5 0xc0723e2b at syscall+0x25b
#6 0xc070ee0f at Xint0x80_syscall+0x1f

        ino 424409, on dev amrd0s1d

0xd1701d98: tag ufs, type VREG
    usecount 1, writecount 1, refcount 3 mountedhere 0
    flags ()
    v_object 0xd0c8d39c ref 0 pages 1
     lock type ufs: EXCL (count 1) by thread 0xcd6ce180 (pid 21143)
#0 0xc0593f0d at lockmgr+0x4ed
#1 0xc06b8e0e at ffs_lock+0x76
#2 0xc0739787 at VOP_LOCK_APV+0x87
#3 0xc0601c28 at vn_lock+0xac
#4 0xc05fd950 at fsync+0x9c
#5 0xc0723e2b at syscall+0x25b
#6 0xc070ee0f at Xint0x80_syscall+0x1f

        ino 424411, on dev amrd0s1d

0xcedf7828: tag ufs, type VREG
    usecount 1, writecount 1, refcount 3 mountedhere 0
    flags ()
    v_object 0xd2c37948 ref 0 pages 1
     lock type ufs: EXCL (count 1) by thread 0xcd576000 (pid 21114)
#0 0xc0593f0d at lockmgr+0x4ed
#1 0xc06b8e0e at ffs_lock+0x76
#2 0xc0739787 at VOP_LOCK_APV+0x87
#3 0xc0601c28 at vn_lock+0xac
#4 0xc05fd950 at fsync+0x9c
#5 0xc0723e2b at syscall+0x25b
#6 0xc070ee0f at Xint0x80_syscall+0x1f

        ino 424412, on dev amrd0s1d

0xd1679570: tag ufs, type VREG
    usecount 1, writecount 1, refcount 69 mountedhere 0
    flags ()
    v_object 0xd1fd0318 ref 0 pages 9308
     lock type ufs: EXCL (count 1) by thread 0xcd59ea80 (pid 65735)
#0 0xc0593f0d at lockmgr+0x4ed
#1 0xc06b8e0e at ffs_lock+0x76
#2 0xc0739787 at VOP_LOCK_APV+0x87
#3 0xc0601c28 at vn_lock+0xac
#4 0xc06015c4 at vn_write+0x138
#5 0xc05c4544 at dofilewrite+0x7c
#6 0xc05c43e3 at kern_writev+0x3b
#7 0xc05c4309 at write+0x45
#8 0xc0723e2b at syscall+0x25b
#9 0xc070ee0f at Xint0x80_syscall+0x1f

        ino 112037039, on dev amrd1s1d
db> alltrace

Tracing command  pid 26548 tid 105114 td 0xd41c7000
*** error reading from address b4ba72d3 ***

-----Original Message-----
From: owner-freebsd-fs at freebsd.org [mailto:owner-freebsd-fs at freebsd.org]
On Behalf Of Andrew Edwards
Sent: Tuesday, May 22, 2007 1:35 PM
To: freebsd-fs at freebsd.org; freebsd-performance at freebsd.org
Subject: RE: UFS deadlocks on FreeBSD 6.2

It's been a couple of days with no response; how do I know whether anyone
is looking into this problem?

> -----Original Message-----
> From: owner-freebsd-fs at freebsd.org
[mailto:owner-freebsd-fs at freebsd.org]
> On Behalf Of Andrew Edwards
> Sent: Saturday, May 19, 2007 12:34 AM
> To: freebsd-fs at freebsd.org; freebsd-performance at freebsd.org
> Subject: RE: UFS deadlocks on FreeBSD 6.2
> 
> Fsck didn't help, but below is a list of processes that were stuck in
> disk wait.  Also, one potential problem I've hit is that I have MRTG
> scripts launched from cron every minute.  MRTG is supposed to have a
> locking mechanism to prevent the same script from running twice at the
> same time, but I suspect that since the filesystem was inaccessible the
> cron jobs just kept piling up until the system would eventually crash.
> I caught it when the load average was at 620 and killed all the crons I
> could.  That brought the load average down to under 1; however, the
> system is still using 30% of the processor time and the disks are
> basically idle.  I can still do an ls -l on the root of all my mounted
> UFS and NFS filesystems, but on one it takes considerably longer than
> the rest.  The rsync that I was running is copying into the /d2
> filesystem.
> 
> The system is still running and I can make TCP connections, and some
> things I have running from inetd work, but ssh stops responding right
> away and I can't log on via the console.  So, I've captured a core dump
> of the system and rebooted so that I could use it again.  Are there any
> suggestions as to what to do next?  I'm debating installing an Adaptec
> RAID controller and rebuilding the system to see if I get the same
> problem; my worry is that the Intel RAID drivers are causing this
> problem, and I have 4 other systems with the same card.
> 
> 
>   PID  TT  STAT      TIME COMMAND
>     2  ??  DL     0:04.86 [g_event]
>     3  ??  DL     2:05.90 [g_up]
>     4  ??  DL     1:07.95 [g_down]
>     5  ??  DL     0:00.00 [xpt_thrd]
>     6  ??  DL     0:00.00 [kqueue taskq]
>     7  ??  DL     0:00.00 [thread taskq]
>     8  ??  DL     0:06.96 [pagedaemon]
>     9  ??  DL     0:00.00 [vmdaemon]
>    15  ??  DL     0:22.28 [yarrow]
>    24  ??  DL     0:00.01 [usb0]
>    25  ??  DL     0:00.00 [usbtask]
>    27  ??  DL     0:00.01 [usb1]
>    29  ??  DL     0:00.01 [usb2]
>    36  ??  DL     1:28.73 [pagezero]
>    37  ??  DL     0:08.76 [bufdaemon]
>    38  ??  DL     0:00.54 [vnlru]
>    39  ??  DL     1:08.12 [syncer]
>    40  ??  DL     0:04.00 [softdepflush]
>    41  ??  DL     0:11.05 [schedcpu]
> 27182  ??  Ds     0:05.75 /usr/sbin/syslogd -l /var/run/log -l
> /var/named/var/run/log -b 127.0.0.1 -a 10.128.0.0/10
> 27471  ??  Is     0:01.10 /usr/local/bin/postmaster -D
> /usr/local/pgsql/data (postgres)
> 27594  ??  Is     0:00.04 /usr/libexec/ftpd -m -D -l -l
> 27602  ??  DL     0:00.28 [smbiod1]
> 96581  ??  D      0:00.00 cron: running job (cron)
> 96582  ??  D      0:00.00 cron: running job (cron)
> 96583  ??  D      0:00.00 cron: running job (cron)
> 96585  ??  D      0:00.00 cron: running job (cron)
> 96586  ??  D      0:00.00 cron: running job (cron)
> 96587  ??  D      0:00.00 cron: running job (cron)
> 96588  ??  D      0:00.00 cron: running job (cron)
> 96589  ??  D      0:00.00 cron: running job (cron)
> 96590  ??  D      0:00.00 cron: running job (cron)
> 96591  ??  D      0:00.00 cron: running job (cron)
> 96592  ??  D      0:00.00 cron: running job (cron)
> 96593  ??  D      0:00.00 cron: running job (cron)
> 96594  ??  D      0:00.00 cron: running job (cron)
> 96607  ??  D      0:00.00 cron: running job (cron)
> 96608  ??  D      0:00.00 cron: running job (cron)
> 96609  ??  D      0:00.00 cron: running job (cron)
> 96610  ??  D      0:00.00 cron: running job (cron)
> 96611  ??  D      0:00.00 cron: running job (cron)
> 96612  ??  D      0:00.00 cron: running job (cron)
> 96613  ??  D      0:00.00 cron: running job (cron)
> 96614  ??  D      0:00.00 cron: running job (cron)
> 96615  ??  D      0:00.00 cron: running job (cron)
> 96616  ??  D      0:00.00 cron: running job (cron)
> 96617  ??  D      0:00.00 cron: running job (cron)
> 96631  ??  D      0:00.00 cron: running job (cron)
> 96632  ??  D      0:00.00 cron: running job (cron)
> 96633  ??  D      0:00.00 cron: running job (cron)
> 96634  ??  D      0:00.00 cron: running job (cron)
> 96635  ??  D      0:00.00 cron: running job (cron)
> 96636  ??  D      0:00.00 cron: running job (cron)
> 96637  ??  D      0:00.00 cron: running job (cron)
> 96638  ??  D      0:00.00 cron: running job (cron)
> 96639  ??  D      0:00.00 cron: running job (cron)
> 96642  ??  D      0:00.00 cron: running job (cron)
> 96650  ??  D      0:00.00 cron: running job (cron)
> 29393  p0  D+    22:04.58 /usr/local/bin/rsync
> 
> real    0m0.012s
> user    0m0.000s
> sys     0m0.010s
> /
> 
> real    0m0.019s
> user    0m0.000s
> sys     0m0.016s
> /var
> 
> real    0m0.028s
> user    0m0.008s
> sys     0m0.018s
> /diskless
> 
> real    0m0.017s
> user    0m0.008s
> sys     0m0.007s
> /usr
> 
> real    0m0.016s
> user    0m0.000s
> sys     0m0.015s
> /d2
> 
> real    0m0.024s
> user    0m0.000s
> sys     0m0.023s
> /exports/home
> 
> real    0m2.559s
> user    0m0.216s
> sys     0m2.307s
> 
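
On the MRTG point in the quoted message above: one way to keep cron jobs
from stacking up when the filesystem wedges is to wrap each job in a tiny
program that takes a non-blocking flock(2) on a lock file and exits
immediately if a previous run still holds it.  The sketch below is purely
illustrative (the lock-file path and the wrapper name "jobwrap" are made
up, and this is not MRTG's own locking mechanism).  Note that if the lock
file itself lives on the hung filesystem, even the open() can end up in
disk wait, so it should sit somewhere that stays responsive.

/*
 * jobwrap.c - hypothetical cron-job wrapper (illustrative only).
 * Takes a non-blocking exclusive flock(2) on a lock file; if an
 * earlier run still holds the lock, exit at once instead of piling
 * up behind it.
 */
#include <sys/file.h>

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(int argc, char *argv[])
{
        int fd;

        if (argc < 2) {
                fprintf(stderr, "usage: jobwrap command [args ...]\n");
                exit(1);
        }

        /* Hypothetical lock-file path; keep it off the wedged filesystem. */
        fd = open("/var/run/mrtg-job.lock", O_RDWR | O_CREAT, 0644);
        if (fd == -1) {
                perror("open");
                exit(1);
        }

        if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
                /* A previous instance is still running (or stuck in disk wait). */
                fprintf(stderr, "jobwrap: previous run still active, skipping\n");
                exit(0);
        }

        /*
         * Lock acquired.  The descriptor stays open across exec, so the
         * kernel releases the lock only when the real job exits.
         */
        execvp(argv[1], argv + 1);
        perror("execvp");
        exit(1);
}

From crontab the job would then be invoked as something like
"* * * * * /usr/local/bin/jobwrap /usr/local/bin/mrtg /etc/mrtg/mrtg.cfg"
(paths assumed for illustration), so a hung run makes later runs exit
quickly instead of accumulating in D state.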

