kern/92785: Using exported filesystem on OS/2 NFS client causes filesystem freeze

Ulrich Spoerlein uspoerlein at gmail.com
Fri Dec 15 04:44:49 PST 2006


Hi,

we too, ran into this problem. OS/2 Clients kill our NFS server. It is
running a RELENG_6 snapshot from 2006-11-14. rpc.lockd and rpc.statd
are running. I'll conduct a test without those two services shortly.

You can still log in the system with ssh and cruse around, but mountd
is stuck in ufs state and is no longer serving requests.

root at fs2:~# ps axl | grep ufs
    0 39370     1   0  -4  0  3052  2200 ufs    Ds    ??    0:00.01
/usr/sbin/mountd -r

db> show lockedvnods
Locked vnodes

0xc87b9414: tag ufs, type VDIR
    usecount 0, writecount 0, refcount 4 mountedhere 0
    flags (VV_ROOT)
    v_object 0xc8c43c60 ref 0 pages 1
     lock type ufs: EXCL (count 4) by thread 0xc8bac300 (pid 6926)
with 1 pending#0 0xc0668bf9 at lockmgr+0x4ed
#1 0xc078572e at ffs_lock+0x76
#2 0xc0838287 at VOP_LOCK_APV+0x87
#3 0xc06d663c at vn_lock+0xac
#4 0xc06ca4ca at vget+0xc2
#5 0xc06c24a9 at vfs_hash_get+0x8d
#6 0xc07844af at ffs_vget+0x27
#7 0xc078b253 at ufs_lookup+0xa4b
#8 0xc083641b at VOP_CACHEDLOOKUP_APV+0x9b
#9 0xc06bf499 at vfs_cache_lookup+0xb5
#10 0xc0836347 at VOP_LOOKUP_APV+0x87
#11 0xc06c3626 at lookup+0x46e
#12 0xc0734fba at nfs_namei+0x40e
#13 0xc0726d81 at nfsrv_lookup+0x1dd
#14 0xc0736765 at nfssvc_nfsd+0x3d9
#15 0xc07360b4 at nfssvc+0x18c
#16 0xc0825a07 at syscall+0x25b
#17 0xc0811f7f at Xint0x80_syscall+0x1f

        ino 2, on dev da1s2e


db> trace 6926
Tracing pid 6926 tid 100106 td 0xc8bac300
sched_switch(c8bac300,0,1) at sched_switch+0x177
mi_switch(1,0) at mi_switch+0x270
sleepq_switch(c8678200) at sleepq_switch+0xc1
sleepq_wait_sig(c8678200) at sleepq_wait_sig+0x1d
msleep(c8678200,c09c9f00,158,c088bec9,0,...) at msleep+0x26a
nfssvc_nfsd(c8bac300) at nfssvc_nfsd+0xe5
nfssvc(c8bac300,eafd4d04) at nfssvc+0x18c
syscall(3b,3b,3b,1,0,...) at syscall+0x25b
Xint0x80_syscall() at Xint0x80_syscall+0x1f
--- syscall (155, FreeBSD ELF32, nfssvc), eip = 0x280bd1b7, esp =
0xbfbfe90c, ebp = 0xbfbfe928 ---
db> trace 39370
Tracing pid 39370 tid 100102 td 0xc8bac900
sched_switch(c8bac900,0,1) at sched_switch+0x177
mi_switch(1,0) at mi_switch+0x270
sleepq_switch(c87b946c,c0973440,0,c089798c,211,...) at sleepq_switch+0xc1
sleepq_wait(c87b946c,0,c87b94dc,b7,c08929b8,...) at sleepq_wait+0x46
msleep(c87b946c,c0972500,50,c089c1c1,0,...) at msleep+0x279
acquire(eafe094c,40,60000,c8bac900,0,...) at acquire+0x76
lockmgr(c87b946c,2002,c87b94dc,c8bac900) at lockmgr+0x44e
ffs_lock(eafe09a4) at ffs_lock+0x76
VOP_LOCK_APV(c0943320,eafe09a4) at VOP_LOCK_APV+0x87
vn_lock(c87b9414,2002,c8bac900,c87b9414) at vn_lock+0xac
vget(c87b9414,2002,c8bac900) at vget+0xc2
vfs_hash_get(c86cf2e4,2,2,c8bac900,eafe0abc,0,0) at vfs_hash_get+0x8d
ffs_vget(c86cf2e4,2,2,eafe0abc) at ffs_vget+0x27
ufs_root(c86cf2e4,2,eafe0b00,c8bac900,0,...) at ufs_root+0x19
lookup(eafe0ba0) at lookup+0x743
namei(eafe0ba0) at namei+0x39a
kern_lstat(c8bac900,bfbfd2a0,0,eafe0c74) at kern_lstat+0x47
lstat(c8bac900,eafe0d04) at lstat+0x1b
syscall(3b,3b,3b,281512fb,bfbfc9f1,...) at syscall+0x25b
Xint0x80_syscall() at Xint0x80_syscall+0x1f
--- syscall (190, FreeBSD ELF32, lstat), eip = 0x2813d427, esp =
0xbfbfc5ac, ebp = 0xbfbfd268 ---

I was under the impression, that you are not allowed to sleep while
holding a lock in the FreeBSD kernel. Doesn't this also apply to the
lockmgr itself?

Upon shutting down the system, I had a panic coming in:

panic: userret: Returning with 4 locks held.
cpuid = 1
KDB: stack backtrace:
kdb_backtrace(100,c8bac300,c8bac3c8,c8bad218,c8bac300,...) at kdb_backtrace+0x29
panic(c089806f,4,0,c8bac300,c8bad218,...) at panic+0x114
userret(c8bac300,eafd4d38,0,2,0,...) at userret+0x183
syscall(3b,3b,3b,1,0,...) at syscall+0x321
Xint0x80_syscall() at Xint0x80_syscall+0x1f
--- syscall (0, FreeBSD ELF32, nosys), eip = 0x280bd1b7, esp =
0xbfbfe90c, ebp = 0xbfbfe928 ---
KDB: enter: panic
[thread pid 6926 tid 100106 ]
Stopped at      kdb_enter+0x2b: nop
db> bt
Tracing pid 6926 tid 100106 td 0xc8bac300
kdb_enter(c0894aec) at kdb_enter+0x2b
panic(c089806f,4,0,c8bac300,c8bad218,...) at panic+0x127
userret(c8bac300,eafd4d38,0,2,0,...) at userret+0x183
syscall(3b,3b,3b,1,0,...) at syscall+0x321
Xint0x80_syscall() at Xint0x80_syscall+0x1f
--- syscall (0, FreeBSD ELF32, nosys), eip = 0x280bd1b7, esp =
0xbfbfe90c, ebp = 0xbfbfe928 ---
db> show lockedvnods
Locked vnodes

0xc8761c3c: tag ufs, type VDIR
    usecount 1, writecount 0, refcount 1 mountedhere 0xc86cf2e4
    flags ()
     lock type ufs: EXCL (count 1) by thread 0xc8bac780 (pid 59934)#0
0xc0668bf9 at lockmgr+0x4ed
#1 0xc078572e at ffs_lock+0x76
#2 0xc0838287 at VOP_LOCK_APV+0x87
#3 0xc06d663c at vn_lock+0xac
#4 0xc06c5eba at dounmount+0x62
#5 0xc06c5e31 at unmount+0x1e5
#6 0xc0825a07 at syscall+0x25b
#7 0xc0811f7f at Xint0x80_syscall+0x1f

        ino 8260, on dev ufs/root

0xc87b9414: tag ufs, type VDIR
    usecount 0, writecount 0, refcount 4 mountedhere 0
    flags (VV_ROOT)
    v_object 0xc8c43c60 ref 0 pages 1
     lock type ufs: EXCL (count 4) by thread 0xc8bac300 (pid 6926)
with 1 pending#0 0xc0668bf9 at lockmgr+0x4ed
#1 0xc078572e at ffs_lock+0x76
#2 0xc0838287 at VOP_LOCK_APV+0x87
#3 0xc06d663c at vn_lock+0xac
#4 0xc06ca4ca at vget+0xc2
#5 0xc06c24a9 at vfs_hash_get+0x8d
#6 0xc07844af at ffs_vget+0x27
#7 0xc078b253 at ufs_lookup+0xa4b
#8 0xc083641b at VOP_CACHEDLOOKUP_APV+0x9b
#9 0xc06bf499 at vfs_cache_lookup+0xb5
#10 0xc0836347 at VOP_LOOKUP_APV+0x87
#11 0xc06c3626 at lookup+0x46e
#12 0xc0734fba at nfs_namei+0x40e
#13 0xc0726d81 at nfsrv_lookup+0x1dd
#14 0xc0736765 at nfssvc_nfsd+0x3d9
#15 0xc07360b4 at nfssvc+0x18c
#16 0xc0825a07 at syscall+0x25b
#17 0xc0811f7f at Xint0x80_syscall+0x1f

        ino 2, on dev da1s2e
db> ps
  pid  ppid  pgrp   uid   state   wmesg     wchan    cmd
41369 75806 93096     0  R                           sleep
59934 55518 59934     0  S+      vfslock  0xc86cf308 umount
75806 62826 93096     0  R                           sh
62826     1 93096     0  R                           sh
39370     1 39370     0  Ss      ufs      0xc87b946c mountd
 6926     1  3231     0  R       CPU 1               nfsd

So this seems to be umount and mountd tripping over each other.

Uli


More information about the freebsd-stable mailing list