LOR with nfsclient "sillyrename"

Jeremiah Lott jlott at averesystems.com
Thu Jul 21 20:37:36 UTC 2011


We're seeing nfsclient deadlocks caused by what looks like a lock order reversal after removing a "silly rename".  It is fairly rare, but we've seen it happen a few times.  I have included the relevant backtraces from one occurrence below.  From what I can see, nfs_inactive() is called with the vnode locked.  If the vnode has a silly rename, nfs_inactive() calls vrele() on the parent directory, which can in turn try to lock the parent directory.  Lookup acquires these locks in the opposite order (parent directory first, then the child), so the two paths can deadlock.  In the traces below, thread 132 (in nfs_lookup) is waiting for the vnode lock held by thread 134, while thread 134 (in nfs_inactive, via vrele) is waiting for the directory lock held by thread 132.  This happened in a FreeBSD 7 build, but I looked through FreeBSD head and didn't see any change that addresses it.  Has anyone seen this before?

   Jeremiah Lott
   Avere Systems

(kgdb) thread 132
[Switching to thread 132 (Thread 100437)]#10 0xffffffff804e794e in vget (
    vp=0xffffff019bafe5e8, flags=8194, td=0xffffff00096e16e0)
    at ../../../kern/vfs_subr.c:2070
2070            if ((error = vn_lock(vp, flags | LK_INTERLOCK, td)) != 0) {
(kgdb) bt
#0  sched_switch (td=0xffffff00096e16e0, newtd=0xffffff00021f46e0, flags=1)
    at ../../../kern/sched_4bsd.c:928
#1  0xffffffff80477a1c in mi_switch (flags=1, newtd=0x0)
    at ../../../kern/kern_synch.c:444
#2  0xffffffff804a032b in sleepq_switch (wchan=Variable "wchan" is not available.
)
    at ../../../kern/subr_sleepqueue.c:535
#3  0xffffffff804a05f1 in sleepq_catch_signals (wchan=0xffffff019bafe680)
    at ../../../kern/subr_sleepqueue.c:455
#4  0xffffffff804a0ba6 in sleepq_wait_sig (wchan=Variable "wchan" is not available.
)
    at ../../../kern/subr_sleepqueue.c:650
#5  0xffffffff80477f15 in _sleep (ident=0xffffff019bafe680, 
    lock=0xffffffff80f58020, priority=336, wmesg=0xffffffff8077df5c "nfs", 
    timo=0) at ../../../kern/kern_synch.c:224
#6  0xffffffff8045eef0 in acquire (lkpp=0xffffffffd1475420, extflags=Variable "extflags" is not available.
)
    at ../../../kern/kern_lock.c:151
#7  0xffffffff8045f228 in _lockmgr (lkp=0xffffff019bafe680, flags=Variable "flags" is not available.
)
    at ../../../kern/kern_lock.c:384
#8  0xffffffff806fefda in VOP_LOCK1_APV (vop=0xffffffff8095f360, 
    a=0xffffffffd14754c0) at vnode_if.c:1618
#9  0xffffffff804f106f in _vn_lock (vp=0xffffff019bafe5e8, flags=2, 
    td=0xffffff00096e16e0, file=0xffffffff80772758 "../../../kern/vfs_subr.c", 
    line=2070) at vnode_if.h:851
#10 0xffffffff804e794e in vget (vp=0xffffff019bafe5e8, flags=8194, 
    td=0xffffff00096e16e0) at ../../../kern/vfs_subr.c:2070
---Type <return> to continue, or q <return> to quit---
#11 0xffffffff804db125 in vfs_hash_get (mp=0xffffff00049e1000, 
    hash=2562259366, flags=Variable "flags" is not available.
) at ../../../kern/vfs_hash.c:81
#12 0xffffffff80552198 in nfs_nget (mntp=0xffffff00049e1000, 
    fhp=0xffffff0d97160578, fhsize=48, npp=0xffffffffd14757d0, flags=2)
    at ../../../nfsclient/nfs_node.c:120
#13 0xffffffff8056084b in nfs_lookup (ap=Variable "ap" is not available.
)
    at ../../../nfsclient/nfs_vnops.c:950
#14 0xffffffff806ffa84 in VOP_LOOKUP_APV (vop=0xffffffff8096a980, 
    a=0xffffffffd14758e0) at vnode_if.c:99
#15 0xffffffff804dc16d in lookup (ndp=0xffffffffd1475a00) at vnode_if.h:57
#16 0xffffffff804dce94 in namei (ndp=0xffffffffd1475a00)
    at ../../../kern/vfs_lookup.c:215
#17 0xffffffff804ebf4e in kern_unlink (td=0xffffff00096e16e0, 
    path=0x80e815178 <Address 0x80e815178 out of bounds>, 
    pathseg=UIO_USERSPACE) at ../../../kern/vfs_syscalls.c:1670
#18 0xffffffff8066d756 in syscall (frame=0xffffffffd1475c80)
    at ../../../amd64/amd64/trap.c:939
#19 0xffffffff8065409b in Xfast_syscall ()
    at ../../../amd64/amd64/exception.S:339
#20 0x00000008014524bc in ?? ()
(kgdb) up 6
#6  0xffffffff8045eef0 in acquire (lkpp=0xffffffffd1475420, extflags=Variable "extflags" is not available.
)
    at ../../../kern/kern_lock.c:151
151                     error = msleep(lkp, lkp->lk_interlock, lkp->lk_prio,
(kgdb) print *lkp  
$7 = {lk_object = {lo_name = 0xffffffff8077df5c "nfs", 
    lo_type = 0xffffffff8077df5c "nfs", lo_flags = 70844416, 
    lo_witness_data = {lod_list = {stqe_next = 0x0}, lod_witness = 0x0}}, 
  lk_interlock = 0xffffffff80f58020, lk_flags = 33816640, lk_sharecount = 0, 
  lk_waitcount = 1, lk_exclusivecount = 1, lk_prio = 336, lk_timo = 51, 
  lk_lockholder = 0xffffff0180998000, lk_newlock = 0x0}
(kgdb) print td
No symbol "td" in current context.
(kgdb) thread 134
[Switching to thread 134 (Thread 100765)]#0  sched_switch (
    td=0xffffff0180998000, newtd=0xffffff00021f4370, flags=1)
    at ../../../kern/sched_4bsd.c:928
928                     if (PMC_PROC_IS_USING_PMCS(td->td_proc))
(kgdb) bt
#0  sched_switch (td=0xffffff0180998000, newtd=0xffffff00021f4370, flags=1)
    at ../../../kern/sched_4bsd.c:928
#1  0xffffffff80477a1c in mi_switch (flags=1, newtd=0x0)
    at ../../../kern/kern_synch.c:444
#2  0xffffffff804a032b in sleepq_switch (wchan=Variable "wchan" is not available.
)
    at ../../../kern/subr_sleepqueue.c:535
#3  0xffffffff804a05f1 in sleepq_catch_signals (wchan=0xffffff0009b90a70)
    at ../../../kern/subr_sleepqueue.c:455
#4  0xffffffff804a0ba6 in sleepq_wait_sig (wchan=Variable "wchan" is not available.
)
    at ../../../kern/subr_sleepqueue.c:650
#5  0xffffffff80477f15 in _sleep (ident=0xffffff0009b90a70, 
    lock=0xffffffff80f57c60, priority=336, wmesg=0xffffffff8077df5c "nfs", 
    timo=0) at ../../../kern/kern_synch.c:224
#6  0xffffffff8045eef0 in acquire (lkpp=0xffffffffd1ade690, extflags=Variable "extflags" is not available.
)
    at ../../../kern/kern_lock.c:151
#7  0xffffffff8045f68e in _lockmgr (lkp=0xffffff0009b90a70, flags=Variable "flags" is not available.
)
    at ../../../kern/kern_lock.c:391
#8  0xffffffff806fefda in VOP_LOCK1_APV (vop=0xffffffff8095f360, 
    a=0xffffffffd1ade730) at vnode_if.c:1618
#9  0xffffffff804f106f in _vn_lock (vp=0xffffff0009b909d8, flags=2, 
    td=0xffffff0180998000, file=0xffffffff80772758 "../../../kern/vfs_subr.c", 
    line=2165) at vnode_if.h:851
#10 0xffffffff804e72a7 in vrele (vp=0xffffff0009b909d8)
    at ../../../kern/vfs_subr.c:2165
---Type <return> to continue, or q <return> to quit---
#11 0xffffffff80551f11 in nfs_inactive (ap=0xffffffffd1ade860)
    at ../../../nfsclient/nfs_node.c:216
#12 0xffffffff806fdfa8 in VOP_INACTIVE_APV (vop=Variable "vop" is not available.
) at vnode_if.c:1513
#13 0xffffffff804e26b8 in vinactive (vp=0xffffff019bafe5e8, 
    td=0xffffff0180998000) at vnode_if.h:796
#14 0xffffffff804e713e in vput (vp=0xffffff019bafe5e8)
    at ../../../kern/vfs_subr.c:2232
#15 0xffffffff804f197b in vn_close (vp=0xffffff019bafe5e8, flags=Variable "flags" is not available.
)
    at ../../../kern/vfs_vnops.c:295
#16 0xffffffff804f1a20 in vn_closefile (fp=0xffffff0180dab000, 
    td=0xffffff0180998000) at ../../../kern/vfs_vnops.c:867
#17 0xffffffff80446838 in fdrop (fp=0xffffff0180dab000, td=0xffffff0180998000)
    at file.h:299
#18 0xffffffff804480f9 in closef (fp=0xffffff0180dab000, td=0xffffff0180998000)
    at ../../../kern/kern_descrip.c:2033
#19 0xffffffff804486e0 in kern_close (td=0xffffff0180998000, fd=53)
    at ../../../kern/kern_descrip.c:1125
#20 0xffffffff8066d756 in syscall (frame=0xffffffffd1adec80)
    at ../../../amd64/amd64/trap.c:939
#21 0xffffffff8065409b in Xfast_syscall ()
    at ../../../amd64/amd64/exception.S:339
#22 0x00000008014631fc in ?? ()
Previous frame inner to this frame (corrupt stack?)
(kgdb) up 6
#6  0xffffffff8045eef0 in acquire (lkpp=0xffffffffd1ade690, extflags=Variable "extflags" is not available.
)
    at ../../../kern/kern_lock.c:151
151                     error = msleep(lkp, lkp->lk_interlock, lkp->lk_prio,
(kgdb) print *lkp
$8 = {lk_object = {lo_name = 0xffffffff8077df5c "nfs", 
    lo_type = 0xffffffff8077df5c "nfs", lo_flags = 70844416, 
    lo_witness_data = {lod_list = {stqe_next = 0x0}, lod_witness = 0x0}}, 
  lk_interlock = 0xffffffff80f57c60, lk_flags = 33947712, lk_sharecount = 0, 
  lk_waitcount = 2, lk_exclusivecount = 1, lk_prio = 336, lk_timo = 51, 
  lk_lockholder = 0xffffff00096e16e0, lk_newlock = 0x0}


More information about the freebsd-net mailing list