[Bug 204764] Filesystem deadlock, process in vodead state

bugzilla-noreply at freebsd.org bugzilla-noreply at freebsd.org
Fri Apr 1 12:14:10 UTC 2016


https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=204764

--- Comment #19 from Rudolf Čejka <cejkar at fit.vutbr.cz> ---
And now the debugging info. Unfortunately, we are new for this bug, so we have
the kernel without INVARIANTS and without debugging.patch yet.

We have locked bacula-fd, which is backup client with similar function to find
& stat and it occured during incremental backup operation:

procstat -kk (I can send full procstat -kk -a personally, it has 429876 bytes):

 1331 101013 bacula-fd        -                mi_switch+0xbe
sleepq_catch_signals+0xab sleepq_wait_sig+0xc _cv_wait_sig+0x17a seltdwait+0xae
kern_select+0x8fa sys_select+0x54 amd64_syscall+0x321 Xfast_syscall+0xfb 
 1331 101035 bacula-fd        -                mi_switch+0xbe
sleepq_catch_signals+0xab sleepq_timedwait_sig+0xf _sleep+0x238
umtxq_sleep+0x125 do_wait+0x387 __umtx_op_wait_uint_private+0x83
amd64_syscall+0x321 Xfast_syscall+0xfb 
 1331 102066 bacula-fd        -                mi_switch+0xbe sleepq_wait+0x3a
_sleep+0x287 vnode_create_vobject+0x100 ufs_lookup_ino+0xa0
VOP_CACHEDLOOKUP_APV+0x7a vfs_cache_lookup+0xd6 VOP_LOOKUP_APV+0x7a
lookup+0x571 namei+0x344 kern_statat_vnhook+0xae sys_lstat+0x30
amd64_syscall+0x321 Xfast_syscall+0xfb 
 1331 103370 bacula-fd        -                mi_switch+0xbe
sleepq_catch_signals+0xab sleepq_timedwait_sig+0xf _cv_timedwait_sig_sbt+0x19e
seltdwait+0xa4 kern_select+0x8fa sys_select+0x54 amd64_syscall+0x321
Xfast_syscall+0xfb 

where the important thread is TID 102066, with vnode_create_vobject.

(kgdb) info threads
...
701 Thread 102066 (PID=1331: bacula-fd)  sched_switch (td=0xfffff803ec526960,
newtd=0x0, flags=<value optimized out>) at /usr/src/sys/kern/sched_ule.c:1945
...

(kgdb) thread 701
[Switching to thread 701 (Thread 102066)]#0  sched_switch
(td=0xfffff803ec526960, newtd=0x0, flags=<value optimized out>) at
/usr/src/sys/kern/sched_ule.c:1945
1945                    cpuid = PCPU_GET(cpuid);

(kgdb) bt
#0  sched_switch (td=0xfffff803ec526960, newtd=0x0, flags=<value optimized
out>) at /usr/src/sys/kern/sched_ule.c:1945
#1  0xffffffff80580d4e in mi_switch (flags=260, newtd=0x0) at
/usr/src/sys/kern/kern_synch.c:491
#2  0xffffffff805c155a in sleepq_wait (wchan=0x0, pri=0) at
/usr/src/sys/kern/subr_sleepqueue.c:618
#3  0xffffffff80580837 in _sleep (ident=<value optimized out>, lock=<value
optimized out>, priority=<value optimized out>, wmesg=<value optimized out>,
sbt=<value optimized out>, pr=<value optimized out>, flags=<value optimized
out>)
    at /usr/src/sys/kern/kern_synch.c:255
#4  0xffffffff80766e60 in vnode_create_vobject (vp=0xfffff816fe4f3588,
isize=512, td=0xfffff803ec526960) at /usr/src/sys/vm/vnode_pager.c:120
#5  0xffffffff8072fe80 in ufs_lookup_ino (vdp=0xfffff816fe4f3588,
vpp=0xfffffe17f2e63858, cnp=0xfffffe17f2e63880, dd_ino=0x0) at
/usr/src/sys/ufs/ufs/ufs_lookup.c:259
#6  0xffffffff807f897a in VOP_CACHEDLOOKUP_APV (vop=<value optimized out>,
a=<value optimized out>) at vnode_if.c:197
#7  0xffffffff806092e6 in vfs_cache_lookup (ap=<value optimized out>) at
vnode_if.h:80
#8  0xffffffff807f883a in VOP_LOOKUP_APV (vop=<value optimized out>, a=<value
optimized out>) at vnode_if.c:129
#9  0xffffffff806111f1 in lookup (ndp=0xfffffe17f2e637f8) at vnode_if.h:54
#10 0xffffffff80610a04 in namei (ndp=0xfffffe17f2e637f8) at
/usr/src/sys/kern/vfs_lookup.c:302
#11 0xffffffff8062631e in kern_statat_vnhook (td=0xfffff803ec526960,
flag=<value optimized out>, fd=-100, path=0x43c18128 <Error reading address
0x43c18128: Bad address>, pathseg=UIO_USERSPACE, sbp=0xfffffe17f2e63910,
hook=0)
    at /usr/src/sys/kern/vfs_syscalls.c:2300
#12 0xffffffff80626420 in sys_lstat (td=0x0, uap=0xfffffe17f2e63a40) at
/usr/src/sys/kern/vfs_syscalls.c:2280
#13 0xffffffff8078acd1 in amd64_syscall (td=0xfffff803ec526960, traced=0) at
subr_syscall.c:141
#14 0xffffffff8076fffb in Xfast_syscall () at
/usr/src/sys/amd64/amd64/exception.S:396
#15 0x00000000425a731a in ?? ()
Previous frame inner to this frame (corrupt stack?)

(kgdb) frame 4
#4  0xffffffff80766e60 in vnode_create_vobject (vp=0xfffff816fe4f3588,
isize=512, td=0xfffff803ec526960) at /usr/src/sys/vm/vnode_pager.c:120
120                     VM_OBJECT_SLEEP(object, object, PDROP | PVM, "vodead",
0);

(kgdb) p *vp
$1 = {v_tag = 0xffffffff8085c651 "ufs", v_op = 0xffffffff80b0f950, v_data =
0xfffff81488311160, v_mount = 0xfffff8019a27d000, v_nmntvnodes = {tqe_next =
0xfffff816894dc3b0, tqe_prev = 0xfffff80b0d4991f8}, v_un = {vu_mount = 0x0, 
    vu_socket = 0x0, vu_cdev = 0x0, vu_fifoinfo = 0x0}, v_hashlist = {le_next =
0xfffff80824c363b0, le_prev = 0xfffff81742127b48}, v_cache_src = {lh_first =
0x0}, v_cache_dst = {tqh_first = 0xfffff80e5f6a5af0, 
    tqh_last = 0xfffff80e5f6a5b10}, v_cache_dd = 0xfffff80e5f6a5af0, v_lock =
{lock_object = {lo_name = 0xffffffff8085c651 "ufs", lo_flags = 117112832,
lo_data = 0, lo_witness = 0x0}, lk_lock = 1, lk_exslpfail = 0, lk_timo = 51, 
    lk_pri = 96}, v_interlock = {lock_object = {lo_name = 0xffffffff80870687
"vnode interlock", lo_flags = 16973824, lo_data = 0, lo_witness = 0x0},
mtx_lock = 4}, v_vnlock = 0xfffff816fe4f35f0, v_actfreelist = {
    tqe_next = 0xfffff80b0d4991d8, tqe_prev = 0xfffff816894dc470}, v_bufobj =
{bo_lock = {lock_object = {lo_name = 0xffffffff80870697 "bufobj interlock",
lo_flags = 86179840, lo_data = 0, lo_witness = 0x0}, rw_lock = 1}, 
    bo_ops = 0xffffffff80afc070, bo_object = 0xfffff802f8423d00, bo_synclist =
{le_next = 0x0, le_prev = 0x0}, bo_private = 0xfffff816fe4f3588, __bo_vnode =
0xfffff816fe4f3588, bo_clean = {bv_hd = {tqh_first = 0x0, 
        tqh_last = 0xfffff816fe4f36a8}, bv_root = {pt_root = 0}, bv_cnt = 0},
bo_dirty = {bv_hd = {tqh_first = 0x0, tqh_last = 0xfffff816fe4f36c8}, bv_root =
{pt_root = 0}, bv_cnt = 0}, bo_numoutput = 0, bo_flag = 0, bo_bsize = 32768}, 
  v_pollinfo = 0x0, v_label = 0x0, v_lockf = 0x0, v_rl = {rl_waiters =
{tqh_first = 0x0, tqh_last = 0xfffff816fe4f3710}, rl_currdep = 0x0}, v_cstart =
0, v_lasta = 0, v_lastw = 0, v_clen = 0, v_holdcnt = 8, v_usecount = 7, 
  v_iflag = 512, v_vflag = 0, v_writecount = 0, v_hash = 76467851, v_type =
VDIR}

(kgdb) p *(vp->v_bufobj.bo_object)
$2 = {lock = {lock_object = {lo_name = 0xffffffff8088aeb6 "vm object", lo_flags
= 90374144, lo_data = 0, lo_witness = 0x0}, rw_lock = 1}, object_list =
{tqe_next = 0xfffff802f8423e00, tqe_prev = 0xfffff802f8423c20}, shadow_head = {
    lh_first = 0x0}, shadow_list = {le_next = 0x0, le_prev =
0xfffff801744afa30}, memq = {tqh_first = 0xfffff8183ea423d0, tqh_last =
0xfffff8183ea423e0}, rtree = {rt_root = 18446735381746688977, rt_flags = 0
'\0'}, size = 1, 
  generation = 1, ref_count = 0, shadow_count = 0, memattr = 6 '\006', type = 2
'\002', flags = 16392, pg_color = 1608, paging_in_progress = 0,
resident_page_count = 1, backing_object = 0x0, backing_object_offset = 0, 
  pager_object_list = {tqe_next = 0x0, tqe_prev = 0x0}, rvq = {lh_first = 0x0},
cache = {rt_root = 0, rt_flags = 0 '\0'}, handle = 0xfffff816fe4f3588, un_pager
= {vnp = {vnp_size = 512, writemappings = 0}, devp = {devp_pglist = {
        tqh_first = 0x200, tqh_last = 0x0}, ops = 0x0, dev = 0x0}, sgp =
{sgp_pglist = {tqh_first = 0x200, tqh_last = 0x0}}, swp = {swp_tmpfs = 0x200,
swp_bcount = 0}}, cred = 0x0, charge = 0}

(kgdb) p *td
$3 = {td_lock = 0xffffffff80b9d4d0, td_proc = 0xfffff8025dd694f0, td_plist =
{tqe_next = 0xfffff801744ac4b0, tqe_prev = 0xfffff805374dd970}, td_runq =
{tqe_next = 0x0, tqe_prev = 0xffffffff80b728b0}, td_slpq = {
    tqe_next = 0xfffff80389d11960, tqe_prev = 0xfffff80223243e80}, td_lockq =
{tqe_next = 0x0, tqe_prev = 0xfffffe00003849b8}, td_hash = {le_next = 0x0,
le_prev = 0xfffffe00007d7590}, td_cpuset = 0xfffff801100593a8, 
  td_sel = 0xfffff806ce36eb00, td_sleepqueue = 0x0, td_turnstile =
0xfffff80174d07e40, td_rlqe = 0xfffff8017477ba28, td_umtxq =
0xfffff803a5a44f00, td_tid = 102066, td_sigqueue = {sq_signals = {__bits = {0,
0, 0, 0}}, sq_kill = {
      __bits = {0, 0, 0, 0}}, sq_list = {tqh_first = 0x0, tqh_last =
0xfffff803ec526a18}, sq_proc = 0xfffff8025dd694f0, sq_flags = 1},
td_lend_user_pri = 255 '˙', td_flags = 4, td_inhibitors = 2, td_pflags = 0,
td_dupfd = 0, 
  td_sqqueue = 0, td_wchan = 0xfffff802f8423d00, td_wmesg = 0xffffffff8088be0a
"vodead", td_lastcpu = 13 '\r', td_oncpu = 255 '˙', td_owepreempt = 0 '\0',
td_tsqueue = 0 '\0', td_locks = 26721, td_rw_rlocks = 0, td_lk_slocks = 0, 
  td_stopsched = 0, td_blocked = 0x0, td_lockname = 0x0, td_contested =
{lh_first = 0x0}, td_sleeplocks = 0x0, td_intr_nesting_level = 0, td_pinned =
0, td_ucred = 0xfffff8017438e800, td_estcpu = 0, td_slptick = 1266348642, 
  td_blktick = 0, td_swvoltick = 1266348642, td_cow = 0, td_ru = {ru_utime =
{tv_sec = 0, tv_usec = 0}, ru_stime = {tv_sec = 0, tv_usec = 0}, ru_maxrss =
7952, ru_ixrss = 21233268, ru_idrss = 1299996, ru_isrss = 13866624, 
    ru_minflt = 1, ru_majflt = 0, ru_nswap = 0, ru_inblock = 2105608,
ru_oublock = 0, ru_msgsnd = 1531599, ru_msgrcv = 100, ru_nsignals = 0, ru_nvcsw
= 1685809, ru_nivcsw = 9633}, td_rux = {rux_runtime = 2226241352945, 
    rux_uticks = 46227, rux_sticks = 62106, rux_iticks = 0, rux_uu = 365363300,
rux_su = 490865795, rux_tu = 856229096}, td_incruntime = 0, td_runtime =
2226241352945, td_pticks = 0, td_sticks = 0, td_iticks = 0, td_uticks = 0, 
  td_intrval = 0, td_oldsigmask = {__bits = {0, 0, 0, 0}}, td_generation =
1695442, td_sigstk = {ss_sp = 0x0, ss_size = 0, ss_flags = 0}, td_xsig = 0,
td_profil_addr = 0, td_profil_ticks = 0, 
  td_name = "bacula-fd\000\000\000\000\000\000\000\000\000\000", td_fpop = 0x0,
td_dbgflags = 0, td_dbgksi = {ksi_link = {tqe_next = 0x0, tqe_prev = 0x0},
ksi_info = {si_signo = 0, si_errno = 0, si_code = 0, si_pid = 0, si_uid = 0, 
      si_status = 0, si_addr = 0x0, si_value = {sival_int = 0, sival_ptr = 0x0,
sigval_int = 0, sigval_ptr = 0x0}, _reason = {_fault = {_trapno = 0}, _timer =
{_timerid = 0, _overrun = 0}, _mesgq = {_mqd = 0}, _poll = {_band = 0}, 
        __spare__ = {__spare1__ = 0, __spare2__ = {0, 0, 0, 0, 0, 0, 0}}}},
ksi_flags = 0, ksi_sigq = 0x0}, td_ng_outbound = 0, td_osd = {osd_nslots = 0,
osd_slots = 0x0, osd_next = {le_next = 0x0, le_prev = 0x0}}, 
  td_map_def_user = 0x0, td_dbg_forked = 0, td_vp_reserv = 0, td_no_sleeping =
0, td_dom_rr_idx = 0, td_sigmask = {__bits = {0, 0, 0, 0}}, td_rqindex = 48
'0', td_base_pri = 152 '\230', td_priority = 152 '\230', 
  td_pri_class = 3 '\003', td_user_pri = 183 'ˇ', td_base_user_pri = 183 'ˇ',
td_pcb = 0xfffffe17f2e63b80, td_state = TDS_INHIBITED, td_retval = {0,
1084620448}, td_slpcallout = {c_links = {le = {le_next = 0x0, 
        le_prev = 0xfffffe0000ad9288}, sle = {sle_next = 0x0}, tqe = {tqe_next
= 0x0, tqe_prev = 0xfffffe0000ad9288}}, c_time = 5430875852315327, c_precision
= 26843, c_arg = 0xfffff803ec526960, 
    c_func = 0xffffffff805c1370 <sleepq_timeout>, c_lock = 0x0, c_flags = 2,
c_iflags = 272, c_cpu = 11}, td_frame = 0xfffffe17f2e63ac0, td_kstack_obj =
0xfffff8076ec32a00, td_kstack = 18446741977545703424, td_kstack_pages = 4, 
  td_critnest = 1, td_md = {md_spinlock_count = 1, md_saved_flags = 582,
md_spurflt_addr = 1131679744}, td_sched = 0xfffff803ec526de0, td_ar = 0x0,
td_lprof = {{lh_first = 0x0}, {lh_first = 0x0}}, td_dtrace = 0x0, td_errno = 0, 
  td_vnet = 0x0, td_vnet_lpush = 0x0, td_intr_frame = 0x0, td_rfppwait_p =
0xfffff80616f0a000, td_ma = 0x0, td_ma_cnt = 0, td_su = 0x0, td_dbg_sc_code =
0, td_dbg_sc_narg = 0, td_emuldata = 0x0}

(kgdb) p *object
$4 = {lock = {lock_object = {lo_name = 0xffffffff8088aeb6 "vm object", lo_flags
= 90374144, lo_data = 0, lo_witness = 0x0}, rw_lock = 1}, object_list =
{tqe_next = 0xfffff802f8423e00, tqe_prev = 0xfffff802f8423c20}, shadow_head = {
    lh_first = 0x0}, shadow_list = {le_next = 0x0, le_prev =
0xfffff801744afa30}, memq = {tqh_first = 0xfffff8183ea423d0, tqh_last =
0xfffff8183ea423e0}, rtree = {rt_root = 18446735381746688977, rt_flags = 0
'\0'}, size = 1, 
  generation = 1, ref_count = 0, shadow_count = 0, memattr = 6 '\006', type = 2
'\002', flags = 16392, pg_color = 1608, paging_in_progress = 0,
resident_page_count = 1, backing_object = 0x0, backing_object_offset = 0, 
  pager_object_list = {tqe_next = 0x0, tqe_prev = 0x0}, rvq = {lh_first = 0x0},
cache = {rt_root = 0, rt_flags = 0 '\0'}, handle = 0xfffff816fe4f3588, un_pager
= {vnp = {vnp_size = 512, writemappings = 0}, devp = {devp_pglist = {
        tqh_first = 0x200, tqh_last = 0x0}, ops = 0x0, dev = 0x0}, sgp =
{sgp_pglist = {tqh_first = 0x200, tqh_last = 0x0}}, swp = {swp_tmpfs = 0x200,
swp_bcount = 0}}, cred = 0x0, charge = 0}

-- 
You are receiving this mail because:
You are the assignee for the bug.


More information about the freebsd-fs mailing list