[Bug 204764] Filesystem deadlock, process in vodead state

bugzilla-noreply at freebsd.org bugzilla-noreply at freebsd.org
Fri Feb 26 12:52:25 UTC 2016


https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=204764

johans <johan at 300.nl> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |johan at 300.nl

--- Comment #15 from johans <johan at 300.nl> ---
We've just run into the same bug on one of our machines.

# procstat -kk 52827
  PID    TID COMM             TDNAME           KSTACK                       
52827 100187 ruby21           -                mi_switch+0xe1
thread_suspend_switch+0x170 thread_single+0x4e5 exit1+0xbe sigexit+0x925
postsig+0x286 ast+0x427 doreti_ast+0x1f 
52827 100389 ruby21           -                mi_switch+0xe1 sleepq_wait+0x3a
_sleep+0x287 vnode_create_vobject+0x100 zfs_freebsd_open+0xf5 VOP_OPEN_APV+0xa1
vn_open_vnode+0x234 vn_open_cred+0x33e kern_openat+0x26f amd64_syscall+0x357
Xfast_syscall+0xfb 


(kgdb) print *object
$10 = {
  lock = {
    lock_object = {
      lo_name = 0xffffffff80ff8196 "vm object", 
      lo_flags = 90374144, 
      lo_data = 0, 
      lo_witness = 0x0
    }, 
    rw_lock = 1
  }, 
  object_list = {
    tqe_next = 0xfffff80135aaa600, 
    tqe_prev = 0xfffff80135aaa420
  }, 
  shadow_head = {
    lh_first = 0x0
  }, 
  shadow_list = {
    le_next = 0x0, 
    le_prev = 0xfffff8019f85f030
  }, 
  memq = {
    tqh_first = 0x0, 
    tqh_last = 0xfffff80135aaa548
  }, 
  rtree = {
    rt_root = 0, 
    rt_flags = 0 '\0'
  }, 
  size = 0, 
  generation = 1, 
  ref_count = 0, 
  shadow_count = 0, 
  memattr = 6 '\006', 
  type = 2 '\002', 
  flags = 24584, 
  pg_color = 1569, 
  paging_in_progress = 0, 
  resident_page_count = 0, 
  backing_object = 0x0, 
  backing_object_offset = 0, 
  pager_object_list = {
    tqe_next = 0x0, 
    tqe_prev = 0x0
  }, 
  rvq = {
    lh_first = 0x0
  }, 
  cache = {
    rt_root = 0, 
    rt_flags = 0 '\0'
  }, 
  handle = 0xfffff8010bc08760, 
  un_pager = {
    vnp = {
      vnp_size = 0, 
      writemappings = 0
    }, 
    devp = {
      devp_pglist = {
        tqh_first = 0x0, 
        tqh_last = 0x0
      }, 
      ops = 0x0, 
      dev = 0x0
    }, 
    sgp = {
      sgp_pglist = {
        tqh_first = 0x0, 
        tqh_last = 0x0
      }
    }, 
    swp = {
      swp_tmpfs = 0x0, 
      swp_bcount = 0
    }
  }, 
  cred = 0x0, 
  charge = 0
}

(kgdb) p *vp
$11 = {
  v_tag = 0xffffffff81acefb6 "zfs", 
  v_op = 0xffffffff81ae12f0, 
  v_data = 0xfffff80031109450, 
  v_mount = 0xfffff8000801a330, 
  v_nmntvnodes = {
    tqe_next = 0xfffff801c912a1d8, 
    tqe_prev = 0xfffff8022bfa0780
  }, 
  v_un = {
    vu_mount = 0x0, 
    vu_socket = 0x0, 
    vu_cdev = 0x0, 
    vu_fifoinfo = 0x0
  }, 
  v_hashlist = {
    le_next = 0x0, 
    le_prev = 0x0
  }, 
  v_cache_src = {
    lh_first = 0x0
  }, 
  v_cache_dst = {
    tqh_first = 0xfffff801a260f4d0, 
    tqh_last = 0xfffff801a260f4f0
  }, 
  v_cache_dd = 0x0, 
  v_lock = {
    lock_object = {
      lo_name = 0xffffffff81acefb6 "zfs", 
      lo_flags = 117112832, 
      lo_data = 0, 
      lo_witness = 0x0
    }, 
    lk_lock = 1, 
    lk_exslpfail = 0, 
    lk_timo = 51, 
    lk_pri = 96
  }, 
  v_interlock = {
    lock_object = {
      lo_name = 0xffffffff80fd2f2d "vnode interlock", 
      lo_flags = 16973824, 
      lo_data = 0, 
      lo_witness = 0x0
    }, 
    mtx_lock = 4
  }, 
  v_vnlock = 0xfffff8010bc087c8, 
  v_actfreelist = {
    tqe_next = 0xfffff80079317000, 
    tqe_prev = 0xfffff80092a9e820
  }, 
  v_bufobj = {
    bo_lock = {
      lock_object = {
        lo_name = 0xffffffff80fd2f3d "bufobj interlock", 
        lo_flags = 86179840, 
        lo_data = 0, 
        lo_witness = 0x0
      }, 
      rw_lock = 1
    }, 
    bo_ops = 0xffffffff8149bff0, 
    bo_object = 0xfffff80135aaa500, 
    bo_synclist = {
      le_next = 0x0, 
      le_prev = 0x0
    }, 
    bo_private = 0xfffff8010bc08760, 
    __bo_vnode = 0xfffff8010bc08760, 
    bo_clean = {
      bv_hd = {
        tqh_first = 0x0, 
        tqh_last = 0xfffff8010bc08880
      }, 
      bv_root = {
        pt_root = 0
      }, 
      bv_cnt = 0
    }, 
    bo_dirty = {
      bv_hd = {
        tqh_first = 0x0, 
        tqh_last = 0xfffff8010bc088a0
      }, 
      bv_root = {
        pt_root = 0
      }, 
      bv_cnt = 0
    }, 
    bo_numoutput = 0, 
    bo_flag = 0, 
    bo_bsize = 131072
  }, 
  v_pollinfo = 0x0, 
  v_label = 0x0, 
  v_lockf = 0x0, 
  v_rl = {
    rl_waiters = {
      tqh_first = 0x0, 
      tqh_last = 0xfffff8010bc088e8
    }, 
    rl_currdep = 0x0
  }, 
  v_cstart = 0, 
  v_lasta = 0, 
  v_lastw = 0, 
  v_clen = 0, 
  v_holdcnt = 2, 
  v_usecount = 2, 
  v_iflag = 512, 
  v_vflag = 0, 
  v_writecount = 0, 
  v_hash = 17547399, 
  v_type = VREG
}

(kgdb) print vp->v_cache_dst->tqh_first->nc_name
$14 = 0xfffff801a260f512 "puppet20160226-52827-x6ea8y"

The file where things go wrong for us is: /tmp/puppet20160226-52827-x6ea8y

Performing a cat on this file also hangs in the vodead state.

Any relevant waiting channels:

# ps -o lwp -laxwwwSH | awk '{ if ($10 !~
"^(-|ttyin|lockf|wait|select|kqread|tx\->)") print; }' 
   LWP   UID   PID  PPID CPU  PRI  NI     VSZ    RSS MWCHAN   STAT TT        
TIME COMMAND
100000     0     0     0   0  -16   0       0 285088 swapin   DLs   -     
0:41.09 [kernel/swapper]
100048     0     0     0   0  -92   0       0 285088 vtbslp   DLs   -     
0:00.00 [kernel/virtio_ballo]
100018     0     2     0   0  -16   0       0     16 crypto_w DL    -     
0:00.00 [crypto]
100019     0     3     0   0  -16   0       0     16 crypto_r DL    -     
0:00.00 [crypto returns]
100057     0     5     0   0   20   0       0   6176 arc_recl DL    -    
13:07.13 [zfskern/arc_reclaim]
100058     0     5     0   0   -8   0       0   6176 l2arc_fe DL    -     
0:40.44 [zfskern/l2arc_feed_]
102486     0     5     0   0   -8   0       0   6176 zio->io_ DL    -     
8:24.66 [zfskern/txg_thread_]
100062     0     7     0   0  -16   0       0     32 psleep   DL    -    
17:00.73 [pagedaemon/pagedaem]
100068     0     7     0   0  -16   0       0     32 umarcl   DL    -     
0:00.00 [pagedaemon/uma]
100063     0     8     0   0  -16   0       0     16 psleep   DL    -     
0:00.00 [vmdaemon]
100064     0     9     0   0  155   0       0     16 pgzero   DL    -     
0:00.17 [pagezero]
100001     0    10     0   0  -16   0       0     16 audit_wo DL    -    
14:36.35 [audit]
100065     0    16     0   0  -16   0       0     16 psleep   DL    -     
0:51.50 [bufdaemon]
100066     0    17     0   0  -16   0       0     16 vlruwt   DL    -     
1:27.70 [vnlru]
100067     0    18     0   0   16   0       0     16 syncer   DL    -    
63:11.82 [syncer]
100406 65534   921     1   0   52   0   71104  33604 uwait    Is    -     
0:00.00 /usr/local/bin/memcached -l 127.0.0.1 -m 1024 -p 11211 -C -d -u nobody
-P /var/run/memcached/memcached.pid
100387     0   958     1   0   23   0   16612   2224 nanslp   Is    -    
97:24.14 /usr/sbin/cron -s
100394     0  1194     0   0  -16   0       0     16 pftm     DL    -    
30:48.46 [pf purge]
100388     0  1201     1   0   52   0   14700   2256 sbwait   Is    -     
0:00.00 pflogd: [priv] (pflogd)
100375    64  1202  1201   0   20   0   14700   2312 bpf      S     -     
5:36.59 pflogd: [running] -s 116 -i pflog0 -f /dev/null (pflogd)
100389     0 52827     1   0   52   0  213452 112968 vodead   T     -     
0:08.69 ruby21: puppet agent: applying configuration (ruby21)
100400     0 55604     0   0  -16   0       0     16 ftcl     DL    -     
0:00.00 [ftcleanup]
101076     0 58064     1   0   20   0   16588   2168 auditd   Is    -     
0:07.02 /usr/sbin/auditd
101349     0 37651 37601   0   20   0   12356   2132 vodead   D+    0-    
0:00.00 cat puppet20160226-52827-x6ea8y

This is the first time we're seeing this.

-- 
You are receiving this mail because:
You are the assignee for the bug.


More information about the freebsd-fs mailing list