[Bug 229958] ctld / zfs deadlock with 'zfs rename ...'

bugzilla-noreply at freebsd.org bugzilla-noreply at freebsd.org
Thu Dec 6 08:29:54 UTC 2018


https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=229958

--- Comment #4 from Andriy Gapon <avg at FreeBSD.org> ---
(In reply to emz from comment #1)
I think that the problem is with this thread:
(kgdb) tid 108285
(kgdb) bt
#0  sched_switch (td=0xfffff80130a82000, newtd=0xfffff8048008c000,
flags=<optimized out>) at /usr/src/sys/kern/sched_ule.c:2112
#1  0xffffffff80bac7c1 in mi_switch (flags=<optimized out>, newtd=0x0) at
/usr/src/sys/kern/kern_synch.c:439
#2  0xffffffff80bfa9cc in sleepq_wait (wchan=<unavailable>, pri=<unavailable>)
at /usr/src/sys/kern/subr_sleepqueue.c:692
#3  0xffffffff80b3b6b2 in _cv_wait (cvp=0xfffff8001b148a98,
lock=0xfffff8001b148a28) at /usr/src/sys/kern/kern_condvar.c:146
#4  0xffffffff8270f575 in txg_wait_synced (dp=0xfffff8001b148800, txg=38513569)
at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c:659
#5  0xffffffff827448ab in zil_close (zilog=0xfffff8036f227400) at
/usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c:2975
#6  0xffffffff8277f4b5 in zvol_last_close (zv=0xfffff80535721800) at
/usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c:893
#7  0xffffffff8278089e in zvol_d_close (dev=<optimized out>, flags=131075,
fmt=<optimized out>, td=<optimized out>) at
/usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c:3138
#8  0xffffffff80a57f0a in devfs_close (ap=<optimized out>) at
/usr/src/sys/fs/devfs/devfs_vnops.c:650
#9  0xffffffff811fefc8 in VOP_CLOSE_APV (vop=<optimized out>,
a=0xfffffe010d6f9518) at vnode_if.c:534
#10 0xffffffff80c8068f in VOP_CLOSE (vp=<optimized out>, fflag=<optimized out>,
cred=<optimized out>, td=<optimized out>) at ./vnode_if.h:225
#11 vn_close1 (vp=0xfffff803ba0705a0, flags=3, file_cred=0x0,
td=0xfffff80130a82000, keep_ref=false) at /usr/src/sys/kern/vfs_vnops.c:454
#12 0xffffffff82c3b6d3 in ctl_be_block_close (be_lun=<optimized out>) at
/usr/src/sys/cam/ctl/ctl_backend_block.c:2102
#13 ctl_be_block_rm (softc=<optimized out>, req=<optimized out>) at
/usr/src/sys/cam/ctl/ctl_backend_block.c:2505
#14 ctl_be_block_ioctl (dev=<optimized out>, cmd=<optimized out>,
addr=0xfffff80057162400 "block", flag=<optimized out>, td=<optimized out>) at
/usr/src/sys/cam/ctl/ctl_backend_block.c:1794
#15 0xffffffff82c34ba6 in ctl_ioctl (dev=0xfffff804a017f200, cmd=<optimized
out>, addr=<optimized out>, flag=3, td=0xfffff80130a82000) at
/usr/src/sys/cam/ctl/ctl.c:2967
#16 0xffffffff80a5810d in devfs_ioctl (ap=0xfffffe010d6f9718) at
/usr/src/sys/fs/devfs/devfs_vnops.c:807
#17 0xffffffff811ffa6e in VOP_IOCTL_APV (vop=<optimized out>,
a=0xfffffe010d6f9718) at vnode_if.c:1067
#18 0xffffffff80c7f0e4 in VOP_IOCTL (vp=<optimized out>, command=<optimized
out>, data=<optimized out>, fflag=<unavailable>, cred=<optimized out>,
td=<optimized out>) at ./vnode_if.h:448
#19 vn_ioctl (fp=0xfffff8001bd82370, com=<optimized out>,
data=0xfffff80057162400, active_cred=0xfffff801b448e400, td=<unavailable>) at
/usr/src/sys/kern/vfs_vnops.c:1490
#20 0xffffffff80a586ef in devfs_ioctl_f (fp=<unavailable>, com=<unavailable>,
data=<unavailable>, cred=<unavailable>, td=0xfffff80130a82000) at
/usr/src/sys/fs/devfs/devfs_vnops.c:765
#21 0xffffffff80c0a36d in fo_ioctl (fp=<optimized out>, com=<optimized out>,
active_cred=<unavailable>, td=<optimized out>, data=<optimized out>) at
/usr/src/sys/sys/file.h:330
#22 kern_ioctl (td=0xfffff80130a82000, fd=3, com=3244876065,
data=<unavailable>) at /usr/src/sys/kern/sys_generic.c:800
#23 0xffffffff80c0a08e in sys_ioctl (td=0xfffff80130a82000,
uap=0xfffff80130a823c0) at /usr/src/sys/kern/sys_generic.c:712
#24 0xffffffff81079f92 in syscallenter (td=<optimized out>) at
/usr/src/sys/amd64/amd64/../../kern/subr_syscall.c:135
#25 amd64_syscall (td=0xfffff80130a82000, traced=0) at
/usr/src/sys/amd64/amd64/trap.c:1154

The problem is that it holds the spa_namespace_lock while waiting for a TXG
sync:
(kgdb) p spa_namespace_lock
$1 = {lock_object = {lo_name = 0xffffffff828037f2 <.L.str.48+1>
"spa_namespace_lock", lo_flags = 577830912, lo_data = 0, lo_witness = 0x0},
sx_lock = 18446735282727821316}
(kgdb) p/x $1
$2 = {lock_object = {lo_name = 0xffffffff828037f2, lo_flags = 0x22710000,
lo_data = 0x0, lo_witness = 0x0}, sx_lock = 0xfffff80130a82004}
(kgdb) p ((struct thread *)0xfffff80130a82000)->td_tid
$3 = 108285
(kgdb) tid 108285

Now let's take a look at what the sync thread is doing:
(kgdb) fr 4
#4  0xffffffff8270f575 in txg_wait_synced (dp=0xfffff8001b148800, txg=38513569)
at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c:659
(kgdb) p dp->dp_tx.tx_sync_thread->td_tid
$10 = 101227
(kgdb) tid $10
(kgdb) bt
#0  sched_switch (td=0xfffff8049493c000, newtd=0xfffff8000362e580,
flags=<optimized out>) at /usr/src/sys/kern/sched_ule.c:2112
#1  0xffffffff80bac7c1 in mi_switch (flags=<optimized out>, newtd=0x0) at
/usr/src/sys/kern/kern_synch.c:439
#2  0xffffffff80bfa9cc in sleepq_wait (wchan=<unavailable>, pri=<unavailable>)
at /usr/src/sys/kern/subr_sleepqueue.c:692
#3  0xffffffff80baac3c in _sx_xlock_hard (sx=0xffffffff8282e3b8
<spa_namespace_lock>, x=<optimized out>, opts=<optimized out>) at
/usr/src/sys/kern/kern_sx.c:857
#4  0xffffffff827803ab in __sx_xlock (opts=0, line=0, sx=<optimized out>,
td=<optimized out>, file=<optimized out>) at /usr/src/sys/sys/sx.h:168
#5  zvol_rename_minors (oldname=0xfffff8024f1da800
"data/kvm/desktop/desktop-master-m40-lp@desktop3", newname=0xfffff80249a2a400
"data/kvm/desktop/desktop-master-m40-lp@desktop1")
    at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c:3045
#6  0xffffffff826d32bf in dsl_dataset_rename_snapshot_sync_impl
(dp=0xfffff8001b148800, hds=0xfffff80108c93000, arg=<optimized out>) at
/usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c:2533
#7  0xffffffff826cf4d4 in dsl_dataset_rename_snapshot_sync
(arg=0xfffffe010d7c6578, tx=<optimized out>) at
/usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c:2557
#8  0xffffffff826d964e in dsl_sync_task_sync (dst=0xfffffe010d7c64a8,
tx=0xfffff8068cdac300) at
/usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c:234
#9  0xffffffff826d7ffb in dsl_pool_sync (dp=<optimized out>, txg=<optimized
out>) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c:819
#10 0xffffffff82701675 in spa_sync (spa=<optimized out>, txg=<optimized out>)
at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c:8088
#11 0xffffffff8270efc8 in txg_sync_thread (arg=0xfffff8001b148800) at
/usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c:543
#12 0xffffffff80b61e23 in fork_exit (callout=0xffffffff8270edc0
<txg_sync_thread>, arg=0xfffff8001b148800, frame=0xfffffe00a6d5cac0) at
/usr/src/sys/kern/kern_fork.c:1057
#13 <signal handler called>

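So, the sync thread is blocked on the spa_namespace_lock, as suspected: thread
108285 holds that lock while waiting for the TXG to be synced, and the sync
thread cannot finish syncing the TXG until it acquires the same lock, hence the
deadlock.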

-- 
You are receiving this mail because:
You are the assignee for the bug.


More information about the freebsd-fs mailing list