vfs.zfs.vol.recursive completely broken, time to remove?

From: Andriy Gapon <avg_at_FreeBSD.org>
Date: Wed, 29 Sep 2021 06:09:58 UTC
It seems that while previously there could sometimes be a deadlock with 
vfs.zfs.vol.recursive=1, now it is guaranteed.

There was an OpenZFS change to use a pool of threads for vdev opening (to 
parallelize / speed up).  When such a thread opens a zvol it needs to acquire 
spa_namespace_lock.  But the lock is already held by a thread requesting the 
operation such as a thread doing zpool import (spa_load).
So, it's an obvious deadlock: the main thread is waiting for the workers while 
holding the namespace lock, at least one of the workers is blocked on the same lock.

(kgdb) bt
#0  sched_switch (td=td@entry=0xfffff805b04f1740, flags=<optimized out>, 
flags@entry=260) at /usr/devel/git/trant/sys/kern/sched_ule.c:2147
#1  0xffffffff80851a86 in mi_switch (flags=flags@entry=260) at 
/usr/devel/git/trant/sys/kern/kern_synch.c:542
#2  0xffffffff808966f6 in sleepq_switch (wchan=wchan@entry=0xffffffff80ffa038 
<spa_namespace_lock>, pri=pri@entry=0) at 
/usr/devel/git/trant/sys/kern/subr_sleepqueue.c:608
#3  0xffffffff8089659c in sleepq_wait (wchan=wchan@entry=0xffffffff80ffa038 
<spa_namespace_lock>, pri=pri@entry=0) at 
/usr/devel/git/trant/sys/kern/subr_sleepqueue.c:659
#4  0xffffffff8084ec98 in _sx_xlock_hard (sx=sx@entry=0xffffffff80ffa038 
<spa_namespace_lock>, x=<optimized out>, opts=<optimized out>, opts@entry=0, 
file=<optimized out>,
     file@entry=0xffffffff80c748ea 
"/usr/devel/git/trant/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c", 
line=<optimized out>, line@entry=244) at /usr/devel/git/trant/sys/kern/kern_sx.c:858
#5  0xffffffff8084e610 in _sx_xlock (sx=0xffffffff80ffa038 <spa_namespace_lock>, 
opts=<unavailable>, opts@entry=0, file=0xffffffff80c748ea 
"/usr/devel/git/trant/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c",
     line=line@entry=244) at /usr/devel/git/trant/sys/kern/kern_sx.c:332
#6  0xffffffff80393c13 in zvol_geom_open (pp=pp@entry=0xfffff8003a30fd00, 
flag=1, count=count@entry=2) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c:244
#7  0xffffffff80393731 in zvol_geom_access (pp=<unavailable>, acr=<optimized 
out>, acw=0, ace=<optimized out>) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c:488
#8  0xffffffff807b524e in g_access (cp=cp@entry=0xfffff8070ab01b00, 
dcr=<optimized out>, dcr@entry=1, dcw=<optimized out>, dcw@entry=0, 
dce=<optimized out>, dce@entry=1) at /usr/devel/git/trant/sys/geom/geom_subr.c:1042
#9  0xffffffff8036d53c in vdev_geom_attach (pp=pp@entry=0xfffff8003a30fd00, 
vd=vd@entry=0x0, sanity=<optimized out>, sanity@entry=1) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c:281
#10 0xffffffff8036f2c9 in vdev_attach_ok (vd=vd@entry=0xfffffe0281746000, 
pp=pp@entry=0xfffff8003a30fd00) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c:652
#11 0xffffffff8036f80e in vdev_geom_attach_by_guids (vd=<optimized out>, 
vd@entry=0xfffffe0281746000) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c:726
#12 0xffffffff8036efbb in vdev_geom_open_by_guids 
(vd=vd@entry=0xfffffe0281746000) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c:763
#13 0xffffffff8036e02d in vdev_geom_open (vd=0xfffffe0281746000, vd@entry=<error 
reading variable: value is not available>, psize=0xfffffe02e247dbd8, 
psize@entry=<error reading variable: value is not available>,
     max_psize=0xfffffe02e247dbe0, max_psize@entry=<error reading variable: 
value is not available>, logical_ashift=0xfffffe02e247dbc8, 
logical_ashift@entry=<error reading variable: value is not available>,
     physical_ashift=0xfffffe02e247dbd0, physical_ashift@entry=<error reading 
variable: value is not available>) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c:871
#14 0xffffffff80474d8e in vdev_open (vd=vd@entry=0xfffffe0281746000) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/zfs/vdev.c:1912
#15 0xffffffff8047bb4d in vdev_open_child (arg=0xfffffe0281746000) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/zfs/vdev.c:1741
#16 0xffffffff80346a7f in taskq_run (arg=0xfffff8046aadf9c0, 
pending=<unavailable>) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c:315
#17 0xffffffff8089b2aa in taskqueue_run_locked 
(queue=queue@entry=0xfffff8005b247700) at 
/usr/devel/git/trant/sys/kern/subr_taskqueue.c:476
#18 0xffffffff8089c0c8 in taskqueue_thread_loop 
(arg=arg@entry=0xfffff804a354f330) at 
/usr/devel/git/trant/sys/kern/subr_taskqueue.c:793
#19 0xffffffff80808ad9 in fork_exit (callout=0xffffffff8089c060 
<taskqueue_thread_loop>, arg=0xfffff804a354f330, frame=0xfffffe02e247dd40) at 
/usr/devel/git/trant/sys/kern/kern_fork.c:1083
#20 <signal handler called>

(kgdb) p spa_namespace_lock
$15 = {lock_object = {lo_name = 0xffffffff80c619af "spa_namespace_lock", 
lo_flags = 577830912, lo_data = 0, lo_witness = 0x0}, sx_lock = 
18446735305924472836}
(kgdb) p/x spa_namespace_lock.sx_lock
$16 = 0xfffff8069748f004
(kgdb) p ((struct thread *)0xfffff8069748f000)->td_tid
$17 = 109639
(kgdb) tid $17

(kgdb) bt
#0  sched_switch (td=td@entry=0xfffff8069748f000, flags=<optimized out>, 
flags@entry=260) at /usr/devel/git/trant/sys/kern/sched_ule.c:2147
#1  0xffffffff80851a86 in mi_switch (flags=flags@entry=260) at 
/usr/devel/git/trant/sys/kern/kern_synch.c:542
#2  0xffffffff808966f6 in sleepq_switch (wchan=wchan@entry=0xfffffe01ea4ec460, 
pri=pri@entry=0) at /usr/devel/git/trant/sys/kern/subr_sleepqueue.c:608
#3  0xffffffff8089659c in sleepq_wait (wchan=0xfffffe01ea4ec460, pri=0) at 
/usr/devel/git/trant/sys/kern/subr_sleepqueue.c:659
#4  0xffffffff80850ed2 in _sleep (ident=<optimized out>, lock=<optimized out>, 
priority=priority@entry=0, wmesg=<optimized out>, sbt=<optimized out>, 
sbt@entry=0, pr=<optimized out>, pr@entry=0, flags=256)
     at /usr/devel/git/trant/sys/kern/kern_synch.c:221
#5  0xffffffff8089b92a in TQ_SLEEP (tq=tq@entry=0xfffff8005b247700, 
p=<unavailable>, p@entry=0xfffffe01ea4ec460, wm=<unavailable>) at 
/usr/devel/git/trant/sys/kern/subr_taskqueue.c:125
#6  0xffffffff8089ba42 in taskqueue_drain_tq_queue 
(queue=queue@entry=0xfffff8005b247700) at 
/usr/devel/git/trant/sys/kern/subr_taskqueue.c:385
#7  0xffffffff8089bc8c in taskqueue_quiesce (queue=0xfffff8005b247700) at 
/usr/devel/git/trant/sys/kern/subr_taskqueue.c:632
#8  0xffffffff80346b1c in taskq_wait (tq=tq@entry=0xfffff804a354f330) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c:417
#9  0xffffffff80474bd5 in vdev_open_children_impl 
(vd=vd@entry=0xfffffe02b4878000, open_func=0xffffffff80474bf0 
<vdev_default_open_children_func>) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/zfs/vdev.c:1800
#10 0xffffffff80474ac0 in vdev_open_children (vd=<unavailable>, 
vd@entry=0xfffffe02b4878000) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/zfs/vdev.c:1811
#11 0xffffffff8048c084 in vdev_mirror_open (vd=0xfffffe02b4878000, 
vd@entry=<error reading variable: value is not available>, 
asize=0xfffffe01ea4ec578, asize@entry=<error reading variable: value is not 
available>,
     max_asize=0xfffffe01ea4ec580, max_asize@entry=<error reading variable: 
value is not available>, logical_ashift=0xfffffe01ea4ec568, 
logical_ashift@entry=<error reading variable: value is not available>,
     physical_ashift=0xfffffe01ea4ec570, physical_ashift@entry=<error reading 
variable: value is not available>) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/zfs/vdev_mirror.c:396
#12 0xffffffff80474d8e in vdev_open (vd=vd@entry=0xfffffe02b4878000) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/zfs/vdev.c:1912
#13 0xffffffff80474b38 in vdev_open_children_impl 
(vd=vd@entry=0xfffffe01c3add000, open_func=0xffffffff80474bf0 
<vdev_default_open_children_func>) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/zfs/vdev.c:1790
#14 0xffffffff80474ac0 in vdev_open_children (vd=<unavailable>, 
vd@entry=0xfffffe01c3add000) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/zfs/vdev.c:1811
#15 0xffffffff804a0b2e in vdev_root_open (vd=0xfffffe01c3add000, vd@entry=<error 
reading variable: value is not available>, asize=0xfffffe01ea4ec688, 
asize@entry=<error reading variable: value is not available>,
     max_asize=0xfffffe01ea4ec690, max_asize@entry=<error reading variable: 
value is not available>, ashift=0xfffffe01ea4ec678, ashift@entry=<error reading 
variable: value is not available>, pshift=0xfffffe01ea4ec680,
     pshift@entry=<error reading variable: value is not available>) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/zfs/vdev_root.c:96
#16 0xffffffff80474d8e in vdev_open (vd=0xfffffe01c3add000) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/zfs/vdev.c:1912
#17 0xffffffff8045ad6f in spa_ld_open_vdevs (spa=spa@entry=0xfffffe01c73f3000) 
at /usr/devel/git/trant/sys/contrib/openzfs/module/zfs/spa.c:3394
#18 0xffffffff8045a77a in spa_ld_trusted_config 
(spa=spa@entry=0xfffffe01c73f3000, type=<optimized out>, 
type@entry=SPA_IMPORT_EXISTING, reloading=reloading@entry=0)
     at /usr/devel/git/trant/sys/contrib/openzfs/module/zfs/spa.c:3777
#19 0xffffffff80459054 in spa_ld_mos_with_trusted_config 
(spa=spa@entry=0xfffffe01c73f3000, type=type@entry=SPA_IMPORT_EXISTING, 
update_config_cache=update_config_cache@entry=0xfffffe01ea4ec784)
     at /usr/devel/git/trant/sys/contrib/openzfs/module/zfs/spa.c:4632
#20 0xffffffff80458c24 in spa_load_impl (spa=spa@entry=0xfffffe01c73f3000, 
type=type@entry=SPA_IMPORT_EXISTING, ereport=ereport@entry=0xfffffe01ea4ec7c0) 
at /usr/devel/git/trant/sys/contrib/openzfs/module/zfs/spa.c:4678
#21 0xffffffff80451883 in spa_load (spa=spa@entry=0xfffffe01c73f3000, 
state=state@entry=SPA_LOAD_TRYIMPORT, type=<unavailable>, 
type@entry=SPA_IMPORT_EXISTING) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/zfs/spa.c:2892
#22 0xffffffff8045150e in spa_tryimport (tryconfig=<optimized out>, 
tryconfig@entry=0xfffff80718db15c0) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/zfs/spa.c:6187
#23 0xffffffff804c063d in zfs_ioc_pool_tryimport (zc=0xfffffe0241577000, 
zc@entry=<error reading variable: value is not available>) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/zfs/zfs_ioctl.c:1655
#24 0xffffffff804baa86 in zfsdev_ioctl_common (vecnum=vecnum@entry=6, 
zc=zc@entry=0xfffffe0241577000, flag=<optimized out>, flag@entry=0) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/zfs/zfs_ioctl.c:7603
#25 0xffffffff8036b02c in zfsdev_ioctl (dev=<unavailable>, dev@entry=<error 
reading variable: value is not available>, zcmd=<unavailable>, zcmd@entry=<error 
reading variable: value is not available>,
     arg=0xfffffe01ea4ecba0 "\017", arg@entry=<error reading variable: value is 
not available>, flag=<unavailable>, flag@entry=<error reading variable: value is 
not available>, td=<unavailable>,
     td@entry=<error reading variable: value is not available>) at 
/usr/devel/git/trant/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c:165
#26 0xffffffff807176cf in devfs_ioctl (ap=0xfffffe01ea4ec9d8) at 
/usr/devel/git/trant/sys/fs/devfs/devfs_vnops.c:944
#27 0xffffffff80bb26e2 in VOP_IOCTL_APV (vop=0xffffffff80ead008 <devfs_specops>, 
a=a@entry=0xfffffe01ea4ec9d8) at vnode_if.c:1231
#28 0xffffffff80928014 in VOP_IOCTL (vp=vp@entry=0xfffff8001de22000, 
command=<optimized out>, data=<unavailable>, data@entry=0xfffffe01ea4ecba0, 
fflag=<unavailable>, cred=<unavailable>, cred@entry=0xfffff80492e37d00,
     td=<unavailable>) at ./vnode_if.h:642
#29 0xffffffff80923330 in vn_ioctl (fp=0xfffff807307366e0, com=<unavailable>, 
data=0xfffffe01ea4ecba0, active_cred=0xfffff80492e37d00, td=<unavailable>) at 
/usr/devel/git/trant/sys/kern/vfs_vnops.c:1690
#30 0xffffffff80717bbe in devfs_ioctl_f (fp=<unavailable>, fp@entry=<error 
reading variable: value is not available>, com=<unavailable>, com@entry=<error 
reading variable: value is not available>, data=<unavailable>,
     data@entry=<error reading variable: value is not available>, 
cred=<unavailable>, cred@entry=<error reading variable: value is not available>, 
td=0xfffff8069748f000, td@entry=<error reading variable: value is not available>)
     at /usr/devel/git/trant/sys/fs/devfs/devfs_vnops.c:875
#31 0xffffffff808abc6b in fo_ioctl (fp=<unavailable>, com=<unavailable>, 
com@entry=3222821382, data=<unavailable>, data@entry=0xfffffe01ea4ecba0, 
active_cred=<unavailable>, td=<unavailable>, td@entry=0xfffff8069748f000)
     at /usr/devel/git/trant/sys/sys/file.h:356
#32 0xffffffff808abc01 in kern_ioctl (td=td@entry=0xfffff8069748f000, fd=3, 
com=com@entry=3222821382, data=data@entry=0xfffffe01ea4ecba0 "\017") at 
/usr/devel/git/trant/sys/kern/sys_generic.c:803
#33 0xffffffff808ab982 in sys_ioctl (td=0xfffff8069748f000, 
uap=0xfffff8069748f3e8) at /usr/devel/git/trant/sys/kern/sys_generic.c:711
#34 0xffffffff80b30cc9 in syscallenter (td=0xfffff8069748f000) at 
/usr/devel/git/trant/sys/amd64/amd64/../../kern/subr_syscal
-- 
Andriy Gapon