[Bug 293382] Dead lock and kernel crash around closefp_impl

From: <bugzilla-noreply_at_freebsd.org>
Date: Tue, 31 Mar 2026 08:31:45 UTC
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293382

--- Comment #34 from Paul <devgs@ukr.net> ---
Hi,

It crashed again, though in a different place this time. That is because the
old place (`MPASS(kn->kn_kq == kq);`) is no longer present in the latest patch.

We are currently using e220af9cee74082841db9e80cbb73f570f3a570f + the latest
patch.


Unread portion of the kernel message buffer:
panic: mutex kqueue not owned at /usr/src/sys/kern/kern_event.c:316
cpuid = 30
time = 1774943888
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe069b454810
vpanic() at vpanic+0x136/frame 0xfffffe069b454940
panic() at panic+0x43/frame 0xfffffe069b4549a0
__mtx_assert() at __mtx_assert+0xa9/frame 0xfffffe069b4549b0
kqueue_register() at kqueue_register+0x82a/frame 0xfffffe069b454a30
kqueue_kevent() at kqueue_kevent+0xc9/frame 0xfffffe069b454c90
kern_kevent_fp() at kern_kevent_fp+0x9b/frame 0xfffffe069b454ce0
kern_kevent() at kern_kevent+0x82/frame 0xfffffe069b454d40
kern_kevent_generic() at kern_kevent_generic+0x70/frame 0xfffffe069b454da0
sys_kevent() at sys_kevent+0x61/frame 0xfffffe069b454e00
amd64_syscall() at amd64_syscall+0x169/frame 0xfffffe069b454f30
fast_syscall_common() at fast_syscall_common+0xf8/frame 0xfffffe069b454f30
--- syscall (560, FreeBSD ELF64, kevent), rip = 0x82cf723ea, rsp = 0x858888b18,
rbp = 0x858888c00 ---
KDB: enter: panic

(kgdb) bt
#0  __curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:57
#1  doadump (textdump=0) at /usr/src/sys/kern/kern_shutdown.c:399
#2  0xffffffff804b60a8 in db_fncall_generic (nargs=0, args=0xfffffe069b454230,
addr=<optimized out>, rv=<optimized out>) at /usr/src/sys/ddb/db_command.c:631
#3  db_fncall (dummy1=<optimized out>, dummy2=<optimized out>,
dummy3=<optimized out>, dummy4=<optimized out>) at
/usr/src/sys/ddb/db_command.c:679
#4  0xffffffff804b5b2d in db_command (last_cmdp=<optimized out>,
cmd_table=<optimized out>, dopager=false) at /usr/src/sys/ddb/db_command.c:508
#5  0xffffffff804b5c76 in db_command_script
(command=command@entry=0xffffffff81bd7722 <db_recursion_data+18> "call
doadump") at /usr/src/sys/ddb/db_command.c:573
#6  0xffffffff804bba58 in db_script_exec
(scriptname=scriptname@entry=0xfffffe069b454400 "kdb.enter.panic",
warnifnotfound=warnifnotfound@entry=0) at /usr/src/sys/ddb/db_script.c:301
#7  0xffffffff804bb952 in db_script_kdbenter (eventname=<optimized out>) at
/usr/src/sys/ddb/db_script.c:323
#8  0xffffffff804b91e1 in db_trap (type=<optimized out>, code=<optimized out>)
at /usr/src/sys/ddb/db_main.c:266
#9  0xffffffff80c23aef in kdb_trap (type=type@entry=3, code=code@entry=0,
tf=tf@entry=0xfffffe069b454750) at /usr/src/sys/kern/subr_kdb.c:790
#10 0xffffffff811318fd in trap (frame=<optimized out>) at
/usr/src/sys/amd64/amd64/trap.c:697
#11 <signal handler called>
#12 kdb_enter (why=<optimized out>, msg=<optimized out>) at
/usr/src/sys/kern/subr_kdb.c:556
#13 0xffffffff80bd0a6b in vpanic (fmt=0xffffffff812def18 "mutex %s not owned at
%s:%d", ap=ap@entry=0xfffffe069b454980) at
/usr/src/sys/kern/kern_shutdown.c:962
#14 0xffffffff80bd08d3 in panic (fmt=0xffffffff81da22a0 <cnputs_mtx>
"\325\376!\201\377\377\377\377") at /usr/src/sys/kern/kern_shutdown.c:887
#15 0xffffffff80ba9729 in __mtx_assert (c=<optimized out>, what=what@entry=4,
file=0xffffffff8134c5ca "/usr/src/sys/kern/kern_cons.c", line=-1129344347,
line@entry=316) at /usr/src/sys/kern/kern_mutex.c:1108
#16 0xffffffff80b7004a in kn_enter_flux (kn=0xff0100303150bcd0) at
/usr/src/sys/kern/kern_event.c:316
#17 kqueue_register (kq=kq@entry=0xff010002208b5b00,
kev=kev@entry=0xfffffe069b454a40, td=td@entry=0xff010001e7d30000,
mflag=mflag@entry=2) at /usr/src/sys/kern/kern_event.c:1868
#18 0xffffffff80b71239 in kqueue_kevent (kq=kq@entry=0xff010002208b5b00,
td=td@entry=0xff010001e7d30000, nchanges=nchanges@entry=1,
nevents=nevents@entry=0, k_ops=k_ops@entry=0xfffffe069b454de0,
timeout=timeout@entry=0x0) at /usr/src/sys/kern/kern_event.c:1515
#19 0xffffffff80b710fb in kern_kevent_fp (td=td@entry=0xff010001e7d30000,
fp=<optimized out>, nchanges=nchanges@entry=1, nevents=nevents@entry=0,
k_ops=k_ops@entry=0xfffffe069b454de0, timeout=timeout@entry=0x0) at
/usr/src/sys/kern/kern_event.c:1546
#20 0xffffffff80b71012 in kern_kevent (td=td@entry=0xff010001e7d30000,
fd=<optimized out>, nchanges=1, nevents=0,
k_ops=k_ops@entry=0xfffffe069b454de0, timeout=timeout@entry=0x0) at
/usr/src/sys/kern/kern_event.c:1486
#21 0xffffffff80b70d10 in kern_kevent_generic (td=0xff010001e7d30000,
uap=uap@entry=0xfffffe069b454db0, k_ops=k_ops@entry=0xfffffe069b454de0,
struct_name=0xffffffff81308166 "kevent") at /usr/src/sys/kern/kern_event.c:1342
#22 0xffffffff80b70c01 in sys_kevent (td=0xffffffff81da22a0 <cnputs_mtx>,
uap=<optimized out>) at /usr/src/sys/kern/kern_event.c:1315
#23 0xffffffff81132739 in syscallenter (td=0xff010001e7d30000) at
/usr/src/sys/amd64/amd64/../../kern/subr_syscall.c:193
#24 amd64_syscall (td=0xff010001e7d30000, traced=0) at
/usr/src/sys/amd64/amd64/trap.c:1267
#25 <signal handler called>
#26 0x000000082cf723ea in ?? ()
Backtrace stopped: Cannot access memory at address 0x858888b18

(kgdb) fr 16
#16 0xffffffff80b7004a in kn_enter_flux (kn=0xff0100303150bcd0) at
/usr/src/sys/kern/kern_event.c:316
316             KQ_OWNED(kn->kn_kq);
(kgdb) p *((struct eknote*)kn)
$1 = {
  k = {
    kn_link = {
      sle_next = 0x0
    },
    kn_selnext = {
      sle_next = 0x0
    },
    kn_knlist = 0xff010011b7fb0838,
    kn_tqe = {
      tqe_next = 0xffffffffffffffff,
      tqe_prev = 0xffffffffffffffff
    },
    kn_kq = 0xff0100021013b000,
    kn_kevent = {
      ident = 298277,
      filter = -1,
      flags = 32,
      fflags = 0,
      data = 0,
      udata = 0x35934102e9d0,
      ext = {0, 0, 0, 0}
    },
    kn_hook = 0x0,
    kn_hookid = 0,
    kn_status = 0,
    kn_influx = 0,
    kn_sfflags = 0,
    kn_sdata = 0,
    kn_ptr = {
      p_fp = 0xff01005c34494280,
      p_proc = 0xff01005c34494280,
      p_aio = 0xff01005c34494280,
      p_lio = 0xff01005c34494280,
      p_prison = 0xff01005c34494280,
      p_v = 0xff01005c34494280
    },
    kn_fop = 0xffffffff814dd950 <soread_filtops>
  },
  c = {
    kn_link = {
      sle_next = 0x0
    },
    kn_selnext = {
      sle_next = 0x0
    },
    kn_knlist = 0x0,
    kn_tqe = {
      tqe_next = 0x0,
      tqe_prev = 0x0
    },
    kn_kq = 0x0,
    kn_kevent = {
      ident = 0,
      filter = 0,
      flags = 0,
      fflags = 0,
      data = 0,
      udata = 0x0,
      ext = {0, 0, 0, 0}
    },
    kn_hook = 0x0,
    kn_hookid = 0,
    kn_status = 0,
    kn_influx = 0,
    kn_sfflags = 0,
    kn_sdata = 0,
    kn_ptr = {
      p_fp = 0x0,
      p_proc = 0x0,
      p_aio = 0x0,
      p_lio = 0x0,
      p_prison = 0x0,
      p_v = 0x0
    },
    kn_fop = 0x0
  },
  on_kn_link = 1
}
(kgdb) fr 17
#17 kqueue_register (kq=kq@entry=0xff010002208b5b00,
kev=kev@entry=0xfffffe069b454a40, td=td@entry=0xff010001e7d30000,
mflag=mflag@entry=2) at /usr/src/sys/kern/kern_event.c:1868
1868                    kn_enter_flux(kn);
(kgdb) p *kq
$3 = {
  kq_lock = {
    lock_object = {
      lo_name = 0xffffffff813464b6 "kqueue",
      lo_flags = 21168128,
      lo_data = 0,
      lo_witness = 0xff0100804bd8db80
    },
    mtx_lock = 18374967962832666624
  },
  kq_refcnt = 1,
  kq_list = {
    tqe_next = 0xff010002039d2400,
    tqe_prev = 0xff010002039cfb28
  },
  kq_head = {
    tqh_first = 0x0,
    tqh_last = 0xff010002208b5b38
  },
  kq_count = 0,
  kq_sel = {
    si_tdlist = {
      tqh_first = 0x0,
      tqh_last = 0x0
    },
    si_note = {
      kl_list = {
        slh_first = 0x0
      },
      kl_lock = 0xffffffff80b71f90 <knlist_mtx_lock>,
      kl_unlock = 0xffffffff80b71fb0 <knlist_mtx_unlock>,
      kl_assert_lock = 0xffffffff80b71fd0 <knlist_mtx_assert_lock>,
      kl_lockarg = 0xff010002208b5b00,
      kl_autodestroy = 0
    },
    si_mtx = 0x0
  },
  kq_sigio = 0x0,
  kq_fdp = 0xfffffe0694a7bc90,
  kq_state = 0,
  kq_knlistsize = 683008,
  kq_knlist = 0xfffffe09db9fe000,
  kq_knhashmask = 0,
  kq_knhash = 0x0,
  kq_task = {
    ta_link = {
      stqe_next = 0x0
    },
    ta_pending = 0,
    ta_priority = 0 '\000',
    ta_flags = 0 '\000',
    ta_func = 0xffffffff80b747e0 <kqueue_task>,
    ta_context = 0xff010002208b5b00
  },
  kq_cred = 0xff010027f901b480,
  kq_forksrc = 0x0
}
(kgdb) p *kev
$4 = {
  ident = 297765,
  filter = -1,
  flags = 2,
  fflags = 0,
  data = 0,
  udata = 0x0,
  ext = {0, 0, 0, 0}
}

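It seems that the knote and the kevent don't actually match one another (ident
298277 vs. 297765, and kn->kn_kq is 0xff0100021013b000 while kq is
0xff010002208b5b00), but both have a filter equal to -1, which seems to be
sufficient for them to be mistaken for one another, as seen in
kqueue_register():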

 if (kev->filter == kn->kn_filter)
     break;

It seems the simple fix would be to improve the matching strategy so that an
equal filter value (-1 here) alone is not sufficient for a match?

-- 
You are receiving this mail because:
You are the assignee for the bug.