[Bug 228858] panic when delivering knote to a process who has opened a kqueue() is dying

bugzilla-noreply at freebsd.org bugzilla-noreply at freebsd.org
Sun Jun 10 05:45:54 UTC 2018


https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=228858

            Bug ID: 228858
           Summary: panic when delivering knote to a process who has
                    opened a kqueue() is dying
           Product: Base System
           Version: 11.0-STABLE
          Hardware: Any
                OS: Any
            Status: New
          Severity: Affects Some People
          Priority: ---
         Component: kern
          Assignee: bugs at FreeBSD.org
          Reporter: siddharthtuli at gmail.com

The following race can occur on multi-processor systems resulting in this
panic.

-- Race --

        cpu x                                               cpu y
process X dies                                              Process Y is sending knote() to kqueue() opened by X
kqueue_close                                                knote (due to exec, exit, fork etc. of any process)
    kqueue_drain
        KQ_LOCK(kq) << Acquired the lock                    KQ_LOCK(kq) << sleep and loop in __mtx_lock_sleep
        ...
        KQ_UNLOCK(kq)
        kqueue_destroy
            mtx_destroy(&kq->kq_lock);
                          set MTX_UNOWNED|MTX_CONTESTED
                                                            __mtx_lock_sleep()
                                                            Panic because no owner and (MTX_UNOWNED|MTX_CONTESTED) are set
free(kq, M_KQUEUE)


Process X is listening for NOTE_EXIT|NOTE_EXEC|NOTE_TRACK|NOTE_TRACKERR on all
the running processes. When process X dies, the kernel closes its kqueue
descriptor and destroys kq_lock, which leaves mtx_lock set to
MTX_UNOWNED|MTX_CONTESTED. Due to the above race, another thread that is trying
to deliver a knote() to process X can panic in __mtx_lock_sleep(): it sees that
the lock word is not exactly MTX_UNOWNED and tries to dereference the owner of
the lock (which is NULL). Other APIs such as knote_fork() could also run into
this problem.


<2>fault virtual address        = 0x3b0
<2>fault code           = supervisor read data, page not present
<2>instruction pointer  = 0x20:0xffffffff804169f6
<2>stack pointer                = 0x28:0xfffffe011f1db9a0
<2>frame pointer                = 0x28:0xfffffe011f1dba10
<2>code segment         = base 0x0, limit 0xfffff, type 0x1b
<2>                     = DPL 0, pres 1, long 1, def32 0, gran 1
<2>processor eflags     = interrupt enabled, resume, IOPL = 0
<2>current process              = 9199 (rcp)
<2>trap number          = 12
<2>panic: page fault

(kgdb) bt
#0  __curthread () at ./machine/pcpu.h:221
#1  doadump (textdump=1) at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/kern/kern_shutdown.c:313
#2  0xffffffff8042b93f in kern_reboot (howto=260)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/kern/kern_shutdown.c:381
#3  0xffffffff8042be9f in vpanic (fmt=<optimized out>, ap=<optimized out>)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/kern/kern_shutdown.c:792
#4  0xffffffff8042bee3 in panic (fmt=<unavailable>)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/kern/kern_shutdown.c:705
#5  0xffffffff80572b51 in trap_fatal (frame=<optimized out>, eva=<optimized
out>)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/amd64/amd64/trap.c:841
#6  0xffffffff80572d44 in trap_pfault (frame=0xfffffe011f1db8f0,
usermode=<optimized out>)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/amd64/amd64/trap.c:691
#7  0xffffffff805724dc in trap (frame=0xfffffe011f1db8f0)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/amd64/amd64/trap.c:442
#8  0xffffffff8055a661 in calltrap ()
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/amd64/amd64/exception.S:236
#9  0xffffffff804169f6 in __mtx_lock_sleep (c=0xfffff8006ec00d18,
tid=18446735282413282656, opts=0, file=0x0, line=96)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/kern/kern_mutex.c:435
#10 0xffffffff803f5175 in knote (list=0xfffff80089f6c5c0, hint=2147483648,
lockflags=<optimized out>)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/kern/kern_event.c:2047
#11 0xffffffff803fa46a in exit1 (td=0xfffff8011de8a560, rval=<optimized out>,
signo=0)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/kern/kern_exit.c:515
#12 0xffffffff803f9b7d in sys_sys_exit (td=0xfffff8006ec00d00, uap=<optimized
out>)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/kern/kern_exit.c:178
#13 0xffffffff8058297e in syscallenter (td=0xfffff8011de8a560, sa=<optimized
out>)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/amd64/ia32/../../kern/subr_syscall.c:146
#14 ia32_syscall (frame=0xfffffe011f1dbc00)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/amd64/ia32/ia32_syscall.c:187
#15 0xffffffff8055ac45 in Xint0x80_syscall ()
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/amd64/ia32/ia32_exception.S:73
#16 0x00000000c83791bb in ?? ()

(kgdb) fr 9
#9  0xffffffff804169f6 in __mtx_lock_sleep (c=0xfffff8006ec00d18,
tid=18446735282413282656, opts=0, file=0x0, line=96)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/kern/kern_mutex.c:435
 433                 v = m->mtx_lock;
 434                 if (v != MTX_UNOWNED) {
 435                         owner = (struct thread *)(v & ~MTX_FLAGMASK); 
 436                         if (TD_IS_RUNNING(owner)) { <==== owner is NULL;
reading td_state at offset 0x3b0 results in the page fault

#define TD_IS_RUNNING(td)       ((td)->td_state == TDS_RUNNING)

(kgdb)  p &(*(struct thread*)0)->td_state
$7 = (enum {...} *) 0x3b0

(kgdb) p c
$2 = (volatile uintptr_t *) 0xfffff8006ec00d18
(kgdb) pt struct mtx
type = struct mtx {
    struct lock_object lock_object;
    volatile uintptr_t mtx_lock;
}
(kgdb) p &(*(struct mtx*)0)->mtx_lock
$4 = (volatile uintptr_t *) 0x18
(kgdb) p *(struct mtx*)(0xfffff8006ec00d18-0x18)
$6 = {
  lock_object = {
    lo_name = 0xffffffff805fdf06 "kqueue", 
    lo_flags = 21102592, 
    lo_data = 0, 
    lo_witness = 0x0
  }, 
  mtx_lock = 6 <=== MTX_UNOWNED|MTX_CONTESTED
}

===> from “info threads”
  40   Thread 100237 (PID=5863: pmond) sched_switch (td=0xfffff8006e2a5560,
newtd=<optimized out>, flags=<optimized out>)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/kern/sched_ule.c:1979

 (kgdb)   thread 40
[Switching to thread 40 (Thread 100237)]
#0  sched_switch (td=0xfffff8006e2a5560, newtd=<optimized out>,
flags=<optimized out>)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/kern/sched_ule.c:1979
1979   
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/kern/sched_ule.c:
No such file or directory.
(kgdb) bt
#0  sched_switch (td=0xfffff8006e2a5560, newtd=<optimized out>,
flags=<optimized out>)
    at
/.amd/svl-engdata1vs1/occamdev/build/freebsd/stable_11/20180413.165755_fbsd-builder_stable_11.0.dc8ec62/src/sys/kern/sched_ule.c:1979
#1  0x0000000000000000 in ?? ()
(kgdb) p td->td_proc->p_satete
There is no member named p_satete.
(kgdb) p td->td_proc->p_state
$13 = PRS_ZOMBIE
(kgdb)

-- 
You are receiving this mail because:
You are the assignee for the bug.


More information about the freebsd-bugs mailing list