odd KSE panic

Julian Elischer julian at elischer.org
Fri Jul 2 15:11:40 PDT 2004



On Fri, 2 Jul 2004, Andrew Gallatin wrote:

> 
> Daniel Eischen writes:
>  > On Fri, 2 Jul 2004, Andrew Gallatin wrote:
> 
>  > > The interesting thing is that there is no stack..  Just one function
>  > > from my driver (mx_free()) sitting out there by itself.  Is the kernel
>  > > somehow ripping the kernel stacks of all threads out from under them
>  > > when one thread calls exit()?  How do I take a reference so I
>  > > don't risk getting marooned without a stack?
>  > 
>  > exit() exits the process, including reaping all kernel threads.
>  > I'm not sure why one thread (worker) doing an exit() will
>  > still allow other threads to continue running.  You should
>  > be using pthread_exit() to exit from the worker thread,
>  > but that still doesn't explain why you're having the problem.
>  > 
> 
> Thanks.. I'm calling pthread_exit() now.  Still having a problem.
> 
> What can you tell about the state of threads from this ddb info:
> 
> Fatal trap 12: page fault while in kernel mode
> cpuid = 1; apic id = 01
> fault virtual address   = 0x0
> fault code              = supervisor read, page not present
> instruction pointer     = 0x8:0xc1d69193
> stack pointer           = 0x10:0x0
> frame pointer           = 0x10:0x0
> code segment            = base 0x0, limit 0xfffff, type 0x1b
>                         = DPL 0, pres 1, def32 1, gran 1
> processor eflags        = interrupt enabled, resume, IOPL = 0
> current process         = 1937 (mx_loopback_test)
> kernel: type 12 trap, code=0
> kernel trap 12 with interrupts disabled
> 
> 
> Fatal trap 12: page fault while in kernel mode
> cpuid = 1; apic id = 01
> fault virtual address   = 0x0
> fault code              = supervisor read, page not present
> instruction pointer     = 0x8:0xc0651e11
> stack pointer           = 0x10:0xfffffefc
> frame pointer           = 0x10:0xffffff1c
> code segment            = base 0x0, limit 0xfffff, type 0x1b
>                         = DPL 0, pres 1, def32 1, gran 1
> processor eflags        = resume, IOPL = 0
> current process         = 1937 (mx_loopback_test)
> kernel: type 12 trap, code=0
> Stopped at      kdb_trap+0x151: movl    0x40(%edx),%eax
> 
> db> ps
>  pid   proc     uarea   uid  ppid  pgrp  flag   stat  wmesg    wchan  cmd
>  1937 c1c5a898 e6319000 1387   643  1937 000c002 (threaded)   mx_loopback_test
>    thread 0xc21cec60 ksegrp 0xc182c580 [SLPQ kserel 0xc182c5dc][SLP]
>    thread 0xc21cedc0 ksegrp 0xc1cf1c00 [SLPQ ksesigwait 0xc1c5a998][SLP]
>    thread 0xc1b962c0 ksegrp 0xc182c580 [CPU 1][kse 0xc2161360]

When one thread calls exit(), it marks the fact that the process is
exiting, then tries to wake up all the other threads, and then
suspends itself. The other threads, when awoken, are supposed to notice
what's going on and abort whatever they are doing; once they have released
all their resources (by unwinding back to the user boundary), they are
supposed to call thread_exit(). The last one out is supposed to
wake up the original thread that called exit(), which can then proceed
on the basis that it is now the only remaining thread.

If there are threads waiting in uninterruptible sleeps, then the process
as a whole cannot exit until they have finished sleeping, come back
to the user boundary, and called thread_exit().

None of the three threads you show is in exit, or even anything related
to exit.



> 
> db> sho thread  0xc21cec60
> Proc 0xc1c5a898    thread 0xc21cec60 ksegrp 0xc182c580 [SLPQ kserel 0xc182c5dc][SLP]
> sched_switch(c21cec60,df262f7,22c29cb3,ffc03014,c21cec60) at sched_switch+0xbc
> mi_switch(1,c052c35e,c182c5dc,c1c5a898,0) at mi_switch+0x1a2
> sleepq_switch(c182c5dc,0,0,e8474c98,c0512cef) at sleepq_switch+0x169
> sleepq_timedwait_sig(c182c5dc,0,c1c5a904,c069e850,0) at sleepq_timedwait_sig+0x17
> msleep(c182c5dc,c1c5a904,168,c069e850,ea61) at msleep+0x490
> kse_release(c21cec60,e8474d14,4,c04f102e,1) at kse_release+0x288
> syscall(2f,2f,2f,8052200,0) at syscall+0x2f0
> Xint0x80_syscall() at Xint0x80_syscall+0x1f
> --- syscall (383, FreeBSD ELF32, kse_release), eip = 0x280941a7, esp = 0x8193f90, ebp = 0x8193fcc ---
> 
> db> sho thread 0xc21cedc0 
> Proc 0xc1c5a898    thread 0xc21cedc0 ksegrp 0xc1cf1c00 [SLPQ ksesigwait 0xc1c5a998][SLP]
> sched_switch(c21cedc0,2717cc87,22c51a72,ffc00014,c21cedc0) at sched_switch+0xbc
> mi_switch(1,c052c35e,c1c5a998,c1c5a898,0) at mi_switch+0x1a2
> sleepq_switch(c1c5a998,0,0,e8477c98,c0512cef) at sleepq_switch+0x169
> sleepq_timedwait_sig(c1c5a998,0,c1c5a904,c069e845,0) at sleepq_timedwait_sig+0x17
> msleep(c1c5a998,c1c5a904,168,c069e845,7531) at msleep+0x490
> kse_release(c21cedc0,e8477d14,4,c04f102e,1) at kse_release+0x195
> syscall(2f,2f,2f,8052100,81) at syscall+0x2f0
> Xint0x80_syscall() at Xint0x80_syscall+0x1f
> --- syscall (383, FreeBSD ELF32, kse_release), eip = 0x280941a7, esp = 0xbfafef40, ebp = 0xbfafef8c ---
> 
> db> sho thread 0xc1b962c0 
> Proc 0xc1c5a898    thread 0xc1b962c0 ksegrp 0xc182c580 [CPU 1][kse 0xc2161360]
> kdb_trap(c,0,ffffffc0,1,1) at kdb_trap+0x151
> trap_fatal(ffffffc0,0,1,0,c1b962c0) at trap_fatal+0x2e3
> trap_pfault(ffffffc0,0,0,0,0) at trap_pfault+0x22d
> trap(18,10,10,0,c16c8ce0) at trap+0x2dd
> calltrap() at calltrap+0x5
> --- trap 0xc, eip = 0xc1d69193, esp = 0, ebp = 0 ---
> mx_free() at mx_free+0x1b
> db> 
> 
> 
> 
> (gdb) l * kse_release+0x288
> 0xc04f5145 is in kse_release (../../../kern/kern_kse.c:357).
> 352                             kg->kg_upsleeps++;
> 353                             td->td_kflags |= TDK_KSEREL;
> 354                             error = msleep(&kg->kg_completed, &p->p_mtx,
> 355                                     PPAUSE|PCATCH, "kserel",
> 356                                     (uap->timeout ? tvtohz(&tv) : 0));
> 357                             td->td_kflags &= ~(TDK_KSEREL | TDK_WAKEUP);
> 358                             kg->kg_upsleeps--;
> 359                     }
> 360                     PROC_UNLOCK(p);
> 361             }
> 
> 
> (gdb) l * kse_release+0x195
> 0xc04f5052 is in kse_release (../../../kern/kern_kse.c:343).
> 338                     /* UTS wants to wait for signal event */
> 339                     if (!(p->p_flag & P_SIGEVENT) && !(ku->ku_flags & KUF_DOUPCALL)) {
> 340                             td->td_kflags |= TDK_KSERELSIG;
> 341                             error = msleep(&p->p_siglist, &p->p_mtx, PPAUSE|PCATCH,
> 342                                 "ksesigwait", (uap->timeout ? tvtohz(&tv) : 0));
> 343                             td->td_kflags &= ~(TDK_KSERELSIG | TDK_WAKEUP);
> 344                     }
> 345                     p->p_flag &= ~P_SIGEVENT;
> 346                     sigset = p->p_siglist;
> 347                     PROC_UNLOCK(p);
> 
> 
> (from objdump -D -S, since gdb -k seems to no longer work..)
> 00008178 <mx_free>:
> 
> void
> mx_free(void *ptr)
> {
>     8178:       55                      push   %ebp
>     8179:       89 e5                   mov    %esp,%ebp
>     817b:       83 ec 08                sub    $0x8,%esp
>   free(ptr, M_MXBUF);
>     817e:       c7 44 24 04 20 71 02    movl   $0x27120,0x4(%esp)
>     8185:       00 
>     8186:       8b 45 08                mov    0x8(%ebp),%eax
>     8189:       89 04 24                mov    %eax,(%esp)
>     818c:       e8 fc ff ff ff          call   818d <mx_free+0x15>
> }
>     8191:       89 ec                   mov    %ebp,%esp
>     8193:       5d                      pop    %ebp
>     8194:       c3                      ret    
> 

I can't even find mx_free in my sources..
I'll cvs update and see if it's new.. if so, then that's kinda
suspicious right there..

ummmm nope.. where is mx_free?

> 
> Thanks,
> 
> Drew
> _______________________________________________
> freebsd-threads at freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-threads
> To unsubscribe, send any mail to "freebsd-threads-unsubscribe at freebsd.org"
> 



More information about the freebsd-threads mailing list