Reproducible panic (page fault) in poll on 6.3-RELEASE-p4

Stef Walter stef at memberwebs.com
Tue Jan 13 17:46:47 PST 2009


I have a kernel panic that I can consistently trigger. After a short
while (5 to 30 minutes) under a certain pattern of UDP OpenVPN
connections, the server crashes.

I have a crash dump and a stack trace. It seems td->td_selq has been
corrupted (see below).

The only similar panic I've found is:

http://unix.derkeiler.com/Mailing-Lists/FreeBSD/current/2004-02/0867.html

Does anyone know of a patch or any other pointers in the direction of
solving this problem?

Thanks in advance,

Stef Walter


------- 8< ------- 8< --------

6.3-RELEASE-p4 FreeBSD 6.3-RELEASE-p4 #0: Thu Sep 25 20:16:32 UTC 2008


kgdb: kvm_nlist(_stopped_cpus):
kgdb: kvm_nlist(_stoppcbs):
[GDB will not be able to debug user-mode threads:
/usr/lib/libthread_db.so: Undefined symbol "ps_pglobal_lookup"]
GNU gdb 6.1.1 [FreeBSD]
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain
conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "i386-marcel-freebsd".

Unread portion of the kernel message buffer:


Fatal trap 12: page fault while in kernel mode
fault virtual address	= 0xd
fault code		= supervisor write, page not present
instruction pointer	= 0x20:0xc055d5ec
stack pointer	        = 0x28:0xeb927b9c
frame pointer	        = 0x28:0xeb927b9c
code segment		= base 0x0, limit 0xfffff, type 0x1b
			= DPL 0, pres 1, def32 1, gran 1
processor eflags	= interrupt enabled, resume, IOPL = 0
current process		= 49994 (openvpn)
trap number		= 12
panic: page fault
Uptime: 14h58m30s
Dumping 3326 MB (2 chunks)
  chunk 0: 1MB (157 pages) ... ok
  chunk 1: 3326MB (851312 pages) 3310 3294 3278 3262 3246 3230 3214 3198
3182 3166 3150 3134 3118 3102 3086 3070 3054 3038 3022 3006 2990 2974
2958 2942 2926 2910 2894 2878 2862 2846 2830 2814 2798 2782 2766 2750
2734 2718 2702 2686 2670 2654 2638 2622 2606 2590 2574 2558 2542 2526
2510 2494 2478 2462 2446 2430 2414 2398 2382 2366 2350 2334 2318 2302
2286 2270 2254 2238 2222 2206 2190 2174 2158 2142 2126 2110 2094 2078
2062 2046 2030 2014 1998 1982 1966 1950 1934 1918 1902 1886 1870 1854
1838 1822 1806 1790 1774 1758 1742 1726 1710 1694 1678 1662 1646 1630
1614 1598 1582 1566 1550 1534 1518 1502 1486 1470 1454 1438 1422 1406
1390 1374 1358 1342 1326 1310 1294 1278 1262 1246 1230 1214 1198 1182
1166 1150 1134 1118 1102 1086 1070 1054 1038 1022 1006 990 974 958 942
926 910 894 878 862 846 830 814 798 782 766 750 734 718 702 686 670 654
638 622 606 590 574 558 542 526 510 494 478 462 446 430 414 398 382 366
350 334 318 302 286 270 254 238 222 206 190 174 158 142 126 110 94 78 62
46 30 14

#0  doadump () at pcpu.h:165
165		__asm __volatile("movl %%fs:0,%0" : "=r" (td));
(kgdb) bt
#0  doadump () at pcpu.h:165
#1  0xc053952e in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:409
#2  0xc05397c4 in panic (fmt=0xc071b0fc "%s")
    at /usr/src/sys/kern/kern_shutdown.c:565
#3  0xc06f3208 in trap_fatal (frame=0xeb927b5c, eva=13)
    at /usr/src/sys/i386/i386/trap.c:838
#4  0xc06f2f6f in trap_pfault (frame=0xeb927b5c, usermode=0, eva=13)
    at /usr/src/sys/i386/i386/trap.c:745
#5  0xc06f2bcd in trap (frame=
      {tf_fs = -342753272, tf_es = -1068498904, tf_ds = -1065746392,
tf_edi = -929257216, tf_esi = 23531, tf_ebp = -342721636, tf_isp =
-342721656, tf_ebx = 35, tf_edx = -929257216, tf_ecx = -1065693536,
tf_eax = 5, tf_trapno = 12, tf_err = 2, tf_eip = -1068116500, tf_cs =
32, tf_eflags = 590342, tf_esp = -342721316, tf_ss = -1068117214}) at
/usr/src/sys/i386/i386/trap.c:435
#6  0xc06e096a in calltrap () at /usr/src/sys/i386/i386/exception.s:139
#7  0xc055d5ec in clear_selinfo_list (td=0xc89ca900)
    at /usr/src/sys/kern/sys_generic.c:1085
#8  0xc055d322 in poll (td=0xc89ca900, uap=0xeb927d04)
    at /usr/src/sys/kern/sys_generic.c:984
#9  0xc06f351f in syscall (frame=
      {tf_fs = 59, tf_es = 59, tf_ds = 59, tf_edi = -1077943456, tf_esi
= 134902336, tf_ebp = -1077943512, tf_isp = -342721180, tf_ebx = 0,
tf_edx = 1034, tf_ecx = 1034, tf_eax = 209, tf_trapno = 22, tf_err = 2,
tf_eip = 673704855, tf_cs = ---Type <return> to continue, or q <return>
to quit---
51, tf_eflags = 662, tf_esp = -1077943556, tf_ss = 59})
    at /usr/src/sys/i386/i386/trap.c:984
#10 0xc06e09bf in Xint0x80_syscall () at
/usr/src/sys/i386/i386/exception.s:200
#11 0x00000033 in ?? ()
Previous frame inner to this frame (corrupt stack?)
(kgdb) up 7
#7  0xc055d5ec in clear_selinfo_list (td=0xc89ca900)
    at /usr/src/sys/kern/sys_generic.c:1085
1085		TAILQ_FOREACH(si, &td->td_selq, si_thrlist)
(kgdb) p td
$1 = (struct thread *) 0xc89ca900
(kgdb) p *td
$2 = {td_proc = 0xc89c8c90, td_ksegrp = 0xc82e1540, td_plist = {
    tqe_next = 0x0, tqe_prev = 0xc89c8ca0}, td_kglist = {tqe_next = 0x0,
    tqe_prev = 0xc82e154c}, td_slpq = {tqe_next = 0xc8aa9180,
    tqe_prev = 0xc89cac18}, td_lockq = {tqe_next = 0x0,
    tqe_prev = 0xebb6bbac}, td_runq = {tqe_next = 0x0, tqe_prev = 0x0},
  td_selq = {tqh_first = 0xcb11a8a8, tqh_last = 0xce64f89c},
  td_sleepqueue = 0xc82d4bc0, td_turnstile = 0xc82c4e00,
  td_umtxq = 0xc8678280, td_tid = 100069, td_flags = 16842819,
  td_inhibitors = 0, td_pflags = 0, td_dupfd = 0, td_wchan = 0x0,
  td_wmesg = 0x0, td_lastcpu = 0 '\0', td_oncpu = 0 '\0',
  td_owepreempt = 0 '\0', td_locks = 5, td_tsqueue = 0 '\0',
  td_sqqueue = 0 '\0', td_blocked = 0x0, td_ithd = 0x0, td_lockname = 0x0,
  td_contested = {lh_first = 0xc82c3d40}, td_sleeplocks = 0x0,
  td_intr_nesting_level = 0, td_pinned = 1, td_mailbox = 0x0,
  td_ucred = 0xce6e2b00, td_standin = 0x0, td_upcall = 0x0, td_sticks =
4353,
  td_uuticks = 0, td_usticks = 0, td_intrval = 0, td_oldsigmask =
{__bits = {
      0, 0, 0, 0}}, td_sigmask = {__bits = {0, 0, 0, 0}}, td_siglist = {
    __bits = {0, 0, 0, 0}}, td_generation = 43, td_sigstk = {ss_sp = 0x0,
    ss_size = 0, ss_flags = 4}, td_kflags = 0, td_xsig = 0,
  td_profil_addr = 0, td_profil_ticks = 0, td_base_pri = 180 '\264',
  td_priority = 16 '\020', td_pcb = 0xeb927d90, td_state = TDS_RUNNING,
  td_retval = {0, 1034}, td_slpcallout = {c_links = {sle = {sle_next =
0x0},
      tqe = {tqe_next = 0x0, tqe_prev = 0xdc3250e0}}, c_time = 53907218,
---Type <return> to continue, or q <return> to quit---q
c_arg =Quit
(kgdb) up
#8  0xc055d322 in poll (td=0xc89ca900, uap=0xeb927d04)
    at /usr/src/sys/kern/sys_generic.c:984
984		clear_selinfo_list(td);
(kgdb) p td
$3 = (struct thread *) 0xc89ca900
(kgdb) down
#7  0xc055d5ec in clear_selinfo_list (td=0xc89ca900)
    at /usr/src/sys/kern/sys_generic.c:1085
1085		TAILQ_FOREACH(si, &td->td_selq, si_thrlist)
(kgdb) p td->td_selq
$6 = {tqh_first = 0xcb11a8a8, tqh_last = 0xce64f89c}
(kgdb) p td->td_selq->tqh_first
$7 = (struct selinfo *) 0xcb11a8a8
(kgdb) p *td->td_selq->tqh_first
$8 = {si_thrlist = {tqe_next = 0xce64f89c, tqe_prev = 0xc89ca930},
  si_thread = 0x0, si_note = {kl_list = {slh_first = 0x0},
    kl_lock = 0xc051cd38 <knlist_mtx_lock>,
    kl_unlock = 0xc051cd6c <knlist_mtx_unlock>,
    kl_locked = 0xc051cda8 <knlist_mtx_locked>, kl_lockarg = 0xcb11a8cc},
  si_flags = 0}
(kgdb) p *td->td_selq->tqh_last
$9 = (struct selinfo *) 0x5




More information about the freebsd-stable mailing list