6.2-STABLE panic in network multicast-address related cleanup code
Ralf S. Engelschall
rse+freebsd-stable at FreeBSD.org
Sun Sep 2 02:00:39 PDT 2007
The following kernel panic occurred twice yesterday evening under
the latest 6.2-STABLE when I ran the (multi-cast using?) Elvin event
notifcation consumer client ec(1) from libelvin 4.0.3 with "ec -e
elvin://10.0.0.1 'Timeout > 0'" against an Avis server under 10.0.0.1. A
second after stopping the ec(1) with CTRL-C the following panic occurs
more or less reproducable:
1. Kernel:
FreeBSD en1.engelschall.com 6.2-STABLE FreeBSD 6.2-STABLE #0: Thu Aug
30 11:31:47 CEST 2007 root at en1.engelschall.com:/var/obj/usr/src/sys/EN1 i386
2. Panic:
# kgdb /boot/kernel/kernel.debug vmcore.40
[GDB will not be able to debug user-mode threads: /usr/lib/libthread_db.so: Undefined symbol "ps_pglobal_lookup"]
GNU gdb 6.1.1 [FreeBSD]
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB. Type "show warranty" for details.
This GDB was configured as "i386-marcel-freebsd".
Unread portion of the kernel message buffer:
Fatal trap 12: page fault while in kernel mode
cpuid = 1; apic id = 01
fault virtual address = 0x615b2094
fault code = supervisor read, page not present
instruction pointer = 0x20:0xc05e3e6f
stack pointer = 0x28:0xeb4ecaf4
frame pointer = 0x28:0xeb4ecafc
code segment = base 0x0, limit 0xfffff, type 0x1b
= DPL 0, pres 1, def32 1, gran 1
processor eflags = interrupt enabled, resume, IOPL = 0
current process = 58238 (ec)
trap number = 12
panic: page fault
cpuid = 1
KDB: stack backtrace:
kdb_backtrace(100,c6e1f780,28,eb4ecab4,c,...) at 0xc056c82d = kdb_backtrace+0x29
panic(c07089bd,c072fc26,0,fffff,c7b6f89b,...) at 0xc0552bf0 = panic+0x114
trap_fatal(eb4ecab4,615b2094,c6e1f780,c79a1cb8,c,...) at 0xc06e6232 = trap_fatal+0x2ce
trap_pfault(eb4ecab4,0,615b2094) at 0xc06e5f3b = trap_pfault+0x1d7
trap(c6c00008,eb4e0028,28,c0786920,ce75c8a0,...) at 0xc06e5b51 = trap+0x2fd
calltrap() at 0xc06d0b1a = calltrap+0x5
--- trap 0xc, eip = 0xc05e3e6f, esp = 0xeb4ecaf4, ebp = 0xeb4ecafc ---
in_delmulti(ce75c8a0) at 0xc05e3e6f = in_delmulti+0xb
ip_freemoptions(ca45b880,c7bc3870,0,c7c912c8,eb4ecb38,...) at 0xc05ed8e9 = ip_freemoptions+0x21
in_pcbdetach(c7bc3870) at 0xc05e5d6c = in_pcbdetach+0x15c
udp_detach(c7c912c8) at 0xc05fee96 = udp_detach+0xb6
soclose(c7c912c8) at 0xc058e710 = soclose+0xa8
soo_close(c7009ca8,c6e1f780) at 0xc057db3f = soo_close+0x63
fdrop_locked(c7009ca8,c6e1f780,ca23fb00,eb4ecc00,c052e263,...) at 0xc052fd9c = fdrop_locked+0xd0
fdrop(c7009ca8,c6e1f780,87,c0564d7f,0,...) at 0xc052fcc5 = fdrop+0x41
closef(c7009ca8,c6e1f780) at 0xc052e263 = closef+0x42f
fdfree(c6e1f780) at 0xc052d253 = fdfree+0x5a3
exit1(c6e1f780,0,eb4ecd30,c06e6577,c6e1f780,...) at 0xc0536b4b = exit1+0x49b
exit1(c6e1f780,eb4ecd04) at 0xc05366b0 = exit1
syscall(3b,3b,3b,83fbfc0,83e2000,...) at 0xc06e6577 = syscall+0x2bf
Xint0x80_syscall() at 0xc06d0b6f = Xint0x80_syscall+0x1f
--- syscall (1, FreeBSD ELF32, sys_exit), eip = 0x283ef0af, esp = 0xbfbfe44c, ebp = 0xbfbfe468 ---
Uptime: 2d11h34m40s
Dumping 2045 MB (7 chunks)
chunk 0: 1MB (142 pages) ... ok
chunk 1: 2030MB (519581 pages) 2014 1998 1982 1966 1950 1934 1918 1902 1886 1870 1854 1838 1822 1806 1790 1774 1758 1742 1726 1710 1694 1678 1662 1646 1630 1614 1598 1582 1566 1550 1534 1518 1502 1486 1470 1454 1438 1422 1406 1390 1374 1358 1342 1326 1310 1294 1278 1262 1246 1230 1214 1198 1182 1166 1150 1134 1118 1102 1086 1070 1054 1038 1022 1006 990 974 958 942 926 910 894 878 862 846 830 814 798 782 766 750 734 718 702 686 670 654 638 622 606 590 574 558 542 526 510 494 478 462 446 430 414 398 382 366 350 334 318 302 286 270 254 238 222 206 190 174 158 142 126 110 94 78 62 46 30 14 ... ok
chunk 2: 15MB (3708 pages) ... ok
chunk 3: 1MB (159 pages) ... ok
chunk 4: 1MB (122 pages)
#0 doadump () at pcpu.h:165
165 pcpu.h: No such file or directory.
in pcpu.h
3. Details:
(kgdb) bt
#0 doadump () at pcpu.h:165
#1 0xc055293e in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:409
#2 0xc0552c95 in panic (fmt=0xc07089bd "%s") at /usr/src/sys/kern/kern_shutdown.c:565
#3 0xc06e6232 in trap_fatal (frame=0xeb4ecab4, eva=1633362068) at /usr/src/sys/i386/i386/trap.c:838
#4 0xc06e5f3b in trap_pfault (frame=0xeb4ecab4, usermode=0, eva=1633362068) at /usr/src/sys/i386/i386/trap.c:745
#5 0xc06e5b51 in trap (frame=
{tf_fs = -960495608, tf_es = -347209688, tf_ds = 40, tf_edi = -1065850592, tf_esi = -831141728, tf_ebp = -347157764, tf_isp = -347157792, tf_ebx = 1633361978, tf_edx = -958269568, tf_ecx = 4, tf_eax = 1, tf_trapno = 12, tf_err = 0, tf_eip = -1067565457, tf_cs = 32, tf_eflags = 66050, tf_esp = 0, tf_ss = -901400448}) at /usr/src/sys/i386/i386/trap.c:435
#6 0xc06d0b1a in calltrap () at /usr/src/sys/i386/i386/exception.s:139
#7 0xc05e3e6f in in_delmulti (inm=0xce75c8a0) at /usr/src/sys/netinet/in.c:1063
#8 0xc05ed8e9 in ip_freemoptions (imo=0xca45b880) at /usr/src/sys/netinet/ip_output.c:2064
#9 0xc05e5d6c in in_pcbdetach (inp=0xc7bc3870) at /usr/src/sys/netinet/in_pcb.c:714
#10 0xc05fee96 in udp_detach (so=0x1) at /usr/src/sys/netinet/udp_usrreq.c:1071
#11 0xc058e710 in soclose (so=0xc7c912c8) at /usr/src/sys/kern/uipc_socket.c:459
#12 0xc057db3f in soo_close (fp=0xc7009ca8, td=0xc6e1f780) at /usr/src/sys/kern/sys_socket.c:317
#13 0xc052fd9c in fdrop_locked (fp=0xc7009ca8, td=0xc6e1f780) at file.h:296
#14 0xc052fcc5 in fdrop (fp=0xc7009ca8, td=0xc6e1f780) at /usr/src/sys/kern/kern_descrip.c:2113
#15 0xc052e263 in closef (fp=0xc7009ca8, td=0xc6e1f780) at /usr/src/sys/kern/kern_descrip.c:1933
#16 0xc052d253 in fdfree (td=0xc6e1f780) at /usr/src/sys/kern/kern_descrip.c:1651
#17 0xc0536b4b in exit1 (td=0xc6e1f780, rv=0) at /usr/src/sys/kern/kern_exit.c:273
#18 0xc05366b0 in sys_exit (td=0xc6e1f780, uap=0x1) at /usr/src/sys/kern/kern_exit.c:99
#19 0xc06e6577 in syscall (frame=
{tf_fs = 59, tf_es = 59, tf_ds = 59, tf_edi = 138395584, tf_esi = 138289152, tf_ebp = -1077943192, tf_isp = -347157148, tf_ebx = 675280672, tf_edx = 0, tf_ecx = 0, tf_eax = 1, tf_trapno = 12, tf_err = 2, tf_eip = 675213487, tf_cs = 51, tf_eflags = 658, tf_esp = -1077943220, tf_ss = 59}) at /usr/src/sys/i386/i386/trap.c:984
#20 0xc06d0b6f in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:200
#21 0x00000033 in ?? ()
Previous frame inner to this frame (corrupt stack?)
[...]
(kgdb) up
#7 0xc05e3e6f in in_delmulti (inm=0xce75c8a0) at /usr/src/sys/netinet/in.c:1063
1063 ifp = inm->inm_ifp;
Current language: auto; currently c
(kgdb) list
1058 in_delmulti(inm)
1059 register struct in_multi *inm;
1060 {
1061 struct ifnet *ifp;
1062
1063 ifp = inm->inm_ifp;
1064 IFF_LOCKGIANT(ifp);
1065 IN_MULTI_LOCK();
1066 in_delmulti_locked(inm, 0);
1067 IN_MULTI_UNLOCK();
(kgdb) print inm
$1 = (struct in_multi *) 0xce75c8a0
(kgdb) print *inm
$2 = {inm_link = {le_next = 0x0, le_prev = 0x11}, inm_addr = {s_addr = 1684566899}, inm_ifp = 0x615b203a, inm_ifma = 0x70656363,
inm_timer = 1566860660, inm_state = 450304, inm_rti = 0x6d7b6}
(kgdb) print inm->inm_ifp
$3 = (struct ifnet *) 0x615b203a
(kgdb) print *inm->inm_ifp
Cannot access memory at address 0x615b203a
Seems like some multi-cast related cleanups are broken. When
in_delmulti() is called, the "inm" is still valid but the "inm->inm_ifp"
already points to an invalid location. I'm not familiar with this
network code, so cannot easily determine the root of the problem.
Perhaps someone of us who knows the network stack good enough can look
at this issue and figure out the root cause (and fix it ;-).
For reproducing this one have to build
http://distfiles-msn.opendarwin.org/libelvin-4.0.3.tar.gz and run its
commands ec(1) and ep(1) against an Avis (see http://avsis.sf.net/)
server. This is a little bit nasty as one needs Java for Avis, etc. So,
sorry, I've currently no simple reproducing procedure. But perhaps the
bug can already be found with the above information.
I've also a corresponding "vmcore" (2GB) and a "kernel.debug" (16MB)
file available in case someone wants to digg deeper. Or tell me the
kgdb commands I should run against those for digging up additional
information.
--
rse at FreeBSD.org Ralf S. Engelschall
FreeBSD.org/~rse rse at engelschall.com
FreeBSD committer www.engelschall.com
More information about the freebsd-stable
mailing list