ip forwarding panic

Tue May 27 16:15:07 UTC 2008

Hi,

this is from a soekris box w/o disk so I cannot take a core.
The kernel on the soekris is 7-STABLE from May 21.

I am doing IP forwarding through the soekris with two connected
subnets and no other routes.

Using the following on a machine left and right of the soekris:
./netreceive 4500
./netblast 192.0.2.2 4500 56 60

it takes up to 3 seconds and 3k-6k packets arriving on the receiver side
to get any of these *booms*

------------------------------------------------------------------------

Fatal trap 12: page fault while in kernel mode
fault virtual address   = 0xc
fault code              = supervisor read, page not present
instruction pointer     = 0x20:0xc0550ea8
stack pointer           = 0x28:0xc9c02b2c
frame pointer           = 0x28:0xc9c02b48
code segment            = base 0x0, limit 0xfffff, type 0x1b
                         = DPL 0, pres 1, def32 1, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 19 (irq10: sis0 sis1+)
trap number             = 12
panic: page fault
KDB: stack backtrace:
db_trace_self_wrapper(c0771c93,c9c029cc,c0504a2a,c076fe5f,c07ce140,...) at db_trace_self_wrapper+0x26
kdb_backtrace(c076fe5f,c07ce140,c07637f5,c9c029d8,c9c029d8,...) at kdb_backtrace+0x29
panic(c07637f5,c078db22,c51594d0,1,1,...) at panic+0xaa
trap_fatal(c07cc200,0,1,0,62,...) at trap_fatal+0x303
trap_pfault(c5167c00,c52b8600,c5285ac8,c519a04c,c51592ac,...) at trap_pfault+0x270
trap(c9c02aec) at trap+0x3ea
calltrap() at calltrap+0x6
--- trap 0xc, eip = 0xc0550ea8, esp = 0xc9c02b2c, ebp = 0xc9c02b48 ---
m_copydata(c52b8700,0,cc,c5209e34,0,...) at m_copydata+0x38
ip_forward(c52b8700,0,c50fdc00,0,c51420a8,...) at ip_forward+0x1c8
ip_input(c52b8700,0,800,c5165000,800,...) at ip_input+0x67c
netisr_dispatch(2,c52b8700,10,3,0,...) at netisr_dispatch+0x55
ether_demux(c5165000,c52b8700,3,0,3,...) at ether_demux+0x1e0
ether_input(c5165000,c52b8700,28,28,c07c70f8,...) at ether_input+0x343
sis_rxeof(c9c00020,246,c07e8829,c9c02cbc,c07474f7,...) at sis_rxeof+0x1a3
sis_intr(c515a800,0,c076d632,46b,aa55aa55,...) at sis_intr+0x10e
ithread_loop(c5169910,c9c02d38,aa55aa55,aa55aa55,aa55aa55,...) at ithread_loop+0x1ab
fork_exit(c04e4990,c5169910,c9c02d38) at fork_exit+0x96
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc9c02d70, ebp = 0 ---
Uptime: 9m29s
Cannot dump. No dump device defined.
Automatic reboot in 15 seconds - press a key on the console to abort

telnet> send brk
KDB: enter: Line break on console
[thread pid 19 tid 100017 ]
Stopped at      kdb_enter_why+0x3a:     movl    $0,kdb_why
------------------------------------------------------------------------

(gdb) l *ip_forward+0x1c8
0xc05c6318 is in ip_forward (/usr/src/RELENG_7_soekris/src/sys/netinet/ip_input.c:1314).
1309            }
1310
1311    #ifdef IPSTEALTH
1312            if (!ipstealth) {
1313    #endif
1314                    ip->ip_ttl -= IPTTLDEC;
1315    #ifdef IPSTEALTH
1316            }
1317    #endif
1318

0xc05c7adc is in ip_input (/usr/src/RELENG_7_soekris/src/sys/netinet/ip_input.c:610).
605             } else {
606     #ifdef IPSEC
607                     if (ip_ipsec_fwd(m))
608                             goto bad;
609     #endif /* IPSEC */
610                     ip_forward(m, dchg);
611             }
612             return;
613
614     ours:
(gdb)

For the next crash (below) I managed to get into ddb in time.

What makes me think something is wrong is that I have a payload
size of 56 bytes and mtu of 1500
 	%./netblast 192.0.2.2 4500 56 60
but we are calling into ip_fragment?

struct ip looks ok.

db> p/x *0xc534000e
54000045
db> p/x *0xc5340012
     aaa1
db> p/x *0xc5340016
25161140
db> p/x *0xc534001a
1f40a8c0
db> p/x *0xc534001e
  20200c0

The MTU is the 3rd argument to ip_fragment, which according to the
backtrace is 0x5dc = 1500.

So why didn't we call into *if_output in ip_output as
 	if (ip->ip_len <= mtu ||
would have been true?

------------------------------------------------------------------------
Fatal trap 12: page fault while in kernel mode
fault virtual address   = 0xc
fault code              = supervisor read, page not present
instruction pointer     = 0x20:0xc0551776
stack pointer           = 0x28:0xc9c02a54
frame pointer           = 0x28:0xc9c02a80
code segment            = base 0x0, limit 0xfffff, type 0x1b
                         = DPL 0, pres 1, def32 1, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 19 (irq10: sis0 sis1+)
trap number             = 12
panic: page fault
KDB: stack backtrace:
db_trace_self_wrapper(c0771c93,c9c028f4,c0504a2a,c076fe5f,c07ce140,...) at db_trace_self_wrapper+0x26
kdb_backtrace(c076fe5f,c07ce140,c07637f5,c9c02900,c9c02900,...) at kdb_backtrace+0x29
panic(c07637f5,c078db22,c51594d0,1,1,...) at panic+0xaa
trap_fatal(c07cc200,0,1,0,c07c6f40,...) at trap_fatal+0x303
trap_pfault(c515ac00,c5167c00,62,c9c02a28,c51592ac,...) at trap_pfault+0x270
trap(c9c02a14) at trap+0x3ea
calltrap() at calltrap+0x6
--- trap 0xc, eip = 0xc0551776, esp = 0xc9c02a54, ebp = 0xc9c02a80 ---
m_copym(c5305800,5dc,5c8,1,1,...) at m_copym+0x36
ip_fragment(c534000e,c9c02b50,5dc,0,1,...) at ip_fragment+0x235
ip_output(c5305800,0,c9c02b8c,1,0,...) at ip_output+0xb6e
ip_forward(c5305800,0,c04f6eec,c9c02bcc,c0747a31,...) at ip_forward+0x384
ip_input(c5305800,0,800,c5165000,800,...) at ip_input+0x67c
netisr_dispatch(2,c5305800,10,3,0,...) at netisr_dispatch+0x55
ether_demux(c5165000,c5305800,3,0,3,...) at ether_demux+0x1e0
ether_input(c5165000,c5305800,c5160028,c5150028,c07c70f8,...) at ether_input+0x343
sis_rxeof(c9c00020,246,c07e8829,c9c02cbc,c07474f7,...) at sis_rxeof+0x1a3
sis_intr(c515a800,0,c076d632,46b,aa55aa55,...) at sis_intr+0x10e
ithread_loop(c5169910,c9c02d38,aa55aa55,aa55aa55,aa55aa55,...) at ithread_loop+0x1ab
fork_exit(c04e4990,c5169910,c9c02d38) at fork_exit+0x96
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc9c02d70, ebp = 0 ---
Uptime: 2m34s
Cannot dump. No dump device defined.
Automatic reboot in 15 seconds - press a key on the console to abort

telnet> send brk
KDB: enter: Line break on console
[thread pid 19 tid 100017 ]
Stopped at      kdb_enter_why+0x3a:     movl    $0,kdb_why
------------------------------------------------------------------------

0xc05c8f95 is in ip_fragment (/usr/src/RELENG_7_soekris/src/sys/netinet/ip_output.c:732).
727                             len = ip->ip_len - off;
728                             m->m_flags |= M_LASTFRAG;
729                     } else
730                             mhip->ip_off |= IP_MF;
731                     mhip->ip_len = htons((u_short)(len + mhlen));
732                     m->m_next = m_copy(m0, off, len);
733                     if (m->m_next == NULL) {        /* copy failed */
734                             m_free(m);
735                             error = ENOBUFS;        /* ??? */
736                             ipstat.ips_odropped++;

(gdb) l *ip_output+0xb6e
0xc05c9c6e is in ip_output (/usr/src/RELENG_7_soekris/src/sys/netinet/ip_output.c:571).
566
567             /*
568              * Too large for interface; fragment if possible. If successful,
569              * on return, m will point to a list of packets to be sent.
570              */
571             error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum);
572             if (error)
573                     goto bad;
574             for (; m; m = m0) {
575                     m0 = m->m_nextpkt;

I swapped the machines (different kernels unfortunately) and put the soekris
on the leaf and the other machine (actually server class) is holding up
fine (same sources, different kernel config though).

Anyone any ideas?

-- 
Bjoern A. Zeeb              Stop bit received. Insert coin for new game.