page fault while in kernel mode - after upgrade from 12.2 to 13.0

Tue May 4 18:38:42 UTC 2021

Hi Mark,

Sorry for the delay; I can only test after work. I triggered two more panics, this time
with different results (see below). Can I provide any more information?

Thank you!
   Michael

--- #1

Fatal trap 12: page fault while in kernel mode
cpuid = 1; apic id = 01
fault virtual address   = 0x388
fault code              = supervisor read data, page not present
instruction pointer     = 0x20:0xffffffff80d3fa67
stack pointer           = 0x28:0xfffffe0115bea9c0
frame pointer           = 0x28:0xfffffe0115beaa20
code segment            = base 0x0, limit 0xfffff, type 0x1b
                         = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 12 (swi1: netisr 0)
trap number             = 12
panic: page fault
cpuid = 1
time = 1620144777
KDB: stack backtrace:
#0 0xffffffff80c57345 at kdb_backtrace+0x65
#1 0xffffffff80c09d21 at vpanic+0x181
#2 0xffffffff80c09b93 at panic+0x43
#3 0xffffffff8108b187 at trap_fatal+0x387
#4 0xffffffff8108b1df at trap_pfault+0x4f
#5 0xffffffff8108a83d at trap+0x27d
#6 0xffffffff810617a8 at calltrap+0x8
#7 0xffffffff80bcae5d at ithread_loop+0x24d
#8 0xffffffff80bc7c5e at fork_exit+0x7e
#9 0xffffffff8106282e at fork_trampoline+0xe
Uptime: 3m51s
Dumping 2617 out of 65454 MB:..1%..11%..21%..31%..41%..51%..61%..71%..81%..91%

__curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55
55              __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu,
(kgdb) list *0xffffffff80d3fa67
0xffffffff80d3fa67 is in swi_net (/usr/src/sys/net/netisr.c:918).
913                     if (local_npw.nw_head == NULL)
914                             local_npw.nw_tail = NULL;
915                     local_npw.nw_len--;
916                     VNET_ASSERT(m->m_pkthdr.rcvif != NULL,
917                         ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m));
918                     CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
919                     netisr_proto[proto].np_handler(m);
920                     CURVNET_RESTORE();
921             }
922             KASSERT(local_npw.nw_len == 0,
(kgdb) backtrace
#0  __curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55
#1  doadump (textdump=<optimized out>) at /usr/src/sys/kern/kern_shutdown.c:399
#2  0xffffffff80c09916 in kern_reboot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:486
#3  0xffffffff80c09d90 in vpanic (fmt=<optimized out>, ap=<optimized out>) at /usr/src/sys/kern/kern_shutdown.c:919
#4  0xffffffff80c09b93 in panic (fmt=<unavailable>) at /usr/src/sys/kern/kern_shutdown.c:843
#5  0xffffffff8108b187 in trap_fatal (frame=0xfffffe0115bea900, eva=904) at /usr/src/sys/amd64/amd64/trap.c:915
#6  0xffffffff8108b1df in trap_pfault (frame=frame@entry=0xfffffe0115bea900, usermode=false, signo=<optimized out>, signo@entry=0x0, ucode=<optimized out>, ucode@entry=0x0) at /usr/src/sys/amd64/amd64/trap.c:732
#7  0xffffffff8108a83d in trap (frame=0xfffffe0115bea900) at /usr/src/sys/amd64/amd64/trap.c:398
#8  <signal handler called>
#9  0xffffffff80d3fa67 in netisr_process_workstream_proto (nwsp=<optimized out>, proto=1) at /usr/src/sys/net/netisr.c:918
#10 swi_net (arg=<optimized out>) at /usr/src/sys/net/netisr.c:966
#11 0xffffffff80bcae5d in intr_event_execute_handlers (p=<optimized out>, ie=0xfffff80003dbb600) at /usr/src/sys/kern/kern_intr.c:1168
#12 ithread_execute_handlers (p=<optimized out>, ie=0xfffff80003dbb600) at /usr/src/sys/kern/kern_intr.c:1181
#13 ithread_loop (arg=arg@entry=0xfffff80003dced40) at /usr/src/sys/kern/kern_intr.c:1269
#14 0xffffffff80bc7c5e in fork_exit (callout=0xffffffff80bcac10 <ithread_loop>, arg=0xfffff80003dced40, frame=0xfffffe0115beab00) at /usr/src/sys/kern/kern_fork.c:1069
#15 <signal handler called>

--- #2

Fatal trap 12: page fault while in kernel mode
cpuid = 1; apic id = 01
fault virtual address   = 0x8
fault code              = supervisor read data, page not present
instruction pointer     = 0x20:0xffffffff80ca599c
stack pointer           = 0x28:0xfffffe0115bea6c0
frame pointer           = 0x28:0xfffffe0115bea700
code segment            = base 0x0, limit 0xfffff, type 0x1b
                         = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 12 (swi1: netisr 0)
trap number             = 12
panic: page fault
cpuid = 1
time = 1620152374
KDB: stack backtrace:
#0 0xffffffff80c57345 at kdb_backtrace+0x65
#1 0xffffffff80c09d21 at vpanic+0x181
#2 0xffffffff80c09b93 at panic+0x43
#3 0xffffffff8108b187 at trap_fatal+0x387
#4 0xffffffff8108b1df at trap_pfault+0x4f
#5 0xffffffff8108a83d at trap+0x27d
#6 0xffffffff810617a8 at calltrap+0x8
#7 0xffffffff80dbf0ae at tcp_do_segment+0x10ce
#8 0xffffffff80dbd21e at tcp_input+0xabe
#9 0xffffffff80dafc15 at ip_input+0x125
#10 0xffffffff80d3fa7b at swi_net+0x12b
#11 0xffffffff80bcae5d at ithread_loop+0x24d
#12 0xffffffff80bc7c5e at fork_exit+0x7e
#13 0xffffffff8106282e at fork_trampoline+0xe
Uptime: 2h3m59s
Dumping 2666 out of 65454 MB:..1%..11%..21%..31%..41%..51%..61%..71%..81%..91%

__curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55
55              __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu,
(kgdb) #0  __curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55
#1  doadump (textdump=<optimized out>)
     at /usr/src/sys/kern/kern_shutdown.c:399
#2  0xffffffff80c09916 in kern_reboot (howto=260)
     at /usr/src/sys/kern/kern_shutdown.c:486
#3  0xffffffff80c09d90 in vpanic (fmt=<optimized out>, ap=<optimized out>)
     at /usr/src/sys/kern/kern_shutdown.c:919
#4  0xffffffff80c09b93 in panic (fmt=<unavailable>)
     at /usr/src/sys/kern/kern_shutdown.c:843
#5  0xffffffff8108b187 in trap_fatal (frame=0xfffffe0115bea600, eva=8)
     at /usr/src/sys/amd64/amd64/trap.c:915
#6  0xffffffff8108b1df in trap_pfault (frame=frame@entry=0xfffffe0115bea600,
     usermode=false, signo=<optimized out>, signo@entry=0x0,
     ucode=<optimized out>, ucode@entry=0x0)
     at /usr/src/sys/amd64/amd64/trap.c:732
#7  0xffffffff8108a83d in trap (frame=0xfffffe0115bea600)
     at /usr/src/sys/amd64/amd64/trap.c:398
#8  <signal handler called>
#9  sbcut_internal (sb=0xfffff80522aa09c0, len=203, len@entry=476)
     at /usr/src/sys/kern/uipc_sockbuf.c:1491
#10 0xffffffff80ca5b8a in sbcut_locked (sb=0xfffff80522aa09c0,
     len=-743943424, len@entry=476) at /usr/src/sys/kern/uipc_sockbuf.c:1591
#11 0xffffffff80dbf0ae in tcp_do_segment (m=0xfffff8004c2aae00,
     th=<optimized out>, so=<optimized out>, tp=<optimized out>,
     drop_hdrlen=52, tlen=<optimized out>, iptos=0 '\000')
     at /usr/src/sys/netinet/tcp_input.c:2918
#12 0xffffffff80dbd21e in tcp_input (mp=<optimized out>,
     offp=<optimized out>, proto=<optimized out>)
     at /usr/src/sys/netinet/tcp_input.c:1382
#13 0xffffffff80dafc15 in ip_input (m=0x0)
     at /usr/src/sys/netinet/ip_input.c:829
#14 0xffffffff80d3fa7b in netisr_process_workstream_proto (
     nwsp=<optimized out>, proto=1) at /usr/src/sys/net/netisr.c:919
#15 swi_net (arg=<optimized out>) at /usr/src/sys/net/netisr.c:966
#16 0xffffffff80bcae5d in intr_event_execute_handlers (p=<optimized out>,
     ie=0xfffff80003bbe500) at /usr/src/sys/kern/kern_intr.c:1168
#17 ithread_execute_handlers (p=<optimized out>, ie=0xfffff80003bbe500)
     at /usr/src/sys/kern/kern_intr.c:1181
#18 ithread_loop (arg=arg@entry=0xfffff80003cb6d40)
     at /usr/src/sys/kern/kern_intr.c:1269
#19 0xffffffff80bc7c5e in fork_exit (
     callout=0xffffffff80bcac10 <ithread_loop>, arg=0xfffff80003cb6d40,
     frame=0xfffffe0115beab00) at /usr/src/sys/kern/kern_fork.c:1069
#20 <signal handler called>

---

On 03.05.2021 21:45, Mark Johnston wrote:
> On Mon, May 03, 2021 at 08:04:30PM +0200, Michael Schmiedgen wrote:
>> Hi List,
>>
>> If I start a Samba jail, the system crashes after a few seconds. This is very reproducible.
>>
>> The system has ~10 jails and 3 bhyve VMs. Dell server, Xeon E3-1240, 64GB RAM, 3-way ZFS mirror.
>>
>> It also occurs a few seconds after I start a phone call using the SIP VM on that machine,
>> which is very strange.
>>
>> I got some log messages suggesting raising somaxconn, so I did
>>
>> kern.ipc.somaxconn=4096
>>
>> in sysctl.conf
>>
>>
>> Below is some debug information; please let me know if I should provide anything further.
>>
>> Should I open a bug or something?
>>
>> Thank you very much!
>>     Michael
>>
>>
>>
>> Fatal trap 12: page fault while in kernel mode
>> cpuid = 0; apic id = 00
>> fault virtual address   = 0x0
>> fault code              = supervisor read data, page not present
>> instruction pointer     = 0x20:0xffffffff80ca52c0
>> stack pointer           = 0x28:0xfffffe019d039650
>> frame pointer           = 0x28:0xfffffe019d039690
>> code segment            = base 0x0, limit 0xfffff, type 0x1b
>>                           = DPL 0, pres 1, long 1, def32 0, gran 1
>> processor eflags        = interrupt enabled, resume, IOPL = 0
>> current process         = 649 (devd)
>> trap number             = 12
>> panic: page fault
>> cpuid = 0
>> time = 1620061253
>> KDB: stack backtrace:
>> #0 0xffffffff80c57345 at kdb_backtrace+0x65
>> #1 0xffffffff80c09d21 at vpanic+0x181
>> #2 0xffffffff80c09b93 at panic+0x43
>> #3 0xffffffff8108b187 at trap_fatal+0x387
>> #4 0xffffffff8108b1df at trap_pfault+0x4f
>> #5 0xffffffff8108a83d at trap+0x27d
>> #6 0xffffffff810617a8 at calltrap+0x8
>> #7 0xffffffff80ca51c3 at sbappendaddr_locked+0x93
>> #8 0xffffffff80cb437a at uipc_send+0x73a
>> #9 0xffffffff80ca9053 at sosend_generic+0x633
>> #10 0xffffffff80ca94e0 at sosend+0x50
>> #11 0xffffffff80caff2e at kern_sendit+0x20e
>> #12 0xffffffff80cb032b at sendit+0x1db
>> #13 0xffffffff80cb013d at sys_sendto+0x4d
>> #14 0xffffffff8108ba8c at amd64_syscall+0x10c
>> #15 0xffffffff810620ce at fast_syscall_common+0xf8
>> Uptime: 2m2s
>> Dumping 2373 out of 65454 MB:..1%..11%..21%..31%..41%..51%..61%..71%..81%..91%
>>
>> __curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55
>> 55              __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu,
>> (kgdb) list *0xffffffff80ca52c0
>> 0xffffffff80ca52c0 is in sbappendaddr_locked_internal (/usr/src/sys/kern/uipc_sockbuf.c:1169).
>> 1164            if (ctrl_last)
>> 1165                    ctrl_last->m_next = m0; /* concatenate data to control */
>> 1166            else
>> 1167                    control = m0;
>> 1168            m->m_next = control;
>> 1169            for (n = m; n->m_next != NULL; n = n->m_next)
>> 1170                    sballoc(sb, n);
>> 1171            sballoc(sb, n);
>> 1172            nlast = n;
>> 1173            SBLINKRECORD(sb, m);
>
> So we are crashing because "n" is somehow NULL?  That seems difficult to
> explain.  Can you show the local variables in this frame?
>
> Does the panic always have the same stack trace?
>