zio_done panic on unadulterated FreeBSD Release 9.1

Po-Li Soong polis at spectralogic.com
Mon Jan 14 20:04:11 UTC 2013


Konstantin,

First of all, I agree with you that it would be very strange to have crashed at vm_page_free_toq+0x45, by which point m (held in register rbx; see the assembly listing below) has already been dereferenced a few times. However, there is a discrepancy between the KDB backtrace and the annotated one just a few lines below it. In the annotated backtrace, it appears that it is vm_page_remove that runs into the panic, at 0xffffffff80b50597, which corresponds to line 975 of vm_page.c. That source line looks a lot more likely to cause a panic than the one in vm_page_free_toq. Listed below are the assembly listings of vm_page_free_toq and vm_page_remove in the vicinity of the places of concern.

Regards,

Po-Li Soong

Dump of assembler code for function vm_page_free_toq:
0xffffffff80b506b0 <vm_page_free_toq+0>:        push   %rbp
0xffffffff80b506b1 <vm_page_free_toq+1>:        mov    %rsp,%rbp
0xffffffff80b506b4 <vm_page_free_toq+4>:        sub    $0x20,%rsp
0xffffffff80b506b8 <vm_page_free_toq+8>:        mov    %rbx,-0x18(%rbp)
0xffffffff80b506bc <vm_page_free_toq+12>:       mov    %r12,-0x10(%rbp)
0xffffffff80b506c0 <vm_page_free_toq+16>:       mov    %rdi,%rbx
0xffffffff80b506c3 <vm_page_free_toq+19>:       mov    %r13,-0x8(%rbp)
0xffffffff80b506c7 <vm_page_free_toq+23>:       incl   %gs:0xac
0xffffffff80b506cf <vm_page_free_toq+31>:       testb  $0x2,0x6d(%rdi)
0xffffffff80b506d3 <vm_page_free_toq+35>:       jne    0xffffffff80b50901 <vm_page_free_toq+593>
0xffffffff80b506d9 <vm_page_free_toq+41>:       cmpb   $0x0,0x71(%rdi)
0xffffffff80b506dd <vm_page_free_toq+45>:       jne    0xffffffff80b50912 <vm_page_free_toq+610>
0xffffffff80b506e3 <vm_page_free_toq+51>:       testb  $0x4,0x6e(%rdi)
0xffffffff80b506e7 <vm_page_free_toq+55>:       je     0xffffffff80b50863 <vm_page_free_toq+435>
0xffffffff80b506ed <vm_page_free_toq+61>:       mov    %rbx,%rdi
0xffffffff80b506f0 <vm_page_free_toq+64>:       callq  0xffffffff80b50540 <vm_page_remove>
0xffffffff80b506f5 <vm_page_free_toq+69>:       movzbl 0x6d(%rbx),%eax
0xffffffff80b506f9 <vm_page_free_toq+73>:       test   $0x4,%al
0xffffffff80b506fb <vm_page_free_toq+75>:       jne    0xffffffff80b50767 <vm_page_free_toq+183>
0xffffffff80b506fd <vm_page_free_toq+77>:       mov    0x68(%rbx),%esi
 
 
Dump of assembler code for function vm_page_remove:
0xffffffff80b50540 <vm_page_remove+0>:  push   %rbp
0xffffffff80b50541 <vm_page_remove+1>:  mov    %rsp,%rbp
0xffffffff80b50544 <vm_page_remove+4>:  push   %r13
0xffffffff80b50546 <vm_page_remove+6>:  push   %r12
0xffffffff80b50548 <vm_page_remove+8>:  push   %rbx
0xffffffff80b50549 <vm_page_remove+9>:  mov    %rdi,%rbx
0xffffffff80b5054c <vm_page_remove+12>: sub    $0x8,%rsp
0xffffffff80b50550 <vm_page_remove+16>: mov    0x30(%rdi),%r13
0xffffffff80b50554 <vm_page_remove+20>: movzwl 0x6e(%rdi),%eax
0xffffffff80b50558 <vm_page_remove+24>: test   %r13,%r13
0xffffffff80b5055b <vm_page_remove+27>: je     0xffffffff80b50610 <vm_page_remove+208>
0xffffffff80b50561 <vm_page_remove+33>: test   $0x1,%al
0xffffffff80b50563 <vm_page_remove+35>: jne    0xffffffff80b50635 <vm_page_remove+245>
0xffffffff80b50569 <vm_page_remove+41>: mov    0x10(%rbx),%r12
0xffffffff80b5056d <vm_page_remove+45>: test   %r12,%r12
0xffffffff80b50570 <vm_page_remove+48>: je     0xffffffff80b5057d <vm_page_remove+61>
0xffffffff80b50572 <vm_page_remove+50>: cmp    %rbx,0x20(%r12)
0xffffffff80b50577 <vm_page_remove+55>: je     0xffffffff80b50660 <vm_page_remove+288>
0xffffffff80b5057d <vm_page_remove+61>: mov    0x18(%rbx),%rcx
0xffffffff80b50581 <vm_page_remove+65>: mov    0x8(%rcx),%rsi
0xffffffff80b50585 <vm_page_remove+69>: mov    (%rsi),%rdx
0xffffffff80b50588 <vm_page_remove+72>: test   %rdx,%rdx
0xffffffff80b5058b <vm_page_remove+75>: je     0xffffffff80b50597 <vm_page_remove+87>
0xffffffff80b5058d <vm_page_remove+77>: cmp    %rbx,0x28(%rdx)
0xffffffff80b50591 <vm_page_remove+81>: je     0xffffffff80b50646 <vm_page_remove+262>
0xffffffff80b50597 <vm_page_remove+87>: mov    0x58(%r13),%rsi    ; <<-------------------
0xffffffff80b5059b <vm_page_remove+91>: cmp    %rbx,%rsi
0xffffffff80b5059e <vm_page_remove+94>: je     0xffffffff80b505a9 <vm_page_remove+105>
0xffffffff80b505a0 <vm_page_remove+96>: mov    0x38(%rbx),%rdi
0xffffffff80b505a4 <vm_page_remove+100>:        callq  0xffffffff80b4fa90 <vm_page_splay>
0xffffffff80b505a9 <vm_page_remove+105>:        mov    0x20(%rbx),%rax
0xffffffff80b505ad <vm_page_remove+109>:        test   %rax,%rax
0xffffffff80b505b0 <vm_page_remove+112>:        mov    %rax,%rdx
0xffffffff80b505b3 <vm_page_remove+115>:        je     0xffffffff80b50672 <vm_page_remove+306>
0xffffffff80b505b9 <vm_page_remove+121>:        mov    0x28(%rbx),%rsi
0xffffffff80b505bd <vm_page_remove+125>:        test   %rsi,%rsi

-----Original Message-----
From: Konstantin Belousov [mailto:kostikbel at gmail.com] 
Sent: Sunday, January 13, 2013 10:55 AM
To: Po-Li Soong
Cc: stable at FreeBSD.org
Subject: Re: zio_done panic on unadulterated FreeBSD Release 9.1

On Fri, Jan 11, 2013 at 03:09:58PM +0000, Po-Li Soong wrote:
> (kgdb) p/x *(struct vm_object *)0xffffffff81281580
> $1 = {mtx = {lock_object = {lo_name = 0xffffffff80e54bbd,
>       lo_flags = 0x1430000, lo_data = 0x0, lo_witness = 0x0},
>     mtx_lock = 0xfffffe0006f44000}, object_list = {
>     tqe_next = 0xffffffff81281240, tqe_prev = 0xffffffff812814a0},
>   shadow_head = {lh_first = 0x0}, shadow_list = {le_next = 0x0,
>     le_prev = 0x0}, memq = {tqh_first = 0xfffffe00cfd3f880,
>     tqh_last = 0xfffffe00c9cac398}, root = 0xfffffe00cd733ab0,
>   size = 0x7ffffff, generation = 0x1, ref_count = 0x3f8, shadow_count = 0x0,
>   memattr = 0x6, type = 0x4, flags = 0x1000, pg_color = 0x0, pad1 = 0x0,
>   resident_page_count = 0x9b729, backing_object = 0x0,
>   backing_object_offset = 0x0, pager_object_list = {tqe_next = 0x0,
>     tqe_prev = 0x0}, rvq = {lh_first = 0xfffffe00c7dd2140}, cache = 0x0,
>   handle = 0x0, un_pager = {vnp = {vnp_size = 0x0, writemappings = 0x0},
>     devp = {devp_pglist = {tqh_first = 0x0, tqh_last = 0x0}, ops = 0x0},
>     sgp = {sgp_pglist = {tqh_first = 0x0, tqh_last = 0x0}}, swp = {
>       swp_bcount = 0x0}}, cred = 0x0, charge = 0x0, paging_in_progress = 0x1}
> 
> (kgdb)  p/x *(struct vm_page *)0xfffffe00cd733ab0
> $2 = {pageq = {tqe_next = 0x0, tqe_prev = 0xfffffe00c7e7d678}, listq = {
>     tqe_next = 0xfffffe00cd733b28, tqe_prev = 0xfffffe00cd7331d8},
>   left = 0xfffffe00c9b31c38, right = 0xfffffe00cd735c70,
>   object = 0xfffffffb81281580, pindex = 0x7495a, phys_addr = 0xbe95a000, md = {
>     pv_list = {tqh_first = 0x0, tqh_last = 0xfffffe00cd733af8},
>     pat_mode = 0x6}, queue = 0xff, segind = 0x2, hold_count = 0x0,
>   order = 0xd, pool = 0x0, cow = 0x0, wire_count = 0x0, aflags = 0x0,
>   flags = 0x0, oflags = 0x4, act_count = 0x0, busy = 0x0, valid = 0xff,
>   dirty = 0x0}
> 
> (kgdb) list *vm_page_free_toq+0x45
> 0xffffffff80b506f5 is in vm_page_free_toq (/usr/src/sys/vm/vm_page.c:1878).
> warning: Source file is more recent than executable.
> 
> 1873
> 1874            /*
> 1875             * If fictitious remove object association and
> 1876             * return, otherwise delay object association removal.
> 1877             */
> 1878            if ((m->flags & PG_FICTITIOUS) != 0) {
> 1879                    return;
> 1880            }
> 1881
> 1882            m->valid = 0;
> (kgdb)
This is strange. Can you disassemble your instance of
vm_page_free_toq() and show me the assembler listing? The line you show has nothing that could cause a page fault if the m pointer itself is valid.

> 
> 
> -----Original Message-----
> From: Konstantin Belousov [mailto:kostikbel at gmail.com]
> Sent: Wednesday, January 09, 2013 4:49 PM
> To: Po-Li Soong
> Cc: stable at FreeBSD.org
> Subject: Re: zio_done panic on unadulterated FreeBSD Release 9.1
> 
> On Wed, Jan 09, 2013 at 08:03:38PM +0000, Po-Li Soong wrote:
> > Hi,
> > 
> > My name is Po-Li Soong. I ran into a crash not long after installing the 9.1 release on my home machine. I was performing a test file transfer through a Samba server running on the FreeBSD installation. The transfer rate was about 70-80 MB/sec. The core.txt is attached. If any other crash dumps are needed, please let me know.
> > 
> > I first discussed this panic with Justin Gibbs, a coworker of mine at Spectra Logic. He referred me to this email address, suggesting that the information should be relevant to you. Thanks for the help.
> > 
> > Regards,
> > 
> > Po-Li Soong
> > 
> 
> > maestoso dumped core - see /var/crash/vmcore.0
> > 
> > Sat Jan  5 19:53:24 MST 2013
> > 
> > FreeBSD maestoso 9.1-RELEASE FreeBSD 9.1-RELEASE #0 r243825: Tue Dec  4 09:23:10 UTC 2012     root at farrell.cse.buffalo.edu:/usr/obj/usr/src/sys/GENERIC  amd64
> > 
> > panic: page fault
> > 
> > GNU gdb 6.1.1 [FreeBSD]
> > Copyright 2004 Free Software Foundation, Inc.
> > GDB is free software, covered by the GNU General Public License, and 
> > you are welcome to change it and/or distribute copies of it under certain conditions.
> > Type "show copying" to see the conditions.
> > There is absolutely no warranty for GDB.  Type "show warranty" for details.
> > This GDB was configured as "amd64-marcel-freebsd"...
> > 
> > Unread portion of the kernel message buffer:
> > 
> > 
> > Fatal trap 12: page fault while in kernel mode
> > cpuid = 1; apic id = 01
> > fault virtual address	= 0xfffffffb812815d8
> > fault code		= supervisor read data, page not present
> > instruction pointer	= 0x20:0xffffffff80b50597
> > stack pointer	        = 0x28:0xffffff80fa3bc8d0
> > frame pointer	        = 0x28:0xffffff80fa3bc900
> > code segment		= base 0x0, limit 0xfffff, type 0x1b
> > 			= DPL 0, pres 1, long 1, def32 0, gran 1
> > processor eflags	= interrupt enabled, resume, IOPL = 0
> > current process		= 0 (zio_write_intr_5)
> > trap number		= 12
> > panic: page fault
> > cpuid = 3
> > KDB: stack backtrace:
> > #0 0xffffffff809208a6 at kdb_backtrace+0x66
> > #1 0xffffffff808ea8be at panic+0x1ce
> > #2 0xffffffff80bd8240 at trap_fatal+0x290
> > #3 0xffffffff80bd857d at trap_pfault+0x1ed
> > #4 0xffffffff80bd8b9e at trap+0x3ce
> > #5 0xffffffff80bc315f at calltrap+0x8
> > #6 0xffffffff80b506f5 at vm_page_free_toq+0x45
> > #7 0xffffffff80b4f276 at vm_object_page_remove+0x196
> > #8 0xffffffff80b46b06 at vm_map_delete+0x316
> > #9 0xffffffff80b46c11 at vm_map_remove+0x51
> > #10 0xffffffff80b3a70a at uma_large_free+0x3a
> > #11 0xffffffff808d589a at free+0x5a
> > #12 0xffffffff8169b4ce at zio_done+0x2ee
> > #13 0xffffffff81699063 at zio_execute+0xc3
> > #14 0xffffffff8092cf55 at taskqueue_run_locked+0x85
> > #15 0xffffffff8092ded6 at taskqueue_thread_loop+0x46
> > #16 0xffffffff808bb9ef at fork_exit+0x11f
> > #17 0xffffffff80bc368e at fork_trampoline+0xe
> > Uptime: 3h19m34s
> > Dumping 571 out of 3561
> > MB:..3%..12%..23%..31%..42%..51%..62%..73%..82%..93%
> > 
> > Reading symbols from /boot/kernel/zfs.ko...Reading symbols from /boot/kernel/zfs.ko.symbols...done.
> > done.
> > Loaded symbols for /boot/kernel/zfs.ko
> > Reading symbols from /boot/kernel/opensolaris.ko...Reading symbols from /boot/kernel/opensolaris.ko.symbols...done.
> > done.
> > Loaded symbols for /boot/kernel/opensolaris.ko
> > #0  doadump (textdump=Variable "textdump" is not available.
> > ) at pcpu.h:224
> > 224	pcpu.h: No such file or directory.
> > 	in pcpu.h
> > (kgdb) #0  doadump (textdump=Variable "textdump" is not available.
> > ) at pcpu.h:224
> > #1  0xffffffff808ea3a1 in kern_reboot (howto=260)
> >     at /usr/src/sys/kern/kern_shutdown.c:448
> > #2  0xffffffff808ea897 in panic (fmt=0x1 <Address 0x1 out of bounds>)
> >     at /usr/src/sys/kern/kern_shutdown.c:636
> > #3  0xffffffff80bd8240 in trap_fatal (frame=0xc, eva=Variable "eva" is not available.
> > )
> >     at /usr/src/sys/amd64/amd64/trap.c:857
> > #4  0xffffffff80bd857d in trap_pfault (frame=0xffffff80fa3bc820, usermode=0)
> >     at /usr/src/sys/amd64/amd64/trap.c:773
> > #5  0xffffffff80bd8b9e in trap (frame=0xffffff80fa3bc820)
> >     at /usr/src/sys/amd64/amd64/trap.c:456
> P
> > #6  0xffffffff80bc315f in calltrap ()
> >     at /usr/src/sys/amd64/amd64/exception.S:228
> > #7  0xffffffff80b50597 in vm_page_remove (m=0xfffffe00cd733ab0)
> >     at /usr/src/sys/vm/vm_page.c:975
> > #8  0xffffffff80b506f5 in vm_page_free_toq (m=0xfffffe00cd733ab0)
> >     at /usr/src/sys/vm/vm_page.c:1872
> > #9  0xffffffff80b4f276 in vm_object_page_remove (object=0xffffffff81281580, 
> >     start=477512, end=477539, options=Variable "options" is not available.
> > ) at /usr/src/sys/vm/vm_object.c:1899
> > #10 0xffffffff80b46b06 in vm_map_delete (map=0xfffffe00020000e8, start=Variable "start" is not available.
> > )
> >     at /usr/src/sys/vm/vm_map.c:2739
> > #11 0xffffffff80b46c11 in vm_map_remove (map=0xfffffe00020000e8, 
> >     start=18446743525909626880, end=18446743525909737472)
> >     at /usr/src/sys/vm/vm_map.c:2871
> > #12 0xffffffff80b3a70a in uma_large_free (slab=0xfffffe00aceff8e0)
> >     at /usr/src/sys/vm/uma_core.c:3085
> > #13 0xffffffff808d589a in free (addr=0xffffff8074948000, 
> >     mtp=0xffffffff81747c20) at /usr/src/sys/kern/kern_malloc.c:572
> > #14 0xffffffff8169b4ce in zio_done (zio=0xfffffe007a9906e0)
> >     at /usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c:2960
> > #15 0xffffffff81699063 in zio_execute (zio=0xfffffe007a9906e0)
> >     at /usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c:1196
> > #16 0xffffffff8092cf55 in taskqueue_run_locked (queue=0xfffffe0006ed9a00)
> >     at /usr/src/sys/kern/subr_taskqueue.c:308
> > #17 0xffffffff8092ded6 in taskqueue_thread_loop (arg=Variable "arg" is not available.
> > )
> >     at /usr/src/sys/kern/subr_taskqueue.c:497
> > #18 0xffffffff808bb9ef in fork_exit (
> >     callout=0xffffffff8092de90 <taskqueue_thread_loop>, 
> >     arg=0xfffffe0006c072e0, frame=0xffffff80fa3bcc40)
> >     at /usr/src/sys/kern/kern_fork.c:992
> > #19 0xffffffff80bc368e in fork_trampoline ()
> >     at /usr/src/sys/amd64/amd64/exception.S:602
> > #20 0x0000000000000000 in ?? ()
> > #21 0x0000000000000000 in ?? ()
> > #22 0x0000000000000000 in ?? ()
> > #23 0x0000000000000000 in ?? ()
> > #24 0x0000000000000000 in ?? ()
> > #25 0x0000000000000000 in ?? ()
> > #26 0x0000000000000000 in ?? ()
> > #27 0x0000000000000000 in ?? ()
> > #28 0x0000000000000000 in ?? ()
> > #29 0x0000000000000000 in ?? ()
> > #30 0x0000000000000000 in ?? ()
> > #31 0x0000000000000000 in ?? ()
> > #32 0x0000000000000000 in ?? ()
> > #33 0x0000000000000000 in ?? ()
> > #34 0x0000000000000000 in ?? ()
> > #35 0x0000000000000000 in ?? ()
> > #36 0x0000000000000000 in ?? ()
> > #37 0x0000000000000000 in ?? ()
> > #38 0x0000000000000000 in ?? ()
> > #39 0x0000000000000000 in ?? ()
> > #40 0x0000000000000000 in ?? ()
> > #41 0x0000000000000000 in ?? ()
> > #42 0x0000000000000000 in ?? ()
> > #43 0x0000000000000000 in ?? ()
> > #44 0xffffffff81242880 in tdq_cpu ()
> > #45 0xffffffff81242880 in tdq_cpu ()
> > #46 0xfffffe0006f44000 in ?? ()
> > #47 0x0000000000000000 in ?? ()
> > #48 0xffffff80fa3bc290 in ?? ()
> > #49 0xffffff80fa3bc238 in ?? ()
> > #50 0xfffffe00049a88e0 in ?? ()
> > #51 0xffffffff8091352e in sched_switch (td=0xffffffff812228a0, 
> >     newtd=0xfffffe0006c072e0, flags=Variable "flags" is not available.
> > ) at /usr/src/sys/kern/sched_ule.c:1921
> > Previous frame inner to this frame (corrupt stack?)
> > (kgdb)
> 
> Please, at the kgdb prompt, do
> p/x *(struct vm_object *)0xffffffff81281580
> p/x *(struct vm_page *)0xfffffe00cd733ab0
> list *vm_page_free_toq+0x45


More information about the freebsd-stable mailing list