panic: sbuf_vprintf called with a NULL sbuf pointer
Don Lewis
truckman at FreeBSD.org
Wed Jan 20 23:15:15 UTC 2016
On 8 Dec, John Baldwin wrote:
> On Tuesday, December 08, 2015 11:39:56 AM Don Lewis wrote:
>> On 8 Dec, John Baldwin wrote:
>> > On Monday, December 07, 2015 10:10:51 PM Don Lewis wrote:
>> >> On 2 Dec, John Baldwin wrote:
>> >> > On Wednesday, December 02, 2015 01:25:56 PM Don Lewis wrote:
>> >> >> > If you want to look at this further, try going to frame 16 and dissassembling the
>> >> >> > instructions before the call to see if you can spot which register the first
>> >> >> > parameter (saved in %rdi IIRC) comes from.
>> >> >>
>> >> >> Dump of assembler code for function sbuf_printf:
>> >> >> 0xffffffff80a673e0 <+0>: push %rbp
>> >> >> 0xffffffff80a673e1 <+1>: mov %rsp,%rbp
>> >> >> 0xffffffff80a673e4 <+4>: push %r14
>> >> >> 0xffffffff80a673e6 <+6>: push %rbx
>> >> >> 0xffffffff80a673e7 <+7>: sub $0x50,%rsp
>> >> >> 0xffffffff80a673eb <+11>: mov %rsi,%r14
>> >> >> 0xffffffff80a673ee <+14>: mov %rdi,%rbx
>> >> >> 0xffffffff80a673f1 <+17>: mov %r9,-0x38(%rbp)
>> >> >> 0xffffffff80a673f5 <+21>: mov %r8,-0x40(%rbp)
>> >> >> 0xffffffff80a673f9 <+25>: mov %rcx,-0x48(%rbp)
>> >> >> 0xffffffff80a673fd <+29>: mov %rdx,-0x50(%rbp)
>> >> >> 0xffffffff80a67401 <+33>: lea -0x60(%rbp),%rax
>> >> >> 0xffffffff80a67405 <+37>: mov %rax,-0x20(%rbp)
>> >> >> 0xffffffff80a67409 <+41>: lea 0x10(%rbp),%rax
>> >> >> 0xffffffff80a6740d <+45>: mov %rax,-0x28(%rbp)
>> >> >> 0xffffffff80a67411 <+49>: movl $0x30,-0x2c(%rbp)
>> >> >> 0xffffffff80a67418 <+56>: movl $0x10,-0x30(%rbp)
>> >> >> 0xffffffff80a6741f <+63>: mov $0xffffffff8137bdf8,%rdi
>> >> >> 0xffffffff80a67426 <+70>: mov %rbx,%rsi
>> >> >> 0xffffffff80a67429 <+73>: callq 0xffffffff80a66c00 <_assert_sbuf_integrity>
>> >> >>
>> >> >>
>> >> >> 0xffffffff80a237b9 <+825>: jmpq 0xffffffff80a236fd <sigexit+637>
>> >> >> 0xffffffff80a237be <+830>: mov $0xffffffff80fd8ad3,%rsi
>> >> >> 0xffffffff80a237c5 <+837>: xor %eax,%eax
>> >> >> 0xffffffff80a237c7 <+839>: mov %r12,%rdi
>> >> >> 0xffffffff80a237ca <+842>: mov -0x228(%rbp),%rdx
>> >> >> 0xffffffff80a237d1 <+849>: callq 0xffffffff80a673e0 <sbuf_printf>
>> >> >> => 0xffffffff80a237d6 <+854>: inc %r14d
>> >> >> 0xffffffff80a237d9 <+857>: jmpq 0xffffffff80a236fd <sigexit+637>
>> >> >
>> >> > So maybe try 'p $r12' in the corefile_open() frame.
>> >>
>> >> #17 0xffffffff80a237d6 in corefile_open (compress=0, comm=<optimized out>,
>> >> uid=<optimized out>, pid=<optimized out>, td=<optimized out>,
>> >> vpp=<optimized out>, namep=<optimized out>)
>> >> at /usr/src/sys/kern/kern_sig.c:3188
>> >> 3188 sbuf_printf(&sb, "%s", comm);
>> >> (kgdb) p $r12
>> >> $1 = 0
>> >
>> > So it's definitely zero. :( The next step is probably to disassemble the
>> > corefile_open function (ugh) and walk backwards to find where %r12 is read
>> > from. It might be from a local variable on the stack, so then you would
>> > want to examine that memory in the stack and the surrounding memory to see
>> > if there is memory corruption and perhaps if there is anything recognizable
>> > about it (e.g. if the corruption contains some sort of data you recognize,
>> > or if the corruption is bounded by a certain length, etc.). It's a bit of
>> > a shot in the dark though.
>> >
>> > Is this reproducible?
>>
>> No it's not. The only time it happened was when there was a swap
>> timeout, probably because of a lengthy deep recovery on one of the
>> mirrored swap devices.
>>
>> The code in question is:
>> struct sbuf sb;
>> [snip]
>> (void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN);
>> [snip]
>> for (i = 0; format[i] != '\0'; i++) {
>> switch (format[i]) {
>> case '%': /* Format character */
>> i++;
>> switch (format[i]) {
>> [snip]
>> case 'N': /* process name */
>> sbuf_printf(&sb, "%s", comm);
>> break;
>>
>>
>> &sb is used in a bunch of places, so the compiler probably computes its
>> value once by adding the proper offset to the stack pointer and stashing
>> the result in a register. Since kern.corefile is "%N.core", the failure
>> is happening on the first interation of the loop, so there isn't much
>> opportunity for things to get corrupted. Also, the control flow in this
>> function only depends on the format, so there shouldn't be anything
>> special about a swap timeout vs. a segfault generated core.
>
> Yes, r12 is call-safe (IIRC), so I expect it only computes it once as well,
> but I've sometimes seen the compiler spill local vars onto the stack due to
> register pressure. That said, I think it is unlikely it would have to spill
> &sb during the early part of the function. :(
I had some more time to look at this. It's a bit messy because
corefile_open() and coredump() both get inlined into sig_exit(). Here's
the section of code from the malloc() call and sbuf_new() to the failing
sbuf_printf() call:
0xffffffff80a23685 <+517>: callq 0xffffffff80a00680 <malloc>
0xffffffff80a2368a <+522>: mov %rax,-0x238(%rbp)
0xffffffff80a23691 <+529>: lea -0x138(%rbp),%r12
0xffffffff80a23698 <+536>: mov $0x400,%edx
0xffffffff80a2369d <+541>: xor %ecx,%ecx
0xffffffff80a2369f <+543>: mov %r12,%rdi
0xffffffff80a236a2 <+546>: mov %rax,%rsi
0xffffffff80a236a5 <+549>: callq 0xffffffff80a66830 <sbuf_new>
0xffffffff80a236aa <+554>: mov $0xffffffff81ccd6f0,%rdi
0xffffffff80a236b1 <+561>: xor %esi,%esi
0xffffffff80a236b3 <+563>: mov $0xffffffff813757c7,%rdx
0xffffffff80a236ba <+570>: mov $0xc5d,%ecx
0xffffffff80a236bf <+575>: callq 0xffffffff80a27d00 <_sx_slock>
0xffffffff80a236c4 <+580>: mov $0xffffffff,%ebx
0xffffffff80a236c9 <+585>: xor %r14d,%r14d
0xffffffff80a236cc <+588>: xor %r15d,%r15d
0xffffffff80a236cf <+591>: jmp 0xffffffff80a236fd <sigexit+637>
0xffffffff80a236d1 <+593>: data16 data16 data16 data16 data16 nopw %cs:0x0(
%rax,%rax,1)
0xffffffff80a236e0 <+608>: mov $0x3,%edi
0xffffffff80a236e5 <+613>: mov $0xffffffff81375c89,%rsi
0xffffffff80a236ec <+620>: mov $0xffffffff8186d900,%rcx
0xffffffff80a236f3 <+627>: xor %eax,%eax
0xffffffff80a236f3 <+627>: xor %eax,%eax
0xffffffff80a236f5 <+629>: callq 0xffffffff80a61a20 <log>
0xffffffff80a236fa <+634>: inc %r14d
0xffffffff80a236fd <+637>: movslq %r14d,%rax
0xffffffff80a23700 <+640>: movsbl -0x7e792700(%rax),%esi
0xffffffff80a23707 <+647>: cmp $0x25,%esi
0xffffffff80a2370a <+650>: jne 0xffffffff80a2380f <sigexit+911>
0xffffffff80a23710 <+656>: inc %r14d
0xffffffff80a23713 <+659>: movslq %r14d,%rax
0xffffffff80a23716 <+662>: movsbl -0x7e792700(%rax),%edx
0xffffffff80a2371d <+669>: lea -0x48(%rdx),%eax
0xffffffff80a23720 <+672>: cmp $0x8,%eax
0xffffffff80a23723 <+675>: jbe 0xffffffff80a23740 <sigexit+704>
0xffffffff80a23725 <+677>: cmp $0x55,%edx
0xffffffff80a23728 <+680>: je 0xffffffff80a237ef <sigexit+879>
0xffffffff80a2372e <+686>: cmp $0x25,%edx
0xffffffff80a23731 <+689>: jne 0xffffffff80a236e0 <sigexit+608>
0xffffffff80a23733 <+691>: mov $0x25,%esi
0xffffffff80a23738 <+696>: jmpq 0xffffffff80a23814 <sigexit+916>
0xffffffff80a2373d <+701>: nopl (%rax)
0xffffffff80a23740 <+704>: jmpq *-0x7ec8a8b8(,%rax,8)
0xffffffff80a23747 <+711>: test %r15,%r15
0xffffffff80a2374a <+714>: jne 0xffffffff80a23765 <sigexit+741>
0xffffffff80a2374c <+716>: mov $0x100,%edi
0xffffffff80a23751 <+721>: mov $0xffffffff818668a0,%rsi
0xffffffff80a23758 <+728>: mov $0x2,%edx
0xffffffff80a2375d <+733>: callq 0xffffffff80a00680 <malloc>
0xffffffff80a23762 <+738>: mov %rax,%r15
0xffffffff80a23765 <+741>: mov -0x220(%rbp),%rax
0xffffffff80a2376c <+748>: mov 0x140(%rax),%rdi
0xffffffff80a23773 <+755>: mov $0x100,%edx
0xffffffff80a23778 <+760>: mov %r15,%rsi
0xffffffff80a2377b <+763>: callq 0xffffffff809f0ef0 <getcredhostname>
0xffffffff80a23780 <+768>: mov $0xffffffff80fd8ad3,%rsi
0xffffffff80a23787 <+775>: xor %eax,%eax
0xffffffff80a23789 <+777>: mov %r12,%rdi
0xffffffff80a2378c <+780>: mov %r15,%rdx
0xffffffff80a2378f <+783>: jmp 0xffffffff80a237d1 <sigexit+849>
0xffffffff80a23791 <+785>: mov $0xffffffff81373ba2,%rsi
0xffffffff80a23798 <+792>: xor %eax,%eax
0xffffffff80a2379a <+794>: mov %r12,%rdi
0xffffffff80a2379d <+797>: callq 0xffffffff80a673e0 <sbuf_printf>
0xffffffff80a237a2 <+802>: mov %r12,%rdi
0xffffffff80a237a5 <+805>: callq 0xffffffff80a67760 <sbuf_len>
0xffffffff80a237aa <+810>: mov $0xfffff103,%ecx
0xffffffff80a237af <+815>: lea 0xefc(%rcx,%rax,1),%ebx
0xffffffff80a237b6 <+822>: inc %r14d
0xffffffff80a237b9 <+825>: jmpq 0xffffffff80a236fd <sigexit+637>
0xffffffff80a237be <+830>: mov $0xffffffff80fd8ad3,%rsi
0xffffffff80a237c5 <+837>: xor %eax,%eax
0xffffffff80a237c7 <+839>: mov %r12,%rdi
0xffffffff80a237ca <+842>: mov -0x228(%rbp),%rdx
0xffffffff80a237d1 <+849>: callq 0xffffffff80a673e0 <sbuf_printf>
=> 0xffffffff80a237d6 <+854>: inc %r14d
0xffffffff80a237d9 <+857>: jmpq 0xffffffff80a236fd <sigexit+637>
0xffffffff80a237de <+862>: mov $0xffffffff81361469,%rsi
Basically it's:
lea -0x138(%rbp),%r12
[ snip ]
mov %r12,%rdi
callq sbuf_printf
More information about the freebsd-current
mailing list