Arm64 stack issues (was Re: FreeBSD status for/on ODroid-C2?)
Mark Millard
markmi at dsl-only.net
Tue Feb 7 04:05:36 UTC 2017
[I got a lucky sh core dump with more stack context/content
available to look at for an example sh crash. This helps
narrow things down.]
On 2017-Feb-5, at 1:12 AM, Mark Millard <markmi at dsl-only.net> wrote:
> [Top post of a new result.]
>
> Using lldb to look at the memory for the stack around
> sh failure points has some apparently fixed structure.
> Example:
>
> . . . junk values . . .
> 0xffffffffe4d0: 0x0000000000000078 0x637261612f737470
> 0xffffffffe4e0: 0x00000000004345c8 0x0000000000434000
> 0xffffffffe4f0: 0x0000000000434000 0x0000000040a903e0
> 0xffffffffe500: 0x0000ffffffffe540 0x000000004054cd94
> 0xffffffffe510: 0x696d6b72616d2f6c 0x0000000000000000
> 0xffffffffe520: 0x0000000000000000 0x0000000000000000
> 0xffffffffe530: 0x0000000000000000 0xe8021690dc1f70b8
> 0xffffffffe540: 0x00000000004345c8 0x0000000000434000
> 0xffffffffe550: 0x0000000000434000 0x000000000000000f
> 0xffffffffe560: 0x0000ffffffffe5a0 0x000000000041aef0
> 0xffffffffe570: 0x0000000000434c38 0x732f7273752f3a6e
> 0xffffffffe580: 0x0000000000000001 0x0000000000000005
> 0xffffffffe590: 0x0000000040a33180 0x0000000000000000
> 0xffffffffe5a0: 0x0000ffffffffc5c0 0x000000000040f490
> . . . junk values . . .
I got lucky and got a core dump that did not have the junk
areas and could trace the stack's frame pointer chain
between main and libc.so.7`__free (through freejob along
the way). See later.
> where "register read" showed:
>
> sp = 0x0000ffffffffe600
>
> (The distance and direction to the last non-junk line
> from the reported sp in each example is the same.)
> Looking around that 0x000000000040f490:
>
> 0x40f48c: 0x97fffc74 bl 0x40e65c ; freejob at jobs.c:463
> 0x40f490: 0x9100c294 add x20, x20, #0x30 ; =0x30
>
> It is the same address and code in each case.
I should have originally noted that 0x40f48c is in
forkshell, along the child process code-path:
pid_t
forkshell(struct job *jp, union node *n, int mode)
{
. . . (see /usr/src/bin/sh/jobs.c for this) . . .
INTOFF;
if (mode == FORK_BG && (jp == NULL || jp->nprocs == 0))
checkzombies();
flushall();
pid = fork();
if (pid == -1) {
TRACE(("Fork failed, errno=%d\n", errno));
INTON;
error("Cannot fork: %s", strerror(errno));
}
if (pid == 0) {
struct job *p;
int wasroot;
int i;
TRACE(("Child shell %d\n", (int)getpid()));
wasroot = rootshell;
rootshell = 0;
handler = &main_handler;
closescript();
INTON;
forcelocal = 0;
clear_traps();
#if JOBS
. . . (see /usr/src/bin/sh/jobs.c for this) . . .
#else
. . . (see /usr/src/bin/sh/jobs.c for this) . . .
#endif
INTOFF;
for (i = njobs, p = jobtab ; --i >= 0 ; p++)
if (p->used)
freejob(p);
INTON;
if (wasroot && iflag) {
setsignal(SIGINT);
setsignal(SIGQUIT);
setsignal(SIGTERM);
}
return pid;
}
. . . (see /usr/src/bin/sh/jobs.c for this) . . .
> Sometimes the junk values are all zeros over sizable
> distances. Sometimes the sizable areas seem to have
> random data.
>
> /usr/src/bin/sh/jobs.c 's freejobs is:
>
> static void
> freejob(struct job *jp)
> {
> struct procstat *ps;
> int i;
>
> INTOFF;
> if (bgjob == jp)
> bgjob = NULL;
> for (i = jp->nprocs, ps = jp->ps ; --i >= 0 ; ps++) {
> if (ps->cmd != nullstr)
> ckfree(ps->cmd);
> }
> if (jp->ps != &jp->ps0)
> ckfree(jp->ps);
> jp->used = 0;
> #if JOBS
> deljob(jp);
> #endif
> INTON;
> }
>
> /usr/src/bin/sh/error.h defines INTOFF and INTON:
>
> #define EXINT 0 /* SIGINT received */
> #define EXERROR 1 /* a generic error */
> #define EXEXEC 2 /* command execution failed */
> #define EXEXIT 3 /* call exitshell(exitstatus) */
>
> . . .
>
> extern struct jmploc *handler;
> extern volatile sig_atomic_t exception;
>
> . . .
>
> extern volatile sig_atomic_t suppressint;
> extern volatile sig_atomic_t intpending;
>
> #define INTOFF suppressint++
> #define INTON { if (--suppressint == 0 && intpending) onint(); }
> #define is_int_on() suppressint
> #define SETINTON(s) suppressint = (s)
> #define FORCEINTON {suppressint = 0; if (intpending) onint();}
> #define SET_PENDING_INT intpending = 1
> #define CLEAR_PENDING_INT intpending = 0
> #define int_pending() intpending
>
> void exraise(int) __dead2;
> void onint(void) __dead2;
>
> /usr/src/bin/sh/error.c hAS:
>
> void
> exraise(int e)
> {
> INTOFF;
> if (handler == NULL)
> abort();
> exception = e;
> longjmp(handler->loc, 1);
> }
> . . .
> void
> onint(void)
> {
> sigset_t sigs;
>
> intpending = 0;
> sigemptyset(&sigs);
> sigprocmask(SIG_SETMASK, &sigs, NULL);
>
> /*
> * This doesn't seem to be needed, since main() emits a newline.
> */
> #if 0
> if (tcgetpgrp(0) == getpid())
> write(STDERR_FILENO, "\n", 1);
> #endif
> if (rootshell && iflag)
> exraise(EXINT);
> else {
> signal(SIGINT, SIG_DFL);
> kill(getpid(), SIGINT);
> _exit(128 + SIGINT);
> }
> }
>
> # grep setjmp /usr/src/bin/sh/*
> /usr/src/bin/sh/TOUR:so I implement it using setjmp and longjmp. The global variable
> /usr/src/bin/sh/error.h:#include <setjmp.h>
> /usr/src/bin/sh/error.h: * BSD setjmp saves the signal mask, which violates ANSI C and takes time,
> /usr/src/bin/sh/error.h: * so we use _setjmp instead.
> /usr/src/bin/sh/error.h:#define setjmp(jmploc) _setjmp(jmploc)
> /usr/src/bin/sh/eval.c: if (setjmp(jmploc.loc)) {
> /usr/src/bin/sh/eval.c: if (setjmp(jmploc.loc))
> /usr/src/bin/sh/eval.c: if (setjmp(jmploc.loc)) {
> /usr/src/bin/sh/eval.c: if (setjmp(jmploc.loc)) {
> /usr/src/bin/sh/eval.c: if (setjmp(jmploc.loc)) {
> /usr/src/bin/sh/histedit.c: if (setjmp(jmploc.loc)) {
> /usr/src/bin/sh/jobs.c: if (setjmp(jmploc.loc))
> /usr/src/bin/sh/main.c: * commands. The setjmp call sets up the location to jump to when an
> /usr/src/bin/sh/main.c: if (setjmp(main_handler.loc)) {
> /usr/src/bin/sh/parser.c: if (setjmp(jmploc.loc)) {
> /usr/src/bin/sh/parser.c: if (!setjmp(jmploc.loc)) {
> /usr/src/bin/sh/trap.c: if (!setjmp(loc1.loc)) {
> /usr/src/bin/sh/trap.c: if (!setjmp(loc2.loc)) {
> /usr/src/bin/sh/var.c: if (setjmp(jmploc.loc))
Here is the call chain that I was able to trace
in the newer core dump:
(most nested first to least nested last;
showing frame pointer and lr value pairs
and calls/return-places)
(ifree is not in the core file)
0xffffffffcc60: 0x0000ffffffffcca0 0x000000004054cd94
libc.so.7`__free:
0x4054cd90 <+144>: bl 0xad6fc ; ifree at jemalloc_jemalloc.c:1876
0x4054cd94 <+148>: adrp x9, 185
0xffffffffcca0: 0x0000ffffffffccc0 0x0000000000411300
sh`ckfree:
0x4112fc <+28>: bl 0x4027e0 ; symbol stub for: free
0x411300 <+32>: ldr x8, [x19, #0x970]
0xffffffffccc0: 0x0000ffffffffcd00 0x000000000040e6e8
sh`freejob:
0x40e6e4 <+136>: bl 0x4112e0 ; ckfree at memalloc.c:86
0x40e6e8 <+140>: adrp x8, 38
0xffffffffcd00: 0x0000ffffffffce20 0x000000000040f490
sh`forkshell:
0x40f48c <+856>: bl 0x40e65c ; freejob at jobs.c:463
0x40f490 <+860>: add x20, x20, #0x30 ; =0x30
0xffffffffce20: 0x0000ffffffffced0 0x0000000000405954
sh`evaltree:
0x405950 <+1204>: bl 0x40f134 ; forkshell at jobs.c:838
0x405954 <+1208>: cbnz w0, 0x4059dc ; <+1344> [inlined] evalpipe + 300 at eval.c:286
0xffffffffced0: 0x0000ffffffffd160 0x0000000000406e28
sh`evalbackcmd:
0x406e24 <+336>: bl 0x40549c ; evaltree at eval.c:193
0x406e28 <+340>: ldur w0, [x29, #-0x5c]
0xffffffffd160: 0x0000ffffffffd310 0x0000000000409324
sh`argstr:
0x409320 <+428>: bl 0x406cd4 ; evalbackcmd at eval.c:646
0x409324 <+432>: mov x0, x26
0xffffffffd310: 0x0000ffffffffd370 0x0000000000408fa8
sh`expandarg:
0x408fa4 <+108>: bl 0x409174 ; argstr at expand.c:267
0x408fa8 <+112>: cbz x19, 0x409020 ; <+232> at expand.c:236
0xffffffffd370: 0x0000ffffffffd5f0 0x0000000000407530
sh`exphere:
0x40752c <+212>: bl 0x408f38 ; expandarg at expand.c:225
0x407530 <+216>: ldr x8, [x20]
0xffffffffd5f0: 0x0000ffffffffd630 0x00000000004073f0
sh`expredir:
0x4073ec <+112>: bl 0x407458 ; exphere at eval.c:494
0x4073f0 <+116>: b 0x407428 ; <+172> at eval.c:535
0xffffffffd630: 0x0000ffffffffd960 0x0000000000406154
sh`evalcommand:
0x406150 <+744>: bl 0x40737c ; expredir at eval.c:532
0x406154 <+748>: ldur w27, [x29, #-0x68]
0xffffffffd960: 0x0000ffffffffda10 0x0000000000405570
sh`evaltree:
0x40556c <+208>: bl 0x405e68 ; evalcommand at eval.c:825
0x405570 <+212>: b 0x405a9c ; <+1536> at eval.c:623
0x405574 <+216>: ldr x8, [x24, #0x8]
0xffffffffda10: 0x0000ffffffffdac0 0x00000000004056b4
sh`evaltree:
0x4056b0 <+532>: bl 0x40549c ; <+0> at eval.c:193
0x4056b4 <+536>: ldr w8, [x19, #0x990]
0xffffffffdac0: 0x0000ffffffffdb70 0x0000000000405550
sh`evaltree:
0x40554c <+176>: bl 0x40549c ; <+0> at eval.c:193
0x405550 <+180>: ldr w8, [x22, #0x994]
0xffffffffdb70: 0x0000ffffffffdbf0 0x0000000000411034
sh`cmdloop:
0x411030 <+248>: bl 0x40549c ; evaltree at eval.c:193
0x411034 <+252>: mov w27, wzr
0xffffffffdbf0: 0x0000ffffffffdc50 0x0000000000410ea8
sh`main:
0x410ea4 <+656>: bl 0x410f38 ; cmdloop at main.c:199
0x410ea8 <+660>: adrp x8, 36
0xffffffffdc50: 0x0000ffffffffdc90 0x0000000000402f30
sh`__start:
0x402f2c <+356>: bl 0x410c14 ; main at main.c:97
0x402f30 <+360>: bl 0x402ae0 ; symbol stub for: exit
(_rtld is not in the core file)
0xffffffffdc90: 0x0000000000000000 0x0000000040434658
ld-elf.so.1`.rtld_start:
0x40434654 <+20>: bl 0x2e4c ; _rtld at rtld.c:339
0x40434658 <+24>: mov x8, x0
Some of the most nested possibly had returned. But the
forkshell / freejob general time frame seem to match
everything that I've seen.
[The details of the middle "eval*" related layers vary
from what I can tell.]
"register read" shows fp, lr, and pc majorly
messed up.
General Purpose Registers:
x0 = 0x0000000000000000
x1 = 0x00000000404346e8 ld-elf.so.1`_rtld_tlsdesc
x2 = 0x0000000040a00000
x3 = 0x0000000000000002
x4 = 0x0000000000000096
x5 = 0x0000000040a5fd10
x6 = 0x0000000000434c28 sh..bss + 9448
x7 = 0x0000000000434c28 sh..bss + 9448
x8 = 0x0000000000000001
x9 = 0x0000000000000000
x10 = 0x0000000000000000
x11 = 0x0000000040a350c0
x12 = 0x0000000040a0e770
x13 = 0x0000000000000072
x14 = 0x000000000000006f
x15 = 0x0000000000000010
x16 = 0x0000000000432340
x17 = 0x000000004054cd00 libc.so.7`__free at jemalloc_jemalloc.c:2007
x18 = 0x0000000000000000
x19 = 0x0000000000000000
x20 = 0x0000000000000000
x21 = 0x0000000000000001
x22 = 0x0000000040a5ff10
x23 = 0x0000ffffffffd190
x24 = 0x0000000000434000 sh..bss + 6336
x25 = 0x0000000000434000 sh..bss + 6336
x26 = 0x0000ffffffffcd00
x27 = 0x0000000000434000 sh..bss + 6336
x28 = 0x0000000040a6f5e0
fp = 0x0000000040a5fed8
lr = 0x0000000000000000
sp = 0x0000ffffffffcd60
pc = 0x0000000000000000
cpsr = 0x60000000
sp is also odd by being in the middle of the stack range
for:
0xffffffffcd00: 0x0000ffffffffce20 0x000000000040f490
sh`forkshell:
0x40f48c <+856>: bl 0x40e65c ; freejob at jobs.c:463
0x40f490 <+860>: add x20, x20, #0x30 ; =0x30
0xffffffffce20: 0x0000ffffffffced0 0x0000000000405954
sh`evaltree:
0x405950 <+1204>: bl 0x40f134 ; forkshell at jobs.c:838
0x405954 <+1208>: cbnz w0, 0x4059dc ; <+1344> [inlined] evalpipe + 300 at eval.c:286
NOTE: The fork happened earlier in sh`forkshell and this
is the child process that has the odd value.
[It leaves me wondering if 0x0000ffffffffcd60 is a stack
pointer value associated with a call to something
earlier than the sh`forkshell call that is called by
sh`forkshell .]
Also: in the ones with only a small section of the junk
areas the equivalent of:
0xffffffffcd00: 0x0000ffffffffce20 0x000000000040f490
is the largest addressed non-junk content in the area
and the equivalent of:
0xffffffffce20: 0x0000ffffffffced0 0x0000000000405954
would instead show zeros or "random" garbage values.
In this case, however that range of the stack looks like:
. . .
0xffffffffcd00: 0x0000ffffffffce20 0x000000000040f490
0xffffffffcd10: 0x0000ffffffffcd00 0x0000000000434000
0xffffffffcd20: 0x0000000000434000 0x0000ffffffffd190
0xffffffffcd30: 0x0000000040a5ff10 0x0000000000000001
0xffffffffcd40: 0x0000000000000000 0x0000000000000000
0xffffffffcd50: 0x0000000040a5fed8 0x0000000000000000
0xffffffffcd60: 0x0000ffffffffcf90 0x00000000004068e4
0xffffffffcd70: 0x0000000000000000 0x827a80ccb3228215
0xffffffffcd80: 0x0000000040a6f5c0 0x0000000000434000
0xffffffffcd90: 0x0000000000434000 0x0000000000434000
0xffffffffcda0: 0x0000000000434000 0x0000000000434000
0xffffffffcdb0: 0x0000000040a6f638 0x0000000000000000
0xffffffffcdc0: 0x0000000040a350c0 0x0000000000434000
0xffffffffcdd0: 0x0000ffffffffce20 0x000000000040f1c8
0xffffffffcde0: 0x0000000000000003 0x0000000040a350c0
0xffffffffcdf0: 0x0000000040a6f5c0 0x0000000000434000
0xffffffffce00: 0x0000000000434000 0x0000000040a6f638
0xffffffffce10: 0x0000000000000000 0x0000000000434000
0xffffffffce20: 0x0000ffffffffced0 0x0000000000405954
. . .
Interestingly 0xffffffffcd60 reported for the sp looks
like it has a frame-pointer/lr-value pair that does not
fit with the overall call chain that ties together but
is some fragment of a prior(?) call chain:
0xffffffffcd60: 0x0000ffffffffcf90 0x00000000004068e4
sh`evalcommand:
0x4068e0 <+2680>: bl 0x402be0 ; symbol stub for: _setjmp
0x4068e4 <+2684>: cbz w0, 0x406a04 ; <+2972> at eval.c:1101
It looks like it is a record from calling _setjmp in
sh`evalcommand .
(sh uses _setjmp/_longjmp via macros that turn
into them for setjmp/longjmp references in
sh's source code.)
Interestingly (likely junk relative to the above):
0xffffffffcf90: 0x0000000000000000 0x0000000000432000
where:
(lldb) dis -s 0x0000000000432000
sh`__frame_dummy_init_array_entry:
0x432000 <+0>: .long 0x00402fac ; unknown opcode
0x432004 <+4>: .long 0x00000000 ; unknown opcode
(lldb) dis -s __frame_dummy_init_array_entry -c32
sh`frame_dummy:
0x402fac <+0>: adrp x8, 48
0x402fb0 <+4>: adrp x1, 48
0x402fb4 <+8>: ldr x8, [x8, #0x30]
0x402fb8 <+12>: ldr x1, [x1, #0x228]
0x402fbc <+16>: cmp x8, #0x0 ; =0x0
0x402fc0 <+20>: ccmp x1, #0x0, #0x4, ne
0x402fc4 <+24>: b.ne 0x402fcc ; <+32>
0x402fc8 <+28>: ret
0x402fcc <+32>: adrp x0, 48
0x402fd0 <+36>: add x0, x0, #0x30 ; =0x30
0x402fd4 <+40>: br x1
sh`lookupalias:
. . .
Ohter notes:
Some examples of funcnest==0 others have (e.g.) funcnest==2.
This one had funcnest==0.
commandname varies. In this case it was:
(lldb) print commandname
(char *) $74 = 0x0000ffffffffe210 "/usr/obj/portswork/usr/ports/devel/aarch64-none-elf-gcc/work/gcc-6.3.0/libiberty/configure"
Other examples include:
(lldb) print commandname
(char *) $0 = 0x0000ffffffffdc40 "/usr/obj/portswork/usr/ports/devel/aarch64-none-elf-gcc/work/gcc-6.3.0/fixincludes/configure"
(lldb) print commandname
(char *) $0 = 0x0000ffffffffe498 "/usr/obj/portswork/usr/ports/devel/aarch64-none-elf-gcc/work/gcc-6.3.0/libiberty/../config.sub"
(lldb) print commandname
(char *) $0 = 0x0000ffffffffe398 "../libtool"
So far the forkshell/fork/freejob and associated materials always seeming
to be involved is all that I've found that is common (at least that is
suggested by what I see so far) within the sh context.
> Other notes:
>
> As a personal investigation I've temporarily changed to using
> something not fully generic but based on gic-400 specifics:
>
> # svnlite diff /usr/src/sys/arm/arm/gic.c
> Index: /usr/src/sys/arm/arm/gic.c
> ===================================================================
> --- /usr/src/sys/arm/arm/gic.c (revision 312982)
> +++ /usr/src/sys/arm/arm/gic.c (working copy)
> @@ -672,9 +672,13 @@
>
> if (irq >= sc->nirqs) {
> #ifdef GIC_DEBUG_SPURIOUS
> +#define EXPECTED_SPURIOUS_IRQ 1023
> + if (irq != EXPECTED_SPURIOUS_IRQ) {
> device_printf(sc->gic_dev,
> - "Spurious interrupt detected: last irq: %d on CPU%d\n",
> + "Spurious interrupt %d detected of %d: last irq: %d on CPU%d\n",
> + irq, sc->nirqs,
> sc->last_irq[PCPU_GET(cpuid)], PCPU_GET(cpuid));
> + }
> #endif
> return (FILTER_HANDLED);
> }
> @@ -720,6 +724,16 @@
> if (irq < sc->nirqs)
> goto dispatch_irq;
>
> + if (irq != EXPECTED_SPURIOUS_IRQ) {
> +#undef EXPECTED_SPURIOUS_IRQ
> +#ifdef GIC_DEBUG_SPURIOUS
> + device_printf(sc->gic_dev,
> + "Spurious end interrupt %d detected of %d: last irq: %d on CPU%d\n",
> + irq, sc->nirqs,
> + sc->last_irq[PCPU_GET(cpuid)], PCPU_GET(cpuid));
> +#endif
> + }
> +
> return (FILTER_HANDLED);
> }
>
>
> The result was no notices of Spurious interrupts have been generated:
> All of the odd interrupts were the special 1023 value.
>
> [As far as I could tell from the code the configuration is such that
> 1022 should not be generated --and were not. 1020 and 1021 are
> reserved and should not be generated.]
===
Mark Millard
markmi at dsl-only.net
More information about the freebsd-arm
mailing list