FYI: Pine64+ 2GB (so A64) booting and non-debug vs. debug kernel: nondebug+INVARIANTS+INVARIANT_SUPPORT sufficient to boot
    Mark Millard 
    markmi at dsl-only.net
       
    Fri Sep 15 04:14:59 UTC 2017
    
    
  
[I've traced the failure back to the bad pointer
value that is in place when it was put to use. I
omit the prior details of earlier explorations
that got me into this area.]
Summary of the following investigations:
When the witness or invariant failure during:
taskqgroup_adjust_softirq(0)...
happens it traces back to the condition:
        pcpu_find(cpu)->pc_curthread == NULL
in the code:
        ctd = pcpu_find(cpu)->pc_curthread;
for cpu == 1 in tdq_notify (but inlined). It then
attempts to evaluate:
ctd->td_priority
which gets a data_abort but it is during when
blocked_lock is the container lock for the
thread.
In the witness included case this leads to:
panic: acquiring blockable sleep lock with spinlock or critical section held (sleep mutex) pmap @ /usr/src/sys/arm64/arm64/pmap.c:4710
cpuid = 0
time = 13
but that is a later consequence of the earlier
problem.
I'm not sure why pcpu_find(cpu)->pc_curthread
is still NULL but since the behavior is intermittent
for debug kernel builds it suggests a race for an
update that was initiated but not always finished
in time.
(I've had occasions of hours of reboots to try
to get a failure but mostly only needing a few.
I've not run into a long sequence of failures to
boot for a debug kernel but have had some
back-to-back ones.)
Supporting detail that lead to the above:
int
pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far)
The far (fault address register) argument to pmap_fault is the 
rd one (x2 below):
ffff000000606954 <pmap_fault> stp       x22, x21, [sp, #-48]!
ffff000000606958 <pmap_fault+0x4> stp   x20, x19, [sp, #16]
ffff00000060695c <pmap_fault+0x8> stp   x29, x30, [sp, #32]
ffff000000606960 <pmap_fault+0xc> add   x29, sp, #0x20
ffff000000606964 <pmap_fault+0x10> mov  x20, x2
ffff000000606968 <pmap_fault+0x14> mov  x22, x1
ffff00000060696c <pmap_fault+0x18> mov  x21, x0
For the failing call sequence far ends up stored on the stack
via the x20 save-to-stack in:
ffff0000002f8c0c <__mtx_lock_flags> stp x24, x23, [sp, #-64]!
ffff0000002f8c10 <__mtx_lock_flags+0x4> stp     x22, x21, [sp, #16]
ffff0000002f8c14 <__mtx_lock_flags+0x8> stp     x20, x19, [sp, #32]
ffff0000002f8c18 <__mtx_lock_flags+0xc> stp     x29, x30, [sp, #48]
ffff0000002f8c1c <__mtx_lock_flags+0x10> add    x29, sp, #0x30
Stack segment with a little context:
0xffff000069850470:     ffff0000698504b0                ffff0000002f8b80
0xffff000069850480:     ffff000000c6a528                0
0xffff000069850490:     96000004                        ffff000000c6a658
0xffff0000698504a0:     37e                             ffff000000c6a670
0xffff0000698504b0:     ffff0000698504e0                ffff000000606998
So it appears:
pmap_fault`esr == 0x96000004
pmap_fault`pmap == 0xffff000000c6a658 (vmspace0+0x130)
pmap_fault`far == 0x37e
I'll note that 0x37e = 894 so it matches up with
x8 == 0x0 for the likes of:
elr         0xffff00000033f0dc  sched_switch+0x2bc
. . .
ssched_switch+0x2b8:     ldrb    w9, [x8, #894]
matching:
db> show reg
. . .
x8                           0
. . .
So apparently sched_switch tried to access 0x37e
db> x/gx 0xffff000000606998   
pmap_fault+0x44:        f100111f927e0ec8
Part of the back trace is (for the example
debug kernel):
kassert_panic() at witness_checkorder+0x160
         pc = 0xffff0000003174e4  lr = 0xffff000000374990
         sp = 0xffff0000698503f0  fp = 0xffff000069850470
witness_checkorder() at __mtx_lock_flags+0xa8
         pc = 0xffff000000374990  lr = 0xffff0000002f8b7c
         sp = 0xffff000069850480  fp = 0xffff0000698504b0
__mtx_lock_flags() at pmap_fault+0x40
         pc = 0xffff0000002f8b7c  lr = 0xffff000000606994
         sp = 0xffff0000698504c0  fp = 0xffff0000698504e0
        
pmap_fault() at data_abort+0xb8
         pc = 0xffff000000606994  lr = 0xffff000000608a9c
         sp = 0xffff0000698504f0  fp = 0xffff0000698505a0
data_abort() at do_el1h_sync+0xfc
         pc = 0xffff000000608a9c  lr = 0xffff0000006088f0
         sp = 0xffff0000698505b0  fp = 0xffff0000698505e0
do_el1h_sync() at handle_el1h_sync+0x74
         pc = 0xffff0000006088f0  lr = 0xffff0000005f1874
         sp = 0xffff0000698505f0  fp = 0xffff000069850700
handle_el1h_sync() at sched_switch+0x2a8
         pc = 0xffff0000005f1874  lr = 0xffff00000033f0c8
         sp = 0xffff000069850710  fp = 0xffff0000698507f0
sched_switch() at mi_switch+0x1b8
         pc = 0xffff00000033f0c8  lr = 0xffff00000032161c
         sp = 0xffff000069850800  fp = 0xffff000069850820
mi_switch() at taskqgroup_binder+0x7c
         pc = 0xffff00000032161c  lr = 0xffff00000035510c
         sp = 0xffff000069850830  fp = 0xffff000069850860
taskqgroup_binder() at gtaskqueue_run_locked+0x104
         pc = 0xffff00000035510c  lr = 0xffff000000354f74
         sp = 0xffff000069850870  fp = 0xffff0000698508e0
        
gtaskqueue_run_locked() at gtaskqueue_thread_loop+0x9c
         pc = 0xffff000000354f74  lr = 0xffff000000354d10
         sp = 0xffff0000698508f0  fp = 0xffff000069850910
gtaskqueue_thread_loop() at fork_exit+0x7c
         pc = 0xffff000000354d10  lr = 0xffff0000002dbd3c
         sp = 0xffff000069850920  fp = 0xffff000069850950
fork_exit() at fork_trampoline+0x10
         pc = 0xffff0000002dbd3c  lr = 0xffff000000608664
         sp = 0xffff000069850960  fp = 0x0000000000000000
Note:
ffff00000033f0bc <sched_switch+0x29c> ldr       w0, [x19, #1292]
ffff00000033f0c0 <sched_switch+0x2a0> ldrb      w27, [x19, #894]
ffff00000033f0c4 <sched_switch+0x2a4> str       w0, [sp, #12]
ffff00000033f0c8 <sched_switch+0x2a8> bl        ffff000000359708 <pcpu_find>
ffff00000033f0cc <sched_switch+0x2ac> ldr       x8, [x0]
ffff00000033f0d0 <sched_switch+0x2b0> mov       w11, w27
ffff00000033f0d4 <sched_switch+0x2b4> adrp      x27, ffff000000c85000 <dpcpu+0x158>
ffff00000033f0d8 <sched_switch+0x2b8> ldrb      w9, [x8, #894]
ffff00000033f0dc <sched_switch+0x2bc> cmp       w11, w9
ffff00000033f0e0 <sched_switch+0x2c0> b.cs      ffff00000033f150 <sched_switch+0x330>  // b.hs, b.nlast
This is code for the later part of what is
shown below:
static void
tdq_notify(struct tdq *tdq, struct thread *td)
{
        struct thread *ctd;
        int pri;
        int cpu;
        if (tdq->tdq_ipipending)
                return;
        cpu = td_get_sched(td)->ts_cpu;
        pri = td->td_priority;
        ctd = pcpu_find(cpu)->pc_curthread;
        if (!sched_shouldpreempt(pri, ctd->td_priority, 1))
                return;
. . .
}
(Where: sched_shouldpreempt has been inlined and
some of it is interlaced.)
The failing [x8, #894] is the attempt to access: ctd->td_priority
In other words: ctd == NULL resulted from the pcpu_find
(i.e., x8 == 0 ). As for how it got to be zero:
db> show reg
spsr        0x9600000440000085
x0          0xffff000000ac1000  __pcpu+0x200
. . .
db> x/gx cpuid_to_pcpu,32
cpuid_to_pcpu:  ffff000000ac0e00                ffff000000ac1000
cpuid_to_pcpu+0x10:     ffff000000ac1200                ffff000000ac1400
cpuid_to_pcpu+0x20:     0                               0
. . .
(So cpu == 1 .)
db> x/gx 0xffff000000ac1000,8
__pcpu+0x200:   0                               fffffd00005dda80
__pcpu+0x210:   0                               0
__pcpu+0x220:   0                               0
__pcpu+0x230:   100000000                       ffff000000ac1200
Thus it seems that at the time for cpu==1 :
        pcpu_find(cpu)->pc_curthread == NULL
(at __pcpu+0x200).
===
Mark Millard
markmi at dsl-only.net
    
    
More information about the freebsd-arm
mailing list