head -r323246 Pine64+ 2GB boot time context: acquiring blockable sleep lock with spinlock or critical section held for data_abort calling pmap_fault calling __mtx_lock_flags
Mark Millard
markmi at dsl-only.net
Mon Sep 11 09:21:38 UTC 2017
[I got another blockable sleep lock panic during
the Pine64+ 2GB boot, this time with ddb> input
working. I show both the older example and the
new one.]
On 2017-Sep-10, at 3:25 PM, Mark Millard <markmi at dsl-only.net> wrote:
> [I got a boot-time panic with a debug kernel that
> reported a "acquiring blockable sleep lock with
> spinlock or critical section held (sleep mutex)".
> This was for data_abort calling pmap_fault calling
> __mtx_lock_flags . I first include prior non-debug
> kernel reports in case they are related.]
>
> . . .
>
> . . .
> Release APs
> APs not started
> CPU 0: ARM Cortex-A53 r0p4 affinity: 0
> Instruction Set Attributes 0 = <AES+PMULL,SHA1,SHA2,CRC32>
> Instruction Set Attributes 1 = <0>
> Processor Features 0 = <AdvSIMD,Float,EL3 32,EL2 32,EL1 32,EL0 32>
> Processor Features 1 = <0>
> Memory Model Features 0 = <4k Granule,64k Granule,MixedEndian,S/NS Mem,16bit ASID,1TB PA>
> Memory Model Features 1 = <>
> Debug Features 0 = <2 CTX Breakpoints,4 Watchpoints,6 Breakpoints,PMUv3,Debug v8>
> Debug Features 1 = <0>
> Auxiliary Features 0 = <0>
> Auxiliary Features 1 = <0>
> CPU 1: (null) (null) r0p0 affinity: 0
> CPU 2: (null) (null) r0p0 affinity: 0
> CPU 3: (null) (null) r0p0 affinity: 0
> panic: acquiring blockable sleep lock with spinlock or critical section held (sleep mutex) pmap @ /usr/src/sys/arm64/arm64/pmap.c:4710
> cpuid = 0
> time = 13
> KDB: stack backtrace:
> db_trace_self() at db_trace_self_wrapper+0x28
> pc = 0xffff0000005efc78 lr = 0xffff000000088094
> sp = 0xffff000069850080 fp = 0xffff000069850290
>
> db_trace_self_wrapper() at vpanic+0x164
> pc = 0xffff000000088094 lr = 0xffff00000031764c
> sp = 0xffff0000698502a0 fp = 0xffff000069850310
>
> vpanic() at kassert_panic+0x15c
> pc = 0xffff00000031764c lr = 0xffff0000003174e4
> sp = 0xffff000069850320 fp = 0xffff0000698503e0
>
> kassert_panic() at witness_checkorder+0x160
> pc = 0xffff0000003174e4 lr = 0xffff000000374990
> sp = 0xffff0000698503f0 fp = 0xffff000069850470
>
> witness_checkorder() at __mtx_lock_flags+0xa8
> pc = 0xffff000000374990 lr = 0xffff0000002f8b7c
> sp = 0xffff000069850480 fp = 0xffff0000698504b0
>
> __mtx_lock_flags() at pmap_fault+0x40
> pc = 0xffff0000002f8b7c lr = 0xffff000000606994
> sp = 0xffff0000698504c0 fp = 0xffff0000698504e0
>
> pmap_fault() at data_abort+0xb8
> pc = 0xffff000000606994 lr = 0xffff000000608a9c
> sp = 0xffff0000698504f0 fp = 0xffff0000698505a0
>
> data_abort() at do_el1h_sync+0xfc
> pc = 0xffff000000608a9c lr = 0xffff0000006088f0
> sp = 0xffff0000698505b0 fp = 0xffff0000698505e0
>
> do_el1h_sync() at handle_el1h_sync+0x74
> pc = 0xffff0000006088f0 lr = 0xffff0000005f1874
> sp = 0xffff0000698505f0 fp = 0xffff000069850700
>
> handle_el1h_sync() at sched_switch+0x2a8
> pc = 0xffff0000005f1874 lr = 0xffff00000033f0c8
> sp = 0xffff000069850710 fp = 0xffff0000698507f0
>
> sched_switch() at mi_switch+0x1b8
> pc = 0xffff00000033f0c8 lr = 0xffff00000032161c
> sp = 0xffff000069850800 fp = 0xffff000069850820
>
> mi_switch() at taskqgroup_binder+0x7c
> pc = 0xffff00000032161c lr = 0xffff00000035510c
> sp = 0xffff000069850830 fp = 0xffff000069850860
>
> taskqgroup_binder() at gtaskqueue_run_locked+0x104
> pc = 0xffff00000035510c lr = 0xffff000000354f74
> sp = 0xffff000069850870 fp = 0xffff0000698508e0
>
> gtaskqueue_run_locked() at gtaskqueue_thread_loop+0x9c
> pc = 0xffff000000354f74 lr = 0xffff000000354d10
> sp = 0xffff0000698508f0 fp = 0xffff000069850910
>
> gtaskqueue_thread_loop() at fork_exit+0x7c
> pc = 0xffff000000354d10 lr = 0xffff0000002dbd3c
> sp = 0xffff000069850920 fp = 0xffff000069850950
>
> fork_exit() at fork_trampoline+0x10
> pc = 0xffff0000002dbd3c lr = 0xffff000000608664
> sp = 0xffff000069850960 fp = 0x0000000000000000
>
> KDB: enter: panic
> [ thread pid 0 tid 100058 ]
> Stopped at sched_switch+0x2b8: ldrb w9, [x8, #894]
> db>
>
> Unfortunately it was not taking console input so that is
> all I got.
From the new example:
CPU 1: (null) (null) r0p0 affinity: 0
CPU 2: (null) (null) r0p0 affinity: 0
CPU 3: (null) (null) r0p0 affinity: 0
panic: acquiring blockable sleep lock with spinlock or critical section held (sleep mutex) pmap @ /usr/src/sys/arm64/arm64/pmap.c:4710
cpuid = 0
time = 13
KDB: stack backtrace:
db_trace_self() at db_trace_self_wrapper+0x28
pc = 0xffff0000005efc78 lr = 0xffff000000088094
sp = 0xffff000069850080 fp = 0xffff000069850290
db_trace_self_wrapper() at vpanic+0x164
pc = 0xffff000000088094 lr = 0xffff00000031764c
sp = 0xffff0000698502a0 fp = 0xffff000069850310
vpanic() at kassert_panic+0x15c
pc = 0xffff00000031764c lr = 0xffff0000003174e4
sp = 0xffff000069850320 fp = 0xffff0000698503e0
kassert_panic() at witness_checkorder+0x160
pc = 0xffff0000003174e4 lr = 0xffff000000374990
sp = 0xffff0000698503f0 fp = 0xffff000069850470
witness_checkorder() at __mtx_lock_flags+0xa8
pc = 0xffff000000374990 lr = 0xffff0000002f8b7c
sp = 0xffff000069850480 fp = 0xffff0000698504b0
__mtx_lock_flags() at pmap_fault+0x40
pc = 0xffff0000002f8b7c lr = 0xffff000000606994
sp = 0xffff0000698504c0 fp = 0xffff0000698504e0
pmap_fault() at data_abort+0xb8
pc = 0xffff000000606994 lr = 0xffff000000608a9c
sp = 0xffff0000698504f0 fp = 0xffff0000698505a0
data_abort() at do_el1h_sync+0xfc
pc = 0xffff000000608a9c lr = 0xffff0000006088f0
sp = 0xffff0000698505b0 fp = 0xffff0000698505e0
do_el1h_sync() at handle_el1h_sync+0x74
pc = 0xffff0000006088f0 lr = 0xffff0000005f1874
sp = 0xffff0000698505f0 fp = 0xffff000069850700
handle_el1h_sync() at sched_switch+0x2a8
pc = 0xffff0000005f1874 lr = 0xffff00000033f0c8
sp = 0xffff000069850710 fp = 0xffff0000698507f0
sched_switch() at mi_switch+0x1b8
pc = 0xffff00000033f0c8 lr = 0xffff00000032161c
sp = 0xffff000069850800 fp = 0xffff000069850820
mi_switch() at taskqgroup_binder+0x7c
pc = 0xffff00000032161c lr = 0xffff00000035510c
sp = 0xffff000069850830 fp = 0xffff000069850860
taskqgroup_binder() at gtaskqueue_run_locked+0x104
pc = 0xffff00000035510c lr = 0xffff000000354f74
sp = 0xffff000069850870 fp = 0xffff0000698508e0
gtaskqueue_run_locked() at gtaskqueue_thread_loop+0x9c
pc = 0xffff000000354f74 lr = 0xffff000000354d10
sp = 0xffff0000698508f0 fp = 0xffff000069850910
gtaskqueue_thread_loop() at fork_exit+0x7c
pc = 0xffff000000354d10 lr = 0xffff0000002dbd3c
sp = 0xffff000069850920 fp = 0xffff000069850950
fork_exit() at fork_trampoline+0x10
pc = 0xffff0000002dbd3c lr = 0xffff000000608664
sp = 0xffff000069850960 fp = 0x0000000000000000
KDB: enter: panic
[ thread pid 0 tid 100058 ]
Stopped at sched_switch+0x2b8: ldrb w9, [x8, #894]
It turns our that x8 is reported as holding the value zero:
db> show regs
No such command; use "help" to list available commands
db> show reg
spsr 0x9600000440000085
x0 0xffff000000ac1000 __pcpu+0x200
x1 0x4
x2 0xffff00000068a5cb $d.4+0x15c
x3 0x218 $d.9+0x1d8
x4 0
x5 0x11
x6 0xffff000000a45f20
x7 0x40 $d.14
x8 0
x9 0x5
x10 0xffff0000009a7d88 tdq_cpu+0xe08
x11 0x18
x12 0x1ddc88
x13 0x7ff707d0
x14 0
x15 0x7ff6e010
x16 0x2af8 $d.1+0x122e
x17 0x27c0 $d.1+0xef6
x18 0xffff000069850790
x19 0xfffffd0001415a80
x20 0xffff0000009a7c80 tdq_cpu+0xd00
x21 0xffff0000009a6f80 tdq_cpu
x22 0xffff0000009a7d1d tdq_cpu+0xd9d
x23 0x1
x24 0
x25 0xffff0000009a6f80 tdq_cpu
x26 0xffff000000c85000 dpcpu+0x158
x27 0xffff000000c85000 dpcpu+0x158
x28 0
x29 0xffff0000698507f0
lr 0xffff00000033f0cc sched_switch+0x2ac
elr 0xffff00000033f0dc sched_switch+0x2bc
sp 0xffff000069850790
sched_switch+0x2b8: ldrb w9, [x8, #894]
db> show lockchain
thread 100058 (pid 0, softirq_1) is on a run queue
db> show locks
db> show lock
db> show locktree
db> show sleepqueue
db> show sleepq
ddb> show sleepchain
thread 100058 (pid 0, softirq_1) is on a run queue
db> show alllocks
Process 0 (kernel) thread 0xffff000000acd500 (100000)
exclusive sleep mutex Giant (Giant) r = 0 (0xffff000000c5d860) locked @ /usr/src/sys/kern/kern_module.c:116
db> show allchains
chain 1:
thread 100049 (pid 18, vmdaemon) sleeping on 0xffff000000aa811c "psleep"
chain 2:
thread 100054 (pid 17, laundry: dom0) sleeping on 0xffff000000aa80c4 "launds"
chain 3:
thread 100055 (pid 17, uma) sleeping on 0xffff000000aa7b68 "umarcl"
chain 4:
thread 100047 (pid 16, mmcsd0: mmc/sd card) sleeping on 0xfffffd0000638800 "mmcsd disk jobqueue"
chain 5:
thread 100046 (pid 15, soaiod4) sleeping on 0xffff000000a9dbe4 "-"
chain 6:
thread 100045 (pid 9, soaiod3) sleeping on 0xffff000000a9dbe4 "-"
chain 7:
thread 100044 (pid 8, soaiod2) sleeping on 0xffff000000a9dbe4 "-"
chain 8:
thread 100043 (pid 7, soaiod1) sleeping on 0xffff000000a9dbe4 "-"
chain 9:
thread 100036 (pid 5, sctp_iterator) sleeping on 0xffff000000c7bf20 "waiting_for_work"
chain 10:
thread 100028 (pid 14, usbus0) sleeping on 0xffff000040925358 "-"
chain 11:
thread 100029 (pid 14, usbus0) sleeping on 0xffff0000409253b0 "-"
chain 12:
thread 100030 (pid 14, usbus0) sleeping on 0xffff000040925408 "-"
chain 13:
thread 100031 (pid 14, usbus0) sleeping on 0xffff000040925460 "-"
chain 14:
thread 100032 (pid 14, usbus0) sleeping on 0xffff0000409254b8 "-"
chain 15:
thread 100025 (pid 4, doneq0) sleeping on 0xffff000000878280 "-"
chain 16:
thread 100042 (pid 4, scanner) sleeping on 0xffff0000008780c8 "-"
chain 17:
thread 100024 (pid 3, crypto returns) sleeping on 0xffff000000aa6008 "crypto_ret_wait"
chain 18:
thread 100023 (pid 2, crypto) sleeping on 0xffff000000aa5ec0 "crypto_wait"
chain 19:
thread 100019 (pid 13, g_event) sleeping on 0xffff000000c6a450 "-"
chain 20:
thread 100020 (pid 13, g_up) sleeping on 0xffff000000c6a460 "-"
chain 21:
thread 100021 (pid 13, g_down) sleeping on 0xffff000000c6a458 "-"
chain 22:
thread 100014 (pid 12, swi4: clock (0)) blocked on lock 0xffff000000c5d860 (sleep mutex) "Giant"
thread 100000 (pid 0, swapper) is on a run queue
chain 23:
thread 100002 (pid 1, kernel) blocked on lock 0xffff000000c5d860 (sleep mutex) "Giant"
thread 100000 (pid 0, swapper) is on a run queue
chain 24:
thread 100001 (pid 10, audit) sleeping on 0xffff000000c808e0 "audit_worker_cv"
chain 25:
thread 100009 (pid 0, thread taskq) sleeping on 0xfffffd00005f2b00 "-"
chain 26:
thread 100010 (pid 0, aiod_kick taskq) sleeping on 0xfffffd00005f2a00 "-"
chain 27:
thread 100012 (pid 0, kqueue_ctx taskq) sleeping on 0xfffffd00005f2700 "-"
chain 28:
thread 100022 (pid 0, firmware taskq) sleeping on 0xfffffd00005f2000 "-"
chain 29:
thread 100037 (pid 0, acpi_task_0) sleeping on 0xfffffd00005f1400 "-"
chain 30:
thread 100038 (pid 0, acpi_task_1) sleeping on 0xfffffd00005f1400 "-"
chain 31:
thread 100039 (pid 0, acpi_task_2) sleeping on 0xfffffd00005f1400 "-"
chain 32:
thread 100041 (pid 0, CAM taskq) sleeping on 0xfffffd00005f1e00 "-"
chain 33:
thread 100056 (pid 0, if_config_tqg_0) sleeping on 0xfffffd00005f1300 "-"
chain 34:
thread 100057 (pid 0, softirq_0) sleeping on 0xfffffd00005f1200 "-"
chain 35:
thread 100059 (pid 0, softirq_2) sleeping on 0xfffffd00005f1000 "-"
chain 36:
thread 100060 (pid 0, softirq_3) sleeping on 0xfffffd00005f0e00 "-"
The code for:
panic: acquiring blockable sleep lock with spinlock or critical section held (sleep mutex) pmap @ /usr/src/sys/arm64/arm64/pmap.c:4710
is the PMAP_LOCK in:
int
pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far)
{
#ifdef SMP
uint64_t par;
#endif
switch (ESR_ELx_EXCEPTION(esr)) {
case EXCP_DATA_ABORT_L:
case EXCP_DATA_ABORT:
break;
default:
return (KERN_FAILURE);
}
#ifdef SMP
PMAP_LOCK(pmap);
switch (esr & ISS_DATA_DFSC_MASK) {
case ISS_DATA_DFSC_TF_L0:
case ISS_DATA_DFSC_TF_L1:
case ISS_DATA_DFSC_TF_L2:
case ISS_DATA_DFSC_TF_L3:
/* Ask the MMU to check the address */
if (pmap == kernel_pmap)
par = arm64_address_translate_s1e1r(far);
else
par = arm64_address_translate_s1e0r(far);
/*
* If the translation was successful the address was invalid
* due to a break-before-make sequence. We can unlock and
* return success to the trap handler.
*/
if (PAR_SUCCESS(par)) {
PMAP_UNLOCK(pmap);
return (KERN_SUCCESS);
}
break;
default:
break;
}
PMAP_UNLOCK(pmap);
#endif
return (KERN_FAILURE);
}
===
Mark Millard
markmi at dsl-only.net
More information about the freebsd-toolchain
mailing list