Official armv7 PkgBase kernel-NODEBUG installation's USB2 boot gets "Fatal kernel mode data abort: 'Alignment Fault' on write" very early, at least on an OrangePi+ 2ed
Date: Thu, 07 Nov 2024 09:42:09 UTC
Note: Unfortunately, the panics here are too early for a
dump device to be available.
Context: the PkgBase upgrade was started from:
# uname -apKU
FreeBSD OPiP2E-RPi2v1p1 15.0-CURRENT FreeBSD 15.0-CURRENT main-n272821-37798b1d5dd1 GENERIC-NODEBUG arm armv7 1500025 1500025
Installed packages to be UPGRADED:
FreeBSD-dtb: 15.snap20241009161500 -> 15.snap20241028121139 [base]
FreeBSD-kernel-generic: 15.snap20241011221604 -> 15.snap20241106134422 [base]
FreeBSD-kernel-generic-dbg: 15.snap20241011221604 -> 15.snap20241106134422 [base]
FreeBSD-kernel-generic-mmccam: 15.snap20241011221604 -> 15.snap20241106134422 [base]
FreeBSD-kernel-generic-mmccam-dbg: 15.snap20241011221604 -> 15.snap20241106134422 [base]
FreeBSD-kernel-generic-nodebug: 15.snap20241011221604 -> 15.snap20241106134422 [base]
FreeBSD-kernel-generic-nodebug-dbg: 15.snap20241011221604 -> 15.snap20241106134422 [base]
FreeBSD-src-sys: 15.snap20241011221604 -> 15.snap20241106160110 [base]
(Those were installed, but the FreeBSD-dtb package had the Linux 6.4
dtb files, not the 6.8 ones. The 6.8 ones from a personal build
were copied to where they need to be. I've separately
reported the 6.4 vs. 6.8 issue.)
# ~/pkgbase-snapshot-list.sh
Via pkg-static info -C -x '^FreeBSD-' . . .
1 FreeBSD-*-15.snap20241106160110
6 FreeBSD-*-15.snap20241106134422
1 FreeBSD-*-15.snap20241028121139
3 FreeBSD-*-15.snap20241011221604
2 FreeBSD-*-15.snap20241011210446
38 FreeBSD-*-15.snap20241011182434
4 FreeBSD-*-15.snap20241011073851
5 FreeBSD-*-15.snap20241010141501
1 FreeBSD-*-15.snap20241010120743
296 FreeBSD-*-15.snap20241009161500
Instead via /var/cache/pkg/*.snap*.pkg . . .
1 FreeBSD-*-15.snap20241106160110
6 FreeBSD-*-15.snap20241106134422
1 FreeBSD-*-15.snap20241028121139
10 FreeBSD-*-15.snap20241011221604
2 FreeBSD-*-15.snap20241011210446
38 FreeBSD-*-15.snap20241011182434
4 FreeBSD-*-15.snap20241011073851
5 FreeBSD-*-15.snap20241010141501
1 FreeBSD-*-15.snap20241010120743
297 FreeBSD-*-15.snap20241009161500
The failure (kernel-GENERIC-NODEBUG):
. . .
Root mount waiting for: usbus3 CAM
Fatal kernel mode data abort: 'Alignment Fault' on write
trapframe: 0xc6c9ac10
FSR=00000801, FAR=db23209b, spsr=20000013
r0 =db232080, r1 =00000000, r2 =00000006, r3 =00000024
r4 =db19e280, r5 =00000000, r6 =00000001, r7 =00000006
r8 =c6c9ad20, r9 =c0b7973c, r10=c092074c, r11=c6c9acb8
r12=00000000, ssp=c6c9aca0, slr=c01b01d8, pc =c01aff88
panic: Fatal abort
cpuid = 1
time = 3
KDB: stack backtrace:
db_trace_self() at db_trace_self
pc = 0xc0667004 lr = 0xc0078630 (db_trace_self_wrapper+0x30)
sp = 0xc6c9a9c8 fp = 0xc6c9aae0
db_trace_self_wrapper() at db_trace_self_wrapper+0x30
pc = 0xc0078630 lr = 0xc0328db8 (vpanic+0x140)
sp = 0xc6c9aae8 fp = 0xc6c9ab08
r4 = 0x00000100 r5 = 0x00000000
r6 = 0xc084d1f1 r7 = 0xc0b69a94
vpanic() at vpanic+0x140
pc = 0xc0328db8 lr = 0xc0328c78 (vpanic)
sp = 0xc6c9ab10 fp = 0xc6c9ab14
r4 = 0xc6c9ac10 r5 = 0x00000013
r6 = 0xdb23209b r7 = 0x00000001
r8 = 0x00000801 r9 = 0x00000013
r10 = 0xdb23209b
vpanic() at vpanic
pc = 0xc0328c78 lr = 0xc068c8e8 (abort_align)
sp = 0xc6c9ab1c fp = 0xc6c9ab48
r4 = 0x00000001 r5 = 0x00000801
r6 = 0x00000013 r7 = 0xdb23209b
r8 = 0xc6c9ab14 r9 = 0xc0328c78
r10 = 0xc6c9ab1c
abort_align() at abort_align
pc = 0xc068c8e8 lr = 0xc068c958 (abort_align+0x70)
sp = 0xc6c9ab50 fp = 0xc6c9ab68
r4 = 0xc6d21c00 r10 = 0xdb23209b
abort_align() at abort_align+0x70
pc = 0xc068c958 lr = 0xc068c5e0 (abort_handler+0x430)
sp = 0xc6c9ab70 fp = 0xc6c9ac08
r4 = 0x00000000 r10 = 0xdb23209b
abort_handler() at abort_handler+0x430
pc = 0xc068c5e0 lr = 0xc0669868 (exception_exit)
sp = 0xc6c9ac10 fp = 0xc6c9acb8
r4 = 0xdb19e280 r5 = 0x00000000
r6 = 0x00000001 r7 = 0x00000006
r8 = 0xc6c9ad20 r9 = 0xc0b7973c
r10 = 0xc092074c
exception_exit() at exception_exit
pc = 0xc0669868 lr = 0xc01b01d8 (usb_msc_auto_quirk+0xfc)
sp = 0xc6c9aca0 fp = 0xc6c9acb8
r0 = 0xdb232080 r1 = 0x00000000
r2 = 0x00000006 r3 = 0x00000024
r4 = 0xdb19e280 r5 = 0x00000000
r6 = 0x00000001 r7 = 0x00000006
r8 = 0xc6c9ad20 r9 = 0xc0b7973c
r10 = 0xc092074c r12 = 0x00000000
bbb_command_start() at bbb_command_start+0x4c
pc = 0xc01aff88 lr = 0xc01b01d8 (usb_msc_auto_quirk+0xfc)
sp = 0xc6c9acc0 fp = 0xc6c9acf8
r4 = 0xdb16d800 r5 = 0xdb19e280
r6 = 0x00000001 r10 = 0xc092074c
usb_msc_auto_quirk() at usb_msc_auto_quirk+0xfc
pc = 0xc01b01d8 lr = 0xc01a4bd8 (usb_alloc_device+0x9c4)
sp = 0xc6c9ad00 fp = 0xc6c9ad68
r4 = 0x00000000 r5 = 0x00000001
r6 = 0x00000000 r7 = 0x00000002
r8 = 0xdb16d800 r9 = 0xda241c78
r10 = 0x000003ee
usb_alloc_device() at usb_alloc_device+0x9c4
pc = 0xc01a4bd8 lr = 0xc01ad16c (uhub_explore+0x494)
sp = 0xc6c9ad70 fp = 0xc6c9adc0
r4 = 0x00000000 r5 = 0x00000000
r6 = 0xdb16e800 r7 = 0x00000000
r8 = 0xdb18c200 r9 = 0x00000001
r10 = 0x00000000
uhub_explore() at uhub_explore+0x494
pc = 0xc01ad16c lr = 0xc0198654 (usb_bus_explore+0x1d4)
sp = 0xc6c9adc8 fp = 0xc6c9add8
r4 = 0xda241c78 r5 = 0xdb16e800
r6 = 0x00000000 r7 = 0xda241d6c
r8 = 0xc09b0b5f r9 = 0x00000001
r10 = 0xda241d1c
usb_bus_explore() at usb_bus_explore+0x1d4
pc = 0xc0198654 lr = 0xc01b22d0 (usb_process+0x124)
sp = 0xc6c9ade0 fp = 0xc6c9ae10
r4 = 0xda241d0c r5 = 0xda241d14
usb_process() at usb_process+0x124
pc = 0xc01b22d0 lr = 0xc02da4f0 (fork_exit+0xb0)
sp = 0xc6c9ae18 fp = 0xc6c9ae38
r4 = 0xc6c9ae40 r5 = 0xc6d21c00
r6 = 0xc6d08740 r7 = 0xda241d0c
r8 = 0xc01b21ac r9 = 0x00000000
r10 = 0x00000000
fork_exit() at fork_exit+0xb0
pc = 0xc02da4f0 lr = 0xc06697fc (swi_exit)
sp = 0xc6c9ae40 fp = 0x00000000
r4 = 0xc01b21ac r5 = 0xda241d0c
r6 = 0x00000000 r7 = 0x00000000
r8 = 0x00000000 r10 = 0x00000000
swi_exit() at swi_exit
pc = 0xc06697fc lr = 0xc06697fc (swi_exit)
sp = 0xc6c9ae40 fp = 0x00000000
KDB: enter: panic
[ thread pid 14 tid 100069 ]
Stopped at kdb_enter+0x54: ldrb r15, [r15, r15, ror r15]!
db>
Looking at bbb_command_start()'s pc:
# llvm-addr2line -e /boot/kernel.GENERIC-NODEBUG/kernel 0xc01aff88
/home/pkgbuild/worktrees/main/sys/dev/usb/usb_msctest.c:554
What leads to that line is:
/*------------------------------------------------------------------------*
* bbb_command_start - execute a SCSI command synchronously
*
* Return values
* 0: Success
* Else: Failure
*------------------------------------------------------------------------*/
static int
bbb_command_start(struct bbb_transfer *sc, uint8_t dir, uint8_t lun,
void *data_ptr, size_t data_len, void *cmd_ptr, size_t cmd_len,
usb_timeout_t data_timeout)
{
sc->lun = lun;
sc->dir = data_len ? dir : DIR_NONE;
sc->data_ptr = data_ptr;
sc->data_len = data_len;
sc->data_rem = data_len;
sc->data_timeout = (data_timeout + USB_MS_HZ);
sc->actlen = 0;
sc->error = 0;
sc->cmd_len = cmd_len;
memset(&sc->cbw->CBWCDB, 0, sizeof(sc->cbw->CBWCDB));
The memset line is line 554 of sys/dev/usb/usb_msctest.c .
I'll note that attempting to use the WITNESS variant of the kernel
( /boot/kernel/ ) gets a different, even earlier failure:
. . .
VT: init without driver.
panic: acquiring blockable sleep lock with spinlock or critical section held (sleep mutex) pmap @ /home/pkgbuild/worktrees/main/sys/arm/arm/pmap-v6.c:6455
cpuid = 0
time = 1
KDB: stack backtrace:
Fatal kernel mode data abort: 'Translation Fault (L1)' on read
trapframe: 0xc0f14568
FSR=00000005, FAR=db7fcfb1, spsr=200001d3
r0 =c0f1465c, r1 =00000001, r2 =db7fcfae, r3 =1b000a4e
r4 =c07fc55c, r5 =8fce1b89, r6 =00006f3e, r7 =81000000
r8 =c07c4b6c, r9 =c094ace8, r10=c09741d8, r11=c0f14618
r12=c0f146c4, ssp=c0f145fc, slr=c0601428, pc =c062686c
panic: Fatal abort
cpuid = 0
time = 1
KDB: stack backtrace:
Fatal kernel mode data abort: 'Translation Fault (L1)' on read
trapframe: 0xc0f141f0
FSR=00000005, FAR=db7fcfb1, spsr=200001d3
r0 =c0f142e4, r1 =00000001, r2 =db7fcfae, r3 =1b000a4e
r4 =c07fc55c, r5 =8fce1b89, r6 =00006f3e, r7 =81000000
r8 =c07c4b6c, r9 =c094ace8, r10=c09741d8, r11=c0f142a0
r12=c0f1434c, ssp=c0f14284, slr=c0601428, pc =c062686c
panic: Fatal abort
cpuid = 0
time = 1
KDB: stack backtrace:
Fatal kernel mode data abort: 'Translation Fault (L1)' on read
trapframe: 0xc0f13e78
FSR=00000005, FAR=db7fcfb1, spsr=200001d3
r0 =c0f13f6c, r1 =00000001, r2 =db7fcfae, r3 =1b000a4e
r4 =c07fc55c, r5 =8fce1b89, r6 =00006f3e, r7 =81000000
r8 =c07c4b6c, r9 =c094ace8, r10=c09741d8, r11=c0f13f28
r12=c0f13fd4, ssp=c0f13f0c, slr=c0601428, pc =c062686c
panic: Fatal abort
cpuid = 0
time = 1
KDB: stack backtrace:
Fatal kernel mode data abort: 'Translation Fault (L1)' on read
trapframe: 0xc0f13b00
FSR=00000005, FAR=db7fcfb1, spsr=200001d3
r0 =c0f13bf4, r1 =00000001, r2 =db7fcfae, r3 =1b000a4e
r4 =c07fc55c, r5 =8fce1b89, r6 =00006f3e, r7 =81000000
r8 =c07c4b6c, r9 =c094ace8, r10=c09741d8, r11=c0f13bb0
r12=c0f13c5c, ssp=c0f13b94, slr=c0601428, pc =c062686c
panic: Fatal abort
cpuid = 0
time = 1
KDB: stack backtrace:
Fatal kernel mode data abort: 'Translation Fault (L1)' on read
trapframe: 0xc0f13788
FSR=00000005, FAR=db7fcfb1, spsr=200001d3
r0 =c0f1387c, r1 =00000001, r2 =db7fcfae, r3 =1b000a4e
r4 =c07fc55c, r5 =8fce1b89, r6 =00006f3e, r7 =81000000
r8 =c07c4b6c, r9 =c094ace8, r10=c09741d8, r11=c0f13838
r12=c0f138e4, ssp=c0f1381c, slr=c0601428, pc =c062686c
. . .
Looking:
# llvm-addr2line -e /boot/kernel.GENERIC-NODEBUG/kernel 0xc062686c
/home/pkgbuild/worktrees/main/sys/vm/uma_core.c:5676
static int
sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS)
{
uma_zone_t zone = arg1;
uint64_t cur;
cur = uma_zone_get_frees(zone);
return (sysctl_handle_64(oidp, &cur, 0, req));
}
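The "return" line is 5676 of sys/vm/uma_core.c .
(Caveat: the pc 0xc062686c came from booting the WITNESS kernel in
/boot/kernel/ , but the lookup above was done against
/boot/kernel.GENERIC-NODEBUG/kernel . Unless the two kernels happen to
place this code at the same address, the reported source line may not
correspond to the actual fault location; the lookup should be repeated
against /boot/kernel/kernel .)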
Also, for what leads up to:
/home/pkgbuild/worktrees/main/sys/arm/arm/pmap-v6.c:6455
/*
* The implementation of pmap_fault() uses IN_RANGE2() macro which
* depends on the fact that given range size is a power of 2.
*/
CTASSERT(powerof2(NB_IN_PT1));
CTASSERT(powerof2(PT2MAP_SIZE));
#define IN_RANGE2(addr, start, size) \
((vm_offset_t)(start) == ((vm_offset_t)(addr) & ~((size) - 1)))
/*
* Handle access and R/W emulation faults.
*/
int
pmap_fault(pmap_t pmap, vm_offset_t far, uint32_t fsr, int idx, bool usermode)
{
pt1_entry_t *pte1p, pte1;
pt2_entry_t *pte2p, pte2;
if (pmap == NULL)
pmap = kernel_pmap;
/*
* In kernel, we should never get abort with FAR which is in range of
* pmap->pm_pt1 or PT2MAP address spaces. If it happens, stop here
* and print out a useful abort message and even get to the debugger
* otherwise it likely ends with never ending loop of aborts.
*/
if (__predict_false(IN_RANGE2(far, pmap->pm_pt1, NB_IN_PT1))) {
/*
* All L1 tables should always be mapped and present.
* However, we check only current one herein. For user mode,
* only permission abort from malicious user is not fatal.
* And alignment abort as it may have higher priority.
*/
if (!usermode || (idx != FAULT_ALIGN && idx != FAULT_PERM_L2)) {
CTR4(KTR_PMAP, "%s: pmap %#x pm_pt1 %#x far %#x",
__func__, pmap, pmap->pm_pt1, far);
panic("%s: pm_pt1 abort", __func__);
}
return (KERN_INVALID_ADDRESS);
}
if (__predict_false(IN_RANGE2(far, PT2MAP, PT2MAP_SIZE))) {
/*
* PT2MAP should be always mapped and present in current
* L1 table. However, only existing L2 tables are mapped
* in PT2MAP. For user mode, only L2 translation abort and
* permission abort from malicious user is not fatal.
* And alignment abort as it may have higher priority.
*/
if (!usermode || (idx != FAULT_ALIGN &&
idx != FAULT_TRAN_L2 && idx != FAULT_PERM_L2)) {
CTR4(KTR_PMAP, "%s: pmap %#x PT2MAP %#x far %#x",
__func__, pmap, PT2MAP, far);
panic("%s: PT2MAP abort", __func__);
}
return (KERN_INVALID_ADDRESS);
}
/*
* A pmap lock is used below for handling of access and R/W emulation
* aborts. They were handled by atomic operations before so some
* analysis of new situation is needed to answer the following question:
* Is it safe to use the lock even for these aborts?
*
* There may happen two cases in general:
*
* (1) Aborts while the pmap lock is locked already - this should not
* happen as pmap lock is not recursive. However, under pmap lock only
* internal kernel data should be accessed and such data should be
* mapped with A bit set and NM bit cleared. If double abort happens,
* then a mapping of data which has caused it must be fixed. Further,
* all new mappings are always made with A bit set and the bit can be
* cleared only on managed mappings.
*
* (2) Aborts while another lock(s) is/are locked - this already can
* happen. However, there is no difference here if it's either access or
* R/W emulation abort, or if it's some other abort.
*/
PMAP_LOCK(pmap);
That "PMAP_LOCK(pmap);" line is line 6455 of sys/arm/arm/pmap-v6.c .
FYI: Booting the prior kernel.GENERIC-NODEBUG/ (now called
kernel.GENERIC-NODEBUG.good/) continues to operate
normally. Unfortunately, I do not have the older PkgBase kernel/
around to try.
===
Mark Millard
marklmi at yahoo.com