git: ab63af3ab3fd - stable/13 - arm64: Use adrp + :lo12: to load globals from asm
Date: Mon, 19 Feb 2024 16:45:05 UTC
The branch stable/13 has been updated by andrew:

URL: https://cgit.FreeBSD.org/src/commit/?id=ab63af3ab3fdaee5dd934fa1d5eda981c28e2a87

commit ab63af3ab3fdaee5dd934fa1d5eda981c28e2a87
Author:     Andrew Turner <andrew@FreeBSD.org>
AuthorDate: 2023-11-11 09:27:30 +0000
Commit:     Andrew Turner <andrew@FreeBSD.org>
CommitDate: 2024-02-19 12:44:31 +0000

    arm64: Use adrp + :lo12: to load globals from asm

    When loading a global variable we can use a pseudo-instruction
    similar to "ldr xn, =global" to load the address of the symbol. As
    the address is unlikely to be encodable in a mov instruction, this
    expands to a pc-relative load, with the absolute address written to
    a literal pool at the end of the function so it can be loaded from
    there.

    This load can be partially replaced with an adrp instruction, which
    generates the address of the symbol, aligned to a 4k boundary, using
    a pc-relative addition. Because that address is 4k-aligned, the read
    of the global is then updated to a load whose offset is the low 12
    bits of the symbol's address. Arm64 assemblers provide the :lo12:
    operator for this, e.g. "ldr xn, [xn, :lo12:global]".

    The only remaining users of "ldr xn, =global" that I can find are
    executed from the physical address space the kernel was loaded in
    and need an address in the kernel's virtual address space. Because
    of this they can't use adrp.

    Sponsored by:	Arm Ltd
    Differential Revision:	https://reviews.freebsd.org/D42565

    (cherry picked from commit 7eb26be9c8080686f64fdc0a28e5ae7839bbc82d)
---
 sys/arm64/arm64/cpufunc_asm.S | 7 ++++---
 sys/arm64/arm64/locore.S      | 4 ++--
 sys/arm64/arm64/support.S     | 4 ++--
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/sys/arm64/arm64/cpufunc_asm.S b/sys/arm64/arm64/cpufunc_asm.S
index 2b2ca6836530..8163e6c3d0d0 100644
--- a/sys/arm64/arm64/cpufunc_asm.S
+++ b/sys/arm64/arm64/cpufunc_asm.S
@@ -52,11 +52,12 @@
  */
 .macro cache_handle_range dcop = 0, ic = 0, icop = 0
 .if \ic == 0
-	ldr	x3, =dcache_line_size	/* Load the D cache line size */
+	adrp	x3, dcache_line_size	/* Load the D cache line size */
+	ldr	x3, [x3, :lo12:dcache_line_size]
 .else
-	ldr	x3, =idcache_line_size	/* Load the I & D cache line size */
+	adrp	x3, idcache_line_size	/* Load the I & D cache line size */
+	ldr	x3, [x3, :lo12:idcache_line_size]
 .endif
-	ldr	x3, [x3]
 	sub	x4, x3, #1		/* Get the address mask */
 	and	x2, x0, x4		/* Get the low bits of the address */
 	add	x1, x1, x2		/* Add these to the size */
diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S
index 58f0ad8d85aa..7d93d1de30c0 100644
--- a/sys/arm64/arm64/locore.S
+++ b/sys/arm64/arm64/locore.S
@@ -204,8 +204,8 @@ mp_virtdone:
 	BTI_J

 	/* Start using the AP boot stack */
-	ldr	x4, =bootstack
-	ldr	x4, [x4]
+	adrp	x4, bootstack
+	ldr	x4, [x4, :lo12:bootstack]
 	mov	sp, x4

 #if defined(PERTHREAD_SSP)
diff --git a/sys/arm64/arm64/support.S b/sys/arm64/arm64/support.S
index 805867fd5d17..a2d422c31fea 100644
--- a/sys/arm64/arm64/support.S
+++ b/sys/arm64/arm64/support.S
@@ -284,8 +284,8 @@ END(pagezero_simple)

 ENTRY(pagezero_cache)
 	add	x1, x0, #PAGE_SIZE
-	ldr	x2, =dczva_line_size
-	ldr	x2, [x2]
+	adrp	x2, dczva_line_size
+	ldr	x2, [x2, :lo12:dczva_line_size]
 1:
 	dc	zva, x0
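For reference, every hunk above applies the same before/after pattern. A
minimal sketch of that pattern follows; "global" here is a placeholder
for a hypothetical 8-byte variable, not a symbol from the tree:

	/* Before: the ldr pseudo-instruction places the absolute 64-bit
	 * address of the symbol in a literal pool at the end of the
	 * function, loads it pc-relatively, then dereferences it to
	 * read the variable. */
	ldr	x0, =global
	ldr	x0, [x0]

	/* After: adrp computes the 4k-aligned page address of the
	 * symbol with a pc-relative addition, and :lo12: supplies the
	 * low 12 bits of the symbol's address as the load offset, so
	 * no literal pool entry is needed. */
	adrp	x0, global
	ldr	x0, [x0, :lo12:global]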