git: ab63af3ab3fd - stable/13 - arm64: Use adrp + :lo12: to load globals from asm
Date: Mon, 19 Feb 2024 16:45:05 UTC
The branch stable/13 has been updated by andrew:
URL: https://cgit.FreeBSD.org/src/commit/?id=ab63af3ab3fdaee5dd934fa1d5eda981c28e2a87
commit ab63af3ab3fdaee5dd934fa1d5eda981c28e2a87
Author: Andrew Turner <andrew@FreeBSD.org>
AuthorDate: 2023-11-11 09:27:30 +0000
Commit: Andrew Turner <andrew@FreeBSD.org>
CommitDate: 2024-02-19 12:44:31 +0000
arm64: Use adrp + :lo12: to load globals from asm
When loading a global variable we can use a pseudo-instruction similar
to "ldr xn, =global" to load the address of the symbol. As the address
is unlikely to be encodable in a mov instruction, the assembler emits a
pc-relative load instead, writing the absolute address into a literal
pool at the end of the function so it can be loaded from there.
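For illustration, a minimal sketch of what an assembler typically emits
for that pseudo-instruction; the symbol "global" and the label are
placeholders, not from the commit:

	ldr	x3, 1f		/* pc-relative load of the pooled address */
	ldr	x3, [x3]	/* dereference it to read the global */
	ret
1:
	.quad	global		/* 64-bit absolute address in the literal pool */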
This load can be partially replaced with an adrp instruction, which
generates the address of the symbol's 4k-aligned page using a
pc-relative addition. Because that address is 4k-aligned, the global
variable can then be read with a load whose offset is the low 12 bits
of the symbol's address. Arm64 assemblers provide the :lo12: operator
for this, e.g. "ldr xn, [xn, :lo12:global]".
The only remaining users of "ldr xn, =global" that I can find are
executed from the physical address space the kernel was loaded in, but
need an address in the kernel's virtual address space. Because of this
they can't use adrp.
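To make the distinction concrete: adrp computes an address relative to
the current pc, so while running from the physical load address it
yields a physical address, whereas the pooled constant is the link-time
virtual address. A sketch, assuming a placeholder symbol:

	adrp	x0, global	/* physical here: relative to the current (physical) pc */
	ldr	x1, =global	/* virtual: link-time absolute address from the pool */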
Sponsored by: Arm Ltd
Differential Revision: https://reviews.freebsd.org/D42565
(cherry picked from commit 7eb26be9c8080686f64fdc0a28e5ae7839bbc82d)
---
sys/arm64/arm64/cpufunc_asm.S | 7 ++++---
sys/arm64/arm64/locore.S | 4 ++--
sys/arm64/arm64/support.S | 4 ++--
3 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/sys/arm64/arm64/cpufunc_asm.S b/sys/arm64/arm64/cpufunc_asm.S
index 2b2ca6836530..8163e6c3d0d0 100644
--- a/sys/arm64/arm64/cpufunc_asm.S
+++ b/sys/arm64/arm64/cpufunc_asm.S
@@ -52,11 +52,12 @@
*/
.macro cache_handle_range dcop = 0, ic = 0, icop = 0
.if \ic == 0
- ldr x3, =dcache_line_size /* Load the D cache line size */
+ adrp x3, dcache_line_size /* Load the D cache line size */
+ ldr x3, [x3, :lo12:dcache_line_size]
.else
- ldr x3, =idcache_line_size /* Load the I & D cache line size */
+ adrp x3, idcache_line_size /* Load the I & D cache line size */
+ ldr x3, [x3, :lo12:idcache_line_size]
.endif
- ldr x3, [x3]
sub x4, x3, #1 /* Get the address mask */
and x2, x0, x4 /* Get the low bits of the address */
add x1, x1, x2 /* Add these to the size */
diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S
index 58f0ad8d85aa..7d93d1de30c0 100644
--- a/sys/arm64/arm64/locore.S
+++ b/sys/arm64/arm64/locore.S
@@ -204,8 +204,8 @@ mp_virtdone:
BTI_J
/* Start using the AP boot stack */
- ldr x4, =bootstack
- ldr x4, [x4]
+ adrp x4, bootstack
+ ldr x4, [x4, :lo12:bootstack]
mov sp, x4
#if defined(PERTHREAD_SSP)
diff --git a/sys/arm64/arm64/support.S b/sys/arm64/arm64/support.S
index 805867fd5d17..a2d422c31fea 100644
--- a/sys/arm64/arm64/support.S
+++ b/sys/arm64/arm64/support.S
@@ -284,8 +284,8 @@ END(pagezero_simple)
ENTRY(pagezero_cache)
add x1, x0, #PAGE_SIZE
- ldr x2, =dczva_line_size
- ldr x2, [x2]
+ adrp x2, dczva_line_size
+ ldr x2, [x2, :lo12:dczva_line_size]
1:
dc zva, x0