git: ab63af3ab3fd - stable/13 - arm64: Use adrp + :lo12: to load globals from asm

From: Andrew Turner <andrew_at_FreeBSD.org>
Date: Mon, 19 Feb 2024 16:45:05 UTC
The branch stable/13 has been updated by andrew:

URL: https://cgit.FreeBSD.org/src/commit/?id=ab63af3ab3fdaee5dd934fa1d5eda981c28e2a87

commit ab63af3ab3fdaee5dd934fa1d5eda981c28e2a87
Author:     Andrew Turner <andrew@FreeBSD.org>
AuthorDate: 2023-11-11 09:27:30 +0000
Commit:     Andrew Turner <andrew@FreeBSD.org>
CommitDate: 2024-02-19 12:44:31 +0000

    arm64: Use adrp + :lo12: to load globals from asm
    
    When loading a global variable we can use a pseudo-instruction of the
    form "ldr xn, =global" to load the address of the symbol. As the
    absolute address is unlikely to be encodable in a mov instruction,
    the assembler emits a pc-relative load instead, with the absolute
    address written to a literal pool at the end of the function so it
    can be loaded from there.
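    
    As a minimal sketch, this is roughly what the assembler emits for
    the old sequence (the .Lpool label is hypothetical; real literal-pool
    labels are assembler-internal):
    
            ldr     x3, .Lpool      /* pc-relative load of the address */
            ldr     x3, [x3]        /* then load the variable itself */
            ...
    .Lpool:
            .xword  dcache_line_size /* absolute address, fixed at link time */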
    
    This load can be partially replaced with an adrp instruction. adrp
    generates the address of the symbol, aligned down to a 4 KiB
    boundary, using a pc-relative addition. Because that address is
    4 KiB-aligned we can then read the global variable with a single
    load whose offset is the low 12 bits of the symbol's address. Arm64
    assemblers provide the :lo12: relocation specifier for this, e.g.
    "ldr xn, [xn, :lo12:global]".
    
    The only remaining users of "ldr xn, =global" that I can find are
    executed from the physical address space the kernel was loaded in,
    but need an address in the kernel's virtual address space. As adrp
    is pc-relative it would yield a physical address there, so these
    users can't use it.
    
    Sponsored by:   Arm Ltd
    Differential Revision:  https://reviews.freebsd.org/D42565
    
    (cherry picked from commit 7eb26be9c8080686f64fdc0a28e5ae7839bbc82d)
---
 sys/arm64/arm64/cpufunc_asm.S | 7 ++++---
 sys/arm64/arm64/locore.S      | 4 ++--
 sys/arm64/arm64/support.S     | 4 ++--
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/sys/arm64/arm64/cpufunc_asm.S b/sys/arm64/arm64/cpufunc_asm.S
index 2b2ca6836530..8163e6c3d0d0 100644
--- a/sys/arm64/arm64/cpufunc_asm.S
+++ b/sys/arm64/arm64/cpufunc_asm.S
@@ -52,11 +52,12 @@
  */
 .macro cache_handle_range dcop = 0, ic = 0, icop = 0
 .if \ic == 0
-	ldr	x3, =dcache_line_size	/* Load the D cache line size */
+	adrp	x3, dcache_line_size	/* Load the D cache line size */
+	ldr	x3, [x3, :lo12:dcache_line_size]
 .else
-	ldr	x3, =idcache_line_size	/* Load the I & D cache line size */
+	adrp	x3, idcache_line_size	/* Load the I & D cache line size */
+	ldr	x3, [x3, :lo12:idcache_line_size]
 .endif
-	ldr	x3, [x3]
 	sub	x4, x3, #1		/* Get the address mask */
 	and	x2, x0, x4		/* Get the low bits of the address */
 	add	x1, x1, x2		/* Add these to the size */
diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S
index 58f0ad8d85aa..7d93d1de30c0 100644
--- a/sys/arm64/arm64/locore.S
+++ b/sys/arm64/arm64/locore.S
@@ -204,8 +204,8 @@ mp_virtdone:
 	BTI_J
 
 	/* Start using the AP boot stack */
-	ldr	x4, =bootstack
-	ldr	x4, [x4]
+	adrp	x4, bootstack
+	ldr	x4, [x4, :lo12:bootstack]
 	mov	sp, x4
 
 #if defined(PERTHREAD_SSP)
diff --git a/sys/arm64/arm64/support.S b/sys/arm64/arm64/support.S
index 805867fd5d17..a2d422c31fea 100644
--- a/sys/arm64/arm64/support.S
+++ b/sys/arm64/arm64/support.S
@@ -284,8 +284,8 @@ END(pagezero_simple)
 ENTRY(pagezero_cache)
 	add	x1, x0, #PAGE_SIZE
 
-	ldr	x2, =dczva_line_size
-	ldr	x2, [x2]
+	adrp	x2, dczva_line_size
+	ldr	x2, [x2, :lo12:dczva_line_size]
 
 1:
 	dc	zva, x0