git: c78ebc69c2aa - main - arm64: Support a shared release address for spin-table

From: Andrew Turner <andrew_at_FreeBSD.org>
Date: Fri, 10 May 2024 09:30:08 UTC
The branch main has been updated by andrew:

URL: https://cgit.FreeBSD.org/src/commit/?id=c78ebc69c2aa5454b4dc8fd7451b3b0d031205b8

commit c78ebc69c2aa5454b4dc8fd7451b3b0d031205b8
Author:     Andrew Turner <andrew@FreeBSD.org>
AuthorDate: 2024-04-29 14:23:42 +0000
Commit:     Andrew Turner <andrew@FreeBSD.org>
CommitDate: 2024-05-10 09:29:24 +0000

    arm64: Support a shared release address for spin-table
    
    When releasing multiple CPUs that share a release address we need them
    to wait for their turn to boot. Add a mechanism to do this by booting
    them until they enable the MMU before waiting their turn to enter
    init_secondary.
    
    Reviewed by:    jhibbits, kevans
    Sponsored by:   Arm Ltd
    Differential Revision:  https://reviews.freebsd.org/D45082
---
 sys/arm64/arm64/locore.S     | 55 ++++++++++++++++++++++++++++++++++++++++----
 sys/arm64/arm64/mp_machdep.c | 21 +++++++++++++----
 2 files changed, 67 insertions(+), 9 deletions(-)
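
The protocol the patch implements is a simple token pass: the primary CPU publishes the affinity of the single CPU allowed to proceed in ap_cpuid, every secondary woken through the shared release address spins in spintable_wait until ap_cpuid names it, and the chosen CPU then clears ap_cpuid so the primary can release the next one. The sketch below is a rough user-space model of that token passing, not code from the commit; the thread entry point, the affinity values, and the busy-wait loops are stand-ins for mpentry_spintable, MPIDR_EL1 affinities, and the dsb/sev/wfe sequences in the patch.

/*
 * Rough user-space model of the handshake added by this commit; it is
 * illustrative only and not part of the patch.  Threads stand in for CPUs,
 * the affinity values are made up, and plain busy-wait loops replace the
 * dsb/sev/wfe sequences used by locore.S and mp_machdep.c.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Affinity of the one CPU allowed to proceed; 0 means no CPU is named. */
static _Atomic uint64_t ap_cpuid;

/* A secondary CPU after mp_virtdone: spintable_wait, then init_secondary. */
static void *
secondary(void *arg)
{
	uint64_t mine = (uint64_t)(uintptr_t)arg;

	/* Spin until the kernel names this CPU... */
	while (atomic_load(&ap_cpuid) != mine)
		;
	/* ...then hand the token back so the next CPU can be released. */
	atomic_store(&ap_cpuid, 0);

	printf("CPU with affinity %#lx entering init_secondary\n",
	    (unsigned long)mine);
	return (NULL);
}

int
main(void)
{
	/* Made-up affinity values; both secondaries wake at the same time. */
	uint64_t affinity[2] = { 0x100, 0x101 };
	pthread_t cpu[2];

	for (int i = 0; i < 2; i++)
		pthread_create(&cpu[i], NULL, secondary,
		    (void *)(uintptr_t)affinity[i]);

	/* The primary CPU releases one secondary at a time, as in start_cpu(). */
	for (int i = 0; i < 2; i++) {
		atomic_store(&ap_cpuid, affinity[i]);
		while (atomic_load(&ap_cpuid) != 0)
			;	/* enable_cpu_spin() uses wfe here */
	}

	for (int i = 0; i < 2; i++)
		pthread_join(cpu[i], NULL);
	return (0);
}

Running it prints the two secondaries entering init_secondary one after the other, which is the ordering a shared release address requires.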

diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S
index fd77938edae9..94a50b735bc6 100644
--- a/sys/arm64/arm64/locore.S
+++ b/sys/arm64/arm64/locore.S
@@ -29,6 +29,7 @@
 #include <sys/syscall.h>
 #include <machine/asm.h>
 #include <machine/armreg.h>
+#include <machine/cpu.h>
 #include <machine/hypervisor.h>
 #include <machine/param.h>
 #include <machine/pte.h>
@@ -192,12 +193,50 @@ END(_start)
 
 #ifdef SMP
 /*
- * mpentry(unsigned long)
+ * void
+ * mpentry_psci(unsigned long)
  *
- * Called by a core when it is being brought online.
+ * Called by a core when it is being brought online with psci.
  * The data in x0 is passed straight to init_secondary.
  */
-ENTRY(mpentry)
+ENTRY(mpentry_psci)
+	mov	x26, xzr
+	b	mpentry_common
+END(mpentry_psci)
+
+/*
+ * void
+ * mpentry_spintable(void)
+ *
+ * Called by a core when it is being brought online with a spin-table.
+ * Reads the new CPU ID and passes this to init_secondary.
+ */
+ENTRY(mpentry_spintable)
+	ldr	x26, =spintable_wait
+	b	mpentry_common
+END(mpentry_spintable)
+
+/* Wait for the current CPU to be released */
+LENTRY(spintable_wait)
+	/* Read the affinity bits from mpidr_el1 */
+	mrs	x1, mpidr_el1
+	ldr	x2, =CPU_AFF_MASK
+	and	x1, x1, x2
+
+	adrp	x2, ap_cpuid
+1:
+	ldr	x0, [x2, :lo12:ap_cpuid]
+	cmp	x0, x1
+	b.ne	1b
+
+	str	xzr, [x2, :lo12:ap_cpuid]
+	dsb	sy
+	sev
+
+	ret
+LEND(spintable_wait)
+
+LENTRY(mpentry_common)
 	/* Disable interrupts */
 	msr	daifset, #DAIF_INTR
 
@@ -228,6 +267,14 @@ ENTRY(mpentry)
 mp_virtdone:
 	BTI_J
 
+	/*
+	 * Allow this CPU to wait until the kernel is ready for it,
+	 * e.g. with spin-table but each CPU uses the same release address
+	 */
+	cbz	x26, 1f
+	blr	x26
+1:
+
 	/* Start using the AP boot stack */
 	adrp	x4, bootstack
 	ldr	x4, [x4, :lo12:bootstack]
@@ -258,7 +305,7 @@ mp_virtdone:
 	msr	tpidr_el1, x18
 
 	b	init_secondary
-END(mpentry)
+LEND(mpentry_common)
 #endif
 
 /*
diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c
index 9c6175445572..bd13dde9cee0 100644
--- a/sys/arm64/arm64/mp_machdep.c
+++ b/sys/arm64/arm64/mp_machdep.c
@@ -106,7 +106,8 @@ static void ipi_stop(void *);
 static u_int fdt_cpuid;
 #endif
 
-void mpentry(unsigned long cpuid);
+void mpentry_psci(unsigned long cpuid);
+void mpentry_spintable(void);
 void init_secondary(uint64_t);
 
 /* Synchronize AP startup. */
@@ -114,6 +115,7 @@ static struct mtx ap_boot_mtx;
 
 /* Used to initialize the PCPU ahead of calling init_secondary(). */
 void *bootpcpu;
+uint64_t ap_cpuid;
 
 /* Stacks for AP initialization, discarded once idle threads are started. */
 void *bootstack;
@@ -420,7 +422,10 @@ enable_cpu_spin(uint64_t cpu, vm_paddr_t entry, vm_paddr_t release_paddr)
 {
 	vm_paddr_t *release_addr;
 
-	release_addr = pmap_mapdev(release_paddr, sizeof(*release_addr));
+	ap_cpuid = cpu & CPU_AFF_MASK;
+
+	release_addr = pmap_mapdev_attr(release_paddr, sizeof(*release_addr),
+	    VM_MEMATTR_DEFAULT);
 	if (release_addr == NULL)
 		return (ENOMEM);
 
@@ -432,6 +437,10 @@ enable_cpu_spin(uint64_t cpu, vm_paddr_t entry, vm_paddr_t release_paddr)
 	    "sev	\n"
 	    ::: "memory");
 
+	/* Wait for the target CPU to start */
+	while (atomic_load_64(&ap_cpuid) != 0)
+		__asm __volatile("wfe");
+
 	return (0);
 }
 
@@ -475,7 +484,6 @@ start_cpu(u_int cpuid, uint64_t target_cpu, int domain, vm_paddr_t release_addr)
 	bootstack = (char *)bootstacks[cpuid] + MP_BOOTSTACK_SIZE;
 
 	printf("Starting CPU %u (%lx)\n", cpuid, target_cpu);
-	pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry);
 
 	/*
 	 * A limited set of hardware we support can only do spintables and
@@ -483,10 +491,13 @@ start_cpu(u_int cpuid, uint64_t target_cpu, int domain, vm_paddr_t release_addr)
 	 * PSCI branch here.
 	 */
 	MPASS(release_addr == 0 || !psci_present);
-	if (release_addr != 0)
+	if (release_addr != 0) {
+		pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry_spintable);
 		err = enable_cpu_spin(target_cpu, pa, release_addr);
-	else
+	} else {
+		pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry_psci);
 		err = enable_cpu_psci(target_cpu, pa, cpuid);
+	}
 
 	if (err != 0) {
 		pcpu_destroy(pcpup);