git: d33bf8c50bd7 - releng/13.1 - Stop using the kmem for PCPU memory on arm64

From: Andrew Turner <andrew_at_FreeBSD.org>
Date: Wed, 27 Apr 2022 14:25:38 UTC
The branch releng/13.1 has been updated by andrew:

URL: https://cgit.FreeBSD.org/src/commit/?id=d33bf8c50bd7359e75732aab0b905f9ba7f13e2c

commit d33bf8c50bd7359e75732aab0b905f9ba7f13e2c
Author:     Andrew Turner <andrew@FreeBSD.org>
AuthorDate: 2022-04-27 13:42:21 +0000
Commit:     Andrew Turner <andrew@FreeBSD.org>
CommitDate: 2022-04-27 14:16:35 +0000

    Stop using the kmem for PCPU memory on arm64
    
    When allocating memory with a kernel memory allocator we may get
    memory that will later be promoted to a superpage. If this happens
    while another CPU is using the pointer they can race and when the
    promotion passes through the break-before-make sequence the pointer
    will be invalid for a short length of time.
    
    Revert the commit that added the use of the kernel allocator and
    subsequent fixes to the original change.
    
    Revert "Pass the ACPI ID when reading the ACPI domain"
    
    This reverts commit aa3b5d79b2acc508cca63c24251d65f4d2d76fa5.
    
    Revert "Stop reading the arm64 domain when it's known"
    
    This reverts commit b7c23efd7428256f69ccfd65a9c5e9f50585bf66.
    
    Revert "Allocate arm64 per-CPU data in the correct domain"
    
    This reverts commit f51997c6e410e2413686983d8fd57c1877f8c0ad.
    
    Approved by:    re (gjb)
    Reported by:    dch
    Sponsored by:   The FreeBSD Foundation
---
 sys/arm64/arm64/machdep.c    | 14 +++-----------
 sys/arm64/arm64/mp_machdep.c | 39 +++++++++++++--------------------------
 sys/arm64/include/counter.h  |  2 +-
 sys/arm64/include/pcpu_aux.h |  2 +-
 4 files changed, 18 insertions(+), 39 deletions(-)

diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
index c3d725800fde..aa5697db09dd 100644
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -102,12 +102,7 @@ __FBSDID("$FreeBSD$");
 
 enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;
 
-/*
- * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
- * could relocate this, but will need to keep the same virtual address as
- * it's reverenced by the EARLY_COUNTER macro.
- */
-struct pcpu pcpu0;
+struct pcpu __pcpu[MAXCPU];
 
 #if defined(PERTHREAD_SSP)
 /*
@@ -357,10 +352,7 @@ makectx(struct trapframe *tf, struct pcb *pcb)
 static void
 init_proc0(vm_offset_t kstack)
 {
-	struct pcpu *pcpup;
-
-	pcpup = cpuid_to_pcpu[0];
-	MPASS(pcpup != NULL);
+	struct pcpu *pcpup = &__pcpu[0];
 
 	proc_linkup0(&proc0, &thread0);
 	thread0.td_kstack = kstack;
@@ -791,7 +783,7 @@ initarm(struct arm64_bootparams *abp)
 		    EXFLAG_NOALLOC);
 
 	/* Set the pcpu data, this is needed by pmap_bootstrap */
-	pcpup = &pcpu0;
+	pcpup = &__pcpu[0];
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
 
 	/*
diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c
index b42f65b9e399..76304eadf381 100644
--- a/sys/arm64/arm64/mp_machdep.c
+++ b/sys/arm64/arm64/mp_machdep.c
@@ -40,7 +40,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/csan.h>
-#include <sys/domainset.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/malloc.h>
@@ -152,7 +151,7 @@ static bool
 is_boot_cpu(uint64_t target_cpu)
 {
 
-	return (cpuid_to_pcpu[0]->pc_mpidr == (target_cpu & CPU_AFF_MASK));
+	return (__pcpu[0].pc_mpidr == (target_cpu & CPU_AFF_MASK));
 }
 
 static void
@@ -214,17 +213,15 @@ init_secondary(uint64_t cpu)
 	 * they can pass random value in it.
 	 */
 	mpidr = READ_SPECIALREG(mpidr_el1) & CPU_AFF_MASK;
-	if (cpu >= MAXCPU || cpuid_to_pcpu[cpu] == NULL ||
-	    cpuid_to_pcpu[cpu]->pc_mpidr != mpidr) {
+	if  (cpu >= MAXCPU || __pcpu[cpu].pc_mpidr != mpidr) {
 		for (cpu = 0; cpu < mp_maxid; cpu++)
-			if (cpuid_to_pcpu[cpu] != NULL &&
-			    cpuid_to_pcpu[cpu]->pc_mpidr == mpidr)
+			if (__pcpu[cpu].pc_mpidr == mpidr)
 				break;
 		if ( cpu >= MAXCPU)
 			panic("MPIDR for this CPU is not in pcpu table");
 	}
 
-	pcpup = cpuid_to_pcpu[cpu];
+	pcpup = &__pcpu[cpu];
 	/*
 	 * Set the pcpu pointer with a backup in tpidr_el1 to be
 	 * loaded when entering the kernel from userland.
@@ -486,7 +483,7 @@ cpu_mp_probe(void)
  * do nothing. Returns true if the CPU is present and running.
  */
 static bool
-start_cpu(u_int cpuid, uint64_t target_cpu, int domain)
+start_cpu(u_int cpuid, uint64_t target_cpu)
 {
 	struct pcpu *pcpup;
 	vm_paddr_t pa;
@@ -502,17 +499,14 @@ start_cpu(u_int cpuid, uint64_t target_cpu, int domain)
 
 	KASSERT(cpuid < MAXCPU, ("Too many CPUs"));
 
-	pcpup = (void *)kmem_malloc_domainset(DOMAINSET_PREF(domain),
-	    sizeof(*pcpup), M_WAITOK | M_ZERO);
+	pcpup = &__pcpu[cpuid];
 	pcpu_init(pcpup, cpuid, sizeof(struct pcpu));
 	pcpup->pc_mpidr = target_cpu & CPU_AFF_MASK;
 
-	dpcpu[cpuid - 1] = (void *)kmem_malloc_domainset(
-	    DOMAINSET_PREF(domain), DPCPU_SIZE, M_WAITOK | M_ZERO);
+	dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
 	dpcpu_init(dpcpu[cpuid - 1], cpuid);
 
-	bootstacks[cpuid] = (void *)kmem_malloc_domainset(
-	    DOMAINSET_PREF(domain), PAGE_SIZE, M_WAITOK | M_ZERO);
+	bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO);
 
 	naps = atomic_load_int(&aps_started);
 	bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE;
@@ -555,7 +549,6 @@ madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
 	ACPI_MADT_GENERIC_INTERRUPT *intr;
 	u_int *cpuid;
 	u_int id;
-	int domain;
 
 	switch(entry->Type) {
 	case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
@@ -567,14 +560,8 @@ madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
 		else
 			id = *cpuid;
 
-		domain = 0;
-#ifdef NUMA
-		if (vm_ndomains > 1)
-			domain = acpi_pxm_get_cpu_locality(intr->Uid);
-#endif
-		if (start_cpu(id, intr->ArmMpidr, domain)) {
-			MPASS(cpuid_to_pcpu[id] != NULL);
-			cpuid_to_pcpu[id]->pc_acpi_id = intr->Uid;
+		if (start_cpu(id, intr->ArmMpidr)) {
+			__pcpu[id].pc_acpi_id = intr->Uid;
 			/*
 			 * Don't increment for the boot CPU, its CPU ID is
 			 * reserved.
@@ -637,7 +624,7 @@ start_cpu_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
 	else
 		cpuid = fdt_cpuid;
 
-	if (!start_cpu(cpuid, target_cpu, 0))
+	if (!start_cpu(cpuid, target_cpu))
 		return (FALSE);
 
 	/*
@@ -650,7 +637,7 @@ start_cpu_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
 	if (vm_ndomains == 1 ||
 	    OF_getencprop(node, "numa-node-id", &domain, sizeof(domain)) <= 0)
 		domain = 0;
-	cpuid_to_pcpu[cpuid]->pc_domain = domain;
+	__pcpu[cpuid].pc_domain = domain;
 	if (domain < MAXMEMDOM)
 		CPU_SET(cpuid, &cpuset_domain[domain]);
 	return (TRUE);
@@ -681,7 +668,7 @@ cpu_mp_start(void)
 
 	/* CPU 0 is always boot CPU. */
 	CPU_SET(0, &all_cpus);
-	cpuid_to_pcpu[0]->pc_mpidr = READ_SPECIALREG(mpidr_el1) & CPU_AFF_MASK;
+	__pcpu[0].pc_mpidr = READ_SPECIALREG(mpidr_el1) & CPU_AFF_MASK;
 
 	switch(arm64_bus_method) {
 #ifdef DEV_ACPI
diff --git a/sys/arm64/include/counter.h b/sys/arm64/include/counter.h
index 7f747b525d9c..333015cc7139 100644
--- a/sys/arm64/include/counter.h
+++ b/sys/arm64/include/counter.h
@@ -32,7 +32,7 @@
 #include <sys/pcpu.h>
 #include <machine/atomic.h>
 
-#define	EARLY_COUNTER	&pcpu0.pc_early_dummy_counter
+#define	EARLY_COUNTER	&__pcpu[0].pc_early_dummy_counter
 
 #define	counter_enter()	do {} while (0)
 #define	counter_exit()	do {} while (0)
diff --git a/sys/arm64/include/pcpu_aux.h b/sys/arm64/include/pcpu_aux.h
index 382811dfa1fb..3d4c70c491d6 100644
--- a/sys/arm64/include/pcpu_aux.h
+++ b/sys/arm64/include/pcpu_aux.h
@@ -47,6 +47,6 @@
  */
 _Static_assert(PAGE_SIZE % sizeof(struct pcpu) == 0, "fix pcpu size");
 
-extern struct pcpu pcpu0;
+extern struct pcpu __pcpu[];
 
 #endif	/* _MACHINE_PCPU_AUX_H_ */