git: f51997c6e410 - stable/13 - Allocate arm64 per-CPU data in the correct domain

From: Andrew Turner <andrew_at_FreeBSD.org>
Date: Tue, 14 Dec 2021 11:07:20 UTC
The branch stable/13 has been updated by andrew:

URL: https://cgit.FreeBSD.org/src/commit/?id=f51997c6e410e2413686983d8fd57c1877f8c0ad

commit f51997c6e410e2413686983d8fd57c1877f8c0ad
Author:     Andrew Turner <andrew@FreeBSD.org>
AuthorDate: 2021-10-06 15:08:04 +0000
Commit:     Andrew Turner <andrew@FreeBSD.org>
CommitDate: 2021-12-14 10:58:01 +0000

    Allocate arm64 per-CPU data in the correct domain
    
    To minimise NUMA traffic allocate the pcpu, dpcpu, and boot stacks in
    the correct domain when possible.
    
    Submitted by:   markj
    Sponsored by:   The FreeBSD Foundation
    Differential Revision: https://reviews.freebsd.org/D32338
    
    (cherry picked from commit a90ebeb5feea63c9d8377cbc74e0442156a0ed15)
---
 sys/arm64/arm64/machdep.c    | 14 +++++++++++---
 sys/arm64/arm64/mp_machdep.c | 39 ++++++++++++++++++++++++++-------------
 sys/arm64/include/counter.h  |  2 +-
 sys/arm64/include/pcpu_aux.h |  2 +-
 4 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
index 529b17772d17..b4a2a8cdd8e5 100644
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -102,7 +102,12 @@ __FBSDID("$FreeBSD$");
 
 enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;
 
-struct pcpu __pcpu[MAXCPU];
+/*
+ * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
+ * could relocate this, but will need to keep the same virtual address as
+ * it's referenced by the EARLY_COUNTER macro.
+ */
+struct pcpu pcpu0;
 
 static struct trapframe proc0_tf;
 
@@ -334,7 +339,10 @@ makectx(struct trapframe *tf, struct pcb *pcb)
 static void
 init_proc0(vm_offset_t kstack)
 {
-	struct pcpu *pcpup = &__pcpu[0];
+	struct pcpu *pcpup;
+
+	pcpup = cpuid_to_pcpu[0];
+	MPASS(pcpup != NULL);
 
 	proc_linkup0(&proc0, &thread0);
 	thread0.td_kstack = kstack;
@@ -722,7 +730,7 @@ initarm(struct arm64_bootparams *abp)
 		    EXFLAG_NOALLOC);
 
 	/* Set the pcpu data, this is needed by pmap_bootstrap */
-	pcpup = &__pcpu[0];
+	pcpup = &pcpu0;
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
 
 	/*
diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c
index 547bd576362e..7ce758ae897a 100644
--- a/sys/arm64/arm64/mp_machdep.c
+++ b/sys/arm64/arm64/mp_machdep.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/csan.h>
+#include <sys/domainset.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/malloc.h>
@@ -152,7 +153,7 @@ static bool
 is_boot_cpu(uint64_t target_cpu)
 {
 
-	return (__pcpu[0].pc_mpidr == (target_cpu & CPU_AFF_MASK));
+	return (cpuid_to_pcpu[0]->pc_mpidr == (target_cpu & CPU_AFF_MASK));
 }
 
 static void
@@ -214,15 +215,17 @@ init_secondary(uint64_t cpu)
 	 * they can pass random value in it.
 	 */
 	mpidr = READ_SPECIALREG(mpidr_el1) & CPU_AFF_MASK;
-	if  (cpu >= MAXCPU || __pcpu[cpu].pc_mpidr != mpidr) {
+	if (cpu >= MAXCPU || cpuid_to_pcpu[cpu] == NULL ||
+	    cpuid_to_pcpu[cpu]->pc_mpidr != mpidr) {
 		for (cpu = 0; cpu < mp_maxid; cpu++)
-			if (__pcpu[cpu].pc_mpidr == mpidr)
+			if (cpuid_to_pcpu[cpu] != NULL &&
+			    cpuid_to_pcpu[cpu]->pc_mpidr == mpidr)
 				break;
 		if ( cpu >= MAXCPU)
 			panic("MPIDR for this CPU is not in pcpu table");
 	}
 
-	pcpup = &__pcpu[cpu];
+	pcpup = cpuid_to_pcpu[cpu];
 	/*
 	 * Set the pcpu pointer with a backup in tpidr_el1 to be
 	 * loaded when entering the kernel from userland.
@@ -483,7 +486,7 @@ cpu_mp_probe(void)
  * do nothing. Returns true if the CPU is present and running.
  */
 static bool
-start_cpu(u_int cpuid, uint64_t target_cpu)
+start_cpu(u_int cpuid, uint64_t target_cpu, int domain)
 {
 	struct pcpu *pcpup;
 	vm_paddr_t pa;
@@ -499,14 +502,17 @@ start_cpu(u_int cpuid, uint64_t target_cpu)
 
 	KASSERT(cpuid < MAXCPU, ("Too many CPUs"));
 
-	pcpup = &__pcpu[cpuid];
+	pcpup = (void *)kmem_malloc_domainset(DOMAINSET_PREF(domain),
+	    sizeof(*pcpup), M_WAITOK | M_ZERO);
 	pcpu_init(pcpup, cpuid, sizeof(struct pcpu));
 	pcpup->pc_mpidr = target_cpu & CPU_AFF_MASK;
 
-	dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
+	dpcpu[cpuid - 1] = (void *)kmem_malloc_domainset(
+	    DOMAINSET_PREF(domain), DPCPU_SIZE, M_WAITOK | M_ZERO);
 	dpcpu_init(dpcpu[cpuid - 1], cpuid);
 
-	bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO);
+	bootstacks[cpuid] = (void *)kmem_malloc_domainset(
+	    DOMAINSET_PREF(domain), PAGE_SIZE, M_WAITOK | M_ZERO);
 
 	naps = atomic_load_int(&aps_started);
 	bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE;
@@ -549,6 +555,7 @@ madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
 	ACPI_MADT_GENERIC_INTERRUPT *intr;
 	u_int *cpuid;
 	u_int id;
+	int domain;
 
 	switch(entry->Type) {
 	case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
@@ -560,8 +567,14 @@ madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
 		else
 			id = *cpuid;
 
-		if (start_cpu(id, intr->ArmMpidr)) {
-			__pcpu[id].pc_acpi_id = intr->Uid;
+#ifdef NUMA
+		domain = acpi_pxm_get_cpu_locality(*cpuid);
+#else
+		domain = 0;
+#endif
+		if (start_cpu(id, intr->ArmMpidr, domain)) {
+			MPASS(cpuid_to_pcpu[id] != NULL);
+			cpuid_to_pcpu[id]->pc_acpi_id = intr->Uid;
 			/*
 			 * Don't increment for the boot CPU, its CPU ID is
 			 * reserved.
@@ -624,7 +637,7 @@ start_cpu_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
 	else
 		cpuid = fdt_cpuid;
 
-	if (!start_cpu(cpuid, target_cpu))
+	if (!start_cpu(cpuid, target_cpu, 0))
 		return (FALSE);
 
 	/*
@@ -637,7 +650,7 @@ start_cpu_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
 	if (vm_ndomains == 1 ||
 	    OF_getencprop(node, "numa-node-id", &domain, sizeof(domain)) <= 0)
 		domain = 0;
-	__pcpu[cpuid].pc_domain = domain;
+	cpuid_to_pcpu[cpuid]->pc_domain = domain;
 	if (domain < MAXMEMDOM)
 		CPU_SET(cpuid, &cpuset_domain[domain]);
 	return (TRUE);
@@ -668,7 +681,7 @@ cpu_mp_start(void)
 
 	/* CPU 0 is always boot CPU. */
 	CPU_SET(0, &all_cpus);
-	__pcpu[0].pc_mpidr = READ_SPECIALREG(mpidr_el1) & CPU_AFF_MASK;
+	cpuid_to_pcpu[0]->pc_mpidr = READ_SPECIALREG(mpidr_el1) & CPU_AFF_MASK;
 
 	switch(arm64_bus_method) {
 #ifdef DEV_ACPI
diff --git a/sys/arm64/include/counter.h b/sys/arm64/include/counter.h
index 333015cc7139..7f747b525d9c 100644
--- a/sys/arm64/include/counter.h
+++ b/sys/arm64/include/counter.h
@@ -32,7 +32,7 @@
 #include <sys/pcpu.h>
 #include <machine/atomic.h>
 
-#define	EARLY_COUNTER	&__pcpu[0].pc_early_dummy_counter
+#define	EARLY_COUNTER	&pcpu0.pc_early_dummy_counter
 
 #define	counter_enter()	do {} while (0)
 #define	counter_exit()	do {} while (0)
diff --git a/sys/arm64/include/pcpu_aux.h b/sys/arm64/include/pcpu_aux.h
index 3d4c70c491d6..382811dfa1fb 100644
--- a/sys/arm64/include/pcpu_aux.h
+++ b/sys/arm64/include/pcpu_aux.h
@@ -47,6 +47,6 @@
  */
 _Static_assert(PAGE_SIZE % sizeof(struct pcpu) == 0, "fix pcpu size");
 
-extern struct pcpu __pcpu[];
+extern struct pcpu pcpu0;
 
 #endif	/* _MACHINE_PCPU_AUX_H_ */