svn commit: r359280 - in head/sys: arm64/arm64 riscv/riscv

Mark Johnston markj at FreeBSD.org
Tue Mar 24 18:50:25 UTC 2020


Author: markj
Date: Tue Mar 24 18:43:23 2020
New Revision: 359280
URL: https://svnweb.freebsd.org/changeset/base/359280

Log:
  Remove the secondary_stacks array in arm64 and riscv kernels.
  
  Instead, dynamically allocate a page for the boot stack of each AP when
  starting them up, like we do on x86.  This shrinks the bss by
  MAXCPU*KSTACK_PAGES pages, which corresponds to 4MB on arm64 and 256KB
  on riscv.
  
  Duplicate the logic used on x86 to free the bootstacks, by using a
  sysinit to wait for each AP to switch to a thread before freeing its
  stack.
  
  While here, mark some static MD variables as such.
  
  Reviewed by:	kib
  MFC after:	1 month
  Sponsored by:	Juniper Networks, Klara Inc.
  Differential Revision:	https://reviews.freebsd.org/D24158

Modified:
  head/sys/arm64/arm64/locore.S
  head/sys/arm64/arm64/mp_machdep.c
  head/sys/riscv/riscv/locore.S
  head/sys/riscv/riscv/mp_machdep.c

Modified: head/sys/arm64/arm64/locore.S
==============================================================================
--- head/sys/arm64/arm64/locore.S	Tue Mar 24 18:35:33 2020	(r359279)
+++ head/sys/arm64/arm64/locore.S	Tue Mar 24 18:43:23 2020	(r359280)
@@ -214,11 +214,10 @@ ENTRY(mpentry)
 	br	x15
 
 mp_virtdone:
-	ldr	x4, =secondary_stacks
-	mov	x5, #(PAGE_SIZE * KSTACK_PAGES)
-	mul	x5, x0, x5
-	add	sp, x4, x5
-
+	/* Start using the AP boot stack */
+	ldr	x4, =bootstack
+	ldr	x4, [x4]
+	mov	sp, x4
 	b	init_secondary
 END(mpentry)
 #endif

Modified: head/sys/arm64/arm64/mp_machdep.c
==============================================================================
--- head/sys/arm64/arm64/mp_machdep.c	Tue Mar 24 18:35:33 2020	(r359279)
+++ head/sys/arm64/arm64/mp_machdep.c	Tue Mar 24 18:43:23 2020	(r359280)
@@ -123,7 +123,6 @@ static void ipi_preempt(void *);
 static void ipi_rendezvous(void *);
 static void ipi_stop(void *);
 
-struct mtx ap_boot_mtx;
 struct pcb stoppcbs[MAXCPU];
 
 /*
@@ -136,10 +135,18 @@ static int cpu0 = -1;
 void mpentry(unsigned long cpuid);
 void init_secondary(uint64_t);
 
-uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16);
+/* Synchronize AP startup. */
+static struct mtx ap_boot_mtx;
 
+/* Stacks for AP initialization, discarded once idle threads are started. */
+void *bootstack;
+static void *bootstacks[MAXCPU];
+
+/* Count of started APs, used to synchronize access to bootstack. */
+static volatile int aps_started;
+
 /* Set to 1 once we're ready to let the APs out of the pen. */
-volatile int aps_ready = 0;
+static volatile int aps_ready;
 
 /* Temporary variables for init_secondary()  */
 void *dpcpu[MAXCPU - 1];
@@ -205,14 +212,14 @@ init_secondary(uint64_t cpu)
 	    "mov x18, %0 \n"
 	    "msr tpidr_el1, %0" :: "r"(pcpup));
 
-	/* Spin until the BSP releases the APs */
-	while (!aps_ready)
+	/* Signal the BSP and spin until it has released all APs. */
+	atomic_add_int(&aps_started, 1);
+	while (!atomic_load_int(&aps_ready))
 		__asm __volatile("wfe");
 
 	/* Initialize curthread */
 	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
 	pcpup->pc_curthread = pcpup->pc_idlethread;
-	pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb;
 
 	/* Initialize curpmap to match TTBR0's current setting. */
 	pmap0 = vmspace_pmap(&vmspace0);
@@ -250,6 +257,11 @@ init_secondary(uint64_t cpu)
 
 	kcsan_cpu_init(cpu);
 
+	/*
+	 * Assert that smp_after_idle_runnable condition is reasonable.
+	 */
+	MPASS(PCPU_GET(curpcb) == NULL);
+
 	/* Enter the scheduler */
 	sched_throw(NULL);
 
@@ -257,6 +269,24 @@ init_secondary(uint64_t cpu)
 	/* NOTREACHED */
 }
 
+static void
+smp_after_idle_runnable(void *arg __unused)
+{
+	struct pcpu *pc;
+	int cpu;
+
+	for (cpu = 1; cpu < mp_ncpus; cpu++) {
+		if (bootstacks[cpu] != NULL) {
+			pc = pcpu_find(cpu);
+			while (atomic_load_ptr(&pc->pc_curpcb) == NULL)
+				cpu_spinwait();
+			kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE);
+		}
+	}
+}
+SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY,
+    smp_after_idle_runnable, NULL);
+
 /*
  *  Send IPI thru interrupt controller.
  */
@@ -391,7 +421,7 @@ start_cpu(u_int id, uint64_t target_cpu)
 	struct pcpu *pcpup;
 	vm_paddr_t pa;
 	u_int cpuid;
-	int err;
+	int err, naps;
 
 	/* Check we are able to start this cpu */
 	if (id > mp_maxid)
@@ -405,7 +435,7 @@ start_cpu(u_int id, uint64_t target_cpu)
 
 	/*
 	 * Rotate the CPU IDs to put the boot CPU as CPU 0. We keep the other
-	 * CPUs ordered as the are likely grouped into clusters so it can be
+	 * CPUs ordered as they are likely grouped into clusters so it can be
 	 * useful to keep that property, e.g. for the GICv3 driver to send
 	 * an IPI to all CPUs in the cluster.
 	 */
@@ -420,29 +450,41 @@ start_cpu(u_int id, uint64_t target_cpu)
 	dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
 	dpcpu_init(dpcpu[cpuid - 1], cpuid);
 
+	bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO);
+
+	naps = atomic_load_int(&aps_started);
+	bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE;
+
 	printf("Starting CPU %u (%lx)\n", cpuid, target_cpu);
 	pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry);
-
 	err = psci_cpu_on(target_cpu, pa, cpuid);
 	if (err != PSCI_RETVAL_SUCCESS) {
 		/*
 		 * Panic here if INVARIANTS are enabled and PSCI failed to
-		 * start the requested CPU. If psci_cpu_on returns PSCI_MISSING
+		 * start the requested CPU.  psci_cpu_on() returns PSCI_MISSING
 		 * to indicate we are unable to use it to start the given CPU.
 		 */
 		KASSERT(err == PSCI_MISSING ||
 		    (mp_quirks & MP_QUIRK_CPULIST) == MP_QUIRK_CPULIST,
-		    ("Failed to start CPU %u (%lx)\n", id, target_cpu));
+		    ("Failed to start CPU %u (%lx), error %d\n",
+		    id, target_cpu, err));
 
 		pcpu_destroy(pcpup);
 		kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE);
 		dpcpu[cpuid - 1] = NULL;
+		kmem_free((vm_offset_t)bootstacks[cpuid], PAGE_SIZE);
+		bootstacks[cpuid] = NULL;
 		mp_ncpus--;
 
 		/* Notify the user that the CPU failed to start */
-		printf("Failed to start CPU %u (%lx)\n", id, target_cpu);
-	} else
+		printf("Failed to start CPU %u (%lx), error %d\n",
+		    id, target_cpu, err);
+	} else {
+		/* Wait for the AP to switch to its boot stack. */
+		while (atomic_load_int(&aps_started) < naps + 1)
+			cpu_spinwait();
 		CPU_SET(cpuid, &all_cpus);
+	}
 
 	return (true);
 }

Modified: head/sys/riscv/riscv/locore.S
==============================================================================
--- head/sys/riscv/riscv/locore.S	Tue Mar 24 18:35:33 2020	(r359279)
+++ head/sys/riscv/riscv/locore.S	Tue Mar 24 18:43:23 2020	(r359280)
@@ -301,14 +301,8 @@ ENTRY(mpentry)
 	beqz	t1, 1b
 
 	/* Setup stack pointer */
-	lla	t0, secondary_stacks
-	li	t1, (PAGE_SIZE * KSTACK_PAGES)
-	mulw	t2, t1, a0
-	add	t0, t0, t2
-	add	t0, t0, t1
-	sub	t0, t0, s9
-	li	t1, KERNBASE
-	add	sp, t0, t1
+	lla	t0, bootstack
+	ld	sp, 0(t0)
 
 	/* Setup supervisor trap vector */
 	lla	t0, mpva

Modified: head/sys/riscv/riscv/mp_machdep.c
==============================================================================
--- head/sys/riscv/riscv/mp_machdep.c	Tue Mar 24 18:35:33 2020	(r359279)
+++ head/sys/riscv/riscv/mp_machdep.c	Tue Mar 24 18:43:23 2020	(r359280)
@@ -87,7 +87,6 @@ static device_attach_t riscv64_cpu_attach;
 
 static int ipi_handler(void *);
 
-struct mtx ap_boot_mtx;
 struct pcb stoppcbs[MAXCPU];
 
 extern uint32_t boot_hart;
@@ -98,13 +97,19 @@ static uint32_t cpu_reg[MAXCPU][2];
 #endif
 static device_t cpu_list[MAXCPU];
 
-void mpentry(unsigned long cpuid);
 void init_secondary(uint64_t);
 
-uint8_t secondary_stacks[MAXCPU][PAGE_SIZE * KSTACK_PAGES] __aligned(16);
+static struct mtx ap_boot_mtx;
 
+/* Stacks for AP initialization, discarded once idle threads are started. */
+void *bootstack;
+static void *bootstacks[MAXCPU];
+
+/* Count of started APs, used to synchronize access to bootstack. */
+static volatile int aps_started;
+
 /* Set to 1 once we're ready to let the APs out of the pen. */
-volatile int aps_ready = 0;
+static volatile int aps_ready;
 
 /* Temporary variables for init_secondary()  */
 void *dpcpu[MAXCPU - 1];
@@ -233,14 +238,14 @@ init_secondary(uint64_t hart)
 	csr_set(sie, SIE_SSIE);
 	csr_set(sip, SIE_SSIE);
 
-	/* Spin until the BSP releases the APs */
-	while (!aps_ready)
+	/* Signal the BSP and spin until it has released all APs. */
+	atomic_add_int(&aps_started, 1);
+	while (!atomic_load_int(&aps_ready))
 		__asm __volatile("wfi");
 
 	/* Initialize curthread */
 	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
 	pcpup->pc_curthread = pcpup->pc_idlethread;
-	pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb;
 
 	/*
 	 * Identify current CPU. This is necessary to setup
@@ -274,6 +279,11 @@ init_secondary(uint64_t hart)
 
 	mtx_unlock_spin(&ap_boot_mtx);
 
+	/*
+	 * Assert that smp_after_idle_runnable condition is reasonable.
+	 */
+	MPASS(PCPU_GET(curpcb) == NULL);
+
 	/* Enter the scheduler */
 	sched_throw(NULL);
 
@@ -281,6 +291,24 @@ init_secondary(uint64_t hart)
 	/* NOTREACHED */
 }
 
+static void
+smp_after_idle_runnable(void *arg __unused)
+{
+	struct pcpu *pc;
+	int cpu;
+
+	for (cpu = 1; cpu < mp_ncpus; cpu++) {
+		if (bootstacks[cpu] != NULL) {
+			pc = pcpu_find(cpu);
+			while (atomic_load_ptr(&pc->pc_curpcb) == NULL)
+				cpu_spinwait();
+			kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE);
+		}
+	}
+}
+SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY,
+    smp_after_idle_runnable, NULL);
+
 static int
 ipi_handler(void *arg)
 {
@@ -373,6 +401,7 @@ cpu_init_fdt(u_int id, phandle_t node, u_int addr_size
 	struct pcpu *pcpup;
 	uint64_t hart;
 	u_int cpuid;
+	int naps;
 
 	/* Check if this hart supports MMU. */
 	if (OF_getproplen(node, "mmu-type") < 0)
@@ -419,8 +448,17 @@ cpu_init_fdt(u_int id, phandle_t node, u_int addr_size
 	dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
 	dpcpu_init(dpcpu[cpuid - 1], cpuid);
 
+	bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO);
+
+	naps = atomic_load_int(&aps_started);
+	bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE;
+
 	printf("Starting CPU %u (hart %lx)\n", cpuid, hart);
-	__riscv_boot_ap[hart] = 1;
+	atomic_store_32(&__riscv_boot_ap[hart], 1);
+
+	/* Wait for the AP to switch to its boot stack. */
+	while (atomic_load_int(&aps_started) < naps + 1)
+		cpu_spinwait();
 
 	CPU_SET(cpuid, &all_cpus);
 	CPU_SET(hart, &all_harts);


More information about the svn-src-head mailing list