git: 1ae25866767d - main - kexec: Introduce basic arm64 support
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 27 Oct 2025 14:34:38 UTC
The branch main has been updated by jhibbits:
URL: https://cgit.FreeBSD.org/src/commit/?id=1ae25866767d686067fe6678b62681b7a8f0d361
commit 1ae25866767d686067fe6678b62681b7a8f0d361
Author: Justin Hibbits <jhibbits@FreeBSD.org>
AuthorDate: 2025-10-26 02:45:00 +0000
Commit: Justin Hibbits <jhibbits@FreeBSD.org>
CommitDate: 2025-10-27 14:33:50 +0000
kexec: Introduce basic arm64 support
This works on older arm64 platforms, but may not work with arm64 devices
using GICv3, due to a quirk in the GICv3, where some registers are
write-once.
Most of the kexec reboot work on arm64 can be done entirely in C code,
by disabling the MMU, as the kernel is carved out of the vm_phys_segs
array, so cannot be overwritten.
Reviewed by: andrew
Sponsored by: Juniper Networks, Inc.
Differential Revision: https://reviews.freebsd.org/D51621
---
sys/arm64/arm64/kexec_support.c | 188 ++++++++++++++++++++++++++++++++++++++++
sys/arm64/arm64/locore.S | 44 ++++++++++
sys/arm64/arm64/mp_machdep.c | 78 +++++++++++++++++
sys/arm64/include/cpufunc.h | 7 ++
sys/arm64/include/kexec.h | 33 +++++++
sys/arm64/include/pcpu.h | 3 +-
sys/arm64/include/smp.h | 1 +
sys/conf/files.arm64 | 1 +
sys/dev/psci/psci.c | 13 +++
sys/dev/psci/psci.h | 1 +
10 files changed, 368 insertions(+), 1 deletion(-)
diff --git a/sys/arm64/arm64/kexec_support.c b/sys/arm64/arm64/kexec_support.c
new file mode 100644
index 000000000000..8b9719c05b67
--- /dev/null
+++ b/sys/arm64/arm64/kexec_support.c
@@ -0,0 +1,188 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/kexec.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_radix.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+
+#include <machine/armreg.h>
+#include <machine/pmap.h>
+#include <machine/pte.h>
+
+/*
+ * Idea behind this:
+ *
+ * kexec_load_md():
+ * - Update boot page tables (identity map) to include all pages needed before
+ * disabling MMU.
+ *
+ * kexec_reboot_md():
+ * - Copy pages into target(s)
+ * - Do "other stuff"
+ * - Does not return
+ */
+
+/*
+ * Symbols made global by locore.S: the bootstrap TTBR0 L0 page table and the
+ * label that follows the initial boot stack.
+ */
+extern pt_entry_t pagetable_l0_ttbr0_bootstrap[];
+extern unsigned long initstack_end[];
+/*
+ * Assembly helper in locore.S: loads sp from the first argument and branches
+ * to the function passed as the second argument. The third argument is left
+ * in place and so arrives as the callee's third parameter.
+ */
+void switch_stack(void *, void (*)(void *, void *, struct kexec_image *), void *);
+
+/* SCTLR_EL1 value written by kexec_reboot_bottom() to run with the MMU off. */
+#define SCTLR_EL1_NO_MMU (SCTLR_RES1 | SCTLR_LSMAOE | SCTLR_nTLSMD | \
+ SCTLR_EIS | SCTLR_TSCXT | SCTLR_EOS)
+/*
+ * Byte offset of a vm_page pointer from 'vm_page_base'. The expansion relies
+ * on a variable of that name being in scope where the macro is used.
+ */
+#define vm_page_offset(m) ((vm_offset_t)(m) - vm_page_base)
+/*
+ * Rebase a vm_page pointer from the virtual mapping of the page array
+ * (which starts at vm_page_v) onto the array's physical address (vm_page_p),
+ * so that the structure can be dereferenced once the MMU is off.
+ */
+static inline vm_page_t
+phys_vm_page(vm_page_t m, vm_offset_t vm_page_v, vm_paddr_t vm_page_p)
+{
+	vm_offset_t delta;
+
+	/* Distance of this entry from the start of the array. */
+	delta = (vm_offset_t)m - vm_page_v;
+	return ((vm_page_t)(delta + vm_page_p));
+}
+
+/*
+ * Final stage of the kexec reboot. Entered from kexec_reboot_md() through
+ * switch_stack(), using this function's physical address.
+ *
+ * Chains the pages of image->map_obj together, turns the MMU off, copies every
+ * staged segment to its target physical address and then calls image->entry.
+ * Does not return.
+ *
+ * First 2 args are filler for switch_stack()
+ */
+static void __aligned(16) __dead2
+kexec_reboot_bottom( void *arg1 __unused, void *arg2 __unused,
+ struct kexec_image *image)
+{
+ /*
+ * Resolve the entry point, the vm_page array base (virtual and physical)
+ * and the physical address of the segment table up front, before the MMU
+ * is turned off below.
+ */
+ void (*e)(void) = (void *)image->entry;
+ vm_offset_t vm_page_base = (vm_offset_t)vm_page_array;
+ vm_paddr_t vm_page_phys = pmap_kextract((vm_offset_t)vm_page_array);
+ struct kexec_segment_stage *phys_segs =
+ (void *)pmap_kextract((vm_offset_t)&image->segments);
+ vm_paddr_t from_pa, to_pa;
+ vm_size_t size;
+ vm_page_t first, m, mp;
+ struct pctrie_iter pct_i;
+
+ /*
+ * Create a linked list of all pages in the object before we disable the
+ * MMU. Once the MMU is disabled we can't use the vm_radix iterators,
+ * as they rely on virtual address pointers.
+ *
+ * NOTE(review): 'first' is assigned here but never read afterwards; the
+ * copy loop below starts from each segment's first_page instead.
+ */
+ first = NULL;
+ vm_radix_iter_init(&pct_i, &image->map_obj->rtree);
+ VM_RADIX_FORALL(m, &pct_i) {
+ if (first == NULL)
+ first = m;
+ else
+ SLIST_INSERT_AFTER(mp, m, plinks.s.ss); /* mp is the previous page. */
+ mp = m;
+ }
+
+ /*
+ * We're running out of the identity map now, disable the MMU before we
+ * continue. It's possible page tables can be overwritten, which would
+ * be very bad if we were running with the MMU enabled.
+ */
+ WRITE_SPECIALREG(sctlr_el1, SCTLR_EL1_NO_MMU);
+ isb();
+ /* From here on only physical addresses may be dereferenced. */
+ for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) {
+ /* A zero-sized entry ends the walk over the segment table. */
+ if (phys_segs[i].size == 0)
+ break;
+ to_pa = phys_segs[i].target;
+ /* Copy the segment here... */
+ for (vm_page_t p = phys_segs[i].first_page;
+ p != NULL && to_pa - phys_segs[i].target < phys_segs[i].size;
+ p = SLIST_NEXT(p, plinks.s.ss)) {
+ /*
+ * The list links hold virtual vm_page pointers; rebase onto
+ * the physical address of the array before dereferencing.
+ */
+ p = phys_vm_page(p, vm_page_base, vm_page_phys);
+ from_pa = p->phys_addr;
+ /* Page is already at its destination; nothing to copy. */
+ if (p->phys_addr == to_pa) {
+ to_pa += PAGE_SIZE;
+ continue;
+ }
+ /* Copy one whole page, a register-sized word at a time. */
+ for (size = PAGE_SIZE / sizeof(register_t);
+ size > 0; --size) {
+ *(register_t *)to_pa = *(register_t *)from_pa;
+ to_pa += sizeof(register_t);
+ from_pa += sizeof(register_t);
+ }
+ }
+ }
+ /* Invalidate the instruction cache before jumping to the copied image. */
+ invalidate_icache();
+ e();
+ /* Only reached if the entry point returns. */
+ while (1)
+ ;
+}
+
+/*
+ * Machine-dependent kexec reboot entry point.
+ *
+ * Invalidates the data cache over each segment's target range, masks SError,
+ * installs the bootstrap (identity) page table in TTBR0 and then continues in
+ * kexec_reboot_bottom() on the boot stack, with both the function and the
+ * stack addressed by their physical addresses. Does not return.
+ */
+void
+kexec_reboot_md(struct kexec_image *image)
+{
+ uintptr_t ptr;
+ register_t reg;
+
+ /* Segment targets are physical addresses; reach them via the DMAP. */
+ for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) {
+ if (image->segments[i].size > 0)
+ cpu_dcache_inv_range((void *)PHYS_TO_DMAP(image->segments[i].target),
+ image->segments[i].size);
+ }
+ /* Physical address of the second stage, called through 'p' below. */
+ ptr = pmap_kextract((vm_offset_t)kexec_reboot_bottom);
+ serror_disable();
+
+ /* Point TTBR0 at the bootstrap table and drop stale translations. */
+ reg = pmap_kextract((vm_offset_t)pagetable_l0_ttbr0_bootstrap);
+ set_ttbr0(reg);
+ cpu_tlb_flushID();
+
+ /*
+ * switch_stack() sets sp to the physical address of initstack_end and
+ * branches to p; 'image' is passed as the callee's third argument.
+ */
+ typeof(kexec_reboot_bottom) *p = (void *)ptr;
+ switch_stack((void *)pmap_kextract((vm_offset_t)initstack_end),
+ p, image);
+ /* Not expected to be reached: kexec_reboot_bottom() is __dead2. */
+ while (1)
+ ;
+}
+
+/*
+ * Machine-dependent part of loading a kexec image.
+ *
+ * Adds L2 block entries to the bootstrap identity map for the memory that
+ * kexec_reboot_md()/kexec_reboot_bottom() touch through physical addresses
+ * while the MMU is still on: the second-stage code, the boot stack and the
+ * vm_page_array variable. 'image' is not examined. Always returns 0.
+ */
+int
+kexec_load_md(struct kexec_image *image)
+{
+	vm_paddr_t tmp;
+	pt_entry_t *pte;
+
+	/* Create L2 page blocks for the trampoline. L0/L1 are from the startup. */
+
+	/*
+	 * There are exactly 2 pages before the pagetable_l0_ttbr0_bootstrap, so
+	 * move to there.
+	 */
+	pte = pagetable_l0_ttbr0_bootstrap;
+	pte -= (Ln_ENTRIES * 2);	/* move to start of L2 pages */
+
+	/*
+	 * Populate the identity map with symbols we know we'll need before we
+	 * turn off the MMU.
+	 *
+	 * A block descriptor keeps its attribute fields in the low bits, so
+	 * the physical address must be truncated to the L2 block boundary
+	 * before the attributes are OR'd in; otherwise the offset of the
+	 * symbol within its block would be interpreted as attribute bits.
+	 */
+	tmp = pmap_kextract((vm_offset_t)kexec_reboot_bottom);
+	pte[pmap_l2_index(tmp)] = ((tmp & ~L2_OFFSET) | L2_BLOCK | ATTR_AF |
+	    ATTR_S1_UXN);
+	tmp = pmap_kextract((vm_offset_t)initstack_end);
+	pte[pmap_l2_index(tmp)] = ((tmp & ~L2_OFFSET) | L2_BLOCK | ATTR_AF |
+	    ATTR_S1_UXN);
+	/*
+	 * We'll need vm_page_array for doing offset calculations. Note that
+	 * this maps the block holding the vm_page_array variable itself
+	 * (&vm_page_array), not the array it points to.
+	 */
+	tmp = pmap_kextract((vm_offset_t)&vm_page_array);
+	pte[pmap_l2_index(tmp)] = ((tmp & ~L2_OFFSET) | L2_BLOCK | ATTR_AF |
+	    ATTR_S1_UXN);
+
+	return (0);
+}
diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S
index d35e334905a7..3ec12140f139 100644
--- a/sys/arm64/arm64/locore.S
+++ b/sys/arm64/arm64/locore.S
@@ -325,6 +325,19 @@ mp_virtdone:
b init_secondary
LEND(mpentry_common)
+
+/*
+ * Parking loop for secondary CPUs. The loop waits until the 64-bit word at
+ * mp_cpu_spin_table_release_addr becomes non-zero and then branches to the
+ * address stored there. The word is placed directly after the code and is
+ * loaded PC-relative; cpu_mp_stop() copies the range
+ * [mp_cpu_spinloop, mp_cpu_spinloop_end) to its run-time location.
+ */
+ENTRY(mp_cpu_spinloop)
+0:
+ wfe /* Wait for an event. */
+ ldr x0, mp_cpu_spin_table_release_addr /* Load the release word. */
+ cbz x0, 0b /* Still zero: keep waiting. */
+ blr x0 /* Non-zero: branch to that address. */
+ .globl mp_cpu_spin_table_release_addr
+mp_cpu_spin_table_release_addr:
+ .quad 0
+ .globl mp_cpu_spinloop_end
+mp_cpu_spinloop_end:
+END(mp_cpu_spinloop)
#endif
/*
@@ -475,6 +488,29 @@ LENTRY(enter_kernel_el)
eret
LEND(enter_kernel_el)
+/*
+ * Turn off the MMU. Install ttbr0 from the bootstrap page table, and go there.
+ * Does not return.
+ * - x0 - target address to jump to after stopping the MMU.
+ * - x1 - kernel load address
+ *
+ * Uses x0-x3, x16 and x17 as scratch registers.
+ */
+ENTRY(stop_mmu)
+ mov x16, x0 /* Save target. */
+ ldr x2, =(1f - KERNBASE) /* Offset of label 1 from KERNBASE. */
+ add x17, x1, x2 /* x17 = load address + offset of label 1. */
+ ldr x3, =(pagetable_l0_ttbr0_bootstrap - KERNBASE)
+ add x1, x1, x3 /* x1 = load address + offset of the bootstrap L0 table. */
+ msr ttbr0_el1, x1
+ isb
+ br x17 /* Continue at label 1 via its load-address-relative alias. */
+1:
+ BTI_J
+ mrs x0, sctlr_el1
+ bic x0, x0, SCTLR_M /* Clear the MMU enable bit... */
+ bic x0, x0, SCTLR_C /* ...and the data cache enable bit. */
+ msr sctlr_el1, x0
+ isb
+ br x16 /* Jump to the saved target. */
+END(stop_mmu)
/*
* Get the physical address the kernel was loaded at.
*/
@@ -1094,12 +1130,19 @@ tcr:
TCR_SH0_IS | TCR_ORGN0_WBWA | TCR_IRGN0_WBWA)
LEND(start_mmu)
+/*
+ * Switch to a new stack and branch to a function. Does not return here.
+ * - x0 - new stack pointer
+ * - x1 - address of the function to branch to
+ * All other registers are left untouched, so x2 reaches the target
+ * unchanged.
+ */
+ENTRY(switch_stack)
+ mov sp, x0
+ mov x16, x1
+ br x16
+END(switch_stack)
+
ENTRY(abort)
b abort
END(abort)
.bss
.align PAGE_SHIFT
+ .globl initstack_end
initstack:
.space BOOT_STACK_SIZE
initstack_end:
@@ -1116,6 +1159,7 @@ initstack_end:
* L0 for user
*/
.globl pagetable_l0_ttbr1
+ .globl pagetable_l0_ttbr0_bootstrap
pagetable:
pagetable_l3_ttbr1:
.space (PAGE_SIZE * L3_PAGE_COUNT)
diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c
index e4d011df3a06..0bdd2ecfd8a7 100644
--- a/sys/arm64/arm64/mp_machdep.c
+++ b/sys/arm64/arm64/mp_machdep.c
@@ -60,6 +60,7 @@
#include <machine/debug_monitor.h>
#include <machine/intr.h>
#include <machine/smp.h>
+#include <machine/vmparam.h>
#ifdef VFP
#include <machine/vfp.h>
#endif
@@ -103,6 +104,7 @@ static void ipi_hardclock(void *);
static void ipi_preempt(void *);
static void ipi_rendezvous(void *);
static void ipi_stop(void *);
+static void ipi_off(void *);
#ifdef FDT
static u_int fdt_cpuid;
@@ -193,6 +195,7 @@ release_aps(void *dummy __unused)
intr_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL);
intr_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL);
intr_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL);
+ intr_ipi_setup(IPI_OFF, "off", ipi_off, NULL);
atomic_store_int(&aps_started, 0);
atomic_store_rel_int(&aps_ready, 1);
@@ -390,6 +393,34 @@ ipi_stop(void *dummy __unused)
CTR0(KTR_SMP, "IPI_STOP (restart)");
}
+/* Assembly routines and data from locore.S used to park CPUs. */
+void stop_mmu(vm_paddr_t, vm_paddr_t) __dead2;
+extern uint32_t mp_cpu_spinloop[];
+extern uint32_t mp_cpu_spinloop_end[];
+extern uint64_t mp_cpu_spin_table_release_addr;
+/*
+ * IPI_OFF handler: take the receiving CPU out of service.
+ *
+ * With PSCI the CPU is powered off through psci_cpu_off(). Otherwise the CPU
+ * turns its MMU off and jumps to the start of the spin loop that
+ * cpu_mp_stop() copied next to this CPU's release address.
+ */
+static void
+ipi_off(void *dummy __unused)
+{
+ CTR0(KTR_SMP, "IPI_OFF");
+ if (psci_present)
+ psci_cpu_off();
+ else {
+ uint64_t release_addr;
+ vm_size_t size;
+
+ /*
+ * 'size' is the offset of the release word from the start of the
+ * spin loop, so subtracting it from the per-CPU release address
+ * gives the address of the loop's first instruction.
+ */
+ size = (vm_offset_t)&mp_cpu_spin_table_release_addr -
+ (vm_offset_t)mp_cpu_spinloop;
+ release_addr = PCPU_GET(release_addr) - size;
+ isb();
+ invalidate_icache();
+ /* Go catatonic, don't take any interrupts. */
+ intr_disable();
+ /* Does not return; second argument is the kernel's physical base. */
+ stop_mmu(release_addr, pmap_kextract(KERNBASE));
+
+
+ }
+ /* Only reached when psci_cpu_off() returns. */
+ CTR0(KTR_SMP, "IPI_OFF failed");
+}
+
struct cpu_group *
cpu_topo(void)
{
@@ -511,6 +542,7 @@ start_cpu(u_int cpuid, uint64_t target_cpu, int domain, vm_paddr_t release_addr)
pcpu_init(pcpup, cpuid, sizeof(struct pcpu));
pcpup->pc_mpidr = target_cpu & CPU_AFF_MASK;
bootpcpu = pcpup;
+ pcpup->pc_release_addr = release_addr;
dpcpu[cpuid - 1] = (void *)(pcpup + 1);
dpcpu_init(dpcpu[cpuid - 1], cpuid);
@@ -752,6 +784,52 @@ cpu_mp_start(void)
}
}
+/*
+ * Take all CPUs other than the caller out of service.
+ *
+ * Does nothing on a single-CPU system. Without PSCI, a copy of
+ * mp_cpu_spinloop is first placed so that its release word lines up with the
+ * spin-table release address; the function returns early if no release
+ * address is known. Finally IPI_OFF is sent to every other CPU.
+ */
+void
+cpu_mp_stop(void)
+{
+
+ /* Short-circuit for single-CPU */
+ if (CPU_COUNT(&all_cpus) == 1)
+ return;
+
+ KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), ("Not on the first CPU!\n"));
+
+ /*
+ * If we use spin-table, assume U-Boot method for now (single address
+ * shared by all CPUs).
+ */
+ if (!psci_present) {
+ int cpu;
+ vm_paddr_t release_addr;
+ void *release_vaddr;
+ vm_size_t size;
+
+ /*
+ * Find the shared release address.
+ *
+ * NOTE(review): release_addr is only assigned inside the loop, so
+ * this relies on CPU_FOREACH() iterating at least once.
+ */
+ CPU_FOREACH(cpu) {
+ release_addr = pcpu_find(cpu)->pc_release_addr;
+ if (release_addr != 0)
+ break;
+ }
+ /* No release address? No way of notifying other CPUs. */
+ if (release_addr == 0)
+ return;
+
+ /* Number of bytes of spin loop code plus release word to copy. */
+ size = (vm_offset_t)&mp_cpu_spinloop_end -
+ (vm_offset_t)&mp_cpu_spinloop;
+
+ /*
+ * Back up by the offset of the release word within the loop so
+ * that the copied word lands exactly on the release address.
+ */
+ release_addr -= (vm_offset_t)&mp_cpu_spin_table_release_addr -
+ (vm_offset_t)mp_cpu_spinloop;
+
+ /* Copy the loop in place and push it out of the data cache. */
+ release_vaddr = pmap_mapdev(release_addr, size);
+ bcopy(mp_cpu_spinloop, release_vaddr, size);
+ cpu_dcache_wbinv_range(release_vaddr, size);
+ pmap_unmapdev(release_vaddr, size);
+ invalidate_icache();
+ }
+ ipi_all_but_self(IPI_OFF);
+ /* Presumably gives the other CPUs time to act on the IPI. */
+ DELAY(1000000);
+}
+
/* Introduce rest of cores to the world */
void
cpu_mp_announce(void)
diff --git a/sys/arm64/include/cpufunc.h b/sys/arm64/include/cpufunc.h
index e6e1f682794e..e9eee643216b 100644
--- a/sys/arm64/include/cpufunc.h
+++ b/sys/arm64/include/cpufunc.h
@@ -96,6 +96,13 @@ serror_enable(void)
__asm __volatile("msr daifclr, #(" __XSTRING(DAIF_A) ")");
}
+/*
+ * Mask SError interrupts by setting the A bit in DAIF; the counterpart of
+ * serror_enable(), which clears it.
+ */
+static __inline void
+serror_disable(void)
+{
+
+ __asm __volatile("msr daifset, #(" __XSTRING(DAIF_A) ")");
+}
+
static __inline register_t
get_midr(void)
{
diff --git a/sys/arm64/include/kexec.h b/sys/arm64/include/kexec.h
new file mode 100644
index 000000000000..0a8c7a053331
--- /dev/null
+++ b/sys/arm64/include/kexec.h
@@ -0,0 +1,33 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ARM64_KEXEC_H_
+#define _ARM64_KEXEC_H_
+
+#define KEXEC_MD_PAGES(x) 0
+
+#endif /* _ARM64_KEXEC_H_ */
diff --git a/sys/arm64/include/pcpu.h b/sys/arm64/include/pcpu.h
index 09bd8fa8a966..73399d2c3f8c 100644
--- a/sys/arm64/include/pcpu.h
+++ b/sys/arm64/include/pcpu.h
@@ -50,7 +50,8 @@ struct debug_monitor_state;
struct pmap *pc_curvmpmap; \
uint64_t pc_mpidr; \
u_int pc_bcast_tlbi_workaround; \
- char __pad[197]
+ uint64_t pc_release_addr; \
+ char __pad[189]
#ifdef _KERNEL
diff --git a/sys/arm64/include/smp.h b/sys/arm64/include/smp.h
index 500cd1ef4f02..4a5bfda3ac1c 100644
--- a/sys/arm64/include/smp.h
+++ b/sys/arm64/include/smp.h
@@ -40,6 +40,7 @@ enum {
IPI_STOP,
IPI_STOP_HARD,
IPI_HARDCLOCK,
+ IPI_OFF,
INTR_IPI_COUNT,
};
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
index 2f412fa3cb1b..882aca705336 100644
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -55,6 +55,7 @@ arm64/arm64/gic_v3_acpi.c optional acpi
arm64/arm64/gic_v3_fdt.c optional fdt
arm64/arm64/hyp_stub.S standard
arm64/arm64/identcpu.c standard
+arm64/arm64/kexec_support.c standard
arm64/arm64/locore.S standard no-obj
arm64/arm64/machdep.c standard
arm64/arm64/machdep_boot.c standard
diff --git a/sys/dev/psci/psci.c b/sys/dev/psci/psci.c
index 497b23d2d4c3..2b250401ae83 100644
--- a/sys/dev/psci/psci.c
+++ b/sys/dev/psci/psci.c
@@ -474,6 +474,19 @@ psci_cpu_on(unsigned long cpu, unsigned long entry, unsigned long context_id)
return (psci_call(fnid, cpu, entry, context_id));
}
+/*
+ * Issue the PSCI CPU_OFF call for the calling CPU.
+ *
+ * Uses the function ID recorded in the softc when one exists, and the
+ * default PSCI_FNID_CPU_OFF otherwise. Returns the result of psci_call().
+ */
+int
+psci_cpu_off(void)
+{
+	uint32_t fnid;
+
+	if (psci_softc == NULL)
+		fnid = PSCI_FNID_CPU_OFF;
+	else
+		fnid = psci_softc->psci_fnids[PSCI_FN_CPU_OFF];
+
+	/* Returns PSCI_RETVAL_DENIED on error. */
+	return (psci_call(fnid, 0, 0, 0));
+}
+
static void
psci_shutdown(void *xsc, int howto)
{
diff --git a/sys/dev/psci/psci.h b/sys/dev/psci/psci.h
index 451d40c0178d..6704eaf26c71 100644
--- a/sys/dev/psci/psci.h
+++ b/sys/dev/psci/psci.h
@@ -39,6 +39,7 @@ typedef int (*psci_callfn_t)(register_t, register_t, register_t, register_t,
extern bool psci_present;
int psci_cpu_on(unsigned long, unsigned long, unsigned long);
+int psci_cpu_off(void); /* Operates on caller. */
void psci_reset(void);
int32_t psci_features(uint32_t);
int psci_get_version(void);