git: 68f185ccc9f8 - main - arm64: Add Arm SPE support

From: Andrew Turner <andrew_at_FreeBSD.org>
Date: Wed, 12 Nov 2025 17:48:37 UTC
The branch main has been updated by andrew:

URL: https://cgit.FreeBSD.org/src/commit/?id=68f185ccc9f8f9498d536f4737d888b37cf11882

commit 68f185ccc9f8f9498d536f4737d888b37cf11882
Author:     Zachary Leaf <zachary.leaf@arm.com>
AuthorDate: 2025-11-12 16:35:05 +0000
Commit:     Andrew Turner <andrew@FreeBSD.org>
CommitDate: 2025-11-12 17:40:41 +0000

    arm64: Add Arm SPE support
    
    Add support for the Arm Statistical Profiling Extension (SPE). This
    is an optional extension added in Armv8.2 to provide profiling of
    software using randomised instruction sampling.
    
    This adds the initial driver and attaches it to the hardware tracing
    framework, hwt(4), in CPU mode to allow tracing of all threads on
    a given CPU.
    
    (commit message by andrew@)
    
    Co-authored-by: Sarah Walker <sarah.walker2@arm.com>
    Co-authored-by: Andrew Turner <andrew@FreeBSD.org>
    Reviewed by:    andrew
    Sponsored by:   Arm Ltd
    Sponsored by:   The FreeBSD Foundation (early driver)
    Differential Revision:  https://reviews.freebsd.org/D46241
---
 sys/arm64/arm64/locore.S        |   4 +
 sys/arm64/spe/arm_spe.h         |  77 ++++++
 sys/arm64/spe/arm_spe_acpi.c    | 146 ++++++++++
 sys/arm64/spe/arm_spe_backend.c | 586 ++++++++++++++++++++++++++++++++++++++++
 sys/arm64/spe/arm_spe_dev.c     | 323 ++++++++++++++++++++++
 sys/arm64/spe/arm_spe_dev.h     | 162 +++++++++++
 sys/arm64/spe/arm_spe_fdt.c     |  75 +++++
 sys/conf/files.arm64            |   4 +
 sys/modules/Makefile            |   5 +
 sys/modules/spe/Makefile        |  24 ++
 10 files changed, 1406 insertions(+)
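
The new struct arm_spe_config (sys/arm64/spe/arm_spe.h below) is the
config_version 1 payload a hwt(4) consumer passes when configuring a trace.
A minimal sketch of filling it in, with illustrative values:

    #include <arm64/spe/arm_spe.h>

    struct arm_spe_config cfg = {
        .interval  = 4096,            /* raised to the IMP DEF minimum if lower */
        .level     = ARM_SPE_KERNEL_AND_USER,
        .ctx_field = ARM_SPE_CTX_PID, /* kernel threads are logged as PID 0 */
    };
    /* Submitted with config_size = sizeof(cfg) and config_version = 1;
       anything else is rejected with EINVAL by spe_backend_configure(). */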

diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S
index 3ec12140f139..c22d5fe76468 100644
--- a/sys/arm64/arm64/locore.S
+++ b/sys/arm64/arm64/locore.S
@@ -445,6 +445,10 @@ LENTRY(enter_kernel_el)
 	ldr	x3, =(CNTHCTL_EL1PCTEN_NOTRAP | CNTHCTL_EL1PCEN_NOTRAP)
 	ldr	x5, =(PSR_DAIF | PSR_M_EL1h)
 
+	/* Enable SPE at EL1 via Monitor Debug Configuration Register */
+	mov	x6, MDCR_EL2_E2PB_EL1_0_NO_TRAP
+	msr	mdcr_el2, x6
+
 .Ldone_vhe:
 
 	msr	cptr_el2, x2
diff --git a/sys/arm64/spe/arm_spe.h b/sys/arm64/spe/arm_spe.h
new file mode 100644
index 000000000000..5dba20673a77
--- /dev/null
+++ b/sys/arm64/spe/arm_spe.h
@@ -0,0 +1,77 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ARM64_ARM_SPE_H_
+#define _ARM64_ARM_SPE_H_
+
+/* kqueue events */
+#define ARM_SPE_KQ_BUF		138
+#define ARM_SPE_KQ_SHUTDOWN	139
+#define ARM_SPE_KQ_SIGNAL	140
+
+/* spe_backend_read() u64 data encoding */
+#define KQ_BUF_POS_SHIFT	0
+#define KQ_BUF_POS		(1 << KQ_BUF_POS_SHIFT)
+#define KQ_PARTREC_SHIFT	1
+#define KQ_PARTREC		(1 << KQ_PARTREC_SHIFT)
+#define KQ_FINAL_BUF_SHIFT	2
+#define KQ_FINAL_BUF		(1 << KQ_FINAL_BUF_SHIFT)
+
+enum arm_spe_ctx_field {
+	ARM_SPE_CTX_NONE,
+	ARM_SPE_CTX_PID,
+	ARM_SPE_CTX_CPU_ID
+};
+
+enum arm_spe_profiling_level {
+	ARM_SPE_KERNEL_AND_USER,
+	ARM_SPE_KERNEL_ONLY,
+	ARM_SPE_USER_ONLY
+};
+struct arm_spe_config {
+	/* Minimum interval is IMPLEMENTATION DEFINED; maximum is a 24-bit value */
+	uint32_t interval;
+
+	/* Profile kernel (EL1), userspace (EL0) or both */
+	enum arm_spe_profiling_level level;
+
+	/*
+	 * Configure context field in SPE records to store either the
+	 * current PID, the CPU ID or neither
+	 *
+	 * In PID mode, kernel threads without a process context are
+	 * logged as PID 0
+	 */
+	enum arm_spe_ctx_field ctx_field;
+};
+
+struct arm_spe_svc_buf {
+	uint32_t ident;
+	uint8_t buf_idx : 1;
+};
+
+#endif /* _ARM64_ARM_SPE_H_ */
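
The KQ_* flags above pack three fields into the u64 handed back by
spe_backend_read() with each buffer notification; a consumer might unpack it
as follows (a sketch, variable names illustrative):

    uint64_t data;                                 /* from the read path */
    int  buf_idx     = (data & KQ_BUF_POS) >> KQ_BUF_POS_SHIFT;
    bool partial_rec = (data & KQ_PARTREC) != 0;   /* half ends mid-record */
    bool final_buf   = (data & KQ_FINAL_BUF) != 0; /* last buffer of session */
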
diff --git a/sys/arm64/spe/arm_spe_acpi.c b/sys/arm64/spe/arm_spe_acpi.c
new file mode 100644
index 000000000000..b9f40448d940
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_acpi.c
@@ -0,0 +1,146 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+
+#include <contrib/dev/acpica/include/acpi.h>
+#include <dev/acpica/acpivar.h>
+
+#include <arm64/spe/arm_spe_dev.h>
+
+static device_identify_t arm_spe_acpi_identify;
+static device_probe_t arm_spe_acpi_probe;
+
+static device_method_t arm_spe_acpi_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_identify,      arm_spe_acpi_identify),
+	DEVMETHOD(device_probe,         arm_spe_acpi_probe),
+
+	DEVMETHOD_END,
+};
+
+DEFINE_CLASS_1(spe, arm_spe_acpi_driver, arm_spe_acpi_methods,
+    sizeof(struct arm_spe_softc), arm_spe_driver);
+
+DRIVER_MODULE(spe, acpi, arm_spe_acpi_driver, 0, 0);
+
+struct madt_data {
+	u_int irq;
+	bool found;
+	bool valid;
+};
+
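+/*
+ * The SPE interrupt is reported per-CPU in each MADT GICC entry; since the
+ * driver attaches a single device, only accept the configuration when every
+ * CPU reports the same non-zero interrupt.
+ */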
+static void
+madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
+{
+	ACPI_MADT_GENERIC_INTERRUPT *intr;
+	struct madt_data *madt_data;
+	u_int irq;
+
+	madt_data = (struct madt_data *)arg;
+
+	/* Exit early if we have decided not to attach */
+	if (!madt_data->valid)
+		return;
+
+	switch (entry->Type) {
+	case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
+		intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry;
+		irq = intr->SpeInterrupt;
+
+		if (irq == 0) {
+			madt_data->valid = false;
+		} else if (!madt_data->found) {
+			madt_data->found = true;
+			madt_data->irq = irq;
+		} else if (madt_data->irq != irq) {
+			madt_data->valid = false;
+		}
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void
+arm_spe_acpi_identify(driver_t *driver, device_t parent)
+{
+	struct madt_data madt_data;
+	ACPI_TABLE_MADT *madt;
+	device_t dev;
+	vm_paddr_t physaddr;
+
+	physaddr = acpi_find_table(ACPI_SIG_MADT);
+	if (physaddr == 0)
+		return;
+
+	madt = acpi_map_table(physaddr, ACPI_SIG_MADT);
+	if (madt == NULL) {
+		device_printf(parent, "spe: Unable to map the MADT\n");
+		return;
+	}
+
+	madt_data.irq = 0;
+	madt_data.found = false;
+	madt_data.valid = true;
+
+	acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
+	    madt_handler, &madt_data);
+
+	if (!madt_data.found || !madt_data.valid)
+		goto out;
+
+	MPASS(madt_data.irq != 0);
+
+	dev = BUS_ADD_CHILD(parent, 0, "spe", -1);
+	if (dev == NULL) {
+		device_printf(parent, "add spe child failed\n");
+		goto out;
+	}
+
+	BUS_SET_RESOURCE(parent, dev, SYS_RES_IRQ, 0, madt_data.irq, 1);
+
+out:
+	acpi_unmap_table(madt);
+}
+
+static int
+arm_spe_acpi_probe(device_t dev)
+{
+	device_set_desc(dev, "ARM Statistical Profiling Extension");
+	return (BUS_PROBE_NOWILDCARD);
+}
diff --git a/sys/arm64/spe/arm_spe_backend.c b/sys/arm64/spe/arm_spe_backend.c
new file mode 100644
index 000000000000..b4e1132f9cbc
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_backend.c
@@ -0,0 +1,586 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Arm Statistical Profiling Extension (SPE) backend
+ *
+ * Basic SPE operation
+ *
+ *   SPE is enabled and configured on a per-core basis, with each core requiring
+ *   separate code to enable and configure. Each core also requires a separate
+ *   buffer passed as config where the CPU will write profiling data. When the
+ *   profiling buffer is full, an interrupt will be taken on the same CPU.
+ *
+ * Driver Design
+ *
+ * - HWT allocates a large single buffer per core. This buffer is split in half
+ *   to create a 2-element circular buffer (aka ping-pong buffer) where the
+ *   kernel writes to one half while userspace is copying the other half
+ * - SMP calls are used to enable and configure each core, with SPE initially
+ *   configured to write to the first half of the buffer
+ * - When the first half of the buffer is full, a buffer full interrupt will
+ *   immediately switch writing to the second half. The kernel adds the details
+ *   of the half that needs copying to a FIFO STAILQ and notifies userspace via
+ *   kqueue by sending an ARM_SPE_KQ_BUF kevent with how many buffers on the
+ *   queue need servicing
+ * - The kernel responds to HWT_IOC_BUFPTR_GET ioctl by sending details of the
+ *   first item from the queue
+ * - The buffers pending copying will not be overwritten until an
+ *   HWT_IOC_SVC_BUF ioctl is received from userspace confirming the data has
+ *   been copied out
+ * - In the case where both halves of the buffer are full, profiling will be
+ *   paused until notification via HWT_IOC_SVC_BUF is received
+ *
+ * Future improvements and limitations
+ *
+ * - Using large buffer sizes should minimise pauses and loss of profiling
+ *   data while the kernel is waiting for userspace to copy out data. Since it
+ *   is generally expected that consuming (copying) this data is faster than
+ *   producing it, in practice this has not so far been an issue. If it does
+ *   prove to be an issue even with large buffer sizes, then additional
+ *   buffering, e.g. n-element circular buffers, might be required.
+ *
+ * - kqueue can only notify and queue one kevent of the same type, with
+ *   subsequent events overwriting data in the first event. The kevent
+ *   ARM_SPE_KQ_BUF can therefore only contain the number of buffers on the
+ *   STAILQ, incrementing each time a new buffer is full. In this case kqueue
+ *   serves just as a notification to userspace to wake up and query the kernel
+ *   with the appropriate ioctl. An alternative might be custom kevents where
+ *   the kevent identifier is encoded with something like n+cpu_id or n+tid. In
+ *   this case data could be sent directly with kqueue via the kevent data and
+ *   fflags elements, avoiding the extra ioctl.
+ *
+ */
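+
+/*
+ * A sketch of the userspace side of this protocol, assuming the consumer owns
+ * the kqueue fd it registers with hwt(4) (illustrative, not part of the API):
+ *
+ *	EV_SET(&kev, ARM_SPE_KQ_BUF, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, NULL);
+ *	kevent(kq, &kev, 1, NULL, 0, NULL);	(ditto _SHUTDOWN and _SIGNAL)
+ *	for (;;) {
+ *		kevent(kq, NULL, 0, &kev, 1, NULL);
+ *		if (kev.ident == ARM_SPE_KQ_SHUTDOWN)
+ *			break;
+ *		issue HWT_IOC_BUFPTR_GET, copy out the half it describes,
+ *		then HWT_IOC_SVC_BUF to mark that half serviced
+ *	}
+ */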
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/hwt.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <machine/bus.h>
+
+#include <arm64/spe/arm_spe_dev.h>
+
+#include <dev/hwt/hwt_vm.h>
+#include <dev/hwt/hwt_backend.h>
+#include <dev/hwt/hwt_config.h>
+#include <dev/hwt/hwt_context.h>
+#include <dev/hwt/hwt_cpu.h>
+#include <dev/hwt/hwt_thread.h>
+
+MALLOC_DECLARE(M_ARM_SPE);
+
+extern u_int mp_maxid;
+extern struct taskqueue *taskqueue_arm_spe;
+
+int spe_backend_disable_smp(struct hwt_context *ctx);
+
+static device_t spe_dev;
+static struct hwt_backend_ops spe_ops;
+static struct hwt_backend backend = {
+	.ops = &spe_ops,
+	.name = "spe",
+	.kva_req = 1,
+};
+
+static struct arm_spe_info *spe_info;
+
+static int
+spe_backend_init_thread(struct hwt_context *ctx)
+{
+	return (ENOTSUP);
+}
+
+static void
+spe_backend_init_cpu(struct hwt_context *ctx)
+{
+	struct arm_spe_info *info;
+	struct arm_spe_softc *sc = device_get_softc(spe_dev);
+	char lock_name[32];
+	char *tmp = "Arm SPE lock/cpu/";
+	int cpu_id;
+
+	spe_info = malloc(sizeof(struct arm_spe_info) * mp_ncpus,
+	   M_ARM_SPE, M_WAITOK | M_ZERO);
+
+	sc->spe_info = spe_info;
+
+	CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+		info = &spe_info[cpu_id];
+		info->sc = sc;
+		info->ident = cpu_id;
+		info->buf_info[0].info = info;
+		info->buf_info[0].buf_idx = 0;
+		info->buf_info[1].info = info;
+		info->buf_info[1].buf_idx = 1;
+		snprintf(lock_name, sizeof(lock_name), "%s%d", tmp, cpu_id);
+		mtx_init(&info->lock, lock_name, NULL, MTX_SPIN);
+	}
+}
+
+static int
+spe_backend_init(struct hwt_context *ctx)
+{
+	struct arm_spe_softc *sc = device_get_softc(spe_dev);
+	int error = 0;
+
+	/*
+	 * HWT currently requires the buffer size to be a multiple of PAGE_SIZE
+	 * (i.e. at least 4KB), and the maximum PMBIDR.Align is 2KB, so this
+	 * should never fail, but it's a worthwhile sanity check.
+	 */
+	if (ctx->bufsize % sc->kva_align != 0)
+		return (EINVAL);
+
+	/*
+	 * Since we're splitting the buffer in half, and PMBLIMITR needs to be
+	 * page aligned, the minimum buffer size is 2x PAGE_SIZE.
+	 */
+	if (ctx->bufsize < (2 * PAGE_SIZE))
+		return (EINVAL);
+
+	sc->ctx = ctx;
+	sc->kqueue_fd = ctx->kqueue_fd;
+	sc->hwt_td = ctx->hwt_td;
+
+	if (ctx->mode == HWT_MODE_THREAD)
+		error = spe_backend_init_thread(ctx);
+	else
+		spe_backend_init_cpu(ctx);
+
+	return (error);
+}
+
+#ifdef ARM_SPE_DEBUG
+static void
+hex_dump(uint8_t *buf, size_t len)
+{
+	size_t i;
+
+	printf("--------------------------------------------------------------\n");
+	for (i = 0; i < len; ++i) {
+		if (i % 8 == 0) {
+			printf(" ");
+		}
+		if (i % 16 == 0) {
+			if (i != 0) {
+				printf("\r\n");
+			}
+			printf("\t");
+		}
+		printf("%02X ", buf[i]);
+	}
+	printf("\r\n");
+}
+#endif
+
+static int
+spe_backend_deinit(struct hwt_context *ctx)
+{
+#ifdef ARM_SPE_DEBUG
+	struct arm_spe_info *info;
+	int cpu_id;
+
+	CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+		info = &spe_info[cpu_id];
+		hex_dump((void *)info->kvaddr, 128);
+		hex_dump((void *)(info->kvaddr + (info->buf_size/2)), 128);
+	}
+#endif
+
+	if (ctx->state == CTX_STATE_RUNNING) {
+		spe_backend_disable_smp(ctx);
+		ctx->state = CTX_STATE_STOPPED;
+	}
+
+	free(spe_info, M_ARM_SPE);
+
+	return (0);
+}
+
+static uint64_t
+arm_spe_min_interval(struct arm_spe_softc *sc)
+{
+	/* IMPLEMENTATION DEFINED */
+	switch (PMSIDR_Interval_VAL(sc->pmsidr)) {
+	case PMSIDR_Interval_256:
+		return (256);
+	case PMSIDR_Interval_512:
+		return (512);
+	case PMSIDR_Interval_768:
+		return (768);
+	case PMSIDR_Interval_1024:
+		return (1024);
+	case PMSIDR_Interval_1536:
+		return (1536);
+	case PMSIDR_Interval_2048:
+		return (2048);
+	case PMSIDR_Interval_3072:
+		return (3072);
+	case PMSIDR_Interval_4096:
+		return (4096);
+	default:
+		return (4096);
+	}
+}
+
+static inline void
+arm_spe_set_interval(struct arm_spe_info *info, uint64_t interval)
+{
+	uint64_t min_interval = arm_spe_min_interval(info->sc);
+
+	interval = MAX(interval, min_interval);
+	interval = MIN(interval, (1 << 24) - 1); /* max 24-bit value */
+
+	dprintf("%s %lu\n", __func__, interval);
+
+	info->pmsirr &= ~(PMSIRR_INTERVAL_MASK);
+	info->pmsirr |= (interval << PMSIRR_INTERVAL_SHIFT);
+}
+
+static int
+spe_backend_configure(struct hwt_context *ctx, int cpu_id, int session_id)
+{
+	struct arm_spe_info *info = &spe_info[cpu_id];
+	struct arm_spe_config *cfg;
+	int err = 0;
+
+	mtx_lock_spin(&info->lock);
+	info->ident = cpu_id;
+	/* Set defaults */
+	info->pmsfcr = 0;
+	info->pmsevfr = 0xFFFFFFFFFFFFFFFFUL;
+	info->pmslatfr = 0;
+	info->pmsirr =
+	    (arm_spe_min_interval(info->sc) << PMSIRR_INTERVAL_SHIFT)
+	    | PMSIRR_RND;
+	info->pmsicr = 0;
+	info->pmscr = PMSCR_TS | PMSCR_PA | PMSCR_CX | PMSCR_E1SPE | PMSCR_E0SPE;
+
+	if (ctx->config != NULL &&
+	    ctx->config_size == sizeof(struct arm_spe_config) &&
+	    ctx->config_version == 1) {
+		cfg = (struct arm_spe_config *)ctx->config;
+		if (cfg->interval)
+			arm_spe_set_interval(info, cfg->interval);
+		if (cfg->level == ARM_SPE_KERNEL_ONLY)
+			info->pmscr &= ~(PMSCR_E0SPE); /* turn off user */
+		if (cfg->level == ARM_SPE_USER_ONLY)
+			info->pmscr &= ~(PMSCR_E1SPE); /* turn off kern */
+		if (cfg->ctx_field)
+			info->ctx_field = cfg->ctx_field;
+	} else {
+		err = EINVAL;
+	}
+	mtx_unlock_spin(&info->lock);
+
+	return (err);
+}
+
+static void
+arm_spe_enable(void *arg __unused)
+{
+	struct arm_spe_info *info = &spe_info[PCPU_GET(cpuid)];
+	uint64_t base, limit;
+
+	dprintf("%s on cpu:%d\n", __func__, PCPU_GET(cpuid));
+
+	mtx_lock_spin(&info->lock);
+
+	if (info->ctx_field == ARM_SPE_CTX_CPU_ID)
+		WRITE_SPECIALREG(CONTEXTIDR_EL1_REG, PCPU_GET(cpuid));
+
+	WRITE_SPECIALREG(PMSFCR_EL1_REG, info->pmsfcr);
+	WRITE_SPECIALREG(PMSEVFR_EL1_REG, info->pmsevfr);
+	WRITE_SPECIALREG(PMSLATFR_EL1_REG, info->pmslatfr);
+
+	/* Set the sampling interval */
+	WRITE_SPECIALREG(PMSIRR_EL1_REG, info->pmsirr);
+	isb();
+
+	/* Write 0 here before enabling sampling */
+	WRITE_SPECIALREG(PMSICR_EL1_REG, info->pmsicr);
+	isb();
+
+	base = info->kvaddr;
+	limit = base + (info->buf_size/2);
+	/* Enable the buffer */
+	limit &= PMBLIMITR_LIMIT_MASK; /* Zero lower 12 bits */
+	limit |= PMBLIMITR_E;
+	/* Set the base and limit */
+	WRITE_SPECIALREG(PMBPTR_EL1_REG, base);
+	WRITE_SPECIALREG(PMBLIMITR_EL1_REG, limit);
+	isb();
+
+	/* Enable sampling */
+	WRITE_SPECIALREG(PMSCR_EL1_REG, info->pmscr);
+	isb();
+
+	info->enabled = true;
+
+	mtx_unlock_spin(&info->lock);
+}
+
+static int
+spe_backend_enable_smp(struct hwt_context *ctx)
+{
+	struct arm_spe_info *info;
+	struct hwt_vm *vm;
+	int cpu_id;
+
+	HWT_CTX_LOCK(ctx);
+	CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+		vm = hwt_cpu_get(ctx, cpu_id)->vm;
+
+		info = &spe_info[cpu_id];
+
+		mtx_lock_spin(&info->lock);
+		info->kvaddr = vm->kvaddr;
+		info->buf_size = ctx->bufsize;
+		mtx_unlock_spin(&info->lock);
+	}
+	HWT_CTX_UNLOCK(ctx);
+
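+	/*
+	 * ctx_field comes from the session-wide config and is identical on
+	 * every CPU in the map, so peek at the first CPU to decide whether
+	 * PIDs should be mirrored into CONTEXTIDR_EL1.
+	 */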
+	cpu_id = CPU_FFS(&ctx->cpu_map) - 1;
+	info = &spe_info[cpu_id];
+	if (info->ctx_field == ARM_SPE_CTX_PID)
+		arm64_pid_in_contextidr = true;
+	else
+		arm64_pid_in_contextidr = false;
+
+	smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier,
+	    arm_spe_enable, smp_no_rendezvous_barrier, NULL);
+
+	return (0);
+}
+
+void
+arm_spe_disable(void *arg __unused)
+{
+	struct arm_spe_info *info = &spe_info[PCPU_GET(cpuid)];
+	struct arm_spe_buf_info *buf = &info->buf_info[info->buf_idx];
+
+	if (!info->enabled)
+		return;
+
+	dprintf("%s on cpu:%d\n", __func__, PCPU_GET(cpuid));
+
+	/* Disable profiling */
+	WRITE_SPECIALREG(PMSCR_EL1_REG, 0x0);
+	isb();
+
+	/* Drain any remaining tracing data */
+	psb_csync();
+	dsb(nsh);
+
+	/* Disable the profiling buffer */
+	WRITE_SPECIALREG(PMBLIMITR_EL1_REG, 0);
+	isb();
+
+	/* Clear interrupt status reg */
+	WRITE_SPECIALREG(PMBSR_EL1_REG, 0x0);
+
+	/* Clear PID/CPU_ID from context ID reg */
+	WRITE_SPECIALREG(CONTEXTIDR_EL1_REG, 0);
+
+	mtx_lock_spin(&info->lock);
+	buf->pmbptr = READ_SPECIALREG(PMBPTR_EL1_REG);
+	info->enabled = false;
+	mtx_unlock_spin(&info->lock);
+}
+
+int
+spe_backend_disable_smp(struct hwt_context *ctx)
+{
+	struct kevent kev;
+	struct arm_spe_info *info;
+	struct arm_spe_buf_info *buf;
+	int cpu_id;
+	int ret;
+
+	/* Disable and send out remaining data in bufs */
+	smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier,
+	    arm_spe_disable, smp_no_rendezvous_barrier, NULL);
+
+	CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+		info = &spe_info[cpu_id];
+		buf = &info->buf_info[info->buf_idx];
+		arm_spe_send_buffer(buf, 0);
+	}
+
+	arm64_pid_in_contextidr = false;
+
+	/*
+	 * Tracing on all CPUs has been disabled, and we've sent write ptr
+	 * offsets for all bufs - let userspace know it can shutdown
+	 */
+	EV_SET(&kev, ARM_SPE_KQ_SHUTDOWN, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
+	ret = kqfd_register(ctx->kqueue_fd, &kev, ctx->hwt_td, M_WAITOK);
+	if (ret)
+		dprintf("%s kqfd_register ret:%d\n", __func__, ret);
+
+	return (0);
+}
+
+static void
+spe_backend_stop(struct hwt_context *ctx)
+{
+	spe_backend_disable_smp(ctx);
+}
+
+static void
+arm_spe_reenable(void *arg __unused)
+{
+	struct arm_spe_info *info = &spe_info[PCPU_GET(cpuid)];
+
+	WRITE_SPECIALREG(PMSCR_EL1_REG, info->pmscr);
+	isb();
+}
+
+static int
+spe_backend_svc_buf(struct hwt_context *ctx, void *data, size_t data_size,
+    int data_version)
+{
+	struct arm_spe_info *info;
+	struct arm_spe_buf_info *buf;
+	struct arm_spe_svc_buf *s;
+	int err = 0;
+	cpuset_t cpu_set;
+
+	if (data_size != sizeof(struct arm_spe_svc_buf))
+		return (E2BIG);
+
+	if (data_version != 1)
+		return (EINVAL);
+
+	s = (struct arm_spe_svc_buf *)data;
+	if (s->buf_idx > 1)
+		return (ENODEV);
+	if (s->ident >= mp_ncpus)
+		return (EINVAL);
+
+	info = &spe_info[s->ident];
+	mtx_lock_spin(&info->lock);
+
+	buf = &info->buf_info[s->buf_idx];
+
+	if (!info->enabled) {
+		err = ENXIO;
+		goto end;
+	}
+
+	/* Clear the flag that signals the buffer needs servicing */
+	buf->buf_svc = false;
+
+	/* Re-enable profiling if we've been waiting for this notification */
+	if (buf->buf_wait) {
+		CPU_SETOF(s->ident, &cpu_set);
+
+		mtx_unlock_spin(&info->lock);
+		smp_rendezvous_cpus(cpu_set, smp_no_rendezvous_barrier,
+		    arm_spe_reenable, smp_no_rendezvous_barrier, NULL);
+		mtx_lock_spin(&info->lock);
+
+		buf->buf_wait = false;
+	}
+
+end:
+	mtx_unlock_spin(&info->lock);
+	return (err);
+}
+
+static int
+spe_backend_read(struct hwt_vm *vm, int *ident, vm_offset_t *offset,
+    uint64_t *data)
+{
+	struct arm_spe_queue *q;
+	struct arm_spe_softc *sc = device_get_softc(spe_dev);
+	int error = 0;
+
+	mtx_lock_spin(&sc->sc_lock);
+
+	/* Return the first pending buffer that needs servicing */
+	q = STAILQ_FIRST(&sc->pending);
+	if (q == NULL) {
+		error = ENOENT;
+		goto error;
+	}
+	*ident = q->ident;
+	*offset = q->offset;
+	*data = (q->buf_idx << KQ_BUF_POS_SHIFT) |
+	    (q->partial_rec << KQ_PARTREC_SHIFT) |
+	    (q->final_buf << KQ_FINAL_BUF_SHIFT);
+
+	STAILQ_REMOVE_HEAD(&sc->pending, next);
+	sc->npending--;
+
+error:
+	mtx_unlock_spin(&sc->sc_lock);
+	if (error)
+		return (error);
+
+	free(q, M_ARM_SPE);
+	return (0);
+}
+
+static struct hwt_backend_ops spe_ops = {
+	.hwt_backend_init = spe_backend_init,
+	.hwt_backend_deinit = spe_backend_deinit,
+
+	.hwt_backend_configure = spe_backend_configure,
+	.hwt_backend_svc_buf = spe_backend_svc_buf,
+	.hwt_backend_stop = spe_backend_stop,
+
+	.hwt_backend_enable_smp = spe_backend_enable_smp,
+	.hwt_backend_disable_smp = spe_backend_disable_smp,
+
+	.hwt_backend_read = spe_backend_read,
+};
+
+int
+spe_register(device_t dev)
+{
+	spe_dev = dev;
+
+	return (hwt_backend_register(&backend));
+}
diff --git a/sys/arm64/spe/arm_spe_dev.c b/sys/arm64/spe/arm_spe_dev.c
new file mode 100644
index 000000000000..54623eebda21
--- /dev/null
+++ b/sys/arm64/spe/arm_spe_dev.c
@@ -0,0 +1,323 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Arm Ltd
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Andrew Turner under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/event.h>
+#include <sys/hwt.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/smp.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+
+#include <machine/bus.h>
+
+#include <arm64/spe/arm_spe.h>
+#include <arm64/spe/arm_spe_dev.h>
+
+MALLOC_DEFINE(M_ARM_SPE, "armspe", "Arm SPE tracing");
+
+/*
+ * taskqueue(9) used for sleepable routines called from interrupt handlers
+ */
+TASKQUEUE_FAST_DEFINE_THREAD(arm_spe);
+
+void arm_spe_send_buffer(void *, int);
+static void arm_spe_error(void *, int);
+static int arm_spe_intr(void *);
+device_attach_t arm_spe_attach;
+
+static device_method_t arm_spe_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_attach,        arm_spe_attach),
+
+	DEVMETHOD_END,
+};
+
+DEFINE_CLASS_0(spe, arm_spe_driver, arm_spe_methods,
+    sizeof(struct arm_spe_softc));
+
+#define ARM_SPE_KVA_MAX_ALIGN	UL(2048)
+
+int
+arm_spe_attach(device_t dev)
+{
+	struct arm_spe_softc *sc;
+	int error, rid;
+
+	sc = device_get_softc(dev);
+	sc->dev = dev;
+
+	sc->pmbidr = READ_SPECIALREG(PMBIDR_EL1_REG);
+	sc->pmsidr = READ_SPECIALREG(PMSIDR_EL1_REG);
+	device_printf(dev, "PMBIDR_EL1: %#lx\n", sc->pmbidr);
+	device_printf(dev, "PMSIDR_EL1: %#lx\n", sc->pmsidr);
+	if ((sc->pmbidr & PMBIDR_P) != 0) {
+		device_printf(dev, "Profiling Buffer is owned by a higher Exception level\n");
+		return (EPERM);
+	}
+
+	sc->kva_align = 1 << ((sc->pmbidr & PMBIDR_Align_MASK) >> PMBIDR_Align_SHIFT);
+	if (sc->kva_align > ARM_SPE_KVA_MAX_ALIGN) {
+		device_printf(dev, "Invalid PMBIDR.Align value of %d\n", sc->kva_align);
+		return (EINVAL);
+	}
+
*** 542 LINES SKIPPED ***