git: ddf0ed09bd8f - main - sdt: Implement SDT probes using hot-patching

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Wed, 19 Jun 2024 20:58:11 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=ddf0ed09bd8f83677407db36828aca2c10f419c9

commit ddf0ed09bd8f83677407db36828aca2c10f419c9
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2024-06-19 20:57:09 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2024-06-19 20:57:41 +0000

    sdt: Implement SDT probes using hot-patching
    
    The idea here is to avoid a memory access and conditional branch per
    probe site.  Instead, the probe is represented by an "unreachable"
    unconditional function call.  asm goto is used to store the address of
    the probe site (represented by a no-op sled) and the address of the
    function call into a tracepoint record.  Each SDT probe carries a list
    of tracepoints.
    
    When the probe is enabled, the no-op sled corresponding to each
    tracepoint is overwritten with a jmp to the corresponding label.  The
    implementation uses smp_rendezvous() to park all other CPUs while the
    instruction is being overwritten, as this can't be done atomically in
    general.  The compiler moves argument marshalling code and the
    sdt_probe() function call out-of-line, i.e., to the end of the function.
    
    Per gallatin@ in D43504, this approach has less overhead when probes are
    disabled.  To make the implementation a bit simpler, I removed support
    for probes with 7 arguments; nothing makes use of this except a
    regression test case.  It could be re-added later if need be.
    
    The approach taken in this patch enables some more improvements:
    1. We can now automatically fill out the "function" field of SDT probe
       names.  The SDT macros let the programmer specify the function and
       module names, but this is really a bug and shouldn't have been
       allowed.  The intent was to be able to have the same probe in
       multiple functions and to let the user restrict which probes actually
       get enabled by specifying a function name or glob.
    2. We can avoid branching on SDT_PROBES_ENABLED() by adding the ability
       to include blocks of code in the out-of-line path.  For example:
    
            if (SDT_PROBES_ENABLED()) {
                    int reason = CLD_EXITED;
    
                    if (WCOREDUMP(signo))
                            reason = CLD_DUMPED;
                    else if (WIFSIGNALED(signo))
                            reason = CLD_KILLED;
                    SDT_PROBE1(proc, , , exit, reason);
            }
    
    could be written
    
            SDT_PROBE1_EXT(proc, , , exit, reason,
                    int reason;
    
                    reason = CLD_EXITED;
                    if (WCOREDUMP(signo))
                            reason = CLD_DUMPED;
                    else if (WIFSIGNALED(signo))
                            reason = CLD_KILLED;
            );
    
    In the future I would like to use this mechanism more generally, e.g.,
    to remove branches and marshalling code used by hwpmc, and generally to
    make it easier to add new tracepoint consumers without having to add
    more conditional branches to hot code paths.
    
    Reviewed by:    Domagoj Stolfa, avg
    MFC after:      2 months
    Differential Revision:  https://reviews.freebsd.org/D44483
---
 .../cmd/dtrace/test/tst/common/sdt/tst.sdtargs.d   |  11 +-
 sys/amd64/include/sdt_machdep.h                    |  12 ++
 sys/arm/arm/sdt_machdep.c                          |  63 ++++++++++
 sys/arm/include/sdt_machdep.h                      |  12 ++
 sys/arm64/arm64/sdt_machdep.c                      |  77 ++++++++++++
 sys/arm64/include/sdt_machdep.h                    |  12 ++
 sys/cddl/dev/dtrace/dtrace_test.c                  |   6 +-
 sys/cddl/dev/sdt/sdt.c                             |  97 +++++++++++++--
 sys/conf/files.arm                                 |   1 +
 sys/conf/files.arm64                               |   1 +
 sys/conf/files.powerpc                             |   1 +
 sys/conf/files.riscv                               |   1 +
 sys/conf/files.x86                                 |   1 +
 sys/i386/include/sdt_machdep.h                     |  19 +++
 sys/kern/kern_sdt.c                                |  21 +++-
 sys/modules/dtrace/Makefile                        |   4 +-
 sys/powerpc/include/sdt_machdep.h                  |  12 ++
 sys/powerpc/powerpc/sdt_machdep.c                  |  59 +++++++++
 sys/riscv/include/sdt_machdep.h                    |  12 ++
 sys/riscv/riscv/sdt_machdep.c                      |  68 +++++++++++
 sys/sys/sdt.h                                      | 133 +++++++++++----------
 sys/x86/x86/sdt_machdep.c                          |  84 +++++++++++++
 22 files changed, 625 insertions(+), 82 deletions(-)

diff --git a/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/sdt/tst.sdtargs.d b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/sdt/tst.sdtargs.d
index e965b05f2405..d3f7bfad5db1 100644
--- a/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/sdt/tst.sdtargs.d
+++ b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/sdt/tst.sdtargs.d
@@ -27,7 +27,7 @@
 #pragma ident	"%Z%%M%	%I%	%E% SMI"
 
 /*
- * ASSERTION: Verify that argN (1..7) variables are properly remapped.
+ * ASSERTION: Verify that argN (1..6) variables are properly remapped.
  */
 
 BEGIN
@@ -44,13 +44,12 @@ ERROR
 }
 
 test:::sdttest
-/arg0 != 1 || arg1 != 2 || arg2 != 3 || arg3 != 4 || arg4 != 5 || arg5 != 6 ||
-    arg6 != 7/
+/arg0 != 1 || arg1 != 2 || arg2 != 3 || arg3 != 4 || arg4 != 5 || arg5 != 6/
 {
 	printf("sdt arg mismatch\n\n");
-	printf("args are  : %d, %d, %d, %d, %d, %d, %d\n", arg0, arg1, arg2,
-	    arg3, arg4, arg5, arg6);
-	printf("should be : 1, 2, 3, 4, 5, 6, 7\n");
+	printf("args are  : %d, %d, %d, %d, %d, %d\n", arg0, arg1, arg2,
+	    arg3, arg4, arg5);
+	printf("should be : 1, 2, 3, 4, 5, 6\n");
 	exit(1);
 }
 
diff --git a/sys/amd64/include/sdt_machdep.h b/sys/amd64/include/sdt_machdep.h
new file mode 100644
index 000000000000..2434abed2e0e
--- /dev/null
+++ b/sys/amd64/include/sdt_machdep.h
@@ -0,0 +1,12 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Mark Johnston <markj@FreeBSD.org>
+ */
+
+#ifndef _SYS_SDT_MACHDEP_H_
+#define	_SYS_SDT_MACHDEP_H_
+
+#define	_SDT_ASM_PATCH_INSTR	"nop; nop; nop; nop; nop"
+
+#endif
diff --git a/sys/arm/arm/sdt_machdep.c b/sys/arm/arm/sdt_machdep.c
new file mode 100644
index 000000000000..02361497ee8d
--- /dev/null
+++ b/sys/arm/arm/sdt_machdep.c
@@ -0,0 +1,63 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Mark Johnston <markj@FreeBSD.org>
+ */
+
+#include <sys/systm.h>
+#include <sys/sdt.h>
+
+#include <machine/cpu.h>
+
+/*
+ * Return true if we can overwrite a nop at "patchpoint" with a jump to the
+ * target address.
+ */
+bool
+sdt_tracepoint_valid(uintptr_t patchpoint, uintptr_t target)
+{
+	int32_t offset;
+
+	if (patchpoint == target ||
+	    (patchpoint & (INSN_SIZE - 1)) != 0 ||
+	    (target & (INSN_SIZE - 1)) != 0 ||
+	    patchpoint + 2 * INSN_SIZE < patchpoint)
+		return (false);
+	offset = target - (patchpoint + 2 * INSN_SIZE);
+	if (offset < -(1 << 24) || offset > (1 >> 24))
+		return (false);
+	return (true);
+}
+
+/*
+ * Overwrite the copy of _SDT_ASM_PATCH_INSTR at the tracepoint with a jump to
+ * the target address.
+ */
+void
+sdt_tracepoint_patch(uintptr_t patchpoint, uintptr_t target)
+{
+	uint32_t instr;
+
+	KASSERT(sdt_tracepoint_valid(patchpoint, target),
+	    ("%s: invalid tracepoint %#x -> %#x",
+	    __func__, patchpoint, target));
+
+	instr =
+	    (((target - (patchpoint + 2 * INSN_SIZE)) >> 2) & ((1 << 24) - 1)) |
+	    0xea000000;
+	memcpy((void *)patchpoint, &instr, sizeof(instr));
+	icache_sync(patchpoint, sizeof(instr));
+}
+
+/*
+ * Overwrite the patchpoint with a nop instruction.
+ */
+void
+sdt_tracepoint_restore(uintptr_t patchpoint)
+{
+	uint32_t instr;
+
+	instr = 0xe320f000u;
+	memcpy((void *)patchpoint, &instr, sizeof(instr));
+	icache_sync(patchpoint, sizeof(instr));
+}
diff --git a/sys/arm/include/sdt_machdep.h b/sys/arm/include/sdt_machdep.h
new file mode 100644
index 000000000000..738d246832a2
--- /dev/null
+++ b/sys/arm/include/sdt_machdep.h
@@ -0,0 +1,12 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Mark Johnston <markj@FreeBSD.org>
+ */
+
+#ifndef _SYS_SDT_MACHDEP_H_
+#define	_SYS_SDT_MACHDEP_H_
+
+#define	_SDT_ASM_PATCH_INSTR	"nop"
+
+#endif /* _SYS_SDT_MACHDEP_H_ */
diff --git a/sys/arm64/arm64/sdt_machdep.c b/sys/arm64/arm64/sdt_machdep.c
new file mode 100644
index 000000000000..23324ffbf333
--- /dev/null
+++ b/sys/arm64/arm64/sdt_machdep.c
@@ -0,0 +1,77 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Mark Johnston <markj@FreeBSD.org>
+ */
+
+#include <sys/systm.h>
+#include <sys/sdt.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/cpufunc.h>
+#include <machine/md_var.h>
+#include <machine/vmparam.h>
+
+/*
+ * Return true if we can overwrite a nop at "patchpoint" with a jump to the
+ * target address.
+ */
+bool
+sdt_tracepoint_valid(uintptr_t patchpoint, uintptr_t target)
+{
+	void *addr;
+	int64_t offset;
+
+	if (!arm64_get_writable_addr((void *)patchpoint, &addr))
+		return (false);
+
+	if (patchpoint == target ||
+	    (patchpoint & (INSN_SIZE - 1)) != 0 ||
+	    (target & (INSN_SIZE - 1)) != 0)
+		return (false);
+	offset = target - patchpoint;
+	if (offset < -(1 << 26) || offset > (1 << 26))
+		return (false);
+	return (true);
+}
+
+/*
+ * Overwrite the copy of _SDT_ASM_PATCH_INSTR at the tracepoint with a jump to the
+ * target address.
+ */
+void
+sdt_tracepoint_patch(uintptr_t patchpoint, uintptr_t target)
+{
+	void *addr;
+	uint32_t instr;
+
+	KASSERT(sdt_tracepoint_valid(patchpoint, target),
+	    ("%s: invalid tracepoint %#lx -> %#lx",
+	    __func__, patchpoint, target));
+
+	if (!arm64_get_writable_addr((void *)patchpoint, &addr))
+		panic("%s: Unable to write new instruction", __func__);
+
+	instr = (((target - patchpoint) >> 2) & 0x3fffffful) | 0x14000000;
+	memcpy(addr, &instr, sizeof(instr));
+	cpu_icache_sync_range((void *)patchpoint, INSN_SIZE);
+}
+
+/*
+ * Overwrite the patchpoint with a nop instruction.
+ */
+void
+sdt_tracepoint_restore(uintptr_t patchpoint)
+{
+	void *addr;
+	uint32_t instr;
+
+	if (!arm64_get_writable_addr((void *)patchpoint, &addr))
+		panic("%s: Unable to write new instruction", __func__);
+
+	instr = 0xd503201f;
+	memcpy(addr, &instr, sizeof(instr));
+	cpu_icache_sync_range((void *)patchpoint, INSN_SIZE);
+}
diff --git a/sys/arm64/include/sdt_machdep.h b/sys/arm64/include/sdt_machdep.h
new file mode 100644
index 000000000000..738d246832a2
--- /dev/null
+++ b/sys/arm64/include/sdt_machdep.h
@@ -0,0 +1,12 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Mark Johnston <markj@FreeBSD.org>
+ */
+
+#ifndef _SYS_SDT_MACHDEP_H_
+#define	_SYS_SDT_MACHDEP_H_
+
+#define	_SDT_ASM_PATCH_INSTR	"nop"
+
+#endif /* _SYS_SDT_MACHDEP_H_ */
diff --git a/sys/cddl/dev/dtrace/dtrace_test.c b/sys/cddl/dev/dtrace/dtrace_test.c
index 92ba3a36a24e..c77448670917 100644
--- a/sys/cddl/dev/dtrace/dtrace_test.c
+++ b/sys/cddl/dev/dtrace/dtrace_test.c
@@ -37,8 +37,8 @@
 
 SDT_PROVIDER_DEFINE(test);
 
-SDT_PROBE_DEFINE7(test, , , sdttest, "int", "int", "int", "int", "int",
-    "int", "int");
+SDT_PROBE_DEFINE6(test, , , sdttest, "int", "int", "int", "int", "int",
+    "int");
 
 /*
  * These are variables that the DTrace test suite references in the
@@ -68,7 +68,7 @@ dtrace_test_sdttest(SYSCTL_HANDLER_ARGS)
 	else if (val == 0)
 		return (0);
 
-	SDT_PROBE7(test, , , sdttest, 1, 2, 3, 4, 5, 6, 7);
+	SDT_PROBE6(test, , , sdttest, 1, 2, 3, 4, 5, 6);
 
 	return (error);
 }
diff --git a/sys/cddl/dev/sdt/sdt.c b/sys/cddl/dev/sdt/sdt.c
index 684374848290..51fa0432437c 100644
--- a/sys/cddl/dev/sdt/sdt.c
+++ b/sys/cddl/dev/sdt/sdt.c
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  *
  * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
- *
+ * Copyright 2024 Mark Johnston <markj@FreeBSD.org>
  */
 
 /*
@@ -187,7 +187,7 @@ sdt_create_probe(struct sdt_probe *probe)
 	if (dtrace_probe_lookup(prov->id, mod, func, name) != DTRACE_IDNONE)
 		return;
 
-	(void)dtrace_probe_create(prov->id, mod, func, name, 1, probe);
+	(void)dtrace_probe_create(prov->id, mod, func, name, 0, probe);
 }
 
 /*
@@ -200,10 +200,62 @@ sdt_provide_probes(void *arg, dtrace_probedesc_t *desc)
 {
 }
 
+struct sdt_enable_cb_arg {
+	struct sdt_probe *probe;
+	int cpu;
+	int arrived;
+	int done;
+	bool enable;
+};
+
+static void
+sdt_probe_update_cb(void *_arg)
+{
+	struct sdt_enable_cb_arg *arg;
+	struct sdt_tracepoint *tp;
+
+	arg = _arg;
+	if (arg->cpu != curcpu) {
+		atomic_add_rel_int(&arg->arrived, 1);
+		while (atomic_load_acq_int(&arg->done) == 0)
+			cpu_spinwait();
+		return;
+	} else {
+		while (atomic_load_acq_int(&arg->arrived) != mp_ncpus - 1)
+			cpu_spinwait();
+	}
+
+	STAILQ_FOREACH(tp, &arg->probe->tracepoint_list, tracepoint_entry) {
+		if (arg->enable)
+			sdt_tracepoint_patch(tp->patchpoint, tp->target);
+		else
+			sdt_tracepoint_restore(tp->patchpoint);
+	}
+
+	atomic_store_rel_int(&arg->done, 1);
+}
+
+static void
+sdt_probe_update(struct sdt_probe *probe, bool enable)
+{
+	struct sdt_enable_cb_arg cbarg;
+
+	sched_pin();
+	cbarg.probe = probe;
+	cbarg.cpu = curcpu;
+	atomic_store_rel_int(&cbarg.arrived, 0);
+	atomic_store_rel_int(&cbarg.done, 0);
+	cbarg.enable = enable;
+	smp_rendezvous(NULL, sdt_probe_update_cb, NULL, &cbarg);
+	sched_unpin();
+}
+
 static void
 sdt_enable(void *arg __unused, dtrace_id_t id, void *parg)
 {
-	struct sdt_probe *probe = parg;
+	struct sdt_probe *probe;
+
+	probe = parg;
 
 	probe->id = id;
 	probe->sdtp_lf->nenabled++;
@@ -215,15 +267,20 @@ sdt_enable(void *arg __unused, dtrace_id_t id, void *parg)
 	sdt_probes_enabled_count++;
 	if (sdt_probes_enabled_count == 1)
 		sdt_probes_enabled = true;
+
+	sdt_probe_update(probe, true);
 }
 
 static void
 sdt_disable(void *arg __unused, dtrace_id_t id, void *parg)
 {
-	struct sdt_probe *probe = parg;
+	struct sdt_probe *probe;
 
+	probe = parg;
 	KASSERT(probe->sdtp_lf->nenabled > 0, ("no probes enabled"));
 
+	sdt_probe_update(probe, false);
+
 	sdt_probes_enabled_count--;
 	if (sdt_probes_enabled_count == 0)
 		sdt_probes_enabled = false;
@@ -284,26 +341,47 @@ sdt_kld_load_providers(struct linker_file *lf)
 static void
 sdt_kld_load_probes(struct linker_file *lf)
 {
-	struct sdt_probe **probe, **p_begin, **p_end;
-	struct sdt_argtype **argtype, **a_begin, **a_end;
+	struct sdt_probe **p_begin, **p_end;
+	struct sdt_argtype **a_begin, **a_end;
+	struct sdt_tracepoint *tp_begin, *tp_end;
 
 	if (linker_file_lookup_set(lf, "sdt_probes_set", &p_begin, &p_end,
 	    NULL) == 0) {
-		for (probe = p_begin; probe < p_end; probe++) {
+		for (struct sdt_probe **probe = p_begin; probe < p_end;
+		    probe++) {
 			(*probe)->sdtp_lf = lf;
 			sdt_create_probe(*probe);
 			TAILQ_INIT(&(*probe)->argtype_list);
+			STAILQ_INIT(&(*probe)->tracepoint_list);
 		}
 	}
 
 	if (linker_file_lookup_set(lf, "sdt_argtypes_set", &a_begin, &a_end,
 	    NULL) == 0) {
-		for (argtype = a_begin; argtype < a_end; argtype++) {
+		for (struct sdt_argtype **argtype = a_begin; argtype < a_end;
+		    argtype++) {
 			(*argtype)->probe->n_args++;
 			TAILQ_INSERT_TAIL(&(*argtype)->probe->argtype_list,
 			    *argtype, argtype_entry);
 		}
 	}
+
+	if (linker_file_lookup_set(lf, __XSTRING(_SDT_TRACEPOINT_SET),
+	    &tp_begin, &tp_end, NULL) == 0) {
+		for (struct sdt_tracepoint *tp = tp_begin; tp < tp_end; tp++) {
+			if (!sdt_tracepoint_valid(tp->patchpoint, tp->target)) {
+				printf(
+			    "invalid tracepoint %#jx->%#jx for %s:%s:%s:%s\n",
+				    (uintmax_t)tp->patchpoint,
+				    (uintmax_t)tp->target,
+				    tp->probe->prov->name, tp->probe->mod,
+				    tp->probe->func, tp->probe->name);
+				continue;
+			}
+			STAILQ_INSERT_TAIL(&tp->probe->tracepoint_list, tp,
+			    tracepoint_entry);
+		}
+	}
 }
 
 /*
@@ -378,6 +456,7 @@ sdt_load(void)
 	TAILQ_INIT(&sdt_prov_list);
 
 	sdt_probe_func = dtrace_probe;
+	sdt_probe6_func = (sdt_probe6_func_t)dtrace_probe;
 
 	sdt_kld_load_tag = EVENTHANDLER_REGISTER(kld_load, sdt_kld_load, NULL,
 	    EVENTHANDLER_PRI_ANY);
@@ -403,6 +482,7 @@ sdt_unload(void)
 	EVENTHANDLER_DEREGISTER(kld_unload_try, sdt_kld_unload_try_tag);
 
 	sdt_probe_func = sdt_probe_stub;
+	sdt_probe6_func = (sdt_probe6_func_t)sdt_probe_stub;
 
 	TAILQ_FOREACH_SAFE(prov, &sdt_prov_list, prov_entry, tmp) {
 		ret = dtrace_unregister(prov->id);
@@ -419,7 +499,6 @@ sdt_unload(void)
 static int
 sdt_modevent(module_t mod __unused, int type, void *data __unused)
 {
-
 	switch (type) {
 	case MOD_LOAD:
 	case MOD_UNLOAD:
diff --git a/sys/conf/files.arm b/sys/conf/files.arm
index b049479fbe82..825680b73165 100644
--- a/sys/conf/files.arm
+++ b/sys/conf/files.arm
@@ -58,6 +58,7 @@ arm/arm/pmu.c			optional	pmu | hwpmc
 arm/arm/pmu_fdt.c		optional	fdt pmu | fdt hwpmc
 arm/arm/ptrace_machdep.c	standard
 arm/arm/sc_machdep.c		optional	sc
+arm/arm/sdt_machdep.c		optional	kdtrace_hooks
 arm/arm/setcpsr.S		standard
 arm/arm/setstack.S		standard
 arm/arm/stack_machdep.c		optional	ddb | stack
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
index 26f9eaf193af..a6bd1a1ba60a 100644
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -68,6 +68,7 @@ arm64/arm64/ptrauth.c				standard \
 	compile-with	"${NORMAL_C:N-mbranch-protection*} -mbranch-protection=bti"
 arm64/arm64/pmap.c				standard
 arm64/arm64/ptrace_machdep.c			standard
+arm64/arm64/sdt_machdep.c			optional kdtrace_hooks
 arm64/arm64/sigtramp.S				standard
 arm64/arm64/stack_machdep.c			optional ddb | stack
 arm64/arm64/strcmp.S				standard
diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc
index 1a0388884ee8..1baacb5628c7 100644
--- a/sys/conf/files.powerpc
+++ b/sys/conf/files.powerpc
@@ -336,6 +336,7 @@ powerpc/powerpc/platform.c	standard
 powerpc/powerpc/platform_if.m	standard
 powerpc/powerpc/ptrace_machdep.c	standard
 powerpc/powerpc/sc_machdep.c	optional	sc
+powerpc/powerpc/sdt_machdep.c	optional	powerpc64 kdtrace_hooks
 powerpc/powerpc/setjmp.S	standard
 powerpc/powerpc/sigcode32.S	optional	powerpc | powerpcspe | compat_freebsd32
 powerpc/powerpc/sigcode64.S	optional	powerpc64 | powerpc64le
diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv
index 90e2826ce55d..5b94eb327f93 100644
--- a/sys/conf/files.riscv
+++ b/sys/conf/files.riscv
@@ -61,6 +61,7 @@ riscv/riscv/riscv_syscon.c	optional	syscon riscv_syscon fdt
 riscv/riscv/sigtramp.S		standard
 riscv/riscv/sbi.c		standard
 riscv/riscv/sbi_ipi.c		optional	smp
+riscv/riscv/sdt_machdep.c	optional	kdtrace_hooks
 riscv/riscv/stack_machdep.c	optional	ddb | stack
 riscv/riscv/support.S		standard
 riscv/riscv/swtch.S		standard
diff --git a/sys/conf/files.x86 b/sys/conf/files.x86
index de698e82c823..8a68459ff215 100644
--- a/sys/conf/files.x86
+++ b/sys/conf/files.x86
@@ -377,6 +377,7 @@ x86/x86/x86_mem.c		optional	mem
 x86/x86/mp_x86.c		optional	smp
 x86/x86/nexus.c			standard
 x86/x86/pvclock.c		optional	kvm_clock | xenhvm
+x86/x86/sdt_machdep.c		optional	kdtrace_hooks
 x86/x86/stack_machdep.c		optional	ddb | stack
 x86/x86/tsc.c			standard
 x86/x86/ucode.c			standard
diff --git a/sys/i386/include/sdt_machdep.h b/sys/i386/include/sdt_machdep.h
new file mode 100644
index 000000000000..550cfa7dd692
--- /dev/null
+++ b/sys/i386/include/sdt_machdep.h
@@ -0,0 +1,19 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Mark Johnston <markj@FreeBSD.org>
+ */
+
+#ifndef _SYS_SDT_MACHDEP_H_
+#define	_SYS_SDT_MACHDEP_H_
+
+#define	_SDT_ASM_PATCH_INSTR		"nop; nop; nop; nop; nop"
+
+/*
+ * Work around an apparent clang bug or limitation which prevents the use of the
+ * "i" (immediate) constraint with the probe structure.
+ */
+#define	_SDT_ASM_PROBE_CONSTRAINT	"Ws"
+#define	_SDT_ASM_PROBE_OPERAND		"p"
+
+#endif
diff --git a/sys/kern/kern_sdt.c b/sys/kern/kern_sdt.c
index b5f30057d535..b7213d2051fc 100644
--- a/sys/kern/kern_sdt.c
+++ b/sys/kern/kern_sdt.c
@@ -37,6 +37,7 @@ SDT_PROVIDER_DEFINE(sdt);
  * dtrace_probe() when it loads.
  */
 sdt_probe_func_t sdt_probe_func = sdt_probe_stub;
+sdt_probe6_func_t sdt_probe6_func = (sdt_probe6_func_t)sdt_probe_stub;
 volatile bool __read_frequently sdt_probes_enabled;
 
 /*
@@ -45,10 +46,24 @@ volatile bool __read_frequently sdt_probes_enabled;
  * to enable it.
  */
 void
-sdt_probe_stub(uint32_t id, uintptr_t arg0, uintptr_t arg1,
-    uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
+sdt_probe_stub(uint32_t id __unused, uintptr_t arg0 __unused,
+    uintptr_t arg1 __unused, uintptr_t arg2 __unused, uintptr_t arg3 __unused,
+    uintptr_t arg4 __unused)
 {
-
 	printf("sdt_probe_stub: unexpectedly called\n");
 	kdb_backtrace();
 }
+
+void
+sdt_probe(uint32_t id, uintptr_t arg0, uintptr_t arg1,
+    uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
+{
+	sdt_probe_func(id, arg0, arg1, arg2, arg3, arg4);
+}
+
+void
+sdt_probe6(uint32_t id, uintptr_t arg0, uintptr_t arg1,
+    uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
+{
+	sdt_probe6_func(id, arg0, arg1, arg2, arg3, arg4, arg5);
+}
diff --git a/sys/modules/dtrace/Makefile b/sys/modules/dtrace/Makefile
index 73c71c5bd1fb..3cc672b62c3b 100644
--- a/sys/modules/dtrace/Makefile
+++ b/sys/modules/dtrace/Makefile
@@ -10,7 +10,6 @@ SUBDIR=		dtaudit		\
 		fbt		\
 		profile		\
 		prototype	\
-		sdt		\
 		systrace
 
 .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386"
@@ -31,5 +30,8 @@ SUBDIR+=	fasttrap
     ${MACHINE_ARCH} == "powerpc64"
 SUBDIR+=	systrace_freebsd32
 .endif
+.if ${MACHINE_CPUARCH} != "powerpc" || ${MACHINE_ARCH} == "powerpc64"
+SUBDIR+=	sdt
+.endif
 
 .include <bsd.subdir.mk>
diff --git a/sys/powerpc/include/sdt_machdep.h b/sys/powerpc/include/sdt_machdep.h
new file mode 100644
index 000000000000..8f6c3d88ea7d
--- /dev/null
+++ b/sys/powerpc/include/sdt_machdep.h
@@ -0,0 +1,12 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Mark Johnston <markj@FreeBSD.org>
+ */
+
+#ifndef _SYS_SDT_MACHDEP_H_
+#define	_SYS_SDT_MACHDEP_H_
+
+#define	_SDT_ASM_PATCH_INSTR	"nop"
+
+#endif
diff --git a/sys/powerpc/powerpc/sdt_machdep.c b/sys/powerpc/powerpc/sdt_machdep.c
new file mode 100644
index 000000000000..8a84016a9571
--- /dev/null
+++ b/sys/powerpc/powerpc/sdt_machdep.c
@@ -0,0 +1,59 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Mark Johnston <markj@FreeBSD.org>
+ */
+
+#include <sys/systm.h>
+#include <sys/sdt.h>
+
+#include <machine/md_var.h>
+
+/*
+ * Return true if we can overwrite a nop at "patchpoint" with a jump to the
+ * target address.
+ */
+bool
+sdt_tracepoint_valid(uintptr_t patchpoint, uintptr_t target)
+{
+	int64_t offset;
+
+	if (patchpoint == target ||
+	    (patchpoint & 3) != 0 || (target & 3) != 0)
+		return (false);
+	offset = target - patchpoint;
+	if (offset < -(1 << 26) || offset > (1 << 26))
+		return (false);
+	return (true);
+}
+
+/*
+ * Overwrite the copy of _SDT_ASM_PATCH_INSTR at the tracepoint with a jump to
+ * the target address.
+ */
+void
+sdt_tracepoint_patch(uintptr_t patchpoint, uintptr_t target)
+{
+	uint32_t instr;
+
+	KASSERT(sdt_tracepoint_valid(patchpoint, target),
+	    ("%s: invalid tracepoint %#lx -> %#lx",
+	    __func__, patchpoint, target));
+
+	instr = ((target - patchpoint) & 0x7fffffful) | 0x48000000;
+	memcpy((void *)patchpoint, &instr, sizeof(instr));
+	__syncicache((void *)patchpoint, sizeof(instr));
+}
+
+/*
+ * Overwrite the patchpoint with a nop instruction.
+ */
+void
+sdt_tracepoint_restore(uintptr_t patchpoint)
+{
+	uint32_t instr;
+
+	instr = 0x60000000;
+	memcpy((void *)patchpoint, &instr, sizeof(instr));
+	__syncicache((void *)patchpoint, sizeof(instr));
+}
diff --git a/sys/riscv/include/sdt_machdep.h b/sys/riscv/include/sdt_machdep.h
new file mode 100644
index 000000000000..160f5d404204
--- /dev/null
+++ b/sys/riscv/include/sdt_machdep.h
@@ -0,0 +1,12 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Mark Johnston <markj@FreeBSD.org>
+ */
+
+#ifndef _SYS_SDT_MACHDEP_H_
+#define	_SYS_SDT_MACHDEP_H_
+
+#define	_SDT_ASM_PATCH_INSTR	".option push; .option norvc; nop; .option pop"
+
+#endif /* _SYS_SDT_MACHDEP_H_ */
diff --git a/sys/riscv/riscv/sdt_machdep.c b/sys/riscv/riscv/sdt_machdep.c
new file mode 100644
index 000000000000..4b139c8edd98
--- /dev/null
+++ b/sys/riscv/riscv/sdt_machdep.c
@@ -0,0 +1,68 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Mark Johnston <markj@FreeBSD.org>
+ */
+
+#include <sys/systm.h>
+#include <sys/sdt.h>
+
+#include <machine/encoding.h>
+
+/*
+ * Return true if we can overwrite a nop at "patchpoint" with a jump to the
+ * target address.
+ */
+bool
+sdt_tracepoint_valid(uintptr_t patchpoint, uintptr_t target)
+{
+	int64_t offset;
+
+	if (patchpoint == target ||
+	    (patchpoint & (INSN_C_SIZE - 1)) != 0 ||
+	    (target & (INSN_C_SIZE - 1)) != 0)
+		return (false);
+	offset = target - patchpoint;
+	if (offset < -(1 << 19) || offset > (1 << 19))
+		return (false);
+	return (true);
+}
+
+/*
+ * Overwrite the copy of _SDT_ASM_PATCH_INSTR at the tracepoint with a jump to
+ * the target address.
+ */
+void
+sdt_tracepoint_patch(uintptr_t patchpoint, uintptr_t target)
+{
+	int32_t imm;
+	uint32_t instr;
+
+	KASSERT(sdt_tracepoint_valid(patchpoint, target),
+	    ("%s: invalid tracepoint %#lx -> %#lx",
+	    __func__, patchpoint, target));
+
+	imm = target - patchpoint;
+	imm = (imm & 0x100000) |
+	    ((imm & 0x7fe) << 8) |
+	    ((imm & 0x800) >> 2) |
+	    ((imm & 0xff000) >> 12);
+	instr = (imm << 12) | MATCH_JAL;
+
+	memcpy((void *)patchpoint, &instr, sizeof(instr));
+	fence_i();
+}
+
+/*
+ * Overwrite the patchpoint with a nop instruction.
+ */
+void
+sdt_tracepoint_restore(uintptr_t patchpoint)
+{
+	uint32_t instr;
+
+	instr = 0x13; /* uncompressed nop */
+
+	memcpy((void *)patchpoint, &instr, sizeof(instr));
+	fence_i();
+}
diff --git a/sys/sys/sdt.h b/sys/sys/sdt.h
index fabf4d72740c..147d58c53ef4 100644
--- a/sys/sys/sdt.h
+++ b/sys/sys/sdt.h
@@ -77,8 +77,8 @@
 
 #else /* _KERNEL */
 
-#include <sys/cdefs.h>
 #include <sys/linker_set.h>
+#include <machine/sdt_machdep.h>
 
 extern volatile bool sdt_probes_enabled;
 
@@ -102,8 +102,6 @@ extern volatile bool sdt_probes_enabled;
 #define	SDT_PROBE_DEFINE5(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4)
 #define	SDT_PROBE_DEFINE6(prov, mod, func, name, arg0, arg1, arg2,      \
     arg3, arg4, arg5)
-#define	SDT_PROBE_DEFINE7(prov, mod, func, name, arg0, arg1, arg2,      \
-    arg3, arg4, arg5, arg6)
 
 #define	SDT_PROBE0(prov, mod, func, name)
 #define	SDT_PROBE1(prov, mod, func, name, arg0)
@@ -112,8 +110,6 @@ extern volatile bool sdt_probes_enabled;
 #define	SDT_PROBE4(prov, mod, func, name, arg0, arg1, arg2, arg3)
 #define	SDT_PROBE5(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4)
 #define	SDT_PROBE6(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4, arg5)
-#define	SDT_PROBE7(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4, arg5,  \
-    arg6)
 
 #define	MIB_SDT_PROBE1(...)
 #define	MIB_SDT_PROBE2(...)
@@ -130,9 +126,6 @@ extern volatile bool sdt_probes_enabled;
     arg1, xarg1, arg2, xarg2, arg3, xarg3, arg4, xarg4)
 #define	SDT_PROBE_DEFINE6_XLATE(prov, mod, func, name, arg0, xarg0,     \
     arg1,  xarg1, arg2, xarg2, arg3, xarg3, arg4, xarg4, arg5, xarg5)
-#define	SDT_PROBE_DEFINE7_XLATE(prov, mod, func, name, arg0, xarg0,     \
-    arg1, xarg1, arg2, xarg2, arg3, xarg3, arg4, xarg4, arg5, xarg5, arg6,     \
-    xarg6)
 
 #define	DTRACE_PROBE(name)
 #define	DTRACE_PROBE1(name, type0, arg0)
@@ -144,6 +137,18 @@ extern volatile bool sdt_probes_enabled;
 
 #else
 
+void sdt_probe(uint32_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+    uintptr_t);
+void sdt_probe6(uint32_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+    uintptr_t, uintptr_t);
+
+#define	_SDT_TRACEPOINT_SET		sdt_tracepoint_set
+#define	_SDT_TRACEPOINT_SECTION		"set_sdt_tracepoint_set"
+
+bool sdt_tracepoint_valid(uintptr_t patchpoint, uintptr_t target);
+void sdt_tracepoint_patch(uintptr_t patchpoint, uintptr_t target);
+void sdt_tracepoint_restore(uintptr_t patchpoint);
+
 #define __sdt_used
 
 SET_DECLARE(sdt_providers_set, struct sdt_provider);
@@ -181,14 +186,58 @@ SET_DECLARE(sdt_argtypes_set, struct sdt_argtype);
 
 #define	SDT_PROBES_ENABLED()	__predict_false(sdt_probes_enabled)
 
-#define SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4)	do {	\
-	if (SDT_PROBES_ENABLED()) {						\
-		if (__predict_false(_SDT_PROBE_NAME(prov, mod, func, name)->id)) \
-		(*sdt_probe_func)(_SDT_PROBE_NAME(prov, mod, func, name)->id,	\
-		    (uintptr_t) arg0, (uintptr_t) arg1, (uintptr_t) arg2,	\
-		    (uintptr_t) arg3, (uintptr_t) arg4);			\
-	} \
+#ifdef _ILP32
+#define	_SDT_ASM_WORD			".long"
+#else
+#define	_SDT_ASM_WORD			".quad"
+#endif
+
+#ifndef _SDT_ASM_PROBE_CONSTRAINT
+#define	_SDT_ASM_PROBE_CONSTRAINT	"i"
+#endif
+#ifndef	_SDT_ASM_PROBE_OPERAND
+#define	_SDT_ASM_PROBE_OPERAND		"c"
+#endif
+
+/*
+ * The asm below generates records corresponding to the structure's layout, so
+ * the two must be kept in sync.
+ */
+struct sdt_tracepoint {
+	struct sdt_probe *probe;
+	uintptr_t	patchpoint;
+	uintptr_t	target;
+	STAILQ_ENTRY(sdt_tracepoint) tracepoint_entry;
+};
+
+#define __SDT_PROBE(prov, mod, func, name, uniq, f, ...) do {		\
+	__WEAK(__CONCAT(__start_set_, _SDT_TRACEPOINT_SET));		\
+	__WEAK(__CONCAT(__stop_set_, _SDT_TRACEPOINT_SET));		\
+	asm goto(							\
+	    "0:\n"							\
+	    _SDT_ASM_PATCH_INSTR "\n"					\
+	    ".pushsection " _SDT_TRACEPOINT_SECTION ", \"aw\"\n"	\
+	    _SDT_ASM_WORD " %" _SDT_ASM_PROBE_OPERAND "0\n"		\
+	    _SDT_ASM_WORD " 0b\n"					\
+	    _SDT_ASM_WORD " %l1\n"					\
+	    _SDT_ASM_WORD " 0\n"					\
+	    ".popsection\n"						\
+	    :								\
+	    : _SDT_ASM_PROBE_CONSTRAINT (_SDT_PROBE_NAME(prov, mod,	\
+	    func, name))						\
+	    :								\
+	    : __sdt_probe##uniq);					\
+	if (0) {							\
+__sdt_probe##uniq:;							\
+		f(_SDT_PROBE_NAME(prov, mod, func, name)->id, __VA_ARGS__); \
+	}								\
 } while (0)
+#define _SDT_PROBE(prov, mod, func, name, uniq, f, ...)			\
+	__SDT_PROBE(prov, mod, func, name, uniq, f, __VA_ARGS__)
+#define SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4)	\
+	_SDT_PROBE(prov, mod, func, name, __COUNTER__, sdt_probe,	\
+	    (uintptr_t)arg0, (uintptr_t)arg1, (uintptr_t)arg2,		\
+	    (uintptr_t)arg3, (uintptr_t)arg4)
*** 186 LINES SKIPPED ***