git: 84d7fe4a6f64 - main - kinst: Add per-CPU interrupt trampolines

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Thu, 08 Dec 2022 20:16:09 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=84d7fe4a6f647faa2c91cb254b155e88e68c798c

commit 84d7fe4a6f647faa2c91cb254b155e88e68c798c
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2022-12-08 20:03:51 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2022-12-08 20:03:51 +0000

    kinst: Add per-CPU interrupt trampolines
    
    In the common case, kinst emulates a traced instruction by copying it to
    a trampoline, where it is followed by a jump back to the original code,
    and pointing the interrupted thread's %rip at the trampoline.  In
    particular, the trampoline is executed with the same CPU context as the
    original instruction, so if interrupts are enabled at the point where
    the probe fires, they will be enabled when the trampoline is
    subsequently executed.
    
    It can happen that an interrupt is raised while a thread is executing a
    kinst trampoline.  In that case, it is possible that the interrupt
    handler will trigger a kinst probe, so we must ensure that the thread
    does not recurse and overwrite its trampoline before it is finished
    executing the original contents, otherwise an attempt to trace code
    called from interrupt handlers can crash the kernel.
    
    To that end, add a per-CPU trampoline, used when the probe fired with
    interrupts disabled.  Note that this is not quite complete since it does
    not handle the possibility of kinst probes firing while executing an NMI
    handler.
    
    Also ensure that we do not trace instructions which set IF, since in
    that case it is not clear which trampoline (the per-thread trampoline or
    the per-CPU trampoline) we should use, and since such instructions are
    rare.
    
    Reported and tested by: Domagoj Stolfa
    Reviewed by:    christos
    Fixes:          f0bc4ed144fc ("kinst: Initial revision")
    Differential Revision:  https://reviews.freebsd.org/D37619
---
 sys/cddl/dev/kinst/amd64/kinst_isa.c | 73 +++++++++++++++++++++++++++++++++---
 sys/cddl/dev/kinst/kinst.c           |  7 ++++
 sys/cddl/dev/kinst/kinst.h           |  3 ++
 3 files changed, 78 insertions(+), 5 deletions(-)

diff --git a/sys/cddl/dev/kinst/amd64/kinst_isa.c b/sys/cddl/dev/kinst/amd64/kinst_isa.c
index e47cfbbf4d4e..f5fdeabd69e4 100644
--- a/sys/cddl/dev/kinst/amd64/kinst_isa.c
+++ b/sys/cddl/dev/kinst/amd64/kinst_isa.c
@@ -6,6 +6,7 @@
  */
 
 #include <sys/param.h>
+#include <sys/pcpu.h>
 
 #include <machine/cpufunc.h>
 #include <machine/md_var.h>
@@ -39,6 +40,14 @@
 #define KINST_F_JMP		0x0008	/* instruction is a %rip-relative jmp */
 #define KINST_F_MOD_DIRECT	0x0010	/* operand is not a memory address */
 
+/*
+ * Per-CPU trampolines used when the interrupted thread is executing with
+ * interrupts disabled.  If an interrupt is raised while executing a trampoline,
+ * the interrupt thread cannot safely overwrite its trampoline if it hits a
+ * kinst probe while executing the interrupt handler.
+ */
+DPCPU_DEFINE_STATIC(uint8_t *, intr_tramp);
+
 /*
  * Map ModR/M register bits to a trapframe offset.
  */
@@ -185,7 +194,10 @@ kinst_invop(uintptr_t addr, struct trapframe *frame, uintptr_t scratch)
 		}
 		return (DTRACE_INVOP_CALL);
 	} else {
-		tramp = curthread->t_kinst;
+		if ((frame->tf_rflags & PSL_I) == 0)
+			tramp = DPCPU_GET(intr_tramp);
+		else
+			tramp = curthread->t_kinst;
 		if (tramp == NULL) {
 			/*
 			 * A trampoline allocation failed, so this probe is
@@ -495,7 +507,7 @@ kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,
 	struct kinst_probe *kp;
 	dtrace_kinst_probedesc_t *pd;
 	const char *func;
-	int error, n, off;
+	int error, instrsize, n, off;
 	uint8_t *instr, *limit;
 
 	pd = opaque;
@@ -510,17 +522,37 @@ kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,
 
 	/*
 	 * Ignore functions not beginning with the usual function prologue.
-	 * These might correspond to assembly routines with which we should not
-	 * meddle.
+	 * These might correspond to exception handlers with which we should not
+	 * meddle.  This does however exclude functions which can be safely
+	 * traced, such as cpu_switch().
 	 */
 	if (*instr != KINST_PUSHL_RBP)
 		return (0);
 
 	n = 0;
 	while (instr < limit) {
+		instrsize = dtrace_instr_size(instr);
 		off = (int)(instr - (uint8_t *)symval->value);
 		if (pd->kpd_off != -1 && off != pd->kpd_off) {
-			instr += dtrace_instr_size(instr);
+			instr += instrsize;
+			continue;
+		}
+
+		/*
+		 * Check for instructions which may enable interrupts.  Such
+		 * instructions are tricky to trace since it is unclear whether
+		 * to use the per-thread or per-CPU trampolines.  Since they are
+		 * rare, we don't bother to implement special handling for them.
+		 *
+		 * If the caller specified an offset, return an error, otherwise
+		 * silently ignore the instruction so that it remains possible
+		 * to enable all instructions in a function.
+		 */
+		if (instrsize == 1 &&
+		    (instr[0] == KINST_POPF || instr[0] == KINST_STI)) {
+			if (pd->kpd_off != -1)
+				return (EINVAL);
+			instr += instrsize;
 			continue;
 		}
 
@@ -554,3 +586,34 @@ kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,
 
 	return (0);
 }
+
+int
+kinst_md_init(void)
+{
+	uint8_t *tramp;
+	int cpu;
+
+	CPU_FOREACH(cpu) {
+		tramp = kinst_trampoline_alloc(M_WAITOK);
+		if (tramp == NULL)
+			return (ENOMEM);
+		DPCPU_ID_SET(cpu, intr_tramp, tramp);
+	}
+
+	return (0);
+}
+
+void
+kinst_md_deinit(void)
+{
+	uint8_t *tramp;
+	int cpu;
+
+	CPU_FOREACH(cpu) {
+		tramp = DPCPU_ID_GET(cpu, intr_tramp);
+		if (tramp != NULL) {
+			kinst_trampoline_dealloc(DPCPU_ID_GET(cpu, intr_tramp));
+			DPCPU_ID_SET(cpu, intr_tramp, NULL);
+		}
+	}
+}
diff --git a/sys/cddl/dev/kinst/kinst.c b/sys/cddl/dev/kinst/kinst.c
index 88d59e0cfec4..8c9872ba86c8 100644
--- a/sys/cddl/dev/kinst/kinst.c
+++ b/sys/cddl/dev/kinst/kinst.c
@@ -180,10 +180,16 @@ kinst_load(void *dummy)
 	error = kinst_trampoline_init();
 	if (error != 0)
 		return (error);
+	error = kinst_md_init();
+	if (error != 0) {
+		kinst_trampoline_deinit();
+		return (error);
+	}
 
 	error = dtrace_register("kinst", &kinst_attr, DTRACE_PRIV_USER, NULL,
 	    &kinst_pops, NULL, &kinst_id);
 	if (error != 0) {
+		kinst_md_deinit();
 		kinst_trampoline_deinit();
 		return (error);
 	}
@@ -201,6 +207,7 @@ static int
 kinst_unload(void *dummy)
 {
 	free(kinst_probetab, M_KINST);
+	kinst_md_deinit();
 	kinst_trampoline_deinit();
 	dtrace_invop_remove(kinst_invop);
 	destroy_dev(kinst_cdev);
diff --git a/sys/cddl/dev/kinst/kinst.h b/sys/cddl/dev/kinst/kinst.h
index ea1a5b50004f..b7abfd1e61e0 100644
--- a/sys/cddl/dev/kinst/kinst.h
+++ b/sys/cddl/dev/kinst/kinst.h
@@ -57,6 +57,9 @@ int	kinst_trampoline_deinit(void);
 uint8_t	*kinst_trampoline_alloc(int);
 void	kinst_trampoline_dealloc(uint8_t *);
 
+int	kinst_md_init(void);
+void	kinst_md_deinit(void);
+
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_KINST);
 #endif /* MALLOC_DECLARE */