git: 761d4d350d1a - stable/13 - dtrace/amd64: Implement emulation of call instructions

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Tue, 23 Aug 2022 20:46:52 UTC
The branch stable/13 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=761d4d350d1acd44724ab0bfa1d4940613f5f153

commit 761d4d350d1acd44724ab0bfa1d4940613f5f153
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2022-08-09 20:08:13 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2022-08-23 20:05:07 +0000

    dtrace/amd64: Implement emulation of call instructions
    
    Here, the provider is responsible for updating the trapframe to redirect
    control flow and for computing the return address.  Once software-saved
    registers are restored, the emulation shifts the remaining context down
    on the stack to make space for the return address, then copies the
    address provided by the invop handler.  dtrace_invop_start() is modified
    to allocate temporary scratch space on the stack, and dtrace_invop() now
    passes a pointer to it so that the provider can supply the return address.
    
    This is to support a new provider for amd64 which can instrument
    arbitrary instructions, not just function entry and exit instructions as
    FBT does.
    
    In collaboration with:  christos
    Sponsored by:   Google, Inc. (GSoC 2022)
    Sponsored by:   The FreeBSD Foundation
    
    (cherry picked from commit 3ba8e9dc4a0e0e9c35cfadfe25379871ce581697)
---
 .../contrib/opensolaris/uts/common/sys/dtrace.h    |  4 ++
 sys/cddl/dev/dtrace/amd64/dtrace_asm.S             | 51 +++++++++++++++++++++-
 sys/cddl/dev/dtrace/amd64/dtrace_subr.c            | 11 ++---
 3 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h b/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h
index c15cc39189b1..5cad5a3b18c9 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h
@@ -2440,6 +2440,10 @@ extern void dtrace_helpers_destroy(proc_t *);
 #define	DTRACE_INVOP_NOP		4
 #define	DTRACE_INVOP_RET		5
 
+#if defined(__amd64)
+#define	DTRACE_INVOP_CALL		6
+#endif
+
 #elif defined(__powerpc__)
 
 #define DTRACE_INVOP_BCTR	1
diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_asm.S b/sys/cddl/dev/dtrace/amd64/dtrace_asm.S
index 13bd930d5f2f..3270aedfb3c8 100644
--- a/sys/cddl/dev/dtrace/amd64/dtrace_asm.S
+++ b/sys/cddl/dev/dtrace/amd64/dtrace_asm.S
@@ -59,6 +59,8 @@
 	swapgs;					\
 1:	addq	$TF_RIP,%rsp;
 
+.globl dtrace_invop_callsite
+.type dtrace_invop_callsite,@function
 
 	ENTRY(dtrace_invop_start)
 
@@ -70,11 +72,22 @@
 	movq	TF_RIP(%rsp), %rdi
 	decq	%rdi
 	movq	%rsp, %rsi
-	movq	TF_RAX(%rsp), %rdx
+
+	/*
+	 * Allocate some scratch space to let the invop handler return a value.
+	 * This is needed when emulating "call" instructions.
+	 */
+	subq	$16, %rsp
+	movq	%rsp, %rdx
+
 	call	dtrace_invop
-	ALTENTRY(dtrace_invop_callsite)
+dtrace_invop_callsite:
+	addq	$16, %rsp
+
 	cmpl	$DTRACE_INVOP_PUSHL_EBP, %eax
 	je	bp_push
+	cmpl	$DTRACE_INVOP_CALL, %eax
+	je	bp_call
 	cmpl	$DTRACE_INVOP_LEAVE, %eax
 	je	bp_leave
 	cmpl	$DTRACE_INVOP_NOP, %eax
@@ -110,6 +123,40 @@ bp_push:
 	iretq				/* return from interrupt */
 	/*NOTREACHED*/
 
+bp_call:
+	/*
+	 * Emulate a "call" instruction.  The invop handler must have already
+	 * updated the saved copy of %rip in the register set.  It's our job to
+	 * pull the hardware-saved registers down to make space for the return
+	 * address, which is provided by the invop handler in our scratch
+	 * space.
+	 */
+	INTR_POP
+	subq	$16, %rsp		/* make room to shift the frame down */
+	pushq	%rax			/* push temp */
+	pushq	%rbx			/* push temp */
+
+	movq	32(%rsp), %rax		/* load calling RIP */
+	movq	%rax, 16(%rsp)		/* store calling RIP */
+	movq	40(%rsp), %rax		/* load calling CS */
+	movq	%rax, 24(%rsp)		/* store calling CS */
+	movq	48(%rsp), %rax		/* load calling RFLAGS */
+	movq	%rax, 32(%rsp)		/* store calling RFLAGS */
+	movq	56(%rsp), %rax		/* load calling RSP */
+	subq	$8, %rax		/* make room for return address */
+	movq	%rax, 40(%rsp)		/* store calling RSP */
+	movq	64(%rsp), %rax		/* load calling SS */
+	movq	%rax, 48(%rsp)		/* store calling SS */
+
+	movq	-(TF_RIP - 16)(%rsp), %rax /* load return address */
+	movq	40(%rsp), %rbx		/* reload calling RSP */
+	movq	%rax, (%rbx)		/* store return address */
+
+	popq	%rbx			/* pop temp */
+	popq	%rax			/* pop temp */
+	iretq				/* return from interrupt */
+	/*NOTREACHED*/
+
 bp_leave:
 	/*
 	 * We must emulate a "leave", which is the same as a "movq %rbp, %rsp"
diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c
index 4f9d9995cbab..f4fb70f80a6b 100644
--- a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c
+++ b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c
@@ -50,7 +50,7 @@
 extern void dtrace_getnanotime(struct timespec *tsp);
 extern int (*dtrace_invop_jump_addr)(struct trapframe *);
 
-int	dtrace_invop(uintptr_t, struct trapframe *, uintptr_t);
+int	dtrace_invop(uintptr_t, struct trapframe *, void **);
 int	dtrace_invop_start(struct trapframe *frame);
 void	dtrace_invop_init(void);
 void	dtrace_invop_uninit(void);
@@ -63,15 +63,16 @@ typedef struct dtrace_invop_hdlr {
 dtrace_invop_hdlr_t *dtrace_invop_hdlr;
 
 int
-dtrace_invop(uintptr_t addr, struct trapframe *frame, uintptr_t eax)
+dtrace_invop(uintptr_t addr, struct trapframe *frame, void **scratch)
 {
 	dtrace_invop_hdlr_t *hdlr;
 	int rval;
 
-	for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next)
-		if ((rval = hdlr->dtih_func(addr, frame, eax)) != 0)
+	for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next) {
+		rval = hdlr->dtih_func(addr, frame, (uintptr_t)scratch);
+		if (rval != 0)
 			return (rval);
-
+	}
 	return (0);
 }