git: e3b4fe645e50 - main - vmm: implement single-stepping for AMD CPUs

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Thu, 07 Dec 2023 23:17:17 UTC
The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=e3b4fe645e50bfd06becb74e52ea958315024d5f

commit e3b4fe645e50bfd06becb74e52ea958315024d5f
Author:     Bojan Novković <bojan.novkovic@fer.hr>
AuthorDate: 2023-12-07 23:00:31 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2023-12-07 23:11:04 +0000

    vmm: implement single-stepping for AMD CPUs
    
    This patch implements single-stepping for AMD CPUs using the RFLAGS.TF
    single-stepping mechanism.  The GDB stub requests single-stepping
    using the VM_CAP_RFLAGS_TF capability.  Setting this capability will
    set the RFLAGS.TF bit on the selected vCPU, activate DB exception
    intercepts, and activate POPF/PUSH instruction intercepts.  The
    resulting DB exception is then caught by the IDT_DB vmexit handler and
    bounced to userland where it is processed by the GDB stub.  This patch
    also makes sure that the value of the TF bit is correctly updated and
    that it is not erroneously propagated into memory.  Stepping over PUSHF
    will cause the vm_handle_db function to correct the pushed RFLAGS
    value and stepping over POPF will update the shadowed TF bit copy.
    
    Reviewed by:    jhb
    Sponsored by:   Google, Inc. (GSoC 2022)
    Differential Revision:  https://reviews.freebsd.org/D42296
---
 sys/amd64/include/vmm.h       |   8 +++
 sys/amd64/vmm/amd/svm.c       | 151 +++++++++++++++++++++++++++++++++++++++++-
 sys/amd64/vmm/amd/svm_softc.h |   8 +++
 sys/amd64/vmm/vmm.c           |  37 +++++++++++
 4 files changed, 202 insertions(+), 2 deletions(-)

diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 0210aeef80fd..abc7571187fa 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -497,6 +497,7 @@ enum vm_cap_type {
 	VM_CAP_RDTSCP,
 	VM_CAP_IPI_EXIT,
 	VM_CAP_MASK_HWINTR,
+	VM_CAP_RFLAGS_TF,
 	VM_CAP_MAX
 };
 
@@ -645,6 +646,7 @@ enum vm_exitcode {
 	VM_EXITCODE_VMINSN,
 	VM_EXITCODE_BPT,
 	VM_EXITCODE_IPI,
+	VM_EXITCODE_DB,
 	VM_EXITCODE_MAX
 };
 
@@ -734,6 +736,12 @@ struct vm_exit {
 		struct {
 			int		inst_length;
 		} bpt;
+		struct {
+			int		trace_trap;
+			int		pushf_intercept;
+			int		tf_shadow_val;
+			struct		vm_guest_paging paging;
+		} dbg;
 		struct {
 			uint32_t	code;		/* ecx value */
 			uint64_t	wval;
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index ec0cde31aaad..1507377a0cfe 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -131,7 +131,7 @@ static VMM_STAT_AMD(VMEXIT_VINTR, "VM exits due to interrupt window");
 
 static int svm_getdesc(void *vcpui, int reg, struct seg_desc *desc);
 static int svm_setreg(void *vcpui, int ident, uint64_t val);
-
+static int svm_getreg(void *vcpui, int ident, uint64_t *val);
 static __inline int
 flush_by_asid(void)
 {
@@ -1282,6 +1282,8 @@ exit_reason_to_str(uint64_t reason)
 		{ .reason = VMCB_EXIT_ICEBP,	.str = "icebp" },
 		{ .reason = VMCB_EXIT_INVD,	.str = "invd" },
 		{ .reason = VMCB_EXIT_INVLPGA,	.str = "invlpga" },
+		{ .reason = VMCB_EXIT_POPF,	.str = "popf" },
+		{ .reason = VMCB_EXIT_PUSHF,	.str = "pushf" },
 	};
 
 	for (i = 0; i < nitems(reasons); i++) {
@@ -1419,7 +1421,69 @@ svm_vmexit(struct svm_softc *svm_sc, struct svm_vcpu *vcpu,
 			errcode_valid = 1;
 			info1 = 0;
 			break;
-
+		case IDT_DB: {
+			/*
+			 * Check if we are being stepped (RFLAGS.TF)
+			 * and bounce vmexit to userland.
+			 */
+			bool stepped = 0;
+			uint64_t dr6 = 0;
+
+			svm_getreg(vcpu, VM_REG_GUEST_DR6, &dr6);
+			stepped = !!(dr6 & DBREG_DR6_BS);
+			if (stepped && (vcpu->caps & (1 << VM_CAP_RFLAGS_TF))) {
+				vmexit->exitcode = VM_EXITCODE_DB;
+				vmexit->u.dbg.trace_trap = 1;
+				vmexit->u.dbg.pushf_intercept = 0;
+
+				if (vcpu->dbg.popf_sstep) {
+					/*
+					 * DB# exit was caused by stepping over
+					 * popf.
+					 */
+					uint64_t rflags;
+
+					vcpu->dbg.popf_sstep = 0;
+
+					/*
+					 * Update shadowed TF bit so the next
+					 * setcap(..., RFLAGS_SSTEP, 0) restores
+					 * the correct value.
+					 */
+					svm_getreg(vcpu, VM_REG_GUEST_RFLAGS,
+					    &rflags);
+					vcpu->dbg.rflags_tf = rflags & PSL_T;
+				} else if (vcpu->dbg.pushf_sstep) {
+					/*
+					 * DB# exit was caused by stepping over
+					 * pushf.
+					 */
+					vcpu->dbg.pushf_sstep = 0;
+
+					/*
+					 * Adjusting the pushed rflags after a
+					 * restarted pushf instruction must be
+					 * handled outside of svm.c due to the
+					 * critical_enter() lock being held.
+					 */
+					vmexit->u.dbg.pushf_intercept = 1;
+					vmexit->u.dbg.tf_shadow_val =
+					    vcpu->dbg.rflags_tf;
+					svm_paging_info(svm_get_vmcb(vcpu),
+					    &vmexit->u.dbg.paging);
+				}
+
+				/* Clear DR6 "single-step" bit. */
+				dr6 &= ~DBREG_DR6_BS;
+				error = svm_setreg(vcpu, VM_REG_GUEST_DR6, dr6);
+				KASSERT(error == 0,
+				    ("%s: error %d updating DR6\r\n", __func__,
+					error));
+
+				reflect = 0;
+			}
+			break;
+		}
 		case IDT_BP:
 			vmexit->exitcode = VM_EXITCODE_BPT;
 			vmexit->u.bpt.inst_length = vmexit->inst_length;
@@ -1545,6 +1609,42 @@ svm_vmexit(struct svm_softc *svm_sc, struct svm_vcpu *vcpu,
 	case VMCB_EXIT_MWAIT:
 		vmexit->exitcode = VM_EXITCODE_MWAIT;
 		break;
+	case VMCB_EXIT_PUSHF: {
+		if (vcpu->caps & (1 << VM_CAP_RFLAGS_TF)) {
+			uint64_t rflags;
+
+			svm_getreg(vcpu, VM_REG_GUEST_RFLAGS, &rflags);
+			/* Restart this instruction. */
+			vmexit->inst_length = 0;
+			/* Disable PUSHF intercepts - avoid a loop. */
+			svm_set_intercept(vcpu, VMCB_CTRL1_INTCPT,
+			    VMCB_INTCPT_PUSHF, 0);
+			/* Trace restarted instruction. */
+			svm_setreg(vcpu, VM_REG_GUEST_RFLAGS, (rflags | PSL_T));
+			/* Let the IDT_DB handler know that pushf was
+			 * stepped. */
+			vcpu->dbg.pushf_sstep = 1;
+			handled = 1;
+		}
+		break;
+	}
+	case VMCB_EXIT_POPF: {
+		if (vcpu->caps & (1 << VM_CAP_RFLAGS_TF)) {
+			uint64_t rflags;
+
+			svm_getreg(vcpu, VM_REG_GUEST_RFLAGS, &rflags);
+			/* Restart this instruction. */
+			vmexit->inst_length = 0;
+			/* Disable POPF intercepts - avoid a loop. */
+			svm_set_intercept(vcpu, VMCB_CTRL1_INTCPT,
+			    VMCB_INTCPT_POPF, 0);
+			/* Trace restarted instruction. */
+			svm_setreg(vcpu, VM_REG_GUEST_RFLAGS, (rflags | PSL_T));
+			vcpu->dbg.popf_sstep = 1;
+			handled = 1;
+		}
+		break;
+	}
 	case VMCB_EXIT_SHUTDOWN:
 	case VMCB_EXIT_VMRUN:
 	case VMCB_EXIT_VMMCALL:
@@ -2346,6 +2446,50 @@ svm_setcap(void *vcpui, int type, int val)
 		vlapic = vm_lapic(vcpu->vcpu);
 		vlapic->ipi_exit = val;
 		break;
+	case VM_CAP_RFLAGS_TF: {
+		uint64_t rflags;
+
+		/* Fetch RFLAGS. */
+		if (svm_getreg(vcpu, VM_REG_GUEST_RFLAGS, &rflags)) {
+			error = (EINVAL);
+			break;
+		}
+		if (val) {
+			/* Save current TF bit. */
+			vcpu->dbg.rflags_tf = rflags & PSL_T;
+			/* Trace next instruction. */
+			if (svm_setreg(vcpu, VM_REG_GUEST_RFLAGS,
+				(rflags | PSL_T))) {
+				error = (EINVAL);
+				break;
+			}
+			vcpu->caps |= (1 << VM_CAP_RFLAGS_TF);
+		} else {
+			/*
+			 * Restore shadowed RFLAGS.TF only if vCPU was
+			 * previously stepped.
+			 */
+			if (vcpu->caps & (1 << VM_CAP_RFLAGS_TF)) {
+				rflags &= ~PSL_T;
+				rflags |= vcpu->dbg.rflags_tf;
+				vcpu->dbg.rflags_tf = 0;
+
+				if (svm_setreg(vcpu, VM_REG_GUEST_RFLAGS,
+					rflags)) {
+					error = (EINVAL);
+					break;
+				}
+				vcpu->caps &= ~(1 << VM_CAP_RFLAGS_TF);
+			}
+		}
+
+		svm_set_intercept(vcpu, VMCB_EXC_INTCPT, BIT(IDT_DB), val);
+		svm_set_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_POPF,
+		    val);
+		svm_set_intercept(vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_PUSHF,
+		    val);
+		break;
+	}
 	default:
 		error = ENOENT;
 		break;
@@ -2382,6 +2526,9 @@ svm_getcap(void *vcpui, int type, int *retval)
 		vlapic = vm_lapic(vcpu->vcpu);
 		*retval = vlapic->ipi_exit;
 		break;
+	case VM_CAP_RFLAGS_TF:
+		*retval = !!(vcpu->caps & (1 << VM_CAP_RFLAGS_TF));
+		break;
 	default:
 		error = ENOENT;
 		break;
diff --git a/sys/amd64/vmm/amd/svm_softc.h b/sys/amd64/vmm/amd/svm_softc.h
index e92d3c2e734c..0fd2303a7242 100644
--- a/sys/amd64/vmm/amd/svm_softc.h
+++ b/sys/amd64/vmm/amd/svm_softc.h
@@ -36,6 +36,12 @@
 
 struct svm_softc;
 
+struct dbg {
+	uint32_t	rflags_tf;   /* saved RFLAGS.TF value when single-stepping a vcpu */
+	bool		popf_sstep;  /* indicates that we've stepped over popf */
+	bool		pushf_sstep; /* indicates that we've stepped over pushf */
+};
+
 struct asid {
 	uint64_t	gen;	/* range is [1, ~0UL] */
 	uint32_t	num;	/* range is [1, nasid - 1] */
@@ -54,6 +60,8 @@ struct svm_vcpu {
 	struct asid	asid;
 	struct vm_mtrr  mtrr;
 	int		vcpuid;
+	struct dbg	dbg;
+	int		caps;	 /* optional vm capabilities */
 };
 
 /*
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 64ba16cc8969..ae2ed8e6ea0f 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -1746,6 +1746,40 @@ vm_handle_reqidle(struct vcpu *vcpu, bool *retu)
 	return (0);
 }
 
+static int
+vm_handle_db(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
+{
+	int error, fault;
+	uint64_t rsp;
+	uint64_t rflags;
+	struct vm_copyinfo copyinfo;
+
+	*retu = true;
+	if (!vme->u.dbg.pushf_intercept || vme->u.dbg.tf_shadow_val != 0) {
+		return (0);
+	}
+
+	vm_get_register(vcpu, VM_REG_GUEST_RSP, &rsp);
+	error = vm_copy_setup(vcpu, &vme->u.dbg.paging, rsp, sizeof(uint64_t),
+	    VM_PROT_RW, &copyinfo, 1, &fault);
+	if (error != 0 || fault != 0) {
+		*retu = false;
+		return (EINVAL);
+	}
+
+	/* Read pushed rflags value from top of stack. */
+	vm_copyin(&copyinfo, &rflags, sizeof(uint64_t));
+
+	/* Clear TF bit. */
+	rflags &= ~(PSL_T);
+
+	/* Write updated value back to memory. */
+	vm_copyout(&rflags, &copyinfo, sizeof(uint64_t));
+	vm_copy_teardown(&copyinfo, 1);
+
+	return (0);
+}
+
 int
 vm_suspend(struct vm *vm, enum vm_suspend_how how)
 {
@@ -1914,6 +1948,9 @@ restart:
 		case VM_EXITCODE_INOUT_STR:
 			error = vm_handle_inout(vcpu, vme, &retu);
 			break;
+		case VM_EXITCODE_DB:
+			error = vm_handle_db(vcpu, vme, &retu);
+			break;
 		case VM_EXITCODE_MONITOR:
 		case VM_EXITCODE_MWAIT:
 		case VM_EXITCODE_VMINSN: