svn commit: r271570 - in projects/bhyve_svm: sys/amd64/include sys/amd64/vmm/amd usr.sbin/bhyve
Neel Natu
neel at FreeBSD.org
Sun Sep 14 04:39:05 UTC 2014
Author: neel
Date: Sun Sep 14 04:39:04 2014
New Revision: 271570
URL: http://svnweb.freebsd.org/changeset/base/271570
Log:
Set the 'vmexit->inst_length' field properly depending on the type of the
VM-exit and ultimately on whether nRIP is valid. This allows us to update
the %rip after the emulation is finished so any exceptions triggered during
the emulation will point to the right instruction.
Don't attempt to handle INS/OUTS VM-exits unless the DecodeAssist capability
is available. The effective segment field in EXITINFO1 is not valid without
this capability.
Add VM_EXITCODE_SVM to flag SVM VM-exits that cannot be handled. Provide the
VMCB fields exitinfo1 and exitinfo2 as collateral to help with debugging.
Provide a SVM VM-exit handler to dump the exitcode, exitinfo1 and exitinfo2
fields in bhyve(8).
Reviewed by: Anish Gupta (akgupt3 at gmail.com)
Reviewed by: grehan
Modified:
projects/bhyve_svm/sys/amd64/include/vmm.h
projects/bhyve_svm/sys/amd64/vmm/amd/svm.c
projects/bhyve_svm/usr.sbin/bhyve/bhyverun.c
Modified: projects/bhyve_svm/sys/amd64/include/vmm.h
==============================================================================
--- projects/bhyve_svm/sys/amd64/include/vmm.h Sun Sep 14 03:01:18 2014 (r271569)
+++ projects/bhyve_svm/sys/amd64/include/vmm.h Sun Sep 14 04:39:04 2014 (r271570)
@@ -485,6 +485,7 @@ enum vm_exitcode {
VM_EXITCODE_SUSPENDED,
VM_EXITCODE_INOUT_STR,
VM_EXITCODE_TASK_SWITCH,
+ VM_EXITCODE_SVM,
VM_EXITCODE_MAX
};
@@ -562,6 +563,14 @@ struct vm_exit {
int inst_type;
int inst_error;
} vmx;
+ /*
+ * SVM specific payload.
+ */
+ struct {
+ uint64_t exitcode;
+ uint64_t exitinfo1;
+ uint64_t exitinfo2;
+ } svm;
struct {
uint32_t code; /* ecx value */
uint64_t wval;
Modified: projects/bhyve_svm/sys/amd64/vmm/amd/svm.c
==============================================================================
--- projects/bhyve_svm/sys/amd64/vmm/amd/svm.c Sun Sep 14 03:01:18 2014 (r271569)
+++ projects/bhyve_svm/sys/amd64/vmm/amd/svm.c Sun Sep 14 04:39:04 2014 (r271570)
@@ -735,11 +735,12 @@ svm_paging_info(struct vmcb *vmcb, struc
state->efer);
}
+#define UNHANDLED 0
/*
* Handle guest I/O intercept.
*/
-static bool
+static int
svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
{
struct vmcb_ctrl *ctrl;
@@ -747,21 +748,34 @@ svm_handle_io(struct svm_softc *svm_sc,
struct svm_regctx *regs;
struct vm_inout_str *vis;
uint64_t info1;
+ int inout_string;
state = svm_get_vmcb_state(svm_sc, vcpu);
ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
regs = svm_get_guest_regctx(svm_sc, vcpu);
+
info1 = ctrl->exitinfo1;
-
+ inout_string = info1 & BIT(2) ? 1 : 0;
+
+ /*
+ * The effective segment number in EXITINFO1[12:10] is populated
+ * only if the processor has the DecodeAssist capability.
+ *
+ * XXX this is not specified explicitly in APMv2 but can be verified
+ * empirically.
+ */
+ if (inout_string && !decode_assist())
+ return (UNHANDLED);
+
vmexit->exitcode = VM_EXITCODE_INOUT;
vmexit->u.inout.in = (info1 & BIT(0)) ? 1 : 0;
- vmexit->u.inout.string = (info1 & BIT(2)) ? 1 : 0;
+ vmexit->u.inout.string = inout_string;
vmexit->u.inout.rep = (info1 & BIT(3)) ? 1 : 0;
vmexit->u.inout.bytes = (info1 >> 4) & 0x7;
vmexit->u.inout.port = (uint16_t)(info1 >> 16);
vmexit->u.inout.eax = (uint32_t)(state->rax);
- if (vmexit->u.inout.string) {
+ if (inout_string) {
vmexit->exitcode = VM_EXITCODE_INOUT_STR;
vis = &vmexit->u.inout_str;
svm_paging_info(svm_get_vmcb(svm_sc, vcpu), &vis->paging);
@@ -773,8 +787,8 @@ svm_handle_io(struct svm_softc *svm_sc,
svm_inout_str_seginfo(svm_sc, vcpu, info1,
vmexit->u.inout.in, vis);
}
-
- return (false);
+
+ return (UNHANDLED);
}
static int
@@ -823,11 +837,6 @@ svm_handle_inst_emul(struct vmcb *vmcb,
vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
svm_paging_info(vmcb, paging);
- /*
- * The inst_length will be determined by decoding the instruction.
- */
- vmexit->inst_length = 0;
-
seg = vmcb_seg(vmcb, VM_REG_GUEST_CS);
switch(paging->cpu_mode) {
case CPU_MODE_PROTECTED:
@@ -1054,122 +1063,167 @@ exit_reason_to_str(uint64_t reason)
#endif /* KTR */
/*
- * Determine the cause of virtual cpu exit and handle VMEXIT.
- * Return: false - Break vcpu execution loop and handle vmexit
- * in kernel or user space.
- * true - Continue vcpu run.
+ * From section "State Saved on Exit" in APMv2: nRIP is saved for all #VMEXITs
+ * that are due to instruction intercepts as well as MSR and IOIO intercepts
+ * and exceptions caused by INT3, INTO and BOUND instructions.
+ *
+ * Return 1 if the nRIP is valid and 0 otherwise.
*/
-static bool
+static int
+nrip_valid(uint64_t exitcode)
+{
+ switch (exitcode) {
+ case 0x00 ... 0x0F: /* read of CR0 through CR15 */
+ case 0x10 ... 0x1F: /* write of CR0 through CR15 */
+ case 0x20 ... 0x2F: /* read of DR0 through DR15 */
+ case 0x30 ... 0x3F: /* write of DR0 through DR15 */
+ case 0x43: /* INT3 */
+ case 0x44: /* INTO */
+ case 0x45: /* BOUND */
+ case 0x65 ... 0x7C: /* VMEXIT_CR0_SEL_WRITE ... VMEXIT_MSR */
+ case 0x80 ... 0x8D: /* VMEXIT_VMRUN ... VMEXIT_XSETBV */
+ return (1);
+ default:
+ return (0);
+ }
+}
+
+/*
+ * Collateral for a generic SVM VM-exit.
+ */
+static void
+vm_exit_svm(struct vm_exit *vme, uint64_t code, uint64_t info1, uint64_t info2)
+{
+
+ vme->exitcode = VM_EXITCODE_SVM;
+ vme->u.svm.exitcode = code;
+ vme->u.svm.exitinfo1 = info1;
+ vme->u.svm.exitinfo2 = info2;
+}
+
+static int
svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
{
+ struct vmcb *vmcb;
struct vmcb_state *state;
struct vmcb_ctrl *ctrl;
struct svm_regctx *ctx;
uint64_t code, info1, info2, val;
uint32_t eax, ecx, edx;
- bool update_rip, loop, retu;
+ int handled;
+ bool retu;
- KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
+ ctx = svm_get_guest_regctx(svm_sc, vcpu);
+ vmcb = svm_get_vmcb(svm_sc, vcpu);
+ state = &vmcb->state;
+ ctrl = &vmcb->ctrl;
- state = svm_get_vmcb_state(svm_sc, vcpu);
- ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
- ctx = svm_get_guest_regctx(svm_sc, vcpu);
- code = ctrl->exitcode;
+ handled = 0;
+ code = ctrl->exitcode;
info1 = ctrl->exitinfo1;
info2 = ctrl->exitinfo2;
- update_rip = true;
- loop = true;
- vmexit->exitcode = VM_EXITCODE_VMX;
- vmexit->u.vmx.status = 0;
+ vmexit->exitcode = VM_EXITCODE_BOGUS;
+ vmexit->rip = state->rip;
+ vmexit->inst_length = nrip_valid(code) ? ctrl->nrip - state->rip : 0;
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_COUNT, 1);
+ /*
+ * #VMEXIT(INVALID) needs to be handled early because the VMCB is
+ * in an inconsistent state and can trigger assertions that would
+ * never happen otherwise.
+ */
+ if (code == VMCB_EXIT_INVALID) {
+ vm_exit_svm(vmexit, code, info1, info2);
+ return (0);
+ }
+
KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0, ("%s: event "
"injection valid bit is set %#lx", __func__, ctrl->eventinj));
+ KASSERT(vmexit->inst_length >= 0 && vmexit->inst_length <= 15,
+ ("invalid inst_length %d: code (%#lx), info1 (%#lx), info2 (%#lx)",
+ vmexit->inst_length, code, info1, info2));
+
svm_save_intinfo(svm_sc, vcpu);
switch (code) {
- case VMCB_EXIT_VINTR:
- update_rip = false;
+ case VMCB_EXIT_VINTR: /* interrupt window exiting */
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_VINTR, 1);
+ handled = 1;
break;
- case VMCB_EXIT_MC: /* Machine Check. */
- vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MTRAP, 1);
- vmexit->exitcode = VM_EXITCODE_MTRAP;
- loop = false;
+ case VMCB_EXIT_INTR: /* external interrupt */
+ vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1);
+ handled = 1;
+ break;
+ case VMCB_EXIT_NMI: /* external NMI */
+ handled = 1;
+ break;
+ case VMCB_EXIT_MC: /* machine check */
+ vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXCEPTION, 1);
break;
case VMCB_EXIT_MSR: /* MSR access. */
eax = state->rax;
ecx = ctx->sctx_rcx;
edx = ctx->e.g.sctx_rdx;
+ retu = false;
if (ecx == MSR_EFER) {
- KASSERT(info1 != 0, ("rdmsr(MSR_EFER) is not "
- "emulated: info1(%#lx) info2(%#lx)",
- info1, info2));
+ KASSERT(info1 != 0, ("rdmsr(MSR_EFER) is not emulated: "
+ "info1(%#lx) info2(%#lx)", info1, info2));
svm_write_efer(svm_sc, vcpu, edx, eax);
+ handled = 1;
+ break;
+ }
+
+#define MSR_AMDK8_IPM 0xc0010055
+ /*
+ * Ignore access to the "Interrupt Pending Message" MSR.
+ */
+ if (ecx == MSR_AMDK8_IPM) {
+ if (!info1)
+ state->rax = ctx->e.g.sctx_rdx = 0;
+ handled = 1;
break;
}
- retu = false;
if (info1) {
- /* VM exited because of write MSR */
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1);
- vmexit->exitcode = VM_EXITCODE_WRMSR;
- vmexit->u.msr.code = ecx;
val = (uint64_t)edx << 32 | eax;
+ VCPU_CTR2(svm_sc->vm, vcpu, "wrmsr %#x val %#lx",
+ ecx, val);
if (emulate_wrmsr(svm_sc->vm, vcpu, ecx, val, &retu)) {
+ vmexit->exitcode = VM_EXITCODE_WRMSR;
+ vmexit->u.msr.code = ecx;
vmexit->u.msr.wval = val;
- loop = false;
- } else
- loop = retu ? false : true;
- VCPU_CTR3(svm_sc->vm, vcpu,
- "VMEXIT WRMSR(%s handling) 0x%lx @0x%x",
- loop ? "kernel" : "user", val, ecx);
+ } else if (!retu) {
+ handled = 1;
+ } else {
+ KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
+ ("emulate_wrmsr retu with bogus exitcode"));
+ }
} else {
+ VCPU_CTR1(svm_sc->vm, vcpu, "rdmsr %#x", ecx);
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1);
- vmexit->exitcode = VM_EXITCODE_RDMSR;
- vmexit->u.msr.code = ecx;
- if (emulate_rdmsr(svm_sc->vm, vcpu, ecx, &retu))
- loop = false;
- else
- loop = retu ? false : true;
- VCPU_CTR3(svm_sc->vm, vcpu, "SVM:VMEXIT RDMSR"
- " MSB=0x%08x, LSB=%08x @0x%x",
- ctx->e.g.sctx_rdx, state->rax, ecx);
+ if (emulate_rdmsr(svm_sc->vm, vcpu, ecx, &retu)) {
+ vmexit->exitcode = VM_EXITCODE_RDMSR;
+ vmexit->u.msr.code = ecx;
+ } else if (!retu) {
+ handled = 1;
+ } else {
+ KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
+ ("emulate_rdmsr retu with bogus exitcode"));
+ }
}
-
-#define MSR_AMDK8_IPM 0xc0010055
- /*
- * We can't hide AMD C1E idle capability since its
- * based on CPU generation, for now ignore access to
- * this MSR by vcpus
- * XXX: special handling of AMD C1E - Ignore.
- */
- if (ecx == MSR_AMDK8_IPM)
- loop = true;
- break;
- case VMCB_EXIT_INTR:
- /*
- * Exit on External Interrupt.
- * Give host interrupt handler to run and if its guest
- * interrupt, local APIC will inject event in guest.
- */
- update_rip = false;
- vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1);
- break;
- case VMCB_EXIT_NMI:
- update_rip = false;
break;
case VMCB_EXIT_IO:
- loop = svm_handle_io(svm_sc, vcpu, vmexit);
+ handled = svm_handle_io(svm_sc, vcpu, vmexit);
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1);
break;
case VMCB_EXIT_CPUID:
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_CPUID, 1);
- loop = x86_emulate_cpuid(svm_sc->vm, vcpu,
+ handled = x86_emulate_cpuid(svm_sc->vm, vcpu,
(uint32_t *)&state->rax,
(uint32_t *)&ctx->sctx_rbx,
(uint32_t *)&ctx->sctx_rcx,
@@ -1179,25 +1233,18 @@ svm_vmexit(struct svm_softc *svm_sc, int
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_HLT, 1);
vmexit->exitcode = VM_EXITCODE_HLT;
vmexit->u.hlt.rflags = state->rflags;
- loop = false;
break;
case VMCB_EXIT_PAUSE:
vmexit->exitcode = VM_EXITCODE_PAUSE;
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_PAUSE, 1);
- loop = false;
break;
case VMCB_EXIT_NPF:
- loop = false;
- update_rip = false;
+ /* EXITINFO2 contains the faulting guest physical address */
if (info1 & VMCB_NPF_INFO1_RSV) {
VCPU_CTR2(svm_sc->vm, vcpu, "nested page fault with "
"reserved bits set: info1(%#lx) info2(%#lx)",
info1, info2);
- break;
- }
-
- /* EXITINFO2 has the physical fault address (GPA). */
- if(vm_mem_allocated(svm_sc->vm, info2)) {
+ } else if (vm_mem_allocated(svm_sc->vm, info2)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
vmexit->u.paging.gpa = info2;
vmexit->u.paging.fault_type = svm_npf_paging(info1);
@@ -1206,54 +1253,41 @@ svm_vmexit(struct svm_softc *svm_sc, int
"on gpa %#lx/%#lx at rip %#lx",
info2, info1, state->rip);
} else if (svm_npf_emul_fault(info1)) {
- svm_handle_inst_emul(svm_get_vmcb(svm_sc, vcpu),
- info2, vmexit);
+ svm_handle_inst_emul(vmcb, info2, vmexit);
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INST_EMUL, 1);
VCPU_CTR3(svm_sc->vm, vcpu, "inst_emul fault "
"for gpa %#lx/%#lx at rip %#lx",
info2, info1, state->rip);
}
break;
- case VMCB_EXIT_SHUTDOWN:
- loop = false;
- break;
- case VMCB_EXIT_INVALID:
- loop = false;
- break;
default:
- /* Return to user space. */
- loop = false;
- update_rip = false;
- VCPU_CTR3(svm_sc->vm, vcpu, "VMEXIT=0x%lx"
- " EXITINFO1: 0x%lx EXITINFO2:0x%lx\n",
- ctrl->exitcode, info1, info2);
- VCPU_CTR3(svm_sc->vm, vcpu, "SVM:RIP: 0x%lx nRIP:0x%lx"
- " Inst decoder len:%d\n", state->rip,
- ctrl->nrip, ctrl->inst_len);
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_UNKNOWN, 1);
break;
}
- VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %#lx nrip %#lx",
- loop ? "handled" : "unhandled", exit_reason_to_str(code),
- state->rip, update_rip ? ctrl->nrip : state->rip);
+ VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %#lx/%d",
+ handled ? "handled" : "unhandled", exit_reason_to_str(code),
+ vmexit->rip, vmexit->inst_length);
- vmexit->rip = state->rip;
- if (update_rip) {
- if (ctrl->nrip == 0) {
- VCPU_CTR1(svm_sc->vm, vcpu, "SVM_ERR:nRIP is not set "
- "for RIP0x%lx.\n", state->rip);
- vmexit->exitcode = VM_EXITCODE_VMX;
- } else
- vmexit->rip = ctrl->nrip;
- }
-
- /* If vcpu execution is continued, update RIP. */
- if (loop) {
+ if (handled) {
+ vmexit->rip += vmexit->inst_length;
+ vmexit->inst_length = 0;
state->rip = vmexit->rip;
+ } else {
+ if (vmexit->exitcode == VM_EXITCODE_BOGUS) {
+ /*
+ * If this VM exit was not claimed by anybody then
+ * treat it as a generic SVM exit.
+ */
+ vm_exit_svm(vmexit, code, info1, info2);
+ } else {
+ /*
+ * The exitcode and collateral have been populated.
+ * The VM exit will be processed further in userland.
+ */
+ }
}
-
- return (loop);
+ return (handled);
}
static void
@@ -1562,9 +1596,8 @@ svm_vmrun(void *arg, int vcpu, register_
struct vm *vm;
uint64_t vmcb_pa;
u_int thiscpu;
- bool loop; /* Continue vcpu execution loop. */
+ int handled;
- loop = true;
svm_sc = arg;
vm = svm_sc->vm;
@@ -1616,8 +1649,6 @@ svm_vmrun(void *arg, int vcpu, register_
state->rip = rip;
do {
- vmexit->inst_length = 0;
-
/*
* Disable global interrupts to guarantee atomicity during
* loading of guest state. This includes not only the state
@@ -1692,8 +1723,8 @@ svm_vmrun(void *arg, int vcpu, register_
enable_gintr();
/* Handle #VMEXIT and if required return to user space. */
- loop = svm_vmexit(svm_sc, vcpu, vmexit);
- } while (loop);
+ handled = svm_vmexit(svm_sc, vcpu, vmexit);
+ } while (handled);
return (0);
}
Modified: projects/bhyve_svm/usr.sbin/bhyve/bhyverun.c
==============================================================================
--- projects/bhyve_svm/usr.sbin/bhyve/bhyverun.c Sun Sep 14 03:01:18 2014 (r271569)
+++ projects/bhyve_svm/usr.sbin/bhyve/bhyverun.c Sun Sep 14 04:39:04 2014 (r271570)
@@ -445,6 +445,20 @@ vmexit_vmx(struct vmctx *ctx, struct vm_
}
static int
+vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+
+ fprintf(stderr, "vm exit[%d]\n", *pvcpu);
+ fprintf(stderr, "\treason\t\tSVM\n");
+ fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
+ fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
+ fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode);
+ fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1);
+ fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2);
+ return (VMEXIT_ABORT);
+}
+
+static int
vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
@@ -555,6 +569,7 @@ static vmexit_handler_t handler[VM_EXITC
[VM_EXITCODE_INOUT] = vmexit_inout,
[VM_EXITCODE_INOUT_STR] = vmexit_inout,
[VM_EXITCODE_VMX] = vmexit_vmx,
+ [VM_EXITCODE_SVM] = vmexit_svm,
[VM_EXITCODE_BOGUS] = vmexit_bogus,
[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
More information about the svn-src-projects
mailing list