svn commit: r268889 - in head: lib/libvmmapi sys/amd64/include sys/amd64/vmm sys/amd64/vmm/intel usr.sbin/bhyve usr.sbin/bhyvectl
Adrian Chadd
adrian at freebsd.org
Sun Jul 20 07:48:43 UTC 2014
On 20 July 2014 00:47, Neel Natu <neelnatu at gmail.com> wrote:
> Hi Adrian,
>
> On Sat, Jul 19, 2014 at 11:30 PM, Adrian Chadd <adrian at freebsd.org> wrote:
>> Hi!
>>
>> This broke -HEAD. 'exc' in vmx_inject_interrupts() is no longer
>> initialised before use /and/ it's part of a KASSERT() output.
>>
>
> Are you building with an external toolchain?
Nope; just head with invariants disabled.
-a
> best
> Neel
>
>> Thanks!
>>
>>
>> -a
>>
>>
>> On 19 July 2014 13:59, Neel Natu <neel at freebsd.org> wrote:
>>> Author: neel
>>> Date: Sat Jul 19 20:59:08 2014
>>> New Revision: 268889
>>> URL: http://svnweb.freebsd.org/changeset/base/268889
>>>
>>> Log:
>>> Handle nested exceptions in bhyve.
>>>
>>> A nested exception condition arises when a second exception is triggered while
>>> delivering the first exception. Most nested exceptions can be handled serially
>>> but some are converted into a double fault. If an exception is generated during
>>> delivery of a double fault then the virtual machine shuts down as a result of
>>> a triple fault.
>>>
>>> vm_exit_intinfo() is used to record that a VM-exit happened while an event was
>>> being delivered through the IDT. If an exception is triggered while handling
>>> the VM-exit it will be treated like a nested exception.
>>>
>>> vm_entry_intinfo() is used by processor-specific code to get the event to be
>>> injected into the guest on the next VM-entry. This function is responsible for
>>> deciding the disposition of nested exceptions.
>>>
>>> Modified:
>>> head/lib/libvmmapi/vmmapi.c
>>> head/lib/libvmmapi/vmmapi.h
>>> head/sys/amd64/include/vmm.h
>>> head/sys/amd64/include/vmm_dev.h
>>> head/sys/amd64/vmm/intel/vmx.c
>>> head/sys/amd64/vmm/vmm.c
>>> head/sys/amd64/vmm/vmm_dev.c
>>> head/usr.sbin/bhyve/bhyverun.c
>>> head/usr.sbin/bhyve/task_switch.c
>>> head/usr.sbin/bhyvectl/bhyvectl.c
>>>
>>> Modified: head/lib/libvmmapi/vmmapi.c
>>> ==============================================================================
>>> --- head/lib/libvmmapi/vmmapi.c Sat Jul 19 20:55:13 2014 (r268888)
>>> +++ head/lib/libvmmapi/vmmapi.c Sat Jul 19 20:59:08 2014 (r268889)
>>> @@ -1106,3 +1106,32 @@ vm_activate_cpu(struct vmctx *ctx, int v
>>> error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac);
>>> return (error);
>>> }
>>> +
>>> +int
>>> +vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2)
>>> +{
>>> + struct vm_intinfo vmii;
>>> + int error;
>>> +
>>> + bzero(&vmii, sizeof(struct vm_intinfo));
>>> + vmii.vcpuid = vcpu;
>>> + error = ioctl(ctx->fd, VM_GET_INTINFO, &vmii);
>>> + if (error == 0) {
>>> + *info1 = vmii.info1;
>>> + *info2 = vmii.info2;
>>> + }
>>> + return (error);
>>> +}
>>> +
>>> +int
>>> +vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1)
>>> +{
>>> + struct vm_intinfo vmii;
>>> + int error;
>>> +
>>> + bzero(&vmii, sizeof(struct vm_intinfo));
>>> + vmii.vcpuid = vcpu;
>>> + vmii.info1 = info1;
>>> + error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii);
>>> + return (error);
>>> +}
>>>
>>> Modified: head/lib/libvmmapi/vmmapi.h
>>> ==============================================================================
>>> --- head/lib/libvmmapi/vmmapi.h Sat Jul 19 20:55:13 2014 (r268888)
>>> +++ head/lib/libvmmapi/vmmapi.h Sat Jul 19 20:59:08 2014 (r268889)
>>> @@ -104,6 +104,9 @@ int vm_setup_pptdev_msix(struct vmctx *c
>>> int func, int idx, uint64_t addr, uint64_t msg,
>>> uint32_t vector_control);
>>>
>>> +int vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *i1, uint64_t *i2);
>>> +int vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t exit_intinfo);
>>> +
>>> /*
>>> * Return a pointer to the statistics buffer. Note that this is not MT-safe.
>>> */
>>>
>>> Modified: head/sys/amd64/include/vmm.h
>>> ==============================================================================
>>> --- head/sys/amd64/include/vmm.h Sat Jul 19 20:55:13 2014 (r268888)
>>> +++ head/sys/amd64/include/vmm.h Sat Jul 19 20:59:08 2014 (r268889)
>>> @@ -34,6 +34,7 @@ enum vm_suspend_how {
>>> VM_SUSPEND_RESET,
>>> VM_SUSPEND_POWEROFF,
>>> VM_SUSPEND_HALT,
>>> + VM_SUSPEND_TRIPLEFAULT,
>>> VM_SUSPEND_LAST
>>> };
>>>
>>> @@ -88,6 +89,16 @@ enum x2apic_state {
>>> X2APIC_STATE_LAST
>>> };
>>>
>>> +#define VM_INTINFO_VECTOR(info) ((info) & 0xff)
>>> +#define VM_INTINFO_DEL_ERRCODE 0x800
>>> +#define VM_INTINFO_RSVD 0x7ffff000
>>> +#define VM_INTINFO_VALID 0x80000000
>>> +#define VM_INTINFO_TYPE 0x700
>>> +#define VM_INTINFO_HWINTR (0 << 8)
>>> +#define VM_INTINFO_NMI (2 << 8)
>>> +#define VM_INTINFO_HWEXCEPTION (3 << 8)
>>> +#define VM_INTINFO_SWINTR (4 << 8)
>>> +
>>> #ifdef _KERNEL
>>>
>>> #define VM_MAX_NAMELEN 32
>>> @@ -278,14 +289,31 @@ struct vatpit *vm_atpit(struct vm *vm);
>>> int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme);
>>>
>>> /*
>>> - * Returns 0 if there is no exception pending for this vcpu. Returns 1 if an
>>> - * exception is pending and also updates 'vme'. The pending exception is
>>> - * cleared when this function returns.
>>> + * This function is called after a VM-exit that occurred during exception or
>>> + * interrupt delivery through the IDT. The format of 'intinfo' is described
>>> + * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
>>> *
>>> - * This function should only be called in the context of the thread that is
>>> - * executing this vcpu.
>>> + * If a VM-exit handler completes the event delivery successfully then it
>>> + * should call vm_exit_intinfo() to extinguish the pending event. For e.g.,
>>> + * if the task switch emulation is triggered via a task gate then it should
>>> + * call this function with 'intinfo=0' to indicate that the external event
>>> + * is not pending anymore.
>>> + *
>>> + * Return value is 0 on success and non-zero on failure.
>>> */
>>> -int vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *vme);
>>> +int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo);
>>> +
>>> +/*
>>> + * This function is called before every VM-entry to retrieve a pending
>>> + * event that should be injected into the guest. This function combines
>>> + * nested events into a double or triple fault.
>>> + *
>>> + * Returns 0 if there are no events that need to be injected into the guest
>>> + * and non-zero otherwise.
>>> + */
>>> +int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);
>>> +
>>> +int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
>>>
>>> void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */
>>> void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */
>>>
>>> Modified: head/sys/amd64/include/vmm_dev.h
>>> ==============================================================================
>>> --- head/sys/amd64/include/vmm_dev.h Sat Jul 19 20:55:13 2014 (r268888)
>>> +++ head/sys/amd64/include/vmm_dev.h Sat Jul 19 20:59:08 2014 (r268889)
>>> @@ -189,6 +189,12 @@ struct vm_cpuset {
>>> #define VM_ACTIVE_CPUS 0
>>> #define VM_SUSPENDED_CPUS 1
>>>
>>> +struct vm_intinfo {
>>> + int vcpuid;
>>> + uint64_t info1;
>>> + uint64_t info2;
>>> +};
>>> +
>>> enum {
>>> /* general routines */
>>> IOCNUM_ABIVERS = 0,
>>> @@ -211,6 +217,8 @@ enum {
>>> IOCNUM_GET_SEGMENT_DESCRIPTOR = 23,
>>>
>>> /* interrupt injection */
>>> + IOCNUM_GET_INTINFO = 28,
>>> + IOCNUM_SET_INTINFO = 29,
>>> IOCNUM_INJECT_EXCEPTION = 30,
>>> IOCNUM_LAPIC_IRQ = 31,
>>> IOCNUM_INJECT_NMI = 32,
>>> @@ -324,4 +332,8 @@ enum {
>>> _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
>>> #define VM_GET_CPUS \
>>> _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
>>> +#define VM_SET_INTINFO \
>>> + _IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo)
>>> +#define VM_GET_INTINFO \
>>> + _IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo)
>>> #endif
>>>
>>> Modified: head/sys/amd64/vmm/intel/vmx.c
>>> ==============================================================================
>>> --- head/sys/amd64/vmm/intel/vmx.c Sat Jul 19 20:55:13 2014 (r268888)
>>> +++ head/sys/amd64/vmm/intel/vmx.c Sat Jul 19 20:59:08 2014 (r268889)
>>> @@ -1213,22 +1213,31 @@ vmx_inject_interrupts(struct vmx *vmx, i
>>> {
>>> struct vm_exception exc;
>>> int vector, need_nmi_exiting, extint_pending;
>>> - uint64_t rflags;
>>> + uint64_t rflags, entryinfo;
>>> uint32_t gi, info;
>>>
>>> - if (vm_exception_pending(vmx->vm, vcpu, &exc)) {
>>> - KASSERT(exc.vector >= 0 && exc.vector < 32,
>>> - ("%s: invalid exception vector %d", __func__, exc.vector));
>>> + if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) {
>>> + KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry "
>>> + "intinfo is not valid: %#lx", __func__, entryinfo));
>>>
>>> info = vmcs_read(VMCS_ENTRY_INTR_INFO);
>>> KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject "
>>> "pending exception %d: %#x", __func__, exc.vector, info));
>>>
>>> - info = exc.vector | VMCS_INTR_T_HWEXCEPTION | VMCS_INTR_VALID;
>>> - if (exc.error_code_valid) {
>>> - info |= VMCS_INTR_DEL_ERRCODE;
>>> - vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, exc.error_code);
>>> + info = entryinfo;
>>> + vector = info & 0xff;
>>> + if (vector == IDT_BP || vector == IDT_OF) {
>>> + /*
>>> + * VT-x requires #BP and #OF to be injected as software
>>> + * exceptions.
>>> + */
>>> + info &= ~VMCS_INTR_T_MASK;
>>> + info |= VMCS_INTR_T_SWEXCEPTION;
>>> }
>>> +
>>> + if (info & VMCS_INTR_DEL_ERRCODE)
>>> + vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, entryinfo >> 32);
>>> +
>>> vmcs_write(VMCS_ENTRY_INTR_INFO, info);
>>> }
>>>
>>> @@ -1407,6 +1416,16 @@ vmx_clear_nmi_blocking(struct vmx *vmx,
>>> vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
>>> }
>>>
>>> +static void
>>> +vmx_assert_nmi_blocking(struct vmx *vmx, int vcpuid)
>>> +{
>>> + uint32_t gi;
>>> +
>>> + gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
>>> + KASSERT(gi & VMCS_INTERRUPTIBILITY_NMI_BLOCKING,
>>> + ("NMI blocking is not in effect %#x", gi));
>>> +}
>>> +
>>> static int
>>> vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
>>> {
>>> @@ -2050,7 +2069,7 @@ vmx_exit_process(struct vmx *vmx, int vc
>>> struct vm_task_switch *ts;
>>> uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info;
>>> uint32_t intr_type, reason;
>>> - uint64_t qual, gpa;
>>> + uint64_t exitintinfo, qual, gpa;
>>> bool retu;
>>>
>>> CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_VIRTUAL_NMI) != 0);
>>> @@ -2070,47 +2089,49 @@ vmx_exit_process(struct vmx *vmx, int vc
>>> * be handled specially by re-injecting the event if the IDT
>>> * vectoring information field's valid bit is set.
>>> *
>>> - * If the VM-exit is due to a task gate in the IDT then we don't
>>> - * reinject the event because emulating the task switch also
>>> - * completes the event delivery.
>>> - *
>>> * See "Information for VM Exits During Event Delivery" in Intel SDM
>>> * for details.
>>> */
>>> - switch (reason) {
>>> - case EXIT_REASON_EPT_FAULT:
>>> - case EXIT_REASON_EPT_MISCONFIG:
>>> - case EXIT_REASON_APIC_ACCESS:
>>> - case EXIT_REASON_TASK_SWITCH:
>>> - case EXIT_REASON_EXCEPTION:
>>> - idtvec_info = vmcs_idt_vectoring_info();
>>> - VCPU_CTR2(vmx->vm, vcpu, "vm exit %s: idtvec_info 0x%08x",
>>> - exit_reason_to_str(reason), idtvec_info);
>>> - if ((idtvec_info & VMCS_IDT_VEC_VALID) &&
>>> - (reason != EXIT_REASON_TASK_SWITCH)) {
>>> - idtvec_info &= ~(1 << 12); /* clear undefined bit */
>>> - vmcs_write(VMCS_ENTRY_INTR_INFO, idtvec_info);
>>> - if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
>>> - idtvec_err = vmcs_idt_vectoring_err();
>>> - vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR,
>>> - idtvec_err);
>>> - }
>>> - /*
>>> - * If 'virtual NMIs' are being used and the VM-exit
>>> - * happened while injecting an NMI during the previous
>>> - * VM-entry, then clear "blocking by NMI" in the Guest
>>> - * Interruptibility-state.
>>> - */
>>> - if ((idtvec_info & VMCS_INTR_T_MASK) ==
>>> - VMCS_INTR_T_NMI) {
>>> - vmx_clear_nmi_blocking(vmx, vcpu);
>>> - }
>>> + idtvec_info = vmcs_idt_vectoring_info();
>>> + if (idtvec_info & VMCS_IDT_VEC_VALID) {
>>> + idtvec_info &= ~(1 << 12); /* clear undefined bit */
>>> + exitintinfo = idtvec_info;
>>> + if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
>>> + idtvec_err = vmcs_idt_vectoring_err();
>>> + exitintinfo |= (uint64_t)idtvec_err << 32;
>>> + }
>>> + error = vm_exit_intinfo(vmx->vm, vcpu, exitintinfo);
>>> + KASSERT(error == 0, ("%s: vm_set_intinfo error %d",
>>> + __func__, error));
>>> +
>>> + /*
>>> + * If 'virtual NMIs' are being used and the VM-exit
>>> + * happened while injecting an NMI during the previous
>>> + * VM-entry, then clear "blocking by NMI" in the
>>> + * Guest Interruptibility-State so the NMI can be
>>> + * reinjected on the subsequent VM-entry.
>>> + *
>>> + * However, if the NMI was being delivered through a task
>>> + * gate, then the new task must start execution with NMIs
>>> + * blocked so don't clear NMI blocking in this case.
>>> + */
>>> + intr_type = idtvec_info & VMCS_INTR_T_MASK;
>>> + if (intr_type == VMCS_INTR_T_NMI) {
>>> + if (reason != EXIT_REASON_TASK_SWITCH)
>>> + vmx_clear_nmi_blocking(vmx, vcpu);
>>> + else
>>> + vmx_assert_nmi_blocking(vmx, vcpu);
>>> + }
>>> +
>>> + /*
>>> + * Update VM-entry instruction length if the event being
>>> + * delivered was a software interrupt or software exception.
>>> + */
>>> + if (intr_type == VMCS_INTR_T_SWINTR ||
>>> + intr_type == VMCS_INTR_T_PRIV_SWEXCEPTION ||
>>> + intr_type == VMCS_INTR_T_SWEXCEPTION) {
>>> vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length);
>>> }
>>> - break;
>>> - default:
>>> - idtvec_info = 0;
>>> - break;
>>> }
>>>
>>> switch (reason) {
>>> @@ -2136,7 +2157,7 @@ vmx_exit_process(struct vmx *vmx, int vc
>>> */
>>> if (ts->reason == TSR_IDT_GATE) {
>>> KASSERT(idtvec_info & VMCS_IDT_VEC_VALID,
>>> - ("invalid idtvec_info %x for IDT task switch",
>>> + ("invalid idtvec_info %#x for IDT task switch",
>>> idtvec_info));
>>> intr_type = idtvec_info & VMCS_INTR_T_MASK;
>>> if (intr_type != VMCS_INTR_T_SWINTR &&
>>> @@ -2302,6 +2323,7 @@ vmx_exit_process(struct vmx *vmx, int vc
>>> * the guest.
>>> *
>>> * See "Resuming Guest Software after Handling an Exception".
>>> + * See "Information for VM Exits Due to Vectored Events".
>>> */
>>> if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 &&
>>> (intr_info & 0xff) != IDT_DF &&
>>> @@ -2519,6 +2541,13 @@ vmx_run(void *arg, int vcpu, register_t
>>> * pmap_invalidate_ept().
>>> */
>>> disable_intr();
>>> + vmx_inject_interrupts(vmx, vcpu, vlapic);
>>> +
>>> + /*
>>> + * Check for vcpu suspension after injecting events because
>>> + * vmx_inject_interrupts() can suspend the vcpu due to a
>>> + * triple fault.
>>> + */
>>> if (vcpu_suspended(suspend_cookie)) {
>>> enable_intr();
>>> vm_exit_suspended(vmx->vm, vcpu, vmcs_guest_rip());
>>> @@ -2539,7 +2568,6 @@ vmx_run(void *arg, int vcpu, register_t
>>> break;
>>> }
>>>
>>> - vmx_inject_interrupts(vmx, vcpu, vlapic);
>>> vmx_run_trace(vmx, vcpu);
>>> rc = vmx_enter_guest(vmxctx, vmx, launched);
>>>
>>>
>>> Modified: head/sys/amd64/vmm/vmm.c
>>> ==============================================================================
>>> --- head/sys/amd64/vmm/vmm.c Sat Jul 19 20:55:13 2014 (r268888)
>>> +++ head/sys/amd64/vmm/vmm.c Sat Jul 19 20:59:08 2014 (r268889)
>>> @@ -97,6 +97,7 @@ struct vcpu {
>>> int hostcpu; /* (o) vcpu's host cpu */
>>> struct vlapic *vlapic; /* (i) APIC device model */
>>> enum x2apic_state x2apic_state; /* (i) APIC mode */
>>> + uint64_t exitintinfo; /* (i) events pending at VM exit */
>>> int nmi_pending; /* (i) NMI pending */
>>> int extint_pending; /* (i) INTR pending */
>>> struct vm_exception exception; /* (x) exception collateral */
>>> @@ -241,6 +242,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bo
>>>
>>> vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
>>> vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
>>> + vcpu->exitintinfo = 0;
>>> vcpu->nmi_pending = 0;
>>> vcpu->extint_pending = 0;
>>> vcpu->exception_pending = 0;
>>> @@ -1458,6 +1460,202 @@ restart:
>>> }
>>>
>>> int
>>> +vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info)
>>> +{
>>> + struct vcpu *vcpu;
>>> + int type, vector;
>>> +
>>> + if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
>>> + return (EINVAL);
>>> +
>>> + vcpu = &vm->vcpu[vcpuid];
>>> +
>>> + if (info & VM_INTINFO_VALID) {
>>> + type = info & VM_INTINFO_TYPE;
>>> + vector = info & 0xff;
>>> + if (type == VM_INTINFO_NMI && vector != IDT_NMI)
>>> + return (EINVAL);
>>> + if (type == VM_INTINFO_HWEXCEPTION && vector >= 32)
>>> + return (EINVAL);
>>> + if (info & VM_INTINFO_RSVD)
>>> + return (EINVAL);
>>> + } else {
>>> + info = 0;
>>> + }
>>> + VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info);
>>> + vcpu->exitintinfo = info;
>>> + return (0);
>>> +}
>>> +
>>> +enum exc_class {
>>> + EXC_BENIGN,
>>> + EXC_CONTRIBUTORY,
>>> + EXC_PAGEFAULT
>>> +};
>>> +
>>> +#define IDT_VE 20 /* Virtualization Exception (Intel specific) */
>>> +
>>> +static enum exc_class
>>> +exception_class(uint64_t info)
>>> +{
>>> + int type, vector;
>>> +
>>> + KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info));
>>> + type = info & VM_INTINFO_TYPE;
>>> + vector = info & 0xff;
>>> +
>>> + /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */
>>> + switch (type) {
>>> + case VM_INTINFO_HWINTR:
>>> + case VM_INTINFO_SWINTR:
>>> + case VM_INTINFO_NMI:
>>> + return (EXC_BENIGN);
>>> + default:
>>> + /*
>>> + * Hardware exception.
>>> + *
>>> + * SVM and VT-x use identical type values to represent NMI,
>>> + * hardware interrupt and software interrupt.
>>> + *
>>> + * SVM uses type '3' for all exceptions. VT-x uses type '3'
>>> + * for exceptions except #BP and #OF. #BP and #OF use a type
>>> + * value of '5' or '6'. Therefore we don't check for explicit
>>> + * values of 'type' to classify 'intinfo' into a hardware
>>> + * exception.
>>> + */
>>> + break;
>>> + }
>>> +
>>> + switch (vector) {
>>> + case IDT_PF:
>>> + case IDT_VE:
>>> + return (EXC_PAGEFAULT);
>>> + case IDT_DE:
>>> + case IDT_TS:
>>> + case IDT_NP:
>>> + case IDT_SS:
>>> + case IDT_GP:
>>> + return (EXC_CONTRIBUTORY);
>>> + default:
>>> + return (EXC_BENIGN);
>>> + }
>>> +}
>>> +
>>> +static int
>>> +nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2,
>>> + uint64_t *retinfo)
>>> +{
>>> + enum exc_class exc1, exc2;
>>> + int type1, vector1;
>>> +
>>> + KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1));
>>> + KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2));
>>> +
>>> + /*
>>> + * If an exception occurs while attempting to call the double-fault
>>> + * handler the processor enters shutdown mode (aka triple fault).
>>> + */
>>> + type1 = info1 & VM_INTINFO_TYPE;
>>> + vector1 = info1 & 0xff;
>>> + if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) {
>>> + VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)",
>>> + info1, info2);
>>> + vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT);
>>> + *retinfo = 0;
>>> + return (0);
>>> + }
>>> +
>>> + /*
>>> + * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3
>>> + */
>>> + exc1 = exception_class(info1);
>>> + exc2 = exception_class(info2);
>>> + if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) ||
>>> + (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) {
>>> + /* Convert nested fault into a double fault. */
>>> + *retinfo = IDT_DF;
>>> + *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
>>> + *retinfo |= VM_INTINFO_DEL_ERRCODE;
>>> + } else {
>>> + /* Handle exceptions serially */
>>> + *retinfo = info2;
>>> + }
>>> + return (1);
>>> +}
>>> +
>>> +static uint64_t
>>> +vcpu_exception_intinfo(struct vcpu *vcpu)
>>> +{
>>> + uint64_t info = 0;
>>> +
>>> + if (vcpu->exception_pending) {
>>> + info = vcpu->exception.vector & 0xff;
>>> + info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
>>> + if (vcpu->exception.error_code_valid) {
>>> + info |= VM_INTINFO_DEL_ERRCODE;
>>> + info |= (uint64_t)vcpu->exception.error_code << 32;
>>> + }
>>> + }
>>> + return (info);
>>> +}
>>> +
>>> +int
>>> +vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
>>> +{
>>> + struct vcpu *vcpu;
>>> + uint64_t info1, info2;
>>> + int valid;
>>> +
>>> + KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
>>> +
>>> + vcpu = &vm->vcpu[vcpuid];
>>> +
>>> + info1 = vcpu->exitintinfo;
>>> + vcpu->exitintinfo = 0;
>>> +
>>> + info2 = 0;
>>> + if (vcpu->exception_pending) {
>>> + info2 = vcpu_exception_intinfo(vcpu);
>>> + vcpu->exception_pending = 0;
>>> + VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx",
>>> + vcpu->exception.vector, info2);
>>> + }
>>> +
>>> + if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) {
>>> + valid = nested_fault(vm, vcpuid, info1, info2, retinfo);
>>> + } else if (info1 & VM_INTINFO_VALID) {
>>> + *retinfo = info1;
>>> + valid = 1;
>>> + } else if (info2 & VM_INTINFO_VALID) {
>>> + *retinfo = info2;
>>> + valid = 1;
>>> + } else {
>>> + valid = 0;
>>> + }
>>> +
>>> + if (valid) {
>>> + VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), "
>>> + "retinfo(%#lx)", __func__, info1, info2, *retinfo);
>>> + }
>>> +
>>> + return (valid);
>>> +}
>>> +
>>> +int
>>> +vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2)
>>> +{
>>> + struct vcpu *vcpu;
>>> +
>>> + if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
>>> + return (EINVAL);
>>> +
>>> + vcpu = &vm->vcpu[vcpuid];
>>> + *info1 = vcpu->exitintinfo;
>>> + *info2 = vcpu_exception_intinfo(vcpu);
>>> + return (0);
>>> +}
>>> +
>>> +int
>>> vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
>>> {
>>> struct vcpu *vcpu;
>>> @@ -1468,6 +1666,14 @@ vm_inject_exception(struct vm *vm, int v
>>> if (exception->vector < 0 || exception->vector >= 32)
>>> return (EINVAL);
>>>
>>> + /*
>>> + * A double fault exception should never be injected directly into
>>> + * the guest. It is a derived exception that results from specific
>>> + * combinations of nested faults.
>>> + */
>>> + if (exception->vector == IDT_DF)
>>> + return (EINVAL);
>>> +
>>> vcpu = &vm->vcpu[vcpuid];
>>>
>>> if (vcpu->exception_pending) {
>>> @@ -1483,25 +1689,6 @@ vm_inject_exception(struct vm *vm, int v
>>> return (0);
>>> }
>>>
>>> -int
>>> -vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *exception)
>>> -{
>>> - struct vcpu *vcpu;
>>> - int pending;
>>> -
>>> - KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
>>> -
>>> - vcpu = &vm->vcpu[vcpuid];
>>> - pending = vcpu->exception_pending;
>>> - if (pending) {
>>> - vcpu->exception_pending = 0;
>>> - *exception = vcpu->exception;
>>> - VCPU_CTR1(vm, vcpuid, "Exception %d delivered",
>>> - exception->vector);
>>> - }
>>> - return (pending);
>>> -}
>>> -
>>> static void
>>> vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception)
>>> {
>>>
>>> Modified: head/sys/amd64/vmm/vmm_dev.c
>>> ==============================================================================
>>> --- head/sys/amd64/vmm/vmm_dev.c Sat Jul 19 20:55:13 2014 (r268888)
>>> +++ head/sys/amd64/vmm/vmm_dev.c Sat Jul 19 20:59:08 2014 (r268889)
>>> @@ -173,6 +173,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
>>> struct vm_gla2gpa *gg;
>>> struct vm_activate_cpu *vac;
>>> struct vm_cpuset *vm_cpuset;
>>> + struct vm_intinfo *vmii;
>>>
>>> sc = vmmdev_lookup2(cdev);
>>> if (sc == NULL)
>>> @@ -199,6 +200,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
>>> case VM_SET_X2APIC_STATE:
>>> case VM_GLA2GPA:
>>> case VM_ACTIVATE_CPU:
>>> + case VM_SET_INTINFO:
>>> + case VM_GET_INTINFO:
>>> /*
>>> * XXX fragile, handle with care
>>> * Assumes that the first field of the ioctl data is the vcpu.
>>> @@ -470,6 +473,15 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
>>> error = copyout(cpuset, vm_cpuset->cpus, size);
>>> free(cpuset, M_TEMP);
>>> break;
>>> + case VM_SET_INTINFO:
>>> + vmii = (struct vm_intinfo *)data;
>>> + error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1);
>>> + break;
>>> + case VM_GET_INTINFO:
>>> + vmii = (struct vm_intinfo *)data;
>>> + error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1,
>>> + &vmii->info2);
>>> + break;
>>> default:
>>> error = ENOTTY;
>>> break;
>>>
>>> Modified: head/usr.sbin/bhyve/bhyverun.c
>>> ==============================================================================
>>> --- head/usr.sbin/bhyve/bhyverun.c Sat Jul 19 20:55:13 2014 (r268888)
>>> +++ head/usr.sbin/bhyve/bhyverun.c Sat Jul 19 20:59:08 2014 (r268889)
>>> @@ -534,6 +534,8 @@ vmexit_suspend(struct vmctx *ctx, struct
>>> exit(1);
>>> case VM_SUSPEND_HALT:
>>> exit(2);
>>> + case VM_SUSPEND_TRIPLEFAULT:
>>> + exit(3);
>>> default:
>>> fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how);
>>> exit(100);
>>>
>>> Modified: head/usr.sbin/bhyve/task_switch.c
>>> ==============================================================================
>>> --- head/usr.sbin/bhyve/task_switch.c Sat Jul 19 20:55:13 2014 (r268888)
>>> +++ head/usr.sbin/bhyve/task_switch.c Sat Jul 19 20:59:08 2014 (r268889)
>>> @@ -904,10 +904,14 @@ vmexit_task_switch(struct vmctx *ctx, st
>>> */
>>>
>>> /*
>>> - * XXX is the original task switch was triggered by a hardware
>>> - * exception then do we generate a double-fault if we encounter
>>> - * an exception during the task switch?
>>> + * If the task switch was triggered by an event delivered through
>>> + * the IDT then extinguish the pending event from the vcpu's
>>> + * exitintinfo.
>>> */
>>> + if (task_switch->reason == TSR_IDT_GATE) {
>>> + error = vm_set_intinfo(ctx, vcpu, 0);
>>> + assert(error == 0);
>>> + }
>>>
>>> /*
>>> * XXX should inject debug exception if 'T' bit is 1
>>>
>>> Modified: head/usr.sbin/bhyvectl/bhyvectl.c
>>> ==============================================================================
>>> --- head/usr.sbin/bhyvectl/bhyvectl.c Sat Jul 19 20:55:13 2014 (r268888)
>>> +++ head/usr.sbin/bhyvectl/bhyvectl.c Sat Jul 19 20:59:08 2014 (r268889)
>>> @@ -195,7 +195,8 @@ usage(void)
>>> " [--force-reset]\n"
>>> " [--force-poweroff]\n"
>>> " [--get-active-cpus]\n"
>>> - " [--get-suspended-cpus]\n",
>>> + " [--get-suspended-cpus]\n"
>>> + " [--get-intinfo]\n",
>>> progname);
>>> exit(1);
>>> }
>>> @@ -205,6 +206,7 @@ static int inject_nmi, assert_lapic_lvt;
>>> static int force_reset, force_poweroff;
>>> static const char *capname;
>>> static int create, destroy, get_lowmem, get_highmem;
>>> +static int get_intinfo;
>>> static int get_active_cpus, get_suspended_cpus;
>>> static uint64_t memsize;
>>> static int set_cr0, get_cr0, set_cr3, get_cr3, set_cr4, get_cr4;
>>> @@ -412,6 +414,37 @@ print_cpus(const char *banner, const cpu
>>> printf("\n");
>>> }
>>>
>>> +static void
>>> +print_intinfo(const char *banner, uint64_t info)
>>> +{
>>> + int type;
>>> +
>>> + printf("%s:\t", banner);
>>> + if (info & VM_INTINFO_VALID) {
>>> + type = info & VM_INTINFO_TYPE;
>>> + switch (type) {
>>> + case VM_INTINFO_HWINTR:
>>> + printf("extint");
>>> + break;
>>> + case VM_INTINFO_NMI:
>>> + printf("nmi");
>>> + break;
>>> + case VM_INTINFO_SWINTR:
>>> + printf("swint");
>>> + break;
>>> + default:
>>> + printf("exception");
>>> + break;
>>> + }
>>> + printf(" vector %d", (int)VM_INTINFO_VECTOR(info));
>>> + if (info & VM_INTINFO_DEL_ERRCODE)
>>> + printf(" errcode %#x", (u_int)(info >> 32));
>>> + } else {
>>> + printf("n/a");
>>> + }
>>> + printf("\n");
>>> +}
>>> +
>>> int
>>> main(int argc, char *argv[])
>>> {
>>> @@ -420,7 +453,7 @@ main(int argc, char *argv[])
>>> vm_paddr_t gpa, gpa_pmap;
>>> size_t len;
>>> struct vm_exit vmexit;
>>> - uint64_t ctl, eptp, bm, addr, u64, pteval[4], *pte;
>>> + uint64_t ctl, eptp, bm, addr, u64, pteval[4], *pte, info[2];
>>> struct vmctx *ctx;
>>> int wired;
>>> cpuset_t cpus;
>>> @@ -595,6 +628,7 @@ main(int argc, char *argv[])
>>> { "force-poweroff", NO_ARG, &force_poweroff, 1 },
>>> { "get-active-cpus", NO_ARG, &get_active_cpus, 1 },
>>> { "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 },
>>> + { "get-intinfo", NO_ARG, &get_intinfo, 1 },
>>> { NULL, 0, NULL, 0 }
>>> };
>>>
>>> @@ -1566,6 +1600,14 @@ main(int argc, char *argv[])
>>> print_cpus("suspended cpus", &cpus);
>>> }
>>>
>>> + if (!error && (get_intinfo || get_all)) {
>>> + error = vm_get_intinfo(ctx, vcpu, &info[0], &info[1]);
>>> + if (!error) {
>>> + print_intinfo("pending", info[0]);
>>> + print_intinfo("current", info[1]);
>>> + }
>>> + }
>>> +
>>> if (!error && run) {
>>> error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip);
>>> assert(error == 0);
>>>
More information about the svn-src-head mailing list