svn commit: r268889 - in head: lib/libvmmapi sys/amd64/include sys/amd64/vmm sys/amd64/vmm/intel usr.sbin/bhyve usr.sbin/bhyvectl
Neel Natu
neelnatu at gmail.com
Sun Jul 20 07:54:52 UTC 2014
Hi Adrian,
On Sun, Jul 20, 2014 at 12:48 AM, Adrian Chadd <adrian at freebsd.org> wrote:
> On 20 July 2014 00:47, Neel Natu <neelnatu at gmail.com> wrote:
>> Hi Adrian,
>>
>> On Sat, Jul 19, 2014 at 11:30 PM, Adrian Chadd <adrian at freebsd.org> wrote:
>>> Hi!
>>>
>>> This broke -HEAD. 'exc' in vmx_inject_interrupts() is no longer
>>> initialised before use /and/ it's part of a KASSERT() output.
>>>
>>
>> Are you building with an external toolchain?
>
> Nope; just head with invariants disabled.
>
Ok, so it broke your custom kernel config as opposed to breaking HEAD.
I'll fix it shortly.
best
Neel
>
>
> -a
>
>> best
>> Neel
>>
>>> Thanks!
>>>
>>>
>>> -a
>>>
>>>
>>> On 19 July 2014 13:59, Neel Natu <neel at freebsd.org> wrote:
>>>> Author: neel
>>>> Date: Sat Jul 19 20:59:08 2014
>>>> New Revision: 268889
>>>> URL: http://svnweb.freebsd.org/changeset/base/268889
>>>>
>>>> Log:
>>>> Handle nested exceptions in bhyve.
>>>>
>>>> A nested exception condition arises when a second exception is triggered while
>>>> delivering the first exception. Most nested exceptions can be handled serially
>>>> but some are converted into a double fault. If an exception is generated during
>>>> delivery of a double fault then the virtual machine shuts down as a result of
>>>> a triple fault.
>>>>
>>>> vm_exit_intinfo() is used to record that a VM-exit happened while an event was
>>>> being delivered through the IDT. If an exception is triggered while handling
>>>> the VM-exit it will be treated like a nested exception.
>>>>
>>>> vm_entry_intinfo() is used by processor-specific code to get the event to be
>>>> injected into the guest on the next VM-entry. This function is responsible for
>>>> deciding the disposition of nested exceptions.
>>>>
>>>> Modified:
>>>> head/lib/libvmmapi/vmmapi.c
>>>> head/lib/libvmmapi/vmmapi.h
>>>> head/sys/amd64/include/vmm.h
>>>> head/sys/amd64/include/vmm_dev.h
>>>> head/sys/amd64/vmm/intel/vmx.c
>>>> head/sys/amd64/vmm/vmm.c
>>>> head/sys/amd64/vmm/vmm_dev.c
>>>> head/usr.sbin/bhyve/bhyverun.c
>>>> head/usr.sbin/bhyve/task_switch.c
>>>> head/usr.sbin/bhyvectl/bhyvectl.c
>>>>
>>>> Modified: head/lib/libvmmapi/vmmapi.c
>>>> ==============================================================================
>>>> --- head/lib/libvmmapi/vmmapi.c Sat Jul 19 20:55:13 2014 (r268888)
>>>> +++ head/lib/libvmmapi/vmmapi.c Sat Jul 19 20:59:08 2014 (r268889)
>>>> @@ -1106,3 +1106,32 @@ vm_activate_cpu(struct vmctx *ctx, int v
>>>> error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac);
>>>> return (error);
>>>> }
>>>> +
>>>> +int
>>>> +vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2)
>>>> +{
>>>> + struct vm_intinfo vmii;
>>>> + int error;
>>>> +
>>>> + bzero(&vmii, sizeof(struct vm_intinfo));
>>>> + vmii.vcpuid = vcpu;
>>>> + error = ioctl(ctx->fd, VM_GET_INTINFO, &vmii);
>>>> + if (error == 0) {
>>>> + *info1 = vmii.info1;
>>>> + *info2 = vmii.info2;
>>>> + }
>>>> + return (error);
>>>> +}
>>>> +
>>>> +int
>>>> +vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1)
>>>> +{
>>>> + struct vm_intinfo vmii;
>>>> + int error;
>>>> +
>>>> + bzero(&vmii, sizeof(struct vm_intinfo));
>>>> + vmii.vcpuid = vcpu;
>>>> + vmii.info1 = info1;
>>>> + error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii);
>>>> + return (error);
>>>> +}
>>>>
>>>> Modified: head/lib/libvmmapi/vmmapi.h
>>>> ==============================================================================
>>>> --- head/lib/libvmmapi/vmmapi.h Sat Jul 19 20:55:13 2014 (r268888)
>>>> +++ head/lib/libvmmapi/vmmapi.h Sat Jul 19 20:59:08 2014 (r268889)
>>>> @@ -104,6 +104,9 @@ int vm_setup_pptdev_msix(struct vmctx *c
>>>> int func, int idx, uint64_t addr, uint64_t msg,
>>>> uint32_t vector_control);
>>>>
>>>> +int vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *i1, uint64_t *i2);
>>>> +int vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t exit_intinfo);
>>>> +
>>>> /*
>>>> * Return a pointer to the statistics buffer. Note that this is not MT-safe.
>>>> */
>>>>
>>>> Modified: head/sys/amd64/include/vmm.h
>>>> ==============================================================================
>>>> --- head/sys/amd64/include/vmm.h Sat Jul 19 20:55:13 2014 (r268888)
>>>> +++ head/sys/amd64/include/vmm.h Sat Jul 19 20:59:08 2014 (r268889)
>>>> @@ -34,6 +34,7 @@ enum vm_suspend_how {
>>>> VM_SUSPEND_RESET,
>>>> VM_SUSPEND_POWEROFF,
>>>> VM_SUSPEND_HALT,
>>>> + VM_SUSPEND_TRIPLEFAULT,
>>>> VM_SUSPEND_LAST
>>>> };
>>>>
>>>> @@ -88,6 +89,16 @@ enum x2apic_state {
>>>> X2APIC_STATE_LAST
>>>> };
>>>>
>>>> +#define VM_INTINFO_VECTOR(info) ((info) & 0xff)
>>>> +#define VM_INTINFO_DEL_ERRCODE 0x800
>>>> +#define VM_INTINFO_RSVD 0x7ffff000
>>>> +#define VM_INTINFO_VALID 0x80000000
>>>> +#define VM_INTINFO_TYPE 0x700
>>>> +#define VM_INTINFO_HWINTR (0 << 8)
>>>> +#define VM_INTINFO_NMI (2 << 8)
>>>> +#define VM_INTINFO_HWEXCEPTION (3 << 8)
>>>> +#define VM_INTINFO_SWINTR (4 << 8)
>>>> +
>>>> #ifdef _KERNEL
>>>>
>>>> #define VM_MAX_NAMELEN 32
>>>> @@ -278,14 +289,31 @@ struct vatpit *vm_atpit(struct vm *vm);
>>>> int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme);
>>>>
>>>> /*
>>>> - * Returns 0 if there is no exception pending for this vcpu. Returns 1 if an
>>>> - * exception is pending and also updates 'vme'. The pending exception is
>>>> - * cleared when this function returns.
>>>> + * This function is called after a VM-exit that occurred during exception or
>>>> + * interrupt delivery through the IDT. The format of 'intinfo' is described
>>>> + * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
>>>> *
>>>> - * This function should only be called in the context of the thread that is
>>>> - * executing this vcpu.
>>>> + * If a VM-exit handler completes the event delivery successfully then it
>>>> + * should call vm_exit_intinfo() to extinguish the pending event. For e.g.,
>>>> + * if the task switch emulation is triggered via a task gate then it should
>>>> + * call this function with 'intinfo=0' to indicate that the external event
>>>> + * is not pending anymore.
>>>> + *
>>>> + * Return value is 0 on success and non-zero on failure.
>>>> */
>>>> -int vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *vme);
>>>> +int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo);
>>>> +
>>>> +/*
>>>> + * This function is called before every VM-entry to retrieve a pending
>>>> + * event that should be injected into the guest. This function combines
>>>> + * nested events into a double or triple fault.
>>>> + *
>>>> + * Returns 0 if there are no events that need to be injected into the guest
>>>> + * and non-zero otherwise.
>>>> + */
>>>> +int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);
>>>> +
>>>> +int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
>>>>
>>>> void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */
>>>> void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */
>>>>
>>>> Modified: head/sys/amd64/include/vmm_dev.h
>>>> ==============================================================================
>>>> --- head/sys/amd64/include/vmm_dev.h Sat Jul 19 20:55:13 2014 (r268888)
>>>> +++ head/sys/amd64/include/vmm_dev.h Sat Jul 19 20:59:08 2014 (r268889)
>>>> @@ -189,6 +189,12 @@ struct vm_cpuset {
>>>> #define VM_ACTIVE_CPUS 0
>>>> #define VM_SUSPENDED_CPUS 1
>>>>
>>>> +struct vm_intinfo {
>>>> + int vcpuid;
>>>> + uint64_t info1;
>>>> + uint64_t info2;
>>>> +};
>>>> +
>>>> enum {
>>>> /* general routines */
>>>> IOCNUM_ABIVERS = 0,
>>>> @@ -211,6 +217,8 @@ enum {
>>>> IOCNUM_GET_SEGMENT_DESCRIPTOR = 23,
>>>>
>>>> /* interrupt injection */
>>>> + IOCNUM_GET_INTINFO = 28,
>>>> + IOCNUM_SET_INTINFO = 29,
>>>> IOCNUM_INJECT_EXCEPTION = 30,
>>>> IOCNUM_LAPIC_IRQ = 31,
>>>> IOCNUM_INJECT_NMI = 32,
>>>> @@ -324,4 +332,8 @@ enum {
>>>> _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
>>>> #define VM_GET_CPUS \
>>>> _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
>>>> +#define VM_SET_INTINFO \
>>>> + _IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo)
>>>> +#define VM_GET_INTINFO \
>>>> + _IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo)
>>>> #endif
>>>>
>>>> Modified: head/sys/amd64/vmm/intel/vmx.c
>>>> ==============================================================================
>>>> --- head/sys/amd64/vmm/intel/vmx.c Sat Jul 19 20:55:13 2014 (r268888)
>>>> +++ head/sys/amd64/vmm/intel/vmx.c Sat Jul 19 20:59:08 2014 (r268889)
>>>> @@ -1213,22 +1213,31 @@ vmx_inject_interrupts(struct vmx *vmx, i
>>>> {
>>>> struct vm_exception exc;
>>>> int vector, need_nmi_exiting, extint_pending;
>>>> - uint64_t rflags;
>>>> + uint64_t rflags, entryinfo;
>>>> uint32_t gi, info;
>>>>
>>>> - if (vm_exception_pending(vmx->vm, vcpu, &exc)) {
>>>> - KASSERT(exc.vector >= 0 && exc.vector < 32,
>>>> - ("%s: invalid exception vector %d", __func__, exc.vector));
>>>> + if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) {
>>>> + KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry "
>>>> + "intinfo is not valid: %#lx", __func__, entryinfo));
>>>>
>>>> info = vmcs_read(VMCS_ENTRY_INTR_INFO);
>>>> KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject "
>>>> "pending exception %d: %#x", __func__, exc.vector, info));
>>>>
>>>> - info = exc.vector | VMCS_INTR_T_HWEXCEPTION | VMCS_INTR_VALID;
>>>> - if (exc.error_code_valid) {
>>>> - info |= VMCS_INTR_DEL_ERRCODE;
>>>> - vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, exc.error_code);
>>>> + info = entryinfo;
>>>> + vector = info & 0xff;
>>>> + if (vector == IDT_BP || vector == IDT_OF) {
>>>> + /*
>>>> + * VT-x requires #BP and #OF to be injected as software
>>>> + * exceptions.
>>>> + */
>>>> + info &= ~VMCS_INTR_T_MASK;
>>>> + info |= VMCS_INTR_T_SWEXCEPTION;
>>>> }
>>>> +
>>>> + if (info & VMCS_INTR_DEL_ERRCODE)
>>>> + vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, entryinfo >> 32);
>>>> +
>>>> vmcs_write(VMCS_ENTRY_INTR_INFO, info);
>>>> }
>>>>
>>>> @@ -1407,6 +1416,16 @@ vmx_clear_nmi_blocking(struct vmx *vmx,
>>>> vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
>>>> }
>>>>
>>>> +static void
>>>> +vmx_assert_nmi_blocking(struct vmx *vmx, int vcpuid)
>>>> +{
>>>> + uint32_t gi;
>>>> +
>>>> + gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
>>>> + KASSERT(gi & VMCS_INTERRUPTIBILITY_NMI_BLOCKING,
>>>> + ("NMI blocking is not in effect %#x", gi));
>>>> +}
>>>> +
>>>> static int
>>>> vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
>>>> {
>>>> @@ -2050,7 +2069,7 @@ vmx_exit_process(struct vmx *vmx, int vc
>>>> struct vm_task_switch *ts;
>>>> uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info;
>>>> uint32_t intr_type, reason;
>>>> - uint64_t qual, gpa;
>>>> + uint64_t exitintinfo, qual, gpa;
>>>> bool retu;
>>>>
>>>> CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_VIRTUAL_NMI) != 0);
>>>> @@ -2070,47 +2089,49 @@ vmx_exit_process(struct vmx *vmx, int vc
>>>> * be handled specially by re-injecting the event if the IDT
>>>> * vectoring information field's valid bit is set.
>>>> *
>>>> - * If the VM-exit is due to a task gate in the IDT then we don't
>>>> - * reinject the event because emulating the task switch also
>>>> - * completes the event delivery.
>>>> - *
>>>> * See "Information for VM Exits During Event Delivery" in Intel SDM
>>>> * for details.
>>>> */
>>>> - switch (reason) {
>>>> - case EXIT_REASON_EPT_FAULT:
>>>> - case EXIT_REASON_EPT_MISCONFIG:
>>>> - case EXIT_REASON_APIC_ACCESS:
>>>> - case EXIT_REASON_TASK_SWITCH:
>>>> - case EXIT_REASON_EXCEPTION:
>>>> - idtvec_info = vmcs_idt_vectoring_info();
>>>> - VCPU_CTR2(vmx->vm, vcpu, "vm exit %s: idtvec_info 0x%08x",
>>>> - exit_reason_to_str(reason), idtvec_info);
>>>> - if ((idtvec_info & VMCS_IDT_VEC_VALID) &&
>>>> - (reason != EXIT_REASON_TASK_SWITCH)) {
>>>> - idtvec_info &= ~(1 << 12); /* clear undefined bit */
>>>> - vmcs_write(VMCS_ENTRY_INTR_INFO, idtvec_info);
>>>> - if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
>>>> - idtvec_err = vmcs_idt_vectoring_err();
>>>> - vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR,
>>>> - idtvec_err);
>>>> - }
>>>> - /*
>>>> - * If 'virtual NMIs' are being used and the VM-exit
>>>> - * happened while injecting an NMI during the previous
>>>> - * VM-entry, then clear "blocking by NMI" in the Guest
>>>> - * Interruptibility-state.
>>>> - */
>>>> - if ((idtvec_info & VMCS_INTR_T_MASK) ==
>>>> - VMCS_INTR_T_NMI) {
>>>> - vmx_clear_nmi_blocking(vmx, vcpu);
>>>> - }
>>>> + idtvec_info = vmcs_idt_vectoring_info();
>>>> + if (idtvec_info & VMCS_IDT_VEC_VALID) {
>>>> + idtvec_info &= ~(1 << 12); /* clear undefined bit */
>>>> + exitintinfo = idtvec_info;
>>>> + if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
>>>> + idtvec_err = vmcs_idt_vectoring_err();
>>>> + exitintinfo |= (uint64_t)idtvec_err << 32;
>>>> + }
>>>> + error = vm_exit_intinfo(vmx->vm, vcpu, exitintinfo);
>>>> + KASSERT(error == 0, ("%s: vm_set_intinfo error %d",
>>>> + __func__, error));
>>>> +
>>>> + /*
>>>> + * If 'virtual NMIs' are being used and the VM-exit
>>>> + * happened while injecting an NMI during the previous
>>>> + * VM-entry, then clear "blocking by NMI" in the
>>>> + * Guest Interruptibility-State so the NMI can be
>>>> + * reinjected on the subsequent VM-entry.
>>>> + *
>>>> + * However, if the NMI was being delivered through a task
>>>> + * gate, then the new task must start execution with NMIs
>>>> + * blocked so don't clear NMI blocking in this case.
>>>> + */
>>>> + intr_type = idtvec_info & VMCS_INTR_T_MASK;
>>>> + if (intr_type == VMCS_INTR_T_NMI) {
>>>> + if (reason != EXIT_REASON_TASK_SWITCH)
>>>> + vmx_clear_nmi_blocking(vmx, vcpu);
>>>> + else
>>>> + vmx_assert_nmi_blocking(vmx, vcpu);
>>>> + }
>>>> +
>>>> + /*
>>>> + * Update VM-entry instruction length if the event being
>>>> + * delivered was a software interrupt or software exception.
>>>> + */
>>>> + if (intr_type == VMCS_INTR_T_SWINTR ||
>>>> + intr_type == VMCS_INTR_T_PRIV_SWEXCEPTION ||
>>>> + intr_type == VMCS_INTR_T_SWEXCEPTION) {
>>>> vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length);
>>>> }
>>>> - break;
>>>> - default:
>>>> - idtvec_info = 0;
>>>> - break;
>>>> }
>>>>
>>>> switch (reason) {
>>>> @@ -2136,7 +2157,7 @@ vmx_exit_process(struct vmx *vmx, int vc
>>>> */
>>>> if (ts->reason == TSR_IDT_GATE) {
>>>> KASSERT(idtvec_info & VMCS_IDT_VEC_VALID,
>>>> - ("invalid idtvec_info %x for IDT task switch",
>>>> + ("invalid idtvec_info %#x for IDT task switch",
>>>> idtvec_info));
>>>> intr_type = idtvec_info & VMCS_INTR_T_MASK;
>>>> if (intr_type != VMCS_INTR_T_SWINTR &&
>>>> @@ -2302,6 +2323,7 @@ vmx_exit_process(struct vmx *vmx, int vc
>>>> * the guest.
>>>> *
>>>> * See "Resuming Guest Software after Handling an Exception".
>>>> + * See "Information for VM Exits Due to Vectored Events".
>>>> */
>>>> if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 &&
>>>> (intr_info & 0xff) != IDT_DF &&
>>>> @@ -2519,6 +2541,13 @@ vmx_run(void *arg, int vcpu, register_t
>>>> * pmap_invalidate_ept().
>>>> */
>>>> disable_intr();
>>>> + vmx_inject_interrupts(vmx, vcpu, vlapic);
>>>> +
>>>> + /*
>>>> + * Check for vcpu suspension after injecting events because
>>>> + * vmx_inject_interrupts() can suspend the vcpu due to a
>>>> + * triple fault.
>>>> + */
>>>> if (vcpu_suspended(suspend_cookie)) {
>>>> enable_intr();
>>>> vm_exit_suspended(vmx->vm, vcpu, vmcs_guest_rip());
>>>> @@ -2539,7 +2568,6 @@ vmx_run(void *arg, int vcpu, register_t
>>>> break;
>>>> }
>>>>
>>>> - vmx_inject_interrupts(vmx, vcpu, vlapic);
>>>> vmx_run_trace(vmx, vcpu);
>>>> rc = vmx_enter_guest(vmxctx, vmx, launched);
>>>>
>>>>
>>>> Modified: head/sys/amd64/vmm/vmm.c
>>>> ==============================================================================
>>>> --- head/sys/amd64/vmm/vmm.c Sat Jul 19 20:55:13 2014 (r268888)
>>>> +++ head/sys/amd64/vmm/vmm.c Sat Jul 19 20:59:08 2014 (r268889)
>>>> @@ -97,6 +97,7 @@ struct vcpu {
>>>> int hostcpu; /* (o) vcpu's host cpu */
>>>> struct vlapic *vlapic; /* (i) APIC device model */
>>>> enum x2apic_state x2apic_state; /* (i) APIC mode */
>>>> + uint64_t exitintinfo; /* (i) events pending at VM exit */
>>>> int nmi_pending; /* (i) NMI pending */
>>>> int extint_pending; /* (i) INTR pending */
>>>> struct vm_exception exception; /* (x) exception collateral */
>>>> @@ -241,6 +242,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bo
>>>>
>>>> vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
>>>> vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
>>>> + vcpu->exitintinfo = 0;
>>>> vcpu->nmi_pending = 0;
>>>> vcpu->extint_pending = 0;
>>>> vcpu->exception_pending = 0;
>>>> @@ -1458,6 +1460,202 @@ restart:
>>>> }
>>>>
>>>> int
>>>> +vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info)
>>>> +{
>>>> + struct vcpu *vcpu;
>>>> + int type, vector;
>>>> +
>>>> + if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
>>>> + return (EINVAL);
>>>> +
>>>> + vcpu = &vm->vcpu[vcpuid];
>>>> +
>>>> + if (info & VM_INTINFO_VALID) {
>>>> + type = info & VM_INTINFO_TYPE;
>>>> + vector = info & 0xff;
>>>> + if (type == VM_INTINFO_NMI && vector != IDT_NMI)
>>>> + return (EINVAL);
>>>> + if (type == VM_INTINFO_HWEXCEPTION && vector >= 32)
>>>> + return (EINVAL);
>>>> + if (info & VM_INTINFO_RSVD)
>>>> + return (EINVAL);
>>>> + } else {
>>>> + info = 0;
>>>> + }
>>>> + VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info);
>>>> + vcpu->exitintinfo = info;
>>>> + return (0);
>>>> +}
>>>> +
>>>> +enum exc_class {
>>>> + EXC_BENIGN,
>>>> + EXC_CONTRIBUTORY,
>>>> + EXC_PAGEFAULT
>>>> +};
>>>> +
>>>> +#define IDT_VE 20 /* Virtualization Exception (Intel specific) */
>>>> +
>>>> +static enum exc_class
>>>> +exception_class(uint64_t info)
>>>> +{
>>>> + int type, vector;
>>>> +
>>>> + KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info));
>>>> + type = info & VM_INTINFO_TYPE;
>>>> + vector = info & 0xff;
>>>> +
>>>> + /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */
>>>> + switch (type) {
>>>> + case VM_INTINFO_HWINTR:
>>>> + case VM_INTINFO_SWINTR:
>>>> + case VM_INTINFO_NMI:
>>>> + return (EXC_BENIGN);
>>>> + default:
>>>> + /*
>>>> + * Hardware exception.
>>>> + *
>>>> + * SVM and VT-x use identical type values to represent NMI,
>>>> + * hardware interrupt and software interrupt.
>>>> + *
>>>> + * SVM uses type '3' for all exceptions. VT-x uses type '3'
>>>> + * for exceptions except #BP and #OF. #BP and #OF use a type
>>>> + * value of '5' or '6'. Therefore we don't check for explicit
>>>> + * values of 'type' to classify 'intinfo' into a hardware
>>>> + * exception.
>>>> + */
>>>> + break;
>>>> + }
>>>> +
>>>> + switch (vector) {
>>>> + case IDT_PF:
>>>> + case IDT_VE:
>>>> + return (EXC_PAGEFAULT);
>>>> + case IDT_DE:
>>>> + case IDT_TS:
>>>> + case IDT_NP:
>>>> + case IDT_SS:
>>>> + case IDT_GP:
>>>> + return (EXC_CONTRIBUTORY);
>>>> + default:
>>>> + return (EXC_BENIGN);
>>>> + }
>>>> +}
>>>> +
>>>> +static int
>>>> +nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2,
>>>> + uint64_t *retinfo)
>>>> +{
>>>> + enum exc_class exc1, exc2;
>>>> + int type1, vector1;
>>>> +
>>>> + KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1));
>>>> + KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2));
>>>> +
>>>> + /*
>>>> + * If an exception occurs while attempting to call the double-fault
>>>> + * handler the processor enters shutdown mode (aka triple fault).
>>>> + */
>>>> + type1 = info1 & VM_INTINFO_TYPE;
>>>> + vector1 = info1 & 0xff;
>>>> + if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) {
>>>> + VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)",
>>>> + info1, info2);
>>>> + vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT);
>>>> + *retinfo = 0;
>>>> + return (0);
>>>> + }
>>>> +
>>>> + /*
>>>> + * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3
>>>> + */
>>>> + exc1 = exception_class(info1);
>>>> + exc2 = exception_class(info2);
>>>> + if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) ||
>>>> + (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) {
>>>> + /* Convert nested fault into a double fault. */
>>>> + *retinfo = IDT_DF;
>>>> + *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
>>>> + *retinfo |= VM_INTINFO_DEL_ERRCODE;
>>>> + } else {
>>>> + /* Handle exceptions serially */
>>>> + *retinfo = info2;
>>>> + }
>>>> + return (1);
>>>> +}
>>>> +
>>>> +static uint64_t
>>>> +vcpu_exception_intinfo(struct vcpu *vcpu)
>>>> +{
>>>> + uint64_t info = 0;
>>>> +
>>>> + if (vcpu->exception_pending) {
>>>> + info = vcpu->exception.vector & 0xff;
>>>> + info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
>>>> + if (vcpu->exception.error_code_valid) {
>>>> + info |= VM_INTINFO_DEL_ERRCODE;
>>>> + info |= (uint64_t)vcpu->exception.error_code << 32;
>>>> + }
>>>> + }
>>>> + return (info);
>>>> +}
>>>> +
>>>> +int
>>>> +vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
>>>> +{
>>>> + struct vcpu *vcpu;
>>>> + uint64_t info1, info2;
>>>> + int valid;
>>>> +
>>>> + KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
>>>> +
>>>> + vcpu = &vm->vcpu[vcpuid];
>>>> +
>>>> + info1 = vcpu->exitintinfo;
>>>> + vcpu->exitintinfo = 0;
>>>> +
>>>> + info2 = 0;
>>>> + if (vcpu->exception_pending) {
>>>> + info2 = vcpu_exception_intinfo(vcpu);
>>>> + vcpu->exception_pending = 0;
>>>> + VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx",
>>>> + vcpu->exception.vector, info2);
>>>> + }
>>>> +
>>>> + if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) {
>>>> + valid = nested_fault(vm, vcpuid, info1, info2, retinfo);
>>>> + } else if (info1 & VM_INTINFO_VALID) {
>>>> + *retinfo = info1;
>>>> + valid = 1;
>>>> + } else if (info2 & VM_INTINFO_VALID) {
>>>> + *retinfo = info2;
>>>> + valid = 1;
>>>> + } else {
>>>> + valid = 0;
>>>> + }
>>>> +
>>>> + if (valid) {
>>>> + VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), "
>>>> + "retinfo(%#lx)", __func__, info1, info2, *retinfo);
>>>> + }
>>>> +
>>>> + return (valid);
>>>> +}
>>>> +
>>>> +int
>>>> +vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2)
>>>> +{
>>>> + struct vcpu *vcpu;
>>>> +
>>>> + if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
>>>> + return (EINVAL);
>>>> +
>>>> + vcpu = &vm->vcpu[vcpuid];
>>>> + *info1 = vcpu->exitintinfo;
>>>> + *info2 = vcpu_exception_intinfo(vcpu);
>>>> + return (0);
>>>> +}
>>>> +
>>>> +int
>>>> vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
>>>> {
>>>> struct vcpu *vcpu;
>>>> @@ -1468,6 +1666,14 @@ vm_inject_exception(struct vm *vm, int v
>>>> if (exception->vector < 0 || exception->vector >= 32)
>>>> return (EINVAL);
>>>>
>>>> + /*
>>>> + * A double fault exception should never be injected directly into
>>>> + * the guest. It is a derived exception that results from specific
>>>> + * combinations of nested faults.
>>>> + */
>>>> + if (exception->vector == IDT_DF)
>>>> + return (EINVAL);
>>>> +
>>>> vcpu = &vm->vcpu[vcpuid];
>>>>
>>>> if (vcpu->exception_pending) {
>>>> @@ -1483,25 +1689,6 @@ vm_inject_exception(struct vm *vm, int v
>>>> return (0);
>>>> }
>>>>
>>>> -int
>>>> -vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *exception)
>>>> -{
>>>> - struct vcpu *vcpu;
>>>> - int pending;
>>>> -
>>>> - KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
>>>> -
>>>> - vcpu = &vm->vcpu[vcpuid];
>>>> - pending = vcpu->exception_pending;
>>>> - if (pending) {
>>>> - vcpu->exception_pending = 0;
>>>> - *exception = vcpu->exception;
>>>> - VCPU_CTR1(vm, vcpuid, "Exception %d delivered",
>>>> - exception->vector);
>>>> - }
>>>> - return (pending);
>>>> -}
>>>> -
>>>> static void
>>>> vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception)
>>>> {
>>>>
>>>> Modified: head/sys/amd64/vmm/vmm_dev.c
>>>> ==============================================================================
>>>> --- head/sys/amd64/vmm/vmm_dev.c Sat Jul 19 20:55:13 2014 (r268888)
>>>> +++ head/sys/amd64/vmm/vmm_dev.c Sat Jul 19 20:59:08 2014 (r268889)
>>>> @@ -173,6 +173,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
>>>> struct vm_gla2gpa *gg;
>>>> struct vm_activate_cpu *vac;
>>>> struct vm_cpuset *vm_cpuset;
>>>> + struct vm_intinfo *vmii;
>>>>
>>>> sc = vmmdev_lookup2(cdev);
>>>> if (sc == NULL)
>>>> @@ -199,6 +200,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
>>>> case VM_SET_X2APIC_STATE:
>>>> case VM_GLA2GPA:
>>>> case VM_ACTIVATE_CPU:
>>>> + case VM_SET_INTINFO:
>>>> + case VM_GET_INTINFO:
>>>> /*
>>>> * XXX fragile, handle with care
>>>> * Assumes that the first field of the ioctl data is the vcpu.
>>>> @@ -470,6 +473,15 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
>>>> error = copyout(cpuset, vm_cpuset->cpus, size);
>>>> free(cpuset, M_TEMP);
>>>> break;
>>>> + case VM_SET_INTINFO:
>>>> + vmii = (struct vm_intinfo *)data;
>>>> + error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1);
>>>> + break;
>>>> + case VM_GET_INTINFO:
>>>> + vmii = (struct vm_intinfo *)data;
>>>> + error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1,
>>>> + &vmii->info2);
>>>> + break;
>>>> default:
>>>> error = ENOTTY;
>>>> break;
>>>>
>>>> Modified: head/usr.sbin/bhyve/bhyverun.c
>>>> ==============================================================================
>>>> --- head/usr.sbin/bhyve/bhyverun.c Sat Jul 19 20:55:13 2014 (r268888)
>>>> +++ head/usr.sbin/bhyve/bhyverun.c Sat Jul 19 20:59:08 2014 (r268889)
>>>> @@ -534,6 +534,8 @@ vmexit_suspend(struct vmctx *ctx, struct
>>>> exit(1);
>>>> case VM_SUSPEND_HALT:
>>>> exit(2);
>>>> + case VM_SUSPEND_TRIPLEFAULT:
>>>> + exit(3);
>>>> default:
>>>> fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how);
>>>> exit(100);
>>>>
>>>> Modified: head/usr.sbin/bhyve/task_switch.c
>>>> ==============================================================================
>>>> --- head/usr.sbin/bhyve/task_switch.c Sat Jul 19 20:55:13 2014 (r268888)
>>>> +++ head/usr.sbin/bhyve/task_switch.c Sat Jul 19 20:59:08 2014 (r268889)
>>>> @@ -904,10 +904,14 @@ vmexit_task_switch(struct vmctx *ctx, st
>>>> */
>>>>
>>>> /*
>>>> - * XXX is the original task switch was triggered by a hardware
>>>> - * exception then do we generate a double-fault if we encounter
>>>> - * an exception during the task switch?
>>>> + * If the task switch was triggered by an event delivered through
>>>> + * the IDT then extinguish the pending event from the vcpu's
>>>> + * exitintinfo.
>>>> */
>>>> + if (task_switch->reason == TSR_IDT_GATE) {
>>>> + error = vm_set_intinfo(ctx, vcpu, 0);
>>>> + assert(error == 0);
>>>> + }
>>>>
>>>> /*
>>>> * XXX should inject debug exception if 'T' bit is 1
>>>>
>>>> Modified: head/usr.sbin/bhyvectl/bhyvectl.c
>>>> ==============================================================================
>>>> --- head/usr.sbin/bhyvectl/bhyvectl.c Sat Jul 19 20:55:13 2014 (r268888)
>>>> +++ head/usr.sbin/bhyvectl/bhyvectl.c Sat Jul 19 20:59:08 2014 (r268889)
>>>> @@ -195,7 +195,8 @@ usage(void)
>>>> " [--force-reset]\n"
>>>> " [--force-poweroff]\n"
>>>> " [--get-active-cpus]\n"
>>>> - " [--get-suspended-cpus]\n",
>>>> + " [--get-suspended-cpus]\n"
>>>> + " [--get-intinfo]\n",
>>>> progname);
>>>> exit(1);
>>>> }
>>>> @@ -205,6 +206,7 @@ static int inject_nmi, assert_lapic_lvt;
>>>> static int force_reset, force_poweroff;
>>>> static const char *capname;
>>>> static int create, destroy, get_lowmem, get_highmem;
>>>> +static int get_intinfo;
>>>> static int get_active_cpus, get_suspended_cpus;
>>>> static uint64_t memsize;
>>>> static int set_cr0, get_cr0, set_cr3, get_cr3, set_cr4, get_cr4;
>>>> @@ -412,6 +414,37 @@ print_cpus(const char *banner, const cpu
>>>> printf("\n");
>>>> }
>>>>
>>>> +static void
>>>> +print_intinfo(const char *banner, uint64_t info)
>>>> +{
>>>> + int type;
>>>> +
>>>> + printf("%s:\t", banner);
>>>> + if (info & VM_INTINFO_VALID) {
>>>> + type = info & VM_INTINFO_TYPE;
>>>> + switch (type) {
>>>> + case VM_INTINFO_HWINTR:
>>>> + printf("extint");
>>>> + break;
>>>> + case VM_INTINFO_NMI:
>>>> + printf("nmi");
>>>> + break;
>>>> + case VM_INTINFO_SWINTR:
>>>> + printf("swint");
>>>> + break;
>>>> + default:
>>>> + printf("exception");
>>>> + break;
>>>> + }
>>>> + printf(" vector %d", (int)VM_INTINFO_VECTOR(info));
>>>> + if (info & VM_INTINFO_DEL_ERRCODE)
>>>> + printf(" errcode %#x", (u_int)(info >> 32));
>>>> + } else {
>>>> + printf("n/a");
>>>> + }
>>>> + printf("\n");
>>>> +}
>>>> +
>>>> int
>>>> main(int argc, char *argv[])
>>>> {
>>>> @@ -420,7 +453,7 @@ main(int argc, char *argv[])
>>>> vm_paddr_t gpa, gpa_pmap;
>>>> size_t len;
>>>> struct vm_exit vmexit;
>>>> - uint64_t ctl, eptp, bm, addr, u64, pteval[4], *pte;
>>>> + uint64_t ctl, eptp, bm, addr, u64, pteval[4], *pte, info[2];
>>>> struct vmctx *ctx;
>>>> int wired;
>>>> cpuset_t cpus;
>>>> @@ -595,6 +628,7 @@ main(int argc, char *argv[])
>>>> { "force-poweroff", NO_ARG, &force_poweroff, 1 },
>>>> { "get-active-cpus", NO_ARG, &get_active_cpus, 1 },
>>>> { "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 },
>>>> + { "get-intinfo", NO_ARG, &get_intinfo, 1 },
>>>> { NULL, 0, NULL, 0 }
>>>> };
>>>>
>>>> @@ -1566,6 +1600,14 @@ main(int argc, char *argv[])
>>>> print_cpus("suspended cpus", &cpus);
>>>> }
>>>>
>>>> + if (!error && (get_intinfo || get_all)) {
>>>> + error = vm_get_intinfo(ctx, vcpu, &info[0], &info[1]);
>>>> + if (!error) {
>>>> + print_intinfo("pending", info[0]);
>>>> + print_intinfo("current", info[1]);
>>>> + }
>>>> + }
>>>> +
>>>> if (!error && run) {
>>>> error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip);
>>>> assert(error == 0);
>>>>
More information about the svn-src-all mailing list