svn commit: r328622 - head/sys/amd64/vmm/amd

Wed Jan 31 11:14:27 UTC 2018

Author: avg
Date: Wed Jan 31 11:14:26 2018
New Revision: 328622
URL: https://svnweb.freebsd.org/changeset/base/328622

Log:
  vmm/svm: post LAPIC interrupts using event injection, not virtual interrupts
  
  The virtual interrupt method uses V_IRQ, V_INTR_PRIO, and V_INTR_VECTOR
  fields of VMCB to inject a virtual interrupt into a guest VM.  This
  method has many advantages over the direct event injection as it
  offloads all decisions of whether and when the interrupt can be
  delivered to the guest.  But with a purely software emulated vAPIC the
  advantage is also a problem.  The problem is that the hypervisor does
  not have any precise control over when the interrupt is actually
  delivered to the guest (or a notification about that).  Because of that
  the hypervisor cannot update the interrupt vector in IRR and ISR in the
  same way as real hardware would.  The hypervisor becomes aware that the
  interrupt is being serviced only upon the first VMEXIT after the
  interrupt is delivered.  This creates a window between the actual
  interrupt delivery and the update of IRR and ISR.  That means that IRR
  and ISR might not be correctly set up to the point of the
  end-of-interrupt signal.
  
  The described deviation has been observed to cause an interrupt loss in
  the following scenario.  vCPU0 posts an inter-processor interrupt to
  vCPU1.  The interrupt is injected as a virtual interrupt by the
  hypervisor.  The interrupt is delivered to a guest and an interrupt
  handler is invoked.  The handler performs a requested action and
  acknowledges the request by modifying a global variable.  So far, there
  is no VMEXIT and the hypervisor is unaware of the events.  Then, vCPU0
  notices the acknowledgment and sends another IPI with the same vector.
  The IPI gets collapsed into the previous IPI in the IRR of vCPU1.  Only
  after that a VMEXIT of vCPU1 occurs.  At that time the vector is cleared
  in the IRR and is set in the ISR.  vCPU1 has vAPIC state as if the
  second IPI has never been sent.
  The scenario is impossible on the real hardware because IRR and ISR are
  updated just before the interrupt handler gets started.
  
  I saw several possibilities of fixing the problem.  One is to intercept
  the virtual interrupt delivery to update IRR and ISR at the right
  moment.  The other is to deliver the LAPIC interrupts using the event
  injection, same as legacy interrupts.  I opted to use the latter
  approach for several reasons.  It's equivalent to what VMM/Intel does
  (in !VMX case).  It appears to be what VirtualBox and KVM do.  The code
  is already there (to support legacy interrupts).
  
  Another possibility was to use a special intermediate state for a vector
  after it is injected using a virtual interrupt and before it is known
  whether it was accepted or is still pending.
  That approach was implemented in https://reviews.freebsd.org/D13828
  That method is more complex and does not have any clear advantage.
  
  Please see sections 15.20 and 15.21.4 of "AMD64 Architecture
  Programmer's Manual Volume 2: System Programming" (publication 24593,
  revision 3.29) for comparison between event injection and virtual
  interrupt injection.
  
  PR:		215972
  Reported by:	ajschot at hotmail.com, grehan
  Tested by:	anish, grehan,  Nils Beyer <nbe at renzel.net>
  Reviewed by:	anish, grehan
  MFC after:	2 weeks
  Differential Revision: https://reviews.freebsd.org/D13780

Modified:
  head/sys/amd64/vmm/amd/svm.c

Modified: head/sys/amd64/vmm/amd/svm.c
==============================================================================

--- head/sys/amd64/vmm/amd/svm.c	Wed Jan 31 09:26:28 2018	(r328621)
+++ head/sys/amd64/vmm/amd/svm.c	Wed Jan 31 11:14:26 2018	(r328622)
@@ -933,7 +933,6 @@ svm_update_virqinfo(struct svm_softc *sc, int vcpu)
 	struct vm *vm;
 	struct vlapic *vlapic;
 	struct vmcb_ctrl *ctrl;
-	int pending;
 
 	vm = sc->vm;
 	vlapic = vm_lapic(vm, vcpu);
@@ -942,20 +941,9 @@ svm_update_virqinfo(struct svm_softc *sc, int vcpu)
 	/* Update %cr8 in the emulated vlapic */
 	vlapic_set_cr8(vlapic, ctrl->v_tpr);
 
-	/*
-	 * If V_IRQ indicates that the interrupt injection attempted on then
-	 * last VMRUN was successful then update the vlapic accordingly.
-	 */
-	if (ctrl->v_intr_vector != 0) {
-		pending = ctrl->v_irq;
-		KASSERT(ctrl->v_intr_vector >= 16, ("%s: invalid "
-		    "v_intr_vector %d", __func__, ctrl->v_intr_vector));
-		KASSERT(!ctrl->v_ign_tpr, ("%s: invalid v_ign_tpr", __func__));
-		VCPU_CTR2(vm, vcpu, "v_intr_vector %d %s", ctrl->v_intr_vector,
-		    pending ? "pending" : "accepted");
-		if (!pending)
-			vlapic_intr_accepted(vlapic, ctrl->v_intr_vector);
-	}
+	/* Virtual interrupt injection is not used. */
+	KASSERT(ctrl->v_intr_vector == 0, ("%s: invalid "
+	    "v_intr_vector %d", __func__, ctrl->v_intr_vector));
 }
 
 static void
@@ -1024,12 +1012,7 @@ disable_intr_window_exiting(struct svm_softc *sc, int 
 		return;
 	}
 
-#ifdef KTR
-	if (ctrl->v_intr_vector == 0)
-		VCPU_CTR0(sc->vm, vcpu, "Disable intr window exiting");
-	else
-		VCPU_CTR0(sc->vm, vcpu, "Clearing V_IRQ interrupt injection");
-#endif
+	VCPU_CTR0(sc->vm, vcpu, "Disable intr window exiting");
 	ctrl->v_irq = 0;
 	ctrl->v_intr_vector = 0;
 	svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR);
@@ -1574,14 +1557,14 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, str
 	struct vmcb_state *state;
 	struct svm_vcpu *vcpustate;
 	uint8_t v_tpr;
-	int vector, need_intr_window, pending_apic_vector;
+	int vector, need_intr_window;
+	int extint_pending;
 
 	state = svm_get_vmcb_state(sc, vcpu);
 	ctrl  = svm_get_vmcb_ctrl(sc, vcpu);
 	vcpustate = svm_get_vcpu(sc, vcpu);
 
 	need_intr_window = 0;
-	pending_apic_vector = 0;
 
 	if (vcpustate->nextrip != state->rip) {
 		ctrl->intr_shadow = 0;
@@ -1651,40 +1634,19 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, str
 		}
 	}
 
-	if (!vm_extint_pending(sc->vm, vcpu)) {
-		/*
-		 * APIC interrupts are delivered using the V_IRQ offload.
-		 *
-		 * The primary benefit is that the hypervisor doesn't need to
-		 * deal with the various conditions that inhibit interrupts.
-		 * It also means that TPR changes via CR8 will be handled
-		 * without any hypervisor involvement.
-		 *
-		 * Note that the APIC vector must remain pending in the vIRR
-		 * until it is confirmed that it was delivered to the guest.
-		 * This can be confirmed based on the value of V_IRQ at the
-		 * next #VMEXIT (1 = pending, 0 = delivered).
-		 *
-		 * Also note that it is possible that another higher priority
-		 * vector can become pending before this vector is delivered
-		 * to the guest. This is alright because vcpu_notify_event()
-		 * will send an IPI and force the vcpu to trap back into the
-		 * hypervisor. The higher priority vector will be injected on
-		 * the next VMRUN.
-		 */
-		if (vlapic_pending_intr(vlapic, &vector)) {
-			KASSERT(vector >= 16 && vector <= 255,
-			    ("invalid vector %d from local APIC", vector));
-			pending_apic_vector = vector;
-		}
-		goto done;
+	extint_pending = vm_extint_pending(sc->vm, vcpu);
+	if (!extint_pending) {
+		if (!vlapic_pending_intr(vlapic, &vector))
+			goto done;
+		KASSERT(vector >= 16 && vector <= 255,
+		    ("invalid vector %d from local APIC", vector));
+	} else {
+		/* Ask the legacy pic for a vector to inject */
+		vatpic_pending_intr(sc->vm, &vector);
+		KASSERT(vector >= 0 && vector <= 255,
+		    ("invalid vector %d from INTR", vector));
 	}
 
-	/* Ask the legacy pic for a vector to inject */
-	vatpic_pending_intr(sc->vm, &vector);
-	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d from INTR",
-	    vector));
-
 	/*
 	 * If the guest has disabled interrupts or is in an interrupt shadow
 	 * then we cannot inject the pending interrupt.
@@ -1710,14 +1672,14 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, str
 		goto done;
 	}
 
-	/*
-	 * Legacy PIC interrupts are delivered via the event injection
-	 * mechanism.
-	 */
 	svm_eventinject(sc, vcpu, VMCB_EVENTINJ_TYPE_INTR, vector, 0, false);
 
-	vm_extint_clear(sc->vm, vcpu);
-	vatpic_intr_accepted(sc->vm, vector);
+	if (!extint_pending) {
+		vlapic_intr_accepted(vlapic, vector);
+	} else {
+		vm_extint_clear(sc->vm, vcpu);
+		vatpic_intr_accepted(sc->vm, vector);
+	}
 
 	/*
 	 * Force a VM-exit as soon as the vcpu is ready to accept another
@@ -1747,21 +1709,7 @@ done:
 		svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR);
 	}
 
-	if (pending_apic_vector) {
-		/*
-		 * If an APIC vector is being injected then interrupt window
-		 * exiting is not possible on this VMRUN.
-		 */
-		KASSERT(!need_intr_window, ("intr_window exiting impossible"));
-		VCPU_CTR1(sc->vm, vcpu, "Injecting vector %d using V_IRQ",
-		    pending_apic_vector);
-
-		ctrl->v_irq = 1;
-		ctrl->v_ign_tpr = 0;
-		ctrl->v_intr_vector = pending_apic_vector;
-		ctrl->v_intr_prio = pending_apic_vector >> 4;
-		svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR);
-	} else if (need_intr_window) {
+	if (need_intr_window) {
 		/*
 		 * We use V_IRQ in conjunction with the VINTR intercept to
 		 * trap into the hypervisor as soon as a virtual interrupt