svn commit: r262350 - in stable/10: lib/libvmmapi sys/amd64/include sys/amd64/vmm sys/amd64/vmm/intel sys/amd64/vmm/io usr.sbin/bhyve usr.sbin/bhyvectl

John Baldwin jhb at FreeBSD.org
Sun Feb 23 00:46:07 UTC 2014


Author: jhb
Date: Sun Feb 23 00:46:05 2014
New Revision: 262350
URL: http://svnweb.freebsd.org/changeset/base/262350

Log:
  MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
  Several changes to the local APIC support in bhyve:
  - Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
  - If a vcpu disables its local apic and then executes a 'HLT' then spin
    down the vcpu and destroy its thread context. Also modify the 'HLT'
    processing to ignore pending interrupts in the IRR if interrupts have
    been disabled by the guest.  The interrupt cannot be injected into the
    guest in any case so resuming it is futile.
  - Use callout(9) to drive the vlapic timer instead of clocking it on each
    VM exit.
  - When the guest is bringing up the APs in the x2APIC mode a write to the
    ICR register will now trigger a return to userspace with an exitcode of
    VM_EXITCODE_SPINUP_AP.
  - Change the vlapic timer lock to be a spinlock because the vlapic can be
    accessed from within a critical section (vm run loop) when guest is using
    x2apic mode.
  - Fix the vlapic version register.
  - Add a command to bhyvectl to inject an NMI on a specific vcpu.
  - Add an API to deliver message signaled interrupts (MSI) to vcpus. This allows
    callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
    bhyve implement multiple destination modes: physical, flat and clustered.
  - Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
    'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
  - Consolidate the virtual apic initialization in a single function:
    vlapic_reset()
  - Add a generic routine to trigger an LVT interrupt that supports both
    fixed and NMI delivery modes.
  - Add an ioctl and bhyvectl command to trigger local interrupts inside a
    guest.  In particular, a global NMI similar to that raised by SERR# or
    PERR# can be simulated by asserting LINT1 on all vCPUs.
  - Extend the LVT table in the vCPU local APIC to support CMCI.
  - Flesh out the local APIC error reporting a bit to cache errors and
    report them via ESR when ESR is written to.  Add support for asserting
    the error LVT when an error occurs.  Raise illegal vector errors when
    attempting to signal an invalid vector for an interrupt or when sending
    an IPI.
  - Export table entries in the MADT and MP Table advertising the stock x86
    config of LINT0 set to ExtInt and LINT1 wired to NMI.

Modified:
  stable/10/lib/libvmmapi/vmmapi.c
  stable/10/lib/libvmmapi/vmmapi.h
  stable/10/sys/amd64/include/vmm.h
  stable/10/sys/amd64/include/vmm_dev.h
  stable/10/sys/amd64/vmm/intel/vmx.c
  stable/10/sys/amd64/vmm/io/ppt.c
  stable/10/sys/amd64/vmm/io/ppt.h
  stable/10/sys/amd64/vmm/io/vhpet.c
  stable/10/sys/amd64/vmm/io/vioapic.c
  stable/10/sys/amd64/vmm/io/vlapic.c
  stable/10/sys/amd64/vmm/io/vlapic.h
  stable/10/sys/amd64/vmm/vmm.c
  stable/10/sys/amd64/vmm/vmm_dev.c
  stable/10/sys/amd64/vmm/vmm_lapic.c
  stable/10/sys/amd64/vmm/vmm_lapic.h
  stable/10/sys/amd64/vmm/vmm_msr.c
  stable/10/sys/amd64/vmm/vmm_msr.h
  stable/10/usr.sbin/bhyve/acpi.c
  stable/10/usr.sbin/bhyve/bhyverun.c
  stable/10/usr.sbin/bhyve/mptbl.c
  stable/10/usr.sbin/bhyve/pci_emul.c
  stable/10/usr.sbin/bhyve/pci_emul.h
  stable/10/usr.sbin/bhyve/pci_passthru.c
  stable/10/usr.sbin/bhyvectl/bhyvectl.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/lib/libvmmapi/vmmapi.c
==============================================================================
--- stable/10/lib/libvmmapi/vmmapi.c	Sat Feb 22 23:34:39 2014	(r262349)
+++ stable/10/lib/libvmmapi/vmmapi.c	Sun Feb 23 00:46:05 2014	(r262350)
@@ -397,6 +397,30 @@ vm_lapic_irq(struct vmctx *ctx, int vcpu
 }
 
 int
+vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector)
+{
+	struct vm_lapic_irq vmirq;
+
+	bzero(&vmirq, sizeof(vmirq));
+	vmirq.cpuid = vcpu;
+	vmirq.vector = vector;
+
+	return (ioctl(ctx->fd, VM_LAPIC_LOCAL_IRQ, &vmirq));
+}
+
+int
+vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
+{
+	struct vm_lapic_msi vmmsi;
+
+	bzero(&vmmsi, sizeof(vmmsi));
+	vmmsi.addr = addr;
+	vmmsi.msg = msg;
+
+	return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
+}
+
+int
 vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
 {
 	struct vm_ioapic_irq ioapic_irq;
@@ -551,8 +575,8 @@ vm_map_pptdev_mmio(struct vmctx *ctx, in
 }
 
 int
-vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-	     int destcpu, int vector, int numvec)
+vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
+    uint64_t addr, uint64_t msg, int numvec)
 {
 	struct vm_pptdev_msi pptmsi;
 
@@ -561,16 +585,16 @@ vm_setup_msi(struct vmctx *ctx, int vcpu
 	pptmsi.bus = bus;
 	pptmsi.slot = slot;
 	pptmsi.func = func;
-	pptmsi.destcpu = destcpu;
-	pptmsi.vector = vector;
+	pptmsi.msg = msg;
+	pptmsi.addr = addr;
 	pptmsi.numvec = numvec;
 
 	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
 }
 
 int	
-vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-	      int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
+vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
+    int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
 {
 	struct vm_pptdev_msix pptmsix;
 

Modified: stable/10/lib/libvmmapi/vmmapi.h
==============================================================================
--- stable/10/lib/libvmmapi/vmmapi.h	Sat Feb 22 23:34:39 2014	(r262349)
+++ stable/10/lib/libvmmapi/vmmapi.h	Sun Feb 23 00:46:05 2014	(r262350)
@@ -67,6 +67,8 @@ int	vm_inject_event(struct vmctx *ctx, i
 int	vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type,
 			 int vector, int error_code);
 int	vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector);
+int	vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector);
+int	vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg);
 int	vm_ioapic_assert_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_deassert_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_pulse_irq(struct vmctx *ctx, int irq);
@@ -81,10 +83,11 @@ int	vm_assign_pptdev(struct vmctx *ctx, 
 int	vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
 int	vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
 			   vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
-int	vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-		     int dest, int vector, int numvec);
-int	vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-		      int idx, uint32_t msg, uint32_t vector_control, uint64_t addr);
+int	vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot,
+	    int func, uint64_t addr, uint64_t msg, int numvec);
+int	vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot,
+	    int func, int idx, uint64_t addr, uint64_t msg,
+	    uint32_t vector_control);
 
 /*
  * Return a pointer to the statistics buffer. Note that this is not MT-safe.

Modified: stable/10/sys/amd64/include/vmm.h
==============================================================================
--- stable/10/sys/amd64/include/vmm.h	Sat Feb 22 23:34:39 2014	(r262349)
+++ stable/10/sys/amd64/include/vmm.h	Sun Feb 23 00:46:05 2014	(r262350)
@@ -158,7 +158,7 @@ vcpu_is_running(struct vm *vm, int vcpu,
 }
 
 void *vcpu_stats(struct vm *vm, int vcpu);
-void vm_interrupt_hostcpu(struct vm *vm, int vcpu);
+void vcpu_notify_event(struct vm *vm, int vcpuid);
 struct vmspace *vm_get_vmspace(struct vm *vm);
 int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
 int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
@@ -266,6 +266,7 @@ enum vm_exitcode {
 	VM_EXITCODE_PAGING,
 	VM_EXITCODE_INST_EMUL,
 	VM_EXITCODE_SPINUP_AP,
+	VM_EXITCODE_SPINDOWN_CPU,
 	VM_EXITCODE_MAX
 };
 
@@ -310,6 +311,9 @@ struct vm_exit {
 			int		vcpu;
 			uint64_t	rip;
 		} spinup_ap;
+		struct {
+			uint64_t	rflags;
+		} hlt;
 	} u;
 };
 

Modified: stable/10/sys/amd64/include/vmm_dev.h
==============================================================================
--- stable/10/sys/amd64/include/vmm_dev.h	Sat Feb 22 23:34:39 2014	(r262349)
+++ stable/10/sys/amd64/include/vmm_dev.h	Sun Feb 23 00:46:05 2014	(r262350)
@@ -66,6 +66,11 @@ struct vm_event {
 	int		error_code_valid;
 };
 
+struct vm_lapic_msi {
+	uint64_t	msg;
+	uint64_t	addr;
+};
+
 struct vm_lapic_irq {
 	int		cpuid;
 	int		vector;
@@ -103,8 +108,8 @@ struct vm_pptdev_msi {
 	int		slot;
 	int		func;
 	int		numvec;		/* 0 means disabled */
-	int		vector;
-	int		destcpu;
+	uint64_t	msg;
+	uint64_t	addr;
 };
 
 struct vm_pptdev_msix {
@@ -113,7 +118,7 @@ struct vm_pptdev_msix {
 	int		slot;
 	int		func;
 	int		idx;
-	uint32_t	msg;
+	uint64_t	msg;
 	uint32_t	vector_control;
 	uint64_t	addr;
 };
@@ -175,6 +180,8 @@ enum {
 	IOCNUM_IOAPIC_ASSERT_IRQ = 33,
 	IOCNUM_IOAPIC_DEASSERT_IRQ = 34,
 	IOCNUM_IOAPIC_PULSE_IRQ = 35,
+	IOCNUM_LAPIC_MSI = 36,
+	IOCNUM_LAPIC_LOCAL_IRQ = 37, 
 
 	/* PCI pass-thru */
 	IOCNUM_BIND_PPTDEV = 40,
@@ -211,6 +218,10 @@ enum {
 	_IOW('v', IOCNUM_INJECT_EVENT, struct vm_event)
 #define	VM_LAPIC_IRQ 		\
 	_IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq)
+#define	VM_LAPIC_LOCAL_IRQ 	\
+	_IOW('v', IOCNUM_LAPIC_LOCAL_IRQ, struct vm_lapic_irq)
+#define	VM_LAPIC_MSI		\
+	_IOW('v', IOCNUM_LAPIC_MSI, struct vm_lapic_msi)
 #define	VM_IOAPIC_ASSERT_IRQ	\
 	_IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq)
 #define	VM_IOAPIC_DEASSERT_IRQ	\

Modified: stable/10/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- stable/10/sys/amd64/vmm/intel/vmx.c	Sat Feb 22 23:34:39 2014	(r262349)
+++ stable/10/sys/amd64/vmm/intel/vmx.c	Sun Feb 23 00:46:05 2014	(r262350)
@@ -1359,7 +1359,8 @@ vmx_exit_process(struct vmx *vmx, int vc
 	struct vmcs *vmcs;
 	struct vmxctx *vmxctx;
 	uint32_t eax, ecx, edx, idtvec_info, idtvec_err, reason;
-	uint64_t qual, gpa;
+	uint64_t qual, gpa, rflags;
+	bool retu;
 
 	handled = 0;
 	vmcs = &vmx->vmcs[vcpu];
@@ -1405,31 +1406,46 @@ vmx_exit_process(struct vmx *vmx, int vc
 		break;
 	case EXIT_REASON_RDMSR:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1);
+		retu = false;
 		ecx = vmxctx->guest_rcx;
-		error = emulate_rdmsr(vmx->vm, vcpu, ecx);
+		error = emulate_rdmsr(vmx->vm, vcpu, ecx, &retu);
 		if (error) {
 			vmexit->exitcode = VM_EXITCODE_RDMSR;
 			vmexit->u.msr.code = ecx;
-		} else
+		} else if (!retu) {
 			handled = 1;
+		} else {
+			/* Return to userspace with a valid exitcode */
+			KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
+			    ("emulate_wrmsr retu with bogus exitcode"));
+		}
 		break;
 	case EXIT_REASON_WRMSR:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_WRMSR, 1);
+		retu = false;
 		eax = vmxctx->guest_rax;
 		ecx = vmxctx->guest_rcx;
 		edx = vmxctx->guest_rdx;
 		error = emulate_wrmsr(vmx->vm, vcpu, ecx,
-					(uint64_t)edx << 32 | eax);
+		    (uint64_t)edx << 32 | eax, &retu);
 		if (error) {
 			vmexit->exitcode = VM_EXITCODE_WRMSR;
 			vmexit->u.msr.code = ecx;
 			vmexit->u.msr.wval = (uint64_t)edx << 32 | eax;
-		} else
+		} else if (!retu) {
 			handled = 1;
+		} else {
+			/* Return to userspace with a valid exitcode */
+			KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
+			    ("emulate_wrmsr retu with bogus exitcode"));
+		}
 		break;
 	case EXIT_REASON_HLT:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1);
+		if ((error = vmread(VMCS_GUEST_RFLAGS, &rflags)) != 0)
+			panic("vmx_exit_process: vmread(rflags) %d", error);
 		vmexit->exitcode = VM_EXITCODE_HLT;
+		vmexit->u.hlt.rflags = rflags;
 		break;
 	case EXIT_REASON_MTF:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1);
@@ -1584,7 +1600,6 @@ vmx_run(void *arg, int vcpu, register_t 
 		panic("vmx_run: error %d setting up pcpu defaults", error);
 
 	do {
-		lapic_timer_tick(vmx->vm, vcpu);
 		vmx_inject_interrupts(vmx, vcpu);
 		vmx_run_trace(vmx, vcpu);
 		rc = vmx_setjmp(vmxctx);

Modified: stable/10/sys/amd64/vmm/io/ppt.c
==============================================================================
--- stable/10/sys/amd64/vmm/io/ppt.c	Sat Feb 22 23:34:39 2014	(r262349)
+++ stable/10/sys/amd64/vmm/io/ppt.c	Sun Feb 23 00:46:05 2014	(r262350)
@@ -72,8 +72,8 @@ MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Pas
 
 struct pptintr_arg {				/* pptintr(pptintr_arg) */
 	struct pptdev	*pptdev;
-	int		vec;
-	int 		vcpu;
+	uint64_t	addr;
+	uint64_t	msg_data;
 };
 
 static struct pptdev {
@@ -412,16 +412,14 @@ ppt_map_mmio(struct vm *vm, int bus, int
 static int
 pptintr(void *arg)
 {
-	int vec;
 	struct pptdev *ppt;
 	struct pptintr_arg *pptarg;
 	
 	pptarg = arg;
 	ppt = pptarg->pptdev;
-	vec = pptarg->vec;
 
 	if (ppt->vm != NULL)
-		lapic_intr_edge(ppt->vm, pptarg->vcpu, vec);
+		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
 	else {
 		/*
 		 * XXX
@@ -441,15 +439,13 @@ pptintr(void *arg)
 
 int
 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
-	      int destcpu, int vector, int numvec)
+	      uint64_t addr, uint64_t msg, int numvec)
 {
 	int i, rid, flags;
 	int msi_count, startrid, error, tmp;
 	struct pptdev *ppt;
 
-	if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
-	    (vector < 0 || vector > 255) ||
-	    (numvec < 0 || numvec > MAX_MSIMSGS))
+	if (numvec < 0 || numvec > MAX_MSIMSGS)
 		return (EINVAL);
 
 	ppt = ppt_find(bus, slot, func);
@@ -513,8 +509,8 @@ ppt_setup_msi(struct vm *vm, int vcpu, i
 			break;
 
 		ppt->msi.arg[i].pptdev = ppt;
-		ppt->msi.arg[i].vec = vector + i;
-		ppt->msi.arg[i].vcpu = destcpu;
+		ppt->msi.arg[i].addr = addr;
+		ppt->msi.arg[i].msg_data = msg + i;
 
 		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
 				       INTR_TYPE_NET | INTR_MPSAFE,
@@ -534,7 +530,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, i
 
 int
 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
-	       int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
+	       int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
 {
 	struct pptdev *ppt;
 	struct pci_devinfo *dinfo;
@@ -605,8 +601,8 @@ ppt_setup_msix(struct vm *vm, int vcpu, 
 			return (ENXIO);
 	
 		ppt->msix.arg[idx].pptdev = ppt;
-		ppt->msix.arg[idx].vec = msg & 0xFF;
-		ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF;
+		ppt->msix.arg[idx].addr = addr;
+		ppt->msix.arg[idx].msg_data = msg;
 	
 		/* Setup the MSI-X interrupt */
 		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],

Modified: stable/10/sys/amd64/vmm/io/ppt.h
==============================================================================
--- stable/10/sys/amd64/vmm/io/ppt.h	Sat Feb 22 23:34:39 2014	(r262349)
+++ stable/10/sys/amd64/vmm/io/ppt.h	Sun Feb 23 00:46:05 2014	(r262350)
@@ -33,9 +33,9 @@ int	ppt_unassign_all(struct vm *vm);
 int	ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
 		     vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
 int	ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
-		      int destcpu, int vector, int numvec);
+		      uint64_t addr, uint64_t msg, int numvec);
 int	ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
-		int idx, uint32_t msg, uint32_t vector_control, uint64_t addr);
+		int idx, uint64_t addr, uint64_t msg, uint32_t vector_control);
 int	ppt_num_devices(struct vm *vm);
 boolean_t ppt_is_mmio(struct vm *vm, vm_paddr_t gpa);
 

Modified: stable/10/sys/amd64/vmm/io/vhpet.c
==============================================================================
--- stable/10/sys/amd64/vmm/io/vhpet.c	Sat Feb 22 23:34:39 2014	(r262349)
+++ stable/10/sys/amd64/vmm/io/vhpet.c	Sun Feb 23 00:46:05 2014	(r262350)
@@ -240,8 +240,7 @@ vhpet_timer_edge_trig(struct vhpet *vhpe
 static void
 vhpet_timer_interrupt(struct vhpet *vhpet, int n)
 {
-	int apicid, vector, vcpuid, pin;
-	cpuset_t dmask;
+	int pin;
 
 	/* If interrupts are not enabled for this timer then just return. */
 	if (!vhpet_timer_interrupt_enabled(vhpet, n))
@@ -256,26 +255,8 @@ vhpet_timer_interrupt(struct vhpet *vhpe
 	}
 
 	if (vhpet_timer_msi_enabled(vhpet, n)) {
-		/*
-		 * XXX should have an API 'vlapic_deliver_msi(vm, addr, data)'
-		 * - assuming physical delivery mode
-		 * - no need to interpret contents of 'msireg' here
-		 */
-		vector = vhpet->timer[n].msireg & 0xff;
-		apicid = (vhpet->timer[n].msireg >> (32 + 12)) & 0xff;
-		if (apicid != 0xff) {
-			/* unicast */
-			vcpuid = vm_apicid2vcpuid(vhpet->vm, apicid);
-			lapic_intr_edge(vhpet->vm, vcpuid, vector);
-		} else {
-			/* broadcast */
-			dmask = vm_active_cpus(vhpet->vm);
-			while ((vcpuid = CPU_FFS(&dmask)) != 0) {
-				vcpuid--;
-				CPU_CLR(vcpuid, &dmask);
-				lapic_intr_edge(vhpet->vm, vcpuid, vector);
-			}
-		}
+		lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32,
+		    vhpet->timer[n].msireg & 0xffffffff);
 		return;
 	}	
 

Modified: stable/10/sys/amd64/vmm/io/vioapic.c
==============================================================================
--- stable/10/sys/amd64/vmm/io/vioapic.c	Sat Feb 22 23:34:39 2014	(r262349)
+++ stable/10/sys/amd64/vmm/io/vioapic.c	Sun Feb 23 00:46:05 2014	(r262350)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 
 #include "vmm_ktr.h"
 #include "vmm_lapic.h"
+#include "vlapic.h"
 #include "vioapic.h"
 
 #define	IOREGSEL	0x00
@@ -91,25 +92,14 @@ pinstate_str(bool asserted)
 	else
 		return ("deasserted");
 }
-
-static const char *
-trigger_str(bool level)
-{
-
-	if (level)
-		return ("level");
-	else
-		return ("edge");
-}
 #endif
 
 static void
 vioapic_send_intr(struct vioapic *vioapic, int pin)
 {
-	int vector, apicid, vcpuid;
-	uint32_t low, high;
-	cpuset_t dmask;
-	bool level;
+	int vector, delmode;
+	uint32_t low, high, dest;
+	bool level, phys;
 
 	KASSERT(pin >= 0 && pin < REDIR_ENTRIES,
 	    ("vioapic_set_pinstate: invalid pin number %d", pin));
@@ -120,52 +110,20 @@ vioapic_send_intr(struct vioapic *vioapi
 	low = vioapic->rtbl[pin].reg;
 	high = vioapic->rtbl[pin].reg >> 32;
 
-	/*
-	 * XXX We only deal with:
-	 * - physical destination
-	 * - fixed delivery mode
-	 */
-	if ((low & IOART_DESTMOD) != IOART_DESTPHY) {
-		VIOAPIC_CTR2(vioapic, "ioapic pin%d: unsupported dest mode "
-		    "0x%08x", pin, low);
-		return;
-	}
-
-	if ((low & IOART_DELMOD) != IOART_DELFIXED) {
-		VIOAPIC_CTR2(vioapic, "ioapic pin%d: unsupported delivery mode "
-		    "0x%08x", pin, low);
-		return;
-	}
-
 	if ((low & IOART_INTMASK) == IOART_INTMSET) {
 		VIOAPIC_CTR1(vioapic, "ioapic pin%d: masked", pin);
 		return;
 	}
 
+	phys = ((low & IOART_DESTMOD) == IOART_DESTPHY);
+	delmode = low & IOART_DELMOD;
 	level = low & IOART_TRGRLVL ? true : false;
 	if (level)
 		vioapic->rtbl[pin].reg |= IOART_REM_IRR;
 
 	vector = low & IOART_INTVEC;
-	apicid = high >> APIC_ID_SHIFT;
-	if (apicid != 0xff) {
-		/* unicast */
-		vcpuid = vm_apicid2vcpuid(vioapic->vm, apicid);
-		VIOAPIC_CTR4(vioapic, "ioapic pin%d: %s triggered intr "
-		    "vector %d on vcpuid %d", pin, trigger_str(level),
-		    vector, vcpuid);
-		lapic_set_intr(vioapic->vm, vcpuid, vector, level);
-	} else {
-		/* broadcast */
-		VIOAPIC_CTR3(vioapic, "ioapic pin%d: %s triggered intr "
-		    "vector %d on all vcpus", pin, trigger_str(level), vector);
-		dmask = vm_active_cpus(vioapic->vm);
-		while ((vcpuid = CPU_FFS(&dmask)) != 0) {
-			vcpuid--;
-			CPU_CLR(vcpuid, &dmask);
-			lapic_set_intr(vioapic->vm, vcpuid, vector, level);
-		}
-	}
+	dest = high >> APIC_ID_SHIFT;
+	vlapic_deliver_intr(vioapic->vm, level, dest, phys, delmode, vector);
 }
 
 static void

Modified: stable/10/sys/amd64/vmm/io/vlapic.c
==============================================================================
--- stable/10/sys/amd64/vmm/io/vlapic.c	Sat Feb 22 23:34:39 2014	(r262349)
+++ stable/10/sys/amd64/vmm/io/vlapic.c	Sun Feb 23 00:46:05 2014	(r262350)
@@ -30,8 +30,10 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
+#include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
+#include <sys/mutex.h>
 #include <sys/systm.h>
 #include <sys/smp.h>
 
@@ -53,6 +55,9 @@ __FBSDID("$FreeBSD$");
 #define	VLAPIC_CTR1(vlapic, format, p1)					\
 	VCPU_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1)
 
+#define	VLAPIC_CTR2(vlapic, format, p1, p2)				\
+	VCPU_CTR2((vlapic)->vm, (vlapic)->vcpuid, format, p1, p2)
+
 #define	VLAPIC_CTR_IRR(vlapic, msg)					\
 do {									\
 	uint32_t *irrptr = &(vlapic)->apic.irr0;			\
@@ -86,7 +91,7 @@ static MALLOC_DEFINE(M_VLAPIC, "vlapic",
 #define	PRIO(x)			((x) >> 4)
 
 #define VLAPIC_VERSION		(16)
-#define VLAPIC_MAXLVT_ENTRIES	(5)
+#define VLAPIC_MAXLVT_ENTRIES	(APIC_LVT_CMCI)
 
 #define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)
 
@@ -100,12 +105,16 @@ struct vlapic {
 	struct vm		*vm;
 	int			vcpuid;
 
-	struct LAPIC		 apic;
+	struct LAPIC		apic;
 
-	int			 esr_update;
+	uint32_t		esr_pending;
+	int			esr_firing;
 
-	int			 divisor;
-	int			 ccr_ticks;
+	struct callout	callout;	/* vlapic timer */
+	struct bintime	timer_fire_bt;	/* callout expiry time */
+	struct bintime	timer_freq_bt;	/* timer frequency */
+	struct bintime	timer_period_bt; /* timer period */
+	struct mtx	timer_mtx;
 
 	/*
 	 * The 'isrvec_stk' is a stack of vectors injected by the local apic.
@@ -120,8 +129,101 @@ struct vlapic {
 	enum boot_state		boot_state;
 };
 
+/*
+ * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
+ * vlapic_callout_handler() and vcpu accesses to the following registers:
+ * - initial count register aka icr_timer
+ * - current count register aka ccr_timer
+ * - divide config register aka dcr_timer
+ * - timer LVT register
+ *
+ * Note that the vlapic_callout_handler() does not write to any of these
+ * registers so they can be safely read from the vcpu context without locking.
+ */
+#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
+#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
+#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))
+
 #define VLAPIC_BUS_FREQ	tsc_freq
 
+static __inline uint32_t
+vlapic_get_id(struct vlapic *vlapic)
+{
+
+	if (x2apic(vlapic))
+		return (vlapic->vcpuid);
+	else
+		return (vlapic->vcpuid << 24);
+}
+
+static __inline uint32_t
+vlapic_get_ldr(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic;
+	int apicid;
+	uint32_t ldr;
+
+	lapic = &vlapic->apic;
+	if (x2apic(vlapic)) {
+		apicid = vlapic_get_id(vlapic);
+		ldr = 1 << (apicid & 0xf);
+		ldr |= (apicid & 0xffff0) << 12;
+		return (ldr);
+	} else
+		return (lapic->ldr);
+}
+
+static __inline uint32_t
+vlapic_get_dfr(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic;
+
+	lapic = &vlapic->apic;
+	if (x2apic(vlapic))
+		return (0);
+	else
+		return (lapic->dfr);
+}
+
+static void
+vlapic_set_dfr(struct vlapic *vlapic, uint32_t data)
+{
+	uint32_t dfr;
+	struct LAPIC *lapic;
+	
+	if (x2apic(vlapic)) {
+		VM_CTR1(vlapic->vm, "write to DFR in x2apic mode: %#x", data);
+		return;
+	}
+
+	lapic = &vlapic->apic;
+	dfr = (lapic->dfr & APIC_DFR_RESERVED) | (data & APIC_DFR_MODEL_MASK);
+	if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
+		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
+	else if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
+		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
+	else
+		VLAPIC_CTR1(vlapic, "vlapic DFR in Unknown Model %#x", dfr);
+
+	lapic->dfr = dfr;
+}
+
+static void
+vlapic_set_ldr(struct vlapic *vlapic, uint32_t data)
+{
+	struct LAPIC *lapic;
+
+	/* LDR is read-only in x2apic mode */
+	if (x2apic(vlapic)) {
+		VLAPIC_CTR1(vlapic, "write to LDR in x2apic mode: %#x", data);
+		return;
+	}
+
+	lapic = &vlapic->apic;
+	lapic->ldr = data & ~APIC_LDR_RESERVED;
+	VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
+}
+
 static int
 vlapic_timer_divisor(uint32_t dcr)
 {
@@ -167,48 +269,92 @@ vlapic_dump_lvt(uint32_t offset, uint32_
 }
 #endif
 
-static uint64_t
+static uint32_t
 vlapic_get_ccr(struct vlapic *vlapic)
 {
-	struct LAPIC    *lapic = &vlapic->apic;
-	return lapic->ccr_timer;
+	struct bintime bt_now, bt_rem;
+	struct LAPIC *lapic;
+	uint32_t ccr;
+	
+	ccr = 0;
+	lapic = &vlapic->apic;
+
+	VLAPIC_TIMER_LOCK(vlapic);
+	if (callout_active(&vlapic->callout)) {
+		/*
+		 * If the timer is scheduled to expire in the future then
+		 * compute the value of 'ccr' based on the remaining time.
+		 */
+		binuptime(&bt_now);
+		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
+			bt_rem = vlapic->timer_fire_bt;
+			bintime_sub(&bt_rem, &bt_now);
+			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
+			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
+		}
+	}
+	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
+	    "icr_timer is %#x", ccr, lapic->icr_timer));
+	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
+	    ccr, lapic->icr_timer);
+	VLAPIC_TIMER_UNLOCK(vlapic);
+	return (ccr);
 }
 
 static void
-vlapic_update_errors(struct vlapic *vlapic)
+vlapic_set_dcr(struct vlapic *vlapic, uint32_t dcr)
 {
-	struct LAPIC    *lapic = &vlapic->apic;
-	lapic->esr = 0; // XXX 
+	struct LAPIC *lapic;
+	int divisor;
+	
+	lapic = &vlapic->apic;
+	VLAPIC_TIMER_LOCK(vlapic);
+
+	lapic->dcr_timer = dcr;
+	divisor = vlapic_timer_divisor(dcr);
+	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", dcr, divisor);
+
+	/*
+	 * Update the timer frequency and the timer period.
+	 *
+	 * XXX changes to the frequency divider will not take effect until
+	 * the timer is reloaded.
+	 */
+	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
+	vlapic->timer_period_bt = vlapic->timer_freq_bt;
+	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
+
+	VLAPIC_TIMER_UNLOCK(vlapic);
 }
 
 static void
-vlapic_init_ipi(struct vlapic *vlapic)
+vlapic_update_errors(struct vlapic *vlapic)
 {
 	struct LAPIC    *lapic = &vlapic->apic;
-	lapic->version = VLAPIC_VERSION;
-	lapic->version |= (VLAPIC_MAXLVT_ENTRIES < MAXLVTSHIFT);
-	lapic->dfr = 0xffffffff;
-	lapic->svr = APIC_SVR_VECTOR;
-	vlapic_mask_lvts(&lapic->lvt_timer, VLAPIC_MAXLVT_ENTRIES+1);
+	lapic->esr = vlapic->esr_pending;
+	vlapic->esr_pending = 0;
 }
 
-static int
+static void
 vlapic_reset(struct vlapic *vlapic)
 {
-	struct LAPIC	*lapic = &vlapic->apic;
+	struct LAPIC *lapic;
+	
+	lapic = &vlapic->apic;
+	bzero(lapic, sizeof(struct LAPIC));
 
-	memset(lapic, 0, sizeof(*lapic));
-	lapic->apr = vlapic->vcpuid;
-	vlapic_init_ipi(vlapic);
-	vlapic->divisor = vlapic_timer_divisor(lapic->dcr_timer);
+	lapic->version = VLAPIC_VERSION;
+	lapic->version |= (VLAPIC_MAXLVT_ENTRIES << MAXLVTSHIFT);
+	lapic->dfr = 0xffffffff;
+	lapic->svr = APIC_SVR_VECTOR;
+	vlapic_mask_lvts(&lapic->lvt_timer, 6);
+	vlapic_mask_lvts(&lapic->lvt_cmci, 1);
+	vlapic_set_dcr(vlapic, 0);
 
 	if (vlapic->vcpuid == 0)
 		vlapic->boot_state = BS_RUNNING;	/* BSP */
 	else
 		vlapic->boot_state = BS_INIT;		/* AP */
-	
-	return 0;
-
 }
 
 void
@@ -221,6 +367,17 @@ vlapic_set_intr_ready(struct vlapic *vla
 	if (vector < 0 || vector >= 256)
 		panic("vlapic_set_intr_ready: invalid vector %d\n", vector);
 
+	if (!(lapic->svr & APIC_SVR_ENABLE)) {
+		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
+		    "interrupt %d", vector);
+		return;
+	}
+
+	if (vector < 16) {
+		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR);
+		return;
+	}
+		
 	idx = (vector / 32) * 4;
 	mask = 1 << (vector % 32);
 
@@ -241,39 +398,93 @@ vlapic_set_intr_ready(struct vlapic *vla
 	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
 }
 
+static __inline uint32_t *
+vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
+{
+	struct LAPIC	*lapic = &vlapic->apic;
+	int 		 i;
+
+	switch (offset) {
+	case APIC_OFFSET_CMCI_LVT:
+		return (&lapic->lvt_cmci);
+	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
+		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
+		return ((&lapic->lvt_timer) + i);;
+	default:
+		panic("vlapic_get_lvt: invalid LVT\n");
+	}
+}
+
+static __inline uint32_t
+vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
+{
+
+	return (*vlapic_get_lvtptr(vlapic, offset));
+}
+
 static void
-vlapic_start_timer(struct vlapic *vlapic, uint32_t elapsed)
+vlapic_set_lvt(struct vlapic *vlapic, uint32_t offset, uint32_t val)
 {
-	uint32_t icr_timer;
+	uint32_t *lvtptr, mask;
+	struct LAPIC *lapic;
+	
+	lapic = &vlapic->apic;
+	lvtptr = vlapic_get_lvtptr(vlapic, offset);	
 
-	icr_timer = vlapic->apic.icr_timer;
+	if (offset == APIC_OFFSET_TIMER_LVT)
+		VLAPIC_TIMER_LOCK(vlapic);
 
-	vlapic->ccr_ticks = ticks;
-	if (elapsed < icr_timer)
-		vlapic->apic.ccr_timer = icr_timer - elapsed;
-	else {
-		/*
-		 * This can happen when the guest is trying to run its local
-		 * apic timer higher that the setting of 'hz' in the host.
-		 *
-		 * We deal with this by running the guest local apic timer
-		 * at the rate of the host's 'hz' setting.
-		 */
-		vlapic->apic.ccr_timer = 0;
+	if (!(lapic->svr & APIC_SVR_ENABLE))
+		val |= APIC_LVT_M;
+	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
+	switch (offset) {
+	case APIC_OFFSET_TIMER_LVT:
+		mask |= APIC_LVTT_TM;
+		break;
+	case APIC_OFFSET_ERROR_LVT:
+		break;
+	case APIC_OFFSET_LINT0_LVT:
+	case APIC_OFFSET_LINT1_LVT:
+		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
+		/* FALLTHROUGH */
+	default:
+		mask |= APIC_LVT_DM;
+		break;
 	}
+	*lvtptr = val & mask;
+
+	if (offset == APIC_OFFSET_TIMER_LVT)
+		VLAPIC_TIMER_UNLOCK(vlapic);
 }
 
-static __inline uint32_t *
-vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
+static int
+vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt)
 {
-	struct LAPIC	*lapic = &vlapic->apic;
-	int 		 i;
+	uint32_t vec, mode;
 
-	if (offset < APIC_OFFSET_TIMER_LVT || offset > APIC_OFFSET_ERROR_LVT) {
-		panic("vlapic_get_lvt: invalid LVT\n");
+	if (lvt & APIC_LVT_M)
+		return (0);
+
+	vec = lvt & APIC_LVT_VECTOR;
+	mode = lvt & APIC_LVT_DM;
+
+	switch (mode) {
+	case APIC_LVT_DM_FIXED:
+		if (vec < 16) {
+			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
+			return (0);
+		}
+		vlapic_set_intr_ready(vlapic, vec, false);
+		vcpu_notify_event(vlapic->vm, vlapic->vcpuid);
+		break;
+	case APIC_LVT_DM_NMI:
+		vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
+		break;
+	default:
+		// Other modes ignored
+		return (0);
 	}
-	i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
-	return ((&lapic->lvt_timer) + i);;
+	return (1);
 }
 
 #if 1
@@ -398,44 +609,314 @@ vlapic_process_eoi(struct vlapic *vlapic
 }
 
 static __inline int
-vlapic_get_lvt_field(uint32_t *lvt, uint32_t mask)
+vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
 {
-	return (*lvt & mask);
+
+	return (lvt & mask);
 }
 
 static __inline int
 vlapic_periodic_timer(struct vlapic *vlapic)
 {
-	uint32_t *lvt;
+	uint32_t lvt;
 	
 	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
 
 	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
 }
 
+static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");
+
+void
+vlapic_set_error(struct vlapic *vlapic, uint32_t mask)
+{
+	uint32_t lvt;
+
+	vlapic->esr_pending |= mask;
+	if (vlapic->esr_firing)
+		return;
+	vlapic->esr_firing = 1;
+
+	// The error LVT always uses the fixed delivery mode.
+	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
+	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
+		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
+	}
+	vlapic->esr_firing = 0;
+}
+
 static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");
 
 static void
 vlapic_fire_timer(struct vlapic *vlapic)
 {
-	int vector;
-	uint32_t *lvt;
+	uint32_t lvt;
+
+	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));
 	
+	// The timer LVT always uses the fixed delivery mode.
 	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
-
-	if (!vlapic_get_lvt_field(lvt, APIC_LVTT_M)) {
+	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
 		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
-		vector = vlapic_get_lvt_field(lvt,APIC_LVTT_VECTOR);
-		vlapic_set_intr_ready(vlapic, vector, false);
+	}
+}
+
+static VMM_STAT(VLAPIC_INTR_CMC,
+    "corrected machine check interrupts generated by vlapic");
+
+void
+vlapic_fire_cmci(struct vlapic *vlapic)
+{
+	uint32_t lvt;
+
+	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
+	if (vlapic_fire_lvt(vlapic, lvt)) {
+		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
+	}
+}
+
+static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_ENTRIES,
+    "lvts triggered");
+
+int
+vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
+{
+	uint32_t lvt;
+
+	switch (vector) {
+	case APIC_LVT_LINT0:
+		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT0_LVT);
+		break;
+	case APIC_LVT_LINT1:
+		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT1_LVT);
+		break;
+	case APIC_LVT_TIMER:
+		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
+		lvt |= APIC_LVT_DM_FIXED;
+		break;
+	case APIC_LVT_ERROR:
+		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
+		lvt |= APIC_LVT_DM_FIXED;
+		break;
+	case APIC_LVT_PMC:
+		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_PERF_LVT);
+		break;
+	case APIC_LVT_THERMAL:
+		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_THERM_LVT);
+		break;
+	case APIC_LVT_CMCI:
+		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
+		break;
+	default:
+		return (EINVAL);
+	}
+	if (vlapic_fire_lvt(vlapic, lvt)) {
+		vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
+		    LVTS_TRIGGERRED, vector, 1);
+	}
+	return (0);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list