git: 3fc174845c9c - main - Revert "vmm: permit some IPIs to be handled by userspace"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 09 Sep 2022 13:55:34 UTC
The branch main has been updated by manu:
URL: https://cgit.FreeBSD.org/src/commit/?id=3fc174845c9c1da1c1f2b2fa3317f69d74a2ea36
commit 3fc174845c9c1da1c1f2b2fa3317f69d74a2ea36
Author: Emmanuel Vadot <manu@FreeBSD.org>
AuthorDate: 2022-09-09 13:55:01 +0000
Commit: Emmanuel Vadot <manu@FreeBSD.org>
CommitDate: 2022-09-09 13:55:01 +0000
Revert "vmm: permit some IPIs to be handled by userspace"
This reverts commit a5a918b7a906eaa88e0833eac70a15989d535b02.
This cause some problem with vm using bhyveload.
Reported by: pho, kp
---
sys/amd64/include/vmm.h | 8 --
sys/amd64/vmm/amd/svm.c | 10 ---
sys/amd64/vmm/intel/vmx.c | 8 --
sys/amd64/vmm/io/vlapic.c | 166 +++++++++++++++++------------------------
sys/amd64/vmm/io/vlapic_priv.h | 2 -
usr.sbin/bhyve/bhyverun.c | 34 ---------
usr.sbin/bhyve/spinup_ap.c | 3 -
7 files changed, 69 insertions(+), 162 deletions(-)
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 37a74f053fb3..dcf862c34264 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -31,7 +31,6 @@
#ifndef _VMM_H_
#define _VMM_H_
-#include <sys/cpuset.h>
#include <sys/sdt.h>
#include <x86/segments.h>
@@ -484,7 +483,6 @@ enum vm_cap_type {
VM_CAP_BPT_EXIT,
VM_CAP_RDPID,
VM_CAP_RDTSCP,
- VM_CAP_IPI_EXIT,
VM_CAP_MAX
};
@@ -632,7 +630,6 @@ enum vm_exitcode {
VM_EXITCODE_DEBUG,
VM_EXITCODE_VMINSN,
VM_EXITCODE_BPT,
- VM_EXITCODE_IPI,
VM_EXITCODE_MAX
};
@@ -740,11 +737,6 @@ struct vm_exit {
struct {
enum vm_suspend_how how;
} suspended;
- struct {
- uint32_t mode;
- uint8_t vector;
- cpuset_t dmask;
- } ipi;
struct vm_task_switch task_switch;
} u;
};
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index 4195cc5bd049..35e8d9833d0e 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -2315,7 +2315,6 @@ static int
svm_setcap(void *arg, int vcpu, int type, int val)
{
struct svm_softc *sc;
- struct vlapic *vlapic;
int error;
sc = arg;
@@ -2334,10 +2333,6 @@ svm_setcap(void *arg, int vcpu, int type, int val)
if (val == 0)
error = EINVAL;
break;
- case VM_CAP_IPI_EXIT:
- vlapic = vm_lapic(sc->vm, vcpu);
- vlapic->ipi_exit = val;
- break;
default:
error = ENOENT;
break;
@@ -2349,7 +2344,6 @@ static int
svm_getcap(void *arg, int vcpu, int type, int *retval)
{
struct svm_softc *sc;
- struct vlapic *vlapic;
int error;
sc = arg;
@@ -2367,10 +2361,6 @@ svm_getcap(void *arg, int vcpu, int type, int *retval)
case VM_CAP_UNRESTRICTED_GUEST:
*retval = 1; /* unrestricted guest is always enabled */
break;
- case VM_CAP_IPI_EXIT:
- vlapic = vm_lapic(sc->vm, vcpu);
- *retval = vlapic->ipi_exit;
- break;
default:
error = ENOENT;
break;
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 857028dcd0f1..64544a6e7955 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -3504,7 +3504,6 @@ vmx_getcap(void *arg, int vcpu, int type, int *retval)
ret = 0;
break;
case VM_CAP_BPT_EXIT:
- case VM_CAP_IPI_EXIT:
ret = 0;
break;
default:
@@ -3522,7 +3521,6 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
{
struct vmx *vmx = arg;
struct vmcs *vmcs = &vmx->vmcs[vcpu];
- struct vlapic *vlapic;
uint32_t baseval;
uint32_t *pptr;
int error;
@@ -3601,12 +3599,6 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
reg = VMCS_EXCEPTION_BITMAP;
}
break;
- case VM_CAP_IPI_EXIT:
- retval = 0;
-
- vlapic = vm_lapic(vmx->vm, vcpu);
- vlapic->ipi_exit = val;
- break;
default:
break;
}
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
index dc9b00d2316e..9599b4b4e62c 100644
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -84,7 +84,6 @@ __FBSDID("$FreeBSD$");
static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);
-static void vlapic_reset(struct vlapic *vlapic);
static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
@@ -958,9 +957,9 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
{
int i;
bool phys;
- cpuset_t dmask, ipimask;
+ cpuset_t dmask;
uint64_t icrval;
- uint32_t dest, vec, mode, shorthand;
+ uint32_t dest, vec, mode;
struct vlapic *vlapic2;
struct vm_exit *vmexit;
struct LAPIC *lapic;
@@ -976,122 +975,97 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
dest = icrval >> (32 + 24);
vec = icrval & APIC_VECTOR_MASK;
mode = icrval & APIC_DELMODE_MASK;
- phys = (icrval & APIC_DESTMODE_LOG) == 0;
- shorthand = icrval & APIC_DEST_MASK;
-
- maxcpus = vm_get_maxcpus(vlapic->vm);
-
- VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);
-
- switch (shorthand) {
- case APIC_DEST_DESTFLD:
- vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, x2apic(vlapic));
- break;
- case APIC_DEST_SELF:
- CPU_SETOF(vlapic->vcpuid, &dmask);
- break;
- case APIC_DEST_ALLISELF:
- dmask = vm_active_cpus(vlapic->vm);
- break;
- case APIC_DEST_ALLESELF:
- dmask = vm_active_cpus(vlapic->vm);
- CPU_CLR(vlapic->vcpuid, &dmask);
- break;
- default:
- __assert_unreachable();
+ if (mode == APIC_DELMODE_FIXED && vec < 16) {
+ vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
+ VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
+ return (0);
}
- /*
- * ipimask is a set of vCPUs needing userland handling of the current
- * IPI.
- */
- CPU_ZERO(&ipimask);
+ VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);
- switch (mode) {
- case APIC_DELMODE_FIXED:
- if (vec < 16) {
- vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
- false);
- VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
- return (0);
+ if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
+ switch (icrval & APIC_DEST_MASK) {
+ case APIC_DEST_DESTFLD:
+ phys = ((icrval & APIC_DESTMODE_LOG) == 0);
+ vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
+ x2apic(vlapic));
+ break;
+ case APIC_DEST_SELF:
+ CPU_SETOF(vlapic->vcpuid, &dmask);
+ break;
+ case APIC_DEST_ALLISELF:
+ dmask = vm_active_cpus(vlapic->vm);
+ break;
+ case APIC_DEST_ALLESELF:
+ dmask = vm_active_cpus(vlapic->vm);
+ CPU_CLR(vlapic->vcpuid, &dmask);
+ break;
+ default:
+ CPU_ZERO(&dmask); /* satisfy gcc */
+ break;
}
CPU_FOREACH_ISSET(i, &dmask) {
- lapic_intr_edge(vlapic->vm, i, vec);
- vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
- IPIS_SENT, i, 1);
- VLAPIC_CTR2(vlapic,
- "vlapic sending ipi %d to vcpuid %d", vec, i);
+ if (mode == APIC_DELMODE_FIXED) {
+ lapic_intr_edge(vlapic->vm, i, vec);
+ vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
+ IPIS_SENT, i, 1);
+ VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
+ "to vcpuid %d", vec, i);
+ } else {
+ vm_inject_nmi(vlapic->vm, i);
+ VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
+ "to vcpuid %d", i);
+ }
}
- break;
- case APIC_DELMODE_NMI:
- CPU_FOREACH_ISSET(i, &dmask) {
- vm_inject_nmi(vlapic->vm, i);
- VLAPIC_CTR1(vlapic,
- "vlapic sending ipi nmi to vcpuid %d", i);
- }
+ return (0); /* handled completely in the kernel */
+ }
- break;
- case APIC_DELMODE_INIT:
+ maxcpus = vm_get_maxcpus(vlapic->vm);
+ if (mode == APIC_DELMODE_INIT) {
if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
- break;
+ return (0);
- CPU_FOREACH_ISSET(i, &dmask) {
- vlapic2 = vm_lapic(vlapic->vm, i);
- vlapic2->boot_state = BS_SIPI;
- CPU_SET(i, &ipimask);
+ if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
+ vlapic2 = vm_lapic(vlapic->vm, dest);
+
+ /* move from INIT to waiting-for-SIPI state */
+ if (vlapic2->boot_state == BS_INIT) {
+ vlapic2->boot_state = BS_SIPI;
+ }
+
+ return (0);
}
+ }
+
+ if (mode == APIC_DELMODE_STARTUP) {
+ if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
+ vlapic2 = vm_lapic(vlapic->vm, dest);
- break;
- case APIC_DELMODE_STARTUP:
- CPU_FOREACH_ISSET(i, &dmask) {
- vlapic2 = vm_lapic(vlapic->vm, i);
/*
* Ignore SIPIs in any state other than wait-for-SIPI
*/
if (vlapic2->boot_state != BS_SIPI)
- continue;
- /*
- * TODO:
- * This should be triggered from userspace.
- */
- vlapic_reset(vlapic2);
- vlapic2->boot_state = BS_RUNNING;
- CPU_SET(i, &ipimask);
- }
+ return (0);
- break;
- default:
- return (1);
- }
-
- if (!CPU_EMPTY(&ipimask)) {
- vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
- vmexit->exitcode = VM_EXITCODE_IPI;
- vmexit->u.ipi.mode = mode;
- vmexit->u.ipi.vector = vec;
- vmexit->u.ipi.dmask = dmask;
+ vlapic2->boot_state = BS_RUNNING;
- *retu = true;
+ *retu = true;
+ vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
+ vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
+ vmexit->u.spinup_ap.vcpu = dest;
+ vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
- /*
- * Old bhyve versions don't support the IPI exit. Translate it
- * into the old style.
- */
- if (!vlapic->ipi_exit) {
- if (mode == APIC_DELMODE_STARTUP) {
- vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
- vmexit->u.spinup_ap.vcpu = CPU_FFS(&ipimask) - 1;
- vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
- } else {
- *retu = false;
- }
+ return (0);
}
}
- return (0);
+ /*
+ * This will cause a return to userland.
+ */
+ return (1);
}
void
@@ -1493,8 +1467,6 @@ vlapic_init(struct vlapic *vlapic)
if (vlapic->vcpuid == 0)
vlapic->msr_apicbase |= APICBASE_BSP;
- vlapic->ipi_exit = false;
-
vlapic_reset(vlapic);
}
diff --git a/sys/amd64/vmm/io/vlapic_priv.h b/sys/amd64/vmm/io/vlapic_priv.h
index 4b3e9009e68c..fe7965cb65d7 100644
--- a/sys/amd64/vmm/io/vlapic_priv.h
+++ b/sys/amd64/vmm/io/vlapic_priv.h
@@ -183,8 +183,6 @@ struct vlapic {
*/
uint32_t svr_last;
uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1];
-
- bool ipi_exit;
};
void vlapic_init(struct vlapic *vlapic);
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index a1849519996c..550cc9d15477 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -46,7 +46,6 @@ __FBSDID("$FreeBSD$");
#endif
#include <amd64/vmm/intel/vmcs.h>
-#include <x86/apicreg.h>
#include <machine/atomic.h>
#include <machine/segments.h>
@@ -936,35 +935,6 @@ vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
return (VMEXIT_CONTINUE);
}
-static int
-vmexit_ipi(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
-{
- int error = -1;
- int i;
- switch (vmexit->u.ipi.mode) {
- case APIC_DELMODE_INIT:
- CPU_FOREACH_ISSET (i, &vmexit->u.ipi.dmask) {
- error = vm_suspend_cpu(ctx, i);
- if (error) {
- warnx("%s: failed to suspend cpu %d\n",
- __func__, i);
- break;
- }
- }
- break;
- case APIC_DELMODE_STARTUP:
- CPU_FOREACH_ISSET (i, &vmexit->u.ipi.dmask) {
- spinup_ap(ctx, i, vmexit->u.ipi.vector << PAGE_SHIFT);
- }
- error = 0;
- break;
- default:
- break;
- }
-
- return (error);
-}
-
static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_INOUT] = vmexit_inout,
[VM_EXITCODE_INOUT_STR] = vmexit_inout,
@@ -981,7 +951,6 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
[VM_EXITCODE_DEBUG] = vmexit_debug,
[VM_EXITCODE_BPT] = vmexit_breakpoint,
- [VM_EXITCODE_IPI] = vmexit_ipi,
};
static void
@@ -1182,9 +1151,6 @@ spinup_vcpu(struct vmctx *ctx, int vcpu, bool suspend)
error = vm_set_capability(ctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
assert(error == 0);
- error = vm_set_capability(ctx, vcpu, VM_CAP_IPI_EXIT, 1);
- assert(error == 0);
-
fbsdrun_addcpu(ctx, vcpu, rip, suspend);
}
diff --git a/usr.sbin/bhyve/spinup_ap.c b/usr.sbin/bhyve/spinup_ap.c
index 438091e564e7..2b7e602f8003 100644
--- a/usr.sbin/bhyve/spinup_ap.c
+++ b/usr.sbin/bhyve/spinup_ap.c
@@ -98,9 +98,6 @@ spinup_ap(struct vmctx *ctx, int newcpu, uint64_t rip)
error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
assert(error == 0);
- error = vm_set_capability(ctx, newcpu, VM_CAP_IPI_EXIT, 1);
- assert(error == 0);
-
spinup_ap_realmode(ctx, newcpu, &rip);
vm_resume_cpu(ctx, newcpu);