Re: git: a5a918b7a906 - main - vmm: permit some IPIs to be handled by userspace
- In reply to: Emmanuel Vadot : "git: a5a918b7a906 - main - vmm: permit some IPIs to be handled by userspace"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 07 Sep 2022 09:31:46 UTC
Am Wed, 7 Sep 2022 07:08:58 GMT
Emmanuel Vadot <manu@FreeBSD.org> schrieb:
> The branch main has been updated by manu:
>
> URL: https://cgit.FreeBSD.org/src/commit/?id=a5a918b7a906eaa88e0833eac70a15989d535b02
>
> commit a5a918b7a906eaa88e0833eac70a15989d535b02
> Author: Corvin Köhne <CorvinK@beckhoff.com>
> AuthorDate: 2022-09-07 07:07:03 +0000
> Commit: Emmanuel Vadot <manu@FreeBSD.org>
> CommitDate: 2022-09-07 07:07:03 +0000
>
> vmm: permit some IPIs to be handled by userspace
>
> Add VM_EXITCODE_IPI to permit returning unhandled IPIs to userland.
> INIT and Startup IPIs are now returned to userland. Due to backward
> compatibility reasons, a new capability is added for enabling
> VM_EXITCODE_IPI.
>
> MFC after: 2 weeks
> Differential Revision: https://reviews.freebsd.org/D35623
> Sponsored by: Beckhoff Automation GmbH & Co. KG
> ---
> sys/amd64/include/vmm.h | 8 ++
> sys/amd64/vmm/amd/svm.c | 10 +++
> sys/amd64/vmm/intel/vmx.c | 8 ++
> sys/amd64/vmm/io/vlapic.c | 166 ++++++++++++++++++++++++-----------------
> sys/amd64/vmm/io/vlapic_priv.h | 2 +
> usr.sbin/bhyve/bhyverun.c | 34 +++++++++
> usr.sbin/bhyve/spinup_ap.c | 3 +
> 7 files changed, 162 insertions(+), 69 deletions(-)
>
> diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
> index dcf862c34264..37a74f053fb3 100644
> --- a/sys/amd64/include/vmm.h
> +++ b/sys/amd64/include/vmm.h
> @@ -31,6 +31,7 @@
> #ifndef _VMM_H_
> #define _VMM_H_
>
> +#include <sys/cpuset.h>
> #include <sys/sdt.h>
> #include <x86/segments.h>
>
> @@ -483,6 +484,7 @@ enum vm_cap_type {
> VM_CAP_BPT_EXIT,
> VM_CAP_RDPID,
> VM_CAP_RDTSCP,
> + VM_CAP_IPI_EXIT,
> VM_CAP_MAX
> };
>
> @@ -630,6 +632,7 @@ enum vm_exitcode {
> VM_EXITCODE_DEBUG,
> VM_EXITCODE_VMINSN,
> VM_EXITCODE_BPT,
> + VM_EXITCODE_IPI,
> VM_EXITCODE_MAX
> };
>
> @@ -737,6 +740,11 @@ struct vm_exit {
> struct {
> enum vm_suspend_how how;
> } suspended;
> + struct {
> + uint32_t mode;
> + uint8_t vector;
> + cpuset_t dmask;
> + } ipi;
> struct vm_task_switch task_switch;
> } u;
> };
> diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
> index 35e8d9833d0e..4195cc5bd049 100644
> --- a/sys/amd64/vmm/amd/svm.c
> +++ b/sys/amd64/vmm/amd/svm.c
> @@ -2315,6 +2315,7 @@ static int
> svm_setcap(void *arg, int vcpu, int type, int val)
> {
> struct svm_softc *sc;
> + struct vlapic *vlapic;
> int error;
>
> sc = arg;
> @@ -2333,6 +2334,10 @@ svm_setcap(void *arg, int vcpu, int type, int val)
> if (val == 0)
> error = EINVAL;
> break;
> + case VM_CAP_IPI_EXIT:
> + vlapic = vm_lapic(sc->vm, vcpu);
> + vlapic->ipi_exit = val;
> + break;
> default:
> error = ENOENT;
> break;
> @@ -2344,6 +2349,7 @@ static int
> svm_getcap(void *arg, int vcpu, int type, int *retval)
> {
> struct svm_softc *sc;
> + struct vlapic *vlapic;
> int error;
>
> sc = arg;
> @@ -2361,6 +2367,10 @@ svm_getcap(void *arg, int vcpu, int type, int *retval)
> case VM_CAP_UNRESTRICTED_GUEST:
> *retval = 1; /* unrestricted guest is always enabled */
> break;
> + case VM_CAP_IPI_EXIT:
> + vlapic = vm_lapic(sc->vm, vcpu);
> + *retval = vlapic->ipi_exit;
> + break;
> default:
> error = ENOENT;
> break;
> diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
> index 64544a6e7955..857028dcd0f1 100644
> --- a/sys/amd64/vmm/intel/vmx.c
> +++ b/sys/amd64/vmm/intel/vmx.c
> @@ -3504,6 +3504,7 @@ vmx_getcap(void *arg, int vcpu, int type, int *retval)
> ret = 0;
> break;
> case VM_CAP_BPT_EXIT:
> + case VM_CAP_IPI_EXIT:
> ret = 0;
> break;
> default:
> @@ -3521,6 +3522,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
> {
> struct vmx *vmx = arg;
> struct vmcs *vmcs = &vmx->vmcs[vcpu];
> + struct vlapic *vlapic;
> uint32_t baseval;
> uint32_t *pptr;
> int error;
> @@ -3599,6 +3601,12 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
> reg = VMCS_EXCEPTION_BITMAP;
> }
> break;
> + case VM_CAP_IPI_EXIT:
> + retval = 0;
> +
> + vlapic = vm_lapic(vmx->vm, vcpu);
> + vlapic->ipi_exit = val;
> + break;
> default:
> break;
> }
> diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
> index 9599b4b4e62c..dc9b00d2316e 100644
> --- a/sys/amd64/vmm/io/vlapic.c
> +++ b/sys/amd64/vmm/io/vlapic.c
> @@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
>
> static void vlapic_set_error(struct vlapic *, uint32_t, bool);
> static void vlapic_callout_handler(void *arg);
> +static void vlapic_reset(struct vlapic *vlapic);
>
> static __inline uint32_t
> vlapic_get_id(struct vlapic *vlapic)
> @@ -957,9 +958,9 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
> {
> int i;
> bool phys;
> - cpuset_t dmask;
> + cpuset_t dmask, ipimask;
> uint64_t icrval;
> - uint32_t dest, vec, mode;
> + uint32_t dest, vec, mode, shorthand;
> struct vlapic *vlapic2;
> struct vm_exit *vmexit;
> struct LAPIC *lapic;
> @@ -975,97 +976,122 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
> dest = icrval >> (32 + 24);
> vec = icrval & APIC_VECTOR_MASK;
> mode = icrval & APIC_DELMODE_MASK;
> + phys = (icrval & APIC_DESTMODE_LOG) == 0;
> + shorthand = icrval & APIC_DEST_MASK;
> +
> + maxcpus = vm_get_maxcpus(vlapic->vm);
>
> - if (mode == APIC_DELMODE_FIXED && vec < 16) {
> - vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
> - VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
> - return (0);
> - }
>
> VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);
>
> - if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
> - switch (icrval & APIC_DEST_MASK) {
> - case APIC_DEST_DESTFLD:
> - phys = ((icrval & APIC_DESTMODE_LOG) == 0);
> - vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
> - x2apic(vlapic));
> - break;
> - case APIC_DEST_SELF:
> - CPU_SETOF(vlapic->vcpuid, &dmask);
> - break;
> - case APIC_DEST_ALLISELF:
> - dmask = vm_active_cpus(vlapic->vm);
> - break;
> - case APIC_DEST_ALLESELF:
> - dmask = vm_active_cpus(vlapic->vm);
> - CPU_CLR(vlapic->vcpuid, &dmask);
> - break;
> - default:
> - CPU_ZERO(&dmask); /* satisfy gcc */
> - break;
> + switch (shorthand) {
> + case APIC_DEST_DESTFLD:
> + vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, x2apic(vlapic));
> + break;
> + case APIC_DEST_SELF:
> + CPU_SETOF(vlapic->vcpuid, &dmask);
> + break;
> + case APIC_DEST_ALLISELF:
> + dmask = vm_active_cpus(vlapic->vm);
> + break;
> + case APIC_DEST_ALLESELF:
> + dmask = vm_active_cpus(vlapic->vm);
> + CPU_CLR(vlapic->vcpuid, &dmask);
> + break;
> + default:
> + __assert_unreachable();
> + }
> +
> + /*
> + * ipimask is a set of vCPUs needing userland handling of the current
> + * IPI.
> + */
> + CPU_ZERO(&ipimask);
> +
> + switch (mode) {
> + case APIC_DELMODE_FIXED:
> + if (vec < 16) {
> + vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
> + false);
> + VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
> + return (0);
> }
>
> CPU_FOREACH_ISSET(i, &dmask) {
> - if (mode == APIC_DELMODE_FIXED) {
> - lapic_intr_edge(vlapic->vm, i, vec);
> - vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
> - IPIS_SENT, i, 1);
> - VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
> - "to vcpuid %d", vec, i);
> - } else {
> - vm_inject_nmi(vlapic->vm, i);
> - VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
> - "to vcpuid %d", i);
> - }
> + lapic_intr_edge(vlapic->vm, i, vec);
> + vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
> + IPIS_SENT, i, 1);
> + VLAPIC_CTR2(vlapic,
> + "vlapic sending ipi %d to vcpuid %d", vec, i);
> }
>
> - return (0); /* handled completely in the kernel */
> - }
> + break;
> + case APIC_DELMODE_NMI:
> + CPU_FOREACH_ISSET(i, &dmask) {
> + vm_inject_nmi(vlapic->vm, i);
> + VLAPIC_CTR1(vlapic,
> + "vlapic sending ipi nmi to vcpuid %d", i);
> + }
>
> - maxcpus = vm_get_maxcpus(vlapic->vm);
> - if (mode == APIC_DELMODE_INIT) {
> + break;
> + case APIC_DELMODE_INIT:
> if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
> - return (0);
> -
> - if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
> - vlapic2 = vm_lapic(vlapic->vm, dest);
> -
> - /* move from INIT to waiting-for-SIPI state */
> - if (vlapic2->boot_state == BS_INIT) {
> - vlapic2->boot_state = BS_SIPI;
> - }
> + break;
>
> - return (0);
> + CPU_FOREACH_ISSET(i, &dmask) {
> + vlapic2 = vm_lapic(vlapic->vm, i);
> + vlapic2->boot_state = BS_SIPI;
> + CPU_SET(i, &ipimask);
> }
> - }
> -
> - if (mode == APIC_DELMODE_STARTUP) {
> - if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
> - vlapic2 = vm_lapic(vlapic->vm, dest);
>
> + break;
> + case APIC_DELMODE_STARTUP:
> + CPU_FOREACH_ISSET(i, &dmask) {
> + vlapic2 = vm_lapic(vlapic->vm, i);
> /*
> * Ignore SIPIs in any state other than wait-for-SIPI
> */
> if (vlapic2->boot_state != BS_SIPI)
> - return (0);
> -
> + continue;
> + /*
> + * TODO:
> + * This should be triggered from userspace.
> + */
> + vlapic_reset(vlapic2);
> vlapic2->boot_state = BS_RUNNING;
> + CPU_SET(i, &ipimask);
> + }
>
> - *retu = true;
> - vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
> - vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
> - vmexit->u.spinup_ap.vcpu = dest;
> - vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
> + break;
> + default:
> + return (1);
> + }
>
> - return (0);
> + if (!CPU_EMPTY(&ipimask)) {
> + vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
> + vmexit->exitcode = VM_EXITCODE_IPI;
> + vmexit->u.ipi.mode = mode;
> + vmexit->u.ipi.vector = vec;
> + vmexit->u.ipi.dmask = dmask;
> +
> + *retu = true;
> +
> + /*
> + * Old bhyve versions don't support the IPI exit. Translate it
> + * into the old style.
> + */
> + if (!vlapic->ipi_exit) {
> + if (mode == APIC_DELMODE_STARTUP) {
> + vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
> + vmexit->u.spinup_ap.vcpu = CPU_FFS(&ipimask) - 1;
> + vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
> + } else {
> + *retu = false;
> + }
> }
> }
>
> - /*
> - * This will cause a return to userland.
> - */
> - return (1);
> + return (0);
> }
>
> void
> @@ -1467,6 +1493,8 @@ vlapic_init(struct vlapic *vlapic)
> if (vlapic->vcpuid == 0)
> vlapic->msr_apicbase |= APICBASE_BSP;
>
> + vlapic->ipi_exit = false;
> +
> vlapic_reset(vlapic);
> }
>
> diff --git a/sys/amd64/vmm/io/vlapic_priv.h b/sys/amd64/vmm/io/vlapic_priv.h
> index fe7965cb65d7..4b3e9009e68c 100644
> --- a/sys/amd64/vmm/io/vlapic_priv.h
> +++ b/sys/amd64/vmm/io/vlapic_priv.h
> @@ -183,6 +183,8 @@ struct vlapic {
> */
> uint32_t svr_last;
> uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1];
> +
> + bool ipi_exit;
> };
>
> void vlapic_init(struct vlapic *vlapic);
> diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
> index 550cc9d15477..a1849519996c 100644
> --- a/usr.sbin/bhyve/bhyverun.c
> +++ b/usr.sbin/bhyve/bhyverun.c
> @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
> #endif
>
> #include <amd64/vmm/intel/vmcs.h>
> +#include <x86/apicreg.h>
>
> #include <machine/atomic.h>
> #include <machine/segments.h>
> @@ -935,6 +936,35 @@ vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
> return (VMEXIT_CONTINUE);
> }
>
> +static int
> +vmexit_ipi(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
> +{
> + int error = -1;
> + int i;
> + switch (vmexit->u.ipi.mode) {
> + case APIC_DELMODE_INIT:
> + CPU_FOREACH_ISSET (i, &vmexit->u.ipi.dmask) {
> + error = vm_suspend_cpu(ctx, i);
> + if (error) {
> + warnx("%s: failed to suspend cpu %d\n",
> + __func__, i);
> + break;
> + }
> + }
> + break;
> + case APIC_DELMODE_STARTUP:
> + CPU_FOREACH_ISSET (i, &vmexit->u.ipi.dmask) {
> + spinup_ap(ctx, i, vmexit->u.ipi.vector << PAGE_SHIFT);
> + }
> + error = 0;
> + break;
> + default:
> + break;
> + }
> +
> + return (error);
> +}
> +
> static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
> [VM_EXITCODE_INOUT] = vmexit_inout,
> [VM_EXITCODE_INOUT_STR] = vmexit_inout,
> @@ -951,6 +981,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
> [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
> [VM_EXITCODE_DEBUG] = vmexit_debug,
> [VM_EXITCODE_BPT] = vmexit_breakpoint,
> + [VM_EXITCODE_IPI] = vmexit_ipi,
> };
>
> static void
> @@ -1151,6 +1182,9 @@ spinup_vcpu(struct vmctx *ctx, int vcpu, bool suspend)
> error = vm_set_capability(ctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
> assert(error == 0);
>
> + error = vm_set_capability(ctx, vcpu, VM_CAP_IPI_EXIT, 1);
> + assert(error == 0);
> +
> fbsdrun_addcpu(ctx, vcpu, rip, suspend);
> }
>
> diff --git a/usr.sbin/bhyve/spinup_ap.c b/usr.sbin/bhyve/spinup_ap.c
> index 2b7e602f8003..438091e564e7 100644
> --- a/usr.sbin/bhyve/spinup_ap.c
> +++ b/usr.sbin/bhyve/spinup_ap.c
> @@ -98,6 +98,9 @@ spinup_ap(struct vmctx *ctx, int newcpu, uint64_t rip)
> error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
> assert(error == 0);
>
> + error = vm_set_capability(ctx, newcpu, VM_CAP_IPI_EXIT, 1);
> + assert(error == 0);
> +
> spinup_ap_realmode(ctx, newcpu, &rip);
>
> vm_resume_cpu(ctx, newcpu);
>
Buildkernel dies with:
[...]
/usr/src/sys/amd64/vmm/io/vlapic.c:967:11: error: variable 'maxcpus' set but not used
[-Werror,-Wunused-but-set-variable] uint16_t maxcpus;
Regards oh
--
O. Hartmann