[PATCH] Untangle TPR shadowing and APIC virtualization / Make Win guests on Bhyve _fast_
Michael Reifenberger
Michael at reifenberger.com
Sat Dec 28 10:03:57 UTC 2019
Hi,
did you already get a reply from a developer for review?
Could you open a PR/DR for this patch?
I would like to review and commit your patch after further tests.
Thanks!
---
mike (also mr at freebsd.org)
Zitat von Yamagi <lists at yamagi.org>:
> Hi,
> a long known problem with Bhyve is that Windows guests are rather slow.
> With Windows 10 1903 this became much worse, to the point that the
> guest is unusable. I have found the reason for this: Windows hammers on
> the %cr8 control register. For example, Windows 10 1909 on an i7-2620M
> has about 68,000 %cr8 accesses per second. Each of them triggers a vm
> exit.
>
> The most common solution is TPR shadowing. Many thanks to royger in
> #bhyve for getting me on the right track. Bhyve already implements TPR
> shadowing. On AMD SVM it just works, but the implementation for Intel
> VT-x is bound to APIC virtualization. And APIC virtualization is a Xeon
> feature that is missing on most (all?) desktop CPUs.
>
> The patch - further down inline or under [0] - separates TPR shadowing
> from APIC virtualization, so TPR shadowing can be used on desktop CPUs
> as well. The patch doesn't just give a small speed boost, it's a
> difference like day and night. As an example, without the patch, the
> installation of Windows 10 1909 takes about 2280 seconds from start to
> first reboot. With the patch, only 370 seconds. On an old Thinkpad
> X220, Windows 10 guests were previously unusable, now they are reasonably
> fast.
>
> The patch does:
>
> * Add a new tunable 'hw.vmm.vmx.use_tpr_shadowing' to disable TPR
> shadowing. Also add 'hw.vmm.vmx.cap.tpr_shadowing' to be able to query
> if TPR shadowing is used.
>
> * Detach the initialization of TPR shadowing from the initialization of
> APIC virtualization. APIC virtualization still needs TPR shadowing,
> but not vice versa. Any CPU that supports APIC virtualization should
> also support TPR shadowing.
>
> * When TPR shadowing is used, the APIC page of each vCPU is written to
> the VMCS_VIRTUAL_APIC field of the VMCS so that the CPU can write
> directly to the page without intercept.
>
> * On vm exit, vlapic_update_ppr() is called to update the PPR.
>
> The patch was tested on an i7-2620M, an i7-6700k and a Xeon Silver
> 4110. Both Windows and FreeBSD guests work correctly.
>
> Regards,
> Yamagi
>
> 0: https://gist.github.com/Yamagi/de70c08eadeeef14eec4cb42aeb5957f
>
> ----
>
> diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
> index 605fd0bda766..324a1e9d0c3c 100644
> --- a/sys/amd64/vmm/intel/vmx.c
> +++ b/sys/amd64/vmm/intel/vmx.c
> @@ -172,6 +172,10 @@ static int cap_invpcid;
> SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, invpcid, CTLFLAG_RD, &cap_invpcid,
> 0, "Guests are allowed to use INVPCID");
>
> +static int tpr_shadowing;
> +SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, tpr_shadowing, CTLFLAG_RD,
> + &tpr_shadowing, 0, "TPR shadowin support");
> +
> static int virtual_interrupt_delivery;
> SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, virtual_interrupt_delivery,
> CTLFLAG_RD,
> &virtual_interrupt_delivery, 0, "APICv virtual interrupt
> delivery support");
> @@ -627,7 +631,7 @@ vmx_restore(void)
> static int
> vmx_init(int ipinum)
> {
> - int error, use_tpr_shadow;
> + int error;
> uint64_t basic, fixed0, fixed1, feature_control;
> uint32_t tmp, procbased2_vid_bits;
>
> @@ -750,6 +754,24 @@ vmx_init(int ipinum)
> MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0,
> &tmp) == 0);
>
> + /*
> + * Check support for TPR shadow.
> + */
> + error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
> + MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0,
> + &tmp);
> + if (error == 0) {
> + tpr_shadowing = 1;
> + TUNABLE_INT_FETCH("hw.vmm.vmx.use_tpr_shadowing",
> + &tpr_shadowing);
> + }
> +
> + if (tpr_shadowing) {
> + procbased_ctls |= PROCBASED_USE_TPR_SHADOW;
> + procbased_ctls &= ~PROCBASED_CR8_LOAD_EXITING;
> + procbased_ctls &= ~PROCBASED_CR8_STORE_EXITING;
> + }
> +
> /*
> * Check support for virtual interrupt delivery.
> */
> @@ -758,13 +780,9 @@ vmx_init(int ipinum)
> PROCBASED2_APIC_REGISTER_VIRTUALIZATION |
> PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY);
>
> - use_tpr_shadow = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
> - MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0,
> - &tmp) == 0);
> -
> error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2,
> procbased2_vid_bits, 0, &tmp);
> - if (error == 0 && use_tpr_shadow) {
> + if (error == 0 && tpr_shadowing) {
> virtual_interrupt_delivery = 1;
> TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_vid",
> &virtual_interrupt_delivery);
> @@ -775,13 +793,6 @@ vmx_init(int ipinum)
> procbased_ctls2 |= procbased2_vid_bits;
> procbased_ctls2 &= ~PROCBASED2_VIRTUALIZE_X2APIC_MODE;
>
> - /*
> - * No need to emulate accesses to %CR8 if virtual
> - * interrupt delivery is enabled.
> - */
> - procbased_ctls &= ~PROCBASED_CR8_LOAD_EXITING;
> - procbased_ctls &= ~PROCBASED_CR8_STORE_EXITING;
> -
> /*
> * Check for Posted Interrupts only if Virtual Interrupt
> * Delivery is enabled.
> @@ -1051,10 +1062,13 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
> vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1;
> error += vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1);
>
> - if (virtual_interrupt_delivery) {
> - error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
> + if (tpr_shadowing) {
> error += vmwrite(VMCS_VIRTUAL_APIC,
> vtophys(&vmx->apic_page[i]));
> + }
> +
> + if (virtual_interrupt_delivery) {
> + error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
> error += vmwrite(VMCS_EOI_EXIT0, 0);
> error += vmwrite(VMCS_EOI_EXIT1, 0);
> error += vmwrite(VMCS_EOI_EXIT2, 0);
> @@ -2313,6 +2327,14 @@ vmx_exit_process(struct vmx *vmx, int vcpu,
> struct vm_exit *vmexit)
> }
> }
>
> + /*
> + * If 'TPR shadowing' is used, update the local APICs PPR.
> + */
> + if (tpr_shadowing) {
> + vlapic = vm_lapic(vmx->vm, vcpu);
> + vlapic_update_ppr(vlapic);
> + }
> +
> switch (reason) {
> case EXIT_REASON_TASK_SWITCH:
> ts = &vmexit->u.task_switch;
> diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
> index 74e6cd967396..289fdb7e077d 100644
> --- a/sys/amd64/vmm/io/vlapic.c
> +++ b/sys/amd64/vmm/io/vlapic.c
> @@ -490,7 +490,7 @@ dump_isrvec_stk(struct vlapic *vlapic)
> * Algorithm adopted from section "Interrupt, Task and Processor Priority"
> * in Intel Architecture Manual Vol 3a.
> */
> -static void
> +void
> vlapic_update_ppr(struct vlapic *vlapic)
> {
> int isrvec, tpr, ppr;
> diff --git a/sys/amd64/vmm/io/vlapic.h b/sys/amd64/vmm/io/vlapic.h
> index 2a5f54003253..71b97feab6bc 100644
> --- a/sys/amd64/vmm/io/vlapic.h
> +++ b/sys/amd64/vmm/io/vlapic.h
> @@ -74,6 +74,8 @@ void vlapic_post_intr(struct vlapic *vlapic, int
> hostcpu, int ipinum);
> void vlapic_fire_cmci(struct vlapic *vlapic);
> int vlapic_trigger_lvt(struct vlapic *vlapic, int vector);
>
> +void vlapic_update_ppr(struct vlapic *vlapic);
> +
> uint64_t vlapic_get_apicbase(struct vlapic *vlapic);
> int vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val);
> void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum
> x2apic_state s);
>
> --
> Homepage: https://www.yamagi.org
> Github: https://github.com/yamagi
> GPG: 0x1D502515
Gruß
---
Michael Reifenberger
More information about the freebsd-virtualization
mailing list