[PATCH] Untangle TPR shadowing and APIC virtualization / Make Win guests on Bhyve _fast_

Michael Reifenberger Michael at reifenberger.com
Sat Dec 28 10:03:57 UTC 2019


Hi,
did you already get a reply from a developer for review?
Could you open a PR/DR for this patch?

I would like to review and commit your patch after further tests.

Thanks!
---
mike (also mr at freebsd.org)

Zitat von Yamagi <lists at yamagi.org>:

> Hi,
> a long known problem with Bhyve is that Windows guests are rather slow.
> With Windows 10 1903 this became much worse, to the point that the
> guest is unusable. I have found the reason for this: Windows hammers on
> the %cr8 control register. For example, Windows 10 1909 on an i7-2620M
> has about 68,000 %cr8 accesses per second. Each of them triggers a vm
> exit.
>
> The most common solution is TPR shadowing. Many thanks to royger in
> #bhyve for getting me on the right track. Bhyve already implements TPR
> shadowing. On AMD SVM it just works, but the implementation for Intel
> VT-x is bound to APIC virtualization. And APIC virtualization is a Xeon
> feature that is missing on most (all?) desktop CPUs.
>
> The patch - further down inline or under [0] - separates TPR shadowing
> from APIC virtualization, so TPR shadowing can be used on desktop CPUs
> as well. The patch doesn't just give a small speed boost, it's a
> difference like day and night. As an example, without the patch, the
> installation of Windows 10 1909 takes about 2280 seconds from start to
> first reboot. With the patch, only 370 seconds. On an old Thinkpad
> X220, Windows 10 guests were previously unusable, now they are reasonably
> fast.
>
> The patch does:
>
> * Add a new tunable 'hw.vmm.vmx.use_tpr_shadowing' to disable TPR
>   shadowing. Also add 'hw.vmm.vmx.cap.tpr_shadowing' to be able to query
>   if TPR shadowing is used.
>
> * Detach the initialization of TPR shadowing from the initialization of
>   APIC virtualization. APIC virtualization still needs TPR shadowing,
>   but not vice versa. Any CPU that supports APIC virtualization should
>   also support TPR shadowing.
>
> * When TPR shadowing is used, the APIC page of each vCPU is written to
>   the VMCS_VIRTUAL_APIC field of the VMCS so that the CPU can write
>   directly to the page without intercept.
>
> * On vm exit, vlapic_update_ppr() is called to update the PPR.
>
> The patch was tested on an i7-2620M, an i7-6700k and a Xeon Silver
> 4110. Both Windows and FreeBSD guests work correctly.
>
> Regards,
> Yamagi
>
> 0: https://gist.github.com/Yamagi/de70c08eadeeef14eec4cb42aeb5957f
>
> ----
>
> diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
> index 605fd0bda766..324a1e9d0c3c 100644
> --- a/sys/amd64/vmm/intel/vmx.c
> +++ b/sys/amd64/vmm/intel/vmx.c
> @@ -172,6 +172,10 @@ static int cap_invpcid;
>  SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, invpcid, CTLFLAG_RD, &cap_invpcid,
>      0, "Guests are allowed to use INVPCID");
>
> +static int tpr_shadowing;
> +SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, tpr_shadowing, CTLFLAG_RD,
> +    &tpr_shadowing, 0, "TPR shadowin support");
> +
>  static int virtual_interrupt_delivery;
>  SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, virtual_interrupt_delivery, CTLFLAG_RD,
>      &virtual_interrupt_delivery, 0, "APICv virtual interrupt delivery support");
> @@ -627,7 +631,7 @@ vmx_restore(void)
>  static int
>  vmx_init(int ipinum)
>  {
> -	int error, use_tpr_shadow;
> +	int error;
>  	uint64_t basic, fixed0, fixed1, feature_control;
>  	uint32_t tmp, procbased2_vid_bits;
>
> @@ -750,6 +754,24 @@ vmx_init(int ipinum)
>  	    MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0,
>  	    &tmp) == 0);
>
> +	/*
> +	 * Check support for TPR shadow.
> +	 */
> +	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
> +	    MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0,
> +	    &tmp);
> +	if (error == 0) {
> +		tpr_shadowing = 1;
> +		TUNABLE_INT_FETCH("hw.vmm.vmx.use_tpr_shadowing",
> +		    &tpr_shadowing);
> +	}
> +
> +	if (tpr_shadowing) {
> +		procbased_ctls |= PROCBASED_USE_TPR_SHADOW;
> +		procbased_ctls &= ~PROCBASED_CR8_LOAD_EXITING;
> +		procbased_ctls &= ~PROCBASED_CR8_STORE_EXITING;
> +	}
> +
>  	/*
>  	 * Check support for virtual interrupt delivery.
>  	 */
> @@ -758,13 +780,9 @@ vmx_init(int ipinum)
>  	    PROCBASED2_APIC_REGISTER_VIRTUALIZATION |
>  	    PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY);
>
> -	use_tpr_shadow = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
> -	    MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0,
> -	    &tmp) == 0);
> -
>  	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2,
>  	    procbased2_vid_bits, 0, &tmp);
> -	if (error == 0 && use_tpr_shadow) {
> +	if (error == 0 && tpr_shadowing) {
>  		virtual_interrupt_delivery = 1;
>  		TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_vid",
>  		    &virtual_interrupt_delivery);
> @@ -775,13 +793,6 @@ vmx_init(int ipinum)
>  		procbased_ctls2 |= procbased2_vid_bits;
>  		procbased_ctls2 &= ~PROCBASED2_VIRTUALIZE_X2APIC_MODE;
>
> -		/*
> -		 * No need to emulate accesses to %CR8 if virtual
> -		 * interrupt delivery is enabled.
> -		 */
> -		procbased_ctls &= ~PROCBASED_CR8_LOAD_EXITING;
> -		procbased_ctls &= ~PROCBASED_CR8_STORE_EXITING;
> -
>  		/*
>  		 * Check for Posted Interrupts only if Virtual Interrupt
>  		 * Delivery is enabled.
> @@ -1051,10 +1062,13 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
>  		vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1;
>  		error += vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1);
>
> -		if (virtual_interrupt_delivery) {
> -			error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
> +		if (tpr_shadowing) {
>  			error += vmwrite(VMCS_VIRTUAL_APIC,
>  			    vtophys(&vmx->apic_page[i]));
> +		}
> +
> +		if (virtual_interrupt_delivery) {
> +			error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
>  			error += vmwrite(VMCS_EOI_EXIT0, 0);
>  			error += vmwrite(VMCS_EOI_EXIT1, 0);
>  			error += vmwrite(VMCS_EOI_EXIT2, 0);
> @@ -2313,6 +2327,14 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
>  		}
>  	}
>
> +	/*
> +	 * If 'TPR shadowing' is used, update the local APICs PPR.
> +	 */
> +	if (tpr_shadowing) {
> +		vlapic = vm_lapic(vmx->vm, vcpu);
> +		vlapic_update_ppr(vlapic);
> +	}
> +
>  	switch (reason) {
>  	case EXIT_REASON_TASK_SWITCH:
>  		ts = &vmexit->u.task_switch;
> diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
> index 74e6cd967396..289fdb7e077d 100644
> --- a/sys/amd64/vmm/io/vlapic.c
> +++ b/sys/amd64/vmm/io/vlapic.c
> @@ -490,7 +490,7 @@ dump_isrvec_stk(struct vlapic *vlapic)
>   * Algorithm adopted from section "Interrupt, Task and Processor Priority"
>   * in Intel Architecture Manual Vol 3a.
>   */
> -static void
> +void
>  vlapic_update_ppr(struct vlapic *vlapic)
>  {
>  	int isrvec, tpr, ppr;
> diff --git a/sys/amd64/vmm/io/vlapic.h b/sys/amd64/vmm/io/vlapic.h
> index 2a5f54003253..71b97feab6bc 100644
> --- a/sys/amd64/vmm/io/vlapic.h
> +++ b/sys/amd64/vmm/io/vlapic.h
> @@ -74,6 +74,8 @@ void vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum);
>  void vlapic_fire_cmci(struct vlapic *vlapic);
>  int vlapic_trigger_lvt(struct vlapic *vlapic, int vector);
>
> +void vlapic_update_ppr(struct vlapic *vlapic);
> +
>  uint64_t vlapic_get_apicbase(struct vlapic *vlapic);
>  int vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val);
>  void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state s);
>
> --
> Homepage: https://www.yamagi.org
> Github:   https://github.com/yamagi
> GPG:      0x1D502515



Gruß
---
Michael Reifenberger



More information about the freebsd-virtualization mailing list