git: 3ba952e1a217 - main - vmm: add tunable to trap WBINVD

From: Emmanuel Vadot <manu_at_FreeBSD.org>
Date: Mon, 30 May 2022 09:18:46 UTC
The branch main has been updated by manu:

URL: https://cgit.FreeBSD.org/src/commit/?id=3ba952e1a2179c232402c82d5c7587159b15a8dd

commit 3ba952e1a2179c232402c82d5c7587159b15a8dd
Author:     Corvin Köhne <CorvinK@beckhoff.com>
AuthorDate: 2022-05-30 08:02:52 +0000
Commit:     Emmanuel Vadot <manu@FreeBSD.org>
CommitDate: 2022-05-30 08:04:22 +0000

    vmm: add tunable to trap WBINVD
    
    x86 is cache coherent. However, there are special cases where cache
    coherency isn't ensured (e.g. when switching the caching mode). In these
    cases, WBINVD can be used. WBINVD writes all cache lines back into main
    memory and invalidates the whole cache.
    
    Due to the invalidation of the whole cache, WBINVD is a very heavy
    instruction and degrades the performance on all cores. So, we should
    minimize the use of WBINVD as much as possible.
    
    In a virtual environment, the WBINVD call is mostly useless. The guest
    isn't able to break cache coherency because it can't switch the physical
    cache mode. When using PCI passthrough, WBINVD might be useful.
    
    Nevertheless, trapping and ignoring WBINVD is an unsafe operation. For
    that reason, we implement it as a tunable.
    
    Reviewed by:    jhb
    Sponsored by:   Beckhoff Automation GmbH & Co. KG
    MFC after:      1 week
    Differential Revision:  https://reviews.freebsd.org/D35253
---
 sys/amd64/include/vmm.h   |  1 +
 sys/amd64/vmm/amd/svm.c   |  8 ++++++++
 sys/amd64/vmm/amd/vmcb.h  |  1 +
 sys/amd64/vmm/intel/vmx.c | 18 ++++++++++++++++++
 sys/amd64/vmm/vmm.c       | 10 ++++++++++
 5 files changed, 38 insertions(+)

diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index d7d1509248f1..ce61e16522aa 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -467,6 +467,7 @@ void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
     struct vm_copyinfo *copyinfo, size_t len);
 
 int vcpu_trace_exceptions(struct vm *vm, int vcpuid);
+int vcpu_trap_wbinvd(struct vm *vm, int vcpuid);
 #endif	/* KERNEL */
 
 #ifdef _KERNEL
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index 57e941fc6145..a00494c98021 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -506,6 +506,10 @@ vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa,
 	svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_CLGI);
 	svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_SKINIT);
 	svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_ICEBP);
+	if (vcpu_trap_wbinvd(sc->vm, vcpu)) {
+		svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT,
+		    VMCB_INTCPT_WBINVD);
+	}
 
 	/*
 	 * From section "Canonicalization and Consistency Checks" in APMv2
@@ -1552,6 +1556,10 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
 		vm_inject_ud(svm_sc->vm, vcpu);
 		handled = 1;
 		break;
+	case VMCB_EXIT_WBINVD:
+		/* ignore WBINVD */
+		handled = 1;
+		break;
 	default:
 		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_UNKNOWN, 1);
 		break;
diff --git a/sys/amd64/vmm/amd/vmcb.h b/sys/amd64/vmm/amd/vmcb.h
index feea3e149205..847e1f6ad476 100644
--- a/sys/amd64/vmm/amd/vmcb.h
+++ b/sys/amd64/vmm/amd/vmcb.h
@@ -149,6 +149,7 @@
 #define	VMCB_EXIT_CLGI			0x85
 #define	VMCB_EXIT_SKINIT		0x86
 #define	VMCB_EXIT_ICEBP			0x88
+#define VMCB_EXIT_WBINVD		0x89
 #define	VMCB_EXIT_MONITOR		0x8A
 #define	VMCB_EXIT_MWAIT			0x8B
 #define	VMCB_EXIT_NPF			0x400
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 72ff227587aa..edb3653aff70 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -168,6 +168,10 @@ static int cap_pause_exit;
 SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, pause_exit, CTLFLAG_RD, &cap_pause_exit,
     0, "PAUSE triggers a VM-exit");
 
+static int cap_wbinvd_exit;
+SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, wbinvd_exit, CTLFLAG_RD, &cap_wbinvd_exit,
+    0, "WBINVD triggers a VM-exit");
+
 static int cap_rdpid;
 SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, rdpid, CTLFLAG_RD, &cap_rdpid, 0,
     "Guests are allowed to use RDPID");
@@ -777,6 +781,12 @@ vmx_modinit(int ipinum)
 					 PROCBASED_PAUSE_EXITING, 0,
 					 &tmp) == 0);
 
+	cap_wbinvd_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
+					MSR_VMX_PROCBASED_CTLS2,
+					PROCBASED2_WBINVD_EXITING,
+					0,
+					&tmp) == 0);
+
 	/*
 	 * Check support for RDPID and/or RDTSCP.
 	 *
@@ -1117,6 +1127,10 @@ vmx_init(struct vm *vm, pmap_t pmap)
 		error += vmwrite(VMCS_EPTP, vmx->eptp);
 		error += vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls);
 		error += vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls);
+		if (vcpu_trap_wbinvd(vm, i)) {
+			KASSERT(cap_wbinvd_exit, ("WBINVD trap not available"));
+			procbased_ctls2 |= PROCBASED2_WBINVD_EXITING;
+		}
 		error += vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2);
 		error += vmwrite(VMCS_EXIT_CTLS, exit_ctls);
 		error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls);
@@ -2776,6 +2790,10 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 		SDT_PROBE3(vmm, vmx, exit, vminsn, vmx, vcpu, vmexit);
 		vmexit->exitcode = VM_EXITCODE_VMINSN;
 		break;
+	case EXIT_REASON_WBINVD:
+		/* ignore WBINVD */
+		handled = HANDLED;
+		break;
 	default:
 		SDT_PROBE4(vmm, vmx, exit, unknown,
 		    vmx, vcpu, vmexit, reason);
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 45125cb92a7e..2375dc0e13c4 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -265,6 +265,10 @@ SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
     &trace_guest_exceptions, 0,
     "Trap into hypervisor on all guest exceptions and reflect them back");
 
+static int trap_wbinvd;
+SYSCTL_INT(_hw_vmm, OID_AUTO, trap_wbinvd, CTLFLAG_RDTUN, &trap_wbinvd, 0,
+    "WBINVD triggers a VM-exit");
+
 static void vm_free_memmap(struct vm *vm, int ident);
 static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
 static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
@@ -341,6 +345,12 @@ vcpu_trace_exceptions(struct vm *vm, int vcpuid)
 	return (trace_guest_exceptions);
 }
 
+int
+vcpu_trap_wbinvd(struct vm *vm, int vcpuid)
+{
+	return (trap_wbinvd);
+}
+
 struct vm_exit *
 vm_exitinfo(struct vm *vm, int cpuid)
 {