svn commit: r338068 - in head/sys/amd64: amd64 include vmm/intel
Konstantin Belousov
kib at FreeBSD.org
Sun Aug 19 18:47:19 UTC 2018
Author: kib
Date: Sun Aug 19 18:47:16 2018
New Revision: 338068
URL: https://svnweb.freebsd.org/changeset/base/338068
Log:
Update L1TF workaround to sustain L1D pollution from NMI.
Current mitigation for L1TF in bhyve flushes L1D either by an explicit
WRMSR command, or by software reading enough uninteresting data to
fully populate all lines of L1D. If an NMI occurs after either of the
methods is completed, but before VM entry, L1D becomes polluted with
the cache lines touched by NMI handlers. There is no interesting data
which NMI accesses, but something sensitive might be co-located on the
same cache line, and then L1TF exposes that to a rogue guest.
Use VM entry MSR load list to ensure atomicity of L1D cache and VM
entry if updated microcode was loaded. If only the software flush method
is available, try to help the bhyve sw flusher by also flushing L1D on
NMI exit to kernel mode.
Suggested by and discussed with: Andrew Cooper <andrew.cooper3 at citrix.com>
Reviewed by: jhb
Sponsored by: The FreeBSD Foundation
MFC after: 2 weeks
Differential revision: https://reviews.freebsd.org/D16790
Modified:
head/sys/amd64/amd64/exception.S
head/sys/amd64/amd64/support.S
head/sys/amd64/amd64/trap.c
head/sys/amd64/include/md_var.h
head/sys/amd64/vmm/intel/vmx.c
head/sys/amd64/vmm/intel/vmx_support.S
Modified: head/sys/amd64/amd64/exception.S
==============================================================================
--- head/sys/amd64/amd64/exception.S Sun Aug 19 18:43:10 2018 (r338067)
+++ head/sys/amd64/amd64/exception.S Sun Aug 19 18:47:16 2018 (r338068)
@@ -864,7 +864,10 @@ nocallchain:
movl %edx,%eax
shrq $32,%rdx
wrmsr
- movq %r13,%cr3
+ cmpb $0, nmi_flush_l1d_sw(%rip)
+ je 2f
+ call flush_l1d_sw /* bhyve L1TF assist */
+2: movq %r13,%cr3
RESTORE_REGS
addq $TF_RIP,%rsp
jmp doreti_iret
Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S Sun Aug 19 18:43:10 2018 (r338067)
+++ head/sys/amd64/amd64/support.S Sun Aug 19 18:47:16 2018 (r338068)
@@ -1225,3 +1225,36 @@ ENTRY(handle_ibrs_exit_rs)
END(handle_ibrs_exit_rs)
.noaltmacro
+
+/*
+ * Flush L1D cache. Load enough of the data from the kernel text
+ * to flush existing L1D content.
+ *
+ * N.B. The function follows ABI calling conventions, but the vmm.ko
+ * caller expects that only %rax, %rcx, %r9, and %rflags registers
+ * are clobbered.
+ */
+ENTRY(flush_l1d_sw)
+#define L1D_FLUSH_SIZE (64 * 1024)
+ movq $KERNBASE, %r9
+ movq $-L1D_FLUSH_SIZE, %rcx
+ /*
+ * pass 1: Preload TLB.
+ * Kernel text is mapped using superpages. TLB preload is
+ * done for the benefit of older CPUs which split 2M page
+ * into 4k TLB entries.
+ */
+1: movb L1D_FLUSH_SIZE(%r9, %rcx), %al
+ addq $PAGE_SIZE, %rcx
+ jne 1b
+ xorl %eax, %eax
+ cpuid
+ movq $-L1D_FLUSH_SIZE, %rcx
+ /* pass 2: Read each cache line. */
+2: movb L1D_FLUSH_SIZE(%r9, %rcx), %al
+ addq $64, %rcx
+ jne 2b
+ lfence
+ ret
+#undef L1D_FLUSH_SIZE
+END(flush_l1d_sw)
Modified: head/sys/amd64/amd64/trap.c
==============================================================================
--- head/sys/amd64/amd64/trap.c Sun Aug 19 18:43:10 2018 (r338067)
+++ head/sys/amd64/amd64/trap.c Sun Aug 19 18:47:16 2018 (r338068)
@@ -161,6 +161,20 @@ SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG
"Print debugging information on trap signal to ctty");
/*
+ * Control L1D flush on return from NMI.
+ *
+ * Tunable can be set to the following values:
+ * 0 - only enable flush on return from NMI if required by vmm.ko (default)
+ * >1 - always flush on return from NMI.
+ *
+ * Post-boot, the sysctl indicates if flushing is currently enabled.
+ */
+int nmi_flush_l1d_sw;
+SYSCTL_INT(_machdep, OID_AUTO, nmi_flush_l1d_sw, CTLFLAG_RWTUN,
+ &nmi_flush_l1d_sw, 0,
+ "Flush L1 Data Cache on NMI exit, software bhyve L1TF mitigation assist");
+
+/*
* Exception, fault, and trap interface to the FreeBSD kernel.
* This common code is called from assembly language IDT gate entry
* routines that prepare a suitable stack frame, and restore this
Modified: head/sys/amd64/include/md_var.h
==============================================================================
--- head/sys/amd64/include/md_var.h Sun Aug 19 18:43:10 2018 (r338067)
+++ head/sys/amd64/include/md_var.h Sun Aug 19 18:47:16 2018 (r338068)
@@ -40,6 +40,7 @@ extern uint64_t *vm_page_dump;
extern int hw_lower_amd64_sharedpage;
extern int hw_ibrs_disable;
extern int hw_ssb_disable;
+extern int nmi_flush_l1d_sw;
/*
* The file "conf/ldscript.amd64" defines the symbol "kernphys". Its
Modified: head/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- head/sys/amd64/vmm/intel/vmx.c Sun Aug 19 18:43:10 2018 (r338067)
+++ head/sys/amd64/vmm/intel/vmx.c Sun Aug 19 18:47:16 2018 (r338068)
@@ -191,8 +191,11 @@ SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed,
static int guest_l1d_flush;
SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush, CTLFLAG_RD,
&guest_l1d_flush, 0, NULL);
+static int guest_l1d_flush_sw;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RD,
+ &guest_l1d_flush_sw, 0, NULL);
-uint64_t vmx_msr_flush_cmd;
+static struct msr_entry msr_load_list[1] __aligned(16);
/*
* The definitions of SDT probes for VMX.
@@ -579,6 +582,9 @@ vmx_cleanup(void)
vpid_unr = NULL;
}
+ if (nmi_flush_l1d_sw == 1)
+ nmi_flush_l1d_sw = 0;
+
smp_rendezvous(NULL, vmx_disable, NULL, NULL);
return (0);
@@ -807,11 +813,30 @@ vmx_init(int ipinum)
guest_l1d_flush = (cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) == 0;
TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush);
- if (guest_l1d_flush &&
- (cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) != 0)
- vmx_msr_flush_cmd = IA32_FLUSH_CMD_L1D;
/*
+ * L1D cache flush is enabled. Use IA32_FLUSH_CMD MSR when
+ * available. Otherwise fall back to the software flush
+ * method which loads enough data from the kernel text to
+ * flush existing L1D content, both on VMX entry and on NMI
+ * return.
+ */
+ if (guest_l1d_flush) {
+ if ((cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) == 0) {
+ guest_l1d_flush_sw = 1;
+ TUNABLE_INT_FETCH("hw.vmm.l1d_flush_sw",
+ &guest_l1d_flush_sw);
+ }
+ if (guest_l1d_flush_sw) {
+ if (nmi_flush_l1d_sw <= 1)
+ nmi_flush_l1d_sw = 1;
+ } else {
+ msr_load_list[0].index = MSR_IA32_FLUSH_CMD;
+ msr_load_list[0].val = IA32_FLUSH_CMD_L1D;
+ }
+ }
+
+ /*
* Stash the cr0 and cr4 bits that must be fixed to 0 or 1
*/
fixed0 = rdmsr(MSR_VMX_CR0_FIXED0);
@@ -999,6 +1024,15 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls);
error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap));
error += vmwrite(VMCS_VPID, vpid[i]);
+
+ if (guest_l1d_flush && !guest_l1d_flush_sw) {
+ vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract(
+ (vm_offset_t)&msr_load_list[0]));
+ vmcs_write(VMCS_ENTRY_MSR_LOAD_COUNT,
+ nitems(msr_load_list));
+ vmcs_write(VMCS_EXIT_MSR_STORE, 0);
+ vmcs_write(VMCS_EXIT_MSR_STORE_COUNT, 0);
+ }
/* exception bitmap */
if (vcpu_trace_exceptions(vm, i))
Modified: head/sys/amd64/vmm/intel/vmx_support.S
==============================================================================
--- head/sys/amd64/vmm/intel/vmx_support.S Sun Aug 19 18:43:10 2018 (r338067)
+++ head/sys/amd64/vmm/intel/vmx_support.S Sun Aug 19 18:47:16 2018 (r338068)
@@ -176,44 +176,10 @@ ENTRY(vmx_enter_guest)
jbe invept_error /* Check invept instruction error */
guest_restore:
-
- /*
- * Flush L1D cache if requested. Use IA32_FLUSH_CMD MSR if available,
- * otherwise load enough of the data from the zero_region to flush
- * existing L1D content.
- */
-#define L1D_FLUSH_SIZE (64 * 1024)
movl %edx, %r8d
- cmpb $0, guest_l1d_flush(%rip)
+ cmpb $0, guest_l1d_flush_sw(%rip)
je after_l1d
- movq vmx_msr_flush_cmd(%rip), %rax
- testq %rax, %rax
- jz 1f
- movq %rax, %rdx
- shrq $32, %rdx
- movl $MSR_IA32_FLUSH_CMD, %ecx
- wrmsr
- jmp after_l1d
-1: movq $KERNBASE, %r9
- movq $-L1D_FLUSH_SIZE, %rcx
- /*
- * pass 1: Preload TLB.
- * Kernel text is mapped using superpages. TLB preload is
- * done for the benefit of older CPUs which split 2M page
- * into 4k TLB entries.
- */
-2: movb L1D_FLUSH_SIZE(%r9, %rcx), %al
- addq $PAGE_SIZE, %rcx
- jne 2b
- xorl %eax, %eax
- cpuid
- movq $-L1D_FLUSH_SIZE, %rcx
- /* pass 2: Read each cache line */
-3: movb L1D_FLUSH_SIZE(%r9, %rcx), %al
- addq $64, %rcx
- jne 3b
- lfence
-#undef L1D_FLUSH_SIZE
+ call flush_l1d_sw
after_l1d:
cmpl $0, %r8d
je do_launch
More information about the svn-src-all
mailing list