svn commit: r273375 - in head: sys/amd64/include sys/amd64/vmm sys/amd64/vmm/amd sys/amd64/vmm/intel sys/amd64/vmm/io sys/modules/vmm usr.sbin/bhyve usr.sbin/bhyvectl

Neel Natu neel at FreeBSD.org
Tue Oct 21 07:10:46 UTC 2014


Author: neel
Date: Tue Oct 21 07:10:43 2014
New Revision: 273375
URL: https://svnweb.freebsd.org/changeset/base/273375

Log:
  Merge projects/bhyve_svm into HEAD.
  
  After this change, bhyve supports AMD processors with the SVM/AMD-V hardware
  virtualization extensions.
  
  More details available here:
  https://lists.freebsd.org/pipermail/freebsd-virtualization/2014-October/002905.html
  
  Submitted by:	Anish Gupta (akgupt3 at gmail.com)
  Tested by:	Benjamin Perrault (ben.perrault at gmail.com)
  Tested by:	Willem Jan Withagen (wjw at digiware.nl)

Added:
  head/sys/amd64/vmm/amd/npt.c
     - copied unchanged from r273369, projects/bhyve_svm/sys/amd64/vmm/amd/npt.c
  head/sys/amd64/vmm/amd/npt.h
     - copied unchanged from r273369, projects/bhyve_svm/sys/amd64/vmm/amd/npt.h
  head/sys/amd64/vmm/amd/svm.c
     - copied unchanged from r273369, projects/bhyve_svm/sys/amd64/vmm/amd/svm.c
  head/sys/amd64/vmm/amd/svm.h
     - copied unchanged from r273369, projects/bhyve_svm/sys/amd64/vmm/amd/svm.h
  head/sys/amd64/vmm/amd/svm_genassym.c
     - copied unchanged from r273369, projects/bhyve_svm/sys/amd64/vmm/amd/svm_genassym.c
  head/sys/amd64/vmm/amd/svm_msr.c
     - copied unchanged from r273369, projects/bhyve_svm/sys/amd64/vmm/amd/svm_msr.c
  head/sys/amd64/vmm/amd/svm_msr.h
     - copied unchanged from r273369, projects/bhyve_svm/sys/amd64/vmm/amd/svm_msr.h
  head/sys/amd64/vmm/amd/svm_softc.h
     - copied unchanged from r273369, projects/bhyve_svm/sys/amd64/vmm/amd/svm_softc.h
  head/sys/amd64/vmm/amd/svm_support.S
     - copied unchanged from r273369, projects/bhyve_svm/sys/amd64/vmm/amd/svm_support.S
  head/sys/amd64/vmm/amd/vmcb.c
     - copied unchanged from r273369, projects/bhyve_svm/sys/amd64/vmm/amd/vmcb.c
  head/sys/amd64/vmm/amd/vmcb.h
     - copied unchanged from r273369, projects/bhyve_svm/sys/amd64/vmm/amd/vmcb.h
Directory Properties:
  head/sys/amd64/vmm/amd/npt.c   (props changed)
  head/sys/amd64/vmm/amd/npt.h   (props changed)
  head/sys/amd64/vmm/amd/svm.c   (props changed)
  head/sys/amd64/vmm/amd/svm.h   (props changed)
  head/sys/amd64/vmm/amd/svm_genassym.c   (props changed)
  head/sys/amd64/vmm/amd/svm_msr.c   (props changed)
  head/sys/amd64/vmm/amd/svm_msr.h   (props changed)
  head/sys/amd64/vmm/amd/svm_softc.h   (props changed)
  head/sys/amd64/vmm/amd/svm_support.S   (props changed)
  head/sys/amd64/vmm/amd/vmcb.c   (props changed)
  head/sys/amd64/vmm/amd/vmcb.h   (props changed)
Modified:
  head/sys/amd64/include/vmm.h
  head/sys/amd64/include/vmm_instruction_emul.h
  head/sys/amd64/vmm/amd/amdv.c
  head/sys/amd64/vmm/intel/vmx.c
  head/sys/amd64/vmm/io/vlapic.c
  head/sys/amd64/vmm/vmm.c
  head/sys/amd64/vmm/vmm_instruction_emul.c
  head/sys/amd64/vmm/x86.c
  head/sys/modules/vmm/Makefile
  head/usr.sbin/bhyve/bhyverun.c
  head/usr.sbin/bhyve/xmsr.c
  head/usr.sbin/bhyvectl/bhyvectl.c
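
For context before the per-file diffs: the new backend refuses to attach unless the
processor advertises SVM, the BIOS has not locked it off, and the nested paging and
NRIP-save sub-features are present.  The probe added in sys/amd64/vmm/amd/svm.c
(svm_available() together with check_svm_features(), both visible further down)
condenses to roughly the following; the standalone wrapper and its name are
illustrative, while the helpers and constants are the ones the committed code uses:

	static int
	svm_probe(void)
	{
		u_int regs[4];

		/* CPUID.80000001H:ECX advertises SVM support. */
		if ((amd_feature2 & AMDID2_SVM) == 0)
			return (ENXIO);

		/* The BIOS can lock SVM off via VM_CR.SVMDIS. */
		if (rdmsr(MSR_VM_CR) & VM_CR_SVMDIS)
			return (ENXIO);

		/* CPUID.8000000AH:EDX: bhyve needs nested paging and NRIP save. */
		do_cpuid(0x8000000A, regs);
		if ((regs[3] & AMD_CPUID_SVM_NP) == 0 ||
		    (regs[3] & AMD_CPUID_SVM_NRIP_SAVE) == 0)
			return (ENXIO);

		return (0);
	}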

Modified: head/sys/amd64/include/vmm.h
==============================================================================
--- head/sys/amd64/include/vmm.h	Tue Oct 21 06:25:41 2014	(r273374)
+++ head/sys/amd64/include/vmm.h	Tue Oct 21 07:10:43 2014	(r273375)
@@ -487,6 +487,7 @@ enum vm_exitcode {
 	VM_EXITCODE_TASK_SWITCH,
 	VM_EXITCODE_MONITOR,
 	VM_EXITCODE_MWAIT,
+	VM_EXITCODE_SVM,
 	VM_EXITCODE_MAX
 };
 
@@ -564,6 +565,14 @@ struct vm_exit {
 			int		inst_type;
 			int		inst_error;
 		} vmx;
+		/*
+		 * SVM specific payload.
+		 */
+		struct {
+			uint64_t	exitcode;
+			uint64_t	exitinfo1;
+			uint64_t	exitinfo2;
+		} svm;
 		struct {
 			uint32_t	code;		/* ecx value */
 			uint64_t	wval;
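
The new VM_EXITCODE_SVM exit code and the 'svm' payload above let the kernel hand
userland the raw exitcode/exitinfo1/exitinfo2 triple from the VMCB for exits it
does not handle itself.  A hypothetical bhyve-side consumer, following the usual
vmexit_*() handler shape in bhyverun.c (the function body and message are
illustrative, not the committed bhyverun.c change):

	static int
	vmexit_svm(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
	{

		fprintf(stderr, "unhandled SVM exit: exitcode %#lx, "
		    "exitinfo1 %#lx, exitinfo2 %#lx\n", vme->u.svm.exitcode,
		    vme->u.svm.exitinfo1, vme->u.svm.exitinfo2);
		return (VMEXIT_ABORT);
	}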

Modified: head/sys/amd64/include/vmm_instruction_emul.h
==============================================================================
--- head/sys/amd64/include/vmm_instruction_emul.h	Tue Oct 21 06:25:41 2014	(r273374)
+++ head/sys/amd64/include/vmm_instruction_emul.h	Tue Oct 21 07:10:43 2014	(r273375)
@@ -93,7 +93,7 @@ int vmm_fetch_instruction(struct vm *vm,
 int vmm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
     uint64_t gla, int prot, uint64_t *gpa);
 
-void vie_init(struct vie *vie);
+void vie_init(struct vie *vie, const char *inst_bytes, int inst_length);
 
 /*
  * Decode the instruction fetched into 'vie' so it can be emulated.
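
The extra vie_init() parameters allow a caller to hand in instruction bytes that
the hardware has already captured, so instruction emulation can skip fetching them
from guest memory.  With SVM's decode-assist feature the VMCB supplies those bytes
on a nested page fault; callers without them would presumably pass NULL and a zero
length.  A sketch of the two call patterns (variable names are illustrative):

	if (inst_len != 0)
		vie_init(&vie, inst_bytes, inst_len);	/* bytes captured by hardware */
	else
		vie_init(&vie, NULL, 0);		/* fall back to a guest fetch */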

Modified: head/sys/amd64/vmm/amd/amdv.c
==============================================================================
--- head/sys/amd64/vmm/amd/amdv.c	Tue Oct 21 06:25:41 2014	(r273374)
+++ head/sys/amd64/vmm/amd/amdv.c	Tue Oct 21 07:10:43 2014	(r273375)
@@ -38,149 +38,6 @@ __FBSDID("$FreeBSD$");
 #include "io/iommu.h"
 
 static int
-amdv_init(int ipinum)
-{
-
-	printf("amdv_init: not implemented\n");
-	return (ENXIO);
-}
-
-static int
-amdv_cleanup(void)
-{
-
-	printf("amdv_cleanup: not implemented\n");
-	return (ENXIO);
-}
-
-static void
-amdv_resume(void)
-{
-}
-
-static void *
-amdv_vminit(struct vm *vm, struct pmap *pmap)
-{
-
-	printf("amdv_vminit: not implemented\n");
-	return (NULL);
-}
-
-static int
-amdv_vmrun(void *arg, int vcpu, register_t rip, struct pmap *pmap,
-    void *rptr, void *sptr)
-{
-
-	printf("amdv_vmrun: not implemented\n");
-	return (ENXIO);
-}
-
-static void
-amdv_vmcleanup(void *arg)
-{
-
-	printf("amdv_vmcleanup: not implemented\n");
-	return;
-}
-
-static int
-amdv_getreg(void *arg, int vcpu, int regnum, uint64_t *retval)
-{
-	
-	printf("amdv_getreg: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_setreg(void *arg, int vcpu, int regnum, uint64_t val)
-{
-	
-	printf("amdv_setreg: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_getdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
-{
-
-	printf("amdv_get_desc: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_setdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
-{
-
-	printf("amdv_get_desc: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_getcap(void *arg, int vcpu, int type, int *retval)
-{
-
-	printf("amdv_getcap: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_setcap(void *arg, int vcpu, int type, int val)
-{
-
-	printf("amdv_setcap: not implemented\n");
-	return (EINVAL);
-}
-
-static struct vmspace *
-amdv_vmspace_alloc(vm_offset_t min, vm_offset_t max)
-{
-
-	printf("amdv_vmspace_alloc: not implemented\n");
-	return (NULL);
-}
-
-static void
-amdv_vmspace_free(struct vmspace *vmspace)
-{
-
-	printf("amdv_vmspace_free: not implemented\n");
-	return;
-}
-
-static struct vlapic *
-amdv_vlapic_init(void *arg, int vcpuid)
-{
-
-	panic("amdv_vlapic_init: not implmented");
-}
-
-static void
-amdv_vlapic_cleanup(void *arg, struct vlapic *vlapic)
-{
-
-	panic("amdv_vlapic_cleanup: not implemented");
-}
-
-struct vmm_ops vmm_ops_amd = {
-	amdv_init,
-	amdv_cleanup,
-	amdv_resume,
-	amdv_vminit,
-	amdv_vmrun,
-	amdv_vmcleanup,
-	amdv_getreg,
-	amdv_setreg,
-	amdv_getdesc,
-	amdv_setdesc,
-	amdv_getcap,
-	amdv_setcap,
-	amdv_vmspace_alloc,
-	amdv_vmspace_free,
-	amdv_vlapic_init,
-	amdv_vlapic_cleanup,
-};
-
-static int
 amd_iommu_init(void)
 {
 

Copied: head/sys/amd64/vmm/amd/npt.c (from r273369, projects/bhyve_svm/sys/amd64/vmm/amd/npt.c)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/amd64/vmm/amd/npt.c	Tue Oct 21 07:10:43 2014	(r273375, copy of r273369, projects/bhyve_svm/sys/amd64/vmm/amd/npt.c)
@@ -0,0 +1,87 @@
+/*-
+ * Copyright (c) 2013 Anish Gupta (akgupt3 at gmail.com)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+
+#include <machine/pmap.h>
+
+#include "npt.h"
+
+SYSCTL_DECL(_hw_vmm);
+SYSCTL_NODE(_hw_vmm, OID_AUTO, npt, CTLFLAG_RW, NULL, NULL);
+
+static int npt_flags;
+SYSCTL_INT(_hw_vmm_npt, OID_AUTO, pmap_flags, CTLFLAG_RD,
+	&npt_flags, 0, NULL);
+
+#define NPT_IPIMASK	0xFF
+
+/*
+ * AMD nested page table init.
+ */
+int
+svm_npt_init(int ipinum)
+{
+	int enable_superpage = 1;
+
+	npt_flags = ipinum & NPT_IPIMASK;
+	TUNABLE_INT_FETCH("hw.vmm.npt.enable_superpage", &enable_superpage);
+	if (enable_superpage)
+		npt_flags |= PMAP_PDE_SUPERPAGE; 
+	
+	return (0);
+}
+
+static int
+npt_pinit(pmap_t pmap)
+{
+
+	return (pmap_pinit_type(pmap, PT_RVI, npt_flags));
+}
+
+struct vmspace *
+svm_npt_alloc(vm_offset_t min, vm_offset_t max)
+{
+	
+	return (vmspace_alloc(min, max, npt_pinit));
+}
+
+void
+svm_npt_free(struct vmspace *vmspace)
+{
+
+	vmspace_free(vmspace);
+}
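
svm_npt_init() above seeds the flags later passed to pmap_pinit_type() for every
guest vmspace: the IPI vector in the low byte plus, unless disabled by a tunable,
superpage support.  For example, NPT superpages could be turned off at boot via the
tunable the code fetches (illustrative /boot/loader.conf usage; the effective flags
are then readable through the hw.vmm.npt.pmap_flags sysctl):

	# /boot/loader.conf -- must be set before vmm.ko initializes
	hw.vmm.npt.enable_superpage="0"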

Copied: head/sys/amd64/vmm/amd/npt.h (from r273369, projects/bhyve_svm/sys/amd64/vmm/amd/npt.h)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/amd64/vmm/amd/npt.h	Tue Oct 21 07:10:43 2014	(r273375, copy of r273369, projects/bhyve_svm/sys/amd64/vmm/amd/npt.h)
@@ -0,0 +1,36 @@
+/*-
+ * Copyright (c) 2013 Anish Gupta (akgupt3 at gmail.com)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SVM_NPT_H_
+#define _SVM_NPT_H_
+
+int 	svm_npt_init(int ipinum);
+struct	vmspace *svm_npt_alloc(vm_offset_t min, vm_offset_t max);
+void	svm_npt_free(struct vmspace *vmspace);
+
+#endif /* _SVM_NPT_H_ */

Copied: head/sys/amd64/vmm/amd/svm.c (from r273369, projects/bhyve_svm/sys/amd64/vmm/amd/svm.c)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/amd64/vmm/amd/svm.c	Tue Oct 21 07:10:43 2014	(r273375, copy of r273369, projects/bhyve_svm/sys/amd64/vmm/amd/svm.c)
@@ -0,0 +1,2092 @@
+/*-
+ * Copyright (c) 2013, Anish Gupta (akgupt3 at gmail.com)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/cpufunc.h>
+#include <machine/psl.h>
+#include <machine/pmap.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <machine/smp.h>
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include "vmm_lapic.h"
+#include "vmm_stat.h"
+#include "vmm_ktr.h"
+#include "vmm_ioport.h"
+#include "vatpic.h"
+#include "vlapic.h"
+#include "vlapic_priv.h"
+
+#include "x86.h"
+#include "vmcb.h"
+#include "svm.h"
+#include "svm_softc.h"
+#include "svm_msr.h"
+#include "npt.h"
+
+SYSCTL_DECL(_hw_vmm);
+SYSCTL_NODE(_hw_vmm, OID_AUTO, svm, CTLFLAG_RW, NULL, NULL);
+
+/*
+ * SVM CPUID function 0x8000_000A, edx bit decoding.
+ */
+#define AMD_CPUID_SVM_NP		BIT(0)  /* Nested paging or RVI */
+#define AMD_CPUID_SVM_LBR		BIT(1)  /* Last branch virtualization */
+#define AMD_CPUID_SVM_SVML		BIT(2)  /* SVM lock */
+#define AMD_CPUID_SVM_NRIP_SAVE		BIT(3)  /* Next RIP is saved */
+#define AMD_CPUID_SVM_TSC_RATE		BIT(4)  /* TSC rate control. */
+#define AMD_CPUID_SVM_VMCB_CLEAN	BIT(5)  /* VMCB state caching */
+#define AMD_CPUID_SVM_FLUSH_BY_ASID	BIT(6)  /* Flush by ASID */
+#define AMD_CPUID_SVM_DECODE_ASSIST	BIT(7)  /* Decode assist */
+#define AMD_CPUID_SVM_PAUSE_INC		BIT(10) /* Pause intercept filter. */
+#define AMD_CPUID_SVM_PAUSE_FTH		BIT(12) /* Pause filter threshold */
+
+#define	VMCB_CACHE_DEFAULT	(VMCB_CACHE_ASID 	|	\
+				VMCB_CACHE_IOPM		|	\
+				VMCB_CACHE_I		|	\
+				VMCB_CACHE_TPR		|	\
+				VMCB_CACHE_CR2		|	\
+				VMCB_CACHE_CR		|	\
+				VMCB_CACHE_DT		|	\
+				VMCB_CACHE_SEG		|	\
+				VMCB_CACHE_NP)
+
+static uint32_t vmcb_clean = VMCB_CACHE_DEFAULT;
+SYSCTL_INT(_hw_vmm_svm, OID_AUTO, vmcb_clean, CTLFLAG_RDTUN, &vmcb_clean,
+    0, NULL);
+
+static MALLOC_DEFINE(M_SVM, "svm", "svm");
+static MALLOC_DEFINE(M_SVM_VLAPIC, "svm-vlapic", "svm-vlapic");
+
+/* Per-CPU context area. */
+extern struct pcpu __pcpu[];
+
+static uint32_t svm_feature;	/* AMD SVM features. */
+SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, features, CTLFLAG_RD, &svm_feature, 0,
+    "SVM features advertised by CPUID.8000000AH:EDX");
+
+static int disable_npf_assist;
+SYSCTL_INT(_hw_vmm_svm, OID_AUTO, disable_npf_assist, CTLFLAG_RWTUN,
+    &disable_npf_assist, 0, NULL);
+
+/* Maximum ASIDs supported by the processor */
+static uint32_t nasid;
+SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, num_asids, CTLFLAG_RD, &nasid, 0,
+    "Number of ASIDs supported by this processor");
+
+/* Current ASID generation for each host cpu */
+static struct asid asid[MAXCPU];
+
+/* 
+ * SVM host state saved area of size 4KB for each core.
+ */
+static uint8_t hsave[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE);
+
+static VMM_STAT_AMD(VCPU_EXITINTINFO, "VM exits during event delivery");
+static VMM_STAT_AMD(VCPU_INTINFO_INJECTED, "Events pending at VM entry");
+static VMM_STAT_AMD(VMEXIT_VINTR, "VM exits due to interrupt window");
+
+static int svm_setreg(void *arg, int vcpu, int ident, uint64_t val);
+
+static __inline int
+flush_by_asid(void)
+{
+
+	return (svm_feature & AMD_CPUID_SVM_FLUSH_BY_ASID);
+}
+
+static __inline int
+decode_assist(void)
+{
+
+	return (svm_feature & AMD_CPUID_SVM_DECODE_ASSIST);
+}
+
+static void
+svm_disable(void *arg __unused)
+{
+	uint64_t efer;
+
+	efer = rdmsr(MSR_EFER);
+	efer &= ~EFER_SVM;
+	wrmsr(MSR_EFER, efer);
+}
+
+/*
+ * Disable SVM on all CPUs.
+ */
+static int
+svm_cleanup(void)
+{
+
+	smp_rendezvous(NULL, svm_disable, NULL, NULL);
+	return (0);
+}
+
+/*
+ * Verify that all the features required by bhyve are available.
+ */
+static int
+check_svm_features(void)
+{
+	u_int regs[4];
+
+	/* CPUID Fn8000_000A is for SVM */
+	do_cpuid(0x8000000A, regs);
+	svm_feature = regs[3];
+
+	printf("SVM: Revision %d\n", regs[0] & 0xFF);
+	printf("SVM: NumASID %u\n", regs[1]);
+
+	nasid = regs[1];
+	KASSERT(nasid > 1, ("Insufficient ASIDs for guests: %#x", nasid));
+
+	printf("SVM: Features 0x%b\n", svm_feature,
+		"\020"
+		"\001NP"		/* Nested paging */
+		"\002LbrVirt"		/* LBR virtualization */
+		"\003SVML"		/* SVM lock */
+		"\004NRIPS"		/* NRIP save */
+		"\005TscRateMsr"	/* MSR based TSC rate control */
+		"\006VmcbClean"		/* VMCB clean bits */
+		"\007FlushByAsid"	/* Flush by ASID */
+		"\010DecodeAssist"	/* Decode assist */
+		"\011<b8>"
+		"\012<b9>"
+		"\013PauseFilter"	
+		"\014<b11>"
+		"\015PauseFilterThreshold"	
+		"\016AVIC"	
+		);
+
+	/* bhyve requires the Nested Paging feature */
+	if (!(svm_feature & AMD_CPUID_SVM_NP)) {
+		printf("SVM: Nested Paging feature not available.\n");
+		return (ENXIO);
+	}
+
+	/* bhyve requires the NRIP Save feature */
+	if (!(svm_feature & AMD_CPUID_SVM_NRIP_SAVE)) {
+		printf("SVM: NRIP Save feature not available.\n");
+		return (ENXIO);
+	}
+
+	return (0);
+}
+
+static void
+svm_enable(void *arg __unused)
+{
+	uint64_t efer;
+
+	efer = rdmsr(MSR_EFER);
+	efer |= EFER_SVM;
+	wrmsr(MSR_EFER, efer);
+
+	wrmsr(MSR_VM_HSAVE_PA, vtophys(hsave[curcpu]));
+}
+
+/*
+ * Return 1 if SVM is enabled on this processor and 0 otherwise.
+ */
+static int
+svm_available(void)
+{
+	uint64_t msr;
+
+	/* Section 15.4 Enabling SVM from APM2. */
+	if ((amd_feature2 & AMDID2_SVM) == 0) {
+		printf("SVM: not available.\n");
+		return (0);
+	}
+
+	msr = rdmsr(MSR_VM_CR);
+	if ((msr & VM_CR_SVMDIS) != 0) {
+		printf("SVM: disabled by BIOS.\n");
+		return (0);
+	}
+
+	return (1);
+}
+
+static int
+svm_init(int ipinum)
+{
+	int error, cpu;
+
+	if (!svm_available())
+		return (ENXIO);
+
+	error = check_svm_features();
+	if (error)
+		return (error);
+
+	vmcb_clean &= VMCB_CACHE_DEFAULT;
+
+	for (cpu = 0; cpu < MAXCPU; cpu++) {
+		/*
+		 * Initialize the host ASIDs to their "highest" valid values.
+		 *
+		 * The next ASID allocation will rollover both 'gen' and 'num'
+		 * and start off the sequence at {1,1}.
+		 */
+		asid[cpu].gen = ~0UL;
+		asid[cpu].num = nasid - 1;
+	}
+
+	svm_msr_init();
+	svm_npt_init(ipinum);
+
+	/* Enable SVM on all CPUs */
+	smp_rendezvous(NULL, svm_enable, NULL, NULL);
+
+	return (0);
+}
+
+static void
+svm_restore(void)
+{
+
+	svm_enable(NULL);
+}		
+
+/* Pentium compatible MSRs */
+#define MSR_PENTIUM_START 	0	
+#define MSR_PENTIUM_END 	0x1FFF
+/* AMD 6th generation and Intel compatible MSRs */
+#define MSR_AMD6TH_START 	0xC0000000UL	
+#define MSR_AMD6TH_END 		0xC0001FFFUL	
+/* AMD 7th and 8th generation compatible MSRs */
+#define MSR_AMD7TH_START 	0xC0010000UL	
+#define MSR_AMD7TH_END 		0xC0011FFFUL	
+
+/*
+ * Get the index and bit position for a MSR in permission bitmap.
+ * Two bits are used for each MSR: lower bit for read and higher bit for write.
+ */
+static int
+svm_msr_index(uint64_t msr, int *index, int *bit)
+{
+	uint32_t base, off;
+
+	*index = -1;
+	*bit = (msr % 4) * 2;
+	base = 0;
+
+	if (msr >= MSR_PENTIUM_START && msr <= MSR_PENTIUM_END) {
+		*index = msr / 4;
+		return (0);
+	}
+
+	base += (MSR_PENTIUM_END - MSR_PENTIUM_START + 1); 
+	if (msr >= MSR_AMD6TH_START && msr <= MSR_AMD6TH_END) {
+		off = (msr - MSR_AMD6TH_START); 
+		*index = (off + base) / 4;
+		return (0);
+	} 
+
+	base += (MSR_AMD6TH_END - MSR_AMD6TH_START + 1);
+	if (msr >= MSR_AMD7TH_START && msr <= MSR_AMD7TH_END) {
+		off = (msr - MSR_AMD7TH_START);
+		*index = (off + base) / 4;
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Allow vcpu to read or write the 'msr' without trapping into the hypervisor.
+ */
+static void
+svm_msr_perm(uint8_t *perm_bitmap, uint64_t msr, bool read, bool write)
+{
+	int index, bit, error;
+
+	error = svm_msr_index(msr, &index, &bit);
+	KASSERT(error == 0, ("%s: invalid msr %#lx", __func__, msr));
+	KASSERT(index >= 0 && index < SVM_MSR_BITMAP_SIZE,
+	    ("%s: invalid index %d for msr %#lx", __func__, index, msr));
+	KASSERT(bit >= 0 && bit <= 6, ("%s: invalid bit position %d "
+	    "msr %#lx", __func__, bit, msr));
+
+	if (read)
+		perm_bitmap[index] &= ~(1UL << bit);
+
+	if (write)
+		perm_bitmap[index] &= ~(2UL << bit);
+}
+
+static void
+svm_msr_rw_ok(uint8_t *perm_bitmap, uint64_t msr)
+{
+
+	svm_msr_perm(perm_bitmap, msr, true, true);
+}
+
+static void
+svm_msr_rd_ok(uint8_t *perm_bitmap, uint64_t msr)
+{
+
+	svm_msr_perm(perm_bitmap, msr, true, false);
+}
+
+static __inline int
+svm_get_intercept(struct svm_softc *sc, int vcpu, int idx, uint32_t bitmask)
+{
+	struct vmcb_ctrl *ctrl;
+
+	KASSERT(idx >=0 && idx < 5, ("invalid intercept index %d", idx));
+
+	ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+	return (ctrl->intercept[idx] & bitmask ? 1 : 0);
+}
+
+static __inline void
+svm_set_intercept(struct svm_softc *sc, int vcpu, int idx, uint32_t bitmask,
+    int enabled)
+{
+	struct vmcb_ctrl *ctrl;
+	uint32_t oldval;
+
+	KASSERT(idx >=0 && idx < 5, ("invalid intercept index %d", idx));
+
+	ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+	oldval = ctrl->intercept[idx];
+
+	if (enabled)
+		ctrl->intercept[idx] |= bitmask;
+	else
+		ctrl->intercept[idx] &= ~bitmask;
+
+	if (ctrl->intercept[idx] != oldval) {
+		svm_set_dirty(sc, vcpu, VMCB_CACHE_I);
+		VCPU_CTR3(sc->vm, vcpu, "intercept[%d] modified "
+		    "from %#x to %#x", idx, oldval, ctrl->intercept[idx]);
+	}
+}
+
+static __inline void
+svm_disable_intercept(struct svm_softc *sc, int vcpu, int off, uint32_t bitmask)
+{
+
+	svm_set_intercept(sc, vcpu, off, bitmask, 0);
+}
+
+static __inline void
+svm_enable_intercept(struct svm_softc *sc, int vcpu, int off, uint32_t bitmask)
+{
+
+	svm_set_intercept(sc, vcpu, off, bitmask, 1);
+}
+
+static void
+vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa,
+    uint64_t msrpm_base_pa, uint64_t np_pml4)
+{
+	struct vmcb_ctrl *ctrl;
+	struct vmcb_state *state;
+	uint32_t mask;
+	int n;
+
+	ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+	state = svm_get_vmcb_state(sc, vcpu);
+
+	ctrl->iopm_base_pa = iopm_base_pa;
+	ctrl->msrpm_base_pa = msrpm_base_pa;
+
+	/* Enable nested paging */
+	ctrl->np_enable = 1;
+	ctrl->n_cr3 = np_pml4;
+
+	/*
+	 * Intercept accesses to the control registers that are not shadowed
+	 * in the VMCB - i.e. all except cr0, cr2, cr3, cr4 and cr8.
+	 */
+	for (n = 0; n < 16; n++) {
+		mask = (BIT(n) << 16) | BIT(n);
+		if (n == 0 || n == 2 || n == 3 || n == 4 || n == 8)
+			svm_disable_intercept(sc, vcpu, VMCB_CR_INTCPT, mask);
+		else
+			svm_enable_intercept(sc, vcpu, VMCB_CR_INTCPT, mask);
+	}
+
+	/* Intercept Machine Check exceptions. */
+	svm_enable_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_MC));
+
+	/* Intercept various events (for e.g. I/O, MSR and CPUID accesses) */
+	svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IO);
+	svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_MSR);
+	svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_CPUID);
+	svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INTR);
+	svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INIT);
+	svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_NMI);
+	svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_SMI);
+	svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_SHUTDOWN);
+	svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
+	    VMCB_INTCPT_FERR_FREEZE);
+
+	/*
+	 * From section "Canonicalization and Consistency Checks" in APMv2
+	 * the VMRUN intercept bit must be set to pass the consistency check.
+	 */
+	svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMRUN);
+
+	/*
+	 * The ASID will be set to a non-zero value just before VMRUN.
+	 */
+	ctrl->asid = 0;
+
+	/*
+	 * Section 15.21.1, Interrupt Masking in EFLAGS
+	 * Section 15.21.2, Virtualizing APIC.TPR
+	 *
+	 * This must be set for %rflag and %cr8 isolation of guest and host.
+	 */
+	ctrl->v_intr_masking = 1;
+
+	/* Enable Last Branch Record aka LBR for debugging */
+	ctrl->lbr_virt_en = 1;
+	state->dbgctl = BIT(0);
+
+	/* EFER_SVM must always be set when the guest is executing */
+	state->efer = EFER_SVM;
+
+	/* Set up the PAT to power-on state */
+	state->g_pat = PAT_VALUE(0, PAT_WRITE_BACK)	|
+	    PAT_VALUE(1, PAT_WRITE_THROUGH)	|
+	    PAT_VALUE(2, PAT_UNCACHED)		|
+	    PAT_VALUE(3, PAT_UNCACHEABLE)	|
+	    PAT_VALUE(4, PAT_WRITE_BACK)	|
+	    PAT_VALUE(5, PAT_WRITE_THROUGH)	|
+	    PAT_VALUE(6, PAT_UNCACHED)		|
+	    PAT_VALUE(7, PAT_UNCACHEABLE);
+}
+
+/*
+ * Initialize a virtual machine.
+ */
+static void *
+svm_vminit(struct vm *vm, pmap_t pmap)
+{
+	struct svm_softc *svm_sc;
+	struct svm_vcpu *vcpu;
+	vm_paddr_t msrpm_pa, iopm_pa, pml4_pa;	
+	int i;
+
+	svm_sc = malloc(sizeof (struct svm_softc), M_SVM, M_WAITOK | M_ZERO);
+	svm_sc->vm = vm;
+	svm_sc->nptp = (vm_offset_t)vtophys(pmap->pm_pml4);
+
+	/*
+	 * Intercept read and write accesses to all MSRs.
+	 */
+	memset(svm_sc->msr_bitmap, 0xFF, sizeof(svm_sc->msr_bitmap));
+
+	/*
+	 * Access to the following MSRs is redirected to the VMCB when the
+	 * guest is executing. Therefore it is safe to allow the guest to
+	 * read/write these MSRs directly without hypervisor involvement.
+	 */
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_GSBASE);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_FSBASE);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_KGSBASE);
+	
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_STAR);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_LSTAR);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_CSTAR);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SF_MASK);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_CS_MSR);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_ESP_MSR);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_EIP_MSR);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_PAT);
+
+	svm_msr_rd_ok(svm_sc->msr_bitmap, MSR_TSC);
+
+	/*
+	 * Intercept writes to make sure that the EFER_SVM bit is not cleared.
+	 */
+	svm_msr_rd_ok(svm_sc->msr_bitmap, MSR_EFER);
+
+	/* Intercept access to all I/O ports. */
+	memset(svm_sc->iopm_bitmap, 0xFF, sizeof(svm_sc->iopm_bitmap));
+
+	iopm_pa = vtophys(svm_sc->iopm_bitmap);
+	msrpm_pa = vtophys(svm_sc->msr_bitmap);
+	pml4_pa = svm_sc->nptp;
+	for (i = 0; i < VM_MAXCPU; i++) {
+		vcpu = svm_get_vcpu(svm_sc, i);
+		vcpu->lastcpu = NOCPU;
+		vcpu->vmcb_pa = vtophys(&vcpu->vmcb);
+		vmcb_init(svm_sc, i, iopm_pa, msrpm_pa, pml4_pa);
+		svm_msr_guest_init(svm_sc, i);
+	}
+	return (svm_sc);
+}
+
+static int
+svm_cpl(struct vmcb_state *state)
+{
+
+	/*
+	 * From APMv2:
+	 *   "Retrieve the CPL from the CPL field in the VMCB, not
+	 *    from any segment DPL"
+	 */
+	return (state->cpl);
+}
+
+static enum vm_cpu_mode
+svm_vcpu_mode(struct vmcb *vmcb)
+{
+	struct vmcb_segment seg;
+	struct vmcb_state *state;
+	int error;
+
+	state = &vmcb->state;
+
+	if (state->efer & EFER_LMA) {
+		error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg);
+		KASSERT(error == 0, ("%s: vmcb_seg(cs) error %d", __func__,
+		    error));
+
+		/*
+		 * Section 4.8.1 for APM2, check if Code Segment has
+		 * Long attribute set in descriptor.
+		 */
+		if (seg.attrib & VMCB_CS_ATTRIB_L)
+			return (CPU_MODE_64BIT);
+		else
+			return (CPU_MODE_COMPATIBILITY);
+	} else  if (state->cr0 & CR0_PE) {
+		return (CPU_MODE_PROTECTED);
+	} else {
+		return (CPU_MODE_REAL);
+	}
+}
+
+static enum vm_paging_mode
+svm_paging_mode(uint64_t cr0, uint64_t cr4, uint64_t efer)
+{
+
+	if ((cr0 & CR0_PG) == 0)
+		return (PAGING_MODE_FLAT);
+	if ((cr4 & CR4_PAE) == 0)
+		return (PAGING_MODE_32);
+	if (efer & EFER_LME)
+		return (PAGING_MODE_64);
+	else
+		return (PAGING_MODE_PAE);
+}
+
+/*
+ * ins/outs utility routines
+ */
+static uint64_t
+svm_inout_str_index(struct svm_regctx *regs, int in)
+{
+	uint64_t val;
+
+	val = in ? regs->sctx_rdi : regs->sctx_rsi;
+
+	return (val);
+}
+
+static uint64_t
+svm_inout_str_count(struct svm_regctx *regs, int rep)
+{
+	uint64_t val;
+
+	val = rep ? regs->sctx_rcx : 1;
+
+	return (val);
+}
+
+static void
+svm_inout_str_seginfo(struct svm_softc *svm_sc, int vcpu, int64_t info1,
+    int in, struct vm_inout_str *vis)
+{
+	int error, s;
+
+	if (in) {
+		vis->seg_name = VM_REG_GUEST_ES;
+	} else {
+		/* The segment field has standard encoding */
+		s = (info1 >> 10) & 0x7;
+		vis->seg_name = vm_segment_name(s);
+	}
+
+	error = vmcb_getdesc(svm_sc, vcpu, vis->seg_name, &vis->seg_desc);
+	KASSERT(error == 0, ("%s: svm_getdesc error %d", __func__, error));
+}
+
+static int
+svm_inout_str_addrsize(uint64_t info1)
+{
+        uint32_t size;
+
+        size = (info1 >> 7) & 0x7;
+        switch (size) {
+        case 1:
+                return (2);     /* 16 bit */
+        case 2:
+                return (4);     /* 32 bit */
+        case 4:
+                return (8);     /* 64 bit */
+        default:
+                panic("%s: invalid size encoding %d", __func__, size);
+        }
+}
+
+static void

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
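
One easily misread detail in svm_msr_index() and svm_msr_perm() above is the
packing of the MSR permission bitmap: two bits per MSR, the lower bit controlling
read intercepts and the upper bit write intercepts, with the Pentium, AMD
6th-generation and AMD 7th/8th-generation MSR ranges laid out back to back.  A
worked example for MSR_LSTAR (0xC0000082), following the function's arithmetic
(the KASSERT wrapper is illustrative):

	int index, bit;

	/*
	 * bit   = (0xC0000082 % 4) * 2          = 4
	 * base  = MSR_PENTIUM_END + 1           = 0x2000
	 * off   = 0xC0000082 - MSR_AMD6TH_START = 0x82
	 * index = (0x82 + 0x2000) / 4           = 0x820
	 */
	if (svm_msr_index(MSR_LSTAR, &index, &bit) == 0)
		KASSERT(index == 0x820 && bit == 4,
		    ("unexpected MSR bitmap slot: index %#x, bit %d", index, bit));

Clearing bit 4 of byte 0x820 therefore permits guest reads of MSR_LSTAR and
clearing bit 5 permits writes, which is what svm_msr_rw_ok() arranges for that MSR
in svm_vminit().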

