svn commit: r261504 - in head: lib/libvmmapi sys/amd64/include sys/amd64/vmm sys/amd64/vmm/intel sys/boot/common sys/boot/userboot/userboot usr.sbin/bhyveload

John Baldwin jhb at FreeBSD.org
Wed Feb 5 04:39:07 UTC 2014


Author: jhb
Date: Wed Feb  5 04:39:03 2014
New Revision: 261504
URL: http://svnweb.freebsd.org/changeset/base/261504

Log:
  Add support for FreeBSD/i386 guests under bhyve.
  - Similar to the hack for bootinfo32.c in userboot, define
    _MACHINE_ELF_WANT_32BIT in the load_elf32 file handlers in userboot.
    This allows userboot to load 32-bit kernels and modules.
  - Copy the SMAP generation code out of bootinfo64.c and into its own
    file so it can be shared with bootinfo32.c to pass an SMAP to the i386
    kernel.
  - Use uint32_t instead of u_long when aligning module metadata in
    bootinfo32.c in userboot, as otherwise the metadata used 64-bit
    alignment which corrupted the layout.
  - Populate the basemem and extmem members of the bootinfo struct passed
    to 32-bit kernels.
  - Fix the 32-bit stack in userboot to start at the top of the stack
    instead of the bottom so that there is room to grow before the
    kernel switches to its own stack.
  - Push a fake return address onto the 32-bit stack in addition to the
    arguments normally passed to exec() in the loader.  This return
    address is needed to convince recover_bootinfo() in the 32-bit
    locore code that it is being invoked from a "new" boot block.
  - Add a routine to libvmmapi to set up a 32-bit flat mode register state
    including a GDT and TSS that is able to start the i386 kernel and
    update bhyveload to use it when booting an i386 kernel.
  - Use the guest register state to determine the CPU's current instruction
    mode (32-bit vs 64-bit) and paging mode (flat, 32-bit, PAE, or long
    mode) in the instruction emulation code.  Update the gla2gpa() routine
    used when fetching instructions to handle flat mode, 32-bit paging, and
    PAE paging in addition to long mode paging.  Don't look for a REX
    prefix when the CPU is in 32-bit mode, and use the detected mode to
    enable the existing 32-bit mode code when decoding the mod r/m byte.
  
  Reviewed by:	grehan, neel
  MFC after:	1 month

Added:
  head/sys/boot/userboot/userboot/biossmap.c
     - copied, changed from r261503, head/sys/boot/userboot/userboot/bootinfo64.c
Modified:
  head/lib/libvmmapi/vmmapi.h
  head/lib/libvmmapi/vmmapi_freebsd.c
  head/sys/amd64/include/vmm.h
  head/sys/amd64/include/vmm_instruction_emul.h
  head/sys/amd64/vmm/intel/vmx.c
  head/sys/amd64/vmm/vmm.c
  head/sys/amd64/vmm/vmm_instruction_emul.c
  head/sys/boot/common/load_elf32.c
  head/sys/boot/common/load_elf32_obj.c
  head/sys/boot/userboot/userboot/Makefile
  head/sys/boot/userboot/userboot/bootinfo32.c
  head/sys/boot/userboot/userboot/bootinfo64.c
  head/sys/boot/userboot/userboot/elf32_freebsd.c
  head/sys/boot/userboot/userboot/libuserboot.h
  head/usr.sbin/bhyveload/bhyveload.c

Modified: head/lib/libvmmapi/vmmapi.h
==============================================================================
--- head/lib/libvmmapi/vmmapi.h	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/lib/libvmmapi/vmmapi.h	Wed Feb  5 04:39:03 2014	(r261504)
@@ -111,5 +111,8 @@ int	vcpu_reset(struct vmctx *ctx, int vc
 int	vm_setup_freebsd_registers(struct vmctx *ctx, int vcpu,
 				uint64_t rip, uint64_t cr3, uint64_t gdtbase,
 				uint64_t rsp);
+int	vm_setup_freebsd_registers_i386(struct vmctx *vmctx, int vcpu,
+					uint32_t eip, uint32_t gdtbase,
+					uint32_t esp);
 void	vm_setup_freebsd_gdt(uint64_t *gdtr);
 #endif	/* _VMMAPI_H_ */

Modified: head/lib/libvmmapi/vmmapi_freebsd.c
==============================================================================
--- head/lib/libvmmapi/vmmapi_freebsd.c	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/lib/libvmmapi/vmmapi_freebsd.c	Wed Feb  5 04:39:03 2014	(r261504)
@@ -35,14 +35,176 @@ __FBSDID("$FreeBSD$");
 #include <machine/segments.h>
 #include <machine/vmm.h>
 
+#include <errno.h>
+#include <string.h>
+
 #include "vmmapi.h"
 
+#define	I386_TSS_SIZE		104
+
+#define	DESC_PRESENT		0x00000080
+#define	DESC_LONGMODE		0x00002000
+#define	DESC_DEF32		0x00004000
+#define	DESC_GRAN		0x00008000
 #define	DESC_UNUSABLE		0x00010000
 
 #define	GUEST_NULL_SEL		0
 #define	GUEST_CODE_SEL		1
 #define	GUEST_DATA_SEL		2
-#define	GUEST_GDTR_LIMIT	(3 * 8 - 1)
+#define	GUEST_TSS_SEL		3
+#define	GUEST_GDTR_LIMIT64	(3 * 8 - 1)
+
+static struct segment_descriptor i386_gdt[] = {
+	{},						/* NULL */
+	{ .sd_lolimit = 0xffff, .sd_type = SDT_MEMER,	/* CODE */
+	  .sd_p = 1, .sd_hilimit = 0xf, .sd_def32 = 1, .sd_gran = 1 }, 
+	{ .sd_lolimit = 0xffff, .sd_type = SDT_MEMRW,	/* DATA */
+	  .sd_p = 1, .sd_hilimit = 0xf, .sd_def32 = 1, .sd_gran = 1 },
+	{ .sd_lolimit = I386_TSS_SIZE - 1,		/* TSS */
+	  .sd_type = SDT_SYS386TSS, .sd_p = 1 }
+};
+
+/*
+ * Setup the 'vcpu' register set such that it will begin execution at
+ * 'eip' in flat mode.
+ */
+int
+vm_setup_freebsd_registers_i386(struct vmctx *vmctx, int vcpu, uint32_t eip,
+				uint32_t gdtbase, uint32_t esp)
+{
+	uint64_t cr0, rflags, desc_base;
+	uint32_t desc_access, desc_limit, tssbase;
+	uint16_t gsel;
+	struct segment_descriptor *gdt;
+	int error, tmp;
+
+	/* A 32-bit guest requires unrestricted mode. */	
+	error = vm_get_capability(vmctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, &tmp);
+	if (error)
+		goto done;
+	error = vm_set_capability(vmctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
+	if (error)
+		goto done;
+
+	cr0 = CR0_PE | CR0_NE;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
+		goto done;
+
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, 0)) != 0)
+		goto done;
+
+	/*
+	 * Forcing EFER to 0 causes bhyve to clear the "IA-32e guest
+	 * mode" entry control.
+	 */
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_EFER, 0)))
+		goto done;
+
+	gdt = vm_map_gpa(vmctx, gdtbase, 0x1000);
+	if (gdt == NULL)
+		return (EFAULT);
+	memcpy(gdt, i386_gdt, sizeof(i386_gdt));
+	desc_base = gdtbase;
+	desc_limit = sizeof(i386_gdt) - 1;
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
+			    desc_base, desc_limit, 0);
+	if (error != 0)
+		goto done;
+
+	/* Place the TSS one page above the GDT. */
+	tssbase = gdtbase + 0x1000;
+	gdt[3].sd_lobase = tssbase;	
+
+	rflags = 0x2;
+	error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
+	if (error)
+		goto done;
+
+	desc_base = 0;
+	desc_limit = 0xffffffff;
+	desc_access = DESC_GRAN | DESC_DEF32 | DESC_PRESENT | SDT_MEMERA;
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
+			    desc_base, desc_limit, desc_access);
+
+	desc_access = DESC_GRAN | DESC_DEF32 | DESC_PRESENT | SDT_MEMRWA;
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
+			    desc_base, desc_limit, desc_access);
+	if (error)
+		goto done;
+
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
+			    desc_base, desc_limit, desc_access);
+	if (error)
+		goto done;
+
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
+			    desc_base, desc_limit, desc_access);
+	if (error)
+		goto done;
+
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
+			    desc_base, desc_limit, desc_access);
+	if (error)
+		goto done;
+
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
+			    desc_base, desc_limit, desc_access);
+	if (error)
+		goto done;
+
+	desc_base = tssbase;
+	desc_limit = I386_TSS_SIZE - 1;
+	desc_access = DESC_PRESENT | SDT_SYS386BSY;
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR,
+			    desc_base, desc_limit, desc_access);
+	if (error)
+		goto done;
+
+	
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, 0, 0,
+			    DESC_UNUSABLE);
+	if (error)
+		goto done;
+
+	gsel = GSEL(GUEST_CODE_SEL, SEL_KPL);
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, gsel)) != 0)
+		goto done;
+	
+	gsel = GSEL(GUEST_DATA_SEL, SEL_KPL);
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, gsel)) != 0)
+		goto done;
+	
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, gsel)) != 0)
+		goto done;
+
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, gsel)) != 0)
+		goto done;
+	
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, gsel)) != 0)
+		goto done;
+	
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, gsel)) != 0)
+		goto done;
+
+	gsel = GSEL(GUEST_TSS_SEL, SEL_KPL);
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, gsel)) != 0)
+		goto done;
+
+	/* LDTR is pointing to the null selector */
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
+		goto done;
+
+	/* entry point */
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, eip)) != 0)
+		goto done;
+
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, esp)) != 0)
+		goto done;
+
+	error = 0;
+done:
+	return (error);
+}
 
 void     
 vm_setup_freebsd_gdt(uint64_t *gdtr)
@@ -168,7 +330,7 @@ vm_setup_freebsd_registers(struct vmctx 
 		goto done;
 
 	desc_base = gdtbase;
-	desc_limit = GUEST_GDTR_LIMIT;
+	desc_limit = GUEST_GDTR_LIMIT64;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
 			    desc_base, desc_limit, 0);
 	if (error != 0)

Modified: head/sys/amd64/include/vmm.h
==============================================================================
--- head/sys/amd64/include/vmm.h	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/sys/amd64/include/vmm.h	Wed Feb  5 04:39:03 2014	(r261504)
@@ -323,6 +323,8 @@ struct vm_exit {
 			uint64_t	gpa;
 			uint64_t	gla;
 			uint64_t	cr3;
+			enum vie_cpu_mode cpu_mode;
+			enum vie_paging_mode paging_mode;
 			struct vie	vie;
 		} inst_emul;
 		/*

Modified: head/sys/amd64/include/vmm_instruction_emul.h
==============================================================================
--- head/sys/amd64/include/vmm_instruction_emul.h	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/sys/amd64/include/vmm_instruction_emul.h	Wed Feb  5 04:39:03 2014	(r261504)
@@ -29,6 +29,18 @@
 #ifndef	_VMM_INSTRUCTION_EMUL_H_
 #define	_VMM_INSTRUCTION_EMUL_H_
 
+enum vie_cpu_mode {
+	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
+	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
+};
+
+enum vie_paging_mode {
+	PAGING_MODE_FLAT,
+	PAGING_MODE_32,
+	PAGING_MODE_PAE,
+	PAGING_MODE_64,
+};
+
 /*
  * The data structures 'vie' and 'vie_op' are meant to be opaque to the
  * consumers of instruction decoding. The only reason why their contents
@@ -107,7 +119,7 @@ int vmm_emulate_instruction(void *vm, in
  */
 int vmm_fetch_instruction(struct vm *vm, int cpuid,
 			  uint64_t rip, int inst_length, uint64_t cr3,
-			  struct vie *vie);
+			  enum vie_paging_mode paging_mode, struct vie *vie);
 
 void vie_init(struct vie *vie);
 
@@ -123,8 +135,8 @@ void vie_init(struct vie *vie);
  * in VIE_INVALID_GLA instead.
  */
 #define	VIE_INVALID_GLA		(1UL << 63)	/* a non-canonical address */
-int vmm_decode_instruction(struct vm *vm, int cpuid,
-			   uint64_t gla, struct vie *vie);
+int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
+			   enum vie_cpu_mode cpu_mode, struct vie *vie);
 #endif	/* _KERNEL */
 
 #endif	/* _VMM_INSTRUCTION_EMUL_H_ */

Modified: head/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- head/sys/amd64/vmm/intel/vmx.c	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/sys/amd64/vmm/intel/vmx.c	Wed Feb  5 04:39:03 2014	(r261504)
@@ -1338,6 +1338,30 @@ vmx_emulate_cr_access(struct vmx *vmx, i
 	return (HANDLED);
 }
 
+static enum vie_cpu_mode
+vmx_cpu_mode(void)
+{
+
+	if (vmcs_read(VMCS_GUEST_IA32_EFER) & EFER_LMA)
+		return (CPU_MODE_64BIT);
+	else
+		return (CPU_MODE_COMPATIBILITY);
+}
+
+static enum vie_paging_mode
+vmx_paging_mode(void)
+{
+
+	if (!(vmcs_read(VMCS_GUEST_CR0) & CR0_PG))
+		return (PAGING_MODE_FLAT);
+	if (!(vmcs_read(VMCS_GUEST_CR4) & CR4_PAE))
+		return (PAGING_MODE_32);
+	if (vmcs_read(VMCS_GUEST_IA32_EFER) & EFER_LME)
+		return (PAGING_MODE_64);
+	else
+		return (PAGING_MODE_PAE);
+}
+
 static int
 ept_fault_type(uint64_t ept_qual)
 {
@@ -1497,6 +1521,8 @@ vmx_handle_apic_access(struct vmx *vmx, 
 		vmexit->u.inst_emul.gpa = DEFAULT_APIC_BASE + offset;
 		vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
 		vmexit->u.inst_emul.cr3 = vmcs_guest_cr3();
+		vmexit->u.inst_emul.cpu_mode = vmx_cpu_mode();
+		vmexit->u.inst_emul.paging_mode = vmx_paging_mode();
 	}
 
 	/*
@@ -1724,6 +1750,8 @@ vmx_exit_process(struct vmx *vmx, int vc
 			vmexit->u.inst_emul.gpa = gpa;
 			vmexit->u.inst_emul.gla = vmcs_gla();
 			vmexit->u.inst_emul.cr3 = vmcs_guest_cr3();
+			vmexit->u.inst_emul.cpu_mode = vmx_cpu_mode();
+			vmexit->u.inst_emul.paging_mode = vmx_paging_mode();
 		}
 		/*
 		 * If Virtual NMIs control is 1 and the VM-exit is due to an

Modified: head/sys/amd64/vmm/vmm.c
==============================================================================
--- head/sys/amd64/vmm/vmm.c	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/sys/amd64/vmm/vmm.c	Wed Feb  5 04:39:03 2014	(r261504)
@@ -1056,6 +1056,8 @@ vm_handle_inst_emul(struct vm *vm, int v
 	struct vm_exit *vme;
 	int error, inst_length;
 	uint64_t rip, gla, gpa, cr3;
+	enum vie_cpu_mode cpu_mode;
+	enum vie_paging_mode paging_mode;
 	mem_region_read_t mread;
 	mem_region_write_t mwrite;
 
@@ -1068,15 +1070,18 @@ vm_handle_inst_emul(struct vm *vm, int v
 	gla = vme->u.inst_emul.gla;
 	gpa = vme->u.inst_emul.gpa;
 	cr3 = vme->u.inst_emul.cr3;
+	cpu_mode = vme->u.inst_emul.cpu_mode;
+	paging_mode = vme->u.inst_emul.paging_mode;
 	vie = &vme->u.inst_emul.vie;
 
 	vie_init(vie);
 
 	/* Fetch, decode and emulate the faulting instruction */
-	if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3, vie) != 0)
+	if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3,
+	    paging_mode, vie) != 0)
 		return (EFAULT);
 
-	if (vmm_decode_instruction(vm, vcpuid, gla, vie) != 0)
+	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, vie) != 0)
 		return (EFAULT);
 
 	/* return to userland unless this is an in-kernel emulated device */

Modified: head/sys/amd64/vmm/vmm_instruction_emul.c
==============================================================================
--- head/sys/amd64/vmm/vmm_instruction_emul.c	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/sys/amd64/vmm/vmm_instruction_emul.c	Wed Feb  5 04:39:03 2014	(r261504)
@@ -49,11 +49,6 @@ __FBSDID("$FreeBSD$");
 #include <vmmapi.h>
 #endif	/* _KERNEL */
 
-enum cpu_mode {
-	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
-	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
-};
-
 /* struct vie_op.op_type */
 enum {
 	VIE_OP_TYPE_NONE = 0,
@@ -546,16 +541,76 @@ vie_init(struct vie *vie)
 
 static int
 gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
-	uint64_t *gpa, uint64_t *gpaend)
+	uint64_t *gpa, enum vie_paging_mode paging_mode)
 {
 	int nlevels, ptpshift, ptpindex;
 	uint64_t *ptpbase, pte, pgsize;
+	uint32_t *ptpbase32, pte32;
 	void *cookie;
 
-	/*
-	 * XXX assumes 64-bit guest with 4 page walk levels
-	 */
-	nlevels = 4;
+	if (paging_mode == PAGING_MODE_FLAT) {
+		*gpa = gla;
+		return (0);
+	}
+
+	if (paging_mode == PAGING_MODE_32) {
+		nlevels = 2;
+		while (--nlevels >= 0) {
+			/* Zero out the lower 12 bits. */
+			ptpphys &= ~0xfff;
+
+			ptpbase32 = vm_gpa_hold(vm, ptpphys, PAGE_SIZE,
+						VM_PROT_READ, &cookie);
+			
+			if (ptpbase32 == NULL)
+				goto error;
+
+			ptpshift = PAGE_SHIFT + nlevels * 10;
+			ptpindex = (gla >> ptpshift) & 0x3FF;
+			pgsize = 1UL << ptpshift;
+
+			pte32 = ptpbase32[ptpindex];
+
+			vm_gpa_release(cookie);
+
+			if ((pte32 & PG_V) == 0)
+				goto error;
+
+			if (pte32 & PG_PS)
+				break;
+
+			ptpphys = pte32;
+		}
+
+		/* Zero out the lower 'ptpshift' bits */
+		pte32 >>= ptpshift; pte32 <<= ptpshift;
+		*gpa = pte32 | (gla & (pgsize - 1));
+		return (0);
+	}
+
+	if (paging_mode == PAGING_MODE_PAE) {
+		/* Zero out the lower 5 bits and the upper 12 bits */
+		ptpphys >>= 5; ptpphys <<= 17; ptpphys >>= 12;
+
+		ptpbase = vm_gpa_hold(vm, ptpphys, sizeof(*ptpbase) * 4,
+				      VM_PROT_READ, &cookie);
+		if (ptpbase == NULL)
+			goto error;
+
+		ptpindex = (gla >> 30) & 0x3;
+
+		pte = ptpbase[ptpindex];
+
+		vm_gpa_release(cookie);
+
+		if ((pte & PG_V) == 0)
+			goto error;
+
+		ptpphys = pte;
+
+		nlevels = 2;
+	} else
+		nlevels = 4;
 	while (--nlevels >= 0) {
 		/* Zero out the lower 12 bits and the upper 12 bits */
 		ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;
@@ -589,7 +644,6 @@ gla2gpa(struct vm *vm, uint64_t gla, uin
 	/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
 	pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
 	*gpa = pte | (gla & (pgsize - 1));
-	*gpaend = pte + pgsize;
 	return (0);
 
 error:
@@ -598,10 +652,11 @@ error:
 
 int
 vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
-		      uint64_t cr3, struct vie *vie)
+		      uint64_t cr3, enum vie_paging_mode paging_mode,
+		      struct vie *vie)
 {
 	int n, err, prot;
-	uint64_t gpa, gpaend, off;
+	uint64_t gpa, off;
 	void *hpa, *cookie;
 
 	/*
@@ -614,7 +669,7 @@ vmm_fetch_instruction(struct vm *vm, int
 
 	/* Copy the instruction into 'vie' */
 	while (vie->num_valid < inst_length) {
-		err = gla2gpa(vm, rip, cr3, &gpa, &gpaend);
+		err = gla2gpa(vm, rip, cr3, &gpa, paging_mode);
 		if (err)
 			break;
 
@@ -717,15 +772,9 @@ decode_opcode(struct vie *vie)
 }
 
 static int
-decode_modrm(struct vie *vie)
+decode_modrm(struct vie *vie, enum vie_cpu_mode cpu_mode)
 {
 	uint8_t x;
-	enum cpu_mode cpu_mode;
-
-	/*
-	 * XXX assuming that guest is in IA-32E 64-bit mode
-	 */
-	cpu_mode = CPU_MODE_64BIT;
 
 	if (vie_peek(vie, &x))
 		return (-1);
@@ -1002,16 +1051,19 @@ verify_gla(struct vm *vm, int cpuid, uin
 }
 
 int
-vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
+vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
+		       enum vie_cpu_mode cpu_mode, struct vie *vie)
 {
 
-	if (decode_rex(vie))
-		return (-1);
+	if (cpu_mode == CPU_MODE_64BIT) {
+		if (decode_rex(vie))
+			return (-1);
+	}
 
 	if (decode_opcode(vie))
 		return (-1);
 
-	if (decode_modrm(vie))
+	if (decode_modrm(vie, cpu_mode))
 		return (-1);
 
 	if (decode_sib(vie))

Modified: head/sys/boot/common/load_elf32.c
==============================================================================
--- head/sys/boot/common/load_elf32.c	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/sys/boot/common/load_elf32.c	Wed Feb  5 04:39:03 2014	(r261504)
@@ -2,5 +2,6 @@
 __FBSDID("$FreeBSD$");
 
 #define __ELF_WORD_SIZE 32
+#define	_MACHINE_ELF_WANT_32BIT
 
 #include "load_elf.c"

Modified: head/sys/boot/common/load_elf32_obj.c
==============================================================================
--- head/sys/boot/common/load_elf32_obj.c	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/sys/boot/common/load_elf32_obj.c	Wed Feb  5 04:39:03 2014	(r261504)
@@ -2,5 +2,6 @@
 __FBSDID("$FreeBSD$");
 
 #define __ELF_WORD_SIZE 32
+#define	_MACHINE_ELF_WANT_32BIT
 
 #include "load_elf_obj.c"

Modified: head/sys/boot/userboot/userboot/Makefile
==============================================================================
--- head/sys/boot/userboot/userboot/Makefile	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/sys/boot/userboot/userboot/Makefile	Wed Feb  5 04:39:03 2014	(r261504)
@@ -11,6 +11,7 @@ STRIP=
 LIBDIR=		/boot
 
 SRCS=		autoload.c
+SRCS+=		biossmap.c
 SRCS+=		bootinfo.c
 SRCS+=		bootinfo32.c
 SRCS+=		bootinfo64.c

Copied and modified: head/sys/boot/userboot/userboot/biossmap.c (from r261503, head/sys/boot/userboot/userboot/bootinfo64.c)
==============================================================================
--- head/sys/boot/userboot/userboot/bootinfo64.c	Wed Feb  5 02:01:08 2014	(r261503, copy source)
+++ head/sys/boot/userboot/userboot/biossmap.c	Wed Feb  5 04:39:03 2014	(r261504)
@@ -31,163 +31,20 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/reboot.h>
 #include <sys/linker.h>
-#include <i386/include/bootinfo.h>
-#include <machine/cpufunc.h>
-#include <machine/psl.h>
-#include <machine/specialreg.h>
+#include <machine/pc/bios.h>
+#include <machine/metadata.h>
 
 #include "bootstrap.h"
 #include "libuserboot.h"
 
-/*
- * Copy module-related data into the load area, where it can be
- * used as a directory for loaded modules.
- *
- * Module data is presented in a self-describing format.  Each datum
- * is preceded by a 32-bit identifier and a 32-bit size field.
- *
- * Currently, the following data are saved:
- *
- * MOD_NAME	(variable)		module name (string)
- * MOD_TYPE	(variable)		module type (string)
- * MOD_ARGS	(variable)		module parameters (string)
- * MOD_ADDR	sizeof(vm_offset_t)	module load address
- * MOD_SIZE	sizeof(size_t)		module size
- * MOD_METADATA	(variable)		type-specific metadata
- */
-#define COPY32(v, a, c) {			\
-    u_int32_t	x = (v);			\
-    if (c)					\
-        CALLBACK(copyin, &x, a, sizeof(x));	\
-    a += sizeof(x);				\
-}
-
-#define MOD_STR(t, a, s, c) {			\
-    COPY32(t, a, c);				\
-    COPY32(strlen(s) + 1, a, c);		\
-    if (c)					\
-        CALLBACK(copyin, s, a, strlen(s) + 1);  \
-    a += roundup(strlen(s) + 1, sizeof(u_int64_t));\
-}
-
-#define MOD_NAME(a, s, c)	MOD_STR(MODINFO_NAME, a, s, c)
-#define MOD_TYPE(a, s, c)	MOD_STR(MODINFO_TYPE, a, s, c)
-#define MOD_ARGS(a, s, c)	MOD_STR(MODINFO_ARGS, a, s, c)
-
-#define MOD_VAR(t, a, s, c) {			\
-    COPY32(t, a, c);				\
-    COPY32(sizeof(s), a, c);			\
-    if (c)					\
-        CALLBACK(copyin, &s, a, sizeof(s));	\
-    a += roundup(sizeof(s), sizeof(u_int64_t));	\
-}
-
-#define MOD_ADDR(a, s, c)	MOD_VAR(MODINFO_ADDR, a, s, c)
-#define MOD_SIZE(a, s, c)	MOD_VAR(MODINFO_SIZE, a, s, c)
-
-#define MOD_METADATA(a, mm, c) {		\
-    COPY32(MODINFO_METADATA | mm->md_type, a, c); \
-    COPY32(mm->md_size, a, c);			\
-    if (c)					\
-        CALLBACK(copyin, mm->md_data, a, mm->md_size);    \
-    a += roundup(mm->md_size, sizeof(u_int64_t));\
-}
-
-#define MOD_END(a, c) {				\
-    COPY32(MODINFO_END, a, c);			\
-    COPY32(0, a, c);				\
-}
-
-static vm_offset_t
-bi_copymodules64(vm_offset_t addr)
-{
-    struct preloaded_file	*fp;
-    struct file_metadata	*md;
-    int				c;
-    u_int64_t			v;
-
-    c = addr != 0;
-    /* start with the first module on the list, should be the kernel */
-    for (fp = file_findfile(NULL, NULL); fp != NULL; fp = fp->f_next) {
-
-	MOD_NAME(addr, fp->f_name, c);	/* this field must come first */
-	MOD_TYPE(addr, fp->f_type, c);
-	if (fp->f_args)
-	    MOD_ARGS(addr, fp->f_args, c);
-	v = fp->f_addr;
-	MOD_ADDR(addr, v, c);
-	v = fp->f_size;
-	MOD_SIZE(addr, v, c);
-	for (md = fp->f_metadata; md != NULL; md = md->md_next)
-	    if (!(md->md_type & MODINFOMD_NOCOPY))
-		MOD_METADATA(addr, md, c);
-    }
-    MOD_END(addr, c);
-    return(addr);
-}
-
-/*
- * Check to see if this CPU supports long mode.
- */
-static int
-bi_checkcpu(void)
-{
-#if 0
-    char *cpu_vendor;
-    int vendor[3];
-    int eflags, regs[4];
-
-    /* Check for presence of "cpuid". */
-    eflags = read_eflags();
-    write_eflags(eflags ^ PSL_ID);
-    if (!((eflags ^ read_eflags()) & PSL_ID))
-	return (0);
-
-    /* Fetch the vendor string. */
-    do_cpuid(0, regs);
-    vendor[0] = regs[1];
-    vendor[1] = regs[3];
-    vendor[2] = regs[2];
-    cpu_vendor = (char *)vendor;
-
-    /* Check for vendors that support AMD features. */
-    if (strncmp(cpu_vendor, INTEL_VENDOR_ID, 12) != 0 &&
-	strncmp(cpu_vendor, AMD_VENDOR_ID, 12) != 0 &&
-	strncmp(cpu_vendor, CENTAUR_VENDOR_ID, 12) != 0)
-	return (0);
-
-    /* Has to support AMD features. */
-    do_cpuid(0x80000000, regs);
-    if (!(regs[0] >= 0x80000001))
-	return (0);
-
-    /* Check for long mode. */
-    do_cpuid(0x80000001, regs);
-    return (regs[3] & AMDID_LM);
-#else
-	return (1);
-#endif
-}
-
-struct smap {
-        uint64_t       base;
-        uint64_t       length;
-        uint32_t       type;
-} __packed;
-
-/* From FreeBSD <machine/pc/bios.h> */
-#define SMAP_TYPE_MEMORY	1
-
 #define GB (1024UL * 1024 * 1024)
 
-#define MODINFOMD_SMAP          0x1001
-
-static void
+void
 bios_addsmapdata(struct preloaded_file *kfp)
 {
 	uint64_t lowmem, highmem;
 	int smapnum, len;
-	struct smap smap[3], *sm;
+	struct bios_smap smap[3], *sm;
 
 	CALLBACK(getmem, &lowmem, &highmem);
 
@@ -212,93 +69,6 @@ bios_addsmapdata(struct preloaded_file *
 		smapnum++;
         }
 
-        len = smapnum * sizeof (struct smap);
+        len = smapnum * sizeof(struct bios_smap);
         file_addmetadata(kfp, MODINFOMD_SMAP, len, &smap[0]);
 }
-
-/*
- * Load the information expected by an amd64 kernel.
- *
- * - The 'boothowto' argument is constructed
- * - The 'bootdev' argument is constructed
- * - The 'bootinfo' struct is constructed, and copied into the kernel space.
- * - The kernel environment is copied into kernel space.
- * - Module metadata are formatted and placed in kernel space.
- */
-int
-bi_load64(char *args, vm_offset_t *modulep, vm_offset_t *kernendp)
-{
-    struct preloaded_file	*xp, *kfp;
-    struct userboot_devdesc	*rootdev;
-    struct file_metadata	*md;
-    vm_offset_t			addr;
-    u_int64_t			kernend;
-    u_int64_t			envp;
-    vm_offset_t			size;
-    char			*rootdevname;
-    int				howto;
-
-    if (!bi_checkcpu()) {
-	printf("CPU doesn't support long mode\n");
-	return (EINVAL);
-    }
-
-    howto = bi_getboothowto(args);
-
-    /* 
-     * Allow the environment variable 'rootdev' to override the supplied device 
-     * This should perhaps go to MI code and/or have $rootdev tested/set by
-     * MI code before launching the kernel.
-     */
-    rootdevname = getenv("rootdev");
-    userboot_getdev((void **)(&rootdev), rootdevname, NULL);
-    if (rootdev == NULL) {		/* bad $rootdev/$currdev */
-	printf("can't determine root device\n");
-	return(EINVAL);
-    }
-
-    /* Try reading the /etc/fstab file to select the root device */
-    getrootmount(userboot_fmtdev((void *)rootdev));
-
-    /* find the last module in the chain */
-    addr = 0;
-    for (xp = file_findfile(NULL, NULL); xp != NULL; xp = xp->f_next) {
-	if (addr < (xp->f_addr + xp->f_size))
-	    addr = xp->f_addr + xp->f_size;
-    }
-    /* pad to a page boundary */
-    addr = roundup(addr, PAGE_SIZE);
-
-    /* copy our environment */
-    envp = addr;
-    addr = bi_copyenv(addr);
-
-    /* pad to a page boundary */
-    addr = roundup(addr, PAGE_SIZE);
-
-    kfp = file_findfile(NULL, "elf kernel");
-    if (kfp == NULL)
-      kfp = file_findfile(NULL, "elf64 kernel");
-    if (kfp == NULL)
-	panic("can't find kernel file");
-    kernend = 0;	/* fill it in later */
-    file_addmetadata(kfp, MODINFOMD_HOWTO, sizeof howto, &howto);
-    file_addmetadata(kfp, MODINFOMD_ENVP, sizeof envp, &envp);
-    file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof kernend, &kernend);
-    bios_addsmapdata(kfp);
-
-    /* Figure out the size and location of the metadata */
-    *modulep = addr;
-    size = bi_copymodules64(0);
-    kernend = roundup(addr + size, PAGE_SIZE);
-    *kernendp = kernend;
-
-    /* patch MODINFOMD_KERNEND */
-    md = file_findmetadata(kfp, MODINFOMD_KERNEND);
-    bcopy(&kernend, md->md_data, sizeof kernend);
-
-    /* copy module list and metadata */
-    (void)bi_copymodules64(addr);
-
-    return(0);
-}

Modified: head/sys/boot/userboot/userboot/bootinfo32.c
==============================================================================
--- head/sys/boot/userboot/userboot/bootinfo32.c	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/sys/boot/userboot/userboot/bootinfo32.c	Wed Feb  5 04:39:03 2014	(r261504)
@@ -66,7 +66,7 @@ static struct bootinfo  bi;
     COPY32(strlen(s) + 1, a, c);		\
     if (c)					\
         CALLBACK(copyin, s, a, strlen(s) + 1);  \
-    a += roundup(strlen(s) + 1, sizeof(u_long));\
+    a += roundup(strlen(s) + 1, sizeof(uint32_t));\
 }
 
 #define MOD_NAME(a, s, c)	MOD_STR(MODINFO_NAME, a, s, c)
@@ -78,7 +78,7 @@ static struct bootinfo  bi;
     COPY32(sizeof(s), a, c);			\
     if (c)					\
         CALLBACK(copyin, &s, a, sizeof(s));	\
-    a += roundup(sizeof(s), sizeof(u_long));	\
+    a += roundup(sizeof(s), sizeof(uint32_t));	\
 }
 
 #define MOD_ADDR(a, s, c)	MOD_VAR(MODINFO_ADDR, a, s, c)
@@ -89,7 +89,7 @@ static struct bootinfo  bi;
     COPY32(mm->md_size, a, c);			\
     if (c)					\
         CALLBACK(copyin, mm->md_data, a, mm->md_size);    \
-    a += roundup(mm->md_size, sizeof(u_long));\
+    a += roundup(mm->md_size, sizeof(uint32_t));\
 }
 
 #define MOD_END(a, c) {				\
@@ -146,6 +146,7 @@ bi_load32(char *args, int *howtop, int *
     int				bootdevnr, howto;
     char			*kernelname;
     const char			*kernelpath;
+    uint64_t			lowmem, highmem;
 
     howto = bi_getboothowto(args);
 
@@ -198,9 +199,7 @@ bi_load32(char *args, int *howtop, int *
     file_addmetadata(kfp, MODINFOMD_HOWTO, sizeof howto, &howto);
     file_addmetadata(kfp, MODINFOMD_ENVP, sizeof envp, &envp);
     file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof kernend, &kernend);
-#if 0
     bios_addsmapdata(kfp);
-#endif
 
     /* Figure out the size and location of the metadata */
     *modulep = addr;
@@ -237,11 +236,10 @@ bi_load32(char *args, int *howtop, int *
         bi.bi_bios_geom[i] = bd_getbigeom(i);
 #endif
     bi.bi_size = sizeof(bi);
+    CALLBACK(getmem, &lowmem, &highmem);
     bi.bi_memsizes_valid = 1;
-#if 0
-    bi.bi_basemem = bios_basemem / 1024;
-    bi.bi_extmem = bios_extmem / 1024;
-#endif
+    bi.bi_basemem = 640;
+    bi.bi_extmem = (lowmem - 0x100000) / 1024;
     bi.bi_envp = envp;
     bi.bi_modulep = *modulep;
     bi.bi_kernend = kernend;
@@ -251,7 +249,7 @@ bi_load32(char *args, int *howtop, int *
     /*
      * Copy the legacy bootinfo and kernel name to the guest at 0x2000
      */
-    bi.bi_kernelname = (char *) (0x2000 + sizeof(bi));
+    bi.bi_kernelname = 0x2000 + sizeof(bi);
     CALLBACK(copyin, &bi, 0x2000, sizeof(bi));
     CALLBACK(copyin, kernelname, 0x2000 + sizeof(bi), strlen(kernelname) + 1);
 

Modified: head/sys/boot/userboot/userboot/bootinfo64.c
==============================================================================
--- head/sys/boot/userboot/userboot/bootinfo64.c	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/sys/boot/userboot/userboot/bootinfo64.c	Wed Feb  5 04:39:03 2014	(r261504)
@@ -169,53 +169,6 @@ bi_checkcpu(void)
 #endif
 }
 
-struct smap {
-        uint64_t       base;
-        uint64_t       length;
-        uint32_t       type;
-} __packed;
-
-/* From FreeBSD <machine/pc/bios.h> */
-#define SMAP_TYPE_MEMORY	1
-
-#define GB (1024UL * 1024 * 1024)
-
-#define MODINFOMD_SMAP          0x1001
-
-static void
-bios_addsmapdata(struct preloaded_file *kfp)
-{
-	uint64_t lowmem, highmem;
-	int smapnum, len;
-	struct smap smap[3], *sm;
-
-	CALLBACK(getmem, &lowmem, &highmem);
-
-	sm = &smap[0];
-
-	sm->base = 0;				/* base memory */
-	sm->length = 640 * 1024;
-	sm->type = SMAP_TYPE_MEMORY;
-	sm++;
-
-	sm->base = 0x100000;			/* extended memory */
-	sm->length = lowmem - 0x100000;
-	sm->type = SMAP_TYPE_MEMORY;
-	sm++;
-
-	smapnum = 2;
-
-        if (highmem != 0) {
-                sm->base = 4 * GB;
-                sm->length = highmem;
-                sm->type = SMAP_TYPE_MEMORY;
-		smapnum++;
-        }
-
-        len = smapnum * sizeof (struct smap);
-        file_addmetadata(kfp, MODINFOMD_SMAP, len, &smap[0]);
-}
-
 /*
  * Load the information expected by an amd64 kernel.
  *

Modified: head/sys/boot/userboot/userboot/elf32_freebsd.c
==============================================================================
--- head/sys/boot/userboot/userboot/elf32_freebsd.c	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/sys/boot/userboot/userboot/elf32_freebsd.c	Wed Feb  5 04:39:03 2014	(r261504)
@@ -45,6 +45,9 @@ static int	elf32_obj_exec(struct preload
 struct file_format i386_elf = { elf32_loadfile, elf32_exec };
 struct file_format i386_elf_obj = { elf32_obj_loadfile, elf32_obj_exec };
 
+#define	GUEST_STACK	0x1000		/* Initial stack base */
+#define	GUEST_GDT	0x3000		/* Address of initial GDT */
+
 /*
  * There is an ELF kernel and one or more ELF modules loaded.  
  * We wish to start executing the kernel image, so make such 
@@ -57,7 +60,7 @@ elf32_exec(struct preloaded_file *fp)
 	Elf_Ehdr 		*ehdr;
 	vm_offset_t		entry, bootinfop, modulep, kernend;
 	int			boothowto, err, bootdev;
-	uint32_t		stack[1024];
+	uint32_t		stack[1024], *sp;
 
 
 	if ((md = file_findmetadata(fp, MODINFOMD_ELFHDR)) == NULL)
@@ -78,16 +81,27 @@ elf32_exec(struct preloaded_file *fp)
 	/*
 	 * Build a scratch stack at physical 0x1000
 	 */
-	stack[0] = boothowto;
-	stack[1] = bootdev;
-	stack[2] = 0;
-	stack[3] = 0;
-	stack[4] = 0;
-	stack[5] = bootinfop;
-	stack[6] = modulep;
-	stack[7] = kernend;
-	CALLBACK(copyin, stack, 0x1000, sizeof(stack));
-	CALLBACK(setreg, 4, 0x1000);
+	memset(stack, 0, sizeof(stack));
+	sp = (uint32_t *)((char *)stack + sizeof(stack));
+	*--sp = kernend;
+	*--sp = modulep;
+	*--sp = bootinfop;
+	*--sp = 0;
+	*--sp = 0;
+	*--sp = 0;
+	*--sp = bootdev;
+	*--sp = boothowto;
+
+	/*
+	 * Fake return address to mimic "new" boot blocks.  For more
+	 * details see recover_bootinfo in locore.S.
+	 */
+	*--sp = 0xbeefface;
+	CALLBACK(copyin, stack, GUEST_STACK, sizeof(stack));
+	CALLBACK(setreg, 4, (char *)sp - (char *)stack + GUEST_STACK);
+
+	CALLBACK(setgdt, GUEST_GDT, 8 * 4 - 1);
+
         CALLBACK(exec, entry);
 
 	panic("exec returned");

Modified: head/sys/boot/userboot/userboot/libuserboot.h
==============================================================================
--- head/sys/boot/userboot/userboot/libuserboot.h	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/sys/boot/userboot/userboot/libuserboot.h	Wed Feb  5 04:39:03 2014	(r261504)
@@ -65,3 +65,4 @@ vm_offset_t	bi_copyenv(vm_offset_t addr)
 int	bi_load32(char *args, int *howtop, int *bootdevp, vm_offset_t *bip,
     vm_offset_t *modulep, vm_offset_t *kernend);
 int	bi_load64(char *args, vm_offset_t *modulep, vm_offset_t *kernend);
+void	bios_addsmapdata(struct preloaded_file *kfp);

Modified: head/usr.sbin/bhyveload/bhyveload.c
==============================================================================
--- head/usr.sbin/bhyveload/bhyveload.c	Wed Feb  5 02:01:08 2014	(r261503)
+++ head/usr.sbin/bhyveload/bhyveload.c	Wed Feb  5 04:39:03 2014	(r261504)
@@ -465,7 +465,12 @@ cb_exec(void *arg, uint64_t rip)
 {
 	int error;
 
-	error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase, rsp);
+	if (cr3 == 0)
+		error = vm_setup_freebsd_registers_i386(ctx, BSP, rip, gdtbase,
+		    rsp);
+	else

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list