svn commit: r269008 - in head: lib/libvmmapi sys/amd64/include sys/amd64/vmm usr.sbin/bhyve

Wed Jul 23 04:28:53 UTC 2014

Author: neel
Date: Wed Jul 23 04:28:51 2014
New Revision: 269008
URL: http://svnweb.freebsd.org/changeset/base/269008

Log:
  Emulate instructions emitted by OpenBSD/i386 version 5.5:
  - CMP REG, r/m
  - MOV AX/EAX/RAX, moffset
  - MOV moffset, AX/EAX/RAX
  - PUSH r/m

Modified:
  head/lib/libvmmapi/vmmapi.c
  head/lib/libvmmapi/vmmapi.h
  head/sys/amd64/include/vmm.h
  head/sys/amd64/include/vmm_instruction_emul.h
  head/sys/amd64/vmm/vmm.c
  head/sys/amd64/vmm/vmm_instruction_emul.c
  head/usr.sbin/bhyve/bhyverun.c
  head/usr.sbin/bhyve/inout.c
  head/usr.sbin/bhyve/mem.c
  head/usr.sbin/bhyve/mem.h
  head/usr.sbin/bhyve/task_switch.c

Modified: head/lib/libvmmapi/vmmapi.c
==============================================================================

--- head/lib/libvmmapi/vmmapi.c	Wed Jul 23 02:55:03 2014	(r269007)
+++ head/lib/libvmmapi/vmmapi.c	Wed Jul 23 04:28:51 2014	(r269008)
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/_iovec.h>
 #include <sys/cpuset.h>
 
+#include <x86/segments.h>
 #include <machine/specialreg.h>
 #include <machine/param.h>
 
@@ -327,6 +328,16 @@ vm_get_desc(struct vmctx *ctx, int vcpu,
 }
 
 int
+vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc)
+{
+	int error;
+
+	error = vm_get_desc(ctx, vcpu, reg, &seg_desc->base, &seg_desc->limit,
+	    &seg_desc->access);
+	return (error);
+}
+
+int
 vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
 {
 	int error;
@@ -988,7 +999,7 @@ gla2gpa(struct vmctx *ctx, int vcpu, str
 #endif
 
 int
-vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
     uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt)
 {
 	uint64_t gpa;
@@ -1135,3 +1146,30 @@ vm_set_intinfo(struct vmctx *ctx, int vc
 	error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii);
 	return (error);
 }
+
+void
+vm_inject_ss(struct vmctx *ctx, int vcpu, int errcode)
+{
+	int error;
+
+	error = vm_inject_exception2(ctx, vcpu, IDT_SS, errcode);
+	assert(error == 0);
+}
+
+void
+vm_inject_ac(struct vmctx *ctx, int vcpu, int errcode)
+{
+	int error;
+
+	error = vm_inject_exception2(ctx, vcpu, IDT_AC, errcode);
+	assert(error == 0);
+}
+
+void
+vm_inject_gp(struct vmctx *ctx, int vcpu, int errcode)
+{
+	int error;
+
+	error = vm_inject_exception2(ctx, vcpu, IDT_GP, errcode);
+	assert(error == 0);
+}

Modified: head/lib/libvmmapi/vmmapi.h
==============================================================================
--- head/lib/libvmmapi/vmmapi.h	Wed Jul 23 02:55:03 2014	(r269007)
+++ head/lib/libvmmapi/vmmapi.h	Wed Jul 23 04:28:51 2014	(r269008)
@@ -66,6 +66,8 @@ int	vm_set_desc(struct vmctx *ctx, int v
 		    uint64_t base, uint32_t limit, uint32_t access);
 int	vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
 		    uint64_t *base, uint32_t *limit, uint32_t *access);
+int	vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg,
+			struct seg_desc *seg_desc);
 int	vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
 int	vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
 int	vm_run(struct vmctx *ctx, int vcpu, uint64_t rip,
@@ -124,13 +126,18 @@ int	vm_get_hpet_capabilities(struct vmct
  * The 'iovcnt' should be big enough to accomodate all GPA segments.
  * Returns 0 on success, 1 on a guest fault condition and -1 otherwise.
  */
-int	vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+int	vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *pg,
 	    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt);
 void	vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov,
 	    void *host_dst, size_t len);
 void	vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src,
 	    struct iovec *guest_iov, size_t len);
 
+/* Helper functions to inject exceptions */
+void	vm_inject_ss(struct vmctx *ctx, int vcpu, int errcode);
+void	vm_inject_ac(struct vmctx *ctx, int vcpu, int errcode);
+void	vm_inject_gp(struct vmctx *ctx, int vcpu, int errcode);
+
 /* Reset vcpu register state */
 int	vcpu_reset(struct vmctx *ctx, int vcpu);
 

Modified: head/sys/amd64/include/vmm.h
==============================================================================
--- head/sys/amd64/include/vmm.h	Wed Jul 23 02:55:03 2014	(r269007)
+++ head/sys/amd64/include/vmm.h	Wed Jul 23 04:28:51 2014	(r269008)
@@ -114,6 +114,7 @@ struct vioapic;
 struct vlapic;
 struct vmspace;
 struct vm_object;
+struct vm_guest_paging;
 struct pmap;
 
 typedef int	(*vmm_init_func_t)(int ipinum);
@@ -317,10 +318,41 @@ int vm_get_intinfo(struct vm *vm, int vc
 
 void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */
 void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */
+void vm_inject_ac(struct vm *vm, int vcpuid, int errcode); /* #AC */
+void vm_inject_ss(struct vm *vm, int vcpuid, int errcode); /* #SS */
 void vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2);
 
 enum vm_reg_name vm_segment_name(int seg_encoding);
 
+struct vm_copyinfo {
+	uint64_t	gpa;
+	size_t		len;
+	void		*hva;
+	void		*cookie;
+};
+
+/*
+ * Set up 'copyinfo[]' to copy to/from guest linear address space starting
+ * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for
+ * a copyin or PROT_WRITE for a copyout. 
+ *
+ * Returns 0 on success.
+ * Returns 1 if an exception was injected into the guest.
+ * Returns -1 otherwise.
+ *
+ * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if
+ * the return value is 0. The 'copyinfo[]' resources should be freed by calling
+ * 'vm_copy_teardown()' after the copy is done.
+ */
+int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+    uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
+    int num_copyinfo);
+void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
+    int num_copyinfo);
+void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
+    void *kaddr, size_t len);
+void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
+    struct vm_copyinfo *copyinfo, size_t len);
 #endif	/* KERNEL */
 
 #define	VM_MAXCPU	16			/* maximum virtual cpus */

Modified: head/sys/amd64/include/vmm_instruction_emul.h
==============================================================================
--- head/sys/amd64/include/vmm_instruction_emul.h	Wed Jul 23 02:55:03 2014	(r269007)
+++ head/sys/amd64/include/vmm_instruction_emul.h	Wed Jul 23 04:28:51 2014	(r269008)
@@ -52,8 +52,8 @@ typedef int (*mem_region_write_t)(void *
  * s
  */
 int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie,
-			    mem_region_read_t mrr, mem_region_write_t mrw,
-			    void *mrarg);
+    struct vm_guest_paging *paging, mem_region_read_t mrr,
+    mem_region_write_t mrw, void *mrarg);
 
 int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
     uint64_t val, int size);

Modified: head/sys/amd64/vmm/vmm.c
==============================================================================
--- head/sys/amd64/vmm/vmm.c	Wed Jul 23 02:55:03 2014	(r269007)
+++ head/sys/amd64/vmm/vmm.c	Wed Jul 23 04:28:51 2014	(r269008)
@@ -1235,8 +1235,8 @@ vm_handle_inst_emul(struct vm *vm, int v
 		return (0);
 	}
 
-	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
-	    retu);
+	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging,
+	    mread, mwrite, retu);
 
 	return (error);
 }
@@ -1751,6 +1751,30 @@ vm_inject_ud(struct vm *vm, int vcpuid)
 	vm_inject_fault(vm, vcpuid, &udf);
 }
 
+void
+vm_inject_ac(struct vm *vm, int vcpuid, int error_code)
+{
+	struct vm_exception acf = {
+		.vector = IDT_AC,
+		.error_code_valid = 1,
+		.error_code = error_code
+	};
+
+	vm_inject_fault(vm, vcpuid, &acf);
+}
+
+void
+vm_inject_ss(struct vm *vm, int vcpuid, int error_code)
+{
+	struct vm_exception ssf = {
+		.vector = IDT_SS,
+		.error_code_valid = 1,
+		.error_code = error_code
+	};
+
+	vm_inject_fault(vm, vcpuid, &ssf);
+}
+
 static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
 
 int
@@ -2182,6 +2206,97 @@ vm_segment_name(int seg)
 	return (seg_names[seg]);
 }
 
+void
+vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
+    int num_copyinfo)
+{
+	int idx;
+
+	for (idx = 0; idx < num_copyinfo; idx++) {
+		if (copyinfo[idx].cookie != NULL)
+			vm_gpa_release(copyinfo[idx].cookie);
+	}
+	bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo));
+}
+
+int
+vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+    uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
+    int num_copyinfo)
+{
+	int error, idx, nused;
+	size_t n, off, remaining;
+	void *hva, *cookie;
+	uint64_t gpa;
+
+	bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo);
+
+	nused = 0;
+	remaining = len;
+	while (remaining > 0) {
+		KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo"));
+		error = vmm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa);
+		if (error)
+			return (error);
+		off = gpa & PAGE_MASK;
+		n = min(remaining, PAGE_SIZE - off);
+		copyinfo[nused].gpa = gpa;
+		copyinfo[nused].len = n;
+		remaining -= n;
+		gla += n;
+		nused++;
+	}
+
+	for (idx = 0; idx < nused; idx++) {
+		hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len,
+		    prot, &cookie);
+		if (hva == NULL)
+			break;
+		copyinfo[idx].hva = hva;
+		copyinfo[idx].cookie = cookie;
+	}
+
+	if (idx != nused) {
+		vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo);
+		return (-1);
+	} else {
+		return (0);
+	}
+}
+
+void
+vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr,
+    size_t len)
+{
+	char *dst;
+	int idx;
+	
+	dst = kaddr;
+	idx = 0;
+	while (len > 0) {
+		bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len);
+		len -= copyinfo[idx].len;
+		dst += copyinfo[idx].len;
+		idx++;
+	}
+}
+
+void
+vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
+    struct vm_copyinfo *copyinfo, size_t len)
+{
+	const char *src;
+	int idx;
+
+	src = kaddr;
+	idx = 0;
+	while (len > 0) {
+		bcopy(src, copyinfo[idx].hva, copyinfo[idx].len);
+		len -= copyinfo[idx].len;
+		src += copyinfo[idx].len;
+		idx++;
+	}
+}
 
 /*
  * Return the amount of in-use and wired memory for the VM. Since

Modified: head/sys/amd64/vmm/vmm_instruction_emul.c
==============================================================================
--- head/sys/amd64/vmm/vmm_instruction_emul.c	Wed Jul 23 02:55:03 2014	(r269007)
+++ head/sys/amd64/vmm/vmm_instruction_emul.c	Wed Jul 23 04:28:51 2014	(r269008)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #else	/* !_KERNEL */
 #include <sys/types.h>
 #include <sys/errno.h>
+#include <sys/_iovec.h>
 
 #include <machine/vmm.h>
 
@@ -65,6 +66,8 @@ enum {
 	VIE_OP_TYPE_AND,
 	VIE_OP_TYPE_OR,
 	VIE_OP_TYPE_TWO_BYTE,
+	VIE_OP_TYPE_PUSH,
+	VIE_OP_TYPE_CMP,
 	VIE_OP_TYPE_LAST
 };
 
@@ -72,6 +75,7 @@ enum {
 #define	VIE_OP_F_IMM		(1 << 0)  /* 16/32-bit immediate operand */
 #define	VIE_OP_F_IMM8		(1 << 1)  /* 8-bit immediate operand */
 #define	VIE_OP_F_MOFFSET	(1 << 2)  /* 16/32/64-bit immediate moffset */
+#define	VIE_OP_F_NO_MODRM	(1 << 3)
 
 static const struct vie_op two_byte_opcodes[256] = {
 	[0xB6] = {
@@ -89,6 +93,10 @@ static const struct vie_op one_byte_opco
 		.op_byte = 0x0F,
 		.op_type = VIE_OP_TYPE_TWO_BYTE
 	},
+	[0x3B] = {
+		.op_byte = 0x3B,
+		.op_type = VIE_OP_TYPE_CMP,
+	},
 	[0x88] = {
 		.op_byte = 0x88,
 		.op_type = VIE_OP_TYPE_MOV,
@@ -105,6 +113,16 @@ static const struct vie_op one_byte_opco
 		.op_byte = 0x8B,
 		.op_type = VIE_OP_TYPE_MOV,
 	},
+	[0xA1] = {
+		.op_byte = 0xA1,
+		.op_type = VIE_OP_TYPE_MOV,
+		.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
+	},
+	[0xA3] = {
+		.op_byte = 0xA3,
+		.op_type = VIE_OP_TYPE_MOV,
+		.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
+	},
 	[0xC6] = {
 		/* XXX Group 11 extended opcode - not just MOV */
 		.op_byte = 0xC6,
@@ -132,6 +150,11 @@ static const struct vie_op one_byte_opco
 		.op_type = VIE_OP_TYPE_OR,
 		.op_flags = VIE_OP_F_IMM8,
 	},
+	[0xFF] = {
+		/* XXX Group 5 extended opcode - not just PUSH */
+		.op_byte = 0xFF,
+		.op_type = VIE_OP_TYPE_PUSH,
+	}
 };
 
 /* struct vie.mod */
@@ -284,6 +307,53 @@ vie_update_register(void *vm, int vcpuid
 	return (error);
 }
 
+/*
+ * Return the status flags that would result from doing (x - y).
+ */
+static u_long
+getcc16(uint16_t x, uint16_t y)
+{
+	u_long rflags;
+
+	__asm __volatile("sub %1,%2; pushfq; popq %0" :
+	    "=r" (rflags) : "m" (y), "r" (x));
+	return (rflags);
+}
+
+static u_long
+getcc32(uint32_t x, uint32_t y)
+{
+	u_long rflags;
+
+	__asm __volatile("sub %1,%2; pushfq; popq %0" :
+	    "=r" (rflags) : "m" (y), "r" (x));
+	return (rflags);
+}
+
+static u_long
+getcc64(uint64_t x, uint64_t y)
+{
+	u_long rflags;
+
+	__asm __volatile("sub %1,%2; pushfq; popq %0" :
+	    "=r" (rflags) : "m" (y), "r" (x));
+	return (rflags);
+}
+
+static u_long
+getcc(int opsize, uint64_t x, uint64_t y)
+{
+	KASSERT(opsize == 2 || opsize == 4 || opsize == 8,
+	    ("getcc: invalid operand size %d", opsize));
+
+	if (opsize == 2)
+		return (getcc16(x, y));
+	else if (opsize == 4)
+		return (getcc32(x, y));
+	else
+		return (getcc64(x, y));
+}
+
 static int
 emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
@@ -346,6 +416,32 @@ emulate_mov(void *vm, int vcpuid, uint64
 			error = vie_update_register(vm, vcpuid, reg, val, size);
 		}
 		break;
+	case 0xA1:
+		/*
+		 * MOV from seg:moffset to AX/EAX/RAX
+		 * A1:		mov AX, moffs16
+		 * A1:		mov EAX, moffs32
+		 * REX.W + A1:	mov RAX, moffs64
+		 */
+		error = memread(vm, vcpuid, gpa, &val, size, arg);
+		if (error == 0) {
+			reg = VM_REG_GUEST_RAX;
+			error = vie_update_register(vm, vcpuid, reg, val, size);
+		}
+		break;
+	case 0xA3:
+		/*
+		 * MOV from AX/EAX/RAX to seg:moffset
+		 * A3:		mov moffs16, AX
+		 * A3:		mov moffs32, EAX 
+		 * REX.W + A3:	mov moffs64, RAX
+		 */
+		error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
+		if (error == 0) {
+			val &= size2mask[size];
+			error = memwrite(vm, vcpuid, gpa, val, size, arg);
+		}
+		break;
 	case 0xC6:
 		/*
 		 * MOV from imm8 to mem (ModRM:r/m)
@@ -553,10 +649,150 @@ emulate_or(void *vm, int vcpuid, uint64_
 	return (error);
 }
 
+#define	RFLAGS_STATUS_BITS    (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
+
+static int
+emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+	int error, size;
+	uint64_t op1, op2, rflags, rflags2;
+	enum vm_reg_name reg;
+
+	size = vie->opsize;
+	switch (vie->op.op_byte) {
+	case 0x3B:
+		/*
+		 * 3B/r		CMP r16, r/m16
+		 * 3B/r		CMP r32, r/m32
+		 * REX.W + 3B/r	CMP r64, r/m64
+		 *
+		 * Compare first operand (reg) with second operand (r/m) and
+		 * set status flags in EFLAGS register. The comparison is
+		 * performed by subtracting the second operand from the first
+		 * operand and then setting the status flags.
+		 */
+
+		/* Get the first operand */
+		reg = gpr_map[vie->reg];
+		error = vie_read_register(vm, vcpuid, reg, &op1);
+		if (error)
+			return (error);
+
+		/* Get the second operand */
+		error = memread(vm, vcpuid, gpa, &op2, size, arg);
+		if (error)
+			return (error);
+
+		break;
+	default:
+		return (EINVAL);
+	}
+	rflags2 = getcc(size, op1, op2);
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+	if (error)
+		return (error);
+	rflags &= ~RFLAGS_STATUS_BITS;
+	rflags |= rflags2 & RFLAGS_STATUS_BITS;
+
+	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
+	return (error);
+}
+
+static int
+emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *arg)
+{
+#ifdef _KERNEL
+	struct vm_copyinfo copyinfo[2];
+#else
+	struct iovec copyinfo[2];
+#endif
+	struct seg_desc ss_desc;
+	uint64_t cr0, rflags, rsp, stack_gla, val;
+	int error, size, stackaddrsize;
+
+	/*
+	 * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
+	 *
+	 * PUSH is part of the group 5 extended opcodes and is identified
+	 * by ModRM:reg = b110.
+	 */
+	if ((vie->reg & 7) != 6)
+		return (EINVAL);
+
+	size = vie->opsize;
+	/*
+	 * From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1
+	 */
+	if (paging->cpu_mode == CPU_MODE_REAL)
+		stackaddrsize = 2;
+	else if (paging->cpu_mode == CPU_MODE_64BIT)
+		stackaddrsize = 8;
+	else {
+		/*
+		 * In protected or compability mode the 'B' flag in the
+		 * stack-segment descriptor determines the size of the
+		 * stack pointer.
+		 */
+		error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_SS, &ss_desc);
+		KASSERT(error == 0, ("%s: error %d getting SS descriptor",
+		    __func__, error));
+		if (SEG_DESC_DEF32(ss_desc.access))
+			stackaddrsize = 4;
+		else
+			stackaddrsize = 2;
+	}
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
+	KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp);
+	KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error));
+
+	rsp -= size;
+	if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc,
+	    rsp, size, stackaddrsize, PROT_WRITE, &stack_gla)) {
+		vm_inject_ss(vm, vcpuid, 0);
+		return (0);
+	}
+
+	if (vie_canonical_check(paging->cpu_mode, stack_gla)) {
+		vm_inject_ss(vm, vcpuid, 0);
+		return (0);
+	}
+
+	if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) {
+		vm_inject_ac(vm, vcpuid, 0);
+		return (0);
+	}
+
+	error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size, PROT_WRITE,
+	    copyinfo, nitems(copyinfo));
+	if (error)
+		return (error);
+
+	error = memread(vm, vcpuid, mmio_gpa, &val, size, arg);
+	if (error == 0) {
+		vm_copyout(vm, vcpuid, &val, copyinfo, size);
+		error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp,
+		    stackaddrsize);
+		KASSERT(error == 0, ("error %d updating rsp", error));
+	}
+#ifdef _KERNEL
+	vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+#endif
+	return (error);
+}
+
 int
 vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
-			mem_region_read_t memread, mem_region_write_t memwrite,
-			void *memarg)
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *memarg)
 {
 	int error;
 
@@ -564,6 +800,14 @@ vmm_emulate_instruction(void *vm, int vc
 		return (EINVAL);
 
 	switch (vie->op.op_type) {
+	case VIE_OP_TYPE_PUSH:
+		error = emulate_push(vm, vcpuid, gpa, vie, paging, memread,
+		    memwrite, memarg);
+		break;
+	case VIE_OP_TYPE_CMP:
+		error = emulate_cmp(vm, vcpuid, gpa, vie,
+				    memread, memwrite, memarg);
+		break;
 	case VIE_OP_TYPE_MOV:
 		error = emulate_mov(vm, vcpuid, gpa, vie,
 				    memread, memwrite, memarg);
@@ -970,45 +1214,24 @@ fault:
 }
 
 int
-vmm_fetch_instruction(struct vm *vm, int cpuid, struct vm_guest_paging *paging,
+vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
     uint64_t rip, int inst_length, struct vie *vie)
 {
-	int n, error, prot;
-	uint64_t gpa, off;
-	void *hpa, *cookie;
+	struct vm_copyinfo copyinfo[2];
+	int error, prot;
 
-	/*
-	 * XXX cache previously fetched instructions using 'rip' as the tag
-	 */
-
-	prot = VM_PROT_READ | VM_PROT_EXECUTE;
 	if (inst_length > VIE_INST_SIZE)
 		panic("vmm_fetch_instruction: invalid length %d", inst_length);
 
-	/* Copy the instruction into 'vie' */
-	while (vie->num_valid < inst_length) {
-		error = vmm_gla2gpa(vm, cpuid, paging, rip, prot, &gpa);
-		if (error)
-			return (error);
-
-		off = gpa & PAGE_MASK;
-		n = min(inst_length - vie->num_valid, PAGE_SIZE - off);
-
-		if ((hpa = vm_gpa_hold(vm, gpa, n, prot, &cookie)) == NULL)
-			break;
-
-		bcopy(hpa, &vie->inst[vie->num_valid], n);
-
-		vm_gpa_release(cookie);
-
-		rip += n;
-		vie->num_valid += n;
+	prot = PROT_READ | PROT_EXEC;
+	error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot,
+	    copyinfo, nitems(copyinfo));
+	if (error == 0) {
+		vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length);
+		vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+		vie->num_valid = inst_length;
 	}
-
-	if (vie->num_valid == inst_length)
-		return (0);
-	else
-		return (-1);
+	return (error);
 }
 
 static int
@@ -1138,6 +1361,9 @@ decode_modrm(struct vie *vie, enum vm_cp
 	if (cpu_mode == CPU_MODE_REAL)
 		return (-1);
 
+	if (vie->op.op_flags & VIE_OP_F_NO_MODRM)
+		return (0);
+
 	if (vie_peek(vie, &x))
 		return (-1);
 
@@ -1314,24 +1540,14 @@ decode_immediate(struct vie *vie)
 	int i, n;
 	uint8_t x;
 	union {
-		char	buf[8];
+		char	buf[4];
 		int8_t	signed8;
 		int16_t	signed16;
 		int32_t	signed32;
-		int64_t	signed64;
 	} u;
 
 	/* Figure out immediate operand size (if any) */
-	if (vie->op.op_flags & VIE_OP_F_MOFFSET) {
-		/*
-		 * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM:
-		 * The memory offset size follows the address-size of the
-		 * instruction. Although this is treated as an immediate
-		 * value during instruction decoding it is interpreted as
-		 * a segment offset by the instruction emulation.
-		 */
-		vie->imm_bytes = vie->addrsize;
-	} else if (vie->op.op_flags & VIE_OP_F_IMM) {
+	if (vie->op.op_flags & VIE_OP_F_IMM) {
 		/*
 		 * Section 2.2.1.5 "Immediates", Intel SDM:
 		 * In 64-bit mode the typical size of immediate operands
@@ -1350,7 +1566,7 @@ decode_immediate(struct vie *vie)
 	if ((n = vie->imm_bytes) == 0)
 		return (0);
 
-	KASSERT(n == 1 || n == 2 || n == 4 || n == 8,
+	KASSERT(n == 1 || n == 2 || n == 4,
 	    ("%s: invalid number of immediate bytes: %d", __func__, n));
 
 	for (i = 0; i < n; i++) {
@@ -1366,20 +1582,41 @@ decode_immediate(struct vie *vie)
 		vie->immediate = u.signed8;
 	else if (n == 2)
 		vie->immediate = u.signed16;
-	else if (n == 4)
-		vie->immediate = u.signed32;
 	else
-		vie->immediate = u.signed64;
+		vie->immediate = u.signed32;
 
+	return (0);
+}
 
-	if (vie->op.op_flags & VIE_OP_F_MOFFSET) {
-		/*
-		 * If the immediate value is going to be interpreted as a
-		 * segment offset then undo the sign-extension above.
-		 */
-		vie->immediate &= size2mask[n];
-	}
+static int
+decode_moffset(struct vie *vie)
+{
+	int i, n;
+	uint8_t x;
+	union {
+		char	buf[8];
+		uint64_t u64;
+	} u;
+
+	if ((vie->op.op_flags & VIE_OP_F_MOFFSET) == 0)
+		return (0);
 
+	/*
+	 * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM:
+	 * The memory offset size follows the address-size of the instruction.
+	 */
+	n = vie->addrsize;
+	KASSERT(n == 2 || n == 4 || n == 8, ("invalid moffset bytes: %d", n));
+
+	u.u64 = 0;
+	for (i = 0; i < n; i++) {
+		if (vie_peek(vie, &x))
+			return (-1);
+
+		u.buf[i] = x;
+		vie_advance(vie);
+	}
+	vie->displacement = u.u64;
 	return (0);
 }
 
@@ -1470,10 +1707,13 @@ vmm_decode_instruction(struct vm *vm, in
 
 	if (decode_displacement(vie))
 		return (-1);
-	
+
 	if (decode_immediate(vie))
 		return (-1);
 
+	if (decode_moffset(vie))
+		return (-1);
+
 	if (verify_inst_length(vie))
 		return (-1);
 

Modified: head/usr.sbin/bhyve/bhyverun.c
==============================================================================
--- head/usr.sbin/bhyve/bhyverun.c	Wed Jul 23 02:55:03 2014	(r269007)
+++ head/usr.sbin/bhyve/bhyverun.c	Wed Jul 23 04:28:51 2014	(r269008)
@@ -347,8 +347,7 @@ vmexit_rdmsr(struct vmctx *ctx, struct v
 		fprintf(stderr, "rdmsr to register %#x on vcpu %d\n",
 		    vme->u.msr.code, *pvcpu);
 		if (strictmsr) {
-			error = vm_inject_exception2(ctx, *pvcpu, IDT_GP, 0);
-			assert(error == 0);
+			vm_inject_gp(ctx, *pvcpu, 0);
 			return (VMEXIT_RESTART);
 		}
 	}
@@ -374,8 +373,7 @@ vmexit_wrmsr(struct vmctx *ctx, struct v
 		fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
 		    vme->u.msr.code, vme->u.msr.wval, *pvcpu);
 		if (strictmsr) {
-			error = vm_inject_exception2(ctx, *pvcpu, IDT_GP, 0);
-			assert(error == 0);
+			vm_inject_gp(ctx, *pvcpu, 0);
 			return (VMEXIT_RESTART);
 		}
 	}
@@ -484,7 +482,7 @@ vmexit_inst_emul(struct vmctx *ctx, stru
 	stats.vmexit_inst_emul++;
 
 	err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
-			  &vmexit->u.inst_emul.vie);
+	    &vmexit->u.inst_emul.vie, &vmexit->u.inst_emul.paging);
 
 	if (err) {
 		if (err == EINVAL) {

Modified: head/usr.sbin/bhyve/inout.c
==============================================================================
--- head/usr.sbin/bhyve/inout.c	Wed Jul 23 02:55:03 2014	(r269007)
+++ head/usr.sbin/bhyve/inout.c	Wed Jul 23 04:28:51 2014	(r269008)
@@ -157,15 +157,13 @@ emulate_inout(struct vmctx *ctx, int vcp
 			if (vie_calculate_gla(vis->paging.cpu_mode,
 			    vis->seg_name, &vis->seg_desc, index, bytes,
 			    addrsize, prot, &gla)) {
-				error = vm_inject_exception2(ctx, vcpu,
-				    IDT_GP, 0);
-				assert(error == 0);
+				vm_inject_gp(ctx, vcpu, 0);
 				retval = INOUT_RESTART;
 				break;
 			}
 
-			error = vm_gla2gpa(ctx, vcpu, &vis->paging, gla, bytes,
-			    prot, iov, nitems(iov));
+			error = vm_copy_setup(ctx, vcpu, &vis->paging, gla,
+			    bytes, prot, iov, nitems(iov));
 			assert(error == 0 || error == 1 || error == -1);
 			if (error) {
 				retval = (error == 1) ? INOUT_RESTART :
@@ -175,9 +173,7 @@ emulate_inout(struct vmctx *ctx, int vcp
 
 			if (vie_alignment_check(vis->paging.cpl, bytes,
 			    vis->cr0, vis->rflags, gla)) {
-				error = vm_inject_exception2(ctx, vcpu,
-				    IDT_AC, 0);
-				assert(error == 0);
+				vm_inject_ac(ctx, vcpu, 0);
 				return (INOUT_RESTART);
 			}
 

Modified: head/usr.sbin/bhyve/mem.c
==============================================================================
--- head/usr.sbin/bhyve/mem.c	Wed Jul 23 02:55:03 2014	(r269007)
+++ head/usr.sbin/bhyve/mem.c	Wed Jul 23 04:28:51 2014	(r269008)
@@ -157,7 +157,9 @@ mem_write(void *ctx, int vcpu, uint64_t 
 }
 
 int
-emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie)
+emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
+    struct vm_guest_paging *paging)
+
 {
 	struct mmio_rb_range *entry;
 	int err;
@@ -184,10 +186,10 @@ emulate_mem(struct vmctx *ctx, int vcpu,
 	}
 
 	assert(entry != NULL);
-	err = vmm_emulate_instruction(ctx, vcpu, paddr, vie,
+	err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging,
 				      mem_read, mem_write, &entry->mr_param);
 	pthread_rwlock_unlock(&mmio_rwlock);
-	
+
 	return (err);
 }
 

Modified: head/usr.sbin/bhyve/mem.h
==============================================================================
--- head/usr.sbin/bhyve/mem.h	Wed Jul 23 02:55:03 2014	(r269007)
+++ head/usr.sbin/bhyve/mem.h	Wed Jul 23 04:28:51 2014	(r269008)
@@ -50,7 +50,8 @@ struct mem_range {
 #define	MEM_F_RW		0x3
 
 void	init_mem(void);
-int     emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie);
+int     emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie,
+		    struct vm_guest_paging *paging);
 		    
 int	register_mem(struct mem_range *memp);
 int	register_mem_fallback(struct mem_range *memp);

Modified: head/usr.sbin/bhyve/task_switch.c
==============================================================================
--- head/usr.sbin/bhyve/task_switch.c	Wed Jul 23 02:55:03 2014	(r269007)
+++ head/usr.sbin/bhyve/task_switch.c	Wed Jul 23 04:28:51 2014	(r269008)
@@ -214,7 +214,7 @@ desc_table_rw(struct vmctx *ctx, int vcp
 	assert(error == 0);
 	assert(limit >= SEL_LIMIT(sel));
 
-	error = vm_gla2gpa(ctx, vcpu, paging, base + SEL_START(sel),
+	error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel),
 	    sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov));
 	if (error == 0) {
 		if (doread)
@@ -508,9 +508,7 @@ tss32_restore(struct vmctx *ctx, int vcp
 				 */
 				reserved = ~maxphyaddr | 0x1E6;
 				if (pdpte[i] & reserved) {
-					error = vm_inject_exception2(ctx, vcpu,
-					    IDT_GP, 0);
-					assert(error == 0);
+					vm_inject_gp(ctx, vcpu, 0);
 					return (VMEXIT_RESTART);
 				}
 			}
@@ -649,12 +647,11 @@ push_errcode(struct vmctx *ctx, int vcpu
 	}
 
 	if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
-		error = vm_inject_exception2(ctx, vcpu, IDT_AC, 1);
-		assert(error == 0);
+		vm_inject_ac(ctx, vcpu, 1);
 		return (VMEXIT_RESTART);
 	}
 
-	error = vm_gla2gpa(ctx, vcpu, paging, gla, bytes, PROT_WRITE,
+	error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE,
 	    iov, nitems(iov));
 	assert(error == 0 || error == 1 || error == -1);
 	if (error) {
@@ -753,7 +750,7 @@ vmexit_task_switch(struct vmctx *ctx, st
 	}
 
 	/* Fetch the new TSS */
-	error = vm_gla2gpa(ctx, vcpu, &sup_paging, nt.base, minlimit + 1,
+	error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1,
 	    PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov));
 	if (error == 1) {
 		/* Restart vcpu execution to handle the page fault */
@@ -793,7 +790,7 @@ vmexit_task_switch(struct vmctx *ctx, st
 		return (error);
 
 	/* Get the old TSS */
-	error = vm_gla2gpa(ctx, vcpu, &sup_paging, ot_base, minlimit + 1,
+	error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1,
 	    PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov));
 	if (error == 1) {
 		/* Restart vcpu execution to handle the page fault */