svn commit: r282558 - in head: lib/libvmmapi sys/amd64/include sys/amd64/vmm usr.sbin/bhyve

Neel Natu neel at FreeBSD.org
Wed May 6 16:25:23 UTC 2015


Author: neel
Date: Wed May  6 16:25:20 2015
New Revision: 282558
URL: https://svnweb.freebsd.org/changeset/base/282558

Log:
  Deprecate the 3-way return values from vm_gla2gpa() and vm_copy_setup().
  
  Prior to this change, both functions returned 0 for success, -1 for failure,
  and +1 to indicate that an exception was injected into the guest.
  
  The numerical value of ERESTART also happens to be -1, so when these
  functions returned -1 it had to be translated to a positive errno value to
  prevent the VM_RUN ioctl from being inadvertently restarted. This made it
  easy to introduce bugs when writing emulation code.
  
  Fix this by adding an 'int *guest_fault' parameter and setting it to '1' if
  an exception was delivered to the guest. The return value is now 0 or EFAULT,
  so no additional translation is needed.
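  
  For illustration, the new convention from a caller's point of view (a
  minimal sketch; 'ctx', 'vcpu', 'paging' and 'gla' are assumed to be set
  up elsewhere by the caller):
  
  	uint64_t gpa;
  	int error, fault;
  
  	error = vm_gla2gpa(ctx, vcpu, &paging, gla, PROT_READ, &gpa, &fault);
  	if (error)
  		return (error);	/* EFAULT: unrecoverable error */
  	if (fault)
  		return (0);	/* exception injected; resume the guest */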
  
  Reviewed by:	tychon
  MFC after:	2 weeks
  Differential Revision:	https://reviews.freebsd.org/D2428

Modified:
  head/lib/libvmmapi/vmmapi.c
  head/lib/libvmmapi/vmmapi.h
  head/sys/amd64/include/vmm.h
  head/sys/amd64/include/vmm_instruction_emul.h
  head/sys/amd64/vmm/vmm.c
  head/sys/amd64/vmm/vmm_dev.c
  head/sys/amd64/vmm/vmm_instruction_emul.c
  head/usr.sbin/bhyve/inout.c
  head/usr.sbin/bhyve/task_switch.c

Modified: head/lib/libvmmapi/vmmapi.c
==============================================================================
--- head/lib/libvmmapi/vmmapi.c	Wed May  6 16:21:12 2015	(r282557)
+++ head/lib/libvmmapi/vmmapi.c	Wed May  6 16:25:20 2015	(r282558)
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/specialreg.h>
 #include <machine/param.h>
 
+#include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
@@ -958,9 +959,9 @@ vm_get_hpet_capabilities(struct vmctx *c
 	return (error);
 }
 
-static int
-gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
-    uint64_t gla, int prot, int *fault, uint64_t *gpa)
+int
+vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+    uint64_t gla, int prot, uint64_t *gpa, int *fault)
 {
 	struct vm_gla2gpa gg;
 	int error;
@@ -979,29 +980,18 @@ gla2gpa(struct vmctx *ctx, int vcpu, str
 	return (error);
 }
 
-int
-vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
-    uint64_t gla, int prot, uint64_t *gpa)
-{
-	int error, fault;
-
-	error = gla2gpa(ctx, vcpu, paging, gla, prot, &fault, gpa);
-	if (fault)
-		error = fault;
-	return (error);
-}
-
 #ifndef min
 #define	min(a,b)	(((a) < (b)) ? (a) : (b))
 #endif
 
 int
 vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
-    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt)
+    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt,
+    int *fault)
 {
 	void *va;
 	uint64_t gpa;
-	int error, fault, i, n, off;
+	int error, i, n, off;
 
 	for (i = 0; i < iovcnt; i++) {
 		iov[i].iov_base = 0;
@@ -1010,18 +1000,16 @@ vm_copy_setup(struct vmctx *ctx, int vcp
 
 	while (len) {
 		assert(iovcnt > 0);
-		error = gla2gpa(ctx, vcpu, paging, gla, prot, &fault, &gpa);
-		if (error)
-			return (-1);
-		if (fault)
-			return (1);
+		error = vm_gla2gpa(ctx, vcpu, paging, gla, prot, &gpa, fault);
+		if (error || *fault)
+			return (error);
 
 		off = gpa & PAGE_MASK;
 		n = min(len, PAGE_SIZE - off);
 
 		va = vm_map_gpa(ctx, gpa, n);
 		if (va == NULL)
-			return (-1);
+			return (EFAULT);
 
 		iov->iov_base = va;
 		iov->iov_len = n;

Modified: head/lib/libvmmapi/vmmapi.h
==============================================================================
--- head/lib/libvmmapi/vmmapi.h	Wed May  6 16:21:12 2015	(r282557)
+++ head/lib/libvmmapi/vmmapi.h	Wed May  6 16:25:20 2015	(r282558)
@@ -64,7 +64,7 @@ int	vm_setup_memory(struct vmctx *ctx, s
 void	*vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
 int	vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
 int	vm_gla2gpa(struct vmctx *, int vcpuid, struct vm_guest_paging *paging,
-		   uint64_t gla, int prot, uint64_t *gpa);
+		   uint64_t gla, int prot, uint64_t *gpa, int *fault);
 uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
 void	vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
 void	vm_set_memflags(struct vmctx *ctx, int flags);
@@ -131,10 +131,15 @@ int	vm_get_hpet_capabilities(struct vmct
 /*
  * Translate the GLA range [gla,gla+len) into GPA segments in 'iov'.
 * The 'iovcnt' should be big enough to accommodate all GPA segments.
- * Returns 0 on success, 1 on a guest fault condition and -1 otherwise.
+ *
+ * retval	fault		Interpretation
+ *   0		  0		Success
+ *   0		  1		An exception was injected into the guest
+ * EFAULT	 N/A		Error
  */
 int	vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *pg,
-	    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt);
+	    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt,
+	    int *fault);
 void	vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov,
 	    void *host_dst, size_t len);
 void	vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src,
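
A minimal usage sketch of the userspace convention documented above
(hypothetical caller; 'paging', 'gla', 'len' and 'buf' stand in for real
emulation state):

	struct iovec iov[2];
	int error, fault;

	error = vm_copy_setup(ctx, vcpu, &paging, gla, len, PROT_READ,
	    iov, nitems(iov), &fault);
	if (error || fault)
		return (error);	/* EFAULT, or 0 with the fault injected */
	vm_copyin(ctx, vcpu, iov, buf, len);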

Modified: head/sys/amd64/include/vmm.h
==============================================================================
--- head/sys/amd64/include/vmm.h	Wed May  6 16:21:12 2015	(r282557)
+++ head/sys/amd64/include/vmm.h	Wed May  6 16:25:20 2015	(r282558)
@@ -345,9 +345,10 @@ struct vm_copyinfo {
  * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for
  * a copyin or PROT_WRITE for a copyout. 
  *
- * Returns 0 on success.
- * Returns 1 if an exception was injected into the guest.
- * Returns -1 otherwise.
+ * retval	is_fault	Interpretation
+ *   0		   0		Success
+ *   0		   1		An exception was injected into the guest
+ * EFAULT	  N/A		Unrecoverable error
  *
  * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if
  * the return value is 0. The 'copyinfo[]' resources should be freed by calling
@@ -355,7 +356,7 @@ struct vm_copyinfo {
  */
 int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
     uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
-    int num_copyinfo);
+    int num_copyinfo, int *is_fault);
 void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
     int num_copyinfo);
 void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
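
A corresponding kernel-side sketch, mirroring the pattern used by
vmm_fetch_instruction() later in this change ('buf' and 'len' are
illustrative); note that 'copyinfo[]' must be released with
vm_copy_teardown() once the copy is done:

	struct vm_copyinfo copyinfo[2];
	int error, fault;

	error = vm_copy_setup(vm, vcpuid, paging, gla, len, PROT_READ,
	    copyinfo, nitems(copyinfo), &fault);
	if (error || fault)
		return (error);
	vm_copyin(vm, vcpuid, copyinfo, buf, len);
	vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));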

Modified: head/sys/amd64/include/vmm_instruction_emul.h
==============================================================================
--- head/sys/amd64/include/vmm_instruction_emul.h	Wed May  6 16:21:12 2015	(r282557)
+++ head/sys/amd64/include/vmm_instruction_emul.h	Wed May  6 16:25:20 2015	(r282558)
@@ -81,17 +81,19 @@ int vie_calculate_gla(enum vm_cpu_mode c
  */
 int vmm_fetch_instruction(struct vm *vm, int cpuid,
 			  struct vm_guest_paging *guest_paging,
-			  uint64_t rip, int inst_length, struct vie *vie);
+			  uint64_t rip, int inst_length, struct vie *vie,
+			  int *is_fault);
 
 /*
  * Translate the guest linear address 'gla' to a guest physical address.
  *
- * Returns 0 on success and '*gpa' contains the result of the translation.
- * Returns 1 if an exception was injected into the guest.
- * Returns -1 otherwise.
+ * retval	is_fault	Interpretation
+ *   0		   0		'gpa' contains result of the translation
+ *   0		   1		An exception was injected into the guest
+ * EFAULT	  N/A		An unrecoverable hypervisor error occurred
  */
 int vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
-    uint64_t gla, int prot, uint64_t *gpa);
+    uint64_t gla, int prot, uint64_t *gpa, int *is_fault);
 
 void vie_init(struct vie *vie, const char *inst_bytes, int inst_length);
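
The two out-parameters also make ordering constraints easy to express. In
outline, the MMIO-to-MMIO case of emulate_movs() below performs both
translations before committing to any read or write with side effects, so
a translation fault can restart the instruction cleanly (variable names as
in emulate_movs()):

	error = vm_gla2gpa(vm, vcpuid, paging, srcaddr, PROT_READ,
	    &srcgpa, &fault);
	if (error || fault)
		goto done;
	error = vm_gla2gpa(vm, vcpuid, paging, dstaddr, PROT_WRITE,
	    &dstgpa, &fault);
	if (error || fault)
		goto done;
	/* Only now commit to reads/writes with side effects. */
	error = memread(vm, vcpuid, srcgpa, &val, opsize, arg);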
 

Modified: head/sys/amd64/vmm/vmm.c
==============================================================================
--- head/sys/amd64/vmm/vmm.c	Wed May  6 16:21:12 2015	(r282557)
+++ head/sys/amd64/vmm/vmm.c	Wed May  6 16:25:20 2015	(r282558)
@@ -1256,7 +1256,7 @@ vm_handle_inst_emul(struct vm *vm, int v
 	mem_region_read_t mread;
 	mem_region_write_t mwrite;
 	enum vm_cpu_mode cpu_mode;
-	int cs_d, error, length;
+	int cs_d, error, fault, length;
 
 	vcpu = &vm->vcpu[vcpuid];
 	vme = &vcpu->exitinfo;
@@ -1279,19 +1279,15 @@ vm_handle_inst_emul(struct vm *vm, int v
 		 */
 		length = vme->inst_length ? vme->inst_length : VIE_INST_SIZE;
 		error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip +
-		    cs_base, length, vie);
+		    cs_base, length, vie, &fault);
 	} else {
 		/*
 		 * The instruction bytes have already been copied into 'vie'
 		 */
-		error = 0;
+		error = fault = 0;
 	}
-	if (error == 1)
-		return (0);		/* Resume guest to handle page fault */
-	else if (error == -1)
-		return (EFAULT);
-	else if (error != 0)
-		panic("%s: vmm_fetch_instruction error %d", __func__, error);
+	if (error || fault)
+		return (error);
 
 	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) {
 		VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx",
@@ -2323,7 +2319,7 @@ vm_copy_teardown(struct vm *vm, int vcpu
 int
 vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
     uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
-    int num_copyinfo)
+    int num_copyinfo, int *fault)
 {
 	int error, idx, nused;
 	size_t n, off, remaining;
@@ -2336,8 +2332,8 @@ vm_copy_setup(struct vm *vm, int vcpuid,
 	remaining = len;
 	while (remaining > 0) {
 		KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo"));
-		error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa);
-		if (error)
+		error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa, fault);
+		if (error || *fault)
 			return (error);
 		off = gpa & PAGE_MASK;
 		n = min(remaining, PAGE_SIZE - off);
@@ -2359,8 +2355,9 @@ vm_copy_setup(struct vm *vm, int vcpuid,
 
 	if (idx != nused) {
 		vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo);
-		return (-1);
+		return (EFAULT);
 	} else {
+		*fault = 0;
 		return (0);
 	}
 }

Modified: head/sys/amd64/vmm/vmm_dev.c
==============================================================================
--- head/sys/amd64/vmm/vmm_dev.c	Wed May  6 16:21:12 2015	(r282557)
+++ head/sys/amd64/vmm/vmm_dev.c	Wed May  6 16:25:20 2015	(r282558)
@@ -441,19 +441,9 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
 		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
 		gg = (struct vm_gla2gpa *)data;
 		error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
-		    gg->prot, &gg->gpa);
-		KASSERT(error == 0 || error == 1 || error == -1,
+		    gg->prot, &gg->gpa, &gg->fault);
+		KASSERT(error == 0 || error == EFAULT,
 		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
-		if (error >= 0) {
-			/*
-			 * error = 0: the translation was successful
-			 * error = 1: a fault was injected into the guest
-			 */
-			gg->fault = error;
-			error = 0;
-		} else {
-			error = EFAULT;
-		}
 		break;
 	}
 	case VM_ACTIVATE_CPU:

Modified: head/sys/amd64/vmm/vmm_instruction_emul.c
==============================================================================
--- head/sys/amd64/vmm/vmm_instruction_emul.c	Wed May  6 16:21:12 2015	(r282557)
+++ head/sys/amd64/vmm/vmm_instruction_emul.c	Wed May  6 16:25:20 2015	(r282558)
@@ -597,13 +597,11 @@ emulate_movx(void *vm, int vcpuid, uint6
 
 /*
  * Helper function to calculate and validate a linear address.
- *
- * Returns 0 on success and 1 if an exception was injected into the guest.
  */
 static int
 get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging,
     int opsize, int addrsize, int prot, enum vm_reg_name seg,
-    enum vm_reg_name gpr, uint64_t *gla)
+    enum vm_reg_name gpr, uint64_t *gla, int *fault)
 {
 	struct seg_desc desc;
 	uint64_t cr0, val, rflags;
@@ -629,7 +627,7 @@ get_gla(void *vm, int vcpuid, struct vie
 			vm_inject_ss(vm, vcpuid, 0);
 		else
 			vm_inject_gp(vm, vcpuid);
-		return (1);
+		goto guest_fault;
 	}
 
 	if (vie_canonical_check(paging->cpu_mode, *gla)) {
@@ -637,14 +635,19 @@ get_gla(void *vm, int vcpuid, struct vie
 			vm_inject_ss(vm, vcpuid, 0);
 		else
 			vm_inject_gp(vm, vcpuid);
-		return (1);
+		goto guest_fault;
 	}
 
 	if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) {
 		vm_inject_ac(vm, vcpuid, 0);
-		return (1);
+		goto guest_fault;
 	}
 
+	*fault = 0;
+	return (0);
+
+guest_fault:
+	*fault = 1;
 	return (0);
 }
 
@@ -660,7 +663,7 @@ emulate_movs(void *vm, int vcpuid, uint6
 #endif
 	uint64_t dstaddr, srcaddr, dstgpa, srcgpa, val;
 	uint64_t rcx, rdi, rsi, rflags;
-	int error, opsize, seg, repeat;
+	int error, fault, opsize, seg, repeat;
 
 	opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize;
 	val = 0;
@@ -683,8 +686,10 @@ emulate_movs(void *vm, int vcpuid, uint6
 		 * The count register is %rcx, %ecx or %cx depending on the
 		 * address size of the instruction.
 		 */
-		if ((rcx & vie_size2mask(vie->addrsize)) == 0)
-			return (0);
+		if ((rcx & vie_size2mask(vie->addrsize)) == 0) {
+			error = 0;
+			goto done;
+		}
 	}
 
 	/*
@@ -705,13 +710,16 @@ emulate_movs(void *vm, int vcpuid, uint6
 
 	seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS;
 	error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
-	    PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr);
-	if (error)
+	    PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr, &fault);
+	if (error || fault)
 		goto done;
 
 	error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ,
-	    copyinfo, nitems(copyinfo));
+	    copyinfo, nitems(copyinfo), &fault);
 	if (error == 0) {
+		if (fault)
+			goto done;	/* Resume guest to handle fault */
+
 		/*
 		 * case (2): read from system memory and write to mmio.
 		 */
@@ -720,11 +728,6 @@ emulate_movs(void *vm, int vcpuid, uint6
 		error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
 		if (error)
 			goto done;
-	} else if (error > 0) {
-		/*
-		 * Resume guest execution to handle fault.
-		 */
-		goto done;
 	} else {
 		/*
 		 * 'vm_copy_setup()' is expected to fail for cases (3) and (4)
@@ -732,13 +735,17 @@ emulate_movs(void *vm, int vcpuid, uint6
 		 */
 
 		error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
-		    PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr);
-		if (error)
+		    PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr,
+		    &fault);
+		if (error || fault)
 			goto done;
 
 		error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize,
-		    PROT_WRITE, copyinfo, nitems(copyinfo));
+		    PROT_WRITE, copyinfo, nitems(copyinfo), &fault);
 		if (error == 0) {
+			if (fault)
+				goto done;    /* Resume guest to handle fault */
+
 			/*
 			 * case (3): read from MMIO and write to system memory.
 			 *
@@ -754,27 +761,29 @@ emulate_movs(void *vm, int vcpuid, uint6
 
 			vm_copyout(vm, vcpuid, &val, copyinfo, opsize);
 			vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
-		} else if (error > 0) {
-			/*
-			 * Resume guest execution to handle fault.
-			 */
-			goto done;
 		} else {
 			/*
 			 * Case (4): read from and write to mmio.
+			 *
+			 * Commit to the MMIO read/write (with potential
+			 * side-effects) only after we are sure that the
+			 * instruction is not going to be restarted due
+			 * to address translation faults.
 			 */
 			error = vm_gla2gpa(vm, vcpuid, paging, srcaddr,
-			    PROT_READ, &srcgpa);
-			if (error)
-				goto done;
-			error = memread(vm, vcpuid, srcgpa, &val, opsize, arg);
-			if (error)
+			    PROT_READ, &srcgpa, &fault);
+			if (error || fault)
 				goto done;
 
 			error = vm_gla2gpa(vm, vcpuid, paging, dstaddr,
-			   PROT_WRITE, &dstgpa);
+			   PROT_WRITE, &dstgpa, &fault);
+			if (error || fault)
+				goto done;
+
+			error = memread(vm, vcpuid, srcgpa, &val, opsize, arg);
 			if (error)
 				goto done;
+
 			error = memwrite(vm, vcpuid, dstgpa, val, opsize, arg);
 			if (error)
 				goto done;
@@ -819,10 +828,9 @@ emulate_movs(void *vm, int vcpuid, uint6
 			vm_restart_instruction(vm, vcpuid);
 	}
 done:
-	if (error < 0)
-		return (EFAULT);
-	else
-		return (0);
+	KASSERT(error == 0 || error == EFAULT, ("%s: unexpected error %d",
+	    __func__, error));
+	return (error);
 }
 
 static int
@@ -1185,7 +1193,7 @@ emulate_stack_op(void *vm, int vcpuid, u
 #endif
 	struct seg_desc ss_desc;
 	uint64_t cr0, rflags, rsp, stack_gla, val;
-	int error, size, stackaddrsize, pushop;
+	int error, fault, size, stackaddrsize, pushop;
 
 	val = 0;
 	size = vie->opsize;
@@ -1251,18 +1259,10 @@ emulate_stack_op(void *vm, int vcpuid, u
 	}
 
 	error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size,
-	    pushop ? PROT_WRITE : PROT_READ, copyinfo, nitems(copyinfo));
-	if (error == -1) {
-		/*
-		 * XXX cannot return a negative error value here because it
-		 * ends up being the return value of the VM_RUN() ioctl and
-		 * is interpreted as a pseudo-error (for e.g. ERESTART).
-		 */
-		return (EFAULT);
-	} else if (error == 1) {
-		/* Resume guest execution to handle page fault */
-		return (0);
-	}
+	    pushop ? PROT_WRITE : PROT_READ, copyinfo, nitems(copyinfo),
+	    &fault);
+	if (error || fault)
+		return (error);
 
 	if (pushop) {
 		error = memread(vm, vcpuid, mmio_gpa, &val, size, arg);
@@ -1672,7 +1672,7 @@ ptp_hold(struct vm *vm, vm_paddr_t ptpph
 
 int
 vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
-    uint64_t gla, int prot, uint64_t *gpa)
+    uint64_t gla, int prot, uint64_t *gpa, int *guest_fault)
 {
 	int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable;
 	u_int retries;
@@ -1680,6 +1680,8 @@ vm_gla2gpa(struct vm *vm, int vcpuid, st
 	uint32_t *ptpbase32, pte32;
 	void *cookie;
 
+	*guest_fault = 0;
+
 	usermode = (paging->cpl == 3 ? 1 : 0);
 	writable = prot & VM_PROT_WRITE;
 	cookie = NULL;
@@ -1842,18 +1844,20 @@ restart:
 	*gpa = pte | (gla & (pgsize - 1));
 done:
 	ptp_release(&cookie);
+	KASSERT(retval == 0 || retval == EFAULT, ("%s: unexpected retval %d",
+	    __func__, retval));
 	return (retval);
 error:
-	retval = -1;
+	retval = EFAULT;
 	goto done;
 fault:
-	retval = 1;
+	*guest_fault = 1;
 	goto done;
 }
 
 int
 vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
-    uint64_t rip, int inst_length, struct vie *vie)
+    uint64_t rip, int inst_length, struct vie *vie, int *faultptr)
 {
 	struct vm_copyinfo copyinfo[2];
 	int error, prot;
@@ -1863,13 +1867,14 @@ vmm_fetch_instruction(struct vm *vm, int
 
 	prot = PROT_READ | PROT_EXEC;
 	error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot,
-	    copyinfo, nitems(copyinfo));
-	if (error == 0) {
-		vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length);
-		vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
-		vie->num_valid = inst_length;
-	}
-	return (error);
+	    copyinfo, nitems(copyinfo), faultptr);
+	if (error || *faultptr)
+		return (error);
+
+	vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length);
+	vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+	vie->num_valid = inst_length;
+	return (0);
 }
 
 static int

Modified: head/usr.sbin/bhyve/inout.c
==============================================================================
--- head/usr.sbin/bhyve/inout.c	Wed May  6 16:21:12 2015	(r282557)
+++ head/usr.sbin/bhyve/inout.c	Wed May  6 16:25:20 2015	(r282558)
@@ -107,7 +107,7 @@ emulate_inout(struct vmctx *ctx, int vcp
 	uint32_t eax, val;
 	inout_func_t handler;
 	void *arg;
-	int error, retval;
+	int error, fault, retval;
 	enum vm_reg_name idxreg;
 	uint64_t gla, index, iterations, count;
 	struct vm_inout_str *vis;
@@ -163,11 +163,11 @@ emulate_inout(struct vmctx *ctx, int vcp
 			}
 
 			error = vm_copy_setup(ctx, vcpu, &vis->paging, gla,
-			    bytes, prot, iov, nitems(iov));
-			if (error == -1) {
+			    bytes, prot, iov, nitems(iov), &fault);
+			if (error) {
 				retval = -1;  /* Unrecoverable error */
 				break;
-			} else if (error == 1) {
+			} else if (fault) {
 				retval = 0;  /* Resume guest to handle fault */
 				break;
 			}

Modified: head/usr.sbin/bhyve/task_switch.c
==============================================================================
--- head/usr.sbin/bhyve/task_switch.c	Wed May  6 16:21:12 2015	(r282557)
+++ head/usr.sbin/bhyve/task_switch.c	Wed May  6 16:25:20 2015	(r282558)
@@ -202,7 +202,8 @@ desc_table_limit_check(struct vmctx *ctx
  */
 static int
 desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
-    uint16_t sel, struct user_segment_descriptor *desc, bool doread)
+    uint16_t sel, struct user_segment_descriptor *desc, bool doread,
+    int *faultptr)
 {
 	struct iovec iov[2];
 	uint64_t base;
@@ -215,28 +216,30 @@ desc_table_rw(struct vmctx *ctx, int vcp
 	assert(limit >= SEL_LIMIT(sel));
 
 	error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel),
-	    sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov));
-	if (error == 0) {
-		if (doread)
-			vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc));
-		else
-			vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc));
-	}
-	return (error);
+	    sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov),
+	    faultptr);
+	if (error || *faultptr)
+		return (error);
+
+	if (doread)
+		vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc));
+	else
+		vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc));
+	return (0);
 }
 
 static int
 desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
-    uint16_t sel, struct user_segment_descriptor *desc)
+    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
 {
-	return (desc_table_rw(ctx, vcpu, paging, sel, desc, true));
+	return (desc_table_rw(ctx, vcpu, paging, sel, desc, true, faultptr));
 }
 
 static int
 desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
-    uint16_t sel, struct user_segment_descriptor *desc)
+    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
 {
-	return (desc_table_rw(ctx, vcpu, paging, sel, desc, false));
+	return (desc_table_rw(ctx, vcpu, paging, sel, desc, false, faultptr));
 }
 
 /*
@@ -248,7 +251,7 @@ desc_table_write(struct vmctx *ctx, int 
  */
 static int
 read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
-    uint16_t sel, struct user_segment_descriptor *desc)
+    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
 {
 	struct vm_guest_paging sup_paging;
 	int error;
@@ -267,7 +270,7 @@ read_tss_descriptor(struct vmctx *ctx, i
 
 	sup_paging = ts->paging;
 	sup_paging.cpl = 0;		/* implicit supervisor mode */
-	error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc);
+	error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc, faultptr);
 	return (error);
 }
 
@@ -301,14 +304,10 @@ ldt_desc(int sd_type)
 
 /*
  * Validate the descriptor 'seg_desc' associated with 'segment'.
- *
- * Returns 0 on success.
- * Returns 1 if an exception was injected into the guest.
- * Returns -1 otherwise.
  */
 static int
 validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
-    int segment, struct seg_desc *seg_desc)
+    int segment, struct seg_desc *seg_desc, int *faultptr)
 {
 	struct vm_guest_paging sup_paging;
 	struct user_segment_descriptor usd;
@@ -369,8 +368,8 @@ validate_seg_desc(struct vmctx *ctx, int
 	/* Read the descriptor from the GDT/LDT */
 	sup_paging = ts->paging;
 	sup_paging.cpl = 0;	/* implicit supervisor mode */
-	error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd);
-	if (error)
+	error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd, faultptr);
+	if (error || *faultptr)
 		return (error);
 
 	/* Verify that the descriptor type is compatible with the segment */
@@ -476,14 +475,10 @@ update_seg_desc(struct vmctx *ctx, int v
 
 /*
  * Update the vcpu registers to reflect the state of the new task.
- *
- * Returns 0 on success.
- * Returns 1 if an exception was injected into the guest.
- * Returns -1 otherwise.
  */
 static int
 tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
-    uint16_t ot_sel, struct tss32 *tss, struct iovec *iov)
+    uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr)
 {
 	struct seg_desc seg_desc, seg_desc2;
 	uint64_t *pdpte, maxphyaddr, reserved;
@@ -565,8 +560,9 @@ tss32_restore(struct vmctx *ctx, int vcp
 		vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss));
 
 	/* Validate segment descriptors */
-	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc);
-	if (error)
+	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc,
+	    faultptr);
+	if (error || *faultptr)
 		return (error);
 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc);
 
@@ -579,33 +575,40 @@ tss32_restore(struct vmctx *ctx, int vcp
 	 * VM-entry checks so the guest can handle any exception injected
 	 * during task switch emulation.
 	 */
-	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc);
-	if (error)
+	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc,
+	    faultptr);
+	if (error || *faultptr)
 		return (error);
-	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2);
-	if (error)
+
+	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2,
+	    faultptr);
+	if (error || *faultptr)
 		return (error);
 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc);
 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2);
 	ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK;
 
-	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc);
-	if (error)
+	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc,
+	    faultptr);
+	if (error || *faultptr)
 		return (error);
 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc);
 
-	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc);
-	if (error)
+	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc,
+	    faultptr);
+	if (error || *faultptr)
 		return (error);
 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc);
 
-	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc);
-	if (error)
+	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc,
+	    faultptr);
+	if (error || *faultptr)
 		return (error);
 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc);
 
-	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc);
-	if (error)
+	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc,
+	    faultptr);
+	if (error || *faultptr)
 		return (error);
 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc);
 
@@ -616,14 +619,10 @@ tss32_restore(struct vmctx *ctx, int vcp
  * Push an error code on the stack of the new task. This is needed if the
  * task switch was triggered by a hardware exception that causes an error
  * code to be saved (e.g. #PF).
- *
- * Returns 0 on success.
- * Returns 1 if an exception was injected into the guest.
- * Returns -1 otherwise.
  */
 static int
 push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
-    int task_type, uint32_t errcode)
+    int task_type, uint32_t errcode, int *faultptr)
 {
 	struct iovec iov[2];
 	struct seg_desc seg_desc;
@@ -632,6 +631,8 @@ push_errcode(struct vmctx *ctx, int vcpu
 	uint32_t esp;
 	uint16_t stacksel;
 
+	*faultptr = 0;
+
 	cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
 	rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
 	stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
@@ -666,17 +667,19 @@ push_errcode(struct vmctx *ctx, int vcpu
 	if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
 	    &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
 		sel_exception(ctx, vcpu, IDT_SS, stacksel, 1);
-		return (1);
+		*faultptr = 1;
+		return (0);
 	}
 
 	if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
 		vm_inject_ac(ctx, vcpu, 1);
-		return (1);
+		*faultptr = 1;
+		return (0);
 	}
 
 	error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE,
-	    iov, nitems(iov));
-	if (error)
+	    iov, nitems(iov), faultptr);
+	if (error || *faultptr)
 		return (error);
 
 	vm_copyout(ctx, vcpu, &errcode, iov, bytes);
@@ -687,16 +690,13 @@ push_errcode(struct vmctx *ctx, int vcpu
 /*
  * Evaluate return value from helper functions and potentially return to
  * the VM run loop.
- *  0: success
- * +1: an exception was injected into the guest vcpu
- * -1: unrecoverable/programming error
  */
-#define	CHKERR(x)							\
+#define	CHKERR(error,fault)						\
 	do {								\
-		assert(((x) == 0) || ((x) == 1) || ((x) == -1));	\
-		if ((x) == -1)						\
+		assert((error == 0) || (error == EFAULT));		\
+		if (error)						\
 			return (VMEXIT_ABORT);				\
-		else if ((x) == 1)					\
+		else if (fault)						\
 			return (VMEXIT_CONTINUE);			\
 	} while (0)
 
@@ -711,7 +711,7 @@ vmexit_task_switch(struct vmctx *ctx, st
 	struct iovec nt_iov[2], ot_iov[2];
 	uint64_t cr0, ot_base;
 	uint32_t eip, ot_lim, access;
-	int error, ext, minlimit, nt_type, ot_type, vcpu;
+	int error, ext, fault, minlimit, nt_type, ot_type, vcpu;
 	enum task_switch_reason reason;
 	uint16_t nt_sel, ot_sel;
 
@@ -739,8 +739,9 @@ vmexit_task_switch(struct vmctx *ctx, st
 	sup_paging.cpl = 0;	/* implicit supervisor mode */
 
 	/* Fetch the new TSS descriptor */
-	error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc);
-	CHKERR(error);
+	error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc,
+	    &fault);
+	CHKERR(error, fault);
 
 	nt = usd_to_seg_desc(&nt_desc);
 
@@ -792,8 +793,8 @@ vmexit_task_switch(struct vmctx *ctx, st
 
 	/* Fetch the new TSS */
 	error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1,
-	    PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov));
-	CHKERR(error);
+	    PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault);
+	CHKERR(error, fault);
 	vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1);
 
 	/* Get the old TSS selector from the guest's task register */
@@ -818,13 +819,14 @@ vmexit_task_switch(struct vmctx *ctx, st
 	assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY);
 
 	/* Fetch the old TSS descriptor */
-	error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc);
-	CHKERR(error);
+	error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc,
+	    &fault);
+	CHKERR(error, fault);
 
 	/* Get the old TSS */
 	error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1,
-	    PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov));
-	CHKERR(error);
+	    PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault);
+	CHKERR(error, fault);
 	vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1);
 
 	/*
@@ -834,8 +836,8 @@ vmexit_task_switch(struct vmctx *ctx, st
 	if (reason == TSR_IRET || reason == TSR_JMP) {
 		ot_desc.sd_type &= ~0x2;
 		error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel,
-		    &ot_desc);
-		CHKERR(error);
+		    &ot_desc, &fault);
+		CHKERR(error, fault);
 	}
 
 	if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) {
@@ -853,8 +855,8 @@ vmexit_task_switch(struct vmctx *ctx, st
 	if (reason != TSR_IRET) {
 		nt_desc.sd_type |= 0x2;
 		error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel,
-		    &nt_desc);
-		CHKERR(error);
+		    &nt_desc, &fault);
+		CHKERR(error, fault);
 	}
 
 	/* Update task register to point at the new TSS */
@@ -877,8 +879,9 @@ vmexit_task_switch(struct vmctx *ctx, st
 	assert(error == 0);
 
 	/* Load processor state from new TSS */
-	error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov);
-	CHKERR(error);
+	error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov,
+	    &fault);
+	CHKERR(error, fault);
 
 	/*
 	 * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception
@@ -889,8 +892,8 @@ vmexit_task_switch(struct vmctx *ctx, st
 		assert(task_switch->ext);
 		assert(task_switch->reason == TSR_IDT_GATE);
 		error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type,
-		    task_switch->errcode);
-		CHKERR(error);
+		    task_switch->errcode, &fault);
+		CHKERR(error, fault);
 	}
 
 	/*

