svn commit: r241744 - projects/bhyve/usr.sbin/bhyve

Peter Grehan grehan at FreeBSD.org
Fri Oct 19 18:11:18 UTC 2012


Author: grehan
Date: Fri Oct 19 18:11:17 2012
New Revision: 241744
URL: http://svn.freebsd.org/changeset/base/241744

Log:
  Rework how guest MMIO regions are dealt with.
  
  - New memory region interface. An RB tree holds the regions,
  with a last-found per-vCPU cache to deal with the common case
  of repeated guest accesses to MMIO registers in the same page.
  
  - Support memory-mapped BARs in PCI emulation.
  
   mem.c/h - memory region interface
  
   instruction_emul.c/h - remove old region interface.
   Use gpa from EPT exit to avoid a tablewalk to
   determine operand address. Determine operand size
   and use when calling through to region handler.
  
   fbsdrun.c - call into region interface on paging
    exit. Distinguish between an instruction emulation
    error and a region not being found.
  
   pci_emul.c/h - implement new BAR callback api.
   Split BAR alloc routine into routines that
   require/don't require the BAR phys address.
  
   ioapic.c
   pci_passthru.c
   pci_virtio_block.c
   pci_virtio_net.c
   pci_uart.c  - update to new BAR callback i/f
  
  Reviewed by:	neel
  Obtained from:	NetApp

Added:
  projects/bhyve/usr.sbin/bhyve/mem.c   (contents, props changed)
  projects/bhyve/usr.sbin/bhyve/mem.h   (contents, props changed)
Modified:
  projects/bhyve/usr.sbin/bhyve/Makefile
  projects/bhyve/usr.sbin/bhyve/fbsdrun.c
  projects/bhyve/usr.sbin/bhyve/instruction_emul.c
  projects/bhyve/usr.sbin/bhyve/instruction_emul.h
  projects/bhyve/usr.sbin/bhyve/ioapic.c
  projects/bhyve/usr.sbin/bhyve/pci_emul.c
  projects/bhyve/usr.sbin/bhyve/pci_emul.h
  projects/bhyve/usr.sbin/bhyve/pci_passthru.c
  projects/bhyve/usr.sbin/bhyve/pci_uart.c
  projects/bhyve/usr.sbin/bhyve/pci_virtio_block.c
  projects/bhyve/usr.sbin/bhyve/pci_virtio_net.c

Modified: projects/bhyve/usr.sbin/bhyve/Makefile
==============================================================================
--- projects/bhyve/usr.sbin/bhyve/Makefile	Fri Oct 19 17:45:56 2012	(r241743)
+++ projects/bhyve/usr.sbin/bhyve/Makefile	Fri Oct 19 18:11:17 2012	(r241744)
@@ -5,7 +5,7 @@
 PROG=	bhyve
 
 SRCS=	atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c 
-SRCS+=  instruction_emul.c ioapic.c mevent.c
+SRCS+=  instruction_emul.c ioapic.c mem.c mevent.c
 SRCS+=	pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c
 SRCS+=	pci_virtio_net.c pci_uart.c pit_8254.c post.c rtc.c uart.c xmsr.c
 SRCS+=	spinup_ap.c

Modified: projects/bhyve/usr.sbin/bhyve/fbsdrun.c
==============================================================================
--- projects/bhyve/usr.sbin/bhyve/fbsdrun.c	Fri Oct 19 17:45:56 2012	(r241743)
+++ projects/bhyve/usr.sbin/bhyve/fbsdrun.c	Fri Oct 19 18:11:17 2012	(r241744)
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
 #include "fbsdrun.h"
 #include "inout.h"
 #include "dbgport.h"
+#include "mem.h"
 #include "mevent.h"
 #include "pci_emul.h"
 #include "xmsr.h"
@@ -446,11 +447,21 @@ vmexit_mtrap(struct vmctx *ctx, struct v
 static int
 vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
-
+	int err;
 	stats.vmexit_paging++;
 
-	if (emulate_instruction(ctx, *pvcpu, vmexit->rip, vmexit->u.paging.cr3) != 0) {
-		printf("Failed to emulate instruction at 0x%lx\n", vmexit->rip);
+	err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, vmexit->rip,
+			  vmexit->u.paging.cr3, vmexit->u.paging.rwx);
+
+	if (err) {
+		if (err == EINVAL) {
+			printf("Failed to emulate instruction at 0x%lx\n", 
+			       vmexit->rip);
+		} else if (err == ESRCH) {
+			printf("Unhandled memory access to 0x%lx\n",
+			       vmexit->u.paging.gpa);
+		}
+
 		return (VMEXIT_ABORT);
 	}
 

Modified: projects/bhyve/usr.sbin/bhyve/instruction_emul.c
==============================================================================
--- projects/bhyve/usr.sbin/bhyve/instruction_emul.c	Fri Oct 19 17:45:56 2012	(r241743)
+++ projects/bhyve/usr.sbin/bhyve/instruction_emul.c	Fri Oct 19 18:11:17 2012	(r241744)
@@ -28,10 +28,12 @@
 
 #include <strings.h>
 #include <unistd.h>
+#include <assert.h>
 #include <machine/vmm.h>
 #include <vmmapi.h>
 
 #include "fbsdrun.h"
+#include "mem.h"
 #include "instruction_emul.h"
 
 #define PREFIX_LOCK 		0xF0
@@ -46,6 +48,7 @@
 #define PREFIX_BRANCH_NOT_TAKEN	0x2E
 #define PREFIX_BRANCH_TAKEN	0x3E
 #define PREFIX_OPSIZE		0x66
+#define is_opsz_prefix(x)	((x) == PREFIX_OPSIZE)
 #define PREFIX_ADDRSIZE 	0x67
 
 #define OPCODE_2BYTE_ESCAPE	0x0F
@@ -95,6 +98,11 @@
 #define FROM_REG		(1<<2)
 #define TO_RM			(1<<3)
 #define TO_REG			(1<<4)
+#define ZEXT			(1<<5)
+#define FROM_8			(1<<6)
+#define FROM_16			(1<<7)
+#define TO_8			(1<<8)
+#define TO_16			(1<<9)
 
 #define REX_MASK		0xF0
 #define REX_PREFIX		0x40
@@ -118,16 +126,7 @@
 #define PML4E_OFFSET_MASK	0x0000FF8000000000
 #define PML4E_SHIFT		39
 
-#define MAX_EMULATED_REGIONS 8
-int registered_regions = 0;
-struct memory_region
-{
-	uintptr_t start;
-	uintptr_t end;
-	emulated_read_func_t memread;
-	emulated_write_func_t memwrite;
-	void *arg;
-} emulated_regions[MAX_EMULATED_REGIONS];
+#define INSTR_VERIFY
 
 struct decoded_instruction
 {
@@ -138,11 +137,12 @@ struct decoded_instruction
 	uint8_t  *displacement;
 	uint8_t *immediate;
 
-	uint8_t opcode_flags;
+	uint16_t opcode_flags;
 
 	uint8_t addressing_mode;
 	uint8_t rm;
 	uint8_t reg;
+	uint8_t opsz;
 	uint8_t rex_r;
 	uint8_t rex_w;
 	uint8_t rex_b;
@@ -170,11 +170,17 @@ static enum vm_reg_name vm_reg_name_mapp
 	[REG_R15] = VM_REG_GUEST_R15
 };
 
-uint8_t one_byte_opcodes[256] = {
-	[0x89]  = HAS_MODRM | FROM_REG | TO_RM,
+uint16_t one_byte_opcodes[256] = {
+	[0x88]  = HAS_MODRM | FROM_REG | TO_RM | TO_8 | FROM_8,
+      	[0x89]  = HAS_MODRM | FROM_REG | TO_RM,
 	[0x8B]	= HAS_MODRM | FROM_RM | TO_REG,
 };
 
+uint16_t two_byte_opcodes[256] = {
+	[0xB6]	= HAS_MODRM | FROM_RM | TO_REG | ZEXT | FROM_8,
+	[0xB7]	= HAS_MODRM | FROM_RM | TO_REG | ZEXT | FROM_16,
+};
+
 static uintptr_t 
 gla2gpa(uint64_t gla, uint64_t guest_cr3)
 {
@@ -211,7 +217,8 @@ gla2hla(uint64_t gla, uint64_t guest_cr3
 	uintptr_t gpa;
 
 	gpa = gla2gpa(gla, guest_cr3);
-	return paddr_guest2host(gpa);
+
+	return (paddr_guest2host(gpa));
 }
 
 /*
@@ -232,6 +239,9 @@ decode_prefixes(struct decoded_instructi
 		decoded->rex_x = *current_prefix & REX_X_MASK;
 		decoded->rex_b = *current_prefix & REX_B_MASK;
 		current_prefix++;
+	} else if (is_opsz_prefix(*current_prefix)) {
+		decoded->opsz = 1;
+		current_prefix++;
 	} else if (is_prefix(*current_prefix)) {
 		return (-1);
 	}
@@ -248,16 +258,26 @@ decode_prefixes(struct decoded_instructi
 static int 
 decode_opcode(struct decoded_instruction *decoded)
 {
-	uint8_t opcode, flags;
+	uint8_t opcode;
+	uint16_t flags;
+	int extra;
 
 	opcode = *decoded->opcode;
-	flags = one_byte_opcodes[opcode];
+	extra = 0;
 
+	if (opcode != 0xf)
+		flags = one_byte_opcodes[opcode];
+	else {
+		opcode = *(decoded->opcode + 1);
+		flags = two_byte_opcodes[opcode];
+		extra = 1;
+	}
+		
 	if (!flags) 
 		return (-1);
 
 	if (flags & HAS_MODRM) {
-		decoded->modrm = decoded->opcode + 1;
+		decoded->modrm = decoded->opcode + 1 + extra;
 	}
 
 	decoded->opcode_flags = flags;
@@ -381,37 +401,70 @@ decode_instruction(void *instr, struct d
 	return (0);
 }
 
-static struct memory_region * 
-find_region(uintptr_t addr)
+static enum vm_reg_name
+get_vm_reg_name(uint8_t reg)
 {
-	int i;
 
-	for (i = 0; i < registered_regions; ++i) {
-		if (emulated_regions[i].start <= addr && 
-		   emulated_regions[i].end >= addr) {
-			return &emulated_regions[i];
-		}
-	}
-
-	return (0);
+	return (vm_reg_name_mappings[reg]);
 }
 
-static enum vm_reg_name
-get_vm_reg_name(uint8_t reg)
+static uint64_t
+adjust_operand(const struct decoded_instruction *instruction, uint64_t val,
+	       int size)
 {
-	return vm_reg_name_mappings[reg];
+	uint64_t ret;
+
+	if (instruction->opcode_flags & ZEXT) {
+		switch (size) {
+		case 1:
+			ret = val & 0xff;
+			break;
+		case 2:
+			ret = val & 0xffff;
+			break;
+		case 4:
+			ret = val & 0xffffffff;
+			break;
+		case 8:
+			ret = val;
+			break;
+		default:
+			break;
+		}
+	} else {
+		/*
+		 * Extend the sign
+		 */
+		switch (size) {
+		case 1:
+			ret = (int8_t)(val & 0xff);
+			break;
+		case 2:
+			ret = (int16_t)(val & 0xffff);
+			break;
+		case 4:
+			ret = (int32_t)(val & 0xffffffff);
+			break;
+		case 8:
+			ret = val;
+			break;
+		default:
+			break;
+		}
+	}
+	
+	return (ret);
 }
 
 static int 
-get_operand(struct vmctx *vm, int vcpu, uint64_t guest_cr3,
-	    const struct decoded_instruction *instruction, uint64_t *operand)
+get_operand(struct vmctx *vm, int vcpu, uint64_t gpa, uint64_t guest_cr3,
+	    const struct decoded_instruction *instruction, uint64_t *operand,
+	    struct mem_range *mr)
 {
 	enum vm_reg_name regname;
 	uint64_t reg;
-	uintptr_t target;
 	int error;
-	uint8_t rm, addressing_mode;
-	struct memory_region *emulated_memory;
+	uint8_t rm, addressing_mode, size;
 
 	if (instruction->opcode_flags & FROM_RM) {
 		rm = instruction->rm;
@@ -422,6 +475,17 @@ get_operand(struct vmctx *vm, int vcpu, 
 	} else 
 		return (-1);
 
+	/*
+	 * Determine size of operand
+	 */
+	size = 4;
+	if (instruction->opcode_flags & FROM_8) {
+		size = 1;
+	} else if (instruction->opcode_flags & FROM_16 ||
+		   instruction->opsz) {
+		size = 2;
+	}
+
 	regname = get_vm_reg_name(rm);
 	error = vm_get_register(vm, vcpu, regname, &reg);
 	if (error) 
@@ -430,33 +494,67 @@ get_operand(struct vmctx *vm, int vcpu, 
 	switch (addressing_mode) {
 	case MOD_DIRECT:
 		*operand = reg;
-		return (0);
+		error = 0;
+		break;
 	case MOD_INDIRECT:
 	case MOD_INDIRECT_DISP8:
 	case MOD_INDIRECT_DISP32:
+#ifdef INSTR_VERIFY		
+	{
+		uintptr_t target;
+
 		target = gla2gpa(reg, guest_cr3);
 		target += instruction->disp;
-		emulated_memory = find_region(target);
-		if (emulated_memory) {
-			return emulated_memory->memread(vm, vcpu, target, 
-							4, operand, 
-							emulated_memory->arg);
-		}
-                return (-1);
+		assert(gpa == target);
+	}
+#endif
+		error = (*mr->handler)(vm, vcpu, MEM_F_READ, gpa, size,
+				       operand, mr->arg1, mr->arg2);
+		break;
 	default:
 		return (-1);
 	}
+
+	if (!error)
+		*operand = adjust_operand(instruction, *operand, size);
+
+	return (error);
+}
+
+static uint64_t
+adjust_write(uint64_t reg, uint64_t operand, int size)
+{
+	uint64_t val;
+
+	switch (size) {
+	case 1:
+		val = (reg & ~0xff) | (operand & 0xff);
+		break;
+	case 2:
+		val = (reg & ~0xffff) | (operand & 0xffff);
+		break;
+	case 4:
+		val = (reg & ~0xffffffff) | (operand & 0xffffffff);
+		break;
+	case 8:
+		val = operand;
+	default:
+		break;
+	}
+
+	return (val);
 }
 
 static int 
-perform_write(struct vmctx *vm, int vcpu, uint64_t guest_cr3,
-	      const struct decoded_instruction *instruction, uint64_t operand)
+perform_write(struct vmctx *vm, int vcpu, uint64_t gpa, uint64_t guest_cr3,
+	      const struct decoded_instruction *instruction, uint64_t operand,
+	      struct mem_range *mr)
 {
 	enum vm_reg_name regname;
 	uintptr_t target;
 	int error;
+	int size;
 	uint64_t reg;
-	struct memory_region *emulated_memory;
 	uint8_t addressing_mode;
 
 	if (instruction->opcode_flags & TO_RM) {
@@ -467,83 +565,77 @@ perform_write(struct vmctx *vm, int vcpu
 		addressing_mode = MOD_DIRECT;
 	} else
 		return (-1);
-
-	regname = get_vm_reg_name(reg);
-	error = vm_get_register(vm, vcpu, regname, &reg);
-	if (error)
-		return (error);
-
+	
+	/*
+	 * Determine the operand size. rex.w has priority
+	 */
+	size = 4;
+	if (instruction->rex_w) {
+		size = 8;
+	} else if (instruction->opcode_flags & TO_8) {
+		size = 1;
+	} else if (instruction->opsz) {
+		size = 2;
+	};
+	
 	switch(addressing_mode) {
 	case MOD_DIRECT:
-		return vm_set_register(vm, vcpu, regname, operand);
+		regname = get_vm_reg_name(reg);
+		error = vm_get_register(vm, vcpu, regname, &reg);
+		if (error)
+			return (error);
+		operand = adjust_write(reg, operand, size);
+
+		return (vm_set_register(vm, vcpu, regname, operand));
 	case MOD_INDIRECT:
 	case MOD_INDIRECT_DISP8:
 	case MOD_INDIRECT_DISP32:
+#ifdef INSTR_VERIFY
+		regname = get_vm_reg_name(reg);
+		error = vm_get_register(vm, vcpu, regname, &reg);
+		assert(!error);
 		target = gla2gpa(reg, guest_cr3);
 		target += instruction->disp;
-		emulated_memory = find_region(target);
-		if (emulated_memory) {
-			return emulated_memory->memwrite(vm, vcpu, target, 
-							 4, operand, 
-							 emulated_memory->arg);
-		}
-		return (-1);
+		assert(gpa == target);
+#endif
+		error = (*mr->handler)(vm, vcpu, MEM_F_WRITE, gpa, size,
+				       &operand, mr->arg1, mr->arg2);
+		return (error);
 	default:
 		return (-1);
 	}
 }
 
 static int 
-emulate_decoded_instruction(struct vmctx *vm, int vcpu, uint64_t cr3,
-			    const struct decoded_instruction *instruction)
+emulate_decoded_instruction(struct vmctx *vm, int vcpu, uint64_t gpa,
+			    uint64_t cr3,
+			    const struct decoded_instruction *instruction,
+			    struct mem_range *mr)
 {
 	uint64_t operand;
 	int error;
 
-	error = get_operand(vm, vcpu, cr3, instruction, &operand);
+	error = get_operand(vm, vcpu, gpa, cr3, instruction, &operand, mr);
 	if (error)
 		return (error);
 
-	return perform_write(vm, vcpu, cr3, instruction, operand);
+	return perform_write(vm, vcpu, gpa, cr3, instruction, operand, mr);
 }
 
-int 
-emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, uint64_t cr3)
+int
+emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, uint64_t cr3,
+		    uint64_t gpa, int flags, struct mem_range *mr)
 {
 	struct decoded_instruction instr;
 	int error;
-	void *instruction = gla2hla(rip, cr3);
-
-	if ((error = decode_instruction(instruction, &instr)) != 0)
-		return (error);
-
-	return emulate_decoded_instruction(vm, vcpu, cr3, &instr);
-}
-
-struct memory_region *
-register_emulated_memory(uintptr_t start, size_t len, emulated_read_func_t memread,
-			 emulated_write_func_t memwrite, void *arg)
-{
-	if (registered_regions >= MAX_EMULATED_REGIONS) 
-		return (NULL);
-
-	struct memory_region *region = &emulated_regions[registered_regions];
-	region->start = start;
-	region->end = start + len;
-	region->memread = memread;
-	region->memwrite = memwrite;
-	region->arg = arg;
+	void *instruction;
 
-	registered_regions++;
-	return (region);
-}
+	instruction = gla2hla(rip, cr3);
 
-void 
-move_memory_region(struct memory_region *region, uintptr_t start)
-{
-	size_t len;
+	error = decode_instruction(instruction, &instr);
+	if (!error)
+		error = emulate_decoded_instruction(vm, vcpu, gpa, cr3,
+						    &instr, mr);
 
-	len = region->end - region->start;
-	region->start = start;
-	region->end = start + len;
+	return (error);
 }

Modified: projects/bhyve/usr.sbin/bhyve/instruction_emul.h
==============================================================================
--- projects/bhyve/usr.sbin/bhyve/instruction_emul.h	Fri Oct 19 17:45:56 2012	(r241743)
+++ projects/bhyve/usr.sbin/bhyve/instruction_emul.h	Fri Oct 19 18:11:17 2012	(r241744)
@@ -29,19 +29,8 @@
 #ifndef _INSTRUCTION_EMUL_H_
 #define _INSTRUCTION_EMUL_H_
 
-struct memory_region;
-
-typedef int (*emulated_read_func_t)(struct vmctx *vm, int vcpu, uintptr_t addr, 
-				    int size, uint64_t *data, void *arg);
-typedef int (*emulated_write_func_t)(struct vmctx *vm, int vcpu, uintptr_t addr, 
-				     int size, uint64_t data, void *arg);
-
 int emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, 
-			uint64_t cr3);
-struct memory_region *register_emulated_memory(uintptr_t start, size_t len, 
-					       emulated_read_func_t memread, 
-					       emulated_write_func_t memwrite, 
-					       void *arg);
-void move_memory_region(struct memory_region *memory_region, uintptr_t start);
+			uint64_t cr3, uint64_t gpa, int flags,
+			struct mem_range *mr);
 
 #endif

Modified: projects/bhyve/usr.sbin/bhyve/ioapic.c
==============================================================================
--- projects/bhyve/usr.sbin/bhyve/ioapic.c	Fri Oct 19 17:45:56 2012	(r241743)
+++ projects/bhyve/usr.sbin/bhyve/ioapic.c	Fri Oct 19 18:11:17 2012	(r241744)
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
 #include <vmmapi.h>
 
 #include "inout.h"
+#include "mem.h"
 #include "instruction_emul.h"
 #include "fbsdrun.h"
 
@@ -67,10 +68,13 @@ struct ioapic {
 
 static struct ioapic ioapics[1];	/* only a single ioapic for now */
 
-static int ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr,
-				int size, uint64_t *data, void *arg);
-static int ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr,
-				int size, uint64_t data, void *arg);
+static int ioapic_region_read(struct ioapic *ioapic, uintptr_t paddr,
+				int size, uint64_t *data);
+static int ioapic_region_write(struct ioapic *ioapic, uintptr_t paddr,
+				int size, uint64_t data);
+static int ioapic_region_handler(struct vmctx *vm, int vcpu, int dir,
+				 uintptr_t paddr, int size, uint64_t *val,
+				 void *arg1, long arg2);
 
 static void
 ioapic_set_pinstate(struct vmctx *ctx, int pin, bool newstate)
@@ -139,8 +143,10 @@ ioapic_assert_pin(struct vmctx *ctx, int
 void
 ioapic_init(int which)
 {
-	int i;
+	struct mem_range memp;
 	struct ioapic *ioapic;
+	int error;
+	int i;
 
 	assert(which == 0);
 
@@ -153,14 +159,19 @@ ioapic_init(int which)
 	for (i = 0; i < REDIR_ENTRIES; i++)
 		ioapic->redtbl[i] = 0x0001000000010000UL;
 
-	/* Register emulated memory region */
 	ioapic->paddr = IOAPIC_PADDR;
-	ioapic->region = register_emulated_memory(ioapic->paddr,
-						  sizeof(struct IOAPIC),
-						  ioapic_region_read,
-						  ioapic_region_write,
-						  (void *)(uintptr_t)which);
-	assert(ioapic->region != NULL);
+
+	/* Register emulated memory region */
+	memp.name = "ioapic";
+	memp.flags = MEM_F_RW;
+	memp.handler = ioapic_region_handler;
+	memp.arg1 = ioapic;
+	memp.arg2 = which;
+	memp.base = ioapic->paddr;
+	memp.size = sizeof(struct IOAPIC);
+	error = register_mem(&memp);
+
+	assert (error == 0);
 
 	ioapic->inited = 1;
 }
@@ -237,15 +248,11 @@ ioapic_write(struct ioapic *ioapic, uint
 }
 
 static int
-ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr, int size,
-		   uint64_t *data, void *arg)
+ioapic_region_read(struct ioapic *ioapic, uintptr_t paddr, int size,
+		   uint64_t *data)
 {
-	int which, offset;
-	struct ioapic *ioapic;
-
-	which = (uintptr_t)arg;
+	int offset;
 
-	ioapic = &ioapics[which];
 	offset = paddr - ioapic->paddr;
 
 	/*
@@ -255,7 +262,7 @@ ioapic_region_read(struct vmctx *vm, int
 	if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) {
 #if 1
 		printf("invalid access to ioapic%d: size %d, offset %d\n",
-			which, size, offset);
+		       (int)(ioapic - ioapics), size, offset);
 #endif
 		*data = 0;
 		return (0);
@@ -270,15 +277,11 @@ ioapic_region_read(struct vmctx *vm, int
 }
 
 static int
-ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr, int size,
-		    uint64_t data, void *arg)
+ioapic_region_write(struct ioapic *ioapic, uintptr_t paddr, int size,
+		    uint64_t data)
 {
-	int which, offset;
-	struct ioapic *ioapic;
-
-	which = (uintptr_t)arg;
+	int offset;
 
-	ioapic = &ioapics[which];
 	offset = paddr - ioapic->paddr;
 
 	/*
@@ -288,7 +291,7 @@ ioapic_region_write(struct vmctx *vm, in
 	if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) {
 #if 1
 		printf("invalid access to ioapic%d: size %d, offset %d\n",
-			which, size, offset);
+		       (int)(ioapic - ioapics), size, offset);
 #endif
 		return (0);
 	}
@@ -300,3 +303,23 @@ ioapic_region_write(struct vmctx *vm, in
 
 	return (0);
 }
+
+static int
+ioapic_region_handler(struct vmctx *vm, int vcpu, int dir, uintptr_t paddr,
+		      int size, uint64_t *val, void *arg1, long arg2)
+{
+	struct ioapic *ioapic;
+	int which;
+
+	ioapic = arg1;
+	which = arg2;
+
+	assert(ioapic == &ioapics[which]);
+
+	if (dir == MEM_F_READ)
+		ioapic_region_read(ioapic, paddr, size, val);
+	else
+		ioapic_region_write(ioapic, paddr, size, *val);
+
+	return (0);
+}

Added: projects/bhyve/usr.sbin/bhyve/mem.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ projects/bhyve/usr.sbin/bhyve/mem.c	Fri Oct 19 18:11:17 2012	(r241744)
@@ -0,0 +1,196 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Memory ranges are represented with an RB tree. On insertion, the range
+ * is checked for overlaps. On lookup, the search key is a single-address
+ * range (base == end), so it matches the stored range that contains it.
+ *
+ * It is assumed that all setup of ranges takes place in single-threaded
+ * mode before vCPUs have been started. As such, no locks are used on the
+ * RB tree. If this is no longer the case, then an r/w lock could be used,
+ * with readers on the lookup and a writer if the tree needs to be changed
+ * (and the per-vCPU caches flushed).
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/tree.h>
+#include <sys/errno.h>
+#include <machine/vmm.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "mem.h"
+#include "instruction_emul.h"
+
+struct mmio_rb_range {
+	RB_ENTRY(mmio_rb_range)	mr_link;	/* RB tree links */
+	struct mem_range	mr_param;
+	uint64_t                mr_base;
+	uint64_t                mr_end;
+};
+
+struct mmio_rb_tree;
+RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
+
+RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rbroot;
+
+/*
+ * Per-vCPU cache. Since most accesses from a vCPU will be to
+ * consecutive addresses in a range, it makes sense to cache the
+ * result of a lookup.
+ */
+static struct mmio_rb_range	*mmio_hint[VM_MAXCPU];
+
+static int
+mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
+{
+	if (a->mr_end < b->mr_base)
+		return (-1);
+	else if (a->mr_base > b->mr_end)
+		return (1);
+	return (0);
+}
+
+static int
+mmio_rb_lookup(uint64_t addr, struct mmio_rb_range **entry)
+{
+	struct mmio_rb_range find, *res;
+
+	find.mr_base = find.mr_end = addr;
+
+	res = RB_FIND(mmio_rb_tree, &mmio_rbroot, &find);
+
+	if (res != NULL) {
+		*entry = res;
+		return (0);
+	}
+	
+	return (ENOENT);
+}
+
+static int
+mmio_rb_add(struct mmio_rb_range *new)
+{
+	struct mmio_rb_range *overlap;
+
+	overlap = RB_INSERT(mmio_rb_tree, &mmio_rbroot, new);
+
+	if (overlap != NULL) {
+#ifdef RB_DEBUG
+		printf("overlap detected: new %lx:%lx, tree %lx:%lx\n",
+		       new->mr_base, new->mr_end,
+		       overlap->mr_base, overlap->mr_end);
+#endif
+
+		return (EEXIST);
+	}
+
+	return (0);
+}
+
+#if 0
+static void
+mmio_rb_dump(void)
+{
+	struct mmio_rb_range *np;
+
+	RB_FOREACH(np, mmio_rb_tree, &mmio_rbroot) {
+		printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
+		       np->mr_param.name);
+	}
+}
+#endif
+
+RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
+
+int
+emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, uint64_t rip,
+	    uint64_t cr3, int mode)
+{
+	struct mmio_rb_range *entry;
+	int err;
+
+	err = 0;
+
+	/*
+	 * First check the per-vCPU cache
+	 */
+	if (mmio_hint[vcpu] &&
+	    paddr >= mmio_hint[vcpu]->mr_base &&
+	    paddr <= mmio_hint[vcpu]->mr_end) {
+		err = emulate_instruction(ctx, vcpu, rip, cr3, paddr, mode,
+					  &mmio_hint[vcpu]->mr_param);
+	} else {
+		if (mmio_rb_lookup(paddr, &entry)) {
+			err = ENOENT;
+		} else {
+			mmio_hint[vcpu] = entry;
+			err = emulate_instruction(ctx, vcpu, rip, cr3, paddr,
+						  mode, &entry->mr_param);
+		}
+	}
+
+	return (err);
+}
+
+int
+register_mem(struct mem_range *memp)
+{
+	struct mmio_rb_range *mrp;
+	int		err;
+
+	err = 0;
+
+	mrp = malloc(sizeof(struct mmio_rb_range));
+
+	if (mrp != NULL) {
+		mrp->mr_param = *memp;
+		mrp->mr_base = memp->base;
+		mrp->mr_end = memp->base + memp->size - 1;
+
+		err = mmio_rb_add(mrp);
+		if (err)
+			free(mrp);
+	} else
+		err = ENOMEM;
+
+	return (err);
+}
+
+void
+init_mem(void)
+{
+
+	RB_INIT(&mmio_rbroot);
+}

Added: projects/bhyve/usr.sbin/bhyve/mem.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ projects/bhyve/usr.sbin/bhyve/mem.h	Fri Oct 19 18:11:17 2012	(r241744)
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MEM_H_
+#define	_MEM_H_
+
+#include <sys/linker_set.h>
+
+struct vmctx;
+
+typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+			  int size, uint64_t *val, void *arg1, long arg2);
+
+struct mem_range {
+	const char 	*name;
+	int		flags;
+	mem_func_t	handler;
+	void		*arg1;
+	long		arg2;
+	uint64_t  	base;
+	uint64_t  	size;
+};
+#define	MEM_F_READ		0x1
+#define	MEM_F_WRITE		0x2
+#define	MEM_F_RW		0x3
+
+void	init_mem(void);
+int     emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, uint64_t rip,
+		    uint64_t cr3, int mode);
+		    
+int	register_mem(struct mem_range *memp);
+
+#endif	/* _MEM_H_ */

Modified: projects/bhyve/usr.sbin/bhyve/pci_emul.c
==============================================================================
--- projects/bhyve/usr.sbin/bhyve/pci_emul.c	Fri Oct 19 17:45:56 2012	(r241743)
+++ projects/bhyve/usr.sbin/bhyve/pci_emul.c	Fri Oct 19 18:11:17 2012	(r241744)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 
 #include "fbsdrun.h"
 #include "inout.h"
+#include "mem.h"
 #include "pci_emul.h"
 #include "ioapic.h"
 
@@ -364,22 +365,26 @@ pci_finish_mptable_names(void)
 }
 
 static int
-pci_emul_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
-		 uint32_t *eax, void *arg)
+pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		    uint32_t *eax, void *arg)
 {
 	struct pci_devinst *pdi = arg;
 	struct pci_devemu *pe = pdi->pi_d;
-	int offset, i;
+	uint64_t offset;
+	int i;
 
 	for (i = 0; i <= PCI_BARMAX; i++) {
 		if (pdi->pi_bar[i].type == PCIBAR_IO &&
 		    port >= pdi->pi_bar[i].addr &&
-		    port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
+		    port + bytes <=
+		        pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
 			offset = port - pdi->pi_bar[i].addr;
 			if (in)
-				*eax = (*pe->pe_ior)(pdi, i, offset, bytes);
+				*eax = (*pe->pe_barread)(ctx, vcpu, pdi, i,
+							 offset, bytes);
 			else
-				(*pe->pe_iow)(pdi, i, offset, bytes, *eax);
+				(*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset,
+						   bytes, *eax);
 			return (0);
 		}
 	}
@@ -387,6 +392,32 @@ pci_emul_handler(struct vmctx *ctx, int 
 }
 
 static int
+pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+		     int size, uint64_t *val, void *arg1, long arg2)
+{
+	struct pci_devinst *pdi = arg1;
+	struct pci_devemu *pe = pdi->pi_d;
+	uint64_t offset;
+	int bidx = (int) arg2;
+
+	assert(bidx <= PCI_BARMAX);
+	assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 ||
+	       pdi->pi_bar[bidx].type == PCIBAR_MEM64);
+	assert(addr >= pdi->pi_bar[bidx].addr &&
+	       addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);
+
+	offset = addr - pdi->pi_bar[bidx].addr;
+
+	if (dir == MEM_F_WRITE)
+		(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, size, *val);
+	else
+		*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, size);
+
+	return (0);
+}

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-projects mailing list