svn commit: r241744 - projects/bhyve/usr.sbin/bhyve
Peter Grehan
grehan at FreeBSD.org
Fri Oct 19 18:11:18 UTC 2012
Author: grehan
Date: Fri Oct 19 18:11:17 2012
New Revision: 241744
URL: http://svn.freebsd.org/changeset/base/241744
Log:
Rework how guest MMIO regions are dealt with.
- New memory region interface. An RB tree holds the regions,
with a last-found per-vCPU cache to deal with the common case
of repeated guest accesses to MMIO registers in the same page.
- Support memory-mapped BARs in PCI emulation.
mem.c/h - memory region interface
instruction_emul.c/h - remove old region interface.
Use gpa from EPT exit to avoid a tablewalk to
determine operand address. Determine operand size
and use when calling through to region handler.
fbsdrun.c - call into region interface on paging
exit. Distinguish between instruction emul error
and region not found
pci_emul.c/h - implement new BAR callback api.
Split BAR alloc routine into routines that
require/don't require the BAR phys address.
ioapic.c
pci_passthru.c
pci_virtio_block.c
pci_virtio_net.c
pci_uart.c - update to new BAR callback i/f
Reviewed by: neel
Obtained from: NetApp
Added:
projects/bhyve/usr.sbin/bhyve/mem.c (contents, props changed)
projects/bhyve/usr.sbin/bhyve/mem.h (contents, props changed)
Modified:
projects/bhyve/usr.sbin/bhyve/Makefile
projects/bhyve/usr.sbin/bhyve/fbsdrun.c
projects/bhyve/usr.sbin/bhyve/instruction_emul.c
projects/bhyve/usr.sbin/bhyve/instruction_emul.h
projects/bhyve/usr.sbin/bhyve/ioapic.c
projects/bhyve/usr.sbin/bhyve/pci_emul.c
projects/bhyve/usr.sbin/bhyve/pci_emul.h
projects/bhyve/usr.sbin/bhyve/pci_passthru.c
projects/bhyve/usr.sbin/bhyve/pci_uart.c
projects/bhyve/usr.sbin/bhyve/pci_virtio_block.c
projects/bhyve/usr.sbin/bhyve/pci_virtio_net.c
Modified: projects/bhyve/usr.sbin/bhyve/Makefile
==============================================================================
--- projects/bhyve/usr.sbin/bhyve/Makefile Fri Oct 19 17:45:56 2012 (r241743)
+++ projects/bhyve/usr.sbin/bhyve/Makefile Fri Oct 19 18:11:17 2012 (r241744)
@@ -5,7 +5,7 @@
PROG= bhyve
SRCS= atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c
-SRCS+= instruction_emul.c ioapic.c mevent.c
+SRCS+= instruction_emul.c ioapic.c mem.c mevent.c
SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c
SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c post.c rtc.c uart.c xmsr.c
SRCS+= spinup_ap.c
Modified: projects/bhyve/usr.sbin/bhyve/fbsdrun.c
==============================================================================
--- projects/bhyve/usr.sbin/bhyve/fbsdrun.c Fri Oct 19 17:45:56 2012 (r241743)
+++ projects/bhyve/usr.sbin/bhyve/fbsdrun.c Fri Oct 19 18:11:17 2012 (r241744)
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include "fbsdrun.h"
#include "inout.h"
#include "dbgport.h"
+#include "mem.h"
#include "mevent.h"
#include "pci_emul.h"
#include "xmsr.h"
@@ -446,11 +447,21 @@ vmexit_mtrap(struct vmctx *ctx, struct v
static int
vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
-
+ int err;
stats.vmexit_paging++;
- if (emulate_instruction(ctx, *pvcpu, vmexit->rip, vmexit->u.paging.cr3) != 0) {
- printf("Failed to emulate instruction at 0x%lx\n", vmexit->rip);
+ err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, vmexit->rip,
+ vmexit->u.paging.cr3, vmexit->u.paging.rwx);
+
+ if (err) {
+ if (err == EINVAL) {
+ printf("Failed to emulate instruction at 0x%lx\n",
+ vmexit->rip);
+ } else if (err == ESRCH) {
+ printf("Unhandled memory access to 0x%lx\n",
+ vmexit->u.paging.gpa);
+ }
+
return (VMEXIT_ABORT);
}
Modified: projects/bhyve/usr.sbin/bhyve/instruction_emul.c
==============================================================================
--- projects/bhyve/usr.sbin/bhyve/instruction_emul.c Fri Oct 19 17:45:56 2012 (r241743)
+++ projects/bhyve/usr.sbin/bhyve/instruction_emul.c Fri Oct 19 18:11:17 2012 (r241744)
@@ -28,10 +28,12 @@
#include <strings.h>
#include <unistd.h>
+#include <assert.h>
#include <machine/vmm.h>
#include <vmmapi.h>
#include "fbsdrun.h"
+#include "mem.h"
#include "instruction_emul.h"
#define PREFIX_LOCK 0xF0
@@ -46,6 +48,7 @@
#define PREFIX_BRANCH_NOT_TAKEN 0x2E
#define PREFIX_BRANCH_TAKEN 0x3E
#define PREFIX_OPSIZE 0x66
+#define is_opsz_prefix(x) ((x) == PREFIX_OPSIZE)
#define PREFIX_ADDRSIZE 0x67
#define OPCODE_2BYTE_ESCAPE 0x0F
@@ -95,6 +98,11 @@
#define FROM_REG (1<<2)
#define TO_RM (1<<3)
#define TO_REG (1<<4)
+#define ZEXT (1<<5)
+#define FROM_8 (1<<6)
+#define FROM_16 (1<<7)
+#define TO_8 (1<<8)
+#define TO_16 (1<<9)
#define REX_MASK 0xF0
#define REX_PREFIX 0x40
@@ -118,16 +126,7 @@
#define PML4E_OFFSET_MASK 0x0000FF8000000000
#define PML4E_SHIFT 39
-#define MAX_EMULATED_REGIONS 8
-int registered_regions = 0;
-struct memory_region
-{
- uintptr_t start;
- uintptr_t end;
- emulated_read_func_t memread;
- emulated_write_func_t memwrite;
- void *arg;
-} emulated_regions[MAX_EMULATED_REGIONS];
+#define INSTR_VERIFY
struct decoded_instruction
{
@@ -138,11 +137,12 @@ struct decoded_instruction
uint8_t *displacement;
uint8_t *immediate;
- uint8_t opcode_flags;
+ uint16_t opcode_flags;
uint8_t addressing_mode;
uint8_t rm;
uint8_t reg;
+ uint8_t opsz;
uint8_t rex_r;
uint8_t rex_w;
uint8_t rex_b;
@@ -170,11 +170,17 @@ static enum vm_reg_name vm_reg_name_mapp
[REG_R15] = VM_REG_GUEST_R15
};
-uint8_t one_byte_opcodes[256] = {
- [0x89] = HAS_MODRM | FROM_REG | TO_RM,
+uint16_t one_byte_opcodes[256] = {
+ [0x88] = HAS_MODRM | FROM_REG | TO_RM | TO_8 | FROM_8,
+ [0x89] = HAS_MODRM | FROM_REG | TO_RM,
[0x8B] = HAS_MODRM | FROM_RM | TO_REG,
};
+uint16_t two_byte_opcodes[256] = {
+ [0xB6] = HAS_MODRM | FROM_RM | TO_REG | ZEXT | FROM_8,
+ [0xB7] = HAS_MODRM | FROM_RM | TO_REG | ZEXT | FROM_16,
+};
+
static uintptr_t
gla2gpa(uint64_t gla, uint64_t guest_cr3)
{
@@ -211,7 +217,8 @@ gla2hla(uint64_t gla, uint64_t guest_cr3
uintptr_t gpa;
gpa = gla2gpa(gla, guest_cr3);
- return paddr_guest2host(gpa);
+
+ return (paddr_guest2host(gpa));
}
/*
@@ -232,6 +239,9 @@ decode_prefixes(struct decoded_instructi
decoded->rex_x = *current_prefix & REX_X_MASK;
decoded->rex_b = *current_prefix & REX_B_MASK;
current_prefix++;
+ } else if (is_opsz_prefix(*current_prefix)) {
+ decoded->opsz = 1;
+ current_prefix++;
} else if (is_prefix(*current_prefix)) {
return (-1);
}
@@ -248,16 +258,26 @@ decode_prefixes(struct decoded_instructi
static int
decode_opcode(struct decoded_instruction *decoded)
{
- uint8_t opcode, flags;
+ uint8_t opcode;
+ uint16_t flags;
+ int extra;
opcode = *decoded->opcode;
- flags = one_byte_opcodes[opcode];
+ extra = 0;
+ if (opcode != 0xf)
+ flags = one_byte_opcodes[opcode];
+ else {
+ opcode = *(decoded->opcode + 1);
+ flags = two_byte_opcodes[opcode];
+ extra = 1;
+ }
+
if (!flags)
return (-1);
if (flags & HAS_MODRM) {
- decoded->modrm = decoded->opcode + 1;
+ decoded->modrm = decoded->opcode + 1 + extra;
}
decoded->opcode_flags = flags;
@@ -381,37 +401,70 @@ decode_instruction(void *instr, struct d
return (0);
}
-static struct memory_region *
-find_region(uintptr_t addr)
+static enum vm_reg_name
+get_vm_reg_name(uint8_t reg)
{
- int i;
- for (i = 0; i < registered_regions; ++i) {
- if (emulated_regions[i].start <= addr &&
- emulated_regions[i].end >= addr) {
- return &emulated_regions[i];
- }
- }
-
- return (0);
+ return (vm_reg_name_mappings[reg]);
}
-static enum vm_reg_name
-get_vm_reg_name(uint8_t reg)
+static uint64_t
+adjust_operand(const struct decoded_instruction *instruction, uint64_t val,
+ int size)
{
- return vm_reg_name_mappings[reg];
+ uint64_t ret;
+
+ if (instruction->opcode_flags & ZEXT) {
+ switch (size) {
+ case 1:
+ ret = val & 0xff;
+ break;
+ case 2:
+ ret = val & 0xffff;
+ break;
+ case 4:
+ ret = val & 0xffffffff;
+ break;
+ case 8:
+ ret = val;
+ break;
+ default:
+ break;
+ }
+ } else {
+ /*
+ * Extend the sign
+ */
+ switch (size) {
+ case 1:
+ ret = (int8_t)(val & 0xff);
+ break;
+ case 2:
+ ret = (int16_t)(val & 0xffff);
+ break;
+ case 4:
+ ret = (int32_t)(val & 0xffffffff);
+ break;
+ case 8:
+ ret = val;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return (ret);
}
static int
-get_operand(struct vmctx *vm, int vcpu, uint64_t guest_cr3,
- const struct decoded_instruction *instruction, uint64_t *operand)
+get_operand(struct vmctx *vm, int vcpu, uint64_t gpa, uint64_t guest_cr3,
+ const struct decoded_instruction *instruction, uint64_t *operand,
+ struct mem_range *mr)
{
enum vm_reg_name regname;
uint64_t reg;
- uintptr_t target;
int error;
- uint8_t rm, addressing_mode;
- struct memory_region *emulated_memory;
+ uint8_t rm, addressing_mode, size;
if (instruction->opcode_flags & FROM_RM) {
rm = instruction->rm;
@@ -422,6 +475,17 @@ get_operand(struct vmctx *vm, int vcpu,
} else
return (-1);
+ /*
+ * Determine size of operand
+ */
+ size = 4;
+ if (instruction->opcode_flags & FROM_8) {
+ size = 1;
+ } else if (instruction->opcode_flags & FROM_16 ||
+ instruction->opsz) {
+ size = 2;
+ }
+
regname = get_vm_reg_name(rm);
error = vm_get_register(vm, vcpu, regname, &reg);
if (error)
@@ -430,33 +494,67 @@ get_operand(struct vmctx *vm, int vcpu,
switch (addressing_mode) {
case MOD_DIRECT:
*operand = reg;
- return (0);
+ error = 0;
+ break;
case MOD_INDIRECT:
case MOD_INDIRECT_DISP8:
case MOD_INDIRECT_DISP32:
+#ifdef INSTR_VERIFY
+ {
+ uintptr_t target;
+
target = gla2gpa(reg, guest_cr3);
target += instruction->disp;
- emulated_memory = find_region(target);
- if (emulated_memory) {
- return emulated_memory->memread(vm, vcpu, target,
- 4, operand,
- emulated_memory->arg);
- }
- return (-1);
+ assert(gpa == target);
+ }
+#endif
+ error = (*mr->handler)(vm, vcpu, MEM_F_READ, gpa, size,
+ operand, mr->arg1, mr->arg2);
+ break;
default:
return (-1);
}
+
+ if (!error)
+ *operand = adjust_operand(instruction, *operand, size);
+
+ return (error);
+}
+
+static uint64_t
+adjust_write(uint64_t reg, uint64_t operand, int size)
+{
+ uint64_t val;
+
+ switch (size) {
+ case 1:
+ val = (reg & ~0xff) | (operand & 0xff);
+ break;
+ case 2:
+ val = (reg & ~0xffff) | (operand & 0xffff);
+ break;
+ case 4:
+ val = (reg & ~0xffffffff) | (operand & 0xffffffff);
+ break;
+ case 8:
+ val = operand;
+ default:
+ break;
+ }
+
+ return (val);
}
static int
-perform_write(struct vmctx *vm, int vcpu, uint64_t guest_cr3,
- const struct decoded_instruction *instruction, uint64_t operand)
+perform_write(struct vmctx *vm, int vcpu, uint64_t gpa, uint64_t guest_cr3,
+ const struct decoded_instruction *instruction, uint64_t operand,
+ struct mem_range *mr)
{
enum vm_reg_name regname;
uintptr_t target;
int error;
+ int size;
uint64_t reg;
- struct memory_region *emulated_memory;
uint8_t addressing_mode;
if (instruction->opcode_flags & TO_RM) {
@@ -467,83 +565,77 @@ perform_write(struct vmctx *vm, int vcpu
addressing_mode = MOD_DIRECT;
} else
return (-1);
-
- regname = get_vm_reg_name(reg);
- error = vm_get_register(vm, vcpu, regname, &reg);
- if (error)
- return (error);
-
+
+ /*
+ * Determine the operand size. rex.w has priority
+ */
+ size = 4;
+ if (instruction->rex_w) {
+ size = 8;
+ } else if (instruction->opcode_flags & TO_8) {
+ size = 1;
+ } else if (instruction->opsz) {
+ size = 2;
+ };
+
switch(addressing_mode) {
case MOD_DIRECT:
- return vm_set_register(vm, vcpu, regname, operand);
+ regname = get_vm_reg_name(reg);
+ error = vm_get_register(vm, vcpu, regname, &reg);
+ if (error)
+ return (error);
+ operand = adjust_write(reg, operand, size);
+
+ return (vm_set_register(vm, vcpu, regname, operand));
case MOD_INDIRECT:
case MOD_INDIRECT_DISP8:
case MOD_INDIRECT_DISP32:
+#ifdef INSTR_VERIFY
+ regname = get_vm_reg_name(reg);
+ error = vm_get_register(vm, vcpu, regname, &reg);
+ assert(!error);
target = gla2gpa(reg, guest_cr3);
target += instruction->disp;
- emulated_memory = find_region(target);
- if (emulated_memory) {
- return emulated_memory->memwrite(vm, vcpu, target,
- 4, operand,
- emulated_memory->arg);
- }
- return (-1);
+ assert(gpa == target);
+#endif
+ error = (*mr->handler)(vm, vcpu, MEM_F_WRITE, gpa, size,
+ &operand, mr->arg1, mr->arg2);
+ return (error);
default:
return (-1);
}
}
static int
-emulate_decoded_instruction(struct vmctx *vm, int vcpu, uint64_t cr3,
- const struct decoded_instruction *instruction)
+emulate_decoded_instruction(struct vmctx *vm, int vcpu, uint64_t gpa,
+ uint64_t cr3,
+ const struct decoded_instruction *instruction,
+ struct mem_range *mr)
{
uint64_t operand;
int error;
- error = get_operand(vm, vcpu, cr3, instruction, &operand);
+ error = get_operand(vm, vcpu, gpa, cr3, instruction, &operand, mr);
if (error)
return (error);
- return perform_write(vm, vcpu, cr3, instruction, operand);
+ return perform_write(vm, vcpu, gpa, cr3, instruction, operand, mr);
}
-int
-emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, uint64_t cr3)
+int
+emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, uint64_t cr3,
+ uint64_t gpa, int flags, struct mem_range *mr)
{
struct decoded_instruction instr;
int error;
- void *instruction = gla2hla(rip, cr3);
-
- if ((error = decode_instruction(instruction, &instr)) != 0)
- return (error);
-
- return emulate_decoded_instruction(vm, vcpu, cr3, &instr);
-}
-
-struct memory_region *
-register_emulated_memory(uintptr_t start, size_t len, emulated_read_func_t memread,
- emulated_write_func_t memwrite, void *arg)
-{
- if (registered_regions >= MAX_EMULATED_REGIONS)
- return (NULL);
-
- struct memory_region *region = &emulated_regions[registered_regions];
- region->start = start;
- region->end = start + len;
- region->memread = memread;
- region->memwrite = memwrite;
- region->arg = arg;
+ void *instruction;
- registered_regions++;
- return (region);
-}
+ instruction = gla2hla(rip, cr3);
-void
-move_memory_region(struct memory_region *region, uintptr_t start)
-{
- size_t len;
+ error = decode_instruction(instruction, &instr);
+ if (!error)
+ error = emulate_decoded_instruction(vm, vcpu, gpa, cr3,
+ &instr, mr);
- len = region->end - region->start;
- region->start = start;
- region->end = start + len;
+ return (error);
}
Modified: projects/bhyve/usr.sbin/bhyve/instruction_emul.h
==============================================================================
--- projects/bhyve/usr.sbin/bhyve/instruction_emul.h Fri Oct 19 17:45:56 2012 (r241743)
+++ projects/bhyve/usr.sbin/bhyve/instruction_emul.h Fri Oct 19 18:11:17 2012 (r241744)
@@ -29,19 +29,8 @@
#ifndef _INSTRUCTION_EMUL_H_
#define _INSTRUCTION_EMUL_H_
-struct memory_region;
-
-typedef int (*emulated_read_func_t)(struct vmctx *vm, int vcpu, uintptr_t addr,
- int size, uint64_t *data, void *arg);
-typedef int (*emulated_write_func_t)(struct vmctx *vm, int vcpu, uintptr_t addr,
- int size, uint64_t data, void *arg);
-
int emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip,
- uint64_t cr3);
-struct memory_region *register_emulated_memory(uintptr_t start, size_t len,
- emulated_read_func_t memread,
- emulated_write_func_t memwrite,
- void *arg);
-void move_memory_region(struct memory_region *memory_region, uintptr_t start);
+ uint64_t cr3, uint64_t gpa, int flags,
+ struct mem_range *mr);
#endif
Modified: projects/bhyve/usr.sbin/bhyve/ioapic.c
==============================================================================
--- projects/bhyve/usr.sbin/bhyve/ioapic.c Fri Oct 19 17:45:56 2012 (r241743)
+++ projects/bhyve/usr.sbin/bhyve/ioapic.c Fri Oct 19 18:11:17 2012 (r241744)
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <vmmapi.h>
#include "inout.h"
+#include "mem.h"
#include "instruction_emul.h"
#include "fbsdrun.h"
@@ -67,10 +68,13 @@ struct ioapic {
static struct ioapic ioapics[1]; /* only a single ioapic for now */
-static int ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr,
- int size, uint64_t *data, void *arg);
-static int ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr,
- int size, uint64_t data, void *arg);
+static int ioapic_region_read(struct ioapic *ioapic, uintptr_t paddr,
+ int size, uint64_t *data);
+static int ioapic_region_write(struct ioapic *ioapic, uintptr_t paddr,
+ int size, uint64_t data);
+static int ioapic_region_handler(struct vmctx *vm, int vcpu, int dir,
+ uintptr_t paddr, int size, uint64_t *val,
+ void *arg1, long arg2);
static void
ioapic_set_pinstate(struct vmctx *ctx, int pin, bool newstate)
@@ -139,8 +143,10 @@ ioapic_assert_pin(struct vmctx *ctx, int
void
ioapic_init(int which)
{
- int i;
+ struct mem_range memp;
struct ioapic *ioapic;
+ int error;
+ int i;
assert(which == 0);
@@ -153,14 +159,19 @@ ioapic_init(int which)
for (i = 0; i < REDIR_ENTRIES; i++)
ioapic->redtbl[i] = 0x0001000000010000UL;
- /* Register emulated memory region */
ioapic->paddr = IOAPIC_PADDR;
- ioapic->region = register_emulated_memory(ioapic->paddr,
- sizeof(struct IOAPIC),
- ioapic_region_read,
- ioapic_region_write,
- (void *)(uintptr_t)which);
- assert(ioapic->region != NULL);
+
+ /* Register emulated memory region */
+ memp.name = "ioapic";
+ memp.flags = MEM_F_RW;
+ memp.handler = ioapic_region_handler;
+ memp.arg1 = ioapic;
+ memp.arg2 = which;
+ memp.base = ioapic->paddr;
+ memp.size = sizeof(struct IOAPIC);
+ error = register_mem(&memp);
+
+ assert (error == 0);
ioapic->inited = 1;
}
@@ -237,15 +248,11 @@ ioapic_write(struct ioapic *ioapic, uint
}
static int
-ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr, int size,
- uint64_t *data, void *arg)
+ioapic_region_read(struct ioapic *ioapic, uintptr_t paddr, int size,
+ uint64_t *data)
{
- int which, offset;
- struct ioapic *ioapic;
-
- which = (uintptr_t)arg;
+ int offset;
- ioapic = &ioapics[which];
offset = paddr - ioapic->paddr;
/*
@@ -255,7 +262,7 @@ ioapic_region_read(struct vmctx *vm, int
if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) {
#if 1
printf("invalid access to ioapic%d: size %d, offset %d\n",
- which, size, offset);
+ (int)(ioapic - ioapics), size, offset);
#endif
*data = 0;
return (0);
@@ -270,15 +277,11 @@ ioapic_region_read(struct vmctx *vm, int
}
static int
-ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr, int size,
- uint64_t data, void *arg)
+ioapic_region_write(struct ioapic *ioapic, uintptr_t paddr, int size,
+ uint64_t data)
{
- int which, offset;
- struct ioapic *ioapic;
-
- which = (uintptr_t)arg;
+ int offset;
- ioapic = &ioapics[which];
offset = paddr - ioapic->paddr;
/*
@@ -288,7 +291,7 @@ ioapic_region_write(struct vmctx *vm, in
if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) {
#if 1
printf("invalid access to ioapic%d: size %d, offset %d\n",
- which, size, offset);
+ (int)(ioapic - ioapics), size, offset);
#endif
return (0);
}
@@ -300,3 +303,23 @@ ioapic_region_write(struct vmctx *vm, in
return (0);
}
+
+static int
+ioapic_region_handler(struct vmctx *vm, int vcpu, int dir, uintptr_t paddr,
+ int size, uint64_t *val, void *arg1, long arg2)
+{
+ struct ioapic *ioapic;
+ int which;
+
+ ioapic = arg1;
+ which = arg2;
+
+ assert(ioapic == &ioapics[which]);
+
+ if (dir == MEM_F_READ)
+ ioapic_region_read(ioapic, paddr, size, val);
+ else
+ ioapic_region_write(ioapic, paddr, size, *val);
+
+ return (0);
+}
Added: projects/bhyve/usr.sbin/bhyve/mem.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ projects/bhyve/usr.sbin/bhyve/mem.c Fri Oct 19 18:11:17 2012 (r241744)
@@ -0,0 +1,196 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Memory ranges are represented with an RB tree. On insertion, the range
+ * is checked for overlaps. On lookup, the key has the same base and limit
+ * so it can be searched within the range.
+ *
+ * It is assumed that all setup of ranges takes place in single-threaded
+ * mode before vCPUs have been started. As such, no locks are used on the
+ * RB tree. If this is no longer the case, then a r/w lock could be used,
+ * with readers on the lookup and a writer if the tree needs to be changed
+ * (and per vCPU caches flushed)
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/tree.h>
+#include <sys/errno.h>
+#include <machine/vmm.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "mem.h"
+#include "instruction_emul.h"
+
+struct mmio_rb_range {
+ RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */
+ struct mem_range mr_param;
+ uint64_t mr_base;
+ uint64_t mr_end;
+};
+
+struct mmio_rb_tree;
+RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
+
+RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rbroot;
+
+/*
+ * Per-vCPU cache. Since most accesses from a vCPU will be to
+ * consecutive addresses in a range, it makes sense to cache the
+ * result of a lookup.
+ */
+static struct mmio_rb_range *mmio_hint[VM_MAXCPU];
+
+static int
+mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
+{
+ if (a->mr_end < b->mr_base)
+ return (-1);
+ else if (a->mr_base > b->mr_end)
+ return (1);
+ return (0);
+}
+
+static int
+mmio_rb_lookup(uint64_t addr, struct mmio_rb_range **entry)
+{
+ struct mmio_rb_range find, *res;
+
+ find.mr_base = find.mr_end = addr;
+
+ res = RB_FIND(mmio_rb_tree, &mmio_rbroot, &find);
+
+ if (res != NULL) {
+ *entry = res;
+ return (0);
+ }
+
+ return (ENOENT);
+}
+
+static int
+mmio_rb_add(struct mmio_rb_range *new)
+{
+ struct mmio_rb_range *overlap;
+
+ overlap = RB_INSERT(mmio_rb_tree, &mmio_rbroot, new);
+
+ if (overlap != NULL) {
+#ifdef RB_DEBUG
+ printf("overlap detected: new %lx:%lx, tree %lx:%lx\n",
+ new->mr_base, new->mr_end,
+ overlap->mr_base, overlap->mr_end);
+#endif
+
+ return (EEXIST);
+ }
+
+ return (0);
+}
+
+#if 0
+static void
+mmio_rb_dump(void)
+{
+ struct mmio_rb_range *np;
+
+ RB_FOREACH(np, mmio_rb_tree, &mmio_rbroot) {
+ printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
+ np->mr_param.name);
+ }
+}
+#endif
+
+RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
+
+int
+emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, uint64_t rip,
+ uint64_t cr3, int mode)
+{
+ struct mmio_rb_range *entry;
+ int err;
+
+ err = 0;
+
+ /*
+ * First check the per-vCPU cache
+ */
+ if (mmio_hint[vcpu] &&
+ paddr >= mmio_hint[vcpu]->mr_base &&
+ paddr <= mmio_hint[vcpu]->mr_end) {
+ err = emulate_instruction(ctx, vcpu, rip, cr3, paddr, mode,
+ &mmio_hint[vcpu]->mr_param);
+ } else {
+ if (mmio_rb_lookup(paddr, &entry)) {
+ err = ENOENT;
+ } else {
+ mmio_hint[vcpu] = entry;
+ err = emulate_instruction(ctx, vcpu, rip, cr3, paddr,
+ mode, &entry->mr_param);
+ }
+ }
+
+ return (err);
+}
+
+int
+register_mem(struct mem_range *memp)
+{
+ struct mmio_rb_range *mrp;
+ int err;
+
+ err = 0;
+
+ mrp = malloc(sizeof(struct mmio_rb_range));
+
+ if (mrp != NULL) {
+ mrp->mr_param = *memp;
+ mrp->mr_base = memp->base;
+ mrp->mr_end = memp->base + memp->size - 1;
+
+ err = mmio_rb_add(mrp);
+ if (err)
+ free(mrp);
+ } else
+ err = ENOMEM;
+
+ return (err);
+}
+
+void
+init_mem(void)
+{
+
+ RB_INIT(&mmio_rbroot);
+}
Added: projects/bhyve/usr.sbin/bhyve/mem.h
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ projects/bhyve/usr.sbin/bhyve/mem.h Fri Oct 19 18:11:17 2012 (r241744)
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MEM_H_
+#define _MEM_H_
+
+#include <sys/linker_set.h>
+
+struct vmctx;
+
+typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+ int size, uint64_t *val, void *arg1, long arg2);
+
+struct mem_range {
+ const char *name;
+ int flags;
+ mem_func_t handler;
+ void *arg1;
+ long arg2;
+ uint64_t base;
+ uint64_t size;
+};
+#define MEM_F_READ 0x1
+#define MEM_F_WRITE 0x2
+#define MEM_F_RW 0x3
+
+void init_mem(void);
+int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, uint64_t rip,
+ uint64_t cr3, int mode);
+
+int register_mem(struct mem_range *memp);
+
+#endif /* _MEM_H_ */
Modified: projects/bhyve/usr.sbin/bhyve/pci_emul.c
==============================================================================
--- projects/bhyve/usr.sbin/bhyve/pci_emul.c Fri Oct 19 17:45:56 2012 (r241743)
+++ projects/bhyve/usr.sbin/bhyve/pci_emul.c Fri Oct 19 18:11:17 2012 (r241744)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include "fbsdrun.h"
#include "inout.h"
+#include "mem.h"
#include "pci_emul.h"
#include "ioapic.h"
@@ -364,22 +365,26 @@ pci_finish_mptable_names(void)
}
static int
-pci_emul_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
- uint32_t *eax, void *arg)
+pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+ uint32_t *eax, void *arg)
{
struct pci_devinst *pdi = arg;
struct pci_devemu *pe = pdi->pi_d;
- int offset, i;
+ uint64_t offset;
+ int i;
for (i = 0; i <= PCI_BARMAX; i++) {
if (pdi->pi_bar[i].type == PCIBAR_IO &&
port >= pdi->pi_bar[i].addr &&
- port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
+ port + bytes <=
+ pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
offset = port - pdi->pi_bar[i].addr;
if (in)
- *eax = (*pe->pe_ior)(pdi, i, offset, bytes);
+ *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i,
+ offset, bytes);
else
- (*pe->pe_iow)(pdi, i, offset, bytes, *eax);
+ (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset,
+ bytes, *eax);
return (0);
}
}
@@ -387,6 +392,32 @@ pci_emul_handler(struct vmctx *ctx, int
}
static int
+pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+ int size, uint64_t *val, void *arg1, long arg2)
+{
+ struct pci_devinst *pdi = arg1;
+ struct pci_devemu *pe = pdi->pi_d;
+ uint64_t offset;
+ int bidx = (int) arg2;
+
+ assert(bidx <= PCI_BARMAX);
+ assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 ||
+ pdi->pi_bar[bidx].type == PCIBAR_MEM64);
+ assert(addr >= pdi->pi_bar[bidx].addr &&
+ addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);
+
+ offset = addr - pdi->pi_bar[bidx].addr;
+
+ if (dir == MEM_F_WRITE)
+ (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, size, *val);
+ else
+ *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, size);
+
+ return (0);
+}
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-projects
mailing list