svn commit: r249321 - head/usr.sbin/bhyve

Neel Natu neel at FreeBSD.org
Wed Apr 10 02:12:41 UTC 2013


Author: neel
Date: Wed Apr 10 02:12:39 2013
New Revision: 249321
URL: http://svnweb.freebsd.org/changeset/base/249321

Log:
  Improve PCI BAR emulation:
  - Respect the MEMEN and PORTEN bits in the command register
  - Allow the guest to reprogram the address decoded by the BAR
  
  Submitted by:	Gopakumar T
  Obtained from:	NetApp

Modified:
  head/usr.sbin/bhyve/consport.c
  head/usr.sbin/bhyve/dbgport.c
  head/usr.sbin/bhyve/inout.c
  head/usr.sbin/bhyve/inout.h
  head/usr.sbin/bhyve/mem.c
  head/usr.sbin/bhyve/mem.h
  head/usr.sbin/bhyve/pci_emul.c

Modified: head/usr.sbin/bhyve/consport.c
==============================================================================
--- head/usr.sbin/bhyve/consport.c	Wed Apr 10 00:35:08 2013	(r249320)
+++ head/usr.sbin/bhyve/consport.c	Wed Apr 10 02:12:39 2013	(r249321)
@@ -128,6 +128,7 @@ console_handler(struct vmctx *ctx, int v
 static struct inout_port consport = {
 	"bvmcons",
 	BVM_CONSOLE_PORT,
+	1,
 	IOPORT_F_INOUT,
 	console_handler
 };

Modified: head/usr.sbin/bhyve/dbgport.c
==============================================================================
--- head/usr.sbin/bhyve/dbgport.c	Wed Apr 10 00:35:08 2013	(r249320)
+++ head/usr.sbin/bhyve/dbgport.c	Wed Apr 10 02:12:39 2013	(r249321)
@@ -105,6 +105,7 @@ again:
 static struct inout_port dbgport = {
 	"bvmdbg",
 	BVM_DBG_PORT,
+	1,
 	IOPORT_F_INOUT,
 	dbg_handler
 };

Modified: head/usr.sbin/bhyve/inout.c
==============================================================================
--- head/usr.sbin/bhyve/inout.c	Wed Apr 10 00:35:08 2013	(r249320)
+++ head/usr.sbin/bhyve/inout.c	Wed Apr 10 02:12:39 2013	(r249321)
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/linker_set.h>
 
 #include <stdio.h>
+#include <string.h>
 #include <assert.h>
 
 #include "inout.h"
@@ -41,6 +42,9 @@ SET_DECLARE(inout_port_set, struct inout
 
 #define	MAX_IOPORTS	(1 << 16)
 
+#define	VERIFY_IOPORT(port, size) \
+	assert((port) >= 0 && (size) > 0 && ((port) + (size)) <= MAX_IOPORTS)
+
 static struct {
 	const char	*name;
 	int		flags;
@@ -69,6 +73,23 @@ default_inout(struct vmctx *ctx, int vcp
         return (0);
 }
 
+static void 
+register_default_iohandler(int start, int size)
+{
+	struct inout_port iop;
+	
+	VERIFY_IOPORT(start, size);
+
+	bzero(&iop, sizeof(iop));
+	iop.name = "default";
+	iop.port = start;
+	iop.size = size;
+	iop.flags = IOPORT_F_INOUT;
+	iop.handler = default_inout;
+
+	register_inout(&iop);
+}
+
 int
 emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
 	      uint32_t *eax, int strict)
@@ -113,17 +134,11 @@ void
 init_inout(void)
 {
 	struct inout_port **iopp, *iop;
-	int i;
 
 	/*
 	 * Set up the default handler for all ports
 	 */
-	for (i = 0; i < MAX_IOPORTS; i++) {
-		inout_handlers[i].name = "default";
-		inout_handlers[i].flags = IOPORT_F_IN | IOPORT_F_OUT;
-		inout_handlers[i].handler = default_inout;
-		inout_handlers[i].arg = NULL;
-	}
+	register_default_iohandler(0, MAX_IOPORTS);
 
 	/*
 	 * Overwrite with specified handlers
@@ -141,11 +156,28 @@ init_inout(void)
 int
 register_inout(struct inout_port *iop)
 {
-	assert(iop->port < MAX_IOPORTS);
-	inout_handlers[iop->port].name = iop->name;
-	inout_handlers[iop->port].flags = iop->flags;
-	inout_handlers[iop->port].handler = iop->handler;
-	inout_handlers[iop->port].arg = iop->arg;
+	int i;
+
+	VERIFY_IOPORT(iop->port, iop->size);
+	
+	for (i = iop->port; i < iop->port + iop->size; i++) {
+		inout_handlers[i].name = iop->name;
+		inout_handlers[i].flags = iop->flags;
+		inout_handlers[i].handler = iop->handler;
+		inout_handlers[i].arg = iop->arg;
+	}
+
+	return (0);
+}
+
+int
+unregister_inout(struct inout_port *iop)
+{
+
+	VERIFY_IOPORT(iop->port, iop->size);
+	assert(inout_handlers[iop->port].name == iop->name);
+
+	register_default_iohandler(iop->port, iop->size);
 
 	return (0);
 }

Modified: head/usr.sbin/bhyve/inout.h
==============================================================================
--- head/usr.sbin/bhyve/inout.h	Wed Apr 10 00:35:08 2013	(r249320)
+++ head/usr.sbin/bhyve/inout.h	Wed Apr 10 02:12:39 2013	(r249321)
@@ -39,6 +39,7 @@ typedef int (*inout_func_t)(struct vmctx
 struct inout_port {
 	const char 	*name;
 	int		port;
+	int		size;
 	int		flags;
 	inout_func_t	handler;
 	void		*arg;
@@ -51,6 +52,7 @@ struct inout_port {
 	static struct inout_port __CONCAT(__inout_port, __LINE__) = {	\
 		#name,							\
 		(port),							\
+		1,							\
 		(flags),						\
 		(handler),						\
 		0							\
@@ -61,7 +63,7 @@ void	init_inout(void);
 int	emulate_inout(struct vmctx *, int vcpu, int in, int port, int bytes,
 		      uint32_t *eax, int strict);
 int	register_inout(struct inout_port *iop);
-
+int	unregister_inout(struct inout_port *iop);
 void	init_bvmcons(void);
 
 #endif	/* _INOUT_H_ */

Modified: head/usr.sbin/bhyve/mem.c
==============================================================================
--- head/usr.sbin/bhyve/mem.c	Wed Apr 10 00:35:08 2013	(r249320)
+++ head/usr.sbin/bhyve/mem.c	Wed Apr 10 02:12:39 2013	(r249321)
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
+#include <pthread.h>
 
 #include "mem.h"
 
@@ -71,6 +72,8 @@ RB_HEAD(mmio_rb_tree, mmio_rb_range) mmi
  */
 static struct mmio_rb_range	*mmio_hint[VM_MAXCPU];
 
+static pthread_rwlock_t rwlock;
+
 static int
 mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
 {
@@ -125,10 +128,12 @@ mmio_rb_dump(struct mmio_rb_tree *rbt)
 {
 	struct mmio_rb_range *np;
 
+	pthread_rwlock_rdlock(&rwlock);
 	RB_FOREACH(np, mmio_rb_tree, rbt) {
 		printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
 		       np->mr_param.name);
 	}
+	pthread_rwlock_unlock(&rwlock);
 }
 #endif
 
@@ -161,7 +166,8 @@ emulate_mem(struct vmctx *ctx, int vcpu,
 {
 	struct mmio_rb_range *entry;
 	int err;
-
+	
+	pthread_rwlock_rdlock(&rwlock);
 	/*
 	 * First check the per-vCPU cache
 	 */
@@ -173,10 +179,11 @@ emulate_mem(struct vmctx *ctx, int vcpu,
 		entry = NULL;
 
 	if (entry == NULL) {
-		if (!mmio_rb_lookup(&mmio_rb_root, paddr, &entry)) {
+		if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) {
 			/* Update the per-vCPU cache */
 			mmio_hint[vcpu] = entry;			
 		} else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) {
+			pthread_rwlock_unlock(&rwlock);
 			return (ESRCH);
 		}
 	}
@@ -184,25 +191,29 @@ emulate_mem(struct vmctx *ctx, int vcpu,
 	assert(entry != NULL);
 	err = vmm_emulate_instruction(ctx, vcpu, paddr, vie,
 				      mem_read, mem_write, &entry->mr_param);
+	pthread_rwlock_unlock(&rwlock);
+	
 	return (err);
 }
 
 static int
 register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
 {
-	struct mmio_rb_range *mrp;
+	struct mmio_rb_range *entry, *mrp;
 	int		err;
 
 	err = 0;
 
 	mrp = malloc(sizeof(struct mmio_rb_range));
-
+	
 	if (mrp != NULL) {
 		mrp->mr_param = *memp;
 		mrp->mr_base = memp->base;
 		mrp->mr_end = memp->base + memp->size - 1;
-
-		err = mmio_rb_add(rbt, mrp);
+		pthread_rwlock_wrlock(&rwlock);
+		if (mmio_rb_lookup(rbt, memp->base, &entry) != 0)
+			err = mmio_rb_add(rbt, mrp);
+		pthread_rwlock_unlock(&rwlock);
 		if (err)
 			free(mrp);
 	} else
@@ -225,10 +236,40 @@ register_mem_fallback(struct mem_range *
 	return (register_mem_int(&mmio_rb_fallback, memp));
 }
 
+int 
+unregister_mem(struct mem_range *memp)
+{
+	struct mem_range *mr;
+	struct mmio_rb_range *entry = NULL;
+	int err, i;
+	
+	pthread_rwlock_wrlock(&rwlock);
+	err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
+	if (err == 0) {
+		mr = &entry->mr_param;
+		assert(mr->name == memp->name);
+		assert(mr->base == memp->base && mr->size == memp->size); 
+		RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry);
+
+		/* flush Per-vCPU cache */	
+		for (i=0; i < VM_MAXCPU; i++) {
+			if (mmio_hint[i] == entry)
+				mmio_hint[i] = NULL;
+		}
+	}
+	pthread_rwlock_unlock(&rwlock);
+
+	if (entry)
+		free(entry);
+	
+	return (err);
+}
+
 void
 init_mem(void)
 {
 
 	RB_INIT(&mmio_rb_root);
 	RB_INIT(&mmio_rb_fallback);
+	pthread_rwlock_init(&rwlock, NULL);
 }

Modified: head/usr.sbin/bhyve/mem.h
==============================================================================
--- head/usr.sbin/bhyve/mem.h	Wed Apr 10 00:35:08 2013	(r249320)
+++ head/usr.sbin/bhyve/mem.h	Wed Apr 10 02:12:39 2013	(r249321)
@@ -54,5 +54,6 @@ int     emulate_mem(struct vmctx *, int 
 		    
 int	register_mem(struct mem_range *memp);
 int	register_mem_fallback(struct mem_range *memp);
+int	unregister_mem(struct mem_range *memp);
 
 #endif	/* _MEM_H_ */

Modified: head/usr.sbin/bhyve/pci_emul.c
==============================================================================
--- head/usr.sbin/bhyve/pci_emul.c	Wed Apr 10 00:35:08 2013	(r249320)
+++ head/usr.sbin/bhyve/pci_emul.c	Wed Apr 10 02:12:39 2013	(r249321)
@@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/linker_set.h>
+#include <sys/errno.h>
 
 #include <ctype.h>
 #include <stdio.h>
@@ -38,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include <string.h>
 #include <strings.h>
 #include <assert.h>
+#include <stdbool.h>
 
 #include <machine/vmm.h>
 #include <vmmapi.h>
@@ -353,20 +355,150 @@ pci_emul_alloc_bar(struct pci_devinst *p
 	return (pci_emul_alloc_pbar(pdi, idx, 0, type, size));
 }
 
+/*
+ * Register (or unregister) the MMIO or I/O region associated with the BAR
+ * register 'idx' of an emulated pci device.
+ */
+static void
+modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
+{
+	int error;
+	struct inout_port iop;
+	struct mem_range mr;
+
+	switch (pi->pi_bar[idx].type) {
+	case PCIBAR_IO:
+		bzero(&iop, sizeof(struct inout_port));
+		iop.name = pi->pi_name;
+		iop.port = pi->pi_bar[idx].addr;
+		iop.size = pi->pi_bar[idx].size;
+		if (registration) {
+			iop.flags = IOPORT_F_INOUT;
+			iop.handler = pci_emul_io_handler;
+			iop.arg = pi;
+			error = register_inout(&iop);
+		} else 
+			error = unregister_inout(&iop);
+		break;
+	case PCIBAR_MEM32:
+	case PCIBAR_MEM64:
+		bzero(&mr, sizeof(struct mem_range));
+		mr.name = pi->pi_name;
+		mr.base = pi->pi_bar[idx].addr;
+		mr.size = pi->pi_bar[idx].size;
+		if (registration) {
+			mr.flags = MEM_F_RW;
+			mr.handler = pci_emul_mem_handler;
+			mr.arg1 = pi;
+			mr.arg2 = idx;
+			error = register_mem(&mr);
+		} else
+			error = unregister_mem(&mr);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	assert(error == 0);
+}
+
+static void
+unregister_bar(struct pci_devinst *pi, int idx)
+{
+
+	modify_bar_registration(pi, idx, 0);
+}
+
+static void
+register_bar(struct pci_devinst *pi, int idx)
+{
+
+	modify_bar_registration(pi, idx, 1);
+}
+
+/* Are we decoding i/o port accesses for the emulated pci device? */
+static int
+porten(struct pci_devinst *pi)
+{
+	uint16_t cmd;
+
+	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
+
+	return (cmd & PCIM_CMD_PORTEN);
+}
+
+/* Are we decoding memory accesses for the emulated pci device? */
+static int
+memen(struct pci_devinst *pi)
+{
+	uint16_t cmd;
+
+	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
+
+	return (cmd & PCIM_CMD_MEMEN);
+}
+
+/*
+ * Update the MMIO or I/O address that is decoded by the BAR register.
+ *
+ * If the pci device has enabled the address space decoding then intercept
+ * the address range decoded by the BAR register.
+ */
+static void
+update_bar_address(struct  pci_devinst *pi, uint64_t addr, int idx, int type)
+{
+	int decode;
+
+	if (pi->pi_bar[idx].type == PCIBAR_IO)
+		decode = porten(pi);
+	else
+		decode = memen(pi);
+
+	if (decode)
+		unregister_bar(pi, idx);
+
+	switch (type) {
+	case PCIBAR_IO:
+	case PCIBAR_MEM32:
+		pi->pi_bar[idx].addr = addr;
+		break;
+	case PCIBAR_MEM64:
+		pi->pi_bar[idx].addr &= ~0xffffffffUL;
+		pi->pi_bar[idx].addr |= addr;
+		break;
+	case PCIBAR_MEMHI64:
+		pi->pi_bar[idx].addr &= 0xffffffff;
+		pi->pi_bar[idx].addr |= addr;
+		break;
+	default:
+		assert(0);
+	}
+
+	if (decode)
+		register_bar(pi, idx);
+}
+
 int
 pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
 		    enum pcibar_type type, uint64_t size)
 {
-	int i, error;
+	int error;
 	uint64_t *baseptr, limit, addr, mask, lobits, bar;
-	struct inout_port iop;
-	struct mem_range memp;
 
 	assert(idx >= 0 && idx <= PCI_BARMAX);
 
 	if ((size & (size - 1)) != 0)
 		size = 1UL << flsl(size);	/* round up to a power of 2 */
 
+	/* Enforce minimum BAR sizes required by the PCI standard */
+	if (type == PCIBAR_IO) {
+		if (size < 4)
+			size = 4;
+	} else {
+		if (size < 16)
+			size = 16;
+	}
+
 	switch (type) {
 	case PCIBAR_NONE:
 		baseptr = NULL;
@@ -443,30 +575,7 @@ pci_emul_alloc_pbar(struct pci_devinst *
 		pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
 	}
 	
-	/* add a handler to intercept accesses to the I/O bar */
-	if (type == PCIBAR_IO) {
-		iop.name = pdi->pi_name;
-		iop.flags = IOPORT_F_INOUT;
-		iop.handler = pci_emul_io_handler;
-		iop.arg = pdi;
-
-		for (i = 0; i < size; i++) {
-			iop.port = addr + i;
-			register_inout(&iop);
-		}
-	} else if (type == PCIBAR_MEM32 || type == PCIBAR_MEM64) {
-		/* add memory bar intercept handler */
-		memp.name = pdi->pi_name;
-		memp.flags = MEM_F_RW;
-		memp.base = addr;
-		memp.size = size;
-		memp.handler = pci_emul_mem_handler;
-		memp.arg1 = pdi;
-		memp.arg2 = idx;
-
-		error = register_mem(&memp);
-		assert(error == 0);
-	}
+	register_bar(pdi, idx);
 
 	return (0);
 }
@@ -1101,6 +1210,62 @@ pci_emul_cfgaddr(struct vmctx *ctx, int 
 }
 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_OUT, pci_emul_cfgaddr);
 
+static uint32_t
+bits_changed(uint32_t old, uint32_t new, uint32_t mask)
+{
+
+	return ((old ^ new) & mask);
+}
+
+static void
+pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes)
+{
+	int i;
+	uint16_t old;
+
+	/*
+	 * The command register is at an offset of 4 bytes and thus the
+	 * guest could write 1, 2 or 4 bytes starting at this offset.
+	 */
+
+	old = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* stash old value */
+	CFGWRITE(pi, PCIR_COMMAND, new, bytes);		/* update config */
+	new = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* get updated value */
+
+	/*
+	 * If the MMIO or I/O address space decoding has changed then
+	 * register/unregister all BARs that decode that address space.
+	 */
+	for (i = 0; i < PCI_BARMAX; i++) {
+		switch (pi->pi_bar[i].type) {
+			case PCIBAR_NONE:
+			case PCIBAR_MEMHI64:
+				break;
+			case PCIBAR_IO:
+				/* I/O address space decoding changed? */
+				if (bits_changed(old, new, PCIM_CMD_PORTEN)) {
+					if (porten(pi))
+						register_bar(pi, i);
+					else
+						unregister_bar(pi, i);
+				}
+				break;
+			case PCIBAR_MEM32:
+			case PCIBAR_MEM64:
+				/* MMIO address space decoding changed? */
+				if (bits_changed(old, new, PCIM_CMD_MEMEN)) {
+					if (memen(pi))
+						register_bar(pi, i);
+					else
+						unregister_bar(pi, i);
+				}
+				break; 
+			default:
+				assert(0); 
+		}
+	}
+}	
+
 static int
 pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
 		 uint32_t *eax, void *arg)
@@ -1108,7 +1273,7 @@ pci_emul_cfgdata(struct vmctx *ctx, int 
 	struct pci_devinst *pi;
 	struct pci_devemu *pe;
 	int coff, idx, needcfg;
-	uint64_t mask, bar;
+	uint64_t addr, bar, mask;
 
 	assert(bytes == 1 || bytes == 2 || bytes == 4);
 	
@@ -1175,33 +1340,48 @@ pci_emul_cfgdata(struct vmctx *ctx, int 
 			if (bytes != 4 || (coff & 0x3) != 0)
 				return (0);
 			idx = (coff - PCIR_BAR(0)) / 4;
+			mask = ~(pi->pi_bar[idx].size - 1);
 			switch (pi->pi_bar[idx].type) {
 			case PCIBAR_NONE:
-				bar = 0;
+				pi->pi_bar[idx].addr = bar = 0;
 				break;
 			case PCIBAR_IO:
-				mask = ~(pi->pi_bar[idx].size - 1);
-				mask &= PCIM_BAR_IO_BASE;
-				bar = (*eax & mask) | PCIM_BAR_IO_SPACE;
+				addr = *eax & mask;
+				addr &= 0xffff;
+				bar = addr | PCIM_BAR_IO_SPACE;
+				/*
+				 * Register the new BAR value for interception
+				 */
+				if (addr != pi->pi_bar[idx].addr) {
+					update_bar_address(pi, addr, idx,
+							   PCIBAR_IO);
+				}
 				break;
 			case PCIBAR_MEM32:
-				mask = ~(pi->pi_bar[idx].size - 1);
-				mask &= PCIM_BAR_MEM_BASE;
-				bar = *eax & mask;
+				addr = bar = *eax & mask;
 				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
+				if (addr != pi->pi_bar[idx].addr) {
+					update_bar_address(pi, addr, idx,
+							   PCIBAR_MEM32);
+				}
 				break;
 			case PCIBAR_MEM64:
-				mask = ~(pi->pi_bar[idx].size - 1);
-				mask &= PCIM_BAR_MEM_BASE;
-				bar = *eax & mask;
+				addr = bar = *eax & mask;
 				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
 				       PCIM_BAR_MEM_PREFETCH;
+				if (addr != (uint32_t)pi->pi_bar[idx].addr) {
+					update_bar_address(pi, addr, idx,
+							   PCIBAR_MEM64);
+				}
 				break;
 			case PCIBAR_MEMHI64:
 				mask = ~(pi->pi_bar[idx - 1].size - 1);
-				mask &= PCIM_BAR_MEM_BASE;
-				bar = ((uint64_t)*eax << 32) & mask;
-				bar = bar >> 32;
+				addr = ((uint64_t)*eax << 32) & mask;
+				bar = addr >> 32;
+				if (bar != pi->pi_bar[idx - 1].addr >> 32) {
+					update_bar_address(pi, addr, idx - 1,
+							   PCIBAR_MEMHI64);
+				}
 				break;
 			default:
 				assert(0);
@@ -1210,6 +1390,8 @@ pci_emul_cfgdata(struct vmctx *ctx, int 
 
 		} else if (pci_emul_iscap(pi, coff)) {
 			pci_emul_capwrite(pi, coff, bytes, *eax);
+		} else if (coff == PCIR_COMMAND) {
+			pci_emul_cmdwrite(pi, *eax, bytes);
 		} else {
 			CFGWRITE(pi, coff, *eax, bytes);
 		}


More information about the svn-src-all mailing list