git: 104c83a999fa - stable/13 - bhyve: enumerate BARs by size

From: Emmanuel Vadot <manu_at_FreeBSD.org>
Date: Mon, 17 Jan 2022 16:22:14 UTC
The branch stable/13 has been updated by manu:

URL: https://cgit.FreeBSD.org/src/commit/?id=104c83a999fa459fd59f63374d7145b156ba76d1

commit 104c83a999fa459fd59f63374d7145b156ba76d1
Author:     Corvin Köhne <CorvinK@beckhoff.com>
AuthorDate: 2022-01-03 13:16:59 +0000
Commit:     Emmanuel Vadot <manu@FreeBSD.org>
CommitDate: 2022-01-17 12:53:10 +0000

    bhyve: enumerate BARs by size
    
    E.g. Framebuffers can require large space and BARs need to be aligned
    by their size. If BARs aren't allocated by size, it'll cause much
    fragmentation of the MMIO space. Reduce fragmentation by ordering
    the BAR allocation on their size to reduce the risk of
    OUT_OF_MMIO_SPACE issues.
    
    Reviewed by:    markj
    MFC after:      2 weeks
    Sponsored by:   Beckhoff Automation GmbH & Co. KG
    Differential Revision:  https://reviews.freebsd.org/D28278
    
    (cherry picked from commit 01f9362ef4eb14b041ccdf935fccf0f794074258)
---
 usr.sbin/bhyve/pci_emul.c | 105 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 94 insertions(+), 11 deletions(-)

diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
index 0b46bcc6200e..4365cda234b3 100644
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -105,6 +105,16 @@ static uint64_t pci_emul_membase32;
 static uint64_t pci_emul_membase64;
 static uint64_t pci_emul_memlim64;
 
+struct pci_bar_allocation {
+	TAILQ_ENTRY(pci_bar_allocation) chain;
+	struct pci_devinst *pdi;
+	int idx;
+	enum pcibar_type type;
+	uint64_t size;
+};
+TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars = TAILQ_HEAD_INITIALIZER(
+    pci_bars);
+
 #define	PCI_EMUL_IOBASE		0x2000
 #define	PCI_EMUL_IOLIMIT	0x10000
 
@@ -639,10 +649,6 @@ int
 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
     uint64_t size)
 {
-	int error;
-	uint64_t *baseptr, limit, addr, mask, lobits, bar;
-	uint16_t cmd, enbit;
-
 	assert(idx >= 0 && idx <= PCI_BARMAX);
 
 	if ((size & (size - 1)) != 0)
@@ -657,17 +663,88 @@ pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
 			size = 16;
 	}
 
+	/*
+	 * To reduce fragmentation of the MMIO space, we allocate the BARs by
+	 * size. Therefore, don't allocate the BAR yet. We create a list of all
+	 * BAR allocation which is sorted by BAR size. When all PCI devices are
+	 * initialized, we will assign an address to the BARs.
+	 */
+
+	/* create a new list entry */
+	struct pci_bar_allocation *const new_bar = malloc(sizeof(*new_bar));
+	memset(new_bar, 0, sizeof(*new_bar));
+	new_bar->pdi = pdi;
+	new_bar->idx = idx;
+	new_bar->type = type;
+	new_bar->size = size;
+
+	/*
+	 * Search for a BAR which size is lower than the size of our newly
+	 * allocated BAR.
+	 */
+	struct pci_bar_allocation *bar = NULL;
+	TAILQ_FOREACH(bar, &pci_bars, chain) {
+		if (bar->size < size) {
+			break;
+		}
+	}
+
+	if (bar == NULL) {
+		/*
+		 * Either the list is empty or new BAR is the smallest BAR of
+		 * the list. Append it to the end of our list.
+		 */
+		TAILQ_INSERT_TAIL(&pci_bars, new_bar, chain);
+	} else {
+		/*
+		 * The found BAR is smaller than our new BAR. For that reason,
+		 * insert our new BAR before the found BAR.
+		 */
+		TAILQ_INSERT_BEFORE(bar, new_bar, chain);
+	}
+
+	/*
+	 * pci_passthru devices synchronize their physical and virtual command
+	 * register on init. For that reason, the virtual cmd reg should be
+	 * updated as early as possible.
+	 */
+	uint16_t enbit = 0;
+	switch (type) {
+	case PCIBAR_IO:
+		enbit = PCIM_CMD_PORTEN;
+		break;
+	case PCIBAR_MEM64:
+	case PCIBAR_MEM32:
+		enbit = PCIM_CMD_MEMEN;
+		break;
+	default:
+		enbit = 0;
+		break;
+	}
+
+	const uint16_t cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
+	pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);
+
+	return (0);
+}
+
+static int
+pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx,
+    const enum pcibar_type type, const uint64_t size)
+{
+	int error;
+	uint64_t *baseptr, limit, addr, mask, lobits, bar;
+
 	switch (type) {
 	case PCIBAR_NONE:
 		baseptr = NULL;
-		addr = mask = lobits = enbit = 0;
+		addr = mask = lobits = 0;
 		break;
 	case PCIBAR_IO:
 		baseptr = &pci_emul_iobase;
 		limit = PCI_EMUL_IOLIMIT;
 		mask = PCIM_BAR_IO_BASE;
 		lobits = PCIM_BAR_IO_SPACE;
-		enbit = PCIM_CMD_PORTEN;
 		break;
 	case PCIBAR_MEM64:
 		/*
@@ -689,14 +766,12 @@ pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
 			mask = PCIM_BAR_MEM_BASE;
 			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
 		}
-		enbit = PCIM_CMD_MEMEN;
 		break;
 	case PCIBAR_MEM32:
 		baseptr = &pci_emul_membase32;
 		limit = PCI_EMUL_MEMLIMIT32;
 		mask = PCIM_BAR_MEM_BASE;
 		lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
-		enbit = PCIM_CMD_MEMEN;
 		break;
 	default:
 		printf("pci_emul_alloc_base: invalid bar type %d\n", type);
@@ -732,9 +807,6 @@ pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
 		pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
 	}
 
-	cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
-	if ((cmd & enbit) != enbit)
-		pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);
 	register_bar(pdi, idx);
 
 	return (0);
@@ -1189,6 +1261,7 @@ init_pci(struct vmctx *ctx)
 		bi->membase32 = pci_emul_membase32;
 		bi->membase64 = pci_emul_membase64;
 
+		/* first run: init devices */
 		for (slot = 0; slot < MAXSLOTS; slot++) {
 			si = &bi->slotinfo[slot];
 			for (func = 0; func < MAXFUNCS; func++) {
@@ -1228,6 +1301,16 @@ init_pci(struct vmctx *ctx)
 			}
 		}
 
+		/* second run: assign BARs and free list */
+		struct pci_bar_allocation *bar;
+		struct pci_bar_allocation *bar_tmp;
+		TAILQ_FOREACH_SAFE(bar, &pci_bars, chain, bar_tmp) {
+			pci_emul_assign_bar(bar->pdi, bar->idx, bar->type,
+			    bar->size);
+			free(bar);
+		}
+		TAILQ_INIT(&pci_bars);
+
 		/*
 		 * Add some slop to the I/O and memory resources decoded by
 		 * this bus to give a guest some flexibility if it wants to