git: 1d8cda26cad2 - stable/13 - vmd(4): Major driver refactoring

Alexander Motin mav at FreeBSD.org
Sun Sep 12 23:03:09 UTC 2021


The branch stable/13 has been updated by mav:

URL: https://cgit.FreeBSD.org/src/commit/?id=1d8cda26cad2b1c2613edad2f3be0222c5691ad1

commit 1d8cda26cad2b1c2613edad2f3be0222c5691ad1
Author:     Alexander Motin <mav at FreeBSD.org>
AuthorDate: 2021-09-03 00:58:02 +0000
Commit:     Alexander Motin <mav at FreeBSD.org>
CommitDate: 2021-09-12 22:44:13 +0000

    vmd(4): Major driver refactoring
    
     - Re-implement pcib interface to use standard pci bus driver on top of
    vmd(4) instead of custom one.
     - Re-implement memory/bus resource allocation to properly handle even
    complicated configurations.
     - Re-implement interrupt handling to evenly distribute children's MSI/
    MSI-X interrupts between available vmd(4) MSI-X vectors and set them up
    to be handled by standard OS mechanisms with minimal overhead, sharing
    vectors only when unavoidable.
    
    Successfully tested on Dell XPS 13 laptop with Core i7-1185G7 CPU (VMD
    device ID 0x9a0b) and single NVMe SSD, dual-booting with Windows 10.
    
    Successfully tested on Supermicro X11DPI-NT motherboard with Xeon(R)
    Gold 6242R CPUs (VMD device ID 0x201d), simultaneously handling NVMe
    SSD on one PCIe port and PLX bridge with 3 NVMe and 1 AHCI SSDs on
    another.  Handles SSD hot-plug (except Optane 905p for some reason,
    which are not detected until manual bus rescan) and enabled IOMMU
    (directly connected SSDs work, but ones connected to the PLX fail
    without errors from IOMMU).
    
    MFC after:      2 weeks
    Sponsored by:   iXsystems, Inc.
    Differential revision:  https://reviews.freebsd.org/D31762
    
    (cherry picked from commit 7af4475a6e31202a865b1dd3727018659b44470f)
---
 share/man/man4/Makefile  |   2 +-
 share/man/man4/vmd.4     |  35 ++-
 sys/amd64/conf/GENERIC   |   3 +-
 sys/amd64/conf/NOTES     |   3 +-
 sys/conf/files.amd64     |   3 +-
 sys/conf/files.i386      |   1 +
 sys/dev/vmd/vmd.c        | 690 ++++++++++++++++++++++-------------------------
 sys/dev/vmd/vmd.h        |  72 ++---
 sys/dev/vmd/vmd_bus.c    | 220 ---------------
 sys/i386/conf/NOTES      |   4 +
 sys/modules/Makefile     |   2 +-
 sys/modules/vmd/Makefile |   1 -
 12 files changed, 381 insertions(+), 655 deletions(-)

diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile
index c912abccae6e..2f3283e14573 100644
--- a/share/man/man4/Makefile
+++ b/share/man/man4/Makefile
@@ -851,6 +851,7 @@ _tpm.4=		tpm.4
 _urtw.4=	urtw.4
 _viawd.4=	viawd.4
 _vmci.4=	vmci.4
+_vmd.4=		vmd.4
 _vmx.4=		vmx.4
 _wbwd.4=	wbwd.4
 _wpi.4=		wpi.4
@@ -869,7 +870,6 @@ _qlnxe.4=	qlnxe.4
 _sfxge.4=	sfxge.4
 _smartpqi.4=	smartpqi.4
 _sume.4=	sume.4
-_vmd.4=		vmd.4
 
 MLINKS+=qlxge.4 if_qlxge.4
 MLINKS+=qlxgb.4 if_qlxgb.4
diff --git a/share/man/man4/vmd.4 b/share/man/man4/vmd.4
index eb72a653adfb..74419fd87ffe 100644
--- a/share/man/man4/vmd.4
+++ b/share/man/man4/vmd.4
@@ -1,6 +1,7 @@
 .\"-
 .\" SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 .\"
+.\" Copyright (c) 2021 Alexander Motin <mav at FreeBSD.org>
 .\" Copyright 2019 Cisco Systems, Inc.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
@@ -26,7 +27,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 9, 2019
+.Dd August 31, 2021
 .Dt VMD 4
 .Os
 .Sh NAME
@@ -37,7 +38,6 @@ To compile this driver into the kernel, place the following lines in your
 kernel configuration file:
 .Bd -ragged -offset -indent
 .Cd "device vmd"
-.Cd "device vmd_bus"
 .Ed
 .Pp
 Alternatively, to load the driver as a module at boot time, place the following
@@ -47,10 +47,30 @@ line in
 vmd_load="YES"
 .Ed
 .Sh DESCRIPTION
-This driver attaches to Intel VMD devices as a new PCI domain and then
-triggers a probe of PCI devices.
-Intel VMD is used with Intel's VROC (Virtual RAID on chip) used with
-NVME drives on Skylake SP servers.
+This driver attaches to Intel VMD devices, representing them as PCI-to-PCI
+bridges and providing access to child PCI devices via new PCI domains.
+Intel VMD is used by Intel's VROC (Virtual RAID on chip) to manage NVMe
+drives.
+.Sh LOADER TUNABLES
+The following tunables are settable via
+.Xr loader 8
+or
+.Xr sysctl 8 :
+.Bl -tag -width indent
+.It Va hw.vmd.max_msi
+Limits the number of Message Signaled Interrupt (MSI) vectors allowed to
+each child device.
+VMD can't distinguish MSI vectors of the same device, so there is no
+benefit in having more than one, unless it is required by a specific device
+driver.
+Defaults to 1.
+.It Va hw.vmd.max_msix
+Limits the number of Extended Message Signaled Interrupt (MSI-X) vectors
+allowed to each child device.
+VMD has a limited number of interrupt vectors to map children's interrupts
+into, so child devices/drivers must be constrained to avoid/reduce sharing.
+Defaults to 3.
+.El
 .Sh SEE ALSO
 .Xr graid 8
 .Sh HISTORY
@@ -58,6 +78,3 @@ The
 .Nm
 driver first appeared in
 .Fx 13.0 .
-.Sh BUGS
-.Nm
-is currently only available on amd64.
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index f9e578a56902..e1a4560ab7f5 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -186,8 +186,7 @@ device		nvme			# base NVMe driver
 device		nvd			# expose NVMe namespaces as disks, depends on nvme
 
 # Intel Volume Management Device (VMD) support
-device		vmd			# base VMD device
-device		vmd_bus			# bus for VMD children
+device		vmd
 
 # atkbdc0 controls both the keyboard and the PS/2 mouse
 device		atkbdc			# AT keyboard controller
diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES
index a3cb84698748..714b20101703 100644
--- a/sys/amd64/conf/NOTES
+++ b/sys/amd64/conf/NOTES
@@ -466,8 +466,7 @@ device         nvd     # expose NVMe namespaces as disks, depends on nvme
 
 #
 # Intel Volume Management Device (VMD) support
-device		vmd		# base VMD device
-device		vmd_bus		# bus for VMD children
+device		vmd
 
 #
 # PMC-Sierra SAS/SATA controller
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 1b56254dc892..b1593006e110 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -383,8 +383,7 @@ dev/tpm/tpm_acpi.c		optional	tpm acpi
 dev/tpm/tpm_isa.c		optional	tpm isa
 dev/uart/uart_cpu_x86.c		optional	uart
 dev/viawd/viawd.c		optional	viawd
-dev/vmd/vmd.c			optional	vmd
-dev/vmd/vmd_bus.c		optional	vmd_bus
+dev/vmd/vmd.c			optional	vmd | vmd_bus
 dev/wbwd/wbwd.c			optional	wbwd
 dev/xen/pci/xen_acpi_pci.c	optional	xenhvm
 dev/xen/pci/xen_pci.c		optional	xenhvm
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index 602b86a7bdf3..926451976372 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -128,6 +128,7 @@ dev/tpm/tpm_acpi.c		optional tpm acpi
 dev/tpm/tpm_isa.c		optional tpm isa
 dev/uart/uart_cpu_x86.c		optional uart
 dev/viawd/viawd.c		optional viawd
+dev/vmd/vmd.c			optional vmd
 dev/acpi_support/acpi_wmi_if.m	standard
 dev/wbwd/wbwd.c			optional wbwd
 i386/acpica/acpi_machdep.c	optional acpi
diff --git a/sys/dev/vmd/vmd.c b/sys/dev/vmd/vmd.c
index 80eeda4639fa..72146a7d16df 100644
--- a/sys/dev/vmd/vmd.c
+++ b/sys/dev/vmd/vmd.c
@@ -1,6 +1,7 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
+ * Copyright (c) 2021 Alexander Motin <mav at FreeBSD.org>
  * Copyright 2019 Cisco Systems, Inc.
  * All rights reserved.
  *
@@ -34,16 +35,18 @@ __FBSDID("$FreeBSD$");
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/kernel.h>
+#include <sys/limits.h>
 #include <sys/module.h>
+#include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
+#include <machine/intr_machdep.h>
 #include <sys/rman.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
-#include <sys/taskqueue.h>
 
 #include <sys/pciio.h>
 #include <dev/pci/pcivar.h>
@@ -51,27 +54,19 @@ __FBSDID("$FreeBSD$");
 #include <dev/pci/pci_private.h>
 #include <dev/pci/pcib_private.h>
 
-#define TASK_QUEUE_INTR 1
 #include <dev/vmd/vmd.h>
 
 #include "pcib_if.h"
-#include "pci_if.h"
 
 struct vmd_type {
 	u_int16_t	vmd_vid;
 	u_int16_t	vmd_did;
 	char		*vmd_name;
 	int		flags;
-#define BUS_RESTRICT 1
+#define BUS_RESTRICT	1
+#define VECTOR_OFFSET	2
 };
 
-#define INTEL_VENDOR_ID		0x8086
-#define INTEL_DEVICE_ID_201d	0x201d
-#define INTEL_DEVICE_ID_28c0	0x28c0
-#define INTEL_DEVICE_ID_467f	0x467f
-#define INTEL_DEVICE_ID_4c3d	0x4c3d
-#define INTEL_DEVICE_ID_9a0b	0x9a0b
-
 #define VMD_CAP		0x40
 #define VMD_BUS_RESTRICT	0x1
 
@@ -80,12 +75,32 @@ struct vmd_type {
 
 #define VMD_LOCK	0x70
 
+SYSCTL_NODE(_hw, OID_AUTO, vmd, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+    "Intel Volume Management Device tuning parameters");
+
+/*
+ * All MSIs within a group share an address, so VMD can't distinguish them.
+ * It makes no sense to use more than one per device, unless required by
+ * some specific device drivers.
+ */
+static int vmd_max_msi = 1;
+SYSCTL_INT(_hw_vmd, OID_AUTO, max_msi, CTLFLAG_RWTUN, &vmd_max_msi, 0,
+    "Maximum number of MSI vectors per device");
+
+/*
+ * MSI-X can use different addresses, but we have a limited number of MSI-X
+ * we can route to, so use a conservative default to try to avoid sharing.
+ */
+static int vmd_max_msix = 3;
+SYSCTL_INT(_hw_vmd, OID_AUTO, max_msix, CTLFLAG_RWTUN, &vmd_max_msix, 0,
+    "Maximum number of MSI-X vectors per device");
+
 static struct vmd_type vmd_devs[] = {
-        { INTEL_VENDOR_ID, INTEL_DEVICE_ID_201d, "Intel Volume Management Device", 0 },
-        { INTEL_VENDOR_ID, INTEL_DEVICE_ID_28c0, "Intel Volume Management Device", BUS_RESTRICT },
-        { INTEL_VENDOR_ID, INTEL_DEVICE_ID_467f, "Intel Volume Management Device", BUS_RESTRICT },
-        { INTEL_VENDOR_ID, INTEL_DEVICE_ID_4c3d, "Intel Volume Management Device", BUS_RESTRICT },
-        { INTEL_VENDOR_ID, INTEL_DEVICE_ID_9a0b, "Intel Volume Management Device", BUS_RESTRICT },
+        { 0x8086, 0x201d, "Intel Volume Management Device", 0 },
+        { 0x8086, 0x28c0, "Intel Volume Management Device", BUS_RESTRICT },
+        { 0x8086, 0x467f, "Intel Volume Management Device", BUS_RESTRICT | VECTOR_OFFSET },
+        { 0x8086, 0x4c3d, "Intel Volume Management Device", BUS_RESTRICT | VECTOR_OFFSET },
+        { 0x8086, 0x9a0b, "Intel Volume Management Device", BUS_RESTRICT | VECTOR_OFFSET },
         { 0, 0, NULL, 0 }
 };
 
@@ -95,73 +110,51 @@ vmd_probe(device_t dev)
 	struct vmd_type *t;
 	uint16_t vid, did;
 
-	t = vmd_devs;
 	vid = pci_get_vendor(dev);
 	did = pci_get_device(dev);
-
-	while (t->vmd_name != NULL) {
-		if (vid == t->vmd_vid &&
-			did == t->vmd_did) {
+	for (t = vmd_devs; t->vmd_name != NULL; t++) {
+		if (vid == t->vmd_vid && did == t->vmd_did) {
 			device_set_desc(dev, t->vmd_name);
 			return (BUS_PROBE_DEFAULT);
 		}
-		t++;
 	}
-
 	return (ENXIO);
 }
 
 static void
 vmd_free(struct vmd_softc *sc)
 {
+	struct vmd_irq *vi;
+	struct vmd_irq_user *u;
 	int i;
-	struct vmd_irq_handler *elm, *tmp;
 
-	if (sc->vmd_bus.rman.rm_end != 0)
-		rman_fini(&sc->vmd_bus.rman);
-
-#ifdef TASK_QUEUE_INTR
-	if (sc->vmd_irq_tq != NULL) {
-		taskqueue_drain(sc->vmd_irq_tq, &sc->vmd_irq_task);
-		taskqueue_free(sc->vmd_irq_tq);
-		sc->vmd_irq_tq = NULL;
+	if (sc->psc.bus.rman.rm_end != 0)
+		rman_fini(&sc->psc.bus.rman);
+	if (sc->psc.mem.rman.rm_end != 0)
+		rman_fini(&sc->psc.mem.rman);
+	while ((u = LIST_FIRST(&sc->vmd_users)) != NULL) {
+		LIST_REMOVE(u, viu_link);
+		free(u, M_DEVBUF);
 	}
-#endif
 	if (sc->vmd_irq != NULL) {
 		for (i = 0; i < sc->vmd_msix_count; i++) {
-			if (sc->vmd_irq[i].vmd_res != NULL) {
-				bus_teardown_intr(sc->vmd_dev,
-				    sc->vmd_irq[i].vmd_res,
-				    sc->vmd_irq[i].vmd_handle);
-				bus_release_resource(sc->vmd_dev, SYS_RES_IRQ,
-				    sc->vmd_irq[i].vmd_rid,
-				    sc->vmd_irq[i].vmd_res);
-			}
-		}
-		TAILQ_FOREACH_SAFE(elm, &sc->vmd_irq[0].vmd_list ,vmd_link,
-		    tmp) {
-			TAILQ_REMOVE(&sc->vmd_irq[0].vmd_list, elm, vmd_link);
-			free(elm, M_DEVBUF);
+			vi = &sc->vmd_irq[i];
+			if (vi->vi_res == NULL)
+				continue;
+			bus_teardown_intr(sc->psc.dev, vi->vi_res,
+			    vi->vi_handle);
+			bus_release_resource(sc->psc.dev, SYS_RES_IRQ,
+			    vi->vi_rid, vi->vi_res);
 		}
 	}
 	free(sc->vmd_irq, M_DEVBUF);
 	sc->vmd_irq = NULL;
-	pci_release_msi(sc->vmd_dev);
+	pci_release_msi(sc->psc.dev);
 	for (i = 0; i < VMD_MAX_BAR; i++) {
-		if (sc->vmd_regs_resource[i] != NULL)
-			bus_release_resource(sc->vmd_dev, SYS_RES_MEMORY,
-			    sc->vmd_regs_rid[i],
-			    sc->vmd_regs_resource[i]);
-	}
-	if (sc->vmd_io_resource)
-		bus_release_resource(device_get_parent(sc->vmd_dev),
-		    SYS_RES_IOPORT, sc->vmd_io_rid, sc->vmd_io_resource);
-
-#ifndef TASK_QUEUE_INTR
-	if (mtx_initialized(&sc->vmd_irq_lock)) {
-		mtx_destroy(&sc->vmd_irq_lock);
+		if (sc->vmd_regs_res[i] != NULL)
+			bus_release_resource(sc->psc.dev, SYS_RES_MEMORY,
+			    sc->vmd_regs_rid[i], sc->vmd_regs_res[i]);
 	}
-#endif
 }
 
 /* Hidden PCI Roots are hidden in BAR(0). */
@@ -169,17 +162,16 @@ vmd_free(struct vmd_softc *sc)
 static uint32_t
 vmd_read_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, int width)
 {
-
 	struct vmd_softc *sc;
 	bus_addr_t offset;
 
 	sc = device_get_softc(dev);
-	if (b < sc->vmd_bus_start)
+	if (b < sc->vmd_bus_start || b > sc->vmd_bus_end)
 		return (0xffffffff);
 
 	offset = ((b - sc->vmd_bus_start) << 20) + (s << 15) + (f << 12) + reg;
 
-	switch(width) {
+	switch (width) {
 	case 4:
 		return (bus_space_read_4(sc->vmd_btag, sc->vmd_bhandle,
 		    offset));
@@ -190,7 +182,7 @@ vmd_read_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, int width)
 		return (bus_space_read_1(sc->vmd_btag, sc->vmd_bhandle,
 		    offset));
 	default:
-		KASSERT(1, ("Invalid width requested"));
+		__assert_unreachable();
 		return (0xffffffff);
 	}
 }
@@ -199,17 +191,16 @@ static void
 vmd_write_config(device_t dev, u_int b, u_int s, u_int f, u_int reg,
     uint32_t val, int width)
 {
-
 	struct vmd_softc *sc;
 	bus_addr_t offset;
 
 	sc = device_get_softc(dev);
-	if (b < sc->vmd_bus_start)
+	if (b < sc->vmd_bus_start || b > sc->vmd_bus_end)
 		return;
 
 	offset = ((b - sc->vmd_bus_start) << 20) + (s << 15) + (f << 12) + reg;
 
-	switch(width) {
+	switch (width) {
 	case 4:
 		return (bus_space_write_4(sc->vmd_btag, sc->vmd_bhandle,
 		    offset, val));
@@ -220,269 +211,162 @@ vmd_write_config(device_t dev, u_int b, u_int s, u_int f, u_int reg,
 		return (bus_space_write_1(sc->vmd_btag, sc->vmd_bhandle,
 		    offset, val));
 	default:
-		panic("Failed to specific width");
+		__assert_unreachable();
 	}
 }
 
-static uint32_t
-vmd_pci_read_config(device_t dev, device_t child, int reg, int width)
-{
-	struct pci_devinfo *dinfo = device_get_ivars(child);
-	pcicfgregs *cfg = &dinfo->cfg;
-
-	return vmd_read_config(dev, cfg->bus, cfg->slot, cfg->func, reg, width);
-}
-
-static void
-vmd_pci_write_config(device_t dev, device_t child, int reg, uint32_t val,
-    int width)
-{
-	struct pci_devinfo *dinfo = device_get_ivars(child);
-	pcicfgregs *cfg = &dinfo->cfg;
-
-	vmd_write_config(dev, cfg->bus, cfg->slot, cfg->func, reg, val, width);
-}
-
-static struct pci_devinfo *
-vmd_alloc_devinfo(device_t dev)
-{
-	struct pci_devinfo *dinfo;
-
-	dinfo = malloc(sizeof(*dinfo), M_DEVBUF, M_WAITOK | M_ZERO);
-	return (dinfo);
-}
-
-static void
+static int
 vmd_intr(void *arg)
 {
-	struct vmd_irq  *irq;
-	struct vmd_softc *sc;
-#ifndef TASK_QUEUE_INTR
-	struct vmd_irq_handler *elm, *tmp_elm;
-#endif
-
-	irq = (struct vmd_irq *)arg;
-	sc = irq->vmd_sc;
-#ifdef TASK_QUEUE_INTR
-	taskqueue_enqueue(sc->vmd_irq_tq, &sc->vmd_irq_task);
-#else
-	mtx_lock(&sc->vmd_irq_lock);
-	TAILQ_FOREACH_SAFE(elm, &sc->vmd_irq[0].vmd_list, vmd_link, tmp_elm) {
-		(elm->vmd_intr)(elm->vmd_arg);
-	}
-	mtx_unlock(&sc->vmd_irq_lock);
-#endif
-}
-
-#ifdef TASK_QUEUE_INTR
-static void
-vmd_handle_irq(void *context, int pending)
-{
-	struct vmd_irq_handler *elm, *tmp_elm;
-	struct vmd_softc *sc;
-
-	sc = context;
-
-	TAILQ_FOREACH_SAFE(elm, &sc->vmd_irq[0].vmd_list, vmd_link, tmp_elm) {
-		(elm->vmd_intr)(elm->vmd_arg);
-	}
+	/*
+	 * We have nothing to do here, but we have to register some interrupt
+	 * handler to make PCI code set up and enable the MSI-X vector.
+	 */
+	return (FILTER_STRAY);
 }
-#endif
 
 static int
 vmd_attach(device_t dev)
 {
 	struct vmd_softc *sc;
 	struct pcib_secbus *bus;
+	struct pcib_window *w;
 	struct vmd_type *t;
+	struct vmd_irq *vi;
 	uint16_t vid, did;
 	uint32_t bar;
 	int i, j, error;
-	int rid, sec_reg;
-	static int b;
-	static int s;
-	static int f;
-	int min_count = 1;
 	char buf[64];
 
 	sc = device_get_softc(dev);
 	bzero(sc, sizeof(*sc));
-	sc->vmd_dev = dev;
-	b = s = f = 0;
+	sc->psc.dev = dev;
+	sc->psc.domain = PCI_DOMAINMAX - device_get_unit(dev);
 
 	pci_enable_busmaster(dev);
 
-#ifdef TASK_QUEUE_INTR
-	sc->vmd_irq_tq = taskqueue_create_fast("vmd_taskq", M_NOWAIT,
-	    taskqueue_thread_enqueue, &sc->vmd_irq_tq);
-	taskqueue_start_threads(&sc->vmd_irq_tq, 1, PI_DISK, "%s taskq",
-            device_get_nameunit(sc->vmd_dev));
-	TASK_INIT(&sc->vmd_irq_task, 0, vmd_handle_irq, sc);
-#else
-	mtx_init(&sc->vmd_irq_lock, "VMD IRQ lock", NULL, MTX_DEF);
-#endif
-	for (i = 0, j = 0; i < VMD_MAX_BAR; i++, j++ ) {
+	for (i = 0, j = 0; i < VMD_MAX_BAR; i++, j++) {
 		sc->vmd_regs_rid[i] = PCIR_BAR(j);
 		bar = pci_read_config(dev, PCIR_BAR(0), 4);
 		if (PCI_BAR_MEM(bar) && (bar & PCIM_BAR_MEM_TYPE) ==
 		    PCIM_BAR_MEM_64)
 			j++;
-		if ((sc->vmd_regs_resource[i] = bus_alloc_resource_any(
-		    sc->vmd_dev, SYS_RES_MEMORY, &sc->vmd_regs_rid[i],
-		    RF_ACTIVE)) == NULL) {
+		if ((sc->vmd_regs_res[i] = bus_alloc_resource_any(dev,
+		    SYS_RES_MEMORY, &sc->vmd_regs_rid[i], RF_ACTIVE)) == NULL) {
 			device_printf(dev, "Cannot allocate resources\n");
 			goto fail;
 		}
 	}
 
-	sc->vmd_io_rid = PCIR_IOBASEL_1;
-	sc->vmd_io_resource = bus_alloc_resource_any(
-	    device_get_parent(sc->vmd_dev), SYS_RES_IOPORT, &sc->vmd_io_rid,
-	    RF_ACTIVE);
-	if (sc->vmd_io_resource == NULL) {
-		device_printf(dev, "Cannot allocate IO\n");
-		goto fail;
-	}
-
-	sc->vmd_btag = rman_get_bustag(sc->vmd_regs_resource[0]);
-	sc->vmd_bhandle = rman_get_bushandle(sc->vmd_regs_resource[0]);
+	sc->vmd_btag = rman_get_bustag(sc->vmd_regs_res[0]);
+	sc->vmd_bhandle = rman_get_bushandle(sc->vmd_regs_res[0]);
 
-	pci_write_config(dev, PCIR_PRIBUS_2,
-	    pcib_get_bus(device_get_parent(dev)), 1);
-
-	t = vmd_devs;
 	vid = pci_get_vendor(dev);
 	did = pci_get_device(dev);
+	for (t = vmd_devs; t->vmd_name != NULL; t++) {
+		if (vid == t->vmd_vid && did == t->vmd_did)
+			break;
+	}
 
 	sc->vmd_bus_start = 0;
-	while (t->vmd_name != NULL) {
-		if (vid == t->vmd_vid &&
-			did == t->vmd_did) {
-			if (t->flags == BUS_RESTRICT) {
-				if (pci_read_config(dev, VMD_CAP, 2) &
-				    VMD_BUS_RESTRICT)
-					switch (VMD_BUS_START(pci_read_config(
-					    dev, VMD_CONFIG, 2))) {
-					case 1:
-						sc->vmd_bus_start = 128;
-						break;
-					case 2:
-						sc->vmd_bus_start = 224;
-						break;
-					case 3:
-						device_printf(dev,
-						    "Unknown bug offset\n");
-						goto fail;
-						break;
-					}
-			}
+	if ((t->flags & BUS_RESTRICT) &&
+	    (pci_read_config(dev, VMD_CAP, 2) & VMD_BUS_RESTRICT)) {
+		switch (VMD_BUS_START(pci_read_config(dev, VMD_CONFIG, 2))) {
+		case 0:
+			sc->vmd_bus_start = 0;
+			break;
+		case 1:
+			sc->vmd_bus_start = 128;
+			break;
+		case 2:
+			sc->vmd_bus_start = 224;
+			break;
+		default:
+			device_printf(dev, "Unknown bus offset\n");
+			goto fail;
 		}
-		t++;
 	}
+	sc->vmd_bus_end = MIN(PCI_BUSMAX, sc->vmd_bus_start +
+	    (rman_get_size(sc->vmd_regs_res[0]) >> 20) - 1);
 
-	device_printf(dev, "VMD bus starts at %d\n", sc->vmd_bus_start);
-
-	sec_reg = PCIR_SECBUS_1;
-	bus = &sc->vmd_bus;
-	bus->sub_reg = PCIR_SUBBUS_1;
-	bus->sec = vmd_read_config(dev, b, s, f, sec_reg, 1);
-	bus->sub = vmd_read_config(dev, b, s, f, bus->sub_reg, 1);
+	bus = &sc->psc.bus;
+	bus->sec = sc->vmd_bus_start;
+	bus->sub = sc->vmd_bus_end;
 	bus->dev = dev;
-	bus->rman.rm_start = sc->vmd_bus_start;
+	bus->rman.rm_start = 0;
 	bus->rman.rm_end = PCI_BUSMAX;
 	bus->rman.rm_type = RMAN_ARRAY;
 	snprintf(buf, sizeof(buf), "%s bus numbers", device_get_nameunit(dev));
 	bus->rman.rm_descr = strdup(buf, M_DEVBUF);
 	error = rman_init(&bus->rman);
 	if (error) {
-		device_printf(dev, "Failed to initialize %s bus number rman\n",
-		    device_get_nameunit(dev));
+		device_printf(dev, "Failed to initialize bus rman\n");
 		bus->rman.rm_end = 0;
 		goto fail;
 	}
-
-	/*
-	 * Allocate a bus range.  This will return an existing bus range
-	 * if one exists, or a new bus range if one does not.
-	 */
-	rid = 0;
-	bus->res = bus_alloc_resource_anywhere(dev, PCI_RES_BUS, &rid,
-	    min_count, 0);
-	if (bus->res == NULL) {
-		/*
-		 * Fall back to just allocating a range of a single bus
-		 * number.
-		 */
-		bus->res = bus_alloc_resource_anywhere(dev, PCI_RES_BUS, &rid,
-		    1, 0);
-	} else if (rman_get_size(bus->res) < min_count) {
-		/*
-		 * Attempt to grow the existing range to satisfy the
-		 * minimum desired count.
-		 */
-		(void)bus_adjust_resource(dev, PCI_RES_BUS, bus->res,
-		    rman_get_start(bus->res), rman_get_start(bus->res) +
-		    min_count - 1);
+	error = rman_manage_region(&bus->rman, sc->vmd_bus_start,
+	    sc->vmd_bus_end);
+	if (error) {
+		device_printf(dev, "Failed to add resource to bus rman\n");
+		goto fail;
 	}
 
-	/*
-	 * Add the initial resource to the rman.
-	 */
-	if (bus->res != NULL) {
-		error = rman_manage_region(&bus->rman, rman_get_start(bus->res),
-		    rman_get_end(bus->res));
-		if (error) {
-			device_printf(dev, "Failed to add resource to rman\n");
-			goto fail;
-		}
-		bus->sec = rman_get_start(bus->res);
-		bus->sub = rman_get_end(bus->res);
+	w = &sc->psc.mem;
+	w->rman.rm_type = RMAN_ARRAY;
+	snprintf(buf, sizeof(buf), "%s memory window", device_get_nameunit(dev));
+	w->rman.rm_descr = strdup(buf, M_DEVBUF);
+	error = rman_init(&w->rman);
+	if (error) {
+		device_printf(dev, "Failed to initialize memory rman\n");
+		w->rman.rm_end = 0;
+		goto fail;
+	}
+	error = rman_manage_region(&w->rman,
+	    rman_get_start(sc->vmd_regs_res[1]),
+	    rman_get_end(sc->vmd_regs_res[1]));
+	if (error) {
+		device_printf(dev, "Failed to add resource to memory rman\n");
+		goto fail;
+	}
+	error = rman_manage_region(&w->rman,
+	    rman_get_start(sc->vmd_regs_res[2]) + 0x2000,
+	    rman_get_end(sc->vmd_regs_res[2]));
+	if (error) {
+		device_printf(dev, "Failed to add resource to memory rman\n");
+		goto fail;
 	}
 
+	LIST_INIT(&sc->vmd_users);
+	sc->vmd_fist_vector = (t->flags & VECTOR_OFFSET) ? 1 : 0;
 	sc->vmd_msix_count = pci_msix_count(dev);
 	if (pci_alloc_msix(dev, &sc->vmd_msix_count) == 0) {
 		sc->vmd_irq = malloc(sizeof(struct vmd_irq) *
-		    sc->vmd_msix_count,
-		    M_DEVBUF, M_WAITOK | M_ZERO);
-
+		    sc->vmd_msix_count, M_DEVBUF, M_WAITOK | M_ZERO);
 		for (i = 0; i < sc->vmd_msix_count; i++) {
-			sc->vmd_irq[i].vmd_rid = i + 1;
-			sc->vmd_irq[i].vmd_sc = sc;
-			sc->vmd_irq[i].vmd_instance = i;
-			sc->vmd_irq[i].vmd_res = bus_alloc_resource_any(dev,
-			    SYS_RES_IRQ, &sc->vmd_irq[i].vmd_rid,
-			    RF_ACTIVE);
-			if (sc->vmd_irq[i].vmd_res == NULL) {
-				device_printf(dev,"Failed to alloc irq\n");
+			vi = &sc->vmd_irq[i];
+			vi->vi_rid = i + 1;
+			vi->vi_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
+			    &vi->vi_rid, RF_ACTIVE | RF_SHAREABLE);
+			if (vi->vi_res == NULL) {
+				device_printf(dev, "Failed to allocate irq\n");
 				goto fail;
 			}
-
-			TAILQ_INIT(&sc->vmd_irq[i].vmd_list);
-			if (bus_setup_intr(dev, sc->vmd_irq[i].vmd_res,
-			    INTR_TYPE_MISC | INTR_MPSAFE, NULL, vmd_intr,
-			    &sc->vmd_irq[i], &sc->vmd_irq[i].vmd_handle)) {
-				device_printf(sc->vmd_dev,
-				    "Cannot set up interrupt\n");
-				sc->vmd_irq[i].vmd_res = NULL;
+			vi->vi_irq = rman_get_start(vi->vi_res);
+			if (bus_setup_intr(dev, vi->vi_res, INTR_TYPE_MISC |
+			    INTR_MPSAFE, vmd_intr, NULL, vi, &vi->vi_handle)) {
+				device_printf(dev, "Can't set up interrupt\n");
+				bus_release_resource(dev, SYS_RES_IRQ,
+				    vi->vi_rid, vi->vi_res);
+				vi->vi_res = NULL;
 				goto fail;
 			}
 		}
 	}
 
-	sc->vmd_child = device_add_child(dev, NULL, -1);
-	if (sc->vmd_child == NULL) {
-		device_printf(dev, "Failed to attach child\n");
-		goto fail;
-	}
-
-	error = device_probe_and_attach(sc->vmd_child);
-	if (error) {
-		device_printf(dev, "Failed to add probe child: %d\n", error);
-		(void)device_delete_child(dev, sc->vmd_child);
-		goto fail;
-	}
+	sc->vmd_dma_tag = bus_get_dma_tag(dev);
 
-	return (0);
+	sc->psc.child = device_add_child(dev, "pci", -1);
+	return (bus_generic_attach(dev));
 
 fail:
 	vmd_free(sc);
@@ -492,150 +376,218 @@ fail:
 static int
 vmd_detach(device_t dev)
 {
-	struct vmd_softc *sc;
-	int err;
-
-	sc = device_get_softc(dev);
-	if (sc->vmd_child != NULL) {
-		err = bus_generic_detach(sc->vmd_child);
-		if (err)
-			return (err);
-		err = device_delete_child(dev, sc->vmd_child);
-		if (err)
-			return (err);
-	}
+	struct vmd_softc *sc = device_get_softc(dev);
+	int error;
+
+	error = bus_generic_detach(dev);
+	if (error)
+		return (error);
+	error = device_delete_children(dev);
+	if (error)
+		return (error);
 	vmd_free(sc);
 	return (0);
 }
 
-/* Pass request to alloc an MSI-X message up to the parent bridge. */
-static int
-vmd_alloc_msix(device_t pcib, device_t dev, int *irq)
+static bus_dma_tag_t
+vmd_get_dma_tag(device_t dev, device_t child)
 {
-	struct vmd_softc *sc = device_get_softc(pcib);
-	device_t bus;
-	int ret;
-
-	if (sc->vmd_flags & PCIB_DISABLE_MSIX)
-		return (ENXIO);
-	bus = device_get_parent(pcib);
-	ret = PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq);
-        return (ret);
+	struct vmd_softc *sc = device_get_softc(dev);
+
+	return (sc->vmd_dma_tag);
 }
 
 static struct resource *
 vmd_alloc_resource(device_t dev, device_t child, int type, int *rid,
     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
 {
-	/* Start at max PCI vmd_domain and work down */
-	if (type == PCI_RES_BUS) {
-		return (pci_domain_alloc_bus(PCI_DOMAINMAX -
-		    device_get_unit(dev), child, rid, start, end,
-		    count, flags));
+	struct vmd_softc *sc = device_get_softc(dev);
+	struct resource *res;
+
+	switch (type) {
+	case SYS_RES_IRQ:
+		/* VMD hardware does not support legacy interrupts. */
+		if (*rid == 0)
+			return (NULL);
+		return (bus_generic_alloc_resource(dev, child, type, rid,
+		    start, end, count, flags | RF_SHAREABLE));
+	case SYS_RES_MEMORY:
+		res = rman_reserve_resource(&sc->psc.mem.rman, start, end,
+		    count, flags, child);
+		if (res == NULL)
+			return (NULL);
+		if (bootverbose)
+			device_printf(dev,
+			    "allocated memory range (%#jx-%#jx) for rid %d of %s\n",
+			    rman_get_start(res), rman_get_end(res), *rid,
+			    pcib_child_name(child));
+		break;
+	case PCI_RES_BUS:
+		res = rman_reserve_resource(&sc->psc.bus.rman, start, end,
+		    count, flags, child);
+		if (res == NULL)
+			return (NULL);
+		if (bootverbose)
+			device_printf(dev,
+			    "allocated bus range (%ju-%ju) for rid %d of %s\n",
+			    rman_get_start(res), rman_get_end(res), *rid,
+			    pcib_child_name(child));
+		break;
+	default:
+		/* VMD hardware does not support I/O ports. */
+		return (NULL);
 	}
-
-	return (pcib_alloc_resource(dev, child, type, rid, start, end,
-				    count, flags));
+	rman_set_rid(res, *rid);
+	return (res);
 }
 
 static int
 vmd_adjust_resource(device_t dev, device_t child, int type,
     struct resource *r, rman_res_t start, rman_res_t end)
 {
-	struct resource *res = r;
 
-	if (type == PCI_RES_BUS)
-		return (pci_domain_adjust_bus(PCI_DOMAINMAX -
-			device_get_unit(dev), child, res, start, end));
-	return (pcib_adjust_resource(dev, child, type, res, start, end));
+	if (type == SYS_RES_IRQ) {
+		return (bus_generic_adjust_resource(dev, child, type, r,
+		    start, end));
+	}
+	return (rman_adjust_resource(r, start, end));
 }
 
 static int
 vmd_release_resource(device_t dev, device_t child, int type, int rid,
     struct resource *r)
 {
-	if (type == PCI_RES_BUS)
-		return (pci_domain_release_bus(PCI_DOMAINMAX -
-		    device_get_unit(dev), child, rid, r));
-	return (pcib_release_resource(dev, child, type, rid, r));
-}
 
-static int
-vmd_shutdown(device_t dev)
-{
-	return (0);
+	if (type == SYS_RES_IRQ) {
+		return (bus_generic_release_resource(dev, child, type, rid,
+		    r));
+	}
+	return (rman_release_resource(r));
 }
 
 static int
-vmd_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+vmd_route_interrupt(device_t dev, device_t child, int pin)
 {
-	return (pcib_route_interrupt(pcib, dev, pin));
+
+	/* VMD hardware does not support legacy interrupts. */
+	return (PCI_INVALID_IRQ);
 }
 
 static int
-vmd_pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount,
+vmd_alloc_msi(device_t dev, device_t child, int count, int maxcount,
     int *irqs)
 {
-	return (pcib_alloc_msi(pcib, dev, count, maxcount, irqs));
-}
+	struct vmd_softc *sc = device_get_softc(dev);
+	struct vmd_irq_user *u;
+	int i, ibest = 0, best = INT_MAX;
+
+	if (count > vmd_max_msi)
+		return (ENOSPC);
+	LIST_FOREACH(u, &sc->vmd_users, viu_link) {
+		if (u->viu_child == child)
+			return (EBUSY);
+	}
 
-static int
-vmd_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs)
-{
+	for (i = sc->vmd_fist_vector; i < sc->vmd_msix_count; i++) {
+		if (best > sc->vmd_irq[i].vi_nusers) {
+			best = sc->vmd_irq[i].vi_nusers;
+			ibest = i;
+		}
+	}
+
+	u = malloc(sizeof(*u), M_DEVBUF, M_WAITOK | M_ZERO);
+	u->viu_child = child;
+	u->viu_vector = ibest;
+	LIST_INSERT_HEAD(&sc->vmd_users, u, viu_link);
+	sc->vmd_irq[ibest].vi_nusers += count;
 
-	return (pcib_release_msi(pcib, dev, count, irqs));
+	for (i = 0; i < count; i++)
+		irqs[i] = sc->vmd_irq[ibest].vi_irq;
*** 550 LINES SKIPPED ***


More information about the dev-commits-src-all mailing list