git: 5ba7dc4c2176 - stable/12 - AMD-vi: Fix IOMMU device interrupts being overridden

Ka Ho Ng khng at FreeBSD.org
Fri Sep 3 19:05:57 UTC 2021


The branch stable/12 has been updated by khng:

URL: https://cgit.FreeBSD.org/src/commit/?id=5ba7dc4c21767288f44b5a31c49b65663ae7eb25

commit 5ba7dc4c21767288f44b5a31c49b65663ae7eb25
Author:     Ka Ho Ng <khng at FreeBSD.org>
AuthorDate: 2021-03-22 09:33:43 +0000
Commit:     Ka Ho Ng <khng at FreeBSD.org>
CommitDate: 2021-09-03 18:46:40 +0000

    AMD-vi: Fix IOMMU device interrupts being overridden
    
    Currently, AMD-vi PCI-e passthrough will lead to the following lines in
    dmesg:
    "kernel: CPU0: local APIC error 0x40
    ivhd0: Error: completion failed tail:0x720, head:0x0."
    
    After some tracing, the problem is due to the interaction with
    amdvi_alloc_intr_resources() and pci_driver_added(). In ivrs_drv, the
    identification of AMD-vi IVHD is done by walking over the ACPI IVRS
    table and ivhdX device_ts are added under the acpi bus, while there are
    no driver handling the corresponding IOMMU PCI function. In
    amdvi_alloc_intr_resources(), the MSI intr are allocated with the ivhdX
    device_t instead of the IOMMU PCI function device_t. bus_setup_intr() is
    called on ivhdX. the IOMMU pci function device_t is only used for
    pci_enable_msi(). Since bus_setup_intr() is not called on IOMMU pci
    function, the IOMMU PCI function device_t's dinfo->cfg.msi is never
    updated to reflect the supposed msi_data and msi_addr. So the msi_data
    and msi_addr stay in the value 0. When pci_driver_added() tried to loop
    over the children of a pci bus, and do pci_cfg_restore() on each of
    them, msi_addr and msi_data with value 0 will be written to the MSI
    capability of the IOMMU pci function, thus explaining the errors in
    dmesg.
    
    This change includes an amdiommu driver which currently does attaching,
    detaching and providing DEVMETHODs for setting up and tearing down
    interrupt. The purpose of the driver is to prevent pci_driver_added()
    from calling pci_cfg_restore() on the IOMMU PCI function device_t.
    The introduction of the amdiommu driver handles allocation of an IRQ
    resource within the IOMMU PCI function, so that the dinfo->cfg.msi is
    populated.
    
    This has been tested on EPYC Rome 7282 with Radeon 5700XT GPU.
    
    Sponsored by:   The FreeBSD Foundation
    Reviewed by:    jhb
    Approved by:    philip (mentor)
    MFC after:      2 weeks
    Differential Revision:  https://reviews.freebsd.org/D28984
    
    (cherry picked from commit 74ada297e8978b8efda3dffdd1bb24aee7c5faa4)
---
 sys/amd64/vmm/amd/amdiommu.c   | 185 +++++++++++++++++++++++++++++++++++++++++
 sys/amd64/vmm/amd/amdvi_hw.c   |  89 +++-----------------
 sys/amd64/vmm/amd/amdvi_priv.h |   5 +-
 sys/amd64/vmm/amd/ivhd_if.m    |  46 ++++++++++
 sys/amd64/vmm/amd/ivrs_drv.c   |   6 ++
 sys/modules/vmm/Makefile       |   3 +
 6 files changed, 253 insertions(+), 81 deletions(-)

diff --git a/sys/amd64/vmm/amd/amdiommu.c b/sys/amd64/vmm/amd/amdiommu.c
new file mode 100644
index 000000000000..4ded23dff003
--- /dev/null
+++ b/sys/amd64/vmm/amd/amdiommu.c
@@ -0,0 +1,185 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 The FreeBSD Fondation
+ *
+ * Portions of this software were developed by Ka Ho Ng
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include "amdvi_priv.h"
+#include "ivhd_if.h"
+
+struct amdiommu_softc {
+	struct resource *event_res;	/* Event interrupt resource. */
+	void   		*event_tag;	/* Event interrupt tag. */
+	int		event_rid;
+};
+
+static int	amdiommu_probe(device_t);
+static int	amdiommu_attach(device_t);
+static int	amdiommu_detach(device_t);
+static int	ivhd_setup_intr(device_t, driver_intr_t, void *,
+		    const char *);
+static int	ivhd_teardown_intr(device_t);
+
+static device_method_t amdiommu_methods[] = {
+	/* device interface */
+	DEVMETHOD(device_probe,			amdiommu_probe),
+	DEVMETHOD(device_attach,		amdiommu_attach),
+	DEVMETHOD(device_detach,		amdiommu_detach),
+	DEVMETHOD(ivhd_setup_intr,		ivhd_setup_intr),
+	DEVMETHOD(ivhd_teardown_intr,		ivhd_teardown_intr),
+	DEVMETHOD_END
+};
+static driver_t amdiommu_driver = {
+	"amdiommu",
+	amdiommu_methods,
+	sizeof(struct amdiommu_softc),
+};
+
+static int
+amdiommu_probe(device_t dev)
+{
+	int error;
+	int capoff;
+
+	/*
+	 * Check base class and sub-class
+	 */
+	if (pci_get_class(dev) != PCIC_BASEPERIPH ||
+	    pci_get_subclass(dev) != PCIS_BASEPERIPH_IOMMU)
+		return (ENXIO);
+
+	/*
+	 * A IOMMU capability block carries a 0Fh capid.
+	 */
+	error = pci_find_cap(dev, PCIY_SECDEV, &capoff);
+	if (error)
+		return (ENXIO);
+
+	/*
+	 * bit [18:16] == 011b indicates the capability block is IOMMU
+	 * capability block. If the field is not set to 011b, bail out.
+	 */
+	if ((pci_read_config(dev, capoff + 2, 2) & 0x7) != 0x3)
+		return (ENXIO);
+
+	return (BUS_PROBE_SPECIFIC);
+}
+
+static int
+amdiommu_attach(device_t dev)
+{
+
+	device_set_desc(dev, "AMD-Vi/IOMMU PCI function");
+	return (0);
+}
+
+static int
+amdiommu_detach(device_t dev)
+{
+
+	return (0);
+}
+
+static int
+ivhd_setup_intr(device_t dev, driver_intr_t handler, void *arg,
+    const char *desc)
+{
+	struct amdiommu_softc *sc;
+	int error, msicnt;
+
+	sc = device_get_softc(dev);
+	msicnt = 1;
+	if (sc->event_res != NULL)
+		panic("%s is called without intr teardown", __func__);
+	sc->event_rid = 1;
+
+	error = pci_alloc_msi(dev, &msicnt);
+	if (error) {
+		device_printf(dev, "Couldn't find event MSI IRQ resource.\n");
+		return (ENOENT);
+	}
+
+	sc->event_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
+	    &sc->event_rid, RF_ACTIVE);
+	if (sc->event_res == NULL) {
+		device_printf(dev, "Unable to allocate event INTR resource.\n");
+		error = ENOMEM;
+		goto fail;
+	}
+
+	error = bus_setup_intr(dev, sc->event_res, INTR_TYPE_MISC | INTR_MPSAFE,
+	    NULL, handler, arg, &sc->event_tag);
+	if (error) {
+		device_printf(dev, "Fail to setup event intr\n");
+		goto fail;
+	}
+
+	bus_describe_intr(dev, sc->event_res, sc->event_tag, "%s", desc);
+	return (0);
+
+fail:
+	ivhd_teardown_intr(dev);
+	return (error);
+}
+
+static int
+ivhd_teardown_intr(device_t dev)
+{
+	struct amdiommu_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	if (sc->event_res != NULL) {
+		bus_teardown_intr(dev, sc->event_res, sc->event_tag);
+		sc->event_tag = NULL;
+	}
+	if (sc->event_res != NULL) {
+		bus_release_resource(dev, SYS_RES_IRQ, sc->event_rid,
+		    sc->event_res);
+		sc->event_res = NULL;
+	}
+	pci_release_msi(dev);
+	return (0);
+}
+
+static devclass_t amdiommu_devclass;
+
+/* This driver has to be loaded before ivhd */
+DRIVER_MODULE(amdiommu, pci, amdiommu_driver, amdiommu_devclass, 0, 0);
+MODULE_DEPEND(amdiommu, pci, 1, 1, 1);
diff --git a/sys/amd64/vmm/amd/amdvi_hw.c b/sys/amd64/vmm/amd/amdvi_hw.c
index 0d3dee68059c..3a317ffbe29d 100644
--- a/sys/amd64/vmm/amd/amdvi_hw.c
+++ b/sys/amd64/vmm/amd/amdvi_hw.c
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/vmparam.h>
 #include <machine/pci_cfgreg.h>
 
+#include "ivhd_if.h"
 #include "pcib_if.h"
 
 #include "io/iommu.h"
@@ -773,99 +774,33 @@ amdvi_free_evt_intr_res(device_t dev)
 {
 
 	struct amdvi_softc *softc;
+	device_t mmio_dev;
 
 	softc = device_get_softc(dev);
-	if (softc->event_tag != NULL) {
-		bus_teardown_intr(dev, softc->event_res, softc->event_tag);
-	}
-	if (softc->event_res != NULL) {
-		bus_release_resource(dev, SYS_RES_IRQ, softc->event_rid,
-		    softc->event_res);
-	}
-	bus_delete_resource(dev, SYS_RES_IRQ, softc->event_rid);
-	PCIB_RELEASE_MSI(device_get_parent(device_get_parent(dev)),
-	    dev, 1, &softc->event_irq);
+	mmio_dev = softc->pci_dev;
+
+	IVHD_TEARDOWN_INTR(mmio_dev);
 }
 
 static bool
 amdvi_alloc_intr_resources(struct amdvi_softc *softc)
 {
 	struct amdvi_ctrl *ctrl;
-	device_t dev, pcib;
-	device_t mmio_dev;
-	uint64_t msi_addr;
-	uint32_t msi_data;
+	device_t dev, mmio_dev;
 	int err;
 
 	dev = softc->dev;
-	pcib = device_get_parent(device_get_parent(dev));
-	mmio_dev = pci_find_bsf(PCI_RID2BUS(softc->pci_rid),
-            PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid));
-	if (device_is_attached(mmio_dev)) {
-		device_printf(dev,
-		    "warning: IOMMU device is claimed by another driver %s\n",
-		    device_get_driver(mmio_dev)->name);
-	}
-
-	softc->event_irq = -1;
-	softc->event_rid = 0;
-
-	/*
-	 * Section 3.7.1 of IOMMU rev 2.0. With MSI, there is only one
-	 * interrupt. XXX: Enable MSI/X support.
-	 */
-	err = PCIB_ALLOC_MSI(pcib, dev, 1, 1, &softc->event_irq);
-	if (err) {
-		device_printf(dev,
-		    "Couldn't find event MSI IRQ resource.\n");
-		return (ENOENT);
-	}
-
-	err = bus_set_resource(dev, SYS_RES_IRQ, softc->event_rid,
-	    softc->event_irq, 1);
-	if (err) {
-		device_printf(dev, "Couldn't set event MSI resource.\n");
-		return (ENXIO);
-	}
-
-	softc->event_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
-	    &softc->event_rid, RF_ACTIVE);
-	if (!softc->event_res) {
-		device_printf(dev,
-		    "Unable to allocate event INTR resource.\n");
-		return (ENOMEM);
-	}
-
-	if (bus_setup_intr(dev, softc->event_res,
-	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, amdvi_event_intr,
-	    softc, &softc->event_tag)) {
-		device_printf(dev, "Fail to setup event intr\n");
-		bus_release_resource(softc->dev, SYS_RES_IRQ,
-		    softc->event_rid, softc->event_res);
-		softc->event_res = NULL;
-		return (ENXIO);
-	}
-
-	bus_describe_intr(dev, softc->event_res, softc->event_tag,
-	    "fault");
-
-	err = PCIB_MAP_MSI(pcib, dev, softc->event_irq, &msi_addr,
-	    &msi_data);
-	if (err) {
-		device_printf(dev,
-		    "Event interrupt config failed, err=%d.\n",
-		    err);
-		amdvi_free_evt_intr_res(softc->dev);
-		return (err);
-	}
+	mmio_dev = softc->pci_dev;
 
 	/* Clear interrupt status bits. */
 	ctrl = softc->ctrl;
 	ctrl->status &= AMDVI_STATUS_EV_OF | AMDVI_STATUS_EV_INTR;
 
-	/* Now enable MSI interrupt. */
-	pci_enable_msi(mmio_dev, msi_addr, msi_data);
-	return (0);
+	err = IVHD_SETUP_INTR(mmio_dev, amdvi_event_intr, softc, "fault");
+	if (err)
+		device_printf(dev, "Interrupt setup failed on %s\n",
+		    device_get_nameunit(mmio_dev));
+	return (err);
 }
 
 
diff --git a/sys/amd64/vmm/amd/amdvi_priv.h b/sys/amd64/vmm/amd/amdvi_priv.h
index 2db6914f08f4..0eae7ca6ca4c 100644
--- a/sys/amd64/vmm/amd/amdvi_priv.h
+++ b/sys/amd64/vmm/amd/amdvi_priv.h
@@ -375,17 +375,14 @@ enum IvrsType
 struct amdvi_softc {
 	struct amdvi_ctrl *ctrl;	/* Control area. */
 	device_t 	dev;		/* IOMMU device. */
+	device_t 	pci_dev;	/* IOMMU PCI function device. */
 	enum IvrsType   ivhd_type;	/* IOMMU IVHD type. */
 	bool		iotlb;		/* IOTLB supported by IOMMU */
 	struct amdvi_cmd *cmd;		/* Command descriptor area. */
 	int 		cmd_max;	/* Max number of commands. */
 	uint64_t	cmp_data;	/* Command completion write back. */
 	struct amdvi_event *event;	/* Event descriptor area. */
-	struct resource *event_res;	/* Event interrupt resource. */
-	void   		*event_tag;	/* Event interrupt tag. */
 	int		event_max;	/* Max number of events. */
-	int		event_irq;
-	int		event_rid;
 	/* ACPI various flags. */
 	uint32_t 	ivhd_flag;	/* ACPI IVHD flag. */
 	uint32_t 	ivhd_feature;	/* ACPI v1 Reserved or v2 attribute. */
diff --git a/sys/amd64/vmm/amd/ivhd_if.m b/sys/amd64/vmm/amd/ivhd_if.m
new file mode 100644
index 000000000000..eaae01ae0131
--- /dev/null
+++ b/sys/amd64/vmm/amd/ivhd_if.m
@@ -0,0 +1,46 @@
+#-
+# Copyright (c) 2021 The FreeBSD Fondation
+#
+# Portions of this software were developed by Ka Ho Ng
+# under sponsorship from the FreeBSD Foundation.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD$
+#
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+
+INTERFACE ivhd;
+
+METHOD int setup_intr {
+	device_t dev;
+	driver_intr_t handler;
+	void *arg;
+	const char *desc;
+};
+
+METHOD int teardown_intr {
+	device_t dev;
+};
diff --git a/sys/amd64/vmm/amd/ivrs_drv.c b/sys/amd64/vmm/amd/ivrs_drv.c
index a02a3ff0b8e9..90b4a874c345 100644
--- a/sys/amd64/vmm/amd/ivrs_drv.c
+++ b/sys/amd64/vmm/amd/ivrs_drv.c
@@ -2,6 +2,7 @@
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2016, Anish Gupta (anish at freebsd.org)
+ * Copyright (c) 2021 The FreeBSD Foundation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -44,6 +45,8 @@ __FBSDID("$FreeBSD$");
 #include <contrib/dev/acpica/include/acpi.h>
 #include <contrib/dev/acpica/include/accommon.h>
 #include <dev/acpica/acpivar.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
 
 #include "io/iommu.h"
 #include "amdvi_priv.h"
@@ -629,6 +632,9 @@ ivhd_attach(device_t dev)
 	softc->dev = dev;
 	ivhd = ivhd_hdrs[unit];
 	KASSERT(ivhd, ("ivhd is NULL"));
+	softc->pci_dev = pci_find_bsf(PCI_RID2BUS(ivhd->Header.DeviceId),
+	    PCI_RID2SLOT(ivhd->Header.DeviceId),
+	    PCI_RID2FUNC(ivhd->Header.DeviceId));
 
 	softc->ivhd_type = ivhd->Header.Type;
 	softc->pci_seg = ivhd->PciSegmentGroup;
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
index e4346447c840..848ec421e4f7 100644
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -48,6 +48,9 @@ SRCS+=	ept.c		\
 # amd-specific files
 .PATH: ${SRCTOP}/sys/amd64/vmm/amd
 SRCS+=	vmcb.c		\
+	amdiommu.c	\
+	ivhd_if.c	\
+	ivhd_if.h	\
 	svm.c		\
 	svm_support.S	\
 	npt.c		\


More information about the dev-commits-src-branches mailing list