git: db877a06ec41 - stable/12 - vmm: Fix AMD-vi using wrong rid range

Ka Ho Ng khng at FreeBSD.org
Fri Sep 3 19:06:05 UTC 2021


The branch stable/12 has been updated by khng:

URL: https://cgit.FreeBSD.org/src/commit/?id=db877a06ec414ce40ce5113c1913110e5e604e34

commit db877a06ec414ce40ce5113c1913110e5e604e34
Author:     Ka Ho Ng <khng at FreeBSD.org>
AuthorDate: 2021-07-13 17:53:10 +0000
Commit:     Ka Ho Ng <khng at FreeBSD.org>
CommitDate: 2021-09-03 18:46:41 +0000

    vmm: Fix AMD-vi using wrong rid range
    
    The ACPI parsing code around rid range was wrong on assuming there is
    only one pair of start/end device id range. Besides, ivhd_dev_parse()
    never work as supposed. The start/end rid info was always zero.
    
    Restructure the code to build dynamic-sized tables for each IOMMU softc
    holding device entries. The device entries are enumerated to find a
    suitable IOMMU unit. Operations on devices not governed (e.g. the IOMMU
    unit itself) are no-op from now on. There are also a minor fix on wrong
    %b formatting string usage.
    
    Tested on my EPYC 7282.
    
    Sponsored by:   The FreeBSD Foundation
    Reviewed by:    grehan
    Differential Revision:  https://reviews.freebsd.org/D30827
    
    (cherry picked from commit b5c74dfd6434b7f4dcc59dbd61b508acc5ec3ecf)
---
 sys/amd64/vmm/amd/amdvi_hw.c   | 52 ++++++++++++++------------------
 sys/amd64/vmm/amd/amdvi_priv.h | 13 ++++----
 sys/amd64/vmm/amd/ivrs_drv.c   | 68 +++++++++++++++++++++++++-----------------
 3 files changed, 70 insertions(+), 63 deletions(-)

diff --git a/sys/amd64/vmm/amd/amdvi_hw.c b/sys/amd64/vmm/amd/amdvi_hw.c
index 5102b714b0a0..054dfa650a3c 100644
--- a/sys/amd64/vmm/amd/amdvi_hw.c
+++ b/sys/amd64/vmm/amd/amdvi_hw.c
@@ -811,11 +811,11 @@ amdvi_print_dev_cap(struct amdvi_softc *softc)
 
 	cfg = softc->dev_cfg;
 	for (i = 0; i < softc->dev_cfg_cnt; i++) {
-		device_printf(softc->dev, "device [0x%x - 0x%x]"
+		device_printf(softc->dev, "device [0x%x - 0x%x] "
 		    "config:%b%s\n", cfg->start_id, cfg->end_id,
 		    cfg->data,
 		    "\020\001INIT\002ExtInt\003NMI"
-		    "\007LINT0\008LINT1",
+		    "\007LINT0\010LINT1",
 		    cfg->enable_ats ? "ATS enabled" : "");
 		cfg++;
 	}
@@ -876,10 +876,6 @@ amdvi_add_sysctl(struct amdvi_softc *softc)
 	    &softc->total_cmd, "Command submitted count");
 	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "pci_rid", CTLFLAG_RD,
 	    &softc->pci_rid, 0, "IOMMU RID");
-	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "start_dev_rid", CTLFLAG_RD,
-	    &softc->start_dev_rid, 0, "Start of device under this IOMMU");
-	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "end_dev_rid", CTLFLAG_RD,
-	    &softc->end_dev_rid, 0, "End of device under this IOMMU");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_head",
 	    CTLTYPE_UINT | CTLFLAG_RD, softc, 0,
 	    amdvi_handle_sysctl, "IU", "Command head");
@@ -1207,22 +1203,17 @@ static struct amdvi_softc *
 amdvi_find_iommu(uint16_t devid)
 {
 	struct amdvi_softc *softc;
-	int i;
+	int i, j;
 
 	for (i = 0; i < ivhd_count; i++) {
 		softc = device_get_softc(ivhd_devs[i]);
-		if ((devid >= softc->start_dev_rid) &&
-		    (devid <= softc->end_dev_rid))
-			return (softc);
+		for (j = 0; j < softc->dev_cfg_cnt; j++)
+			if ((devid >= softc->dev_cfg[j].start_id) &&
+			    (devid <= softc->dev_cfg[j].end_id))
+				return (softc);
 	}
 
-	/*
-	 * XXX: BIOS bug, device not in IVRS table, assume its from first IOMMU.
-	 */
-	printf("BIOS bug device(%d.%d.%d) doesn't have IVHD entry.\n",
-	    RID2PCI_STR(devid));
-
-	return (device_get_softc(ivhd_devs[0]));
+	return (NULL);
 }
 
 /*
@@ -1231,14 +1222,12 @@ amdvi_find_iommu(uint16_t devid)
  * be set concurrently, e.g. read and write bits.
  */
 static void
-amdvi_set_dte(struct amdvi_domain *domain, uint16_t devid, bool enable)
+amdvi_set_dte(struct amdvi_domain *domain, struct amdvi_softc *softc,
+    uint16_t devid, bool enable)
 {
-	struct amdvi_softc *softc;
 	struct amdvi_dte* temp;
 
 	KASSERT(domain, ("domain is NULL for pci_rid:0x%x\n", devid));
-	
-	softc = amdvi_find_iommu(devid);
 	KASSERT(softc, ("softc is NULL for pci_rid:0x%x\n", devid));
 
 	temp = &amdvi_dte[devid];
@@ -1272,11 +1261,8 @@ amdvi_set_dte(struct amdvi_domain *domain, uint16_t devid, bool enable)
 }
 
 static void
-amdvi_inv_device(uint16_t devid)
+amdvi_inv_device(struct amdvi_softc *softc, uint16_t devid)
 {
-	struct amdvi_softc *softc;
-
-	softc = amdvi_find_iommu(devid);
 	KASSERT(softc, ("softc is NULL"));
 
 	amdvi_cmd_inv_dte(softc, devid);
@@ -1291,6 +1277,7 @@ static void
 amdvi_add_device(void *arg, uint16_t devid)
 {
 	struct amdvi_domain *domain;
+	struct amdvi_softc *softc;
 
 	domain = (struct amdvi_domain *)arg;
 	KASSERT(domain != NULL, ("domain is NULL"));
@@ -1298,22 +1285,29 @@ amdvi_add_device(void *arg, uint16_t devid)
 	printf("Assigning device(%d.%d.%d) to domain:%d\n",
 	    RID2PCI_STR(devid), domain->id);
 #endif
-	amdvi_set_dte(domain, devid, true);
-	amdvi_inv_device(devid);
+	softc = amdvi_find_iommu(devid);
+	if (softc == NULL)
+		return;
+	amdvi_set_dte(domain, softc, devid, true);
+	amdvi_inv_device(softc, devid);
 }
 
 static void
 amdvi_remove_device(void *arg, uint16_t devid)
 {
 	struct amdvi_domain *domain;
+	struct amdvi_softc *softc;
 
 	domain = (struct amdvi_domain *)arg;
 #ifdef AMDVI_DEBUG_CMD
 	printf("Remove device(0x%x) from domain:%d\n",
 	       devid, domain->id);
 #endif
-	amdvi_set_dte(domain, devid, false);
-	amdvi_inv_device(devid);
+	softc = amdvi_find_iommu(devid);
+	if (softc == NULL)
+		return;
+	amdvi_set_dte(domain, softc, devid, false);
+	amdvi_inv_device(softc, devid);
 }
 
 static void
diff --git a/sys/amd64/vmm/amd/amdvi_priv.h b/sys/amd64/vmm/amd/amdvi_priv.h
index 0eae7ca6ca4c..6960ef24d683 100644
--- a/sys/amd64/vmm/amd/amdvi_priv.h
+++ b/sys/amd64/vmm/amd/amdvi_priv.h
@@ -2,7 +2,10 @@
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2016 Anish Gupta (anish at freebsd.org)
- * All rights reserved.
+ * Copyright (c) 2021 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Ka Ho Ng
+ * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -392,13 +395,11 @@ struct amdvi_softc {
 	uint8_t		pci_cap;	/* PCI capability. */
 	uint16_t 	pci_seg;	/* IOMMU PCI domain/segment. */
 	uint16_t 	pci_rid;	/* PCI BDF of IOMMU */
-	/* Device range under this IOMMU. */
-	uint16_t 	start_dev_rid;	/* First device under this IOMMU. */
-	uint16_t 	end_dev_rid;	/* Last device under this IOMMU. */
 
-	/* BIOS provided device configuration for end points. */
-	struct 		ivhd_dev_cfg dev_cfg[10];
+	/* ACPI device configuration for end points. */
+	struct 		ivhd_dev_cfg *dev_cfg;
 	int		dev_cfg_cnt;
+	int		dev_cfg_cap;
 
 	/* Software statistics. */
 	uint64_t 	event_intr_cnt;	/* Total event INTR count. */
diff --git a/sys/amd64/vmm/amd/ivrs_drv.c b/sys/amd64/vmm/amd/ivrs_drv.c
index 10d14ec2f210..1455e314b692 100644
--- a/sys/amd64/vmm/amd/ivrs_drv.c
+++ b/sys/amd64/vmm/amd/ivrs_drv.c
@@ -185,9 +185,17 @@ ivhd_dev_add_entry(struct amdvi_softc *softc, uint32_t start_id,
 {
 	struct ivhd_dev_cfg *dev_cfg;
 
-	/* If device doesn't have special data, don't add it. */
-	if (!cfg)
-		return;
+	KASSERT(softc->dev_cfg_cap <= softc->dev_cfg_cnt,
+	    ("Impossible case: number of dev_cfg exceeding capacity"));
+	if (softc->dev_cfg_cap == softc->dev_cfg_cnt) {
+		if (softc->dev_cfg_cap == 0)
+			softc->dev_cfg_cap = 1;
+		else
+			softc->dev_cfg_cap <<= 2;
+		softc->dev_cfg = realloc(softc->dev_cfg,
+		    sizeof(*softc->dev_cfg) * softc->dev_cfg_cap, M_DEVBUF,
+		    M_WAITOK);
+	}
 
 	dev_cfg = &softc->dev_cfg[softc->dev_cfg_cnt++];
 	dev_cfg->start_id = start_id;
@@ -204,14 +212,11 @@ ivhd_dev_parse(ACPI_IVRS_HARDWARE1 *ivhd, struct amdvi_softc *softc)
 {
 	ACPI_IVRS_DE_HEADER *de;
 	uint8_t *p, *end;
-	int range_start_id = 0, range_end_id = 0;
+	int range_start_id = -1, range_end_id = -1, i;
 	uint32_t *extended;
 	uint8_t all_data = 0, range_data = 0;
 	bool range_enable_ats = false, enable_ats;
 
-	softc->start_dev_rid = ~0;
-	softc->end_dev_rid = 0;
-
 	switch (ivhd->Header.Type) {
 		case IVRS_TYPE_HARDWARE_LEGACY:
 			p = (uint8_t *)ivhd + sizeof(ACPI_IVRS_HARDWARE1);
@@ -232,11 +237,11 @@ ivhd_dev_parse(ACPI_IVRS_HARDWARE1 *ivhd, struct amdvi_softc *softc)
 
 	while (p < end) {
 		de = (ACPI_IVRS_DE_HEADER *)p;
-		softc->start_dev_rid = MIN(softc->start_dev_rid, de->Id);
-		softc->end_dev_rid = MAX(softc->end_dev_rid, de->Id);
 		switch (de->Type) {
 		case ACPI_IVRS_TYPE_ALL:
 			all_data = de->DataSetting;
+			for (i = 0; i < softc->dev_cfg_cnt; i++)
+				softc->dev_cfg[i].data |= all_data;
 			break;
 
 		case ACPI_IVRS_TYPE_SELECT:
@@ -256,6 +261,11 @@ ivhd_dev_parse(ACPI_IVRS_HARDWARE1 *ivhd, struct amdvi_softc *softc)
 		case ACPI_IVRS_TYPE_START:
 		case ACPI_IVRS_TYPE_ALIAS_START:
 		case ACPI_IVRS_TYPE_EXT_START:
+			if (range_start_id != -1) {
+				device_printf(softc->dev,
+				    "Unexpected start-of-range device entry\n");
+				return (EINVAL);
+			}
 			range_start_id = de->Id;
 			range_data = de->DataSetting;
 			if (de->Type == ACPI_IVRS_TYPE_EXT_START) {
@@ -267,10 +277,20 @@ ivhd_dev_parse(ACPI_IVRS_HARDWARE1 *ivhd, struct amdvi_softc *softc)
 			break;
 
 		case ACPI_IVRS_TYPE_END:
+			if (range_start_id == -1) {
+				device_printf(softc->dev,
+				    "Unexpected end-of-range device entry\n");
+				return (EINVAL);
+			}
 			range_end_id = de->Id;
+			if (range_end_id < range_start_id) {
+				device_printf(softc->dev,
+				    "Device entry range going backward\n");
+				return (EINVAL);
+			}
 			ivhd_dev_add_entry(softc, range_start_id, range_end_id,
-				range_data | all_data, range_enable_ats);
-			range_start_id = range_end_id = 0;
+			    range_data | all_data, range_enable_ats);
+			range_start_id = range_end_id = -1;
 			range_data = 0;
 			all_data = 0;
 			break;
@@ -288,12 +308,6 @@ ivhd_dev_parse(ACPI_IVRS_HARDWARE1 *ivhd, struct amdvi_softc *softc)
 				    "Unknown dev entry:0x%x\n", de->Type);
 		}
 
-		if (softc->dev_cfg_cnt >
-			(sizeof(softc->dev_cfg) / sizeof(softc->dev_cfg[0]))) {
-			device_printf(softc->dev,
-			    "WARN Too many device entries.\n");
-			return (EINVAL);
-		}
 		if (de->Type < 0x40)
 			p += sizeof(ACPI_IVRS_DEVICE4);
 		else if (de->Type < 0x80)
@@ -305,10 +319,6 @@ ivhd_dev_parse(ACPI_IVRS_HARDWARE1 *ivhd, struct amdvi_softc *softc)
 		}
 	}
 
-	KASSERT((softc->end_dev_rid >= softc->start_dev_rid),
-	    ("Device end[0x%x] < start[0x%x.\n",
-	    softc->end_dev_rid, softc->start_dev_rid));
-
 	return (0);
 }
 
@@ -620,9 +630,6 @@ ivhd_print_cap(struct amdvi_softc *softc, ACPI_IVRS_HARDWARE1 * ivhd)
 	    		max_ptp_level, amdvi_ptp_level);
 	}
 
-	device_printf(softc->dev, "device range: 0x%x - 0x%x\n",
-			softc->start_dev_rid, softc->end_dev_rid);
-
 	return (0);
 }
 
@@ -680,21 +687,25 @@ ivhd_attach(device_t dev)
 	if (status != 0) {
 		device_printf(dev,
 		    "endpoint device parsing error=%d\n", status);
+		goto fail;
 	}
 
 	status = ivhd_print_cap(softc, ivhd);
-	if (status != 0) {
-		return (status);
-	}
+	if (status != 0)
+		goto fail;
 
 	status = amdvi_setup_hw(softc);
 	if (status != 0) {
 		device_printf(dev, "couldn't be initialised, error=%d\n", 
 		    status);
-		return (status);
+		goto fail;
 	}
 
 	return (0);
+
+fail:
+	free(softc->dev_cfg, M_DEVBUF);
+	return (status);
 }
 
 static int
@@ -705,6 +716,7 @@ ivhd_detach(device_t dev)
 	softc = device_get_softc(dev);
 
 	amdvi_teardown_hw(softc);
+	free(softc->dev_cfg, M_DEVBUF);
 
 	/*
 	 * XXX: delete the device.


More information about the dev-commits-src-all mailing list