svn commit: r349184 - head/sys/amd64/vmm/intel
Rodney W. Grimes
freebsd at gndrsh.dnsmgr.net
Wed Jun 19 14:37:57 UTC 2019
> Author: scottl
> Date: Wed Jun 19 06:41:07 2019
> New Revision: 349184
> URL: https://svnweb.freebsd.org/changeset/base/349184
>
> Log:
> Implement VT-d capability detection on chipsets that have multiple
> translation units with differing capabilities
>
> From the author via Bugzilla:
> ---
If you had read the full bug report you would also know:
https://reviews.freebsd.org/D19001
existed and that some code cleanup had occurred since
this bug was created.
The review was pending approval by bhyve maintainer(s).
> When an attempt is made to passthrough a PCI device to a bhyve VM
> (causing initialisation of IOMMU) on certain Intel chipsets using
> VT-d the PCI bus stops working entirely. This issue occurs on the
> E3-1275 v5 processor on C236 chipset and has also been encountered
> by others on the forums with different hardware in the Skylake
> series.
>
> The chipset has two VT-d translation units. The issue is caused by
> an attempt to use the VT-d device-IOTLB capability that is
> supported by only the first unit for devices attached to the
> second unit which lacks that capability. Only the capabilities of
> the first unit are checked and are assumed to be the same for all
> units.
>
> Attached is a patch to rectify this issue by determining which
> unit is responsible for the device being added to a domain and
> then checking that unit's device-IOTLB capability. In addition to
> this a few fixes have been made to other instances where the first
> unit's capabilities are assumed for all units for domains they
> share. In these cases a mutual set of capabilities is determined.
> The patch should hopefully fix any bugs for current/future
> hardware with multiple translation units supporting different
> capabilities.
>
> A description is on the forums at
> https://forums.freebsd.org/threads/pci-passthrough-bhyve-usb-xhci.65235
> The thread includes observations by other users of the bug
> occurring, and description as well as confirmation of the fix.
> I'd also like to thank Ordoban for their help.
>
> ---
> Personally tested on a Skylake laptop, Skylake Xeon server, and
> a Xeon-D-1541, passing through XHCI and NVMe functions. Passthru
> is hit-or-miss to the point of being unusable without this
> patch.
>
> PR: 229852
> Submitted by: callum at aitchison.org
> MFC after: 1 week
>
> Modified:
> head/sys/amd64/vmm/intel/vtd.c
>
> Modified: head/sys/amd64/vmm/intel/vtd.c
> ==============================================================================
> --- head/sys/amd64/vmm/intel/vtd.c Wed Jun 19 03:33:00 2019 (r349183)
> +++ head/sys/amd64/vmm/intel/vtd.c Wed Jun 19 06:41:07 2019 (r349184)
> @@ -51,6 +51,8 @@ __FBSDID("$FreeBSD$");
> * Architecture Spec, September 2008.
> */
>
> +#define VTD_DRHD_INCLUDE_PCI_ALL(Flags) (((Flags) >> 0) & 0x1)
> +
> /* Section 10.4 "Register Descriptions" */
> struct vtdmap {
> volatile uint32_t version;
> @@ -116,10 +118,11 @@ struct domain {
> static SLIST_HEAD(, domain) domhead;
>
> #define DRHD_MAX_UNITS 8
> -static int drhd_num;
> -static struct vtdmap *vtdmaps[DRHD_MAX_UNITS];
> -static int max_domains;
> -typedef int (*drhd_ident_func_t)(void);
> +static ACPI_DMAR_HARDWARE_UNIT *drhds[DRHD_MAX_UNITS];
> +static int drhd_num;
> +static struct vtdmap *vtdmaps[DRHD_MAX_UNITS];
> +static int max_domains;
> +typedef int (*drhd_ident_func_t)(void);
>
> static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
> static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
> @@ -175,6 +178,69 @@ domain_id(void)
> return (id);
> }
>
> +static struct vtdmap *
> +vtd_device_scope(uint16_t rid)
> +{
> + int i, remaining, pathremaining;
> + char *end, *pathend;
> + struct vtdmap *vtdmap;
> + ACPI_DMAR_HARDWARE_UNIT *drhd;
> + ACPI_DMAR_DEVICE_SCOPE *device_scope;
> + ACPI_DMAR_PCI_PATH *path;
> +
> + for (i = 0; i < drhd_num; i++) {
> + drhd = drhds[i];
> +
> + if (VTD_DRHD_INCLUDE_PCI_ALL(drhd->Flags)) {
> + /*
> + * From Intel VT-d arch spec, version 3.0:
> + * If a DRHD structure with INCLUDE_PCI_ALL flag Set is reported
> + * for a Segment, it must be enumerated by BIOS after all other
> + * DRHD structures for the same Segment.
> + */
> + vtdmap = vtdmaps[i];
> + return(vtdmap);
> + }
> +
> + end = (char *)drhd + drhd->Header.Length;
> + remaining = drhd->Header.Length - sizeof(ACPI_DMAR_HARDWARE_UNIT);
> + while (remaining > sizeof(ACPI_DMAR_DEVICE_SCOPE)) {
> + device_scope = (ACPI_DMAR_DEVICE_SCOPE *)(end - remaining);
> + remaining -= device_scope->Length;
> +
> + switch (device_scope->EntryType){
> + /* 0x01 and 0x02 are PCI device entries */
> + case 0x01:
> + case 0x02:
> + break;
> + default:
> + continue;
> + }
> +
> + if (PCI_RID2BUS(rid) != device_scope->Bus)
> + continue;
> +
> + pathend = (char *)device_scope + device_scope->Length;
> + pathremaining = device_scope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE);
> + while (pathremaining >= sizeof(ACPI_DMAR_PCI_PATH)) {
> + path = (ACPI_DMAR_PCI_PATH *)(pathend - pathremaining);
> + pathremaining -= sizeof(ACPI_DMAR_PCI_PATH);
> +
> + if (PCI_RID2SLOT(rid) != path->Device)
> + continue;
> + if (PCI_RID2FUNC(rid) != path->Function)
> + continue;
> +
> + vtdmap = vtdmaps[i];
> + return (vtdmap);
> + }
> + }
> + }
> +
> + /* No matching scope */
> + return (NULL);
> +}
> +
> static void
> vtd_wbflush(struct vtdmap *vtdmap)
> {
> @@ -240,7 +306,7 @@ vtd_translation_disable(struct vtdmap *vtdmap)
> static int
> vtd_init(void)
> {
> - int i, units, remaining;
> + int i, units, remaining, tmp;
> struct vtdmap *vtdmap;
> vm_paddr_t ctx_paddr;
> char *end, envname[32];
> @@ -291,8 +357,9 @@ vtd_init(void)
> break;
>
> drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
> - vtdmaps[units++] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
> - if (units >= DRHD_MAX_UNITS)
> + drhds[units] = drhd;
> + vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
> + if (++units >= DRHD_MAX_UNITS)
> break;
> remaining -= hdr->Length;
> }
> @@ -302,13 +369,19 @@ vtd_init(void)
>
> skip_dmar:
> drhd_num = units;
> - vtdmap = vtdmaps[0];
>
> - if (VTD_CAP_CM(vtdmap->cap) != 0)
> - panic("vtd_init: invalid caching mode");
> + max_domains = 64 * 1024; /* maximum valid value */
> + for (i = 0; i < drhd_num; i++){
> + vtdmap = vtdmaps[i];
>
> - max_domains = vtd_max_domains(vtdmap);
> + if (VTD_CAP_CM(vtdmap->cap) != 0)
> + panic("vtd_init: invalid caching mode");
>
> + /* take most compatible (minimum) value */
> + if ((tmp = vtd_max_domains(vtdmap)) < max_domains)
> + max_domains = tmp;
> + }
> +
> /*
> * Set up the root-table to point to the context-entry tables
> */
> @@ -373,7 +446,6 @@ vtd_add_device(void *arg, uint16_t rid)
> struct vtdmap *vtdmap;
> uint8_t bus;
>
> - vtdmap = vtdmaps[0];
> bus = PCI_RID2BUS(rid);
> ctxp = ctx_tables[bus];
> pt_paddr = vtophys(dom->ptp);
> @@ -385,6 +457,10 @@ vtd_add_device(void *arg, uint16_t rid)
> (uint16_t)(ctxp[idx + 1] >> 8));
> }
>
> + if ((vtdmap = vtd_device_scope(rid)) == NULL)
> + panic("vtd_add_device: device %x is not in scope for "
> + "any DMA remapping unit", rid);
> +
> /*
> * Order is important. The 'present' bit is set only after all fields
> * of the context pointer are initialized.
> @@ -568,8 +644,6 @@ vtd_create_domain(vm_paddr_t maxaddr)
> if (drhd_num <= 0)
> panic("vtd_create_domain: no dma remapping hardware available");
>
> - vtdmap = vtdmaps[0];
> -
> /*
> * Calculate AGAW.
> * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
> @@ -594,7 +668,14 @@ vtd_create_domain(vm_paddr_t maxaddr)
> pt_levels = 2;
> sagaw = 30;
> addrwidth = 0;
> - tmp = VTD_CAP_SAGAW(vtdmap->cap);
> +
> + tmp = ~0;
> + for (i = 0; i < drhd_num; i++) {
> + vtdmap = vtdmaps[i];
> + /* take most compatible value */
> + tmp &= VTD_CAP_SAGAW(vtdmap->cap);
> + }
> +
> for (i = 0; i < 5; i++) {
> if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
> break;
> @@ -606,8 +687,8 @@ vtd_create_domain(vm_paddr_t maxaddr)
> }
>
> if (i >= 5) {
> - panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d",
> - VTD_CAP_SAGAW(vtdmap->cap), agaw);
> + panic("vtd_create_domain: SAGAW 0x%x does not support AGAW %d",
> + tmp, agaw);
> }
>
> dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
> @@ -634,7 +715,12 @@ vtd_create_domain(vm_paddr_t maxaddr)
> * There is not any code to deal with the demotion at the moment
> * so we disable superpage mappings altogether.
> */
> - dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
> + dom->spsmask = ~0;
> + for (i = 0; i < drhd_num; i++) {
> + vtdmap = vtdmaps[i];
> + /* take most compatible value */
> + dom->spsmask &= VTD_CAP_SPS(vtdmap->cap);
> + }
> #endif
>
> SLIST_INSERT_HEAD(&domhead, dom, next);
>
>
--
Rod Grimes rgrimes at freebsd.org
More information about the svn-src-all
mailing list