git: fa6787221742 - stable/14 - AMD IOMMU driver
Date: Sat, 09 Nov 2024 20:19:15 UTC
The branch stable/14 has been updated by kib:
URL: https://cgit.FreeBSD.org/src/commit/?id=fa6787221742a00d410920a4a917bf2f9b1ed192
commit fa6787221742a00d410920a4a917bf2f9b1ed192
Author: Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2024-05-12 10:20:11 +0000
Commit: Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2024-11-09 19:04:18 +0000
AMD IOMMU driver
(cherry picked from commit 0f5116d7efe33c81f0b24b56eec78af37898f500)
---
sys/conf/files.x86 | 6 +
sys/x86/iommu/amd_cmd.c | 360 +++++++++++++
sys/x86/iommu/amd_ctx.c | 639 +++++++++++++++++++++++
sys/x86/iommu/amd_drv.c | 1205 +++++++++++++++++++++++++++++++++++++++++++
sys/x86/iommu/amd_event.c | 323 ++++++++++++
sys/x86/iommu/amd_idpgtbl.c | 396 ++++++++++++++
sys/x86/iommu/amd_intrmap.c | 391 ++++++++++++++
sys/x86/iommu/amd_iommu.h | 243 +++++++++
8 files changed, 3563 insertions(+)
diff --git a/sys/conf/files.x86 b/sys/conf/files.x86
index c6d705e9715d..33da95a65ba4 100644
--- a/sys/conf/files.x86
+++ b/sys/conf/files.x86
@@ -344,6 +344,12 @@ x86/cpufreq/hwpstate_amd.c optional cpufreq
x86/cpufreq/hwpstate_intel.c optional cpufreq
x86/cpufreq/p4tcc.c optional cpufreq
x86/cpufreq/powernow.c optional cpufreq
+x86/iommu/amd_cmd.c optional acpi iommu pci
+x86/iommu/amd_ctx.c optional acpi iommu pci
+x86/iommu/amd_drv.c optional acpi iommu pci
+x86/iommu/amd_event.c optional acpi iommu pci
+x86/iommu/amd_idpgtbl.c optional acpi iommu pci
+x86/iommu/amd_intrmap.c optional acpi iommu pci
x86/iommu/intel_ctx.c optional acpi iommu pci
x86/iommu/intel_drv.c optional acpi iommu pci
x86/iommu/intel_fault.c optional acpi iommu pci
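
The six new amd_*.c entries above use the same "optional acpi iommu pci" condition as the existing Intel DMAR files, so the driver is only compiled into kernels that configure all three facilities. A minimal kernel-config sketch, assuming the iommu token is enabled the same way as for the DMAR driver (amd64 GENERIC already carries acpi and pci; the exact device line below is an assumption, not part of this commit):

device		acpi
device		pci
device		iommu		# x86 IOMMU busdma support (Intel DMAR and, with this commit, AMD)
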
diff --git a/sys/x86/iommu/amd_cmd.c b/sys/x86/iommu/amd_cmd.c
new file mode 100644
index 000000000000..bbc2a8e0ad9f
--- /dev/null
+++ b/sys/x86/iommu/amd_cmd.c
@@ -0,0 +1,360 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_acpi.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/taskqueue.h>
+#include <sys/time.h>
+#include <sys/tree.h>
+#include <sys/vmem.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <dev/pci/pcireg.h>
+#include <machine/bus.h>
+#include <machine/cpu.h>
+#include <x86/include/busdma_impl.h>
+#include <dev/iommu/busdma_iommu.h>
+#include <x86/iommu/amd_reg.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/amd_iommu.h>
+
+static void
+amdiommu_enable_cmdbuf(struct amdiommu_unit *unit)
+{
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ unit->hw_ctrl |= AMDIOMMU_CTRL_CMDBUF_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+}
+
+static void
+amdiommu_disable_cmdbuf(struct amdiommu_unit *unit)
+{
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ unit->hw_ctrl &= ~AMDIOMMU_CTRL_CMDBUF_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+}
+
+
+static void
+amdiommu_enable_qi_intr(struct iommu_unit *iommu)
+{
+ struct amdiommu_unit *unit;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ unit->hw_ctrl |= AMDIOMMU_CTRL_COMWINT_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+ amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS,
+ AMDIOMMU_CMDEVS_COMWAITINT);
+}
+
+static void
+amdiommu_disable_qi_intr(struct iommu_unit *iommu)
+{
+ struct amdiommu_unit *unit;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ unit->hw_ctrl &= ~AMDIOMMU_CTRL_COMWINT_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+}
+
+static void
+amdiommu_cmd_advance_tail(struct iommu_unit *iommu)
+{
+ struct amdiommu_unit *unit;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ amdiommu_write8(unit, AMDIOMMU_CMDBUF_TAIL, unit->x86c.inv_queue_tail);
+}
+
+static void
+amdiommu_cmd_ensure(struct iommu_unit *iommu, int descr_count)
+{
+ struct amdiommu_unit *unit;
+ uint64_t head;
+ int bytes;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ bytes = descr_count << AMDIOMMU_CMD_SZ_SHIFT;
+ for (;;) {
+ if (bytes <= unit->x86c.inv_queue_avail)
+ break;
+ /* refill */
+ head = amdiommu_read8(unit, AMDIOMMU_CMDBUF_HEAD);
+ head &= AMDIOMMU_CMDPTR_MASK;
+ unit->x86c.inv_queue_avail = head - unit->x86c.inv_queue_tail -
+ AMDIOMMU_CMD_SZ;
+ if (head <= unit->x86c.inv_queue_tail)
+ unit->x86c.inv_queue_avail += unit->x86c.inv_queue_size;
+ if (bytes <= unit->x86c.inv_queue_avail)
+ break;
+
+ /*
+ * No space in the queue, do busy wait. Hardware must
+ * make progress. But first advance the tail to
+ * inform the descriptor streamer about entries we
+ * might have already filled, otherwise they could
+ * clog the whole queue.
+ *
+ * See dmar_qi_invalidate_locked() for a discussion
+ * about data race prevention.
+ */
+ amdiommu_cmd_advance_tail(iommu);
+ unit->x86c.inv_queue_full++;
+ cpu_spinwait();
+ }
+ unit->x86c.inv_queue_avail -= bytes;
+}
+
+static void
+amdiommu_cmd_emit(struct amdiommu_unit *unit, const struct
+ amdiommu_cmd_generic *cmd)
+{
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ memcpy(unit->x86c.inv_queue + unit->x86c.inv_queue_tail, cmd,
+ sizeof(*cmd));
+ unit->x86c.inv_queue_tail += AMDIOMMU_CMD_SZ;
+ KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size,
+ ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail,
+ (uintmax_t)unit->x86c.inv_queue_size));
+ unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1;
+}
+
+static void
+amdiommu_cmd_emit_wait_descr(struct iommu_unit *iommu, uint32_t seq,
+ bool intr, bool memw, bool fence)
+{
+ struct amdiommu_unit *unit;
+ struct amdiommu_cmd_completion_wait c;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_COMPLETION_WAIT;
+ if (memw) {
+ uint32_t x;
+
+ c.s = 1;
+ x = unit->x86c.inv_waitd_seq_hw_phys;
+ x >>= 3;
+ c.address0 = x;
+ x = unit->x86c.inv_waitd_seq_hw_phys >> 32;
+ c.address1 = x;
+ c.data0 = seq;
+ }
+ if (fence)
+ c.f = 1;
+ if (intr)
+ c.i = 1;
+ amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
+}
+
+static void
+amdiommu_qi_invalidate_emit(struct iommu_domain *adomain, iommu_gaddr_t base,
+ iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait)
+{
+ struct amdiommu_domain *domain;
+ struct amdiommu_unit *unit;
+ struct amdiommu_cmd_invalidate_iommu_pages c;
+ u_int isize;
+
+ domain = IODOM2DOM(adomain);
+ unit = domain->unit;
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_INVALIDATE_IOMMU_PAGES;
+ c.domainid = domain->domain;
+ isize = IOMMU_PAGE_SIZE; /* XXXKIB handle superpages */
+
+ for (; size > 0; base += isize, size -= isize) {
+ amdiommu_cmd_ensure(AMD2IOMMU(unit), 1);
+ c.s = 0;
+ c.pde = 1;
+ c.address = base >> IOMMU_PAGE_SHIFT;
+ amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
+ }
+ iommu_qi_emit_wait_seq(AMD2IOMMU(unit), pseq, emit_wait);
+}
+
+void
+amdiommu_qi_invalidate_all_pages_locked_nowait(struct amdiommu_domain *domain)
+{
+ struct amdiommu_unit *unit;
+ struct amdiommu_cmd_invalidate_iommu_pages c;
+
+ unit = domain->unit;
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_INVALIDATE_IOMMU_PAGES;
+ c.domainid = domain->domain;
+
+ /*
+ * The magic address specified in the note for the
+ * INVALIDATE_IOMMU_PAGES command description.
+ */
+ c.s = 1;
+ c.pde = 1;
+ c.address = 0x7ffffffffffff;
+
+ amdiommu_cmd_ensure(AMD2IOMMU(unit), 1);
+ amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
+}
+
+void
+amdiommu_qi_invalidate_wait_sync(struct iommu_unit *iommu)
+{
+ struct iommu_qi_genseq gseq;
+
+ amdiommu_cmd_ensure(iommu, 1);
+ iommu_qi_emit_wait_seq(iommu, &gseq, true);
+ IOMMU2AMD(iommu)->x86c.inv_seq_waiters++;
+ amdiommu_cmd_advance_tail(iommu);
+ iommu_qi_wait_for_seq(iommu, &gseq, true);
+}
+
+void
+amdiommu_qi_invalidate_ctx_locked_nowait(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_cmd_invalidate_devtab_entry c;
+
+ amdiommu_cmd_ensure(AMD2IOMMU(CTX2AMD(ctx)), 1);
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_INVALIDATE_DEVTAB_ENTRY;
+ c.devid = ctx->context.rid;
+ amdiommu_cmd_emit(CTX2AMD(ctx), (struct amdiommu_cmd_generic *)&c);
+}
+
+
+void
+amdiommu_qi_invalidate_ctx_locked(struct amdiommu_ctx *ctx)
+{
+ amdiommu_qi_invalidate_ctx_locked_nowait(ctx);
+ amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx)));
+}
+
+void
+amdiommu_qi_invalidate_ir_locked_nowait(struct amdiommu_unit *unit,
+ uint16_t devid)
+{
+ struct amdiommu_cmd_invalidate_interrupt_table c;
+
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ amdiommu_cmd_ensure(AMD2IOMMU(unit), 1);
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_INVALIDATE_INTERRUPT_TABLE;
+ c.devid = devid;
+ amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
+}
+
+void
+amdiommu_qi_invalidate_ir_locked(struct amdiommu_unit *unit, uint16_t devid)
+{
+ amdiommu_qi_invalidate_ir_locked_nowait(unit, devid);
+ amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(unit));
+}
+
+static void
+amdiommu_qi_task(void *arg, int pending __unused)
+{
+ struct amdiommu_unit *unit;
+
+ unit = IOMMU2AMD(arg);
+ iommu_qi_drain_tlb_flush(AMD2IOMMU(unit));
+
+ AMDIOMMU_LOCK(unit);
+ if (unit->x86c.inv_seq_waiters > 0)
+ wakeup(&unit->x86c.inv_seq_waiters);
+ AMDIOMMU_UNLOCK(unit);
+}
+
+int
+amdiommu_init_cmd(struct amdiommu_unit *unit)
+{
+ uint64_t qi_sz, rv;
+
+ unit->x86c.qi_buf_maxsz = ilog2(AMDIOMMU_CMDBUF_MAX / PAGE_SIZE);
+ unit->x86c.qi_cmd_sz = AMDIOMMU_CMD_SZ;
+ iommu_qi_common_init(AMD2IOMMU(unit), amdiommu_qi_task);
+ get_x86_iommu()->qi_ensure = amdiommu_cmd_ensure;
+ get_x86_iommu()->qi_emit_wait_descr = amdiommu_cmd_emit_wait_descr;
+ get_x86_iommu()->qi_advance_tail = amdiommu_cmd_advance_tail;
+ get_x86_iommu()->qi_invalidate_emit = amdiommu_qi_invalidate_emit;
+
+ rv = pmap_kextract((uintptr_t)unit->x86c.inv_queue);
+
+ /*
+ * See the description of the ComLen encoding for the
+ * Command Buffer Base Address Register.
+ */
+ qi_sz = ilog2(unit->x86c.inv_queue_size / PAGE_SIZE) + 8;
+ rv |= qi_sz << AMDIOMMU_CMDBUF_BASE_SZSHIFT;
+
+ AMDIOMMU_LOCK(unit);
+ amdiommu_write8(unit, AMDIOMMU_CMDBUF_BASE, rv);
+ amdiommu_enable_cmdbuf(unit);
+ amdiommu_enable_qi_intr(AMD2IOMMU(unit));
+ AMDIOMMU_UNLOCK(unit);
+
+ return (0);
+}
+
+static void
+amdiommu_fini_cmd_helper(struct iommu_unit *iommu)
+{
+ amdiommu_disable_cmdbuf(IOMMU2AMD(iommu));
+ amdiommu_disable_qi_intr(iommu);
+}
+
+void
+amdiommu_fini_cmd(struct amdiommu_unit *unit)
+{
+ iommu_qi_common_fini(AMD2IOMMU(unit), amdiommu_fini_cmd_helper);
+}
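
amdiommu_cmd_ensure() and amdiommu_cmd_emit() above drive the command ring with nothing but a head offset read back from the hardware, a software tail, and a power-of-two buffer size. A minimal userspace sketch of the same arithmetic, assuming 16-byte entries and a power-of-two ring; the names (struct cmdring, ring_free_bytes, ring_advance) are hypothetical and not part of the driver:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct cmdring {
	uint32_t size;	/* ring size in bytes, must be a power of two */
	uint32_t head;	/* consumer offset, as read back from hardware */
	uint32_t tail;	/* producer offset, as written to hardware */
};

/* Bytes available for new descriptors, keeping one entry unused. */
static uint32_t
ring_free_bytes(const struct cmdring *r, uint32_t entry_sz)
{
	uint32_t avail;

	avail = r->head - r->tail - entry_sz;
	if (r->head <= r->tail)
		avail += r->size;
	return (avail);
}

/* Advance the tail by one entry, wrapping with the power-of-two mask. */
static void
ring_advance(struct cmdring *r, uint32_t entry_sz)
{
	r->tail = (r->tail + entry_sz) & (r->size - 1);
}

int
main(void)
{
	struct cmdring r = { .size = 4096, .head = 0, .tail = 0 };

	assert((r.size & (r.size - 1)) == 0);
	for (int i = 0; i < 300; i++) {
		if (ring_free_bytes(&r, 16) < 16)
			break;		/* the driver busy-waits here instead */
		ring_advance(&r, 16);
	}
	printf("tail=%u free=%u\n", r.tail, ring_free_bytes(&r, 16));
	return (0);
}

Subtracting one entry size in the free-space computation keeps a single slot permanently unused, so head == tail can only mean an empty ring and never a full one.
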
diff --git a/sys/x86/iommu/amd_ctx.c b/sys/x86/iommu/amd_ctx.c
new file mode 100644
index 000000000000..b3e85350a995
--- /dev/null
+++ b/sys/x86/iommu/amd_ctx.c
@@ -0,0 +1,639 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <sys/vmem.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <dev/iommu/busdma_iommu.h>
+#include <x86/iommu/amd_reg.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/amd_iommu.h>
+
+static MALLOC_DEFINE(M_AMDIOMMU_CTX, "amdiommu_ctx", "AMD IOMMU Context");
+static MALLOC_DEFINE(M_AMDIOMMU_DOMAIN, "amdiommu_dom", "AMD IOMMU Domain");
+
+static void amdiommu_unref_domain_locked(struct amdiommu_unit *unit,
+ struct amdiommu_domain *domain);
+
+static struct amdiommu_dte *
+amdiommu_get_dtep(struct amdiommu_ctx *ctx)
+{
+ return (&CTX2AMD(ctx)->dev_tbl[ctx->context.rid]);
+}
+
+void
+amdiommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
+ bool cansleep)
+{
+ struct amdiommu_domain *domain;
+ struct amdiommu_unit *unit;
+
+ domain = IODOM2DOM(entry->domain);
+ unit = DOM2AMD(domain);
+
+ /*
+ * If "free" is false, then the IOTLB invalidation must be performed
+ * synchronously. Otherwise, the caller might free the entry before
+ * amdiommu_qi_task() is finished processing it.
+ */
+ if (free) {
+ AMDIOMMU_LOCK(unit);
+ iommu_qi_invalidate_locked(&domain->iodom, entry, true);
+ AMDIOMMU_UNLOCK(unit);
+ } else {
+ iommu_qi_invalidate_sync(&domain->iodom, entry->start,
+ entry->end - entry->start, cansleep);
+ iommu_domain_free_entry(entry, false);
+ }
+}
+
+static bool
+amdiommu_domain_unload_emit_wait(struct amdiommu_domain *domain,
+ struct iommu_map_entry *entry)
+{
+ return (true); /* XXXKIB */
+}
+
+void
+amdiommu_domain_unload(struct iommu_domain *iodom,
+ struct iommu_map_entries_tailq *entries, bool cansleep)
+{
+ struct amdiommu_domain *domain;
+ struct amdiommu_unit *unit;
+ struct iommu_map_entry *entry, *entry1;
+ int error __diagused;
+
+ domain = IODOM2DOM(iodom);
+ unit = DOM2AMD(domain);
+
+ TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
+ KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
+ ("not mapped entry %p %p", domain, entry));
+ error = iodom->ops->unmap(iodom, entry,
+ cansleep ? IOMMU_PGF_WAITOK : 0);
+ KASSERT(error == 0, ("unmap %p error %d", domain, error));
+ }
+ if (TAILQ_EMPTY(entries))
+ return;
+
+ AMDIOMMU_LOCK(unit);
+ while ((entry = TAILQ_FIRST(entries)) != NULL) {
+ TAILQ_REMOVE(entries, entry, dmamap_link);
+ iommu_qi_invalidate_locked(&domain->iodom, entry,
+ amdiommu_domain_unload_emit_wait(domain, entry));
+ }
+ AMDIOMMU_UNLOCK(unit);
+}
+
+static void
+amdiommu_domain_destroy(struct amdiommu_domain *domain)
+{
+ struct iommu_domain *iodom;
+ struct amdiommu_unit *unit;
+
+ iodom = DOM2IODOM(domain);
+
+ KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
+ ("unfinished unloads %p", domain));
+ KASSERT(LIST_EMPTY(&iodom->contexts),
+ ("destroying dom %p with contexts", domain));
+ KASSERT(domain->ctx_cnt == 0,
+ ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
+ KASSERT(domain->refs == 0,
+ ("destroying dom %p with refs %d", domain, domain->refs));
+
+ if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
+ AMDIOMMU_DOMAIN_LOCK(domain);
+ iommu_gas_fini_domain(iodom);
+ AMDIOMMU_DOMAIN_UNLOCK(domain);
+ }
+ if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
+ if (domain->pgtbl_obj != NULL)
+ AMDIOMMU_DOMAIN_PGLOCK(domain);
+ amdiommu_domain_free_pgtbl(domain);
+ }
+ iommu_domain_fini(iodom);
+ unit = DOM2AMD(domain);
+ free_unr(unit->domids, domain->domain);
+ free(domain, M_AMDIOMMU_DOMAIN);
+}
+
+static iommu_gaddr_t
+lvl2addr(int lvl)
+{
+ int x;
+
+ x = IOMMU_PAGE_SHIFT + IOMMU_NPTEPGSHIFT * lvl;
+ /* Level 6 has only 8 bits for page table index */
+ if (x >= NBBY * sizeof(uint64_t))
+ return (-1ull);
+ return (1ull << x);
+}
+
+static void
+amdiommu_domain_init_pglvl(struct amdiommu_unit *unit,
+ struct amdiommu_domain *domain)
+{
+ iommu_gaddr_t end;
+ int hats, i;
+ uint64_t efr_hats;
+
+ end = DOM2IODOM(domain)->end;
+ for (i = AMDIOMMU_PGTBL_MAXLVL; i > 1; i--) {
+ if (lvl2addr(i) >= end && lvl2addr(i - 1) < end)
+ break;
+ }
+ domain->pglvl = i;
+
+ efr_hats = unit->efr & AMDIOMMU_EFR_HATS_MASK;
+ switch (efr_hats) {
+ case AMDIOMMU_EFR_HATS_6LVL:
+ hats = 6;
+ break;
+ case AMDIOMMU_EFR_HATS_5LVL:
+ hats = 5;
+ break;
+ case AMDIOMMU_EFR_HATS_4LVL:
+ hats = 4;
+ break;
+ default:
+ printf("amdiommu%d: HATS %#jx (reserved) ignoring\n",
+ unit->iommu.unit, (uintmax_t)efr_hats);
+ return;
+ }
+ if (hats >= domain->pglvl)
+ return;
+
+ printf("amdiommu%d: domain %d HATS %d pglvl %d reducing to HATS\n",
+ unit->iommu.unit, domain->domain, hats, domain->pglvl);
+ domain->pglvl = hats;
+ domain->iodom.end = lvl2addr(hats);
+}
+
+static struct amdiommu_domain *
+amdiommu_domain_alloc(struct amdiommu_unit *unit, bool id_mapped)
+{
+ struct amdiommu_domain *domain;
+ struct iommu_domain *iodom;
+ int error, id;
+
+ id = alloc_unr(unit->domids);
+ if (id == -1)
+ return (NULL);
+ domain = malloc(sizeof(*domain), M_AMDIOMMU_DOMAIN, M_WAITOK | M_ZERO);
+ iodom = DOM2IODOM(domain);
+ domain->domain = id;
+ LIST_INIT(&iodom->contexts);
+ iommu_domain_init(AMD2IOMMU(unit), iodom, &amdiommu_domain_map_ops);
+
+ domain->unit = unit;
+
+ domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
+ amdiommu_domain_init_pglvl(unit, domain);
+ iommu_gas_init_domain(DOM2IODOM(domain));
+
+ if (id_mapped) {
+ domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
+ } else {
+ error = amdiommu_domain_alloc_pgtbl(domain);
+ if (error != 0)
+ goto fail;
+ /* Disable local apic region access */
+ error = iommu_gas_reserve_region(iodom, 0xfee00000,
+ 0xfeefffff + 1, &iodom->msi_entry);
+ if (error != 0)
+ goto fail;
+ }
+
+ return (domain);
+
+fail:
+ amdiommu_domain_destroy(domain);
+ return (NULL);
+}
+
+static struct amdiommu_ctx *
+amdiommu_ctx_alloc(struct amdiommu_domain *domain, uint16_t rid)
+{
+ struct amdiommu_ctx *ctx;
+
+ ctx = malloc(sizeof(*ctx), M_AMDIOMMU_CTX, M_WAITOK | M_ZERO);
+ ctx->context.domain = DOM2IODOM(domain);
+ ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
+ M_AMDIOMMU_CTX, M_WAITOK | M_ZERO);
+ ctx->context.rid = rid;
+ ctx->context.refs = 1;
+ return (ctx);
+}
+
+static void
+amdiommu_ctx_link(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_domain *domain;
+
+ domain = CTX2DOM(ctx);
+ IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
+ KASSERT(domain->refs >= domain->ctx_cnt,
+ ("dom %p ref underflow %d %d", domain, domain->refs,
+ domain->ctx_cnt));
+ domain->refs++;
+ domain->ctx_cnt++;
+ LIST_INSERT_HEAD(&domain->iodom.contexts, &ctx->context, link);
+}
+
+static void
+amdiommu_ctx_unlink(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_domain *domain;
+
+ domain = CTX2DOM(ctx);
+ IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
+ KASSERT(domain->refs > 0,
+ ("domain %p ctx dtr refs %d", domain, domain->refs));
+ KASSERT(domain->ctx_cnt >= domain->refs,
+ ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
+ domain->refs, domain->ctx_cnt));
+ domain->refs--;
+ domain->ctx_cnt--;
+ LIST_REMOVE(&ctx->context, link);
+}
+
+struct amdiommu_ctx *
+amdiommu_find_ctx_locked(struct amdiommu_unit *unit, uint16_t rid)
+{
+ struct amdiommu_domain *domain;
+ struct iommu_ctx *ctx;
+
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ LIST_FOREACH(domain, &unit->domains, link) {
+ LIST_FOREACH(ctx, &domain->iodom.contexts, link) {
+ if (ctx->rid == rid)
+ return (IOCTX2CTX(ctx));
+ }
+ }
+ return (NULL);
+}
+
+struct amdiommu_domain *
+amdiommu_find_domain(struct amdiommu_unit *unit, uint16_t rid)
+{
+ struct amdiommu_domain *domain;
+ struct iommu_ctx *ctx;
+
+ AMDIOMMU_LOCK(unit);
+ LIST_FOREACH(domain, &unit->domains, link) {
+ LIST_FOREACH(ctx, &domain->iodom.contexts, link) {
+ if (ctx->rid == rid)
+ break;
+ }
+ }
+ AMDIOMMU_UNLOCK(unit);
+ return (domain);
+}
+
+static void
+amdiommu_free_ctx_locked(struct amdiommu_unit *unit, struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_dte *dtep;
+ struct amdiommu_domain *domain;
+
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ KASSERT(ctx->context.refs >= 1,
+ ("amdiommu %p ctx %p refs %u", unit, ctx, ctx->context.refs));
+
+ /*
+ * If our reference is not last, only the dereference should
+ * be performed.
+ */
+ if (ctx->context.refs > 1) {
+ ctx->context.refs--;
+ AMDIOMMU_UNLOCK(unit);
+ return;
+ }
+
+ KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
+ ("lost ref on disabled ctx %p", ctx));
+
+ /*
+ * Otherwise, the device table entry must be cleared before
+ * the page table is destroyed.
+ */
+ dtep = amdiommu_get_dtep(ctx);
+ dtep->v = 0;
+ atomic_thread_fence_rel();
+ memset(dtep, 0, sizeof(*dtep));
+
+ domain = CTX2DOM(ctx);
+ amdiommu_qi_invalidate_ctx_locked_nowait(ctx);
+ amdiommu_qi_invalidate_ir_locked_nowait(unit, ctx->context.rid);
+ amdiommu_qi_invalidate_all_pages_locked_nowait(domain);
+ amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx)));
+
+ if (unit->irte_enabled)
+ amdiommu_ctx_fini_irte(ctx);
+
+ amdiommu_ctx_unlink(ctx);
+ free(ctx->context.tag, M_AMDIOMMU_CTX);
+ free(ctx, M_AMDIOMMU_CTX);
+ amdiommu_unref_domain_locked(unit, domain);
+}
+
+static void
+amdiommu_free_ctx(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_unit *unit;
+
+ unit = CTX2AMD(ctx);
+ AMDIOMMU_LOCK(unit);
+ amdiommu_free_ctx_locked(unit, ctx);
+}
+
+static void
+amdiommu_unref_domain_locked(struct amdiommu_unit *unit,
+ struct amdiommu_domain *domain)
+{
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ KASSERT(domain->refs >= 1,
+ ("amdiommu%d domain %p refs %u", unit->iommu.unit, domain,
+ domain->refs));
+ KASSERT(domain->refs > domain->ctx_cnt,
+ ("amdiommu%d domain %p refs %d ctx_cnt %d", unit->iommu.unit,
+ domain, domain->refs, domain->ctx_cnt));
+
+ if (domain->refs > 1) {
+ domain->refs--;
+ AMDIOMMU_UNLOCK(unit);
+ return;
+ }
+
+ LIST_REMOVE(domain, link);
+ AMDIOMMU_UNLOCK(unit);
+
+ taskqueue_drain(unit->iommu.delayed_taskqueue,
+ &domain->iodom.unload_task);
+ amdiommu_domain_destroy(domain);
+}
+
+static void
+dte_entry_init_one(struct amdiommu_dte *dtep, struct amdiommu_ctx *ctx,
+ vm_page_t pgtblr, uint8_t dte, uint32_t edte)
+{
+ struct amdiommu_domain *domain;
+ struct amdiommu_unit *unit;
+
+ domain = CTX2DOM(ctx);
+ unit = DOM2AMD(domain);
+
+ dtep->tv = 1;
+ /* dtep->had is not used for now */
+ dtep->ir = 1;
+ dtep->iw = 1;
+ dtep->domainid = domain->domain;
+ dtep->pioctl = AMDIOMMU_DTE_PIOCTL_DIS;
+
+ /* fill device interrupt passing hints from IVHD. */
+ dtep->initpass = (dte & ACPI_IVHD_INIT_PASS) != 0;
+ dtep->eintpass = (dte & ACPI_IVHD_EINT_PASS) != 0;
+ dtep->nmipass = (dte & ACPI_IVHD_NMI_PASS) != 0;
+ dtep->sysmgt = (dte & ACPI_IVHD_SYSTEM_MGMT) >> 4;
+ dtep->lint0pass = (dte & ACPI_IVHD_LINT0_PASS) != 0;
+ dtep->lint1pass = (dte & ACPI_IVHD_LINT1_PASS) != 0;
+
+ if (unit->irte_enabled) {
+ dtep->iv = 1;
+ dtep->i = 0;
+ dtep->inttablen = ilog2(unit->irte_nentries);
+ dtep->intrroot = pmap_kextract(unit->irte_x2apic ?
+ (vm_offset_t)ctx->irtx2 :
+ (vm_offset_t)ctx->irtb) >> 6;
+
+ dtep->intctl = AMDIOMMU_DTE_INTCTL_MAP;
+ }
+
+ if ((DOM2IODOM(domain)->flags & IOMMU_DOMAIN_IDMAP) != 0) {
+ dtep->pgmode = AMDIOMMU_DTE_PGMODE_1T1;
+ } else {
+ MPASS(domain->pglvl > 0 && domain->pglvl <=
+ AMDIOMMU_PGTBL_MAXLVL);
+ dtep->pgmode = domain->pglvl;
+ dtep->ptroot = VM_PAGE_TO_PHYS(pgtblr) >> 12;
+ }
+
+ atomic_thread_fence_rel();
+ dtep->v = 1;
+}
+
+static void
+dte_entry_init(struct amdiommu_ctx *ctx, bool move, uint8_t dte, uint32_t edte)
+{
+ struct amdiommu_dte *dtep;
+ struct amdiommu_unit *unit;
+ struct amdiommu_domain *domain;
+ int i;
+
+ domain = CTX2DOM(ctx);
+ unit = DOM2AMD(domain);
+
+ dtep = amdiommu_get_dtep(ctx);
+ KASSERT(dtep->v == 0,
+ ("amdiommu%d initializing valid dte @%p %#jx",
+ CTX2AMD(ctx)->iommu.unit, dtep, (uintmax_t)(*(uint64_t *)dtep)));
+
+ if (iommu_is_buswide_ctx(AMD2IOMMU(unit),
+ PCI_RID2BUS(ctx->context.rid))) {
+ MPASS(!move);
+ for (i = 0; i <= PCI_BUSMAX; i++) {
+ dte_entry_init_one(&dtep[i], ctx, domain->pgtblr,
+ dte, edte);
+ }
+ } else {
+ dte_entry_init_one(dtep, ctx, domain->pgtblr, dte, edte);
+ }
+}
+
+struct amdiommu_ctx *
+amdiommu_get_ctx_for_dev(struct amdiommu_unit *unit, device_t dev, uint16_t rid,
+ int dev_domain, bool id_mapped, bool rmrr_init, uint8_t dte, uint32_t edte)
+{
+ struct amdiommu_domain *domain, *domain1;
+ struct amdiommu_ctx *ctx, *ctx1;
+ int bus, slot, func;
+
+ if (dev != NULL) {
+ bus = pci_get_bus(dev);
+ slot = pci_get_slot(dev);
+ func = pci_get_function(dev);
+ } else {
+ bus = PCI_RID2BUS(rid);
+ slot = PCI_RID2SLOT(rid);
+ func = PCI_RID2FUNC(rid);
+ }
+ AMDIOMMU_LOCK(unit);
+ KASSERT(!iommu_is_buswide_ctx(AMD2IOMMU(unit), bus) ||
+ (slot == 0 && func == 0),
+ ("iommu%d pci%d:%d:%d get_ctx for buswide", AMD2IOMMU(unit)->unit,
+ bus, slot, func));
+ ctx = amdiommu_find_ctx_locked(unit, rid);
+ if (ctx == NULL) {
+ /*
+ * Perform the allocations which require sleep or have
+ * a higher chance to succeed if sleep is allowed.
+ */
+ AMDIOMMU_UNLOCK(unit);
+ domain1 = amdiommu_domain_alloc(unit, id_mapped);
+ if (domain1 == NULL)
+ return (NULL);
+ if (!id_mapped) {
+ /*
+ * XXXKIB IVMD seems to be less significant
+ * and less used on AMD than RMRR on Intel.
+ * Not implemented for now.
+ */
+ }
+ ctx1 = amdiommu_ctx_alloc(domain1, rid);
+ amdiommu_ctx_init_irte(ctx1);
+ AMDIOMMU_LOCK(unit);
+
+ /*
+ * Recheck the contexts; another thread might have
+ * already allocated the needed one.
+ */
+ ctx = amdiommu_find_ctx_locked(unit, rid);
+ if (ctx == NULL) {
+ domain = domain1;
+ ctx = ctx1;
+ amdiommu_ctx_link(ctx);
+ ctx->context.tag->owner = dev;
+ iommu_device_tag_init(CTX2IOCTX(ctx), dev);
+
+ LIST_INSERT_HEAD(&unit->domains, domain, link);
+ dte_entry_init(ctx, false, dte, edte);
+ amdiommu_qi_invalidate_ctx_locked(ctx);
+ if (dev != NULL) {
+ device_printf(dev,
+ "amdiommu%d pci%d:%d:%d:%d rid %x domain %d "
*** 2653 LINES SKIPPED ***
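
One more illustration, for the address-width logic in amd_ctx.c above: lvl2addr() and amdiommu_domain_init_pglvl() pick the shallowest page-table depth whose span covers the domain's end address (each level adds 9 bits of index on top of the 12-bit page offset), then clamp it to the host address translation size (HATS) advertised in the EFR. A standalone sketch with the constants written out; the names lvl_span() and pick_pglvl() are hypothetical, and the reserved-HATS case handled by the driver is omitted:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12	/* 4K IOMMU pages */
#define NPTEPGSHIFT	9	/* 512 entries per page-table page */
#define PGTBL_MAXLVL	6

/* Address span covered by a page-table walk of the given depth. */
static uint64_t
lvl_span(int lvl)
{
	int x;

	x = PAGE_SHIFT + NPTEPGSHIFT * lvl;
	if (x >= 64)		/* level 6 would shift past 64 bits */
		return (UINT64_MAX);
	return ((uint64_t)1 << x);
}

/* Shallowest depth covering 'end', clamped to the hardware HATS level. */
static int
pick_pglvl(uint64_t end, int hats_lvl)
{
	int i;

	for (i = PGTBL_MAXLVL; i > 1; i--) {
		if (lvl_span(i) >= end && lvl_span(i - 1) < end)
			break;
	}
	return (i <= hats_lvl ? i : hats_lvl);
}

int
main(void)
{
	/* A 64GB domain fits in a three-level table (3 levels span 512GB). */
	printf("levels for a 64GB domain: %d\n", pick_pglvl(64ULL << 30, 4));
	/* A full 57-bit space needs all five levels of a HATS=5 unit. */
	printf("levels for a 2^57 domain: %d\n", pick_pglvl(1ULL << 57, 5));
	return (0);
}

This mirrors the clamp in amdiommu_domain_init_pglvl(), which also shrinks iodom.end when the hardware cannot walk as many levels as the requested span would need.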