git: ad794e6d7d02 - main - x86 iommu: move DMAR-independent parts of the qi code into common
Date: Wed, 04 Sep 2024 21:50:41 UTC
The branch main has been updated by kib:
URL: https://cgit.FreeBSD.org/src/commit/?id=ad794e6d7d02a11b01e721859e096efeb258a4d4
commit ad794e6d7d02a11b01e721859e096efeb258a4d4
Author: Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2024-06-06 01:16:36 +0000
Commit: Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2024-09-04 21:50:18 +0000
x86 iommu: move DMAR-independent parts of the qi code into common
Sponsored by: Advanced Micro Devices (AMD)
Sponsored by: The FreeBSD Foundation
MFC after: 1 week
---
sys/x86/iommu/intel_ctx.c | 7 +-
sys/x86/iommu/intel_dmar.h | 42 +-----
sys/x86/iommu/intel_drv.c | 26 ++--
sys/x86/iommu/intel_qi.c | 330 ++++++++++++--------------------------------
sys/x86/iommu/iommu_utils.c | 234 ++++++++++++++++++++++++++++++-
sys/x86/iommu/x86_iommu.h | 72 ++++++++++
6 files changed, 413 insertions(+), 298 deletions(-)
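In short, the commit moves the generic queued-invalidation (QI) bookkeeping (wait-descriptor sequence numbers, waiter wakeups, and the deferred TLB-flush list) into sys/x86/iommu/iommu_utils.c, keyed off a new struct x86_unit_common that the DMAR driver embeds in struct dmar_unit. The register-level primitives stay in intel_qi.c and are reached through new hooks in struct x86_iommu (get_x86_common, qi_ensure, qi_emit_wait_descr, qi_advance_tail, qi_invalidate_emit). The stand-alone sketch below uses hypothetical names and userland C, not the kernel API; it only illustrates the shape of that split.

/*
 * Toy model of the pattern introduced by this commit: the driver
 * registers a small ops table plus a pointer to its embedded common
 * state, and the shared layer does the generic bookkeeping while the
 * driver keeps the register writes.  All names here are stand-ins.
 */
#include <stdint.h>
#include <stdio.h>

struct unit_common {            /* plays the role of x86_unit_common */
        uint32_t inv_queue_tail;
        uint32_t inv_waitd_seq;
};

struct unit_ops {               /* plays the role of struct x86_iommu */
        struct unit_common *(*get_common)(void *unit);
        void (*advance_tail)(void *unit);
};

/* Shared layer, analogous to iommu_utils.c. */
static const struct unit_ops *ops;

static void
common_qi_emit_wait_seq(void *unit)
{
        struct unit_common *c = ops->get_common(unit);

        c->inv_waitd_seq++;             /* generic sequence bookkeeping */
        ops->advance_tail(unit);        /* driver-specific register write */
}

/* Hardware-specific driver, analogous to intel_qi.c. */
struct fake_dmar {
        struct unit_common common;      /* embedded, as x86c is in dmar_unit */
        uint32_t iqt_reg;               /* stand-in for DMAR_IQT_REG */
};

static struct unit_common *
fake_get_common(void *unit)
{
        return (&((struct fake_dmar *)unit)->common);
}

static void
fake_advance_tail(void *unit)
{
        struct fake_dmar *d = unit;

        d->iqt_reg = d->common.inv_queue_tail;  /* "write" the tail register */
}

static const struct unit_ops fake_ops = {
        .get_common = fake_get_common,
        .advance_tail = fake_advance_tail,
};

int
main(void)
{
        struct fake_dmar d = { .common = { .inv_waitd_seq = 1 } };

        ops = &fake_ops;                /* driver registers its hooks */
        common_qi_emit_wait_seq(&d);    /* shared code drives them */
        printf("seq %u tail reg %u\n", (unsigned)d.common.inv_waitd_seq,
            (unsigned)d.iqt_reg);
        return (0);
}

In the diff below the real driver does the equivalent registration in dmar_init_qi(), advertises its queue limits via qi_buf_maxsz/qi_cmd_sz, and hands allocation of the invalidation queue and taskqueue to iommu_qi_common_init().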
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index a3ff35dc527e..03ef196c4cb0 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -887,10 +887,11 @@ dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free,
if (unit->qi_enabled) {
if (free) {
DMAR_LOCK(unit);
- dmar_qi_invalidate_locked(domain, entry, true);
+ iommu_qi_invalidate_locked(&domain->iodom, entry,
+ true);
DMAR_UNLOCK(unit);
} else {
- dmar_qi_invalidate_sync(domain, entry->start,
+ iommu_qi_invalidate_sync(&domain->iodom, entry->start,
entry->end - entry->start, cansleep);
dmar_domain_free_entry(entry, false);
}
@@ -943,7 +944,7 @@ dmar_domain_unload(struct iommu_domain *iodom,
DMAR_LOCK(unit);
while ((entry = TAILQ_FIRST(entries)) != NULL) {
TAILQ_REMOVE(entries, entry, dmamap_link);
- dmar_qi_invalidate_locked(domain, entry,
+ iommu_qi_invalidate_locked(&domain->iodom, entry,
dmar_domain_unload_emit_wait(domain, entry));
}
DMAR_UNLOCK(unit);
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
index 0ede955e12b9..8a815d5cfca6 100644
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -123,6 +123,7 @@ struct dmar_msi_data {
struct dmar_unit {
struct iommu_unit iommu;
+ struct x86_unit_common x86c;
uint16_t segment;
uint64_t base;
@@ -155,17 +156,6 @@ struct dmar_unit {
/* QI */
int qi_enabled;
- char *inv_queue;
- vm_size_t inv_queue_size;
- uint32_t inv_queue_avail;
- uint32_t inv_queue_tail;
- volatile uint32_t inv_waitd_seq_hw; /* hw writes there on wait
- descr completion */
- uint64_t inv_waitd_seq_hw_phys;
- uint32_t inv_waitd_seq; /* next sequence number to use for wait descr */
- u_int inv_waitd_gen; /* seq number generation AKA seq overflows */
- u_int inv_seq_waiters; /* count of waiters for seq */
- u_int inv_queue_full; /* informational counter */
/* IR */
int ir_enabled;
@@ -173,36 +163,6 @@ struct dmar_unit {
dmar_irte_t *irt;
u_int irte_cnt;
vmem_t *irtids;
-
- /*
- * Delayed freeing of map entries queue processing:
- *
- * tlb_flush_head and tlb_flush_tail are used to implement a FIFO
- * queue that supports concurrent dequeues and enqueues. However,
- * there can only be a single dequeuer (accessing tlb_flush_head) and
- * a single enqueuer (accessing tlb_flush_tail) at a time. Since the
- * unit's qi_task is the only dequeuer, it can access tlb_flush_head
- * without any locking. In contrast, there may be multiple enqueuers,
- * so the enqueuers acquire the iommu unit lock to serialize their
- * accesses to tlb_flush_tail.
- *
- * In this FIFO queue implementation, the key to enabling concurrent
- * dequeues and enqueues is that the dequeuer never needs to access
- * tlb_flush_tail and the enqueuer never needs to access
- * tlb_flush_head. In particular, tlb_flush_head and tlb_flush_tail
- * are never NULL, so neither a dequeuer nor an enqueuer ever needs to
- * update both. Instead, tlb_flush_head always points to a "zombie"
- * struct, which previously held the last dequeued item. Thus, the
- * zombie's next field actually points to the struct holding the first
- * item in the queue. When an item is dequeued, the current zombie is
- * finally freed, and the struct that held the just dequeued item
- * becomes the new zombie. When the queue is empty, tlb_flush_tail
- * also points to the zombie.
- */
- struct iommu_map_entry *tlb_flush_head;
- struct iommu_map_entry *tlb_flush_tail;
- struct task qi_task;
- struct taskqueue *qi_taskqueue;
};
#define DMAR_LOCK(dmar) mtx_lock(&DMAR2IOMMU(dmar)->lock)
diff --git a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c
index 79350358cced..9fa1b3f98dc6 100644
--- a/sys/x86/iommu/intel_drv.c
+++ b/sys/x86/iommu/intel_drv.c
@@ -1303,19 +1303,19 @@ dmar_print_one(int idx, bool show_domains, bool show_mappings)
"size 0x%jx\n"
" head 0x%x tail 0x%x avail 0x%x status 0x%x ctrl 0x%x\n"
" hw compl 0x%x@%p/phys@%jx next seq 0x%x gen 0x%x\n",
- (uintmax_t)unit->inv_queue,
+ (uintmax_t)unit->x86c.inv_queue,
(uintmax_t)dmar_read8(unit, DMAR_IQA_REG),
- (uintmax_t)unit->inv_queue_size,
+ (uintmax_t)unit->x86c.inv_queue_size,
dmar_read4(unit, DMAR_IQH_REG),
dmar_read4(unit, DMAR_IQT_REG),
- unit->inv_queue_avail,
+ unit->x86c.inv_queue_avail,
dmar_read4(unit, DMAR_ICS_REG),
dmar_read4(unit, DMAR_IECTL_REG),
- unit->inv_waitd_seq_hw,
- &unit->inv_waitd_seq_hw,
- (uintmax_t)unit->inv_waitd_seq_hw_phys,
- unit->inv_waitd_seq,
- unit->inv_waitd_gen);
+ unit->x86c.inv_waitd_seq_hw,
+ &unit->x86c.inv_waitd_seq_hw,
+ (uintmax_t)unit->x86c.inv_waitd_seq_hw_phys,
+ unit->x86c.inv_waitd_seq,
+ unit->x86c.inv_waitd_gen);
} else {
db_printf("qi is disabled\n");
}
@@ -1368,7 +1368,17 @@ dmar_find_method(device_t dev, bool verbose)
return (&dmar->iommu);
}
+static struct x86_unit_common *
+dmar_get_x86_common(struct iommu_unit *unit)
+{
+ struct dmar_unit *dmar;
+
+ dmar = IOMMU2DMAR(unit);
+ return (&dmar->x86c);
+}
+
static struct x86_iommu dmar_x86_iommu = {
+ .get_x86_common = dmar_get_x86_common,
.domain_unload_entry = dmar_domain_unload_entry,
.domain_unload = dmar_domain_unload,
.get_ctx = dmar_get_ctx,
diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c
index 590cbac9bcbd..a94fbb54e7f7 100644
--- a/sys/x86/iommu/intel_qi.c
+++ b/sys/x86/iommu/intel_qi.c
@@ -58,17 +58,6 @@
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/intel_dmar.h>
-static bool
-dmar_qi_seq_processed(const struct dmar_unit *unit,
- const struct iommu_qi_genseq *pseq)
-{
- u_int gen;
-
- gen = unit->inv_waitd_gen;
- return (pseq->gen < gen ||
- (pseq->gen == gen && pseq->seq <= unit->inv_waitd_seq_hw));
-}
-
static int
dmar_enable_qi(struct dmar_unit *unit)
{
@@ -96,32 +85,36 @@ dmar_disable_qi(struct dmar_unit *unit)
}
static void
-dmar_qi_advance_tail(struct dmar_unit *unit)
+dmar_qi_advance_tail(struct iommu_unit *iommu)
{
+ struct dmar_unit *unit;
+ unit = IOMMU2DMAR(iommu);
DMAR_ASSERT_LOCKED(unit);
- dmar_write4(unit, DMAR_IQT_REG, unit->inv_queue_tail);
+ dmar_write4(unit, DMAR_IQT_REG, unit->x86c.inv_queue_tail);
}
static void
-dmar_qi_ensure(struct dmar_unit *unit, int descr_count)
+dmar_qi_ensure(struct iommu_unit *iommu, int descr_count)
{
+ struct dmar_unit *unit;
uint32_t head;
int bytes;
+ unit = IOMMU2DMAR(iommu);
DMAR_ASSERT_LOCKED(unit);
bytes = descr_count << DMAR_IQ_DESCR_SZ_SHIFT;
for (;;) {
- if (bytes <= unit->inv_queue_avail)
+ if (bytes <= unit->x86c.inv_queue_avail)
break;
/* refill */
head = dmar_read4(unit, DMAR_IQH_REG);
head &= DMAR_IQH_MASK;
- unit->inv_queue_avail = head - unit->inv_queue_tail -
+ unit->x86c.inv_queue_avail = head - unit->x86c.inv_queue_tail -
DMAR_IQ_DESCR_SZ;
- if (head <= unit->inv_queue_tail)
- unit->inv_queue_avail += unit->inv_queue_size;
- if (bytes <= unit->inv_queue_avail)
+ if (head <= unit->x86c.inv_queue_tail)
+ unit->x86c.inv_queue_avail += unit->x86c.inv_queue_size;
+ if (bytes <= unit->x86c.inv_queue_avail)
break;
/*
@@ -134,11 +127,11 @@ dmar_qi_ensure(struct dmar_unit *unit, int descr_count)
* See dmar_qi_invalidate_locked() for a discussion
* about data race prevention.
*/
- dmar_qi_advance_tail(unit);
- unit->inv_queue_full++;
+ dmar_qi_advance_tail(DMAR2IOMMU(unit));
+ unit->x86c.inv_queue_full++;
cpu_spinwait();
}
- unit->inv_queue_avail -= bytes;
+ unit->x86c.inv_queue_avail -= bytes;
}
static void
@@ -146,162 +139,60 @@ dmar_qi_emit(struct dmar_unit *unit, uint64_t data1, uint64_t data2)
{
DMAR_ASSERT_LOCKED(unit);
- *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data1;
- unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
- KASSERT(unit->inv_queue_tail <= unit->inv_queue_size,
- ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail,
- (uintmax_t)unit->inv_queue_size));
- unit->inv_queue_tail &= unit->inv_queue_size - 1;
- *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data2;
- unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
- KASSERT(unit->inv_queue_tail <= unit->inv_queue_size,
- ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail,
- (uintmax_t)unit->inv_queue_size));
- unit->inv_queue_tail &= unit->inv_queue_size - 1;
+ *(volatile uint64_t *)(unit->x86c.inv_queue +
+ unit->x86c.inv_queue_tail) = data1;
+ unit->x86c.inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
+ KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size,
+ ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail,
+ (uintmax_t)unit->x86c.inv_queue_size));
+ unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1;
+ *(volatile uint64_t *)(unit->x86c.inv_queue +
+ unit->x86c.inv_queue_tail) = data2;
+ unit->x86c.inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
+ KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size,
+ ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail,
+ (uintmax_t)unit->x86c.inv_queue_size));
+ unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1;
}
static void
-dmar_qi_emit_wait_descr(struct dmar_unit *unit, uint32_t seq, bool intr,
+dmar_qi_emit_wait_descr(struct iommu_unit *iommu, uint32_t seq, bool intr,
bool memw, bool fence)
{
+ struct dmar_unit *unit;
+ unit = IOMMU2DMAR(iommu);
DMAR_ASSERT_LOCKED(unit);
dmar_qi_emit(unit, DMAR_IQ_DESCR_WAIT_ID |
(intr ? DMAR_IQ_DESCR_WAIT_IF : 0) |
(memw ? DMAR_IQ_DESCR_WAIT_SW : 0) |
(fence ? DMAR_IQ_DESCR_WAIT_FN : 0) |
(memw ? DMAR_IQ_DESCR_WAIT_SD(seq) : 0),
- memw ? unit->inv_waitd_seq_hw_phys : 0);
-}
-
-static void
-dmar_qi_emit_wait_seq(struct dmar_unit *unit, struct iommu_qi_genseq *pseq,
- bool emit_wait)
-{
- struct iommu_qi_genseq gsec;
- uint32_t seq;
-
- KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
- DMAR_ASSERT_LOCKED(unit);
- if (unit->inv_waitd_seq == 0xffffffff) {
- gsec.gen = unit->inv_waitd_gen;
- gsec.seq = unit->inv_waitd_seq;
- dmar_qi_ensure(unit, 1);
- dmar_qi_emit_wait_descr(unit, gsec.seq, false, true, false);
- dmar_qi_advance_tail(unit);
- while (!dmar_qi_seq_processed(unit, &gsec))
- cpu_spinwait();
- unit->inv_waitd_gen++;
- unit->inv_waitd_seq = 1;
- }
- seq = unit->inv_waitd_seq++;
- pseq->gen = unit->inv_waitd_gen;
- pseq->seq = seq;
- if (emit_wait) {
- dmar_qi_ensure(unit, 1);
- dmar_qi_emit_wait_descr(unit, seq, true, true, false);
- }
+ memw ? unit->x86c.inv_waitd_seq_hw_phys : 0);
}
-/*
- * To avoid missed wakeups, callers must increment the unit's waiters count
- * before advancing the tail past the wait descriptor.
- */
static void
-dmar_qi_wait_for_seq(struct dmar_unit *unit, const struct iommu_qi_genseq *gseq,
- bool nowait)
-{
-
- DMAR_ASSERT_LOCKED(unit);
- KASSERT(unit->inv_seq_waiters > 0, ("%s: no waiters", __func__));
- while (!dmar_qi_seq_processed(unit, gseq)) {
- if (cold || nowait) {
- cpu_spinwait();
- } else {
- msleep(&unit->inv_seq_waiters, &unit->iommu.lock, 0,
- "dmarse", hz);
- }
- }
- unit->inv_seq_waiters--;
-}
-
-static void
-dmar_qi_invalidate_emit(struct dmar_domain *domain, iommu_gaddr_t base,
+dmar_qi_invalidate_emit(struct iommu_domain *idomain, iommu_gaddr_t base,
iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait)
{
struct dmar_unit *unit;
+ struct dmar_domain *domain;
iommu_gaddr_t isize;
int am;
+ domain = __containerof(idomain, struct dmar_domain, iodom);
unit = domain->dmar;
DMAR_ASSERT_LOCKED(unit);
for (; size > 0; base += isize, size -= isize) {
am = calc_am(unit, base, size, &isize);
- dmar_qi_ensure(unit, 1);
+ dmar_qi_ensure(DMAR2IOMMU(unit), 1);
dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV |
DMAR_IQ_DESCR_IOTLB_PAGE | DMAR_IQ_DESCR_IOTLB_DW |
DMAR_IQ_DESCR_IOTLB_DR |
DMAR_IQ_DESCR_IOTLB_DID(domain->domain),
base | am);
}
- dmar_qi_emit_wait_seq(unit, pseq, emit_wait);
-}
-
-/*
- * The caller must not be using the entry's dmamap_link field.
- */
-void
-dmar_qi_invalidate_locked(struct dmar_domain *domain,
- struct iommu_map_entry *entry, bool emit_wait)
-{
- struct dmar_unit *unit;
-
- unit = domain->dmar;
- DMAR_ASSERT_LOCKED(unit);
- dmar_qi_invalidate_emit(domain, entry->start, entry->end -
- entry->start, &entry->gseq, emit_wait);
-
- /*
- * To avoid a data race in dmar_qi_task(), the entry's gseq must be
- * initialized before the entry is added to the TLB flush list, and the
- * entry must be added to that list before the tail is advanced. More
- * precisely, the tail must not be advanced past the wait descriptor
- * that will generate the interrupt that schedules dmar_qi_task() for
- * execution before the entry is added to the list. While an earlier
- * call to dmar_qi_ensure() might have advanced the tail, it will not
- * advance it past the wait descriptor.
- *
- * See the definition of struct dmar_unit for more information on
- * synchronization.
- */
- entry->tlb_flush_next = NULL;
- atomic_store_rel_ptr((uintptr_t *)&unit->tlb_flush_tail->tlb_flush_next,
- (uintptr_t)entry);
- unit->tlb_flush_tail = entry;
-
- dmar_qi_advance_tail(unit);
-}
-
-void
-dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t base,
- iommu_gaddr_t size, bool cansleep)
-{
- struct dmar_unit *unit;
- struct iommu_qi_genseq gseq;
-
- unit = domain->dmar;
- DMAR_LOCK(unit);
- dmar_qi_invalidate_emit(domain, base, size, &gseq, true);
-
- /*
- * To avoid a missed wakeup in dmar_qi_task(), the unit's waiters count
- * must be incremented before the tail is advanced.
- */
- unit->inv_seq_waiters++;
-
- dmar_qi_advance_tail(unit);
- dmar_qi_wait_for_seq(unit, &gseq, !cansleep);
- DMAR_UNLOCK(unit);
+ iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), pseq, emit_wait);
}
void
@@ -310,13 +201,13 @@ dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit)
struct iommu_qi_genseq gseq;
DMAR_ASSERT_LOCKED(unit);
- dmar_qi_ensure(unit, 2);
+ dmar_qi_ensure(DMAR2IOMMU(unit), 2);
dmar_qi_emit(unit, DMAR_IQ_DESCR_CTX_INV | DMAR_IQ_DESCR_CTX_GLOB, 0);
- dmar_qi_emit_wait_seq(unit, &gseq, true);
+ iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
/* See dmar_qi_invalidate_sync(). */
- unit->inv_seq_waiters++;
- dmar_qi_advance_tail(unit);
- dmar_qi_wait_for_seq(unit, &gseq, false);
+ unit->x86c.inv_seq_waiters++;
+ dmar_qi_advance_tail(DMAR2IOMMU(unit));
+ iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false);
}
void
@@ -325,14 +216,14 @@ dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit)
struct iommu_qi_genseq gseq;
DMAR_ASSERT_LOCKED(unit);
- dmar_qi_ensure(unit, 2);
+ dmar_qi_ensure(DMAR2IOMMU(unit), 2);
dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_GLOB |
DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR, 0);
- dmar_qi_emit_wait_seq(unit, &gseq, true);
+ iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
/* See dmar_qi_invalidate_sync(). */
- unit->inv_seq_waiters++;
- dmar_qi_advance_tail(unit);
- dmar_qi_wait_for_seq(unit, &gseq, false);
+ unit->x86c.inv_seq_waiters++;
+ dmar_qi_advance_tail(DMAR2IOMMU(unit));
+ iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false);
}
void
@@ -341,13 +232,13 @@ dmar_qi_invalidate_iec_glob(struct dmar_unit *unit)
struct iommu_qi_genseq gseq;
DMAR_ASSERT_LOCKED(unit);
- dmar_qi_ensure(unit, 2);
+ dmar_qi_ensure(DMAR2IOMMU(unit), 2);
dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV, 0);
- dmar_qi_emit_wait_seq(unit, &gseq, true);
+ iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
/* See dmar_qi_invalidate_sync(). */
- unit->inv_seq_waiters++;
- dmar_qi_advance_tail(unit);
- dmar_qi_wait_for_seq(unit, &gseq, false);
+ unit->x86c.inv_seq_waiters++;
+ dmar_qi_advance_tail(DMAR2IOMMU(unit));
+ iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false);
}
void
@@ -363,21 +254,21 @@ dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt)
for (; cnt > 0; cnt -= c, start += c) {
l = ffs(start | cnt) - 1;
c = 1 << l;
- dmar_qi_ensure(unit, 1);
+ dmar_qi_ensure(DMAR2IOMMU(unit), 1);
dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV |
DMAR_IQ_DESCR_IEC_IDX | DMAR_IQ_DESCR_IEC_IIDX(start) |
DMAR_IQ_DESCR_IEC_IM(l), 0);
}
- dmar_qi_ensure(unit, 1);
- dmar_qi_emit_wait_seq(unit, &gseq, true);
+ dmar_qi_ensure(DMAR2IOMMU(unit), 1);
+ iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
/*
- * Since dmar_qi_wait_for_seq() will not sleep, this increment's
+ * Since iommu_qi_wait_for_seq() will not sleep, this increment's
* placement relative to advancing the tail doesn't matter.
*/
- unit->inv_seq_waiters++;
+ unit->x86c.inv_seq_waiters++;
- dmar_qi_advance_tail(unit);
+ dmar_qi_advance_tail(DMAR2IOMMU(unit));
/*
* The caller of the function, in particular,
@@ -394,7 +285,7 @@ dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt)
* queue is processed, which includes requests possibly issued
* before our request.
*/
- dmar_qi_wait_for_seq(unit, &gseq, true);
+ iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, true);
}
int
@@ -405,38 +296,18 @@ dmar_qi_intr(void *arg)
unit = arg;
KASSERT(unit->qi_enabled, ("dmar%d: QI is not enabled",
unit->iommu.unit));
- taskqueue_enqueue(unit->qi_taskqueue, &unit->qi_task);
+ taskqueue_enqueue(unit->x86c.qi_taskqueue, &unit->x86c.qi_task);
return (FILTER_HANDLED);
}
-static void
-dmar_qi_drain_tlb_flush(struct dmar_unit *unit)
-{
- struct iommu_map_entry *entry, *head;
-
- for (head = unit->tlb_flush_head;; head = entry) {
- entry = (struct iommu_map_entry *)
- atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
- if (entry == NULL ||
- !dmar_qi_seq_processed(unit, &entry->gseq))
- break;
- unit->tlb_flush_head = entry;
- iommu_gas_free_entry(head);
- if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
- iommu_gas_free_region(entry);
- else
- iommu_gas_free_space(entry);
- }
-}
-
static void
dmar_qi_task(void *arg, int pending __unused)
{
struct dmar_unit *unit;
uint32_t ics;
- unit = arg;
- dmar_qi_drain_tlb_flush(unit);
+ unit = IOMMU2DMAR(arg);
+ iommu_qi_drain_tlb_flush(DMAR2IOMMU(unit));
/*
* Request an interrupt on the completion of the next invalidation
@@ -453,16 +324,16 @@ dmar_qi_task(void *arg, int pending __unused)
* Otherwise, such entries will linger until a later entry
* that requests an interrupt is processed.
*/
- dmar_qi_drain_tlb_flush(unit);
+ iommu_qi_drain_tlb_flush(DMAR2IOMMU(unit));
}
- if (unit->inv_seq_waiters > 0) {
+ if (unit->x86c.inv_seq_waiters > 0) {
/*
* Acquire the DMAR lock so that wakeup() is called only after
* the waiter is sleeping.
*/
DMAR_LOCK(unit);
- wakeup(&unit->inv_seq_waiters);
+ wakeup(&unit->x86c.inv_seq_waiters);
DMAR_UNLOCK(unit);
}
}
@@ -472,7 +343,7 @@ dmar_init_qi(struct dmar_unit *unit)
{
uint64_t iqa;
uint32_t ics;
- int qi_sz;
+ u_int qi_sz;
if (!DMAR_HAS_QI(unit) || (unit->hw_cap & DMAR_CAP_CM) != 0)
return (0);
@@ -481,34 +352,19 @@ dmar_init_qi(struct dmar_unit *unit)
if (!unit->qi_enabled)
return (0);
- unit->tlb_flush_head = unit->tlb_flush_tail =
- iommu_gas_alloc_entry(NULL, 0);
- TASK_INIT(&unit->qi_task, 0, dmar_qi_task, unit);
- unit->qi_taskqueue = taskqueue_create_fast("dmarqf", M_WAITOK,
- taskqueue_thread_enqueue, &unit->qi_taskqueue);
- taskqueue_start_threads(&unit->qi_taskqueue, 1, PI_AV,
- "dmar%d qi taskq", unit->iommu.unit);
-
- unit->inv_waitd_gen = 0;
- unit->inv_waitd_seq = 1;
-
- qi_sz = DMAR_IQA_QS_DEF;
- TUNABLE_INT_FETCH("hw.dmar.qi_size", &qi_sz);
- if (qi_sz > DMAR_IQA_QS_MAX)
- qi_sz = DMAR_IQA_QS_MAX;
- unit->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
- /* Reserve one descriptor to prevent wraparound. */
- unit->inv_queue_avail = unit->inv_queue_size - DMAR_IQ_DESCR_SZ;
-
- /* The invalidation queue reads by DMARs are always coherent. */
- unit->inv_queue = kmem_alloc_contig(unit->inv_queue_size, M_WAITOK |
- M_ZERO, 0, iommu_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
- unit->inv_waitd_seq_hw_phys = pmap_kextract(
- (vm_offset_t)&unit->inv_waitd_seq_hw);
+ unit->x86c.qi_buf_maxsz = DMAR_IQA_QS_MAX;
+ unit->x86c.qi_cmd_sz = DMAR_IQ_DESCR_SZ;
+ iommu_qi_common_init(DMAR2IOMMU(unit), dmar_qi_task);
+ get_x86_iommu()->qi_ensure = dmar_qi_ensure;
+ get_x86_iommu()->qi_emit_wait_descr = dmar_qi_emit_wait_descr;
+ get_x86_iommu()->qi_advance_tail = dmar_qi_advance_tail;
+ get_x86_iommu()->qi_invalidate_emit = dmar_qi_invalidate_emit;
+
+ qi_sz = ilog2(unit->x86c.inv_queue_size / PAGE_SIZE);
DMAR_LOCK(unit);
dmar_write8(unit, DMAR_IQT_REG, 0);
- iqa = pmap_kextract((uintptr_t)unit->inv_queue);
+ iqa = pmap_kextract((uintptr_t)unit->x86c.inv_queue);
iqa |= qi_sz;
dmar_write8(unit, DMAR_IQA_REG, iqa);
dmar_enable_qi(unit);
@@ -523,35 +379,19 @@ dmar_init_qi(struct dmar_unit *unit)
return (0);
}
+static void
+dmar_fini_qi_helper(struct iommu_unit *iommu)
+{
+ dmar_disable_qi_intr(IOMMU2DMAR(iommu));
+ dmar_disable_qi(IOMMU2DMAR(iommu));
+}
+
void
dmar_fini_qi(struct dmar_unit *unit)
{
- struct iommu_qi_genseq gseq;
-
if (!unit->qi_enabled)
return;
- taskqueue_drain(unit->qi_taskqueue, &unit->qi_task);
- taskqueue_free(unit->qi_taskqueue);
- unit->qi_taskqueue = NULL;
-
- DMAR_LOCK(unit);
- /* quisce */
- dmar_qi_ensure(unit, 1);
- dmar_qi_emit_wait_seq(unit, &gseq, true);
- /* See dmar_qi_invalidate_sync_locked(). */
- unit->inv_seq_waiters++;
- dmar_qi_advance_tail(unit);
- dmar_qi_wait_for_seq(unit, &gseq, false);
- /* only after the quisce, disable queue */
- dmar_disable_qi_intr(unit);
- dmar_disable_qi(unit);
- KASSERT(unit->inv_seq_waiters == 0,
- ("dmar%d: waiters on disabled queue", unit->iommu.unit));
- DMAR_UNLOCK(unit);
-
- kmem_free(unit->inv_queue, unit->inv_queue_size);
- unit->inv_queue = NULL;
- unit->inv_queue_size = 0;
+ iommu_qi_common_fini(DMAR2IOMMU(unit), dmar_fini_qi_helper);
unit->qi_enabled = 0;
}
diff --git a/sys/x86/iommu/iommu_utils.c b/sys/x86/iommu/iommu_utils.c
index ea2c0358e072..571e5a2e65cd 100644
--- a/sys/x86/iommu/iommu_utils.c
+++ b/sys/x86/iommu/iommu_utils.c
@@ -29,7 +29,9 @@
*/
#include <sys/systm.h>
+#include <sys/kernel.h>
#include <sys/lock.h>
+#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sf_buf.h>
@@ -40,8 +42,11 @@
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <vm/vm.h>
-#include <vm/vm_page.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
#include <vm/vm_object.h>
+#include <vm/vm_page.h>
#include <dev/pci/pcireg.h>
#include <machine/atomic.h>
#include <machine/bus.h>
@@ -251,3 +256,230 @@ iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
return (x86_iommu->unmap_ioapic_intr(ioapic_id, cookie));
}
+
+#define IOMMU2X86C(iommu) (x86_iommu->get_x86_common(iommu))
+
+static bool
+iommu_qi_seq_processed(struct iommu_unit *unit,
+ const struct iommu_qi_genseq *pseq)
+{
+ struct x86_unit_common *x86c;
+ u_int gen;
+
+ x86c = IOMMU2X86C(unit);
+ gen = x86c->inv_waitd_gen;
+ return (pseq->gen < gen ||
+ (pseq->gen == gen && pseq->seq <= x86c->inv_waitd_seq_hw));
+}
+
+void
+iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *pseq,
+ bool emit_wait)
+{
+ struct x86_unit_common *x86c;
+ struct iommu_qi_genseq gsec;
+ uint32_t seq;
+
+ KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
+ IOMMU_ASSERT_LOCKED(unit);
+ x86c = IOMMU2X86C(unit);
+
+ if (x86c->inv_waitd_seq == 0xffffffff) {
+ gsec.gen = x86c->inv_waitd_gen;
+ gsec.seq = x86c->inv_waitd_seq;
+ x86_iommu->qi_ensure(unit, 1);
+ x86_iommu->qi_emit_wait_descr(unit, gsec.seq, false,
+ true, false);
+ x86_iommu->qi_advance_tail(unit);
+ while (!iommu_qi_seq_processed(unit, &gsec))
+ cpu_spinwait();
+ x86c->inv_waitd_gen++;
+ x86c->inv_waitd_seq = 1;
+ }
+ seq = x86c->inv_waitd_seq++;
+ pseq->gen = x86c->inv_waitd_gen;
+ pseq->seq = seq;
+ if (emit_wait) {
+ x86_iommu->qi_ensure(unit, 1);
+ x86_iommu->qi_emit_wait_descr(unit, seq, true, true, false);
+ }
+}
+
+/*
+ * To avoid missed wakeups, callers must increment the unit's waiters count
+ * before advancing the tail past the wait descriptor.
+ */
+void
+iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct iommu_qi_genseq *
+ gseq, bool nowait)
+{
+ struct x86_unit_common *x86c;
+
+ IOMMU_ASSERT_LOCKED(unit);
+ x86c = IOMMU2X86C(unit);
+
+ KASSERT(x86c->inv_seq_waiters > 0, ("%s: no waiters", __func__));
+ while (!iommu_qi_seq_processed(unit, gseq)) {
+ if (cold || nowait) {
+ cpu_spinwait();
+ } else {
+ msleep(&x86c->inv_seq_waiters, &unit->lock, 0,
+ "dmarse", hz);
+ }
+ }
+ x86c->inv_seq_waiters--;
+}
+
+/*
+ * The caller must not be using the entry's dmamap_link field.
+ */
+void
+iommu_qi_invalidate_locked(struct iommu_domain *domain,
+ struct iommu_map_entry *entry, bool emit_wait)
+{
+ struct iommu_unit *unit;
+ struct x86_unit_common *x86c;
+
+ unit = domain->iommu;
+ x86c = IOMMU2X86C(unit);
+ IOMMU_ASSERT_LOCKED(unit);
+
+ x86_iommu->qi_invalidate_emit(domain, entry->start, entry->end -
+ entry->start, &entry->gseq, emit_wait);
+
+ /*
+ * To avoid a data race in dmar_qi_task(), the entry's gseq must be
+ * initialized before the entry is added to the TLB flush list, and the
+ * entry must be added to that list before the tail is advanced. More
+ * precisely, the tail must not be advanced past the wait descriptor
+ * that will generate the interrupt that schedules dmar_qi_task() for
+ * execution before the entry is added to the list. While an earlier
+ * call to dmar_qi_ensure() might have advanced the tail, it will not
+ * advance it past the wait descriptor.
+ *
+ * See the definition of struct dmar_unit for more information on
+ * synchronization.
+ */
+ entry->tlb_flush_next = NULL;
+ atomic_store_rel_ptr((uintptr_t *)&x86c->tlb_flush_tail->
+ tlb_flush_next, (uintptr_t)entry);
+ x86c->tlb_flush_tail = entry;
+
+ x86_iommu->qi_advance_tail(unit);
+}
+
+void
+iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
+ iommu_gaddr_t size, bool cansleep)
+{
+ struct iommu_unit *unit;
+ struct iommu_qi_genseq gseq;
+
+ unit = domain->iommu;
+ IOMMU_LOCK(unit);
+ x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true);
+
+ /*
+ * To avoid a missed wakeup in iommu_qi_task(), the unit's
+ * waiters count must be incremented before the tail is
+ * advanced.
+ */
+ IOMMU2X86C(unit)->inv_seq_waiters++;
+
+ x86_iommu->qi_advance_tail(unit);
+ iommu_qi_wait_for_seq(unit, &gseq, !cansleep);
+ IOMMU_UNLOCK(unit);
+}
+
+void
+iommu_qi_drain_tlb_flush(struct iommu_unit *unit)
+{
+ struct x86_unit_common *x86c;
+ struct iommu_map_entry *entry, *head;
+
+ x86c = IOMMU2X86C(unit);
+ for (head = x86c->tlb_flush_head;; head = entry) {
+ entry = (struct iommu_map_entry *)
+ atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
+ if (entry == NULL ||
+ !iommu_qi_seq_processed(unit, &entry->gseq))
+ break;
+ x86c->tlb_flush_head = entry;
+ iommu_gas_free_entry(head);
+ if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
+ iommu_gas_free_region(entry);
+ else
+ iommu_gas_free_space(entry);
+ }
+}
+
+void
+iommu_qi_common_init(struct iommu_unit *unit, task_fn_t qi_task)
+{
+ struct x86_unit_common *x86c;
+ u_int qi_sz;
+
+ x86c = IOMMU2X86C(unit);
+
+ x86c->tlb_flush_head = x86c->tlb_flush_tail =
+ iommu_gas_alloc_entry(NULL, 0);
+ TASK_INIT(&x86c->qi_task, 0, qi_task, unit);
+ x86c->qi_taskqueue = taskqueue_create_fast("iommuqf", M_WAITOK,
+ taskqueue_thread_enqueue, &x86c->qi_taskqueue);
+ taskqueue_start_threads(&x86c->qi_taskqueue, 1, PI_AV,
+ "iommu%d qi taskq", unit->unit);
+
+ x86c->inv_waitd_gen = 0;
+ x86c->inv_waitd_seq = 1;
+
+ qi_sz = 3;
+ TUNABLE_INT_FETCH("hw.iommu.qi_size", &qi_sz);
+ if (qi_sz > x86c->qi_buf_maxsz)
+ qi_sz = x86c->qi_buf_maxsz;
+ x86c->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
+ /* Reserve one descriptor to prevent wraparound. */
+ x86c->inv_queue_avail = x86c->inv_queue_size -
+ x86c->qi_cmd_sz;
+
+ /*
+ * The invalidation queue reads by DMARs/AMDIOMMUs are always
+ * coherent.
+ */
+ x86c->inv_queue = kmem_alloc_contig(x86c->inv_queue_size,
+ M_WAITOK | M_ZERO, 0, iommu_high, PAGE_SIZE, 0,
+ VM_MEMATTR_DEFAULT);
+ x86c->inv_waitd_seq_hw_phys = pmap_kextract(
+ (vm_offset_t)&x86c->inv_waitd_seq_hw);
+}
+
+void
+iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
+ struct iommu_unit *))
+{
+ struct x86_unit_common *x86c;
+ struct iommu_qi_genseq gseq;
+
+ x86c = IOMMU2X86C(unit);
+
+ taskqueue_drain(x86c->qi_taskqueue, &x86c->qi_task);
+ taskqueue_free(x86c->qi_taskqueue);
+ x86c->qi_taskqueue = NULL;
+
+ IOMMU_LOCK(unit);
+ /* quisce */
+ x86_iommu->qi_ensure(unit, 1);
+ iommu_qi_emit_wait_seq(unit, &gseq, true);
+ /* See iommu_qi_invalidate_locked(). */
+ x86c->inv_seq_waiters++;
+ x86_iommu->qi_advance_tail(unit);
+ iommu_qi_wait_for_seq(unit, &gseq, false);
+ /* only after the quisce, disable queue */
+ disable_qi(unit);
+ KASSERT(x86c->inv_seq_waiters == 0,
+ ("iommu%d: waiters on disabled queue", unit->unit));
+ IOMMU_UNLOCK(unit);
+
+ kmem_free(x86c->inv_queue, x86c->inv_queue_size);
+ x86c->inv_queue = NULL;
+ x86c->inv_queue_size = 0;
+}
diff --git a/sys/x86/iommu/x86_iommu.h b/sys/x86/iommu/x86_iommu.h
index 8c908964acd0..eb1bbafbeb77 100644
--- a/sys/x86/iommu/x86_iommu.h
+++ b/sys/x86/iommu/x86_iommu.h
@@ -59,7 +59,18 @@ extern int iommu_tbl_pagecnt;
SYSCTL_DECL(_hw_iommu);
SYSCTL_DECL(_hw_iommu_dmar);
+struct x86_unit_common;
+
struct x86_iommu {
+ struct x86_unit_common *(*get_x86_common)(struct
+ iommu_unit *iommu);
+ void (*qi_ensure)(struct iommu_unit *unit, int descr_count);
+ void (*qi_emit_wait_descr)(struct iommu_unit *unit, uint32_t seq,
+ bool, bool, bool);
+ void (*qi_advance_tail)(struct iommu_unit *unit);
+ void (*qi_invalidate_emit)(struct iommu_domain *idomain,
+ iommu_gaddr_t base, iommu_gaddr_t size, struct iommu_qi_genseq *
+ pseq, bool emit_wait);
void (*domain_unload_entry)(struct iommu_map_entry *entry, bool free,
bool cansleep);
void (*domain_unload)(struct iommu_domain *iodom,
@@ -82,4 +93,65 @@ struct x86_iommu {
void set_x86_iommu(struct x86_iommu *);
struct x86_iommu *get_x86_iommu(void);
+struct x86_unit_common {
+ uint32_t qi_buf_maxsz;
+ uint32_t qi_cmd_sz;
+
+ char *inv_queue;
+ vm_size_t inv_queue_size;
+ uint32_t inv_queue_avail;
+ uint32_t inv_queue_tail;
+ volatile uint32_t inv_waitd_seq_hw; /* hw writes there on wait
+ descr completion */
*** 52 LINES SKIPPED ***