D35951: x86/iommu: Reduce DMAR lock contention

D35951.diff
diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h
--- a/sys/dev/iommu/iommu.h
+++ b/sys/dev/iommu/iommu.h
@@ -56,7 +56,10 @@
iommu_gaddr_t free_down; /* Max free space below the
current R/B tree node */
u_int flags;
- TAILQ_ENTRY(iommu_map_entry) dmamap_link; /* Link for dmamap entries */
+ union {
+ TAILQ_ENTRY(iommu_map_entry) dmamap_link; /* DMA map entries */
+ struct iommu_map_entry *tlb_flush_next;
+ };
RB_ENTRY(iommu_map_entry) rb_entry; /* Links for domain entries */
struct iommu_domain *domain;
struct iommu_qi_genseq gseq;
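The union introduced above overlays two link fields that are never needed at the same time: dmamap_link is used while the entry sits on a DMA map's queue, and tlb_flush_next is used once the entry has been handed to the unit's deferred-free (TLB flush) queue introduced later in this diff. A minimal sketch of the overlay, using simplified, hypothetical type names rather than the FreeBSD definitions:

#include <sys/queue.h>

struct map_entry {
	union {
		/* Valid while the entry is linked on a DMA map's queue. */
		TAILQ_ENTRY(map_entry) dmamap_link;
		/* Valid once the entry awaits a deferred free after a TLB flush. */
		struct map_entry *tlb_flush_next;
	};
};
TAILQ_HEAD(map_entry_tailq, map_entry);

Because the two uses are mutually exclusive, the overlay costs no space; the price is the precondition spelled out later in the patch that a caller passing free == true must no longer be using dmamap_link.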
diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -99,7 +99,7 @@
res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
- if (res != NULL) {
+ if (res != NULL && domain != NULL) {
res->domain = domain;
atomic_add_int(&domain->entries_cnt, 1);
}
@@ -113,7 +113,8 @@
KASSERT(domain == entry->domain,
("mismatched free domain %p entry %p entry->domain %p", domain,
entry, entry->domain));
- atomic_subtract_int(&domain->entries_cnt, 1);
+ if (domain != NULL)
+ atomic_subtract_int(&domain->entries_cnt, 1);
uma_zfree(iommu_map_entry_zone, entry);
}
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -867,6 +867,10 @@
entry->flags = 0;
}
+/*
+ * If the given value for "free" is true, then the caller must not be using
+ * the entry's dmamap_link field.
+ */
void
iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
bool cansleep)
@@ -885,10 +889,7 @@
if (unit->qi_enabled) {
if (free) {
DMAR_LOCK(unit);
- dmar_qi_invalidate_locked(domain, entry->start,
- entry->end - entry->start, &entry->gseq, true);
- TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry,
- dmamap_link);
+ dmar_qi_invalidate_locked(domain, entry, true);
DMAR_UNLOCK(unit);
} else {
dmar_qi_invalidate_sync(domain, entry->start,
@@ -942,12 +943,11 @@
KASSERT(unit->qi_enabled, ("loaded entry left"));
DMAR_LOCK(unit);
- TAILQ_FOREACH(entry, entries, dmamap_link) {
- dmar_qi_invalidate_locked(domain, entry->start, entry->end -
- entry->start, &entry->gseq,
+ while ((entry = TAILQ_FIRST(entries)) != NULL) {
+ TAILQ_REMOVE(entries, entry, dmamap_link);
+ dmar_qi_invalidate_locked(domain, entry,
dmar_domain_unload_emit_wait(domain, entry));
}
- TAILQ_CONCAT(&unit->tlb_flush_entries, entries, dmamap_link);
DMAR_UNLOCK(unit);
}
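Since dmar_qi_invalidate_locked() now links the entry onto the flush queue through tlb_flush_next, which overlays dmamap_link, the loop above removes each entry from its local list before handing it off instead of iterating with TAILQ_FOREACH and concatenating afterwards. Below is a self-contained, userspace sketch of that drain pattern; the names are hypothetical and hand_off() is only a placeholder for dmar_qi_invalidate_locked():

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	int id;
	union {
		TAILQ_ENTRY(entry) dmamap_link;		/* while on the local list */
		struct entry *tlb_flush_next;		/* after hand-off */
	};
};
TAILQ_HEAD(entry_list, entry);

/*
 * Placeholder for dmar_qi_invalidate_locked(): it takes ownership of the
 * entry and immediately reuses the storage that used to hold dmamap_link.
 */
static void
hand_off(struct entry *e)
{
	e->tlb_flush_next = NULL;	/* clobbers dmamap_link */
	printf("handed off entry %d\n", e->id);
	free(e);	/* the real code frees it later, from dmar_qi_task() */
}

static void
drain(struct entry_list *list)
{
	struct entry *e;

	/*
	 * Remove first, hand off second: the entry must be off the list
	 * before its link field may be reused.
	 */
	while ((e = TAILQ_FIRST(list)) != NULL) {
		TAILQ_REMOVE(list, e, dmamap_link);
		hand_off(e);
	}
}

int
main(void)
{
	struct entry_list list = TAILQ_HEAD_INITIALIZER(list);
	struct entry *e;
	int i;

	for (i = 0; i < 3; i++) {
		e = calloc(1, sizeof(*e));
		e->id = i;
		TAILQ_INSERT_TAIL(&list, e, dmamap_link);
	}
	drain(&list);
	return (0);
}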
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -177,8 +177,33 @@
u_int irte_cnt;
vmem_t *irtids;
- /* Delayed freeing of map entries queue processing */
- struct iommu_map_entries_tailq tlb_flush_entries;
+ /*
+ * Delayed freeing of map entries queue processing:
+ *
+ * tlb_flush_head and tlb_flush_tail are used to implement a FIFO
+ * queue that supports concurrent dequeues and enqueues. However,
+ * there can only be a single dequeuer (accessing tlb_flush_head) and
+ * a single enqueuer (accessing tlb_flush_tail) at a time. Since the
+ * unit's qi_task is the only dequeuer, it can access tlb_flush_head
+ * without any locking. In contrast, there may be multiple enqueuers,
+ * so the enqueuers acquire the iommu unit lock to serialize their
+ * accesses to tlb_flush_tail.
+ *
+ * In this FIFO queue implementation, the key to enabling concurrent
+ * dequeues and enqueues is that the dequeuer never needs to access
+ * tlb_flush_tail and the enqueuer never needs to access
+ * tlb_flush_head. In particular, tlb_flush_head and tlb_flush_tail
+ * are never NULL, so neither a dequeuer nor an enqueuer ever needs to
+ * update both. Instead, tlb_flush_head always points to a "zombie"
+ * struct, which previously held the last dequeued item. Thus, the
+ * zombie's next field actually points to the struct holding the first
+ * item in the queue. When an item is dequeued, the current zombie is
+ * finally freed, and the struct that held the just dequeued item
+ * becomes the new zombie. When the queue is empty, tlb_flush_tail
+ * also points to the zombie.
+ */
+ struct iommu_map_entry *tlb_flush_head;
+ struct iommu_map_entry *tlb_flush_tail;
struct task qi_task;
struct taskqueue *qi_taskqueue;
};
@@ -249,8 +274,8 @@
void dmar_disable_qi_intr(struct dmar_unit *unit);
int dmar_init_qi(struct dmar_unit *unit);
void dmar_fini_qi(struct dmar_unit *unit);
-void dmar_qi_invalidate_locked(struct dmar_domain *domain, iommu_gaddr_t start,
- iommu_gaddr_t size, struct iommu_qi_genseq *psec, bool emit_wait);
+void dmar_qi_invalidate_locked(struct dmar_domain *domain,
+ struct iommu_map_entry *entry, bool emit_wait);
void dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t start,
iommu_gaddr_t size, bool cansleep);
void dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit);
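The head/tail scheme described in the comment above is the classic single-consumer FIFO with a permanent dummy ("zombie") node: the consumer only ever touches tlb_flush_head, the producers only ever touch tlb_flush_tail (serialized by the DMAR lock), and the zombie keeps both pointers non-NULL so neither side has to update the other's. It is also why iommu_gas_alloc_entry() and iommu_gas_free_entry() are taught earlier in this diff to tolerate a NULL domain: the initial zombie allocated in dmar_init_qi() belongs to no domain. Below is a minimal userspace model of the scheme using C11 atomics; the names are hypothetical, calloc()/free() stand in for the iommu_gas entry allocator, and the release/acquire pair mirrors atomic_store_rel_ptr()/atomic_load_acq_ptr() in intel_qi.c:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct qnode {
	int item;				/* stands in for the map entry's payload */
	_Atomic(struct qnode *) next;		/* models tlb_flush_next */
};

struct queue {
	struct qnode *head;	/* consumer only: always points at the zombie */
	struct qnode *tail;	/* producers only, under the producers' lock */
};

static void
queue_init(struct queue *q)
{
	/* Like dmar_init_qi(): start with a zombie node owned by nobody. */
	struct qnode *z = calloc(1, sizeof(*z));

	atomic_init(&z->next, NULL);
	q->head = q->tail = z;
}

/* Producer side, cf. the enqueue added to dmar_qi_invalidate_locked(). */
static void
enqueue(struct queue *q, int item)
{
	struct qnode *n = calloc(1, sizeof(*n));

	n->item = item;
	atomic_init(&n->next, NULL);
	/*
	 * Release store: the node's contents (the gseq in the real code)
	 * must be visible before the consumer can observe the link.
	 */
	atomic_store_explicit(&q->tail->next, n, memory_order_release);
	q->tail = n;
}

/* Consumer side, cf. the dequeue loop in dmar_qi_task(). */
static bool
dequeue(struct queue *q, int *itemp)
{
	struct qnode *zombie = q->head;
	struct qnode *first =
	    atomic_load_explicit(&zombie->next, memory_order_acquire);

	if (first == NULL)
		return (false);			/* queue is empty */
	*itemp = first->item;
	/*
	 * The node that held the dequeued item becomes the new zombie and
	 * the old zombie is finally freed.
	 */
	q->head = first;
	free(zombie);
	return (true);
}

int
main(void)
{
	struct queue q;
	int item;

	queue_init(&q);
	enqueue(&q, 1);
	enqueue(&q, 2);
	while (dequeue(&q, &item))
		printf("dequeued %d\n", item);
	free(q.head);				/* the final zombie */
	return (0);
}

In the kernel the dequeuer additionally checks dmar_qi_seq_processed() before consuming a node, so an entry's address space is released only after its invalidation wait descriptor has completed.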
diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c
--- a/sys/x86/iommu/intel_qi.c
+++ b/sys/x86/iommu/intel_qi.c
@@ -64,10 +64,11 @@
dmar_qi_seq_processed(const struct dmar_unit *unit,
const struct iommu_qi_genseq *pseq)
{
+ u_int gen;
- return (pseq->gen < unit->inv_waitd_gen ||
- (pseq->gen == unit->inv_waitd_gen &&
- pseq->seq <= unit->inv_waitd_seq_hw));
+ gen = unit->inv_waitd_gen;
+ return (pseq->gen < gen ||
+ (pseq->gen == gen && pseq->seq <= unit->inv_waitd_seq_hw));
}
static int
@@ -131,6 +132,9 @@
* inform the descriptor streamer about entries we
* might have already filled, otherwise they could
* clog the whole queue..
+ *
+ * See dmar_qi_invalidate_locked() for a discussion
+ * about data race prevention.
*/
dmar_qi_advance_tail(unit);
unit->inv_queue_full++;
@@ -201,13 +205,17 @@
}
}
+/*
+ * To avoid missed wakeups, callers must increment the unit's waiters count
+ * before advancing the tail past the wait descriptor.
+ */
static void
dmar_qi_wait_for_seq(struct dmar_unit *unit, const struct iommu_qi_genseq *gseq,
bool nowait)
{
DMAR_ASSERT_LOCKED(unit);
- unit->inv_seq_waiters++;
+ KASSERT(unit->inv_seq_waiters > 0, ("%s: no waiters", __func__));
while (!dmar_qi_seq_processed(unit, gseq)) {
if (cold || nowait) {
cpu_spinwait();
@@ -219,8 +227,8 @@
unit->inv_seq_waiters--;
}
-void
-dmar_qi_invalidate_locked(struct dmar_domain *domain, iommu_gaddr_t base,
+static void
+dmar_qi_invalidate_emit(struct dmar_domain *domain, iommu_gaddr_t base,
iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait)
{
struct dmar_unit *unit;
@@ -239,6 +247,40 @@
base | am);
}
dmar_qi_emit_wait_seq(unit, pseq, emit_wait);
+}
+
+/*
+ * The caller must not be using the entry's dmamap_link field.
+ */
+void
+dmar_qi_invalidate_locked(struct dmar_domain *domain,
+ struct iommu_map_entry *entry, bool emit_wait)
+{
+ struct dmar_unit *unit;
+
+ unit = domain->dmar;
+ DMAR_ASSERT_LOCKED(unit);
+ dmar_qi_invalidate_emit(domain, entry->start, entry->end -
+ entry->start, &entry->gseq, emit_wait);
+
+ /*
+ * To avoid a data race in dmar_qi_task(), the entry's gseq must be
+ * initialized before the entry is added to the TLB flush list, and the
+ * entry must be added to that list before the tail is advanced. More
+ * precisely, the tail must not be advanced past the wait descriptor
+ * that will generate the interrupt that schedules dmar_qi_task() for
+ * execution before the entry is added to the list. While an earlier
+ * call to dmar_qi_ensure() might have advanced the tail, it will not
+ * advance it past the wait descriptor.
+ *
+ * See the definition of struct dmar_unit for more information on
+ * synchronization.
+ */
+ entry->tlb_flush_next = NULL;
+ atomic_store_rel_ptr((uintptr_t *)&unit->tlb_flush_tail->tlb_flush_next,
+ (uintptr_t)entry);
+ unit->tlb_flush_tail = entry;
+
dmar_qi_advance_tail(unit);
}
@@ -251,7 +293,15 @@
unit = domain->dmar;
DMAR_LOCK(unit);
- dmar_qi_invalidate_locked(domain, base, size, &gseq, true);
+ dmar_qi_invalidate_emit(domain, base, size, &gseq, true);
+
+ /*
+ * To avoid a missed wakeup in dmar_qi_task(), the unit's waiters count
+ * must be incremented before the tail is advanced.
+ */
+ unit->inv_seq_waiters++;
+
+ dmar_qi_advance_tail(unit);
dmar_qi_wait_for_seq(unit, &gseq, !cansleep);
DMAR_UNLOCK(unit);
}
@@ -265,6 +315,8 @@
dmar_qi_ensure(unit, 2);
dmar_qi_emit(unit, DMAR_IQ_DESCR_CTX_INV | DMAR_IQ_DESCR_CTX_GLOB, 0);
dmar_qi_emit_wait_seq(unit, &gseq, true);
+ /* See dmar_qi_invalidate_sync(). */
+ unit->inv_seq_waiters++;
dmar_qi_advance_tail(unit);
dmar_qi_wait_for_seq(unit, &gseq, false);
}
@@ -279,6 +331,8 @@
dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_GLOB |
DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR, 0);
dmar_qi_emit_wait_seq(unit, &gseq, true);
+ /* See dmar_qi_invalidate_sync(). */
+ unit->inv_seq_waiters++;
dmar_qi_advance_tail(unit);
dmar_qi_wait_for_seq(unit, &gseq, false);
}
@@ -292,6 +346,8 @@
dmar_qi_ensure(unit, 2);
dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV, 0);
dmar_qi_emit_wait_seq(unit, &gseq, true);
+ /* See dmar_qi_invalidate_sync(). */
+ unit->inv_seq_waiters++;
dmar_qi_advance_tail(unit);
dmar_qi_wait_for_seq(unit, &gseq, false);
}
@@ -316,6 +372,13 @@
}
dmar_qi_ensure(unit, 1);
dmar_qi_emit_wait_seq(unit, &gseq, true);
+
+ /*
+ * Since dmar_qi_wait_for_seq() will not sleep, this increment's
+ * placement relative to advancing the tail doesn't matter.
+ */
+ unit->inv_seq_waiters++;
+
dmar_qi_advance_tail(unit);
/*
@@ -352,7 +415,8 @@
dmar_qi_task(void *arg, int pending __unused)
{
struct dmar_unit *unit;
- struct iommu_map_entry *entry;
+ struct iommu_domain *domain;
+ struct iommu_map_entry *entry, *head;
uint32_t ics;
unit = arg;
@@ -367,21 +431,33 @@
dmar_write4(unit, DMAR_ICS_REG, ics);
}
- DMAR_LOCK(unit);
for (;;) {
- entry = TAILQ_FIRST(&unit->tlb_flush_entries);
+ head = unit->tlb_flush_head;
+ entry = (struct iommu_map_entry *)
+ atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
if (entry == NULL)
break;
if (!dmar_qi_seq_processed(unit, &entry->gseq))
break;
- TAILQ_REMOVE(&unit->tlb_flush_entries, entry, dmamap_link);
- DMAR_UNLOCK(unit);
- dmar_domain_free_entry(entry, true);
- DMAR_LOCK(unit);
+ unit->tlb_flush_head = entry;
+ iommu_gas_free_entry(head->domain, head);
+ domain = entry->domain;
+ IOMMU_DOMAIN_LOCK(domain);
+ if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
+ iommu_gas_free_region(domain, entry);
+ else
+ iommu_gas_free_space(domain, entry);
+ IOMMU_DOMAIN_UNLOCK(domain);
}
- if (unit->inv_seq_waiters > 0)
+ if (unit->inv_seq_waiters > 0) {
+ /*
+ * Acquire the DMAR lock so that wakeup() is called only after
+ * the waiter is sleeping.
+ */
+ DMAR_LOCK(unit);
wakeup(&unit->inv_seq_waiters);
- DMAR_UNLOCK(unit);
+ DMAR_UNLOCK(unit);
+ }
}
int
@@ -398,7 +474,8 @@
if (!unit->qi_enabled)
return (0);
- TAILQ_INIT(&unit->tlb_flush_entries);
+ unit->tlb_flush_head = unit->tlb_flush_tail =
+ iommu_gas_alloc_entry(NULL, 0);
TASK_INIT(&unit->qi_task, 0, dmar_qi_task, unit);
unit->qi_taskqueue = taskqueue_create_fast("dmarqf", M_WAITOK,
taskqueue_thread_enqueue, &unit->qi_taskqueue);
@@ -454,6 +531,8 @@
/* quisce */
dmar_qi_ensure(unit, 1);
dmar_qi_emit_wait_seq(unit, &gseq, true);
+ /* See dmar_qi_invalidate_sync_locked(). */
+ unit->inv_seq_waiters++;
dmar_qi_advance_tail(unit);
dmar_qi_wait_for_seq(unit, &gseq, false);
/* only after the quisce, disable queue */
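The other ordering rule this diff repeats at each synchronous invalidation site is that inv_seq_waiters must be incremented before dmar_qi_advance_tail(), because dmar_qi_task() now checks the counter without holding the DMAR lock. The sketch below models that rule in userspace with C11 atomics and a pthread condition variable; the names are hypothetical, the condvar loop stands in for dmar_qi_seq_processed()/msleep(), and the spinning thread stands in for the hardware completing the wait descriptor and running dmar_qi_task():

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;	/* models the DMAR lock */
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static atomic_uint waiters;		/* models inv_seq_waiters */
static atomic_bool submitted;		/* models the tail being advanced */
static atomic_bool done;		/* models the wait descriptor completing */

/* Waiter, cf. dmar_qi_invalidate_sync() + dmar_qi_wait_for_seq(). */
static void
wait_for_seq(void)
{
	pthread_mutex_lock(&lock);
	/*
	 * Increment before publishing the request ("before advancing the
	 * tail past the wait descriptor"); otherwise the completion side's
	 * unlocked check below could read zero and skip the wakeup.
	 */
	atomic_fetch_add(&waiters, 1);
	atomic_store(&submitted, true);		/* dmar_qi_advance_tail() */
	while (!atomic_load(&done))
		pthread_cond_wait(&cv, &lock);
	atomic_fetch_sub(&waiters, 1);
	pthread_mutex_unlock(&lock);
}

/* Completion side, cf. the tail of dmar_qi_task(). */
static void *
completion_thread(void *arg)
{
	(void)arg;
	while (!atomic_load(&submitted))
		;				/* hardware waits for the tail */
	atomic_store(&done, true);		/* wait descriptor completed */
	if (atomic_load(&waiters) > 0) {
		/*
		 * Take the lock so the wakeup is delivered only once the
		 * waiter is actually sleeping, mirroring the comment added
		 * to dmar_qi_task().
		 */
		pthread_mutex_lock(&lock);
		pthread_cond_broadcast(&cv);
		pthread_mutex_unlock(&lock);
	}
	return (NULL);
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, completion_thread, NULL);
	wait_for_seq();
	pthread_join(t, NULL);
	return (0);
}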