D47256.diff
diff --git a/sys/conf/files.x86 b/sys/conf/files.x86
--- a/sys/conf/files.x86
+++ b/sys/conf/files.x86
@@ -344,6 +344,12 @@
x86/cpufreq/hwpstate_intel.c optional cpufreq
x86/cpufreq/p4tcc.c optional cpufreq
x86/cpufreq/powernow.c optional cpufreq
+x86/iommu/amd_cmd.c optional acpi iommu pci
+x86/iommu/amd_ctx.c optional acpi iommu pci
+x86/iommu/amd_drv.c optional acpi iommu pci
+x86/iommu/amd_event.c optional acpi iommu pci
+x86/iommu/amd_idpgtbl.c optional acpi iommu pci
+x86/iommu/amd_intrmap.c optional acpi iommu pci
x86/iommu/intel_ctx.c optional acpi iommu pci
x86/iommu/intel_drv.c optional acpi iommu pci
x86/iommu/intel_fault.c optional acpi iommu pci
diff --git a/sys/x86/iommu/amd_cmd.c b/sys/x86/iommu/amd_cmd.c
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_cmd.c
@@ -0,0 +1,360 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_acpi.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/taskqueue.h>
+#include <sys/time.h>
+#include <sys/tree.h>
+#include <sys/vmem.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <dev/pci/pcireg.h>
+#include <machine/bus.h>
+#include <machine/cpu.h>
+#include <x86/include/busdma_impl.h>
+#include <dev/iommu/busdma_iommu.h>
+#include <x86/iommu/amd_reg.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/amd_iommu.h>
+
+static void
+amdiommu_enable_cmdbuf(struct amdiommu_unit *unit)
+{
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ unit->hw_ctrl |= AMDIOMMU_CTRL_CMDBUF_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+}
+
+static void
+amdiommu_disable_cmdbuf(struct amdiommu_unit *unit)
+{
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ unit->hw_ctrl &= ~AMDIOMMU_CTRL_CMDBUF_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+}
+
+static void
+amdiommu_enable_qi_intr(struct iommu_unit *iommu)
+{
+ struct amdiommu_unit *unit;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ unit->hw_ctrl |= AMDIOMMU_CTRL_COMWINT_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+ amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS,
+ AMDIOMMU_CMDEVS_COMWAITINT);
+}
+
+static void
+amdiommu_disable_qi_intr(struct iommu_unit *iommu)
+{
+ struct amdiommu_unit *unit;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ unit->hw_ctrl &= ~AMDIOMMU_CTRL_COMWINT_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+}
+
+static void
+amdiommu_cmd_advance_tail(struct iommu_unit *iommu)
+{
+ struct amdiommu_unit *unit;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ amdiommu_write8(unit, AMDIOMMU_CMDBUF_TAIL, unit->x86c.inv_queue_tail);
+}
+
+static void
+amdiommu_cmd_ensure(struct iommu_unit *iommu, int descr_count)
+{
+ struct amdiommu_unit *unit;
+ uint64_t head;
+ int bytes;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ bytes = descr_count << AMDIOMMU_CMD_SZ_SHIFT;
+ for (;;) {
+ if (bytes <= unit->x86c.inv_queue_avail)
+ break;
+ /* refill */
+ head = amdiommu_read8(unit, AMDIOMMU_CMDBUF_HEAD);
+ head &= AMDIOMMU_CMDPTR_MASK;
+ unit->x86c.inv_queue_avail = head - unit->x86c.inv_queue_tail -
+ AMDIOMMU_CMD_SZ;
+ if (head <= unit->x86c.inv_queue_tail)
+ unit->x86c.inv_queue_avail += unit->x86c.inv_queue_size;
+ if (bytes <= unit->x86c.inv_queue_avail)
+ break;
+
+ /*
+ * No space in the queue, so busy-wait. The hardware must
+ * make progress. But first advance the tail to
+ * inform the descriptor streamer about entries we
+ * might have already filled, otherwise they could
+ * clog the whole queue.
+ *
+ * See dmar_qi_invalidate_locked() for a discussion
+ * about data race prevention.
+ */
+ amdiommu_cmd_advance_tail(iommu);
+ unit->x86c.inv_queue_full++;
+ cpu_spinwait();
+ }
+ unit->x86c.inv_queue_avail -= bytes;
+}
+
+static void
+amdiommu_cmd_emit(struct amdiommu_unit *unit, const struct
+ amdiommu_cmd_generic *cmd)
+{
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ memcpy(unit->x86c.inv_queue + unit->x86c.inv_queue_tail, cmd,
+ sizeof(*cmd));
+ unit->x86c.inv_queue_tail += AMDIOMMU_CMD_SZ;
+ KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size,
+ ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail,
+ (uintmax_t)unit->x86c.inv_queue_size));
+ unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1;
+}
+
+static void
+amdiommu_cmd_emit_wait_descr(struct iommu_unit *iommu, uint32_t seq,
+ bool intr, bool memw, bool fence)
+{
+ struct amdiommu_unit *unit;
+ struct amdiommu_cmd_completion_wait c;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_COMPLETION_WAIT;
+ if (memw) {
+ uint32_t x;
+
+ c.s = 1;
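+ /*
+ * The wait-store physical address is split across the
+ * descriptor: address0 holds bits 31:3, address1 holds
+ * bits 63:32.
+ */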
+ x = unit->x86c.inv_waitd_seq_hw_phys;
+ x >>= 3;
+ c.address0 = x;
+ x = unit->x86c.inv_waitd_seq_hw_phys >> 32;
+ c.address1 = x;
+ c.data0 = seq;
+ }
+ if (fence)
+ c.f = 1;
+ if (intr)
+ c.i = 1;
+ amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
+}
+
+static void
+amdiommu_qi_invalidate_emit(struct iommu_domain *adomain, iommu_gaddr_t base,
+ iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait)
+{
+ struct amdiommu_domain *domain;
+ struct amdiommu_unit *unit;
+ struct amdiommu_cmd_invalidate_iommu_pages c;
+ u_int isize;
+
+ domain = IODOM2DOM(adomain);
+ unit = domain->unit;
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_INVALIDATE_IOMMU_PAGES;
+ c.domainid = domain->domain;
+ isize = IOMMU_PAGE_SIZE; /* XXXKIB handle superpages */
+
+ for (; size > 0; base += isize, size -= isize) {
+ amdiommu_cmd_ensure(AMD2IOMMU(unit), 1);
+ c.s = 0;
+ c.pde = 1;
+ c.address = base >> IOMMU_PAGE_SHIFT;
+ amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
+ }
+ iommu_qi_emit_wait_seq(AMD2IOMMU(unit), pseq, emit_wait);
+}
+
+void
+amdiommu_qi_invalidate_all_pages_locked_nowait(struct amdiommu_domain *domain)
+{
+ struct amdiommu_unit *unit;
+ struct amdiommu_cmd_invalidate_iommu_pages c;
+
+ unit = domain->unit;
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_INVALIDATE_IOMMU_PAGES;
+ c.domainid = domain->domain;
+
+ /*
+ * The magic address/size combination specified in the note for
+ * the INVALIDATE_IOMMU_PAGES command description; it requests
+ * invalidation of all pages of the domain.
+ */
+ c.s = 1;
+ c.pde = 1;
+ c.address = 0x7ffffffffffff;
+
+ amdiommu_cmd_ensure(AMD2IOMMU(unit), 1);
+ amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
+}
+
+void
+amdiommu_qi_invalidate_wait_sync(struct iommu_unit *iommu)
+{
+ struct iommu_qi_genseq gseq;
+
+ amdiommu_cmd_ensure(iommu, 1);
+ iommu_qi_emit_wait_seq(iommu, &gseq, true);
+ IOMMU2AMD(iommu)->x86c.inv_seq_waiters++;
+ amdiommu_cmd_advance_tail(iommu);
+ iommu_qi_wait_for_seq(iommu, &gseq, true);
+}
+
+void
+amdiommu_qi_invalidate_ctx_locked_nowait(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_cmd_invalidate_devtab_entry c;
+
+ amdiommu_cmd_ensure(AMD2IOMMU(CTX2AMD(ctx)), 1);
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_INVALIDATE_DEVTAB_ENTRY;
+ c.devid = ctx->context.rid;
+ amdiommu_cmd_emit(CTX2AMD(ctx), (struct amdiommu_cmd_generic *)&c);
+}
+
+void
+amdiommu_qi_invalidate_ctx_locked(struct amdiommu_ctx *ctx)
+{
+ amdiommu_qi_invalidate_ctx_locked_nowait(ctx);
+ amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx)));
+}
+
+void
+amdiommu_qi_invalidate_ir_locked_nowait(struct amdiommu_unit *unit,
+ uint16_t devid)
+{
+ struct amdiommu_cmd_invalidate_interrupt_table c;
+
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ amdiommu_cmd_ensure(AMD2IOMMU(unit), 1);
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_INVALIDATE_INTERRUPT_TABLE;
+ c.devid = devid;
+ amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
+}
+
+void
+amdiommu_qi_invalidate_ir_locked(struct amdiommu_unit *unit, uint16_t devid)
+{
+ amdiommu_qi_invalidate_ir_locked_nowait(unit, devid);
+ amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(unit));
+}
+
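+/*
+ * Taskqueue handler for command completions: drain the deferred TLB
+ * flush entries and wake up threads sleeping on the completion-wait
+ * sequence counter.
+ */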
+static void
+amdiommu_qi_task(void *arg, int pending __unused)
+{
+ struct amdiommu_unit *unit;
+
+ unit = IOMMU2AMD(arg);
+ iommu_qi_drain_tlb_flush(AMD2IOMMU(unit));
+
+ AMDIOMMU_LOCK(unit);
+ if (unit->x86c.inv_seq_waiters > 0)
+ wakeup(&unit->x86c.inv_seq_waiters);
+ AMDIOMMU_UNLOCK(unit);
+}
+
+int
+amdiommu_init_cmd(struct amdiommu_unit *unit)
+{
+ uint64_t qi_sz, rv;
+
+ unit->x86c.qi_buf_maxsz = ilog2(AMDIOMMU_CMDBUF_MAX / PAGE_SIZE);
+ unit->x86c.qi_cmd_sz = AMDIOMMU_CMD_SZ;
+ iommu_qi_common_init(AMD2IOMMU(unit), amdiommu_qi_task);
+ get_x86_iommu()->qi_ensure = amdiommu_cmd_ensure;
+ get_x86_iommu()->qi_emit_wait_descr = amdiommu_cmd_emit_wait_descr;
+ get_x86_iommu()->qi_advance_tail = amdiommu_cmd_advance_tail;
+ get_x86_iommu()->qi_invalidate_emit = amdiommu_qi_invalidate_emit;
+
+ rv = pmap_kextract((uintptr_t)unit->x86c.inv_queue);
+
+ /*
+ * See the description of the ComLen encoding for Command
+ * buffer Base Address Register.
+ */
+ qi_sz = ilog2(unit->x86c.inv_queue_size / PAGE_SIZE) + 8;
+ rv |= qi_sz << AMDIOMMU_CMDBUF_BASE_SZSHIFT;
+
+ AMDIOMMU_LOCK(unit);
+ amdiommu_write8(unit, AMDIOMMU_CMDBUF_BASE, rv);
+ amdiommu_enable_cmdbuf(unit);
+ amdiommu_enable_qi_intr(AMD2IOMMU(unit));
+ AMDIOMMU_UNLOCK(unit);
+
+ return (0);
+}
+
+static void
+amdiommu_fini_cmd_helper(struct iommu_unit *iommu)
+{
+ amdiommu_disable_cmdbuf(IOMMU2AMD(iommu));
+ amdiommu_disable_qi_intr(iommu);
+}
+
+void
+amdiommu_fini_cmd(struct amdiommu_unit *unit)
+{
+ iommu_qi_common_fini(AMD2IOMMU(unit), amdiommu_fini_cmd_helper);
+}
diff --git a/sys/x86/iommu/amd_ctx.c b/sys/x86/iommu/amd_ctx.c
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_ctx.c
@@ -0,0 +1,639 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <sys/vmem.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <dev/iommu/busdma_iommu.h>
+#include <x86/iommu/amd_reg.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/amd_iommu.h>
+
+static MALLOC_DEFINE(M_AMDIOMMU_CTX, "amdiommu_ctx", "AMD IOMMU Context");
+static MALLOC_DEFINE(M_AMDIOMMU_DOMAIN, "amdiommu_dom", "AMD IOMMU Domain");
+
+static void amdiommu_unref_domain_locked(struct amdiommu_unit *unit,
+ struct amdiommu_domain *domain);
+
+static struct amdiommu_dte *
+amdiommu_get_dtep(struct amdiommu_ctx *ctx)
+{
+ return (&CTX2AMD(ctx)->dev_tbl[ctx->context.rid]);
+}
+
+void
+amdiommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
+ bool cansleep)
+{
+ struct amdiommu_domain *domain;
+ struct amdiommu_unit *unit;
+
+ domain = IODOM2DOM(entry->domain);
+ unit = DOM2AMD(domain);
+
+ /*
+ * If "free" is false, then the IOTLB invalidation must be performed
+ * synchronously. Otherwise, the caller might free the entry before
+ * dmar_qi_task() is finished processing it.
+ */
+ if (free) {
+ AMDIOMMU_LOCK(unit);
+ iommu_qi_invalidate_locked(&domain->iodom, entry, true);
+ AMDIOMMU_UNLOCK(unit);
+ } else {
+ iommu_qi_invalidate_sync(&domain->iodom, entry->start,
+ entry->end - entry->start, cansleep);
+ iommu_domain_free_entry(entry, false);
+ }
+}
+
+static bool
+amdiommu_domain_unload_emit_wait(struct amdiommu_domain *domain,
+ struct iommu_map_entry *entry)
+{
+ return (true); /* XXXKIB */
+}
+
+void
+amdiommu_domain_unload(struct iommu_domain *iodom,
+ struct iommu_map_entries_tailq *entries, bool cansleep)
+{
+ struct amdiommu_domain *domain;
+ struct amdiommu_unit *unit;
+ struct iommu_map_entry *entry, *entry1;
+ int error __diagused;
+
+ domain = IODOM2DOM(iodom);
+ unit = DOM2AMD(domain);
+
+ TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
+ KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
+ ("not mapped entry %p %p", domain, entry));
+ error = iodom->ops->unmap(iodom, entry,
+ cansleep ? IOMMU_PGF_WAITOK : 0);
+ KASSERT(error == 0, ("unmap %p error %d", domain, error));
+ }
+ if (TAILQ_EMPTY(entries))
+ return;
+
+ AMDIOMMU_LOCK(unit);
+ while ((entry = TAILQ_FIRST(entries)) != NULL) {
+ TAILQ_REMOVE(entries, entry, dmamap_link);
+ iommu_qi_invalidate_locked(&domain->iodom, entry,
+ amdiommu_domain_unload_emit_wait(domain, entry));
+ }
+ AMDIOMMU_UNLOCK(unit);
+}
+
+static void
+amdiommu_domain_destroy(struct amdiommu_domain *domain)
+{
+ struct iommu_domain *iodom;
+ struct amdiommu_unit *unit;
+
+ iodom = DOM2IODOM(domain);
+
+ KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
+ ("unfinished unloads %p", domain));
+ KASSERT(LIST_EMPTY(&iodom->contexts),
+ ("destroying dom %p with contexts", domain));
+ KASSERT(domain->ctx_cnt == 0,
+ ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
+ KASSERT(domain->refs == 0,
+ ("destroying dom %p with refs %d", domain, domain->refs));
+
+ if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
+ AMDIOMMU_DOMAIN_LOCK(domain);
+ iommu_gas_fini_domain(iodom);
+ AMDIOMMU_DOMAIN_UNLOCK(domain);
+ }
+ if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
+ if (domain->pgtbl_obj != NULL)
+ AMDIOMMU_DOMAIN_PGLOCK(domain);
+ amdiommu_domain_free_pgtbl(domain);
+ }
+ iommu_domain_fini(iodom);
+ unit = DOM2AMD(domain);
+ free_unr(unit->domids, domain->domain);
+ free(domain, M_AMDIOMMU_DOMAIN);
+}
+
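+/*
+ * Return the size of the address space covered by a page table
+ * hierarchy of the given level.
+ */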
+static iommu_gaddr_t
+lvl2addr(int lvl)
+{
+ int x;
+
+ x = IOMMU_PAGE_SHIFT + IOMMU_NPTEPGSHIFT * lvl;
+ /* Level 6 has only 8 bits for page table index */
+ if (x >= NBBY * sizeof(uint64_t))
+ return (-1ull);
+ return (1ull << x);
+}
+
+static void
+amdiommu_domain_init_pglvl(struct amdiommu_unit *unit,
+ struct amdiommu_domain *domain)
+{
+ iommu_gaddr_t end;
+ int hats, i;
+ uint64_t efr_hats;
+
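+ /*
+ * Pick the smallest page table level that covers the requested
+ * domain address space, then clamp it to the host address
+ * translation sizes (HATS) advertised in EFR.
+ */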
+ end = DOM2IODOM(domain)->end;
+ for (i = AMDIOMMU_PGTBL_MAXLVL; i > 1; i--) {
+ if (lvl2addr(i) >= end && lvl2addr(i - 1) < end)
+ break;
+ }
+ domain->pglvl = i;
+
+ efr_hats = unit->efr & AMDIOMMU_EFR_HATS_MASK;
+ switch (efr_hats) {
+ case AMDIOMMU_EFR_HATS_6LVL:
+ hats = 6;
+ break;
+ case AMDIOMMU_EFR_HATS_5LVL:
+ hats = 5;
+ break;
+ case AMDIOMMU_EFR_HATS_4LVL:
+ hats = 4;
+ break;
+ default:
+ printf("amdiommu%d: HATS %#jx (reserved) ignoring\n",
+ unit->iommu.unit, (uintmax_t)efr_hats);
+ return;
+ }
+ if (hats >= domain->pglvl)
+ return;
+
+ printf("amdiommu%d: domain %d HATS %d pglvl %d reducing to HATS\n",
+ unit->iommu.unit, domain->domain, hats, domain->pglvl);
+ domain->pglvl = hats;
+ domain->iodom.end = lvl2addr(hats);
+}
+
+static struct amdiommu_domain *
+amdiommu_domain_alloc(struct amdiommu_unit *unit, bool id_mapped)
+{
+ struct amdiommu_domain *domain;
+ struct iommu_domain *iodom;
+ int error, id;
+
+ id = alloc_unr(unit->domids);
+ if (id == -1)
+ return (NULL);
+ domain = malloc(sizeof(*domain), M_AMDIOMMU_DOMAIN, M_WAITOK | M_ZERO);
+ iodom = DOM2IODOM(domain);
+ domain->domain = id;
+ LIST_INIT(&iodom->contexts);
+ iommu_domain_init(AMD2IOMMU(unit), iodom, &amdiommu_domain_map_ops);
+
+ domain->unit = unit;
+
+ domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
+ amdiommu_domain_init_pglvl(unit, domain);
+ iommu_gas_init_domain(DOM2IODOM(domain));
+
+ if (id_mapped) {
+ domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
+ } else {
+ error = amdiommu_domain_alloc_pgtbl(domain);
+ if (error != 0)
+ goto fail;
+ /* Disable local apic region access */
+ error = iommu_gas_reserve_region(iodom, 0xfee00000,
+ 0xfeefffff + 1, &iodom->msi_entry);
+ if (error != 0)
+ goto fail;
+ }
+
+ return (domain);
+
+fail:
+ amdiommu_domain_destroy(domain);
+ return (NULL);
+}
+
+static struct amdiommu_ctx *
+amdiommu_ctx_alloc(struct amdiommu_domain *domain, uint16_t rid)
+{
+ struct amdiommu_ctx *ctx;
+
+ ctx = malloc(sizeof(*ctx), M_AMDIOMMU_CTX, M_WAITOK | M_ZERO);
+ ctx->context.domain = DOM2IODOM(domain);
+ ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
+ M_AMDIOMMU_CTX, M_WAITOK | M_ZERO);
+ ctx->context.rid = rid;
+ ctx->context.refs = 1;
+ return (ctx);
+}
+
+static void
+amdiommu_ctx_link(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_domain *domain;
+
+ domain = CTX2DOM(ctx);
+ IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
+ KASSERT(domain->refs >= domain->ctx_cnt,
+ ("dom %p ref underflow %d %d", domain, domain->refs,
+ domain->ctx_cnt));
+ domain->refs++;
+ domain->ctx_cnt++;
+ LIST_INSERT_HEAD(&domain->iodom.contexts, &ctx->context, link);
+}
+
+static void
+amdiommu_ctx_unlink(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_domain *domain;
+
+ domain = CTX2DOM(ctx);
+ IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
+ KASSERT(domain->refs > 0,
+ ("domain %p ctx dtr refs %d", domain, domain->refs));
+ KASSERT(domain->ctx_cnt >= domain->refs,
+ ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
+ domain->refs, domain->ctx_cnt));
+ domain->refs--;
+ domain->ctx_cnt--;
+ LIST_REMOVE(&ctx->context, link);
+}
+
+struct amdiommu_ctx *
+amdiommu_find_ctx_locked(struct amdiommu_unit *unit, uint16_t rid)
+{
+ struct amdiommu_domain *domain;
+ struct iommu_ctx *ctx;
+
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ LIST_FOREACH(domain, &unit->domains, link) {
+ LIST_FOREACH(ctx, &domain->iodom.contexts, link) {
+ if (ctx->rid == rid)
+ return (IOCTX2CTX(ctx));
+ }
+ }
+ return (NULL);
+}
+
+struct amdiommu_domain *
+amdiommu_find_domain(struct amdiommu_unit *unit, uint16_t rid)
+{
+ struct amdiommu_domain *domain;
+ struct iommu_ctx *ctx;
+
+ AMDIOMMU_LOCK(unit);
+ LIST_FOREACH(domain, &unit->domains, link) {
+ LIST_FOREACH(ctx, &domain->iodom.contexts, link) {
+ if (ctx->rid == rid)
+ break;
+ }
+ }
+ AMDIOMMU_UNLOCK(unit);
+ return (domain);
+}
+
+static void
+amdiommu_free_ctx_locked(struct amdiommu_unit *unit, struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_dte *dtep;
+ struct amdiommu_domain *domain;
+
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ KASSERT(ctx->context.refs >= 1,
+ ("amdiommu %p ctx %p refs %u", unit, ctx, ctx->context.refs));
+
+ /*
+ * If our reference is not last, only the dereference should
+ * be performed.
+ */
+ if (ctx->context.refs > 1) {
+ ctx->context.refs--;
+ AMDIOMMU_UNLOCK(unit);
+ return;
+ }
+
+ KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
+ ("lost ref on disabled ctx %p", ctx));
+
+ /*
+ * Otherwise, the device table entry must be cleared before
+ * the page table is destroyed.
+ */
+ dtep = amdiommu_get_dtep(ctx);
+ dtep->v = 0;
+ atomic_thread_fence_rel();
+ memset(dtep, 0, sizeof(*dtep));
+
+ domain = CTX2DOM(ctx);
+ amdiommu_qi_invalidate_ctx_locked_nowait(ctx);
+ amdiommu_qi_invalidate_ir_locked_nowait(unit, ctx->context.rid);
+ amdiommu_qi_invalidate_all_pages_locked_nowait(domain);
+ amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx)));
+
+ if (unit->irte_enabled)
+ amdiommu_ctx_fini_irte(ctx);
+
+ amdiommu_ctx_unlink(ctx);
+ free(ctx->context.tag, M_AMDIOMMU_CTX);
+ free(ctx, M_AMDIOMMU_CTX);
+ amdiommu_unref_domain_locked(unit, domain);
+}
+
+static void
+amdiommu_free_ctx(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_unit *unit;
+
+ unit = CTX2AMD(ctx);
+ AMDIOMMU_LOCK(unit);
+ amdiommu_free_ctx_locked(unit, ctx);
+}
+
+static void
+amdiommu_unref_domain_locked(struct amdiommu_unit *unit,
+ struct amdiommu_domain *domain)
+{
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ KASSERT(domain->refs >= 1,
+ ("amdiommu%d domain %p refs %u", unit->iommu.unit, domain,
+ domain->refs));
+ KASSERT(domain->refs > domain->ctx_cnt,
+ ("amdiommu%d domain %p refs %d ctx_cnt %d", unit->iommu.unit,
+ domain, domain->refs, domain->ctx_cnt));
+
+ if (domain->refs > 1) {
+ domain->refs--;
+ AMDIOMMU_UNLOCK(unit);
+ return;
+ }
+
+ LIST_REMOVE(domain, link);
+ AMDIOMMU_UNLOCK(unit);
+
+ taskqueue_drain(unit->iommu.delayed_taskqueue,
+ &domain->iodom.unload_task);
+ amdiommu_domain_destroy(domain);
+}
+
+static void
+dte_entry_init_one(struct amdiommu_dte *dtep, struct amdiommu_ctx *ctx,
+ vm_page_t pgtblr, uint8_t dte, uint32_t edte)
+{
+ struct amdiommu_domain *domain;
+ struct amdiommu_unit *unit;
+
+ domain = CTX2DOM(ctx);
+ unit = DOM2AMD(domain);
+
+ dtep->tv = 1;
+ /* dtep->had is not used for now */
+ dtep->ir = 1;
+ dtep->iw = 1;
+ dtep->domainid = domain->domain;
+ dtep->pioctl = AMDIOMMU_DTE_PIOCTL_DIS;
+
+ /* fill device interrupt passing hints from IVHD. */
+ dtep->initpass = (dte & ACPI_IVHD_INIT_PASS) != 0;
+ dtep->eintpass = (dte & ACPI_IVHD_EINT_PASS) != 0;
+ dtep->nmipass = (dte & ACPI_IVHD_NMI_PASS) != 0;
+ dtep->sysmgt = (dte & ACPI_IVHD_SYSTEM_MGMT) >> 4;
+ dtep->lint0pass = (dte & ACPI_IVHD_LINT0_PASS) != 0;
+ dtep->lint1pass = (dte & ACPI_IVHD_LINT1_PASS) != 0;
+
+ if (unit->irte_enabled) {
+ dtep->iv = 1;
+ dtep->i = 0;
+ dtep->inttablen = ilog2(unit->irte_nentries);
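+ /* The root pointer field stores the table physical address >> 6. */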
+ dtep->intrroot = pmap_kextract(unit->irte_x2apic ?
+ (vm_offset_t)ctx->irtx2 :
+ (vm_offset_t)ctx->irtb) >> 6;
+
+ dtep->intctl = AMDIOMMU_DTE_INTCTL_MAP;
+ }
+
+ if ((DOM2IODOM(domain)->flags & IOMMU_DOMAIN_IDMAP) != 0) {
+ dtep->pgmode = AMDIOMMU_DTE_PGMODE_1T1;
+ } else {
+ MPASS(domain->pglvl > 0 && domain->pglvl <=
+ AMDIOMMU_PGTBL_MAXLVL);
+ dtep->pgmode = domain->pglvl;
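+ /* Page table root: 4K-aligned physical address, stored >> 12. */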
+ dtep->ptroot = VM_PAGE_TO_PHYS(pgtblr) >> 12;
+ }
+
+ atomic_thread_fence_rel();
+ dtep->v = 1;
+}
+
+static void
+dte_entry_init(struct amdiommu_ctx *ctx, bool move, uint8_t dte, uint32_t edte)
+{
+ struct amdiommu_dte *dtep;
+ struct amdiommu_unit *unit;
+ struct amdiommu_domain *domain;
+ int i;
+
+ domain = CTX2DOM(ctx);
+ unit = DOM2AMD(domain);
+
+ dtep = amdiommu_get_dtep(ctx);
+ KASSERT(dtep->v == 0,
+ ("amdiommu%d initializing valid dte @%p %#jx",
+ CTX2AMD(ctx)->iommu.unit, dtep, (uintmax_t)(*(uint64_t *)dtep)));
+
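+ /*
+ * A bus-wide context covers the whole bus, so replicate the DTE
+ * for every possible rid on it.
+ */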
+ if (iommu_is_buswide_ctx(AMD2IOMMU(unit),
+ PCI_RID2BUS(ctx->context.rid))) {
+ MPASS(!move);
+ for (i = 0; i <= PCI_BUSMAX; i++) {
+ dte_entry_init_one(&dtep[i], ctx, domain->pgtblr,
+ dte, edte);
+ }
+ } else {
+ dte_entry_init_one(dtep, ctx, domain->pgtblr, dte, edte);
+ }
+}
+
+struct amdiommu_ctx *
+amdiommu_get_ctx_for_dev(struct amdiommu_unit *unit, device_t dev, uint16_t rid,
+ int dev_domain, bool id_mapped, bool rmrr_init, uint8_t dte, uint32_t edte)
+{
+ struct amdiommu_domain *domain, *domain1;
+ struct amdiommu_ctx *ctx, *ctx1;
+ int bus, slot, func;
+
+ if (dev != NULL) {
+ bus = pci_get_bus(dev);
+ slot = pci_get_slot(dev);
+ func = pci_get_function(dev);
+ } else {
+ bus = PCI_RID2BUS(rid);
+ slot = PCI_RID2SLOT(rid);
+ func = PCI_RID2FUNC(rid);
+ }
+ AMDIOMMU_LOCK(unit);
+ KASSERT(!iommu_is_buswide_ctx(AMD2IOMMU(unit), bus) ||
+ (slot == 0 && func == 0),
+ ("iommu%d pci%d:%d:%d get_ctx for buswide", AMD2IOMMU(unit)->unit,
+ bus, slot, func));
+ ctx = amdiommu_find_ctx_locked(unit, rid);
+ if (ctx == NULL) {
+ /*
+ * Perform the allocations which require sleep or have
+ * a higher chance to succeed if sleeping is allowed.
+ */
+ AMDIOMMU_UNLOCK(unit);
+ domain1 = amdiommu_domain_alloc(unit, id_mapped);
+ if (domain1 == NULL)
+ return (NULL);
+ if (!id_mapped) {
+ /*
+ * XXXKIB IVMD seems to be less significant
+ * and less used on AMD than RMRR on Intel.
+ * Not implemented for now.
+ */
+ }
+ ctx1 = amdiommu_ctx_alloc(domain1, rid);
+ amdiommu_ctx_init_irte(ctx1);
+ AMDIOMMU_LOCK(unit);
+
+ /*
+ * Recheck the contexts; another thread might have
+ * already allocated the needed one.
+ */
+ ctx = amdiommu_find_ctx_locked(unit, rid);
+ if (ctx == NULL) {
+ domain = domain1;
+ ctx = ctx1;
+ amdiommu_ctx_link(ctx);
+ ctx->context.tag->owner = dev;
+ iommu_device_tag_init(CTX2IOCTX(ctx), dev);
+
+ LIST_INSERT_HEAD(&unit->domains, domain, link);
+ dte_entry_init(ctx, false, dte, edte);
+ amdiommu_qi_invalidate_ctx_locked(ctx);
+ if (dev != NULL) {
+ device_printf(dev,
+ "amdiommu%d pci%d:%d:%d:%d rid %x domain %d "
+ "%s-mapped\n",
+ AMD2IOMMU(unit)->unit, unit->unit_dom,
+ bus, slot, func, rid, domain->domain,
+ id_mapped ? "id" : "re");
+ }
+ } else {
+ amdiommu_domain_destroy(domain1);
+ /* Nothing needs to be done to destroy ctx1. */
+ free(ctx1, M_AMDIOMMU_CTX);
+ domain = CTX2DOM(ctx);
+ ctx->context.refs++; /* tag referenced us */
+ }
+ } else {
+ domain = CTX2DOM(ctx);
+ if (ctx->context.tag->owner == NULL)
+ ctx->context.tag->owner = dev;
+ ctx->context.refs++; /* tag referenced us */
+ }
+ AMDIOMMU_UNLOCK(unit);
+
+ return (ctx);
+}
+
+struct iommu_ctx *
+amdiommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
+ bool id_mapped, bool rmrr_init)
+{
+ struct amdiommu_unit *unit;
+ struct amdiommu_ctx *ret;
+ int error;
+ uint32_t edte;
+ uint16_t rid1;
+ uint8_t dte;
+
+ error = amdiommu_find_unit(dev, &unit, &rid1, &dte, &edte,
+ bootverbose);
+ if (error != 0)
+ return (NULL);
+ if (AMD2IOMMU(unit) != iommu) /* XXX complain loudly */
+ return (NULL);
+ ret = amdiommu_get_ctx_for_dev(unit, dev, rid1, pci_get_domain(dev),
+ id_mapped, rmrr_init, dte, edte);
+ return (CTX2IOCTX(ret));
+}
+
+void
+amdiommu_free_ctx_locked_method(struct iommu_unit *iommu,
+ struct iommu_ctx *context)
+{
+ struct amdiommu_unit *unit;
+ struct amdiommu_ctx *ctx;
+
+ unit = IOMMU2AMD(iommu);
+ ctx = IOCTX2CTX(context);
+ amdiommu_free_ctx_locked(unit, ctx);
+}
+
+void
+amdiommu_free_ctx_method(struct iommu_ctx *context)
+{
+ struct amdiommu_ctx *ctx;
+
+ ctx = IOCTX2CTX(context);
+ amdiommu_free_ctx(ctx);
+}
diff --git a/sys/x86/iommu/amd_drv.c b/sys/x86/iommu/amd_drv.c
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_drv.c
@@ -0,0 +1,1205 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_acpi.h"
+#include "opt_ddb.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/domainset.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/smp.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/vmem.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <machine/bus.h>
+#include <machine/pci_cfgreg.h>
+#include "pcib_if.h"
+#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/cputypes.h>
+#include <x86/apicreg.h>
+#include <x86/apicvar.h>
+#include <dev/iommu/iommu.h>
+#include <x86/iommu/amd_reg.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/amd_iommu.h>
+
+static int amdiommu_enable = 0;
+
+/*
+ * All enumerated AMD IOMMU units.
+ * Access is unlocked, the list is not modified after early
+ * single-threaded startup.
+ */
+static TAILQ_HEAD(, amdiommu_unit) amdiommu_units =
+ TAILQ_HEAD_INITIALIZER(amdiommu_units);
+
+static u_int
+ivrs_info_to_unit_id(UINT32 info)
+{
+ return ((info & ACPI_IVHD_UNIT_ID_MASK) >> 8);
+}
+
+typedef bool (*amdiommu_itercc_t)(void *, void *);
+typedef bool (*amdiommu_iter40_t)(ACPI_IVRS_HARDWARE2 *, void *);
+typedef bool (*amdiommu_iter11_t)(ACPI_IVRS_HARDWARE2 *, void *);
+typedef bool (*amdiommu_iter10_t)(ACPI_IVRS_HARDWARE1 *, void *);
+
+static bool
+amdiommu_ivrs_iterate_tbl_typed(amdiommu_itercc_t iter, void *arg,
+ int type, ACPI_TABLE_IVRS *ivrs_tbl)
+{
+ char *ptr, *ptrend;
+ bool done;
+
+ done = false;
+ ptr = (char *)ivrs_tbl + sizeof(*ivrs_tbl);
+ ptrend = (char *)ivrs_tbl + ivrs_tbl->Header.Length;
+ for (;;) {
+ ACPI_IVRS_HEADER *ivrsh;
+
+ if (ptr >= ptrend)
+ break;
+ ivrsh = (ACPI_IVRS_HEADER *)ptr;
+ if (ivrsh->Length <= 0) {
+ printf("amdiommu_iterate_tbl: corrupted IVRS table, "
+ "length %d\n", ivrsh->Length);
+ break;
+ }
+ ptr += ivrsh->Length;
+ if (ivrsh->Type == type) {
+ done = iter((void *)ivrsh, arg);
+ if (done)
+ break;
+ }
+ }
+ return (done);
+}
+
+/*
+ * Walk over the IVRS, calling the callback iterators in priority
+ * order: type 0x40, then 0x11, then 0x10 subtables. The first
+ * iterator returning true ends the walk.
+ * Returns true if any iterator returned true, otherwise false.
+ */
+static bool
+amdiommu_ivrs_iterate_tbl(amdiommu_iter40_t iter40, amdiommu_iter11_t iter11,
+ amdiommu_iter10_t iter10, void *arg)
+{
+ ACPI_TABLE_IVRS *ivrs_tbl;
+ ACPI_STATUS status;
+ bool done;
+
+ status = AcpiGetTable(ACPI_SIG_IVRS, 1,
+ (ACPI_TABLE_HEADER **)&ivrs_tbl);
+ if (ACPI_FAILURE(status))
+ return (false);
+ done = false;
+ if (iter40 != NULL)
+ done = amdiommu_ivrs_iterate_tbl_typed(
+ (amdiommu_itercc_t)iter40, arg,
+ ACPI_IVRS_TYPE_HARDWARE3, ivrs_tbl);
+ if (!done && iter11 != NULL)
+ done = amdiommu_ivrs_iterate_tbl_typed(
+ (amdiommu_itercc_t)iter11, arg, ACPI_IVRS_TYPE_HARDWARE2,
+ ivrs_tbl);
+ if (!done && iter10 != NULL)
+ done = amdiommu_ivrs_iterate_tbl_typed(
+ (amdiommu_itercc_t)iter10, arg, ACPI_IVRS_TYPE_HARDWARE1,
+ ivrs_tbl);
+ AcpiPutTable((ACPI_TABLE_HEADER *)ivrs_tbl);
+ return (done);
+}
+
+struct ivhd_lookup_data {
+ struct amdiommu_unit *sc;
+ uint16_t devid;
+};
+
+static bool
+ivrs_lookup_ivhd_0x40(ACPI_IVRS_HARDWARE2 *h2, void *arg)
+{
+ struct ivhd_lookup_data *ildp;
+
+ KASSERT(h2->Header.Type == ACPI_IVRS_TYPE_HARDWARE2 ||
+ h2->Header.Type == ACPI_IVRS_TYPE_HARDWARE3,
+ ("Misparsed IVHD, h2 type %#x", h2->Header.Type));
+
+ ildp = arg;
+ if (h2->Header.DeviceId != ildp->devid)
+ return (false);
+
+ ildp->sc->unit_dom = h2->PciSegmentGroup;
+ ildp->sc->iommu.unit = ivrs_info_to_unit_id(h2->Info);
+ ildp->sc->efr = h2->EfrRegisterImage;
+ return (true);
+}
+
+static bool
+ivrs_lookup_ivhd_0x10(ACPI_IVRS_HARDWARE1 *h1, void *arg)
+{
+ struct ivhd_lookup_data *ildp;
+
+ KASSERT(h1->Header.Type == ACPI_IVRS_TYPE_HARDWARE1,
+ ("Misparsed IVHD, h1 type %#x", h1->Header.Type));
+
+ ildp = arg;
+ if (h1->Header.DeviceId != ildp->devid)
+ return (false);
+
+ ildp->sc->unit_dom = h1->PciSegmentGroup;
+ ildp->sc->iommu.unit = ivrs_info_to_unit_id(h1->Info);
+ return (true);
+}
+
+static u_int
+amdiommu_devtbl_sz(struct amdiommu_unit *sc __unused)
+{
+ return (sizeof(struct amdiommu_dte) * (1 << 16));
+}
+
+static void
+amdiommu_free_dev_tbl(struct amdiommu_unit *sc)
+{
+ u_int devtbl_sz;
+
+ devtbl_sz = amdiommu_devtbl_sz(sc);
+ pmap_qremove((vm_offset_t)sc->dev_tbl, atop(devtbl_sz));
+ kva_free((vm_offset_t)sc->dev_tbl, devtbl_sz);
+ sc->dev_tbl = NULL;
+ vm_object_deallocate(sc->devtbl_obj);
+ sc->devtbl_obj = NULL;
+}
+
+static int
+amdiommu_create_dev_tbl(struct amdiommu_unit *sc)
+{
+ vm_offset_t seg_vaddr;
+ u_int devtbl_sz, dom, i, reclaimno, segnum_log, segnum, seg_sz;
+ int error;
+
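+ /*
+ * The device table may be split into several segments, as
+ * advertised by the device table segmentation field of EFR.
+ */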
+ segnum_log = (sc->efr & AMDIOMMU_EFR_DEVTBLSEG_MASK) >>
+ AMDIOMMU_EFR_DEVTBLSEG_SHIFT;
+ segnum = 1 << segnum_log;
+
+ devtbl_sz = amdiommu_devtbl_sz(sc);
+ seg_sz = devtbl_sz / segnum;
+ sc->devtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, atop(devtbl_sz),
+ VM_PROT_ALL, 0, NULL);
+ if (bus_get_domain(sc->iommu.dev, &dom) == 0)
+ sc->devtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom);
+
+ sc->hw_ctrl &= ~AMDIOMMU_CTRL_DEVTABSEG_MASK;
+ sc->hw_ctrl |= (uint64_t)segnum_log << ilog2(AMDIOMMU_CTRL_DEVTABSEG_2);
+ sc->hw_ctrl |= AMDIOMMU_CTRL_COHERENT;
+ amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl);
+
+ seg_vaddr = kva_alloc(devtbl_sz);
+ if (seg_vaddr == 0)
+ return (ENOMEM);
+ sc->dev_tbl = (void *)seg_vaddr;
+
+ for (i = 0; i < segnum; i++) {
+ vm_page_t m;
+ uint64_t rval;
+ u_int reg;
+
+ for (reclaimno = 0; reclaimno < 3; reclaimno++) {
+ VM_OBJECT_WLOCK(sc->devtbl_obj);
+ m = vm_page_alloc_contig(sc->devtbl_obj,
+ i * atop(seg_sz),
+ VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY,
+ atop(seg_sz), 0, ~0ul, IOMMU_PAGE_SIZE, 0,
+ VM_MEMATTR_DEFAULT);
+ VM_OBJECT_WUNLOCK(sc->devtbl_obj);
+ if (m != NULL)
+ break;
+ error = vm_page_reclaim_contig(VM_ALLOC_NORMAL,
+ atop(seg_sz), 0, ~0ul, IOMMU_PAGE_SIZE, 0);
+ if (error != 0)
+ vm_wait(sc->devtbl_obj);
+ }
+ if (m == NULL) {
+ amdiommu_free_dev_tbl(sc);
+ return (ENOMEM);
+ }
+
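+ /*
+ * Base register value: segment physical address ORed with
+ * the size field, the number of 4K pages minus one.
+ */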
+ rval = VM_PAGE_TO_PHYS(m) | (atop(seg_sz) - 1);
+ for (u_int j = 0; j < atop(seg_sz);
+ j++, seg_vaddr += PAGE_SIZE, m++) {
+ pmap_zero_page(m);
+ pmap_qenter(seg_vaddr, &m, 1);
+ }
+ reg = i == 0 ? AMDIOMMU_DEVTAB_BASE : AMDIOMMU_DEVTAB_S1_BASE +
+ i - 1;
+ amdiommu_write8(sc, reg, rval);
+ }
+
+ return (0);
+}
+
+static int
+amdiommu_cmd_event_intr(void *arg)
+{
+ struct amdiommu_unit *unit;
+ uint64_t status;
+
+ unit = arg;
+ status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS);
+ if ((status & AMDIOMMU_CMDEVS_COMWAITINT) != 0) {
+ amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS,
+ AMDIOMMU_CMDEVS_COMWAITINT);
+ taskqueue_enqueue(unit->x86c.qi_taskqueue,
+ &unit->x86c.qi_task);
+ }
+ if ((status & (AMDIOMMU_CMDEVS_EVLOGINT |
+ AMDIOMMU_CMDEVS_EVOVRFLW)) != 0)
+ amdiommu_event_intr(unit, status);
+ return (FILTER_HANDLED);
+}
+
+static int
+amdiommu_setup_intr(struct amdiommu_unit *sc)
+{
+ int error, msi_count, msix_count;
+
+ msi_count = pci_msi_count(sc->iommu.dev);
+ msix_count = pci_msix_count(sc->iommu.dev);
+ if (msi_count == 0 && msix_count == 0) {
+ device_printf(sc->iommu.dev, "needs MSI-class intr\n");
+ return (ENXIO);
+ }
+
+#if 0
+ /*
+ * XXXKIB how is MSI-X supposed to be organized for a BAR-less
+ * function? Practically available hardware implements only
+ * one IOMMU unit per function, and uses MSI.
+ */
+ if (msix_count > 0) {
+ sc->msix_table = bus_alloc_resource_any(sc->iommu.dev,
+ SYS_RES_MEMORY, &sc->msix_tab_rid, RF_ACTIVE);
+ if (sc->msix_table == NULL)
+ return (ENXIO);
+
+ if (sc->msix_pba_rid != sc->msix_tab_rid) {
+ /* Separate BAR for PBA */
+ sc->msix_pba = bus_alloc_resource_any(sc->iommu.dev,
+ SYS_RES_MEMORY,
+ &sc->msix_pba_rid, RF_ACTIVE);
+ if (sc->msix_pba == NULL) {
+ bus_release_resource(sc->iommu.dev,
+ SYS_RES_MEMORY, &sc->msix_tab_rid,
+ sc->msix_table);
+ return (ENXIO);
+ }
+ }
+ }
+#endif
+
+ error = ENXIO;
+ if (msix_count > 0) {
+ error = pci_alloc_msix(sc->iommu.dev, &msix_count);
+ if (error == 0)
+ sc->numirqs = msix_count;
+ }
+ if (error != 0 && msi_count > 0) {
+ error = pci_alloc_msi(sc->iommu.dev, &msi_count);
+ if (error == 0)
+ sc->numirqs = msi_count;
+ }
+ if (error != 0) {
+ device_printf(sc->iommu.dev,
+ "Failed to allocate MSI/MSI-x (%d)\n", error);
+ return (ENXIO);
+ }
+
+ /*
+ * XXXKIB The spec states that MISC0.MsiNum must be zero for an
+ * IOMMU using MSI interrupts. But at least one BIOS programs '2'
+ * there, making the driver use the wrong rid and causing the
+ * command/event interrupt to be ignored as stray. Try to fix it
+ * by force, assuming that MsiNum is zero for MSI.
+ */
+ sc->irq_cmdev_rid = 1;
+ if (msix_count > 0) {
+ sc->irq_cmdev_rid += pci_read_config(sc->iommu.dev,
+ sc->seccap_reg + PCIR_AMDIOMMU_MISC0, 4) &
+ PCIM_AMDIOMMU_MISC0_MSINUM_MASK;
+ }
+
+ sc->irq_cmdev = bus_alloc_resource_any(sc->iommu.dev, SYS_RES_IRQ,
+ &sc->irq_cmdev_rid, RF_SHAREABLE | RF_ACTIVE);
+ if (sc->irq_cmdev == NULL) {
+ device_printf(sc->iommu.dev,
+ "unable to map CMD/EV interrupt\n");
+ return (ENXIO);
+ }
+ error = bus_setup_intr(sc->iommu.dev, sc->irq_cmdev,
+ INTR_TYPE_MISC, amdiommu_cmd_event_intr, NULL, sc,
+ &sc->irq_cmdev_cookie);
+ if (error != 0) {
+ device_printf(sc->iommu.dev,
+ "unable to setup interrupt (%d)\n", error);
+ return (ENXIO);
+ }
+ bus_describe_intr(sc->iommu.dev, sc->irq_cmdev, sc->irq_cmdev_cookie,
+ "cmdev");
+
+ if (x2apic_mode) {
+ AMDIOMMU_LOCK(sc);
+ sc->hw_ctrl |= AMDIOMMU_CTRL_GA_EN | AMDIOMMU_CTRL_XT_EN;
+ amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl);
+ // XXXKIB AMDIOMMU_CTRL_INTCAPXT_EN and program x2APIC_CTRL
+ AMDIOMMU_UNLOCK(sc);
+ }
+
+ return (0);
+}
+
+static int
+amdiommu_probe(device_t dev)
+{
+ int seccap_reg;
+ int error;
+ uint32_t cap_h, cap_type, cap_rev;
+
+ if (acpi_disabled("amdiommu"))
+ return (ENXIO);
+ TUNABLE_INT_FETCH("hw.amdiommu.enable", &amdiommu_enable);
+ if (!amdiommu_enable)
+ return (ENXIO);
+ if (pci_get_class(dev) != PCIC_BASEPERIPH ||
+ pci_get_subclass(dev) != PCIS_BASEPERIPH_IOMMU)
+ return (ENXIO);
+
+ error = pci_find_cap(dev, PCIY_SECDEV, &seccap_reg);
+ if (error != 0 || seccap_reg == 0)
+ return (ENXIO);
+
+ cap_h = pci_read_config(dev, seccap_reg + PCIR_AMDIOMMU_CAP_HEADER,
+ 4);
+ cap_type = cap_h & PCIM_AMDIOMMU_CAP_TYPE_MASK;
+ cap_rev = cap_h & PCIM_AMDIOMMU_CAP_REV_MASK;
+ if (cap_type != PCIM_AMDIOMMU_CAP_TYPE_VAL &&
+ cap_rev != PCIM_AMDIOMMU_CAP_REV_VAL)
+ return (ENXIO);
+
+ device_set_desc(dev, "DMA remap");
+ return (BUS_PROBE_SPECIFIC);
+}
+
+static int
+amdiommu_attach(device_t dev)
+{
+ struct amdiommu_unit *sc;
+ struct ivhd_lookup_data ild;
+ int error;
+ uint32_t base_low, base_high;
+ bool res;
+
+ sc = device_get_softc(dev);
+ sc->iommu.dev = dev;
+
+ error = pci_find_cap(dev, PCIY_SECDEV, &sc->seccap_reg);
+ if (error != 0 || sc->seccap_reg == 0)
+ return (ENXIO);
+
+ base_low = pci_read_config(dev, sc->seccap_reg +
+ PCIR_AMDIOMMU_BASE_LOW, 4);
+ base_high = pci_read_config(dev, sc->seccap_reg +
+ PCIR_AMDIOMMU_BASE_HIGH, 4);
+ sc->mmio_base = (base_low & PCIM_AMDIOMMU_BASE_LOW_ADDRM) |
+ ((uint64_t)base_high << 32);
+
+ sc->device_id = pci_get_rid(dev);
+ ild.sc = sc;
+ ild.devid = sc->device_id;
+ res = amdiommu_ivrs_iterate_tbl(ivrs_lookup_ivhd_0x40,
+ ivrs_lookup_ivhd_0x40, ivrs_lookup_ivhd_0x10, &ild);
+ if (!res) {
+ device_printf(dev, "Cannot find IVHD\n");
+ return (ENXIO);
+ }
+
+ mtx_init(&sc->iommu.lock, "amdihw", NULL, MTX_DEF);
+ sc->domids = new_unrhdr(0, 0xffff, &sc->iommu.lock);
+ LIST_INIT(&sc->domains);
+ sysctl_ctx_init(&sc->iommu.sysctl_ctx);
+
+ sc->mmio_sz = ((sc->efr & AMDIOMMU_EFR_PC_SUP) != 0 ? 512 : 16) *
+ 1024;
+
+ sc->mmio_rid = AMDIOMMU_RID;
+ error = bus_set_resource(dev, SYS_RES_MEMORY, AMDIOMMU_RID,
+ sc->mmio_base, sc->mmio_sz);
+ if (error != 0) {
+ device_printf(dev,
+ "bus_set_resource %#jx-%#jx failed, error %d\n",
+ (uintmax_t)sc->mmio_base, (uintmax_t)sc->mmio_base +
+ sc->mmio_sz, error);
+ error = ENXIO;
+ goto errout1;
+ }
+ sc->mmio_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &sc->mmio_rid,
+ sc->mmio_base, sc->mmio_base + sc->mmio_sz - 1, sc->mmio_sz,
+ RF_ALLOCATED | RF_ACTIVE | RF_SHAREABLE);
+ if (sc->mmio_res == NULL) {
+ device_printf(dev,
+ "bus_alloc_resource %#jx-%#jx failed\n",
+ (uintmax_t)sc->mmio_base, (uintmax_t)sc->mmio_base +
+ sc->mmio_sz);
+ error = ENXIO;
+ goto errout2;
+ }
+
+ sc->hw_ctrl = amdiommu_read8(sc, AMDIOMMU_CTRL);
+ if (bootverbose)
+ device_printf(dev, "ctrl reg %#jx\n", (uintmax_t)sc->hw_ctrl);
+ if ((sc->hw_ctrl & AMDIOMMU_CTRL_EN) != 0) {
+ device_printf(dev, "CTRL_EN is set, bailing out\n");
+ error = EBUSY;
+ goto errout2;
+ }
+
+ iommu_high = BUS_SPACE_MAXADDR;
+
+ error = amdiommu_create_dev_tbl(sc);
+ if (error != 0)
+ goto errout3;
+
+ error = amdiommu_init_cmd(sc);
+ if (error != 0)
+ goto errout4;
+
+ error = amdiommu_init_event(sc);
+ if (error != 0)
+ goto errout5;
+
+ error = amdiommu_setup_intr(sc);
+ if (error != 0)
+ goto errout6;
+
+ error = iommu_init_busdma(AMD2IOMMU(sc));
+ if (error != 0)
+ goto errout7;
+
+ error = amdiommu_init_irt(sc);
+ if (error != 0)
+ goto errout8;
+
+ /*
+ * Unlike DMAR, the AMD IOMMU does not process the command
+ * queue unless the IOMMU is enabled. But since a non-present
+ * devtab entry makes the IOMMU ignore transactions from the
+ * corresponding initiator, DMA and interrupt remapping are
+ * de-facto disabled.
+ */
+ AMDIOMMU_LOCK(sc);
+ sc->hw_ctrl |= AMDIOMMU_CTRL_EN;
+ amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl);
+ if (bootverbose) {
+ printf("amdiommu%d: enabled translation\n",
+ AMD2IOMMU(sc)->unit);
+ }
+ AMDIOMMU_UNLOCK(sc);
+
+ TAILQ_INSERT_TAIL(&amdiommu_units, sc, unit_next);
+ return (0);
+
+errout8:
+ iommu_fini_busdma(&sc->iommu);
+errout7:
+ pci_release_msi(dev);
+errout6:
+ amdiommu_fini_event(sc);
+errout5:
+ amdiommu_fini_cmd(sc);
+errout4:
+ amdiommu_free_dev_tbl(sc);
+errout3:
+ bus_release_resource(dev, SYS_RES_MEMORY, sc->mmio_rid, sc->mmio_res);
+errout2:
+ bus_delete_resource(dev, SYS_RES_MEMORY, sc->mmio_rid);
+errout1:
+ sysctl_ctx_free(&sc->iommu.sysctl_ctx);
+ delete_unrhdr(sc->domids);
+ mtx_destroy(&sc->iommu.lock);
+
+ return (error);
+}
+
+static int
+amdiommu_detach(device_t dev)
+{
+ return (EBUSY);
+}
+
+static int
+amdiommu_suspend(device_t dev)
+{
+ /* XXXKIB */
+ return (0);
+}
+
+static int
+amdiommu_resume(device_t dev)
+{
+ /* XXXKIB */
+ return (0);
+}
+
+static device_method_t amdiommu_methods[] = {
+ DEVMETHOD(device_probe, amdiommu_probe),
+ DEVMETHOD(device_attach, amdiommu_attach),
+ DEVMETHOD(device_detach, amdiommu_detach),
+ DEVMETHOD(device_suspend, amdiommu_suspend),
+ DEVMETHOD(device_resume, amdiommu_resume),
+ DEVMETHOD_END
+};
+
+static driver_t amdiommu_driver = {
+ "amdiommu",
+ amdiommu_methods,
+ sizeof(struct amdiommu_unit),
+};
+
+EARLY_DRIVER_MODULE(amdiommu, pci, amdiommu_driver, 0, 0, BUS_PASS_SUPPORTDEV);
+MODULE_DEPEND(amdiommu, pci, 1, 1, 1);
+
+static struct amdiommu_unit *
+amdiommu_unit_by_device_id(u_int pci_seg, u_int device_id)
+{
+ struct amdiommu_unit *unit;
+
+ TAILQ_FOREACH(unit, &amdiommu_units, unit_next) {
+ if (unit->unit_dom == pci_seg && unit->device_id == device_id)
+ return (unit);
+ }
+ return (NULL);
+}
+
+struct ivhd_find_unit {
+ u_int domain;
+ uintptr_t rid;
+ int devno;
+ enum {
+ IFU_DEV_PCI,
+ IFU_DEV_IOAPIC,
+ IFU_DEV_HPET,
+ } type;
+ u_int device_id;
+ uint16_t rid_real;
+ uint8_t dte;
+ uint32_t edte;
+};
+
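+/*
+ * Walk the variable-length device entries of an IVHD subtable, looking
+ * for the entry that covers the requested initiator, and collect its
+ * data settings and the effective (possibly aliased) rid.
+ */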
+static bool
+amdiommu_find_unit_scan_ivrs(ACPI_IVRS_DE_HEADER *d, size_t tlen,
+ struct ivhd_find_unit *ifu)
+{
+ char *db, *de;
+ size_t len;
+
+ for (de = (char *)d + tlen; (char *)d < de;
+ d = (ACPI_IVRS_DE_HEADER *)(db + len)) {
+ db = (char *)d;
+ if (d->Type == ACPI_IVRS_TYPE_PAD4) {
+ len = sizeof(ACPI_IVRS_DEVICE4);
+ } else if (d->Type == ACPI_IVRS_TYPE_ALL) {
+ ACPI_IVRS_DEVICE4 *d4;
+
+ d4 = (ACPI_IVRS_DEVICE4 *)db;
+ len = sizeof(*d4);
+ ifu->dte = d4->Header.DataSetting;
+ } else if (d->Type == ACPI_IVRS_TYPE_SELECT) {
+ ACPI_IVRS_DEVICE4 *d4;
+
+ d4 = (ACPI_IVRS_DEVICE4 *)db;
+ if (d4->Header.Id == ifu->rid) {
+ ifu->dte = d4->Header.DataSetting;
+ ifu->rid_real = ifu->rid;
+ return (true);
+ }
+ len = sizeof(*d4);
+ } else if (d->Type == ACPI_IVRS_TYPE_START) {
+ ACPI_IVRS_DEVICE4 *d4, *d4n;
+
+ d4 = (ACPI_IVRS_DEVICE4 *)db;
+ d4n = d4 + 1;
+ if (d4n->Header.Type != ACPI_IVRS_TYPE_END) {
+ printf("IVRS dev4 start not followed by END "
+ "(%#x)\n", d4n->Header.Type);
+ return (false);
+ }
+ if (d4->Header.Id <= ifu->rid &&
+ ifu->rid <= d4n->Header.Id) {
+ ifu->dte = d4->Header.DataSetting;
+ ifu->rid_real = ifu->rid;
+ return (true);
+ }
+ len = 2 * sizeof(*d4);
+ } else if (d->Type == ACPI_IVRS_TYPE_PAD8) {
+ len = sizeof(ACPI_IVRS_DEVICE8A);
+ } else if (d->Type == ACPI_IVRS_TYPE_ALIAS_SELECT) {
+ ACPI_IVRS_DEVICE8A *d8a;
+
+ d8a = (ACPI_IVRS_DEVICE8A *)db;
+ if (d8a->Header.Id == ifu->rid) {
+ ifu->dte = d8a->Header.DataSetting;
+ ifu->rid_real = d8a->UsedId;
+ return (true);
+ }
+ len = sizeof(*d8a);
+ } else if (d->Type == ACPI_IVRS_TYPE_ALIAS_START) {
+ ACPI_IVRS_DEVICE8A *d8a;
+ ACPI_IVRS_DEVICE4 *d4;
+
+ d8a = (ACPI_IVRS_DEVICE8A *)db;
+ d4 = (ACPI_IVRS_DEVICE4 *)(d8a + 1);
+ if (d4->Header.Type != ACPI_IVRS_TYPE_END) {
+ printf("IVRS alias start not followed by END "
+ "(%#x)\n", d4->Header.Type);
+ return (false);
+ }
+ if (d8a->Header.Id <= ifu->rid &&
+ ifu->rid <= d4->Header.Id) {
+ ifu->dte = d8a->Header.DataSetting;
+ ifu->rid_real = d8a->UsedId;
+ return (true);
+ }
+ len = sizeof(*d8a) + sizeof(*d4);
+ } else if (d->Type == ACPI_IVRS_TYPE_EXT_SELECT) {
+ ACPI_IVRS_DEVICE8B *d8b;
+
+ d8b = (ACPI_IVRS_DEVICE8B *)db;
+ if (d8b->Header.Id == ifu->rid) {
+ ifu->dte = d8b->Header.DataSetting;
+ ifu->rid_real = ifu->rid;
+ ifu->edte = d8b->ExtendedData;
+ return (true);
+ }
+ len = sizeof(*d8b);
+ } else if (d->Type == ACPI_IVRS_TYPE_EXT_START) {
+ ACPI_IVRS_DEVICE8B *d8b;
+ ACPI_IVRS_DEVICE4 *d4;
+
+ d8b = (ACPI_IVRS_DEVICE8B *)db;
+ d4 = (ACPI_IVRS_DEVICE4 *)(db + sizeof(*d8b));
+ if (d4->Header.Type != ACPI_IVRS_TYPE_END) {
+ printf("IVRS ext start not followed by END "
+ "(%#x)\n", d4->Header.Type);
+ return (false);
+ }
+ if (d8b->Header.Id <= ifu->rid &&
+ ifu->rid <= d4->Header.Id) {
+ ifu->dte = d8b->Header.DataSetting;
+ ifu->rid_real = ifu->rid;
+ ifu->edte = d8b->ExtendedData;
+ return (true);
+ }
+ len = sizeof(*d8b) + sizeof(*d4);
+ } else if (d->Type == ACPI_IVRS_TYPE_SPECIAL) {
+ ACPI_IVRS_DEVICE8C *d8c;
+
+ d8c = (ACPI_IVRS_DEVICE8C *)db;
+ if (((ifu->type == IFU_DEV_IOAPIC &&
+ d8c->Variety == ACPI_IVHD_IOAPIC) ||
+ (ifu->type == IFU_DEV_HPET &&
+ d8c->Variety == ACPI_IVHD_HPET)) &&
+ ifu->devno == d8c->Handle) {
+ ifu->dte = d8c->Header.DataSetting;
+ ifu->rid_real = d8c->UsedId;
+ return (true);
+ }
+ len = sizeof(*d8c);
+ } else if (d->Type == ACPI_IVRS_TYPE_HID) {
+ ACPI_IVRS_DEVICE_HID *dh;
+
+ dh = (ACPI_IVRS_DEVICE_HID *)db;
+ len = sizeof(*dh) + dh->UidLength;
+ /* XXXKIB */
+ } else {
+#if 0
+ printf("amdiommu: unknown IVRS device entry type %#x\n",
+ d->Type);
+#endif
+ if (d->Type <= 63)
+ len = sizeof(ACPI_IVRS_DEVICE4);
+ else if (d->Type <= 127)
+ len = sizeof(ACPI_IVRS_DEVICE8A);
+ else {
+ printf("amdiommu: abort, cannot "
+ "advance iterator, item type %#x\n",
+ d->Type);
+ return (false);
+ }
+ }
+ }
+ return (false);
+}
+
+static bool
+amdiommu_find_unit_scan_0x11(ACPI_IVRS_HARDWARE2 *ivrs, void *arg)
+{
+ struct ivhd_find_unit *ifu = arg;
+ ACPI_IVRS_DE_HEADER *d;
+ bool res;
+
+ KASSERT(ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE2 ||
+ ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE3,
+ ("Misparsed IVHD h2, ivrs type %#x", ivrs->Header.Type));
+
+ if (ifu->domain != ivrs->PciSegmentGroup)
+ return (false);
+ d = (ACPI_IVRS_DE_HEADER *)(ivrs + 1);
+ res = amdiommu_find_unit_scan_ivrs(d, ivrs->Header.Length, ifu);
+ if (res)
+ ifu->device_id = ivrs->Header.DeviceId;
+ return (res);
+}
+
+static bool
+amdiommu_find_unit_scan_0x10(ACPI_IVRS_HARDWARE1 *ivrs, void *arg)
+{
+ struct ivhd_find_unit *ifu = arg;
+ ACPI_IVRS_DE_HEADER *d;
+ bool res;
+
+ KASSERT(ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE1,
+ ("Misparsed IVHD h1, ivrs type %#x", ivrs->Header.Type));
+
+ if (ifu->domain != ivrs->PciSegmentGroup)
+ return (false);
+ d = (ACPI_IVRS_DE_HEADER *)(ivrs + 1);
+ res = amdiommu_find_unit_scan_ivrs(d, ivrs->Header.Length, ifu);
+ if (res)
+ ifu->device_id = ivrs->Header.DeviceId;
+ return (res);
+}
+
+static void
+amdiommu_dev_prop_dtr(device_t dev, const char *name, void *val, void *dtr_ctx)
+{
+ free(val, M_DEVBUF);
+}
+
+static int *
+amdiommu_dev_fetch_flagsp(struct amdiommu_unit *unit, device_t dev)
+{
+ int *flagsp, error;
+
+ bus_topo_assert();
+ error = device_get_prop(dev, device_get_nameunit(unit->iommu.dev),
+ (void **)&flagsp);
+ if (error == ENOENT) {
+ flagsp = malloc(sizeof(int), M_DEVBUF, M_WAITOK | M_ZERO);
+ device_set_prop(dev, device_get_nameunit(unit->iommu.dev),
+ flagsp, amdiommu_dev_prop_dtr, unit);
+ }
+ return (flagsp);
+}
+
+static int
+amdiommu_get_dev_prop_flags(struct amdiommu_unit *unit, device_t dev)
+{
+ int *flagsp, flags;
+
+ bus_topo_lock();
+ flagsp = amdiommu_dev_fetch_flagsp(unit, dev);
+ flags = *flagsp;
+ bus_topo_unlock();
+ return (flags);
+}
+
+static void
+amdiommu_set_dev_prop_flags(struct amdiommu_unit *unit, device_t dev,
+ int flag)
+{
+ int *flagsp;
+
+ bus_topo_lock();
+ flagsp = amdiommu_dev_fetch_flagsp(unit, dev);
+ *flagsp |= flag;
+ bus_topo_unlock();
+}
+
+int
+amdiommu_find_unit(device_t dev, struct amdiommu_unit **unitp, uint16_t *ridp,
+ uint8_t *dtep, uint32_t *edtep, bool verbose)
+{
+ struct ivhd_find_unit ifu;
+ struct amdiommu_unit *unit;
+ int error, flags;
+ bool res;
+
+ if (device_get_devclass(device_get_parent(dev)) !=
+ devclass_find("pci"))
+ return (ENXIO);
+
+ bzero(&ifu, sizeof(ifu));
+ ifu.type = IFU_DEV_PCI;
+
+ error = pci_get_id(dev, PCI_ID_RID, &ifu.rid);
+ if (error != 0) {
+ if (verbose)
+ device_printf(dev,
+ "amdiommu cannot get rid, error %d\n", error);
+ return (ENXIO);
+ }
+
+ ifu.domain = pci_get_domain(dev);
+ res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11,
+ amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu);
+ if (!res) {
+ if (verbose)
+ device_printf(dev,
+ "(%#06x:%#06x) amdiommu cannot match rid in IVHD\n",
+ ifu.domain, (unsigned)ifu.rid);
+ return (ENXIO);
+ }
+
+ unit = amdiommu_unit_by_device_id(ifu.domain, ifu.device_id);
+ if (unit == NULL) {
+ if (verbose)
+ device_printf(dev,
+ "(%#06x:%#06x) amdiommu cannot find unit\n",
+ ifu.domain, (unsigned)ifu.rid);
+ return (ENXIO);
+ }
+ *unitp = unit;
+ iommu_device_set_iommu_prop(dev, unit->iommu.dev);
+ if (ridp != NULL)
+ *ridp = ifu.rid_real;
+ if (dtep != NULL)
+ *dtep = ifu.dte;
+ if (edtep != NULL)
+ *edtep = ifu.edte;
+ if (verbose) {
+ flags = amdiommu_get_dev_prop_flags(unit, dev);
+ if ((flags & AMDIOMMU_DEV_REPORTED) == 0) {
+ amdiommu_set_dev_prop_flags(unit, dev,
+ AMDIOMMU_DEV_REPORTED);
+ device_printf(dev, "amdiommu%d "
+ "initiator rid %#06x dte %#x edte %#x\n",
+ unit->iommu.unit, ifu.rid_real, ifu.dte, ifu.edte);
+ }
+ }
+ return (0);
+}
+
+int
+amdiommu_find_unit_for_ioapic(int apic_id, struct amdiommu_unit **unitp,
+ uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose)
+{
+ struct ivhd_find_unit ifu;
+ struct amdiommu_unit *unit;
+ device_t apic_dev;
+ bool res;
+
+ bzero(&ifu, sizeof(ifu));
+ ifu.type = IFU_DEV_IOAPIC;
+ ifu.devno = apic_id;
+ ifu.rid = -1;
+
+ res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11,
+ amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu);
+ if (!res) {
+ if (verbose)
+ printf("amdiommu cannot match ioapic no %d in IVHD\n",
+ apic_id);
+ return (ENXIO);
+ }
+
+ unit = amdiommu_unit_by_device_id(0, ifu.device_id);
+ if (unit == NULL) {
+ if (verbose)
+ printf("amdiommu cannot find unit by dev id %#x\n",
+ ifu.device_id);
+ return (ENXIO);
+ }
+ apic_dev = ioapic_get_dev(apic_id);
+ if (apic_dev != NULL)
+ iommu_device_set_iommu_prop(apic_dev, unit->iommu.dev);
+ *unitp = unit;
+ if (ridp != NULL)
+ *ridp = ifu.rid_real;
+ if (dtep != NULL)
+ *dtep = ifu.dte;
+ if (edtep != NULL)
+ *edtep = ifu.edte;
+ if (verbose) {
+ printf("amdiommu%d IOAPIC %d "
+ "initiator rid %#06x dte %#x edte %#x\n",
+ unit->iommu.unit, apic_id, ifu.rid_real, ifu.dte,
+ ifu.edte);
+ }
+ return (0);
+}
+
+int
+amdiommu_find_unit_for_hpet(device_t hpet, struct amdiommu_unit **unitp,
+ uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose)
+{
+ struct ivhd_find_unit ifu;
+ struct amdiommu_unit *unit;
+ int hpet_no;
+ bool res;
+
+ hpet_no = hpet_get_uid(hpet);
+ bzero(&ifu, sizeof(ifu));
+ ifu.type = IFU_DEV_HPET;
+ ifu.devno = hpet_no;
+ ifu.rid = -1;
+
+ res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11,
+ amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu);
+ if (!res) {
+ printf("amdiommu cannot match hpet no %d in IVHD\n",
+ hpet_no);
+ return (ENXIO);
+ }
+
+ unit = amdiommu_unit_by_device_id(0, ifu.device_id);
+ if (unit == NULL) {
+ if (verbose)
+ printf("amdiommu cannot find unit id %d\n",
+ hpet_no);
+ return (ENXIO);
+ }
+ *unitp = unit;
+ iommu_device_set_iommu_prop(hpet, unit->iommu.dev);
+ if (ridp != NULL)
+ *ridp = ifu.rid_real;
+ if (dtep != NULL)
+ *dtep = ifu.dte;
+ if (edtep != NULL)
+ *edtep = ifu.edte;
+ if (verbose) {
+ printf("amdiommu%d HPET no %d "
+ "initiator rid %#06x dte %#x edte %#x\n",
+ unit->iommu.unit, hpet_no, ifu.rid_real, ifu.dte,
+ ifu.edte);
+ }
+ return (0);
+}
+
+static struct iommu_unit *
+amdiommu_find_method(device_t dev, bool verbose)
+{
+ struct amdiommu_unit *unit;
+ int error;
+ uint32_t edte;
+ uint16_t rid;
+ uint8_t dte;
+
+ error = amdiommu_find_unit(dev, &unit, &rid, &dte, &edte, verbose);
+ if (error != 0) {
+ if (verbose)
+ device_printf(dev,
+ "cannot find amdiommu unit, error %d\n",
+ error);
+ return (NULL);
+ }
+ return (&unit->iommu);
+}
+
+static struct x86_unit_common *
+amdiommu_get_x86_common(struct iommu_unit *unit)
+{
+ struct amdiommu_unit *iommu;
+
+ iommu = IOMMU2AMD(unit);
+ return (&iommu->x86c);
+}
+
+static void
+amdiommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
+{
+}
+
+static struct x86_iommu amd_x86_iommu = {
+ .get_x86_common = amdiommu_get_x86_common,
+ .unit_pre_instantiate_ctx = amdiommu_unit_pre_instantiate_ctx,
+ .find = amdiommu_find_method,
+ .domain_unload_entry = amdiommu_domain_unload_entry,
+ .domain_unload = amdiommu_domain_unload,
+ .get_ctx = amdiommu_get_ctx,
+ .free_ctx_locked = amdiommu_free_ctx_locked_method,
+ .free_ctx = amdiommu_free_ctx_method,
+ .alloc_msi_intr = amdiommu_alloc_msi_intr,
+ .map_msi_intr = amdiommu_map_msi_intr,
+ .unmap_msi_intr = amdiommu_unmap_msi_intr,
+ .map_ioapic_intr = amdiommu_map_ioapic_intr,
+ .unmap_ioapic_intr = amdiommu_unmap_ioapic_intr,
+};
+
+static void
+x86_iommu_set_amd(void *arg __unused)
+{
+ if (cpu_vendor_id == CPU_VENDOR_AMD)
+ set_x86_iommu(&amd_x86_iommu);
+}
+
+SYSINIT(x86_iommu, SI_SUB_TUNABLES, SI_ORDER_ANY, x86_iommu_set_amd, NULL);
+
+#ifdef DDB
+#include <ddb/ddb.h>
+#include <ddb/db_lex.h>
+
+static void
+amdiommu_print_domain(struct amdiommu_domain *domain, bool show_mappings)
+{
+ struct iommu_domain *iodom;
+
+ iodom = DOM2IODOM(domain);
+
+#if 0
+ db_printf(
+ " @%p dom %d mgaw %d agaw %d pglvl %d end %jx refs %d\n"
+ " ctx_cnt %d flags %x pgobj %p map_ents %u\n",
+ domain, domain->domain, domain->mgaw, domain->agaw, domain->pglvl,
+ (uintmax_t)domain->iodom.end, domain->refs, domain->ctx_cnt,
+ domain->iodom.flags, domain->pgtbl_obj, domain->iodom.entries_cnt);
+#endif
+
+ iommu_db_domain_print_contexts(iodom);
+
+ if (show_mappings)
+ iommu_db_domain_print_mappings(iodom);
+}
+
+static void
+amdiommu_print_one(struct amdiommu_unit *unit, bool show_domains,
+ bool show_mappings, bool show_cmdq)
+{
+ struct amdiommu_domain *domain;
+ struct amdiommu_cmd_generic *cp;
+ u_int cmd_head, cmd_tail, ci;
+
+ cmd_head = amdiommu_read4(unit, AMDIOMMU_CMDBUF_HEAD);
+ cmd_tail = amdiommu_read4(unit, AMDIOMMU_CMDBUF_TAIL);
+ db_printf("amdiommu%d at %p, mmio at %#jx/sz %#jx\n",
+ unit->iommu.unit, unit, (uintmax_t)unit->mmio_base,
+ (uintmax_t)unit->mmio_sz);
+ db_printf(" hw ctrl %#018jx cmdevst %#018jx\n",
+ (uintmax_t)amdiommu_read8(unit, AMDIOMMU_CTRL),
+ (uintmax_t)amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS));
+ db_printf(" devtbl at %p\n", unit->dev_tbl);
+ db_printf(" hwseq at %p phys %#jx val %#jx\n",
+ &unit->x86c.inv_waitd_seq_hw,
+ pmap_kextract((vm_offset_t)&unit->x86c.inv_waitd_seq_hw),
+ unit->x86c.inv_waitd_seq_hw);
+ db_printf(" invq at %p base %#jx hw head/tail %#x/%#x\n",
+ unit->x86c.inv_queue,
+ (uintmax_t)amdiommu_read8(unit, AMDIOMMU_CMDBUF_BASE),
+ cmd_head, cmd_tail);
+
+ if (show_cmdq) {
+ db_printf(" cmd q:\n");
+ for (ci = cmd_head; ci != cmd_tail;) {
+ cp = (struct amdiommu_cmd_generic *)(unit->
+ x86c.inv_queue + ci);
+ db_printf(
+ " idx %#x op %#x %#010x %#010x %#010x %#010x\n",
+ ci >> AMDIOMMU_CMD_SZ_SHIFT, cp->op,
+ cp->w0, cp->ww1, cp->w2, cp->w3);
+
+ ci += AMDIOMMU_CMD_SZ;
+ if (ci == unit->x86c.inv_queue_size)
+ ci = 0;
+ }
+ }
+
+ if (show_domains) {
+ db_printf(" domains:\n");
+ LIST_FOREACH(domain, &unit->domains, link) {
+ amdiommu_print_domain(domain, show_mappings);
+ if (db_pager_quit)
+ break;
+ }
+ }
+}
+
+DB_SHOW_COMMAND(amdiommu, db_amdiommu_print)
+{
+ struct amdiommu_unit *unit;
+ bool show_domains, show_mappings, show_cmdq;
+
+ show_domains = strchr(modif, 'd') != NULL;
+ show_mappings = strchr(modif, 'm') != NULL;
+ show_cmdq = strchr(modif, 'q') != NULL;
+ if (!have_addr) {
+ db_printf("usage: show amdiommu [/d] [/m] [/q] index\n");
+ return;
+ }
+ if ((vm_offset_t)addr < 0x10000)
+ unit = amdiommu_unit_by_device_id(0, (u_int)addr);
+ else
+ unit = (struct amdiommu_unit *)addr;
+ amdiommu_print_one(unit, show_domains, show_mappings, show_cmdq);
+}
+
+DB_SHOW_ALL_COMMAND(amdiommus, db_show_all_amdiommus)
+{
+ struct amdiommu_unit *unit;
+ bool show_domains, show_mappings, show_cmdq;
+
+ show_domains = strchr(modif, 'd') != NULL;
+ show_mappings = strchr(modif, 'm') != NULL;
+ show_cmdq = strchr(modif, 'q') != NULL;
+
+ TAILQ_FOREACH(unit, &amdiommu_units, unit_next) {
+ amdiommu_print_one(unit, show_domains, show_mappings,
+ show_cmdq);
+ if (db_pager_quit)
+ break;
+ }
+}
+#endif
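
[Editor's note] As a usage sketch only (an assumption about the stock DDB modifier syntax, not text from this change), the debugger commands defined above would be entered at the db> prompt roughly as below; an argument below 0x10000 is taken as a unit index, otherwise it is treated as a unit pointer:

    db> show amdiommu/dq 0
    db> show all amdiommus/d
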
diff --git a/sys/x86/iommu/amd_event.c b/sys/x86/iommu/amd_event.c
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_event.c
@@ -0,0 +1,323 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_acpi.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/smp.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/vmem.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <machine/bus.h>
+#include <machine/pci_cfgreg.h>
+#include "pcib_if.h"
+#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/cputypes.h>
+#include <x86/apicreg.h>
+#include <x86/apicvar.h>
+#include <dev/iommu/iommu.h>
+#include <dev/iommu/busdma_iommu.h>
+#include <x86/iommu/amd_reg.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/amd_iommu.h>
+
+static void
+amdiommu_event_rearm_intr(struct amdiommu_unit *unit)
+{
+ amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS,
+ AMDIOMMU_CMDEVS_EVLOGINT);
+}
+
+static void
+amdiommu_event_log_inc_head(struct amdiommu_unit *unit)
+{
+ unit->event_log_head++;
+ if (unit->event_log_head >= unit->event_log_size)
+ unit->event_log_head = 0;
+}
+
+static void
+amdiommu_event_log_print(struct amdiommu_unit *unit,
+ const struct amdiommu_event_generic *evp, bool fancy)
+{
+ printf("amdiommu%d: event type 0x%x 0x%08x 0x%08x 0x%08x 0x%08x\n",
+ unit->iommu.unit, evp->code, evp->w0, evp->ww1, evp->w2, evp->w3);
+ if (!fancy)
+ return;
+
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ if (evp->code == AMDIOMMU_EV_ILL_DEV_TABLE_ENTRY) {
+ const struct amdiommu_event_ill_dev_table_entry *ev_dte_p;
+ const struct amdiommu_dte *dte;
+ const uint32_t *x;
+ int i;
+
+ ev_dte_p = (const struct
+ amdiommu_event_ill_dev_table_entry *)evp;
+ dte = &unit->dev_tbl[ev_dte_p->devid];
+
+ printf("\tIllegal Dev Tab Entry dte@%p:", dte);
+ for (i = 0, x = (const uint32_t *)dte; i < sizeof(*dte) /
+ sizeof(uint32_t); i++, x++)
+ printf(" 0x%08x", *x);
+ printf("\n");
+ } else if (evp->code == AMDIOMMU_EV_IO_PAGE_FAULT) {
+ const struct amdiommu_event_io_page_fault_entry *ev_iopf_p;
+ struct amdiommu_ctx *ctx;
+ device_t dev;
+
+ ev_iopf_p = (const struct
+ amdiommu_event_io_page_fault_entry *)evp;
+ printf("\tPage Fault rid %#x dom %d",
+ ev_iopf_p->devid, ev_iopf_p->pasid);
+ ctx = amdiommu_find_ctx_locked(unit, ev_iopf_p->devid);
+ if (ctx != NULL) {
+ dev = ctx->context.tag->owner;
+ if (dev != NULL)
+ printf(" %s", device_get_nameunit(dev));
+ }
+ printf("\n\t"
+ "gn %d nx %d us %d i %d pr %d rw %d pe %d rz %d tr %d"
+ "\n\tgaddr %#jx\n",
+ ev_iopf_p->gn, ev_iopf_p->nx, ev_iopf_p->us, ev_iopf_p->i,
+ ev_iopf_p->pr, ev_iopf_p->rw, ev_iopf_p->pe, ev_iopf_p->rz,
+ ev_iopf_p->tr,
+ (((uintmax_t)(ev_iopf_p->addr2)) << 32) |
+ ev_iopf_p->addr1);
+ }
+}
+
+static u_int
+amdiommu_event_log_tail(struct amdiommu_unit *unit)
+{
+ return (amdiommu_read8(unit, AMDIOMMU_EVNTLOG_TAIL) >>
+ AMDIOMMU_EV_SZ_SHIFT);
+}
+
+static u_int
+amdiommu_event_copy_log_inc(u_int idx)
+{
+ idx++;
+ if (idx == nitems(((struct amdiommu_unit *)NULL)->event_copy_log))
+ idx = 0;
+ return (idx);
+}
+
+static bool
+amdiommu_event_copy_log_hasspace(struct amdiommu_unit *unit)
+{
+ return (unit->event_copy_tail != amdiommu_event_copy_log_inc(
+ unit->event_copy_head));
+}
+
+void
+amdiommu_event_intr(struct amdiommu_unit *unit, uint64_t status)
+{
+ struct amdiommu_event_generic *evp;
+ u_int hw_tail, hw_tail1;
+ bool enqueue;
+
+ enqueue = (status & AMDIOMMU_CMDEVS_EVOVRFLW) != 0;
+
+ hw_tail1 = amdiommu_event_log_tail(unit);
+ do {
+ hw_tail = hw_tail1;
+ for (; hw_tail != unit->event_log_head;
+ amdiommu_event_log_inc_head(unit)) {
+ evp = &unit->event_log[unit->event_log_head];
+ mtx_lock_spin(&unit->event_lock);
+ if (amdiommu_event_copy_log_hasspace(unit)) {
+ unit->event_copy_log[unit->event_copy_head] =
+ *evp;
+ unit->event_copy_head =
+ amdiommu_event_copy_log_inc(unit->
+ event_copy_head);
+ enqueue = true;
+ } else {
+ amdiommu_event_log_print(unit, evp, false);
+ }
+ mtx_unlock_spin(&unit->event_lock);
+ }
+ amdiommu_write8(unit, AMDIOMMU_EVNTLOG_HEAD,
+ unit->event_log_head << AMDIOMMU_EV_SZ_SHIFT);
+ hw_tail1 = amdiommu_event_log_tail(unit);
+ } while (hw_tail1 != hw_tail);
+ amdiommu_event_rearm_intr(unit);
+
+ if (enqueue)
+ taskqueue_enqueue(unit->event_taskqueue, &unit->event_task);
+}
+
+static void
+amdiommu_event_task(void *arg, int pending __unused)
+{
+ struct amdiommu_unit *unit;
+ uint64_t hwev_status, status;
+ struct amdiommu_event_generic hwev;
+
+ unit = arg;
+ AMDIOMMU_LOCK(unit);
+
+ if ((unit->efr & AMDIOMMU_EFR_HWEV_SUP) != 0) {
+ hwev_status = amdiommu_read8(unit, AMDIOMMU_HWEV_STATUS);
+ if ((hwev_status & AMDIOMMU_HWEVS_HEV) != 0) {
+ *(uint64_t *)&hwev = amdiommu_read8(unit,
+ AMDIOMMU_HWEV_LOWER);
+ *((uint64_t *)&hwev + 1) = amdiommu_read8(unit,
+ AMDIOMMU_HWEV_UPPER);
+ printf("amdiommu%d: hw event%s\n", unit->iommu.unit,
+ (hwev_status & AMDIOMMU_HWEVS_HEO) != 0 ?
+ " (overflown)" : "");
+ amdiommu_event_log_print(unit, &hwev, true);
+ amdiommu_write8(unit, AMDIOMMU_HWEV_STATUS,
+ hwev_status);
+ }
+ }
+
+ status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS);
+ if ((status & AMDIOMMU_CMDEVS_EVOVRFLW) != 0) {
+ printf("amdiommu%d: event log overflow\n", unit->iommu.unit);
+
+ while ((status & AMDIOMMU_CMDEVS_EVLOGRUN) != 0) {
+ DELAY(1);
+ status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS);
+ }
+
+ unit->hw_ctrl &= ~AMDIOMMU_CTRL_EVNTLOG_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+
+ unit->event_log_head = 0;
+ amdiommu_write8(unit, AMDIOMMU_EVNTLOG_HEAD, 0);
+
+ amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS,
+ AMDIOMMU_CMDEVS_EVOVRFLW); /* RW1C */
+
+ unit->hw_ctrl |= AMDIOMMU_CTRL_EVNTLOG_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+
+ amdiommu_event_rearm_intr(unit);
+ }
+
+ mtx_lock_spin(&unit->event_lock);
+ while (unit->event_copy_head != unit->event_copy_tail) {
+ mtx_unlock_spin(&unit->event_lock);
+ amdiommu_event_log_print(unit, &unit->event_copy_log[
+ unit->event_copy_tail], true);
+ mtx_lock_spin(&unit->event_lock);
+ unit->event_copy_tail = amdiommu_event_copy_log_inc(unit->
+ event_copy_tail);
+ }
+ mtx_unlock_spin(&unit->event_lock);
+
+ AMDIOMMU_UNLOCK(unit);
+}
+
+int
+amdiommu_init_event(struct amdiommu_unit *unit)
+{
+ uint64_t base_reg;
+
+ mtx_init(&unit->event_lock, "amdevl", NULL, MTX_SPIN);
+
+ /* event log entries */
+ unit->event_log_size = AMDIOMMU_EVNTLOG_MIN;
+ TUNABLE_INT_FETCH("hw.amdiommu.event_log_size", &unit->event_log_size);
+ if (unit->event_log_size < AMDIOMMU_EVNTLOG_MIN ||
+ unit->event_log_size > AMDIOMMU_EVNTLOG_MAX ||
+ !powerof2(unit->event_log_size))
+ panic("invalid hw.amdiommu.event_log_size");
+ unit->event_log = kmem_alloc_contig(AMDIOMMU_EV_SZ *
+ unit->event_log_size, M_WAITOK | M_ZERO, 0, ~0ull, PAGE_SIZE,
+ 0, VM_MEMATTR_DEFAULT);
+
+ TASK_INIT(&unit->event_task, 0, amdiommu_event_task, unit);
+ unit->event_taskqueue = taskqueue_create_fast("amdiommuff", M_WAITOK,
+ taskqueue_thread_enqueue, &unit->event_taskqueue);
+ taskqueue_start_threads(&unit->event_taskqueue, 1, PI_AV,
+ "amdiommu%d event taskq", unit->iommu.unit);
+
+ base_reg = pmap_kextract((vm_offset_t)unit->event_log) |
+ (((uint64_t)0x8 + ilog2(unit->event_log_size /
+ AMDIOMMU_EVNTLOG_MIN)) << AMDIOMMU_EVNTLOG_BASE_SZSHIFT);
+ AMDIOMMU_LOCK(unit);
+ /*
+ * Re-arm before enabling the interrupt, so as not to lose it when
+ * re-arming in the interrupt handler.
+ */
+ amdiommu_event_rearm_intr(unit);
+ amdiommu_write8(unit, AMDIOMMU_EVNTLOG_BASE, base_reg);
+ unit->hw_ctrl |= AMDIOMMU_CTRL_EVNTLOG_EN | AMDIOMMU_CTRL_EVENTINT_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+ AMDIOMMU_UNLOCK(unit);
+
+ return (0);
+}
+
+void
+amdiommu_fini_event(struct amdiommu_unit *unit)
+{
+ AMDIOMMU_LOCK(unit);
+ unit->hw_ctrl &= ~(AMDIOMMU_CTRL_EVNTLOG_EN |
+ AMDIOMMU_CTRL_EVENTINT_EN);
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+ amdiommu_write8(unit, AMDIOMMU_EVNTLOG_BASE, 0);
+ AMDIOMMU_UNLOCK(unit);
+
+ taskqueue_drain(unit->event_taskqueue, &unit->event_task);
+ taskqueue_free(unit->event_taskqueue);
+ unit->event_taskqueue = NULL;
+
+ kmem_free(unit->event_log, unit->event_log_size * AMDIOMMU_EV_SZ);
+ unit->event_log = NULL;
+ unit->event_log_head = unit->event_log_tail = 0;
+
+ mtx_destroy(&unit->event_lock);
+}
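
[Editor's note] A minimal userspace sketch (hypothetical names, not part of this change) of the fixed-size copy ring used by amdiommu_event_intr()/amdiommu_event_task() above: the interrupt handler advances the head, the taskqueue advances the tail, and one slot is always kept empty so that head == tail means empty and inc(head) == tail means full, which is the test amdiommu_event_copy_log_hasspace() performs.

    /* Illustrative only; compiles as plain C99, not kernel code. */
    #include <stdbool.h>
    #include <stdio.h>

    #define RING_SLOTS	16		/* mirrors event_copy_log[16] */

    struct ring {
    	int slots[RING_SLOTS];
    	unsigned head;			/* next slot to write */
    	unsigned tail;			/* next slot to read */
    };

    static unsigned
    ring_inc(unsigned idx)
    {
    	idx++;
    	if (idx == RING_SLOTS)
    		idx = 0;
    	return (idx);
    }

    static bool
    ring_put(struct ring *r, int ev)
    {
    	if (ring_inc(r->head) == r->tail)
    		return (false);		/* full, caller drops the event */
    	r->slots[r->head] = ev;
    	r->head = ring_inc(r->head);
    	return (true);
    }

    static bool
    ring_get(struct ring *r, int *evp)
    {
    	if (r->head == r->tail)
    		return (false);		/* empty */
    	*evp = r->slots[r->tail];
    	r->tail = ring_inc(r->tail);
    	return (true);
    }

    int
    main(void)
    {
    	struct ring r = { .head = 0, .tail = 0 };
    	int ev;

    	for (int i = 0; i < 20; i++)
    		if (!ring_put(&r, i))
    			printf("dropped %d\n", i);
    	while (ring_get(&r, &ev))
    		printf("consumed %d\n", ev);
    	return (0);
    }
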
diff --git a/sys/x86/iommu/amd_idpgtbl.c b/sys/x86/iommu/amd_idpgtbl.c
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_idpgtbl.c
@@ -0,0 +1,396 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/domainset.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/rman.h>
+#include <sys/sf_buf.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <sys/vmem.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <dev/pci/pcireg.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/cpu.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <dev/iommu/busdma_iommu.h>
+#include <x86/iommu/amd_reg.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/amd_iommu.h>
+
+static void amdiommu_unmap_clear_pte(struct amdiommu_domain *domain,
+ iommu_gaddr_t base, int lvl, int flags, iommu_pte_t *pte,
+ struct sf_buf **sf, struct iommu_map_entry *entry, bool free_sf);
+static int amdiommu_unmap_buf_locked(struct amdiommu_domain *domain,
+ iommu_gaddr_t base, iommu_gaddr_t size, int flags,
+ struct iommu_map_entry *entry);
+
+int
+amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain)
+{
+ vm_page_t m;
+ int dom;
+
+ KASSERT(domain->pgtbl_obj == NULL,
+ ("already initialized %p", domain));
+
+ domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
+ IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL);
+ if (bus_get_domain(domain->iodom.iommu->dev, &dom) == 0)
+ domain->pgtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom);
+ AMDIOMMU_DOMAIN_PGLOCK(domain);
+ m = iommu_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK |
+ IOMMU_PGF_ZERO | IOMMU_PGF_OBJL);
+ /* No implicit free of the top level page table page. */
+ vm_page_wire(m);
+ domain->pgtblr = m;
+ AMDIOMMU_DOMAIN_PGUNLOCK(domain);
+ AMDIOMMU_LOCK(domain->unit);
+ domain->iodom.flags |= IOMMU_DOMAIN_PGTBL_INITED;
+ AMDIOMMU_UNLOCK(domain->unit);
+ return (0);
+}
+
+void
+amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain)
+{
+ vm_object_t obj;
+ vm_page_t m;
+
+ obj = domain->pgtbl_obj;
+ if (obj == NULL) {
+ KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0,
+ ("lost pagetable object domain %p", domain));
+ return;
+ }
+ AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
+ domain->pgtbl_obj = NULL;
+ domain->pgtblr = NULL;
+
+ /* Obliterate ref_counts */
+ VM_OBJECT_ASSERT_WLOCKED(obj);
+ for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m))
+ vm_page_clearref(m);
+ VM_OBJECT_WUNLOCK(obj);
+ vm_object_deallocate(obj);
+}
+
+static iommu_pte_t *
+amdiommu_pgtbl_map_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
+ int lvl, int flags, vm_pindex_t *idxp, struct sf_buf **sf)
+{
+ iommu_pte_t *pte, *ptep;
+ struct sf_buf *sfp;
+ vm_page_t m;
+ vm_pindex_t idx, idx1;
+
+ idx = pglvl_pgtbl_get_pindex(domain->pglvl, base, lvl);
+ if (*sf != NULL && idx == *idxp) {
+ pte = (iommu_pte_t *)sf_buf_kva(*sf);
+ } else {
+ if (*sf != NULL)
+ iommu_unmap_pgtbl(*sf);
+ *idxp = idx;
+retry:
+ pte = iommu_map_pgtbl(domain->pgtbl_obj, idx, flags, sf);
+ if (pte == NULL) {
+ KASSERT(lvl > 0,
+ ("lost root page table page %p", domain));
+ /*
+ * Page table page does not exist, allocate
+ * it and create a pte in the preceding page level
+ * to reference the allocated page table page.
+ */
+ m = iommu_pgalloc(domain->pgtbl_obj, idx, flags |
+ IOMMU_PGF_ZERO);
+ if (m == NULL)
+ return (NULL);
+
+ vm_page_wire(m);
+
+ sfp = NULL;
+ ptep = amdiommu_pgtbl_map_pte(domain, base, lvl - 1,
+ flags, &idx1, &sfp);
+ if (ptep == NULL) {
+ KASSERT(m->pindex != 0,
+ ("loosing root page %p", domain));
+ vm_page_unwire_noq(m);
+ iommu_pgfree(domain->pgtbl_obj, m->pindex,
+ flags, NULL);
+ return (NULL);
+ }
+ ptep->pte = VM_PAGE_TO_PHYS(m) | AMDIOMMU_PTE_IR |
+ AMDIOMMU_PTE_IW | AMDIOMMU_PTE_PR |
+ ((domain->pglvl - lvl) << AMDIOMMU_PTE_NLVL_SHIFT);
+ vm_page_wire(sf_buf_page(sfp));
+ vm_page_unwire_noq(m);
+ iommu_unmap_pgtbl(sfp);
+ /* Only executed once. */
+ goto retry;
+ }
+ }
+ pte += pglvl_pgtbl_pte_off(domain->pglvl, base, lvl);
+ return (pte);
+}
+
+static int
+amdiommu_map_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
+ iommu_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags,
+ struct iommu_map_entry *entry)
+{
+ iommu_pte_t *pte;
+ struct sf_buf *sf;
+ iommu_gaddr_t base1;
+ vm_pindex_t pi, idx;
+
+ AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
+
+ base1 = base;
+ flags |= IOMMU_PGF_OBJL;
+ idx = -1;
+ pte = NULL;
+ sf = NULL;
+
+ for (pi = 0; size > 0; base += IOMMU_PAGE_SIZE, size -= IOMMU_PAGE_SIZE,
+ pi++) {
+ KASSERT(size >= IOMMU_PAGE_SIZE,
+ ("mapping loop overflow %p %jx %jx %jx", domain,
+ (uintmax_t)base, (uintmax_t)size, (uintmax_t)IOMMU_PAGE_SIZE));
+ pte = amdiommu_pgtbl_map_pte(domain, base, domain->pglvl - 1,
+ flags, &idx, &sf);
+ if (pte == NULL) {
+ KASSERT((flags & IOMMU_PGF_WAITOK) == 0,
+ ("failed waitable pte alloc %p", domain));
+ if (sf != NULL)
+ iommu_unmap_pgtbl(sf);
+ amdiommu_unmap_buf_locked(domain, base1, base - base1,
+ flags, entry);
+ return (ENOMEM);
+ }
+ /* next level 0, no superpages */
+ pte->pte = VM_PAGE_TO_PHYS(ma[pi]) | pflags | AMDIOMMU_PTE_PR;
+ vm_page_wire(sf_buf_page(sf));
+ }
+ if (sf != NULL)
+ iommu_unmap_pgtbl(sf);
+ return (0);
+}
+
+static int
+amdiommu_map_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
+ vm_page_t *ma, uint64_t eflags, int flags)
+{
+ struct amdiommu_domain *domain;
+ uint64_t pflags;
+ iommu_gaddr_t base, size;
+ int error;
+
+ base = entry->start;
+ size = entry->end - entry->start;
+ pflags = ((eflags & IOMMU_MAP_ENTRY_READ) != 0 ? AMDIOMMU_PTE_IR : 0) |
+ ((eflags & IOMMU_MAP_ENTRY_WRITE) != 0 ? AMDIOMMU_PTE_IW : 0) |
+ ((eflags & IOMMU_MAP_ENTRY_SNOOP) != 0 ? AMDIOMMU_PTE_FC : 0);
+ /* IOMMU_MAP_ENTRY_TM ignored */
+
+ domain = IODOM2DOM(iodom);
+
+ KASSERT((iodom->flags & IOMMU_DOMAIN_IDMAP) == 0,
+ ("modifying idmap pagetable domain %p", domain));
+ KASSERT((base & IOMMU_PAGE_MASK) == 0,
+ ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((size & IOMMU_PAGE_MASK) == 0,
+ ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT(size > 0, ("zero size %p %jx %jx", domain, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT(base < iodom->end,
+ ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
+ (uintmax_t)size, (uintmax_t)iodom->end));
+ KASSERT(base + size < iodom->end,
+ ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
+ (uintmax_t)size, (uintmax_t)iodom->end));
+ KASSERT(base + size > base,
+ ("size overflow %p %jx %jx", domain, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((pflags & (AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW)) != 0,
+ ("neither read nor write %jx", (uintmax_t)pflags));
+ KASSERT((pflags & ~(AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW | AMDIOMMU_PTE_FC
+ )) == 0,
+ ("invalid pte flags %jx", (uintmax_t)pflags));
+ KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));
+
+ AMDIOMMU_DOMAIN_PGLOCK(domain);
+ error = amdiommu_map_buf_locked(domain, base, size, ma, pflags,
+ flags, entry);
+ AMDIOMMU_DOMAIN_PGUNLOCK(domain);
+
+ /*
+ * XXXKIB invalidation seems to be needed even for non-valid->valid
+ * updates. Recheck.
+ */
+ iommu_qi_invalidate_sync(iodom, base, size,
+ (flags & IOMMU_PGF_WAITOK) != 0);
+ return (error);
+}
+
+static void
+amdiommu_free_pgtbl_pde(struct amdiommu_domain *domain, iommu_gaddr_t base,
+ int lvl, int flags, struct iommu_map_entry *entry)
+{
+ struct sf_buf *sf;
+ iommu_pte_t *pde;
+ vm_pindex_t idx;
+
+ sf = NULL;
+ pde = amdiommu_pgtbl_map_pte(domain, base, lvl, flags, &idx, &sf);
+ amdiommu_unmap_clear_pte(domain, base, lvl, flags, pde, &sf, entry,
+ true);
+}
+
+static void
+amdiommu_unmap_clear_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
+ int lvl, int flags, iommu_pte_t *pte, struct sf_buf **sf,
+ struct iommu_map_entry *entry, bool free_sf)
+{
+ vm_page_t m;
+
+ pte->pte = 0;
+ m = sf_buf_page(*sf);
+ if (free_sf) {
+ iommu_unmap_pgtbl(*sf);
+ *sf = NULL;
+ }
+ if (!vm_page_unwire_noq(m))
+ return;
+ KASSERT(lvl != 0,
+ ("lost reference (lvl) on root pg domain %p base %jx lvl %d",
+ domain, (uintmax_t)base, lvl));
+ KASSERT(m->pindex != 0,
+ ("lost reference (idx) on root pg domain %p base %jx lvl %d",
+ domain, (uintmax_t)base, lvl));
+ iommu_pgfree(domain->pgtbl_obj, m->pindex, flags, entry);
+ amdiommu_free_pgtbl_pde(domain, base, lvl - 1, flags, entry);
+}
+
+static int
+amdiommu_unmap_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
+ iommu_gaddr_t size, int flags, struct iommu_map_entry *entry)
+{
+ iommu_pte_t *pte;
+ struct sf_buf *sf;
+ vm_pindex_t idx;
+ iommu_gaddr_t pg_sz;
+
+ AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
+ if (size == 0)
+ return (0);
+
+ KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) == 0,
+ ("modifying idmap pagetable domain %p", domain));
+ KASSERT((base & IOMMU_PAGE_MASK) == 0,
+ ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((size & IOMMU_PAGE_MASK) == 0,
+ ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT(base < DOM2IODOM(domain)->end,
+ ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
+ (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
+ KASSERT(base + size < DOM2IODOM(domain)->end,
+ ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
+ (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
+ KASSERT(base + size > base,
+ ("size overflow %p %jx %jx", domain, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));
+
+ pg_sz = IOMMU_PAGE_SIZE;
+ flags |= IOMMU_PGF_OBJL;
+
+ for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) {
+ pte = amdiommu_pgtbl_map_pte(domain, base,
+ domain->pglvl - 1, flags, &idx, &sf);
+ KASSERT(pte != NULL,
+ ("sleeping or page missed %p %jx %d 0x%x",
+ domain, (uintmax_t)base, domain->pglvl - 1, flags));
+ amdiommu_unmap_clear_pte(domain, base, domain->pglvl - 1,
+ flags, pte, &sf, entry, false);
+ KASSERT(size >= pg_sz,
+ ("unmapping loop overflow %p %jx %jx %jx", domain,
+ (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
+ }
+ if (sf != NULL)
+ iommu_unmap_pgtbl(sf);
+ return (0);
+}
+
+static int
+amdiommu_unmap_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
+ int flags)
+{
+ struct amdiommu_domain *domain;
+ int error;
+
+ domain = IODOM2DOM(iodom);
+
+ AMDIOMMU_DOMAIN_PGLOCK(domain);
+ error = amdiommu_unmap_buf_locked(domain, entry->start,
+ entry->end - entry->start, flags, entry);
+ AMDIOMMU_DOMAIN_PGUNLOCK(domain);
+ return (error);
+}
+
+const struct iommu_domain_map_ops amdiommu_domain_map_ops = {
+ .map = amdiommu_map_buf,
+ .unmap = amdiommu_unmap_buf,
+};
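
[Editor's note] A hypothetical userspace sketch of the per-level index arithmetic that amdiommu_pgtbl_map_pte() relies on; the driver itself uses the shared pglvl_pgtbl_get_pindex()/pglvl_pgtbl_pte_off() helpers, so the names and constants below are illustrative assumptions. With 4 KB pages and 512 eight-byte PTEs per page-table page, each level consumes 9 bits of the I/O virtual address above the 12-bit page offset; lvl 0 is the root and lvl pglvl - 1 is the leaf, matching the calls above.

    /* Illustrative only; compiles as plain C99, not kernel code. */
    #include <stdint.h>
    #include <stdio.h>

    #define PG_SHIFT	12			/* 4 KB pages */
    #define NPTE_SHIFT	9			/* 512 PTEs per page */
    #define NPTE_MASK	((1u << NPTE_SHIFT) - 1)

    /* PTE slot within the page-table page at level 'lvl' (leaf is pglvl - 1). */
    static unsigned
    pte_index(uint64_t addr, int pglvl, int lvl)
    {
    	int shift;

    	shift = PG_SHIFT + NPTE_SHIFT * (pglvl - lvl - 1);
    	return ((addr >> shift) & NPTE_MASK);
    }

    int
    main(void)
    {
    	uint64_t addr = 0x123456789000ULL;
    	int pglvl = 4;				/* 4-level table, 48-bit IOVA */

    	for (int lvl = 0; lvl < pglvl; lvl++)
    		printf("level %d index %u\n", lvl,
    		    pte_index(addr, pglvl, lvl));
    	return (0);
    }
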
diff --git a/sys/x86/iommu/amd_intrmap.c b/sys/x86/iommu/amd_intrmap.c
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_intrmap.c
@@ -0,0 +1,391 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/domainset.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <sys/vmem.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/md_var.h>
+#include <machine/intr_machdep.h>
+#include <x86/include/apicreg.h>
+#include <x86/include/apicvar.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <dev/iommu/busdma_iommu.h>
+#include <x86/iommu/amd_reg.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/amd_iommu.h>
+
+static struct amdiommu_ctx *amdiommu_ir_find(device_t src, uint16_t *rid,
+ bool *is_iommu);
+static void amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src,
+ u_int cookie);
+
+int
+amdiommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
+{
+ struct amdiommu_ctx *ctx;
+ vmem_addr_t vmem_res;
+ u_int idx, i;
+ int error;
+
+ ctx = amdiommu_ir_find(src, NULL, NULL);
+ if (ctx == NULL || !CTX2AMD(ctx)->irte_enabled) {
+ for (i = 0; i < count; i++)
+ cookies[i] = -1;
+ return (EOPNOTSUPP);
+ }
+
+ error = vmem_alloc(ctx->irtids, count, M_FIRSTFIT | M_NOWAIT,
+ &vmem_res);
+ if (error != 0) {
+ KASSERT(error != EOPNOTSUPP,
+ ("impossible EOPNOTSUPP from vmem"));
+ return (error);
+ }
+ idx = vmem_res;
+ for (i = 0; i < count; i++)
+ cookies[i] = idx + i;
+ return (0);
+}
+
+int
+amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
+ u_int cookie, uint64_t *addr, uint32_t *data)
+{
+ struct amdiommu_ctx *ctx;
+ struct amdiommu_unit *unit;
+ uint16_t rid;
+ bool is_iommu;
+
+ ctx = amdiommu_ir_find(src, &rid, &is_iommu);
+ if (is_iommu) {
+ if (addr != NULL) {
+ *data = vector;
+ *addr = MSI_INTEL_ADDR_BASE | ((cpu & 0xff) << 12);
+ if (x2apic_mode)
+ *addr |= ((uint64_t)cpu & 0xffffff00) << 32;
+ else
+ KASSERT(cpu <= 0xff,
+ ("cpu id too big %d", cpu));
+ }
+ return (0);
+ }
+
+ if (ctx == NULL)
+ return (EOPNOTSUPP);
+ unit = CTX2AMD(ctx);
+ if (!unit->irte_enabled || cookie == -1)
+ return (EOPNOTSUPP);
+ if (cookie >= unit->irte_nentries) {
+ device_printf(src, "amdiommu%d: cookie %u irte max %u\n",
+ unit->iommu.unit, cookie, unit->irte_nentries);
+ return (EINVAL);
+ }
+
+ if (unit->irte_x2apic) {
+ struct amdiommu_irte_basic_vapic_x2 *irte;
+
+ irte = &ctx->irtx2[cookie];
+ irte->supiopf = 0;
+ irte->inttype = 0;
+ irte->rqeoi = 0;
+ irte->dm = 0;
+ irte->guestmode = 0;
+ irte->dest0 = cpu;
+ irte->rsrv0 = 0;
+ irte->vector = vector;
+ irte->rsrv1 = 0;
+ irte->rsrv2 = 0;
+ irte->dest1 = cpu >> 24;
+ atomic_thread_fence_rel();
+ irte->remapen = 1;
+ } else {
+ struct amdiommu_irte_basic_novapic *irte;
+
+ irte = &ctx->irtb[cookie];
+ irte->supiopf = 0;
+ irte->inttype = 0; /* fixed */
+ irte->rqeoi = 0;
+ irte->dm = 0; /* phys */
+ irte->guestmode = 0;
+ irte->dest = cpu;
+ irte->vector = vector;
+ irte->rsrv = 0;
+ atomic_thread_fence_rel();
+ irte->remapen = 1;
+ }
+
+ if (addr != NULL) {
+ *data = cookie;
+ *addr = MSI_INTEL_ADDR_BASE | ((cpu & 0xff) << 12);
+ if (unit->irte_x2apic)
+ *addr |= ((uint64_t)cpu & 0xffffff00) << 32;
+ }
+
+ iommu_get_requester(src, &rid);
+ AMDIOMMU_LOCK(unit);
+ amdiommu_qi_invalidate_ir_locked(unit, rid);
+ AMDIOMMU_UNLOCK(unit);
+
+ return (0);
+}
+
+int
+amdiommu_unmap_msi_intr(device_t src, u_int cookie)
+{
+ struct amdiommu_ctx *ctx;
+
+ if (cookie == -1)
+ return (0);
+ ctx = amdiommu_ir_find(src, NULL, NULL);
+ amdiommu_ir_free_irte(ctx, src, cookie);
+ return (0);
+}
+
+int
+amdiommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
+ bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
+ uint32_t *lo)
+{
+ /* XXXKIB for early call from ioapic_create() */
+ return (EOPNOTSUPP);
+}
+
+int
+amdiommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
+{
+ /* XXXKIB */
+ return (0);
+}
+
+static struct amdiommu_ctx *
+amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu)
+{
+ devclass_t src_class;
+ struct amdiommu_unit *unit;
+ struct amdiommu_ctx *ctx;
+ uint32_t edte;
+ uint16_t rid;
+ uint8_t dte;
+ int error;
+
+ /*
+ * We need to determine whether the interrupt source generates FSB
+ * interrupts. If it does, it is either the IOMMU itself, in which
+ * case its interrupts are not remapped, or it is the HPET, whose
+ * interrupts are remapped. For the HPET, the source id is reported
+ * by the HPET record in the IVHD ACPI table.
+ */
+ if (is_iommu != NULL)
+ *is_iommu = false;
+
+ ctx = NULL;
+
+ src_class = device_get_devclass(src);
+ if (src_class == devclass_find("amdiommu")) {
+ if (is_iommu != NULL)
+ *is_iommu = true;
+ } else if (src_class == devclass_find("hpet")) {
+ error = amdiommu_find_unit_for_hpet(src, &unit, &rid, &dte,
+ &edte, bootverbose);
+ ctx = NULL; // XXXKIB allocate ctx
+ } else {
+ error = amdiommu_find_unit(src, &unit, &rid, &dte, &edte,
+ bootverbose);
+ if (error == 0) {
+ iommu_get_requester(src, &rid);
+ ctx = amdiommu_get_ctx_for_dev(unit, src,
+ rid, 0, false /* XXXKIB */, false, dte, edte);
+ }
+ }
+ if (ridp != NULL)
+ *ridp = rid;
+ return (ctx);
+}
+
+static void
+amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src,
+ u_int cookie)
+{
+ struct amdiommu_unit *unit;
+ uint16_t rid;
+
+ MPASS(ctx != NULL);
+ unit = CTX2AMD(ctx);
+
+ KASSERT(unit->irte_enabled,
+ ("unmap: cookie %d ctx %p unit %p", cookie, ctx, unit));
+ KASSERT(cookie < unit->irte_nentries,
+ ("bad cookie %u %u", cookie, unit->irte_nentries));
+
+ if (unit->irte_x2apic) {
+ struct amdiommu_irte_basic_vapic_x2 *irte;
+
+ irte = &ctx->irtx2[cookie];
+ irte->remapen = 0;
+ atomic_thread_fence_rel();
+ bzero(irte, sizeof(*irte));
+ } else {
+ struct amdiommu_irte_basic_novapic *irte;
+
+ irte = &ctx->irtb[cookie];
+ irte->remapen = 0;
+ atomic_thread_fence_rel();
+ bzero(irte, sizeof(*irte));
+ }
+ iommu_get_requester(src, &rid);
+ AMDIOMMU_LOCK(unit);
+ amdiommu_qi_invalidate_ir_locked(unit, rid);
+ AMDIOMMU_UNLOCK(unit);
+}
+
+int
+amdiommu_ctx_init_irte(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_unit *unit;
+ void *ptr;
+ unsigned long sz;
+ int dom;
+
+ unit = CTX2AMD(ctx);
+ if (!unit->irte_enabled)
+ return (0);
+
+ KASSERT(unit->irte_nentries > 0 &&
+ unit->irte_nentries <= 2048 &&
+ powerof2(unit->irte_nentries),
+ ("amdiommu%d: unit %p irte_nentries %u", unit->iommu.unit,
+ unit, unit->irte_nentries));
+
+ if (bus_get_domain(unit->iommu.dev, &dom) != 0)
+ dom = -1;
+ sz = unit->irte_nentries;
+ sz *= unit->irte_x2apic ? sizeof(struct amdiommu_irte_basic_vapic_x2) :
+ sizeof(struct amdiommu_irte_basic_novapic);
+
+ if (dom != -1) {
+ ptr = contigmalloc_domainset(sz, M_DEVBUF, DOMAINSET_PREF(dom),
+ M_WAITOK | M_ZERO, 0, ~0ull, 128, 0);
+ } else {
+ ptr = contigmalloc(sz, M_DEVBUF, M_WAITOK | M_ZERO,
+ 0, ~0ull, 128, 0);
+ }
+ if (unit->irte_x2apic)
+ ctx->irtx2 = ptr;
+ else
+ ctx->irtb = ptr;
+ ctx->irtids = vmem_create("amdirt", 0, unit->irte_nentries, 1, 0,
+ M_FIRSTFIT | M_NOWAIT);
+
+ intr_reprogram(); // XXXKIB
+
+ return (0);
+}
+
+void
+amdiommu_ctx_fini_irte(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_unit *unit;
+
+ unit = CTX2AMD(ctx);
+ if (!unit->irte_enabled)
+ return;
+ if (unit->irte_x2apic)
+ free(ctx->irtx2, M_DEVBUF);
+ else
+ free(ctx->irtb, M_DEVBUF);
+ vmem_destroy(ctx->irtids);
+}
+
+int
+amdiommu_init_irt(struct amdiommu_unit *unit)
+{
+ int enabled, nentries;
+
+ SYSCTL_ADD_INT(&unit->iommu.sysctl_ctx,
+ SYSCTL_CHILDREN(device_get_sysctl_tree(unit->iommu.dev)),
+ OID_AUTO, "ir", CTLFLAG_RD, &unit->irte_enabled, 0,
+ "Interrupt remapping ops enabled");
+
+ enabled = 1;
+ TUNABLE_INT_FETCH("hw.iommu.ir", &enabled);
+
+ unit->irte_enabled = enabled != 0;
+ if (!unit->irte_enabled)
+ return (0);
+
+ nentries = 32;
+ TUNABLE_INT_FETCH("hw.iommu.amd.ir_num", &nentries);
+ nentries = roundup_pow_of_two(nentries);
+ if (nentries < 1)
+ nentries = 1;
+ if (nentries > 2048)
+ nentries = 2048;
+ unit->irte_nentries = nentries;
+
+ unit->irte_x2apic = x2apic_mode;
+ return (0);
+}
+
+void
+amdiommu_fini_irt(struct amdiommu_unit *unit)
+{
+}
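
[Editor's note] A hypothetical userspace illustration of the publish ordering used when programming and freeing IRTEs above: all fields of the entry are written first, a release fence is issued, and only then is the enable bit (remapen in the real structures) set, so an observer that sees the entry enabled also sees consistent contents; freeing reverses the order. The structure below is a stand-in, not the real IRTE layout.

    /* Illustrative only; compiles as C11, not kernel code. */
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    struct fake_irte {
    	uint32_t vector;
    	uint32_t dest;
    	uint32_t enabled;		/* stands in for the remapen bit */
    };

    static void
    irte_program(struct fake_irte *irte, uint32_t cpu, uint32_t vector)
    {
    	irte->vector = vector;
    	irte->dest = cpu;
    	atomic_thread_fence(memory_order_release);
    	irte->enabled = 1;		/* published last */
    }

    static void
    irte_free(struct fake_irte *irte)
    {
    	irte->enabled = 0;		/* withdrawn first */
    	atomic_thread_fence(memory_order_release);
    	irte->vector = irte->dest = 0;
    }

    int
    main(void)
    {
    	struct fake_irte irte = { 0 };

    	irte_program(&irte, 2, 0x30);
    	printf("enabled %u vector %#x dest %u\n", irte.enabled,
    	    irte.vector, irte.dest);
    	irte_free(&irte);
    	return (0);
    }
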
diff --git a/sys/x86/iommu/amd_iommu.h b/sys/x86/iommu/amd_iommu.h
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_iommu.h
@@ -0,0 +1,243 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __X86_IOMMU_AMD_IOMMU_H
+#define __X86_IOMMU_AMD_IOMMU_H
+
+#include <dev/iommu/iommu.h>
+
+#define AMDIOMMU_DEV_REPORTED 0x00000001
+
+struct amdiommu_unit;
+
+struct amdiommu_domain {
+ struct iommu_domain iodom;
+ int domain; /* (c) DID, written in context entry */
+ struct amdiommu_unit *unit; /* (c) */
+
+ u_int ctx_cnt; /* (u) Number of contexts owned */
+ u_int refs; /* (u) Refs, including ctx */
+ LIST_ENTRY(amdiommu_domain) link;/* (u) Member in the iommu list */
+ vm_object_t pgtbl_obj; /* (c) Page table pages */
+ vm_page_t pgtblr; /* (c) Page table root page */
+ u_int pglvl; /* (c) Page table levels */
+};
+
+struct amdiommu_ctx {
+ struct iommu_ctx context;
+ struct amdiommu_irte_basic_novapic *irtb;
+ struct amdiommu_irte_basic_vapic_x2 *irtx2;
+ vmem_t *irtids;
+};
+
+struct amdiommu_unit {
+ struct iommu_unit iommu;
+ struct x86_unit_common x86c;
+ u_int unit_dom; /* Served PCI domain, from IVRS */
+ u_int device_id; /* basically PCI RID */
+ u_int unit_id; /* Hypertransport Unit ID, deprecated */
+ TAILQ_ENTRY(amdiommu_unit) unit_next;
+ int seccap_reg;
+ uint64_t efr;
+ vm_paddr_t mmio_base;
+ vm_size_t mmio_sz;
+ struct resource *mmio_res;
+ int mmio_rid;
+ uint64_t hw_ctrl;
+
+ u_int numirqs;
+ struct resource *msix_table;
+ int msix_table_rid;
+ int irq_cmdev_rid;
+ struct resource *irq_cmdev;
+ void *irq_cmdev_cookie;
+
+ struct amdiommu_dte *dev_tbl;
+ vm_object_t devtbl_obj;
+
+ LIST_HEAD(, amdiommu_domain) domains;
+ struct unrhdr *domids;
+
+ struct mtx event_lock;
+ struct amdiommu_event_generic *event_log;
+ u_int event_log_size;
+ u_int event_log_head;
+ u_int event_log_tail;
+ struct task event_task;
+ struct taskqueue *event_taskqueue;
+ struct amdiommu_event_generic event_copy_log[16];
+ u_int event_copy_head;
+ u_int event_copy_tail;
+
+ int irte_enabled; /* int for sysctl type */
+ bool irte_x2apic;
+ u_int irte_nentries;
+};
+
+#define AMD2IOMMU(unit) (&((unit)->iommu))
+#define IOMMU2AMD(unit) \
+ __containerof((unit), struct amdiommu_unit, iommu)
+
+#define AMDIOMMU_LOCK(unit) mtx_lock(&AMD2IOMMU(unit)->lock)
+#define AMDIOMMU_UNLOCK(unit) mtx_unlock(&AMD2IOMMU(unit)->lock)
+#define AMDIOMMU_ASSERT_LOCKED(unit) mtx_assert(&AMD2IOMMU(unit)->lock, \
+ MA_OWNED)
+
+#define AMDIOMMU_EVENT_LOCK(unit) mtx_lock_spin(&(unit)->event_lock)
+#define AMDIOMMU_EVENT_UNLOCK(unit) mtx_unlock_spin(&(unit)->event_lock)
+#define AMDIOMMU_EVENT_ASSERT_LOCKED(unit) \
+ mtx_assert(&(unit)->event_lock, MA_OWNED)
+
+#define DOM2IODOM(domain) (&((domain)->iodom))
+#define IODOM2DOM(domain) \
+ __containerof((domain), struct amdiommu_domain, iodom)
+
+#define CTX2IOCTX(ctx) (&((ctx)->context))
+#define IOCTX2CTX(ctx) \
+ __containerof((ctx), struct amdiommu_ctx, context)
+
+#define CTX2DOM(ctx) IODOM2DOM((ctx)->context.domain)
+#define CTX2AMD(ctx) (CTX2DOM(ctx)->unit)
+#define DOM2AMD(domain) ((domain)->unit)
+
+#define AMDIOMMU_DOMAIN_LOCK(dom) mtx_lock(&(dom)->iodom.lock)
+#define AMDIOMMU_DOMAIN_UNLOCK(dom) mtx_unlock(&(dom)->iodom.lock)
+#define AMDIOMMU_DOMAIN_ASSERT_LOCKED(dom) \
+ mtx_assert(&(dom)->iodom.lock, MA_OWNED)
+
+#define AMDIOMMU_DOMAIN_PGLOCK(dom) VM_OBJECT_WLOCK((dom)->pgtbl_obj)
+#define AMDIOMMU_DOMAIN_PGTRYLOCK(dom) VM_OBJECT_TRYWLOCK((dom)->pgtbl_obj)
+#define AMDIOMMU_DOMAIN_PGUNLOCK(dom) VM_OBJECT_WUNLOCK((dom)->pgtbl_obj)
+#define AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(dom) \
+ VM_OBJECT_ASSERT_WLOCKED((dom)->pgtbl_obj)
+
+#define AMDIOMMU_RID 1001
+
+static inline uint32_t
+amdiommu_read4(const struct amdiommu_unit *unit, int reg)
+{
+
+ return (bus_read_4(unit->mmio_res, reg));
+}
+
+static inline uint64_t
+amdiommu_read8(const struct amdiommu_unit *unit, int reg)
+{
+#ifdef __i386__
+ uint32_t high, low;
+
+ low = bus_read_4(unit->mmio_res, reg);
+ high = bus_read_4(unit->mmio_res, reg + 4);
+ return (low | ((uint64_t)high << 32));
+#else
+ return (bus_read_8(unit->mmio_res, reg));
+#endif
+}
+
+static inline void
+amdiommu_write4(const struct amdiommu_unit *unit, int reg, uint32_t val)
+{
+ bus_write_4(unit->mmio_res, reg, val);
+}
+
+static inline void
+amdiommu_write8(const struct amdiommu_unit *unit, int reg, uint64_t val)
+{
+#ifdef __i386__
+ uint32_t high, low;
+
+ low = val;
+ high = val >> 32;
+ bus_write_4(unit->mmio_res, reg, low);
+ bus_write_4(unit->mmio_res, reg + 4, high);
+#else
+ bus_write_8(unit->mmio_res, reg, val);
+#endif
+}
+
+int amdiommu_find_unit(device_t dev, struct amdiommu_unit **unitp,
+ uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose);
+int amdiommu_find_unit_for_ioapic(int apic_id, struct amdiommu_unit **unitp,
+ uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose);
+int amdiommu_find_unit_for_hpet(device_t hpet, struct amdiommu_unit **unitp,
+ uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose);
+
+int amdiommu_init_cmd(struct amdiommu_unit *unit);
+void amdiommu_fini_cmd(struct amdiommu_unit *unit);
+
+void amdiommu_event_intr(struct amdiommu_unit *unit, uint64_t status);
+int amdiommu_init_event(struct amdiommu_unit *unit);
+void amdiommu_fini_event(struct amdiommu_unit *unit);
+
+int amdiommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count);
+int amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
+ u_int cookie, uint64_t *addr, uint32_t *data);
+int amdiommu_unmap_msi_intr(device_t src, u_int cookie);
+int amdiommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
+ bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
+ uint32_t *lo);
+int amdiommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie);
+int amdiommu_init_irt(struct amdiommu_unit *unit);
+void amdiommu_fini_irt(struct amdiommu_unit *unit);
+int amdiommu_ctx_init_irte(struct amdiommu_ctx *ctx);
+void amdiommu_ctx_fini_irte(struct amdiommu_ctx *ctx);
+
+void amdiommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
+ bool cansleep);
+void amdiommu_domain_unload(struct iommu_domain *iodom,
+ struct iommu_map_entries_tailq *entries, bool cansleep);
+struct amdiommu_ctx *amdiommu_get_ctx_for_dev(struct amdiommu_unit *unit,
+ device_t dev, uint16_t rid, int dev_domain, bool id_mapped,
+ bool rmrr_init, uint8_t dte, uint32_t edte);
+struct iommu_ctx *amdiommu_get_ctx(struct iommu_unit *iommu, device_t dev,
+ uint16_t rid, bool id_mapped, bool rmrr_init);
+struct amdiommu_ctx *amdiommu_find_ctx_locked(struct amdiommu_unit *unit,
+ uint16_t rid);
+void amdiommu_free_ctx_locked_method(struct iommu_unit *iommu,
+ struct iommu_ctx *context);
+void amdiommu_free_ctx_method(struct iommu_ctx *context);
+struct amdiommu_domain *amdiommu_find_domain(struct amdiommu_unit *unit,
+ uint16_t rid);
+
+void amdiommu_qi_invalidate_ctx_locked(struct amdiommu_ctx *ctx);
+void amdiommu_qi_invalidate_ctx_locked_nowait(struct amdiommu_ctx *ctx);
+void amdiommu_qi_invalidate_ir_locked(struct amdiommu_unit *unit,
+ uint16_t devid);
+void amdiommu_qi_invalidate_ir_locked_nowait(struct amdiommu_unit *unit,
+ uint16_t devid);
+void amdiommu_qi_invalidate_all_pages_locked_nowait(
+ struct amdiommu_domain *domain);
+void amdiommu_qi_invalidate_wait_sync(struct iommu_unit *iommu);
+
+int amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain);
+void amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain);
+extern const struct iommu_domain_map_ops amdiommu_domain_map_ops;
+
+#endif
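
[Editor's note] A hypothetical userspace sketch of the container-of pattern behind the IOMMU2AMD()/IODOM2DOM()/IOCTX2CTX() macros above: the driver embeds the generic iommu structure inside its own structure and recovers the enclosing structure from a pointer to the embedded member by subtracting the member offset (__containerof() in the kernel; a plain offsetof()-based macro in this sketch).

    /* Illustrative only; compiles as plain C99, not kernel code. */
    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
    	((type *)((char *)(ptr) - offsetof(type, member)))

    struct generic_unit {
    	int unit;
    };

    struct driver_unit {
    	struct generic_unit iommu;	/* embedded generic part */
    	int driver_private;
    };

    int
    main(void)
    {
    	struct driver_unit du = {
    		.iommu = { .unit = 3 },
    		.driver_private = 7,
    	};
    	struct generic_unit *g = &du.iommu;
    	struct driver_unit *back;

    	back = container_of(g, struct driver_unit, iommu);
    	printf("unit %d private %d\n", back->iommu.unit,
    	    back->driver_private);
    	return (0);
    }
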