
D47256.diff

diff --git a/sys/conf/files.x86 b/sys/conf/files.x86
--- a/sys/conf/files.x86
+++ b/sys/conf/files.x86
@@ -344,6 +344,12 @@
x86/cpufreq/hwpstate_intel.c optional cpufreq
x86/cpufreq/p4tcc.c optional cpufreq
x86/cpufreq/powernow.c optional cpufreq
+x86/iommu/amd_cmd.c optional acpi iommu pci
+x86/iommu/amd_ctx.c optional acpi iommu pci
+x86/iommu/amd_drv.c optional acpi iommu pci
+x86/iommu/amd_event.c optional acpi iommu pci
+x86/iommu/amd_idpgtbl.c optional acpi iommu pci
+x86/iommu/amd_intrmap.c optional acpi iommu pci
x86/iommu/intel_ctx.c optional acpi iommu pci
x86/iommu/intel_drv.c optional acpi iommu pci
x86/iommu/intel_fault.c optional acpi iommu pci
diff --git a/sys/x86/iommu/amd_cmd.c b/sys/x86/iommu/amd_cmd.c
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_cmd.c
@@ -0,0 +1,360 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_acpi.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/taskqueue.h>
+#include <sys/time.h>
+#include <sys/tree.h>
+#include <sys/vmem.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <dev/pci/pcireg.h>
+#include <machine/bus.h>
+#include <machine/cpu.h>
+#include <x86/include/busdma_impl.h>
+#include <dev/iommu/busdma_iommu.h>
+#include <x86/iommu/amd_reg.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/amd_iommu.h>
+
+static void
+amdiommu_enable_cmdbuf(struct amdiommu_unit *unit)
+{
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ unit->hw_ctrl |= AMDIOMMU_CTRL_CMDBUF_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+}
+
+static void
+amdiommu_disable_cmdbuf(struct amdiommu_unit *unit)
+{
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ unit->hw_ctrl &= ~AMDIOMMU_CTRL_CMDBUF_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+}
+
+
+static void
+amdiommu_enable_qi_intr(struct iommu_unit *iommu)
+{
+ struct amdiommu_unit *unit;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ unit->hw_ctrl |= AMDIOMMU_CTRL_COMWINT_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+ amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS,
+ AMDIOMMU_CMDEVS_COMWAITINT);
+}
+
+static void
+amdiommu_disable_qi_intr(struct iommu_unit *iommu)
+{
+ struct amdiommu_unit *unit;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ unit->hw_ctrl &= ~AMDIOMMU_CTRL_COMWINT_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+}
+
+static void
+amdiommu_cmd_advance_tail(struct iommu_unit *iommu)
+{
+ struct amdiommu_unit *unit;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ amdiommu_write8(unit, AMDIOMMU_CMDBUF_TAIL, unit->x86c.inv_queue_tail);
+}
+
+static void
+amdiommu_cmd_ensure(struct iommu_unit *iommu, int descr_count)
+{
+ struct amdiommu_unit *unit;
+ uint64_t head;
+ int bytes;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ bytes = descr_count << AMDIOMMU_CMD_SZ_SHIFT;
+ for (;;) {
+ if (bytes <= unit->x86c.inv_queue_avail)
+ break;
+ /* refill */
+ head = amdiommu_read8(unit, AMDIOMMU_CMDBUF_HEAD);
+ head &= AMDIOMMU_CMDPTR_MASK;
+ unit->x86c.inv_queue_avail = head - unit->x86c.inv_queue_tail -
+ AMDIOMMU_CMD_SZ;
+ if (head <= unit->x86c.inv_queue_tail)
+ unit->x86c.inv_queue_avail += unit->x86c.inv_queue_size;
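+		/*
+		 * The free space is everything between the hardware
+		 * head and our software tail, minus one descriptor:
+		 * e.g. with head == tail the queue is empty and all
+		 * but one slot is available, which keeps the full and
+		 * empty states distinguishable.
+		 */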
+ if (bytes <= unit->x86c.inv_queue_avail)
+ break;
+
+ /*
+		 * No space in the queue, do a busy wait.  Hardware must
+		 * make progress.  But first advance the tail to inform
+		 * the descriptor streamer about entries we might have
+		 * already filled, otherwise they could clog the whole
+		 * queue.
+ *
+ * See dmar_qi_invalidate_locked() for a discussion
+ * about data race prevention.
+ */
+ amdiommu_cmd_advance_tail(iommu);
+ unit->x86c.inv_queue_full++;
+ cpu_spinwait();
+ }
+ unit->x86c.inv_queue_avail -= bytes;
+}
+
+static void
+amdiommu_cmd_emit(struct amdiommu_unit *unit, const struct
+ amdiommu_cmd_generic *cmd)
+{
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
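+	/*
+	 * Copy the 16-byte descriptor at the current tail and wrap
+	 * the tail; the mask below relies on the command buffer size
+	 * being a power of two.
+	 */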
+ memcpy(unit->x86c.inv_queue + unit->x86c.inv_queue_tail, cmd,
+ sizeof(*cmd));
+ unit->x86c.inv_queue_tail += AMDIOMMU_CMD_SZ;
+ KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size,
+ ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail,
+ (uintmax_t)unit->x86c.inv_queue_size));
+ unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1;
+}
+
+static void
+amdiommu_cmd_emit_wait_descr(struct iommu_unit *iommu, uint32_t seq,
+ bool intr, bool memw, bool fence)
+{
+ struct amdiommu_unit *unit;
+ struct amdiommu_cmd_completion_wait c;
+
+ unit = IOMMU2AMD(iommu);
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_COMPLETION_WAIT;
+ if (memw) {
+ uint32_t x;
+
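+		/*
+		 * Completion-wait store: the descriptor carries the
+		 * physical address of the sequence word with the low
+		 * three bits dropped (address0 holds the shifted low
+		 * word, address1 the upper 32 bits), and data0 is the
+		 * value the hardware writes there once the wait
+		 * completes.
+		 */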
+ c.s = 1;
+ x = unit->x86c.inv_waitd_seq_hw_phys;
+ x >>= 3;
+ c.address0 = x;
+ x = unit->x86c.inv_waitd_seq_hw_phys >> 32;
+ c.address1 = x;
+ c.data0 = seq;
+ }
+ if (fence)
+ c.f = 1;
+ if (intr)
+ c.i = 1;
+ amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
+}
+
+static void
+amdiommu_qi_invalidate_emit(struct iommu_domain *adomain, iommu_gaddr_t base,
+ iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait)
+{
+ struct amdiommu_domain *domain;
+ struct amdiommu_unit *unit;
+ struct amdiommu_cmd_invalidate_iommu_pages c;
+ u_int isize;
+
+ domain = IODOM2DOM(adomain);
+ unit = domain->unit;
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_INVALIDATE_IOMMU_PAGES;
+ c.domainid = domain->domain;
+ isize = IOMMU_PAGE_SIZE; /* XXXKIB handle superpages */
+
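+	/*
+	 * With c.s clear each command invalidates a single
+	 * IOMMU_PAGE_SIZE page, so the range is covered by emitting
+	 * one command per page; c.pde set also drops cached
+	 * intermediate page-directory entries.
+	 */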
+ for (; size > 0; base += isize, size -= isize) {
+ amdiommu_cmd_ensure(AMD2IOMMU(unit), 1);
+ c.s = 0;
+ c.pde = 1;
+ c.address = base >> IOMMU_PAGE_SHIFT;
+ amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
+ }
+ iommu_qi_emit_wait_seq(AMD2IOMMU(unit), pseq, emit_wait);
+}
+
+void
+amdiommu_qi_invalidate_all_pages_locked_nowait(struct amdiommu_domain *domain)
+{
+ struct amdiommu_unit *unit;
+ struct amdiommu_cmd_invalidate_iommu_pages c;
+
+ unit = domain->unit;
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_INVALIDATE_IOMMU_PAGES;
+ c.domainid = domain->domain;
+
+ /*
+	 * The magic address, together with the S bit, is specified in
+	 * the note for the INVALIDATE_IOMMU_PAGES command description:
+	 * it requests invalidation of all pages in the domain.
+ */
+ c.s = 1;
+ c.pde = 1;
+ c.address = 0x7ffffffffffff;
+
+ amdiommu_cmd_ensure(AMD2IOMMU(unit), 1);
+ amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
+}
+
+void
+amdiommu_qi_invalidate_wait_sync(struct iommu_unit *iommu)
+{
+ struct iommu_qi_genseq gseq;
+
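+	/*
+	 * Emit a completion-wait descriptor, account ourselves as a
+	 * waiter, kick the hardware by advancing the tail, and then
+	 * wait until the sequence number is observed.
+	 */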
+ amdiommu_cmd_ensure(iommu, 1);
+ iommu_qi_emit_wait_seq(iommu, &gseq, true);
+ IOMMU2AMD(iommu)->x86c.inv_seq_waiters++;
+ amdiommu_cmd_advance_tail(iommu);
+ iommu_qi_wait_for_seq(iommu, &gseq, true);
+}
+
+void
+amdiommu_qi_invalidate_ctx_locked_nowait(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_cmd_invalidate_devtab_entry c;
+
+ amdiommu_cmd_ensure(AMD2IOMMU(CTX2AMD(ctx)), 1);
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_INVALIDATE_DEVTAB_ENTRY;
+ c.devid = ctx->context.rid;
+ amdiommu_cmd_emit(CTX2AMD(ctx), (struct amdiommu_cmd_generic *)&c);
+}
+
+
+void
+amdiommu_qi_invalidate_ctx_locked(struct amdiommu_ctx *ctx)
+{
+ amdiommu_qi_invalidate_ctx_locked_nowait(ctx);
+ amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx)));
+}
+
+void
+amdiommu_qi_invalidate_ir_locked_nowait(struct amdiommu_unit *unit,
+ uint16_t devid)
+{
+ struct amdiommu_cmd_invalidate_interrupt_table c;
+
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ amdiommu_cmd_ensure(AMD2IOMMU(unit), 1);
+ bzero(&c, sizeof(c));
+ c.op = AMDIOMMU_CMD_INVALIDATE_INTERRUPT_TABLE;
+ c.devid = devid;
+ amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
+}
+
+void
+amdiommu_qi_invalidate_ir_locked(struct amdiommu_unit *unit, uint16_t devid)
+{
+ amdiommu_qi_invalidate_ir_locked_nowait(unit, devid);
+ amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(unit));
+}
+
+static void
+amdiommu_qi_task(void *arg, int pending __unused)
+{
+ struct amdiommu_unit *unit;
+
+ unit = IOMMU2AMD(arg);
+ iommu_qi_drain_tlb_flush(AMD2IOMMU(unit));
+
+ AMDIOMMU_LOCK(unit);
+ if (unit->x86c.inv_seq_waiters > 0)
+ wakeup(&unit->x86c.inv_seq_waiters);
+ AMDIOMMU_UNLOCK(unit);
+}
+
+int
+amdiommu_init_cmd(struct amdiommu_unit *unit)
+{
+ uint64_t qi_sz, rv;
+
+ unit->x86c.qi_buf_maxsz = ilog2(AMDIOMMU_CMDBUF_MAX / PAGE_SIZE);
+ unit->x86c.qi_cmd_sz = AMDIOMMU_CMD_SZ;
+ iommu_qi_common_init(AMD2IOMMU(unit), amdiommu_qi_task);
+ get_x86_iommu()->qi_ensure = amdiommu_cmd_ensure;
+ get_x86_iommu()->qi_emit_wait_descr = amdiommu_cmd_emit_wait_descr;
+ get_x86_iommu()->qi_advance_tail = amdiommu_cmd_advance_tail;
+ get_x86_iommu()->qi_invalidate_emit = amdiommu_qi_invalidate_emit;
+
+ rv = pmap_kextract((uintptr_t)unit->x86c.inv_queue);
+
+ /*
+ * See the description of the ComLen encoding for Command
+ * buffer Base Address Register.
+ */
+ qi_sz = ilog2(unit->x86c.inv_queue_size / PAGE_SIZE) + 8;
+ rv |= qi_sz << AMDIOMMU_CMDBUF_BASE_SZSHIFT;
+
+ AMDIOMMU_LOCK(unit);
+ amdiommu_write8(unit, AMDIOMMU_CMDBUF_BASE, rv);
+ amdiommu_enable_cmdbuf(unit);
+ amdiommu_enable_qi_intr(AMD2IOMMU(unit));
+ AMDIOMMU_UNLOCK(unit);
+
+ return (0);
+}
+
+static void
+amdiommu_fini_cmd_helper(struct iommu_unit *iommu)
+{
+ amdiommu_disable_cmdbuf(IOMMU2AMD(iommu));
+ amdiommu_disable_qi_intr(iommu);
+}
+
+void
+amdiommu_fini_cmd(struct amdiommu_unit *unit)
+{
+ iommu_qi_common_fini(AMD2IOMMU(unit), amdiommu_fini_cmd_helper);
+}
diff --git a/sys/x86/iommu/amd_ctx.c b/sys/x86/iommu/amd_ctx.c
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_ctx.c
@@ -0,0 +1,639 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <sys/vmem.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <dev/iommu/busdma_iommu.h>
+#include <x86/iommu/amd_reg.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/amd_iommu.h>
+
+static MALLOC_DEFINE(M_AMDIOMMU_CTX, "amdiommu_ctx", "AMD IOMMU Context");
+static MALLOC_DEFINE(M_AMDIOMMU_DOMAIN, "amdiommu_dom", "AMD IOMMU Domain");
+
+static void amdiommu_unref_domain_locked(struct amdiommu_unit *unit,
+ struct amdiommu_domain *domain);
+
+static struct amdiommu_dte *
+amdiommu_get_dtep(struct amdiommu_ctx *ctx)
+{
+ return (&CTX2AMD(ctx)->dev_tbl[ctx->context.rid]);
+}
+
+void
+amdiommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
+ bool cansleep)
+{
+ struct amdiommu_domain *domain;
+ struct amdiommu_unit *unit;
+
+ domain = IODOM2DOM(entry->domain);
+ unit = DOM2AMD(domain);
+
+ /*
+ * If "free" is false, then the IOTLB invalidation must be performed
+ * synchronously. Otherwise, the caller might free the entry before
+	 * amdiommu_qi_task() is finished processing it.
+ */
+ if (free) {
+ AMDIOMMU_LOCK(unit);
+ iommu_qi_invalidate_locked(&domain->iodom, entry, true);
+ AMDIOMMU_UNLOCK(unit);
+ } else {
+ iommu_qi_invalidate_sync(&domain->iodom, entry->start,
+ entry->end - entry->start, cansleep);
+ iommu_domain_free_entry(entry, false);
+ }
+}
+
+static bool
+amdiommu_domain_unload_emit_wait(struct amdiommu_domain *domain,
+ struct iommu_map_entry *entry)
+{
+ return (true); /* XXXKIB */
+}
+
+void
+amdiommu_domain_unload(struct iommu_domain *iodom,
+ struct iommu_map_entries_tailq *entries, bool cansleep)
+{
+ struct amdiommu_domain *domain;
+ struct amdiommu_unit *unit;
+ struct iommu_map_entry *entry, *entry1;
+ int error __diagused;
+
+ domain = IODOM2DOM(iodom);
+ unit = DOM2AMD(domain);
+
+ TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
+ KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
+ ("not mapped entry %p %p", domain, entry));
+ error = iodom->ops->unmap(iodom, entry,
+ cansleep ? IOMMU_PGF_WAITOK : 0);
+ KASSERT(error == 0, ("unmap %p error %d", domain, error));
+ }
+ if (TAILQ_EMPTY(entries))
+ return;
+
+ AMDIOMMU_LOCK(unit);
+ while ((entry = TAILQ_FIRST(entries)) != NULL) {
+ TAILQ_REMOVE(entries, entry, dmamap_link);
+ iommu_qi_invalidate_locked(&domain->iodom, entry,
+ amdiommu_domain_unload_emit_wait(domain, entry));
+ }
+ AMDIOMMU_UNLOCK(unit);
+}
+
+static void
+amdiommu_domain_destroy(struct amdiommu_domain *domain)
+{
+ struct iommu_domain *iodom;
+ struct amdiommu_unit *unit;
+
+ iodom = DOM2IODOM(domain);
+
+ KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
+ ("unfinished unloads %p", domain));
+ KASSERT(LIST_EMPTY(&iodom->contexts),
+ ("destroying dom %p with contexts", domain));
+ KASSERT(domain->ctx_cnt == 0,
+ ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
+ KASSERT(domain->refs == 0,
+ ("destroying dom %p with refs %d", domain, domain->refs));
+
+ if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
+ AMDIOMMU_DOMAIN_LOCK(domain);
+ iommu_gas_fini_domain(iodom);
+ AMDIOMMU_DOMAIN_UNLOCK(domain);
+ }
+ if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
+ if (domain->pgtbl_obj != NULL)
+ AMDIOMMU_DOMAIN_PGLOCK(domain);
+ amdiommu_domain_free_pgtbl(domain);
+ }
+ iommu_domain_fini(iodom);
+ unit = DOM2AMD(domain);
+ free_unr(unit->domids, domain->domain);
+ free(domain, M_AMDIOMMU_DOMAIN);
+}
+
+static iommu_gaddr_t
+lvl2addr(int lvl)
+{
+ int x;
+
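+	/*
+	 * Address-space size covered by a page table of the given
+	 * depth: e.g. level 3 gives a shift of IOMMU_PAGE_SHIFT +
+	 * 3 * IOMMU_NPTEPGSHIFT = 39, i.e. a 512 GB limit.
+	 */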
+ x = IOMMU_PAGE_SHIFT + IOMMU_NPTEPGSHIFT * lvl;
+ /* Level 6 has only 8 bits for page table index */
+ if (x >= NBBY * sizeof(uint64_t))
+ return (-1ull);
+	return (1ull << x);
+}
+
+static void
+amdiommu_domain_init_pglvl(struct amdiommu_unit *unit,
+ struct amdiommu_domain *domain)
+{
+ iommu_gaddr_t end;
+ int hats, i;
+ uint64_t efr_hats;
+
+ end = DOM2IODOM(domain)->end;
+ for (i = AMDIOMMU_PGTBL_MAXLVL; i > 1; i--) {
+ if (lvl2addr(i) >= end && lvl2addr(i - 1) < end)
+ break;
+ }
+ domain->pglvl = i;
+
+ efr_hats = unit->efr & AMDIOMMU_EFR_HATS_MASK;
+ switch (efr_hats) {
+ case AMDIOMMU_EFR_HATS_6LVL:
+ hats = 6;
+ break;
+ case AMDIOMMU_EFR_HATS_5LVL:
+ hats = 5;
+ break;
+ case AMDIOMMU_EFR_HATS_4LVL:
+ hats = 4;
+ break;
+ default:
+ printf("amdiommu%d: HATS %#jx (reserved) ignoring\n",
+ unit->iommu.unit, (uintmax_t)efr_hats);
+ return;
+ }
+ if (hats >= domain->pglvl)
+ return;
+
+ printf("amdiommu%d: domain %d HATS %d pglvl %d reducing to HATS\n",
+ unit->iommu.unit, domain->domain, hats, domain->pglvl);
+ domain->pglvl = hats;
+ domain->iodom.end = lvl2addr(hats);
+}
+
+static struct amdiommu_domain *
+amdiommu_domain_alloc(struct amdiommu_unit *unit, bool id_mapped)
+{
+ struct amdiommu_domain *domain;
+ struct iommu_domain *iodom;
+ int error, id;
+
+ id = alloc_unr(unit->domids);
+ if (id == -1)
+ return (NULL);
+ domain = malloc(sizeof(*domain), M_AMDIOMMU_DOMAIN, M_WAITOK | M_ZERO);
+ iodom = DOM2IODOM(domain);
+ domain->domain = id;
+ LIST_INIT(&iodom->contexts);
+ iommu_domain_init(AMD2IOMMU(unit), iodom, &amdiommu_domain_map_ops);
+
+ domain->unit = unit;
+
+ domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
+ amdiommu_domain_init_pglvl(unit, domain);
+ iommu_gas_init_domain(DOM2IODOM(domain));
+
+ if (id_mapped) {
+ domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
+ } else {
+ error = amdiommu_domain_alloc_pgtbl(domain);
+ if (error != 0)
+ goto fail;
+ /* Disable local apic region access */
+ error = iommu_gas_reserve_region(iodom, 0xfee00000,
+ 0xfeefffff + 1, &iodom->msi_entry);
+ if (error != 0)
+ goto fail;
+ }
+
+ return (domain);
+
+fail:
+ amdiommu_domain_destroy(domain);
+ return (NULL);
+}
+
+static struct amdiommu_ctx *
+amdiommu_ctx_alloc(struct amdiommu_domain *domain, uint16_t rid)
+{
+ struct amdiommu_ctx *ctx;
+
+ ctx = malloc(sizeof(*ctx), M_AMDIOMMU_CTX, M_WAITOK | M_ZERO);
+ ctx->context.domain = DOM2IODOM(domain);
+ ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
+ M_AMDIOMMU_CTX, M_WAITOK | M_ZERO);
+ ctx->context.rid = rid;
+ ctx->context.refs = 1;
+ return (ctx);
+}
+
+static void
+amdiommu_ctx_link(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_domain *domain;
+
+ domain = CTX2DOM(ctx);
+ IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
+ KASSERT(domain->refs >= domain->ctx_cnt,
+ ("dom %p ref underflow %d %d", domain, domain->refs,
+ domain->ctx_cnt));
+ domain->refs++;
+ domain->ctx_cnt++;
+ LIST_INSERT_HEAD(&domain->iodom.contexts, &ctx->context, link);
+}
+
+static void
+amdiommu_ctx_unlink(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_domain *domain;
+
+ domain = CTX2DOM(ctx);
+ IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
+ KASSERT(domain->refs > 0,
+ ("domain %p ctx dtr refs %d", domain, domain->refs));
+ KASSERT(domain->ctx_cnt >= domain->refs,
+ ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
+ domain->refs, domain->ctx_cnt));
+ domain->refs--;
+ domain->ctx_cnt--;
+ LIST_REMOVE(&ctx->context, link);
+}
+
+struct amdiommu_ctx *
+amdiommu_find_ctx_locked(struct amdiommu_unit *unit, uint16_t rid)
+{
+ struct amdiommu_domain *domain;
+ struct iommu_ctx *ctx;
+
+ AMDIOMMU_ASSERT_LOCKED(unit);
+
+ LIST_FOREACH(domain, &unit->domains, link) {
+ LIST_FOREACH(ctx, &domain->iodom.contexts, link) {
+ if (ctx->rid == rid)
+ return (IOCTX2CTX(ctx));
+ }
+ }
+ return (NULL);
+}
+
+struct amdiommu_domain *
+amdiommu_find_domain(struct amdiommu_unit *unit, uint16_t rid)
+{
+ struct amdiommu_domain *domain;
+ struct iommu_ctx *ctx;
+
+ AMDIOMMU_LOCK(unit);
+ LIST_FOREACH(domain, &unit->domains, link) {
+ LIST_FOREACH(ctx, &domain->iodom.contexts, link) {
+ if (ctx->rid == rid)
+ break;
+ }
+ }
+ AMDIOMMU_UNLOCK(unit);
+ return (domain);
+}
+
+static void
+amdiommu_free_ctx_locked(struct amdiommu_unit *unit, struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_dte *dtep;
+ struct amdiommu_domain *domain;
+
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ KASSERT(ctx->context.refs >= 1,
+ ("amdiommu %p ctx %p refs %u", unit, ctx, ctx->context.refs));
+
+ /*
+	 * If our reference is not the last one, only the dereference
+	 * should be performed.
+ */
+ if (ctx->context.refs > 1) {
+ ctx->context.refs--;
+ AMDIOMMU_UNLOCK(unit);
+ return;
+ }
+
+ KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
+ ("lost ref on disabled ctx %p", ctx));
+
+ /*
+ * Otherwise, the device table entry must be cleared before
+ * the page table is destroyed.
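+	 * The valid bit is cleared first and the fence ensures that
+	 * clear is visible before the rest of the entry is zeroed.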
+ */
+ dtep = amdiommu_get_dtep(ctx);
+ dtep->v = 0;
+ atomic_thread_fence_rel();
+ memset(dtep, 0, sizeof(*dtep));
+
+ domain = CTX2DOM(ctx);
+ amdiommu_qi_invalidate_ctx_locked_nowait(ctx);
+ amdiommu_qi_invalidate_ir_locked_nowait(unit, ctx->context.rid);
+ amdiommu_qi_invalidate_all_pages_locked_nowait(domain);
+ amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx)));
+
+ if (unit->irte_enabled)
+ amdiommu_ctx_fini_irte(ctx);
+
+ amdiommu_ctx_unlink(ctx);
+ free(ctx->context.tag, M_AMDIOMMU_CTX);
+ free(ctx, M_AMDIOMMU_CTX);
+ amdiommu_unref_domain_locked(unit, domain);
+}
+
+static void
+amdiommu_free_ctx(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_unit *unit;
+
+ unit = CTX2AMD(ctx);
+ AMDIOMMU_LOCK(unit);
+ amdiommu_free_ctx_locked(unit, ctx);
+}
+
+static void
+amdiommu_unref_domain_locked(struct amdiommu_unit *unit,
+ struct amdiommu_domain *domain)
+{
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ KASSERT(domain->refs >= 1,
+ ("amdiommu%d domain %p refs %u", unit->iommu.unit, domain,
+ domain->refs));
+ KASSERT(domain->refs > domain->ctx_cnt,
+ ("amdiommu%d domain %p refs %d ctx_cnt %d", unit->iommu.unit,
+ domain, domain->refs, domain->ctx_cnt));
+
+ if (domain->refs > 1) {
+ domain->refs--;
+ AMDIOMMU_UNLOCK(unit);
+ return;
+ }
+
+ LIST_REMOVE(domain, link);
+ AMDIOMMU_UNLOCK(unit);
+
+ taskqueue_drain(unit->iommu.delayed_taskqueue,
+ &domain->iodom.unload_task);
+ amdiommu_domain_destroy(domain);
+}
+
+static void
+dte_entry_init_one(struct amdiommu_dte *dtep, struct amdiommu_ctx *ctx,
+ vm_page_t pgtblr, uint8_t dte, uint32_t edte)
+{
+ struct amdiommu_domain *domain;
+ struct amdiommu_unit *unit;
+
+ domain = CTX2DOM(ctx);
+ unit = DOM2AMD(domain);
+
+ dtep->tv = 1;
+	/* dtep->had is not used for now */
+ dtep->ir = 1;
+ dtep->iw = 1;
+ dtep->domainid = domain->domain;
+ dtep->pioctl = AMDIOMMU_DTE_PIOCTL_DIS;
+
+	/* Fill device interrupt-passing hints from IVHD. */
+ dtep->initpass = (dte & ACPI_IVHD_INIT_PASS) != 0;
+ dtep->eintpass = (dte & ACPI_IVHD_EINT_PASS) != 0;
+ dtep->nmipass = (dte & ACPI_IVHD_NMI_PASS) != 0;
+ dtep->sysmgt = (dte & ACPI_IVHD_SYSTEM_MGMT) >> 4;
+ dtep->lint0pass = (dte & ACPI_IVHD_LINT0_PASS) != 0;
+ dtep->lint1pass = (dte & ACPI_IVHD_LINT1_PASS) != 0;
+
+ if (unit->irte_enabled) {
+ dtep->iv = 1;
+ dtep->i = 0;
+ dtep->inttablen = ilog2(unit->irte_nentries);
+ dtep->intrroot = pmap_kextract(unit->irte_x2apic ?
+ (vm_offset_t)ctx->irtx2 :
+ (vm_offset_t)ctx->irtb) >> 6;
+
+ dtep->intctl = AMDIOMMU_DTE_INTCTL_MAP;
+ }
+
+ if ((DOM2IODOM(domain)->flags & IOMMU_DOMAIN_IDMAP) != 0) {
+ dtep->pgmode = AMDIOMMU_DTE_PGMODE_1T1;
+ } else {
+ MPASS(domain->pglvl > 0 && domain->pglvl <=
+ AMDIOMMU_PGTBL_MAXLVL);
+ dtep->pgmode = domain->pglvl;
+ dtep->ptroot = VM_PAGE_TO_PHYS(pgtblr) >> 12;
+ }
+
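+	/*
+	 * Publish the entry: the release fence orders the field stores
+	 * above before the store of the valid bit, so a concurrently
+	 * walking IOMMU cannot observe a half-initialized DTE.
+	 */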
+ atomic_thread_fence_rel();
+ dtep->v = 1;
+}
+
+static void
+dte_entry_init(struct amdiommu_ctx *ctx, bool move, uint8_t dte, uint32_t edte)
+{
+ struct amdiommu_dte *dtep;
+ struct amdiommu_unit *unit;
+ struct amdiommu_domain *domain;
+ int i;
+
+ domain = CTX2DOM(ctx);
+ unit = DOM2AMD(domain);
+
+ dtep = amdiommu_get_dtep(ctx);
+ KASSERT(dtep->v == 0,
+ ("amdiommu%d initializing valid dte @%p %#jx",
+ CTX2AMD(ctx)->iommu.unit, dtep, (uintmax_t)(*(uint64_t *)dtep)));
+
+ if (iommu_is_buswide_ctx(AMD2IOMMU(unit),
+ PCI_RID2BUS(ctx->context.rid))) {
+ MPASS(!move);
+ for (i = 0; i <= PCI_BUSMAX; i++) {
+ dte_entry_init_one(&dtep[i], ctx, domain->pgtblr,
+ dte, edte);
+ }
+ } else {
+ dte_entry_init_one(dtep, ctx, domain->pgtblr, dte, edte);
+ }
+}
+
+struct amdiommu_ctx *
+amdiommu_get_ctx_for_dev(struct amdiommu_unit *unit, device_t dev, uint16_t rid,
+ int dev_domain, bool id_mapped, bool rmrr_init, uint8_t dte, uint32_t edte)
+{
+ struct amdiommu_domain *domain, *domain1;
+ struct amdiommu_ctx *ctx, *ctx1;
+ int bus, slot, func;
+
+ if (dev != NULL) {
+ bus = pci_get_bus(dev);
+ slot = pci_get_slot(dev);
+ func = pci_get_function(dev);
+ } else {
+ bus = PCI_RID2BUS(rid);
+ slot = PCI_RID2SLOT(rid);
+ func = PCI_RID2FUNC(rid);
+ }
+ AMDIOMMU_LOCK(unit);
+ KASSERT(!iommu_is_buswide_ctx(AMD2IOMMU(unit), bus) ||
+ (slot == 0 && func == 0),
+ ("iommu%d pci%d:%d:%d get_ctx for buswide", AMD2IOMMU(unit)->unit,
+ bus, slot, func));
+ ctx = amdiommu_find_ctx_locked(unit, rid);
+ if (ctx == NULL) {
+ /*
+		 * Perform the allocations which require sleep or have a
+		 * higher chance to succeed if sleeping is allowed.
+ */
+ AMDIOMMU_UNLOCK(unit);
+ domain1 = amdiommu_domain_alloc(unit, id_mapped);
+ if (domain1 == NULL)
+ return (NULL);
+ if (!id_mapped) {
+ /*
+ * XXXKIB IVMD seems to be less significant
+ * and less used on AMD than RMRR on Intel.
+ * Not implemented for now.
+ */
+ }
+ ctx1 = amdiommu_ctx_alloc(domain1, rid);
+ amdiommu_ctx_init_irte(ctx1);
+ AMDIOMMU_LOCK(unit);
+
+ /*
+		 * Recheck the contexts; another thread might have
+		 * already allocated the needed one.
+ */
+ ctx = amdiommu_find_ctx_locked(unit, rid);
+ if (ctx == NULL) {
+ domain = domain1;
+ ctx = ctx1;
+ amdiommu_ctx_link(ctx);
+ ctx->context.tag->owner = dev;
+ iommu_device_tag_init(CTX2IOCTX(ctx), dev);
+
+ LIST_INSERT_HEAD(&unit->domains, domain, link);
+ dte_entry_init(ctx, false, dte, edte);
+ amdiommu_qi_invalidate_ctx_locked(ctx);
+ if (dev != NULL) {
+ device_printf(dev,
+ "amdiommu%d pci%d:%d:%d:%d rid %x domain %d "
+ "%s-mapped\n",
+ AMD2IOMMU(unit)->unit, unit->unit_dom,
+ bus, slot, func, rid, domain->domain,
+ id_mapped ? "id" : "re");
+ }
+ } else {
+ amdiommu_domain_destroy(domain1);
+ /* Nothing needs to be done to destroy ctx1. */
+ free(ctx1, M_AMDIOMMU_CTX);
+ domain = CTX2DOM(ctx);
+ ctx->context.refs++; /* tag referenced us */
+ }
+ } else {
+ domain = CTX2DOM(ctx);
+ if (ctx->context.tag->owner == NULL)
+ ctx->context.tag->owner = dev;
+ ctx->context.refs++; /* tag referenced us */
+ }
+ AMDIOMMU_UNLOCK(unit);
+
+ return (ctx);
+}
+
+struct iommu_ctx *
+amdiommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
+ bool id_mapped, bool rmrr_init)
+{
+ struct amdiommu_unit *unit;
+ struct amdiommu_ctx *ret;
+ int error;
+ uint32_t edte;
+ uint16_t rid1;
+ uint8_t dte;
+
+ error = amdiommu_find_unit(dev, &unit, &rid1, &dte, &edte,
+ bootverbose);
+ if (error != 0)
+ return (NULL);
+ if (AMD2IOMMU(unit) != iommu) /* XXX complain loudly */
+ return (NULL);
+ ret = amdiommu_get_ctx_for_dev(unit, dev, rid1, pci_get_domain(dev),
+ id_mapped, rmrr_init, dte, edte);
+ return (CTX2IOCTX(ret));
+}
+
+void
+amdiommu_free_ctx_locked_method(struct iommu_unit *iommu,
+ struct iommu_ctx *context)
+{
+ struct amdiommu_unit *unit;
+ struct amdiommu_ctx *ctx;
+
+ unit = IOMMU2AMD(iommu);
+ ctx = IOCTX2CTX(context);
+ amdiommu_free_ctx_locked(unit, ctx);
+}
+
+void
+amdiommu_free_ctx_method(struct iommu_ctx *context)
+{
+ struct amdiommu_ctx *ctx;
+
+ ctx = IOCTX2CTX(context);
+ amdiommu_free_ctx(ctx);
+}
diff --git a/sys/x86/iommu/amd_drv.c b/sys/x86/iommu/amd_drv.c
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_drv.c
@@ -0,0 +1,1205 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_acpi.h"
+#include "opt_ddb.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/domainset.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/smp.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/vmem.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <machine/bus.h>
+#include <machine/pci_cfgreg.h>
+#include "pcib_if.h"
+#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/cputypes.h>
+#include <x86/apicreg.h>
+#include <x86/apicvar.h>
+#include <dev/iommu/iommu.h>
+#include <x86/iommu/amd_reg.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/amd_iommu.h>
+
+static int amdiommu_enable = 0;
+
+/*
+ * All enumerated AMD IOMMU units.
+ * Access is unlocked, the list is not modified after early
+ * single-threaded startup.
+ */
+static TAILQ_HEAD(, amdiommu_unit) amdiommu_units =
+ TAILQ_HEAD_INITIALIZER(amdiommu_units);
+
+static u_int
+ivrs_info_to_unit_id(UINT32 info)
+{
+ return ((info & ACPI_IVHD_UNIT_ID_MASK) >> 8);
+}
+
+typedef bool (*amdiommu_itercc_t)(void *, void *);
+typedef bool (*amdiommu_iter40_t)(ACPI_IVRS_HARDWARE2 *, void *);
+typedef bool (*amdiommu_iter11_t)(ACPI_IVRS_HARDWARE2 *, void *);
+typedef bool (*amdiommu_iter10_t)(ACPI_IVRS_HARDWARE1 *, void *);
+
+static bool
+amdiommu_ivrs_iterate_tbl_typed(amdiommu_itercc_t iter, void *arg,
+ int type, ACPI_TABLE_IVRS *ivrs_tbl)
+{
+ char *ptr, *ptrend;
+ bool done;
+
+ done = false;
+ ptr = (char *)ivrs_tbl + sizeof(*ivrs_tbl);
+ ptrend = (char *)ivrs_tbl + ivrs_tbl->Header.Length;
+ for (;;) {
+ ACPI_IVRS_HEADER *ivrsh;
+
+ if (ptr >= ptrend)
+ break;
+ ivrsh = (ACPI_IVRS_HEADER *)ptr;
+ if (ivrsh->Length <= 0) {
+ printf("amdiommu_iterate_tbl: corrupted IVRS table, "
+ "length %d\n", ivrsh->Length);
+ break;
+ }
+ ptr += ivrsh->Length;
+ if (ivrsh->Type == type) {
+ done = iter((void *)ivrsh, arg);
+ if (done)
+ break;
+ }
+ }
+ return (done);
+}
+
+/*
+ * Walk over the IVRS, calling the callback iterators in priority
+ * order: 0x40 subtables first, then 0x11, then 0x10.  The first
+ * iterator that returns true ends the walk.
+ * Returns true if any iterator returned true, otherwise false.
+ */
+static bool
+amdiommu_ivrs_iterate_tbl(amdiommu_iter40_t iter40, amdiommu_iter11_t iter11,
+ amdiommu_iter10_t iter10, void *arg)
+{
+ ACPI_TABLE_IVRS *ivrs_tbl;
+ ACPI_STATUS status;
+ bool done;
+
+ status = AcpiGetTable(ACPI_SIG_IVRS, 1,
+ (ACPI_TABLE_HEADER **)&ivrs_tbl);
+ if (ACPI_FAILURE(status))
+ return (false);
+ done = false;
+ if (iter40 != NULL)
+ done = amdiommu_ivrs_iterate_tbl_typed(
+ (amdiommu_itercc_t)iter40, arg,
+ ACPI_IVRS_TYPE_HARDWARE3, ivrs_tbl);
+ if (!done && iter11 != NULL)
+ done = amdiommu_ivrs_iterate_tbl_typed(
+ (amdiommu_itercc_t)iter11, arg, ACPI_IVRS_TYPE_HARDWARE2,
+ ivrs_tbl);
+ if (!done && iter10 != NULL)
+ done = amdiommu_ivrs_iterate_tbl_typed(
+ (amdiommu_itercc_t)iter10, arg, ACPI_IVRS_TYPE_HARDWARE1,
+ ivrs_tbl);
+ AcpiPutTable((ACPI_TABLE_HEADER *)ivrs_tbl);
+ return (done);
+}
+
+struct ivhd_lookup_data {
+ struct amdiommu_unit *sc;
+ uint16_t devid;
+};
+
+static bool
+ivrs_lookup_ivhd_0x40(ACPI_IVRS_HARDWARE2 *h2, void *arg)
+{
+ struct ivhd_lookup_data *ildp;
+
+ KASSERT(h2->Header.Type == ACPI_IVRS_TYPE_HARDWARE2 ||
+ h2->Header.Type == ACPI_IVRS_TYPE_HARDWARE3,
+ ("Misparsed IVHD, h2 type %#x", h2->Header.Type));
+
+ ildp = arg;
+ if (h2->Header.DeviceId != ildp->devid)
+ return (false);
+
+ ildp->sc->unit_dom = h2->PciSegmentGroup;
+ ildp->sc->iommu.unit = ivrs_info_to_unit_id(h2->Info);
+ ildp->sc->efr = h2->EfrRegisterImage;
+ return (true);
+}
+
+static bool
+ivrs_lookup_ivhd_0x10(ACPI_IVRS_HARDWARE1 *h1, void *arg)
+{
+ struct ivhd_lookup_data *ildp;
+
+ KASSERT(h1->Header.Type == ACPI_IVRS_TYPE_HARDWARE1,
+ ("Misparsed IVHD, h1 type %#x", h1->Header.Type));
+
+ ildp = arg;
+ if (h1->Header.DeviceId != ildp->devid)
+ return (false);
+
+ ildp->sc->unit_dom = h1->PciSegmentGroup;
+ ildp->sc->iommu.unit = ivrs_info_to_unit_id(h1->Info);
+ return (true);
+}
+
+static u_int
+amdiommu_devtbl_sz(struct amdiommu_unit *sc __unused)
+{
+ return (sizeof(struct amdiommu_dte) * (1 << 16));
+}
+
+static void
+amdiommu_free_dev_tbl(struct amdiommu_unit *sc)
+{
+ u_int devtbl_sz;
+
+ devtbl_sz = amdiommu_devtbl_sz(sc);
+ pmap_qremove((vm_offset_t)sc->dev_tbl, atop(devtbl_sz));
+ kva_free((vm_offset_t)sc->dev_tbl, devtbl_sz);
+ sc->dev_tbl = NULL;
+ vm_object_deallocate(sc->devtbl_obj);
+ sc->devtbl_obj = NULL;
+}
+
+static int
+amdiommu_create_dev_tbl(struct amdiommu_unit *sc)
+{
+ vm_offset_t seg_vaddr;
+ u_int devtbl_sz, dom, i, reclaimno, segnum_log, segnum, seg_sz;
+ int error;
+
+ segnum_log = (sc->efr & AMDIOMMU_EFR_DEVTBLSEG_MASK) >>
+ AMDIOMMU_EFR_DEVTBLSEG_SHIFT;
+ segnum = 1 << segnum_log;
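+	/*
+	 * The device table covers 2^16 rids; with segmentation enabled
+	 * it is split into 2^segnum_log equally-sized segments, e.g. a
+	 * DevTblSeg value of 3 gives 8 segments, each programmed into
+	 * its own base register below.
+	 */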
+
+ devtbl_sz = amdiommu_devtbl_sz(sc);
+ seg_sz = devtbl_sz / segnum;
+ sc->devtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, atop(devtbl_sz),
+ VM_PROT_ALL, 0, NULL);
+ if (bus_get_domain(sc->iommu.dev, &dom) == 0)
+ sc->devtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom);
+
+ sc->hw_ctrl &= ~AMDIOMMU_CTRL_DEVTABSEG_MASK;
+ sc->hw_ctrl |= (uint64_t)segnum_log << ilog2(AMDIOMMU_CTRL_DEVTABSEG_2);
+ sc->hw_ctrl |= AMDIOMMU_CTRL_COHERENT;
+ amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl);
+
+ seg_vaddr = kva_alloc(devtbl_sz);
+ if (seg_vaddr == 0)
+ return (ENOMEM);
+ sc->dev_tbl = (void *)seg_vaddr;
+
+ for (i = 0; i < segnum; i++) {
+ vm_page_t m;
+ uint64_t rval;
+ u_int reg;
+
+ for (reclaimno = 0; reclaimno < 3; reclaimno++) {
+ VM_OBJECT_WLOCK(sc->devtbl_obj);
+ m = vm_page_alloc_contig(sc->devtbl_obj,
+ i * atop(seg_sz),
+ VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY,
+ atop(seg_sz), 0, ~0ul, IOMMU_PAGE_SIZE, 0,
+ VM_MEMATTR_DEFAULT);
+ VM_OBJECT_WUNLOCK(sc->devtbl_obj);
+ if (m != NULL)
+ break;
+ error = vm_page_reclaim_contig(VM_ALLOC_NORMAL,
+ atop(seg_sz), 0, ~0ul, IOMMU_PAGE_SIZE, 0);
+ if (error != 0)
+ vm_wait(sc->devtbl_obj);
+ }
+ if (m == NULL) {
+ amdiommu_free_dev_tbl(sc);
+ return (ENOMEM);
+ }
+
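+		/*
+		 * The base register takes the segment's physical
+		 * address with the size encoded in the low bits as the
+		 * number of 4 KB pages minus one; segment 0 uses the
+		 * legacy DEVTAB_BASE register, the others the
+		 * per-segment base registers.
+		 */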
+ rval = VM_PAGE_TO_PHYS(m) | (atop(seg_sz) - 1);
+ for (u_int j = 0; j < atop(seg_sz);
+ j++, seg_vaddr += PAGE_SIZE, m++) {
+ pmap_zero_page(m);
+ pmap_qenter(seg_vaddr, &m, 1);
+ }
+ reg = i == 0 ? AMDIOMMU_DEVTAB_BASE : AMDIOMMU_DEVTAB_S1_BASE +
+ i - 1;
+ amdiommu_write8(sc, reg, rval);
+ }
+
+ return (0);
+}
+
+static int
+amdiommu_cmd_event_intr(void *arg)
+{
+ struct amdiommu_unit *unit;
+ uint64_t status;
+
+ unit = arg;
+ status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS);
+ if ((status & AMDIOMMU_CMDEVS_COMWAITINT) != 0) {
+ amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS,
+ AMDIOMMU_CMDEVS_COMWAITINT);
+ taskqueue_enqueue(unit->x86c.qi_taskqueue,
+ &unit->x86c.qi_task);
+ }
+ if ((status & (AMDIOMMU_CMDEVS_EVLOGINT |
+ AMDIOMMU_CMDEVS_EVOVRFLW)) != 0)
+ amdiommu_event_intr(unit, status);
+ return (FILTER_HANDLED);
+}
+
+static int
+amdiommu_setup_intr(struct amdiommu_unit *sc)
+{
+ int error, msi_count, msix_count;
+
+ msi_count = pci_msi_count(sc->iommu.dev);
+ msix_count = pci_msix_count(sc->iommu.dev);
+ if (msi_count == 0 && msix_count == 0) {
+ device_printf(sc->iommu.dev, "needs MSI-class intr\n");
+ return (ENXIO);
+ }
+
+#if 0
+ /*
+ * XXXKIB how MSI-X is supposed to be organized for BAR-less
+ * function? Practically available hardware implements only
+ * one IOMMU unit per function, and uses MSI.
+ */
+ if (msix_count > 0) {
+ sc->msix_table = bus_alloc_resource_any(sc->iommu.dev,
+ SYS_RES_MEMORY, &sc->msix_tab_rid, RF_ACTIVE);
+ if (sc->msix_table == NULL)
+ return (ENXIO);
+
+ if (sc->msix_pba_rid != sc->msix_tab_rid) {
+ /* Separate BAR for PBA */
+ sc->msix_pba = bus_alloc_resource_any(sc->iommu.dev,
+ SYS_RES_MEMORY,
+ &sc->msix_pba_rid, RF_ACTIVE);
+ if (sc->msix_pba == NULL) {
+ bus_release_resource(sc->iommu.dev,
+ SYS_RES_MEMORY, &sc->msix_tab_rid,
+ sc->msix_table);
+ return (ENXIO);
+ }
+ }
+ }
+#endif
+
+ error = ENXIO;
+ if (msix_count > 0) {
+ error = pci_alloc_msix(sc->iommu.dev, &msix_count);
+ if (error == 0)
+ sc->numirqs = msix_count;
+ }
+ if (error != 0 && msi_count > 0) {
+ error = pci_alloc_msi(sc->iommu.dev, &msi_count);
+ if (error == 0)
+ sc->numirqs = msi_count;
+ }
+ if (error != 0) {
+ device_printf(sc->iommu.dev,
+ "Failed to allocate MSI/MSI-x (%d)\n", error);
+ return (ENXIO);
+ }
+
+ /*
+	 * XXXKIB The spec states that MISC0.MsiNum must be zero for an
+	 * IOMMU using MSI interrupts.  But at least one BIOS programmed
+	 * '2' there, making the driver use the wrong rid and causing
+	 * the command/event interrupt to be ignored as stray.  Work
+	 * around it by forcibly assuming that MsiNum is zero for MSI.
+ */
+ sc->irq_cmdev_rid = 1;
+ if (msix_count > 0) {
+ sc->irq_cmdev_rid += pci_read_config(sc->iommu.dev,
+ sc->seccap_reg + PCIR_AMDIOMMU_MISC0, 4) &
+ PCIM_AMDIOMMU_MISC0_MSINUM_MASK;
+ }
+
+ sc->irq_cmdev = bus_alloc_resource_any(sc->iommu.dev, SYS_RES_IRQ,
+ &sc->irq_cmdev_rid, RF_SHAREABLE | RF_ACTIVE);
+ if (sc->irq_cmdev == NULL) {
+ device_printf(sc->iommu.dev,
+ "unable to map CMD/EV interrupt\n");
+ return (ENXIO);
+ }
+ error = bus_setup_intr(sc->iommu.dev, sc->irq_cmdev,
+ INTR_TYPE_MISC, amdiommu_cmd_event_intr, NULL, sc,
+ &sc->irq_cmdev_cookie);
+ if (error != 0) {
+ device_printf(sc->iommu.dev,
+ "unable to setup interrupt (%d)\n", error);
+ return (ENXIO);
+ }
+ bus_describe_intr(sc->iommu.dev, sc->irq_cmdev, sc->irq_cmdev_cookie,
+ "cmdev");
+
+ if (x2apic_mode) {
+ AMDIOMMU_LOCK(sc);
+ sc->hw_ctrl |= AMDIOMMU_CTRL_GA_EN | AMDIOMMU_CTRL_XT_EN;
+ amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl);
+ // XXXKIB AMDIOMMU_CTRL_INTCAPXT_EN and program x2APIC_CTRL
+ AMDIOMMU_UNLOCK(sc);
+ }
+
+ return (0);
+}
+
+static int
+amdiommu_probe(device_t dev)
+{
+ int seccap_reg;
+ int error;
+ uint32_t cap_h, cap_type, cap_rev;
+
+ if (acpi_disabled("amdiommu"))
+ return (ENXIO);
+ TUNABLE_INT_FETCH("hw.amdiommu.enable", &amdiommu_enable);
+ if (!amdiommu_enable)
+ return (ENXIO);
+ if (pci_get_class(dev) != PCIC_BASEPERIPH ||
+ pci_get_subclass(dev) != PCIS_BASEPERIPH_IOMMU)
+ return (ENXIO);
+
+ error = pci_find_cap(dev, PCIY_SECDEV, &seccap_reg);
+ if (error != 0 || seccap_reg == 0)
+ return (ENXIO);
+
+ cap_h = pci_read_config(dev, seccap_reg + PCIR_AMDIOMMU_CAP_HEADER,
+ 4);
+ cap_type = cap_h & PCIM_AMDIOMMU_CAP_TYPE_MASK;
+ cap_rev = cap_h & PCIM_AMDIOMMU_CAP_REV_MASK;
+ if (cap_type != PCIM_AMDIOMMU_CAP_TYPE_VAL &&
+ cap_rev != PCIM_AMDIOMMU_CAP_REV_VAL)
+ return (ENXIO);
+
+ device_set_desc(dev, "DMA remap");
+ return (BUS_PROBE_SPECIFIC);
+}
+
+static int
+amdiommu_attach(device_t dev)
+{
+ struct amdiommu_unit *sc;
+ struct ivhd_lookup_data ild;
+ int error;
+ uint32_t base_low, base_high;
+ bool res;
+
+ sc = device_get_softc(dev);
+ sc->iommu.dev = dev;
+
+ error = pci_find_cap(dev, PCIY_SECDEV, &sc->seccap_reg);
+ if (error != 0 || sc->seccap_reg == 0)
+ return (ENXIO);
+
+ base_low = pci_read_config(dev, sc->seccap_reg +
+ PCIR_AMDIOMMU_BASE_LOW, 4);
+ base_high = pci_read_config(dev, sc->seccap_reg +
+ PCIR_AMDIOMMU_BASE_HIGH, 4);
+ sc->mmio_base = (base_low & PCIM_AMDIOMMU_BASE_LOW_ADDRM) |
+ ((uint64_t)base_high << 32);
+
+ sc->device_id = pci_get_rid(dev);
+ ild.sc = sc;
+ ild.devid = sc->device_id;
+ res = amdiommu_ivrs_iterate_tbl(ivrs_lookup_ivhd_0x40,
+ ivrs_lookup_ivhd_0x40, ivrs_lookup_ivhd_0x10, &ild);
+ if (!res) {
+ device_printf(dev, "Cannot find IVHD\n");
+ return (ENXIO);
+ }
+
+ mtx_init(&sc->iommu.lock, "amdihw", NULL, MTX_DEF);
+ sc->domids = new_unrhdr(0, 0xffff, &sc->iommu.lock);
+ LIST_INIT(&sc->domains);
+ sysctl_ctx_init(&sc->iommu.sysctl_ctx);
+
+ sc->mmio_sz = ((sc->efr & AMDIOMMU_EFR_PC_SUP) != 0 ? 512 : 16) *
+ 1024;
+
+ sc->mmio_rid = AMDIOMMU_RID;
+ error = bus_set_resource(dev, SYS_RES_MEMORY, AMDIOMMU_RID,
+ sc->mmio_base, sc->mmio_sz);
+ if (error != 0) {
+ device_printf(dev,
+ "bus_set_resource %#jx-%#jx failed, error %d\n",
+ (uintmax_t)sc->mmio_base, (uintmax_t)sc->mmio_base +
+ sc->mmio_sz, error);
+ error = ENXIO;
+ goto errout1;
+ }
+ sc->mmio_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &sc->mmio_rid,
+ sc->mmio_base, sc->mmio_base + sc->mmio_sz - 1, sc->mmio_sz,
+ RF_ALLOCATED | RF_ACTIVE | RF_SHAREABLE);
+ if (sc->mmio_res == NULL) {
+ device_printf(dev,
+ "bus_alloc_resource %#jx-%#jx failed\n",
+ (uintmax_t)sc->mmio_base, (uintmax_t)sc->mmio_base +
+ sc->mmio_sz);
+ error = ENXIO;
+ goto errout2;
+ }
+
+ sc->hw_ctrl = amdiommu_read8(sc, AMDIOMMU_CTRL);
+ if (bootverbose)
+ device_printf(dev, "ctrl reg %#jx\n", (uintmax_t)sc->hw_ctrl);
+ if ((sc->hw_ctrl & AMDIOMMU_CTRL_EN) != 0) {
+ device_printf(dev, "CTRL_EN is set, bailing out\n");
+ error = EBUSY;
+ goto errout2;
+ }
+
+ iommu_high = BUS_SPACE_MAXADDR;
+
+ error = amdiommu_create_dev_tbl(sc);
+ if (error != 0)
+ goto errout3;
+
+ error = amdiommu_init_cmd(sc);
+ if (error != 0)
+ goto errout4;
+
+ error = amdiommu_init_event(sc);
+ if (error != 0)
+ goto errout5;
+
+ error = amdiommu_setup_intr(sc);
+ if (error != 0)
+ goto errout6;
+
+ error = iommu_init_busdma(AMD2IOMMU(sc));
+ if (error != 0)
+ goto errout7;
+
+ error = amdiommu_init_irt(sc);
+ if (error != 0)
+ goto errout8;
+
+ /*
+	 * Unlike DMAR, the AMD IOMMU does not process the command queue
+	 * unless the IOMMU is enabled.  But since a non-present devtab
+	 * entry makes the IOMMU ignore transactions from the
+	 * corresponding initiator, DMA and interrupt remapping are
+	 * de facto disabled until then.
+ */
+ AMDIOMMU_LOCK(sc);
+ sc->hw_ctrl |= AMDIOMMU_CTRL_EN;
+ amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl);
+ if (bootverbose) {
+ printf("amdiommu%d: enabled translation\n",
+ AMD2IOMMU(sc)->unit);
+ }
+ AMDIOMMU_UNLOCK(sc);
+
+ TAILQ_INSERT_TAIL(&amdiommu_units, sc, unit_next);
+ return (0);
+
+errout8:
+ iommu_fini_busdma(&sc->iommu);
+errout7:
+ pci_release_msi(dev);
+errout6:
+ amdiommu_fini_event(sc);
+errout5:
+ amdiommu_fini_cmd(sc);
+errout4:
+ amdiommu_free_dev_tbl(sc);
+errout3:
+ bus_release_resource(dev, SYS_RES_MEMORY, sc->mmio_rid, sc->mmio_res);
+errout2:
+ bus_delete_resource(dev, SYS_RES_MEMORY, sc->mmio_rid);
+errout1:
+ sysctl_ctx_free(&sc->iommu.sysctl_ctx);
+ delete_unrhdr(sc->domids);
+ mtx_destroy(&sc->iommu.lock);
+
+ return (error);
+}
+
+static int
+amdiommu_detach(device_t dev)
+{
+ return (EBUSY);
+}
+
+static int
+amdiommu_suspend(device_t dev)
+{
+ /* XXXKIB */
+ return (0);
+}
+
+static int
+amdiommu_resume(device_t dev)
+{
+ /* XXXKIB */
+ return (0);
+}
+
+static device_method_t amdiommu_methods[] = {
+ DEVMETHOD(device_probe, amdiommu_probe),
+ DEVMETHOD(device_attach, amdiommu_attach),
+ DEVMETHOD(device_detach, amdiommu_detach),
+ DEVMETHOD(device_suspend, amdiommu_suspend),
+ DEVMETHOD(device_resume, amdiommu_resume),
+ DEVMETHOD_END
+};
+
+static driver_t amdiommu_driver = {
+ "amdiommu",
+ amdiommu_methods,
+ sizeof(struct amdiommu_unit),
+};
+
+EARLY_DRIVER_MODULE(amdiommu, pci, amdiommu_driver, 0, 0, BUS_PASS_SUPPORTDEV);
+MODULE_DEPEND(amdiommu, pci, 1, 1, 1);
+
+static struct amdiommu_unit *
+amdiommu_unit_by_device_id(u_int pci_seg, u_int device_id)
+{
+ struct amdiommu_unit *unit;
+
+ TAILQ_FOREACH(unit, &amdiommu_units, unit_next) {
+ if (unit->unit_dom == pci_seg && unit->device_id == device_id)
+ return (unit);
+ }
+ return (NULL);
+}
+
+struct ivhd_find_unit {
+ u_int domain;
+ uintptr_t rid;
+ int devno;
+ enum {
+ IFU_DEV_PCI,
+ IFU_DEV_IOAPIC,
+ IFU_DEV_HPET,
+ } type;
+ u_int device_id;
+ uint16_t rid_real;
+ uint8_t dte;
+ uint32_t edte;
+};
+
+static bool
+amdiommu_find_unit_scan_ivrs(ACPI_IVRS_DE_HEADER *d, size_t tlen,
+ struct ivhd_find_unit *ifu)
+{
+ char *db, *de;
+ size_t len;
+
+ for (de = (char *)d + tlen; (char *)d < de;
+ d = (ACPI_IVRS_DE_HEADER *)(db + len)) {
+ db = (char *)d;
+ if (d->Type == ACPI_IVRS_TYPE_PAD4) {
+ len = sizeof(ACPI_IVRS_DEVICE4);
+ } else if (d->Type == ACPI_IVRS_TYPE_ALL) {
+ ACPI_IVRS_DEVICE4 *d4;
+
+ d4 = (ACPI_IVRS_DEVICE4 *)db;
+ len = sizeof(*d4);
+ ifu->dte = d4->Header.DataSetting;
+ } else if (d->Type == ACPI_IVRS_TYPE_SELECT) {
+ ACPI_IVRS_DEVICE4 *d4;
+
+ d4 = (ACPI_IVRS_DEVICE4 *)db;
+ if (d4->Header.Id == ifu->rid) {
+ ifu->dte = d4->Header.DataSetting;
+ ifu->rid_real = ifu->rid;
+ return (true);
+ }
+ len = sizeof(*d4);
+ } else if (d->Type == ACPI_IVRS_TYPE_START) {
+ ACPI_IVRS_DEVICE4 *d4, *d4n;
+
+ d4 = (ACPI_IVRS_DEVICE4 *)db;
+ d4n = d4 + 1;
+ if (d4n->Header.Type != ACPI_IVRS_TYPE_END) {
+ printf("IVRS dev4 start not followed by END "
+ "(%#x)\n", d4n->Header.Type);
+ return (false);
+ }
+ if (d4->Header.Id <= ifu->rid &&
+ ifu->rid <= d4n->Header.Id) {
+ ifu->dte = d4->Header.DataSetting;
+ ifu->rid_real = ifu->rid;
+ return (true);
+ }
+ len = 2 * sizeof(*d4);
+ } else if (d->Type == ACPI_IVRS_TYPE_PAD8) {
+ len = sizeof(ACPI_IVRS_DEVICE8A);
+ } else if (d->Type == ACPI_IVRS_TYPE_ALIAS_SELECT) {
+ ACPI_IVRS_DEVICE8A *d8a;
+
+ d8a = (ACPI_IVRS_DEVICE8A *)db;
+ if (d8a->Header.Id == ifu->rid) {
+ ifu->dte = d8a->Header.DataSetting;
+ ifu->rid_real = d8a->UsedId;
+ return (true);
+ }
+ len = sizeof(*d8a);
+ } else if (d->Type == ACPI_IVRS_TYPE_ALIAS_START) {
+ ACPI_IVRS_DEVICE8A *d8a;
+ ACPI_IVRS_DEVICE4 *d4;
+
+ d8a = (ACPI_IVRS_DEVICE8A *)db;
+ d4 = (ACPI_IVRS_DEVICE4 *)(d8a + 1);
+ if (d4->Header.Type != ACPI_IVRS_TYPE_END) {
+ printf("IVRS alias start not followed by END "
+ "(%#x)\n", d4->Header.Type);
+ return (false);
+ }
+ if (d8a->Header.Id <= ifu->rid &&
+ ifu->rid <= d4->Header.Id) {
+ ifu->dte = d8a->Header.DataSetting;
+ ifu->rid_real = d8a->UsedId;
+ return (true);
+ }
+ len = sizeof(*d8a) + sizeof(*d4);
+ } else if (d->Type == ACPI_IVRS_TYPE_EXT_SELECT) {
+ ACPI_IVRS_DEVICE8B *d8b;
+
+ d8b = (ACPI_IVRS_DEVICE8B *)db;
+ if (d8b->Header.Id == ifu->rid) {
+ ifu->dte = d8b->Header.DataSetting;
+ ifu->rid_real = ifu->rid;
+ ifu->edte = d8b->ExtendedData;
+ return (true);
+ }
+ len = sizeof(*d8b);
+ } else if (d->Type == ACPI_IVRS_TYPE_EXT_START) {
+ ACPI_IVRS_DEVICE8B *d8b;
+ ACPI_IVRS_DEVICE4 *d4;
+
+ d8b = (ACPI_IVRS_DEVICE8B *)db;
+ d4 = (ACPI_IVRS_DEVICE4 *)(db + sizeof(*d8b));
+ if (d4->Header.Type != ACPI_IVRS_TYPE_END) {
+ printf("IVRS ext start not followed by END "
+ "(%#x)\n", d4->Header.Type);
+ return (false);
+ }
+			if (d8b->Header.Id <= ifu->rid &&
+ ifu->rid <= d4->Header.Id) {
+ ifu->dte = d8b->Header.DataSetting;
+ ifu->rid_real = ifu->rid;
+ ifu->edte = d8b->ExtendedData;
+ return (true);
+ }
+ len = sizeof(*d8b) + sizeof(*d4);
+ } else if (d->Type == ACPI_IVRS_TYPE_SPECIAL) {
+ ACPI_IVRS_DEVICE8C *d8c;
+
+ d8c = (ACPI_IVRS_DEVICE8C *)db;
+ if (((ifu->type == IFU_DEV_IOAPIC &&
+ d8c->Variety == ACPI_IVHD_IOAPIC) ||
+ (ifu->type == IFU_DEV_HPET &&
+ d8c->Variety == ACPI_IVHD_HPET)) &&
+ ifu->devno == d8c->Handle) {
+ ifu->dte = d8c->Header.DataSetting;
+ ifu->rid_real = d8c->UsedId;
+ return (true);
+ }
+ len = sizeof(*d8c);
+ } else if (d->Type == ACPI_IVRS_TYPE_HID) {
+ ACPI_IVRS_DEVICE_HID *dh;
+
+ dh = (ACPI_IVRS_DEVICE_HID *)db;
+ len = sizeof(*dh) + dh->UidLength;
+ /* XXXKIB */
+ } else {
+#if 0
+ printf("amdiommu: unknown IVRS device entry type %#x\n",
+ d->Type);
+#endif
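+			/*
+			 * Per the IVRS layout, device entry types below
+			 * 64 are 4 bytes long and types 64..127 are
+			 * 8 bytes, which lets unknown entries be
+			 * skipped safely; longer variable-length
+			 * entries cannot be skipped blindly.
+			 */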
+ if (d->Type <= 63)
+ len = sizeof(ACPI_IVRS_DEVICE4);
+ else if (d->Type <= 127)
+ len = sizeof(ACPI_IVRS_DEVICE8A);
+ else {
+ printf("amdiommu: abort, cannot "
+ "advance iterator, item type %#x\n",
+ d->Type);
+ return (false);
+ }
+ }
+ }
+ return (false);
+}
+
+static bool
+amdiommu_find_unit_scan_0x11(ACPI_IVRS_HARDWARE2 *ivrs, void *arg)
+{
+ struct ivhd_find_unit *ifu = arg;
+ ACPI_IVRS_DE_HEADER *d;
+ bool res;
+
+ KASSERT(ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE2 ||
+ ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE3,
+ ("Misparsed IVHD h2, ivrs type %#x", ivrs->Header.Type));
+
+ if (ifu->domain != ivrs->PciSegmentGroup)
+ return (false);
+ d = (ACPI_IVRS_DE_HEADER *)(ivrs + 1);
+ res = amdiommu_find_unit_scan_ivrs(d, ivrs->Header.Length, ifu);
+ if (res)
+ ifu->device_id = ivrs->Header.DeviceId;
+ return (res);
+}
+
+static bool
+amdiommu_find_unit_scan_0x10(ACPI_IVRS_HARDWARE1 *ivrs, void *arg)
+{
+ struct ivhd_find_unit *ifu = arg;
+ ACPI_IVRS_DE_HEADER *d;
+ bool res;
+
+ KASSERT(ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE1,
+ ("Misparsed IVHD h1, ivrs type %#x", ivrs->Header.Type));
+
+ if (ifu->domain != ivrs->PciSegmentGroup)
+ return (false);
+ d = (ACPI_IVRS_DE_HEADER *)(ivrs + 1);
+ res = amdiommu_find_unit_scan_ivrs(d, ivrs->Header.Length, ifu);
+ if (res)
+ ifu->device_id = ivrs->Header.DeviceId;
+ return (res);
+}
+
+static void
+amdiommu_dev_prop_dtr(device_t dev, const char *name, void *val, void *dtr_ctx)
+{
+ free(val, M_DEVBUF);
+}
+
+static int *
+amdiommu_dev_fetch_flagsp(struct amdiommu_unit *unit, device_t dev)
+{
+ int *flagsp, error;
+
+ bus_topo_assert();
+ error = device_get_prop(dev, device_get_nameunit(unit->iommu.dev),
+ (void **)&flagsp);
+ if (error == ENOENT) {
+ flagsp = malloc(sizeof(int), M_DEVBUF, M_WAITOK | M_ZERO);
+ device_set_prop(dev, device_get_nameunit(unit->iommu.dev),
+ flagsp, amdiommu_dev_prop_dtr, unit);
+ }
+ return (flagsp);
+}
+
+static int
+amdiommu_get_dev_prop_flags(struct amdiommu_unit *unit, device_t dev)
+{
+ int *flagsp, flags;
+
+ bus_topo_lock();
+ flagsp = amdiommu_dev_fetch_flagsp(unit, dev);
+ flags = *flagsp;
+ bus_topo_unlock();
+ return (flags);
+}
+
+static void
+amdiommu_set_dev_prop_flags(struct amdiommu_unit *unit, device_t dev,
+ int flag)
+{
+ int *flagsp;
+
+ bus_topo_lock();
+ flagsp = amdiommu_dev_fetch_flagsp(unit, dev);
+ *flagsp |= flag;
+ bus_topo_unlock();
+}
+
+int
+amdiommu_find_unit(device_t dev, struct amdiommu_unit **unitp, uint16_t *ridp,
+ uint8_t *dtep, uint32_t *edtep, bool verbose)
+{
+ struct ivhd_find_unit ifu;
+ struct amdiommu_unit *unit;
+ int error, flags;
+ bool res;
+
+ if (device_get_devclass(device_get_parent(dev)) !=
+ devclass_find("pci"))
+ return (ENXIO);
+
+ bzero(&ifu, sizeof(ifu));
+ ifu.type = IFU_DEV_PCI;
+
+ error = pci_get_id(dev, PCI_ID_RID, &ifu.rid);
+ if (error != 0) {
+ if (verbose)
+ device_printf(dev,
+ "amdiommu cannot get rid, error %d\n", error);
+ return (ENXIO);
+ }
+
+ ifu.domain = pci_get_domain(dev);
+ res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11,
+ amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu);
+ if (!res) {
+ if (verbose)
+ device_printf(dev,
+ "(%#06x:%#06x) amdiommu cannot match rid in IVHD\n",
+ ifu.domain, (unsigned)ifu.rid);
+ return (ENXIO);
+ }
+
+ unit = amdiommu_unit_by_device_id(ifu.domain, ifu.device_id);
+ if (unit == NULL) {
+ if (verbose)
+ device_printf(dev,
+ "(%#06x:%#06x) amdiommu cannot find unit\n",
+ ifu.domain, (unsigned)ifu.rid);
+ return (ENXIO);
+ }
+ *unitp = unit;
+ iommu_device_set_iommu_prop(dev, unit->iommu.dev);
+ if (ridp != NULL)
+ *ridp = ifu.rid_real;
+ if (dtep != NULL)
+ *dtep = ifu.dte;
+ if (edtep != NULL)
+ *edtep = ifu.edte;
+ if (verbose) {
+ flags = amdiommu_get_dev_prop_flags(unit, dev);
+ if ((flags & AMDIOMMU_DEV_REPORTED) == 0) {
+ amdiommu_set_dev_prop_flags(unit, dev,
+ AMDIOMMU_DEV_REPORTED);
+ device_printf(dev, "amdiommu%d "
+ "initiator rid %#06x dte %#x edte %#x\n",
+ unit->iommu.unit, ifu.rid_real, ifu.dte, ifu.edte);
+ }
+ }
+ return (0);
+}
+
+int
+amdiommu_find_unit_for_ioapic(int apic_id, struct amdiommu_unit **unitp,
+ uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose)
+{
+ struct ivhd_find_unit ifu;
+ struct amdiommu_unit *unit;
+ device_t apic_dev;
+ bool res;
+
+ bzero(&ifu, sizeof(ifu));
+ ifu.type = IFU_DEV_IOAPIC;
+ ifu.devno = apic_id;
+ ifu.rid = -1;
+
+ res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11,
+ amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu);
+ if (!res) {
+ if (verbose)
+ printf("amdiommu cannot match ioapic no %d in IVHD\n",
+ apic_id);
+ return (ENXIO);
+ }
+
+	unit = amdiommu_unit_by_device_id(0, ifu.device_id);
+	if (unit == NULL) {
+		if (verbose)
+			printf("amdiommu cannot find unit by dev id %#x\n",
+			    ifu.device_id);
+		return (ENXIO);
+	}
+	apic_dev = ioapic_get_dev(apic_id);
+	if (apic_dev != NULL)
+		iommu_device_set_iommu_prop(apic_dev, unit->iommu.dev);
+ *unitp = unit;
+ if (ridp != NULL)
+ *ridp = ifu.rid_real;
+ if (dtep != NULL)
+ *dtep = ifu.dte;
+ if (edtep != NULL)
+ *edtep = ifu.edte;
+ if (verbose) {
+ printf("amdiommu%d IOAPIC %d "
+ "initiator rid %#06x dte %#x edte %#x\n",
+ unit->iommu.unit, apic_id, ifu.rid_real, ifu.dte,
+ ifu.edte);
+ }
+ return (0);
+}
+
+int
+amdiommu_find_unit_for_hpet(device_t hpet, struct amdiommu_unit **unitp,
+ uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose)
+{
+ struct ivhd_find_unit ifu;
+ struct amdiommu_unit *unit;
+ int hpet_no;
+ bool res;
+
+ hpet_no = hpet_get_uid(hpet);
+ bzero(&ifu, sizeof(ifu));
+ ifu.type = IFU_DEV_HPET;
+ ifu.devno = hpet_no;
+ ifu.rid = -1;
+
+ res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11,
+ amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu);
+ if (!res) {
+ printf("amdiommu cannot match hpet no %d in IVHD\n",
+ hpet_no);
+ return (ENXIO);
+ }
+
+ unit = amdiommu_unit_by_device_id(0, ifu.device_id);
+ if (unit == NULL) {
+ if (verbose)
+ printf("amdiommu cannot find unit id %d\n",
+ hpet_no);
+ return (ENXIO);
+ }
+ *unitp = unit;
+ iommu_device_set_iommu_prop(hpet, unit->iommu.dev);
+ if (ridp != NULL)
+ *ridp = ifu.rid_real;
+ if (dtep != NULL)
+ *dtep = ifu.dte;
+ if (edtep != NULL)
+ *edtep = ifu.edte;
+ if (verbose) {
+ printf("amdiommu%d HPET no %d "
+ "initiator rid %#06x dte %#x edte %#x\n",
+ unit->iommu.unit, hpet_no, ifu.rid_real, ifu.dte,
+ ifu.edte);
+ }
+ return (0);
+}
+
+static struct iommu_unit *
+amdiommu_find_method(device_t dev, bool verbose)
+{
+ struct amdiommu_unit *unit;
+ int error;
+ uint32_t edte;
+ uint16_t rid;
+ uint8_t dte;
+
+ error = amdiommu_find_unit(dev, &unit, &rid, &dte, &edte, verbose);
+ if (error != 0) {
+ if (verbose)
+ device_printf(dev,
+ "cannot find amdiommu unit, error %d\n",
+ error);
+ return (NULL);
+ }
+ return (&unit->iommu);
+}
+
+static struct x86_unit_common *
+amdiommu_get_x86_common(struct iommu_unit *unit)
+{
+ struct amdiommu_unit *iommu;
+
+ iommu = IOMMU2AMD(unit);
+ return (&iommu->x86c);
+}
+
+static void
+amdiommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
+{
+}
+
+static struct x86_iommu amd_x86_iommu = {
+ .get_x86_common = amdiommu_get_x86_common,
+ .unit_pre_instantiate_ctx = amdiommu_unit_pre_instantiate_ctx,
+ .find = amdiommu_find_method,
+ .domain_unload_entry = amdiommu_domain_unload_entry,
+ .domain_unload = amdiommu_domain_unload,
+ .get_ctx = amdiommu_get_ctx,
+ .free_ctx_locked = amdiommu_free_ctx_locked_method,
+ .free_ctx = amdiommu_free_ctx_method,
+ .alloc_msi_intr = amdiommu_alloc_msi_intr,
+ .map_msi_intr = amdiommu_map_msi_intr,
+ .unmap_msi_intr = amdiommu_unmap_msi_intr,
+ .map_ioapic_intr = amdiommu_map_ioapic_intr,
+ .unmap_ioapic_intr = amdiommu_unmap_ioapic_intr,
+};
+
+static void
+x86_iommu_set_amd(void *arg __unused)
+{
+ if (cpu_vendor_id == CPU_VENDOR_AMD)
+ set_x86_iommu(&amd_x86_iommu);
+}
+
+SYSINIT(x86_iommu, SI_SUB_TUNABLES, SI_ORDER_ANY, x86_iommu_set_amd, NULL);
+
+#ifdef DDB
+#include <ddb/ddb.h>
+#include <ddb/db_lex.h>
+
+static void
+amdiommu_print_domain(struct amdiommu_domain *domain, bool show_mappings)
+{
+ struct iommu_domain *iodom;
+
+ iodom = DOM2IODOM(domain);
+
+#if 0
+ db_printf(
+ " @%p dom %d mgaw %d agaw %d pglvl %d end %jx refs %d\n"
+ " ctx_cnt %d flags %x pgobj %p map_ents %u\n",
+ domain, domain->domain, domain->mgaw, domain->agaw, domain->pglvl,
+ (uintmax_t)domain->iodom.end, domain->refs, domain->ctx_cnt,
+ domain->iodom.flags, domain->pgtbl_obj, domain->iodom.entries_cnt);
+#endif
+
+ iommu_db_domain_print_contexts(iodom);
+
+ if (show_mappings)
+ iommu_db_domain_print_mappings(iodom);
+}
+
+static void
+amdiommu_print_one(struct amdiommu_unit *unit, bool show_domains,
+ bool show_mappings, bool show_cmdq)
+{
+ struct amdiommu_domain *domain;
+ struct amdiommu_cmd_generic *cp;
+ u_int cmd_head, cmd_tail, ci;
+
+ cmd_head = amdiommu_read4(unit, AMDIOMMU_CMDBUF_HEAD);
+ cmd_tail = amdiommu_read4(unit, AMDIOMMU_CMDBUF_TAIL);
+ db_printf("amdiommu%d at %p, mmio at %#jx/sz %#jx\n",
+ unit->iommu.unit, unit, (uintmax_t)unit->mmio_base,
+ (uintmax_t)unit->mmio_sz);
+ db_printf(" hw ctrl %#018jx cmdevst %#018jx\n",
+ (uintmax_t)amdiommu_read8(unit, AMDIOMMU_CTRL),
+ (uintmax_t)amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS));
+ db_printf(" devtbl at %p\n", unit->dev_tbl);
+ db_printf(" hwseq at %p phys %#jx val %#jx\n",
+ &unit->x86c.inv_waitd_seq_hw,
+ pmap_kextract((vm_offset_t)&unit->x86c.inv_waitd_seq_hw),
+ unit->x86c.inv_waitd_seq_hw);
+ db_printf(" invq at %p base %#jx hw head/tail %#x/%#x\n",
+ unit->x86c.inv_queue,
+ (uintmax_t)amdiommu_read8(unit, AMDIOMMU_CMDBUF_BASE),
+ cmd_head, cmd_tail);
+
+ if (show_cmdq) {
+ db_printf(" cmd q:\n");
+ for (ci = cmd_head; ci != cmd_tail;) {
+ cp = (struct amdiommu_cmd_generic *)(unit->
+ x86c.inv_queue + ci);
+ db_printf(
+ " idx %#x op %#x %#010x %#010x %#010x %#010x\n",
+ ci >> AMDIOMMU_CMD_SZ_SHIFT, cp->op,
+ cp->w0, cp->ww1, cp->w2, cp->w3);
+
+ ci += AMDIOMMU_CMD_SZ;
+ if (ci == unit->x86c.inv_queue_size)
+ ci = 0;
+ }
+ }
+
+ if (show_domains) {
+ db_printf(" domains:\n");
+ LIST_FOREACH(domain, &unit->domains, link) {
+ amdiommu_print_domain(domain, show_mappings);
+ if (db_pager_quit)
+ break;
+ }
+ }
+}
+
+DB_SHOW_COMMAND(amdiommu, db_amdiommu_print)
+{
+ struct amdiommu_unit *unit;
+ bool show_domains, show_mappings, show_cmdq;
+
+ show_domains = strchr(modif, 'd') != NULL;
+ show_mappings = strchr(modif, 'm') != NULL;
+ show_cmdq = strchr(modif, 'q') != NULL;
+ if (!have_addr) {
+ db_printf("usage: show amdiommu [/d] [/m] [/q] index\n");
+ return;
+ }
+ if ((vm_offset_t)addr < 0x10000)
+ unit = amdiommu_unit_by_device_id(0, (u_int)addr);
+ else
+ unit = (struct amdiommu_unit *)addr;
+ amdiommu_print_one(unit, show_domains, show_mappings, show_cmdq);
+}
+
+DB_SHOW_ALL_COMMAND(amdiommus, db_show_all_amdiommus)
+{
+ struct amdiommu_unit *unit;
+ bool show_domains, show_mappings, show_cmdq;
+
+ show_domains = strchr(modif, 'd') != NULL;
+ show_mappings = strchr(modif, 'm') != NULL;
+ show_cmdq = strchr(modif, 'q') != NULL;
+
+ TAILQ_FOREACH(unit, &amdiommu_units, unit_next) {
+ amdiommu_print_one(unit, show_domains, show_mappings,
+ show_cmdq);
+ if (db_pager_quit)
+ break;
+ }
+}
+#endif
diff --git a/sys/x86/iommu/amd_event.c b/sys/x86/iommu/amd_event.c
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_event.c
@@ -0,0 +1,323 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_acpi.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/smp.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/vmem.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <machine/bus.h>
+#include <machine/pci_cfgreg.h>
+#include "pcib_if.h"
+#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/cputypes.h>
+#include <x86/apicreg.h>
+#include <x86/apicvar.h>
+#include <dev/iommu/iommu.h>
+#include <dev/iommu/busdma_iommu.h>
+#include <x86/iommu/amd_reg.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/amd_iommu.h>
+
+static void
+amdiommu_event_rearm_intr(struct amdiommu_unit *unit)
+{
+ amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS,
+ AMDIOMMU_CMDEVS_EVLOGINT);
+}
+
+static void
+amdiommu_event_log_inc_head(struct amdiommu_unit *unit)
+{
+ unit->event_log_head++;
+ if (unit->event_log_head >= unit->event_log_size)
+ unit->event_log_head = 0;
+}
+
+static void
+amdiommu_event_log_print(struct amdiommu_unit *unit,
+ const struct amdiommu_event_generic *evp, bool fancy)
+{
+ printf("amdiommu%d: event type 0x%x 0x%08x 0x%08x 0x%08x 0x%08x\n",
+ unit->iommu.unit, evp->code, evp->w0, evp->ww1, evp->w2, evp->w3);
+ if (!fancy)
+ return;
+
+ AMDIOMMU_ASSERT_LOCKED(unit);
+ if (evp->code == AMDIOMMU_EV_ILL_DEV_TABLE_ENTRY) {
+ const struct amdiommu_event_ill_dev_table_entry *ev_dte_p;
+ const struct amdiommu_dte *dte;
+ const uint32_t *x;
+ int i;
+
+ ev_dte_p = (const struct
+ amdiommu_event_ill_dev_table_entry *)evp;
+ dte = &unit->dev_tbl[ev_dte_p->devid];
+
+ printf("\tIllegal Dev Tab Entry dte@%p:", dte);
+ for (i = 0, x = (const uint32_t *)dte; i < sizeof(*dte) /
+ sizeof(uint32_t); i++, x++)
+ printf(" 0x%08x", *x);
+ printf("\n");
+ } else if (evp->code == AMDIOMMU_EV_IO_PAGE_FAULT) {
+ const struct amdiommu_event_io_page_fault_entry *ev_iopf_p;
+ struct amdiommu_ctx *ctx;
+ device_t dev;
+
+ ev_iopf_p = (const struct
+ amdiommu_event_io_page_fault_entry *)evp;
+ printf("\tPage Fault rid %#x dom %d",
+ ev_iopf_p->devid, ev_iopf_p->pasid);
+ ctx = amdiommu_find_ctx_locked(unit, ev_iopf_p->devid);
+ if (ctx != NULL) {
+ dev = ctx->context.tag->owner;
+ if (dev != NULL)
+ printf(" %s", device_get_nameunit(dev));
+ }
+ printf("\n\t"
+ "gn %d nx %d us %d i %d pr %d rw %d pe %d rz %d tr %d"
+ "\n\tgaddr %#jx\n",
+ ev_iopf_p->gn, ev_iopf_p->nx, ev_iopf_p->us, ev_iopf_p->i,
+ ev_iopf_p->pr, ev_iopf_p->rw, ev_iopf_p->pe, ev_iopf_p->rz,
+ ev_iopf_p->tr,
+ (((uintmax_t)(ev_iopf_p->addr2)) << 32) |
+ ev_iopf_p->addr1);
+ }
+}
+
+static u_int
+amdiommu_event_log_tail(struct amdiommu_unit *unit)
+{
+ return (amdiommu_read8(unit, AMDIOMMU_EVNTLOG_TAIL) >>
+ AMDIOMMU_EV_SZ_SHIFT);
+}
+
+static u_int
+amdiommu_event_copy_log_inc(u_int idx)
+{
+ idx++;
+ if (idx == nitems(((struct amdiommu_unit *)NULL)->event_copy_log))
+ idx = 0;
+ return (idx);
+}
+
+static bool
+amdiommu_event_copy_log_hasspace(struct amdiommu_unit *unit)
+{
+ return (unit->event_copy_tail != amdiommu_event_copy_log_inc(
+ unit->event_copy_head));
+}
+
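+/*
+ * Interrupt handler for the event log.  Drain the hardware event ring
+ * into the small in-memory copy ring under the event spin lock and
+ * defer decoding to the event taskqueue; if the copy ring is full,
+ * print the raw entry immediately instead.
+ */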
+void
+amdiommu_event_intr(struct amdiommu_unit *unit, uint64_t status)
+{
+ struct amdiommu_event_generic *evp;
+ u_int hw_tail, hw_tail1;
+ bool enqueue;
+
+ enqueue = (status & AMDIOMMU_CMDEVS_EVOVRFLW) != 0;
+
+ hw_tail1 = amdiommu_event_log_tail(unit);
+ do {
+ hw_tail = hw_tail1;
+ for (; hw_tail != unit->event_log_head;
+ amdiommu_event_log_inc_head(unit)) {
+ evp = &unit->event_log[unit->event_log_head];
+ mtx_lock_spin(&unit->event_lock);
+ if (amdiommu_event_copy_log_hasspace(unit)) {
+ unit->event_copy_log[unit->event_copy_head] =
+ *evp;
+ unit->event_copy_head =
+ amdiommu_event_copy_log_inc(unit->
+ event_copy_head);
+ enqueue = true;
+ } else {
+ amdiommu_event_log_print(unit, evp, false);
+ }
+ mtx_unlock_spin(&unit->event_lock);
+ }
+ amdiommu_write8(unit, AMDIOMMU_EVNTLOG_HEAD,
+ unit->event_log_head << AMDIOMMU_EV_SZ_SHIFT);
+ hw_tail1 = amdiommu_event_log_tail(unit);
+ } while (hw_tail1 != hw_tail);
+ amdiommu_event_rearm_intr(unit);
+
+ if (enqueue)
+ taskqueue_enqueue(unit->event_taskqueue, &unit->event_task);
+}
+
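+/*
+ * Taskqueue handler: report the latched hardware event if the unit
+ * supports HWEV, recover from event log overflow by restarting the
+ * log, and pretty-print the entries queued by the interrupt handler.
+ */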
+static void
+amdiommu_event_task(void *arg, int pending __unused)
+{
+ struct amdiommu_unit *unit;
+ uint64_t hwev_status, status;
+ struct amdiommu_event_generic hwev;
+
+ unit = arg;
+ AMDIOMMU_LOCK(unit);
+
+ if ((unit->efr & AMDIOMMU_EFR_HWEV_SUP) != 0) {
+ hwev_status = amdiommu_read8(unit, AMDIOMMU_HWEV_STATUS);
+ if ((hwev_status & AMDIOMMU_HWEVS_HEV) != 0) {
+ *(uint64_t *)&hwev = amdiommu_read8(unit,
+ AMDIOMMU_HWEV_LOWER);
+ *((uint64_t *)&hwev + 1) = amdiommu_read8(unit,
+ AMDIOMMU_HWEV_UPPER);
+ printf("amdiommu%d: hw event%s\n", unit->iommu.unit,
+ (hwev_status & AMDIOMMU_HWEVS_HEO) != 0 ?
+ " (overflown)" : "");
+ amdiommu_event_log_print(unit, &hwev, true);
+ amdiommu_write8(unit, AMDIOMMU_HWEV_STATUS,
+ hwev_status);
+ }
+ }
+
+ status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS);
+ if ((status & AMDIOMMU_CMDEVS_EVOVRFLW) != 0) {
+ printf("amdiommu%d: event log overflow\n", unit->iommu.unit);
+
+ while ((status & AMDIOMMU_CMDEVS_EVLOGRUN) != 0) {
+ DELAY(1);
+ status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS);
+ }
+
+ unit->hw_ctrl &= ~AMDIOMMU_CTRL_EVNTLOG_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+
+ unit->event_log_head = 0;
+ amdiommu_write8(unit, AMDIOMMU_EVNTLOG_HEAD, 0);
+
+ amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS,
+ AMDIOMMU_CMDEVS_EVOVRFLW); /* RW1C */
+
+ unit->hw_ctrl |= AMDIOMMU_CTRL_EVNTLOG_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+
+ amdiommu_event_rearm_intr(unit);
+ }
+
+ mtx_lock_spin(&unit->event_lock);
+ while (unit->event_copy_head != unit->event_copy_tail) {
+ mtx_unlock_spin(&unit->event_lock);
+ amdiommu_event_log_print(unit, &unit->event_copy_log[
+ unit->event_copy_tail], true);
+ mtx_lock_spin(&unit->event_lock);
+ unit->event_copy_tail = amdiommu_event_copy_log_inc(unit->
+ event_copy_tail);
+ }
+ mtx_unlock_spin(&unit->event_lock);
+
+ AMDIOMMU_UNLOCK(unit);
+}
+
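+/*
+ * Allocate the event log (sized in entries by the
+ * hw.amdiommu.event_log_size tunable), create the reporting taskqueue,
+ * and enable event logging together with its interrupt.
+ */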
+int
+amdiommu_init_event(struct amdiommu_unit *unit)
+{
+ uint64_t base_reg;
+
+ mtx_init(&unit->event_lock, "amdevl", NULL, MTX_SPIN);
+
+ /* event log entries */
+ unit->event_log_size = AMDIOMMU_EVNTLOG_MIN;
+ TUNABLE_INT_FETCH("hw.amdiommu.event_log_size", &unit->event_log_size);
+ if (unit->event_log_size < AMDIOMMU_EVNTLOG_MIN ||
+ unit->event_log_size > AMDIOMMU_EVNTLOG_MAX ||
+ !powerof2(unit->event_log_size))
+ panic("invalid hw.amdiommu.event_log_size");
+ unit->event_log = kmem_alloc_contig(AMDIOMMU_EV_SZ *
+ unit->event_log_size, M_WAITOK | M_ZERO, 0, ~0ull, PAGE_SIZE,
+ 0, VM_MEMATTR_DEFAULT);
+
+ TASK_INIT(&unit->event_task, 0, amdiommu_event_task, unit);
+ unit->event_taskqueue = taskqueue_create_fast("amdiommuff", M_WAITOK,
+ taskqueue_thread_enqueue, &unit->event_taskqueue);
+ taskqueue_start_threads(&unit->event_taskqueue, 1, PI_AV,
+ "amdiommu%d event taskq", unit->iommu.unit);
+
+ base_reg = pmap_kextract((vm_offset_t)unit->event_log) |
+ (((uint64_t)0x8 + ilog2(unit->event_log_size /
+ AMDIOMMU_EVNTLOG_MIN)) << AMDIOMMU_EVNTLOG_BASE_SZSHIFT);
+ AMDIOMMU_LOCK(unit);
+ /*
+ * Re-arm before enabling the interrupt, so that the interrupt
+ * is not lost when re-arming in the interrupt handler.
+ */
+ amdiommu_event_rearm_intr(unit);
+ amdiommu_write8(unit, AMDIOMMU_EVNTLOG_BASE, base_reg);
+ unit->hw_ctrl |= AMDIOMMU_CTRL_EVNTLOG_EN | AMDIOMMU_CTRL_EVENTINT_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+ AMDIOMMU_UNLOCK(unit);
+
+ return (0);
+}
+
+void
+amdiommu_fini_event(struct amdiommu_unit *unit)
+{
+ AMDIOMMU_LOCK(unit);
+ unit->hw_ctrl &= ~(AMDIOMMU_CTRL_EVNTLOG_EN |
+ AMDIOMMU_CTRL_EVENTINT_EN);
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+ amdiommu_write8(unit, AMDIOMMU_EVNTLOG_BASE, 0);
+ AMDIOMMU_UNLOCK(unit);
+
+ taskqueue_drain(unit->event_taskqueue, &unit->event_task);
+ taskqueue_free(unit->event_taskqueue);
+ unit->event_taskqueue = NULL;
+
+ kmem_free(unit->event_log, unit->event_log_size * AMDIOMMU_EV_SZ);
+ unit->event_log = NULL;
+ unit->event_log_head = unit->event_log_tail = 0;
+
+ mtx_destroy(&unit->event_lock);
+}
diff --git a/sys/x86/iommu/amd_idpgtbl.c b/sys/x86/iommu/amd_idpgtbl.c
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_idpgtbl.c
@@ -0,0 +1,396 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/domainset.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/rman.h>
+#include <sys/sf_buf.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <sys/vmem.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <dev/pci/pcireg.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/cpu.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <dev/iommu/busdma_iommu.h>
+#include <x86/iommu/amd_reg.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/amd_iommu.h>
+
+static void amdiommu_unmap_clear_pte(struct amdiommu_domain *domain,
+ iommu_gaddr_t base, int lvl, int flags, iommu_pte_t *pte,
+ struct sf_buf **sf, struct iommu_map_entry *entry, bool free_sf);
+static int amdiommu_unmap_buf_locked(struct amdiommu_domain *domain,
+ iommu_gaddr_t base, iommu_gaddr_t size, int flags,
+ struct iommu_map_entry *entry);
+
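+/*
+ * Create the VM object backing the domain page table and allocate the
+ * root page table page, which stays wired for the life of the domain.
+ */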
+int
+amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain)
+{
+ vm_page_t m;
+ int dom;
+
+ KASSERT(domain->pgtbl_obj == NULL,
+ ("already initialized %p", domain));
+
+ domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
+ IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL);
+ if (bus_get_domain(domain->iodom.iommu->dev, &dom) == 0)
+ domain->pgtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom);
+ AMDIOMMU_DOMAIN_PGLOCK(domain);
+ m = iommu_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK |
+ IOMMU_PGF_ZERO | IOMMU_PGF_OBJL);
+ /* No implicit free of the top level page table page. */
+ vm_page_wire(m);
+ domain->pgtblr = m;
+ AMDIOMMU_DOMAIN_PGUNLOCK(domain);
+ AMDIOMMU_LOCK(domain->unit);
+ domain->iodom.flags |= IOMMU_DOMAIN_PGTBL_INITED;
+ AMDIOMMU_UNLOCK(domain->unit);
+ return (0);
+}
+
+void
+amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain)
+{
+ vm_object_t obj;
+ vm_page_t m;
+
+ obj = domain->pgtbl_obj;
+ if (obj == NULL) {
+ KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0,
+ ("lost pagetable object domain %p", domain));
+ return;
+ }
+ AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
+ domain->pgtbl_obj = NULL;
+ domain->pgtblr = NULL;
+
+ /* Obliterate ref_counts */
+ VM_OBJECT_ASSERT_WLOCKED(obj);
+ for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m))
+ vm_page_clearref(m);
+ VM_OBJECT_WUNLOCK(obj);
+ vm_object_deallocate(obj);
+}
+
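+/*
+ * Return a mapped pointer to the pte for address base at the given
+ * page table level, allocating and linking intermediate page table
+ * pages on demand.  The backing page stays mapped through *sf so that
+ * consecutive lookups in the same page avoid remapping.
+ */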
+static iommu_pte_t *
+amdiommu_pgtbl_map_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
+ int lvl, int flags, vm_pindex_t *idxp, struct sf_buf **sf)
+{
+ iommu_pte_t *pte, *ptep;
+ struct sf_buf *sfp;
+ vm_page_t m;
+ vm_pindex_t idx, idx1;
+
+ idx = pglvl_pgtbl_get_pindex(domain->pglvl, base, lvl);
+ if (*sf != NULL && idx == *idxp) {
+ pte = (iommu_pte_t *)sf_buf_kva(*sf);
+ } else {
+ if (*sf != NULL)
+ iommu_unmap_pgtbl(*sf);
+ *idxp = idx;
+retry:
+ pte = iommu_map_pgtbl(domain->pgtbl_obj, idx, flags, sf);
+ if (pte == NULL) {
+ KASSERT(lvl > 0,
+ ("lost root page table page %p", domain));
+ /*
+ * The page table page does not exist; allocate
+ * it and create a pte in the preceding page level
+ * to reference the allocated page table page.
+ */
+ m = iommu_pgalloc(domain->pgtbl_obj, idx, flags |
+ IOMMU_PGF_ZERO);
+ if (m == NULL)
+ return (NULL);
+
+ vm_page_wire(m);
+
+ sfp = NULL;
+ ptep = amdiommu_pgtbl_map_pte(domain, base, lvl - 1,
+ flags, &idx1, &sfp);
+ if (ptep == NULL) {
+ KASSERT(m->pindex != 0,
+ ("loosing root page %p", domain));
+ vm_page_unwire_noq(m);
+ iommu_pgfree(domain->pgtbl_obj, m->pindex,
+ flags, NULL);
+ return (NULL);
+ }
+ ptep->pte = VM_PAGE_TO_PHYS(m) | AMDIOMMU_PTE_IR |
+ AMDIOMMU_PTE_IW | AMDIOMMU_PTE_PR |
+ ((domain->pglvl - lvl) << AMDIOMMU_PTE_NLVL_SHIFT);
+ vm_page_wire(sf_buf_page(sfp));
+ vm_page_unwire_noq(m);
+ iommu_unmap_pgtbl(sfp);
+ /* Only executed once. */
+ goto retry;
+ }
+ }
+ pte += pglvl_pgtbl_pte_off(domain->pglvl, base, lvl);
+ return (pte);
+}
+
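+/*
+ * Install leaf ptes for the [base, base + size) range one IOMMU page
+ * at a time; superpages are not used.  On allocation failure the
+ * partially constructed mapping is torn down before returning ENOMEM.
+ */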
+static int
+amdiommu_map_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
+ iommu_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags,
+ struct iommu_map_entry *entry)
+{
+ iommu_pte_t *pte;
+ struct sf_buf *sf;
+ iommu_gaddr_t base1;
+ vm_pindex_t pi, idx;
+
+ AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
+
+ base1 = base;
+ flags |= IOMMU_PGF_OBJL;
+ idx = -1;
+ pte = NULL;
+ sf = NULL;
+
+ for (pi = 0; size > 0; base += IOMMU_PAGE_SIZE, size -= IOMMU_PAGE_SIZE,
+ pi++) {
+ KASSERT(size >= IOMMU_PAGE_SIZE,
+ ("mapping loop overflow %p %jx %jx %jx", domain,
+ (uintmax_t)base, (uintmax_t)size, (uintmax_t)IOMMU_PAGE_SIZE));
+ pte = amdiommu_pgtbl_map_pte(domain, base, domain->pglvl - 1,
+ flags, &idx, &sf);
+ if (pte == NULL) {
+ KASSERT((flags & IOMMU_PGF_WAITOK) == 0,
+ ("failed waitable pte alloc %p", domain));
+ if (sf != NULL)
+ iommu_unmap_pgtbl(sf);
+ amdiommu_unmap_buf_locked(domain, base1, base - base1,
+ flags, entry);
+ return (ENOMEM);
+ }
+ /* next level 0, no superpages */
+ pte->pte = VM_PAGE_TO_PHYS(ma[pi]) | pflags | AMDIOMMU_PTE_PR;
+ vm_page_wire(sf_buf_page(sf));
+ }
+ if (sf != NULL)
+ iommu_unmap_pgtbl(sf);
+ return (0);
+}
+
+static int
+amdiommu_map_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
+ vm_page_t *ma, uint64_t eflags, int flags)
+{
+ struct amdiommu_domain *domain;
+ uint64_t pflags;
+ iommu_gaddr_t base, size;
+ int error;
+
+ base = entry->start;
+ size = entry->end - entry->start;
+ pflags = ((eflags & IOMMU_MAP_ENTRY_READ) != 0 ? AMDIOMMU_PTE_IR : 0) |
+ ((eflags & IOMMU_MAP_ENTRY_WRITE) != 0 ? AMDIOMMU_PTE_IW : 0) |
+ ((eflags & IOMMU_MAP_ENTRY_SNOOP) != 0 ? AMDIOMMU_PTE_FC : 0);
+ /* IOMMU_MAP_ENTRY_TM ignored */
+
+ domain = IODOM2DOM(iodom);
+
+ KASSERT((iodom->flags & IOMMU_DOMAIN_IDMAP) == 0,
+ ("modifying idmap pagetable domain %p", domain));
+ KASSERT((base & IOMMU_PAGE_MASK) == 0,
+ ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((size & IOMMU_PAGE_MASK) == 0,
+ ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT(size > 0, ("zero size %p %jx %jx", domain, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT(base < iodom->end,
+ ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
+ (uintmax_t)size, (uintmax_t)iodom->end));
+ KASSERT(base + size < iodom->end,
+ ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
+ (uintmax_t)size, (uintmax_t)iodom->end));
+ KASSERT(base + size > base,
+ ("size overflow %p %jx %jx", domain, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((pflags & (AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW)) != 0,
+ ("neither read nor write %jx", (uintmax_t)pflags));
+ KASSERT((pflags & ~(AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW | AMDIOMMU_PTE_FC
+ )) == 0,
+ ("invalid pte flags %jx", (uintmax_t)pflags));
+ KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));
+
+ AMDIOMMU_DOMAIN_PGLOCK(domain);
+ error = amdiommu_map_buf_locked(domain, base, size, ma, pflags,
+ flags, entry);
+ AMDIOMMU_DOMAIN_PGUNLOCK(domain);
+
+ /*
+ * XXXKIB invalidation seems to be needed even for non-valid->valid
+ * updates. Recheck.
+ */
+ iommu_qi_invalidate_sync(iodom, base, size,
+ (flags & IOMMU_PGF_WAITOK) != 0);
+ return (error);
+}
+
+static void
+amdiommu_free_pgtbl_pde(struct amdiommu_domain *domain, iommu_gaddr_t base,
+ int lvl, int flags, struct iommu_map_entry *entry)
+{
+ struct sf_buf *sf;
+ iommu_pte_t *pde;
+ vm_pindex_t idx;
+
+ sf = NULL;
+ pde = amdiommu_pgtbl_map_pte(domain, base, lvl, flags, &idx, &sf);
+ amdiommu_unmap_clear_pte(domain, base, lvl, flags, pde, &sf, entry,
+ true);
+}
+
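+/*
+ * Clear a pte and drop the wiring it held on its page table page.
+ * When the last pte in the page is cleared, free the page and
+ * recursively clear the pde referencing it in the parent level.
+ */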
+static void
+amdiommu_unmap_clear_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
+ int lvl, int flags, iommu_pte_t *pte, struct sf_buf **sf,
+ struct iommu_map_entry *entry, bool free_sf)
+{
+ vm_page_t m;
+
+ pte->pte = 0;
+ m = sf_buf_page(*sf);
+ if (free_sf) {
+ iommu_unmap_pgtbl(*sf);
+ *sf = NULL;
+ }
+ if (!vm_page_unwire_noq(m))
+ return;
+ KASSERT(lvl != 0,
+ ("lost reference (lvl) on root pg domain %p base %jx lvl %d",
+ domain, (uintmax_t)base, lvl));
+ KASSERT(m->pindex != 0,
+ ("lost reference (idx) on root pg domain %p base %jx lvl %d",
+ domain, (uintmax_t)base, lvl));
+ iommu_pgfree(domain->pgtbl_obj, m->pindex, flags, entry);
+ amdiommu_free_pgtbl_pde(domain, base, lvl - 1, flags, entry);
+}
+
+static int
+amdiommu_unmap_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
+ iommu_gaddr_t size, int flags, struct iommu_map_entry *entry)
+{
+ iommu_pte_t *pte;
+ struct sf_buf *sf;
+ vm_pindex_t idx;
+ iommu_gaddr_t pg_sz;
+
+ AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
+ if (size == 0)
+ return (0);
+
+ KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) == 0,
+ ("modifying idmap pagetable domain %p", domain));
+ KASSERT((base & IOMMU_PAGE_MASK) == 0,
+ ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((size & IOMMU_PAGE_MASK) == 0,
+ ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT(base < DOM2IODOM(domain)->end,
+ ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
+ (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
+ KASSERT(base + size < DOM2IODOM(domain)->end,
+ ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
+ (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
+ KASSERT(base + size > base,
+ ("size overflow %p %jx %jx", domain, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));
+
+ pg_sz = IOMMU_PAGE_SIZE;
+ flags |= IOMMU_PGF_OBJL;
+
+ for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) {
+ pte = amdiommu_pgtbl_map_pte(domain, base,
+ domain->pglvl - 1, flags, &idx, &sf);
+ KASSERT(pte != NULL,
+ ("sleeping or page missed %p %jx %d 0x%x",
+ domain, (uintmax_t)base, domain->pglvl - 1, flags));
+ amdiommu_unmap_clear_pte(domain, base, domain->pglvl - 1,
+ flags, pte, &sf, entry, false);
+ KASSERT(size >= pg_sz,
+ ("unmapping loop overflow %p %jx %jx %jx", domain,
+ (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
+ }
+ if (sf != NULL)
+ iommu_unmap_pgtbl(sf);
+ return (0);
+}
+
+static int
+amdiommu_unmap_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
+ int flags)
+{
+ struct amdiommu_domain *domain;
+ int error;
+
+ domain = IODOM2DOM(iodom);
+
+ AMDIOMMU_DOMAIN_PGLOCK(domain);
+ error = amdiommu_unmap_buf_locked(domain, entry->start,
+ entry->end - entry->start, flags, entry);
+ AMDIOMMU_DOMAIN_PGUNLOCK(domain);
+ return (error);
+}
+
+const struct iommu_domain_map_ops amdiommu_domain_map_ops = {
+ .map = amdiommu_map_buf,
+ .unmap = amdiommu_unmap_buf,
+};
diff --git a/sys/x86/iommu/amd_intrmap.c b/sys/x86/iommu/amd_intrmap.c
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_intrmap.c
@@ -0,0 +1,391 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/domainset.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <sys/vmem.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/md_var.h>
+#include <machine/intr_machdep.h>
+#include <x86/include/apicreg.h>
+#include <x86/include/apicvar.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <dev/iommu/busdma_iommu.h>
+#include <x86/iommu/amd_reg.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/amd_iommu.h>
+
+static struct amdiommu_ctx *amdiommu_ir_find(device_t src, uint16_t *rid,
+ bool *is_iommu);
+static void amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src,
+ u_int cookie);
+
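+/*
+ * Allocate a contiguous range of IRTE indices (cookies) for an MSI
+ * source from the per-context vmem arena; EOPNOTSUPP means interrupt
+ * remapping is not done for this source.
+ */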
+int
+amdiommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
+{
+ struct amdiommu_ctx *ctx;
+ vmem_addr_t vmem_res;
+ u_int idx, i;
+ int error;
+
+ ctx = amdiommu_ir_find(src, NULL, NULL);
+ if (ctx == NULL || !CTX2AMD(ctx)->irte_enabled) {
+ for (i = 0; i < count; i++)
+ cookies[i] = -1;
+ return (EOPNOTSUPP);
+ }
+
+ error = vmem_alloc(ctx->irtids, count, M_FIRSTFIT | M_NOWAIT,
+ &vmem_res);
+ if (error != 0) {
+ KASSERT(error != EOPNOTSUPP,
+ ("impossible EOPNOTSUPP from vmem"));
+ return (error);
+ }
+ idx = vmem_res;
+ for (i = 0; i < count; i++)
+ cookies[i] = idx + i;
+ return (0);
+}
+
+int
+amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
+ u_int cookie, uint64_t *addr, uint32_t *data)
+{
+ struct amdiommu_ctx *ctx;
+ struct amdiommu_unit *unit;
+ uint16_t rid;
+ bool is_iommu;
+
+ ctx = amdiommu_ir_find(src, &rid, &is_iommu);
+ if (is_iommu) {
+ if (addr != NULL) {
+ *data = vector;
+ *addr = MSI_INTEL_ADDR_BASE | ((cpu & 0xff) << 12);
+ if (x2apic_mode)
+ *addr |= ((uint64_t)cpu & 0xffffff00) << 32;
+ else
+ KASSERT(cpu <= 0xff,
+ ("cpu id too big %d", cpu));
+ }
+ return (0);
+ }
+
+ if (ctx == NULL)
+ return (EOPNOTSUPP);
+ unit = CTX2AMD(ctx);
+ if (!unit->irte_enabled || cookie == -1)
+ return (EOPNOTSUPP);
+ if (cookie >= unit->irte_nentries) {
+ device_printf(src, "amdiommu%d: cookie %u irte max %u\n",
+ unit->iommu.unit, cookie, unit->irte_nentries);
+ return (EINVAL);
+ }
+
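+ /*
+ * Fill the IRTE fields first and set remapen only after the
+ * release fence, so that the hardware never observes a
+ * partially initialized entry.
+ */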
+ if (unit->irte_x2apic) {
+ struct amdiommu_irte_basic_vapic_x2 *irte;
+
+ irte = &ctx->irtx2[cookie];
+ irte->supiopf = 0;
+ irte->inttype = 0;
+ irte->rqeoi = 0;
+ irte->dm = 0;
+ irte->guestmode = 0;
+ irte->dest0 = cpu;
+ irte->rsrv0 = 0;
+ irte->vector = vector;
+ irte->rsrv1 = 0;
+ irte->rsrv2 = 0;
+ irte->dest1 = cpu >> 24;
+ atomic_thread_fence_rel();
+ irte->remapen = 1;
+ } else {
+ struct amdiommu_irte_basic_novapic *irte;
+
+ irte = &ctx->irtb[cookie];
+ irte->supiopf = 0;
+ irte->inttype = 0; /* fixed */
+ irte->rqeoi = 0;
+ irte->dm = 0; /* phys */
+ irte->guestmode = 0;
+ irte->dest = cpu;
+ irte->vector = vector;
+ irte->rsrv = 0;
+ atomic_thread_fence_rel();
+ irte->remapen = 1;
+ }
+
+ if (addr != NULL) {
+ *data = cookie;
+ *addr = MSI_INTEL_ADDR_BASE | ((cpu & 0xff) << 12);
+ if (unit->irte_x2apic)
+ *addr |= ((uint64_t)cpu & 0xffffff00) << 32;
+ }
+
+ iommu_get_requester(src, &rid);
+ AMDIOMMU_LOCK(unit);
+ amdiommu_qi_invalidate_ir_locked(unit, rid);
+ AMDIOMMU_UNLOCK(unit);
+
+ return (0);
+}
+
+int
+amdiommu_unmap_msi_intr(device_t src, u_int cookie)
+{
+ struct amdiommu_ctx *ctx;
+
+ if (cookie == -1)
+ return (0);
+ ctx = amdiommu_ir_find(src, NULL, NULL);
+ amdiommu_ir_free_irte(ctx, src, cookie);
+ return (0);
+}
+
+int
+amdiommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
+ bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
+ uint32_t *lo)
+{
+ /* XXXKIB for early call from ioapic_create() */
+ return (EOPNOTSUPP);
+}
+
+int
+amdiommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
+{
+ /* XXXKIB */
+ return (0);
+}
+
+static struct amdiommu_ctx *
+amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu)
+{
+ devclass_t src_class;
+ struct amdiommu_unit *unit;
+ struct amdiommu_ctx *ctx;
+ uint32_t edte;
+ uint16_t rid;
+ uint8_t dte;
+ int error;
+
+ /*
+ * Determine whether the interrupt source generates FSB
+ * interrupts. If it does, the source is either the IOMMU
+ * itself, whose interrupts are not remapped, or the HPET,
+ * whose interrupts are remapped. For HPET, the source id is
+ * reported by the HPET record in the IVHD ACPI table.
+ */
+ if (is_iommu != NULL)
+ *is_iommu = false;
+
+ ctx = NULL;
+
+ src_class = device_get_devclass(src);
+ if (src_class == devclass_find("amdiommu")) {
+ if (is_iommu != NULL)
+ *is_iommu = true;
+ } else if (src_class == devclass_find("hpet")) {
+ error = amdiommu_find_unit_for_hpet(src, &unit, &rid, &dte,
+ &edte, bootverbose);
+ ctx = NULL; // XXXKIB allocate ctx
+ } else {
+ error = amdiommu_find_unit(src, &unit, &rid, &dte, &edte,
+ bootverbose);
+ if (error == 0) {
+ iommu_get_requester(src, &rid);
+ ctx = amdiommu_get_ctx_for_dev(unit, src,
+ rid, 0, false /* XXXKIB */, false, dte, edte);
+ }
+ }
+ if (ridp != NULL)
+ *ridp = rid;
+ return (ctx);
+}
+
+static void
+amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src,
+ u_int cookie)
+{
+ struct amdiommu_unit *unit;
+ uint16_t rid;
+
+ MPASS(ctx != NULL);
+ unit = CTX2AMD(ctx);
+
+ KASSERT(unit->irte_enabled,
+ ("unmap: cookie %d ctx %p unit %p", cookie, ctx, unit));
+ KASSERT(cookie < unit->irte_nentries,
+ ("bad cookie %u %u", cookie, unit->irte_nentries));
+
+ if (unit->irte_x2apic) {
+ struct amdiommu_irte_basic_vapic_x2 *irte;
+
+ irte = &ctx->irtx2[cookie];
+ irte->remapen = 0;
+ atomic_thread_fence_rel();
+ bzero(irte, sizeof(*irte));
+ } else {
+ struct amdiommu_irte_basic_novapic *irte;
+
+ irte = &ctx->irtb[cookie];
+ irte->remapen = 0;
+ atomic_thread_fence_rel();
+ bzero(irte, sizeof(*irte));
+ }
+ iommu_get_requester(src, &rid);
+ AMDIOMMU_LOCK(unit);
+ amdiommu_qi_invalidate_ir_locked(unit, rid);
+ AMDIOMMU_UNLOCK(unit);
+}
+
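+/*
+ * Allocate the per-context interrupt remapping table in the format
+ * selected for the unit (x2APIC or basic) and the vmem arena that
+ * hands out IRTE indices.
+ */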
+int
+amdiommu_ctx_init_irte(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_unit *unit;
+ void *ptr;
+ unsigned long sz;
+ int dom;
+
+ unit = CTX2AMD(ctx);
+ if (!unit->irte_enabled)
+ return (0);
+
+ KASSERT(unit->irte_nentries > 0 &&
+ unit->irte_nentries <= 2048 &&
+ powerof2(unit->irte_nentries),
+ ("amdiommu%d: unit %p irte_nentries %u", unit->iommu.unit,
+ unit, unit->irte_nentries));
+
+ if (bus_get_domain(unit->iommu.dev, &dom) != 0)
+ dom = -1;
+ sz = unit->irte_nentries;
+ sz *= unit->irte_x2apic ? sizeof(struct amdiommu_irte_basic_vapic_x2) :
+ sizeof(struct amdiommu_irte_basic_novapic);
+
+ if (dom != -1) {
+ ptr = contigmalloc_domainset(sz, M_DEVBUF, DOMAINSET_PREF(dom),
+ M_WAITOK | M_ZERO, 0, ~0ull, 128, 0);
+ } else {
+ ptr = contigmalloc(sz, M_DEVBUF, M_WAITOK | M_ZERO,
+ 0, ~0ull, 128, 0);
+ }
+ if (unit->irte_x2apic)
+ ctx->irtx2 = ptr;
+ else
+ ctx->irtb = ptr;
+ ctx->irtids = vmem_create("amdirt", 0, unit->irte_nentries, 1, 0,
+ M_FIRSTFIT | M_NOWAIT);
+
+ intr_reprogram(); // XXXKIB
+
+ return (0);
+}
+
+void
+amdiommu_ctx_fini_irte(struct amdiommu_ctx *ctx)
+{
+ struct amdiommu_unit *unit;
+
+ unit = CTX2AMD(ctx);
+ if (!unit->irte_enabled)
+ return;
+ if (unit->irte_x2apic)
+ free(ctx->irtx2, M_DEVBUF);
+ else
+ free(ctx->irtb, M_DEVBUF);
+ vmem_destroy(ctx->irtids);
+}
+
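+/*
+ * Per-unit interrupt remapping setup: honor the hw.iommu.ir tunable,
+ * clamp hw.iommu.amd.ir_num to a power of two between 1 and 2048, and
+ * record whether IRTEs must use the x2APIC format.
+ */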
+int
+amdiommu_init_irt(struct amdiommu_unit *unit)
+{
+ int enabled, nentries;
+
+ SYSCTL_ADD_INT(&unit->iommu.sysctl_ctx,
+ SYSCTL_CHILDREN(device_get_sysctl_tree(unit->iommu.dev)),
+ OID_AUTO, "ir", CTLFLAG_RD, &unit->irte_enabled, 0,
+ "Interrupt remapping ops enabled");
+
+ enabled = 1;
+ TUNABLE_INT_FETCH("hw.iommu.ir", &enabled);
+
+ unit->irte_enabled = enabled != 0;
+ if (!unit->irte_enabled)
+ return (0);
+
+ nentries = 32;
+ TUNABLE_INT_FETCH("hw.iommu.amd.ir_num", &nentries);
+ nentries = roundup_pow_of_two(nentries);
+ if (nentries < 1)
+ nentries = 1;
+ if (nentries > 2048)
+ nentries = 2048;
+ unit->irte_nentries = nentries;
+
+ unit->irte_x2apic = x2apic_mode;
+ return (0);
+}
+
+void
+amdiommu_fini_irt(struct amdiommu_unit *unit)
+{
+}
diff --git a/sys/x86/iommu/amd_iommu.h b/sys/x86/iommu/amd_iommu.h
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_iommu.h
@@ -0,0 +1,243 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __X86_IOMMU_AMD_IOMMU_H
+#define __X86_IOMMU_AMD_IOMMU_H
+
+#include <dev/iommu/iommu.h>
+
+#define AMDIOMMU_DEV_REPORTED 0x00000001
+
+struct amdiommu_unit;
+
+struct amdiommu_domain {
+ struct iommu_domain iodom;
+ int domain; /* (c) DID, written in context entry */
+ struct amdiommu_unit *unit; /* (c) */
+
+ u_int ctx_cnt; /* (u) Number of contexts owned */
+ u_int refs; /* (u) Refs, including ctx */
+ LIST_ENTRY(amdiommu_domain) link;/* (u) Member in the iommu list */
+ vm_object_t pgtbl_obj; /* (c) Page table pages */
+ vm_page_t pgtblr; /* (c) Page table root page */
+ u_int pglvl; /* (c) Page table levels */
+};
+
+struct amdiommu_ctx {
+ struct iommu_ctx context;
+ struct amdiommu_irte_basic_novapic *irtb;
+ struct amdiommu_irte_basic_vapic_x2 *irtx2;
+ vmem_t *irtids;
+};
+
+struct amdiommu_unit {
+ struct iommu_unit iommu;
+ struct x86_unit_common x86c;
+ u_int unit_dom; /* Served PCI domain, from IVRS */
+ u_int device_id; /* basically PCI RID */
+ u_int unit_id; /* Hypertransport Unit ID, deprecated */
+ TAILQ_ENTRY(amdiommu_unit) unit_next;
+ int seccap_reg;
+ uint64_t efr;
+ vm_paddr_t mmio_base;
+ vm_size_t mmio_sz;
+ struct resource *mmio_res;
+ int mmio_rid;
+ uint64_t hw_ctrl;
+
+ u_int numirqs;
+ struct resource *msix_table;
+ int msix_table_rid;
+ int irq_cmdev_rid;
+ struct resource *irq_cmdev;
+ void *irq_cmdev_cookie;
+
+ struct amdiommu_dte *dev_tbl;
+ vm_object_t devtbl_obj;
+
+ LIST_HEAD(, amdiommu_domain) domains;
+ struct unrhdr *domids;
+
+ struct mtx event_lock;
+ struct amdiommu_event_generic *event_log;
+ u_int event_log_size;
+ u_int event_log_head;
+ u_int event_log_tail;
+ struct task event_task;
+ struct taskqueue *event_taskqueue;
+ struct amdiommu_event_generic event_copy_log[16];
+ u_int event_copy_head;
+ u_int event_copy_tail;
+
+ int irte_enabled; /* int for sysctl type */
+ bool irte_x2apic;
+ u_int irte_nentries;
+};
+
+#define AMD2IOMMU(unit) (&((unit)->iommu))
+#define IOMMU2AMD(unit) \
+ __containerof((unit), struct amdiommu_unit, iommu)
+
+#define AMDIOMMU_LOCK(unit) mtx_lock(&AMD2IOMMU(unit)->lock)
+#define AMDIOMMU_UNLOCK(unit) mtx_unlock(&AMD2IOMMU(unit)->lock)
+#define AMDIOMMU_ASSERT_LOCKED(unit) mtx_assert(&AMD2IOMMU(unit)->lock, \
+ MA_OWNED)
+
+#define AMDIOMMU_EVENT_LOCK(unit) mtx_lock_spin(&(unit)->event_lock)
+#define AMDIOMMU_EVENT_UNLOCK(unit) mtx_unlock_spin(&(unit)->event_lock)
+#define AMDIOMMU_EVENT_ASSERT_LOCKED(unit) \
+ mtx_assert(&(unit)->event_lock, MA_OWNED)
+
+#define DOM2IODOM(domain) (&((domain)->iodom))
+#define IODOM2DOM(domain) \
+ __containerof((domain), struct amdiommu_domain, iodom)
+
+#define CTX2IOCTX(ctx) (&((ctx)->context))
+#define IOCTX2CTX(ctx) \
+ __containerof((ctx), struct amdiommu_ctx, context)
+
+#define CTX2DOM(ctx) IODOM2DOM((ctx)->context.domain)
+#define CTX2AMD(ctx) (CTX2DOM(ctx)->unit)
+#define DOM2AMD(domain) ((domain)->unit)
+
+#define AMDIOMMU_DOMAIN_LOCK(dom) mtx_lock(&(dom)->iodom.lock)
+#define AMDIOMMU_DOMAIN_UNLOCK(dom) mtx_unlock(&(dom)->iodom.lock)
+#define AMDIOMMU_DOMAIN_ASSERT_LOCKED(dom) \
+ mtx_assert(&(dom)->iodom.lock, MA_OWNED)
+
+#define AMDIOMMU_DOMAIN_PGLOCK(dom) VM_OBJECT_WLOCK((dom)->pgtbl_obj)
+#define AMDIOMMU_DOMAIN_PGTRYLOCK(dom) VM_OBJECT_TRYWLOCK((dom)->pgtbl_obj)
+#define AMDIOMMU_DOMAIN_PGUNLOCK(dom) VM_OBJECT_WUNLOCK((dom)->pgtbl_obj)
+#define AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(dom) \
+ VM_OBJECT_ASSERT_WLOCKED((dom)->pgtbl_obj)
+
+#define AMDIOMMU_RID 1001
+
+static inline uint32_t
+amdiommu_read4(const struct amdiommu_unit *unit, int reg)
+{
+
+ return (bus_read_4(unit->mmio_res, reg));
+}
+
+static inline uint64_t
+amdiommu_read8(const struct amdiommu_unit *unit, int reg)
+{
+#ifdef __i386__
+ uint32_t high, low;
+
+ low = bus_read_4(unit->mmio_res, reg);
+ high = bus_read_4(unit->mmio_res, reg + 4);
+ return (low | ((uint64_t)high << 32));
+#else
+ return (bus_read_8(unit->mmio_res, reg));
+#endif
+}
+
+static inline void
+amdiommu_write4(const struct amdiommu_unit *unit, int reg, uint32_t val)
+{
+ bus_write_4(unit->mmio_res, reg, val);
+}
+
+static inline void
+amdiommu_write8(const struct amdiommu_unit *unit, int reg, uint64_t val)
+{
+#ifdef __i386__
+ uint32_t high, low;
+
+ low = val;
+ high = val >> 32;
+ bus_write_4(unit->mmio_res, reg, low);
+ bus_write_4(unit->mmio_res, reg + 4, high);
+#else
+ bus_write_8(unit->mmio_res, reg, val);
+#endif
+}
+
+int amdiommu_find_unit(device_t dev, struct amdiommu_unit **unitp,
+ uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose);
+int amdiommu_find_unit_for_ioapic(int apic_id, struct amdiommu_unit **unitp,
+ uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose);
+int amdiommu_find_unit_for_hpet(device_t hpet, struct amdiommu_unit **unitp,
+ uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose);
+
+int amdiommu_init_cmd(struct amdiommu_unit *unit);
+void amdiommu_fini_cmd(struct amdiommu_unit *unit);
+
+void amdiommu_event_intr(struct amdiommu_unit *unit, uint64_t status);
+int amdiommu_init_event(struct amdiommu_unit *unit);
+void amdiommu_fini_event(struct amdiommu_unit *unit);
+
+int amdiommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count);
+int amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
+ u_int cookie, uint64_t *addr, uint32_t *data);
+int amdiommu_unmap_msi_intr(device_t src, u_int cookie);
+int amdiommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
+ bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
+ uint32_t *lo);
+int amdiommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie);
+int amdiommu_init_irt(struct amdiommu_unit *unit);
+void amdiommu_fini_irt(struct amdiommu_unit *unit);
+int amdiommu_ctx_init_irte(struct amdiommu_ctx *ctx);
+void amdiommu_ctx_fini_irte(struct amdiommu_ctx *ctx);
+
+void amdiommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
+ bool cansleep);
+void amdiommu_domain_unload(struct iommu_domain *iodom,
+ struct iommu_map_entries_tailq *entries, bool cansleep);
+struct amdiommu_ctx *amdiommu_get_ctx_for_dev(struct amdiommu_unit *unit,
+ device_t dev, uint16_t rid, int dev_domain, bool id_mapped,
+ bool rmrr_init, uint8_t dte, uint32_t edte);
+struct iommu_ctx *amdiommu_get_ctx(struct iommu_unit *iommu, device_t dev,
+ uint16_t rid, bool id_mapped, bool rmrr_init);
+struct amdiommu_ctx *amdiommu_find_ctx_locked(struct amdiommu_unit *unit,
+ uint16_t rid);
+void amdiommu_free_ctx_locked_method(struct iommu_unit *iommu,
+ struct iommu_ctx *context);
+void amdiommu_free_ctx_method(struct iommu_ctx *context);
+struct amdiommu_domain *amdiommu_find_domain(struct amdiommu_unit *unit,
+ uint16_t rid);
+
+void amdiommu_qi_invalidate_ctx_locked(struct amdiommu_ctx *ctx);
+void amdiommu_qi_invalidate_ctx_locked_nowait(struct amdiommu_ctx *ctx);
+void amdiommu_qi_invalidate_ir_locked(struct amdiommu_unit *unit,
+ uint16_t devid);
+void amdiommu_qi_invalidate_ir_locked_nowait(struct amdiommu_unit *unit,
+ uint16_t devid);
+void amdiommu_qi_invalidate_all_pages_locked_nowait(
+ struct amdiommu_domain *domain);
+void amdiommu_qi_invalidate_wait_sync(struct iommu_unit *iommu);
+
+int amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain);
+void amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain);
+extern const struct iommu_domain_map_ops amdiommu_domain_map_ops;
+
+#endif
