D37428.diff

diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm.h
@@ -0,0 +1,362 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_H_
+#define _VMM_H_
+
+#include <sys/param.h>
+#include <sys/cpuset.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include "pte.h"
+#include "pmap.h"
+
+struct vcpu;
+
+enum vm_suspend_how {
+ VM_SUSPEND_NONE,
+ VM_SUSPEND_RESET,
+ VM_SUSPEND_POWEROFF,
+ VM_SUSPEND_HALT,
+ VM_SUSPEND_LAST
+};
+
+/*
+ * Identifiers for architecturally defined registers.
+ */
+enum vm_reg_name {
+ VM_REG_GUEST_X0 = 0,
+ VM_REG_GUEST_X1,
+ VM_REG_GUEST_X2,
+ VM_REG_GUEST_X3,
+ VM_REG_GUEST_X4,
+ VM_REG_GUEST_X5,
+ VM_REG_GUEST_X6,
+ VM_REG_GUEST_X7,
+ VM_REG_GUEST_X8,
+ VM_REG_GUEST_X9,
+ VM_REG_GUEST_X10,
+ VM_REG_GUEST_X11,
+ VM_REG_GUEST_X12,
+ VM_REG_GUEST_X13,
+ VM_REG_GUEST_X14,
+ VM_REG_GUEST_X15,
+ VM_REG_GUEST_X16,
+ VM_REG_GUEST_X17,
+ VM_REG_GUEST_X18,
+ VM_REG_GUEST_X19,
+ VM_REG_GUEST_X20,
+ VM_REG_GUEST_X21,
+ VM_REG_GUEST_X22,
+ VM_REG_GUEST_X23,
+ VM_REG_GUEST_X24,
+ VM_REG_GUEST_X25,
+ VM_REG_GUEST_X26,
+ VM_REG_GUEST_X27,
+ VM_REG_GUEST_X28,
+ VM_REG_GUEST_X29,
+ VM_REG_GUEST_LR,
+ VM_REG_GUEST_SP,
+ VM_REG_GUEST_PC,
+ VM_REG_GUEST_CPSR,
+
+ VM_REG_GUEST_SCTLR_EL1,
+ VM_REG_GUEST_TTBR0_EL1,
+ VM_REG_GUEST_TTBR1_EL1,
+ VM_REG_GUEST_TCR_EL1,
+ VM_REG_GUEST_TCR2_EL1,
+ VM_REG_LAST
+};
+
+#define VM_INTINFO_VECTOR(info) ((info) & 0xff)
+#define VM_INTINFO_DEL_ERRCODE 0x800
+#define VM_INTINFO_RSVD 0x7ffff000
+#define VM_INTINFO_VALID 0x80000000
+#define VM_INTINFO_TYPE 0x700
+#define VM_INTINFO_HWINTR (0 << 8)
+#define VM_INTINFO_NMI (2 << 8)
+#define VM_INTINFO_HWEXCEPTION (3 << 8)
+#define VM_INTINFO_SWINTR (4 << 8)
+
+#define VM_MAX_SUFFIXLEN 15
+
+#define VM_GUEST_BASE_IPA 0x80000000UL /* Guest kernel start ipa */
+
+#ifdef _KERNEL
+
+#define VM_MAX_NAMELEN 32
+
+struct vm;
+struct vm_exception;
+struct vm_exit;
+struct vm_run;
+struct vm_object;
+struct vm_guest_paging;
+struct vm_vgic_descr;
+struct pmap;
+
+struct vm_eventinfo {
+ void *rptr; /* rendezvous cookie */
+ int *sptr; /* suspend cookie */
+ int *iptr; /* reqidle cookie */
+};
+
+int vm_create(const char *name, struct vm **retvm);
+struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
+void vm_slock_vcpus(struct vm *vm);
+void vm_unlock_vcpus(struct vm *vm);
+void vm_destroy(struct vm *vm);
+int vm_reinit(struct vm *vm);
+const char *vm_name(struct vm *vm);
+
+/*
+ * APIs that modify the guest memory map require all vcpus to be frozen.
+ */
+void vm_slock_memsegs(struct vm *vm);
+void vm_xlock_memsegs(struct vm *vm);
+void vm_unlock_memsegs(struct vm *vm);
+int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
+ size_t len, int prot, int flags);
+int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
+int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+void vm_free_memseg(struct vm *vm, int ident);
+
+/*
+ * APIs that inspect the guest memory map require only a *single* vcpu to
+ * be frozen. This acts like a read lock on the guest memory map since any
+ * modification requires *all* vcpus to be frozen.
+ */
+int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ struct vm_object **objptr);
+vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
+void *vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len,
+ int prot, void **cookie);
+void *vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len,
+ int prot, void **cookie);
+void vm_gpa_release(void *cookie);
+bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa);
+
+int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault);
+
+uint16_t vm_get_maxcpus(struct vm *vm);
+void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus);
+int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus);
+int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
+int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
+int vm_run(struct vcpu *vcpu);
+int vm_suspend(struct vm *vm, enum vm_suspend_how how);
+void* vm_get_cookie(struct vm *vm);
+int vcpu_vcpuid(struct vcpu *vcpu);
+void *vcpu_get_cookie(struct vcpu *vcpu);
+struct vm *vcpu_vm(struct vcpu *vcpu);
+struct vcpu *vm_vcpu(struct vm *vm, int cpu);
+int vm_get_capability(struct vcpu *vcpu, int type, int *val);
+int vm_set_capability(struct vcpu *vcpu, int type, int val);
+int vm_activate_cpu(struct vcpu *vcpu);
+int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
+int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
+int vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far);
+int vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr);
+int vm_assert_irq(struct vm *vm, uint32_t irq);
+int vm_deassert_irq(struct vm *vm, uint32_t irq);
+int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
+ int func);
+struct vm_exit *vm_exitinfo(struct vcpu *vcpu);
+void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_debug(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_astpending(struct vcpu *vcpu, uint64_t pc);
+
+cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_debug_cpus(struct vm *vm);
+cpuset_t vm_suspended_cpus(struct vm *vm);
+
+static __inline bool
+virt_enabled(void)
+{
+
+ return (has_hyp());
+}
+
+static __inline int
+vcpu_rendezvous_pending(struct vm_eventinfo *info)
+{
+
+ return (*((uintptr_t *)(info->rptr)) != 0);
+}
+
+static __inline int
+vcpu_suspended(struct vm_eventinfo *info)
+{
+
+ return (*info->sptr);
+}
+
+int vcpu_debugged(struct vcpu *vcpu);
+
+enum vcpu_state {
+ VCPU_IDLE,
+ VCPU_FROZEN,
+ VCPU_RUNNING,
+ VCPU_SLEEPING,
+};
+
+int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle);
+enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu);
+
+static int __inline
+vcpu_is_running(struct vcpu *vcpu, int *hostcpu)
+{
+ return (vcpu_get_state(vcpu, hostcpu) == VCPU_RUNNING);
+}
+
+#ifdef _SYS_PROC_H_
+static int __inline
+vcpu_should_yield(struct vcpu *vcpu)
+{
+ struct thread *td;
+
+ td = curthread;
+ return (td->td_ast != 0 || td->td_owepreempt != 0);
+}
+#endif
+
+void *vcpu_stats(struct vcpu *vcpu);
+void vcpu_notify_event(struct vcpu *vcpu);
+
+enum vm_reg_name vm_segment_name(int seg_encoding);
+
+struct vm_copyinfo {
+ uint64_t gpa;
+ size_t len;
+ void *hva;
+ void *cookie;
+};
+
+#endif /* _KERNEL */
+
+#define VM_DIR_READ 0
+#define VM_DIR_WRITE 1
+
+#define VM_GP_M_MASK 0x1f
+#define VM_GP_MMU_ENABLED (1 << 5)
+
+struct vm_guest_paging {
+ uint64_t ttbr0_addr;
+ uint64_t ttbr1_addr;
+ uint64_t tcr_el1;
+ uint64_t tcr2_el1;
+ int flags;
+ int padding;
+};
+
+struct vie {
+ uint8_t access_size:4, sign_extend:1, dir:1, unused:2;
+ enum vm_reg_name reg;
+};
+
+struct vre {
+ uint32_t inst_syndrome;
+ uint8_t dir:1, unused:7;
+ enum vm_reg_name reg;
+};
+
+/*
+ * Identifiers for optional vmm capabilities
+ */
+enum vm_cap_type {
+ VM_CAP_HALT_EXIT,
+ VM_CAP_MTRAP_EXIT,
+ VM_CAP_PAUSE_EXIT,
+ VM_CAP_UNRESTRICTED_GUEST,
+ VM_CAP_MAX
+};
+
+enum vm_exitcode {
+ VM_EXITCODE_BOGUS,
+ VM_EXITCODE_INST_EMUL,
+ VM_EXITCODE_REG_EMUL,
+ VM_EXITCODE_HVC,
+ VM_EXITCODE_SUSPENDED,
+ VM_EXITCODE_HYP,
+ VM_EXITCODE_WFI,
+ VM_EXITCODE_PAGING,
+ VM_EXITCODE_SMCCC,
+ VM_EXITCODE_DEBUG,
+ VM_EXITCODE_MAX
+};
+
+struct vm_exit {
+ enum vm_exitcode exitcode;
+ int inst_length;
+ uint64_t pc;
+ union {
+ /*
+ * ARM specific payload.
+ */
+ struct {
+ uint32_t exception_nr;
+ uint32_t pad;
+ uint64_t esr_el2; /* Exception Syndrome Register */
+ uint64_t far_el2; /* Fault Address Register */
+ uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */
+ } hyp;
+ struct {
+ struct vre vre;
+ } reg_emul;
+ struct {
+ uint64_t gpa;
+ uint64_t esr;
+ } paging;
+ struct {
+ uint64_t gpa;
+ struct vm_guest_paging paging;
+ struct vie vie;
+ } inst_emul;
+
+ /*
+ * An SMCCC call, e.g. starting a core via PSCI.
+ * Further arguments can be read by asking the kernel for
+ * all register values.
+ */
+ struct {
+ uint64_t func_id;
+ uint64_t args[7];
+ } smccc_call;
+
+ struct {
+ enum vm_suspend_how how;
+ } suspended;
+ } u;
+};
+
+#endif /* _VMM_H_ */
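
The vm_exit structure above is how the arm64 vmm reports trap information to its caller: vm_run() fills it in and the consumer dispatches on exitcode. As a rough sketch (not part of this patch, with the handler bodies left out), a kernel-side exit loop built only from the APIs declared in this header could look like:

static int
handle_guest_exits(struct vcpu *vcpu)
{
	struct vm_exit *vme;
	int error;

	for (;;) {
		error = vm_run(vcpu);
		if (error != 0)
			return (error);

		vme = vm_exitinfo(vcpu);
		switch (vme->exitcode) {
		case VM_EXITCODE_WFI:
			/* Guest executed WFI and is waiting for an interrupt. */
			break;
		case VM_EXITCODE_PAGING:
			/* Stage 2 fault; the faulting IPA is in vme->u.paging.gpa. */
			break;
		case VM_EXITCODE_SMCCC:
			/* PSCI/SMCCC call; the id is in vme->u.smccc_call.func_id. */
			break;
		case VM_EXITCODE_SUSPENDED:
			/* vme->u.suspended.how says why the VM stopped. */
			return (0);
		default:
			break;
		}
	}
}
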
diff --git a/sys/arm64/include/vmm_dev.h b/sys/arm64/include/vmm_dev.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm_dev.h
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_DEV_H_
+#define _VMM_DEV_H_
+
+#ifdef _KERNEL
+void vmmdev_init(void);
+int vmmdev_cleanup(void);
+#endif
+
+struct vm_memmap {
+ vm_paddr_t gpa;
+ int segid; /* memory segment */
+ vm_ooffset_t segoff; /* offset into memory segment */
+ size_t len; /* mmap length */
+ int prot; /* RWX */
+ int flags;
+};
+#define VM_MEMMAP_F_WIRED 0x01
+
+struct vm_munmap {
+ vm_paddr_t gpa;
+ size_t len;
+};
+
+#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL)
+struct vm_memseg {
+ int segid;
+ size_t len;
+ char name[VM_MAX_SUFFIXLEN + 1];
+};
+
+struct vm_register {
+ int cpuid;
+ int regnum; /* enum vm_reg_name */
+ uint64_t regval;
+};
+
+struct vm_register_set {
+ int cpuid;
+ unsigned int count;
+ const int *regnums; /* enum vm_reg_name */
+ uint64_t *regvals;
+};
+
+struct vm_run {
+ int cpuid;
+ cpuset_t *cpuset; /* CPU set storage */
+ size_t cpusetsize;
+ struct vm_exit *vm_exit;
+};
+
+struct vm_exception {
+ int cpuid;
+ uint64_t esr;
+ uint64_t far;
+};
+
+struct vm_msi {
+ uint64_t msg;
+ uint64_t addr;
+ int bus;
+ int slot;
+ int func;
+};
+
+struct vm_capability {
+ int cpuid;
+ enum vm_cap_type captype;
+ int capval;
+ int allcpus;
+};
+
+#define MAX_VM_STATS 64
+struct vm_stats {
+ int cpuid; /* in */
+ int index; /* in */
+ int num_entries; /* out */
+ struct timeval tv;
+ uint64_t statbuf[MAX_VM_STATS];
+};
+struct vm_stat_desc {
+ int index; /* in */
+ char desc[128]; /* out */
+};
+
+struct vm_suspend {
+ enum vm_suspend_how how;
+};
+
+struct vm_gla2gpa {
+ int vcpuid; /* inputs */
+ int prot; /* PROT_READ or PROT_WRITE */
+ uint64_t gla;
+ struct vm_guest_paging paging;
+ int fault; /* outputs */
+ uint64_t gpa;
+};
+
+struct vm_activate_cpu {
+ int vcpuid;
+};
+
+struct vm_cpuset {
+ int which;
+ int cpusetsize;
+ cpuset_t *cpus;
+};
+#define VM_ACTIVE_CPUS 0
+#define VM_SUSPENDED_CPUS 1
+#define VM_DEBUG_CPUS 2
+
+struct vm_vgic_version {
+ u_int version;
+ u_int flags;
+};
+
+struct vm_vgic_descr {
+ struct vm_vgic_version ver;
+ union {
+ struct {
+ uint64_t dist_start;
+ uint64_t dist_size;
+ uint64_t redist_start;
+ uint64_t redist_size;
+ } v3_regs;
+ };
+};
+
+struct vm_irq {
+ uint32_t irq;
+};
+
+struct vm_cpu_topology {
+ uint16_t sockets;
+ uint16_t cores;
+ uint16_t threads;
+ uint16_t maxcpus;
+};
+
+enum {
+ /* general routines */
+ IOCNUM_ABIVERS = 0,
+ IOCNUM_RUN = 1,
+ IOCNUM_SET_CAPABILITY = 2,
+ IOCNUM_GET_CAPABILITY = 3,
+ IOCNUM_SUSPEND = 4,
+ IOCNUM_REINIT = 5,
+
+ /* memory apis */
+ IOCNUM_GET_GPA_PMAP = 12,
+ IOCNUM_GLA2GPA_NOFAULT = 13,
+ IOCNUM_ALLOC_MEMSEG = 14,
+ IOCNUM_GET_MEMSEG = 15,
+ IOCNUM_MMAP_MEMSEG = 16,
+ IOCNUM_MMAP_GETNEXT = 17,
+ IOCNUM_MUNMAP_MEMSEG = 18,
+
+ /* register/state accessors */
+ IOCNUM_SET_REGISTER = 20,
+ IOCNUM_GET_REGISTER = 21,
+ IOCNUM_SET_REGISTER_SET = 24,
+ IOCNUM_GET_REGISTER_SET = 25,
+
+ /* statistics */
+ IOCNUM_VM_STATS = 50,
+ IOCNUM_VM_STAT_DESC = 51,
+
+ /* CPU Topology */
+ IOCNUM_SET_TOPOLOGY = 63,
+ IOCNUM_GET_TOPOLOGY = 64,
+
+ /* interrupt injection */
+ IOCNUM_ASSERT_IRQ = 80,
+ IOCNUM_DEASSERT_IRQ = 81,
+ IOCNUM_RAISE_MSI = 82,
+ IOCNUM_INJECT_EXCEPTION = 83,
+
+ /* vm_cpuset */
+ IOCNUM_ACTIVATE_CPU = 90,
+ IOCNUM_GET_CPUSET = 91,
+ IOCNUM_SUSPEND_CPU = 92,
+ IOCNUM_RESUME_CPU = 93,
+
+ /* vm_attach_vgic */
+ IOCNUM_GET_VGIC_VERSION = 110,
+ IOCNUM_ATTACH_VGIC = 111,
+};
+
+#define VM_RUN \
+ _IOWR('v', IOCNUM_RUN, struct vm_run)
+#define VM_SUSPEND \
+ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
+#define VM_REINIT \
+ _IO('v', IOCNUM_REINIT)
+#define VM_ALLOC_MEMSEG \
+ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
+#define VM_GET_MEMSEG \
+ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
+#define VM_MMAP_MEMSEG \
+ _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
+#define VM_MMAP_GETNEXT \
+ _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
+#define VM_MUNMAP_MEMSEG \
+ _IOW('v', IOCNUM_MUNMAP_MEMSEG, struct vm_munmap)
+#define VM_SET_REGISTER \
+ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
+#define VM_GET_REGISTER \
+ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
+#define VM_SET_REGISTER_SET \
+ _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set)
+#define VM_GET_REGISTER_SET \
+ _IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set)
+#define VM_SET_CAPABILITY \
+ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
+#define VM_GET_CAPABILITY \
+ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
+#define VM_STATS \
+ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
+#define VM_STAT_DESC \
+ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
+#define VM_ASSERT_IRQ \
+ _IOW('v', IOCNUM_ASSERT_IRQ, struct vm_irq)
+#define VM_DEASSERT_IRQ \
+ _IOW('v', IOCNUM_DEASSERT_IRQ, struct vm_irq)
+#define VM_RAISE_MSI \
+ _IOW('v', IOCNUM_RAISE_MSI, struct vm_msi)
+#define VM_INJECT_EXCEPTION \
+ _IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception)
+#define VM_SET_TOPOLOGY \
+ _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GET_TOPOLOGY \
+ _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GLA2GPA_NOFAULT \
+ _IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa)
+#define VM_ACTIVATE_CPU \
+ _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
+#define VM_GET_CPUS \
+ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
+#define VM_SUSPEND_CPU \
+ _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu)
+#define VM_RESUME_CPU \
+ _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu)
+#define VM_GET_VGIC_VERSION \
+ _IOR('v', IOCNUM_GET_VGIC_VERSION, struct vm_vgic_version)
+#define VM_ATTACH_VGIC \
+ _IOW('v', IOCNUM_ATTACH_VGIC, struct vm_vgic_descr)
+#endif
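
These ioctls are the userland-facing half of the interface; bhyve and libvmmapi issue them against the VM's control device. A hedged userland sketch of reading the guest program counter with VM_GET_REGISTER follows; the /dev/vmm/<name> path and the minimal error handling are assumptions made for brevity, not something this header defines.

#include <sys/types.h>
#include <sys/ioctl.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int
get_guest_pc(const char *vmname, uint64_t *pc)
{
	struct vm_register vmreg;
	char path[64];
	int error, fd;

	snprintf(path, sizeof(path), "/dev/vmm/%s", vmname);
	fd = open(path, O_RDWR);
	if (fd < 0)
		return (-1);

	vmreg.cpuid = 0;			/* vcpu 0 */
	vmreg.regnum = VM_REG_GUEST_PC;		/* from machine/vmm.h */
	vmreg.regval = 0;
	error = ioctl(fd, VM_GET_REGISTER, &vmreg);
	if (error == 0)
		*pc = vmreg.regval;

	close(fd);
	return (error);
}
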
diff --git a/sys/arm64/include/vmm_instruction_emul.h b/sys/arm64/include/vmm_instruction_emul.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm_instruction_emul.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_INSTRUCTION_EMUL_H_
+#define _VMM_INSTRUCTION_EMUL_H_
+
+/*
+ * Callback functions to read and write memory regions.
+ */
+typedef int (*mem_region_read_t)(struct vcpu *vcpu, uint64_t gpa,
+ uint64_t *rval, int rsize, void *arg);
+typedef int (*mem_region_write_t)(struct vcpu *vcpu, uint64_t gpa,
+ uint64_t wval, int wsize, void *arg);
+
+/*
+ * Callback functions to read and write registers.
+ */
+typedef int (*reg_read_t)(struct vcpu *vcpu, uint64_t *rval, void *arg);
+typedef int (*reg_write_t)(struct vcpu *vcpu, uint64_t wval, void *arg);
+
+/*
+ * Emulate the decoded 'vie' instruction when it contains a memory operation.
+ *
+ * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
+ * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'vcpu' is the kernel's 'struct vcpu' when called from kernel context and
+ * the userland (libvmmapi) vcpu handle when called from user context.
+ *
+ */
+int vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t mrr,
+ mem_region_write_t mrw, void *mrarg);
+
+/*
+ * Emulate the decoded 'vre' instruction when it contains a register access.
+ *
+ * The callbacks 'regread' and 'regwrite' emulate reads and writes to the
+ * register from 'vre'. 'regarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'vcpu' is the kernel's 'struct vcpu' when called from kernel context and
+ * the userland (libvmmapi) vcpu handle when called from user context.
+ *
+ */
+int vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg);
+
+#ifdef _KERNEL
+void vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
+ reg_read_t reg_read, reg_write_t reg_write, void *arg);
+void vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask);
+
+void vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+ mem_region_read_t mmio_read, mem_region_write_t mmio_write);
+void vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size);
+#endif
+
+#endif /* _VMM_INSTRUCTION_EMUL_H_ */
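
The mem_region_read_t/mem_region_write_t callbacks above are how an emulated device is wired into the trapped-MMIO path: a backend registers a handler pair for its guest-physical window and vmm_emulate_instruction() calls into it when a decoded access lands in that range. A minimal sketch under assumed names (the device, its base address and its size are invented for illustration):

static int
mydev_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, int rsize,
    void *arg)
{
	/* Supply the value for the trapped 'rsize'-byte load at 'gpa'. */
	*rval = 0;
	return (0);
}

static int
mydev_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t wval, int wsize,
    void *arg)
{
	/* Consume the trapped 'wsize'-byte store of 'wval' at 'gpa'. */
	return (0);
}

static void
mydev_attach(struct vm *vm)
{
	/* Hypothetical 4 KiB register window at IPA 0x0a000000. */
	vm_register_inst_handler(vm, 0x0a000000, 0x1000,
	    mydev_mmio_read, mydev_mmio_write);
}
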
diff --git a/sys/arm64/include/vmm_snapshot.h b/sys/arm64/include/vmm_snapshot.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm_snapshot.h
@@ -0,0 +1 @@
+/* $FreeBSD$ */
diff --git a/sys/arm64/vmm/arm64.h b/sys/arm64/vmm/arm64.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/arm64.h
@@ -0,0 +1,165 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _VMM_ARM64_H_
+#define _VMM_ARM64_H_
+
+#include <machine/reg.h>
+#include <machine/hypervisor.h>
+#include <machine/pcpu.h>
+
+#include "mmu.h"
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+
+struct vgic_v3;
+struct vgic_v3_cpu;
+
+struct hypctx {
+ struct trapframe tf;
+
+ /*
+ * EL1 control registers.
+ */
+ uint64_t elr_el1; /* Exception Link Register */
+ uint64_t sp_el0; /* Stack pointer */
+ uint64_t tpidr_el0; /* EL0 Software ID Register */
+ uint64_t tpidrro_el0; /* Read-only Thread ID Register */
+ uint64_t tpidr_el1; /* EL1 Software ID Register */
+ uint64_t vbar_el1; /* Vector Base Address Register */
+
+ uint64_t actlr_el1; /* Auxiliary Control Register */
+ uint64_t afsr0_el1; /* Auxiliary Fault Status Register 0 */
+ uint64_t afsr1_el1; /* Auxiliary Fault Status Register 1 */
+ uint64_t amair_el1; /* Auxiliary Memory Attribute Indirection Register */
+ uint64_t contextidr_el1; /* Current Process Identifier */
+ uint64_t cpacr_el1; /* Architectural Feature Access Control Register */
+ uint64_t csselr_el1; /* Cache Size Selection Register */
+ uint64_t esr_el1; /* Exception Syndrome Register */
+ uint64_t far_el1; /* Fault Address Register */
+ uint64_t mair_el1; /* Memory Attribute Indirection Register */
+ uint64_t mdccint_el1; /* Monitor DCC Interrupt Enable Register */
+ uint64_t mdscr_el1; /* Monitor Debug System Control Register */
+ uint64_t par_el1; /* Physical Address Register */
+ uint64_t sctlr_el1; /* System Control Register */
+ uint64_t tcr_el1; /* Translation Control Register */
+ uint64_t tcr2_el1; /* Translation Control Register 2 */
+ uint64_t ttbr0_el1; /* Translation Table Base Register 0 */
+ uint64_t ttbr1_el1; /* Translation Table Base Register 1 */
+ uint64_t spsr_el1; /* Saved Program Status Register */
+
+ uint64_t pmcr_el0; /* Performance Monitors Control Register */
+ uint64_t pmccntr_el0;
+ uint64_t pmccfiltr_el0;
+ uint64_t pmcntenset_el0;
+ uint64_t pmintenset_el1;
+ uint64_t pmovsset_el0;
+ uint64_t pmselr_el0;
+ uint64_t pmuserenr_el0;
+ uint64_t pmevcntr_el0[31];
+ uint64_t pmevtyper_el0[31];
+
+ uint64_t dbgbcr_el1[16]; /* Debug Breakpoint Control Registers */
+ uint64_t dbgbvr_el1[16]; /* Debug Breakpoint Value Registers */
+ uint64_t dbgwcr_el1[16]; /* Debug Watchpoint Control Registers */
+ uint64_t dbgwvr_el1[16]; /* Debug Watchpoint Value Registers */
+
+ /* EL2 control registers */
+ uint64_t cptr_el2; /* Architectural Feature Trap Register */
+ uint64_t hcr_el2; /* Hypervisor Configuration Register */
+ uint64_t mdcr_el2; /* Monitor Debug Configuration Register */
+ uint64_t vpidr_el2; /* Virtualization Processor ID Register */
+ uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
+ uint64_t el2_addr; /* The address of this in el2 space */
+ struct hyp *hyp;
+ struct vcpu *vcpu;
+ struct {
+ uint64_t far_el2; /* Fault Address Register */
+ uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */
+ } exit_info;
+
+ struct vtimer_cpu vtimer_cpu;
+
+ struct vgic_v3_regs vgic_v3_regs;
+ struct vgic_v3_cpu *vgic_cpu;
+ bool has_exception;
+};
+
+struct hyp {
+ struct vm *vm;
+ struct vtimer vtimer;
+ uint64_t vmid_generation;
+ uint64_t vttbr_el2;
+ uint64_t el2_addr; /* The address of this in el2 space */
+ bool vgic_attached;
+ struct vgic_v3 *vgic;
+ struct hypctx *ctx[];
+};
+
+#define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \
+ ret_type vmmops_##opname args;
+
+DEFINE_VMMOPS_IFUNC(int, modinit, (int ipinum))
+DEFINE_VMMOPS_IFUNC(int, modcleanup, (void))
+DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap))
+DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault))
+DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap,
+ struct vm_eventinfo *info))
+DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi))
+DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu,
+ int vcpu_id))
+DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui))
+DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t esr, uint64_t far))
+DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval))
+DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val))
+DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval))
+DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val))
+DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min,
+ vm_offset_t max))
+DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace))
+#ifdef notyet
+#ifdef BHYVE_SNAPSHOT
+DEFINE_VMMOPS_IFUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta *meta))
+DEFINE_VMMOPS_IFUNC(int, vcpu_snapshot, (void *vcpui,
+ struct vm_snapshot_meta *meta))
+DEFINE_VMMOPS_IFUNC(int, restore_tsc, (void *vcpui, uint64_t now))
+#endif
+#endif
+
+uint64_t vmm_call_hyp(uint64_t, ...);
+
+#if 0
+#define eprintf(fmt, ...) printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__)
+#else
+#define eprintf(fmt, ...) do {} while(0)
+#endif
+
+struct hypctx *arm64_get_active_vcpu(void);
+void raise_data_insn_abort(struct hypctx *, uint64_t, bool, int);
+
+#endif /* !_VMM_ARM64_H_ */
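
DEFINE_VMMOPS_IFUNC only pastes a prototype together, so each line above declares one vmmops_* entry point that the machine-independent vmm code calls into the arm64 backend. For instance, the getreg and run lines expand to:

/* Expansions of the DEFINE_VMMOPS_IFUNC() lines above: */
int vmmops_getreg(void *vcpui, int num, uint64_t *retval);
int vmmops_run(void *vcpui, register_t pc, struct pmap *pmap,
    struct vm_eventinfo *info);
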
diff --git a/sys/arm64/vmm/hyp.h b/sys/arm64/vmm/hyp.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/hyp.h
@@ -0,0 +1,114 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_HYP_H_
+#define _VMM_HYP_H_
+
+/*
+ * The translation tables for the hypervisor mode will hold mappings for kernel
+ * virtual addresses and an identity mapping (VA == PA) necessary when
+ * enabling/disabling the MMU.
+ *
+ * When in the EL2 exception level, the translation table base register is
+ * TTBR0_EL2 and the virtual addresses generated by the CPU must be at the
+ * bottom of the address space, with the upper 16 bits all set to zero:
+ *
+ * 0x0000ffffffffffff End hyp address space
+ * 0x0000000000000000 Start of hyp address space
+ *
+ * To run code in hyp mode we need to convert kernel virtual addresses to
+ * addresses that fit into this address space.
+ *
+ * The kernel virtual address range is:
+ *
+ * 0xffff007fffffffff End of KVA
+ * 0xffff000000000000 Kernel base address & start of KVA
+ *
+ * (see /sys/arm64/include/vmparam.h).
+ *
+ * We could convert the kernel virtual addresses to valid EL2 addresses by
+ * setting the upper 16 bits to zero and thus mapping the kernel addresses in
+ * the bottom half of the EL2 address space, but then they might clash with the
+ * identity mapping addresses. Instead we map the kernel addresses in the upper
+ * half of the EL2 address space.
+ *
+ * The hypervisor address space will look like this:
+ *
+ * 0x0000807fffffffff End of KVA mapping
+ * 0x0000800000000000 Start of KVA mapping
+ *
+ * 0x00007fffffffffff End of identity mapping
+ * 0x0000000000000000 Start of identity mapping
+ *
+ * With this scheme we have 47 bits at our disposal for the identity map and
+ * another 47 bits for the kernel virtual addresses. For a maximum physical
+ * memory size of 128TB we are guaranteed to not have any clashes between
+ * addresses.
+ */
+#define HYP_VM_MIN_ADDRESS 0x0000000000000000
+#define HYP_VM_MAX_ADDRESS 0x0001000000000000
+
+/*
+ * When the vmm code is installed the following handles can be used by
+ * the host to call into EL2.
+ */
+#define HYP_CLEANUP 0x00000001
+#define HYP_ENTER_GUEST 0x00000002
+#define HYP_READ_REGISTER 0x00000003
+#define HYP_REG_ICH_VTR 0x1
+#define HYP_REG_CNTHCTL 0x2
+#define HYP_CLEAN_S2_TLBI 0x00000004
+#define HYP_DC_CIVAC 0x00000005
+#define HYP_EL2_TLBI 0x00000006
+#define HYP_EL2_TLBI_ALL 0x1
+#define HYP_EL2_TLBI_VA 0x2
+#define HYP_S2_TLBI_RANGE 0x00000010
+#define HYP_S2_TLBI_ALL 0x00000011
+
+/*
+ * When taking asynchronous exceptions, or interrupts, with the exception of the
+ * SError interrupt, the exception syndrome register is not updated with the
+ * exception code. We need to differentiate between the different exception
+ * types taken to EL2.
+ */
+#define EXCP_TYPE_EL1_SYNC 0
+#define EXCP_TYPE_EL1_IRQ 1
+#define EXCP_TYPE_EL1_FIQ 2
+#define EXCP_TYPE_EL1_ERROR 3
+
+#define EXCP_TYPE_EL2_SYNC 4
+#define EXCP_TYPE_EL2_IRQ 5
+#define EXCP_TYPE_EL2_FIQ 6
+#define EXCP_TYPE_EL2_ERROR 7
+
+#define EXCP_TYPE_MAINT_IRQ 8
+/* Used internally in vmm_hyp.c */
+#define EXCP_TYPE_REENTER 9
+
+#define HYP_GET_VECTOR_TABLE -1
+
+#endif /* !_VMM_HYP_H_ */
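
Given the layout described above, translating a kernel virtual address into its EL2 alias is a matter of swapping the 0xffff000000000000 kernel prefix for the 0x0000800000000000 base of the upper-half hyp mapping. A sketch of that conversion (the macro names are invented here; the patch may express this differently):

/* KVA 0xffff00xxxxxxxxxx maps to EL2 VA 0x000080xxxxxxxxxx. */
#define	HYP_KVA_OFFSET	0x0000800000000000UL
#define	HYP_KVA_MASK	0x00007fffffffffffUL
#define	ktohyp(kva)	(((vm_offset_t)(kva) & HYP_KVA_MASK) | HYP_KVA_OFFSET)
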
diff --git a/sys/arm64/vmm/io/vgic.h b/sys/arm64/vmm/io/vgic.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic.h
@@ -0,0 +1,52 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Arm Ltd
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VGIC_H_
+#define _VGIC_H_
+
+struct hyp;
+struct hypctx;
+struct vm_vgic_descr;
+
+extern device_t vgic_dev;
+
+bool vgic_present(void);
+void vgic_init(void);
+int vgic_attach_to_vm(struct hyp *hyp, struct vm_vgic_descr *descr);
+void vgic_detach_from_vm(struct hyp *hyp);
+void vgic_vminit(struct hyp *hyp);
+void vgic_cpuinit(struct hypctx *hypctx);
+void vgic_cpucleanup(struct hypctx *hypctx);
+void vgic_vmcleanup(struct hyp *hyp);
+int vgic_max_cpu_count(struct hyp *hyp);
+bool vgic_has_pending_irq(struct hypctx *hypctx);
+int vgic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level);
+int vgic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr);
+void vgic_flush_hwstate(struct hypctx *hypctx);
+void vgic_sync_hwstate(struct hypctx *hypctx);
+
+#endif /* _VGIC_H_ */
diff --git a/sys/arm64/vmm/io/vgic.c b/sys/arm64/vmm/io/vgic.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic.c
@@ -0,0 +1,122 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Arm Ltd
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+
+#include "vgic.h"
+#include "vgic_if.h"
+
+device_t vgic_dev;
+
+bool
+vgic_present(void)
+{
+ return (vgic_dev != NULL);
+}
+
+void
+vgic_init(void)
+{
+ VGIC_INIT(vgic_dev);
+}
+
+int
+vgic_attach_to_vm(struct hyp *hyp, struct vm_vgic_descr *descr)
+{
+ return (VGIC_ATTACH_TO_VM(vgic_dev, hyp, descr));
+}
+
+void
+vgic_detach_from_vm(struct hyp *hyp)
+{
+ VGIC_DETACH_FROM_VM(vgic_dev, hyp);
+}
+
+void
+vgic_vminit(struct hyp *hyp)
+{
+ VGIC_VMINIT(vgic_dev, hyp);
+}
+
+void
+vgic_cpuinit(struct hypctx *hypctx)
+{
+ VGIC_CPUINIT(vgic_dev, hypctx);
+}
+
+void
+vgic_cpucleanup(struct hypctx *hypctx)
+{
+ VGIC_CPUCLEANUP(vgic_dev, hypctx);
+}
+
+void
+vgic_vmcleanup(struct hyp *hyp)
+{
+ VGIC_VMCLEANUP(vgic_dev, hyp);
+}
+
+int
+vgic_max_cpu_count(struct hyp *hyp)
+{
+ return (VGIC_MAX_CPU_COUNT(vgic_dev, hyp));
+}
+
+bool
+vgic_has_pending_irq(struct hypctx *hypctx)
+{
+ return (VGIC_HAS_PENDING_IRQ(vgic_dev, hypctx));
+}
+
+/* TODO: vcpuid -> hypctx ? */
+/* TODO: Add a vgic interface */
+int
+vgic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level)
+{
+ return (VGIC_INJECT_IRQ(vgic_dev, hyp, vcpuid, irqid, level));
+}
+
+int
+vgic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr)
+{
+ return (VGIC_INJECT_MSI(vgic_dev, hyp, msg, addr));
+}
+
+void
+vgic_flush_hwstate(struct hypctx *hypctx)
+{
+ VGIC_FLUSH_HWSTATE(vgic_dev, hypctx);
+}
+
+void
+vgic_sync_hwstate(struct hypctx *hypctx)
+{
+ VGIC_SYNC_HWSTATE(vgic_dev, hypctx);
+}
diff --git a/sys/arm64/vmm/io/vgic_if.m b/sys/arm64/vmm/io/vgic_if.m
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic_if.m
@@ -0,0 +1,104 @@
+#-
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Copyright (c) 2023 Arm Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+
+INTERFACE vgic;
+
+HEADER {
+ struct hyp;
+ struct hypctx;
+ struct vm_vgic_descr;
+};
+
+METHOD void init {
+ device_t dev;
+}
+
+METHOD int attach_to_vm {
+ device_t dev;
+ struct hyp *hyp;
+ struct vm_vgic_descr *descr;
+};
+
+METHOD void detach_from_vm {
+ device_t dev;
+ struct hyp *hyp;
+}
+
+METHOD void vminit {
+ device_t dev;
+ struct hyp *hyp;
+}
+
+METHOD void cpuinit {
+ device_t dev;
+ struct hypctx *hypctx;
+}
+
+METHOD void cpucleanup {
+ device_t dev;
+ struct hypctx *hypctx;
+}
+
+METHOD void vmcleanup {
+ device_t dev;
+ struct hyp *hyp;
+}
+
+METHOD int max_cpu_count {
+ device_t dev;
+ struct hyp *hyp;
+}
+
+METHOD bool has_pending_irq {
+ device_t dev;
+ struct hypctx *hypctx;
+}
+
+METHOD int inject_irq {
+ device_t dev;
+ struct hyp *hyp;
+ int vcpuid;
+ uint32_t irqid;
+ bool level;
+}
+
+METHOD int inject_msi {
+ device_t dev;
+ struct hyp *hyp;
+ uint64_t msg;
+ uint64_t addr;
+}
+
+METHOD void flush_hwstate {
+ device_t dev;
+ struct hypctx *hypctx;
+}
+
+METHOD void sync_hwstate {
+ device_t dev;
+ struct hypctx *hypctx;
+}
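
vgic_if.m is consumed by the kernel's kobj interface generator, which turns every METHOD above into a VGIC_*() dispatch macro and a matching vgic_*_t function typedef in the generated vgic_if.h; the thin wrappers in vgic.c earlier in this diff simply invoke those macros on whichever device has been recorded in vgic_dev. A concrete GIC driver would then advertise its implementations through its device method table, roughly as below (the my_* function names are hypothetical):

static device_method_t my_gicv3_methods[] = {
	/* vgic interface methods (generated from vgic_if.m) */
	DEVMETHOD(vgic_init,		my_vgic_init),
	DEVMETHOD(vgic_attach_to_vm,	my_vgic_attach_to_vm),
	DEVMETHOD(vgic_inject_irq,	my_vgic_inject_irq),
	DEVMETHOD(vgic_flush_hwstate,	my_vgic_flush_hwstate),
	DEVMETHOD(vgic_sync_hwstate,	my_vgic_sync_hwstate),
	/* ... the remaining vgic and bus methods ... */
	DEVMETHOD_END
};
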
diff --git a/sys/arm64/vmm/io/vgic_v3.h b/sys/arm64/vmm/io/vgic_v3.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic_v3.h
@@ -0,0 +1,57 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_VGIC_V3_H_
+#define _VMM_VGIC_V3_H_
+
+#define VGIC_ICH_LR_NUM_MAX 16
+#define VGIC_ICH_APR_NUM_MAX 4
+
+/* Registers accessed by EL2 */
+struct vgic_v3_regs {
+ uint32_t ich_eisr_el2; /* End of Interrupt Status Register */
+ uint32_t ich_elrsr_el2; /* Empty List register Status Register (ICH_ELRSR_EL2) */
+ uint32_t ich_hcr_el2; /* Hyp Control Register */
+ uint32_t ich_misr_el2; /* Maintenance Interrupt State Register */
+ uint32_t ich_vmcr_el2; /* Virtual Machine Control Register */
+
+ /*
+ * The List Registers are part of the VM context and are modified on a
+ * world switch. They need to be allocated statically so they are
+ * mapped in the EL2 translation tables when struct hypctx is mapped.
+ */
+ uint64_t ich_lr_el2[VGIC_ICH_LR_NUM_MAX];
+ uint16_t ich_lr_num;
+
+ /* Active Priorities Registers for Group 0 and 1 interrupts */
+ uint16_t ich_apr_num;
+ uint32_t ich_ap0r_el2[VGIC_ICH_APR_NUM_MAX];
+ uint32_t ich_ap1r_el2[VGIC_ICH_APR_NUM_MAX];
+};
+
+#endif /* !_VMM_VGIC_V3_H_ */
diff --git a/sys/arm64/vmm/io/vgic_v3.c b/sys/arm64/vmm/io/vgic_v3.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic_v3.c
@@ -0,0 +1,2348 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * Copyright (C) 2020-2022 Andrew Turner
+ * Copyright (C) 2023 Arm Ltd
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/bitstring.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/smp.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <dev/ofw/openfirm.h>
+
+#include <machine/armreg.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/param.h>
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/intr.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <arm/arm/gic_common.h>
+#include <arm64/arm64/gic_v3_reg.h>
+#include <arm64/arm64/gic_v3_var.h>
+
+#include <arm64/vmm/hyp.h>
+#include <arm64/vmm/mmu.h>
+#include <arm64/vmm/arm64.h>
+
+#include "vgic.h"
+#include "vgic_v3.h"
+#include "vgic_v3_reg.h"
+
+#include "vgic_if.h"
+
+#define VGIC_SGI_NUM (GIC_LAST_SGI - GIC_FIRST_SGI + 1)
+#define VGIC_PPI_NUM (GIC_LAST_PPI - GIC_FIRST_PPI + 1)
+#define VGIC_SPI_NUM (GIC_LAST_SPI - GIC_FIRST_SPI + 1)
+#define VGIC_PRV_I_NUM (VGIC_SGI_NUM + VGIC_PPI_NUM)
+#define VGIC_SHR_I_NUM (VGIC_SPI_NUM)
+
+MALLOC_DEFINE(M_VGIC_V3, "ARM VMM VGIC V3", "ARM VMM VGIC V3");
+
+/* TODO: Move to softc */
+struct vgic_v3_virt_features {
+ uint8_t min_prio;
+ size_t ich_lr_num;
+ size_t ich_apr_num;
+};
+
+struct vgic_v3_irq {
+ /* List of IRQs that are active or pending */
+ TAILQ_ENTRY(vgic_v3_irq) act_pend_list;
+ struct mtx irq_spinmtx;
+ uint64_t mpidr;
+ int target_vcpu;
+ uint32_t irq;
+ bool active;
+ bool pending;
+ bool enabled;
+ bool level;
+ bool on_aplist;
+ uint8_t priority;
+ uint8_t config;
+#define VGIC_CONFIG_MASK 0x2
+#define VGIC_CONFIG_LEVEL 0x0
+#define VGIC_CONFIG_EDGE 0x2
+};
+
+/* Global data not needed by EL2 */
+struct vgic_v3 {
+ struct mtx dist_mtx;
+ uint64_t dist_start;
+ size_t dist_end;
+
+ uint64_t redist_start;
+ size_t redist_end;
+
+ uint32_t gicd_ctlr; /* Distributor Control Register */
+
+ struct vgic_v3_irq *irqs;
+};
+
+/* Per-CPU data not needed by EL2 */
+struct vgic_v3_cpu {
+ /*
+ * We need a mutex for accessing the list registers because they are
+ * modified asynchronously by the virtual timer.
+ *
+ * Note that the mutex *MUST* be a spin mutex because an interrupt can
+ * be injected by a callout callback function, thereby modifying the
+ * list registers from a context where sleeping is forbidden.
+ */
+ struct mtx lr_mtx;
+
+ struct vgic_v3_irq private_irqs[VGIC_PRV_I_NUM];
+ TAILQ_HEAD(, vgic_v3_irq) irq_act_pend;
+ u_int ich_lr_used;
+};
+
+/* How many IRQs we support (SGIs + PPIs + SPIs). Not including LPIs */
+#define VGIC_NIRQS 1023
+/* Pretend to be an Arm design */
+#define VGIC_IIDR 0x43b
+
+static vgic_inject_irq_t vgic_v3_inject_irq;
+static vgic_inject_msi_t vgic_v3_inject_msi;
+
+static int vgic_v3_max_cpu_count(device_t dev, struct hyp *hyp);
+
+#define INJECT_IRQ(hyp, vcpuid, irqid, level) \
+ vgic_v3_inject_irq(NULL, (hyp), (vcpuid), (irqid), (level))
+
+typedef void (register_read)(struct hypctx *, u_int, uint64_t *, void *);
+typedef void (register_write)(struct hypctx *, u_int, u_int, u_int,
+ uint64_t, void *);
+
+#define VGIC_8_BIT (1 << 0)
+/* (1 << 1) is reserved for 16 bit accesses */
+#define VGIC_32_BIT (1 << 2)
+#define VGIC_64_BIT (1 << 3)
+
+struct vgic_register {
+ u_int start; /* Start within a memory region */
+ u_int end;
+ u_int size;
+ u_int flags;
+ register_read *read;
+ register_write *write;
+};
+
+#define VGIC_REGISTER_RANGE(reg_start, reg_end, reg_size, reg_flags, readf, \
+ writef) \
+{ \
+ .start = (reg_start), \
+ .end = (reg_end), \
+ .size = (reg_size), \
+ .flags = (reg_flags), \
+ .read = (readf), \
+ .write = (writef), \
+}
+
+#define VGIC_REGISTER_RANGE_RAZ_WI(reg_start, reg_end, reg_size, reg_flags) \
+ VGIC_REGISTER_RANGE(reg_start, reg_end, reg_size, reg_flags, \
+ gic_zero_read, gic_ignore_write)
+
+#define VGIC_REGISTER(start_addr, reg_size, reg_flags, readf, writef) \
+ VGIC_REGISTER_RANGE(start_addr, (start_addr) + (reg_size), \
+ reg_size, reg_flags, readf, writef)
+
+#define VGIC_REGISTER_RAZ_WI(start_addr, reg_size, reg_flags) \
+ VGIC_REGISTER_RANGE_RAZ_WI(start_addr, \
+ (start_addr) + (reg_size), reg_size, reg_flags)
+
+static register_read gic_pidr2_read;
+static register_read gic_zero_read;
+static register_write gic_ignore_write;
+
+/* GICD_CTLR */
+static register_read dist_ctlr_read;
+static register_write dist_ctlr_write;
+/* GICD_TYPER */
+static register_read dist_typer_read;
+/* GICD_IIDR */
+static register_read dist_iidr_read;
+/* GICD_STATUSR - RAZ/WI as we don't report errors (yet) */
+/* GICD_SETSPI_NSR & GICD_CLRSPI_NSR */
+static register_write dist_setclrspi_nsr_write;
+/* GICD_SETSPI_SR - RAZ/WI */
+/* GICD_CLRSPI_SR - RAZ/WI */
+/* GICD_IGROUPR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_ISENABLER */
+static register_read dist_isenabler_read;
+static register_write dist_isenabler_write;
+/* GICD_ICENABLER */
+static register_read dist_icenabler_read;
+static register_write dist_icenabler_write;
+/* GICD_ISPENDR */
+static register_read dist_ispendr_read;
+static register_write dist_ispendr_write;
+/* GICD_ICPENDR */
+static register_read dist_icpendr_read;
+static register_write dist_icpendr_write;
+/* GICD_ISACTIVER */
+static register_read dist_isactiver_read;
+static register_write dist_isactiver_write;
+/* GICD_ICACTIVER */
+static register_read dist_icactiver_read;
+static register_write dist_icactiver_write;
+/* GICD_IPRIORITYR */
+static register_read dist_ipriorityr_read;
+static register_write dist_ipriorityr_write;
+/* GICD_ITARGETSR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_ICFGR */
+static register_read dist_icfgr_read;
+static register_write dist_icfgr_write;
+/* GICD_IGRPMODR - RAZ/WI from non-secure mode */
+/* GICD_NSACR - RAZ/WI from non-secure mode */
+/* GICD_SGIR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_CPENDSGIR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_SPENDSGIR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_IROUTER */
+static register_read dist_irouter_read;
+static register_write dist_irouter_write;
+
+static struct vgic_register dist_registers[] = {
+ VGIC_REGISTER(GICD_CTLR, 4, VGIC_32_BIT, dist_ctlr_read,
+ dist_ctlr_write),
+ VGIC_REGISTER(GICD_TYPER, 4, VGIC_32_BIT, dist_typer_read,
+ gic_ignore_write),
+ VGIC_REGISTER(GICD_IIDR, 4, VGIC_32_BIT, dist_iidr_read,
+ gic_ignore_write),
+ VGIC_REGISTER_RAZ_WI(GICD_STATUSR, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICD_SETSPI_NSR, 4, VGIC_32_BIT, gic_zero_read,
+ dist_setclrspi_nsr_write),
+ VGIC_REGISTER(GICD_CLRSPI_NSR, 4, VGIC_32_BIT, gic_zero_read,
+ dist_setclrspi_nsr_write),
+ VGIC_REGISTER_RAZ_WI(GICD_SETSPI_SR, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICD_CLRSPI_SR, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_IGROUPR(0), GICD_IGROUPR(1024), 4,
+ VGIC_32_BIT),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ISENABLER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ISENABLER(32), GICD_ISENABLER(1024), 4,
+ VGIC_32_BIT, dist_isenabler_read, dist_isenabler_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ICENABLER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICENABLER(32), GICD_ICENABLER(1024), 4,
+ VGIC_32_BIT, dist_icenabler_read, dist_icenabler_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ISPENDR(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ISPENDR(32), GICD_ISPENDR(1024), 4,
+ VGIC_32_BIT, dist_ispendr_read, dist_ispendr_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ICPENDR(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICPENDR(32), GICD_ICPENDR(1024), 4,
+ VGIC_32_BIT, dist_icpendr_read, dist_icpendr_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ISACTIVER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ISACTIVER(32), GICD_ISACTIVER(1024), 4,
+ VGIC_32_BIT, dist_isactiver_read, dist_isactiver_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ICACTIVER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICACTIVER(32), GICD_ICACTIVER(1024), 4,
+ VGIC_32_BIT, dist_icactiver_read, dist_icactiver_write),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_IPRIORITYR(0), GICD_IPRIORITYR(32), 4,
+ VGIC_32_BIT | VGIC_8_BIT),
+ VGIC_REGISTER_RANGE(GICD_IPRIORITYR(32), GICD_IPRIORITYR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT, dist_ipriorityr_read,
+ dist_ipriorityr_write),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_ITARGETSR(0), GICD_ITARGETSR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_ICFGR(0), GICD_ICFGR(32), 4,
+ VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICFGR(32), GICD_ICFGR(1024), 4,
+ VGIC_32_BIT, dist_icfgr_read, dist_icfgr_write),
+/*
+ VGIC_REGISTER_RANGE(GICD_IGRPMODR(0), GICD_IGRPMODR(1024), 4,
+ VGIC_32_BIT, dist_igrpmodr_read, dist_igrpmodr_write),
+ VGIC_REGISTER_RANGE(GICD_NSACR(0), GICD_NSACR(1024), 4,
+ VGIC_32_BIT, dist_nsacr_read, dist_nsacr_write),
+*/
+ VGIC_REGISTER_RAZ_WI(GICD_SGIR, 4, VGIC_32_BIT),
+/*
+ VGIC_REGISTER_RANGE(GICD_CPENDSGIR(0), GICD_CPENDSGIR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT, dist_cpendsgir_read,
+ dist_cpendsgir_write),
+ VGIC_REGISTER_RANGE(GICD_SPENDSGIR(0), GICD_SPENDSGIR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT, dist_spendsgir_read,
+ dist_spendsgir_write),
+*/
+ VGIC_REGISTER_RANGE(GICD_IROUTER(32), GICD_IROUTER(1024), 8,
+ VGIC_64_BIT | VGIC_32_BIT, dist_irouter_read, dist_irouter_write),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR4, GICD_PIDR2, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICD_PIDR2, 4, VGIC_32_BIT, gic_pidr2_read,
+ gic_ignore_write),
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR2 + 4, GICD_SIZE, 4, VGIC_32_BIT),
+};
+
+/* GICR_CTLR - Ignore writes as no bits can be set */
+static register_read redist_ctlr_read;
+/* GICR_IIDR */
+static register_read redist_iidr_read;
+/* GICR_TYPER */
+static register_read redist_typer_read;
+/* GICR_STATUSR - RAZ/WI as we don't report errors (yet) */
+/* GICR_WAKER - RAZ/WI from non-secure mode */
+/* GICR_SETLPIR - RAZ/WI as no LPIs are supported */
+/* GICR_CLRLPIR - RAZ/WI as no LPIs are supported */
+/* GICR_PROPBASER - RAZ/WI as no LPIs are supported */
+/* GICR_PENDBASER - RAZ/WI as no LPIs are supported */
+/* GICR_INVLPIR - RAZ/WI as no LPIs are supported */
+/* GICR_INVALLR - RAZ/WI as no LPIs are supported */
+/* GICR_SYNCR - RAZ/WI as no LPIs are supported */
+
+static struct vgic_register redist_rd_registers[] = {
+ VGIC_REGISTER(GICR_CTLR, 4, VGIC_32_BIT, redist_ctlr_read,
+ gic_ignore_write),
+ VGIC_REGISTER(GICR_IIDR, 4, VGIC_32_BIT, redist_iidr_read,
+ gic_ignore_write),
+ VGIC_REGISTER(GICR_TYPER, 8, VGIC_64_BIT | VGIC_32_BIT,
+ redist_typer_read, gic_ignore_write),
+ VGIC_REGISTER_RAZ_WI(GICR_STATUSR, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_WAKER, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_SETLPIR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_CLRLPIR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_PROPBASER, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_PENDBASER, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_INVLPIR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_INVALLR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_SYNCR, 4, VGIC_32_BIT),
+
+ /* These are identical to the dist registers */
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR4, GICD_PIDR2, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICD_PIDR2, 4, VGIC_32_BIT, gic_pidr2_read,
+ gic_ignore_write),
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR2 + 4, GICD_SIZE, 4,
+ VGIC_32_BIT),
+};
+
+/* GICR_IGROUPR0 - RAZ/WI from non-secure mode */
+/* GICR_ISENABLER0 */
+static register_read redist_ienabler0_read;
+static register_write redist_isenabler0_write;
+/* GICR_ICENABLER0 */
+static register_write redist_icenabler0_write;
+/* GICR_ISPENDR0 */
+static register_read redist_ipendr0_read;
+static register_write redist_ispendr0_write;
+/* GICR_ICPENDR0 */
+static register_write redist_icpendr0_write;
+/* GICR_ISACTIVER0 */
+static register_read redist_iactiver0_read;
+static register_write redist_isactiver0_write;
+/* GICR_ICACTIVER0 */
+static register_write redist_icactiver0_write;
+/* GICR_IPRIORITYR */
+static register_read redist_ipriorityr_read;
+static register_write redist_ipriorityr_write;
+/* GICR_ICFGR0 - RAZ/WI from non-secure mode */
+/* GICR_ICFGR1 */
+static register_read redist_icfgr1_read;
+static register_write redist_icfgr1_write;
+/* GICR_IGRPMODR0 - RAZ/WI from non-secure mode */
+/* GICR_NSACR - RAZ/WI from non-secure mode */
+
+static struct vgic_register redist_sgi_registers[] = {
+ VGIC_REGISTER_RAZ_WI(GICR_IGROUPR0, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICR_ISENABLER0, 4, VGIC_32_BIT, redist_ienabler0_read,
+ redist_isenabler0_write),
+ VGIC_REGISTER(GICR_ICENABLER0, 4, VGIC_32_BIT, redist_ienabler0_read,
+ redist_icenabler0_write),
+ VGIC_REGISTER(GICR_ISPENDR0, 4, VGIC_32_BIT, redist_ipendr0_read,
+ redist_ispendr0_write),
+ VGIC_REGISTER(GICR_ICPENDR0, 4, VGIC_32_BIT, redist_ipendr0_read,
+ redist_icpendr0_write),
+ VGIC_REGISTER(GICR_ISACTIVER0, 4, VGIC_32_BIT, redist_iactiver0_read,
+ redist_isactiver0_write),
+ VGIC_REGISTER(GICR_ICACTIVER0, 4, VGIC_32_BIT, redist_iactiver0_read,
+ redist_icactiver0_write),
+ VGIC_REGISTER_RANGE(GICR_IPRIORITYR(0), GICR_IPRIORITYR(32), 4,
+ VGIC_32_BIT | VGIC_8_BIT, redist_ipriorityr_read,
+ redist_ipriorityr_write),
+ VGIC_REGISTER_RAZ_WI(GICR_ICFGR0, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICR_ICFGR1, 4, VGIC_32_BIT, redist_icfgr1_read,
+ redist_icfgr1_write),
+ VGIC_REGISTER_RAZ_WI(GICR_IGRPMODR0, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_NSACR, 4, VGIC_32_BIT),
+};
+
+static struct vgic_v3_virt_features virt_features;
+
+static struct vgic_v3_irq *vgic_v3_get_irq(struct hyp *, int, uint32_t);
+static void vgic_v3_release_irq(struct vgic_v3_irq *);
+
+/* TODO: Move to a common file */
+static int
+mpidr_to_vcpu(struct hyp *hyp, uint64_t mpidr)
+{
+ struct vm *vm;
+ struct hypctx *hypctx;
+
+ vm = hyp->vm;
+ for (int i = 0; i < vm_get_maxcpus(vm); i++) {
+ hypctx = hyp->ctx[i];
+ if (hypctx != NULL && (hypctx->vmpidr_el2 & GICD_AFF) == mpidr)
+ return (i);
+ }
+ return (-1);
+}
+
+static void
+vgic_v3_vminit(device_t dev, struct hyp *hyp)
+{
+ struct vgic_v3 *vgic;
+
+ hyp->vgic = malloc(sizeof(*hyp->vgic), M_VGIC_V3,
+ M_WAITOK | M_ZERO);
+ vgic = hyp->vgic;
+
+ /*
+ * Configure the Distributor control register. The register resets to an
+ * architecturally UNKNOWN value, so we reset to 0 to disable all
+ * functionality controlled by the register.
+ *
+ * The exception is GICD_CTLR.DS, which is RAO/WI when the Distributor
+ * supports only one security state (ARM GIC Architecture Specification
+ * for GICv3 and GICv4, p. 4-464).
+ */
+ vgic->gicd_ctlr = 0;
+
+ mtx_init(&vgic->dist_mtx, "VGICv3 Distributor lock", NULL,
+ MTX_SPIN);
+}
+
+static void
+vgic_v3_cpuinit(device_t dev, struct hypctx *hypctx)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ int i, irqid;
+
+ hypctx->vgic_cpu = malloc(sizeof(*hypctx->vgic_cpu),
+ M_VGIC_V3, M_WAITOK | M_ZERO);
+ vgic_cpu = hypctx->vgic_cpu;
+
+ mtx_init(&vgic_cpu->lr_mtx, "VGICv3 ICH_LR_EL2 lock", NULL, MTX_SPIN);
+
+ /* Set the SGI and PPI state */
+ for (irqid = 0; irqid < VGIC_PRV_I_NUM; irqid++) {
+ irq = &vgic_cpu->private_irqs[irqid];
+
+ mtx_init(&irq->irq_spinmtx, "VGIC IRQ spinlock", NULL,
+ MTX_SPIN);
+ irq->irq = irqid;
+ irq->mpidr = hypctx->vmpidr_el2 & GICD_AFF;
+ irq->target_vcpu = vcpu_vcpuid(hypctx->vcpu);
+ MPASS(irq->target_vcpu >= 0);
+
+ if (irqid < VGIC_SGI_NUM) {
+ /* SGIs */
+ irq->enabled = true;
+ irq->config = VGIC_CONFIG_EDGE;
+ } else {
+ /* PPIs */
+ irq->config = VGIC_CONFIG_LEVEL;
+ }
+ irq->priority = 0;
+ }
+
+ /*
+ * Configure the Interrupt Controller Hyp Control Register.
+ *
+ * ICH_HCR_EL2_En: enable virtual CPU interface.
+ *
+ * Maintenance interrupts are disabled.
+ */
+ hypctx->vgic_v3_regs.ich_hcr_el2 = ICH_HCR_EL2_En;
+
+ /*
+ * Configure the Interrupt Controller Virtual Machine Control Register.
+ *
+ * ICH_VMCR_EL2_VPMR: lowest priority mask for the VCPU interface
+ * ICH_VMCR_EL2_VBPR1_NO_PREEMPTION: disable interrupt preemption for
+ * Group 1 interrupts
+ * ICH_VMCR_EL2_VBPR0_NO_PREEMPTION: disable interrupt preemption for
+ * Group 0 interrupts
+ * ~ICH_VMCR_EL2_VEOIM: writes to EOI registers perform priority drop
+ * and interrupt deactivation.
+ * ICH_VMCR_EL2_VENG0: virtual Group 0 interrupts enabled.
+ * ICH_VMCR_EL2_VENG1: virtual Group 1 interrupts enabled.
+ */
+ hypctx->vgic_v3_regs.ich_vmcr_el2 =
+ (virt_features.min_prio << ICH_VMCR_EL2_VPMR_SHIFT) |
+ ICH_VMCR_EL2_VBPR1_NO_PREEMPTION | ICH_VMCR_EL2_VBPR0_NO_PREEMPTION;
+ hypctx->vgic_v3_regs.ich_vmcr_el2 &= ~ICH_VMCR_EL2_VEOIM;
+ hypctx->vgic_v3_regs.ich_vmcr_el2 |= ICH_VMCR_EL2_VENG0 |
+ ICH_VMCR_EL2_VENG1;
+
+ hypctx->vgic_v3_regs.ich_lr_num = virt_features.ich_lr_num;
+ for (i = 0; i < hypctx->vgic_v3_regs.ich_lr_num; i++)
+ hypctx->vgic_v3_regs.ich_lr_el2[i] = 0UL;
+ vgic_cpu->ich_lr_used = 0;
+ TAILQ_INIT(&vgic_cpu->irq_act_pend);
+
+ hypctx->vgic_v3_regs.ich_apr_num = virt_features.ich_apr_num;
+}
+
+static void
+vgic_v3_cpucleanup(device_t dev, struct hypctx *hypctx)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ int irqid;
+
+ vgic_cpu = hypctx->vgic_cpu;
+ for (irqid = 0; irqid < VGIC_PRV_I_NUM; irqid++) {
+ irq = &vgic_cpu->private_irqs[irqid];
+ mtx_destroy(&irq->irq_spinmtx);
+ }
+
+ mtx_destroy(&vgic_cpu->lr_mtx);
+ free(hypctx->vgic_cpu, M_VGIC_V3);
+}
+
+static void
+vgic_v3_vmcleanup(device_t dev, struct hyp *hyp)
+{
+ mtx_destroy(&hyp->vgic->dist_mtx);
+ free(hyp->vgic, M_VGIC_V3);
+}
+
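+/*
+ * The size of the redistributor region registered by the guest bounds how
+ * many vcpus can be given a redistributor frame, so derive the maximum
+ * vcpu count from it, capped at the VM's maximum.
+ */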
+static int
+vgic_v3_max_cpu_count(device_t dev, struct hyp *hyp)
+{
+ struct vgic_v3 *vgic;
+ size_t count;
+ int16_t max_count;
+
+ vgic = hyp->vgic;
+ max_count = vm_get_maxcpus(hyp->vm);
+
+ /* No redistributor range configured yet, assume the maximum number of CPUs */
+ if (vgic->redist_start == 0 && vgic->redist_end == 0)
+ return (max_count);
+
+ count = (vgic->redist_end - vgic->redist_start) /
+ (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE);
+
+ /*
+ * max_count is smaller than INT_MAX, so comparing against it also
+ * limits count to a positive integer value.
+ */
+ if (count > max_count)
+ return (max_count);
+
+ return (count);
+}
+
+static bool
+vgic_v3_irq_pending(struct vgic_v3_irq *irq)
+{
+ if ((irq->config & VGIC_CONFIG_MASK) == VGIC_CONFIG_LEVEL) {
+ return (irq->pending || irq->level);
+ } else {
+ return (irq->pending);
+ }
+}
+
+static bool
+vgic_v3_queue_irq(struct hyp *hyp, struct vgic_v3_cpu *vgic_cpu,
+ int vcpuid, struct vgic_v3_irq *irq)
+{
+ MPASS(vcpuid >= 0);
+ MPASS(vcpuid < vm_get_maxcpus(hyp->vm));
+
+ mtx_assert(&vgic_cpu->lr_mtx, MA_OWNED);
+ mtx_assert(&irq->irq_spinmtx, MA_OWNED);
+
+ /* No need to queue the IRQ */
+ if (!irq->level && !irq->pending)
+ return (false);
+
+ if (!irq->on_aplist) {
+ irq->on_aplist = true;
+ TAILQ_INSERT_TAIL(&vgic_cpu->irq_act_pend, irq, act_pend_list);
+ }
+ return (true);
+}
+
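+/*
+ * Merge a narrow write into an existing 64-bit register value. offset and
+ * size are in bytes; a full-width 8 byte write at offset 0 simply replaces
+ * the old value.
+ */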
+static uint64_t
+gic_reg_value_64(uint64_t field, uint64_t val, u_int offset, u_int size)
+{
+ uint32_t mask;
+
+ if (offset != 0 || size != 8) {
+ mask = ((1ul << (size * 8)) - 1) << (offset * 8);
+ /* Shift the new bits to the correct place */
+ val <<= (offset * 8);
+ /* Keep only the interesting bits */
+ val &= mask;
+ /* Add the bits we are keeping from the old value */
+ val |= field & ~mask;
+ }
+
+ return (val);
+}
+
+static void
+gic_pidr2_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ *rval = GICR_PIDR2_ARCH_GICv3 << GICR_PIDR2_ARCH_SHIFT;
+}
+
+/* Common read-as-zero/write-ignored helpers */
+static void
+gic_zero_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ *rval = 0;
+}
+
+static void
+gic_ignore_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ /* Nothing to do */
+}
+
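+/*
+ * Helpers for the GICD_I[SC]ENABLER and GICR_I[SC]ENABLER0 layout: each
+ * 32-bit register covers 32 interrupts, one bit per interrupt, so register
+ * index n maps to INTIDs [n * 32, n * 32 + 31]. Writes only act on the
+ * bits that are set, matching the set/clear register semantics.
+ */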
+static uint64_t
+read_enabler(struct hypctx *hypctx, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ if (irq->enabled)
+ ret |= 1u << i;
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
+static void
+write_enabler(struct hypctx *hypctx, int n, bool set, uint64_t val)
+{
+ struct vgic_v3_irq *irq;
+ uint32_t irq_base;
+ int i;
+
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ /* We only change interrupts when the appropriate bit is set */
+ if ((val & (1u << i)) == 0)
+ continue;
+
+ /* Find the interrupt this bit represents */
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ irq->enabled = set;
+ vgic_v3_release_irq(irq);
+ }
+}
+
+static uint64_t
+read_pendr(struct hypctx *hypctx, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ if (vgic_v3_irq_pending(irq))
+ ret |= 1u << i;
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
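+/*
+ * Setting a pending bit may make an interrupt deliverable, so queue it on
+ * the target vcpu's active/pending list and notify that vcpu. Clearing a
+ * bit just drops the pending state.
+ */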
+static uint64_t
+write_pendr(struct hypctx *hypctx, int n, bool set, uint64_t val)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ struct hyp *hyp;
+ struct hypctx *target_hypctx;
+ uint64_t ret;
+ uint32_t irq_base;
+ int target_vcpu, i;
+ bool notify;
+
+ hyp = hypctx->hyp;
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ /* We only change interrupts when the appropriate bit is set */
+ if ((val & (1u << i)) == 0)
+ continue;
+
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ notify = false;
+ target_vcpu = irq->target_vcpu;
+ if (target_vcpu < 0)
+ goto next_irq;
+ target_hypctx = hyp->ctx[target_vcpu];
+ if (target_hypctx == NULL)
+ goto next_irq;
+ vgic_cpu = target_hypctx->vgic_cpu;
+
+ if (!set) {
+ /* pending -> not pending */
+ irq->pending = false;
+ } else {
+ irq->pending = true;
+ mtx_lock_spin(&vgic_cpu->lr_mtx);
+ notify = vgic_v3_queue_irq(hyp, vgic_cpu, target_vcpu,
+ irq);
+ mtx_unlock_spin(&vgic_cpu->lr_mtx);
+ }
+next_irq:
+ vgic_v3_release_irq(irq);
+
+ if (notify)
+ vcpu_notify_event(vm_vcpu(hyp->vm, target_vcpu));
+ }
+
+ return (ret);
+}
+
+static uint64_t
+read_activer(struct hypctx *hypctx, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ if (irq->active)
+ ret |= 1u << i;
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
+static void
+write_activer(struct hypctx *hypctx, u_int n, bool set, uint64_t val)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ struct hyp *hyp;
+ struct hypctx *target_hypctx;
+ uint32_t irq_base;
+ int target_vcpu, i;
+ bool notify;
+
+ hyp = hypctx->hyp;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ /* We only change interrupts when the appropriate bit is set */
+ if ((val & (1u << i)) == 0)
+ continue;
+
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ notify = false;
+ target_vcpu = irq->target_vcpu;
+ if (target_vcpu < 0)
+ goto next_irq;
+ target_hypctx = hyp->ctx[target_vcpu];
+ if (target_hypctx == NULL)
+ goto next_irq;
+ vgic_cpu = target_hypctx->vgic_cpu;
+
+ if (!set) {
+ /* active -> not active */
+ irq->active = false;
+ } else {
+ /* not active -> active */
+ irq->active = true;
+ mtx_lock_spin(&vgic_cpu->lr_mtx);
+ notify = vgic_v3_queue_irq(hyp, vgic_cpu, target_vcpu,
+ irq);
+ mtx_unlock_spin(&vgic_cpu->lr_mtx);
+ }
+next_irq:
+ vgic_v3_release_irq(irq);
+
+ if (notify)
+ vcpu_notify_event(vm_vcpu(hyp->vm, target_vcpu));
+ }
+}
+
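+/*
+ * The priority registers hold one byte per interrupt, four interrupts per
+ * 32-bit register. They are also byte-accessible, which is why
+ * write_priorityr takes a starting INTID and access size rather than a
+ * register index.
+ */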
+static uint64_t
+read_priorityr(struct hypctx *hypctx, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 4;
+ for (i = 0; i < 4; i++) {
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ ret |= ((uint64_t)irq->priority) << (i * 8);
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
+static void
+write_priorityr(struct hypctx *hypctx, u_int irq_base, u_int size, uint64_t val)
+{
+ struct vgic_v3_irq *irq;
+ int i;
+
+ for (i = 0; i < size; i++) {
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ /* Set the priority. We support 32 priority steps (5 bits) */
+ irq->priority = (val >> (i * 8)) & 0xf8;
+ vgic_v3_release_irq(irq);
+ }
+}
+
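+/*
+ * The configuration registers use two bits per interrupt, 16 interrupts
+ * per 32-bit register. Bit 1 of each field selects edge (1) or level (0)
+ * triggering; bit 0 is RES0.
+ */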
+static uint64_t
+read_config(struct hypctx *hypctx, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 16;
+ for (i = 0; i < 16; i++) {
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ ret |= ((uint64_t)irq->config) << (i * 2);
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
+static void
+write_config(struct hypctx *hypctx, int n, uint64_t val)
+{
+ struct vgic_v3_irq *irq;
+ uint32_t irq_base;
+ int i;
+
+ irq_base = n * 16;
+ for (i = 0; i < 16; i++) {
+ /*
+ * The config can't be changed for SGIs and PPIs. SGIs have
+ * an edge-triggered behaviour, and the register is
+ * implementation defined to be read-only for PPIs.
+ */
+ if (irq_base + i < VGIC_PRV_I_NUM)
+ continue;
+
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ /* Bit 0 is RES0 */
+ irq->config = (val >> (i * 2)) & VGIC_CONFIG_MASK;
+ vgic_v3_release_irq(irq);
+ }
+}
+
+static uint64_t
+read_route(struct hypctx *hypctx, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t mpidr;
+
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu), n);
+ if (irq == NULL)
+ return (0);
+
+ mpidr = irq->mpidr;
+ vgic_v3_release_irq(irq);
+
+ return (mpidr);
+}
+
+static void
+write_route(struct hypctx *hypctx, int n, uint64_t val, u_int offset,
+ u_int size)
+{
+ struct vgic_v3_irq *irq;
+
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu), n);
+ if (irq == NULL)
+ return;
+
+ irq->mpidr = gic_reg_value_64(irq->mpidr, val, offset, size) & GICD_AFF;
+ irq->target_vcpu = mpidr_to_vcpu(hypctx->hyp, irq->mpidr);
+ /*
+ * If the interrupt is pending we can either use the old mpidr, or
+ * the new mpidr. To simplify this code we use the old value so we
+ * don't need to move the interrupt until the next time it is
+ * moved to the pending state.
+ */
+ vgic_v3_release_irq(irq);
+}
+
+/*
+ * Distributor register handlers.
+ */
+/* GICD_CTLR */
+static void
+dist_ctlr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3 *vgic;
+
+ hyp = hypctx->hyp;
+ vgic = hyp->vgic;
+
+ mtx_lock_spin(&vgic->dist_mtx);
+ *rval = vgic->gicd_ctlr;
+ mtx_unlock_spin(&vgic->dist_mtx);
+
+ /* Writes are never pending */
+ *rval &= ~GICD_CTLR_RWP;
+}
+
+static void
+dist_ctlr_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ struct vgic_v3 *vgic;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ vgic = hypctx->hyp->vgic;
+
+ /*
+ * GICv2 backwards compatibility is not implemented so
+ * ARE_NS is RAO/WI. This means EnableGrp1 is RES0.
+ *
+ * EnableGrp1A is supported, and RWP is read-only.
+ *
+ * All other bits are RES0 from non-secure mode as we
+ * implement as if we are in a system with two security
+ * states.
+ */
+ wval &= GICD_CTLR_G1A;
+ wval |= GICD_CTLR_ARE_NS;
+ mtx_lock_spin(&vgic->dist_mtx);
+ vgic->gicd_ctlr = wval;
+ /* TODO: Wake any vcpus that have interrupts pending */
+ mtx_unlock_spin(&vgic->dist_mtx);
+}
+
+/* GICD_TYPER */
+static void
+dist_typer_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ uint32_t typer;
+
+ typer = (10 - 1) << GICD_TYPER_IDBITS_SHIFT;
+ typer |= GICD_TYPER_MBIS;
+ /* ITLinesNumber: the maximum SPI INTID is 32 * (ITLinesNumber + 1) - 1 */
+ typer |= howmany(VGIC_NIRQS + 1, 32) - 1;
+
+ *rval = typer;
+}
+
+/* GICD_IIDR */
+static void
+dist_iidr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ *rval = VGIC_IIDR;
+}
+
+/* GICD_SETSPI_NSR & GICD_CLRSPI_NSR */
+static void
+dist_setclrspi_nsr_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ uint32_t irqid;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ irqid = wval & GICD_SPI_INTID_MASK;
+ INJECT_IRQ(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu), irqid,
+ reg == GICD_SETSPI_NSR);
+}
+
+/* GICD_ISENABLER */
+static void
+dist_isenabler_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_ISENABLER(0)) / 4;
+ /* GICD_ISENABLER0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ *rval = read_enabler(hypctx, n);
+}
+
+static void
+dist_isenabler_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ n = (reg - GICD_ISENABLER(0)) / 4;
+ /* GICD_ISENABLER0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ write_enabler(hypctx, n, true, wval);
+}
+
+/* GICD_ICENABLER */
+static void
+dist_icenabler_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_ICENABLER(0)) / 4;
+ /* GICD_ICENABLER0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ *rval = read_enabler(hypctx, n);
+}
+
+static void
+dist_icenabler_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ n = (reg - GICD_ICENABLER(0)) / 4;
+ /* GICD_ICENABLER0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ write_enabler(hypctx, n, false, wval);
+}
+
+/* GICD_ISPENDR */
+static void
+dist_ispendr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_ISPENDR(0)) / 4;
+ /* GICD_ISPENDR0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ *rval = read_pendr(hypctx, n);
+}
+
+static void
+dist_ispendr_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ n = (reg - GICD_ISPENDR(0)) / 4;
+ /* GICD_ISPENDR0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ write_pendr(hypctx, n, true, wval);
+}
+
+/* GICD_ICPENDR */
+static void
+dist_icpendr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_ICPENDR(0)) / 4;
+ /* GICD_ICPENDR0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ *rval = read_pendr(hypctx, n);
+}
+
+static void
+dist_icpendr_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ n = (reg - GICD_ICPENDR(0)) / 4;
+ /* GICD_ICPENDR0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ write_pendr(hypctx, n, false, wval);
+}
+
+/* GICD_ISACTIVER */
+/* Affinity routing is enabled so isactiver0 is RAZ/WI */
+static void
+dist_isactiver_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_ISACTIVER(0)) / 4;
+ /* GICD_ISACTIVER0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ *rval = read_activer(hypctx, n);
+}
+
+static void
+dist_isactiver_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ n = (reg - GICD_ISACTIVER(0)) / 4;
+ /* GICD_ISACTIVE0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ write_activer(hypctx, n, true, wval);
+}
+
+/* GICD_ICACTIVER */
+static void
+dist_icactiver_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ int n;
+
+ n = (reg - GICD_ICACTIVER(0)) / 4;
+ /* GICD_ICACTIVE0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ *rval = read_activer(hypctx, n);
+}
+
+static void
+dist_icactiver_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ n = (reg - GICD_ICACTIVER(0)) / 4;
+ /* GICD_ICACTIVE0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ write_activer(hypctx, n, false, wval);
+}
+
+/* GICD_IPRIORITYR */
+/* Affinity routing is enabled so ipriorityr0-7 is RAZ/WI */
+static void
+dist_ipriorityr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ int n;
+
+ n = (reg - GICD_IPRIORITYR(0)) / 4;
+ /* GICD_IPRIORITY0-7 is RAZ/WI so handled separately */
+ MPASS(n > 7);
+ *rval = read_priorityr(hypctx, n);
+}
+
+static void
+dist_ipriorityr_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ u_int irq_base;
+
+ irq_base = (reg - GICD_IPRIORITYR(0)) + offset;
+ /* GICD_IPRIORITY0-7 is RAZ/WI so handled separately */
+ MPASS(irq_base > 31);
+ write_priorityr(hypctx, irq_base, size, wval);
+}
+
+/* GICD_ICFGR */
+static void
+dist_icfgr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_ICFGR(0)) / 4;
+ /* GICD_ICFGR0-1 are RAZ/WI so handled separately */
+ MPASS(n > 1);
+ *rval = read_config(hypctx, n);
+}
+
+static void
+dist_icfgr_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ n = (reg - GICD_ICFGR(0)) / 4;
+ /* GICD_ICFGR0-1 are RAZ/WI so handled separately */
+ MPASS(n > 1);
+ write_config(hypctx, n, wval);
+}
+
+/* GICD_IROUTER */
+static void
+dist_irouter_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_IROUTER(0)) / 8;
+ /* GICD_IROUTER0-31 don't exist */
+ MPASS(n > 31);
+ *rval = read_route(hypctx, n);
+}
+
+static void
+dist_irouter_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_IROUTER(0)) / 8;
+ /* GICD_IROUTER0-31 don't exist */
+ MPASS(n > 31);
+ write_route(hypctx, n, wval, offset, size);
+}
+
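+/*
+ * Table-driven register dispatch. Find the entry covering the accessed
+ * range, align the access down to the register's natural size and, if the
+ * access size is one the register supports, call its handler and extract
+ * the accessed bytes. Unsupported access sizes read as zero and writes
+ * are ignored, as permitted by the architecture.
+ */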
+static bool
+vgic_register_read(struct hypctx *hypctx, struct vgic_register *reg_list,
+ u_int reg_list_size, u_int reg, u_int size, uint64_t *rval, void *arg)
+{
+ u_int i, offset;
+
+ for (i = 0; i < reg_list_size; i++) {
+ if (reg_list[i].start <= reg && reg_list[i].end >= reg + size) {
+ offset = reg & (reg_list[i].size - 1);
+ reg -= offset;
+ if ((reg_list[i].flags & size) != 0) {
+ reg_list[i].read(hypctx, reg, rval, NULL);
+
+ /* Move the bits into the correct place */
+ *rval >>= (offset * 8);
+ if (size < 8) {
+ *rval &= (1ul << (size * 8)) - 1;
+ }
+ } else {
+ /*
+ * The access is an invalid size. Section
+ * 12.1.3 "GIC memory-mapped register access"
+ * of the GICv3 and GICv4 spec issue H
+ * (IHI0069) lists the options. For a read
+ * the controller returns unknown data, in
+ * this case it is zero.
+ */
+ *rval = 0;
+ }
+ return (true);
+ }
+ }
+ return (false);
+}
+
+static bool
+vgic_register_write(struct hypctx *hypctx, struct vgic_register *reg_list,
+ u_int reg_list_size, u_int reg, u_int size, uint64_t wval, void *arg)
+{
+ u_int i, offset;
+
+ for (i = 0; i < reg_list_size; i++) {
+ if (reg_list[i].start <= reg && reg_list[i].end >= reg + size) {
+ offset = reg & (reg_list[i].size - 1);
+ reg -= offset;
+ if ((reg_list[i].flags & size) != 0) {
+ reg_list[i].write(hypctx, reg, offset,
+ size, wval, NULL);
+ } else {
+ /*
+ * See the comment in vgic_register_read.
+ * For writes the controller ignores the
+ * operation.
+ */
+ }
+ return (true);
+ }
+ }
+ return (false);
+}
+
+static int
+dist_read(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vgic_v3 *vgic;
+ uint64_t reg;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ vgic = hyp->vgic;
+
+ /* Check the register is one of ours and is the correct size */
+ if (fault_ipa < vgic->dist_start || fault_ipa + size > vgic->dist_end) {
+ return (EINVAL);
+ }
+
+ reg = fault_ipa - vgic->dist_start;
+ /*
+ * As described in vgic_register_read an access with an invalid
+ * alignment is read with an unknown value
+ */
+ if ((reg & (size - 1)) != 0) {
+ *rval = 0;
+ return (0);
+ }
+
+ if (vgic_register_read(hypctx, dist_registers, nitems(dist_registers),
+ reg, size, rval, NULL))
+ return (0);
+
+ /* Reserved register addresses are RES0 so we can hardwire the value to 0 */
+ *rval = 0;
+
+ return (0);
+}
+
+static int
+dist_write(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vgic_v3 *vgic;
+ uint64_t reg;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ vgic = hyp->vgic;
+
+ /* Check the register is one of ours and is the correct size */
+ if (fault_ipa < vgic->dist_start || fault_ipa + size > vgic->dist_end) {
+ return (EINVAL);
+ }
+
+ reg = fault_ipa - vgic->dist_start;
+ /*
+ * As described in vgic_register_read an access with an invalid
+ * alignment is write ignored.
+ */
+ if ((reg & (size - 1)) != 0)
+ return (0);
+
+ if (vgic_register_write(hypctx, dist_registers, nitems(dist_registers),
+ reg, size, wval, NULL))
+ return (0);
+
+ /* Reserved register addresses are RES0 so we can ignore the write */
+ return (0);
+}
+
+/*
+ * Redistributor register handlers.
+ *
+ * RD_base:
+ */
+/* GICR_CTLR */
+static void
+redist_ctlr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ /* LPIs not supported */
+ *rval = 0;
+}
+
+/* GICR_IIDR */
+static void
+redist_iidr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ *rval = VGIC_IIDR;
+}
+
+/* GICR_TYPER */
+static void
+redist_typer_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ uint64_t aff, gicr_typer, vmpidr_el2;
+ bool last_vcpu;
+
+ last_vcpu = false;
+ if (vcpu_vcpuid(hypctx->vcpu) == (vgic_max_cpu_count(hypctx->hyp) - 1))
+ last_vcpu = true;
+
+ vmpidr_el2 = hypctx->vmpidr_el2;
+ MPASS(vmpidr_el2 != 0);
+ /*
+ * Get affinity for the current CPU. The guest CPU affinity is taken
+ * from VMPIDR_EL2. The Redistributor corresponding to this CPU is
+ * the Redistributor with the same affinity from GICR_TYPER.
+ */
+ aff = (CPU_AFF3(vmpidr_el2) << 24) | (CPU_AFF2(vmpidr_el2) << 16) |
+ (CPU_AFF1(vmpidr_el2) << 8) | CPU_AFF0(vmpidr_el2);
+
+ /* Set up GICR_TYPER. */
+ gicr_typer = aff << GICR_TYPER_AFF_SHIFT;
+ /* Set the vcpu as the processor ID */
+ gicr_typer |=
+ (uint64_t)vcpu_vcpuid(hypctx->vcpu) << GICR_TYPER_CPUNUM_SHIFT;
+
+ if (last_vcpu)
+ /* Mark the last Redistributor */
+ gicr_typer |= GICR_TYPER_LAST;
+
+ *rval = gicr_typer;
+}
+
+/*
+ * SGI_base:
+ */
+/* GICR_ISENABLER0 */
+static void
+redist_ienabler0_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ *rval = read_enabler(hypctx, 0);
+}
+
+static void
+redist_isenabler0_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ write_enabler(hypctx, 0, true, wval);
+}
+
+/* GICR_ICENABLER0 */
+static void
+redist_icenabler0_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ write_enabler(hypctx, 0, false, wval);
+}
+
+/* GICR_ISPENDR0 */
+static void
+redist_ipendr0_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ *rval = read_pendr(hypctx, 0);
+}
+
+static void
+redist_ispendr0_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ write_pendr(hypctx, 0, true, wval);
+}
+
+/* GICR_ICPENDR0 */
+static void
+redist_icpendr0_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ write_pendr(hypctx, 0, false, wval);
+}
+
+/* GICR_ISACTIVER0 */
+static void
+redist_iactiver0_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ *rval = read_activer(hypctx, 0);
+}
+
+static void
+redist_isactiver0_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ write_activer(hypctx, 0, true, wval);
+}
+
+/* GICR_ICACTIVER0 */
+static void
+redist_icactiver0_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ write_activer(hypctx, 0, false, wval);
+}
+
+/* GICR_IPRIORITYR */
+static void
+redist_ipriorityr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ int n;
+
+ n = (reg - GICR_IPRIORITYR(0)) / 4;
+ *rval = read_priorityr(hypctx, n);
+}
+
+static void
+redist_ipriorityr_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ u_int irq_base;
+
+ irq_base = (reg - GICR_IPRIORITYR(0)) + offset;
+ write_priorityr(hypctx, irq_base, size, wval);
+}
+
+/* GICR_ICFGR1 */
+static void
+redist_icfgr1_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ *rval = read_config(hypctx, 1);
+}
+
+static void
+redist_icfgr1_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ write_config(hypctx, 1, wval);
+}
+
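+/*
+ * Each vcpu owns one redistributor frame of GICR_RD_BASE_SIZE +
+ * GICR_SGI_BASE_SIZE bytes, laid out contiguously from redist_start. The
+ * frame index selects the target vcpu and the offset within the frame
+ * selects the RD_base or SGI_base register page.
+ */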
+static int
+redist_read(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx, *target_hypctx;
+ struct vgic_v3 *vgic;
+ uint64_t reg;
+ int vcpuid;
+
+ /* Find the current vcpu ctx to get the vgic struct */
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ vgic = hyp->vgic;
+
+ /* Check the register is one of ours and is the correct size */
+ if (fault_ipa < vgic->redist_start ||
+ fault_ipa + size > vgic->redist_end) {
+ return (EINVAL);
+ }
+
+ vcpuid = (fault_ipa - vgic->redist_start) /
+ (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE);
+ if (vcpuid >= vm_get_maxcpus(hyp->vm)) {
+ /*
+ * This should never happen, but let's be defensive so that if it
+ * does we don't panic a non-INVARIANTS kernel.
+ */
+#ifdef INVARIANTS
+ panic("%s: Invalid vcpuid %d", __func__, vcpuid);
+#else
+ *rval = 0;
+ return (0);
+#endif
+ }
+
+ /* Find the target vcpu ctx for the access */
+ target_hypctx = hyp->ctx[vcpuid];
+ if (target_hypctx == NULL) {
+ /*
+ * The CPU has not yet started. The redistributor and CPU are
+ * in the same power domain. As such the redistributor will
+ * also be powered down so any access will raise an external
+ * abort.
+ */
+ raise_data_insn_abort(hypctx, fault_ipa, true,
+ ISS_DATA_DFSC_EXT);
+ return (0);
+ }
+
+ reg = (fault_ipa - vgic->redist_start) %
+ (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE);
+
+ /*
+ * As described in vgic_register_read an access with an invalid
+ * alignment is read with an unknown value
+ */
+ if ((reg & (size - 1)) != 0) {
+ *rval = 0;
+ return (0);
+ }
+
+ if (reg < GICR_RD_BASE_SIZE) {
+ if (vgic_register_read(target_hypctx, redist_rd_registers,
+ nitems(redist_rd_registers), reg, size, rval, NULL))
+ return (0);
+ } else if (reg < (GICR_SGI_BASE + GICR_SGI_BASE_SIZE)) {
+ if (vgic_register_read(target_hypctx, redist_sgi_registers,
+ nitems(redist_sgi_registers), reg - GICR_SGI_BASE, size,
+ rval, NULL))
+ return (0);
+ }
+
+ /* Reserved register addresses are RES0 so we can hardwire the value to 0 */
+ *rval = 0;
+ return (0);
+}
+
+static int
+redist_write(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx, *target_hypctx;
+ struct vgic_v3 *vgic;
+ uint64_t reg;
+ int vcpuid;
+
+ /* Find the current vcpu ctx to get the vgic struct */
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ vgic = hyp->vgic;
+
+ /* Check the register is one of ours and is the correct size */
+ if (fault_ipa < vgic->redist_start ||
+ fault_ipa + size > vgic->redist_end) {
+ return (EINVAL);
+ }
+
+ vcpuid = (fault_ipa - vgic->redist_start) /
+ (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE);
+ if (vcpuid >= vm_get_maxcpus(hyp->vm)) {
+ /*
+ * This should never happen, but let's be defensive so that if it
+ * does we don't panic a non-INVARIANTS kernel.
+ */
+#ifdef INVARIANTS
+ panic("%s: Invalid vcpuid %d", __func__, vcpuid);
+#else
+ return (0);
+#endif
+ }
+
+ /* Find the target vcpu ctx for the access */
+ target_hypctx = hyp->ctx[vcpuid];
+ if (target_hypctx == NULL) {
+ /*
+ * The CPU has not yet started. The redistributor and CPU are
+ * in the same power domain. As such the redistributor will
+ * also be powered down so any access will raise an external
+ * abort.
+ */
+ raise_data_insn_abort(hypctx, fault_ipa, true,
+ ISS_DATA_DFSC_EXT);
+ return (0);
+ }
+
+ reg = (fault_ipa - vgic->redist_start) %
+ (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE);
+
+ /*
+ * As described in vgic_register_read an access with an invalid
+ * alignment is write ignored.
+ */
+ if ((reg & (size - 1)) != 0)
+ return (0);
+
+ if (reg < GICR_RD_BASE_SIZE) {
+ if (vgic_register_write(target_hypctx, redist_rd_registers,
+ nitems(redist_rd_registers), reg, size, wval, NULL))
+ return (0);
+ } else if (reg < (GICR_SGI_BASE + GICR_SGI_BASE_SIZE)) {
+ if (vgic_register_write(target_hypctx, redist_sgi_registers,
+ nitems(redist_sgi_registers), reg - GICR_SGI_BASE, size,
+ wval, NULL))
+ return (0);
+ }
+
+ /* Reserved register addresses are RES0 so we can ignore the write */
+ return (0);
+}
+
+static int
+vgic_v3_icc_sgi1r_read(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ /*
+ * TODO: Inject an unknown exception.
+ */
+ *rval = 0;
+ return (0);
+}
+
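+/*
+ * Writes to ICC_SGI1R_EL1 generate SGIs. With IRM clear the target vcpus
+ * are built from the Aff3.Aff2.Aff1 fields plus one Aff0 value per set
+ * TargetList bit; with IRM set the SGI is sent to every active vcpu other
+ * than the writer.
+ */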
+static int
+vgic_v3_icc_sgi1r_write(struct vcpu *vcpu, uint64_t rval, void *arg)
+{
+ struct vm *vm;
+ struct hyp *hyp;
+ cpuset_t active_cpus;
+ uint64_t mpidr, aff1, aff2, aff3;
+ uint32_t irqid;
+ int cpus, cpu_off, target_vcpuid, vcpuid;
+
+ vm = vcpu_vm(vcpu);
+ hyp = vm_get_cookie(vm);
+ active_cpus = vm_active_cpus(vm);
+ vcpuid = vcpu_vcpuid(vcpu);
+
+ irqid = ICC_SGI1R_EL1_SGIID_VAL(rval) >> ICC_SGI1R_EL1_SGIID_SHIFT;
+ if ((rval & ICC_SGI1R_EL1_IRM) == 0) {
+ /* A non-zero RangeSelector selects Aff0 values that point at no vcpus */
+ if (ICC_SGI1R_EL1_RS_VAL(rval) != 0)
+ return (0);
+
+ aff1 = ICC_SGI1R_EL1_AFF1_VAL(rval) >> ICC_SGI1R_EL1_AFF1_SHIFT;
+ aff2 = ICC_SGI1R_EL1_AFF2_VAL(rval) >> ICC_SGI1R_EL1_AFF2_SHIFT;
+ aff3 = ICC_SGI1R_EL1_AFF3_VAL(rval) >> ICC_SGI1R_EL1_AFF3_SHIFT;
+ mpidr = aff3 << MPIDR_AFF3_SHIFT |
+ aff2 << MPIDR_AFF2_SHIFT | aff1 << MPIDR_AFF1_SHIFT;
+
+ cpus = ICC_SGI1R_EL1_TL_VAL(rval) >> ICC_SGI1R_EL1_TL_SHIFT;
+ cpu_off = 0;
+ while (cpus > 0) {
+ if (cpus & 1) {
+ target_vcpuid = mpidr_to_vcpu(hyp,
+ mpidr | (cpu_off << MPIDR_AFF0_SHIFT));
+ if (target_vcpuid >= 0 &&
+ CPU_ISSET(target_vcpuid, &active_cpus)) {
+ INJECT_IRQ(hyp, target_vcpuid, irqid,
+ true);
+ }
+ }
+ cpu_off++;
+ cpus >>= 1;
+ }
+ } else {
+ /* Send an IPI to all CPUs other than the current CPU */
+ for (target_vcpuid = 0; target_vcpuid < vm_get_maxcpus(vm);
+ target_vcpuid++) {
+ if (CPU_ISSET(target_vcpuid, &active_cpus) &&
+ target_vcpuid != vcpuid) {
+ INJECT_IRQ(hyp, target_vcpuid, irqid, true);
+ }
+ }
+ }
+
+ return (0);
+}
+
+static void
+vgic_v3_mmio_init(struct hyp *hyp)
+{
+ struct vgic_v3 *vgic;
+ struct vgic_v3_irq *irq;
+ int i;
+
+ /* Allocate memory for the SPIs */
+ vgic = hyp->vgic;
+ vgic->irqs = malloc((VGIC_NIRQS - VGIC_PRV_I_NUM) *
+ sizeof(*vgic->irqs), M_VGIC_V3, M_WAITOK | M_ZERO);
+
+ for (i = 0; i < VGIC_NIRQS - VGIC_PRV_I_NUM; i++) {
+ irq = &vgic->irqs[i];
+
+ mtx_init(&irq->irq_spinmtx, "VGIC IRQ spinlock", NULL,
+ MTX_SPIN);
+
+ irq->irq = i + VGIC_PRV_I_NUM;
+ }
+}
+
+static void
+vgic_v3_mmio_destroy(struct hyp *hyp)
+{
+ struct vgic_v3 *vgic;
+ struct vgic_v3_irq *irq;
+ int i;
+
+ vgic = hyp->vgic;
+ for (i = 0; i < VGIC_NIRQS - VGIC_PRV_I_NUM; i++) {
+ irq = &vgic->irqs[i];
+
+ mtx_destroy(&irq->irq_spinmtx);
+ }
+
+ free(vgic->irqs, M_VGIC_V3);
+}
+
+static int
+vgic_v3_attach_to_vm(device_t dev, struct hyp *hyp, struct vm_vgic_descr *descr)
+{
+ struct vm *vm;
+ struct vgic_v3 *vgic;
+ size_t cpu_count;
+
+ if (descr->ver.version != 3)
+ return (EINVAL);
+
+ /*
+ * The register bases need to be 64k aligned
+ * The redist register space is the RD + SGI size
+ */
+ if (!__is_aligned(descr->v3_regs.dist_start, PAGE_SIZE_64K) ||
+ !__is_aligned(descr->v3_regs.redist_start, PAGE_SIZE_64K) ||
+ !__is_aligned(descr->v3_regs.redist_size,
+ GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE))
+ return (EINVAL);
+
+ /* The dist register space is 1 64k block */
+ if (descr->v3_regs.dist_size != PAGE_SIZE_64K)
+ return (EINVAL);
+
+ vm = hyp->vm;
+
+ /*
+ * Return an error if the redist space is too large for the maximum
+ * number of CPUs we support.
+ */
+ cpu_count = descr->v3_regs.redist_size /
+ (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE);
+ if (cpu_count > vm_get_maxcpus(vm))
+ return (EINVAL);
+
+ vgic = hyp->vgic;
+
+ /* Set the distributor address and size for trapping guest access. */
+ vgic->dist_start = descr->v3_regs.dist_start;
+ vgic->dist_end = descr->v3_regs.dist_start + descr->v3_regs.dist_size;
+
+ vgic->redist_start = descr->v3_regs.redist_start;
+ vgic->redist_end = descr->v3_regs.redist_start +
+ descr->v3_regs.redist_size;
+
+ vm_register_inst_handler(vm, descr->v3_regs.dist_start,
+ descr->v3_regs.dist_size, dist_read, dist_write);
+ vm_register_inst_handler(vm, descr->v3_regs.redist_start,
+ descr->v3_regs.redist_size, redist_read, redist_write);
+
+ vm_register_reg_handler(vm, ISS_MSR_REG(ICC_SGI1R_EL1),
+ ISS_MSR_REG_MASK, vgic_v3_icc_sgi1r_read, vgic_v3_icc_sgi1r_write,
+ NULL);
+
+ vgic_v3_mmio_init(hyp);
+
+ hyp->vgic_attached = true;
+
+ return (0);
+}
+
+static void
+vgic_v3_detach_from_vm(device_t dev, struct hyp *hyp)
+{
+ if (hyp->vgic_attached) {
+ hyp->vgic_attached = false;
+ vgic_v3_mmio_destroy(hyp);
+ }
+}
+
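+/*
+ * Look up an interrupt by INTID: 0-31 (SGIs and PPIs) live in the per-vcpu
+ * private_irqs array, SPIs up to VGIC_NIRQS are shared in the distributor,
+ * and LPIs are not supported. Returns with the IRQ spinlock held; callers
+ * must drop it with vgic_v3_release_irq().
+ */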
+static struct vgic_v3_irq *
+vgic_v3_get_irq(struct hyp *hyp, int vcpuid, uint32_t irqid)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ struct hypctx *hypctx;
+
+ if (irqid < VGIC_PRV_I_NUM) {
+ if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(hyp->vm))
+ return (NULL);
+ hypctx = hyp->ctx[vcpuid];
+ if (hypctx == NULL)
+ return (NULL);
+ vgic_cpu = hypctx->vgic_cpu;
+ irq = &vgic_cpu->private_irqs[irqid];
+ } else if (irqid <= GIC_LAST_SPI) {
+ irqid -= VGIC_PRV_I_NUM;
+ if (irqid >= VGIC_NIRQS)
+ return (NULL);
+ irq = &hyp->vgic->irqs[irqid];
+ } else if (irqid < GIC_FIRST_LPI) {
+ return (NULL);
+ } else {
+ /* No support for LPIs */
+ return (NULL);
+ }
+
+ mtx_lock_spin(&irq->irq_spinmtx);
+ return (irq);
+}
+
+static void
+vgic_v3_release_irq(struct vgic_v3_irq *irq)
+{
+
+ mtx_unlock_spin(&irq->irq_spinmtx);
+}
+
+static bool
+vgic_v3_has_pending_irq(device_t dev, struct hypctx *hypctx)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ bool empty;
+
+ vgic_cpu = hypctx->vgic_cpu;
+ mtx_lock_spin(&vgic_cpu->lr_mtx);
+ empty = TAILQ_EMPTY(&vgic_cpu->irq_act_pend);
+ mtx_unlock_spin(&vgic_cpu->lr_mtx);
+
+ return (!empty);
+}
+
+static bool
+vgic_v3_check_irq(struct vgic_v3_irq *irq, bool level)
+{
+ /*
+ * Only inject if:
+ * - Level-triggered IRQ: level changes low -> high
+ * - Edge-triggered IRQ: level is high
+ */
+ switch (irq->config & VGIC_CONFIG_MASK) {
+ case VGIC_CONFIG_LEVEL:
+ return (level != irq->level);
+ case VGIC_CONFIG_EDGE:
+ return (level);
+ default:
+ break;
+ }
+
+ return (false);
+}
+
+static int
+vgic_v3_inject_irq(device_t dev, struct hyp *hyp, int vcpuid, uint32_t irqid,
+ bool level)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ struct hypctx *hypctx;
+ int target_vcpu;
+ bool notify;
+
+ if (!hyp->vgic_attached)
+ return (ENODEV);
+
+ KASSERT(vcpuid == -1 || irqid < VGIC_PRV_I_NUM,
+ ("%s: SPI/LPI with vcpuid set: irq %u vcpuid %u", __func__, irqid,
+ vcpuid));
+
+ irq = vgic_v3_get_irq(hyp, vcpuid, irqid);
+ if (irq == NULL) {
+ eprintf("Malformed IRQ %u.\n", irqid);
+ return (EINVAL);
+ }
+
+ target_vcpu = irq->target_vcpu;
+ KASSERT(vcpuid == -1 || vcpuid == target_vcpu,
+ ("%s: Interrupt %u has bad cpu affinity: vcpu %d target vcpu %d",
+ __func__, irqid, vcpuid, target_vcpu));
+ KASSERT(target_vcpu >= 0 && target_vcpu < vm_get_maxcpus(hyp->vm),
+ ("%s: Interrupt %u sent to invalid vcpu %d", __func__, irqid,
+ target_vcpu));
+
+ if (vcpuid == -1)
+ vcpuid = target_vcpu;
+ /* TODO: Check from 0 to vm->maxcpus */
+ if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(hyp->vm)) {
+ vgic_v3_release_irq(irq);
+ return (EINVAL);
+ }
+
+ hypctx = hyp->ctx[vcpuid];
+ if (hypctx == NULL) {
+ vgic_v3_release_irq(irq);
+ return (EINVAL);
+ }
+
+ notify = false;
+ vgic_cpu = hypctx->vgic_cpu;
+
+ mtx_lock_spin(&vgic_cpu->lr_mtx);
+
+ if (!vgic_v3_check_irq(irq, level)) {
+ goto out;
+ }
+
+ if ((irq->config & VGIC_CONFIG_MASK) == VGIC_CONFIG_LEVEL)
+ irq->level = level;
+ else /* VGIC_CONFIG_EDGE */
+ irq->pending = true;
+
+ notify = vgic_v3_queue_irq(hyp, vgic_cpu, vcpuid, irq);
+
+out:
+ mtx_unlock_spin(&vgic_cpu->lr_mtx);
+ vgic_v3_release_irq(irq);
+
+ if (notify)
+ vcpu_notify_event(vm_vcpu(hyp->vm, vcpuid));
+
+ return (0);
+}
+
+static int
+vgic_v3_inject_msi(device_t dev, struct hyp *hyp, uint64_t msg, uint64_t addr)
+{
+ struct vgic_v3 *vgic;
+ uint64_t reg;
+
+ vgic = hyp->vgic;
+
+ /* This is a 4 byte register */
+ if (addr < vgic->dist_start || addr + 4 > vgic->dist_end) {
+ return (EINVAL);
+ }
+
+ reg = addr - vgic->dist_start;
+ if (reg != GICD_SETSPI_NSR)
+ return (EINVAL);
+
+ return (INJECT_IRQ(hyp, -1, msg, true));
+}
+
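+/*
+ * Called before entering the guest: copy up to ich_lr_num enabled pending
+ * or active interrupts from the irq_act_pend list into the ICH_LR_EL2 list
+ * registers so the hardware can present them to the vcpu.
+ */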
+static void
+vgic_v3_flush_hwstate(device_t dev, struct hypctx *hypctx)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ int i;
+
+ vgic_cpu = hypctx->vgic_cpu;
+
+ /*
+ * All Distributor writes have been executed at this point, so there is
+ * no need to protect Distributor reads with a mutex.
+ *
+ * This is called with all interrupts disabled, so there is no need for
+ * a List Register spinlock either.
+ */
+ mtx_lock_spin(&vgic_cpu->lr_mtx);
+
+ hypctx->vgic_v3_regs.ich_hcr_el2 &= ~ICH_HCR_EL2_UIE;
+
+ /* Exit early if there are no buffered interrupts */
+ if (TAILQ_EMPTY(&vgic_cpu->irq_act_pend))
+ goto out;
+
+ KASSERT(vgic_cpu->ich_lr_used == 0, ("%s: Used LR count not zero %u",
+ __func__, vgic_cpu->ich_lr_used));
+
+ i = 0;
+ hypctx->vgic_v3_regs.ich_elrsr_el2 =
+ (1u << hypctx->vgic_v3_regs.ich_lr_num) - 1;
+ TAILQ_FOREACH(irq, &vgic_cpu->irq_act_pend, act_pend_list) {
+ /* No free list register, stop searching for IRQs */
+ if (i == hypctx->vgic_v3_regs.ich_lr_num)
+ break;
+
+ if (!irq->enabled)
+ continue;
+
+ hypctx->vgic_v3_regs.ich_lr_el2[i] = ICH_LR_EL2_GROUP1 |
+ ((uint64_t)irq->priority << ICH_LR_EL2_PRIO_SHIFT) |
+ irq->irq;
+
+ if (irq->active) {
+ hypctx->vgic_v3_regs.ich_lr_el2[i] |=
+ ICH_LR_EL2_STATE_ACTIVE;
+ }
+
+#ifdef notyet
+ /* TODO: Check why this is needed */
+ if ((irq->config & VGIC_CONFIG_MASK) == VGIC_CONFIG_LEVEL)
+ hypctx->vgic_v3_regs.ich_lr_el2[i] |= ICH_LR_EL2_EOI;
+#endif
+
+ if (!irq->active && vgic_v3_irq_pending(irq)) {
+ hypctx->vgic_v3_regs.ich_lr_el2[i] |=
+ ICH_LR_EL2_STATE_PENDING;
+
+ /*
+ * This IRQ is now pending on the guest. Allow for
+ * another edge that could cause the interrupt to
+ * be raised again.
+ */
+ if ((irq->config & VGIC_CONFIG_MASK) ==
+ VGIC_CONFIG_EDGE) {
+ irq->pending = false;
+ }
+ }
+
+ i++;
+ }
+ vgic_cpu->ich_lr_used = i;
+
+out:
+ mtx_unlock_spin(&vgic_cpu->lr_mtx);
+}
+
+static void
+vgic_v3_sync_hwstate(device_t dev, struct hypctx *hypctx)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ uint64_t lr;
+ int i;
+
+ vgic_cpu = hypctx->vgic_cpu;
+
+ /* Exit early if there are no buffered interrupts */
+ if (vgic_cpu->ich_lr_used == 0)
+ return;
+
+ /*
+ * Check on the IRQ state after running the guest. ich_lr_used and
+ * ich_lr_el2 are only ever used within this thread, so they are safe
+ * to access unlocked.
+ */
+ for (i = 0; i < vgic_cpu->ich_lr_used; i++) {
+ lr = hypctx->vgic_v3_regs.ich_lr_el2[i];
+ hypctx->vgic_v3_regs.ich_lr_el2[i] = 0;
+
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ ICH_LR_EL2_VINTID(lr));
+ if (irq == NULL)
+ continue;
+
+ irq->active = (lr & ICH_LR_EL2_STATE_ACTIVE) != 0;
+
+ if ((irq->config & VGIC_CONFIG_MASK) == VGIC_CONFIG_EDGE) {
+ /*
+ * If we have an edge-triggered IRQ, preserve the
+ * pending bit until the IRQ has been handled.
+ */
+ if ((lr & ICH_LR_EL2_STATE_PENDING) != 0) {
+ irq->pending = true;
+ }
+ } else {
+ /*
+ * If we have a level-triggered IRQ, remove the
+ * pending bit once the IRQ has been handled.
+ * The level is tracked separately, so it may still
+ * be high and trigger another IRQ.
+ */
+ if ((lr & ICH_LR_EL2_STATE_PENDING) == 0) {
+ irq->pending = false;
+ }
+ }
+
+ /* Lock to update irq_act_pend */
+ mtx_lock_spin(&vgic_cpu->lr_mtx);
+ if (irq->active) {
+ /* Ensure the active IRQ is at the head of the list */
+ TAILQ_REMOVE(&vgic_cpu->irq_act_pend, irq,
+ act_pend_list);
+ TAILQ_INSERT_HEAD(&vgic_cpu->irq_act_pend, irq,
+ act_pend_list);
+ } else if (!vgic_v3_irq_pending(irq)) {
+ /* Neither pending nor active, remove from the list */
+ TAILQ_REMOVE(&vgic_cpu->irq_act_pend, irq,
+ act_pend_list);
+ irq->on_aplist = false;
+ }
+ mtx_unlock_spin(&vgic_cpu->lr_mtx);
+ vgic_v3_release_irq(irq);
+ }
+
+ hypctx->vgic_v3_regs.ich_hcr_el2 &= ~ICH_HCR_EL2_EOICOUNT_MASK;
+ vgic_cpu->ich_lr_used = 0;
+}
+
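+/*
+ * Read the virtualisation features from ICH_VTR_EL2: PRIbits gives the
+ * implemented priority bits (and therefore the lowest usable priority),
+ * PREbits determines how many active priority registers are needed and
+ * ListRegs the number of list registers.
+ */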
+static void
+vgic_v3_init(device_t dev)
+{
+ uint64_t ich_vtr_el2;
+ uint32_t pribits, prebits;
+
+ ich_vtr_el2 = vmm_call_hyp(HYP_READ_REGISTER, HYP_REG_ICH_VTR);
+
+ /* TODO: These fields are common with the vgicv2 driver */
+ pribits = ICH_VTR_EL2_PRIBITS(ich_vtr_el2);
+ switch (pribits) {
+ default:
+ case 5:
+ virt_features.min_prio = 0xf8;
+ break;
+ case 6:
+ virt_features.min_prio = 0xfc;
+ break;
+ case 7:
+ virt_features.min_prio = 0xfe;
+ break;
+ case 8:
+ virt_features.min_prio = 0xff;
+ break;
+ }
+
+ prebits = ICH_VTR_EL2_PREBITS(ich_vtr_el2);
+ switch (prebits) {
+ default:
+ case 5:
+ virt_features.ich_apr_num = 1;
+ break;
+ case 6:
+ virt_features.ich_apr_num = 2;
+ break;
+ case 7:
+ virt_features.ich_apr_num = 4;
+ break;
+ }
+
+ virt_features.ich_lr_num = ICH_VTR_EL2_LISTREGS(ich_vtr_el2);
+}
+
+static int
+vgic_v3_probe(device_t dev)
+{
+ if (!gic_get_vgic(dev))
+ return (EINVAL);
+
+ /* We currently only support the GICv3 */
+ if (gic_get_hw_rev(dev) < 3)
+ return (EINVAL);
+
+ device_set_desc(dev, "Virtual GIC v3");
+ return (BUS_PROBE_DEFAULT);
+}
+
+static int
+vgic_v3_attach(device_t dev)
+{
+ vgic_dev = dev;
+ return (0);
+}
+
+static int
+vgic_v3_detach(device_t dev)
+{
+ vgic_dev = NULL;
+ return (0);
+}
+
+static device_method_t vgic_v3_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, vgic_v3_probe),
+ DEVMETHOD(device_attach, vgic_v3_attach),
+ DEVMETHOD(device_detach, vgic_v3_detach),
+
+ /* VGIC interface */
+ DEVMETHOD(vgic_init, vgic_v3_init),
+ DEVMETHOD(vgic_attach_to_vm, vgic_v3_attach_to_vm),
+ DEVMETHOD(vgic_detach_from_vm, vgic_v3_detach_from_vm),
+ DEVMETHOD(vgic_vminit, vgic_v3_vminit),
+ DEVMETHOD(vgic_cpuinit, vgic_v3_cpuinit),
+ DEVMETHOD(vgic_cpucleanup, vgic_v3_cpucleanup),
+ DEVMETHOD(vgic_vmcleanup, vgic_v3_vmcleanup),
+ DEVMETHOD(vgic_max_cpu_count, vgic_v3_max_cpu_count),
+ DEVMETHOD(vgic_has_pending_irq, vgic_v3_has_pending_irq),
+ DEVMETHOD(vgic_inject_irq, vgic_v3_inject_irq),
+ DEVMETHOD(vgic_inject_msi, vgic_v3_inject_msi),
+ DEVMETHOD(vgic_flush_hwstate, vgic_v3_flush_hwstate),
+ DEVMETHOD(vgic_sync_hwstate, vgic_v3_sync_hwstate),
+
+ /* End */
+ DEVMETHOD_END
+};
+
+/* TODO: Create a vgic base class? */
+DEFINE_CLASS_0(vgic, vgic_v3_driver, vgic_v3_methods, 0);
+
+DRIVER_MODULE(vgic_v3, gic, vgic_v3_driver, 0, 0);
diff --git a/sys/arm64/vmm/io/vgic_v3_reg.h b/sys/arm64/vmm/io/vgic_v3_reg.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic_v3_reg.h
@@ -0,0 +1,129 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2018 The FreeBSD Foundation
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VGIC_V3_REG_H_
+#define _VGIC_V3_REG_H_
+
+/* Interrupt Controller End of Interrupt Status Register */
+#define ICH_EISR_EL2_STATUS_MASK 0xffff
+#define ICH_EISR_EL2_EOI_NOT_HANDLED(lr) ((1 << lr) & ICH_EISR_EL2_STATUS_MASK)
+
+/* Interrupt Controller Empty List Register Status Register */
+#define ICH_ELSR_EL2_STATUS_MASK 0xffff
+#define ICH_ELSR_EL2_LR_EMPTY(x) ((1 << x) & ICH_ELSR_EL2_STATUS_MASK)
+
+/* Interrupt Controller Hyp Control Register */
+#define ICH_HCR_EL2_EOICOUNT_SHIFT 27
+#define ICH_HCR_EL2_EOICOUNT_MASK (0x1f << ICH_HCR_EL2_EOICOUNT_SHIFT)
+#define ICH_HCR_EL2_TDIR (1 << 14) /* Trap non-secure EL1 writes to IC{C, V}_DIR_EL1 */
+#define ICH_HCR_EL2_TSEI (1 << 13) /* Trap System Error Interrupts (SEI) to EL2 */
+#define ICH_HCR_EL2_TALL1 (1 << 12) /* Trap non-secure EL1 accesses to IC{C, V}_* for Group 1 interrupts */
+#define ICH_HCR_EL2_TALL0 (1 << 11) /* Trap non-secure EL1 accesses to IC{C, V}_* for Group 0 interrupts */
+#define ICH_HCR_EL2_TC (1 << 10) /* Trap non-secure EL1 accesses to common IC{C, V}_* registers */
+#define ICH_HCR_EL2_VGRP1DIE (1 << 7) /* VM Group 1 Disabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP1EIE (1 << 6) /* VM Group 1 Enabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP0DIE (1 << 5) /* VM Group 0 Disabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP0EIE (1 << 4) /* VM Group 0 Enabled Interrupt Enable */
+#define ICH_HCR_EL2_NPIE (1 << 3) /* No Pending Interrupt Enable */
+#define ICH_HCR_EL2_LRENPIE (1 << 2) /* List Register Entry Not Present Interrupt Enable */
+#define ICH_HCR_EL2_UIE (1 << 1) /* Underflow Interrupt Enable */
+#define ICH_HCR_EL2_En (1 << 0) /* Global enable for the virtual CPU interface */
+
+/* Interrupt Controller List Registers */
+#define ICH_LR_EL2_VINTID_MASK 0xffffffff
+#define ICH_LR_EL2_VINTID(x) ((x) & ICH_LR_EL2_VINTID_MASK)
+#define ICH_LR_EL2_PINTID_SHIFT 32
+#define ICH_LR_EL2_PINTID_MASK (0x3fUL << ICH_LR_EL2_PINTID_SHIFT)
+/* Raise a maintenance IRQ when deactivated (only non-HW virqs) */
+#define ICH_LR_EL2_EOI (1UL << 41)
+#define ICH_LR_EL2_PRIO_SHIFT 48
+#define ICH_LR_EL2_PRIO_MASK (0xffUL << ICH_LR_EL2_PRIO_SHIFT)
+#define ICH_LR_EL2_GROUP_SHIFT 60
+#define ICH_LR_EL2_GROUP1 (1UL << ICH_LR_EL2_GROUP_SHIFT)
+#define ICH_LR_EL2_HW (1UL << 61)
+#define ICH_LR_EL2_STATE_SHIFT 62
+#define ICH_LR_EL2_STATE_MASK (0x3UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE(x) ((x) & ICH_LR_EL2_STATE_MASK)
+#define ICH_LR_EL2_STATE_INACTIVE (0x0UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_PENDING (0x1UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_ACTIVE (0x2UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_PENDING_ACTIVE (0x3UL << ICH_LR_EL2_STATE_SHIFT)
+
+/* Interrupt Controller Maintenance Interrupt State Register */
+#define ICH_MISR_EL2_VGRP1D (1 << 7) /* vPE Group 1 Disabled */
+#define ICH_MISR_EL2_VGRP1E (1 << 6) /* vPE Group 1 Enabled */
+#define ICH_MISR_EL2_VGRP0D (1 << 5) /* vPE Group 0 Disabled */
+#define ICH_MISR_EL2_VGRP0E (1 << 4) /* vPE Group 0 Enabled */
+#define ICH_MISR_EL2_NP (1 << 3) /* No Pending */
+#define ICH_MISR_EL2_LRENP (1 << 2) /* List Register Entry Not Present */
+#define ICH_MISR_EL2_U (1 << 1) /* Underflow */
+#define ICH_MISR_EL2_EOI (1 << 0) /* End Of Interrupt */
+
+/* Interrupt Controller Virtual Machine Control Register */
+#define ICH_VMCR_EL2_VPMR_SHIFT 24
+#define ICH_VMCR_EL2_VPMR_MASK (0xff << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VPMR_PRIO_LOWEST (0xff << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VPMR_PRIO_HIGHEST (0x00 << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VBPR0_SHIFT 21
+#define ICH_VMCR_EL2_VBPR0_MASK (0x7 << ICH_VMCR_EL2_VBPR0_SHIFT)
+#define ICH_VMCR_EL2_VBPR0_NO_PREEMPTION \
+ (0x7 << ICH_VMCR_EL2_VBPR0_SHIFT)
+#define ICH_VMCR_EL2_VBPR1_SHIFT 18
+#define ICH_VMCR_EL2_VBPR1_MASK (0x7 << ICH_VMCR_EL2_VBPR1_SHIFT)
+#define ICH_VMCR_EL2_VBPR1_NO_PREEMPTION \
+ (0x7 << ICH_VMCR_EL2_VBPR1_SHIFT)
+#define ICH_VMCR_EL2_VEOIM (1 << 9) /* Virtual EOI mode */
+#define ICH_VMCR_EL2_VCBPR (1 << 4) /* Virtual Common binary Point Register */
+#define ICH_VMCR_EL2_VFIQEN (1 << 3) /* Virtual FIQ enable */
+#define ICH_VMCR_EL2_VACKCTL (1 << 2) /* Virtual AckCtl */
+#define ICH_VMCR_EL2_VENG1 (1 << 1) /* Virtual Group 1 Interrupt Enable */
+#define ICH_VMCR_EL2_VENG0 (1 << 0) /* Virtual Group 0 Interrupt Enable */
+
+/* Interrupt Controller VGIC Type Register */
+#define ICH_VTR_EL2_PRIBITS_SHIFT 29
+#define ICH_VTR_EL2_PRIBITS_MASK (0x7 << ICH_VTR_EL2_PRIBITS_SHIFT)
+#define ICH_VTR_EL2_PRIBITS(x) \
+ ((((x) & ICH_VTR_EL2_PRIBITS_MASK) >> ICH_VTR_EL2_PRIBITS_SHIFT) + 1)
+#define ICH_VTR_EL2_PREBITS_SHIFT 26
+#define ICH_VTR_EL2_PREBITS_MASK (0x7 << ICH_VTR_EL2_PREBITS_SHIFT)
+#define ICH_VTR_EL2_PREBITS(x) \
+ (((x) & ICH_VTR_EL2_PREBITS_MASK) >> ICH_VTR_EL2_PREBITS_SHIFT)
+#define ICH_VTR_EL2_SEIS (1 << 22) /* System Error Interrupt (SEI) Support */
+#define ICH_VTR_EL2_A3V (1 << 21) /* Affinity 3 Valid */
+#define ICH_VTR_EL2_NV4 (1 << 20) /* Direct injection of virtual interrupts not supported. RES1 for GICv3 */
+#define ICH_VTR_EL2_TDS (1 << 19) /* Implementation supports ICH_HCR_EL2.TDIR */
+#define ICH_VTR_EL2_LISTREGS_MASK 0x1f
+/*
+ * ICH_VTR_EL2.ListRegs holds the number of list registers, minus one. Add one
+ * to get the actual number of list registers.
+ */
+#define ICH_VTR_EL2_LISTREGS(x) (((x) & ICH_VTR_EL2_LISTREGS_MASK) + 1)
+
+#endif /* !_VGIC_V3_REG_H_ */
diff --git a/sys/arm64/vmm/io/vtimer.h b/sys/arm64/vmm/io/vtimer.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vtimer.h
@@ -0,0 +1,85 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2017 The FreeBSD Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_VTIMER_H_
+#define _VMM_VTIMER_H_
+
+#define GT_PHYS_NS_IRQ 30
+#define GT_VIRT_IRQ 27
+
+struct hyp;
+struct hypctx;
+
+struct vtimer {
+ uint64_t cnthctl_el2;
+ uint64_t cntvoff_el2;
+};
+
+struct vtimer_timer {
+ struct callout callout;
+ struct mtx mtx;
+
+ uint32_t irqid;
+
+ /*
+ * These registers are either emulated for the physical timer, or
+ * the guest has full access to them for the virtual timer.
+
+ * CNTx_CTL_EL0: Counter-timer Timer Control Register
+ * CNTx_CVAL_EL0: Counter-timer Timer CompareValue Register
+ */
+ uint64_t cntx_cval_el0;
+ uint64_t cntx_ctl_el0;
+};
+
+struct vtimer_cpu {
+ struct vtimer_timer phys_timer;
+ struct vtimer_timer virt_timer;
+
+ uint32_t cntkctl_el1;
+};
+
+int vtimer_init(uint64_t cnthctl_el2);
+void vtimer_vminit(struct hyp *);
+void vtimer_cpuinit(struct hypctx *);
+void vtimer_cpucleanup(struct hypctx *);
+void vtimer_vmcleanup(struct hyp *);
+void vtimer_cleanup(void);
+void vtimer_sync_hwstate(struct hypctx *hypctx);
+
+int vtimer_phys_ctl_read(struct vcpu *vcpu, uint64_t *rval, void *arg);
+int vtimer_phys_ctl_write(struct vcpu *vcpu, uint64_t wval, void *arg);
+int vtimer_phys_cnt_read(struct vcpu *vcpu, uint64_t *rval, void *arg);
+int vtimer_phys_cnt_write(struct vcpu *vcpu, uint64_t wval, void *arg);
+int vtimer_phys_cval_read(struct vcpu *vcpu, uint64_t *rval, void *arg);
+int vtimer_phys_cval_write(struct vcpu *vcpu, uint64_t wval, void *arg);
+int vtimer_phys_tval_read(struct vcpu *vcpu, uint64_t *rval, void *arg);
+int vtimer_phys_tval_write(struct vcpu *vcpu, uint64_t wval, void *arg);
+#endif
diff --git a/sys/arm64/vmm/io/vtimer.c b/sys/arm64/vmm/io/vtimer.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vtimer.c
@@ -0,0 +1,503 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2017 The FreeBSD Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/time.h>
+#include <sys/timeet.h>
+#include <sys/timetc.h>
+
+#include <machine/bus.h>
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include <machine/armreg.h>
+
+#include <arm64/vmm/arm64.h>
+
+#include "vgic.h"
+#include "vtimer.h"
+
+#define RES1 0xffffffffffffffffUL
+
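+/* True when the timer is enabled and its interrupt is not masked. */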
+#define timer_enabled(ctl) \
+ (!((ctl) & CNTP_CTL_IMASK) && ((ctl) & CNTP_CTL_ENABLE))
+
+static uint64_t cnthctl_el2_reg;
+static uint32_t tmr_frq;
+
+#define timer_condition_met(ctl) ((ctl) & CNTP_CTL_ISTATUS)
+
+static void vtimer_schedule_irq(struct hypctx *hypctx, bool phys);
+
+static int
+vtimer_virtual_timer_intr(void *arg)
+{
+ struct hypctx *hypctx;
+ uint64_t cntpct_el0;
+ uint32_t cntv_ctl;
+
+ hypctx = arm64_get_active_vcpu();
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+
+ if (!hypctx) {
+ /* vm_destroy() was called. */
+ eprintf("No active vcpu\n");
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+ goto out;
+ }
+ if (!timer_enabled(cntv_ctl)) {
+ eprintf("Timer not enabled\n");
+ goto out;
+ }
+ if (!timer_condition_met(cntv_ctl)) {
+ eprintf("Timer condition not met\n");
+ goto out;
+ }
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) -
+ hypctx->hyp->vtimer.cntvoff_el2;
+ if (hypctx->vtimer_cpu.virt_timer.cntx_cval_el0 < cntpct_el0)
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ GT_VIRT_IRQ, true);
+
+ cntv_ctl = hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0;
+
+out:
+ /*
+ * Disable the timer interrupt. This will prevent the interrupt from
+ * being reasserted as soon as we exit the handler and getting stuck
+ * in an infinite loop.
+ *
+	 * This is safe to do because the guest disables the timer and then
+	 * re-enables it as part of the interrupt handling routine.
+ */
+ cntv_ctl &= ~CNTP_CTL_ENABLE;
+ WRITE_SPECIALREG(cntv_ctl_el0, cntv_ctl);
+
+ return (FILTER_HANDLED);
+}
+
+int
+vtimer_init(uint64_t cnthctl_el2)
+{
+ cnthctl_el2_reg = cnthctl_el2;
+ /*
+ * The guest *MUST* use the same timer frequency as the host. The
+ * register CNTFRQ_EL0 is accessible to the guest and a different value
+	 * in the guest dts file might have unforeseen consequences.
+ */
+ tmr_frq = READ_SPECIALREG(cntfrq_el0);
+
+ return (0);
+}
+
+void
+vtimer_vminit(struct hyp *hyp)
+{
+ uint64_t now;
+
+ /*
+ * Configure the Counter-timer Hypervisor Control Register for the VM.
+ *
+	 * ~CNTHCTL_EL1PCEN: trap EL1 access to CNTP_{CTL, CVAL, TVAL}_EL0
+	 * ~CNTHCTL_EL1PCTEN: trap EL1 access to CNTPCT_EL0
+ */
+ hyp->vtimer.cnthctl_el2 = cnthctl_el2_reg & ~CNTHCTL_EL1PCEN;
+ hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_EL1PCTEN;
+
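+	/*
+	 * Use the current physical counter value as the virtual offset so
+	 * that the guest's virtual counter starts at roughly zero.
+	 */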
+ now = READ_SPECIALREG(cntpct_el0);
+ hyp->vtimer.cntvoff_el2 = now;
+
+ return;
+}
+
+void
+vtimer_cpuinit(struct hypctx *hypctx)
+{
+ struct vtimer_cpu *vtimer_cpu;
+
+ vtimer_cpu = &hypctx->vtimer_cpu;
+ /*
+ * Configure physical timer interrupts for the VCPU.
+ *
+ * CNTP_CTL_IMASK: mask interrupts
+ * ~CNTP_CTL_ENABLE: disable the timer
+ */
+ vtimer_cpu->phys_timer.cntx_ctl_el0 = CNTP_CTL_IMASK & ~CNTP_CTL_ENABLE;
+
+ mtx_init(&vtimer_cpu->phys_timer.mtx, "vtimer phys callout mutex", NULL,
+ MTX_DEF);
+ callout_init_mtx(&vtimer_cpu->phys_timer.callout,
+ &vtimer_cpu->phys_timer.mtx, 0);
+ vtimer_cpu->phys_timer.irqid = GT_PHYS_NS_IRQ;
+
+ mtx_init(&vtimer_cpu->virt_timer.mtx, "vtimer virt callout mutex", NULL,
+ MTX_DEF);
+ callout_init_mtx(&vtimer_cpu->virt_timer.callout,
+ &vtimer_cpu->virt_timer.mtx, 0);
+ vtimer_cpu->virt_timer.irqid = GT_VIRT_IRQ;
+}
+
+void
+vtimer_cpucleanup(struct hypctx *hypctx)
+{
+ struct vtimer_cpu *vtimer_cpu;
+
+ vtimer_cpu = &hypctx->vtimer_cpu;
+ callout_drain(&vtimer_cpu->phys_timer.callout);
+ callout_drain(&vtimer_cpu->virt_timer.callout);
+ mtx_destroy(&vtimer_cpu->phys_timer.mtx);
+ mtx_destroy(&vtimer_cpu->virt_timer.mtx);
+}
+
+void
+vtimer_vmcleanup(struct hyp *hyp)
+{
+ struct hypctx *hypctx;
+ uint32_t cntv_ctl;
+
+ hypctx = arm64_get_active_vcpu();
+ if (!hypctx) {
+ /* The active VM was destroyed, stop the timer. */
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+ cntv_ctl &= ~CNTP_CTL_ENABLE;
+ WRITE_SPECIALREG(cntv_ctl_el0, cntv_ctl);
+ }
+}
+
+void
+vtimer_cleanup(void)
+{
+}
+
+void
+vtimer_sync_hwstate(struct hypctx *hypctx)
+{
+ struct vtimer_timer *timer;
+ uint64_t cntpct_el0;
+
+ timer = &hypctx->vtimer_cpu.virt_timer;
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) -
+ hypctx->hyp->vtimer.cntvoff_el2;
+ if (!timer_enabled(timer->cntx_ctl_el0)) {
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ timer->irqid, false);
+ } else if (timer->cntx_cval_el0 < cntpct_el0) {
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ timer->irqid, true);
+ } else {
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ timer->irqid, false);
+ vtimer_schedule_irq(hypctx, false);
+ }
+}
+
+static void
+vtimer_inject_irq_callout_phys(void *context)
+{
+ struct hypctx *hypctx;
+
+ hypctx = context;
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ hypctx->vtimer_cpu.phys_timer.irqid, true);
+}
+
+static void
+vtimer_inject_irq_callout_virt(void *context)
+{
+ struct hypctx *hypctx;
+
+ hypctx = context;
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ hypctx->vtimer_cpu.virt_timer.irqid, true);
+}
+
+static void
+vtimer_schedule_irq(struct hypctx *hypctx, bool phys)
+{
+ sbintime_t time;
+ struct vtimer_timer *timer;
+ uint64_t cntpct_el0;
+ uint64_t diff;
+
+ if (phys)
+ timer = &hypctx->vtimer_cpu.phys_timer;
+ else
+ timer = &hypctx->vtimer_cpu.virt_timer;
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) -
+ hypctx->hyp->vtimer.cntvoff_el2;
+ if (timer->cntx_cval_el0 < cntpct_el0) {
+ /* Timer set in the past, trigger interrupt */
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ timer->irqid, true);
+ } else {
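+		/*
+		 * Compute the number of counter ticks until the compare
+		 * value is reached and convert it to an sbintime for the
+		 * callout.
+		 */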
+ diff = timer->cntx_cval_el0 - cntpct_el0;
+ time = diff * SBT_1S / tmr_frq;
+ if (phys)
+ callout_reset_sbt(&timer->callout, time, 0,
+ vtimer_inject_irq_callout_phys, hypctx, 0);
+ else
+ callout_reset_sbt(&timer->callout, time, 0,
+ vtimer_inject_irq_callout_virt, hypctx, 0);
+ }
+}
+
+static void
+vtimer_remove_irq(struct hypctx *hypctx, struct vcpu *vcpu)
+{
+ struct vtimer_cpu *vtimer_cpu;
+ struct vtimer_timer *timer;
+
+ vtimer_cpu = &hypctx->vtimer_cpu;
+ timer = &vtimer_cpu->phys_timer;
+
+ callout_drain(&timer->callout);
+ /*
+	 * The interrupt needs to be deactivated here regardless of whether the
+	 * callout function has executed. The timer interrupt can be masked with
+ * the CNTP_CTL_EL0.IMASK bit instead of reading the IAR register.
+ * Masking the interrupt doesn't remove it from the list registers.
+ */
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(vcpu), timer->irqid, false);
+}
+
+/*
+ * Timer emulation functions.
+ *
+ * The guest should use the virtual timer, but some software (e.g. u-boot)
+ * uses the physical timer. Emulate it in software for the guest to use.
+ *
+ * Adjust for cntvoff_el2 so the physical and virtual timers are at similar
+ * times. This simplifies interrupt handling in the virtual timer as the
+ * adjustment will have already happened.
+ */
+
+int
+vtimer_phys_ctl_read(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t cntpct_el0;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) - hyp->vtimer.cntvoff_el2;
+ if (vtimer_cpu->phys_timer.cntx_cval_el0 < cntpct_el0)
+ /* Timer condition met */
+ *rval = vtimer_cpu->phys_timer.cntx_ctl_el0 | CNTP_CTL_ISTATUS;
+ else
+ *rval = vtimer_cpu->phys_timer.cntx_ctl_el0 & ~CNTP_CTL_ISTATUS;
+
+ return (0);
+}
+
+int
+vtimer_phys_ctl_write(struct vcpu *vcpu, uint64_t wval, void *arg)
+{
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t ctl_el0;
+ bool timer_toggled_on;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ timer_toggled_on = false;
+ ctl_el0 = vtimer_cpu->phys_timer.cntx_ctl_el0;
+
+ if (!timer_enabled(ctl_el0) && timer_enabled(wval))
+ timer_toggled_on = true;
+ else if (timer_enabled(ctl_el0) && !timer_enabled(wval))
+ vtimer_remove_irq(hypctx, vcpu);
+
+ vtimer_cpu->phys_timer.cntx_ctl_el0 = wval;
+
+ if (timer_toggled_on)
+ vtimer_schedule_irq(hypctx, true);
+
+ return (0);
+}
+
+int
+vtimer_phys_cnt_read(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ struct vm *vm;
+ struct hyp *hyp;
+
+ vm = vcpu_vm(vcpu);
+ hyp = vm_get_cookie(vm);
+ *rval = READ_SPECIALREG(cntpct_el0) - hyp->vtimer.cntvoff_el2;
+ return (0);
+}
+
+int
+vtimer_phys_cnt_write(struct vcpu *vcpu, uint64_t wval, void *arg)
+{
+ return (0);
+}
+
+int
+vtimer_phys_cval_read(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ *rval = vtimer_cpu->phys_timer.cntx_cval_el0;
+
+ return (0);
+}
+
+int
+vtimer_phys_cval_write(struct vcpu *vcpu, uint64_t wval, void *arg)
+{
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ vtimer_cpu->phys_timer.cntx_cval_el0 = wval;
+
+ vtimer_remove_irq(hypctx, vcpu);
+ if (timer_enabled(vtimer_cpu->phys_timer.cntx_ctl_el0)) {
+ vtimer_schedule_irq(hypctx, true);
+ }
+
+ return (0);
+}
+
+int
+vtimer_phys_tval_read(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint32_t cntpct_el0;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ if (!(vtimer_cpu->phys_timer.cntx_ctl_el0 & CNTP_CTL_ENABLE)) {
+ /*
+ * ARMv8 Architecture Manual, p. D7-2702: the result of reading
+ * TVAL when the timer is disabled is UNKNOWN. I have chosen to
+		 * return the maximum value possible on 32 bits, which means the
+ * timer will fire very far into the future.
+ */
+ *rval = (uint32_t)RES1;
+ } else {
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) -
+ hyp->vtimer.cntvoff_el2;
+ *rval = vtimer_cpu->phys_timer.cntx_cval_el0 - cntpct_el0;
+ }
+
+ return (0);
+}
+
+int
+vtimer_phys_tval_write(struct vcpu *vcpu, uint64_t wval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t cntpct_el0;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
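+	/*
+	 * Writing TVAL sets the compare value to the current counter value
+	 * plus the written value, treated as a signed 32-bit quantity.
+	 */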
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) - hyp->vtimer.cntvoff_el2;
+ vtimer_cpu->phys_timer.cntx_cval_el0 = (int32_t)wval + cntpct_el0;
+
+ vtimer_remove_irq(hypctx, vcpu);
+ if (timer_enabled(vtimer_cpu->phys_timer.cntx_ctl_el0)) {
+ vtimer_schedule_irq(hypctx, true);
+ }
+
+ return (0);
+}
+
+struct vtimer_softc {
+ struct resource *res;
+ void *ihl;
+ int rid;
+};
+
+static int
+vtimer_probe(device_t dev)
+{
+ device_set_desc(dev, "Virtual timer");
+ return (BUS_PROBE_DEFAULT);
+}
+
+static int
+vtimer_attach(device_t dev)
+{
+ struct vtimer_softc *sc;
+
+ sc = device_get_softc(dev);
+
+ sc->rid = 0;
+ sc->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->rid, RF_ACTIVE);
+ if (sc->res == NULL)
+ return (ENXIO);
+
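+	/*
+	 * The filter routine injects the virtual timer interrupt into the
+	 * vcpu that is currently active on this CPU.
+	 */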
+ bus_setup_intr(dev, sc->res, INTR_TYPE_CLK, vtimer_virtual_timer_intr,
+ NULL, NULL, &sc->ihl);
+
+ return (0);
+}
+
+static device_method_t vtimer_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, vtimer_probe),
+ DEVMETHOD(device_attach, vtimer_attach),
+
+ /* End */
+ DEVMETHOD_END
+};
+
+DEFINE_CLASS_0(vtimer, vtimer_driver, vtimer_methods,
+ sizeof(struct vtimer_softc));
+
+DRIVER_MODULE(vtimer, generic_timer, vtimer_driver, 0, 0);
diff --git a/sys/arm64/vmm/mmu.h b/sys/arm64/vmm/mmu.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/mmu.h
@@ -0,0 +1,52 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_MMU_H_
+#define _VMM_MMU_H_
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+
+#include "hyp.h"
+
+extern char vmm_hyp_code;
+extern char vmm_hyp_code_end;
+
+extern char _vmm_start;
+extern char _vmm_end;
+
+bool vmmpmap_init(void);
+void vmmpmap_fini(void);
+uint64_t vmmpmap_to_ttbr0(void);
+bool vmmpmap_enter(vm_offset_t, vm_size_t, vm_paddr_t, vm_prot_t);
+void vmmpmap_remove(vm_offset_t, vm_size_t, bool);
+
+#endif
diff --git a/sys/arm64/vmm/reset.h b/sys/arm64/vmm/reset.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/reset.h
@@ -0,0 +1,33 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _VMM_RESET_H_
+#define _VMM_RESET_H_
+
+void reset_vm_el01_regs(void *vcpu);
+void reset_vm_el2_regs(void *vcpu);
+
+#endif
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm.c
@@ -0,0 +1,1803 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/cpuset.h>
+#include <sys/kernel.h>
+#include <sys/linker.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_param.h>
+
+#include <machine/armreg.h>
+#include <machine/cpu.h>
+#include <machine/fpu.h>
+#include <machine/machdep.h>
+#include <machine/pcb.h>
+#include <machine/smp.h>
+#include <machine/vm.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <dev/pci/pcireg.h>
+
+#include "vmm_ktr.h"
+#include "vmm_stat.h"
+#include "arm64.h"
+#include "mmu.h"
+
+#include "io/vgic.h"
+#include "io/vtimer.h"
+
+struct vcpu {
+ int flags;
+ enum vcpu_state state;
+ struct mtx mtx;
+ int hostcpu; /* host cpuid this vcpu last ran on */
+ int vcpuid;
+ void *stats;
+ struct vm_exit exitinfo;
+ uint64_t nextpc; /* (x) next instruction to execute */
+ struct vm *vm; /* (o) */
+ void *cookie; /* (i) cpu-specific data */
+ struct vfpstate *guestfpu; /* (a,i) guest fpu state */
+};
+
+#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
+#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
+#define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx))
+#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
+#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
+#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
+
+struct mem_seg {
+ uint64_t gpa;
+ size_t len;
+ bool wired;
+ bool sysmem;
+ vm_object_t object;
+};
+#define VM_MAX_MEMSEGS 3
+
+struct mem_map {
+ vm_paddr_t gpa;
+ size_t len;
+ vm_ooffset_t segoff;
+ int segid;
+ int prot;
+ int flags;
+};
+#define VM_MAX_MEMMAPS 4
+
+struct vmm_mmio_region {
+ uint64_t start;
+ uint64_t end;
+ mem_region_read_t read;
+ mem_region_write_t write;
+};
+#define VM_MAX_MMIO_REGIONS 4
+
+struct vmm_special_reg {
+ uint32_t esr_iss;
+ uint32_t esr_mask;
+ reg_read_t reg_read;
+ reg_write_t reg_write;
+ void *arg;
+};
+#define VM_MAX_SPECIAL_REGS 16
+
+/*
+ * Initialization:
+ * (o) initialized the first time the VM is created
+ * (i) initialized when VM is created and when it is reinitialized
+ * (x) initialized before use
+ */
+struct vm {
+ void *cookie; /* (i) cpu-specific data */
+ volatile cpuset_t active_cpus; /* (i) active vcpus */
+ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */
+ int suspend; /* (i) stop VM execution */
+ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
+ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
+ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
+ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
+ struct vmspace *vmspace; /* (o) guest's address space */
+ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
+ struct vcpu **vcpu; /* (i) guest vcpus */
+ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
+ /* (o) guest MMIO regions */
+ struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
+ /* The following describe the vm cpu topology */
+ uint16_t sockets; /* (o) num of sockets */
+ uint16_t cores; /* (o) num of cores/socket */
+ uint16_t threads; /* (o) num of threads/core */
+ uint16_t maxcpus; /* (o) max pluggable cpus */
+ struct sx mem_segs_lock; /* (o) */
+ struct sx vcpus_init_lock; /* (o) */
+};
+
+static bool vmm_initialized = false;
+
+static int vm_handle_wfi(struct vcpu *vcpu,
+ struct vm_exit *vme, bool *retu);
+
+static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
+
+/* statistics */
+static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
+
+SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
+
+static int vmm_ipinum;
+SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
+ "IPI vector used for vcpu notifications");
+
+struct vmm_regs {
+ uint64_t id_aa64afr0;
+ uint64_t id_aa64afr1;
+ uint64_t id_aa64dfr0;
+ uint64_t id_aa64dfr1;
+ uint64_t id_aa64isar0;
+ uint64_t id_aa64isar1;
+ uint64_t id_aa64isar2;
+ uint64_t id_aa64mmfr0;
+ uint64_t id_aa64mmfr1;
+ uint64_t id_aa64mmfr2;
+ uint64_t id_aa64pfr0;
+ uint64_t id_aa64pfr1;
+};
+
+static const struct vmm_regs vmm_arch_regs_masks = {
+ .id_aa64dfr0 =
+ ID_AA64DFR0_CTX_CMPs_MASK |
+ ID_AA64DFR0_WRPs_MASK |
+ ID_AA64DFR0_BRPs_MASK |
+ ID_AA64DFR0_PMUVer_3 |
+ ID_AA64DFR0_DebugVer_8,
+ .id_aa64isar0 =
+ ID_AA64ISAR0_TLB_TLBIOSR |
+ ID_AA64ISAR0_SHA3_IMPL |
+ ID_AA64ISAR0_RDM_IMPL |
+ ID_AA64ISAR0_Atomic_IMPL |
+ ID_AA64ISAR0_CRC32_BASE |
+ ID_AA64ISAR0_SHA2_512 |
+ ID_AA64ISAR0_SHA1_BASE |
+ ID_AA64ISAR0_AES_PMULL,
+ .id_aa64mmfr0 =
+ ID_AA64MMFR0_TGran4_IMPL |
+ ID_AA64MMFR0_TGran64_IMPL |
+ ID_AA64MMFR0_TGran16_IMPL |
+ ID_AA64MMFR0_ASIDBits_16 |
+ ID_AA64MMFR0_PARange_4P,
+ .id_aa64mmfr1 =
+ ID_AA64MMFR1_SpecSEI_IMPL |
+ ID_AA64MMFR1_PAN_ATS1E1 |
+ ID_AA64MMFR1_HAFDBS_AF,
+ .id_aa64pfr0 =
+ ID_AA64PFR0_GIC_CPUIF_NONE |
+ ID_AA64PFR0_AdvSIMD_HP |
+ ID_AA64PFR0_FP_HP |
+ ID_AA64PFR0_EL3_64 |
+ ID_AA64PFR0_EL2_64 |
+ ID_AA64PFR0_EL1_64 |
+ ID_AA64PFR0_EL0_64,
+};
+
+/* Host registers masked by vmm_arch_regs_masks. */
+static struct vmm_regs vmm_arch_regs;
+
+u_int vm_maxcpu;
+SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ &vm_maxcpu, 0, "Maximum number of vCPUs");
+
+static void vm_free_memmap(struct vm *vm, int ident);
+static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
+static void vcpu_notify_event_locked(struct vcpu *vcpu);
+
+/*
+ * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
+ * is a safe value for now.
+ */
+#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE)
+
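+/*
+ * Read the host's ID registers, keeping only the fields permitted by 'masks'.
+ * A register that cannot be read is exposed to the guest as zero.
+ */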
+static int
+vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
+{
+#define _FETCH_KERN_REG(reg, field) do { \
+ regs->field = vmm_arch_regs_masks.field; \
+ if (!get_kernel_reg_masked(reg, &regs->field, masks->field)) \
+ regs->field = 0; \
+} while (0)
+ _FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
+ _FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
+ _FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
+ _FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
+ _FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
+ _FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
+ _FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
+ _FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
+ _FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
+ _FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
+ _FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
+ _FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
+#undef _FETCH_KERN_REG
+ return (0);
+}
+
+static void
+vcpu_cleanup(struct vcpu *vcpu, bool destroy)
+{
+ vmmops_vcpu_cleanup(vcpu->cookie);
+ vcpu->cookie = NULL;
+ if (destroy) {
+ vmm_stat_free(vcpu->stats);
+ fpu_save_area_free(vcpu->guestfpu);
+ vcpu_lock_destroy(vcpu);
+ }
+}
+
+static struct vcpu *
+vcpu_alloc(struct vm *vm, int vcpu_id)
+{
+ struct vcpu *vcpu;
+
+ KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
+ ("vcpu_alloc: invalid vcpu %d", vcpu_id));
+
+ vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
+ vcpu_lock_init(vcpu);
+ vcpu->state = VCPU_IDLE;
+ vcpu->hostcpu = NOCPU;
+ vcpu->vcpuid = vcpu_id;
+ vcpu->vm = vm;
+ vcpu->guestfpu = fpu_save_area_alloc();
+ vcpu->stats = vmm_stat_alloc();
+ return (vcpu);
+}
+
+static void
+vcpu_init(struct vcpu *vcpu)
+{
+ vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
+ MPASS(vcpu->cookie != NULL);
+ fpu_save_area_reset(vcpu->guestfpu);
+ vmm_stat_init(vcpu->stats);
+}
+
+struct vm_exit *
+vm_exitinfo(struct vcpu *vcpu)
+{
+ return (&vcpu->exitinfo);
+}
+
+static int
+vmm_init(void)
+{
+ int error;
+
+ vm_maxcpu = mp_ncpus;
+ TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
+
+ if (vm_maxcpu > VM_MAXCPU) {
+ printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
+ vm_maxcpu = VM_MAXCPU;
+ }
+ if (vm_maxcpu == 0)
+ vm_maxcpu = 1;
+
+ error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
+ if (error != 0)
+ return (error);
+
+ return (vmmops_modinit(0));
+}
+
+static int
+vmm_handler(module_t mod, int what, void *arg)
+{
+ int error;
+
+ switch (what) {
+ case MOD_LOAD:
+ /* TODO: if (vmm_is_hw_supported()) { */
+ vmmdev_init();
+ error = vmm_init();
+ if (error == 0)
+ vmm_initialized = true;
+ break;
+ case MOD_UNLOAD:
+ /* TODO: if (vmm_is_hw_supported()) { */
+ error = vmmdev_cleanup();
+ if (error == 0 && vmm_initialized) {
+ error = vmmops_modcleanup();
+ if (error)
+ vmm_initialized = false;
+ }
+ break;
+ default:
+ error = 0;
+ break;
+ }
+ return (error);
+}
+
+static moduledata_t vmm_kmod = {
+ "vmm",
+ vmm_handler,
+ NULL
+};
+
+/*
+ * vmm initialization has the following dependencies:
+ *
+ * - HYP initialization requires smp_rendezvous() and therefore must happen
+ * after SMP is fully functional (after SI_SUB_SMP).
+ */
+DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
+MODULE_VERSION(vmm, 1);
+
+static void
+vm_init(struct vm *vm, bool create)
+{
+ int i;
+
+ vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
+ MPASS(vm->cookie != NULL);
+
+ CPU_ZERO(&vm->active_cpus);
+ CPU_ZERO(&vm->debug_cpus);
+
+ vm->suspend = 0;
+ CPU_ZERO(&vm->suspended_cpus);
+
+ memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
+ memset(vm->special_reg, 0, sizeof(vm->special_reg));
+
+ if (!create) {
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (vm->vcpu[i] != NULL)
+ vcpu_init(vm->vcpu[i]);
+ }
+ }
+}
+
+struct vcpu *
+vm_alloc_vcpu(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
+ return (NULL);
+
+ /* Some interrupt controllers may have a CPU limit */
+ if (vcpuid >= vgic_max_cpu_count(vm->cookie))
+ return (NULL);
+
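+	/* Fast path: the vcpu has already been created and published. */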
+ vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]);
+ if (__predict_true(vcpu != NULL))
+ return (vcpu);
+
+ sx_xlock(&vm->vcpus_init_lock);
+ vcpu = vm->vcpu[vcpuid];
+ if (vcpu == NULL/* && !vm->dying*/) {
+ vcpu = vcpu_alloc(vm, vcpuid);
+ vcpu_init(vcpu);
+
+ /*
+ * Ensure vCPU is fully created before updating pointer
+ * to permit unlocked reads above.
+ */
+ atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
+ (uintptr_t)vcpu);
+ }
+ sx_xunlock(&vm->vcpus_init_lock);
+ return (vcpu);
+}
+
+void
+vm_slock_vcpus(struct vm *vm)
+{
+ sx_slock(&vm->vcpus_init_lock);
+}
+
+void
+vm_unlock_vcpus(struct vm *vm)
+{
+ sx_unlock(&vm->vcpus_init_lock);
+}
+
+int
+vm_create(const char *name, struct vm **retvm)
+{
+ struct vm *vm;
+ struct vmspace *vmspace;
+
+ /*
+ * If vmm.ko could not be successfully initialized then don't attempt
+ * to create the virtual machine.
+ */
+ if (!vmm_initialized)
+ return (ENXIO);
+
+ if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
+ return (EINVAL);
+
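+	/* Allocate a 39-bit (512 GiB) guest physical address space. */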
+ vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
+ if (vmspace == NULL)
+ return (ENOMEM);
+
+ vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
+ strcpy(vm->name, name);
+ vm->vmspace = vmspace;
+ sx_init(&vm->mem_segs_lock, "vm mem_segs");
+ sx_init(&vm->vcpus_init_lock, "vm vcpus");
+
+ vm->sockets = 1;
+ vm->cores = 1; /* XXX backwards compatibility */
+ vm->threads = 1; /* XXX backwards compatibility */
+ vm->maxcpus = vm_maxcpu;
+
+ vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
+ M_WAITOK | M_ZERO);
+
+ vm_init(vm, true);
+
+ *retvm = vm;
+ return (0);
+}
+
+void
+vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus)
+{
+ *sockets = vm->sockets;
+ *cores = vm->cores;
+ *threads = vm->threads;
+ *maxcpus = vm->maxcpus;
+}
+
+uint16_t
+vm_get_maxcpus(struct vm *vm)
+{
+ return (vm->maxcpus);
+}
+
+int
+vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus)
+{
+ /* Ignore maxcpus. */
+ if ((sockets * cores * threads) > vm->maxcpus)
+ return (EINVAL);
+ vm->sockets = sockets;
+ vm->cores = cores;
+ vm->threads = threads;
+	return (0);
+}
+
+static void
+vm_cleanup(struct vm *vm, bool destroy)
+{
+ struct mem_map *mm;
+ pmap_t pmap __diagused;
+ int i;
+
+ if (destroy) {
+ pmap = vmspace_pmap(vm->vmspace);
+ sched_pin();
+ PCPU_SET(curvmpmap, NULL);
+ sched_unpin();
+ CPU_FOREACH(i) {
+ MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
+ }
+ }
+
+ vgic_detach_from_vm(vm->cookie);
+
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (vm->vcpu[i] != NULL)
+ vcpu_cleanup(vm->vcpu[i], destroy);
+ }
+
+ vmmops_cleanup(vm->cookie);
+
+ /*
+ * System memory is removed from the guest address space only when
+ * the VM is destroyed. This is because the mapping remains the same
+ * across VM reset.
+ *
+ * Device memory can be relocated by the guest (e.g. using PCI BARs)
+ * so those mappings are removed on a VM reset.
+ */
+ if (!destroy) {
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+			if (!sysmem_mapping(vm, mm))
+ vm_free_memmap(vm, i);
+ }
+ }
+
+ if (destroy) {
+ for (i = 0; i < VM_MAX_MEMSEGS; i++)
+ vm_free_memseg(vm, i);
+
+ vmmops_vmspace_free(vm->vmspace);
+ vm->vmspace = NULL;
+
+ for (i = 0; i < vm->maxcpus; i++)
+ free(vm->vcpu[i], M_VMM);
+ free(vm->vcpu, M_VMM);
+ sx_destroy(&vm->vcpus_init_lock);
+ sx_destroy(&vm->mem_segs_lock);
+ }
+}
+
+void
+vm_destroy(struct vm *vm)
+{
+ vm_cleanup(vm, true);
+ free(vm, M_VMM);
+}
+
+int
+vm_reinit(struct vm *vm)
+{
+ int error;
+
+ /*
+ * A virtual machine can be reset only if all vcpus are suspended.
+ */
+ if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
+ vm_cleanup(vm, false);
+ vm_init(vm, false);
+ error = 0;
+ } else {
+ error = EBUSY;
+ }
+
+ return (error);
+}
+
+const char *
+vm_name(struct vm *vm)
+{
+ return (vm->name);
+}
+
+void
+vm_slock_memsegs(struct vm *vm)
+{
+ sx_slock(&vm->mem_segs_lock);
+}
+
+void
+vm_xlock_memsegs(struct vm *vm)
+{
+ sx_xlock(&vm->mem_segs_lock);
+}
+
+void
+vm_unlock_memsegs(struct vm *vm)
+{
+ sx_unlock(&vm->mem_segs_lock);
+}
+
+/*
+ * Return 'true' if 'gpa' is allocated in the guest address space.
+ *
+ * This function is called in the context of a running vcpu which acts as
+ * an implicit lock on 'vm->mem_maps[]'.
+ */
+bool
+vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
+{
+ struct vm *vm = vcpu->vm;
+ struct mem_map *mm;
+ int i;
+
+#ifdef INVARIANTS
+ int hostcpu, state;
+ state = vcpu_get_state(vcpu, &hostcpu);
+ KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
+ ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
+#endif
+
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
+ return (true); /* 'gpa' is sysmem or devmem */
+ }
+
+ return (false);
+}
+
+int
+vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
+{
+ struct mem_seg *seg;
+ vm_object_t obj;
+
+ sx_assert(&vm->mem_segs_lock, SX_XLOCKED);
+
+ if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ if (len == 0 || (len & PAGE_MASK))
+ return (EINVAL);
+
+ seg = &vm->mem_segs[ident];
+ if (seg->object != NULL) {
+ if (seg->len == len && seg->sysmem == sysmem)
+ return (EEXIST);
+ else
+ return (EINVAL);
+ }
+
+ obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
+ if (obj == NULL)
+ return (ENOMEM);
+
+ seg->len = len;
+ seg->object = obj;
+ seg->sysmem = sysmem;
+ return (0);
+}
+
+int
+vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ vm_object_t *objptr)
+{
+ struct mem_seg *seg;
+
+ sx_assert(&vm->mem_segs_lock, SX_LOCKED);
+
+ if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ seg = &vm->mem_segs[ident];
+ if (len)
+ *len = seg->len;
+ if (sysmem)
+ *sysmem = seg->sysmem;
+ if (objptr)
+ *objptr = seg->object;
+ return (0);
+}
+
+void
+vm_free_memseg(struct vm *vm, int ident)
+{
+ struct mem_seg *seg;
+
+ KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
+ ("%s: invalid memseg ident %d", __func__, ident));
+
+ seg = &vm->mem_segs[ident];
+ if (seg->object != NULL) {
+ vm_object_deallocate(seg->object);
+ bzero(seg, sizeof(struct mem_seg));
+ }
+}
+
+int
+vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
+ size_t len, int prot, int flags)
+{
+ struct mem_seg *seg;
+ struct mem_map *m, *map;
+ vm_ooffset_t last;
+ int i, error;
+
+ if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
+ return (EINVAL);
+
+ if (flags & ~VM_MEMMAP_F_WIRED)
+ return (EINVAL);
+
+ if (segid < 0 || segid >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ seg = &vm->mem_segs[segid];
+ if (seg->object == NULL)
+ return (EINVAL);
+
+ last = first + len;
+ if (first < 0 || first >= last || last > seg->len)
+ return (EINVAL);
+
+ if ((gpa | first | last) & PAGE_MASK)
+ return (EINVAL);
+
+ map = NULL;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ m = &vm->mem_maps[i];
+ if (m->len == 0) {
+ map = m;
+ break;
+ }
+ }
+
+ if (map == NULL)
+ return (ENOSPC);
+
+ error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
+ len, 0, VMFS_NO_SPACE, prot, prot, 0);
+ if (error != KERN_SUCCESS)
+ return (EFAULT);
+
+ vm_object_reference(seg->object);
+
+ if (flags & VM_MEMMAP_F_WIRED) {
+ error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
+ VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
+ if (error != KERN_SUCCESS) {
+ vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
+ return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
+ EFAULT);
+ }
+ }
+
+ map->gpa = gpa;
+ map->len = len;
+ map->segoff = first;
+ map->segid = segid;
+ map->prot = prot;
+ map->flags = flags;
+ return (0);
+}
+
+int
+vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
+{
+ struct mem_map *m;
+ int i;
+
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ m = &vm->mem_maps[i];
+ if (m->gpa == gpa && m->len == len) {
+ vm_free_memmap(vm, i);
+ return (0);
+ }
+ }
+
+ return (EINVAL);
+}
+
+int
+vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
+{
+ struct mem_map *mm, *mmnext;
+ int i;
+
+ mmnext = NULL;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (mm->len == 0 || mm->gpa < *gpa)
+ continue;
+ if (mmnext == NULL || mm->gpa < mmnext->gpa)
+ mmnext = mm;
+ }
+
+ if (mmnext != NULL) {
+ *gpa = mmnext->gpa;
+ if (segid)
+ *segid = mmnext->segid;
+ if (segoff)
+ *segoff = mmnext->segoff;
+ if (len)
+ *len = mmnext->len;
+ if (prot)
+ *prot = mmnext->prot;
+ if (flags)
+ *flags = mmnext->flags;
+ return (0);
+ } else {
+ return (ENOENT);
+ }
+}
+
+static void
+vm_free_memmap(struct vm *vm, int ident)
+{
+ struct mem_map *mm;
+ int error __diagused;
+
+ mm = &vm->mem_maps[ident];
+ if (mm->len) {
+ error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
+ mm->gpa + mm->len);
+ KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
+ __func__, error));
+ bzero(mm, sizeof(struct mem_map));
+ }
+}
+
+static __inline bool
+sysmem_mapping(struct vm *vm, struct mem_map *mm)
+{
+
+ if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
+ return (true);
+ else
+ return (false);
+}
+
+vm_paddr_t
+vmm_sysmem_maxaddr(struct vm *vm)
+{
+ struct mem_map *mm;
+ vm_paddr_t maxaddr;
+ int i;
+
+ maxaddr = 0;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (sysmem_mapping(vm, mm)) {
+ if (maxaddr < mm->gpa + mm->len)
+ maxaddr = mm->gpa + mm->len;
+ }
+ }
+ return (maxaddr);
+}
+
+int
+vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
+{
+
+ vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault);
+ return (0);
+}
+
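+/* Read the special register as zero (RAZ). */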
+static int
+vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ *rval = 0;
+ return (0);
+}
+
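+/* Return the value stored at 'arg', e.g. a masked host ID register. */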
+static int
+vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ *rval = *(uint64_t *)arg;
+ return (0);
+}
+
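+/* Ignore writes to the special register (WI). */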
+static int
+vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
+{
+ return (0);
+}
+
+static const struct vmm_special_reg vmm_special_regs[] = {
+#define SPECIAL_REG(_reg, _read, _write) \
+ { \
+ .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \
+ ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \
+ ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \
+ ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \
+ ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \
+ .esr_mask = ISS_MSR_REG_MASK, \
+ .reg_read = (_read), \
+ .reg_write = (_write), \
+ .arg = NULL, \
+ }
+#define ID_SPECIAL_REG(_reg, _name) \
+ { \
+ .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \
+ ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \
+ ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \
+ ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \
+ ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \
+ .esr_mask = ISS_MSR_REG_MASK, \
+ .reg_read = vmm_reg_read_arg, \
+ .reg_write = vmm_reg_wi, \
+ .arg = &(vmm_arch_regs._name), \
+ }
+
+ /* ID registers */
+ ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
+ ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
+ ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
+ ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
+ ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),
+
+ /*
+ * All other ID registers are read as zero.
+ * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
+ */
+ {
+ .esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
+ (0 << ISS_MSR_OP1_SHIFT) |
+ (0 << ISS_MSR_CRn_SHIFT) |
+ (0 << ISS_MSR_CRm_SHIFT),
+ .esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
+ ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
+ .reg_read = vmm_reg_raz,
+ .reg_write = vmm_reg_wi,
+ .arg = NULL,
+ },
+
+ /* Counter physical registers */
+ SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
+ SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
+ vtimer_phys_cval_write),
+ SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
+ vtimer_phys_tval_write),
+ SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
+#undef SPECIAL_REG
+};
+
+void
+vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
+ reg_read_t reg_read, reg_write_t reg_write, void *arg)
+{
+ int i;
+
+ for (i = 0; i < nitems(vm->special_reg); i++) {
+ if (vm->special_reg[i].esr_iss == 0 &&
+ vm->special_reg[i].esr_mask == 0) {
+ vm->special_reg[i].esr_iss = iss;
+ vm->special_reg[i].esr_mask = mask;
+ vm->special_reg[i].reg_read = reg_read;
+ vm->special_reg[i].reg_write = reg_write;
+ vm->special_reg[i].arg = arg;
+ return;
+ }
+ }
+
+ panic("%s: No free special register slot", __func__);
+}
+
+void
+vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
+{
+ int i;
+
+ for (i = 0; i < nitems(vm->special_reg); i++) {
+ if (vm->special_reg[i].esr_iss == iss &&
+ vm->special_reg[i].esr_mask == mask) {
+ memset(&vm->special_reg[i], 0,
+ sizeof(vm->special_reg[i]));
+ return;
+ }
+ }
+
+ panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
+ mask);
+}
+
+static int
+vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
+{
+ struct vm *vm;
+ struct vm_exit *vme;
+ struct vre *vre;
+ int i, rv;
+
+ vm = vcpu->vm;
+ vme = &vcpu->exitinfo;
+ vre = &vme->u.reg_emul.vre;
+
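+	/* Try the VM-registered handlers first, then the built-in table. */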
+ for (i = 0; i < nitems(vm->special_reg); i++) {
+ if (vm->special_reg[i].esr_iss == 0 &&
+ vm->special_reg[i].esr_mask == 0)
+ continue;
+
+ if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
+ vm->special_reg[i].esr_iss) {
+ rv = vmm_emulate_register(vcpu, vre,
+ vm->special_reg[i].reg_read,
+ vm->special_reg[i].reg_write,
+ vm->special_reg[i].arg);
+ if (rv == 0) {
+ *retu = false;
+ }
+ return (rv);
+ }
+ }
+ for (i = 0; i < nitems(vmm_special_regs); i++) {
+ if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
+ vmm_special_regs[i].esr_iss) {
+ rv = vmm_emulate_register(vcpu, vre,
+ vmm_special_regs[i].reg_read,
+ vmm_special_regs[i].reg_write,
+ vmm_special_regs[i].arg);
+ if (rv == 0) {
+ *retu = false;
+ }
+ return (rv);
+ }
+ }
+
+
+ return (0);
+}
+
+void
+vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+ mem_region_read_t mmio_read, mem_region_write_t mmio_write)
+{
+ int i;
+
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start == 0 &&
+ vm->mmio_region[i].end == 0) {
+ vm->mmio_region[i].start = start;
+ vm->mmio_region[i].end = start + size;
+ vm->mmio_region[i].read = mmio_read;
+ vm->mmio_region[i].write = mmio_write;
+ return;
+ }
+ }
+
+ panic("%s: No free MMIO region", __func__);
+}
+
+void
+vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
+{
+ int i;
+
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start == start &&
+ vm->mmio_region[i].end == start + size) {
+ memset(&vm->mmio_region[i], 0,
+ sizeof(vm->mmio_region[i]));
+ return;
+ }
+ }
+
+ panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
+ start + size);
+}
+
+static int
+vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
+{
+ struct vm *vm;
+ struct vm_exit *vme;
+ struct vie *vie;
+ struct hyp *hyp;
+ uint64_t fault_ipa;
+ struct vm_guest_paging *paging;
+ struct vmm_mmio_region *vmr;
+ int error, i;
+
+ vm = vcpu->vm;
+ hyp = vm->cookie;
+ if (!hyp->vgic_attached)
+ goto out_user;
+
+ vme = &vcpu->exitinfo;
+ vie = &vme->u.inst_emul.vie;
+ paging = &vme->u.inst_emul.paging;
+
+ fault_ipa = vme->u.inst_emul.gpa;
+
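+	/* Find the MMIO region, if any, that covers the faulting address. */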
+ vmr = NULL;
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start <= fault_ipa &&
+ vm->mmio_region[i].end > fault_ipa) {
+ vmr = &vm->mmio_region[i];
+ break;
+ }
+ }
+ if (vmr == NULL)
+ goto out_user;
+
+ error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
+ vmr->read, vmr->write, retu);
+ return (error);
+
+out_user:
+ *retu = true;
+ return (0);
+}
+
+int
+vm_suspend(struct vm *vm, enum vm_suspend_how how)
+{
+ int i;
+
+ if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
+ return (EINVAL);
+
+ if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
+ VM_CTR2(vm, "virtual machine already suspended %d/%d",
+ vm->suspend, how);
+ return (EALREADY);
+ }
+
+ VM_CTR1(vm, "virtual machine successfully suspended %d", how);
+
+ /*
+ * Notify all active vcpus that they are now suspended.
+ */
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (CPU_ISSET(i, &vm->active_cpus))
+ vcpu_notify_event(vm_vcpu(vm, i));
+ }
+
+ return (0);
+}
+
+void
+vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
+{
+ struct vm *vm = vcpu->vm;
+ struct vm_exit *vmexit;
+
+ KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
+ ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
+
+ vmexit = vm_exitinfo(vcpu);
+ vmexit->pc = pc;
+ vmexit->inst_length = 4;
+ vmexit->exitcode = VM_EXITCODE_SUSPENDED;
+ vmexit->u.suspended.how = vm->suspend;
+}
+
+void
+vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
+{
+ struct vm_exit *vmexit;
+
+ vmexit = vm_exitinfo(vcpu);
+ vmexit->pc = pc;
+ vmexit->inst_length = 4;
+ vmexit->exitcode = VM_EXITCODE_DEBUG;
+}
+
+int
+vm_activate_cpu(struct vcpu *vcpu)
+{
+ struct vm *vm = vcpu->vm;
+
+ if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
+ return (EBUSY);
+
+ CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
+ return (0);
+}
+
+int
+vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
+{
+ if (vcpu == NULL) {
+ vm->debug_cpus = vm->active_cpus;
+ for (int i = 0; i < vm->maxcpus; i++) {
+ if (CPU_ISSET(i, &vm->active_cpus))
+ vcpu_notify_event(vm_vcpu(vm, i));
+ }
+ } else {
+ if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
+ return (EINVAL);
+
+ CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
+ vcpu_notify_event(vcpu);
+ }
+ return (0);
+}
+
+int
+vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
+{
+
+ if (vcpu == NULL) {
+ CPU_ZERO(&vm->debug_cpus);
+ } else {
+ if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
+ return (EINVAL);
+
+ CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
+ }
+ return (0);
+}
+
+int
+vcpu_debugged(struct vcpu *vcpu)
+{
+
+ return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
+}
+
+cpuset_t
+vm_active_cpus(struct vm *vm)
+{
+
+ return (vm->active_cpus);
+}
+
+cpuset_t
+vm_debug_cpus(struct vm *vm)
+{
+
+ return (vm->debug_cpus);
+}
+
+cpuset_t
+vm_suspended_cpus(struct vm *vm)
+{
+
+ return (vm->suspended_cpus);
+}
+
+void *
+vcpu_stats(struct vcpu *vcpu)
+{
+
+ return (vcpu->stats);
+}
+
+/*
+ * This function is called to ensure that a vcpu "sees" a pending event
+ * as soon as possible:
+ * - If the vcpu thread is sleeping then it is woken up.
+ * - If the vcpu is running on a different host_cpu then an IPI will be directed
+ * to the host_cpu to cause the vcpu to trap into the hypervisor.
+ */
+static void
+vcpu_notify_event_locked(struct vcpu *vcpu)
+{
+ int hostcpu;
+
+ hostcpu = vcpu->hostcpu;
+ if (vcpu->state == VCPU_RUNNING) {
+ KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
+ if (hostcpu != curcpu) {
+ ipi_cpu(hostcpu, vmm_ipinum);
+ } else {
+ /*
+ * If the 'vcpu' is running on 'curcpu' then it must
+ * be sending a notification to itself (e.g. SELF_IPI).
+ * The pending event will be picked up when the vcpu
+ * transitions back to guest context.
+ */
+ }
+ } else {
+ KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
+ "with hostcpu %d", vcpu->state, hostcpu));
+ if (vcpu->state == VCPU_SLEEPING)
+ wakeup_one(vcpu);
+ }
+}
+
+void
+vcpu_notify_event(struct vcpu *vcpu)
+{
+ vcpu_lock(vcpu);
+ vcpu_notify_event_locked(vcpu);
+ vcpu_unlock(vcpu);
+}
+
+static void
+restore_guest_fpustate(struct vcpu *vcpu)
+{
+
+ /* flush host state to the pcb */
+ vfp_save_state(curthread, curthread->td_pcb);
+ /* Ensure the VFP state will be re-loaded when exiting the guest */
+ PCPU_SET(fpcurthread, NULL);
+
+ /* restore guest FPU state */
+ vfp_enable();
+ vfp_restore(vcpu->guestfpu);
+
+ /*
+ * The FPU is now "dirty" with the guest's state so turn on emulation
+ * to trap any access to the FPU by the host.
+ */
+ vfp_disable();
+}
+
+static void
+save_guest_fpustate(struct vcpu *vcpu)
+{
+ if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
+ CPACR_FPEN_TRAP_ALL1)
+ panic("VFP not enabled in host!");
+
+ /* save guest FPU state */
+ vfp_enable();
+ vfp_store(vcpu->guestfpu);
+ vfp_disable();
+
+ KASSERT(PCPU_GET(fpcurthread) == NULL,
+ ("%s: fpcurthread set with guest registers", __func__));
+}
+
+static int
+vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
+ bool from_idle)
+{
+ int error;
+
+ vcpu_assert_locked(vcpu);
+
+ /*
+ * State transitions from the vmmdev_ioctl() must always begin from
+ * the VCPU_IDLE state. This guarantees that there is only a single
+ * ioctl() operating on a vcpu at any point.
+ */
+ if (from_idle) {
+ while (vcpu->state != VCPU_IDLE) {
+ vcpu_notify_event_locked(vcpu);
+ msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+ }
+ } else {
+ KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
+ "vcpu idle state"));
+ }
+
+ if (vcpu->state == VCPU_RUNNING) {
+ KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
+ "mismatch for running vcpu", curcpu, vcpu->hostcpu));
+ } else {
+ KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
+ "vcpu that is not running", vcpu->hostcpu));
+ }
+
+ /*
+ * The following state transitions are allowed:
+ * IDLE -> FROZEN -> IDLE
+ * FROZEN -> RUNNING -> FROZEN
+ * FROZEN -> SLEEPING -> FROZEN
+ */
+ switch (vcpu->state) {
+ case VCPU_IDLE:
+ case VCPU_RUNNING:
+ case VCPU_SLEEPING:
+ error = (newstate != VCPU_FROZEN);
+ break;
+ case VCPU_FROZEN:
+ error = (newstate == VCPU_FROZEN);
+ break;
+ default:
+ error = 1;
+ break;
+ }
+
+ if (error)
+ return (EBUSY);
+
+ vcpu->state = newstate;
+ if (newstate == VCPU_RUNNING)
+ vcpu->hostcpu = curcpu;
+ else
+ vcpu->hostcpu = NOCPU;
+
+ if (newstate == VCPU_IDLE)
+ wakeup(&vcpu->state);
+
+ return (0);
+}
+
+static void
+vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
+{
+ int error;
+
+ if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
+		panic("Error %d setting state to %d", error, newstate);
+}
+
+static void
+vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
+{
+ int error;
+
+ if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
+ panic("Error %d setting state to %d", error, newstate);
+}
+
+int
+vm_get_capability(struct vcpu *vcpu, int type, int *retval)
+{
+ if (type < 0 || type >= VM_CAP_MAX)
+ return (EINVAL);
+
+ return (vmmops_getcap(vcpu->cookie, type, retval));
+}
+
+int
+vm_set_capability(struct vcpu *vcpu, int type, int val)
+{
+ if (type < 0 || type >= VM_CAP_MAX)
+ return (EINVAL);
+
+ return (vmmops_setcap(vcpu->cookie, type, val));
+}
+
+struct vm *
+vcpu_vm(struct vcpu *vcpu)
+{
+ return (vcpu->vm);
+}
+
+int
+vcpu_vcpuid(struct vcpu *vcpu)
+{
+ return (vcpu->vcpuid);
+}
+
+void *
+vcpu_get_cookie(struct vcpu *vcpu)
+{
+ return (vcpu->cookie);
+}
+
+struct vcpu *
+vm_vcpu(struct vm *vm, int vcpuid)
+{
+ return (vm->vcpu[vcpuid]);
+}
+
+int
+vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
+{
+ int error;
+
+ vcpu_lock(vcpu);
+ error = vcpu_set_state_locked(vcpu, newstate, from_idle);
+ vcpu_unlock(vcpu);
+
+ return (error);
+}
+
+enum vcpu_state
+vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
+{
+ enum vcpu_state state;
+
+ vcpu_lock(vcpu);
+ state = vcpu->state;
+ if (hostcpu != NULL)
+ *hostcpu = vcpu->hostcpu;
+ vcpu_unlock(vcpu);
+
+ return (state);
+}
+
+static void *
+_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+ int i, count, pageoff;
+ struct mem_map *mm;
+ vm_page_t m;
+
+ pageoff = gpa & PAGE_MASK;
+ if (len > PAGE_SIZE - pageoff)
+ panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
+
+ count = 0;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
+ gpa < mm->gpa + mm->len) {
+ count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
+ trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
+ break;
+ }
+ }
+
+ if (count == 1) {
+ *cookie = m;
+ return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
+ } else {
+ *cookie = NULL;
+ return (NULL);
+ }
+}
+
+void *
+vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+#ifdef INVARIANTS
+ /*
+ * The current vcpu should be frozen to ensure 'vm_memmap[]'
+ * stability.
+ */
+ int state = vcpu_get_state(vcpu, NULL);
+ KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
+ __func__, state));
+#endif
+ return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
+}
+
+void *
+vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+ sx_assert(&vm->mem_segs_lock, SX_LOCKED);
+ return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
+}
+
+void
+vm_gpa_release(void *cookie)
+{
+ vm_page_t m = cookie;
+
+ vm_page_unwire(m, PQ_ACTIVE);
+}
+
+int
+vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
+{
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+
+ return (vmmops_getreg(vcpu->cookie, reg, retval));
+}
+
+int
+vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
+{
+ int error;
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+ error = vmmops_setreg(vcpu->cookie, reg, val);
+ if (error || reg != VM_REG_GUEST_PC)
+ return (error);
+
+ vcpu->nextpc = val;
+
+ return (0);
+}
+
+void *
+vm_get_cookie(struct vm *vm)
+{
+ return (vm->cookie);
+}
+
+int
+vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
+{
+ return (vmmops_exception(vcpu->cookie, esr, far));
+}
+
+int
+vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
+{
+ return (vgic_attach_to_vm(vm->cookie, descr));
+}
+
+int
+vm_assert_irq(struct vm *vm, uint32_t irq)
+{
+ return (vgic_inject_irq(vm->cookie, -1, irq, true));
+}
+
+int
+vm_deassert_irq(struct vm *vm, uint32_t irq)
+{
+ return (vgic_inject_irq(vm->cookie, -1, irq, false));
+}
+
+int
+vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
+ int func)
+{
+ /* TODO: Should we raise an SError? */
+ return (vgic_inject_msi(vm->cookie, msg, addr));
+}
+
+static int
+vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
+{
+ struct hypctx *hypctx;
+ int i;
+
+ hypctx = vcpu_get_cookie(vcpu);
+
+ if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
+ return (1);
+
+ vme->exitcode = VM_EXITCODE_SMCCC;
+ vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
+ for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
+ vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];
+
+ *retu = true;
+ return (0);
+}
+
+static int
+vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
+{
+ vcpu_lock(vcpu);
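+	/* Sleep until the guest has a pending interrupt or should yield. */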
+ while (1) {
+ if (vgic_has_pending_irq(vcpu->cookie))
+ break;
+
+ if (vcpu_should_yield(vcpu))
+ break;
+
+ vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+ /*
+ * XXX msleep_spin() cannot be interrupted by signals so
+ * wake up periodically to check pending signals.
+ */
+ msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
+ vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+ }
+ vcpu_unlock(vcpu);
+
+ *retu = false;
+ return (0);
+}
+
+static int
+vm_handle_paging(struct vcpu *vcpu, bool *retu)
+{
+ struct vm *vm = vcpu->vm;
+ struct vm_exit *vme;
+ struct vm_map *map;
+ uint64_t addr, esr;
+ pmap_t pmap;
+ int ftype, rv;
+
+ vme = &vcpu->exitinfo;
+
+ pmap = vmspace_pmap(vcpu->vm->vmspace);
+ addr = vme->u.paging.gpa;
+ esr = vme->u.paging.esr;
+
+ /* The page exists, but the page table needs to be updated. */
+ if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
+ return (0);
+
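+	/*
+	 * The mapping is missing; fault the page in with full permissions
+	 * for both instruction and data aborts.
+	 */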
+ switch (ESR_ELx_EXCEPTION(esr)) {
+ case EXCP_INSN_ABORT_L:
+ case EXCP_DATA_ABORT_L:
+ ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
+ break;
+ default:
+ panic("%s: Invalid exception (esr = %lx)", __func__, esr);
+ }
+
+ map = &vm->vmspace->vm_map;
+ rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
+ if (rv != KERN_SUCCESS)
+ return (EFAULT);
+
+ return (0);
+}
+
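+/*
+ * Run the vcpu until an exit that must be handled in userland
+ * (retu == true) or an error occurs. A userspace monitor drives this
+ * in a loop through the VM_RUN ioctl on the VM's cdev, roughly as in
+ * the sketch below (illustrative only; the struct vm_run member names
+ * are assumed, see vmmdev_ioctl() in vmm_dev.c):
+ *
+ *	struct vm_exit vmexit;
+ *	struct vm_run vmrun;
+ *
+ *	vmrun.cpuid = 0;		// vcpu to run
+ *	vmrun.vm_exit = &vmexit;	// filled in on return
+ *	while (ioctl(vmfd, VM_RUN, &vmrun) == 0) {
+ *		// inspect vmexit.exitcode, emulate, then loop
+ *	}
+ */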
+int
+vm_run(struct vcpu *vcpu)
+{
+ struct vm *vm = vcpu->vm;
+ struct vm_eventinfo evinfo;
+ int error, vcpuid;
+ struct vm_exit *vme;
+ bool retu;
+ pmap_t pmap;
+
+ vcpuid = vcpu->vcpuid;
+
+ if (!CPU_ISSET(vcpuid, &vm->active_cpus))
+ return (EINVAL);
+
+ if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
+ return (EINVAL);
+
+ pmap = vmspace_pmap(vm->vmspace);
+ vme = &vcpu->exitinfo;
+ evinfo.rptr = NULL;
+ evinfo.sptr = &vm->suspend;
+ evinfo.iptr = NULL;
+restart:
+ critical_enter();
+
+ restore_guest_fpustate(vcpu);
+
+ vcpu_require_state(vcpu, VCPU_RUNNING);
+ error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
+ vcpu_require_state(vcpu, VCPU_FROZEN);
+
+ save_guest_fpustate(vcpu);
+
+ critical_exit();
+
+ if (error == 0) {
+ retu = false;
+ switch (vme->exitcode) {
+ case VM_EXITCODE_INST_EMUL:
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ error = vm_handle_inst_emul(vcpu, &retu);
+ break;
+
+ case VM_EXITCODE_REG_EMUL:
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ error = vm_handle_reg_emul(vcpu, &retu);
+ break;
+
+ case VM_EXITCODE_HVC:
+ /*
+ * The HVC instruction saves the address for the
+ * next instruction as the return address.
+ */
+ vcpu->nextpc = vme->pc;
+ /*
+ * The PSCI call can change the exit information in the
+ * case of suspend/reset/poweroff/cpu off/cpu on.
+ */
+ error = vm_handle_smccc_call(vcpu, vme, &retu);
+ break;
+
+ case VM_EXITCODE_WFI:
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ error = vm_handle_wfi(vcpu, vme, &retu);
+ break;
+
+ case VM_EXITCODE_PAGING:
+ vcpu->nextpc = vme->pc;
+ error = vm_handle_paging(vcpu, &retu);
+ break;
+
+ default:
+ /* Handle in userland */
+ vcpu->nextpc = vme->pc;
+ retu = true;
+ break;
+ }
+ }
+
+ if (error == 0 && retu == false)
+ goto restart;
+
+ return (error);
+}
diff --git a/sys/arm64/vmm/vmm_arm64.c b/sys/arm64/vmm/vmm_arm64.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_arm64.c
@@ -0,0 +1,1337 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/vmem.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+
+#include <machine/armreg.h>
+#include <machine/vm.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/atomic.h>
+#include <machine/hypervisor.h>
+#include <machine/pmap.h>
+
+#include "mmu.h"
+#include "arm64.h"
+#include "hyp.h"
+#include "reset.h"
+#include "io/vgic.h"
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+#include "vmm_stat.h"
+
+#define HANDLED 1
+#define UNHANDLED 0
+
+/* Number of bits in an EL2 virtual address */
+#define EL2_VIRT_BITS 48
+CTASSERT((1ul << EL2_VIRT_BITS) >= HYP_VM_MAX_ADDRESS);
+
+/* TODO: Move the host hypctx off the stack */
+#define VMM_STACK_PAGES 4
+#define VMM_STACK_SIZE (VMM_STACK_PAGES * PAGE_SIZE)
+
+static int vmm_pmap_levels, vmm_virt_bits, vmm_max_ipa_bits;
+
+/* Register values passed to arm_setup_vectors to set in the hypervisor */
+struct vmm_init_regs {
+ uint64_t tcr_el2;
+ uint64_t vtcr_el2;
+};
+
+MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP");
+
+extern char hyp_init_vectors[];
+extern char hyp_vectors[];
+extern char hyp_stub_vectors[];
+
+static vm_paddr_t hyp_code_base;
+static size_t hyp_code_len;
+
+static char *stack[MAXCPU];
+static vm_offset_t stack_hyp_va[MAXCPU];
+
+static vmem_t *el2_mem_alloc;
+
+static void arm_setup_vectors(void *arg);
+static void vmm_pmap_clean_stage2_tlbi(void);
+static void vmm_pmap_invalidate_range(uint64_t, vm_offset_t, vm_offset_t, bool);
+static void vmm_pmap_invalidate_all(uint64_t);
+
+DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
+
+static inline void
+arm64_set_active_vcpu(struct hypctx *hypctx)
+{
+ DPCPU_SET(vcpu, hypctx);
+}
+
+struct hypctx *
+arm64_get_active_vcpu(void)
+{
+ return (DPCPU_GET(vcpu));
+}
+
+static void
+arm_setup_vectors(void *arg)
+{
+ struct vmm_init_regs *el2_regs;
+ uintptr_t stack_top;
+ uint32_t sctlr_el2;
+ register_t daif;
+
+ el2_regs = arg;
+ arm64_set_active_vcpu(NULL);
+
+ daif = intr_disable();
+
+ /*
+ * Install the temporary vectors which will be responsible for
+ * initializing the VMM when we next trap into EL2.
+ *
+ * x0: the exception vector table responsible for hypervisor
+ * initialization on the next call.
+ */
+ vmm_call_hyp(vtophys(&vmm_hyp_code));
+
+ /* Create and map the hypervisor stack */
+ stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE;
+
+ /*
+ * Configure the system control register for EL2:
+ *
+ * SCTLR_EL2_M: MMU on
+ * SCTLR_EL2_C: Data cacheability not affected
+ * SCTLR_EL2_I: Instruction cacheability not affected
+ * SCTLR_EL2_A: Alignment check enable
+ * SCTLR_EL2_SA: Stack pointer alignment check
+ * SCTLR_EL2_WXN: Treat writable memory as execute never
+ * ~SCTLR_EL2_EE: Data accesses are little-endian
+ */
+ sctlr_el2 = SCTLR_EL2_RES1;
+ sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I;
+ sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA;
+ sctlr_el2 |= SCTLR_EL2_WXN;
+ sctlr_el2 &= ~SCTLR_EL2_EE;
+
+ /* Special call to initialize EL2 */
+ vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2,
+ sctlr_el2, el2_regs->vtcr_el2);
+
+ intr_restore(daif);
+}
+
+static void
+arm_teardown_vectors(void *arg)
+{
+ register_t daif;
+
+ /*
+ * vmm_cleanup() will disable the MMU. For the next few instructions,
+ * before the hardware disables the MMU, one of the following is
+ * possible:
+ *
+ * a. The instruction addresses are fetched with the MMU disabled,
+ * and they must represent the actual physical addresses. This will work
+ * because we call the vmm_cleanup() function by its physical address.
+ *
+ * b. The instruction addresses are fetched using the old translation
+ * tables. This will work because we have an identity mapping in place
+ * in the translation tables and vmm_cleanup() is called by its physical
+ * address.
+ */
+ daif = intr_disable();
+ /* TODO: Invalidate the cache */
+ vmm_call_hyp(HYP_CLEANUP, vtophys(hyp_stub_vectors));
+ intr_restore(daif);
+
+ arm64_set_active_vcpu(NULL);
+}
+
+static uint64_t
+vmm_vtcr_el2_sl(u_int levels)
+{
+#if PAGE_SIZE == PAGE_SIZE_4K
+ switch (levels) {
+ case 2:
+ return (VTCR_EL2_SL0_4K_LVL2);
+ case 3:
+ return (VTCR_EL2_SL0_4K_LVL1);
+ case 4:
+ return (VTCR_EL2_SL0_4K_LVL0);
+ default:
+ panic("%s: Invalid number of page table levels %u", __func__,
+ levels);
+ }
+#elif PAGE_SIZE == PAGE_SIZE_16K
+ switch (levels) {
+ case 2:
+ return (VTCR_EL2_SL0_16K_LVL2);
+ case 3:
+ return (VTCR_EL2_SL0_16K_LVL1);
+ case 4:
+ return (VTCR_EL2_SL0_16K_LVL0);
+ default:
+ panic("%s: Invalid number of page table levels %u", __func__,
+ levels);
+ }
+#else
+#error Unsupported page size
+#endif
+}
+
+int
+vmmops_modinit(int ipinum)
+{
+ struct vmm_init_regs el2_regs;
+ vm_offset_t next_hyp_va;
+ vm_paddr_t vmm_base;
+ uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field;
+ uint64_t cnthctl_el2;
+ register_t daif;
+ int cpu, i;
+ bool rv __diagused;
+
+ if (!virt_enabled()) {
+ printf(
+ "vmm: Processor doesn't have support for virtualization\n");
+ return (ENXIO);
+ }
+
+ /* TODO: Support VHE */
+ if (in_vhe()) {
+ printf("vmm: VHE is unsupported\n");
+ return (ENXIO);
+ }
+
+ if (!vgic_present()) {
+ printf("vmm: No vgic found\n");
+ return (ENODEV);
+ }
+
+ if (!get_kernel_reg(ID_AA64MMFR0_EL1, &id_aa64mmfr0_el1)) {
+ printf("vmm: Unable to read ID_AA64MMFR0_EL1\n");
+ return (ENXIO);
+ }
+ pa_range_field = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1);
+ /*
+ * Use 3 levels to give us up to 39 bits with 4k pages, or
+ * 47 bits with 16k pages.
+ */
+ /* TODO: Check the number of levels for 64k pages */
+ vmm_pmap_levels = 3;
+ switch (pa_range_field) {
+ case ID_AA64MMFR0_PARange_4G:
+ printf("vmm: Not enough physical address bits\n");
+ return (ENXIO);
+ case ID_AA64MMFR0_PARange_64G:
+ vmm_virt_bits = 36;
+#if PAGE_SIZE == PAGE_SIZE_16K
+ vmm_pmap_levels = 2;
+#endif
+ break;
+ default:
+ vmm_virt_bits = 39;
+ break;
+ }
+ pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT;
+
+ /* Initialise the EL2 MMU */
+ if (!vmmpmap_init()) {
+ printf("vmm: Failed to init the EL2 MMU\n");
+ return (ENOMEM);
+ }
+
+ /* Set up the stage 2 pmap callbacks */
+ MPASS(pmap_clean_stage2_tlbi == NULL);
+ pmap_clean_stage2_tlbi = vmm_pmap_clean_stage2_tlbi;
+ pmap_stage2_invalidate_range = vmm_pmap_invalidate_range;
+ pmap_stage2_invalidate_all = vmm_pmap_invalidate_all;
+
+ /*
+ * Create an allocator for the virtual address space used by EL2.
+ * EL2 code is identity-mapped; the allocator is used to find space for
+ * VM structures.
+ */
+ el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0, M_WAITOK);
+
+ /* Create the mappings for the hypervisor translation table. */
+ hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code);
+
+ /* We need a physical identity mapping for when we activate the MMU */
+ hyp_code_base = vmm_base = vtophys(&vmm_hyp_code);
+ rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base,
+ VM_PROT_READ | VM_PROT_EXECUTE);
+ MPASS(rv);
+
+ next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE);
+
+ /* Create a per-CPU hypervisor stack */
+ CPU_FOREACH(cpu) {
+ stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO);
+ stack_hyp_va[cpu] = next_hyp_va;
+
+ for (i = 0; i < VMM_STACK_PAGES; i++) {
+ rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i),
+ PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)),
+ VM_PROT_READ | VM_PROT_WRITE);
+ MPASS(rv);
+ }
+ next_hyp_va += L2_SIZE;
+ }
+
+ el2_regs.tcr_el2 = TCR_EL2_RES1;
+ el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT,
+ TCR_EL2_PS_52BITS);
+ el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS);
+ el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA;
+#if PAGE_SIZE == PAGE_SIZE_4K
+ el2_regs.tcr_el2 |= TCR_EL2_TG0_4K;
+#elif PAGE_SIZE == PAGE_SIZE_16K
+ el2_regs.tcr_el2 |= TCR_EL2_TG0_16K;
+#else
+#error Unsupported page size
+#endif
+#ifdef SMP
+ el2_regs.tcr_el2 |= TCR_EL2_SH0_IS;
+#endif
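+ /*
+ * Note (illustrative): with EL2_VIRT_BITS = 48 the T0SZ field set
+ * above is 64 - 48 = 16, giving EL2 a 48-bit stage 1 virtual
+ * address space; the PS field is capped at 52 bits of physical
+ * address.
+ */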
+
+ switch (el2_regs.tcr_el2 & TCR_EL2_PS_MASK) {
+ case TCR_EL2_PS_32BITS:
+ vmm_max_ipa_bits = 32;
+ break;
+ case TCR_EL2_PS_36BITS:
+ vmm_max_ipa_bits = 36;
+ break;
+ case TCR_EL2_PS_40BITS:
+ vmm_max_ipa_bits = 40;
+ break;
+ case TCR_EL2_PS_42BITS:
+ vmm_max_ipa_bits = 42;
+ break;
+ case TCR_EL2_PS_44BITS:
+ vmm_max_ipa_bits = 44;
+ break;
+ case TCR_EL2_PS_48BITS:
+ vmm_max_ipa_bits = 48;
+ break;
+ case TCR_EL2_PS_52BITS:
+ default:
+ vmm_max_ipa_bits = 52;
+ break;
+ }
+
+ /*
+ * Configure the Stage 2 translation control register:
+ *
+ * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable
+ * normal memory
+ * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable
+ * normal memory
+ * VTCR_EL2_TG0_4K/16K: Stage 2 uses the same page size as the kernel
+ * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables
+ * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner
+ * shareable
+ */
+ el2_regs.vtcr_el2 = VTCR_EL2_RES1;
+ el2_regs.vtcr_el2 |=
+ min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT);
+ el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA;
+ el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits);
+ el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels);
+#if PAGE_SIZE == PAGE_SIZE_4K
+ el2_regs.vtcr_el2 |= VTCR_EL2_TG0_4K;
+#elif PAGE_SIZE == PAGE_SIZE_16K
+ el2_regs.vtcr_el2 |= VTCR_EL2_TG0_16K;
+#else
+#error Unsupported page size
+#endif
+#ifdef SMP
+ el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS;
+#endif
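+ /*
+ * Worked example (illustrative): with 4K pages and
+ * vmm_virt_bits = 39 the guest IPA space is 39 bits wide, so
+ * T0SZ is 64 - 39 = 25 and vmm_vtcr_el2_sl(3) sets SL0 for a
+ * level 1 initial lookup of the 3 level stage 2 walk.
+ */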
+
+ smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs);
+
+ /* Add memory to the vmem allocator (checking there is space) */
+ if (vmm_base > (L2_SIZE + PAGE_SIZE)) {
+ /*
+ * Leave an L2 block of padding before the vmm code so buffer
+ * overflows from earlier allocations are caught. Also include
+ * PAGE_SIZE, the minimum size we can allocate.
+ */
+ vmm_base -= L2_SIZE + PAGE_SIZE;
+ vmm_base = rounddown2(vmm_base, L2_SIZE);
+
+ /*
+ * Check there is memory before the vmm code to add.
+ *
+ * Reserve the L2 block at address 0 so NULL dereference will
+ * raise an exception.
+ */
+ if (vmm_base > L2_SIZE)
+ vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE,
+ M_WAITOK);
+ }
+
+ /*
+ * Add the memory after the stacks. There is most of an L2 block
+ * between the last stack and the first allocation so this should
+ * be safe without adding more padding.
+ */
+ if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE)
+ vmem_add(el2_mem_alloc, next_hyp_va,
+ HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK);
+
+ daif = intr_disable();
+ cnthctl_el2 = vmm_call_hyp(HYP_READ_REGISTER, HYP_REG_CNTHCTL);
+ intr_restore(daif);
+
+ vgic_init();
+ vtimer_init(cnthctl_el2);
+
+ return (0);
+}
+
+int
+vmmops_modcleanup(void)
+{
+ int cpu;
+
+ smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL);
+
+ CPU_FOREACH(cpu) {
+ vmmpmap_remove(stack_hyp_va[cpu], VMM_STACK_PAGES * PAGE_SIZE,
+ false);
+ }
+
+ vmmpmap_remove(hyp_code_base, hyp_code_len, false);
+
+ vtimer_cleanup();
+
+ vmmpmap_fini();
+
+ CPU_FOREACH(cpu)
+ free(stack[cpu], M_HYP);
+
+ pmap_clean_stage2_tlbi = NULL;
+ pmap_stage2_invalidate_range = NULL;
+ pmap_stage2_invalidate_all = NULL;
+
+ return (0);
+}
+
+static vm_size_t
+el2_hyp_size(struct vm *vm)
+{
+ return (round_page(sizeof(struct hyp) +
+ sizeof(struct hypctx *) * vm_get_maxcpus(vm)));
+}
+
+static vm_size_t
+el2_hypctx_size(void)
+{
+ return (round_page(sizeof(struct hypctx)));
+}
+
+static vm_offset_t
+el2_map_enter(vm_offset_t data, vm_size_t size, vm_prot_t prot)
+{
+ vmem_addr_t addr;
+ int err __diagused;
+ bool rv __diagused;
+
+ err = vmem_alloc(el2_mem_alloc, size, M_NEXTFIT | M_WAITOK, &addr);
+ MPASS(err == 0);
+ rv = vmmpmap_enter(addr, size, vtophys(data), prot);
+ MPASS(rv);
+
+ return (addr);
+}
+
+void *
+vmmops_init(struct vm *vm, pmap_t pmap)
+{
+ struct hyp *hyp;
+ vm_size_t size;
+
+ size = el2_hyp_size(vm);
+ hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
+
+ hyp->vm = vm;
+ hyp->vgic_attached = false;
+
+ vtimer_vminit(hyp);
+ vgic_vminit(hyp);
+
+ hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size,
+ VM_PROT_READ | VM_PROT_WRITE);
+
+ return (hyp);
+}
+
+void *
+vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
+{
+ struct hyp *hyp = vmi;
+ struct hypctx *hypctx;
+ vm_size_t size;
+
+ size = el2_hypctx_size();
+ hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
+
+ KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
+ ("%s: Invalid vcpuid %d", __func__, vcpuid));
+ hyp->ctx[vcpuid] = hypctx;
+
+ hypctx->hyp = hyp;
+ hypctx->vcpu = vcpu1;
+
+ reset_vm_el01_regs(hypctx);
+ reset_vm_el2_regs(hypctx);
+
+ vtimer_cpuinit(hypctx);
+ vgic_cpuinit(hypctx);
+
+ hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size,
+ VM_PROT_READ | VM_PROT_WRITE);
+
+ return (hypctx);
+}
+
+static int
+arm_vmm_pinit(pmap_t pmap)
+{
+
+ pmap_pinit_stage(pmap, PM_STAGE2, vmm_pmap_levels);
+ return (1);
+}
+
+struct vmspace *
+vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
+{
+ return (vmspace_alloc(min, max, arm_vmm_pinit));
+}
+
+void
+vmmops_vmspace_free(struct vmspace *vmspace)
+{
+
+ pmap_remove_pages(vmspace_pmap(vmspace));
+ vmspace_free(vmspace);
+}
+
+static void
+vmm_pmap_clean_stage2_tlbi(void)
+{
+ vmm_call_hyp(HYP_CLEAN_S2_TLBI);
+}
+
+static void
+vmm_pmap_invalidate_range(uint64_t vttbr, vm_offset_t sva, vm_offset_t eva,
+ bool final_only)
+{
+ MPASS(eva > sva);
+ vmm_call_hyp(HYP_S2_TLBI_RANGE, vttbr, sva, eva, final_only);
+}
+
+static void
+vmm_pmap_invalidate_all(uint64_t vttbr)
+{
+ vmm_call_hyp(HYP_S2_TLBI_ALL, vttbr);
+}
+
+static inline void
+arm64_print_hyp_regs(struct vm_exit *vme)
+{
+ printf("esr_el2: 0x%016lx\n", vme->u.hyp.esr_el2);
+ printf("far_el2: 0x%016lx\n", vme->u.hyp.far_el2);
+ printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2);
+ printf("elr_el2: 0x%016lx\n", vme->pc);
+}
+
+static void
+arm64_gen_inst_emul_data(struct hypctx *hypctx, uint32_t esr_iss,
+ struct vm_exit *vme_ret)
+{
+ struct vm_guest_paging *paging;
+ struct vie *vie;
+ uint32_t esr_sas, reg_num;
+
+ /*
+ * Get the page address from HPFAR_EL2.
+ */
+ vme_ret->u.inst_emul.gpa =
+ HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
+ /* Bits [11:0] are the same as bits [11:0] from the virtual address. */
+ vme_ret->u.inst_emul.gpa += hypctx->exit_info.far_el2 &
+ FAR_EL2_HPFAR_PAGE_MASK;
+
+ esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT;
+ reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT;
+
+ vie = &vme_ret->u.inst_emul.vie;
+ vie->access_size = 1 << esr_sas;
+ vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0;
+ vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ;
+ vie->reg = reg_num;
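+ /*
+ * Example (illustrative): a faulting 32-bit store such as
+ * "str w3, [x0]" has SAS = 2, SRT = 3 and WnR = 1 in the ISS,
+ * which decodes to access_size = 4, reg = 3 and
+ * dir = VM_DIR_WRITE.
+ */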
+
+ paging = &vme_ret->u.inst_emul.paging;
+ paging->ttbr0_addr = hypctx->ttbr0_el1 & ~(TTBR_ASID_MASK | TTBR_CnP);
+ paging->ttbr1_addr = hypctx->ttbr1_el1 & ~(TTBR_ASID_MASK | TTBR_CnP);
+ paging->tcr_el1 = hypctx->tcr_el1;
+ paging->tcr2_el1 = hypctx->tcr2_el1;
+ paging->flags = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);
+ if ((hypctx->sctlr_el1 & SCTLR_M) != 0)
+ paging->flags |= VM_GP_MMU_ENABLED;
+}
+
+static void
+arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret)
+{
+ uint32_t reg_num;
+ struct vre *vre;
+
+ /* u.hyp member will be replaced by u.reg_emul */
+ vre = &vme_ret->u.reg_emul.vre;
+
+ vre->inst_syndrome = esr_iss;
+ /* ARMv8 Architecture Manual, p. D7-2273: 1 means read */
+ vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE;
+ reg_num = ISS_MSR_Rt(esr_iss);
+ vre->reg = reg_num;
+}
+
+void
+raise_data_insn_abort(struct hypctx *hypctx, uint64_t far, bool dabort, int fsc)
+{
+ uint64_t esr;
+
+ if ((hypctx->tf.tf_spsr & PSR_M_MASK) == PSR_M_EL0t)
+ esr = EXCP_INSN_ABORT_L << ESR_ELx_EC_SHIFT;
+ else
+ esr = EXCP_INSN_ABORT << ESR_ELx_EC_SHIFT;
+ /* Set the bit that changes from insn -> data abort */
+ if (dabort)
+ esr |= EXCP_DATA_ABORT_L << ESR_ELx_EC_SHIFT;
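+ /*
+ * For reference: the instruction abort ECs (0x20/0x21) and the
+ * data abort ECs (0x24/0x25) differ only in a single bit of the
+ * EC field, so OR-ing in the data abort value is sufficient.
+ */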
+ /* Set the IL bit if set by hardware */
+ esr |= hypctx->tf.tf_esr & ESR_ELx_IL;
+
+ vmmops_exception(hypctx, esr | fsc, far);
+}
+
+static int
+handle_el1_sync_excp(struct hypctx *hypctx, struct vm_exit *vme_ret,
+ pmap_t pmap)
+{
+ uint64_t gpa;
+ uint32_t esr_ec, esr_iss;
+
+ esr_ec = ESR_ELx_EXCEPTION(hypctx->tf.tf_esr);
+ esr_iss = hypctx->tf.tf_esr & ESR_ELx_ISS_MASK;
+
+ switch (esr_ec) {
+ case EXCP_UNKNOWN:
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_UNKNOWN, 1);
+ arm64_print_hyp_regs(vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ break;
+ case EXCP_TRAP_WFI_WFE:
+ if ((hypctx->tf.tf_esr & 0x3) == 0) { /* WFI */
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_WFI, 1);
+ vme_ret->exitcode = VM_EXITCODE_WFI;
+ } else {
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_WFE, 1);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ }
+ break;
+ case EXCP_HVC:
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_HVC, 1);
+ vme_ret->exitcode = VM_EXITCODE_HVC;
+ break;
+ case EXCP_MSR:
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_MSR, 1);
+ arm64_gen_reg_emul_data(esr_iss, vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_REG_EMUL;
+ break;
+
+ case EXCP_INSN_ABORT_L:
+ case EXCP_DATA_ABORT_L:
+ vmm_stat_incr(hypctx->vcpu, esr_ec == EXCP_DATA_ABORT_L ?
+ VMEXIT_DATA_ABORT : VMEXIT_INSN_ABORT, 1);
+ switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) {
+ case ISS_DATA_DFSC_TF_L0:
+ case ISS_DATA_DFSC_TF_L1:
+ case ISS_DATA_DFSC_TF_L2:
+ case ISS_DATA_DFSC_TF_L3:
+ case ISS_DATA_DFSC_AFF_L1:
+ case ISS_DATA_DFSC_AFF_L2:
+ case ISS_DATA_DFSC_AFF_L3:
+ case ISS_DATA_DFSC_PF_L1:
+ case ISS_DATA_DFSC_PF_L2:
+ case ISS_DATA_DFSC_PF_L3:
+ gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
+ /* Check the IPA is valid */
+ if (gpa >= (1ul << vmm_max_ipa_bits)) {
+ raise_data_insn_abort(hypctx,
+ hypctx->exit_info.far_el2,
+ esr_ec == EXCP_DATA_ABORT_L,
+ ISS_DATA_DFSC_ASF_L0);
+ vme_ret->inst_length = 0;
+ return (HANDLED);
+ }
+
+ if (vm_mem_allocated(hypctx->vcpu, gpa)) {
+ vme_ret->exitcode = VM_EXITCODE_PAGING;
+ vme_ret->inst_length = 0;
+ vme_ret->u.paging.esr = hypctx->tf.tf_esr;
+ vme_ret->u.paging.gpa = gpa;
+ } else if (esr_ec == EXCP_INSN_ABORT_L) {
+ /*
+ * Raise an external abort. Device memory is
+ * not executable
+ */
+ raise_data_insn_abort(hypctx,
+ hypctx->exit_info.far_el2, false,
+ ISS_DATA_DFSC_EXT);
+ vme_ret->inst_length = 0;
+ return (HANDLED);
+ } else {
+ arm64_gen_inst_emul_data(hypctx, esr_iss,
+ vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_INST_EMUL;
+ }
+ break;
+ default:
+ arm64_print_hyp_regs(vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ break;
+ }
+
+ break;
+
+ default:
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_SYNC, 1);
+ arm64_print_hyp_regs(vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ break;
+ }
+
+ /* We don't do any instruction emulation here */
+ return (UNHANDLED);
+}
+
+static int
+arm64_handle_world_switch(struct hypctx *hypctx, int excp_type,
+ struct vm_exit *vme, pmap_t pmap)
+{
+ int handled;
+
+ switch (excp_type) {
+ case EXCP_TYPE_EL1_SYNC:
+ /* The exit code will be set by handle_el1_sync_excp(). */
+ handled = handle_el1_sync_excp(hypctx, vme, pmap);
+ break;
+
+ case EXCP_TYPE_EL1_IRQ:
+ case EXCP_TYPE_EL1_FIQ:
+ /* The host kernel will handle IRQs and FIQs. */
+ vmm_stat_incr(hypctx->vcpu,
+ excp_type == EXCP_TYPE_EL1_IRQ ? VMEXIT_IRQ : VMEXIT_FIQ, 1);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = UNHANDLED;
+ break;
+
+ case EXCP_TYPE_EL1_ERROR:
+ case EXCP_TYPE_EL2_SYNC:
+ case EXCP_TYPE_EL2_IRQ:
+ case EXCP_TYPE_EL2_FIQ:
+ case EXCP_TYPE_EL2_ERROR:
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_EL2, 1);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = UNHANDLED;
+ break;
+
+ default:
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = UNHANDLED;
+ break;
+ }
+
+ return (handled);
+}
+
+static void
+ptp_release(void **cookie)
+{
+ if (*cookie != NULL) {
+ vm_gpa_release(*cookie);
+ *cookie = NULL;
+ }
+}
+
+static void *
+ptp_hold(struct vcpu *vcpu, vm_paddr_t ptpphys, size_t len, void **cookie)
+{
+ void *ptr;
+
+ ptp_release(cookie);
+ ptr = vm_gpa_hold(vcpu, ptpphys, len, VM_PROT_RW, cookie);
+ return (ptr);
+}
+
+/* log2 of the number of bytes in a page table entry */
+#define PTE_SHIFT 3
+int
+vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
+ int prot, uint64_t *gpa, int *is_fault)
+{
+ struct hypctx *hypctx;
+ void *cookie;
+ uint64_t mask, *ptep, pte, pte_addr;
+ int address_bits, granule_shift, ia_bits, levels, pte_shift, tsz;
+ bool is_el0;
+
+ /* Check if the MMU is off */
+ if ((paging->flags & VM_GP_MMU_ENABLED) == 0) {
+ *is_fault = 0;
+ *gpa = gla;
+ return (0);
+ }
+
+ is_el0 = (paging->flags & PSR_M_MASK) == PSR_M_EL0t;
+
+ if (ADDR_IS_KERNEL(gla)) {
+ /* If address translation is disabled raise an exception */
+ if ((paging->tcr_el1 & TCR_EPD1) != 0) {
+ *is_fault = 1;
+ return (0);
+ }
+ if (is_el0 && (paging->tcr_el1 & TCR_E0PD1) != 0) {
+ *is_fault = 1;
+ return (0);
+ }
+ pte_addr = paging->ttbr1_addr;
+ tsz = (paging->tcr_el1 & TCR_T1SZ_MASK) >> TCR_T1SZ_SHIFT;
+ /* If TBI is on, replace the tag in the top byte with all ones */
+ if ((paging->tcr_el1 & TCR_TBI1) != 0)
+ gla |= (0xfful << 56);
+ switch (paging->tcr_el1 & TCR_TG1_MASK) {
+ case TCR_TG1_4K:
+ granule_shift = PAGE_SHIFT_4K;
+ break;
+ case TCR_TG1_16K:
+ granule_shift = PAGE_SHIFT_16K;
+ break;
+ case TCR_TG1_64K:
+ granule_shift = PAGE_SHIFT_64K;
+ break;
+ default:
+ *is_fault = 1;
+ return (EINVAL);
+ }
+ } else {
+ /* If address translation is disabled raise an exception */
+ if ((paging->tcr_el1 & TCR_EPD0) != 0) {
+ *is_fault = 1;
+ return (0);
+ }
+ if (is_el0 && (paging->tcr_el1 & TCR_E0PD0) != 0) {
+ *is_fault = 1;
+ return (0);
+ }
+ pte_addr = paging->ttbr0_addr;
+ tsz = (paging->tcr_el1 & TCR_T0SZ_MASK) >> TCR_T0SZ_SHIFT;
+ /* Clear the top byte if TBI is on */
+ if ((paging->tcr_el1 & TCR_TBI0) != 0)
+ gla &= ~(0xfful << 56);
+ switch (paging->tcr_el1 & TCR_TG0_MASK) {
+ case TCR_TG0_4K:
+ granule_shift = PAGE_SHIFT_4K;
+ break;
+ case TCR_TG0_16K:
+ granule_shift = PAGE_SHIFT_16K;
+ break;
+ case TCR_TG0_64K:
+ granule_shift = PAGE_SHIFT_64K;
+ break;
+ default:
+ *is_fault = 1;
+ return (EINVAL);
+ }
+ }
+
+ /*
+ * TODO: Support FEAT_TTST for smaller tsz values and FEAT_LPA2
+ * for larger values.
+ */
+ switch (granule_shift) {
+ case PAGE_SHIFT_4K:
+ case PAGE_SHIFT_16K:
+ /*
+ * See "Table D8-11 4KB granule, determining stage 1 initial
+ * lookup level" and "Table D8-21 16KB granule, determining
+ * stage 1 initial lookup level" from the "Arm Architecture
+ * Reference Manual for A-Profile architecture" revision I.a
+ * for the minimum and maximum values.
+ *
+ * TODO: Support less than 16 when FEAT_LPA2 is implemented
+ * and TCR_EL1.DS == 1
+ * TODO: Support more than 39 when FEAT_TTST is implemented
+ */
+ if (tsz < 16 || tsz > 39) {
+ *is_fault = 1;
+ return (EINVAL);
+ }
+ break;
+ case PAGE_SHIFT_64K:
+ /* TODO: Support 64k granule. It will probably work, but is untested */
+ default:
+ *is_fault = 1;
+ return (EINVAL);
+ }
+
+ /*
+ * Calculate the input address bits. These are 64 bit in an address
+ * with the top tsz bits being all 0 or all 1.
+ */
+ ia_bits = 64 - tsz;
+
+ /*
+ * Calculate the number of address bits used in the page table
+ * calculation. This is ia_bits minus the bottom granule_shift
+ * bits that are passed to the output address.
+ */
+ address_bits = ia_bits - granule_shift;
+
+ /*
+ * Calculate the number of levels. Each level uses
+ * granule_shift - PTE_SHIFT bits of the input address.
+ * This is because the table is 1 << granule_shift and each
+ * entry is 1 << PTE_SHIFT bytes.
+ */
+ levels = howmany(address_bits, granule_shift - PTE_SHIFT);
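+ /*
+ * Worked example (illustrative): with a 4K granule
+ * (granule_shift = 12) and tsz = 25, ia_bits = 39,
+ * address_bits = 27 and levels = howmany(27, 9) = 3, so the
+ * walk below starts at the "case 3" (level 1) lookup.
+ */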
+
+ /* Mask of the upper unused bits in the virtual address */
+ gla &= (1ul << ia_bits) - 1;
+ hypctx = (struct hypctx *)vcpui;
+ cookie = NULL;
+ /* TODO: Check if the level supports block descriptors */
+ for (; levels > 0; levels--) {
+ int idx;
+
+ pte_shift = (levels - 1) * (granule_shift - PTE_SHIFT) +
+ granule_shift;
+ idx = (gla >> pte_shift) &
+ ((1ul << (granule_shift - PTE_SHIFT)) - 1);
+ while (idx >= PAGE_SIZE / sizeof(pte)) {
+ idx -= PAGE_SIZE / sizeof(pte);
+ pte_addr += PAGE_SIZE;
+ }
+
+ ptep = ptp_hold(hypctx->vcpu, pte_addr, PAGE_SIZE, &cookie);
+ if (ptep == NULL)
+ goto error;
+ pte = ptep[idx];
+
+ /* Calculate the level we are looking at */
+ switch (levels) {
+ default:
+ goto fault;
+ /* TODO: Level -1 when FEAT_LPA2 is implemented */
+ case 4: /* Level 0 */
+ if ((pte & ATTR_DESCR_MASK) != L0_TABLE)
+ goto fault;
+ /* FALLTHROUGH */
+ case 3: /* Level 1 */
+ case 2: /* Level 2 */
+ switch (pte & ATTR_DESCR_MASK) {
+ /* Use L1 macro as all levels are the same */
+ case L1_TABLE:
+ /* Check if EL0 can access this address space */
+ if (is_el0 &&
+ (pte & TATTR_AP_TABLE_NO_EL0) != 0)
+ goto fault;
+ /* Check if the address space is writable */
+ if ((prot & PROT_WRITE) != 0 &&
+ (pte & TATTR_AP_TABLE_RO) != 0)
+ goto fault;
+ if ((prot & PROT_EXEC) != 0) {
+ /* Check the table exec attribute */
+ if ((is_el0 &&
+ (pte & TATTR_UXN_TABLE) != 0) ||
+ (!is_el0 &&
+ (pte & TATTR_PXN_TABLE) != 0))
+ goto fault;
+ }
+ pte_addr = pte & ~ATTR_MASK;
+ break;
+ case L1_BLOCK:
+ goto done;
+ default:
+ goto fault;
+ }
+ break;
+ case 1: /* Level 3 */
+ if ((pte & ATTR_DESCR_MASK) == L3_PAGE)
+ goto done;
+ goto fault;
+ }
+ }
+
+done:
+ /* Check if EL0 has access to the block/page */
+ if (is_el0 && (pte & ATTR_S1_AP(ATTR_S1_AP_USER)) == 0)
+ goto fault;
+ if ((prot & PROT_WRITE) != 0 && (pte & ATTR_S1_AP_RW_BIT) != 0)
+ goto fault;
+ if ((prot & PROT_EXEC) != 0) {
+ if ((is_el0 && (pte & ATTR_S1_UXN) != 0) ||
+ (!is_el0 && (pte & ATTR_S1_PXN) != 0))
+ goto fault;
+ }
+ mask = (1ul << pte_shift) - 1;
+ *gpa = (pte & ~ATTR_MASK) | (gla & mask);
+ *is_fault = 0;
+ ptp_release(&cookie);
+ return (0);
+
+error:
+ ptp_release(&cookie);
+ return (EFAULT);
+fault:
+ *is_fault = 1;
+ ptp_release(&cookie);
+ return (0);
+}
+
+int
+vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
+{
+ uint64_t excp_type;
+ int handled;
+ register_t daif;
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vcpu *vcpu;
+ struct vm_exit *vme;
+ int mode;
+
+ hypctx = (struct hypctx *)vcpui;
+ hyp = hypctx->hyp;
+ vcpu = hypctx->vcpu;
+ vme = vm_exitinfo(vcpu);
+
+ hypctx->tf.tf_elr = (uint64_t)pc;
+
+ for (;;) {
+ if (hypctx->has_exception) {
+ hypctx->has_exception = false;
+ hypctx->elr_el1 = hypctx->tf.tf_elr;
+
+ mode = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);
+
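+ /*
+ * The offsets below are the architectural VBAR_EL1 vector
+ * slots for a synchronous exception: 0x0 for the current
+ * EL with SP_EL0, 0x200 for the current EL with SP_ELx,
+ * 0x400 for a lower EL using AArch64 and 0x600 for a
+ * lower EL using AArch32.
+ */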
+ if (mode == PSR_M_EL1t) {
+ hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x0;
+ } else if (mode == PSR_M_EL1h) {
+ hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x200;
+ } else if ((mode & PSR_M_32) == PSR_M_64) {
+ /* 64-bit EL0 */
+ hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x400;
+ } else {
+ /* 32-bit EL0 */
+ hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x600;
+ }
+
+ /* Set the new spsr */
+ hypctx->spsr_el1 = hypctx->tf.tf_spsr;
+
+ /* Set the new cpsr */
+ hypctx->tf.tf_spsr = hypctx->spsr_el1 & PSR_FLAGS;
+ hypctx->tf.tf_spsr |= PSR_DAIF | PSR_M_EL1h;
+
+ /*
+ * Update fields that may change on exception entry
+ * based on how sctlr_el1 is configured.
+ */
+ if ((hypctx->sctlr_el1 & SCTLR_SPAN) != 0)
+ hypctx->tf.tf_spsr |= PSR_PAN;
+ if ((hypctx->sctlr_el1 & SCTLR_DSSBS) == 0)
+ hypctx->tf.tf_spsr &= ~PSR_SSBS;
+ else
+ hypctx->tf.tf_spsr |= PSR_SSBS;
+ }
+
+ daif = intr_disable();
+
+ /* Check if the vcpu is suspended */
+ if (vcpu_suspended(evinfo)) {
+ intr_restore(daif);
+ vm_exit_suspended(vcpu, pc);
+ break;
+ }
+
+ if (vcpu_debugged(vcpu)) {
+ intr_restore(daif);
+ vm_exit_debug(vcpu, pc);
+ break;
+ }
+
+ /* Activate the stage2 pmap so the vmid is valid */
+ pmap_activate_vm(pmap);
+ hyp->vttbr_el2 = pmap_to_ttbr0(pmap);
+
+ /*
+ * TODO: What happens if a timer interrupt is asserted exactly
+ * here, but for the previous VM?
+ */
+ arm64_set_active_vcpu(hypctx);
+ vgic_flush_hwstate(hypctx);
+
+ /* Call into EL2 to switch to the guest */
+ excp_type = vmm_call_hyp(HYP_ENTER_GUEST,
+ hyp->el2_addr, hypctx->el2_addr);
+
+ vgic_sync_hwstate(hypctx);
+ vtimer_sync_hwstate(hypctx);
+
+ /*
+ * Deactivate the stage 2 pmap. vmm_pmap_clean_stage2_tlbi
+ * depends on this so that the VM's pmap is activated again
+ * before we next enter the guest.
+ */
+ PCPU_SET(curvmpmap, NULL);
+ intr_restore(daif);
+
+ vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
+ if (excp_type == EXCP_TYPE_MAINT_IRQ)
+ continue;
+
+ vme->pc = hypctx->tf.tf_elr;
+ vme->inst_length = INSN_SIZE;
+ vme->u.hyp.exception_nr = excp_type;
+ vme->u.hyp.esr_el2 = hypctx->tf.tf_esr;
+ vme->u.hyp.far_el2 = hypctx->exit_info.far_el2;
+ vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2;
+
+ handled = arm64_handle_world_switch(hypctx, excp_type, vme,
+ pmap);
+ if (handled == UNHANDLED)
+ /* Exit loop to emulate instruction. */
+ break;
+ else
+ /* Resume guest execution from the next instruction. */
+ hypctx->tf.tf_elr += vme->inst_length;
+ }
+
+ return (0);
+}
+
+static void
+arm_pcpu_vmcleanup(void *arg)
+{
+ struct hyp *hyp;
+ int i, maxcpus;
+
+ hyp = arg;
+ maxcpus = vm_get_maxcpus(hyp->vm);
+ for (i = 0; i < maxcpus; i++) {
+ if (arm64_get_active_vcpu() == hyp->ctx[i]) {
+ arm64_set_active_vcpu(NULL);
+ break;
+ }
+ }
+}
+
+void
+vmmops_vcpu_cleanup(void *vcpui)
+{
+ struct hypctx *hypctx = vcpui;
+
+ vtimer_cpucleanup(hypctx);
+ vgic_cpucleanup(hypctx);
+
+ vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true);
+
+ free(hypctx, M_HYP);
+}
+
+void
+vmmops_cleanup(void *vmi)
+{
+ struct hyp *hyp = vmi;
+
+ vtimer_vmcleanup(hyp);
+ vgic_vmcleanup(hyp);
+
+ smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp);
+
+ vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true);
+
+ free(hyp, M_HYP);
+}
+
+/*
+ * Return a pointer to the storage backing the given guest register in
+ * the hypctx, or NULL if the register is not known.
+ */
+static uint64_t *
+hypctx_regptr(struct hypctx *hypctx, int reg)
+{
+ switch (reg) {
+ case VM_REG_GUEST_X0 ... VM_REG_GUEST_X29:
+ return (&hypctx->tf.tf_x[reg]);
+ case VM_REG_GUEST_LR:
+ return (&hypctx->tf.tf_lr);
+ case VM_REG_GUEST_SP:
+ return (&hypctx->tf.tf_sp);
+ case VM_REG_GUEST_CPSR:
+ return (&hypctx->tf.tf_spsr);
+ case VM_REG_GUEST_PC:
+ return (&hypctx->tf.tf_elr);
+ case VM_REG_GUEST_SCTLR_EL1:
+ return (&hypctx->sctlr_el1);
+ case VM_REG_GUEST_TTBR0_EL1:
+ return (&hypctx->ttbr0_el1);
+ case VM_REG_GUEST_TTBR1_EL1:
+ return (&hypctx->ttbr1_el1);
+ case VM_REG_GUEST_TCR_EL1:
+ return (&hypctx->tcr_el1);
+ case VM_REG_GUEST_TCR2_EL1:
+ return (&hypctx->tcr2_el1);
+ default:
+ break;
+ }
+ return (NULL);
+}
+
+int
+vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
+{
+ uint64_t *regp;
+ int running, hostcpu;
+ struct hypctx *hypctx = vcpui;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("arm_getreg: %s%d is running", vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ regp = hypctx_regptr(hypctx, reg);
+ if (regp == NULL)
+ return (EINVAL);
+
+ *retval = *regp;
+ return (0);
+}
+
+int
+vmmops_setreg(void *vcpui, int reg, uint64_t val)
+{
+ uint64_t *regp;
+ struct hypctx *hypctx = vcpui;
+ int running, hostcpu;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("arm_setreg: %s%d is running", vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ regp = hypctx_regptr(hypctx, reg);
+ if (regp == NULL)
+ return (EINVAL);
+
+ *regp = val;
+ return (0);
+}
+
+int
+vmmops_exception(void *vcpui, uint64_t esr, uint64_t far)
+{
+ struct hypctx *hypctx = vcpui;
+ int running, hostcpu;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ hypctx->far_el1 = far;
+ hypctx->esr_el1 = esr;
+ hypctx->has_exception = true;
+
+ return (0);
+}
+
+int
+vmmops_getcap(void *vcpui, int num, int *retval)
+{
+ int ret;
+
+ ret = ENOENT;
+
+ switch (num) {
+ case VM_CAP_UNRESTRICTED_GUEST:
+ *retval = 1;
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+
+ return (ret);
+}
+
+int
+vmmops_setcap(void *vcpui, int num, int val)
+{
+
+ return (ENOENT);
+}
diff --git a/sys/arm64/vmm/vmm_call.S b/sys/arm64/vmm/vmm_call.S
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_call.S
@@ -0,0 +1,39 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <machine/asm.h>
+
+ .text
+
+ENTRY(vmm_call_hyp)
+ hvc #0
+ ret
+END(vmm_call_hyp)
diff --git a/sys/arm64/vmm/vmm_dev.c b/sys/arm64/vmm/vmm_dev.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_dev.c
@@ -0,0 +1,1054 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/sysctl.h>
+#include <sys/libkern.h>
+#include <sys/ioccom.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#include "vmm_stat.h"
+
+#include "io/vgic.h"
+
+struct devmem_softc {
+ int segid;
+ char *name;
+ struct cdev *cdev;
+ struct vmmdev_softc *sc;
+ SLIST_ENTRY(devmem_softc) link;
+};
+
+struct vmmdev_softc {
+ struct vm *vm; /* vm instance cookie */
+ struct cdev *cdev;
+ struct ucred *ucred;
+ SLIST_ENTRY(vmmdev_softc) link;
+ SLIST_HEAD(, devmem_softc) devmem;
+ int flags;
+};
+#define VSC_LINKED 0x01
+
+static SLIST_HEAD(, vmmdev_softc) head;
+
+static unsigned pr_allow_flag;
+static struct mtx vmmdev_mtx;
+MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);
+
+static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
+
+SYSCTL_DECL(_hw_vmm);
+
+static int vmm_priv_check(struct ucred *ucred);
+static int devmem_create_cdev(const char *vmname, int id, char *devmem);
+static void devmem_destroy(void *arg);
+
+static int
+vmm_priv_check(struct ucred *ucred)
+{
+
+ if (jailed(ucred) &&
+ !(ucred->cr_prison->pr_allow & pr_allow_flag))
+ return (EPERM);
+
+ return (0);
+}
+
+static int
+vcpu_lock_one(struct vcpu *vcpu)
+{
+ int error;
+
+ error = vcpu_set_state(vcpu, VCPU_FROZEN, true);
+ return (error);
+}
+
+static void
+vcpu_unlock_one(struct vcpu *vcpu)
+{
+ enum vcpu_state state;
+
+ state = vcpu_get_state(vcpu, NULL);
+ if (state != VCPU_FROZEN) {
+ panic("vcpu %s(%d) has invalid state %d",
+ vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
+ }
+
+ vcpu_set_state(vcpu, VCPU_IDLE, false);
+}
+
+static int
+vcpu_lock_all(struct vmmdev_softc *sc)
+{
+ struct vcpu *vcpu;
+ int error;
+ uint16_t i, j, maxcpus;
+
+ error = 0;
+ vm_slock_vcpus(sc->vm);
+ maxcpus = vm_get_maxcpus(sc->vm);
+ for (i = 0; i < maxcpus; i++) {
+ vcpu = vm_vcpu(sc->vm, i);
+ if (vcpu == NULL)
+ continue;
+ error = vcpu_lock_one(vcpu);
+ if (error)
+ break;
+ }
+
+ if (error) {
+ for (j = 0; j < i; j++) {
+ vcpu = vm_vcpu(sc->vm, j);
+ if (vcpu == NULL)
+ continue;
+ vcpu_unlock_one(vcpu);
+ }
+ vm_unlock_vcpus(sc->vm);
+ }
+
+ return (error);
+}
+
+static void
+vcpu_unlock_all(struct vmmdev_softc *sc)
+{
+ struct vcpu *vcpu;
+ uint16_t i, maxcpus;
+
+ maxcpus = vm_get_maxcpus(sc->vm);
+ for (i = 0; i < maxcpus; i++) {
+ vcpu = vm_vcpu(sc->vm, i);
+ if (vcpu == NULL)
+ continue;
+ vcpu_unlock_one(vcpu);
+ }
+ vm_unlock_vcpus(sc->vm);
+}
+
+static struct vmmdev_softc *
+vmmdev_lookup(const char *name)
+{
+ struct vmmdev_softc *sc;
+
+#ifdef notyet /* XXX kernel is not compiled with invariants */
+ mtx_assert(&vmmdev_mtx, MA_OWNED);
+#endif
+
+ SLIST_FOREACH(sc, &head, link) {
+ if (strcmp(name, vm_name(sc->vm)) == 0)
+ break;
+ }
+
+ if (sc == NULL)
+ return (NULL);
+
+ if (cr_cansee(curthread->td_ucred, sc->ucred))
+ return (NULL);
+
+ return (sc);
+}
+
+static struct vmmdev_softc *
+vmmdev_lookup2(struct cdev *cdev)
+{
+
+ return (cdev->si_drv1);
+}
+
+static int
+vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
+{
+ int error, off, c, prot;
+ vm_paddr_t gpa, maxaddr;
+ void *hpa, *cookie;
+ struct vmmdev_softc *sc;
+
+ error = vmm_priv_check(curthread->td_ucred);
+ if (error)
+ return (error);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL)
+ return (ENXIO);
+
+ /*
+ * Get a read lock on the guest memory map.
+ */
+ vm_slock_memsegs(sc->vm);
+
+ prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
+ maxaddr = vmm_sysmem_maxaddr(sc->vm);
+ while (uio->uio_resid > 0 && error == 0) {
+ gpa = uio->uio_offset;
+ off = gpa & PAGE_MASK;
+ c = min(uio->uio_resid, PAGE_SIZE - off);
+
+ /*
+ * The VM has a hole in its physical memory map. If we want to
+ * use 'dd' to inspect memory beyond the hole we need to
+ * provide bogus data for memory that lies in the hole.
+ *
+ * Since this device does not support lseek(2), dd(1) will
+ * read(2) blocks of data to simulate the lseek(2).
+ */
+ hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
+ if (hpa == NULL) {
+ if (uio->uio_rw == UIO_READ && gpa < maxaddr)
+ error = uiomove(__DECONST(void *, zero_region),
+ c, uio);
+ else
+ error = EFAULT;
+ } else {
+ error = uiomove(hpa, c, uio);
+ vm_gpa_release(cookie);
+ }
+ }
+ vm_unlock_memsegs(sc->vm);
+ return (error);
+}
+
+static int
+get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+ struct devmem_softc *dsc;
+ int error;
+ bool sysmem;
+
+ error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
+ if (error || mseg->len == 0)
+ return (error);
+
+ if (!sysmem) {
+ SLIST_FOREACH(dsc, &sc->devmem, link) {
+ if (dsc->segid == mseg->segid)
+ break;
+ }
+ KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
+ __func__, mseg->segid));
+ error = copystr(dsc->name, mseg->name, sizeof(mseg->name),
+ NULL);
+ } else {
+ bzero(mseg->name, sizeof(mseg->name));
+ }
+
+ return (error);
+}
+
+static int
+alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+ char *name;
+ int error;
+ bool sysmem;
+
+ error = 0;
+ name = NULL;
+ sysmem = true;
+
+ /*
+ * The allocation is lengthened by 1 to hold a terminating NUL. It'll
+ * be stripped off when devfs processes the full string.
+ */
+ if (VM_MEMSEG_NAME(mseg)) {
+ sysmem = false;
+ name = malloc(sizeof(mseg->name), M_VMMDEV, M_WAITOK);
+ error = copystr(mseg->name, name, sizeof(mseg->name), NULL);
+ if (error)
+ goto done;
+ }
+
+ error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
+ if (error)
+ goto done;
+
+ if (VM_MEMSEG_NAME(mseg)) {
+ error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
+ if (error)
+ vm_free_memseg(sc->vm, mseg->segid);
+ else
+ name = NULL; /* freed when 'cdev' is destroyed */
+ }
+done:
+ free(name, M_VMMDEV);
+ return (error);
+}
+
+static int
+vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
+ uint64_t *regval)
+{
+ int error, i;
+
+ error = 0;
+ for (i = 0; i < count; i++) {
+ error = vm_get_register(vcpu, regnum[i], &regval[i]);
+ if (error)
+ break;
+ }
+ return (error);
+}
+
+static int
+vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
+ uint64_t *regval)
+{
+ int error, i;
+
+ error = 0;
+ for (i = 0; i < count; i++) {
+ error = vm_set_register(vcpu, regnum[i], regval[i]);
+ if (error)
+ break;
+ }
+ return (error);
+}
+
+static int
+vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
+ struct thread *td)
+{
+ int error, vcpuid, size;
+ cpuset_t *cpuset;
+ struct vmmdev_softc *sc;
+ struct vcpu *vcpu;
+ struct vm_register *vmreg;
+ struct vm_register_set *vmregset;
+ struct vm_run *vmrun;
+ struct vm_vgic_version *vgv;
+ struct vm_vgic_descr *vgic;
+ struct vm_cpuset *vm_cpuset;
+ struct vm_irq *vi;
+ struct vm_capability *vmcap;
+ struct vm_stats *vmstats;
+ struct vm_stat_desc *statdesc;
+ struct vm_suspend *vmsuspend;
+ struct vm_exception *vmexc;
+ struct vm_gla2gpa *gg;
+ struct vm_memmap *mm;
+ struct vm_munmap *mu;
+ struct vm_msi *vmsi;
+ struct vm_cpu_topology *topology;
+ uint64_t *regvals;
+ int *regnums;
+ enum { NONE, SINGLE, ALL } vcpus_locked;
+ bool memsegs_locked;
+
+ error = vmm_priv_check(curthread->td_ucred);
+ if (error)
+ return (error);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL)
+ return (ENXIO);
+
+ error = 0;
+ vcpuid = -1;
+ vcpu = NULL;
+ vcpus_locked = NONE;
+ memsegs_locked = false;
+
+ /*
+ * Some VMM ioctls can operate only on vcpus that are not running.
+ */
+ switch (cmd) {
+ case VM_RUN:
+ case VM_GET_REGISTER:
+ case VM_SET_REGISTER:
+ case VM_GET_REGISTER_SET:
+ case VM_SET_REGISTER_SET:
+ case VM_INJECT_EXCEPTION:
+ case VM_GET_CAPABILITY:
+ case VM_SET_CAPABILITY:
+ case VM_GLA2GPA_NOFAULT:
+ case VM_ACTIVATE_CPU:
+ /*
+ * ioctls that can operate only on vcpus that are not running.
+ */
+ vcpuid = *(int *)data;
+ vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
+ if (vcpu == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+ error = vcpu_lock_one(vcpu);
+ if (error)
+ goto done;
+ vcpus_locked = SINGLE;
+ break;
+
+ case VM_ALLOC_MEMSEG:
+ case VM_MMAP_MEMSEG:
+ case VM_MUNMAP_MEMSEG:
+ case VM_REINIT:
+ case VM_ATTACH_VGIC:
+ /*
+ * ioctls that modify the memory map must lock memory
+ * segments exclusively.
+ */
+ vm_xlock_memsegs(sc->vm);
+ memsegs_locked = true;
+
+ /*
+ * ioctls that operate on the entire virtual machine must
+ * prevent all vcpus from running.
+ */
+ error = vcpu_lock_all(sc);
+ if (error)
+ goto done;
+ vcpus_locked = ALL;
+ break;
+ case VM_GET_MEMSEG:
+ case VM_MMAP_GETNEXT:
+ /*
+ * Lock the memory map while it is being inspected.
+ */
+ vm_slock_memsegs(sc->vm);
+ memsegs_locked = true;
+ break;
+
+ case VM_STATS:
+ /*
+ * These do not need the vCPU locked but do operate on
+ * a specific vCPU.
+ */
+ vcpuid = *(int *)data;
+ vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
+ if (vcpu == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+ break;
+
+ case VM_SUSPEND_CPU:
+ case VM_RESUME_CPU:
+ /*
+ * These can either operate on all CPUs via a vcpuid of
+ * -1 or on a specific vCPU.
+ */
+ vcpuid = *(int *)data;
+ if (vcpuid == -1)
+ break;
+ vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
+ if (vcpu == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+ break;
+
+ case VM_ASSERT_IRQ:
+ vi = (struct vm_irq *)data;
+ error = vm_assert_irq(sc->vm, vi->irq);
+ break;
+ case VM_DEASSERT_IRQ:
+ vi = (struct vm_irq *)data;
+ error = vm_deassert_irq(sc->vm, vi->irq);
+ break;
+ default:
+ break;
+ }
+
+ switch (cmd) {
+ case VM_RUN: {
+ struct vm_exit *vme;
+
+ vmrun = (struct vm_run *)data;
+ vme = vm_exitinfo(vcpu);
+
+ error = vm_run(vcpu);
+ if (error != 0)
+ break;
+
+ error = copyout(vme, vmrun->vm_exit, sizeof(*vme));
+ if (error != 0)
+ break;
+ break;
+ }
+ case VM_SUSPEND:
+ vmsuspend = (struct vm_suspend *)data;
+ error = vm_suspend(sc->vm, vmsuspend->how);
+ break;
+ case VM_REINIT:
+ error = vm_reinit(sc->vm);
+ break;
+ case VM_STAT_DESC: {
+ statdesc = (struct vm_stat_desc *)data;
+ error = vmm_stat_desc_copy(statdesc->index,
+ statdesc->desc, sizeof(statdesc->desc));
+ break;
+ }
+ case VM_STATS: {
+ CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
+ vmstats = (struct vm_stats *)data;
+ getmicrotime(&vmstats->tv);
+ error = vmm_stat_copy(vcpu, vmstats->index,
+ nitems(vmstats->statbuf),
+ &vmstats->num_entries, vmstats->statbuf);
+ break;
+ }
+ case VM_MMAP_GETNEXT:
+ mm = (struct vm_memmap *)data;
+ error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
+ &mm->segoff, &mm->len, &mm->prot, &mm->flags);
+ break;
+ case VM_MMAP_MEMSEG:
+ mm = (struct vm_memmap *)data;
+ error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
+ mm->len, mm->prot, mm->flags);
+ break;
+ case VM_MUNMAP_MEMSEG:
+ mu = (struct vm_munmap *)data;
+ error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
+ break;
+ case VM_ALLOC_MEMSEG:
+ error = alloc_memseg(sc, (struct vm_memseg *)data);
+ break;
+ case VM_GET_MEMSEG:
+ error = get_memseg(sc, (struct vm_memseg *)data);
+ break;
+ case VM_GET_REGISTER:
+ vmreg = (struct vm_register *)data;
+ error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
+ break;
+ case VM_SET_REGISTER:
+ vmreg = (struct vm_register *)data;
+ error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
+ break;
+ case VM_GET_REGISTER_SET:
+ vmregset = (struct vm_register_set *)data;
+ if (vmregset->count > VM_REG_LAST) {
+ error = EINVAL;
+ break;
+ }
+ regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
+ vmregset->count);
+ if (error == 0)
+ error = vm_get_register_set(vcpu, vmregset->count,
+ regnums, regvals);
+ if (error == 0)
+ error = copyout(regvals, vmregset->regvals,
+ sizeof(regvals[0]) * vmregset->count);
+ free(regvals, M_VMMDEV);
+ free(regnums, M_VMMDEV);
+ break;
+ case VM_SET_REGISTER_SET:
+ vmregset = (struct vm_register_set *)data;
+ if (vmregset->count > VM_REG_LAST) {
+ error = EINVAL;
+ break;
+ }
+ regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
+ vmregset->count);
+ if (error == 0)
+ error = copyin(vmregset->regvals, regvals,
+ sizeof(regvals[0]) * vmregset->count);
+ if (error == 0)
+ error = vm_set_register_set(vcpu, vmregset->count,
+ regnums, regvals);
+ free(regvals, M_VMMDEV);
+ free(regnums, M_VMMDEV);
+ break;
+ case VM_GET_CAPABILITY:
+ vmcap = (struct vm_capability *)data;
+ error = vm_get_capability(vcpu,
+ vmcap->captype,
+ &vmcap->capval);
+ break;
+ case VM_SET_CAPABILITY:
+ vmcap = (struct vm_capability *)data;
+ error = vm_set_capability(vcpu,
+ vmcap->captype,
+ vmcap->capval);
+ break;
+ case VM_INJECT_EXCEPTION:
+ vmexc = (struct vm_exception *)data;
+ error = vm_inject_exception(vcpu, vmexc->esr, vmexc->far);
+ break;
+ case VM_GLA2GPA_NOFAULT:
+ gg = (struct vm_gla2gpa *)data;
+ error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla,
+ gg->prot, &gg->gpa, &gg->fault);
+ KASSERT(error == 0 || error == EFAULT,
+ ("%s: vm_gla2gpa unknown error %d", __func__, error));
+ break;
+ case VM_ACTIVATE_CPU:
+ error = vm_activate_cpu(vcpu);
+ break;
+ case VM_GET_CPUS:
+ error = 0;
+ vm_cpuset = (struct vm_cpuset *)data;
+ size = vm_cpuset->cpusetsize;
+ if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
+ error = ERANGE;
+ break;
+ }
+ cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
+ if (vm_cpuset->which == VM_ACTIVE_CPUS)
+ *cpuset = vm_active_cpus(sc->vm);
+ else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
+ *cpuset = vm_suspended_cpus(sc->vm);
+ else if (vm_cpuset->which == VM_DEBUG_CPUS)
+ *cpuset = vm_debug_cpus(sc->vm);
+ else
+ error = EINVAL;
+ if (error == 0)
+ error = copyout(cpuset, vm_cpuset->cpus, size);
+ free(cpuset, M_TEMP);
+ break;
+ case VM_SUSPEND_CPU:
+ error = vm_suspend_cpu(sc->vm, vcpu);
+ break;
+ case VM_RESUME_CPU:
+ error = vm_resume_cpu(sc->vm, vcpu);
+ break;
+ case VM_GET_VGIC_VERSION:
+ vgv = (struct vm_vgic_version *)data;
+ /* TODO: Query the vgic driver for this */
+ vgv->version = 3;
+ vgv->flags = 0;
+ error = 0;
+ break;
+ case VM_ATTACH_VGIC:
+ vgic = (struct vm_vgic_descr *)data;
+ error = vm_attach_vgic(sc->vm, vgic);
+ break;
+ case VM_RAISE_MSI:
+ vmsi = (struct vm_msi *)data;
+ error = vm_raise_msi(sc->vm, vmsi->msg, vmsi->addr, vmsi->bus,
+ vmsi->slot, vmsi->func);
+ break;
+ case VM_SET_TOPOLOGY:
+ topology = (struct vm_cpu_topology *)data;
+ error = vm_set_topology(sc->vm, topology->sockets,
+ topology->cores, topology->threads, topology->maxcpus);
+ break;
+ case VM_GET_TOPOLOGY:
+ topology = (struct vm_cpu_topology *)data;
+ vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
+ &topology->threads, &topology->maxcpus);
+ error = 0;
+ break;
+ default:
+ error = ENOTTY;
+ break;
+ }
+
+done:
+ if (vcpus_locked == SINGLE)
+ vcpu_unlock_one(vcpu);
+ else if (vcpus_locked == ALL)
+ vcpu_unlock_all(sc);
+ if (memsegs_locked)
+ vm_unlock_memsegs(sc->vm);
+
+ /*
+ * Make sure that no handler returns a kernel-internal
+ * error value to userspace.
+ */
+ KASSERT(error == ERESTART || error >= 0,
+ ("vmmdev_ioctl: invalid error return %d", error));
+ return (error);
+}
+
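+/*
+ * Example (illustrative): userspace maps guest system memory by
+ * passing the guest physical address as the file offset of an mmap(2)
+ * on the VM's cdev, e.g.
+ *
+ *	ptr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
+ *	    vmfd, gpa);
+ */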
+static int
+vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
+ struct vm_object **objp, int nprot)
+{
+ struct vmmdev_softc *sc;
+ vm_paddr_t gpa;
+ size_t len;
+ vm_ooffset_t segoff, first, last;
+ int error, found, segid;
+ bool sysmem;
+
+ error = vmm_priv_check(curthread->td_ucred);
+ if (error)
+ return (error);
+
+ first = *offset;
+ last = first + mapsize;
+ if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+ return (EINVAL);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL) {
+ /* virtual machine is in the process of being created */
+ return (EINVAL);
+ }
+
+ /*
+ * Get a read lock on the guest memory map.
+ */
+ vm_slock_memsegs(sc->vm);
+
+ gpa = 0;
+ found = 0;
+ while (!found) {
+ error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
+ NULL, NULL);
+ if (error)
+ break;
+
+ if (first >= gpa && last <= gpa + len)
+ found = 1;
+ else
+ gpa += len;
+ }
+
+ if (found) {
+ error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
+ KASSERT(error == 0 && *objp != NULL,
+ ("%s: invalid memory segment %d", __func__, segid));
+ if (sysmem) {
+ vm_object_reference(*objp);
+ *offset = segoff + (first - gpa);
+ } else {
+ error = EINVAL;
+ }
+ }
+ vm_unlock_memsegs(sc->vm);
+ return (error);
+}
+
+static void
+vmmdev_destroy(void *arg)
+{
+ struct vmmdev_softc *sc = arg;
+ struct devmem_softc *dsc;
+ int error __diagused;
+
+ error = vcpu_lock_all(sc);
+ KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
+ vm_unlock_vcpus(sc->vm);
+
+ while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
+ KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
+ SLIST_REMOVE_HEAD(&sc->devmem, link);
+ free(dsc->name, M_VMMDEV);
+ free(dsc, M_VMMDEV);
+ }
+
+ if (sc->cdev != NULL)
+ destroy_dev(sc->cdev);
+
+ if (sc->vm != NULL)
+ vm_destroy(sc->vm);
+
+ if (sc->ucred != NULL)
+ crfree(sc->ucred);
+
+ if ((sc->flags & VSC_LINKED) != 0) {
+ mtx_lock(&vmmdev_mtx);
+ SLIST_REMOVE(&head, sc, vmmdev_softc, link);
+ mtx_unlock(&vmmdev_mtx);
+ }
+
+ free(sc, M_VMMDEV);
+}
+
+static int
+sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
+{
+ struct devmem_softc *dsc;
+ struct vmmdev_softc *sc;
+ struct cdev *cdev;
+ char *buf;
+ int error, buflen;
+
+ error = vmm_priv_check(req->td->td_ucred);
+ if (error)
+ return (error);
+
+ buflen = VM_MAX_NAMELEN + 1;
+ buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
+ strlcpy(buf, "beavis", buflen);
+ error = sysctl_handle_string(oidp, buf, buflen, req);
+ if (error != 0 || req->newptr == NULL)
+ goto out;
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(buf);
+ if (sc == NULL || sc->cdev == NULL) {
+ mtx_unlock(&vmmdev_mtx);
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * Setting 'sc->cdev' to NULL is used to indicate that the VM
+ * is scheduled for destruction.
+ */
+ cdev = sc->cdev;
+ sc->cdev = NULL;
+ mtx_unlock(&vmmdev_mtx);
+
+ /*
+ * Destroy all cdevs:
+ *
+ * - any new operations on the 'cdev' will return an error (ENXIO).
+ *
+ * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
+ */
+ SLIST_FOREACH(dsc, &sc->devmem, link) {
+ KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
+ destroy_dev(dsc->cdev);
+ devmem_destroy(dsc);
+ }
+ destroy_dev(cdev);
+ vmmdev_destroy(sc);
+ error = 0;
+
+out:
+ free(buf, M_VMMDEV);
+ return (error);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
+ CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_vmm_destroy, "A",
+ NULL);
+
+static struct cdevsw vmmdevsw = {
+ .d_name = "vmmdev",
+ .d_version = D_VERSION,
+ .d_ioctl = vmmdev_ioctl,
+ .d_mmap_single = vmmdev_mmap_single,
+ .d_read = vmmdev_rw,
+ .d_write = vmmdev_rw,
+};
+
+static int
+sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
+{
+ struct vm *vm;
+ struct cdev *cdev;
+ struct vmmdev_softc *sc, *sc2;
+ char *buf;
+ int error, buflen;
+
+ error = vmm_priv_check(req->td->td_ucred);
+ if (error)
+ return (error);
+
+ buflen = VM_MAX_NAMELEN + 1;
+ buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
+ strlcpy(buf, "beavis", buflen);
+ error = sysctl_handle_string(oidp, buf, buflen, req);
+ if (error != 0 || req->newptr == NULL)
+ goto out;
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(buf);
+ mtx_unlock(&vmmdev_mtx);
+ if (sc != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+
+ error = vm_create(buf, &vm);
+ if (error != 0)
+ goto out;
+
+ sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+ sc->ucred = crhold(curthread->td_ucred);
+ sc->vm = vm;
+ SLIST_INIT(&sc->devmem);
+
+ /*
+ * Lookup the name again just in case somebody sneaked in when we
+ * dropped the lock.
+ */
+ mtx_lock(&vmmdev_mtx);
+ sc2 = vmmdev_lookup(buf);
+ if (sc2 == NULL) {
+ SLIST_INSERT_HEAD(&head, sc, link);
+ sc->flags |= VSC_LINKED;
+ }
+ mtx_unlock(&vmmdev_mtx);
+
+ if (sc2 != NULL) {
+ vmmdev_destroy(sc);
+ error = EEXIST;
+ goto out;
+ }
+
+ error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
+ UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
+ if (error != 0) {
+ vmmdev_destroy(sc);
+ goto out;
+ }
+
+ mtx_lock(&vmmdev_mtx);
+ sc->cdev = cdev;
+ sc->cdev->si_drv1 = sc;
+ mtx_unlock(&vmmdev_mtx);
+
+out:
+ free(buf, M_VMMDEV);
+ return (error);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
+ CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_vmm_create, "A",
+ NULL);
+
+void
+vmmdev_init(void)
+{
+ pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
+ "Allow use of vmm in a jail.");
+}
+
+int
+vmmdev_cleanup(void)
+{
+ int error;
+
+ if (SLIST_EMPTY(&head))
+ error = 0;
+ else
+ error = EBUSY;
+
+ return (error);
+}
+
+static int
+devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
+ struct vm_object **objp, int nprot)
+{
+ struct devmem_softc *dsc;
+ vm_ooffset_t first, last;
+ size_t seglen;
+ int error;
+ bool sysmem;
+
+ dsc = cdev->si_drv1;
+ if (dsc == NULL) {
+ /* 'cdev' has been created but is not ready for use */
+ return (ENXIO);
+ }
+
+ first = *offset;
+ last = *offset + len;
+ if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+ return (EINVAL);
+
+ vm_slock_memsegs(dsc->sc->vm);
+
+ error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
+ KASSERT(error == 0 && !sysmem && *objp != NULL,
+ ("%s: invalid devmem segment %d", __func__, dsc->segid));
+
+ if (seglen >= last)
+ vm_object_reference(*objp);
+ else
+		error = EINVAL;
+ vm_unlock_memsegs(dsc->sc->vm);
+ return (error);
+}
+
+static struct cdevsw devmemsw = {
+ .d_name = "devmem",
+ .d_version = D_VERSION,
+ .d_mmap_single = devmem_mmap_single,
+};
+
+static int
+devmem_create_cdev(const char *vmname, int segid, char *devname)
+{
+ struct devmem_softc *dsc;
+ struct vmmdev_softc *sc;
+ struct cdev *cdev;
+ int error;
+
+ error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
+ UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
+ if (error)
+ return (error);
+
+ dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(vmname);
+ KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
+ if (sc->cdev == NULL) {
+ /* virtual machine is being created or destroyed */
+ mtx_unlock(&vmmdev_mtx);
+ free(dsc, M_VMMDEV);
+ destroy_dev_sched_cb(cdev, NULL, 0);
+ return (ENODEV);
+ }
+
+ dsc->segid = segid;
+ dsc->name = devname;
+ dsc->cdev = cdev;
+ dsc->sc = sc;
+ SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
+ mtx_unlock(&vmmdev_mtx);
+
+ /* The 'cdev' is ready for use after 'si_drv1' is initialized */
+ cdev->si_drv1 = dsc;
+ return (0);
+}
+
+static void
+devmem_destroy(void *arg)
+{
+ struct devmem_softc *dsc = arg;
+
+ KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
+ dsc->cdev = NULL;
+ dsc->sc = NULL;
+}
diff --git a/sys/arm64/vmm/vmm_hyp.c b/sys/arm64/vmm/vmm_hyp.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_hyp.c
@@ -0,0 +1,735 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2021 Andrew Turner
+ *
+ * This work was supported by Innovate UK project 105694, "Digital Security
+ * by Design (DSbD) Technology Platform Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+
+#include <machine/armreg.h>
+
+#include "arm64.h"
+#include "hyp.h"
+
+struct hypctx;
+
+uint64_t vmm_hyp_enter(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t,
+ uint64_t, uint64_t, uint64_t);
+uint64_t vmm_enter_guest(struct hypctx *);
+
+static void
+vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest)
+{
+ uint64_t dfr0;
+
+	/* Store the registers only used while running a guest */
+ if (guest) {
+ /* Store the timer registers */
+ hypctx->vtimer_cpu.cntkctl_el1 = READ_SPECIALREG(cntkctl_el1);
+ hypctx->vtimer_cpu.virt_timer.cntx_cval_el0 =
+ READ_SPECIALREG(cntv_cval_el0);
+ hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0 =
+ READ_SPECIALREG(cntv_ctl_el0);
+
+ /* Store the GICv3 registers */
+ hypctx->vgic_v3_regs.ich_eisr_el2 =
+ READ_SPECIALREG(ich_eisr_el2);
+ hypctx->vgic_v3_regs.ich_elrsr_el2 =
+ READ_SPECIALREG(ich_elrsr_el2);
+ hypctx->vgic_v3_regs.ich_hcr_el2 =
+ READ_SPECIALREG(ich_hcr_el2);
+ hypctx->vgic_v3_regs.ich_misr_el2 =
+ READ_SPECIALREG(ich_misr_el2);
+ hypctx->vgic_v3_regs.ich_vmcr_el2 =
+ READ_SPECIALREG(ich_vmcr_el2);
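+		/*
+		 * The switch falls through from ich_lr_num - 1 down to
+		 * ICH_LR0_EL2 so only the implemented list registers are
+		 * read.
+		 */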
+ switch (hypctx->vgic_v3_regs.ich_lr_num - 1) {
+#define STORE_LR(x) \
+ case x: \
+ hypctx->vgic_v3_regs.ich_lr_el2[x] = \
+ READ_SPECIALREG(ich_lr ## x ##_el2)
+ STORE_LR(15);
+ STORE_LR(14);
+ STORE_LR(13);
+ STORE_LR(12);
+ STORE_LR(11);
+ STORE_LR(10);
+ STORE_LR(9);
+ STORE_LR(8);
+ STORE_LR(7);
+ STORE_LR(6);
+ STORE_LR(5);
+ STORE_LR(4);
+ STORE_LR(3);
+ STORE_LR(2);
+ STORE_LR(1);
+ default:
+ STORE_LR(0);
+#undef STORE_LR
+ }
+
+ switch (hypctx->vgic_v3_regs.ich_apr_num - 1) {
+#define STORE_APR(x) \
+ case x: \
+ hypctx->vgic_v3_regs.ich_ap0r_el2[x] = \
+ READ_SPECIALREG(ich_ap0r ## x ##_el2); \
+ hypctx->vgic_v3_regs.ich_ap1r_el2[x] = \
+ READ_SPECIALREG(ich_ap1r ## x ##_el2)
+ STORE_APR(3);
+ STORE_APR(2);
+ STORE_APR(1);
+ default:
+ STORE_APR(0);
+#undef STORE_APR
+ }
+ }
+
+ dfr0 = READ_SPECIALREG(id_aa64dfr0_el1);
+ switch (ID_AA64DFR0_BRPs_VAL(dfr0) - 1) {
+#define STORE_DBG_BRP(x) \
+ case x: \
+ hypctx->dbgbcr_el1[x] = \
+ READ_SPECIALREG(dbgbcr ## x ## _el1); \
+ hypctx->dbgbvr_el1[x] = \
+ READ_SPECIALREG(dbgbvr ## x ## _el1)
+ STORE_DBG_BRP(15);
+ STORE_DBG_BRP(14);
+ STORE_DBG_BRP(13);
+ STORE_DBG_BRP(12);
+ STORE_DBG_BRP(11);
+ STORE_DBG_BRP(10);
+ STORE_DBG_BRP(9);
+ STORE_DBG_BRP(8);
+ STORE_DBG_BRP(7);
+ STORE_DBG_BRP(6);
+ STORE_DBG_BRP(5);
+ STORE_DBG_BRP(4);
+ STORE_DBG_BRP(3);
+ STORE_DBG_BRP(2);
+ STORE_DBG_BRP(1);
+ default:
+ STORE_DBG_BRP(0);
+#undef STORE_DBG_BRP
+ }
+
+ switch (ID_AA64DFR0_WRPs_VAL(dfr0) - 1) {
+#define STORE_DBG_WRP(x) \
+ case x: \
+ hypctx->dbgwcr_el1[x] = \
+ READ_SPECIALREG(dbgwcr ## x ## _el1); \
+ hypctx->dbgwvr_el1[x] = \
+ READ_SPECIALREG(dbgwvr ## x ## _el1)
+ STORE_DBG_WRP(15);
+ STORE_DBG_WRP(14);
+ STORE_DBG_WRP(13);
+ STORE_DBG_WRP(12);
+ STORE_DBG_WRP(11);
+ STORE_DBG_WRP(10);
+ STORE_DBG_WRP(9);
+ STORE_DBG_WRP(8);
+ STORE_DBG_WRP(7);
+ STORE_DBG_WRP(6);
+ STORE_DBG_WRP(5);
+ STORE_DBG_WRP(4);
+ STORE_DBG_WRP(3);
+ STORE_DBG_WRP(2);
+ STORE_DBG_WRP(1);
+ default:
+ STORE_DBG_WRP(0);
+#undef STORE_DBG_WRP
+ }
+
+ /* Store the PMU registers */
+ hypctx->pmcr_el0 = READ_SPECIALREG(pmcr_el0);
+ hypctx->pmccntr_el0 = READ_SPECIALREG(pmccntr_el0);
+ hypctx->pmccfiltr_el0 = READ_SPECIALREG(pmccfiltr_el0);
+ hypctx->pmcntenset_el0 = READ_SPECIALREG(pmcntenset_el0);
+ hypctx->pmintenset_el1 = READ_SPECIALREG(pmintenset_el1);
+ hypctx->pmovsset_el0 = READ_SPECIALREG(pmovsset_el0);
+ hypctx->pmuserenr_el0 = READ_SPECIALREG(pmuserenr_el0);
+ switch ((hypctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT) {
+#define STORE_PMU(x) \
+ case (x + 1): \
+ hypctx->pmevcntr_el0[x] = \
+ READ_SPECIALREG(pmevcntr ## x ## _el0); \
+ hypctx->pmevtyper_el0[x] = \
+ READ_SPECIALREG(pmevtyper ## x ## _el0)
+ STORE_PMU(30);
+ STORE_PMU(29);
+ STORE_PMU(28);
+ STORE_PMU(27);
+ STORE_PMU(26);
+ STORE_PMU(25);
+ STORE_PMU(24);
+ STORE_PMU(23);
+ STORE_PMU(22);
+ STORE_PMU(21);
+ STORE_PMU(20);
+ STORE_PMU(19);
+ STORE_PMU(18);
+ STORE_PMU(17);
+ STORE_PMU(16);
+ STORE_PMU(15);
+ STORE_PMU(14);
+ STORE_PMU(13);
+ STORE_PMU(12);
+ STORE_PMU(11);
+ STORE_PMU(10);
+ STORE_PMU(9);
+ STORE_PMU(8);
+ STORE_PMU(7);
+ STORE_PMU(6);
+ STORE_PMU(5);
+ STORE_PMU(4);
+ STORE_PMU(3);
+ STORE_PMU(2);
+ STORE_PMU(1);
+ STORE_PMU(0);
+ default: /* N == 0 when only PMCCNTR_EL0 is available */
+ break;
+#undef STORE_PMU
+ }
+
+	/* Store the special registers to the trapframe */
+ hypctx->tf.tf_sp = READ_SPECIALREG(sp_el1);
+ hypctx->tf.tf_elr = READ_SPECIALREG(elr_el2);
+ hypctx->tf.tf_spsr = READ_SPECIALREG(spsr_el2);
+ if (guest) {
+ hypctx->tf.tf_esr = READ_SPECIALREG(esr_el2);
+ }
+
+ /* Store the guest special registers */
+ hypctx->elr_el1 = READ_SPECIALREG(elr_el1);
+ hypctx->sp_el0 = READ_SPECIALREG(sp_el0);
+ hypctx->tpidr_el0 = READ_SPECIALREG(tpidr_el0);
+ hypctx->tpidrro_el0 = READ_SPECIALREG(tpidrro_el0);
+ hypctx->tpidr_el1 = READ_SPECIALREG(tpidr_el1);
+ hypctx->vbar_el1 = READ_SPECIALREG(vbar_el1);
+
+ hypctx->actlr_el1 = READ_SPECIALREG(actlr_el1);
+ hypctx->afsr0_el1 = READ_SPECIALREG(afsr0_el1);
+ hypctx->afsr1_el1 = READ_SPECIALREG(afsr1_el1);
+ hypctx->amair_el1 = READ_SPECIALREG(amair_el1);
+ hypctx->contextidr_el1 = READ_SPECIALREG(contextidr_el1);
+ hypctx->cpacr_el1 = READ_SPECIALREG(cpacr_el1);
+ hypctx->csselr_el1 = READ_SPECIALREG(csselr_el1);
+ hypctx->esr_el1 = READ_SPECIALREG(esr_el1);
+ hypctx->far_el1 = READ_SPECIALREG(far_el1);
+ hypctx->mair_el1 = READ_SPECIALREG(mair_el1);
+ hypctx->mdccint_el1 = READ_SPECIALREG(mdccint_el1);
+ hypctx->mdscr_el1 = READ_SPECIALREG(mdscr_el1);
+ hypctx->par_el1 = READ_SPECIALREG(par_el1);
+ hypctx->sctlr_el1 = READ_SPECIALREG(sctlr_el1);
+ hypctx->spsr_el1 = READ_SPECIALREG(spsr_el1);
+ hypctx->tcr_el1 = READ_SPECIALREG(tcr_el1);
+ /* TODO: Support when this is not res0 */
+ hypctx->tcr2_el1 = 0;
+ hypctx->ttbr0_el1 = READ_SPECIALREG(ttbr0_el1);
+ hypctx->ttbr1_el1 = READ_SPECIALREG(ttbr1_el1);
+
+ hypctx->cptr_el2 = READ_SPECIALREG(cptr_el2);
+ hypctx->hcr_el2 = READ_SPECIALREG(hcr_el2);
+ hypctx->vpidr_el2 = READ_SPECIALREG(vpidr_el2);
+ hypctx->vmpidr_el2 = READ_SPECIALREG(vmpidr_el2);
+}
+
+static void
+vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest)
+{
+ uint64_t dfr0;
+
+ /* Restore the special registers */
+ WRITE_SPECIALREG(elr_el1, hypctx->elr_el1);
+ WRITE_SPECIALREG(sp_el0, hypctx->sp_el0);
+ WRITE_SPECIALREG(tpidr_el0, hypctx->tpidr_el0);
+ WRITE_SPECIALREG(tpidrro_el0, hypctx->tpidrro_el0);
+ WRITE_SPECIALREG(tpidr_el1, hypctx->tpidr_el1);
+ WRITE_SPECIALREG(vbar_el1, hypctx->vbar_el1);
+
+ WRITE_SPECIALREG(actlr_el1, hypctx->actlr_el1);
+ WRITE_SPECIALREG(afsr0_el1, hypctx->afsr0_el1);
+ WRITE_SPECIALREG(afsr1_el1, hypctx->afsr1_el1);
+ WRITE_SPECIALREG(amair_el1, hypctx->amair_el1);
+ WRITE_SPECIALREG(contextidr_el1, hypctx->contextidr_el1);
+ WRITE_SPECIALREG(cpacr_el1, hypctx->cpacr_el1);
+ WRITE_SPECIALREG(csselr_el1, hypctx->csselr_el1);
+ WRITE_SPECIALREG(esr_el1, hypctx->esr_el1);
+ WRITE_SPECIALREG(far_el1, hypctx->far_el1);
+ WRITE_SPECIALREG(mdccint_el1, hypctx->mdccint_el1);
+ WRITE_SPECIALREG(mdscr_el1, hypctx->mdscr_el1);
+ WRITE_SPECIALREG(mair_el1, hypctx->mair_el1);
+ WRITE_SPECIALREG(par_el1, hypctx->par_el1);
+ WRITE_SPECIALREG(sctlr_el1, hypctx->sctlr_el1);
+ WRITE_SPECIALREG(tcr_el1, hypctx->tcr_el1);
+ /* TODO: tcr2_el1 */
+ WRITE_SPECIALREG(ttbr0_el1, hypctx->ttbr0_el1);
+ WRITE_SPECIALREG(ttbr1_el1, hypctx->ttbr1_el1);
+ WRITE_SPECIALREG(spsr_el1, hypctx->spsr_el1);
+
+ WRITE_SPECIALREG(cptr_el2, hypctx->cptr_el2);
+ WRITE_SPECIALREG(hcr_el2, hypctx->hcr_el2);
+ WRITE_SPECIALREG(vpidr_el2, hypctx->vpidr_el2);
+ WRITE_SPECIALREG(vmpidr_el2, hypctx->vmpidr_el2);
+
+ /* Load the special regs from the trapframe */
+ WRITE_SPECIALREG(sp_el1, hypctx->tf.tf_sp);
+ WRITE_SPECIALREG(elr_el2, hypctx->tf.tf_elr);
+ WRITE_SPECIALREG(spsr_el2, hypctx->tf.tf_spsr);
+
+ /* Restore the PMU registers */
+ WRITE_SPECIALREG(pmcr_el0, hypctx->pmcr_el0);
+ WRITE_SPECIALREG(pmccntr_el0, hypctx->pmccntr_el0);
+ WRITE_SPECIALREG(pmccfiltr_el0, hypctx->pmccfiltr_el0);
+ /* Clear all events/interrupts then enable them */
+ WRITE_SPECIALREG(pmcntenclr_el0, 0xfffffffful);
+ WRITE_SPECIALREG(pmcntenset_el0, hypctx->pmcntenset_el0);
+ WRITE_SPECIALREG(pmintenclr_el1, 0xfffffffful);
+ WRITE_SPECIALREG(pmintenset_el1, hypctx->pmintenset_el1);
+ WRITE_SPECIALREG(pmovsclr_el0, 0xfffffffful);
+ WRITE_SPECIALREG(pmovsset_el0, hypctx->pmovsset_el0);
+
+ switch ((hypctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT) {
+#define LOAD_PMU(x) \
+ case (x + 1): \
+ WRITE_SPECIALREG(pmevcntr ## x ## _el0, \
+ hypctx->pmevcntr_el0[x]); \
+ WRITE_SPECIALREG(pmevtyper ## x ## _el0, \
+ hypctx->pmevtyper_el0[x])
+ LOAD_PMU(30);
+ LOAD_PMU(29);
+ LOAD_PMU(28);
+ LOAD_PMU(27);
+ LOAD_PMU(26);
+ LOAD_PMU(25);
+ LOAD_PMU(24);
+ LOAD_PMU(23);
+ LOAD_PMU(22);
+ LOAD_PMU(21);
+ LOAD_PMU(20);
+ LOAD_PMU(19);
+ LOAD_PMU(18);
+ LOAD_PMU(17);
+ LOAD_PMU(16);
+ LOAD_PMU(15);
+ LOAD_PMU(14);
+ LOAD_PMU(13);
+ LOAD_PMU(12);
+ LOAD_PMU(11);
+ LOAD_PMU(10);
+ LOAD_PMU(9);
+ LOAD_PMU(8);
+ LOAD_PMU(7);
+ LOAD_PMU(6);
+ LOAD_PMU(5);
+ LOAD_PMU(4);
+ LOAD_PMU(3);
+ LOAD_PMU(2);
+ LOAD_PMU(1);
+ LOAD_PMU(0);
+ default: /* N == 0 when only PMCCNTR_EL0 is available */
+ break;
+#undef LOAD_PMU
+ }
+
+ dfr0 = READ_SPECIALREG(id_aa64dfr0_el1);
+ switch (ID_AA64DFR0_BRPs_VAL(dfr0) - 1) {
+#define LOAD_DBG_BRP(x) \
+ case x: \
+ WRITE_SPECIALREG(dbgbcr ## x ## _el1, \
+ hypctx->dbgbcr_el1[x]); \
+ WRITE_SPECIALREG(dbgbvr ## x ## _el1, \
+ hypctx->dbgbvr_el1[x])
+ LOAD_DBG_BRP(15);
+ LOAD_DBG_BRP(14);
+ LOAD_DBG_BRP(13);
+ LOAD_DBG_BRP(12);
+ LOAD_DBG_BRP(11);
+ LOAD_DBG_BRP(10);
+ LOAD_DBG_BRP(9);
+ LOAD_DBG_BRP(8);
+ LOAD_DBG_BRP(7);
+ LOAD_DBG_BRP(6);
+ LOAD_DBG_BRP(5);
+ LOAD_DBG_BRP(4);
+ LOAD_DBG_BRP(3);
+ LOAD_DBG_BRP(2);
+ LOAD_DBG_BRP(1);
+ default:
+ LOAD_DBG_BRP(0);
+#undef LOAD_DBG_BRP
+ }
+
+ switch (ID_AA64DFR0_WRPs_VAL(dfr0) - 1) {
+#define LOAD_DBG_WRP(x) \
+ case x: \
+ WRITE_SPECIALREG(dbgwcr ## x ## _el1, \
+ hypctx->dbgwcr_el1[x]); \
+ WRITE_SPECIALREG(dbgwvr ## x ## _el1, \
+ hypctx->dbgwvr_el1[x])
+ LOAD_DBG_WRP(15);
+ LOAD_DBG_WRP(14);
+ LOAD_DBG_WRP(13);
+ LOAD_DBG_WRP(12);
+ LOAD_DBG_WRP(11);
+ LOAD_DBG_WRP(10);
+ LOAD_DBG_WRP(9);
+ LOAD_DBG_WRP(8);
+ LOAD_DBG_WRP(7);
+ LOAD_DBG_WRP(6);
+ LOAD_DBG_WRP(5);
+ LOAD_DBG_WRP(4);
+ LOAD_DBG_WRP(3);
+ LOAD_DBG_WRP(2);
+ LOAD_DBG_WRP(1);
+ default:
+ LOAD_DBG_WRP(0);
+#undef LOAD_DBG_WRP
+ }
+
+ if (guest) {
+ /* Load the timer registers */
+ WRITE_SPECIALREG(cntkctl_el1, hypctx->vtimer_cpu.cntkctl_el1);
+ WRITE_SPECIALREG(cntv_cval_el0,
+ hypctx->vtimer_cpu.virt_timer.cntx_cval_el0);
+ WRITE_SPECIALREG(cntv_ctl_el0,
+ hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0);
+ WRITE_SPECIALREG(cnthctl_el2, hyp->vtimer.cnthctl_el2);
+ WRITE_SPECIALREG(cntvoff_el2, hyp->vtimer.cntvoff_el2);
+
+ /* Load the GICv3 registers */
+ WRITE_SPECIALREG(ich_hcr_el2, hypctx->vgic_v3_regs.ich_hcr_el2);
+ WRITE_SPECIALREG(ich_vmcr_el2,
+ hypctx->vgic_v3_regs.ich_vmcr_el2);
+ switch (hypctx->vgic_v3_regs.ich_lr_num - 1) {
+#define LOAD_LR(x) \
+ case x: \
+ WRITE_SPECIALREG(ich_lr ## x ##_el2, \
+ hypctx->vgic_v3_regs.ich_lr_el2[x])
+ LOAD_LR(15);
+ LOAD_LR(14);
+ LOAD_LR(13);
+ LOAD_LR(12);
+ LOAD_LR(11);
+ LOAD_LR(10);
+ LOAD_LR(9);
+ LOAD_LR(8);
+ LOAD_LR(7);
+ LOAD_LR(6);
+ LOAD_LR(5);
+ LOAD_LR(4);
+ LOAD_LR(3);
+ LOAD_LR(2);
+ LOAD_LR(1);
+ default:
+ LOAD_LR(0);
+#undef LOAD_LR
+ }
+
+ switch (hypctx->vgic_v3_regs.ich_apr_num - 1) {
+#define LOAD_APR(x) \
+ case x: \
+ WRITE_SPECIALREG(ich_ap0r ## x ##_el2, \
+ hypctx->vgic_v3_regs.ich_ap0r_el2[x]); \
+ WRITE_SPECIALREG(ich_ap1r ## x ##_el2, \
+ hypctx->vgic_v3_regs.ich_ap1r_el2[x])
+ LOAD_APR(3);
+ LOAD_APR(2);
+ LOAD_APR(1);
+ default:
+ LOAD_APR(0);
+#undef LOAD_APR
+ }
+ }
+}
+
+static uint64_t
+vmm_hyp_call_guest(struct hyp *hyp, struct hypctx *hypctx)
+{
+ struct hypctx host_hypctx;
+ uint64_t cntvoff_el2;
+ uint64_t ich_hcr_el2, ich_vmcr_el2, cnthctl_el2, cntkctl_el1;
+ uint64_t ret;
+ uint64_t s1e1r, hpfar_el2;
+ bool hpfar_valid;
+
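+	/* Save the host register state so it can be restored on guest exit */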
+ vmm_hyp_reg_store(&host_hypctx, NULL, false);
+
+ /* Save the host special registers */
+ cnthctl_el2 = READ_SPECIALREG(cnthctl_el2);
+ cntkctl_el1 = READ_SPECIALREG(cntkctl_el1);
+ cntvoff_el2 = READ_SPECIALREG(cntvoff_el2);
+
+ ich_hcr_el2 = READ_SPECIALREG(ich_hcr_el2);
+ ich_vmcr_el2 = READ_SPECIALREG(ich_vmcr_el2);
+
+ vmm_hyp_reg_restore(hypctx, hyp, true);
+
+ /* Load the common hypervisor registers */
+ WRITE_SPECIALREG(vttbr_el2, hyp->vttbr_el2);
+
+ host_hypctx.mdcr_el2 = READ_SPECIALREG(mdcr_el2);
+ WRITE_SPECIALREG(mdcr_el2, hypctx->mdcr_el2);
+
+ /* Call into the guest */
+ ret = vmm_enter_guest(hypctx);
+
+ WRITE_SPECIALREG(mdcr_el2, host_hypctx.mdcr_el2);
+ isb();
+
+ /* Store the exit info */
+ hypctx->exit_info.far_el2 = READ_SPECIALREG(far_el2);
+ vmm_hyp_reg_store(hypctx, hyp, true);
+
+ hpfar_valid = true;
+ if (ret == EXCP_TYPE_EL1_SYNC) {
+ switch (ESR_ELx_EXCEPTION(hypctx->tf.tf_esr)) {
+ case EXCP_INSN_ABORT_L:
+ case EXCP_DATA_ABORT_L:
+ /*
+ * The hpfar_el2 register is valid for:
+ * - Translation and Access faults.
+ * - Translation, Access, and permission faults on
+ * the translation table walk on the stage 1 tables.
+ * - A stage 2 Address size fault.
+ *
+ * As we only need it in the first 2 cases we can just
+ * exclude it on permission faults that are not from
+ * the stage 1 table walk.
+ *
+ * TODO: Add a case for Arm erratum 834220.
+ */
+ if ((hypctx->tf.tf_esr & ISS_DATA_S1PTW) != 0)
+ break;
+ switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) {
+ case ISS_DATA_DFSC_PF_L1:
+ case ISS_DATA_DFSC_PF_L2:
+ case ISS_DATA_DFSC_PF_L3:
+ hpfar_valid = false;
+ break;
+ }
+ break;
+ }
+ }
+ if (hpfar_valid) {
+ hypctx->exit_info.hpfar_el2 = READ_SPECIALREG(hpfar_el2);
+ } else {
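+		/*
+		 * hpfar_el2 is not valid for this exit, so walk the guest
+		 * stage 1 tables with an address translation instruction to
+		 * recover the faulting IPA. If the translation fails re-enter
+		 * the guest so the access can be retried.
+		 */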
+ /*
+ * TODO: There is a risk the at instruction could cause an
+ * exception here. We should handle it & return a failure.
+ */
+ s1e1r =
+ arm64_address_translate_s1e1r(hypctx->exit_info.far_el2);
+ if (PAR_SUCCESS(s1e1r)) {
+ hpfar_el2 = (s1e1r & PAR_PA_MASK) >> PAR_PA_SHIFT;
+ hpfar_el2 <<= HPFAR_EL2_FIPA_SHIFT;
+ hypctx->exit_info.hpfar_el2 = hpfar_el2;
+ } else {
+ ret = EXCP_TYPE_REENTER;
+ }
+ }
+
+ vmm_hyp_reg_restore(&host_hypctx, NULL, false);
+
+ /* Restore the host special registers */
+ WRITE_SPECIALREG(ich_hcr_el2, ich_hcr_el2);
+ WRITE_SPECIALREG(ich_vmcr_el2, ich_vmcr_el2);
+
+ WRITE_SPECIALREG(cnthctl_el2, cnthctl_el2);
+ WRITE_SPECIALREG(cntkctl_el1, cntkctl_el1);
+ WRITE_SPECIALREG(cntvoff_el2, cntvoff_el2);
+
+ return (ret);
+}
+
+static uint64_t
+vmm_hyp_read_reg(uint64_t reg)
+{
+ switch (reg) {
+ case HYP_REG_ICH_VTR:
+ return (READ_SPECIALREG(ich_vtr_el2));
+ case HYP_REG_CNTHCTL:
+ return (READ_SPECIALREG(cnthctl_el2));
+ }
+
+ return (0);
+}
+
+static int
+vmm_clean_s2_tlbi(void)
+{
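+	/* Invalidate all stage 1 and stage 2 EL1&0 TLB entries, Inner Shareable */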
+ dsb(ishst);
+ __asm __volatile("tlbi alle1is");
+ dsb(ish);
+
+ return (0);
+}
+
+static int
+vm_s2_tlbi_range(uint64_t vttbr, vm_offset_t sva, vm_size_t eva,
+ bool final_only)
+{
+ uint64_t end, r, start;
+ uint64_t host_vttbr;
+
+#define TLBI_VA_SHIFT 12
+#define TLBI_VA_MASK ((1ul << 44) - 1)
+#define TLBI_VA(addr) (((addr) >> TLBI_VA_SHIFT) & TLBI_VA_MASK)
+#define TLBI_VA_L3_INCR (L3_SIZE >> TLBI_VA_SHIFT)
+
+ /* Switch to the guest vttbr */
+ /* TODO: Handle Cortex-A57/A72 erratum 131936 */
+ host_vttbr = READ_SPECIALREG(vttbr_el2);
+ WRITE_SPECIALREG(vttbr_el2, vttbr);
+ isb();
+
+ /*
+ * The CPU can cache the stage 1 + 2 combination so we need to ensure
+ * the stage 2 is invalidated first, then when this has completed we
+ * invalidate the stage 1 TLB. As we don't know which stage 1 virtual
+ * addresses point at the stage 2 IPA we need to invalidate the entire
+ * stage 1 TLB.
+ */
+
+ start = TLBI_VA(sva);
+ end = TLBI_VA(eva);
+ for (r = start; r < end; r += TLBI_VA_L3_INCR) {
+ /* Invalidate the stage 2 TLB entry */
+ if (final_only)
+ __asm __volatile("tlbi ipas2le1is, %0" : : "r"(r));
+ else
+ __asm __volatile("tlbi ipas2e1is, %0" : : "r"(r));
+ }
+	/* Ensure the entries have been invalidated */
+ dsb(ish);
+ /* Invalidate the stage 1 TLB. */
+ __asm __volatile("tlbi vmalle1is");
+ dsb(ish);
+ isb();
+
+	/* Switch back to the host vttbr */
+ WRITE_SPECIALREG(vttbr_el2, host_vttbr);
+ isb();
+
+ return (0);
+}
+
+static int
+vm_s2_tlbi_all(uint64_t vttbr)
+{
+ uint64_t host_vttbr;
+
+ /* Switch to the guest vttbr */
+ /* TODO: Handle Cortex-A57/A72 erratum 131936 */
+ host_vttbr = READ_SPECIALREG(vttbr_el2);
+ WRITE_SPECIALREG(vttbr_el2, vttbr);
+ isb();
+
+ __asm __volatile("tlbi vmalls12e1is");
+ dsb(ish);
+ isb();
+
+	/* Switch back to the host vttbr */
+ WRITE_SPECIALREG(vttbr_el2, host_vttbr);
+ isb();
+
+ return (0);
+}
+
+static int
+vmm_dc_civac(uint64_t start, uint64_t len)
+{
+ size_t line_size, end;
+ uint64_t ctr;
+
+ ctr = READ_SPECIALREG(ctr_el0);
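+	/* CTR_EL0.DminLine is log2 of the smallest D-cache line size, in words */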
+ line_size = sizeof(int) << CTR_DLINE_SIZE(ctr);
+ end = start + len;
+ dsb(ishst);
+ /* Clean and Invalidate the D-cache */
+ for (; start < end; start += line_size)
+ __asm __volatile("dc civac, %0" :: "r" (start) : "memory");
+ dsb(ish);
+ return (0);
+}
+
+static int
+vmm_el2_tlbi(uint64_t type, uint64_t start, uint64_t len)
+{
+ uint64_t end, r;
+
+ dsb(ishst);
+ switch (type) {
+ default:
+ case HYP_EL2_TLBI_ALL:
+ __asm __volatile("tlbi alle2" ::: "memory");
+ break;
+ case HYP_EL2_TLBI_VA:
+ end = TLBI_VA(start + len);
+ start = TLBI_VA(start);
+ for (r = start; r < end; r += TLBI_VA_L3_INCR) {
+ __asm __volatile("tlbi vae2is, %0" :: "r"(r));
+ }
+ break;
+ }
+ dsb(ish);
+
+ return (0);
+}
+
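+/*
+ * Hypervisor call dispatcher. This is reached from the EL2 synchronous
+ * exception handler when the host kernel makes a call into the hypervisor.
+ */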
+uint64_t
+vmm_hyp_enter(uint64_t handle, uint64_t x1, uint64_t x2, uint64_t x3,
+ uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7)
+{
+ uint64_t ret;
+
+ switch (handle) {
+ case HYP_ENTER_GUEST:
+ do {
+ ret = vmm_hyp_call_guest((struct hyp *)x1,
+ (struct hypctx *)x2);
+ } while (ret == EXCP_TYPE_REENTER);
+ return (ret);
+ case HYP_READ_REGISTER:
+ return (vmm_hyp_read_reg(x1));
+ case HYP_CLEAN_S2_TLBI:
+ return (vmm_clean_s2_tlbi());
+ case HYP_DC_CIVAC:
+ return (vmm_dc_civac(x1, x2));
+ case HYP_EL2_TLBI:
+ return (vmm_el2_tlbi(x1, x2, x3));
+ case HYP_S2_TLBI_RANGE:
+ return (vm_s2_tlbi_range(x1, x2, x3, x4));
+ case HYP_S2_TLBI_ALL:
+ return (vm_s2_tlbi_all(x1));
+ case HYP_CLEANUP: /* Handled in vmm_hyp_exception.S */
+ default:
+ break;
+ }
+
+ return (0);
+}
diff --git a/sys/arm64/vmm/vmm_hyp_el2.S b/sys/arm64/vmm/vmm_hyp_el2.S
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_hyp_el2.S
@@ -0,0 +1,39 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2021 Andrew Turner
+ *
+ * This work was supported by Innovate UK project 105694, "Digital Security
+ * by Design (DSbD) Technology Platform Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/param.h>
+
+ .rodata
+ .align PAGE_SHIFT
+ .globl vmm_hyp_code
+vmm_hyp_code:
+ .incbin "vmm_hyp_blob.bin"
+ .globl vmm_hyp_code_end
+vmm_hyp_code_end:
diff --git a/sys/arm64/vmm/vmm_hyp_exception.S b/sys/arm64/vmm/vmm_hyp_exception.S
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_hyp_exception.S
@@ -0,0 +1,384 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * Copyright (c) 2021 Andrew Turner
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <machine/asm.h>
+#include <machine/hypervisor.h>
+
+#include "assym.inc"
+#include "hyp.h"
+
+.macro save_host_registers
+ /* TODO: Only store callee saved registers */
+ sub sp, sp, #(32 * 8)
+ str x30, [sp, #(30 * 8)]
+ stp x28, x29, [sp, #(28 * 8)]
+ stp x26, x27, [sp, #(26 * 8)]
+ stp x24, x25, [sp, #(24 * 8)]
+ stp x22, x23, [sp, #(22 * 8)]
+ stp x20, x21, [sp, #(20 * 8)]
+ stp x18, x19, [sp, #(18 * 8)]
+ stp x16, x17, [sp, #(16 * 8)]
+ stp x14, x15, [sp, #(14 * 8)]
+ stp x12, x13, [sp, #(12 * 8)]
+ stp x10, x11, [sp, #(10 * 8)]
+ stp x8, x9, [sp, #(8 * 8)]
+ stp x6, x7, [sp, #(6 * 8)]
+ stp x4, x5, [sp, #(4 * 8)]
+ stp x2, x3, [sp, #(2 * 8)]
+ stp x0, x1, [sp, #(0 * 8)]
+.endm
+
+.macro restore_host_registers
+ /* TODO: Only restore callee saved registers */
+ ldp x0, x1, [sp, #(0 * 8)]
+ ldp x2, x3, [sp, #(2 * 8)]
+ ldp x4, x5, [sp, #(4 * 8)]
+ ldp x6, x7, [sp, #(6 * 8)]
+ ldp x8, x9, [sp, #(8 * 8)]
+ ldp x10, x11, [sp, #(10 * 8)]
+ ldp x12, x13, [sp, #(12 * 8)]
+ ldp x14, x15, [sp, #(14 * 8)]
+ ldp x16, x17, [sp, #(16 * 8)]
+ ldp x18, x19, [sp, #(18 * 8)]
+ ldp x20, x21, [sp, #(20 * 8)]
+ ldp x22, x23, [sp, #(22 * 8)]
+ ldp x24, x25, [sp, #(24 * 8)]
+ ldp x26, x27, [sp, #(26 * 8)]
+ ldp x28, x29, [sp, #(28 * 8)]
+ ldr x30, [sp, #(30 * 8)]
+ add sp, sp, #(32 * 8)
+.endm
+
+.macro save_guest_registers
+ /* Back up x0 so we can use it as a temporary register */
+ stp x0, x1, [sp, #-(2 * 8)]!
+
+ /* Restore the hypctx pointer */
+ mrs x0, tpidr_el2
+
+ stp x2, x3, [x0, #(TF_X + 2 * 8)]
+ stp x4, x5, [x0, #(TF_X + 4 * 8)]
+ stp x6, x7, [x0, #(TF_X + 6 * 8)]
+ stp x8, x9, [x0, #(TF_X + 8 * 8)]
+ stp x10, x11, [x0, #(TF_X + 10 * 8)]
+ stp x12, x13, [x0, #(TF_X + 12 * 8)]
+ stp x14, x15, [x0, #(TF_X + 14 * 8)]
+ stp x16, x17, [x0, #(TF_X + 16 * 8)]
+ stp x18, x19, [x0, #(TF_X + 18 * 8)]
+ stp x20, x21, [x0, #(TF_X + 20 * 8)]
+ stp x22, x23, [x0, #(TF_X + 22 * 8)]
+ stp x24, x25, [x0, #(TF_X + 24 * 8)]
+ stp x26, x27, [x0, #(TF_X + 26 * 8)]
+ stp x28, x29, [x0, #(TF_X + 28 * 8)]
+
+ str lr, [x0, #(TF_LR)]
+
+ /* Restore the saved x0 & x1 and save them */
+ ldp x2, x3, [sp], #(2 * 8)
+ stp x2, x3, [x0, #(TF_X + 0 * 8)]
+.endm
+
+.macro restore_guest_registers
+ /*
+ * Copy the guest x0 and x1 to the stack so we can restore them
+ * after loading the other registers.
+ */
+ ldp x2, x3, [x0, #(TF_X + 0 * 8)]
+ stp x2, x3, [sp, #-(2 * 8)]!
+
+ ldr lr, [x0, #(TF_LR)]
+
+ ldp x28, x29, [x0, #(TF_X + 28 * 8)]
+ ldp x26, x27, [x0, #(TF_X + 26 * 8)]
+ ldp x24, x25, [x0, #(TF_X + 24 * 8)]
+ ldp x22, x23, [x0, #(TF_X + 22 * 8)]
+ ldp x20, x21, [x0, #(TF_X + 20 * 8)]
+ ldp x18, x19, [x0, #(TF_X + 18 * 8)]
+ ldp x16, x17, [x0, #(TF_X + 16 * 8)]
+ ldp x14, x15, [x0, #(TF_X + 14 * 8)]
+ ldp x12, x13, [x0, #(TF_X + 12 * 8)]
+ ldp x10, x11, [x0, #(TF_X + 10 * 8)]
+ ldp x8, x9, [x0, #(TF_X + 8 * 8)]
+ ldp x6, x7, [x0, #(TF_X + 6 * 8)]
+ ldp x4, x5, [x0, #(TF_X + 4 * 8)]
+ ldp x2, x3, [x0, #(TF_X + 2 * 8)]
+
+ ldp x0, x1, [sp], #(2 * 8)
+.endm
+
+.macro vempty
+ .align 7
+ 1: b 1b
+.endm
+
+.macro vector name
+ .align 7
+ b handle_\name
+.endm
+
+ .section ".vmm_vectors","ax"
+ .align 11
+hyp_init_vectors:
+ vempty /* Synchronous EL2t */
+ vempty /* IRQ EL2t */
+ vempty /* FIQ EL2t */
+ vempty /* Error EL2t */
+
+ vempty /* Synchronous EL2h */
+ vempty /* IRQ EL2h */
+ vempty /* FIQ EL2h */
+ vempty /* Error EL2h */
+
+ vector hyp_init /* Synchronous 64-bit EL1 */
+ vempty /* IRQ 64-bit EL1 */
+ vempty /* FIQ 64-bit EL1 */
+ vempty /* Error 64-bit EL1 */
+
+ vempty /* Synchronous 32-bit EL1 */
+ vempty /* IRQ 32-bit EL1 */
+ vempty /* FIQ 32-bit EL1 */
+ vempty /* Error 32-bit EL1 */
+
+ .text
+ .align 11
+hyp_vectors:
+ vempty /* Synchronous EL2t */
+ vempty /* IRQ EL2t */
+ vempty /* FIQ EL2t */
+ vempty /* Error EL2t */
+
+ vector el2_el2h_sync /* Synchronous EL2h */
+ vector el2_el2h_irq /* IRQ EL2h */
+ vector el2_el2h_fiq /* FIQ EL2h */
+ vector el2_el2h_error /* Error EL2h */
+
+ vector el2_el1_sync64 /* Synchronous 64-bit EL1 */
+ vector el2_el1_irq64 /* IRQ 64-bit EL1 */
+ vector el2_el1_fiq64 /* FIQ 64-bit EL1 */
+ vector el2_el1_error64 /* Error 64-bit EL1 */
+
+ vempty /* Synchronous 32-bit EL1 */
+ vempty /* IRQ 32-bit EL1 */
+ vempty /* FIQ 32-bit EL1 */
+ vempty /* Error 32-bit EL1 */
+
+/*
+ * Initialize the hypervisor mode with a new exception vector table, translation
+ * table and stack.
+ *
+ * Expecting:
+ * x0 - translation tables physical address
+ * x1 - stack top virtual address
+ * x2 - TCR_EL2 value
+ * x3 - SCTLR_EL2 value
+ * x4 - VTCR_EL2 value
+ */
+LENTRY(handle_hyp_init)
+ /* Install the new exception vectors */
+ adrp x6, hyp_vectors
+ add x6, x6, :lo12:hyp_vectors
+ msr vbar_el2, x6
+ /* Set the stack top address */
+ mov sp, x1
+ /* Use the host VTTBR_EL2 to tell the host and the guests apart */
+ mov x9, #VTTBR_HOST
+ msr vttbr_el2, x9
+ /* Load the base address for the translation tables */
+ msr ttbr0_el2, x0
+ /* Invalidate the TLB */
+ tlbi alle2
+ /* Use the same memory attributes as EL1 */
+ mrs x9, mair_el1
+ msr mair_el2, x9
+ /* Configure address translation */
+ msr tcr_el2, x2
+ isb
+ /* Set the system control register for EL2 */
+ msr sctlr_el2, x3
+ /* Set the Stage 2 translation control register */
+ msr vtcr_el2, x4
+ /* Return success */
+ mov x0, #0
+ /* MMU is up and running */
+ ERET
+LEND(handle_hyp_init)
+
+.macro do_world_switch_to_host
+ save_guest_registers
+ restore_host_registers
+
+ /* Restore host VTTBR */
+ mov x9, #VTTBR_HOST
+ msr vttbr_el2, x9
+.endm
+
+
+.macro handle_el2_excp type
+ /* Save registers before modifying so we can restore them */
+ str x9, [sp, #-16]!
+
+ /* Test if the exception happened when the host was running */
+ mrs x9, vttbr_el2
+ cmp x9, #VTTBR_HOST
+ beq 1f
+
+ /* We got the exception while the guest was running */
+ ldr x9, [sp], #16
+ do_world_switch_to_host
+ mov x0, \type
+ ret
+
+1:
+ /* We got the exception while the host was running */
+ ldr x9, [sp], #16
+ mov x0, \type
+ ERET
+.endm
+
+
+LENTRY(handle_el2_el2h_sync)
+ handle_el2_excp #EXCP_TYPE_EL2_SYNC
+LEND(handle_el2_el2h_sync)
+
+LENTRY(handle_el2_el2h_irq)
+ handle_el2_excp #EXCP_TYPE_EL2_IRQ
+LEND(handle_el2_el2h_irq)
+
+LENTRY(handle_el2_el2h_fiq)
+ handle_el2_excp #EXCP_TYPE_EL2_FIQ
+LEND(handle_el2_el2h_fiq)
+
+LENTRY(handle_el2_el2h_error)
+ handle_el2_excp #EXCP_TYPE_EL2_ERROR
+LEND(handle_el2_el2h_error)
+
+
+LENTRY(handle_el2_el1_sync64)
+ /* Save registers before modifying so we can restore them */
+ str x9, [sp, #-16]!
+
+ /* Check for host hypervisor call */
+ mrs x9, vttbr_el2
+ cmp x9, #VTTBR_HOST
+ ldr x9, [sp], #16 /* Restore the temp register */
+ bne 1f
+
+ /*
+ * Called from the host
+ */
+
+ /* Check if this is a cleanup call and handle in a controlled state */
+ cmp x0, #(HYP_CLEANUP)
+ b.eq vmm_cleanup
+
+ str lr, [sp, #-16]!
+ bl vmm_hyp_enter
+ ldr lr, [sp], #16
+ ERET
+
+1: /* Guest exception taken to EL2 */
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_SYNC
+ ret
+LEND(handle_el2_el1_sync64)
+
+/*
+ * We only trap IRQ, FIQ and SError exceptions when a guest is running. Do a
+ * world switch to host to handle these exceptions.
+ */
+
+LENTRY(handle_el2_el1_irq64)
+ do_world_switch_to_host
+ str x9, [sp, #-16]!
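+	/*
+	 * A non-zero ICH_MISR_EL2 means a GIC maintenance interrupt is
+	 * pending, report it separately from a normal host IRQ.
+	 */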
+ mrs x9, ich_misr_el2
+ cmp x9, xzr
+ beq 1f
+ mov x0, #EXCP_TYPE_MAINT_IRQ
+ b 2f
+1:
+ mov x0, #EXCP_TYPE_EL1_IRQ
+2:
+ ldr x9, [sp], #16
+ ret
+LEND(handle_el2_el1_irq64)
+
+LENTRY(handle_el2_el1_fiq64)
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_FIQ
+ ret
+LEND(handle_el2_el1_fiq64)
+
+LENTRY(handle_el2_el1_error64)
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_ERROR
+ ret
+LEND(handle_el2_el1_error64)
+
+
+/*
+ * Usage:
+ * uint64_t vmm_enter_guest(struct hypctx *hypctx)
+ *
+ * Expecting:
+ * x0 - hypctx address
+ */
+ENTRY(vmm_enter_guest)
+ /* Save hypctx address */
+ msr tpidr_el2, x0
+
+ save_host_registers
+ restore_guest_registers
+
+ /* Enter guest */
+ ERET
+END(vmm_enter_guest)
+
+/*
+ * Usage:
+ * void vmm_cleanup(uint64_t handle, void *hyp_stub_vectors)
+ *
+ * Expecting:
+ * x1 - physical address of hyp_stub_vectors
+ */
+LENTRY(vmm_cleanup)
+ /* Restore the stub vectors */
+ msr vbar_el2, x1
+
+ /* Disable the MMU */
+ dsb sy
+ mrs x2, sctlr_el2
+ bic x2, x2, #SCTLR_EL2_M
+ msr sctlr_el2, x2
+ isb
+
+ ERET
+LEND(vmm_cleanup)
diff --git a/sys/arm64/vmm/vmm_instruction_emul.c b/sys/arm64/vmm/vmm_instruction_emul.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_instruction_emul.c
@@ -0,0 +1,102 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#else
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/_iovec.h>
+
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <vmmapi.h>
+#endif
+
+#include <machine/vmm_instruction_emul.h>
+
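+/*
+ * Emulate a decoded guest MMIO access: a read calls 'memread' and stores the
+ * result in the target register, a write reads the source register, masks it
+ * to the access size and passes the value to 'memwrite'.
+ */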
+int
+vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging __unused, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *memarg)
+{
+ uint64_t val;
+ int error;
+
+ if (vie->dir == VM_DIR_READ) {
+ error = memread(vcpu, gpa, &val, vie->access_size, memarg);
+ if (error)
+ goto out;
+ error = vm_set_register(vcpu, vie->reg, val);
+ } else {
+ error = vm_get_register(vcpu, vie->reg, &val);
+ if (error)
+ goto out;
+ /* Mask any unneeded bits from the register */
+ if (vie->access_size < 8)
+ val &= (1ul << (vie->access_size * 8)) - 1;
+ error = memwrite(vcpu, gpa, val, vie->access_size, memarg);
+ }
+
+out:
+ return (error);
+}
+
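+/*
+ * Emulate a trapped special register access using the supplied register
+ * read/write callbacks.
+ */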
+int
+vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg)
+{
+ uint64_t val;
+ int error;
+
+ if (vre->dir == VM_DIR_READ) {
+ error = regread(vcpu, &val, regarg);
+ if (error)
+ goto out;
+ error = vm_set_register(vcpu, vre->reg, val);
+ } else {
+ error = vm_get_register(vcpu, vre->reg, &val);
+ if (error)
+ goto out;
+ error = regwrite(vcpu, val, regarg);
+ }
+
+out:
+ return (error);
+}
diff --git a/sys/arm64/vmm/vmm_ktr.h b/sys/arm64/vmm/vmm_ktr.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_ktr.h
@@ -0,0 +1,69 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_KTR_H_
+#define _VMM_KTR_H_
+
+#include <sys/ktr.h>
+#include <sys/pcpu.h>
+
+#ifndef KTR_VMM
+#define KTR_VMM KTR_GEN
+#endif
+
+#define VCPU_CTR0(vm, vcpuid, format) \
+CTR2(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid))
+
+#define VCPU_CTR1(vm, vcpuid, format, p1) \
+CTR3(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1))
+
+#define VCPU_CTR2(vm, vcpuid, format, p1, p2) \
+CTR4(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2))
+
+#define VCPU_CTR3(vm, vcpuid, format, p1, p2, p3) \
+CTR5(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2), (p3))
+
+#define VCPU_CTR4(vm, vcpuid, format, p1, p2, p3, p4) \
+CTR6(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), \
+ (p1), (p2), (p3), (p4))
+
+#define VM_CTR0(vm, format) \
+CTR1(KTR_VMM, "vm %s: " format, vm_name((vm)))
+
+#define VM_CTR1(vm, format, p1) \
+CTR2(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1))
+
+#define VM_CTR2(vm, format, p1, p2) \
+CTR3(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2))
+
+#define VM_CTR3(vm, format, p1, p2, p3) \
+CTR4(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3))
+
+#define VM_CTR4(vm, format, p1, p2, p3, p4) \
+CTR5(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3), (p4))
+#endif
diff --git a/sys/arm64/vmm/vmm_mmu.c b/sys/arm64/vmm/vmm_mmu.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_mmu.c
@@ -0,0 +1,430 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+#include <vm/vm_phys.h>
+
+#include <machine/atomic.h>
+#include <machine/machdep.h>
+#include <machine/vm.h>
+#include <machine/vmm.h>
+#include <machine/vmparam.h>
+
+#include "mmu.h"
+#include "arm64.h"
+
+static struct mtx vmmpmap_mtx;
+static pt_entry_t *l0;
+static vm_paddr_t l0_paddr;
+
+bool
+vmmpmap_init(void)
+{
+ vm_page_t m;
+
+ m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+ if (m == NULL)
+ return (false);
+
+ l0_paddr = VM_PAGE_TO_PHYS(m);
+ l0 = (pd_entry_t *)PHYS_TO_DMAP(l0_paddr);
+
+ mtx_init(&vmmpmap_mtx, "vmm pmap", NULL, MTX_DEF);
+
+ return (true);
+}
+
+static void
+vmmpmap_release_l3(pd_entry_t l2e)
+{
+ pt_entry_t *l3 __diagused;
+ vm_page_t m;
+ int i;
+
+ l3 = (pd_entry_t *)PHYS_TO_DMAP(l2e & ~ATTR_MASK);
+ for (i = 0; i < Ln_ENTRIES; i++) {
+ KASSERT(l3[i] == 0, ("%s: l3 still mapped: %p %lx", __func__,
+ &l3[i], l3[i]));
+ }
+
+ m = PHYS_TO_VM_PAGE(l2e & ~ATTR_MASK);
+ vm_page_unwire_noq(m);
+ vm_page_free(m);
+}
+
+static void
+vmmpmap_release_l2(pd_entry_t l1e)
+{
+ pt_entry_t *l2;
+ vm_page_t m;
+ int i;
+
+ l2 = (pd_entry_t *)PHYS_TO_DMAP(l1e & ~ATTR_MASK);
+ for (i = 0; i < Ln_ENTRIES; i++) {
+ if (l2[i] != 0) {
+ vmmpmap_release_l3(l2[i]);
+ }
+ }
+
+ m = PHYS_TO_VM_PAGE(l1e & ~ATTR_MASK);
+ vm_page_unwire_noq(m);
+ vm_page_free(m);
+}
+
+static void
+vmmpmap_release_l1(pd_entry_t l0e)
+{
+ pt_entry_t *l1;
+ vm_page_t m;
+ int i;
+
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(l0e & ~ATTR_MASK);
+ for (i = 0; i < Ln_ENTRIES; i++) {
+ if (l1[i] != 0) {
+ vmmpmap_release_l2(l1[i]);
+ }
+ }
+
+ m = PHYS_TO_VM_PAGE(l0e & ~ATTR_MASK);
+ vm_page_unwire_noq(m);
+ vm_page_free(m);
+}
+
+void
+vmmpmap_fini(void)
+{
+ vm_page_t m;
+ int i;
+
+ /* Remove the remaining entries */
+ for (i = 0; i < L0_ENTRIES; i++) {
+ if (l0[i] != 0) {
+ vmmpmap_release_l1(l0[i]);
+ }
+ }
+
+ m = PHYS_TO_VM_PAGE(l0_paddr);
+ vm_page_unwire_noq(m);
+ vm_page_free(m);
+
+ mtx_destroy(&vmmpmap_mtx);
+}
+
+uint64_t
+vmmpmap_to_ttbr0(void)
+{
+
+ return (l0_paddr);
+}
+
+/* Returns a pointer to the level 1 table, allocating if needed. */
+static pt_entry_t *
+vmmpmap_l1_table(vm_offset_t va)
+{
+ pt_entry_t new_l0e, l0e, *l1;
+ vm_page_t m;
+ int rv;
+
+ m = NULL;
+again:
+ l0e = atomic_load_64(&l0[pmap_l0_index(va)]);
+ if ((l0e & ATTR_DESCR_VALID) == 0) {
+ /* Allocate a page for the level 1 table */
+ if (m == NULL) {
+ m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+ if (m == NULL)
+ return (NULL);
+ }
+
+ new_l0e = VM_PAGE_TO_PHYS(m) | L0_TABLE;
+
+ mtx_lock(&vmmpmap_mtx);
+ rv = atomic_cmpset_64(&l0[pmap_l0_index(va)], l0e, new_l0e);
+ mtx_unlock(&vmmpmap_mtx);
+ /* We may have raced another thread, try again */
+ if (rv == 0)
+ goto again;
+
+ /* The cmpset succeeded */
+ l0e = new_l0e;
+ } else if (m != NULL) {
+ /* We allocated a page that wasn't used */
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ }
+
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(l0e & ~ATTR_MASK);
+ return (l1);
+}
+
+static pt_entry_t *
+vmmpmap_l2_table(vm_offset_t va)
+{
+ pt_entry_t new_l1e, l1e, *l1, *l2;
+ vm_page_t m;
+ int rv;
+
+ l1 = vmmpmap_l1_table(va);
+ if (l1 == NULL)
+ return (NULL);
+
+ m = NULL;
+again:
+ l1e = atomic_load_64(&l1[pmap_l1_index(va)]);
+ if ((l1e & ATTR_DESCR_VALID) == 0) {
+ /* Allocate a page for the level 2 table */
+ if (m == NULL) {
+ m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+ if (m == NULL)
+ return (NULL);
+ }
+
+ new_l1e = VM_PAGE_TO_PHYS(m) | L1_TABLE;
+
+ mtx_lock(&vmmpmap_mtx);
+ rv = atomic_cmpset_64(&l1[pmap_l1_index(va)], l1e, new_l1e);
+ mtx_unlock(&vmmpmap_mtx);
+ /* We may have raced another thread, try again */
+ if (rv == 0)
+ goto again;
+
+ /* The cmpset succeeded */
+ l1e = new_l1e;
+ } else if (m != NULL) {
+ /* We allocated a page that wasn't used */
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ }
+
+ l2 = (pd_entry_t *)PHYS_TO_DMAP(l1e & ~ATTR_MASK);
+ return (l2);
+}
+
+static pd_entry_t *
+vmmpmap_l3_table(vm_offset_t va)
+{
+ pt_entry_t new_l2e, l2e, *l2, *l3;
+ vm_page_t m;
+ int rv;
+
+ l2 = vmmpmap_l2_table(va);
+ if (l2 == NULL)
+ return (NULL);
+
+ m = NULL;
+again:
+ l2e = atomic_load_64(&l2[pmap_l2_index(va)]);
+ if ((l2e & ATTR_DESCR_VALID) == 0) {
+ /* Allocate a page for the level 3 table */
+ if (m == NULL) {
+ m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+ if (m == NULL)
+ return (NULL);
+ }
+
+ new_l2e = VM_PAGE_TO_PHYS(m) | L2_TABLE;
+
+ mtx_lock(&vmmpmap_mtx);
+ rv = atomic_cmpset_64(&l2[pmap_l2_index(va)], l2e, new_l2e);
+ mtx_unlock(&vmmpmap_mtx);
+ /* We may have raced another thread, try again */
+ if (rv == 0)
+ goto again;
+
+ /* The cmpset succeeded */
+ l2e = new_l2e;
+ } else if (m != NULL) {
+ /* We allocated a page that wasn't used */
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ }
+
+ l3 = (pt_entry_t *)PHYS_TO_DMAP(l2e & ~ATTR_MASK);
+ return (l3);
+}
+
+/*
+ * Creates an EL2 entry in the hyp_pmap. Similar to pmap_kenter.
+ */
+bool
+vmmpmap_enter(vm_offset_t va, vm_size_t size, vm_paddr_t pa, vm_prot_t prot)
+{
+ pd_entry_t l3e, *l3;
+
+ KASSERT((pa & L3_OFFSET) == 0,
+ ("%s: Invalid physical address", __func__));
+ KASSERT((va & L3_OFFSET) == 0,
+ ("%s: Invalid virtual address", __func__));
+ KASSERT((size & PAGE_MASK) == 0,
+ ("%s: Mapping is not page-sized", __func__));
+
+ l3e = ATTR_DEFAULT | L3_PAGE;
+ /* This bit is res1 at EL2 */
+ l3e |= ATTR_S1_AP(ATTR_S1_AP_USER);
+ /* Only normal memory is used at EL2 */
+ l3e |= ATTR_S1_IDX(VM_MEMATTR_DEFAULT);
+
+ if ((prot & VM_PROT_EXECUTE) == 0) {
+ /* PXN is res0 at EL2. UXN is XN */
+ l3e |= ATTR_S1_UXN;
+ }
+ if ((prot & VM_PROT_WRITE) == 0) {
+ l3e |= ATTR_S1_AP(ATTR_S1_AP_RO);
+ }
+
+ while (size > 0) {
+ l3 = vmmpmap_l3_table(va);
+ if (l3 == NULL)
+ return (false);
+
+#ifdef INVARIANTS
+ /*
+ * Ensure no other threads can write to l3 between the KASSERT
+ * and store.
+ */
+ mtx_lock(&vmmpmap_mtx);
+#endif
+ KASSERT(atomic_load_64(&l3[pmap_l3_index(va)]) == 0,
+ ("%s: VA already mapped", __func__));
+
+ atomic_store_64(&l3[pmap_l3_index(va)], l3e | pa);
+#ifdef INVARIANTS
+ mtx_unlock(&vmmpmap_mtx);
+#endif
+
+ size -= PAGE_SIZE;
+ pa += PAGE_SIZE;
+ va += PAGE_SIZE;
+ }
+
+ return (true);
+}
+
+void
+vmmpmap_remove(vm_offset_t va, vm_size_t size, bool invalidate)
+{
+ pt_entry_t l0e, *l1, l1e, *l2, l2e;
+ pd_entry_t *l3, l3e, **l3_list;
+ vm_offset_t eva, va_next, sva;
+ size_t i;
+
+ KASSERT((va & L3_OFFSET) == 0,
+ ("%s: Invalid virtual address", __func__));
+ KASSERT((size & PAGE_MASK) == 0,
+ ("%s: Mapping is not page-sized", __func__));
+
+ if (invalidate) {
+ l3_list = malloc((size / PAGE_SIZE) * sizeof(l3_list[0]),
+ M_TEMP, M_WAITOK | M_ZERO);
+ }
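+	/*
+	 * When invalidating, the entries are first made read-only so the
+	 * data cache can be cleaned and invalidated from EL2, and only
+	 * then are they cleared and the EL2 TLB invalidated.
+	 */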
+
+ sva = va;
+ eva = va + size;
+ mtx_lock(&vmmpmap_mtx);
+ for (i = 0; va < eva; va = va_next) {
+ l0e = atomic_load_64(&l0[pmap_l0_index(va)]);
+ if (l0e == 0) {
+ va_next = (va + L0_SIZE) & ~L0_OFFSET;
+ if (va_next < va)
+ va_next = eva;
+ continue;
+ }
+ MPASS((l0e & ATTR_DESCR_MASK) == L0_TABLE);
+
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(l0e & ~ATTR_MASK);
+ l1e = atomic_load_64(&l1[pmap_l1_index(va)]);
+ if (l1e == 0) {
+ va_next = (va + L1_SIZE) & ~L1_OFFSET;
+ if (va_next < va)
+ va_next = eva;
+ continue;
+ }
+ MPASS((l1e & ATTR_DESCR_MASK) == L1_TABLE);
+
+ l2 = (pd_entry_t *)PHYS_TO_DMAP(l1e & ~ATTR_MASK);
+ l2e = atomic_load_64(&l2[pmap_l2_index(va)]);
+ if (l2e == 0) {
+ va_next = (va + L2_SIZE) & ~L2_OFFSET;
+ if (va_next < va)
+ va_next = eva;
+ continue;
+ }
+ MPASS((l2e & ATTR_DESCR_MASK) == L2_TABLE);
+
+ l3 = (pd_entry_t *)PHYS_TO_DMAP(l2e & ~ATTR_MASK);
+ if (invalidate) {
+ l3e = atomic_load_64(&l3[pmap_l3_index(va)]);
+ MPASS(l3e != 0);
+ /*
+ * Mark memory as read-only so we can invalidate
+ * the cache.
+ */
+ l3e &= ~ATTR_S1_AP_MASK;
+ l3e |= ATTR_S1_AP(ATTR_S1_AP_RO);
+ atomic_store_64(&l3[pmap_l3_index(va)], l3e);
+
+ l3_list[i] = &l3[pmap_l3_index(va)];
+ i++;
+ } else {
+ /*
+ * The caller is responsible for clearing the cache &
+ * handling the TLB
+ */
+ atomic_store_64(&l3[pmap_l3_index(va)], 0);
+ }
+
+ va_next = (va + L3_SIZE) & ~L3_OFFSET;
+ if (va_next < va)
+ va_next = eva;
+ }
+ mtx_unlock(&vmmpmap_mtx);
+
+ if (invalidate) {
+ /* Invalidate the memory from the D-cache */
+ vmm_call_hyp(HYP_DC_CIVAC, sva, size);
+
+ for (i = 0; i < (size / PAGE_SIZE); i++) {
+ atomic_store_64(l3_list[i], 0);
+ }
+
+ vmm_call_hyp(HYP_EL2_TLBI, HYP_EL2_TLBI_VA, sva, size);
+
+ free(l3_list, M_TEMP);
+ }
+}
diff --git a/sys/arm64/vmm/vmm_reset.c b/sys/arm64/vmm/vmm_reset.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_reset.c
@@ -0,0 +1,177 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+
+#include <machine/armreg.h>
+#include <machine/cpu.h>
+#include <machine/hypervisor.h>
+
+#include "arm64.h"
+#include "reset.h"
+
+/*
+ * Make the architecturally UNKNOWN value 0. As a bonus, we don't have to
+ * manually set all those RES0 fields.
+ */
+#define ARCH_UNKNOWN 0
+#define set_arch_unknown(reg) (memset(&(reg), ARCH_UNKNOWN, sizeof(reg)))
+
+void
+reset_vm_el01_regs(void *vcpu)
+{
+ struct hypctx *el2ctx;
+
+ el2ctx = vcpu;
+
+ set_arch_unknown(el2ctx->tf);
+
+ set_arch_unknown(el2ctx->actlr_el1);
+ set_arch_unknown(el2ctx->afsr0_el1);
+ set_arch_unknown(el2ctx->afsr1_el1);
+ set_arch_unknown(el2ctx->amair_el1);
+ set_arch_unknown(el2ctx->contextidr_el1);
+ set_arch_unknown(el2ctx->cpacr_el1);
+ set_arch_unknown(el2ctx->csselr_el1);
+ set_arch_unknown(el2ctx->elr_el1);
+ set_arch_unknown(el2ctx->esr_el1);
+ set_arch_unknown(el2ctx->far_el1);
+ set_arch_unknown(el2ctx->mair_el1);
+ set_arch_unknown(el2ctx->mdccint_el1);
+ set_arch_unknown(el2ctx->mdscr_el1);
+ set_arch_unknown(el2ctx->par_el1);
+
+ /*
+ * Guest starts with:
+ * ~SCTLR_M: MMU off
+ * ~SCTLR_C: data cache off
+ * SCTLR_CP15BEN: memory barrier instruction enable from EL0; RAO/WI
+ * ~SCTLR_I: instruction cache off
+ */
+ el2ctx->sctlr_el1 = SCTLR_RES1;
+ el2ctx->sctlr_el1 &= ~SCTLR_M & ~SCTLR_C & ~SCTLR_I;
+ el2ctx->sctlr_el1 |= SCTLR_CP15BEN;
+
+ set_arch_unknown(el2ctx->sp_el0);
+ set_arch_unknown(el2ctx->tcr_el1);
+ set_arch_unknown(el2ctx->tpidr_el0);
+ set_arch_unknown(el2ctx->tpidr_el1);
+ set_arch_unknown(el2ctx->tpidrro_el0);
+ set_arch_unknown(el2ctx->ttbr0_el1);
+ set_arch_unknown(el2ctx->ttbr1_el1);
+ set_arch_unknown(el2ctx->vbar_el1);
+ set_arch_unknown(el2ctx->spsr_el1);
+
+ set_arch_unknown(el2ctx->dbgbcr_el1);
+ set_arch_unknown(el2ctx->dbgbvr_el1);
+ set_arch_unknown(el2ctx->dbgwcr_el1);
+ set_arch_unknown(el2ctx->dbgwvr_el1);
+
+ el2ctx->pmcr_el0 = READ_SPECIALREG(pmcr_el0) & PMCR_N_MASK;
+ /* PMCR_LC is unknown when AArch32 is supported or RES1 otherwise */
+ el2ctx->pmcr_el0 |= PMCR_LC;
+ set_arch_unknown(el2ctx->pmccntr_el0);
+ set_arch_unknown(el2ctx->pmccfiltr_el0);
+ set_arch_unknown(el2ctx->pmcntenset_el0);
+ set_arch_unknown(el2ctx->pmintenset_el1);
+ set_arch_unknown(el2ctx->pmovsset_el0);
+ set_arch_unknown(el2ctx->pmuserenr_el0);
+ memset(el2ctx->pmevcntr_el0, 0, sizeof(el2ctx->pmevcntr_el0));
+ memset(el2ctx->pmevtyper_el0, 0, sizeof(el2ctx->pmevtyper_el0));
+}
+
+void
+reset_vm_el2_regs(void *vcpu)
+{
+ struct hypctx *el2ctx;
+ uint64_t cpu_aff, vcpuid;
+
+ el2ctx = vcpu;
+ vcpuid = vcpu_vcpuid(el2ctx->vcpu);
+
+ /*
+ * Set the Hypervisor Configuration Register:
+ *
+ * HCR_RW: use AArch64 for EL1
+ * HCR_TID3: handle ID registers in the vmm to provide a common
+ * set of features on all vcpus
+ * HCR_TWI: Trap WFI to the hypervisor
+ * HCR_BSU_IS: barrier instructions apply to the inner shareable
+ * domain
+ * HCR_FB: broadcast maintenance operations
+ * HCR_AMO: route physical SError interrupts to EL2
+ * HCR_IMO: route physical IRQ interrupts to EL2
+ * HCR_FMO: route physical FIQ interrupts to EL2
+ * HCR_SWIO: turn set/way invalidate into set/way clean and
+ * invalidate
+ * HCR_VM: use stage 2 translation
+ */
+ el2ctx->hcr_el2 = HCR_RW | HCR_TID3 | HCR_TWI | HCR_BSU_IS | HCR_FB |
+ HCR_AMO | HCR_IMO | HCR_FMO | HCR_SWIO | HCR_VM;
+
+ /* TODO: Trap all extensions we don't support */
+ el2ctx->mdcr_el2 = 0;
+ /* PMCR_EL0.N is read from MDCR_EL2.HPMN */
+ el2ctx->mdcr_el2 |= (el2ctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT;
+
+ el2ctx->vmpidr_el2 = VMPIDR_EL2_RES1;
+ /* The guest will detect a multi-core, single-threaded CPU */
+ el2ctx->vmpidr_el2 &= ~VMPIDR_EL2_U & ~VMPIDR_EL2_MT;
+ /*
+ * Generate the guest MPIDR value. We only support 16 CPUs at affinity
+ * level 0 to simplify the vgicv3 driver (see writing sgi1r_el1).
+ */
+ cpu_aff = (vcpuid & 0xf) << MPIDR_AFF0_SHIFT |
+ ((vcpuid >> 4) & 0xff) << MPIDR_AFF1_SHIFT |
+ ((vcpuid >> 12) & 0xff) << MPIDR_AFF2_SHIFT |
+ ((vcpuid >> 20) & 0xff) << MPIDR_AFF3_SHIFT;
+ el2ctx->vmpidr_el2 |= cpu_aff;
+
+ /* Use the same CPU identification information as the host */
+ el2ctx->vpidr_el2 = CPU_IMPL_TO_MIDR(CPU_IMPL_ARM);
+ el2ctx->vpidr_el2 |= CPU_VAR_TO_MIDR(0);
+ el2ctx->vpidr_el2 |= CPU_ARCH_TO_MIDR(0xf);
+ el2ctx->vpidr_el2 |= CPU_PART_TO_MIDR(CPU_PART_FOUNDATION);
+ el2ctx->vpidr_el2 |= CPU_REV_TO_MIDR(0);
+
+ /*
+ * Don't trap accesses to CPACR_EL1, trace, SVE, Advanced SIMD
+ * and floating point functionality to EL2.
+ */
+ el2ctx->cptr_el2 = CPTR_RES1;
+ /*
+ * Disable interrupts in the guest. The guest OS will re-enable
+ * them.
+ */
+ el2ctx->tf.tf_spsr = PSR_D | PSR_A | PSR_I | PSR_F;
+ /* Use the EL1 stack when taking exceptions to EL1 */
+ el2ctx->tf.tf_spsr |= PSR_M_EL1h;
+}
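
The affinity packing in reset_vm_el2_regs() maps the flat vcpuid onto MPIDR affinity fields: bits [3:0] become Aff0 (capped at 16 CPUs per cluster, as the comment explains), bits [11:4] Aff1, bits [19:12] Aff2 and bits [27:20] Aff3. A minimal sketch that just factors out that computation; the helper name is invented for illustration and the MPIDR_AFF*_SHIFT macros come from armreg.h as above:

/* Illustrative only: mirrors the cpu_aff computation above. */
static uint64_t
vcpuid_to_mpidr_aff(uint64_t vcpuid)
{
	return ((vcpuid & 0xf) << MPIDR_AFF0_SHIFT |
	    ((vcpuid >> 4) & 0xff) << MPIDR_AFF1_SHIFT |
	    ((vcpuid >> 12) & 0xff) << MPIDR_AFF2_SHIFT |
	    ((vcpuid >> 20) & 0xff) << MPIDR_AFF3_SHIFT);
}

For example, vcpuid 18 (0x12) yields Aff0 = 2 and Aff1 = 1, i.e. the third core of the second cluster, while vcpuid 15 is the last core that stays in cluster 0.
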
diff --git a/sys/arm64/vmm/vmm_stat.h b/sys/arm64/vmm/vmm_stat.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_stat.h
@@ -0,0 +1,145 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_STAT_H_
+#define _VMM_STAT_H_
+
+struct vm;
+
+#define MAX_VMM_STAT_ELEMS 64 /* arbitrary */
+
+enum vmm_stat_scope {
+ VMM_STAT_SCOPE_ANY,
+};
+
+struct vmm_stat_type;
+typedef void (*vmm_stat_func_t)(struct vcpu *vcpu,
+ struct vmm_stat_type *stat);
+
+struct vmm_stat_type {
+ int index; /* position in the stats buffer */
+ int nelems; /* standalone or array */
+ const char *desc; /* description of statistic */
+ vmm_stat_func_t func;
+ enum vmm_stat_scope scope;
+};
+
+void vmm_stat_register(void *arg);
+
+#define VMM_STAT_FDEFINE(type, nelems, desc, func, scope) \
+ struct vmm_stat_type type[1] = { \
+ { -1, nelems, desc, func, scope } \
+ }; \
+ SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type)
+
+#define VMM_STAT_DEFINE(type, nelems, desc, scope) \
+ VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope)
+
+#define VMM_STAT_DECLARE(type) \
+ extern struct vmm_stat_type type[1]
+
+#define VMM_STAT(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_ANY)
+
+#define VMM_STAT_FUNC(type, desc, func) \
+ VMM_STAT_FDEFINE(type, 1, desc, func, VMM_STAT_SCOPE_ANY)
+
+#define VMM_STAT_ARRAY(type, nelems, desc) \
+ VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY)
+
+void *vmm_stat_alloc(void);
+void vmm_stat_init(void *vp);
+void vmm_stat_free(void *vp);
+
+int vmm_stat_copy(struct vcpu *vcpu, int index, int count,
+ int *num_stats, uint64_t *buf);
+int vmm_stat_desc_copy(int index, char *buf, int buflen);
+
+static void __inline
+vmm_stat_array_incr(struct vcpu *vcpu, struct vmm_stat_type *vst, int statidx,
+ uint64_t x)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] += x;
+#endif
+}
+
+static void __inline
+vmm_stat_array_set(struct vcpu *vcpu, struct vmm_stat_type *vst, int statidx,
+ uint64_t val)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] = val;
+#endif
+}
+
+static void __inline
+vmm_stat_incr(struct vcpu *vcpu, struct vmm_stat_type *vst, uint64_t x)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_incr(vcpu, vst, 0, x);
+#endif
+}
+
+static void __inline
+vmm_stat_set(struct vcpu *vcpu, struct vmm_stat_type *vst, uint64_t val)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_set(vcpu, vst, 0, val);
+#endif
+}
+
+VMM_STAT_DECLARE(VMEXIT_COUNT);
+VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
+VMM_STAT_DECLARE(VMEXIT_WFI);
+VMM_STAT_DECLARE(VMEXIT_WFE);
+VMM_STAT_DECLARE(VMEXIT_HVC);
+VMM_STAT_DECLARE(VMEXIT_MSR);
+VMM_STAT_DECLARE(VMEXIT_DATA_ABORT);
+VMM_STAT_DECLARE(VMEXIT_INSN_ABORT);
+VMM_STAT_DECLARE(VMEXIT_UNHANDLED_SYNC);
+VMM_STAT_DECLARE(VMEXIT_IRQ);
+VMM_STAT_DECLARE(VMEXIT_FIQ);
+VMM_STAT_DECLARE(VMEXIT_UNHANDLED_EL2);
+VMM_STAT_DECLARE(VMEXIT_UNHANDLED);
+#endif
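
The inline helpers above compile down to nothing unless VMM_KEEP_STATS is defined (the vmm module Makefile later in this diff adds -DVMM_KEEP_STATS), so backends can use them on hot exit paths. A minimal sketch of bumping one of the declared counters from an exit handler; the function name and surrounding logic are invented for illustration:

static int
handle_wfi_exit_sketch(struct vcpu *vcpu)
{
	/* Account for the trapped WFI; a no-op when stats are compiled out. */
	vmm_stat_incr(vcpu, VMEXIT_WFI, 1);
	vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);

	/* ... block until an interrupt is pending, then resume the guest ... */
	return (0);
}
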
diff --git a/sys/arm64/vmm/vmm_stat.c b/sys/arm64/vmm/vmm_stat.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_stat.c
@@ -0,0 +1,165 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include "vmm_stat.h"
+
+/*
+ * 'vst_num_elems' is the total number of addressable statistic elements
+ * 'vst_num_types' is the number of unique statistic types
+ *
+ * It is always true that 'vst_num_elems' is greater than or equal to
+ * 'vst_num_types'. This is because a stat type may represent more than
+ * one element (e.g., VMM_STAT_ARRAY).
+ */
+static int vst_num_elems, vst_num_types;
+static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS];
+
+static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat");
+
+#define vst_size ((size_t)vst_num_elems * sizeof(uint64_t))
+
+void
+vmm_stat_register(void *arg)
+{
+ struct vmm_stat_type *vst = arg;
+
+ /* We require all stats to identify themselves with a description */
+ if (vst->desc == NULL)
+ return;
+
+ if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) {
+ printf("Cannot accommodate vmm stat type \"%s\"!\n", vst->desc);
+ return;
+ }
+
+ vst->index = vst_num_elems;
+ vst_num_elems += vst->nelems;
+
+ vsttab[vst_num_types++] = vst;
+}
+
+int
+vmm_stat_copy(struct vcpu *vcpu, int index, int count, int *num_stats,
+ uint64_t *buf)
+{
+ struct vmm_stat_type *vst;
+ uint64_t *stats;
+ int i, tocopy;
+
+ if (index < 0 || count < 0)
+ return (EINVAL);
+
+ if (index > vst_num_elems)
+ return (ENOENT);
+
+ if (index == vst_num_elems) {
+ *num_stats = 0;
+ return (0);
+ }
+
+ tocopy = min(vst_num_elems - index, count);
+
+ /* Let stats functions update their counters */
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (vst->func != NULL)
+ (*vst->func)(vcpu, vst);
+ }
+
+ /* Copy over the stats */
+ stats = vcpu_stats(vcpu);
+ memcpy(buf, stats + index, tocopy * sizeof(stats[0]));
+ *num_stats = tocopy;
+ return (0);
+}
+
+void *
+vmm_stat_alloc(void)
+{
+
+ return (malloc(vst_size, M_VMM_STAT, M_WAITOK));
+}
+
+void
+vmm_stat_init(void *vp)
+{
+
+ bzero(vp, vst_size);
+}
+
+void
+vmm_stat_free(void *vp)
+{
+ free(vp, M_VMM_STAT);
+}
+
+int
+vmm_stat_desc_copy(int index, char *buf, int bufsize)
+{
+ int i;
+ struct vmm_stat_type *vst;
+
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (index >= vst->index && index < vst->index + vst->nelems) {
+ if (vst->nelems > 1) {
+ snprintf(buf, bufsize, "%s[%d]",
+ vst->desc, index - vst->index);
+ } else {
+ strlcpy(buf, vst->desc, bufsize);
+ }
+ return (0); /* found it */
+ }
+ }
+
+ return (EINVAL);
+}
+
+/* global statistics */
+VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
+VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
+VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
+VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
+VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
+VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
+VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
+VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
+VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
+VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
+VMM_STAT(VMEXIT_FIQ, "number of vmexits for an fiq");
+VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
+VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");
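
Registration assigns each stat type a contiguous slice of the per-vcpu stats buffer, and vmm_stat_desc_copy() reconstructs a human-readable name from the flat index. A small sketch of the resulting index layout, using made-up stat names and assuming the two SYSINITs happen to run in this order:

/* Two hypothetical stat definitions, registered in this order: */
VMM_STAT(EXAMPLE_SCALAR, "example scalar stat");		/* occupies index 0 */
VMM_STAT_ARRAY(EXAMPLE_ARRAY, 4, "example array stat");	/* indices 1..4 */

/*
 * vmm_stat_desc_copy() then reports:
 *   index 0 -> "example scalar stat"
 *   index 1 -> "example array stat[0]"
 *   index 4 -> "example array stat[3]"
 * and vmm_stat_copy(vcpu, 0, 5, ...) returns all five counters in one call.
 */
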
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -116,6 +116,39 @@
dev/iommu/busdma_iommu.c optional iommu
dev/iommu/iommu_gas.c optional iommu
+arm64/vmm/vmm.c optional vmm
+arm64/vmm/vmm_dev.c optional vmm
+arm64/vmm/vmm_instruction_emul.c optional vmm
+arm64/vmm/vmm_stat.c optional vmm
+arm64/vmm/vmm_arm64.c optional vmm
+arm64/vmm/vmm_reset.c optional vmm
+arm64/vmm/vmm_call.S optional vmm
+arm64/vmm/vmm_hyp_exception.S optional vmm \
+ compile-with "${NORMAL_C:N-fsanitize*:N-mbranch-protection*} -fpie" \
+ no-obj
+arm64/vmm/vmm_hyp.c optional vmm \
+ compile-with "${NORMAL_C:N-fsanitize*:N-mbranch-protection*} -fpie" \
+ no-obj
+vmm_hyp_blob.elf.full optional vmm \
+ dependency "vmm_hyp.o vmm_hyp_exception.o" \
+ compile-with "${SYSTEM_LD_BASECMD} -o ${.TARGET} ${.ALLSRC} --defsym=text_start='0x0'" \
+ no-obj no-implicit-rule
+vmm_hyp_blob.elf optional vmm \
+ dependency "vmm_hyp_blob.elf.full" \
+ compile-with "${OBJCOPY} --strip-debug ${.ALLSRC} ${.TARGET}" \
+ no-obj no-implicit-rule
+vmm_hyp_blob.bin optional vmm \
+ dependency vmm_hyp_blob.elf \
+ compile-with "${OBJCOPY} --output-target=binary ${.ALLSRC} ${.TARGET}" \
+ no-obj no-implicit-rule
+arm64/vmm/vmm_hyp_el2.S optional vmm \
+ dependency vmm_hyp_blob.bin
+arm64/vmm/vmm_mmu.c optional vmm
+arm64/vmm/io/vgic.c optional vmm
+arm64/vmm/io/vgic_v3.c optional vmm
+arm64/vmm/io/vgic_if.m optional vmm
+arm64/vmm/io/vtimer.c optional vmm
+
crypto/armv8/armv8_crypto.c optional armv8crypto
armv8_crypto_wrap.o optional armv8crypto \
dependency "$S/crypto/armv8/armv8_crypto_wrap.c" \
diff --git a/sys/conf/ldscript.arm64 b/sys/conf/ldscript.arm64
--- a/sys/conf/ldscript.arm64
+++ b/sys/conf/ldscript.arm64
@@ -6,6 +6,7 @@
{
/* Read-only sections, merged into text segment: */
. = text_start; /* This is set using --defsym= on the command line. */
+ .vmm_vectors : { *(.vmm_vectors) }
.text :
{
*(.text)
@@ -16,6 +17,7 @@
} =0x9090
_etext = .;
PROVIDE (etext = .);
+
.fini : { *(.fini) } =0x9090
.rodata : { *(.rodata*) *(.gnu.linkonce.r*) }
.rodata1 : { *(.rodata1) }
diff --git a/sys/conf/options.arm64 b/sys/conf/options.arm64
--- a/sys/conf/options.arm64
+++ b/sys/conf/options.arm64
@@ -19,6 +19,9 @@
# EFI Runtime services support
EFIRT opt_efirt.h
+# Bhyve
+VMM opt_global.h
+
# SoC Support
SOC_ALLWINNER_A64 opt_soc.h
SOC_ALLWINNER_H5 opt_soc.h
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -841,7 +841,9 @@
_sgx_linux= sgx_linux
_smartpqi= smartpqi
_p2sb= p2sb
+.endif
+.if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64"
.if ${MK_BHYVE} != "no" || defined(ALL_MODULES)
.if ${KERN_OPTS:MSMP}
_vmm= vmm
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -3,31 +3,79 @@
KMOD= vmm
-SRCS= opt_acpi.h opt_bhyve_snapshot.h opt_ddb.h
-SRCS+= device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h vnode_if.h
-DPSRCS+= vmx_assym.h svm_assym.h
-DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc
+SRCS= opt_acpi.h opt_ddb.h device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h
CFLAGS+= -DVMM_KEEP_STATS
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/io
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd
+CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm
+CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm/io
# generic vmm support
-.PATH: ${SRCTOP}/sys/amd64/vmm
+.PATH: ${SRCTOP}/sys/${MACHINE}/vmm
SRCS+= vmm.c \
vmm_dev.c \
- vmm_host.c \
vmm_instruction_emul.c \
+ vmm_stat.c
+
+.if ${MACHINE_CPUARCH} == "aarch64"
+DPSRCS+= assym.inc
+
+# TODO: Add the new EL2 code
+SRCS+= vmm_arm64.c \
+ vmm_reset.c \
+ vmm_call.S \
+ vmm_mmu.c \
+ vmm_hyp_el2.S
+
+.PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io
+SRCS+= vgic.c \
+ vgic_if.h \
+ vgic_if.c \
+ vgic_v3.c \
+ vtimer.c
+
+SRCS+= vmm_hyp_exception.S vmm_hyp.c
+
+CLEANFILES+= vmm_hyp_blob.elf.full
+CLEANFILES+= vmm_hyp_blob.elf vmm_hyp_blob.bin
+
+vmm_hyp_exception.o: vmm_hyp_exception.S
+ ${CC} -c -x assembler-with-cpp -DLOCORE \
+ ${CFLAGS:N-fsanitize*:N-mbranch-protection*} \
+ ${.IMPSRC} -o ${.TARGET} -fpie
+
+vmm_hyp.o: vmm_hyp.c
+ ${CC} -c ${CFLAGS:N-fsanitize*:N-mbranch-protection*} \
+ ${.IMPSRC} -o ${.TARGET} -fpie
+
+vmm_hyp_blob.elf.full: vmm_hyp_exception.o vmm_hyp.o
+ ${LD} -m ${LD_EMULATION} -Bdynamic -T ${SYSDIR}/conf/ldscript.arm64 \
+ ${_LDFLAGS} --no-warn-mismatch --warn-common --export-dynamic \
+ --dynamic-linker /red/herring -X -o ${.TARGET} ${.ALLSRC} \
+ --defsym=text_start='0x0'
+
+vmm_hyp_blob.elf: vmm_hyp_blob.elf.full
+ ${OBJCOPY} --strip-debug ${.ALLSRC} ${.TARGET}
+
+vmm_hyp_blob.bin: vmm_hyp_blob.elf
+ ${OBJCOPY} --output-target=binary ${.ALLSRC} ${.TARGET}
+
+vmm_hyp_el2.o: vmm_hyp_blob.bin
+
+.elif ${MACHINE_CPUARCH} == "amd64"
+DPSRCS+= vmx_assym.h svm_assym.h
+DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc
+
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd
+
+SRCS+= vmm_host.c \
vmm_ioport.c \
vmm_lapic.c \
vmm_mem.c \
- vmm_stat.c \
vmm_util.c \
x86.c
-.PATH: ${SRCTOP}/sys/amd64/vmm/io
+.PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io
SRCS+= iommu.c \
ppt.c \
vatpic.c \
@@ -62,10 +110,11 @@
SRCS.BHYVE_SNAPSHOT= vmm_snapshot.c
-CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
+CLEANFILES+= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h
OBJS_DEPEND_GUESS.svm_support.o+= svm_assym.h
+.endif
vmx_assym.h: vmx_genassym.o
sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET}
@@ -81,6 +130,9 @@
${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
${.IMPSRC} -o ${.TARGET}
+hyp_genassym.o: offset.inc
+ ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC}
+
vmx_genassym.o: offset.inc
${CC} -c ${CFLAGS:N-flto*:N-fno-common:N-fsanitize*:N-fno-sanitize*} \
-fcommon ${.IMPSRC}
