D37428.diff

diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm.h
@@ -0,0 +1,362 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_H_
+#define _VMM_H_
+
+#include <sys/param.h>
+#include <sys/cpuset.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include "pte.h"
+#include "pmap.h"
+
+struct vcpu;
+
+enum vm_suspend_how {
+ VM_SUSPEND_NONE,
+ VM_SUSPEND_RESET,
+ VM_SUSPEND_POWEROFF,
+ VM_SUSPEND_HALT,
+ VM_SUSPEND_LAST
+};
+
+/*
+ * Identifiers for architecturally defined registers.
+ */
+enum vm_reg_name {
+ VM_REG_GUEST_X0 = 0,
+ VM_REG_GUEST_X1,
+ VM_REG_GUEST_X2,
+ VM_REG_GUEST_X3,
+ VM_REG_GUEST_X4,
+ VM_REG_GUEST_X5,
+ VM_REG_GUEST_X6,
+ VM_REG_GUEST_X7,
+ VM_REG_GUEST_X8,
+ VM_REG_GUEST_X9,
+ VM_REG_GUEST_X10,
+ VM_REG_GUEST_X11,
+ VM_REG_GUEST_X12,
+ VM_REG_GUEST_X13,
+ VM_REG_GUEST_X14,
+ VM_REG_GUEST_X15,
+ VM_REG_GUEST_X16,
+ VM_REG_GUEST_X17,
+ VM_REG_GUEST_X18,
+ VM_REG_GUEST_X19,
+ VM_REG_GUEST_X20,
+ VM_REG_GUEST_X21,
+ VM_REG_GUEST_X22,
+ VM_REG_GUEST_X23,
+ VM_REG_GUEST_X24,
+ VM_REG_GUEST_X25,
+ VM_REG_GUEST_X26,
+ VM_REG_GUEST_X27,
+ VM_REG_GUEST_X28,
+ VM_REG_GUEST_X29,
+ VM_REG_GUEST_LR,
+ VM_REG_GUEST_SP,
+ VM_REG_GUEST_PC,
+ VM_REG_GUEST_CPSR,
+
+ VM_REG_GUEST_SCTLR_EL1,
+ VM_REG_GUEST_TTBR0_EL1,
+ VM_REG_GUEST_TTBR1_EL1,
+ VM_REG_GUEST_TCR_EL1,
+ VM_REG_GUEST_TCR2_EL1,
+ VM_REG_LAST
+};
+
+#define VM_INTINFO_VECTOR(info) ((info) & 0xff)
+#define VM_INTINFO_DEL_ERRCODE 0x800
+#define VM_INTINFO_RSVD 0x7ffff000
+#define VM_INTINFO_VALID 0x80000000
+#define VM_INTINFO_TYPE 0x700
+#define VM_INTINFO_HWINTR (0 << 8)
+#define VM_INTINFO_NMI (2 << 8)
+#define VM_INTINFO_HWEXCEPTION (3 << 8)
+#define VM_INTINFO_SWINTR (4 << 8)
+
+#define VM_MAX_SUFFIXLEN 15
+
+#define VM_GUEST_BASE_IPA 0x80000000UL /* Guest kernel start ipa */
+
+#ifdef _KERNEL
+
+#define VM_MAX_NAMELEN 32
+
+struct vm;
+struct vm_exception;
+struct vm_exit;
+struct vm_run;
+struct vm_object;
+struct vm_guest_paging;
+struct vm_vgic_descr;
+struct pmap;
+
+struct vm_eventinfo {
+ void *rptr; /* rendezvous cookie */
+ int *sptr; /* suspend cookie */
+ int *iptr; /* reqidle cookie */
+};
+
+int vm_create(const char *name, struct vm **retvm);
+struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
+void vm_slock_vcpus(struct vm *vm);
+void vm_unlock_vcpus(struct vm *vm);
+void vm_destroy(struct vm *vm);
+int vm_reinit(struct vm *vm);
+const char *vm_name(struct vm *vm);
+
+/*
+ * APIs that modify the guest memory map require all vcpus to be frozen.
+ */
+void vm_slock_memsegs(struct vm *vm);
+void vm_xlock_memsegs(struct vm *vm);
+void vm_unlock_memsegs(struct vm *vm);
+int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
+ size_t len, int prot, int flags);
+int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
+int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+void vm_free_memseg(struct vm *vm, int ident);
+
+/*
+ * APIs that inspect the guest memory map require only a *single* vcpu to
+ * be frozen. This acts like a read lock on the guest memory map since any
+ * modification requires *all* vcpus to be frozen.
+ */
+int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ struct vm_object **objptr);
+vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
+void *vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len,
+ int prot, void **cookie);
+void *vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len,
+ int prot, void **cookie);
+void vm_gpa_release(void *cookie);
+bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa);
+
+int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault);
+
+uint16_t vm_get_maxcpus(struct vm *vm);
+void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus);
+int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus);
+int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
+int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
+int vm_run(struct vcpu *vcpu);
+int vm_suspend(struct vm *vm, enum vm_suspend_how how);
+void* vm_get_cookie(struct vm *vm);
+int vcpu_vcpuid(struct vcpu *vcpu);
+void *vcpu_get_cookie(struct vcpu *vcpu);
+struct vm *vcpu_vm(struct vcpu *vcpu);
+struct vcpu *vm_vcpu(struct vm *vm, int cpu);
+int vm_get_capability(struct vcpu *vcpu, int type, int *val);
+int vm_set_capability(struct vcpu *vcpu, int type, int val);
+int vm_activate_cpu(struct vcpu *vcpu);
+int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
+int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
+int vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far);
+int vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr);
+int vm_assert_irq(struct vm *vm, uint32_t irq);
+int vm_deassert_irq(struct vm *vm, uint32_t irq);
+int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
+ int func);
+struct vm_exit *vm_exitinfo(struct vcpu *vcpu);
+void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_debug(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_astpending(struct vcpu *vcpu, uint64_t pc);
+
+cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_debug_cpus(struct vm *vm);
+cpuset_t vm_suspended_cpus(struct vm *vm);
+
+static __inline bool
+virt_enabled(void)
+{
+
+ return (has_hyp());
+}
+
+static __inline int
+vcpu_rendezvous_pending(struct vm_eventinfo *info)
+{
+
+ return (*((uintptr_t *)(info->rptr)) != 0);
+}
+
+static __inline int
+vcpu_suspended(struct vm_eventinfo *info)
+{
+
+ return (*info->sptr);
+}
+
+int vcpu_debugged(struct vcpu *vcpu);
+
+enum vcpu_state {
+ VCPU_IDLE,
+ VCPU_FROZEN,
+ VCPU_RUNNING,
+ VCPU_SLEEPING,
+};
+
+int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle);
+enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu);
+
+static int __inline
+vcpu_is_running(struct vcpu *vcpu, int *hostcpu)
+{
+ return (vcpu_get_state(vcpu, hostcpu) == VCPU_RUNNING);
+}
+
+#ifdef _SYS_PROC_H_
+static int __inline
+vcpu_should_yield(struct vcpu *vcpu)
+{
+ struct thread *td;
+
+ td = curthread;
+ return (td->td_ast != 0 || td->td_owepreempt != 0);
+}
+#endif
+
+void *vcpu_stats(struct vcpu *vcpu);
+void vcpu_notify_event(struct vcpu *vcpu);
+
+enum vm_reg_name vm_segment_name(int seg_encoding);
+
+struct vm_copyinfo {
+ uint64_t gpa;
+ size_t len;
+ void *hva;
+ void *cookie;
+};
+
+#endif /* _KERNEL */
+
+#define VM_DIR_READ 0
+#define VM_DIR_WRITE 1
+
+#define VM_GP_M_MASK 0x1f
+#define VM_GP_MMU_ENABLED (1 << 5)
+
+struct vm_guest_paging {
+ uint64_t ttbr0_addr;
+ uint64_t ttbr1_addr;
+ uint64_t tcr_el1;
+ uint64_t tcr2_el1;
+ int flags;
+ int padding;
+};
+
+struct vie {
+ uint8_t access_size:4, sign_extend:1, dir:1, unused:2;
+ enum vm_reg_name reg;
+};
+
+struct vre {
+ uint32_t inst_syndrome;
+ uint8_t dir:1, unused:7;
+ enum vm_reg_name reg;
+};
+
+/*
+ * Identifiers for optional vmm capabilities
+ */
+enum vm_cap_type {
+ VM_CAP_HALT_EXIT,
+ VM_CAP_MTRAP_EXIT,
+ VM_CAP_PAUSE_EXIT,
+ VM_CAP_UNRESTRICTED_GUEST,
+ VM_CAP_MAX
+};
+
+enum vm_exitcode {
+ VM_EXITCODE_BOGUS,
+ VM_EXITCODE_INST_EMUL,
+ VM_EXITCODE_REG_EMUL,
+ VM_EXITCODE_HVC,
+ VM_EXITCODE_SUSPENDED,
+ VM_EXITCODE_HYP,
+ VM_EXITCODE_WFI,
+ VM_EXITCODE_PAGING,
+ VM_EXITCODE_SMCCC,
+ VM_EXITCODE_DEBUG,
+ VM_EXITCODE_MAX
+};
+
+struct vm_exit {
+ enum vm_exitcode exitcode;
+ int inst_length;
+ uint64_t pc;
+ union {
+ /*
+ * ARM specific payload.
+ */
+ struct {
+ uint32_t exception_nr;
+ uint32_t pad;
+ uint64_t esr_el2; /* Exception Syndrome Register */
+ uint64_t far_el2; /* Fault Address Register */
+ uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */
+ } hyp;
+ struct {
+ struct vre vre;
+ } reg_emul;
+ struct {
+ uint64_t gpa;
+ uint64_t esr;
+ } paging;
+ struct {
+ uint64_t gpa;
+ struct vm_guest_paging paging;
+ struct vie vie;
+ } inst_emul;
+
+ /*
+ * An SMCCC call, e.g. starting a core via PSCI.
+ * Further arguments can be read by asking the kernel for
+ * all register values.
+ */
+ struct {
+ uint64_t func_id;
+ uint64_t args[7];
+ } smccc_call;
+
+ struct {
+ enum vm_suspend_how how;
+ } suspended;
+ } u;
+};
+
+#endif /* _VMM_H_ */
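
The vm_exit structure above is how the arm64 vmm reports trap information to its caller: vm_run() fills it in and the consumer dispatches on exitcode. As a rough sketch (not part of this patch, with the handler bodies left out), a kernel-side exit loop built only from the APIs declared in this header could look like:

static int
handle_guest_exits(struct vcpu *vcpu)
{
	struct vm_exit *vme;
	int error;

	for (;;) {
		error = vm_run(vcpu);
		if (error != 0)
			return (error);

		vme = vm_exitinfo(vcpu);
		switch (vme->exitcode) {
		case VM_EXITCODE_WFI:
			/* Guest executed WFI and is waiting for an interrupt. */
			break;
		case VM_EXITCODE_PAGING:
			/* Stage 2 fault; the faulting IPA is in vme->u.paging.gpa. */
			break;
		case VM_EXITCODE_SMCCC:
			/* PSCI/SMCCC call; the id is in vme->u.smccc_call.func_id. */
			break;
		case VM_EXITCODE_SUSPENDED:
			/* vme->u.suspended.how says why the VM stopped. */
			return (0);
		default:
			break;
		}
	}
}
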
diff --git a/sys/arm64/include/vmm_dev.h b/sys/arm64/include/vmm_dev.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm_dev.h
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_DEV_H_
+#define _VMM_DEV_H_
+
+#ifdef _KERNEL
+void vmmdev_init(void);
+int vmmdev_cleanup(void);
+#endif
+
+struct vm_memmap {
+ vm_paddr_t gpa;
+ int segid; /* memory segment */
+ vm_ooffset_t segoff; /* offset into memory segment */
+ size_t len; /* mmap length */
+ int prot; /* RWX */
+ int flags;
+};
+#define VM_MEMMAP_F_WIRED 0x01
+
+struct vm_munmap {
+ vm_paddr_t gpa;
+ size_t len;
+};
+
+#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL)
+struct vm_memseg {
+ int segid;
+ size_t len;
+ char name[VM_MAX_SUFFIXLEN + 1];
+};
+
+struct vm_register {
+ int cpuid;
+ int regnum; /* enum vm_reg_name */
+ uint64_t regval;
+};
+
+struct vm_register_set {
+ int cpuid;
+ unsigned int count;
+ const int *regnums; /* enum vm_reg_name */
+ uint64_t *regvals;
+};
+
+struct vm_run {
+ int cpuid;
+ cpuset_t *cpuset; /* CPU set storage */
+ size_t cpusetsize;
+ struct vm_exit *vm_exit;
+};
+
+struct vm_exception {
+ int cpuid;
+ uint64_t esr;
+ uint64_t far;
+};
+
+struct vm_msi {
+ uint64_t msg;
+ uint64_t addr;
+ int bus;
+ int slot;
+ int func;
+};
+
+struct vm_capability {
+ int cpuid;
+ enum vm_cap_type captype;
+ int capval;
+ int allcpus;
+};
+
+#define MAX_VM_STATS 64
+struct vm_stats {
+ int cpuid; /* in */
+ int index; /* in */
+ int num_entries; /* out */
+ struct timeval tv;
+ uint64_t statbuf[MAX_VM_STATS];
+};
+struct vm_stat_desc {
+ int index; /* in */
+ char desc[128]; /* out */
+};
+
+struct vm_suspend {
+ enum vm_suspend_how how;
+};
+
+struct vm_gla2gpa {
+ int vcpuid; /* inputs */
+ int prot; /* PROT_READ or PROT_WRITE */
+ uint64_t gla;
+ struct vm_guest_paging paging;
+ int fault; /* outputs */
+ uint64_t gpa;
+};
+
+struct vm_activate_cpu {
+ int vcpuid;
+};
+
+struct vm_cpuset {
+ int which;
+ int cpusetsize;
+ cpuset_t *cpus;
+};
+#define VM_ACTIVE_CPUS 0
+#define VM_SUSPENDED_CPUS 1
+#define VM_DEBUG_CPUS 2
+
+struct vm_vgic_version {
+ u_int version;
+ u_int flags;
+};
+
+struct vm_vgic_descr {
+ struct vm_vgic_version ver;
+ union {
+ struct {
+ uint64_t dist_start;
+ uint64_t dist_size;
+ uint64_t redist_start;
+ uint64_t redist_size;
+ } v3_regs;
+ };
+};
+
+struct vm_irq {
+ uint32_t irq;
+};
+
+struct vm_cpu_topology {
+ uint16_t sockets;
+ uint16_t cores;
+ uint16_t threads;
+ uint16_t maxcpus;
+};
+
+enum {
+ /* general routines */
+ IOCNUM_ABIVERS = 0,
+ IOCNUM_RUN = 1,
+ IOCNUM_SET_CAPABILITY = 2,
+ IOCNUM_GET_CAPABILITY = 3,
+ IOCNUM_SUSPEND = 4,
+ IOCNUM_REINIT = 5,
+
+ /* memory apis */
+ IOCNUM_GET_GPA_PMAP = 12,
+ IOCNUM_GLA2GPA_NOFAULT = 13,
+ IOCNUM_ALLOC_MEMSEG = 14,
+ IOCNUM_GET_MEMSEG = 15,
+ IOCNUM_MMAP_MEMSEG = 16,
+ IOCNUM_MMAP_GETNEXT = 17,
+ IOCNUM_MUNMAP_MEMSEG = 18,
+
+ /* register/state accessors */
+ IOCNUM_SET_REGISTER = 20,
+ IOCNUM_GET_REGISTER = 21,
+ IOCNUM_SET_REGISTER_SET = 24,
+ IOCNUM_GET_REGISTER_SET = 25,
+
+ /* statistics */
+ IOCNUM_VM_STATS = 50,
+ IOCNUM_VM_STAT_DESC = 51,
+
+ /* CPU Topology */
+ IOCNUM_SET_TOPOLOGY = 63,
+ IOCNUM_GET_TOPOLOGY = 64,
+
+ /* interrupt injection */
+ IOCNUM_ASSERT_IRQ = 80,
+ IOCNUM_DEASSERT_IRQ = 81,
+ IOCNUM_RAISE_MSI = 82,
+ IOCNUM_INJECT_EXCEPTION = 83,
+
+ /* vm_cpuset */
+ IOCNUM_ACTIVATE_CPU = 90,
+ IOCNUM_GET_CPUSET = 91,
+ IOCNUM_SUSPEND_CPU = 92,
+ IOCNUM_RESUME_CPU = 93,
+
+ /* vm_attach_vgic */
+ IOCNUM_GET_VGIC_VERSION = 110,
+ IOCNUM_ATTACH_VGIC = 111,
+};
+
+#define VM_RUN \
+ _IOWR('v', IOCNUM_RUN, struct vm_run)
+#define VM_SUSPEND \
+ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
+#define VM_REINIT \
+ _IO('v', IOCNUM_REINIT)
+#define VM_ALLOC_MEMSEG \
+ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
+#define VM_GET_MEMSEG \
+ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
+#define VM_MMAP_MEMSEG \
+ _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
+#define VM_MMAP_GETNEXT \
+ _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
+#define VM_MUNMAP_MEMSEG \
+ _IOW('v', IOCNUM_MUNMAP_MEMSEG, struct vm_munmap)
+#define VM_SET_REGISTER \
+ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
+#define VM_GET_REGISTER \
+ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
+#define VM_SET_REGISTER_SET \
+ _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set)
+#define VM_GET_REGISTER_SET \
+ _IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set)
+#define VM_SET_CAPABILITY \
+ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
+#define VM_GET_CAPABILITY \
+ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
+#define VM_STATS \
+ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
+#define VM_STAT_DESC \
+ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
+#define VM_ASSERT_IRQ \
+ _IOW('v', IOCNUM_ASSERT_IRQ, struct vm_irq)
+#define VM_DEASSERT_IRQ \
+ _IOW('v', IOCNUM_DEASSERT_IRQ, struct vm_irq)
+#define VM_RAISE_MSI \
+ _IOW('v', IOCNUM_RAISE_MSI, struct vm_msi)
+#define VM_INJECT_EXCEPTION \
+ _IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception)
+#define VM_SET_TOPOLOGY \
+ _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GET_TOPOLOGY \
+ _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GLA2GPA_NOFAULT \
+ _IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa)
+#define VM_ACTIVATE_CPU \
+ _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
+#define VM_GET_CPUS \
+ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
+#define VM_SUSPEND_CPU \
+ _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu)
+#define VM_RESUME_CPU \
+ _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu)
+#define VM_GET_VGIC_VERSION \
+ _IOR('v', IOCNUM_GET_VGIC_VERSION, struct vm_vgic_version)
+#define VM_ATTACH_VGIC \
+ _IOW('v', IOCNUM_ATTACH_VGIC, struct vm_vgic_descr)
+#endif
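
These ioctls are the userland-facing half of the interface; bhyve and libvmmapi issue them against the VM's control device. A hedged userland sketch of reading the guest program counter with VM_GET_REGISTER follows; the /dev/vmm/<name> path and the minimal error handling are assumptions made for brevity, not something this header defines.

#include <sys/types.h>
#include <sys/ioctl.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int
get_guest_pc(const char *vmname, uint64_t *pc)
{
	struct vm_register vmreg;
	char path[64];
	int error, fd;

	snprintf(path, sizeof(path), "/dev/vmm/%s", vmname);
	fd = open(path, O_RDWR);
	if (fd < 0)
		return (-1);

	vmreg.cpuid = 0;			/* vcpu 0 */
	vmreg.regnum = VM_REG_GUEST_PC;		/* from machine/vmm.h */
	vmreg.regval = 0;
	error = ioctl(fd, VM_GET_REGISTER, &vmreg);
	if (error == 0)
		*pc = vmreg.regval;

	close(fd);
	return (error);
}
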
diff --git a/sys/arm64/include/vmm_instruction_emul.h b/sys/arm64/include/vmm_instruction_emul.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm_instruction_emul.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_INSTRUCTION_EMUL_H_
+#define _VMM_INSTRUCTION_EMUL_H_
+
+/*
+ * Callback functions to read and write memory regions.
+ */
+typedef int (*mem_region_read_t)(struct vcpu *vcpu, uint64_t gpa,
+ uint64_t *rval, int rsize, void *arg);
+typedef int (*mem_region_write_t)(struct vcpu *vcpu, uint64_t gpa,
+ uint64_t wval, int wsize, void *arg);
+
+/*
+ * Callback functions to read and write registers.
+ */
+typedef int (*reg_read_t)(struct vcpu *vcpu, uint64_t *rval, void *arg);
+typedef int (*reg_write_t)(struct vcpu *vcpu, uint64_t wval, void *arg);
+
+/*
+ * Emulate the decoded 'vie' instruction when it contains a memory operation.
+ *
+ * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
+ * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'vcpu' is the kernel's 'struct vcpu' when called from kernel context and
+ * the userland (libvmmapi) vcpu handle when called from user context.
+ *
+ */
+int vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t mrr,
+ mem_region_write_t mrw, void *mrarg);
+
+/*
+ * Emulate the decoded 'vre' instruction when it contains a register access.
+ *
+ * The callbacks 'regread' and 'regwrite' emulate reads and writes to the
+ * register from 'vre'. 'regarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'vcpu' is the kernel's 'struct vcpu' when called from kernel context and
+ * the userland (libvmmapi) vcpu handle when called from user context.
+ *
+ */
+int vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg);
+
+#ifdef _KERNEL
+void vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
+ reg_read_t reg_read, reg_write_t reg_write, void *arg);
+void vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask);
+
+void vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+ mem_region_read_t mmio_read, mem_region_write_t mmio_write);
+void vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size);
+#endif
+
+#endif /* _VMM_INSTRUCTION_EMUL_H_ */
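
The mem_region_read_t/mem_region_write_t callbacks above are how an emulated device is wired into the trapped-MMIO path: a backend registers a handler pair for its guest-physical window and vmm_emulate_instruction() calls into it when a decoded access lands in that range. A minimal sketch under assumed names (the device, its base address and its size are invented for illustration):

static int
mydev_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, int rsize,
    void *arg)
{
	/* Supply the value for the trapped 'rsize'-byte load at 'gpa'. */
	*rval = 0;
	return (0);
}

static int
mydev_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t wval, int wsize,
    void *arg)
{
	/* Consume the trapped 'wsize'-byte store of 'wval' at 'gpa'. */
	return (0);
}

static void
mydev_attach(struct vm *vm)
{
	/* Hypothetical 4 KiB register window at IPA 0x0a000000. */
	vm_register_inst_handler(vm, 0x0a000000, 0x1000,
	    mydev_mmio_read, mydev_mmio_write);
}
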
diff --git a/sys/arm64/include/vmm_snapshot.h b/sys/arm64/include/vmm_snapshot.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm_snapshot.h
@@ -0,0 +1 @@
+/* $FreeBSD$ */
diff --git a/sys/arm64/vmm/arm64.h b/sys/arm64/vmm/arm64.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/arm64.h
@@ -0,0 +1,165 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _VMM_ARM64_H_
+#define _VMM_ARM64_H_
+
+#include <machine/reg.h>
+#include <machine/hypervisor.h>
+#include <machine/pcpu.h>
+
+#include "mmu.h"
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+
+struct vgic_v3;
+struct vgic_v3_cpu;
+
+struct hypctx {
+ struct trapframe tf;
+
+ /*
+ * EL1 control registers.
+ */
+ uint64_t elr_el1; /* Exception Link Register */
+ uint64_t sp_el0; /* Stack pointer */
+ uint64_t tpidr_el0; /* EL0 Software ID Register */
+ uint64_t tpidrro_el0; /* Read-only Thread ID Register */
+ uint64_t tpidr_el1; /* EL1 Software ID Register */
+ uint64_t vbar_el1; /* Vector Base Address Register */
+
+ uint64_t actlr_el1; /* Auxiliary Control Register */
+ uint64_t afsr0_el1; /* Auxiliary Fault Status Register 0 */
+ uint64_t afsr1_el1; /* Auxiliary Fault Status Register 1 */
+ uint64_t amair_el1; /* Auxiliary Memory Attribute Indirection Register */
+ uint64_t contextidr_el1; /* Current Process Identifier */
+ uint64_t cpacr_el1; /* Architectural Feature Access Control Register */
+ uint64_t csselr_el1; /* Cache Size Selection Register */
+ uint64_t esr_el1; /* Exception Syndrome Register */
+ uint64_t far_el1; /* Fault Address Register */
+ uint64_t mair_el1; /* Memory Attribute Indirection Register */
+ uint64_t mdccint_el1; /* Monitor DCC Interrupt Enable Register */
+ uint64_t mdscr_el1; /* Monitor Debug System Control Register */
+ uint64_t par_el1; /* Physical Address Register */
+ uint64_t sctlr_el1; /* System Control Register */
+ uint64_t tcr_el1; /* Translation Control Register */
+ uint64_t tcr2_el1; /* Translation Control Register 2 */
+ uint64_t ttbr0_el1; /* Translation Table Base Register 0 */
+ uint64_t ttbr1_el1; /* Translation Table Base Register 1 */
+ uint64_t spsr_el1; /* Saved Program Status Register */
+
+ uint64_t pmcr_el0; /* Performance Monitors Control Register */
+ uint64_t pmccntr_el0;
+ uint64_t pmccfiltr_el0;
+ uint64_t pmcntenset_el0;
+ uint64_t pmintenset_el1;
+ uint64_t pmovsset_el0;
+ uint64_t pmselr_el0;
+ uint64_t pmuserenr_el0;
+ uint64_t pmevcntr_el0[31];
+ uint64_t pmevtyper_el0[31];
+
+ uint64_t dbgbcr_el1[16]; /* Debug Breakpoint Control Registers */
+ uint64_t dbgbvr_el1[16]; /* Debug Breakpoint Value Registers */
+ uint64_t dbgwcr_el1[16]; /* Debug Watchpoint Control Registers */
+ uint64_t dbgwvr_el1[16]; /* Debug Watchpoint Value Registers */
+
+ /* EL2 control registers */
+ uint64_t cptr_el2; /* Architectural Feature Trap Register */
+ uint64_t hcr_el2; /* Hypervisor Configuration Register */
+ uint64_t mdcr_el2; /* Monitor Debug Configuration Register */
+ uint64_t vpidr_el2; /* Virtualization Processor ID Register */
+ uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
+ uint64_t el2_addr; /* The address of this in el2 space */
+ struct hyp *hyp;
+ struct vcpu *vcpu;
+ struct {
+ uint64_t far_el2; /* Fault Address Register */
+ uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */
+ } exit_info;
+
+ struct vtimer_cpu vtimer_cpu;
+
+ struct vgic_v3_regs vgic_v3_regs;
+ struct vgic_v3_cpu *vgic_cpu;
+ bool has_exception;
+};
+
+struct hyp {
+ struct vm *vm;
+ struct vtimer vtimer;
+ uint64_t vmid_generation;
+ uint64_t vttbr_el2;
+ uint64_t el2_addr; /* The address of this in el2 space */
+ bool vgic_attached;
+ struct vgic_v3 *vgic;
+ struct hypctx *ctx[];
+};
+
+#define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \
+ ret_type vmmops_##opname args;
+
+DEFINE_VMMOPS_IFUNC(int, modinit, (int ipinum))
+DEFINE_VMMOPS_IFUNC(int, modcleanup, (void))
+DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap))
+DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault))
+DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap,
+ struct vm_eventinfo *info))
+DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi))
+DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu,
+ int vcpu_id))
+DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui))
+DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t esr, uint64_t far))
+DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval))
+DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val))
+DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval))
+DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val))
+DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min,
+ vm_offset_t max))
+DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace))
+#ifdef notyet
+#ifdef BHYVE_SNAPSHOT
+DEFINE_VMMOPS_IFUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta *meta))
+DEFINE_VMMOPS_IFUNC(int, vcpu_snapshot, (void *vcpui,
+ struct vm_snapshot_meta *meta))
+DEFINE_VMMOPS_IFUNC(int, restore_tsc, (void *vcpui, uint64_t now))
+#endif
+#endif
+
+uint64_t vmm_call_hyp(uint64_t, ...);
+
+#if 0
+#define eprintf(fmt, ...) printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__)
+#else
+#define eprintf(fmt, ...) do {} while(0)
+#endif
+
+struct hypctx *arm64_get_active_vcpu(void);
+void raise_data_insn_abort(struct hypctx *, uint64_t, bool, int);
+
+#endif /* !_VMM_ARM64_H_ */
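
DEFINE_VMMOPS_IFUNC only pastes a prototype together, so each line above declares one vmmops_* entry point that the machine-independent vmm code calls into the arm64 backend. For instance, the getreg and run lines expand to:

/* Expansions of the DEFINE_VMMOPS_IFUNC() lines above: */
int vmmops_getreg(void *vcpui, int num, uint64_t *retval);
int vmmops_run(void *vcpui, register_t pc, struct pmap *pmap,
    struct vm_eventinfo *info);
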
diff --git a/sys/arm64/vmm/hyp.h b/sys/arm64/vmm/hyp.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/hyp.h
@@ -0,0 +1,114 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_HYP_H_
+#define _VMM_HYP_H_
+
+/*
+ * The translation tables for the hypervisor mode will hold mappings for kernel
+ * virtual addresses and an identity mapping (VA == PA) necessary when
+ * enabling/disabling the MMU.
+ *
+ * When in the EL2 exception level, the translation table base register is
+ * TTBR0_EL2 and the virtual addresses generated by the CPU must be at the
+ * bottom of the address space, with the upper 16 bits all set to zero:
+ *
+ * 0x0000ffffffffffff End hyp address space
+ * 0x0000000000000000 Start of hyp address space
+ *
+ * To run code in hyp mode we need to convert kernel virtual addresses to
+ * addresses that fit into this address space.
+ *
+ * The kernel virtual address range is:
+ *
+ * 0xffff007fffffffff End of KVA
+ * 0xffff000000000000 Kernel base address & start of KVA
+ *
+ * (see /sys/arm64/include/vmparam.h).
+ *
+ * We could convert the kernel virtual addresses to valid EL2 addresses by
+ * setting the upper 16 bits to zero and thus mapping the kernel addresses in
+ * the bottom half of the EL2 address space, but then they might clash with the
+ * identity mapping addresses. Instead we map the kernel addresses in the upper
+ * half of the EL2 address space.
+ *
+ * The hypervisor address space will look like this:
+ *
+ * 0x0000807fffffffff End of KVA mapping
+ * 0x0000800000000000 Start of KVA mapping
+ *
+ * 0x00007fffffffffff End of identity mapping
+ * 0x0000000000000000 Start of identity mapping
+ *
+ * With this scheme we have 47 bits at our disposal for the identity map and
+ * another 47 bits for the kernel virtual addresses. For a maximum physical
+ * memory size of 128TB we are guaranteed to not have any clashes between
+ * addresses.
+ */
+#define HYP_VM_MIN_ADDRESS 0x0000000000000000
+#define HYP_VM_MAX_ADDRESS 0x0001000000000000
+
+/*
+ * When the vmm code is installed the following handles can be used by
+ * the host to call into EL2.
+ */
+#define HYP_CLEANUP 0x00000001
+#define HYP_ENTER_GUEST 0x00000002
+#define HYP_READ_REGISTER 0x00000003
+#define HYP_REG_ICH_VTR 0x1
+#define HYP_REG_CNTHCTL 0x2
+#define HYP_CLEAN_S2_TLBI 0x00000004
+#define HYP_DC_CIVAC 0x00000005
+#define HYP_EL2_TLBI 0x00000006
+#define HYP_EL2_TLBI_ALL 0x1
+#define HYP_EL2_TLBI_VA 0x2
+#define HYP_S2_TLBI_RANGE 0x00000010
+#define HYP_S2_TLBI_ALL 0x00000011
+
+/*
+ * When taking asynchronous exceptions, or interrupts, with the exception of the
+ * SError interrupt, the exception syndrome register is not updated with the
+ * exception code. We need to differentiate between the different exception
+ * types taken to EL2.
+ */
+#define EXCP_TYPE_EL1_SYNC 0
+#define EXCP_TYPE_EL1_IRQ 1
+#define EXCP_TYPE_EL1_FIQ 2
+#define EXCP_TYPE_EL1_ERROR 3
+
+#define EXCP_TYPE_EL2_SYNC 4
+#define EXCP_TYPE_EL2_IRQ 5
+#define EXCP_TYPE_EL2_FIQ 6
+#define EXCP_TYPE_EL2_ERROR 7
+
+#define EXCP_TYPE_MAINT_IRQ 8
+/* Used internally in vmm_hyp.c */
+#define EXCP_TYPE_REENTER 9
+
+#define HYP_GET_VECTOR_TABLE -1
+
+#endif /* !_VMM_HYP_H_ */
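
Given the layout described above, translating a kernel virtual address into its EL2 alias is a matter of swapping the 0xffff000000000000 kernel prefix for the 0x0000800000000000 base of the upper-half hyp mapping. A sketch of that conversion (the macro names are invented here; the patch may express this differently):

/* KVA 0xffff00xxxxxxxxxx maps to EL2 VA 0x000080xxxxxxxxxx. */
#define	HYP_KVA_OFFSET	0x0000800000000000UL
#define	HYP_KVA_MASK	0x00007fffffffffffUL
#define	ktohyp(kva)	(((vm_offset_t)(kva) & HYP_KVA_MASK) | HYP_KVA_OFFSET)
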
diff --git a/sys/arm64/vmm/io/vgic.h b/sys/arm64/vmm/io/vgic.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic.h
@@ -0,0 +1,52 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Arm Ltd
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VGIC_H_
+#define _VGIC_H_
+
+struct hyp;
+struct hypctx;
+struct vm_vgic_descr;
+
+extern device_t vgic_dev;
+
+bool vgic_present(void);
+void vgic_init(void);
+int vgic_attach_to_vm(struct hyp *hyp, struct vm_vgic_descr *descr);
+void vgic_detach_from_vm(struct hyp *hyp);
+void vgic_vminit(struct hyp *hyp);
+void vgic_cpuinit(struct hypctx *hypctx);
+void vgic_cpucleanup(struct hypctx *hypctx);
+void vgic_vmcleanup(struct hyp *hyp);
+int vgic_max_cpu_count(struct hyp *hyp);
+bool vgic_has_pending_irq(struct hypctx *hypctx);
+int vgic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level);
+int vgic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr);
+void vgic_flush_hwstate(struct hypctx *hypctx);
+void vgic_sync_hwstate(struct hypctx *hypctx);
+
+#endif /* _VGIC_H_ */
diff --git a/sys/arm64/vmm/io/vgic.c b/sys/arm64/vmm/io/vgic.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic.c
@@ -0,0 +1,122 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Arm Ltd
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+
+#include "vgic.h"
+#include "vgic_if.h"
+
+device_t vgic_dev;
+
+bool
+vgic_present(void)
+{
+ return (vgic_dev != NULL);
+}
+
+void
+vgic_init(void)
+{
+ VGIC_INIT(vgic_dev);
+}
+
+int
+vgic_attach_to_vm(struct hyp *hyp, struct vm_vgic_descr *descr)
+{
+ return (VGIC_ATTACH_TO_VM(vgic_dev, hyp, descr));
+}
+
+void
+vgic_detach_from_vm(struct hyp *hyp)
+{
+ VGIC_DETACH_FROM_VM(vgic_dev, hyp);
+}
+
+void
+vgic_vminit(struct hyp *hyp)
+{
+ VGIC_VMINIT(vgic_dev, hyp);
+}
+
+void
+vgic_cpuinit(struct hypctx *hypctx)
+{
+ VGIC_CPUINIT(vgic_dev, hypctx);
+}
+
+void
+vgic_cpucleanup(struct hypctx *hypctx)
+{
+ VGIC_CPUCLEANUP(vgic_dev, hypctx);
+}
+
+void
+vgic_vmcleanup(struct hyp *hyp)
+{
+ VGIC_VMCLEANUP(vgic_dev, hyp);
+}
+
+int
+vgic_max_cpu_count(struct hyp *hyp)
+{
+ return (VGIC_MAX_CPU_COUNT(vgic_dev, hyp));
+}
+
+bool
+vgic_has_pending_irq(struct hypctx *hypctx)
+{
+ return (VGIC_HAS_PENDING_IRQ(vgic_dev, hypctx));
+}
+
+/* TODO: vcpuid -> hypctx ? */
+/* TODO: Add a vgic interface */
+int
+vgic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level)
+{
+ return (VGIC_INJECT_IRQ(vgic_dev, hyp, vcpuid, irqid, level));
+}
+
+int
+vgic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr)
+{
+ return (VGIC_INJECT_MSI(vgic_dev, hyp, msg, addr));
+}
+
+void
+vgic_flush_hwstate(struct hypctx *hypctx)
+{
+ VGIC_FLUSH_HWSTATE(vgic_dev, hypctx);
+}
+
+void
+vgic_sync_hwstate(struct hypctx *hypctx)
+{
+ VGIC_SYNC_HWSTATE(vgic_dev, hypctx);
+}
diff --git a/sys/arm64/vmm/io/vgic_if.m b/sys/arm64/vmm/io/vgic_if.m
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic_if.m
@@ -0,0 +1,104 @@
+#-
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Copyright (c) 2023 Arm Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+
+INTERFACE vgic;
+
+HEADER {
+ struct hyp;
+ struct hypctx;
+ struct vm_vgic_descr;
+};
+
+METHOD void init {
+ device_t dev;
+}
+
+METHOD int attach_to_vm {
+ device_t dev;
+ struct hyp *hyp;
+ struct vm_vgic_descr *descr;
+};
+
+METHOD void detach_from_vm {
+ device_t dev;
+ struct hyp *hyp;
+}
+
+METHOD void vminit {
+ device_t dev;
+ struct hyp *hyp;
+}
+
+METHOD void cpuinit {
+ device_t dev;
+ struct hypctx *hypctx;
+}
+
+METHOD void cpucleanup {
+ device_t dev;
+ struct hypctx *hypctx;
+}
+
+METHOD void vmcleanup {
+ device_t dev;
+ struct hyp *hyp;
+}
+
+METHOD int max_cpu_count {
+ device_t dev;
+ struct hyp *hyp;
+}
+
+METHOD bool has_pending_irq {
+ device_t dev;
+ struct hypctx *hypctx;
+}
+
+METHOD int inject_irq {
+ device_t dev;
+ struct hyp *hyp;
+ int vcpuid;
+ uint32_t irqid;
+ bool level;
+}
+
+METHOD int inject_msi {
+ device_t dev;
+ struct hyp *hyp;
+ uint64_t msg;
+ uint64_t addr;
+}
+
+METHOD void flush_hwstate {
+ device_t dev;
+ struct hypctx *hypctx;
+}
+
+METHOD void sync_hwstate {
+ device_t dev;
+ struct hypctx *hypctx;
+}
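
vgic_if.m is consumed by the kernel's kobj interface generator, which turns every METHOD above into a VGIC_*() dispatch macro and a matching vgic_*_t function typedef in the generated vgic_if.h; the thin wrappers in vgic.c earlier in this diff simply invoke those macros on whichever device has been recorded in vgic_dev. A concrete GIC driver would then advertise its implementations through its device method table, roughly as below (the my_* function names are hypothetical):

static device_method_t my_gicv3_methods[] = {
	/* vgic interface methods (generated from vgic_if.m) */
	DEVMETHOD(vgic_init,		my_vgic_init),
	DEVMETHOD(vgic_attach_to_vm,	my_vgic_attach_to_vm),
	DEVMETHOD(vgic_inject_irq,	my_vgic_inject_irq),
	DEVMETHOD(vgic_flush_hwstate,	my_vgic_flush_hwstate),
	DEVMETHOD(vgic_sync_hwstate,	my_vgic_sync_hwstate),
	/* ... the remaining vgic and bus methods ... */
	DEVMETHOD_END
};
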
diff --git a/sys/arm64/vmm/io/vgic_v3.h b/sys/arm64/vmm/io/vgic_v3.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic_v3.h
@@ -0,0 +1,57 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_VGIC_V3_H_
+#define _VMM_VGIC_V3_H_
+
+#define VGIC_ICH_LR_NUM_MAX 16
+#define VGIC_ICH_APR_NUM_MAX 4
+
+/* Registers accessed by EL2 */
+struct vgic_v3_regs {
+ uint32_t ich_eisr_el2; /* End of Interrupt Status Register */
+ uint32_t ich_elrsr_el2; /* Empty List register Status Register (ICH_ELRSR_EL2) */
+ uint32_t ich_hcr_el2; /* Hyp Control Register */
+ uint32_t ich_misr_el2; /* Maintenance Interrupt State Register */
+ uint32_t ich_vmcr_el2; /* Virtual Machine Control Register */
+
+ /*
+ * The List Registers are part of the VM context and are modified on a
+ * world switch. They need to be allocated statically so they are
+ * mapped in the EL2 translation tables when struct hypctx is mapped.
+ */
+ uint64_t ich_lr_el2[VGIC_ICH_LR_NUM_MAX];
+ uint16_t ich_lr_num;
+
+ /* Active Priorities Registers for Group 0 and 1 interrupts */
+ uint16_t ich_apr_num;
+ uint32_t ich_ap0r_el2[VGIC_ICH_APR_NUM_MAX];
+ uint32_t ich_ap1r_el2[VGIC_ICH_APR_NUM_MAX];
+};
+
+#endif /* !_VMM_VGIC_V3_H_ */
diff --git a/sys/arm64/vmm/io/vgic_v3.c b/sys/arm64/vmm/io/vgic_v3.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic_v3.c
@@ -0,0 +1,2348 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * Copyright (C) 2020-2022 Andrew Turner
+ * Copyright (C) 2023 Arm Ltd
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/bitstring.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/smp.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <dev/ofw/openfirm.h>
+
+#include <machine/armreg.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/param.h>
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/intr.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <arm/arm/gic_common.h>
+#include <arm64/arm64/gic_v3_reg.h>
+#include <arm64/arm64/gic_v3_var.h>
+
+#include <arm64/vmm/hyp.h>
+#include <arm64/vmm/mmu.h>
+#include <arm64/vmm/arm64.h>
+
+#include "vgic.h"
+#include "vgic_v3.h"
+#include "vgic_v3_reg.h"
+
+#include "vgic_if.h"
+
+#define VGIC_SGI_NUM (GIC_LAST_SGI - GIC_FIRST_SGI + 1)
+#define VGIC_PPI_NUM (GIC_LAST_PPI - GIC_FIRST_PPI + 1)
+#define VGIC_SPI_NUM (GIC_LAST_SPI - GIC_FIRST_SPI + 1)
+#define VGIC_PRV_I_NUM (VGIC_SGI_NUM + VGIC_PPI_NUM)
+#define VGIC_SHR_I_NUM (VGIC_SPI_NUM)
+
+MALLOC_DEFINE(M_VGIC_V3, "ARM VMM VGIC V3", "ARM VMM VGIC V3");
+
+/* TODO: Move to softc */
+struct vgic_v3_virt_features {
+ uint8_t min_prio;
+ size_t ich_lr_num;
+ size_t ich_apr_num;
+};
+
+struct vgic_v3_irq {
+ /* List of IRQs that are active or pending */
+ TAILQ_ENTRY(vgic_v3_irq) act_pend_list;
+ struct mtx irq_spinmtx;
+ uint64_t mpidr;
+ int target_vcpu;
+ uint32_t irq;
+ bool active;
+ bool pending;
+ bool enabled;
+ bool level;
+ bool on_aplist;
+ uint8_t priority;
+ uint8_t config;
+#define VGIC_CONFIG_MASK 0x2
+#define VGIC_CONFIG_LEVEL 0x0
+#define VGIC_CONFIG_EDGE 0x2
+};
+
+/* Global data not needed by EL2 */
+struct vgic_v3 {
+ struct mtx dist_mtx;
+ uint64_t dist_start;
+ size_t dist_end;
+
+ uint64_t redist_start;
+ size_t redist_end;
+
+ uint32_t gicd_ctlr; /* Distributor Control Register */
+
+ struct vgic_v3_irq *irqs;
+};
+
+/* Per-CPU data not needed by EL2 */
+struct vgic_v3_cpu {
+ /*
+ * We need a mutex for accessing the list registers because they are
+ * modified asynchronously by the virtual timer.
+ *
+ * Note that the mutex *MUST* be a spin mutex because an interrupt can
+ * be injected by a callout callback function, thereby modifying the
+ * list registers from a context where sleeping is forbidden.
+ */
+ struct mtx lr_mtx;
+
+ struct vgic_v3_irq private_irqs[VGIC_PRV_I_NUM];
+ TAILQ_HEAD(, vgic_v3_irq) irq_act_pend;
+ u_int ich_lr_used;
+};
+
+/* How many IRQs we support (SGIs + PPIs + SPIs). Not including LPIs */
+#define VGIC_NIRQS 1023
+/* Pretend to be an Arm design */
+#define VGIC_IIDR 0x43b
+
+static vgic_inject_irq_t vgic_v3_inject_irq;
+static vgic_inject_msi_t vgic_v3_inject_msi;
+
+static int vgic_v3_max_cpu_count(device_t dev, struct hyp *hyp);
+
+#define INJECT_IRQ(hyp, vcpuid, irqid, level) \
+ vgic_v3_inject_irq(NULL, (hyp), (vcpuid), (irqid), (level))
+
+typedef void (register_read)(struct hypctx *, u_int, uint64_t *, void *);
+typedef void (register_write)(struct hypctx *, u_int, u_int, u_int,
+ uint64_t, void *);
+
+#define VGIC_8_BIT (1 << 0)
+/* (1 << 1) is reserved for 16 bit accesses */
+#define VGIC_32_BIT (1 << 2)
+#define VGIC_64_BIT (1 << 3)
+
+struct vgic_register {
+ u_int start; /* Start within a memory region */
+ u_int end;
+ u_int size;
+ u_int flags;
+ register_read *read;
+ register_write *write;
+};
+
+#define VGIC_REGISTER_RANGE(reg_start, reg_end, reg_size, reg_flags, readf, \
+ writef) \
+{ \
+ .start = (reg_start), \
+ .end = (reg_end), \
+ .size = (reg_size), \
+ .flags = (reg_flags), \
+ .read = (readf), \
+ .write = (writef), \
+}
+
+#define VGIC_REGISTER_RANGE_RAZ_WI(reg_start, reg_end, reg_size, reg_flags) \
+ VGIC_REGISTER_RANGE(reg_start, reg_end, reg_size, reg_flags, \
+ gic_zero_read, gic_ignore_write)
+
+#define VGIC_REGISTER(start_addr, reg_size, reg_flags, readf, writef) \
+ VGIC_REGISTER_RANGE(start_addr, (start_addr) + (reg_size), \
+ reg_size, reg_flags, readf, writef)
+
+#define VGIC_REGISTER_RAZ_WI(start_addr, reg_size, reg_flags) \
+ VGIC_REGISTER_RANGE_RAZ_WI(start_addr, \
+ (start_addr) + (reg_size), reg_size, reg_flags)
+
+static register_read gic_pidr2_read;
+static register_read gic_zero_read;
+static register_write gic_ignore_write;
+
+/* GICD_CTLR */
+static register_read dist_ctlr_read;
+static register_write dist_ctlr_write;
+/* GICD_TYPER */
+static register_read dist_typer_read;
+/* GICD_IIDR */
+static register_read dist_iidr_read;
+/* GICD_STATUSR - RAZ/WI as we don't report errors (yet) */
+/* GICD_SETSPI_NSR & GICD_CLRSPI_NSR */
+static register_write dist_setclrspi_nsr_write;
+/* GICD_SETSPI_SR - RAZ/WI */
+/* GICD_CLRSPI_SR - RAZ/WI */
+/* GICD_IGROUPR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_ISENABLER */
+static register_read dist_isenabler_read;
+static register_write dist_isenabler_write;
+/* GICD_ICENABLER */
+static register_read dist_icenabler_read;
+static register_write dist_icenabler_write;
+/* GICD_ISPENDR */
+static register_read dist_ispendr_read;
+static register_write dist_ispendr_write;
+/* GICD_ICPENDR */
+static register_read dist_icpendr_read;
+static register_write dist_icpendr_write;
+/* GICD_ISACTIVER */
+static register_read dist_isactiver_read;
+static register_write dist_isactiver_write;
+/* GICD_ICACTIVER */
+static register_read dist_icactiver_read;
+static register_write dist_icactiver_write;
+/* GICD_IPRIORITYR */
+static register_read dist_ipriorityr_read;
+static register_write dist_ipriorityr_write;
+/* GICD_ITARGETSR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_ICFGR */
+static register_read dist_icfgr_read;
+static register_write dist_icfgr_write;
+/* GICD_IGRPMODR - RAZ/WI from non-secure mode */
+/* GICD_NSACR - RAZ/WI from non-secure mode */
+/* GICD_SGIR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_CPENDSGIR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_SPENDSGIR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_IROUTER */
+static register_read dist_irouter_read;
+static register_write dist_irouter_write;
+
+static struct vgic_register dist_registers[] = {
+ VGIC_REGISTER(GICD_CTLR, 4, VGIC_32_BIT, dist_ctlr_read,
+ dist_ctlr_write),
+ VGIC_REGISTER(GICD_TYPER, 4, VGIC_32_BIT, dist_typer_read,
+ gic_ignore_write),
+ VGIC_REGISTER(GICD_IIDR, 4, VGIC_32_BIT, dist_iidr_read,
+ gic_ignore_write),
+ VGIC_REGISTER_RAZ_WI(GICD_STATUSR, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICD_SETSPI_NSR, 4, VGIC_32_BIT, gic_zero_read,
+ dist_setclrspi_nsr_write),
+ VGIC_REGISTER(GICD_CLRSPI_NSR, 4, VGIC_32_BIT, gic_zero_read,
+ dist_setclrspi_nsr_write),
+ VGIC_REGISTER_RAZ_WI(GICD_SETSPI_SR, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICD_CLRSPI_SR, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_IGROUPR(0), GICD_IGROUPR(1024), 4,
+ VGIC_32_BIT),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ISENABLER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ISENABLER(32), GICD_ISENABLER(1024), 4,
+ VGIC_32_BIT, dist_isenabler_read, dist_isenabler_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ICENABLER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICENABLER(32), GICD_ICENABLER(1024), 4,
+ VGIC_32_BIT, dist_icenabler_read, dist_icenabler_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ISPENDR(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ISPENDR(32), GICD_ISPENDR(1024), 4,
+ VGIC_32_BIT, dist_ispendr_read, dist_ispendr_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ICPENDR(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICPENDR(32), GICD_ICPENDR(1024), 4,
+ VGIC_32_BIT, dist_icpendr_read, dist_icpendr_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ISACTIVER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ISACTIVER(32), GICD_ISACTIVER(1024), 4,
+ VGIC_32_BIT, dist_isactiver_read, dist_isactiver_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ICACTIVER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICACTIVER(32), GICD_ICACTIVER(1024), 4,
+ VGIC_32_BIT, dist_icactiver_read, dist_icactiver_write),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_IPRIORITYR(0), GICD_IPRIORITYR(32), 4,
+ VGIC_32_BIT | VGIC_8_BIT),
+ VGIC_REGISTER_RANGE(GICD_IPRIORITYR(32), GICD_IPRIORITYR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT, dist_ipriorityr_read,
+ dist_ipriorityr_write),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_ITARGETSR(0), GICD_ITARGETSR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_ICFGR(0), GICD_ICFGR(32), 4,
+ VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICFGR(32), GICD_ICFGR(1024), 4,
+ VGIC_32_BIT, dist_icfgr_read, dist_icfgr_write),
+/*
+ VGIC_REGISTER_RANGE(GICD_IGRPMODR(0), GICD_IGRPMODR(1024), 4,
+ VGIC_32_BIT, dist_igrpmodr_read, dist_igrpmodr_write),
+ VGIC_REGISTER_RANGE(GICD_NSACR(0), GICD_NSACR(1024), 4,
+ VGIC_32_BIT, dist_nsacr_read, dist_nsacr_write),
+*/
+ VGIC_REGISTER_RAZ_WI(GICD_SGIR, 4, VGIC_32_BIT),
+/*
+ VGIC_REGISTER_RANGE(GICD_CPENDSGIR(0), GICD_CPENDSGIR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT, dist_cpendsgir_read,
+ dist_cpendsgir_write),
+ VGIC_REGISTER_RANGE(GICD_SPENDSGIR(0), GICD_SPENDSGIR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT, dist_spendsgir_read,
+ dist_spendsgir_write),
+*/
+ VGIC_REGISTER_RANGE(GICD_IROUTER(32), GICD_IROUTER(1024), 8,
+ VGIC_64_BIT | VGIC_32_BIT, dist_irouter_read, dist_irouter_write),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR4, GICD_PIDR2, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICD_PIDR2, 4, VGIC_32_BIT, gic_pidr2_read,
+ gic_ignore_write),
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR2 + 4, GICD_SIZE, 4, VGIC_32_BIT),
+};
+
+/* GICR_CTLR - Ignore writes as no bits can be set */
+static register_read redist_ctlr_read;
+/* GICR_IIDR */
+static register_read redist_iidr_read;
+/* GICR_TYPER */
+static register_read redist_typer_read;
+/* GICR_STATUSR - RAZ/WI as we don't report errors (yet) */
+/* GICR_WAKER - RAZ/WI from non-secure mode */
+/* GICR_SETLPIR - RAZ/WI as no LPIs are supported */
+/* GICR_CLRLPIR - RAZ/WI as no LPIs are supported */
+/* GICR_PROPBASER - RAZ/WI as no LPIs are supported */
+/* GICR_PENDBASER - RAZ/WI as no LPIs are supported */
+/* GICR_INVLPIR - RAZ/WI as no LPIs are supported */
+/* GICR_INVALLR - RAZ/WI as no LPIs are supported */
+/* GICR_SYNCR - RAZ/WI as no LPIs are supported */
+
+static struct vgic_register redist_rd_registers[] = {
+ VGIC_REGISTER(GICR_CTLR, 4, VGIC_32_BIT, redist_ctlr_read,
+ gic_ignore_write),
+ VGIC_REGISTER(GICR_IIDR, 4, VGIC_32_BIT, redist_iidr_read,
+ gic_ignore_write),
+ VGIC_REGISTER(GICR_TYPER, 8, VGIC_64_BIT | VGIC_32_BIT,
+ redist_typer_read, gic_ignore_write),
+ VGIC_REGISTER_RAZ_WI(GICR_STATUSR, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_WAKER, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_SETLPIR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_CLRLPIR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_PROPBASER, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_PENDBASER, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_INVLPIR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_INVALLR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_SYNCR, 4, VGIC_32_BIT),
+
+ /* These are identical to the dist registers */
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR4, GICD_PIDR2, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICD_PIDR2, 4, VGIC_32_BIT, gic_pidr2_read,
+ gic_ignore_write),
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR2 + 4, GICD_SIZE, 4,
+ VGIC_32_BIT),
+};
+
+/* GICR_IGROUPR0 - RAZ/WI from non-secure mode */
+/* GICR_ISENABLER0 */
+static register_read redist_ienabler0_read;
+static register_write redist_isenabler0_write;
+/* GICR_ICENABLER0 */
+static register_write redist_icenabler0_write;
+/* GICR_ISPENDR0 */
+static register_read redist_ipendr0_read;
+static register_write redist_ispendr0_write;
+/* GICR_ICPENDR0 */
+static register_write redist_icpendr0_write;
+/* GICR_ISACTIVER0 */
+static register_read redist_iactiver0_read;
+static register_write redist_isactiver0_write;
+/* GICR_ICACTIVER0 */
+static register_write redist_icactiver0_write;
+/* GICR_IPRIORITYR */
+static register_read redist_ipriorityr_read;
+static register_write redist_ipriorityr_write;
+/* GICR_ICFGR0 - RAZ/WI from non-secure mode */
+/* GICR_ICFGR1 */
+static register_read redist_icfgr1_read;
+static register_write redist_icfgr1_write;
+/* GICR_IGRPMODR0 - RAZ/WI from non-secure mode */
+/* GICR_NSACR - RAZ/WI from non-secure mode */
+
+static struct vgic_register redist_sgi_registers[] = {
+ VGIC_REGISTER_RAZ_WI(GICR_IGROUPR0, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICR_ISENABLER0, 4, VGIC_32_BIT, redist_ienabler0_read,
+ redist_isenabler0_write),
+ VGIC_REGISTER(GICR_ICENABLER0, 4, VGIC_32_BIT, redist_ienabler0_read,
+ redist_icenabler0_write),
+ VGIC_REGISTER(GICR_ISPENDR0, 4, VGIC_32_BIT, redist_ipendr0_read,
+ redist_ispendr0_write),
+ VGIC_REGISTER(GICR_ICPENDR0, 4, VGIC_32_BIT, redist_ipendr0_read,
+ redist_icpendr0_write),
+ VGIC_REGISTER(GICR_ISACTIVER0, 4, VGIC_32_BIT, redist_iactiver0_read,
+ redist_isactiver0_write),
+ VGIC_REGISTER(GICR_ICACTIVER0, 4, VGIC_32_BIT, redist_iactiver0_read,
+ redist_icactiver0_write),
+ VGIC_REGISTER_RANGE(GICR_IPRIORITYR(0), GICR_IPRIORITYR(32), 4,
+ VGIC_32_BIT | VGIC_8_BIT, redist_ipriorityr_read,
+ redist_ipriorityr_write),
+ VGIC_REGISTER_RAZ_WI(GICR_ICFGR0, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICR_ICFGR1, 4, VGIC_32_BIT, redist_icfgr1_read,
+ redist_icfgr1_write),
+ VGIC_REGISTER_RAZ_WI(GICR_IGRPMODR0, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_NSACR, 4, VGIC_32_BIT),
+};
+
+static struct vgic_v3_virt_features virt_features;
+
+static struct vgic_v3_irq *vgic_v3_get_irq(struct hyp *, int, uint32_t);
+static void vgic_v3_release_irq(struct vgic_v3_irq *);
+
+/* TODO: Move to a common file */
+static int
+mpidr_to_vcpu(struct hyp *hyp, uint64_t mpidr)
+{
+ struct vm *vm;
+ struct hypctx *hypctx;
+
+ vm = hyp->vm;
+ for (int i = 0; i < vm_get_maxcpus(vm); i++) {
+ hypctx = hyp->ctx[i];
+ if (hypctx != NULL && (hypctx->vmpidr_el2 & GICD_AFF) == mpidr)
+ return (i);
+ }
+ return (-1);
+}
+
+static void
+vgic_v3_vminit(device_t dev, struct hyp *hyp)
+{
+ struct vgic_v3 *vgic;
+
+ hyp->vgic = malloc(sizeof(*hyp->vgic), M_VGIC_V3,
+ M_WAITOK | M_ZERO);
+ vgic = hyp->vgic;
+
+ /*
+ * Configure the Distributor control register. The register resets to an
+ * architecturally UNKNOWN value, so we reset to 0 to disable all
+ * functionality controlled by the register.
+ *
+ * The exception is GICD_CTLR.DS, which is RAO/WI when the Distributor
+ * supports only one security state (ARM GIC Architecture Specification
+ * for GICv3 and GICv4, p. 4-464).
+ */
+ vgic->gicd_ctlr = 0;
+
+ mtx_init(&vgic->dist_mtx, "VGICv3 Distributor lock", NULL,
+ MTX_SPIN);
+}
+
+static void
+vgic_v3_cpuinit(device_t dev, struct hypctx *hypctx)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ int i, irqid;
+
+ hypctx->vgic_cpu = malloc(sizeof(*hypctx->vgic_cpu),
+ M_VGIC_V3, M_WAITOK | M_ZERO);
+ vgic_cpu = hypctx->vgic_cpu;
+
+ mtx_init(&vgic_cpu->lr_mtx, "VGICv3 ICH_LR_EL2 lock", NULL, MTX_SPIN);
+
+ /* Set the SGI and PPI state */
+ for (irqid = 0; irqid < VGIC_PRV_I_NUM; irqid++) {
+ irq = &vgic_cpu->private_irqs[irqid];
+
+ mtx_init(&irq->irq_spinmtx, "VGIC IRQ spinlock", NULL,
+ MTX_SPIN);
+ irq->irq = irqid;
+ irq->mpidr = hypctx->vmpidr_el2 & GICD_AFF;
+ irq->target_vcpu = vcpu_vcpuid(hypctx->vcpu);
+ MPASS(irq->target_vcpu >= 0);
+
+ if (irqid < VGIC_SGI_NUM) {
+ /* SGIs */
+ irq->enabled = true;
+ irq->config = VGIC_CONFIG_EDGE;
+ } else {
+ /* PPIs */
+ irq->config = VGIC_CONFIG_LEVEL;
+ }
+ irq->priority = 0;
+ }
+
+ /*
+ * Configure the Interrupt Controller Hyp Control Register.
+ *
+ * ICH_HCR_EL2_En: enable virtual CPU interface.
+ *
+ * Maintenance interrupts are disabled.
+ */
+ hypctx->vgic_v3_regs.ich_hcr_el2 = ICH_HCR_EL2_En;
+
+ /*
+ * Configure the Interrupt Controller Virtual Machine Control Register.
+ *
+ * ICH_VMCR_EL2_VPMR: lowest priority mask for the VCPU interface
+ * ICH_VMCR_EL2_VBPR1_NO_PREEMPTION: disable interrupt preemption for
+ * Group 1 interrupts
+ * ICH_VMCR_EL2_VBPR0_NO_PREEMPTION: disable interrupt preemption for
+ * Group 0 interrupts
+ * ~ICH_VMCR_EL2_VEOIM: writes to EOI registers perform priority drop
+ * and interrupt deactivation.
+ * ICH_VMCR_EL2_VENG0: virtual Group 0 interrupts enabled.
+ * ICH_VMCR_EL2_VENG1: virtual Group 1 interrupts enabled.
+ */
+ hypctx->vgic_v3_regs.ich_vmcr_el2 =
+ (virt_features.min_prio << ICH_VMCR_EL2_VPMR_SHIFT) |
+ ICH_VMCR_EL2_VBPR1_NO_PREEMPTION | ICH_VMCR_EL2_VBPR0_NO_PREEMPTION;
+ hypctx->vgic_v3_regs.ich_vmcr_el2 &= ~ICH_VMCR_EL2_VEOIM;
+ hypctx->vgic_v3_regs.ich_vmcr_el2 |= ICH_VMCR_EL2_VENG0 |
+ ICH_VMCR_EL2_VENG1;
+
+ hypctx->vgic_v3_regs.ich_lr_num = virt_features.ich_lr_num;
+ for (i = 0; i < hypctx->vgic_v3_regs.ich_lr_num; i++)
+ hypctx->vgic_v3_regs.ich_lr_el2[i] = 0UL;
+ vgic_cpu->ich_lr_used = 0;
+ TAILQ_INIT(&vgic_cpu->irq_act_pend);
+
+ hypctx->vgic_v3_regs.ich_apr_num = virt_features.ich_apr_num;
+}
+
+static void
+vgic_v3_cpucleanup(device_t dev, struct hypctx *hypctx)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ int irqid;
+
+ vgic_cpu = hypctx->vgic_cpu;
+ for (irqid = 0; irqid < VGIC_PRV_I_NUM; irqid++) {
+ irq = &vgic_cpu->private_irqs[irqid];
+ mtx_destroy(&irq->irq_spinmtx);
+ }
+
+ mtx_destroy(&vgic_cpu->lr_mtx);
+ free(hypctx->vgic_cpu, M_VGIC_V3);
+}
+
+static void
+vgic_v3_vmcleanup(device_t dev, struct hyp *hyp)
+{
+ mtx_destroy(&hyp->vgic->dist_mtx);
+ free(hyp->vgic, M_VGIC_V3);
+}
+
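+/*
+ * The size of the redistributor region registered by the guest bounds how
+ * many vcpus can be given a redistributor frame, so derive the maximum
+ * vcpu count from it, capped at the VM's maximum.
+ */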
+static int
+vgic_v3_max_cpu_count(device_t dev, struct hyp *hyp)
+{
+ struct vgic_v3 *vgic;
+ size_t count;
+ int16_t max_count;
+
+ vgic = hyp->vgic;
+ max_count = vm_get_maxcpus(hyp->vm);
+
+ /* No redistributor range configured yet, assume the maximum number of CPUs */
+ if (vgic->redist_start == 0 && vgic->redist_end == 0)
+ return (max_count);
+
+ count = (vgic->redist_end - vgic->redist_start) /
+ (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE);
+
+ /*
+ * max_count is smaller than INT_MAX, so comparing against it also
+ * limits count to a positive integer value.
+ */
+ if (count > max_count)
+ return (max_count);
+
+ return (count);
+}
+
+static bool
+vgic_v3_irq_pending(struct vgic_v3_irq *irq)
+{
+ if ((irq->config & VGIC_CONFIG_MASK) == VGIC_CONFIG_LEVEL) {
+ return (irq->pending || irq->level);
+ } else {
+ return (irq->pending);
+ }
+}
+
+static bool
+vgic_v3_queue_irq(struct hyp *hyp, struct vgic_v3_cpu *vgic_cpu,
+ int vcpuid, struct vgic_v3_irq *irq)
+{
+ MPASS(vcpuid >= 0);
+ MPASS(vcpuid < vm_get_maxcpus(hyp->vm));
+
+ mtx_assert(&vgic_cpu->lr_mtx, MA_OWNED);
+ mtx_assert(&irq->irq_spinmtx, MA_OWNED);
+
+ /* No need to queue the IRQ */
+ if (!irq->level && !irq->pending)
+ return (false);
+
+ if (!irq->on_aplist) {
+ irq->on_aplist = true;
+ TAILQ_INSERT_TAIL(&vgic_cpu->irq_act_pend, irq, act_pend_list);
+ }
+ return (true);
+}
+
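+/*
+ * Merge a narrow write into an existing 64-bit register value. offset and
+ * size are in bytes; a full-width 8 byte write at offset 0 simply replaces
+ * the old value.
+ */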
+static uint64_t
+gic_reg_value_64(uint64_t field, uint64_t val, u_int offset, u_int size)
+{
+ uint32_t mask;
+
+ if (offset != 0 || size != 8) {
+ mask = ((1ul << (size * 8)) - 1) << (offset * 8);
+ /* Shift the new bits to the correct place */
+ val <<= (offset * 8);
+ /* Keep only the interesting bits */
+ val &= mask;
+ /* Add the bits we are keeping from the old value */
+ val |= field & ~mask;
+ }
+
+ return (val);
+}
+
+static void
+gic_pidr2_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ *rval = GICR_PIDR2_ARCH_GICv3 << GICR_PIDR2_ARCH_SHIFT;
+}
+
+/* Common read-as-zero/write-ignored helpers */
+static void
+gic_zero_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ *rval = 0;
+}
+
+static void
+gic_ignore_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ /* Nothing to do */
+}
+
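+/*
+ * Helpers for the GICD_I[SC]ENABLER and GICR_I[SC]ENABLER0 layout: each
+ * 32-bit register covers 32 interrupts, one bit per interrupt, so register
+ * index n maps to INTIDs [n * 32, n * 32 + 31]. Writes only act on the
+ * bits that are set, matching the set/clear register semantics.
+ */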
+static uint64_t
+read_enabler(struct hypctx *hypctx, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ if (irq->enabled)
+ ret |= 1u << i;
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
+static void
+write_enabler(struct hypctx *hypctx, int n, bool set, uint64_t val)
+{
+ struct vgic_v3_irq *irq;
+ uint32_t irq_base;
+ int i;
+
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ /* We only change interrupts when the appropriate bit is set */
+ if ((val & (1u << i)) == 0)
+ continue;
+
+ /* Find the interrupt this bit represents */
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ irq->enabled = set;
+ vgic_v3_release_irq(irq);
+ }
+}
+
+static uint64_t
+read_pendr(struct hypctx *hypctx, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ if (vgic_v3_irq_pending(irq))
+ ret |= 1u << i;
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
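+/*
+ * Setting a pending bit may make an interrupt deliverable, so queue it on
+ * the target vcpu's active/pending list and notify that vcpu. Clearing a
+ * bit just drops the pending state.
+ */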
+static uint64_t
+write_pendr(struct hypctx *hypctx, int n, bool set, uint64_t val)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ struct hyp *hyp;
+ struct hypctx *target_hypctx;
+ uint64_t ret;
+ uint32_t irq_base;
+ int target_vcpu, i;
+ bool notify;
+
+ hyp = hypctx->hyp;
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ /* We only change interrupts when the appropriate bit is set */
+ if ((val & (1u << i)) == 0)
+ continue;
+
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ notify = false;
+ target_vcpu = irq->target_vcpu;
+ if (target_vcpu < 0)
+ goto next_irq;
+ target_hypctx = hyp->ctx[target_vcpu];
+ if (target_hypctx == NULL)
+ goto next_irq;
+ vgic_cpu = target_hypctx->vgic_cpu;
+
+ if (!set) {
+ /* pending -> not pending */
+ irq->pending = false;
+ } else {
+ irq->pending = true;
+ mtx_lock_spin(&vgic_cpu->lr_mtx);
+ notify = vgic_v3_queue_irq(hyp, vgic_cpu, target_vcpu,
+ irq);
+ mtx_unlock_spin(&vgic_cpu->lr_mtx);
+ }
+next_irq:
+ vgic_v3_release_irq(irq);
+
+ if (notify)
+ vcpu_notify_event(vm_vcpu(hyp->vm, target_vcpu));
+ }
+
+ return (ret);
+}
+
+static uint64_t
+read_activer(struct hypctx *hypctx, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ if (irq->active)
+ ret |= 1u << i;
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
+static void
+write_activer(struct hypctx *hypctx, u_int n, bool set, uint64_t val)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ struct hyp *hyp;
+ struct hypctx *target_hypctx;
+ uint32_t irq_base;
+ int target_vcpu, i;
+ bool notify;
+
+ hyp = hypctx->hyp;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ /* We only change interrupts when the appropriate bit is set */
+ if ((val & (1u << i)) == 0)
+ continue;
+
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ notify = false;
+ target_vcpu = irq->target_vcpu;
+ if (target_vcpu < 0)
+ goto next_irq;
+ target_hypctx = hyp->ctx[target_vcpu];
+ if (target_hypctx == NULL)
+ goto next_irq;
+ vgic_cpu = target_hypctx->vgic_cpu;
+
+ if (!set) {
+ /* active -> not active */
+ irq->active = false;
+ } else {
+ /* not active -> active */
+ irq->active = true;
+ mtx_lock_spin(&vgic_cpu->lr_mtx);
+ notify = vgic_v3_queue_irq(hyp, vgic_cpu, target_vcpu,
+ irq);
+ mtx_unlock_spin(&vgic_cpu->lr_mtx);
+ }
+next_irq:
+ vgic_v3_release_irq(irq);
+
+ if (notify)
+ vcpu_notify_event(vm_vcpu(hyp->vm, target_vcpu));
+ }
+}
+
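+/*
+ * The priority registers hold one byte per interrupt, four interrupts per
+ * 32-bit register. They are also byte-accessible, which is why
+ * write_priorityr takes a starting INTID and access size rather than a
+ * register index.
+ */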
+static uint64_t
+read_priorityr(struct hypctx *hypctx, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 4;
+ for (i = 0; i < 4; i++) {
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ ret |= ((uint64_t)irq->priority) << (i * 8);
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
+static void
+write_priorityr(struct hypctx *hypctx, u_int irq_base, u_int size, uint64_t val)
+{
+ struct vgic_v3_irq *irq;
+ int i;
+
+ for (i = 0; i < size; i++) {
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ /* Set the priority. We support 32 priority steps (5 bits) */
+ irq->priority = (val >> (i * 8)) & 0xf8;
+ vgic_v3_release_irq(irq);
+ }
+}
+
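+/*
+ * The configuration registers use two bits per interrupt, 16 interrupts
+ * per 32-bit register. Bit 1 of each field selects edge (1) or level (0)
+ * triggering; bit 0 is RES0.
+ */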
+static uint64_t
+read_config(struct hypctx *hypctx, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 16;
+ for (i = 0; i < 16; i++) {
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ ret |= ((uint64_t)irq->config) << (i * 2);
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
+static void
+write_config(struct hypctx *hypctx, int n, uint64_t val)
+{
+ struct vgic_v3_irq *irq;
+ uint32_t irq_base;
+ int i;
+
+ irq_base = n * 16;
+ for (i = 0; i < 16; i++) {
+ /*
+ * The config can't be changed for SGIs and PPIs. SGIs have
+ * an edge-triggered behaviour, and the register is
+ * implementation defined to be read-only for PPIs.
+ */
+ if (irq_base + i < VGIC_PRV_I_NUM)
+ continue;
+
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ /* Bit 0 is RES0 */
+ irq->config = (val >> (i * 2)) & VGIC_CONFIG_MASK;
+ vgic_v3_release_irq(irq);
+ }
+}
+
+static uint64_t
+read_route(struct hypctx *hypctx, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t mpidr;
+
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu), n);
+ if (irq == NULL)
+ return (0);
+
+ mpidr = irq->mpidr;
+ vgic_v3_release_irq(irq);
+
+ return (mpidr);
+}
+
+static void
+write_route(struct hypctx *hypctx, int n, uint64_t val, u_int offset,
+ u_int size)
+{
+ struct vgic_v3_irq *irq;
+
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu), n);
+ if (irq == NULL)
+ return;
+
+ irq->mpidr = gic_reg_value_64(irq->mpidr, val, offset, size) & GICD_AFF;
+ irq->target_vcpu = mpidr_to_vcpu(hypctx->hyp, irq->mpidr);
+ /*
+ * If the interrupt is pending we can either use the old mpidr, or
+ * the new mpidr. To simplify this code we use the old value so we
+ * don't need to move the interrupt until the next time it is
+ * moved to the pending state.
+ */
+ vgic_v3_release_irq(irq);
+}
+
+/*
+ * Distributor register handlers.
+ */
+/* GICD_CTLR */
+static void
+dist_ctlr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3 *vgic;
+
+ hyp = hypctx->hyp;
+ vgic = hyp->vgic;
+
+ mtx_lock_spin(&vgic->dist_mtx);
+ *rval = vgic->gicd_ctlr;
+ mtx_unlock_spin(&vgic->dist_mtx);
+
+ /* Writes are never pending */
+ *rval &= ~GICD_CTLR_RWP;
+}
+
+static void
+dist_ctlr_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ struct vgic_v3 *vgic;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ vgic = hypctx->hyp->vgic;
+
+ /*
+ * GICv2 backwards compatibility is not implemented so
+ * ARE_NS is RAO/WI. This means EnableGrp1 is RES0.
+ *
+ * EnableGrp1A is supported, and RWP is read-only.
+ *
+ * All other bits are RES0 from non-secure mode as we
+ * implement as if we are in a system with two security
+ * states.
+ */
+ wval &= GICD_CTLR_G1A;
+ wval |= GICD_CTLR_ARE_NS;
+ mtx_lock_spin(&vgic->dist_mtx);
+ vgic->gicd_ctlr = wval;
+ /* TODO: Wake any vcpus that have interrupts pending */
+ mtx_unlock_spin(&vgic->dist_mtx);
+}
+
+/* GICD_TYPER */
+static void
+dist_typer_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ uint32_t typer;
+
+ typer = (10 - 1) << GICD_TYPER_IDBITS_SHIFT;
+ typer |= GICD_TYPER_MBIS;
+ /* ITLinesNumber: the maximum SPI INTID is 32 * (ITLinesNumber + 1) - 1 */
+ typer |= howmany(VGIC_NIRQS + 1, 32) - 1;
+
+ *rval = typer;
+}
+
+/* GICD_IIDR */
+static void
+dist_iidr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ *rval = VGIC_IIDR;
+}
+
+/* GICD_SETSPI_NSR & GICD_CLRSPI_NSR */
+static void
+dist_setclrspi_nsr_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ uint32_t irqid;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ irqid = wval & GICD_SPI_INTID_MASK;
+ INJECT_IRQ(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu), irqid,
+ reg == GICD_SETSPI_NSR);
+}
+
+/* GICD_ISENABLER */
+static void
+dist_isenabler_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_ISENABLER(0)) / 4;
+ /* GICD_ISENABLER0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ *rval = read_enabler(hypctx, n);
+}
+
+static void
+dist_isenabler_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ n = (reg - GICD_ISENABLER(0)) / 4;
+ /* GICD_ISENABLER0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ write_enabler(hypctx, n, true, wval);
+}
+
+/* GICD_ICENABLER */
+static void
+dist_icenabler_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_ICENABLER(0)) / 4;
+ /* GICD_ICENABLER0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ *rval = read_enabler(hypctx, n);
+}
+
+static void
+dist_icenabler_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ n = (reg - GICD_ICENABLER(0)) / 4;
+ /* GICD_ICENABLER0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ write_enabler(hypctx, n, false, wval);
+}
+
+/* GICD_ISPENDR */
+static void
+dist_ispendr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_ISPENDR(0)) / 4;
+ /* GICD_ISPENDR0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ *rval = read_pendr(hypctx, n);
+}
+
+static void
+dist_ispendr_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ n = (reg - GICD_ISPENDR(0)) / 4;
+ /* GICD_ISPENDR0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ write_pendr(hypctx, n, true, wval);
+}
+
+/* GICD_ICPENDR */
+static void
+dist_icpendr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_ICPENDR(0)) / 4;
+ /* GICD_ICPENDR0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ *rval = read_pendr(hypctx, n);
+}
+
+static void
+dist_icpendr_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ n = (reg - GICD_ICPENDR(0)) / 4;
+ /* GICD_ICPENDR0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ write_pendr(hypctx, n, false, wval);
+}
+
+/* GICD_ISACTIVER */
+/* Affinity routing is enabled so isactiver0 is RAZ/WI */
+static void
+dist_isactiver_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_ISACTIVER(0)) / 4;
+ /* GICD_ISACTIVER0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ *rval = read_activer(hypctx, n);
+}
+
+static void
+dist_isactiver_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ n = (reg - GICD_ISACTIVER(0)) / 4;
+ /* GICD_ISACTIVE0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ write_activer(hypctx, n, true, wval);
+}
+
+/* GICD_ICACTIVER */
+static void
+dist_icactiver_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ int n;
+
+ n = (reg - GICD_ICACTIVER(0)) / 4;
+ /* GICD_ICACTIVE0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ *rval = read_activer(hypctx, n);
+}
+
+static void
+dist_icactiver_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ n = (reg - GICD_ICACTIVER(0)) / 4;
+ /* GICD_ICACTIVE0 is RAZ/WI so handled separately */
+ MPASS(n > 0);
+ write_activer(hypctx, n, false, wval);
+}
+
+/* GICD_IPRIORITYR */
+/* Affinity routing is enabled so ipriorityr0-7 is RAZ/WI */
+static void
+dist_ipriorityr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ int n;
+
+ n = (reg - GICD_IPRIORITYR(0)) / 4;
+ /* GICD_IPRIORITY0-7 is RAZ/WI so handled separately */
+ MPASS(n > 7);
+ *rval = read_priorityr(hypctx, n);
+}
+
+static void
+dist_ipriorityr_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ u_int irq_base;
+
+ irq_base = (reg - GICD_IPRIORITYR(0)) + offset;
+ /* GICD_IPRIORITY0-7 is RAZ/WI so handled separately */
+ MPASS(irq_base > 31);
+ write_priorityr(hypctx, irq_base, size, wval);
+}
+
+/* GICD_ICFGR */
+static void
+dist_icfgr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_ICFGR(0)) / 4;
+ /* GICD_ICFGR0-1 are RAZ/WI so handled separately */
+ MPASS(n > 1);
+ *rval = read_config(hypctx, n);
+}
+
+static void
+dist_icfgr_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ n = (reg - GICD_ICFGR(0)) / 4;
+ /* GICD_ICFGR0-1 are RAZ/WI so handled separately */
+ MPASS(n > 1);
+ write_config(hypctx, n, wval);
+}
+
+/* GICD_IROUTER */
+static void
+dist_irouter_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_IROUTER(0)) / 8;
+ /* GICD_IROUTER0-31 don't exist */
+ MPASS(n > 31);
+ *rval = read_route(hypctx, n);
+}
+
+static void
+dist_irouter_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ int n;
+
+ n = (reg - GICD_IROUTER(0)) / 8;
+ /* GICD_IROUTER0-31 don't exist */
+ MPASS(n > 31);
+ write_route(hypctx, n, wval, offset, size);
+}
+
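+/*
+ * Table-driven register dispatch. Find the entry covering the accessed
+ * range, align the access down to the register's natural size and, if the
+ * access size is one the register supports, call its handler and extract
+ * the accessed bytes. Unsupported access sizes read as zero and writes
+ * are ignored, as permitted by the architecture.
+ */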
+static bool
+vgic_register_read(struct hypctx *hypctx, struct vgic_register *reg_list,
+ u_int reg_list_size, u_int reg, u_int size, uint64_t *rval, void *arg)
+{
+ u_int i, offset;
+
+ for (i = 0; i < reg_list_size; i++) {
+ if (reg_list[i].start <= reg && reg_list[i].end >= reg + size) {
+ offset = reg & (reg_list[i].size - 1);
+ reg -= offset;
+ if ((reg_list[i].flags & size) != 0) {
+ reg_list[i].read(hypctx, reg, rval, NULL);
+
+ /* Move the bits into the correct place */
+ *rval >>= (offset * 8);
+ if (size < 8) {
+ *rval &= (1ul << (size * 8)) - 1;
+ }
+ } else {
+ /*
+ * The access is an invalid size. Section
+ * 12.1.3 "GIC memory-mapped register access"
+ * of the GICv3 and GICv4 spec issue H
+ * (IHI0069) lists the options. For a read
+ * the controller returns unknown data, in
+ * this case it is zero.
+ */
+ *rval = 0;
+ }
+ return (true);
+ }
+ }
+ return (false);
+}
+
+static bool
+vgic_register_write(struct hypctx *hypctx, struct vgic_register *reg_list,
+ u_int reg_list_size, u_int reg, u_int size, uint64_t wval, void *arg)
+{
+ u_int i, offset;
+
+ for (i = 0; i < reg_list_size; i++) {
+ if (reg_list[i].start <= reg && reg_list[i].end >= reg + size) {
+ offset = reg & (reg_list[i].size - 1);
+ reg -= offset;
+ if ((reg_list[i].flags & size) != 0) {
+ reg_list[i].write(hypctx, reg, offset,
+ size, wval, NULL);
+ } else {
+ /*
+ * See the comment in vgic_register_read.
+ * For writes the controller ignores the
+ * operation.
+ */
+ }
+ return (true);
+ }
+ }
+ return (false);
+}
+
+static int
+dist_read(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vgic_v3 *vgic;
+ uint64_t reg;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ vgic = hyp->vgic;
+
+ /* Check the register is one of ours and is the correct size */
+ if (fault_ipa < vgic->dist_start || fault_ipa + size > vgic->dist_end) {
+ return (EINVAL);
+ }
+
+ reg = fault_ipa - vgic->dist_start;
+ /*
+ * As described in vgic_register_read an access with an invalid
+ * alignment is read with an unknown value
+ */
+ if ((reg & (size - 1)) != 0) {
+ *rval = 0;
+ return (0);
+ }
+
+ if (vgic_register_read(hypctx, dist_registers, nitems(dist_registers),
+ reg, size, rval, NULL))
+ return (0);
+
+ /* Reserved register addresses are RES0 so we can hardwire the value to 0 */
+ *rval = 0;
+
+ return (0);
+}
+
+static int
+dist_write(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vgic_v3 *vgic;
+ uint64_t reg;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ vgic = hyp->vgic;
+
+ /* Check the register is one of ours and is the correct size */
+ if (fault_ipa < vgic->dist_start || fault_ipa + size > vgic->dist_end) {
+ return (EINVAL);
+ }
+
+ reg = fault_ipa - vgic->dist_start;
+ /*
+ * As described in vgic_register_read an access with an invalid
+ * alignment is write ignored.
+ */
+ if ((reg & (size - 1)) != 0)
+ return (0);
+
+ if (vgic_register_write(hypctx, dist_registers, nitems(dist_registers),
+ reg, size, wval, NULL))
+ return (0);
+
+ /* Reserved register addresses are RES0 so we can ignore the write */
+ return (0);
+}
+
+/*
+ * Redistributor register handlers.
+ *
+ * RD_base:
+ */
+/* GICR_CTLR */
+static void
+redist_ctlr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ /* LPIs not supported */
+ *rval = 0;
+}
+
+/* GICR_IIDR */
+static void
+redist_iidr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ *rval = VGIC_IIDR;
+}
+
+/* GICR_TYPER */
+static void
+redist_typer_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ uint64_t aff, gicr_typer, vmpidr_el2;
+ bool last_vcpu;
+
+ last_vcpu = false;
+ if (vcpu_vcpuid(hypctx->vcpu) == (vgic_max_cpu_count(hypctx->hyp) - 1))
+ last_vcpu = true;
+
+ vmpidr_el2 = hypctx->vmpidr_el2;
+ MPASS(vmpidr_el2 != 0);
+ /*
+ * Get affinity for the current CPU. The guest CPU affinity is taken
+ * from VMPIDR_EL2. The Redistributor corresponding to this CPU is
+ * the Redistributor with the same affinity from GICR_TYPER.
+ */
+ aff = (CPU_AFF3(vmpidr_el2) << 24) | (CPU_AFF2(vmpidr_el2) << 16) |
+ (CPU_AFF1(vmpidr_el2) << 8) | CPU_AFF0(vmpidr_el2);
+
+ /* Set up GICR_TYPER. */
+ gicr_typer = aff << GICR_TYPER_AFF_SHIFT;
+ /* Set the vcpu as the processor ID */
+ gicr_typer |=
+ (uint64_t)vcpu_vcpuid(hypctx->vcpu) << GICR_TYPER_CPUNUM_SHIFT;
+
+ if (last_vcpu)
+ /* Mark the last Redistributor */
+ gicr_typer |= GICR_TYPER_LAST;
+
+ *rval = gicr_typer;
+}
+
+/*
+ * SGI_base:
+ */
+/* GICR_ISENABLER0 */
+static void
+redist_ienabler0_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ *rval = read_enabler(hypctx, 0);
+}
+
+static void
+redist_isenabler0_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ write_enabler(hypctx, 0, true, wval);
+}
+
+/* GICR_ICENABLER0 */
+static void
+redist_icenabler0_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ write_enabler(hypctx, 0, false, wval);
+}
+
+/* GICR_ISPENDR0 */
+static void
+redist_ipendr0_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ *rval = read_pendr(hypctx, 0);
+}
+
+static void
+redist_ispendr0_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ write_pendr(hypctx, 0, true, wval);
+}
+
+/* GICR_ICPENDR0 */
+static void
+redist_icpendr0_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ write_pendr(hypctx, 0, false, wval);
+}
+
+/* GICR_ISACTIVER0 */
+static void
+redist_iactiver0_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ *rval = read_activer(hypctx, 0);
+}
+
+static void
+redist_isactiver0_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ write_activer(hypctx, 0, true, wval);
+}
+
+/* GICR_ICACTIVER0 */
+static void
+redist_icactiver0_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ write_activer(hypctx, 0, false, wval);
+}
+
+/* GICR_IPRIORITYR */
+static void
+redist_ipriorityr_read(struct hypctx *hypctx, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ int n;
+
+ n = (reg - GICR_IPRIORITYR(0)) / 4;
+ *rval = read_priorityr(hypctx, n);
+}
+
+static void
+redist_ipriorityr_write(struct hypctx *hypctx, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ u_int irq_base;
+
+ irq_base = (reg - GICR_IPRIORITYR(0)) + offset;
+ write_priorityr(hypctx, irq_base, size, wval);
+}
+
+/* GICR_ICFGR1 */
+static void
+redist_icfgr1_read(struct hypctx *hypctx, u_int reg, uint64_t *rval, void *arg)
+{
+ *rval = read_config(hypctx, 1);
+}
+
+static void
+redist_icfgr1_write(struct hypctx *hypctx, u_int reg, u_int offset, u_int size,
+ uint64_t wval, void *arg)
+{
+ MPASS(offset == 0);
+ MPASS(size == 4);
+ write_config(hypctx, 1, wval);
+}
+
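+/*
+ * Each vcpu owns one redistributor frame of GICR_RD_BASE_SIZE +
+ * GICR_SGI_BASE_SIZE bytes, laid out contiguously from redist_start. The
+ * frame index selects the target vcpu and the offset within the frame
+ * selects the RD_base or SGI_base register page.
+ */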
+static int
+redist_read(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx, *target_hypctx;
+ struct vgic_v3 *vgic;
+ uint64_t reg;
+ int vcpuid;
+
+ /* Find the current vcpu ctx to get the vgic struct */
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ vgic = hyp->vgic;
+
+ /* Check the register is one of ours and is the correct size */
+ if (fault_ipa < vgic->redist_start ||
+ fault_ipa + size > vgic->redist_end) {
+ return (EINVAL);
+ }
+
+ vcpuid = (fault_ipa - vgic->redist_start) /
+ (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE);
+ if (vcpuid >= vm_get_maxcpus(hyp->vm)) {
+ /*
+ * This should never happen, but let's be defensive so that if it
+ * does we don't panic a non-INVARIANTS kernel.
+ */
+#ifdef INVARIANTS
+ panic("%s: Invalid vcpuid %d", __func__, vcpuid);
+#else
+ *rval = 0;
+ return (0);
+#endif
+ }
+
+ /* Find the target vcpu ctx for the access */
+ target_hypctx = hyp->ctx[vcpuid];
+ if (target_hypctx == NULL) {
+ /*
+ * The CPU has not yet started. The redistributor and CPU are
+ * in the same power domain. As such the redistributor will
+ * also be powered down so any access will raise an external
+ * abort.
+ */
+ raise_data_insn_abort(hypctx, fault_ipa, true,
+ ISS_DATA_DFSC_EXT);
+ return (0);
+ }
+
+ reg = (fault_ipa - vgic->redist_start) %
+ (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE);
+
+ /*
+ * As described in vgic_register_read an access with an invalid
+ * alignment is read with an unknown value
+ */
+ if ((reg & (size - 1)) != 0) {
+ *rval = 0;
+ return (0);
+ }
+
+ if (reg < GICR_RD_BASE_SIZE) {
+ if (vgic_register_read(target_hypctx, redist_rd_registers,
+ nitems(redist_rd_registers), reg, size, rval, NULL))
+ return (0);
+ } else if (reg < (GICR_SGI_BASE + GICR_SGI_BASE_SIZE)) {
+ if (vgic_register_read(target_hypctx, redist_sgi_registers,
+ nitems(redist_sgi_registers), reg - GICR_SGI_BASE, size,
+ rval, NULL))
+ return (0);
+ }
+
+ /* Reserved register addresses are RES0 so we can hardwire the value to 0 */
+ *rval = 0;
+ return (0);
+}
+
+static int
+redist_write(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx, *target_hypctx;
+ struct vgic_v3 *vgic;
+ uint64_t reg;
+ int vcpuid;
+
+ /* Find the current vcpu ctx to get the vgic struct */
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ vgic = hyp->vgic;
+
+ /* Check the register is one of ours and is the correct size */
+ if (fault_ipa < vgic->redist_start ||
+ fault_ipa + size > vgic->redist_end) {
+ return (EINVAL);
+ }
+
+ vcpuid = (fault_ipa - vgic->redist_start) /
+ (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE);
+ if (vcpuid >= vm_get_maxcpus(hyp->vm)) {
+ /*
+ * This should never happen, but let's be defensive so that if it
+ * does we don't panic a non-INVARIANTS kernel.
+ */
+#ifdef INVARIANTS
+ panic("%s: Invalid vcpuid %d", __func__, vcpuid);
+#else
+ return (0);
+#endif
+ }
+
+ /* Find the target vcpu ctx for the access */
+ target_hypctx = hyp->ctx[vcpuid];
+ if (target_hypctx == NULL) {
+ /*
+ * The CPU has not yet started. The redistributor and CPU are
+ * in the same power domain. As such the redistributor will
+ * also be powered down so any access will raise an external
+ * abort.
+ */
+ raise_data_insn_abort(hypctx, fault_ipa, true,
+ ISS_DATA_DFSC_EXT);
+ return (0);
+ }
+
+ reg = (fault_ipa - vgic->redist_start) %
+ (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE);
+
+ /*
+ * As described in vgic_register_read an access with an invalid
+ * alignment is write ignored.
+ */
+ if ((reg & (size - 1)) != 0)
+ return (0);
+
+ if (reg < GICR_RD_BASE_SIZE) {
+ if (vgic_register_write(target_hypctx, redist_rd_registers,
+ nitems(redist_rd_registers), reg, size, wval, NULL))
+ return (0);
+ } else if (reg < (GICR_SGI_BASE + GICR_SGI_BASE_SIZE)) {
+ if (vgic_register_write(target_hypctx, redist_sgi_registers,
+ nitems(redist_sgi_registers), reg - GICR_SGI_BASE, size,
+ wval, NULL))
+ return (0);
+ }
+
+ /* Reserved register addresses are RES0 so we can ignore the write */
+ return (0);
+}
+
+static int
+vgic_v3_icc_sgi1r_read(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ /*
+ * TODO: Inject an unknown exception.
+ */
+ *rval = 0;
+ return (0);
+}
+
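+/*
+ * Writes to ICC_SGI1R_EL1 generate SGIs. With IRM clear the target vcpus
+ * are built from the Aff3.Aff2.Aff1 fields plus one Aff0 value per set
+ * TargetList bit; with IRM set the SGI is sent to every active vcpu other
+ * than the writer.
+ */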
+static int
+vgic_v3_icc_sgi1r_write(struct vcpu *vcpu, uint64_t rval, void *arg)
+{
+ struct vm *vm;
+ struct hyp *hyp;
+ cpuset_t active_cpus;
+ uint64_t mpidr, aff1, aff2, aff3;
+ uint32_t irqid;
+ int cpus, cpu_off, target_vcpuid, vcpuid;
+
+ vm = vcpu_vm(vcpu);
+ hyp = vm_get_cookie(vm);
+ active_cpus = vm_active_cpus(vm);
+ vcpuid = vcpu_vcpuid(vcpu);
+
+ irqid = ICC_SGI1R_EL1_SGIID_VAL(rval) >> ICC_SGI1R_EL1_SGIID_SHIFT;
+ if ((rval & ICC_SGI1R_EL1_IRM) == 0) {
+ /* A non-zero RangeSelector selects Aff0 values that point at no vcpus */
+ if (ICC_SGI1R_EL1_RS_VAL(rval) != 0)
+ return (0);
+
+ aff1 = ICC_SGI1R_EL1_AFF1_VAL(rval) >> ICC_SGI1R_EL1_AFF1_SHIFT;
+ aff2 = ICC_SGI1R_EL1_AFF2_VAL(rval) >> ICC_SGI1R_EL1_AFF2_SHIFT;
+ aff3 = ICC_SGI1R_EL1_AFF3_VAL(rval) >> ICC_SGI1R_EL1_AFF3_SHIFT;
+ mpidr = aff3 << MPIDR_AFF3_SHIFT |
+ aff2 << MPIDR_AFF2_SHIFT | aff1 << MPIDR_AFF1_SHIFT;
+
+ cpus = ICC_SGI1R_EL1_TL_VAL(rval) >> ICC_SGI1R_EL1_TL_SHIFT;
+ cpu_off = 0;
+ while (cpus > 0) {
+ if (cpus & 1) {
+ target_vcpuid = mpidr_to_vcpu(hyp,
+ mpidr | (cpu_off << MPIDR_AFF0_SHIFT));
+ if (target_vcpuid >= 0 &&
+ CPU_ISSET(target_vcpuid, &active_cpus)) {
+ INJECT_IRQ(hyp, target_vcpuid, irqid,
+ true);
+ }
+ }
+ cpu_off++;
+ cpus >>= 1;
+ }
+ } else {
+ /* Send an IPI to all CPUs other than the current CPU */
+ for (target_vcpuid = 0; target_vcpuid < vm_get_maxcpus(vm);
+ target_vcpuid++) {
+ if (CPU_ISSET(target_vcpuid, &active_cpus) &&
+ target_vcpuid != vcpuid) {
+ INJECT_IRQ(hyp, target_vcpuid, irqid, true);
+ }
+ }
+ }
+
+ return (0);
+}
+
+static void
+vgic_v3_mmio_init(struct hyp *hyp)
+{
+ struct vgic_v3 *vgic;
+ struct vgic_v3_irq *irq;
+ int i;
+
+ /* Allocate memory for the SPIs */
+ vgic = hyp->vgic;
+ vgic->irqs = malloc((VGIC_NIRQS - VGIC_PRV_I_NUM) *
+ sizeof(*vgic->irqs), M_VGIC_V3, M_WAITOK | M_ZERO);
+
+ for (i = 0; i < VGIC_NIRQS - VGIC_PRV_I_NUM; i++) {
+ irq = &vgic->irqs[i];
+
+ mtx_init(&irq->irq_spinmtx, "VGIC IRQ spinlock", NULL,
+ MTX_SPIN);
+
+ irq->irq = i + VGIC_PRV_I_NUM;
+ }
+}
+
+static void
+vgic_v3_mmio_destroy(struct hyp *hyp)
+{
+ struct vgic_v3 *vgic;
+ struct vgic_v3_irq *irq;
+ int i;
+
+ vgic = hyp->vgic;
+ for (i = 0; i < VGIC_NIRQS - VGIC_PRV_I_NUM; i++) {
+ irq = &vgic->irqs[i];
+
+ mtx_destroy(&irq->irq_spinmtx);
+ }
+
+ free(vgic->irqs, M_VGIC_V3);
+}
+
+static int
+vgic_v3_attach_to_vm(device_t dev, struct hyp *hyp, struct vm_vgic_descr *descr)
+{
+ struct vm *vm;
+ struct vgic_v3 *vgic;
+ size_t cpu_count;
+
+ if (descr->ver.version != 3)
+ return (EINVAL);
+
+ /*
+ * The register bases need to be 64k aligned
+ * The redist register space is the RD + SGI size
+ */
+ if (!__is_aligned(descr->v3_regs.dist_start, PAGE_SIZE_64K) ||
+ !__is_aligned(descr->v3_regs.redist_start, PAGE_SIZE_64K) ||
+ !__is_aligned(descr->v3_regs.redist_size,
+ GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE))
+ return (EINVAL);
+
+ /* The dist register space is 1 64k block */
+ if (descr->v3_regs.dist_size != PAGE_SIZE_64K)
+ return (EINVAL);
+
+ vm = hyp->vm;
+
+ /*
+ * Return an error if the redist space is too large for the maximum
+ * number of CPUs we support.
+ */
+ cpu_count = descr->v3_regs.redist_size /
+ (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE);
+ if (cpu_count > vm_get_maxcpus(vm))
+ return (EINVAL);
+
+ vgic = hyp->vgic;
+
+ /* Set the distributor address and size for trapping guest access. */
+ vgic->dist_start = descr->v3_regs.dist_start;
+ vgic->dist_end = descr->v3_regs.dist_start + descr->v3_regs.dist_size;
+
+ vgic->redist_start = descr->v3_regs.redist_start;
+ vgic->redist_end = descr->v3_regs.redist_start +
+ descr->v3_regs.redist_size;
+
+ vm_register_inst_handler(vm, descr->v3_regs.dist_start,
+ descr->v3_regs.dist_size, dist_read, dist_write);
+ vm_register_inst_handler(vm, descr->v3_regs.redist_start,
+ descr->v3_regs.redist_size, redist_read, redist_write);
+
+ vm_register_reg_handler(vm, ISS_MSR_REG(ICC_SGI1R_EL1),
+ ISS_MSR_REG_MASK, vgic_v3_icc_sgi1r_read, vgic_v3_icc_sgi1r_write,
+ NULL);
+
+ vgic_v3_mmio_init(hyp);
+
+ hyp->vgic_attached = true;
+
+ return (0);
+}
+
+static void
+vgic_v3_detach_from_vm(device_t dev, struct hyp *hyp)
+{
+ if (hyp->vgic_attached) {
+ hyp->vgic_attached = false;
+ vgic_v3_mmio_destroy(hyp);
+ }
+}
+
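+/*
+ * Look up an interrupt by INTID: 0-31 (SGIs and PPIs) live in the per-vcpu
+ * private_irqs array, SPIs up to VGIC_NIRQS are shared in the distributor,
+ * and LPIs are not supported. Returns with the IRQ spinlock held; callers
+ * must drop it with vgic_v3_release_irq().
+ */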
+static struct vgic_v3_irq *
+vgic_v3_get_irq(struct hyp *hyp, int vcpuid, uint32_t irqid)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ struct hypctx *hypctx;
+
+ if (irqid < VGIC_PRV_I_NUM) {
+ if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(hyp->vm))
+ return (NULL);
+ hypctx = hyp->ctx[vcpuid];
+ if (hypctx == NULL)
+ return (NULL);
+ vgic_cpu = hypctx->vgic_cpu;
+ irq = &vgic_cpu->private_irqs[irqid];
+ } else if (irqid <= GIC_LAST_SPI) {
+ irqid -= VGIC_PRV_I_NUM;
+ if (irqid >= VGIC_NIRQS)
+ return (NULL);
+ irq = &hyp->vgic->irqs[irqid];
+ } else if (irqid < GIC_FIRST_LPI) {
+ return (NULL);
+ } else {
+ /* No support for LPIs */
+ return (NULL);
+ }
+
+ mtx_lock_spin(&irq->irq_spinmtx);
+ return (irq);
+}
+
+static void
+vgic_v3_release_irq(struct vgic_v3_irq *irq)
+{
+
+ mtx_unlock_spin(&irq->irq_spinmtx);
+}
+
+static bool
+vgic_v3_has_pending_irq(device_t dev, struct hypctx *hypctx)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ bool empty;
+
+ vgic_cpu = hypctx->vgic_cpu;
+ mtx_lock_spin(&vgic_cpu->lr_mtx);
+ empty = TAILQ_EMPTY(&vgic_cpu->irq_act_pend);
+ mtx_unlock_spin(&vgic_cpu->lr_mtx);
+
+ return (!empty);
+}
+
+static bool
+vgic_v3_check_irq(struct vgic_v3_irq *irq, bool level)
+{
+ /*
+ * Only inject if:
+ * - Level-triggered IRQ: level changes low -> high
+ * - Edge-triggered IRQ: level is high
+ */
+ switch (irq->config & VGIC_CONFIG_MASK) {
+ case VGIC_CONFIG_LEVEL:
+ return (level != irq->level);
+ case VGIC_CONFIG_EDGE:
+ return (level);
+ default:
+ break;
+ }
+
+ return (false);
+}
+
+static int
+vgic_v3_inject_irq(device_t dev, struct hyp *hyp, int vcpuid, uint32_t irqid,
+ bool level)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ struct hypctx *hypctx;
+ int target_vcpu;
+ bool notify;
+
+ if (!hyp->vgic_attached)
+ return (ENODEV);
+
+ KASSERT(vcpuid == -1 || irqid < VGIC_PRV_I_NUM,
+ ("%s: SPI/LPI with vcpuid set: irq %u vcpuid %u", __func__, irqid,
+ vcpuid));
+
+ irq = vgic_v3_get_irq(hyp, vcpuid, irqid);
+ if (irq == NULL) {
+ eprintf("Malformed IRQ %u.\n", irqid);
+ return (EINVAL);
+ }
+
+ target_vcpu = irq->target_vcpu;
+ KASSERT(vcpuid == -1 || vcpuid == target_vcpu,
+ ("%s: Interrupt %u has bad cpu affinity: vcpu %d target vcpu %d",
+ __func__, irqid, vcpuid, target_vcpu));
+ KASSERT(target_vcpu >= 0 && target_vcpu < vm_get_maxcpus(hyp->vm),
+ ("%s: Interrupt %u sent to invalid vcpu %d", __func__, irqid,
+ target_vcpu));
+
+ if (vcpuid == -1)
+ vcpuid = target_vcpu;
+ /* TODO: Check from 0 to vm->maxcpus */
+ if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(hyp->vm)) {
+ vgic_v3_release_irq(irq);
+ return (EINVAL);
+ }
+
+ hypctx = hyp->ctx[vcpuid];
+ if (hypctx == NULL) {
+ vgic_v3_release_irq(irq);
+ return (EINVAL);
+ }
+
+ notify = false;
+ vgic_cpu = hypctx->vgic_cpu;
+
+ mtx_lock_spin(&vgic_cpu->lr_mtx);
+
+ if (!vgic_v3_check_irq(irq, level)) {
+ goto out;
+ }
+
+ if ((irq->config & VGIC_CONFIG_MASK) == VGIC_CONFIG_LEVEL)
+ irq->level = level;
+ else /* VGIC_CONFIG_EDGE */
+ irq->pending = true;
+
+ notify = vgic_v3_queue_irq(hyp, vgic_cpu, vcpuid, irq);
+
+out:
+ mtx_unlock_spin(&vgic_cpu->lr_mtx);
+ vgic_v3_release_irq(irq);
+
+ if (notify)
+ vcpu_notify_event(vm_vcpu(hyp->vm, vcpuid));
+
+ return (0);
+}
+
+static int
+vgic_v3_inject_msi(device_t dev, struct hyp *hyp, uint64_t msg, uint64_t addr)
+{
+ struct vgic_v3 *vgic;
+ uint64_t reg;
+
+ vgic = hyp->vgic;
+
+ /* This is a 4 byte register */
+ if (addr < vgic->dist_start || addr + 4 > vgic->dist_end) {
+ return (EINVAL);
+ }
+
+ reg = addr - vgic->dist_start;
+ if (reg != GICD_SETSPI_NSR)
+ return (EINVAL);
+
+ return (INJECT_IRQ(hyp, -1, msg, true));
+}
+
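+/*
+ * Called before entering the guest: copy up to ich_lr_num enabled pending
+ * or active interrupts from the irq_act_pend list into the ICH_LR_EL2 list
+ * registers so the hardware can present them to the vcpu.
+ */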
+static void
+vgic_v3_flush_hwstate(device_t dev, struct hypctx *hypctx)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ int i;
+
+ vgic_cpu = hypctx->vgic_cpu;
+
+ /*
+ * All Distributor writes have been executed at this point, so there is
+ * no need to protect Distributor reads with a mutex.
+ *
+ * This is called with all interrupts disabled, so there is no need for
+ * a List Register spinlock either.
+ */
+ mtx_lock_spin(&vgic_cpu->lr_mtx);
+
+ hypctx->vgic_v3_regs.ich_hcr_el2 &= ~ICH_HCR_EL2_UIE;
+
+ /* Exit early if there are no buffered interrupts */
+ if (TAILQ_EMPTY(&vgic_cpu->irq_act_pend))
+ goto out;
+
+ KASSERT(vgic_cpu->ich_lr_used == 0, ("%s: Used LR count not zero %u",
+ __func__, vgic_cpu->ich_lr_used));
+
+ i = 0;
+ hypctx->vgic_v3_regs.ich_elrsr_el2 =
+ (1u << hypctx->vgic_v3_regs.ich_lr_num) - 1;
+ TAILQ_FOREACH(irq, &vgic_cpu->irq_act_pend, act_pend_list) {
+ /* No free list register, stop searching for IRQs */
+ if (i == hypctx->vgic_v3_regs.ich_lr_num)
+ break;
+
+ if (!irq->enabled)
+ continue;
+
+ hypctx->vgic_v3_regs.ich_lr_el2[i] = ICH_LR_EL2_GROUP1 |
+ ((uint64_t)irq->priority << ICH_LR_EL2_PRIO_SHIFT) |
+ irq->irq;
+
+ if (irq->active) {
+ hypctx->vgic_v3_regs.ich_lr_el2[i] |=
+ ICH_LR_EL2_STATE_ACTIVE;
+ }
+
+#ifdef notyet
+ /* TODO: Check why this is needed */
+ if ((irq->config & VGIC_CONFIG_MASK) == VGIC_CONFIG_LEVEL)
+ hypctx->vgic_v3_regs.ich_lr_el2[i] |= ICH_LR_EL2_EOI;
+#endif
+
+ if (!irq->active && vgic_v3_irq_pending(irq)) {
+ hypctx->vgic_v3_regs.ich_lr_el2[i] |=
+ ICH_LR_EL2_STATE_PENDING;
+
+ /*
+ * This IRQ is now pending on the guest. Allow for
+ * another edge that could cause the interrupt to
+ * be raised again.
+ */
+ if ((irq->config & VGIC_CONFIG_MASK) ==
+ VGIC_CONFIG_EDGE) {
+ irq->pending = false;
+ }
+ }
+
+ i++;
+ }
+ vgic_cpu->ich_lr_used = i;
+
+out:
+ mtx_unlock_spin(&vgic_cpu->lr_mtx);
+}
+
+static void
+vgic_v3_sync_hwstate(device_t dev, struct hypctx *hypctx)
+{
+ struct vgic_v3_cpu *vgic_cpu;
+ struct vgic_v3_irq *irq;
+ uint64_t lr;
+ int i;
+
+ vgic_cpu = hypctx->vgic_cpu;
+
+ /* Exit early if there are no buffered interrupts */
+ if (vgic_cpu->ich_lr_used == 0)
+ return;
+
+ /*
+ * Check on the IRQ state after running the guest. ich_lr_used and
+ * ich_lr_el2 are only ever used within this thread, so they are safe
+ * to access unlocked.
+ */
+ for (i = 0; i < vgic_cpu->ich_lr_used; i++) {
+ lr = hypctx->vgic_v3_regs.ich_lr_el2[i];
+ hypctx->vgic_v3_regs.ich_lr_el2[i] = 0;
+
+ irq = vgic_v3_get_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ ICH_LR_EL2_VINTID(lr));
+ if (irq == NULL)
+ continue;
+
+ irq->active = (lr & ICH_LR_EL2_STATE_ACTIVE) != 0;
+
+ if ((irq->config & VGIC_CONFIG_MASK) == VGIC_CONFIG_EDGE) {
+ /*
+ * If we have an edge-triggered IRQ, preserve the
+ * pending bit until the IRQ has been handled.
+ */
+ if ((lr & ICH_LR_EL2_STATE_PENDING) != 0) {
+ irq->pending = true;
+ }
+ } else {
+ /*
+ * If we have a level-triggered IRQ, remove the
+ * pending bit once the IRQ has been handled.
+ * The level is tracked separately, so it may still
+ * be high and trigger another IRQ.
+ */
+ if ((lr & ICH_LR_EL2_STATE_PENDING) == 0) {
+ irq->pending = false;
+ }
+ }
+
+ /* Lock to update irq_act_pend */
+ mtx_lock_spin(&vgic_cpu->lr_mtx);
+ if (irq->active) {
+ /* Ensure the active IRQ is at the head of the list */
+ TAILQ_REMOVE(&vgic_cpu->irq_act_pend, irq,
+ act_pend_list);
+ TAILQ_INSERT_HEAD(&vgic_cpu->irq_act_pend, irq,
+ act_pend_list);
+ } else if (!vgic_v3_irq_pending(irq)) {
+ /* Neither pending nor active, remove from the list */
+ TAILQ_REMOVE(&vgic_cpu->irq_act_pend, irq,
+ act_pend_list);
+ irq->on_aplist = false;
+ }
+ mtx_unlock_spin(&vgic_cpu->lr_mtx);
+ vgic_v3_release_irq(irq);
+ }
+
+ hypctx->vgic_v3_regs.ich_hcr_el2 &= ~ICH_HCR_EL2_EOICOUNT_MASK;
+ vgic_cpu->ich_lr_used = 0;
+}
+
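+/*
+ * Read the virtualisation features from ICH_VTR_EL2: PRIbits gives the
+ * implemented priority bits (and therefore the lowest usable priority),
+ * PREbits determines how many active priority registers are needed and
+ * ListRegs the number of list registers.
+ */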
+static void
+vgic_v3_init(device_t dev)
+{
+ uint64_t ich_vtr_el2;
+ uint32_t pribits, prebits;
+
+ ich_vtr_el2 = vmm_call_hyp(HYP_READ_REGISTER, HYP_REG_ICH_VTR);
+
+ /* TODO: These fields are common with the vgicv2 driver */
+ pribits = ICH_VTR_EL2_PRIBITS(ich_vtr_el2);
+ switch (pribits) {
+ default:
+ case 5:
+ virt_features.min_prio = 0xf8;
+ break;
+ case 6:
+ virt_features.min_prio = 0xfc;
+ break;
+ case 7:
+ virt_features.min_prio = 0xfe;
+ break;
+ case 8:
+ virt_features.min_prio = 0xff;
+ break;
+ }
+
+ prebits = ICH_VTR_EL2_PREBITS(ich_vtr_el2);
+ switch (prebits) {
+ default:
+ case 5:
+ virt_features.ich_apr_num = 1;
+ break;
+ case 6:
+ virt_features.ich_apr_num = 2;
+ break;
+ case 7:
+ virt_features.ich_apr_num = 4;
+ break;
+ }
+
+ virt_features.ich_lr_num = ICH_VTR_EL2_LISTREGS(ich_vtr_el2);
+}
+
+static int
+vgic_v3_probe(device_t dev)
+{
+ if (!gic_get_vgic(dev))
+ return (EINVAL);
+
+ /* We currently only support the GICv3 */
+ if (gic_get_hw_rev(dev) < 3)
+ return (EINVAL);
+
+ device_set_desc(dev, "Virtual GIC v3");
+ return (BUS_PROBE_DEFAULT);
+}
+
+static int
+vgic_v3_attach(device_t dev)
+{
+ vgic_dev = dev;
+ return (0);
+}
+
+static int
+vgic_v3_detach(device_t dev)
+{
+ vgic_dev = NULL;
+ return (0);
+}
+
+static device_method_t vgic_v3_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, vgic_v3_probe),
+ DEVMETHOD(device_attach, vgic_v3_attach),
+ DEVMETHOD(device_detach, vgic_v3_detach),
+
+ /* VGIC interface */
+ DEVMETHOD(vgic_init, vgic_v3_init),
+ DEVMETHOD(vgic_attach_to_vm, vgic_v3_attach_to_vm),
+ DEVMETHOD(vgic_detach_from_vm, vgic_v3_detach_from_vm),
+ DEVMETHOD(vgic_vminit, vgic_v3_vminit),
+ DEVMETHOD(vgic_cpuinit, vgic_v3_cpuinit),
+ DEVMETHOD(vgic_cpucleanup, vgic_v3_cpucleanup),
+ DEVMETHOD(vgic_vmcleanup, vgic_v3_vmcleanup),
+ DEVMETHOD(vgic_max_cpu_count, vgic_v3_max_cpu_count),
+ DEVMETHOD(vgic_has_pending_irq, vgic_v3_has_pending_irq),
+ DEVMETHOD(vgic_inject_irq, vgic_v3_inject_irq),
+ DEVMETHOD(vgic_inject_msi, vgic_v3_inject_msi),
+ DEVMETHOD(vgic_flush_hwstate, vgic_v3_flush_hwstate),
+ DEVMETHOD(vgic_sync_hwstate, vgic_v3_sync_hwstate),
+
+ /* End */
+ DEVMETHOD_END
+};
+
+/* TODO: Create a vgic base class? */
+DEFINE_CLASS_0(vgic, vgic_v3_driver, vgic_v3_methods, 0);
+
+DRIVER_MODULE(vgic_v3, gic, vgic_v3_driver, 0, 0);
diff --git a/sys/arm64/vmm/io/vgic_v3_reg.h b/sys/arm64/vmm/io/vgic_v3_reg.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic_v3_reg.h
@@ -0,0 +1,129 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2018 The FreeBSD Foundation
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VGIC_V3_REG_H_
+#define _VGIC_V3_REG_H_
+
+/* Interrupt Controller End of Interrupt Status Register */
+#define ICH_EISR_EL2_STATUS_MASK 0xffff
+#define ICH_EISR_EL2_EOI_NOT_HANDLED(lr) ((1 << lr) & ICH_EISR_EL2_STATUS_MASK)
+
+/* Interrupt Controller Empty List Register Status Register */
+#define ICH_ELSR_EL2_STATUS_MASK 0xffff
+#define ICH_ELSR_EL2_LR_EMPTY(x) ((1 << x) & ICH_ELSR_EL2_STATUS_MASK)
+
+/* Interrupt Controller Hyp Control Register */
+#define ICH_HCR_EL2_EOICOUNT_SHIFT 27
+#define ICH_HCR_EL2_EOICOUNT_MASK (0x1f << ICH_HCR_EL2_EOICOUNT_SHIFT)
+#define ICH_HCR_EL2_TDIR (1 << 14) /* Trap non-secure EL1 writes to IC{C, V}_DIR_EL1 */
+#define ICH_HCR_EL2_TSEI (1 << 13) /* Trap System Error Interrupts (SEI) to EL2 */
+#define ICH_HCR_EL2_TALL1 (1 << 12) /* Trap non-secure EL1 accesses to IC{C, V}_* for Group 1 interrupts */
+#define ICH_HCR_EL2_TALL0 (1 << 11) /* Trap non-secure EL1 accesses to IC{C, V}_* for Group 0 interrupts */
+#define ICH_HCR_EL2_TC (1 << 10) /* Trap non-secure EL1 accesses to common IC{C, V}_* registers */
+#define ICH_HCR_EL2_VGRP1DIE (1 << 7) /* VM Group 1 Disabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP1EIE (1 << 6) /* VM Group 1 Enabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP0DIE (1 << 5) /* VM Group 0 Disabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP0EIE (1 << 4) /* VM Group 0 Enabled Interrupt Enable */
+#define ICH_HCR_EL2_NPIE (1 << 3) /* No Pending Interrupt Enable */
+#define ICH_HCR_EL2_LRENPIE (1 << 2) /* List Register Entry Not Present Interrupt Enable */
+#define ICH_HCR_EL2_UIE (1 << 1) /* Underflow Interrupt Enable */
+#define ICH_HCR_EL2_En (1 << 0) /* Global enable for the virtual CPU interface */
+
+/* Interrupt Controller List Registers */
+#define ICH_LR_EL2_VINTID_MASK 0xffffffff
+#define ICH_LR_EL2_VINTID(x) ((x) & ICH_LR_EL2_VINTID_MASK)
+#define ICH_LR_EL2_PINTID_SHIFT 32
+#define ICH_LR_EL2_PINTID_MASK (0x3fUL << ICH_LR_EL2_PINTID_SHIFT)
+/* Raise a maintenance IRQ when deactivated (only non-HW virqs) */
+#define ICH_LR_EL2_EOI (1UL << 41)
+#define ICH_LR_EL2_PRIO_SHIFT 48
+#define ICH_LR_EL2_PRIO_MASK (0xffUL << ICH_LR_EL2_PRIO_SHIFT)
+#define ICH_LR_EL2_GROUP_SHIFT 60
+#define ICH_LR_EL2_GROUP1 (1UL << ICH_LR_EL2_GROUP_SHIFT)
+#define ICH_LR_EL2_HW (1UL << 61)
+#define ICH_LR_EL2_STATE_SHIFT 62
+#define ICH_LR_EL2_STATE_MASK (0x3UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE(x) ((x) & ICH_LR_EL2_STATE_MASK)
+#define ICH_LR_EL2_STATE_INACTIVE (0x0UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_PENDING (0x1UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_ACTIVE (0x2UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_PENDING_ACTIVE (0x3UL << ICH_LR_EL2_STATE_SHIFT)
+
+/* Interrupt Controller Maintenance Interrupt State Register */
+#define ICH_MISR_EL2_VGRP1D (1 << 7) /* vPE Group 1 Disabled */
+#define ICH_MISR_EL2_VGRP1E (1 << 6) /* vPE Group 1 Enabled */
+#define ICH_MISR_EL2_VGRP0D (1 << 5) /* vPE Group 0 Disabled */
+#define ICH_MISR_EL2_VGRP0E (1 << 4) /* vPE Group 0 Enabled */
+#define ICH_MISR_EL2_NP (1 << 3) /* No Pending */
+#define ICH_MISR_EL2_LRENP (1 << 2) /* List Register Entry Not Present */
+#define ICH_MISR_EL2_U (1 << 1) /* Underflow */
+#define ICH_MISR_EL2_EOI (1 << 0) /* End Of Interrupt */
+
+/* Interrupt Controller Virtual Machine Control Register */
+#define ICH_VMCR_EL2_VPMR_SHIFT 24
+#define ICH_VMCR_EL2_VPMR_MASK (0xff << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VPMR_PRIO_LOWEST (0xff << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VPMR_PRIO_HIGHEST (0x00 << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VBPR0_SHIFT 21
+#define ICH_VMCR_EL2_VBPR0_MASK (0x7 << ICH_VMCR_EL2_VBPR0_SHIFT)
+#define ICH_VMCR_EL2_VBPR0_NO_PREEMPTION \
+ (0x7 << ICH_VMCR_EL2_VBPR0_SHIFT)
+#define ICH_VMCR_EL2_VBPR1_SHIFT 18
+#define ICH_VMCR_EL2_VBPR1_MASK (0x7 << ICH_VMCR_EL2_VBPR1_SHIFT)
+#define ICH_VMCR_EL2_VBPR1_NO_PREEMPTION \
+ (0x7 << ICH_VMCR_EL2_VBPR1_SHIFT)
+#define ICH_VMCR_EL2_VEOIM (1 << 9) /* Virtual EOI mode */
+#define ICH_VMCR_EL2_VCBPR (1 << 4) /* Virtual Common binary Point Register */
+#define ICH_VMCR_EL2_VFIQEN (1 << 3) /* Virtual FIQ enable */
+#define ICH_VMCR_EL2_VACKCTL (1 << 2) /* Virtual AckCtl */
+#define ICH_VMCR_EL2_VENG1 (1 << 1) /* Virtual Group 1 Interrupt Enable */
+#define ICH_VMCR_EL2_VENG0 (1 << 0) /* Virtual Group 0 Interrupt Enable */
+
+/* Interrupt Controller VGIC Type Register */
+#define ICH_VTR_EL2_PRIBITS_SHIFT 29
+#define ICH_VTR_EL2_PRIBITS_MASK (0x7 << ICH_VTR_EL2_PRIBITS_SHIFT)
+#define ICH_VTR_EL2_PRIBITS(x) \
+ ((((x) & ICH_VTR_EL2_PRIBITS_MASK) >> ICH_VTR_EL2_PRIBITS_SHIFT) + 1)
+#define ICH_VTR_EL2_PREBITS_SHIFT 26
+#define ICH_VTR_EL2_PREBITS_MASK (0x7 << ICH_VTR_EL2_PREBITS_SHIFT)
+#define ICH_VTR_EL2_PREBITS(x) \
+ (((x) & ICH_VTR_EL2_PREBITS_MASK) >> ICH_VTR_EL2_PREBITS_SHIFT)
+#define ICH_VTR_EL2_SEIS (1 << 22) /* System Error Interrupt (SEI) Support */
+#define ICH_VTR_EL2_A3V (1 << 21) /* Affinity 3 Valid */
+#define ICH_VTR_EL2_NV4 (1 << 20) /* Direct injection of virtual interrupts not supported. RES1 for GICv3 */
+#define ICH_VTR_EL2_TDS (1 << 19) /* Implementation supports ICH_HCR_EL2.TDIR */
+#define ICH_VTR_EL2_LISTREGS_MASK 0x1f
+/*
+ * ICH_VTR_EL2.ListRegs holds the number of list registers, minus one. Add one
+ * to get the actual number of list registers.
+ */
+#define ICH_VTR_EL2_LISTREGS(x) (((x) & ICH_VTR_EL2_LISTREGS_MASK) + 1)
+
+#endif /* !_VGIC_V3_REG_H_ */
diff --git a/sys/arm64/vmm/io/vtimer.h b/sys/arm64/vmm/io/vtimer.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vtimer.h
@@ -0,0 +1,85 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2017 The FreeBSD Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_VTIMER_H_
+#define _VMM_VTIMER_H_
+
+#define GT_PHYS_NS_IRQ 30
+#define GT_VIRT_IRQ 27
+
+struct hyp;
+struct hypctx;
+
+struct vtimer {
+ uint64_t cnthctl_el2;
+ uint64_t cntvoff_el2;
+};
+
+struct vtimer_timer {
+ struct callout callout;
+ struct mtx mtx;
+
+ uint32_t irqid;
+
+ /*
+ * These registers are either emulated for the physical timer, or
+ * the guest has full access to them for the virtual timer.
+
+ * CNTx_CTL_EL0: Counter-timer Timer Control Register
+ * CNTx_CVAL_EL0: Counter-timer Timer CompareValue Register
+ */
+ uint64_t cntx_cval_el0;
+ uint64_t cntx_ctl_el0;
+};
+
+struct vtimer_cpu {
+ struct vtimer_timer phys_timer;
+ struct vtimer_timer virt_timer;
+
+ uint32_t cntkctl_el1;
+};
+
+int vtimer_init(uint64_t cnthctl_el2);
+void vtimer_vminit(struct hyp *);
+void vtimer_cpuinit(struct hypctx *);
+void vtimer_cpucleanup(struct hypctx *);
+void vtimer_vmcleanup(struct hyp *);
+void vtimer_cleanup(void);
+void vtimer_sync_hwstate(struct hypctx *hypctx);
+
+int vtimer_phys_ctl_read(struct vcpu *vcpu, uint64_t *rval, void *arg);
+int vtimer_phys_ctl_write(struct vcpu *vcpu, uint64_t wval, void *arg);
+int vtimer_phys_cnt_read(struct vcpu *vcpu, uint64_t *rval, void *arg);
+int vtimer_phys_cnt_write(struct vcpu *vcpu, uint64_t wval, void *arg);
+int vtimer_phys_cval_read(struct vcpu *vcpu, uint64_t *rval, void *arg);
+int vtimer_phys_cval_write(struct vcpu *vcpu, uint64_t wval, void *arg);
+int vtimer_phys_tval_read(struct vcpu *vcpu, uint64_t *rval, void *arg);
+int vtimer_phys_tval_write(struct vcpu *vcpu, uint64_t wval, void *arg);
+#endif
diff --git a/sys/arm64/vmm/io/vtimer.c b/sys/arm64/vmm/io/vtimer.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vtimer.c
@@ -0,0 +1,503 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2017 The FreeBSD Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/time.h>
+#include <sys/timeet.h>
+#include <sys/timetc.h>
+
+#include <machine/bus.h>
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include <machine/armreg.h>
+
+#include <arm64/vmm/arm64.h>
+
+#include "vgic.h"
+#include "vtimer.h"
+
+#define RES1 0xffffffffffffffffUL
+
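+/* True when the timer is enabled and its interrupt is not masked. */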
+#define timer_enabled(ctl) \
+ (!((ctl) & CNTP_CTL_IMASK) && ((ctl) & CNTP_CTL_ENABLE))
+
+static uint64_t cnthctl_el2_reg;
+static uint32_t tmr_frq;
+
+#define timer_condition_met(ctl) ((ctl) & CNTP_CTL_ISTATUS)
+
+static void vtimer_schedule_irq(struct hypctx *hypctx, bool phys);
+
+static int
+vtimer_virtual_timer_intr(void *arg)
+{
+ struct hypctx *hypctx;
+ uint64_t cntpct_el0;
+ uint32_t cntv_ctl;
+
+ hypctx = arm64_get_active_vcpu();
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+
+ if (!hypctx) {
+ /* vm_destroy() was called. */
+ eprintf("No active vcpu\n");
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+ goto out;
+ }
+ if (!timer_enabled(cntv_ctl)) {
+ eprintf("Timer not enabled\n");
+ goto out;
+ }
+ if (!timer_condition_met(cntv_ctl)) {
+ eprintf("Timer condition not met\n");
+ goto out;
+ }
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) -
+ hypctx->hyp->vtimer.cntvoff_el2;
+ if (hypctx->vtimer_cpu.virt_timer.cntx_cval_el0 < cntpct_el0)
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ GT_VIRT_IRQ, true);
+
+ cntv_ctl = hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0;
+
+out:
+ /*
+ * Disable the timer interrupt. This will prevent the interrupt from
+ * being reasserted as soon as we exit the handler and getting stuck
+ * in an infinite loop.
+ *
+	 * This is safe to do because the guest disables the timer and then
+	 * re-enables it as part of the interrupt handling routine.
+ */
+ cntv_ctl &= ~CNTP_CTL_ENABLE;
+ WRITE_SPECIALREG(cntv_ctl_el0, cntv_ctl);
+
+ return (FILTER_HANDLED);
+}
+
+int
+vtimer_init(uint64_t cnthctl_el2)
+{
+ cnthctl_el2_reg = cnthctl_el2;
+ /*
+ * The guest *MUST* use the same timer frequency as the host. The
+ * register CNTFRQ_EL0 is accessible to the guest and a different value
+	 * in the guest dts file might have unforeseen consequences.
+ */
+ tmr_frq = READ_SPECIALREG(cntfrq_el0);
+
+ return (0);
+}
+
+void
+vtimer_vminit(struct hyp *hyp)
+{
+ uint64_t now;
+
+ /*
+ * Configure the Counter-timer Hypervisor Control Register for the VM.
+ *
+	 * ~CNTHCTL_EL1PCEN: trap EL1 access to CNTP_{CTL, CVAL, TVAL}_EL0
+	 * ~CNTHCTL_EL1PCTEN: trap EL1 access to CNTPCT_EL0
+ */
+ hyp->vtimer.cnthctl_el2 = cnthctl_el2_reg & ~CNTHCTL_EL1PCEN;
+ hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_EL1PCTEN;
+
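+	/*
+	 * Use the current physical counter value as the virtual offset so
+	 * that the guest's virtual counter starts at roughly zero.
+	 */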
+ now = READ_SPECIALREG(cntpct_el0);
+ hyp->vtimer.cntvoff_el2 = now;
+
+ return;
+}
+
+void
+vtimer_cpuinit(struct hypctx *hypctx)
+{
+ struct vtimer_cpu *vtimer_cpu;
+
+ vtimer_cpu = &hypctx->vtimer_cpu;
+ /*
+ * Configure physical timer interrupts for the VCPU.
+ *
+ * CNTP_CTL_IMASK: mask interrupts
+ * ~CNTP_CTL_ENABLE: disable the timer
+ */
+ vtimer_cpu->phys_timer.cntx_ctl_el0 = CNTP_CTL_IMASK & ~CNTP_CTL_ENABLE;
+
+ mtx_init(&vtimer_cpu->phys_timer.mtx, "vtimer phys callout mutex", NULL,
+ MTX_DEF);
+ callout_init_mtx(&vtimer_cpu->phys_timer.callout,
+ &vtimer_cpu->phys_timer.mtx, 0);
+ vtimer_cpu->phys_timer.irqid = GT_PHYS_NS_IRQ;
+
+ mtx_init(&vtimer_cpu->virt_timer.mtx, "vtimer virt callout mutex", NULL,
+ MTX_DEF);
+ callout_init_mtx(&vtimer_cpu->virt_timer.callout,
+ &vtimer_cpu->virt_timer.mtx, 0);
+ vtimer_cpu->virt_timer.irqid = GT_VIRT_IRQ;
+}
+
+void
+vtimer_cpucleanup(struct hypctx *hypctx)
+{
+ struct vtimer_cpu *vtimer_cpu;
+
+ vtimer_cpu = &hypctx->vtimer_cpu;
+ callout_drain(&vtimer_cpu->phys_timer.callout);
+ callout_drain(&vtimer_cpu->virt_timer.callout);
+ mtx_destroy(&vtimer_cpu->phys_timer.mtx);
+ mtx_destroy(&vtimer_cpu->virt_timer.mtx);
+}
+
+void
+vtimer_vmcleanup(struct hyp *hyp)
+{
+ struct hypctx *hypctx;
+ uint32_t cntv_ctl;
+
+ hypctx = arm64_get_active_vcpu();
+ if (!hypctx) {
+ /* The active VM was destroyed, stop the timer. */
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+ cntv_ctl &= ~CNTP_CTL_ENABLE;
+ WRITE_SPECIALREG(cntv_ctl_el0, cntv_ctl);
+ }
+}
+
+void
+vtimer_cleanup(void)
+{
+}
+
+void
+vtimer_sync_hwstate(struct hypctx *hypctx)
+{
+ struct vtimer_timer *timer;
+ uint64_t cntpct_el0;
+
+ timer = &hypctx->vtimer_cpu.virt_timer;
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) -
+ hypctx->hyp->vtimer.cntvoff_el2;
+ if (!timer_enabled(timer->cntx_ctl_el0)) {
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ timer->irqid, false);
+ } else if (timer->cntx_cval_el0 < cntpct_el0) {
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ timer->irqid, true);
+ } else {
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ timer->irqid, false);
+ vtimer_schedule_irq(hypctx, false);
+ }
+}
+
+static void
+vtimer_inject_irq_callout_phys(void *context)
+{
+ struct hypctx *hypctx;
+
+ hypctx = context;
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ hypctx->vtimer_cpu.phys_timer.irqid, true);
+}
+
+static void
+vtimer_inject_irq_callout_virt(void *context)
+{
+ struct hypctx *hypctx;
+
+ hypctx = context;
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ hypctx->vtimer_cpu.virt_timer.irqid, true);
+}
+
+static void
+vtimer_schedule_irq(struct hypctx *hypctx, bool phys)
+{
+ sbintime_t time;
+ struct vtimer_timer *timer;
+ uint64_t cntpct_el0;
+ uint64_t diff;
+
+ if (phys)
+ timer = &hypctx->vtimer_cpu.phys_timer;
+ else
+ timer = &hypctx->vtimer_cpu.virt_timer;
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) -
+ hypctx->hyp->vtimer.cntvoff_el2;
+ if (timer->cntx_cval_el0 < cntpct_el0) {
+ /* Timer set in the past, trigger interrupt */
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(hypctx->vcpu),
+ timer->irqid, true);
+ } else {
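+		/*
+		 * Compute the number of counter ticks until the compare
+		 * value is reached and convert it to an sbintime for the
+		 * callout.
+		 */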
+ diff = timer->cntx_cval_el0 - cntpct_el0;
+ time = diff * SBT_1S / tmr_frq;
+ if (phys)
+ callout_reset_sbt(&timer->callout, time, 0,
+ vtimer_inject_irq_callout_phys, hypctx, 0);
+ else
+ callout_reset_sbt(&timer->callout, time, 0,
+ vtimer_inject_irq_callout_virt, hypctx, 0);
+ }
+}
+
+static void
+vtimer_remove_irq(struct hypctx *hypctx, struct vcpu *vcpu)
+{
+ struct vtimer_cpu *vtimer_cpu;
+ struct vtimer_timer *timer;
+
+ vtimer_cpu = &hypctx->vtimer_cpu;
+ timer = &vtimer_cpu->phys_timer;
+
+ callout_drain(&timer->callout);
+ /*
+	 * The interrupt needs to be deactivated here regardless of whether the
+	 * callout function has executed. The timer interrupt can be masked with
+ * the CNTP_CTL_EL0.IMASK bit instead of reading the IAR register.
+ * Masking the interrupt doesn't remove it from the list registers.
+ */
+ vgic_inject_irq(hypctx->hyp, vcpu_vcpuid(vcpu), timer->irqid, false);
+}
+
+/*
+ * Timer emulation functions.
+ *
+ * The guest should use the virtual timer, but some software (e.g. u-boot)
+ * uses the physical timer. Emulate it in software for the guest to use.
+ *
+ * Adjust for cntvoff_el2 so the physical and virtual timers are at similar
+ * times. This simplifies interrupt handling in the virtual timer as the
+ * adjustment will have already happened.
+ */
+
+int
+vtimer_phys_ctl_read(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t cntpct_el0;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) - hyp->vtimer.cntvoff_el2;
+ if (vtimer_cpu->phys_timer.cntx_cval_el0 < cntpct_el0)
+ /* Timer condition met */
+ *rval = vtimer_cpu->phys_timer.cntx_ctl_el0 | CNTP_CTL_ISTATUS;
+ else
+ *rval = vtimer_cpu->phys_timer.cntx_ctl_el0 & ~CNTP_CTL_ISTATUS;
+
+ return (0);
+}
+
+int
+vtimer_phys_ctl_write(struct vcpu *vcpu, uint64_t wval, void *arg)
+{
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t ctl_el0;
+ bool timer_toggled_on;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ timer_toggled_on = false;
+ ctl_el0 = vtimer_cpu->phys_timer.cntx_ctl_el0;
+
+ if (!timer_enabled(ctl_el0) && timer_enabled(wval))
+ timer_toggled_on = true;
+ else if (timer_enabled(ctl_el0) && !timer_enabled(wval))
+ vtimer_remove_irq(hypctx, vcpu);
+
+ vtimer_cpu->phys_timer.cntx_ctl_el0 = wval;
+
+ if (timer_toggled_on)
+ vtimer_schedule_irq(hypctx, true);
+
+ return (0);
+}
+
+int
+vtimer_phys_cnt_read(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ struct vm *vm;
+ struct hyp *hyp;
+
+ vm = vcpu_vm(vcpu);
+ hyp = vm_get_cookie(vm);
+ *rval = READ_SPECIALREG(cntpct_el0) - hyp->vtimer.cntvoff_el2;
+ return (0);
+}
+
+int
+vtimer_phys_cnt_write(struct vcpu *vcpu, uint64_t wval, void *arg)
+{
+ return (0);
+}
+
+int
+vtimer_phys_cval_read(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ *rval = vtimer_cpu->phys_timer.cntx_cval_el0;
+
+ return (0);
+}
+
+int
+vtimer_phys_cval_write(struct vcpu *vcpu, uint64_t wval, void *arg)
+{
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ vtimer_cpu->phys_timer.cntx_cval_el0 = wval;
+
+ vtimer_remove_irq(hypctx, vcpu);
+ if (timer_enabled(vtimer_cpu->phys_timer.cntx_ctl_el0)) {
+ vtimer_schedule_irq(hypctx, true);
+ }
+
+ return (0);
+}
+
+int
+vtimer_phys_tval_read(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint32_t cntpct_el0;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ if (!(vtimer_cpu->phys_timer.cntx_ctl_el0 & CNTP_CTL_ENABLE)) {
+ /*
+ * ARMv8 Architecture Manual, p. D7-2702: the result of reading
+ * TVAL when the timer is disabled is UNKNOWN. I have chosen to
+		 * return the maximum value possible on 32 bits, which means the
+ * timer will fire very far into the future.
+ */
+ *rval = (uint32_t)RES1;
+ } else {
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) -
+ hyp->vtimer.cntvoff_el2;
+ *rval = vtimer_cpu->phys_timer.cntx_cval_el0 - cntpct_el0;
+ }
+
+ return (0);
+}
+
+int
+vtimer_phys_tval_write(struct vcpu *vcpu, uint64_t wval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t cntpct_el0;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
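+	/*
+	 * Writing TVAL sets the compare value to the current counter value
+	 * plus the written value, treated as a signed 32-bit quantity.
+	 */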
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) - hyp->vtimer.cntvoff_el2;
+ vtimer_cpu->phys_timer.cntx_cval_el0 = (int32_t)wval + cntpct_el0;
+
+ vtimer_remove_irq(hypctx, vcpu);
+ if (timer_enabled(vtimer_cpu->phys_timer.cntx_ctl_el0)) {
+ vtimer_schedule_irq(hypctx, true);
+ }
+
+ return (0);
+}
+
+struct vtimer_softc {
+ struct resource *res;
+ void *ihl;
+ int rid;
+};
+
+static int
+vtimer_probe(device_t dev)
+{
+ device_set_desc(dev, "Virtual timer");
+ return (BUS_PROBE_DEFAULT);
+}
+
+static int
+vtimer_attach(device_t dev)
+{
+ struct vtimer_softc *sc;
+
+ sc = device_get_softc(dev);
+
+ sc->rid = 0;
+ sc->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->rid, RF_ACTIVE);
+ if (sc->res == NULL)
+ return (ENXIO);
+
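+	/*
+	 * The filter routine injects the virtual timer interrupt into the
+	 * vcpu that is currently active on this CPU.
+	 */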
+ bus_setup_intr(dev, sc->res, INTR_TYPE_CLK, vtimer_virtual_timer_intr,
+ NULL, NULL, &sc->ihl);
+
+ return (0);
+}
+
+static device_method_t vtimer_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, vtimer_probe),
+ DEVMETHOD(device_attach, vtimer_attach),
+
+ /* End */
+ DEVMETHOD_END
+};
+
+DEFINE_CLASS_0(vtimer, vtimer_driver, vtimer_methods,
+ sizeof(struct vtimer_softc));
+
+DRIVER_MODULE(vtimer, generic_timer, vtimer_driver, 0, 0);
diff --git a/sys/arm64/vmm/mmu.h b/sys/arm64/vmm/mmu.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/mmu.h
@@ -0,0 +1,52 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_MMU_H_
+#define _VMM_MMU_H_
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+
+#include "hyp.h"
+
+extern char vmm_hyp_code;
+extern char vmm_hyp_code_end;
+
+extern char _vmm_start;
+extern char _vmm_end;
+
+bool vmmpmap_init(void);
+void vmmpmap_fini(void);
+uint64_t vmmpmap_to_ttbr0(void);
+bool vmmpmap_enter(vm_offset_t, vm_size_t, vm_paddr_t, vm_prot_t);
+void vmmpmap_remove(vm_offset_t, vm_size_t, bool);
+
+#endif
diff --git a/sys/arm64/vmm/reset.h b/sys/arm64/vmm/reset.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/reset.h
@@ -0,0 +1,33 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _VMM_RESET_H_
+#define _VMM_RESET_H_
+
+void reset_vm_el01_regs(void *vcpu);
+void reset_vm_el2_regs(void *vcpu);
+
+#endif
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm.c
@@ -0,0 +1,1803 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/cpuset.h>
+#include <sys/kernel.h>
+#include <sys/linker.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_param.h>
+
+#include <machine/armreg.h>
+#include <machine/cpu.h>
+#include <machine/fpu.h>
+#include <machine/machdep.h>
+#include <machine/pcb.h>
+#include <machine/smp.h>
+#include <machine/vm.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <dev/pci/pcireg.h>
+
+#include "vmm_ktr.h"
+#include "vmm_stat.h"
+#include "arm64.h"
+#include "mmu.h"
+
+#include "io/vgic.h"
+#include "io/vtimer.h"
+
+struct vcpu {
+ int flags;
+ enum vcpu_state state;
+ struct mtx mtx;
+ int hostcpu; /* host cpuid this vcpu last ran on */
+ int vcpuid;
+ void *stats;
+ struct vm_exit exitinfo;
+ uint64_t nextpc; /* (x) next instruction to execute */
+ struct vm *vm; /* (o) */
+ void *cookie; /* (i) cpu-specific data */
+ struct vfpstate *guestfpu; /* (a,i) guest fpu state */
+};
+
+#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
+#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
+#define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx))
+#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
+#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
+#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
+
+struct mem_seg {
+ uint64_t gpa;
+ size_t len;
+ bool wired;
+ bool sysmem;
+ vm_object_t object;
+};
+#define VM_MAX_MEMSEGS 3
+
+struct mem_map {
+ vm_paddr_t gpa;
+ size_t len;
+ vm_ooffset_t segoff;
+ int segid;
+ int prot;
+ int flags;
+};
+#define VM_MAX_MEMMAPS 4
+
+struct vmm_mmio_region {
+ uint64_t start;
+ uint64_t end;
+ mem_region_read_t read;
+ mem_region_write_t write;
+};
+#define VM_MAX_MMIO_REGIONS 4
+
+struct vmm_special_reg {
+ uint32_t esr_iss;
+ uint32_t esr_mask;
+ reg_read_t reg_read;
+ reg_write_t reg_write;
+ void *arg;
+};
+#define VM_MAX_SPECIAL_REGS 16
+
+/*
+ * Initialization:
+ * (o) initialized the first time the VM is created
+ * (i) initialized when VM is created and when it is reinitialized
+ * (x) initialized before use
+ */
+struct vm {
+ void *cookie; /* (i) cpu-specific data */
+ volatile cpuset_t active_cpus; /* (i) active vcpus */
+ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */
+ int suspend; /* (i) stop VM execution */
+ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
+ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
+ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
+ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
+ struct vmspace *vmspace; /* (o) guest's address space */
+ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
+ struct vcpu **vcpu; /* (i) guest vcpus */
+ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
+ /* (o) guest MMIO regions */
+ struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
+ /* The following describe the vm cpu topology */
+ uint16_t sockets; /* (o) num of sockets */
+ uint16_t cores; /* (o) num of cores/socket */
+ uint16_t threads; /* (o) num of threads/core */
+ uint16_t maxcpus; /* (o) max pluggable cpus */
+ struct sx mem_segs_lock; /* (o) */
+ struct sx vcpus_init_lock; /* (o) */
+};
+
+static bool vmm_initialized = false;
+
+static int vm_handle_wfi(struct vcpu *vcpu,
+ struct vm_exit *vme, bool *retu);
+
+static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
+
+/* statistics */
+static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
+
+SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
+
+static int vmm_ipinum;
+SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
+ "IPI vector used for vcpu notifications");
+
+struct vmm_regs {
+ uint64_t id_aa64afr0;
+ uint64_t id_aa64afr1;
+ uint64_t id_aa64dfr0;
+ uint64_t id_aa64dfr1;
+ uint64_t id_aa64isar0;
+ uint64_t id_aa64isar1;
+ uint64_t id_aa64isar2;
+ uint64_t id_aa64mmfr0;
+ uint64_t id_aa64mmfr1;
+ uint64_t id_aa64mmfr2;
+ uint64_t id_aa64pfr0;
+ uint64_t id_aa64pfr1;
+};
+
+static const struct vmm_regs vmm_arch_regs_masks = {
+ .id_aa64dfr0 =
+ ID_AA64DFR0_CTX_CMPs_MASK |
+ ID_AA64DFR0_WRPs_MASK |
+ ID_AA64DFR0_BRPs_MASK |
+ ID_AA64DFR0_PMUVer_3 |
+ ID_AA64DFR0_DebugVer_8,
+ .id_aa64isar0 =
+ ID_AA64ISAR0_TLB_TLBIOSR |
+ ID_AA64ISAR0_SHA3_IMPL |
+ ID_AA64ISAR0_RDM_IMPL |
+ ID_AA64ISAR0_Atomic_IMPL |
+ ID_AA64ISAR0_CRC32_BASE |
+ ID_AA64ISAR0_SHA2_512 |
+ ID_AA64ISAR0_SHA1_BASE |
+ ID_AA64ISAR0_AES_PMULL,
+ .id_aa64mmfr0 =
+ ID_AA64MMFR0_TGran4_IMPL |
+ ID_AA64MMFR0_TGran64_IMPL |
+ ID_AA64MMFR0_TGran16_IMPL |
+ ID_AA64MMFR0_ASIDBits_16 |
+ ID_AA64MMFR0_PARange_4P,
+ .id_aa64mmfr1 =
+ ID_AA64MMFR1_SpecSEI_IMPL |
+ ID_AA64MMFR1_PAN_ATS1E1 |
+ ID_AA64MMFR1_HAFDBS_AF,
+ .id_aa64pfr0 =
+ ID_AA64PFR0_GIC_CPUIF_NONE |
+ ID_AA64PFR0_AdvSIMD_HP |
+ ID_AA64PFR0_FP_HP |
+ ID_AA64PFR0_EL3_64 |
+ ID_AA64PFR0_EL2_64 |
+ ID_AA64PFR0_EL1_64 |
+ ID_AA64PFR0_EL0_64,
+};
+
+/* Host registers masked by vmm_arch_regs_masks. */
+static struct vmm_regs vmm_arch_regs;
+
+u_int vm_maxcpu;
+SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ &vm_maxcpu, 0, "Maximum number of vCPUs");
+
+static void vm_free_memmap(struct vm *vm, int ident);
+static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
+static void vcpu_notify_event_locked(struct vcpu *vcpu);
+
+/*
+ * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
+ * is a safe value for now.
+ */
+#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE)
+
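+/*
+ * Read the host's ID registers, keeping only the fields permitted by 'masks'.
+ * A register that cannot be read is exposed to the guest as zero.
+ */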
+static int
+vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
+{
+#define _FETCH_KERN_REG(reg, field) do { \
+ regs->field = vmm_arch_regs_masks.field; \
+ if (!get_kernel_reg_masked(reg, &regs->field, masks->field)) \
+ regs->field = 0; \
+} while (0)
+ _FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
+ _FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
+ _FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
+ _FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
+ _FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
+ _FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
+ _FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
+ _FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
+ _FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
+ _FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
+ _FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
+ _FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
+#undef _FETCH_KERN_REG
+ return (0);
+}
+
+static void
+vcpu_cleanup(struct vcpu *vcpu, bool destroy)
+{
+ vmmops_vcpu_cleanup(vcpu->cookie);
+ vcpu->cookie = NULL;
+ if (destroy) {
+ vmm_stat_free(vcpu->stats);
+ fpu_save_area_free(vcpu->guestfpu);
+ vcpu_lock_destroy(vcpu);
+ }
+}
+
+static struct vcpu *
+vcpu_alloc(struct vm *vm, int vcpu_id)
+{
+ struct vcpu *vcpu;
+
+ KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
+ ("vcpu_alloc: invalid vcpu %d", vcpu_id));
+
+ vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
+ vcpu_lock_init(vcpu);
+ vcpu->state = VCPU_IDLE;
+ vcpu->hostcpu = NOCPU;
+ vcpu->vcpuid = vcpu_id;
+ vcpu->vm = vm;
+ vcpu->guestfpu = fpu_save_area_alloc();
+ vcpu->stats = vmm_stat_alloc();
+ return (vcpu);
+}
+
+static void
+vcpu_init(struct vcpu *vcpu)
+{
+ vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
+ MPASS(vcpu->cookie != NULL);
+ fpu_save_area_reset(vcpu->guestfpu);
+ vmm_stat_init(vcpu->stats);
+}
+
+struct vm_exit *
+vm_exitinfo(struct vcpu *vcpu)
+{
+ return (&vcpu->exitinfo);
+}
+
+static int
+vmm_init(void)
+{
+ int error;
+
+ vm_maxcpu = mp_ncpus;
+ TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
+
+ if (vm_maxcpu > VM_MAXCPU) {
+ printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
+ vm_maxcpu = VM_MAXCPU;
+ }
+ if (vm_maxcpu == 0)
+ vm_maxcpu = 1;
+
+ error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
+ if (error != 0)
+ return (error);
+
+ return (vmmops_modinit(0));
+}
+
+static int
+vmm_handler(module_t mod, int what, void *arg)
+{
+ int error;
+
+ switch (what) {
+ case MOD_LOAD:
+ /* TODO: if (vmm_is_hw_supported()) { */
+ vmmdev_init();
+ error = vmm_init();
+ if (error == 0)
+ vmm_initialized = true;
+ break;
+ case MOD_UNLOAD:
+ /* TODO: if (vmm_is_hw_supported()) { */
+ error = vmmdev_cleanup();
+ if (error == 0 && vmm_initialized) {
+ error = vmmops_modcleanup();
+ if (error)
+ vmm_initialized = false;
+ }
+ break;
+ default:
+ error = 0;
+ break;
+ }
+ return (error);
+}
+
+static moduledata_t vmm_kmod = {
+ "vmm",
+ vmm_handler,
+ NULL
+};
+
+/*
+ * vmm initialization has the following dependencies:
+ *
+ * - HYP initialization requires smp_rendezvous() and therefore must happen
+ * after SMP is fully functional (after SI_SUB_SMP).
+ */
+DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
+MODULE_VERSION(vmm, 1);
+
+static void
+vm_init(struct vm *vm, bool create)
+{
+ int i;
+
+ vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
+ MPASS(vm->cookie != NULL);
+
+ CPU_ZERO(&vm->active_cpus);
+ CPU_ZERO(&vm->debug_cpus);
+
+ vm->suspend = 0;
+ CPU_ZERO(&vm->suspended_cpus);
+
+ memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
+ memset(vm->special_reg, 0, sizeof(vm->special_reg));
+
+ if (!create) {
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (vm->vcpu[i] != NULL)
+ vcpu_init(vm->vcpu[i]);
+ }
+ }
+}
+
+struct vcpu *
+vm_alloc_vcpu(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
+ return (NULL);
+
+ /* Some interrupt controllers may have a CPU limit */
+ if (vcpuid >= vgic_max_cpu_count(vm->cookie))
+ return (NULL);
+
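+	/* Fast path: the vcpu has already been created and published. */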
+ vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]);
+ if (__predict_true(vcpu != NULL))
+ return (vcpu);
+
+ sx_xlock(&vm->vcpus_init_lock);
+ vcpu = vm->vcpu[vcpuid];
+ if (vcpu == NULL/* && !vm->dying*/) {
+ vcpu = vcpu_alloc(vm, vcpuid);
+ vcpu_init(vcpu);
+
+ /*
+ * Ensure vCPU is fully created before updating pointer
+ * to permit unlocked reads above.
+ */
+ atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
+ (uintptr_t)vcpu);
+ }
+ sx_xunlock(&vm->vcpus_init_lock);
+ return (vcpu);
+}
+
+void
+vm_slock_vcpus(struct vm *vm)
+{
+ sx_slock(&vm->vcpus_init_lock);
+}
+
+void
+vm_unlock_vcpus(struct vm *vm)
+{
+ sx_unlock(&vm->vcpus_init_lock);
+}
+
+int
+vm_create(const char *name, struct vm **retvm)
+{
+ struct vm *vm;
+ struct vmspace *vmspace;
+
+ /*
+ * If vmm.ko could not be successfully initialized then don't attempt
+ * to create the virtual machine.
+ */
+ if (!vmm_initialized)
+ return (ENXIO);
+
+ if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
+ return (EINVAL);
+
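+	/* Allocate a 39-bit (512 GiB) guest physical address space. */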
+ vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
+ if (vmspace == NULL)
+ return (ENOMEM);
+
+ vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
+ strcpy(vm->name, name);
+ vm->vmspace = vmspace;
+ sx_init(&vm->mem_segs_lock, "vm mem_segs");
+ sx_init(&vm->vcpus_init_lock, "vm vcpus");
+
+ vm->sockets = 1;
+ vm->cores = 1; /* XXX backwards compatibility */
+ vm->threads = 1; /* XXX backwards compatibility */
+ vm->maxcpus = vm_maxcpu;
+
+ vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
+ M_WAITOK | M_ZERO);
+
+ vm_init(vm, true);
+
+ *retvm = vm;
+ return (0);
+}
+
+void
+vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus)
+{
+ *sockets = vm->sockets;
+ *cores = vm->cores;
+ *threads = vm->threads;
+ *maxcpus = vm->maxcpus;
+}
+
+uint16_t
+vm_get_maxcpus(struct vm *vm)
+{
+ return (vm->maxcpus);
+}
+
+int
+vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus)
+{
+ /* Ignore maxcpus. */
+ if ((sockets * cores * threads) > vm->maxcpus)
+ return (EINVAL);
+ vm->sockets = sockets;
+ vm->cores = cores;
+ vm->threads = threads;
+	return (0);
+}
+
+static void
+vm_cleanup(struct vm *vm, bool destroy)
+{
+ struct mem_map *mm;
+ pmap_t pmap __diagused;
+ int i;
+
+ if (destroy) {
+ pmap = vmspace_pmap(vm->vmspace);
+ sched_pin();
+ PCPU_SET(curvmpmap, NULL);
+ sched_unpin();
+ CPU_FOREACH(i) {
+ MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
+ }
+ }
+
+ vgic_detach_from_vm(vm->cookie);
+
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (vm->vcpu[i] != NULL)
+ vcpu_cleanup(vm->vcpu[i], destroy);
+ }
+
+ vmmops_cleanup(vm->cookie);
+
+ /*
+ * System memory is removed from the guest address space only when
+ * the VM is destroyed. This is because the mapping remains the same
+ * across VM reset.
+ *
+ * Device memory can be relocated by the guest (e.g. using PCI BARs)
+ * so those mappings are removed on a VM reset.
+ */
+ if (!destroy) {
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+			if (!sysmem_mapping(vm, mm))
+ vm_free_memmap(vm, i);
+ }
+ }
+
+ if (destroy) {
+ for (i = 0; i < VM_MAX_MEMSEGS; i++)
+ vm_free_memseg(vm, i);
+
+ vmmops_vmspace_free(vm->vmspace);
+ vm->vmspace = NULL;
+
+ for (i = 0; i < vm->maxcpus; i++)
+ free(vm->vcpu[i], M_VMM);
+ free(vm->vcpu, M_VMM);
+ sx_destroy(&vm->vcpus_init_lock);
+ sx_destroy(&vm->mem_segs_lock);
+ }
+}
+
+void
+vm_destroy(struct vm *vm)
+{
+ vm_cleanup(vm, true);
+ free(vm, M_VMM);
+}
+
+int
+vm_reinit(struct vm *vm)
+{
+ int error;
+
+ /*
+ * A virtual machine can be reset only if all vcpus are suspended.
+ */
+ if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
+ vm_cleanup(vm, false);
+ vm_init(vm, false);
+ error = 0;
+ } else {
+ error = EBUSY;
+ }
+
+ return (error);
+}
+
+const char *
+vm_name(struct vm *vm)
+{
+ return (vm->name);
+}
+
+void
+vm_slock_memsegs(struct vm *vm)
+{
+ sx_slock(&vm->mem_segs_lock);
+}
+
+void
+vm_xlock_memsegs(struct vm *vm)
+{
+ sx_xlock(&vm->mem_segs_lock);
+}
+
+void
+vm_unlock_memsegs(struct vm *vm)
+{
+ sx_unlock(&vm->mem_segs_lock);
+}
+
+/*
+ * Return 'true' if 'gpa' is allocated in the guest address space.
+ *
+ * This function is called in the context of a running vcpu which acts as
+ * an implicit lock on 'vm->mem_maps[]'.
+ */
+bool
+vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
+{
+ struct vm *vm = vcpu->vm;
+ struct mem_map *mm;
+ int i;
+
+#ifdef INVARIANTS
+ int hostcpu, state;
+ state = vcpu_get_state(vcpu, &hostcpu);
+ KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
+ ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
+#endif
+
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
+ return (true); /* 'gpa' is sysmem or devmem */
+ }
+
+ return (false);
+}
+
+int
+vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
+{
+ struct mem_seg *seg;
+ vm_object_t obj;
+
+ sx_assert(&vm->mem_segs_lock, SX_XLOCKED);
+
+ if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ if (len == 0 || (len & PAGE_MASK))
+ return (EINVAL);
+
+ seg = &vm->mem_segs[ident];
+ if (seg->object != NULL) {
+ if (seg->len == len && seg->sysmem == sysmem)
+ return (EEXIST);
+ else
+ return (EINVAL);
+ }
+
+ obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
+ if (obj == NULL)
+ return (ENOMEM);
+
+ seg->len = len;
+ seg->object = obj;
+ seg->sysmem = sysmem;
+ return (0);
+}
+
+int
+vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ vm_object_t *objptr)
+{
+ struct mem_seg *seg;
+
+ sx_assert(&vm->mem_segs_lock, SX_LOCKED);
+
+ if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ seg = &vm->mem_segs[ident];
+ if (len)
+ *len = seg->len;
+ if (sysmem)
+ *sysmem = seg->sysmem;
+ if (objptr)
+ *objptr = seg->object;
+ return (0);
+}
+
+void
+vm_free_memseg(struct vm *vm, int ident)
+{
+ struct mem_seg *seg;
+
+ KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
+ ("%s: invalid memseg ident %d", __func__, ident));
+
+ seg = &vm->mem_segs[ident];
+ if (seg->object != NULL) {
+ vm_object_deallocate(seg->object);
+ bzero(seg, sizeof(struct mem_seg));
+ }
+}
+
+int
+vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
+ size_t len, int prot, int flags)
+{
+ struct mem_seg *seg;
+ struct mem_map *m, *map;
+ vm_ooffset_t last;
+ int i, error;
+
+ if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
+ return (EINVAL);
+
+ if (flags & ~VM_MEMMAP_F_WIRED)
+ return (EINVAL);
+
+ if (segid < 0 || segid >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ seg = &vm->mem_segs[segid];
+ if (seg->object == NULL)
+ return (EINVAL);
+
+ last = first + len;
+ if (first < 0 || first >= last || last > seg->len)
+ return (EINVAL);
+
+ if ((gpa | first | last) & PAGE_MASK)
+ return (EINVAL);
+
+ map = NULL;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ m = &vm->mem_maps[i];
+ if (m->len == 0) {
+ map = m;
+ break;
+ }
+ }
+
+ if (map == NULL)
+ return (ENOSPC);
+
+ error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
+ len, 0, VMFS_NO_SPACE, prot, prot, 0);
+ if (error != KERN_SUCCESS)
+ return (EFAULT);
+
+ vm_object_reference(seg->object);
+
+ if (flags & VM_MEMMAP_F_WIRED) {
+ error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
+ VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
+ if (error != KERN_SUCCESS) {
+ vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
+ return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
+ EFAULT);
+ }
+ }
+
+ map->gpa = gpa;
+ map->len = len;
+ map->segoff = first;
+ map->segid = segid;
+ map->prot = prot;
+ map->flags = flags;
+ return (0);
+}
+
+int
+vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
+{
+ struct mem_map *m;
+ int i;
+
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ m = &vm->mem_maps[i];
+ if (m->gpa == gpa && m->len == len) {
+ vm_free_memmap(vm, i);
+ return (0);
+ }
+ }
+
+ return (EINVAL);
+}
+
+int
+vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
+{
+ struct mem_map *mm, *mmnext;
+ int i;
+
+ mmnext = NULL;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (mm->len == 0 || mm->gpa < *gpa)
+ continue;
+ if (mmnext == NULL || mm->gpa < mmnext->gpa)
+ mmnext = mm;
+ }
+
+ if (mmnext != NULL) {
+ *gpa = mmnext->gpa;
+ if (segid)
+ *segid = mmnext->segid;
+ if (segoff)
+ *segoff = mmnext->segoff;
+ if (len)
+ *len = mmnext->len;
+ if (prot)
+ *prot = mmnext->prot;
+ if (flags)
+ *flags = mmnext->flags;
+ return (0);
+ } else {
+ return (ENOENT);
+ }
+}
+
+static void
+vm_free_memmap(struct vm *vm, int ident)
+{
+ struct mem_map *mm;
+ int error __diagused;
+
+ mm = &vm->mem_maps[ident];
+ if (mm->len) {
+ error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
+ mm->gpa + mm->len);
+ KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
+ __func__, error));
+ bzero(mm, sizeof(struct mem_map));
+ }
+}
+
+static __inline bool
+sysmem_mapping(struct vm *vm, struct mem_map *mm)
+{
+
+ if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
+ return (true);
+ else
+ return (false);
+}
+
+vm_paddr_t
+vmm_sysmem_maxaddr(struct vm *vm)
+{
+ struct mem_map *mm;
+ vm_paddr_t maxaddr;
+ int i;
+
+ maxaddr = 0;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (sysmem_mapping(vm, mm)) {
+ if (maxaddr < mm->gpa + mm->len)
+ maxaddr = mm->gpa + mm->len;
+ }
+ }
+ return (maxaddr);
+}
+
+int
+vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
+{
+
+ vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault);
+ return (0);
+}
+
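+/* Read the special register as zero (RAZ). */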
+static int
+vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ *rval = 0;
+ return (0);
+}
+
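+/* Return the value stored at 'arg', e.g. a masked host ID register. */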
+static int
+vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+ *rval = *(uint64_t *)arg;
+ return (0);
+}
+
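+/* Ignore writes to the special register (WI). */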
+static int
+vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
+{
+ return (0);
+}
+
+static const struct vmm_special_reg vmm_special_regs[] = {
+#define SPECIAL_REG(_reg, _read, _write) \
+ { \
+ .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \
+ ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \
+ ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \
+ ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \
+ ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \
+ .esr_mask = ISS_MSR_REG_MASK, \
+ .reg_read = (_read), \
+ .reg_write = (_write), \
+ .arg = NULL, \
+ }
+#define ID_SPECIAL_REG(_reg, _name) \
+ { \
+ .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \
+ ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \
+ ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \
+ ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \
+ ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \
+ .esr_mask = ISS_MSR_REG_MASK, \
+ .reg_read = vmm_reg_read_arg, \
+ .reg_write = vmm_reg_wi, \
+ .arg = &(vmm_arch_regs._name), \
+ }
+
+ /* ID registers */
+ ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
+ ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
+ ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
+ ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
+ ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),
+
+ /*
+ * All other ID registers are read as zero.
+ * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
+ */
+ {
+ .esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
+ (0 << ISS_MSR_OP1_SHIFT) |
+ (0 << ISS_MSR_CRn_SHIFT) |
+ (0 << ISS_MSR_CRm_SHIFT),
+ .esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
+ ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
+ .reg_read = vmm_reg_raz,
+ .reg_write = vmm_reg_wi,
+ .arg = NULL,
+ },
+
+ /* Counter physical registers */
+ SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
+ SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
+ vtimer_phys_cval_write),
+ SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
+ vtimer_phys_tval_write),
+ SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
+#undef SPECIAL_REG
+};
+
+void
+vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
+ reg_read_t reg_read, reg_write_t reg_write, void *arg)
+{
+ int i;
+
+ for (i = 0; i < nitems(vm->special_reg); i++) {
+ if (vm->special_reg[i].esr_iss == 0 &&
+ vm->special_reg[i].esr_mask == 0) {
+ vm->special_reg[i].esr_iss = iss;
+ vm->special_reg[i].esr_mask = mask;
+ vm->special_reg[i].reg_read = reg_read;
+ vm->special_reg[i].reg_write = reg_write;
+ vm->special_reg[i].arg = arg;
+ return;
+ }
+ }
+
+ panic("%s: No free special register slot", __func__);
+}
+
+void
+vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
+{
+ int i;
+
+ for (i = 0; i < nitems(vm->special_reg); i++) {
+ if (vm->special_reg[i].esr_iss == iss &&
+ vm->special_reg[i].esr_mask == mask) {
+ memset(&vm->special_reg[i], 0,
+ sizeof(vm->special_reg[i]));
+ return;
+ }
+ }
+
+ panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
+ mask);
+}
+
+static int
+vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
+{
+ struct vm *vm;
+ struct vm_exit *vme;
+ struct vre *vre;
+ int i, rv;
+
+ vm = vcpu->vm;
+ vme = &vcpu->exitinfo;
+ vre = &vme->u.reg_emul.vre;
+
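+	/* Try the VM-registered handlers first, then the built-in table. */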
+ for (i = 0; i < nitems(vm->special_reg); i++) {
+ if (vm->special_reg[i].esr_iss == 0 &&
+ vm->special_reg[i].esr_mask == 0)
+ continue;
+
+ if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
+ vm->special_reg[i].esr_iss) {
+ rv = vmm_emulate_register(vcpu, vre,
+ vm->special_reg[i].reg_read,
+ vm->special_reg[i].reg_write,
+ vm->special_reg[i].arg);
+ if (rv == 0) {
+ *retu = false;
+ }
+ return (rv);
+ }
+ }
+ for (i = 0; i < nitems(vmm_special_regs); i++) {
+ if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
+ vmm_special_regs[i].esr_iss) {
+ rv = vmm_emulate_register(vcpu, vre,
+ vmm_special_regs[i].reg_read,
+ vmm_special_regs[i].reg_write,
+ vmm_special_regs[i].arg);
+ if (rv == 0) {
+ *retu = false;
+ }
+ return (rv);
+ }
+ }
+
+
+ return (0);
+}
+
+void
+vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+ mem_region_read_t mmio_read, mem_region_write_t mmio_write)
+{
+ int i;
+
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start == 0 &&
+ vm->mmio_region[i].end == 0) {
+ vm->mmio_region[i].start = start;
+ vm->mmio_region[i].end = start + size;
+ vm->mmio_region[i].read = mmio_read;
+ vm->mmio_region[i].write = mmio_write;
+ return;
+ }
+ }
+
+ panic("%s: No free MMIO region", __func__);
+}
+
+void
+vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
+{
+ int i;
+
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start == start &&
+ vm->mmio_region[i].end == start + size) {
+ memset(&vm->mmio_region[i], 0,
+ sizeof(vm->mmio_region[i]));
+ return;
+ }
+ }
+
+ panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
+ start + size);
+}
+
+static int
+vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
+{
+ struct vm *vm;
+ struct vm_exit *vme;
+ struct vie *vie;
+ struct hyp *hyp;
+ uint64_t fault_ipa;
+ struct vm_guest_paging *paging;
+ struct vmm_mmio_region *vmr;
+ int error, i;
+
+ vm = vcpu->vm;
+ hyp = vm->cookie;
+ if (!hyp->vgic_attached)
+ goto out_user;
+
+ vme = &vcpu->exitinfo;
+ vie = &vme->u.inst_emul.vie;
+ paging = &vme->u.inst_emul.paging;
+
+ fault_ipa = vme->u.inst_emul.gpa;
+
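+	/* Find the MMIO region, if any, that covers the faulting address. */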
+ vmr = NULL;
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start <= fault_ipa &&
+ vm->mmio_region[i].end > fault_ipa) {
+ vmr = &vm->mmio_region[i];
+ break;
+ }
+ }
+ if (vmr == NULL)
+ goto out_user;
+
+ error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
+ vmr->read, vmr->write, retu);
+ return (error);
+
+out_user:
+ *retu = true;
+ return (0);
+}
+
+int
+vm_suspend(struct vm *vm, enum vm_suspend_how how)
+{
+ int i;
+
+ if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
+ return (EINVAL);
+
+ if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
+ VM_CTR2(vm, "virtual machine already suspended %d/%d",
+ vm->suspend, how);
+ return (EALREADY);
+ }
+
+ VM_CTR1(vm, "virtual machine successfully suspended %d", how);
+
+ /*
+ * Notify all active vcpus that they are now suspended.
+ */
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (CPU_ISSET(i, &vm->active_cpus))
+ vcpu_notify_event(vm_vcpu(vm, i));
+ }
+
+ return (0);
+}
+
+void
+vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
+{
+ struct vm *vm = vcpu->vm;
+ struct vm_exit *vmexit;
+
+ KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
+ ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
+
+ vmexit = vm_exitinfo(vcpu);
+ vmexit->pc = pc;
+ vmexit->inst_length = 4;
+ vmexit->exitcode = VM_EXITCODE_SUSPENDED;
+ vmexit->u.suspended.how = vm->suspend;
+}
+
+void
+vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
+{
+ struct vm_exit *vmexit;
+
+ vmexit = vm_exitinfo(vcpu);
+ vmexit->pc = pc;
+ vmexit->inst_length = 4;
+ vmexit->exitcode = VM_EXITCODE_DEBUG;
+}
+
+int
+vm_activate_cpu(struct vcpu *vcpu)
+{
+ struct vm *vm = vcpu->vm;
+
+ if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
+ return (EBUSY);
+
+ CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
+ return (0);
+}
+
+int
+vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
+{
+ if (vcpu == NULL) {
+ vm->debug_cpus = vm->active_cpus;
+ for (int i = 0; i < vm->maxcpus; i++) {
+ if (CPU_ISSET(i, &vm->active_cpus))
+ vcpu_notify_event(vm_vcpu(vm, i));
+ }
+ } else {
+ if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
+ return (EINVAL);
+
+ CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
+ vcpu_notify_event(vcpu);
+ }
+ return (0);
+}
+
+int
+vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
+{
+
+ if (vcpu == NULL) {
+ CPU_ZERO(&vm->debug_cpus);
+ } else {
+ if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
+ return (EINVAL);
+
+ CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
+ }
+ return (0);
+}
+
+int
+vcpu_debugged(struct vcpu *vcpu)
+{
+
+ return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
+}
+
+cpuset_t
+vm_active_cpus(struct vm *vm)
+{
+
+ return (vm->active_cpus);
+}
+
+cpuset_t
+vm_debug_cpus(struct vm *vm)
+{
+
+ return (vm->debug_cpus);
+}
+
+cpuset_t
+vm_suspended_cpus(struct vm *vm)
+{
+
+ return (vm->suspended_cpus);
+}
+
+void *
+vcpu_stats(struct vcpu *vcpu)
+{
+
+ return (vcpu->stats);
+}
+
+/*
+ * This function is called to ensure that a vcpu "sees" a pending event
+ * as soon as possible:
+ * - If the vcpu thread is sleeping then it is woken up.
+ * - If the vcpu is running on a different host_cpu then an IPI will be directed
+ * to the host_cpu to cause the vcpu to trap into the hypervisor.
+ */
+static void
+vcpu_notify_event_locked(struct vcpu *vcpu)
+{
+ int hostcpu;
+
+ hostcpu = vcpu->hostcpu;
+ if (vcpu->state == VCPU_RUNNING) {
+ KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
+ if (hostcpu != curcpu) {
+ ipi_cpu(hostcpu, vmm_ipinum);
+ } else {
+ /*
+ * If the 'vcpu' is running on 'curcpu' then it must
+ * be sending a notification to itself (e.g. SELF_IPI).
+ * The pending event will be picked up when the vcpu
+ * transitions back to guest context.
+ */
+ }
+ } else {
+ KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
+ "with hostcpu %d", vcpu->state, hostcpu));
+ if (vcpu->state == VCPU_SLEEPING)
+ wakeup_one(vcpu);
+ }
+}
+
+void
+vcpu_notify_event(struct vcpu *vcpu)
+{
+ vcpu_lock(vcpu);
+ vcpu_notify_event_locked(vcpu);
+ vcpu_unlock(vcpu);
+}
+
+static void
+restore_guest_fpustate(struct vcpu *vcpu)
+{
+
+ /* flush host state to the pcb */
+ vfp_save_state(curthread, curthread->td_pcb);
+ /* Ensure the VFP state will be re-loaded when exiting the guest */
+ PCPU_SET(fpcurthread, NULL);
+
+ /* restore guest FPU state */
+ vfp_enable();
+ vfp_restore(vcpu->guestfpu);
+
+ /*
+ * The FPU is now "dirty" with the guest's state so turn on emulation
+ * to trap any access to the FPU by the host.
+ */
+ vfp_disable();
+}
+
+static void
+save_guest_fpustate(struct vcpu *vcpu)
+{
+ if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
+ CPACR_FPEN_TRAP_ALL1)
+ panic("VFP not enabled in host!");
+
+ /* save guest FPU state */
+ vfp_enable();
+ vfp_store(vcpu->guestfpu);
+ vfp_disable();
+
+ KASSERT(PCPU_GET(fpcurthread) == NULL,
+ ("%s: fpcurthread set with guest registers", __func__));
+}
+
+static int
+vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
+ bool from_idle)
+{
+ int error;
+
+ vcpu_assert_locked(vcpu);
+
+ /*
+ * State transitions from the vmmdev_ioctl() must always begin from
+ * the VCPU_IDLE state. This guarantees that there is only a single
+ * ioctl() operating on a vcpu at any point.
+ */
+ if (from_idle) {
+ while (vcpu->state != VCPU_IDLE) {
+ vcpu_notify_event_locked(vcpu);
+ msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+ }
+ } else {
+ KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
+ "vcpu idle state"));
+ }
+
+ if (vcpu->state == VCPU_RUNNING) {
+ KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
+ "mismatch for running vcpu", curcpu, vcpu->hostcpu));
+ } else {
+ KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
+ "vcpu that is not running", vcpu->hostcpu));
+ }
+
+ /*
+ * The following state transitions are allowed:
+ * IDLE -> FROZEN -> IDLE
+ * FROZEN -> RUNNING -> FROZEN
+ * FROZEN -> SLEEPING -> FROZEN
+ */
+ switch (vcpu->state) {
+ case VCPU_IDLE:
+ case VCPU_RUNNING:
+ case VCPU_SLEEPING:
+ error = (newstate != VCPU_FROZEN);
+ break;
+ case VCPU_FROZEN:
+ error = (newstate == VCPU_FROZEN);
+ break;
+ default:
+ error = 1;
+ break;
+ }
+
+ if (error)
+ return (EBUSY);
+
+ vcpu->state = newstate;
+ if (newstate == VCPU_RUNNING)
+ vcpu->hostcpu = curcpu;
+ else
+ vcpu->hostcpu = NOCPU;
+
+ if (newstate == VCPU_IDLE)
+ wakeup(&vcpu->state);
+
+ return (0);
+}
+
+static void
+vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
+{
+ int error;
+
+ if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
+		panic("Error %d setting state to %d", error, newstate);
+}
+
+static void
+vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
+{
+ int error;
+
+ if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
+ panic("Error %d setting state to %d", error, newstate);
+}
+
+int
+vm_get_capability(struct vcpu *vcpu, int type, int *retval)
+{
+ if (type < 0 || type >= VM_CAP_MAX)
+ return (EINVAL);
+
+ return (vmmops_getcap(vcpu->cookie, type, retval));
+}
+
+int
+vm_set_capability(struct vcpu *vcpu, int type, int val)
+{
+ if (type < 0 || type >= VM_CAP_MAX)
+ return (EINVAL);
+
+ return (vmmops_setcap(vcpu->cookie, type, val));
+}
+
+struct vm *
+vcpu_vm(struct vcpu *vcpu)
+{
+ return (vcpu->vm);
+}
+
+int
+vcpu_vcpuid(struct vcpu *vcpu)
+{
+ return (vcpu->vcpuid);
+}
+
+void *
+vcpu_get_cookie(struct vcpu *vcpu)
+{
+ return (vcpu->cookie);
+}
+
+struct vcpu *
+vm_vcpu(struct vm *vm, int vcpuid)
+{
+ return (vm->vcpu[vcpuid]);
+}
+
+int
+vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
+{
+ int error;
+
+ vcpu_lock(vcpu);
+ error = vcpu_set_state_locked(vcpu, newstate, from_idle);
+ vcpu_unlock(vcpu);
+
+ return (error);
+}
+
+enum vcpu_state
+vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
+{
+ enum vcpu_state state;
+
+ vcpu_lock(vcpu);
+ state = vcpu->state;
+ if (hostcpu != NULL)
+ *hostcpu = vcpu->hostcpu;
+ vcpu_unlock(vcpu);
+
+ return (state);
+}
+
+static void *
+_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+ int i, count, pageoff;
+ struct mem_map *mm;
+ vm_page_t m;
+
+ pageoff = gpa & PAGE_MASK;
+ if (len > PAGE_SIZE - pageoff)
+ panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
+
+ count = 0;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
+ gpa < mm->gpa + mm->len) {
+ count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
+ trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
+ break;
+ }
+ }
+
+ if (count == 1) {
+ *cookie = m;
+ return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
+ } else {
+ *cookie = NULL;
+ return (NULL);
+ }
+}
+
+void *
+vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+#ifdef INVARIANTS
+ /*
+ * The current vcpu should be frozen to ensure 'vm_memmap[]'
+ * stability.
+ */
+ int state = vcpu_get_state(vcpu, NULL);
+ KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
+ __func__, state));
+#endif
+ return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
+}
+
+void *
+vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+ sx_assert(&vm->mem_segs_lock, SX_LOCKED);
+ return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
+}
+
+void
+vm_gpa_release(void *cookie)
+{
+ vm_page_t m = cookie;
+
+ vm_page_unwire(m, PQ_ACTIVE);
+}
+
+int
+vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
+{
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+
+ return (vmmops_getreg(vcpu->cookie, reg, retval));
+}
+
+int
+vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
+{
+ int error;
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+ error = vmmops_setreg(vcpu->cookie, reg, val);
+ if (error || reg != VM_REG_GUEST_PC)
+ return (error);
+
+ vcpu->nextpc = val;
+
+ return (0);
+}
+
+void *
+vm_get_cookie(struct vm *vm)
+{
+ return (vm->cookie);
+}
+
+int
+vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
+{
+ return (vmmops_exception(vcpu->cookie, esr, far));
+}
+
+int
+vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
+{
+ return (vgic_attach_to_vm(vm->cookie, descr));
+}
+
+int
+vm_assert_irq(struct vm *vm, uint32_t irq)
+{
+ return (vgic_inject_irq(vm->cookie, -1, irq, true));
+}
+
+int
+vm_deassert_irq(struct vm *vm, uint32_t irq)
+{
+ return (vgic_inject_irq(vm->cookie, -1, irq, false));
+}
+
+int
+vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
+ int func)
+{
+ /* TODO: Should we raise an SError? */
+ return (vgic_inject_msi(vm->cookie, msg, addr));
+}
+
+static int
+vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
+{
+ struct hypctx *hypctx;
+ int i;
+
+ hypctx = vcpu_get_cookie(vcpu);
+
+ if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
+ return (1);
+
+ vme->exitcode = VM_EXITCODE_SMCCC;
+ vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
+ for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
+ vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];
+
+ *retu = true;
+ return (0);
+}
+
+static int
+vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
+{
+ vcpu_lock(vcpu);
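+	/* Sleep until the guest has a pending interrupt or should yield. */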
+ while (1) {
+ if (vgic_has_pending_irq(vcpu->cookie))
+ break;
+
+ if (vcpu_should_yield(vcpu))
+ break;
+
+ vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+ /*
+ * XXX msleep_spin() cannot be interrupted by signals so
+ * wake up periodically to check pending signals.
+ */
+ msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
+ vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+ }
+ vcpu_unlock(vcpu);
+
+ *retu = false;
+ return (0);
+}
+
+static int
+vm_handle_paging(struct vcpu *vcpu, bool *retu)
+{
+ struct vm *vm = vcpu->vm;
+ struct vm_exit *vme;
+ struct vm_map *map;
+ uint64_t addr, esr;
+ pmap_t pmap;
+ int ftype, rv;
+
+ vme = &vcpu->exitinfo;
+
+ pmap = vmspace_pmap(vcpu->vm->vmspace);
+ addr = vme->u.paging.gpa;
+ esr = vme->u.paging.esr;
+
+ /* The page exists, but the page table needs to be updated. */
+ if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
+ return (0);
+
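+	/*
+	 * The mapping is missing; fault the page in with full permissions
+	 * for both instruction and data aborts.
+	 */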
+ switch (ESR_ELx_EXCEPTION(esr)) {
+ case EXCP_INSN_ABORT_L:
+ case EXCP_DATA_ABORT_L:
+ ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
+ break;
+ default:
+ panic("%s: Invalid exception (esr = %lx)", __func__, esr);
+ }
+
+ map = &vm->vmspace->vm_map;
+ rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
+ if (rv != KERN_SUCCESS)
+ return (EFAULT);
+
+ return (0);
+}
+
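+/*
+ * Run the vcpu until an exit that must be handled in userland
+ * (retu == true) or an error occurs. A userspace monitor drives this
+ * in a loop through the VM_RUN ioctl on the VM's cdev, roughly as in
+ * the sketch below (illustrative only; the struct vm_run member names
+ * are assumed, see vmmdev_ioctl() in vmm_dev.c):
+ *
+ *	struct vm_exit vmexit;
+ *	struct vm_run vmrun;
+ *
+ *	vmrun.cpuid = 0;		// vcpu to run
+ *	vmrun.vm_exit = &vmexit;	// filled in on return
+ *	while (ioctl(vmfd, VM_RUN, &vmrun) == 0) {
+ *		// inspect vmexit.exitcode, emulate, then loop
+ *	}
+ */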
+int
+vm_run(struct vcpu *vcpu)
+{
+ struct vm *vm = vcpu->vm;
+ struct vm_eventinfo evinfo;
+ int error, vcpuid;
+ struct vm_exit *vme;
+ bool retu;
+ pmap_t pmap;
+
+ vcpuid = vcpu->vcpuid;
+
+ if (!CPU_ISSET(vcpuid, &vm->active_cpus))
+ return (EINVAL);
+
+ if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
+ return (EINVAL);
+
+ pmap = vmspace_pmap(vm->vmspace);
+ vme = &vcpu->exitinfo;
+ evinfo.rptr = NULL;
+ evinfo.sptr = &vm->suspend;
+ evinfo.iptr = NULL;
+restart:
+ critical_enter();
+
+ restore_guest_fpustate(vcpu);
+
+ vcpu_require_state(vcpu, VCPU_RUNNING);
+ error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
+ vcpu_require_state(vcpu, VCPU_FROZEN);
+
+ save_guest_fpustate(vcpu);
+
+ critical_exit();
+
+ if (error == 0) {
+ retu = false;
+ switch (vme->exitcode) {
+ case VM_EXITCODE_INST_EMUL:
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ error = vm_handle_inst_emul(vcpu, &retu);
+ break;
+
+ case VM_EXITCODE_REG_EMUL:
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ error = vm_handle_reg_emul(vcpu, &retu);
+ break;
+
+ case VM_EXITCODE_HVC:
+ /*
+ * The HVC instruction saves the address for the
+ * next instruction as the return address.
+ */
+ vcpu->nextpc = vme->pc;
+ /*
+ * The PSCI call can change the exit information in the
+ * case of suspend/reset/poweroff/cpu off/cpu on.
+ */
+ error = vm_handle_smccc_call(vcpu, vme, &retu);
+ break;
+
+ case VM_EXITCODE_WFI:
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ error = vm_handle_wfi(vcpu, vme, &retu);
+ break;
+
+ case VM_EXITCODE_PAGING:
+ vcpu->nextpc = vme->pc;
+ error = vm_handle_paging(vcpu, &retu);
+ break;
+
+ default:
+ /* Handle in userland */
+ vcpu->nextpc = vme->pc;
+ retu = true;
+ break;
+ }
+ }
+
+ if (error == 0 && retu == false)
+ goto restart;
+
+ return (error);
+}
diff --git a/sys/arm64/vmm/vmm_arm64.c b/sys/arm64/vmm/vmm_arm64.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_arm64.c
@@ -0,0 +1,1337 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/vmem.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+
+#include <machine/armreg.h>
+#include <machine/vm.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/atomic.h>
+#include <machine/hypervisor.h>
+#include <machine/pmap.h>
+
+#include "mmu.h"
+#include "arm64.h"
+#include "hyp.h"
+#include "reset.h"
+#include "io/vgic.h"
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+#include "vmm_stat.h"
+
+#define HANDLED 1
+#define UNHANDLED 0
+
+/* Number of bits in an EL2 virtual address */
+#define EL2_VIRT_BITS 48
+CTASSERT((1ul << EL2_VIRT_BITS) >= HYP_VM_MAX_ADDRESS);
+
+/* TODO: Move the host hypctx off the stack */
+#define VMM_STACK_PAGES 4
+#define VMM_STACK_SIZE (VMM_STACK_PAGES * PAGE_SIZE)
+
+static int vmm_pmap_levels, vmm_virt_bits, vmm_max_ipa_bits;
+
+/* Register values passed to arm_setup_vectors to set in the hypervisor */
+struct vmm_init_regs {
+ uint64_t tcr_el2;
+ uint64_t vtcr_el2;
+};
+
+MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP");
+
+extern char hyp_init_vectors[];
+extern char hyp_vectors[];
+extern char hyp_stub_vectors[];
+
+static vm_paddr_t hyp_code_base;
+static size_t hyp_code_len;
+
+static char *stack[MAXCPU];
+static vm_offset_t stack_hyp_va[MAXCPU];
+
+static vmem_t *el2_mem_alloc;
+
+static void arm_setup_vectors(void *arg);
+static void vmm_pmap_clean_stage2_tlbi(void);
+static void vmm_pmap_invalidate_range(uint64_t, vm_offset_t, vm_offset_t, bool);
+static void vmm_pmap_invalidate_all(uint64_t);
+
+DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
+
+static inline void
+arm64_set_active_vcpu(struct hypctx *hypctx)
+{
+ DPCPU_SET(vcpu, hypctx);
+}
+
+struct hypctx *
+arm64_get_active_vcpu(void)
+{
+ return (DPCPU_GET(vcpu));
+}
+
+static void
+arm_setup_vectors(void *arg)
+{
+ struct vmm_init_regs *el2_regs;
+ uintptr_t stack_top;
+ uint32_t sctlr_el2;
+ register_t daif;
+
+ el2_regs = arg;
+ arm64_set_active_vcpu(NULL);
+
+ daif = intr_disable();
+
+ /*
+ * Install the temporary vectors which will be responsible for
+ * initializing the VMM when we next trap into EL2.
+ *
+ * x0: the exception vector table responsible for hypervisor
+ * initialization on the next call.
+ */
+ vmm_call_hyp(vtophys(&vmm_hyp_code));
+
+ /* Create and map the hypervisor stack */
+ stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE;
+
+ /*
+ * Configure the system control register for EL2:
+ *
+ * SCTLR_EL2_M: MMU on
+ * SCTLR_EL2_C: Data cacheability not affected
+ * SCTLR_EL2_I: Instruction cacheability not affected
+ * SCTLR_EL2_A: Alignment check enable
+ * SCTLR_EL2_SA: Stack pointer alignment check
+ * SCTLR_EL2_WXN: Treat writable memory as execute never
+ * ~SCTLR_EL2_EE: Data accesses are little-endian
+ */
+ sctlr_el2 = SCTLR_EL2_RES1;
+ sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I;
+ sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA;
+ sctlr_el2 |= SCTLR_EL2_WXN;
+ sctlr_el2 &= ~SCTLR_EL2_EE;
+
+ /* Special call to initialize EL2 */
+ vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2,
+ sctlr_el2, el2_regs->vtcr_el2);
+
+ intr_restore(daif);
+}
+
+static void
+arm_teardown_vectors(void *arg)
+{
+ register_t daif;
+
+ /*
+ * vmm_cleanup() will disable the MMU. For the next few instructions,
+ * before the hardware disables the MMU, one of the following is
+ * possible:
+ *
+ * a. The instruction addresses are fetched with the MMU disabled,
+ * and they must represent the actual physical addresses. This will work
+ * because we call the vmm_cleanup() function by its physical address.
+ *
+ * b. The instruction addresses are fetched using the old translation
+ * tables. This will work because we have an identity mapping in place
+ * in the translation tables and vmm_cleanup() is called by its physical
+ * address.
+ */
+ daif = intr_disable();
+ /* TODO: Invalidate the cache */
+ vmm_call_hyp(HYP_CLEANUP, vtophys(hyp_stub_vectors));
+ intr_restore(daif);
+
+ arm64_set_active_vcpu(NULL);
+}
+
+static uint64_t
+vmm_vtcr_el2_sl(u_int levels)
+{
+#if PAGE_SIZE == PAGE_SIZE_4K
+ switch (levels) {
+ case 2:
+ return (VTCR_EL2_SL0_4K_LVL2);
+ case 3:
+ return (VTCR_EL2_SL0_4K_LVL1);
+ case 4:
+ return (VTCR_EL2_SL0_4K_LVL0);
+ default:
+ panic("%s: Invalid number of page table levels %u", __func__,
+ levels);
+ }
+#elif PAGE_SIZE == PAGE_SIZE_16K
+ switch (levels) {
+ case 2:
+ return (VTCR_EL2_SL0_16K_LVL2);
+ case 3:
+ return (VTCR_EL2_SL0_16K_LVL1);
+ case 4:
+ return (VTCR_EL2_SL0_16K_LVL0);
+ default:
+ panic("%s: Invalid number of page table levels %u", __func__,
+ levels);
+ }
+#else
+#error Unsupported page size
+#endif
+}
+
+int
+vmmops_modinit(int ipinum)
+{
+ struct vmm_init_regs el2_regs;
+ vm_offset_t next_hyp_va;
+ vm_paddr_t vmm_base;
+ uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field;
+ uint64_t cnthctl_el2;
+ register_t daif;
+ int cpu, i;
+ bool rv __diagused;
+
+ if (!virt_enabled()) {
+ printf(
+ "vmm: Processor doesn't have support for virtualization\n");
+ return (ENXIO);
+ }
+
+ /* TODO: Support VHE */
+ if (in_vhe()) {
+ printf("vmm: VHE is unsupported\n");
+ return (ENXIO);
+ }
+
+ if (!vgic_present()) {
+ printf("vmm: No vgic found\n");
+ return (ENODEV);
+ }
+
+ if (!get_kernel_reg(ID_AA64MMFR0_EL1, &id_aa64mmfr0_el1)) {
+ printf("vmm: Unable to read ID_AA64MMFR0_EL1\n");
+ return (ENXIO);
+ }
+ pa_range_field = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1);
+ /*
+ * Use 3 levels to give us up to 39 bits with 4k pages, or
+ * 47 bits with 16k pages.
+ */
+ /* TODO: Check the number of levels for 64k pages */
+ vmm_pmap_levels = 3;
+ switch (pa_range_field) {
+ case ID_AA64MMFR0_PARange_4G:
+ printf("vmm: Not enough physical address bits\n");
+ return (ENXIO);
+ case ID_AA64MMFR0_PARange_64G:
+ vmm_virt_bits = 36;
+#if PAGE_SIZE == PAGE_SIZE_16K
+ vmm_pmap_levels = 2;
+#endif
+ break;
+ default:
+ vmm_virt_bits = 39;
+ break;
+ }
+ pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT;
+
+ /* Initialise the EL2 MMU */
+ if (!vmmpmap_init()) {
+ printf("vmm: Failed to init the EL2 MMU\n");
+ return (ENOMEM);
+ }
+
+ /* Set up the stage 2 pmap callbacks */
+ MPASS(pmap_clean_stage2_tlbi == NULL);
+ pmap_clean_stage2_tlbi = vmm_pmap_clean_stage2_tlbi;
+ pmap_stage2_invalidate_range = vmm_pmap_invalidate_range;
+ pmap_stage2_invalidate_all = vmm_pmap_invalidate_all;
+
+ /*
+ * Create an allocator for the virtual address space used by EL2.
+ * EL2 code is identity-mapped; the allocator is used to find space for
+ * VM structures.
+ */
+ el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0, M_WAITOK);
+
+ /* Create the mappings for the hypervisor translation table. */
+ hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code);
+
+ /* We need a physical identity mapping for when we activate the MMU */
+ hyp_code_base = vmm_base = vtophys(&vmm_hyp_code);
+ rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base,
+ VM_PROT_READ | VM_PROT_EXECUTE);
+ MPASS(rv);
+
+ next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE);
+
+ /* Create a per-CPU hypervisor stack */
+ CPU_FOREACH(cpu) {
+ stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO);
+ stack_hyp_va[cpu] = next_hyp_va;
+
+ for (i = 0; i < VMM_STACK_PAGES; i++) {
+ rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i),
+ PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)),
+ VM_PROT_READ | VM_PROT_WRITE);
+ MPASS(rv);
+ }
+ next_hyp_va += L2_SIZE;
+ }
+
+ el2_regs.tcr_el2 = TCR_EL2_RES1;
+ el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT,
+ TCR_EL2_PS_52BITS);
+ el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS);
+ el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA;
+#if PAGE_SIZE == PAGE_SIZE_4K
+ el2_regs.tcr_el2 |= TCR_EL2_TG0_4K;
+#elif PAGE_SIZE == PAGE_SIZE_16K
+ el2_regs.tcr_el2 |= TCR_EL2_TG0_16K;
+#else
+#error Unsupported page size
+#endif
+#ifdef SMP
+ el2_regs.tcr_el2 |= TCR_EL2_SH0_IS;
+#endif
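+ /*
+ * Note (illustrative): with EL2_VIRT_BITS = 48 the T0SZ field set
+ * above is 64 - 48 = 16, giving EL2 a 48-bit stage 1 virtual
+ * address space; the PS field is capped at 52 bits of physical
+ * address.
+ */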
+
+ switch (el2_regs.tcr_el2 & TCR_EL2_PS_MASK) {
+ case TCR_EL2_PS_32BITS:
+ vmm_max_ipa_bits = 32;
+ break;
+ case TCR_EL2_PS_36BITS:
+ vmm_max_ipa_bits = 36;
+ break;
+ case TCR_EL2_PS_40BITS:
+ vmm_max_ipa_bits = 40;
+ break;
+ case TCR_EL2_PS_42BITS:
+ vmm_max_ipa_bits = 42;
+ break;
+ case TCR_EL2_PS_44BITS:
+ vmm_max_ipa_bits = 44;
+ break;
+ case TCR_EL2_PS_48BITS:
+ vmm_max_ipa_bits = 48;
+ break;
+ case TCR_EL2_PS_52BITS:
+ default:
+ vmm_max_ipa_bits = 52;
+ break;
+ }
+
+ /*
+ * Configure the Stage 2 translation control register:
+ *
+ * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable
+ * normal memory
+ * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable
+ * normal memory
+ * VTCR_EL2_TG0_4K/16K: Stage 2 uses the same page size as the kernel
+ * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables
+ * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner
+ * shareable
+ */
+ el2_regs.vtcr_el2 = VTCR_EL2_RES1;
+ el2_regs.vtcr_el2 |=
+ min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT);
+ el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA;
+ el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits);
+ el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels);
+#if PAGE_SIZE == PAGE_SIZE_4K
+ el2_regs.vtcr_el2 |= VTCR_EL2_TG0_4K;
+#elif PAGE_SIZE == PAGE_SIZE_16K
+ el2_regs.vtcr_el2 |= VTCR_EL2_TG0_16K;
+#else
+#error Unsupported page size
+#endif
+#ifdef SMP
+ el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS;
+#endif
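+ /*
+ * Worked example (illustrative): with 4K pages and
+ * vmm_virt_bits = 39 the guest IPA space is 39 bits wide, so
+ * T0SZ is 64 - 39 = 25 and vmm_vtcr_el2_sl(3) sets SL0 for a
+ * level 1 initial lookup of the 3 level stage 2 walk.
+ */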
+
+ smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs);
+
+ /* Add memory to the vmem allocator (checking there is space) */
+ if (vmm_base > (L2_SIZE + PAGE_SIZE)) {
+ /*
+ * Leave an L2 block of padding before the vmm code so buffer
+ * overflows from earlier allocations are caught. Also include
+ * PAGE_SIZE, the minimum size we can allocate.
+ */
+ vmm_base -= L2_SIZE + PAGE_SIZE;
+ vmm_base = rounddown2(vmm_base, L2_SIZE);
+
+ /*
+ * Check there is memory before the vmm code to add.
+ *
+ * Reserve the L2 block at address 0 so NULL dereference will
+ * raise an exception.
+ */
+ if (vmm_base > L2_SIZE)
+ vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE,
+ M_WAITOK);
+ }
+
+ /*
+ * Add the memory after the stacks. There is most of an L2 block
+ * between the last stack and the first allocation so this should
+ * be safe without adding more padding.
+ */
+ if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE)
+ vmem_add(el2_mem_alloc, next_hyp_va,
+ HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK);
+
+ daif = intr_disable();
+ cnthctl_el2 = vmm_call_hyp(HYP_READ_REGISTER, HYP_REG_CNTHCTL);
+ intr_restore(daif);
+
+ vgic_init();
+ vtimer_init(cnthctl_el2);
+
+ return (0);
+}
+
+int
+vmmops_modcleanup(void)
+{
+ int cpu;
+
+ smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL);
+
+ CPU_FOREACH(cpu) {
+ vmmpmap_remove(stack_hyp_va[cpu], VMM_STACK_PAGES * PAGE_SIZE,
+ false);
+ }
+
+ vmmpmap_remove(hyp_code_base, hyp_code_len, false);
+
+ vtimer_cleanup();
+
+ vmmpmap_fini();
+
+ CPU_FOREACH(cpu)
+ free(stack[cpu], M_HYP);
+
+ pmap_clean_stage2_tlbi = NULL;
+ pmap_stage2_invalidate_range = NULL;
+ pmap_stage2_invalidate_all = NULL;
+
+ return (0);
+}
+
+static vm_size_t
+el2_hyp_size(struct vm *vm)
+{
+ return (round_page(sizeof(struct hyp) +
+ sizeof(struct hypctx *) * vm_get_maxcpus(vm)));
+}
+
+static vm_size_t
+el2_hypctx_size(void)
+{
+ return (round_page(sizeof(struct hypctx)));
+}
+
+static vm_offset_t
+el2_map_enter(vm_offset_t data, vm_size_t size, vm_prot_t prot)
+{
+ vmem_addr_t addr;
+ int err __diagused;
+ bool rv __diagused;
+
+ err = vmem_alloc(el2_mem_alloc, size, M_NEXTFIT | M_WAITOK, &addr);
+ MPASS(err == 0);
+ rv = vmmpmap_enter(addr, size, vtophys(data), prot);
+ MPASS(rv);
+
+ return (addr);
+}
+
+void *
+vmmops_init(struct vm *vm, pmap_t pmap)
+{
+ struct hyp *hyp;
+ vm_size_t size;
+
+ size = el2_hyp_size(vm);
+ hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
+
+ hyp->vm = vm;
+ hyp->vgic_attached = false;
+
+ vtimer_vminit(hyp);
+ vgic_vminit(hyp);
+
+ hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size,
+ VM_PROT_READ | VM_PROT_WRITE);
+
+ return (hyp);
+}
+
+void *
+vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
+{
+ struct hyp *hyp = vmi;
+ struct hypctx *hypctx;
+ vm_size_t size;
+
+ size = el2_hypctx_size();
+ hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
+
+ KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
+ ("%s: Invalid vcpuid %d", __func__, vcpuid));
+ hyp->ctx[vcpuid] = hypctx;
+
+ hypctx->hyp = hyp;
+ hypctx->vcpu = vcpu1;
+
+ reset_vm_el01_regs(hypctx);
+ reset_vm_el2_regs(hypctx);
+
+ vtimer_cpuinit(hypctx);
+ vgic_cpuinit(hypctx);
+
+ hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size,
+ VM_PROT_READ | VM_PROT_WRITE);
+
+ return (hypctx);
+}
+
+static int
+arm_vmm_pinit(pmap_t pmap)
+{
+
+ pmap_pinit_stage(pmap, PM_STAGE2, vmm_pmap_levels);
+ return (1);
+}
+
+struct vmspace *
+vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
+{
+ return (vmspace_alloc(min, max, arm_vmm_pinit));
+}
+
+void
+vmmops_vmspace_free(struct vmspace *vmspace)
+{
+
+ pmap_remove_pages(vmspace_pmap(vmspace));
+ vmspace_free(vmspace);
+}
+
+static void
+vmm_pmap_clean_stage2_tlbi(void)
+{
+ vmm_call_hyp(HYP_CLEAN_S2_TLBI);
+}
+
+static void
+vmm_pmap_invalidate_range(uint64_t vttbr, vm_offset_t sva, vm_offset_t eva,
+ bool final_only)
+{
+ MPASS(eva > sva);
+ vmm_call_hyp(HYP_S2_TLBI_RANGE, vttbr, sva, eva, final_only);
+}
+
+static void
+vmm_pmap_invalidate_all(uint64_t vttbr)
+{
+ vmm_call_hyp(HYP_S2_TLBI_ALL, vttbr);
+}
+
+static inline void
+arm64_print_hyp_regs(struct vm_exit *vme)
+{
+ printf("esr_el2: 0x%016lx\n", vme->u.hyp.esr_el2);
+ printf("far_el2: 0x%016lx\n", vme->u.hyp.far_el2);
+ printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2);
+ printf("elr_el2: 0x%016lx\n", vme->pc);
+}
+
+static void
+arm64_gen_inst_emul_data(struct hypctx *hypctx, uint32_t esr_iss,
+ struct vm_exit *vme_ret)
+{
+ struct vm_guest_paging *paging;
+ struct vie *vie;
+ uint32_t esr_sas, reg_num;
+
+ /*
+ * Get the page address from HPFAR_EL2.
+ */
+ vme_ret->u.inst_emul.gpa =
+ HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
+ /* Bits [11:0] are the same as bits [11:0] from the virtual address. */
+ vme_ret->u.inst_emul.gpa += hypctx->exit_info.far_el2 &
+ FAR_EL2_HPFAR_PAGE_MASK;
+
+ esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT;
+ reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT;
+
+ vie = &vme_ret->u.inst_emul.vie;
+ vie->access_size = 1 << esr_sas;
+ vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0;
+ vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ;
+ vie->reg = reg_num;
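+ /*
+ * Example (illustrative): a faulting 32-bit store such as
+ * "str w3, [x0]" has SAS = 2, SRT = 3 and WnR = 1 in the ISS,
+ * which decodes to access_size = 4, reg = 3 and
+ * dir = VM_DIR_WRITE.
+ */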
+
+ paging = &vme_ret->u.inst_emul.paging;
+ paging->ttbr0_addr = hypctx->ttbr0_el1 & ~(TTBR_ASID_MASK | TTBR_CnP);
+ paging->ttbr1_addr = hypctx->ttbr1_el1 & ~(TTBR_ASID_MASK | TTBR_CnP);
+ paging->tcr_el1 = hypctx->tcr_el1;
+ paging->tcr2_el1 = hypctx->tcr2_el1;
+ paging->flags = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);
+ if ((hypctx->sctlr_el1 & SCTLR_M) != 0)
+ paging->flags |= VM_GP_MMU_ENABLED;
+}
+
+static void
+arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret)
+{
+ uint32_t reg_num;
+ struct vre *vre;
+
+ /* u.hyp member will be replaced by u.reg_emul */
+ vre = &vme_ret->u.reg_emul.vre;
+
+ vre->inst_syndrome = esr_iss;
+ /* ARMv8 Architecture Manual, p. D7-2273: 1 means read */
+ vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE;
+ reg_num = ISS_MSR_Rt(esr_iss);
+ vre->reg = reg_num;
+}
+
+void
+raise_data_insn_abort(struct hypctx *hypctx, uint64_t far, bool dabort, int fsc)
+{
+ uint64_t esr;
+
+ if ((hypctx->tf.tf_spsr & PSR_M_MASK) == PSR_M_EL0t)
+ esr = EXCP_INSN_ABORT_L << ESR_ELx_EC_SHIFT;
+ else
+ esr = EXCP_INSN_ABORT << ESR_ELx_EC_SHIFT;
+ /* Set the bit that changes from insn -> data abort */
+ if (dabort)
+ esr |= EXCP_DATA_ABORT_L << ESR_ELx_EC_SHIFT;
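+ /*
+ * For reference: the instruction abort ECs (0x20/0x21) and the
+ * data abort ECs (0x24/0x25) differ only in a single bit of the
+ * EC field, so OR-ing in the data abort value is sufficient.
+ */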
+ /* Set the IL bit if set by hardware */
+ esr |= hypctx->tf.tf_esr & ESR_ELx_IL;
+
+ vmmops_exception(hypctx, esr | fsc, far);
+}
+
+static int
+handle_el1_sync_excp(struct hypctx *hypctx, struct vm_exit *vme_ret,
+ pmap_t pmap)
+{
+ uint64_t gpa;
+ uint32_t esr_ec, esr_iss;
+
+ esr_ec = ESR_ELx_EXCEPTION(hypctx->tf.tf_esr);
+ esr_iss = hypctx->tf.tf_esr & ESR_ELx_ISS_MASK;
+
+ switch (esr_ec) {
+ case EXCP_UNKNOWN:
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_UNKNOWN, 1);
+ arm64_print_hyp_regs(vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ break;
+ case EXCP_TRAP_WFI_WFE:
+ if ((hypctx->tf.tf_esr & 0x3) == 0) { /* WFI */
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_WFI, 1);
+ vme_ret->exitcode = VM_EXITCODE_WFI;
+ } else {
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_WFE, 1);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ }
+ break;
+ case EXCP_HVC:
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_HVC, 1);
+ vme_ret->exitcode = VM_EXITCODE_HVC;
+ break;
+ case EXCP_MSR:
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_MSR, 1);
+ arm64_gen_reg_emul_data(esr_iss, vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_REG_EMUL;
+ break;
+
+ case EXCP_INSN_ABORT_L:
+ case EXCP_DATA_ABORT_L:
+ vmm_stat_incr(hypctx->vcpu, esr_ec == EXCP_DATA_ABORT_L ?
+ VMEXIT_DATA_ABORT : VMEXIT_INSN_ABORT, 1);
+ switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) {
+ case ISS_DATA_DFSC_TF_L0:
+ case ISS_DATA_DFSC_TF_L1:
+ case ISS_DATA_DFSC_TF_L2:
+ case ISS_DATA_DFSC_TF_L3:
+ case ISS_DATA_DFSC_AFF_L1:
+ case ISS_DATA_DFSC_AFF_L2:
+ case ISS_DATA_DFSC_AFF_L3:
+ case ISS_DATA_DFSC_PF_L1:
+ case ISS_DATA_DFSC_PF_L2:
+ case ISS_DATA_DFSC_PF_L3:
+ gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
+ /* Check the IPA is valid */
+ if (gpa >= (1ul << vmm_max_ipa_bits)) {
+ raise_data_insn_abort(hypctx,
+ hypctx->exit_info.far_el2,
+ esr_ec == EXCP_DATA_ABORT_L,
+ ISS_DATA_DFSC_ASF_L0);
+ vme_ret->inst_length = 0;
+ return (HANDLED);
+ }
+
+ if (vm_mem_allocated(hypctx->vcpu, gpa)) {
+ vme_ret->exitcode = VM_EXITCODE_PAGING;
+ vme_ret->inst_length = 0;
+ vme_ret->u.paging.esr = hypctx->tf.tf_esr;
+ vme_ret->u.paging.gpa = gpa;
+ } else if (esr_ec == EXCP_INSN_ABORT_L) {
+ /*
+ * Raise an external abort. Device memory is
+ * not executable
+ */
+ raise_data_insn_abort(hypctx,
+ hypctx->exit_info.far_el2, false,
+ ISS_DATA_DFSC_EXT);
+ vme_ret->inst_length = 0;
+ return (HANDLED);
+ } else {
+ arm64_gen_inst_emul_data(hypctx, esr_iss,
+ vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_INST_EMUL;
+ }
+ break;
+ default:
+ arm64_print_hyp_regs(vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ break;
+ }
+
+ break;
+
+ default:
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_SYNC, 1);
+ arm64_print_hyp_regs(vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ break;
+ }
+
+ /* We don't do any instruction emulation here */
+ return (UNHANDLED);
+}
+
+static int
+arm64_handle_world_switch(struct hypctx *hypctx, int excp_type,
+ struct vm_exit *vme, pmap_t pmap)
+{
+ int handled;
+
+ switch (excp_type) {
+ case EXCP_TYPE_EL1_SYNC:
+ /* The exit code will be set by handle_el1_sync_excp(). */
+ handled = handle_el1_sync_excp(hypctx, vme, pmap);
+ break;
+
+ case EXCP_TYPE_EL1_IRQ:
+ case EXCP_TYPE_EL1_FIQ:
+ /* The host kernel will handle IRQs and FIQs. */
+ vmm_stat_incr(hypctx->vcpu,
+ excp_type == EXCP_TYPE_EL1_IRQ ? VMEXIT_IRQ : VMEXIT_FIQ, 1);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = UNHANDLED;
+ break;
+
+ case EXCP_TYPE_EL1_ERROR:
+ case EXCP_TYPE_EL2_SYNC:
+ case EXCP_TYPE_EL2_IRQ:
+ case EXCP_TYPE_EL2_FIQ:
+ case EXCP_TYPE_EL2_ERROR:
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_EL2, 1);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = UNHANDLED;
+ break;
+
+ default:
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = UNHANDLED;
+ break;
+ }
+
+ return (handled);
+}
+
+static void
+ptp_release(void **cookie)
+{
+ if (*cookie != NULL) {
+ vm_gpa_release(*cookie);
+ *cookie = NULL;
+ }
+}
+
+static void *
+ptp_hold(struct vcpu *vcpu, vm_paddr_t ptpphys, size_t len, void **cookie)
+{
+ void *ptr;
+
+ ptp_release(cookie);
+ ptr = vm_gpa_hold(vcpu, ptpphys, len, VM_PROT_RW, cookie);
+ return (ptr);
+}
+
+/* log2 of the number of bytes in a page table entry */
+#define PTE_SHIFT 3
+int
+vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
+ int prot, uint64_t *gpa, int *is_fault)
+{
+ struct hypctx *hypctx;
+ void *cookie;
+ uint64_t mask, *ptep, pte, pte_addr;
+ int address_bits, granule_shift, ia_bits, levels, pte_shift, tsz;
+ bool is_el0;
+
+ /* Check if the MMU is off */
+ if ((paging->flags & VM_GP_MMU_ENABLED) == 0) {
+ *is_fault = 0;
+ *gpa = gla;
+ return (0);
+ }
+
+ is_el0 = (paging->flags & PSR_M_MASK) == PSR_M_EL0t;
+
+ if (ADDR_IS_KERNEL(gla)) {
+ /* If address translation is disabled raise an exception */
+ if ((paging->tcr_el1 & TCR_EPD1) != 0) {
+ *is_fault = 1;
+ return (0);
+ }
+ if (is_el0 && (paging->tcr_el1 & TCR_E0PD1) != 0) {
+ *is_fault = 1;
+ return (0);
+ }
+ pte_addr = paging->ttbr1_addr;
+ tsz = (paging->tcr_el1 & TCR_T1SZ_MASK) >> TCR_T1SZ_SHIFT;
+ /* If TBI is on, replace the tag in the top byte with all ones */
+ if ((paging->tcr_el1 & TCR_TBI1) != 0)
+ gla |= (0xfful << 56);
+ switch (paging->tcr_el1 & TCR_TG1_MASK) {
+ case TCR_TG1_4K:
+ granule_shift = PAGE_SHIFT_4K;
+ break;
+ case TCR_TG1_16K:
+ granule_shift = PAGE_SHIFT_16K;
+ break;
+ case TCR_TG1_64K:
+ granule_shift = PAGE_SHIFT_64K;
+ break;
+ default:
+ *is_fault = 1;
+ return (EINVAL);
+ }
+ } else {
+ /* If address translation is disabled raise an exception */
+ if ((paging->tcr_el1 & TCR_EPD0) != 0) {
+ *is_fault = 1;
+ return (0);
+ }
+ if (is_el0 && (paging->tcr_el1 & TCR_E0PD0) != 0) {
+ *is_fault = 1;
+ return (0);
+ }
+ pte_addr = paging->ttbr0_addr;
+ tsz = (paging->tcr_el1 & TCR_T0SZ_MASK) >> TCR_T0SZ_SHIFT;
+ /* Clear the top byte if TBI is on */
+ if ((paging->tcr_el1 & TCR_TBI0) != 0)
+ gla &= ~(0xfful << 56);
+ switch (paging->tcr_el1 & TCR_TG0_MASK) {
+ case TCR_TG0_4K:
+ granule_shift = PAGE_SHIFT_4K;
+ break;
+ case TCR_TG0_16K:
+ granule_shift = PAGE_SHIFT_16K;
+ break;
+ case TCR_TG0_64K:
+ granule_shift = PAGE_SHIFT_64K;
+ break;
+ default:
+ *is_fault = 1;
+ return (EINVAL);
+ }
+ }
+
+ /*
+ * TODO: Support FEAT_TTST for smaller tsz values and FEAT_LPA2
+ * for larger values.
+ */
+ switch (granule_shift) {
+ case PAGE_SHIFT_4K:
+ case PAGE_SHIFT_16K:
+ /*
+ * See "Table D8-11 4KB granule, determining stage 1 initial
+ * lookup level" and "Table D8-21 16KB granule, determining
+ * stage 1 initial lookup level" from the "Arm Architecture
+ * Reference Manual for A-Profile architecture" revision I.a
+ * for the minimum and maximum values.
+ *
+ * TODO: Support less than 16 when FEAT_LPA2 is implemented
+ * and TCR_EL1.DS == 1
+ * TODO: Support more than 39 when FEAT_TTST is implemented
+ */
+ if (tsz < 16 || tsz > 39) {
+ *is_fault = 1;
+ return (EINVAL);
+ }
+ break;
+ case PAGE_SHIFT_64K:
+ /* TODO: Support 64k granule. It will probably work, but is untested */
+ default:
+ *is_fault = 1;
+ return (EINVAL);
+ }
+
+ /*
+ * Calculate the input address bits. These are 64 bit in an address
+ * with the top tsz bits being all 0 or all 1.
+ */
+ ia_bits = 64 - tsz;
+
+ /*
+ * Calculate the number of address bits used in the page table
+ * calculation. This is ia_bits minus the bottom granule_shift
+ * bits that are passed to the output address.
+ */
+ address_bits = ia_bits - granule_shift;
+
+ /*
+ * Calculate the number of levels. Each level uses
+ * granule_shift - PTE_SHIFT bits of the input address.
+ * This is because the table is 1 << granule_shift and each
+ * entry is 1 << PTE_SHIFT bytes.
+ */
+ levels = howmany(address_bits, granule_shift - PTE_SHIFT);
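+ /*
+ * Worked example (illustrative): with a 4K granule
+ * (granule_shift = 12) and tsz = 25, ia_bits = 39,
+ * address_bits = 27 and levels = howmany(27, 9) = 3, so the
+ * walk below starts at the "case 3" (level 1) lookup.
+ */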
+
+ /* Mask of the upper unused bits in the virtual address */
+ gla &= (1ul << ia_bits) - 1;
+ hypctx = (struct hypctx *)vcpui;
+ cookie = NULL;
+ /* TODO: Check if the level supports block descriptors */
+ for (; levels > 0; levels--) {
+ int idx;
+
+ pte_shift = (levels - 1) * (granule_shift - PTE_SHIFT) +
+ granule_shift;
+ idx = (gla >> pte_shift) &
+ ((1ul << (granule_shift - PTE_SHIFT)) - 1);
+ while (idx >= PAGE_SIZE / sizeof(pte)) {
+ idx -= PAGE_SIZE / sizeof(pte);
+ pte_addr += PAGE_SIZE;
+ }
+
+ ptep = ptp_hold(hypctx->vcpu, pte_addr, PAGE_SIZE, &cookie);
+ if (ptep == NULL)
+ goto error;
+ pte = ptep[idx];
+
+ /* Calculate the level we are looking at */
+ switch (levels) {
+ default:
+ goto fault;
+ /* TODO: Level -1 when FEAT_LPA2 is implemented */
+ case 4: /* Level 0 */
+ if ((pte & ATTR_DESCR_MASK) != L0_TABLE)
+ goto fault;
+ /* FALLTHROUGH */
+ case 3: /* Level 1 */
+ case 2: /* Level 2 */
+ switch (pte & ATTR_DESCR_MASK) {
+ /* Use L1 macro as all levels are the same */
+ case L1_TABLE:
+ /* Check if EL0 can access this address space */
+ if (is_el0 &&
+ (pte & TATTR_AP_TABLE_NO_EL0) != 0)
+ goto fault;
+ /* Check if the address space is writable */
+ if ((prot & PROT_WRITE) != 0 &&
+ (pte & TATTR_AP_TABLE_RO) != 0)
+ goto fault;
+ if ((prot & PROT_EXEC) != 0) {
+ /* Check the table exec attribute */
+ if ((is_el0 &&
+ (pte & TATTR_UXN_TABLE) != 0) ||
+ (!is_el0 &&
+ (pte & TATTR_PXN_TABLE) != 0))
+ goto fault;
+ }
+ pte_addr = pte & ~ATTR_MASK;
+ break;
+ case L1_BLOCK:
+ goto done;
+ default:
+ goto fault;
+ }
+ break;
+ case 1: /* Level 3 */
+ if ((pte & ATTR_DESCR_MASK) == L3_PAGE)
+ goto done;
+ goto fault;
+ }
+ }
+
+done:
+ /* Check if EL0 has access to the block/page */
+ if (is_el0 && (pte & ATTR_S1_AP(ATTR_S1_AP_USER)) == 0)
+ goto fault;
+ if ((prot & PROT_WRITE) != 0 && (pte & ATTR_S1_AP_RW_BIT) != 0)
+ goto fault;
+ if ((prot & PROT_EXEC) != 0) {
+ if ((is_el0 && (pte & ATTR_S1_UXN) != 0) ||
+ (!is_el0 && (pte & ATTR_S1_PXN) != 0))
+ goto fault;
+ }
+ mask = (1ul << pte_shift) - 1;
+ *gpa = (pte & ~ATTR_MASK) | (gla & mask);
+ *is_fault = 0;
+ ptp_release(&cookie);
+ return (0);
+
+error:
+ ptp_release(&cookie);
+ return (EFAULT);
+fault:
+ *is_fault = 1;
+ ptp_release(&cookie);
+ return (0);
+}
+
+int
+vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
+{
+ uint64_t excp_type;
+ int handled;
+ register_t daif;
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vcpu *vcpu;
+ struct vm_exit *vme;
+ int mode;
+
+ hypctx = (struct hypctx *)vcpui;
+ hyp = hypctx->hyp;
+ vcpu = hypctx->vcpu;
+ vme = vm_exitinfo(vcpu);
+
+ hypctx->tf.tf_elr = (uint64_t)pc;
+
+ for (;;) {
+ if (hypctx->has_exception) {
+ hypctx->has_exception = false;
+ hypctx->elr_el1 = hypctx->tf.tf_elr;
+
+ mode = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);
+
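+ /*
+ * The offsets below are the architectural VBAR_EL1 vector
+ * slots for a synchronous exception: 0x0 for the current
+ * EL with SP_EL0, 0x200 for the current EL with SP_ELx,
+ * 0x400 for a lower EL using AArch64 and 0x600 for a
+ * lower EL using AArch32.
+ */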
+ if (mode == PSR_M_EL1t) {
+ hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x0;
+ } else if (mode == PSR_M_EL1h) {
+ hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x200;
+ } else if ((mode & PSR_M_32) == PSR_M_64) {
+ /* 64-bit EL0 */
+ hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x400;
+ } else {
+ /* 32-bit EL0 */
+ hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x600;
+ }
+
+ /* Set the new spsr */
+ hypctx->spsr_el1 = hypctx->tf.tf_spsr;
+
+ /* Set the new cpsr */
+ hypctx->tf.tf_spsr = hypctx->spsr_el1 & PSR_FLAGS;
+ hypctx->tf.tf_spsr |= PSR_DAIF | PSR_M_EL1h;
+
+ /*
+ * Update fields that may change on exception entry
+ * based on how sctlr_el1 is configured.
+ */
+ if ((hypctx->sctlr_el1 & SCTLR_SPAN) != 0)
+ hypctx->tf.tf_spsr |= PSR_PAN;
+ if ((hypctx->sctlr_el1 & SCTLR_DSSBS) == 0)
+ hypctx->tf.tf_spsr &= ~PSR_SSBS;
+ else
+ hypctx->tf.tf_spsr |= PSR_SSBS;
+ }
+
+ daif = intr_disable();
+
+ /* Check if the vcpu is suspended */
+ if (vcpu_suspended(evinfo)) {
+ intr_restore(daif);
+ vm_exit_suspended(vcpu, pc);
+ break;
+ }
+
+ if (vcpu_debugged(vcpu)) {
+ intr_restore(daif);
+ vm_exit_debug(vcpu, pc);
+ break;
+ }
+
+ /* Activate the stage2 pmap so the vmid is valid */
+ pmap_activate_vm(pmap);
+ hyp->vttbr_el2 = pmap_to_ttbr0(pmap);
+
+ /*
+ * TODO: What happens if a timer interrupt is asserted exactly
+ * here, but for the previous VM?
+ */
+ arm64_set_active_vcpu(hypctx);
+ vgic_flush_hwstate(hypctx);
+
+ /* Call into EL2 to switch to the guest */
+ excp_type = vmm_call_hyp(HYP_ENTER_GUEST,
+ hyp->el2_addr, hypctx->el2_addr);
+
+ vgic_sync_hwstate(hypctx);
+ vtimer_sync_hwstate(hypctx);
+
+ /*
+ * Deactivate the stage 2 pmap. vmm_pmap_clean_stage2_tlbi
+ * depends on this so that the VM's pmap is activated again
+ * before we next enter the guest.
+ */
+ PCPU_SET(curvmpmap, NULL);
+ intr_restore(daif);
+
+ vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
+ if (excp_type == EXCP_TYPE_MAINT_IRQ)
+ continue;
+
+ vme->pc = hypctx->tf.tf_elr;
+ vme->inst_length = INSN_SIZE;
+ vme->u.hyp.exception_nr = excp_type;
+ vme->u.hyp.esr_el2 = hypctx->tf.tf_esr;
+ vme->u.hyp.far_el2 = hypctx->exit_info.far_el2;
+ vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2;
+
+ handled = arm64_handle_world_switch(hypctx, excp_type, vme,
+ pmap);
+ if (handled == UNHANDLED)
+ /* Exit loop to emulate instruction. */
+ break;
+ else
+ /* Resume guest execution from the next instruction. */
+ hypctx->tf.tf_elr += vme->inst_length;
+ }
+
+ return (0);
+}
+
+static void
+arm_pcpu_vmcleanup(void *arg)
+{
+ struct hyp *hyp;
+ int i, maxcpus;
+
+ hyp = arg;
+ maxcpus = vm_get_maxcpus(hyp->vm);
+ for (i = 0; i < maxcpus; i++) {
+ if (arm64_get_active_vcpu() == hyp->ctx[i]) {
+ arm64_set_active_vcpu(NULL);
+ break;
+ }
+ }
+}
+
+void
+vmmops_vcpu_cleanup(void *vcpui)
+{
+ struct hypctx *hypctx = vcpui;
+
+ vtimer_cpucleanup(hypctx);
+ vgic_cpucleanup(hypctx);
+
+ vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true);
+
+ free(hypctx, M_HYP);
+}
+
+void
+vmmops_cleanup(void *vmi)
+{
+ struct hyp *hyp = vmi;
+
+ vtimer_vmcleanup(hyp);
+ vgic_vmcleanup(hyp);
+
+ smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp);
+
+ vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true);
+
+ free(hyp, M_HYP);
+}
+
+/*
+ * Return a pointer to the storage backing the given guest register in
+ * the hypctx, or NULL if the register is not known.
+ */
+static uint64_t *
+hypctx_regptr(struct hypctx *hypctx, int reg)
+{
+ switch (reg) {
+ case VM_REG_GUEST_X0 ... VM_REG_GUEST_X29:
+ return (&hypctx->tf.tf_x[reg]);
+ case VM_REG_GUEST_LR:
+ return (&hypctx->tf.tf_lr);
+ case VM_REG_GUEST_SP:
+ return (&hypctx->tf.tf_sp);
+ case VM_REG_GUEST_CPSR:
+ return (&hypctx->tf.tf_spsr);
+ case VM_REG_GUEST_PC:
+ return (&hypctx->tf.tf_elr);
+ case VM_REG_GUEST_SCTLR_EL1:
+ return (&hypctx->sctlr_el1);
+ case VM_REG_GUEST_TTBR0_EL1:
+ return (&hypctx->ttbr0_el1);
+ case VM_REG_GUEST_TTBR1_EL1:
+ return (&hypctx->ttbr1_el1);
+ case VM_REG_GUEST_TCR_EL1:
+ return (&hypctx->tcr_el1);
+ case VM_REG_GUEST_TCR2_EL1:
+ return (&hypctx->tcr2_el1);
+ default:
+ break;
+ }
+ return (NULL);
+}
+
+int
+vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
+{
+ uint64_t *regp;
+ int running, hostcpu;
+ struct hypctx *hypctx = vcpui;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("arm_getreg: %s%d is running", vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ regp = hypctx_regptr(hypctx, reg);
+ if (regp == NULL)
+ return (EINVAL);
+
+ *retval = *regp;
+ return (0);
+}
+
+int
+vmmops_setreg(void *vcpui, int reg, uint64_t val)
+{
+ uint64_t *regp;
+ struct hypctx *hypctx = vcpui;
+ int running, hostcpu;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("arm_setreg: %s%d is running", vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ regp = hypctx_regptr(hypctx, reg);
+ if (regp == NULL)
+ return (EINVAL);
+
+ *regp = val;
+ return (0);
+}
+
+int
+vmmops_exception(void *vcpui, uint64_t esr, uint64_t far)
+{
+ struct hypctx *hypctx = vcpui;
+ int running, hostcpu;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ hypctx->far_el1 = far;
+ hypctx->esr_el1 = esr;
+ hypctx->has_exception = true;
+
+ return (0);
+}
+
+int
+vmmops_getcap(void *vcpui, int num, int *retval)
+{
+ int ret;
+
+ ret = ENOENT;
+
+ switch (num) {
+ case VM_CAP_UNRESTRICTED_GUEST:
+ *retval = 1;
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+
+ return (ret);
+}
+
+int
+vmmops_setcap(void *vcpui, int num, int val)
+{
+
+ return (ENOENT);
+}
diff --git a/sys/arm64/vmm/vmm_call.S b/sys/arm64/vmm/vmm_call.S
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_call.S
@@ -0,0 +1,39 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <machine/asm.h>
+
+ .text
+
+ENTRY(vmm_call_hyp)
+ hvc #0
+ ret
+END(vmm_call_hyp)
diff --git a/sys/arm64/vmm/vmm_dev.c b/sys/arm64/vmm/vmm_dev.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_dev.c
@@ -0,0 +1,1054 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/sysctl.h>
+#include <sys/libkern.h>
+#include <sys/ioccom.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#include "vmm_stat.h"
+
+#include "io/vgic.h"
+
+struct devmem_softc {
+ int segid;
+ char *name;
+ struct cdev *cdev;
+ struct vmmdev_softc *sc;
+ SLIST_ENTRY(devmem_softc) link;
+};
+
+struct vmmdev_softc {
+ struct vm *vm; /* vm instance cookie */
+ struct cdev *cdev;
+ struct ucred *ucred;
+ SLIST_ENTRY(vmmdev_softc) link;
+ SLIST_HEAD(, devmem_softc) devmem;
+ int flags;
+};
+#define VSC_LINKED 0x01
+
+static SLIST_HEAD(, vmmdev_softc) head;
+
+static unsigned pr_allow_flag;
+static struct mtx vmmdev_mtx;
+MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);
+
+static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
+
+SYSCTL_DECL(_hw_vmm);
+
+static int vmm_priv_check(struct ucred *ucred);
+static int devmem_create_cdev(const char *vmname, int id, char *devmem);
+static void devmem_destroy(void *arg);
+
+static int
+vmm_priv_check(struct ucred *ucred)
+{
+
+ if (jailed(ucred) &&
+ !(ucred->cr_prison->pr_allow & pr_allow_flag))
+ return (EPERM);
+
+ return (0);
+}
+
+static int
+vcpu_lock_one(struct vcpu *vcpu)
+{
+ int error;
+
+ error = vcpu_set_state(vcpu, VCPU_FROZEN, true);
+ return (error);
+}
+
+static void
+vcpu_unlock_one(struct vcpu *vcpu)
+{
+ enum vcpu_state state;
+
+ state = vcpu_get_state(vcpu, NULL);
+ if (state != VCPU_FROZEN) {
+ panic("vcpu %s(%d) has invalid state %d",
+ vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
+ }
+
+ vcpu_set_state(vcpu, VCPU_IDLE, false);
+}
+
+static int
+vcpu_lock_all(struct vmmdev_softc *sc)
+{
+ struct vcpu *vcpu;
+ int error;
+ uint16_t i, j, maxcpus;
+
+ error = 0;
+ vm_slock_vcpus(sc->vm);
+ maxcpus = vm_get_maxcpus(sc->vm);
+ for (i = 0; i < maxcpus; i++) {
+ vcpu = vm_vcpu(sc->vm, i);
+ if (vcpu == NULL)
+ continue;
+ error = vcpu_lock_one(vcpu);
+ if (error)
+ break;
+ }
+
+ if (error) {
+ for (j = 0; j < i; j++) {
+ vcpu = vm_vcpu(sc->vm, j);
+ if (vcpu == NULL)
+ continue;
+ vcpu_unlock_one(vcpu);
+ }
+ vm_unlock_vcpus(sc->vm);
+ }
+
+ return (error);
+}
+
+static void
+vcpu_unlock_all(struct vmmdev_softc *sc)
+{
+ struct vcpu *vcpu;
+ uint16_t i, maxcpus;
+
+ maxcpus = vm_get_maxcpus(sc->vm);
+ for (i = 0; i < maxcpus; i++) {
+ vcpu = vm_vcpu(sc->vm, i);
+ if (vcpu == NULL)
+ continue;
+ vcpu_unlock_one(vcpu);
+ }
+ vm_unlock_vcpus(sc->vm);
+}
+
+static struct vmmdev_softc *
+vmmdev_lookup(const char *name)
+{
+ struct vmmdev_softc *sc;
+
+#ifdef notyet /* XXX kernel is not compiled with invariants */
+ mtx_assert(&vmmdev_mtx, MA_OWNED);
+#endif
+
+ SLIST_FOREACH(sc, &head, link) {
+ if (strcmp(name, vm_name(sc->vm)) == 0)
+ break;
+ }
+
+ if (sc == NULL)
+ return (NULL);
+
+ if (cr_cansee(curthread->td_ucred, sc->ucred))
+ return (NULL);
+
+ return (sc);
+}
+
+static struct vmmdev_softc *
+vmmdev_lookup2(struct cdev *cdev)
+{
+
+ return (cdev->si_drv1);
+}
+
+static int
+vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
+{
+ int error, off, c, prot;
+ vm_paddr_t gpa, maxaddr;
+ void *hpa, *cookie;
+ struct vmmdev_softc *sc;
+
+ error = vmm_priv_check(curthread->td_ucred);
+ if (error)
+ return (error);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL)
+ return (ENXIO);
+
+ /*
+ * Get a read lock on the guest memory map.
+ */
+ vm_slock_memsegs(sc->vm);
+
+ prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
+ maxaddr = vmm_sysmem_maxaddr(sc->vm);
+ while (uio->uio_resid > 0 && error == 0) {
+ gpa = uio->uio_offset;
+ off = gpa & PAGE_MASK;
+ c = min(uio->uio_resid, PAGE_SIZE - off);
+
+ /*
+ * The VM has a hole in its physical memory map. If we want to
+ * use 'dd' to inspect memory beyond the hole we need to
+ * provide bogus data for memory that lies in the hole.
+ *
+ * Since this device does not support lseek(2), dd(1) will
+ * read(2) blocks of data to simulate the lseek(2).
+ */
+ hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
+ if (hpa == NULL) {
+ if (uio->uio_rw == UIO_READ && gpa < maxaddr)
+ error = uiomove(__DECONST(void *, zero_region),
+ c, uio);
+ else
+ error = EFAULT;
+ } else {
+ error = uiomove(hpa, c, uio);
+ vm_gpa_release(cookie);
+ }
+ }
+ vm_unlock_memsegs(sc->vm);
+ return (error);
+}
+
+static int
+get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+ struct devmem_softc *dsc;
+ int error;
+ bool sysmem;
+
+ error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
+ if (error || mseg->len == 0)
+ return (error);
+
+ if (!sysmem) {
+ SLIST_FOREACH(dsc, &sc->devmem, link) {
+ if (dsc->segid == mseg->segid)
+ break;
+ }
+ KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
+ __func__, mseg->segid));
+ error = copystr(dsc->name, mseg->name, sizeof(mseg->name),
+ NULL);
+ } else {
+ bzero(mseg->name, sizeof(mseg->name));
+ }
+
+ return (error);
+}
+
+static int
+alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+ char *name;
+ int error;
+ bool sysmem;
+
+ error = 0;
+ name = NULL;
+ sysmem = true;
+
+ /*
+ * The allocation is lengthened by 1 to hold a terminating NUL. It'll
+ * be stripped off when devfs processes the full string.
+ */
+ if (VM_MEMSEG_NAME(mseg)) {
+ sysmem = false;
+ name = malloc(sizeof(mseg->name), M_VMMDEV, M_WAITOK);
+ error = copystr(mseg->name, name, sizeof(mseg->name), NULL);
+ if (error)
+ goto done;
+ }
+
+ error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
+ if (error)
+ goto done;
+
+ if (VM_MEMSEG_NAME(mseg)) {
+ error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
+ if (error)
+ vm_free_memseg(sc->vm, mseg->segid);
+ else
+ name = NULL; /* freed when 'cdev' is destroyed */
+ }
+done:
+ free(name, M_VMMDEV);
+ return (error);
+}
+
+static int
+vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
+ uint64_t *regval)
+{
+ int error, i;
+
+ error = 0;
+ for (i = 0; i < count; i++) {
+ error = vm_get_register(vcpu, regnum[i], &regval[i]);
+ if (error)
+ break;
+ }
+ return (error);
+}
+
+static int
+vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
+ uint64_t *regval)
+{
+ int error, i;
+
+ error = 0;
+ for (i = 0; i < count; i++) {
+ error = vm_set_register(vcpu, regnum[i], regval[i]);
+ if (error)
+ break;
+ }
+ return (error);
+}
+
+static int
+vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
+ struct thread *td)
+{
+ int error, vcpuid, size;
+ cpuset_t *cpuset;
+ struct vmmdev_softc *sc;
+ struct vcpu *vcpu;
+ struct vm_register *vmreg;
+ struct vm_register_set *vmregset;
+ struct vm_run *vmrun;
+ struct vm_vgic_version *vgv;
+ struct vm_vgic_descr *vgic;
+ struct vm_cpuset *vm_cpuset;
+ struct vm_irq *vi;
+ struct vm_capability *vmcap;
+ struct vm_stats *vmstats;
+ struct vm_stat_desc *statdesc;
+ struct vm_suspend *vmsuspend;
+ struct vm_exception *vmexc;
+ struct vm_gla2gpa *gg;
+ struct vm_memmap *mm;
+ struct vm_munmap *mu;
+ struct vm_msi *vmsi;
+ struct vm_cpu_topology *topology;
+ uint64_t *regvals;
+ int *regnums;
+ enum { NONE, SINGLE, ALL } vcpus_locked;
+ bool memsegs_locked;
+
+ error = vmm_priv_check(curthread->td_ucred);
+ if (error)
+ return (error);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL)
+ return (ENXIO);
+
+ error = 0;
+ vcpuid = -1;
+ vcpu = NULL;
+ vcpus_locked = NONE;
+ memsegs_locked = false;
+
+ /*
+ * Some VMM ioctls can operate only on vcpus that are not running.
+ */
+ switch (cmd) {
+ case VM_RUN:
+ case VM_GET_REGISTER:
+ case VM_SET_REGISTER:
+ case VM_GET_REGISTER_SET:
+ case VM_SET_REGISTER_SET:
+ case VM_INJECT_EXCEPTION:
+ case VM_GET_CAPABILITY:
+ case VM_SET_CAPABILITY:
+ case VM_GLA2GPA_NOFAULT:
+ case VM_ACTIVATE_CPU:
+ /*
+ * ioctls that can operate only on vcpus that are not running.
+ */
+ vcpuid = *(int *)data;
+ vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
+ if (vcpu == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+ error = vcpu_lock_one(vcpu);
+ if (error)
+ goto done;
+ vcpus_locked = SINGLE;
+ break;
+
+ case VM_ALLOC_MEMSEG:
+ case VM_MMAP_MEMSEG:
+ case VM_MUNMAP_MEMSEG:
+ case VM_REINIT:
+ case VM_ATTACH_VGIC:
+ /*
+ * ioctls that modify the memory map must lock memory
+ * segments exclusively.
+ */
+ vm_xlock_memsegs(sc->vm);
+ memsegs_locked = true;
+
+ /*
+ * ioctls that operate on the entire virtual machine must
+ * prevent all vcpus from running.
+ */
+ error = vcpu_lock_all(sc);
+ if (error)
+ goto done;
+ vcpus_locked = ALL;
+ break;
+ case VM_GET_MEMSEG:
+ case VM_MMAP_GETNEXT:
+ /*
+ * Lock the memory map while it is being inspected.
+ */
+ vm_slock_memsegs(sc->vm);
+ memsegs_locked = true;
+ break;
+
+ case VM_STATS:
+ /*
+ * These do not need the vCPU locked but do operate on
+ * a specific vCPU.
+ */
+ vcpuid = *(int *)data;
+ vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
+ if (vcpu == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+ break;
+
+ case VM_SUSPEND_CPU:
+ case VM_RESUME_CPU:
+ /*
+ * These can either operate on all CPUs via a vcpuid of
+ * -1 or on a specific vCPU.
+ */
+ vcpuid = *(int *)data;
+ if (vcpuid == -1)
+ break;
+ vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
+ if (vcpu == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+ break;
+
+ case VM_ASSERT_IRQ:
+ vi = (struct vm_irq *)data;
+ error = vm_assert_irq(sc->vm, vi->irq);
+ break;
+ case VM_DEASSERT_IRQ:
+ vi = (struct vm_irq *)data;
+ error = vm_deassert_irq(sc->vm, vi->irq);
+ break;
+ default:
+ break;
+ }
+
+ switch (cmd) {
+ case VM_RUN: {
+ struct vm_exit *vme;
+
+ vmrun = (struct vm_run *)data;
+ vme = vm_exitinfo(vcpu);
+
+ error = vm_run(vcpu);
+ if (error != 0)
+ break;
+
+ error = copyout(vme, vmrun->vm_exit, sizeof(*vme));
+ if (error != 0)
+ break;
+ break;
+ }
+ case VM_SUSPEND:
+ vmsuspend = (struct vm_suspend *)data;
+ error = vm_suspend(sc->vm, vmsuspend->how);
+ break;
+ case VM_REINIT:
+ error = vm_reinit(sc->vm);
+ break;
+ case VM_STAT_DESC: {
+ statdesc = (struct vm_stat_desc *)data;
+ error = vmm_stat_desc_copy(statdesc->index,
+ statdesc->desc, sizeof(statdesc->desc));
+ break;
+ }
+ case VM_STATS: {
+ CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
+ vmstats = (struct vm_stats *)data;
+ getmicrotime(&vmstats->tv);
+ error = vmm_stat_copy(vcpu, vmstats->index,
+ nitems(vmstats->statbuf),
+ &vmstats->num_entries, vmstats->statbuf);
+ break;
+ }
+ case VM_MMAP_GETNEXT:
+ mm = (struct vm_memmap *)data;
+ error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
+ &mm->segoff, &mm->len, &mm->prot, &mm->flags);
+ break;
+ case VM_MMAP_MEMSEG:
+ mm = (struct vm_memmap *)data;
+ error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
+ mm->len, mm->prot, mm->flags);
+ break;
+ case VM_MUNMAP_MEMSEG:
+ mu = (struct vm_munmap *)data;
+ error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
+ break;
+ case VM_ALLOC_MEMSEG:
+ error = alloc_memseg(sc, (struct vm_memseg *)data);
+ break;
+ case VM_GET_MEMSEG:
+ error = get_memseg(sc, (struct vm_memseg *)data);
+ break;
+ case VM_GET_REGISTER:
+ vmreg = (struct vm_register *)data;
+ error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
+ break;
+ case VM_SET_REGISTER:
+ vmreg = (struct vm_register *)data;
+ error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
+ break;
+ case VM_GET_REGISTER_SET:
+ vmregset = (struct vm_register_set *)data;
+ if (vmregset->count > VM_REG_LAST) {
+ error = EINVAL;
+ break;
+ }
+ regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
+ vmregset->count);
+ if (error == 0)
+ error = vm_get_register_set(vcpu, vmregset->count,
+ regnums, regvals);
+ if (error == 0)
+ error = copyout(regvals, vmregset->regvals,
+ sizeof(regvals[0]) * vmregset->count);
+ free(regvals, M_VMMDEV);
+ free(regnums, M_VMMDEV);
+ break;
+ case VM_SET_REGISTER_SET:
+ vmregset = (struct vm_register_set *)data;
+ if (vmregset->count > VM_REG_LAST) {
+ error = EINVAL;
+ break;
+ }
+ regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
+ vmregset->count);
+ if (error == 0)
+ error = copyin(vmregset->regvals, regvals,
+ sizeof(regvals[0]) * vmregset->count);
+ if (error == 0)
+ error = vm_set_register_set(vcpu, vmregset->count,
+ regnums, regvals);
+ free(regvals, M_VMMDEV);
+ free(regnums, M_VMMDEV);
+ break;
+ case VM_GET_CAPABILITY:
+ vmcap = (struct vm_capability *)data;
+ error = vm_get_capability(vcpu,
+ vmcap->captype,
+ &vmcap->capval);
+ break;
+ case VM_SET_CAPABILITY:
+ vmcap = (struct vm_capability *)data;
+ error = vm_set_capability(vcpu,
+ vmcap->captype,
+ vmcap->capval);
+ break;
+ case VM_INJECT_EXCEPTION:
+ vmexc = (struct vm_exception *)data;
+ error = vm_inject_exception(vcpu, vmexc->esr, vmexc->far);
+ break;
+ case VM_GLA2GPA_NOFAULT:
+ gg = (struct vm_gla2gpa *)data;
+ error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla,
+ gg->prot, &gg->gpa, &gg->fault);
+ KASSERT(error == 0 || error == EFAULT,
+ ("%s: vm_gla2gpa unknown error %d", __func__, error));
+ break;
+ case VM_ACTIVATE_CPU:
+ error = vm_activate_cpu(vcpu);
+ break;
+ case VM_GET_CPUS:
+ error = 0;
+ vm_cpuset = (struct vm_cpuset *)data;
+ size = vm_cpuset->cpusetsize;
+ if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
+ error = ERANGE;
+ break;
+ }
+ cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
+ if (vm_cpuset->which == VM_ACTIVE_CPUS)
+ *cpuset = vm_active_cpus(sc->vm);
+ else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
+ *cpuset = vm_suspended_cpus(sc->vm);
+ else if (vm_cpuset->which == VM_DEBUG_CPUS)
+ *cpuset = vm_debug_cpus(sc->vm);
+ else
+ error = EINVAL;
+ if (error == 0)
+ error = copyout(cpuset, vm_cpuset->cpus, size);
+ free(cpuset, M_TEMP);
+ break;
+ case VM_SUSPEND_CPU:
+ error = vm_suspend_cpu(sc->vm, vcpu);
+ break;
+ case VM_RESUME_CPU:
+ error = vm_resume_cpu(sc->vm, vcpu);
+ break;
+ case VM_GET_VGIC_VERSION:
+ vgv = (struct vm_vgic_version *)data;
+ /* TODO: Query the vgic driver for this */
+ vgv->version = 3;
+ vgv->flags = 0;
+ error = 0;
+ break;
+ case VM_ATTACH_VGIC:
+ vgic = (struct vm_vgic_descr *)data;
+ error = vm_attach_vgic(sc->vm, vgic);
+ break;
+ case VM_RAISE_MSI:
+ vmsi = (struct vm_msi *)data;
+ error = vm_raise_msi(sc->vm, vmsi->msg, vmsi->addr, vmsi->bus,
+ vmsi->slot, vmsi->func);
+ break;
+ case VM_SET_TOPOLOGY:
+ topology = (struct vm_cpu_topology *)data;
+ error = vm_set_topology(sc->vm, topology->sockets,
+ topology->cores, topology->threads, topology->maxcpus);
+ break;
+ case VM_GET_TOPOLOGY:
+ topology = (struct vm_cpu_topology *)data;
+ vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
+ &topology->threads, &topology->maxcpus);
+ error = 0;
+ break;
+ default:
+ error = ENOTTY;
+ break;
+ }
+
+done:
+ if (vcpus_locked == SINGLE)
+ vcpu_unlock_one(vcpu);
+ else if (vcpus_locked == ALL)
+ vcpu_unlock_all(sc);
+ if (memsegs_locked)
+ vm_unlock_memsegs(sc->vm);
+
+ /*
+ * Make sure that no handler returns a kernel-internal
+ * error value to userspace.
+ */
+ KASSERT(error == ERESTART || error >= 0,
+ ("vmmdev_ioctl: invalid error return %d", error));
+ return (error);
+}
+
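+/*
+ * Example (illustrative): userspace maps guest system memory by
+ * passing the guest physical address as the file offset of an mmap(2)
+ * on the VM's cdev, e.g.
+ *
+ *	ptr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
+ *	    vmfd, gpa);
+ */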
+static int
+vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
+ struct vm_object **objp, int nprot)
+{
+ struct vmmdev_softc *sc;
+ vm_paddr_t gpa;
+ size_t len;
+ vm_ooffset_t segoff, first, last;
+ int error, found, segid;
+ bool sysmem;
+
+ error = vmm_priv_check(curthread->td_ucred);
+ if (error)
+ return (error);
+
+ first = *offset;
+ last = first + mapsize;
+ if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+ return (EINVAL);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL) {
+ /* virtual machine is in the process of being created */
+ return (EINVAL);
+ }
+
+ /*
+ * Get a read lock on the guest memory map.
+ */
+ vm_slock_memsegs(sc->vm);
+
+ gpa = 0;
+ found = 0;
+ while (!found) {
+ error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
+ NULL, NULL);
+ if (error)
+ break;
+
+ if (first >= gpa && last <= gpa + len)
+ found = 1;
+ else
+ gpa += len;
+ }
+
+ if (found) {
+ error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
+ KASSERT(error == 0 && *objp != NULL,
+ ("%s: invalid memory segment %d", __func__, segid));
+ if (sysmem) {
+ vm_object_reference(*objp);
+ *offset = segoff + (first - gpa);
+ } else {
+ error = EINVAL;
+ }
+ }
+ vm_unlock_memsegs(sc->vm);
+ return (error);
+}
+
+static void
+vmmdev_destroy(void *arg)
+{
+ struct vmmdev_softc *sc = arg;
+ struct devmem_softc *dsc;
+ int error __diagused;
+
+ error = vcpu_lock_all(sc);
+ KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
+ vm_unlock_vcpus(sc->vm);
+
+ while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
+ KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
+ SLIST_REMOVE_HEAD(&sc->devmem, link);
+ free(dsc->name, M_VMMDEV);
+ free(dsc, M_VMMDEV);
+ }
+
+ if (sc->cdev != NULL)
+ destroy_dev(sc->cdev);
+
+ if (sc->vm != NULL)
+ vm_destroy(sc->vm);
+
+ if (sc->ucred != NULL)
+ crfree(sc->ucred);
+
+ if ((sc->flags & VSC_LINKED) != 0) {
+ mtx_lock(&vmmdev_mtx);
+ SLIST_REMOVE(&head, sc, vmmdev_softc, link);
+ mtx_unlock(&vmmdev_mtx);
+ }
+
+ free(sc, M_VMMDEV);
+}
+
+static int
+sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
+{
+ struct devmem_softc *dsc;
+ struct vmmdev_softc *sc;
+ struct cdev *cdev;
+ char *buf;
+ int error, buflen;
+
+ error = vmm_priv_check(req->td->td_ucred);
+ if (error)
+ return (error);
+
+ buflen = VM_MAX_NAMELEN + 1;
+ buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
+ strlcpy(buf, "beavis", buflen);
+ error = sysctl_handle_string(oidp, buf, buflen, req);
+ if (error != 0 || req->newptr == NULL)
+ goto out;
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(buf);
+ if (sc == NULL || sc->cdev == NULL) {
+ mtx_unlock(&vmmdev_mtx);
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * Setting 'sc->cdev' to NULL is used to indicate that the VM
+ * is scheduled for destruction.
+ */
+ cdev = sc->cdev;
+ sc->cdev = NULL;
+ mtx_unlock(&vmmdev_mtx);
+
+ /*
+ * Destroy all cdevs:
+ *
+ * - any new operations on the 'cdev' will return an error (ENXIO).
+ *
+ * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
+ */
+ SLIST_FOREACH(dsc, &sc->devmem, link) {
+ KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
+ destroy_dev(dsc->cdev);
+ devmem_destroy(dsc);
+ }
+ destroy_dev(cdev);
+ vmmdev_destroy(sc);
+ error = 0;
+
+out:
+ free(buf, M_VMMDEV);
+ return (error);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
+ CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_vmm_destroy, "A",
+ NULL);
+
+static struct cdevsw vmmdevsw = {
+ .d_name = "vmmdev",
+ .d_version = D_VERSION,
+ .d_ioctl = vmmdev_ioctl,
+ .d_mmap_single = vmmdev_mmap_single,
+ .d_read = vmmdev_rw,
+ .d_write = vmmdev_rw,
+};
+
+static int
+sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
+{
+ struct vm *vm;
+ struct cdev *cdev;
+ struct vmmdev_softc *sc, *sc2;
+ char *buf;
+ int error, buflen;
+
+ error = vmm_priv_check(req->td->td_ucred);
+ if (error)
+ return (error);
+
+ buflen = VM_MAX_NAMELEN + 1;
+ buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
+ strlcpy(buf, "beavis", buflen);
+ error = sysctl_handle_string(oidp, buf, buflen, req);
+ if (error != 0 || req->newptr == NULL)
+ goto out;
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(buf);
+ mtx_unlock(&vmmdev_mtx);
+ if (sc != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+
+ error = vm_create(buf, &vm);
+ if (error != 0)
+ goto out;
+
+ sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+ sc->ucred = crhold(curthread->td_ucred);
+ sc->vm = vm;
+ SLIST_INIT(&sc->devmem);
+
+ /*
+ * Lookup the name again just in case somebody sneaked in when we
+ * dropped the lock.
+ */
+ mtx_lock(&vmmdev_mtx);
+ sc2 = vmmdev_lookup(buf);
+ if (sc2 == NULL) {
+ SLIST_INSERT_HEAD(&head, sc, link);
+ sc->flags |= VSC_LINKED;
+ }
+ mtx_unlock(&vmmdev_mtx);
+
+ if (sc2 != NULL) {
+ vmmdev_destroy(sc);
+ error = EEXIST;
+ goto out;
+ }
+
+ error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
+ UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
+ if (error != 0) {
+ vmmdev_destroy(sc);
+ goto out;
+ }
+
+ mtx_lock(&vmmdev_mtx);
+ sc->cdev = cdev;
+ sc->cdev->si_drv1 = sc;
+ mtx_unlock(&vmmdev_mtx);
+
+out:
+ free(buf, M_VMMDEV);
+ return (error);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
+ CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_vmm_create, "A",
+ NULL);
+
+void
+vmmdev_init(void)
+{
+ pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
+ "Allow use of vmm in a jail.");
+}
+
+int
+vmmdev_cleanup(void)
+{
+ int error;
+
+ if (SLIST_EMPTY(&head))
+ error = 0;
+ else
+ error = EBUSY;
+
+ return (error);
+}
+
+static int
+devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
+ struct vm_object **objp, int nprot)
+{
+ struct devmem_softc *dsc;
+ vm_ooffset_t first, last;
+ size_t seglen;
+ int error;
+ bool sysmem;
+
+ dsc = cdev->si_drv1;
+ if (dsc == NULL) {
+ /* 'cdev' has been created but is not ready for use */
+ return (ENXIO);
+ }
+
+ first = *offset;
+ last = *offset + len;
+ if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+ return (EINVAL);
+
+ vm_slock_memsegs(dsc->sc->vm);
+
+ error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
+ KASSERT(error == 0 && !sysmem && *objp != NULL,
+ ("%s: invalid devmem segment %d", __func__, dsc->segid));
+
+ if (seglen >= last)
+ vm_object_reference(*objp);
+ else
+		error = EINVAL;
+ vm_unlock_memsegs(dsc->sc->vm);
+ return (error);
+}
+
+static struct cdevsw devmemsw = {
+ .d_name = "devmem",
+ .d_version = D_VERSION,
+ .d_mmap_single = devmem_mmap_single,
+};
+
+static int
+devmem_create_cdev(const char *vmname, int segid, char *devname)
+{
+ struct devmem_softc *dsc;
+ struct vmmdev_softc *sc;
+ struct cdev *cdev;
+ int error;
+
+ error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
+ UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
+ if (error)
+ return (error);
+
+ dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(vmname);
+ KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
+ if (sc->cdev == NULL) {
+ /* virtual machine is being created or destroyed */
+ mtx_unlock(&vmmdev_mtx);
+ free(dsc, M_VMMDEV);
+ destroy_dev_sched_cb(cdev, NULL, 0);
+ return (ENODEV);
+ }
+
+ dsc->segid = segid;
+ dsc->name = devname;
+ dsc->cdev = cdev;
+ dsc->sc = sc;
+ SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
+ mtx_unlock(&vmmdev_mtx);
+
+ /* The 'cdev' is ready for use after 'si_drv1' is initialized */
+ cdev->si_drv1 = dsc;
+ return (0);
+}
+
+static void
+devmem_destroy(void *arg)
+{
+ struct devmem_softc *dsc = arg;
+
+ KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
+ dsc->cdev = NULL;
+ dsc->sc = NULL;
+}
diff --git a/sys/arm64/vmm/vmm_hyp.c b/sys/arm64/vmm/vmm_hyp.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_hyp.c
@@ -0,0 +1,735 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2021 Andrew Turner
+ *
+ * This work was supported by Innovate UK project 105694, "Digital Security
+ * by Design (DSbD) Technology Platform Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+
+#include <machine/armreg.h>
+
+#include "arm64.h"
+#include "hyp.h"
+
+struct hypctx;
+
+uint64_t vmm_hyp_enter(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t,
+ uint64_t, uint64_t, uint64_t);
+uint64_t vmm_enter_guest(struct hypctx *);
+
+static void
+vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest)
+{
+ uint64_t dfr0;
+
+	/* Store the registers only used while running a guest */
+ if (guest) {
+ /* Store the timer registers */
+ hypctx->vtimer_cpu.cntkctl_el1 = READ_SPECIALREG(cntkctl_el1);
+ hypctx->vtimer_cpu.virt_timer.cntx_cval_el0 =
+ READ_SPECIALREG(cntv_cval_el0);
+ hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0 =
+ READ_SPECIALREG(cntv_ctl_el0);
+
+ /* Store the GICv3 registers */
+ hypctx->vgic_v3_regs.ich_eisr_el2 =
+ READ_SPECIALREG(ich_eisr_el2);
+ hypctx->vgic_v3_regs.ich_elrsr_el2 =
+ READ_SPECIALREG(ich_elrsr_el2);
+ hypctx->vgic_v3_regs.ich_hcr_el2 =
+ READ_SPECIALREG(ich_hcr_el2);
+ hypctx->vgic_v3_regs.ich_misr_el2 =
+ READ_SPECIALREG(ich_misr_el2);
+ hypctx->vgic_v3_regs.ich_vmcr_el2 =
+ READ_SPECIALREG(ich_vmcr_el2);
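+		/*
+		 * The switch falls through from ich_lr_num - 1 down to
+		 * ICH_LR0_EL2 so only the implemented list registers are
+		 * read.
+		 */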
+ switch (hypctx->vgic_v3_regs.ich_lr_num - 1) {
+#define STORE_LR(x) \
+ case x: \
+ hypctx->vgic_v3_regs.ich_lr_el2[x] = \
+ READ_SPECIALREG(ich_lr ## x ##_el2)
+ STORE_LR(15);
+ STORE_LR(14);
+ STORE_LR(13);
+ STORE_LR(12);
+ STORE_LR(11);
+ STORE_LR(10);
+ STORE_LR(9);
+ STORE_LR(8);
+ STORE_LR(7);
+ STORE_LR(6);
+ STORE_LR(5);
+ STORE_LR(4);
+ STORE_LR(3);
+ STORE_LR(2);
+ STORE_LR(1);
+ default:
+ STORE_LR(0);
+#undef STORE_LR
+ }
+
+ switch (hypctx->vgic_v3_regs.ich_apr_num - 1) {
+#define STORE_APR(x) \
+ case x: \
+ hypctx->vgic_v3_regs.ich_ap0r_el2[x] = \
+ READ_SPECIALREG(ich_ap0r ## x ##_el2); \
+ hypctx->vgic_v3_regs.ich_ap1r_el2[x] = \
+ READ_SPECIALREG(ich_ap1r ## x ##_el2)
+ STORE_APR(3);
+ STORE_APR(2);
+ STORE_APR(1);
+ default:
+ STORE_APR(0);
+#undef STORE_APR
+ }
+ }
+
+ dfr0 = READ_SPECIALREG(id_aa64dfr0_el1);
+ switch (ID_AA64DFR0_BRPs_VAL(dfr0) - 1) {
+#define STORE_DBG_BRP(x) \
+ case x: \
+ hypctx->dbgbcr_el1[x] = \
+ READ_SPECIALREG(dbgbcr ## x ## _el1); \
+ hypctx->dbgbvr_el1[x] = \
+ READ_SPECIALREG(dbgbvr ## x ## _el1)
+ STORE_DBG_BRP(15);
+ STORE_DBG_BRP(14);
+ STORE_DBG_BRP(13);
+ STORE_DBG_BRP(12);
+ STORE_DBG_BRP(11);
+ STORE_DBG_BRP(10);
+ STORE_DBG_BRP(9);
+ STORE_DBG_BRP(8);
+ STORE_DBG_BRP(7);
+ STORE_DBG_BRP(6);
+ STORE_DBG_BRP(5);
+ STORE_DBG_BRP(4);
+ STORE_DBG_BRP(3);
+ STORE_DBG_BRP(2);
+ STORE_DBG_BRP(1);
+ default:
+ STORE_DBG_BRP(0);
+#undef STORE_DBG_BRP
+ }
+
+ switch (ID_AA64DFR0_WRPs_VAL(dfr0) - 1) {
+#define STORE_DBG_WRP(x) \
+ case x: \
+ hypctx->dbgwcr_el1[x] = \
+ READ_SPECIALREG(dbgwcr ## x ## _el1); \
+ hypctx->dbgwvr_el1[x] = \
+ READ_SPECIALREG(dbgwvr ## x ## _el1)
+ STORE_DBG_WRP(15);
+ STORE_DBG_WRP(14);
+ STORE_DBG_WRP(13);
+ STORE_DBG_WRP(12);
+ STORE_DBG_WRP(11);
+ STORE_DBG_WRP(10);
+ STORE_DBG_WRP(9);
+ STORE_DBG_WRP(8);
+ STORE_DBG_WRP(7);
+ STORE_DBG_WRP(6);
+ STORE_DBG_WRP(5);
+ STORE_DBG_WRP(4);
+ STORE_DBG_WRP(3);
+ STORE_DBG_WRP(2);
+ STORE_DBG_WRP(1);
+ default:
+ STORE_DBG_WRP(0);
+#undef STORE_DBG_WRP
+ }
+
+ /* Store the PMU registers */
+ hypctx->pmcr_el0 = READ_SPECIALREG(pmcr_el0);
+ hypctx->pmccntr_el0 = READ_SPECIALREG(pmccntr_el0);
+ hypctx->pmccfiltr_el0 = READ_SPECIALREG(pmccfiltr_el0);
+ hypctx->pmcntenset_el0 = READ_SPECIALREG(pmcntenset_el0);
+ hypctx->pmintenset_el1 = READ_SPECIALREG(pmintenset_el1);
+ hypctx->pmovsset_el0 = READ_SPECIALREG(pmovsset_el0);
+ hypctx->pmuserenr_el0 = READ_SPECIALREG(pmuserenr_el0);
+ switch ((hypctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT) {
+#define STORE_PMU(x) \
+ case (x + 1): \
+ hypctx->pmevcntr_el0[x] = \
+ READ_SPECIALREG(pmevcntr ## x ## _el0); \
+ hypctx->pmevtyper_el0[x] = \
+ READ_SPECIALREG(pmevtyper ## x ## _el0)
+ STORE_PMU(30);
+ STORE_PMU(29);
+ STORE_PMU(28);
+ STORE_PMU(27);
+ STORE_PMU(26);
+ STORE_PMU(25);
+ STORE_PMU(24);
+ STORE_PMU(23);
+ STORE_PMU(22);
+ STORE_PMU(21);
+ STORE_PMU(20);
+ STORE_PMU(19);
+ STORE_PMU(18);
+ STORE_PMU(17);
+ STORE_PMU(16);
+ STORE_PMU(15);
+ STORE_PMU(14);
+ STORE_PMU(13);
+ STORE_PMU(12);
+ STORE_PMU(11);
+ STORE_PMU(10);
+ STORE_PMU(9);
+ STORE_PMU(8);
+ STORE_PMU(7);
+ STORE_PMU(6);
+ STORE_PMU(5);
+ STORE_PMU(4);
+ STORE_PMU(3);
+ STORE_PMU(2);
+ STORE_PMU(1);
+ STORE_PMU(0);
+ default: /* N == 0 when only PMCCNTR_EL0 is available */
+ break;
+#undef STORE_PMU
+ }
+
+	/* Store the special registers to the trapframe */
+ hypctx->tf.tf_sp = READ_SPECIALREG(sp_el1);
+ hypctx->tf.tf_elr = READ_SPECIALREG(elr_el2);
+ hypctx->tf.tf_spsr = READ_SPECIALREG(spsr_el2);
+ if (guest) {
+ hypctx->tf.tf_esr = READ_SPECIALREG(esr_el2);
+ }
+
+ /* Store the guest special registers */
+ hypctx->elr_el1 = READ_SPECIALREG(elr_el1);
+ hypctx->sp_el0 = READ_SPECIALREG(sp_el0);
+ hypctx->tpidr_el0 = READ_SPECIALREG(tpidr_el0);
+ hypctx->tpidrro_el0 = READ_SPECIALREG(tpidrro_el0);
+ hypctx->tpidr_el1 = READ_SPECIALREG(tpidr_el1);
+ hypctx->vbar_el1 = READ_SPECIALREG(vbar_el1);
+
+ hypctx->actlr_el1 = READ_SPECIALREG(actlr_el1);
+ hypctx->afsr0_el1 = READ_SPECIALREG(afsr0_el1);
+ hypctx->afsr1_el1 = READ_SPECIALREG(afsr1_el1);
+ hypctx->amair_el1 = READ_SPECIALREG(amair_el1);
+ hypctx->contextidr_el1 = READ_SPECIALREG(contextidr_el1);
+ hypctx->cpacr_el1 = READ_SPECIALREG(cpacr_el1);
+ hypctx->csselr_el1 = READ_SPECIALREG(csselr_el1);
+ hypctx->esr_el1 = READ_SPECIALREG(esr_el1);
+ hypctx->far_el1 = READ_SPECIALREG(far_el1);
+ hypctx->mair_el1 = READ_SPECIALREG(mair_el1);
+ hypctx->mdccint_el1 = READ_SPECIALREG(mdccint_el1);
+ hypctx->mdscr_el1 = READ_SPECIALREG(mdscr_el1);
+ hypctx->par_el1 = READ_SPECIALREG(par_el1);
+ hypctx->sctlr_el1 = READ_SPECIALREG(sctlr_el1);
+ hypctx->spsr_el1 = READ_SPECIALREG(spsr_el1);
+ hypctx->tcr_el1 = READ_SPECIALREG(tcr_el1);
+ /* TODO: Support when this is not res0 */
+ hypctx->tcr2_el1 = 0;
+ hypctx->ttbr0_el1 = READ_SPECIALREG(ttbr0_el1);
+ hypctx->ttbr1_el1 = READ_SPECIALREG(ttbr1_el1);
+
+ hypctx->cptr_el2 = READ_SPECIALREG(cptr_el2);
+ hypctx->hcr_el2 = READ_SPECIALREG(hcr_el2);
+ hypctx->vpidr_el2 = READ_SPECIALREG(vpidr_el2);
+ hypctx->vmpidr_el2 = READ_SPECIALREG(vmpidr_el2);
+}
+
+static void
+vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest)
+{
+ uint64_t dfr0;
+
+ /* Restore the special registers */
+ WRITE_SPECIALREG(elr_el1, hypctx->elr_el1);
+ WRITE_SPECIALREG(sp_el0, hypctx->sp_el0);
+ WRITE_SPECIALREG(tpidr_el0, hypctx->tpidr_el0);
+ WRITE_SPECIALREG(tpidrro_el0, hypctx->tpidrro_el0);
+ WRITE_SPECIALREG(tpidr_el1, hypctx->tpidr_el1);
+ WRITE_SPECIALREG(vbar_el1, hypctx->vbar_el1);
+
+ WRITE_SPECIALREG(actlr_el1, hypctx->actlr_el1);
+ WRITE_SPECIALREG(afsr0_el1, hypctx->afsr0_el1);
+ WRITE_SPECIALREG(afsr1_el1, hypctx->afsr1_el1);
+ WRITE_SPECIALREG(amair_el1, hypctx->amair_el1);
+ WRITE_SPECIALREG(contextidr_el1, hypctx->contextidr_el1);
+ WRITE_SPECIALREG(cpacr_el1, hypctx->cpacr_el1);
+ WRITE_SPECIALREG(csselr_el1, hypctx->csselr_el1);
+ WRITE_SPECIALREG(esr_el1, hypctx->esr_el1);
+ WRITE_SPECIALREG(far_el1, hypctx->far_el1);
+ WRITE_SPECIALREG(mdccint_el1, hypctx->mdccint_el1);
+ WRITE_SPECIALREG(mdscr_el1, hypctx->mdscr_el1);
+ WRITE_SPECIALREG(mair_el1, hypctx->mair_el1);
+ WRITE_SPECIALREG(par_el1, hypctx->par_el1);
+ WRITE_SPECIALREG(sctlr_el1, hypctx->sctlr_el1);
+ WRITE_SPECIALREG(tcr_el1, hypctx->tcr_el1);
+ /* TODO: tcr2_el1 */
+ WRITE_SPECIALREG(ttbr0_el1, hypctx->ttbr0_el1);
+ WRITE_SPECIALREG(ttbr1_el1, hypctx->ttbr1_el1);
+ WRITE_SPECIALREG(spsr_el1, hypctx->spsr_el1);
+
+ WRITE_SPECIALREG(cptr_el2, hypctx->cptr_el2);
+ WRITE_SPECIALREG(hcr_el2, hypctx->hcr_el2);
+ WRITE_SPECIALREG(vpidr_el2, hypctx->vpidr_el2);
+ WRITE_SPECIALREG(vmpidr_el2, hypctx->vmpidr_el2);
+
+ /* Load the special regs from the trapframe */
+ WRITE_SPECIALREG(sp_el1, hypctx->tf.tf_sp);
+ WRITE_SPECIALREG(elr_el2, hypctx->tf.tf_elr);
+ WRITE_SPECIALREG(spsr_el2, hypctx->tf.tf_spsr);
+
+ /* Restore the PMU registers */
+ WRITE_SPECIALREG(pmcr_el0, hypctx->pmcr_el0);
+ WRITE_SPECIALREG(pmccntr_el0, hypctx->pmccntr_el0);
+ WRITE_SPECIALREG(pmccfiltr_el0, hypctx->pmccfiltr_el0);
+ /* Clear all events/interrupts then enable them */
+ WRITE_SPECIALREG(pmcntenclr_el0, 0xfffffffful);
+ WRITE_SPECIALREG(pmcntenset_el0, hypctx->pmcntenset_el0);
+ WRITE_SPECIALREG(pmintenclr_el1, 0xfffffffful);
+ WRITE_SPECIALREG(pmintenset_el1, hypctx->pmintenset_el1);
+ WRITE_SPECIALREG(pmovsclr_el0, 0xfffffffful);
+ WRITE_SPECIALREG(pmovsset_el0, hypctx->pmovsset_el0);
+
+ switch ((hypctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT) {
+#define LOAD_PMU(x) \
+ case (x + 1): \
+ WRITE_SPECIALREG(pmevcntr ## x ## _el0, \
+ hypctx->pmevcntr_el0[x]); \
+ WRITE_SPECIALREG(pmevtyper ## x ## _el0, \
+ hypctx->pmevtyper_el0[x])
+ LOAD_PMU(30);
+ LOAD_PMU(29);
+ LOAD_PMU(28);
+ LOAD_PMU(27);
+ LOAD_PMU(26);
+ LOAD_PMU(25);
+ LOAD_PMU(24);
+ LOAD_PMU(23);
+ LOAD_PMU(22);
+ LOAD_PMU(21);
+ LOAD_PMU(20);
+ LOAD_PMU(19);
+ LOAD_PMU(18);
+ LOAD_PMU(17);
+ LOAD_PMU(16);
+ LOAD_PMU(15);
+ LOAD_PMU(14);
+ LOAD_PMU(13);
+ LOAD_PMU(12);
+ LOAD_PMU(11);
+ LOAD_PMU(10);
+ LOAD_PMU(9);
+ LOAD_PMU(8);
+ LOAD_PMU(7);
+ LOAD_PMU(6);
+ LOAD_PMU(5);
+ LOAD_PMU(4);
+ LOAD_PMU(3);
+ LOAD_PMU(2);
+ LOAD_PMU(1);
+ LOAD_PMU(0);
+ default: /* N == 0 when only PMCCNTR_EL0 is available */
+ break;
+#undef LOAD_PMU
+ }
+
+ dfr0 = READ_SPECIALREG(id_aa64dfr0_el1);
+ switch (ID_AA64DFR0_BRPs_VAL(dfr0) - 1) {
+#define LOAD_DBG_BRP(x) \
+ case x: \
+ WRITE_SPECIALREG(dbgbcr ## x ## _el1, \
+ hypctx->dbgbcr_el1[x]); \
+ WRITE_SPECIALREG(dbgbvr ## x ## _el1, \
+ hypctx->dbgbvr_el1[x])
+ LOAD_DBG_BRP(15);
+ LOAD_DBG_BRP(14);
+ LOAD_DBG_BRP(13);
+ LOAD_DBG_BRP(12);
+ LOAD_DBG_BRP(11);
+ LOAD_DBG_BRP(10);
+ LOAD_DBG_BRP(9);
+ LOAD_DBG_BRP(8);
+ LOAD_DBG_BRP(7);
+ LOAD_DBG_BRP(6);
+ LOAD_DBG_BRP(5);
+ LOAD_DBG_BRP(4);
+ LOAD_DBG_BRP(3);
+ LOAD_DBG_BRP(2);
+ LOAD_DBG_BRP(1);
+ default:
+ LOAD_DBG_BRP(0);
+#undef LOAD_DBG_BRP
+ }
+
+ switch (ID_AA64DFR0_WRPs_VAL(dfr0) - 1) {
+#define LOAD_DBG_WRP(x) \
+ case x: \
+ WRITE_SPECIALREG(dbgwcr ## x ## _el1, \
+ hypctx->dbgwcr_el1[x]); \
+ WRITE_SPECIALREG(dbgwvr ## x ## _el1, \
+ hypctx->dbgwvr_el1[x])
+ LOAD_DBG_WRP(15);
+ LOAD_DBG_WRP(14);
+ LOAD_DBG_WRP(13);
+ LOAD_DBG_WRP(12);
+ LOAD_DBG_WRP(11);
+ LOAD_DBG_WRP(10);
+ LOAD_DBG_WRP(9);
+ LOAD_DBG_WRP(8);
+ LOAD_DBG_WRP(7);
+ LOAD_DBG_WRP(6);
+ LOAD_DBG_WRP(5);
+ LOAD_DBG_WRP(4);
+ LOAD_DBG_WRP(3);
+ LOAD_DBG_WRP(2);
+ LOAD_DBG_WRP(1);
+ default:
+ LOAD_DBG_WRP(0);
+#undef LOAD_DBG_WRP
+ }
+
+ if (guest) {
+ /* Load the timer registers */
+ WRITE_SPECIALREG(cntkctl_el1, hypctx->vtimer_cpu.cntkctl_el1);
+ WRITE_SPECIALREG(cntv_cval_el0,
+ hypctx->vtimer_cpu.virt_timer.cntx_cval_el0);
+ WRITE_SPECIALREG(cntv_ctl_el0,
+ hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0);
+ WRITE_SPECIALREG(cnthctl_el2, hyp->vtimer.cnthctl_el2);
+ WRITE_SPECIALREG(cntvoff_el2, hyp->vtimer.cntvoff_el2);
+
+ /* Load the GICv3 registers */
+ WRITE_SPECIALREG(ich_hcr_el2, hypctx->vgic_v3_regs.ich_hcr_el2);
+ WRITE_SPECIALREG(ich_vmcr_el2,
+ hypctx->vgic_v3_regs.ich_vmcr_el2);
+ switch (hypctx->vgic_v3_regs.ich_lr_num - 1) {
+#define LOAD_LR(x) \
+ case x: \
+ WRITE_SPECIALREG(ich_lr ## x ##_el2, \
+ hypctx->vgic_v3_regs.ich_lr_el2[x])
+ LOAD_LR(15);
+ LOAD_LR(14);
+ LOAD_LR(13);
+ LOAD_LR(12);
+ LOAD_LR(11);
+ LOAD_LR(10);
+ LOAD_LR(9);
+ LOAD_LR(8);
+ LOAD_LR(7);
+ LOAD_LR(6);
+ LOAD_LR(5);
+ LOAD_LR(4);
+ LOAD_LR(3);
+ LOAD_LR(2);
+ LOAD_LR(1);
+ default:
+ LOAD_LR(0);
+#undef LOAD_LR
+ }
+
+ switch (hypctx->vgic_v3_regs.ich_apr_num - 1) {
+#define LOAD_APR(x) \
+ case x: \
+ WRITE_SPECIALREG(ich_ap0r ## x ##_el2, \
+ hypctx->vgic_v3_regs.ich_ap0r_el2[x]); \
+ WRITE_SPECIALREG(ich_ap1r ## x ##_el2, \
+ hypctx->vgic_v3_regs.ich_ap1r_el2[x])
+ LOAD_APR(3);
+ LOAD_APR(2);
+ LOAD_APR(1);
+ default:
+ LOAD_APR(0);
+#undef LOAD_APR
+ }
+ }
+}
+
+static uint64_t
+vmm_hyp_call_guest(struct hyp *hyp, struct hypctx *hypctx)
+{
+ struct hypctx host_hypctx;
+ uint64_t cntvoff_el2;
+ uint64_t ich_hcr_el2, ich_vmcr_el2, cnthctl_el2, cntkctl_el1;
+ uint64_t ret;
+ uint64_t s1e1r, hpfar_el2;
+ bool hpfar_valid;
+
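+	/* Save the host register state so it can be restored on guest exit */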
+ vmm_hyp_reg_store(&host_hypctx, NULL, false);
+
+ /* Save the host special registers */
+ cnthctl_el2 = READ_SPECIALREG(cnthctl_el2);
+ cntkctl_el1 = READ_SPECIALREG(cntkctl_el1);
+ cntvoff_el2 = READ_SPECIALREG(cntvoff_el2);
+
+ ich_hcr_el2 = READ_SPECIALREG(ich_hcr_el2);
+ ich_vmcr_el2 = READ_SPECIALREG(ich_vmcr_el2);
+
+ vmm_hyp_reg_restore(hypctx, hyp, true);
+
+ /* Load the common hypervisor registers */
+ WRITE_SPECIALREG(vttbr_el2, hyp->vttbr_el2);
+
+ host_hypctx.mdcr_el2 = READ_SPECIALREG(mdcr_el2);
+ WRITE_SPECIALREG(mdcr_el2, hypctx->mdcr_el2);
+
+ /* Call into the guest */
+ ret = vmm_enter_guest(hypctx);
+
+ WRITE_SPECIALREG(mdcr_el2, host_hypctx.mdcr_el2);
+ isb();
+
+ /* Store the exit info */
+ hypctx->exit_info.far_el2 = READ_SPECIALREG(far_el2);
+ vmm_hyp_reg_store(hypctx, hyp, true);
+
+ hpfar_valid = true;
+ if (ret == EXCP_TYPE_EL1_SYNC) {
+ switch (ESR_ELx_EXCEPTION(hypctx->tf.tf_esr)) {
+ case EXCP_INSN_ABORT_L:
+ case EXCP_DATA_ABORT_L:
+ /*
+ * The hpfar_el2 register is valid for:
+ * - Translation and Access faults.
+ * - Translation, Access, and permission faults on
+ * the translation table walk on the stage 1 tables.
+ * - A stage 2 Address size fault.
+ *
+ * As we only need it in the first 2 cases we can just
+ * exclude it on permission faults that are not from
+ * the stage 1 table walk.
+ *
+ * TODO: Add a case for Arm erratum 834220.
+ */
+ if ((hypctx->tf.tf_esr & ISS_DATA_S1PTW) != 0)
+ break;
+ switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) {
+ case ISS_DATA_DFSC_PF_L1:
+ case ISS_DATA_DFSC_PF_L2:
+ case ISS_DATA_DFSC_PF_L3:
+ hpfar_valid = false;
+ break;
+ }
+ break;
+ }
+ }
+ if (hpfar_valid) {
+ hypctx->exit_info.hpfar_el2 = READ_SPECIALREG(hpfar_el2);
+ } else {
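+		/*
+		 * hpfar_el2 is not valid for this exit, so walk the guest
+		 * stage 1 tables with an address translation instruction to
+		 * recover the faulting IPA. If the translation fails re-enter
+		 * the guest so the access can be retried.
+		 */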
+ /*
+ * TODO: There is a risk the at instruction could cause an
+ * exception here. We should handle it & return a failure.
+ */
+ s1e1r =
+ arm64_address_translate_s1e1r(hypctx->exit_info.far_el2);
+ if (PAR_SUCCESS(s1e1r)) {
+ hpfar_el2 = (s1e1r & PAR_PA_MASK) >> PAR_PA_SHIFT;
+ hpfar_el2 <<= HPFAR_EL2_FIPA_SHIFT;
+ hypctx->exit_info.hpfar_el2 = hpfar_el2;
+ } else {
+ ret = EXCP_TYPE_REENTER;
+ }
+ }
+
+ vmm_hyp_reg_restore(&host_hypctx, NULL, false);
+
+ /* Restore the host special registers */
+ WRITE_SPECIALREG(ich_hcr_el2, ich_hcr_el2);
+ WRITE_SPECIALREG(ich_vmcr_el2, ich_vmcr_el2);
+
+ WRITE_SPECIALREG(cnthctl_el2, cnthctl_el2);
+ WRITE_SPECIALREG(cntkctl_el1, cntkctl_el1);
+ WRITE_SPECIALREG(cntvoff_el2, cntvoff_el2);
+
+ return (ret);
+}
+
+static uint64_t
+vmm_hyp_read_reg(uint64_t reg)
+{
+ switch (reg) {
+ case HYP_REG_ICH_VTR:
+ return (READ_SPECIALREG(ich_vtr_el2));
+ case HYP_REG_CNTHCTL:
+ return (READ_SPECIALREG(cnthctl_el2));
+ }
+
+ return (0);
+}
+
+static int
+vmm_clean_s2_tlbi(void)
+{
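+	/* Invalidate all stage 1 and stage 2 EL1&0 TLB entries, Inner Shareable */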
+ dsb(ishst);
+ __asm __volatile("tlbi alle1is");
+ dsb(ish);
+
+ return (0);
+}
+
+static int
+vm_s2_tlbi_range(uint64_t vttbr, vm_offset_t sva, vm_size_t eva,
+ bool final_only)
+{
+ uint64_t end, r, start;
+ uint64_t host_vttbr;
+
+#define TLBI_VA_SHIFT 12
+#define TLBI_VA_MASK ((1ul << 44) - 1)
+#define TLBI_VA(addr) (((addr) >> TLBI_VA_SHIFT) & TLBI_VA_MASK)
+#define TLBI_VA_L3_INCR (L3_SIZE >> TLBI_VA_SHIFT)
+
+ /* Switch to the guest vttbr */
+ /* TODO: Handle Cortex-A57/A72 erratum 131936 */
+ host_vttbr = READ_SPECIALREG(vttbr_el2);
+ WRITE_SPECIALREG(vttbr_el2, vttbr);
+ isb();
+
+ /*
+ * The CPU can cache the stage 1 + 2 combination so we need to ensure
+ * the stage 2 is invalidated first, then when this has completed we
+ * invalidate the stage 1 TLB. As we don't know which stage 1 virtual
+ * addresses point at the stage 2 IPA we need to invalidate the entire
+ * stage 1 TLB.
+ */
+
+ start = TLBI_VA(sva);
+ end = TLBI_VA(eva);
+ for (r = start; r < end; r += TLBI_VA_L3_INCR) {
+ /* Invalidate the stage 2 TLB entry */
+ if (final_only)
+ __asm __volatile("tlbi ipas2le1is, %0" : : "r"(r));
+ else
+ __asm __volatile("tlbi ipas2e1is, %0" : : "r"(r));
+ }
+	/* Ensure the entries have been invalidated */
+ dsb(ish);
+ /* Invalidate the stage 1 TLB. */
+ __asm __volatile("tlbi vmalle1is");
+ dsb(ish);
+ isb();
+
+	/* Switch back to the host vttbr */
+ WRITE_SPECIALREG(vttbr_el2, host_vttbr);
+ isb();
+
+ return (0);
+}
+
+static int
+vm_s2_tlbi_all(uint64_t vttbr)
+{
+ uint64_t host_vttbr;
+
+ /* Switch to the guest vttbr */
+ /* TODO: Handle Cortex-A57/A72 erratum 131936 */
+ host_vttbr = READ_SPECIALREG(vttbr_el2);
+ WRITE_SPECIALREG(vttbr_el2, vttbr);
+ isb();
+
+ __asm __volatile("tlbi vmalls12e1is");
+ dsb(ish);
+ isb();
+
+	/* Switch back to the host vttbr */
+ WRITE_SPECIALREG(vttbr_el2, host_vttbr);
+ isb();
+
+ return (0);
+}
+
+static int
+vmm_dc_civac(uint64_t start, uint64_t len)
+{
+ size_t line_size, end;
+ uint64_t ctr;
+
+ ctr = READ_SPECIALREG(ctr_el0);
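+	/* CTR_EL0.DminLine is log2 of the smallest D-cache line size, in words */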
+ line_size = sizeof(int) << CTR_DLINE_SIZE(ctr);
+ end = start + len;
+ dsb(ishst);
+ /* Clean and Invalidate the D-cache */
+ for (; start < end; start += line_size)
+ __asm __volatile("dc civac, %0" :: "r" (start) : "memory");
+ dsb(ish);
+ return (0);
+}
+
+static int
+vmm_el2_tlbi(uint64_t type, uint64_t start, uint64_t len)
+{
+ uint64_t end, r;
+
+ dsb(ishst);
+ switch (type) {
+ default:
+ case HYP_EL2_TLBI_ALL:
+ __asm __volatile("tlbi alle2" ::: "memory");
+ break;
+ case HYP_EL2_TLBI_VA:
+ end = TLBI_VA(start + len);
+ start = TLBI_VA(start);
+ for (r = start; r < end; r += TLBI_VA_L3_INCR) {
+ __asm __volatile("tlbi vae2is, %0" :: "r"(r));
+ }
+ break;
+ }
+ dsb(ish);
+
+ return (0);
+}
+
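+/*
+ * Hypervisor call dispatcher. This is reached from the EL2 synchronous
+ * exception handler when the host kernel makes a call into the hypervisor.
+ */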
+uint64_t
+vmm_hyp_enter(uint64_t handle, uint64_t x1, uint64_t x2, uint64_t x3,
+ uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7)
+{
+ uint64_t ret;
+
+ switch (handle) {
+ case HYP_ENTER_GUEST:
+ do {
+ ret = vmm_hyp_call_guest((struct hyp *)x1,
+ (struct hypctx *)x2);
+ } while (ret == EXCP_TYPE_REENTER);
+ return (ret);
+ case HYP_READ_REGISTER:
+ return (vmm_hyp_read_reg(x1));
+ case HYP_CLEAN_S2_TLBI:
+ return (vmm_clean_s2_tlbi());
+ case HYP_DC_CIVAC:
+ return (vmm_dc_civac(x1, x2));
+ case HYP_EL2_TLBI:
+ return (vmm_el2_tlbi(x1, x2, x3));
+ case HYP_S2_TLBI_RANGE:
+ return (vm_s2_tlbi_range(x1, x2, x3, x4));
+ case HYP_S2_TLBI_ALL:
+ return (vm_s2_tlbi_all(x1));
+ case HYP_CLEANUP: /* Handled in vmm_hyp_exception.S */
+ default:
+ break;
+ }
+
+ return (0);
+}
diff --git a/sys/arm64/vmm/vmm_hyp_el2.S b/sys/arm64/vmm/vmm_hyp_el2.S
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_hyp_el2.S
@@ -0,0 +1,39 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2021 Andrew Turner
+ *
+ * This work was supported by Innovate UK project 105694, "Digital Security
+ * by Design (DSbD) Technology Platform Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/param.h>
+
+ .rodata
+ .align PAGE_SHIFT
+ .globl vmm_hyp_code
+vmm_hyp_code:
+ .incbin "vmm_hyp_blob.bin"
+ .globl vmm_hyp_code_end
+vmm_hyp_code_end:
diff --git a/sys/arm64/vmm/vmm_hyp_exception.S b/sys/arm64/vmm/vmm_hyp_exception.S
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_hyp_exception.S
@@ -0,0 +1,384 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * Copyright (c) 2021 Andrew Turner
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <machine/asm.h>
+#include <machine/hypervisor.h>
+
+#include "assym.inc"
+#include "hyp.h"
+
+.macro save_host_registers
+ /* TODO: Only store callee saved registers */
+ sub sp, sp, #(32 * 8)
+ str x30, [sp, #(30 * 8)]
+ stp x28, x29, [sp, #(28 * 8)]
+ stp x26, x27, [sp, #(26 * 8)]
+ stp x24, x25, [sp, #(24 * 8)]
+ stp x22, x23, [sp, #(22 * 8)]
+ stp x20, x21, [sp, #(20 * 8)]
+ stp x18, x19, [sp, #(18 * 8)]
+ stp x16, x17, [sp, #(16 * 8)]
+ stp x14, x15, [sp, #(14 * 8)]
+ stp x12, x13, [sp, #(12 * 8)]
+ stp x10, x11, [sp, #(10 * 8)]
+ stp x8, x9, [sp, #(8 * 8)]
+ stp x6, x7, [sp, #(6 * 8)]
+ stp x4, x5, [sp, #(4 * 8)]
+ stp x2, x3, [sp, #(2 * 8)]
+ stp x0, x1, [sp, #(0 * 8)]
+.endm
+
+.macro restore_host_registers
+ /* TODO: Only restore callee saved registers */
+ ldp x0, x1, [sp, #(0 * 8)]
+ ldp x2, x3, [sp, #(2 * 8)]
+ ldp x4, x5, [sp, #(4 * 8)]
+ ldp x6, x7, [sp, #(6 * 8)]
+ ldp x8, x9, [sp, #(8 * 8)]
+ ldp x10, x11, [sp, #(10 * 8)]
+ ldp x12, x13, [sp, #(12 * 8)]
+ ldp x14, x15, [sp, #(14 * 8)]
+ ldp x16, x17, [sp, #(16 * 8)]
+ ldp x18, x19, [sp, #(18 * 8)]
+ ldp x20, x21, [sp, #(20 * 8)]
+ ldp x22, x23, [sp, #(22 * 8)]
+ ldp x24, x25, [sp, #(24 * 8)]
+ ldp x26, x27, [sp, #(26 * 8)]
+ ldp x28, x29, [sp, #(28 * 8)]
+ ldr x30, [sp, #(30 * 8)]
+ add sp, sp, #(32 * 8)
+.endm
+
+.macro save_guest_registers
+ /* Back up x0 so we can use it as a temporary register */
+ stp x0, x1, [sp, #-(2 * 8)]!
+
+ /* Restore the hypctx pointer */
+ mrs x0, tpidr_el2
+
+ stp x2, x3, [x0, #(TF_X + 2 * 8)]
+ stp x4, x5, [x0, #(TF_X + 4 * 8)]
+ stp x6, x7, [x0, #(TF_X + 6 * 8)]
+ stp x8, x9, [x0, #(TF_X + 8 * 8)]
+ stp x10, x11, [x0, #(TF_X + 10 * 8)]
+ stp x12, x13, [x0, #(TF_X + 12 * 8)]
+ stp x14, x15, [x0, #(TF_X + 14 * 8)]
+ stp x16, x17, [x0, #(TF_X + 16 * 8)]
+ stp x18, x19, [x0, #(TF_X + 18 * 8)]
+ stp x20, x21, [x0, #(TF_X + 20 * 8)]
+ stp x22, x23, [x0, #(TF_X + 22 * 8)]
+ stp x24, x25, [x0, #(TF_X + 24 * 8)]
+ stp x26, x27, [x0, #(TF_X + 26 * 8)]
+ stp x28, x29, [x0, #(TF_X + 28 * 8)]
+
+ str lr, [x0, #(TF_LR)]
+
+ /* Restore the saved x0 & x1 and save them */
+ ldp x2, x3, [sp], #(2 * 8)
+ stp x2, x3, [x0, #(TF_X + 0 * 8)]
+.endm
+
+.macro restore_guest_registers
+ /*
+ * Copy the guest x0 and x1 to the stack so we can restore them
+ * after loading the other registers.
+ */
+ ldp x2, x3, [x0, #(TF_X + 0 * 8)]
+ stp x2, x3, [sp, #-(2 * 8)]!
+
+ ldr lr, [x0, #(TF_LR)]
+
+ ldp x28, x29, [x0, #(TF_X + 28 * 8)]
+ ldp x26, x27, [x0, #(TF_X + 26 * 8)]
+ ldp x24, x25, [x0, #(TF_X + 24 * 8)]
+ ldp x22, x23, [x0, #(TF_X + 22 * 8)]
+ ldp x20, x21, [x0, #(TF_X + 20 * 8)]
+ ldp x18, x19, [x0, #(TF_X + 18 * 8)]
+ ldp x16, x17, [x0, #(TF_X + 16 * 8)]
+ ldp x14, x15, [x0, #(TF_X + 14 * 8)]
+ ldp x12, x13, [x0, #(TF_X + 12 * 8)]
+ ldp x10, x11, [x0, #(TF_X + 10 * 8)]
+ ldp x8, x9, [x0, #(TF_X + 8 * 8)]
+ ldp x6, x7, [x0, #(TF_X + 6 * 8)]
+ ldp x4, x5, [x0, #(TF_X + 4 * 8)]
+ ldp x2, x3, [x0, #(TF_X + 2 * 8)]
+
+ ldp x0, x1, [sp], #(2 * 8)
+.endm
+
+.macro vempty
+ .align 7
+ 1: b 1b
+.endm
+
+.macro vector name
+ .align 7
+ b handle_\name
+.endm
+
+ .section ".vmm_vectors","ax"
+ .align 11
+hyp_init_vectors:
+ vempty /* Synchronous EL2t */
+ vempty /* IRQ EL2t */
+ vempty /* FIQ EL2t */
+ vempty /* Error EL2t */
+
+ vempty /* Synchronous EL2h */
+ vempty /* IRQ EL2h */
+ vempty /* FIQ EL2h */
+ vempty /* Error EL2h */
+
+ vector hyp_init /* Synchronous 64-bit EL1 */
+ vempty /* IRQ 64-bit EL1 */
+ vempty /* FIQ 64-bit EL1 */
+ vempty /* Error 64-bit EL1 */
+
+ vempty /* Synchronous 32-bit EL1 */
+ vempty /* IRQ 32-bit EL1 */
+ vempty /* FIQ 32-bit EL1 */
+ vempty /* Error 32-bit EL1 */
+
+ .text
+ .align 11
+hyp_vectors:
+ vempty /* Synchronous EL2t */
+ vempty /* IRQ EL2t */
+ vempty /* FIQ EL2t */
+ vempty /* Error EL2t */
+
+ vector el2_el2h_sync /* Synchronous EL2h */
+ vector el2_el2h_irq /* IRQ EL2h */
+ vector el2_el2h_fiq /* FIQ EL2h */
+ vector el2_el2h_error /* Error EL2h */
+
+ vector el2_el1_sync64 /* Synchronous 64-bit EL1 */
+ vector el2_el1_irq64 /* IRQ 64-bit EL1 */
+ vector el2_el1_fiq64 /* FIQ 64-bit EL1 */
+ vector el2_el1_error64 /* Error 64-bit EL1 */
+
+ vempty /* Synchronous 32-bit EL1 */
+ vempty /* IRQ 32-bit EL1 */
+ vempty /* FIQ 32-bit EL1 */
+ vempty /* Error 32-bit EL1 */
+
+/*
+ * Initialize the hypervisor mode with a new exception vector table, translation
+ * table and stack.
+ *
+ * Expecting:
+ * x0 - translation tables physical address
+ * x1 - stack top virtual address
+ * x2 - TCR_EL2 value
+ * x3 - SCTLR_EL2 value
+ * x4 - VTCR_EL2 value
+ */
+LENTRY(handle_hyp_init)
+ /* Install the new exception vectors */
+ adrp x6, hyp_vectors
+ add x6, x6, :lo12:hyp_vectors
+ msr vbar_el2, x6
+ /* Set the stack top address */
+ mov sp, x1
+ /* Use the host VTTBR_EL2 to tell the host and the guests apart */
+ mov x9, #VTTBR_HOST
+ msr vttbr_el2, x9
+ /* Load the base address for the translation tables */
+ msr ttbr0_el2, x0
+ /* Invalidate the TLB */
+ tlbi alle2
+ /* Use the same memory attributes as EL1 */
+ mrs x9, mair_el1
+ msr mair_el2, x9
+ /* Configure address translation */
+ msr tcr_el2, x2
+ isb
+ /* Set the system control register for EL2 */
+ msr sctlr_el2, x3
+ /* Set the Stage 2 translation control register */
+ msr vtcr_el2, x4
+ /* Return success */
+ mov x0, #0
+ /* MMU is up and running */
+ ERET
+LEND(handle_hyp_init)
+
+.macro do_world_switch_to_host
+ save_guest_registers
+ restore_host_registers
+
+ /* Restore host VTTBR */
+ mov x9, #VTTBR_HOST
+ msr vttbr_el2, x9
+.endm
+
+
+.macro handle_el2_excp type
+ /* Save registers before modifying so we can restore them */
+ str x9, [sp, #-16]!
+
+ /* Test if the exception happened when the host was running */
+ mrs x9, vttbr_el2
+ cmp x9, #VTTBR_HOST
+ beq 1f
+
+ /* We got the exception while the guest was running */
+ ldr x9, [sp], #16
+ do_world_switch_to_host
+ mov x0, \type
+ ret
+
+1:
+ /* We got the exception while the host was running */
+ ldr x9, [sp], #16
+ mov x0, \type
+ ERET
+.endm
+
+
+LENTRY(handle_el2_el2h_sync)
+ handle_el2_excp #EXCP_TYPE_EL2_SYNC
+LEND(handle_el2_el2h_sync)
+
+LENTRY(handle_el2_el2h_irq)
+ handle_el2_excp #EXCP_TYPE_EL2_IRQ
+LEND(handle_el2_el2h_irq)
+
+LENTRY(handle_el2_el2h_fiq)
+ handle_el2_excp #EXCP_TYPE_EL2_FIQ
+LEND(handle_el2_el2h_fiq)
+
+LENTRY(handle_el2_el2h_error)
+ handle_el2_excp #EXCP_TYPE_EL2_ERROR
+LEND(handle_el2_el2h_error)
+
+
+LENTRY(handle_el2_el1_sync64)
+ /* Save registers before modifying so we can restore them */
+ str x9, [sp, #-16]!
+
+ /* Check for host hypervisor call */
+ mrs x9, vttbr_el2
+ cmp x9, #VTTBR_HOST
+ ldr x9, [sp], #16 /* Restore the temp register */
+ bne 1f
+
+ /*
+ * Called from the host
+ */
+
+ /* Check if this is a cleanup call and handle in a controlled state */
+ cmp x0, #(HYP_CLEANUP)
+ b.eq vmm_cleanup
+
+ str lr, [sp, #-16]!
+ bl vmm_hyp_enter
+ ldr lr, [sp], #16
+ ERET
+
+1: /* Guest exception taken to EL2 */
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_SYNC
+ ret
+LEND(handle_el2_el1_sync64)
+
+/*
+ * We only trap IRQ, FIQ and SError exceptions when a guest is running. Do a
+ * world switch to host to handle these exceptions.
+ */
+
+LENTRY(handle_el2_el1_irq64)
+ do_world_switch_to_host
+ str x9, [sp, #-16]!
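+	/*
+	 * A non-zero ICH_MISR_EL2 means a GIC maintenance interrupt is
+	 * pending, report it separately from a normal host IRQ.
+	 */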
+ mrs x9, ich_misr_el2
+ cmp x9, xzr
+ beq 1f
+ mov x0, #EXCP_TYPE_MAINT_IRQ
+ b 2f
+1:
+ mov x0, #EXCP_TYPE_EL1_IRQ
+2:
+ ldr x9, [sp], #16
+ ret
+LEND(handle_el2_el1_irq64)
+
+LENTRY(handle_el2_el1_fiq64)
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_FIQ
+ ret
+LEND(handle_el2_el1_fiq64)
+
+LENTRY(handle_el2_el1_error64)
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_ERROR
+ ret
+LEND(handle_el2_el1_error64)
+
+
+/*
+ * Usage:
+ * uint64_t vmm_enter_guest(struct hypctx *hypctx)
+ *
+ * Expecting:
+ * x0 - hypctx address
+ */
+ENTRY(vmm_enter_guest)
+ /* Save hypctx address */
+ msr tpidr_el2, x0
+
+ save_host_registers
+ restore_guest_registers
+
+ /* Enter guest */
+ ERET
+END(vmm_enter_guest)
+
+/*
+ * Usage:
+ * void vmm_cleanup(uint64_t handle, void *hyp_stub_vectors)
+ *
+ * Expecting:
+ * x1 - physical address of hyp_stub_vectors
+ */
+LENTRY(vmm_cleanup)
+ /* Restore the stub vectors */
+ msr vbar_el2, x1
+
+ /* Disable the MMU */
+ dsb sy
+ mrs x2, sctlr_el2
+ bic x2, x2, #SCTLR_EL2_M
+ msr sctlr_el2, x2
+ isb
+
+ ERET
+LEND(vmm_cleanup)
diff --git a/sys/arm64/vmm/vmm_instruction_emul.c b/sys/arm64/vmm/vmm_instruction_emul.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_instruction_emul.c
@@ -0,0 +1,102 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#else
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/_iovec.h>
+
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <vmmapi.h>
+#endif
+
+#include <machine/vmm_instruction_emul.h>
+
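+/*
+ * Emulate a decoded guest MMIO access: a read calls 'memread' and stores the
+ * result in the target register, a write reads the source register, masks it
+ * to the access size and passes the value to 'memwrite'.
+ */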
+int
+vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging __unused, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *memarg)
+{
+ uint64_t val;
+ int error;
+
+ if (vie->dir == VM_DIR_READ) {
+ error = memread(vcpu, gpa, &val, vie->access_size, memarg);
+ if (error)
+ goto out;
+ error = vm_set_register(vcpu, vie->reg, val);
+ } else {
+ error = vm_get_register(vcpu, vie->reg, &val);
+ if (error)
+ goto out;
+ /* Mask any unneeded bits from the register */
+ if (vie->access_size < 8)
+ val &= (1ul << (vie->access_size * 8)) - 1;
+ error = memwrite(vcpu, gpa, val, vie->access_size, memarg);
+ }
+
+out:
+ return (error);
+}
+
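+/*
+ * Emulate a trapped special register access using the supplied register
+ * read/write callbacks.
+ */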
+int
+vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg)
+{
+ uint64_t val;
+ int error;
+
+ if (vre->dir == VM_DIR_READ) {
+ error = regread(vcpu, &val, regarg);
+ if (error)
+ goto out;
+ error = vm_set_register(vcpu, vre->reg, val);
+ } else {
+ error = vm_get_register(vcpu, vre->reg, &val);
+ if (error)
+ goto out;
+ error = regwrite(vcpu, val, regarg);
+ }
+
+out:
+ return (error);
+}
diff --git a/sys/arm64/vmm/vmm_ktr.h b/sys/arm64/vmm/vmm_ktr.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_ktr.h
@@ -0,0 +1,69 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_KTR_H_
+#define _VMM_KTR_H_
+
+#include <sys/ktr.h>
+#include <sys/pcpu.h>
+
+#ifndef KTR_VMM
+#define KTR_VMM KTR_GEN
+#endif
+
+#define VCPU_CTR0(vm, vcpuid, format) \
+CTR2(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid))
+
+#define VCPU_CTR1(vm, vcpuid, format, p1) \
+CTR3(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1))
+
+#define VCPU_CTR2(vm, vcpuid, format, p1, p2) \
+CTR4(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2))
+
+#define VCPU_CTR3(vm, vcpuid, format, p1, p2, p3) \
+CTR5(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2), (p3))
+
+#define VCPU_CTR4(vm, vcpuid, format, p1, p2, p3, p4) \
+CTR6(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), \
+ (p1), (p2), (p3), (p4))
+
+#define VM_CTR0(vm, format) \
+CTR1(KTR_VMM, "vm %s: " format, vm_name((vm)))
+
+#define VM_CTR1(vm, format, p1) \
+CTR2(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1))
+
+#define VM_CTR2(vm, format, p1, p2) \
+CTR3(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2))
+
+#define VM_CTR3(vm, format, p1, p2, p3) \
+CTR4(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3))
+
+#define VM_CTR4(vm, format, p1, p2, p3, p4) \
+CTR5(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3), (p4))
+#endif
diff --git a/sys/arm64/vmm/vmm_mmu.c b/sys/arm64/vmm/vmm_mmu.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_mmu.c
@@ -0,0 +1,430 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+#include <vm/vm_phys.h>
+
+#include <machine/atomic.h>
+#include <machine/machdep.h>
+#include <machine/vm.h>
+#include <machine/vmm.h>
+#include <machine/vmparam.h>
+
+#include "mmu.h"
+#include "arm64.h"
+
+static struct mtx vmmpmap_mtx;
+static pt_entry_t *l0;
+static vm_paddr_t l0_paddr;
+
+bool
+vmmpmap_init(void)
+{
+ vm_page_t m;
+
+ m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+ if (m == NULL)
+ return (false);
+
+ l0_paddr = VM_PAGE_TO_PHYS(m);
+ l0 = (pd_entry_t *)PHYS_TO_DMAP(l0_paddr);
+
+ mtx_init(&vmmpmap_mtx, "vmm pmap", NULL, MTX_DEF);
+
+ return (true);
+}
+
+static void
+vmmpmap_release_l3(pd_entry_t l2e)
+{
+ pt_entry_t *l3 __diagused;
+ vm_page_t m;
+ int i;
+
+ l3 = (pd_entry_t *)PHYS_TO_DMAP(l2e & ~ATTR_MASK);
+ for (i = 0; i < Ln_ENTRIES; i++) {
+ KASSERT(l3[i] == 0, ("%s: l3 still mapped: %p %lx", __func__,
+ &l3[i], l3[i]));
+ }
+
+ m = PHYS_TO_VM_PAGE(l2e & ~ATTR_MASK);
+ vm_page_unwire_noq(m);
+ vm_page_free(m);
+}
+
+static void
+vmmpmap_release_l2(pd_entry_t l1e)
+{
+ pt_entry_t *l2;
+ vm_page_t m;
+ int i;
+
+ l2 = (pd_entry_t *)PHYS_TO_DMAP(l1e & ~ATTR_MASK);
+ for (i = 0; i < Ln_ENTRIES; i++) {
+ if (l2[i] != 0) {
+ vmmpmap_release_l3(l2[i]);
+ }
+ }
+
+ m = PHYS_TO_VM_PAGE(l1e & ~ATTR_MASK);
+ vm_page_unwire_noq(m);
+ vm_page_free(m);
+}
+
+static void
+vmmpmap_release_l1(pd_entry_t l0e)
+{
+ pt_entry_t *l1;
+ vm_page_t m;
+ int i;
+
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(l0e & ~ATTR_MASK);
+ for (i = 0; i < Ln_ENTRIES; i++) {
+ if (l1[i] != 0) {
+ vmmpmap_release_l2(l1[i]);
+ }
+ }
+
+ m = PHYS_TO_VM_PAGE(l0e & ~ATTR_MASK);
+ vm_page_unwire_noq(m);
+ vm_page_free(m);
+}
+
+void
+vmmpmap_fini(void)
+{
+ vm_page_t m;
+ int i;
+
+ /* Remove the remaining entries */
+ for (i = 0; i < L0_ENTRIES; i++) {
+ if (l0[i] != 0) {
+ vmmpmap_release_l1(l0[i]);
+ }
+ }
+
+ m = PHYS_TO_VM_PAGE(l0_paddr);
+ vm_page_unwire_noq(m);
+ vm_page_free(m);
+
+ mtx_destroy(&vmmpmap_mtx);
+}
+
+uint64_t
+vmmpmap_to_ttbr0(void)
+{
+
+ return (l0_paddr);
+}
+
+/* Returns a pointer to the level 1 table, allocating if needed. */
+static pt_entry_t *
+vmmpmap_l1_table(vm_offset_t va)
+{
+ pt_entry_t new_l0e, l0e, *l1;
+ vm_page_t m;
+ int rv;
+
+ m = NULL;
+again:
+ l0e = atomic_load_64(&l0[pmap_l0_index(va)]);
+ if ((l0e & ATTR_DESCR_VALID) == 0) {
+ /* Allocate a page for the level 1 table */
+ if (m == NULL) {
+ m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+ if (m == NULL)
+ return (NULL);
+ }
+
+ new_l0e = VM_PAGE_TO_PHYS(m) | L0_TABLE;
+
+ mtx_lock(&vmmpmap_mtx);
+ rv = atomic_cmpset_64(&l0[pmap_l0_index(va)], l0e, new_l0e);
+ mtx_unlock(&vmmpmap_mtx);
+ /* We may have raced another thread, try again */
+ if (rv == 0)
+ goto again;
+
+ /* The cmpset succeeded */
+ l0e = new_l0e;
+ } else if (m != NULL) {
+ /* We allocated a page that wasn't used */
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ }
+
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(l0e & ~ATTR_MASK);
+ return (l1);
+}
+
+static pt_entry_t *
+vmmpmap_l2_table(vm_offset_t va)
+{
+ pt_entry_t new_l1e, l1e, *l1, *l2;
+ vm_page_t m;
+ int rv;
+
+ l1 = vmmpmap_l1_table(va);
+ if (l1 == NULL)
+ return (NULL);
+
+ m = NULL;
+again:
+ l1e = atomic_load_64(&l1[pmap_l1_index(va)]);
+ if ((l1e & ATTR_DESCR_VALID) == 0) {
+ /* Allocate a page for the level 2 table */
+ if (m == NULL) {
+ m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+ if (m == NULL)
+ return (NULL);
+ }
+
+ new_l1e = VM_PAGE_TO_PHYS(m) | L1_TABLE;
+
+ mtx_lock(&vmmpmap_mtx);
+ rv = atomic_cmpset_64(&l1[pmap_l1_index(va)], l1e, new_l1e);
+ mtx_unlock(&vmmpmap_mtx);
+ /* We may have raced another thread, try again */
+ if (rv == 0)
+ goto again;
+
+ /* The cmpset succeeded */
+ l1e = new_l1e;
+ } else if (m != NULL) {
+ /* We allocated a page that wasn't used */
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ }
+
+ l2 = (pd_entry_t *)PHYS_TO_DMAP(l1e & ~ATTR_MASK);
+ return (l2);
+}
+
+static pd_entry_t *
+vmmpmap_l3_table(vm_offset_t va)
+{
+ pt_entry_t new_l2e, l2e, *l2, *l3;
+ vm_page_t m;
+ int rv;
+
+ l2 = vmmpmap_l2_table(va);
+ if (l2 == NULL)
+ return (NULL);
+
+ m = NULL;
+again:
+ l2e = atomic_load_64(&l2[pmap_l2_index(va)]);
+ if ((l2e & ATTR_DESCR_VALID) == 0) {
+ /* Allocate a page for the level 3 table */
+ if (m == NULL) {
+ m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+ if (m == NULL)
+ return (NULL);
+ }
+
+ new_l2e = VM_PAGE_TO_PHYS(m) | L2_TABLE;
+
+ mtx_lock(&vmmpmap_mtx);
+ rv = atomic_cmpset_64(&l2[pmap_l2_index(va)], l2e, new_l2e);
+ mtx_unlock(&vmmpmap_mtx);
+ /* We may have raced another thread, try again */
+ if (rv == 0)
+ goto again;
+
+ /* The cmpset succeeded */
+ l2e = new_l2e;
+ } else if (m != NULL) {
+ /* We allocated a page that wasn't used */
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ }
+
+ l3 = (pt_entry_t *)PHYS_TO_DMAP(l2e & ~ATTR_MASK);
+ return (l3);
+}
+
+/*
+ * Creates an EL2 entry in the hyp_pmap. Similar to pmap_kenter.
+ */
+bool
+vmmpmap_enter(vm_offset_t va, vm_size_t size, vm_paddr_t pa, vm_prot_t prot)
+{
+ pd_entry_t l3e, *l3;
+
+ KASSERT((pa & L3_OFFSET) == 0,
+ ("%s: Invalid physical address", __func__));
+ KASSERT((va & L3_OFFSET) == 0,
+ ("%s: Invalid virtual address", __func__));
+ KASSERT((size & PAGE_MASK) == 0,
+ ("%s: Mapping is not page-sized", __func__));
+
+ l3e = ATTR_DEFAULT | L3_PAGE;
+ /* This bit is res1 at EL2 */
+ l3e |= ATTR_S1_AP(ATTR_S1_AP_USER);
+ /* Only normal memory is used at EL2 */
+ l3e |= ATTR_S1_IDX(VM_MEMATTR_DEFAULT);
+
+ if ((prot & VM_PROT_EXECUTE) == 0) {
+ /* PXN is res0 at EL2. UXN is XN */
+ l3e |= ATTR_S1_UXN;
+ }
+ if ((prot & VM_PROT_WRITE) == 0) {
+ l3e |= ATTR_S1_AP(ATTR_S1_AP_RO);
+ }
+
+ while (size > 0) {
+ l3 = vmmpmap_l3_table(va);
+ if (l3 == NULL)
+ return (false);
+
+#ifdef INVARIANTS
+ /*
+ * Ensure no other threads can write to l3 between the KASSERT
+ * and store.
+ */
+ mtx_lock(&vmmpmap_mtx);
+#endif
+ KASSERT(atomic_load_64(&l3[pmap_l3_index(va)]) == 0,
+ ("%s: VA already mapped", __func__));
+
+ atomic_store_64(&l3[pmap_l3_index(va)], l3e | pa);
+#ifdef INVARIANTS
+ mtx_unlock(&vmmpmap_mtx);
+#endif
+
+ size -= PAGE_SIZE;
+ pa += PAGE_SIZE;
+ va += PAGE_SIZE;
+ }
+
+ return (true);
+}
+
+void
+vmmpmap_remove(vm_offset_t va, vm_size_t size, bool invalidate)
+{
+ pt_entry_t l0e, *l1, l1e, *l2, l2e;
+ pd_entry_t *l3, l3e, **l3_list;
+ vm_offset_t eva, va_next, sva;
+ size_t i;
+
+ KASSERT((va & L3_OFFSET) == 0,
+ ("%s: Invalid virtual address", __func__));
+ KASSERT((size & PAGE_MASK) == 0,
+ ("%s: Mapping is not page-sized", __func__));
+
+ if (invalidate) {
+ l3_list = malloc((size / PAGE_SIZE) * sizeof(l3_list[0]),
+ M_TEMP, M_WAITOK | M_ZERO);
+ }
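+	/*
+	 * When invalidating, the entries are first made read-only so the
+	 * data cache can be cleaned and invalidated from EL2, and only
+	 * then are they cleared and the EL2 TLB invalidated.
+	 */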
+
+ sva = va;
+ eva = va + size;
+ mtx_lock(&vmmpmap_mtx);
+ for (i = 0; va < eva; va = va_next) {
+ l0e = atomic_load_64(&l0[pmap_l0_index(va)]);
+ if (l0e == 0) {
+ va_next = (va + L0_SIZE) & ~L0_OFFSET;
+ if (va_next < va)
+ va_next = eva;
+ continue;
+ }
+ MPASS((l0e & ATTR_DESCR_MASK) == L0_TABLE);
+
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(l0e & ~ATTR_MASK);
+ l1e = atomic_load_64(&l1[pmap_l1_index(va)]);
+ if (l1e == 0) {
+ va_next = (va + L1_SIZE) & ~L1_OFFSET;
+ if (va_next < va)
+ va_next = eva;
+ continue;
+ }
+ MPASS((l1e & ATTR_DESCR_MASK) == L1_TABLE);
+
+ l2 = (pd_entry_t *)PHYS_TO_DMAP(l1e & ~ATTR_MASK);
+ l2e = atomic_load_64(&l2[pmap_l2_index(va)]);
+ if (l2e == 0) {
+ va_next = (va + L2_SIZE) & ~L2_OFFSET;
+ if (va_next < va)
+ va_next = eva;
+ continue;
+ }
+ MPASS((l2e & ATTR_DESCR_MASK) == L2_TABLE);
+
+ l3 = (pd_entry_t *)PHYS_TO_DMAP(l2e & ~ATTR_MASK);
+ if (invalidate) {
+ l3e = atomic_load_64(&l3[pmap_l3_index(va)]);
+ MPASS(l3e != 0);
+ /*
+ * Mark memory as read-only so we can invalidate
+ * the cache.
+ */
+ l3e &= ~ATTR_S1_AP_MASK;
+ l3e |= ATTR_S1_AP(ATTR_S1_AP_RO);
+ atomic_store_64(&l3[pmap_l3_index(va)], l3e);
+
+ l3_list[i] = &l3[pmap_l3_index(va)];
+ i++;
+ } else {
+ /*
+ * The caller is responsible for clearing the cache &
+ * handling the TLB
+ */
+ atomic_store_64(&l3[pmap_l3_index(va)], 0);
+ }
+
+ va_next = (va + L3_SIZE) & ~L3_OFFSET;
+ if (va_next < va)
+ va_next = eva;
+ }
+ mtx_unlock(&vmmpmap_mtx);
+
+ if (invalidate) {
+ /* Invalidate the memory from the D-cache */
+ vmm_call_hyp(HYP_DC_CIVAC, sva, size);
+
+ for (i = 0; i < (size / PAGE_SIZE); i++) {
+ atomic_store_64(l3_list[i], 0);
+ }
+
+ vmm_call_hyp(HYP_EL2_TLBI, HYP_EL2_TLBI_VA, sva, size);
+
+ free(l3_list, M_TEMP);
+ }
+}
diff --git a/sys/arm64/vmm/vmm_reset.c b/sys/arm64/vmm/vmm_reset.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_reset.c
@@ -0,0 +1,177 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+
+#include <machine/armreg.h>
+#include <machine/cpu.h>
+#include <machine/hypervisor.h>
+
+#include "arm64.h"
+#include "reset.h"
+
+/*
+ * Make the architecturally UNKNOWN value 0. As a bonus, we don't have to
+ * manually set all those RES0 fields.
+ */
+#define ARCH_UNKNOWN 0
+#define set_arch_unknown(reg) (memset(&(reg), ARCH_UNKNOWN, sizeof(reg)))
+
+void
+reset_vm_el01_regs(void *vcpu)
+{
+ struct hypctx *el2ctx;
+
+ el2ctx = vcpu;
+
+ set_arch_unknown(el2ctx->tf);
+
+ set_arch_unknown(el2ctx->actlr_el1);
+ set_arch_unknown(el2ctx->afsr0_el1);
+ set_arch_unknown(el2ctx->afsr1_el1);
+ set_arch_unknown(el2ctx->amair_el1);
+ set_arch_unknown(el2ctx->contextidr_el1);
+ set_arch_unknown(el2ctx->cpacr_el1);
+ set_arch_unknown(el2ctx->csselr_el1);
+ set_arch_unknown(el2ctx->elr_el1);
+ set_arch_unknown(el2ctx->esr_el1);
+ set_arch_unknown(el2ctx->far_el1);
+ set_arch_unknown(el2ctx->mair_el1);
+ set_arch_unknown(el2ctx->mdccint_el1);
+ set_arch_unknown(el2ctx->mdscr_el1);
+ set_arch_unknown(el2ctx->par_el1);
+
+ /*
+ * Guest starts with:
+ * ~SCTLR_M: MMU off
+ * ~SCTLR_C: data cache off
+ * SCTLR_CP15BEN: memory barrier instruction enable from EL0; RAO/WI
+ * ~SCTLR_I: instruction cache off
+ */
+ el2ctx->sctlr_el1 = SCTLR_RES1;
+ el2ctx->sctlr_el1 &= ~SCTLR_M & ~SCTLR_C & ~SCTLR_I;
+ el2ctx->sctlr_el1 |= SCTLR_CP15BEN;
+
+ set_arch_unknown(el2ctx->sp_el0);
+ set_arch_unknown(el2ctx->tcr_el1);
+ set_arch_unknown(el2ctx->tpidr_el0);
+ set_arch_unknown(el2ctx->tpidr_el1);
+ set_arch_unknown(el2ctx->tpidrro_el0);
+ set_arch_unknown(el2ctx->ttbr0_el1);
+ set_arch_unknown(el2ctx->ttbr1_el1);
+ set_arch_unknown(el2ctx->vbar_el1);
+ set_arch_unknown(el2ctx->spsr_el1);
+
+ set_arch_unknown(el2ctx->dbgbcr_el1);
+ set_arch_unknown(el2ctx->dbgbvr_el1);
+ set_arch_unknown(el2ctx->dbgwcr_el1);
+ set_arch_unknown(el2ctx->dbgwvr_el1);
+
+ el2ctx->pmcr_el0 = READ_SPECIALREG(pmcr_el0) & PMCR_N_MASK;
+ /* PMCR_LC is unknown when AArch32 is supported or RES1 otherwise */
+ el2ctx->pmcr_el0 |= PMCR_LC;
+ set_arch_unknown(el2ctx->pmccntr_el0);
+ set_arch_unknown(el2ctx->pmccfiltr_el0);
+ set_arch_unknown(el2ctx->pmcntenset_el0);
+ set_arch_unknown(el2ctx->pmintenset_el1);
+ set_arch_unknown(el2ctx->pmovsset_el0);
+ set_arch_unknown(el2ctx->pmuserenr_el0);
+ memset(el2ctx->pmevcntr_el0, 0, sizeof(el2ctx->pmevcntr_el0));
+ memset(el2ctx->pmevtyper_el0, 0, sizeof(el2ctx->pmevtyper_el0));
+}
+
+void
+reset_vm_el2_regs(void *vcpu)
+{
+ struct hypctx *el2ctx;
+ uint64_t cpu_aff, vcpuid;
+
+ el2ctx = vcpu;
+ vcpuid = vcpu_vcpuid(el2ctx->vcpu);
+
+ /*
+ * Set the Hypervisor Configuration Register:
+ *
+ * HCR_RW: use AArch64 for EL1
+ * HCR_TID3: handle ID registers in the vmm to provide a common
+ * set of features on all vcpus
+ * HCR_TWI: Trap WFI to the hypervisor
+ * HCR_BSU_IS: barrier instructions apply to the inner shareable
+ * domain
+ * HCR_FB: broadcast maintenance operations
+ * HCR_AMO: route physical SError interrupts to EL2
+ * HCR_IMO: route physical IRQ interrupts to EL2
+ * HCR_FMO: route physical FIQ interrupts to EL2
+ * HCR_SWIO: turn set/way invalidate into set/way clean and
+ * invalidate
+ * HCR_VM: use stage 2 translation
+ */
+ el2ctx->hcr_el2 = HCR_RW | HCR_TID3 | HCR_TWI | HCR_BSU_IS | HCR_FB |
+ HCR_AMO | HCR_IMO | HCR_FMO | HCR_SWIO | HCR_VM;
+
+ /* TODO: Trap all extensions we don't support */
+ el2ctx->mdcr_el2 = 0;
+ /* PMCR_EL0.N is read from MDCR_EL2.HPMN */
+ el2ctx->mdcr_el2 |= (el2ctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT;
+
+ el2ctx->vmpidr_el2 = VMPIDR_EL2_RES1;
+ /* The guest will detect a multi-core, single-threaded CPU */
+ el2ctx->vmpidr_el2 &= ~VMPIDR_EL2_U & ~VMPIDR_EL2_MT;
+ /*
+ * Generate the guest MPIDR value. We only support 16 CPUs at affinity
+ * level 0 to simplify the vgicv3 driver (see writing sgi1r_el1).
+ */
+ cpu_aff = (vcpuid & 0xf) << MPIDR_AFF0_SHIFT |
+ ((vcpuid >> 4) & 0xff) << MPIDR_AFF1_SHIFT |
+ ((vcpuid >> 12) & 0xff) << MPIDR_AFF2_SHIFT |
+ ((vcpuid >> 20) & 0xff) << MPIDR_AFF3_SHIFT;
+ el2ctx->vmpidr_el2 |= cpu_aff;
+
+ /* Use the same CPU identification information as the host */
+ el2ctx->vpidr_el2 = CPU_IMPL_TO_MIDR(CPU_IMPL_ARM);
+ el2ctx->vpidr_el2 |= CPU_VAR_TO_MIDR(0);
+ el2ctx->vpidr_el2 |= CPU_ARCH_TO_MIDR(0xf);
+ el2ctx->vpidr_el2 |= CPU_PART_TO_MIDR(CPU_PART_FOUNDATION);
+ el2ctx->vpidr_el2 |= CPU_REV_TO_MIDR(0);
+
+ /*
+ * Don't trap accesses to CPACR_EL1, trace, SVE, Advanced SIMD
+ * and floating point functionality to EL2.
+ */
+ el2ctx->cptr_el2 = CPTR_RES1;
+ /*
+ * Disable interrupts in the guest. The guest OS will re-enable
+ * them.
+ */
+ el2ctx->tf.tf_spsr = PSR_D | PSR_A | PSR_I | PSR_F;
+ /* Use the EL1 stack when taking exceptions to EL1 */
+ el2ctx->tf.tf_spsr |= PSR_M_EL1h;
+}
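
The affinity packing in reset_vm_el2_regs() maps the flat vcpuid onto MPIDR affinity fields: bits [3:0] become Aff0 (capped at 16 CPUs per cluster, as the comment explains), bits [11:4] Aff1, bits [19:12] Aff2 and bits [27:20] Aff3. A minimal sketch that just factors out that computation; the helper name is invented for illustration and the MPIDR_AFF*_SHIFT macros come from armreg.h as above:

/* Illustrative only: mirrors the cpu_aff computation above. */
static uint64_t
vcpuid_to_mpidr_aff(uint64_t vcpuid)
{
	return ((vcpuid & 0xf) << MPIDR_AFF0_SHIFT |
	    ((vcpuid >> 4) & 0xff) << MPIDR_AFF1_SHIFT |
	    ((vcpuid >> 12) & 0xff) << MPIDR_AFF2_SHIFT |
	    ((vcpuid >> 20) & 0xff) << MPIDR_AFF3_SHIFT);
}

For example, vcpuid 18 (0x12) yields Aff0 = 2 and Aff1 = 1, i.e. the third core of the second cluster, while vcpuid 15 is the last core that stays in cluster 0.
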
diff --git a/sys/arm64/vmm/vmm_stat.h b/sys/arm64/vmm/vmm_stat.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_stat.h
@@ -0,0 +1,145 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_STAT_H_
+#define _VMM_STAT_H_
+
+struct vm;
+
+#define MAX_VMM_STAT_ELEMS 64 /* arbitrary */
+
+enum vmm_stat_scope {
+ VMM_STAT_SCOPE_ANY,
+};
+
+struct vmm_stat_type;
+typedef void (*vmm_stat_func_t)(struct vcpu *vcpu,
+ struct vmm_stat_type *stat);
+
+struct vmm_stat_type {
+ int index; /* position in the stats buffer */
+ int nelems; /* standalone or array */
+ const char *desc; /* description of statistic */
+ vmm_stat_func_t func;
+ enum vmm_stat_scope scope;
+};
+
+void vmm_stat_register(void *arg);
+
+#define VMM_STAT_FDEFINE(type, nelems, desc, func, scope) \
+ struct vmm_stat_type type[1] = { \
+ { -1, nelems, desc, func, scope } \
+ }; \
+ SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type)
+
+#define VMM_STAT_DEFINE(type, nelems, desc, scope) \
+ VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope)
+
+#define VMM_STAT_DECLARE(type) \
+ extern struct vmm_stat_type type[1]
+
+#define VMM_STAT(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_ANY)
+
+#define VMM_STAT_FUNC(type, desc, func) \
+ VMM_STAT_FDEFINE(type, 1, desc, func, VMM_STAT_SCOPE_ANY)
+
+#define VMM_STAT_ARRAY(type, nelems, desc) \
+ VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY)
+
+void *vmm_stat_alloc(void);
+void vmm_stat_init(void *vp);
+void vmm_stat_free(void *vp);
+
+int vmm_stat_copy(struct vcpu *vcpu, int index, int count,
+ int *num_stats, uint64_t *buf);
+int vmm_stat_desc_copy(int index, char *buf, int buflen);
+
+static void __inline
+vmm_stat_array_incr(struct vcpu *vcpu, struct vmm_stat_type *vst, int statidx,
+ uint64_t x)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] += x;
+#endif
+}
+
+static void __inline
+vmm_stat_array_set(struct vcpu *vcpu, struct vmm_stat_type *vst, int statidx,
+ uint64_t val)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] = val;
+#endif
+}
+
+static void __inline
+vmm_stat_incr(struct vcpu *vcpu, struct vmm_stat_type *vst, uint64_t x)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_incr(vcpu, vst, 0, x);
+#endif
+}
+
+static void __inline
+vmm_stat_set(struct vcpu *vcpu, struct vmm_stat_type *vst, uint64_t val)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_set(vcpu, vst, 0, val);
+#endif
+}
+
+VMM_STAT_DECLARE(VMEXIT_COUNT);
+VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
+VMM_STAT_DECLARE(VMEXIT_WFI);
+VMM_STAT_DECLARE(VMEXIT_WFE);
+VMM_STAT_DECLARE(VMEXIT_HVC);
+VMM_STAT_DECLARE(VMEXIT_MSR);
+VMM_STAT_DECLARE(VMEXIT_DATA_ABORT);
+VMM_STAT_DECLARE(VMEXIT_INSN_ABORT);
+VMM_STAT_DECLARE(VMEXIT_UNHANDLED_SYNC);
+VMM_STAT_DECLARE(VMEXIT_IRQ);
+VMM_STAT_DECLARE(VMEXIT_FIQ);
+VMM_STAT_DECLARE(VMEXIT_UNHANDLED_EL2);
+VMM_STAT_DECLARE(VMEXIT_UNHANDLED);
+#endif
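
The inline helpers above compile down to nothing unless VMM_KEEP_STATS is defined (the vmm module Makefile later in this diff adds -DVMM_KEEP_STATS), so backends can use them on hot exit paths. A minimal sketch of bumping one of the declared counters from an exit handler; the function name and surrounding logic are invented for illustration:

static int
handle_wfi_exit_sketch(struct vcpu *vcpu)
{
	/* Account for the trapped WFI; a no-op when stats are compiled out. */
	vmm_stat_incr(vcpu, VMEXIT_WFI, 1);
	vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);

	/* ... block until an interrupt is pending, then resume the guest ... */
	return (0);
}
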
diff --git a/sys/arm64/vmm/vmm_stat.c b/sys/arm64/vmm/vmm_stat.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_stat.c
@@ -0,0 +1,165 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include "vmm_stat.h"
+
+/*
+ * 'vst_num_elems' is the total number of addressable statistic elements
+ * 'vst_num_types' is the number of unique statistic types
+ *
+ * It is always true that 'vst_num_elems' is greater than or equal to
+ * 'vst_num_types'. This is because a stat type may represent more than
+ * one element (e.g., VMM_STAT_ARRAY).
+ */
+static int vst_num_elems, vst_num_types;
+static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS];
+
+static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat");
+
+#define vst_size ((size_t)vst_num_elems * sizeof(uint64_t))
+
+void
+vmm_stat_register(void *arg)
+{
+ struct vmm_stat_type *vst = arg;
+
+ /* We require all stats to identify themselves with a description */
+ if (vst->desc == NULL)
+ return;
+
+ if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) {
+ printf("Cannot accommodate vmm stat type \"%s\"!\n", vst->desc);
+ return;
+ }
+
+ vst->index = vst_num_elems;
+ vst_num_elems += vst->nelems;
+
+ vsttab[vst_num_types++] = vst;
+}
+
+int
+vmm_stat_copy(struct vcpu *vcpu, int index, int count, int *num_stats,
+ uint64_t *buf)
+{
+ struct vmm_stat_type *vst;
+ uint64_t *stats;
+ int i, tocopy;
+
+ if (index < 0 || count < 0)
+ return (EINVAL);
+
+ if (index > vst_num_elems)
+ return (ENOENT);
+
+ if (index == vst_num_elems) {
+ *num_stats = 0;
+ return (0);
+ }
+
+ tocopy = min(vst_num_elems - index, count);
+
+ /* Let stats functions update their counters */
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (vst->func != NULL)
+ (*vst->func)(vcpu, vst);
+ }
+
+ /* Copy over the stats */
+ stats = vcpu_stats(vcpu);
+ memcpy(buf, stats + index, tocopy * sizeof(stats[0]));
+ *num_stats = tocopy;
+ return (0);
+}
+
+void *
+vmm_stat_alloc(void)
+{
+
+ return (malloc(vst_size, M_VMM_STAT, M_WAITOK));
+}
+
+void
+vmm_stat_init(void *vp)
+{
+
+ bzero(vp, vst_size);
+}
+
+void
+vmm_stat_free(void *vp)
+{
+ free(vp, M_VMM_STAT);
+}
+
+int
+vmm_stat_desc_copy(int index, char *buf, int bufsize)
+{
+ int i;
+ struct vmm_stat_type *vst;
+
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (index >= vst->index && index < vst->index + vst->nelems) {
+ if (vst->nelems > 1) {
+ snprintf(buf, bufsize, "%s[%d]",
+ vst->desc, index - vst->index);
+ } else {
+ strlcpy(buf, vst->desc, bufsize);
+ }
+ return (0); /* found it */
+ }
+ }
+
+ return (EINVAL);
+}
+
+/* global statistics */
+VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
+VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
+VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
+VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
+VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
+VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
+VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
+VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
+VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
+VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
+VMM_STAT(VMEXIT_FIQ, "number of vmexits for an fiq");
+VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
+VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");
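
Registration assigns each stat type a contiguous slice of the per-vcpu stats buffer, and vmm_stat_desc_copy() reconstructs a human-readable name from the flat index. A small sketch of the resulting index layout, using made-up stat names and assuming the two SYSINITs happen to run in this order:

/* Two hypothetical stat definitions, registered in this order: */
VMM_STAT(EXAMPLE_SCALAR, "example scalar stat");		/* occupies index 0 */
VMM_STAT_ARRAY(EXAMPLE_ARRAY, 4, "example array stat");	/* indices 1..4 */

/*
 * vmm_stat_desc_copy() then reports:
 *   index 0 -> "example scalar stat"
 *   index 1 -> "example array stat[0]"
 *   index 4 -> "example array stat[3]"
 * and vmm_stat_copy(vcpu, 0, 5, ...) returns all five counters in one call.
 */
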
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -116,6 +116,39 @@
dev/iommu/busdma_iommu.c optional iommu
dev/iommu/iommu_gas.c optional iommu
+arm64/vmm/vmm.c optional vmm
+arm64/vmm/vmm_dev.c optional vmm
+arm64/vmm/vmm_instruction_emul.c optional vmm
+arm64/vmm/vmm_stat.c optional vmm
+arm64/vmm/vmm_arm64.c optional vmm
+arm64/vmm/vmm_reset.c optional vmm
+arm64/vmm/vmm_call.S optional vmm
+arm64/vmm/vmm_hyp_exception.S optional vmm \
+ compile-with "${NORMAL_C:N-fsanitize*:N-mbranch-protection*} -fpie" \
+ no-obj
+arm64/vmm/vmm_hyp.c optional vmm \
+ compile-with "${NORMAL_C:N-fsanitize*:N-mbranch-protection*} -fpie" \
+ no-obj
+vmm_hyp_blob.elf.full optional vmm \
+ dependency "vmm_hyp.o vmm_hyp_exception.o" \
+ compile-with "${SYSTEM_LD_BASECMD} -o ${.TARGET} ${.ALLSRC} --defsym=text_start='0x0'" \
+ no-obj no-implicit-rule
+vmm_hyp_blob.elf optional vmm \
+ dependency "vmm_hyp_blob.elf.full" \
+ compile-with "${OBJCOPY} --strip-debug ${.ALLSRC} ${.TARGET}" \
+ no-obj no-implicit-rule
+vmm_hyp_blob.bin optional vmm \
+ dependency vmm_hyp_blob.elf \
+ compile-with "${OBJCOPY} --output-target=binary ${.ALLSRC} ${.TARGET}" \
+ no-obj no-implicit-rule
+arm64/vmm/vmm_hyp_el2.S optional vmm \
+ dependency vmm_hyp_blob.bin
+arm64/vmm/vmm_mmu.c optional vmm
+arm64/vmm/io/vgic.c optional vmm
+arm64/vmm/io/vgic_v3.c optional vmm
+arm64/vmm/io/vgic_if.m optional vmm
+arm64/vmm/io/vtimer.c optional vmm
+
crypto/armv8/armv8_crypto.c optional armv8crypto
armv8_crypto_wrap.o optional armv8crypto \
dependency "$S/crypto/armv8/armv8_crypto_wrap.c" \
diff --git a/sys/conf/ldscript.arm64 b/sys/conf/ldscript.arm64
--- a/sys/conf/ldscript.arm64
+++ b/sys/conf/ldscript.arm64
@@ -6,6 +6,7 @@
{
/* Read-only sections, merged into text segment: */
. = text_start; /* This is set using --defsym= on the command line. */
+ .vmm_vectors : { *(.vmm_vectors) }
.text :
{
*(.text)
@@ -16,6 +17,7 @@
} =0x9090
_etext = .;
PROVIDE (etext = .);
+
.fini : { *(.fini) } =0x9090
.rodata : { *(.rodata*) *(.gnu.linkonce.r*) }
.rodata1 : { *(.rodata1) }
diff --git a/sys/conf/options.arm64 b/sys/conf/options.arm64
--- a/sys/conf/options.arm64
+++ b/sys/conf/options.arm64
@@ -19,6 +19,9 @@
# EFI Runtime services support
EFIRT opt_efirt.h
+# Bhyve
+VMM opt_global.h
+
# SoC Support
SOC_ALLWINNER_A64 opt_soc.h
SOC_ALLWINNER_H5 opt_soc.h
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -841,7 +841,9 @@
_sgx_linux= sgx_linux
_smartpqi= smartpqi
_p2sb= p2sb
+.endif
+.if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64"
.if ${MK_BHYVE} != "no" || defined(ALL_MODULES)
.if ${KERN_OPTS:MSMP}
_vmm= vmm
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -3,31 +3,79 @@
KMOD= vmm
-SRCS= opt_acpi.h opt_bhyve_snapshot.h opt_ddb.h
-SRCS+= device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h vnode_if.h
-DPSRCS+= vmx_assym.h svm_assym.h
-DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc
+SRCS= opt_acpi.h opt_ddb.h device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h
CFLAGS+= -DVMM_KEEP_STATS
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/io
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd
+CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm
+CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm/io
# generic vmm support
-.PATH: ${SRCTOP}/sys/amd64/vmm
+.PATH: ${SRCTOP}/sys/${MACHINE}/vmm
SRCS+= vmm.c \
vmm_dev.c \
- vmm_host.c \
vmm_instruction_emul.c \
+ vmm_stat.c
+
+.if ${MACHINE_CPUARCH} == "aarch64"
+DPSRCS+= assym.inc
+
+# TODO: Add the new EL2 code
+SRCS+= vmm_arm64.c \
+ vmm_reset.c \
+ vmm_call.S \
+ vmm_mmu.c \
+ vmm_hyp_el2.S
+
+.PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io
+SRCS+= vgic.c \
+ vgic_if.h \
+ vgic_if.c \
+ vgic_v3.c \
+ vtimer.c
+
+SRCS+= vmm_hyp_exception.S vmm_hyp.c
+
+CLEANFILES+= vmm_hyp_blob.elf.full
+CLEANFILES+= vmm_hyp_blob.elf vmm_hyp_blob.bin
+
+vmm_hyp_exception.o: vmm_hyp_exception.S
+ ${CC} -c -x assembler-with-cpp -DLOCORE \
+ ${CFLAGS:N-fsanitize*:N-mbranch-protection*} \
+ ${.IMPSRC} -o ${.TARGET} -fpie
+
+vmm_hyp.o: vmm_hyp.c
+ ${CC} -c ${CFLAGS:N-fsanitize*:N-mbranch-protection*} \
+ ${.IMPSRC} -o ${.TARGET} -fpie
+
+vmm_hyp_blob.elf.full: vmm_hyp_exception.o vmm_hyp.o
+ ${LD} -m ${LD_EMULATION} -Bdynamic -T ${SYSDIR}/conf/ldscript.arm64 \
+ ${_LDFLAGS} --no-warn-mismatch --warn-common --export-dynamic \
+ --dynamic-linker /red/herring -X -o ${.TARGET} ${.ALLSRC} \
+ --defsym=text_start='0x0'
+
+vmm_hyp_blob.elf: vmm_hyp_blob.elf.full
+ ${OBJCOPY} --strip-debug ${.ALLSRC} ${.TARGET}
+
+vmm_hyp_blob.bin: vmm_hyp_blob.elf
+ ${OBJCOPY} --output-target=binary ${.ALLSRC} ${.TARGET}
+
+vmm_hyp_el2.o: vmm_hyp_blob.bin
+
+.elif ${MACHINE_CPUARCH} == "amd64"
+DPSRCS+= vmx_assym.h svm_assym.h
+DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc
+
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd
+
+SRCS+= vmm_host.c \
vmm_ioport.c \
vmm_lapic.c \
vmm_mem.c \
- vmm_stat.c \
vmm_util.c \
x86.c
-.PATH: ${SRCTOP}/sys/amd64/vmm/io
+.PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io
SRCS+= iommu.c \
ppt.c \
vatpic.c \
@@ -62,10 +110,11 @@
SRCS.BHYVE_SNAPSHOT= vmm_snapshot.c
-CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
+CLEANFILES+= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h
OBJS_DEPEND_GUESS.svm_support.o+= svm_assym.h
+.endif
vmx_assym.h: vmx_genassym.o
sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET}
@@ -81,6 +130,9 @@
${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
${.IMPSRC} -o ${.TARGET}
+hyp_genassym.o: offset.inc
+ ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC}
+
vmx_genassym.o: offset.inc
${CC} -c ${CFLAGS:N-flto*:N-fno-common:N-fsanitize*:N-fno-sanitize*} \
-fcommon ${.IMPSRC}
