D45553.id.diff

diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv
--- a/sys/conf/files.riscv
+++ b/sys/conf/files.riscv
@@ -12,6 +12,8 @@
dev/pci/pci_host_generic_fdt.c optional pci fdt
dev/uart/uart_cpu_fdt.c optional uart fdt
dev/uart/uart_dev_lowrisc.c optional uart_lowrisc
+dev/vmm/vmm_dev.c optional vmm
+dev/vmm/vmm_stat.c optional vmm
dev/xilinx/axi_quad_spi.c optional xilinx_spi
dev/xilinx/axidma.c optional axidma xdma
dev/xilinx/if_xae.c optional xae
@@ -44,6 +46,7 @@
riscv/riscv/elf_machdep.c standard
riscv/riscv/exception.S standard
riscv/riscv/exec_machdep.c standard
+riscv/riscv/fpe.c optional vmm
riscv/riscv/gdb_machdep.c optional gdb
riscv/riscv/intc.c standard
riscv/riscv/identcpu.c standard
@@ -72,6 +75,13 @@
riscv/riscv/uio_machdep.c standard
riscv/riscv/unwind.c optional ddb | kdtrace_hooks | stack
riscv/riscv/vm_machdep.c standard
+riscv/vmm/vmm.c optional vmm
+riscv/vmm/vmm_aplic.c optional vmm
+riscv/vmm/vmm_dev_machdep.c optional vmm
+riscv/vmm/vmm_instruction_emul.c optional vmm
+riscv/vmm/vmm_riscv.c optional vmm
+riscv/vmm/vmm_sbi.c optional vmm
+riscv/vmm/vmm_switch.S optional vmm
# Zstd
contrib/zstd/lib/freebsd/zstd_kfreebsd.c optional zstdio compile-with ${ZSTD_C}
diff --git a/sys/conf/kern.mk b/sys/conf/kern.mk
--- a/sys/conf/kern.mk
+++ b/sys/conf/kern.mk
@@ -163,7 +163,7 @@
# code model as "medium" and "medany" respectively.
#
.if ${MACHINE_CPUARCH} == "riscv"
-CFLAGS+= -march=rv64imafdc
+CFLAGS+= -march=rv64imafdch
CFLAGS+= -mabi=lp64
CFLAGS.clang+= -mcmodel=medium
CFLAGS.gcc+= -mcmodel=medany
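
Note: bumping -march to rv64imafdch enables the 'h' (hypervisor) extension in the toolchain, which is what lets the new vmm code use the hypervisor CSRs and instructions (HFENCE.GVMA/VVMA, HLV/HSV) in vmm_switch.S and vmm_riscv.c. A minimal illustration of the kind of construct this unlocks (not part of the patch):

    /* Flush all guest-physical translations; only assembles with -march=...h. */
    __asm __volatile("hfence.gvma zero, zero" ::: "memory");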
diff --git a/sys/riscv/include/cpu.h b/sys/riscv/include/cpu.h
--- a/sys/riscv/include/cpu.h
+++ b/sys/riscv/include/cpu.h
@@ -47,8 +47,6 @@
#define cpu_spinwait() /* nothing */
#define cpu_lock_delay() DELAY(1)
-#ifdef _KERNEL
-
/*
* Core manufacturer IDs, as reported by the mvendorid CSR.
*/
@@ -89,6 +87,8 @@
#define MMU_SV48 0x2 /* 4-level paging */
#define MMU_SV57 0x4 /* 5-level paging */
+#ifdef _KERNEL
+
extern char btext[];
extern char etext[];
diff --git a/sys/riscv/include/elf.h b/sys/riscv/include/elf.h
--- a/sys/riscv/include/elf.h
+++ b/sys/riscv/include/elf.h
@@ -80,6 +80,7 @@
#define HWCAP_ISA_F HWCAP_ISA_BIT('f')
#define HWCAP_ISA_D HWCAP_ISA_BIT('d')
#define HWCAP_ISA_C HWCAP_ISA_BIT('c')
+#define HWCAP_ISA_H HWCAP_ISA_BIT('h')
#define HWCAP_ISA_G \
(HWCAP_ISA_I | HWCAP_ISA_M | HWCAP_ISA_A | HWCAP_ISA_F | HWCAP_ISA_D)
#define HWCAP_ISA_B HWCAP_ISA_BIT('b')
diff --git a/sys/riscv/include/md_var.h b/sys/riscv/include/md_var.h
--- a/sys/riscv/include/md_var.h
+++ b/sys/riscv/include/md_var.h
@@ -42,6 +42,7 @@
extern u_int mmu_caps;
/* Supervisor-mode extension support */
+extern bool has_hyp;
extern bool has_sstc;
extern bool has_sscofpmf;
extern bool has_svpbmt;
diff --git a/sys/riscv/include/riscvreg.h b/sys/riscv/include/riscvreg.h
--- a/sys/riscv/include/riscvreg.h
+++ b/sys/riscv/include/riscvreg.h
@@ -47,9 +47,15 @@
#define SCAUSE_STORE_ACCESS_FAULT 7
#define SCAUSE_ECALL_USER 8
#define SCAUSE_ECALL_SUPERVISOR 9
+#define SCAUSE_VIRTUAL_SUPERVISOR_ECALL 10
+#define SCAUSE_MACHINE_ECALL 11
#define SCAUSE_INST_PAGE_FAULT 12
#define SCAUSE_LOAD_PAGE_FAULT 13
#define SCAUSE_STORE_PAGE_FAULT 15
+#define SCAUSE_FETCH_GUEST_PAGE_FAULT 20
+#define SCAUSE_LOAD_GUEST_PAGE_FAULT 21
+#define SCAUSE_VIRTUAL_INSTRUCTION 22
+#define SCAUSE_STORE_GUEST_PAGE_FAULT 23
#define SSTATUS_UIE (1 << 0)
#define SSTATUS_SIE (1 << 1)
@@ -116,6 +122,17 @@
#define MSTATUS_PRV_H 2 /* hypervisor */
#define MSTATUS_PRV_M 3 /* machine */
+#define HSTATUS_VSBE (1 << 5)
+#define HSTATUS_GVA (1 << 6)
+#define HSTATUS_SPV (1 << 7)
+#define HSTATUS_SPVP (1 << 8)
+#define HSTATUS_HU (1 << 9)
+#define HSTATUS_VGEIN_S 12
+#define HSTATUS_VGEIN_M (0xf << HSTATUS_VGEIN_S)
+#define HSTATUS_VTVM (1 << 20)
+#define HSTATUS_VTW (1 << 21)
+#define HSTATUS_VTSR (1 << 22)
+
#define MIE_USIE (1 << 0)
#define MIE_SSIE (1 << 1)
#define MIE_HSIE (1 << 2)
@@ -143,10 +160,31 @@
#define MIP_SEIP (1 << 9)
+#define HVIP_VSSIP (1 << 2)
+#define HVIP_VSTIP (1 << 6)
+#define HVIP_VSEIP (1 << 10)
+
+#define HIE_VSSIE (1 << 2)
+#define HIE_VSTIE (1 << 6)
+#define HIE_VSEIE (1 << 10)
+#define HIE_SGEIE (1 << 12)
+
/* Note: sip register has no SIP_STIP bit in Spike simulator */
#define SIP_SSIP (1 << 1)
#define SIP_STIP (1 << 5)
+#define HENVCFG_STCE (1UL << 63)
+#define HENVCFG_PBMTE (1UL << 62)
+#define HENVCFG_CBZE (1UL << 7)
+#define HENVCFG_CBCFE (1UL << 6)
+#define HENVCFG_CBIE_S 4
+#define HENVCFG_CBIE_M (0x3 << HENVCFG_CBIE_S)
+#define HENVCFG_FIOM (1UL << 0)
+
+#define HCOUNTEREN_CY (1UL << 0) /* Cycle */
+#define HCOUNTEREN_TM (1UL << 1) /* Time */
+#define HCOUNTEREN_IR (1UL << 2) /* Instret */
+
#define SATP_PPN_S 0
#define SATP_PPN_M (0xfffffffffffUL << SATP_PPN_S)
#define SATP_ASID_S 44
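
The hstatus/hvip/hie/henvcfg/hcounteren bit definitions added above are consumed by the new vmm code (vmm_riscv.c, vmm_aplic.c). As one illustrative use, assuming the standard csr_set() macro from this header, injecting a virtual external interrupt into the currently loaded guest context boils down to setting VSEIP in hvip:

    /* Sketch only: make a virtual external interrupt pending for the guest. */
    csr_set(hvip, HVIP_VSEIP);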
diff --git a/sys/riscv/include/vmm.h b/sys/riscv/include/vmm.h
new file mode 100644
--- /dev/null
+++ b/sys/riscv/include/vmm.h
@@ -0,0 +1,328 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_H_
+#define _VMM_H_
+
+#include <sys/param.h>
+#include <sys/cpuset.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include "pte.h"
+#include "pmap.h"
+
+struct vcpu;
+
+enum vm_suspend_how {
+ VM_SUSPEND_NONE,
+ VM_SUSPEND_RESET,
+ VM_SUSPEND_POWEROFF,
+ VM_SUSPEND_HALT,
+ VM_SUSPEND_LAST
+};
+
+/*
+ * Identifiers for architecturally defined registers.
+ */
+enum vm_reg_name {
+ VM_REG_GUEST_ZERO = 0,
+ VM_REG_GUEST_RA,
+ VM_REG_GUEST_SP,
+ VM_REG_GUEST_GP,
+ VM_REG_GUEST_TP,
+ VM_REG_GUEST_T0,
+ VM_REG_GUEST_T1,
+ VM_REG_GUEST_T2,
+ VM_REG_GUEST_S0,
+ VM_REG_GUEST_S1,
+ VM_REG_GUEST_A0,
+ VM_REG_GUEST_A1,
+ VM_REG_GUEST_A2,
+ VM_REG_GUEST_A3,
+ VM_REG_GUEST_A4,
+ VM_REG_GUEST_A5,
+ VM_REG_GUEST_A6,
+ VM_REG_GUEST_A7,
+ VM_REG_GUEST_S2,
+ VM_REG_GUEST_S3,
+ VM_REG_GUEST_S4,
+ VM_REG_GUEST_S5,
+ VM_REG_GUEST_S6,
+ VM_REG_GUEST_S7,
+ VM_REG_GUEST_S8,
+ VM_REG_GUEST_S9,
+ VM_REG_GUEST_S10,
+ VM_REG_GUEST_S11,
+ VM_REG_GUEST_T3,
+ VM_REG_GUEST_T4,
+ VM_REG_GUEST_T5,
+ VM_REG_GUEST_T6,
+ VM_REG_GUEST_SEPC,
+ VM_REG_LAST
+};
+
+#define VM_INTINFO_VECTOR(info) ((info) & 0xff)
+#define VM_INTINFO_DEL_ERRCODE 0x800
+#define VM_INTINFO_RSVD 0x7ffff000
+#define VM_INTINFO_VALID 0x80000000
+#define VM_INTINFO_TYPE 0x700
+#define VM_INTINFO_HWINTR (0 << 8)
+#define VM_INTINFO_NMI (2 << 8)
+#define VM_INTINFO_HWEXCEPTION (3 << 8)
+#define VM_INTINFO_SWINTR (4 << 8)
+
+#define VM_MAX_SUFFIXLEN 15
+
+#ifdef _KERNEL
+
+#define VM_MAX_NAMELEN 32
+
+struct vm;
+struct vm_exception;
+struct vm_exit;
+struct vm_run;
+struct vm_object;
+struct vm_guest_paging;
+struct vm_aplic_descr;
+struct pmap;
+
+struct vm_eventinfo {
+ void *rptr; /* rendezvous cookie */
+ int *sptr; /* suspend cookie */
+ int *iptr; /* reqidle cookie */
+};
+
+int vm_create(const char *name, struct vm **retvm);
+struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
+void vm_disable_vcpu_creation(struct vm *vm);
+void vm_slock_vcpus(struct vm *vm);
+void vm_unlock_vcpus(struct vm *vm);
+void vm_destroy(struct vm *vm);
+int vm_reinit(struct vm *vm);
+const char *vm_name(struct vm *vm);
+
+/*
+ * APIs that modify the guest memory map require all vcpus to be frozen.
+ */
+void vm_slock_memsegs(struct vm *vm);
+void vm_xlock_memsegs(struct vm *vm);
+void vm_unlock_memsegs(struct vm *vm);
+int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
+ size_t len, int prot, int flags);
+int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
+int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+void vm_free_memseg(struct vm *vm, int ident);
+
+/*
+ * APIs that inspect the guest memory map require only a *single* vcpu to
+ * be frozen. This acts like a read lock on the guest memory map since any
+ * modification requires *all* vcpus to be frozen.
+ */
+int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ struct vm_object **objptr);
+vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
+void *vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len,
+ int prot, void **cookie);
+void *vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len,
+ int prot, void **cookie);
+void vm_gpa_release(void *cookie);
+bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa);
+
+int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault);
+
+uint16_t vm_get_maxcpus(struct vm *vm);
+void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus);
+int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus);
+int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
+int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
+int vm_run(struct vcpu *vcpu);
+int vm_suspend(struct vm *vm, enum vm_suspend_how how);
+void* vm_get_cookie(struct vm *vm);
+int vcpu_vcpuid(struct vcpu *vcpu);
+void *vcpu_get_cookie(struct vcpu *vcpu);
+struct vm *vcpu_vm(struct vcpu *vcpu);
+struct vcpu *vm_vcpu(struct vm *vm, int cpu);
+int vm_get_capability(struct vcpu *vcpu, int type, int *val);
+int vm_set_capability(struct vcpu *vcpu, int type, int val);
+int vm_activate_cpu(struct vcpu *vcpu);
+int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
+int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
+int vm_inject_exception(struct vcpu *vcpu, uint64_t scause);
+int vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr);
+int vm_assert_irq(struct vm *vm, uint32_t irq);
+int vm_deassert_irq(struct vm *vm, uint32_t irq);
+int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
+ int func);
+struct vm_exit *vm_exitinfo(struct vcpu *vcpu);
+void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_debug(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_astpending(struct vcpu *vcpu, uint64_t pc);
+
+cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_debug_cpus(struct vm *vm);
+cpuset_t vm_suspended_cpus(struct vm *vm);
+
+static __inline int
+vcpu_rendezvous_pending(struct vm_eventinfo *info)
+{
+
+ return (*((uintptr_t *)(info->rptr)) != 0);
+}
+
+static __inline int
+vcpu_suspended(struct vm_eventinfo *info)
+{
+
+ return (*info->sptr);
+}
+
+int vcpu_debugged(struct vcpu *vcpu);
+
+enum vcpu_state {
+ VCPU_IDLE,
+ VCPU_FROZEN,
+ VCPU_RUNNING,
+ VCPU_SLEEPING,
+};
+
+int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle);
+enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu);
+
+static int __inline
+vcpu_is_running(struct vcpu *vcpu, int *hostcpu)
+{
+ return (vcpu_get_state(vcpu, hostcpu) == VCPU_RUNNING);
+}
+
+#ifdef _SYS_PROC_H_
+static int __inline
+vcpu_should_yield(struct vcpu *vcpu)
+{
+ struct thread *td;
+
+ td = curthread;
+ return (td->td_ast != 0 || td->td_owepreempt != 0);
+}
+#endif
+
+void *vcpu_stats(struct vcpu *vcpu);
+void vcpu_notify_event(struct vcpu *vcpu);
+
+enum vm_reg_name vm_segment_name(int seg_encoding);
+
+#endif /* _KERNEL */
+
+#define VM_DIR_READ 0
+#define VM_DIR_WRITE 1
+
+#define VM_GP_M_MASK 0x1f
+#define VM_GP_MMU_ENABLED (1 << 5)
+
+struct vm_guest_paging {
+ int flags;
+ int padding;
+};
+
+struct vie {
+ uint8_t access_size:4, sign_extend:1, dir:1, unused:2;
+ enum vm_reg_name reg;
+};
+
+struct vre {
+ uint32_t inst_syndrome;
+ uint8_t dir:1, unused:7;
+ enum vm_reg_name reg;
+};
+
+/*
+ * Identifiers for optional vmm capabilities
+ */
+enum vm_cap_type {
+ VM_CAP_UNRESTRICTED_GUEST,
+ VM_CAP_MAX
+};
+
+enum vm_exitcode {
+ VM_EXITCODE_BOGUS,
+ VM_EXITCODE_ECALL,
+ VM_EXITCODE_HYP,
+ VM_EXITCODE_PAGING,
+ VM_EXITCODE_SUSPENDED,
+ VM_EXITCODE_DEBUG,
+ VM_EXITCODE_INST_EMUL,
+ VM_EXITCODE_WFI,
+ VM_EXITCODE_MAX
+};
+
+struct vm_exit {
+ uint64_t scause;
+ uint64_t sepc;
+ uint64_t stval;
+ uint64_t htval;
+ uint64_t htinst;
+ enum vm_exitcode exitcode;
+ int inst_length;
+ uint64_t pc;
+ union {
+ struct {
+ uint64_t gpa;
+ } paging;
+
+ struct {
+ uint64_t gpa;
+ struct vm_guest_paging paging;
+ struct vie vie;
+ } inst_emul;
+
+ struct {
+ uint64_t args[8];
+ } ecall;
+
+ struct {
+ enum vm_suspend_how how;
+ } suspended;
+
+ struct {
+ uint64_t scause;
+ } hyp;
+ } u;
+};
+
+#endif /* _VMM_H_ */
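
The struct vm_exit above is what the kernel hands back through the VM_RUN ioctl, and a userspace monitor dispatches on exitcode. A hypothetical handler sketch, where vme is assumed to point at a vm_exit the kernel has filled in:

    switch (vme->exitcode) {
    case VM_EXITCODE_ECALL:
        /* Guest SBI call the kernel did not handle; arguments in vme->u.ecall.args[]. */
        break;
    case VM_EXITCODE_INST_EMUL:
        /* MMIO access at vme->u.inst_emul.gpa, pre-decoded into vme->u.inst_emul.vie. */
        break;
    case VM_EXITCODE_SUSPENDED:
        /* vme->u.suspended.how distinguishes reset/poweroff/halt. */
        break;
    default:
        /* VM_EXITCODE_BOGUS, _HYP, _PAGING, _DEBUG and _WFI handled elsewhere. */
        break;
    }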
diff --git a/sys/riscv/include/vmm_dev.h b/sys/riscv/include/vmm_dev.h
new file mode 100644
--- /dev/null
+++ b/sys/riscv/include/vmm_dev.h
@@ -0,0 +1,258 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_DEV_H_
+#define _VMM_DEV_H_
+
+struct vm_memmap {
+ vm_paddr_t gpa;
+ int segid; /* memory segment */
+ vm_ooffset_t segoff; /* offset into memory segment */
+ size_t len; /* mmap length */
+ int prot; /* RWX */
+ int flags;
+};
+#define VM_MEMMAP_F_WIRED 0x01
+
+struct vm_munmap {
+ vm_paddr_t gpa;
+ size_t len;
+};
+
+#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL)
+struct vm_memseg {
+ int segid;
+ size_t len;
+ char name[VM_MAX_SUFFIXLEN + 1];
+};
+
+struct vm_register {
+ int cpuid;
+ int regnum; /* enum vm_reg_name */
+ uint64_t regval;
+};
+
+struct vm_register_set {
+ int cpuid;
+ unsigned int count;
+ const int *regnums; /* enum vm_reg_name */
+ uint64_t *regvals;
+};
+
+struct vm_run {
+ int cpuid;
+ cpuset_t *cpuset; /* CPU set storage */
+ size_t cpusetsize;
+ struct vm_exit *vm_exit;
+};
+
+struct vm_exception {
+ int cpuid;
+ uint64_t scause;
+};
+
+struct vm_msi {
+ uint64_t msg;
+ uint64_t addr;
+ int bus;
+ int slot;
+ int func;
+};
+
+struct vm_capability {
+ int cpuid;
+ enum vm_cap_type captype;
+ int capval;
+ int allcpus;
+};
+
+#define MAX_VM_STATS 64
+struct vm_stats {
+ int cpuid; /* in */
+ int index; /* in */
+ int num_entries; /* out */
+ struct timeval tv;
+ uint64_t statbuf[MAX_VM_STATS];
+};
+struct vm_stat_desc {
+ int index; /* in */
+ char desc[128]; /* out */
+};
+
+struct vm_suspend {
+ enum vm_suspend_how how;
+};
+
+struct vm_gla2gpa {
+ int vcpuid; /* inputs */
+ int prot; /* PROT_READ or PROT_WRITE */
+ uint64_t gla;
+ struct vm_guest_paging paging;
+ int fault; /* outputs */
+ uint64_t gpa;
+};
+
+struct vm_activate_cpu {
+ int vcpuid;
+};
+
+struct vm_cpuset {
+ int which;
+ int cpusetsize;
+ cpuset_t *cpus;
+};
+#define VM_ACTIVE_CPUS 0
+#define VM_SUSPENDED_CPUS 1
+#define VM_DEBUG_CPUS 2
+
+struct vm_aplic_descr {
+ uint64_t mem_start;
+ uint64_t mem_size;
+};
+
+struct vm_irq {
+ uint32_t irq;
+};
+
+struct vm_cpu_topology {
+ uint16_t sockets;
+ uint16_t cores;
+ uint16_t threads;
+ uint16_t maxcpus;
+};
+
+enum {
+ /* general routines */
+ IOCNUM_ABIVERS = 0,
+ IOCNUM_RUN = 1,
+ IOCNUM_SET_CAPABILITY = 2,
+ IOCNUM_GET_CAPABILITY = 3,
+ IOCNUM_SUSPEND = 4,
+ IOCNUM_REINIT = 5,
+
+ /* memory apis */
+ IOCNUM_GET_GPA_PMAP = 12,
+ IOCNUM_GLA2GPA_NOFAULT = 13,
+ IOCNUM_ALLOC_MEMSEG = 14,
+ IOCNUM_GET_MEMSEG = 15,
+ IOCNUM_MMAP_MEMSEG = 16,
+ IOCNUM_MMAP_GETNEXT = 17,
+ IOCNUM_MUNMAP_MEMSEG = 18,
+
+ /* register/state accessors */
+ IOCNUM_SET_REGISTER = 20,
+ IOCNUM_GET_REGISTER = 21,
+ IOCNUM_SET_REGISTER_SET = 24,
+ IOCNUM_GET_REGISTER_SET = 25,
+
+ /* statistics */
+ IOCNUM_VM_STATS = 50,
+ IOCNUM_VM_STAT_DESC = 51,
+
+ /* CPU Topology */
+ IOCNUM_SET_TOPOLOGY = 63,
+ IOCNUM_GET_TOPOLOGY = 64,
+
+ /* interrupt injection */
+ IOCNUM_ASSERT_IRQ = 80,
+ IOCNUM_DEASSERT_IRQ = 81,
+ IOCNUM_RAISE_MSI = 82,
+ IOCNUM_INJECT_EXCEPTION = 83,
+
+ /* vm_cpuset */
+ IOCNUM_ACTIVATE_CPU = 90,
+ IOCNUM_GET_CPUSET = 91,
+ IOCNUM_SUSPEND_CPU = 92,
+ IOCNUM_RESUME_CPU = 93,
+
+ /* vm_attach_aplic */
+ IOCNUM_ATTACH_APLIC = 110,
+};
+
+#define VM_RUN \
+ _IOWR('v', IOCNUM_RUN, struct vm_run)
+#define VM_SUSPEND \
+ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
+#define VM_REINIT \
+ _IO('v', IOCNUM_REINIT)
+#define VM_ALLOC_MEMSEG \
+ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
+#define VM_GET_MEMSEG \
+ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
+#define VM_MMAP_MEMSEG \
+ _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
+#define VM_MMAP_GETNEXT \
+ _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
+#define VM_MUNMAP_MEMSEG \
+ _IOW('v', IOCNUM_MUNMAP_MEMSEG, struct vm_munmap)
+#define VM_SET_REGISTER \
+ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
+#define VM_GET_REGISTER \
+ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
+#define VM_SET_REGISTER_SET \
+ _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set)
+#define VM_GET_REGISTER_SET \
+ _IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set)
+#define VM_SET_CAPABILITY \
+ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
+#define VM_GET_CAPABILITY \
+ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
+#define VM_STATS \
+ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
+#define VM_STAT_DESC \
+ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
+#define VM_ASSERT_IRQ \
+ _IOW('v', IOCNUM_ASSERT_IRQ, struct vm_irq)
+#define VM_DEASSERT_IRQ \
+ _IOW('v', IOCNUM_DEASSERT_IRQ, struct vm_irq)
+#define VM_RAISE_MSI \
+ _IOW('v', IOCNUM_RAISE_MSI, struct vm_msi)
+#define VM_INJECT_EXCEPTION \
+ _IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception)
+#define VM_SET_TOPOLOGY \
+ _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GET_TOPOLOGY \
+ _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GLA2GPA_NOFAULT \
+ _IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa)
+#define VM_ACTIVATE_CPU \
+ _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
+#define VM_GET_CPUS \
+ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
+#define VM_SUSPEND_CPU \
+ _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu)
+#define VM_RESUME_CPU \
+ _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu)
+#define VM_ATTACH_APLIC \
+ _IOW('v', IOCNUM_ATTACH_APLIC, struct vm_aplic_descr)
+#endif
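
These ioctls mirror the amd64/arm64 vmm interface; userspace normally reaches them through libvmmapi (vm_open()/vm_run() and friends) rather than directly. A rough sketch of direct use, assuming a VM named "testvm" already exists and that the device node follows the usual /dev/vmm/<name> convention:

    #include <sys/param.h>
    #include <sys/cpuset.h>
    #include <sys/ioctl.h>
    #include <fcntl.h>
    #include <machine/vmm.h>
    #include <machine/vmm_dev.h>

    cpuset_t cpus;
    struct vm_exit vmexit;
    struct vm_run vmrun = {
        .cpuid = 0,
        .cpuset = &cpus,
        .cpusetsize = sizeof(cpus),
        .vm_exit = &vmexit,
    };
    int fd = open("/dev/vmm/testvm", O_RDWR);
    if (fd >= 0 && ioctl(fd, VM_RUN, &vmrun) == 0)
        ;    /* inspect vmexit.exitcode as sketched under vmm.h above */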
diff --git a/sys/riscv/include/vmm_instruction_emul.h b/sys/riscv/include/vmm_instruction_emul.h
new file mode 100644
--- /dev/null
+++ b/sys/riscv/include/vmm_instruction_emul.h
@@ -0,0 +1,85 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_INSTRUCTION_EMUL_H_
+#define _VMM_INSTRUCTION_EMUL_H_
+
+/*
+ * Callback functions to read and write memory regions.
+ */
+typedef int (*mem_region_read_t)(struct vcpu *vcpu, uint64_t gpa,
+ uint64_t *rval, int rsize, void *arg);
+typedef int (*mem_region_write_t)(struct vcpu *vcpu, uint64_t gpa,
+ uint64_t wval, int wsize, void *arg);
+
+/*
+ * Callback functions to read and write registers.
+ */
+typedef int (*reg_read_t)(struct vcpu *vcpu, uint64_t *rval, void *arg);
+typedef int (*reg_write_t)(struct vcpu *vcpu, uint64_t wval, void *arg);
+
+/*
+ * Emulate the decoded 'vie' instruction when it contains a memory operation.
+ *
+ * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
+ * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ *
+ */
+int vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t mrr,
+ mem_region_write_t mrw, void *mrarg);
+
+/*
+ * Emulate the decoded 'vre' instruction when it contains a register access.
+ *
+ * The callbacks 'regread' and 'regwrite' emulate reads and writes to the
+ * register from 'vie'. 'regarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ *
+ */
+int vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg);
+
+#ifdef _KERNEL
+void vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
+ reg_read_t reg_read, reg_write_t reg_write, void *arg);
+void vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask);
+
+void vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+ mem_region_read_t mmio_read, mem_region_write_t mmio_write);
+void vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size);
+#endif
+
+#endif /* _VMM_INSTRUCTION_EMUL_H_ */
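
To make the callback shape concrete: a kernel-side MMIO handler for an emulated device region implements the two typedefs above and registers them with vm_register_inst_handler() (this is how the APLIC emulation hooks in). Hypothetical example, with MYDEV_BASE/MYDEV_SIZE standing in for a real device window:

    static int
    mydev_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, int rsize, void *arg)
    {
        /* Produce the value the guest load should observe. */
        *rval = 0;
        return (0);
    }

    static int
    mydev_write(struct vcpu *vcpu, uint64_t gpa, uint64_t wval, int wsize, void *arg)
    {
        /* Apply the guest store to device state. */
        return (0);
    }

    vm_register_inst_handler(vm, MYDEV_BASE, MYDEV_SIZE, mydev_read, mydev_write);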
diff --git a/sys/riscv/include/vmm_snapshot.h b/sys/riscv/include/vmm_snapshot.h
new file mode 100644
--- /dev/null
+++ b/sys/riscv/include/vmm_snapshot.h
@@ -0,0 +1 @@
+/* $FreeBSD$ */
diff --git a/sys/riscv/riscv/genassym.c b/sys/riscv/riscv/genassym.c
--- a/sys/riscv/riscv/genassym.c
+++ b/sys/riscv/riscv/genassym.c
@@ -55,6 +55,8 @@
#include <machine/machdep.h>
#include <machine/vmparam.h>
+#include <riscv/vmm/riscv.h>
+
ASSYM(KERNBASE, KERNBASE);
ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS);
@@ -98,6 +100,38 @@
ASSYM(TF_SCAUSE, offsetof(struct trapframe, tf_scause));
ASSYM(TF_SSTATUS, offsetof(struct trapframe, tf_sstatus));
+ASSYM(HYP_H_RA, offsetof(struct hypctx, host_regs.hyp_ra));
+ASSYM(HYP_H_SP, offsetof(struct hypctx, host_regs.hyp_sp));
+ASSYM(HYP_H_GP, offsetof(struct hypctx, host_regs.hyp_gp));
+ASSYM(HYP_H_TP, offsetof(struct hypctx, host_regs.hyp_tp));
+ASSYM(HYP_H_T, offsetof(struct hypctx, host_regs.hyp_t));
+ASSYM(HYP_H_S, offsetof(struct hypctx, host_regs.hyp_s));
+ASSYM(HYP_H_A, offsetof(struct hypctx, host_regs.hyp_a));
+ASSYM(HYP_H_SEPC, offsetof(struct hypctx, host_regs.hyp_sepc));
+ASSYM(HYP_H_SSTATUS, offsetof(struct hypctx, host_regs.hyp_sstatus));
+ASSYM(HYP_H_HSTATUS, offsetof(struct hypctx, host_regs.hyp_hstatus));
+ASSYM(HYP_H_SSCRATCH, offsetof(struct hypctx, host_sscratch));
+ASSYM(HYP_H_STVEC, offsetof(struct hypctx, host_stvec));
+ASSYM(HYP_H_SCOUNTEREN, offsetof(struct hypctx, host_scounteren));
+
+ASSYM(HYP_G_RA, offsetof(struct hypctx, guest_regs.hyp_ra));
+ASSYM(HYP_G_SP, offsetof(struct hypctx, guest_regs.hyp_sp));
+ASSYM(HYP_G_GP, offsetof(struct hypctx, guest_regs.hyp_gp));
+ASSYM(HYP_G_TP, offsetof(struct hypctx, guest_regs.hyp_tp));
+ASSYM(HYP_G_T, offsetof(struct hypctx, guest_regs.hyp_t));
+ASSYM(HYP_G_S, offsetof(struct hypctx, guest_regs.hyp_s));
+ASSYM(HYP_G_A, offsetof(struct hypctx, guest_regs.hyp_a));
+ASSYM(HYP_G_SEPC, offsetof(struct hypctx, guest_regs.hyp_sepc));
+ASSYM(HYP_G_SSTATUS, offsetof(struct hypctx, guest_regs.hyp_sstatus));
+ASSYM(HYP_G_HSTATUS, offsetof(struct hypctx, guest_regs.hyp_hstatus));
+ASSYM(HYP_G_SCOUNTEREN, offsetof(struct hypctx, guest_scounteren));
+
+ASSYM(HYP_TRAP_SEPC, offsetof(struct hyptrap, sepc));
+ASSYM(HYP_TRAP_SCAUSE, offsetof(struct hyptrap, scause));
+ASSYM(HYP_TRAP_STVAL, offsetof(struct hyptrap, stval));
+ASSYM(HYP_TRAP_HTVAL, offsetof(struct hyptrap, htval));
+ASSYM(HYP_TRAP_HTINST, offsetof(struct hyptrap, htinst));
+
ASSYM(RISCV_BOOTPARAMS_SIZE, sizeof(struct riscv_bootparams));
ASSYM(RISCV_BOOTPARAMS_KERN_PHYS, offsetof(struct riscv_bootparams, kern_phys));
ASSYM(RISCV_BOOTPARAMS_KERN_STACK, offsetof(struct riscv_bootparams,
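
The HYP_H_*/HYP_G_* offsets above let the assembly world switch (vmm_switch.S) save the host register file into hypctx->host_regs and load the guest one from hypctx->guest_regs without knowing the C layout. Illustrative only (not the actual vmm_switch.S code), with the hypctx pointer assumed in a0:

    sd   ra, HYP_H_RA(a0)     /* save host return address */
    sd   sp, HYP_H_SP(a0)     /* save host stack pointer */
    ld   ra, HYP_G_RA(a0)     /* load guest return address */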
diff --git a/sys/riscv/riscv/identcpu.c b/sys/riscv/riscv/identcpu.c
--- a/sys/riscv/riscv/identcpu.c
+++ b/sys/riscv/riscv/identcpu.c
@@ -72,6 +72,7 @@
u_int mmu_caps;
/* Supervisor-mode extension support. */
+bool has_hyp;
bool __read_frequently has_sstc;
bool __read_frequently has_sscofpmf;
bool has_svpbmt;
@@ -249,6 +250,7 @@
case 'c':
case 'd':
case 'f':
+ case 'h':
case 'i':
case 'm':
desc->isa_extensions |= HWCAP_ISA_BIT(isa[i]);
@@ -414,6 +416,7 @@
UPDATE_CAP(mmu_caps, desc->mmu_caps);
/* Supervisor-mode extension support. */
+ UPDATE_CAP(has_hyp, (desc->isa_extensions & HWCAP_ISA_H) != 0);
UPDATE_CAP(has_sstc, (desc->smode_extensions & SV_SSTC) != 0);
UPDATE_CAP(has_sscofpmf, (desc->smode_extensions & SV_SSCOFPMF) != 0);
UPDATE_CAP(has_svpbmt, (desc->smode_extensions & SV_SVPBMT) != 0);
@@ -514,6 +517,7 @@
"\03Compressed"
"\04Double"
"\06Float"
+ "\10Hypervisor"
"\15Mult/Div");
}
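
With the two hunks above, an ISA string such as "rv64imafdch" reported by firmware sets HWCAP_ISA_H for the hart, and has_hyp ends up true only if every hart advertises it (UPDATE_CAP keeps the capabilities common to all CPUs). The vmm module can then gate its initialization on it, which is presumably what the "TODO: check if has_hyp here?" in vmm.c refers to; a sketch of such a check:

    if (!has_hyp) {
        printf("vmm: RISC-V H-extension not present on all harts\n");
        return (ENXIO);
    }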
diff --git a/sys/riscv/vmm/riscv.h b/sys/riscv/vmm/riscv.h
new file mode 100644
--- /dev/null
+++ b/sys/riscv/vmm/riscv.h
@@ -0,0 +1,132 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_RISCV_H_
+#define _VMM_RISCV_H_
+
+#include <machine/reg.h>
+#include <machine/pcpu.h>
+#include <machine/vmm.h>
+
+struct hypregs {
+ uint64_t hyp_ra;
+ uint64_t hyp_sp;
+ uint64_t hyp_gp;
+ uint64_t hyp_tp;
+ uint64_t hyp_t[7];
+ uint64_t hyp_s[12];
+ uint64_t hyp_a[8];
+ uint64_t hyp_sepc;
+ uint64_t hyp_sstatus;
+ uint64_t hyp_hstatus;
+};
+
+struct hypcsr {
+ uint64_t hvip;
+ uint64_t vsstatus;
+ uint64_t vsie;
+ uint64_t vstvec;
+ uint64_t vsscratch;
+ uint64_t vsepc;
+ uint64_t vscause;
+ uint64_t vstval;
+ uint64_t vsatp;
+ uint64_t scounteren;
+ uint64_t senvcfg;
+};
+
+struct hypctx {
+ struct hypregs host_regs;
+ struct hypregs guest_regs;
+ struct hypcsr guest_csrs;
+ uint64_t host_sscratch;
+ uint64_t host_stvec;
+ uint64_t host_scounteren;
+ uint64_t guest_scounteren;
+ struct hyp *hyp;
+ struct vcpu *vcpu;
+ bool has_exception;
+ int cpu_id;
+ int ipi_pending;
+};
+
+struct hyp {
+ struct vm *vm;
+ uint64_t vmid_generation;
+ bool aplic_attached;
+ struct aplic *aplic;
+ struct hypctx *ctx[];
+};
+
+struct hyptrap {
+ uint64_t sepc;
+ uint64_t scause;
+ uint64_t stval;
+ uint64_t htval;
+ uint64_t htinst;
+};
+
+#define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \
+ ret_type vmmops_##opname args;
+
+DEFINE_VMMOPS_IFUNC(int, modinit, (void))
+DEFINE_VMMOPS_IFUNC(int, modcleanup, (void))
+DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap))
+DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault))
+DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap,
+ struct vm_eventinfo *info))
+DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi))
+DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu,
+ int vcpu_id))
+DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui))
+DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t scause))
+DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval))
+DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val))
+DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval))
+DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val))
+DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min,
+ vm_offset_t max))
+DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace))
+
+#define dprintf(fmt, ...)
+
+struct hypctx *riscv_get_active_vcpu(void);
+void vmm_switch(struct hypctx *);
+void vmm_unpriv_trap(struct hyptrap *, uint64_t tmp);
+int vmm_sbi_ecall(struct vcpu *, bool *);
+
+void riscv_send_ipi(struct hypctx *hypctx, int hart_id);
+int riscv_check_ipi(struct hypctx *hypctx, bool clear);
+
+#endif /* !_VMM_RISCV_H_ */
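
Despite the IFUNC naming (carried over from amd64, where DEFINE_VMMOPS_IFUNC resolves vendor-specific implementations via ifuncs), here the macro merely declares ordinary vmmops_*() prototypes; the RISC-V implementations live in vmm_riscv.c. For instance, DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) expands to nothing more than:

    int vmmops_getreg(void *vcpui, int num, uint64_t *retval);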
diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c
new file mode 100644
--- /dev/null
+++ b/sys/riscv/vmm/vmm.c
@@ -0,0 +1,1606 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/cpuset.h>
+#include <sys/kernel.h>
+#include <sys/linker.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_param.h>
+
+#include <machine/riscvreg.h>
+#include <machine/cpu.h>
+#include <machine/fpe.h>
+#include <machine/machdep.h>
+#include <machine/pcb.h>
+#include <machine/smp.h>
+#include <machine/vm.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <dev/pci/pcireg.h>
+
+#include <dev/vmm/vmm_dev.h>
+#include <dev/vmm/vmm_ktr.h>
+
+#include "vmm_stat.h"
+#include "riscv.h"
+
+#include "vmm_aplic.h"
+
+struct vcpu {
+ int flags;
+ enum vcpu_state state;
+ struct mtx mtx;
+ int hostcpu; /* host cpuid this vcpu last ran on */
+ int vcpuid;
+ void *stats;
+ struct vm_exit exitinfo;
+ uint64_t nextpc; /* (x) next instruction to execute */
+ struct vm *vm; /* (o) */
+ void *cookie; /* (i) cpu-specific data */
+ struct fpreg *guestfpu; /* (a,i) guest fpu state */
+};
+
+#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
+#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
+#define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx))
+#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
+#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
+#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
+
+struct mem_seg {
+ uint64_t gpa;
+ size_t len;
+ bool wired;
+ bool sysmem;
+ vm_object_t object;
+};
+#define VM_MAX_MEMSEGS 3
+
+struct mem_map {
+ vm_paddr_t gpa;
+ size_t len;
+ vm_ooffset_t segoff;
+ int segid;
+ int prot;
+ int flags;
+};
+#define VM_MAX_MEMMAPS 4
+
+struct vmm_mmio_region {
+ uint64_t start;
+ uint64_t end;
+ mem_region_read_t read;
+ mem_region_write_t write;
+};
+#define VM_MAX_MMIO_REGIONS 4
+
+/*
+ * Initialization:
+ * (o) initialized the first time the VM is created
+ * (i) initialized when VM is created and when it is reinitialized
+ * (x) initialized before use
+ */
+struct vm {
+ void *cookie; /* (i) cpu-specific data */
+ volatile cpuset_t active_cpus; /* (i) active vcpus */
+ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug*/
+ int suspend; /* (i) stop VM execution */
+ bool dying; /* (o) is dying */
+ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
+ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
+ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
+ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
+ struct vmspace *vmspace; /* (o) guest's address space */
+ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
+ struct vcpu **vcpu; /* (i) guest vcpus */
+ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
+ /* (o) guest MMIO regions */
+ /* The following describe the vm cpu topology */
+ uint16_t sockets; /* (o) num of sockets */
+ uint16_t cores; /* (o) num of cores/socket */
+ uint16_t threads; /* (o) num of threads/core */
+ uint16_t maxcpus; /* (o) max pluggable cpus */
+ struct sx mem_segs_lock; /* (o) */
+ struct sx vcpus_init_lock; /* (o) */
+};
+
+static bool vmm_initialized = false;
+
+static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
+
+/* statistics */
+static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
+
+SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
+
+static int vmm_ipinum;
+SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
+ "IPI vector used for vcpu notifications");
+
+u_int vm_maxcpu;
+SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ &vm_maxcpu, 0, "Maximum number of vCPUs");
+
+static void vm_free_memmap(struct vm *vm, int ident);
+static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
+static void vcpu_notify_event_locked(struct vcpu *vcpu);
+
+/*
+ * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
+ * is a safe value for now.
+ */
+#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE)
+
+static void
+vcpu_cleanup(struct vcpu *vcpu, bool destroy)
+{
+ vmmops_vcpu_cleanup(vcpu->cookie);
+ vcpu->cookie = NULL;
+ if (destroy) {
+ vmm_stat_free(vcpu->stats);
+ fpu_save_area_free(vcpu->guestfpu);
+ vcpu_lock_destroy(vcpu);
+ }
+}
+
+static struct vcpu *
+vcpu_alloc(struct vm *vm, int vcpu_id)
+{
+ struct vcpu *vcpu;
+
+ KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
+ ("vcpu_alloc: invalid vcpu %d", vcpu_id));
+
+ vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
+ vcpu_lock_init(vcpu);
+ vcpu->state = VCPU_IDLE;
+ vcpu->hostcpu = NOCPU;
+ vcpu->vcpuid = vcpu_id;
+ vcpu->vm = vm;
+ vcpu->guestfpu = fpu_save_area_alloc();
+ vcpu->stats = vmm_stat_alloc();
+ return (vcpu);
+}
+
+static void
+vcpu_init(struct vcpu *vcpu)
+{
+ vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
+ MPASS(vcpu->cookie != NULL);
+ fpu_save_area_reset(vcpu->guestfpu);
+ vmm_stat_init(vcpu->stats);
+}
+
+struct vm_exit *
+vm_exitinfo(struct vcpu *vcpu)
+{
+ return (&vcpu->exitinfo);
+}
+
+static int
+vmm_init(void)
+{
+
+ vm_maxcpu = mp_ncpus;
+
+ TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
+
+ if (vm_maxcpu > VM_MAXCPU) {
+ printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
+ vm_maxcpu = VM_MAXCPU;
+ }
+
+ if (vm_maxcpu == 0)
+ vm_maxcpu = 1;
+
+ return (vmmops_modinit());
+}
+
+static int
+vmm_handler(module_t mod, int what, void *arg)
+{
+ int error;
+
+ switch (what) {
+ case MOD_LOAD:
+ /* TODO: check if has_hyp here? */
+ vmmdev_init();
+ error = vmm_init();
+ if (error == 0)
+ vmm_initialized = true;
+ break;
+ case MOD_UNLOAD:
+ /* TODO: check if has_hyp here? */
+ error = vmmdev_cleanup();
+ if (error == 0 && vmm_initialized) {
+ error = vmmops_modcleanup();
+ if (error)
+ vmm_initialized = false;
+ }
+ break;
+ default:
+ error = 0;
+ break;
+ }
+ return (error);
+}
+
+static moduledata_t vmm_kmod = {
+ "vmm",
+ vmm_handler,
+ NULL
+};
+
+/*
+ * vmm initialization has the following dependencies:
+ *
+ * - HYP initialization requires smp_rendezvous() and therefore must happen
+ * after SMP is fully functional (after SI_SUB_SMP).
+ */
+DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
+MODULE_VERSION(vmm, 1);
+
+static void
+vm_init(struct vm *vm, bool create)
+{
+ int i;
+
+ vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
+ MPASS(vm->cookie != NULL);
+
+ CPU_ZERO(&vm->active_cpus);
+ CPU_ZERO(&vm->debug_cpus);
+
+ vm->suspend = 0;
+ CPU_ZERO(&vm->suspended_cpus);
+
+ memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
+
+ if (!create) {
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (vm->vcpu[i] != NULL)
+ vcpu_init(vm->vcpu[i]);
+ }
+ }
+}
+
+void
+vm_disable_vcpu_creation(struct vm *vm)
+{
+ sx_xlock(&vm->vcpus_init_lock);
+ vm->dying = true;
+ sx_xunlock(&vm->vcpus_init_lock);
+}
+
+struct vcpu *
+vm_alloc_vcpu(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
+ return (NULL);
+
+ /* Some interrupt controllers may have a CPU limit */
+ if (vcpuid >= aplic_max_cpu_count(vm->cookie))
+ return (NULL);
+
+ vcpu = (struct vcpu *)
+ atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
+ if (__predict_true(vcpu != NULL))
+ return (vcpu);
+
+ sx_xlock(&vm->vcpus_init_lock);
+ vcpu = vm->vcpu[vcpuid];
+ if (vcpu == NULL && !vm->dying) {
+ vcpu = vcpu_alloc(vm, vcpuid);
+ vcpu_init(vcpu);
+
+ /*
+ * Ensure vCPU is fully created before updating pointer
+ * to permit unlocked reads above.
+ */
+ atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
+ (uintptr_t)vcpu);
+ }
+ sx_xunlock(&vm->vcpus_init_lock);
+ return (vcpu);
+}
+
+void
+vm_slock_vcpus(struct vm *vm)
+{
+ sx_slock(&vm->vcpus_init_lock);
+}
+
+void
+vm_unlock_vcpus(struct vm *vm)
+{
+ sx_unlock(&vm->vcpus_init_lock);
+}
+
+int
+vm_create(const char *name, struct vm **retvm)
+{
+ struct vm *vm;
+ struct vmspace *vmspace;
+
+ /*
+ * If vmm.ko could not be successfully initialized then don't attempt
+ * to create the virtual machine.
+ */
+ if (!vmm_initialized)
+ return (ENXIO);
+
+ if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
+ return (EINVAL);
+
+ vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
+ if (vmspace == NULL)
+ return (ENOMEM);
+
+ vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
+ strcpy(vm->name, name);
+ vm->vmspace = vmspace;
+ sx_init(&vm->mem_segs_lock, "vm mem_segs");
+ sx_init(&vm->vcpus_init_lock, "vm vcpus");
+
+ vm->sockets = 1;
+ vm->cores = 1; /* XXX backwards compatibility */
+ vm->threads = 1; /* XXX backwards compatibility */
+ vm->maxcpus = vm_maxcpu;
+
+ vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
+ M_WAITOK | M_ZERO);
+
+ vm_init(vm, true);
+
+ *retvm = vm;
+ return (0);
+}
+
+void
+vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus)
+{
+ *sockets = vm->sockets;
+ *cores = vm->cores;
+ *threads = vm->threads;
+ *maxcpus = vm->maxcpus;
+}
+
+uint16_t
+vm_get_maxcpus(struct vm *vm)
+{
+ return (vm->maxcpus);
+}
+
+int
+vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus)
+{
+ /* Ignore maxcpus. */
+ if ((sockets * cores * threads) > vm->maxcpus)
+ return (EINVAL);
+ vm->sockets = sockets;
+ vm->cores = cores;
+ vm->threads = threads;
+ return(0);
+}
+
+static void
+vm_cleanup(struct vm *vm, bool destroy)
+{
+ struct mem_map *mm;
+ int i;
+
+ aplic_detach_from_vm(vm->cookie);
+
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (vm->vcpu[i] != NULL)
+ vcpu_cleanup(vm->vcpu[i], destroy);
+ }
+
+ vmmops_cleanup(vm->cookie);
+
+ /*
+ * System memory is removed from the guest address space only when
+ * the VM is destroyed. This is because the mapping remains the same
+ * across VM reset.
+ *
+ * Device memory can be relocated by the guest (e.g. using PCI BARs)
+ * so those mappings are removed on a VM reset.
+ */
+ if (!destroy) {
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (destroy || !sysmem_mapping(vm, mm))
+ vm_free_memmap(vm, i);
+ }
+ }
+
+ if (destroy) {
+ for (i = 0; i < VM_MAX_MEMSEGS; i++)
+ vm_free_memseg(vm, i);
+
+ vmmops_vmspace_free(vm->vmspace);
+ vm->vmspace = NULL;
+
+ for (i = 0; i < vm->maxcpus; i++)
+ free(vm->vcpu[i], M_VMM);
+ free(vm->vcpu, M_VMM);
+ sx_destroy(&vm->vcpus_init_lock);
+ sx_destroy(&vm->mem_segs_lock);
+ }
+}
+
+void
+vm_destroy(struct vm *vm)
+{
+
+ vm_cleanup(vm, true);
+
+ free(vm, M_VMM);
+}
+
+int
+vm_reinit(struct vm *vm)
+{
+ int error;
+
+ /*
+ * A virtual machine can be reset only if all vcpus are suspended.
+ */
+ if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
+ vm_cleanup(vm, false);
+ vm_init(vm, false);
+ error = 0;
+ } else {
+ error = EBUSY;
+ }
+
+ return (error);
+}
+
+const char *
+vm_name(struct vm *vm)
+{
+ return (vm->name);
+}
+
+void
+vm_slock_memsegs(struct vm *vm)
+{
+ sx_slock(&vm->mem_segs_lock);
+}
+
+void
+vm_xlock_memsegs(struct vm *vm)
+{
+ sx_xlock(&vm->mem_segs_lock);
+}
+
+void
+vm_unlock_memsegs(struct vm *vm)
+{
+ sx_unlock(&vm->mem_segs_lock);
+}
+
+/*
+ * Return 'true' if 'gpa' is allocated in the guest address space.
+ *
+ * This function is called in the context of a running vcpu which acts as
+ * an implicit lock on 'vm->mem_maps[]'.
+ */
+bool
+vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
+{
+ struct vm *vm = vcpu->vm;
+ struct mem_map *mm;
+ int i;
+
+#ifdef INVARIANTS
+ int hostcpu, state;
+ state = vcpu_get_state(vcpu, &hostcpu);
+ KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
+ ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
+#endif
+
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
+ return (true); /* 'gpa' is sysmem or devmem */
+ }
+
+ return (false);
+}
+
+int
+vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
+{
+ struct mem_seg *seg;
+ vm_object_t obj;
+
+ sx_assert(&vm->mem_segs_lock, SX_XLOCKED);
+
+ if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ if (len == 0 || (len & PAGE_MASK))
+ return (EINVAL);
+
+ seg = &vm->mem_segs[ident];
+ if (seg->object != NULL) {
+ if (seg->len == len && seg->sysmem == sysmem)
+ return (EEXIST);
+ else
+ return (EINVAL);
+ }
+
+ obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
+ if (obj == NULL)
+ return (ENOMEM);
+
+ seg->len = len;
+ seg->object = obj;
+ seg->sysmem = sysmem;
+ return (0);
+}
+
+int
+vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ vm_object_t *objptr)
+{
+ struct mem_seg *seg;
+
+ sx_assert(&vm->mem_segs_lock, SX_LOCKED);
+
+ if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ seg = &vm->mem_segs[ident];
+ if (len)
+ *len = seg->len;
+ if (sysmem)
+ *sysmem = seg->sysmem;
+ if (objptr)
+ *objptr = seg->object;
+ return (0);
+}
+
+void
+vm_free_memseg(struct vm *vm, int ident)
+{
+ struct mem_seg *seg;
+
+ KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
+ ("%s: invalid memseg ident %d", __func__, ident));
+
+ seg = &vm->mem_segs[ident];
+ if (seg->object != NULL) {
+ vm_object_deallocate(seg->object);
+ bzero(seg, sizeof(struct mem_seg));
+ }
+}
+
+int
+vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
+ size_t len, int prot, int flags)
+{
+ struct mem_seg *seg;
+ struct mem_map *m, *map;
+ vm_ooffset_t last;
+ int i, error;
+
+ dprintf("%s: gpa %lx first %lx len %lx\n", __func__, gpa, first, len);
+
+ if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
+ return (EINVAL);
+
+ if (flags & ~VM_MEMMAP_F_WIRED)
+ return (EINVAL);
+
+ if (segid < 0 || segid >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ seg = &vm->mem_segs[segid];
+ if (seg->object == NULL)
+ return (EINVAL);
+
+ last = first + len;
+ if (first < 0 || first >= last || last > seg->len)
+ return (EINVAL);
+
+ if ((gpa | first | last) & PAGE_MASK)
+ return (EINVAL);
+
+ map = NULL;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ m = &vm->mem_maps[i];
+ if (m->len == 0) {
+ map = m;
+ break;
+ }
+ }
+
+ if (map == NULL)
+ return (ENOSPC);
+
+ error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
+ len, 0, VMFS_NO_SPACE, prot, prot, 0);
+ if (error != KERN_SUCCESS)
+ return (EFAULT);
+
+ vm_object_reference(seg->object);
+
+ if (flags & VM_MEMMAP_F_WIRED) {
+ error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
+ VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
+ if (error != KERN_SUCCESS) {
+ vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
+ return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
+ EFAULT);
+ }
+ }
+
+ map->gpa = gpa;
+ map->len = len;
+ map->segoff = first;
+ map->segid = segid;
+ map->prot = prot;
+ map->flags = flags;
+ return (0);
+}
+
+int
+vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
+{
+ struct mem_map *m;
+ int i;
+
+ dprintf("%s: gpa %lx len %lx\n", __func__, gpa, len);
+
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ m = &vm->mem_maps[i];
+ if (m->gpa == gpa && m->len == len) {
+ vm_free_memmap(vm, i);
+ return (0);
+ }
+ }
+
+ return (EINVAL);
+}
+
+int
+vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
+{
+ struct mem_map *mm, *mmnext;
+ int i;
+
+ mmnext = NULL;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (mm->len == 0 || mm->gpa < *gpa)
+ continue;
+ if (mmnext == NULL || mm->gpa < mmnext->gpa)
+ mmnext = mm;
+ }
+
+ if (mmnext != NULL) {
+ *gpa = mmnext->gpa;
+ if (segid)
+ *segid = mmnext->segid;
+ if (segoff)
+ *segoff = mmnext->segoff;
+ if (len)
+ *len = mmnext->len;
+ if (prot)
+ *prot = mmnext->prot;
+ if (flags)
+ *flags = mmnext->flags;
+ return (0);
+ } else {
+ return (ENOENT);
+ }
+}
+
+static void
+vm_free_memmap(struct vm *vm, int ident)
+{
+ struct mem_map *mm;
+ int error __diagused;
+
+ mm = &vm->mem_maps[ident];
+ if (mm->len) {
+ error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
+ mm->gpa + mm->len);
+ KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
+ __func__, error));
+ bzero(mm, sizeof(struct mem_map));
+ }
+}
+
+static __inline bool
+sysmem_mapping(struct vm *vm, struct mem_map *mm)
+{
+
+ if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
+ return (true);
+ else
+ return (false);
+}
+
+vm_paddr_t
+vmm_sysmem_maxaddr(struct vm *vm)
+{
+ struct mem_map *mm;
+ vm_paddr_t maxaddr;
+ int i;
+
+ maxaddr = 0;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (sysmem_mapping(vm, mm)) {
+ if (maxaddr < mm->gpa + mm->len)
+ maxaddr = mm->gpa + mm->len;
+ }
+ }
+ return (maxaddr);
+}
+
+int
+vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
+{
+ int error;
+
+ error = vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault);
+
+ return (error);
+}
+
+void
+vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+ mem_region_read_t mmio_read, mem_region_write_t mmio_write)
+{
+ int i;
+
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start == 0 &&
+ vm->mmio_region[i].end == 0) {
+ vm->mmio_region[i].start = start;
+ vm->mmio_region[i].end = start + size;
+ vm->mmio_region[i].read = mmio_read;
+ vm->mmio_region[i].write = mmio_write;
+ return;
+ }
+ }
+
+ panic("%s: No free MMIO region", __func__);
+}
+
+void
+vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
+{
+ int i;
+
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start == start &&
+ vm->mmio_region[i].end == start + size) {
+ memset(&vm->mmio_region[i], 0,
+ sizeof(vm->mmio_region[i]));
+ return;
+ }
+ }
+
+ panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
+ start + size);
+}
+
+static int
+vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
+{
+ struct vm *vm;
+ struct vm_exit *vme;
+ struct vie *vie;
+ struct hyp *hyp;
+ uint64_t fault_ipa;
+ struct vm_guest_paging *paging;
+ struct vmm_mmio_region *vmr;
+ int error, i;
+
+ vm = vcpu->vm;
+ hyp = vm->cookie;
+ if (!hyp->aplic_attached)
+ goto out_user;
+
+ vme = &vcpu->exitinfo;
+ vie = &vme->u.inst_emul.vie;
+ paging = &vme->u.inst_emul.paging;
+
+ fault_ipa = vme->u.inst_emul.gpa;
+
+ vmr = NULL;
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start <= fault_ipa &&
+ vm->mmio_region[i].end > fault_ipa) {
+ vmr = &vm->mmio_region[i];
+ break;
+ }
+ }
+ if (vmr == NULL)
+ goto out_user;
+
+ error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
+ vmr->read, vmr->write, retu);
+ return (error);
+
+out_user:
+ *retu = true;
+ return (0);
+}
+
+int
+vm_suspend(struct vm *vm, enum vm_suspend_how how)
+{
+ int i;
+
+ if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
+ return (EINVAL);
+
+ if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
+ VM_CTR2(vm, "virtual machine already suspended %d/%d",
+ vm->suspend, how);
+ return (EALREADY);
+ }
+
+ VM_CTR1(vm, "virtual machine successfully suspended %d", how);
+
+ /*
+ * Notify all active vcpus that they are now suspended.
+ */
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (CPU_ISSET(i, &vm->active_cpus))
+ vcpu_notify_event(vm_vcpu(vm, i));
+ }
+
+ return (0);
+}
+
+void
+vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
+{
+ struct vm *vm = vcpu->vm;
+ struct vm_exit *vmexit;
+
+ KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
+ ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
+
+ vmexit = vm_exitinfo(vcpu);
+ vmexit->pc = pc;
+ vmexit->inst_length = 4;
+ vmexit->exitcode = VM_EXITCODE_SUSPENDED;
+ vmexit->u.suspended.how = vm->suspend;
+}
+
+void
+vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
+{
+ struct vm_exit *vmexit;
+
+ vmexit = vm_exitinfo(vcpu);
+ vmexit->pc = pc;
+ vmexit->inst_length = 4;
+ vmexit->exitcode = VM_EXITCODE_DEBUG;
+}
+
+int
+vm_activate_cpu(struct vcpu *vcpu)
+{
+ struct vm *vm = vcpu->vm;
+
+ if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
+ return (EBUSY);
+
+ CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
+ return (0);
+
+}
+
+int
+vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
+{
+ if (vcpu == NULL) {
+ vm->debug_cpus = vm->active_cpus;
+ for (int i = 0; i < vm->maxcpus; i++) {
+ if (CPU_ISSET(i, &vm->active_cpus))
+ vcpu_notify_event(vm_vcpu(vm, i));
+ }
+ } else {
+ if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
+ return (EINVAL);
+
+ CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
+ vcpu_notify_event(vcpu);
+ }
+ return (0);
+}
+
+int
+vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
+{
+
+ if (vcpu == NULL) {
+ CPU_ZERO(&vm->debug_cpus);
+ } else {
+ if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
+ return (EINVAL);
+
+ CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
+ }
+ return (0);
+}
+
+int
+vcpu_debugged(struct vcpu *vcpu)
+{
+
+ return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
+}
+
+cpuset_t
+vm_active_cpus(struct vm *vm)
+{
+
+ return (vm->active_cpus);
+}
+
+cpuset_t
+vm_debug_cpus(struct vm *vm)
+{
+
+ return (vm->debug_cpus);
+}
+
+cpuset_t
+vm_suspended_cpus(struct vm *vm)
+{
+
+ return (vm->suspended_cpus);
+}
+
+
+void *
+vcpu_stats(struct vcpu *vcpu)
+{
+
+ return (vcpu->stats);
+}
+
+/*
+ * This function is called to ensure that a vcpu "sees" a pending event
+ * as soon as possible:
+ * - If the vcpu thread is sleeping then it is woken up.
+ * - If the vcpu is running on a different host_cpu then an IPI will be directed
+ * to the host_cpu to cause the vcpu to trap into the hypervisor.
+ */
+static void
+vcpu_notify_event_locked(struct vcpu *vcpu)
+{
+ int hostcpu;
+
+ hostcpu = vcpu->hostcpu;
+ if (vcpu->state == VCPU_RUNNING) {
+ KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
+ if (hostcpu != curcpu) {
+ ipi_cpu(hostcpu, vmm_ipinum);
+ } else {
+ /*
+ * If the 'vcpu' is running on 'curcpu' then it must
+ * be sending a notification to itself (e.g. SELF_IPI).
+ * The pending event will be picked up when the vcpu
+ * transitions back to guest context.
+ */
+ }
+ } else {
+ KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
+ "with hostcpu %d", vcpu->state, hostcpu));
+ if (vcpu->state == VCPU_SLEEPING)
+ wakeup_one(vcpu);
+ }
+}
+
+void
+vcpu_notify_event(struct vcpu *vcpu)
+{
+ vcpu_lock(vcpu);
+ vcpu_notify_event_locked(vcpu);
+ vcpu_unlock(vcpu);
+}
+
+static void
+restore_guest_fpustate(struct vcpu *vcpu)
+{
+
+ /* Flush host state to the pcb. */
+ fpe_state_save(curthread);
+
+	/* Ensure the host FP state will be re-loaded when exiting the guest. */
+ PCPU_SET(fpcurthread, NULL);
+
+ /* restore guest FPU state */
+ fpe_enable();
+ fpe_restore(vcpu->guestfpu);
+
+	/*
+	 * The FPU now holds the guest's state, so disable it in order to
+	 * trap any access to it by the host until the host state has been
+	 * restored.
+	 */
+ fpe_disable();
+}
+
+static void
+save_guest_fpustate(struct vcpu *vcpu)
+{
+
+ /* Save guest FPE state. */
+ fpe_enable();
+ fpe_store(vcpu->guestfpu);
+ fpe_disable();
+
+ KASSERT(PCPU_GET(fpcurthread) == NULL,
+ ("%s: fpcurthread set with guest registers", __func__));
+}
+
+static int
+vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
+ bool from_idle)
+{
+ int error;
+
+ vcpu_assert_locked(vcpu);
+
+ /*
+ * State transitions from the vmmdev_ioctl() must always begin from
+ * the VCPU_IDLE state. This guarantees that there is only a single
+ * ioctl() operating on a vcpu at any point.
+ */
+ if (from_idle) {
+ while (vcpu->state != VCPU_IDLE) {
+ vcpu_notify_event_locked(vcpu);
+ msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat",
+ hz / 1000);
+ }
+ } else {
+ KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
+ "vcpu idle state"));
+ }
+
+ if (vcpu->state == VCPU_RUNNING) {
+ KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
+ "mismatch for running vcpu", curcpu, vcpu->hostcpu));
+ } else {
+ KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
+ "vcpu that is not running", vcpu->hostcpu));
+ }
+
+ /*
+ * The following state transitions are allowed:
+ * IDLE -> FROZEN -> IDLE
+ * FROZEN -> RUNNING -> FROZEN
+ * FROZEN -> SLEEPING -> FROZEN
+ */
+ switch (vcpu->state) {
+ case VCPU_IDLE:
+ case VCPU_RUNNING:
+ case VCPU_SLEEPING:
+ error = (newstate != VCPU_FROZEN);
+ break;
+ case VCPU_FROZEN:
+ error = (newstate == VCPU_FROZEN);
+ break;
+ default:
+ error = 1;
+ break;
+ }
+
+ if (error)
+ return (EBUSY);
+
+ vcpu->state = newstate;
+ if (newstate == VCPU_RUNNING)
+ vcpu->hostcpu = curcpu;
+ else
+ vcpu->hostcpu = NOCPU;
+
+ if (newstate == VCPU_IDLE)
+ wakeup(&vcpu->state);
+
+ return (0);
+}
+
+static void
+vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
+{
+ int error;
+
+ if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
+ panic("Error %d setting state to %d\n", error, newstate);
+}
+
+static void
+vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
+{
+ int error;
+
+ if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
+ panic("Error %d setting state to %d", error, newstate);
+}
+
+int
+vm_get_capability(struct vcpu *vcpu, int type, int *retval)
+{
+
+ if (type < 0 || type >= VM_CAP_MAX)
+ return (EINVAL);
+
+ return (vmmops_getcap(vcpu->cookie, type, retval));
+}
+
+int
+vm_set_capability(struct vcpu *vcpu, int type, int val)
+{
+
+ if (type < 0 || type >= VM_CAP_MAX)
+ return (EINVAL);
+
+ return (vmmops_setcap(vcpu->cookie, type, val));
+}
+
+struct vm *
+vcpu_vm(struct vcpu *vcpu)
+{
+
+ return (vcpu->vm);
+}
+
+int
+vcpu_vcpuid(struct vcpu *vcpu)
+{
+
+ return (vcpu->vcpuid);
+}
+
+void *
+vcpu_get_cookie(struct vcpu *vcpu)
+{
+
+ return (vcpu->cookie);
+}
+
+struct vcpu *
+vm_vcpu(struct vm *vm, int vcpuid)
+{
+
+ return (vm->vcpu[vcpuid]);
+}
+
+int
+vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
+{
+ int error;
+
+ vcpu_lock(vcpu);
+ error = vcpu_set_state_locked(vcpu, newstate, from_idle);
+ vcpu_unlock(vcpu);
+
+ return (error);
+}
+
+enum vcpu_state
+vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
+{
+ enum vcpu_state state;
+
+ vcpu_lock(vcpu);
+ state = vcpu->state;
+ if (hostcpu != NULL)
+ *hostcpu = vcpu->hostcpu;
+ vcpu_unlock(vcpu);
+
+ return (state);
+}
+
+static void *
+_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+ int i, count, pageoff;
+ struct mem_map *mm;
+ vm_page_t m;
+
+ pageoff = gpa & PAGE_MASK;
+ if (len > PAGE_SIZE - pageoff)
+ panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
+
+ count = 0;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
+ gpa < mm->gpa + mm->len) {
+ count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
+ trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
+ break;
+ }
+ }
+
+ if (count == 1) {
+ *cookie = m;
+ return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
+ } else {
+ *cookie = NULL;
+ return (NULL);
+ }
+}
+
+void *
+vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+#ifdef INVARIANTS
+ /*
+	 * The current vcpu should be frozen to ensure 'vm->mem_maps[]'
+	 * stability.
+ */
+ int state = vcpu_get_state(vcpu, NULL);
+ KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
+ __func__, state));
+#endif
+ return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
+}
+
+void *
+vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+ sx_assert(&vm->mem_segs_lock, SX_LOCKED);
+ return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
+}
+
+void
+vm_gpa_release(void *cookie)
+{
+ vm_page_t m = cookie;
+
+ vm_page_unwire(m, PQ_ACTIVE);
+}
+
+int
+vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
+{
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+
+ return (vmmops_getreg(vcpu->cookie, reg, retval));
+}
+
+int
+vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
+{
+ int error;
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+ error = vmmops_setreg(vcpu->cookie, reg, val);
+ if (error || reg != VM_REG_GUEST_SEPC)
+ return (error);
+
+ vcpu->nextpc = val;
+
+ return (0);
+}
+
+void *
+vm_get_cookie(struct vm *vm)
+{
+
+ return (vm->cookie);
+}
+
+int
+vm_inject_exception(struct vcpu *vcpu, uint64_t scause)
+{
+
+ return (vmmops_exception(vcpu->cookie, scause));
+}
+
+int
+vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr)
+{
+
+ return (aplic_attach_to_vm(vm->cookie, descr));
+}
+
+int
+vm_assert_irq(struct vm *vm, uint32_t irq)
+{
+
+ return (aplic_inject_irq(vm->cookie, -1, irq, true));
+}
+
+int
+vm_deassert_irq(struct vm *vm, uint32_t irq)
+{
+
+ return (aplic_inject_irq(vm->cookie, -1, irq, false));
+}
+
+int
+vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
+ int func)
+{
+
+ return (aplic_inject_msi(vm->cookie, msg, addr));
+}
+
+static int
+vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
+{
+
+ vcpu_lock(vcpu);
+
+ while (1) {
+ if (aplic_check_pending(vcpu->cookie))
+ break;
+
+ if (riscv_check_ipi(vcpu->cookie, false))
+ break;
+
+ if (vcpu_should_yield(vcpu))
+ break;
+
+ vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+ /*
+ * XXX msleep_spin() cannot be interrupted by signals so
+ * wake up periodically to check pending signals.
+ */
+ msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz / 1000);
+ vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+ }
+ vcpu_unlock(vcpu);
+
+ *retu = false;
+
+ return (0);
+}
+
+static int
+vm_handle_paging(struct vcpu *vcpu, bool *retu)
+{
+ struct vm *vm;
+ struct vm_exit *vme;
+ struct vm_map *map;
+ uint64_t addr;
+ pmap_t pmap;
+ int ftype, rv;
+
+ vm = vcpu->vm;
+ vme = &vcpu->exitinfo;
+
+ pmap = vmspace_pmap(vm->vmspace);
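+	/*
+	 * For guest-page faults, htval holds the faulting guest physical
+	 * address shifted right by two; reconstruct it and page-align it.
+	 */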
+ addr = (vme->htval << 2) & ~(PAGE_SIZE - 1);
+
+ dprintf("%s: %lx\n", __func__, addr);
+
+ switch (vme->scause) {
+ case SCAUSE_STORE_GUEST_PAGE_FAULT:
+ ftype = VM_PROT_WRITE;
+ break;
+ case SCAUSE_FETCH_GUEST_PAGE_FAULT:
+ ftype = VM_PROT_EXECUTE;
+ break;
+ case SCAUSE_LOAD_GUEST_PAGE_FAULT:
+ ftype = VM_PROT_READ;
+ break;
+ default:
+ panic("unknown page trap: %lu", vme->scause);
+ }
+
+ /* The page exists, but the page table needs to be updated. */
+ if (pmap_fault(pmap, addr, ftype))
+ return (0);
+
+ map = &vm->vmspace->vm_map;
+ rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL);
+ if (rv != KERN_SUCCESS) {
+ printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n",
+ __func__, addr, ftype, rv);
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+static int
+vm_handle_suspend(struct vcpu *vcpu, bool *retu)
+{
+ struct vm *vm = vcpu->vm;
+ int error, i;
+ struct thread *td;
+
+ error = 0;
+ td = curthread;
+
+ CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);
+
+ /*
+ * Wait until all 'active_cpus' have suspended themselves.
+ *
+ * Since a VM may be suspended at any time including when one or
+ * more vcpus are doing a rendezvous we need to call the rendezvous
+ * handler while we are waiting to prevent a deadlock.
+ */
+ vcpu_lock(vcpu);
+ while (error == 0) {
+ if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
+ break;
+
+ vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+ msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
+ vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+ if (td_ast_pending(td, TDA_SUSPEND)) {
+ vcpu_unlock(vcpu);
+ error = thread_check_susp(td, false);
+ vcpu_lock(vcpu);
+ }
+ }
+ vcpu_unlock(vcpu);
+
+ /*
+	 * Wake up the other sleeping vcpus and return to userspace.
+ */
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (CPU_ISSET(i, &vm->suspended_cpus)) {
+ vcpu_notify_event(vm_vcpu(vm, i));
+ }
+ }
+
+ *retu = true;
+ return (error);
+}
+
+int
+vm_run(struct vcpu *vcpu)
+{
+ struct vm_eventinfo evinfo;
+ struct vm_exit *vme;
+ struct vm *vm;
+ pmap_t pmap;
+ int error;
+ int vcpuid;
+ bool retu;
+
+ vm = vcpu->vm;
+
+ dprintf("%s\n", __func__);
+
+ vcpuid = vcpu->vcpuid;
+
+ if (!CPU_ISSET(vcpuid, &vm->active_cpus))
+ return (EINVAL);
+
+ if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
+ return (EINVAL);
+
+ pmap = vmspace_pmap(vm->vmspace);
+ vme = &vcpu->exitinfo;
+ evinfo.rptr = NULL;
+ evinfo.sptr = &vm->suspend;
+ evinfo.iptr = NULL;
+restart:
+ critical_enter();
+
+ restore_guest_fpustate(vcpu);
+
+ vcpu_require_state(vcpu, VCPU_RUNNING);
+ error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
+ vcpu_require_state(vcpu, VCPU_FROZEN);
+
+ save_guest_fpustate(vcpu);
+
+ critical_exit();
+
+ if (error == 0) {
+ retu = false;
+ switch (vme->exitcode) {
+ case VM_EXITCODE_INST_EMUL:
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ error = vm_handle_inst_emul(vcpu, &retu);
+ break;
+ case VM_EXITCODE_WFI:
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ error = vm_handle_wfi(vcpu, vme, &retu);
+ break;
+ case VM_EXITCODE_ECALL:
+ /* Handle in userland. */
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ retu = true;
+ break;
+ case VM_EXITCODE_PAGING:
+ vcpu->nextpc = vme->pc;
+ error = vm_handle_paging(vcpu, &retu);
+ break;
+ case VM_EXITCODE_BOGUS:
+ vcpu->nextpc = vme->pc;
+ retu = false;
+ error = 0;
+ break;
+ case VM_EXITCODE_SUSPENDED:
+ vcpu->nextpc = vme->pc;
+ error = vm_handle_suspend(vcpu, &retu);
+ break;
+ default:
+ /* Handle in userland. */
+ vcpu->nextpc = vme->pc;
+ retu = true;
+ break;
+ }
+ }
+
+ if (error == 0 && retu == false)
+ goto restart;
+
+ return (error);
+}
diff --git a/sys/riscv/vmm/vmm_aplic.h b/sys/riscv/vmm/vmm_aplic.h
new file mode 100644
--- /dev/null
+++ b/sys/riscv/vmm/vmm_aplic.h
@@ -0,0 +1,54 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_APLIC_H_
+#define _VMM_APLIC_H_
+
+struct hyp;
+struct hypctx;
+struct vm_aplic_descr;
+
+int aplic_attach_to_vm(struct hyp *hyp, struct vm_aplic_descr *descr);
+void aplic_detach_from_vm(struct hyp *hyp);
+int aplic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level);
+int aplic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr);
+void aplic_vminit(struct hyp *hyp);
+void aplic_vmcleanup(struct hyp *hyp);
+int aplic_check_pending(struct hypctx *hypctx);
+
+void aplic_cpuinit(struct hypctx *hypctx);
+void aplic_cpucleanup(struct hypctx *hypctx);
+void aplic_flush_hwstate(struct hypctx *hypctx);
+void aplic_sync_hwstate(struct hypctx *hypctx);
+int aplic_max_cpu_count(struct hyp *hyp);
+
+#endif /* !_VMM_APLIC_H_ */
diff --git a/sys/riscv/vmm/vmm_aplic.c b/sys/riscv/vmm/vmm_aplic.c
new file mode 100644
--- /dev/null
+++ b/sys/riscv/vmm/vmm_aplic.c
@@ -0,0 +1,528 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/smp.h>
+
+#include <riscv/vmm/riscv.h>
+#include <riscv/vmm/vmm_aplic.h>
+
+#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_dev.h>
+
+MALLOC_DEFINE(M_APLIC, "RISC-V VMM APLIC", "RISC-V AIA APLIC");
+
+#define APLIC_DOMAINCFG 0x0000
+#define DOMAINCFG_IE (1 << 8) /* Interrupt Enable. */
+#define DOMAINCFG_DM (1 << 2) /* Direct Mode. */
+#define DOMAINCFG_BE (1 << 0) /* Big-Endian. */
+#define APLIC_SOURCECFG(x) (0x0004 + ((x) - 1) * 4)
+#define SOURCECFG_D (1 << 10) /* D - Delegate. */
+/* If D == 0. */
+#define SOURCECFG_SM_S (0)
+#define SOURCECFG_SM_M (0x7 << SOURCECFG_SM_S)
+#define SOURCECFG_SM_INACTIVE (0) /* Not delegated. */
+#define SOURCECFG_SM_DETACHED (1)
+#define SOURCECFG_SM_RESERVED (2)
+#define SOURCECFG_SM_RESERVED1 (3)
+#define SOURCECFG_SM_EDGE1 (4) /* Rising edge. */
+#define SOURCECFG_SM_EDGE0 (5) /* Falling edge. */
+#define SOURCECFG_SM_LEVEL1 (6) /* High. */
+#define SOURCECFG_SM_LEVEL0 (7) /* Low. */
+/* If D == 1. */
+#define SOURCECFG_CHILD_INDEX_S (0)
+#define SOURCECFG_CHILD_INDEX_M (0x3ff << SOURCECFG_CHILD_INDEX_S)
+#define APLIC_SETIPNUM 0x1cdc
+#define APLIC_CLRIPNUM 0x1ddc
+#define APLIC_SETIENUM 0x1edc
+#define APLIC_CLRIENUM 0x1fdc
+#define APLIC_GENMSI 0x3000
+#define APLIC_TARGET(x) (0x3004 + ((x) - 1) * 4)
+#define TARGET_HART_S 18
+#define TARGET_HART_M 0x3fff
+#define APLIC_IDC(x) (0x4000 + (x) * 32)
+#define IDC_IDELIVERY(x) (APLIC_IDC(x) + 0x0)
+#define IDC_IFORCE(x) (APLIC_IDC(x) + 0x4)
+#define IDC_ITHRESHOLD(x) (APLIC_IDC(x) + 0x8)
+#define IDC_TOPI(x) (APLIC_IDC(x) + 0x18)
+#define IDC_CLAIMI(x) (APLIC_IDC(x) + 0x1C)
+#define CLAIMI_IRQ_S (16)
+#define CLAIMI_IRQ_M (0x3ff << CLAIMI_IRQ_S)
+#define CLAIMI_PRIO_S (0)
+#define CLAIMI_PRIO_M (0xff << CLAIMI_PRIO_S)
+
+#define APLIC_NIRQS 63
+
+struct aplic_irq {
+ uint32_t sourcecfg;
+ uint32_t state;
+#define APLIC_IRQ_STATE_PENDING (1 << 0)
+#define APLIC_IRQ_STATE_ENABLED (1 << 1)
+ uint32_t target;
+ uint32_t target_hart;
+};
+
+struct aplic {
+ uint32_t mem_start;
+ uint32_t mem_end;
+ struct mtx mtx;
+ struct aplic_irq *irqs;
+ int nirqs;
+ uint32_t domaincfg;
+};
+
+static int
+aplic_handle_sourcecfg(struct aplic *aplic, int i, bool write, uint64_t *val)
+{
+ struct aplic_irq *irq;
+
+ if (i <= 0 || i > aplic->nirqs)
+ return (ENOENT);
+
+ mtx_lock_spin(&aplic->mtx);
+ irq = &aplic->irqs[i];
+ if (write)
+ irq->sourcecfg = *val;
+ else
+ *val = irq->sourcecfg;
+ mtx_unlock_spin(&aplic->mtx);
+
+ return (0);
+}
+
+static int
+aplic_set_enabled(struct aplic *aplic, bool write, uint64_t *val, bool enabled)
+{
+ struct aplic_irq *irq;
+ int i;
+
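+	/* Reads of the set/clear-enable-number registers return zero. */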
+ if (!write) {
+ *val = 0;
+ return (0);
+ }
+
+ i = *val;
+ if (i <= 0 || i > aplic->nirqs)
+ return (-1);
+
+ irq = &aplic->irqs[i];
+
+ mtx_lock_spin(&aplic->mtx);
+ if (enabled)
+ irq->state |= APLIC_IRQ_STATE_ENABLED;
+ else
+ irq->state &= ~APLIC_IRQ_STATE_ENABLED;
+ mtx_unlock_spin(&aplic->mtx);
+
+ return (0);
+}
+
+static int
+aplic_handle_target(struct aplic *aplic, int i, bool write, uint64_t *val)
+{
+ struct aplic_irq *irq;
+
+ mtx_lock_spin(&aplic->mtx);
+ irq = &aplic->irqs[i];
+ if (write) {
+ irq->target = *val;
+ irq->target_hart = (irq->target >> TARGET_HART_S);
+ } else
+ *val = irq->target;
+ mtx_unlock_spin(&aplic->mtx);
+
+ return (0);
+}
+
+static int
+aplic_handle_idc_claimi(struct hyp *hyp, struct aplic *aplic, int cpu_id,
+ bool write, uint64_t *val)
+{
+ struct aplic_irq *irq;
+ bool found;
+ int i;
+
+ /* Writes to claimi are ignored. */
+ if (write)
+ return (-1);
+
+ found = false;
+
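+	/*
+	 * Scan for a pending interrupt targeting this hart and clear its
+	 * pending bit.  Priorities are not modelled here, so the first match
+	 * wins.
+	 */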
+ mtx_lock_spin(&aplic->mtx);
+ for (i = 0; i < aplic->nirqs; i++) {
+ irq = &aplic->irqs[i];
+ if (irq->target_hart != cpu_id)
+ continue;
+ if (irq->state & APLIC_IRQ_STATE_PENDING) {
+ *val = (i << CLAIMI_IRQ_S) | (0 << CLAIMI_PRIO_S);
+ irq->state &= ~APLIC_IRQ_STATE_PENDING;
+ found = true;
+ break;
+ }
+ }
+ mtx_unlock_spin(&aplic->mtx);
+
+ if (found == false)
+ *val = 0;
+
+ return (0);
+}
+
+static int
+aplic_handle_idc(struct hyp *hyp, struct aplic *aplic, int cpu, int reg,
+ bool write, uint64_t *val)
+{
+ int error;
+
+ switch (reg + APLIC_IDC(0)) {
+ case IDC_IDELIVERY(0):
+ case IDC_IFORCE(0):
+ case IDC_ITHRESHOLD(0):
+ case IDC_TOPI(0):
+		if (!write)
+			*val = 0;
+		error = 0;
+ break;
+ case IDC_CLAIMI(0):
+ error = aplic_handle_idc_claimi(hyp, aplic, cpu, write, val);
+ break;
+ default:
+ error = ENOENT;
+ }
+
+ return (error);
+}
+
+static int
+aplic_mmio_access(struct hyp *hyp, struct aplic *aplic, uint64_t reg,
+ bool write, uint64_t *val)
+{
+ int error;
+ int cpu;
+ int r;
+ int i;
+
+ if ((reg >= APLIC_SOURCECFG(1)) &&
+ (reg <= APLIC_SOURCECFG(aplic->nirqs))) {
+ i = ((reg - APLIC_SOURCECFG(1)) >> 2) + 1;
+ error = aplic_handle_sourcecfg(aplic, i, write, val);
+ return (error);
+ }
+
+ if ((reg >= APLIC_TARGET(1)) && (reg <= APLIC_TARGET(aplic->nirqs))) {
+ i = ((reg - APLIC_TARGET(1)) >> 2) + 1;
+ error = aplic_handle_target(aplic, i, write, val);
+ return (error);
+ }
+
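+	/*
+	 * Per-hart IDC register blocks start at APLIC_IDC(0) and are 32 bytes
+	 * each.
+	 */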
+ if ((reg >= APLIC_IDC(0)) && (reg < APLIC_IDC(mp_ncpus))) {
+ cpu = (reg - APLIC_IDC(0)) >> 5;
+ r = (reg - APLIC_IDC(0)) % 32;
+ error = aplic_handle_idc(hyp, aplic, cpu, r, write, val);
+ return (error);
+ }
+
+ switch (reg) {
+ case APLIC_DOMAINCFG:
+		if (write)
+			aplic->domaincfg = *val & DOMAINCFG_IE;
+		else
+			*val = aplic->domaincfg;
+ error = 0;
+ break;
+ case APLIC_SETIENUM:
+ error = aplic_set_enabled(aplic, write, val, true);
+ break;
+ case APLIC_CLRIENUM:
+ error = aplic_set_enabled(aplic, write, val, false);
+ break;
+ default:
+		dprintf("%s: unknown reg %lx\n", __func__, reg);
+ error = ENOENT;
+ break;
+	}
+
+ return (error);
+}
+
+static int
+mem_read(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t *rval, int size,
+ void *arg)
+{
+ struct hypctx *hypctx;
+ struct hyp *hyp;
+ struct aplic *aplic;
+ uint64_t reg;
+ uint64_t val;
+ int error;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ aplic = hyp->aplic;
+
+ dprintf("%s: fault_ipa %lx size %d\n", __func__, fault_ipa, size);
+
+ if (fault_ipa < aplic->mem_start || fault_ipa + size > aplic->mem_end)
+ return (EINVAL);
+
+ reg = fault_ipa - aplic->mem_start;
+
+ error = aplic_mmio_access(hyp, aplic, reg, false, &val);
+ if (error == 0)
+ *rval = val;
+
+ return (error);
+}
+
+static int
+mem_write(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t wval, int size,
+ void *arg)
+{
+ struct hypctx *hypctx;
+ struct hyp *hyp;
+ struct aplic *aplic;
+ uint64_t reg;
+ uint64_t val;
+ int error;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ aplic = hyp->aplic;
+
+ dprintf("%s: fault_ipa %lx wval %lx size %d\n", __func__, fault_ipa,
+ wval, size);
+
+ if (fault_ipa < aplic->mem_start || fault_ipa + size > aplic->mem_end)
+ return (EINVAL);
+
+ reg = fault_ipa - aplic->mem_start;
+
+ val = wval;
+
+ error = aplic_mmio_access(hyp, aplic, reg, true, &val);
+
+ return (error);
+}
+
+void
+aplic_vminit(struct hyp *hyp)
+{
+ struct aplic *aplic;
+
+ hyp->aplic = malloc(sizeof(*hyp->aplic), M_APLIC,
+ M_WAITOK | M_ZERO);
+ aplic = hyp->aplic;
+
+ mtx_init(&aplic->mtx, "APLIC lock", NULL, MTX_SPIN);
+}
+
+void
+aplic_vmcleanup(struct hyp *hyp)
+{
+ struct aplic *aplic;
+
+ aplic = hyp->aplic;
+
+ mtx_destroy(&aplic->mtx);
+
+ free(hyp->aplic, M_APLIC);
+}
+
+int
+aplic_attach_to_vm(struct hyp *hyp, struct vm_aplic_descr *descr)
+{
+ struct aplic *aplic;
+ struct vm *vm;
+
+ vm = hyp->vm;
+
+ dprintf("%s\n", __func__);
+
+ vm_register_inst_handler(vm, descr->mem_start, descr->mem_size,
+ mem_read, mem_write);
+
+ aplic = hyp->aplic;
+ aplic->nirqs = APLIC_NIRQS;
+ aplic->mem_start = descr->mem_start;
+ aplic->mem_end = descr->mem_start + descr->mem_size;
+ aplic->irqs = malloc(sizeof(struct aplic_irq) * aplic->nirqs, M_APLIC,
+ M_WAITOK | M_ZERO);
+
+ hyp->aplic_attached = true;
+
+ return (0);
+}
+
+void
+aplic_detach_from_vm(struct hyp *hyp)
+{
+ struct aplic *aplic;
+
+ aplic = hyp->aplic;
+
+ dprintf("%s\n", __func__);
+
+ if (hyp->aplic_attached) {
+ hyp->aplic_attached = false;
+ free(aplic->irqs, M_APLIC);
+ }
+}
+
+int
+aplic_check_pending(struct hypctx *hypctx)
+{
+ struct aplic_irq *irq;
+ struct aplic *aplic;
+ struct hyp *hyp;
+ int i;
+
+ hyp = hypctx->hyp;
+ aplic = hyp->aplic;
+
+ mtx_lock_spin(&aplic->mtx);
+ if ((aplic->domaincfg & DOMAINCFG_IE) == 0) {
+ mtx_unlock_spin(&aplic->mtx);
+ return (0);
+ }
+
+ for (i = 0; i < aplic->nirqs; i++) {
+ irq = &aplic->irqs[i];
+ if (irq->target_hart != hypctx->cpu_id)
+ continue;
+ if ((irq->state & APLIC_IRQ_STATE_ENABLED) &&
+ (irq->state & APLIC_IRQ_STATE_PENDING)) {
+ mtx_unlock_spin(&aplic->mtx);
+ /* Found. */
+ return (1);
+ }
+ }
+ mtx_unlock_spin(&aplic->mtx);
+
+ return (0);
+}
+
+int
+aplic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level)
+{
+ struct aplic_irq *irq;
+ struct aplic *aplic;
+ bool notify;
+ int error;
+
+ aplic = hyp->aplic;
+
+ error = 0;
+
+ mtx_lock_spin(&aplic->mtx);
+ if ((aplic->domaincfg & DOMAINCFG_IE) == 0) {
+ mtx_unlock_spin(&aplic->mtx);
+ return (error);
+ }
+
+ irq = &aplic->irqs[irqid];
+ if (irq->sourcecfg & SOURCECFG_D) {
+ mtx_unlock_spin(&aplic->mtx);
+ return (error);
+ }
+
+ notify = false;
+ switch (irq->sourcecfg & SOURCECFG_SM_M) {
+ case SOURCECFG_SM_EDGE1:
+ if (level) {
+ irq->state |= APLIC_IRQ_STATE_PENDING;
+ if (irq->state & APLIC_IRQ_STATE_ENABLED)
+ notify = true;
+ } else
+ irq->state &= ~APLIC_IRQ_STATE_PENDING;
+ break;
+ case SOURCECFG_SM_DETACHED:
+ break;
+ default:
+ /* TODO. */
+ dprintf("sourcecfg %d\n", irq->sourcecfg & SOURCECFG_SM_M);
+ error = ENXIO;
+ break;
+ }
+ mtx_unlock_spin(&aplic->mtx);
+
+ if (notify)
+ vcpu_notify_event(vm_vcpu(hyp->vm, irq->target_hart));
+
+ return (error);
+}
+
+int
+aplic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr)
+{
+
+ /* TODO. */
+
+ return (ENXIO);
+}
+
+void
+aplic_cpuinit(struct hypctx *hypctx)
+{
+
+}
+
+void
+aplic_cpucleanup(struct hypctx *hypctx)
+{
+
+}
+
+void
+aplic_flush_hwstate(struct hypctx *hypctx)
+{
+
+}
+
+void
+aplic_sync_hwstate(struct hypctx *hypctx)
+{
+
+}
+
+int
+aplic_max_cpu_count(struct hyp *hyp)
+{
+ int16_t max_count;
+
+ max_count = vm_get_maxcpus(hyp->vm);
+
+ return (max_count);
+}
diff --git a/sys/riscv/vmm/vmm_dev_machdep.c b/sys/riscv/vmm/vmm_dev_machdep.c
new file mode 100644
--- /dev/null
+++ b/sys/riscv/vmm/vmm_dev_machdep.c
@@ -0,0 +1,126 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/libkern.h>
+#include <sys/ioccom.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+
+#include <dev/vmm/vmm_dev.h>
+
+#include "vmm_aplic.h"
+
+const struct vmmdev_ioctl vmmdev_machdep_ioctls[] = {
+ VMMDEV_IOCTL(VM_RUN, VMMDEV_IOCTL_LOCK_ONE_VCPU),
+ VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
+ VMMDEV_IOCTL(VM_GLA2GPA_NOFAULT, VMMDEV_IOCTL_LOCK_ONE_VCPU),
+
+ VMMDEV_IOCTL(VM_ATTACH_APLIC,
+ VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
+
+ VMMDEV_IOCTL(VM_RAISE_MSI, 0),
+ VMMDEV_IOCTL(VM_ASSERT_IRQ, 0),
+ VMMDEV_IOCTL(VM_DEASSERT_IRQ, 0),
+};
+const size_t vmmdev_machdep_ioctl_count = nitems(vmmdev_machdep_ioctls);
+
+int
+vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data,
+ int fflag, struct thread *td)
+{
+ struct vm_run *vmrun;
+ struct vm_aplic_descr *aplic;
+ struct vm_irq *vi;
+ struct vm_exception *vmexc;
+ struct vm_gla2gpa *gg;
+ struct vm_msi *vmsi;
+ int error;
+
+ error = 0;
+ switch (cmd) {
+ case VM_RUN: {
+ struct vm_exit *vme;
+
+ vmrun = (struct vm_run *)data;
+ vme = vm_exitinfo(vcpu);
+
+ error = vm_run(vcpu);
+ if (error != 0)
+ break;
+
+ error = copyout(vme, vmrun->vm_exit, sizeof(*vme));
+ break;
+ }
+ case VM_INJECT_EXCEPTION:
+ vmexc = (struct vm_exception *)data;
+ error = vm_inject_exception(vcpu, vmexc->scause);
+ break;
+ case VM_GLA2GPA_NOFAULT:
+ gg = (struct vm_gla2gpa *)data;
+ error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla,
+ gg->prot, &gg->gpa, &gg->fault);
+ KASSERT(error == 0 || error == EFAULT,
+ ("%s: vm_gla2gpa unknown error %d", __func__, error));
+ break;
+ case VM_ATTACH_APLIC:
+ aplic = (struct vm_aplic_descr *)data;
+ error = vm_attach_aplic(vm, aplic);
+ break;
+ case VM_RAISE_MSI:
+ vmsi = (struct vm_msi *)data;
+ error = vm_raise_msi(vm, vmsi->msg, vmsi->addr, vmsi->bus,
+ vmsi->slot, vmsi->func);
+ break;
+ case VM_ASSERT_IRQ:
+ vi = (struct vm_irq *)data;
+ error = vm_assert_irq(vm, vi->irq);
+ break;
+ case VM_DEASSERT_IRQ:
+ vi = (struct vm_irq *)data;
+ error = vm_deassert_irq(vm, vi->irq);
+ break;
+ default:
+ error = ENOTTY;
+ break;
+ }
+
+ return (error);
+}
diff --git a/sys/riscv/vmm/vmm_instruction_emul.c b/sys/riscv/vmm/vmm_instruction_emul.c
new file mode 100644
--- /dev/null
+++ b/sys/riscv/vmm/vmm_instruction_emul.c
@@ -0,0 +1,109 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#else
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/_iovec.h>
+
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <vmmapi.h>
+#endif
+
+#include <machine/vmm_instruction_emul.h>
+
+int
+vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging __unused, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *memarg)
+{
+ uint64_t val;
+ int error;
+
+ if (vie->dir == VM_DIR_READ) {
+ error = memread(vcpu, gpa, &val, vie->access_size, memarg);
+ if (error)
+ goto out;
+ if ((vie->sign_extend == 0) && (vie->access_size < 8))
+ val &= (1ul << (vie->access_size * 8)) - 1;
+ error = vm_set_register(vcpu, vie->reg, val);
+ } else {
+ error = vm_get_register(vcpu, vie->reg, &val);
+ if (error)
+ goto out;
+ /* Mask any unneeded bits from the register */
+ if (vie->access_size < 8)
+ val &= (1ul << (vie->access_size * 8)) - 1;
+ error = memwrite(vcpu, gpa, val, vie->access_size, memarg);
+ }
+
+out:
+ return (error);
+}
+
+int
+vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg)
+{
+ uint64_t val;
+ int error;
+
+ if (vre->dir == VM_DIR_READ) {
+ error = regread(vcpu, &val, regarg);
+ if (error)
+ goto out;
+ error = vm_set_register(vcpu, vre->reg, val);
+ } else {
+ error = vm_get_register(vcpu, vre->reg, &val);
+ if (error)
+ goto out;
+ error = regwrite(vcpu, val, regarg);
+ }
+
+out:
+ return (error);
+}
diff --git a/sys/riscv/vmm/vmm_riscv.c b/sys/riscv/vmm/vmm_riscv.c
new file mode 100644
--- /dev/null
+++ b/sys/riscv/vmm/vmm_riscv.c
@@ -0,0 +1,922 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/vmem.h>
+#include <sys/bus.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+
+#include <machine/md_var.h>
+#include <machine/riscvreg.h>
+#include <machine/vm.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/atomic.h>
+#include <machine/pmap.h>
+#include <machine/intr.h>
+#include <machine/encoding.h>
+#include <machine/db_machdep.h>
+
+#include "riscv.h"
+#include "vmm_aplic.h"
+#include "vmm_stat.h"
+
+MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP");
+
+DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
+
+static int
+m_op(uint32_t insn, int match, int mask)
+{
+
+ if (((insn ^ match) & mask) == 0)
+ return (1);
+
+ return (0);
+}
+
+static inline void
+riscv_set_active_vcpu(struct hypctx *hypctx)
+{
+
+ DPCPU_SET(vcpu, hypctx);
+}
+
+struct hypctx *
+riscv_get_active_vcpu(void)
+{
+
+ return (DPCPU_GET(vcpu));
+}
+
+int
+vmmops_modinit(void)
+{
+
+ if (!has_hyp) {
+ printf("vmm: riscv hart doesn't support H-extension.\n");
+ return (ENXIO);
+ }
+
+ if (!has_sstc) {
+ printf("vmm: riscv hart doesn't support SSTC extension.\n");
+ return (ENXIO);
+ }
+
+ return (0);
+}
+
+int
+vmmops_modcleanup(void)
+{
+
+ return (0);
+}
+
+void *
+vmmops_init(struct vm *vm, pmap_t pmap)
+{
+ struct hyp *hyp;
+ vm_size_t size;
+
+ size = round_page(sizeof(struct hyp) +
+ sizeof(struct hypctx *) * vm_get_maxcpus(vm));
+ hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
+ hyp->vm = vm;
+ hyp->aplic_attached = false;
+
+ aplic_vminit(hyp);
+
+ return (hyp);
+}
+
+static void
+vmmops_delegate(void)
+{
+ uint64_t hedeleg;
+ uint64_t hideleg;
+
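+	/*
+	 * Delegate the listed exceptions and interrupts to VS-mode so the
+	 * guest handles them without leaving the virtual machine.
+	 */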
+ hedeleg = (1UL << SCAUSE_INST_MISALIGNED);
+ hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION);
+ hedeleg |= (1UL << SCAUSE_BREAKPOINT);
+ hedeleg |= (1UL << SCAUSE_ECALL_USER);
+ hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT);
+ hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT);
+ hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT);
+ csr_write(hedeleg, hedeleg);
+
+ hideleg = (1UL << IRQ_SOFTWARE_HYPERVISOR);
+ hideleg |= (1UL << IRQ_TIMER_HYPERVISOR);
+ hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR);
+ csr_write(hideleg, hideleg);
+}
+
+static void
+vmmops_vcpu_restore_csrs(struct hypctx *hypctx)
+{
+ struct hypcsr *csrs;
+
+ csrs = &hypctx->guest_csrs;
+
+ csr_write(vsstatus, csrs->vsstatus);
+ csr_write(vsie, csrs->vsie);
+ csr_write(vstvec, csrs->vstvec);
+ csr_write(vsscratch, csrs->vsscratch);
+ csr_write(vsepc, csrs->vsepc);
+ csr_write(vscause, csrs->vscause);
+ csr_write(vstval, csrs->vstval);
+ csr_write(hvip, csrs->hvip);
+ csr_write(vsatp, csrs->vsatp);
+}
+
+static void
+vmmops_vcpu_save_csrs(struct hypctx *hypctx)
+{
+ struct hypcsr *csrs;
+
+ csrs = &hypctx->guest_csrs;
+
+ csrs->vsstatus = csr_read(vsstatus);
+ csrs->vsie = csr_read(vsie);
+ csrs->vstvec = csr_read(vstvec);
+ csrs->vsscratch = csr_read(vsscratch);
+ csrs->vsepc = csr_read(vsepc);
+ csrs->vscause = csr_read(vscause);
+ csrs->vstval = csr_read(vstval);
+ csrs->hvip = csr_read(hvip);
+ csrs->vsatp = csr_read(vsatp);
+}
+
+void *
+vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
+{
+ struct hypctx *hypctx;
+ struct hyp *hyp;
+ vm_size_t size;
+
+ hyp = vmi;
+
+ dprintf("%s: hyp %p\n", __func__, hyp);
+
+ KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
+ ("%s: Invalid vcpuid %d", __func__, vcpuid));
+
+ size = round_page(sizeof(struct hypctx));
+
+ hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
+ hypctx->hyp = hyp;
+ hypctx->vcpu = vcpu1;
+ hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM;
+
+ /* sstatus */
+ hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE;
+ hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL;
+
+ /* hstatus */
+ hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW;
+ hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP;
+
+ hypctx->cpu_id = vcpuid;
+ hyp->ctx[vcpuid] = hypctx;
+
+ aplic_cpuinit(hypctx);
+
+ return (hypctx);
+}
+
+static int
+riscv_vmm_pinit(pmap_t pmap)
+{
+
+ dprintf("%s: pmap %p\n", __func__, pmap);
+
+ pmap_pinit_stage(pmap, PM_STAGE2);
+
+ return (1);
+}
+
+struct vmspace *
+vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
+{
+
+ return (vmspace_alloc(min, max, riscv_vmm_pinit));
+}
+
+void
+vmmops_vmspace_free(struct vmspace *vmspace)
+{
+
+ pmap_remove_pages(vmspace_pmap(vmspace));
+ vmspace_free(vmspace);
+}
+
+static void
+riscv_unpriv_read(struct hypctx *hypctx, uintptr_t guest_addr, uint64_t *data,
+ struct hyptrap *trap)
+{
+ register struct hyptrap * htrap asm("a0");
+ uintptr_t old_hstatus;
+ uintptr_t old_stvec;
+ uintptr_t entry;
+ uint64_t val;
+ uint64_t tmp;
+ int intr;
+
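+	/*
+	 * vmm_unpriv_trap() expects a pointer to the hyptrap structure in a0;
+	 * see the explicit register binding of 'htrap' above.
+	 */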
+ entry = (uintptr_t)&vmm_unpriv_trap;
+ htrap = trap;
+
+ intr = intr_disable();
+
+ old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus);
+ /*
+	 * Set up a temporary exception vector so that if hlvx.hu raises
+	 * an exception we catch it in vmm_unpriv_trap().
+ */
+ old_stvec = csr_swap(stvec, entry);
+
+ /*
+	 * Read the first two bytes of the instruction, since it may be a
+	 * compressed one.
+ */
+ __asm __volatile(".option push\n"
+ ".option norvc\n"
+ "hlvx.hu %[val], (%[addr])\n"
+ ".option pop\n"
+ : [val] "=r" (val)
+ : [addr] "r" (guest_addr), "r" (htrap)
+ : "a1", "memory");
+
+ /*
+	 * If the previous hlvx.hu did not raise an exception, read the rest
+	 * of the instruction if it is a full-length one.
+ */
+ if (trap->scause == -1 && (val & 0x3) == 0x3) {
+ guest_addr += 2;
+ __asm __volatile(".option push\n"
+ ".option norvc\n"
+ "hlvx.hu %[tmp], (%[addr])\n"
+ ".option pop\n"
+ : [tmp] "=r" (tmp)
+ : [addr] "r" (guest_addr), "r" (htrap)
+ : "a1", "memory");
+ val |= (tmp << 16);
+ }
+
+ csr_write(hstatus, old_hstatus);
+ csr_write(stvec, old_stvec);
+
+ intr_restore(intr);
+
+ *data = val;
+}
+
+static int
+riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret,
+ struct hyptrap *trap)
+{
+ uintptr_t guest_addr;
+ struct vie *vie;
+ uint64_t insn;
+ int reg_num;
+ int rs2, rd;
+ int direction;
+ int sign_extend;
+ int access_size;
+
+ guest_addr = vme_ret->sepc;
+
+ KASSERT(vme_ret->scause == SCAUSE_FETCH_GUEST_PAGE_FAULT ||
+ vme_ret->scause == SCAUSE_LOAD_GUEST_PAGE_FAULT ||
+ vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT,
+ ("Invalid scause"));
+
+ direction = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ?
+ VM_DIR_WRITE : VM_DIR_READ;
+
+ sign_extend = 1;
+
+ bzero(trap, sizeof(struct hyptrap));
+ trap->scause = -1;
+ riscv_unpriv_read(hypctx, guest_addr, &insn, trap);
+ if (trap->scause != -1)
+ return (-1);
+
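+	/*
+	 * The two lowest opcode bits are 0b11 for a full 32-bit instruction;
+	 * any other value denotes a 16-bit compressed encoding.
+	 */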
+ if ((insn & 0x3) == 0x3) {
+ rs2 = (insn & RS2_MASK) >> RS2_SHIFT;
+ rd = (insn & RD_MASK) >> RD_SHIFT;
+
+ if (direction == VM_DIR_WRITE) {
+ if (m_op(insn, MATCH_SB, MASK_SB))
+ access_size = 1;
+ else if (m_op(insn, MATCH_SH, MASK_SH))
+ access_size = 2;
+ else if (m_op(insn, MATCH_SW, MASK_SW))
+ access_size = 4;
+ else if (m_op(insn, MATCH_SD, MASK_SD))
+ access_size = 8;
+ else {
+				printf("unknown store instr at %lx\n",
+				    guest_addr);
+ return (-2);
+ }
+ reg_num = rs2;
+ } else {
+ if (m_op(insn, MATCH_LB, MASK_LB))
+ access_size = 1;
+ else if (m_op(insn, MATCH_LH, MASK_LH))
+ access_size = 2;
+ else if (m_op(insn, MATCH_LW, MASK_LW))
+ access_size = 4;
+ else if (m_op(insn, MATCH_LD, MASK_LD))
+ access_size = 8;
+ else if (m_op(insn, MATCH_LBU, MASK_LBU)) {
+ access_size = 1;
+ sign_extend = 0;
+ } else if (m_op(insn, MATCH_LHU, MASK_LHU)) {
+ access_size = 2;
+ sign_extend = 0;
+ } else if (m_op(insn, MATCH_LWU, MASK_LWU)) {
+ access_size = 4;
+ sign_extend = 0;
+ } else {
+				printf("unknown load instr at %lx\n",
+				    guest_addr);
+ return (-3);
+ }
+ reg_num = rd;
+ }
+ vme_ret->inst_length = 4;
+ } else {
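+		/*
+		 * Compressed loads and stores encode their registers in three
+		 * bits that map to x8-x15, hence the 0x8 offset below.
+		 */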
+ rs2 = (insn >> 7) & 0x7;
+ rs2 += 0x8;
+ rd = (insn >> 2) & 0x7;
+ rd += 0x8;
+
+ if (direction == VM_DIR_WRITE) {
+ if (m_op(insn, MATCH_C_SW, MASK_C_SW))
+ access_size = 4;
+ else if (m_op(insn, MATCH_C_SD, MASK_C_SD))
+ access_size = 8;
+ else {
+				printf("unknown compressed store instr at %lx\n",
+				    guest_addr);
+ return (-4);
+ }
+ } else {
+ if (m_op(insn, MATCH_C_LW, MASK_C_LW))
+ access_size = 4;
+ else if (m_op(insn, MATCH_C_LD, MASK_C_LD))
+ access_size = 8;
+ else {
+				printf("unknown compressed load instr at %lx\n",
+				    guest_addr);
+ return (-5);
+ }
+ }
+ reg_num = rd;
+ vme_ret->inst_length = 2;
+ }
+
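+	/*
+	 * htval holds the faulting guest physical address shifted right by
+	 * two; its lowest two bits come from stval.
+	 */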
+ vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) |
+ (vme_ret->stval & 0x3);
+
+ dprintf("guest_addr %lx insn %lx, reg %d, gpa %lx\n", guest_addr, insn,
+ reg_num, vme_ret->u.inst_emul.gpa);
+
+ vie = &vme_ret->u.inst_emul.vie;
+ vie->dir = direction;
+ vie->reg = reg_num;
+ vie->sign_extend = sign_extend;
+ vie->access_size = access_size;
+
+ return (0);
+}
+
+static bool
+riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme,
+ pmap_t pmap)
+{
+ struct hyptrap trap;
+ uint64_t insn;
+ uint64_t gpa;
+ bool handled;
+ bool retu;
+ int ret;
+ int i;
+
+ handled = false;
+
+ if (vme->scause & SCAUSE_INTR) {
+ /*
+ * Host interrupt? Leave critical section to handle.
+ */
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ vme->inst_length = 0;
+ return (handled);
+ }
+
+ switch (vme->scause) {
+ case SCAUSE_FETCH_GUEST_PAGE_FAULT:
+ case SCAUSE_LOAD_GUEST_PAGE_FAULT:
+ case SCAUSE_STORE_GUEST_PAGE_FAULT:
+ gpa = (vme->htval << 2) | (vme->stval & 0x3);
+ if (vm_mem_allocated(hypctx->vcpu, gpa)) {
+ vme->exitcode = VM_EXITCODE_PAGING;
+ vme->inst_length = 0;
+ vme->u.paging.gpa = gpa;
+ } else {
+ ret = riscv_gen_inst_emul_data(hypctx, vme, &trap);
+ if (ret != 0) {
+ vme->exitcode = VM_EXITCODE_HYP;
+ vme->u.hyp.scause = trap.scause;
+ break;
+ }
+ vme->exitcode = VM_EXITCODE_INST_EMUL;
+ }
+ break;
+ case SCAUSE_ILLEGAL_INSTRUCTION:
+ /*
+ * TODO: handle illegal instruction properly.
+ */
+ printf("%s: Illegal instruction at %lx stval 0x%lx htval "
+ "0x%lx\n", __func__, vme->sepc, vme->stval, vme->htval);
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = false;
+ break;
+ case SCAUSE_VIRTUAL_SUPERVISOR_ECALL:
+ retu = false;
+ vmm_sbi_ecall(hypctx->vcpu, &retu);
+ if (retu == false) {
+ handled = true;
+ break;
+ }
+ for (i = 0; i < nitems(vme->u.ecall.args); i++)
+ vme->u.ecall.args[i] = hypctx->guest_regs.hyp_a[i];
+ vme->exitcode = VM_EXITCODE_ECALL;
+ handled = false;
+ break;
+ case SCAUSE_VIRTUAL_INSTRUCTION:
+ insn = vme->stval;
+ if (m_op(insn, MATCH_WFI, MASK_WFI))
+ vme->exitcode = VM_EXITCODE_WFI;
+ else
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = false;
+ break;
+ default:
+ printf("unknown scause %lx\n", vme->scause);
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = false;
+ break;
+ }
+
+ return (handled);
+}
+
+int
+vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
+ int prot, uint64_t *gpa, int *is_fault)
+{
+
+ /* Implement me. */
+
+ return (ENOSYS);
+}
+
+void
+riscv_send_ipi(struct hypctx *hypctx, int hart_id)
+{
+ struct hyp *hyp;
+ struct vm *vm;
+
+ hyp = hypctx->hyp;
+ vm = hyp->vm;
+
+ atomic_set_32(&hypctx->ipi_pending, 1);
+
+ vcpu_notify_event(vm_vcpu(vm, hart_id));
+}
+
+int
+riscv_check_ipi(struct hypctx *hypctx, bool clear)
+{
+ int val;
+
+ if (clear)
+ val = atomic_swap_32(&hypctx->ipi_pending, 0);
+ else
+ val = hypctx->ipi_pending;
+
+ return (val);
+}
+
+static void
+riscv_sync_interrupts(struct hypctx *hypctx)
+{
+ int pending;
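+	/* Reflect the APLIC pending state in the VS-level external interrupt bit. */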
+
+ pending = aplic_check_pending(hypctx);
+
+ if (pending)
+ hypctx->guest_csrs.hvip |= HVIP_VSEIP;
+ else
+ hypctx->guest_csrs.hvip &= ~HVIP_VSEIP;
+
+ csr_write(hvip, hypctx->guest_csrs.hvip);
+}
+
+static void
+riscv_sync_ipi(struct hypctx *hypctx)
+{
+
+ /* Guest clears VSSIP bit manually. */
+ if (riscv_check_ipi(hypctx, true))
+ hypctx->guest_csrs.hvip |= HVIP_VSSIP;
+
+ csr_write(hvip, hypctx->guest_csrs.hvip);
+}
+
+int
+vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
+{
+ struct hypctx *hypctx;
+ struct vm_exit *vme;
+ struct vcpu *vcpu;
+ register_t val;
+ bool handled;
+
+ hypctx = (struct hypctx *)vcpui;
+ vcpu = hypctx->vcpu;
+ vme = vm_exitinfo(vcpu);
+
+ hypctx->guest_regs.hyp_sepc = (uint64_t)pc;
+
+ vmmops_delegate();
+
+ /*
+ * From The RISC-V Instruction Set Manual
+ * Volume II: RISC-V Privileged Architectures
+ *
+ * If the new virtual machine's guest physical page tables
+ * have been modified, it may be necessary to execute an HFENCE.GVMA
+ * instruction (see Section 5.3.2) before or after writing hgatp.
+ */
+ __asm __volatile("hfence.gvma" ::: "memory");
+
+ csr_write(hgatp, pmap->pm_satp);
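+	/* Allow the guest to use the Sstc extension (the stimecmp CSR). */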
+ csr_write(henvcfg, HENVCFG_STCE);
+ csr_write(hie, HIE_VSEIE | HIE_VSSIE | HIE_SGEIE);
+ /* TODO: should we trap rdcycle / rdtime? */
+ csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM);
+
+ vmmops_vcpu_restore_csrs(hypctx);
+
+ for (;;) {
+ dprintf("%s: pc %lx\n", __func__, pc);
+
+ if (hypctx->has_exception) {
+ hypctx->has_exception = false;
+ /*
+ * TODO: implement exception injection.
+ */
+ }
+
+ val = intr_disable();
+
+ /* Check if the vcpu is suspended */
+ if (vcpu_suspended(evinfo)) {
+ intr_restore(val);
+ vm_exit_suspended(vcpu, pc);
+ break;
+ }
+
+ if (vcpu_debugged(vcpu)) {
+ intr_restore(val);
+ vm_exit_debug(vcpu, pc);
+ break;
+ }
+
+ /*
+ * TODO: What happens if a timer interrupt is asserted exactly
+ * here, but for the previous VM?
+ */
+ riscv_set_active_vcpu(hypctx);
+ aplic_flush_hwstate(hypctx);
+
+ riscv_sync_interrupts(hypctx);
+ riscv_sync_ipi(hypctx);
+
+ dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n",
+ __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus,
+ hypctx->guest_regs.hyp_hstatus);
+
+ vmm_switch(hypctx);
+
+ dprintf("%s: Leaving guest VM, hstatus %lx\n", __func__,
+ hypctx->guest_regs.hyp_hstatus);
+
+ aplic_sync_hwstate(hypctx);
+ riscv_sync_interrupts(hypctx);
+
+ /*
+ * TODO: deactivate stage 2 pmap here if needed.
+ */
+
+ vme->scause = csr_read(scause);
+ vme->sepc = csr_read(sepc);
+ vme->stval = csr_read(stval);
+ vme->htval = csr_read(htval);
+ vme->htinst = csr_read(htinst);
+
+ intr_restore(val);
+
+ vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
+ vme->pc = hypctx->guest_regs.hyp_sepc;
+ vme->inst_length = INSN_SIZE;
+
+ handled = riscv_handle_world_switch(hypctx, vme, pmap);
+ if (handled == false)
+ /* Exit loop to emulate instruction. */
+ break;
+ else {
+ /* Resume guest execution from the next instruction. */
+ hypctx->guest_regs.hyp_sepc += vme->inst_length;
+ }
+ }
+
+ vmmops_vcpu_save_csrs(hypctx);
+
+ return (0);
+}
+
+static void
+riscv_pcpu_vmcleanup(void *arg)
+{
+ struct hyp *hyp;
+ int i, maxcpus;
+
+ hyp = arg;
+ maxcpus = vm_get_maxcpus(hyp->vm);
+ for (i = 0; i < maxcpus; i++) {
+ if (riscv_get_active_vcpu() == hyp->ctx[i]) {
+ riscv_set_active_vcpu(NULL);
+ break;
+ }
+ }
+}
+
+void
+vmmops_vcpu_cleanup(void *vcpui)
+{
+ struct hypctx *hypctx;
+
+ hypctx = vcpui;
+
+ dprintf("%s\n", __func__);
+
+ aplic_cpucleanup(hypctx);
+
+ free(hypctx, M_HYP);
+}
+
+void
+vmmops_cleanup(void *vmi)
+{
+ struct hyp *hyp;
+
+ hyp = vmi;
+
+ dprintf("%s\n", __func__);
+
+ aplic_vmcleanup(hyp);
+
+ smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp);
+
+ free(hyp, M_HYP);
+}
+
+/*
+ * Return a pointer to the guest register storage for 'reg', or NULL if the
+ * register is not handled here.
+ */
+static uint64_t *
+hypctx_regptr(struct hypctx *hypctx, int reg)
+{
+
+ switch (reg) {
+ case VM_REG_GUEST_RA:
+ return (&hypctx->guest_regs.hyp_ra);
+ case VM_REG_GUEST_SP:
+ return (&hypctx->guest_regs.hyp_sp);
+ case VM_REG_GUEST_GP:
+ return (&hypctx->guest_regs.hyp_gp);
+ case VM_REG_GUEST_TP:
+ return (&hypctx->guest_regs.hyp_tp);
+ case VM_REG_GUEST_T0:
+ return (&hypctx->guest_regs.hyp_t[0]);
+ case VM_REG_GUEST_T1:
+ return (&hypctx->guest_regs.hyp_t[1]);
+ case VM_REG_GUEST_T2:
+ return (&hypctx->guest_regs.hyp_t[2]);
+ case VM_REG_GUEST_S0:
+ return (&hypctx->guest_regs.hyp_s[0]);
+ case VM_REG_GUEST_S1:
+ return (&hypctx->guest_regs.hyp_s[1]);
+ case VM_REG_GUEST_A0:
+ return (&hypctx->guest_regs.hyp_a[0]);
+ case VM_REG_GUEST_A1:
+ return (&hypctx->guest_regs.hyp_a[1]);
+ case VM_REG_GUEST_A2:
+ return (&hypctx->guest_regs.hyp_a[2]);
+ case VM_REG_GUEST_A3:
+ return (&hypctx->guest_regs.hyp_a[3]);
+ case VM_REG_GUEST_A4:
+ return (&hypctx->guest_regs.hyp_a[4]);
+ case VM_REG_GUEST_A5:
+ return (&hypctx->guest_regs.hyp_a[5]);
+ case VM_REG_GUEST_A6:
+ return (&hypctx->guest_regs.hyp_a[6]);
+ case VM_REG_GUEST_A7:
+ return (&hypctx->guest_regs.hyp_a[7]);
+ case VM_REG_GUEST_S2:
+ return (&hypctx->guest_regs.hyp_s[2]);
+ case VM_REG_GUEST_S3:
+ return (&hypctx->guest_regs.hyp_s[3]);
+ case VM_REG_GUEST_S4:
+ return (&hypctx->guest_regs.hyp_s[4]);
+ case VM_REG_GUEST_S5:
+ return (&hypctx->guest_regs.hyp_s[5]);
+ case VM_REG_GUEST_S6:
+ return (&hypctx->guest_regs.hyp_s[6]);
+ case VM_REG_GUEST_S7:
+ return (&hypctx->guest_regs.hyp_s[7]);
+ case VM_REG_GUEST_S8:
+ return (&hypctx->guest_regs.hyp_s[8]);
+ case VM_REG_GUEST_S9:
+ return (&hypctx->guest_regs.hyp_s[9]);
+ case VM_REG_GUEST_S10:
+ return (&hypctx->guest_regs.hyp_s[10]);
+ case VM_REG_GUEST_S11:
+ return (&hypctx->guest_regs.hyp_s[11]);
+ case VM_REG_GUEST_T3:
+ return (&hypctx->guest_regs.hyp_t[3]);
+ case VM_REG_GUEST_T4:
+ return (&hypctx->guest_regs.hyp_t[4]);
+ case VM_REG_GUEST_T5:
+ return (&hypctx->guest_regs.hyp_t[5]);
+ case VM_REG_GUEST_T6:
+ return (&hypctx->guest_regs.hyp_t[6]);
+ case VM_REG_GUEST_SEPC:
+ return (&hypctx->guest_regs.hyp_sepc);
+ default:
+ break;
+ }
+
+ return (NULL);
+}
+
+int
+vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
+{
+ uint64_t *regp;
+ int running, hostcpu;
+ struct hypctx *hypctx;
+
+ hypctx = vcpui;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ if (reg == VM_REG_GUEST_ZERO) {
+ *retval = 0;
+ return (0);
+ }
+
+ regp = hypctx_regptr(hypctx, reg);
+ if (regp == NULL)
+ return (EINVAL);
+
+ *retval = *regp;
+
+ return (0);
+}
+
+int
+vmmops_setreg(void *vcpui, int reg, uint64_t val)
+{
+ struct hypctx *hypctx;
+ int running, hostcpu;
+ uint64_t *regp;
+
+ hypctx = vcpui;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ regp = hypctx_regptr(hypctx, reg);
+ if (regp == NULL)
+ return (EINVAL);
+
+ *regp = val;
+
+ return (0);
+}
+
+int
+vmmops_exception(void *vcpui, uint64_t scause)
+{
+ struct hypctx *hypctx;
+ int running, hostcpu;
+
+ hypctx = vcpui;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ /* TODO: implement me. */
+
+ return (ENOSYS);
+}
+
+int
+vmmops_getcap(void *vcpui, int num, int *retval)
+{
+ int ret;
+
+ ret = ENOENT;
+
+ switch (num) {
+ case VM_CAP_UNRESTRICTED_GUEST:
+ *retval = 1;
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+
+ return (ret);
+}
+
+int
+vmmops_setcap(void *vcpui, int num, int val)
+{
+
+ return (ENOENT);
+}
diff --git a/sys/riscv/vmm/vmm_sbi.c b/sys/riscv/vmm/vmm_sbi.c
new file mode 100644
--- /dev/null
+++ b/sys/riscv/vmm/vmm_sbi.c
@@ -0,0 +1,179 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/sysctl.h>
+#include <sys/libkern.h>
+#include <sys/ioccom.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/md_var.h>
+#include <machine/sbi.h>
+
+#include "riscv.h"
+
+static int
+vmm_sbi_handle_rfnc(struct vcpu *vcpu, struct hypctx *hypctx)
+{
+ uint64_t hart_mask __unused;
+ uint64_t start __unused;
+ uint64_t size __unused;
+ uint64_t asid __unused;
+ uint64_t func_id;
+
+ func_id = hypctx->guest_regs.hyp_a[6];
+ hart_mask = hypctx->guest_regs.hyp_a[0];
+ start = hypctx->guest_regs.hyp_a[2];
+ size = hypctx->guest_regs.hyp_a[3];
+ asid = hypctx->guest_regs.hyp_a[4];
+
+ dprintf("%s: %ld hart_mask %lx start %lx size %lx\n", __func__,
+ func_id, hart_mask, start, size);
+
+ /* TODO: implement remote sfence. */
+
+ switch (func_id) {
+ case SBI_RFNC_REMOTE_FENCE_I:
+ break;
+ case SBI_RFNC_REMOTE_SFENCE_VMA:
+ break;
+ case SBI_RFNC_REMOTE_SFENCE_VMA_ASID:
+ break;
+ default:
+ break;
+ }
+
+ hypctx->guest_regs.hyp_a[0] = 0;
+
+ return (0);
+}
+
+static int
+vmm_sbi_handle_ipi(struct vcpu *vcpu, struct hypctx *hypctx)
+{
+ struct hypctx *target_hypctx;
+ struct vcpu *target_vcpu __unused;
+ cpuset_t active_cpus;
+ struct hyp *hyp;
+ uint64_t hart_mask;
+ uint64_t func_id;
+ int hart_id;
+ int bit;
+ int ret;
+
+ func_id = hypctx->guest_regs.hyp_a[6];
+ hart_mask = hypctx->guest_regs.hyp_a[0];
+
+ dprintf("%s: hart_mask %lx\n", __func__, hart_mask);
+
+ hyp = hypctx->hyp;
+
+ active_cpus = vm_active_cpus(hyp->vm);
+
+ switch (func_id) {
+ case SBI_IPI_SEND_IPI:
+ while ((bit = ffsl(hart_mask)) != 0) {
+ hart_id = (bit - 1);
+ hart_mask &= ~(1ul << hart_id);
+ if (CPU_ISSET(hart_id, &active_cpus)) {
+ /* TODO. */
+ target_vcpu = vm_vcpu(hyp->vm, hart_id);
+ target_hypctx = hypctx->hyp->ctx[hart_id];
+ riscv_send_ipi(target_hypctx, hart_id);
+ }
+ }
+ ret = 0;
+ break;
+ default:
+ printf("%s: unknown func %ld\n", __func__, func_id);
+ ret = -1;
+ break;
+ }
+
+ hypctx->guest_regs.hyp_a[0] = ret;
+
+ return (0);
+}
+
+int
+vmm_sbi_ecall(struct vcpu *vcpu, bool *retu)
+{
+ int sbi_extension_id __unused;
+ struct hypctx *hypctx;
+
+ hypctx = riscv_get_active_vcpu();
+ sbi_extension_id = hypctx->guest_regs.hyp_a[7];
+
+ dprintf("%s: args %lx %lx %lx %lx %lx %lx %lx %lx\n", __func__,
+ hypctx->guest_regs.hyp_a[0],
+ hypctx->guest_regs.hyp_a[1],
+ hypctx->guest_regs.hyp_a[2],
+ hypctx->guest_regs.hyp_a[3],
+ hypctx->guest_regs.hyp_a[4],
+ hypctx->guest_regs.hyp_a[5],
+ hypctx->guest_regs.hyp_a[6],
+ hypctx->guest_regs.hyp_a[7]);
+
+ switch (sbi_extension_id) {
+ case SBI_EXT_ID_RFNC:
+ vmm_sbi_handle_rfnc(vcpu, hypctx);
+ break;
+ case SBI_EXT_ID_TIME:
+ break;
+ case SBI_EXT_ID_IPI:
+ vmm_sbi_handle_ipi(vcpu, hypctx);
+ break;
+ default:
+ *retu = true;
+ break;
+ }
+
+ return (0);
+}
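For reference, vmm_sbi_ecall() above decodes the standard SBI calling convention: the extension ID arrives in a7, the function ID in a6, the arguments in a0-a5, and the error code is written back to the guest's a0. A guest-side sketch of the IPI call handled by vmm_sbi_handle_ipi() follows; it is illustrative only and not part of the patch, with the EID/FID values taken from the SBI spec (note the handler currently consumes only the hart mask in a0 and ignores hart_mask_base in a1):

static long
guest_sbi_send_ipi(unsigned long hart_mask, unsigned long hart_mask_base)
{
	/* sPI extension (EID 0x735049), function sbi_send_ipi (FID 0). */
	register unsigned long a0 __asm("a0") = hart_mask;
	register unsigned long a1 __asm("a1") = hart_mask_base;
	register unsigned long a6 __asm("a6") = 0;
	register unsigned long a7 __asm("a7") = 0x735049;

	__asm __volatile("ecall"
	    : "+r" (a0), "+r" (a1)
	    : "r" (a6), "r" (a7)
	    : "memory");

	return ((long)a0);	/* SBI error code; 0 on success. */
}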
diff --git a/sys/riscv/include/md_var.h b/sys/riscv/vmm/vmm_stat.h
copy from sys/riscv/include/md_var.h
copy to sys/riscv/vmm/vmm_stat.h
--- a/sys/riscv/include/md_var.h
+++ b/sys/riscv/vmm/vmm_stat.h
@@ -1,5 +1,7 @@
/*-
- * Copyright (c) 1995 Bruce D. Evans.
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -10,14 +12,14 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the author nor the names of contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
@@ -25,30 +27,17 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * from: FreeBSD: src/sys/i386/include/md_var.h,v 1.40 2001/07/12
*/

-#ifndef _MACHINE_MD_VAR_H_
-#define _MACHINE_MD_VAR_H_
-
-extern long Maxmem;
-extern char sigcode[];
-extern int szsigcode;
-extern u_long elf_hwcap;
-extern register_t mvendorid;
-extern register_t marchid;
-extern register_t mimpid;
-extern u_int mmu_caps;
-
-/* Supervisor-mode extension support */
-extern bool has_sstc;
-extern bool has_sscofpmf;
-extern bool has_svpbmt;
+#ifndef _VMM_STAT_H_
+#define _VMM_STAT_H_

-struct dumperinfo;
-struct minidumpstate;
+#include <dev/vmm/vmm_stat.h>

-int cpu_minidumpsys(struct dumperinfo *, const struct minidumpstate *);
+VMM_STAT_DECLARE(VMEXIT_COUNT);
+VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
+VMM_STAT_DECLARE(VMEXIT_WFI);
+VMM_STAT_DECLARE(VMEXIT_IRQ);
+VMM_STAT_DECLARE(VMEXIT_UNHANDLED);

-#endif /* !_MACHINE_MD_VAR_H_ */
+#endif
diff --git a/sys/riscv/vmm/vmm_switch.S b/sys/riscv/vmm/vmm_switch.S
new file mode 100644
--- /dev/null
+++ b/sys/riscv/vmm/vmm_switch.S
@@ -0,0 +1,220 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+#include "assym.inc"
+
+ .text
+
+/*
+ * a0 == hypctx *
+ */
+ENTRY(vmm_switch)
+ sd ra, (HYP_H_RA)(a0)
+ sd sp, (HYP_H_SP)(a0)
+ sd tp, (HYP_H_TP)(a0)
+ sd gp, (HYP_H_GP)(a0)
+ sd s0, (HYP_H_S + 0 * 8)(a0)
+ sd s1, (HYP_H_S + 1 * 8)(a0)
+ sd s2, (HYP_H_S + 2 * 8)(a0)
+ sd s3, (HYP_H_S + 3 * 8)(a0)
+ sd s4, (HYP_H_S + 4 * 8)(a0)
+ sd s5, (HYP_H_S + 5 * 8)(a0)
+ sd s6, (HYP_H_S + 6 * 8)(a0)
+ sd s7, (HYP_H_S + 7 * 8)(a0)
+ sd s8, (HYP_H_S + 8 * 8)(a0)
+ sd s9, (HYP_H_S + 9 * 8)(a0)
+ sd s10, (HYP_H_S + 10 * 8)(a0)
+ sd s11, (HYP_H_S + 11 * 8)(a0)
+
+ sd a1, (HYP_H_A + 1 * 8)(a0)
+ sd a2, (HYP_H_A + 2 * 8)(a0)
+ sd a3, (HYP_H_A + 3 * 8)(a0)
+ sd a4, (HYP_H_A + 4 * 8)(a0)
+ sd a5, (HYP_H_A + 5 * 8)(a0)
+ sd a6, (HYP_H_A + 6 * 8)(a0)
+ sd a7, (HYP_H_A + 7 * 8)(a0)
+
+ ld t0, (HYP_G_SSTATUS)(a0)
+ ld t1, (HYP_G_HSTATUS)(a0)
+ ld t2, (HYP_G_SCOUNTEREN)(a0)
+ la t4, .Lswitch_return
+ ld t5, (HYP_G_SEPC)(a0)
+
+ csrrw t0, sstatus, t0
+ csrrw t1, hstatus, t1
+ csrrw t2, scounteren, t2
+ csrrw t3, sscratch, a0
+ csrrw t4, stvec, t4
+ csrw sepc, t5
+
+ sd t0, (HYP_H_SSTATUS)(a0)
+ sd t1, (HYP_H_HSTATUS)(a0)
+ sd t2, (HYP_H_SCOUNTEREN)(a0)
+ sd t3, (HYP_H_SSCRATCH)(a0)
+ sd t4, (HYP_H_STVEC)(a0)
+
+ ld ra, (HYP_G_RA)(a0)
+ ld sp, (HYP_G_SP)(a0)
+ ld gp, (HYP_G_GP)(a0)
+ ld tp, (HYP_G_TP)(a0)
+ ld t0, (HYP_G_T + 0 * 8)(a0)
+ ld t1, (HYP_G_T + 1 * 8)(a0)
+ ld t2, (HYP_G_T + 2 * 8)(a0)
+ ld t3, (HYP_G_T + 3 * 8)(a0)
+ ld t4, (HYP_G_T + 4 * 8)(a0)
+ ld t5, (HYP_G_T + 5 * 8)(a0)
+ ld t6, (HYP_G_T + 6 * 8)(a0)
+ ld s0, (HYP_G_S + 0 * 8)(a0)
+ ld s1, (HYP_G_S + 1 * 8)(a0)
+ ld s2, (HYP_G_S + 2 * 8)(a0)
+ ld s3, (HYP_G_S + 3 * 8)(a0)
+ ld s4, (HYP_G_S + 4 * 8)(a0)
+ ld s5, (HYP_G_S + 5 * 8)(a0)
+ ld s6, (HYP_G_S + 6 * 8)(a0)
+ ld s7, (HYP_G_S + 7 * 8)(a0)
+ ld s8, (HYP_G_S + 8 * 8)(a0)
+ ld s9, (HYP_G_S + 9 * 8)(a0)
+ ld s10, (HYP_G_S + 10 * 8)(a0)
+ ld s11, (HYP_G_S + 11 * 8)(a0)
+ /* skip a0 for now. */
+ ld a1, (HYP_G_A + 1 * 8)(a0)
+ ld a2, (HYP_G_A + 2 * 8)(a0)
+ ld a3, (HYP_G_A + 3 * 8)(a0)
+ ld a4, (HYP_G_A + 4 * 8)(a0)
+ ld a5, (HYP_G_A + 5 * 8)(a0)
+ ld a6, (HYP_G_A + 6 * 8)(a0)
+ ld a7, (HYP_G_A + 7 * 8)(a0)
+ /* now load a0. */
+ ld a0, (HYP_G_A + 0 * 8)(a0)
+
+ sret
+
+ .align 2
+.Lswitch_return:
+
+ csrrw a0, sscratch, a0
+ sd ra, (HYP_G_RA)(a0)
+ sd sp, (HYP_G_SP)(a0)
+ sd gp, (HYP_G_GP)(a0)
+ sd tp, (HYP_G_TP)(a0)
+ sd t0, (HYP_G_T + 0 * 8)(a0)
+ sd t1, (HYP_G_T + 1 * 8)(a0)
+ sd t2, (HYP_G_T + 2 * 8)(a0)
+ sd t3, (HYP_G_T + 3 * 8)(a0)
+ sd t4, (HYP_G_T + 4 * 8)(a0)
+ sd t5, (HYP_G_T + 5 * 8)(a0)
+ sd t6, (HYP_G_T + 6 * 8)(a0)
+ sd s0, (HYP_G_S + 0 * 8)(a0)
+ sd s1, (HYP_G_S + 1 * 8)(a0)
+ sd s2, (HYP_G_S + 2 * 8)(a0)
+ sd s3, (HYP_G_S + 3 * 8)(a0)
+ sd s4, (HYP_G_S + 4 * 8)(a0)
+ sd s5, (HYP_G_S + 5 * 8)(a0)
+ sd s6, (HYP_G_S + 6 * 8)(a0)
+ sd s7, (HYP_G_S + 7 * 8)(a0)
+ sd s8, (HYP_G_S + 8 * 8)(a0)
+ sd s9, (HYP_G_S + 9 * 8)(a0)
+ sd s10, (HYP_G_S + 10 * 8)(a0)
+ sd s11, (HYP_G_S + 11 * 8)(a0)
+ /* skip a0 */
+ sd a1, (HYP_G_A + 1 * 8)(a0)
+ sd a2, (HYP_G_A + 2 * 8)(a0)
+ sd a3, (HYP_G_A + 3 * 8)(a0)
+ sd a4, (HYP_G_A + 4 * 8)(a0)
+ sd a5, (HYP_G_A + 5 * 8)(a0)
+ sd a6, (HYP_G_A + 6 * 8)(a0)
+ sd a7, (HYP_G_A + 7 * 8)(a0)
+
+ ld t1, (HYP_H_STVEC)(a0)
+ ld t2, (HYP_H_SSCRATCH)(a0)
+ ld t3, (HYP_H_SCOUNTEREN)(a0)
+ ld t4, (HYP_H_HSTATUS)(a0)
+ ld t5, (HYP_H_SSTATUS)(a0)
+
+ csrr t0, sepc
+ csrw stvec, t1
+ csrrw t2, sscratch, t2
+ csrrw t3, scounteren, t3
+ csrrw t4, hstatus, t4
+ csrrw t5, sstatus, t5
+
+ sd t0, (HYP_G_SEPC)(a0)
+ sd t2, (HYP_G_A + 0 * 8)(a0)
+ sd t3, (HYP_G_SCOUNTEREN)(a0)
+ sd t4, (HYP_G_HSTATUS)(a0)
+ sd t5, (HYP_G_SSTATUS)(a0)
+
+ ld ra, (HYP_H_RA)(a0)
+ ld sp, (HYP_H_SP)(a0)
+ ld tp, (HYP_H_TP)(a0)
+ ld gp, (HYP_H_GP)(a0)
+ ld s0, (HYP_H_S + 0 * 8)(a0)
+ ld s1, (HYP_H_S + 1 * 8)(a0)
+ ld s2, (HYP_H_S + 2 * 8)(a0)
+ ld s3, (HYP_H_S + 3 * 8)(a0)
+ ld s4, (HYP_H_S + 4 * 8)(a0)
+ ld s5, (HYP_H_S + 5 * 8)(a0)
+ ld s6, (HYP_H_S + 6 * 8)(a0)
+ ld s7, (HYP_H_S + 7 * 8)(a0)
+ ld s8, (HYP_H_S + 8 * 8)(a0)
+ ld s9, (HYP_H_S + 9 * 8)(a0)
+ ld s10, (HYP_H_S + 10 * 8)(a0)
+ ld s11, (HYP_H_S + 11 * 8)(a0)
+
+ ld a1, (HYP_H_A + 1 * 8)(a0)
+ ld a2, (HYP_H_A + 2 * 8)(a0)
+ ld a3, (HYP_H_A + 3 * 8)(a0)
+ ld a4, (HYP_H_A + 4 * 8)(a0)
+ ld a5, (HYP_H_A + 5 * 8)(a0)
+ ld a6, (HYP_H_A + 6 * 8)(a0)
+ ld a7, (HYP_H_A + 7 * 8)(a0)
+
+ ret
+
+END(vmm_switch)
+
+ENTRY(vmm_unpriv_trap)
+ csrr a1, sepc
+ sd a1, HYP_TRAP_SEPC(a0)
+ addi a1, a1, 4 /* Next instruction after hlvx.hu */
+ csrw sepc, a1
+ csrr a1, scause
+ sd a1, HYP_TRAP_SCAUSE(a0)
+ csrr a1, stval
+ sd a1, HYP_TRAP_STVAL(a0)
+ csrr a1, htval
+ sd a1, HYP_TRAP_HTVAL(a0)
+ csrr a1, htinst
+ sd a1, HYP_TRAP_HTINST(a0)
+ sret
+END(vmm_unpriv_trap)
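A rough C-level view of how vmm_switch() above is intended to be driven: it saves the host's callee-saved state into the hypctx, points stvec at .Lswitch_return, and sret's into the guest; the guest's next trap into HS-mode lands on .Lswitch_return, which undoes the swap and returns to the C caller. The sketch below is hypothetical (the real run loop lives in vmm_riscv.c earlier in this diff and does much more bookkeeping); run_vcpu_once() is not part of the patch, while vmm_switch(), csr_read(), intr_disable() and intr_restore() are existing kernel symbols:

static uint64_t
run_vcpu_once(struct hypctx *hypctx)
{
	register_t intr;
	uint64_t scause;

	intr = intr_disable();		/* host interrupts stay off while stvec points at the switch-return path */
	vmm_switch(hypctx);		/* sret into the guest; returns on its next trap into HS-mode */
	scause = csr_read(scause);	/* why the guest exited; classified by the caller */
	intr_restore(intr);

	return (scause);
}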
