D26209.id98407.diff

diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -73,6 +73,7 @@
VM_SYSMEM,
VM_BOOTROM,
VM_FRAMEBUFFER,
+ VM_PCIROM,
};
/*
@@ -180,6 +181,8 @@
vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
vm_paddr_t gpa, size_t len);
+int vm_get_memory_region_info(struct vmctx *ctx, vm_paddr_t *base,
+ vm_paddr_t *size, enum vm_memory_region_type type);
int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot,
int func, uint64_t addr, uint64_t msg, int numvec);
int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot,
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -1009,6 +1009,25 @@
return (ioctl(ctx->fd, VM_UNMAP_PPTDEV_MMIO, &pptmmio));
}
+int
+vm_get_memory_region_info(struct vmctx *ctx, vm_paddr_t *base, vm_paddr_t *size,
+ enum vm_memory_region_type type)
+{
+ struct vm_memory_region_info memory_region_info;
+
+ bzero(&memory_region_info, sizeof(memory_region_info));
+ memory_region_info.type = type;
+
+ const int error = ioctl(ctx->fd, VM_GET_MEMORY_REGION_INFO, &memory_region_info);
+
+ if (base)
+ *base = memory_region_info.base;
+ if (size)
+ *size = memory_region_info.size;
+
+ return (error);
+}
+
int
vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
uint64_t addr, uint64_t msg, int numvec)
@@ -1684,7 +1703,7 @@
VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
VM_PPTDEV_MSIX, VM_UNMAP_PPTDEV_MMIO, VM_PPTDEV_DISABLE_MSIX,
- VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
+ VM_GET_MEMORY_REGION_INFO, VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
VM_GLA2GPA_NOFAULT,
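
As a usage sketch (not part of this revision), a bhyve device model could query both new region types through the libvmmapi wrapper added above; the helper name is illustrative and the surrounding emulation setup is omitted:

    #include <sys/types.h>
    #include <machine/vmm.h>
    #include <vmmapi.h>
    #include <err.h>

    /* Hypothetical helper: probe the host IGD regions before building BARs. */
    static int
    gvt_d_probe_host_regions(struct vmctx *ctx)
    {
            vm_paddr_t gsm_base, gsm_size;
            vm_paddr_t opregion_base, opregion_size;
            int error;

            /* Graphics Stolen Memory reserved by the host firmware. */
            error = vm_get_memory_region_info(ctx, &gsm_base, &gsm_size,
                MEMORY_REGION_INTEL_GSM);
            if (error != 0)
                    return (error);

            /* IGD OpRegion advertised by the host ASLS register. */
            error = vm_get_memory_region_info(ctx, &opregion_base,
                &opregion_size, MEMORY_REGION_INTEL_OPREGION);
            if (error != 0)
                    return (error);

            warnx("GSM %#lx/%#lx, OpRegion %#lx/%#lx",
                (unsigned long)gsm_base, (unsigned long)gsm_size,
                (unsigned long)opregion_base, (unsigned long)opregion_size);
            return (0);
    }
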
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -741,6 +741,11 @@
} u;
};
+enum vm_memory_region_type {
+ MEMORY_REGION_INTEL_GSM,
+ MEMORY_REGION_INTEL_OPREGION
+};
+
/* APIs to inject faults into the guest */
void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid,
int errcode);
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -146,6 +146,17 @@
size_t len;
};
+struct vm_memory_region_info {
+ vm_paddr_t base;
+ vm_paddr_t size;
+ enum vm_memory_region_type type;
+};
+
+#ifdef _KERNEL
+extern vm_paddr_t intel_graphics_stolen_base;
+extern vm_paddr_t intel_graphics_stolen_size;
+#endif
+
struct vm_pptdev_msi {
int vcpu;
int bus;
@@ -309,6 +320,7 @@
IOCNUM_PPTDEV_MSIX = 44,
IOCNUM_PPTDEV_DISABLE_MSIX = 45,
IOCNUM_UNMAP_PPTDEV_MMIO = 46,
+ IOCNUM_GET_MEMORY_REGION_INFO = 47,
/* statistics */
IOCNUM_VM_STATS = 50,
@@ -427,6 +439,8 @@
_IOW('v', IOCNUM_PPTDEV_DISABLE_MSIX, struct vm_pptdev)
#define VM_UNMAP_PPTDEV_MMIO \
_IOW('v', IOCNUM_UNMAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
+#define VM_GET_MEMORY_REGION_INFO \
+ _IOWR('v', IOCNUM_GET_MEMORY_REGION_INFO, struct vm_memory_region_info)
#define VM_INJECT_NMI \
_IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
#define VM_STATS \
diff --git a/sys/amd64/vmm/intel/intelgpu.h b/sys/amd64/vmm/intel/intelgpu.h
new file mode 100644
--- /dev/null
+++ b/sys/amd64/vmm/intel/intelgpu.h
@@ -0,0 +1,206 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#pragma once
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#define IGD_OPREGION_HEADER_SIGN "IntelGraphicsMem"
+#define IGD_OPREGION_HEADER_MBOX1 BIT0
+#define IGD_OPREGION_HEADER_MBOX2 BIT1
+#define IGD_OPREGION_HEADER_MBOX3 BIT2
+#define IGD_OPREGION_HEADER_MBOX4 BIT3
+#define IGD_OPREGION_HEADER_MBOX5 BIT4
+
+#define IGD_OPREGION_VBT_SIZE_6K (6 * 1024UL)
+
+/**
+ OpRegion structures:
+ Sub-structures define the different parts of the OpRegion followed by the
+ main structure representing the entire OpRegion.
+
+ @note These structures are packed to 1 byte offsets because the exact
+ data location is required by the supporting design specification due to
+ the fact that the data is used by ASL and Graphics driver code compiled
+ separately.
+**/
+#pragma pack(push, 1)
+///
+/// OpRegion Mailbox 0 Header structure. The OpRegion Header is used to
+/// identify a block of memory as the graphics driver OpRegion.
+/// Offset 0x0, Size 0x100
+///
+struct igd_opregion_header {
+ int8_t sign[0x10]; ///< Offset 0x00 OpRegion Signature
+ uint32_t size; ///< Offset 0x10 OpRegion Size
+ uint32_t over; ///< Offset 0x14 OpRegion Structure Version
+ uint8_t sver[0x20]; ///< Offset 0x18 System BIOS Build Version
+ uint8_t vver[0x10]; ///< Offset 0x38 Video BIOS Build Version
+ uint8_t gver[0x10]; ///< Offset 0x48 Graphic Driver Build Version
+ uint32_t mbox; ///< Offset 0x58 Supported Mailboxes
+ uint32_t dmod; ///< Offset 0x5C Driver Model
+ uint32_t pcon; ///< Offset 0x60 Platform Configuration
+ int16_t dver[0x10]; ///< Offset 0x64 GOP Version
+ uint8_t rm01[0x7C]; ///< Offset 0x84 Reserved Must be zero
+};
+
+///
+/// OpRegion Mailbox 1 - Public ACPI Methods
+/// Offset 0x100, Size 0x100
+///
+struct igd_opregion_mbox1 {
+ uint32_t drdy; ///< Offset 0x100 Driver Readiness
+ uint32_t csts; ///< Offset 0x104 Status
+ uint32_t cevt; ///< Offset 0x108 Current Event
+ uint8_t rm11[0x14]; ///< Offset 0x10C Reserved Must be Zero
+ uint32_t didl[8]; ///< Offset 0x120 Supported Display Devices ID List
+ uint32_t
+ cpdl[8]; ///< Offset 0x140 Currently Attached Display Devices List
+ uint32_t
+ cadl[8]; ///< Offset 0x160 Currently Active Display Devices List
+ uint32_t nadl[8]; ///< Offset 0x180 Next Active Devices List
+ uint32_t aslp; ///< Offset 0x1A0 ASL Sleep Time Out
+ uint32_t tidx; ///< Offset 0x1A4 Toggle Table Index
+ uint32_t chpd; ///< Offset 0x1A8 Current Hotplug Enable Indicator
+ uint32_t clid; ///< Offset 0x1AC Current Lid State Indicator
+ uint32_t cdck; ///< Offset 0x1B0 Current Docking State Indicator
+ uint32_t sxsw; ///< Offset 0x1B4 Display Switch Notification on Sx
+ ///< StateResume
+ uint32_t evts; ///< Offset 0x1B8 Events supported by ASL
+ uint32_t cnot; ///< Offset 0x1BC Current OS Notification
+ uint32_t NRDY; ///< Offset 0x1C0 Driver Status
+ uint8_t did2[0x1C]; ///< Offset 0x1C4 Extended Supported Devices ID
+ ///< List(DOD)
+ uint8_t
+ cpd2[0x1C]; ///< Offset 0x1E0 Extended Attached Display Devices List
+ uint8_t rm12[4]; ///< Offset 0x1FC - 0x1FF Reserved Must be zero
+};
+
+///
+/// OpRegion Mailbox 2 - Software SCI Interface
+/// Offset 0x200, Size 0x100
+///
+struct igd_opregion_mbox2 {
+ uint32_t scic; ///< Offset 0x200 Software SCI Command / Status / Data
+ uint32_t parm; ///< Offset 0x204 Software SCI Parameters
+ uint32_t dslp; ///< Offset 0x208 Driver Sleep Time Out
+ uint8_t rm21[0xF4]; ///< Offset 0x20C - 0x2FF Reserved Must be zero
+};
+
+///
+/// OpRegion Mailbox 3 - BIOS/Driver Notification - ASLE Support
+/// Offset 0x300, Size 0x100
+///
+struct igd_opregion_mbox3 {
+ uint32_t ardy; ///< Offset 0x300 Driver Readiness
+ uint32_t aslc; ///< Offset 0x304 ASLE Interrupt Command / Status
+ uint32_t tche; ///< Offset 0x308 Technology Enabled Indicator
+ uint32_t alsi; ///< Offset 0x30C Current ALS Luminance Reading
+ uint32_t bclp; ///< Offset 0x310 Requested Backlight Brightness
+ uint32_t pfit; ///< Offset 0x314 Panel Fitting State or Request
+ uint32_t cblv; ///< Offset 0x318 Current Brightness Level
+ uint16_t bclm[0x14]; ///< Offset 0x31C Backlight Brightness Levels Duty
+ ///< Cycle Mapping Table
+ uint32_t cpfm; ///< Offset 0x344 Current Panel Fitting Mode
+ uint32_t epfm; ///< Offset 0x348 Enabled Panel Fitting Modes
+ uint8_t plut[0x4A]; ///< Offset 0x34C Panel Look Up Table & Identifier
+ uint32_t pfmb; ///< Offset 0x396 PWM Frequency and Minimum Brightness
+ uint32_t ccdv; ///< Offset 0x39A Color Correction Default Values
+ uint32_t pcft; ///< Offset 0x39E Power Conservation Features
+ uint32_t srot; ///< Offset 0x3A2 Supported Rotation Angles
+ uint32_t iuer; ///< Offset 0x3A6 Intel Ultrabook(TM) Event Register
+ uint64_t fdss; ///< Offset 0x3AA DSS Buffer address allocated for IFFS
+ ///< feature
+ uint32_t fdsp; ///< Offset 0x3B2 Size of DSS buffer
+ uint32_t stat; ///< Offset 0x3B6 State Indicator
+ uint64_t rvda; ///< Offset 0x3BA Absolute/Relative Address of Raw VBT
+ ///< Data from OpRegion Base
+ uint32_t rvds; ///< Offset 0x3C2 Raw VBT Data Size
+ uint8_t rsvd2[0x3A]; ///< Offset 0x3C6 - 0x3FF Reserved Must be zero.
+ ///< Bug in spec 0x45(69)
+};
+
+///
+/// OpRegion Mailbox 4 - VBT Video BIOS Table
+/// Offset 0x400, Size 0x1800
+///
+struct igd_opregion_mbox4 {
+ uint8_t rvbt[IGD_OPREGION_VBT_SIZE_6K]; ///< Offset 0x400 - 0x1BFF Raw
+ ///< VBT Data
+};
+
+///
+/// OpRegion Mailbox 5 - BIOS/Driver Notification - Data storage BIOS to Driver
+/// data sync Offset 0x1C00, Size 0x400
+///
+struct igd_opregion_mbox5 {
+ uint32_t phed; ///< Offset 0x1C00 Panel Header
+ uint8_t bddc[0x100]; ///< Offset 0x1C04 Panel EDID (DDC data)
+ uint8_t rm51[0x2FC]; ///< Offset 0x1D04 - 0x1FFF Reserved Must be zero
+};
+
+///
+/// IGD OpRegion Structure
+///
+struct igd_opregion {
+ struct igd_opregion_header
+ header; ///< OpRegion header (Offset 0x0, Size 0x100)
+ struct igd_opregion_mbox1 mbox1; ///< Mailbox 1: Public ACPI Methods
+ ///< (Offset 0x100, Size 0x100)
+ struct igd_opregion_mbox2 mbox2; ///< Mailbox 2: Software SCI Interface
+ ///< (Offset 0x200, Size 0x100)
+ struct igd_opregion_mbox3
+ mbox3; ///< Mailbox 3: BIOS to Driver Notification (Offset 0x300,
+ ///< Size 0x100)
+ struct igd_opregion_mbox4 mbox4; ///< Mailbox 4: Video BIOS Table (VBT)
+ ///< (Offset 0x400, Size 0x1800)
+ struct igd_opregion_mbox5
+ mbox5; ///< Mailbox 5: BIOS to Driver Notification Extension (Offset
+ ///< 0x1C00, Size 0x400)
+};
+
+///
+/// VBT Header Structure
+///
+struct vbt_header {
+ uint8_t product_string[20];
+ uint16_t version;
+ uint16_t header_size;
+ uint16_t table_size;
+ uint8_t checksum;
+ uint8_t reserved1;
+ uint32_t bios_data_offset;
+ uint32_t aim_data_offset[4];
+};
+
+#pragma pack(pop)
+
+int vm_intelgpu_get_opregion(struct vm *vm, vm_paddr_t *base, vm_paddr_t *size);
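
The packed layout above is only correct if every field lands on the offset given in its comment. A few compile-time checks (illustrative, not part of the patch; they could live in intelgpu.c) make those documented offsets explicit:

    /* Verify the documented offsets of the packed OpRegion layout. */
    _Static_assert(__builtin_offsetof(struct igd_opregion, mbox1) == 0x100,
        "mailbox 1 must start at offset 0x100");
    _Static_assert(__builtin_offsetof(struct igd_opregion, mbox2) == 0x200,
        "mailbox 2 must start at offset 0x200");
    _Static_assert(__builtin_offsetof(struct igd_opregion, mbox3) == 0x300,
        "mailbox 3 must start at offset 0x300");
    _Static_assert(__builtin_offsetof(struct igd_opregion, mbox4) == 0x400,
        "mailbox 4 (VBT) must start at offset 0x400");
    _Static_assert(__builtin_offsetof(struct igd_opregion, mbox5) == 0x1C00,
        "mailbox 5 must start at offset 0x1C00");
    _Static_assert(sizeof(struct igd_opregion) == 0x2000,
        "the OpRegion is 8 KB");
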
diff --git a/sys/amd64/vmm/intel/intelgpu.c b/sys/amd64/vmm/intel/intelgpu.c
new file mode 100644
--- /dev/null
+++ b/sys/amd64/vmm/intel/intelgpu.c
@@ -0,0 +1,78 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include "intelgpu.h"
+
+#define KB (1024UL)
+
+int
+vm_intelgpu_get_opregion(struct vm *vm, vm_paddr_t *base, vm_paddr_t *size)
+{
+ /* intel graphics device is always located at 0:2.0 */
+ device_t dev = pci_find_bsf(0, 2, 0);
+ if (dev == NULL) {
+ return (ENOENT);
+ }
+
+ if ((pci_get_vendor(dev) != PCI_VENDOR_INTEL) ||
+ (pci_get_class(dev) != PCIC_DISPLAY) ||
+ (pci_get_subclass(dev) != PCIS_DISPLAY_VGA)) {
+ return (ENODEV);
+ }
+
+ uint64_t asls = pci_read_config(dev, PCIR_ASLS_CTL, 4);
+
+ struct igd_opregion_header *opregion_header =
+ (struct igd_opregion_header *)pmap_map(NULL, asls,
+ asls + sizeof(*opregion_header), VM_PROT_READ);
+ if (opregion_header == NULL ||
+ memcmp(opregion_header->sign, IGD_OPREGION_HEADER_SIGN,
+ sizeof(opregion_header->sign))) {
+ return (ENODEV);
+ }
+
+ *base = asls;
+ *size = opregion_header->size * KB;
+
+ return (0);
+}
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -134,7 +134,7 @@
bool sysmem;
struct vm_object *object;
};
-#define VM_MAX_MEMSEGS 3
+#define VM_MAX_MEMSEGS 4
struct mem_map {
vm_paddr_t gpa;
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -60,6 +60,7 @@
#include <machine/vmm_snapshot.h>
#include <x86/apicreg.h>
+#include "intel/intelgpu.h"
#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
@@ -366,6 +367,7 @@
struct vm_capability *vmcap;
struct vm_pptdev *pptdev;
struct vm_pptdev_mmio *pptmmio;
+ struct vm_memory_region_info *memory_region_info;
struct vm_pptdev_msi *pptmsi;
struct vm_pptdev_msix *pptmsix;
struct vm_nmi *vmnmi;
@@ -533,6 +535,24 @@
error = ppt_unmap_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
pptmmio->func, pptmmio->gpa, pptmmio->len);
break;
+ case VM_GET_MEMORY_REGION_INFO:
+ memory_region_info = (struct vm_memory_region_info *)data;
+ switch (memory_region_info->type) {
+ case MEMORY_REGION_INTEL_GSM:
+ memory_region_info->base = intel_graphics_stolen_base;
+ memory_region_info->size = intel_graphics_stolen_size;
+ error = 0;
+ break;
+ case MEMORY_REGION_INTEL_OPREGION:
+ error = vm_intelgpu_get_opregion(sc->vm,
+ &memory_region_info->base,
+ &memory_region_info->size);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ break;
case VM_BIND_PPTDEV:
pptdev = (struct vm_pptdev *)data;
error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
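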
diff --git a/sys/dev/pci/pcireg.h b/sys/dev/pci/pcireg.h
--- a/sys/dev/pci/pcireg.h
+++ b/sys/dev/pci/pcireg.h
@@ -1098,3 +1098,14 @@
#define PCIM_OSC_CTL_PCIE_PME 0x04 /* PCIe Native Power Mgt Events */
#define PCIM_OSC_CTL_PCIE_AER 0x08 /* PCIe Advanced Error Reporting */
#define PCIM_OSC_CTL_PCIE_CAP_STRUCT 0x10 /* Various Capability Structures */
+
+/*
+ * Intel graphics device definitions
+ */
+#define PCIR_BDSM 0x5C /* Base of Data Stolen Memory register */
+#define PCIR_ASLS_CTL 0xFC /* Opregion start address register */
+
+/*
+ * PCI Vendors
+ */
+#define PCI_VENDOR_INTEL 0x8086
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -42,6 +42,7 @@
# intel-specific files
.PATH: ${SRCTOP}/sys/amd64/vmm/intel
SRCS+= ept.c \
+ intelgpu.c \
vmcs.c \
vmx_msr.c \
vmx_support.S \
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -15,6 +15,7 @@
BHYVE_SYSDIR?=${SRCTOP}
SRCS= \
+ acpi_device.c \
atkbdc.c \
acpi.c \
audio.c \
@@ -25,7 +26,7 @@
console.c \
ctl_util.c \
ctl_scsi_all.c \
- fwctl.c \
+ e820.c \
gdb.c \
hda_codec.c \
inout.c \
@@ -41,6 +42,7 @@
pci_emul.c \
pci_hda.c \
pci_fbuf.c \
+ pci_gvt-d.c \
pci_hostbridge.c \
pci_irq.c \
pci_lpc.c \
@@ -59,6 +61,7 @@
post.c \
ps2kbd.c \
ps2mouse.c \
+ qemu_fwcfg.c \
rfb.c \
rtc.c \
smbiostbl.c \
diff --git a/usr.sbin/bhyve/acpi.h b/usr.sbin/bhyve/acpi.h
--- a/usr.sbin/bhyve/acpi.h
+++ b/usr.sbin/bhyve/acpi.h
@@ -31,6 +31,8 @@
#ifndef _ACPI_H_
#define _ACPI_H_
+#include "acpi_device.h"
+
#define SCI_INT 9
#define SMI_CMD 0xb2
@@ -55,6 +57,7 @@
int acpi_build(struct vmctx *ctx, int ncpu);
void acpi_raise_gpe(struct vmctx *ctx, unsigned bit);
+int acpi_tables_add_device(const struct acpi_device *const dev);
void dsdt_line(const char *fmt, ...);
void dsdt_fixed_ioport(uint16_t iobase, uint16_t length);
void dsdt_fixed_irq(uint8_t irq);
diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c
--- a/usr.sbin/bhyve/acpi.c
+++ b/usr.sbin/bhyve/acpi.c
@@ -139,6 +139,30 @@
#define EFFLUSH(x) \
if (fflush(x) != 0) goto err_exit;
+/*
+ * A list for additional ACPI devices like a TPM.
+ */
+struct acpi_device_list_entry {
+ SLIST_ENTRY(acpi_device_list_entry) chain;
+ const struct acpi_device *dev;
+};
+SLIST_HEAD(acpi_device_list,
+ acpi_device_list_entry) acpi_devices = SLIST_HEAD_INITIALIZER(acpi_devices);
+
+int
+acpi_tables_add_device(const struct acpi_device *const dev)
+{
+ struct acpi_device_list_entry *const entry = calloc(1, sizeof(*entry));
+ if (entry == NULL) {
+ return (ENOMEM);
+ }
+
+ entry->dev = dev;
+ SLIST_INSERT_HEAD(&acpi_devices, entry, chain);
+
+ return (0);
+}
+
static int
basl_fwrite_rsdp(FILE *fp)
{
@@ -760,6 +784,11 @@
vmgenc_write_dsdt();
+ const struct acpi_device_list_entry *entry;
+ SLIST_FOREACH(entry, &acpi_devices, chain) {
+ acpi_device_write_dsdt(entry->dev);
+ }
+
dsdt_line("}");
if (dsdt_error != 0)
diff --git a/usr.sbin/bhyve/acpi_device.h b/usr.sbin/bhyve/acpi_device.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/acpi_device.h
@@ -0,0 +1,42 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#pragma once
+
+#include <contrib/dev/acpica/include/acpi.h>
+
+struct vmctx;
+
+struct acpi_device;
+
+/**
+ * Creates an ACPI device.
+ *
+ * @param[out] new_dev Returns the newly created ACPI device.
+ * @param[in] vm_ctx VM context the ACPI device is created in.
+ * @param[in] name Name of the ACPI device. Should always be a NULL
+ * terminated string.
+ * @param[in] hid Hardware ID of the ACPI device. Should always be a NULL
+ * terminated string.
+ */
+int acpi_device_create(struct acpi_device **const new_dev,
+ struct vmctx *const vm_ctx, const char *const name, const char *const hid);
+void acpi_device_destroy(struct acpi_device *const dev);
+
+/**
+ * @note: acpi_device_add_res_acpi_buffer doesn't ensure that no resources are
+ * added on an error condition. On error the caller should assume that
+ * the ACPI_BUFFER is partially added to the ACPI device.
+ */
+int acpi_device_add_res_acpi_buffer(struct acpi_device *const dev,
+ const ACPI_BUFFER resources);
+int acpi_device_add_res_fixed_ioport(struct acpi_device *const dev,
+ const UINT16 port, UINT8 length);
+int acpi_device_add_res_fixed_memory32(struct acpi_device *const dev,
+ const UINT8 write_protected, const UINT32 address, const UINT32 length);
+
+void acpi_device_write_dsdt(const struct acpi_device *const dev);
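
A rough usage sketch for this API (the device name, _HID and address below are made up for illustration; a real caller such as a TPM or GPU emulation would use the identifiers its guest driver expects):

    #include "acpi_device.h"

    static struct acpi_device *example_dev;

    static int
    example_acpi_register(struct vmctx *ctx)
    {
            int error;

            /* Also registers the device with the ACPI table builder. */
            error = acpi_device_create(&example_dev, ctx, "EXPL", "EXA0001");
            if (error != 0)
                    return (error);

            /* One fixed MMIO window; ACPI_READ_ONLY_MEMORY comes from ACPICA. */
            return (acpi_device_add_res_fixed_memory32(example_dev,
                ACPI_READ_ONLY_MEMORY, 0xFED40000, 0x1000));
    }

Because acpi_device_create() already calls acpi_tables_add_device(), a later acpi_build() emits the device under \_SB with _HID, _STA and the accumulated _CRS.
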
diff --git a/usr.sbin/bhyve/acpi_device.c b/usr.sbin/bhyve/acpi_device.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/acpi_device.c
@@ -0,0 +1,240 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+
+#include <machine/vmm.h>
+
+#include <err.h>
+#include <errno.h>
+#include <vmmapi.h>
+
+#include "acpi.h"
+#include "acpi_device.h"
+
+/**
+ * List entry to enumerate all resources used by an ACPI device.
+ *
+ * @param chain Used to chain multiple elements together.
+ * @param type Type of the ACPI resource.
+ * @param data Data of the ACPI resource.
+ */
+struct acpi_resource_list_entry {
+ SLIST_ENTRY(acpi_resource_list_entry) chain;
+ UINT32 type;
+ ACPI_RESOURCE_DATA data;
+};
+
+/**
+ * Holds information about an ACPI device.
+ *
+ * @param vm_ctx VM context the ACPI device was created in.
+ * @param name Name of the ACPI device.
+ * @param hid Hardware ID of the ACPI device.
+ * @param crs Current resources used by the ACPI device.
+ */
+struct acpi_device {
+ struct vmctx *vm_ctx;
+ const char *name;
+ const char *hid;
+ SLIST_HEAD(acpi_resource_list, acpi_resource_list_entry) crs;
+};
+
+int
+acpi_device_create(struct acpi_device **const new_dev,
+ struct vmctx *const vm_ctx, const char *const name, const char *const hid)
+{
+ if (new_dev == NULL || vm_ctx == NULL || name == NULL || hid == NULL) {
+ return (EINVAL);
+ }
+
+ struct acpi_device *const dev = calloc(1, sizeof(*dev));
+ if (dev == NULL) {
+ return (ENOMEM);
+ }
+
+ dev->vm_ctx = vm_ctx;
+ dev->name = name;
+ dev->hid = hid;
+ SLIST_INIT(&dev->crs);
+
+ /* current resources always contain an end tag */
+ struct acpi_resource_list_entry *const crs_end_tag = calloc(1,
+ sizeof(*crs_end_tag));
+ if (crs_end_tag == NULL) {
+ acpi_device_destroy(dev);
+ return (ENOMEM);
+ }
+ crs_end_tag->type = ACPI_RESOURCE_TYPE_END_TAG;
+ SLIST_INSERT_HEAD(&dev->crs, crs_end_tag, chain);
+
+ const int error = acpi_tables_add_device(dev);
+ if (error) {
+ acpi_device_destroy(dev);
+ return (error);
+ }
+
+ *new_dev = dev;
+
+ return (0);
+}
+
+void
+acpi_device_destroy(struct acpi_device *const dev)
+{
+ if (dev == NULL) {
+ return;
+ }
+
+ struct acpi_resource_list_entry *res;
+ while (!SLIST_EMPTY(&dev->crs)) {
+ res = SLIST_FIRST(&dev->crs);
+ SLIST_REMOVE_HEAD(&dev->crs, chain);
+ free(res);
+ }
+}
+
+int
+acpi_device_add_res_acpi_buffer(struct acpi_device *const dev,
+ const ACPI_BUFFER resources)
+{
+ if (dev == NULL) {
+ return (EINVAL);
+ }
+
+ int error = 0;
+ size_t offset = 0;
+ while (offset < resources.Length) {
+ const ACPI_RESOURCE *const res =
+ (const ACPI_RESOURCE *)((UINT8 *)resources.Pointer +
+ offset);
+ switch (res->Type) {
+ case ACPI_RESOURCE_TYPE_FIXED_IO:
+ error = acpi_device_add_res_fixed_ioport(dev,
+ res->Data.FixedIo.Address,
+ res->Data.FixedIo.AddressLength);
+ break;
+ case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
+ error = acpi_device_add_res_fixed_memory32(dev,
+ res->Data.FixedMemory32.WriteProtect,
+ res->Data.FixedMemory32.Address,
+ res->Data.FixedMemory32.AddressLength);
+ break;
+ case ACPI_RESOURCE_TYPE_END_TAG:
+ break;
+ default:
+ warnx("%s: unknown resource type %d", __func__,
+ res->Type);
+ return (ENODEV);
+ }
+ if (error) {
+ break;
+ }
+ offset += res->Length;
+ }
+
+ return (error);
+}
+
+int
+acpi_device_add_res_fixed_ioport(struct acpi_device *const dev,
+ const UINT16 port, const UINT8 length)
+{
+ if (dev == NULL) {
+ return (EINVAL);
+ }
+
+ struct acpi_resource_list_entry *const res = calloc(1, sizeof(*res));
+ if (res == NULL) {
+ return (ENOMEM);
+ }
+
+ res->type = ACPI_RESOURCE_TYPE_FIXED_IO;
+ res->data.FixedIo.Address = port;
+ res->data.FixedIo.AddressLength = length;
+
+ SLIST_INSERT_HEAD(&dev->crs, res, chain);
+
+ return (0);
+}
+
+int
+acpi_device_add_res_fixed_memory32(struct acpi_device *const dev,
+ const UINT8 write_protected, const UINT32 address, const UINT32 length)
+{
+ if (dev == NULL) {
+ return (EINVAL);
+ }
+
+ struct acpi_resource_list_entry *const res = calloc(1, sizeof(*res));
+ if (res == NULL) {
+ return (ENOMEM);
+ }
+
+ res->type = ACPI_RESOURCE_TYPE_FIXED_MEMORY32;
+ res->data.FixedMemory32.WriteProtect = write_protected;
+ res->data.FixedMemory32.Address = address;
+ res->data.FixedMemory32.AddressLength = length;
+
+ SLIST_INSERT_HEAD(&dev->crs, res, chain);
+
+ return (0);
+}
+
+static void
+acpi_device_write_dsdt_crs(const struct acpi_device *const dev)
+{
+ const struct acpi_resource_list_entry *res;
+ SLIST_FOREACH (res, &dev->crs, chain) {
+ switch (res->type) {
+ case ACPI_RESOURCE_TYPE_FIXED_IO:
+ dsdt_fixed_ioport(res->data.FixedIo.Address,
+ res->data.FixedIo.AddressLength);
+ break;
+ case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: {
+ dsdt_fixed_mem32(res->data.FixedMemory32.Address,
+ res->data.FixedMemory32.AddressLength);
+ break;
+ }
+ case ACPI_RESOURCE_TYPE_END_TAG:
+ break;
+ default:
+ warnx("%s: unknown resource type %d", __func__,
+ res->type);
+ return;
+ }
+ }
+}
+
+void
+acpi_device_write_dsdt(const struct acpi_device *const dev)
+{
+ if (dev == NULL) {
+ return;
+ }
+
+ dsdt_line("");
+ dsdt_line(" Scope (\\_SB)");
+ dsdt_line(" {");
+ dsdt_line(" Device (%s)", dev->name);
+ dsdt_line(" {");
+ dsdt_line(" Name (_HID, \"%s\")", dev->hid);
+ dsdt_line(" Name (_STA, 0x0F)");
+ dsdt_line(" Name (_CRS, ResourceTemplate ()");
+ dsdt_line(" {");
+ dsdt_indent(4);
+ acpi_device_write_dsdt_crs(dev);
+ dsdt_unindent(4);
+ dsdt_line(" })");
+ dsdt_line(" }");
+ dsdt_line(" }");
+}
diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8
--- a/usr.sbin/bhyve/bhyve.8
+++ b/usr.sbin/bhyve/bhyve.8
@@ -409,6 +409,11 @@
and
.Ar function
numbers.
+.It Li rom= Ns Ar romfile
+Add
+.Ar romfile
+as an option ROM to the PCI device.
+The ROM will be loaded by firmware and should be capable of initializing the device.
.El
.Pp
Guest memory must be wired using the
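
With the rom= option above, an IGD pass-through invocation could look roughly like this (slot numbers, the ROM image and the bootrom path are illustrative; the host IGD sits at 0/2/0):

    bhyve -S -c 2 -m 2G \
        -s 0,hostbridge \
        -s 2,passthru,0/2/0,rom=/path/to/igd-vbios.bin \
        -s 31,lpc -l bootrom,/usr/local/share/uefi-firmware/BHYVE_UEFI.fd \
        igdvm
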
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -89,7 +89,7 @@
#include "bootrom.h"
#include "inout.h"
#include "debug.h"
-#include "fwctl.h"
+#include "e820.h"
#include "gdb.h"
#include "ioapic.h"
#include "kernemu_dev.h"
@@ -99,6 +99,7 @@
#include "pci_emul.h"
#include "pci_irq.h"
#include "pci_lpc.h"
+#include "qemu_fwcfg.h"
#include "smbiostbl.h"
#ifdef BHYVE_SNAPSHOT
#include "snapshot.h"
@@ -1296,6 +1297,41 @@
rtc_init(ctx, rtc_localtime);
sci_init(ctx);
+ if (qemu_fwcfg_init(ctx) != 0) {
+ fprintf(stderr, "qemu fwcfg initialization error");
+ exit(4);
+ }
+
+ /*
+ * QEMU uses fwcfg item 0x0f (FW_CFG_MAX_CPUS) to report the number of
+ * cpus to the guest but states that it has a special meaning for x86.
+ * Don't know yet if that can cause unintended side-effects. Use our own
+ * fwcfg item to be safe.
+ *
+ * QEMU comment:
+ * FW_CFG_MAX_CPUS is a bit confusing/problematic on x86:
+ *
+ * For machine types prior to 1.8, SeaBIOS needs FW_CFG_MAX_CPUS
+ * for building MPTable, ACPI MADT, ACPI CPU hotplug and ACPI SRAT
+ * table, that tables are based on xAPIC ID and QEMU<->SeaBIOS
+ * interface for CPU hotplug also uses APIC ID and not "CPU index".
+ * This means that FW_CFG_MAX_CPUS is not the "maximum number of
+ * CPUs", but the "limit to the APIC ID values SeaBIOS may see".
+ *
+ * So for compatibility reasons with old BIOSes we are stuck with
+ * "etc/max-cpus" actually being apic_id_limit
+ */
+ if (qemu_fwcfg_add_file("opt/bhyve/hw.ncpu", sizeof(guest_ncpus),
+ &guest_ncpus) != 0) {
+ fprintf(stderr, "could not add qemu fwcfg opt/bhyve/hw.ncpu");
+ exit(4);
+ }
+
+ if (e820_init(ctx) != 0) {
+ fprintf(stderr, "Unable to setup E820");
+ exit(4);
+ }
+
/*
* Exit if a device emulation finds an error in its initilization
*/
@@ -1380,8 +1416,17 @@
assert(error == 0);
}
- if (lpc_bootrom())
- fwctl_init();
+ struct qemu_fwcfg_item *fwcfg_item = e820_get_fwcfg_item();
+ if (fwcfg_item == NULL) {
+ fprintf(stderr, "invalid e820 table");
+ exit(4);
+ }
+ if (qemu_fwcfg_add_file("etc/e820", fwcfg_item->size,
+ fwcfg_item->data) != 0) {
+ fprintf(stderr, "could not add qemu fwcfg etc/e820");
+ exit(4);
+ }
+ free(fwcfg_item);
/*
* Change the proc title to include the VM name.
diff --git a/usr.sbin/bhyve/e820.h b/usr.sbin/bhyve/e820.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/e820.h
@@ -0,0 +1,71 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#pragma once
+
+#include <vmmapi.h>
+
+#include "qemu_fwcfg.h"
+
+#pragma pack(push, 1)
+
+enum e820_memory_type {
+ E820_TYPE_MEMORY = 1,
+ E820_TYPE_RESERVED = 2,
+ E820_TYPE_ACPI = 3,
+ E820_TYPE_NVS = 4
+};
+
+enum e820_allocation_strategy {
+ /* allocate any address */
+ E820_ALLOCATE_ANY,
+ /* allocate lowest address larger than address */
+ E820_ALLOCATE_LOWEST,
+ /* allocate highest address lower than address */
+ E820_ALLOCATE_HIGHEST,
+ /* allocate a specific address */
+ E820_ALLOCATE_SPECIFIC
+};
+
+struct e820_entry {
+ uint64_t base;
+ uint64_t length;
+ enum e820_memory_type type;
+};
+
+#pragma pack(pop)
+
+#define E820_ALIGNMENT_NONE 1
+
+uint64_t e820_alloc(uint64_t address, uint64_t length, uint64_t alignment,
+ enum e820_memory_type type, enum e820_allocation_strategy strategy);
+void e820_dump_table();
+struct qemu_fwcfg_item *e820_get_fwcfg_item();
+int e820_init(struct vmctx *ctx);
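
As a sketch of how the allocator might be used by a device model (a hypothetical helper; gsm_base/gsm_size would come from vm_get_memory_region_info()), the graphics stolen memory can be reserved 1:1 in the guest map so the guest never treats it as usable RAM:

    #include <stdint.h>
    #include <err.h>

    #include "e820.h"

    static int
    reserve_gsm(uint64_t gsm_base, uint64_t gsm_size)
    {
            uint64_t addr;

            addr = e820_alloc(gsm_base, gsm_size, E820_ALIGNMENT_NONE,
                E820_TYPE_RESERVED, E820_ALLOCATE_SPECIFIC);
            if (addr != gsm_base) {
                    warnx("failed to reserve GSM at %#lx",
                        (unsigned long)gsm_base);
                    return (-1);
            }
            return (0);
    }

Note that E820_ALLOCATE_SPECIFIC only succeeds if the requested range lies inside an existing E820_TYPE_MEMORY entry, so the 1:1 reservation works only when that host address range is currently guest RAM.
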
diff --git a/usr.sbin/bhyve/e820.c b/usr.sbin/bhyve/e820.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/e820.c
@@ -0,0 +1,460 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/queue.h>
+
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "e820.h"
+#include "qemu_fwcfg.h"
+
+/*
+ * E820 always uses 64 bit entries. Emulation code will use vm_paddr_t since it
+ * works on physical addresses. If vm_paddr_t is larger than uint64_t E820 can't
+ * hold all possible physical addresses and we can get into trouble.
+ */
+static_assert(sizeof(vm_paddr_t) <= sizeof(uint64_t),
+ "Unable to represent physical memory by E820 table");
+
+#define E820_FWCFG_FILE_NAME "etc/e820"
+
+#define KB (1024UL)
+#define MB (1024 * KB)
+#define GB (1024 * MB)
+
+/*
+ * Fix E820 memory holes:
+ * [ A0000, C0000) VGA
+ * [ C0000, 100000) ROM
+ */
+#define E820_VGA_MEM_BASE 0xA0000
+#define E820_VGA_MEM_END 0xC0000
+#define E820_ROM_MEM_BASE 0xC0000
+#define E820_ROM_MEM_END 0x100000
+
+struct e820_element {
+ TAILQ_ENTRY(e820_element) chain;
+ uint64_t base;
+ uint64_t end;
+ enum e820_memory_type type;
+};
+TAILQ_HEAD(e820_table, e820_element) e820_table = TAILQ_HEAD_INITIALIZER(
+ e820_table);
+
+static char *
+e820_get_type_name(enum e820_memory_type type)
+{
+ switch (type) {
+ case E820_TYPE_MEMORY:
+ return "RAM ";
+ case E820_TYPE_RESERVED:
+ return "Reserved";
+ case E820_TYPE_ACPI:
+ return "ACPI ";
+ case E820_TYPE_NVS:
+ return "NVS ";
+ default:
+ return "Unknown ";
+ }
+}
+
+void
+e820_dump_table()
+{
+ fprintf(stderr, "E820 map:\n\r");
+ uint64_t i = 0;
+ struct e820_element *element;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ fprintf(stderr, " (%4lu) [ %16lx, %16lx] %s\n\r", i,
+ element->base, element->end,
+ e820_get_type_name(element->type));
+ ++i;
+ }
+}
+
+struct qemu_fwcfg_item *
+e820_get_fwcfg_item()
+{
+ uint64_t count = 0;
+ struct e820_element *element;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ ++count;
+ }
+
+ struct qemu_fwcfg_item *fwcfg_item = malloc(
+ sizeof(struct qemu_fwcfg_item));
+ if (fwcfg_item == NULL) {
+ return (NULL);
+ }
+ fwcfg_item->size = count * sizeof(struct e820_entry);
+ fwcfg_item->data = malloc(fwcfg_item->size);
+ if (fwcfg_item->data == NULL) {
+ free(fwcfg_item);
+ return (NULL);
+ }
+ uint64_t i = 0;
+ struct e820_entry *entries = (struct e820_entry *)fwcfg_item->data;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ struct e820_entry *entry = &entries[i];
+ entry->base = element->base;
+ entry->length = element->end - element->base;
+ entry->type = element->type;
+ ++i;
+ }
+
+ return fwcfg_item;
+}
+
+int
+e820_add_entry(uint64_t base, uint64_t end, enum e820_memory_type type)
+{
+ if (end < base) {
+ return (-1);
+ }
+
+ struct e820_element *new_element = malloc(sizeof(struct e820_element));
+ if (new_element == NULL) {
+ return (-ENOMEM);
+ }
+
+ new_element->base = base;
+ new_element->end = end;
+ new_element->type = type;
+
+ /*
+ * The E820 table should always be sorted in ascending order. Therefore,
+ * search for an element whose end is larger than the base parameter.
+ */
+ struct e820_element *element;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ if (element->end > base) {
+ break;
+ }
+ }
+
+ /*
+ * System memory requires special handling.
+ */
+ if (type == E820_TYPE_MEMORY) {
+ /*
+ * base is larger than that of any existing element. Add new system
+ * memory at the end of the table.
+ */
+ if (element == NULL) {
+ TAILQ_INSERT_TAIL(&e820_table, new_element, chain);
+ return (0);
+ }
+
+ /*
+ * System memory shouldn't overlap with any existing element.
+ */
+ if (end > element->base) {
+ return (-1);
+ }
+ TAILQ_INSERT_BEFORE(element, new_element, chain);
+ return (0);
+ }
+
+ if (element == NULL) {
+ /* No suitable element found */
+ return (-1);
+ }
+
+ /*
+ * Non system memory should be allocated inside system memory.
+ */
+ if (element->type != E820_TYPE_MEMORY) {
+ return (-1);
+ }
+ /*
+ * New element should fit into existing system memory element.
+ */
+ if (base < element->base || end > element->end) {
+ return (-1);
+ }
+
+ if (base == element->base) {
+ /*
+ * New element at system memory base boundary. Add new
+ * element before current and adjust the base of the old
+ * element.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM <-- element
+ * New table:
+ * [ 0x1000, 0x2000] Reserved
+ * [ 0x2000, 0x4000] RAM <-- element
+ */
+ TAILQ_INSERT_BEFORE(element, new_element, chain);
+ element->base = end;
+ } else if (end == element->end) {
+ /*
+ * New element at system memory end boundary. Add new
+ * element after current and adjust the end of the
+ * current element.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM <-- element
+ * New table:
+ * [ 0x1000, 0x3000] RAM <-- element
+ * [ 0x3000, 0x4000] Reserved
+ */
+ TAILQ_INSERT_AFTER(&e820_table, element, new_element, chain);
+ element->end = base;
+ } else {
+ /*
+ * New element inside system memory entry. Split it by
+ * adding a system memory element and the new element
+ * before current.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM <-- element
+ * New table:
+ * [ 0x1000, 0x2000] RAM
+ * [ 0x2000, 0x3000] Reserved
+ * [ 0x3000, 0x4000] RAM <-- element
+ */
+ struct e820_element *ram_element = malloc(
+ sizeof(struct e820_element));
+ if (ram_element == NULL) {
+ return (-ENOMEM);
+ }
+ ram_element->base = element->base;
+ ram_element->end = base;
+ ram_element->type = E820_TYPE_MEMORY;
+ TAILQ_INSERT_BEFORE(element, ram_element, chain);
+ TAILQ_INSERT_BEFORE(element, new_element, chain);
+ element->base = end;
+ }
+
+ return (0);
+}
+
+int
+e820_add_memory_hole(uint64_t base, uint64_t end)
+{
+ if (end < base) {
+ return (-1);
+ }
+
+ /*
+ * The E820 table should always be sorted in ascending order. Therefore,
+ * search for an element whose end is larger than the base parameter.
+ */
+ struct e820_element *element;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ if (element->end > base) {
+ break;
+ }
+ }
+
+ if (element == NULL || end <= element->base) {
+ /* Nothing to do. Hole already exists */
+ return (0);
+ }
+
+ if (element->type != E820_TYPE_MEMORY) {
+ /* Memory holes are only allowed in system memory */
+ return (-1);
+ }
+
+ if (base == element->base) {
+ /*
+ * New hole at system memory base boundary.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM
+ * New table:
+ * [ 0x2000, 0x4000] RAM
+ */
+ element->base = end;
+
+ } else if (end == element->end) {
+ /*
+ * New hole at system memory end boundary.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM
+ * New table:
+ * [ 0x1000, 0x3000] RAM
+ */
+ element->end = base;
+
+ } else {
+ /*
+ * New hole inside system memory entry. Split the system memory.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM <-- element
+ * New table:
+ * [ 0x1000, 0x2000] RAM
+ * [ 0x3000, 0x4000] RAM <-- element
+ */
+ struct e820_element *ram_element = malloc(
+ sizeof(struct e820_element));
+ if (ram_element == NULL) {
+ return (-ENOMEM);
+ }
+ ram_element->base = element->base;
+ ram_element->end = base;
+ ram_element->type = E820_TYPE_MEMORY;
+ TAILQ_INSERT_BEFORE(element, ram_element, chain);
+ element->base = end;
+ }
+
+ return (0);
+}
+
+uint64_t
+e820_alloc(uint64_t address, uint64_t length, uint64_t alignment,
+ enum e820_memory_type type, enum e820_allocation_strategy strategy)
+{
+ /* address should be aligned */
+ if (!powerof2(alignment) || (address & (alignment - 1)) != 0) {
+ return 0;
+ }
+
+ struct e820_element *element;
+ uint64_t end;
+ uint64_t base;
+ switch (strategy) {
+ case E820_ALLOCATE_ANY:
+ /*
+ * Allocate any address. Therefore, ignore the address parameter
+ * and reuse the code path for allocating the lowest address.
+ */
+ address = 0;
+ /* fallthrough */
+ case E820_ALLOCATE_LOWEST:
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ end = element->end;
+ base = roundup2(element->base, alignment);
+ if (address != 0) {
+ base = MAX(base, address);
+ }
+
+ if (element->type != E820_TYPE_MEMORY || end < base ||
+ end - base < length || base == 0) {
+ continue;
+ }
+
+ if (e820_add_entry(base, base + length, type) != 0) {
+ return 0;
+ }
+
+ return base;
+ }
+ break;
+ case E820_ALLOCATE_HIGHEST:
+ TAILQ_FOREACH_REVERSE (element, &e820_table, e820_table,
+ chain) {
+ end = element->end;
+ base = roundup2(element->base, alignment);
+ if (address != 0) {
+ end = MIN(end, address);
+ }
+
+ if (element->type != E820_TYPE_MEMORY || end < base ||
+ end - base < length || end - length == 0) {
+ continue;
+ }
+ base = rounddown2(end - length, alignment);
+
+ if (e820_add_entry(base, base + length, type) != 0) {
+ return 0;
+ }
+
+ return base;
+ }
+ break;
+ case E820_ALLOCATE_SPECIFIC:
+ base = address;
+ if (e820_add_entry(base, base + length, type) != 0) {
+ return 0;
+ }
+
+ return address;
+ }
+
+ return 0;
+}
+
+int
+e820_init(struct vmctx *ctx)
+{
+ int error;
+
+ TAILQ_INIT(&e820_table);
+
+ /* add memory below 4 GB to E820 table */
+ const uint64_t lowmem_length = vm_get_lowmem_size(ctx);
+ error = e820_add_entry(0, lowmem_length, E820_TYPE_MEMORY);
+ if (error) {
+ warnx("%s: Could not add lowmem", __func__);
+ return (error);
+ }
+
+ /* add memory above 4 GB to E820 table */
+ const uint64_t highmem_length = vm_get_highmem_size(ctx);
+ if (highmem_length != 0) {
+ error = e820_add_entry(4 * GB, 4 * GB + highmem_length,
+ E820_TYPE_MEMORY);
+ if (error) {
+ warnx("%s: Could not add highmem", __func__);
+ return (error);
+ }
+ }
+
+ /* add memory holes to E820 table */
+ error = e820_add_memory_hole(E820_VGA_MEM_BASE, E820_VGA_MEM_END);
+ if (error) {
+ warnx("%s: Could not add VGA memory", __func__);
+ return (error);
+ }
+
+ error = e820_add_memory_hole(E820_ROM_MEM_BASE, E820_ROM_MEM_END);
+ if (error) {
+ warnx("%s: Could not add ROM area", __func__);
+ return (error);
+ }
+
+ return (0);
+}
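
For a guest with, say, 2 GB of RAM and the default lowmem limit (so no memory above 4 GB), e820_init() produces roughly the following initial map, rendered in the e820_dump_table() format — lowmem split by the two fixed holes:

    E820 map:
      (   0) [                0,            a0000] RAM
      (   1) [           100000,         80000000] RAM

Later e820_alloc() calls from device models then carve Reserved/ACPI/NVS entries out of these RAM ranges.
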
diff --git a/usr.sbin/bhyve/fwctl.c b/usr.sbin/bhyve/fwctl.c
deleted file mode 100644
--- a/usr.sbin/bhyve/fwctl.c
+++ /dev/null
@@ -1,552 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2015 Peter Grehan <grehan@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-/*
- * Guest firmware interface. Uses i/o ports x510/x511 as Qemu does,
- * but with a request/response messaging protocol.
- */
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/types.h>
-#include <sys/errno.h>
-#include <sys/uio.h>
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "bhyverun.h"
-#include "inout.h"
-#include "fwctl.h"
-
-/*
- * Messaging protocol base operations
- */
-#define OP_NULL 1
-#define OP_ECHO 2
-#define OP_GET 3
-#define OP_GET_LEN 4
-#define OP_SET 5
-#define OP_MAX OP_SET
-
-/* I/O ports */
-#define FWCTL_OUT 0x510
-#define FWCTL_IN 0x511
-
-/*
- * Back-end state-machine
- */
-enum state {
- DORMANT,
- IDENT_WAIT,
- IDENT_SEND,
- REQ,
- RESP
-} be_state = DORMANT;
-
-static uint8_t sig[] = { 'B', 'H', 'Y', 'V' };
-static u_int ident_idx;
-
-struct op_info {
- int op;
- int (*op_start)(uint32_t len);
- void (*op_data)(uint32_t data, uint32_t len);
- int (*op_result)(struct iovec **data);
- void (*op_done)(struct iovec *data);
-};
-static struct op_info *ops[OP_MAX+1];
-
-/* Return 0-padded uint32_t */
-static uint32_t
-fwctl_send_rest(uint32_t *data, size_t len)
-{
- union {
- uint8_t c[4];
- uint32_t w;
- } u;
- uint8_t *cdata;
- int i;
-
- cdata = (uint8_t *) data;
- u.w = 0;
-
- for (i = 0, u.w = 0; i < len; i++)
- u.c[i] = *cdata++;
-
- return (u.w);
-}
-
-/*
- * error op dummy proto - drop all data sent and return an error
-*/
-static int errop_code;
-
-static void
-errop_set(int err)
-{
-
- errop_code = err;
-}
-
-static int
-errop_start(uint32_t len)
-{
- errop_code = ENOENT;
-
- /* accept any length */
- return (errop_code);
-}
-
-static void
-errop_data(uint32_t data, uint32_t len)
-{
-
- /* ignore */
-}
-
-static int
-errop_result(struct iovec **data)
-{
-
- /* no data to send back; always successful */
- *data = NULL;
- return (errop_code);
-}
-
-static void
-errop_done(struct iovec *data)
-{
-
- /* assert data is NULL */
-}
-
-static struct op_info errop_info = {
- .op_start = errop_start,
- .op_data = errop_data,
- .op_result = errop_result,
- .op_done = errop_done
-};
-
-/* OID search */
-SET_DECLARE(ctl_set, struct ctl);
-
-CTL_NODE("hw.ncpu", &guest_ncpus, sizeof(guest_ncpus));
-
-static struct ctl *
-ctl_locate(const char *str, int maxlen)
-{
- struct ctl *cp, **cpp;
-
- SET_FOREACH(cpp, ctl_set) {
- cp = *cpp;
- if (!strncmp(str, cp->c_oid, maxlen))
- return (cp);
- }
-
- return (NULL);
-}
-
-/* uefi-sysctl get-len */
-#define FGET_STRSZ 80
-static struct iovec fget_biov[2];
-static char fget_str[FGET_STRSZ];
-static struct {
- size_t f_sz;
- uint32_t f_data[1024];
-} fget_buf;
-static int fget_cnt;
-static size_t fget_size;
-
-static int
-fget_start(uint32_t len)
-{
-
- if (len > FGET_STRSZ)
- return(E2BIG);
-
- fget_cnt = 0;
-
- return (0);
-}
-
-static void
-fget_data(uint32_t data, uint32_t len)
-{
-
- *((uint32_t *) &fget_str[fget_cnt]) = data;
- fget_cnt += sizeof(uint32_t);
-}
-
-static int
-fget_result(struct iovec **data, int val)
-{
- struct ctl *cp;
- int err;
-
- err = 0;
-
- /* Locate the OID */
- cp = ctl_locate(fget_str, fget_cnt);
- if (cp == NULL) {
- *data = NULL;
- err = ENOENT;
- } else {
- if (val) {
- /* For now, copy the len/data into a buffer */
- memset(&fget_buf, 0, sizeof(fget_buf));
- fget_buf.f_sz = cp->c_len;
- memcpy(fget_buf.f_data, cp->c_data, cp->c_len);
- fget_biov[0].iov_base = (char *)&fget_buf;
- fget_biov[0].iov_len = sizeof(fget_buf.f_sz) +
- cp->c_len;
- } else {
- fget_size = cp->c_len;
- fget_biov[0].iov_base = (char *)&fget_size;
- fget_biov[0].iov_len = sizeof(fget_size);
- }
-
- fget_biov[1].iov_base = NULL;
- fget_biov[1].iov_len = 0;
- *data = fget_biov;
- }
-
- return (err);
-}
-
-static void
-fget_done(struct iovec *data)
-{
-
- /* nothing needs to be freed */
-}
-
-static int
-fget_len_result(struct iovec **data)
-{
- return (fget_result(data, 0));
-}
-
-static int
-fget_val_result(struct iovec **data)
-{
- return (fget_result(data, 1));
-}
-
-static struct op_info fgetlen_info = {
- .op_start = fget_start,
- .op_data = fget_data,
- .op_result = fget_len_result,
- .op_done = fget_done
-};
-
-static struct op_info fgetval_info = {
- .op_start = fget_start,
- .op_data = fget_data,
- .op_result = fget_val_result,
- .op_done = fget_done
-};
-
-static struct req_info {
- int req_error;
- u_int req_count;
- uint32_t req_size;
- uint32_t req_type;
- uint32_t req_txid;
- struct op_info *req_op;
- int resp_error;
- int resp_count;
- size_t resp_size;
- size_t resp_off;
- struct iovec *resp_biov;
-} rinfo;
-
-static void
-fwctl_response_done(void)
-{
-
- (*rinfo.req_op->op_done)(rinfo.resp_biov);
-
- /* reinit the req data struct */
- memset(&rinfo, 0, sizeof(rinfo));
-}
-
-static void
-fwctl_request_done(void)
-{
-
- rinfo.resp_error = (*rinfo.req_op->op_result)(&rinfo.resp_biov);
-
- /* XXX only a single vector supported at the moment */
- rinfo.resp_off = 0;
- if (rinfo.resp_biov == NULL) {
- rinfo.resp_size = 0;
- } else {
- rinfo.resp_size = rinfo.resp_biov[0].iov_len;
- }
-}
-
-static int
-fwctl_request_start(void)
-{
- int err;
-
- /* Data size doesn't include header */
- rinfo.req_size -= 12;
-
- rinfo.req_op = &errop_info;
- if (rinfo.req_type <= OP_MAX && ops[rinfo.req_type] != NULL)
- rinfo.req_op = ops[rinfo.req_type];
-
- err = (*rinfo.req_op->op_start)(rinfo.req_size);
-
- if (err) {
- errop_set(err);
- rinfo.req_op = &errop_info;
- }
-
- /* Catch case of zero-length message here */
- if (rinfo.req_size == 0) {
- fwctl_request_done();
- return (1);
- }
-
- return (0);
-}
-
-static int
-fwctl_request_data(uint32_t value)
-{
-
- /* Make sure remaining size is >= 0 */
- if (rinfo.req_size <= sizeof(uint32_t))
- rinfo.req_size = 0;
- else
- rinfo.req_size -= sizeof(uint32_t);
-
- (*rinfo.req_op->op_data)(value, rinfo.req_size);
-
- if (rinfo.req_size < sizeof(uint32_t)) {
- fwctl_request_done();
- return (1);
- }
-
- return (0);
-}
-
-static int
-fwctl_request(uint32_t value)
-{
-
- int ret;
-
- ret = 0;
-
- switch (rinfo.req_count) {
- case 0:
- /* Verify size */
- if (value < 12) {
- printf("msg size error");
- exit(4);
- }
- rinfo.req_size = value;
- rinfo.req_count = 1;
- break;
- case 1:
- rinfo.req_type = value;
- rinfo.req_count++;
- break;
- case 2:
- rinfo.req_txid = value;
- rinfo.req_count++;
- ret = fwctl_request_start();
- break;
- default:
- ret = fwctl_request_data(value);
- break;
- }
-
- return (ret);
-}
-
-static int
-fwctl_response(uint32_t *retval)
-{
- uint32_t *dp;
- ssize_t remlen;
-
- switch(rinfo.resp_count) {
- case 0:
- /* 4 x u32 header len + data */
- *retval = 4*sizeof(uint32_t) +
- roundup(rinfo.resp_size, sizeof(uint32_t));
- rinfo.resp_count++;
- break;
- case 1:
- *retval = rinfo.req_type;
- rinfo.resp_count++;
- break;
- case 2:
- *retval = rinfo.req_txid;
- rinfo.resp_count++;
- break;
- case 3:
- *retval = rinfo.resp_error;
- rinfo.resp_count++;
- break;
- default:
- remlen = rinfo.resp_size - rinfo.resp_off;
- dp = (uint32_t *)
- ((uint8_t *)rinfo.resp_biov->iov_base + rinfo.resp_off);
- if (remlen >= sizeof(uint32_t)) {
- *retval = *dp;
- } else if (remlen > 0) {
- *retval = fwctl_send_rest(dp, remlen);
- }
- rinfo.resp_off += sizeof(uint32_t);
- break;
- }
-
- if (rinfo.resp_count > 3 &&
- rinfo.resp_off >= rinfo.resp_size) {
- fwctl_response_done();
- return (1);
- }
-
- return (0);
-}
-
-
-/*
- * i/o port handling.
- */
-static uint8_t
-fwctl_inb(void)
-{
- uint8_t retval;
-
- retval = 0xff;
-
- switch (be_state) {
- case IDENT_SEND:
- retval = sig[ident_idx++];
- if (ident_idx >= sizeof(sig))
- be_state = REQ;
- break;
- default:
- break;
- }
-
- return (retval);
-}
-
-static void
-fwctl_outw(uint16_t val)
-{
- switch (be_state) {
- case IDENT_WAIT:
- if (val == 0) {
- be_state = IDENT_SEND;
- ident_idx = 0;
- }
- break;
- default:
- /* ignore */
- break;
- }
-}
-
-static uint32_t
-fwctl_inl(void)
-{
- uint32_t retval;
-
- switch (be_state) {
- case RESP:
- if (fwctl_response(&retval))
- be_state = REQ;
- break;
- default:
- retval = 0xffffffff;
- break;
- }
-
- return (retval);
-}
-
-static void
-fwctl_outl(uint32_t val)
-{
-
- switch (be_state) {
- case REQ:
- if (fwctl_request(val))
- be_state = RESP;
- default:
- break;
- }
-
-}
-
-static int
-fwctl_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
- uint32_t *eax, void *arg)
-{
-
- if (in) {
- if (bytes == 1)
- *eax = fwctl_inb();
- else if (bytes == 4)
- *eax = fwctl_inl();
- else
- *eax = 0xffff;
- } else {
- if (bytes == 2)
- fwctl_outw(*eax);
- else if (bytes == 4)
- fwctl_outl(*eax);
- }
-
- return (0);
-}
-INOUT_PORT(fwctl_wreg, FWCTL_OUT, IOPORT_F_INOUT, fwctl_handler);
-INOUT_PORT(fwctl_rreg, FWCTL_IN, IOPORT_F_IN, fwctl_handler);
-
-void
-fwctl_init(void)
-{
-
- ops[OP_GET_LEN] = &fgetlen_info;
- ops[OP_GET] = &fgetval_info;
-
- be_state = IDENT_WAIT;
-}
diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h
--- a/usr.sbin/bhyve/pci_emul.h
+++ b/usr.sbin/bhyve/pci_emul.h
@@ -41,6 +41,8 @@
#include <assert.h>
#define PCI_BARMAX PCIR_MAX_BAR_0 /* BAR registers in a Type 0 header */
+#define PCI_BARMAX_WITH_ROM (PCI_BARMAX + 1)
+#define PCI_ROM_IDX (PCI_BARMAX + 1)
struct vmctx;
struct pci_devinst;
@@ -88,13 +90,15 @@
PCIBAR_IO,
PCIBAR_MEM32,
PCIBAR_MEM64,
- PCIBAR_MEMHI64
+ PCIBAR_MEMHI64,
+ PCIBAR_ROM,
};
struct pcibar {
enum pcibar_type type; /* io or memory */
uint64_t size;
uint64_t addr;
+ uint8_t lobits;
};
#define PI_NAMESZ 40
@@ -160,7 +164,9 @@
void *pi_arg; /* devemu-private data */
u_char pi_cfgdata[PCI_REGMAX + 1];
- struct pcibar pi_bar[PCI_BARMAX + 1];
+ /* ROM is handled like a BAR */
+ struct pcibar pi_bar[PCI_BARMAX_WITH_ROM + 1];
+ uint64_t pi_romoffset;
};
struct msicap {
@@ -224,6 +230,7 @@
void pci_callback(void);
int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx,
enum pcibar_type type, uint64_t size);
+int pci_emul_alloc_rom(struct pci_devinst *pdi, uint64_t size, uint64_t *addr);
int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type);
void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes,
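
A sketch of how a device model might use the new ROM allocation API (the actual consumer, pci_gvt-d.c, is not part of this hunk, so the helper below is only an approximation with minimal error handling):

    #include <sys/stat.h>
    #include <fcntl.h>
    #include <stdint.h>
    #include <unistd.h>

    #include "pci_emul.h"

    static int
    example_load_rom(struct pci_devinst *pi, const char *romfile)
    {
            struct stat sb;
            uint64_t rom_addr;
            int fd, error;

            fd = open(romfile, O_RDONLY);
            if (fd < 0)
                    return (-1);
            if (fstat(fd, &sb) != 0) {
                    close(fd);
                    return (-1);
            }

            /* Reserves the expansion ROM BAR and returns a host mapping. */
            error = pci_emul_alloc_rom(pi, sb.st_size, &rom_addr);
            if (error == 0 &&
                read(fd, (void *)(uintptr_t)rom_addr, sb.st_size) != sb.st_size)
                    error = -1;
            close(fd);
            return (error);
    }
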
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -33,12 +33,15 @@
#include <sys/param.h>
#include <sys/linker_set.h>
+#include <sys/mman.h>
+
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <ctype.h>
#include <errno.h>
+#include <err.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
@@ -72,6 +75,8 @@
#define MAXSLOTS (PCI_SLOTMAX + 1)
#define MAXFUNCS (PCI_FUNCMAX + 1)
+#define GB (1024 * 1024 * 1024UL)
+
struct funcinfo {
char *fi_name;
char *fi_param;
@@ -101,18 +106,36 @@
SET_DECLARE(pci_devemu_set, struct pci_devemu);
static uint64_t pci_emul_iobase;
+static uint64_t pci_emul_iolim;
+static uint64_t pci_emul_rombase;
+static uint64_t pci_emul_romoffset;
+static uint64_t pci_emul_romlim;
static uint64_t pci_emul_membase32;
+static uint64_t pci_emul_memlim32;
static uint64_t pci_emul_membase64;
static uint64_t pci_emul_memlim64;
+struct pci_bar_allocation {
+ TAILQ_ENTRY(pci_bar_allocation) pci_bar_chain;
+ struct pci_devinst *pdi;
+ int idx;
+ enum pcibar_type type;
+ uint64_t size;
+};
+TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars = TAILQ_HEAD_INITIALIZER(
+ pci_bars);
+
#define PCI_EMUL_IOBASE 0x2000
#define PCI_EMUL_IOLIMIT 0x10000
+#define PCI_EMUL_ROMSIZE 0x10000000
+
#define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */
#define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */
SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);
#define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE
+#define PCI_EMUL_MEMSIZE64 (32 * GB)
static struct pci_devemu *pci_emul_finddev(char *name);
static void pci_lintr_route(struct pci_devinst *pi);
@@ -502,6 +525,12 @@
(*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
pi->pi_bar[idx].addr);
break;
+ case PCIBAR_ROM:
+ error = 0;
+ if (pe->pe_baraddr != NULL)
+ (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
+ pi->pi_bar[idx].addr);
+ break;
default:
error = EINVAL;
break;
@@ -523,6 +552,13 @@
modify_bar_registration(pi, idx, 1);
}
+/* Is the ROM enabled for the emulated pci device? */
+static int
+romen(struct pci_devinst *pi)
+{
+ return (pi->pi_bar[PCI_ROM_IDX].lobits & PCIM_BIOS_ENABLE) == PCIM_BIOS_ENABLE;
+}
+
/* Are we decoding i/o port accesses for the emulated pci device? */
static int
porten(struct pci_devinst *pi)
@@ -589,11 +625,11 @@
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
uint64_t size)
{
- int error;
- uint64_t *baseptr, limit, addr, mask, lobits, bar;
- uint16_t cmd, enbit;
-
- assert(idx >= 0 && idx <= PCI_BARMAX);
+ if ((type != PCIBAR_ROM) && (idx < 0 || idx > PCI_BARMAX)) {
+ errx(4, "Illegal BAR idx");
+ } else if ((type == PCIBAR_ROM) && (idx != PCI_ROM_IDX)) {
+ errx(4, "Illegal ROM idx");
+ }
if ((size & (size - 1)) != 0)
size = 1UL << flsl(size); /* round up to a power of 2 */
@@ -602,11 +638,89 @@
if (type == PCIBAR_IO) {
if (size < 4)
size = 4;
+ } else if (type == PCIBAR_ROM) {
+ if (size < ~PCIM_BIOS_ADDR_MASK + 1)
+ size = ~PCIM_BIOS_ADDR_MASK + 1;
} else {
if (size < 16)
size = 16;
}
+ /* allocate new bar */
+ struct pci_bar_allocation *new_bar = malloc(sizeof(struct pci_bar_allocation));
+ memset(new_bar, 0, sizeof(struct pci_bar_allocation));
+ new_bar->pdi = pdi;
+ new_bar->idx = idx;
+ new_bar->type = type;
+ new_bar->size = size;
+
+ /* get bar position */
+ struct pci_bar_allocation *bar = NULL;
+ TAILQ_FOREACH (bar, &pci_bars, pci_bar_chain) {
+ if (bar->size < size) {
+ break;
+ }
+ }
+
+ /* insert bar into queue */
+ if (bar == NULL) {
+ TAILQ_INSERT_TAIL(&pci_bars, new_bar, pci_bar_chain);
+ } else {
+ TAILQ_INSERT_BEFORE(bar, new_bar, pci_bar_chain);
+ }
+
+ return (0);
+}
+
+int
+pci_emul_alloc_rom(struct pci_devinst *pdi, uint64_t size, uint64_t *addr)
+{
+ /* allocate the ROM space once */
+ if (pci_emul_rombase == 0) {
+ pci_emul_rombase = (uint64_t)vm_create_devmem(pdi->pi_vmctx, VM_PCIROM,
+ "pcirom", PCI_EMUL_ROMSIZE);
+ if ((void *)pci_emul_rombase == MAP_FAILED)
+ return -ENOMEM;
+ pci_emul_romlim = pci_emul_rombase + PCI_EMUL_ROMSIZE;
+ pci_emul_romoffset = 0;
+ }
+
+ /* round up to a power of 2 */
+ uint64_t rom_size = 1UL << flsl(size);
+ /* ROM size must be at least 2 KB */
+ rom_size = MAX(rom_size, ~PCIM_BIOS_ADDR_MASK + 1);
+
+ /* check if the ROM fits into the ROM space */
+ if (pci_emul_romoffset + rom_size > PCI_EMUL_ROMSIZE)
+ return -E2BIG;
+
+ /* allocate ROM BAR */
+ const int error = pci_emul_alloc_bar(pdi, PCI_ROM_IDX, PCIBAR_ROM, rom_size);
+ if (error)
+ return error;
+
+ /* return address */
+ *addr = pci_emul_rombase + pci_emul_romoffset;
+ /* save the offset into the ROM space */
+ pdi->pi_romoffset = pci_emul_romoffset;
+ /* increase offset for next ROM */
+ pci_emul_romoffset += rom_size;
+
+ return (0);
+}
+
+static int
+pci_emul_assign_bar(struct pci_bar_allocation *pci_bar)
+{
+ struct pci_devinst *pdi = pci_bar->pdi;
+ int idx = pci_bar->idx;
+ enum pcibar_type type = pci_bar->type;
+ uint64_t size = pci_bar->size;
+
+ int error;
+ uint64_t *baseptr, limit, addr, mask, lobits, bar;
+ uint16_t cmd, enbit;
+
switch (type) {
case PCIBAR_NONE:
baseptr = NULL;
@@ -614,7 +728,7 @@
break;
case PCIBAR_IO:
baseptr = &pci_emul_iobase;
- limit = PCI_EMUL_IOLIMIT;
+ limit = pci_emul_iolim;
mask = PCIM_BAR_IO_BASE;
lobits = PCIM_BAR_IO_SPACE;
enbit = PCIM_CMD_PORTEN;
@@ -633,21 +747,33 @@
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
PCIM_BAR_MEM_PREFETCH;
- } else {
- baseptr = &pci_emul_membase32;
- limit = PCI_EMUL_MEMLIMIT32;
- mask = PCIM_BAR_MEM_BASE;
- lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
+ enbit = PCIM_CMD_MEMEN;
+ break;
}
- enbit = PCIM_CMD_MEMEN;
- break;
+ /*
+ * Use 32 bit BARs for small requests:
+ * Fallthrough into MEM32 case
+ */
+ type = PCIBAR_MEM32;
+ pdi->pi_bar[idx + 1].type = PCIBAR_NONE;
+ /* clear 64-bit flag */
+ pdi->pi_bar[idx].lobits &= ~PCIM_BAR_MEM_64;
+ /* [fallthrough] */
case PCIBAR_MEM32:
baseptr = &pci_emul_membase32;
- limit = PCI_EMUL_MEMLIMIT32;
+ limit = pci_emul_memlim32;
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
enbit = PCIM_CMD_MEMEN;
break;
+ case PCIBAR_ROM:
+ /* do not claim memory for ROM. OVMF will do it for us. */
+ baseptr = NULL;
+ limit = 0;
+ mask = PCIM_BIOS_ADDR_MASK;
+ lobits = 0;
+ enbit = PCIM_CMD_MEMEN;
+ break;
default:
printf("pci_emul_alloc_base: invalid bar type %d\n", type);
assert(0);
@@ -662,6 +788,13 @@
pdi->pi_bar[idx].type = type;
pdi->pi_bar[idx].addr = addr;
pdi->pi_bar[idx].size = size;
+ /* Passthru devices use the same lobits as the physical device;
+ * they have already set this field.
+ */
+ if (pdi->pi_bar[idx].lobits != 0)
+ lobits = pdi->pi_bar[idx].lobits;
+ else
+ pdi->pi_bar[idx].lobits = lobits;
/* Initialize the BAR register in config space */
bar = (addr & mask) | lobits;
@@ -676,7 +809,9 @@
cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
if ((cmd & enbit) != enbit)
pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);
- register_bar(pdi, idx);
+ if (type != PCIBAR_ROM) {
+ register_bar(pdi, idx);
+ }
return (0);
}
@@ -1098,25 +1233,17 @@
struct slotinfo *si;
struct funcinfo *fi;
size_t lowmem;
- uint64_t cpu_maxphysaddr, pci_emul_memresv64;
- u_int regs[4];
int bus, slot, func, error;
pci_emul_iobase = PCI_EMUL_IOBASE;
+ pci_emul_iolim = PCI_EMUL_IOLIMIT;
+
pci_emul_membase32 = vm_get_lowmem_limit(ctx);
+ pci_emul_memlim32 = PCI_EMUL_MEMLIMIT32;
- do_cpuid(0x80000008, regs);
- cpu_maxphysaddr = 1ULL << (regs[0] & 0xff);
- if (cpu_maxphysaddr > VM_MAXUSER_ADDRESS_LA48)
- cpu_maxphysaddr = VM_MAXUSER_ADDRESS_LA48;
- pci_emul_memresv64 = cpu_maxphysaddr / 4;
- /*
- * Max power of 2 that is less then
- * cpu_maxphysaddr - pci_emul_memresv64.
- */
- pci_emul_membase64 = 1ULL << (flsl(cpu_maxphysaddr -
- pci_emul_memresv64) - 1);
- pci_emul_memlim64 = cpu_maxphysaddr;
+ pci_emul_membase64 = 4 * GB + vm_get_highmem_size(ctx);
+ pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64);
+ pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64;
for (bus = 0; bus < MAXBUSES; bus++) {
if ((bi = pci_businfo[bus]) == NULL)
@@ -1129,6 +1256,7 @@
bi->membase32 = pci_emul_membase32;
bi->membase64 = pci_emul_membase64;
+ /* first pass: initialize devices */
for (slot = 0; slot < MAXSLOTS; slot++) {
si = &bi->slotinfo[slot];
for (func = 0; func < MAXFUNCS; func++) {
@@ -1144,6 +1272,18 @@
}
}
+ /* second pass: assign BARs */
+ struct pci_bar_allocation *bar;
+ TAILQ_FOREACH (bar, &pci_bars, pci_bar_chain) {
+ pci_emul_assign_bar(bar);
+ }
+ /* free the BAR allocation list */
+ while (!TAILQ_EMPTY(&pci_bars)) {
+ bar = TAILQ_FIRST(&pci_bars);
+ TAILQ_REMOVE(&pci_bars, bar, pci_bar_chain);
+ free(bar);
+ }
+
/*
* Add some slop to the I/O and memory resources decoded by
* this bus to give a guest some flexibility if it wants to
@@ -1717,7 +1857,7 @@
* If the MMIO or I/O address space decoding has changed then
* register/unregister all BARs that decode that address space.
*/
- for (i = 0; i <= PCI_BARMAX; i++) {
+ for (i = 0; i <= PCI_BARMAX_WITH_ROM; i++) {
switch (pi->pi_bar[i].type) {
case PCIBAR_NONE:
case PCIBAR_MEMHI64:
@@ -1731,6 +1871,11 @@
unregister_bar(pi, i);
}
break;
+ case PCIBAR_ROM:
+ /* skip (un-)registering the ROM if it is disabled */
+ if (pi->pi_bar[i].lobits == 0)
+ break;
+ /* fallthrough */
case PCIBAR_MEM32:
case PCIBAR_MEM64:
/* MMIO address space decoding changed? */
@@ -1851,16 +1996,21 @@
return;
/*
- * Special handling for write to BAR registers
+ * Special handling for write to BAR and ROM registers
*/
- if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
+ if ((coff >= PCIR_BAR(0) && coff <= PCIR_BAR(PCI_BARMAX)) ||
+ (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4)) {
/*
* Ignore writes to BAR registers that are not
* 4-byte aligned.
*/
if (bytes != 4 || (coff & 0x3) != 0)
return;
- idx = (coff - PCIR_BAR(0)) / 4;
+ if (coff != PCIR_BIOS) {
+ idx = (coff - PCIR_BAR(0)) / 4;
+ } else {
+ idx = PCI_ROM_IDX;
+ }
mask = ~(pi->pi_bar[idx].size - 1);
switch (pi->pi_bar[idx].type) {
case PCIBAR_NONE:
@@ -1869,7 +2019,7 @@
case PCIBAR_IO:
addr = *eax & mask;
addr &= 0xffff;
- bar = addr | PCIM_BAR_IO_SPACE;
+ bar = addr | pi->pi_bar[idx].lobits;
/*
* Register the new BAR value for interception
*/
@@ -1880,7 +2030,7 @@
break;
case PCIBAR_MEM32:
addr = bar = *eax & mask;
- bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
+ bar |= pi->pi_bar[idx].lobits;
if (addr != pi->pi_bar[idx].addr) {
update_bar_address(pi, addr, idx,
PCIBAR_MEM32);
@@ -1888,8 +2038,7 @@
break;
case PCIBAR_MEM64:
addr = bar = *eax & mask;
- bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
- PCIM_BAR_MEM_PREFETCH;
+ bar |= pi->pi_bar[idx].lobits;
if (addr != (uint32_t)pi->pi_bar[idx].addr) {
update_bar_address(pi, addr, idx,
PCIBAR_MEM64);
@@ -1904,6 +2053,20 @@
PCIBAR_MEMHI64);
}
break;
+ case PCIBAR_ROM:
+ addr = bar = *eax & mask;
+ if (memen(pi) && romen(pi)) {
+ unregister_bar(pi, idx);
+ }
+ pi->pi_bar[idx].addr = addr;
+ pi->pi_bar[idx].lobits = *eax &
+ PCIM_BIOS_ENABLE;
+ /* romen() could have changed its value */
+ if (memen(pi) && romen(pi)) {
+ register_bar(pi, idx);
+ }
+ bar |= pi->pi_bar[idx].lobits;
+ break;
default:
assert(0);
}
@@ -1941,7 +2104,7 @@
} else {
x = *eax;
cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE;
- cfgoff = x & PCI_REGMAX;
+ cfgoff = (x & PCI_REGMAX) & ~0x03;
cfgfunc = (x >> 8) & PCI_FUNCMAX;
cfgslot = (x >> 11) & PCI_SLOTMAX;
cfgbus = (x >> 16) & PCI_BUSMAX;
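
With this rework the 64-bit MMIO window is no longer derived from CPUID; the PCI initialization code places it directly above guest high memory and rounds it up to the 32 GB window size. A worked example under an assumed guest configuration (a standalone sketch, not part of the patch):

#include <sys/param.h>	/* roundup2() */

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	const uint64_t GB = 1024 * 1024 * 1024UL;
	/* assumed guest layout: 8 GB of memory above the 4 GB boundary */
	const uint64_t highmem = 8 * GB;

	uint64_t base64 = roundup2(4 * GB + highmem, 32 * GB);
	assert(base64 == 32 * GB);		/* pci_emul_membase64 */
	assert(base64 + 32 * GB == 64 * GB);	/* pci_emul_memlim64 */
	return (0);
}
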
diff --git a/usr.sbin/bhyve/pci_gvt-d.c b/usr.sbin/bhyve/pci_gvt-d.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/pci_gvt-d.c
@@ -0,0 +1,288 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/mman.h>
+
+#include <machine/vmm.h>
+
+#include <dev/pci/pcireg.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include "e820.h"
+#include "inout.h"
+#include "pci_passthru.h"
+
+#define MB (1024 * 1024UL)
+#define GB (1024 * MB)
+
+#ifndef _PATH_MEM
+#define _PATH_MEM "/dev/mem"
+#endif
+
+/*
+ * PCI definitions
+ */
+#define PCIM_BDSM_GSM_ALIGNMENT \
+ 0x00100000 /* Graphics Stolen Memory is 1 MB aligned */
+
+/* GVT-d definitions */
+#define GVT_D_MAP_OPREGION 0
+#define GVT_D_MAP_GSM 1
+
+static int
+gvt_d_aslswrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
+ int bytes, uint32_t val)
+{
+ struct passthru_softc *sc;
+
+ sc = pi->pi_arg;
+
+ struct passthru_mmio_mapping *opregion =
+ &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+
+ /* write new value to cfg space */
+ if (bytes == 1) {
+ pci_set_cfgdata8(pi, coff, val);
+ } else if (bytes == 2) {
+ pci_set_cfgdata16(pi, coff, val);
+ } else {
+ pci_set_cfgdata32(pi, coff, val);
+ }
+
+ /* get new address of opregion */
+ opregion->gpa = pci_get_cfgdata32(pi, PCIR_ASLS_CTL);
+
+ /* copy opregion into guest mem */
+ opregion->gva = vm_map_gpa(ctx, opregion->gpa, opregion->len);
+ if (opregion->gva == 0) {
+ warnx("%s: Unable to map opregion (0x%016lx)", __func__,
+ opregion->gpa);
+ /* return 0 to avoid emulation of ASLS register */
+ return (0);
+ }
+ memcpy(opregion->gva, opregion->hva, opregion->len);
+
+ return (0);
+}
+
+static vm_paddr_t
+gvt_d_alloc_mmio_memory(vm_paddr_t host_address, vm_paddr_t length, vm_paddr_t alignment,
+ enum e820_memory_type type)
+{
+ /* try to use host address */
+ vm_paddr_t address = e820_alloc(host_address, length, E820_ALIGNMENT_NONE,
+ type, E820_ALLOCATE_SPECIFIC);
+ if (address != 0) {
+ return address;
+ }
+
+ /* try to use highest address below 4 GB */
+ return e820_alloc(4 * GB, length, alignment, type,
+ E820_ALLOCATE_HIGHEST);
+}
+
+static int
+gvt_d_setup_gsm(struct vmctx *ctx, struct pci_devinst *pi)
+{
+ struct passthru_softc *sc = pi->pi_arg;
+
+ struct passthru_mmio_mapping *gsm = &sc->psc_mmio_map[GVT_D_MAP_GSM];
+
+ const int error = vm_get_memory_region_info(ctx, &gsm->hpa, &gsm->len,
+ MEMORY_REGION_INTEL_GSM);
+ if (error) {
+ warnx(
+ "%s: Unable to get Graphics Stolen Memory base and length",
+ __func__);
+ return (error);
+ }
+ gsm->hva = NULL; /* unused */
+ gsm->gva = NULL; /* unused */
+ gsm->gpa = gvt_d_alloc_mmio_memory(gsm->hpa, gsm->len,
+ PCIM_BDSM_GSM_ALIGNMENT, E820_TYPE_RESERVED);
+ if (gsm->gpa == 0) {
+ warnx(
+ "%s: Unable to add Graphics Stolen Memory to E820 table (hpa 0x%lx len 0x%lx)",
+ __func__, gsm->hpa, gsm->len);
+ e820_dump_table();
+ return (-1);
+ }
+ if (gsm->gpa != gsm->hpa) {
+ /*
+ * ACRN source code implies that graphics driver for newer Intel
+ * platforms like Tiger Lake will read the Graphics Stolen
+ * Memory address from an MMIO register. We have three options
+ * to solve this issue:
+ * 1. Patch the value in the MMIO register
+ * This could have unintended side effects. Without
+ * any documentation on how this register is used by
+ * the GPU, don't do it.
+ * 2. Trap the MMIO register
+ * It's not possible to trap a single MMIO
+ * register. We need to trap a whole page. Trapping
+ * a number of MMIO registers could degrade the
+ * performance noticeably.
+ * 3. Use a 1:1 host to guest mapping
+ * This may not always be possible.
+ * As far as we know, no supported platform requires a 1:1
+ * mapping. For that reason, just log a warning.
+ */
+ warnx(
+ "Warning: Unable to reuse host address of Graphics Stolen Memory. GPU passthrough might not work properly.");
+ }
+
+ const uint64_t bdsm = read_config(&sc->psc_sel, PCIR_BDSM, 4);
+ pci_set_cfgdata32(pi, PCIR_BDSM,
+ gsm->gpa | (bdsm & (PCIM_BDSM_GSM_ALIGNMENT - 1)));
+
+ return (0);
+}
+
+static int
+gvt_d_setup_opregion(struct vmctx *ctx, struct pci_devinst *pi, const int memfd)
+{
+ struct passthru_softc *sc = pi->pi_arg;
+
+ struct passthru_mmio_mapping *opregion =
+ &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+
+ const int error = vm_get_memory_region_info(ctx, &opregion->hpa,
+ &opregion->len, MEMORY_REGION_INTEL_OPREGION);
+ if (error) {
+ warnx(
+ "%s: Unable to get OpRegion base and length",
+ __func__);
+ return (error);
+ }
+ opregion->hva = mmap(NULL, opregion->len, PROT_READ, MAP_SHARED, memfd,
+ opregion->hpa);
+ if (opregion->hva == MAP_FAILED) {
+ warnx("%s: Unable to map host OpRegion", __func__);
+ return (-1);
+ }
+ opregion->gpa = gvt_d_alloc_mmio_memory(opregion->hpa, opregion->len,
+ E820_ALIGNMENT_NONE, E820_TYPE_NVS);
+ if (opregion->gpa == 0) {
+ warnx(
+ "%s: Unable to add OpRegion to E820 table (hpa 0x%lx len 0x%lx)",
+ __func__, opregion->hpa, opregion->len);
+ e820_dump_table();
+ return (-1);
+ }
+ opregion->gva = vm_map_gpa(ctx, opregion->gpa, opregion->len);
+ if (opregion->gva == NULL) {
+ warnx("%s: Unable to map guest OpRegion", __func__);
+ return (-1);
+ }
+ if (opregion->gpa != opregion->hpa) {
+ /*
+ * A 1:1 host to guest mapping is not required but this could
+ * change in the future.
+ */
+ warnx(
+ "Warning: Unable to reuse host address of OpRegion. GPU passthrough might not work properly.");
+ }
+
+ memcpy(opregion->gva, opregion->hva, opregion->len);
+
+ pci_set_cfgdata32(pi, PCIR_ASLS_CTL, opregion->gpa);
+
+ return (0);
+}
+
+int
+gvt_d_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+ int error;
+ struct passthru_softc *sc;
+
+ sc = pi->pi_arg;
+
+ /* get memory descriptor */
+ const int memfd = open(_PATH_MEM, O_RDWR, 0);
+ if (memfd < 0) {
+ warn("%s: Failed to open %s", __func__, _PATH_MEM);
+ return (-1);
+ }
+
+ if ((error = gvt_d_setup_gsm(ctx, pi)) != 0) {
+ warnx("%s: Unable to setup Graphics Stolen Memory", __func__);
+ goto done;
+ }
+
+ if ((error = gvt_d_setup_opregion(ctx, pi, memfd)) != 0) {
+ warnx("%s: Unable to setup OpRegion", __func__);
+ goto done;
+ }
+
+ /* protect Graphics Stolen Memory register */
+ if ((error = set_pcir_handler(sc, PCIR_BDSM, 4,
+ passthru_cfgread_emulate, passthru_cfgwrite_emulate)) != 0) {
+ warnx("%s: Unable to protect opregion", __func__);
+ goto done;
+ }
+ /* protect opregion register */
+ if ((error = set_pcir_handler(sc, PCIR_ASLS_CTL, 4,
+ passthru_cfgread_emulate, gvt_d_aslswrite)) != 0) {
+ warnx("%s: Unable to protect opregion", __func__);
+ goto done;
+ }
+
+done:
+ return (error);
+}
+
+void
+gvt_d_deinit(struct vmctx *ctx, struct pci_devinst *pi)
+{
+ struct passthru_softc *sc;
+
+ sc = pi->pi_arg;
+
+ struct passthru_mmio_mapping *opregion =
+ &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+
+ /* HVA is only set if the OpRegion was initialized */
+ if (opregion->hva)
+ munmap((void *)opregion->hva, opregion->len);
+}
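
For the relocated Graphics Stolen Memory, gvt_d_setup_gsm() keeps the low (sub-1 MB) bits of the host BDSM register and only swaps in the guest physical base chosen by the E820 allocator. A small worked example with made-up addresses (a standalone sketch, not part of the patch):

#include <assert.h>
#include <stdint.h>

#define PCIM_BDSM_GSM_ALIGNMENT	0x00100000	/* 1 MB, as above */

int
main(void)
{
	/* assumed values for illustration only */
	const uint64_t host_bdsm = 0x7a800001;		/* host GSM base + flag bits */
	const uint64_t guest_gsm_gpa = 0xbff00000;	/* from e820_alloc() */

	uint64_t guest_bdsm = guest_gsm_gpa |
	    (host_bdsm & (PCIM_BDSM_GSM_ALIGNMENT - 1));
	assert(guest_bdsm == 0xbff00001);
	return (0);
}
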
diff --git a/usr.sbin/bhyve/pci_lpc.c b/usr.sbin/bhyve/pci_lpc.c
--- a/usr.sbin/bhyve/pci_lpc.c
+++ b/usr.sbin/bhyve/pci_lpc.c
@@ -33,9 +33,13 @@
__FBSDID("$FreeBSD$");
#include <sys/types.h>
+#include <sys/pciio.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -83,6 +87,29 @@
static bool pctestdev_present;
+#ifndef _PATH_DEVPCI
+#define _PATH_DEVPCI "/dev/pci"
+#endif
+
+static int pcifd = -1;
+
+static uint32_t
+read_config(struct pcisel *sel, long reg, int width)
+{
+ struct pci_io pi;
+ pi.pi_sel.pc_domain = sel->pc_domain;
+ pi.pi_sel.pc_bus = sel->pc_bus;
+ pi.pi_sel.pc_dev = sel->pc_dev;
+ pi.pi_sel.pc_func = sel->pc_func;
+ pi.pi_reg = reg;
+ pi.pi_width = width;
+
+ if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
+ return (0);
+
+ return (pi.pi_data);
+}
+
/*
* LPC device configuration is in the following form:
* <lpc_device_name>[,<options>]
@@ -446,6 +473,40 @@
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA);
+ /* open host device */
+ if (pcifd < 0) {
+ pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
+ if (pcifd < 0) {
+ warn("failed to open %s", _PATH_DEVPCI);
+ return (-1);
+ }
+ }
+
+ /* On Intel systems, the LPC bridge is always at 0:1f.0. */
+ struct pcisel sel;
+ sel.pc_domain = 0;
+ sel.pc_bus = 0;
+ sel.pc_dev = 0x1f;
+ sel.pc_func = 0;
+
+ if (read_config(&sel, PCIR_VENDOR, 2) == PCI_VENDOR_INTEL) {
+ /*
+ * The VID, DID, REVID, SUBVID and SUBDID of igd-lpc have to match
+ * the physical values. Without them, the GVT-d GOP driver won't
+ * work.
+ */
+ pci_set_cfgdata16(
+ pi, PCIR_DEVICE, read_config(&sel, PCIR_DEVICE, 2));
+ pci_set_cfgdata16(
+ pi, PCIR_VENDOR, read_config(&sel, PCIR_VENDOR, 2));
+ pci_set_cfgdata8(
+ pi, PCIR_REVID, read_config(&sel, PCIR_REVID, 1));
+ pci_set_cfgdata16(
+ pi, PCIR_SUBVEND_0, read_config(&sel, PCIR_SUBVEND_0, 2));
+ pci_set_cfgdata16(
+ pi, PCIR_SUBDEV_0, read_config(&sel, PCIR_SUBDEV_0, 2));
+ }
+
lpc_bridge = pi;
return (0);
diff --git a/usr.sbin/bhyve/pci_passthru.h b/usr.sbin/bhyve/pci_passthru.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/pci_passthru.h
@@ -0,0 +1,84 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#pragma once
+
+#include <sys/pciio.h>
+
+#include <vmmapi.h>
+
+#include "pci_emul.h"
+
+struct passthru_mmio_mapping {
+ vm_paddr_t gpa; /* guest physical address */
+ void *gva; /* guest virtual address */
+ vm_paddr_t hpa; /* host physical address */
+ void *hva; /* host virtual address */
+ vm_paddr_t len;
+};
+
+typedef int (*cfgread_handler)(struct vmctx *ctx, int vcpu,
+ struct pci_devinst *pi, int coff, int bytes, uint32_t *rv);
+typedef int (*cfgwrite_handler)(struct vmctx *ctx, int vcpu,
+ struct pci_devinst *pi, int coff, int bytes, uint32_t val);
+
+struct passthru_softc {
+ struct pci_devinst *psc_pi;
+ /* ROM is handled like a BAR */
+ struct pcibar psc_bar[PCI_BARMAX_WITH_ROM + 1];
+ struct {
+ int capoff;
+ int msgctrl;
+ int emulated;
+ } psc_msi;
+ struct {
+ int capoff;
+ } psc_msix;
+ struct pcisel psc_sel;
+
+ struct passthru_mmio_mapping psc_mmio_map[2];
+ cfgread_handler psc_pcir_rhandler[PCI_REGMAX + 1];
+ cfgwrite_handler psc_pcir_whandler[PCI_REGMAX + 1];
+};
+
+uint32_t read_config(const struct pcisel *sel, long reg, int width);
+void write_config(const struct pcisel *sel, long reg, int width, uint32_t data);
+int passthru_cfgread_default(struct vmctx *ctx, int vcpu,
+ struct pci_devinst *pi, int coff, int bytes, uint32_t *rv);
+int passthru_cfgread_emulate(struct vmctx *ctx, int vcpu,
+ struct pci_devinst *pi, int coff, int bytes, uint32_t *rv);
+int passthru_cfgwrite_default(struct vmctx *ctx, int vcpu,
+ struct pci_devinst *pi, int coff, int bytes, uint32_t val);
+int passthru_cfgwrite_emulate(struct vmctx *ctx, int vcpu,
+ struct pci_devinst *pi, int coff, int bytes, uint32_t val);
+int set_pcir_handler(struct passthru_softc *sc, uint32_t reg, uint32_t len,
+ cfgread_handler rhandler, cfgwrite_handler whandler);
+int gvt_d_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts);
+void gvt_d_deinit(struct vmctx *ctx, struct pci_devinst *pi);
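
The per-register handler tables declared here let a quirk decide, register by register, whether config space accesses go to the physical device (passthru_cfgread_default/passthru_cfgwrite_default), are served from the emulated config space (passthru_cfgread_emulate/passthru_cfgwrite_emulate), or hit a custom function. A hypothetical custom write handler, purely for illustration (handler name and register offset are assumptions):

#include <err.h>

#include "pci_passthru.h"

/*
 * Hypothetical quirk: trace writes to some register and then apply the
 * default passthrough behaviour.
 */
static int
example_cfgwrite_trace(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
    int coff, int bytes, uint32_t val)
{
	warnx("cfg write: off 0x%x bytes %d val 0x%x", coff, bytes, val);
	return (passthru_cfgwrite_default(ctx, vcpu, pi, coff, bytes, val));
}

/*
 * A quirk's init path could then hook a 4-byte register at an assumed
 * offset 0x50 like this:
 *
 *	error = set_pcir_handler(sc, 0x50, 4,
 *	    passthru_cfgread_default, example_cfgwrite_trace);
 */
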
diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c
--- a/usr.sbin/bhyve/pci_passthru.c
+++ b/usr.sbin/bhyve/pci_passthru.c
@@ -48,19 +48,19 @@
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include <machine/vmm.h>
+
#include <err.h>
#include <errno.h>
#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
#include <sysexits.h>
#include <unistd.h>
-#include <machine/vmm.h>
-#include <vmmapi.h>
-#include "pci_emul.h"
#include "mem.h"
+#include "pci_passthru.h"
#ifndef _PATH_DEVPCI
#define _PATH_DEVPCI "/dev/pci"
@@ -79,24 +79,12 @@
#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1)
#define MSIX_CAPLEN 12
+#define PCI_CAP_START_OFFSET 0x40
+
static int pcifd = -1;
static int iofd = -1;
static int memfd = -1;
-struct passthru_softc {
- struct pci_devinst *psc_pi;
- struct pcibar psc_bar[PCI_BARMAX + 1];
- struct {
- int capoff;
- int msgctrl;
- int emulated;
- } psc_msi;
- struct {
- int capoff;
- } psc_msix;
- struct pcisel psc_sel;
-};
-
static int
msi_caplen(int msgctrl)
{
@@ -119,7 +107,7 @@
return (len);
}
-static uint32_t
+uint32_t
read_config(const struct pcisel *sel, long reg, int width)
{
struct pci_io pi;
@@ -135,7 +123,7 @@
return (pi.pi_data);
}
-static void
+void
write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
{
struct pci_io pi;
@@ -556,12 +544,23 @@
sc->psc_bar[i].type = bartype;
sc->psc_bar[i].size = size;
sc->psc_bar[i].addr = base;
+ sc->psc_bar[i].lobits = 0;
/* Allocate the BAR in the guest I/O or MMIO space */
error = pci_emul_alloc_bar(pi, i, bartype, size);
if (error)
return (-1);
+ /* Use the same lobits as the physical BAR */
+ uint8_t lobits = read_config(&sc->psc_sel, PCIR_BAR(i), 0x01);
+ if (bartype == PCIBAR_MEM32 || bartype == PCIBAR_MEM64) {
+ lobits &= ~PCIM_BAR_MEM_BASE;
+ } else {
+ lobits &= ~PCIM_BAR_IO_BASE;
+ }
+ sc->psc_bar[i].lobits = lobits;
+ pi->pi_bar[i].lobits = lobits;
+
/* The MSI-X table needs special handling */
if (i == pci_msix_table_bar(pi)) {
error = init_msix_table(ctx, sc, base);
@@ -595,6 +594,17 @@
sc->psc_sel.pc_dev = slot;
sc->psc_sel.pc_func = func;
+ /* copy physical PCI header to virtual cfgspace */
+ for (uint32_t i = 0; i < PCI_CAP_START_OFFSET; ++i) {
+ /*
+ * INTLINE and INTPIN shouldn't be copied from the physical device;
+ * they are already set by pci_emul_init.
+ */
+ if (i == PCIR_INTLINE || i == PCIR_INTPIN)
+ continue;
+ pci_set_cfgdata8(pi, i, read_config(&sc->psc_sel, i, 1));
+ }
+
if (cfginitmsi(sc) != 0) {
warnx("failed to initialize MSI for PCI %d/%d/%d",
bus, slot, func);
@@ -607,14 +617,154 @@
goto done;
}
- pci_set_cfgdata16(pi, PCIR_COMMAND, read_config(&sc->psc_sel,
- PCIR_COMMAND, 2));
+ write_config(
+ &sc->psc_sel, PCIR_COMMAND, 2, pci_get_cfgdata16(pi, PCIR_COMMAND));
error = 0; /* success */
done:
return (error);
}
+int
+set_pcir_handler(struct passthru_softc *sc, uint32_t reg, uint32_t len, cfgread_handler rhandler, cfgwrite_handler whandler)
+{
+ if (reg > PCI_REGMAX || reg + len > PCI_REGMAX + 1)
+ return (-1);
+
+ for (uint32_t i = reg; i < reg + len; ++i) {
+ sc->psc_pcir_rhandler[i] = rhandler;
+ sc->psc_pcir_whandler[i] = whandler;
+ }
+
+ return 0;
+}
+
+static int
+passthru_init_quirks(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+ struct passthru_softc *sc = pi->pi_arg;
+
+ uint16_t vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 0x02);
+ uint8_t class = read_config(&sc->psc_sel, PCIR_CLASS, 0x01);
+
+ /* currently only display devices have quirks */
+ if (class != PCIC_DISPLAY)
+ return (0);
+
+ if (vendor == PCI_VENDOR_INTEL)
+ return gvt_d_init(ctx, pi, opts);
+
+ return (0);
+}
+
+static void
+passthru_deinit_quirks(struct vmctx *ctx, struct pci_devinst *pi)
+{
+ struct passthru_softc *sc = pi->pi_arg;
+
+ if (sc == NULL)
+ return;
+
+ uint16_t vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 0x02);
+ uint8_t class = read_config(&sc->psc_sel, PCIR_CLASS, 0x01);
+
+ /* currently only display devices have quirks */
+ if (class != PCIC_DISPLAY)
+ return;
+
+ if (vendor == PCI_VENDOR_INTEL)
+ return gvt_d_deinit(ctx, pi);
+
+ return;
+}
+
+static void
+passthru_usage(char *opt)
+{
+ warnx("Invalid passthru option \"%s\"", opt);
+ warnx("passthru,<bus>/<dev>/<func>,{rom=rom_file}");
+}
+
+static int
+passthru_parse_opts(struct passthru_softc *sc, char *opts)
+{
+ int error = 0;
+ char *uopts = strdup(opts);
+ char *xopt = strtok(uopts, ",");
+ for (xopt = strtok(NULL, ","); xopt != NULL; xopt = strtok(NULL, ",")) {
+ char *config = strchr(xopt, '=');
+ if (config == NULL) {
+ error = -1;
+ break;
+ }
+ *config = '\0';
+ ++config;
+ if (strcmp(xopt, "rom") == 0) {
+ const int fd = open(config, O_RDONLY);
+ if (fd < 0) {
+ warnx("Can't open romfile \"%s\"", config);
+ error = -1;
+ break;
+ }
+ /* determine file size */
+ uint64_t rom_size = lseek(fd, 0, SEEK_END);
+ lseek(fd, 0, SEEK_SET);
+ /* read bios */
+ void *rom_addr = malloc(rom_size);
+ if (rom_addr == NULL) {
+ warnx("Can't malloc rom \"%s\" (size: 0x%8lx)",
+ config, rom_size);
+ error = -ENOMEM;
+ close(fd);
+ break;
+ }
+ rom_size = read(fd, rom_addr, rom_size);
+ close(fd);
+
+ /* save physical values of ROM */
+ sc->psc_bar[PCI_ROM_IDX].type = PCIBAR_ROM;
+ sc->psc_bar[PCI_ROM_IDX].addr = (uint64_t)rom_addr;
+ sc->psc_bar[PCI_ROM_IDX].size = rom_size;
+
+ continue;
+ }
+ /* option wasn't processed */
+ passthru_usage(xopt);
+ error = -1;
+ break;
+ }
+
+ return (error);
+}
+
+static int
+passthru_init_rom(struct vmctx *ctx, struct passthru_softc *sc)
+{
+ /* check if this device has a rom */
+ if (sc->psc_bar[PCI_ROM_IDX].size == 0)
+ return (0);
+
+ /* allocate ROM */
+ uint64_t rom_addr;
+ int error = pci_emul_alloc_rom(sc->psc_pi,
+ sc->psc_bar[PCI_ROM_IDX].size, &rom_addr);
+ if (error) {
+ warnx("Failed to alloc ROM");
+ goto done;
+ }
+
+ /* copy ROM to guest */
+ memcpy((void *)rom_addr, (void *)sc->psc_bar[PCI_ROM_IDX].addr,
+ sc->psc_bar[PCI_ROM_IDX].size);
+ /* free ROM */
+ free((void *)sc->psc_bar[PCI_ROM_IDX].addr);
+ /* save new address of ROM */
+ sc->psc_bar[PCI_ROM_IDX].addr = rom_addr;
+
+done:
+ return error;
+}
+
static int
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
@@ -701,10 +851,47 @@
pi->pi_arg = sc;
sc->psc_pi = pi;
+ /* parse opts */
+ if ((error = passthru_parse_opts(sc, opts)) != 0) {
+ warnx("invalid passthru options");
+ goto done;
+ }
+
/* initialize config space */
- error = cfginit(ctx, pi, bus, slot, func);
+ if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
+ goto done;
+
+ /* set default handler for all PCI registers */
+ if ((error = set_pcir_handler(sc, 0, PCI_REGMAX + 1,
+ passthru_cfgread_default, passthru_cfgwrite_default)) != 0)
+ goto done;
+ /* protect PCI header */
+ if ((error = set_pcir_handler(sc, 0, PCI_CAP_START_OFFSET,
+ passthru_cfgread_emulate, passthru_cfgwrite_emulate)) != 0)
+ goto done;
+ /* allow access to command and status register */
+ if ((error = set_pcir_handler(sc, PCIR_COMMAND, 0x04,
+ passthru_cfgread_default, passthru_cfgwrite_default)) != 0)
+ goto done;
+
+ /*
+ * Keep this order:
+ * the PCI register protection has to be set up before
+ * passthru_init_quirks(), and the ROM has to be initialized
+ * after it.
+ */
+ if ((error = passthru_init_quirks(ctx, pi, opts)) != 0)
+ goto done;
+
+ /* initialize ROM */
+ if ((error = passthru_init_rom(ctx, sc)) != 0)
+ goto done;
+
+ error = 0; /* success */
done:
if (error) {
+ passthru_deinit_quirks(ctx, pi);
free(sc);
vm_unassign_pptdev(ctx, bus, slot, func);
}
@@ -747,29 +934,29 @@
}
static int
-passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
- int coff, int bytes, uint32_t *rv)
+passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
+ int bytes, uint32_t *rv)
{
struct passthru_softc *sc;
sc = pi->pi_arg;
- /*
- * PCI BARs and MSI capability is emulated.
- */
- if (bar_access(coff) || msicap_access(sc, coff))
- return (-1);
+ return sc->psc_pcir_rhandler[coff](ctx, vcpu, pi, coff, bytes, rv);
+}
+
+int
+passthru_cfgread_default(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+ int coff, int bytes, uint32_t *rv)
+{
+ struct passthru_softc *sc;
+
+ sc = pi->pi_arg;
-#ifdef LEGACY_SUPPORT
/*
- * Emulate PCIR_CAP_PTR if this device does not support MSI capability
- * natively.
+ * MSI capability is emulated.
*/
- if (sc->psc_msi.emulated) {
- if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4)
- return (-1);
- }
-#endif
+ if (msicap_access(sc, coff) || msixcap_access(sc, coff))
+ return (-1);
/*
* Emulate the command register. If a single read reads both the
@@ -790,9 +977,28 @@
return (0);
}
+int
+passthru_cfgread_emulate(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+ int coff, int bytes, uint32_t *rv)
+{
+ return (-1);
+}
+
static int
-passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
- int coff, int bytes, uint32_t val)
+passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
+ int bytes, uint32_t val)
+{
+
+ struct passthru_softc *sc;
+
+ sc = pi->pi_arg;
+
+ return sc->psc_pcir_whandler[coff](ctx, vcpu, pi, coff, bytes, val);
+}
+
+int
+passthru_cfgwrite_default(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+ int coff, int bytes, uint32_t val)
{
int error, msix_table_entries, i;
struct passthru_softc *sc;
@@ -800,12 +1006,6 @@
sc = pi->pi_arg;
- /*
- * PCI BARs are emulated
- */
- if (bar_access(coff))
- return (-1);
-
/*
* MSI capability is emulated
*/
@@ -871,6 +1071,13 @@
return (0);
}
+int
+passthru_cfgwrite_emulate(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+ int coff, int bytes, uint32_t val)
+{
+ return (-1);
+}
+
static void
passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
uint64_t offset, int size, uint64_t value)
@@ -995,17 +1202,39 @@
}
}
+static void
+passthru_addr_rom(struct pci_devinst *pi, int idx, int enabled)
+{
+ if (!enabled)
+ vm_munmap_memseg(pi->pi_vmctx, pi->pi_bar[idx].addr,
+ pi->pi_bar[idx].size);
+ else
+ vm_mmap_memseg(pi->pi_vmctx, pi->pi_bar[idx].addr, VM_PCIROM,
+ pi->pi_romoffset, pi->pi_bar[idx].size,
+ PROT_READ | PROT_EXEC);
+}
+
static void
passthru_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
int enabled, uint64_t address)
{
-
- if (pi->pi_bar[baridx].type == PCIBAR_IO)
- return;
- if (baridx == pci_msix_table_bar(pi))
- passthru_msix_addr(ctx, pi, baridx, enabled, address);
- else
- passthru_mmio_addr(ctx, pi, baridx, enabled, address);
+ switch (pi->pi_bar[baridx].type) {
+ case PCIBAR_IO:
+ /* IO BARs are emulated */
+ break;
+ case PCIBAR_ROM:
+ passthru_addr_rom(pi, baridx, enabled);
+ break;
+ case PCIBAR_MEM32:
+ case PCIBAR_MEM64:
+ if (baridx == pci_msix_table_bar(pi))
+ passthru_msix_addr(ctx, pi, baridx, enabled, address);
+ else
+ passthru_mmio_addr(ctx, pi, baridx, enabled, address);
+ break;
+ default:
+ errx(4, "%s: invalid BAR type %d", __func__, pi->pi_bar[baridx].type);
+ }
}
struct pci_devemu passthru = {
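
Following the option syntax printed by passthru_usage() above, the ROM file is attached as an extra key on the passthru slot string, so an invocation along these lines should work (slot, BDF and path are placeholders):

	bhyve ... -s 5,passthru,2/0/0,rom=/path/to/vbios.rom ...

passthru_parse_opts() reads the file into memory, passthru_init_rom() copies it into the shared VM_PCIROM segment, and the guest then sees it through the expansion ROM BAR.
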
diff --git a/usr.sbin/bhyve/fwctl.h b/usr.sbin/bhyve/qemu_fwcfg.h
rename from usr.sbin/bhyve/fwctl.h
rename to usr.sbin/bhyve/qemu_fwcfg.h
--- a/usr.sbin/bhyve/fwctl.h
+++ b/usr.sbin/bhyve/qemu_fwcfg.h
@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
- * Copyright (c) 2015 Peter Grehan <grehan@freebsd.org>
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -13,10 +13,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
@@ -28,29 +28,19 @@
* $FreeBSD$
*/
-#ifndef _FWCTL_H_
-#define _FWCTL_H_
+#pragma once
-#include <sys/linker_set.h>
+#include <vmmapi.h>
-/*
- * Linker set api for export of information to guest firmware via
- * a sysctl-like OID interface
- */
-struct ctl {
- const char *c_oid;
- const void *c_data;
- const int c_len;
-};
+#define QEMU_FWCFG_MAX_ARCHS 0x2
+#define QEMU_FWCFG_MAX_ENTRIES 0x3FFF
+#define QEMU_FWCFG_MAX_NAME 56
-#define CTL_NODE(oid, data, len) \
- static struct ctl __CONCAT(__ctl, __LINE__) = { \
- oid, \
- (data), \
- (len), \
- }; \
- DATA_SET(ctl_set, __CONCAT(__ctl, __LINE__))
-
-void fwctl_init(void);
+struct qemu_fwcfg_item {
+ uint32_t size;
+ uint8_t *data;
+};
-#endif /* _FWCTL_H_ */
+int qemu_fwcfg_add_file(uint8_t name[QEMU_FWCFG_MAX_NAME], uint32_t size,
+ void *data);
+int qemu_fwcfg_init(struct vmctx *ctx);
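
Besides qemu_fwcfg_init(), the only public entry point is qemu_fwcfg_add_file(), which other parts of bhyve can use to publish a named blob to the guest firmware. A sketch of an assumed caller (file name and payload are made up):

#include <stdint.h>

#include "qemu_fwcfg.h"

static int
example_publish_fwcfg_blob(void)
{
	/* kept static because qemu_fwcfg_add_file() stores the pointer */
	static uint8_t payload[] = { 0xde, 0xad, 0xbe, 0xef };
	uint8_t name[QEMU_FWCFG_MAX_NAME] = "etc/bhyve/example";

	return (qemu_fwcfg_add_file(name, sizeof(payload), payload));
}
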
diff --git a/usr.sbin/bhyve/qemu_fwcfg.c b/usr.sbin/bhyve/qemu_fwcfg.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/qemu_fwcfg.c
@@ -0,0 +1,433 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/endian.h>
+
+#include <machine/vmm.h>
+
+#include <err.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "acpi_device.h"
+#include "inout.h"
+#include "qemu_fwcfg.h"
+
+#define QEMU_FWCFG_ACPI_DEVICE_NAME "FWCF"
+#define QEMU_FWCFG_ACPI_HARDWARE_ID "QEMU0002"
+
+#define QEMU_FWCFG_SELECTOR_PORT_NUMBER 0x510
+#define QEMU_FWCFG_SELECTOR_PORT_SIZE 1
+#define QEMU_FWCFG_SELECTOR_PORT_FLAGS IOPORT_F_INOUT
+#define QEMU_FWCFG_DATA_PORT_NUMBER 0x511
+#define QEMU_FWCFG_DATA_PORT_SIZE 1
+#define QEMU_FWCFG_DATA_PORT_FLAGS \
+ IOPORT_F_INOUT /* QEMU v2.4+ ignores writes */
+
+#define QEMU_FWCFG_ARCHITECTURE_MASK 0x0001
+#define QEMU_FWCFG_INDEX_MASK 0x3FFF
+
+#define QEMU_FWCFG_SELECT_READ 0
+#define QEMU_FWCFG_SELECT_WRITE 1
+
+#define QEMU_FWCFG_ARCHITECTURE_GENERIC 0
+#define QEMU_FWCFG_ARCHITECTURE_SPECIFIC 1
+
+#define QEMU_FWCFG_INDEX_SIGNATURE 0x00
+#define QEMU_FWCFG_INDEX_ID 0x01
+#define QEMU_FWCFG_INDEX_FILE_DIR 0x19
+
+#define QEMU_FWCFG_FIRST_FILE_INDEX 0x20
+
+#define QEMU_FWCFG_MIN_FILES 10
+
+#pragma pack(1)
+
+union qemu_fwcfg_selector {
+ struct {
+ uint16_t index : 14;
+ uint16_t writeable : 1;
+ /*
+ * 0 = generic | for all architectures
+ * 1 = specific | only for current architecture
+ */
+ uint16_t architecture : 1;
+ };
+ uint16_t bits;
+};
+
+struct qemu_fwcfg_signature {
+ uint8_t signature[4];
+};
+
+struct qemu_fwcfg_id {
+ uint32_t interface : 1; /* always set */
+ uint32_t DMA : 1;
+ uint32_t reserved : 30;
+};
+
+struct qemu_fwcfg_file {
+ uint32_t be_size;
+ uint16_t be_selector;
+ uint16_t reserved;
+ uint8_t name[QEMU_FWCFG_MAX_NAME];
+};
+
+struct qemu_fwcfg_directory {
+ uint32_t be_count;
+ struct qemu_fwcfg_file files[0];
+};
+
+struct qemu_fwcfg_softc {
+ struct acpi_device *acpi_dev;
+
+ uint32_t data_offset;
+ union qemu_fwcfg_selector selector;
+ struct qemu_fwcfg_item items[QEMU_FWCFG_MAX_ARCHS]
+ [QEMU_FWCFG_MAX_ENTRIES];
+ struct qemu_fwcfg_directory *directory;
+};
+
+#pragma pack()
+
+static struct qemu_fwcfg_softc sc;
+
+static int
+qemu_fwcfg_selector_port_handler(struct vmctx *ctx, int vcpu, int in, int port,
+ int bytes, uint32_t *eax, void *arg)
+{
+ if (in) {
+ *eax = *(uint16_t *)&sc.selector;
+ return (0);
+ }
+
+ sc.data_offset = 0;
+ sc.selector.bits = *eax;
+
+ return (0);
+}
+
+static int
+qemu_fwcfg_data_port_handler(struct vmctx *ctx, int vcpu, int in, int port,
+ int bytes, uint32_t *eax, void *arg)
+{
+ if (!in) {
+ warnx("%s: Writes to qemu fwcfg data port aren't allowed",
+ __func__);
+ return (-1);
+ }
+
+ /* get fwcfg item */
+ struct qemu_fwcfg_item *item =
+ &sc.items[sc.selector.architecture][sc.selector.index];
+ if (item->data == NULL) {
+ warnx(
+ "%s: qemu fwcfg item doesn't exist (architecture %s index 0x%x)",
+ __func__, sc.selector.architecture ? "specific" : "generic",
+ sc.selector.index);
+ *eax = 0x00;
+ return (0);
+ } else if (sc.data_offset >= item->size) {
+ warnx(
+ "%s: qemu fwcfg item read exceeds size (architecture %s index 0x%x size 0x%x offset 0x%x)",
+ __func__, sc.selector.architecture ? "specific" : "generic",
+ sc.selector.index, item->size, sc.data_offset);
+ *eax = 0x00;
+ return (0);
+ }
+
+ /* return item data */
+ *eax = item->data[sc.data_offset];
+ sc.data_offset++;
+
+ return (0);
+}
+
+static int
+qemu_fwcfg_add_item(uint16_t architecture, uint16_t index, uint32_t size,
+ void *data)
+{
+ /* truncate architecture and index to their desired size */
+ architecture &= QEMU_FWCFG_ARCHITECTURE_MASK;
+ index &= QEMU_FWCFG_INDEX_MASK;
+
+ /* get pointer to item specified by selector */
+ struct qemu_fwcfg_item *fwcfg_item = &sc.items[architecture][index];
+
+ /* check if item is already used */
+ if (fwcfg_item->data != NULL) {
+ warnx("%s: qemu fwcfg item exists (architecture %s index 0x%x)",
+ __func__, architecture ? "specific" : "generic", index);
+ return (-1);
+ }
+
+ /* save data of the item */
+ fwcfg_item->size = size;
+ fwcfg_item->data = data;
+
+ return (0);
+}
+
+static int
+qemu_fwcfg_add_item_file_dir(void)
+{
+ /* alloc directory */
+ uint64_t size = sizeof(struct qemu_fwcfg_directory) +
+ QEMU_FWCFG_MIN_FILES * sizeof(struct qemu_fwcfg_file);
+ struct qemu_fwcfg_directory *fwcfg_directory = calloc(1, size);
+ if (fwcfg_directory == NULL) {
+ return (-ENOMEM);
+ }
+
+ /* init directory */
+ sc.directory = fwcfg_directory;
+
+ /* add directory */
+ return qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+ QEMU_FWCFG_INDEX_FILE_DIR, sizeof(struct qemu_fwcfg_directory), (uint8_t *)sc.directory);
+}
+
+static int
+qemu_fwcfg_add_item_id(void)
+{
+ /* alloc id */
+ struct qemu_fwcfg_id *fwcfg_id = calloc(1,
+ sizeof(struct qemu_fwcfg_id));
+ if (fwcfg_id == NULL) {
+ return (-ENOMEM);
+ }
+
+ /* init id */
+ fwcfg_id->interface = 1;
+ fwcfg_id->DMA = 0;
+
+ /*
+ * QEMU specifies ID as little endian.
+ * Convert fwcfg_id to little endian.
+ */
+ uint32_t *le_fwcfg_id_ptr = (uint32_t *)fwcfg_id;
+ *le_fwcfg_id_ptr = htole32(*le_fwcfg_id_ptr);
+
+ /* add id */
+ return qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+ QEMU_FWCFG_INDEX_ID, sizeof(struct qemu_fwcfg_id),
+ (uint8_t *)fwcfg_id);
+}
+
+static int
+qemu_fwcfg_add_item_signature(void)
+{
+ /* alloc signature */
+ struct qemu_fwcfg_signature *fwcfg_signature = calloc(1,
+ sizeof(struct qemu_fwcfg_signature));
+ if (fwcfg_signature == NULL) {
+ return (-ENOMEM);
+ }
+
+ /* init signature */
+ fwcfg_signature->signature[0] = 'Q';
+ fwcfg_signature->signature[1] = 'E';
+ fwcfg_signature->signature[2] = 'M';
+ fwcfg_signature->signature[3] = 'U';
+
+ /* add signature */
+ return qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+ QEMU_FWCFG_INDEX_SIGNATURE, sizeof(struct qemu_fwcfg_signature),
+ (uint8_t *)fwcfg_signature);
+}
+
+static int
+qemu_fwcfg_register_port(const char *name, int port, int size, int flags,
+ inout_func_t handler)
+{
+ struct inout_port iop;
+
+ bzero(&iop, sizeof(iop));
+ iop.name = name;
+ iop.port = port;
+ iop.size = size;
+ iop.flags = flags;
+ iop.handler = handler;
+
+ return register_inout(&iop);
+}
+
+int
+qemu_fwcfg_add_file(uint8_t name[QEMU_FWCFG_MAX_NAME], uint32_t size,
+ void *data)
+{
+ /*
+ * QEMU specifies count as big endian.
+ * Convert it to host endian to work with it.
+ */
+ uint32_t count = be32toh(sc.directory->be_count);
+
+ /* add file to items list */
+ uint32_t index = QEMU_FWCFG_FIRST_FILE_INDEX + count;
+ const int error = qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+ index, size, data);
+ if (error != 0) {
+ return (error);
+ }
+
+ /*
+ * Files should be sorted alphabetically; find the index for the new file.
+ */
+ uint32_t file_index;
+ for (file_index = 0; file_index < count; ++file_index) {
+ if (strcmp(name, sc.directory->files[file_index].name) < 0)
+ break;
+ }
+
+ ++count;
+ if (count > QEMU_FWCFG_MIN_FILES) {
+ /* alloc new file directory */
+ uint64_t new_size = sizeof(struct qemu_fwcfg_directory) +
+ count * sizeof(struct qemu_fwcfg_file);
+ struct qemu_fwcfg_directory *new_directory = calloc(1,
+ new_size);
+ if (new_directory == NULL) {
+ warnx(
+ "%s: Unable to allocate a new qemu fwcfg files directory (count %d)",
+ __func__, count);
+ return (-ENOMEM);
+ }
+
+ /* copy files below file_index to new directory */
+ memcpy(new_directory->files, sc.directory->files,
+ file_index * sizeof(struct qemu_fwcfg_file));
+
+ /* copy the remaining old files to the slots after the new entry */
+ memcpy(&new_directory->files[file_index + 1],
+ &sc.directory->files[file_index],
+ (count - 1 - file_index) * sizeof(struct qemu_fwcfg_file));
+
+ /* free old directory */
+ free(sc.directory);
+
+ /* set directory pointer to new directory */
+ sc.directory = new_directory;
+
+ /* adjust directory pointer */
+ sc.items[0][QEMU_FWCFG_INDEX_FILE_DIR].data = (uint8_t *)
+ sc.directory;
+ } else {
+ /* shift files behind file_index */
+ for (uint32_t i = QEMU_FWCFG_MIN_FILES - 1; i > file_index; --i) {
+ memcpy(&sc.directory->files[i],
+ &sc.directory->files[i - 1],
+ sizeof(struct qemu_fwcfg_file));
+ }
+ }
+
+ /*
+ * QEMU specifies count, size and index as big endian.
+ * Save these values in big endian to simplify guest reads of these
+ * values.
+ */
+ sc.directory->be_count = htobe32(count);
+ sc.directory->files[file_index].be_size = htobe32(size);
+ sc.directory->files[file_index].be_selector = htobe16(index);
+ strcpy(sc.directory->files[file_index].name, name);
+
+ /* set new size for the fwcfg_file_directory */
+ sc.items[0][QEMU_FWCFG_INDEX_FILE_DIR].size =
+ sizeof(struct qemu_fwcfg_directory) +
+ count * sizeof(struct qemu_fwcfg_file);
+
+ return (0);
+}
+
+int
+qemu_fwcfg_init(struct vmctx *ctx)
+{
+ int error;
+
+ error = acpi_device_create(&sc.acpi_dev, ctx, QEMU_FWCFG_ACPI_DEVICE_NAME,
+ QEMU_FWCFG_ACPI_HARDWARE_ID);
+ if (error) {
+ warnx("%s: failed to create ACPI device for QEMU FwCfg",
+ __func__);
+ goto done;
+ }
+
+ error = acpi_device_add_res_fixed_ioport(sc.acpi_dev,
+ QEMU_FWCFG_SELECTOR_PORT_NUMBER, 2);
+ if (error) {
+ warnx("%s: failed to add fixed IO port for QEMU FwCfg",
+ __func__);
+ goto done;
+ }
+
+ /* add common fwcfg items */
+ if ((error = qemu_fwcfg_add_item_signature()) != 0) {
+ warnx("%s: Unable to add signature item", __func__);
+ goto done;
+ }
+ if ((error = qemu_fwcfg_add_item_id()) != 0) {
+ warnx("%s: Unable to add id item", __func__);
+ goto done;
+ }
+ if ((error = qemu_fwcfg_add_item_file_dir()) != 0) {
+ warnx("%s: Unable to add file_dir item", __func__);
+ goto done;
+ }
+
+ /* add handlers for fwcfg ports */
+ if ((error = qemu_fwcfg_register_port("qemu_fwcfg_selector",
+ QEMU_FWCFG_SELECTOR_PORT_NUMBER, QEMU_FWCFG_SELECTOR_PORT_SIZE,
+ QEMU_FWCFG_SELECTOR_PORT_FLAGS,
+ qemu_fwcfg_selector_port_handler)) != 0) {
+ warnx("%s: Unable to register qemu fwcfg selector port 0x%x",
+ __func__, QEMU_FWCFG_SELECTOR_PORT_NUMBER);
+ goto done;
+ }
+ if ((error = qemu_fwcfg_register_port("qemu_fwcfg_data",
+ QEMU_FWCFG_DATA_PORT_NUMBER, QEMU_FWCFG_DATA_PORT_SIZE,
+ QEMU_FWCFG_DATA_PORT_FLAGS, qemu_fwcfg_data_port_handler)) !=
+ 0) {
+ warnx("%s: Unable to register qemu fwcfg data port 0x%x",
+ __func__, QEMU_FWCFG_DATA_PORT_NUMBER);
+ goto done;
+ }
+
+done:
+ if (error) {
+ acpi_device_destroy(sc.acpi_dev);
+ }
+
+ return (error);
+}
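
From the guest's point of view this is the classic two-port fw_cfg protocol: write a selector to port 0x510 (bits 0-13 index, bit 14 writeable, bit 15 architecture), then read the selected item byte by byte from port 0x511. Selecting generic item 0 therefore yields the "QEMU" signature. A conceptual guest-side sketch for a FreeBSD guest (assumes the caller already has I/O port access):

#include <machine/cpufunc.h>	/* inb(), outw() */

#define FWCFG_SELECTOR_PORT	0x510
#define FWCFG_DATA_PORT		0x511

static void
fwcfg_read_signature(char sig[4])
{
	/* generic architecture, index 0 == signature item */
	outw(FWCFG_SELECTOR_PORT, 0x0000);
	for (int i = 0; i < 4; i++)
		sig[i] = inb(FWCFG_DATA_PORT);
}
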
