D26209.id98407.diff
diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -73,6 +73,7 @@
VM_SYSMEM,
VM_BOOTROM,
VM_FRAMEBUFFER,
+ VM_PCIROM,
};
/*
@@ -180,6 +181,8 @@
vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
vm_paddr_t gpa, size_t len);
+int vm_get_memory_region_info(struct vmctx *ctx, vm_paddr_t *base,
+ vm_paddr_t *size, enum vm_memory_region_type type);
int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot,
int func, uint64_t addr, uint64_t msg, int numvec);
int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot,
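Not part of the diff itself: a minimal usage sketch of the new libvmmapi call, assuming the userland consumer (for instance the pci_gvt-d.c emulation added to the bhyve Makefile further down) includes <machine/vmm.h> for enum vm_memory_region_type and MEMORY_REGION_INTEL_GSM; the wrapper name and error handling are illustrative only.

#include <sys/types.h>

#include <machine/vmm.h>
#include <vmmapi.h>

/* Query the host's Intel graphics stolen memory (GSM) region. */
static int
example_get_gsm(struct vmctx *ctx, vm_paddr_t *base, vm_paddr_t *size)
{
	return (vm_get_memory_region_info(ctx, base, size,
	    MEMORY_REGION_INTEL_GSM));
}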
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -1009,6 +1009,25 @@
return (ioctl(ctx->fd, VM_UNMAP_PPTDEV_MMIO, &pptmmio));
}
+int
+vm_get_memory_region_info(struct vmctx *ctx, vm_paddr_t *base, vm_paddr_t *size,
+ enum vm_memory_region_type type)
+{
+ struct vm_memory_region_info memory_region_info;
+
+ bzero(&memory_region_info, sizeof(memory_region_info));
+ memory_region_info.type = type;
+
+ const int error = ioctl(ctx->fd, VM_GET_MEMORY_REGION_INFO, &memory_region_info);
+
+ if (base)
+ *base = memory_region_info.base;
+ if (size)
+ *size = memory_region_info.size;
+
+ return (error);
+}
+
int
vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
uint64_t addr, uint64_t msg, int numvec)
@@ -1684,7 +1703,7 @@
VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
VM_PPTDEV_MSIX, VM_UNMAP_PPTDEV_MMIO, VM_PPTDEV_DISABLE_MSIX,
- VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
+ VM_GET_MEMORY_REGION_INFO, VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
VM_GLA2GPA_NOFAULT,
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -741,6 +741,11 @@
} u;
};
+enum vm_memory_region_type {
+ MEMORY_REGION_INTEL_GSM,
+ MEMORY_REGION_INTEL_OPREGION
+};
+
/* APIs to inject faults into the guest */
void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid,
int errcode);
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -146,6 +146,17 @@
size_t len;
};
+struct vm_memory_region_info {
+ vm_paddr_t base;
+ vm_paddr_t size;
+ enum vm_memory_region_type type;
+};
+
+#ifdef _KERNEL
+extern vm_paddr_t intel_graphics_stolen_base;
+extern vm_paddr_t intel_graphics_stolen_size;
+#endif
+
struct vm_pptdev_msi {
int vcpu;
int bus;
@@ -309,6 +320,7 @@
IOCNUM_PPTDEV_MSIX = 44,
IOCNUM_PPTDEV_DISABLE_MSIX = 45,
IOCNUM_UNMAP_PPTDEV_MMIO = 46,
+ IOCNUM_GET_MEMORY_REGION_INFO = 47,
/* statistics */
IOCNUM_VM_STATS = 50,
@@ -427,6 +439,8 @@
_IOW('v', IOCNUM_PPTDEV_DISABLE_MSIX, struct vm_pptdev)
#define VM_UNMAP_PPTDEV_MMIO \
_IOW('v', IOCNUM_UNMAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
+#define VM_GET_MEMORY_REGION_INFO \
+ _IOWR('v', IOCNUM_GET_MEMORY_REGION_INFO, struct vm_memory_region_info)
#define VM_INJECT_NMI \
_IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
#define VM_STATS \
diff --git a/sys/amd64/vmm/intel/intelgpu.h b/sys/amd64/vmm/intel/intelgpu.h
new file mode 100644
--- /dev/null
+++ b/sys/amd64/vmm/intel/intelgpu.h
@@ -0,0 +1,206 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#pragma once
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#define IGD_OPREGION_HEADER_SIGN "IntelGraphicsMem"
+#define IGD_OPREGION_HEADER_MBOX1 BIT0
+#define IGD_OPREGION_HEADER_MBOX2 BIT1
+#define IGD_OPREGION_HEADER_MBOX3 BIT2
+#define IGD_OPREGION_HEADER_MBOX4 BIT3
+#define IGD_OPREGION_HEADER_MBOX5 BIT4
+
+#define IGD_OPREGION_VBT_SIZE_6K (6 * 1024UL)
+
+/**
+ OpRegion structures:
+ Sub-structures define the different parts of the OpRegion followed by the
+ main structure representing the entire OpRegion.
+
+ @note These structures are packed to 1 byte offsets because the exact
+ data location is required by the supporting design specification due to
+ the fact that the data is used by ASL and Graphics driver code compiled
+ separately.
+**/
+#pragma pack(push, 1)
+///
+/// OpRegion Mailbox 0 Header structure. The OpRegion Header is used to
+/// identify a block of memory as the graphics driver OpRegion.
+/// Offset 0x0, Size 0x100
+///
+struct igd_opregion_header {
+ int8_t sign[0x10]; ///< Offset 0x00 OpRegion Signature
+ uint32_t size; ///< Offset 0x10 OpRegion Size
+ uint32_t over; ///< Offset 0x14 OpRegion Structure Version
+ uint8_t sver[0x20]; ///< Offset 0x18 System BIOS Build Version
+ uint8_t vver[0x10]; ///< Offset 0x38 Video BIOS Build Version
+ uint8_t gver[0x10]; ///< Offset 0x48 Graphic Driver Build Version
+ uint32_t mbox; ///< Offset 0x58 Supported Mailboxes
+ uint32_t dmod; ///< Offset 0x5C Driver Model
+ uint32_t pcon; ///< Offset 0x60 Platform Configuration
+ int16_t dver[0x10]; ///< Offset 0x64 GOP Version
+ uint8_t rm01[0x7C]; ///< Offset 0x84 Reserved Must be zero
+};
+
+///
+/// OpRegion Mailbox 1 - Public ACPI Methods
+/// Offset 0x100, Size 0x100
+///
+struct igd_opregion_mbox1 {
+ uint32_t drdy; ///< Offset 0x100 Driver Readiness
+ uint32_t csts; ///< Offset 0x104 Status
+ uint32_t cevt; ///< Offset 0x108 Current Event
+ uint8_t rm11[0x14]; ///< Offset 0x10C Reserved Must be Zero
+ uint32_t didl[8]; ///< Offset 0x120 Supported Display Devices ID List
+ uint32_t
+ cpdl[8]; ///< Offset 0x140 Currently Attached Display Devices List
+ uint32_t
+ cadl[8]; ///< Offset 0x160 Currently Active Display Devices List
+ uint32_t nadl[8]; ///< Offset 0x180 Next Active Devices List
+ uint32_t aslp; ///< Offset 0x1A0 ASL Sleep Time Out
+ uint32_t tidx; ///< Offset 0x1A4 Toggle Table Index
+ uint32_t chpd; ///< Offset 0x1A8 Current Hotplug Enable Indicator
+ uint32_t clid; ///< Offset 0x1AC Current Lid State Indicator
+ uint32_t cdck; ///< Offset 0x1B0 Current Docking State Indicator
+ uint32_t sxsw; ///< Offset 0x1B4 Display Switch Notification on Sx
+ ///< StateResume
+ uint32_t evts; ///< Offset 0x1B8 Events supported by ASL
+ uint32_t cnot; ///< Offset 0x1BC Current OS Notification
+ uint32_t NRDY; ///< Offset 0x1C0 Driver Status
+ uint8_t did2[0x1C]; ///< Offset 0x1C4 Extended Supported Devices ID
+ ///< List(DOD)
+ uint8_t
+ cpd2[0x1C]; ///< Offset 0x1E0 Extended Attached Display Devices List
+ uint8_t rm12[4]; ///< Offset 0x1FC - 0x1FF Reserved Must be zero
+};
+
+///
+/// OpRegion Mailbox 2 - Software SCI Interface
+/// Offset 0x200, Size 0x100
+///
+struct igd_opregion_mbox2 {
+ uint32_t scic; ///< Offset 0x200 Software SCI Command / Status / Data
+ uint32_t parm; ///< Offset 0x204 Software SCI Parameters
+ uint32_t dslp; ///< Offset 0x208 Driver Sleep Time Out
+ uint8_t rm21[0xF4]; ///< Offset 0x20C - 0x2FF Reserved Must be zero
+};
+
+///
+/// OpRegion Mailbox 3 - BIOS/Driver Notification - ASLE Support
+/// Offset 0x300, Size 0x100
+///
+struct igd_opregion_mbox3 {
+ uint32_t ardy; ///< Offset 0x300 Driver Readiness
+ uint32_t aslc; ///< Offset 0x304 ASLE Interrupt Command / Status
+ uint32_t tche; ///< Offset 0x308 Technology Enabled Indicator
+ uint32_t alsi; ///< Offset 0x30C Current ALS Luminance Reading
+ uint32_t bclp; ///< Offset 0x310 Requested Backlight Brightness
+ uint32_t pfit; ///< Offset 0x314 Panel Fitting State or Request
+ uint32_t cblv; ///< Offset 0x318 Current Brightness Level
+ uint16_t bclm[0x14]; ///< Offset 0x31C Backlight Brightness Levels Duty
+ ///< Cycle Mapping Table
+ uint32_t cpfm; ///< Offset 0x344 Current Panel Fitting Mode
+ uint32_t epfm; ///< Offset 0x348 Enabled Panel Fitting Modes
+ uint8_t plut[0x4A]; ///< Offset 0x34C Panel Look Up Table & Identifier
+ uint32_t pfmb; ///< Offset 0x396 PWM Frequency and Minimum Brightness
+ uint32_t ccdv; ///< Offset 0x39A Color Correction Default Values
+ uint32_t pcft; ///< Offset 0x39E Power Conservation Features
+ uint32_t srot; ///< Offset 0x3A2 Supported Rotation Angles
+ uint32_t iuer; ///< Offset 0x3A6 Intel Ultrabook(TM) Event Register
+ uint64_t fdss; ///< Offset 0x3AA DSS Buffer address allocated for IFFS
+ ///< feature
+ uint32_t fdsp; ///< Offset 0x3B2 Size of DSS buffer
+ uint32_t stat; ///< Offset 0x3B6 State Indicator
+ uint64_t rvda; ///< Offset 0x3BA Absolute/Relative Address of Raw VBT
+ ///< Data from OpRegion Base
+ uint32_t rvds; ///< Offset 0x3C2 Raw VBT Data Size
+ uint8_t rsvd2[0x3A]; ///< Offset 0x3C6 - 0x3FF Reserved Must be zero.
+ ///< Bug in spec 0x45(69)
+};
+
+///
+/// OpRegion Mailbox 4 - VBT Video BIOS Table
+/// Offset 0x400, Size 0x1800
+///
+struct igd_opregion_mbox4 {
+ uint8_t rvbt[IGD_OPREGION_VBT_SIZE_6K]; ///< Offset 0x400 - 0x1BFF Raw
+ ///< VBT Data
+};
+
+///
+/// OpRegion Mailbox 5 - BIOS/Driver Notification - Data storage BIOS to Driver
+/// data sync Offset 0x1C00, Size 0x400
+///
+struct igd_opregion_mbox5 {
+ uint32_t phed; ///< Offset 0x1C00 Panel Header
+ uint8_t bddc[0x100]; ///< Offset 0x1C04 Panel EDID (DDC data)
+ uint8_t rm51[0x2FC]; ///< Offset 0x1D04 - 0x1FFF Reserved Must be zero
+};
+
+///
+/// IGD OpRegion Structure
+///
+struct igd_opregion {
+ struct igd_opregion_header
+ header; ///< OpRegion header (Offset 0x0, Size 0x100)
+ struct igd_opregion_mbox1 mbox1; ///< Mailbox 1: Public ACPI Methods
+ ///< (Offset 0x100, Size 0x100)
+ struct igd_opregion_mbox2 mbox2; ///< Mailbox 2: Software SCI Interface
+ ///< (Offset 0x200, Size 0x100)
+ struct igd_opregion_mbox3
+ mbox3; ///< Mailbox 3: BIOS to Driver Notification (Offset 0x300,
+ ///< Size 0x100)
+ struct igd_opregion_mbox4 mbox4; ///< Mailbox 4: Video BIOS Table (VBT)
+ ///< (Offset 0x400, Size 0x1800)
+ struct igd_opregion_mbox5
+ mbox5; ///< Mailbox 5: BIOS to Driver Notification Extension (Offset
+ ///< 0x1C00, Size 0x400)
+};
+
+///
+/// VBT Header Structure
+///
+struct vbt_header {
+ uint8_t product_string[20];
+ uint16_t version;
+ uint16_t header_size;
+ uint16_t table_size;
+ uint8_t checksum;
+ uint8_t reserved1;
+ uint32_t bios_data_offset;
+ uint32_t aim_data_offset[4];
+};
+
+#pragma pack(pop)
+
+int vm_intelgpu_get_opregion(struct vm *vm, vm_paddr_t *base, vm_paddr_t *size);
diff --git a/sys/amd64/vmm/intel/intelgpu.c b/sys/amd64/vmm/intel/intelgpu.c
new file mode 100644
--- /dev/null
+++ b/sys/amd64/vmm/intel/intelgpu.c
@@ -0,0 +1,78 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include "intelgpu.h"
+
+#define KB (1024UL)
+
+int
+vm_intelgpu_get_opregion(struct vm *vm, vm_paddr_t *base, vm_paddr_t *size)
+{
+ /* intel graphics device is always located at 0:2.0 */
+ device_t dev = pci_find_bsf(0, 2, 0);
+ if (dev == NULL) {
+ return (ENOENT);
+ }
+
+ if ((pci_get_vendor(dev) != PCI_VENDOR_INTEL) ||
+ (pci_get_class(dev) != PCIC_DISPLAY) ||
+ (pci_get_subclass(dev) != PCIS_DISPLAY_VGA)) {
+ return (ENODEV);
+ }
+
+ uint64_t asls = pci_read_config(dev, PCIR_ASLS_CTL, 4);
+
+ struct igd_opregion_header *opregion_header =
+ (struct igd_opregion_header *)pmap_map(NULL, asls,
+ asls + sizeof(*opregion_header), VM_PROT_READ);
+ if (opregion_header == NULL ||
+ memcmp(opregion_header->sign, IGD_OPREGION_HEADER_SIGN,
+ sizeof(opregion_header->sign))) {
+ return (ENODEV);
+ }
+
+ *base = asls;
+ *size = opregion_header->size * KB;
+
+ return (0);
+}
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -134,7 +134,7 @@
bool sysmem;
struct vm_object *object;
};
-#define VM_MAX_MEMSEGS 3
+#define VM_MAX_MEMSEGS 4
struct mem_map {
vm_paddr_t gpa;
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -60,6 +60,7 @@
#include <machine/vmm_snapshot.h>
#include <x86/apicreg.h>
+#include "intel/intelgpu.h"
#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
@@ -366,6 +367,7 @@
struct vm_capability *vmcap;
struct vm_pptdev *pptdev;
struct vm_pptdev_mmio *pptmmio;
+ struct vm_memory_region_info *memory_region_info;
struct vm_pptdev_msi *pptmsi;
struct vm_pptdev_msix *pptmsix;
struct vm_nmi *vmnmi;
@@ -533,6 +535,24 @@
error = ppt_unmap_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
pptmmio->func, pptmmio->gpa, pptmmio->len);
break;
+ case VM_GET_MEMORY_REGION_INFO:
+ memory_region_info = (struct vm_memory_region_info *)data;
+ switch (memory_region_info->type) {
+ case MEMORY_REGION_INTEL_GSM:
+ memory_region_info->base = intel_graphics_stolen_base;
+ memory_region_info->size = intel_graphics_stolen_size;
+ error = 0;
+ break;
+ case MEMORY_REGION_INTEL_OPREGION:
+ error = vm_intelgpu_get_opregion(sc->vm,
+ &memory_region_info->base,
+ &memory_region_info->size);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ break;
case VM_BIND_PPTDEV:
pptdev = (struct vm_pptdev *)data;
error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
diff --git a/sys/dev/pci/pcireg.h b/sys/dev/pci/pcireg.h
--- a/sys/dev/pci/pcireg.h
+++ b/sys/dev/pci/pcireg.h
@@ -1098,3 +1098,14 @@
#define PCIM_OSC_CTL_PCIE_PME 0x04 /* PCIe Native Power Mgt Events */
#define PCIM_OSC_CTL_PCIE_AER 0x08 /* PCIe Advanced Error Reporting */
#define PCIM_OSC_CTL_PCIE_CAP_STRUCT 0x10 /* Various Capability Structures */
+
+/*
+ * Intel graphics device definitions
+ */
+#define PCIR_BDSM 0x5C /* Base of Data Stolen Memory register */
+#define PCIR_ASLS_CTL 0xFC /* Opregion start address register */
+
+/*
+ * PCI Vendors
+ */
+#define PCI_VENDOR_INTEL 0x8086
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -42,6 +42,7 @@
# intel-specific files
.PATH: ${SRCTOP}/sys/amd64/vmm/intel
SRCS+= ept.c \
+ intelgpu.c \
vmcs.c \
vmx_msr.c \
vmx_support.S \
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -15,6 +15,7 @@
BHYVE_SYSDIR?=${SRCTOP}
SRCS= \
+ acpi_device.c \
atkbdc.c \
acpi.c \
audio.c \
@@ -25,7 +26,7 @@
console.c \
ctl_util.c \
ctl_scsi_all.c \
- fwctl.c \
+ e820.c \
gdb.c \
hda_codec.c \
inout.c \
@@ -41,6 +42,7 @@
pci_emul.c \
pci_hda.c \
pci_fbuf.c \
+ pci_gvt-d.c \
pci_hostbridge.c \
pci_irq.c \
pci_lpc.c \
@@ -59,6 +61,7 @@
post.c \
ps2kbd.c \
ps2mouse.c \
+ qemu_fwcfg.c \
rfb.c \
rtc.c \
smbiostbl.c \
diff --git a/usr.sbin/bhyve/acpi.h b/usr.sbin/bhyve/acpi.h
--- a/usr.sbin/bhyve/acpi.h
+++ b/usr.sbin/bhyve/acpi.h
@@ -31,6 +31,8 @@
#ifndef _ACPI_H_
#define _ACPI_H_
+#include "acpi_device.h"
+
#define SCI_INT 9
#define SMI_CMD 0xb2
@@ -55,6 +57,7 @@
int acpi_build(struct vmctx *ctx, int ncpu);
void acpi_raise_gpe(struct vmctx *ctx, unsigned bit);
+int acpi_tables_add_device(const struct acpi_device *const dev);
void dsdt_line(const char *fmt, ...);
void dsdt_fixed_ioport(uint16_t iobase, uint16_t length);
void dsdt_fixed_irq(uint8_t irq);
diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c
--- a/usr.sbin/bhyve/acpi.c
+++ b/usr.sbin/bhyve/acpi.c
@@ -139,6 +139,30 @@
#define EFFLUSH(x) \
if (fflush(x) != 0) goto err_exit;
+/*
+ * A list for additional ACPI devices like a TPM.
+ */
+struct acpi_device_list_entry {
+ SLIST_ENTRY(acpi_device_list_entry) chain;
+ const struct acpi_device *dev;
+};
+SLIST_HEAD(acpi_device_list,
+ acpi_device_list_entry) acpi_devices = SLIST_HEAD_INITIALIZER(acpi_devices);
+
+int
+acpi_tables_add_device(const struct acpi_device *const dev)
+{
+ struct acpi_device_list_entry *const entry = calloc(1, sizeof(*entry));
+ if (entry == NULL) {
+ return (ENOMEM);
+ }
+
+ entry->dev = dev;
+ SLIST_INSERT_HEAD(&acpi_devices, entry, chain);
+
+ return (0);
+}
+
static int
basl_fwrite_rsdp(FILE *fp)
{
@@ -760,6 +784,11 @@
vmgenc_write_dsdt();
+ const struct acpi_device_list_entry *entry;
+ SLIST_FOREACH(entry, &acpi_devices, chain) {
+ acpi_device_write_dsdt(entry->dev);
+ }
+
dsdt_line("}");
if (dsdt_error != 0)
diff --git a/usr.sbin/bhyve/acpi_device.h b/usr.sbin/bhyve/acpi_device.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/acpi_device.h
@@ -0,0 +1,42 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#pragma once
+
+#include <contrib/dev/acpica/include/acpi.h>
+
+struct vmctx;
+
+struct acpi_device;
+
+/**
+ * Creates an ACPI device.
+ *
+ * @param[out] new_dev Returns the newly created ACPI device.
+ * @param[in] vm_ctx VM context the ACPI device is created in.
+ * @param[in] name Name of the ACPI device. Should always be a NULL
+ * terminated string.
+ * @param[in] hid Hardware ID of the ACPI device. Should always be a NULL
+ * terminated string.
+ */
+int acpi_device_create(struct acpi_device **const new_dev,
+ struct vmctx *const vm_ctx, const char *const name, const char *const hid);
+void acpi_device_destroy(struct acpi_device *const dev);
+
+/**
+ * @note: acpi_device_add_res_acpi_buffer doesn't ensure that no resources are
+ * added on an error condition. On error the caller should assume that
+ * the ACPI_BUFFER is partially added to the ACPI device.
+ */
+int acpi_device_add_res_acpi_buffer(struct acpi_device *const dev,
+ const ACPI_BUFFER resources);
+int acpi_device_add_res_fixed_ioport(struct acpi_device *const dev,
+ const UINT16 port, UINT8 length);
+int acpi_device_add_res_fixed_memory32(struct acpi_device *const dev,
+ const UINT8 write_protected, const UINT32 address, const UINT32 length);
+
+void acpi_device_write_dsdt(const struct acpi_device *const dev);
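A usage sketch, not part of the diff: how an emulation could register an ACPI device so that acpi_build() later emits it into the DSDT via acpi_device_write_dsdt(). The device name "TEST", HID "TST0001", address and length are placeholders.

#include "acpi_device.h"

static int
example_acpi_device(struct vmctx *ctx)
{
	struct acpi_device *dev;
	int error;

	error = acpi_device_create(&dev, ctx, "TEST", "TST0001");
	if (error != 0)
		return (error);

	/* one write-protected 4 KiB fixed memory resource for _CRS */
	return (acpi_device_add_res_fixed_memory32(dev, 1, 0xFED40000,
	    0x1000));
}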
diff --git a/usr.sbin/bhyve/acpi_device.c b/usr.sbin/bhyve/acpi_device.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/acpi_device.c
@@ -0,0 +1,240 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+
+#include <machine/vmm.h>
+
+#include <err.h>
+#include <errno.h>
+#include <vmmapi.h>
+
+#include "acpi.h"
+#include "acpi_device.h"
+
+/**
+ * List entry to enumerate all resources used by an ACPI device.
+ *
+ * @param chain Used to chain multiple elements together.
+ * @param type Type of the ACPI resource.
+ * @param data Data of the ACPI resource.
+ */
+struct acpi_resource_list_entry {
+ SLIST_ENTRY(acpi_resource_list_entry) chain;
+ UINT32 type;
+ ACPI_RESOURCE_DATA data;
+};
+
+/**
+ * Holds information about an ACPI device.
+ *
+ * @param vm_ctx VM context the ACPI device was created in.
+ * @param name Name of the ACPI device.
+ * @param hid Hardware ID of the ACPI device.
+ * @param crs Current resources used by the ACPI device.
+ */
+struct acpi_device {
+ struct vmctx *vm_ctx;
+ const char *name;
+ const char *hid;
+ SLIST_HEAD(acpi_resource_list, acpi_resource_list_entry) crs;
+};
+
+int
+acpi_device_create(struct acpi_device **const new_dev,
+ struct vmctx *const vm_ctx, const char *const name, const char *const hid)
+{
+ if (new_dev == NULL || vm_ctx == NULL || name == NULL || hid == NULL) {
+ return (EINVAL);
+ }
+
+ struct acpi_device *const dev = calloc(1, sizeof(*dev));
+ if (dev == NULL) {
+ return (ENOMEM);
+ }
+
+ dev->vm_ctx = vm_ctx;
+ dev->name = name;
+ dev->hid = hid;
+ SLIST_INIT(&dev->crs);
+
+ /* current resources always contain an end tag */
+ struct acpi_resource_list_entry *const crs_end_tag = calloc(1,
+ sizeof(*crs_end_tag));
+ if (crs_end_tag == NULL) {
+ acpi_device_destroy(dev);
+ return (ENOMEM);
+ }
+ crs_end_tag->type = ACPI_RESOURCE_TYPE_END_TAG;
+ SLIST_INSERT_HEAD(&dev->crs, crs_end_tag, chain);
+
+ const int error = acpi_tables_add_device(dev);
+ if (error) {
+ acpi_device_destroy(dev);
+ return (error);
+ }
+
+ *new_dev = dev;
+
+ return (0);
+}
+
+void
+acpi_device_destroy(struct acpi_device *const dev)
+{
+ if (dev == NULL) {
+ return;
+ }
+
+ struct acpi_resource_list_entry *res;
+ while (!SLIST_EMPTY(&dev->crs)) {
+ res = SLIST_FIRST(&dev->crs);
+ SLIST_REMOVE_HEAD(&dev->crs, chain);
+ free(res);
+ }
+}
+
+int
+acpi_device_add_res_acpi_buffer(struct acpi_device *const dev,
+ const ACPI_BUFFER resources)
+{
+ if (dev == NULL) {
+ return (EINVAL);
+ }
+
+ int error = 0;
+ size_t offset = 0;
+ while (offset < resources.Length) {
+ const ACPI_RESOURCE *const res =
+ (const ACPI_RESOURCE *)((UINT8 *)resources.Pointer +
+ offset);
+ switch (res->Type) {
+ case ACPI_RESOURCE_TYPE_FIXED_IO:
+ error = acpi_device_add_res_fixed_ioport(dev,
+ res->Data.FixedIo.Address,
+ res->Data.FixedIo.AddressLength);
+ break;
+ case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
+ error = acpi_device_add_res_fixed_memory32(dev,
+ res->Data.FixedMemory32.WriteProtect,
+ res->Data.FixedMemory32.Address,
+ res->Data.FixedMemory32.AddressLength);
+ break;
+ case ACPI_RESOURCE_TYPE_END_TAG:
+ break;
+ default:
+ warnx("%s: unknown resource type %d", __func__,
+ res->Type);
+ return (ENODEV);
+ }
+ if (error) {
+ break;
+ }
+ offset += res->Length;
+ }
+
+ return (error);
+}
+
+int
+acpi_device_add_res_fixed_ioport(struct acpi_device *const dev,
+ const UINT16 port, const UINT8 length)
+{
+ if (dev == NULL) {
+ return (EINVAL);
+ }
+
+ struct acpi_resource_list_entry *const res = calloc(1, sizeof(*res));
+ if (res == NULL) {
+ return (ENOMEM);
+ }
+
+ res->type = ACPI_RESOURCE_TYPE_FIXED_IO;
+ res->data.FixedIo.Address = port;
+ res->data.FixedIo.AddressLength = length;
+
+ SLIST_INSERT_HEAD(&dev->crs, res, chain);
+
+ return (0);
+}
+
+int
+acpi_device_add_res_fixed_memory32(struct acpi_device *const dev,
+ const UINT8 write_protected, const UINT32 address, const UINT32 length)
+{
+ if (dev == NULL) {
+ return (EINVAL);
+ }
+
+ struct acpi_resource_list_entry *const res = calloc(1, sizeof(*res));
+ if (res == NULL) {
+ return (ENOMEM);
+ }
+
+ res->type = ACPI_RESOURCE_TYPE_FIXED_MEMORY32;
+ res->data.FixedMemory32.WriteProtect = write_protected;
+ res->data.FixedMemory32.Address = address;
+ res->data.FixedMemory32.AddressLength = length;
+
+ SLIST_INSERT_HEAD(&dev->crs, res, chain);
+
+ return (0);
+}
+
+static void
+acpi_device_write_dsdt_crs(const struct acpi_device *const dev)
+{
+ const struct acpi_resource_list_entry *res;
+ SLIST_FOREACH (res, &dev->crs, chain) {
+ switch (res->type) {
+ case ACPI_RESOURCE_TYPE_FIXED_IO:
+ dsdt_fixed_ioport(res->data.FixedIo.Address,
+ res->data.FixedIo.AddressLength);
+ break;
+ case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: {
+ dsdt_fixed_mem32(res->data.FixedMemory32.Address,
+ res->data.FixedMemory32.AddressLength);
+ break;
+ }
+ case ACPI_RESOURCE_TYPE_END_TAG:
+ break;
+ default:
+ warnx("%s: unknown resource type %d", __func__,
+ res->type);
+ return;
+ }
+ }
+}
+
+void
+acpi_device_write_dsdt(const struct acpi_device *const dev)
+{
+ if (dev == NULL) {
+ return;
+ }
+
+ dsdt_line("");
+ dsdt_line(" Scope (\\_SB)");
+ dsdt_line(" {");
+ dsdt_line(" Device (%s)", dev->name);
+ dsdt_line(" {");
+ dsdt_line(" Name (_HID, \"%s\")", dev->hid);
+ dsdt_line(" Name (_STA, 0x0F)");
+ dsdt_line(" Name (_CRS, ResourceTemplate ()");
+ dsdt_line(" {");
+ dsdt_indent(4);
+ acpi_device_write_dsdt_crs(dev);
+ dsdt_unindent(4);
+ dsdt_line(" })");
+ dsdt_line(" }");
+ dsdt_line(" }");
+}
diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8
--- a/usr.sbin/bhyve/bhyve.8
+++ b/usr.sbin/bhyve/bhyve.8
@@ -409,6 +409,11 @@
and
.Ar function
numbers.
+.It Li rom= Ns Ar romfile
+Add
+.Ar romfile
+as an option ROM to the PCI device.
+The ROM will be loaded by firmware and should be capable of initializing the device.
.El
.Pp
Guest memory must be wired using the
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -89,7 +89,7 @@
#include "bootrom.h"
#include "inout.h"
#include "debug.h"
-#include "fwctl.h"
+#include "e820.h"
#include "gdb.h"
#include "ioapic.h"
#include "kernemu_dev.h"
@@ -99,6 +99,7 @@
#include "pci_emul.h"
#include "pci_irq.h"
#include "pci_lpc.h"
+#include "qemu_fwcfg.h"
#include "smbiostbl.h"
#ifdef BHYVE_SNAPSHOT
#include "snapshot.h"
@@ -1296,6 +1297,41 @@
rtc_init(ctx, rtc_localtime);
sci_init(ctx);
+ if (qemu_fwcfg_init(ctx) != 0) {
+ fprintf(stderr, "qemu fwcfg initialization error");
+ exit(4);
+ }
+
+ /*
+ * QEMU uses fwcfg item 0x0f (FW_CFG_MAX_CPUS) to report the number of
+ * cpus to the guest but states that it has a special meaning for x86.
+ * Don't know yet if that can cause unintended side effects. Use a separate
+ * fwcfg item to be safe.
+ *
+ * QEMU comment:
+ * FW_CFG_MAX_CPUS is a bit confusing/problematic on x86:
+ *
+ * For machine types prior to 1.8, SeaBIOS needs FW_CFG_MAX_CPUS
+ * for building MPTable, ACPI MADT, ACPI CPU hotplug and ACPI SRAT
+ * table, that tables are based on xAPIC ID and QEMU<->SeaBIOS
+ * interface for CPU hotplug also uses APIC ID and not "CPU index".
+ * This means that FW_CFG_MAX_CPUS is not the "maximum number of
+ * CPUs", but the "limit to the APIC ID values SeaBIOS may see".
+ *
+ * So for compatibility reasons with old BIOSes we are stuck with
+ * "etc/max-cpus" actually being apic_id_limit
+ */
+ if (qemu_fwcfg_add_file("opt/bhyve/hw.ncpu", sizeof(guest_ncpus),
+ &guest_ncpus) != 0) {
+ fprintf(stderr, "could not add qemu fwcfg opt/bhyve/hw.ncpu");
+ exit(4);
+ }
+
+ if (e820_init(ctx) != 0) {
+ fprintf(stderr, "Unable to setup E820");
+ exit(4);
+ }
+
/*
* Exit if a device emulation finds an error in its initilization
*/
@@ -1380,8 +1416,17 @@
assert(error == 0);
}
- if (lpc_bootrom())
- fwctl_init();
+ struct qemu_fwcfg_item *fwcfg_item = e820_get_fwcfg_item();
+ if (fwcfg_item == NULL) {
+ fprintf(stderr, "invalid e820 table");
+ exit(4);
+ }
+ if (qemu_fwcfg_add_file("etc/e820", fwcfg_item->size,
+ fwcfg_item->data) != 0) {
+ fprintf(stderr, "could not add qemu fwcfg etc/e820");
+ exit(4);
+ }
+ free(fwcfg_item);
/*
* Change the proc title to include the VM name.
diff --git a/usr.sbin/bhyve/e820.h b/usr.sbin/bhyve/e820.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/e820.h
@@ -0,0 +1,71 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#pragma once
+
+#include <vmmapi.h>
+
+#include "qemu_fwcfg.h"
+
+#pragma pack(push, 1)
+
+enum e820_memory_type {
+ E820_TYPE_MEMORY = 1,
+ E820_TYPE_RESERVED = 2,
+ E820_TYPE_ACPI = 3,
+ E820_TYPE_NVS = 4
+};
+
+enum e820_allocation_strategy {
+ /* allocate any address */
+ E820_ALLOCATE_ANY,
+ /* allocate lowest address larger than address */
+ E820_ALLOCATE_LOWEST,
+ /* allocate highest address lower than address */
+ E820_ALLOCATE_HIGHEST,
+ /* allocate a specific address */
+ E820_ALLOCATE_SPECIFIC
+};
+
+struct e820_entry {
+ uint64_t base;
+ uint64_t length;
+ enum e820_memory_type type;
+};
+
+#pragma pack(pop)
+
+#define E820_ALIGNMENT_NONE 1
+
+uint64_t e820_alloc(uint64_t address, uint64_t length, uint64_t alignment,
+ enum e820_memory_type type, enum e820_allocation_strategy strategy);
+void e820_dump_table();
+struct qemu_fwcfg_item *e820_get_fwcfg_item();
+int e820_init(struct vmctx *ctx);
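A usage sketch, not part of the diff: after e820_init() has populated the table, an emulation could carve a reserved range out of guest RAM with e820_alloc(). The size, alignment and the 4 GiB ceiling are illustrative values, and the MB/GB helpers are redefined here because e820.h does not export them.

#include <err.h>
#include <stdint.h>

#include "e820.h"

#define EXAMPLE_MB (1024UL * 1024)
#define EXAMPLE_GB (1024 * EXAMPLE_MB)

/* Reserve the highest free 16 MiB below 4 GiB, 4 KiB aligned. */
static uint64_t
example_reserve_region(void)
{
	uint64_t gpa;

	gpa = e820_alloc(4 * EXAMPLE_GB, 16 * EXAMPLE_MB, 4096,
	    E820_TYPE_RESERVED, E820_ALLOCATE_HIGHEST);
	if (gpa == 0)
		warnx("E820 allocation failed");

	return (gpa);
}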
diff --git a/usr.sbin/bhyve/e820.c b/usr.sbin/bhyve/e820.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/e820.c
@@ -0,0 +1,460 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/queue.h>
+
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "e820.h"
+#include "qemu_fwcfg.h"
+
+/*
+ * E820 always uses 64 bit entries. Emulation code will use vm_paddr_t since it
+ * works on physical addresses. If vm_paddr_t is larger than uint64_t E820 can't
+ * hold all possible physical addresses and we can get into trouble.
+ */
+static_assert(sizeof(vm_paddr_t) <= sizeof(uint64_t),
+ "Unable to represent physical memory by E820 table");
+
+#define E820_FWCFG_FILE_NAME "etc/e820"
+
+#define KB (1024UL)
+#define MB (1024 * KB)
+#define GB (1024 * MB)
+
+/*
+ * Fix E820 memory holes:
+ * [ A0000, C0000) VGA
+ * [ C0000, 100000) ROM
+ */
+#define E820_VGA_MEM_BASE 0xA0000
+#define E820_VGA_MEM_END 0xC0000
+#define E820_ROM_MEM_BASE 0xC0000
+#define E820_ROM_MEM_END 0x100000
+
+struct e820_element {
+ TAILQ_ENTRY(e820_element) chain;
+ uint64_t base;
+ uint64_t end;
+ enum e820_memory_type type;
+};
+TAILQ_HEAD(e820_table, e820_element) e820_table = TAILQ_HEAD_INITIALIZER(
+ e820_table);
+
+static char *
+e820_get_type_name(enum e820_memory_type type)
+{
+ switch (type) {
+ case E820_TYPE_MEMORY:
+ return "RAM ";
+ case E820_TYPE_RESERVED:
+ return "Reserved";
+ case E820_TYPE_ACPI:
+ return "ACPI ";
+ case E820_TYPE_NVS:
+ return "NVS ";
+ default:
+ return "Unknown ";
+ }
+}
+
+void
+e820_dump_table()
+{
+ fprintf(stderr, "E820 map:\n\r");
+ uint64_t i = 0;
+ struct e820_element *element;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ fprintf(stderr, " (%4lu) [ %16lx, %16lx] %s\n\r", i,
+ element->base, element->end,
+ e820_get_type_name(element->type));
+ ++i;
+ }
+}
+
+struct qemu_fwcfg_item *
+e820_get_fwcfg_item()
+{
+ uint64_t count = 0;
+ struct e820_element *element;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ ++count;
+ }
+
+ struct qemu_fwcfg_item *fwcfg_item = malloc(
+ sizeof(struct qemu_fwcfg_item));
+ if (fwcfg_item == NULL) {
+ return (NULL);
+ }
+ fwcfg_item->size = count * sizeof(struct e820_entry);
+ fwcfg_item->data = malloc(fwcfg_item->size);
+ if (fwcfg_item->data == NULL) {
+ free(fwcfg_item);
+ return (NULL);
+ }
+ uint64_t i = 0;
+ struct e820_entry *entries = (struct e820_entry *)fwcfg_item->data;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ struct e820_entry *entry = &entries[i];
+ entry->base = element->base;
+ entry->length = element->end - element->base;
+ entry->type = element->type;
+ ++i;
+ }
+
+ return fwcfg_item;
+}
+
+int
+e820_add_entry(uint64_t base, uint64_t end, enum e820_memory_type type)
+{
+ if (end < base) {
+ return (-1);
+ }
+
+ struct e820_element *new_element = malloc(sizeof(struct e820_element));
+ if (new_element == NULL) {
+ return (-ENOMEM);
+ }
+
+ new_element->base = base;
+ new_element->end = end;
+ new_element->type = type;
+
+ /*
+ * The E820 table should always be sorted in ascending order. Therefore,
+ * search for an element whose end is larger than the base parameter.
+ */
+ struct e820_element *element;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ if (element->end > base) {
+ break;
+ }
+ }
+
+ /*
+ * System memory requires special handling.
+ */
+ if (type == E820_TYPE_MEMORY) {
+ /*
+ * base is larger than that of any existing element. Add new system
+ * memory at the end of the table.
+ */
+ if (element == NULL) {
+ TAILQ_INSERT_TAIL(&e820_table, new_element, chain);
+ return (0);
+ }
+
+ /*
+ * System memory shouldn't overlap with any existing element.
+ */
+ if (end > element->base) {
+ return (-1);
+ }
+ TAILQ_INSERT_BEFORE(element, new_element, chain);
+ return (0);
+ }
+
+ if (element == NULL) {
+ /* No suitable element found */
+ return (-1);
+ }
+
+ /*
+ * Non system memory should be allocated inside system memory.
+ */
+ if (element->type != E820_TYPE_MEMORY) {
+ return (-1);
+ }
+ /*
+ * New element should fit into existing system memory element.
+ */
+ if (base < element->base || end > element->end) {
+ return (-1);
+ }
+
+ if (base == element->base) {
+ /*
+ * New element at system memory base boundary. Add new
+ * element before current and adjust the base of the old
+ * element.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM <-- element
+ * New table:
+ * [ 0x1000, 0x2000] Reserved
+ * [ 0x2000, 0x4000] RAM <-- element
+ */
+ TAILQ_INSERT_BEFORE(element, new_element, chain);
+ element->base = end;
+ } else if (end == element->end) {
+ /*
+ * New element at system memory end boundary. Add new
+ * element after current and adjust the end of the
+ * current element.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM <-- element
+ * New table:
+ * [ 0x1000, 0x3000] RAM <-- element
+ * [ 0x3000, 0x4000] Reserved
+ */
+ TAILQ_INSERT_AFTER(&e820_table, element, new_element, chain);
+ element->end = base;
+ } else {
+ /*
+ * New element inside system memory entry. Split it by
+ * adding a system memory element and the new element
+ * before current.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM <-- element
+ * New table:
+ * [ 0x1000, 0x2000] RAM
+ * [ 0x2000, 0x3000] Reserved
+ * [ 0x3000, 0x4000] RAM <-- element
+ */
+ struct e820_element *ram_element = malloc(
+ sizeof(struct e820_element));
+ if (ram_element == NULL) {
+ return (-ENOMEM);
+ }
+ ram_element->base = element->base;
+ ram_element->end = base;
+ ram_element->type = E820_TYPE_MEMORY;
+ TAILQ_INSERT_BEFORE(element, ram_element, chain);
+ TAILQ_INSERT_BEFORE(element, new_element, chain);
+ element->base = end;
+ }
+
+ return (0);
+}
+
+int
+e820_add_memory_hole(uint64_t base, uint64_t end)
+{
+ if (end < base) {
+ return (-1);
+ }
+
+ /*
+ * The E820 table should always be sorted in ascending order. Therefore,
+ * search for an element whose end is larger than the base parameter.
+ */
+ struct e820_element *element;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ if (element->end > base) {
+ break;
+ }
+ }
+
+ if (element == NULL || end <= element->base) {
+ /* Nothing to do. Hole already exists */
+ return (0);
+ }
+
+ if (element->type != E820_TYPE_MEMORY) {
+ /* Memory holes are only allowed in system memory */
+ return (-1);
+ }
+
+ if (base == element->base) {
+ /*
+ * New hole at system memory base boundary.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM
+ * New table:
+ * [ 0x2000, 0x4000] RAM
+ */
+ element->base = end;
+
+ } else if (end == element->end) {
+ /*
+ * New hole at system memory end boundary.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM
+ * New table:
+ * [ 0x1000, 0x3000] RAM
+ */
+ element->end = base;
+
+ } else {
+ /*
+ * New hole inside system memory entry. Split the system memory.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM <-- element
+ * New table:
+ * [ 0x1000, 0x2000] RAM
+ * [ 0x3000, 0x4000] RAM <-- element
+ */
+ struct e820_element *ram_element = malloc(
+ sizeof(struct e820_element));
+ if (ram_element == NULL) {
+ return (-ENOMEM);
+ }
+ ram_element->base = element->base;
+ ram_element->end = base;
+ ram_element->type = E820_TYPE_MEMORY;
+ TAILQ_INSERT_BEFORE(element, ram_element, chain);
+ element->base = end;
+ }
+
+ return (0);
+}
+
+uint64_t
+e820_alloc(uint64_t address, uint64_t length, uint64_t alignment,
+ enum e820_memory_type type, enum e820_allocation_strategy strategy)
+{
+ /* address should be aligned */
+ if (!powerof2(alignment) || (address & (alignment - 1)) != 0) {
+ return 0;
+ }
+
+ struct e820_element *element;
+ uint64_t end;
+ uint64_t base;
+ switch (strategy) {
+ case E820_ALLOCATE_ANY:
+ /*
+ * Allocate any address. Therefore, ignore the address parameter
+ * and reuse the code path for allocating the lowest address.
+ */
+ address = 0;
+ /* fallthrough */
+ case E820_ALLOCATE_LOWEST:
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ end = element->end;
+ base = roundup2(element->base, alignment);
+ if (address != 0) {
+ base = MAX(base, address);
+ }
+
+ if (element->type != E820_TYPE_MEMORY || end < base ||
+ end - base < length || base == 0) {
+ continue;
+ }
+
+ if (e820_add_entry(base, base + length, type) != 0) {
+ return 0;
+ }
+
+ return base;
+ }
+ break;
+ case E820_ALLOCATE_HIGHEST:
+ TAILQ_FOREACH_REVERSE (element, &e820_table, e820_table,
+ chain) {
+ end = element->end;
+ base = roundup2(element->base, alignment);
+ if (address != 0) {
+ end = MIN(end, address);
+ }
+
+ if (element->type != E820_TYPE_MEMORY || end < base ||
+ end - base < length || end - length == 0) {
+ continue;
+ }
+ base = rounddown2(end - length, alignment);
+
+ if (e820_add_entry(base, base + length, type) != 0) {
+ return 0;
+ }
+
+ return base;
+ }
+ break;
+ case E820_ALLOCATE_SPECIFIC:
+ base = address;
+ if (e820_add_entry(base, base + length, type) != 0) {
+ return 0;
+ }
+
+ return address;
+ }
+
+ return 0;
+}
+
+int
+e820_init(struct vmctx *ctx)
+{
+ int error;
+
+ TAILQ_INIT(&e820_table);
+
+ /* add memory below 4 GB to E820 table */
+ const uint64_t lowmem_length = vm_get_lowmem_size(ctx);
+ error = e820_add_entry(0, lowmem_length, E820_TYPE_MEMORY);
+ if (error) {
+ warnx("%s: Could not add lowmem", __func__);
+ return (error);
+ }
+
+ /* add memory above 4 GB to E820 table */
+ const uint64_t highmem_length = vm_get_highmem_size(ctx);
+ if (highmem_length != 0) {
+ error = e820_add_entry(4 * GB, 4 * GB + highmem_length,
+ E820_TYPE_MEMORY);
+ if (error) {
+ warnx("%s: Could not add highmem", __func__);
+ return (error);
+ }
+ }
+
+ /* add memory holes to E820 table */
+ error = e820_add_memory_hole(E820_VGA_MEM_BASE, E820_VGA_MEM_END);
+ if (error) {
+ warnx("%s: Could not add VGA memory", __func__);
+ return (error);
+ }
+
+ error = e820_add_memory_hole(E820_ROM_MEM_BASE, E820_ROM_MEM_END);
+ if (error) {
+ warnx("%s: Could not add ROM area", __func__);
+ return (error);
+ }
+
+ return (0);
+}
diff --git a/usr.sbin/bhyve/fwctl.c b/usr.sbin/bhyve/fwctl.c
deleted file mode 100644
--- a/usr.sbin/bhyve/fwctl.c
+++ /dev/null
@@ -1,552 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2015 Peter Grehan <grehan@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-/*
- * Guest firmware interface. Uses i/o ports x510/x511 as Qemu does,
- * but with a request/response messaging protocol.
- */
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/types.h>
-#include <sys/errno.h>
-#include <sys/uio.h>
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "bhyverun.h"
-#include "inout.h"
-#include "fwctl.h"
-
-/*
- * Messaging protocol base operations
- */
-#define OP_NULL 1
-#define OP_ECHO 2
-#define OP_GET 3
-#define OP_GET_LEN 4
-#define OP_SET 5
-#define OP_MAX OP_SET
-
-/* I/O ports */
-#define FWCTL_OUT 0x510
-#define FWCTL_IN 0x511
-
-/*
- * Back-end state-machine
- */
-enum state {
- DORMANT,
- IDENT_WAIT,
- IDENT_SEND,
- REQ,
- RESP
-} be_state = DORMANT;
-
-static uint8_t sig[] = { 'B', 'H', 'Y', 'V' };
-static u_int ident_idx;
-
-struct op_info {
- int op;
- int (*op_start)(uint32_t len);
- void (*op_data)(uint32_t data, uint32_t len);
- int (*op_result)(struct iovec **data);
- void (*op_done)(struct iovec *data);
-};
-static struct op_info *ops[OP_MAX+1];
-
-/* Return 0-padded uint32_t */
-static uint32_t
-fwctl_send_rest(uint32_t *data, size_t len)
-{
- union {
- uint8_t c[4];
- uint32_t w;
- } u;
- uint8_t *cdata;
- int i;
-
- cdata = (uint8_t *) data;
- u.w = 0;
-
- for (i = 0, u.w = 0; i < len; i++)
- u.c[i] = *cdata++;
-
- return (u.w);
-}
-
-/*
- * error op dummy proto - drop all data sent and return an error
-*/
-static int errop_code;
-
-static void
-errop_set(int err)
-{
-
- errop_code = err;
-}
-
-static int
-errop_start(uint32_t len)
-{
- errop_code = ENOENT;
-
- /* accept any length */
- return (errop_code);
-}
-
-static void
-errop_data(uint32_t data, uint32_t len)
-{
-
- /* ignore */
-}
-
-static int
-errop_result(struct iovec **data)
-{
-
- /* no data to send back; always successful */
- *data = NULL;
- return (errop_code);
-}
-
-static void
-errop_done(struct iovec *data)
-{
-
- /* assert data is NULL */
-}
-
-static struct op_info errop_info = {
- .op_start = errop_start,
- .op_data = errop_data,
- .op_result = errop_result,
- .op_done = errop_done
-};
-
-/* OID search */
-SET_DECLARE(ctl_set, struct ctl);
-
-CTL_NODE("hw.ncpu", &guest_ncpus, sizeof(guest_ncpus));
-
-static struct ctl *
-ctl_locate(const char *str, int maxlen)
-{
- struct ctl *cp, **cpp;
-
- SET_FOREACH(cpp, ctl_set) {
- cp = *cpp;
- if (!strncmp(str, cp->c_oid, maxlen))
- return (cp);
- }
-
- return (NULL);
-}
-
-/* uefi-sysctl get-len */
-#define FGET_STRSZ 80
-static struct iovec fget_biov[2];
-static char fget_str[FGET_STRSZ];
-static struct {
- size_t f_sz;
- uint32_t f_data[1024];
-} fget_buf;
-static int fget_cnt;
-static size_t fget_size;
-
-static int
-fget_start(uint32_t len)
-{
-
- if (len > FGET_STRSZ)
- return(E2BIG);
-
- fget_cnt = 0;
-
- return (0);
-}
-
-static void
-fget_data(uint32_t data, uint32_t len)
-{
-
- *((uint32_t *) &fget_str[fget_cnt]) = data;
- fget_cnt += sizeof(uint32_t);
-}
-
-static int
-fget_result(struct iovec **data, int val)
-{
- struct ctl *cp;
- int err;
-
- err = 0;
-
- /* Locate the OID */
- cp = ctl_locate(fget_str, fget_cnt);
- if (cp == NULL) {
- *data = NULL;
- err = ENOENT;
- } else {
- if (val) {
- /* For now, copy the len/data into a buffer */
- memset(&fget_buf, 0, sizeof(fget_buf));
- fget_buf.f_sz = cp->c_len;
- memcpy(fget_buf.f_data, cp->c_data, cp->c_len);
- fget_biov[0].iov_base = (char *)&fget_buf;
- fget_biov[0].iov_len = sizeof(fget_buf.f_sz) +
- cp->c_len;
- } else {
- fget_size = cp->c_len;
- fget_biov[0].iov_base = (char *)&fget_size;
- fget_biov[0].iov_len = sizeof(fget_size);
- }
-
- fget_biov[1].iov_base = NULL;
- fget_biov[1].iov_len = 0;
- *data = fget_biov;
- }
-
- return (err);
-}
-
-static void
-fget_done(struct iovec *data)
-{
-
- /* nothing needs to be freed */
-}
-
-static int
-fget_len_result(struct iovec **data)
-{
- return (fget_result(data, 0));
-}
-
-static int
-fget_val_result(struct iovec **data)
-{
- return (fget_result(data, 1));
-}
-
-static struct op_info fgetlen_info = {
- .op_start = fget_start,
- .op_data = fget_data,
- .op_result = fget_len_result,
- .op_done = fget_done
-};
-
-static struct op_info fgetval_info = {
- .op_start = fget_start,
- .op_data = fget_data,
- .op_result = fget_val_result,
- .op_done = fget_done
-};
-
-static struct req_info {
- int req_error;
- u_int req_count;
- uint32_t req_size;
- uint32_t req_type;
- uint32_t req_txid;
- struct op_info *req_op;
- int resp_error;
- int resp_count;
- size_t resp_size;
- size_t resp_off;
- struct iovec *resp_biov;
-} rinfo;
-
-static void
-fwctl_response_done(void)
-{
-
- (*rinfo.req_op->op_done)(rinfo.resp_biov);
-
- /* reinit the req data struct */
- memset(&rinfo, 0, sizeof(rinfo));
-}
-
-static void
-fwctl_request_done(void)
-{
-
- rinfo.resp_error = (*rinfo.req_op->op_result)(&rinfo.resp_biov);
-
- /* XXX only a single vector supported at the moment */
- rinfo.resp_off = 0;
- if (rinfo.resp_biov == NULL) {
- rinfo.resp_size = 0;
- } else {
- rinfo.resp_size = rinfo.resp_biov[0].iov_len;
- }
-}
-
-static int
-fwctl_request_start(void)
-{
- int err;
-
- /* Data size doesn't include header */
- rinfo.req_size -= 12;
-
- rinfo.req_op = &errop_info;
- if (rinfo.req_type <= OP_MAX && ops[rinfo.req_type] != NULL)
- rinfo.req_op = ops[rinfo.req_type];
-
- err = (*rinfo.req_op->op_start)(rinfo.req_size);
-
- if (err) {
- errop_set(err);
- rinfo.req_op = &errop_info;
- }
-
- /* Catch case of zero-length message here */
- if (rinfo.req_size == 0) {
- fwctl_request_done();
- return (1);
- }
-
- return (0);
-}
-
-static int
-fwctl_request_data(uint32_t value)
-{
-
- /* Make sure remaining size is >= 0 */
- if (rinfo.req_size <= sizeof(uint32_t))
- rinfo.req_size = 0;
- else
- rinfo.req_size -= sizeof(uint32_t);
-
- (*rinfo.req_op->op_data)(value, rinfo.req_size);
-
- if (rinfo.req_size < sizeof(uint32_t)) {
- fwctl_request_done();
- return (1);
- }
-
- return (0);
-}
-
-static int
-fwctl_request(uint32_t value)
-{
-
- int ret;
-
- ret = 0;
-
- switch (rinfo.req_count) {
- case 0:
- /* Verify size */
- if (value < 12) {
- printf("msg size error");
- exit(4);
- }
- rinfo.req_size = value;
- rinfo.req_count = 1;
- break;
- case 1:
- rinfo.req_type = value;
- rinfo.req_count++;
- break;
- case 2:
- rinfo.req_txid = value;
- rinfo.req_count++;
- ret = fwctl_request_start();
- break;
- default:
- ret = fwctl_request_data(value);
- break;
- }
-
- return (ret);
-}
-
-static int
-fwctl_response(uint32_t *retval)
-{
- uint32_t *dp;
- ssize_t remlen;
-
- switch(rinfo.resp_count) {
- case 0:
- /* 4 x u32 header len + data */
- *retval = 4*sizeof(uint32_t) +
- roundup(rinfo.resp_size, sizeof(uint32_t));
- rinfo.resp_count++;
- break;
- case 1:
- *retval = rinfo.req_type;
- rinfo.resp_count++;
- break;
- case 2:
- *retval = rinfo.req_txid;
- rinfo.resp_count++;
- break;
- case 3:
- *retval = rinfo.resp_error;
- rinfo.resp_count++;
- break;
- default:
- remlen = rinfo.resp_size - rinfo.resp_off;
- dp = (uint32_t *)
- ((uint8_t *)rinfo.resp_biov->iov_base + rinfo.resp_off);
- if (remlen >= sizeof(uint32_t)) {
- *retval = *dp;
- } else if (remlen > 0) {
- *retval = fwctl_send_rest(dp, remlen);
- }
- rinfo.resp_off += sizeof(uint32_t);
- break;
- }
-
- if (rinfo.resp_count > 3 &&
- rinfo.resp_off >= rinfo.resp_size) {
- fwctl_response_done();
- return (1);
- }
-
- return (0);
-}
-
-
-/*
- * i/o port handling.
- */
-static uint8_t
-fwctl_inb(void)
-{
- uint8_t retval;
-
- retval = 0xff;
-
- switch (be_state) {
- case IDENT_SEND:
- retval = sig[ident_idx++];
- if (ident_idx >= sizeof(sig))
- be_state = REQ;
- break;
- default:
- break;
- }
-
- return (retval);
-}
-
-static void
-fwctl_outw(uint16_t val)
-{
- switch (be_state) {
- case IDENT_WAIT:
- if (val == 0) {
- be_state = IDENT_SEND;
- ident_idx = 0;
- }
- break;
- default:
- /* ignore */
- break;
- }
-}
-
-static uint32_t
-fwctl_inl(void)
-{
- uint32_t retval;
-
- switch (be_state) {
- case RESP:
- if (fwctl_response(&retval))
- be_state = REQ;
- break;
- default:
- retval = 0xffffffff;
- break;
- }
-
- return (retval);
-}
-
-static void
-fwctl_outl(uint32_t val)
-{
-
- switch (be_state) {
- case REQ:
- if (fwctl_request(val))
- be_state = RESP;
- default:
- break;
- }
-
-}
-
-static int
-fwctl_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
- uint32_t *eax, void *arg)
-{
-
- if (in) {
- if (bytes == 1)
- *eax = fwctl_inb();
- else if (bytes == 4)
- *eax = fwctl_inl();
- else
- *eax = 0xffff;
- } else {
- if (bytes == 2)
- fwctl_outw(*eax);
- else if (bytes == 4)
- fwctl_outl(*eax);
- }
-
- return (0);
-}
-INOUT_PORT(fwctl_wreg, FWCTL_OUT, IOPORT_F_INOUT, fwctl_handler);
-INOUT_PORT(fwctl_rreg, FWCTL_IN, IOPORT_F_IN, fwctl_handler);
-
-void
-fwctl_init(void)
-{
-
- ops[OP_GET_LEN] = &fgetlen_info;
- ops[OP_GET] = &fgetval_info;
-
- be_state = IDENT_WAIT;
-}
diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h
--- a/usr.sbin/bhyve/pci_emul.h
+++ b/usr.sbin/bhyve/pci_emul.h
@@ -41,6 +41,8 @@
#include <assert.h>
#define PCI_BARMAX PCIR_MAX_BAR_0 /* BAR registers in a Type 0 header */
+#define PCI_BARMAX_WITH_ROM (PCI_BARMAX + 1)
+#define PCI_ROM_IDX (PCI_BARMAX + 1)
struct vmctx;
struct pci_devinst;
@@ -88,13 +90,15 @@
PCIBAR_IO,
PCIBAR_MEM32,
PCIBAR_MEM64,
- PCIBAR_MEMHI64
+ PCIBAR_MEMHI64,
+ PCIBAR_ROM,
};
struct pcibar {
enum pcibar_type type; /* io or memory */
uint64_t size;
uint64_t addr;
+ uint8_t lobits;
};
#define PI_NAMESZ 40
@@ -160,7 +164,9 @@
void *pi_arg; /* devemu-private data */
u_char pi_cfgdata[PCI_REGMAX + 1];
- struct pcibar pi_bar[PCI_BARMAX + 1];
+ /* ROM is handled like a BAR */
+ struct pcibar pi_bar[PCI_BARMAX_WITH_ROM + 1];
+ uint64_t pi_romoffset;
};
struct msicap {
@@ -224,6 +230,7 @@
void pci_callback(void);
int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx,
enum pcibar_type type, uint64_t size);
+int pci_emul_alloc_rom(struct pci_devinst *pdi, uint64_t size, uint64_t *addr);
int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type);
void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes,
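A sketch, not part of the diff: how a PCI device model might use pci_emul_alloc_rom() to obtain a host mapping inside the VM_PCIROM segment and copy a ROM image into it. The function name, file handling and fixed 64 KiB size are placeholders; the real consumers are the rom= option and the pci_gvt-d.c emulation added to the bhyve Makefile in this diff.

#include <stdint.h>
#include <stdio.h>

#include "pci_emul.h"

static int
example_setup_rom(struct pci_devinst *pi, const char *romfile)
{
	const uint64_t rom_size = 64 * 1024;	/* placeholder size */
	uint64_t rom_addr;
	FILE *fp;
	int error;

	error = pci_emul_alloc_rom(pi, rom_size, &rom_addr);
	if (error != 0)
		return (error);

	/* rom_addr is a host virtual address backed by the ROM devmem */
	fp = fopen(romfile, "rb");
	if (fp == NULL)
		return (-1);
	if (fread((void *)(uintptr_t)rom_addr, rom_size, 1, fp) != 1)
		error = -1;
	fclose(fp);

	return (error);
}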
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -33,12 +33,15 @@
#include <sys/param.h>
#include <sys/linker_set.h>
+#include <sys/mman.h>
+
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <ctype.h>
#include <errno.h>
+#include <err.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
@@ -72,6 +75,8 @@
#define MAXSLOTS (PCI_SLOTMAX + 1)
#define MAXFUNCS (PCI_FUNCMAX + 1)
+#define GB (1024 * 1024 * 1024UL)
+
struct funcinfo {
char *fi_name;
char *fi_param;
@@ -101,18 +106,36 @@
SET_DECLARE(pci_devemu_set, struct pci_devemu);
static uint64_t pci_emul_iobase;
+static uint64_t pci_emul_iolim;
+static uint64_t pci_emul_rombase;
+static uint64_t pci_emul_romoffset;
+static uint64_t pci_emul_romlim;
static uint64_t pci_emul_membase32;
+static uint64_t pci_emul_memlim32;
static uint64_t pci_emul_membase64;
static uint64_t pci_emul_memlim64;
+struct pci_bar_allocation {
+ TAILQ_ENTRY(pci_bar_allocation) pci_bar_chain;
+ struct pci_devinst *pdi;
+ int idx;
+ enum pcibar_type type;
+ uint64_t size;
+};
+TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars = TAILQ_HEAD_INITIALIZER(
+ pci_bars);
+
#define PCI_EMUL_IOBASE 0x2000
#define PCI_EMUL_IOLIMIT 0x10000
+#define PCI_EMUL_ROMSIZE 0x10000000
+
#define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */
#define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */
SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);
#define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE
+#define PCI_EMUL_MEMSIZE64 (32 * GB)
static struct pci_devemu *pci_emul_finddev(char *name);
static void pci_lintr_route(struct pci_devinst *pi);
@@ -502,6 +525,12 @@
(*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
pi->pi_bar[idx].addr);
break;
+ case PCIBAR_ROM:
+ error = 0;
+ if (pe->pe_baraddr != NULL)
+ (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
+ pi->pi_bar[idx].addr);
+ break;
default:
error = EINVAL;
break;
@@ -523,6 +552,13 @@
modify_bar_registration(pi, idx, 1);
}
+/* Is the ROM enabled for the emulated pci device? */
+static int
+romen(struct pci_devinst *pi)
+{
+ return (pi->pi_bar[PCI_ROM_IDX].lobits & PCIM_BIOS_ENABLE) == PCIM_BIOS_ENABLE;
+}
+
/* Are we decoding i/o port accesses for the emulated pci device? */
static int
porten(struct pci_devinst *pi)
@@ -589,11 +625,11 @@
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
uint64_t size)
{
- int error;
- uint64_t *baseptr, limit, addr, mask, lobits, bar;
- uint16_t cmd, enbit;
-
- assert(idx >= 0 && idx <= PCI_BARMAX);
+ if ((type != PCIBAR_ROM) && (idx < 0 || idx > PCI_BARMAX)) {
+ errx(4, "Illegal BAR idx");
+ } else if ((type == PCIBAR_ROM) && (idx != PCI_ROM_IDX)) {
+ errx(4, "Illegal ROM idx");
+ }
if ((size & (size - 1)) != 0)
size = 1UL << flsl(size); /* round up to a power of 2 */
@@ -602,11 +638,89 @@
if (type == PCIBAR_IO) {
if (size < 4)
size = 4;
+ } else if (type == PCIBAR_ROM) {
+ if (size < ~PCIM_BIOS_ADDR_MASK + 1)
+ size = ~PCIM_BIOS_ADDR_MASK + 1;
} else {
if (size < 16)
size = 16;
}
+ /* allocate new bar */
+ struct pci_bar_allocation *new_bar = malloc(sizeof(struct pci_bar_allocation));
+ memset(new_bar, 0, sizeof(struct pci_bar_allocation));
+ new_bar->pdi = pdi;
+ new_bar->idx = idx;
+ new_bar->type = type;
+ new_bar->size = size;
+
+ /* get bar position */
+ struct pci_bar_allocation *bar = NULL;
+ TAILQ_FOREACH (bar, &pci_bars, pci_bar_chain) {
+ if (bar->size < size) {
+ break;
+ }
+ }
+
+ /* insert bar into queue */
+ if (bar == NULL) {
+ TAILQ_INSERT_TAIL(&pci_bars, new_bar, pci_bar_chain);
+ } else {
+ TAILQ_INSERT_BEFORE(bar, new_bar, pci_bar_chain);
+ }
+
+ return (0);
+}
+
+int
+pci_emul_alloc_rom(struct pci_devinst *pdi, uint64_t size, uint64_t *addr)
+{
+ /* allocate ROM-Space once */
+ if (pci_emul_rombase == 0) {
+ pci_emul_rombase = (uint64_t)vm_create_devmem(pdi->pi_vmctx, VM_PCIROM,
+ "pcirom", PCI_EMUL_ROMSIZE);
+ if ((void *)pci_emul_rombase == MAP_FAILED)
+ return -ENOMEM;
+ pci_emul_romlim = pci_emul_rombase + PCI_EMUL_ROMSIZE;
+ pci_emul_romoffset = 0;
+ }
+
+ /* round up to a power of 2 */
+ uint64_t rom_size = 1UL << flsl(size);
+ /* ROM size must be at least 2 KB */
+ rom_size = MAX(rom_size, ~PCIM_BIOS_ADDR_MASK + 1);
+
+ /* check if ROM fits into ROM-Space */
+ if (pci_emul_romoffset + rom_size > PCI_EMUL_ROMSIZE)
+ return -E2BIG;
+
+ /* allocate ROM BAR */
+ const int error = pci_emul_alloc_bar(pdi, PCI_ROM_IDX, PCIBAR_ROM, rom_size);
+ if (error)
+ return error;
+
+ /* return address */
+ *addr = pci_emul_rombase + pci_emul_romoffset;
+ /* save offset into ROM Space */
+ pdi->pi_romoffset = pci_emul_romoffset;
+ /* increase offset for next ROM */
+ pci_emul_romoffset += rom_size;
+
+ return (0);
+}
+
+static int
+pci_emul_assign_bar(struct pci_bar_allocation *pci_bar)
+{
+ struct pci_devinst *pdi = pci_bar->pdi;
+ int idx = pci_bar->idx;
+ enum pcibar_type type = pci_bar->type;
+ uint64_t size = pci_bar->size;
+
+ int error;
+ uint64_t *baseptr, limit, addr, mask, lobits, bar;
+ uint16_t cmd, enbit;
+
switch (type) {
case PCIBAR_NONE:
baseptr = NULL;
@@ -614,7 +728,7 @@
break;
case PCIBAR_IO:
baseptr = &pci_emul_iobase;
- limit = PCI_EMUL_IOLIMIT;
+ limit = pci_emul_iolim;
mask = PCIM_BAR_IO_BASE;
lobits = PCIM_BAR_IO_SPACE;
enbit = PCIM_CMD_PORTEN;
@@ -633,21 +747,33 @@
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
PCIM_BAR_MEM_PREFETCH;
- } else {
- baseptr = &pci_emul_membase32;
- limit = PCI_EMUL_MEMLIMIT32;
- mask = PCIM_BAR_MEM_BASE;
- lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
+ enbit = PCIM_CMD_MEMEN;
+ break;
}
- enbit = PCIM_CMD_MEMEN;
- break;
+ /*
+ * Use 32-bit BARs for small requests:
+ * fall through into the MEM32 case.
+ */
+ type = PCIBAR_MEM32;
+ pdi->pi_bar[idx + 1].type = PCIBAR_NONE;
+ /* clear 64-bit flag */
+ pdi->pi_bar[idx].lobits &= ~PCIM_BAR_MEM_64;
+ /* [fallthrough] */
case PCIBAR_MEM32:
baseptr = &pci_emul_membase32;
- limit = PCI_EMUL_MEMLIMIT32;
+ limit = pci_emul_memlim32;
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
enbit = PCIM_CMD_MEMEN;
break;
+ case PCIBAR_ROM:
+ /* do not claim memory for ROM. OVMF will do it for us. */
+ baseptr = NULL;
+ limit = 0;
+ mask = PCIM_BIOS_ADDR_MASK;
+ lobits = 0;
+ enbit = PCIM_CMD_MEMEN;
+ break;
default:
printf("pci_emul_alloc_base: invalid bar type %d\n", type);
assert(0);
@@ -662,6 +788,13 @@
pdi->pi_bar[idx].type = type;
pdi->pi_bar[idx].addr = addr;
pdi->pi_bar[idx].size = size;
+ /* Passthru devices use the same lobits as the physical device;
+  * they set this field before the BAR is assigned.
+  */
+ if (pdi->pi_bar[idx].lobits != 0)
+ lobits = pdi->pi_bar[idx].lobits;
+ else
+ pdi->pi_bar[idx].lobits = lobits;
/* Initialize the BAR register in config space */
bar = (addr & mask) | lobits;
@@ -676,7 +809,9 @@
cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
if ((cmd & enbit) != enbit)
pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);
- register_bar(pdi, idx);
+ if (type != PCIBAR_ROM) {
+ register_bar(pdi, idx);
+ }
return (0);
}
@@ -1098,25 +1233,17 @@
struct slotinfo *si;
struct funcinfo *fi;
size_t lowmem;
- uint64_t cpu_maxphysaddr, pci_emul_memresv64;
- u_int regs[4];
int bus, slot, func, error;
pci_emul_iobase = PCI_EMUL_IOBASE;
+ pci_emul_iolim = PCI_EMUL_IOLIMIT;
+
pci_emul_membase32 = vm_get_lowmem_limit(ctx);
+ pci_emul_memlim32 = PCI_EMUL_MEMLIMIT32;
- do_cpuid(0x80000008, regs);
- cpu_maxphysaddr = 1ULL << (regs[0] & 0xff);
- if (cpu_maxphysaddr > VM_MAXUSER_ADDRESS_LA48)
- cpu_maxphysaddr = VM_MAXUSER_ADDRESS_LA48;
- pci_emul_memresv64 = cpu_maxphysaddr / 4;
- /*
- * Max power of 2 that is less then
- * cpu_maxphysaddr - pci_emul_memresv64.
- */
- pci_emul_membase64 = 1ULL << (flsl(cpu_maxphysaddr -
- pci_emul_memresv64) - 1);
- pci_emul_memlim64 = cpu_maxphysaddr;
+ pci_emul_membase64 = 4 * GB + vm_get_highmem_size(ctx);
+ pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64);
+ pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64;
for (bus = 0; bus < MAXBUSES; bus++) {
if ((bi = pci_businfo[bus]) == NULL)
@@ -1129,6 +1256,7 @@
bi->membase32 = pci_emul_membase32;
bi->membase64 = pci_emul_membase64;
+ /* first run: init devices */
for (slot = 0; slot < MAXSLOTS; slot++) {
si = &bi->slotinfo[slot];
for (func = 0; func < MAXFUNCS; func++) {
@@ -1144,6 +1272,18 @@
}
}
+ /* second run: assign BARs */
+ struct pci_bar_allocation *bar;
+ TAILQ_FOREACH (bar, &pci_bars, pci_bar_chain) {
+ pci_emul_assign_bar(bar);
+ }
+ /* free BARs */
+ while (!TAILQ_EMPTY(&pci_bars)) {
+ bar = TAILQ_FIRST(&pci_bars);
+ TAILQ_REMOVE(&pci_bars, bar, pci_bar_chain);
+ free(bar);
+ }
+
/*
* Add some slop to the I/O and memory resources decoded by
* this bus to give a guest some flexibility if it wants to
@@ -1717,7 +1857,7 @@
* If the MMIO or I/O address space decoding has changed then
* register/unregister all BARs that decode that address space.
*/
- for (i = 0; i <= PCI_BARMAX; i++) {
+ for (i = 0; i <= PCI_BARMAX_WITH_ROM; i++) {
switch (pi->pi_bar[i].type) {
case PCIBAR_NONE:
case PCIBAR_MEMHI64:
@@ -1731,6 +1871,11 @@
unregister_bar(pi, i);
}
break;
+ case PCIBAR_ROM:
+ /* skip (un)registering the ROM if it is disabled */
+ if (pi->pi_bar[i].lobits == 0)
+ break;
+ /* fallthrough */
case PCIBAR_MEM32:
case PCIBAR_MEM64:
/* MMIO address space decoding changed? */
@@ -1851,16 +1996,21 @@
return;
/*
- * Special handling for write to BAR registers
+ * Special handling for write to BAR and ROM registers
*/
- if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
+ if ((coff >= PCIR_BAR(0) && coff <= PCIR_BAR(PCI_BARMAX)) ||
+ (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4)) {
/*
* Ignore writes to BAR registers that are not
* 4-byte aligned.
*/
if (bytes != 4 || (coff & 0x3) != 0)
return;
- idx = (coff - PCIR_BAR(0)) / 4;
+ if (coff != PCIR_BIOS) {
+ idx = (coff - PCIR_BAR(0)) / 4;
+ } else {
+ idx = PCI_ROM_IDX;
+ }
mask = ~(pi->pi_bar[idx].size - 1);
switch (pi->pi_bar[idx].type) {
case PCIBAR_NONE:
@@ -1869,7 +2019,7 @@
case PCIBAR_IO:
addr = *eax & mask;
addr &= 0xffff;
- bar = addr | PCIM_BAR_IO_SPACE;
+ bar = addr | pi->pi_bar[idx].lobits;
/*
* Register the new BAR value for interception
*/
@@ -1880,7 +2030,7 @@
break;
case PCIBAR_MEM32:
addr = bar = *eax & mask;
- bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
+ bar |= pi->pi_bar[idx].lobits;
if (addr != pi->pi_bar[idx].addr) {
update_bar_address(pi, addr, idx,
PCIBAR_MEM32);
@@ -1888,8 +2038,7 @@
break;
case PCIBAR_MEM64:
addr = bar = *eax & mask;
- bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
- PCIM_BAR_MEM_PREFETCH;
+ bar |= pi->pi_bar[idx].lobits;
if (addr != (uint32_t)pi->pi_bar[idx].addr) {
update_bar_address(pi, addr, idx,
PCIBAR_MEM64);
@@ -1904,6 +2053,20 @@
PCIBAR_MEMHI64);
}
break;
+ case PCIBAR_ROM:
+ addr = bar = *eax & mask;
+ if (memen(pi) && romen(pi)) {
+ unregister_bar(pi, idx);
+ }
+ pi->pi_bar[idx].addr = addr;
+ pi->pi_bar[idx].lobits = *eax &
+ PCIM_BIOS_ENABLE;
+ /* romen() could have changed its value */
+ if (memen(pi) && romen(pi)) {
+ register_bar(pi, idx);
+ }
+ bar |= pi->pi_bar[idx].lobits;
+ break;
default:
assert(0);
}
@@ -1941,7 +2104,7 @@
} else {
x = *eax;
cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE;
- cfgoff = x & PCI_REGMAX;
+ cfgoff = (x & PCI_REGMAX) & ~0x03;
cfgfunc = (x >> 8) & PCI_FUNCMAX;
cfgslot = (x >> 11) & PCI_SLOTMAX;
cfgbus = (x >> 16) & PCI_BUSMAX;
diff --git a/usr.sbin/bhyve/pci_gvt-d.c b/usr.sbin/bhyve/pci_gvt-d.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/pci_gvt-d.c
@@ -0,0 +1,288 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/mman.h>
+
+#include <machine/vmm.h>
+
+#include <dev/pci/pcireg.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include "e820.h"
+#include "inout.h"
+#include "pci_passthru.h"
+
+#define MB (1024 * 1024UL)
+#define GB (1024 * MB)
+
+#ifndef _PATH_MEM
+#define _PATH_MEM "/dev/mem"
+#endif
+
+/*
+ * PCI definitions
+ */
+#define PCIM_BDSM_GSM_ALIGNMENT \
+ 0x00100000 /* Graphics Stolen Memory is 1 MB aligned */
+
+/* GVT-d definitions */
+#define GVT_D_MAP_OPREGION 0
+#define GVT_D_MAP_GSM 1
+
+static int
+gvt_d_aslswrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
+ int bytes, uint32_t val)
+{
+ struct passthru_softc *sc;
+
+ sc = pi->pi_arg;
+
+ struct passthru_mmio_mapping *opregion =
+ &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+
+ /* write new value to cfg space */
+ if (bytes == 1) {
+ pci_set_cfgdata8(pi, coff, val);
+ } else if (bytes == 2) {
+ pci_set_cfgdata16(pi, coff, val);
+ } else {
+ pci_set_cfgdata32(pi, coff, val);
+ }
+
+ /* get new address of opregion */
+ opregion->gpa = pci_get_cfgdata32(pi, PCIR_ASLS_CTL);
+
+ /* copy opregion into guest mem */
+ opregion->gva = vm_map_gpa(ctx, opregion->gpa, opregion->len);
+ if (opregion->gva == 0) {
+ warnx("%s: Unable to map opregion (0x%016lx)", __func__,
+ opregion->gpa);
+ /* return 0 to avoid emulation of ASLS register */
+ return (0);
+ }
+ memcpy(opregion->gva, opregion->hva, opregion->len);
+
+ return (0);
+}
+
+static vm_paddr_t
+gvt_d_alloc_mmio_memory(vm_paddr_t host_address, vm_paddr_t length, vm_paddr_t alignment,
+ enum e820_memory_type type)
+{
+ /* try to use host address */
+ vm_paddr_t address = e820_alloc(host_address, length, E820_ALIGNMENT_NONE,
+ type, E820_ALLOCATE_SPECIFIC);
+ if (address != 0) {
+ return address;
+ }
+
+ /* try to use highest address below 4 GB */
+ return e820_alloc(4 * GB, length, alignment, type,
+ E820_ALLOCATE_HIGHEST);
+}
+
+static int
+gvt_d_setup_gsm(struct vmctx *ctx, struct pci_devinst *pi)
+{
+ struct passthru_softc *sc = pi->pi_arg;
+
+ struct passthru_mmio_mapping *gsm = &sc->psc_mmio_map[GVT_D_MAP_GSM];
+
+ const int error = vm_get_memory_region_info(ctx, &gsm->hpa, &gsm->len,
+ MEMORY_REGION_INTEL_GSM);
+ if (error) {
+ warnx(
+ "%s: Unable to get Graphics Stolen Memory base and length",
+ __func__);
+ return (error);
+ }
+ gsm->hva = NULL; /* unused */
+ gsm->gva = NULL; /* unused */
+ gsm->gpa = gvt_d_alloc_mmio_memory(gsm->hpa, gsm->len,
+ PCIM_BDSM_GSM_ALIGNMENT, E820_TYPE_RESERVED);
+ if (gsm->gpa == 0) {
+ warnx(
+ "%s: Unable to add Graphics Stolen Memory to E820 table (hpa 0x%lx len 0x%lx)",
+ __func__, gsm->hpa, gsm->len);
+ e820_dump_table();
+ return (-1);
+ }
+ if (gsm->gpa != gsm->hpa) {
+ /*
+ * ACRN source code implies that the graphics driver for newer Intel
+ * platforms like Tiger Lake will read the Graphics Stolen
+ * Memory address from an MMIO register. We have three options
+ * to solve this issue:
+ * 1. Patch the value in the MMIO register
+ * This could have unintended side effects. Without
+ * any documentation of how this register is used by
+ * the GPU, don't do it.
+ * 2. Trap the MMIO register
+ * It's not possible to trap a single MMIO
+ * register. We need to trap a whole page. Trapping
+ * a bunch of MMIO registers could degrade the
+ * performance noticeably.
+ * 3. Use a 1:1 host to guest mapping
+ * Maybe not always possible.
+ * As far as we know, no supported platform requires a 1:1
+ * mapping. For that reason, just log a warning.
+ */
+ warnx(
+ "Warning: Unable to reuse host address of Graphics Stolen Memory. GPU passthrough might not work properly.");
+ }
+
+ const uint64_t bdsm = read_config(&sc->psc_sel, PCIR_BDSM, 4);
+ pci_set_cfgdata32(pi, PCIR_BDSM,
+ gsm->gpa | (bdsm & (PCIM_BDSM_GSM_ALIGNMENT - 1)));
+
+ return (0);
+}
+
+static int
+gvt_d_setup_opregion(struct vmctx *ctx, struct pci_devinst *pi, const int memfd)
+{
+ struct passthru_softc *sc = pi->pi_arg;
+
+ struct passthru_mmio_mapping *opregion =
+ &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+
+ const int error = vm_get_memory_region_info(ctx, &opregion->hpa,
+ &opregion->len, MEMORY_REGION_INTEL_OPREGION);
+ if (error) {
+ warnx(
+ "%s: Unable to get OpRegion base and length",
+ __func__);
+ return (error);
+ }
+ opregion->hva = mmap(NULL, opregion->len, PROT_READ, MAP_SHARED, memfd,
+ opregion->hpa);
+ if (opregion->hva == MAP_FAILED) {
+ warnx("%s: Unable to map host OpRegion", __func__);
+ return (-1);
+ }
+ opregion->gpa = gvt_d_alloc_mmio_memory(opregion->hpa, opregion->len,
+ E820_ALIGNMENT_NONE, E820_TYPE_NVS);
+ if (opregion->gpa == 0) {
+ warnx(
+ "%s: Unable to add OpRegion to E820 table (hpa 0x%lx len 0x%lx)",
+ __func__, opregion->hpa, opregion->len);
+ e820_dump_table();
+ return (-1);
+ }
+ opregion->gva = vm_map_gpa(ctx, opregion->gpa, opregion->len);
+ if (opregion->gva == NULL) {
+ warnx("%s: Unable to map guest OpRegion", __func__);
+ return (-1);
+ }
+ if (opregion->gpa != opregion->hpa) {
+ /*
+ * A 1:1 host to guest mapping is not required but this could
+ * change in the future.
+ */
+ warnx(
+ "Warning: Unable to reuse host address of OpRegion. GPU passthrough might not work properly.");
+ }
+
+ memcpy(opregion->gva, opregion->hva, opregion->len);
+
+ pci_set_cfgdata32(pi, PCIR_ASLS_CTL, opregion->gpa);
+
+ return (0);
+}
+
+int
+gvt_d_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+ int error;
+ struct passthru_softc *sc;
+
+ sc = pi->pi_arg;
+
+ /* get memory descriptor */
+ const int memfd = open(_PATH_MEM, O_RDWR, 0);
+ if (memfd < 0) {
+ warn("%s: Failed to open %s", __func__, _PATH_MEM);
+ return (-1);
+ }
+
+ if ((error = gvt_d_setup_gsm(ctx, pi)) != 0) {
+ warnx("%s: Unable to setup Graphics Stolen Memory", __func__);
+ goto done;
+ }
+
+ if ((error = gvt_d_setup_opregion(ctx, pi, memfd)) != 0) {
+ warnx("%s: Unable to setup OpRegion", __func__);
+ goto done;
+ }
+
+ /* protect Graphics Stolen Memory register */
+ if ((error = set_pcir_handler(sc, PCIR_BDSM, 4,
+ passthru_cfgread_emulate, passthru_cfgwrite_emulate)) != 0) {
+ warnx("%s: Unable to protect opregion", __func__);
+ goto done;
+ }
+ /* protect opregion register */
+ if ((error = set_pcir_handler(sc, PCIR_ASLS_CTL, 4,
+ passthru_cfgread_emulate, gvt_d_aslswrite)) != 0) {
+ warnx("%s: Unable to protect opregion", __func__);
+ goto done;
+ }
+
+done:
+ return (error);
+}
+
+void
+gvt_d_deinit(struct vmctx *ctx, struct pci_devinst *pi)
+{
+ struct passthru_softc *sc;
+
+ sc = pi->pi_arg;
+
+ struct passthru_mmio_mapping *opregion =
+ &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+
+ /* The HVA is only set if it was initialized */
+ if (opregion->hva)
+ munmap((void *)opregion->hva, opregion->len);
+}
diff --git a/usr.sbin/bhyve/pci_lpc.c b/usr.sbin/bhyve/pci_lpc.c
--- a/usr.sbin/bhyve/pci_lpc.c
+++ b/usr.sbin/bhyve/pci_lpc.c
@@ -33,9 +33,13 @@
__FBSDID("$FreeBSD$");
#include <sys/types.h>
+#include <sys/pciio.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -83,6 +87,29 @@
static bool pctestdev_present;
+#ifndef _PATH_DEVPCI
+#define _PATH_DEVPCI "/dev/pci"
+#endif
+
+static int pcifd = -1;
+
+static uint32_t
+read_config(struct pcisel *sel, long reg, int width)
+{
+ struct pci_io pi;
+ pi.pi_sel.pc_domain = sel->pc_domain;
+ pi.pi_sel.pc_bus = sel->pc_bus;
+ pi.pi_sel.pc_dev = sel->pc_dev;
+ pi.pi_sel.pc_func = sel->pc_func;
+ pi.pi_reg = reg;
+ pi.pi_width = width;
+
+ if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
+ return (0);
+
+ return (pi.pi_data);
+}
+
/*
* LPC device configuration is in the following form:
* <lpc_device_name>[,<options>]
@@ -446,6 +473,40 @@
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA);
+ /* open host device */
+ if (pcifd < 0) {
+ pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
+ if (pcifd < 0) {
+ warn("failed to open %s", _PATH_DEVPCI);
+ return (-1);
+ }
+ }
+
+ /* on Intel systems the LPC bridge is always at 0:1f.0 */
+ struct pcisel sel;
+ sel.pc_domain = 0;
+ sel.pc_bus = 0;
+ sel.pc_dev = 0x1f;
+ sel.pc_func = 0;
+
+ if (read_config(&sel, PCIR_VENDOR, 2) == PCI_VENDOR_INTEL) {
+ /*
+ * The VID, DID, REVID, SUBVID and SUBDID of igd-lpc need to
+ * match the physical values. Without them, the GVT-d GOP
+ * driver won't work.
+ */
+ pci_set_cfgdata16(
+ pi, PCIR_DEVICE, read_config(&sel, PCIR_DEVICE, 2));
+ pci_set_cfgdata16(
+ pi, PCIR_VENDOR, read_config(&sel, PCIR_VENDOR, 2));
+ pci_set_cfgdata8(
+ pi, PCIR_REVID, read_config(&sel, PCIR_REVID, 1));
+ pci_set_cfgdata16(
+ pi, PCIR_SUBVEND_0, read_config(&sel, PCIR_SUBVEND_0, 2));
+ pci_set_cfgdata16(
+ pi, PCIR_SUBDEV_0, read_config(&sel, PCIR_SUBDEV_0, 2));
+ }
+
lpc_bridge = pi;
return (0);
diff --git a/usr.sbin/bhyve/pci_passthru.h b/usr.sbin/bhyve/pci_passthru.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/pci_passthru.h
@@ -0,0 +1,84 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#pragma once
+
+#include <sys/pciio.h>
+
+#include <vmmapi.h>
+
+#include "pci_emul.h"
+
+struct passthru_mmio_mapping {
+ vm_paddr_t gpa; /* guest physical address */
+ void *gva; /* guest virtual address */
+ vm_paddr_t hpa; /* host physical address */
+ void *hva; /* host virtual address */
+ vm_paddr_t len;
+};
+
+typedef int (*cfgread_handler)(struct vmctx *ctx, int vcpu,
+ struct pci_devinst *pi, int coff, int bytes, uint32_t *rv);
+typedef int (*cfgwrite_handler)(struct vmctx *ctx, int vcpu,
+ struct pci_devinst *pi, int coff, int bytes, uint32_t val);
+
+struct passthru_softc {
+ struct pci_devinst *psc_pi;
+ /* ROM is handled like a BAR */
+ struct pcibar psc_bar[PCI_BARMAX_WITH_ROM + 1];
+ struct {
+ int capoff;
+ int msgctrl;
+ int emulated;
+ } psc_msi;
+ struct {
+ int capoff;
+ } psc_msix;
+ struct pcisel psc_sel;
+
+ struct passthru_mmio_mapping psc_mmio_map[2];
+ cfgread_handler psc_pcir_rhandler[PCI_REGMAX + 1];
+ cfgwrite_handler psc_pcir_whandler[PCI_REGMAX + 1];
+};
+
+uint32_t read_config(const struct pcisel *sel, long reg, int width);
+void write_config(const struct pcisel *sel, long reg, int width, uint32_t data);
+int passthru_cfgread_default(struct vmctx *ctx, int vcpu,
+ struct pci_devinst *pi, int coff, int bytes, uint32_t *rv);
+int passthru_cfgread_emulate(struct vmctx *ctx, int vcpu,
+ struct pci_devinst *pi, int coff, int bytes, uint32_t *rv);
+int passthru_cfgwrite_default(struct vmctx *ctx, int vcpu,
+ struct pci_devinst *pi, int coff, int bytes, uint32_t val);
+int passthru_cfgwrite_emulate(struct vmctx *ctx, int vcpu,
+ struct pci_devinst *pi, int coff, int bytes, uint32_t val);
+int set_pcir_handler(struct passthru_softc *sc, uint32_t reg, uint32_t len,
+ cfgread_handler rhandler, cfgwrite_handler whandler);
+int gvt_d_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts);
+void gvt_d_deinit(struct vmctx *ctx, struct pci_devinst *pi);
diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c
--- a/usr.sbin/bhyve/pci_passthru.c
+++ b/usr.sbin/bhyve/pci_passthru.c
@@ -48,19 +48,19 @@
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include <machine/vmm.h>
+
#include <err.h>
#include <errno.h>
#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
#include <sysexits.h>
#include <unistd.h>
-#include <machine/vmm.h>
-#include <vmmapi.h>
-#include "pci_emul.h"
#include "mem.h"
+#include "pci_passthru.h"
#ifndef _PATH_DEVPCI
#define _PATH_DEVPCI "/dev/pci"
@@ -79,24 +79,12 @@
#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1)
#define MSIX_CAPLEN 12
+#define PCI_CAP_START_OFFSET 0x40
+
static int pcifd = -1;
static int iofd = -1;
static int memfd = -1;
-struct passthru_softc {
- struct pci_devinst *psc_pi;
- struct pcibar psc_bar[PCI_BARMAX + 1];
- struct {
- int capoff;
- int msgctrl;
- int emulated;
- } psc_msi;
- struct {
- int capoff;
- } psc_msix;
- struct pcisel psc_sel;
-};
-
static int
msi_caplen(int msgctrl)
{
@@ -119,7 +107,7 @@
return (len);
}
-static uint32_t
+uint32_t
read_config(const struct pcisel *sel, long reg, int width)
{
struct pci_io pi;
@@ -135,7 +123,7 @@
return (pi.pi_data);
}
-static void
+void
write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
{
struct pci_io pi;
@@ -556,12 +544,23 @@
sc->psc_bar[i].type = bartype;
sc->psc_bar[i].size = size;
sc->psc_bar[i].addr = base;
+ sc->psc_bar[i].lobits = 0;
/* Allocate the BAR in the guest I/O or MMIO space */
error = pci_emul_alloc_bar(pi, i, bartype, size);
if (error)
return (-1);
+ /* Use same lobits as physical bar */
+ uint8_t lobits = read_config(&sc->psc_sel, PCIR_BAR(i), 0x01);
+ if (bartype == PCIBAR_MEM32 || bartype == PCIBAR_MEM64) {
+ lobits &= ~PCIM_BAR_MEM_BASE;
+ } else {
+ lobits &= ~PCIM_BAR_IO_BASE;
+ }
+ sc->psc_bar[i].lobits = lobits;
+ pi->pi_bar[i].lobits = lobits;
+
/* The MSI-X table needs special handling */
if (i == pci_msix_table_bar(pi)) {
error = init_msix_table(ctx, sc, base);
@@ -595,6 +594,17 @@
sc->psc_sel.pc_dev = slot;
sc->psc_sel.pc_func = func;
+ /* copy physical PCI header to virtual cfgspace */
+ for (uint32_t i = 0; i < PCI_CAP_START_OFFSET; ++i) {
+ /*
+ * INTLINE and INTPIN shouldn't match the physical values;
+ * they are already set by pci_emul_init
+ */
+ if (i == PCIR_INTLINE || i == PCIR_INTPIN)
+ continue;
+ pci_set_cfgdata8(pi, i, read_config(&sc->psc_sel, i, 1));
+ }
+
if (cfginitmsi(sc) != 0) {
warnx("failed to initialize MSI for PCI %d/%d/%d",
bus, slot, func);
@@ -607,14 +617,154 @@
goto done;
}
- pci_set_cfgdata16(pi, PCIR_COMMAND, read_config(&sc->psc_sel,
- PCIR_COMMAND, 2));
+ write_config(
+ &sc->psc_sel, PCIR_COMMAND, 2, pci_get_cfgdata16(pi, PCIR_COMMAND));
error = 0; /* success */
done:
return (error);
}
+int
+set_pcir_handler(struct passthru_softc *sc, uint32_t reg, uint32_t len, cfgread_handler rhandler, cfgwrite_handler whandler)
+{
+ if (reg > PCI_REGMAX || reg + len > PCI_REGMAX + 1)
+ return (-1);
+
+ for (uint32_t i = reg; i < reg + len; ++i) {
+ sc->psc_pcir_rhandler[i] = rhandler;
+ sc->psc_pcir_whandler[i] = whandler;
+ }
+
+ return 0;
+}
+
+static int
+passthru_init_quirks(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+ struct passthru_softc *sc = pi->pi_arg;
+
+ uint16_t vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 0x02);
+ uint8_t class = read_config(&sc->psc_sel, PCIR_CLASS, 0x01);
+
+ /* currently only display devices have quirks */
+ if (class != PCIC_DISPLAY)
+ return (0);
+
+ if (vendor == PCI_VENDOR_INTEL)
+ return gvt_d_init(ctx, pi, opts);
+
+ return (0);
+}
+
+static void
+passthru_deinit_quirks(struct vmctx *ctx, struct pci_devinst *pi)
+{
+ struct passthru_softc *sc = pi->pi_arg;
+
+ if (sc == NULL)
+ return;
+
+ uint16_t vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 0x02);
+ uint8_t class = read_config(&sc->psc_sel, PCIR_CLASS, 0x01);
+
+ /* currently only display devices have quirks */
+ if (class != PCIC_DISPLAY)
+ return;
+
+ if (vendor == PCI_VENDOR_INTEL)
+ return gvt_d_deinit(ctx, pi);
+
+ return;
+}
+
+static void
+passthru_usage(char *opt)
+{
+ warnx("Invalid passthru option \"%s\"", opt);
+ warnx("passthru,<bus>/<dev>/<func>,{rom=rom_file}");
+}
+
+static int
+passthru_parse_opts(struct passthru_softc *sc, char *opts)
+{
+ int error = 0;
+ char *uopts = strdup(opts);
+ char *xopt = strtok(uopts, ",");
+ for (xopt = strtok(NULL, ","); xopt != NULL; xopt = strtok(NULL, ",")) {
+ char *config = strchr(xopt, '=');
+ if (config == NULL) {
+ error = -1;
+ break;
+ }
+ *config = '\0';
+ ++config;
+ if (strcmp(xopt, "rom") == 0) {
+ const int fd = open(config, O_RDONLY);
+ if (fd < 0) {
+ warnx("Can't open romfile \"%s\"", config);
+ error = -1;
+ break;
+ }
+ /* determine file size */
+ uint64_t rom_size = lseek(fd, 0, SEEK_END);
+ lseek(fd, 0, SEEK_SET);
+ /* read bios */
+ void *rom_addr = malloc(rom_size);
+ if (rom_addr == NULL) {
+ warnx("Can't malloc rom \"%s\" (size: 0x%8lx)",
+ config, rom_size);
+ error = -ENOMEM;
+ close(fd);
+ break;
+ }
+ rom_size = read(fd, rom_addr, rom_size);
+ close(fd);
+
+ /* save physical values of ROM */
+ sc->psc_bar[PCI_ROM_IDX].type = PCIBAR_ROM;
+ sc->psc_bar[PCI_ROM_IDX].addr = (uint64_t)rom_addr;
+ sc->psc_bar[PCI_ROM_IDX].size = rom_size;
+
+ continue;
+ }
+ /* option wasn't processed */
+ passthru_usage(xopt);
+ error = -1;
+ break;
+ }
+
+ return (error);
+}
+
+static int
+passthru_init_rom(struct vmctx *ctx, struct passthru_softc *sc)
+{
+ /* check if this device has a rom */
+ if (sc->psc_bar[PCI_ROM_IDX].size == 0)
+ return (0);
+
+ /* allocate ROM */
+ uint64_t rom_addr;
+ int error = pci_emul_alloc_rom(sc->psc_pi,
+ sc->psc_bar[PCI_ROM_IDX].size, &rom_addr);
+ if (error) {
+ warnx("Failed to alloc ROM");
+ goto done;
+ }
+
+ /* copy ROM to guest */
+ memcpy((void *)rom_addr, (void *)sc->psc_bar[PCI_ROM_IDX].addr,
+ sc->psc_bar[PCI_ROM_IDX].size);
+ /* free ROM */
+ free((void *)sc->psc_bar[PCI_ROM_IDX].addr);
+ /* save new address of ROM */
+ sc->psc_bar[PCI_ROM_IDX].addr = rom_addr;
+
+done:
+ return error;
+}
+
static int
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
@@ -701,10 +851,47 @@
pi->pi_arg = sc;
sc->psc_pi = pi;
+ /* parse opts */
+ if ((error = passthru_parse_opts(sc, opts)) != 0) {
+ warnx("invalid passthru options");
+ goto done;
+ }
+
/* initialize config space */
- error = cfginit(ctx, pi, bus, slot, func);
+ if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
+ goto done;
+
+ /* set default handler for all PCI registers */
+ if ((error = set_pcir_handler(sc, 0, PCI_REGMAX + 1,
+ passthru_cfgread_default, passthru_cfgwrite_default)) != 0)
+ goto done;
+ /* protect PCI header */
+ if ((error = set_pcir_handler(sc, 0, PCI_CAP_START_OFFSET,
+ passthru_cfgread_emulate, passthru_cfgwrite_emulate)) != 0)
+ goto done;
+ /* allow access to command and status register */
+ if ((error = set_pcir_handler(sc, PCIR_COMMAND, 0x04,
+ passthru_cfgread_default, passthru_cfgwrite_default)) != 0)
+ goto done;
+
+ /*
+ * Keep the following order:
+ * PCI register protection must be set up
+ * before init_quirks.
+ * The ROM must be initialized
+ * after init_quirks.
+ */
+ if ((error = passthru_init_quirks(ctx, pi, opts)) != 0)
+ goto done;
+
+ /* initialize ROM */
+ if ((error = passthru_init_rom(ctx, sc)) != 0)
+ goto done;
+
+ error = 0; /* success */
done:
if (error) {
+ passthru_deinit_quirks(ctx, pi);
free(sc);
vm_unassign_pptdev(ctx, bus, slot, func);
}
@@ -747,29 +934,29 @@
}
static int
-passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
- int coff, int bytes, uint32_t *rv)
+passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
+ int bytes, uint32_t *rv)
{
struct passthru_softc *sc;
sc = pi->pi_arg;
- /*
- * PCI BARs and MSI capability is emulated.
- */
- if (bar_access(coff) || msicap_access(sc, coff))
- return (-1);
+ return sc->psc_pcir_rhandler[coff](ctx, vcpu, pi, coff, bytes, rv);
+}
+
+int
+passthru_cfgread_default(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+ int coff, int bytes, uint32_t *rv)
+{
+ struct passthru_softc *sc;
+
+ sc = pi->pi_arg;
-#ifdef LEGACY_SUPPORT
/*
- * Emulate PCIR_CAP_PTR if this device does not support MSI capability
- * natively.
+ * MSI capability is emulated.
*/
- if (sc->psc_msi.emulated) {
- if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4)
- return (-1);
- }
-#endif
+ if (msicap_access(sc, coff) || msixcap_access(sc, coff))
+ return (-1);
/*
* Emulate the command register. If a single read reads both the
@@ -790,9 +977,28 @@
return (0);
}
+int
+passthru_cfgread_emulate(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+ int coff, int bytes, uint32_t *rv)
+{
+ return (-1);
+}
+
static int
-passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
- int coff, int bytes, uint32_t val)
+passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
+ int bytes, uint32_t val)
+{
+
+ struct passthru_softc *sc;
+
+ sc = pi->pi_arg;
+
+ return sc->psc_pcir_whandler[coff](ctx, vcpu, pi, coff, bytes, val);
+}
+
+int
+passthru_cfgwrite_default(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+ int coff, int bytes, uint32_t val)
{
int error, msix_table_entries, i;
struct passthru_softc *sc;
@@ -800,12 +1006,6 @@
sc = pi->pi_arg;
- /*
- * PCI BARs are emulated
- */
- if (bar_access(coff))
- return (-1);
-
/*
* MSI capability is emulated
*/
@@ -871,6 +1071,13 @@
return (0);
}
+int
+passthru_cfgwrite_emulate(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+ int coff, int bytes, uint32_t val)
+{
+ return (-1);
+}
+
static void
passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
uint64_t offset, int size, uint64_t value)
@@ -995,17 +1202,39 @@
}
}
+static void
+passthru_addr_rom(struct pci_devinst *pi, int idx, int enabled)
+{
+ if (!enabled)
+ vm_munmap_memseg(pi->pi_vmctx, pi->pi_bar[idx].addr,
+ pi->pi_bar[idx].size);
+ else
+ vm_mmap_memseg(pi->pi_vmctx, pi->pi_bar[idx].addr, VM_PCIROM,
+ pi->pi_romoffset, pi->pi_bar[idx].size,
+ PROT_READ | PROT_EXEC);
+}
+
static void
passthru_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
int enabled, uint64_t address)
{
-
- if (pi->pi_bar[baridx].type == PCIBAR_IO)
- return;
- if (baridx == pci_msix_table_bar(pi))
- passthru_msix_addr(ctx, pi, baridx, enabled, address);
- else
- passthru_mmio_addr(ctx, pi, baridx, enabled, address);
+ switch (pi->pi_bar[baridx].type) {
+ case PCIBAR_IO:
+ /* IO BARs are emulated */
+ break;
+ case PCIBAR_ROM:
+ passthru_addr_rom(pi, baridx, enabled);
+ break;
+ case PCIBAR_MEM32:
+ case PCIBAR_MEM64:
+ if (baridx == pci_msix_table_bar(pi))
+ passthru_msix_addr(ctx, pi, baridx, enabled, address);
+ else
+ passthru_mmio_addr(ctx, pi, baridx, enabled, address);
+ break;
+ default:
+ errx(4, "%s: invalid BAR type %d", __func__, pi->pi_bar[baridx].type);
+ }
}
struct pci_devemu passthru = {
diff --git a/usr.sbin/bhyve/fwctl.h b/usr.sbin/bhyve/qemu_fwcfg.h
rename from usr.sbin/bhyve/fwctl.h
rename to usr.sbin/bhyve/qemu_fwcfg.h
--- a/usr.sbin/bhyve/fwctl.h
+++ b/usr.sbin/bhyve/qemu_fwcfg.h
@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
- * Copyright (c) 2015 Peter Grehan <grehan@freebsd.org>
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -13,10 +13,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
@@ -28,29 +28,19 @@
* $FreeBSD$
*/
-#ifndef _FWCTL_H_
-#define _FWCTL_H_
+#pragma once
-#include <sys/linker_set.h>
+#include <vmmapi.h>
-/*
- * Linker set api for export of information to guest firmware via
- * a sysctl-like OID interface
- */
-struct ctl {
- const char *c_oid;
- const void *c_data;
- const int c_len;
-};
+#define QEMU_FWCFG_MAX_ARCHS 0x2
+#define QEMU_FWCFG_MAX_ENTRIES 0x3FFF
+#define QEMU_FWCFG_MAX_NAME 56
-#define CTL_NODE(oid, data, len) \
- static struct ctl __CONCAT(__ctl, __LINE__) = { \
- oid, \
- (data), \
- (len), \
- }; \
- DATA_SET(ctl_set, __CONCAT(__ctl, __LINE__))
-
-void fwctl_init(void);
+struct qemu_fwcfg_item {
+ uint32_t size;
+ uint8_t *data;
+};
-#endif /* _FWCTL_H_ */
+int qemu_fwcfg_add_file(uint8_t name[QEMU_FWCFG_MAX_NAME], uint32_t size,
+ void *data);
+int qemu_fwcfg_init(struct vmctx *ctx);
diff --git a/usr.sbin/bhyve/qemu_fwcfg.c b/usr.sbin/bhyve/qemu_fwcfg.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/qemu_fwcfg.c
@@ -0,0 +1,433 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/endian.h>
+
+#include <machine/vmm.h>
+
+#include <err.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "acpi_device.h"
+#include "inout.h"
+#include "qemu_fwcfg.h"
+
+#define QEMU_FWCFG_ACPI_DEVICE_NAME "FWCF"
+#define QEMU_FWCFG_ACPI_HARDWARE_ID "QEMU0002"
+
+#define QEMU_FWCFG_SELECTOR_PORT_NUMBER 0x510
+#define QEMU_FWCFG_SELECTOR_PORT_SIZE 1
+#define QEMU_FWCFG_SELECTOR_PORT_FLAGS IOPORT_F_INOUT
+#define QEMU_FWCFG_DATA_PORT_NUMBER 0x511
+#define QEMU_FWCFG_DATA_PORT_SIZE 1
+#define QEMU_FWCFG_DATA_PORT_FLAGS \
+ IOPORT_F_INOUT /* QEMU v2.4+ ignores writes */
+
+#define QEMU_FWCFG_ARCHITECTURE_MASK 0x0001
+#define QEMU_FWCFG_INDEX_MASK 0x3FFF
+
+#define QEMU_FWCFG_SELECT_READ 0
+#define QEMU_FWCFG_SELECT_WRITE 1
+
+#define QEMU_FWCFG_ARCHITECTURE_GENERIC 0
+#define QEMU_FWCFG_ARCHITECTURE_SPECIFIC 1
+
+#define QEMU_FWCFG_INDEX_SIGNATURE 0x00
+#define QEMU_FWCFG_INDEX_ID 0x01
+#define QEMU_FWCFG_INDEX_FILE_DIR 0x19
+
+#define QEMU_FWCFG_FIRST_FILE_INDEX 0x20
+
+#define QEMU_FWCFG_MIN_FILES 10
+
+#pragma pack(1)
+
+union qemu_fwcfg_selector {
+ struct {
+ uint16_t index : 14;
+ uint16_t writeable : 1;
+ /*
+ * 0 = generic | for all architectures
+ * 1 = specific | only for current architecture
+ */
+ uint16_t architecture : 1;
+ };
+ uint16_t bits;
+};
+
+struct qemu_fwcfg_signature {
+ uint8_t signature[4];
+};
+
+struct qemu_fwcfg_id {
+ uint32_t interface : 1; /* always set */
+ uint32_t DMA : 1;
+ uint32_t reserved : 30;
+};
+
+struct qemu_fwcfg_file {
+ uint32_t be_size;
+ uint16_t be_selector;
+ uint16_t reserved;
+ uint8_t name[QEMU_FWCFG_MAX_NAME];
+};
+
+struct qemu_fwcfg_directory {
+ uint32_t be_count;
+ struct qemu_fwcfg_file files[0];
+};
+
+struct qemu_fwcfg_softc {
+ struct acpi_device *acpi_dev;
+
+ uint32_t data_offset;
+ union qemu_fwcfg_selector selector;
+ struct qemu_fwcfg_item items[QEMU_FWCFG_MAX_ARCHS]
+ [QEMU_FWCFG_MAX_ENTRIES];
+ struct qemu_fwcfg_directory *directory;
+};
+
+#pragma pack()
+
+static struct qemu_fwcfg_softc sc;
+
+static int
+qemu_fwcfg_selector_port_handler(struct vmctx *ctx, int vcpu, int in, int port,
+ int bytes, uint32_t *eax, void *arg)
+{
+ if (in) {
+ *eax = *(uint16_t *)&sc.selector;
+ return (0);
+ }
+
+ sc.data_offset = 0;
+ sc.selector.bits = *eax;
+
+ return (0);
+}
+
+static int
+qemu_fwcfg_data_port_handler(struct vmctx *ctx, int vcpu, int in, int port,
+ int bytes, uint32_t *eax, void *arg)
+{
+ if (!in) {
+ warnx("%s: Writes to qemu fwcfg data port aren't allowed",
+ __func__);
+ return (-1);
+ }
+
+ /* get fwcfg item */
+ struct qemu_fwcfg_item *item =
+ &sc.items[sc.selector.architecture][sc.selector.index];
+ if (item->data == NULL) {
+ warnx(
+ "%s: qemu fwcfg item doesn't exist (architecture %s index 0x%x)",
+ __func__, sc.selector.architecture ? "specific" : "generic",
+ sc.selector.index);
+ *eax = 0x00;
+ return (0);
+ } else if (sc.data_offset >= item->size) {
+ warnx(
+ "%s: qemu fwcfg item read exceeds size (architecture %s index 0x%x size 0x%x offset 0x%x)",
+ __func__, sc.selector.architecture ? "specific" : "generic",
+ sc.selector.index, item->size, sc.data_offset);
+ *eax = 0x00;
+ return (0);
+ }
+
+ /* return item data */
+ *eax = item->data[sc.data_offset];
+ sc.data_offset++;
+
+ return (0);
+}
+
+static int
+qemu_fwcfg_add_item(uint16_t architecture, uint16_t index, uint32_t size,
+ void *data)
+{
+ /* truncate architecture and index to their desired size */
+ architecture &= QEMU_FWCFG_ARCHITECTURE_MASK;
+ index &= QEMU_FWCFG_INDEX_MASK;
+
+ /* get pointer to item specified by selector */
+ struct qemu_fwcfg_item *fwcfg_item = &sc.items[architecture][index];
+
+ /* check if item is already used */
+ if (fwcfg_item->data != NULL) {
+ warnx("%s: qemu fwcfg item exists (architecture %s index 0x%x)",
+ __func__, architecture ? "specific" : "generic", index);
+ return (-1);
+ }
+
+ /* save data of the item */
+ fwcfg_item->size = size;
+ fwcfg_item->data = data;
+
+ return (0);
+}
+
+static int
+qemu_fwcfg_add_item_file_dir()
+{
+ /* alloc directory */
+ uint64_t size = sizeof(struct qemu_fwcfg_directory) +
+ QEMU_FWCFG_MIN_FILES * sizeof(struct qemu_fwcfg_file);
+ struct qemu_fwcfg_directory *fwcfg_directory = calloc(1, size);
+ if (fwcfg_directory == NULL) {
+ return (-ENOMEM);
+ }
+
+ /* init directory */
+ sc.directory = fwcfg_directory;
+
+ /* add directory */
+ return qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+ QEMU_FWCFG_INDEX_FILE_DIR, sizeof(struct qemu_fwcfg_directory), (uint8_t *)sc.directory);
+}
+
+static int
+qemu_fwcfg_add_item_id()
+{
+ /* alloc id */
+ struct qemu_fwcfg_id *fwcfg_id = calloc(1,
+ sizeof(struct qemu_fwcfg_id));
+ if (fwcfg_id == NULL) {
+ return (-ENOMEM);
+ }
+
+ /* init id */
+ fwcfg_id->interface = 1;
+ fwcfg_id->DMA = 0;
+
+ /*
+ * QEMU specifies ID as little endian.
+ * Convert fwcfg_id to little endian.
+ */
+ uint32_t *le_fwcfg_id_ptr = (uint32_t *)fwcfg_id;
+ *le_fwcfg_id_ptr = htole32(*le_fwcfg_id_ptr);
+
+ /* add id */
+ return qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+ QEMU_FWCFG_INDEX_ID, sizeof(struct qemu_fwcfg_id),
+ (uint8_t *)fwcfg_id);
+}
+
+static int
+qemu_fwcfg_add_item_signature()
+{
+ /* alloc signature */
+ struct qemu_fwcfg_signature *fwcfg_signature = calloc(1,
+ sizeof(struct qemu_fwcfg_signature));
+ if (fwcfg_signature == NULL) {
+ return (-ENOMEM);
+ }
+
+ /* init signature */
+ fwcfg_signature->signature[0] = 'Q';
+ fwcfg_signature->signature[1] = 'E';
+ fwcfg_signature->signature[2] = 'M';
+ fwcfg_signature->signature[3] = 'U';
+
+ /* add signature */
+ return qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+ QEMU_FWCFG_INDEX_SIGNATURE, sizeof(struct qemu_fwcfg_signature),
+ (uint8_t *)fwcfg_signature);
+}
+
+static int
+qemu_fwcfg_register_port(const char *name, int port, int size, int flags,
+ inout_func_t handler)
+{
+ struct inout_port iop;
+
+ bzero(&iop, sizeof(iop));
+ iop.name = name;
+ iop.port = port;
+ iop.size = size;
+ iop.flags = flags;
+ iop.handler = handler;
+
+ return register_inout(&iop);
+}
+
+int
+qemu_fwcfg_add_file(uint8_t name[QEMU_FWCFG_MAX_NAME], uint32_t size,
+ void *data)
+{
+ /*
+ * QEMU specifies count as big endian.
+ * Convert it to host endian to work with it.
+ */
+ uint32_t count = be32toh(sc.directory->be_count);
+
+ /* add file to items list */
+ uint32_t index = QEMU_FWCFG_FIRST_FILE_INDEX + count;
+ const int error = qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+ index, size, data);
+ if (error != 0) {
+ return (error);
+ }
+
+ /*
+ * Files should be sorted alphabetically; find the index for the new file.
+ */
+ uint32_t file_index;
+ for (file_index = 0; file_index < count; ++file_index) {
+ if (strcmp(name, sc.directory->files[file_index].name) < 0)
+ break;
+ }
+
+ ++count;
+ if (count > QEMU_FWCFG_MIN_FILES) {
+ /* alloc new file directory */
+ uint64_t new_size = sizeof(struct qemu_fwcfg_directory) +
+ count * sizeof(struct qemu_fwcfg_file);
+ struct qemu_fwcfg_directory *new_directory = calloc(1,
+ new_size);
+ if (new_directory == NULL) {
+ warnx(
+ "%s: Unable to allocate a new qemu fwcfg files directory (count %d)",
+ __func__, count);
+ return (-ENOMEM);
+ }
+
+ /* copy files below file_index to new directory */
+ memcpy(new_directory->files, sc.directory->files,
+ file_index * sizeof(struct qemu_fwcfg_file));
+
+ /* copy files after file_index to the new directory */
+ memcpy(&new_directory->files[file_index + 1],
+ &sc.directory->files[file_index],
+ (count - 1 - file_index) * sizeof(struct qemu_fwcfg_file));
+
+ /* free old directory */
+ free(sc.directory);
+
+ /* set directory pointer to new directory */
+ sc.directory = new_directory;
+
+ /* adjust directory pointer */
+ sc.items[0][QEMU_FWCFG_INDEX_FILE_DIR].data = (uint8_t *)
+ sc.directory;
+ } else {
+ /* shift files behind file_index */
+ for (uint32_t i = QEMU_FWCFG_MIN_FILES - 1; i > file_index; --i) {
+ memcpy(&sc.directory->files[i],
+ &sc.directory->files[i - 1],
+ sizeof(struct qemu_fwcfg_file));
+ }
+ }
+
+ /*
+ * QEMU specifies count, size and index as big endian.
+ * Save these values in big endian to simplify guest reads of these
+ * values.
+ */
+ sc.directory->be_count = htobe32(count);
+ sc.directory->files[file_index].be_size = htobe32(size);
+ sc.directory->files[file_index].be_selector = htobe16(index);
+ strcpy(sc.directory->files[file_index].name, name);
+
+ /* set new size for the fwcfg_file_directory */
+ sc.items[0][QEMU_FWCFG_INDEX_FILE_DIR].size =
+ sizeof(struct qemu_fwcfg_directory) +
+ count * sizeof(struct qemu_fwcfg_file);
+
+ return (0);
+}
+
+int
+qemu_fwcfg_init(struct vmctx *ctx)
+{
+ int error;
+
+ error = acpi_device_create(&sc.acpi_dev, ctx, QEMU_FWCFG_ACPI_DEVICE_NAME,
+ QEMU_FWCFG_ACPI_HARDWARE_ID);
+ if (error) {
+ warnx("%s: failed to create ACPI device for QEMU FwCfg",
+ __func__);
+ goto done;
+ }
+
+ error = acpi_device_add_res_fixed_ioport(sc.acpi_dev,
+ QEMU_FWCFG_SELECTOR_PORT_NUMBER, 2);
+ if (error) {
+ warnx("%s: failed to add fixed IO port for QEMU FwCfg",
+ __func__);
+ goto done;
+ }
+
+ /* add common fwcfg items */
+ if ((error = qemu_fwcfg_add_item_signature()) != 0) {
+ warnx("%s: Unable to add signature item", __func__);
+ goto done;
+ }
+ if ((error = qemu_fwcfg_add_item_id()) != 0) {
+ warnx("%s: Unable to add id item", __func__);
+ goto done;
+ }
+ if ((error = qemu_fwcfg_add_item_file_dir()) != 0) {
+ warnx("%s: Unable to add file_dir item", __func__);
+ goto done;
+ }
+
+ /* add handlers for fwcfg ports */
+ if ((error = qemu_fwcfg_register_port("qemu_fwcfg_selector",
+ QEMU_FWCFG_SELECTOR_PORT_NUMBER, QEMU_FWCFG_SELECTOR_PORT_SIZE,
+ QEMU_FWCFG_SELECTOR_PORT_FLAGS,
+ qemu_fwcfg_selector_port_handler)) != 0) {
+ warnx("%s: Unable to register qemu fwcfg selector port 0x%x",
+ __func__, QEMU_FWCFG_SELECTOR_PORT_NUMBER);
+ goto done;
+ }
+ if ((error = qemu_fwcfg_register_port("qemu_fwcfg_data",
+ QEMU_FWCFG_DATA_PORT_NUMBER, QEMU_FWCFG_DATA_PORT_SIZE,
+ QEMU_FWCFG_DATA_PORT_FLAGS, qemu_fwcfg_data_port_handler)) !=
+ 0) {
+ warnx("%s: Unable to register qemu fwcfg data port 0x%x",
+ __func__, QEMU_FWCFG_DATA_PORT_NUMBER);
+ goto done;
+ }
+
+done:
+ if (error) {
+ acpi_device_destroy(sc.acpi_dev);
+ }
+
+ return (error);
+}