Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F115646314
D26209.id99348.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
94 KB
Referenced Files
None
Subscribers
None
D26209.id99348.diff
View Options
diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -73,6 +73,7 @@
VM_SYSMEM,
VM_BOOTROM,
VM_FRAMEBUFFER,
+ VM_PCIROM,
};
/*
@@ -180,6 +181,8 @@
vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
vm_paddr_t gpa, size_t len);
+int vm_get_memory_region_info(struct vmctx *const ctx, vm_paddr_t *const base,
+ vm_paddr_t *const size, const enum vm_memory_region_type type);
int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot,
int func, uint64_t addr, uint64_t msg, int numvec);
int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot,
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -1012,6 +1012,25 @@
return (ioctl(ctx->fd, VM_UNMAP_PPTDEV_MMIO, &pptmmio));
}
+/*
+ * Ask the kernel, through the VM_GET_MEMORY_REGION_INFO ioctl, where the
+ * host memory region of the requested type is located.
+ *
+ * 'base' and 'size' are optional outputs: each one is stored only when the
+ * pointer is non-NULL.  They are stored whether or not the ioctl succeeded.
+ * The return value is whatever ioctl(2) returned.
+ */
+int
+vm_get_memory_region_info(struct vmctx *const ctx, vm_paddr_t *const base,
+    vm_paddr_t *const size, const enum vm_memory_region_type type)
+{
+	struct vm_memory_region_info info;
+	int rc;
+
+	/* Start from an all-zero request so only 'type' is set. */
+	bzero(&info, sizeof(info));
+	info.type = type;
+
+	rc = ioctl(ctx->fd, VM_GET_MEMORY_REGION_INFO, &info);
+
+	if (base != NULL)
+		*base = info.base;
+	if (size != NULL)
+		*size = info.size;
+
+	return (rc);
+}
+
int
vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
uint64_t addr, uint64_t msg, int numvec)
@@ -1687,7 +1706,7 @@
VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
VM_PPTDEV_MSIX, VM_UNMAP_PPTDEV_MMIO, VM_PPTDEV_DISABLE_MSIX,
- VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
+ VM_GET_MEMORY_REGION_INFO, VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
VM_GLA2GPA_NOFAULT,
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -741,6 +741,12 @@
} u;
};
+/*
+ * Selects which host memory region the VM_GET_MEMORY_REGION_INFO ioctl
+ * reports.
+ */
+enum vm_memory_region_type {
+ /* reported from intel_graphics_stolen_base / intel_graphics_stolen_size */
+ MEMORY_REGION_INTEL_GSM,
+ /* reported by vm_intelgpu_get_opregion() */
+ MEMORY_REGION_INTEL_OPREGION,
+ /* reported by vmm_tpm2_get_control_address() */
+ MEMORY_REGION_TPM_CONTROL_ADDRESS,
+};
+
/* APIs to inject faults into the guest */
void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid,
int errcode);
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -146,6 +146,17 @@
size_t len;
};
+/*
+ * Argument of the VM_GET_MEMORY_REGION_INFO ioctl.  The caller fills in
+ * 'type'; the kernel fills in 'base' and 'size' for that region type.
+ */
+struct vm_memory_region_info {
+ /* out: base address of the region */
+ vm_paddr_t base;
+ /* out: size of the region */
+ vm_paddr_t size;
+ /* in: which region is being queried */
+ enum vm_memory_region_type type;
+};
+
+#ifdef _KERNEL
+extern vm_paddr_t intel_graphics_stolen_base;
+extern vm_paddr_t intel_graphics_stolen_size;
+#endif
+
struct vm_pptdev_msi {
int vcpu;
int bus;
@@ -309,6 +320,7 @@
IOCNUM_PPTDEV_MSIX = 44,
IOCNUM_PPTDEV_DISABLE_MSIX = 45,
IOCNUM_UNMAP_PPTDEV_MMIO = 46,
+ IOCNUM_GET_MEMORY_REGION_INFO = 47,
/* statistics */
IOCNUM_VM_STATS = 50,
@@ -427,6 +439,8 @@
_IOW('v', IOCNUM_PPTDEV_DISABLE_MSIX, struct vm_pptdev)
#define VM_UNMAP_PPTDEV_MMIO \
_IOW('v', IOCNUM_UNMAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
+#define VM_GET_MEMORY_REGION_INFO \
+ _IOWR('v', IOCNUM_GET_MEMORY_REGION_INFO, struct vm_memory_region_info)
#define VM_INJECT_NMI \
_IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
#define VM_STATS \
diff --git a/sys/amd64/vmm/intel/intelgpu.h b/sys/amd64/vmm/intel/intelgpu.h
new file mode 100644
--- /dev/null
+++ b/sys/amd64/vmm/intel/intelgpu.h
@@ -0,0 +1,185 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#pragma once
+
+/*
+ * See
+ * <https://github.com/tianocore/edk2-platforms/blob/82979ab1ca44101e0b92a9c4bda1dfe64a8249f6/Silicon/Intel/IntelSiliconPkg/Include/IndustryStandard/IgdOpRegion.h>
+ */
+
+#define IGD_OPREGION_HEADER_SIGN "IntelGraphicsMem"
+#define IGD_OPREGION_HEADER_MBOX1 BIT0
+#define IGD_OPREGION_HEADER_MBOX2 BIT1
+#define IGD_OPREGION_HEADER_MBOX3 BIT2
+#define IGD_OPREGION_HEADER_MBOX4 BIT3
+#define IGD_OPREGION_HEADER_MBOX5 BIT4
+
+#define IGD_OPREGION_VBT_SIZE_6K (6 * 1024UL)
+
+/**
+ OpRegion structures:
+ Sub-structures define the different parts of the OpRegion followed by the
+ main structure representing the entire OpRegion.
+
+ @note These structures are packed to 1 byte offsets because the exact
+ data location is required by the supporting design specification due to
+ the fact that the data is used by ASL and Graphics driver code compiled
+ separately.
+**/
+#pragma pack(1)
+///
+/// OpRegion Mailbox 0 Header structure. The OpRegion Header is used to
+/// identify a block of memory as the graphics driver OpRegion.
+/// Offset 0x0, Size 0x100
+///
+struct igd_opregion_header {
+ int8_t sign[0x10]; ///< Offset 0x00 OpRegion Signature
+ uint32_t size; ///< Offset 0x10 OpRegion Size
+ uint32_t over; ///< Offset 0x14 OpRegion Structure Version
+ uint8_t sver[0x20]; ///< Offset 0x18 System BIOS Build Version
+ uint8_t vver[0x10]; ///< Offset 0x38 Video BIOS Build Version
+ uint8_t gver[0x10]; ///< Offset 0x48 Graphic Driver Build Version
+ uint32_t mbox; ///< Offset 0x58 Supported Mailboxes
+ uint32_t dmod; ///< Offset 0x5C Driver Model
+ uint32_t pcon; ///< Offset 0x60 Platform Configuration
+ int16_t dver[0x10]; ///< Offset 0x64 GOP Version
+ uint8_t rm01[0x7C]; ///< Offset 0x84 Reserved Must be zero
+};
+
+///
+/// OpRegion Mailbox 1 - Public ACPI Methods
+/// Offset 0x100, Size 0x100
+///
+struct igd_opregion_mbox1 {
+ uint32_t drdy; ///< Offset 0x100 Driver Readiness
+ uint32_t csts; ///< Offset 0x104 Status
+ uint32_t cevt; ///< Offset 0x108 Current Event
+ uint8_t rm11[0x14]; ///< Offset 0x10C Reserved Must be Zero
+ uint32_t didl[8]; ///< Offset 0x120 Supported Display Devices ID List
+ uint32_t
+ cpdl[8]; ///< Offset 0x140 Currently Attached Display Devices List
+ uint32_t
+ cadl[8]; ///< Offset 0x160 Currently Active Display Devices List
+ uint32_t nadl[8]; ///< Offset 0x180 Next Active Devices List
+ uint32_t aslp; ///< Offset 0x1A0 ASL Sleep Time Out
+ uint32_t tidx; ///< Offset 0x1A4 Toggle Table Index
+ uint32_t chpd; ///< Offset 0x1A8 Current Hotplug Enable Indicator
+ uint32_t clid; ///< Offset 0x1AC Current Lid State Indicator
+ uint32_t cdck; ///< Offset 0x1B0 Current Docking State Indicator
+ uint32_t sxsw; ///< Offset 0x1B4 Display Switch Notification on Sx
+ ///< StateResume
+ uint32_t evts; ///< Offset 0x1B8 Events supported by ASL
+ uint32_t cnot; ///< Offset 0x1BC Current OS Notification
+ uint32_t NRDY; ///< Offset 0x1C0 Driver Status
+ uint8_t did2[0x1C]; ///< Offset 0x1C4 Extended Supported Devices ID
+ ///< List(DOD)
+ uint8_t
+ cpd2[0x1C]; ///< Offset 0x1E0 Extended Attached Display Devices List
+ uint8_t rm12[4]; ///< Offset 0x1FC - 0x1FF Reserved Must be zero
+};
+
+///
+/// OpRegion Mailbox 2 - Software SCI Interface
+/// Offset 0x200, Size 0x100
+///
+struct igd_opregion_mbox2 {
+ uint32_t scic; ///< Offset 0x200 Software SCI Command / Status / Data
+ uint32_t parm; ///< Offset 0x204 Software SCI Parameters
+ uint32_t dslp; ///< Offset 0x208 Driver Sleep Time Out
+ uint8_t rm21[0xF4]; ///< Offset 0x20C - 0x2FF Reserved Must be zero
+};
+
+///
+/// OpRegion Mailbox 3 - BIOS/Driver Notification - ASLE Support
+/// Offset 0x300, Size 0x100
+///
+struct igd_opregion_mbox3 {
+ uint32_t ardy; ///< Offset 0x300 Driver Readiness
+ uint32_t aslc; ///< Offset 0x304 ASLE Interrupt Command / Status
+ uint32_t tche; ///< Offset 0x308 Technology Enabled Indicator
+ uint32_t alsi; ///< Offset 0x30C Current ALS Luminance Reading
+ uint32_t bclp; ///< Offset 0x310 Requested Backlight Brightness
+ uint32_t pfit; ///< Offset 0x314 Panel Fitting State or Request
+ uint32_t cblv; ///< Offset 0x318 Current Brightness Level
+ uint16_t bclm[0x14]; ///< Offset 0x31C Backlight Brightness Levels Duty
+ ///< Cycle Mapping Table
+ uint32_t cpfm; ///< Offset 0x344 Current Panel Fitting Mode
+ uint32_t epfm; ///< Offset 0x348 Enabled Panel Fitting Modes
+ uint8_t plut[0x4A]; ///< Offset 0x34C Panel Look Up Table & Identifier
+ uint32_t pfmb; ///< Offset 0x396 PWM Frequency and Minimum Brightness
+ uint32_t ccdv; ///< Offset 0x39A Color Correction Default Values
+ uint32_t pcft; ///< Offset 0x39E Power Conservation Features
+ uint32_t srot; ///< Offset 0x3A2 Supported Rotation Angles
+ uint32_t iuer; ///< Offset 0x3A6 Intel Ultrabook(TM) Event Register
+ uint64_t fdss; ///< Offset 0x3AA DSS Buffer address allocated for IFFS
+ ///< feature
+ uint32_t fdsp; ///< Offset 0x3B2 Size of DSS buffer
+ uint32_t stat; ///< Offset 0x3B6 State Indicator
+ uint64_t rvda; ///< Offset 0x3BA Absolute/Relative Address of Raw VBT
+ ///< Data from OpRegion Base
+ uint32_t rvds; ///< Offset 0x3C2 Raw VBT Data Size
+ uint8_t rsvd2[0x3A]; ///< Offset 0x3C6 - 0x3FF Reserved Must be zero.
+ ///< Bug in spec 0x45(69)
+};
+
+///
+/// OpRegion Mailbox 4 - VBT Video BIOS Table
+/// Offset 0x400, Size 0x1800
+///
+struct igd_opregion_mbox4 {
+ uint8_t rvbt[IGD_OPREGION_VBT_SIZE_6K]; ///< Offset 0x400 - 0x1BFF Raw
+ ///< VBT Data
+};
+
+///
+/// OpRegion Mailbox 5 - BIOS/Driver Notification - Data storage BIOS to Driver
+/// data sync Offset 0x1C00, Size 0x400
+///
+struct igd_opregion_mbox5 {
+ uint32_t phed; ///< Offset 0x1C00 Panel Header
+ uint8_t bddc[0x100]; ///< Offset 0x1C04 Panel EDID (DDC data)
+ uint8_t rm51[0x2FC]; ///< Offset 0x1D04 - 0x1FFF Reserved Must be zero
+};
+
+///
+/// IGD OpRegion Structure
+///
+struct igd_opregion {
+ struct igd_opregion_header
+ header; ///< OpRegion header (Offset 0x0, Size 0x100)
+ struct igd_opregion_mbox1 mbox1; ///< Mailbox 1: Public ACPI Methods
+ ///< (Offset 0x100, Size 0x100)
+ struct igd_opregion_mbox2 mbox2; ///< Mailbox 2: Software SCI Interface
+ ///< (Offset 0x200, Size 0x100)
+ struct igd_opregion_mbox3
+ mbox3; ///< Mailbox 3: BIOS to Driver Notification (Offset 0x300,
+ ///< Size 0x100)
+ struct igd_opregion_mbox4 mbox4; ///< Mailbox 4: Video BIOS Table (VBT)
+ ///< (Offset 0x400, Size 0x1800)
+ struct igd_opregion_mbox5
+ mbox5; ///< Mailbox 5: BIOS to Driver Notification Extension (Offset
+ ///< 0x1C00, Size 0x400)
+};
+
+///
+/// VBT Header Structure
+///
+struct vbt_header {
+ uint8_t product_string[20];
+ uint16_t version;
+ uint16_t header_size;
+ uint16_t table_size;
+ uint8_t checksum;
+ uint8_t reserved1;
+ uint32_t bios_data_offset;
+ uint32_t aim_data_offset[4];
+};
+
+#pragma pack()
+
+int vm_intelgpu_get_opregion(vm_paddr_t *const base, vm_paddr_t *const size);
diff --git a/sys/amd64/vmm/intel/intelgpu.c b/sys/amd64/vmm/intel/intelgpu.c
new file mode 100644
--- /dev/null
+++ b/sys/amd64/vmm/intel/intelgpu.c
@@ -0,0 +1,55 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include "intelgpu.h"
+
+#define KB (1024UL)
+
+/*
+ * Locate the IGD OpRegion of the host's Intel graphics device.
+ *
+ * The OpRegion address is read from the ASLS register of the PCI device at
+ * 0:2.0 and the region is accepted only if it starts with the OpRegion
+ * signature.
+ *
+ * 'base' and 'size' are optional outputs; each is written only when it is
+ * non-NULL and only on success.  'size' is the header's size field scaled
+ * by KB.
+ *
+ * Returns 0 on success, ENOENT if there is no PCI device at 0:2.0 and
+ * ENODEV if that device is not an Intel VGA display device, if ASLS is not
+ * programmed, or if the signature does not match.
+ */
+int
+vm_intelgpu_get_opregion(vm_paddr_t *const base, vm_paddr_t *const size)
+{
+	const struct igd_opregion_header *opregion_header;
+	device_t dev;
+	uint64_t asls;
+
+	/* intel graphics device is always located at 0:2.0 */
+	dev = pci_find_bsf(0, 2, 0);
+	if (dev == NULL) {
+		return (ENOENT);
+	}
+
+	if ((pci_get_vendor(dev) != PCI_VENDOR_INTEL) ||
+	    (pci_get_class(dev) != PCIC_DISPLAY) ||
+	    (pci_get_subclass(dev) != PCIS_DISPLAY_VGA)) {
+		return (ENODEV);
+	}
+
+	asls = pci_read_config(dev, PCIR_ASLS_CTL, 4);
+	/*
+	 * An ASLS value of zero means no OpRegion address was programmed.
+	 * Bail out instead of mapping and inspecting physical address 0.
+	 */
+	if (asls == 0) {
+		return (ENODEV);
+	}
+
+	opregion_header = (const struct igd_opregion_header *)pmap_map(NULL,
+	    asls, asls + sizeof(*opregion_header), VM_PROT_READ);
+	if (opregion_header == NULL ||
+	    memcmp(opregion_header->sign, IGD_OPREGION_HEADER_SIGN,
+	    sizeof(opregion_header->sign)) != 0) {
+		return (ENODEV);
+	}
+
+	/* Outputs are optional, matching vmm_tpm2_get_control_address(). */
+	if (base != NULL) {
+		*base = asls;
+	}
+	if (size != NULL) {
+		*size = opregion_header->size * KB;
+	}
+
+	return (0);
+}
diff --git a/sys/amd64/vmm/io/acpi.h b/sys/amd64/vmm/io/acpi.h
new file mode 100644
--- /dev/null
+++ b/sys/amd64/vmm/io/acpi.h
@@ -0,0 +1,14 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#pragma once
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+int vmm_tpm2_get_control_address(vm_paddr_t *const base,
+ vm_paddr_t *const size);
diff --git a/sys/amd64/vmm/io/acpi.c b/sys/amd64/vmm/io/acpi.c
new file mode 100644
--- /dev/null
+++ b/sys/amd64/vmm/io/acpi.c
@@ -0,0 +1,37 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/acpixf.h>
+
+#include "acpi.h"
+/*
+ * Report the control address stored in the host's ACPI "TPM2" table.
+ *
+ * 'base' and 'size' are optional outputs.  When non-NULL, 'base' receives
+ * the table's ControlAddress field and 'size' is always set to 0.
+ *
+ * Returns 0 on success or ENOENT if the table cannot be obtained.
+ */
+int
+vmm_tpm2_get_control_address(vm_paddr_t *const base, vm_paddr_t *const size)
+{
+	ACPI_TABLE_HEADER *hdr;
+	const ACPI_TABLE_TPM2 *tpm2;
+
+	if (!ACPI_SUCCESS(AcpiGetTable("TPM2", 1, &hdr)))
+		return (ENOENT);
+
+	if (base != NULL) {
+		tpm2 = (const ACPI_TABLE_TPM2 *)hdr;
+		*base = tpm2->ControlAddress;
+	}
+	if (size != NULL)
+		*size = 0;
+
+	return (0);
+}
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -134,7 +134,7 @@
bool sysmem;
struct vm_object *object;
};
-#define VM_MAX_MEMSEGS 3
+#define VM_MAX_MEMSEGS 4
struct mem_map {
vm_paddr_t gpa;
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -60,9 +60,11 @@
#include <machine/vmm_snapshot.h>
#include <x86/apicreg.h>
+#include "intel/intelgpu.h"
#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
+#include "io/acpi.h"
#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
@@ -373,6 +375,7 @@
struct vm_capability *vmcap;
struct vm_pptdev *pptdev;
struct vm_pptdev_mmio *pptmmio;
+ struct vm_memory_region_info *memory_region_info;
struct vm_pptdev_msi *pptmsi;
struct vm_pptdev_msix *pptmsix;
struct vm_nmi *vmnmi;
@@ -540,6 +543,29 @@
error = ppt_unmap_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
pptmmio->func, pptmmio->gpa, pptmmio->len);
break;
+ case VM_GET_MEMORY_REGION_INFO:
+ memory_region_info = (struct vm_memory_region_info *)data;
+ switch (memory_region_info->type) {
+ case MEMORY_REGION_INTEL_GSM:
+ memory_region_info->base = intel_graphics_stolen_base;
+ memory_region_info->size = intel_graphics_stolen_size;
+ error = 0;
+ break;
+ case MEMORY_REGION_INTEL_OPREGION:
+ error =
+ vm_intelgpu_get_opregion(&memory_region_info->base,
+ &memory_region_info->size);
+ break;
+ case MEMORY_REGION_TPM_CONTROL_ADDRESS:
+ error = vmm_tpm2_get_control_address(
+ &memory_region_info->base,
+ &memory_region_info->size);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ break;
case VM_BIND_PPTDEV:
pptdev = (struct vm_pptdev *)data;
error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
diff --git a/sys/dev/pci/pcireg.h b/sys/dev/pci/pcireg.h
--- a/sys/dev/pci/pcireg.h
+++ b/sys/dev/pci/pcireg.h
@@ -1098,3 +1098,14 @@
#define PCIM_OSC_CTL_PCIE_PME 0x04 /* PCIe Native Power Mgt Events */
#define PCIM_OSC_CTL_PCIE_AER 0x08 /* PCIe Advanced Error Reporting */
#define PCIM_OSC_CTL_PCIE_CAP_STRUCT 0x10 /* Various Capability Structures */
+
+/*
+ * Intel graphics device definitions
+ */
+#define PCIR_BDSM 0x5C /* Base of Data Stolen Memory register */
+#define PCIR_ASLS_CTL 0xFC /* Opregion start address register */
+
+/*
+ * PCI Vendors
+ */
+#define PCI_VENDOR_INTEL 0x8086
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -29,7 +29,8 @@
x86.c
.PATH: ${SRCTOP}/sys/amd64/vmm/io
-SRCS+= iommu.c \
+SRCS+= acpi.c \
+ iommu.c \
ppt.c \
vatpic.c \
vatpit.c \
@@ -42,6 +43,7 @@
# intel-specific files
.PATH: ${SRCTOP}/sys/amd64/vmm/intel
SRCS+= ept.c \
+ intelgpu.c \
vmcs.c \
vmx_msr.c \
vmx_support.S \
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -15,6 +15,7 @@
BHYVE_SYSDIR?=${SRCTOP}
SRCS= \
+ acpi_device.c \
atkbdc.c \
acpi.c \
audio.c \
@@ -26,6 +27,7 @@
console.c \
ctl_util.c \
ctl_scsi_all.c \
+ e820.c \
fwctl.c \
gdb.c \
hda_codec.c \
@@ -42,6 +44,7 @@
pci_emul.c \
pci_hda.c \
pci_fbuf.c \
+ pci_gvt-d.c \
pci_hostbridge.c \
pci_irq.c \
pci_lpc.c \
@@ -61,6 +64,7 @@
post.c \
ps2kbd.c \
ps2mouse.c \
+ qemu_fwcfg.c \
rfb.c \
rtc.c \
smbiostbl.c \
diff --git a/usr.sbin/bhyve/acpi.h b/usr.sbin/bhyve/acpi.h
--- a/usr.sbin/bhyve/acpi.h
+++ b/usr.sbin/bhyve/acpi.h
@@ -31,6 +31,8 @@
#ifndef _ACPI_H_
#define _ACPI_H_
+#include "acpi_device.h"
+
#define SCI_INT 9
#define SMI_CMD 0xb2
@@ -55,6 +57,7 @@
int acpi_build(struct vmctx *ctx, int ncpu);
void acpi_raise_gpe(struct vmctx *ctx, unsigned bit);
+int acpi_tables_add_device(const struct acpi_device *const dev);
void dsdt_line(const char *fmt, ...);
void dsdt_fixed_ioport(uint16_t iobase, uint16_t length);
void dsdt_fixed_irq(uint8_t irq);
diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c
--- a/usr.sbin/bhyve/acpi.c
+++ b/usr.sbin/bhyve/acpi.c
@@ -139,6 +139,30 @@
#define EFFLUSH(x) \
if (fflush(x) != 0) goto err_exit;
+/*
+ * A list for additional ACPI devices like a TPM.
+ */
+struct acpi_device_list_entry {
+ /* links the entries of acpi_devices together */
+ SLIST_ENTRY(acpi_device_list_entry) chain;
+ /* registered device; only the pointer is stored, the device is not copied */
+ const struct acpi_device *dev;
+};
+/* All devices registered through acpi_tables_add_device(). */
+SLIST_HEAD(acpi_device_list,
+ acpi_device_list_entry) acpi_devices = SLIST_HEAD_INITIALIZER(acpi_devices);
+
+/*
+ * Register an ACPI device so that its description is written when the DSDT
+ * is generated.  Only the pointer is recorded, so the device must stay
+ * valid for as long as it is on the list.
+ *
+ * Returns 0 on success or ENOMEM if the list entry cannot be allocated.
+ */
+int
+acpi_tables_add_device(const struct acpi_device *const dev)
+{
+	struct acpi_device_list_entry *node;
+
+	node = calloc(1, sizeof(*node));
+	if (node == NULL)
+		return (ENOMEM);
+
+	node->dev = dev;
+	SLIST_INSERT_HEAD(&acpi_devices, node, chain);
+
+	return (0);
+}
+
static int
basl_fwrite_rsdp(FILE *fp)
{
@@ -760,6 +784,11 @@
vmgenc_write_dsdt();
+ const struct acpi_device_list_entry *entry;
+ SLIST_FOREACH(entry, &acpi_devices, chain) {
+ acpi_device_write_dsdt(entry->dev);
+ }
+
dsdt_line("}");
if (dsdt_error != 0)
diff --git a/usr.sbin/bhyve/acpi_device.h b/usr.sbin/bhyve/acpi_device.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/acpi_device.h
@@ -0,0 +1,42 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#pragma once
+
+#include <contrib/dev/acpica/include/acpi.h>
+
+struct vmctx;
+
+struct acpi_device;
+
+/**
+ * Creates an ACPI device.
+ *
+ * @param[out] new_dev Returns the newly created ACPI device.
+ * @param[in] vm_ctx VM context the ACPI device is created in.
+ * @param[in] name Name of the ACPI device. Should always be a NULL
+ * terminated string.
+ * @param[in] hid Hardware ID of the ACPI device. Should always be a NULL
+ * terminated string.
+ */
+int acpi_device_create(struct acpi_device **const new_dev,
+ struct vmctx *const vm_ctx, const char *const name, const char *const hid);
+void acpi_device_destroy(struct acpi_device *const dev);
+
+/**
+ * @note: acpi_device_add_res_acpi_buffer doesn't ensure that no resources are
+ * added on an error condition. On error the caller should assume that
+ * the ACPI_BUFFER is partially added to the ACPI device.
+ */
+int acpi_device_add_res_acpi_buffer(struct acpi_device *const dev,
+ const ACPI_BUFFER resources);
+int acpi_device_add_res_fixed_ioport(struct acpi_device *const dev,
+ const UINT16 port, UINT8 length);
+int acpi_device_add_res_fixed_memory32(struct acpi_device *const dev,
+ const UINT8 write_protected, const UINT32 address, const UINT32 length);
+
+void acpi_device_write_dsdt(const struct acpi_device *const dev);
diff --git a/usr.sbin/bhyve/acpi_device.c b/usr.sbin/bhyve/acpi_device.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/acpi_device.c
@@ -0,0 +1,240 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+
+#include <machine/vmm.h>
+
+#include <err.h>
+#include <errno.h>
+#include <vmmapi.h>
+
+#include "acpi.h"
+#include "acpi_device.h"
+
+/**
+ * List entry to enumerate all resources used by an ACPI device.
+ *
+ * @param chain Used to chain multiple elements together.
+ * @param type Type of the ACPI resource.
+ * @param data Data of the ACPI resource.
+ */
+struct acpi_resource_list_entry {
+ SLIST_ENTRY(acpi_resource_list_entry) chain;
+ UINT32 type;
+ ACPI_RESOURCE_DATA data;
+};
+
+/**
+ * Holds information about an ACPI device.
+ *
+ * @param vm_ctx VM context the ACPI device was created in.
+ * @param name Name of the ACPI device.
+ * @param hid Hardware ID of the ACPI device.
+ * @param crs Current resources used by the ACPI device.
+ */
+struct acpi_device {
+ struct vmctx *vm_ctx;
+ /* stored by reference by acpi_device_create(); the string is not copied */
+ const char *name;
+ /* stored by reference by acpi_device_create(); the string is not copied */
+ const char *hid;
+ /* resource list; acpi_device_create() seeds it with an end tag entry */
+ SLIST_HEAD(acpi_resource_list, acpi_resource_list_entry) crs;
+};
+
+/*
+ * Allocate an ACPI device, seed its resource list with an end tag and
+ * register it via acpi_tables_add_device().
+ *
+ * 'name' and 'hid' are stored by reference, not copied.
+ *
+ * Returns 0 and stores the device in *new_dev on success.  Returns EINVAL
+ * if any argument is NULL, ENOMEM if an allocation fails, or the error
+ * reported by acpi_tables_add_device().  *new_dev is untouched on failure.
+ */
+int
+acpi_device_create(struct acpi_device **const new_dev,
+    struct vmctx *const vm_ctx, const char *const name, const char *const hid)
+{
+	struct acpi_resource_list_entry *end_tag;
+	struct acpi_device *dev;
+	int error;
+
+	if (new_dev == NULL || vm_ctx == NULL || name == NULL || hid == NULL)
+		return (EINVAL);
+
+	dev = calloc(1, sizeof(*dev));
+	if (dev == NULL)
+		return (ENOMEM);
+
+	dev->vm_ctx = vm_ctx;
+	dev->name = name;
+	dev->hid = hid;
+	SLIST_INIT(&dev->crs);
+
+	/* current resources always contain an end tag */
+	end_tag = calloc(1, sizeof(*end_tag));
+	if (end_tag == NULL) {
+		error = ENOMEM;
+		goto fail;
+	}
+	end_tag->type = ACPI_RESOURCE_TYPE_END_TAG;
+	SLIST_INSERT_HEAD(&dev->crs, end_tag, chain);
+
+	error = acpi_tables_add_device(dev);
+	if (error != 0)
+		goto fail;
+
+	*new_dev = dev;
+	return (0);
+
+fail:
+	acpi_device_destroy(dev);
+	return (error);
+}
+
+/*
+ * Release an ACPI device: every entry of its resource list and the device
+ * structure itself.  Passing NULL is a no-op.
+ *
+ * The 'name' and 'hid' strings are not freed; acpi_device_create() stores
+ * them by reference and they stay owned by the caller.
+ *
+ * NOTE(review): acpi_tables_add_device() keeps a pointer to every device it
+ * registered.  A device that was registered successfully must not be
+ * destroyed while the DSDT writer can still walk that list.
+ */
+void
+acpi_device_destroy(struct acpi_device *const dev)
+{
+	if (dev == NULL) {
+		return;
+	}
+
+	struct acpi_resource_list_entry *res;
+	while ((res = SLIST_FIRST(&dev->crs)) != NULL) {
+		SLIST_REMOVE_HEAD(&dev->crs, chain);
+		free(res);
+	}
+
+	/* The device was allocated by acpi_device_create(); don't leak it. */
+	free(dev);
+}
+
+/*
+ * Add every supported resource found in an ACPI resource buffer to the
+ * device.  Fixed I/O port and fixed 32 bit memory descriptors are supported.
+ *
+ * Parsing stops at the first end tag or at the end of the buffer.
+ *
+ * Returns 0 on success, EINVAL if 'dev' is NULL or the buffer is malformed
+ * (NULL pointer with non-zero length, or a descriptor of length 0), ENODEV
+ * for an unsupported resource type, or the error of the failing add call.
+ * On error, resources preceding the failing descriptor remain added.
+ */
+int
+acpi_device_add_res_acpi_buffer(struct acpi_device *const dev,
+    const ACPI_BUFFER resources)
+{
+	if (dev == NULL) {
+		return (EINVAL);
+	}
+	if (resources.Pointer == NULL && resources.Length != 0) {
+		return (EINVAL);
+	}
+
+	size_t offset = 0;
+	while (offset < resources.Length) {
+		const ACPI_RESOURCE *const res =
+		    (const ACPI_RESOURCE *)((UINT8 *)resources.Pointer +
+		    offset);
+		int error = 0;
+
+		/* The end tag terminates the list; nothing valid follows. */
+		if (res->Type == ACPI_RESOURCE_TYPE_END_TAG) {
+			break;
+		}
+
+		/*
+		 * A descriptor of length 0 would never advance 'offset' and
+		 * the loop would spin forever.
+		 */
+		if (res->Length == 0) {
+			warnx("%s: invalid resource length 0", __func__);
+			return (EINVAL);
+		}
+
+		switch (res->Type) {
+		case ACPI_RESOURCE_TYPE_FIXED_IO:
+			error = acpi_device_add_res_fixed_ioport(dev,
+			    res->Data.FixedIo.Address,
+			    res->Data.FixedIo.AddressLength);
+			break;
+		case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
+			error = acpi_device_add_res_fixed_memory32(dev,
+			    res->Data.FixedMemory32.WriteProtect,
+			    res->Data.FixedMemory32.Address,
+			    res->Data.FixedMemory32.AddressLength);
+			break;
+		default:
+			warnx("%s: unknown resource type %d", __func__,
+			    res->Type);
+			return (ENODEV);
+		}
+		if (error != 0) {
+			return (error);
+		}
+		offset += res->Length;
+	}
+
+	return (0);
+}
+
+/*
+ * Add a fixed I/O port range to the device's resource list.  The new entry
+ * is inserted at the head of the list.
+ *
+ * Returns 0 on success, EINVAL if 'dev' is NULL or ENOMEM if the list entry
+ * cannot be allocated.
+ */
+int
+acpi_device_add_res_fixed_ioport(struct acpi_device *const dev,
+    const UINT16 port, const UINT8 length)
+{
+	struct acpi_resource_list_entry *entry;
+
+	if (dev == NULL)
+		return (EINVAL);
+
+	entry = calloc(1, sizeof(*entry));
+	if (entry == NULL)
+		return (ENOMEM);
+
+	entry->type = ACPI_RESOURCE_TYPE_FIXED_IO;
+	entry->data.FixedIo.Address = port;
+	entry->data.FixedIo.AddressLength = length;
+	SLIST_INSERT_HEAD(&dev->crs, entry, chain);
+
+	return (0);
+}
+
+/*
+ * Add a fixed 32 bit memory range to the device's resource list.  The new
+ * entry is inserted at the head of the list.
+ *
+ * Returns 0 on success, EINVAL if 'dev' is NULL or ENOMEM if the list entry
+ * cannot be allocated.
+ */
+int
+acpi_device_add_res_fixed_memory32(struct acpi_device *const dev,
+    const UINT8 write_protected, const UINT32 address, const UINT32 length)
+{
+	struct acpi_resource_list_entry *entry;
+
+	if (dev == NULL)
+		return (EINVAL);
+
+	entry = calloc(1, sizeof(*entry));
+	if (entry == NULL)
+		return (ENOMEM);
+
+	entry->type = ACPI_RESOURCE_TYPE_FIXED_MEMORY32;
+	entry->data.FixedMemory32.WriteProtect = write_protected;
+	entry->data.FixedMemory32.Address = address;
+	entry->data.FixedMemory32.AddressLength = length;
+	SLIST_INSERT_HEAD(&dev->crs, entry, chain);
+
+	return (0);
+}
+
+/*
+ * Write one DSDT resource descriptor for each entry of the device's resource
+ * list.  End tag entries produce no output.  An entry of any other
+ * unsupported type is reported with warnx() and stops the walk, so entries
+ * after it are not written.
+ */
+static void
+acpi_device_write_dsdt_crs(const struct acpi_device *const dev)
+{
+	const struct acpi_resource_list_entry *entry;
+
+	SLIST_FOREACH(entry, &dev->crs, chain) {
+		if (entry->type == ACPI_RESOURCE_TYPE_END_TAG)
+			continue;
+
+		if (entry->type == ACPI_RESOURCE_TYPE_FIXED_IO) {
+			dsdt_fixed_ioport(entry->data.FixedIo.Address,
+			    entry->data.FixedIo.AddressLength);
+		} else if (entry->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) {
+			dsdt_fixed_mem32(entry->data.FixedMemory32.Address,
+			    entry->data.FixedMemory32.AddressLength);
+		} else {
+			warnx("%s: unknown resource type %d", __func__,
+			    entry->type);
+			return;
+		}
+	}
+}
+
+/*
+ * Write the DSDT description of an ACPI device: a Device object named
+ * dev->name inside Scope (\_SB), carrying _HID (dev->hid), _STA and a _CRS
+ * resource template built from the device's resource list.
+ * Passing NULL is a no-op.
+ */
+void
+acpi_device_write_dsdt(const struct acpi_device *const dev)
+{
+ if (dev == NULL) {
+ return;
+ }
+
+ dsdt_line("");
+ dsdt_line(" Scope (\\_SB)");
+ dsdt_line(" {");
+ dsdt_line(" Device (%s)", dev->name);
+ dsdt_line(" {");
+ dsdt_line(" Name (_HID, \"%s\")", dev->hid);
+ dsdt_line(" Name (_STA, 0x0F)");
+ dsdt_line(" Name (_CRS, ResourceTemplate ()");
+ dsdt_line(" {");
+ /* the resource descriptors are emitted between indent/unindent calls */
+ dsdt_indent(4);
+ acpi_device_write_dsdt_crs(dev);
+ dsdt_unindent(4);
+ dsdt_line(" })");
+ dsdt_line(" }");
+ dsdt_line(" }");
+}
diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8
--- a/usr.sbin/bhyve/bhyve.8
+++ b/usr.sbin/bhyve/bhyve.8
@@ -45,6 +45,15 @@
.Op Cm ,threads= Ar n
.Oc
.Sm on
+.Oo Fl f
+.Sm off
+.Ar name Cm \&,
+.Oo
+.Cm string No | Cm file
+.Oc
+.Cm \&= Ar data
+.Sm on
+.Oc
.Oo
.Sm off
.Fl G\~
@@ -144,6 +153,16 @@
.Nm
to exit when a guest issues an access to an I/O port that is not emulated.
This is intended for debug purposes.
+.It Fl f Ar name Ns Cm \&, Ns Oo Cm string Ns No | Ns Cm file Ns Oc Ns Cm \&= Ns Ar data
+Add a fw_cfg file
+.Ar name
+to the fw_cfg interface.
+If a
+.Cm string
+is specified, the fw_cfg file contains the string as data.
+If a
+.Cm file
+is specified, bhyve reads the file and adds the file content as fw_cfg data.
.It Fl G Xo
.Sm off
.Oo Ar w Oc
@@ -515,6 +534,11 @@
and
.Ar function
numbers.
+.It Li rom= Ns Ar romfile
+Add
+.Ar romfile
+as option ROM to the PCI device.
+The ROM will be loaded by firmware and should be capable of initializing the device.
.El
.Pp
Guest memory must be wired using the
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -90,6 +90,7 @@
#include "config.h"
#include "inout.h"
#include "debug.h"
+#include "e820.h"
#include "fwctl.h"
#include "gdb.h"
#include "ioapic.h"
@@ -100,6 +101,7 @@
#include "pci_emul.h"
#include "pci_irq.h"
#include "pci_lpc.h"
+#include "qemu_fwcfg.h"
#include "smbiostbl.h"
#ifdef BHYVE_SNAPSHOT
#include "snapshot.h"
@@ -1249,9 +1251,9 @@
progname = basename(argv[0]);
#ifdef BHYVE_SNAPSHOT
- optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:U:r:";
+ optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:U:r:";
#else
- optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:U:";
+ optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:U:";
#endif
while ((c = getopt(argc, argv, optstr)) != -1) {
switch (c) {
@@ -1279,6 +1281,11 @@
case 'C':
set_config_bool("memory.guest_in_core", true);
break;
+ case 'f':
+ if (qemu_fwcfg_parse_cmdline_arg(optarg) != 0) {
+ exit(1);
+ }
+ break;
case 'G':
parse_gdb_options(optarg);
break;
@@ -1452,6 +1459,61 @@
rtc_init(ctx);
sci_init(ctx);
+ const char *fwcfg = lpc_fwcfg();
+ if (lpc_bootrom()) {
+ if (fwcfg == NULL || strcmp(fwcfg, "bhyve") == 0) {
+ if (fwctl_init() != 0) {
+ fprintf(stderr,
+ "bhyve fwctl initialization error");
+ exit(4);
+ }
+ } else if (strcmp(fwcfg, "qemu") == 0) {
+ if (qemu_fwcfg_init(ctx) != 0) {
+ fprintf(stderr,
+ "qemu fwcfg initialization error");
+ exit(4);
+ }
+ /*
+ * QEMU uses fwcfg item 0x0f (FW_CFG_MAX_CPUS) to report
+ * the number of cpus to the guest but states that it
+ * has a special meaning for x86. Don't know yet if that
+ * can cause unintended side-effects. Use our own fwcfg
+ * item to be safe.
+ *
+ * QEMU comment:
+ * FW_CFG_MAX_CPUS is a bit confusing/problematic
+ * on x86:
+ *
+ * For machine types prior to 1.8, SeaBIOS needs
+ * FW_CFG_MAX_CPUS for building MPTable, ACPI MADT,
+ * ACPI CPU hotplug and ACPI SRAT table, that
+ * tables are based on xAPIC ID and QEMU<->SeaBIOS
+ * interface for CPU hotplug also uses APIC ID and
+ * not "CPU index". This means that FW_CFG_MAX_CPUS
+ * is not the "maximum number of CPUs", but the
+ * "limit to the APIC ID values SeaBIOS may see".
+ *
+ * So for compatibility reasons with old BIOSes we
+ * are stuck with "etc/max-cpus" actually being
+ * apic_id_limit
+ */
+ if (qemu_fwcfg_add_file("opt/bhyve/hw.ncpu",
+ sizeof(guest_ncpus), &guest_ncpus) != 0) {
+ fprintf(stderr,
+ "Could not add qemu fwcfg opt/bhyve/hw.ncpu");
+ exit(4);
+ }
+
+ if (e820_init(ctx) != 0) {
+ fprintf(stderr, "Unable to setup E820");
+ exit(4);
+ }
+ } else {
+ fprintf(stderr, "Invalid fwcfg %s", fwcfg);
+ exit(4);
+ }
+ }
+
/*
* Exit if a device emulation finds an error in its initilization
*/
@@ -1535,8 +1597,20 @@
assert(error == 0);
}
- if (lpc_bootrom())
- fwctl_init();
+	/*
+	 * lpc_fwcfg() may return NULL (the fwcfg setup code earlier in this
+	 * function accepts that and falls back to bhyve's fwctl), so check
+	 * for NULL before handing the pointer to strcmp().
+	 */
+	if (fwcfg != NULL && strcmp(fwcfg, "qemu") == 0) {
+		struct qemu_fwcfg_item *const e820_fwcfg_item =
+		    e820_get_fwcfg_item();
+		if (e820_fwcfg_item == NULL) {
+			fprintf(stderr, "invalid e820 table");
+			exit(4);
+		}
+		if (qemu_fwcfg_add_file("etc/e820", e820_fwcfg_item->size,
+		    e820_fwcfg_item->data) != 0) {
+			fprintf(stderr, "could not add qemu fwcfg etc/e820");
+			exit(4);
+		}
+		free(e820_fwcfg_item);
+	}
/*
* Change the proc title to include the VM name.
diff --git a/usr.sbin/bhyve/e820.h b/usr.sbin/bhyve/e820.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/e820.h
@@ -0,0 +1,49 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#pragma once
+
+#include <vmmapi.h>
+
+#include "qemu_fwcfg.h"
+
+#pragma pack(push, 1)
+
+enum e820_memory_type {
+ E820_TYPE_MEMORY = 1,
+ E820_TYPE_RESERVED = 2,
+ E820_TYPE_ACPI = 3,
+ E820_TYPE_NVS = 4
+};
+
+enum e820_allocation_strategy {
+ /* allocate any address */
+ E820_ALLOCATE_ANY,
+ /* allocate lowest address larger than address */
+ E820_ALLOCATE_LOWEST,
+ /* allocate highest address lower than address */
+ E820_ALLOCATE_HIGHEST,
+ /* allocate a specific address */
+ E820_ALLOCATE_SPECIFIC
+};
+
+/*
+ * One E820 table entry.  Declared inside the surrounding
+ * #pragma pack(push, 1) region, so no padding is inserted between members.
+ *
+ * NOTE(review): 'type' is an enum, whose storage size is chosen by the
+ * compiler.  If this struct is handed to the guest as-is, confirm the enum
+ * is 4 bytes wide or consider a fixed-width integer type.
+ */
+struct e820_entry {
+ uint64_t base;
+ uint64_t length;
+ enum e820_memory_type type;
+};
+
+#pragma pack(pop)
+
+#define E820_ALIGNMENT_NONE 1
+
+/*
+ * E820 table interface.
+ *
+ * Functions without parameters are declared with (void): in C an empty
+ * parameter list declares a function with unspecified arguments rather than
+ * a prototype, which disables argument checking at call sites.
+ */
+uint64_t e820_alloc(const uint64_t address, const uint64_t length,
+    const uint64_t alignment, const enum e820_memory_type type,
+    const enum e820_allocation_strategy strategy);
+void e820_dump_table(void);
+struct qemu_fwcfg_item *e820_get_fwcfg_item(void);
+int e820_init(struct vmctx *const ctx);
diff --git a/usr.sbin/bhyve/e820.c b/usr.sbin/bhyve/e820.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/e820.c
@@ -0,0 +1,452 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/queue.h>
+
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "e820.h"
+#include "qemu_fwcfg.h"
+
+/*
+ * E820 always uses 64 bit entries. Emulation code will use vm_paddr_t since it
+ * works on physical addresses. If vm_paddr_t is larger than uint64_t E820 can't
+ * hold all possible physical addresses and we can get into trouble.
+ */
+static_assert(sizeof(vm_paddr_t) <= sizeof(uint64_t),
+ "Unable to represent physical memory by E820 table");
+
+#define E820_FWCFG_FILE_NAME "etc/e820"
+
+#define KB (1024UL)
+#define MB (1024 * KB)
+#define GB (1024 * MB)
+
+/*
+ * Fixed E820 memory holes:
+ * [ A0000, C0000) VGA
+ * [ C0000, 100000) ROM
+ */
+#define E820_VGA_MEM_BASE 0xA0000
+#define E820_VGA_MEM_END 0xC0000
+#define E820_ROM_MEM_BASE 0xC0000
+#define E820_ROM_MEM_END 0x100000
+
+/*
+ * One range of the in-memory E820 table. The range is [base, end): end is
+ * exclusive, the exported entry length is computed as end - base. Elements are
+ * kept on e820_table in ascending address order.
+ */
+struct e820_element {
+ TAILQ_ENTRY(e820_element) chain;
+ uint64_t base;
+ uint64_t end;
+ enum e820_memory_type type;
+};
+/* List head of the E820 table; (re)initialized by e820_init(). */
+TAILQ_HEAD(e820_table, e820_element) e820_table = TAILQ_HEAD_INITIALIZER(
+ e820_table);
+
+/*
+ * Map an E820 memory type to the label used when dumping the table. Types
+ * that are not known yield "Unknown ".
+ */
+static const char *
+e820_get_type_name(const enum e820_memory_type type)
+{
+	const char *label = "Unknown ";
+
+	if (type == E820_TYPE_MEMORY) {
+		label = "RAM ";
+	} else if (type == E820_TYPE_RESERVED) {
+		label = "Reserved";
+	} else if (type == E820_TYPE_ACPI) {
+		label = "ACPI ";
+	} else if (type == E820_TYPE_NVS) {
+		label = "NVS ";
+	}
+
+	return label;
+}
+
+/*
+ * Print every element of the E820 table to stderr: running index, base, end
+ * (exclusive) and a label for the memory type.
+ *
+ * The 64 bit values are cast to uintmax_t and printed with %j so the format
+ * string does not depend on uint64_t being an unsigned long.
+ */
+void
+e820_dump_table(void)
+{
+	struct e820_element *element;
+	uint64_t i = 0;
+
+	fprintf(stderr, "E820 map:\n\r");
+	TAILQ_FOREACH(element, &e820_table, chain) {
+		fprintf(stderr, " (%4ju) [ %16jx, %16jx] %s\n\r", (uintmax_t)i,
+		    (uintmax_t)element->base, (uintmax_t)element->end,
+		    e820_get_type_name(element->type));
+		++i;
+	}
+}
+
+/*
+ * Export the E820 table as a freshly allocated fwcfg item whose data is an
+ * array of struct e820_entry, one entry per table element in list order.
+ *
+ * Returns NULL if the table is empty (a warning is logged) or if an
+ * allocation fails. On success the caller owns the item and its data buffer.
+ */
+struct qemu_fwcfg_item *
+e820_get_fwcfg_item()
+{
+	struct e820_element *element;
+	struct qemu_fwcfg_item *item;
+	struct e820_entry *entry;
+	uint64_t nentries = 0;
+
+	/* first pass: count the elements to size the buffer */
+	TAILQ_FOREACH(element, &e820_table, chain) {
+		nentries++;
+	}
+	if (nentries == 0) {
+		warnx("%s: E820 table empty", __func__);
+		return (NULL);
+	}
+
+	item = malloc(sizeof(struct qemu_fwcfg_item));
+	if (item == NULL) {
+		return (NULL);
+	}
+	item->size = nentries * sizeof(struct e820_entry);
+	item->data = malloc(item->size);
+	if (item->data == NULL) {
+		free(item);
+		return (NULL);
+	}
+
+	/* second pass: convert [base, end) elements into base/length entries */
+	entry = (struct e820_entry *)item->data;
+	TAILQ_FOREACH(element, &e820_table, chain) {
+		entry->base = element->base;
+		entry->length = element->end - element->base;
+		entry->type = element->type;
+		entry++;
+	}
+
+	return item;
+}
+
+/*
+ * Insert the range [base, end) of the given type into the E820 table.
+ *
+ * System memory (E820_TYPE_MEMORY) is inserted at its sorted position and must
+ * not overlap the element that follows it. Every other type must lie
+ * completely inside an existing system memory element, which is shrunk or
+ * split to make room for the new range.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails and -1 if the range is
+ * invalid or cannot be placed. On any failure the table is left unchanged and
+ * the element allocated for the new range is released again.
+ */
+int
+e820_add_entry(const uint64_t base, const uint64_t end,
+    const enum e820_memory_type type)
+{
+	struct e820_element *element;
+	struct e820_element *new_element;
+	struct e820_element *ram_element;
+	int error = -1;
+
+	if (end < base) {
+		return (-1);
+	}
+
+	new_element = malloc(sizeof(struct e820_element));
+	if (new_element == NULL) {
+		return (-ENOMEM);
+	}
+
+	new_element->base = base;
+	new_element->end = end;
+	new_element->type = type;
+
+	/*
+	 * E820 table should be always sorted in ascending order. Therefore,
+	 * search for an element which end is larger than the base parameter.
+	 */
+	TAILQ_FOREACH(element, &e820_table, chain) {
+		if (element->end > base) {
+			break;
+		}
+	}
+
+	/*
+	 * System memory requires special handling.
+	 */
+	if (type == E820_TYPE_MEMORY) {
+		/*
+		 * base is larger than of any existing element. Add new system
+		 * memory at the end of the table.
+		 */
+		if (element == NULL) {
+			TAILQ_INSERT_TAIL(&e820_table, new_element, chain);
+			return (0);
+		}
+
+		/*
+		 * System memory shouldn't overlap with any existing element.
+		 */
+		if (end > element->base) {
+			goto fail;
+		}
+		TAILQ_INSERT_BEFORE(element, new_element, chain);
+		return (0);
+	}
+
+	if (element == NULL) {
+		/* No suitable element found */
+		goto fail;
+	}
+
+	/*
+	 * Non system memory should be allocated inside system memory.
+	 */
+	if (element->type != E820_TYPE_MEMORY) {
+		goto fail;
+	}
+	/*
+	 * New element should fit into existing system memory element.
+	 */
+	if (base < element->base || end > element->end) {
+		goto fail;
+	}
+
+	if (base == element->base) {
+		/*
+		 * New element at system memory base boundary. Add new
+		 * element before current and adjust the base of the old
+		 * element.
+		 *
+		 * Old table:
+		 *	[ 0x1000, 0x4000] RAM		<-- element
+		 * New table:
+		 *	[ 0x1000, 0x2000] Reserved
+		 *	[ 0x2000, 0x4000] RAM		<-- element
+		 */
+		TAILQ_INSERT_BEFORE(element, new_element, chain);
+		element->base = end;
+	} else if (end == element->end) {
+		/*
+		 * New element at system memory end boundary. Add new
+		 * element after current and adjust the end of the
+		 * current element.
+		 *
+		 * Old table:
+		 *	[ 0x1000, 0x4000] RAM		<-- element
+		 * New table:
+		 *	[ 0x1000, 0x3000] RAM		<-- element
+		 *	[ 0x3000, 0x4000] Reserved
+		 */
+		TAILQ_INSERT_AFTER(&e820_table, element, new_element, chain);
+		element->end = base;
+	} else {
+		/*
+		 * New element inside system memory entry. Split it by
+		 * adding a system memory element and the new element
+		 * before current.
+		 *
+		 * Old table:
+		 *	[ 0x1000, 0x4000] RAM		<-- element
+		 * New table:
+		 *	[ 0x1000, 0x2000] RAM
+		 *	[ 0x2000, 0x3000] Reserved
+		 *	[ 0x3000, 0x4000] RAM		<-- element
+		 */
+		ram_element = malloc(sizeof(struct e820_element));
+		if (ram_element == NULL) {
+			error = -ENOMEM;
+			goto fail;
+		}
+		ram_element->base = element->base;
+		ram_element->end = base;
+		ram_element->type = E820_TYPE_MEMORY;
+		TAILQ_INSERT_BEFORE(element, ram_element, chain);
+		TAILQ_INSERT_BEFORE(element, new_element, chain);
+		element->base = end;
+	}
+
+	return (0);
+
+fail:
+	/* new_element was never linked into the table, so release it here */
+	free(new_element);
+	return (error);
+}
+
+/*
+ * Remove the range [base, end) from system memory so that it is not reported
+ * in the E820 table at all.
+ *
+ * The hole has to lie in a gap of the table (nothing to do) or completely
+ * inside a single system memory element, which is shrunk, split or removed.
+ *
+ * Returns 0 on success, -ENOMEM if the element needed for a split can't be
+ * allocated and -1 if end < base, if the hole hits a non system memory
+ * element, or if it only partially overlaps a system memory element (which
+ * would otherwise leave an element with base > end in the table).
+ */
+static int
+e820_add_memory_hole(const uint64_t base, const uint64_t end)
+{
+	struct e820_element *element;
+	struct e820_element *ram_element;
+
+	if (end < base) {
+		return (-1);
+	}
+	if (end == base) {
+		/* An empty hole never changes the table */
+		return (0);
+	}
+
+	/*
+	 * E820 table should be always sorted in ascending order. Therefore,
+	 * search for an element which end is larger than the base parameter.
+	 */
+	TAILQ_FOREACH(element, &e820_table, chain) {
+		if (element->end > base) {
+			break;
+		}
+	}
+
+	if (element == NULL || end <= element->base) {
+		/* Nothing to do. Hole already exists */
+		return (0);
+	}
+
+	if (element->type != E820_TYPE_MEMORY) {
+		/* Memory holes are only allowed in system memory */
+		return (-1);
+	}
+
+	if (base < element->base || end > element->end) {
+		/*
+		 * The hole sticks out of the element. Shrinking or splitting
+		 * the element below would produce an inverted range.
+		 */
+		return (-1);
+	}
+
+	if (base == element->base && end == element->end) {
+		/*
+		 * Hole covers the whole element. Drop it instead of keeping a
+		 * zero length entry.
+		 */
+		TAILQ_REMOVE(&e820_table, element, chain);
+		free(element);
+	} else if (base == element->base) {
+		/*
+		 * New hole at system memory base boundary.
+		 *
+		 * Old table:
+		 *	[ 0x1000, 0x4000] RAM
+		 * New table:
+		 *	[ 0x2000, 0x4000] RAM
+		 */
+		element->base = end;
+	} else if (end == element->end) {
+		/*
+		 * New hole at system memory end boundary.
+		 *
+		 * Old table:
+		 *	[ 0x1000, 0x4000] RAM
+		 * New table:
+		 *	[ 0x1000, 0x3000] RAM
+		 */
+		element->end = base;
+	} else {
+		/*
+		 * New hole inside system memory entry. Split the system memory.
+		 *
+		 * Old table:
+		 *	[ 0x1000, 0x4000] RAM		<-- element
+		 * New table:
+		 *	[ 0x1000, 0x2000] RAM
+		 *	[ 0x3000, 0x4000] RAM		<-- element
+		 */
+		ram_element = malloc(sizeof(struct e820_element));
+		if (ram_element == NULL) {
+			return (-ENOMEM);
+		}
+		ram_element->base = element->base;
+		ram_element->end = base;
+		ram_element->type = E820_TYPE_MEMORY;
+		TAILQ_INSERT_BEFORE(element, ram_element, chain);
+		element->base = end;
+	}
+
+	return (0);
+}
+
+/*
+ * Allocate length bytes of the given type at the highest aligned address
+ * whose range still ends at or below max_address. System memory elements are
+ * searched from the top of the table downwards.
+ *
+ * Returns the allocated base address or 0 on failure. Because 0 is the
+ * failure value, address 0 itself is never handed out: the check is done on
+ * the final, rounded down address so that no entry is added at address 0 and
+ * then reported to the caller as a failed allocation.
+ */
+static uint64_t
+e820_alloc_highest(const uint64_t max_address, const uint64_t length,
+    const uint64_t alignment, const enum e820_memory_type type)
+{
+	struct e820_element *element;
+
+	TAILQ_FOREACH_REVERSE(element, &e820_table, e820_table, chain) {
+		const uint64_t end = MIN(max_address, element->end);
+		const uint64_t base = roundup2(element->base, alignment);
+
+		if (element->type != E820_TYPE_MEMORY || end < base ||
+		    end - base < length) {
+			continue;
+		}
+
+		/*
+		 * base is aligned and end - length >= base, so rounding down
+		 * can't move the address below base.
+		 */
+		const uint64_t address = rounddown2(end - length, alignment);
+		if (address == 0) {
+			continue;
+		}
+
+		if (e820_add_entry(address, address + length, type) != 0) {
+			return 0;
+		}
+
+		return address;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate length bytes of the given type at the lowest aligned address that
+ * is not below min_address. System memory elements are searched from the
+ * bottom of the table upwards. A candidate base of 0 is skipped because 0 is
+ * the failure value.
+ *
+ * Returns the allocated base address or 0 on failure.
+ */
+static uint64_t
+e820_alloc_lowest(const uint64_t min_address, const uint64_t length,
+    const uint64_t alignment, const enum e820_memory_type type)
+{
+	struct e820_element *element;
+
+	TAILQ_FOREACH(element, &e820_table, chain) {
+		const uint64_t aligned_base = roundup2(element->base,
+		    alignment);
+		const uint64_t start = MAX(min_address, aligned_base);
+		const uint64_t limit = element->end;
+
+		/* only system memory can be handed out */
+		if (element->type != E820_TYPE_MEMORY) {
+			continue;
+		}
+		/* element too small, or it would yield the failure value */
+		if (limit < start || limit - start < length || start == 0) {
+			continue;
+		}
+
+		if (e820_add_entry(start, start + length, type) == 0) {
+			return start;
+		}
+		return 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate length bytes of the given type out of system memory.
+ *
+ * address is interpreted according to strategy (ignored, lower bound, upper
+ * bound or exact address). alignment has to be a non-zero power of two and
+ * address has to be aligned to it.
+ *
+ * Returns the allocated base address or 0 on failure.
+ */
+uint64_t
+e820_alloc(const uint64_t address, const uint64_t length,
+    const uint64_t alignment, const enum e820_memory_type type,
+    const enum e820_allocation_strategy strategy)
+{
+	/*
+	 * address should be aligned. powerof2() accepts 0, which would turn
+	 * the rounding done by the allocators into "always 0", so reject it
+	 * explicitly.
+	 */
+	if (alignment == 0 || !powerof2(alignment) ||
+	    (address & (alignment - 1)) != 0) {
+		return 0;
+	}
+
+	switch (strategy) {
+	case E820_ALLOCATE_ANY:
+		/*
+		 * Allocate any address. Therefore, ignore the address parameter
+		 * and reuse the code path for allocating the lowest address.
+		 */
+		return e820_alloc_lowest(0, length, alignment, type);
+	case E820_ALLOCATE_LOWEST:
+		return e820_alloc_lowest(address, length, alignment, type);
+	case E820_ALLOCATE_HIGHEST:
+		return e820_alloc_highest(address, length, alignment, type);
+	case E820_ALLOCATE_SPECIFIC:
+		/* address + length must not wrap around */
+		if (address + length < address) {
+			return 0;
+		}
+		if (e820_add_entry(address, address + length, type) != 0) {
+			return 0;
+		}
+
+		return address;
+	}
+
+	return 0;
+}
+
+/*
+ * Build the initial E820 table of the VM: system memory below 4 GB, system
+ * memory above 4 GB (if any) and the fixed VGA and ROM holes.
+ *
+ * Returns 0 on success or the error of the step that failed; a warning naming
+ * that step is logged.
+ */
+int
+e820_init(struct vmctx *const ctx)
+{
+	uint64_t lowmem_length, highmem_length;
+	int error;
+
+	TAILQ_INIT(&e820_table);
+
+	/* add memory below 4 GB to E820 table */
+	lowmem_length = vm_get_lowmem_size(ctx);
+	error = e820_add_entry(0, lowmem_length, E820_TYPE_MEMORY);
+	if (error != 0) {
+		warnx("%s: Could not add lowmem", __func__);
+		return (error);
+	}
+
+	/* add memory above 4 GB to E820 table */
+	highmem_length = vm_get_highmem_size(ctx);
+	if (highmem_length != 0) {
+		error = e820_add_entry(4 * GB, 4 * GB + highmem_length,
+		    E820_TYPE_MEMORY);
+		if (error != 0) {
+			warnx("%s: Could not add highmem", __func__);
+			return (error);
+		}
+	}
+
+	/* add memory holes to E820 table */
+	error = e820_add_memory_hole(E820_VGA_MEM_BASE, E820_VGA_MEM_END);
+	if (error != 0) {
+		warnx("%s: Could not add VGA memory", __func__);
+		return (error);
+	}
+
+	error = e820_add_memory_hole(E820_ROM_MEM_BASE, E820_ROM_MEM_END);
+	if (error != 0) {
+		warnx("%s: Could not add ROM area", __func__);
+		return (error);
+	}
+
+	return (0);
+}
diff --git a/usr.sbin/bhyve/fwctl.h b/usr.sbin/bhyve/fwctl.h
--- a/usr.sbin/bhyve/fwctl.h
+++ b/usr.sbin/bhyve/fwctl.h
@@ -51,6 +51,6 @@
}; \
DATA_SET(ctl_set, __CONCAT(__ctl, __LINE__))
-void fwctl_init(void);
+int fwctl_init(void);
#endif /* _FWCTL_H_ */
diff --git a/usr.sbin/bhyve/fwctl.c b/usr.sbin/bhyve/fwctl.c
--- a/usr.sbin/bhyve/fwctl.c
+++ b/usr.sbin/bhyve/fwctl.c
@@ -472,16 +472,9 @@
static void
fwctl_outw(uint16_t val)
{
- switch (be_state) {
- case IDENT_WAIT:
- if (val == 0) {
- be_state = IDENT_SEND;
- ident_idx = 0;
- }
- break;
- default:
- /* ignore */
- break;
+ if (val == 0) {
+ be_state = IDENT_SEND;
+ ident_idx = 0;
}
}
@@ -538,15 +531,39 @@
return (0);
}
-INOUT_PORT(fwctl_wreg, FWCTL_OUT, IOPORT_F_INOUT, fwctl_handler);
-INOUT_PORT(fwctl_rreg, FWCTL_IN, IOPORT_F_IN, fwctl_handler);
-void
+int
fwctl_init(void)
{
+ struct inout_port iop;
+ int error;
+
+ bzero(&iop, sizeof(iop));
+ iop.name = "fwctl_wreg";
+ iop.port = FWCTL_OUT;
+ iop.size = 1;
+ iop.flags = IOPORT_F_INOUT;
+ iop.handler = fwctl_handler;
+
+ if ((error = register_inout(&iop)) != 0) {
+ return (error);
+ }
+
+ bzero(&iop, sizeof(iop));
+ iop.name = "fwctl_rreg";
+ iop.port = FWCTL_IN;
+ iop.size = 1;
+ iop.flags = IOPORT_F_IN;
+ iop.handler = fwctl_handler;
+
+ if ((error = register_inout(&iop)) != 0) {
+ return (error);
+ }
ops[OP_GET_LEN] = &fgetlen_info;
ops[OP_GET] = &fgetval_info;
be_state = IDENT_WAIT;
+
+ return (0);
}
diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h
--- a/usr.sbin/bhyve/pci_emul.h
+++ b/usr.sbin/bhyve/pci_emul.h
@@ -42,6 +42,8 @@
#include <assert.h>
#define PCI_BARMAX PCIR_MAX_BAR_0 /* BAR registers in a Type 0 header */
+#define PCI_BARMAX_WITH_ROM (PCI_BARMAX + 1)
+#define PCI_ROM_IDX (PCI_BARMAX + 1)
struct vmctx;
struct pci_devinst;
@@ -92,7 +94,8 @@
PCIBAR_IO,
PCIBAR_MEM32,
PCIBAR_MEM64,
- PCIBAR_MEMHI64
+ PCIBAR_MEMHI64,
+ PCIBAR_ROM,
};
struct pcibar {
@@ -165,7 +168,9 @@
void *pi_arg; /* devemu-private data */
u_char pi_cfgdata[PCI_REGMAX + 1];
- struct pcibar pi_bar[PCI_BARMAX + 1];
+ /* ROM is handled like a BAR */
+ struct pcibar pi_bar[PCI_BARMAX_WITH_ROM + 1];
+ uint64_t pi_romoffset;
};
struct msicap {
@@ -229,6 +234,8 @@
void pci_callback(void);
int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx,
enum pcibar_type type, uint64_t size);
+int pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size,
+ void **const addr);
int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type);
void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes,
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -33,10 +33,12 @@
#include <sys/param.h>
#include <sys/linker_set.h>
+#include <sys/mman.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
+#include <err.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
@@ -101,13 +103,28 @@
SET_DECLARE(pci_devemu_set, struct pci_devemu);
static uint64_t pci_emul_iobase;
+static uint8_t *pci_emul_rombase;
+static uint64_t pci_emul_romoffset;
+static uint8_t *pci_emul_romlim;
static uint64_t pci_emul_membase32;
static uint64_t pci_emul_membase64;
static uint64_t pci_emul_memlim64;
+/*
+ * A BAR allocation request that has been recorded but not yet assigned an
+ * address: the owning device, the BAR index, its type and its size.
+ */
+struct pci_bar_allocation {
+ TAILQ_ENTRY(pci_bar_allocation) chain;
+ struct pci_devinst *pdi;
+ int idx;
+ enum pcibar_type type;
+ uint64_t size;
+};
+/* List of pending BAR allocation requests. */
+TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars = TAILQ_HEAD_INITIALIZER(
+ pci_bars);
+
#define PCI_EMUL_IOBASE 0x2000
#define PCI_EMUL_IOLIMIT 0x10000
+#define PCI_EMUL_ROMSIZE 0x10000000
+
#define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */
#define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */
SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);
@@ -552,6 +569,12 @@
(*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
pi->pi_bar[idx].addr);
break;
+ case PCIBAR_ROM:
+ error = 0;
+ if (pe->pe_baraddr != NULL)
+ (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
+ pi->pi_bar[idx].addr);
+ break;
default:
error = EINVAL;
break;
@@ -573,6 +596,14 @@
modify_bar_registration(pi, idx, 1);
}
+/*
+ * Is the ROM enabled for the emulated pci device? The enable bit is kept in
+ * the lobits of the ROM pseudo BAR.
+ */
+static int
+romen(struct pci_devinst *pi)
+{
+	const struct pcibar *const rom = &pi->pi_bar[PCI_ROM_IDX];
+
+	return (rom->lobits & PCIM_BIOS_ENABLE) == PCIM_BIOS_ENABLE;
+}
+
/* Are we decoding i/o port accesses for the emulated pci device? */
static int
porten(struct pci_devinst *pi)
@@ -639,11 +670,11 @@
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
uint64_t size)
{
- int error;
- uint64_t *baseptr, limit, addr, mask, lobits, bar;
- uint16_t cmd, enbit;
-
- assert(idx >= 0 && idx <= PCI_BARMAX);
+ if ((type != PCIBAR_ROM) && (idx < 0 || idx > PCI_BARMAX)) {
+ errx(4, "Illegal BAR idx");
+ } else if ((type == PCIBAR_ROM) && (idx != PCI_ROM_IDX)) {
+ errx(4, "Illegal ROM idx");
+ }
if ((size & (size - 1)) != 0)
size = 1UL << flsl(size); /* round up to a power of 2 */
@@ -652,22 +683,94 @@
if (type == PCIBAR_IO) {
if (size < 4)
size = 4;
+ } else if (type == PCIBAR_ROM) {
+ if (size < ~PCIM_BIOS_ADDR_MASK + 1)
+ size = ~PCIM_BIOS_ADDR_MASK + 1;
} else {
if (size < 16)
size = 16;
}
+ /*
+ * To reduce fragmentation of the MMIO space, we allocate the BARs by
+ * size. Therefore, don't allocate the BAR yet. We create a list of all
+ * BAR allocation which is sorted by BAR size. When all PCI devices are
+ * initialized, we will assign an address to the BARs.
+ */
+
+ /* create a new list entry */
+ struct pci_bar_allocation *const new_bar = malloc(
+ sizeof(struct pci_bar_allocation));
+ memset(new_bar, 0, sizeof(struct pci_bar_allocation));
+ new_bar->pdi = pdi;
+ new_bar->idx = idx;
+ new_bar->type = type;
+ new_bar->size = size;
+
+ /*
+ * Search for a BAR which size is lower than the size of our newly
+ * allocated BAR.
+ */
+ struct pci_bar_allocation *bar = NULL;
+ TAILQ_FOREACH(bar, &pci_bars, chain) {
+ if (bar->size < size) {
+ break;
+ }
+ }
+
+ if (bar == NULL) {
+ /*
+ * Either the list is empty or new BAR is the smallest BAR of
+ * the list. Append it to the end of our list.
+ */
+ TAILQ_INSERT_TAIL(&pci_bars, new_bar, chain);
+ } else {
+ /*
+ * The found BAR is smaller than our new BAR. For that reason,
+ * insert our new BAR before the found BAR.
+ */
+ TAILQ_INSERT_BEFORE(bar, new_bar, chain);
+ }
+
+ /* update cmd reg */
+ uint16_t enbit = 0;
+ switch (type) {
+ case PCIBAR_IO:
+ enbit = PCIM_CMD_PORTEN;
+ break;
+ case PCIBAR_MEM64:
+ case PCIBAR_MEM32:
+ case PCIBAR_ROM:
+ enbit = PCIM_CMD_MEMEN;
+ break;
+ default:
+ enbit = 0;
+ break;
+ }
+
+ const uint16_t cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
+ pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);
+
+ return (0);
+}
+
+static int
+pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx, const enum pcibar_type type,
+ const uint64_t size)
+{
+ int error;
+ uint64_t *baseptr, limit, addr, mask, lobits, bar;
+
switch (type) {
case PCIBAR_NONE:
baseptr = NULL;
- addr = mask = lobits = enbit = 0;
+ addr = mask = lobits = 0;
break;
case PCIBAR_IO:
baseptr = &pci_emul_iobase;
limit = PCI_EMUL_IOLIMIT;
mask = PCIM_BAR_IO_BASE;
lobits = PCIM_BAR_IO_SPACE;
- enbit = PCIM_CMD_PORTEN;
break;
case PCIBAR_MEM64:
/*
@@ -689,14 +792,19 @@
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
}
- enbit = PCIM_CMD_MEMEN;
break;
case PCIBAR_MEM32:
baseptr = &pci_emul_membase32;
limit = PCI_EMUL_MEMLIMIT32;
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
- enbit = PCIM_CMD_MEMEN;
+ break;
+ case PCIBAR_ROM:
+ /* do not claim memory for ROM. OVMF will do it for us. */
+ baseptr = NULL;
+ limit = 0;
+ mask = PCIM_BIOS_ADDR_MASK;
+ lobits = 0;
break;
default:
printf("pci_emul_alloc_base: invalid bar type %d\n", type);
@@ -732,10 +840,57 @@
pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
}
- cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
- if ((cmd & enbit) != enbit)
- pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);
- register_bar(pdi, idx);
+ if (type != PCIBAR_ROM) {
+ register_bar(pdi, idx);
+ }
+
+ return (0);
+}
+
+/*
+ * Reserve space for the option ROM of a device inside the shared ROM segment
+ * and register a ROM BAR of matching size for it.
+ *
+ * The ROM segment is created on the first call. On success *addr points to
+ * the host memory backing the ROM and pdi->pi_romoffset holds its offset into
+ * the ROM segment.
+ *
+ * Returns 0 on success and -1 if the segment can't be created or has no room
+ * left; an error of pci_emul_alloc_bar() is passed through.
+ */
+int
+pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size,
+    void **const addr)
+{
+	/* allocate ROM space once on first call */
+	if (pci_emul_rombase == NULL) {
+		pci_emul_rombase = vm_create_devmem(pdi->pi_vmctx, VM_PCIROM,
+		    "pcirom", PCI_EMUL_ROMSIZE);
+		if (pci_emul_rombase == MAP_FAILED) {
+			warnx("%s: failed to create rom segment", __func__);
+			/*
+			 * Don't keep MAP_FAILED around. A later call would
+			 * otherwise take it for a valid segment base.
+			 */
+			pci_emul_rombase = NULL;
+			return (-1);
+		}
+		pci_emul_romlim = pci_emul_rombase + PCI_EMUL_ROMSIZE;
+		pci_emul_romoffset = 0;
+	}
+
+	/*
+	 * ROM size should be a power of 2 and at least 2 KB. Only round up
+	 * sizes that aren't a power of 2 yet; rounding unconditionally would
+	 * double every size that already is one. Sizes beyond the segment are
+	 * left alone, they're rejected below anyway.
+	 */
+	uint64_t rom_size = MAX(size, ~PCIM_BIOS_ADDR_MASK + 1);
+	if (rom_size <= PCI_EMUL_ROMSIZE && (rom_size & (rom_size - 1)) != 0)
+		rom_size = 1UL << flsl(rom_size);
+
+	/* check if ROM fits into ROM space */
+	if (rom_size > PCI_EMUL_ROMSIZE - pci_emul_romoffset) {
+		warnx("%s: no space left in rom segment:", __func__);
+		warnx("%16lu bytes left",
+		    PCI_EMUL_ROMSIZE - pci_emul_romoffset);
+		warnx("%16lu bytes required by %d/%d/%d", rom_size, pdi->pi_bus,
+		    pdi->pi_slot, pdi->pi_func);
+		return (-1);
+	}
+
+	/* allocate ROM BAR */
+	const int error = pci_emul_alloc_bar(pdi, PCI_ROM_IDX, PCIBAR_ROM,
+	    rom_size);
+	if (error)
+		return error;
+
+	/* return address */
+	*addr = pci_emul_rombase + pci_emul_romoffset;
+
+	/* save offset into ROM Space */
+	pdi->pi_romoffset = pci_emul_romoffset;
+
+	/* increase offset for next ROM */
+	pci_emul_romoffset += rom_size;
return (0);
}
@@ -1146,7 +1301,8 @@
}
#define BUSIO_ROUNDUP 32
-#define BUSMEM_ROUNDUP (1024 * 1024)
+#define BUSMEM32_ROUNDUP (1024 * 1024)
+#define BUSMEM64_ROUNDUP (512 * 1024 * 1024)
int
init_pci(struct vmctx *ctx)
@@ -1189,6 +1345,7 @@
bi->membase32 = pci_emul_membase32;
bi->membase64 = pci_emul_membase64;
+ /* first run: init devices */
for (slot = 0; slot < MAXSLOTS; slot++) {
si = &bi->slotinfo[slot];
for (func = 0; func < MAXFUNCS; func++) {
@@ -1228,6 +1385,16 @@
}
}
+ /* second run: assign BARs and free list */
+ struct pci_bar_allocation *bar;
+ struct pci_bar_allocation *bar_tmp;
+ TAILQ_FOREACH_SAFE(bar, &pci_bars, chain, bar_tmp) {
+ pci_emul_assign_bar(bar->pdi, bar->idx, bar->type,
+ bar->size);
+ free(bar);
+ }
+ TAILQ_INIT(&pci_bars);
+
/*
* Add some slop to the I/O and memory resources decoded by
* this bus to give a guest some flexibility if it wants to
@@ -1237,14 +1404,14 @@
pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
bi->iolimit = pci_emul_iobase;
- pci_emul_membase32 += BUSMEM_ROUNDUP;
+ pci_emul_membase32 += BUSMEM32_ROUNDUP;
pci_emul_membase32 = roundup2(pci_emul_membase32,
- BUSMEM_ROUNDUP);
+ BUSMEM32_ROUNDUP);
bi->memlimit32 = pci_emul_membase32;
- pci_emul_membase64 += BUSMEM_ROUNDUP;
+ pci_emul_membase64 += BUSMEM64_ROUNDUP;
pci_emul_membase64 = roundup2(pci_emul_membase64,
- BUSMEM_ROUNDUP);
+ BUSMEM64_ROUNDUP);
bi->memlimit64 = pci_emul_membase64;
}
@@ -1801,7 +1968,7 @@
* If the MMIO or I/O address space decoding has changed then
* register/unregister all BARs that decode that address space.
*/
- for (i = 0; i <= PCI_BARMAX; i++) {
+ for (i = 0; i <= PCI_BARMAX_WITH_ROM; i++) {
switch (pi->pi_bar[i].type) {
case PCIBAR_NONE:
case PCIBAR_MEMHI64:
@@ -1815,6 +1982,11 @@
unregister_bar(pi, i);
}
break;
+ case PCIBAR_ROM:
+ /* skip (un-)register of ROM if it disabled */
+ if (!romen(pi))
+ break;
+ /* fallthrough */
case PCIBAR_MEM32:
case PCIBAR_MEM64:
/* MMIO address space decoding changed? */
@@ -1935,16 +2107,21 @@
return;
/*
- * Special handling for write to BAR registers
+ * Special handling for write to BAR and ROM registers
*/
- if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
+ if ((coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) ||
+ (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4)) {
/*
* Ignore writes to BAR registers that are not
* 4-byte aligned.
*/
if (bytes != 4 || (coff & 0x3) != 0)
return;
- idx = (coff - PCIR_BAR(0)) / 4;
+ if (coff != PCIR_BIOS) {
+ idx = (coff - PCIR_BAR(0)) / 4;
+ } else {
+ idx = PCI_ROM_IDX;
+ }
mask = ~(pi->pi_bar[idx].size - 1);
switch (pi->pi_bar[idx].type) {
case PCIBAR_NONE:
@@ -1987,6 +2164,20 @@
PCIBAR_MEMHI64);
}
break;
+ case PCIBAR_ROM:
+ addr = bar = *eax & mask;
+ if (memen(pi) && romen(pi)) {
+ unregister_bar(pi, idx);
+ }
+ pi->pi_bar[idx].addr = addr;
+ pi->pi_bar[idx].lobits = *eax &
+ PCIM_BIOS_ENABLE;
+ /* romen could have changed it value */
+ if (memen(pi) && romen(pi)) {
+ register_bar(pi, idx);
+ }
+ bar |= pi->pi_bar[idx].lobits;
+ break;
default:
assert(0);
}
diff --git a/usr.sbin/bhyve/pci_gvt-d.c b/usr.sbin/bhyve/pci_gvt-d.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/pci_gvt-d.c
@@ -0,0 +1,262 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/mman.h>
+
+#include <machine/vmm.h>
+
+#include <dev/pci/pcireg.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include "e820.h"
+#include "inout.h"
+#include "pci_passthru.h"
+
+#define MB (1024 * 1024UL)
+#define GB (1024 * MB)
+
+#ifndef _PATH_MEM
+#define _PATH_MEM "/dev/mem"
+#endif
+
+/*
+ * PCI definitions
+ */
+#define PCIM_BDSM_GSM_ALIGNMENT \
+ 0x00100000 /* Graphics Stolen Memory is 1 MB aligned */
+
+/* GVT-d definitions */
+#define GVT_D_MAP_OPREGION 0
+#define GVT_D_MAP_GSM 1
+
+/*
+ * Config space write handler for the ASLS register. The written value is
+ * stored in the emulated config space, then the OpRegion is copied to the
+ * guest physical address the register now holds.
+ *
+ * Always returns 0, even if the new address can't be mapped (a warning is
+ * logged in that case).
+ */
+static int
+gvt_d_aslswrite(struct vmctx *const ctx, const int vcpu,
+    struct pci_devinst *const pi, const int coff, const int bytes,
+    const uint32_t val)
+{
+	struct passthru_softc *const sc = pi->pi_arg;
+	struct passthru_mmio_mapping *const opregion =
+	    &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+
+	/* write new value to cfg space */
+	switch (bytes) {
+	case 1:
+		pci_set_cfgdata8(pi, coff, val);
+		break;
+	case 2:
+		pci_set_cfgdata16(pi, coff, val);
+		break;
+	default:
+		pci_set_cfgdata32(pi, coff, val);
+		break;
+	}
+
+	/* get new address of opregion */
+	opregion->gpa = pci_get_cfgdata32(pi, PCIR_ASLS_CTL);
+
+	/* copy opregion into guest mem */
+	opregion->gva = vm_map_gpa(ctx, opregion->gpa, opregion->len);
+	if (opregion->gva != 0) {
+		memcpy(opregion->gva, opregion->hva, opregion->len);
+		return (0);
+	}
+
+	warnx("%s: Unable to map opregion (0x%016lx)", __func__,
+	    opregion->gpa);
+	/* return 0 to avoid emulation of ASLS register */
+	return (0);
+}
+
+/*
+ * Reserve length bytes of the given type in the E820 table. The host address
+ * is tried first so that guest and host address match; if that fails, the
+ * highest suitably aligned range below 4 GB is used.
+ *
+ * Returns the guest physical address or 0 on failure.
+ */
+static vm_paddr_t
+gvt_d_alloc_mmio_memory(const vm_paddr_t host_address, const vm_paddr_t length,
+    const vm_paddr_t alignment, const enum e820_memory_type type)
+{
+	vm_paddr_t gpa;
+
+	/* try to use host address */
+	gpa = e820_alloc(host_address, length, E820_ALIGNMENT_NONE, type,
+	    E820_ALLOCATE_SPECIFIC);
+	if (gpa == 0) {
+		/* try to use highest address below 4 GB */
+		gpa = e820_alloc(4 * GB, length, alignment, type,
+		    E820_ALLOCATE_HIGHEST);
+	}
+
+	return gpa;
+}
+
+/*
+ * Set up the Graphics Stolen Memory (GSM) of a passed through GPU: query its
+ * host address and length, reserve a guest range for it in the E820 table and
+ * store the guest address in the emulated BDSM register.
+ *
+ * Returns 0 on success, the error of vm_get_memory_region_info() if the
+ * region can't be queried and -1 if no guest range could be reserved.
+ */
+static int
+gvt_d_setup_gsm(struct vmctx *const ctx, struct pci_devinst *const pi)
+{
+	struct passthru_softc *const sc = pi->pi_arg;
+	struct passthru_mmio_mapping *const gsm =
+	    &sc->psc_mmio_map[GVT_D_MAP_GSM];
+	uint64_t bdsm;
+	int error;
+
+	error = vm_get_memory_region_info(ctx, &gsm->hpa, &gsm->len,
+	    MEMORY_REGION_INTEL_GSM);
+	if (error) {
+		warnx(
+		    "%s: Unable to get Graphics Stolen Memory base and length",
+		    __func__);
+		return (error);
+	}
+
+	/* neither host nor guest virtual address is used for the GSM */
+	gsm->hva = NULL;
+	gsm->gva = NULL;
+
+	gsm->gpa = gvt_d_alloc_mmio_memory(gsm->hpa, gsm->len,
+	    PCIM_BDSM_GSM_ALIGNMENT, E820_TYPE_RESERVED);
+	if (gsm->gpa == 0) {
+		warnx(
+		    "%s: Unable to add Graphics Stolen Memory to E820 table (hpa 0x%lx len 0x%lx)",
+		    __func__, gsm->hpa, gsm->len);
+		e820_dump_table();
+		return (-1);
+	}
+
+	/*
+	 * ACRN source code implies that graphics driver for newer Intel
+	 * platforms like Tiger Lake will read the Graphics Stolen Memory
+	 * address from an MMIO register. There are three ways to deal with a
+	 * guest address that differs from the host address:
+	 * 1. Patch the value in the MMIO register
+	 *	This could have unintended side effects. Without any
+	 *	documentation how this register is used by the GPU, don't
+	 *	do it.
+	 * 2. Trap the MMIO register
+	 *	It's not possible to trap a single MMIO register. A whole
+	 *	page would have to be trapped, which could degrade the
+	 *	performance noticeably.
+	 * 3. Use an 1:1 host to guest mapping
+	 *	Maybe not always possible.
+	 * As far as we know, no supported platform requires a 1:1 mapping.
+	 * For that reason, just log a warning.
+	 */
+	if (gsm->gpa != gsm->hpa) {
+		warnx(
+		    "Warning: Unable to reuse host address of Graphics Stolen Memory. GPU passthrough might not work properly.");
+	}
+
+	/* keep the low bits of the host register, replace the address */
+	bdsm = read_config(&sc->psc_sel, PCIR_BDSM, 4);
+	pci_set_cfgdata32(pi, PCIR_BDSM,
+	    gsm->gpa | (bdsm & (PCIM_BDSM_GSM_ALIGNMENT - 1)));
+
+	return (0);
+}
+
+/*
+ * Set up the OpRegion of a passed through GPU: map the host OpRegion through
+ * memfd, reserve a guest range for it in the E820 table, copy the contents
+ * into guest memory and store the guest address in the emulated ASLS
+ * register.
+ *
+ * Returns 0 on success, the error of vm_get_memory_region_info() if the
+ * region can't be queried and -1 on any other failure. On failure no host
+ * mapping is left behind and opregion->hva is NULL, so that gvt_d_deinit()
+ * has nothing to unmap.
+ */
+static int
+gvt_d_setup_opregion(struct vmctx *const ctx, struct pci_devinst *const pi,
+    const int memfd)
+{
+	struct passthru_softc *const sc = pi->pi_arg;
+
+	struct passthru_mmio_mapping *const opregion =
+	    &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+
+	const int error = vm_get_memory_region_info(ctx, &opregion->hpa,
+	    &opregion->len, MEMORY_REGION_INTEL_OPREGION);
+	if (error) {
+		warnx("%s: Unable to get OpRegion base and length", __func__);
+		return (error);
+	}
+	opregion->hva = mmap(NULL, opregion->len, PROT_READ, MAP_SHARED, memfd,
+	    opregion->hpa);
+	if (opregion->hva == MAP_FAILED) {
+		warnx("%s: Unable to map host OpRegion", __func__);
+		/* MAP_FAILED isn't NULL; don't let deinit try to unmap it */
+		opregion->hva = NULL;
+		return (-1);
+	}
+	opregion->gpa = gvt_d_alloc_mmio_memory(opregion->hpa, opregion->len,
+	    E820_ALIGNMENT_NONE, E820_TYPE_NVS);
+	if (opregion->gpa == 0) {
+		warnx(
+		    "%s: Unable to add OpRegion to E820 table (hpa 0x%lx len 0x%lx)",
+		    __func__, opregion->hpa, opregion->len);
+		e820_dump_table();
+		goto fail;
+	}
+	opregion->gva = vm_map_gpa(ctx, opregion->gpa, opregion->len);
+	if (opregion->gva == NULL) {
+		warnx("%s: Unable to map guest OpRegion", __func__);
+		goto fail;
+	}
+	if (opregion->gpa != opregion->hpa) {
+		/*
+		 * A 1:1 host to guest mapping is not required but this could
+		 * change in the future.
+		 */
+		warnx(
+		    "Warning: Unable to reuse host address of OpRegion. GPU passthrough might not work properly.");
+	}
+
+	memcpy(opregion->gva, opregion->hva, opregion->len);
+
+	pci_set_cfgdata32(pi, PCIR_ASLS_CTL, opregion->gpa);
+
+	return (0);
+
+fail:
+	/* release the host mapping created above */
+	munmap((void *)opregion->hva, opregion->len);
+	opregion->hva = NULL;
+	return (-1);
+}
+
+/*
+ * Initialize GVT-d for a passed through GPU: set up the Graphics Stolen
+ * Memory and the OpRegion and install config space handlers that keep the
+ * guest from changing the BDSM register and track writes to the ASLS
+ * register.
+ *
+ * Returns 0 on success, -1 if the memory device can't be opened, or the error
+ * of the step that failed.
+ */
+int
+gvt_d_init(struct vmctx *const ctx, struct pci_devinst *const pi,
+    nvlist_t *const nvl)
+{
+	int error;
+
+	struct passthru_softc *const sc = pi->pi_arg;
+
+	/* get memory descriptor */
+	const int memfd = open(_PATH_MEM, O_RDWR, 0);
+	if (memfd < 0) {
+		warn("%s: Failed to open %s", __func__, _PATH_MEM);
+		return (-1);
+	}
+
+	if ((error = gvt_d_setup_gsm(ctx, pi)) != 0) {
+		warnx("%s: Unable to setup Graphics Stolen Memory", __func__);
+		goto done;
+	}
+
+	if ((error = gvt_d_setup_opregion(ctx, pi, memfd)) != 0) {
+		warnx("%s: Unable to setup OpRegion", __func__);
+		goto done;
+	}
+
+	/* protect Graphics Stolen Memory register */
+	if ((error = set_pcir_handler(sc, PCIR_BDSM, 4,
+	    passthru_cfgread_emulate, passthru_cfgwrite_emulate)) != 0) {
+		warnx("%s: Unable to protect Graphics Stolen Memory register",
+		    __func__);
+		goto done;
+	}
+	/* protect opregion register */
+	if ((error = set_pcir_handler(sc, PCIR_ASLS_CTL, 4,
+	    passthru_cfgread_emulate, gvt_d_aslswrite)) != 0) {
+		warnx("%s: Unable to protect opregion", __func__);
+		goto done;
+	}
+
+done:
+	/*
+	 * The descriptor is only needed to create the OpRegion mapping. The
+	 * mapping stays valid after close, so don't leak the descriptor.
+	 */
+	close(memfd);
+	return (error);
+}
+
+/*
+ * Release the host mapping of the OpRegion, if there is one. The pointer is
+ * cleared afterwards so that calling this function again is harmless.
+ */
+void
+gvt_d_deinit(struct vmctx *const ctx, struct pci_devinst *const pi)
+{
+	struct passthru_softc *const sc = pi->pi_arg;
+
+	struct passthru_mmio_mapping *const opregion =
+	    &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+
+	/*
+	 * HVA is only set, if it's initialized. A failed mmap leaves
+	 * MAP_FAILED behind, which is neither NULL nor a valid mapping.
+	 */
+	if (opregion->hva != NULL && opregion->hva != MAP_FAILED)
+		munmap((void *)opregion->hva, opregion->len);
+	opregion->hva = NULL;
+}
diff --git a/usr.sbin/bhyve/pci_lpc.h b/usr.sbin/bhyve/pci_lpc.h
--- a/usr.sbin/bhyve/pci_lpc.h
+++ b/usr.sbin/bhyve/pci_lpc.h
@@ -72,5 +72,6 @@
char *lpc_pirq_name(int pin);
void lpc_pirq_routed(void);
const char *lpc_bootrom(void);
+const char *lpc_fwcfg(void);
#endif
diff --git a/usr.sbin/bhyve/pci_lpc.c b/usr.sbin/bhyve/pci_lpc.c
--- a/usr.sbin/bhyve/pci_lpc.c
+++ b/usr.sbin/bhyve/pci_lpc.c
@@ -32,13 +32,24 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
#include <sys/types.h>
+#include <sys/pciio.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sysexits.h>
#include <vmmapi.h>
@@ -85,6 +96,29 @@
"COM1", "COM2", "COM3", "COM4"
};
+#ifndef _PATH_DEVPCI
+#define _PATH_DEVPCI "/dev/pci"
+#endif
+
+static int pcifd = -1;
+
+/*
+ * Read width bytes from config register reg of the host PCI device selected
+ * by sel, using the PCIOCREAD ioctl on pcifd.
+ *
+ * Returns the value read, or 0 if the ioctl fails.
+ */
+static uint32_t
+read_config(const struct pcisel *const sel, const long reg, const int width)
+{
+	struct pci_io pi = {
+		.pi_sel = {
+			.pc_domain = sel->pc_domain,
+			.pc_bus = sel->pc_bus,
+			.pc_dev = sel->pc_dev,
+			.pc_func = sel->pc_func,
+		},
+		.pi_reg = reg,
+		.pi_width = width,
+	};
+
+	if (ioctl(pcifd, PCIOCREAD, &pi) >= 0)
+		return (pi.pi_data);
+
+	return (0);
+}
+
/*
* LPC device configuration is in the following form:
* <lpc_device_name>[,<options>]
@@ -101,7 +135,13 @@
lpcdev = strsep(&str, ",");
if (lpcdev != NULL) {
if (strcasecmp(lpcdev, "bootrom") == 0) {
- set_config_value("lpc.bootrom", str);
+ nvlist_t *const nvl = create_config_node("lpc.bootrom");
+ /* use qemu as default fwcfg */
+ set_config_value_node(nvl, "fwcfg", "qemu");
+
+ const char *const code = strsep(&str, ",");
+ set_config_value_node(nvl, "code", code);
+ pci_parse_legacy_config(nvl, str);
error = 0;
goto done;
}
@@ -145,7 +185,13 @@
lpc_bootrom(void)
{
- return (get_config_value("lpc.bootrom"));
+ return (get_config_value("lpc.bootrom.code"));
+}
+
+/*
+ * Return the value of the "lpc.bootrom.fwcfg" config node, i.e. the fwcfg
+ * option of the LPC bootrom device.
+ */
+const char *
+lpc_fwcfg(void)
+{
+ return (get_config_value("lpc.bootrom.fwcfg"));
}
static void
@@ -208,7 +254,7 @@
char *node_name;
int unit, error;
- romfile = get_config_value("lpc.bootrom");
+ romfile = get_config_value("lpc.bootrom.code");
if (romfile != NULL) {
error = bootrom_loadrom(ctx, romfile);
if (error)
@@ -452,6 +498,48 @@
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA);
+ pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
+ if (pcifd < 0) {
+ warn("failed to open %s", _PATH_DEVPCI);
+ return (-1);
+ }
+
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_t pcifd_rights;
+ cap_rights_init(&pcifd_rights, CAP_IOCTL, CAP_READ);
+
+ const cap_ioctl_t pcifd_ioctls[] = { PCIOCREAD };
+
+ if (caph_rights_limit(pcifd, &pcifd_rights) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+ if (caph_ioctls_limit(pcifd, pcifd_ioctls, nitems(pcifd_ioctls)) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+#endif
+
+ /* on Intel systems lpc is always connected to 0:1f.0 */
+ const struct pcisel sel = { .pc_dev = 0x1f };
+
+ if (read_config(&sel, PCIR_VENDOR, 2) == PCI_VENDOR_INTEL) {
+ /*
+ * The VID, DID, REVID, SUBVID and SUBDID of igd-lpc need to be
+ * aligned with the physical ones. Without these physical
+ * values, GVT-d GOP driver couldn't work.
+ */
+ pci_set_cfgdata16(pi, PCIR_DEVICE,
+ read_config(&sel, PCIR_DEVICE, 2));
+ pci_set_cfgdata16(pi, PCIR_VENDOR,
+ read_config(&sel, PCIR_VENDOR, 2));
+ pci_set_cfgdata8(pi, PCIR_REVID,
+ read_config(&sel, PCIR_REVID, 1));
+ pci_set_cfgdata16(pi, PCIR_SUBVEND_0,
+ read_config(&sel, PCIR_SUBVEND_0, 2));
+ pci_set_cfgdata16(pi, PCIR_SUBDEV_0,
+ read_config(&sel, PCIR_SUBDEV_0, 2));
+ }
+
+ close(pcifd);
+ pcifd = -1;
+
lpc_bridge = pi;
return (0);
diff --git a/usr.sbin/bhyve/pci_passthru.h b/usr.sbin/bhyve/pci_passthru.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/pci_passthru.h
@@ -0,0 +1,69 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#pragma once
+
+#include <sys/pciio.h>
+
+#include <vmmapi.h>
+
+#include "pci_emul.h"
+
+/*
+ * Describes one memory region of a passthru device by its address in the
+ * guest and host address spaces.
+ */
+struct passthru_mmio_mapping {
+ vm_paddr_t gpa; /* guest physical address */
+ void *gva; /* guest virtual address */
+ vm_paddr_t hpa; /* host physical address */
+ void *hva; /* host virtual address */
+ vm_paddr_t len; /* length of the region in bytes (used as munmap() length) */
+};
+
+/*
+ * Per-register config space access handlers. One read and one write handler
+ * is stored for every config space offset in struct passthru_softc; they are
+ * called by passthru_cfgread()/passthru_cfgwrite() with exactly the arguments
+ * those functions received.
+ *
+ * NOTE(review): the handlers in this file return 0 after accessing the
+ * hardware and -1 for emulated registers; presumably -1 makes the caller fall
+ * back to the emulated config space - confirm against pci_emul.c.
+ */
+typedef int (*cfgread_handler)(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ uint32_t *const rv);
+typedef int (*cfgwrite_handler)(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ const uint32_t val);
+
+/*
+ * Per-device state of a passthru device. It is reachable through
+ * pci_devinst::pi_arg.
+ */
+struct passthru_softc {
+ struct pci_devinst *psc_pi; /* emulated device this state belongs to */
+ /* ROM is handled like a BAR */
+ struct pcibar psc_bar[PCI_BARMAX_WITH_ROM + 1];
+ struct {
+ int capoff;
+ int msgctrl;
+ int emulated;
+ } psc_msi;
+ struct {
+ int capoff;
+ } psc_msix;
+ struct pcisel psc_sel; /* selector of the host device, used for config accesses */
+
+ /* quirk specific mappings, e.g. slot GVT_D_MAP_OPREGION for GVT-d */
+ struct passthru_mmio_mapping psc_mmio_map[2];
+ /* config space handlers, one entry per offset 0 - PCI_REGMAX */
+ cfgread_handler psc_pcir_rhandler[PCI_REGMAX + 1];
+ cfgwrite_handler psc_pcir_whandler[PCI_REGMAX + 1];
+};
+
+/* Raw config space accessors for the host device selected by sel. */
+uint32_t read_config(const struct pcisel *sel, long reg, int width);
+void write_config(const struct pcisel *sel, long reg, int width, uint32_t data);
+/*
+ * Config space handlers which can be installed with set_pcir_handler().
+ * The *_emulate variants always return -1; the *_default variants access the
+ * physical device unless the register belongs to an emulated capability.
+ */
+int passthru_cfgread_default(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ uint32_t *const rv);
+int passthru_cfgread_emulate(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ uint32_t *const rv);
+int passthru_cfgwrite_default(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ const uint32_t val);
+int passthru_cfgwrite_emulate(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ const uint32_t val);
+/*
+ * Install rhandler/whandler for the len config space offsets starting at reg.
+ * Returns 0 on success and -1 if the range is outside the config space.
+ */
+int set_pcir_handler(struct passthru_softc *const sc, const uint32_t reg,
+ const uint32_t len, const cfgread_handler rhandler,
+ const cfgwrite_handler whandler);
+/* GVT-d (Intel display device) quirk setup and teardown. */
+int gvt_d_init(struct vmctx *const ctx, struct pci_devinst *const pi,
+ nvlist_t *const nvl);
+void gvt_d_deinit(struct vmctx *const ctx, struct pci_devinst *const pi);
diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c
--- a/usr.sbin/bhyve/pci_passthru.c
+++ b/usr.sbin/bhyve/pci_passthru.c
@@ -37,8 +37,8 @@
#endif
#include <sys/types.h>
#include <sys/mman.h>
-#include <sys/pciio.h>
#include <sys/ioctl.h>
+#include <sys/stat.h>
#include <dev/io/iodev.h>
#include <dev/pci/pcireg.h>
@@ -61,12 +61,11 @@
#include <unistd.h>
#include <machine/vmm.h>
-#include <vmmapi.h>
#include "config.h"
#include "debug.h"
-#include "pci_emul.h"
#include "mem.h"
+#include "pci_passthru.h"
#ifndef _PATH_DEVPCI
#define _PATH_DEVPCI "/dev/pci"
@@ -77,21 +76,9 @@
#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1)
#define MSIX_CAPLEN 12
-static int pcifd = -1;
+#define PCI_CAP_START_OFFSET 0x40
-struct passthru_softc {
- struct pci_devinst *psc_pi;
- struct pcibar psc_bar[PCI_BARMAX + 1];
- struct {
- int capoff;
- int msgctrl;
- int emulated;
- } psc_msi;
- struct {
- int capoff;
- } psc_msix;
- struct pcisel psc_sel;
-};
+static int pcifd = -1;
static int
msi_caplen(int msgctrl)
@@ -115,7 +102,7 @@
return (len);
}
-static uint32_t
+uint32_t
read_config(const struct pcisel *sel, long reg, int width)
{
struct pci_io pi;
@@ -131,7 +118,7 @@
return (pi.pi_data);
}
-static void
+void
write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
{
struct pci_io pi;
@@ -581,6 +568,17 @@
sc->psc_sel.pc_dev = slot;
sc->psc_sel.pc_func = func;
+ /* copy physical PCI header to virtual cfgspace */
+ for (uint32_t i = 0; i < PCI_CAP_START_OFFSET; ++i) {
+ /*
+ * INTLINE and INTPIN shouldn't be aligned with it's physical
+ * value. They are already set by pci_emul_init.
+ */
+ if (i == PCIR_INTLINE || i == PCIR_INTPIN)
+ continue;
+ pci_set_cfgdata8(pi, i, read_config(&sc->psc_sel, i, 1));
+ }
+
if (cfginitmsi(sc) != 0) {
warnx("failed to initialize MSI for PCI %d/%d/%d",
bus, slot, func);
@@ -601,6 +599,22 @@
return (error);
}
+/*
+ * Install `rhandler` and `whandler` as the config space read/write handlers
+ * for the `len` byte offsets starting at `reg`.
+ *
+ * Returns 0 on success. Returns -1 without touching any handler if the range
+ * [reg, reg + len) doesn't fit into the config space [0, PCI_REGMAX].
+ */
+int
+set_pcir_handler(struct passthru_softc *const sc, const uint32_t reg,
+    const uint32_t len, const cfgread_handler rhandler,
+    const cfgwrite_handler whandler)
+{
+	/*
+	 * Compare len against the remaining space instead of computing
+	 * reg + len: the sum wraps around for a large len and would slip
+	 * through the check.
+	 */
+	if (reg > PCI_REGMAX || len > PCI_REGMAX + 1 - reg)
+		return (-1);
+
+	for (uint32_t i = 0; i < len; ++i) {
+		sc->psc_pcir_rhandler[reg + i] = rhandler;
+		sc->psc_pcir_whandler[reg + i] = whandler;
+	}
+
+	return (0);
+}
+
static int
passthru_legacy_config(nvlist_t *nvl, const char *opts)
{
@@ -621,9 +635,101 @@
set_config_value_node(nvl, "slot", value);
snprintf(value, sizeof(value), "%d", func);
set_config_value_node(nvl, "func", value);
+
+ return (pci_parse_legacy_config(nvl, strchr(opts, ',')));
+}
+
+/*
+ * Load the option ROM image `romfile` for a passthru device.
+ *
+ * The file content is copied into a ROM segment obtained from
+ * pci_emul_alloc_rom() and the segment is recorded in
+ * sc->psc_bar[PCI_ROM_IDX]. A NULL romfile means "no ROM" and is not an
+ * error. Returns 0 on success, -1 if the file can't be opened, stat'ed or
+ * mapped, or the error of pci_emul_alloc_rom(). ctx is currently unused.
+ */
+static int
+passthru_init_rom(struct vmctx *const ctx, struct passthru_softc *const sc,
+ const char *const romfile)
+{
+ if (romfile == NULL) {
+ return (0);
+ }
+
+ const int fd = open(romfile, O_RDONLY);
+ if (fd < 0) {
+ warnx("%s: can't open romfile \"%s\"", __func__, romfile);
+ return (-1);
+ }
+
+ /* the ROM size is the size of the file */
+ struct stat sbuf;
+ if (fstat(fd, &sbuf) < 0) {
+ warnx("%s: can't fstat romfile \"%s\"", __func__, romfile);
+ close(fd);
+ return (-1);
+ }
+ const uint64_t rom_size = sbuf.st_size;
+
+ /* temporary read-only view of the file, only used as memcpy source */
+ void *const rom_data = mmap(NULL, rom_size, PROT_READ, MAP_SHARED, fd,
+ 0);
+ if (rom_data == MAP_FAILED) {
+ warnx("%s: unable to mmap romfile \"%s\" (%d)", __func__,
+ romfile, errno);
+ close(fd);
+ return (-1);
+ }
+
+ void *rom_addr;
+ int error = pci_emul_alloc_rom(sc->psc_pi, rom_size, &rom_addr);
+ if (error) {
+ warnx("%s: failed to alloc rom segment", __func__);
+ munmap(rom_data, rom_size);
+ close(fd);
+ return (error);
+ }
+ memcpy(rom_addr, rom_data, rom_size);
+
+ /* addr holds the pointer returned by pci_emul_alloc_rom() */
+ sc->psc_bar[PCI_ROM_IDX].type = PCIBAR_ROM;
+ sc->psc_bar[PCI_ROM_IDX].addr = (uint64_t)rom_addr;
+ sc->psc_bar[PCI_ROM_IDX].size = rom_size;
+
+ /* the copy is complete, the file mapping isn't needed anymore */
+ munmap(rom_data, rom_size);
+ close(fd);
+
return (0);
}
+/*
+ * Apply device specific quirks to a passthru device. Vendor and class are
+ * read from the physical device; at the moment only Intel display devices
+ * (GVT-d) need special treatment. Returns 0 if no quirk applies, otherwise
+ * the result of the quirk's init function.
+ */
+static int
+passthru_init_quirks(struct vmctx *const ctx, struct pci_devinst *const pi,
+    nvlist_t *const nvl)
+{
+	struct passthru_softc *const softc = pi->pi_arg;
+	const uint16_t vid = read_config(&softc->psc_sel, PCIR_VENDOR, 0x02);
+	const uint8_t devclass = read_config(&softc->psc_sel, PCIR_CLASS, 0x01);
+
+	/* quirks exist for display devices only, and there only for Intel */
+	if (devclass == PCIC_DISPLAY && vid == PCI_VENDOR_INTEL)
+		return (gvt_d_init(ctx, pi, nvl));
+
+	return (0);
+}
+
+/*
+ * Counterpart of passthru_init_quirks(): release the resources of the device
+ * specific quirks. Safe to call when the softc was never attached to pi
+ * (pi->pi_arg == NULL); nothing happens in that case.
+ */
+static void
+passthru_deinit_quirks(struct vmctx *const ctx, struct pci_devinst *const pi)
+{
+	struct passthru_softc *const sc = pi->pi_arg;
+
+	if (sc == NULL)
+		return;
+
+	const uint16_t vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 0x02);
+	const uint8_t class = read_config(&sc->psc_sel, PCIR_CLASS, 0x01);
+
+	/* currently only display devices have quirks */
+	if (class != PCIC_DISPLAY)
+		return;
+
+	/*
+	 * gvt_d_deinit() returns void, so it must be called as a statement;
+	 * "return <expr>;" is not valid in a function returning void.
+	 */
+	if (vendor == PCI_VENDOR_INTEL)
+		gvt_d_deinit(ctx, pi);
+}
+
static int
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
{
@@ -689,9 +795,34 @@
sc->psc_pi = pi;
/* initialize config space */
- error = cfginit(ctx, pi, bus, slot, func);
+ if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
+ goto done;
+
+ /* set default handler for all PCI registers */
+ if ((error = set_pcir_handler(sc, 0, PCI_REGMAX + 1,
+ passthru_cfgread_default, passthru_cfgwrite_default)) != 0)
+ goto done;
+ /* protect PCI header */
+ if ((error = set_pcir_handler(sc, 0, PCI_CAP_START_OFFSET,
+ passthru_cfgread_emulate, passthru_cfgwrite_emulate)) != 0)
+ goto done;
+ /* allow access to command and status register */
+ if ((error = set_pcir_handler(sc, PCIR_COMMAND, 0x04,
+ passthru_cfgread_default, passthru_cfgwrite_default)) != 0)
+ goto done;
+
+ if ((error = passthru_init_quirks(ctx, pi, nvl)) != 0)
+ goto done;
+
+ /* initialize ROM */
+ if ((error = passthru_init_rom(ctx, sc,
+ get_config_value_node(nvl, "rom"))) != 0)
+ goto done;
+
+ error = 0; /* success */
done:
if (error) {
+ passthru_deinit_quirks(ctx, pi);
free(sc);
vm_unassign_pptdev(ctx, bus, slot, func);
}
@@ -701,7 +832,8 @@
static int
bar_access(int coff)
{
- if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1))
+ if ((coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) ||
+ coff == PCIR_BIOS)
return (1);
else
return (0);
@@ -736,29 +868,27 @@
+/*
+ * Config space read entry point of the passthru device. Forwards the access
+ * unchanged to the read handler registered for offset coff and returns its
+ * result.
+ */
static int
passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int coff, int bytes, uint32_t *rv)
+{
+ struct passthru_softc *const sc = pi->pi_arg;
+
+ /*
+ * Only the handler of the first byte is consulted, even for multi-byte
+ * accesses. coff isn't range checked here - assumes the caller only
+ * passes offsets <= PCI_REGMAX; TODO confirm.
+ */
+ return sc->psc_pcir_rhandler[coff](ctx, vcpu, pi, coff, bytes, rv);
+}
+
+int
+passthru_cfgread_default(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ uint32_t *const rv)
{
struct passthru_softc *sc;
sc = pi->pi_arg;
/*
- * PCI BARs and MSI capability is emulated.
+ * MSI capability is emulated.
*/
- if (bar_access(coff) || msicap_access(sc, coff) ||
- msixcap_access(sc, coff))
+ if (msicap_access(sc, coff) || msixcap_access(sc, coff))
return (-1);
-#ifdef LEGACY_SUPPORT
- /*
- * Emulate PCIR_CAP_PTR if this device does not support MSI capability
- * natively.
- */
- if (sc->psc_msi.emulated) {
- if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4)
- return (-1);
- }
-#endif
-
/*
* Emulate the command register. If a single read reads both the
* command and status registers, read the status register from the
@@ -778,9 +908,27 @@
return (0);
}
+/*
+ * Read handler for registers which must not be read from the physical
+ * device. Always returns -1 and leaves *rv untouched.
+ * NOTE(review): -1 presumably makes the caller use the emulated config space
+ * value - confirm against pci_emul.c.
+ */
+int
+passthru_cfgread_emulate(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ uint32_t *const rv)
+{
+ return (-1);
+}
+
+/*
+ * Config space write entry point of the passthru device. Forwards the access
+ * unchanged to the write handler registered for offset coff and returns its
+ * result.
+ */
static int
passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int coff, int bytes, uint32_t val)
+{
+ struct passthru_softc *const sc = pi->pi_arg;
+
+ /*
+ * Only the handler of the first byte is consulted, even for multi-byte
+ * accesses. coff isn't range checked here - assumes the caller only
+ * passes offsets <= PCI_REGMAX; TODO confirm.
+ */
+ return sc->psc_pcir_whandler[coff](ctx, vcpu, pi, coff, bytes, val);
+}
+
+int
+passthru_cfgwrite_default(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ const uint32_t val)
{
int error, msix_table_entries, i;
struct passthru_softc *sc;
@@ -788,12 +936,6 @@
sc = pi->pi_arg;
- /*
- * PCI BARs are emulated
- */
- if (bar_access(coff))
- return (-1);
-
/*
* MSI capability is emulated
*/
@@ -834,6 +976,7 @@
return (0);
}
+ uint32_t write_val = val;
#ifdef LEGACY_SUPPORT
/*
* If this device does not support MSI natively then we cannot let
@@ -842,23 +985,31 @@
*/
if (sc->psc_msi.emulated && pci_msi_enabled(pi)) {
if (coff == PCIR_COMMAND && bytes == 2)
- val &= ~PCIM_CMD_INTxDIS;
+ write_val &= ~PCIM_CMD_INTxDIS;
}
#endif
- write_config(&sc->psc_sel, coff, bytes, val);
+ write_config(&sc->psc_sel, coff, bytes, write_val);
if (coff == PCIR_COMMAND) {
cmd_old = pci_get_cfgdata16(pi, PCIR_COMMAND);
if (bytes == 1)
- pci_set_cfgdata8(pi, PCIR_COMMAND, val);
+ pci_set_cfgdata8(pi, PCIR_COMMAND, write_val);
else if (bytes == 2)
- pci_set_cfgdata16(pi, PCIR_COMMAND, val);
+ pci_set_cfgdata16(pi, PCIR_COMMAND, write_val);
pci_emul_cmd_changed(pi, cmd_old);
}
return (0);
}
+/*
+ * Write handler for registers which must not be written to the physical
+ * device. Always returns -1 and ignores val.
+ * NOTE(review): -1 presumably makes the caller update the emulated config
+ * space instead - confirm against pci_emul.c.
+ */
+int
+passthru_cfgwrite_emulate(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ const uint32_t val)
+{
+ return (-1);
+}
+
static void
passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
uint64_t offset, int size, uint64_t value)
@@ -993,16 +1144,49 @@
}
+/*
+ * (Un)map the ROM of a passthru device when its ROM BAR gets disabled or
+ * enabled. The ROM lives in the VM_PCIROM memory segment at offset
+ * pi->pi_romoffset and is mapped read/execute only at the BAR address.
+ * Failures are reported with a warning only.
+ */
static void
-passthru_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
- int enabled, uint64_t address)
+passthru_addr_rom(struct pci_devinst *const pi, const int idx,
+    const int enabled)
{
+	const uint64_t addr = pi->pi_bar[idx].addr;
+	const uint64_t size = pi->pi_bar[idx].size;
- if (pi->pi_bar[baridx].type == PCIBAR_IO)
- return;
- if (baridx == pci_msix_table_bar(pi))
- passthru_msix_addr(ctx, pi, baridx, enabled, address);
- else
- passthru_mmio_addr(ctx, pi, baridx, enabled, address);
+	if (!enabled) {
+		if (vm_munmap_memseg(pi->pi_vmctx, addr, size) != 0) {
+			warnx("%s: munmap_memseg @ [%016lx - %016lx] failed",
+			    __func__, addr, addr + size);
+		}
+	} else {
+		if (vm_mmap_memseg(pi->pi_vmctx, addr, VM_PCIROM,
+		    pi->pi_romoffset, size, PROT_READ | PROT_EXEC) != 0) {
+			warnx("%s: mmap_memseg @ [%016lx - %016lx] failed",
+			    __func__, addr, addr + size);
+		}
+	}
+}
+
+/*
+ * BAR address change callback of the passthru device: dispatch to the
+ * handler matching the type of BAR `baridx`. An unknown BAR type is fatal.
+ */
+static void
+passthru_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
+    int enabled, uint64_t address)
+{
+	/* IO BARs are emulated, there is nothing to remap */
+	if (pi->pi_bar[baridx].type == PCIBAR_IO)
+		return;
+
+	if (pi->pi_bar[baridx].type == PCIBAR_ROM) {
+		passthru_addr_rom(pi, baridx, enabled);
+		return;
+	}
+
+	if (pi->pi_bar[baridx].type == PCIBAR_MEM32 ||
+	    pi->pi_bar[baridx].type == PCIBAR_MEM64) {
+		/* the BAR containing the MSI-X table needs special handling */
+		if (baridx != pci_msix_table_bar(pi))
+			passthru_mmio_addr(ctx, pi, baridx, enabled, address);
+		else
+			passthru_msix_addr(ctx, pi, baridx, enabled, address);
+		return;
+	}
+
+	errx(4, "%s: invalid BAR type %d", __func__,
+	    pi->pi_bar[baridx].type);
+}
struct pci_devemu passthru = {
diff --git a/usr.sbin/bhyve/qemu_fwcfg.h b/usr.sbin/bhyve/qemu_fwcfg.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/qemu_fwcfg.h
@@ -0,0 +1,24 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#pragma once
+
+#include <vmmapi.h>
+
+/* the architecture bit of the selector has two possible values */
+#define QEMU_FWCFG_MAX_ARCHS 0x2
+/*
+ * The selector index is 14 bits wide, so the indices 0x0000 - 0x3FFF are
+ * valid. The item table therefore needs 0x4000 slots per architecture,
+ * otherwise index 0x3FFF addresses memory behind the table.
+ */
+#define QEMU_FWCFG_MAX_ENTRIES 0x4000
+/* size of the name field of a fw_cfg file directory entry */
+#define QEMU_FWCFG_MAX_NAME 56
+
+/*
+ * One fw_cfg item as it is handed out byte by byte through the data port.
+ * An item whose data pointer is NULL counts as unused.
+ */
+struct qemu_fwcfg_item {
+ uint32_t size; /* number of bytes the guest may read from data */
+ uint8_t *data; /* item content; referenced, not copied */
+};
+
+/*
+ * Add a file to the fw_cfg file directory. data is referenced, not copied,
+ * so it has to stay valid. Returns 0 on success.
+ */
+int qemu_fwcfg_add_file(const uint8_t name[QEMU_FWCFG_MAX_NAME],
+ const uint32_t size, void *const data);
+/*
+ * Create the fw_cfg device (ACPI device, io ports, common items) and add all
+ * files queued by qemu_fwcfg_parse_cmdline_arg(). Returns 0 on success.
+ */
+int qemu_fwcfg_init(struct vmctx *const ctx);
+/*
+ * Parse one "[name=]<name>,(string|file)=<value>" option and queue the
+ * resulting file. Returns 0 on success.
+ */
+int qemu_fwcfg_parse_cmdline_arg(const char *opt);
diff --git a/usr.sbin/bhyve/qemu_fwcfg.c b/usr.sbin/bhyve/qemu_fwcfg.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/qemu_fwcfg.c
@@ -0,0 +1,541 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/endian.h>
+#include <sys/queue.h>
+
+#include <machine/vmm.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "acpi_device.h"
+#include "inout.h"
+#include "qemu_fwcfg.h"
+
+#define QEMU_FWCFG_ACPI_DEVICE_NAME "FWCF"
+#define QEMU_FWCFG_ACPI_HARDWARE_ID "QEMU0002"
+
+#define QEMU_FWCFG_SELECTOR_PORT_NUMBER 0x510
+#define QEMU_FWCFG_SELECTOR_PORT_SIZE 1
+#define QEMU_FWCFG_SELECTOR_PORT_FLAGS IOPORT_F_INOUT
+#define QEMU_FWCFG_DATA_PORT_NUMBER 0x511
+#define QEMU_FWCFG_DATA_PORT_SIZE 1
+#define QEMU_FWCFG_DATA_PORT_FLAGS \
+ IOPORT_F_INOUT /* QEMU v2.4+ ignores writes */
+
+#define QEMU_FWCFG_ARCHITECTURE_MASK 0x0001
+#define QEMU_FWCFG_INDEX_MASK 0x3FFF
+
+#define QEMU_FWCFG_SELECT_READ 0
+#define QEMU_FWCFG_SELECT_WRITE 1
+
+#define QEMU_FWCFG_ARCHITECTURE_GENERIC 0
+#define QEMU_FWCFG_ARCHITECTURE_SPECIFIC 1
+
+#define QEMU_FWCFG_INDEX_SIGNATURE 0x00
+#define QEMU_FWCFG_INDEX_ID 0x01
+#define QEMU_FWCFG_INDEX_FILE_DIR 0x19
+
+#define QEMU_FWCFG_FIRST_FILE_INDEX 0x20
+
+#define QEMU_FWCFG_MIN_FILES 10
+
+/*
+ * The following types describe data which is exchanged with the guest. They
+ * are packed to get exactly the layout the guest expects.
+ */
+#pragma pack(1)
+
+/* value written to the selector port */
+union qemu_fwcfg_selector {
+	struct {
+		uint16_t index : 14;
+		uint16_t writeable : 1;
+		/*
+		 * 0 = generic  | for all architectures
+		 * 1 = specific | only for current architecture
+		 */
+		uint16_t architecture : 1;
+	};
+	uint16_t bits;
+};
+
+struct qemu_fwcfg_signature {
+	uint8_t signature[4];
+};
+
+struct qemu_fwcfg_id {
+	uint32_t interface : 1; /* always set */
+	uint32_t DMA : 1;
+	uint32_t reserved : 30;
+};
+
+/* one entry of the file directory; be_* fields are stored big endian */
+struct qemu_fwcfg_file {
+	uint32_t be_size;
+	uint16_t be_selector;
+	uint16_t reserved;
+	uint8_t name[QEMU_FWCFG_MAX_NAME];
+};
+
+/* file directory: be_count entries follow the header */
+struct qemu_fwcfg_directory {
+	uint32_t be_count;
+	struct qemu_fwcfg_file files[];
+};
+
+#pragma pack()
+
+/*
+ * Host-only device state. It is deliberately not packed: it contains
+ * pointers and the item table, which must stay naturally aligned because
+ * pointers to its members are taken.
+ */
+struct qemu_fwcfg_softc {
+	struct acpi_device *acpi_dev;
+
+	uint32_t data_offset; /* read position inside the selected item */
+	union qemu_fwcfg_selector selector; /* currently selected item */
+	struct qemu_fwcfg_item items[QEMU_FWCFG_MAX_ARCHS]
+				    [QEMU_FWCFG_MAX_ENTRIES];
+	struct qemu_fwcfg_directory *directory;
+};
+
+static struct qemu_fwcfg_softc sc;
+
+/*
+ * A fw_cfg file requested on the command line. Entries are queued on
+ * user_files by qemu_fwcfg_parse_cmdline_arg() and turned into real fw_cfg
+ * files by qemu_fwcfg_add_user_files().
+ */
+struct qemu_fwcfg_user_file {
+ STAILQ_ENTRY(qemu_fwcfg_user_file) chain;
+ uint8_t name[QEMU_FWCFG_MAX_NAME];
+ uint32_t size; /* size of data in bytes */
+ void *data; /* heap allocated file content */
+};
+/* list of all user defined files, in command line order */
+STAILQ_HEAD(qemu_fwcfg_user_file_list,
+ qemu_fwcfg_user_file) user_files = STAILQ_HEAD_INITIALIZER(user_files);
+
+/*
+ * IO port handler of the selector port. A write selects a new item and
+ * rewinds the data offset, a read returns the current selector value.
+ * Always returns 0.
+ */
+static int
+qemu_fwcfg_selector_port_handler(struct vmctx *const ctx, const int vcpu,
+    const int in, const int port, const int bytes, uint32_t *const eax,
+    void *const arg)
+{
+	if (!in) {
+		/* a newly selected item is always read from its beginning */
+		sc.data_offset = 0;
+		sc.selector.bits = *eax;
+	} else {
+		*eax = sc.selector.bits;
+	}
+
+	return (0);
+}
+
+/*
+ * IO port handler of the data port. Every read returns the next byte of the
+ * currently selected item and advances the data offset. Reading an unused
+ * item or reading past the end of an item yields 0 and a warning. Writes
+ * are rejected with -1.
+ */
+static int
+qemu_fwcfg_data_port_handler(struct vmctx *const ctx, const int vcpu,
+    const int in, const int port, const int bytes, uint32_t *const eax,
+    void *const arg)
+{
+	if (!in) {
+		warnx("%s: Writes to qemu fwcfg data port aren't allowed",
+		    __func__);
+		return (-1);
+	}
+
+	/* look up the item addressed by the current selector */
+	const char *const arch_name = sc.selector.architecture ? "specific" :
+								  "generic";
+	struct qemu_fwcfg_item *const selected =
+	    &sc.items[sc.selector.architecture][sc.selector.index];
+
+	uint32_t value = 0x00;
+	if (selected->data == NULL) {
+		warnx(
+		    "%s: qemu fwcfg item doesn't exist (architecture %s index 0x%x)",
+		    __func__, arch_name, sc.selector.index);
+	} else if (sc.data_offset >= selected->size) {
+		warnx(
+		    "%s: qemu fwcfg item read exceeds size (architecture %s index 0x%x size 0x%x offset 0x%x)",
+		    __func__, arch_name, sc.selector.index, selected->size,
+		    sc.data_offset);
+	} else {
+		/* hand out the next byte of the item */
+		value = selected->data[sc.data_offset];
+		sc.data_offset++;
+	}
+
+	*eax = value;
+	return (0);
+}
+
+/*
+ * Register `data` (`size` bytes) as the fw_cfg item selected by
+ * `architecture` and `index`. Both values are truncated to the width they
+ * have inside the selector. The data is referenced, not copied.
+ * Returns 0 on success and -1 if the slot is already in use.
+ */
+static int
+qemu_fwcfg_add_item(const uint16_t architecture, const uint16_t index,
+    const uint32_t size, void *const data)
+{
+	const uint16_t arch_bit = architecture & QEMU_FWCFG_ARCHITECTURE_MASK;
+	const uint16_t slot_idx = index & QEMU_FWCFG_INDEX_MASK;
+	struct qemu_fwcfg_item *const slot = &sc.items[arch_bit][slot_idx];
+
+	/* a slot with a data pointer belongs to an existing item */
+	if (slot->data != NULL) {
+		warnx("%s: qemu fwcfg item exists (architecture %s index 0x%x)",
+		    __func__, arch_bit ? "specific" : "generic", slot_idx);
+		return (-1);
+	}
+
+	slot->size = size;
+	slot->data = data;
+
+	return (0);
+}
+
+/*
+ * Allocate an empty file directory with room for QEMU_FWCFG_MIN_FILES
+ * entries and register it as fw_cfg item QEMU_FWCFG_INDEX_FILE_DIR.
+ * sc.directory is only set if the registration succeeds.
+ * Returns 0 on success, -ENOMEM if the allocation fails, otherwise the error
+ * of qemu_fwcfg_add_item().
+ */
+static int
+qemu_fwcfg_add_item_file_dir(void)
+{
+	/* alloc directory, calloc leaves count and all entries zeroed */
+	const size_t size = sizeof(struct qemu_fwcfg_directory) +
+	    QEMU_FWCFG_MIN_FILES * sizeof(struct qemu_fwcfg_file);
+	struct qemu_fwcfg_directory *const fwcfg_directory = calloc(1, size);
+	if (fwcfg_directory == NULL)
+		return (-ENOMEM);
+
+	/* add directory, the guest only sees the header while it's empty */
+	const int error = qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+	    QEMU_FWCFG_INDEX_FILE_DIR, sizeof(struct qemu_fwcfg_directory),
+	    (uint8_t *)fwcfg_directory);
+	if (error != 0) {
+		/* don't leak the directory or leave a dangling pointer */
+		free(fwcfg_directory);
+		return (error);
+	}
+
+	sc.directory = fwcfg_directory;
+
+	return (0);
+}
+
+/*
+ * Register the fw_cfg ID item (QEMU_FWCFG_INDEX_ID). It announces the
+ * traditional interface and no DMA support.
+ * Returns 0 on success, -ENOMEM if the allocation fails, otherwise the error
+ * of qemu_fwcfg_add_item().
+ */
+static int
+qemu_fwcfg_add_item_id(void)
+{
+	/* alloc id */
+	struct qemu_fwcfg_id *const fwcfg_id = calloc(1, sizeof(*fwcfg_id));
+	if (fwcfg_id == NULL)
+		return (-ENOMEM);
+
+	/* init id */
+	fwcfg_id->interface = 1;
+	fwcfg_id->DMA = 0;
+
+	/*
+	 * QEMU specifies ID as little endian.
+	 * Convert fwcfg_id to little endian.
+	 */
+	uint32_t *const le_fwcfg_id_ptr = (uint32_t *)fwcfg_id;
+	*le_fwcfg_id_ptr = htole32(*le_fwcfg_id_ptr);
+
+	/* add id, the item is released again if it can't be registered */
+	const int error = qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+	    QEMU_FWCFG_INDEX_ID, sizeof(struct qemu_fwcfg_id),
+	    (uint8_t *)fwcfg_id);
+	if (error != 0)
+		free(fwcfg_id);
+
+	return (error);
+}
+
+/*
+ * Register the fw_cfg signature item (QEMU_FWCFG_INDEX_SIGNATURE) with the
+ * content "QEMU".
+ * Returns 0 on success, -ENOMEM if the allocation fails, otherwise the error
+ * of qemu_fwcfg_add_item().
+ */
+static int
+qemu_fwcfg_add_item_signature(void)
+{
+	/* alloc signature */
+	struct qemu_fwcfg_signature *const fwcfg_signature = calloc(1,
+	    sizeof(*fwcfg_signature));
+	if (fwcfg_signature == NULL)
+		return (-ENOMEM);
+
+	/* init signature */
+	fwcfg_signature->signature[0] = 'Q';
+	fwcfg_signature->signature[1] = 'E';
+	fwcfg_signature->signature[2] = 'M';
+	fwcfg_signature->signature[3] = 'U';
+
+	/* add signature, the item is released again if it can't be registered */
+	const int error = qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+	    QEMU_FWCFG_INDEX_SIGNATURE, sizeof(struct qemu_fwcfg_signature),
+	    (uint8_t *)fwcfg_signature);
+	if (error != 0)
+		free(fwcfg_signature);
+
+	return (error);
+}
+
+/*
+ * Register `handler` for `size` io ports starting at `port`. All fields of
+ * the port description which aren't passed in stay zero. Returns the result
+ * of register_inout().
+ */
+static int
+qemu_fwcfg_register_port(const char *const name, const int port, const int size,
+    const int flags, const inout_func_t handler)
+{
+	struct inout_port desc = {
+		.name = name,
+		.port = port,
+		.size = size,
+		.flags = flags,
+		.handler = handler,
+	};
+
+	return (register_inout(&desc));
+}
+
+/*
+ * Add a file to the fw_cfg file directory and register its content as a
+ * fw_cfg item.
+ *
+ * name: file name; at most QEMU_FWCFG_MAX_NAME - 1 characters are used
+ * size: size of data in bytes
+ * data: file content; referenced, not copied, so it has to stay valid
+ *
+ * The directory is kept sorted by name. The first QEMU_FWCFG_MIN_FILES files
+ * fit into the initial directory, afterwards it's reallocated for every new
+ * file. Nothing is modified if the function fails.
+ *
+ * Returns 0 on success, -ENOMEM if the directory can't grow, -1 if the
+ * directory doesn't exist yet or no selector is left, otherwise the error of
+ * qemu_fwcfg_add_item().
+ */
+int
+qemu_fwcfg_add_file(const uint8_t name[QEMU_FWCFG_MAX_NAME], const uint32_t size,
+    void *const data)
+{
+	struct qemu_fwcfg_directory *new_directory = NULL;
+	char file_name[QEMU_FWCFG_MAX_NAME] = { 0 };
+
+	/* the directory is created by qemu_fwcfg_init */
+	if (sc.directory == NULL) {
+		warnx("%s: qemu fwcfg file directory isn't initialized",
+		    __func__);
+		return (-1);
+	}
+
+	/*
+	 * Work on a zero padded, always terminated copy of the name. The
+	 * caller's buffer isn't guaranteed to contain a terminator.
+	 */
+	memcpy(file_name, name,
+	    strnlen((const char *)name, sizeof(file_name) - 1));
+
+	/*
+	 * QEMU specifies count as big endian.
+	 * Convert it to host endian to work with it.
+	 */
+	const uint32_t old_count = be32toh(sc.directory->be_count);
+	const uint32_t count = old_count + 1;
+
+	/* selector of the new file, it has to fit into the selector index */
+	const uint32_t index = QEMU_FWCFG_FIRST_FILE_INDEX + old_count;
+	if (index > QEMU_FWCFG_INDEX_MASK) {
+		warnx("%s: no qemu fwcfg selector left for file \"%s\"",
+		    __func__, file_name);
+		return (-1);
+	}
+
+	/*
+	 * files should be sorted alphabetical, get index for new file.
+	 * Only the old_count existing entries take part in the comparison.
+	 */
+	uint32_t file_index;
+	for (file_index = 0; file_index < old_count; ++file_index) {
+		if (strncmp(file_name,
+		    (const char *)sc.directory->files[file_index].name,
+		    QEMU_FWCFG_MAX_NAME) < 0)
+			break;
+	}
+
+	/*
+	 * Allocate a larger directory before anything is modified, so that a
+	 * failure leaves the old state untouched.
+	 */
+	if (count > QEMU_FWCFG_MIN_FILES) {
+		const size_t new_size = sizeof(struct qemu_fwcfg_directory) +
+		    count * sizeof(struct qemu_fwcfg_file);
+		new_directory = calloc(1, new_size);
+		if (new_directory == NULL) {
+			warnx(
+			    "%s: Unable to allocate a new qemu fwcfg files directory (count %u)",
+			    __func__, count);
+			return (-ENOMEM);
+		}
+	}
+
+	/* add file to items list */
+	const int error = qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+	    index, size, data);
+	if (error != 0) {
+		free(new_directory);
+		return (error);
+	}
+
+	/* number of existing entries which have to move behind the new file */
+	const size_t tail_size = (old_count - file_index) *
+	    sizeof(struct qemu_fwcfg_file);
+
+	if (new_directory != NULL) {
+		/* copy files below file_index to new directory */
+		memcpy(new_directory->files, sc.directory->files,
+		    file_index * sizeof(struct qemu_fwcfg_file));
+
+		/* copy files behind file_index to new directory */
+		memcpy(&new_directory->files[file_index + 1],
+		    &sc.directory->files[file_index], tail_size);
+
+		/* replace old directory */
+		free(sc.directory);
+		sc.directory = new_directory;
+
+		/* adjust directory pointer of the directory item */
+		sc.items[0][QEMU_FWCFG_INDEX_FILE_DIR].data =
+		    (uint8_t *)sc.directory;
+	} else {
+		/* shift files behind file_index, the regions overlap */
+		memmove(&sc.directory->files[file_index + 1],
+		    &sc.directory->files[file_index], tail_size);
+	}
+
+	/*
+	 * QEMU specifies count, size and index as big endian.
+	 * Save these values in big endian to simplify guest reads of these
+	 * values. The entry is cleared first, because it may still contain
+	 * the data of the file which was shifted away.
+	 */
+	struct qemu_fwcfg_file *const file = &sc.directory->files[file_index];
+	memset(file, 0, sizeof(*file));
+	file->be_size = htobe32(size);
+	file->be_selector = htobe16(index);
+	memcpy(file->name, file_name, sizeof(file->name));
+	sc.directory->be_count = htobe32(count);
+
+	/* set new size for the fwcfg_file_directory */
+	sc.items[0][QEMU_FWCFG_INDEX_FILE_DIR].size =
+	    sizeof(struct qemu_fwcfg_directory) +
+	    count * sizeof(struct qemu_fwcfg_file);
+
+	return (0);
+}
+
+/*
+ * Turn every file queued on user_files into a fw_cfg file. Stops at the
+ * first file which can't be added and returns its error, otherwise 0.
+ */
+static int
+qemu_fwcfg_add_user_files()
+{
+	const struct qemu_fwcfg_user_file *entry;
+	int rc = 0;
+
+	STAILQ_FOREACH (entry, &user_files, chain) {
+		rc = qemu_fwcfg_add_file(entry->name, entry->size, entry->data);
+		if (rc != 0)
+			break;
+	}
+
+	return (rc);
+}
+
+/*
+ * Set up the fw_cfg device: create its ACPI device, register the common
+ * items (signature, id, file directory), install the io port handlers and
+ * add the files requested on the command line. The steps run in this order
+ * and the first failing step aborts the setup.
+ *
+ * Returns 0 on success, otherwise the error of the failing step.
+ */
+int
+qemu_fwcfg_init(struct vmctx *const ctx)
+{
+ int error;
+
+ error = acpi_device_create(&sc.acpi_dev, ctx,
+ QEMU_FWCFG_ACPI_DEVICE_NAME, QEMU_FWCFG_ACPI_HARDWARE_ID);
+ if (error) {
+ warnx("%s: failed to create ACPI device for QEMU FwCfg",
+ __func__);
+ goto done;
+ }
+
+ /* selector and data port are announced as one range of two ports */
+ error = acpi_device_add_res_fixed_ioport(sc.acpi_dev,
+ QEMU_FWCFG_SELECTOR_PORT_NUMBER, 2);
+ if (error) {
+ warnx("%s: failed to add fixed IO port for QEMU FwCfg",
+ __func__);
+ goto done;
+ }
+
+ /* add common fwcfg items */
+ if ((error = qemu_fwcfg_add_item_signature()) != 0) {
+ warnx("%s: Unable to add signature item", __func__);
+ goto done;
+ }
+ if ((error = qemu_fwcfg_add_item_id()) != 0) {
+ warnx("%s: Unable to add id item", __func__);
+ goto done;
+ }
+ if ((error = qemu_fwcfg_add_item_file_dir()) != 0) {
+ warnx("%s: Unable to add file_dir item", __func__);
+ goto done;
+ }
+
+ /* add handlers for fwcfg ports */
+ if ((error = qemu_fwcfg_register_port("qemu_fwcfg_selector",
+ QEMU_FWCFG_SELECTOR_PORT_NUMBER, QEMU_FWCFG_SELECTOR_PORT_SIZE,
+ QEMU_FWCFG_SELECTOR_PORT_FLAGS,
+ qemu_fwcfg_selector_port_handler)) != 0) {
+ warnx("%s: Unable to register qemu fwcfg selector port 0x%x",
+ __func__, QEMU_FWCFG_SELECTOR_PORT_NUMBER);
+ goto done;
+ }
+ if ((error = qemu_fwcfg_register_port("qemu_fwcfg_data",
+ QEMU_FWCFG_DATA_PORT_NUMBER, QEMU_FWCFG_DATA_PORT_SIZE,
+ QEMU_FWCFG_DATA_PORT_FLAGS, qemu_fwcfg_data_port_handler)) !=
+ 0) {
+ warnx("%s: Unable to register qemu fwcfg data port 0x%x",
+ __func__, QEMU_FWCFG_DATA_PORT_NUMBER);
+ goto done;
+ }
+
+ /* the file directory has to exist before user files can be added */
+ if ((error = qemu_fwcfg_add_user_files()) != 0) {
+ warnx("%s: Unable to add user files", __func__);
+ goto done;
+ }
+
+done:
+ /*
+ * Only the ACPI device is released on failure; items and port handlers
+ * registered so far stay in place.
+ * NOTE(review): this also runs if acpi_device_create() itself failed -
+ * confirm that acpi_device_destroy() copes with the sc.acpi_dev value
+ * left behind in that case.
+ */
+ if (error) {
+ acpi_device_destroy(sc.acpi_dev);
+ }
+
+ return (error);
+}
+
+/*
+ * Complain about the invalid fw_cfg option `opt` and print the expected
+ * format of the option.
+ */
+static void
+qemu_fwcfg_usage(const char *opt)
+{
+ warnx("Invalid fw_cfg option \"%s\"", opt);
+ warnx("-f [name=]<name>,(string|file)=<value>");
+}
+
+/*
+ * Parses the cmdline argument for user defined fw_cfg items. The cmdline
+ * argument has the format:
+ * "-f [name=]<name>,(string|file)=<value>"
+ *
+ * E.g.: "-f opt/com.page/example,string=Hello"
+ *
+ * "string" items contain <value> including its terminating NUL, "file" items
+ * contain the content of the file <value>. On success the item is queued on
+ * user_files; it becomes visible to the guest when qemu_fwcfg_init adds the
+ * queued files.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails and -1 if the option
+ * is malformed or the file can't be read. Nothing is queued and nothing is
+ * leaked on failure.
+ */
+int
+qemu_fwcfg_parse_cmdline_arg(const char *opt)
+{
+	const char *opt_ptr, *opt_end;
+	size_t name_len;
+	int error = -1;
+	int fd = -1;
+
+	/*
+	 * The entry is zeroed, so that the name is always terminated and
+	 * padded with zeros and data is NULL until it's allocated.
+	 */
+	struct qemu_fwcfg_user_file *const fwcfg_file = calloc(1,
+	    sizeof(*fwcfg_file));
+	if (fwcfg_file == NULL) {
+		warnx("Unable to allocate fw_cfg_user_file");
+		return (-ENOMEM);
+	}
+
+	/* get pointer to <name> */
+	opt_ptr = opt;
+	/* If [name=] is specified, skip it */
+	if (strncmp(opt_ptr, "name=", sizeof("name=") - 1) == 0)
+		opt_ptr += sizeof("name=") - 1;
+
+	/* get the end of <name> */
+	opt_end = strchr(opt_ptr, ',');
+	if (opt_end == NULL || opt_end == opt_ptr) {
+		qemu_fwcfg_usage(opt);
+		goto fail;
+	}
+
+	/* check if <name> is too long, it has to fit including its NUL */
+	name_len = opt_end - opt_ptr;
+	if (name_len >= QEMU_FWCFG_MAX_NAME) {
+		warnx("fw_cfg name too long: \"%s\"", opt);
+		goto fail;
+	}
+
+	/* save <name> */
+	memcpy(fwcfg_file->name, opt_ptr, name_len);
+
+	/* set opt_ptr to <value> */
+	opt_ptr = opt_end + 1;
+
+	if (strncmp(opt_ptr, "string=", sizeof("string=") - 1) == 0) {
+		opt_ptr += sizeof("string=") - 1;
+		fwcfg_file->data = strdup(opt_ptr);
+		if (fwcfg_file->data == NULL) {
+			warnx(" Can't duplicate fw_cfg_user_file string \"%s\"",
+			    opt_ptr);
+			error = -ENOMEM;
+			goto fail;
+		}
+		fwcfg_file->size = strlen(opt_ptr) + 1;
+	} else if (strncmp(opt_ptr, "file=", sizeof("file=") - 1) == 0) {
+		opt_ptr += sizeof("file=") - 1;
+
+		/* open file */
+		fd = open(opt_ptr, O_RDONLY);
+		if (fd < 0) {
+			warnx("Can't open fw_cfg_user_file file \"%s\"",
+			    opt_ptr);
+			goto fail;
+		}
+
+		/* get file size */
+		const off_t file_size = lseek(fd, 0, SEEK_END);
+		if (file_size < 0 || lseek(fd, 0, SEEK_SET) < 0) {
+			warnx(
+			    "Can't get size of fw_cfg_user_file file \"%s\"",
+			    opt_ptr);
+			goto fail;
+		}
+		/* the size of a fw_cfg item is limited to 32 bit */
+		if ((uint64_t)file_size != (uint32_t)file_size) {
+			warnx("fw_cfg_user_file file \"%s\" is too large",
+			    opt_ptr);
+			goto fail;
+		}
+
+		/* never ask for 0 bytes, malloc may return NULL for that */
+		fwcfg_file->data = malloc(file_size > 0 ? (size_t)file_size : 1);
+		if (fwcfg_file->data == NULL) {
+			warnx(
+			    "Can't allocate fw_cfg_user_file file \"%s\" (size: 0x%llx)",
+			    opt_ptr, (unsigned long long)file_size);
+			error = -ENOMEM;
+			goto fail;
+		}
+
+		/* read file, read may return less than requested */
+		size_t done = 0;
+		while (done < (size_t)file_size) {
+			const ssize_t len = read(fd,
+			    (uint8_t *)fwcfg_file->data + done,
+			    (size_t)file_size - done);
+			if (len <= 0) {
+				warnx(
+				    "Can't read fw_cfg_user_file file \"%s\"",
+				    opt_ptr);
+				goto fail;
+			}
+			done += len;
+		}
+		fwcfg_file->size = file_size;
+
+		close(fd);
+	} else {
+		qemu_fwcfg_usage(opt);
+		goto fail;
+	}
+
+	STAILQ_INSERT_TAIL(&user_files, fwcfg_file, chain);
+
+	return (0);
+
+fail:
+	if (fd >= 0)
+		close(fd);
+	free(fwcfg_file->data);
+	free(fwcfg_file);
+
+	return (error);
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Apr 27, 12:21 PM (1 h, 49 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17817582
Default Alt Text
D26209.id99348.diff (94 KB)
Attached To
Mode
D26209: GVT-d support for bhyve
Attached
Detach File
Event Timeline
Log In to Comment