D26209.id99348.diff

diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -73,6 +73,7 @@
VM_SYSMEM,
VM_BOOTROM,
VM_FRAMEBUFFER,
+ VM_PCIROM,
};
/*
@@ -180,6 +181,8 @@
vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
vm_paddr_t gpa, size_t len);
+int vm_get_memory_region_info(struct vmctx *const ctx, vm_paddr_t *const base,
+ vm_paddr_t *const size, const enum vm_memory_region_type type);
int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot,
int func, uint64_t addr, uint64_t msg, int numvec);
int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot,
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -1012,6 +1012,25 @@
return (ioctl(ctx->fd, VM_UNMAP_PPTDEV_MMIO, &pptmmio));
}
+int
+vm_get_memory_region_info(struct vmctx *const ctx, vm_paddr_t *const base,
+ vm_paddr_t *const size, const enum vm_memory_region_type type)
+{
+ struct vm_memory_region_info memory_region_info;
+
+ bzero(&memory_region_info, sizeof(memory_region_info));
+ memory_region_info.type = type;
+
+ const int error = ioctl(ctx->fd, VM_GET_MEMORY_REGION_INFO, &memory_region_info);
+
+ if (base)
+ *base = memory_region_info.base;
+ if (size)
+ *size = memory_region_info.size;
+
+ return (error);
+}
+
int
vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
uint64_t addr, uint64_t msg, int numvec)
@@ -1687,7 +1706,7 @@
VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
VM_PPTDEV_MSIX, VM_UNMAP_PPTDEV_MMIO, VM_PPTDEV_DISABLE_MSIX,
- VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
+ VM_GET_MEMORY_REGION_INFO, VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
VM_GLA2GPA_NOFAULT,
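
As a usage illustration (not part of the patch), a userland consumer could query one of these regions through the new libvmmapi wrapper roughly as follows; the VM name and the output formatting are assumptions made for this sketch:

#include <sys/types.h>

#include <machine/vmm.h>

#include <err.h>
#include <stdio.h>
#include <vmmapi.h>

int
main(void)
{
	struct vmctx *ctx;
	vm_paddr_t base, size;

	/* "testvm" is a placeholder; the VM must already exist. */
	ctx = vm_open("testvm");
	if (ctx == NULL)
		err(1, "vm_open");

	/* Ask vmm(4) where the host Intel OpRegion lives. */
	if (vm_get_memory_region_info(ctx, &base, &size,
	    MEMORY_REGION_INTEL_OPREGION) != 0)
		err(1, "vm_get_memory_region_info");

	printf("OpRegion: base 0x%lx size 0x%lx\n", (unsigned long)base,
	    (unsigned long)size);

	return (0);
}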
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -741,6 +741,12 @@
} u;
};
+enum vm_memory_region_type {
+ MEMORY_REGION_INTEL_GSM,
+ MEMORY_REGION_INTEL_OPREGION,
+ MEMORY_REGION_TPM_CONTROL_ADDRESS,
+};
+
/* APIs to inject faults into the guest */
void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid,
int errcode);
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -146,6 +146,17 @@
size_t len;
};
+struct vm_memory_region_info {
+ vm_paddr_t base;
+ vm_paddr_t size;
+ enum vm_memory_region_type type;
+};
+
+#ifdef _KERNEL
+extern vm_paddr_t intel_graphics_stolen_base;
+extern vm_paddr_t intel_graphics_stolen_size;
+#endif
+
struct vm_pptdev_msi {
int vcpu;
int bus;
@@ -309,6 +320,7 @@
IOCNUM_PPTDEV_MSIX = 44,
IOCNUM_PPTDEV_DISABLE_MSIX = 45,
IOCNUM_UNMAP_PPTDEV_MMIO = 46,
+ IOCNUM_GET_MEMORY_REGION_INFO = 47,
/* statistics */
IOCNUM_VM_STATS = 50,
@@ -427,6 +439,8 @@
_IOW('v', IOCNUM_PPTDEV_DISABLE_MSIX, struct vm_pptdev)
#define VM_UNMAP_PPTDEV_MMIO \
_IOW('v', IOCNUM_UNMAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
+#define VM_GET_MEMORY_REGION_INFO \
+ _IOWR('v', IOCNUM_GET_MEMORY_REGION_INFO, struct vm_memory_region_info)
#define VM_INJECT_NMI \
_IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
#define VM_STATS \
diff --git a/sys/amd64/vmm/intel/intelgpu.h b/sys/amd64/vmm/intel/intelgpu.h
new file mode 100644
--- /dev/null
+++ b/sys/amd64/vmm/intel/intelgpu.h
@@ -0,0 +1,185 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#pragma once
+
+/*
+ * See
+ * <https://github.com/tianocore/edk2-platforms/blob/82979ab1ca44101e0b92a9c4bda1dfe64a8249f6/Silicon/Intel/IntelSiliconPkg/Include/IndustryStandard/IgdOpRegion.h>
+ */
+
+#define IGD_OPREGION_HEADER_SIGN "IntelGraphicsMem"
+#define IGD_OPREGION_HEADER_MBOX1 BIT0
+#define IGD_OPREGION_HEADER_MBOX2 BIT1
+#define IGD_OPREGION_HEADER_MBOX3 BIT2
+#define IGD_OPREGION_HEADER_MBOX4 BIT3
+#define IGD_OPREGION_HEADER_MBOX5 BIT4
+
+#define IGD_OPREGION_VBT_SIZE_6K (6 * 1024UL)
+
+/**
+ OpRegion structures:
+ Sub-structures define the different parts of the OpRegion followed by the
+ main structure representing the entire OpRegion.
+
+ @note These structures are packed to 1 byte offsets because the exact
+ data location is required by the supporting design specification due to
+ the fact that the data is used by ASL and Graphics driver code compiled
+ separately.
+**/
+#pragma pack(1)
+///
+/// OpRegion Mailbox 0 Header structure. The OpRegion Header is used to
+/// identify a block of memory as the graphics driver OpRegion.
+/// Offset 0x0, Size 0x100
+///
+struct igd_opregion_header {
+ int8_t sign[0x10]; ///< Offset 0x00 OpRegion Signature
+ uint32_t size; ///< Offset 0x10 OpRegion Size
+ uint32_t over; ///< Offset 0x14 OpRegion Structure Version
+ uint8_t sver[0x20]; ///< Offset 0x18 System BIOS Build Version
+ uint8_t vver[0x10]; ///< Offset 0x38 Video BIOS Build Version
+ uint8_t gver[0x10]; ///< Offset 0x48 Graphic Driver Build Version
+ uint32_t mbox; ///< Offset 0x58 Supported Mailboxes
+ uint32_t dmod; ///< Offset 0x5C Driver Model
+ uint32_t pcon; ///< Offset 0x60 Platform Configuration
+ int16_t dver[0x10]; ///< Offset 0x64 GOP Version
+ uint8_t rm01[0x7C]; ///< Offset 0x84 Reserved Must be zero
+};
+
+///
+/// OpRegion Mailbox 1 - Public ACPI Methods
+/// Offset 0x100, Size 0x100
+///
+struct igd_opregion_mbox1 {
+ uint32_t drdy; ///< Offset 0x100 Driver Readiness
+ uint32_t csts; ///< Offset 0x104 Status
+ uint32_t cevt; ///< Offset 0x108 Current Event
+ uint8_t rm11[0x14]; ///< Offset 0x10C Reserved Must be Zero
+ uint32_t didl[8]; ///< Offset 0x120 Supported Display Devices ID List
+ uint32_t
+ cpdl[8]; ///< Offset 0x140 Currently Attached Display Devices List
+ uint32_t
+ cadl[8]; ///< Offset 0x160 Currently Active Display Devices List
+ uint32_t nadl[8]; ///< Offset 0x180 Next Active Devices List
+ uint32_t aslp; ///< Offset 0x1A0 ASL Sleep Time Out
+ uint32_t tidx; ///< Offset 0x1A4 Toggle Table Index
+ uint32_t chpd; ///< Offset 0x1A8 Current Hotplug Enable Indicator
+ uint32_t clid; ///< Offset 0x1AC Current Lid State Indicator
+ uint32_t cdck; ///< Offset 0x1B0 Current Docking State Indicator
+ uint32_t sxsw; ///< Offset 0x1B4 Display Switch Notification on Sx
+			     ///< State Resume
+ uint32_t evts; ///< Offset 0x1B8 Events supported by ASL
+ uint32_t cnot; ///< Offset 0x1BC Current OS Notification
+ uint32_t NRDY; ///< Offset 0x1C0 Driver Status
+ uint8_t did2[0x1C]; ///< Offset 0x1C4 Extended Supported Devices ID
+ ///< List(DOD)
+ uint8_t
+ cpd2[0x1C]; ///< Offset 0x1E0 Extended Attached Display Devices List
+ uint8_t rm12[4]; ///< Offset 0x1FC - 0x1FF Reserved Must be zero
+};
+
+///
+/// OpRegion Mailbox 2 - Software SCI Interface
+/// Offset 0x200, Size 0x100
+///
+struct igd_opregion_mbox2 {
+ uint32_t scic; ///< Offset 0x200 Software SCI Command / Status / Data
+ uint32_t parm; ///< Offset 0x204 Software SCI Parameters
+ uint32_t dslp; ///< Offset 0x208 Driver Sleep Time Out
+ uint8_t rm21[0xF4]; ///< Offset 0x20C - 0x2FF Reserved Must be zero
+};
+
+///
+/// OpRegion Mailbox 3 - BIOS/Driver Notification - ASLE Support
+/// Offset 0x300, Size 0x100
+///
+struct igd_opregion_mbox3 {
+ uint32_t ardy; ///< Offset 0x300 Driver Readiness
+ uint32_t aslc; ///< Offset 0x304 ASLE Interrupt Command / Status
+ uint32_t tche; ///< Offset 0x308 Technology Enabled Indicator
+ uint32_t alsi; ///< Offset 0x30C Current ALS Luminance Reading
+ uint32_t bclp; ///< Offset 0x310 Requested Backlight Brightness
+ uint32_t pfit; ///< Offset 0x314 Panel Fitting State or Request
+ uint32_t cblv; ///< Offset 0x318 Current Brightness Level
+ uint16_t bclm[0x14]; ///< Offset 0x31C Backlight Brightness Levels Duty
+ ///< Cycle Mapping Table
+ uint32_t cpfm; ///< Offset 0x344 Current Panel Fitting Mode
+ uint32_t epfm; ///< Offset 0x348 Enabled Panel Fitting Modes
+ uint8_t plut[0x4A]; ///< Offset 0x34C Panel Look Up Table & Identifier
+ uint32_t pfmb; ///< Offset 0x396 PWM Frequency and Minimum Brightness
+ uint32_t ccdv; ///< Offset 0x39A Color Correction Default Values
+ uint32_t pcft; ///< Offset 0x39E Power Conservation Features
+ uint32_t srot; ///< Offset 0x3A2 Supported Rotation Angles
+ uint32_t iuer; ///< Offset 0x3A6 Intel Ultrabook(TM) Event Register
+ uint64_t fdss; ///< Offset 0x3AA DSS Buffer address allocated for IFFS
+ ///< feature
+ uint32_t fdsp; ///< Offset 0x3B2 Size of DSS buffer
+ uint32_t stat; ///< Offset 0x3B6 State Indicator
+ uint64_t rvda; ///< Offset 0x3BA Absolute/Relative Address of Raw VBT
+ ///< Data from OpRegion Base
+ uint32_t rvds; ///< Offset 0x3C2 Raw VBT Data Size
+ uint8_t rsvd2[0x3A]; ///< Offset 0x3C6 - 0x3FF Reserved Must be zero.
+ ///< Bug in spec 0x45(69)
+};
+
+///
+/// OpRegion Mailbox 4 - VBT Video BIOS Table
+/// Offset 0x400, Size 0x1800
+///
+struct igd_opregion_mbox4 {
+ uint8_t rvbt[IGD_OPREGION_VBT_SIZE_6K]; ///< Offset 0x400 - 0x1BFF Raw
+ ///< VBT Data
+};
+
+///
+/// OpRegion Mailbox 5 - BIOS/Driver Notification - Data storage BIOS to Driver
+/// data sync Offset 0x1C00, Size 0x400
+///
+struct igd_opregion_mbox5 {
+ uint32_t phed; ///< Offset 0x1C00 Panel Header
+ uint8_t bddc[0x100]; ///< Offset 0x1C04 Panel EDID (DDC data)
+ uint8_t rm51[0x2FC]; ///< Offset 0x1D04 - 0x1FFF Reserved Must be zero
+};
+
+///
+/// IGD OpRegion Structure
+///
+struct igd_opregion {
+ struct igd_opregion_header
+ header; ///< OpRegion header (Offset 0x0, Size 0x100)
+ struct igd_opregion_mbox1 mbox1; ///< Mailbox 1: Public ACPI Methods
+ ///< (Offset 0x100, Size 0x100)
+ struct igd_opregion_mbox2 mbox2; ///< Mailbox 2: Software SCI Interface
+ ///< (Offset 0x200, Size 0x100)
+ struct igd_opregion_mbox3
+ mbox3; ///< Mailbox 3: BIOS to Driver Notification (Offset 0x300,
+ ///< Size 0x100)
+ struct igd_opregion_mbox4 mbox4; ///< Mailbox 4: Video BIOS Table (VBT)
+ ///< (Offset 0x400, Size 0x1800)
+ struct igd_opregion_mbox5
+ mbox5; ///< Mailbox 5: BIOS to Driver Notification Extension (Offset
+ ///< 0x1C00, Size 0x400)
+};
+
+///
+/// VBT Header Structure
+///
+struct vbt_header {
+ uint8_t product_string[20];
+ uint16_t version;
+ uint16_t header_size;
+ uint16_t table_size;
+ uint8_t checksum;
+ uint8_t reserved1;
+ uint32_t bios_data_offset;
+ uint32_t aim_data_offset[4];
+};
+
+#pragma pack()
+
+int vm_intelgpu_get_opregion(vm_paddr_t *const base, vm_paddr_t *const size);
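
Because the layout above is byte-packed to match the IGD OpRegion specification, its sizes can be checked at compile time. The following assertions are an illustrative sketch based on the offsets documented in the comments (they assume intelgpu.h has been included) and are not part of the patch:

#include "intelgpu.h"

_Static_assert(sizeof(struct igd_opregion_header) == 0x100,
    "OpRegion header must cover offsets 0x0-0xFF");
_Static_assert(sizeof(struct igd_opregion_mbox1) == 0x100,
    "Mailbox 1 must cover offsets 0x100-0x1FF");
_Static_assert(sizeof(struct igd_opregion_mbox2) == 0x100,
    "Mailbox 2 must cover offsets 0x200-0x2FF");
_Static_assert(sizeof(struct igd_opregion_mbox3) == 0x100,
    "Mailbox 3 must cover offsets 0x300-0x3FF");
_Static_assert(sizeof(struct igd_opregion_mbox4) == IGD_OPREGION_VBT_SIZE_6K,
    "Mailbox 4 must cover offsets 0x400-0x1BFF");
_Static_assert(sizeof(struct igd_opregion_mbox5) == 0x400,
    "Mailbox 5 must cover offsets 0x1C00-0x1FFF");
_Static_assert(sizeof(struct igd_opregion) == 0x2000,
    "The OpRegion must be exactly 8 KiB");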
diff --git a/sys/amd64/vmm/intel/intelgpu.c b/sys/amd64/vmm/intel/intelgpu.c
new file mode 100644
--- /dev/null
+++ b/sys/amd64/vmm/intel/intelgpu.c
@@ -0,0 +1,55 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include "intelgpu.h"
+
+#define KB (1024UL)
+
+int
+vm_intelgpu_get_opregion(vm_paddr_t *const base, vm_paddr_t *const size)
+{
+ /* intel graphics device is always located at 0:2.0 */
+ device_t dev = pci_find_bsf(0, 2, 0);
+ if (dev == NULL) {
+ return (ENOENT);
+ }
+
+ if ((pci_get_vendor(dev) != PCI_VENDOR_INTEL) ||
+ (pci_get_class(dev) != PCIC_DISPLAY) ||
+ (pci_get_subclass(dev) != PCIS_DISPLAY_VGA)) {
+ return (ENODEV);
+ }
+
+ const uint64_t asls = pci_read_config(dev, PCIR_ASLS_CTL, 4);
+
+ const struct igd_opregion_header *const opregion_header =
+ (struct igd_opregion_header *)pmap_map(NULL, asls,
+ asls + sizeof(*opregion_header), VM_PROT_READ);
+ if (opregion_header == NULL ||
+ memcmp(opregion_header->sign, IGD_OPREGION_HEADER_SIGN,
+ sizeof(opregion_header->sign))) {
+ return (ENODEV);
+ }
+
+ *base = asls;
+ *size = opregion_header->size * KB;
+
+ return (0);
+}
diff --git a/sys/amd64/vmm/io/acpi.h b/sys/amd64/vmm/io/acpi.h
new file mode 100644
--- /dev/null
+++ b/sys/amd64/vmm/io/acpi.h
@@ -0,0 +1,14 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#pragma once
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+int vmm_tpm2_get_control_address(vm_paddr_t *const base,
+ vm_paddr_t *const size);
diff --git a/sys/amd64/vmm/io/acpi.c b/sys/amd64/vmm/io/acpi.c
new file mode 100644
--- /dev/null
+++ b/sys/amd64/vmm/io/acpi.c
@@ -0,0 +1,37 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/acpixf.h>
+
+#include "acpi.h"
+int
+vmm_tpm2_get_control_address(vm_paddr_t *const base, vm_paddr_t *const size)
+{
+ ACPI_TABLE_HEADER *tpm_header;
+ if (!ACPI_SUCCESS(AcpiGetTable("TPM2", 1, &tpm_header))) {
+ return (ENOENT);
+ }
+
+ if (base) {
+ const ACPI_TABLE_TPM2 *const tpm_table = (ACPI_TABLE_TPM2 *)
+ tpm_header;
+ *base = tpm_table->ControlAddress;
+ }
+ if (size) {
+ *size = 0;
+ }
+
+ return (0);
+}
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -134,7 +134,7 @@
bool sysmem;
struct vm_object *object;
};
-#define VM_MAX_MEMSEGS 3
+#define VM_MAX_MEMSEGS 4
struct mem_map {
vm_paddr_t gpa;
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -60,9 +60,11 @@
#include <machine/vmm_snapshot.h>
#include <x86/apicreg.h>
+#include "intel/intelgpu.h"
#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
+#include "io/acpi.h"
#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
@@ -373,6 +375,7 @@
struct vm_capability *vmcap;
struct vm_pptdev *pptdev;
struct vm_pptdev_mmio *pptmmio;
+ struct vm_memory_region_info *memory_region_info;
struct vm_pptdev_msi *pptmsi;
struct vm_pptdev_msix *pptmsix;
struct vm_nmi *vmnmi;
@@ -540,6 +543,29 @@
error = ppt_unmap_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
pptmmio->func, pptmmio->gpa, pptmmio->len);
break;
+ case VM_GET_MEMORY_REGION_INFO:
+ memory_region_info = (struct vm_memory_region_info *)data;
+ switch (memory_region_info->type) {
+ case MEMORY_REGION_INTEL_GSM:
+ memory_region_info->base = intel_graphics_stolen_base;
+ memory_region_info->size = intel_graphics_stolen_size;
+ error = 0;
+ break;
+ case MEMORY_REGION_INTEL_OPREGION:
+ error =
+ vm_intelgpu_get_opregion(&memory_region_info->base,
+ &memory_region_info->size);
+ break;
+ case MEMORY_REGION_TPM_CONTROL_ADDRESS:
+ error = vmm_tpm2_get_control_address(
+ &memory_region_info->base,
+ &memory_region_info->size);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ break;
case VM_BIND_PPTDEV:
pptdev = (struct vm_pptdev *)data;
error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
diff --git a/sys/dev/pci/pcireg.h b/sys/dev/pci/pcireg.h
--- a/sys/dev/pci/pcireg.h
+++ b/sys/dev/pci/pcireg.h
@@ -1098,3 +1098,14 @@
#define PCIM_OSC_CTL_PCIE_PME 0x04 /* PCIe Native Power Mgt Events */
#define PCIM_OSC_CTL_PCIE_AER 0x08 /* PCIe Advanced Error Reporting */
#define PCIM_OSC_CTL_PCIE_CAP_STRUCT 0x10 /* Various Capability Structures */
+
+/*
+ * Intel graphics device definitions
+ */
+#define PCIR_BDSM 0x5C /* Base of Data Stolen Memory register */
+#define PCIR_ASLS_CTL 0xFC /* Opregion start address register */
+
+/*
+ * PCI Vendors
+ */
+#define PCI_VENDOR_INTEL 0x8086
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -29,7 +29,8 @@
x86.c
.PATH: ${SRCTOP}/sys/amd64/vmm/io
-SRCS+= iommu.c \
+SRCS+= acpi.c \
+ iommu.c \
ppt.c \
vatpic.c \
vatpit.c \
@@ -42,6 +43,7 @@
# intel-specific files
.PATH: ${SRCTOP}/sys/amd64/vmm/intel
SRCS+= ept.c \
+ intelgpu.c \
vmcs.c \
vmx_msr.c \
vmx_support.S \
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -15,6 +15,7 @@
BHYVE_SYSDIR?=${SRCTOP}
SRCS= \
+ acpi_device.c \
atkbdc.c \
acpi.c \
audio.c \
@@ -26,6 +27,7 @@
console.c \
ctl_util.c \
ctl_scsi_all.c \
+ e820.c \
fwctl.c \
gdb.c \
hda_codec.c \
@@ -42,6 +44,7 @@
pci_emul.c \
pci_hda.c \
pci_fbuf.c \
+ pci_gvt-d.c \
pci_hostbridge.c \
pci_irq.c \
pci_lpc.c \
@@ -61,6 +64,7 @@
post.c \
ps2kbd.c \
ps2mouse.c \
+ qemu_fwcfg.c \
rfb.c \
rtc.c \
smbiostbl.c \
diff --git a/usr.sbin/bhyve/acpi.h b/usr.sbin/bhyve/acpi.h
--- a/usr.sbin/bhyve/acpi.h
+++ b/usr.sbin/bhyve/acpi.h
@@ -31,6 +31,8 @@
#ifndef _ACPI_H_
#define _ACPI_H_
+#include "acpi_device.h"
+
#define SCI_INT 9
#define SMI_CMD 0xb2
@@ -55,6 +57,7 @@
int acpi_build(struct vmctx *ctx, int ncpu);
void acpi_raise_gpe(struct vmctx *ctx, unsigned bit);
+int acpi_tables_add_device(const struct acpi_device *const dev);
void dsdt_line(const char *fmt, ...);
void dsdt_fixed_ioport(uint16_t iobase, uint16_t length);
void dsdt_fixed_irq(uint8_t irq);
diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c
--- a/usr.sbin/bhyve/acpi.c
+++ b/usr.sbin/bhyve/acpi.c
@@ -139,6 +139,30 @@
#define EFFLUSH(x) \
if (fflush(x) != 0) goto err_exit;
+/*
+ * A list for additional ACPI devices like a TPM.
+ */
+struct acpi_device_list_entry {
+ SLIST_ENTRY(acpi_device_list_entry) chain;
+ const struct acpi_device *dev;
+};
+SLIST_HEAD(acpi_device_list,
+ acpi_device_list_entry) acpi_devices = SLIST_HEAD_INITIALIZER(acpi_devices);
+
+int
+acpi_tables_add_device(const struct acpi_device *const dev)
+{
+ struct acpi_device_list_entry *const entry = calloc(1, sizeof(*entry));
+ if (entry == NULL) {
+ return (ENOMEM);
+ }
+
+ entry->dev = dev;
+ SLIST_INSERT_HEAD(&acpi_devices, entry, chain);
+
+ return (0);
+}
+
static int
basl_fwrite_rsdp(FILE *fp)
{
@@ -760,6 +784,11 @@
vmgenc_write_dsdt();
+ const struct acpi_device_list_entry *entry;
+ SLIST_FOREACH(entry, &acpi_devices, chain) {
+ acpi_device_write_dsdt(entry->dev);
+ }
+
dsdt_line("}");
if (dsdt_error != 0)
diff --git a/usr.sbin/bhyve/acpi_device.h b/usr.sbin/bhyve/acpi_device.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/acpi_device.h
@@ -0,0 +1,42 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#pragma once
+
+#include <contrib/dev/acpica/include/acpi.h>
+
+struct vmctx;
+
+struct acpi_device;
+
+/**
+ * Creates an ACPI device.
+ *
+ * @param[out] new_dev Returns the newly created ACPI device.
+ * @param[in] vm_ctx VM context the ACPI device is created in.
+ * @param[in] name Name of the ACPI device. Should always be a NULL
+ * terminated string.
+ * @param[in] hid Hardware ID of the ACPI device. Should always be a NULL
+ * terminated string.
+ */
+int acpi_device_create(struct acpi_device **const new_dev,
+ struct vmctx *const vm_ctx, const char *const name, const char *const hid);
+void acpi_device_destroy(struct acpi_device *const dev);
+
+/**
+ * @note: acpi_device_add_res_acpi_buffer doesn't ensure that no resources are
+ * added on an error condition. On error the caller should assume that
+ * the ACPI_BUFFER is partially added to the ACPI device.
+ */
+int acpi_device_add_res_acpi_buffer(struct acpi_device *const dev,
+ const ACPI_BUFFER resources);
+int acpi_device_add_res_fixed_ioport(struct acpi_device *const dev,
+ const UINT16 port, UINT8 length);
+int acpi_device_add_res_fixed_memory32(struct acpi_device *const dev,
+ const UINT8 write_protected, const UINT32 address, const UINT32 length);
+
+void acpi_device_write_dsdt(const struct acpi_device *const dev);
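
A minimal sketch of how a bhyve device model might consume this API; the device name "TPM", the hardware ID "MSFT0101", and the fixed MMIO window are illustrative assumptions, not values taken from this patch:

#include <err.h>

#include "acpi_device.h"

/* Illustrative only: announce a device under \_SB with one MMIO window. */
static int
example_acpi_dev_init(struct vmctx *const vm_ctx)
{
	struct acpi_device *dev = NULL;
	int error;

	error = acpi_device_create(&dev, vm_ctx, "TPM", "MSFT0101");
	if (error)
		return (error);

	/* Advertise a write-protected 4 KiB window in _CRS. */
	error = acpi_device_add_res_fixed_memory32(dev, 1, 0xFED40000,
	    0x1000);
	if (error)
		acpi_device_destroy(dev);

	return (error);
}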
diff --git a/usr.sbin/bhyve/acpi_device.c b/usr.sbin/bhyve/acpi_device.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/acpi_device.c
@@ -0,0 +1,240 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+
+#include <machine/vmm.h>
+
+#include <err.h>
+#include <errno.h>
+#include <vmmapi.h>
+
+#include "acpi.h"
+#include "acpi_device.h"
+
+/**
+ * List entry to enumerate all resources used by an ACPI device.
+ *
+ * @param chain Used to chain multiple elements together.
+ * @param type Type of the ACPI resource.
+ * @param data Data of the ACPI resource.
+ */
+struct acpi_resource_list_entry {
+ SLIST_ENTRY(acpi_resource_list_entry) chain;
+ UINT32 type;
+ ACPI_RESOURCE_DATA data;
+};
+
+/**
+ * Holds information about an ACPI device.
+ *
+ * @param vm_ctx VM context the ACPI device was created in.
+ * @param name Name of the ACPI device.
+ * @param hid Hardware ID of the ACPI device.
+ * @param crs Current resources used by the ACPI device.
+ */
+struct acpi_device {
+ struct vmctx *vm_ctx;
+ const char *name;
+ const char *hid;
+ SLIST_HEAD(acpi_resource_list, acpi_resource_list_entry) crs;
+};
+
+int
+acpi_device_create(struct acpi_device **const new_dev,
+ struct vmctx *const vm_ctx, const char *const name, const char *const hid)
+{
+ if (new_dev == NULL || vm_ctx == NULL || name == NULL || hid == NULL) {
+ return (EINVAL);
+ }
+
+ struct acpi_device *const dev = calloc(1, sizeof(*dev));
+ if (dev == NULL) {
+ return (ENOMEM);
+ }
+
+ dev->vm_ctx = vm_ctx;
+ dev->name = name;
+ dev->hid = hid;
+ SLIST_INIT(&dev->crs);
+
+ /* current resources always contain an end tag */
+ struct acpi_resource_list_entry *const crs_end_tag = calloc(1,
+ sizeof(*crs_end_tag));
+ if (crs_end_tag == NULL) {
+ acpi_device_destroy(dev);
+ return (ENOMEM);
+ }
+ crs_end_tag->type = ACPI_RESOURCE_TYPE_END_TAG;
+ SLIST_INSERT_HEAD(&dev->crs, crs_end_tag, chain);
+
+ const int error = acpi_tables_add_device(dev);
+ if (error) {
+ acpi_device_destroy(dev);
+ return (error);
+ }
+
+ *new_dev = dev;
+
+ return (0);
+}
+
+void
+acpi_device_destroy(struct acpi_device *const dev)
+{
+ if (dev == NULL) {
+ return;
+ }
+
+ struct acpi_resource_list_entry *res;
+ while (!SLIST_EMPTY(&dev->crs)) {
+ res = SLIST_FIRST(&dev->crs);
+ SLIST_REMOVE_HEAD(&dev->crs, chain);
+ free(res);
+ }
+}
+
+int
+acpi_device_add_res_acpi_buffer(struct acpi_device *const dev,
+ const ACPI_BUFFER resources)
+{
+ if (dev == NULL) {
+ return (EINVAL);
+ }
+
+ int error = 0;
+ size_t offset = 0;
+ while (offset < resources.Length) {
+ const ACPI_RESOURCE *const res =
+ (const ACPI_RESOURCE *)((UINT8 *)resources.Pointer +
+ offset);
+ switch (res->Type) {
+ case ACPI_RESOURCE_TYPE_FIXED_IO:
+ error = acpi_device_add_res_fixed_ioport(dev,
+ res->Data.FixedIo.Address,
+ res->Data.FixedIo.AddressLength);
+ break;
+ case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
+ error = acpi_device_add_res_fixed_memory32(dev,
+ res->Data.FixedMemory32.WriteProtect,
+ res->Data.FixedMemory32.Address,
+ res->Data.FixedMemory32.AddressLength);
+ break;
+ case ACPI_RESOURCE_TYPE_END_TAG:
+ break;
+ default:
+ warnx("%s: unknown resource type %d", __func__,
+ res->Type);
+ return (ENODEV);
+ }
+ if (error) {
+ break;
+ }
+ offset += res->Length;
+ }
+
+ return (error);
+}
+
+int
+acpi_device_add_res_fixed_ioport(struct acpi_device *const dev,
+ const UINT16 port, const UINT8 length)
+{
+ if (dev == NULL) {
+ return (EINVAL);
+ }
+
+ struct acpi_resource_list_entry *const res = calloc(1, sizeof(*res));
+ if (res == NULL) {
+ return (ENOMEM);
+ }
+
+ res->type = ACPI_RESOURCE_TYPE_FIXED_IO;
+ res->data.FixedIo.Address = port;
+ res->data.FixedIo.AddressLength = length;
+
+ SLIST_INSERT_HEAD(&dev->crs, res, chain);
+
+ return (0);
+}
+
+int
+acpi_device_add_res_fixed_memory32(struct acpi_device *const dev,
+ const UINT8 write_protected, const UINT32 address, const UINT32 length)
+{
+ if (dev == NULL) {
+ return (EINVAL);
+ }
+
+ struct acpi_resource_list_entry *const res = calloc(1, sizeof(*res));
+ if (res == NULL) {
+ return (ENOMEM);
+ }
+
+ res->type = ACPI_RESOURCE_TYPE_FIXED_MEMORY32;
+ res->data.FixedMemory32.WriteProtect = write_protected;
+ res->data.FixedMemory32.Address = address;
+ res->data.FixedMemory32.AddressLength = length;
+
+ SLIST_INSERT_HEAD(&dev->crs, res, chain);
+
+ return (0);
+}
+
+static void
+acpi_device_write_dsdt_crs(const struct acpi_device *const dev)
+{
+ const struct acpi_resource_list_entry *res;
+ SLIST_FOREACH (res, &dev->crs, chain) {
+ switch (res->type) {
+ case ACPI_RESOURCE_TYPE_FIXED_IO:
+ dsdt_fixed_ioport(res->data.FixedIo.Address,
+ res->data.FixedIo.AddressLength);
+ break;
+ case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: {
+ dsdt_fixed_mem32(res->data.FixedMemory32.Address,
+ res->data.FixedMemory32.AddressLength);
+ break;
+ }
+ case ACPI_RESOURCE_TYPE_END_TAG:
+ break;
+ default:
+ warnx("%s: unknown resource type %d", __func__,
+ res->type);
+ return;
+ }
+ }
+}
+
+void
+acpi_device_write_dsdt(const struct acpi_device *const dev)
+{
+ if (dev == NULL) {
+ return;
+ }
+
+ dsdt_line("");
+ dsdt_line(" Scope (\\_SB)");
+ dsdt_line(" {");
+ dsdt_line(" Device (%s)", dev->name);
+ dsdt_line(" {");
+ dsdt_line(" Name (_HID, \"%s\")", dev->hid);
+ dsdt_line(" Name (_STA, 0x0F)");
+ dsdt_line(" Name (_CRS, ResourceTemplate ()");
+ dsdt_line(" {");
+ dsdt_indent(4);
+ acpi_device_write_dsdt_crs(dev);
+ dsdt_unindent(4);
+ dsdt_line(" })");
+ dsdt_line(" }");
+ dsdt_line(" }");
+}
diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8
--- a/usr.sbin/bhyve/bhyve.8
+++ b/usr.sbin/bhyve/bhyve.8
@@ -45,6 +45,15 @@
.Op Cm ,threads= Ar n
.Oc
.Sm on
+.Oo Fl f
+.Sm off
+.Ar name Cm \&,
+.Oo
+.Cm string No | Cm file
+.Oc
+.Cm \&= Ar data
+.Sm on
+.Oc
.Oo
.Sm off
.Fl G\~
@@ -144,6 +153,16 @@
.Nm
to exit when a guest issues an access to an I/O port that is not emulated.
This is intended for debug purposes.
+.It Fl f Ar name Ns Cm \&, Ns Oo Cm string Ns No | Ns Cm file Ns Oc Ns Cm \&= Ns Ar data
+Add a fw_cfg file
+.Ar name
+to the fw_cfg interface.
+If a
+.Cm string
+is specified, the fw_cfg file contains the string as data.
+If a
+.Cm file
+is specified, bhyve reads the file and adds the file content as fw_cfg data.
.It Fl G Xo
.Sm off
.Oo Ar w Oc
@@ -515,6 +534,11 @@
and
.Ar function
numbers.
+.It Li rom= Ns Ar romfile
+Add
+.Ar romfile
+as an option ROM to the PCI device.
+The ROM will be loaded by firmware and should be capable of initializing the device.
.El
.Pp
Guest memory must be wired using the
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -90,6 +90,7 @@
#include "config.h"
#include "inout.h"
#include "debug.h"
+#include "e820.h"
#include "fwctl.h"
#include "gdb.h"
#include "ioapic.h"
@@ -100,6 +101,7 @@
#include "pci_emul.h"
#include "pci_irq.h"
#include "pci_lpc.h"
+#include "qemu_fwcfg.h"
#include "smbiostbl.h"
#ifdef BHYVE_SNAPSHOT
#include "snapshot.h"
@@ -1249,9 +1251,9 @@
progname = basename(argv[0]);
#ifdef BHYVE_SNAPSHOT
- optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:U:r:";
+ optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:U:r:";
#else
- optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:U:";
+ optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:U:";
#endif
while ((c = getopt(argc, argv, optstr)) != -1) {
switch (c) {
@@ -1279,6 +1281,11 @@
case 'C':
set_config_bool("memory.guest_in_core", true);
break;
+ case 'f':
+ if (qemu_fwcfg_parse_cmdline_arg(optarg) != 0) {
+ exit(1);
+ }
+ break;
case 'G':
parse_gdb_options(optarg);
break;
@@ -1452,6 +1459,61 @@
rtc_init(ctx);
sci_init(ctx);
+ const char *fwcfg = lpc_fwcfg();
+ if (lpc_bootrom()) {
+ if (fwcfg == NULL || strcmp(fwcfg, "bhyve") == 0) {
+ if (fwctl_init() != 0) {
+ fprintf(stderr,
+ "bhyve fwctl initialization error");
+ exit(4);
+ }
+ } else if (strcmp(fwcfg, "qemu") == 0) {
+ if (qemu_fwcfg_init(ctx) != 0) {
+ fprintf(stderr,
+ "qemu fwcfg initialization error");
+ exit(4);
+ }
+ /*
+ * QEMU uses fwcfg item 0x0f (FW_CFG_MAX_CPUS) to report
+ * the number of cpus to the guest but states that it
+			 * has a special meaning for x86. It is unclear whether
+			 * that can cause unintended side effects, so use a
+			 * dedicated fwcfg item to be safe.
+ *
+ * QEMU comment:
+ * FW_CFG_MAX_CPUS is a bit confusing/problematic
+ * on x86:
+ *
+ * For machine types prior to 1.8, SeaBIOS needs
+ * FW_CFG_MAX_CPUS for building MPTable, ACPI MADT,
+ * ACPI CPU hotplug and ACPI SRAT table, that
+ * tables are based on xAPIC ID and QEMU<->SeaBIOS
+ * interface for CPU hotplug also uses APIC ID and
+ * not "CPU index". This means that FW_CFG_MAX_CPUS
+ * is not the "maximum number of CPUs", but the
+ * "limit to the APIC ID values SeaBIOS may see".
+ *
+ * So for compatibility reasons with old BIOSes we
+ * are stuck with "etc/max-cpus" actually being
+ * apic_id_limit
+ */
+ if (qemu_fwcfg_add_file("opt/bhyve/hw.ncpu",
+ sizeof(guest_ncpus), &guest_ncpus) != 0) {
+ fprintf(stderr,
+ "Could not add qemu fwcfg opt/bhyve/hw.ncpu");
+ exit(4);
+ }
+
+ if (e820_init(ctx) != 0) {
+ fprintf(stderr, "Unable to setup E820");
+ exit(4);
+ }
+ } else {
+ fprintf(stderr, "Invalid fwcfg %s", fwcfg);
+ exit(4);
+ }
+ }
+
/*
* Exit if a device emulation finds an error in its initilization
*/
@@ -1535,8 +1597,20 @@
assert(error == 0);
}
- if (lpc_bootrom())
- fwctl_init();
+ if (strcmp(fwcfg, "qemu") == 0) {
+ struct qemu_fwcfg_item *const e820_fwcfg_item =
+ e820_get_fwcfg_item();
+ if (e820_fwcfg_item == NULL) {
+ fprintf(stderr, "invalid e820 table");
+ exit(4);
+ }
+ if (qemu_fwcfg_add_file("etc/e820", e820_fwcfg_item->size,
+ e820_fwcfg_item->data) != 0) {
+ fprintf(stderr, "could not add qemu fwcfg etc/e820");
+ exit(4);
+ }
+ free(e820_fwcfg_item);
+ }
/*
* Change the proc title to include the VM name.
diff --git a/usr.sbin/bhyve/e820.h b/usr.sbin/bhyve/e820.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/e820.h
@@ -0,0 +1,49 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#pragma once
+
+#include <vmmapi.h>
+
+#include "qemu_fwcfg.h"
+
+#pragma pack(push, 1)
+
+enum e820_memory_type {
+ E820_TYPE_MEMORY = 1,
+ E820_TYPE_RESERVED = 2,
+ E820_TYPE_ACPI = 3,
+ E820_TYPE_NVS = 4
+};
+
+enum e820_allocation_strategy {
+ /* allocate any address */
+ E820_ALLOCATE_ANY,
+ /* allocate lowest address larger than address */
+ E820_ALLOCATE_LOWEST,
+ /* allocate highest address lower than address */
+ E820_ALLOCATE_HIGHEST,
+ /* allocate a specific address */
+ E820_ALLOCATE_SPECIFIC
+};
+
+struct e820_entry {
+ uint64_t base;
+ uint64_t length;
+ enum e820_memory_type type;
+};
+
+#pragma pack(pop)
+
+#define E820_ALIGNMENT_NONE 1
+
+uint64_t e820_alloc(const uint64_t address, const uint64_t length,
+ const uint64_t alignment, const enum e820_memory_type type,
+ const enum e820_allocation_strategy strategy);
+void e820_dump_table();
+struct qemu_fwcfg_item *e820_get_fwcfg_item();
+int e820_init(struct vmctx *const ctx);
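
A short sketch of how a caller might use this allocator; the size, alignment, and strategy below are illustrative assumptions:

#include "e820.h"

/*
 * Illustrative only: reserve 64 KiB of guest-physical address space below
 * 4 GiB, 4 KiB aligned, and mark it reserved in the E820 map. A return value
 * of 0 means the allocation failed.
 */
static uint64_t
example_reserve_below_4g(void)
{
	const uint64_t gpa = e820_alloc(4ULL * 1024 * 1024 * 1024, 64 * 1024,
	    4096, E820_TYPE_RESERVED, E820_ALLOCATE_HIGHEST);

	if (gpa == 0)
		e820_dump_table();

	return (gpa);
}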
diff --git a/usr.sbin/bhyve/e820.c b/usr.sbin/bhyve/e820.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/e820.c
@@ -0,0 +1,452 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/queue.h>
+
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "e820.h"
+#include "qemu_fwcfg.h"
+
+/*
+ * E820 always uses 64 bit entries. Emulation code will use vm_paddr_t since it
+ * works on physical addresses. If vm_paddr_t is larger than uint64_t, the E820
+ * table can't hold all possible physical addresses and we can get into trouble.
+ */
+static_assert(sizeof(vm_paddr_t) <= sizeof(uint64_t),
+ "Unable to represent physical memory by E820 table");
+
+#define E820_FWCFG_FILE_NAME "etc/e820"
+
+#define KB (1024UL)
+#define MB (1024 * KB)
+#define GB (1024 * MB)
+
+/*
+ * Fix E820 memory holes:
+ * [ A0000, C0000) VGA
+ * [ C0000, 100000) ROM
+ */
+#define E820_VGA_MEM_BASE 0xA0000
+#define E820_VGA_MEM_END 0xC0000
+#define E820_ROM_MEM_BASE 0xC0000
+#define E820_ROM_MEM_END 0x100000
+
+struct e820_element {
+ TAILQ_ENTRY(e820_element) chain;
+ uint64_t base;
+ uint64_t end;
+ enum e820_memory_type type;
+};
+TAILQ_HEAD(e820_table, e820_element) e820_table = TAILQ_HEAD_INITIALIZER(
+ e820_table);
+
+static const char *
+e820_get_type_name(const enum e820_memory_type type)
+{
+ switch (type) {
+ case E820_TYPE_MEMORY:
+ return "RAM ";
+ case E820_TYPE_RESERVED:
+ return "Reserved";
+ case E820_TYPE_ACPI:
+ return "ACPI ";
+ case E820_TYPE_NVS:
+ return "NVS ";
+ default:
+ return "Unknown ";
+ }
+}
+
+void
+e820_dump_table()
+{
+ fprintf(stderr, "E820 map:\n\r");
+ uint64_t i = 0;
+ struct e820_element *element;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ fprintf(stderr, " (%4lu) [ %16lx, %16lx] %s\n\r", i,
+ element->base, element->end,
+ e820_get_type_name(element->type));
+ ++i;
+ }
+}
+
+struct qemu_fwcfg_item *
+e820_get_fwcfg_item()
+{
+ uint64_t count = 0;
+ struct e820_element *element;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ ++count;
+ }
+ if (count == 0) {
+ warnx("%s: E820 table empty", __func__);
+ return (NULL);
+ }
+
+ struct qemu_fwcfg_item *const fwcfg_item = malloc(
+ sizeof(struct qemu_fwcfg_item));
+ if (fwcfg_item == NULL) {
+ return (NULL);
+ }
+ fwcfg_item->size = count * sizeof(struct e820_entry);
+ fwcfg_item->data = malloc(fwcfg_item->size);
+ if (fwcfg_item->data == NULL) {
+ free(fwcfg_item);
+ return (NULL);
+ }
+ uint64_t i = 0;
+ struct e820_entry *entries = (struct e820_entry *)fwcfg_item->data;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ struct e820_entry *entry = &entries[i];
+ entry->base = element->base;
+ entry->length = element->end - element->base;
+ entry->type = element->type;
+ ++i;
+ }
+
+ return fwcfg_item;
+}
+
+int
+e820_add_entry(const uint64_t base, const uint64_t end,
+ const enum e820_memory_type type)
+{
+ if (end < base) {
+ return (-1);
+ }
+
+ struct e820_element *const new_element = malloc(
+ sizeof(struct e820_element));
+ if (new_element == NULL) {
+ return (-ENOMEM);
+ }
+
+ new_element->base = base;
+ new_element->end = end;
+ new_element->type = type;
+
+ /*
+	 * The E820 table should always be sorted in ascending order. Therefore,
+	 * search for an element whose end is larger than the base parameter.
+ */
+ struct e820_element *element;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ if (element->end > base) {
+ break;
+ }
+ }
+
+ /*
+ * System memory requires special handling.
+ */
+ if (type == E820_TYPE_MEMORY) {
+ /*
+		 * base is larger than that of any existing element. Add new
+		 * system memory at the end of the table.
+ */
+ if (element == NULL) {
+ TAILQ_INSERT_TAIL(&e820_table, new_element, chain);
+ return (0);
+ }
+
+ /*
+ * System memory shouldn't overlap with any existing element.
+ */
+ if (end > element->base) {
+ return (-1);
+ }
+ TAILQ_INSERT_BEFORE(element, new_element, chain);
+ return (0);
+ }
+
+ if (element == NULL) {
+ /* No suitable element found */
+ return (-1);
+ }
+
+ /*
+ * Non system memory should be allocated inside system memory.
+ */
+ if (element->type != E820_TYPE_MEMORY) {
+ return (-1);
+ }
+ /*
+ * New element should fit into existing system memory element.
+ */
+ if (base < element->base || end > element->end) {
+ return (-1);
+ }
+
+ if (base == element->base) {
+ /*
+ * New element at system memory base boundary. Add new
+ * element before current and adjust the base of the old
+ * element.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM <-- element
+ * New table:
+ * [ 0x1000, 0x2000] Reserved
+ * [ 0x2000, 0x4000] RAM <-- element
+ */
+ TAILQ_INSERT_BEFORE(element, new_element, chain);
+ element->base = end;
+ } else if (end == element->end) {
+ /*
+ * New element at system memory end boundary. Add new
+ * element after current and adjust the end of the
+ * current element.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM <-- element
+ * New table:
+ * [ 0x1000, 0x3000] RAM <-- element
+ * [ 0x3000, 0x4000] Reserved
+ */
+ TAILQ_INSERT_AFTER(&e820_table, element, new_element, chain);
+ element->end = base;
+ } else {
+ /*
+ * New element inside system memory entry. Split it by
+ * adding a system memory element and the new element
+ * before current.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM <-- element
+ * New table:
+ * [ 0x1000, 0x2000] RAM
+ * [ 0x2000, 0x3000] Reserved
+ * [ 0x3000, 0x4000] RAM <-- element
+ */
+ struct e820_element *ram_element = malloc(
+ sizeof(struct e820_element));
+ if (ram_element == NULL) {
+ return (-ENOMEM);
+ }
+ ram_element->base = element->base;
+ ram_element->end = base;
+ ram_element->type = E820_TYPE_MEMORY;
+ TAILQ_INSERT_BEFORE(element, ram_element, chain);
+ TAILQ_INSERT_BEFORE(element, new_element, chain);
+ element->base = end;
+ }
+
+ return (0);
+}
+
+static int
+e820_add_memory_hole(const uint64_t base, const uint64_t end)
+{
+ if (end < base) {
+ return (-1);
+ }
+
+ /*
+	 * The E820 table should always be sorted in ascending order. Therefore,
+	 * search for an element whose end is larger than the base parameter.
+ */
+ struct e820_element *element;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ if (element->end > base) {
+ break;
+ }
+ }
+
+ if (element == NULL || end <= element->base) {
+ /* Nothing to do. Hole already exists */
+ return (0);
+ }
+
+ if (element->type != E820_TYPE_MEMORY) {
+ /* Memory holes are only allowed in system memory */
+ return (-1);
+ }
+
+ if (base == element->base) {
+ /*
+ * New hole at system memory base boundary.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM
+ * New table:
+ * [ 0x2000, 0x4000] RAM
+ */
+ element->base = end;
+
+ } else if (end == element->end) {
+ /*
+ * New hole at system memory end boundary.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM
+ * New table:
+ * [ 0x1000, 0x3000] RAM
+ */
+ element->end = base;
+
+ } else {
+ /*
+ * New hole inside system memory entry. Split the system memory.
+ *
+ * Old table:
+ * [ 0x1000, 0x4000] RAM <-- element
+ * New table:
+ * [ 0x1000, 0x2000] RAM
+ * [ 0x3000, 0x4000] RAM <-- element
+ */
+ struct e820_element *const ram_element = malloc(
+ sizeof(struct e820_element));
+ if (ram_element == NULL) {
+ return (-ENOMEM);
+ }
+ ram_element->base = element->base;
+ ram_element->end = base;
+ ram_element->type = E820_TYPE_MEMORY;
+ TAILQ_INSERT_BEFORE(element, ram_element, chain);
+ element->base = end;
+ }
+
+ return (0);
+}
+
+static uint64_t
+e820_alloc_highest(const uint64_t max_address, const uint64_t length,
+ const uint64_t alignment, const enum e820_memory_type type)
+{
+ struct e820_element *element;
+ TAILQ_FOREACH_REVERSE (element, &e820_table, e820_table, chain) {
+ const uint64_t end = MIN(max_address, element->end);
+ const uint64_t base = roundup2(element->base, alignment);
+
+ if (element->type != E820_TYPE_MEMORY || end < base ||
+ end - base < length || end - length == 0) {
+ continue;
+ }
+
+ const uint64_t address = rounddown2(end - length, alignment);
+
+ if (e820_add_entry(address, address + length, type) != 0) {
+ return 0;
+ }
+
+ return address;
+ }
+
+ return 0;
+}
+
+static uint64_t
+e820_alloc_lowest(const uint64_t min_address, const uint64_t length,
+ const uint64_t alignment, const enum e820_memory_type type)
+{
+ struct e820_element *element;
+ TAILQ_FOREACH (element, &e820_table, chain) {
+ const uint64_t end = element->end;
+ const uint64_t base = MAX(min_address,
+ roundup2(element->base, alignment));
+
+ if (element->type != E820_TYPE_MEMORY || end < base ||
+ end - base < length || base == 0) {
+ continue;
+ }
+
+ if (e820_add_entry(base, base + length, type) != 0) {
+ return 0;
+ }
+
+ return base;
+ }
+
+ return 0;
+}
+
+uint64_t
+e820_alloc(const uint64_t address, const uint64_t length,
+ const uint64_t alignment, const enum e820_memory_type type,
+ const enum e820_allocation_strategy strategy)
+{
+ /* address should be aligned */
+ if (!powerof2(alignment) || (address & (alignment - 1)) != 0) {
+ return 0;
+ }
+
+ switch (strategy) {
+ case E820_ALLOCATE_ANY:
+ /*
+ * Allocate any address. Therefore, ignore the address parameter
+ * and reuse the code path for allocating the lowest address.
+ */
+ return e820_alloc_lowest(0, length, alignment, type);
+ case E820_ALLOCATE_LOWEST:
+ return e820_alloc_lowest(address, length, alignment, type);
+ case E820_ALLOCATE_HIGHEST:
+ return e820_alloc_highest(address, length, alignment, type);
+ case E820_ALLOCATE_SPECIFIC:
+ if (e820_add_entry(address, address + length, type) != 0) {
+ return 0;
+ }
+
+ return address;
+ }
+
+ return 0;
+}
+
+int
+e820_init(struct vmctx *const ctx)
+{
+ int error;
+
+ TAILQ_INIT(&e820_table);
+
+ /* add memory below 4 GB to E820 table */
+ const uint64_t lowmem_length = vm_get_lowmem_size(ctx);
+ error = e820_add_entry(0, lowmem_length, E820_TYPE_MEMORY);
+ if (error) {
+ warnx("%s: Could not add lowmem", __func__);
+ return (error);
+ }
+
+ /* add memory above 4 GB to E820 table */
+ const uint64_t highmem_length = vm_get_highmem_size(ctx);
+ if (highmem_length != 0) {
+ error = e820_add_entry(4 * GB, 4 * GB + highmem_length,
+ E820_TYPE_MEMORY);
+ if (error) {
+ warnx("%s: Could not add highmem", __func__);
+ return (error);
+ }
+ }
+
+ /* add memory holes to E820 table */
+ error = e820_add_memory_hole(E820_VGA_MEM_BASE, E820_VGA_MEM_END);
+ if (error) {
+ warnx("%s: Could not add VGA memory", __func__);
+ return (error);
+ }
+
+ error = e820_add_memory_hole(E820_ROM_MEM_BASE, E820_ROM_MEM_END);
+ if (error) {
+ warnx("%s: Could not add ROM area", __func__);
+ return (error);
+ }
+
+ return (0);
+}
diff --git a/usr.sbin/bhyve/fwctl.h b/usr.sbin/bhyve/fwctl.h
--- a/usr.sbin/bhyve/fwctl.h
+++ b/usr.sbin/bhyve/fwctl.h
@@ -51,6 +51,6 @@
}; \
DATA_SET(ctl_set, __CONCAT(__ctl, __LINE__))
-void fwctl_init(void);
+int fwctl_init(void);
#endif /* _FWCTL_H_ */
diff --git a/usr.sbin/bhyve/fwctl.c b/usr.sbin/bhyve/fwctl.c
--- a/usr.sbin/bhyve/fwctl.c
+++ b/usr.sbin/bhyve/fwctl.c
@@ -472,16 +472,9 @@
static void
fwctl_outw(uint16_t val)
{
- switch (be_state) {
- case IDENT_WAIT:
- if (val == 0) {
- be_state = IDENT_SEND;
- ident_idx = 0;
- }
- break;
- default:
- /* ignore */
- break;
+ if (val == 0) {
+ be_state = IDENT_SEND;
+ ident_idx = 0;
}
}
@@ -538,15 +531,39 @@
return (0);
}
-INOUT_PORT(fwctl_wreg, FWCTL_OUT, IOPORT_F_INOUT, fwctl_handler);
-INOUT_PORT(fwctl_rreg, FWCTL_IN, IOPORT_F_IN, fwctl_handler);
-void
+int
fwctl_init(void)
{
+ struct inout_port iop;
+ int error;
+
+ bzero(&iop, sizeof(iop));
+ iop.name = "fwctl_wreg";
+ iop.port = FWCTL_OUT;
+ iop.size = 1;
+ iop.flags = IOPORT_F_INOUT;
+ iop.handler = fwctl_handler;
+
+ if ((error = register_inout(&iop)) != 0) {
+ return (error);
+ }
+
+ bzero(&iop, sizeof(iop));
+ iop.name = "fwctl_rreg";
+ iop.port = FWCTL_IN;
+ iop.size = 1;
+ iop.flags = IOPORT_F_IN;
+ iop.handler = fwctl_handler;
+
+ if ((error = register_inout(&iop)) != 0) {
+ return (error);
+ }
ops[OP_GET_LEN] = &fgetlen_info;
ops[OP_GET] = &fgetval_info;
be_state = IDENT_WAIT;
+
+ return (0);
}
diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h
--- a/usr.sbin/bhyve/pci_emul.h
+++ b/usr.sbin/bhyve/pci_emul.h
@@ -42,6 +42,8 @@
#include <assert.h>
#define PCI_BARMAX PCIR_MAX_BAR_0 /* BAR registers in a Type 0 header */
+#define PCI_BARMAX_WITH_ROM (PCI_BARMAX + 1)
+#define PCI_ROM_IDX (PCI_BARMAX + 1)
struct vmctx;
struct pci_devinst;
@@ -92,7 +94,8 @@
PCIBAR_IO,
PCIBAR_MEM32,
PCIBAR_MEM64,
- PCIBAR_MEMHI64
+ PCIBAR_MEMHI64,
+ PCIBAR_ROM,
};
struct pcibar {
@@ -165,7 +168,9 @@
void *pi_arg; /* devemu-private data */
u_char pi_cfgdata[PCI_REGMAX + 1];
- struct pcibar pi_bar[PCI_BARMAX + 1];
+ /* ROM is handled like a BAR */
+ struct pcibar pi_bar[PCI_BARMAX_WITH_ROM + 1];
+ uint64_t pi_romoffset;
};
struct msicap {
@@ -229,6 +234,8 @@
void pci_callback(void);
int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx,
enum pcibar_type type, uint64_t size);
+int pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size,
+ void **const addr);
int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type);
void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes,
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -33,10 +33,12 @@
#include <sys/param.h>
#include <sys/linker_set.h>
+#include <sys/mman.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
+#include <err.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
@@ -101,13 +103,28 @@
SET_DECLARE(pci_devemu_set, struct pci_devemu);
static uint64_t pci_emul_iobase;
+static uint8_t *pci_emul_rombase;
+static uint64_t pci_emul_romoffset;
+static uint8_t *pci_emul_romlim;
static uint64_t pci_emul_membase32;
static uint64_t pci_emul_membase64;
static uint64_t pci_emul_memlim64;
+struct pci_bar_allocation {
+ TAILQ_ENTRY(pci_bar_allocation) chain;
+ struct pci_devinst *pdi;
+ int idx;
+ enum pcibar_type type;
+ uint64_t size;
+};
+TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars = TAILQ_HEAD_INITIALIZER(
+ pci_bars);
+
#define PCI_EMUL_IOBASE 0x2000
#define PCI_EMUL_IOLIMIT 0x10000
+#define PCI_EMUL_ROMSIZE 0x10000000
+
#define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */
#define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */
SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);
@@ -552,6 +569,12 @@
(*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
pi->pi_bar[idx].addr);
break;
+ case PCIBAR_ROM:
+ error = 0;
+ if (pe->pe_baraddr != NULL)
+ (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
+ pi->pi_bar[idx].addr);
+ break;
default:
error = EINVAL;
break;
@@ -573,6 +596,14 @@
modify_bar_registration(pi, idx, 1);
}
+/* Is the ROM enabled for the emulated pci device? */
+static int
+romen(struct pci_devinst *pi)
+{
+ return (pi->pi_bar[PCI_ROM_IDX].lobits & PCIM_BIOS_ENABLE) ==
+ PCIM_BIOS_ENABLE;
+}
+
/* Are we decoding i/o port accesses for the emulated pci device? */
static int
porten(struct pci_devinst *pi)
@@ -639,11 +670,11 @@
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
uint64_t size)
{
- int error;
- uint64_t *baseptr, limit, addr, mask, lobits, bar;
- uint16_t cmd, enbit;
-
- assert(idx >= 0 && idx <= PCI_BARMAX);
+ if ((type != PCIBAR_ROM) && (idx < 0 || idx > PCI_BARMAX)) {
+ errx(4, "Illegal BAR idx");
+ } else if ((type == PCIBAR_ROM) && (idx != PCI_ROM_IDX)) {
+ errx(4, "Illegal ROM idx");
+ }
if ((size & (size - 1)) != 0)
size = 1UL << flsl(size); /* round up to a power of 2 */
@@ -652,22 +683,94 @@
if (type == PCIBAR_IO) {
if (size < 4)
size = 4;
+ } else if (type == PCIBAR_ROM) {
+ if (size < ~PCIM_BIOS_ADDR_MASK + 1)
+ size = ~PCIM_BIOS_ADDR_MASK + 1;
} else {
if (size < 16)
size = 16;
}
+ /*
+ * To reduce fragmentation of the MMIO space, we allocate the BARs by
+ * size. Therefore, don't allocate the BAR yet. We create a list of all
+	 * BAR allocations, sorted by BAR size. When all PCI devices are
+	 * initialized, we will assign addresses to the BARs.
+ */
+
+ /* create a new list entry */
+ struct pci_bar_allocation *const new_bar = malloc(
+ sizeof(struct pci_bar_allocation));
+ memset(new_bar, 0, sizeof(struct pci_bar_allocation));
+ new_bar->pdi = pdi;
+ new_bar->idx = idx;
+ new_bar->type = type;
+ new_bar->size = size;
+
+ /*
+	 * Search for a BAR whose size is smaller than the size of our newly
+ * allocated BAR.
+ */
+ struct pci_bar_allocation *bar = NULL;
+ TAILQ_FOREACH(bar, &pci_bars, chain) {
+ if (bar->size < size) {
+ break;
+ }
+ }
+
+ if (bar == NULL) {
+ /*
+ * Either the list is empty or new BAR is the smallest BAR of
+ * the list. Append it to the end of our list.
+ */
+ TAILQ_INSERT_TAIL(&pci_bars, new_bar, chain);
+ } else {
+ /*
+ * The found BAR is smaller than our new BAR. For that reason,
+ * insert our new BAR before the found BAR.
+ */
+ TAILQ_INSERT_BEFORE(bar, new_bar, chain);
+ }
+
+ /* update cmd reg */
+ uint16_t enbit = 0;
+ switch (type) {
+ case PCIBAR_IO:
+ enbit = PCIM_CMD_PORTEN;
+ break;
+ case PCIBAR_MEM64:
+ case PCIBAR_MEM32:
+ case PCIBAR_ROM:
+ enbit = PCIM_CMD_MEMEN;
+ break;
+ default:
+ enbit = 0;
+ break;
+ }
+
+ const uint16_t cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
+ pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);
+
+ return (0);
+}
+
+static int
+pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx, const enum pcibar_type type,
+ const uint64_t size)
+{
+ int error;
+ uint64_t *baseptr, limit, addr, mask, lobits, bar;
+
switch (type) {
case PCIBAR_NONE:
baseptr = NULL;
- addr = mask = lobits = enbit = 0;
+ addr = mask = lobits = 0;
break;
case PCIBAR_IO:
baseptr = &pci_emul_iobase;
limit = PCI_EMUL_IOLIMIT;
mask = PCIM_BAR_IO_BASE;
lobits = PCIM_BAR_IO_SPACE;
- enbit = PCIM_CMD_PORTEN;
break;
case PCIBAR_MEM64:
/*
@@ -689,14 +792,19 @@
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
}
- enbit = PCIM_CMD_MEMEN;
break;
case PCIBAR_MEM32:
baseptr = &pci_emul_membase32;
limit = PCI_EMUL_MEMLIMIT32;
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
- enbit = PCIM_CMD_MEMEN;
+ break;
+ case PCIBAR_ROM:
+ /* do not claim memory for ROM. OVMF will do it for us. */
+ baseptr = NULL;
+ limit = 0;
+ mask = PCIM_BIOS_ADDR_MASK;
+ lobits = 0;
break;
default:
printf("pci_emul_alloc_base: invalid bar type %d\n", type);
@@ -732,10 +840,57 @@
pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
}
- cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
- if ((cmd & enbit) != enbit)
- pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);
- register_bar(pdi, idx);
+ if (type != PCIBAR_ROM) {
+ register_bar(pdi, idx);
+ }
+
+ return (0);
+}
+
+int
+pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size,
+ void **const addr)
+{
+ /* allocate ROM space once on first call */
+ if (pci_emul_rombase == 0) {
+ pci_emul_rombase = vm_create_devmem(pdi->pi_vmctx, VM_PCIROM,
+ "pcirom", PCI_EMUL_ROMSIZE);
+ if (pci_emul_rombase == MAP_FAILED) {
+ warnx("%s: failed to create rom segment", __func__);
+ return (-1);
+ }
+ pci_emul_romlim = pci_emul_rombase + PCI_EMUL_ROMSIZE;
+ pci_emul_romoffset = 0;
+ }
+
+	/* ROM size should be a power of 2 and at least 2 KB */
+ const uint64_t rom_size = MAX(1UL << flsl(size),
+ ~PCIM_BIOS_ADDR_MASK + 1);
+
+ /* check if ROM fits into ROM space */
+ if (pci_emul_romoffset + rom_size > PCI_EMUL_ROMSIZE) {
+ warnx("%s: no space left in rom segment:", __func__);
+ warnx("%16lu bytes left",
+ PCI_EMUL_ROMSIZE - pci_emul_romoffset);
+ warnx("%16lu bytes required by %d/%d/%d", rom_size, pdi->pi_bus,
+ pdi->pi_slot, pdi->pi_func);
+ return (-1);
+ }
+
+ /* allocate ROM BAR */
+ const int error = pci_emul_alloc_bar(pdi, PCI_ROM_IDX, PCIBAR_ROM,
+ rom_size);
+ if (error)
+ return error;
+
+ /* return address */
+ *addr = pci_emul_rombase + pci_emul_romoffset;
+
+ /* save offset into ROM Space */
+ pdi->pi_romoffset = pci_emul_romoffset;
+
+ /* increase offset for next ROM */
+ pci_emul_romoffset += rom_size;
return (0);
}
@@ -1146,7 +1301,8 @@
}
#define BUSIO_ROUNDUP 32
-#define BUSMEM_ROUNDUP (1024 * 1024)
+#define BUSMEM32_ROUNDUP (1024 * 1024)
+#define BUSMEM64_ROUNDUP (512 * 1024 * 1024)
int
init_pci(struct vmctx *ctx)
@@ -1189,6 +1345,7 @@
bi->membase32 = pci_emul_membase32;
bi->membase64 = pci_emul_membase64;
+ /* first run: init devices */
for (slot = 0; slot < MAXSLOTS; slot++) {
si = &bi->slotinfo[slot];
for (func = 0; func < MAXFUNCS; func++) {
@@ -1228,6 +1385,16 @@
}
}
+ /* second run: assign BARs and free list */
+ struct pci_bar_allocation *bar;
+ struct pci_bar_allocation *bar_tmp;
+ TAILQ_FOREACH_SAFE(bar, &pci_bars, chain, bar_tmp) {
+ pci_emul_assign_bar(bar->pdi, bar->idx, bar->type,
+ bar->size);
+ free(bar);
+ }
+ TAILQ_INIT(&pci_bars);
+
/*
* Add some slop to the I/O and memory resources decoded by
* this bus to give a guest some flexibility if it wants to
@@ -1237,14 +1404,14 @@
pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
bi->iolimit = pci_emul_iobase;
- pci_emul_membase32 += BUSMEM_ROUNDUP;
+ pci_emul_membase32 += BUSMEM32_ROUNDUP;
pci_emul_membase32 = roundup2(pci_emul_membase32,
- BUSMEM_ROUNDUP);
+ BUSMEM32_ROUNDUP);
bi->memlimit32 = pci_emul_membase32;
- pci_emul_membase64 += BUSMEM_ROUNDUP;
+ pci_emul_membase64 += BUSMEM64_ROUNDUP;
pci_emul_membase64 = roundup2(pci_emul_membase64,
- BUSMEM_ROUNDUP);
+ BUSMEM64_ROUNDUP);
bi->memlimit64 = pci_emul_membase64;
}
@@ -1801,7 +1968,7 @@
* If the MMIO or I/O address space decoding has changed then
* register/unregister all BARs that decode that address space.
*/
- for (i = 0; i <= PCI_BARMAX; i++) {
+ for (i = 0; i <= PCI_BARMAX_WITH_ROM; i++) {
switch (pi->pi_bar[i].type) {
case PCIBAR_NONE:
case PCIBAR_MEMHI64:
@@ -1815,6 +1982,11 @@
unregister_bar(pi, i);
}
break;
+ case PCIBAR_ROM:
+			/* skip (un-)register of ROM if it is disabled */
+ if (!romen(pi))
+ break;
+ /* fallthrough */
case PCIBAR_MEM32:
case PCIBAR_MEM64:
/* MMIO address space decoding changed? */
@@ -1935,16 +2107,21 @@
return;
/*
- * Special handling for write to BAR registers
+ * Special handling for write to BAR and ROM registers
*/
- if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
+ if ((coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) ||
+ (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4)) {
/*
* Ignore writes to BAR registers that are not
* 4-byte aligned.
*/
if (bytes != 4 || (coff & 0x3) != 0)
return;
- idx = (coff - PCIR_BAR(0)) / 4;
+ if (coff != PCIR_BIOS) {
+ idx = (coff - PCIR_BAR(0)) / 4;
+ } else {
+ idx = PCI_ROM_IDX;
+ }
mask = ~(pi->pi_bar[idx].size - 1);
switch (pi->pi_bar[idx].type) {
case PCIBAR_NONE:
@@ -1987,6 +2164,20 @@
PCIBAR_MEMHI64);
}
break;
+ case PCIBAR_ROM:
+ addr = bar = *eax & mask;
+ if (memen(pi) && romen(pi)) {
+ unregister_bar(pi, idx);
+ }
+ pi->pi_bar[idx].addr = addr;
+ pi->pi_bar[idx].lobits = *eax &
+ PCIM_BIOS_ENABLE;
+						/* romen could have changed its value */
+ if (memen(pi) && romen(pi)) {
+ register_bar(pi, idx);
+ }
+ bar |= pi->pi_bar[idx].lobits;
+ break;
default:
assert(0);
}
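
As an illustration of the new ROM plumbing (not part of the patch), a device model could attach an option ROM image roughly as follows; the file handling here is an assumption made for the sketch, mirroring what the rom= option documented in bhyve.8 would trigger:

#include <sys/stat.h>

#include <err.h>
#include <fcntl.h>
#include <unistd.h>

#include "pci_emul.h"

/* Illustrative only: place a ROM image into the device's ROM BAR space. */
static int
example_load_rom(struct pci_devinst *const pi, const char *const romfile)
{
	struct stat sbuf;
	void *rom = NULL;
	int fd;

	fd = open(romfile, O_RDONLY);
	if (fd < 0)
		return (-1);
	if (fstat(fd, &sbuf) != 0) {
		close(fd);
		return (-1);
	}

	/* Reserve space in the shared ROM segment; sets pi->pi_romoffset. */
	if (pci_emul_alloc_rom(pi, sbuf.st_size, &rom) != 0) {
		close(fd);
		return (-1);
	}

	/* Copy the image; guest firmware maps and executes it later. */
	if (read(fd, rom, sbuf.st_size) != sbuf.st_size) {
		close(fd);
		return (-1);
	}

	close(fd);
	return (0);
}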
diff --git a/usr.sbin/bhyve/pci_gvt-d.c b/usr.sbin/bhyve/pci_gvt-d.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/pci_gvt-d.c
@@ -0,0 +1,262 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/mman.h>
+
+#include <machine/vmm.h>
+
+#include <dev/pci/pcireg.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include "e820.h"
+#include "inout.h"
+#include "pci_passthru.h"
+
+#define MB (1024 * 1024UL)
+#define GB (1024 * MB)
+
+#ifndef _PATH_MEM
+#define _PATH_MEM "/dev/mem"
+#endif
+
+/*
+ * PCI definitions
+ */
+#define PCIM_BDSM_GSM_ALIGNMENT \
+ 0x00100000 /* Graphics Stolen Memory is 1 MB aligned */
+
+/* GVT-d definitions */
+#define GVT_D_MAP_OPREGION 0
+#define GVT_D_MAP_GSM 1
+
+static int
+gvt_d_aslswrite(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ const uint32_t val)
+{
+ struct passthru_softc *const sc = pi->pi_arg;
+
+ struct passthru_mmio_mapping *const opregion =
+ &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+
+ /* write new value to cfg space */
+ if (bytes == 1) {
+ pci_set_cfgdata8(pi, coff, val);
+ } else if (bytes == 2) {
+ pci_set_cfgdata16(pi, coff, val);
+ } else {
+ pci_set_cfgdata32(pi, coff, val);
+ }
+
+ /* get new address of opregion */
+ opregion->gpa = pci_get_cfgdata32(pi, PCIR_ASLS_CTL);
+
+ /* copy opregion into guest mem */
+ opregion->gva = vm_map_gpa(ctx, opregion->gpa, opregion->len);
+ if (opregion->gva == 0) {
+ warnx("%s: Unable to map opregion (0x%016lx)", __func__,
+ opregion->gpa);
+ /* return 0 to avoid emulation of ASLS register */
+ return (0);
+ }
+ memcpy(opregion->gva, opregion->hva, opregion->len);
+
+ return (0);
+}
+
+static vm_paddr_t
+gvt_d_alloc_mmio_memory(const vm_paddr_t host_address, const vm_paddr_t length,
+ const vm_paddr_t alignment, const enum e820_memory_type type)
+{
+ /* try to use host address */
+ const vm_paddr_t address = e820_alloc(host_address, length,
+ E820_ALIGNMENT_NONE, type, E820_ALLOCATE_SPECIFIC);
+ if (address != 0) {
+ return address;
+ }
+
+ /* try to use highest address below 4 GB */
+ return e820_alloc(4 * GB, length, alignment, type,
+ E820_ALLOCATE_HIGHEST);
+}
+
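+/*
+ * Graphics Stolen Memory (GSM) is main memory reserved by firmware for use
+ * by the integrated GPU. Query its host location, reserve a matching region
+ * in the guest E820 table and report it to the guest via the BDSM register.
+ */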
+static int
+gvt_d_setup_gsm(struct vmctx *const ctx, struct pci_devinst *const pi)
+{
+ struct passthru_softc *const sc = pi->pi_arg;
+
+ struct passthru_mmio_mapping *const gsm =
+ &sc->psc_mmio_map[GVT_D_MAP_GSM];
+
+ const int error = vm_get_memory_region_info(ctx, &gsm->hpa, &gsm->len,
+ MEMORY_REGION_INTEL_GSM);
+ if (error) {
+ warnx(
+ "%s: Unable to get Graphics Stolen Memory base and length",
+ __func__);
+ return (error);
+ }
+ gsm->hva = NULL; /* unused */
+ gsm->gva = NULL; /* unused */
+ gsm->gpa = gvt_d_alloc_mmio_memory(gsm->hpa, gsm->len,
+ PCIM_BDSM_GSM_ALIGNMENT, E820_TYPE_RESERVED);
+ if (gsm->gpa == 0) {
+ warnx(
+ "%s: Unable to add Graphics Stolen Memory to E820 table (hpa 0x%lx len 0x%lx)",
+ __func__, gsm->hpa, gsm->len);
+ e820_dump_table();
+ return (-1);
+ }
+ if (gsm->gpa != gsm->hpa) {
+ /*
+ * The ACRN source code implies that the graphics driver for
+ * newer Intel platforms like Tiger Lake reads the Graphics
+ * Stolen Memory address from an MMIO register. We have three
+ * options to solve this issue:
+ * 1. Patch the value in the MMIO register
+ * This could have unintended side effects. Without
+ * any documentation on how this register is used by
+ * the GPU, don't do it.
+ * 2. Trap the MMIO register
+ * It's not possible to trap a single MMIO
+ * register. We would have to trap a whole page, and
+ * trapping a bunch of MMIO registers could degrade
+ * performance noticeably.
+ * 3. Use a 1:1 host to guest mapping
+ * Maybe not always possible.
+ * As far as we know, no supported platform requires a 1:1
+ * mapping. For that reason, just log a warning.
+ */
+ warnx(
+ "Warning: Unable to reuse host address of Graphics Stolen Memory. GPU passthrough might not work properly.");
+ }
+
+ const uint64_t bdsm = read_config(&sc->psc_sel, PCIR_BDSM, 4);
+ pci_set_cfgdata32(pi, PCIR_BDSM,
+ gsm->gpa | (bdsm & (PCIM_BDSM_GSM_ALIGNMENT - 1)));
+
+ return (0);
+}
+
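+/*
+ * Map the host OpRegion, reserve an ACPI NVS region for it in the guest
+ * E820 table, copy its contents into guest memory and point the ASLS
+ * register at the copy.
+ */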
+static int
+gvt_d_setup_opregion(struct vmctx *const ctx, struct pci_devinst *const pi,
+ const int memfd)
+{
+ struct passthru_softc *const sc = pi->pi_arg;
+
+ struct passthru_mmio_mapping *const opregion =
+ &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+
+ const int error = vm_get_memory_region_info(ctx, &opregion->hpa,
+ &opregion->len, MEMORY_REGION_INTEL_OPREGION);
+ if (error) {
+ warnx("%s: Unable to get OpRegion base and length", __func__);
+ return (error);
+ }
+ opregion->hva = mmap(NULL, opregion->len, PROT_READ, MAP_SHARED, memfd,
+ opregion->hpa);
+ if (opregion->hva == MAP_FAILED) {
+ warnx("%s: Unable to map host OpRegion", __func__);
+ return (-1);
+ }
+ opregion->gpa = gvt_d_alloc_mmio_memory(opregion->hpa, opregion->len,
+ E820_ALIGNMENT_NONE, E820_TYPE_NVS);
+ if (opregion->gpa == 0) {
+ warnx(
+ "%s: Unable to add OpRegion to E820 table (hpa 0x%lx len 0x%lx)",
+ __func__, opregion->hpa, opregion->len);
+ e820_dump_table();
+ return (-1);
+ }
+ opregion->gva = vm_map_gpa(ctx, opregion->gpa, opregion->len);
+ if (opregion->gva == NULL) {
+ warnx("%s: Unable to map guest OpRegion", __func__);
+ return (-1);
+ }
+ if (opregion->gpa != opregion->hpa) {
+ /*
+ * A 1:1 host to guest mapping is not required but this could
+ * change in the future.
+ */
+ warnx(
+ "Warning: Unable to reuse host address of OpRegion. GPU passthrough might not work properly.");
+ }
+
+ memcpy(opregion->gva, opregion->hva, opregion->len);
+
+ pci_set_cfgdata32(pi, PCIR_ASLS_CTL, opregion->gpa);
+
+ return (0);
+}
+
+int
+gvt_d_init(struct vmctx *const ctx, struct pci_devinst *const pi,
+ nvlist_t *const nvl)
+{
+ int error;
+
+ struct passthru_softc *const sc = pi->pi_arg;
+
+ /* get memory descriptor */
+ const int memfd = open(_PATH_MEM, O_RDWR, 0);
+ if (memfd < 0) {
+ warn("%s: Failed to open %s", __func__, _PATH_MEM);
+ return (-1);
+ }
+
+ if ((error = gvt_d_setup_gsm(ctx, pi)) != 0) {
+ warnx("%s: Unable to setup Graphics Stolen Memory", __func__);
+ goto done;
+ }
+
+ if ((error = gvt_d_setup_opregion(ctx, pi, memfd)) != 0) {
+ warnx("%s: Unable to setup OpRegion", __func__);
+ goto done;
+ }
+
+ /* protect Graphics Stolen Memory register */
+ if ((error = set_pcir_handler(sc, PCIR_BDSM, 4,
+ passthru_cfgread_emulate, passthru_cfgwrite_emulate)) != 0) {
+ warnx("%s: Unable to protect opregion", __func__);
+ goto done;
+ }
+ /* protect opregion register */
+ if ((error = set_pcir_handler(sc, PCIR_ASLS_CTL, 4,
+ passthru_cfgread_emulate, gvt_d_aslswrite)) != 0) {
+ warnx("%s: Unable to protect opregion", __func__);
+ goto done;
+ }
+
+done:
+ return (error);
+}
+
+void
+gvt_d_deinit(struct vmctx *const ctx, struct pci_devinst *const pi)
+{
+ struct passthru_softc *const sc = pi->pi_arg;
+
+ struct passthru_mmio_mapping *const opregion =
+ &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+
+ /* HVA is only set if it's initialized */
+ if (opregion->hva)
+ munmap((void *)opregion->hva, opregion->len);
+}
diff --git a/usr.sbin/bhyve/pci_lpc.h b/usr.sbin/bhyve/pci_lpc.h
--- a/usr.sbin/bhyve/pci_lpc.h
+++ b/usr.sbin/bhyve/pci_lpc.h
@@ -72,5 +72,6 @@
char *lpc_pirq_name(int pin);
void lpc_pirq_routed(void);
const char *lpc_bootrom(void);
+const char *lpc_fwcfg(void);
#endif
diff --git a/usr.sbin/bhyve/pci_lpc.c b/usr.sbin/bhyve/pci_lpc.c
--- a/usr.sbin/bhyve/pci_lpc.c
+++ b/usr.sbin/bhyve/pci_lpc.c
@@ -32,13 +32,24 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
#include <sys/types.h>
+#include <sys/pciio.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sysexits.h>
#include <vmmapi.h>
@@ -85,6 +96,29 @@
"COM1", "COM2", "COM3", "COM4"
};
+#ifndef _PATH_DEVPCI
+#define _PATH_DEVPCI "/dev/pci"
+#endif
+
+static int pcifd = -1;
+
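+/*
+ * Read a register of a physical PCI device via /dev/pci. A failed PCIOCREAD
+ * simply yields 0.
+ */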
+static uint32_t
+read_config(const struct pcisel *const sel, const long reg, const int width)
+{
+ struct pci_io pi;
+ pi.pi_sel.pc_domain = sel->pc_domain;
+ pi.pi_sel.pc_bus = sel->pc_bus;
+ pi.pi_sel.pc_dev = sel->pc_dev;
+ pi.pi_sel.pc_func = sel->pc_func;
+ pi.pi_reg = reg;
+ pi.pi_width = width;
+
+ if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
+ return (0);
+
+ return (pi.pi_data);
+}
+
/*
* LPC device configuration is in the following form:
* <lpc_device_name>[,<options>]
@@ -101,7 +135,13 @@
lpcdev = strsep(&str, ",");
if (lpcdev != NULL) {
if (strcasecmp(lpcdev, "bootrom") == 0) {
- set_config_value("lpc.bootrom", str);
+ nvlist_t *const nvl = create_config_node("lpc.bootrom");
+ /* use qemu as default fwcfg */
+ set_config_value_node(nvl, "fwcfg", "qemu");
+
+ const char *const code = strsep(&str, ",");
+ set_config_value_node(nvl, "code", code);
+ pci_parse_legacy_config(nvl, str);
error = 0;
goto done;
}
@@ -145,7 +185,13 @@
lpc_bootrom(void)
{
- return (get_config_value("lpc.bootrom"));
+ return (get_config_value("lpc.bootrom.code"));
+}
+
+const char *
+lpc_fwcfg(void)
+{
+ return (get_config_value("lpc.bootrom.fwcfg"));
}
static void
@@ -208,7 +254,7 @@
char *node_name;
int unit, error;
- romfile = get_config_value("lpc.bootrom");
+ romfile = get_config_value("lpc.bootrom.code");
if (romfile != NULL) {
error = bootrom_loadrom(ctx, romfile);
if (error)
@@ -452,6 +498,48 @@
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA);
+ pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
+ if (pcifd < 0) {
+ warn("failed to open %s", _PATH_DEVPCI);
+ return (-1);
+ }
+
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_t pcifd_rights;
+ cap_rights_init(&pcifd_rights, CAP_IOCTL, CAP_READ);
+
+ const cap_ioctl_t pcifd_ioctls[] = { PCIOCREAD };
+
+ if (caph_rights_limit(pcifd, &pcifd_rights) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+ if (caph_ioctls_limit(pcifd, pcifd_ioctls, nitems(pcifd_ioctls)) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+#endif
+
+ /* on Intel systems the LPC bridge is always connected to 0:1f.0 */
+ const struct pcisel sel = { .pc_dev = 0x1f };
+
+ if (read_config(&sel, PCIR_VENDOR, 2) == PCI_VENDOR_INTEL) {
+ /*
+ * The VID, DID, REVID, SUBVID and SUBDID of igd-lpc need to
+ * match the values of the physical LPC bridge. Without these
+ * physical values, the GVT-d GOP driver doesn't work.
+ */
+ pci_set_cfgdata16(pi, PCIR_DEVICE,
+ read_config(&sel, PCIR_DEVICE, 2));
+ pci_set_cfgdata16(pi, PCIR_VENDOR,
+ read_config(&sel, PCIR_VENDOR, 2));
+ pci_set_cfgdata8(pi, PCIR_REVID,
+ read_config(&sel, PCIR_REVID, 1));
+ pci_set_cfgdata16(pi, PCIR_SUBVEND_0,
+ read_config(&sel, PCIR_SUBVEND_0, 2));
+ pci_set_cfgdata16(pi, PCIR_SUBDEV_0,
+ read_config(&sel, PCIR_SUBDEV_0, 2));
+ }
+
+ close(pcifd);
+ pcifd = -1;
+
lpc_bridge = pi;
return (0);
diff --git a/usr.sbin/bhyve/pci_passthru.h b/usr.sbin/bhyve/pci_passthru.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/pci_passthru.h
@@ -0,0 +1,69 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#pragma once
+
+#include <sys/pciio.h>
+
+#include <vmmapi.h>
+
+#include "pci_emul.h"
+
+struct passthru_mmio_mapping {
+ vm_paddr_t gpa; /* guest physical address */
+ void *gva; /* guest virtual address */
+ vm_paddr_t hpa; /* host physical address */
+ void *hva; /* host virtual address */
+ vm_paddr_t len;
+};
+
+typedef int (*cfgread_handler)(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ uint32_t *const rv);
+typedef int (*cfgwrite_handler)(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ const uint32_t val);
+
+struct passthru_softc {
+ struct pci_devinst *psc_pi;
+ /* ROM is handled like a BAR */
+ struct pcibar psc_bar[PCI_BARMAX_WITH_ROM + 1];
+ struct {
+ int capoff;
+ int msgctrl;
+ int emulated;
+ } psc_msi;
+ struct {
+ int capoff;
+ } psc_msix;
+ struct pcisel psc_sel;
+
+ struct passthru_mmio_mapping psc_mmio_map[2];
+ cfgread_handler psc_pcir_rhandler[PCI_REGMAX + 1];
+ cfgwrite_handler psc_pcir_whandler[PCI_REGMAX + 1];
+};
+
+uint32_t read_config(const struct pcisel *sel, long reg, int width);
+void write_config(const struct pcisel *sel, long reg, int width, uint32_t data);
+int passthru_cfgread_default(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ uint32_t *const rv);
+int passthru_cfgread_emulate(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ uint32_t *const rv);
+int passthru_cfgwrite_default(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ const uint32_t val);
+int passthru_cfgwrite_emulate(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ const uint32_t val);
+int set_pcir_handler(struct passthru_softc *const sc, const uint32_t reg,
+ const uint32_t len, const cfgread_handler rhandler,
+ const cfgwrite_handler whandler);
+int gvt_d_init(struct vmctx *const ctx, struct pci_devinst *const pi,
+ nvlist_t *const nvl);
+void gvt_d_deinit(struct vmctx *const ctx, struct pci_devinst *const pi);
diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c
--- a/usr.sbin/bhyve/pci_passthru.c
+++ b/usr.sbin/bhyve/pci_passthru.c
@@ -37,8 +37,8 @@
#endif
#include <sys/types.h>
#include <sys/mman.h>
-#include <sys/pciio.h>
#include <sys/ioctl.h>
+#include <sys/stat.h>
#include <dev/io/iodev.h>
#include <dev/pci/pcireg.h>
@@ -61,12 +61,11 @@
#include <unistd.h>
#include <machine/vmm.h>
-#include <vmmapi.h>
#include "config.h"
#include "debug.h"
-#include "pci_emul.h"
#include "mem.h"
+#include "pci_passthru.h"
#ifndef _PATH_DEVPCI
#define _PATH_DEVPCI "/dev/pci"
@@ -77,21 +76,9 @@
#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1)
#define MSIX_CAPLEN 12
-static int pcifd = -1;
+#define PCI_CAP_START_OFFSET 0x40
-struct passthru_softc {
- struct pci_devinst *psc_pi;
- struct pcibar psc_bar[PCI_BARMAX + 1];
- struct {
- int capoff;
- int msgctrl;
- int emulated;
- } psc_msi;
- struct {
- int capoff;
- } psc_msix;
- struct pcisel psc_sel;
-};
+static int pcifd = -1;
static int
msi_caplen(int msgctrl)
@@ -115,7 +102,7 @@
return (len);
}
-static uint32_t
+uint32_t
read_config(const struct pcisel *sel, long reg, int width)
{
struct pci_io pi;
@@ -131,7 +118,7 @@
return (pi.pi_data);
}
-static void
+void
write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
{
struct pci_io pi;
@@ -581,6 +568,17 @@
sc->psc_sel.pc_dev = slot;
sc->psc_sel.pc_func = func;
+ /* copy physical PCI header to virtual cfgspace */
+ for (uint32_t i = 0; i < PCI_CAP_START_OFFSET; ++i) {
+ /*
+ * INTLINE and INTPIN shouldn't be copied from the physical
+ * device; they are already set by pci_emul_init.
+ */
+ if (i == PCIR_INTLINE || i == PCIR_INTPIN)
+ continue;
+ pci_set_cfgdata8(pi, i, read_config(&sc->psc_sel, i, 1));
+ }
+
if (cfginitmsi(sc) != 0) {
warnx("failed to initialize MSI for PCI %d/%d/%d",
bus, slot, func);
@@ -601,6 +599,22 @@
return (error);
}
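+/*
+ * Install config space read/write handlers for the register range
+ * [reg, reg + len). A handler is stored per byte; accesses are dispatched
+ * based on the first byte they touch.
+ */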
+int
+set_pcir_handler(struct passthru_softc *const sc, const uint32_t reg,
+ const uint32_t len, const cfgread_handler rhandler,
+ const cfgwrite_handler whandler)
+{
+ if (reg > PCI_REGMAX || reg + len > PCI_REGMAX + 1)
+ return (-1);
+
+ for (uint32_t i = reg; i < reg + len; ++i) {
+ sc->psc_pcir_rhandler[i] = rhandler;
+ sc->psc_pcir_whandler[i] = whandler;
+ }
+
+ return (0);
+}
+
static int
passthru_legacy_config(nvlist_t *nvl, const char *opts)
{
@@ -621,9 +635,101 @@
set_config_value_node(nvl, "slot", value);
snprintf(value, sizeof(value), "%d", func);
set_config_value_node(nvl, "func", value);
+
+ return (pci_parse_legacy_config(nvl, strchr(opts, ',')));
+}
+
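+/*
+ * Load a ROM file into the emulated expansion ROM BAR: map the file, copy
+ * it into a bhyve-allocated ROM segment and describe it in the ROM BAR of
+ * the passed-through device.
+ */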
+static int
+passthru_init_rom(struct vmctx *const ctx, struct passthru_softc *const sc,
+ const char *const romfile)
+{
+ if (romfile == NULL) {
+ return (0);
+ }
+
+ const int fd = open(romfile, O_RDONLY);
+ if (fd < 0) {
+ warnx("%s: can't open romfile \"%s\"", __func__, romfile);
+ return (-1);
+ }
+
+ struct stat sbuf;
+ if (fstat(fd, &sbuf) < 0) {
+ warnx("%s: can't fstat romfile \"%s\"", __func__, romfile);
+ close(fd);
+ return (-1);
+ }
+ const uint64_t rom_size = sbuf.st_size;
+
+ void *const rom_data = mmap(NULL, rom_size, PROT_READ, MAP_SHARED, fd,
+ 0);
+ if (rom_data == MAP_FAILED) {
+ warnx("%s: unable to mmap romfile \"%s\" (%d)", __func__,
+ romfile, errno);
+ close(fd);
+ return (-1);
+ }
+
+ void *rom_addr;
+ int error = pci_emul_alloc_rom(sc->psc_pi, rom_size, &rom_addr);
+ if (error) {
+ warnx("%s: failed to alloc rom segment", __func__);
+ munmap(rom_data, rom_size);
+ close(fd);
+ return (error);
+ }
+ memcpy(rom_addr, rom_data, rom_size);
+
+ sc->psc_bar[PCI_ROM_IDX].type = PCIBAR_ROM;
+ sc->psc_bar[PCI_ROM_IDX].addr = (uint64_t)rom_addr;
+ sc->psc_bar[PCI_ROM_IDX].size = rom_size;
+
+ munmap(rom_data, rom_size);
+ close(fd);
+
return (0);
}
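+/*
+ * Dispatch device-specific (quirk) initialization. Currently only Intel
+ * display devices (GVT-d) need extra setup.
+ */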
+static int
+passthru_init_quirks(struct vmctx *const ctx, struct pci_devinst *const pi,
+ nvlist_t *const nvl)
+{
+ struct passthru_softc *const sc = pi->pi_arg;
+
+ const uint16_t vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 0x02);
+ const uint8_t class = read_config(&sc->psc_sel, PCIR_CLASS, 0x01);
+
+ /* currently only display devices have quirks */
+ if (class != PCIC_DISPLAY)
+ return (0);
+
+ if (vendor == PCI_VENDOR_INTEL)
+ return gvt_d_init(ctx, pi, nvl);
+
+ return (0);
+}
+
+static void
+passthru_deinit_quirks(struct vmctx *const ctx, struct pci_devinst *const pi)
+{
+ struct passthru_softc *const sc = pi->pi_arg;
+
+ if (sc == NULL)
+ return;
+
+ const uint16_t vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 0x02);
+ const uint8_t class = read_config(&sc->psc_sel, PCIR_CLASS, 0x01);
+
+ /* currently only display devices have quirks */
+ if (class != PCIC_DISPLAY)
+ return;
+
+ if (vendor == PCI_VENDOR_INTEL)
+ return gvt_d_deinit(ctx, pi);
+
+ return;
+}
+
static int
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
{
@@ -689,9 +795,34 @@
sc->psc_pi = pi;
/* initialize config space */
- error = cfginit(ctx, pi, bus, slot, func);
+ if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
+ goto done;
+
+ /* set default handler for all PCI registers */
+ if ((error = set_pcir_handler(sc, 0, PCI_REGMAX + 1,
+ passthru_cfgread_default, passthru_cfgwrite_default)) != 0)
+ goto done;
+ /* protect PCI header */
+ if ((error = set_pcir_handler(sc, 0, PCI_CAP_START_OFFSET,
+ passthru_cfgread_emulate, passthru_cfgwrite_emulate)) != 0)
+ goto done;
+ /* allow access to command and status register */
+ if ((error = set_pcir_handler(sc, PCIR_COMMAND, 0x04,
+ passthru_cfgread_default, passthru_cfgwrite_default)) != 0)
+ goto done;
+
+ if ((error = passthru_init_quirks(ctx, pi, nvl)) != 0)
+ goto done;
+
+ /* initialize ROM */
+ if ((error = passthru_init_rom(ctx, sc,
+ get_config_value_node(nvl, "rom"))) != 0)
+ goto done;
+
+ error = 0; /* success */
done:
if (error) {
+ passthru_deinit_quirks(ctx, pi);
free(sc);
vm_unassign_pptdev(ctx, bus, slot, func);
}
@@ -701,7 +832,8 @@
static int
bar_access(int coff)
{
- if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1))
+ if ((coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) ||
+ coff == PCIR_BIOS)
return (1);
else
return (0);
@@ -736,29 +868,27 @@
static int
passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int coff, int bytes, uint32_t *rv)
+{
+ struct passthru_softc *const sc = pi->pi_arg;
+
+ return sc->psc_pcir_rhandler[coff](ctx, vcpu, pi, coff, bytes, rv);
+}
+
+int
+passthru_cfgread_default(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ uint32_t *const rv)
{
struct passthru_softc *sc;
sc = pi->pi_arg;
/*
- * PCI BARs and MSI capability is emulated.
+ * MSI capability is emulated.
*/
- if (bar_access(coff) || msicap_access(sc, coff) ||
- msixcap_access(sc, coff))
+ if (msicap_access(sc, coff) || msixcap_access(sc, coff))
return (-1);
-#ifdef LEGACY_SUPPORT
- /*
- * Emulate PCIR_CAP_PTR if this device does not support MSI capability
- * natively.
- */
- if (sc->psc_msi.emulated) {
- if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4)
- return (-1);
- }
-#endif
-
/*
* Emulate the command register. If a single read reads both the
* command and status registers, read the status register from the
@@ -778,9 +908,27 @@
return (0);
}
+int
+passthru_cfgread_emulate(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ uint32_t *const rv)
+{
+ return (-1);
+}
+
static int
passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int coff, int bytes, uint32_t val)
+{
+ struct passthru_softc *const sc = pi->pi_arg;
+
+ return sc->psc_pcir_whandler[coff](ctx, vcpu, pi, coff, bytes, val);
+}
+
+int
+passthru_cfgwrite_default(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ const uint32_t val)
{
int error, msix_table_entries, i;
struct passthru_softc *sc;
@@ -788,12 +936,6 @@
sc = pi->pi_arg;
- /*
- * PCI BARs are emulated
- */
- if (bar_access(coff))
- return (-1);
-
/*
* MSI capability is emulated
*/
@@ -834,6 +976,7 @@
return (0);
}
+ uint32_t write_val = val;
#ifdef LEGACY_SUPPORT
/*
* If this device does not support MSI natively then we cannot let
@@ -842,23 +985,31 @@
*/
if (sc->psc_msi.emulated && pci_msi_enabled(pi)) {
if (coff == PCIR_COMMAND && bytes == 2)
- val &= ~PCIM_CMD_INTxDIS;
+ write_val &= ~PCIM_CMD_INTxDIS;
}
#endif
- write_config(&sc->psc_sel, coff, bytes, val);
+ write_config(&sc->psc_sel, coff, bytes, write_val);
if (coff == PCIR_COMMAND) {
cmd_old = pci_get_cfgdata16(pi, PCIR_COMMAND);
if (bytes == 1)
- pci_set_cfgdata8(pi, PCIR_COMMAND, val);
+ pci_set_cfgdata8(pi, PCIR_COMMAND, write_val);
else if (bytes == 2)
- pci_set_cfgdata16(pi, PCIR_COMMAND, val);
+ pci_set_cfgdata16(pi, PCIR_COMMAND, write_val);
pci_emul_cmd_changed(pi, cmd_old);
}
return (0);
}
+int
+passthru_cfgwrite_emulate(struct vmctx *const ctx, const int vcpu,
+ struct pci_devinst *const pi, const int coff, const int bytes,
+ const uint32_t val)
+{
+ return (-1);
+}
+
static void
passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
uint64_t offset, int size, uint64_t value)
@@ -993,16 +1144,49 @@
}
static void
-passthru_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
- int enabled, uint64_t address)
+passthru_addr_rom(struct pci_devinst *const pi, const int idx,
+ const int enabled)
{
+ const uint64_t addr = pi->pi_bar[idx].addr;
+ const uint64_t size = pi->pi_bar[idx].size;
- if (pi->pi_bar[baridx].type == PCIBAR_IO)
- return;
- if (baridx == pci_msix_table_bar(pi))
- passthru_msix_addr(ctx, pi, baridx, enabled, address);
- else
- passthru_mmio_addr(ctx, pi, baridx, enabled, address);
+ if (!enabled) {
+ if (vm_munmap_memseg(pi->pi_vmctx, addr, size) != 0) {
+ warnx("%s: munmap_memseg @ [%016lx - %016lx] failed",
+ __func__, addr, addr + size);
+ }
+
+ } else {
+ if (vm_mmap_memseg(pi->pi_vmctx, addr, VM_PCIROM,
+ pi->pi_romoffset, size, PROT_READ | PROT_EXEC) != 0) {
+ warnx("%s: mnmap_memseg @ [%016lx - %016lx] failed",
+ __func__, addr, addr + size);
+ }
+ }
+}
+
+static void
+passthru_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
+ int enabled, uint64_t address)
+{
+ switch (pi->pi_bar[baridx].type) {
+ case PCIBAR_IO:
+ /* IO BARs are emulated */
+ break;
+ case PCIBAR_ROM:
+ passthru_addr_rom(pi, baridx, enabled);
+ break;
+ case PCIBAR_MEM32:
+ case PCIBAR_MEM64:
+ if (baridx == pci_msix_table_bar(pi))
+ passthru_msix_addr(ctx, pi, baridx, enabled, address);
+ else
+ passthru_mmio_addr(ctx, pi, baridx, enabled, address);
+ break;
+ default:
+ errx(4, "%s: invalid BAR type %d", __func__,
+ pi->pi_bar[baridx].type);
+ }
}
struct pci_devemu passthru = {
diff --git a/usr.sbin/bhyve/qemu_fwcfg.h b/usr.sbin/bhyve/qemu_fwcfg.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/qemu_fwcfg.h
@@ -0,0 +1,24 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#pragma once
+
+#include <vmmapi.h>
+
+#define QEMU_FWCFG_MAX_ARCHS 0x2
+#define QEMU_FWCFG_MAX_ENTRIES 0x3FFF
+#define QEMU_FWCFG_MAX_NAME 56
+
+struct qemu_fwcfg_item {
+ uint32_t size;
+ uint8_t *data;
+};
+
+int qemu_fwcfg_add_file(const uint8_t name[QEMU_FWCFG_MAX_NAME],
+ const uint32_t size, void *const data);
+int qemu_fwcfg_init(struct vmctx *const ctx);
+int qemu_fwcfg_parse_cmdline_arg(const char *opt);
diff --git a/usr.sbin/bhyve/qemu_fwcfg.c b/usr.sbin/bhyve/qemu_fwcfg.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/qemu_fwcfg.c
@@ -0,0 +1,541 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
+ * Author: Corvin Köhne <c.koehne@beckhoff.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/endian.h>
+#include <sys/queue.h>
+
+#include <machine/vmm.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "acpi_device.h"
+#include "inout.h"
+#include "qemu_fwcfg.h"
+
+#define QEMU_FWCFG_ACPI_DEVICE_NAME "FWCF"
+#define QEMU_FWCFG_ACPI_HARDWARE_ID "QEMU0002"
+
+#define QEMU_FWCFG_SELECTOR_PORT_NUMBER 0x510
+#define QEMU_FWCFG_SELECTOR_PORT_SIZE 1
+#define QEMU_FWCFG_SELECTOR_PORT_FLAGS IOPORT_F_INOUT
+#define QEMU_FWCFG_DATA_PORT_NUMBER 0x511
+#define QEMU_FWCFG_DATA_PORT_SIZE 1
+#define QEMU_FWCFG_DATA_PORT_FLAGS \
+ IOPORT_F_INOUT /* QEMU v2.4+ ignores writes */
+
+#define QEMU_FWCFG_ARCHITECTURE_MASK 0x0001
+#define QEMU_FWCFG_INDEX_MASK 0x3FFF
+
+#define QEMU_FWCFG_SELECT_READ 0
+#define QEMU_FWCFG_SELECT_WRITE 1
+
+#define QEMU_FWCFG_ARCHITECTURE_GENERIC 0
+#define QEMU_FWCFG_ARCHITECTURE_SPECIFIC 1
+
+#define QEMU_FWCFG_INDEX_SIGNATURE 0x00
+#define QEMU_FWCFG_INDEX_ID 0x01
+#define QEMU_FWCFG_INDEX_FILE_DIR 0x19
+
+#define QEMU_FWCFG_FIRST_FILE_INDEX 0x20
+
+#define QEMU_FWCFG_MIN_FILES 10
+
+#pragma pack(1)
+
+union qemu_fwcfg_selector {
+ struct {
+ uint16_t index : 14;
+ uint16_t writeable : 1;
+ /*
+ * 0 = generic | for all architectures
+ * 1 = specific | only for current architecture
+ */
+ uint16_t architecture : 1;
+ };
+ uint16_t bits;
+};
+
+struct qemu_fwcfg_signature {
+ uint8_t signature[4];
+};
+
+struct qemu_fwcfg_id {
+ uint32_t interface : 1; /* always set */
+ uint32_t DMA : 1;
+ uint32_t reserved : 30;
+};
+
+struct qemu_fwcfg_file {
+ uint32_t be_size;
+ uint16_t be_selector;
+ uint16_t reserved;
+ uint8_t name[QEMU_FWCFG_MAX_NAME];
+};
+
+struct qemu_fwcfg_directory {
+ uint32_t be_count;
+ struct qemu_fwcfg_file files[0];
+};
+
+struct qemu_fwcfg_softc {
+ struct acpi_device *acpi_dev;
+
+ uint32_t data_offset;
+ union qemu_fwcfg_selector selector;
+ struct qemu_fwcfg_item items[QEMU_FWCFG_MAX_ARCHS]
+ [QEMU_FWCFG_MAX_ENTRIES];
+ struct qemu_fwcfg_directory *directory;
+};
+
+#pragma pack()
+
+static struct qemu_fwcfg_softc sc;
+
+struct qemu_fwcfg_user_file {
+ STAILQ_ENTRY(qemu_fwcfg_user_file) chain;
+ uint8_t name[QEMU_FWCFG_MAX_NAME];
+ uint32_t size;
+ void *data;
+};
+STAILQ_HEAD(qemu_fwcfg_user_file_list,
+ qemu_fwcfg_user_file) user_files = STAILQ_HEAD_INITIALIZER(user_files);
+
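+/*
+ * Selector port: reads return the current selector value, writes select a
+ * new fwcfg item and reset the data offset to the start of that item.
+ */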
+static int
+qemu_fwcfg_selector_port_handler(struct vmctx *const ctx, const int vcpu,
+ const int in, const int port, const int bytes, uint32_t *const eax,
+ void *const arg)
+{
+ if (in) {
+ *eax = *(uint16_t *)&sc.selector;
+ return (0);
+ }
+
+ sc.data_offset = 0;
+ sc.selector.bits = *eax;
+
+ return (0);
+}
+
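+/*
+ * Data port: each read returns the next byte of the selected fwcfg item.
+ * Reads of a nonexistent item or past the end of an item return 0.
+ */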
+static int
+qemu_fwcfg_data_port_handler(struct vmctx *const ctx, const int vcpu,
+ const int in, const int port, const int bytes, uint32_t *const eax,
+ void *const arg)
+{
+ if (!in) {
+ warnx("%s: Writes to qemu fwcfg data port aren't allowed",
+ __func__);
+ return (-1);
+ }
+
+ /* get fwcfg item */
+ struct qemu_fwcfg_item *const item =
+ &sc.items[sc.selector.architecture][sc.selector.index];
+ if (item->data == NULL) {
+ warnx(
+ "%s: qemu fwcfg item doesn't exist (architecture %s index 0x%x)",
+ __func__, sc.selector.architecture ? "specific" : "generic",
+ sc.selector.index);
+ *eax = 0x00;
+ return (0);
+ } else if (sc.data_offset >= item->size) {
+ warnx(
+ "%s: qemu fwcfg item read exceeds size (architecture %s index 0x%x size 0x%x offset 0x%x)",
+ __func__, sc.selector.architecture ? "specific" : "generic",
+ sc.selector.index, item->size, sc.data_offset);
+ *eax = 0x00;
+ return (0);
+ }
+
+ /* return item data */
+ *eax = item->data[sc.data_offset];
+ sc.data_offset++;
+
+ return (0);
+}
+
+static int
+qemu_fwcfg_add_item(const uint16_t architecture, const uint16_t index,
+ const uint32_t size, void *const data)
+{
+ /* truncate architecture and index to their desired size */
+ const uint16_t arch = architecture & QEMU_FWCFG_ARCHITECTURE_MASK;
+ const uint16_t idx = index & QEMU_FWCFG_INDEX_MASK;
+
+ /* get pointer to item specified by selector */
+ struct qemu_fwcfg_item *const fwcfg_item = &sc.items[arch][idx];
+
+ /* check if item is already used */
+ if (fwcfg_item->data != NULL) {
+ warnx("%s: qemu fwcfg item exists (architecture %s index 0x%x)",
+ __func__, arch ? "specific" : "generic", idx);
+ return (-1);
+ }
+
+ /* save data of the item */
+ fwcfg_item->size = size;
+ fwcfg_item->data = data;
+
+ return (0);
+}
+
+static int
+qemu_fwcfg_add_item_file_dir(void)
+{
+ /* alloc directory */
+ const size_t size = sizeof(struct qemu_fwcfg_directory) +
+ QEMU_FWCFG_MIN_FILES * sizeof(struct qemu_fwcfg_file);
+ struct qemu_fwcfg_directory *const fwcfg_directory = calloc(1, size);
+ if (fwcfg_directory == NULL) {
+ return (-ENOMEM);
+ }
+
+ /* init directory */
+ sc.directory = fwcfg_directory;
+
+ /* add directory */
+ return qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+ QEMU_FWCFG_INDEX_FILE_DIR, sizeof(struct qemu_fwcfg_directory), (uint8_t *)sc.directory);
+}
+
+static int
+qemu_fwcfg_add_item_id(void)
+{
+ /* alloc id */
+ struct qemu_fwcfg_id *const fwcfg_id = calloc(1,
+ sizeof(struct qemu_fwcfg_id));
+ if (fwcfg_id == NULL) {
+ return (-ENOMEM);
+ }
+
+ /* init id */
+ fwcfg_id->interface = 1;
+ fwcfg_id->DMA = 0;
+
+ /*
+ * QEMU specifies ID as little endian.
+ * Convert fwcfg_id to little endian.
+ */
+ uint32_t *const le_fwcfg_id_ptr = (uint32_t *)fwcfg_id;
+ *le_fwcfg_id_ptr = htole32(*le_fwcfg_id_ptr);
+
+ /* add id */
+ return qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+ QEMU_FWCFG_INDEX_ID, sizeof(struct qemu_fwcfg_id),
+ (uint8_t *)fwcfg_id);
+}
+
+static int
+qemu_fwcfg_add_item_signature(void)
+{
+ /* alloc signature */
+ struct qemu_fwcfg_signature *const fwcfg_signature = calloc(1,
+ sizeof(struct qemu_fwcfg_signature));
+ if (fwcfg_signature == NULL) {
+ return (-ENOMEM);
+ }
+
+ /* init signature */
+ fwcfg_signature->signature[0] = 'Q';
+ fwcfg_signature->signature[1] = 'E';
+ fwcfg_signature->signature[2] = 'M';
+ fwcfg_signature->signature[3] = 'U';
+
+ /* add signature */
+ return qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+ QEMU_FWCFG_INDEX_SIGNATURE, sizeof(struct qemu_fwcfg_signature),
+ (uint8_t *)fwcfg_signature);
+}
+
+static int
+qemu_fwcfg_register_port(const char *const name, const int port, const int size,
+ const int flags, const inout_func_t handler)
+{
+ struct inout_port iop;
+
+ bzero(&iop, sizeof(iop));
+ iop.name = name;
+ iop.port = port;
+ iop.size = size;
+ iop.flags = flags;
+ iop.handler = handler;
+
+ return register_inout(&iop);
+}
+
+int
+qemu_fwcfg_add_file(const uint8_t name[QEMU_FWCFG_MAX_NAME], const uint32_t size,
+ void *const data)
+{
+ /*
+ * QEMU specifies count as big endian.
+ * Convert it to host endian to work with it.
+ */
+ const uint32_t count = be32toh(sc.directory->be_count) + 1;
+
+ /* add file to items list */
+ const uint32_t index = QEMU_FWCFG_FIRST_FILE_INDEX + count - 1;
+ const int error = qemu_fwcfg_add_item(QEMU_FWCFG_ARCHITECTURE_GENERIC,
+ index, size, data);
+ if (error != 0) {
+ return (error);
+ }
+
+ /*
+ * files should be sorted alphabetically; find the index for the new file
+ */
+ uint32_t file_index;
+ for (file_index = 0; file_index < count; ++file_index) {
+ if (strcmp(name, sc.directory->files[file_index].name) < 0)
+ break;
+ }
+
+ if (count > QEMU_FWCFG_MIN_FILES) {
+ /* alloc new file directory */
+ const uint64_t new_size = sizeof(struct qemu_fwcfg_directory) +
+ count * sizeof(struct qemu_fwcfg_file);
+ struct qemu_fwcfg_directory *const new_directory = calloc(1,
+ new_size);
+ if (new_directory == NULL) {
+ warnx(
+ "%s: Unable to allocate a new qemu fwcfg files directory (count %d)",
+ __func__, count);
+ return (-ENOMEM);
+ }
+
+ /* copy files below file_index to new directory */
+ memcpy(new_directory->files, sc.directory->files,
+ file_index * sizeof(struct qemu_fwcfg_file));
+
+ /* copy files behind file_index to the new directory */
+ memcpy(&new_directory->files[file_index + 1],
+ &sc.directory->files[file_index],
+ (count - file_index) * sizeof(struct qemu_fwcfg_file));
+
+ /* free old directory */
+ free(sc.directory);
+
+ /* set directory pointer to new directory */
+ sc.directory = new_directory;
+
+ /* adjust directory pointer */
+ sc.items[0][QEMU_FWCFG_INDEX_FILE_DIR].data = (uint8_t *)
+ sc.directory;
+ } else {
+ /* shift files behind file_index */
+ for (uint32_t i = QEMU_FWCFG_MIN_FILES - 1; i > file_index; --i) {
+ memcpy(&sc.directory->files[i],
+ &sc.directory->files[i - 1],
+ sizeof(struct qemu_fwcfg_file));
+ }
+ }
+
+ /*
+ * QEMU specifies count, size and index as big endian.
+ * Save these values in big endian to simplify guest reads of these
+ * values.
+ */
+ sc.directory->be_count = htobe32(count);
+ sc.directory->files[file_index].be_size = htobe32(size);
+ sc.directory->files[file_index].be_selector = htobe16(index);
+ strcpy(sc.directory->files[file_index].name, name);
+
+ /* set new size for the fwcfg_file_directory */
+ sc.items[0][QEMU_FWCFG_INDEX_FILE_DIR].size =
+ sizeof(struct qemu_fwcfg_directory) +
+ count * sizeof(struct qemu_fwcfg_file);
+
+ return (0);
+}
+
+static int
+qemu_fwcfg_add_user_files(void)
+{
+ const struct qemu_fwcfg_user_file *fwcfg_file;
+ STAILQ_FOREACH (fwcfg_file, &user_files, chain) {
+ const int error = qemu_fwcfg_add_file(fwcfg_file->name,
+ fwcfg_file->size, fwcfg_file->data);
+ if (error)
+ return (error);
+ }
+
+ return (0);
+}
+
+int
+qemu_fwcfg_init(struct vmctx *const ctx)
+{
+ int error;
+
+ error = acpi_device_create(&sc.acpi_dev, ctx,
+ QEMU_FWCFG_ACPI_DEVICE_NAME, QEMU_FWCFG_ACPI_HARDWARE_ID);
+ if (error) {
+ warnx("%s: failed to create ACPI device for QEMU FwCfg",
+ __func__);
+ goto done;
+ }
+
+ error = acpi_device_add_res_fixed_ioport(sc.acpi_dev,
+ QEMU_FWCFG_SELECTOR_PORT_NUMBER, 2);
+ if (error) {
+ warnx("%s: failed to add fixed IO port for QEMU FwCfg",
+ __func__);
+ goto done;
+ }
+
+ /* add common fwcfg items */
+ if ((error = qemu_fwcfg_add_item_signature()) != 0) {
+ warnx("%s: Unable to add signature item", __func__);
+ goto done;
+ }
+ if ((error = qemu_fwcfg_add_item_id()) != 0) {
+ warnx("%s: Unable to add id item", __func__);
+ goto done;
+ }
+ if ((error = qemu_fwcfg_add_item_file_dir()) != 0) {
+ warnx("%s: Unable to add file_dir item", __func__);
+ goto done;
+ }
+
+ /* add handlers for fwcfg ports */
+ if ((error = qemu_fwcfg_register_port("qemu_fwcfg_selector",
+ QEMU_FWCFG_SELECTOR_PORT_NUMBER, QEMU_FWCFG_SELECTOR_PORT_SIZE,
+ QEMU_FWCFG_SELECTOR_PORT_FLAGS,
+ qemu_fwcfg_selector_port_handler)) != 0) {
+ warnx("%s: Unable to register qemu fwcfg selector port 0x%x",
+ __func__, QEMU_FWCFG_SELECTOR_PORT_NUMBER);
+ goto done;
+ }
+ if ((error = qemu_fwcfg_register_port("qemu_fwcfg_data",
+ QEMU_FWCFG_DATA_PORT_NUMBER, QEMU_FWCFG_DATA_PORT_SIZE,
+ QEMU_FWCFG_DATA_PORT_FLAGS, qemu_fwcfg_data_port_handler)) !=
+ 0) {
+ warnx("%s: Unable to register qemu fwcfg data port 0x%x",
+ __func__, QEMU_FWCFG_DATA_PORT_NUMBER);
+ goto done;
+ }
+
+ if ((error = qemu_fwcfg_add_user_files()) != 0) {
+ warnx("%s: Unable to add user files", __func__);
+ goto done;
+ }
+
+done:
+ if (error) {
+ acpi_device_destroy(sc.acpi_dev);
+ }
+
+ return (error);
+}
+
+static void
+qemu_fwcfg_usage(const char *opt)
+{
+ warnx("Invalid fw_cfg option \"%s\"", opt);
+ warnx("-f [name=]<name>,(string|file)=<value>");
+}
+
+/*
+ * Parses the cmdline argument for user defined fw_cfg items. The cmdline
+ * argument has the format:
+ * "-f [name=]<name>,(string|file)=<value>"
+ *
+ * E.g.: "-f opt/com.page/example,string=Hello"
+ */
+int
+qemu_fwcfg_parse_cmdline_arg(const char *opt)
+{
+ struct qemu_fwcfg_user_file *const fwcfg_file = malloc(sizeof(*fwcfg_file));
+ if (fwcfg_file == NULL) {
+ warnx("Unable to allocate fw_cfg_user_file");
+ return (-ENOMEM);
+ }
+
+ /* get pointer to <name> */
+ const char *opt_ptr = opt;
+ /* If [name=] is specified, skip it */
+ if (strncmp(opt_ptr, "name=", sizeof("name=") - 1) == 0) {
+ opt_ptr += sizeof("name=") - 1;
+ }
+
+ /* get the end of <name> */
+ const char *opt_end = strchr(opt_ptr, ',');
+ if (opt_end == NULL) {
+ qemu_fwcfg_usage(opt);
+ return (-1);
+ }
+
+ /* check if <name> is too long (it has to be NUL-terminated) */
+ if (opt_end - opt_ptr >= QEMU_FWCFG_MAX_NAME) {
+ warnx("fw_cfg name too long: \"%s\"", opt);
+ return (-1);
+ }
+
+ /* save <name> and NUL-terminate it */
+ strncpy(fwcfg_file->name, opt_ptr, opt_end - opt_ptr);
+ fwcfg_file->name[opt_end - opt_ptr] = '\0';
+
+ /* set opt_ptr and opt_end to <value> */
+ opt_ptr = opt_end + 1;
+ opt_end = opt_ptr + strlen(opt_ptr);
+
+ if (strncmp(opt_ptr, "string=", sizeof("string=") - 1) == 0) {
+ opt_ptr += sizeof("string=") - 1;
+ fwcfg_file->data = strdup(opt_ptr);
+ if (fwcfg_file->data == NULL) {
+ warnx(" Can't duplicate fw_cfg_user_file string \"%s\"",
+ opt_ptr);
+ return (-ENOMEM);
+ }
+ fwcfg_file->size = strlen(opt_ptr) + 1;
+
+ } else if (strncmp(opt_ptr, "file=", sizeof("file=") - 1) == 0) {
+ opt_ptr += sizeof("file=") - 1;
+
+ /* open file */
+ const int fd = open(opt_ptr, O_RDONLY);
+ if (fd < 0) {
+ warnx("Can't open fw_cfg_user_file file \"%s\"",
+ opt_ptr);
+ return (-1);
+ }
+
+ /* get file size */
+ const uint64_t size = lseek(fd, 0, SEEK_END);
+ lseek(fd, 0, SEEK_SET);
+
+ /* read file */
+ fwcfg_file->data = malloc(size);
+ if (fwcfg_file->data == NULL) {
+ warnx(
+ "Can't allocate fw_cfg_user_file file \"%s\" (size: 0x%16lx)",
+ opt_ptr, size);
+ close(fd);
+ return (-ENOMEM);
+ }
+ fwcfg_file->size = read(fd, fwcfg_file->data, size);
+
+ close(fd);
+
+ } else {
+ qemu_fwcfg_usage(opt);
+ return (-1);
+ }
+
+ STAILQ_INSERT_TAIL(&user_files, fwcfg_file, chain);
+
+ return (0);
+}
