Page MenuHomeFreeBSD

D44566.diff
No OneTemporary

D44566.diff

diff --git a/lib/libvmmapi/amd64/vmmapi_machdep.c b/lib/libvmmapi/amd64/vmmapi_machdep.c
--- a/lib/libvmmapi/amd64/vmmapi_machdep.c
+++ b/lib/libvmmapi/amd64/vmmapi_machdep.c
@@ -28,6 +28,7 @@
#include <sys/types.h>
#include <sys/ioctl.h>
+#include <sys/domainset.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
diff --git a/lib/libvmmapi/internal.h b/lib/libvmmapi/internal.h
--- a/lib/libvmmapi/internal.h
+++ b/lib/libvmmapi/internal.h
@@ -9,21 +9,15 @@
#include <sys/types.h>
-enum {
- VM_MEMSEG_LOW,
- VM_MEMSEG_HIGH,
- VM_MEMSEG_COUNT,
-};
struct vmctx {
int fd;
- struct {
- vm_paddr_t base;
- vm_size_t size;
- } memsegs[VM_MEMSEG_COUNT];
+ vm_size_t lowmem_size;
+ vm_size_t highmem_size;
int memflags;
char *baseaddr;
char *name;
+ cpuset_t cpu_affinity[VM_MAXSYSMEM];
};
struct vcpu {
diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -64,16 +64,10 @@
#define VM_MEM_F_INCORE 0x01 /* include guest memory in core file */
#define VM_MEM_F_WIRED 0x02 /* guest memory is wired */
-/*
- * Identifiers for memory segments:
- * - vm_setup_memory() uses VM_SYSMEM for the system memory segment.
- * - the remaining identifiers can be used to create devmem segments.
- */
-enum {
- VM_SYSMEM,
- VM_BOOTROM,
- VM_FRAMEBUFFER,
- VM_PCIROM,
+struct vmdom {
+ size_t size;
+ int ds_policy;
+ domainset_t ds_mask;
};
__BEGIN_DECLS
@@ -124,7 +118,8 @@
void vm_vcpu_close(struct vcpu *vcpu);
int vcpu_id(struct vcpu *vcpu);
int vm_parse_memsize(const char *optarg, size_t *memsize);
-int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
+int vm_setup_memory(struct vmctx *ctx, struct vmdom *doms, int ndoms,
+ size_t len, enum vm_mmap_style s);
void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
/* inverse operation to vm_map_gpa - extract guest address from host pointer */
vm_paddr_t vm_rev_map_gpa(struct vmctx *ctx, void *addr);
@@ -266,6 +261,8 @@
uint16_t threads, uint16_t maxcpus);
int vm_get_topology(struct vmctx *ctx, uint16_t *sockets, uint16_t *cores,
uint16_t *threads, uint16_t *maxcpus);
+int vm_set_domain_cpus(struct vmctx *ctx, int domain, cpuset_t *cpus);
+int vm_get_domain_cpus(struct vmctx *ctx, int domain, cpuset_t *cpus);
/*
* FreeBSD specific APIs
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -35,6 +35,7 @@
#include <sys/module.h>
#include <sys/_iovec.h>
#include <sys/cpuset.h>
+#include <sys/domainset.h>
#include <capsicum_helpers.h>
#include <errno.h>
@@ -121,8 +122,7 @@
vm->memflags = 0;
vm->name = (char *)(vm + 1);
strcpy(vm->name, name);
- memset(vm->memsegs, 0, sizeof(vm->memsegs));
-
+ memset(vm->cpu_affinity, 0, sizeof(vm->cpu_affinity));
if ((vm->fd = vm_device_open(vm->name)) < 0)
goto err;
@@ -268,8 +268,8 @@
{
*guest_baseaddr = ctx->baseaddr;
- *lowmem_size = ctx->memsegs[VM_MEMSEG_LOW].size;
- *highmem_size = ctx->memsegs[VM_MEMSEG_HIGH].size;
+ *lowmem_size = ctx->lowmem_size;
+ *highmem_size = ctx->highmem_size;
return (0);
}
@@ -325,7 +325,8 @@
}
static int
-vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
+vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name,
+ int ds_policy, domainset_t *ds_mask)
{
struct vm_memseg memseg;
size_t n;
@@ -353,6 +354,13 @@
bzero(&memseg, sizeof(struct vm_memseg));
memseg.segid = segid;
memseg.len = len;
+ if (ds_mask == NULL) {
+ memseg.ds_policy = DOMAINSET_POLICY_INVALID;
+ } else {
+ memseg.ds_policy = ds_policy;
+ memseg.ds_mask = ds_mask;
+ memseg.ds_mask_size = sizeof(*ds_mask);
+ }
if (name != NULL) {
n = strlcpy(memseg.name, name, sizeof(memseg.name));
if (n >= sizeof(memseg.name)) {
@@ -388,13 +396,14 @@
}
static int
-setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
+map_memory_segment(struct vmctx *ctx, int segid, vm_paddr_t gpa, size_t len,
+ size_t segoff, char *base)
{
char *ptr;
int error, flags;
/* Map 'len' bytes starting at 'gpa' in the guest address space */
- error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
+ error = vm_mmap_memseg(ctx, gpa, segid, segoff, len, PROT_ALL);
if (error)
return (error);
@@ -411,59 +420,126 @@
}
int
-vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
+vm_setup_memory(struct vmctx *ctx, struct vmdom *doms, int ndoms,
+ size_t memsize, enum vm_mmap_style vms)
{
- size_t objsize, len;
- vm_paddr_t gpa;
+ struct vmdom *dom, dom0;
+ struct vm_memseg memseg;
+ size_t low_len, len, totalsize;
char *baseaddr, *ptr;
- int error;
+ int error, i, segid;
+ vm_paddr_t gpa;
+ size_t objsize;
assert(vms == VM_MMAP_ALL);
- /*
- * If 'memsize' cannot fit entirely in the 'lowmem' segment then create
- * another 'highmem' segment above VM_HIGHMEM_BASE for the remainder.
- */
+ /* Sanity checks. */
+ if (ndoms < 0 || ndoms > VM_MAXMEMDOM) {
+ error = -1;
+ errno = EINVAL;
+ return (error);
+ }
+
if (memsize > VM_LOWMEM_LIMIT) {
- ctx->memsegs[VM_MEMSEG_LOW].size = VM_LOWMEM_LIMIT;
- ctx->memsegs[VM_MEMSEG_HIGH].size = memsize - VM_LOWMEM_LIMIT;
- objsize = VM_HIGHMEM_BASE + ctx->memsegs[VM_MEMSEG_HIGH].size;
+ totalsize = VM_HIGHMEM_BASE + (memsize - VM_LOWMEM_LIMIT);
} else {
- ctx->memsegs[VM_MEMSEG_LOW].size = memsize;
- ctx->memsegs[VM_MEMSEG_HIGH].size = 0;
- objsize = memsize;
+ totalsize = memsize;
}
- error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
- if (error)
- return (error);
+ /*
+ * If no domain information was passed, pretend
+ * that only one domain was requested.
+ */
+ if (doms == NULL || ndoms == 0) {
+ memset(&dom0, 0, sizeof(struct vmdom));
+ dom0.size = memsize;
+ dom0.ds_policy = DOMAINSET_POLICY_INVALID;
+ doms = &dom0;
+ ndoms = 1;
+ }
/*
* Stake out a contiguous region covering the guest physical memory
* and the adjoining guard regions.
*/
- len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
+ len = VM_MMAP_GUARD_SIZE + totalsize + VM_MMAP_GUARD_SIZE;
ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0);
if (ptr == MAP_FAILED)
return (-1);
-
baseaddr = ptr + VM_MMAP_GUARD_SIZE;
- if (ctx->memsegs[VM_MEMSEG_HIGH].size > 0) {
- gpa = VM_HIGHMEM_BASE;
- len = ctx->memsegs[VM_MEMSEG_HIGH].size;
- error = setup_memory_segment(ctx, gpa, len, baseaddr);
- if (error)
- return (error);
- }
- if (ctx->memsegs[VM_MEMSEG_LOW].size > 0) {
- gpa = 0;
- len = ctx->memsegs[VM_MEMSEG_LOW].size;
- error = setup_memory_segment(ctx, gpa, len, baseaddr);
- if (error)
- return (error);
- }
+ /*
+	 * Allocate and map memory segments for the virtual machine.
+ */
+ gpa = 0;
+ ctx->lowmem_size = 0;
+ ctx->highmem_size = 0;
+ for (i = 0; i < ndoms; i++) {
+ segid = VM_SYSMEM + i;
+ dom = &doms[i];
+ /*
+ * Check if the memory segment already exists.
+ * If 'ndoms' is greater than one, refuse to proceed if the
+ * memseg already exists. If only one domain was requested, use
+ * the existing segment.
+ *
+ * Splitting existing memory segments is tedious and
+ * error-prone, which is why we don't support NUMA
+ * domains for bhyveload(8)-loaded VMs.
+ */
+ error = vm_get_memseg(ctx, segid, &len, memseg.name,
+ sizeof(memseg.name));
+ if (error == 0 && len != 0) {
+ if (ndoms != 1) {
+ error = -1;
+ errno = EEXIST;
+ return (error);
+ } else {
+ doms[0].size = len;
+ }
+ } else {
+ objsize = dom->size;
+ /* Allocate new segment. */
+ error = vm_alloc_memseg(ctx, segid, objsize, NULL,
+ dom->ds_policy, &dom->ds_mask);
+ if (error)
+ return (error);
+ }
+
+ /*
+ * If a domain is split by VM_LOWMEM_LIMIT then break
+ * its segment mapping into two parts, one below VM_LOWMEM_LIMIT
+ * and one above VM_HIGHMEM_BASE.
+ */
+ if (gpa < VM_LOWMEM_LIMIT &&
+ gpa + dom->size > VM_LOWMEM_LIMIT) {
+ low_len = VM_LOWMEM_LIMIT - gpa;
+ error = map_memory_segment(ctx, segid, gpa, low_len, 0,
+ baseaddr);
+ if (error)
+ return (error);
+ ctx->lowmem_size = VM_LOWMEM_LIMIT;
+ /* Map the remainder. */
+ gpa = VM_HIGHMEM_BASE;
+ len = dom->size - low_len;
+ error = map_memory_segment(ctx, segid, gpa, len,
+ low_len, baseaddr);
+ if (error)
+ return (error);
+ } else {
+ len = dom->size;
+ error = map_memory_segment(ctx, segid, gpa, len, 0,
+ baseaddr);
+ if (error)
+ return (error);
+ }
+ if (gpa < VM_LOWMEM_LIMIT)
+ ctx->lowmem_size += len;
+ else
+ ctx->highmem_size += len;
+ gpa += len;
+ }
ctx->baseaddr = baseaddr;
return (0);
@@ -481,13 +557,13 @@
{
vm_size_t lowsize, highsize;
- lowsize = ctx->memsegs[VM_MEMSEG_LOW].size;
+ lowsize = ctx->lowmem_size;
if (lowsize > 0) {
if (gaddr < lowsize && len <= lowsize && gaddr + len <= lowsize)
return (ctx->baseaddr + gaddr);
}
- highsize = ctx->memsegs[VM_MEMSEG_HIGH].size;
+ highsize = ctx->highmem_size;
if (highsize > 0 && gaddr >= VM_HIGHMEM_BASE) {
if (gaddr < VM_HIGHMEM_BASE + highsize && len <= highsize &&
gaddr + len <= VM_HIGHMEM_BASE + highsize)
@@ -505,12 +581,12 @@
offaddr = (char *)addr - ctx->baseaddr;
- lowsize = ctx->memsegs[VM_MEMSEG_LOW].size;
+ lowsize = ctx->lowmem_size;
if (lowsize > 0)
if (offaddr <= lowsize)
return (offaddr);
- highsize = ctx->memsegs[VM_MEMSEG_HIGH].size;
+ highsize = ctx->highmem_size;
if (highsize > 0)
if (offaddr >= VM_HIGHMEM_BASE &&
offaddr < VM_HIGHMEM_BASE + highsize)
@@ -529,8 +605,7 @@
size_t
vm_get_lowmem_size(struct vmctx *ctx)
{
-
- return (ctx->memsegs[VM_MEMSEG_LOW].size);
+ return (ctx->lowmem_size);
}
vm_paddr_t
@@ -543,8 +618,7 @@
size_t
vm_get_highmem_size(struct vmctx *ctx)
{
-
- return (ctx->memsegs[VM_MEMSEG_HIGH].size);
+ return (ctx->highmem_size);
}
void *
@@ -562,7 +636,7 @@
goto done;
}
- error = vm_alloc_memseg(ctx, segid, len, name);
+ error = vm_alloc_memseg(ctx, segid, len, name, 0, NULL);
if (error)
goto done;
@@ -1141,6 +1215,88 @@
return (error);
}
+int
+vm_set_domain_cpus(struct vmctx *ctx, int domain, cpuset_t *cpus)
+{
+ struct vm_memseg memseg;
+ int error, segid;
+ cpuset_t domcpus;
+ int curdom;
+
+ segid = VM_SYSMEM + domain;
+ if (segid < VM_SYSMEM || segid >= VM_MAXSYSMEM) {
+ errno = EINVAL;
+ return (-1);
+ }
+ if (cpus == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+ /* Check if memory segment for domain exists. */
+ memset(&memseg, 0, sizeof(memseg));
+ error = vm_get_memseg(ctx, VM_SYSMEM + domain, &memseg.len, memseg.name,
+ sizeof(memseg.name));
+ if (error)
+ return (error);
+ if (memseg.len == 0) {
+ errno = ENOENT;
+ return (-1);
+ }
+
+ /* Check for overlapping cpusets. */
+ for (curdom = 0; curdom < VM_MAXMEMDOM; curdom++) {
+ if (curdom == domain)
+ continue;
+ error = vm_get_domain_cpus(ctx, curdom, &domcpus);
+ if (error) {
+ if (errno == ENOENT)
+ break;
+ return (error);
+ }
+ if (CPU_OVERLAP(cpus, &domcpus)) {
+ errno = EEXIST;
+ return (-1);
+ }
+ }
+ errno = 0;
+ ctx->cpu_affinity[domain] = *cpus;
+
+ return (0);
+}
+
+int
+vm_get_domain_cpus(struct vmctx *ctx, int domain, cpuset_t *cpus)
+{
+ struct vm_memseg memseg;
+ size_t len;
+ int segid;
+ int error;
+
+ segid = VM_SYSMEM + domain;
+ if (segid < VM_SYSMEM || segid >= VM_MAXSYSMEM) {
+ errno = EINVAL;
+ return (-1);
+ }
+ if (cpus == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+ memset(&memseg, 0, sizeof(memseg));
+ /* Check if memory segment for domain exists. */
+ error = vm_get_memseg(ctx, segid, &len, memseg.name,
+ sizeof(memseg.name));
+ if (error)
+ return (error);
+ if (len == 0) {
+ errno = ENOENT;
+ return (-1);
+ }
+
+ *cpus = ctx->cpu_affinity[domain];
+
+ return (0);
+}
+
int
vm_limit_rights(struct vmctx *ctx)
{

File Metadata

Mime Type
text/plain
Expires
Thu, Sep 26, 1:36 AM (22 h, 27 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
12804994
Default Alt Text
D44566.diff (11 KB)

Event Timeline