Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F96463928
D44566.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
11 KB
Referenced Files
None
Subscribers
None
D44566.diff
View Options
diff --git a/lib/libvmmapi/amd64/vmmapi_machdep.c b/lib/libvmmapi/amd64/vmmapi_machdep.c
--- a/lib/libvmmapi/amd64/vmmapi_machdep.c
+++ b/lib/libvmmapi/amd64/vmmapi_machdep.c
@@ -28,6 +28,7 @@
#include <sys/types.h>
#include <sys/ioctl.h>
+#include <sys/domainset.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
diff --git a/lib/libvmmapi/internal.h b/lib/libvmmapi/internal.h
--- a/lib/libvmmapi/internal.h
+++ b/lib/libvmmapi/internal.h
@@ -9,21 +9,15 @@
#include <sys/types.h>
-enum {
- VM_MEMSEG_LOW,
- VM_MEMSEG_HIGH,
- VM_MEMSEG_COUNT,
-};
struct vmctx {
int fd;
- struct {
- vm_paddr_t base;
- vm_size_t size;
- } memsegs[VM_MEMSEG_COUNT];
+ vm_size_t lowmem_size;
+ vm_size_t highmem_size;
int memflags;
char *baseaddr;
char *name;
+ cpuset_t cpu_affinity[VM_MAXSYSMEM];
};
struct vcpu {
diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -64,16 +64,10 @@
#define VM_MEM_F_INCORE 0x01 /* include guest memory in core file */
#define VM_MEM_F_WIRED 0x02 /* guest memory is wired */
-/*
- * Identifiers for memory segments:
- * - vm_setup_memory() uses VM_SYSMEM for the system memory segment.
- * - the remaining identifiers can be used to create devmem segments.
- */
-enum {
- VM_SYSMEM,
- VM_BOOTROM,
- VM_FRAMEBUFFER,
- VM_PCIROM,
+struct vmdom {
+ size_t size;
+ int ds_policy;
+ domainset_t ds_mask;
};
__BEGIN_DECLS
@@ -124,7 +118,8 @@
void vm_vcpu_close(struct vcpu *vcpu);
int vcpu_id(struct vcpu *vcpu);
int vm_parse_memsize(const char *optarg, size_t *memsize);
-int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
+int vm_setup_memory(struct vmctx *ctx, struct vmdom *doms, int ndoms,
+ size_t len, enum vm_mmap_style s);
void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
/* inverse operation to vm_map_gpa - extract guest address from host pointer */
vm_paddr_t vm_rev_map_gpa(struct vmctx *ctx, void *addr);
@@ -266,6 +261,8 @@
uint16_t threads, uint16_t maxcpus);
int vm_get_topology(struct vmctx *ctx, uint16_t *sockets, uint16_t *cores,
uint16_t *threads, uint16_t *maxcpus);
+int vm_set_domain_cpus(struct vmctx *ctx, int domain, cpuset_t *cpus);
+int vm_get_domain_cpus(struct vmctx *ctx, int domain, cpuset_t *cpus);
/*
* FreeBSD specific APIs
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -35,6 +35,7 @@
#include <sys/module.h>
#include <sys/_iovec.h>
#include <sys/cpuset.h>
+#include <sys/domainset.h>
#include <capsicum_helpers.h>
#include <errno.h>
@@ -121,8 +122,7 @@
vm->memflags = 0;
vm->name = (char *)(vm + 1);
strcpy(vm->name, name);
- memset(vm->memsegs, 0, sizeof(vm->memsegs));
-
+ memset(vm->cpu_affinity, 0, sizeof(vm->cpu_affinity));
if ((vm->fd = vm_device_open(vm->name)) < 0)
goto err;
@@ -268,8 +268,8 @@
{
*guest_baseaddr = ctx->baseaddr;
- *lowmem_size = ctx->memsegs[VM_MEMSEG_LOW].size;
- *highmem_size = ctx->memsegs[VM_MEMSEG_HIGH].size;
+ *lowmem_size = ctx->lowmem_size;
+ *highmem_size = ctx->highmem_size;
return (0);
}
@@ -325,7 +325,8 @@
}
static int
-vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
+vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name,
+ int ds_policy, domainset_t *ds_mask)
{
struct vm_memseg memseg;
size_t n;
@@ -353,6 +354,13 @@
bzero(&memseg, sizeof(struct vm_memseg));
memseg.segid = segid;
memseg.len = len;
+ if (ds_mask == NULL) {
+ memseg.ds_policy = DOMAINSET_POLICY_INVALID;
+ } else {
+ memseg.ds_policy = ds_policy;
+ memseg.ds_mask = ds_mask;
+ memseg.ds_mask_size = sizeof(*ds_mask);
+ }
if (name != NULL) {
n = strlcpy(memseg.name, name, sizeof(memseg.name));
if (n >= sizeof(memseg.name)) {
@@ -388,13 +396,14 @@
}
static int
-setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
+map_memory_segment(struct vmctx *ctx, int segid, vm_paddr_t gpa, size_t len,
+ size_t segoff, char *base)
{
char *ptr;
int error, flags;
/* Map 'len' bytes starting at 'gpa' in the guest address space */
- error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
+ error = vm_mmap_memseg(ctx, gpa, segid, segoff, len, PROT_ALL);
if (error)
return (error);
@@ -411,59 +420,126 @@
}
int
-vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
+vm_setup_memory(struct vmctx *ctx, struct vmdom *doms, int ndoms,
+ size_t memsize, enum vm_mmap_style vms)
{
- size_t objsize, len;
- vm_paddr_t gpa;
+ struct vmdom *dom, dom0;
+ struct vm_memseg memseg;
+ size_t low_len, len, totalsize;
char *baseaddr, *ptr;
- int error;
+ int error, i, segid;
+ vm_paddr_t gpa;
+ size_t objsize;
assert(vms == VM_MMAP_ALL);
- /*
- * If 'memsize' cannot fit entirely in the 'lowmem' segment then create
- * another 'highmem' segment above VM_HIGHMEM_BASE for the remainder.
- */
+ /* Sanity checks. */
+ if (ndoms < 0 || ndoms > VM_MAXMEMDOM) {
+ error = -1;
+ errno = EINVAL;
+ return (error);
+ }
+
if (memsize > VM_LOWMEM_LIMIT) {
- ctx->memsegs[VM_MEMSEG_LOW].size = VM_LOWMEM_LIMIT;
- ctx->memsegs[VM_MEMSEG_HIGH].size = memsize - VM_LOWMEM_LIMIT;
- objsize = VM_HIGHMEM_BASE + ctx->memsegs[VM_MEMSEG_HIGH].size;
+ totalsize = VM_HIGHMEM_BASE + (memsize - VM_LOWMEM_LIMIT);
} else {
- ctx->memsegs[VM_MEMSEG_LOW].size = memsize;
- ctx->memsegs[VM_MEMSEG_HIGH].size = 0;
- objsize = memsize;
+ totalsize = memsize;
}
- error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
- if (error)
- return (error);
+ /*
+ * If no domain information was passed, pretend
+ * that only one domain was requested.
+ */
+ if (doms == NULL || ndoms == 0) {
+ memset(&dom0, 0, sizeof(struct vmdom));
+ dom0.size = memsize;
+ dom0.ds_policy = DOMAINSET_POLICY_INVALID;
+ doms = &dom0;
+ ndoms = 1;
+ }
/*
* Stake out a contiguous region covering the guest physical memory
* and the adjoining guard regions.
*/
- len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
+ len = VM_MMAP_GUARD_SIZE + totalsize + VM_MMAP_GUARD_SIZE;
ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0);
if (ptr == MAP_FAILED)
return (-1);
-
baseaddr = ptr + VM_MMAP_GUARD_SIZE;
- if (ctx->memsegs[VM_MEMSEG_HIGH].size > 0) {
- gpa = VM_HIGHMEM_BASE;
- len = ctx->memsegs[VM_MEMSEG_HIGH].size;
- error = setup_memory_segment(ctx, gpa, len, baseaddr);
- if (error)
- return (error);
- }
- if (ctx->memsegs[VM_MEMSEG_LOW].size > 0) {
- gpa = 0;
- len = ctx->memsegs[VM_MEMSEG_LOW].size;
- error = setup_memory_segment(ctx, gpa, len, baseaddr);
- if (error)
- return (error);
- }
+ /*
+	 * Allocate and map memory segments for the virtual machine.
+ */
+ gpa = 0;
+ ctx->lowmem_size = 0;
+ ctx->highmem_size = 0;
+ for (i = 0; i < ndoms; i++) {
+ segid = VM_SYSMEM + i;
+ dom = &doms[i];
+ /*
+ * Check if the memory segment already exists.
+ * If 'ndoms' is greater than one, refuse to proceed if the
+ * memseg already exists. If only one domain was requested, use
+ * the existing segment.
+ *
+ * Splitting existing memory segments is tedious and
+ * error-prone, which is why we don't support NUMA
+ * domains for bhyveload(8)-loaded VMs.
+ */
+ error = vm_get_memseg(ctx, segid, &len, memseg.name,
+ sizeof(memseg.name));
+ if (error == 0 && len != 0) {
+ if (ndoms != 1) {
+ error = -1;
+ errno = EEXIST;
+ return (error);
+ } else {
+ doms[0].size = len;
+ }
+ } else {
+ objsize = dom->size;
+ /* Allocate new segment. */
+ error = vm_alloc_memseg(ctx, segid, objsize, NULL,
+ dom->ds_policy, &dom->ds_mask);
+ if (error)
+ return (error);
+ }
+
+ /*
+ * If a domain is split by VM_LOWMEM_LIMIT then break
+ * its segment mapping into two parts, one below VM_LOWMEM_LIMIT
+ * and one above VM_HIGHMEM_BASE.
+ */
+ if (gpa < VM_LOWMEM_LIMIT &&
+ gpa + dom->size > VM_LOWMEM_LIMIT) {
+ low_len = VM_LOWMEM_LIMIT - gpa;
+ error = map_memory_segment(ctx, segid, gpa, low_len, 0,
+ baseaddr);
+ if (error)
+ return (error);
+ ctx->lowmem_size = VM_LOWMEM_LIMIT;
+ /* Map the remainder. */
+ gpa = VM_HIGHMEM_BASE;
+ len = dom->size - low_len;
+ error = map_memory_segment(ctx, segid, gpa, len,
+ low_len, baseaddr);
+ if (error)
+ return (error);
+ } else {
+ len = dom->size;
+ error = map_memory_segment(ctx, segid, gpa, len, 0,
+ baseaddr);
+ if (error)
+ return (error);
+ }
+ if (gpa < VM_LOWMEM_LIMIT)
+ ctx->lowmem_size += len;
+ else
+ ctx->highmem_size += len;
+ gpa += len;
+ }
ctx->baseaddr = baseaddr;
return (0);
@@ -481,13 +557,13 @@
{
vm_size_t lowsize, highsize;
- lowsize = ctx->memsegs[VM_MEMSEG_LOW].size;
+ lowsize = ctx->lowmem_size;
if (lowsize > 0) {
if (gaddr < lowsize && len <= lowsize && gaddr + len <= lowsize)
return (ctx->baseaddr + gaddr);
}
- highsize = ctx->memsegs[VM_MEMSEG_HIGH].size;
+ highsize = ctx->highmem_size;
if (highsize > 0 && gaddr >= VM_HIGHMEM_BASE) {
if (gaddr < VM_HIGHMEM_BASE + highsize && len <= highsize &&
gaddr + len <= VM_HIGHMEM_BASE + highsize)
@@ -505,12 +581,12 @@
offaddr = (char *)addr - ctx->baseaddr;
- lowsize = ctx->memsegs[VM_MEMSEG_LOW].size;
+ lowsize = ctx->lowmem_size;
if (lowsize > 0)
if (offaddr <= lowsize)
return (offaddr);
- highsize = ctx->memsegs[VM_MEMSEG_HIGH].size;
+ highsize = ctx->highmem_size;
if (highsize > 0)
if (offaddr >= VM_HIGHMEM_BASE &&
offaddr < VM_HIGHMEM_BASE + highsize)
@@ -529,8 +605,7 @@
size_t
vm_get_lowmem_size(struct vmctx *ctx)
{
-
- return (ctx->memsegs[VM_MEMSEG_LOW].size);
+ return (ctx->lowmem_size);
}
vm_paddr_t
@@ -543,8 +618,7 @@
size_t
vm_get_highmem_size(struct vmctx *ctx)
{
-
- return (ctx->memsegs[VM_MEMSEG_HIGH].size);
+ return (ctx->highmem_size);
}
void *
@@ -562,7 +636,7 @@
goto done;
}
- error = vm_alloc_memseg(ctx, segid, len, name);
+ error = vm_alloc_memseg(ctx, segid, len, name, 0, NULL);
if (error)
goto done;
@@ -1141,6 +1215,88 @@
return (error);
}
+int
+vm_set_domain_cpus(struct vmctx *ctx, int domain, cpuset_t *cpus)
+{
+ struct vm_memseg memseg;
+ int error, segid;
+ cpuset_t domcpus;
+ int curdom;
+
+ segid = VM_SYSMEM + domain;
+ if (segid < VM_SYSMEM || segid >= VM_MAXSYSMEM) {
+ errno = EINVAL;
+ return (-1);
+ }
+ if (cpus == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+ /* Check if memory segment for domain exists. */
+ memset(&memseg, 0, sizeof(memseg));
+ error = vm_get_memseg(ctx, VM_SYSMEM + domain, &memseg.len, memseg.name,
+ sizeof(memseg.name));
+ if (error)
+ return (error);
+ if (memseg.len == 0) {
+ errno = ENOENT;
+ return (-1);
+ }
+
+ /* Check for overlapping cpusets. */
+ for (curdom = 0; curdom < VM_MAXMEMDOM; curdom++) {
+ if (curdom == domain)
+ continue;
+ error = vm_get_domain_cpus(ctx, curdom, &domcpus);
+ if (error) {
+ if (errno == ENOENT)
+ break;
+ return (error);
+ }
+ if (CPU_OVERLAP(cpus, &domcpus)) {
+ errno = EEXIST;
+ return (-1);
+ }
+ }
+ errno = 0;
+ ctx->cpu_affinity[domain] = *cpus;
+
+ return (0);
+}
+
+int
+vm_get_domain_cpus(struct vmctx *ctx, int domain, cpuset_t *cpus)
+{
+ struct vm_memseg memseg;
+ size_t len;
+ int segid;
+ int error;
+
+ segid = VM_SYSMEM + domain;
+ if (segid < VM_SYSMEM || segid >= VM_MAXSYSMEM) {
+ errno = EINVAL;
+ return (-1);
+ }
+ if (cpus == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+ memset(&memseg, 0, sizeof(memseg));
+ /* Check if memory segment for domain exists. */
+ error = vm_get_memseg(ctx, segid, &len, memseg.name,
+ sizeof(memseg.name));
+ if (error)
+ return (error);
+ if (len == 0) {
+ errno = ENOENT;
+ return (-1);
+ }
+
+ *cpus = ctx->cpu_affinity[domain];
+
+ return (0);
+}
+
int
vm_limit_rights(struct vmctx *ctx)
{
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Sep 26, 1:36 AM (22 h, 27 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
12804994
Default Alt Text
D44566.diff (11 KB)
Attached To
Mode
D44566: libvmmapi: Add support for setting up and configuring guest NUMA domains
Attached
Detach File
Event Timeline
Log In to Comment