Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F107606288
D34547.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
13 KB
Referenced Files
None
Subscribers
None
D34547.diff
View Options
Index: lib/libvmmapi/vmmapi.c
===================================================================
--- lib/libvmmapi/vmmapi.c
+++ lib/libvmmapi/vmmapi.c
@@ -1719,7 +1719,7 @@
VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
VM_SET_INTINFO, VM_GET_INTINFO,
VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
- VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY };
+ VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY, VM_SNAPSHOT_REQ };
if (len == NULL) {
cmds = malloc(sizeof(vm_ioctl_cmds));
Index: usr.sbin/bhyve/Makefile
===================================================================
--- usr.sbin/bhyve/Makefile
+++ usr.sbin/bhyve/Makefile
@@ -91,8 +91,8 @@
LIBADD+= casper
LIBADD+= cap_pwd
LIBADD+= cap_grp
-# Temporary disable capsicum, until we integrate checkpoint code with it.
-#CFLAGS+=-DWITH_CASPER
+LIBADD+= cap_sysctl
+CFLAGS+=-DWITH_CASPER
.endif
.if ${MK_BHYVE_SNAPSHOT} != "no"
@@ -121,9 +121,6 @@
.if ${MK_BHYVE_SNAPSHOT} != "no"
CFLAGS+= -I${SRCTOP}/contrib/libucl/include
-# Temporary disable capsicum, until we integrate checkpoint code with it.
-CFLAGS+= -DWITHOUT_CAPSICUM
-
CFLAGS+= -DBHYVE_SNAPSHOT
.endif
Index: usr.sbin/bhyve/bhyverun.c
===================================================================
--- usr.sbin/bhyve/bhyverun.c
+++ usr.sbin/bhyve/bhyverun.c
@@ -1244,13 +1244,14 @@
restore_file = NULL;
#endif
+ char *ckp_path = NULL;
init_config();
set_defaults();
progname = basename(argv[0]);
#ifdef BHYVE_SNAPSHOT
- optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:K:U:r:";
+ optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:K:U:r:t:";
#else
optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:K:U:";
#endif
@@ -1302,6 +1303,9 @@
case 'r':
restore_file = optarg;
break;
+ case 't':
+ ckp_path = optarg;
+ break;
#endif
case 's':
if (strncmp(optarg, "help", strlen(optarg)) == 0) {
@@ -1547,22 +1551,12 @@
*/
setproctitle("%s", vmname);
-#ifndef WITHOUT_CAPSICUM
- caph_cache_catpages();
-
- if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
- errx(EX_OSERR, "Unable to apply rights for sandbox");
-
- if (caph_enter() == -1)
- errx(EX_OSERR, "cap_enter() failed");
-#endif
-
#ifdef BHYVE_SNAPSHOT
if (restore_file != NULL)
destroy_restore_state(&rstate);
/* initialize mutex/cond variables */
- init_snapshot();
+ init_snapshot(ckp_path);
/*
* checkpointing thread for communication with bhyvectl
@@ -1574,6 +1568,16 @@
vm_restore_time(ctx);
#endif
+#ifndef WITHOUT_CAPSICUM
+ caph_cache_catpages();
+
+ if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+
+ if (caph_enter() == -1)
+ errx(EX_OSERR, "cap_enter() failed");
+#endif
+
/*
* Add CPU 0
*/
Index: usr.sbin/bhyve/snapshot.h
===================================================================
--- usr.sbin/bhyve/snapshot.h
+++ usr.sbin/bhyve/snapshot.h
@@ -42,6 +42,11 @@
#include <libxo/xo.h>
#include <ucl.h>
+#ifndef WITHOUT_CAPSICUM
+#include <libcasper.h>
+#include <casper/cap_sysctl.h>
+#endif
+
#define BHYVE_RUN_DIR "/var/run/bhyve/"
#define MAX_SNAPSHOT_FILENAME PATH_MAX
@@ -101,8 +106,8 @@
int get_checkpoint_msg(int conn_fd, struct vmctx *ctx);
void *checkpoint_thread(void *param);
+void init_snapshot(char *ckp_path);
int init_checkpoint_thread(struct vmctx *ctx);
-void init_snapshot(void);
int load_restore_file(const char *filename, struct restore_state *rstate);
Index: usr.sbin/bhyve/snapshot.c
===================================================================
--- usr.sbin/bhyve/snapshot.c
+++ usr.sbin/bhyve/snapshot.c
@@ -37,9 +37,7 @@
__FBSDID("$FreeBSD$");
#include <sys/types.h>
-#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
-#endif
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/stat.h>
@@ -49,9 +47,7 @@
#include <machine/atomic.h>
#include <machine/segments.h>
-#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
-#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -70,9 +66,7 @@
#include <sys/ioctl.h>
#include <machine/vmm.h>
-#ifndef WITHOUT_CAPSICUM
#include <machine/vmm_dev.h>
-#endif
#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
@@ -95,6 +89,8 @@
#include "spinup_ap.h"
#include "rtc.h"
+#include <libcasper.h>
+#include <casper/cap_sysctl.h>
#include <libxo/xo.h>
#include <ucl.h>
@@ -171,6 +167,9 @@
static pthread_cond_t vcpus_idle, vcpus_can_run;
static bool checkpoint_active;
+static int cdir_fd = AT_FDCWD;
+static cap_channel_t *capsysctl = NULL;
+
/*
* TODO: Harden this function and all of its callers since 'base_str' is a user
* provided string.
@@ -210,18 +209,58 @@
return;
}
- if (rstate->kdata_map != MAP_FAILED)
+ if (rstate->kdata_map != MAP_FAILED) {
munmap(rstate->kdata_map, rstate->kdata_len);
+ EPRINTLN("%s: destroying kdata_map", __func__);
+ }
- if (rstate->kdata_fd > 0)
+ if (rstate->kdata_fd > 0) {
close(rstate->kdata_fd);
- if (rstate->vmmem_fd > 0)
+ EPRINTLN("%s: destroying kdata_fd", __func__);
+ }
+ if (rstate->vmmem_fd > 0) {
close(rstate->vmmem_fd);
+ EPRINTLN("%s: destroying vmmem_fd", __func__);
+ }
- if (rstate->meta_root_obj != NULL)
+ if (rstate->meta_root_obj != NULL) {
ucl_object_unref(rstate->meta_root_obj);
- if (rstate->meta_parser != NULL)
+ EPRINTLN("%s: destroying meta_root_obj", __func__);
+ }
+ if (rstate->meta_parser != NULL) {
ucl_parser_free(rstate->meta_parser);
+ EPRINTLN("%s: destroying meta_parser", __func__);
+ }
+}
+
+/*
+ * Restrict descriptor 's' (the restore state's vmmem_fd at the call site) to
+ * CAP_FSTAT, CAP_MMAP_R, CAP_IOCTL and CAP_READ.  Exits with EX_OSERR when
+ * the limit cannot be applied.
+ */
+static void
+limit_vmmem_rights(int s)
+{
+	cap_rights_t vmmem_rights;
+
+	cap_rights_init(&vmmem_rights, CAP_FSTAT, CAP_MMAP_R, CAP_IOCTL,
+	    CAP_READ);
+	if (caph_rights_limit(s, &vmmem_rights) != -1)
+		return;
+
+	errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+
+/*
+ * Restrict descriptor 's' (the restore state's kdata_fd at the call site) to
+ * CAP_FSTAT, CAP_MMAP_R and CAP_READ.  Exits with EX_OSERR when the limit
+ * cannot be applied.
+ */
+static void
+limit_kerneldata_rights(int s)
+{
+	cap_rights_t kdata_rights;
+	int rc;
+
+	cap_rights_init(&kdata_rights, CAP_FSTAT, CAP_MMAP_R, CAP_READ);
+	rc = caph_rights_limit(s, &kdata_rights);
+	if (rc == -1)
+		errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+
+/*
+ * Restrict descriptor 's' (the metadata file descriptor at the call site) to
+ * CAP_FSTAT, CAP_MMAP_R and CAP_READ.  Exits with EX_OSERR when the limit
+ * cannot be applied.
+ */
+static void
+limit_metadata_rights(int s)
+{
+	cap_rights_t meta_rights;
+
+	cap_rights_init(&meta_rights, CAP_FSTAT, CAP_MMAP_R, CAP_READ);
+	if (caph_rights_limit(s, &meta_rights) != -1)
+		return;
+
+	errx(EX_OSERR, "Unable to apply rights for sandbox");
}
static int
@@ -236,6 +275,8 @@
return (-1);
}
+ limit_vmmem_rights(rstate->vmmem_fd);
+
err = fstat(rstate->vmmem_fd, &sb);
if (err < 0) {
perror("Failed to stat restore file");
@@ -269,6 +310,8 @@
return (-1);
}
+ limit_kerneldata_rights(rstate->kdata_fd);
+
err = fstat(rstate->kdata_fd, &sb);
if (err < 0) {
perror("Failed to stat kernel data file");
@@ -301,15 +344,24 @@
{
const ucl_object_t *obj;
struct ucl_parser *parser;
+ int md_fd = -1;
int err;
+ md_fd = open(filename, O_RDONLY);
+ if (md_fd < 0) {
+ perror("Failed to open metadata snapshot file.");
+ return (-1);
+ }
+
parser = ucl_parser_new(UCL_PARSER_DEFAULT);
if (parser == NULL) {
fprintf(stderr, "Failed to initialize UCL parser.\n");
goto err_load_metadata;
}
- err = ucl_parser_add_file(parser, filename);
+ limit_metadata_rights(md_fd);
+
+ err = ucl_parser_add_fd(parser, md_fd);
if (err == 0) {
fprintf(stderr, "Failed to parse metadata file: '%s'\n",
filename);
@@ -330,6 +382,8 @@
return (0);
err_load_metadata:
+ if (md_fd > 0)
+ close(md_fd);
if (parser != NULL)
ucl_parser_free(parser);
return (err);
@@ -1304,10 +1358,16 @@
static void
vm_vcpu_pause(struct vmctx *ctx)
{
+ int err;
pthread_mutex_lock(&vcpu_lock);
checkpoint_active = true;
- vm_suspend_cpu(ctx, -1);
+ err = vm_suspend_cpu(ctx, -1);
+ if (err != 0) {
+ EPRINTLN("%s: Could not suspend vcpus", __func__);
+ pthread_mutex_unlock(&vcpu_lock);
+ return;
+ }
while (CPU_CMP(&vcpus_active, &vcpus_suspended) != 0)
pthread_cond_wait(&vcpus_idle, &vcpu_lock);
pthread_mutex_unlock(&vcpu_lock);
@@ -1324,10 +1384,42 @@
pthread_cond_broadcast(&vcpus_can_run);
}
+/*
+ * Destroy the VM named 'vm' by writing its name to the hw.vmm.destroy sysctl
+ * through the Casper system.sysctl channel 'capsysctl'.
+ *
+ * The channel is first limited to write-only access on that single sysctl.
+ * It is closed and reset to NULL on every path, so it serves one call only.
+ *
+ * Returns 0 on success, -1 if the channel is missing or the limit could not
+ * be set up, or the errno value left by the failed sysctl request.
+ */
+static int
+local_vm_destroy(char *vm)
+{
+	const char *name = "hw.vmm.destroy";
+	cap_sysctl_limit_t *limit;
+	int saved_errno;
+	int ret = 0;
+
+	if (capsysctl == NULL) {
+		EPRINTLN("%s: No system.sysctl channel", __func__);
+		return (-1);
+	}
+
+	/* Create limit for one sysctl name with write access only. */
+	limit = cap_sysctl_limit_init(capsysctl);
+	if (limit == NULL) {
+		EPRINTLN("%s: Unable to set limits", __func__);
+		ret = -1;
+		goto done;
+	}
+	(void)cap_sysctl_limit_name(limit, name, CAP_SYSCTL_WRITE);
+
+	/* Limit system.sysctl. */
+	if (cap_sysctl_limit(limit) < 0) {
+		EPRINTLN("%s: Unable to set limits", __func__);
+		ret = -1;
+		goto done;
+	}
+
+	if (cap_sysctlbyname(capsysctl, name, NULL, NULL, vm,
+	    strlen(vm)) != 0) {
+		/* Capture errno before logging or cap_close() can clobber it. */
+		saved_errno = errno;
+		EPRINTLN("%s: err is %d\r\n", __func__, saved_errno);
+		ret = saved_errno;
+	}
+
+done:
+	cap_close(capsysctl);
+	capsysctl = NULL;
+	return (ret);
+}
+
static int
vm_checkpoint(struct vmctx *ctx, const char *checkpoint_file, bool stop_vm)
{
- int fd_checkpoint = 0, kdata_fd = 0;
+ int fd_checkpoint = 0, kdata_fd = 0, meta_fd = 0;
int ret = 0;
int error = 0;
size_t memsz;
@@ -1335,6 +1427,7 @@
char *meta_filename = NULL;
char *kdata_filename = NULL;
FILE *meta_file = NULL;
+ char vmname[MAX_VMNAME];
kdata_filename = strcat_extension(checkpoint_file, ".kern");
if (kdata_filename == NULL) {
@@ -1342,15 +1435,14 @@
return (-1);
}
- kdata_fd = open(kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700);
+ kdata_fd = openat(cdir_fd, kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700);
if (kdata_fd < 0) {
perror("Failed to open kernel data snapshot file.");
error = -1;
goto done;
}
- fd_checkpoint = open(checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700);
-
+ fd_checkpoint = openat(cdir_fd, checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700);
if (fd_checkpoint < 0) {
perror("Failed to create checkpoint file");
error = -1;
@@ -1363,7 +1455,13 @@
goto done;
}
- meta_file = fopen(meta_filename, "w");
+ meta_fd = openat(cdir_fd, meta_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700);
+ if (meta_fd < 0) {
+ perror("Failed to open vm metadata snapshot file descriptor.");
+ goto done;
+ }
+
+ meta_file = fdopen(meta_fd, "w");
if (meta_file == NULL) {
perror("Failed to open vm metadata snapshot file.");
goto done;
@@ -1398,7 +1496,6 @@
goto done;
}
-
ret = vm_snapshot_kern_structs(ctx, kdata_fd, xop);
if (ret != 0) {
fprintf(stderr, "Failed to snapshot vm kernel data.\n");
@@ -1415,8 +1512,19 @@
xo_finish_h(xop);
+
if (stop_vm) {
- vm_destroy(ctx);
+ if (capsysctl != NULL) {
+ error = vm_get_name(ctx, vmname, MAX_VMNAME - 1);
+ if (error != 0) {
+ EPRINTLN("%s: Failed to get VM name.", __func__);
+ goto done;
+ }
+ local_vm_destroy(vmname);
+ free(ctx);
+ } else
+ vm_destroy(ctx);
+
exit(0);
}
@@ -1437,10 +1545,12 @@
fclose(meta_file);
if (kdata_fd > 0)
close(kdata_fd);
+ if (cdir_fd > 0)
+ close(cdir_fd);
return (error);
}
-static int
+int
handle_message(struct vmctx *ctx, nvlist_t *nvl)
{
int err;
@@ -1453,13 +1563,13 @@
if (strcmp(cmd, "checkpoint") == 0) {
if (!nvlist_exists_string(nvl, "filename") ||
!nvlist_exists_bool(nvl, "suspend"))
- err = -1;
+ err = -1;
else
- err = vm_checkpoint(ctx, nvlist_get_string(nvl, "filename"),
- nvlist_get_bool(nvl, "suspend"));
+ err = vm_checkpoint(ctx, nvlist_get_string(nvl, "filename"),
+ nvlist_get_bool(nvl, "suspend"));
} else {
- EPRINTLN("Unrecognized checkpoint operation\n");
- err = -1;
+ EPRINTLN("Unrecognized checkpoint operation\n");
+ err = -1;
}
if (err != 0)
@@ -1483,6 +1593,11 @@
for (;;) {
nvl = nvlist_recv(thread_info->socket_fd, 0);
+
+ /*
+ * nvlist_recv() returns NULL on failure; only dispatch
+ * messages that were received successfully.
+ */
if (nvl != NULL)
handle_message(thread_info->ctx, nvl);
else
@@ -1492,8 +1607,58 @@
return (NULL);
}
+/*
+ * Restrict socket 's' to CAP_BIND, CAP_READ and CAP_GETSOCKOPT.  Exits with
+ * EX_OSERR when the limit cannot be applied.
+ */
+static void
+limit_control_socket(int s)
+{
+	cap_rights_t sock_rights;
+	int rc;
+
+	cap_rights_init(&sock_rights, CAP_BIND, CAP_READ, CAP_GETSOCKOPT);
+	rc = caph_rights_limit(s, &sock_rights);
+	if (rc == -1)
+		errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+
+/*
+ * Restrict the snapshot directory descriptor cdir_fd to CAP_LOOKUP,
+ * CAP_FTRUNCATE, CAP_PWRITE, CAP_PREAD, CAP_FCNTL and CAP_CREATE.  Exits
+ * with EX_OSERR when the limit cannot be applied.
+ */
+static void
+limit_file_operations(void)
+{
+	cap_rights_t dir_rights;
+
+	cap_rights_init(&dir_rights, CAP_LOOKUP, CAP_FTRUNCATE, CAP_PWRITE,
+	    CAP_PREAD, CAP_FCNTL, CAP_CREATE);
+	if (caph_rights_limit(cdir_fd, &dir_rights) != -1)
+		return;
+
+	errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+
+/*
+ * Set up the file-scope state used by the snapshot code:
+ *
+ *  - capsysctl: a Casper channel to the system.sysctl service.  Failing to
+ *    open the service is not fatal; capsysctl is left NULL and a warning is
+ *    printed.
+ *  - cdir_fd: a descriptor for the directory 'ckp_path' (the current
+ *    working directory when 'ckp_path' is NULL), with its rights reduced by
+ *    limit_file_operations().
+ *
+ * Exits the process if Casper cannot be initialised or the directory cannot
+ * be opened.
+ */
+static void
+init_capsicum_info(char *ckp_path)
+{
+	cap_channel_t *casper_channel;
+
+	/* Open capability to Casper. */
+	casper_channel = cap_init();
+	if (casper_channel == NULL)
+		errx(EX_OSERR, "cap_init() failed");
+
+	/* Create capability to the system.sysctl service with Casper. */
+	capsysctl = cap_service_open(casper_channel, "system.sysctl");
+	if (capsysctl == NULL)
+		fprintf(stderr, "%s: Unable to open system.sysctl service\n",
+		    __func__);
+
+	/* The service channel, if any, outlives the main Casper channel. */
+	cap_close(casper_channel);
+
+	/*
+	 * If the path for the parent directory is not specified then
+	 * the current working directory is used.
+	 */
+	if (ckp_path == NULL)
+		ckp_path = ".";
+
+	cdir_fd = open(ckp_path, O_RDONLY | O_DIRECTORY);
+	if (cdir_fd < 0) {
+		/* err() reports errno; the failed fd (-1) is not an error code. */
+		err(1, "open snapshot files directory");
+	}
+
+	limit_file_operations();
+}
+
void
-init_snapshot(void)
+init_snapshot(char *ckp_path)
{
int err;
@@ -1506,6 +1671,7 @@
err = pthread_cond_init(&vcpus_can_run, NULL);
if (err != 0)
errc(1, err, "checkpoint cv init (vcpus_can_run)");
+ init_capsicum_info(ckp_path);
}
/*
@@ -1519,7 +1685,7 @@
int socket_fd;
pthread_t checkpoint_pthread;
char vmname_buf[MAX_VMNAME];
- int err;
+ int err = 0;
memset(&addr, 0, sizeof(addr));
@@ -1530,6 +1696,8 @@
goto fail;
}
+ limit_control_socket(socket_fd);
+
addr.sun_family = AF_UNIX;
err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Jan 17, 1:56 PM (20 h, 31 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15839310
Default Alt Text
D34547.diff (13 KB)
Attached To
Mode
D34547: bhyve - snapshot capsicum integration[Part 1]
Attached
Detach File
Event Timeline
Log In to Comment