Page MenuHomeFreeBSD

D34718.id122190.diff
No OneTemporary

D34718.id122190.diff

diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -94,6 +94,13 @@
LIBADD= vmmapi md nv pthread z util sbuf cam 9p
+.if ${MK_CASPER} != "no"
+LIBADD+= casper
+LIBADD+= cap_net
+LIBADD+= cap_sysctl
+CFLAGS+=-DWITH_CASPER
+.endif
+
.if ${MK_BHYVE_SNAPSHOT} != "no"
LIBADD+= ucl xo
.endif
diff --git a/usr.sbin/bhyve/migration.h b/usr.sbin/bhyve/migration.h
--- a/usr.sbin/bhyve/migration.h
+++ b/usr.sbin/bhyve/migration.h
@@ -19,9 +19,48 @@
struct vmctx;
+/* Warm Migration: buffer sizes, handshake results and protocol identifiers. */
+#define MAX_DEV_NAME_LEN 64 /* size of migration_message_type.name */
+
+#define MAX_SPEC_LEN 256 /* size of each string in migration_system_specs */
+
+#define MIGRATION_SPECS_OK 0 /* result value: specs matched / step succeeded */
+#define MIGRATION_SPECS_NOT_OK 1 /* result value: specs of the two hosts differ */
+
+enum migration_transfer_req { /* direction of a socket transfer */
+ MIGRATION_SEND_REQ = 0, /* write the buffer to the peer */
+ MIGRATION_RECV_REQ = 1 /* fill the buffer from the peer */
+};
+
+enum message_types { /* stored in migration_message_type.type */
+ MESSAGE_TYPE_SPECS = 1, /* header is followed by migration_system_specs */
+ MESSAGE_TYPE_METADATA = 2,
+ MESSAGE_TYPE_RAM = 3,
+ MESSAGE_TYPE_KERN = 4,
+ MESSAGE_TYPE_DEV = 5,
+ MESSAGE_TYPE_UNKNOWN = 8, /* NOTE(review): values 6 and 7 are unassigned - confirm intent */
+};
+
struct __attribute__((packed)) migrate_req {
char host[MAXHOSTNAMELEN];
unsigned int port;
};
-int receive_vm_migration(struct vmctx *ctx, char *migration_data);
\ No newline at end of file
+struct __attribute__((packed)) migration_message_type { /* header sent raw on the socket before a payload */
+ size_t len; /* size in bytes of the payload that follows; NOTE(review): size_t width is ABI dependent */
+ unsigned int type; /* enum message_types */
+ unsigned int req_type; /* enum snapshot_req; NOTE(review): never set in the migration code shown here */
+ char name[MAX_DEV_NAME_LEN]; /* NOTE(review): presumably a device name; not used in the migration code shown here */
+};
+
+struct __attribute__((packed)) migration_system_specs { /* host properties compared before migrating; sent raw on the socket */
+ char hw_machine[MAX_SPEC_LEN]; /* value of the hw.machine sysctl */
+ char hw_model[MAX_SPEC_LEN]; /* value of the hw.model sysctl */
+ size_t hw_pagesize; /* value of the hw.pagesize sysctl */
+};
+
+int receive_vm_migration(struct vmctx *ctx, char *migration_data);
+int vm_send_migrate_req(struct vmctx *ctx, struct migrate_req req, bool live);
+#ifdef WITH_CASPER
+int migration_cap_setup(void);
+#endif
\ No newline at end of file
diff --git a/usr.sbin/bhyve/migration.c b/usr.sbin/bhyve/migration.c
--- a/usr.sbin/bhyve/migration.c
+++ b/usr.sbin/bhyve/migration.c
@@ -50,6 +50,11 @@
fprintf(stderr, "%s: " FMT "\n", __func__, ##__VA_ARGS__); \
})
+static cap_channel_t *capnet;
+static cap_channel_t *capsysctl;
+
+static int vm_recv_migrate_req(struct vmctx *ctx, struct migrate_req req);
+
int
receive_vm_migration(struct vmctx *ctx, char *migration_data)
{
@@ -89,10 +94,511 @@
strlcpy(req.host, hostname, MAXHOSTNAMELEN);
- // rc = vm_recv_migrate_req(ctx, req);
- rc = EOPNOTSUPP;
- EPRINTF("Migration not implemented yet");
+ rc = vm_recv_migrate_req(ctx, req);
free(hostname);
return (rc);
}
+
+/*
+ * Read one hw.* sysctl, going through the Casper sysctl service when it has
+ * been opened (capsysctl != NULL) and through plain sysctl(3) otherwise.
+ */
+static int
+migration_sysctl(const int *mib, unsigned int miblen, void *buf, size_t *len)
+{
+	if (capsysctl != NULL)
+		return (cap_sysctl(capsysctl, mib, miblen, buf, len, NULL, 0));
+	return (sysctl(mib, miblen, buf, len, NULL, 0));
+}
+
+/*
+ * Collect the local host properties that the migration handshake compares:
+ * hw.machine, hw.model and hw.pagesize.
+ *
+ * specs is fully overwritten.  Returns 0 on success or the non-zero return
+ * value of the failing sysctl call.
+ */
+static int
+get_system_specs_for_migration(struct migration_system_specs *specs)
+{
+	int mib[2];
+	size_t len;
+	char interm[MAX_SPEC_LEN];
+	int rc;
+	int num;
+
+	/*
+	 * The structure is transmitted as raw bytes, so clear it first: the
+	 * bytes after each string terminator must not carry stale stack data
+	 * to the peer.
+	 */
+	memset(specs, 0, sizeof(*specs));
+
+	mib[0] = CTL_HW;
+	mib[1] = HW_MACHINE;
+	memset(interm, 0, sizeof(interm));
+	len = sizeof(interm);
+	rc = migration_sysctl(mib, 2, interm, &len);
+	if (rc != 0) {
+		perror("Could not retrieve HW_MACHINE specs");
+		return (rc);
+	}
+	/* Bound the copy by the destination size, not by the source length. */
+	strlcpy(specs->hw_machine, interm, sizeof(specs->hw_machine));
+
+	mib[0] = CTL_HW;
+	mib[1] = HW_MODEL;
+	memset(interm, 0, sizeof(interm));
+	len = sizeof(interm);
+	rc = migration_sysctl(mib, 2, interm, &len);
+	if (rc != 0) {
+		perror("Could not retrieve HW_MODEL specs");
+		return (rc);
+	}
+	strlcpy(specs->hw_model, interm, sizeof(specs->hw_model));
+
+	mib[0] = CTL_HW;
+	mib[1] = HW_PAGESIZE;
+	num = 0;
+	len = sizeof(num);
+	rc = migration_sysctl(mib, 2, &num, &len);
+	if (rc != 0) {
+		perror("Could not retrieve HW_PAGESIZE specs");
+		return (rc);
+	}
+	specs->hw_pagesize = num;
+
+	return (0);
+}
+
+/*
+ * Move exactly len bytes between msg and socket, looping over short
+ * transfers.  req selects the direction: MIGRATION_SEND_REQ writes msg to
+ * the socket, MIGRATION_RECV_REQ fills msg from the socket.
+ *
+ * Returns 0 once all len bytes were moved, EINVAL for an unknown direction,
+ * the errno of a failed send(2)/recv(2), or ECONNRESET when a transfer
+ * returned 0 before len bytes were moved.
+ */
+static int
+migration_transfer_data(int socket, void *msg, size_t len, enum migration_transfer_req req)
+{
+	size_t to_transfer, total_transferred;
+	ssize_t transferred;
+	int error;
+
+	to_transfer = len;
+	total_transferred = 0;
+
+	while (to_transfer > 0) {
+		switch (req) {
+		case MIGRATION_SEND_REQ:
+			transferred = send(socket, (char *)msg + total_transferred,
+			    to_transfer, 0);
+			break;
+		case MIGRATION_RECV_REQ:
+			transferred = recv(socket, (char *)msg + total_transferred,
+			    to_transfer, 0);
+			break;
+		default:
+			DPRINTF("Unknown transfer option");
+			return (EINVAL);
+		}
+
+		if (transferred == 0)
+			break;
+		if (transferred < 0) {
+			/* Interrupted before any byte moved: retry the call. */
+			if (errno == EINTR)
+				continue;
+			/* perror() may change errno, so capture it first. */
+			error = errno;
+			perror("Error while transfering data");
+			return (error);
+		}
+
+		to_transfer -= transferred;
+		total_transferred += transferred;
+	}
+
+	return (to_transfer == 0 ? 0 : ECONNRESET);
+}
+
+/*
+ * Handshake that decides whether the two hosts are compatible.
+ *
+ * With MIGRATION_SEND_REQ (source) the local specs are announced with a
+ * MESSAGE_TYPE_SPECS header, sent, and the verdict is read back.  With
+ * MIGRATION_RECV_REQ (destination) the header and specs are read, compared
+ * with the local ones (hw.machine, hw.model, hw.pagesize) and the verdict
+ * is sent back.
+ *
+ * Returns 0 when the specs were accepted, EINVAL on a bad request, an
+ * unexpected header or a specs mismatch, or the error of a failed transfer.
+ */
+static int
+migration_check_specs(int socket, enum migration_transfer_req req)
+{
+	struct migration_system_specs local_specs;
+	struct migration_system_specs remote_specs;
+	struct migration_system_specs *transfer_specs;
+	struct migration_message_type msg;
+	enum migration_transfer_req rev_req;
+	size_t response;
+	int rc;
+
+	if ((req != MIGRATION_SEND_REQ) && (req != MIGRATION_RECV_REQ)) {
+		EPRINTF("Unknown option for migration req");
+		return (EINVAL);
+	}
+
+	/* The verdict travels in the opposite direction of the specs. */
+	if (req == MIGRATION_SEND_REQ)
+		rev_req = MIGRATION_RECV_REQ;
+	else
+		rev_req = MIGRATION_SEND_REQ;
+
+	rc = get_system_specs_for_migration(&local_specs);
+	if (rc != 0) {
+		EPRINTF("Could not retrieve local specs");
+		return (rc);
+	}
+
+	/*
+	 * The header is sent as raw bytes; clear it so that the fields this
+	 * handshake does not use (req_type, name) are not uninitialized
+	 * stack data on the wire.
+	 */
+	memset(&msg, 0, sizeof(msg));
+	if (req == MIGRATION_SEND_REQ) {
+		/* Send message type to server: specs & len */
+		msg.type = MESSAGE_TYPE_SPECS;
+		msg.len = sizeof(local_specs);
+	}
+
+	rc = migration_transfer_data(socket, &msg, sizeof(msg), req);
+	if (rc != 0) {
+		EPRINTF("Could not send message type");
+		return (rc);
+	}
+
+	if (req == MIGRATION_RECV_REQ) {
+		if (msg.type != MESSAGE_TYPE_SPECS) {
+			EPRINTF("Wrong message type received from remote");
+			return (EINVAL);
+		}
+		/*
+		 * A different payload size means the peer uses another
+		 * layout; reading sizeof(remote_specs) bytes would desync
+		 * the stream.
+		 */
+		if (msg.len != sizeof(remote_specs)) {
+			EPRINTF("Unexpected system specs length received from remote");
+			return (EINVAL);
+		}
+	}
+
+	/*
+	 * For the send req, we send the local specs and for the receive req
+	 * we receive the remote specs.
+	 */
+	memset(&remote_specs, 0, sizeof(remote_specs));
+	transfer_specs = (req == MIGRATION_SEND_REQ) ? &local_specs : &remote_specs;
+
+	rc = migration_transfer_data(socket, transfer_specs, sizeof(*transfer_specs), req);
+	if (rc != 0) {
+		EPRINTF("Could not transfer system specs");
+		return (rc);
+	}
+
+	response = MIGRATION_SPECS_NOT_OK;
+	if (req == MIGRATION_RECV_REQ) {
+		/*
+		 * The strings come from the network: force termination
+		 * before they are compared or printed with %s.
+		 */
+		remote_specs.hw_machine[MAX_SPEC_LEN - 1] = '\0';
+		remote_specs.hw_model[MAX_SPEC_LEN - 1] = '\0';
+
+		/* Check specs */
+		response = MIGRATION_SPECS_OK;
+		if ((strncmp(local_specs.hw_model, remote_specs.hw_model, MAX_SPEC_LEN) != 0)
+		    || (strncmp(local_specs.hw_machine, remote_specs.hw_machine, MAX_SPEC_LEN) != 0)
+		    || (local_specs.hw_pagesize != remote_specs.hw_pagesize)) {
+			EPRINTF("System specification mismatch");
+			DPRINTF("Local specs vs Remote Specs: \n"
+			    "\tmachine: %s vs %s\n"
+			    "\tmodel: %s vs %s\n"
+			    "\tpagesize: %zu vs %zu\n",
+			    local_specs.hw_machine,
+			    remote_specs.hw_machine,
+			    local_specs.hw_model,
+			    remote_specs.hw_model,
+			    local_specs.hw_pagesize,
+			    remote_specs.hw_pagesize);
+			response = MIGRATION_SPECS_NOT_OK;
+		}
+	}
+
+	/*
+	 * The source will receive the result of the checkup (i.e.
+	 * whether the migration is possible or the source and destination
+	 * are incompatible for migration) and the destination will send the
+	 * result of the checkup.
+	 */
+	rc = migration_transfer_data(socket, &response, sizeof(response), rev_req);
+	if (rc != 0) {
+		EPRINTF("Could not transfer response from server");
+		return (rc);
+	}
+
+	if (response == MIGRATION_SPECS_NOT_OK)
+		return (EINVAL);
+
+	printf("%s: System specification accepted\n", __func__);
+
+	return (0);
+}
+
+/*
+ * Resolve hostname to an IPv4 address and store it in *addr.
+ *
+ * The lookup goes through the Casper net service when it is available
+ * (capnet != NULL) and through getaddrinfo(3) otherwise.  Only the first
+ * result is used.
+ *
+ * Returns 0 on success, the getaddrinfo error code when the lookup fails,
+ * or EINVAL when the first result is not an AF_INET address.
+ */
+static int
+get_migration_address(const char *hostname, struct in_addr *addr)
+{
+	struct addrinfo hints, *ai;
+	const struct sockaddr_in *sin;
+	int gai_rc, ret;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_INET;
+
+	if (capnet == NULL)
+		gai_rc = getaddrinfo(hostname, NULL, &hints, &ai);
+	else
+		gai_rc = cap_getaddrinfo(capnet, hostname, NULL, &hints, &ai);
+
+	if (gai_rc != 0) {
+		EPRINTF("Could not get address info: %s", gai_strerror(gai_rc));
+		return (gai_rc);
+	}
+
+	if (ai->ai_family == AF_INET) {
+		sin = (const struct sockaddr_in *)ai->ai_addr;
+		addr->s_addr = sin->sin_addr.s_addr;
+		ret = 0;
+	} else {
+		EPRINTF("Unknown address family.");
+		ret = EINVAL;
+	}
+
+	freeaddrinfo(ai);
+	return (ret);
+}
+
+/*
+ * Establish the TCP connection used for a migration and return it in
+ * *socket_fd.
+ *
+ * MIGRATION_SEND_REQ: connect to req.host on req.port.
+ * MIGRATION_RECV_REQ: listen on req.port on all local addresses and accept
+ * connections until one arrives from the address req.host resolves to;
+ * connections from any other address are closed.
+ *
+ * Returns 0 on success; otherwise a non-zero error and *socket_fd is left
+ * untouched.  No descriptor opened here stays open on failure.
+ */
+static inline int
+migrate_connections(struct migrate_req req, int *socket_fd,
+    enum migration_transfer_req type)
+{
+	int error;
+	int s, con_socket;
+	struct sockaddr_in sa, client_sa;
+	struct in_addr req_addr;
+	socklen_t client_len;
+	int rc;
+
+	rc = get_migration_address(req.host, &req_addr);
+	if (rc != 0) {
+		EPRINTF("Invalid address.");
+		DPRINTF("IP address used for migration: %s;\n"
+		    "Port used for migration: %d",
+		    req.host, req.port);
+		return (rc);
+	}
+
+	s = socket(AF_INET, SOCK_STREAM, 0);
+	if (s < 0) {
+		/* Capture errno before perror() gets a chance to change it. */
+		error = errno;
+		perror("Could not create socket");
+		return (error);
+	}
+
+	bzero(&sa, sizeof(sa));
+	sa.sin_family = AF_INET;
+	sa.sin_port = htons(req.port);
+
+	switch (type) {
+	case MIGRATION_SEND_REQ:
+		printf("%s: Starting connection to %s on %d port...\n",
+		    __func__, inet_ntoa(req_addr), req.port);
+
+		sa.sin_addr = req_addr;
+		/*
+		 * Like the other helpers in this file, only go through
+		 * Casper when the service channel has actually been opened.
+		 */
+		if (capnet != NULL)
+			rc = cap_connect(capnet, s, (struct sockaddr *)&sa, sizeof(sa));
+		else
+			rc = connect(s, (struct sockaddr *)&sa, sizeof(sa));
+		if (rc != 0) {
+			error = errno;
+			perror("Could not connect to the remote host");
+			goto done_close_s;
+		}
+
+		*socket_fd = s;
+		break;
+	case MIGRATION_RECV_REQ:
+		printf("%s: Waiting for connections from %s on %d port...\n",
+		    __func__, inet_ntoa(req_addr), req.port);
+
+		sa.sin_addr.s_addr = htonl(INADDR_ANY);
+
+		rc = bind(s, (struct sockaddr *)&sa, sizeof(sa));
+		if (rc != 0) {
+			error = errno;
+			perror("Could not bind");
+			goto done_close_s;
+		}
+
+		rc = listen(s, 1);
+		if (rc != 0) {
+			error = errno;
+			perror("Could not listen");
+			goto done_close_s;
+		}
+
+		while (1) {
+			client_len = sizeof(client_sa);
+			con_socket = accept(s, (struct sockaddr *)&client_sa, &client_len);
+			if (con_socket < 0) {
+				error = errno;
+				EPRINTF("Could not accept connection");
+				goto done_close_s;
+			}
+
+			if (client_sa.sin_addr.s_addr == req_addr.s_addr) {
+				printf("%s: Accepted connection from %s\n", __func__, inet_ntoa(req_addr));
+				break;
+			} else {
+				DPRINTF("Invalid connection from IP: %s", inet_ntoa(client_sa.sin_addr));
+			}
+			/* Not the expected peer: drop it and keep waiting. */
+			close(con_socket);
+		}
+		*socket_fd = con_socket;
+		/* The listening socket is no longer needed. */
+		close(s);
+		break;
+	default:
+		DPRINTF("unknown operation request");
+		error = EINVAL;
+		/* The socket was already created; do not leak it. */
+		goto done_close_s;
+	}
+
+	return (0);
+
+done_close_s:
+	close(s);
+	return (error);
+}
+
+/*
+ * Source side of a migration: connect to the destination named in req,
+ * run the specs handshake, announce whether the migration is live, pause
+ * the vCPUs and user devices and wait for the destination's completion
+ * message.
+ *
+ * The transfer of the guest state itself is not implemented yet, so after
+ * the handshake the guest is always resumed and EOPNOTSUPP is returned.
+ * Any earlier failure also resumes the guest if it was paused and returns
+ * a non-zero error.
+ */
+int
+vm_send_migrate_req(struct vmctx *ctx, struct migrate_req req, bool is_live)
+{
+	int s;
+	int rc, error;
+	size_t migration_completed;
+
+#if !defined(WITHOUT_CAPSICUM) && !defined(WITH_CASPER)
+	EPRINTF("Migration is not possible with Capsicum enabled and without Casper support");
+	return (EOPNOTSUPP);
+#endif
+
+	rc = migrate_connections(req, &s, MIGRATION_SEND_REQ);
+	if (rc != 0) {
+		EPRINTF("Could not create connection");
+		return (rc);
+	}
+
+	rc = migration_check_specs(s, MIGRATION_SEND_REQ);
+	if (rc != 0) {
+		EPRINTF("Error while checking system requirements");
+		error = rc;
+		goto done;
+	}
+
+	rc = migration_transfer_data(s, &is_live, sizeof(is_live), MIGRATION_SEND_REQ);
+	if (rc != 0) {
+		EPRINTF("Could not send migration type");
+		error = rc;
+		goto done;
+	}
+
+	vm_vcpu_pause(ctx);
+
+	rc = vm_pause_user_devs();
+	if (rc != 0) {
+		EPRINTF("Could not pause devices");
+		error = rc;
+		goto unlock_vm_and_exit;
+	}
+
+	rc = migration_transfer_data(s, &migration_completed,
+	    sizeof(migration_completed), MIGRATION_RECV_REQ);
+	if ((rc != 0) || (migration_completed != MIGRATION_SPECS_OK)) {
+		EPRINTF("Could not recv 'migration completed' from remote or received error");
+		/*
+		 * Keep the transfer error when there is one; a completed
+		 * transfer carrying a failure status is reported as EINVAL.
+		 */
+		error = (rc != 0) ? rc : EINVAL;
+		goto unlock_vm_and_exit;
+	}
+
+	/*
+	 * TODO: transfer the guest state here.  The placeholder this comment
+	 * replaces destroyed the VM and exited after a successful migration,
+	 * but it was unreachable.
+	 */
+	EPRINTF("Rest of migration not yet implemented");
+	error = EOPNOTSUPP;
+
+unlock_vm_and_exit:
+	rc = vm_resume_user_devs();
+	if (rc != 0)
+		EPRINTF("Could not resume devices");
+	vm_vcpu_resume(ctx);
+
+done:
+	close(s);
+	return (error);
+}
+
+/*
+ * Destination side of a migration: wait for the source named in req to
+ * connect, run the specs handshake, read the migration type and send the
+ * completion message back.
+ *
+ * The transfer of the guest state itself is not implemented yet, so a
+ * fully successful exchange still returns EOPNOTSUPP.  Any failing step
+ * returns that step's error.  The connection is closed before returning.
+ */
+static int
+vm_recv_migrate_req(struct vmctx __unused *ctx, struct migrate_req req)
+{
+	size_t done_flag;
+	bool live;
+	int error, sock;
+
+	error = migrate_connections(req, &sock, MIGRATION_RECV_REQ);
+	if (error != 0) {
+		EPRINTF("Could not create connections");
+		return (error);
+	}
+
+	error = migration_check_specs(sock, MIGRATION_RECV_REQ);
+	if (error != 0) {
+		EPRINTF("Error while checking specs");
+		goto out;
+	}
+
+	error = migration_transfer_data(sock, &live, sizeof(live), MIGRATION_RECV_REQ);
+	if (error != 0) {
+		EPRINTF("Could not recv migration type");
+		goto out;
+	}
+
+	/* Tell the source that this side is done. */
+	done_flag = MIGRATION_SPECS_OK;
+	error = migration_transfer_data(sock, &done_flag, sizeof(done_flag),
+	    MIGRATION_SEND_REQ);
+	if (error != 0) {
+		EPRINTF("Could not send 'migration completed' to remote");
+		goto out;
+	}
+
+	close(sock);
+	EPRINTF("Rest of migration not currently implemented");
+	return (EOPNOTSUPP);
+
+out:
+	close(sock);
+	return (error);
+}
+
+#ifdef WITH_CASPER
+/*
+ * Open the Casper services used by the migration code and restrict them:
+ * system.net is limited to name-to-address lookups for AF_INET and to
+ * connect, system.sysctl to reading hw.machine, hw.model and hw.pagesize.
+ *
+ * On success the channels are stored in capnet and capsysctl and 0 is
+ * returned.  On failure every channel opened here is closed, capnet and
+ * capsysctl are reset to NULL and a non-zero error is returned.
+ */
+int
+migration_cap_setup(void)
+{
+	cap_channel_t *capcas;
+	cap_net_limit_t *limit_net;
+	cap_sysctl_limit_t *limit_sysctl;
+	int familylimit;
+	int error;
+
+	/* errno is captured before each warn(), which may change it. */
+	capcas = cap_init();
+	if (capcas == NULL) {
+		error = errno;
+		warn("Unable to create casper process");
+		goto fail;
+	}
+
+	caph_cache_catpages();
+
+	capnet = cap_service_open(capcas, "system.net");
+	if (capnet == NULL) {
+		error = errno;
+		warn("Unable to open system.net service");
+		goto fail;
+	}
+
+	capsysctl = cap_service_open(capcas, "system.sysctl");
+	if (capsysctl == NULL) {
+		error = errno;
+		warn("Unable to open system.sysctl service");
+		goto fail;
+	}
+
+	/* The main Casper channel is only needed to open the services. */
+	cap_close(capcas);
+	capcas = NULL;
+
+	limit_net = cap_net_limit_init(capnet, CAPNET_NAME2ADDR | CAPNET_CONNECT);
+	if (limit_net == NULL) {
+		error = errno;
+		warn("Unable to create cap_net limits.");
+		goto fail;
+	}
+
+	familylimit = AF_INET;
+	cap_net_limit_name2addr_family(limit_net, &familylimit, 1);
+
+	if (cap_net_limit(limit_net) < 0) {
+		error = errno;
+		warn("Unable to apply cap_net limits.");
+		goto fail;
+	}
+
+	limit_sysctl = cap_sysctl_limit_init(capsysctl);
+	if (limit_sysctl == NULL) {
+		error = errno;
+		warn("Unable to create cap_sysctl limits.");
+		goto fail;
+	}
+
+	cap_sysctl_limit_name(limit_sysctl, "hw.machine", CAP_SYSCTL_READ);
+	cap_sysctl_limit_name(limit_sysctl, "hw.model", CAP_SYSCTL_READ);
+	cap_sysctl_limit_name(limit_sysctl, "hw.pagesize", CAP_SYSCTL_READ);
+
+	if (cap_sysctl_limit(limit_sysctl) < 0) {
+		error = errno;
+		warn("Unable to apply cap_sysctl limits.");
+		goto fail;
+	}
+
+	return (0);
+
+fail:
+	/*
+	 * Do not leave half-configured channels behind: the rest of the
+	 * file treats a non-NULL capnet/capsysctl as ready to use.
+	 */
+	if (capcas != NULL)
+		cap_close(capcas);
+	if (capsysctl != NULL) {
+		cap_close(capsysctl);
+		capsysctl = NULL;
+	}
+	if (capnet != NULL) {
+		cap_close(capnet);
+		capnet = NULL;
+	}
+	/* Never report success from the failure path, even if errno was 0. */
+	return (error != 0 ? error : EINVAL);
+}
+#endif
diff --git a/usr.sbin/bhyve/snapshot.h b/usr.sbin/bhyve/snapshot.h
--- a/usr.sbin/bhyve/snapshot.h
+++ b/usr.sbin/bhyve/snapshot.h
@@ -91,6 +91,8 @@
void checkpoint_cpu_add(int vcpu);
void checkpoint_cpu_resume(int vcpu);
void checkpoint_cpu_suspend(int vcpu);
+void vm_vcpu_pause(struct vmctx *ctx);
+void vm_vcpu_resume(struct vmctx *ctx);
int restore_vm_mem(struct vmctx *ctx, struct restore_state *rstate);
int vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate);
diff --git a/usr.sbin/bhyve/snapshot.c b/usr.sbin/bhyve/snapshot.c
--- a/usr.sbin/bhyve/snapshot.c
+++ b/usr.sbin/bhyve/snapshot.c
@@ -1285,7 +1285,7 @@
pthread_mutex_unlock(&vcpu_lock);
}
-static void
+void
vm_vcpu_pause(struct vmctx *ctx)
{
@@ -1297,7 +1297,7 @@
pthread_mutex_unlock(&vcpu_lock);
}
-static void
+void
vm_vcpu_resume(struct vmctx *ctx)
{
@@ -1493,7 +1493,7 @@
IPC_COMMAND(ipc_cmd_set, checkpoint, vm_do_checkpoint);
static int
-vm_do_migrate(struct vmctx __unused *ctx, const nvlist_t *nvl)
+vm_do_migrate(struct vmctx *ctx, const nvlist_t *nvl)
{
size_t len;
struct migrate_req req;
@@ -1520,9 +1520,7 @@
req.host,
req.port);
- // return (vm_send_migrate_req(ctx, req, nvlist_get_bool(nvl, "live")));
- EPRINTLN("Migration operation not implemented yet\n");
- return (EOPNOTSUPP);
+ return (vm_send_migrate_req(ctx, req, nvlist_get_bool(nvl, "live")));
}
IPC_COMMAND(ipc_cmd_set, migrate, vm_do_migrate);
@@ -1592,7 +1590,15 @@
if (caph_rights_limit(socket_fd, &rights) == -1)
errx(EX_OSERR, "Unable to apply rights for sandbox");
+
+#ifdef WITH_CASPER
+ err = migration_cap_setup();
+ if (err != 0) {
+ errx(EX_OSERR, "Unable to setup capabilities for migration");
+ }
#endif
+#endif
+
checkpoint_info = calloc(1, sizeof(*checkpoint_info));
checkpoint_info->ctx = ctx;
checkpoint_info->socket_fd = socket_fd;

File Metadata

Mime Type
text/plain
Expires
Fri, Nov 8, 9:04 PM (11 h, 4 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14542911
Default Alt Text
D34718.id122190.diff (16 KB)

Event Timeline