Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F102120329
D34718.id122190.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
16 KB
Referenced Files
None
Subscribers
None
D34718.id122190.diff
View Options
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -94,6 +94,13 @@
LIBADD= vmmapi md nv pthread z util sbuf cam 9p
+# When Casper is enabled, link libcasper plus its net and sysctl services
+# and define WITH_CASPER so the migration code can use them.
+.if ${MK_CASPER} != "no"
+LIBADD+= casper
+LIBADD+= cap_net
+LIBADD+= cap_sysctl
+CFLAGS+=-DWITH_CASPER
+.endif
+
.if ${MK_BHYVE_SNAPSHOT} != "no"
LIBADD+= ucl xo
.endif
diff --git a/usr.sbin/bhyve/migration.h b/usr.sbin/bhyve/migration.h
--- a/usr.sbin/bhyve/migration.h
+++ b/usr.sbin/bhyve/migration.h
@@ -19,9 +19,48 @@
struct vmctx;
+/* Warm Migration */
+#define MAX_DEV_NAME_LEN 64
+
+#define MAX_SPEC_LEN 256
+
+#define MIGRATION_SPECS_OK 0
+#define MIGRATION_SPECS_NOT_OK 1
+
+/*
+ * Direction of an operation on the migration socket: the transfer helper
+ * in migration.c calls send(2) for MIGRATION_SEND_REQ and recv(2) for
+ * MIGRATION_RECV_REQ.
+ */
+enum migration_transfer_req {
+ MIGRATION_SEND_REQ = 0,
+ MIGRATION_RECV_REQ = 1
+};
+
+/*
+ * Values for the 'type' field of struct migration_message_type.
+ * Only MESSAGE_TYPE_SPECS is used by the code in this change.
+ */
+enum message_types {
+ MESSAGE_TYPE_SPECS = 1,
+ MESSAGE_TYPE_METADATA = 2,
+ MESSAGE_TYPE_RAM = 3,
+ MESSAGE_TYPE_KERN = 4,
+ MESSAGE_TYPE_DEV = 5,
+ MESSAGE_TYPE_UNKNOWN = 8,
+};
+
struct __attribute__((packed)) migrate_req {
char host[MAXHOSTNAMELEN];
unsigned int port;
};
-int receive_vm_migration(struct vmctx *ctx, char *migration_data);
\ No newline at end of file
+/*
+ * Header sent on the migration socket ahead of a payload.  The spec check
+ * in migration.c fills in 'type' and 'len' (the size of the payload that
+ * follows) before sending it; 'req_type' and 'name' are not written by the
+ * code in this change.
+ */
+struct __attribute__((packed)) migration_message_type {
+ size_t len;
+ unsigned int type; /* enum message_types */
+ unsigned int req_type; /* NOTE(review): was "enum snapshot_req", which is not defined in this change - confirm the intended enum */
+ char name[MAX_DEV_NAME_LEN];
+};
+
+/*
+ * Host description exchanged before a migration: the values of the
+ * hw.machine, hw.model and hw.pagesize sysctls.  The receiving side
+ * compares the remote copy against its own and rejects the migration on
+ * any mismatch.
+ */
+struct __attribute__((packed)) migration_system_specs {
+ char hw_machine[MAX_SPEC_LEN];
+ char hw_model[MAX_SPEC_LEN];
+ size_t hw_pagesize;
+};
+
+int receive_vm_migration(struct vmctx *ctx, char *migration_data);
+int vm_send_migrate_req(struct vmctx *ctx, struct migrate_req req, bool live);
+#ifdef WITH_CASPER
+int migration_cap_setup(void);
+#endif
\ No newline at end of file
diff --git a/usr.sbin/bhyve/migration.c b/usr.sbin/bhyve/migration.c
--- a/usr.sbin/bhyve/migration.c
+++ b/usr.sbin/bhyve/migration.c
@@ -50,6 +50,11 @@
fprintf(stderr, "%s: " FMT "\n", __func__, ##__VA_ARGS__); \
})
+static cap_channel_t *capnet;
+static cap_channel_t *capsysctl;
+
+static int vm_recv_migrate_req(struct vmctx *ctx, struct migrate_req req);
+
int
receive_vm_migration(struct vmctx *ctx, char *migration_data)
{
@@ -89,10 +94,511 @@
strlcpy(req.host, hostname, MAXHOSTNAMELEN);
- // rc = vm_recv_migrate_req(ctx, req);
- rc = EOPNOTSUPP;
- EPRINTF("Migration not implemented yet");
+ rc = vm_recv_migrate_req(ctx, req);
free(hostname);
return (rc);
}
+
+/*
+ * Read one hw.* sysctl into buf.  The Casper sysctl channel is used when
+ * it has been opened (capsysctl != NULL); otherwise sysctl(3) is called
+ * directly.  Returns the underlying call's result (0 or -1 with errno).
+ */
+static int
+migration_read_hw_sysctl(int name, void *buf, size_t *lenp)
+{
+	int mib[2];
+
+	mib[0] = CTL_HW;
+	mib[1] = name;
+
+	if (capsysctl != NULL)
+		return (cap_sysctl(capsysctl, mib, 2, buf, lenp, NULL, 0));
+	return (sysctl(mib, 2, buf, lenp, NULL, 0));
+}
+
+/*
+ * Fill *specs with the local hw.machine, hw.model and hw.pagesize values.
+ *
+ * Returns 0 on success or an errno value if one of the sysctls could not
+ * be read.
+ */
+static int
+get_system_specs_for_migration(struct migration_system_specs *specs)
+{
+	char interm[MAX_SPEC_LEN];
+	size_t len;
+	int error;
+	int num;
+
+	/*
+	 * The structure is sent verbatim to the peer, so clear it first:
+	 * the bytes after each string terminator must not carry whatever
+	 * happened to be on the caller's stack.
+	 */
+	memset(specs, 0, sizeof(*specs));
+
+	memset(interm, 0, sizeof(interm));
+	len = sizeof(interm);
+	if (migration_read_hw_sysctl(HW_MACHINE, interm, &len) != 0) {
+		error = errno;
+		perror("Could not retrieve HW_MACHINE specs");
+		return (error);
+	}
+	interm[sizeof(interm) - 1] = '\0';
+	/* Bound the copy by the destination size, not the source length. */
+	strlcpy(specs->hw_machine, interm, sizeof(specs->hw_machine));
+
+	memset(interm, 0, sizeof(interm));
+	len = sizeof(interm);
+	if (migration_read_hw_sysctl(HW_MODEL, interm, &len) != 0) {
+		error = errno;
+		perror("Could not retrieve HW_MODEL specs");
+		return (error);
+	}
+	interm[sizeof(interm) - 1] = '\0';
+	strlcpy(specs->hw_model, interm, sizeof(specs->hw_model));
+
+	num = 0;
+	len = sizeof(num);
+	if (migration_read_hw_sysctl(HW_PAGESIZE, &num, &len) != 0) {
+		error = errno;
+		perror("Could not retrieve HW_PAGESIZE specs");
+		return (error);
+	}
+	specs->hw_pagesize = num;
+
+	return (0);
+}
+
+/*
+ * Send or receive exactly len bytes on socket, looping over short
+ * transfers.
+ *
+ * req selects the direction (MIGRATION_SEND_REQ -> send(2),
+ * MIGRATION_RECV_REQ -> recv(2)).
+ *
+ * Returns 0 once all bytes were transferred, EINVAL for an unknown
+ * direction, ECONNRESET if the peer closed the connection early, or the
+ * errno value of the failing send/recv call.
+ */
+static int
+migration_transfer_data(int socket, void *msg, size_t len, enum migration_transfer_req req)
+{
+	char *cursor;
+	size_t remaining;
+	ssize_t transferred;
+	int error;
+
+	cursor = msg;
+	remaining = len;
+
+	while (remaining > 0) {
+		switch (req) {
+		case MIGRATION_SEND_REQ:
+			transferred = send(socket, cursor, remaining, 0);
+			break;
+		case MIGRATION_RECV_REQ:
+			transferred = recv(socket, cursor, remaining, 0);
+			break;
+		default:
+			DPRINTF("Unknown transfer option");
+			return (EINVAL);
+		}
+
+		if (transferred < 0) {
+			/* An interrupted call transferred nothing; retry. */
+			if (errno == EINTR)
+				continue;
+			/* Save errno: perror() is allowed to change it. */
+			error = errno;
+			perror("Error while transfering data");
+			return (error);
+		}
+		/* Zero bytes means the peer closed the connection. */
+		if (transferred == 0)
+			break;
+
+		cursor += transferred;
+		remaining -= transferred;
+	}
+
+	return (remaining == 0 ? 0 : ECONNRESET);
+}
+
+/*
+ * Exchange and verify system specifications with the migration peer.
+ *
+ * With MIGRATION_SEND_REQ (source host) the local specs are sent and the
+ * peer's verdict is received.  With MIGRATION_RECV_REQ (destination host)
+ * the remote specs are received, compared against the local ones, and the
+ * verdict is sent back.
+ *
+ * Returns 0 if the hosts are compatible, EINVAL on an invalid request, a
+ * malformed message or a spec mismatch, or the error of a failed transfer.
+ */
+static int
+migration_check_specs(int socket, enum migration_transfer_req req)
+{
+	struct migration_system_specs local_specs;
+	struct migration_system_specs remote_specs;
+	struct migration_message_type msg;
+	enum migration_transfer_req rev_req;
+	size_t response;
+	int rc;
+
+	if ((req != MIGRATION_SEND_REQ) && (req != MIGRATION_RECV_REQ)) {
+		EPRINTF("Unknown option for migration req");
+		return (EINVAL);
+	}
+
+	if (req == MIGRATION_SEND_REQ)
+		rev_req = MIGRATION_RECV_REQ;
+	else
+		rev_req = MIGRATION_SEND_REQ;
+
+	/*
+	 * Everything below may be written to the socket as raw bytes, so
+	 * start from zeroed storage instead of leaking stack contents.
+	 */
+	memset(&local_specs, 0, sizeof(local_specs));
+	memset(&remote_specs, 0, sizeof(remote_specs));
+	memset(&msg, 0, sizeof(msg));
+	response = MIGRATION_SPECS_NOT_OK;
+
+	rc = get_system_specs_for_migration(&local_specs);
+	if (rc != 0) {
+		EPRINTF("Could not retrieve local specs");
+		return (rc);
+	}
+
+	if (req == MIGRATION_SEND_REQ) {
+		/* Send message type to server: specs & len */
+		msg.type = MESSAGE_TYPE_SPECS;
+		msg.len = sizeof(local_specs);
+	}
+
+	rc = migration_transfer_data(socket, &msg, sizeof(msg), req);
+	if (rc != 0) {
+		EPRINTF("Could not send message type");
+		return (rc);
+	}
+
+	if (req == MIGRATION_RECV_REQ) {
+		if (msg.type != MESSAGE_TYPE_SPECS) {
+			EPRINTF("Wrong message type received from remote");
+			return (EINVAL);
+		}
+		/* A different payload size would desynchronize the stream. */
+		if (msg.len != sizeof(remote_specs)) {
+			EPRINTF("Unexpected system specs length received from remote");
+			return (EINVAL);
+		}
+	}
+
+	/*
+	 * For the send req, we send the local specs and for the receive req
+	 * we receive the remote specs.
+	 */
+	if (req == MIGRATION_SEND_REQ)
+		rc = migration_transfer_data(socket, &local_specs,
+		    sizeof(local_specs), req);
+	else
+		rc = migration_transfer_data(socket, &remote_specs,
+		    sizeof(remote_specs), req);
+	if (rc != 0) {
+		EPRINTF("Could not transfer system specs");
+		return (rc);
+	}
+
+	if (req == MIGRATION_RECV_REQ) {
+		/* The peer's strings are untrusted: force termination. */
+		remote_specs.hw_machine[MAX_SPEC_LEN - 1] = '\0';
+		remote_specs.hw_model[MAX_SPEC_LEN - 1] = '\0';
+
+		/* Check specs */
+		response = MIGRATION_SPECS_OK;
+		if ((strncmp(local_specs.hw_model, remote_specs.hw_model, MAX_SPEC_LEN) != 0)
+		    || (strncmp(local_specs.hw_machine, remote_specs.hw_machine, MAX_SPEC_LEN) != 0)
+		    || (local_specs.hw_pagesize != remote_specs.hw_pagesize)) {
+			EPRINTF("System specification mismatch");
+			DPRINTF("Local specs vs Remote Specs: \n"
+			    "\tmachine: %s vs %s\n"
+			    "\tmodel: %s vs %s\n"
+			    "\tpagesize: %zu vs %zu\n",
+			    local_specs.hw_machine,
+			    remote_specs.hw_machine,
+			    local_specs.hw_model,
+			    remote_specs.hw_model,
+			    local_specs.hw_pagesize,
+			    remote_specs.hw_pagesize);
+			response = MIGRATION_SPECS_NOT_OK;
+		}
+	}
+
+	/*
+	 * The source will receive the result of the checkup (i.e.
+	 * whether the migration is possible or the source and destination
+	 * are incompatible for migration) and the destination will send the
+	 * result of the checkup.
+	 */
+	rc = migration_transfer_data(socket, &response, sizeof(response), rev_req);
+	if (rc != 0) {
+		EPRINTF("Could not transfer response from server");
+		return (rc);
+	}
+
+	/* Only an explicit OK counts as acceptance. */
+	if (response != MIGRATION_SPECS_OK)
+		return (EINVAL);
+
+	printf("%s: System specification accepted\n", __func__);
+
+	return (0);
+}
+
+/*
+ * Resolve hostname to an IPv4 address and store it in *addr.
+ *
+ * The lookup goes through the Casper net channel when it is open
+ * (capnet != NULL) and through getaddrinfo(3) otherwise.
+ *
+ * Returns 0 on success, the getaddrinfo error code if the lookup fails,
+ * or EINVAL if the first result is not an AF_INET address.
+ */
+static int
+get_migration_address(const char *hostname, struct in_addr *addr)
+{
+	struct addrinfo hints, *ai;
+	struct sockaddr_in *sin;
+	int gai_rc, status;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_INET;
+
+	if (capnet == NULL)
+		gai_rc = getaddrinfo(hostname, NULL, &hints, &ai);
+	else
+		gai_rc = cap_getaddrinfo(capnet, hostname, NULL, &hints, &ai);
+
+	if (gai_rc != 0) {
+		EPRINTF("Could not get address info: %s", gai_strerror(gai_rc));
+		return (gai_rc);
+	}
+
+	if (ai->ai_family == AF_INET) {
+		sin = (struct sockaddr_in *)ai->ai_addr;
+		addr->s_addr = sin->sin_addr.s_addr;
+		status = 0;
+	} else {
+		EPRINTF("Unknown address family.");
+		status = EINVAL;
+	}
+
+	freeaddrinfo(ai);
+	return (status);
+}
+
+/*
+ * Establish the TCP connection used for a migration.
+ *
+ * MIGRATION_SEND_REQ: connect to req.host:req.port.
+ * MIGRATION_RECV_REQ: listen on req.port and accept connections until one
+ * arrives from the address req.host resolves to; others are closed.
+ *
+ * On success *socket_fd holds the connected socket and 0 is returned.
+ * On failure no descriptor is left open and a non-zero error is returned.
+ */
+static inline int
+migrate_connections(struct migrate_req req, int *socket_fd,
+    enum migration_transfer_req type)
+{
+	struct sockaddr_in sa, client_sa;
+	struct in_addr req_addr;
+	socklen_t client_len;
+	int con_socket, s;
+	int error, rc;
+
+	rc = get_migration_address(req.host, &req_addr);
+	if (rc != 0) {
+		EPRINTF("Invalid address.");
+		DPRINTF("IP address used for migration: %s;\n"
+		    "Port used for migration: %u",
+		    req.host, req.port);
+		return (rc);
+	}
+
+	s = socket(AF_INET, SOCK_STREAM, 0);
+	if (s < 0) {
+		/* Save errno before printing: perror() may change it. */
+		error = errno;
+		perror("Could not create socket");
+		return (error);
+	}
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sin_family = AF_INET;
+	sa.sin_port = htons(req.port);
+
+	switch (type) {
+	case MIGRATION_SEND_REQ:
+		printf("%s: Starting connection to %s on %u port...\n",
+		    __func__, inet_ntoa(req_addr), req.port);
+
+		sa.sin_addr = req_addr;
+		/*
+		 * Use the Casper channel only when it was set up, matching
+		 * the other capnet/capsysctl users in this file.
+		 */
+		if (capnet != NULL)
+			rc = cap_connect(capnet, s, (struct sockaddr *)&sa,
+			    sizeof(sa));
+		else
+			rc = connect(s, (struct sockaddr *)&sa, sizeof(sa));
+		if (rc != 0) {
+			error = errno;
+			perror("Could not connect to the remote host");
+			goto done_close_s;
+		}
+
+		*socket_fd = s;
+		break;
+	case MIGRATION_RECV_REQ:
+		printf("%s: Waiting for connections from %s on %u port...\n",
+		    __func__, inet_ntoa(req_addr), req.port);
+
+		sa.sin_addr.s_addr = htonl(INADDR_ANY);
+
+		rc = bind(s, (struct sockaddr *)&sa, sizeof(sa));
+		if (rc != 0) {
+			error = errno;
+			perror("Could not bind");
+			goto done_close_s;
+		}
+
+		rc = listen(s, 1);
+		if (rc != 0) {
+			error = errno;
+			perror("Could not listen");
+			goto done_close_s;
+		}
+
+		for (;;) {
+			client_len = sizeof(client_sa);
+			con_socket = accept(s, (struct sockaddr *)&client_sa,
+			    &client_len);
+			if (con_socket < 0) {
+				error = errno;
+				EPRINTF("Could not accept connection");
+				goto done_close_s;
+			}
+
+			if (client_sa.sin_addr.s_addr == req_addr.s_addr) {
+				printf("%s: Accepted connection from %s\n",
+				    __func__, inet_ntoa(req_addr));
+				break;
+			}
+
+			/* Not the expected source host: drop and keep waiting. */
+			DPRINTF("Invalid connection from IP: %s",
+			    inet_ntoa(client_sa.sin_addr));
+			close(con_socket);
+		}
+
+		*socket_fd = con_socket;
+		/* The listening socket is no longer needed. */
+		close(s);
+		break;
+	default:
+		DPRINTF("unknown operation request");
+		error = EINVAL;
+		goto done_close_s;
+	}
+
+	return (0);
+
+done_close_s:
+	close(s);
+	return (error);
+}
+
+/*
+ * Source side of a migration: connect to the destination named in req,
+ * run the system-spec handshake, send the migration type (is_live), pause
+ * the vCPUs and user devices and wait for the destination's completion
+ * message.
+ *
+ * The actual state transfer is not implemented yet, so after a successful
+ * handshake the guest is resumed and EOPNOTSUPP is returned.  Any earlier
+ * failure returns a non-zero error code.
+ */
+int
+vm_send_migrate_req(struct vmctx *ctx, struct migrate_req req, bool is_live)
+{
+	int s;
+	int rc, error;
+	size_t migration_completed;
+
+#if !defined(WITHOUT_CAPSICUM) && !defined(WITH_CASPER)
+	EPRINTF("Migration is not possible with Capsicum enabled and without Casper support");
+	return (EOPNOTSUPP);
+#endif
+
+	rc = migrate_connections(req, &s, MIGRATION_SEND_REQ);
+	if (rc != 0) {
+		EPRINTF("Could not create connection");
+		return (rc);
+	}
+
+	rc = migration_check_specs(s, MIGRATION_SEND_REQ);
+	if (rc != 0) {
+		EPRINTF("Error while checking system requirements");
+		error = rc;
+		goto done;
+	}
+
+	rc = migration_transfer_data(s, &is_live, sizeof(is_live), MIGRATION_SEND_REQ);
+	if (rc != 0) {
+		EPRINTF("Could not send migration type");
+		error = rc;
+		goto done;
+	}
+
+	vm_vcpu_pause(ctx);
+
+	rc = vm_pause_user_devs();
+	if (rc != 0) {
+		EPRINTF("Could not pause devices");
+		error = rc;
+		goto unlock_vm_and_exit;
+	}
+
+	rc = migration_transfer_data(s, &migration_completed,
+	    sizeof(migration_completed), MIGRATION_RECV_REQ);
+	if ((rc != 0) || (migration_completed != MIGRATION_SPECS_OK)) {
+		EPRINTF("Could not recv 'migration completed' from remote or received error");
+		/*
+		 * Keep the transfer error when there is one; a negative
+		 * status from the peer is reported as EINVAL, like a
+		 * rejected spec check.
+		 */
+		error = (rc != 0) ? rc : EINVAL;
+		goto unlock_vm_and_exit;
+	}
+
+	/*
+	 * TODO: once the state transfer exists, a completed migration
+	 * should end with vm_destroy(ctx) and exit(0) on the source.
+	 */
+	EPRINTF("Rest of migration not yet implemented");
+	error = EOPNOTSUPP;
+
+unlock_vm_and_exit:
+	rc = vm_resume_user_devs();
+	if (rc != 0)
+		EPRINTF("Could not resume devices");
+	vm_vcpu_resume(ctx);
+
+done:
+	close(s);
+	return (error);
+}
+
+/*
+ * Destination side of a migration: wait for the source to connect, run
+ * the system-spec handshake, read the migration type and acknowledge with
+ * a completion message.
+ *
+ * The state transfer itself is not implemented, so a successful exchange
+ * still ends with EOPNOTSUPP; earlier failures return their own error.
+ */
+static int
+vm_recv_migrate_req(struct vmctx __unused *ctx, struct migrate_req req)
+{
+	size_t completed;
+	bool live;
+	int fd, status;
+
+	status = migrate_connections(req, &fd, MIGRATION_RECV_REQ);
+	if (status != 0) {
+		EPRINTF("Could not create connections");
+		return (status);
+	}
+
+	status = migration_check_specs(fd, MIGRATION_RECV_REQ);
+	if (status != 0) {
+		EPRINTF("Error while checking specs");
+		goto out;
+	}
+
+	status = migration_transfer_data(fd, &live, sizeof(live),
+	    MIGRATION_RECV_REQ);
+	if (status != 0) {
+		EPRINTF("Could not recv migration type");
+		goto out;
+	}
+
+	/* Tell the source that this side is done. */
+	completed = MIGRATION_SPECS_OK;
+	status = migration_transfer_data(fd, &completed, sizeof(completed),
+	    MIGRATION_SEND_REQ);
+	if (status != 0) {
+		EPRINTF("Could not send 'migration completed' to remote");
+		goto out;
+	}
+
+	close(fd);
+	EPRINTF("Rest of migration not currently implemented");
+	return (EOPNOTSUPP);
+
+out:
+	close(fd);
+	return (status);
+}
+
+#ifdef WITH_CASPER
+/*
+ * Open the Casper services used by migration and restrict them.
+ *
+ * system.net is limited to name-to-address lookups (AF_INET only) and
+ * connect; system.sysctl is limited to reading hw.machine, hw.model and
+ * hw.pagesize.  On success the channels are stored in the file-level
+ * capnet and capsysctl variables.
+ *
+ * Returns 0 on success or an errno value on failure, in which case both
+ * channels are closed and reset to NULL so later "!= NULL" checks do not
+ * pick up a half-configured service.
+ */
+int
+migration_cap_setup(void)
+{
+	cap_channel_t *capcas;
+	cap_net_limit_t *limit_net;
+	cap_sysctl_limit_t *limit_sysctl;
+	int error;
+	int familylimit;
+
+	capcas = cap_init();
+	if (capcas == NULL) {
+		error = errno;
+		warn("Unable to create casper process");
+		return (error);
+	}
+
+	caph_cache_catpages();
+
+	capnet = cap_service_open(capcas, "system.net");
+	if (capnet == NULL) {
+		error = errno;
+		warn("Unable to open system.net service");
+		cap_close(capcas);
+		goto fail;
+	}
+
+	capsysctl = cap_service_open(capcas, "system.sysctl");
+	if (capsysctl == NULL) {
+		error = errno;
+		warn("Unable to open system.sysctl service");
+		cap_close(capcas);
+		goto fail;
+	}
+
+	/* The main Casper channel is only needed to open the services. */
+	cap_close(capcas);
+
+	limit_net = cap_net_limit_init(capnet, CAPNET_NAME2ADDR | CAPNET_CONNECT);
+	if (limit_net == NULL) {
+		error = errno;
+		warn("Unable to create cap_net limits.");
+		goto fail;
+	}
+
+	familylimit = AF_INET;
+	cap_net_limit_name2addr_family(limit_net, &familylimit, 1);
+
+	if (cap_net_limit(limit_net) < 0) {
+		error = errno;
+		warn("Unable to apply cap_net limits.");
+		goto fail;
+	}
+
+	limit_sysctl = cap_sysctl_limit_init(capsysctl);
+	if (limit_sysctl == NULL) {
+		error = errno;
+		warn("Unable to create cap_sysctl limits.");
+		goto fail;
+	}
+
+	cap_sysctl_limit_name(limit_sysctl, "hw.machine", CAP_SYSCTL_READ);
+	cap_sysctl_limit_name(limit_sysctl, "hw.model", CAP_SYSCTL_READ);
+	cap_sysctl_limit_name(limit_sysctl, "hw.pagesize", CAP_SYSCTL_READ);
+
+	if (cap_sysctl_limit(limit_sysctl) < 0) {
+		error = errno;
+		warn("Unable to apply cap_sysctl limits.");
+		goto fail;
+	}
+
+	return (0);
+
+fail:
+	if (capsysctl != NULL) {
+		cap_close(capsysctl);
+		capsysctl = NULL;
+	}
+	if (capnet != NULL) {
+		cap_close(capnet);
+		capnet = NULL;
+	}
+	return (error);
+}
+#endif
diff --git a/usr.sbin/bhyve/snapshot.h b/usr.sbin/bhyve/snapshot.h
--- a/usr.sbin/bhyve/snapshot.h
+++ b/usr.sbin/bhyve/snapshot.h
@@ -91,6 +91,8 @@
void checkpoint_cpu_add(int vcpu);
void checkpoint_cpu_resume(int vcpu);
void checkpoint_cpu_suspend(int vcpu);
+void vm_vcpu_pause(struct vmctx *ctx);
+void vm_vcpu_resume(struct vmctx *ctx);
int restore_vm_mem(struct vmctx *ctx, struct restore_state *rstate);
int vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate);
diff --git a/usr.sbin/bhyve/snapshot.c b/usr.sbin/bhyve/snapshot.c
--- a/usr.sbin/bhyve/snapshot.c
+++ b/usr.sbin/bhyve/snapshot.c
@@ -1285,7 +1285,7 @@
pthread_mutex_unlock(&vcpu_lock);
}
-static void
+void
vm_vcpu_pause(struct vmctx *ctx)
{
@@ -1297,7 +1297,7 @@
pthread_mutex_unlock(&vcpu_lock);
}
-static void
+void
vm_vcpu_resume(struct vmctx *ctx)
{
@@ -1493,7 +1493,7 @@
IPC_COMMAND(ipc_cmd_set, checkpoint, vm_do_checkpoint);
static int
-vm_do_migrate(struct vmctx __unused *ctx, const nvlist_t *nvl)
+vm_do_migrate(struct vmctx *ctx, const nvlist_t *nvl)
{
size_t len;
struct migrate_req req;
@@ -1520,9 +1520,7 @@
req.host,
req.port);
- // return (vm_send_migrate_req(ctx, req, nvlist_get_bool(nvl, "live")));
- EPRINTLN("Migration operation not implemented yet\n");
- return (EOPNOTSUPP);
+ return (vm_send_migrate_req(ctx, req, nvlist_get_bool(nvl, "live")));
}
IPC_COMMAND(ipc_cmd_set, migrate, vm_do_migrate);
@@ -1592,7 +1590,15 @@
if (caph_rights_limit(socket_fd, &rights) == -1)
errx(EX_OSERR, "Unable to apply rights for sandbox");
+
+#ifdef WITH_CASPER
+ err = migration_cap_setup();
+ if (err != 0) {
+ errx(EX_OSERR, "Unable to setup capabilities for migration");
+ }
#endif
+#endif
+
checkpoint_info = calloc(1, sizeof(*checkpoint_info));
checkpoint_info->ctx = ctx;
checkpoint_info->socket_fd = socket_fd;
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Nov 8, 9:04 PM (11 h, 4 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14542911
Default Alt Text
D34718.id122190.diff (16 KB)
Attached To
Mode
D34718: Warm Migration feature for bhyve [Part 2]
Attached
Detach File
Event Timeline
Log In to Comment