git: c9fdd4f3cc18 - main - bhyve: add command line parameter and parsing for migration

From: Corvin Köhne <corvink_at_FreeBSD.org>
Date: Mon, 19 Jun 2023 06:54:50 UTC
The branch main has been updated by corvink:

URL: https://cgit.FreeBSD.org/src/commit/?id=c9fdd4f3cc18c03683de85318ba8d318f96b58c4

commit c9fdd4f3cc18c03683de85318ba8d318f96b58c4
Author:     Mihai Burcea <mihaiburcea15@gmail.com>
AuthorDate: 2023-06-19 06:46:28 +0000
Commit:     Corvin Köhne <corvink@FreeBSD.org>
CommitDate: 2023-06-19 06:47:35 +0000

    bhyve: add command line parameter and parsing for migration
    
    This covers warm and live migration.
    
    Reviewed by:            corvink
    MFC after:              1 week
    Differential Revision:  https://reviews.freebsd.org/D34717
---
 usr.sbin/bhyve/Makefile      |  1 +
 usr.sbin/bhyve/bhyve.8       | 12 ++++++
 usr.sbin/bhyve/bhyverun.c    | 66 ++++++++++++++++++++---------
 usr.sbin/bhyve/migration.c   | 98 ++++++++++++++++++++++++++++++++++++++++++++
 usr.sbin/bhyve/migration.h   | 27 ++++++++++++
 usr.sbin/bhyve/snapshot.c    | 35 ++++++++++++++++
 usr.sbin/bhyvectl/bhyvectl.8 | 19 +++++++++
 usr.sbin/bhyvectl/bhyvectl.c | 57 +++++++++++++++++++++++++-
 8 files changed, 294 insertions(+), 21 deletions(-)

diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
index 9b8a7274d793..d4909fb87b76 100644
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -88,6 +88,7 @@ SRCS=	\
 
 .if ${MK_BHYVE_SNAPSHOT} != "no"
 SRCS+=	snapshot.c
+SRCS+=	migration.c
 .endif
 
 CFLAGS.kernemu_dev.c+=	-I${SRCTOP}/sys/amd64
diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8
index 38891ab33c40..479ab75be60f 100644
--- a/usr.sbin/bhyve/bhyve.8
+++ b/usr.sbin/bhyve/bhyve.8
@@ -80,6 +80,11 @@
 .Op Fl o Ar var Ns Cm = Ns Ar value
 .Op Fl p Ar vcpu Ns Cm \&: Ns Ar hostcpu
 .Op Fl r Ar file
+.Oo Fl R
+.Sm off
+.Ar host Op Cm \&: Ar port
+.Sm on
+.Oc
 .Sm off
 .Oo Fl s\~
 .Ar slot Cm \&, Ar emulation Op Cm \&, Ar conf
@@ -276,6 +281,13 @@ and
 .Fl l
 options.
 The count of vCPUs and memory configuration are read from the snapshot.
+.It Fl R Ar host Ns Op Cm \&: Ns Ar port
+Receive migration from a source guest.
+Await for a connection from
+.Ar host
+on the specified
+.Ar port
+and resume execution. The default migration port is 24983.
 .It Fl S
 Wire guest memory.
 .It Fl s Cm help
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 5ced5df0b4cb..f5365a352d07 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -98,6 +98,9 @@ __FBSDID("$FreeBSD$");
 #include "kernemu_dev.h"
 #include "mem.h"
 #include "mevent.h"
+#ifdef BHYVE_SNAPSHOT
+#include "migration.h"
+#endif
 #include "mptbl.h"
 #include "pci_emul.h"
 #include "pci_irq.h"
@@ -231,6 +234,7 @@ usage(int code)
 		"       -p: pin 'vcpu' to 'hostcpu'\n"
 #ifdef BHYVE_SNAPSHOT
 		"       -r: path to checkpoint file\n"
+		"       -R: <host[:port]> the source vm host and port for migration\n"
 #endif
 		"       -S: guest memory cannot be swapped\n"
 		"       -s: <slot,driver,configinfo> PCI slot config\n"
@@ -1082,7 +1086,11 @@ do_open(const char *vmname)
 			exit(4);
 		}
 	} else {
+#ifndef BHYVE_SNAPSHOT
 		if (!romboot) {
+#else
+		if (!romboot && !get_config_bool_default("is_migrated", false)) {
+#endif
 			/*
 			 * If the virtual machine was just created then a
 			 * bootrom must be configured to boot it.
@@ -1227,9 +1235,11 @@ main(int argc, char *argv[])
 	const char *optstr, *value, *vmname;
 #ifdef BHYVE_SNAPSHOT
 	char *restore_file;
+	char *migration_host;
 	struct restore_state rstate;
 
 	restore_file = NULL;
+	migration_host = NULL;
 #endif
 
 	init_config();
@@ -1237,7 +1247,7 @@ main(int argc, char *argv[])
 	progname = basename(argv[0]);
 
 #ifdef BHYVE_SNAPSHOT
-	optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:r:";
+	optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:r:R:";
 #else
 	optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:";
 #endif
@@ -1294,6 +1304,10 @@ main(int argc, char *argv[])
 		case 'r':
 			restore_file = optarg;
 			break;
+		case 'R':
+			migration_host = optarg;
+			set_config_bool("is_migrated", true);
+			break;
 #endif
 		case 's':
 			if (strncmp(optarg, "help", strlen(optarg)) == 0) {
@@ -1510,38 +1524,51 @@ main(int argc, char *argv[])
 		spinup_vcpu(&vcpu_info[vcpuid], vcpuid == BSP);
 
 #ifdef BHYVE_SNAPSHOT
-	if (restore_file != NULL) {
-		fprintf(stdout, "Pausing pci devs...\r\n");
+	if (restore_file != NULL || migration_host != NULL) {
+		fprintf(stdout, "Pausing pci devs...\n");
 		if (vm_pause_devices() != 0) {
 			fprintf(stderr, "Failed to pause PCI device state.\n");
 			exit(1);
 		}
 
-		fprintf(stdout, "Restoring vm mem...\r\n");
-		if (restore_vm_mem(ctx, &rstate) != 0) {
-			fprintf(stderr, "Failed to restore VM memory.\n");
-			exit(1);
-		}
+		if (restore_file != NULL) {
+			fprintf(stdout, "Restoring vm mem...\n");
+			if (restore_vm_mem(ctx, &rstate) != 0) {
+				fprintf(stderr,
+				    "Failed to restore VM memory.\n");
+				exit(1);
+			}
 
-		fprintf(stdout, "Restoring pci devs...\r\n");
-		if (vm_restore_devices(&rstate) != 0) {
-			fprintf(stderr, "Failed to restore PCI device state.\n");
-			exit(1);
+			fprintf(stdout, "Restoring pci devs...\n");
+			if (vm_restore_devices(&rstate) != 0) {
+				fprintf(stderr,
+				    "Failed to restore PCI device state.\n");
+				exit(1);
+			}
+
+			fprintf(stdout, "Restoring kernel structs...\n");
+			if (vm_restore_kern_structs(ctx, &rstate) != 0) {
+				fprintf(stderr,
+				    "Failed to restore kernel structs.\n");
+				exit(1);
+			}
 		}
 
-		fprintf(stdout, "Restoring kernel structs...\r\n");
-		if (vm_restore_kern_structs(ctx, &rstate) != 0) {
-			fprintf(stderr, "Failed to restore kernel structs.\n");
-			exit(1);
+		if (migration_host != NULL) {
+			fprintf(stdout, "Starting the migration process...\n");
+			if (receive_vm_migration(ctx, migration_host) != 0) {
+				fprintf(stderr, "Failed to migrate the vm.\n");
+				exit(1);
+			}
 		}
 
-		fprintf(stdout, "Resuming pci devs...\r\n");
+		fprintf(stdout, "Resuming pci devs...\n");
 		if (vm_resume_devices() != 0) {
 			fprintf(stderr, "Failed to resume PCI device state.\n");
 			exit(1);
 		}
 	}
-#endif
+#endif /* BHYVE_SNAPSHOT */
 
 	error = vm_get_register(bsp, VM_REG_GUEST_RIP, &rip);
 	assert(error == 0);
@@ -1609,8 +1636,9 @@ main(int argc, char *argv[])
 #endif
 
 #ifdef BHYVE_SNAPSHOT
-	if (restore_file != NULL) {
+	if (restore_file != NULL)
 		destroy_restore_state(&rstate);
+	if (restore_file != NULL || migration_host != NULL) {
 		if (vm_restore_time(ctx) < 0)
 			err(EX_OSERR, "Unable to restore time");
 
diff --git a/usr.sbin/bhyve/migration.c b/usr.sbin/bhyve/migration.c
new file mode 100644
index 000000000000..daa67b370d47
--- /dev/null
+++ b/usr.sbin/bhyve/migration.c
@@ -0,0 +1,98 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2017-2020 Elena Mihailescu
+ * Copyright (c) 2017-2020 Darius Mihai
+ * Copyright (c) 2017-2020 Mihai Carabas
+ *
+ * The migration feature was developed under sponsorships
+ * from Matthew Grooms.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/mman.h>
+#include <sys/nv.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#include <arpa/inet.h>
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#include <libcasper.h>
+#include <casper/cap_net.h>
+#include <casper/cap_sysctl.h>
+#endif
+#include <err.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <vmmapi.h>
+
+#include "migration.h"
+#include "pci_emul.h"
+#include "snapshot.h"
+
+
+#ifdef BHYVE_DEBUG
+#define DPRINTF(FMT, ...)							\
+({										\
+	fprintf(stderr, "%s: " FMT "\n", __func__, ##__VA_ARGS__);		\
+ })
+#else
+#define DPRINTF(FMT, ...)
+#endif
+
+#define EPRINTF(FMT, ...)							\
+({										\
+	fprintf(stderr, "%s: " FMT "\n", __func__, ##__VA_ARGS__);		\
+ })
+
+int
+receive_vm_migration(struct vmctx *ctx, char *migration_data)
+{
+	struct migrate_req req;
+	size_t len;
+	char *hostname, *pos;
+	unsigned int port = DEFAULT_MIGRATION_PORT;
+	int rc;
+
+	assert(ctx != NULL);
+	assert(migration_data != NULL);
+
+	memset(req.host, 0, MAXHOSTNAMELEN);
+	hostname = strdup(migration_data);
+
+	if ((pos = strchr(hostname, ':')) != NULL) {
+		*pos = '\0';
+		pos = pos + 1;
+
+		rc = sscanf(pos, "%u", &port);
+
+		if (rc <= 0) {
+			EPRINTF("Could not parse the port");
+			free(hostname);
+			return (EINVAL);
+		}
+	}
+	req.port = port;
+
+	len = strlen(hostname);
+	if (len > MAXHOSTNAMELEN - 1) {
+		EPRINTF("Hostname length %lu bigger than maximum allowed %d",
+			len, MAXHOSTNAMELEN - 1);
+		free(hostname);
+		return (EINVAL);
+	}
+
+	strlcpy(req.host, hostname, MAXHOSTNAMELEN);
+
+	// rc = vm_recv_migrate_req(ctx, req);
+	rc = EOPNOTSUPP;
+	EPRINTF("Migration not implemented yet");
+
+	free(hostname);
+	return (rc);
+}
diff --git a/usr.sbin/bhyve/migration.h b/usr.sbin/bhyve/migration.h
new file mode 100644
index 000000000000..b72a4aa3611f
--- /dev/null
+++ b/usr.sbin/bhyve/migration.h
@@ -0,0 +1,27 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2017-2020 Elena Mihailescu
+ * Copyright (c) 2017-2020 Darius Mihai
+ * Copyright (c) 2017-2020 Mihai Carabas
+ *
+ * The migration feature was developed under sponsorships
+ * from Matthew Grooms.
+ *
+ */
+
+#pragma once
+
+#include <sys/param.h>
+#include <stdbool.h>
+
+#define DEFAULT_MIGRATION_PORT	24983
+
+struct vmctx;
+
+struct migrate_req {
+	char host[MAXHOSTNAMELEN];
+	unsigned int port;
+};
+
+int receive_vm_migration(struct vmctx *ctx, char *migration_data);
diff --git a/usr.sbin/bhyve/snapshot.c b/usr.sbin/bhyve/snapshot.c
index 5569ffcb2e24..58ae29782b43 100644
--- a/usr.sbin/bhyve/snapshot.c
+++ b/usr.sbin/bhyve/snapshot.c
@@ -86,6 +86,7 @@ __FBSDID("$FreeBSD$");
 #include "ioapic.h"
 #include "mem.h"
 #include "mevent.h"
+#include "migration.h"
 #include "mptbl.h"
 #include "pci_emul.h"
 #include "pci_irq.h"
@@ -1400,6 +1401,40 @@ vm_do_checkpoint(struct vmctx *ctx, const nvlist_t *nvl)
 }
 IPC_COMMAND(ipc_cmd_set, checkpoint, vm_do_checkpoint);
 
+static int
+vm_do_migrate(struct vmctx __unused *ctx, const nvlist_t *nvl)
+{
+	size_t len;
+	struct migrate_req req;
+
+	if (!nvlist_exists_string(nvl, "hostname") ||
+		!nvlist_exists_number(nvl, "port"))
+			return (EINVAL);
+
+	memset(&req, 0, sizeof(struct migrate_req));
+	req.port = nvlist_get_number(nvl, "port");
+
+	len = strlen(nvlist_get_string(nvl, "hostname"));
+	if (len > MAXHOSTNAMELEN - 1) {
+		EPRINTLN("Hostname length %lu bigger than maximum allowed %d",
+			len, MAXHOSTNAMELEN - 1);
+		return (EINVAL);
+	}
+
+	strlcpy(req.host, nvlist_get_string(nvl, "hostname"), MAXHOSTNAMELEN);
+
+	printf("%s: IP address used for migration: %s;\n"
+		"Port used for migration: %d\n",
+		__func__,
+		req.host,
+		req.port);
+
+	// return (vm_send_migrate_req(ctx, req, nvlist_get_bool(nvl, "live")));
+	EPRINTLN("Migration operation not implemented yet\n");
+	return (EOPNOTSUPP);
+}
+IPC_COMMAND(ipc_cmd_set, migrate, vm_do_migrate);
+
 void
 init_snapshot(void)
 {
diff --git a/usr.sbin/bhyvectl/bhyvectl.8 b/usr.sbin/bhyvectl/bhyvectl.8
index 636529a7a53b..9b917f14270e 100644
--- a/usr.sbin/bhyvectl/bhyvectl.8
+++ b/usr.sbin/bhyvectl/bhyvectl.8
@@ -41,6 +41,11 @@
 .Op Fl -force-poweroff
 .Op Fl -checkpoint= Ns Ar <filename>
 .Op Fl -suspend= Ns Ar <filename>
+.Oo
+.Fl -migrate= Ns Ar host Ns Op Cm \&: Ns Ar port
+|
+.Fl -migrate-live= Ns Ar host Ns Op Cm \&: Ns Ar port
+.Oc
 .Sh DESCRIPTION
 The
 .Nm
@@ -85,6 +90,20 @@ Save a snapshot of a virtual machine similar to
 .Fl -checkpoint .
 The virtual machine will terminate after the snapshot has been
 saved.
+.It Fl -migrate= Ns Ar host Ns Op Cm \&: Ns Ar port
+Warm migrate the virtual machine to a
+.Ar host
+on the specified
+.Ar port .
+The default migration port is 24983.
+The virtual machine will be destroyed after the migration finishes.
+.It Fl -migrate-live= Ns Ar host Ns Op Cm \&: Ns Ar port
+Live migrate the virtual machine to a
+.Ar host
+on the specified
+.Ar port .
+The default migration port is 24983.
+The virtual machine will be destroyed after the migration finishes.
 .El
 .Sh EXIT STATUS
 .Ex -std
diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c
index f723ff1f2e82..3c41938d0e18 100644
--- a/usr.sbin/bhyvectl/bhyvectl.c
+++ b/usr.sbin/bhyvectl/bhyvectl.c
@@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
 
 #ifdef BHYVE_SNAPSHOT
 #include "snapshot.h"
+#include "migration.h"
 #endif
 
 #define	MB	(1UL << 20)
@@ -87,6 +88,7 @@ usage(bool cpu_intel)
 	"       [--destroy]\n"
 #ifdef BHYVE_SNAPSHOT
 	"       [--checkpoint=<filename> | --suspend=<filename>]\n"
+	"       [--migrate=<host>[:<port>] | --migrate-live=<host>[:<port>]]\n"
 #endif
 	"       [--get-all]\n"
 	"       [--get-stats]\n"
@@ -299,6 +301,7 @@ static int run;
 static int get_cpu_topology;
 #ifdef BHYVE_SNAPSHOT
 static int vm_suspend_opt;
+static int vm_migrate_live;
 #endif
 
 /*
@@ -589,6 +592,8 @@ enum {
 #ifdef BHYVE_SNAPSHOT
 	SET_CHECKPOINT_FILE,
 	SET_SUSPEND_FILE,
+	MIGRATE_VM,
+	MIGRATE_VM_LIVE,
 #endif
 };
 
@@ -1456,6 +1461,8 @@ setup_options(bool cpu_intel)
 #ifdef BHYVE_SNAPSHOT
 		{ "checkpoint", 	REQ_ARG, 0,	SET_CHECKPOINT_FILE},
 		{ "suspend", 		REQ_ARG, 0,	SET_SUSPEND_FILE},
+		{ "migrate", 		REQ_ARG, 0,	MIGRATE_VM},
+		{ "migrate-live", 	REQ_ARG, 0,	MIGRATE_VM_LIVE},
 #endif
 	};
 
@@ -1743,7 +1750,42 @@ snapshot_request(const char *vmname, char *file, bool suspend)
 
 	return (send_message(vmname, nvl));
 }
-#endif
+
+static int
+migration_request(const char *vmname, const char *migrate_vm, bool live)
+{
+	nvlist_t *nvl;
+	char *hostname, *pos;
+	int rc;
+	unsigned int port = DEFAULT_MIGRATION_PORT;
+
+	hostname = strdup(migrate_vm);
+
+	if ((pos = strchr(hostname, ':')) != NULL) {
+		*pos = '\0';
+		pos = pos + 1;
+
+		rc = sscanf(pos, "%u", &port);
+
+		if (rc <= 0) {
+			fprintf(stderr, "Could not parse the port\n");
+			free(hostname);
+			return (EINVAL);
+		}
+	}
+
+	nvl = nvlist_create(0);
+	nvlist_add_string(nvl, "cmd", "migrate");
+	nvlist_add_string(nvl, "hostname", hostname);
+	nvlist_add_number(nvl, "port", port);
+	nvlist_add_bool(nvl, "live", live);
+
+	free(hostname);
+
+	return (send_message(vmname, nvl));
+}
+
+#endif /* BHYVE_SNAPSHOT */
 
 int
 main(int argc, char *argv[])
@@ -1763,7 +1805,7 @@ main(int argc, char *argv[])
 	struct tm tm;
 	struct option *opts;
 #ifdef BHYVE_SNAPSHOT
-	char *checkpoint_file = NULL;
+	char *checkpoint_file = NULL, *migrate_host = NULL;
 #endif
 
 	cpu_intel = cpu_vendor_intel();
@@ -1932,6 +1974,14 @@ main(int argc, char *argv[])
 			checkpoint_file = optarg;
 			vm_suspend_opt = (ch == SET_SUSPEND_FILE);
 			break;
+		case MIGRATE_VM:
+		case MIGRATE_VM_LIVE:
+			if (migrate_host != NULL)
+				usage(cpu_intel);
+			
+			migrate_host = optarg;
+			vm_migrate_live = (ch == MIGRATE_VM_LIVE);
+			break;
 #endif
 		default:
 			usage(cpu_intel);
@@ -2414,6 +2464,9 @@ main(int argc, char *argv[])
 #ifdef BHYVE_SNAPSHOT
 	if (!error && checkpoint_file)
 		error = snapshot_request(vmname, checkpoint_file, vm_suspend_opt);
+	
+	if (!error && migrate_host)
+		error = migration_request(vmname, migrate_host, vm_migrate_live);
 #endif
 
 	free (opts);