aboutsummaryrefslogtreecommitdiff
path: root/migration/migration.c
diff options
context:
space:
mode:
Diffstat (limited to 'migration/migration.c')
-rw-r--r--migration/migration.c409
1 files changed, 391 insertions, 18 deletions
diff --git a/migration/migration.c b/migration/migration.c
index 1986cb8573..a5ddf43559 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -58,6 +58,7 @@
#include "qemu/queue.h"
#include "multifd.h"
#include "qemu/yank.h"
+#include "sysemu/cpus.h"
#ifdef CONFIG_VFIO
#include "hw/vfio/vfio-common.h"
@@ -134,6 +135,38 @@ enum mig_rp_message_type {
MIG_RP_MSG_MAX
};
+/* Migration capabilities set */
+struct MigrateCapsSet {
+ int size; /* Capability set size */
+ MigrationCapability caps[]; /* Flexible array of capabilities */
+};
+typedef struct MigrateCapsSet MigrateCapsSet;
+
+/* Define and initialize MigrateCapsSet */
+#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) \
+ MigrateCapsSet _name = { \
+ .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \
+ .caps = { __VA_ARGS__ } \
+ }
+
+/* Background-snapshot compatibility check list */
+static const
+INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
+ MIGRATION_CAPABILITY_POSTCOPY_RAM,
+ MIGRATION_CAPABILITY_DIRTY_BITMAPS,
+ MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME,
+ MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
+ MIGRATION_CAPABILITY_RETURN_PATH,
+ MIGRATION_CAPABILITY_MULTIFD,
+ MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
+ MIGRATION_CAPABILITY_AUTO_CONVERGE,
+ MIGRATION_CAPABILITY_RELEASE_RAM,
+ MIGRATION_CAPABILITY_RDMA_PIN_ALL,
+ MIGRATION_CAPABILITY_COMPRESS,
+ MIGRATION_CAPABILITY_XBZRLE,
+ MIGRATION_CAPABILITY_X_COLO,
+ MIGRATION_CAPABILITY_VALIDATE_UUID);
+
/* When we add fault tolerance, we could have several
migrations at once. For now we don't need to add
dynamic creation of migration */
@@ -141,6 +174,8 @@ enum mig_rp_message_type {
static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;
+static GSList *migration_blockers;
+
static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
int *current_active_state,
@@ -1041,6 +1076,27 @@ static void fill_source_migration_info(MigrationInfo *info)
{
MigrationState *s = migrate_get_current();
+ info->blocked = migration_is_blocked(NULL);
+ info->has_blocked_reasons = info->blocked;
+ info->blocked_reasons = NULL;
+ if (info->blocked) {
+ GSList *cur_blocker = migration_blockers;
+
+ /*
+ * There are two types of reasons a migration might be blocked;
+ * a) devices marked in VMState as non-migratable, and
+ * b) Explicit migration blockers
+ * We need to add both of them here.
+ */
+ qemu_savevm_non_migratable_list(&info->blocked_reasons);
+
+ while (cur_blocker) {
+ QAPI_LIST_PREPEND(info->blocked_reasons,
+ g_strdup(error_get_pretty(cur_blocker->data)));
+ cur_blocker = g_slist_next(cur_blocker);
+ }
+ }
+
switch (s->state) {
case MIGRATION_STATUS_NONE:
/* no migration has happened ever */
@@ -1089,6 +1145,31 @@ static void fill_source_migration_info(MigrationInfo *info)
info->status = s->state;
}
+typedef enum WriteTrackingSupport {
+ WT_SUPPORT_UNKNOWN = 0,
+ WT_SUPPORT_ABSENT,
+ WT_SUPPORT_AVAILABLE,
+ WT_SUPPORT_COMPATIBLE
+} WriteTrackingSupport;
+
+static
+WriteTrackingSupport migrate_query_write_tracking(void)
+{
+ /* Check if kernel supports required UFFD features */
+ if (!ram_write_tracking_available()) {
+ return WT_SUPPORT_ABSENT;
+ }
+ /*
+ * Check if current memory configuration is
+ * compatible with required UFFD features.
+ */
+ if (!ram_write_tracking_compatible()) {
+ return WT_SUPPORT_AVAILABLE;
+ }
+
+ return WT_SUPPORT_COMPATIBLE;
+}
+
/**
* @migration_caps_check - check capability validity
*
@@ -1150,6 +1231,39 @@ static bool migrate_caps_check(bool *cap_list,
}
}
+ if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
+ WriteTrackingSupport wt_support;
+ int idx;
+ /*
+ * Check if 'background-snapshot' capability is supported by
+ * host kernel and compatible with guest memory configuration.
+ */
+ wt_support = migrate_query_write_tracking();
+ if (wt_support < WT_SUPPORT_AVAILABLE) {
+ error_setg(errp, "Background-snapshot is not supported by host kernel");
+ return false;
+ }
+ if (wt_support < WT_SUPPORT_COMPATIBLE) {
+ error_setg(errp, "Background-snapshot is not compatible "
+ "with guest memory configuration");
+ return false;
+ }
+
+ /*
+ * Check if there are any migration capabilities
+ * incompatible with 'background-snapshot'.
+ */
+ for (idx = 0; idx < check_caps_background_snapshot.size; idx++) {
+ int incomp_cap = check_caps_background_snapshot.caps[idx];
+ if (cap_list[incomp_cap]) {
+ error_setg(errp,
+ "Background-snapshot is not compatible with %s",
+ MigrationCapability_str(incomp_cap));
+ return false;
+ }
+ }
+ }
+
return true;
}
@@ -1226,21 +1340,21 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
if (params->has_compress_level &&
(params->compress_level > 9)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
- "is invalid, it should be in the range of 0 to 9");
+ "a value between 0 and 9");
return false;
}
if (params->has_compress_threads && (params->compress_threads < 1)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"compress_threads",
- "is invalid, it should be in the range of 1 to 255");
+ "a value between 1 and 255");
return false;
}
if (params->has_decompress_threads && (params->decompress_threads < 1)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"decompress_threads",
- "is invalid, it should be in the range of 1 to 255");
+ "a value between 1 and 255");
return false;
}
@@ -1293,21 +1407,21 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
if (params->has_multifd_channels && (params->multifd_channels < 1)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"multifd_channels",
- "is invalid, it should be in the range of 1 to 255");
+ "a value between 1 and 255");
return false;
}
if (params->has_multifd_zlib_level &&
(params->multifd_zlib_level > 9)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level",
- "is invalid, it should be in the range of 0 to 9");
+ "a value between 0 and 9");
return false;
}
if (params->has_multifd_zstd_level &&
(params->multifd_zstd_level > 20)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level",
- "is invalid, it should be in the range of 0 to 20");
+ "a value between 0 and 20");
return false;
}
@@ -1316,8 +1430,7 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
!is_power_of_2(params->xbzrle_cache_size))) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"xbzrle_cache_size",
- "is invalid, it should be bigger than target page size"
- " and a power of 2");
+ "a power of two no less than the target page size");
return false;
}
@@ -1334,21 +1447,21 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
params->announce_initial > 100000) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"announce_initial",
- "is invalid, it must be less than 100000 ms");
+ "a value between 0 and 100000");
return false;
}
if (params->has_announce_max &&
params->announce_max > 100000) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"announce_max",
- "is invalid, it must be less than 100000 ms");
+ "a value between 0 and 100000");
return false;
}
if (params->has_announce_rounds &&
params->announce_rounds > 1000) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"announce_rounds",
- "is invalid, it must be in the range of 0 to 1000");
+ "a value between 0 and 1000");
return false;
}
if (params->has_announce_step &&
@@ -1356,7 +1469,7 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
params->announce_step > 10000)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"announce_step",
- "is invalid, it must be in the range of 1 to 10000 ms");
+ "a value between 1 and 10000");
return false;
}
@@ -1909,6 +2022,7 @@ void migrate_init(MigrationState *s)
* locks.
*/
s->cleanup_bh = 0;
+ s->vm_start_bh = 0;
s->to_dst_file = NULL;
s->state = MIGRATION_STATUS_NONE;
s->rp_state.from_dst_file = NULL;
@@ -1934,8 +2048,6 @@ void migrate_init(MigrationState *s)
s->threshold_size = 0;
}
-static GSList *migration_blockers;
-
int migrate_add_blocker(Error *reason, Error **errp)
{
if (only_migratable) {
@@ -2216,7 +2328,7 @@ void qmp_migrate_set_cache_size(int64_t value, Error **errp)
qmp_migrate_set_parameters(&p, errp);
}
-int64_t qmp_query_migrate_cache_size(Error **errp)
+uint64_t qmp_query_migrate_cache_size(Error **errp)
{
return migrate_xbzrle_cache_size();
}
@@ -2446,7 +2558,7 @@ int migrate_use_xbzrle(void)
return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
}
-int64_t migrate_xbzrle_cache_size(void)
+uint64_t migrate_xbzrle_cache_size(void)
{
MigrationState *s;
@@ -2491,6 +2603,15 @@ bool migrate_use_block_incremental(void)
return s->parameters.block_incremental;
}
+bool migrate_background_snapshot(void)
+{
+ MigrationState *s;
+
+ s = migrate_get_current();
+
+ return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
+}
+
/* migration thread support */
/*
* Something bad happened to the RP stream, mark an error
@@ -3117,6 +3238,50 @@ fail:
MIGRATION_STATUS_FAILED);
}
+/**
+ * bg_migration_completion: Used by bg_migration_thread after all the
+ * RAM has been saved. The caller 'breaks' the loop when this returns.
+ *
+ * @s: Current migration state
+ */
+static void bg_migration_completion(MigrationState *s)
+{
+ int current_active_state = s->state;
+
+ /*
+ * Stop tracking RAM writes - un-protect memory, un-register UFFD
+ * memory ranges, flush kernel wait queues and wake up threads
+ * waiting for write fault to be resolved.
+ */
+ ram_write_tracking_stop();
+
+ if (s->state == MIGRATION_STATUS_ACTIVE) {
+ /*
+ * By this moment we have RAM content saved into the migration stream.
+ * The next step is to flush the non-RAM content (device state)
+ * right after the ram content. The device state has been stored into
+ * the temporary buffer before RAM saving started.
+ */
+ qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage);
+ qemu_fflush(s->to_dst_file);
+ } else if (s->state == MIGRATION_STATUS_CANCELLING) {
+ goto fail;
+ }
+
+ if (qemu_file_get_error(s->to_dst_file)) {
+ trace_migration_completion_file_err();
+ goto fail;
+ }
+
+ migrate_set_state(&s->state, current_active_state,
+ MIGRATION_STATUS_COMPLETED);
+ return;
+
+fail:
+ migrate_set_state(&s->state, current_active_state,
+ MIGRATION_STATUS_FAILED);
+}
+
bool migrate_colo_enabled(void)
{
MigrationState *s = migrate_get_current();
@@ -3457,6 +3622,47 @@ static void migration_iteration_finish(MigrationState *s)
qemu_mutex_unlock_iothread();
}
+static void bg_migration_iteration_finish(MigrationState *s)
+{
+ qemu_mutex_lock_iothread();
+ switch (s->state) {
+ case MIGRATION_STATUS_COMPLETED:
+ migration_calculate_complete(s);
+ break;
+
+ case MIGRATION_STATUS_ACTIVE:
+ case MIGRATION_STATUS_FAILED:
+ case MIGRATION_STATUS_CANCELLED:
+ case MIGRATION_STATUS_CANCELLING:
+ break;
+
+ default:
+ /* Should not reach here, but if so, forgive the VM. */
+ error_report("%s: Unknown ending state %d", __func__, s->state);
+ break;
+ }
+
+ migrate_fd_cleanup_schedule(s);
+ qemu_mutex_unlock_iothread();
+}
+
+/*
+ * Return MIG_ITERATE_BREAK once RAM saving has finished and completion
+ * has been run, MIG_ITERATE_RESUME otherwise.
+ */
+static MigIterateState bg_migration_iteration_run(MigrationState *s)
+{
+ int res;
+
+ res = qemu_savevm_state_iterate(s->to_dst_file, false);
+ if (res > 0) {
+ bg_migration_completion(s);
+ return MIG_ITERATE_BREAK;
+ }
+
+ return MIG_ITERATE_RESUME;
+}
+
void migration_make_urgent_request(void)
{
qemu_sem_post(&migrate_get_current()->rate_limit_sem);
@@ -3604,6 +3810,165 @@ static void *migration_thread(void *opaque)
return NULL;
}
+static void bg_migration_vm_start_bh(void *opaque)
+{
+ MigrationState *s = opaque;
+
+ qemu_bh_delete(s->vm_start_bh);
+ s->vm_start_bh = NULL;
+
+ vm_start();
+ s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start;
+}
+
+/**
+ * Background snapshot thread, based on live migration code.
+ * This is an alternative implementation of live migration mechanism
+ * introduced specifically to support background snapshots.
+ *
+ * It takes advantage of the userfaultfd write-protection mechanism introduced
+ * in the v5.7 kernel. Compared to the existing dirty-page-logging migration,
+ * much less stream traffic is produced, resulting in smaller snapshot images,
+ * simply because no page duplicates can get into the stream.
+ *
+ * Another key point is that the generated vmstate stream reflects the machine
+ * state 'frozen' at the beginning of snapshot creation, whereas the dirty page
+ * logging mechanism effectively saves the state of the VM as it is at the end
+ * of the process.
+ */
+static void *bg_migration_thread(void *opaque)
+{
+ MigrationState *s = opaque;
+ int64_t setup_start;
+ MigThrError thr_error;
+ QEMUFile *fb;
+ bool early_fail = true;
+
+ rcu_register_thread();
+ object_ref(OBJECT(s));
+
+ qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
+
+ setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+ /*
+ * We want to save vmstate for the moment when migration has been
+ * initiated but also we want to save RAM content while VM is running.
+ * The RAM content should appear first in the vmstate. So, we first
+ * stash the non-RAM part of the vmstate to the temporary buffer,
+ * then write RAM part of the vmstate to the migration stream
+ * with vCPUs running and, finally, write stashed non-RAM part of
+ * the vmstate from the buffer to the migration stream.
+ */
+ s->bioc = qio_channel_buffer_new(128 * 1024);
+ qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
+ fb = qemu_fopen_channel_output(QIO_CHANNEL(s->bioc));
+ object_unref(OBJECT(s->bioc));
+
+ update_iteration_initial_status(s);
+
+ qemu_savevm_state_header(s->to_dst_file);
+ qemu_savevm_state_setup(s->to_dst_file);
+
+ if (qemu_savevm_state_guest_unplug_pending()) {
+ migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
+ MIGRATION_STATUS_WAIT_UNPLUG);
+
+ while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
+ qemu_savevm_state_guest_unplug_pending()) {
+ qemu_sem_timedwait(&s->wait_unplug_sem, 250);
+ }
+
+ migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG,
+ MIGRATION_STATUS_ACTIVE);
+ } else {
+ migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
+ MIGRATION_STATUS_ACTIVE);
+ }
+ s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
+
+ trace_migration_thread_setup_complete();
+ s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+ qemu_mutex_lock_iothread();
+
+ /*
+ * If the VM is currently in suspended state, then, to make a valid runstate
+ * transition in vm_stop_force_state(), we need to wake it up.
+ */
+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+ s->vm_was_running = runstate_is_running();
+
+ if (global_state_store()) {
+ goto fail;
+ }
+ /* Forcibly stop VM before saving state of vCPUs and devices */
+ if (vm_stop_force_state(RUN_STATE_PAUSED)) {
+ goto fail;
+ }
+ /*
+ * Put vCPUs in sync with shadow context structures, then
+ * save their state to channel-buffer along with devices.
+ */
+ cpu_synchronize_all_states();
+ if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
+ goto fail;
+ }
+ /* Now initialize UFFD context and start tracking RAM writes */
+ if (ram_write_tracking_start()) {
+ goto fail;
+ }
+ early_fail = false;
+
+ /*
+ * Start VM from BH handler to avoid write-fault lock here.
+ * UFFD-WP protection for the whole RAM is already enabled so
+ * calling VM state change notifiers from vm_start() would initiate
+ * writes to virtio VQs memory which is in write-protected region.
+ */
+ s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
+ qemu_bh_schedule(s->vm_start_bh);
+
+ qemu_mutex_unlock_iothread();
+
+ while (migration_is_active(s)) {
+ MigIterateState iter_state = bg_migration_iteration_run(s);
+ if (iter_state == MIG_ITERATE_SKIP) {
+ continue;
+ } else if (iter_state == MIG_ITERATE_BREAK) {
+ break;
+ }
+
+ /*
+ * Try to detect any kind of failures, and see whether we
+ * should stop the migration now.
+ */
+ thr_error = migration_detect_error(s);
+ if (thr_error == MIG_THR_ERR_FATAL) {
+ /* Stop migration */
+ break;
+ }
+
+ migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
+ }
+
+ trace_migration_thread_after_loop();
+
+fail:
+ if (early_fail) {
+ migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
+ MIGRATION_STATUS_FAILED);
+ qemu_mutex_unlock_iothread();
+ }
+
+ bg_migration_iteration_finish(s);
+
+ qemu_fclose(fb);
+ object_unref(OBJECT(s));
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
void migrate_fd_connect(MigrationState *s, Error *error_in)
{
Error *local_err = NULL;
@@ -3667,8 +4032,14 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
migrate_fd_cleanup(s);
return;
}
- qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
- QEMU_THREAD_JOINABLE);
+
+ if (migrate_background_snapshot()) {
+ qemu_thread_create(&s->thread, "bg_snapshot",
+ bg_migration_thread, s, QEMU_THREAD_JOINABLE);
+ } else {
+ qemu_thread_create(&s->thread, "live_migration",
+ migration_thread, s, QEMU_THREAD_JOINABLE);
+ }
s->migration_thread_running = true;
}
@@ -3784,6 +4155,8 @@ static Property migration_properties[] = {
DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
+ DEFINE_PROP_MIG_CAP("x-background-snapshot",
+ MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
DEFINE_PROP_END_OF_LIST(),
};