Diffstat (limited to 'migration/migration.c'):
-rw-r--r--  migration/migration.c | 3201
 1 file changed, 1365 insertions(+), 1836 deletions(-)
diff --git a/migration/migration.c b/migration/migration.c
index bb909781b7..86bf76e925 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -20,18 +20,21 @@
#include "migration/blocker.h"
#include "exec.h"
#include "fd.h"
+#include "file.h"
#include "socket.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpu-throttle.h"
#include "rdma.h"
#include "ram.h"
+#include "ram-compress.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "migration.h"
+#include "migration-stats.h"
#include "savevm.h"
-#include "qemu-file-channel.h"
#include "qemu-file.h"
+#include "channel.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
@@ -49,74 +52,30 @@
#include "trace.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
+#include "io/channel-tls.h"
#include "migration/colo.h"
#include "hw/boards.h"
-#include "hw/qdev-properties.h"
-#include "hw/qdev-properties-system.h"
#include "monitor/monitor.h"
#include "net/announce.h"
#include "qemu/queue.h"
#include "multifd.h"
+#include "threadinfo.h"
#include "qemu/yank.h"
#include "sysemu/cpus.h"
#include "yank_functions.h"
-
-#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */
-
-/* Amount of time to allocate to each "chunk" of bandwidth-throttled
- * data. */
-#define BUFFER_DELAY 100
-#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
-
-/* Time in milliseconds we are allowed to stop the source,
- * for sending the last part */
-#define DEFAULT_MIGRATE_SET_DOWNTIME 300
-
-/* Maximum migrate downtime set to 2000 seconds */
-#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
-#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)
-
-/* Default compression thread count */
-#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
-/* Default decompression thread count, usually decompression is at
- * least 4 times as fast as compression.*/
-#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
-/*0: means nocompress, 1: best speed, ... 9: best compress ratio */
-#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
-/* Define default autoconverge cpu throttle migration parameters */
-#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
-#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
-#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
-#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99
-
-/* Migration XBZRLE default cache size */
-#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)
-
-/* The delay time (in ms) between two COLO checkpoints */
-#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
-#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
-#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE
-/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */
-#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1
-/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */
-#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1
-
-/* Background transfer rate for postcopy, 0 means unlimited, note
- * that page requests can still exceed this limit.
- */
-#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0
-
-/*
- * Parameters for self_announce_delay giving a stream of RARP/ARP
- * packets after migration.
- */
-#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50
-#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550
-#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5
-#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100
-
-static NotifierList migration_state_notifiers =
- NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
+#include "sysemu/qtest.h"
+#include "options.h"
+#include "sysemu/dirtylimit.h"
+#include "qemu/sockets.h"
+#include "sysemu/kvm.h"
+
+#define NOTIFIER_ELEM_INIT(array, elem) \
+ [elem] = NOTIFIER_WITH_RETURN_LIST_INITIALIZER((array)[elem])
+
+static NotifierWithReturnList migration_state_notifiers[] = {
+ NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_NORMAL),
+ NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_REBOOT),
+};
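
The hunk above replaces the single global NotifierList with one NotifierWithReturnList per migration mode, so notifiers can be registered for MIG_MODE_NORMAL and MIG_MODE_CPR_REBOOT independently. A minimal registration sketch; the migration_add_notifier_mode() helper, the MigrationEvent callback signature, and all names prefixed my_/cpr_ are assumptions accompanying this per-mode design, not shown in this hunk:

    static int my_cpr_notify(NotifierWithReturn *notifier,
                             MigrationEvent *e, Error **errp)
    {
        /* Called only for cpr-reboot migrations; a negative return
         * value would veto the transition. */
        return 0;
    }

    static NotifierWithReturn cpr_notifier;

    static void my_subsystem_init(void)
    {
        /* Hypothetical caller: attach to the MIG_MODE_CPR_REBOOT list. */
        migration_add_notifier_mode(&cpr_notifier, my_cpr_notify,
                                    MIG_MODE_CPR_REBOOT);
    }
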
/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
@@ -128,42 +87,11 @@ enum mig_rp_message_type {
MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */
MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */
MIG_RP_MSG_RESUME_ACK, /* tell source that we are ready to resume */
+ MIG_RP_MSG_SWITCHOVER_ACK, /* Tell source it's OK to do switchover */
MIG_RP_MSG_MAX
};
-/* Migration capabilities set */
-struct MigrateCapsSet {
- int size; /* Capability set size */
- MigrationCapability caps[]; /* Variadic array of capabilities */
-};
-typedef struct MigrateCapsSet MigrateCapsSet;
-
-/* Define and initialize MigrateCapsSet */
-#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) \
- MigrateCapsSet _name = { \
- .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \
- .caps = { __VA_ARGS__ } \
- }
-
-/* Background-snapshot compatibility check list */
-static const
-INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
- MIGRATION_CAPABILITY_POSTCOPY_RAM,
- MIGRATION_CAPABILITY_DIRTY_BITMAPS,
- MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME,
- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
- MIGRATION_CAPABILITY_RETURN_PATH,
- MIGRATION_CAPABILITY_MULTIFD,
- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
- MIGRATION_CAPABILITY_AUTO_CONVERGE,
- MIGRATION_CAPABILITY_RELEASE_RAM,
- MIGRATION_CAPABILITY_RDMA_PIN_ALL,
- MIGRATION_CAPABILITY_COMPRESS,
- MIGRATION_CAPABILITY_XBZRLE,
- MIGRATION_CAPABILITY_X_COLO,
- MIGRATION_CAPABILITY_VALIDATE_UUID);
-
/* When we add fault tolerance, we could have several
migrations at once. For now we don't need to add
dynamic creation of migration */
@@ -171,13 +99,89 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;
-static GSList *migration_blockers;
+static GSList *migration_blockers[MIG_MODE__MAX];
static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
int *current_active_state,
int new_state);
static void migrate_fd_cancel(MigrationState *s);
+static bool close_return_path_on_source(MigrationState *s);
+static void migration_completion_end(MigrationState *s);
+
+static void migration_downtime_start(MigrationState *s)
+{
+ trace_vmstate_downtime_checkpoint("src-downtime-start");
+ s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+}
+
+static void migration_downtime_end(MigrationState *s)
+{
+ int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+ /*
+ * If downtime is already set, postcopy must have set it, in which
+ * case it is already the real downtime.
+ */
+ if (!s->downtime) {
+ s->downtime = now - s->downtime_start;
+ }
+
+ trace_vmstate_downtime_checkpoint("src-downtime-end");
+}
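
migration_downtime_start() and migration_downtime_end() bracket the source-side blackout window, and the !s->downtime guard preserves a value that postcopy has already computed. A simplified model of the accounting, using illustrative timestamps only:

    /* Illustrative values, not taken from any real run. */
    int64_t downtime_start = 1000;  /* ms, taken at downtime_start() */
    int64_t now = 1300;             /* ms, taken at downtime_end()   */
    int64_t downtime = 0;           /* 0 means "not set yet"         */

    if (!downtime) {
        downtime = now - downtime_start;  /* 300 ms of guest blackout */
    }
    /* If postcopy had already stored a downtime, it is kept as-is. */
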
+
+static bool migration_needs_multiple_sockets(void)
+{
+ return migrate_multifd() || migrate_postcopy_preempt();
+}
+
+static bool transport_supports_multi_channels(MigrationAddress *addr)
+{
+ if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
+ SocketAddress *saddr = &addr->u.socket;
+
+ return (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
+ saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
+ saddr->type == SOCKET_ADDRESS_TYPE_VSOCK);
+ } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
+ return migrate_mapped_ram();
+ } else {
+ return false;
+ }
+}
+
+static bool migration_needs_seekable_channel(void)
+{
+ return migrate_mapped_ram();
+}
+
+static bool transport_supports_seeking(MigrationAddress *addr)
+{
+ if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
+ return true;
+ }
+
+ return false;
+}
+
+static bool
+migration_channels_and_transport_compatible(MigrationAddress *addr,
+ Error **errp)
+{
+ if (migration_needs_seekable_channel() &&
+ !transport_supports_seeking(addr)) {
+ error_setg(errp, "Migration requires seekable transport (e.g. file)");
+ return false;
+ }
+
+ if (migration_needs_multiple_sockets() &&
+ !transport_supports_multi_channels(addr)) {
+ error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
+ return false;
+ }
+
+ return true;
+}
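
Together these predicates gate the transport at setup time: multifd and postcopy-preempt need a transport that can carry several channels, while mapped-ram needs a seekable one. Some illustrative outcomes of the check above (the URIs are examples only):

    /*
     *   capability set      transport         result
     *   --------------      ---------         ------
     *   multifd             tcp:host:port     accepted (multi-channel)
     *   multifd             exec:cmd          rejected (single channel)
     *   mapped-ram          file:/path        accepted (seekable)
     *   mapped-ram          tcp:host:port     rejected (not seekable)
     */
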
static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
@@ -186,6 +190,23 @@ static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
return (a > b) - (a < b);
}
+static int migration_stop_vm(MigrationState *s, RunState state)
+{
+ int ret;
+
+ migration_downtime_start(s);
+
+ s->vm_old_state = runstate_get();
+ global_state_store();
+
+ ret = vm_stop_force_state(state);
+
+ trace_vmstate_downtime_checkpoint("src-vm-stopped");
+ trace_migration_completion_vm_stop(ret);
+
+ return ret;
+}
+
void migration_object_init(void)
{
/* This can only be called once. */
@@ -202,10 +223,15 @@ void migration_object_init(void)
current_incoming->postcopy_remote_fds =
g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
qemu_mutex_init(&current_incoming->rp_mutex);
+ qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex);
qemu_event_init(&current_incoming->main_thread_load_event, false);
qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
+ qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
+ qemu_sem_init(&current_incoming->postcopy_qemufile_dst_done, 0);
+
qemu_mutex_init(&current_incoming->page_request_mutex);
+ qemu_cond_init(&current_incoming->page_request_cond);
current_incoming->page_requested = g_tree_new(page_request_addr_cmp);
migration_object_check(current_migration, &error_fatal);
@@ -215,18 +241,71 @@ void migration_object_init(void)
dirty_bitmap_mig_init();
}
-void migration_cancel(void)
+typedef struct {
+ QEMUBH *bh;
+ QEMUBHFunc *cb;
+ void *opaque;
+} MigrationBH;
+
+static void migration_bh_dispatch_bh(void *opaque)
+{
+ MigrationState *s = migrate_get_current();
+ MigrationBH *migbh = opaque;
+
+ /* cleanup this BH */
+ qemu_bh_delete(migbh->bh);
+ migbh->bh = NULL;
+
+ /* dispatch the other one */
+ migbh->cb(migbh->opaque);
+ object_unref(OBJECT(s));
+
+ g_free(migbh);
+}
+
+void migration_bh_schedule(QEMUBHFunc *cb, void *opaque)
+{
+ MigrationState *s = migrate_get_current();
+ MigrationBH *migbh = g_new0(MigrationBH, 1);
+ QEMUBH *bh = qemu_bh_new(migration_bh_dispatch_bh, migbh);
+
+ /* Store these to dispatch when the BH runs */
+ migbh->bh = bh;
+ migbh->cb = cb;
+ migbh->opaque = opaque;
+
+ /*
+ * Take a reference on the state for the BH, because it may run
+ * when there are no other references left
+ */
+ object_ref(OBJECT(s));
+ qemu_bh_schedule(bh);
+}
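
migration_bh_schedule() wraps qemu_bh_new()/qemu_bh_schedule() so that every migration bottom half pins the MigrationState: the reference taken here is dropped by migration_bh_dispatch_bh() only after the real callback has run. A minimal usage sketch; the callback name my_late_cleanup is hypothetical:

    static void my_late_cleanup(void *opaque)
    {
        MigrationState *s = opaque;
        /* Runs in the main loop; the dispatcher still holds a ref on
         * the migration state here and releases it right after. */
        migrate_fd_cleanup(s);
    }

    /* On the migration thread, e.g. once migration has ended: */
    migration_bh_schedule(my_late_cleanup, migrate_get_current());
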
+
+void migration_cancel(const Error *error)
{
+ if (error) {
+ migrate_set_error(current_migration, error);
+ }
+ if (migrate_dirty_limit()) {
+ qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
+ }
migrate_fd_cancel(current_migration);
}
void migration_shutdown(void)
{
/*
+ * When the QEMU main thread exits, the COLO thread may be
+ * waiting on a semaphore, so wake up the COLO thread before
+ * migration shutdown.
+ */
+ colo_shutdown();
+ /*
* Cancel the current migration - that will (eventually)
* stop the migration using this structure
*/
- migration_cancel();
+ migration_cancel(NULL);
object_unref(OBJECT(current_migration));
/*
@@ -257,10 +336,26 @@ MigrationIncomingState *migration_incoming_get_current(void)
return current_incoming;
}
+void migration_incoming_transport_cleanup(MigrationIncomingState *mis)
+{
+ if (mis->socket_address_list) {
+ qapi_free_SocketAddressList(mis->socket_address_list);
+ mis->socket_address_list = NULL;
+ }
+
+ if (mis->transport_cleanup) {
+ mis->transport_cleanup(mis->transport_data);
+ mis->transport_data = mis->transport_cleanup = NULL;
+ }
+}
+
void migration_incoming_state_destroy(void)
{
struct MigrationIncomingState *mis = migration_incoming_get_current();
+ multifd_recv_cleanup();
+ compress_threads_load_cleanup();
+
if (mis->to_src_file) {
/* Tell source that we are done */
migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
@@ -277,10 +372,8 @@ void migration_incoming_state_destroy(void)
g_array_free(mis->postcopy_remote_fds, TRUE);
mis->postcopy_remote_fds = NULL;
}
- if (mis->transport_cleanup) {
- mis->transport_cleanup(mis->transport_data);
- }
+ migration_incoming_transport_cleanup(mis);
qemu_event_reset(&mis->main_thread_load_event);
if (mis->page_requested) {
@@ -288,9 +381,10 @@ void migration_incoming_state_destroy(void)
mis->page_requested = NULL;
}
- if (mis->socket_address_list) {
- qapi_free_SocketAddressList(mis->socket_address_list);
- mis->socket_address_list = NULL;
+ if (mis->postcopy_qemufile_dst) {
+ migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
+ qemu_fclose(mis->postcopy_qemufile_dst);
+ mis->postcopy_qemufile_dst = NULL;
}
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
@@ -298,21 +392,11 @@ void migration_incoming_state_destroy(void)
static void migrate_generate_event(int new_state)
{
- if (migrate_use_events()) {
+ if (migrate_events()) {
qapi_event_send_migration(new_state);
}
}
-static bool migrate_late_block_activate(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[
- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
-}
-
/*
* Send a message on the return channel back to the source
* of the migration.
@@ -338,12 +422,7 @@ static int migrate_send_rp_message(MigrationIncomingState *mis,
qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
qemu_put_be16(mis->to_src_file, len);
qemu_put_buffer(mis->to_src_file, data, len);
- qemu_fflush(mis->to_src_file);
-
- /* It's possible that qemu file got error during sending */
- ret = qemu_file_get_error(mis->to_src_file);
-
- return ret;
+ return qemu_fflush(mis->to_src_file);
}
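
As the hunk above shows, every return-path message is framed as a big-endian 16-bit type, a 16-bit payload length, and then the payload, and qemu_fflush() now reports any accumulated stream error directly. A sketch of the framing for the new MIG_RP_MSG_SWITCHOVER_ACK, which carries no payload (this matches migrate_send_rp_switchover_ack() later in this patch):

    /*
     * Return-path wire format (all integers big-endian):
     *   uint16 type      one of enum mig_rp_message_type
     *   uint16 len       payload length in bytes
     *   byte   data[len]
     *
     * SWITCHOVER_ACK is a zero-length message:
     */
    migrate_send_rp_message(mis, MIG_RP_MSG_SWITCHOVER_ACK, 0, NULL);
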
/* Request one page from the source VM at the given start address.
@@ -391,7 +470,7 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
int migrate_send_rp_req_pages(MigrationIncomingState *mis,
RAMBlock *rb, ram_addr_t start, uint64_t haddr)
{
- void *aligned = (void *)(uintptr_t)(haddr & (-qemu_ram_pagesize(rb)));
+ void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
bool received = false;
WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
@@ -403,7 +482,7 @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis,
* things like g_tree_lookup() will return TRUE (1) when found.
*/
g_tree_insert(mis->page_requested, aligned, (gpointer)1);
- mis->page_requested_count++;
+ qatomic_inc(&mis->page_requested_count);
trace_postcopy_page_req_add(aligned, mis->page_requested_count);
}
}
@@ -433,6 +512,18 @@ void migration_incoming_disable_colo(void)
int migration_incoming_enable_colo(void)
{
+#ifndef CONFIG_REPLICATION
+ error_report("ENABLE_COLO command come in migration stream, but COLO "
+ "module is not built in");
+ return -ENOTSUP;
+#endif
+
+ if (!migrate_colo()) {
+ error_report("ENABLE_COLO command come in migration stream, but c-colo "
+ "capability is not set");
+ return -EINVAL;
+ }
+
if (ram_block_discard_disable(true)) {
error_report("COLO: cannot disable RAM discard");
return -EBUSY;
@@ -449,23 +540,131 @@ void migrate_add_address(SocketAddress *address)
QAPI_CLONE(SocketAddress, address));
}
-static void qemu_start_incoming_migration(const char *uri, Error **errp)
+bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
+ Error **errp)
{
- const char *p = NULL;
+ g_autoptr(MigrationChannel) val = g_new0(MigrationChannel, 1);
+ g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);
+ InetSocketAddress *isock = &addr->u.rdma;
+ strList **tail = &addr->u.exec.args;
- qapi_event_send_migration(MIGRATION_STATUS_SETUP);
- if (strstart(uri, "tcp:", &p) ||
- strstart(uri, "unix:", NULL) ||
- strstart(uri, "vsock:", NULL)) {
- socket_start_incoming_migration(p ? p : uri, errp);
+ if (strstart(uri, "exec:", NULL)) {
+ addr->transport = MIGRATION_ADDRESS_TYPE_EXEC;
+#ifdef WIN32
+ QAPI_LIST_APPEND(tail, g_strdup(exec_get_cmd_path()));
+ QAPI_LIST_APPEND(tail, g_strdup("/c"));
+#else
+ QAPI_LIST_APPEND(tail, g_strdup("/bin/sh"));
+ QAPI_LIST_APPEND(tail, g_strdup("-c"));
+#endif
+ QAPI_LIST_APPEND(tail, g_strdup(uri + strlen("exec:")));
+ } else if (strstart(uri, "rdma:", NULL)) {
+ if (inet_parse(isock, uri + strlen("rdma:"), errp)) {
+ /* isock is embedded in "addr" and is freed along with it */
+ return false;
+ }
+ addr->transport = MIGRATION_ADDRESS_TYPE_RDMA;
+ } else if (strstart(uri, "tcp:", NULL) ||
+ strstart(uri, "unix:", NULL) ||
+ strstart(uri, "vsock:", NULL) ||
+ strstart(uri, "fd:", NULL)) {
+ addr->transport = MIGRATION_ADDRESS_TYPE_SOCKET;
+ SocketAddress *saddr = socket_parse(uri, errp);
+ if (!saddr) {
+ return false;
+ }
+ addr->u.socket.type = saddr->type;
+ addr->u.socket.u = saddr->u;
+ /* Don't free the objects inside; their ownership moved to "addr" */
+ g_free(saddr);
+ } else if (strstart(uri, "file:", NULL)) {
+ addr->transport = MIGRATION_ADDRESS_TYPE_FILE;
+ addr->u.file.filename = g_strdup(uri + strlen("file:"));
+ if (file_parse_offset(addr->u.file.filename, &addr->u.file.offset,
+ errp)) {
+ return false;
+ }
+ } else {
+ error_setg(errp, "unknown migration protocol: %s", uri);
+ return false;
+ }
+
+ val->channel_type = MIGRATION_CHANNEL_TYPE_MAIN;
+ val->addr = g_steal_pointer(&addr);
+ *channel = g_steal_pointer(&val);
+ return true;
+}
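
A minimal caller sketch for migrate_uri_parse(); the address 127.0.0.1:4444 is illustrative:

    g_autoptr(MigrationChannel) channel = NULL;
    Error *err = NULL;

    if (!migrate_uri_parse("tcp:127.0.0.1:4444", &channel, &err)) {
        error_report_err(err);
    } else {
        /* channel->channel_type == MIGRATION_CHANNEL_TYPE_MAIN and
         * channel->addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET */
    }
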
+
+static void qemu_start_incoming_migration(const char *uri, bool has_channels,
+ MigrationChannelList *channels,
+ Error **errp)
+{
+ g_autoptr(MigrationChannel) channel = NULL;
+ MigrationAddress *addr = NULL;
+ MigrationIncomingState *mis = migration_incoming_get_current();
+
+ /*
+ * Preliminary checks: exactly one of 'uri' and 'channels' must be set
+ */
+ if (!uri == !channels) {
+ error_setg(errp, "need either 'uri' or 'channels' argument");
+ return;
+ }
+
+ if (channels) {
+ /* Verify that the channel list has exactly one entry */
+ if (channels->next) {
+ error_setg(errp, "Channel list has more than one entry");
+ return;
+ }
+ addr = channels->value->addr;
+ }
+
+ if (uri) {
+ /* caller uses the old URI syntax */
+ if (!migrate_uri_parse(uri, &channel, errp)) {
+ return;
+ }
+ addr = channel->addr;
+ }
+
+ /* transport mechanism not suitable for migration? */
+ if (!migration_channels_and_transport_compatible(addr, errp)) {
+ return;
+ }
+
+ migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
+ MIGRATION_STATUS_SETUP);
+
+ if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
+ SocketAddress *saddr = &addr->u.socket;
+ if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
+ saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
+ saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) {
+ socket_start_incoming_migration(saddr, errp);
+ } else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) {
+ fd_start_incoming_migration(saddr->u.fd.str, errp);
+ }
#ifdef CONFIG_RDMA
- } else if (strstart(uri, "rdma:", &p)) {
- rdma_start_incoming_migration(p, errp);
+ } else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
+ if (migrate_compress()) {
+ error_setg(errp, "RDMA and compression can't be used together");
+ return;
+ }
+ if (migrate_xbzrle()) {
+ error_setg(errp, "RDMA and XBZRLE can't be used together");
+ return;
+ }
+ if (migrate_multifd()) {
+ error_setg(errp, "RDMA and multifd can't be used together");
+ return;
+ }
+ rdma_start_incoming_migration(&addr->u.rdma, errp);
#endif
- } else if (strstart(uri, "exec:", &p)) {
- exec_start_incoming_migration(p, errp);
- } else if (strstart(uri, "fd:", &p)) {
- fd_start_incoming_migration(p, errp);
+ } else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
+ exec_start_incoming_migration(addr->u.exec.args, errp);
+ } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
+ file_start_incoming_migration(&addr->u.file, errp);
} else {
error_setg(errp, "unknown migration protocol: %s", uri);
}
@@ -476,6 +675,8 @@ static void process_incoming_migration_bh(void *opaque)
Error *local_err = NULL;
MigrationIncomingState *mis = opaque;
+ trace_vmstate_downtime_checkpoint("dst-precopy-bh-enter");
+
/* If capability late_block_activate is set:
* Only fire up the block code now if we're going to restart the
* VM, else 'cont' will do it.
@@ -484,10 +685,10 @@ static void process_incoming_migration_bh(void *opaque)
*/
if (!migrate_late_block_activate() ||
(autostart && (!global_state_received() ||
- global_state_get_runstate() == RUN_STATE_RUNNING))) {
- /* Make sure all file formats flush their mutable metadata.
+ runstate_is_live(global_state_get_runstate())))) {
+ /* Make sure all file formats throw away their mutable metadata.
* If we get an error here, just don't restart the VM yet. */
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
local_err = NULL;
@@ -501,18 +702,14 @@ static void process_incoming_migration_bh(void *opaque)
*/
qemu_announce_self(&mis->announce_timer, migrate_announce_params());
- if (multifd_load_cleanup(&local_err) != 0) {
- error_report_err(local_err);
- autostart = false;
- }
- /* If global state section was not received or we are in running
- state, we need to obey autostart. Any other state is set with
- runstate_set. */
+ trace_vmstate_downtime_checkpoint("dst-precopy-bh-announced");
+
+ multifd_recv_shutdown();
dirty_bitmap_mig_before_vm_start();
if (!global_state_received() ||
- global_state_get_runstate() == RUN_STATE_RUNNING) {
+ runstate_is_live(global_state_get_runstate())) {
if (autostart) {
vm_start();
} else {
@@ -524,6 +721,7 @@ static void process_incoming_migration_bh(void *opaque)
} else {
runstate_set(global_state_get_runstate());
}
+ trace_vmstate_downtime_checkpoint("dst-precopy-bh-vm-started");
/*
* This must happen after any state changes since as soon as an external
* observer sees this event they might start to prod at the VM assuming
@@ -531,24 +729,33 @@ static void process_incoming_migration_bh(void *opaque)
*/
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COMPLETED);
- qemu_bh_delete(mis->bh);
migration_incoming_state_destroy();
}
-static void process_incoming_migration_co(void *opaque)
+static void coroutine_fn
+process_incoming_migration_co(void *opaque)
{
MigrationIncomingState *mis = migration_incoming_get_current();
PostcopyState ps;
int ret;
- Error *local_err = NULL;
assert(mis->from_src_file);
- mis->migration_incoming_co = qemu_coroutine_self();
+
+ if (compress_threads_load_setup(mis->from_src_file)) {
+ error_report("Failed to setup decompress threads");
+ goto fail;
+ }
+
mis->largest_page_size = qemu_ram_pagesize_largest();
postcopy_state_set(POSTCOPY_INCOMING_NONE);
- migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
+ migrate_set_state(&mis->state, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_ACTIVE);
+
+ mis->loadvm_co = qemu_coroutine_self();
ret = qemu_loadvm_state(mis->from_src_file);
+ mis->loadvm_co = NULL;
+
+ trace_vmstate_downtime_checkpoint("dst-precopy-loadvm-completed");
ps = postcopy_state_get();
trace_process_incoming_migration_co_end(ret, ps);
@@ -571,65 +778,47 @@ static void process_incoming_migration_co(void *opaque)
/* Else if something went wrong then just fall out of the normal exit */
}
- /* we get COLO info, and know if we are in COLO mode */
- if (!ret && migration_incoming_colo_enabled()) {
- /* Make sure all file formats flush their mutable metadata */
- bdrv_invalidate_cache_all(&local_err);
- if (local_err) {
- error_report_err(local_err);
- goto fail;
- }
-
- qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
- colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
- mis->have_colo_incoming_thread = true;
- qemu_coroutine_yield();
+ if (ret < 0) {
+ MigrationState *s = migrate_get_current();
- /* Wait checkpoint incoming thread exit before free resource */
- qemu_thread_join(&mis->colo_incoming_thread);
- /* We hold the global iothread lock, so it is safe here */
- colo_release_ram_cache();
+ if (migrate_has_error(s)) {
+ WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
+ error_report_err(s->error);
+ }
+ }
+ error_report("load of migration failed: %s", strerror(-ret));
+ goto fail;
}
- if (ret < 0) {
- error_report("load of migration failed: %s", strerror(-ret));
+ if (colo_incoming_co() < 0) {
goto fail;
}
- mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
- qemu_bh_schedule(mis->bh);
- mis->migration_incoming_co = NULL;
+
+ migration_bh_schedule(process_incoming_migration_bh, mis);
return;
fail:
- local_err = NULL;
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_FAILED);
qemu_fclose(mis->from_src_file);
- if (multifd_load_cleanup(&local_err) != 0) {
- error_report_err(local_err);
- }
+
+ multifd_recv_cleanup();
+ compress_threads_load_cleanup();
+
exit(EXIT_FAILURE);
}
/**
* migration_incoming_setup: Setup incoming migration
* @f: file for main migration channel
- * @errp: where to put errors
- *
- * Returns: %true on success, %false on error.
*/
-static bool migration_incoming_setup(QEMUFile *f, Error **errp)
+static void migration_incoming_setup(QEMUFile *f)
{
MigrationIncomingState *mis = migration_incoming_get_current();
- if (multifd_load_setup(errp) != 0) {
- return false;
- }
-
if (!mis->from_src_file) {
mis->from_src_file = f;
}
qemu_file_set_blocking(f, false);
- return true;
}
void migration_incoming_process(void)
@@ -639,28 +828,29 @@ void migration_incoming_process(void)
}
/* Returns true if recovered from a paused migration, otherwise false */
-static bool postcopy_try_recover(QEMUFile *f)
+static bool postcopy_try_recover(void)
{
MigrationIncomingState *mis = migration_incoming_get_current();
if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
/* Resumed from a paused postcopy migration */
- mis->from_src_file = f;
+ /* This should be set already in migration_incoming_setup() */
+ assert(mis->from_src_file);
/* Postcopy has standalone thread to do vm load */
- qemu_file_set_blocking(f, true);
+ qemu_file_set_blocking(mis->from_src_file, true);
/* Re-configure the return path */
- mis->to_src_file = qemu_file_get_return_path(f);
+ mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
MIGRATION_STATUS_POSTCOPY_RECOVER);
/*
* Here, we only wake up the main loading thread (while the
- * fault thread will still be waiting), so that we can receive
+ * rest of the threads will still be waiting), so that we can receive
* commands from source now, and answer it if needed. The
- * fault thread will be woken up afterwards until we are sure
+ * rest of the threads will be woken up afterwards, once we are sure
* that source is ready to reply to page requests.
*/
qemu_sem_post(&mis->postcopy_pause_sem_dst);
@@ -670,53 +860,102 @@ static bool postcopy_try_recover(QEMUFile *f)
return false;
}
-void migration_fd_process_incoming(QEMUFile *f, Error **errp)
+void migration_fd_process_incoming(QEMUFile *f)
{
- if (postcopy_try_recover(f)) {
+ migration_incoming_setup(f);
+ if (postcopy_try_recover()) {
return;
}
+ migration_incoming_process();
+}
- if (!migration_incoming_setup(f, errp)) {
- return;
+/*
+ * Returns true when we want to start a new incoming migration process,
+ * false otherwise.
+ */
+static bool migration_should_start_incoming(bool main_channel)
+{
+ /* Multifd doesn't start unless all channels are established */
+ if (migrate_multifd()) {
+ return migration_has_all_channels();
}
- migration_incoming_process();
+
+ /* Preempt channel only starts when the main channel is created */
+ if (migrate_postcopy_preempt()) {
+ return main_channel;
+ }
+
+ /*
+ * For all other migration types, we should only reach here when
+ * the main channel is being created, and we should always
+ * proceed with it.
+ */
+ assert(main_channel);
+ return true;
}
void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
MigrationIncomingState *mis = migration_incoming_get_current();
Error *local_err = NULL;
- bool start_migration;
+ QEMUFile *f;
+ bool default_channel = true;
+ uint32_t channel_magic = 0;
+ int ret = 0;
- if (!mis->from_src_file) {
- /* The first connection (multifd may have multiple) */
- QEMUFile *f = qemu_fopen_channel_input(ioc);
+ if (migrate_multifd() && !migrate_mapped_ram() &&
+ !migrate_postcopy_ram() &&
+ qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+ /*
+ * With multiple channels, it is possible that we receive channels
+ * out of order on destination side, causing incorrect mapping of
+ * source channels on destination side. Check channel MAGIC to
+ * decide type of channel. Please note this is best effort, postcopy
+ * preempt channel does not send any magic number so avoid it for
+ * postcopy live migration. Also tls live migration already does
+ * tls handshake while initializing main channel so with tls this
+ * issue is not possible.
+ */
+ ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
+ sizeof(channel_magic), errp);
- /* If it's a recovery, we're done */
- if (postcopy_try_recover(f)) {
+ if (ret != 0) {
return;
}
- if (!migration_incoming_setup(f, errp)) {
- return;
- }
+ default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
+ } else {
+ default_channel = !mis->from_src_file;
+ }
- /*
- * Common migration only needs one channel, so we can start
- * right now. Multifd needs more than one channel, we wait.
- */
- start_migration = !migrate_use_multifd();
+ if (multifd_recv_setup(errp) != 0) {
+ return;
+ }
+
+ if (default_channel) {
+ f = qemu_file_new_input(ioc);
+ migration_incoming_setup(f);
} else {
/* Multiple connections */
- assert(migrate_use_multifd());
- start_migration = multifd_recv_new_channel(ioc, &local_err);
+ assert(migration_needs_multiple_sockets());
+ if (migrate_multifd()) {
+ multifd_recv_new_channel(ioc, &local_err);
+ } else {
+ assert(migrate_postcopy_preempt());
+ f = qemu_file_new_input(ioc);
+ postcopy_preempt_new_channel(mis, f);
+ }
if (local_err) {
error_propagate(errp, local_err);
return;
}
}
- if (start_migration) {
+ if (migration_should_start_incoming(default_channel)) {
+ /* If it's a recovery, we're done */
+ if (postcopy_try_recover()) {
+ return;
+ }
migration_incoming_process();
}
}
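
The MSG_PEEK classification above works because the main migration stream begins with QEMU_VM_FILE_MAGIC while multifd channels begin with their own magic, so peeking four bytes classifies a connection without consuming stream data. A condensed sketch of the decision, assuming QEMU_VM_FILE_MAGIC as defined for the savevm stream:

    uint32_t magic = 0;

    /* Peek 4 bytes without consuming them from the stream. */
    if (migration_channel_read_peek(ioc, (void *)&magic,
                                    sizeof(magic), errp)) {
        return;
    }
    /* The main stream starts with QEMU_VM_FILE_MAGIC (big-endian). */
    bool main_channel = (magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
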
@@ -730,11 +969,25 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
bool migration_has_all_channels(void)
{
MigrationIncomingState *mis = migration_incoming_get_current();
- bool all_channels;
- all_channels = multifd_recv_all_channels_created();
+ if (!mis->from_src_file) {
+ return false;
+ }
+
+ if (migrate_multifd()) {
+ return multifd_recv_all_channels_created();
+ }
- return all_channels && mis->from_src_file != NULL;
+ if (migrate_postcopy_preempt()) {
+ return mis->postcopy_qemufile_dst != NULL;
+ }
+
+ return true;
+}
+
+int migrate_send_rp_switchover_ack(MigrationIncomingState *mis)
+{
+ return migrate_send_rp_message(mis, MIG_RP_MSG_SWITCHOVER_ACK, 0, NULL);
}
/*
@@ -811,120 +1064,15 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
}
-MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
-{
- MigrationCapabilityStatusList *head = NULL, **tail = &head;
- MigrationCapabilityStatus *caps;
- MigrationState *s = migrate_get_current();
- int i;
-
- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
-#ifndef CONFIG_LIVE_BLOCK_MIGRATION
- if (i == MIGRATION_CAPABILITY_BLOCK) {
- continue;
- }
-#endif
- caps = g_malloc0(sizeof(*caps));
- caps->capability = i;
- caps->state = s->enabled_capabilities[i];
- QAPI_LIST_APPEND(tail, caps);
- }
-
- return head;
-}
-
-MigrationParameters *qmp_query_migrate_parameters(Error **errp)
-{
- MigrationParameters *params;
- MigrationState *s = migrate_get_current();
-
- /* TODO use QAPI_CLONE() instead of duplicating it inline */
- params = g_malloc0(sizeof(*params));
- params->has_compress_level = true;
- params->compress_level = s->parameters.compress_level;
- params->has_compress_threads = true;
- params->compress_threads = s->parameters.compress_threads;
- params->has_compress_wait_thread = true;
- params->compress_wait_thread = s->parameters.compress_wait_thread;
- params->has_decompress_threads = true;
- params->decompress_threads = s->parameters.decompress_threads;
- params->has_throttle_trigger_threshold = true;
- params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold;
- params->has_cpu_throttle_initial = true;
- params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
- params->has_cpu_throttle_increment = true;
- params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
- params->has_cpu_throttle_tailslow = true;
- params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
- params->has_tls_creds = true;
- params->tls_creds = g_strdup(s->parameters.tls_creds);
- params->has_tls_hostname = true;
- params->tls_hostname = g_strdup(s->parameters.tls_hostname);
- params->has_tls_authz = true;
- params->tls_authz = g_strdup(s->parameters.tls_authz ?
- s->parameters.tls_authz : "");
- params->has_max_bandwidth = true;
- params->max_bandwidth = s->parameters.max_bandwidth;
- params->has_downtime_limit = true;
- params->downtime_limit = s->parameters.downtime_limit;
- params->has_x_checkpoint_delay = true;
- params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
- params->has_block_incremental = true;
- params->block_incremental = s->parameters.block_incremental;
- params->has_multifd_channels = true;
- params->multifd_channels = s->parameters.multifd_channels;
- params->has_multifd_compression = true;
- params->multifd_compression = s->parameters.multifd_compression;
- params->has_multifd_zlib_level = true;
- params->multifd_zlib_level = s->parameters.multifd_zlib_level;
- params->has_multifd_zstd_level = true;
- params->multifd_zstd_level = s->parameters.multifd_zstd_level;
- params->has_xbzrle_cache_size = true;
- params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
- params->has_max_postcopy_bandwidth = true;
- params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
- params->has_max_cpu_throttle = true;
- params->max_cpu_throttle = s->parameters.max_cpu_throttle;
- params->has_announce_initial = true;
- params->announce_initial = s->parameters.announce_initial;
- params->has_announce_max = true;
- params->announce_max = s->parameters.announce_max;
- params->has_announce_rounds = true;
- params->announce_rounds = s->parameters.announce_rounds;
- params->has_announce_step = true;
- params->announce_step = s->parameters.announce_step;
-
- if (s->parameters.has_block_bitmap_mapping) {
- params->has_block_bitmap_mapping = true;
- params->block_bitmap_mapping =
- QAPI_CLONE(BitmapMigrationNodeAliasList,
- s->parameters.block_bitmap_mapping);
- }
-
- return params;
-}
-
-AnnounceParameters *migrate_announce_params(void)
-{
- static AnnounceParameters ap;
-
- MigrationState *s = migrate_get_current();
-
- ap.initial = s->parameters.announce_initial;
- ap.max = s->parameters.announce_max;
- ap.rounds = s->parameters.announce_rounds;
- ap.step = s->parameters.announce_step;
-
- return &ap;
-}
-
/*
* Return true if we're already in the middle of a migration
* (i.e. any of the active or setup states)
*/
-bool migration_is_setup_or_active(int state)
+bool migration_is_setup_or_active(void)
{
- switch (state) {
+ MigrationState *s = current_migration;
+
+ switch (s->state) {
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_PAUSED:
@@ -942,9 +1090,11 @@ bool migration_is_setup_or_active(int state)
}
}
-bool migration_is_running(int state)
+bool migration_is_running(void)
{
- switch (state) {
+ MigrationState *s = current_migration;
+
+ switch (s->state) {
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_PAUSED:
@@ -962,20 +1112,30 @@ bool migration_is_running(int state)
}
}
+static bool migrate_show_downtime(MigrationState *s)
+{
+ return (s->state == MIGRATION_STATUS_COMPLETED) || migration_in_postcopy();
+}
+
static void populate_time_info(MigrationInfo *info, MigrationState *s)
{
info->has_status = true;
info->has_setup_time = true;
info->setup_time = s->setup_time;
+
if (s->state == MIGRATION_STATUS_COMPLETED) {
info->has_total_time = true;
info->total_time = s->total_time;
- info->has_downtime = true;
- info->downtime = s->downtime;
} else {
info->has_total_time = true;
info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
s->start_time;
+ }
+
+ if (migrate_show_downtime(s)) {
+ info->has_downtime = true;
+ info->downtime = s->downtime;
+ } else {
info->has_expected_downtime = true;
info->expected_downtime = s->expected_downtime;
}
@@ -983,25 +1143,31 @@ static void populate_time_info(MigrationInfo *info, MigrationState *s)
static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
- info->has_ram = true;
+ size_t page_size = qemu_target_page_size();
+
info->ram = g_malloc0(sizeof(*info->ram));
- info->ram->transferred = ram_counters.transferred;
+ info->ram->transferred = migration_transferred_bytes();
info->ram->total = ram_bytes_total();
- info->ram->duplicate = ram_counters.duplicate;
+ info->ram->duplicate = stat64_get(&mig_stats.zero_pages);
/* legacy value. It is not used anymore */
info->ram->skipped = 0;
- info->ram->normal = ram_counters.normal;
- info->ram->normal_bytes = ram_counters.normal *
- qemu_target_page_size();
+ info->ram->normal = stat64_get(&mig_stats.normal_pages);
+ info->ram->normal_bytes = info->ram->normal * page_size;
info->ram->mbps = s->mbps;
- info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
- info->ram->postcopy_requests = ram_counters.postcopy_requests;
- info->ram->page_size = qemu_target_page_size();
- info->ram->multifd_bytes = ram_counters.multifd_bytes;
+ info->ram->dirty_sync_count =
+ stat64_get(&mig_stats.dirty_sync_count);
+ info->ram->dirty_sync_missed_zero_copy =
+ stat64_get(&mig_stats.dirty_sync_missed_zero_copy);
+ info->ram->postcopy_requests =
+ stat64_get(&mig_stats.postcopy_requests);
+ info->ram->page_size = page_size;
+ info->ram->multifd_bytes = stat64_get(&mig_stats.multifd_bytes);
info->ram->pages_per_second = s->pages_per_second;
+ info->ram->precopy_bytes = stat64_get(&mig_stats.precopy_bytes);
+ info->ram->downtime_bytes = stat64_get(&mig_stats.downtime_bytes);
+ info->ram->postcopy_bytes = stat64_get(&mig_stats.postcopy_bytes);
- if (migrate_use_xbzrle()) {
- info->has_xbzrle_cache = true;
+ if (migrate_xbzrle()) {
info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
info->xbzrle_cache->bytes = xbzrle_counters.bytes;
@@ -1012,17 +1178,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
info->xbzrle_cache->overflow = xbzrle_counters.overflow;
}
- if (migrate_use_compression()) {
- info->has_compression = true;
- info->compression = g_malloc0(sizeof(*info->compression));
- info->compression->pages = compression_counters.pages;
- info->compression->busy = compression_counters.busy;
- info->compression->busy_rate = compression_counters.busy_rate;
- info->compression->compressed_size =
- compression_counters.compressed_size;
- info->compression->compression_rate =
- compression_counters.compression_rate;
- }
+ populate_compress(info);
if (cpu_throttle_active()) {
info->has_cpu_throttle_percentage = true;
@@ -1031,14 +1187,23 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
if (s->state != MIGRATION_STATUS_COMPLETED) {
info->ram->remaining = ram_bytes_remaining();
- info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
+ info->ram->dirty_pages_rate =
+ stat64_get(&mig_stats.dirty_pages_rate);
+ }
+
+ if (migrate_dirty_limit() && dirtylimit_in_service()) {
+ info->has_dirty_limit_throttle_time_per_round = true;
+ info->dirty_limit_throttle_time_per_round =
+ dirtylimit_throttle_time_per_round();
+
+ info->has_dirty_limit_ring_full_time = true;
+ info->dirty_limit_ring_full_time = dirtylimit_ring_full_time();
}
}
static void populate_disk_info(MigrationInfo *info)
{
if (blk_mig_active()) {
- info->has_disk = true;
info->disk = g_malloc0(sizeof(*info->disk));
info->disk->transferred = blk_mig_bytes_transferred();
info->disk->remaining = blk_mig_bytes_remaining();
@@ -1049,7 +1214,8 @@ static void populate_disk_info(MigrationInfo *info)
static void fill_source_migration_info(MigrationInfo *info)
{
MigrationState *s = migrate_get_current();
- GSList *cur_blocker = migration_blockers;
+ int state = qatomic_read(&s->state);
+ GSList *cur_blocker = migration_blockers[migrate_mode()];
info->blocked_reasons = NULL;
@@ -1068,7 +1234,7 @@ static void fill_source_migration_info(MigrationInfo *info)
}
info->has_blocked_reasons = info->blocked_reasons != NULL;
- switch (s->state) {
+ switch (state) {
case MIGRATION_STATUS_NONE:
/* no migration has happened ever */
/* do not overwrite destination migration status */
@@ -1088,7 +1254,7 @@ static void fill_source_migration_info(MigrationInfo *info)
populate_time_info(info, s);
populate_ram_info(info, s);
populate_disk_info(info);
- populate_vfio_info(info);
+ migration_populate_vfio_info(info);
break;
case MIGRATION_STATUS_COLO:
info->has_status = true;
@@ -1097,14 +1263,10 @@ static void fill_source_migration_info(MigrationInfo *info)
case MIGRATION_STATUS_COMPLETED:
populate_time_info(info, s);
populate_ram_info(info, s);
- populate_vfio_info(info);
+ migration_populate_vfio_info(info);
break;
case MIGRATION_STATUS_FAILED:
info->has_status = true;
- if (s->error) {
- info->has_error_desc = true;
- info->error_desc = g_strdup(error_get_pretty(s->error));
- }
break;
case MIGRATION_STATUS_CANCELLED:
info->has_status = true;
@@ -1113,129 +1275,12 @@ static void fill_source_migration_info(MigrationInfo *info)
info->has_status = true;
break;
}
- info->status = s->state;
-}
-
-typedef enum WriteTrackingSupport {
- WT_SUPPORT_UNKNOWN = 0,
- WT_SUPPORT_ABSENT,
- WT_SUPPORT_AVAILABLE,
- WT_SUPPORT_COMPATIBLE
-} WriteTrackingSupport;
-
-static
-WriteTrackingSupport migrate_query_write_tracking(void)
-{
- /* Check if kernel supports required UFFD features */
- if (!ram_write_tracking_available()) {
- return WT_SUPPORT_ABSENT;
- }
- /*
- * Check if current memory configuration is
- * compatible with required UFFD features.
- */
- if (!ram_write_tracking_compatible()) {
- return WT_SUPPORT_AVAILABLE;
- }
-
- return WT_SUPPORT_COMPATIBLE;
-}
-
-/**
- * @migration_caps_check - check capability validity
- *
- * @cap_list: old capability list, array of bool
- * @params: new capabilities to be applied soon
- * @errp: set *errp if the check failed, with reason
- *
- * Returns true if check passed, otherwise false.
- */
-static bool migrate_caps_check(bool *cap_list,
- MigrationCapabilityStatusList *params,
- Error **errp)
-{
- MigrationCapabilityStatusList *cap;
- bool old_postcopy_cap;
- MigrationIncomingState *mis = migration_incoming_get_current();
-
- old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];
-
- for (cap = params; cap; cap = cap->next) {
- cap_list[cap->value->capability] = cap->value->state;
- }
-
-#ifndef CONFIG_LIVE_BLOCK_MIGRATION
- if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
- error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
- "block migration");
- error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
- return false;
- }
-#endif
-
-#ifndef CONFIG_REPLICATION
- if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
- error_setg(errp, "QEMU compiled without replication module"
- " can't enable COLO");
- error_append_hint(errp, "Please enable replication before COLO.\n");
- return false;
- }
-#endif
-
- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
- /* This check is reasonably expensive, so only when it's being
- * set the first time, also it's only the destination that needs
- * special support.
- */
- if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
- !postcopy_ram_supported_by_host(mis)) {
- /* postcopy_ram_supported_by_host will have emitted a more
- * detailed message
- */
- error_setg(errp, "Postcopy is not supported");
- return false;
- }
-
- if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
- error_setg(errp, "Postcopy is not compatible with ignore-shared");
- return false;
- }
- }
+ info->status = state;
- if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
- WriteTrackingSupport wt_support;
- int idx;
- /*
- * Check if 'background-snapshot' capability is supported by
- * host kernel and compatible with guest memory configuration.
- */
- wt_support = migrate_query_write_tracking();
- if (wt_support < WT_SUPPORT_AVAILABLE) {
- error_setg(errp, "Background-snapshot is not supported by host kernel");
- return false;
- }
- if (wt_support < WT_SUPPORT_COMPATIBLE) {
- error_setg(errp, "Background-snapshot is not compatible "
- "with guest memory configuration");
- return false;
- }
-
- /*
- * Check if there are any migration capabilities
- * incompatible with 'background-snapshot'.
- */
- for (idx = 0; idx < check_caps_background_snapshot.size; idx++) {
- int incomp_cap = check_caps_background_snapshot.caps[idx];
- if (cap_list[incomp_cap]) {
- error_setg(errp,
- "Background-snapshot is not compatible with %s",
- MigrationCapability_str(incomp_cap));
- return false;
- }
- }
+ QEMU_LOCK_GUARD(&s->error_mutex);
+ if (s->error) {
+ info->error_desc = g_strdup(error_get_pretty(s->error));
}
-
- return true;
}
static void fill_destination_migration_info(MigrationInfo *info)
@@ -1280,429 +1325,6 @@ MigrationInfo *qmp_query_migrate(Error **errp)
return info;
}
-void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
- Error **errp)
-{
- MigrationState *s = migrate_get_current();
- MigrationCapabilityStatusList *cap;
- bool cap_list[MIGRATION_CAPABILITY__MAX];
-
- if (migration_is_running(s->state)) {
- error_setg(errp, QERR_MIGRATION_ACTIVE);
- return;
- }
-
- memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
- if (!migrate_caps_check(cap_list, params, errp)) {
- return;
- }
-
- for (cap = params; cap; cap = cap->next) {
- s->enabled_capabilities[cap->value->capability] = cap->value->state;
- }
-}
-
-/*
- * Check whether the parameters are valid. Error will be put into errp
- * (if provided). Return true if valid, otherwise false.
- */
-static bool migrate_params_check(MigrationParameters *params, Error **errp)
-{
- if (params->has_compress_level &&
- (params->compress_level > 9)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
- "a value between 0 and 9");
- return false;
- }
-
- if (params->has_compress_threads && (params->compress_threads < 1)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "compress_threads",
- "a value between 1 and 255");
- return false;
- }
-
- if (params->has_decompress_threads && (params->decompress_threads < 1)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "decompress_threads",
- "a value between 1 and 255");
- return false;
- }
-
- if (params->has_throttle_trigger_threshold &&
- (params->throttle_trigger_threshold < 1 ||
- params->throttle_trigger_threshold > 100)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "throttle_trigger_threshold",
- "an integer in the range of 1 to 100");
- return false;
- }
-
- if (params->has_cpu_throttle_initial &&
- (params->cpu_throttle_initial < 1 ||
- params->cpu_throttle_initial > 99)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "cpu_throttle_initial",
- "an integer in the range of 1 to 99");
- return false;
- }
-
- if (params->has_cpu_throttle_increment &&
- (params->cpu_throttle_increment < 1 ||
- params->cpu_throttle_increment > 99)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "cpu_throttle_increment",
- "an integer in the range of 1 to 99");
- return false;
- }
-
- if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "max_bandwidth",
- "an integer in the range of 0 to "stringify(SIZE_MAX)
- " bytes/second");
- return false;
- }
-
- if (params->has_downtime_limit &&
- (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "downtime_limit",
- "an integer in the range of 0 to "
- stringify(MAX_MIGRATE_DOWNTIME)" ms");
- return false;
- }
-
- /* x_checkpoint_delay is now always positive */
-
- if (params->has_multifd_channels && (params->multifd_channels < 1)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "multifd_channels",
- "a value between 1 and 255");
- return false;
- }
-
- if (params->has_multifd_zlib_level &&
- (params->multifd_zlib_level > 9)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level",
- "a value between 0 and 9");
- return false;
- }
-
- if (params->has_multifd_zstd_level &&
- (params->multifd_zstd_level > 20)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level",
- "a value between 0 and 20");
- return false;
- }
-
- if (params->has_xbzrle_cache_size &&
- (params->xbzrle_cache_size < qemu_target_page_size() ||
- !is_power_of_2(params->xbzrle_cache_size))) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "xbzrle_cache_size",
- "a power of two no less than the target page size");
- return false;
- }
-
- if (params->has_max_cpu_throttle &&
- (params->max_cpu_throttle < params->cpu_throttle_initial ||
- params->max_cpu_throttle > 99)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "max_cpu_throttle",
- "an integer in the range of cpu_throttle_initial to 99");
- return false;
- }
-
- if (params->has_announce_initial &&
- params->announce_initial > 100000) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "announce_initial",
- "a value between 0 and 100000");
- return false;
- }
- if (params->has_announce_max &&
- params->announce_max > 100000) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "announce_max",
- "a value between 0 and 100000");
- return false;
- }
- if (params->has_announce_rounds &&
- params->announce_rounds > 1000) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "announce_rounds",
- "a value between 0 and 1000");
- return false;
- }
- if (params->has_announce_step &&
- (params->announce_step < 1 ||
- params->announce_step > 10000)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "announce_step",
- "a value between 0 and 10000");
- return false;
- }
-
- if (params->has_block_bitmap_mapping &&
- !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) {
- error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
- return false;
- }
-
- return true;
-}
-
-static void migrate_params_test_apply(MigrateSetParameters *params,
- MigrationParameters *dest)
-{
- *dest = migrate_get_current()->parameters;
-
- /* TODO use QAPI_CLONE() instead of duplicating it inline */
-
- if (params->has_compress_level) {
- dest->compress_level = params->compress_level;
- }
-
- if (params->has_compress_threads) {
- dest->compress_threads = params->compress_threads;
- }
-
- if (params->has_compress_wait_thread) {
- dest->compress_wait_thread = params->compress_wait_thread;
- }
-
- if (params->has_decompress_threads) {
- dest->decompress_threads = params->decompress_threads;
- }
-
- if (params->has_throttle_trigger_threshold) {
- dest->throttle_trigger_threshold = params->throttle_trigger_threshold;
- }
-
- if (params->has_cpu_throttle_initial) {
- dest->cpu_throttle_initial = params->cpu_throttle_initial;
- }
-
- if (params->has_cpu_throttle_increment) {
- dest->cpu_throttle_increment = params->cpu_throttle_increment;
- }
-
- if (params->has_cpu_throttle_tailslow) {
- dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
- }
-
- if (params->has_tls_creds) {
- assert(params->tls_creds->type == QTYPE_QSTRING);
- dest->tls_creds = params->tls_creds->u.s;
- }
-
- if (params->has_tls_hostname) {
- assert(params->tls_hostname->type == QTYPE_QSTRING);
- dest->tls_hostname = params->tls_hostname->u.s;
- }
-
- if (params->has_max_bandwidth) {
- dest->max_bandwidth = params->max_bandwidth;
- }
-
- if (params->has_downtime_limit) {
- dest->downtime_limit = params->downtime_limit;
- }
-
- if (params->has_x_checkpoint_delay) {
- dest->x_checkpoint_delay = params->x_checkpoint_delay;
- }
-
- if (params->has_block_incremental) {
- dest->block_incremental = params->block_incremental;
- }
- if (params->has_multifd_channels) {
- dest->multifd_channels = params->multifd_channels;
- }
- if (params->has_multifd_compression) {
- dest->multifd_compression = params->multifd_compression;
- }
- if (params->has_xbzrle_cache_size) {
- dest->xbzrle_cache_size = params->xbzrle_cache_size;
- }
- if (params->has_max_postcopy_bandwidth) {
- dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
- }
- if (params->has_max_cpu_throttle) {
- dest->max_cpu_throttle = params->max_cpu_throttle;
- }
- if (params->has_announce_initial) {
- dest->announce_initial = params->announce_initial;
- }
- if (params->has_announce_max) {
- dest->announce_max = params->announce_max;
- }
- if (params->has_announce_rounds) {
- dest->announce_rounds = params->announce_rounds;
- }
- if (params->has_announce_step) {
- dest->announce_step = params->announce_step;
- }
-
- if (params->has_block_bitmap_mapping) {
- dest->has_block_bitmap_mapping = true;
- dest->block_bitmap_mapping = params->block_bitmap_mapping;
- }
-}
-
-static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
-{
- MigrationState *s = migrate_get_current();
-
- /* TODO use QAPI_CLONE() instead of duplicating it inline */
-
- if (params->has_compress_level) {
- s->parameters.compress_level = params->compress_level;
- }
-
- if (params->has_compress_threads) {
- s->parameters.compress_threads = params->compress_threads;
- }
-
- if (params->has_compress_wait_thread) {
- s->parameters.compress_wait_thread = params->compress_wait_thread;
- }
-
- if (params->has_decompress_threads) {
- s->parameters.decompress_threads = params->decompress_threads;
- }
-
- if (params->has_throttle_trigger_threshold) {
- s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold;
- }
-
- if (params->has_cpu_throttle_initial) {
- s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
- }
-
- if (params->has_cpu_throttle_increment) {
- s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
- }
-
- if (params->has_cpu_throttle_tailslow) {
- s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
- }
-
- if (params->has_tls_creds) {
- g_free(s->parameters.tls_creds);
- assert(params->tls_creds->type == QTYPE_QSTRING);
- s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
- }
-
- if (params->has_tls_hostname) {
- g_free(s->parameters.tls_hostname);
- assert(params->tls_hostname->type == QTYPE_QSTRING);
- s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
- }
-
- if (params->has_tls_authz) {
- g_free(s->parameters.tls_authz);
- assert(params->tls_authz->type == QTYPE_QSTRING);
- s->parameters.tls_authz = g_strdup(params->tls_authz->u.s);
- }
-
- if (params->has_max_bandwidth) {
- s->parameters.max_bandwidth = params->max_bandwidth;
- if (s->to_dst_file && !migration_in_postcopy()) {
- qemu_file_set_rate_limit(s->to_dst_file,
- s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
- }
- }
-
- if (params->has_downtime_limit) {
- s->parameters.downtime_limit = params->downtime_limit;
- }
-
- if (params->has_x_checkpoint_delay) {
- s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
- if (migration_in_colo_state()) {
- colo_checkpoint_notify(s);
- }
- }
-
- if (params->has_block_incremental) {
- s->parameters.block_incremental = params->block_incremental;
- }
- if (params->has_multifd_channels) {
- s->parameters.multifd_channels = params->multifd_channels;
- }
- if (params->has_multifd_compression) {
- s->parameters.multifd_compression = params->multifd_compression;
- }
- if (params->has_xbzrle_cache_size) {
- s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
- xbzrle_cache_resize(params->xbzrle_cache_size, errp);
- }
- if (params->has_max_postcopy_bandwidth) {
- s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
- if (s->to_dst_file && migration_in_postcopy()) {
- qemu_file_set_rate_limit(s->to_dst_file,
- s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO);
- }
- }
- if (params->has_max_cpu_throttle) {
- s->parameters.max_cpu_throttle = params->max_cpu_throttle;
- }
- if (params->has_announce_initial) {
- s->parameters.announce_initial = params->announce_initial;
- }
- if (params->has_announce_max) {
- s->parameters.announce_max = params->announce_max;
- }
- if (params->has_announce_rounds) {
- s->parameters.announce_rounds = params->announce_rounds;
- }
- if (params->has_announce_step) {
- s->parameters.announce_step = params->announce_step;
- }
-
- if (params->has_block_bitmap_mapping) {
- qapi_free_BitmapMigrationNodeAliasList(
- s->parameters.block_bitmap_mapping);
-
- s->parameters.has_block_bitmap_mapping = true;
- s->parameters.block_bitmap_mapping =
- QAPI_CLONE(BitmapMigrationNodeAliasList,
- params->block_bitmap_mapping);
- }
-}
-
-void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
-{
- MigrationParameters tmp;
-
- /* TODO Rewrite "" to null instead */
- if (params->has_tls_creds
- && params->tls_creds->type == QTYPE_QNULL) {
- qobject_unref(params->tls_creds->u.n);
- params->tls_creds->type = QTYPE_QSTRING;
- params->tls_creds->u.s = strdup("");
- }
- /* TODO Rewrite "" to null instead */
- if (params->has_tls_hostname
- && params->tls_hostname->type == QTYPE_QNULL) {
- qobject_unref(params->tls_hostname->u.n);
- params->tls_hostname->type = QTYPE_QSTRING;
- params->tls_hostname->u.s = strdup("");
- }
-
- migrate_params_test_apply(params, &tmp);
-
- if (!migrate_params_check(&tmp, errp)) {
- /* Invalid parameter */
- return;
- }
-
- migrate_params_apply(params, errp);
-}
-
-
void qmp_migrate_start_postcopy(Error **errp)
{
MigrationState *s = migrate_get_current();
@@ -1736,61 +1358,31 @@ void migrate_set_state(int *state, int old_state, int new_state)
}
}
-static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index,
- bool state)
-{
- MigrationCapabilityStatus *cap;
-
- cap = g_new0(MigrationCapabilityStatus, 1);
- cap->capability = index;
- cap->state = state;
-
- return cap;
-}
-
-void migrate_set_block_enabled(bool value, Error **errp)
-{
- MigrationCapabilityStatusList *cap = NULL;
-
- QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value));
- qmp_migrate_set_capabilities(cap, errp);
- qapi_free_MigrationCapabilityStatusList(cap);
-}
-
-static void migrate_set_block_incremental(MigrationState *s, bool value)
-{
- s->parameters.block_incremental = value;
-}
-
-static void block_cleanup_parameters(MigrationState *s)
-{
- if (s->must_remove_block_options) {
- /* setting to false can never fail */
- migrate_set_block_enabled(false, &error_abort);
- migrate_set_block_incremental(s, false);
- s->must_remove_block_options = false;
- }
-}
-
static void migrate_fd_cleanup(MigrationState *s)
{
- qemu_bh_delete(s->cleanup_bh);
- s->cleanup_bh = NULL;
+ MigrationEventType type;
+
+ g_free(s->hostname);
+ s->hostname = NULL;
+ json_writer_free(s->vmdesc);
+ s->vmdesc = NULL;
qemu_savevm_state_cleanup();
+ close_return_path_on_source(s);
+
if (s->to_dst_file) {
QEMUFile *tmp;
trace_migrate_fd_cleanup();
- qemu_mutex_unlock_iothread();
+ bql_unlock();
if (s->migration_thread_running) {
qemu_thread_join(&s->thread);
s->migration_thread_running = false;
}
- qemu_mutex_lock_iothread();
+ bql_lock();
- multifd_save_cleanup();
+ multifd_send_shutdown();
qemu_mutex_lock(&s->qemu_file_lock);
tmp = s->to_dst_file;
s->to_dst_file = NULL;
@@ -1803,7 +1395,7 @@ static void migrate_fd_cleanup(MigrationState *s)
qemu_fclose(tmp);
}
- assert(!migration_is_active(s));
+ assert(!migration_is_active());
if (s->state == MIGRATION_STATUS_CANCELLING) {
migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
@@ -1814,26 +1406,16 @@ static void migrate_fd_cleanup(MigrationState *s)
    /* It is used by 'info migrate'. We can't free it */
error_report_err(error_copy(s->error));
}
- notifier_list_notify(&migration_state_notifiers, s);
- block_cleanup_parameters(s);
+ type = migration_has_failed(s) ? MIG_EVENT_PRECOPY_FAILED :
+ MIG_EVENT_PRECOPY_DONE;
+ migration_call_notifiers(s, type, NULL);
+ block_cleanup_parameters();
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}
-static void migrate_fd_cleanup_schedule(MigrationState *s)
-{
- /*
- * Ref the state for bh, because it may be called when
- * there're already no other refs
- */
- object_ref(OBJECT(s));
- qemu_bh_schedule(s->cleanup_bh);
-}
-
static void migrate_fd_cleanup_bh(void *opaque)
{
- MigrationState *s = opaque;
- migrate_fd_cleanup(s);
- object_unref(OBJECT(s));
+ migrate_fd_cleanup(opaque);
}
void migrate_set_error(MigrationState *s, const Error *error)
@@ -1844,6 +1426,13 @@ void migrate_set_error(MigrationState *s, const Error *error)
}
}
+bool migrate_has_error(MigrationState *s)
+{
+    /* The lock is not strictly needed here, but follow the locking rule anyway */
+ QEMU_LOCK_GUARD(&s->error_mutex);
+ return qatomic_read(&s->error);
+}
+
static void migrate_error_free(MigrationState *s)
{
QEMU_LOCK_GUARD(&s->error_mutex);
@@ -1853,7 +1442,7 @@ static void migrate_error_free(MigrationState *s)
}
}
-void migrate_fd_error(MigrationState *s, const Error *error)
+static void migrate_fd_error(MigrationState *s, const Error *error)
{
trace_migrate_fd_error(error_get_pretty(error));
assert(s->to_dst_file == NULL);
@@ -1865,7 +1454,7 @@ void migrate_fd_error(MigrationState *s, const Error *error)
static void migrate_fd_cancel(MigrationState *s)
{
    int old_state;
- QEMUFile *f = migrate_get_current()->to_dst_file;
+
trace_migrate_fd_cancel();
WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
@@ -1877,7 +1466,7 @@ static void migrate_fd_cancel(MigrationState *s)
do {
old_state = s->state;
- if (!migration_is_running(old_state)) {
+ if (!migration_is_running()) {
break;
}
/* If the migration is paused, kick it out of the pause */
@@ -1891,16 +1480,18 @@ static void migrate_fd_cancel(MigrationState *s)
* If we're unlucky the migration code might be stuck somewhere in a
* send/write while the network has failed and is waiting to timeout;
* if we've got shutdown(2) available then we can force it to quit.
- * The outgoing qemu file gets closed in migrate_fd_cleanup that is
- * called in a bh, so there is no race against this cancel.
*/
- if (s->state == MIGRATION_STATUS_CANCELLING && f) {
- qemu_file_shutdown(f);
+ if (s->state == MIGRATION_STATUS_CANCELLING) {
+ WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
+ if (s->to_dst_file) {
+ qemu_file_shutdown(s->to_dst_file);
+ }
+ }
}
if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
Error *local_err = NULL;
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
} else {
@@ -1909,24 +1500,39 @@ static void migrate_fd_cancel(MigrationState *s)
}
}
-void add_migration_state_change_notifier(Notifier *notify)
+void migration_add_notifier_mode(NotifierWithReturn *notify,
+ MigrationNotifyFunc func, MigMode mode)
{
- notifier_list_add(&migration_state_notifiers, notify);
+ notify->notify = (NotifierWithReturnFunc)func;
+ notifier_with_return_list_add(&migration_state_notifiers[mode], notify);
}
-void remove_migration_state_change_notifier(Notifier *notify)
+void migration_add_notifier(NotifierWithReturn *notify,
+ MigrationNotifyFunc func)
{
- notifier_remove(notify);
+ migration_add_notifier_mode(notify, func, MIG_MODE_NORMAL);
}
-bool migration_in_setup(MigrationState *s)
+void migration_remove_notifier(NotifierWithReturn *notify)
{
- return s->state == MIGRATION_STATUS_SETUP;
+ if (notify->notify) {
+ notifier_with_return_remove(notify);
+ notify->notify = NULL;
+ }
}
-bool migration_has_finished(MigrationState *s)
+int migration_call_notifiers(MigrationState *s, MigrationEventType type,
+ Error **errp)
{
- return s->state == MIGRATION_STATUS_COMPLETED;
+ MigMode mode = s->parameters.mode;
+ MigrationEvent e;
+ int ret;
+
+ e.type = type;
+ ret = notifier_with_return_list_notify(&migration_state_notifiers[mode],
+ &e, errp);
+ assert(!ret || type == MIG_EVENT_PRECOPY_SETUP);
+ return ret;
}
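/*
 * A minimal usage sketch for the notifier API above (illustrative only,
 * not part of this patch). From the cast in migration_add_notifier_mode()
 * and the call in migration_call_notifiers(), a MigrationNotifyFunc
 * receives the notifier, the MigrationEvent and an errp, and may only
 * fail for MIG_EVENT_PRECOPY_SETUP (see the assert above).
 * my_device_ready() is a hypothetical helper:
 *
 *     static int my_migration_notify(NotifierWithReturn *notifier,
 *                                    MigrationEvent *e, Error **errp)
 *     {
 *         if (e->type == MIG_EVENT_PRECOPY_SETUP && !my_device_ready()) {
 *             error_setg(errp, "device not ready for migration");
 *             return -1;            // only SETUP is allowed to fail
 *         }
 *         return 0;
 *     }
 *
 *     static NotifierWithReturn my_notifier;
 *
 *     migration_add_notifier(&my_notifier, my_migration_notify);
 *     ...
 *     migration_remove_notifier(&my_notifier);
 */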
bool migration_has_failed(MigrationState *s)
@@ -1949,9 +1555,15 @@ bool migration_in_postcopy(void)
}
}
-bool migration_in_postcopy_after_devices(MigrationState *s)
+bool migration_postcopy_is_alive(int state)
{
- return migration_in_postcopy() && s->postcopy_after_devices;
+ switch (state) {
+ case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+ case MIGRATION_STATUS_POSTCOPY_RECOVER:
+ return true;
+ default:
+ return false;
+ }
}
bool migration_in_incoming_postcopy(void)
@@ -1961,12 +1573,17 @@ bool migration_in_incoming_postcopy(void)
return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END;
}
-bool migration_in_bg_snapshot(void)
+bool migration_incoming_postcopy_advised(void)
{
- MigrationState *s = migrate_get_current();
+ PostcopyState ps = postcopy_state_get();
+
+ return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
+}
+bool migration_in_bg_snapshot(void)
+{
return migrate_background_snapshot() &&
- migration_is_setup_or_active(s->state);
+ migration_is_setup_or_active();
}
bool migration_is_idle(void)
@@ -1999,72 +1616,185 @@ bool migration_is_idle(void)
return false;
}
-bool migration_is_active(MigrationState *s)
+bool migration_is_active(void)
{
+ MigrationState *s = current_migration;
+
return (s->state == MIGRATION_STATUS_ACTIVE ||
s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
}
-void migrate_init(MigrationState *s)
+bool migration_is_device(void)
+{
+ MigrationState *s = current_migration;
+
+ return s->state == MIGRATION_STATUS_DEVICE;
+}
+
+bool migration_thread_is_self(void)
{
+ MigrationState *s = current_migration;
+
+ return qemu_thread_is_self(&s->thread);
+}
+
+bool migrate_mode_is_cpr(MigrationState *s)
+{
+ return s->parameters.mode == MIG_MODE_CPR_REBOOT;
+}
+
+int migrate_init(MigrationState *s, Error **errp)
+{
+ int ret;
+
+ ret = qemu_savevm_state_prepare(errp);
+ if (ret) {
+ return ret;
+ }
+
/*
* Reinitialise all migration state, except
* parameters/capabilities that the user set, and
* locks.
*/
- s->cleanup_bh = 0;
- s->vm_start_bh = 0;
s->to_dst_file = NULL;
s->state = MIGRATION_STATUS_NONE;
s->rp_state.from_dst_file = NULL;
- s->rp_state.error = false;
s->mbps = 0.0;
s->pages_per_second = 0.0;
s->downtime = 0;
s->expected_downtime = 0;
s->setup_time = 0;
s->start_postcopy = false;
- s->postcopy_after_devices = false;
s->migration_thread_running = false;
error_free(s->error);
s->error = NULL;
- s->hostname = NULL;
+ s->vmdesc = NULL;
migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
s->total_time = 0;
- s->vm_was_running = false;
+ s->vm_old_state = -1;
s->iteration_initial_bytes = 0;
s->threshold_size = 0;
+ s->switchover_acked = false;
+ s->rdma_migration = false;
+    /* Zero the mig_stats counters for the new migration */
+ memset(&mig_stats, 0, sizeof(mig_stats));
+ migration_reset_vfio_bytes_transferred();
+
+ return 0;
+}
+
+static bool is_busy(Error **reasonp, Error **errp)
+{
+ ERRP_GUARD();
+
+ /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */
+ if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) {
+ error_propagate_prepend(errp, *reasonp,
+ "disallowing migration blocker "
+ "(migration/snapshot in progress) for: ");
+ *reasonp = NULL;
+ return true;
+ }
+ return false;
}
-int migrate_add_blocker(Error *reason, Error **errp)
+static bool is_only_migratable(Error **reasonp, Error **errp, int modes)
{
- if (only_migratable) {
- error_propagate_prepend(errp, error_copy(reason),
+ ERRP_GUARD();
+
+ if (only_migratable && (modes & BIT(MIG_MODE_NORMAL))) {
+ error_propagate_prepend(errp, *reasonp,
"disallowing migration blocker "
"(--only-migratable) for: ");
- return -EACCES;
+ *reasonp = NULL;
+ return true;
+ }
+ return false;
+}
+
+static int get_modes(MigMode mode, va_list ap)
+{
+ int modes = 0;
+
+ while (mode != -1 && mode != MIG_MODE_ALL) {
+ assert(mode >= MIG_MODE_NORMAL && mode < MIG_MODE__MAX);
+ modes |= BIT(mode);
+ mode = va_arg(ap, MigMode);
+ }
+ if (mode == MIG_MODE_ALL) {
+ modes = BIT(MIG_MODE__MAX) - 1;
}
+ return modes;
+}
- if (migration_is_idle()) {
- migration_blockers = g_slist_prepend(migration_blockers, reason);
- return 0;
+static int add_blockers(Error **reasonp, Error **errp, int modes)
+{
+ for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) {
+ if (modes & BIT(mode)) {
+ migration_blockers[mode] = g_slist_prepend(migration_blockers[mode],
+ *reasonp);
+ }
}
+ return 0;
+}
+
+int migrate_add_blocker(Error **reasonp, Error **errp)
+{
+ return migrate_add_blocker_modes(reasonp, errp, MIG_MODE_ALL);
+}
+
+int migrate_add_blocker_normal(Error **reasonp, Error **errp)
+{
+ return migrate_add_blocker_modes(reasonp, errp, MIG_MODE_NORMAL, -1);
+}
+
+int migrate_add_blocker_modes(Error **reasonp, Error **errp, MigMode mode, ...)
+{
+ int modes;
+ va_list ap;
+
+ va_start(ap, mode);
+ modes = get_modes(mode, ap);
+ va_end(ap);
+
+ if (is_only_migratable(reasonp, errp, modes)) {
+ return -EACCES;
+ } else if (is_busy(reasonp, errp)) {
+ return -EBUSY;
+ }
+ return add_blockers(reasonp, errp, modes);
+}
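/*
 * A worked example of the mode-list handling above (illustrative only).
 * The variadic list is terminated by -1, and MIG_MODE_ALL expands to a
 * mask of every mode. Assuming MIG_MODE_NORMAL == 0 and
 * MIG_MODE_CPR_REBOOT == 1:
 *
 *     migrate_add_blocker_modes(&reason, errp, MIG_MODE_NORMAL, -1);
 *         // modes == BIT(0)
 *     migrate_add_blocker_modes(&reason, errp, MIG_MODE_NORMAL,
 *                               MIG_MODE_CPR_REBOOT, -1);
 *         // modes == BIT(0) | BIT(1)
 *     migrate_add_blocker(&reason, errp);
 *         // modes == BIT(MIG_MODE__MAX) - 1, i.e. all modes
 */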
+
+int migrate_add_blocker_internal(Error **reasonp, Error **errp)
+{
+ int modes = BIT(MIG_MODE__MAX) - 1;
- error_propagate_prepend(errp, error_copy(reason),
- "disallowing migration blocker "
- "(migration in progress) for: ");
- return -EBUSY;
+ if (is_busy(reasonp, errp)) {
+ return -EBUSY;
+ }
+ return add_blockers(reasonp, errp, modes);
}
-void migrate_del_blocker(Error *reason)
+void migrate_del_blocker(Error **reasonp)
{
- migration_blockers = g_slist_remove(migration_blockers, reason);
+ if (*reasonp) {
+ for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) {
+ migration_blockers[mode] = g_slist_remove(migration_blockers[mode],
+ *reasonp);
+ }
+ error_free(*reasonp);
+ *reasonp = NULL;
+ }
}
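/*
 * A minimal sketch of the blocker lifecycle with the Error ** API above
 * (illustrative only; my_blocker is a hypothetical device-local Error).
 * On failure the reason is consumed (propagated into errp) and *reasonp
 * is set to NULL, so the caller never frees it twice:
 *
 *     static Error *my_blocker;
 *
 *     error_setg(&my_blocker, "mydev does not support migration");
 *     if (migrate_add_blocker(&my_blocker, errp) < 0) {
 *         // -EACCES (--only-migratable) or -EBUSY (migration running);
 *         // my_blocker is already NULL here
 *         return;
 *     }
 *     ...
 *     migrate_del_blocker(&my_blocker);    // frees and NULLs the reason
 */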
-void qmp_migrate_incoming(const char *uri, Error **errp)
+void qmp_migrate_incoming(const char *uri, bool has_channels,
+ MigrationChannelList *channels, Error **errp)
{
Error *local_err = NULL;
static bool once = true;
@@ -2082,7 +1812,7 @@ void qmp_migrate_incoming(const char *uri, Error **errp)
return;
}
- qemu_start_incoming_migration(uri, &local_err);
+ qemu_start_incoming_migration(uri, has_channels, channels, &local_err);
if (local_err) {
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
@@ -2110,44 +1840,51 @@ void qmp_migrate_recover(const char *uri, Error **errp)
return;
}
- if (qatomic_cmpxchg(&mis->postcopy_recover_triggered,
- false, true) == true) {
- error_setg(errp, "Migrate recovery is triggered already");
- return;
- }
+ /* If there's an existing transport, release it */
+ migration_incoming_transport_cleanup(mis);
/*
* Note that this call will never start a real migration; it will
* only re-setup the migration stream and poke existing migration
* to continue using that newly established channel.
*/
- qemu_start_incoming_migration(uri, errp);
-
- /* Safe to dereference with the assert above */
- if (*errp) {
- /* Reset the flag so user could still retry */
- qatomic_set(&mis->postcopy_recover_triggered, false);
- }
+ qemu_start_incoming_migration(uri, false, NULL, errp);
}
void qmp_migrate_pause(Error **errp)
{
MigrationState *ms = migrate_get_current();
MigrationIncomingState *mis = migration_incoming_get_current();
- int ret;
+ int ret = 0;
- if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+ if (migration_postcopy_is_alive(ms->state)) {
/* Source side, during postcopy */
+ Error *error = NULL;
+
+ /* Tell the core migration that we're pausing */
+ error_setg(&error, "Postcopy migration is paused by the user");
+ migrate_set_error(ms, error);
+ error_free(error);
+
qemu_mutex_lock(&ms->qemu_file_lock);
- ret = qemu_file_shutdown(ms->to_dst_file);
+ if (ms->to_dst_file) {
+ ret = qemu_file_shutdown(ms->to_dst_file);
+ }
qemu_mutex_unlock(&ms->qemu_file_lock);
if (ret) {
error_setg(errp, "Failed to pause source migration");
}
+
+ /*
+ * Kick the migration thread out of any waiting windows (on behalf
+ * of the rp thread).
+ */
+ migration_rp_kick(ms);
+
return;
}
- if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+ if (migration_postcopy_is_alive(mis->state)) {
ret = qemu_file_shutdown(mis->from_src_file);
if (ret) {
error_setg(errp, "Failed to pause destination migration");
@@ -2156,17 +1893,19 @@ void qmp_migrate_pause(Error **errp)
}
error_setg(errp, "migrate-pause is currently only supported "
- "during postcopy-active state");
+ "during postcopy-active or postcopy-recover state");
}
bool migration_is_blocked(Error **errp)
{
+ GSList *blockers = migration_blockers[migrate_mode()];
+
if (qemu_savevm_state_blocked(errp)) {
return true;
}
- if (migration_blockers) {
- error_propagate(errp, error_copy(migration_blockers->data));
+ if (blockers) {
+ error_propagate(errp, error_copy(blockers->data));
return true;
}
@@ -2177,7 +1916,15 @@ bool migration_is_blocked(Error **errp)
static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
bool resume, Error **errp)
{
- Error *local_err = NULL;
+ if (blk_inc) {
+ warn_report("parameter 'inc' is deprecated;"
+ " use blockdev-mirror with NBD instead");
+ }
+
+ if (blk) {
+ warn_report("parameter 'blk' is deprecated;"
+ " use blockdev-mirror with NBD instead");
+ }
if (resume) {
if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
@@ -2208,7 +1955,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
return true;
}
- if (migration_is_running(s->state)) {
+ if (migration_is_running()) {
error_setg(errp, QERR_MIGRATION_ACTIVE);
return false;
}
@@ -2224,88 +1971,153 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
return false;
}
+ if (kvm_hwpoisoned_mem()) {
+        error_setg(errp, "Can't migrate this VM with hardware-poisoned memory; "
+                   "please reboot the VM and try again");
+ return false;
+ }
+
if (migration_is_blocked(errp)) {
return false;
}
+ if (migrate_mapped_ram()) {
+ if (migrate_tls()) {
+ error_setg(errp, "Cannot use TLS with mapped-ram");
+ return false;
+ }
+
+ if (migrate_multifd_compression()) {
+ error_setg(errp, "Cannot use compression with mapped-ram");
+ return false;
+ }
+ }
+
+ if (migrate_mode_is_cpr(s)) {
+ const char *conflict = NULL;
+
+ if (migrate_postcopy()) {
+ conflict = "postcopy";
+ } else if (migrate_background_snapshot()) {
+ conflict = "background snapshot";
+ } else if (migrate_colo()) {
+ conflict = "COLO";
+ }
+
+ if (conflict) {
+ error_setg(errp, "Cannot use %s with CPR", conflict);
+ return false;
+ }
+ }
+
if (blk || blk_inc) {
- if (migrate_colo_enabled()) {
+ if (migrate_colo()) {
error_setg(errp, "No disk migration is required in COLO mode");
return false;
}
- if (migrate_use_block() || migrate_use_block_incremental()) {
+ if (migrate_block() || migrate_block_incremental()) {
error_setg(errp, "Command options are incompatible with "
"current migration capabilities");
return false;
}
- migrate_set_block_enabled(true, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, errp)) {
return false;
}
s->must_remove_block_options = true;
}
if (blk_inc) {
- migrate_set_block_incremental(s, true);
+ migrate_set_block_incremental(true);
}
- migrate_init(s);
- /*
- * set ram_counters memory to zero for a
- * new migration
- */
- memset(&ram_counters, 0, sizeof(ram_counters));
+ if (migrate_init(s, errp)) {
+ return false;
+ }
return true;
}
-void qmp_migrate(const char *uri, bool has_blk, bool blk,
+void qmp_migrate(const char *uri, bool has_channels,
+ MigrationChannelList *channels, bool has_blk, bool blk,
bool has_inc, bool inc, bool has_detach, bool detach,
bool has_resume, bool resume, Error **errp)
{
+ bool resume_requested;
Error *local_err = NULL;
MigrationState *s = migrate_get_current();
- const char *p = NULL;
+ g_autoptr(MigrationChannel) channel = NULL;
+ MigrationAddress *addr = NULL;
+    /* Do preliminary sanity checks on the 'uri' and 'channels' arguments */
+ if (!uri == !channels) {
+ error_setg(errp, "need either 'uri' or 'channels' argument");
+ return;
+ }
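    /*
     * The two accepted invocation forms look roughly like this
     * (illustrative only; host and port are placeholders, and the
     * channels syntax is sketched from the QAPI schema):
     *
     *     { "execute": "migrate",
     *       "arguments": { "uri": "tcp:10.0.0.1:4446" } }
     *
     *     { "execute": "migrate",
     *       "arguments": { "channels": [ { "channel-type": "main",
     *         "addr": { "transport": "socket", "type": "inet",
     *                   "host": "10.0.0.1", "port": "4446" } } ] } }
     */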
+
+ if (channels) {
+        /* Verify that the channel list has exactly one entry */
+ if (channels->next) {
+            error_setg(errp, "Channel list has more than one entry");
+ return;
+ }
+ addr = channels->value->addr;
+ }
+
+ if (uri) {
+ /* caller uses the old URI syntax */
+ if (!migrate_uri_parse(uri, &channel, errp)) {
+ return;
+ }
+ addr = channel->addr;
+ }
+
+ /* transport mechanism not suitable for migration? */
+ if (!migration_channels_and_transport_compatible(addr, errp)) {
+ return;
+ }
+
+ resume_requested = has_resume && resume;
if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
- has_resume && resume, errp)) {
+ resume_requested, errp)) {
/* Error detected, put into errp */
return;
}
- if (!(has_resume && resume)) {
+ if (!resume_requested) {
if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
return;
}
}
- if (strstart(uri, "tcp:", &p) ||
- strstart(uri, "unix:", NULL) ||
- strstart(uri, "vsock:", NULL)) {
- socket_start_outgoing_migration(s, p ? p : uri, &local_err);
+ if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
+ SocketAddress *saddr = &addr->u.socket;
+ if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
+ saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
+ saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) {
+ socket_start_outgoing_migration(s, saddr, &local_err);
+ } else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) {
+ fd_start_outgoing_migration(s, saddr->u.fd.str, &local_err);
+ }
#ifdef CONFIG_RDMA
- } else if (strstart(uri, "rdma:", &p)) {
- rdma_start_outgoing_migration(s, p, &local_err);
+ } else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
+ rdma_start_outgoing_migration(s, &addr->u.rdma, &local_err);
#endif
- } else if (strstart(uri, "exec:", &p)) {
- exec_start_outgoing_migration(s, p, &local_err);
- } else if (strstart(uri, "fd:", &p)) {
- fd_start_outgoing_migration(s, p, &local_err);
+ } else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
+ exec_start_outgoing_migration(s, addr->u.exec.args, &local_err);
+ } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
+ file_start_outgoing_migration(s, &addr->u.file, &local_err);
} else {
- if (!(has_resume && resume)) {
- yank_unregister_instance(MIGRATION_YANK_INSTANCE);
- }
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
+ error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, "uri",
"a valid migration protocol");
migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_FAILED);
- block_cleanup_parameters(s);
- return;
+ block_cleanup_parameters();
}
if (local_err) {
- if (!(has_resume && resume)) {
+ if (!resume_requested) {
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}
migrate_fd_error(s, local_err);
@@ -2316,7 +2128,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
void qmp_migrate_cancel(Error **errp)
{
- migration_cancel();
+ migration_cancel(NULL);
}
void qmp_migrate_continue(MigrationStatus state, Error **errp)
@@ -2330,263 +2142,26 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp)
qemu_sem_post(&s->pause_sem);
}
-bool migrate_release_ram(void)
+int migration_rp_wait(MigrationState *s)
{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
-}
-
-bool migrate_postcopy_ram(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
-}
-
-bool migrate_postcopy(void)
-{
- return migrate_postcopy_ram() || migrate_dirty_bitmaps();
-}
-
-bool migrate_auto_converge(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
-}
-
-bool migrate_zero_blocks(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
-}
-
-bool migrate_postcopy_blocktime(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
-}
-
-bool migrate_use_compression(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
-}
-
-int migrate_compress_level(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.compress_level;
-}
-
-int migrate_compress_threads(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.compress_threads;
-}
-
-int migrate_compress_wait_thread(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.compress_wait_thread;
-}
-
-int migrate_decompress_threads(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.decompress_threads;
-}
-
-bool migrate_dirty_bitmaps(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
-}
-
-bool migrate_ignore_shared(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED];
-}
-
-bool migrate_validate_uuid(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID];
-}
-
-bool migrate_use_events(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
-}
-
-bool migrate_use_multifd(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD];
-}
-
-bool migrate_pause_before_switchover(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[
- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
-}
-
-int migrate_multifd_channels(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.multifd_channels;
-}
-
-MultiFDCompression migrate_multifd_compression(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.multifd_compression;
-}
-
-int migrate_multifd_zlib_level(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.multifd_zlib_level;
-}
-
-int migrate_multifd_zstd_level(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.multifd_zstd_level;
-}
-
-int migrate_use_xbzrle(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
-}
-
-uint64_t migrate_xbzrle_cache_size(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.xbzrle_cache_size;
-}
-
-static int64_t migrate_max_postcopy_bandwidth(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.max_postcopy_bandwidth;
-}
-
-bool migrate_use_block(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
-}
-
-bool migrate_use_return_path(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
-}
-
-bool migrate_use_block_incremental(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.block_incremental;
-}
+    /* If the migration has already failed, don't bother waiting */
+ if (migrate_has_error(s)) {
+ return -1;
+ }
-bool migrate_background_snapshot(void)
-{
- MigrationState *s;
+ qemu_sem_wait(&s->rp_state.rp_sem);
- s = migrate_get_current();
+    /* After the wait, double-check that there's no failure */
+ if (migrate_has_error(s)) {
+ return -1;
+ }
- return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
+ return 0;
}
-/* migration thread support */
-/*
- * Something bad happened to the RP stream, mark an error
- * The caller shall print or trace something to indicate why
- */
-static void mark_source_rp_bad(MigrationState *s)
+void migration_rp_kick(MigrationState *s)
{
- s->rp_state.error = true;
+ qemu_sem_post(&s->rp_state.rp_sem);
}
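/*
 * migration_rp_wait()/migration_rp_kick() form a small sync point between
 * the migration thread and the return path thread. A sketch of the flow,
 * derived from the callers in this file:
 *
 *     migration thread                    return path thread
 *     ----------------                    ------------------
 *     postcopy_resume_handshake()
 *       migration_rp_wait(s)   <-kick--   migrate_handle_rp_resume_ack()
 *                                           migration_rp_kick(s)
 *
 * migrate_has_error() is checked both before blocking and after waking,
 * so a failed migration never hangs on rp_sem.
 */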
static struct rp_cmd_args {
@@ -2600,6 +2175,7 @@ static struct rp_cmd_args {
[MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" },
[MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" },
[MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" },
+ [MIG_RP_MSG_SWITCHOVER_ACK] = { .len = 0, .name = "SWITCHOVER_ACK" },
[MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" },
};
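/*
 * Return path messages are framed as two big-endian 16-bit fields
 * followed by the payload (a sketch of the wire layout, derived from the
 * reader in source_return_path_thread() below):
 *
 *     +---------------+---------------+----------------------+
 *     | type (be16)   | len (be16)    | payload (len bytes)  |
 *     +---------------+---------------+----------------------+
 *
 * A .len of -1 in rp_cmd_args marks a variable-length message; fixed
 * lengths are validated against the header, and every payload must fit
 * in the reader's buffer.
 */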
@@ -2608,10 +2184,11 @@ static struct rp_cmd_args {
* We're allowed to send more than requested (e.g. to round to our page size)
* and we don't need to send pages that have already been sent.
*/
-static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
- ram_addr_t start, size_t len)
+static void
+migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
+ ram_addr_t start, size_t len, Error **errp)
{
- long our_host_ps = qemu_real_host_page_size;
+ long our_host_ps = qemu_real_host_page_size();
trace_migrate_handle_rp_req_pages(rbname, start, len);
@@ -2619,52 +2196,39 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
* Since we currently insist on matching page sizes, just sanity check
* we're being asked for whole host pages.
*/
- if (start & (our_host_ps - 1) ||
- (len & (our_host_ps - 1))) {
- error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
- " len: %zd", __func__, start, len);
- mark_source_rp_bad(ms);
+ if (!QEMU_IS_ALIGNED(start, our_host_ps) ||
+ !QEMU_IS_ALIGNED(len, our_host_ps)) {
+ error_setg(errp, "MIG_RP_MSG_REQ_PAGES: Misaligned page request, start:"
+ RAM_ADDR_FMT " len: %zd", start, len);
return;
}
- if (ram_save_queue_pages(rbname, start, len)) {
- mark_source_rp_bad(ms);
- }
-}
-
-/* Return true to retry, false to quit */
-static bool postcopy_pause_return_path_thread(MigrationState *s)
-{
- trace_postcopy_pause_return_path();
-
- qemu_sem_wait(&s->postcopy_pause_rp_sem);
-
- trace_postcopy_pause_return_path_continued();
-
- return true;
+ ram_save_queue_pages(rbname, start, len, errp);
}
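/*
 * For example (illustrative only), with a 4 KiB host page size the
 * alignment check above behaves as:
 *
 *     start = 0x2000, len = 0x1000   ->  accepted (whole host pages)
 *     start = 0x2100, len = 0x1000   ->  rejected (misaligned start)
 *     start = 0x2000, len = 0x0e00   ->  rejected (partial page length)
 */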
-static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name)
+static bool migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name,
+ Error **errp)
{
RAMBlock *block = qemu_ram_block_by_name(block_name);
if (!block) {
- error_report("%s: invalid block name '%s'", __func__, block_name);
- return -EINVAL;
+ error_setg(errp, "MIG_RP_MSG_RECV_BITMAP has invalid block name '%s'",
+ block_name);
+ return false;
}
/* Fetch the received bitmap and refresh the dirty bitmap */
- return ram_dirty_bitmap_reload(s, block);
+ return ram_dirty_bitmap_reload(s, block, errp);
}
-static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
+static bool migrate_handle_rp_resume_ack(MigrationState *s,
+ uint32_t value, Error **errp)
{
trace_source_return_path_thread_resume_ack(value);
if (value != MIGRATION_RESUME_ACK_VALUE) {
- error_report("%s: illegal resume_ack value %"PRIu32,
- __func__, value);
- return -1;
+ error_setg(errp, "illegal resume_ack value %"PRIu32, value);
+ return false;
}
/* Now both sides are active. */
@@ -2672,13 +2236,16 @@ static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
MIGRATION_STATUS_POSTCOPY_ACTIVE);
/* Notify send thread that time to continue send pages */
- qemu_sem_post(&s->rp_state.rp_sem);
+ migration_rp_kick(s);
- return 0;
+ return true;
}
-/* Release ms->rp_state.from_dst_file in a safe way */
-static void migration_release_from_dst_file(MigrationState *ms)
+/*
+ * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if
+ * existed) in a safe way.
+ */
+static void migration_release_dst_files(MigrationState *ms)
{
QEMUFile *file;
@@ -2691,6 +2258,18 @@ static void migration_release_from_dst_file(MigrationState *ms)
ms->rp_state.from_dst_file = NULL;
}
+    /*
+     * Do the same for the postcopy fast path socket, if one exists. No
+     * locking is needed because this qemufile should only be managed by
+     * the return path thread.
+     */
+ if (ms->postcopy_qemufile_src) {
+ migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src);
+ qemu_file_shutdown(ms->postcopy_qemufile_src);
+ qemu_fclose(ms->postcopy_qemufile_src);
+ ms->postcopy_qemufile_src = NULL;
+ }
+
qemu_fclose(file);
}
@@ -2707,49 +2286,46 @@ static void *source_return_path_thread(void *opaque)
uint32_t tmp32, sibling_error;
ram_addr_t start = 0; /* =0 to silence warning */
size_t len = 0, expected_len;
+ Error *err = NULL;
int res;
trace_source_return_path_thread_entry();
rcu_register_thread();
-retry:
- while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
- migration_is_setup_or_active(ms->state)) {
+ while (migration_is_setup_or_active()) {
trace_source_return_path_thread_loop_top();
+
header_type = qemu_get_be16(rp);
header_len = qemu_get_be16(rp);
if (qemu_file_get_error(rp)) {
- mark_source_rp_bad(ms);
+ qemu_file_get_error_obj(rp, &err);
goto out;
}
if (header_type >= MIG_RP_MSG_MAX ||
header_type == MIG_RP_MSG_INVALID) {
- error_report("RP: Received invalid message 0x%04x length 0x%04x",
- header_type, header_len);
- mark_source_rp_bad(ms);
+ error_setg(&err, "Received invalid message 0x%04x length 0x%04x",
+ header_type, header_len);
goto out;
}
if ((rp_cmd_args[header_type].len != -1 &&
header_len != rp_cmd_args[header_type].len) ||
header_len > sizeof(buf)) {
- error_report("RP: Received '%s' message (0x%04x) with"
- "incorrect length %d expecting %zu",
- rp_cmd_args[header_type].name, header_type, header_len,
- (size_t)rp_cmd_args[header_type].len);
- mark_source_rp_bad(ms);
+            error_setg(&err, "Received '%s' message (0x%04x) with "
+                       "incorrect length %d expecting %zu",
+ rp_cmd_args[header_type].name, header_type, header_len,
+ (size_t)rp_cmd_args[header_type].len);
goto out;
}
/* We know we've got a valid header by this point */
res = qemu_get_buffer(rp, buf, header_len);
if (res != header_len) {
- error_report("RP: Failed reading data for message 0x%04x"
- " read %d expected %d",
- header_type, res, header_len);
- mark_source_rp_bad(ms);
+ error_setg(&err, "Failed reading data for message 0x%04x"
+ " read %d expected %d",
+ header_type, res, header_len);
goto out;
}
@@ -2759,8 +2335,7 @@ retry:
sibling_error = ldl_be_p(buf);
trace_source_return_path_thread_shut(sibling_error);
if (sibling_error) {
- error_report("RP: Sibling indicated error %d", sibling_error);
- mark_source_rp_bad(ms);
+ error_setg(&err, "Sibling indicated error %d", sibling_error);
}
/*
* We'll let the main thread deal with closing the RP
@@ -2772,12 +2347,16 @@ retry:
case MIG_RP_MSG_PONG:
tmp32 = ldl_be_p(buf);
trace_source_return_path_thread_pong(tmp32);
+ qemu_sem_post(&ms->rp_state.rp_pong_acks);
break;
case MIG_RP_MSG_REQ_PAGES:
start = ldq_be_p(buf);
len = ldl_be_p(buf + 8);
- migrate_handle_rp_req_pages(ms, NULL, start, len);
+ migrate_handle_rp_req_pages(ms, NULL, start, len, &err);
+ if (err) {
+ goto out;
+ }
break;
case MIG_RP_MSG_REQ_PAGES_ID:
@@ -2792,74 +2371,74 @@ retry:
expected_len += tmp32;
}
if (header_len != expected_len) {
- error_report("RP: Req_Page_id with length %d expecting %zd",
- header_len, expected_len);
- mark_source_rp_bad(ms);
+ error_setg(&err, "Req_Page_id with length %d expecting %zd",
+ header_len, expected_len);
+ goto out;
+ }
+ migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len,
+ &err);
+ if (err) {
goto out;
}
- migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
break;
case MIG_RP_MSG_RECV_BITMAP:
if (header_len < 1) {
- error_report("%s: missing block name", __func__);
- mark_source_rp_bad(ms);
+ error_setg(&err, "MIG_RP_MSG_RECV_BITMAP missing block name");
goto out;
}
/* Format: len (1B) + idstr (<255B). This ends the idstr. */
buf[buf[0] + 1] = '\0';
- if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) {
- mark_source_rp_bad(ms);
+ if (!migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1), &err)) {
goto out;
}
break;
case MIG_RP_MSG_RESUME_ACK:
tmp32 = ldl_be_p(buf);
- if (migrate_handle_rp_resume_ack(ms, tmp32)) {
- mark_source_rp_bad(ms);
+ if (!migrate_handle_rp_resume_ack(ms, tmp32, &err)) {
goto out;
}
break;
+ case MIG_RP_MSG_SWITCHOVER_ACK:
+ ms->switchover_acked = true;
+ trace_source_return_path_thread_switchover_acked();
+ break;
+
default:
break;
}
}
out:
- res = qemu_file_get_error(rp);
- if (res) {
- if (res == -EIO && migration_in_postcopy()) {
- /*
- * Maybe there is something we can do: it looks like a
- * network down issue, and we pause for a recovery.
- */
- migration_release_from_dst_file(ms);
- rp = NULL;
- if (postcopy_pause_return_path_thread(ms)) {
- /*
- * Reload rp, reset the rest. Referencing it is safe since
- * it's reset only by us above, or when migration completes
- */
- rp = ms->rp_state.from_dst_file;
- ms->rp_state.error = false;
- goto retry;
- }
- }
-
+ if (err) {
+ migrate_set_error(ms, err);
+ error_free(err);
trace_source_return_path_thread_bad_end();
- mark_source_rp_bad(ms);
+ }
+
+ if (ms->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
+ /*
+         * This should be extremely rare: it means we hit yet another
+         * network issue while recovering from the first network failure.
+         * During this period the main migration thread can be waiting on
+         * rp_sem for this thread to sync with the other side.
+ *
+ * When this happens, explicitly kick the migration thread out of
+ * RECOVER stage and back to PAUSED, so the admin can try
+ * everything again.
+ */
+ migration_rp_kick(ms);
}
trace_source_return_path_thread_end();
- migration_release_from_dst_file(ms);
rcu_unregister_thread();
+
return NULL;
}
-static int open_return_path_on_source(MigrationState *ms,
- bool create_thread)
+static int open_return_path_on_source(MigrationState *ms)
{
ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
if (!ms->rp_state.from_dst_file) {
@@ -2868,11 +2447,6 @@ static int open_return_path_on_source(MigrationState *ms,
trace_open_return_path_on_source();
- if (!create_thread) {
- /* We're done */
- return 0;
- }
-
qemu_thread_create(&ms->rp_state.rp_thread, "return path",
source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
ms->rp_state.rp_thread_created = true;
@@ -2882,66 +2456,93 @@ static int open_return_path_on_source(MigrationState *ms,
return 0;
}
-/* Returns 0 if the RP was ok, otherwise there was an error on the RP */
-static int await_return_path_close_on_source(MigrationState *ms)
+/* Return true if error detected, or false otherwise */
+static bool close_return_path_on_source(MigrationState *ms)
{
+ if (!ms->rp_state.rp_thread_created) {
+ return false;
+ }
+
+ trace_migration_return_path_end_before();
+
/*
- * If this is a normal exit then the destination will send a SHUT and the
- * rp_thread will exit, however if there's an error we need to cause
- * it to exit.
+ * If this is a normal exit then the destination will send a SHUT
+ * and the rp_thread will exit, however if there's an error we
+ * need to cause it to exit. shutdown(2), if we have it, will
+ * cause it to unblock if it's stuck waiting for the destination.
*/
- if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
- /*
- * shutdown(2), if we have it, will cause it to unblock if it's stuck
- * waiting for the destination.
- */
- qemu_file_shutdown(ms->rp_state.from_dst_file);
- mark_source_rp_bad(ms);
+ WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
+ if (migrate_has_error(ms) && ms->rp_state.from_dst_file) {
+ qemu_file_shutdown(ms->rp_state.from_dst_file);
+ }
}
- trace_await_return_path_close_on_source_joining();
+
qemu_thread_join(&ms->rp_state.rp_thread);
ms->rp_state.rp_thread_created = false;
- trace_await_return_path_close_on_source_close();
- return ms->rp_state.error;
+ migration_release_dst_files(ms);
+ trace_migration_return_path_end_after();
+
+    /* The return path thread persists any error in MigrationState before quitting */
+ return migrate_has_error(ms);
+}
+
+static inline void
+migration_wait_main_channel(MigrationState *ms)
+{
+    /* Wait until at least one PONG message has been received */
+ qemu_sem_wait(&ms->rp_state.rp_pong_acks);
}
/*
* Switch from normal iteration to postcopy
* Returns non-0 on error
*/
-static int postcopy_start(MigrationState *ms)
+static int postcopy_start(MigrationState *ms, Error **errp)
{
int ret;
QIOChannelBuffer *bioc;
QEMUFile *fb;
- int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
- int64_t bandwidth = migrate_max_postcopy_bandwidth();
+ uint64_t bandwidth = migrate_max_postcopy_bandwidth();
bool restart_block = false;
int cur_state = MIGRATION_STATUS_ACTIVE;
+
+ if (migrate_postcopy_preempt()) {
+ migration_wait_main_channel(ms);
+ if (postcopy_preempt_establish_channel(ms)) {
+ migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
+ error_setg(errp, "%s: Failed to establish preempt channel",
+ __func__);
+ return -1;
+ }
+ }
+
if (!migrate_pause_before_switchover()) {
migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_POSTCOPY_ACTIVE);
}
trace_postcopy_start();
- qemu_mutex_lock_iothread();
+ bql_lock();
trace_postcopy_start_set_run();
- qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
- global_state_store();
- ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+ ret = migration_stop_vm(ms, RUN_STATE_FINISH_MIGRATE);
if (ret < 0) {
+ error_setg_errno(errp, -ret, "%s: Failed to stop the VM", __func__);
goto fail;
}
ret = migration_maybe_pause(ms, &cur_state,
MIGRATION_STATUS_POSTCOPY_ACTIVE);
if (ret < 0) {
+ error_setg_errno(errp, -ret, "%s: Failed in migration_maybe_pause()",
+ __func__);
goto fail;
}
ret = bdrv_inactivate_all();
if (ret < 0) {
+ error_setg_errno(errp, -ret, "%s: Failed in bdrv_inactivate_all()",
+ __func__);
goto fail;
}
restart_block = true;
@@ -2959,10 +2560,7 @@ static int postcopy_start(MigrationState *ms)
* that are dirty
*/
if (migrate_postcopy_ram()) {
- if (ram_postcopy_send_discard_bitmap(ms)) {
- error_report("postcopy send discard bitmap failed");
- goto fail;
- }
+ ram_postcopy_send_discard_bitmap(ms);
}
/*
@@ -2970,12 +2568,7 @@ static int postcopy_start(MigrationState *ms)
* will notice we're in POSTCOPY_ACTIVE and not actually
* wrap their state up here
*/
- /* 0 max-postcopy-bandwidth means unlimited */
- if (!bandwidth) {
- qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
- } else {
- qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO);
- }
+ migration_rate_set(bandwidth);
if (migrate_postcopy_ram()) {
/* Ping just for debugging, helps line traces up */
qemu_savevm_send_ping(ms->to_dst_file, 2);
@@ -2994,7 +2587,7 @@ static int postcopy_start(MigrationState *ms)
*/
bioc = qio_channel_buffer_new(4096);
qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
- fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
+ fb = qemu_file_new_output(QIO_CHANNEL(bioc));
object_unref(OBJECT(bioc));
/*
@@ -3018,7 +2611,7 @@ static int postcopy_start(MigrationState *ms)
*/
ret = qemu_file_get_error(ms->to_dst_file);
if (ret) {
- error_report("postcopy_start: Migration stream errored (pre package)");
+ error_setg(errp, "postcopy_start: Migration stream errored (pre package)");
goto fail_closefb;
}
@@ -3026,6 +2619,7 @@ static int postcopy_start(MigrationState *ms)
/* Now send that blob */
if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
+ error_setg(errp, "%s: Failed to send packaged data", __func__);
goto fail_closefb;
}
qemu_fclose(fb);
@@ -3034,12 +2628,11 @@ static int postcopy_start(MigrationState *ms)
* at the transition to postcopy and after the device state; in particular
* spice needs to trigger a transition now
*/
- ms->postcopy_after_devices = true;
- notifier_list_notify(&migration_state_notifiers, ms);
+ migration_call_notifiers(ms, MIG_EVENT_PRECOPY_DONE, NULL);
- ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;
+ migration_downtime_end(ms);
- qemu_mutex_unlock_iothread();
+ bql_unlock();
if (migrate_postcopy_ram()) {
/*
@@ -3055,10 +2648,11 @@ static int postcopy_start(MigrationState *ms)
ret = qemu_file_get_error(ms->to_dst_file);
if (ret) {
- error_report("postcopy_start: Migration stream errored");
- migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
- MIGRATION_STATUS_FAILED);
+ error_setg_errno(errp, -ret, "postcopy_start: Migration stream error");
+ bql_lock();
+ goto fail;
}
+ trace_postcopy_preempt_enabled(migrate_postcopy_preempt());
return ret;
@@ -3073,18 +2667,19 @@ fail:
*/
Error *local_err = NULL;
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
}
}
- qemu_mutex_unlock_iothread();
+ migration_call_notifiers(ms, MIG_EVENT_PRECOPY_FAILED, NULL);
+ bql_unlock();
return -1;
}
/**
* migration_maybe_pause: Pause if required to by
- * migrate_pause_before_switchover called with the iothread locked
+ * migrate_pause_before_switchover called with the BQL locked
* Returns: 0 on success
*/
static int migration_maybe_pause(MigrationState *s,
@@ -3112,19 +2707,96 @@ static int migration_maybe_pause(MigrationState *s,
* wait for the 'pause_sem' semaphore.
*/
if (s->state != MIGRATION_STATUS_CANCELLING) {
- qemu_mutex_unlock_iothread();
+ bql_unlock();
migrate_set_state(&s->state, *current_active_state,
MIGRATION_STATUS_PRE_SWITCHOVER);
qemu_sem_wait(&s->pause_sem);
migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
new_state);
*current_active_state = new_state;
- qemu_mutex_lock_iothread();
+ bql_lock();
}
return s->state == new_state ? 0 : -EINVAL;
}
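/*
 * A sketch of the state flow when pause-before-switchover is enabled,
 * based on the callers in this file:
 *
 *     ACTIVE -> PRE_SWITCHOVER --(qmp_migrate_continue)--> DEVICE
 *                                            (or POSTCOPY_ACTIVE)
 *
 * The BQL is dropped while parked on pause_sem so the monitor stays
 * responsive; qmp_migrate_continue() posts pause_sem to resume.
 */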
+static int migration_completion_precopy(MigrationState *s,
+ int *current_active_state)
+{
+ int ret;
+
+ bql_lock();
+
+ if (!migrate_mode_is_cpr(s)) {
+ ret = migration_stop_vm(s, RUN_STATE_FINISH_MIGRATE);
+ if (ret < 0) {
+ goto out_unlock;
+ }
+ }
+
+ ret = migration_maybe_pause(s, current_active_state,
+ MIGRATION_STATUS_DEVICE);
+ if (ret < 0) {
+ goto out_unlock;
+ }
+
+ /*
+ * Inactivate disks except in COLO, and track that we have done so in order
+ * to remember to reactivate them if migration fails or is cancelled.
+ */
+ s->block_inactive = !migrate_colo();
+ migration_rate_set(RATE_LIMIT_DISABLED);
+ ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
+ s->block_inactive);
+out_unlock:
+ bql_unlock();
+ return ret;
+}
+
+static void migration_completion_postcopy(MigrationState *s)
+{
+ trace_migration_completion_postcopy_end();
+
+ bql_lock();
+ qemu_savevm_state_complete_postcopy(s->to_dst_file);
+ bql_unlock();
+
+ /*
+     * Shut down the postcopy fast path thread. This is only needed when the
+     * dest QEMU binary is old (7.1/7.2). QEMU 8.0+ doesn't need this.
+ */
+ if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
+ postcopy_preempt_shutdown_file(s);
+ }
+
+ trace_migration_completion_postcopy_end_after_complete();
+}
+
+static void migration_completion_failed(MigrationState *s,
+ int current_active_state)
+{
+ if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE ||
+ s->state == MIGRATION_STATUS_DEVICE)) {
+ /*
+ * If not doing postcopy, vm_start() will be called: let's
+ * regain control on images.
+ */
+ Error *local_err = NULL;
+
+ bql_lock();
+ bdrv_activate_all(&local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ } else {
+ s->block_inactive = false;
+ }
+ bql_unlock();
+ }
+
+ migrate_set_state(&s->state, current_active_state,
+ MIGRATION_STATUS_FAILED);
+}
+
/**
* migration_completion: Used by migration_thread when there's not much left.
* The caller 'breaks' the loop when this returns.
@@ -3133,96 +2805,52 @@ static int migration_maybe_pause(MigrationState *s,
*/
static void migration_completion(MigrationState *s)
{
- int ret;
+ int ret = 0;
int current_active_state = s->state;
+ Error *local_err = NULL;
if (s->state == MIGRATION_STATUS_ACTIVE) {
- qemu_mutex_lock_iothread();
- s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
- qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
- s->vm_was_running = runstate_is_running();
- ret = global_state_store();
-
- if (!ret) {
- bool inactivate = !migrate_colo_enabled();
- ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
- trace_migration_completion_vm_stop(ret);
- if (ret >= 0) {
- ret = migration_maybe_pause(s, &current_active_state,
- MIGRATION_STATUS_DEVICE);
- }
- if (ret >= 0) {
- qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
- ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
- inactivate);
- }
- if (inactivate && ret >= 0) {
- s->block_inactive = true;
- }
- }
- qemu_mutex_unlock_iothread();
-
- if (ret < 0) {
- goto fail;
- }
+ ret = migration_completion_precopy(s, &current_active_state);
} else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
- trace_migration_completion_postcopy_end();
+ migration_completion_postcopy(s);
+ } else {
+ ret = -1;
+ }
- qemu_savevm_state_complete_postcopy(s->to_dst_file);
- trace_migration_completion_postcopy_end_after_complete();
- } else if (s->state == MIGRATION_STATUS_CANCELLING) {
+ if (ret < 0) {
goto fail;
}
- /*
- * If rp was opened we must clean up the thread before
- * cleaning everything else up (since if there are no failures
- * it will wait for the destination to send it's status in
- * a SHUT command).
- */
- if (s->rp_state.rp_thread_created) {
- int rp_error;
- trace_migration_return_path_end_before();
- rp_error = await_return_path_close_on_source(s);
- trace_migration_return_path_end_after(rp_error);
- if (rp_error) {
- goto fail_invalidate;
- }
+ if (close_return_path_on_source(s)) {
+ goto fail;
}
if (qemu_file_get_error(s->to_dst_file)) {
trace_migration_completion_file_err();
- goto fail_invalidate;
+ goto fail;
}
- if (!migrate_colo_enabled()) {
- migrate_set_state(&s->state, current_active_state,
- MIGRATION_STATUS_COMPLETED);
+ if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) {
+ /* COLO does not support postcopy */
+ migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
+ MIGRATION_STATUS_COLO);
+ } else {
+ migration_completion_end(s);
}
return;
-fail_invalidate:
- /* If not doing postcopy, vm_start() will be called: let's regain
- * control on images.
- */
- if (s->state == MIGRATION_STATUS_ACTIVE ||
- s->state == MIGRATION_STATUS_DEVICE) {
- Error *local_err = NULL;
-
- qemu_mutex_lock_iothread();
- bdrv_invalidate_cache_all(&local_err);
- if (local_err) {
- error_report_err(local_err);
- } else {
- s->block_inactive = false;
- }
- qemu_mutex_unlock_iothread();
+fail:
+ if (qemu_file_get_error_obj(s->to_dst_file, &local_err)) {
+ migrate_set_error(s, local_err);
+ error_free(local_err);
+ } else if (ret) {
+ error_setg_errno(&local_err, -ret, "Error in migration completion");
+ migrate_set_error(s, local_err);
+ error_free(local_err);
}
-fail:
- migrate_set_state(&s->state, current_active_state,
- MIGRATION_STATUS_FAILED);
+ migration_completion_failed(s, current_active_state);
}
/**
@@ -3235,13 +2863,6 @@ static void bg_migration_completion(MigrationState *s)
{
int current_active_state = s->state;
- /*
- * Stop tracking RAM writes - un-protect memory, un-register UFFD
- * memory ranges, flush kernel wait queues and wake up threads
- * waiting for write fault to be resolved.
- */
- ram_write_tracking_stop();
-
if (s->state == MIGRATION_STATUS_ACTIVE) {
/*
* By this moment we have RAM content saved into the migration stream.
@@ -3260,8 +2881,7 @@ static void bg_migration_completion(MigrationState *s)
goto fail;
}
- migrate_set_state(&s->state, current_active_state,
- MIGRATION_STATUS_COMPLETED);
+ migration_completion_end(s);
return;
fail:
@@ -3269,12 +2889,6 @@ fail:
MIGRATION_STATUS_FAILED);
}
-bool migrate_colo_enabled(void)
-{
- MigrationState *s = migrate_get_current();
- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
-}
-
typedef enum MigThrError {
/* No error detected */
MIG_THR_ERR_NONE = 0,
@@ -3289,7 +2903,9 @@ static int postcopy_resume_handshake(MigrationState *s)
qemu_savevm_send_postcopy_resume(s->to_dst_file);
while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
- qemu_sem_wait(&s->rp_state.rp_sem);
+ if (migration_rp_wait(s)) {
+ return -1;
+ }
}
if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
@@ -3316,6 +2932,20 @@ static int postcopy_do_resume(MigrationState *s)
}
/*
+ * If preempt is enabled, re-establish the preempt channel. Note that
+ * we do it after resume prepare to make sure the main channel will be
+     * created before the preempt channel. E.g. on a flaky network, the
+     * dest QEMU may otherwise confuse the order in which the preempt and
+     * main channels were established. This guarantees the correct order.
+ */
+ ret = postcopy_preempt_establish_channel(s);
+ if (ret) {
+ error_report("%s: postcopy_preempt_establish_channel(): %d",
+ __func__, ret);
+ return ret;
+ }
+
+ /*
* Last handshake with destination on the resume (destination will
* switch to postcopy-active afterwards)
*/
@@ -3341,6 +2971,13 @@ static MigThrError postcopy_pause(MigrationState *s)
QEMUFile *file;
/*
+ * We're already pausing, so ignore any errors on the return
+ * path and just wait for the thread to finish. It will be
+ * re-created when we resume.
+ */
+ close_return_path_on_source(s);
+
+ /*
* Current channel is possibly broken. Release it. Note that this is
* guaranteed even without lock because to_dst_file should only be
* modified by the migration thread. That also guarantees that the
@@ -3376,12 +3013,6 @@ static MigThrError postcopy_pause(MigrationState *s)
if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
/* Woken up by a recover procedure. Give it a shot */
- /*
- * Firstly, let's wake up the return path now, with a new
- * return path channel.
- */
- qemu_sem_post(&s->postcopy_pause_rp_sem);
-
/* Do the resume logic */
if (postcopy_do_resume(s) == 0) {
/* Let's continue! */
@@ -3402,6 +3033,17 @@ static MigThrError postcopy_pause(MigrationState *s)
}
}
+void migration_file_set_error(int err)
+{
+ MigrationState *s = current_migration;
+
+ WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
+ if (s->to_dst_file) {
+ qemu_file_set_error(s->to_dst_file, err);
+ }
+ }
+}
+
static MigThrError migration_detect_error(MigrationState *s)
{
int ret;
@@ -3414,8 +3056,13 @@ static MigThrError migration_detect_error(MigrationState *s)
return MIG_THR_ERR_FATAL;
}
- /* Try to detect any file errors */
- ret = qemu_file_get_error_obj(s->to_dst_file, &local_error);
+ /*
+ * Try to detect any file errors. Note that postcopy_qemufile_src will
+ * be NULL when postcopy preempt is not enabled.
+ */
+ ret = qemu_file_get_error_obj_any(s->to_dst_file,
+ s->postcopy_qemufile_src,
+ &local_error);
if (!ret) {
/* Everything is fine */
assert(!local_error);
@@ -3427,7 +3074,7 @@ static MigThrError migration_detect_error(MigrationState *s)
error_free(local_error);
}
- if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) {
+ if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret) {
/*
* For postcopy, we allow the network to be down for a
* while. After that, it can be continued by a
@@ -3447,31 +3094,28 @@ static MigThrError migration_detect_error(MigrationState *s)
}
}
-/* How many bytes have we transferred since the beginning of the migration */
-static uint64_t migration_total_bytes(MigrationState *s)
-{
- return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes;
-}
-
-static void migration_calculate_complete(MigrationState *s)
+static void migration_completion_end(MigrationState *s)
{
- uint64_t bytes = migration_total_bytes(s);
+ uint64_t bytes = migration_transferred_bytes();
int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
int64_t transfer_time;
+ /*
+ * Take the BQL here so that query-migrate on the QMP thread sees:
+ * - atomic update of s->total_time and s->mbps;
+ * - correct ordering of s->mbps update vs. s->state;
+ */
+ bql_lock();
+ migration_downtime_end(s);
s->total_time = end_time - s->start_time;
- if (!s->downtime) {
- /*
- * It's still not set, so we are precopy migration. For
- * postcopy, downtime is calculated during postcopy_start().
- */
- s->downtime = end_time - s->downtime_start;
- }
-
transfer_time = s->total_time - s->setup_time;
if (transfer_time) {
s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
}
+
+ migrate_set_state(&s->state, s->state,
+ MIGRATION_STATUS_COMPLETED);
+ bql_unlock();
}
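/*
 * A quick sanity check of the mbps formula above (illustrative only):
 * bytes is in bytes and transfer_time in milliseconds, so
 * bytes * 8.0 / transfer_time is bits per millisecond (== kbits/s), and
 * the final / 1000 yields Mbits/s. E.g. transferring 2 GiB in 20 s:
 *
 *     (2147483648 * 8.0) / 20000 / 1000 ~= 859 Mbps
 */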
static void update_iteration_initial_status(MigrationState *s)
@@ -3481,7 +3125,7 @@ static void update_iteration_initial_status(MigrationState *s)
* wrong speed calculation.
*/
s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
- s->iteration_initial_bytes = migration_total_bytes(s);
+ s->iteration_initial_bytes = migration_transferred_bytes();
s->iteration_initial_pages = ram_get_total_transferred_pages();
}
@@ -3490,17 +3134,33 @@ static void migration_update_counters(MigrationState *s,
{
uint64_t transferred, transferred_pages, time_spent;
uint64_t current_bytes; /* bytes transferred since the beginning */
+ uint64_t switchover_bw;
+ /* Expected bandwidth when switching over to destination QEMU */
+ double expected_bw_per_ms;
double bandwidth;
if (current_time < s->iteration_start_time + BUFFER_DELAY) {
return;
}
- current_bytes = migration_total_bytes(s);
+ switchover_bw = migrate_avail_switchover_bandwidth();
+ current_bytes = migration_transferred_bytes();
transferred = current_bytes - s->iteration_initial_bytes;
time_spent = current_time - s->iteration_start_time;
bandwidth = (double)transferred / time_spent;
- s->threshold_size = bandwidth * s->parameters.downtime_limit;
+
+ if (switchover_bw) {
+ /*
+         * If the user specified a switchover bandwidth, let's trust the
+         * user, since that can be more accurate than what we estimated.
+ */
+ expected_bw_per_ms = switchover_bw / 1000;
+ } else {
+        /* If the user doesn't specify a bandwidth, use the estimate */
+ expected_bw_per_ms = bandwidth;
+ }
+
+ s->threshold_size = expected_bw_per_ms * migrate_downtime_limit();
s->mbps = (((double) transferred * 8.0) /
((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
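
threshold_size is therefore the amount of data the chosen bandwidth can move within the downtime limit; once the pending bytes drop below it, switchover becomes viable. A worked example with assumed figures:

/* Purely illustrative numbers:
 *   no switchover_bw set, measured bandwidth = 500000 bytes/ms (~500 MB/s)
 *   migrate_downtime_limit() = 300 ms
 *   threshold_size = 500000 * 300 = 150000000 bytes (~143 MiB)
 */
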
@@ -3514,16 +3174,34 @@ static void migration_update_counters(MigrationState *s,
* if we haven't sent anything, we don't want to
* recalculate. 10000 is a small enough number for our purposes
*/
- if (ram_counters.dirty_pages_rate && transferred > 10000) {
- s->expected_downtime = ram_counters.remaining / bandwidth;
+ if (stat64_get(&mig_stats.dirty_pages_rate) &&
+ transferred > 10000) {
+ s->expected_downtime =
+ stat64_get(&mig_stats.dirty_bytes_last_sync) / expected_bw_per_ms;
}
- qemu_file_reset_rate_limit(s->to_dst_file);
+ migration_rate_reset();
update_iteration_initial_status(s);
trace_migrate_transferred(transferred, time_spent,
- bandwidth, s->threshold_size);
+ /* Both in units of bytes/ms */
+ bandwidth, switchover_bw / 1000,
+ s->threshold_size);
+}
+
+static bool migration_can_switchover(MigrationState *s)
+{
+ if (!migrate_switchover_ack()) {
+ return true;
+ }
+
+ /* No reason to wait for switchover ACK if VM is stopped */
+ if (!runstate_is_running()) {
+ return true;
+ }
+
+ return s->switchover_acked;
}
/* Migration thread iteration status */
@@ -3539,33 +3217,39 @@ typedef enum {
*/
static MigIterateState migration_iteration_run(MigrationState *s)
{
- uint64_t pending_size, pend_pre, pend_compat, pend_post;
+ uint64_t must_precopy, can_postcopy, pending_size;
+ Error *local_err = NULL;
bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
+ bool can_switchover = migration_can_switchover(s);
- qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre,
- &pend_compat, &pend_post);
- pending_size = pend_pre + pend_compat + pend_post;
+ qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy);
+ pending_size = must_precopy + can_postcopy;
+ trace_migrate_pending_estimate(pending_size, must_precopy, can_postcopy);
- trace_migrate_pending(pending_size, s->threshold_size,
- pend_pre, pend_compat, pend_post);
+ if (pending_size < s->threshold_size) {
+ qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy);
+ pending_size = must_precopy + can_postcopy;
+ trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy);
+ }
- if (pending_size && pending_size >= s->threshold_size) {
- /* Still a significant amount to transfer */
- if (!in_postcopy && pend_pre <= s->threshold_size &&
- qatomic_read(&s->start_postcopy)) {
- if (postcopy_start(s)) {
- error_report("%s: postcopy failed to start", __func__);
- }
- return MIG_ITERATE_SKIP;
- }
- /* Just another iteration step */
- qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
- } else {
+ if ((!pending_size || pending_size < s->threshold_size) && can_switchover) {
trace_migration_thread_low_pending(pending_size);
migration_completion(s);
return MIG_ITERATE_BREAK;
}
+ /* Still a significant amount to transfer */
+ if (!in_postcopy && must_precopy <= s->threshold_size && can_switchover &&
+ qatomic_read(&s->start_postcopy)) {
+ if (postcopy_start(s, &local_err)) {
+ migrate_set_error(s, local_err);
+ error_report_err(local_err);
+ }
+ return MIG_ITERATE_SKIP;
+ }
+
+ /* Just another iteration step */
+ qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
return MIG_ITERATE_RESUME;
}
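
The two-stage query keeps the hot loop cheap: the estimate avoids a dirty-bitmap sync, and the exact (and expensive) accounting is only paid once the estimate already fits under the threshold. The pattern in isolation, as a sketch:

uint64_t must_precopy, can_postcopy;

qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy); /* cheap */
if (must_precopy + can_postcopy < s->threshold_size) {
    /* Only now pay for the dirty bitmap sync to get exact numbers */
    qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy);
}
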
@@ -3574,37 +3258,26 @@ static void migration_iteration_finish(MigrationState *s)
/* If we enabled cpu throttling for auto-converge, turn it off. */
cpu_throttle_stop();
- qemu_mutex_lock_iothread();
+ bql_lock();
switch (s->state) {
case MIGRATION_STATUS_COMPLETED:
- migration_calculate_complete(s);
runstate_set(RUN_STATE_POSTMIGRATE);
break;
-
- case MIGRATION_STATUS_ACTIVE:
- /*
- * We should really assert here, but since it's during
- * migration, let's try to reduce the usage of assertions.
- */
- if (!migrate_colo_enabled()) {
- error_report("%s: critical error: calling COLO code without "
- "COLO enabled", __func__);
- }
+ case MIGRATION_STATUS_COLO:
+ assert(migrate_colo());
migrate_start_colo_process(s);
- /*
- * Fixme: we will run VM in COLO no matter its old running state.
- * After exited COLO, we will keep running.
- */
- s->vm_was_running = true;
+ s->vm_old_state = RUN_STATE_RUNNING;
/* Fallthrough */
case MIGRATION_STATUS_FAILED:
case MIGRATION_STATUS_CANCELLED:
case MIGRATION_STATUS_CANCELLING:
- if (s->vm_was_running) {
- vm_start();
+ if (runstate_is_live(s->vm_old_state)) {
+ if (!runstate_check(RUN_STATE_SHUTDOWN)) {
+ vm_start();
+ }
} else {
if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
- runstate_set(RUN_STATE_POSTMIGRATE);
+ runstate_set(s->vm_old_state);
}
}
break;
@@ -3614,18 +3287,23 @@ static void migration_iteration_finish(MigrationState *s)
error_report("%s: Unknown ending state %d", __func__, s->state);
break;
}
- migrate_fd_cleanup_schedule(s);
- qemu_mutex_unlock_iothread();
+
+ migration_bh_schedule(migrate_fd_cleanup_bh, s);
+ bql_unlock();
}
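
migration_bh_schedule() replaces the open-coded qemu_bh_new()/qemu_bh_schedule() pairs. A plausible shape for the wrapper, assuming it pins the MigrationState for the lifetime of the bottom half and deletes the one-shot BH after dispatch:

typedef struct {
    QEMUBH *bh;
    QEMUBHFunc *cb;
    void *opaque;
} MigrationBH;

static void migration_bh_dispatch_bh(void *opaque)
{
    MigrationBH *migbh = opaque;

    qemu_bh_delete(migbh->bh);      /* one-shot: clean up the BH itself */
    migbh->cb(migbh->opaque);       /* run the real callback */
    object_unref(OBJECT(migrate_get_current()));
    g_free(migbh);
}

void migration_bh_schedule(QEMUBHFunc *cb, void *opaque)
{
    MigrationBH *migbh = g_new0(MigrationBH, 1);

    migbh->bh = qemu_bh_new(migration_bh_dispatch_bh, migbh);
    migbh->cb = cb;
    migbh->opaque = opaque;

    /* Keep the migration object alive until the BH has run */
    object_ref(OBJECT(migrate_get_current()));
    qemu_bh_schedule(migbh->bh);
}
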
static void bg_migration_iteration_finish(MigrationState *s)
{
- qemu_mutex_lock_iothread();
+ /*
+ * Stop tracking RAM writes - un-protect memory, un-register UFFD
+ * memory ranges, flush kernel wait queues and wake up threads
+ * waiting for write fault to be resolved.
+ */
+ ram_write_tracking_stop();
+
+ bql_lock();
switch (s->state) {
case MIGRATION_STATUS_COMPLETED:
- migration_calculate_complete(s);
- break;
-
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_FAILED:
case MIGRATION_STATUS_CANCELLED:
@@ -3638,8 +3316,8 @@ static void bg_migration_iteration_finish(MigrationState *s)
break;
}
- migrate_fd_cleanup_schedule(s);
- qemu_mutex_unlock_iothread();
+ migration_bh_schedule(migrate_fd_cleanup_bh, s);
+ bql_unlock();
}
/*
@@ -3677,7 +3355,7 @@ bool migration_rate_limit(void)
bool urgent = false;
migration_update_counters(s, now);
- if (qemu_file_rate_limit(s->to_dst_file)) {
+ if (migration_rate_exceeded(s->to_dst_file)) {
if (qemu_file_get_error(s->to_dst_file)) {
return false;
@@ -3729,7 +3407,8 @@ static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
while (timeout-- && qemu_savevm_state_guest_unplug_pending()) {
qemu_sem_timedwait(&s->wait_unplug_sem, 250);
}
- if (qemu_savevm_state_guest_unplug_pending()) {
+ if (qemu_savevm_state_guest_unplug_pending() &&
+ !qtest_enabled()) {
warn_report("migration: partially unplugged device on "
"failure");
}
@@ -3748,16 +3427,25 @@ static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
static void *migration_thread(void *opaque)
{
MigrationState *s = opaque;
+ MigrationThread *thread = NULL;
int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
MigThrError thr_error;
bool urgent = false;
+ thread = migration_threads_add("live_migration", qemu_get_thread_id());
+
rcu_register_thread();
object_ref(OBJECT(s));
update_iteration_initial_status(s);
+ if (!multifd_send_setup()) {
+ goto out;
+ }
+
+ bql_lock();
qemu_savevm_state_header(s->to_dst_file);
+ bql_unlock();
/*
* If we opened the return path, we need to make sure dst has it
@@ -3780,12 +3468,14 @@ static void *migration_thread(void *opaque)
qemu_savevm_send_postcopy_advise(s->to_dst_file);
}
- if (migrate_colo_enabled()) {
+ if (migrate_colo()) {
/* Notify migration destination that we enable COLO */
qemu_savevm_send_colo_enable(s->to_dst_file);
}
+ bql_lock();
qemu_savevm_state_setup(s->to_dst_file);
+ bql_unlock();
qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_ACTIVE);
@@ -3794,8 +3484,8 @@ static void *migration_thread(void *opaque)
trace_migration_thread_setup_complete();
- while (migration_is_active(s)) {
- if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
+ while (migration_is_active()) {
+ if (urgent || !migration_rate_exceeded(s->to_dst_file)) {
MigIterateState iter_state = migration_iteration_run(s);
if (iter_state == MIG_ITERATE_SKIP) {
continue;
@@ -3824,10 +3514,12 @@ static void *migration_thread(void *opaque)
urgent = migration_rate_limit();
}
+out:
trace_migration_thread_after_loop();
migration_iteration_finish(s);
object_unref(OBJECT(s));
rcu_unregister_thread();
+ migration_threads_remove(thread);
return NULL;
}
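
migration_threads_add()/migration_threads_remove() register the worker in a small global registry (migration/threadinfo.c) so the QMP query-migrationthreads command can report thread names and TIDs. A minimal sketch of the registry, ignoring locking, assuming MigrationThread carries a name, a thread_id and a list node:

static QLIST_HEAD(, MigrationThread) migration_threads;

MigrationThread *migration_threads_add(const char *name, int thread_id)
{
    MigrationThread *thread = g_new0(MigrationThread, 1);

    thread->name = name;
    thread->thread_id = thread_id;
    QLIST_INSERT_HEAD(&migration_threads, thread, node);
    return thread;
}

void migration_threads_remove(MigrationThread *thread)
{
    if (thread) {
        QLIST_REMOVE(thread, node);
        g_free(thread);
    }
}
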
@@ -3835,11 +3527,8 @@ static void bg_migration_vm_start_bh(void *opaque)
{
MigrationState *s = opaque;
- qemu_bh_delete(s->vm_start_bh);
- s->vm_start_bh = NULL;
-
- vm_start();
- s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start;
+ vm_resume(s->vm_old_state);
+ migration_downtime_end(s);
}
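
vm_resume() folds the old "restart only if it was running" logic into one call: presumably it restarts a live runstate and merely restores a non-live one, roughly:

void vm_resume(RunState state)
{
    if (runstate_is_live(state)) {
        vm_start();             /* guest was actually running */
    } else {
        runstate_set(state);    /* restore e.g. paused as-is */
    }
}
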
/**
@@ -3868,7 +3557,7 @@ static void *bg_migration_thread(void *opaque)
rcu_register_thread();
object_ref(OBJECT(s));
- qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
+ migration_rate_set(RATE_LIMIT_DISABLED);
setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
/*
@@ -3882,7 +3571,7 @@ static void *bg_migration_thread(void *opaque)
*/
s->bioc = qio_channel_buffer_new(512 * 1024);
qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
- fb = qemu_fopen_channel_output(QIO_CHANNEL(s->bioc));
+ fb = qemu_file_new_output(QIO_CHANNEL(s->bioc));
object_unref(OBJECT(s->bioc));
update_iteration_initial_status(s);
@@ -3895,8 +3584,10 @@ static void *bg_migration_thread(void *opaque)
ram_write_tracking_prepare();
#endif
+ bql_lock();
qemu_savevm_state_header(s->to_dst_file);
qemu_savevm_state_setup(s->to_dst_file);
+ bql_unlock();
qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_ACTIVE);
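
bql_lock()/bql_unlock() are the renamed qemu_mutex_lock_iothread()/qemu_mutex_unlock_iothread(); the lock itself is unchanged. A hypothetical compatibility shim for out-of-tree code still using the old names would just be a pair of macros:

/* Hypothetical shim, only for code that still uses the old names */
#define qemu_mutex_lock_iothread()   bql_lock()
#define qemu_mutex_unlock_iothread() bql_unlock()
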
@@ -3904,22 +3595,10 @@ static void *bg_migration_thread(void *opaque)
s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
trace_migration_thread_setup_complete();
- s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
- qemu_mutex_lock_iothread();
-
- /*
- * If VM is currently in suspended state, then, to make a valid runstate
- * transition in vm_stop_force_state() we need to wakeup it up.
- */
- qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
- s->vm_was_running = runstate_is_running();
+ bql_lock();
- if (global_state_store()) {
- goto fail;
- }
- /* Forcibly stop VM before saving state of vCPUs and devices */
- if (vm_stop_force_state(RUN_STATE_PAUSED)) {
+ if (migration_stop_vm(s, RUN_STATE_PAUSED)) {
goto fail;
}
/*
@@ -3948,12 +3627,10 @@ static void *bg_migration_thread(void *opaque)
* calling VM state change notifiers from vm_start() would initiate
* writes to virtio VQs memory which is in write-protected region.
*/
- s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
- qemu_bh_schedule(s->vm_start_bh);
-
- qemu_mutex_unlock_iothread();
+ migration_bh_schedule(bg_migration_vm_start_bh, s);
+ bql_unlock();
- while (migration_is_active(s)) {
+ while (migration_is_active()) {
MigIterateState iter_state = bg_migration_iteration_run(s);
if (iter_state == MIG_ITERATE_SKIP) {
continue;
@@ -3980,7 +3657,7 @@ fail:
if (early_fail) {
migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_FAILED);
- qemu_mutex_unlock_iothread();
+ bql_unlock();
}
bg_migration_iteration_finish(s);
@@ -3995,8 +3672,9 @@ fail:
void migrate_fd_connect(MigrationState *s, Error *error_in)
{
Error *local_err = NULL;
- int64_t rate_limit;
+ uint64_t rate_limit;
bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;
+ int ret;
/*
* If there's a previous error, free it and prepare for another one.
@@ -4005,13 +3683,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
*/
migrate_error_free(s);
- s->expected_downtime = s->parameters.downtime_limit;
- if (resume) {
- assert(s->cleanup_bh);
- } else {
- assert(!s->cleanup_bh);
- s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
- }
+ s->expected_downtime = migrate_downtime_limit();
if (error_in) {
migrate_fd_error(s, error_in);
if (resume) {
@@ -4031,17 +3703,18 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
if (resume) {
/* This is a resumed migration */
- rate_limit = s->parameters.max_postcopy_bandwidth /
- XFER_LIMIT_RATIO;
+ rate_limit = migrate_max_postcopy_bandwidth();
} else {
/* This is a fresh new migration */
- rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO;
+ rate_limit = migrate_max_bandwidth();
/* Notify before starting migration thread */
- notifier_list_notify(&migration_state_notifiers, s);
+ if (migration_call_notifiers(s, MIG_EVENT_PRECOPY_SETUP, &local_err)) {
+ goto fail;
+ }
}
- qemu_file_set_rate_limit(s->to_dst_file, rate_limit);
+ migration_rate_set(rate_limit);
qemu_file_set_blocking(s->to_dst_file, true);
/*
@@ -4049,15 +3722,22 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
* precopy, only if user specified "return-path" capability would
* QEMU uses the return path.
*/
- if (migrate_postcopy_ram() || migrate_use_return_path()) {
- if (open_return_path_on_source(s, !resume)) {
- error_report("Unable to open return-path for postcopy");
- migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
- migrate_fd_cleanup(s);
- return;
+ if (migrate_postcopy_ram() || migrate_return_path()) {
+ if (open_return_path_on_source(s)) {
+ error_setg(&local_err, "Unable to open return-path for postcopy");
+ goto fail;
}
}
+ /*
+ * This needs to be done before resuming a postcopy. Note: for newer
+ * QEMUs the channel creation is delayed until postcopy_start(), to
+ * avoid out-of-order channel creation.
+ */
+ if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
+ postcopy_preempt_setup(s);
+ }
+
if (resume) {
/* Wakeup the main migration thread to do the recovery */
migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
@@ -4066,12 +3746,12 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
return;
}
- if (multifd_save_setup(&local_err) != 0) {
- error_report_err(local_err);
- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
- MIGRATION_STATUS_FAILED);
- migrate_fd_cleanup(s);
- return;
+ if (migrate_mode_is_cpr(s)) {
+ ret = migration_stop_vm(s, RUN_STATE_FINISH_MIGRATE);
+ if (ret < 0) {
+ error_setg(&local_err, "migration_stop_vm failed, error %d", -ret);
+ goto fail;
+ }
}
if (migrate_background_snapshot()) {
@@ -4082,125 +3762,14 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
migration_thread, s, QEMU_THREAD_JOINABLE);
}
s->migration_thread_running = true;
-}
-
-void migration_global_dump(Monitor *mon)
-{
- MigrationState *ms = migrate_get_current();
+ return;
- monitor_printf(mon, "globals:\n");
- monitor_printf(mon, "store-global-state: %s\n",
- ms->store_global_state ? "on" : "off");
- monitor_printf(mon, "only-migratable: %s\n",
- only_migratable ? "on" : "off");
- monitor_printf(mon, "send-configuration: %s\n",
- ms->send_configuration ? "on" : "off");
- monitor_printf(mon, "send-section-footer: %s\n",
- ms->send_section_footer ? "on" : "off");
- monitor_printf(mon, "decompress-error-check: %s\n",
- ms->decompress_error_check ? "on" : "off");
- monitor_printf(mon, "clear-bitmap-shift: %u\n",
- ms->clear_bitmap_shift);
-}
-
-#define DEFINE_PROP_MIG_CAP(name, x) \
- DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false)
-
-static Property migration_properties[] = {
- DEFINE_PROP_BOOL("store-global-state", MigrationState,
- store_global_state, true),
- DEFINE_PROP_BOOL("send-configuration", MigrationState,
- send_configuration, true),
- DEFINE_PROP_BOOL("send-section-footer", MigrationState,
- send_section_footer, true),
- DEFINE_PROP_BOOL("decompress-error-check", MigrationState,
- decompress_error_check, true),
- DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
- clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
-
- /* Migration parameters */
- DEFINE_PROP_UINT8("x-compress-level", MigrationState,
- parameters.compress_level,
- DEFAULT_MIGRATE_COMPRESS_LEVEL),
- DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
- parameters.compress_threads,
- DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
- DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState,
- parameters.compress_wait_thread, true),
- DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
- parameters.decompress_threads,
- DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
- DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
- parameters.throttle_trigger_threshold,
- DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD),
- DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
- parameters.cpu_throttle_initial,
- DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
- DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
- parameters.cpu_throttle_increment,
- DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
- DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState,
- parameters.cpu_throttle_tailslow, false),
- DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
- parameters.max_bandwidth, MAX_THROTTLE),
- DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
- parameters.downtime_limit,
- DEFAULT_MIGRATE_SET_DOWNTIME),
- DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
- parameters.x_checkpoint_delay,
- DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
- DEFINE_PROP_UINT8("multifd-channels", MigrationState,
- parameters.multifd_channels,
- DEFAULT_MIGRATE_MULTIFD_CHANNELS),
- DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState,
- parameters.multifd_compression,
- DEFAULT_MIGRATE_MULTIFD_COMPRESSION),
- DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState,
- parameters.multifd_zlib_level,
- DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL),
- DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState,
- parameters.multifd_zstd_level,
- DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL),
- DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
- parameters.xbzrle_cache_size,
- DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
- DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState,
- parameters.max_postcopy_bandwidth,
- DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH),
- DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState,
- parameters.max_cpu_throttle,
- DEFAULT_MIGRATE_MAX_CPU_THROTTLE),
- DEFINE_PROP_SIZE("announce-initial", MigrationState,
- parameters.announce_initial,
- DEFAULT_MIGRATE_ANNOUNCE_INITIAL),
- DEFINE_PROP_SIZE("announce-max", MigrationState,
- parameters.announce_max,
- DEFAULT_MIGRATE_ANNOUNCE_MAX),
- DEFINE_PROP_SIZE("announce-rounds", MigrationState,
- parameters.announce_rounds,
- DEFAULT_MIGRATE_ANNOUNCE_ROUNDS),
- DEFINE_PROP_SIZE("announce-step", MigrationState,
- parameters.announce_step,
- DEFAULT_MIGRATE_ANNOUNCE_STEP),
-
- /* Migration capabilities */
- DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
- DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
- DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
- DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
- DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
- DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
- DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
- DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
- DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
- DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
- DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
- DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
- DEFINE_PROP_MIG_CAP("x-background-snapshot",
- MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
-
- DEFINE_PROP_END_OF_LIST(),
-};
+fail:
+ migrate_set_error(s, local_err);
+ migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+ error_report_err(local_err);
+ migrate_fd_cleanup(s);
+}
static void migration_class_init(ObjectClass *klass, void *data)
{
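
migrate_mode_is_cpr() gates the early VM stop in migrate_fd_connect() above; a sketch under the assumption that cpr-reboot is the only CPR mode wired up at this point:

/* Sketch: CPR (CheckPoint and Restart) modes stop the VM up front */
static bool migrate_mode_is_cpr(MigrationState *s)
{
    return s->parameters.mode == MIG_MODE_CPR_REBOOT;
}
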
@@ -4213,25 +3782,22 @@ static void migration_class_init(ObjectClass *klass, void *data)
static void migration_instance_finalize(Object *obj)
{
MigrationState *ms = MIGRATION_OBJ(obj);
- MigrationParameters *params = &ms->parameters;
qemu_mutex_destroy(&ms->error_mutex);
qemu_mutex_destroy(&ms->qemu_file_lock);
- g_free(params->tls_hostname);
- g_free(params->tls_creds);
qemu_sem_destroy(&ms->wait_unplug_sem);
qemu_sem_destroy(&ms->rate_limit_sem);
qemu_sem_destroy(&ms->pause_sem);
qemu_sem_destroy(&ms->postcopy_pause_sem);
- qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
qemu_sem_destroy(&ms->rp_state.rp_sem);
+ qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
+ qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
error_free(ms->error);
}
static void migration_instance_init(Object *obj)
{
MigrationState *ms = MIGRATION_OBJ(obj);
- MigrationParameters *params = &ms->parameters;
ms->state = MIGRATION_STATUS_NONE;
ms->mbps = -1;
@@ -4239,38 +3805,14 @@ static void migration_instance_init(Object *obj)
qemu_sem_init(&ms->pause_sem, 0);
qemu_mutex_init(&ms->error_mutex);
- params->tls_hostname = g_strdup("");
- params->tls_creds = g_strdup("");
-
- /* Set has_* up only for parameter checks */
- params->has_compress_level = true;
- params->has_compress_threads = true;
- params->has_decompress_threads = true;
- params->has_throttle_trigger_threshold = true;
- params->has_cpu_throttle_initial = true;
- params->has_cpu_throttle_increment = true;
- params->has_cpu_throttle_tailslow = true;
- params->has_max_bandwidth = true;
- params->has_downtime_limit = true;
- params->has_x_checkpoint_delay = true;
- params->has_block_incremental = true;
- params->has_multifd_channels = true;
- params->has_multifd_compression = true;
- params->has_multifd_zlib_level = true;
- params->has_multifd_zstd_level = true;
- params->has_xbzrle_cache_size = true;
- params->has_max_postcopy_bandwidth = true;
- params->has_max_cpu_throttle = true;
- params->has_announce_initial = true;
- params->has_announce_max = true;
- params->has_announce_rounds = true;
- params->has_announce_step = true;
+ migrate_params_init(&ms->parameters);
qemu_sem_init(&ms->postcopy_pause_sem, 0);
- qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
qemu_sem_init(&ms->rp_state.rp_sem, 0);
+ qemu_sem_init(&ms->rp_state.rp_pong_acks, 0);
qemu_sem_init(&ms->rate_limit_sem, 0);
qemu_sem_init(&ms->wait_unplug_sem, 0);
+ qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
qemu_mutex_init(&ms->qemu_file_lock);
}
@@ -4280,27 +3822,14 @@ static void migration_instance_init(Object *obj)
*/
static bool migration_object_check(MigrationState *ms, Error **errp)
{
- MigrationCapabilityStatusList *head = NULL;
/* Assuming all off */
- bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
- int i;
+ bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 };
if (!migrate_params_check(&ms->parameters, errp)) {
return false;
}
- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
- if (ms->enabled_capabilities[i]) {
- QAPI_LIST_PREPEND(head, migrate_cap_add(i, true));
- }
- }
-
- ret = migrate_caps_check(cap_list, head, errp);
-
- /* It works with head == NULL */
- qapi_free_MigrationCapabilityStatusList(head);
-
- return ret;
+ return migrate_caps_check(old_caps, ms->capabilities, errp);
}
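
The rework drops the intermediate QAPI list: migrate_caps_check() is assumed to now take two plain bool arrays indexed by MigrationCapability, along these lines:

/* Assumed signature (migration/options.c): validates new_caps against
 * old_caps and the current configuration, setting errp on conflict. */
bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp);
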
static const TypeInfo migration_type = {