diff options
Diffstat (limited to 'migration/migration.c')
-rw-r--r-- | migration/migration.c | 3201 |
1 files changed, 1365 insertions, 1836 deletions
diff --git a/migration/migration.c b/migration/migration.c index bb909781b7..86bf76e925 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -20,18 +20,21 @@ #include "migration/blocker.h" #include "exec.h" #include "fd.h" +#include "file.h" #include "socket.h" #include "sysemu/runstate.h" #include "sysemu/sysemu.h" #include "sysemu/cpu-throttle.h" #include "rdma.h" #include "ram.h" +#include "ram-compress.h" #include "migration/global_state.h" #include "migration/misc.h" #include "migration.h" +#include "migration-stats.h" #include "savevm.h" -#include "qemu-file-channel.h" #include "qemu-file.h" +#include "channel.h" #include "migration/vmstate.h" #include "block/block.h" #include "qapi/error.h" @@ -49,74 +52,30 @@ #include "trace.h" #include "exec/target_page.h" #include "io/channel-buffer.h" +#include "io/channel-tls.h" #include "migration/colo.h" #include "hw/boards.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-properties-system.h" #include "monitor/monitor.h" #include "net/announce.h" #include "qemu/queue.h" #include "multifd.h" +#include "threadinfo.h" #include "qemu/yank.h" #include "sysemu/cpus.h" #include "yank_functions.h" - -#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ - -/* Amount of time to allocate to each "chunk" of bandwidth-throttled - * data. 
*/ -#define BUFFER_DELAY 100 -#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) - -/* Time in milliseconds we are allowed to stop the source, - * for sending the last part */ -#define DEFAULT_MIGRATE_SET_DOWNTIME 300 - -/* Maximum migrate downtime set to 2000 seconds */ -#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 -#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) - -/* Default compression thread count */ -#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 -/* Default decompression thread count, usually decompression is at - * least 4 times as fast as compression.*/ -#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 -/*0: means nocompress, 1: best speed, ... 9: best compress ratio */ -#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 -/* Define default autoconverge cpu throttle migration parameters */ -#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 -#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 -#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 -#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 - -/* Migration XBZRLE default cache size */ -#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) - -/* The delay time (in ms) between two COLO checkpoints */ -#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) -#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 -#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE -/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ -#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 -/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ -#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 - -/* Background transfer rate for postcopy, 0 means unlimited, note - * that page requests can still exceed this limit. - */ -#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 - -/* - * Parameters for self_announce_delay giving a stream of RARP/ARP - * packets after migration. 
- */ -#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 -#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 -#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 -#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 - -static NotifierList migration_state_notifiers = - NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); +#include "sysemu/qtest.h" +#include "options.h" +#include "sysemu/dirtylimit.h" +#include "qemu/sockets.h" +#include "sysemu/kvm.h" + +#define NOTIFIER_ELEM_INIT(array, elem) \ + [elem] = NOTIFIER_WITH_RETURN_LIST_INITIALIZER((array)[elem]) + +static NotifierWithReturnList migration_state_notifiers[] = { + NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_NORMAL), + NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_REBOOT), +}; /* Messages sent on the return path from destination to source */ enum mig_rp_message_type { @@ -128,42 +87,11 @@ enum mig_rp_message_type { MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */ MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */ MIG_RP_MSG_RESUME_ACK, /* tell source that we are ready to resume */ + MIG_RP_MSG_SWITCHOVER_ACK, /* Tell source it's OK to do switchover */ MIG_RP_MSG_MAX }; -/* Migration capabilities set */ -struct MigrateCapsSet { - int size; /* Capability set size */ - MigrationCapability caps[]; /* Variadic array of capabilities */ -}; -typedef struct MigrateCapsSet MigrateCapsSet; - -/* Define and initialize MigrateCapsSet */ -#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) 
\ - MigrateCapsSet _name = { \ - .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \ - .caps = { __VA_ARGS__ } \ - } - -/* Background-snapshot compatibility check list */ -static const -INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, - MIGRATION_CAPABILITY_POSTCOPY_RAM, - MIGRATION_CAPABILITY_DIRTY_BITMAPS, - MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME, - MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, - MIGRATION_CAPABILITY_RETURN_PATH, - MIGRATION_CAPABILITY_MULTIFD, - MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, - MIGRATION_CAPABILITY_AUTO_CONVERGE, - MIGRATION_CAPABILITY_RELEASE_RAM, - MIGRATION_CAPABILITY_RDMA_PIN_ALL, - MIGRATION_CAPABILITY_COMPRESS, - MIGRATION_CAPABILITY_XBZRLE, - MIGRATION_CAPABILITY_X_COLO, - MIGRATION_CAPABILITY_VALIDATE_UUID); - /* When we add fault tolerance, we could have several migrations at once. For now we don't need to add dynamic creation of migration */ @@ -171,13 +99,89 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, static MigrationState *current_migration; static MigrationIncomingState *current_incoming; -static GSList *migration_blockers; +static GSList *migration_blockers[MIG_MODE__MAX]; static bool migration_object_check(MigrationState *ms, Error **errp); static int migration_maybe_pause(MigrationState *s, int *current_active_state, int new_state); static void migrate_fd_cancel(MigrationState *s); +static bool close_return_path_on_source(MigrationState *s); +static void migration_completion_end(MigrationState *s); + +static void migration_downtime_start(MigrationState *s) +{ + trace_vmstate_downtime_checkpoint("src-downtime-start"); + s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +} + +static void migration_downtime_end(MigrationState *s) +{ + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + + /* + * If downtime already set, should mean that postcopy already set it, + * then that should be the real downtime already. 
+ */ + if (!s->downtime) { + s->downtime = now - s->downtime_start; + } + + trace_vmstate_downtime_checkpoint("src-downtime-end"); +} + +static bool migration_needs_multiple_sockets(void) +{ + return migrate_multifd() || migrate_postcopy_preempt(); +} + +static bool transport_supports_multi_channels(MigrationAddress *addr) +{ + if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { + SocketAddress *saddr = &addr->u.socket; + + return (saddr->type == SOCKET_ADDRESS_TYPE_INET || + saddr->type == SOCKET_ADDRESS_TYPE_UNIX || + saddr->type == SOCKET_ADDRESS_TYPE_VSOCK); + } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) { + return migrate_mapped_ram(); + } else { + return false; + } +} + +static bool migration_needs_seekable_channel(void) +{ + return migrate_mapped_ram(); +} + +static bool transport_supports_seeking(MigrationAddress *addr) +{ + if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) { + return true; + } + + return false; +} + +static bool +migration_channels_and_transport_compatible(MigrationAddress *addr, + Error **errp) +{ + if (migration_needs_seekable_channel() && + !transport_supports_seeking(addr)) { + error_setg(errp, "Migration requires seekable transport (e.g. file)"); + return false; + } + + if (migration_needs_multiple_sockets() && + !transport_supports_multi_channels(addr)) { + error_setg(errp, "Migration requires multi-channel URIs (e.g. 
tcp)"); + return false; + } + + return true; +} static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp) { @@ -186,6 +190,23 @@ static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp) return (a > b) - (a < b); } +static int migration_stop_vm(MigrationState *s, RunState state) +{ + int ret; + + migration_downtime_start(s); + + s->vm_old_state = runstate_get(); + global_state_store(); + + ret = vm_stop_force_state(state); + + trace_vmstate_downtime_checkpoint("src-vm-stopped"); + trace_migration_completion_vm_stop(ret); + + return ret; +} + void migration_object_init(void) { /* This can only be called once. */ @@ -202,10 +223,15 @@ void migration_object_init(void) current_incoming->postcopy_remote_fds = g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD)); qemu_mutex_init(&current_incoming->rp_mutex); + qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex); qemu_event_init(&current_incoming->main_thread_load_event, false); qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0); qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0); + qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0); + qemu_sem_init(&current_incoming->postcopy_qemufile_dst_done, 0); + qemu_mutex_init(&current_incoming->page_request_mutex); + qemu_cond_init(&current_incoming->page_request_cond); current_incoming->page_requested = g_tree_new(page_request_addr_cmp); migration_object_check(current_migration, &error_fatal); @@ -215,18 +241,71 @@ void migration_object_init(void) dirty_bitmap_mig_init(); } -void migration_cancel(void) +typedef struct { + QEMUBH *bh; + QEMUBHFunc *cb; + void *opaque; +} MigrationBH; + +static void migration_bh_dispatch_bh(void *opaque) +{ + MigrationState *s = migrate_get_current(); + MigrationBH *migbh = opaque; + + /* cleanup this BH */ + qemu_bh_delete(migbh->bh); + migbh->bh = NULL; + + /* dispatch the other one */ + migbh->cb(migbh->opaque); + object_unref(OBJECT(s)); + + g_free(migbh); +} + +void migration_bh_schedule(QEMUBHFunc *cb, void *opaque) +{ + 
MigrationState *s = migrate_get_current(); + MigrationBH *migbh = g_new0(MigrationBH, 1); + QEMUBH *bh = qemu_bh_new(migration_bh_dispatch_bh, migbh); + + /* Store these to dispatch when the BH runs */ + migbh->bh = bh; + migbh->cb = cb; + migbh->opaque = opaque; + + /* + * Ref the state for bh, because it may be called when + * there're already no other refs + */ + object_ref(OBJECT(s)); + qemu_bh_schedule(bh); +} + +void migration_cancel(const Error *error) { + if (error) { + migrate_set_error(current_migration, error); + } + if (migrate_dirty_limit()) { + qmp_cancel_vcpu_dirty_limit(false, -1, NULL); + } migrate_fd_cancel(current_migration); } void migration_shutdown(void) { /* + * When the QEMU main thread exit, the COLO thread + * may wait a semaphore. So, we should wakeup the + * COLO thread before migration shutdown. + */ + colo_shutdown(); + /* * Cancel the current migration - that will (eventually) * stop the migration using this structure */ - migration_cancel(); + migration_cancel(NULL); object_unref(OBJECT(current_migration)); /* @@ -257,10 +336,26 @@ MigrationIncomingState *migration_incoming_get_current(void) return current_incoming; } +void migration_incoming_transport_cleanup(MigrationIncomingState *mis) +{ + if (mis->socket_address_list) { + qapi_free_SocketAddressList(mis->socket_address_list); + mis->socket_address_list = NULL; + } + + if (mis->transport_cleanup) { + mis->transport_cleanup(mis->transport_data); + mis->transport_data = mis->transport_cleanup = NULL; + } +} + void migration_incoming_state_destroy(void) { struct MigrationIncomingState *mis = migration_incoming_get_current(); + multifd_recv_cleanup(); + compress_threads_load_cleanup(); + if (mis->to_src_file) { /* Tell source that we are done */ migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0); @@ -277,10 +372,8 @@ void migration_incoming_state_destroy(void) g_array_free(mis->postcopy_remote_fds, TRUE); mis->postcopy_remote_fds = NULL; } - if 
(mis->transport_cleanup) { - mis->transport_cleanup(mis->transport_data); - } + migration_incoming_transport_cleanup(mis); qemu_event_reset(&mis->main_thread_load_event); if (mis->page_requested) { @@ -288,9 +381,10 @@ void migration_incoming_state_destroy(void) mis->page_requested = NULL; } - if (mis->socket_address_list) { - qapi_free_SocketAddressList(mis->socket_address_list); - mis->socket_address_list = NULL; + if (mis->postcopy_qemufile_dst) { + migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst); + qemu_fclose(mis->postcopy_qemufile_dst); + mis->postcopy_qemufile_dst = NULL; } yank_unregister_instance(MIGRATION_YANK_INSTANCE); @@ -298,21 +392,11 @@ void migration_incoming_state_destroy(void) static void migrate_generate_event(int new_state) { - if (migrate_use_events()) { + if (migrate_events()) { qapi_event_send_migration(new_state); } } -static bool migrate_late_block_activate(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[ - MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; -} - /* * Send a message on the return channel back to the source * of the migration. @@ -338,12 +422,7 @@ static int migrate_send_rp_message(MigrationIncomingState *mis, qemu_put_be16(mis->to_src_file, (unsigned int)message_type); qemu_put_be16(mis->to_src_file, len); qemu_put_buffer(mis->to_src_file, data, len); - qemu_fflush(mis->to_src_file); - - /* It's possible that qemu file got error during sending */ - ret = qemu_file_get_error(mis->to_src_file); - - return ret; + return qemu_fflush(mis->to_src_file); } /* Request one page from the source VM at the given start address. 
@@ -391,7 +470,7 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis, int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb, ram_addr_t start, uint64_t haddr) { - void *aligned = (void *)(uintptr_t)(haddr & (-qemu_ram_pagesize(rb))); + void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb)); bool received = false; WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) { @@ -403,7 +482,7 @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis, * things like g_tree_lookup() will return TRUE (1) when found. */ g_tree_insert(mis->page_requested, aligned, (gpointer)1); - mis->page_requested_count++; + qatomic_inc(&mis->page_requested_count); trace_postcopy_page_req_add(aligned, mis->page_requested_count); } } @@ -433,6 +512,18 @@ void migration_incoming_disable_colo(void) int migration_incoming_enable_colo(void) { +#ifndef CONFIG_REPLICATION + error_report("ENABLE_COLO command come in migration stream, but COLO " + "module is not built in"); + return -ENOTSUP; +#endif + + if (!migrate_colo()) { + error_report("ENABLE_COLO command come in migration stream, but c-colo " + "capability is not set"); + return -EINVAL; + } + if (ram_block_discard_disable(true)) { error_report("COLO: cannot disable RAM discard"); return -EBUSY; @@ -449,23 +540,131 @@ void migrate_add_address(SocketAddress *address) QAPI_CLONE(SocketAddress, address)); } -static void qemu_start_incoming_migration(const char *uri, Error **errp) +bool migrate_uri_parse(const char *uri, MigrationChannel **channel, + Error **errp) { - const char *p = NULL; + g_autoptr(MigrationChannel) val = g_new0(MigrationChannel, 1); + g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1); + InetSocketAddress *isock = &addr->u.rdma; + strList **tail = &addr->u.exec.args; - qapi_event_send_migration(MIGRATION_STATUS_SETUP); - if (strstart(uri, "tcp:", &p) || - strstart(uri, "unix:", NULL) || - strstart(uri, "vsock:", NULL)) { - socket_start_incoming_migration(p ? 
p : uri, errp); + if (strstart(uri, "exec:", NULL)) { + addr->transport = MIGRATION_ADDRESS_TYPE_EXEC; +#ifdef WIN32 + QAPI_LIST_APPEND(tail, g_strdup(exec_get_cmd_path())); + QAPI_LIST_APPEND(tail, g_strdup("/c")); +#else + QAPI_LIST_APPEND(tail, g_strdup("/bin/sh")); + QAPI_LIST_APPEND(tail, g_strdup("-c")); +#endif + QAPI_LIST_APPEND(tail, g_strdup(uri + strlen("exec:"))); + } else if (strstart(uri, "rdma:", NULL)) { + if (inet_parse(isock, uri + strlen("rdma:"), errp)) { + qapi_free_InetSocketAddress(isock); + return false; + } + addr->transport = MIGRATION_ADDRESS_TYPE_RDMA; + } else if (strstart(uri, "tcp:", NULL) || + strstart(uri, "unix:", NULL) || + strstart(uri, "vsock:", NULL) || + strstart(uri, "fd:", NULL)) { + addr->transport = MIGRATION_ADDRESS_TYPE_SOCKET; + SocketAddress *saddr = socket_parse(uri, errp); + if (!saddr) { + return false; + } + addr->u.socket.type = saddr->type; + addr->u.socket.u = saddr->u; + /* Don't free the objects inside; their ownership moved to "addr" */ + g_free(saddr); + } else if (strstart(uri, "file:", NULL)) { + addr->transport = MIGRATION_ADDRESS_TYPE_FILE; + addr->u.file.filename = g_strdup(uri + strlen("file:")); + if (file_parse_offset(addr->u.file.filename, &addr->u.file.offset, + errp)) { + return false; + } + } else { + error_setg(errp, "unknown migration protocol: %s", uri); + return false; + } + + val->channel_type = MIGRATION_CHANNEL_TYPE_MAIN; + val->addr = g_steal_pointer(&addr); + *channel = g_steal_pointer(&val); + return true; +} + +static void qemu_start_incoming_migration(const char *uri, bool has_channels, + MigrationChannelList *channels, + Error **errp) +{ + g_autoptr(MigrationChannel) channel = NULL; + MigrationAddress *addr = NULL; + MigrationIncomingState *mis = migration_incoming_get_current(); + + /* + * Having preliminary checks for uri and channel + */ + if (!uri == !channels) { + error_setg(errp, "need either 'uri' or 'channels' argument"); + return; + } + + if (channels) { + /* To verify that 
Migrate channel list has only item */ + if (channels->next) { + error_setg(errp, "Channel list has more than one entries"); + return; + } + addr = channels->value->addr; + } + + if (uri) { + /* caller uses the old URI syntax */ + if (!migrate_uri_parse(uri, &channel, errp)) { + return; + } + addr = channel->addr; + } + + /* transport mechanism not suitable for migration? */ + if (!migration_channels_and_transport_compatible(addr, errp)) { + return; + } + + migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, + MIGRATION_STATUS_SETUP); + + if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { + SocketAddress *saddr = &addr->u.socket; + if (saddr->type == SOCKET_ADDRESS_TYPE_INET || + saddr->type == SOCKET_ADDRESS_TYPE_UNIX || + saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) { + socket_start_incoming_migration(saddr, errp); + } else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) { + fd_start_incoming_migration(saddr->u.fd.str, errp); + } #ifdef CONFIG_RDMA - } else if (strstart(uri, "rdma:", &p)) { - rdma_start_incoming_migration(p, errp); + } else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) { + if (migrate_compress()) { + error_setg(errp, "RDMA and compression can't be used together"); + return; + } + if (migrate_xbzrle()) { + error_setg(errp, "RDMA and XBZRLE can't be used together"); + return; + } + if (migrate_multifd()) { + error_setg(errp, "RDMA and multifd can't be used together"); + return; + } + rdma_start_incoming_migration(&addr->u.rdma, errp); #endif - } else if (strstart(uri, "exec:", &p)) { - exec_start_incoming_migration(p, errp); - } else if (strstart(uri, "fd:", &p)) { - fd_start_incoming_migration(p, errp); + } else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) { + exec_start_incoming_migration(addr->u.exec.args, errp); + } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) { + file_start_incoming_migration(&addr->u.file, errp); } else { error_setg(errp, "unknown migration protocol: %s", uri); } @@ -476,6 +675,8 @@ static void 
process_incoming_migration_bh(void *opaque) Error *local_err = NULL; MigrationIncomingState *mis = opaque; + trace_vmstate_downtime_checkpoint("dst-precopy-bh-enter"); + /* If capability late_block_activate is set: * Only fire up the block code now if we're going to restart the * VM, else 'cont' will do it. @@ -484,10 +685,10 @@ static void process_incoming_migration_bh(void *opaque) */ if (!migrate_late_block_activate() || (autostart && (!global_state_received() || - global_state_get_runstate() == RUN_STATE_RUNNING))) { - /* Make sure all file formats flush their mutable metadata. + runstate_is_live(global_state_get_runstate())))) { + /* Make sure all file formats throw away their mutable metadata. * If we get an error here, just don't restart the VM yet. */ - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); local_err = NULL; @@ -501,18 +702,14 @@ static void process_incoming_migration_bh(void *opaque) */ qemu_announce_self(&mis->announce_timer, migrate_announce_params()); - if (multifd_load_cleanup(&local_err) != 0) { - error_report_err(local_err); - autostart = false; - } - /* If global state section was not received or we are in running - state, we need to obey autostart. Any other state is set with - runstate_set. 
*/ + trace_vmstate_downtime_checkpoint("dst-precopy-bh-announced"); + + multifd_recv_shutdown(); dirty_bitmap_mig_before_vm_start(); if (!global_state_received() || - global_state_get_runstate() == RUN_STATE_RUNNING) { + runstate_is_live(global_state_get_runstate())) { if (autostart) { vm_start(); } else { @@ -524,6 +721,7 @@ static void process_incoming_migration_bh(void *opaque) } else { runstate_set(global_state_get_runstate()); } + trace_vmstate_downtime_checkpoint("dst-precopy-bh-vm-started"); /* * This must happen after any state changes since as soon as an external * observer sees this event they might start to prod at the VM assuming @@ -531,24 +729,33 @@ static void process_incoming_migration_bh(void *opaque) */ migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COMPLETED); - qemu_bh_delete(mis->bh); migration_incoming_state_destroy(); } -static void process_incoming_migration_co(void *opaque) +static void coroutine_fn +process_incoming_migration_co(void *opaque) { MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyState ps; int ret; - Error *local_err = NULL; assert(mis->from_src_file); - mis->migration_incoming_co = qemu_coroutine_self(); + + if (compress_threads_load_setup(mis->from_src_file)) { + error_report("Failed to setup decompress threads"); + goto fail; + } + mis->largest_page_size = qemu_ram_pagesize_largest(); postcopy_state_set(POSTCOPY_INCOMING_NONE); - migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, + migrate_set_state(&mis->state, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE); + + mis->loadvm_co = qemu_coroutine_self(); ret = qemu_loadvm_state(mis->from_src_file); + mis->loadvm_co = NULL; + + trace_vmstate_downtime_checkpoint("dst-precopy-loadvm-completed"); ps = postcopy_state_get(); trace_process_incoming_migration_co_end(ret, ps); @@ -571,65 +778,47 @@ static void process_incoming_migration_co(void *opaque) /* Else if something went wrong then just fall out of the normal exit */ } - /* 
we get COLO info, and know if we are in COLO mode */ - if (!ret && migration_incoming_colo_enabled()) { - /* Make sure all file formats flush their mutable metadata */ - bdrv_invalidate_cache_all(&local_err); - if (local_err) { - error_report_err(local_err); - goto fail; - } - - qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming", - colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE); - mis->have_colo_incoming_thread = true; - qemu_coroutine_yield(); + if (ret < 0) { + MigrationState *s = migrate_get_current(); - /* Wait checkpoint incoming thread exit before free resource */ - qemu_thread_join(&mis->colo_incoming_thread); - /* We hold the global iothread lock, so it is safe here */ - colo_release_ram_cache(); + if (migrate_has_error(s)) { + WITH_QEMU_LOCK_GUARD(&s->error_mutex) { + error_report_err(s->error); + } + } + error_report("load of migration failed: %s", strerror(-ret)); + goto fail; } - if (ret < 0) { - error_report("load of migration failed: %s", strerror(-ret)); + if (colo_incoming_co() < 0) { goto fail; } - mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); - qemu_bh_schedule(mis->bh); - mis->migration_incoming_co = NULL; + + migration_bh_schedule(process_incoming_migration_bh, mis); return; fail: - local_err = NULL; migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED); qemu_fclose(mis->from_src_file); - if (multifd_load_cleanup(&local_err) != 0) { - error_report_err(local_err); - } + + multifd_recv_cleanup(); + compress_threads_load_cleanup(); + exit(EXIT_FAILURE); } /** * migration_incoming_setup: Setup incoming migration * @f: file for main migration channel - * @errp: where to put errors - * - * Returns: %true on success, %false on error. 
*/ -static bool migration_incoming_setup(QEMUFile *f, Error **errp) +static void migration_incoming_setup(QEMUFile *f) { MigrationIncomingState *mis = migration_incoming_get_current(); - if (multifd_load_setup(errp) != 0) { - return false; - } - if (!mis->from_src_file) { mis->from_src_file = f; } qemu_file_set_blocking(f, false); - return true; } void migration_incoming_process(void) @@ -639,28 +828,29 @@ void migration_incoming_process(void) } /* Returns true if recovered from a paused migration, otherwise false */ -static bool postcopy_try_recover(QEMUFile *f) +static bool postcopy_try_recover(void) { MigrationIncomingState *mis = migration_incoming_get_current(); if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { /* Resumed from a paused postcopy migration */ - mis->from_src_file = f; + /* This should be set already in migration_incoming_setup() */ + assert(mis->from_src_file); /* Postcopy has standalone thread to do vm load */ - qemu_file_set_blocking(f, true); + qemu_file_set_blocking(mis->from_src_file, true); /* Re-configure the return path */ - mis->to_src_file = qemu_file_get_return_path(f); + mis->to_src_file = qemu_file_get_return_path(mis->from_src_file); migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED, MIGRATION_STATUS_POSTCOPY_RECOVER); /* * Here, we only wake up the main loading thread (while the - * fault thread will still be waiting), so that we can receive + * rest threads will still be waiting), so that we can receive * commands from source now, and answer it if needed. The - * fault thread will be woken up afterwards until we are sure + * rest threads will be woken up afterwards until we are sure * that source is ready to reply to page requests. 
*/ qemu_sem_post(&mis->postcopy_pause_sem_dst); @@ -670,53 +860,102 @@ static bool postcopy_try_recover(QEMUFile *f) return false; } -void migration_fd_process_incoming(QEMUFile *f, Error **errp) +void migration_fd_process_incoming(QEMUFile *f) { - if (postcopy_try_recover(f)) { + migration_incoming_setup(f); + if (postcopy_try_recover()) { return; } + migration_incoming_process(); +} - if (!migration_incoming_setup(f, errp)) { - return; +/* + * Returns true when we want to start a new incoming migration process, + * false otherwise. + */ +static bool migration_should_start_incoming(bool main_channel) +{ + /* Multifd doesn't start unless all channels are established */ + if (migrate_multifd()) { + return migration_has_all_channels(); } - migration_incoming_process(); + + /* Preempt channel only starts when the main channel is created */ + if (migrate_postcopy_preempt()) { + return main_channel; + } + + /* + * For all the rest types of migration, we should only reach here when + * it's the main channel that's being created, and we should always + * proceed with this channel. + */ + assert(main_channel); + return true; } void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) { MigrationIncomingState *mis = migration_incoming_get_current(); Error *local_err = NULL; - bool start_migration; + QEMUFile *f; + bool default_channel = true; + uint32_t channel_magic = 0; + int ret = 0; - if (!mis->from_src_file) { - /* The first connection (multifd may have multiple) */ - QEMUFile *f = qemu_fopen_channel_input(ioc); + if (migrate_multifd() && !migrate_mapped_ram() && + !migrate_postcopy_ram() && + qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { + /* + * With multiple channels, it is possible that we receive channels + * out of order on destination side, causing incorrect mapping of + * source channels on destination side. Check channel MAGIC to + * decide type of channel. 
Please note this is best effort, postcopy + * preempt channel does not send any magic number so avoid it for + * postcopy live migration. Also tls live migration already does + * tls handshake while initializing main channel so with tls this + * issue is not possible. + */ + ret = migration_channel_read_peek(ioc, (void *)&channel_magic, + sizeof(channel_magic), errp); - /* If it's a recovery, we're done */ - if (postcopy_try_recover(f)) { + if (ret != 0) { return; } - if (!migration_incoming_setup(f, errp)) { - return; - } + default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); + } else { + default_channel = !mis->from_src_file; + } - /* - * Common migration only needs one channel, so we can start - * right now. Multifd needs more than one channel, we wait. - */ - start_migration = !migrate_use_multifd(); + if (multifd_recv_setup(errp) != 0) { + return; + } + + if (default_channel) { + f = qemu_file_new_input(ioc); + migration_incoming_setup(f); } else { /* Multiple connections */ - assert(migrate_use_multifd()); - start_migration = multifd_recv_new_channel(ioc, &local_err); + assert(migration_needs_multiple_sockets()); + if (migrate_multifd()) { + multifd_recv_new_channel(ioc, &local_err); + } else { + assert(migrate_postcopy_preempt()); + f = qemu_file_new_input(ioc); + postcopy_preempt_new_channel(mis, f); + } if (local_err) { error_propagate(errp, local_err); return; } } - if (start_migration) { + if (migration_should_start_incoming(default_channel)) { + /* If it's a recovery, we're done */ + if (postcopy_try_recover()) { + return; + } migration_incoming_process(); } } @@ -730,11 +969,25 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) bool migration_has_all_channels(void) { MigrationIncomingState *mis = migration_incoming_get_current(); - bool all_channels; - all_channels = multifd_recv_all_channels_created(); + if (!mis->from_src_file) { + return false; + } + + if (migrate_multifd()) { + return 
multifd_recv_all_channels_created(); + } - return all_channels && mis->from_src_file != NULL; + if (migrate_postcopy_preempt()) { + return mis->postcopy_qemufile_dst != NULL; + } + + return true; +} + +int migrate_send_rp_switchover_ack(MigrationIncomingState *mis) +{ + return migrate_send_rp_message(mis, MIG_RP_MSG_SWITCHOVER_ACK, 0, NULL); } /* @@ -811,120 +1064,15 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); } -MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) -{ - MigrationCapabilityStatusList *head = NULL, **tail = &head; - MigrationCapabilityStatus *caps; - MigrationState *s = migrate_get_current(); - int i; - - for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -#ifndef CONFIG_LIVE_BLOCK_MIGRATION - if (i == MIGRATION_CAPABILITY_BLOCK) { - continue; - } -#endif - caps = g_malloc0(sizeof(*caps)); - caps->capability = i; - caps->state = s->enabled_capabilities[i]; - QAPI_LIST_APPEND(tail, caps); - } - - return head; -} - -MigrationParameters *qmp_query_migrate_parameters(Error **errp) -{ - MigrationParameters *params; - MigrationState *s = migrate_get_current(); - - /* TODO use QAPI_CLONE() instead of duplicating it inline */ - params = g_malloc0(sizeof(*params)); - params->has_compress_level = true; - params->compress_level = s->parameters.compress_level; - params->has_compress_threads = true; - params->compress_threads = s->parameters.compress_threads; - params->has_compress_wait_thread = true; - params->compress_wait_thread = s->parameters.compress_wait_thread; - params->has_decompress_threads = true; - params->decompress_threads = s->parameters.decompress_threads; - params->has_throttle_trigger_threshold = true; - params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; - params->has_cpu_throttle_initial = true; - params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; - 
params->has_cpu_throttle_increment = true; - params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; - params->has_cpu_throttle_tailslow = true; - params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; - params->has_tls_creds = true; - params->tls_creds = g_strdup(s->parameters.tls_creds); - params->has_tls_hostname = true; - params->tls_hostname = g_strdup(s->parameters.tls_hostname); - params->has_tls_authz = true; - params->tls_authz = g_strdup(s->parameters.tls_authz ? - s->parameters.tls_authz : ""); - params->has_max_bandwidth = true; - params->max_bandwidth = s->parameters.max_bandwidth; - params->has_downtime_limit = true; - params->downtime_limit = s->parameters.downtime_limit; - params->has_x_checkpoint_delay = true; - params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; - params->has_block_incremental = true; - params->block_incremental = s->parameters.block_incremental; - params->has_multifd_channels = true; - params->multifd_channels = s->parameters.multifd_channels; - params->has_multifd_compression = true; - params->multifd_compression = s->parameters.multifd_compression; - params->has_multifd_zlib_level = true; - params->multifd_zlib_level = s->parameters.multifd_zlib_level; - params->has_multifd_zstd_level = true; - params->multifd_zstd_level = s->parameters.multifd_zstd_level; - params->has_xbzrle_cache_size = true; - params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; - params->has_max_postcopy_bandwidth = true; - params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth; - params->has_max_cpu_throttle = true; - params->max_cpu_throttle = s->parameters.max_cpu_throttle; - params->has_announce_initial = true; - params->announce_initial = s->parameters.announce_initial; - params->has_announce_max = true; - params->announce_max = s->parameters.announce_max; - params->has_announce_rounds = true; - params->announce_rounds = s->parameters.announce_rounds; - params->has_announce_step = true; - 
params->announce_step = s->parameters.announce_step; - - if (s->parameters.has_block_bitmap_mapping) { - params->has_block_bitmap_mapping = true; - params->block_bitmap_mapping = - QAPI_CLONE(BitmapMigrationNodeAliasList, - s->parameters.block_bitmap_mapping); - } - - return params; -} - -AnnounceParameters *migrate_announce_params(void) -{ - static AnnounceParameters ap; - - MigrationState *s = migrate_get_current(); - - ap.initial = s->parameters.announce_initial; - ap.max = s->parameters.announce_max; - ap.rounds = s->parameters.announce_rounds; - ap.step = s->parameters.announce_step; - - return &ap; -} - /* * Return true if we're already in the middle of a migration * (i.e. any of the active or setup states) */ -bool migration_is_setup_or_active(int state) +bool migration_is_setup_or_active(void) { - switch (state) { + MigrationState *s = current_migration; + + switch (s->state) { case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_PAUSED: @@ -942,9 +1090,11 @@ bool migration_is_setup_or_active(int state) } } -bool migration_is_running(int state) +bool migration_is_running(void) { - switch (state) { + MigrationState *s = current_migration; + + switch (s->state) { case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_PAUSED: @@ -962,20 +1112,30 @@ bool migration_is_running(int state) } } +static bool migrate_show_downtime(MigrationState *s) +{ + return (s->state == MIGRATION_STATUS_COMPLETED) || migration_in_postcopy(); +} + static void populate_time_info(MigrationInfo *info, MigrationState *s) { info->has_status = true; info->has_setup_time = true; info->setup_time = s->setup_time; + if (s->state == MIGRATION_STATUS_COMPLETED) { info->has_total_time = true; info->total_time = s->total_time; - info->has_downtime = true; - info->downtime = s->downtime; } else { info->has_total_time = true; info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->start_time; + } + +
if (migrate_show_downtime(s)) { + info->has_downtime = true; + info->downtime = s->downtime; + } else { info->has_expected_downtime = true; info->expected_downtime = s->expected_downtime; } @@ -983,25 +1143,31 @@ static void populate_time_info(MigrationInfo *info, MigrationState *s) static void populate_ram_info(MigrationInfo *info, MigrationState *s) { - info->has_ram = true; + size_t page_size = qemu_target_page_size(); + info->ram = g_malloc0(sizeof(*info->ram)); - info->ram->transferred = ram_counters.transferred; + info->ram->transferred = migration_transferred_bytes(); info->ram->total = ram_bytes_total(); - info->ram->duplicate = ram_counters.duplicate; + info->ram->duplicate = stat64_get(&mig_stats.zero_pages); /* legacy value. It is not used anymore */ info->ram->skipped = 0; - info->ram->normal = ram_counters.normal; - info->ram->normal_bytes = ram_counters.normal * - qemu_target_page_size(); + info->ram->normal = stat64_get(&mig_stats.normal_pages); + info->ram->normal_bytes = info->ram->normal * page_size; info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = ram_counters.dirty_sync_count; - info->ram->postcopy_requests = ram_counters.postcopy_requests; - info->ram->page_size = qemu_target_page_size(); - info->ram->multifd_bytes = ram_counters.multifd_bytes; + info->ram->dirty_sync_count = + stat64_get(&mig_stats.dirty_sync_count); + info->ram->dirty_sync_missed_zero_copy = + stat64_get(&mig_stats.dirty_sync_missed_zero_copy); + info->ram->postcopy_requests = + stat64_get(&mig_stats.postcopy_requests); + info->ram->page_size = page_size; + info->ram->multifd_bytes = stat64_get(&mig_stats.multifd_bytes); info->ram->pages_per_second = s->pages_per_second; + info->ram->precopy_bytes = stat64_get(&mig_stats.precopy_bytes); + info->ram->downtime_bytes = stat64_get(&mig_stats.downtime_bytes); + info->ram->postcopy_bytes = stat64_get(&mig_stats.postcopy_bytes); - if (migrate_use_xbzrle()) { - info->has_xbzrle_cache = true; + if (migrate_xbzrle()) { 
info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size(); info->xbzrle_cache->bytes = xbzrle_counters.bytes; @@ -1012,17 +1178,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) info->xbzrle_cache->overflow = xbzrle_counters.overflow; } - if (migrate_use_compression()) { - info->has_compression = true; - info->compression = g_malloc0(sizeof(*info->compression)); - info->compression->pages = compression_counters.pages; - info->compression->busy = compression_counters.busy; - info->compression->busy_rate = compression_counters.busy_rate; - info->compression->compressed_size = - compression_counters.compressed_size; - info->compression->compression_rate = - compression_counters.compression_rate; - } + populate_compress(info); if (cpu_throttle_active()) { info->has_cpu_throttle_percentage = true; @@ -1031,14 +1187,23 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) if (s->state != MIGRATION_STATUS_COMPLETED) { info->ram->remaining = ram_bytes_remaining(); - info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate; + info->ram->dirty_pages_rate = + stat64_get(&mig_stats.dirty_pages_rate); + } + + if (migrate_dirty_limit() && dirtylimit_in_service()) { + info->has_dirty_limit_throttle_time_per_round = true; + info->dirty_limit_throttle_time_per_round = + dirtylimit_throttle_time_per_round(); + + info->has_dirty_limit_ring_full_time = true; + info->dirty_limit_ring_full_time = dirtylimit_ring_full_time(); } } static void populate_disk_info(MigrationInfo *info) { if (blk_mig_active()) { - info->has_disk = true; info->disk = g_malloc0(sizeof(*info->disk)); info->disk->transferred = blk_mig_bytes_transferred(); info->disk->remaining = blk_mig_bytes_remaining(); @@ -1049,7 +1214,8 @@ static void populate_disk_info(MigrationInfo *info) static void fill_source_migration_info(MigrationInfo *info) { MigrationState *s = migrate_get_current(); - GSList *cur_blocker = 
migration_blockers; + int state = qatomic_read(&s->state); + GSList *cur_blocker = migration_blockers[migrate_mode()]; info->blocked_reasons = NULL; @@ -1068,7 +1234,7 @@ static void fill_source_migration_info(MigrationInfo *info) } info->has_blocked_reasons = info->blocked_reasons != NULL; - switch (s->state) { + switch (state) { case MIGRATION_STATUS_NONE: /* no migration has happened ever */ /* do not overwrite destination migration status */ @@ -1088,7 +1254,7 @@ static void fill_source_migration_info(MigrationInfo *info) populate_time_info(info, s); populate_ram_info(info, s); populate_disk_info(info); - populate_vfio_info(info); + migration_populate_vfio_info(info); break; case MIGRATION_STATUS_COLO: info->has_status = true; @@ -1097,14 +1263,10 @@ static void fill_source_migration_info(MigrationInfo *info) case MIGRATION_STATUS_COMPLETED: populate_time_info(info, s); populate_ram_info(info, s); - populate_vfio_info(info); + migration_populate_vfio_info(info); break; case MIGRATION_STATUS_FAILED: info->has_status = true; - if (s->error) { - info->has_error_desc = true; - info->error_desc = g_strdup(error_get_pretty(s->error)); - } break; case MIGRATION_STATUS_CANCELLED: info->has_status = true; @@ -1113,129 +1275,12 @@ static void fill_source_migration_info(MigrationInfo *info) info->has_status = true; break; } - info->status = s->state; -} - -typedef enum WriteTrackingSupport { - WT_SUPPORT_UNKNOWN = 0, - WT_SUPPORT_ABSENT, - WT_SUPPORT_AVAILABLE, - WT_SUPPORT_COMPATIBLE -} WriteTrackingSupport; - -static -WriteTrackingSupport migrate_query_write_tracking(void) -{ - /* Check if kernel supports required UFFD features */ - if (!ram_write_tracking_available()) { - return WT_SUPPORT_ABSENT; - } - /* - * Check if current memory configuration is - * compatible with required UFFD features. 
- */ - if (!ram_write_tracking_compatible()) { - return WT_SUPPORT_AVAILABLE; - } - - return WT_SUPPORT_COMPATIBLE; -} - -/** - * @migration_caps_check - check capability validity - * - * @cap_list: old capability list, array of bool - * @params: new capabilities to be applied soon - * @errp: set *errp if the check failed, with reason - * - * Returns true if check passed, otherwise false. - */ -static bool migrate_caps_check(bool *cap_list, - MigrationCapabilityStatusList *params, - Error **errp) -{ - MigrationCapabilityStatusList *cap; - bool old_postcopy_cap; - MigrationIncomingState *mis = migration_incoming_get_current(); - - old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]; - - for (cap = params; cap; cap = cap->next) { - cap_list[cap->value->capability] = cap->value->state; - } - -#ifndef CONFIG_LIVE_BLOCK_MIGRATION - if (cap_list[MIGRATION_CAPABILITY_BLOCK]) { - error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " - "block migration"); - error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); - return false; - } -#endif - -#ifndef CONFIG_REPLICATION - if (cap_list[MIGRATION_CAPABILITY_X_COLO]) { - error_setg(errp, "QEMU compiled without replication module" - " can't enable COLO"); - error_append_hint(errp, "Please enable replication before COLO.\n"); - return false; - } -#endif - - if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { - /* This check is reasonably expensive, so only when it's being - * set the first time, also it's only the destination that needs - * special support. 
- */ - if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) && - !postcopy_ram_supported_by_host(mis)) { - /* postcopy_ram_supported_by_host will have emitted a more - * detailed message - */ - error_setg(errp, "Postcopy is not supported"); - return false; - } - - if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { - error_setg(errp, "Postcopy is not compatible with ignore-shared"); - return false; - } - } + info->status = state; - if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { - WriteTrackingSupport wt_support; - int idx; - /* - * Check if 'background-snapshot' capability is supported by - * host kernel and compatible with guest memory configuration. - */ - wt_support = migrate_query_write_tracking(); - if (wt_support < WT_SUPPORT_AVAILABLE) { - error_setg(errp, "Background-snapshot is not supported by host kernel"); - return false; - } - if (wt_support < WT_SUPPORT_COMPATIBLE) { - error_setg(errp, "Background-snapshot is not compatible " - "with guest memory configuration"); - return false; - } - - /* - * Check if there are any migration capabilities - * incompatible with 'background-snapshot'. 
- */ - for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { - int incomp_cap = check_caps_background_snapshot.caps[idx]; - if (cap_list[incomp_cap]) { - error_setg(errp, - "Background-snapshot is not compatible with %s", - MigrationCapability_str(incomp_cap)); - return false; - } - } + QEMU_LOCK_GUARD(&s->error_mutex); + if (s->error) { + info->error_desc = g_strdup(error_get_pretty(s->error)); } - - return true; } static void fill_destination_migration_info(MigrationInfo *info) @@ -1280,429 +1325,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) return info; } -void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - Error **errp) -{ - MigrationState *s = migrate_get_current(); - MigrationCapabilityStatusList *cap; - bool cap_list[MIGRATION_CAPABILITY__MAX]; - - if (migration_is_running(s->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return; - } - - memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list)); - if (!migrate_caps_check(cap_list, params, errp)) { - return; - } - - for (cap = params; cap; cap = cap->next) { - s->enabled_capabilities[cap->value->capability] = cap->value->state; - } -} - -/* - * Check whether the parameters are valid. Error will be put into errp - * (if provided). Return true if valid, otherwise false. 
- */ -static bool migrate_params_check(MigrationParameters *params, Error **errp) -{ - if (params->has_compress_level && - (params->compress_level > 9)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", - "a value between 0 and 9"); - return false; - } - - if (params->has_compress_threads && (params->compress_threads < 1)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "compress_threads", - "a value between 1 and 255"); - return false; - } - - if (params->has_decompress_threads && (params->decompress_threads < 1)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "decompress_threads", - "a value between 1 and 255"); - return false; - } - - if (params->has_throttle_trigger_threshold && - (params->throttle_trigger_threshold < 1 || - params->throttle_trigger_threshold > 100)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "throttle_trigger_threshold", - "an integer in the range of 1 to 100"); - return false; - } - - if (params->has_cpu_throttle_initial && - (params->cpu_throttle_initial < 1 || - params->cpu_throttle_initial > 99)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "cpu_throttle_initial", - "an integer in the range of 1 to 99"); - return false; - } - - if (params->has_cpu_throttle_increment && - (params->cpu_throttle_increment < 1 || - params->cpu_throttle_increment > 99)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "cpu_throttle_increment", - "an integer in the range of 1 to 99"); - return false; - } - - if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "max_bandwidth", - "an integer in the range of 0 to "stringify(SIZE_MAX) - " bytes/second"); - return false; - } - - if (params->has_downtime_limit && - (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "downtime_limit", - "an integer in the range of 0 to " - stringify(MAX_MIGRATE_DOWNTIME)" ms"); - return false; - } - - /* 
x_checkpoint_delay is now always positive */ - - if (params->has_multifd_channels && (params->multifd_channels < 1)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "multifd_channels", - "a value between 1 and 255"); - return false; - } - - if (params->has_multifd_zlib_level && - (params->multifd_zlib_level > 9)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", - "a value between 0 and 9"); - return false; - } - - if (params->has_multifd_zstd_level && - (params->multifd_zstd_level > 20)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", - "a value between 0 and 20"); - return false; - } - - if (params->has_xbzrle_cache_size && - (params->xbzrle_cache_size < qemu_target_page_size() || - !is_power_of_2(params->xbzrle_cache_size))) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "xbzrle_cache_size", - "a power of two no less than the target page size"); - return false; - } - - if (params->has_max_cpu_throttle && - (params->max_cpu_throttle < params->cpu_throttle_initial || - params->max_cpu_throttle > 99)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "max_cpu_throttle", - "an integer in the range of cpu_throttle_initial to 99"); - return false; - } - - if (params->has_announce_initial && - params->announce_initial > 100000) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "announce_initial", - "a value between 0 and 100000"); - return false; - } - if (params->has_announce_max && - params->announce_max > 100000) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "announce_max", - "a value between 0 and 100000"); - return false; - } - if (params->has_announce_rounds && - params->announce_rounds > 1000) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "announce_rounds", - "a value between 0 and 1000"); - return false; - } - if (params->has_announce_step && - (params->announce_step < 1 || - params->announce_step > 10000)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "announce_step", - "a 
value between 0 and 10000"); - return false; - } - - if (params->has_block_bitmap_mapping && - !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { - error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); - return false; - } - - return true; -} - -static void migrate_params_test_apply(MigrateSetParameters *params, - MigrationParameters *dest) -{ - *dest = migrate_get_current()->parameters; - - /* TODO use QAPI_CLONE() instead of duplicating it inline */ - - if (params->has_compress_level) { - dest->compress_level = params->compress_level; - } - - if (params->has_compress_threads) { - dest->compress_threads = params->compress_threads; - } - - if (params->has_compress_wait_thread) { - dest->compress_wait_thread = params->compress_wait_thread; - } - - if (params->has_decompress_threads) { - dest->decompress_threads = params->decompress_threads; - } - - if (params->has_throttle_trigger_threshold) { - dest->throttle_trigger_threshold = params->throttle_trigger_threshold; - } - - if (params->has_cpu_throttle_initial) { - dest->cpu_throttle_initial = params->cpu_throttle_initial; - } - - if (params->has_cpu_throttle_increment) { - dest->cpu_throttle_increment = params->cpu_throttle_increment; - } - - if (params->has_cpu_throttle_tailslow) { - dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; - } - - if (params->has_tls_creds) { - assert(params->tls_creds->type == QTYPE_QSTRING); - dest->tls_creds = params->tls_creds->u.s; - } - - if (params->has_tls_hostname) { - assert(params->tls_hostname->type == QTYPE_QSTRING); - dest->tls_hostname = params->tls_hostname->u.s; - } - - if (params->has_max_bandwidth) { - dest->max_bandwidth = params->max_bandwidth; - } - - if (params->has_downtime_limit) { - dest->downtime_limit = params->downtime_limit; - } - - if (params->has_x_checkpoint_delay) { - dest->x_checkpoint_delay = params->x_checkpoint_delay; - } - - if (params->has_block_incremental) { - dest->block_incremental = 
params->block_incremental; - } - if (params->has_multifd_channels) { - dest->multifd_channels = params->multifd_channels; - } - if (params->has_multifd_compression) { - dest->multifd_compression = params->multifd_compression; - } - if (params->has_xbzrle_cache_size) { - dest->xbzrle_cache_size = params->xbzrle_cache_size; - } - if (params->has_max_postcopy_bandwidth) { - dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth; - } - if (params->has_max_cpu_throttle) { - dest->max_cpu_throttle = params->max_cpu_throttle; - } - if (params->has_announce_initial) { - dest->announce_initial = params->announce_initial; - } - if (params->has_announce_max) { - dest->announce_max = params->announce_max; - } - if (params->has_announce_rounds) { - dest->announce_rounds = params->announce_rounds; - } - if (params->has_announce_step) { - dest->announce_step = params->announce_step; - } - - if (params->has_block_bitmap_mapping) { - dest->has_block_bitmap_mapping = true; - dest->block_bitmap_mapping = params->block_bitmap_mapping; - } -} - -static void migrate_params_apply(MigrateSetParameters *params, Error **errp) -{ - MigrationState *s = migrate_get_current(); - - /* TODO use QAPI_CLONE() instead of duplicating it inline */ - - if (params->has_compress_level) { - s->parameters.compress_level = params->compress_level; - } - - if (params->has_compress_threads) { - s->parameters.compress_threads = params->compress_threads; - } - - if (params->has_compress_wait_thread) { - s->parameters.compress_wait_thread = params->compress_wait_thread; - } - - if (params->has_decompress_threads) { - s->parameters.decompress_threads = params->decompress_threads; - } - - if (params->has_throttle_trigger_threshold) { - s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; - } - - if (params->has_cpu_throttle_initial) { - s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; - } - - if (params->has_cpu_throttle_increment) { - 
s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; - } - - if (params->has_cpu_throttle_tailslow) { - s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; - } - - if (params->has_tls_creds) { - g_free(s->parameters.tls_creds); - assert(params->tls_creds->type == QTYPE_QSTRING); - s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); - } - - if (params->has_tls_hostname) { - g_free(s->parameters.tls_hostname); - assert(params->tls_hostname->type == QTYPE_QSTRING); - s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); - } - - if (params->has_tls_authz) { - g_free(s->parameters.tls_authz); - assert(params->tls_authz->type == QTYPE_QSTRING); - s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); - } - - if (params->has_max_bandwidth) { - s->parameters.max_bandwidth = params->max_bandwidth; - if (s->to_dst_file && !migration_in_postcopy()) { - qemu_file_set_rate_limit(s->to_dst_file, - s->parameters.max_bandwidth / XFER_LIMIT_RATIO); - } - } - - if (params->has_downtime_limit) { - s->parameters.downtime_limit = params->downtime_limit; - } - - if (params->has_x_checkpoint_delay) { - s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; - if (migration_in_colo_state()) { - colo_checkpoint_notify(s); - } - } - - if (params->has_block_incremental) { - s->parameters.block_incremental = params->block_incremental; - } - if (params->has_multifd_channels) { - s->parameters.multifd_channels = params->multifd_channels; - } - if (params->has_multifd_compression) { - s->parameters.multifd_compression = params->multifd_compression; - } - if (params->has_xbzrle_cache_size) { - s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; - xbzrle_cache_resize(params->xbzrle_cache_size, errp); - } - if (params->has_max_postcopy_bandwidth) { - s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; - if (s->to_dst_file && migration_in_postcopy()) { - qemu_file_set_rate_limit(s->to_dst_file, - 
s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); - } - } - if (params->has_max_cpu_throttle) { - s->parameters.max_cpu_throttle = params->max_cpu_throttle; - } - if (params->has_announce_initial) { - s->parameters.announce_initial = params->announce_initial; - } - if (params->has_announce_max) { - s->parameters.announce_max = params->announce_max; - } - if (params->has_announce_rounds) { - s->parameters.announce_rounds = params->announce_rounds; - } - if (params->has_announce_step) { - s->parameters.announce_step = params->announce_step; - } - - if (params->has_block_bitmap_mapping) { - qapi_free_BitmapMigrationNodeAliasList( - s->parameters.block_bitmap_mapping); - - s->parameters.has_block_bitmap_mapping = true; - s->parameters.block_bitmap_mapping = - QAPI_CLONE(BitmapMigrationNodeAliasList, - params->block_bitmap_mapping); - } -} - -void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) -{ - MigrationParameters tmp; - - /* TODO Rewrite "" to null instead */ - if (params->has_tls_creds - && params->tls_creds->type == QTYPE_QNULL) { - qobject_unref(params->tls_creds->u.n); - params->tls_creds->type = QTYPE_QSTRING; - params->tls_creds->u.s = strdup(""); - } - /* TODO Rewrite "" to null instead */ - if (params->has_tls_hostname - && params->tls_hostname->type == QTYPE_QNULL) { - qobject_unref(params->tls_hostname->u.n); - params->tls_hostname->type = QTYPE_QSTRING; - params->tls_hostname->u.s = strdup(""); - } - - migrate_params_test_apply(params, &tmp); - - if (!migrate_params_check(&tmp, errp)) { - /* Invalid parameter */ - return; - } - - migrate_params_apply(params, errp); -} - - void qmp_migrate_start_postcopy(Error **errp) { MigrationState *s = migrate_get_current(); @@ -1736,61 +1358,31 @@ void migrate_set_state(int *state, int old_state, int new_state) } } -static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index, - bool state) -{ - MigrationCapabilityStatus *cap; - - cap = g_new0(MigrationCapabilityStatus, 
1); - cap->capability = index; - cap->state = state; - - return cap; -} - -void migrate_set_block_enabled(bool value, Error **errp) -{ - MigrationCapabilityStatusList *cap = NULL; - - QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value)); - qmp_migrate_set_capabilities(cap, errp); - qapi_free_MigrationCapabilityStatusList(cap); -} - -static void migrate_set_block_incremental(MigrationState *s, bool value) -{ - s->parameters.block_incremental = value; -} - -static void block_cleanup_parameters(MigrationState *s) -{ - if (s->must_remove_block_options) { - /* setting to false can never fail */ - migrate_set_block_enabled(false, &error_abort); - migrate_set_block_incremental(s, false); - s->must_remove_block_options = false; - } -} - static void migrate_fd_cleanup(MigrationState *s) { - qemu_bh_delete(s->cleanup_bh); - s->cleanup_bh = NULL; + MigrationEventType type; + + g_free(s->hostname); + s->hostname = NULL; + json_writer_free(s->vmdesc); + s->vmdesc = NULL; qemu_savevm_state_cleanup(); + close_return_path_on_source(s); + if (s->to_dst_file) { QEMUFile *tmp; trace_migrate_fd_cleanup(); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (s->migration_thread_running) { qemu_thread_join(&s->thread); s->migration_thread_running = false; } - qemu_mutex_lock_iothread(); + bql_lock(); - multifd_save_cleanup(); + multifd_send_shutdown(); qemu_mutex_lock(&s->qemu_file_lock); tmp = s->to_dst_file; s->to_dst_file = NULL; @@ -1803,7 +1395,7 @@ static void migrate_fd_cleanup(MigrationState *s) qemu_fclose(tmp); } - assert(!migration_is_active(s)); + assert(!migration_is_active()); if (s->state == MIGRATION_STATUS_CANCELLING) { migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, @@ -1814,26 +1406,16 @@ static void migrate_fd_cleanup(MigrationState *s) /* It is used on info migrate. 
We can't free it */ error_report_err(error_copy(s->error)); } - notifier_list_notify(&migration_state_notifiers, s); - block_cleanup_parameters(s); + type = migration_has_failed(s) ? MIG_EVENT_PRECOPY_FAILED : + MIG_EVENT_PRECOPY_DONE; + migration_call_notifiers(s, type, NULL); + block_cleanup_parameters(); yank_unregister_instance(MIGRATION_YANK_INSTANCE); } -static void migrate_fd_cleanup_schedule(MigrationState *s) -{ - /* - * Ref the state for bh, because it may be called when - * there're already no other refs - */ - object_ref(OBJECT(s)); - qemu_bh_schedule(s->cleanup_bh); -} - static void migrate_fd_cleanup_bh(void *opaque) { - MigrationState *s = opaque; - migrate_fd_cleanup(s); - object_unref(OBJECT(s)); + migrate_fd_cleanup(opaque); } void migrate_set_error(MigrationState *s, const Error *error) @@ -1844,6 +1426,13 @@ void migrate_set_error(MigrationState *s, const Error *error) } } +bool migrate_has_error(MigrationState *s) +{ + /* The lock is not helpful here, but still follow the rule */ + QEMU_LOCK_GUARD(&s->error_mutex); + return qatomic_read(&s->error); +} + static void migrate_error_free(MigrationState *s) { QEMU_LOCK_GUARD(&s->error_mutex); @@ -1853,7 +1442,7 @@ static void migrate_error_free(MigrationState *s) } } -void migrate_fd_error(MigrationState *s, const Error *error) +static void migrate_fd_error(MigrationState *s, const Error *error) { trace_migrate_fd_error(error_get_pretty(error)); assert(s->to_dst_file == NULL); @@ -1865,7 +1454,7 @@ void migrate_fd_error(MigrationState *s, const Error *error) static void migrate_fd_cancel(MigrationState *s) { int old_state ; - QEMUFile *f = migrate_get_current()->to_dst_file; + trace_migrate_fd_cancel(); WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { @@ -1877,7 +1466,7 @@ static void migrate_fd_cancel(MigrationState *s) do { old_state = s->state; - if (!migration_is_running(old_state)) { + if (!migration_is_running()) { break; } /* If the migration is paused, kick it out of the pause */ @@ -1891,16 
+1480,18 @@ static void migrate_fd_cancel(MigrationState *s) * If we're unlucky the migration code might be stuck somewhere in a * send/write while the network has failed and is waiting to timeout; * if we've got shutdown(2) available then we can force it to quit. - * The outgoing qemu file gets closed in migrate_fd_cleanup that is - * called in a bh, so there is no race against this cancel. */ - if (s->state == MIGRATION_STATUS_CANCELLING && f) { - qemu_file_shutdown(f); + if (s->state == MIGRATION_STATUS_CANCELLING) { + WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { + if (s->to_dst_file) { + qemu_file_shutdown(s->to_dst_file); + } + } } if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) { Error *local_err = NULL; - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); } else { @@ -1909,24 +1500,39 @@ static void migrate_fd_cancel(MigrationState *s) } } -void add_migration_state_change_notifier(Notifier *notify) +void migration_add_notifier_mode(NotifierWithReturn *notify, + MigrationNotifyFunc func, MigMode mode) { - notifier_list_add(&migration_state_notifiers, notify); + notify->notify = (NotifierWithReturnFunc)func; + notifier_with_return_list_add(&migration_state_notifiers[mode], notify); } -void remove_migration_state_change_notifier(Notifier *notify) +void migration_add_notifier(NotifierWithReturn *notify, + MigrationNotifyFunc func) { - notifier_remove(notify); + migration_add_notifier_mode(notify, func, MIG_MODE_NORMAL); } -bool migration_in_setup(MigrationState *s) +void migration_remove_notifier(NotifierWithReturn *notify) { - return s->state == MIGRATION_STATUS_SETUP; + if (notify->notify) { + notifier_with_return_remove(notify); + notify->notify = NULL; + } } -bool migration_has_finished(MigrationState *s) +int migration_call_notifiers(MigrationState *s, MigrationEventType type, + Error **errp) { - return s->state == MIGRATION_STATUS_COMPLETED; + MigMode mode = 
s->parameters.mode; + MigrationEvent e; + int ret; + + e.type = type; + ret = notifier_with_return_list_notify(&migration_state_notifiers[mode], + &e, errp); + assert(!ret || type == MIG_EVENT_PRECOPY_SETUP); + return ret; } bool migration_has_failed(MigrationState *s) @@ -1949,9 +1555,15 @@ bool migration_in_postcopy(void) } } -bool migration_in_postcopy_after_devices(MigrationState *s) +bool migration_postcopy_is_alive(int state) { - return migration_in_postcopy() && s->postcopy_after_devices; + switch (state) { + case MIGRATION_STATUS_POSTCOPY_ACTIVE: + case MIGRATION_STATUS_POSTCOPY_RECOVER: + return true; + default: + return false; + } } bool migration_in_incoming_postcopy(void) @@ -1961,12 +1573,17 @@ bool migration_in_incoming_postcopy(void) return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END; } -bool migration_in_bg_snapshot(void) +bool migration_incoming_postcopy_advised(void) { - MigrationState *s = migrate_get_current(); + PostcopyState ps = postcopy_state_get(); + + return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END; +} +bool migration_in_bg_snapshot(void) +{ return migrate_background_snapshot() && - migration_is_setup_or_active(s->state); + migration_is_setup_or_active(); } bool migration_is_idle(void) @@ -1999,72 +1616,185 @@ bool migration_is_idle(void) return false; } -bool migration_is_active(MigrationState *s) +bool migration_is_active(void) { + MigrationState *s = current_migration; + return (s->state == MIGRATION_STATUS_ACTIVE || s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); } -void migrate_init(MigrationState *s) +bool migration_is_device(void) +{ + MigrationState *s = current_migration; + + return s->state == MIGRATION_STATUS_DEVICE; +} + +bool migration_thread_is_self(void) { + MigrationState *s = current_migration; + + return qemu_thread_is_self(&s->thread); +} + +bool migrate_mode_is_cpr(MigrationState *s) +{ + return s->parameters.mode == MIG_MODE_CPR_REBOOT; +} + +int migrate_init(MigrationState *s, 
Error **errp) +{ + int ret; + + ret = qemu_savevm_state_prepare(errp); + if (ret) { + return ret; + } + /* * Reinitialise all migration state, except * parameters/capabilities that the user set, and * locks. */ - s->cleanup_bh = 0; - s->vm_start_bh = 0; s->to_dst_file = NULL; s->state = MIGRATION_STATUS_NONE; s->rp_state.from_dst_file = NULL; - s->rp_state.error = false; s->mbps = 0.0; s->pages_per_second = 0.0; s->downtime = 0; s->expected_downtime = 0; s->setup_time = 0; s->start_postcopy = false; - s->postcopy_after_devices = false; s->migration_thread_running = false; error_free(s->error); s->error = NULL; - s->hostname = NULL; + s->vmdesc = NULL; migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); s->total_time = 0; - s->vm_was_running = false; + s->vm_old_state = -1; s->iteration_initial_bytes = 0; s->threshold_size = 0; + s->switchover_acked = false; + s->rdma_migration = false; + /* + * set mig_stats memory to zero for a new migration + */ + memset(&mig_stats, 0, sizeof(mig_stats)); + migration_reset_vfio_bytes_transferred(); + + return 0; +} + +static bool is_busy(Error **reasonp, Error **errp) +{ + ERRP_GUARD(); + + /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. 
*/ + if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) { + error_propagate_prepend(errp, *reasonp, + "disallowing migration blocker " + "(migration/snapshot in progress) for: "); + *reasonp = NULL; + return true; + } + return false; } -int migrate_add_blocker(Error *reason, Error **errp) +static bool is_only_migratable(Error **reasonp, Error **errp, int modes) { - if (only_migratable) { - error_propagate_prepend(errp, error_copy(reason), + ERRP_GUARD(); + + if (only_migratable && (modes & BIT(MIG_MODE_NORMAL))) { + error_propagate_prepend(errp, *reasonp, "disallowing migration blocker " "(--only-migratable) for: "); - return -EACCES; + *reasonp = NULL; + return true; + } + return false; +} + +static int get_modes(MigMode mode, va_list ap) +{ + int modes = 0; + + while (mode != -1 && mode != MIG_MODE_ALL) { + assert(mode >= MIG_MODE_NORMAL && mode < MIG_MODE__MAX); + modes |= BIT(mode); + mode = va_arg(ap, MigMode); + } + if (mode == MIG_MODE_ALL) { + modes = BIT(MIG_MODE__MAX) - 1; } + return modes; +} - if (migration_is_idle()) { - migration_blockers = g_slist_prepend(migration_blockers, reason); - return 0; +static int add_blockers(Error **reasonp, Error **errp, int modes) +{ + for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) { + if (modes & BIT(mode)) { + migration_blockers[mode] = g_slist_prepend(migration_blockers[mode], + *reasonp); + } } + return 0; +} + +int migrate_add_blocker(Error **reasonp, Error **errp) +{ + return migrate_add_blocker_modes(reasonp, errp, MIG_MODE_ALL); +} + +int migrate_add_blocker_normal(Error **reasonp, Error **errp) +{ + return migrate_add_blocker_modes(reasonp, errp, MIG_MODE_NORMAL, -1); +} + +int migrate_add_blocker_modes(Error **reasonp, Error **errp, MigMode mode, ...) 
+{ + int modes; + va_list ap; + + va_start(ap, mode); + modes = get_modes(mode, ap); + va_end(ap); + + if (is_only_migratable(reasonp, errp, modes)) { + return -EACCES; + } else if (is_busy(reasonp, errp)) { + return -EBUSY; + } + return add_blockers(reasonp, errp, modes); +} + +int migrate_add_blocker_internal(Error **reasonp, Error **errp) +{ + int modes = BIT(MIG_MODE__MAX) - 1; - error_propagate_prepend(errp, error_copy(reason), - "disallowing migration blocker " - "(migration in progress) for: "); - return -EBUSY; + if (is_busy(reasonp, errp)) { + return -EBUSY; + } + return add_blockers(reasonp, errp, modes); } -void migrate_del_blocker(Error *reason) +void migrate_del_blocker(Error **reasonp) { - migration_blockers = g_slist_remove(migration_blockers, reason); + if (*reasonp) { + for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) { + migration_blockers[mode] = g_slist_remove(migration_blockers[mode], + *reasonp); + } + error_free(*reasonp); + *reasonp = NULL; + } } -void qmp_migrate_incoming(const char *uri, Error **errp) +void qmp_migrate_incoming(const char *uri, bool has_channels, + MigrationChannelList *channels, Error **errp) { Error *local_err = NULL; static bool once = true; @@ -2082,7 +1812,7 @@ void qmp_migrate_incoming(const char *uri, Error **errp) return; } - qemu_start_incoming_migration(uri, &local_err); + qemu_start_incoming_migration(uri, has_channels, channels, &local_err); if (local_err) { yank_unregister_instance(MIGRATION_YANK_INSTANCE); @@ -2110,44 +1840,51 @@ void qmp_migrate_recover(const char *uri, Error **errp) return; } - if (qatomic_cmpxchg(&mis->postcopy_recover_triggered, - false, true) == true) { - error_setg(errp, "Migrate recovery is triggered already"); - return; - } + /* If there's an existing transport, release it */ + migration_incoming_transport_cleanup(mis); /* * Note that this call will never start a real migration; it will * only re-setup the migration stream and poke existing migration * to continue using that newly 
established channel. */ - qemu_start_incoming_migration(uri, errp); - - /* Safe to dereference with the assert above */ - if (*errp) { - /* Reset the flag so user could still retry */ - qatomic_set(&mis->postcopy_recover_triggered, false); - } + qemu_start_incoming_migration(uri, false, NULL, errp); } void qmp_migrate_pause(Error **errp) { MigrationState *ms = migrate_get_current(); MigrationIncomingState *mis = migration_incoming_get_current(); - int ret; + int ret = 0; - if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { + if (migration_postcopy_is_alive(ms->state)) { /* Source side, during postcopy */ + Error *error = NULL; + + /* Tell the core migration that we're pausing */ + error_setg(&error, "Postcopy migration is paused by the user"); + migrate_set_error(ms, error); + error_free(error); + qemu_mutex_lock(&ms->qemu_file_lock); - ret = qemu_file_shutdown(ms->to_dst_file); + if (ms->to_dst_file) { + ret = qemu_file_shutdown(ms->to_dst_file); + } qemu_mutex_unlock(&ms->qemu_file_lock); if (ret) { error_setg(errp, "Failed to pause source migration"); } + + /* + * Kick the migration thread out of any waiting windows (on behalf + * of the rp thread). 
+ */ + migration_rp_kick(ms); + return; } - if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { + if (migration_postcopy_is_alive(mis->state)) { ret = qemu_file_shutdown(mis->from_src_file); if (ret) { error_setg(errp, "Failed to pause destination migration"); @@ -2156,17 +1893,19 @@ void qmp_migrate_pause(Error **errp) } error_setg(errp, "migrate-pause is currently only supported " - "during postcopy-active state"); + "during postcopy-active or postcopy-recover state"); } bool migration_is_blocked(Error **errp) { + GSList *blockers = migration_blockers[migrate_mode()]; + if (qemu_savevm_state_blocked(errp)) { return true; } - if (migration_blockers) { - error_propagate(errp, error_copy(migration_blockers->data)); + if (blockers) { + error_propagate(errp, error_copy(blockers->data)); return true; } @@ -2177,7 +1916,15 @@ bool migration_is_blocked(Error **errp) static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, bool resume, Error **errp) { - Error *local_err = NULL; + if (blk_inc) { + warn_report("parameter 'inc' is deprecated;" + " use blockdev-mirror with NBD instead"); + } + + if (blk) { + warn_report("parameter 'blk' is deprecated;" + " use blockdev-mirror with NBD instead"); + } if (resume) { if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { @@ -2208,7 +1955,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, return true; } - if (migration_is_running(s->state)) { + if (migration_is_running()) { error_setg(errp, QERR_MIGRATION_ACTIVE); return false; } @@ -2224,88 +1971,153 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, return false; } + if (kvm_hwpoisoned_mem()) { + error_setg(errp, "Can't migrate this vm with hardware poisoned memory, " + "please reboot the vm and try again"); + return false; + } + if (migration_is_blocked(errp)) { return false; } + if (migrate_mapped_ram()) { + if (migrate_tls()) { + error_setg(errp, "Cannot use TLS with mapped-ram"); + return false; + } + + if 
(migrate_multifd_compression()) { + error_setg(errp, "Cannot use compression with mapped-ram"); + return false; + } + } + + if (migrate_mode_is_cpr(s)) { + const char *conflict = NULL; + + if (migrate_postcopy()) { + conflict = "postcopy"; + } else if (migrate_background_snapshot()) { + conflict = "background snapshot"; + } else if (migrate_colo()) { + conflict = "COLO"; + } + + if (conflict) { + error_setg(errp, "Cannot use %s with CPR", conflict); + return false; + } + } + if (blk || blk_inc) { - if (migrate_colo_enabled()) { + if (migrate_colo()) { error_setg(errp, "No disk migration is required in COLO mode"); return false; } - if (migrate_use_block() || migrate_use_block_incremental()) { + if (migrate_block() || migrate_block_incremental()) { error_setg(errp, "Command options are incompatible with " "current migration capabilities"); return false; } - migrate_set_block_enabled(true, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, errp)) { return false; } s->must_remove_block_options = true; } if (blk_inc) { - migrate_set_block_incremental(s, true); + migrate_set_block_incremental(true); } - migrate_init(s); - /* - * set ram_counters memory to zero for a - * new migration - */ - memset(&ram_counters, 0, sizeof(ram_counters)); + if (migrate_init(s, errp)) { + return false; + } return true; } -void qmp_migrate(const char *uri, bool has_blk, bool blk, +void qmp_migrate(const char *uri, bool has_channels, + MigrationChannelList *channels, bool has_blk, bool blk, bool has_inc, bool inc, bool has_detach, bool detach, bool has_resume, bool resume, Error **errp) { + bool resume_requested; Error *local_err = NULL; MigrationState *s = migrate_get_current(); - const char *p = NULL; + g_autoptr(MigrationChannel) channel = NULL; + MigrationAddress *addr = NULL; + /* + * Having preliminary checks for uri and channel + */ + if (!uri == !channels) { + error_setg(errp, "need either 'uri' or 'channels' 
argument"); + return; + } + + if (channels) { + /* To verify that Migrate channel list has only item */ + if (channels->next) { + error_setg(errp, "Channel list has more than one entries"); + return; + } + addr = channels->value->addr; + } + + if (uri) { + /* caller uses the old URI syntax */ + if (!migrate_uri_parse(uri, &channel, errp)) { + return; + } + addr = channel->addr; + } + + /* transport mechanism not suitable for migration? */ + if (!migration_channels_and_transport_compatible(addr, errp)) { + return; + } + + resume_requested = has_resume && resume; if (!migrate_prepare(s, has_blk && blk, has_inc && inc, - has_resume && resume, errp)) { + resume_requested, errp)) { /* Error detected, put into errp */ return; } - if (!(has_resume && resume)) { + if (!resume_requested) { if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { return; } } - if (strstart(uri, "tcp:", &p) || - strstart(uri, "unix:", NULL) || - strstart(uri, "vsock:", NULL)) { - socket_start_outgoing_migration(s, p ? 
p : uri, &local_err); + if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { + SocketAddress *saddr = &addr->u.socket; + if (saddr->type == SOCKET_ADDRESS_TYPE_INET || + saddr->type == SOCKET_ADDRESS_TYPE_UNIX || + saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) { + socket_start_outgoing_migration(s, saddr, &local_err); + } else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) { + fd_start_outgoing_migration(s, saddr->u.fd.str, &local_err); + } #ifdef CONFIG_RDMA - } else if (strstart(uri, "rdma:", &p)) { - rdma_start_outgoing_migration(s, p, &local_err); + } else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) { + rdma_start_outgoing_migration(s, &addr->u.rdma, &local_err); #endif - } else if (strstart(uri, "exec:", &p)) { - exec_start_outgoing_migration(s, p, &local_err); - } else if (strstart(uri, "fd:", &p)) { - fd_start_outgoing_migration(s, p, &local_err); + } else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) { + exec_start_outgoing_migration(s, addr->u.exec.args, &local_err); + } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) { + file_start_outgoing_migration(s, &addr->u.file, &local_err); } else { - if (!(has_resume && resume)) { - yank_unregister_instance(MIGRATION_YANK_INSTANCE); - } - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri", + error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, "uri", "a valid migration protocol"); migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED); - block_cleanup_parameters(s); - return; + block_cleanup_parameters(); } if (local_err) { - if (!(has_resume && resume)) { + if (!resume_requested) { yank_unregister_instance(MIGRATION_YANK_INSTANCE); } migrate_fd_error(s, local_err); @@ -2316,7 +2128,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, void qmp_migrate_cancel(Error **errp) { - migration_cancel(); + migration_cancel(NULL); } void qmp_migrate_continue(MigrationStatus state, Error **errp) @@ -2330,263 +2142,26 @@ void qmp_migrate_continue(MigrationStatus state, 
Error **errp) qemu_sem_post(&s->pause_sem); } -bool migrate_release_ram(void) +int migration_rp_wait(MigrationState *s) { - MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; -} - -bool migrate_postcopy_ram(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; -} - -bool migrate_postcopy(void) -{ - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); -} - -bool migrate_auto_converge(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; -} - -bool migrate_zero_blocks(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; -} - -bool migrate_postcopy_blocktime(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; -} - -bool migrate_use_compression(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; -} - -int migrate_compress_level(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->parameters.compress_level; -} - -int migrate_compress_threads(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->parameters.compress_threads; -} - -int migrate_compress_wait_thread(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->parameters.compress_wait_thread; -} - -int migrate_decompress_threads(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->parameters.decompress_threads; -} - -bool migrate_dirty_bitmaps(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; -} - -bool migrate_ignore_shared(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - 
return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; -} - -bool migrate_validate_uuid(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; -} - -bool migrate_use_events(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; -} - -bool migrate_use_multifd(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD]; -} - -bool migrate_pause_before_switchover(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[ - MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; -} - -int migrate_multifd_channels(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->parameters.multifd_channels; -} - -MultiFDCompression migrate_multifd_compression(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->parameters.multifd_compression; -} - -int migrate_multifd_zlib_level(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->parameters.multifd_zlib_level; -} - -int migrate_multifd_zstd_level(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->parameters.multifd_zstd_level; -} - -int migrate_use_xbzrle(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; -} - -uint64_t migrate_xbzrle_cache_size(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->parameters.xbzrle_cache_size; -} - -static int64_t migrate_max_postcopy_bandwidth(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->parameters.max_postcopy_bandwidth; -} - -bool migrate_use_block(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; -} - -bool migrate_use_return_path(void) -{ - 
MigrationState *s; - - s = migrate_get_current(); - - return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; -} - -bool migrate_use_block_incremental(void) -{ - MigrationState *s; - - s = migrate_get_current(); - - return s->parameters.block_incremental; -} + /* If migration has failure already, ignore the wait */ + if (migrate_has_error(s)) { + return -1; + } -bool migrate_background_snapshot(void) -{ - MigrationState *s; + qemu_sem_wait(&s->rp_state.rp_sem); - s = migrate_get_current(); + /* After wait, double check that there's no failure */ + if (migrate_has_error(s)) { + return -1; + } - return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; + return 0; } -/* migration thread support */ -/* - * Something bad happened to the RP stream, mark an error - * The caller shall print or trace something to indicate why - */ -static void mark_source_rp_bad(MigrationState *s) +void migration_rp_kick(MigrationState *s) { - s->rp_state.error = true; + qemu_sem_post(&s->rp_state.rp_sem); } static struct rp_cmd_args { @@ -2600,6 +2175,7 @@ static struct rp_cmd_args { [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, + [MIG_RP_MSG_SWITCHOVER_ACK] = { .len = 0, .name = "SWITCHOVER_ACK" }, [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, }; @@ -2608,10 +2184,11 @@ static struct rp_cmd_args { * We're allowed to send more than requested (e.g. to round to our page size) * and we don't need to send pages that have already been sent. 
*/ -static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, - ram_addr_t start, size_t len) +static void +migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, + ram_addr_t start, size_t len, Error **errp) { - long our_host_ps = qemu_real_host_page_size; + long our_host_ps = qemu_real_host_page_size(); trace_migrate_handle_rp_req_pages(rbname, start, len); @@ -2619,52 +2196,39 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, * Since we currently insist on matching page sizes, just sanity check * we're being asked for whole host pages. */ - if (start & (our_host_ps - 1) || - (len & (our_host_ps - 1))) { - error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT - " len: %zd", __func__, start, len); - mark_source_rp_bad(ms); + if (!QEMU_IS_ALIGNED(start, our_host_ps) || + !QEMU_IS_ALIGNED(len, our_host_ps)) { + error_setg(errp, "MIG_RP_MSG_REQ_PAGES: Misaligned page request, start:" + RAM_ADDR_FMT " len: %zd", start, len); return; } - if (ram_save_queue_pages(rbname, start, len)) { - mark_source_rp_bad(ms); - } -} - -/* Return true to retry, false to quit */ -static bool postcopy_pause_return_path_thread(MigrationState *s) -{ - trace_postcopy_pause_return_path(); - - qemu_sem_wait(&s->postcopy_pause_rp_sem); - - trace_postcopy_pause_return_path_continued(); - - return true; + ram_save_queue_pages(rbname, start, len, errp); } -static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name) +static bool migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name, + Error **errp) { RAMBlock *block = qemu_ram_block_by_name(block_name); if (!block) { - error_report("%s: invalid block name '%s'", __func__, block_name); - return -EINVAL; + error_setg(errp, "MIG_RP_MSG_RECV_BITMAP has invalid block name '%s'", + block_name); + return false; } /* Fetch the received bitmap and refresh the dirty bitmap */ - return ram_dirty_bitmap_reload(s, block); + return 
ram_dirty_bitmap_reload(s, block, errp); } -static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) +static bool migrate_handle_rp_resume_ack(MigrationState *s, + uint32_t value, Error **errp) { trace_source_return_path_thread_resume_ack(value); if (value != MIGRATION_RESUME_ACK_VALUE) { - error_report("%s: illegal resume_ack value %"PRIu32, - __func__, value); - return -1; + error_setg(errp, "illegal resume_ack value %"PRIu32, value); + return false; } /* Now both sides are active. */ @@ -2672,13 +2236,16 @@ static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) MIGRATION_STATUS_POSTCOPY_ACTIVE); /* Notify send thread that time to continue send pages */ - qemu_sem_post(&s->rp_state.rp_sem); + migration_rp_kick(s); - return 0; + return true; } -/* Release ms->rp_state.from_dst_file in a safe way */ -static void migration_release_from_dst_file(MigrationState *ms) +/* + * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if + * existed) in a safe way. + */ +static void migration_release_dst_files(MigrationState *ms) { QEMUFile *file; @@ -2691,6 +2258,18 @@ static void migration_release_from_dst_file(MigrationState *ms) ms->rp_state.from_dst_file = NULL; } + /* + * Do the same to postcopy fast path socket too if there is. No + * locking needed because this qemufile should only be managed by + * return path thread. 
+ */ + if (ms->postcopy_qemufile_src) { + migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src); + qemu_file_shutdown(ms->postcopy_qemufile_src); + qemu_fclose(ms->postcopy_qemufile_src); + ms->postcopy_qemufile_src = NULL; + } + qemu_fclose(file); } @@ -2707,49 +2286,46 @@ static void *source_return_path_thread(void *opaque) uint32_t tmp32, sibling_error; ram_addr_t start = 0; /* =0 to silence warning */ size_t len = 0, expected_len; + Error *err = NULL; int res; trace_source_return_path_thread_entry(); rcu_register_thread(); -retry: - while (!ms->rp_state.error && !qemu_file_get_error(rp) && - migration_is_setup_or_active(ms->state)) { + while (migration_is_setup_or_active()) { trace_source_return_path_thread_loop_top(); + header_type = qemu_get_be16(rp); header_len = qemu_get_be16(rp); if (qemu_file_get_error(rp)) { - mark_source_rp_bad(ms); + qemu_file_get_error_obj(rp, &err); goto out; } if (header_type >= MIG_RP_MSG_MAX || header_type == MIG_RP_MSG_INVALID) { - error_report("RP: Received invalid message 0x%04x length 0x%04x", - header_type, header_len); - mark_source_rp_bad(ms); + error_setg(&err, "Received invalid message 0x%04x length 0x%04x", + header_type, header_len); goto out; } if ((rp_cmd_args[header_type].len != -1 && header_len != rp_cmd_args[header_type].len) || header_len > sizeof(buf)) { - error_report("RP: Received '%s' message (0x%04x) with" - "incorrect length %d expecting %zu", - rp_cmd_args[header_type].name, header_type, header_len, - (size_t)rp_cmd_args[header_type].len); - mark_source_rp_bad(ms); + error_setg(&err, "Received '%s' message (0x%04x) with" + "incorrect length %d expecting %zu", + rp_cmd_args[header_type].name, header_type, header_len, + (size_t)rp_cmd_args[header_type].len); goto out; } /* We know we've got a valid header by this point */ res = qemu_get_buffer(rp, buf, header_len); if (res != header_len) { - error_report("RP: Failed reading data for message 0x%04x" - " read %d expected %d", - header_type, res, 
header_len); - mark_source_rp_bad(ms); + error_setg(&err, "Failed reading data for message 0x%04x" + " read %d expected %d", + header_type, res, header_len); goto out; } @@ -2759,8 +2335,7 @@ retry: sibling_error = ldl_be_p(buf); trace_source_return_path_thread_shut(sibling_error); if (sibling_error) { - error_report("RP: Sibling indicated error %d", sibling_error); - mark_source_rp_bad(ms); + error_setg(&err, "Sibling indicated error %d", sibling_error); } /* * We'll let the main thread deal with closing the RP @@ -2772,12 +2347,16 @@ retry: case MIG_RP_MSG_PONG: tmp32 = ldl_be_p(buf); trace_source_return_path_thread_pong(tmp32); + qemu_sem_post(&ms->rp_state.rp_pong_acks); break; case MIG_RP_MSG_REQ_PAGES: start = ldq_be_p(buf); len = ldl_be_p(buf + 8); - migrate_handle_rp_req_pages(ms, NULL, start, len); + migrate_handle_rp_req_pages(ms, NULL, start, len, &err); + if (err) { + goto out; + } break; case MIG_RP_MSG_REQ_PAGES_ID: @@ -2792,74 +2371,74 @@ retry: expected_len += tmp32; } if (header_len != expected_len) { - error_report("RP: Req_Page_id with length %d expecting %zd", - header_len, expected_len); - mark_source_rp_bad(ms); + error_setg(&err, "Req_Page_id with length %d expecting %zd", + header_len, expected_len); + goto out; + } + migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len, + &err); + if (err) { goto out; } - migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len); break; case MIG_RP_MSG_RECV_BITMAP: if (header_len < 1) { - error_report("%s: missing block name", __func__); - mark_source_rp_bad(ms); + error_setg(&err, "MIG_RP_MSG_RECV_BITMAP missing block name"); goto out; } /* Format: len (1B) + idstr (<255B). This ends the idstr. 
*/ buf[buf[0] + 1] = '\0'; - if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) { - mark_source_rp_bad(ms); + if (!migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1), &err)) { goto out; } break; case MIG_RP_MSG_RESUME_ACK: tmp32 = ldl_be_p(buf); - if (migrate_handle_rp_resume_ack(ms, tmp32)) { - mark_source_rp_bad(ms); + if (!migrate_handle_rp_resume_ack(ms, tmp32, &err)) { goto out; } break; + case MIG_RP_MSG_SWITCHOVER_ACK: + ms->switchover_acked = true; + trace_source_return_path_thread_switchover_acked(); + break; + default: break; } } out: - res = qemu_file_get_error(rp); - if (res) { - if (res == -EIO && migration_in_postcopy()) { - /* - * Maybe there is something we can do: it looks like a - * network down issue, and we pause for a recovery. - */ - migration_release_from_dst_file(ms); - rp = NULL; - if (postcopy_pause_return_path_thread(ms)) { - /* - * Reload rp, reset the rest. Referencing it is safe since - * it's reset only by us above, or when migration completes - */ - rp = ms->rp_state.from_dst_file; - ms->rp_state.error = false; - goto retry; - } - } - + if (err) { + migrate_set_error(ms, err); + error_free(err); trace_source_return_path_thread_bad_end(); - mark_source_rp_bad(ms); + } + + if (ms->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { + /* + * this will be extremely unlikely: that we got yet another network + * issue during recovering of the 1st network failure.. during this + * period the main migration thread can be waiting on rp_sem for + * this thread to sync with the other side. + * + * When this happens, explicitly kick the migration thread out of + * RECOVER stage and back to PAUSED, so the admin can try + * everything again. 
+ */ + migration_rp_kick(ms); } trace_source_return_path_thread_end(); - migration_release_from_dst_file(ms); rcu_unregister_thread(); + return NULL; } -static int open_return_path_on_source(MigrationState *ms, - bool create_thread) +static int open_return_path_on_source(MigrationState *ms) { ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); if (!ms->rp_state.from_dst_file) { @@ -2868,11 +2447,6 @@ static int open_return_path_on_source(MigrationState *ms, trace_open_return_path_on_source(); - if (!create_thread) { - /* We're done */ - return 0; - } - qemu_thread_create(&ms->rp_state.rp_thread, "return path", source_return_path_thread, ms, QEMU_THREAD_JOINABLE); ms->rp_state.rp_thread_created = true; @@ -2882,66 +2456,93 @@ static int open_return_path_on_source(MigrationState *ms, return 0; } -/* Returns 0 if the RP was ok, otherwise there was an error on the RP */ -static int await_return_path_close_on_source(MigrationState *ms) +/* Return true if error detected, or false otherwise */ +static bool close_return_path_on_source(MigrationState *ms) { + if (!ms->rp_state.rp_thread_created) { + return false; + } + + trace_migration_return_path_end_before(); + /* - * If this is a normal exit then the destination will send a SHUT and the - * rp_thread will exit, however if there's an error we need to cause - * it to exit. + * If this is a normal exit then the destination will send a SHUT + * and the rp_thread will exit, however if there's an error we + * need to cause it to exit. shutdown(2), if we have it, will + * cause it to unblock if it's stuck waiting for the destination. */ - if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) { - /* - * shutdown(2), if we have it, will cause it to unblock if it's stuck - * waiting for the destination. 
- */ - qemu_file_shutdown(ms->rp_state.from_dst_file); - mark_source_rp_bad(ms); + WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) { + if (migrate_has_error(ms) && ms->rp_state.from_dst_file) { + qemu_file_shutdown(ms->rp_state.from_dst_file); + } } - trace_await_return_path_close_on_source_joining(); + qemu_thread_join(&ms->rp_state.rp_thread); ms->rp_state.rp_thread_created = false; - trace_await_return_path_close_on_source_close(); - return ms->rp_state.error; + migration_release_dst_files(ms); + trace_migration_return_path_end_after(); + + /* Return path will persist the error in MigrationState when quit */ + return migrate_has_error(ms); +} + +static inline void +migration_wait_main_channel(MigrationState *ms) +{ + /* Wait until one PONG message received */ + qemu_sem_wait(&ms->rp_state.rp_pong_acks); } /* * Switch from normal iteration to postcopy * Returns non-0 on error */ -static int postcopy_start(MigrationState *ms) +static int postcopy_start(MigrationState *ms, Error **errp) { int ret; QIOChannelBuffer *bioc; QEMUFile *fb; - int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - int64_t bandwidth = migrate_max_postcopy_bandwidth(); + uint64_t bandwidth = migrate_max_postcopy_bandwidth(); bool restart_block = false; int cur_state = MIGRATION_STATUS_ACTIVE; + + if (migrate_postcopy_preempt()) { + migration_wait_main_channel(ms); + if (postcopy_preempt_establish_channel(ms)) { + migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED); + error_setg(errp, "%s: Failed to establish preempt channel", + __func__); + return -1; + } + } + if (!migrate_pause_before_switchover()) { migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_POSTCOPY_ACTIVE); } trace_postcopy_start(); - qemu_mutex_lock_iothread(); + bql_lock(); trace_postcopy_start_set_run(); - qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); - global_state_store(); - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + ret = migration_stop_vm(ms, 
RUN_STATE_FINISH_MIGRATE); if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Failed to stop the VM", __func__); goto fail; } ret = migration_maybe_pause(ms, &cur_state, MIGRATION_STATUS_POSTCOPY_ACTIVE); if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Failed in migration_maybe_pause()", + __func__); goto fail; } ret = bdrv_inactivate_all(); if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Failed in bdrv_inactivate_all()", + __func__); goto fail; } restart_block = true; @@ -2959,10 +2560,7 @@ static int postcopy_start(MigrationState *ms) * that are dirty */ if (migrate_postcopy_ram()) { - if (ram_postcopy_send_discard_bitmap(ms)) { - error_report("postcopy send discard bitmap failed"); - goto fail; - } + ram_postcopy_send_discard_bitmap(ms); } /* @@ -2970,12 +2568,7 @@ static int postcopy_start(MigrationState *ms) * will notice we're in POSTCOPY_ACTIVE and not actually * wrap their state up here */ - /* 0 max-postcopy-bandwidth means unlimited */ - if (!bandwidth) { - qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX); - } else { - qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO); - } + migration_rate_set(bandwidth); if (migrate_postcopy_ram()) { /* Ping just for debugging, helps line traces up */ qemu_savevm_send_ping(ms->to_dst_file, 2); @@ -2994,7 +2587,7 @@ static int postcopy_start(MigrationState *ms) */ bioc = qio_channel_buffer_new(4096); qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer"); - fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); + fb = qemu_file_new_output(QIO_CHANNEL(bioc)); object_unref(OBJECT(bioc)); /* @@ -3018,7 +2611,7 @@ static int postcopy_start(MigrationState *ms) */ ret = qemu_file_get_error(ms->to_dst_file); if (ret) { - error_report("postcopy_start: Migration stream errored (pre package)"); + error_setg(errp, "postcopy_start: Migration stream errored (pre package)"); goto fail_closefb; } @@ -3026,6 +2619,7 @@ static int postcopy_start(MigrationState *ms) /* Now send that blob */ if 
(qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) { + error_setg(errp, "%s: Failed to send packaged data", __func__); goto fail_closefb; } qemu_fclose(fb); @@ -3034,12 +2628,11 @@ static int postcopy_start(MigrationState *ms) * at the transition to postcopy and after the device state; in particular * spice needs to trigger a transition now */ - ms->postcopy_after_devices = true; - notifier_list_notify(&migration_state_notifiers, ms); + migration_call_notifiers(ms, MIG_EVENT_PRECOPY_DONE, NULL); - ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop; + migration_downtime_end(ms); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (migrate_postcopy_ram()) { /* @@ -3055,10 +2648,11 @@ static int postcopy_start(MigrationState *ms) ret = qemu_file_get_error(ms->to_dst_file); if (ret) { - error_report("postcopy_start: Migration stream errored"); - migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, - MIGRATION_STATUS_FAILED); + error_setg_errno(errp, -ret, "postcopy_start: Migration stream error"); + bql_lock(); + goto fail; } + trace_postcopy_preempt_enabled(migrate_postcopy_preempt()); return ret; @@ -3073,18 +2667,19 @@ fail: */ Error *local_err = NULL; - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); } } - qemu_mutex_unlock_iothread(); + migration_call_notifiers(ms, MIG_EVENT_PRECOPY_FAILED, NULL); + bql_unlock(); return -1; } /** * migration_maybe_pause: Pause if required to by - * migrate_pause_before_switchover called with the iothread locked + * migrate_pause_before_switchover called with the BQL locked * Returns: 0 on success */ static int migration_maybe_pause(MigrationState *s, @@ -3112,19 +2707,96 @@ static int migration_maybe_pause(MigrationState *s, * wait for the 'pause_sem' semaphore. 
*/ if (s->state != MIGRATION_STATUS_CANCELLING) { - qemu_mutex_unlock_iothread(); + bql_unlock(); migrate_set_state(&s->state, *current_active_state, MIGRATION_STATUS_PRE_SWITCHOVER); qemu_sem_wait(&s->pause_sem); migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, new_state); *current_active_state = new_state; - qemu_mutex_lock_iothread(); + bql_lock(); } return s->state == new_state ? 0 : -EINVAL; } +static int migration_completion_precopy(MigrationState *s, + int *current_active_state) +{ + int ret; + + bql_lock(); + + if (!migrate_mode_is_cpr(s)) { + ret = migration_stop_vm(s, RUN_STATE_FINISH_MIGRATE); + if (ret < 0) { + goto out_unlock; + } + } + + ret = migration_maybe_pause(s, current_active_state, + MIGRATION_STATUS_DEVICE); + if (ret < 0) { + goto out_unlock; + } + + /* + * Inactivate disks except in COLO, and track that we have done so in order + * to remember to reactivate them if migration fails or is cancelled. + */ + s->block_inactive = !migrate_colo(); + migration_rate_set(RATE_LIMIT_DISABLED); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, + s->block_inactive); +out_unlock: + bql_unlock(); + return ret; +} + +static void migration_completion_postcopy(MigrationState *s) +{ + trace_migration_completion_postcopy_end(); + + bql_lock(); + qemu_savevm_state_complete_postcopy(s->to_dst_file); + bql_unlock(); + + /* + * Shutdown the postcopy fast path thread. This is only needed when dest + * QEMU binary is old (7.1/7.2). QEMU 8.0+ doesn't need this. + */ + if (migrate_postcopy_preempt() && s->preempt_pre_7_2) { + postcopy_preempt_shutdown_file(s); + } + + trace_migration_completion_postcopy_end_after_complete(); +} + +static void migration_completion_failed(MigrationState *s, + int current_active_state) +{ + if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE || + s->state == MIGRATION_STATUS_DEVICE)) { + /* + * If not doing postcopy, vm_start() will be called: let's + * regain control on images. 
+ */ + Error *local_err = NULL; + + bql_lock(); + bdrv_activate_all(&local_err); + if (local_err) { + error_report_err(local_err); + } else { + s->block_inactive = false; + } + bql_unlock(); + } + + migrate_set_state(&s->state, current_active_state, + MIGRATION_STATUS_FAILED); +} + /** * migration_completion: Used by migration_thread when there's not much left. * The caller 'breaks' the loop when this returns. @@ -3133,96 +2805,52 @@ static int migration_maybe_pause(MigrationState *s, */ static void migration_completion(MigrationState *s) { - int ret; + int ret = 0; int current_active_state = s->state; + Error *local_err = NULL; if (s->state == MIGRATION_STATUS_ACTIVE) { - qemu_mutex_lock_iothread(); - s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); - s->vm_was_running = runstate_is_running(); - ret = global_state_store(); - - if (!ret) { - bool inactivate = !migrate_colo_enabled(); - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); - trace_migration_completion_vm_stop(ret); - if (ret >= 0) { - ret = migration_maybe_pause(s, ¤t_active_state, - MIGRATION_STATUS_DEVICE); - } - if (ret >= 0) { - qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, - inactivate); - } - if (inactivate && ret >= 0) { - s->block_inactive = true; - } - } - qemu_mutex_unlock_iothread(); - - if (ret < 0) { - goto fail; - } + ret = migration_completion_precopy(s, ¤t_active_state); } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { - trace_migration_completion_postcopy_end(); + migration_completion_postcopy(s); + } else { + ret = -1; + } - qemu_savevm_state_complete_postcopy(s->to_dst_file); - trace_migration_completion_postcopy_end_after_complete(); - } else if (s->state == MIGRATION_STATUS_CANCELLING) { + if (ret < 0) { goto fail; } - /* - * If rp was opened we must clean up the thread before - * cleaning everything else up (since if there 
are no failures - * it will wait for the destination to send it's status in - * a SHUT command). - */ - if (s->rp_state.rp_thread_created) { - int rp_error; - trace_migration_return_path_end_before(); - rp_error = await_return_path_close_on_source(s); - trace_migration_return_path_end_after(rp_error); - if (rp_error) { - goto fail_invalidate; - } + if (close_return_path_on_source(s)) { + goto fail; } if (qemu_file_get_error(s->to_dst_file)) { trace_migration_completion_file_err(); - goto fail_invalidate; + goto fail; } - if (!migrate_colo_enabled()) { - migrate_set_state(&s->state, current_active_state, - MIGRATION_STATUS_COMPLETED); + if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) { + /* COLO does not support postcopy */ + migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_COLO); + } else { + migration_completion_end(s); } return; -fail_invalidate: - /* If not doing postcopy, vm_start() will be called: let's regain - * control on images. - */ - if (s->state == MIGRATION_STATUS_ACTIVE || - s->state == MIGRATION_STATUS_DEVICE) { - Error *local_err = NULL; - - qemu_mutex_lock_iothread(); - bdrv_invalidate_cache_all(&local_err); - if (local_err) { - error_report_err(local_err); - } else { - s->block_inactive = false; - } - qemu_mutex_unlock_iothread(); +fail: + if (qemu_file_get_error_obj(s->to_dst_file, &local_err)) { + migrate_set_error(s, local_err); + error_free(local_err); + } else if (ret) { + error_setg_errno(&local_err, -ret, "Error in migration completion"); + migrate_set_error(s, local_err); + error_free(local_err); } -fail: - migrate_set_state(&s->state, current_active_state, - MIGRATION_STATUS_FAILED); + migration_completion_failed(s, current_active_state); } /** @@ -3235,13 +2863,6 @@ static void bg_migration_completion(MigrationState *s) { int current_active_state = s->state; - /* - * Stop tracking RAM writes - un-protect memory, un-register UFFD - * memory ranges, flush kernel wait queues and wake up threads - * waiting 
for write fault to be resolved. - */ - ram_write_tracking_stop(); - if (s->state == MIGRATION_STATUS_ACTIVE) { /* * By this moment we have RAM content saved into the migration stream. @@ -3260,8 +2881,7 @@ static void bg_migration_completion(MigrationState *s) goto fail; } - migrate_set_state(&s->state, current_active_state, - MIGRATION_STATUS_COMPLETED); + migration_completion_end(s); return; fail: @@ -3269,12 +2889,6 @@ fail: MIGRATION_STATUS_FAILED); } -bool migrate_colo_enabled(void) -{ - MigrationState *s = migrate_get_current(); - return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; -} - typedef enum MigThrError { /* No error detected */ MIG_THR_ERR_NONE = 0, @@ -3289,7 +2903,9 @@ static int postcopy_resume_handshake(MigrationState *s) qemu_savevm_send_postcopy_resume(s->to_dst_file); while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { - qemu_sem_wait(&s->rp_state.rp_sem); + if (migration_rp_wait(s)) { + return -1; + } } if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { @@ -3316,6 +2932,20 @@ static int postcopy_do_resume(MigrationState *s) } /* + * If preempt is enabled, re-establish the preempt channel. Note that + * we do it after resume prepare to make sure the main channel will be + * created before the preempt channel. E.g. with weak network, the + * dest QEMU may get messed up with the preempt and main channels on + * the order of connection setup. This guarantees the correct order. + */ + ret = postcopy_preempt_establish_channel(s); + if (ret) { + error_report("%s: postcopy_preempt_establish_channel(): %d", + __func__, ret); + return ret; + } + + /* * Last handshake with destination on the resume (destination will * switch to postcopy-active afterwards) */ @@ -3341,6 +2971,13 @@ static MigThrError postcopy_pause(MigrationState *s) QEMUFile *file; /* + * We're already pausing, so ignore any errors on the return + * path and just wait for the thread to finish. It will be + * re-created when we resume. 
+ */ + close_return_path_on_source(s); + + /* * Current channel is possibly broken. Release it. Note that this is * guaranteed even without lock because to_dst_file should only be * modified by the migration thread. That also guarantees that the @@ -3376,12 +3013,6 @@ static MigThrError postcopy_pause(MigrationState *s) if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { /* Woken up by a recover procedure. Give it a shot */ - /* - * Firstly, let's wake up the return path now, with a new - * return path channel. - */ - qemu_sem_post(&s->postcopy_pause_rp_sem); - /* Do the resume logic */ if (postcopy_do_resume(s) == 0) { /* Let's continue! */ @@ -3402,6 +3033,17 @@ static MigThrError postcopy_pause(MigrationState *s) } } +void migration_file_set_error(int err) +{ + MigrationState *s = current_migration; + + WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { + if (s->to_dst_file) { + qemu_file_set_error(s->to_dst_file, err); + } + } +} + static MigThrError migration_detect_error(MigrationState *s) { int ret; @@ -3414,8 +3056,13 @@ static MigThrError migration_detect_error(MigrationState *s) return MIG_THR_ERR_FATAL; } - /* Try to detect any file errors */ - ret = qemu_file_get_error_obj(s->to_dst_file, &local_error); + /* + * Try to detect any file errors. Note that postcopy_qemufile_src will + * be NULL when postcopy preempt is not enabled. + */ + ret = qemu_file_get_error_obj_any(s->to_dst_file, + s->postcopy_qemufile_src, + &local_error); if (!ret) { /* Everything is fine */ assert(!local_error); @@ -3427,7 +3074,7 @@ static MigThrError migration_detect_error(MigrationState *s) error_free(local_error); } - if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) { + if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret) { /* * For postcopy, we allow the network to be down for a * while. 
After that, it can be continued by a @@ -3447,31 +3094,28 @@ static MigThrError migration_detect_error(MigrationState *s) } } -/* How many bytes have we transferred since the beginning of the migration */ -static uint64_t migration_total_bytes(MigrationState *s) -{ - return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes; -} - -static void migration_calculate_complete(MigrationState *s) +static void migration_completion_end(MigrationState *s) { - uint64_t bytes = migration_total_bytes(s); + uint64_t bytes = migration_transferred_bytes(); int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); int64_t transfer_time; + /* + * Take the BQL here so that query-migrate on the QMP thread sees: + * - atomic update of s->total_time and s->mbps; + * - correct ordering of s->mbps update vs. s->state; + */ + bql_lock(); + migration_downtime_end(s); s->total_time = end_time - s->start_time; - if (!s->downtime) { - /* - * It's still not set, so we are precopy migration. For - * postcopy, downtime is calculated during postcopy_start(). - */ - s->downtime = end_time - s->downtime_start; - } - transfer_time = s->total_time - s->setup_time; if (transfer_time) { s->mbps = ((double) bytes * 8.0) / transfer_time / 1000; } + + migrate_set_state(&s->state, s->state, + MIGRATION_STATUS_COMPLETED); + bql_unlock(); } static void update_iteration_initial_status(MigrationState *s) @@ -3481,7 +3125,7 @@ static void update_iteration_initial_status(MigrationState *s) * wrong speed calculation. 
*/ s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - s->iteration_initial_bytes = migration_total_bytes(s); + s->iteration_initial_bytes = migration_transferred_bytes(); s->iteration_initial_pages = ram_get_total_transferred_pages(); } @@ -3490,17 +3134,33 @@ static void migration_update_counters(MigrationState *s, { uint64_t transferred, transferred_pages, time_spent; uint64_t current_bytes; /* bytes transferred since the beginning */ + uint64_t switchover_bw; + /* Expected bandwidth when switching over to destination QEMU */ + double expected_bw_per_ms; double bandwidth; if (current_time < s->iteration_start_time + BUFFER_DELAY) { return; } - current_bytes = migration_total_bytes(s); + switchover_bw = migrate_avail_switchover_bandwidth(); + current_bytes = migration_transferred_bytes(); transferred = current_bytes - s->iteration_initial_bytes; time_spent = current_time - s->iteration_start_time; bandwidth = (double)transferred / time_spent; - s->threshold_size = bandwidth * s->parameters.downtime_limit; + + if (switchover_bw) { + /* + * If the user specified a switchover bandwidth, let's trust the + * user so that can be more accurate than what we estimated. + */ + expected_bw_per_ms = switchover_bw / 1000; + } else { + /* If the user doesn't specify bandwidth, we use the estimated */ + expected_bw_per_ms = bandwidth; + } + + s->threshold_size = expected_bw_per_ms * migrate_downtime_limit(); s->mbps = (((double) transferred * 8.0) / ((double) time_spent / 1000.0)) / 1000.0 / 1000.0; @@ -3514,16 +3174,34 @@ static void migration_update_counters(MigrationState *s, * if we haven't sent anything, we don't want to * recalculate. 
10000 is a small enough number for our purposes */ - if (ram_counters.dirty_pages_rate && transferred > 10000) { - s->expected_downtime = ram_counters.remaining / bandwidth; + if (stat64_get(&mig_stats.dirty_pages_rate) && + transferred > 10000) { + s->expected_downtime = + stat64_get(&mig_stats.dirty_bytes_last_sync) / expected_bw_per_ms; } - qemu_file_reset_rate_limit(s->to_dst_file); + migration_rate_reset(); update_iteration_initial_status(s); trace_migrate_transferred(transferred, time_spent, - bandwidth, s->threshold_size); + /* Both in unit bytes/ms */ + bandwidth, switchover_bw / 1000, + s->threshold_size); +} + +static bool migration_can_switchover(MigrationState *s) +{ + if (!migrate_switchover_ack()) { + return true; + } + + /* No reason to wait for switchover ACK if VM is stopped */ + if (!runstate_is_running()) { + return true; + } + + return s->switchover_acked; } /* Migration thread iteration status */ @@ -3539,33 +3217,39 @@ typedef enum { */ static MigIterateState migration_iteration_run(MigrationState *s) { - uint64_t pending_size, pend_pre, pend_compat, pend_post; + uint64_t must_precopy, can_postcopy, pending_size; + Error *local_err = NULL; bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE; + bool can_switchover = migration_can_switchover(s); - qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre, - &pend_compat, &pend_post); - pending_size = pend_pre + pend_compat + pend_post; + qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy); + pending_size = must_precopy + can_postcopy; + trace_migrate_pending_estimate(pending_size, must_precopy, can_postcopy); - trace_migrate_pending(pending_size, s->threshold_size, - pend_pre, pend_compat, pend_post); + if (pending_size < s->threshold_size) { + qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy); + pending_size = must_precopy + can_postcopy; + trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy); + } - if (pending_size && 
pending_size >= s->threshold_size) { - /* Still a significant amount to transfer */ - if (!in_postcopy && pend_pre <= s->threshold_size && - qatomic_read(&s->start_postcopy)) { - if (postcopy_start(s)) { - error_report("%s: postcopy failed to start", __func__); - } - return MIG_ITERATE_SKIP; - } - /* Just another iteration step */ - qemu_savevm_state_iterate(s->to_dst_file, in_postcopy); - } else { + if ((!pending_size || pending_size < s->threshold_size) && can_switchover) { trace_migration_thread_low_pending(pending_size); migration_completion(s); return MIG_ITERATE_BREAK; } + /* Still a significant amount to transfer */ + if (!in_postcopy && must_precopy <= s->threshold_size && can_switchover && + qatomic_read(&s->start_postcopy)) { + if (postcopy_start(s, &local_err)) { + migrate_set_error(s, local_err); + error_report_err(local_err); + } + return MIG_ITERATE_SKIP; + } + + /* Just another iteration step */ + qemu_savevm_state_iterate(s->to_dst_file, in_postcopy); return MIG_ITERATE_RESUME; } @@ -3574,37 +3258,26 @@ static void migration_iteration_finish(MigrationState *s) /* If we enabled cpu throttling for auto-converge, turn it off. */ cpu_throttle_stop(); - qemu_mutex_lock_iothread(); + bql_lock(); switch (s->state) { case MIGRATION_STATUS_COMPLETED: - migration_calculate_complete(s); runstate_set(RUN_STATE_POSTMIGRATE); break; - - case MIGRATION_STATUS_ACTIVE: - /* - * We should really assert here, but since it's during - * migration, let's try to reduce the usage of assertions. - */ - if (!migrate_colo_enabled()) { - error_report("%s: critical error: calling COLO code without " - "COLO enabled", __func__); - } + case MIGRATION_STATUS_COLO: + assert(migrate_colo()); migrate_start_colo_process(s); - /* - * Fixme: we will run VM in COLO no matter its old running state. - * After exited COLO, we will keep running. 
- */ - s->vm_was_running = true; + s->vm_old_state = RUN_STATE_RUNNING; /* Fallthrough */ case MIGRATION_STATUS_FAILED: case MIGRATION_STATUS_CANCELLED: case MIGRATION_STATUS_CANCELLING: - if (s->vm_was_running) { - vm_start(); + if (runstate_is_live(s->vm_old_state)) { + if (!runstate_check(RUN_STATE_SHUTDOWN)) { + vm_start(); + } } else { if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { - runstate_set(RUN_STATE_POSTMIGRATE); + runstate_set(s->vm_old_state); } } break; @@ -3614,18 +3287,23 @@ static void migration_iteration_finish(MigrationState *s) error_report("%s: Unknown ending state %d", __func__, s->state); break; } - migrate_fd_cleanup_schedule(s); - qemu_mutex_unlock_iothread(); + + migration_bh_schedule(migrate_fd_cleanup_bh, s); + bql_unlock(); } static void bg_migration_iteration_finish(MigrationState *s) { - qemu_mutex_lock_iothread(); + /* + * Stop tracking RAM writes - un-protect memory, un-register UFFD + * memory ranges, flush kernel wait queues and wake up threads + * waiting for write fault to be resolved. 
+ */ + ram_write_tracking_stop(); + + bql_lock(); switch (s->state) { case MIGRATION_STATUS_COMPLETED: - migration_calculate_complete(s); - break; - case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_FAILED: case MIGRATION_STATUS_CANCELLED: @@ -3638,8 +3316,8 @@ static void bg_migration_iteration_finish(MigrationState *s) break; } - migrate_fd_cleanup_schedule(s); - qemu_mutex_unlock_iothread(); + migration_bh_schedule(migrate_fd_cleanup_bh, s); + bql_unlock(); } /* @@ -3677,7 +3355,7 @@ bool migration_rate_limit(void) bool urgent = false; migration_update_counters(s, now); - if (qemu_file_rate_limit(s->to_dst_file)) { + if (migration_rate_exceeded(s->to_dst_file)) { if (qemu_file_get_error(s->to_dst_file)) { return false; @@ -3729,7 +3407,8 @@ static void qemu_savevm_wait_unplug(MigrationState *s, int old_state, while (timeout-- && qemu_savevm_state_guest_unplug_pending()) { qemu_sem_timedwait(&s->wait_unplug_sem, 250); } - if (qemu_savevm_state_guest_unplug_pending()) { + if (qemu_savevm_state_guest_unplug_pending() && + !qtest_enabled()) { warn_report("migration: partially unplugged device on " "failure"); } @@ -3748,16 +3427,25 @@ static void qemu_savevm_wait_unplug(MigrationState *s, int old_state, static void *migration_thread(void *opaque) { MigrationState *s = opaque; + MigrationThread *thread = NULL; int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); MigThrError thr_error; bool urgent = false; + thread = migration_threads_add("live_migration", qemu_get_thread_id()); + rcu_register_thread(); object_ref(OBJECT(s)); update_iteration_initial_status(s); + if (!multifd_send_setup()) { + goto out; + } + + bql_lock(); qemu_savevm_state_header(s->to_dst_file); + bql_unlock(); /* * If we opened the return path, we need to make sure dst has it @@ -3780,12 +3468,14 @@ static void *migration_thread(void *opaque) qemu_savevm_send_postcopy_advise(s->to_dst_file); } - if (migrate_colo_enabled()) { + if (migrate_colo()) { /* Notify migration destination that we 
enable COLO */ qemu_savevm_send_colo_enable(s->to_dst_file); } + bql_lock(); qemu_savevm_state_setup(s->to_dst_file); + bql_unlock(); qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE); @@ -3794,8 +3484,8 @@ static void *migration_thread(void *opaque) trace_migration_thread_setup_complete(); - while (migration_is_active(s)) { - if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { + while (migration_is_active()) { + if (urgent || !migration_rate_exceeded(s->to_dst_file)) { MigIterateState iter_state = migration_iteration_run(s); if (iter_state == MIG_ITERATE_SKIP) { continue; @@ -3824,10 +3514,12 @@ static void *migration_thread(void *opaque) urgent = migration_rate_limit(); } +out: trace_migration_thread_after_loop(); migration_iteration_finish(s); object_unref(OBJECT(s)); rcu_unregister_thread(); + migration_threads_remove(thread); return NULL; } @@ -3835,11 +3527,8 @@ static void bg_migration_vm_start_bh(void *opaque) { MigrationState *s = opaque; - qemu_bh_delete(s->vm_start_bh); - s->vm_start_bh = NULL; - - vm_start(); - s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start; + vm_resume(s->vm_old_state); + migration_downtime_end(s); } /** @@ -3868,7 +3557,7 @@ static void *bg_migration_thread(void *opaque) rcu_register_thread(); object_ref(OBJECT(s)); - qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); + migration_rate_set(RATE_LIMIT_DISABLED); setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); /* @@ -3882,7 +3571,7 @@ static void *bg_migration_thread(void *opaque) */ s->bioc = qio_channel_buffer_new(512 * 1024); qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer"); - fb = qemu_fopen_channel_output(QIO_CHANNEL(s->bioc)); + fb = qemu_file_new_output(QIO_CHANNEL(s->bioc)); object_unref(OBJECT(s->bioc)); update_iteration_initial_status(s); @@ -3895,8 +3584,10 @@ static void *bg_migration_thread(void *opaque) ram_write_tracking_prepare(); #endif + bql_lock(); qemu_savevm_state_header(s->to_dst_file); 
qemu_savevm_state_setup(s->to_dst_file); + bql_unlock(); qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE); @@ -3904,22 +3595,10 @@ static void *bg_migration_thread(void *opaque) s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; trace_migration_thread_setup_complete(); - s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - qemu_mutex_lock_iothread(); - - /* - * If VM is currently in suspended state, then, to make a valid runstate - * transition in vm_stop_force_state() we need to wakeup it up. - */ - qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); - s->vm_was_running = runstate_is_running(); + bql_lock(); - if (global_state_store()) { - goto fail; - } - /* Forcibly stop VM before saving state of vCPUs and devices */ - if (vm_stop_force_state(RUN_STATE_PAUSED)) { + if (migration_stop_vm(s, RUN_STATE_PAUSED)) { goto fail; } /* @@ -3948,12 +3627,10 @@ static void *bg_migration_thread(void *opaque) * calling VM state change notifiers from vm_start() would initiate * writes to virtio VQs memory which is in write-protected region. */ - s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s); - qemu_bh_schedule(s->vm_start_bh); - - qemu_mutex_unlock_iothread(); + migration_bh_schedule(bg_migration_vm_start_bh, s); + bql_unlock(); - while (migration_is_active(s)) { + while (migration_is_active()) { MigIterateState iter_state = bg_migration_iteration_run(s); if (iter_state == MIG_ITERATE_SKIP) { continue; @@ -3980,7 +3657,7 @@ fail: if (early_fail) { migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED); - qemu_mutex_unlock_iothread(); + bql_unlock(); } bg_migration_iteration_finish(s); @@ -3995,8 +3672,9 @@ fail: void migrate_fd_connect(MigrationState *s, Error *error_in) { Error *local_err = NULL; - int64_t rate_limit; + uint64_t rate_limit; bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED; + int ret; /* * If there's a previous error, free it and prepare for another one. 
@@ -4005,13 +3683,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) */ migrate_error_free(s); - s->expected_downtime = s->parameters.downtime_limit; - if (resume) { - assert(s->cleanup_bh); - } else { - assert(!s->cleanup_bh); - s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s); - } + s->expected_downtime = migrate_downtime_limit(); if (error_in) { migrate_fd_error(s, error_in); if (resume) { @@ -4031,17 +3703,18 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) if (resume) { /* This is a resumed migration */ - rate_limit = s->parameters.max_postcopy_bandwidth / - XFER_LIMIT_RATIO; + rate_limit = migrate_max_postcopy_bandwidth(); } else { /* This is a fresh new migration */ - rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO; + rate_limit = migrate_max_bandwidth(); /* Notify before starting migration thread */ - notifier_list_notify(&migration_state_notifiers, s); + if (migration_call_notifiers(s, MIG_EVENT_PRECOPY_SETUP, &local_err)) { + goto fail; + } } - qemu_file_set_rate_limit(s->to_dst_file, rate_limit); + migration_rate_set(rate_limit); qemu_file_set_blocking(s->to_dst_file, true); /* @@ -4049,15 +3722,22 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) * precopy, only if user specified "return-path" capability would * QEMU uses the return path. */ - if (migrate_postcopy_ram() || migrate_use_return_path()) { - if (open_return_path_on_source(s, !resume)) { - error_report("Unable to open return-path for postcopy"); - migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); - migrate_fd_cleanup(s); - return; + if (migrate_postcopy_ram() || migrate_return_path()) { + if (open_return_path_on_source(s)) { + error_setg(&local_err, "Unable to open return-path for postcopy"); + goto fail; } } + /* + * This needs to be done before resuming a postcopy. Note: for newer + * QEMUs we will delay the channel creation until postcopy_start(), to + * avoid disorder of channel creations. 
+ */ + if (migrate_postcopy_preempt() && s->preempt_pre_7_2) { + postcopy_preempt_setup(s); + } + if (resume) { /* Wakeup the main migration thread to do the recovery */ migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED, @@ -4066,12 +3746,12 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) return; } - if (multifd_save_setup(&local_err) != 0) { - error_report_err(local_err); - migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, - MIGRATION_STATUS_FAILED); - migrate_fd_cleanup(s); - return; + if (migrate_mode_is_cpr(s)) { + ret = migration_stop_vm(s, RUN_STATE_FINISH_MIGRATE); + if (ret < 0) { + error_setg(&local_err, "migration_stop_vm failed, error %d", -ret); + goto fail; + } } if (migrate_background_snapshot()) { @@ -4082,125 +3762,14 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) migration_thread, s, QEMU_THREAD_JOINABLE); } s->migration_thread_running = true; -} - -void migration_global_dump(Monitor *mon) -{ - MigrationState *ms = migrate_get_current(); + return; - monitor_printf(mon, "globals:\n"); - monitor_printf(mon, "store-global-state: %s\n", - ms->store_global_state ? "on" : "off"); - monitor_printf(mon, "only-migratable: %s\n", - only_migratable ? "on" : "off"); - monitor_printf(mon, "send-configuration: %s\n", - ms->send_configuration ? "on" : "off"); - monitor_printf(mon, "send-section-footer: %s\n", - ms->send_section_footer ? "on" : "off"); - monitor_printf(mon, "decompress-error-check: %s\n", - ms->decompress_error_check ? 
"on" : "off"); - monitor_printf(mon, "clear-bitmap-shift: %u\n", - ms->clear_bitmap_shift); -} - -#define DEFINE_PROP_MIG_CAP(name, x) \ - DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false) - -static Property migration_properties[] = { - DEFINE_PROP_BOOL("store-global-state", MigrationState, - store_global_state, true), - DEFINE_PROP_BOOL("send-configuration", MigrationState, - send_configuration, true), - DEFINE_PROP_BOOL("send-section-footer", MigrationState, - send_section_footer, true), - DEFINE_PROP_BOOL("decompress-error-check", MigrationState, - decompress_error_check, true), - DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, - clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), - - /* Migration parameters */ - DEFINE_PROP_UINT8("x-compress-level", MigrationState, - parameters.compress_level, - DEFAULT_MIGRATE_COMPRESS_LEVEL), - DEFINE_PROP_UINT8("x-compress-threads", MigrationState, - parameters.compress_threads, - DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), - DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, - parameters.compress_wait_thread, true), - DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, - parameters.decompress_threads, - DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), - DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, - parameters.throttle_trigger_threshold, - DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), - DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, - parameters.cpu_throttle_initial, - DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), - DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, - parameters.cpu_throttle_increment, - DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), - DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, - parameters.cpu_throttle_tailslow, false), - DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, - parameters.max_bandwidth, MAX_THROTTLE), - DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, - parameters.downtime_limit, - 
DEFAULT_MIGRATE_SET_DOWNTIME), - DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, - parameters.x_checkpoint_delay, - DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), - DEFINE_PROP_UINT8("multifd-channels", MigrationState, - parameters.multifd_channels, - DEFAULT_MIGRATE_MULTIFD_CHANNELS), - DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, - parameters.multifd_compression, - DEFAULT_MIGRATE_MULTIFD_COMPRESSION), - DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, - parameters.multifd_zlib_level, - DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), - DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, - parameters.multifd_zstd_level, - DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), - DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, - parameters.xbzrle_cache_size, - DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), - DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, - parameters.max_postcopy_bandwidth, - DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), - DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, - parameters.max_cpu_throttle, - DEFAULT_MIGRATE_MAX_CPU_THROTTLE), - DEFINE_PROP_SIZE("announce-initial", MigrationState, - parameters.announce_initial, - DEFAULT_MIGRATE_ANNOUNCE_INITIAL), - DEFINE_PROP_SIZE("announce-max", MigrationState, - parameters.announce_max, - DEFAULT_MIGRATE_ANNOUNCE_MAX), - DEFINE_PROP_SIZE("announce-rounds", MigrationState, - parameters.announce_rounds, - DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), - DEFINE_PROP_SIZE("announce-step", MigrationState, - parameters.announce_step, - DEFAULT_MIGRATE_ANNOUNCE_STEP), - - /* Migration capabilities */ - DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), - DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), - DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), - DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), - DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), - DEFINE_PROP_MIG_CAP("x-events", 
MIGRATION_CAPABILITY_EVENTS), - DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), - DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), - DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), - DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), - DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), - DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), - DEFINE_PROP_MIG_CAP("x-background-snapshot", - MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), - - DEFINE_PROP_END_OF_LIST(), -}; +fail: + migrate_set_error(s, local_err); + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + error_report_err(local_err); + migrate_fd_cleanup(s); +} static void migration_class_init(ObjectClass *klass, void *data) { @@ -4213,25 +3782,22 @@ static void migration_class_init(ObjectClass *klass, void *data) static void migration_instance_finalize(Object *obj) { MigrationState *ms = MIGRATION_OBJ(obj); - MigrationParameters *params = &ms->parameters; qemu_mutex_destroy(&ms->error_mutex); qemu_mutex_destroy(&ms->qemu_file_lock); - g_free(params->tls_hostname); - g_free(params->tls_creds); qemu_sem_destroy(&ms->wait_unplug_sem); qemu_sem_destroy(&ms->rate_limit_sem); qemu_sem_destroy(&ms->pause_sem); qemu_sem_destroy(&ms->postcopy_pause_sem); - qemu_sem_destroy(&ms->postcopy_pause_rp_sem); qemu_sem_destroy(&ms->rp_state.rp_sem); + qemu_sem_destroy(&ms->rp_state.rp_pong_acks); + qemu_sem_destroy(&ms->postcopy_qemufile_src_sem); error_free(ms->error); } static void migration_instance_init(Object *obj) { MigrationState *ms = MIGRATION_OBJ(obj); - MigrationParameters *params = &ms->parameters; ms->state = MIGRATION_STATUS_NONE; ms->mbps = -1; @@ -4239,38 +3805,14 @@ static void migration_instance_init(Object *obj) qemu_sem_init(&ms->pause_sem, 0); qemu_mutex_init(&ms->error_mutex); - params->tls_hostname = g_strdup(""); - params->tls_creds = g_strdup(""); - - /* Set has_* up only for parameter 
checks */ - params->has_compress_level = true; - params->has_compress_threads = true; - params->has_decompress_threads = true; - params->has_throttle_trigger_threshold = true; - params->has_cpu_throttle_initial = true; - params->has_cpu_throttle_increment = true; - params->has_cpu_throttle_tailslow = true; - params->has_max_bandwidth = true; - params->has_downtime_limit = true; - params->has_x_checkpoint_delay = true; - params->has_block_incremental = true; - params->has_multifd_channels = true; - params->has_multifd_compression = true; - params->has_multifd_zlib_level = true; - params->has_multifd_zstd_level = true; - params->has_xbzrle_cache_size = true; - params->has_max_postcopy_bandwidth = true; - params->has_max_cpu_throttle = true; - params->has_announce_initial = true; - params->has_announce_max = true; - params->has_announce_rounds = true; - params->has_announce_step = true; + migrate_params_init(&ms->parameters); qemu_sem_init(&ms->postcopy_pause_sem, 0); - qemu_sem_init(&ms->postcopy_pause_rp_sem, 0); qemu_sem_init(&ms->rp_state.rp_sem, 0); + qemu_sem_init(&ms->rp_state.rp_pong_acks, 0); qemu_sem_init(&ms->rate_limit_sem, 0); qemu_sem_init(&ms->wait_unplug_sem, 0); + qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0); qemu_mutex_init(&ms->qemu_file_lock); } @@ -4280,27 +3822,14 @@ static void migration_instance_init(Object *obj) */ static bool migration_object_check(MigrationState *ms, Error **errp) { - MigrationCapabilityStatusList *head = NULL; /* Assuming all off */ - bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret; - int i; + bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 }; if (!migrate_params_check(&ms->parameters, errp)) { return false; } - for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { - if (ms->enabled_capabilities[i]) { - QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); - } - } - - ret = migrate_caps_check(cap_list, head, errp); - - /* It works with head == NULL */ - qapi_free_MigrationCapabilityStatusList(head); - - return ret; + 
return migrate_caps_check(old_caps, ms->capabilities, errp); } static const TypeInfo migration_type = { |