aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 1 | /* |
| 2 | * QEMU live migration |
| 3 | * |
| 4 | * Copyright IBM, Corp. 2008 |
| 5 | * |
| 6 | * Authors: |
| 7 | * Anthony Liguori <aliguori@us.ibm.com> |
| 8 | * |
| 9 | * This work is licensed under the terms of the GNU GPL, version 2. See |
| 10 | * the COPYING file in the top-level directory. |
| 11 | * |
| 12 | */ |
| 13 | |
| 14 | #include "qemu-common.h" |
| 15 | #include "migration.h" |
aliguori | 376253e | 2009-03-05 23:01:23 +0000 | [diff] [blame] | 16 | #include "monitor.h" |
aliguori | 065e281 | 2008-11-11 16:46:33 +0000 | [diff] [blame] | 17 | #include "buffered_file.h" |
| 18 | #include "sysemu.h" |
| 19 | #include "block.h" |
| 20 | #include "qemu_socket.h" |
| 21 | |
| 22 | //#define DEBUG_MIGRATION |
| 23 | |
/*
 * dprintf(): debug trace helper local to this file.
 *
 * When DEBUG_MIGRATION is defined the message is printed on stdout with a
 * "migration: " prefix.  Otherwise the macro expands to an empty statement
 * and its arguments are not evaluated.
 */
#if defined(DEBUG_MIGRATION)
#define dprintf(fmt, ...)                               \
    do {                                                \
        printf("migration: " fmt, ## __VA_ARGS__);      \
    } while (0)
#else
#define dprintf(fmt, ...)                               \
    do {                                                \
    } while (0)
#endif
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 31 | |
| 32 | /* Migration speed throttling */ |
| 33 | static uint32_t max_throttle = (32 << 20); |
| 34 | |
| 35 | static MigrationState *current_migration; |
| 36 | |
/*
 * Start receiving a migration described by @uri.
 *
 * The URI prefix selects the transport: "tcp:" is always available,
 * "exec:" only on non-Windows builds.  The text after the prefix is passed
 * to the transport.  An unrecognised prefix is reported on stderr.
 */
void qemu_start_incoming_migration(const char *uri)
{
    const char *rest;

    if (strstart(uri, "tcp:", &rest)) {
        tcp_start_incoming_migration(rest);
        return;
    }
#if !defined(WIN32)
    if (strstart(uri, "exec:", &rest)) {
        exec_start_incoming_migration(rest);
        return;
    }
#endif
    fprintf(stderr, "unknown migration protocol: %s\n", uri);
}
| 50 | |
aliguori | 376253e | 2009-03-05 23:01:23 +0000 | [diff] [blame] | 51 | void do_migrate(Monitor *mon, int detach, const char *uri) |
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 52 | { |
aliguori | 34c9dd8 | 2008-10-13 03:14:31 +0000 | [diff] [blame] | 53 | MigrationState *s = NULL; |
| 54 | const char *p; |
| 55 | |
| 56 | if (strstart(uri, "tcp:", &p)) |
aliguori | ff8d81d | 2008-10-24 22:10:31 +0000 | [diff] [blame] | 57 | s = tcp_start_outgoing_migration(p, max_throttle, detach); |
aliguori | 065e281 | 2008-11-11 16:46:33 +0000 | [diff] [blame] | 58 | #if !defined(WIN32) |
| 59 | else if (strstart(uri, "exec:", &p)) |
| 60 | s = exec_start_outgoing_migration(p, max_throttle, detach); |
| 61 | #endif |
aliguori | 34c9dd8 | 2008-10-13 03:14:31 +0000 | [diff] [blame] | 62 | else |
aliguori | 376253e | 2009-03-05 23:01:23 +0000 | [diff] [blame] | 63 | monitor_printf(mon, "unknown migration protocol: %s\n", uri); |
aliguori | 34c9dd8 | 2008-10-13 03:14:31 +0000 | [diff] [blame] | 64 | |
| 65 | if (s == NULL) |
aliguori | 376253e | 2009-03-05 23:01:23 +0000 | [diff] [blame] | 66 | monitor_printf(mon, "migration failed\n"); |
aliguori | 34c9dd8 | 2008-10-13 03:14:31 +0000 | [diff] [blame] | 67 | else { |
aliguori | ff8d81d | 2008-10-24 22:10:31 +0000 | [diff] [blame] | 68 | if (current_migration) |
| 69 | current_migration->release(current_migration); |
aliguori | 34c9dd8 | 2008-10-13 03:14:31 +0000 | [diff] [blame] | 70 | |
aliguori | ff8d81d | 2008-10-24 22:10:31 +0000 | [diff] [blame] | 71 | current_migration = s; |
aliguori | 34c9dd8 | 2008-10-13 03:14:31 +0000 | [diff] [blame] | 72 | } |
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 73 | } |
| 74 | |
aliguori | 376253e | 2009-03-05 23:01:23 +0000 | [diff] [blame] | 75 | void do_migrate_cancel(Monitor *mon) |
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 76 | { |
| 77 | MigrationState *s = current_migration; |
| 78 | |
| 79 | if (s) |
aliguori | ff8d81d | 2008-10-24 22:10:31 +0000 | [diff] [blame] | 80 | s->cancel(s); |
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 81 | } |
| 82 | |
aliguori | 376253e | 2009-03-05 23:01:23 +0000 | [diff] [blame] | 83 | void do_migrate_set_speed(Monitor *mon, const char *value) |
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 84 | { |
| 85 | double d; |
| 86 | char *ptr; |
Glauber Costa | daa91de | 2009-05-20 18:26:58 -0400 | [diff] [blame] | 87 | FdMigrationState *s; |
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 88 | |
| 89 | d = strtod(value, &ptr); |
| 90 | switch (*ptr) { |
| 91 | case 'G': case 'g': |
aliguori | ff8d81d | 2008-10-24 22:10:31 +0000 | [diff] [blame] | 92 | d *= 1024; |
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 93 | case 'M': case 'm': |
aliguori | ff8d81d | 2008-10-24 22:10:31 +0000 | [diff] [blame] | 94 | d *= 1024; |
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 95 | case 'K': case 'k': |
aliguori | ff8d81d | 2008-10-24 22:10:31 +0000 | [diff] [blame] | 96 | d *= 1024; |
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 97 | default: |
aliguori | ff8d81d | 2008-10-24 22:10:31 +0000 | [diff] [blame] | 98 | break; |
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 99 | } |
| 100 | |
| 101 | max_throttle = (uint32_t)d; |
Glauber Costa | daa91de | 2009-05-20 18:26:58 -0400 | [diff] [blame] | 102 | s = migrate_to_fms(current_migration); |
| 103 | |
| 104 | if (s) { |
| 105 | qemu_file_set_rate_limit(s->file, max_throttle); |
| 106 | } |
| 107 | |
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 108 | } |
| 109 | |
/*
 * Maximum downtime, in nanoseconds, that we are willing to accept for a
 * migration.  Nanoseconds are used because that is the finest resolution
 * get_clock() can achieve.  This is an internal unit only: all user-visible
 * values must be expressed in seconds.
 */
static uint64_t max_downtime = 30 * 1000 * 1000;

/* Return the currently configured maximum downtime in nanoseconds. */
uint64_t migrate_max_downtime(void)
{
    const uint64_t downtime_ns = max_downtime;

    return downtime_ns;
}
| 120 | |
aliguori | 376253e | 2009-03-05 23:01:23 +0000 | [diff] [blame] | 121 | void do_info_migrate(Monitor *mon) |
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 122 | { |
| 123 | MigrationState *s = current_migration; |
aliguori | 376253e | 2009-03-05 23:01:23 +0000 | [diff] [blame] | 124 | |
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 125 | if (s) { |
aliguori | 376253e | 2009-03-05 23:01:23 +0000 | [diff] [blame] | 126 | monitor_printf(mon, "Migration status: "); |
aliguori | ff8d81d | 2008-10-24 22:10:31 +0000 | [diff] [blame] | 127 | switch (s->get_status(s)) { |
| 128 | case MIG_STATE_ACTIVE: |
aliguori | 376253e | 2009-03-05 23:01:23 +0000 | [diff] [blame] | 129 | monitor_printf(mon, "active\n"); |
Glauber Costa | 9f9e28c | 2009-05-21 17:38:01 -0400 | [diff] [blame] | 130 | monitor_printf(mon, "transferred ram: %" PRIu64 " kbytes\n", ram_bytes_transferred() >> 10); |
| 131 | monitor_printf(mon, "remaining ram: %" PRIu64 " kbytes\n", ram_bytes_remaining() >> 10); |
| 132 | monitor_printf(mon, "total ram: %" PRIu64 " kbytes\n", ram_bytes_total() >> 10); |
aliguori | ff8d81d | 2008-10-24 22:10:31 +0000 | [diff] [blame] | 133 | break; |
| 134 | case MIG_STATE_COMPLETED: |
aliguori | 376253e | 2009-03-05 23:01:23 +0000 | [diff] [blame] | 135 | monitor_printf(mon, "completed\n"); |
aliguori | ff8d81d | 2008-10-24 22:10:31 +0000 | [diff] [blame] | 136 | break; |
| 137 | case MIG_STATE_ERROR: |
aliguori | 376253e | 2009-03-05 23:01:23 +0000 | [diff] [blame] | 138 | monitor_printf(mon, "failed\n"); |
aliguori | ff8d81d | 2008-10-24 22:10:31 +0000 | [diff] [blame] | 139 | break; |
| 140 | case MIG_STATE_CANCELLED: |
aliguori | 376253e | 2009-03-05 23:01:23 +0000 | [diff] [blame] | 141 | monitor_printf(mon, "cancelled\n"); |
aliguori | ff8d81d | 2008-10-24 22:10:31 +0000 | [diff] [blame] | 142 | break; |
| 143 | } |
aliguori | 5bb7910 | 2008-10-13 03:12:02 +0000 | [diff] [blame] | 144 | } |
| 145 | } |
| 146 | |
aliguori | 065e281 | 2008-11-11 16:46:33 +0000 | [diff] [blame] | 147 | /* shared migration helpers */ |
| 148 | |
aliguori | 731b036 | 2009-03-05 23:01:42 +0000 | [diff] [blame] | 149 | void migrate_fd_monitor_suspend(FdMigrationState *s) |
| 150 | { |
| 151 | s->mon_resume = cur_mon; |
aliguori | cde76ee | 2009-03-05 23:01:51 +0000 | [diff] [blame] | 152 | if (monitor_suspend(cur_mon) == 0) |
| 153 | dprintf("suspending monitor\n"); |
| 154 | else |
| 155 | monitor_printf(cur_mon, "terminal does not allow synchronous " |
| 156 | "migration, continuing detached\n"); |
aliguori | 731b036 | 2009-03-05 23:01:42 +0000 | [diff] [blame] | 157 | } |
| 158 | |
aliguori | 065e281 | 2008-11-11 16:46:33 +0000 | [diff] [blame] | 159 | void migrate_fd_error(FdMigrationState *s) |
| 160 | { |
| 161 | dprintf("setting error state\n"); |
| 162 | s->state = MIG_STATE_ERROR; |
| 163 | migrate_fd_cleanup(s); |
| 164 | } |
| 165 | |
| 166 | void migrate_fd_cleanup(FdMigrationState *s) |
| 167 | { |
| 168 | qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL); |
| 169 | |
| 170 | if (s->file) { |
| 171 | dprintf("closing file\n"); |
| 172 | qemu_fclose(s->file); |
| 173 | } |
| 174 | |
| 175 | if (s->fd != -1) |
| 176 | close(s->fd); |
| 177 | |
| 178 | /* Don't resume monitor until we've flushed all of the buffers */ |
aliguori | 731b036 | 2009-03-05 23:01:42 +0000 | [diff] [blame] | 179 | if (s->mon_resume) |
| 180 | monitor_resume(s->mon_resume); |
aliguori | 065e281 | 2008-11-11 16:46:33 +0000 | [diff] [blame] | 181 | |
| 182 | s->fd = -1; |
| 183 | } |
| 184 | |
| 185 | void migrate_fd_put_notify(void *opaque) |
| 186 | { |
| 187 | FdMigrationState *s = opaque; |
| 188 | |
| 189 | qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL); |
| 190 | qemu_file_put_notify(s->file); |
| 191 | } |
| 192 | |
| 193 | ssize_t migrate_fd_put_buffer(void *opaque, const void *data, size_t size) |
| 194 | { |
| 195 | FdMigrationState *s = opaque; |
| 196 | ssize_t ret; |
| 197 | |
| 198 | do { |
| 199 | ret = s->write(s, data, size); |
Uri Lublin | 95b134e | 2009-05-19 14:08:53 +0300 | [diff] [blame] | 200 | } while (ret == -1 && ((s->get_error(s)) == EINTR)); |
aliguori | 065e281 | 2008-11-11 16:46:33 +0000 | [diff] [blame] | 201 | |
| 202 | if (ret == -1) |
| 203 | ret = -(s->get_error(s)); |
| 204 | |
| 205 | if (ret == -EAGAIN) |
| 206 | qemu_set_fd_handler2(s->fd, NULL, NULL, migrate_fd_put_notify, s); |
| 207 | |
| 208 | return ret; |
| 209 | } |
| 210 | |
| 211 | void migrate_fd_connect(FdMigrationState *s) |
| 212 | { |
| 213 | int ret; |
| 214 | |
| 215 | s->file = qemu_fopen_ops_buffered(s, |
| 216 | s->bandwidth_limit, |
| 217 | migrate_fd_put_buffer, |
| 218 | migrate_fd_put_ready, |
| 219 | migrate_fd_wait_for_unfreeze, |
| 220 | migrate_fd_close); |
| 221 | |
| 222 | dprintf("beginning savevm\n"); |
| 223 | ret = qemu_savevm_state_begin(s->file); |
| 224 | if (ret < 0) { |
| 225 | dprintf("failed, %d\n", ret); |
| 226 | migrate_fd_error(s); |
| 227 | return; |
| 228 | } |
| 229 | |
| 230 | migrate_fd_put_ready(s); |
| 231 | } |
| 232 | |
| 233 | void migrate_fd_put_ready(void *opaque) |
| 234 | { |
| 235 | FdMigrationState *s = opaque; |
| 236 | |
| 237 | if (s->state != MIG_STATE_ACTIVE) { |
| 238 | dprintf("put_ready returning because of non-active state\n"); |
| 239 | return; |
| 240 | } |
| 241 | |
| 242 | dprintf("iterate\n"); |
| 243 | if (qemu_savevm_state_iterate(s->file) == 1) { |
aliguori | b161d12 | 2009-04-05 19:30:33 +0000 | [diff] [blame] | 244 | int state; |
aliguori | 065e281 | 2008-11-11 16:46:33 +0000 | [diff] [blame] | 245 | dprintf("done iterating\n"); |
| 246 | vm_stop(0); |
| 247 | |
| 248 | bdrv_flush_all(); |
aliguori | b161d12 | 2009-04-05 19:30:33 +0000 | [diff] [blame] | 249 | if ((qemu_savevm_state_complete(s->file)) < 0) { |
| 250 | vm_start(); |
| 251 | state = MIG_STATE_ERROR; |
| 252 | } else { |
| 253 | state = MIG_STATE_COMPLETED; |
| 254 | } |
aliguori | 065e281 | 2008-11-11 16:46:33 +0000 | [diff] [blame] | 255 | migrate_fd_cleanup(s); |
aliguori | b161d12 | 2009-04-05 19:30:33 +0000 | [diff] [blame] | 256 | s->state = state; |
aliguori | 065e281 | 2008-11-11 16:46:33 +0000 | [diff] [blame] | 257 | } |
| 258 | } |
| 259 | |
| 260 | int migrate_fd_get_status(MigrationState *mig_state) |
| 261 | { |
| 262 | FdMigrationState *s = migrate_to_fms(mig_state); |
| 263 | return s->state; |
| 264 | } |
| 265 | |
| 266 | void migrate_fd_cancel(MigrationState *mig_state) |
| 267 | { |
| 268 | FdMigrationState *s = migrate_to_fms(mig_state); |
| 269 | |
| 270 | if (s->state != MIG_STATE_ACTIVE) |
| 271 | return; |
| 272 | |
| 273 | dprintf("cancelling migration\n"); |
| 274 | |
| 275 | s->state = MIG_STATE_CANCELLED; |
| 276 | |
| 277 | migrate_fd_cleanup(s); |
| 278 | } |
| 279 | |
| 280 | void migrate_fd_release(MigrationState *mig_state) |
| 281 | { |
| 282 | FdMigrationState *s = migrate_to_fms(mig_state); |
| 283 | |
| 284 | dprintf("releasing state\n"); |
| 285 | |
| 286 | if (s->state == MIG_STATE_ACTIVE) { |
| 287 | s->state = MIG_STATE_CANCELLED; |
| 288 | migrate_fd_cleanup(s); |
| 289 | } |
| 290 | free(s); |
| 291 | } |
| 292 | |
| 293 | void migrate_fd_wait_for_unfreeze(void *opaque) |
| 294 | { |
| 295 | FdMigrationState *s = opaque; |
| 296 | int ret; |
| 297 | |
| 298 | dprintf("wait for unfreeze\n"); |
| 299 | if (s->state != MIG_STATE_ACTIVE) |
| 300 | return; |
| 301 | |
| 302 | do { |
| 303 | fd_set wfds; |
| 304 | |
| 305 | FD_ZERO(&wfds); |
| 306 | FD_SET(s->fd, &wfds); |
| 307 | |
| 308 | ret = select(s->fd + 1, NULL, &wfds, NULL, NULL); |
| 309 | } while (ret == -1 && (s->get_error(s)) == EINTR); |
| 310 | } |
| 311 | |
| 312 | int migrate_fd_close(void *opaque) |
| 313 | { |
| 314 | FdMigrationState *s = opaque; |
Uri Lublin | e19252d | 2009-06-08 14:28:01 +0300 | [diff] [blame] | 315 | |
| 316 | qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL); |
aliguori | 065e281 | 2008-11-11 16:46:33 +0000 | [diff] [blame] | 317 | return s->close(s); |
| 318 | } |