Commit 34c9dd8eea38e94d97e22ac12c2f2aaa0dca59d8

Authored by aliguori
1 parent 210f41ba

Introduce TCP live migration protocol

This patch introduces a tcp protocol for live migration.  It can be used as
follows:

qemu-system-x86_64 -hda ~/images/linux-test.img -monitor stdio
 <vm runs for a while>
(qemu) migrate tcp:localhost:1025

On the same system:

qemu-system-x86_64 -hda ~/images/linux-test.img -incoming 
tcp:localhost:1025

The monitor can be interacted with while waiting for an incoming live 
migration.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5478 c046a42c-6fe2-441c-8c8c-71466251a162
Makefile.target
... ... @@ -474,7 +474,7 @@ endif #CONFIG_DARWIN_USER
474 474 ifndef CONFIG_USER_ONLY
475 475  
476 476 OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o
477   -OBJS+=fw_cfg.o aio.o buffered_file.o migration.o
  477 +OBJS+=fw_cfg.o aio.o buffered_file.o migration.o migration-tcp.o
478 478 ifdef CONFIG_WIN32
479 479 OBJS+=block-raw-win32.o
480 480 else
... ...
migration-tcp.c 0 → 100644
  1 +/*
  2 + * QEMU live migration
  3 + *
  4 + * Copyright IBM, Corp. 2008
  5 + *
  6 + * Authors:
  7 + * Anthony Liguori <aliguori@us.ibm.com>
  8 + *
  9 + * This work is licensed under the terms of the GNU GPL, version 2. See
  10 + * the COPYING file in the top-level directory.
  11 + *
  12 + */
  13 +
  14 +#include "qemu-common.h"
  15 +#include "qemu_socket.h"
  16 +#include "migration.h"
  17 +#include "qemu-char.h"
  18 +#include "sysemu.h"
  19 +#include "console.h"
  20 +#include "buffered_file.h"
  21 +#include "block.h"
  22 +
  23 +//#define DEBUG_MIGRATION_TCP
  24 +
  25 +typedef struct FdMigrationState
  26 +{
  27 + MigrationState mig_state;
  28 + QEMUFile *file;
  29 + int64_t bandwidth_limit;
  30 + int fd;
  31 + int detach;
  32 + int state;
  33 +} FdMigrationState;
  34 +
  35 +#ifdef DEBUG_MIGRATION_TCP
  36 +#define dprintf(fmt, ...) \
  37 + do { printf("migration-tcp: " fmt, ## __VA_ARGS__); } while (0)
  38 +#else
  39 +#define dprintf(fmt, ...) \
  40 + do { } while (0)
  41 +#endif
  42 +
  43 +int debug_me = 0;
  44 +
  45 +static void tcp_cleanup(FdMigrationState *s)
  46 +{
  47 + if (s->detach == 2) {
  48 + monitor_resume();
  49 + s->detach = 0;
  50 + }
  51 +
  52 + qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
  53 +
  54 + if (s->file) {
  55 + debug_me = 1;
  56 + dprintf("closing file\n");
  57 + qemu_fclose(s->file);
  58 + }
  59 +
  60 + if (s->fd != -1)
  61 + close(s->fd);
  62 +
  63 + s->fd = -1;
  64 +}
  65 +
  66 +static void tcp_error(FdMigrationState *s)
  67 +{
  68 + dprintf("setting error state\n");
  69 + s->state = MIG_STATE_ERROR;
  70 + tcp_cleanup(s);
  71 +}
  72 +
  73 +static void fd_put_notify(void *opaque)
  74 +{
  75 + FdMigrationState *s = opaque;
  76 +
  77 + qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
  78 + qemu_file_put_notify(s->file);
  79 +}
  80 +
  81 +static ssize_t fd_put_buffer(void *opaque, const void *data, size_t size)
  82 +{
  83 + FdMigrationState *s = opaque;
  84 + ssize_t ret;
  85 +
  86 + do {
  87 + ret = write(s->fd, data, size);
  88 + } while (ret == -1 && errno == EINTR);
  89 +
  90 + if (ret == -1)
  91 + ret = -errno;
  92 +
  93 + if (ret == -EAGAIN)
  94 + qemu_set_fd_handler2(s->fd, NULL, NULL, fd_put_notify, s);
  95 +
  96 + return ret;
  97 +}
  98 +
  99 +static int fd_close(void *opaque)
  100 +{
  101 + FdMigrationState *s = opaque;
  102 + dprintf("fd_close\n");
  103 + if (s->fd != -1) {
  104 + close(s->fd);
  105 + s->fd = -1;
  106 + }
  107 + return 0;
  108 +}
  109 +
  110 +static void fd_wait_for_unfreeze(void *opaque)
  111 +{
  112 + FdMigrationState *s = opaque;
  113 + int ret;
  114 +
  115 + dprintf("wait for unfreeze\n");
  116 + if (s->state != MIG_STATE_ACTIVE)
  117 + return;
  118 +
  119 + do {
  120 + fd_set wfds;
  121 +
  122 + FD_ZERO(&wfds);
  123 + FD_SET(s->fd, &wfds);
  124 +
  125 + ret = select(s->fd + 1, NULL, &wfds, NULL, NULL);
  126 + } while (ret == -1 && errno == EINTR);
  127 +}
  128 +
  129 +static void fd_put_ready(void *opaque)
  130 +{
  131 + FdMigrationState *s = opaque;
  132 +
  133 + if (s->state != MIG_STATE_ACTIVE) {
  134 + dprintf("put_ready returning because of non-active state\n");
  135 + return;
  136 + }
  137 +
  138 + dprintf("iterate\n");
  139 + if (qemu_savevm_state_iterate(s->file) == 1) {
  140 + dprintf("done iterating\n");
  141 + vm_stop(0);
  142 +
  143 + bdrv_flush_all();
  144 + qemu_savevm_state_complete(s->file);
  145 + s->state = MIG_STATE_COMPLETED;
  146 + tcp_cleanup(s);
  147 + }
  148 +}
  149 +
  150 +static void tcp_connect_migrate(FdMigrationState *s)
  151 +{
  152 + int ret;
  153 +
  154 + s->file = qemu_fopen_ops_buffered(s,
  155 + s->bandwidth_limit,
  156 + fd_put_buffer,
  157 + fd_put_ready,
  158 + fd_wait_for_unfreeze,
  159 + fd_close);
  160 +
  161 + dprintf("beginning savevm\n");
  162 + ret = qemu_savevm_state_begin(s->file);
  163 + if (ret < 0) {
  164 + dprintf("failed, %d\n", ret);
  165 + tcp_error(s);
  166 + return;
  167 + }
  168 +
  169 + fd_put_ready(s);
  170 +}
  171 +
  172 +static void tcp_wait_for_connect(void *opaque)
  173 +{
  174 + FdMigrationState *s = opaque;
  175 + int val, ret;
  176 + int valsize = sizeof(val);
  177 +
  178 + dprintf("connect completed\n");
  179 + do {
  180 + ret = getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &val, &valsize);
  181 + } while (ret == -1 && errno == EINTR);
  182 +
  183 + if (ret < 0) {
  184 + tcp_error(s);
  185 + return;
  186 + }
  187 +
  188 + qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
  189 +
  190 + if (val == 0)
  191 + tcp_connect_migrate(s);
  192 + else {
  193 + dprintf("error connecting %d\n", val);
  194 + tcp_error(s);
  195 + }
  196 +}
  197 +
  198 +static FdMigrationState *to_fms(MigrationState *mig_state)
  199 +{
  200 + return container_of(mig_state, FdMigrationState, mig_state);
  201 +}
  202 +
  203 +static int tcp_get_status(MigrationState *mig_state)
  204 +{
  205 + FdMigrationState *s = to_fms(mig_state);
  206 +
  207 + return s->state;
  208 +}
  209 +
  210 +static void tcp_cancel(MigrationState *mig_state)
  211 +{
  212 + FdMigrationState *s = to_fms(mig_state);
  213 +
  214 + if (s->state != MIG_STATE_ACTIVE)
  215 + return;
  216 +
  217 + dprintf("cancelling migration\n");
  218 +
  219 + s->state = MIG_STATE_CANCELLED;
  220 +
  221 + tcp_cleanup(s);
  222 +}
  223 +
  224 +static void tcp_release(MigrationState *mig_state)
  225 +{
  226 + FdMigrationState *s = to_fms(mig_state);
  227 +
  228 + dprintf("releasing state\n");
  229 +
  230 + if (s->state == MIG_STATE_ACTIVE) {
  231 + s->state = MIG_STATE_CANCELLED;
  232 + tcp_cleanup(s);
  233 + }
  234 + free(s);
  235 +}
  236 +
  237 +MigrationState *tcp_start_outgoing_migration(const char *host_port,
  238 + int64_t bandwidth_limit,
  239 + int async)
  240 +{
  241 + struct sockaddr_in addr;
  242 + FdMigrationState *s;
  243 + int ret;
  244 +
  245 + if (parse_host_port(&addr, host_port) < 0)
  246 + return NULL;
  247 +
  248 + s = qemu_mallocz(sizeof(*s));
  249 + if (s == NULL)
  250 + return NULL;
  251 +
  252 + s->mig_state.cancel = tcp_cancel;
  253 + s->mig_state.get_status = tcp_get_status;
  254 + s->mig_state.release = tcp_release;
  255 +
  256 + s->state = MIG_STATE_ACTIVE;
  257 + s->detach = !async;
  258 + s->bandwidth_limit = bandwidth_limit;
  259 + s->fd = socket(PF_INET, SOCK_STREAM, 0);
  260 + if (s->fd == -1) {
  261 + qemu_free(s);
  262 + return NULL;
  263 + }
  264 +
  265 + fcntl(s->fd, F_SETFL, O_NONBLOCK);
  266 +
  267 + if (s->detach == 1) {
  268 + dprintf("detaching from monitor\n");
  269 + monitor_suspend();
  270 + s->detach = 2;
  271 + }
  272 +
  273 + do {
  274 + ret = connect(s->fd, (struct sockaddr *)&addr, sizeof(addr));
  275 + if (ret == -1)
  276 + ret = -errno;
  277 +
  278 + if (ret == -EINPROGRESS)
  279 + qemu_set_fd_handler2(s->fd, NULL, NULL, tcp_wait_for_connect, s);
  280 + } while (ret == -EINTR);
  281 +
  282 + if (ret < 0 && ret != -EINPROGRESS) {
  283 + dprintf("connect failed\n");
  284 + close(s->fd);
  285 + qemu_free(s);
  286 + s = NULL;
  287 + } else if (ret >= 0)
  288 + tcp_connect_migrate(s);
  289 +
  290 + return &s->mig_state;
  291 +}
  292 +
  293 +static void tcp_accept_incoming_migration(void *opaque)
  294 +{
  295 + struct sockaddr_in addr;
  296 + socklen_t addrlen = sizeof(addr);
  297 + int s = (unsigned long)opaque;
  298 + QEMUFile *f;
  299 + int c, ret;
  300 +
  301 + do {
  302 + c = accept(s, (struct sockaddr *)&addr, &addrlen);
  303 + } while (c == -1 && errno == EINTR);
  304 +
  305 + dprintf("accepted migration\n");
  306 +
  307 + if (c == -1) {
  308 + fprintf(stderr, "could not accept migration connection\n");
  309 + return;
  310 + }
  311 +
  312 + f = qemu_fopen_fd(c);
  313 + if (f == NULL) {
  314 + fprintf(stderr, "could not qemu_fopen socket\n");
  315 + goto out;
  316 + }
  317 +
  318 + vm_stop(0); /* just in case */
  319 + ret = qemu_loadvm_state(f);
  320 + if (ret < 0) {
  321 + fprintf(stderr, "load of migration failed\n");
  322 + goto out_fopen;
  323 + }
  324 + qemu_announce_self();
  325 + dprintf("successfully loaded vm state\n");
  326 +
  327 + /* we've successfully migrated, close the server socket */
  328 + qemu_set_fd_handler2(s, NULL, NULL, NULL, NULL);
  329 + close(s);
  330 +
  331 + vm_start();
  332 +
  333 +out_fopen:
  334 + qemu_fclose(f);
  335 +out:
  336 + close(c);
  337 +}
  338 +
  339 +int tcp_start_incoming_migration(const char *host_port)
  340 +{
  341 + struct sockaddr_in addr;
  342 + int val;
  343 + int s;
  344 +
  345 + if (parse_host_port(&addr, host_port) < 0) {
  346 + fprintf(stderr, "invalid host/port combination: %s\n", host_port);
  347 + return -EINVAL;
  348 + }
  349 +
  350 + s = socket(PF_INET, SOCK_STREAM, 0);
  351 + if (s == -1)
  352 + return -errno;
  353 +
  354 + val = 1;
  355 + setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (const char *)&val, sizeof(val));
  356 +
  357 + if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) == -1)
  358 + goto err;
  359 +
  360 + if (listen(s, 1) == -1)
  361 + goto err;
  362 +
  363 + qemu_set_fd_handler2(s, NULL, tcp_accept_incoming_migration, NULL,
  364 + (void *)(unsigned long)s);
  365 +
  366 + return 0;
  367 +
  368 +err:
  369 + close(s);
  370 + return -errno;
  371 +}
... ...
migration.c
... ... @@ -22,12 +22,32 @@ static MigrationState *current_migration;
22 22  
23 23 void qemu_start_incoming_migration(const char *uri)
24 24 {
25   - fprintf(stderr, "unknown migration protocol: %s\n", uri);
  25 + const char *p;
  26 +
  27 + if (strstart(uri, "tcp:", &p))
  28 + tcp_start_incoming_migration(p);
  29 + else
  30 + fprintf(stderr, "unknown migration protocol: %s\n", uri);
26 31 }
27 32  
28 33 void do_migrate(int detach, const char *uri)
29 34 {
30   - term_printf("unknown migration protocol: %s\n", uri);
  35 + MigrationState *s = NULL;
  36 + const char *p;
  37 +
  38 + if (strstart(uri, "tcp:", &p))
  39 + s = tcp_start_outgoing_migration(p, max_throttle, detach);
  40 + else
  41 + term_printf("unknown migration protocol: %s\n", uri);
  42 +
  43 + if (s == NULL)
  44 + term_printf("migration failed\n");
  45 + else {
  46 + if (current_migration)
  47 + current_migration->release(current_migration);
  48 +
  49 + current_migration = s;
  50 + }
31 51 }
32 52  
33 53 void do_migrate_cancel(void)
... ...
migration.h
... ... @@ -39,5 +39,11 @@ void do_migrate_set_speed(const char *value);
39 39  
40 40 void do_info_migrate(void);
41 41  
  42 +int tcp_start_incoming_migration(const char *host_port);
  43 +
  44 +MigrationState *tcp_start_outgoing_migration(const char *host_port,
  45 + int64_t bandwidth_limit,
  46 + int detach);
  47 +
42 48 #endif
43 49  
... ...