Commit 475e4277721eca10e98bd926fa1d3ebc74d0ac35

Authored by aliguori
1 parent e18231a3

Switch the memory savevm handler to be "live"

This patch replaces the static memory savevm/loadvm handler with a "live" one.
The new handler is used even when performing a non-live migration.

The key difference between this handler and the previous one is that each page
is prefixed with the address of the page.  The QEMUFile rate limiting code, in
combination with the live migration dirty tracking bits, is used to determine
which pages should be sent and how many should be sent.

The live save code "converges" when the number of dirty pages drops below a
fixed threshold.  Currently, this is 10 pages.  This is something that should
eventually be derived from whatever the bandwidth limitation is.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5437 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 1 changed file with 152 additions and 141 deletions
... ... @@ -6685,7 +6685,7 @@ int qemu_savevm_state_begin(QEMUFile *f)
6685 6685 int qemu_savevm_state_iterate(QEMUFile *f)
6686 6686 {
6687 6687 SaveStateEntry *se;
6688   - int ret = 0;
  6688 + int ret = 1;
6689 6689  
6690 6690 for (se = first_se; se != NULL; se = se->next) {
6691 6691 if (se->save_live_state == NULL)
... ... @@ -6695,7 +6695,7 @@ int qemu_savevm_state_iterate(QEMUFile *f)
6695 6695 qemu_put_byte(f, QEMU_VM_SECTION_PART);
6696 6696 qemu_put_be32(f, se->section_id);
6697 6697  
6698   - ret |= se->save_live_state(f, QEMU_VM_SECTION_PART, se->opaque);
  6698 + ret &= !!se->save_live_state(f, QEMU_VM_SECTION_PART, se->opaque);
6699 6699 }
6700 6700  
6701 6701 if (ret)
... ... @@ -6761,7 +6761,7 @@ int qemu_savevm_state(QEMUFile *f)
6761 6761 ret = qemu_savevm_state_iterate(f);
6762 6762 if (ret < 0)
6763 6763 goto out;
6764   - } while (ret == 1);
  6764 + } while (ret == 0);
6765 6765  
6766 6766 ret = qemu_savevm_state_complete(f);
6767 6767  
... ... @@ -7254,77 +7254,6 @@ static int ram_load_v1(QEMUFile *f, void *opaque)
7254 7254 #define IOBUF_SIZE 4096
7255 7255 #define RAM_CBLOCK_MAGIC 0xfabe
7256 7256  
7257   -typedef struct RamCompressState {
7258   - z_stream zstream;
7259   - QEMUFile *f;
7260   - uint8_t buf[IOBUF_SIZE];
7261   -} RamCompressState;
7262   -
7263   -static int ram_compress_open(RamCompressState *s, QEMUFile *f)
7264   -{
7265   - int ret;
7266   - memset(s, 0, sizeof(*s));
7267   - s->f = f;
7268   - ret = deflateInit2(&s->zstream, 1,
7269   - Z_DEFLATED, 15,
7270   - 9, Z_DEFAULT_STRATEGY);
7271   - if (ret != Z_OK)
7272   - return -1;
7273   - s->zstream.avail_out = IOBUF_SIZE;
7274   - s->zstream.next_out = s->buf;
7275   - return 0;
7276   -}
7277   -
7278   -static void ram_put_cblock(RamCompressState *s, const uint8_t *buf, int len)
7279   -{
7280   - qemu_put_be16(s->f, RAM_CBLOCK_MAGIC);
7281   - qemu_put_be16(s->f, len);
7282   - qemu_put_buffer(s->f, buf, len);
7283   -}
7284   -
7285   -static int ram_compress_buf(RamCompressState *s, const uint8_t *buf, int len)
7286   -{
7287   - int ret;
7288   -
7289   - s->zstream.avail_in = len;
7290   - s->zstream.next_in = (uint8_t *)buf;
7291   - while (s->zstream.avail_in > 0) {
7292   - ret = deflate(&s->zstream, Z_NO_FLUSH);
7293   - if (ret != Z_OK)
7294   - return -1;
7295   - if (s->zstream.avail_out == 0) {
7296   - ram_put_cblock(s, s->buf, IOBUF_SIZE);
7297   - s->zstream.avail_out = IOBUF_SIZE;
7298   - s->zstream.next_out = s->buf;
7299   - }
7300   - }
7301   - return 0;
7302   -}
7303   -
7304   -static void ram_compress_close(RamCompressState *s)
7305   -{
7306   - int len, ret;
7307   -
7308   - /* compress last bytes */
7309   - for(;;) {
7310   - ret = deflate(&s->zstream, Z_FINISH);
7311   - if (ret == Z_OK || ret == Z_STREAM_END) {
7312   - len = IOBUF_SIZE - s->zstream.avail_out;
7313   - if (len > 0) {
7314   - ram_put_cblock(s, s->buf, len);
7315   - }
7316   - s->zstream.avail_out = IOBUF_SIZE;
7317   - s->zstream.next_out = s->buf;
7318   - if (ret == Z_STREAM_END)
7319   - break;
7320   - } else {
7321   - goto fail;
7322   - }
7323   - }
7324   -fail:
7325   - deflateEnd(&s->zstream);
7326   -}
7327   -
7328 7257 typedef struct RamDecompressState {
7329 7258 z_stream zstream;
7330 7259 QEMUFile *f;
... ... @@ -7372,61 +7301,121 @@ static void ram_decompress_close(RamDecompressState *s)
7372 7301 inflateEnd(&s->zstream);
7373 7302 }
7374 7303  
7375   -static void ram_save(QEMUFile *f, void *opaque)
  7304 +#define RAM_SAVE_FLAG_FULL 0x01
  7305 +#define RAM_SAVE_FLAG_COMPRESS 0x02
  7306 +#define RAM_SAVE_FLAG_MEM_SIZE 0x04
  7307 +#define RAM_SAVE_FLAG_PAGE 0x08
  7308 +#define RAM_SAVE_FLAG_EOS 0x10
  7309 +
  7310 +static int is_dup_page(uint8_t *page, uint8_t ch)
7376 7311 {
7377   - ram_addr_t i;
7378   - RamCompressState s1, *s = &s1;
7379   - uint8_t buf[10];
  7312 + uint32_t val = ch << 24 | ch << 16 | ch << 8 | ch;
  7313 + uint32_t *array = (uint32_t *)page;
  7314 + int i;
7380 7315  
7381   - qemu_put_be32(f, phys_ram_size);
7382   - if (ram_compress_open(s, f) < 0)
7383   - return;
7384   - for(i = 0; i < phys_ram_size; i+= BDRV_HASH_BLOCK_SIZE) {
7385   -#if 0
7386   - if (tight_savevm_enabled) {
7387   - int64_t sector_num;
7388   - int j;
7389   -
7390   - /* find if the memory block is available on a virtual
7391   - block device */
7392   - sector_num = -1;
7393   - for(j = 0; j < nb_drives; j++) {
7394   - sector_num = bdrv_hash_find(drives_table[j].bdrv,
7395   - phys_ram_base + i,
7396   - BDRV_HASH_BLOCK_SIZE);
7397   - if (sector_num >= 0)
7398   - break;
  7316 + for (i = 0; i < (TARGET_PAGE_SIZE / 4); i++) {
  7317 + if (array[i] != val)
  7318 + return 0;
  7319 + }
  7320 +
  7321 + return 1;
  7322 +}
  7323 +
  7324 +static int ram_save_block(QEMUFile *f)
  7325 +{
  7326 + static ram_addr_t current_addr = 0;
  7327 + ram_addr_t saved_addr = current_addr;
  7328 + ram_addr_t addr = 0;
  7329 + int found = 0;
  7330 +
  7331 + while (addr < phys_ram_size) {
  7332 + if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
  7333 + uint8_t ch;
  7334 +
  7335 + cpu_physical_memory_reset_dirty(current_addr,
  7336 + current_addr + TARGET_PAGE_SIZE,
  7337 + MIGRATION_DIRTY_FLAG);
  7338 +
  7339 + ch = *(phys_ram_base + current_addr);
  7340 +
  7341 + if (is_dup_page(phys_ram_base + current_addr, ch)) {
  7342 + qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
  7343 + qemu_put_byte(f, ch);
  7344 + } else {
  7345 + qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE);
  7346 + qemu_put_buffer(f, phys_ram_base + current_addr, TARGET_PAGE_SIZE);
7399 7347 }
7400   - if (j == nb_drives)
7401   - goto normal_compress;
7402   - buf[0] = 1;
7403   - buf[1] = j;
7404   - cpu_to_be64wu((uint64_t *)(buf + 2), sector_num);
7405   - ram_compress_buf(s, buf, 10);
7406   - } else
7407   -#endif
7408   - {
7409   - // normal_compress:
7410   - buf[0] = 0;
7411   - ram_compress_buf(s, buf, 1);
7412   - ram_compress_buf(s, phys_ram_base + i, BDRV_HASH_BLOCK_SIZE);
  7348 +
  7349 + found = 1;
  7350 + break;
7413 7351 }
  7352 + addr += TARGET_PAGE_SIZE;
  7353 + current_addr = (saved_addr + addr) % phys_ram_size;
7414 7354 }
7415   - ram_compress_close(s);
  7355 +
  7356 + return found;
7416 7357 }
7417 7358  
7418   -static int ram_load(QEMUFile *f, void *opaque, int version_id)
  7359 +static ram_addr_t ram_save_threshold = 10;
  7360 +
  7361 +static ram_addr_t ram_save_remaining(void)
  7362 +{
  7363 + ram_addr_t addr;
  7364 + ram_addr_t count = 0;
  7365 +
  7366 + for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
  7367 + if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
  7368 + count++;
  7369 + }
  7370 +
  7371 + return count;
  7372 +}
  7373 +
  7374 +static int ram_save_live(QEMUFile *f, int stage, void *opaque)
  7375 +{
  7376 + ram_addr_t addr;
  7377 +
  7378 + if (stage == 1) {
  7379 + /* Make sure all dirty bits are set */
  7380 + for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
  7381 + if (!cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
  7382 + cpu_physical_memory_set_dirty(addr);
  7383 + }
  7384 +
  7385 + /* Enable dirty memory tracking */
  7386 + cpu_physical_memory_set_dirty_tracking(1);
  7387 +
  7388 + qemu_put_be64(f, phys_ram_size | RAM_SAVE_FLAG_MEM_SIZE);
  7389 + }
  7390 +
  7391 + while (!qemu_file_rate_limit(f)) {
  7392 + int ret;
  7393 +
  7394 + ret = ram_save_block(f);
  7395 + if (ret == 0) /* no more blocks */
  7396 + break;
  7397 + }
  7398 +
  7399 + /* try transferring iterative blocks of memory */
  7400 +
  7401 + if (stage == 3) {
  7402 + cpu_physical_memory_set_dirty_tracking(0);
  7403 +
  7404 + /* flush all remaining blocks regardless of rate limiting */
  7405 + while (ram_save_block(f) != 0);
  7406 + }
  7407 +
  7408 + qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
  7409 +
  7410 + return (stage == 2) && (ram_save_remaining() < ram_save_threshold);
  7411 +}
  7412 +
  7413 +static int ram_load_dead(QEMUFile *f, void *opaque)
7419 7414 {
7420 7415 RamDecompressState s1, *s = &s1;
7421 7416 uint8_t buf[10];
7422 7417 ram_addr_t i;
7423 7418  
7424   - if (version_id == 1)
7425   - return ram_load_v1(f, opaque);
7426   - if (version_id != 2)
7427   - return -EINVAL;
7428   - if (qemu_get_be32(f) != phys_ram_size)
7429   - return -EINVAL;
7430 7419 if (ram_decompress_open(s, f) < 0)
7431 7420 return -EINVAL;
7432 7421 for(i = 0; i < phys_ram_size; i+= BDRV_HASH_BLOCK_SIZE) {
... ... @@ -7439,35 +7428,57 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
7439 7428 fprintf(stderr, "Error while reading ram block address=0x%08" PRIx64, (uint64_t)i);
7440 7429 goto error;
7441 7430 }
7442   - } else
7443   -#if 0
7444   - if (buf[0] == 1) {
7445   - int bs_index;
7446   - int64_t sector_num;
7447   -
7448   - ram_decompress_buf(s, buf + 1, 9);
7449   - bs_index = buf[1];
7450   - sector_num = be64_to_cpupu((const uint64_t *)(buf + 2));
7451   - if (bs_index >= nb_drives) {
7452   - fprintf(stderr, "Invalid block device index %d\n", bs_index);
7453   - goto error;
7454   - }
7455   - if (bdrv_read(drives_table[bs_index].bdrv, sector_num,
7456   - phys_ram_base + i,
7457   - BDRV_HASH_BLOCK_SIZE / 512) < 0) {
7458   - fprintf(stderr, "Error while reading sector %d:%" PRId64 "\n",
7459   - bs_index, sector_num);
7460   - goto error;
7461   - }
7462   - } else
7463   -#endif
7464   - {
  7431 + } else {
7465 7432 error:
7466 7433 printf("Error block header\n");
7467 7434 return -EINVAL;
7468 7435 }
7469 7436 }
7470 7437 ram_decompress_close(s);
  7438 +
  7439 + return 0;
  7440 +}
  7441 +
  7442 +static int ram_load(QEMUFile *f, void *opaque, int version_id)
  7443 +{
  7444 + ram_addr_t addr;
  7445 + int flags;
  7446 +
  7447 + if (version_id == 1)
  7448 + return ram_load_v1(f, opaque);
  7449 +
  7450 + if (version_id == 2) {
  7451 + if (qemu_get_be32(f) != phys_ram_size)
  7452 + return -EINVAL;
  7453 + return ram_load_dead(f, opaque);
  7454 + }
  7455 +
  7456 + if (version_id != 3)
  7457 + return -EINVAL;
  7458 +
  7459 + do {
  7460 + addr = qemu_get_be64(f);
  7461 +
  7462 + flags = addr & ~TARGET_PAGE_MASK;
  7463 + addr &= TARGET_PAGE_MASK;
  7464 +
  7465 + if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
  7466 + if (addr != phys_ram_size)
  7467 + return -EINVAL;
  7468 + }
  7469 +
  7470 + if (flags & RAM_SAVE_FLAG_FULL) {
  7471 + if (ram_load_dead(f, opaque) < 0)
  7472 + return -EINVAL;
  7473 + }
  7474 +
  7475 + if (flags & RAM_SAVE_FLAG_COMPRESS) {
  7476 + uint8_t ch = qemu_get_byte(f);
  7477 + memset(phys_ram_base + addr, ch, TARGET_PAGE_SIZE);
  7478 + } else if (flags & RAM_SAVE_FLAG_PAGE)
  7479 + qemu_get_buffer(f, phys_ram_base + addr, TARGET_PAGE_SIZE);
  7480 + } while (!(flags & RAM_SAVE_FLAG_EOS));
  7481 +
7471 7482 return 0;
7472 7483 }
7473 7484  
... ... @@ -9512,7 +9523,7 @@ int main(int argc, char **argv)
9512 9523 exit(1);
9513 9524  
9514 9525 register_savevm("timer", 0, 2, timer_save, timer_load, NULL);
9515   - register_savevm("ram", 0, 2, ram_save, ram_load, NULL);
  9526 + register_savevm_live("ram", 0, 3, ram_save_live, NULL, ram_load, NULL);
9516 9527  
9517 9528 /* terminal init */
9518 9529 memset(&display_state, 0, sizeof(display_state));
... ...