Commit 475e4277721eca10e98bd926fa1d3ebc74d0ac35

Authored by aliguori
1 parent e18231a3

Switch the memory savevm handler to be "live"

This patch replaces the static memory savevm/loadvm handler with a "live" one.
This handler is used even if performing a non-live migration.

The key difference between this handler and the previous one is that each page
is prefixed with the address of the page; the low (sub-page) bits of that
address word carry the RAM_SAVE_FLAG_* type flags.  The QEMUFile rate limiting
code, in combination with the live migration dirty tracking bits, is used to
determine which pages should be sent and how many should be sent.

The live save code "converges" when the number of dirty pages drops below a
fixed threshold.  Currently, this threshold is 10 pages (ram_save_threshold).
This is something that should eventually be derived from whatever the
bandwidth limitation is.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5437 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 1 changed file with 152 additions and 141 deletions
@@ -6685,7 +6685,7 @@ int qemu_savevm_state_begin(QEMUFile *f) @@ -6685,7 +6685,7 @@ int qemu_savevm_state_begin(QEMUFile *f)
6685 int qemu_savevm_state_iterate(QEMUFile *f) 6685 int qemu_savevm_state_iterate(QEMUFile *f)
6686 { 6686 {
6687 SaveStateEntry *se; 6687 SaveStateEntry *se;
6688 - int ret = 0; 6688 + int ret = 1;
6689 6689
6690 for (se = first_se; se != NULL; se = se->next) { 6690 for (se = first_se; se != NULL; se = se->next) {
6691 if (se->save_live_state == NULL) 6691 if (se->save_live_state == NULL)
@@ -6695,7 +6695,7 @@ int qemu_savevm_state_iterate(QEMUFile *f) @@ -6695,7 +6695,7 @@ int qemu_savevm_state_iterate(QEMUFile *f)
6695 qemu_put_byte(f, QEMU_VM_SECTION_PART); 6695 qemu_put_byte(f, QEMU_VM_SECTION_PART);
6696 qemu_put_be32(f, se->section_id); 6696 qemu_put_be32(f, se->section_id);
6697 6697
6698 - ret |= se->save_live_state(f, QEMU_VM_SECTION_PART, se->opaque); 6698 + ret &= !!se->save_live_state(f, QEMU_VM_SECTION_PART, se->opaque);
6699 } 6699 }
6700 6700
6701 if (ret) 6701 if (ret)
@@ -6761,7 +6761,7 @@ int qemu_savevm_state(QEMUFile *f) @@ -6761,7 +6761,7 @@ int qemu_savevm_state(QEMUFile *f)
6761 ret = qemu_savevm_state_iterate(f); 6761 ret = qemu_savevm_state_iterate(f);
6762 if (ret < 0) 6762 if (ret < 0)
6763 goto out; 6763 goto out;
6764 - } while (ret == 1); 6764 + } while (ret == 0);
6765 6765
6766 ret = qemu_savevm_state_complete(f); 6766 ret = qemu_savevm_state_complete(f);
6767 6767
@@ -7254,77 +7254,6 @@ static int ram_load_v1(QEMUFile *f, void *opaque) @@ -7254,77 +7254,6 @@ static int ram_load_v1(QEMUFile *f, void *opaque)
7254 #define IOBUF_SIZE 4096 7254 #define IOBUF_SIZE 4096
7255 #define RAM_CBLOCK_MAGIC 0xfabe 7255 #define RAM_CBLOCK_MAGIC 0xfabe
7256 7256
7257 -typedef struct RamCompressState {  
7258 - z_stream zstream;  
7259 - QEMUFile *f;  
7260 - uint8_t buf[IOBUF_SIZE];  
7261 -} RamCompressState;  
7262 -  
7263 -static int ram_compress_open(RamCompressState *s, QEMUFile *f)  
7264 -{  
7265 - int ret;  
7266 - memset(s, 0, sizeof(*s));  
7267 - s->f = f;  
7268 - ret = deflateInit2(&s->zstream, 1,  
7269 - Z_DEFLATED, 15,  
7270 - 9, Z_DEFAULT_STRATEGY);  
7271 - if (ret != Z_OK)  
7272 - return -1;  
7273 - s->zstream.avail_out = IOBUF_SIZE;  
7274 - s->zstream.next_out = s->buf;  
7275 - return 0;  
7276 -}  
7277 -  
7278 -static void ram_put_cblock(RamCompressState *s, const uint8_t *buf, int len)  
7279 -{  
7280 - qemu_put_be16(s->f, RAM_CBLOCK_MAGIC);  
7281 - qemu_put_be16(s->f, len);  
7282 - qemu_put_buffer(s->f, buf, len);  
7283 -}  
7284 -  
7285 -static int ram_compress_buf(RamCompressState *s, const uint8_t *buf, int len)  
7286 -{  
7287 - int ret;  
7288 -  
7289 - s->zstream.avail_in = len;  
7290 - s->zstream.next_in = (uint8_t *)buf;  
7291 - while (s->zstream.avail_in > 0) {  
7292 - ret = deflate(&s->zstream, Z_NO_FLUSH);  
7293 - if (ret != Z_OK)  
7294 - return -1;  
7295 - if (s->zstream.avail_out == 0) {  
7296 - ram_put_cblock(s, s->buf, IOBUF_SIZE);  
7297 - s->zstream.avail_out = IOBUF_SIZE;  
7298 - s->zstream.next_out = s->buf;  
7299 - }  
7300 - }  
7301 - return 0;  
7302 -}  
7303 -  
7304 -static void ram_compress_close(RamCompressState *s)  
7305 -{  
7306 - int len, ret;  
7307 -  
7308 - /* compress last bytes */  
7309 - for(;;) {  
7310 - ret = deflate(&s->zstream, Z_FINISH);  
7311 - if (ret == Z_OK || ret == Z_STREAM_END) {  
7312 - len = IOBUF_SIZE - s->zstream.avail_out;  
7313 - if (len > 0) {  
7314 - ram_put_cblock(s, s->buf, len);  
7315 - }  
7316 - s->zstream.avail_out = IOBUF_SIZE;  
7317 - s->zstream.next_out = s->buf;  
7318 - if (ret == Z_STREAM_END)  
7319 - break;  
7320 - } else {  
7321 - goto fail;  
7322 - }  
7323 - }  
7324 -fail:  
7325 - deflateEnd(&s->zstream);  
7326 -}  
7327 -  
7328 typedef struct RamDecompressState { 7257 typedef struct RamDecompressState {
7329 z_stream zstream; 7258 z_stream zstream;
7330 QEMUFile *f; 7259 QEMUFile *f;
@@ -7372,61 +7301,121 @@ static void ram_decompress_close(RamDecompressState *s) @@ -7372,61 +7301,121 @@ static void ram_decompress_close(RamDecompressState *s)
7372 inflateEnd(&s->zstream); 7301 inflateEnd(&s->zstream);
7373 } 7302 }
7374 7303
7375 -static void ram_save(QEMUFile *f, void *opaque) 7304 +#define RAM_SAVE_FLAG_FULL 0x01
  7305 +#define RAM_SAVE_FLAG_COMPRESS 0x02
  7306 +#define RAM_SAVE_FLAG_MEM_SIZE 0x04
  7307 +#define RAM_SAVE_FLAG_PAGE 0x08
  7308 +#define RAM_SAVE_FLAG_EOS 0x10
  7309 +
  7310 +static int is_dup_page(uint8_t *page, uint8_t ch)
7376 { 7311 {
7377 - ram_addr_t i;  
7378 - RamCompressState s1, *s = &s1;  
7379 - uint8_t buf[10]; 7312 + uint32_t val = ch << 24 | ch << 16 | ch << 8 | ch;
  7313 + uint32_t *array = (uint32_t *)page;
  7314 + int i;
7380 7315
7381 - qemu_put_be32(f, phys_ram_size);  
7382 - if (ram_compress_open(s, f) < 0)  
7383 - return;  
7384 - for(i = 0; i < phys_ram_size; i+= BDRV_HASH_BLOCK_SIZE) {  
7385 -#if 0  
7386 - if (tight_savevm_enabled) {  
7387 - int64_t sector_num;  
7388 - int j;  
7389 -  
7390 - /* find if the memory block is available on a virtual  
7391 - block device */  
7392 - sector_num = -1;  
7393 - for(j = 0; j < nb_drives; j++) {  
7394 - sector_num = bdrv_hash_find(drives_table[j].bdrv,  
7395 - phys_ram_base + i,  
7396 - BDRV_HASH_BLOCK_SIZE);  
7397 - if (sector_num >= 0)  
7398 - break; 7316 + for (i = 0; i < (TARGET_PAGE_SIZE / 4); i++) {
  7317 + if (array[i] != val)
  7318 + return 0;
  7319 + }
  7320 +
  7321 + return 1;
  7322 +}
  7323 +
  7324 +static int ram_save_block(QEMUFile *f)
  7325 +{
  7326 + static ram_addr_t current_addr = 0;
  7327 + ram_addr_t saved_addr = current_addr;
  7328 + ram_addr_t addr = 0;
  7329 + int found = 0;
  7330 +
  7331 + while (addr < phys_ram_size) {
  7332 + if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
  7333 + uint8_t ch;
  7334 +
  7335 + cpu_physical_memory_reset_dirty(current_addr,
  7336 + current_addr + TARGET_PAGE_SIZE,
  7337 + MIGRATION_DIRTY_FLAG);
  7338 +
  7339 + ch = *(phys_ram_base + current_addr);
  7340 +
  7341 + if (is_dup_page(phys_ram_base + current_addr, ch)) {
  7342 + qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
  7343 + qemu_put_byte(f, ch);
  7344 + } else {
  7345 + qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE);
  7346 + qemu_put_buffer(f, phys_ram_base + current_addr, TARGET_PAGE_SIZE);
7399 } 7347 }
7400 - if (j == nb_drives)  
7401 - goto normal_compress;  
7402 - buf[0] = 1;  
7403 - buf[1] = j;  
7404 - cpu_to_be64wu((uint64_t *)(buf + 2), sector_num);  
7405 - ram_compress_buf(s, buf, 10);  
7406 - } else  
7407 -#endif  
7408 - {  
7409 - // normal_compress:  
7410 - buf[0] = 0;  
7411 - ram_compress_buf(s, buf, 1);  
7412 - ram_compress_buf(s, phys_ram_base + i, BDRV_HASH_BLOCK_SIZE); 7348 +
  7349 + found = 1;
  7350 + break;
7413 } 7351 }
  7352 + addr += TARGET_PAGE_SIZE;
  7353 + current_addr = (saved_addr + addr) % phys_ram_size;
7414 } 7354 }
7415 - ram_compress_close(s); 7355 +
  7356 + return found;
7416 } 7357 }
7417 7358
7418 -static int ram_load(QEMUFile *f, void *opaque, int version_id) 7359 +static ram_addr_t ram_save_threshold = 10;
  7360 +
  7361 +static ram_addr_t ram_save_remaining(void)
  7362 +{
  7363 + ram_addr_t addr;
  7364 + ram_addr_t count = 0;
  7365 +
  7366 + for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
  7367 + if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
  7368 + count++;
  7369 + }
  7370 +
  7371 + return count;
  7372 +}
  7373 +
  7374 +static int ram_save_live(QEMUFile *f, int stage, void *opaque)
  7375 +{
  7376 + ram_addr_t addr;
  7377 +
  7378 + if (stage == 1) {
  7379 + /* Make sure all dirty bits are set */
  7380 + for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
  7381 + if (!cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
  7382 + cpu_physical_memory_set_dirty(addr);
  7383 + }
  7384 +
  7385 + /* Enable dirty memory tracking */
  7386 + cpu_physical_memory_set_dirty_tracking(1);
  7387 +
  7388 + qemu_put_be64(f, phys_ram_size | RAM_SAVE_FLAG_MEM_SIZE);
  7389 + }
  7390 +
  7391 + while (!qemu_file_rate_limit(f)) {
  7392 + int ret;
  7393 +
  7394 + ret = ram_save_block(f);
  7395 + if (ret == 0) /* no more blocks */
  7396 + break;
  7397 + }
  7398 +
  7399 + /* try transferring iterative blocks of memory */
  7400 +
  7401 + if (stage == 3) {
  7402 + cpu_physical_memory_set_dirty_tracking(0);
  7403 +
  7404 + /* flush all remaining blocks regardless of rate limiting */
  7405 + while (ram_save_block(f) != 0);
  7406 + }
  7407 +
  7408 + qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
  7409 +
  7410 + return (stage == 2) && (ram_save_remaining() < ram_save_threshold);
  7411 +}
  7412 +
  7413 +static int ram_load_dead(QEMUFile *f, void *opaque)
7419 { 7414 {
7420 RamDecompressState s1, *s = &s1; 7415 RamDecompressState s1, *s = &s1;
7421 uint8_t buf[10]; 7416 uint8_t buf[10];
7422 ram_addr_t i; 7417 ram_addr_t i;
7423 7418
7424 - if (version_id == 1)  
7425 - return ram_load_v1(f, opaque);  
7426 - if (version_id != 2)  
7427 - return -EINVAL;  
7428 - if (qemu_get_be32(f) != phys_ram_size)  
7429 - return -EINVAL;  
7430 if (ram_decompress_open(s, f) < 0) 7419 if (ram_decompress_open(s, f) < 0)
7431 return -EINVAL; 7420 return -EINVAL;
7432 for(i = 0; i < phys_ram_size; i+= BDRV_HASH_BLOCK_SIZE) { 7421 for(i = 0; i < phys_ram_size; i+= BDRV_HASH_BLOCK_SIZE) {
@@ -7439,35 +7428,57 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) @@ -7439,35 +7428,57 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
7439 fprintf(stderr, "Error while reading ram block address=0x%08" PRIx64, (uint64_t)i); 7428 fprintf(stderr, "Error while reading ram block address=0x%08" PRIx64, (uint64_t)i);
7440 goto error; 7429 goto error;
7441 } 7430 }
7442 - } else  
7443 -#if 0  
7444 - if (buf[0] == 1) {  
7445 - int bs_index;  
7446 - int64_t sector_num;  
7447 -  
7448 - ram_decompress_buf(s, buf + 1, 9);  
7449 - bs_index = buf[1];  
7450 - sector_num = be64_to_cpupu((const uint64_t *)(buf + 2));  
7451 - if (bs_index >= nb_drives) {  
7452 - fprintf(stderr, "Invalid block device index %d\n", bs_index);  
7453 - goto error;  
7454 - }  
7455 - if (bdrv_read(drives_table[bs_index].bdrv, sector_num,  
7456 - phys_ram_base + i,  
7457 - BDRV_HASH_BLOCK_SIZE / 512) < 0) {  
7458 - fprintf(stderr, "Error while reading sector %d:%" PRId64 "\n",  
7459 - bs_index, sector_num);  
7460 - goto error;  
7461 - }  
7462 - } else  
7463 -#endif  
7464 - { 7431 + } else {
7465 error: 7432 error:
7466 printf("Error block header\n"); 7433 printf("Error block header\n");
7467 return -EINVAL; 7434 return -EINVAL;
7468 } 7435 }
7469 } 7436 }
7470 ram_decompress_close(s); 7437 ram_decompress_close(s);
  7438 +
  7439 + return 0;
  7440 +}
  7441 +
  7442 +static int ram_load(QEMUFile *f, void *opaque, int version_id)
  7443 +{
  7444 + ram_addr_t addr;
  7445 + int flags;
  7446 +
  7447 + if (version_id == 1)
  7448 + return ram_load_v1(f, opaque);
  7449 +
  7450 + if (version_id == 2) {
  7451 + if (qemu_get_be32(f) != phys_ram_size)
  7452 + return -EINVAL;
  7453 + return ram_load_dead(f, opaque);
  7454 + }
  7455 +
  7456 + if (version_id != 3)
  7457 + return -EINVAL;
  7458 +
  7459 + do {
  7460 + addr = qemu_get_be64(f);
  7461 +
  7462 + flags = addr & ~TARGET_PAGE_MASK;
  7463 + addr &= TARGET_PAGE_MASK;
  7464 +
  7465 + if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
  7466 + if (addr != phys_ram_size)
  7467 + return -EINVAL;
  7468 + }
  7469 +
  7470 + if (flags & RAM_SAVE_FLAG_FULL) {
  7471 + if (ram_load_dead(f, opaque) < 0)
  7472 + return -EINVAL;
  7473 + }
  7474 +
  7475 + if (flags & RAM_SAVE_FLAG_COMPRESS) {
  7476 + uint8_t ch = qemu_get_byte(f);
  7477 + memset(phys_ram_base + addr, ch, TARGET_PAGE_SIZE);
  7478 + } else if (flags & RAM_SAVE_FLAG_PAGE)
  7479 + qemu_get_buffer(f, phys_ram_base + addr, TARGET_PAGE_SIZE);
  7480 + } while (!(flags & RAM_SAVE_FLAG_EOS));
  7481 +
7471 return 0; 7482 return 0;
7472 } 7483 }
7473 7484
@@ -9512,7 +9523,7 @@ int main(int argc, char **argv) @@ -9512,7 +9523,7 @@ int main(int argc, char **argv)
9512 exit(1); 9523 exit(1);
9513 9524
9514 register_savevm("timer", 0, 2, timer_save, timer_load, NULL); 9525 register_savevm("timer", 0, 2, timer_save, timer_load, NULL);
9515 - register_savevm("ram", 0, 2, ram_save, ram_load, NULL); 9526 + register_savevm_live("ram", 0, 3, ram_save_live, NULL, ram_load, NULL);
9516 9527
9517 /* terminal init */ 9528 /* terminal init */
9518 memset(&display_state, 0, sizeof(display_state)); 9529 memset(&display_state, 0, sizeof(display_state));