Commit 9366f4186025e1d8fc3bebd41fb714521c170b6f

Authored by aliguori
1 parent 74576198

Introduce v3 of savevm protocol

The current savevm/loadvm protocol has some drawbacks.  It does not support
the ability to do progressive saving which means it cannot be used for live
checkpointing or migration.  The section sizes are 32-bit integers which
means that it will not function when using more than 4GB of memory for a guest.
It attempts to seek within the output file which means it cannot be streamed.
The current protocol also is pretty lax about how it supports forward
compatibility.  If a saved section version is greater than what the restore
code supports, the restore code generally treats the saved data as being in
whatever version it supports.  This means that restoring a saved VM on an older
version of QEMU will likely result in silent guest failure.

This patch introduces a new version of the savevm protocol.  It has the
following features:

 * Support for progressive save of sections (for live checkpoint/migration)
 * An asynchronous API for doing save
 * Support for interleaving multiple progressive save sections
   (for future support of memory hot-add/storage migration)
 * Fully streaming format
 * Strong section version checking

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5434 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 3 changed files with 248 additions and 45 deletions
@@ -217,6 +217,7 @@ int64_t qemu_ftell(QEMUFile *f); @@ -217,6 +217,7 @@ int64_t qemu_ftell(QEMUFile *f);
217 int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence); 217 int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence);
218 218
219 typedef void SaveStateHandler(QEMUFile *f, void *opaque); 219 typedef void SaveStateHandler(QEMUFile *f, void *opaque);
  220 +typedef int SaveLiveStateHandler(QEMUFile *f, int stage, void *opaque);
220 typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id); 221 typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
221 222
222 int register_savevm(const char *idstr, 223 int register_savevm(const char *idstr,
@@ -226,6 +227,14 @@ int register_savevm(const char *idstr, @@ -226,6 +227,14 @@ int register_savevm(const char *idstr,
226 LoadStateHandler *load_state, 227 LoadStateHandler *load_state,
227 void *opaque); 228 void *opaque);
228 229
  230 +int register_savevm_live(const char *idstr,
  231 + int instance_id,
  232 + int version_id,
  233 + SaveLiveStateHandler *save_live_state,
  234 + SaveStateHandler *save_state,
  235 + LoadStateHandler *load_state,
  236 + void *opaque);
  237 +
229 typedef void QEMUResetHandler(void *opaque); 238 typedef void QEMUResetHandler(void *opaque);
230 239
231 void qemu_register_reset(QEMUResetHandler *func, void *opaque); 240 void qemu_register_reset(QEMUResetHandler *func, void *opaque);
sysemu.h
@@ -50,6 +50,12 @@ void do_info_snapshots(void); @@ -50,6 +50,12 @@ void do_info_snapshots(void);
50 50
51 void main_loop_wait(int timeout); 51 void main_loop_wait(int timeout);
52 52
  53 +int qemu_savevm_state_begin(QEMUFile *f);
  54 +int qemu_savevm_state_iterate(QEMUFile *f);
  55 +int qemu_savevm_state_complete(QEMUFile *f);
  56 +int qemu_savevm_state(QEMUFile *f);
  57 +int qemu_loadvm_state(QEMUFile *f);
  58 +
53 /* Polling handling */ 59 /* Polling handling */
54 60
55 /* return TRUE if no sleep should be done afterwards */ 61 /* return TRUE if no sleep should be done afterwards */
@@ -6579,6 +6579,8 @@ typedef struct SaveStateEntry { @@ -6579,6 +6579,8 @@ typedef struct SaveStateEntry {
6579 char idstr[256]; 6579 char idstr[256];
6580 int instance_id; 6580 int instance_id;
6581 int version_id; 6581 int version_id;
  6582 + int section_id;
  6583 + SaveLiveStateHandler *save_live_state;
6582 SaveStateHandler *save_state; 6584 SaveStateHandler *save_state;
6583 LoadStateHandler *load_state; 6585 LoadStateHandler *load_state;
6584 void *opaque; 6586 void *opaque;
@@ -6591,14 +6593,16 @@ static SaveStateEntry *first_se; @@ -6591,14 +6593,16 @@ static SaveStateEntry *first_se;
6591 of the system, so instance_id should be removed/replaced. 6593 of the system, so instance_id should be removed/replaced.
6592 Meanwhile pass -1 as instance_id if you do not already have a clearly 6594 Meanwhile pass -1 as instance_id if you do not already have a clearly
6593 distinguishing id for all instances of your device class. */ 6595 distinguishing id for all instances of your device class. */
6594 -int register_savevm(const char *idstr,  
6595 - int instance_id,  
6596 - int version_id,  
6597 - SaveStateHandler *save_state,  
6598 - LoadStateHandler *load_state,  
6599 - void *opaque) 6596 +int register_savevm_live(const char *idstr,
  6597 + int instance_id,
  6598 + int version_id,
  6599 + SaveLiveStateHandler *save_live_state,
  6600 + SaveStateHandler *save_state,
  6601 + LoadStateHandler *load_state,
  6602 + void *opaque)
6600 { 6603 {
6601 SaveStateEntry *se, **pse; 6604 SaveStateEntry *se, **pse;
  6605 + static int global_section_id;
6602 6606
6603 se = qemu_malloc(sizeof(SaveStateEntry)); 6607 se = qemu_malloc(sizeof(SaveStateEntry));
6604 if (!se) 6608 if (!se)
@@ -6606,6 +6610,8 @@ int register_savevm(const char *idstr, @@ -6606,6 +6610,8 @@ int register_savevm(const char *idstr,
6606 pstrcpy(se->idstr, sizeof(se->idstr), idstr); 6610 pstrcpy(se->idstr, sizeof(se->idstr), idstr);
6607 se->instance_id = (instance_id == -1) ? 0 : instance_id; 6611 se->instance_id = (instance_id == -1) ? 0 : instance_id;
6608 se->version_id = version_id; 6612 se->version_id = version_id;
  6613 + se->section_id = global_section_id++;
  6614 + se->save_live_state = save_live_state;
6609 se->save_state = save_state; 6615 se->save_state = save_state;
6610 se->load_state = load_state; 6616 se->load_state = load_state;
6611 se->opaque = opaque; 6617 se->opaque = opaque;
@@ -6624,25 +6630,105 @@ int register_savevm(const char *idstr, @@ -6624,25 +6630,105 @@ int register_savevm(const char *idstr,
6624 return 0; 6630 return 0;
6625 } 6631 }
6626 6632
6627 -#define QEMU_VM_FILE_MAGIC 0x5145564d  
6628 -#define QEMU_VM_FILE_VERSION 0x00000002 6633 +int register_savevm(const char *idstr,
  6634 + int instance_id,
  6635 + int version_id,
  6636 + SaveStateHandler *save_state,
  6637 + LoadStateHandler *load_state,
  6638 + void *opaque)
  6639 +{
  6640 + return register_savevm_live(idstr, instance_id, version_id,
  6641 + NULL, save_state, load_state, opaque);
  6642 +}
  6643 +
  6644 +#define QEMU_VM_FILE_MAGIC 0x5145564d
  6645 +#define QEMU_VM_FILE_VERSION_COMPAT 0x00000002
  6646 +#define QEMU_VM_FILE_VERSION 0x00000003
6629 6647
6630 -static int qemu_savevm_state(QEMUFile *f) 6648 +#define QEMU_VM_EOF 0x00
  6649 +#define QEMU_VM_SECTION_START 0x01
  6650 +#define QEMU_VM_SECTION_PART 0x02
  6651 +#define QEMU_VM_SECTION_END 0x03
  6652 +#define QEMU_VM_SECTION_FULL 0x04
  6653 +
  6654 +int qemu_savevm_state_begin(QEMUFile *f)
6631 { 6655 {
6632 SaveStateEntry *se; 6656 SaveStateEntry *se;
6633 - int len, ret;  
6634 - int64_t cur_pos, len_pos, total_len_pos;  
6635 6657
6636 qemu_put_be32(f, QEMU_VM_FILE_MAGIC); 6658 qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
6637 qemu_put_be32(f, QEMU_VM_FILE_VERSION); 6659 qemu_put_be32(f, QEMU_VM_FILE_VERSION);
6638 - total_len_pos = qemu_ftell(f);  
6639 - qemu_put_be64(f, 0); /* total size */ 6660 +
  6661 + for (se = first_se; se != NULL; se = se->next) {
  6662 + int len;
  6663 +
  6664 + if (se->save_live_state == NULL)
  6665 + continue;
  6666 +
  6667 + /* Section type */
  6668 + qemu_put_byte(f, QEMU_VM_SECTION_START);
  6669 + qemu_put_be32(f, se->section_id);
  6670 +
  6671 + /* ID string */
  6672 + len = strlen(se->idstr);
  6673 + qemu_put_byte(f, len);
  6674 + qemu_put_buffer(f, (uint8_t *)se->idstr, len);
  6675 +
  6676 + qemu_put_be32(f, se->instance_id);
  6677 + qemu_put_be32(f, se->version_id);
  6678 +
  6679 + se->save_live_state(f, QEMU_VM_SECTION_START, se->opaque);
  6680 + }
  6681 +
  6682 + return 0;
  6683 +}
  6684 +
  6685 +int qemu_savevm_state_iterate(QEMUFile *f)
  6686 +{
  6687 + SaveStateEntry *se;
  6688 + int ret = 0;
  6689 +
  6690 + for (se = first_se; se != NULL; se = se->next) {
  6691 + if (se->save_live_state == NULL)
  6692 + continue;
  6693 +
  6694 + /* Section type */
  6695 + qemu_put_byte(f, QEMU_VM_SECTION_PART);
  6696 + qemu_put_be32(f, se->section_id);
  6697 +
  6698 + ret |= se->save_live_state(f, QEMU_VM_SECTION_PART, se->opaque);
  6699 + }
  6700 +
  6701 + if (ret)
  6702 + return 1;
  6703 +
  6704 + return 0;
  6705 +}
  6706 +
  6707 +int qemu_savevm_state_complete(QEMUFile *f)
  6708 +{
  6709 + SaveStateEntry *se;
  6710 +
  6711 + for (se = first_se; se != NULL; se = se->next) {
  6712 + if (se->save_live_state == NULL)
  6713 + continue;
  6714 +
  6715 + /* Section type */
  6716 + qemu_put_byte(f, QEMU_VM_SECTION_END);
  6717 + qemu_put_be32(f, se->section_id);
  6718 +
  6719 + se->save_live_state(f, QEMU_VM_SECTION_END, se->opaque);
  6720 + }
6640 6721
6641 for(se = first_se; se != NULL; se = se->next) { 6722 for(se = first_se; se != NULL; se = se->next) {
  6723 + int len;
  6724 +
6642 if (se->save_state == NULL) 6725 if (se->save_state == NULL)
6643 - /* this one has a loader only, for backwards compatibility */  
6644 continue; 6726 continue;
6645 6727
  6728 + /* Section type */
  6729 + qemu_put_byte(f, QEMU_VM_SECTION_FULL);
  6730 + qemu_put_be32(f, se->section_id);
  6731 +
6646 /* ID string */ 6732 /* ID string */
6647 len = strlen(se->idstr); 6733 len = strlen(se->idstr);
6648 qemu_put_byte(f, len); 6734 qemu_put_byte(f, len);
@@ -6651,24 +6737,37 @@ static int qemu_savevm_state(QEMUFile *f) @@ -6651,24 +6737,37 @@ static int qemu_savevm_state(QEMUFile *f)
6651 qemu_put_be32(f, se->instance_id); 6737 qemu_put_be32(f, se->instance_id);
6652 qemu_put_be32(f, se->version_id); 6738 qemu_put_be32(f, se->version_id);
6653 6739
6654 - /* record size: filled later */  
6655 - len_pos = qemu_ftell(f);  
6656 - qemu_put_be32(f, 0);  
6657 se->save_state(f, se->opaque); 6740 se->save_state(f, se->opaque);
6658 -  
6659 - /* fill record size */  
6660 - cur_pos = qemu_ftell(f);  
6661 - len = cur_pos - len_pos - 4;  
6662 - qemu_fseek(f, len_pos, SEEK_SET);  
6663 - qemu_put_be32(f, len);  
6664 - qemu_fseek(f, cur_pos, SEEK_SET);  
6665 } 6741 }
6666 - cur_pos = qemu_ftell(f);  
6667 - qemu_fseek(f, total_len_pos, SEEK_SET);  
6668 - qemu_put_be64(f, cur_pos - total_len_pos - 8);  
6669 - qemu_fseek(f, cur_pos, SEEK_SET);  
6670 6742
6671 - ret = 0; 6743 + qemu_put_byte(f, QEMU_VM_EOF);
  6744 +
  6745 + return 0;
  6746 +}
  6747 +
  6748 +int qemu_savevm_state(QEMUFile *f)
  6749 +{
  6750 + int saved_vm_running;
  6751 + int ret;
  6752 +
  6753 + saved_vm_running = vm_running;
  6754 + vm_stop(0);
  6755 +
  6756 + ret = qemu_savevm_state_begin(f);
  6757 + if (ret < 0)
  6758 + goto out;
  6759 +
  6760 + do {
  6761 + ret = qemu_savevm_state_iterate(f);
  6762 + if (ret < 0)
  6763 + goto out;
  6764 + } while (ret == 1);
  6765 +
  6766 + ret = qemu_savevm_state_complete(f);
  6767 +
  6768 +out:
  6769 + if (saved_vm_running)
  6770 + vm_start();
6672 return ret; 6771 return ret;
6673 } 6772 }
6674 6773
@@ -6684,23 +6783,20 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id) @@ -6684,23 +6783,20 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id)
6684 return NULL; 6783 return NULL;
6685 } 6784 }
6686 6785
6687 -static int qemu_loadvm_state(QEMUFile *f) 6786 +typedef struct LoadStateEntry {
  6787 + SaveStateEntry *se;
  6788 + int section_id;
  6789 + int version_id;
  6790 + struct LoadStateEntry *next;
  6791 +} LoadStateEntry;
  6792 +
  6793 +static int qemu_loadvm_state_v2(QEMUFile *f)
6688 { 6794 {
6689 SaveStateEntry *se; 6795 SaveStateEntry *se;
6690 int len, ret, instance_id, record_len, version_id; 6796 int len, ret, instance_id, record_len, version_id;
6691 int64_t total_len, end_pos, cur_pos; 6797 int64_t total_len, end_pos, cur_pos;
6692 - unsigned int v;  
6693 char idstr[256]; 6798 char idstr[256];
6694 6799
6695 - v = qemu_get_be32(f);  
6696 - if (v != QEMU_VM_FILE_MAGIC)  
6697 - goto fail;  
6698 - v = qemu_get_be32(f);  
6699 - if (v != QEMU_VM_FILE_VERSION) {  
6700 - fail:  
6701 - ret = -1;  
6702 - goto the_end;  
6703 - }  
6704 total_len = qemu_get_be64(f); 6800 total_len = qemu_get_be64(f);
6705 end_pos = total_len + qemu_ftell(f); 6801 end_pos = total_len + qemu_ftell(f);
6706 for(;;) { 6802 for(;;) {
@@ -6712,10 +6808,6 @@ static int qemu_loadvm_state(QEMUFile *f) @@ -6712,10 +6808,6 @@ static int qemu_loadvm_state(QEMUFile *f)
6712 instance_id = qemu_get_be32(f); 6808 instance_id = qemu_get_be32(f);
6713 version_id = qemu_get_be32(f); 6809 version_id = qemu_get_be32(f);
6714 record_len = qemu_get_be32(f); 6810 record_len = qemu_get_be32(f);
6715 -#if 0  
6716 - printf("idstr=%s instance=0x%x version=%d len=%d\n",  
6717 - idstr, instance_id, version_id, record_len);  
6718 -#endif  
6719 cur_pos = qemu_ftell(f); 6811 cur_pos = qemu_ftell(f);
6720 se = find_se(idstr, instance_id); 6812 se = find_se(idstr, instance_id);
6721 if (!se) { 6813 if (!se) {
@@ -6731,8 +6823,104 @@ static int qemu_loadvm_state(QEMUFile *f) @@ -6731,8 +6823,104 @@ static int qemu_loadvm_state(QEMUFile *f)
6731 /* always seek to exact end of record */ 6823 /* always seek to exact end of record */
6732 qemu_fseek(f, cur_pos + record_len, SEEK_SET); 6824 qemu_fseek(f, cur_pos + record_len, SEEK_SET);
6733 } 6825 }
  6826 + return 0;
  6827 +}
  6828 +
  6829 +int qemu_loadvm_state(QEMUFile *f)
  6830 +{
  6831 + LoadStateEntry *first_le = NULL;
  6832 + uint8_t section_type;
  6833 + unsigned int v;
  6834 + int ret;
  6835 +
  6836 + v = qemu_get_be32(f);
  6837 + if (v != QEMU_VM_FILE_MAGIC)
  6838 + return -EINVAL;
  6839 +
  6840 + v = qemu_get_be32(f);
  6841 + if (v == QEMU_VM_FILE_VERSION_COMPAT)
  6842 + return qemu_loadvm_state_v2(f);
  6843 + if (v != QEMU_VM_FILE_VERSION)
  6844 + return -ENOTSUP;
  6845 +
  6846 + while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
  6847 + uint32_t instance_id, version_id, section_id;
  6848 + LoadStateEntry *le;
  6849 + SaveStateEntry *se;
  6850 + char idstr[257];
  6851 + int len;
  6852 +
  6853 + switch (section_type) {
  6854 + case QEMU_VM_SECTION_START:
  6855 + case QEMU_VM_SECTION_FULL:
  6856 + /* Read section start */
  6857 + section_id = qemu_get_be32(f);
  6858 + len = qemu_get_byte(f);
  6859 + qemu_get_buffer(f, (uint8_t *)idstr, len);
  6860 + idstr[len] = 0;
  6861 + instance_id = qemu_get_be32(f);
  6862 + version_id = qemu_get_be32(f);
  6863 +
  6864 + /* Find savevm section */
  6865 + se = find_se(idstr, instance_id);
  6866 + if (se == NULL) {
  6867 + fprintf(stderr, "Unknown savevm section or instance '%s' %d\n", idstr, instance_id);
  6868 + ret = -EINVAL;
  6869 + goto out;
  6870 + }
  6871 +
  6872 + /* Validate version */
  6873 + if (version_id > se->version_id) {
  6874 + fprintf(stderr, "savevm: unsupported version %d for '%s' v%d\n",
  6875 + version_id, idstr, se->version_id);
  6876 + ret = -EINVAL;
  6877 + goto out;
  6878 + }
  6879 +
  6880 + /* Add entry */
  6881 + le = qemu_mallocz(sizeof(*le));
  6882 + if (le == NULL) {
  6883 + ret = -ENOMEM;
  6884 + goto out;
  6885 + }
  6886 +
  6887 + le->se = se;
  6888 + le->section_id = section_id;
  6889 + le->version_id = version_id;
  6890 + le->next = first_le;
  6891 + first_le = le;
  6892 +
  6893 + le->se->load_state(f, le->se->opaque, le->version_id);
  6894 + break;
  6895 + case QEMU_VM_SECTION_PART:
  6896 + case QEMU_VM_SECTION_END:
  6897 + section_id = qemu_get_be32(f);
  6898 +
  6899 + for (le = first_le; le && le->section_id != section_id; le = le->next);
  6900 + if (le == NULL) {
  6901 + fprintf(stderr, "Unknown savevm section %d\n", section_id);
  6902 + ret = -EINVAL;
  6903 + goto out;
  6904 + }
  6905 +
  6906 + le->se->load_state(f, le->se->opaque, le->version_id);
  6907 + break;
  6908 + default:
  6909 + fprintf(stderr, "Unknown savevm section type %d\n", section_type);
  6910 + ret = -EINVAL;
  6911 + goto out;
  6912 + }
  6913 + }
  6914 +
6734 ret = 0; 6915 ret = 0;
6735 - the_end: 6916 +
  6917 +out:
  6918 + while (first_le) {
  6919 + LoadStateEntry *le = first_le;
  6920 + first_le = first_le->next;
  6921 + qemu_free(le);
  6922 + }
  6923 +
6736 return ret; 6924 return ret;
6737 } 6925 }
6738 6926