Commit 9366f4186025e1d8fc3bebd41fb714521c170b6f

Authored by aliguori
1 parent 74576198

Introduce v3 of savevm protocol

The current savevm/loadvm protocol has some drawbacks.  It does not support
the ability to do progressive saving which means it cannot be used for live
checkpointing or migration.  The section sizes are 32-bit integers which
means that it will not function when using more than 4GB of memory for a guest.
It attempts to seek within the output file which means it cannot be streamed.
The current protocol also is pretty lax about how it supports forward
compatibility.  If a saved section version is greater than what the restore
code supports, the restore code generally treats the saved data as being in
whatever version it supports.  This means that restoring a saved VM on an older
version of QEMU will likely result in silent guest failure.

This patch introduces a new version of the savevm protocol.  It has the
following features:

 * Support for progressive save of sections (for live checkpoint/migration)
 * An asynchronous API for doing save
 * Support for interleaving multiple progressive save sections
   (for future support of memory hot-add/storage migration)
 * Fully streaming format
 * Strong section version checking

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5434 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 3 changed files with 248 additions and 45 deletions
... ... @@ -217,6 +217,7 @@ int64_t qemu_ftell(QEMUFile *f);
217 217 int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence);
218 218  
219 219 typedef void SaveStateHandler(QEMUFile *f, void *opaque);
  220 +typedef int SaveLiveStateHandler(QEMUFile *f, int stage, void *opaque);
220 221 typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
221 222  
222 223 int register_savevm(const char *idstr,
... ... @@ -226,6 +227,14 @@ int register_savevm(const char *idstr,
226 227 LoadStateHandler *load_state,
227 228 void *opaque);
228 229  
  230 +int register_savevm_live(const char *idstr,
  231 + int instance_id,
  232 + int version_id,
  233 + SaveLiveStateHandler *save_live_state,
  234 + SaveStateHandler *save_state,
  235 + LoadStateHandler *load_state,
  236 + void *opaque);
  237 +
229 238 typedef void QEMUResetHandler(void *opaque);
230 239  
231 240 void qemu_register_reset(QEMUResetHandler *func, void *opaque);
... ...
sysemu.h
... ... @@ -50,6 +50,12 @@ void do_info_snapshots(void);
50 50  
51 51 void main_loop_wait(int timeout);
52 52  
  53 +int qemu_savevm_state_begin(QEMUFile *f);
  54 +int qemu_savevm_state_iterate(QEMUFile *f);
  55 +int qemu_savevm_state_complete(QEMUFile *f);
  56 +int qemu_savevm_state(QEMUFile *f);
  57 +int qemu_loadvm_state(QEMUFile *f);
  58 +
53 59 /* Polling handling */
54 60  
55 61 /* return TRUE if no sleep should be done afterwards */
... ...
... ... @@ -6579,6 +6579,8 @@ typedef struct SaveStateEntry {
6579 6579 char idstr[256];
6580 6580 int instance_id;
6581 6581 int version_id;
  6582 + int section_id;
  6583 + SaveLiveStateHandler *save_live_state;
6582 6584 SaveStateHandler *save_state;
6583 6585 LoadStateHandler *load_state;
6584 6586 void *opaque;
... ... @@ -6591,14 +6593,16 @@ static SaveStateEntry *first_se;
6591 6593 of the system, so instance_id should be removed/replaced.
6592 6594 Meanwhile pass -1 as instance_id if you do not already have a clearly
6593 6595 distinguishing id for all instances of your device class. */
6594   -int register_savevm(const char *idstr,
6595   - int instance_id,
6596   - int version_id,
6597   - SaveStateHandler *save_state,
6598   - LoadStateHandler *load_state,
6599   - void *opaque)
  6596 +int register_savevm_live(const char *idstr,
  6597 + int instance_id,
  6598 + int version_id,
  6599 + SaveLiveStateHandler *save_live_state,
  6600 + SaveStateHandler *save_state,
  6601 + LoadStateHandler *load_state,
  6602 + void *opaque)
6600 6603 {
6601 6604 SaveStateEntry *se, **pse;
  6605 + static int global_section_id;
6602 6606  
6603 6607 se = qemu_malloc(sizeof(SaveStateEntry));
6604 6608 if (!se)
... ... @@ -6606,6 +6610,8 @@ int register_savevm(const char *idstr,
6606 6610 pstrcpy(se->idstr, sizeof(se->idstr), idstr);
6607 6611 se->instance_id = (instance_id == -1) ? 0 : instance_id;
6608 6612 se->version_id = version_id;
  6613 + se->section_id = global_section_id++;
  6614 + se->save_live_state = save_live_state;
6609 6615 se->save_state = save_state;
6610 6616 se->load_state = load_state;
6611 6617 se->opaque = opaque;
... ... @@ -6624,25 +6630,105 @@ int register_savevm(const char *idstr,
6624 6630 return 0;
6625 6631 }
6626 6632  
6627   -#define QEMU_VM_FILE_MAGIC 0x5145564d
6628   -#define QEMU_VM_FILE_VERSION 0x00000002
  6633 +int register_savevm(const char *idstr,
  6634 + int instance_id,
  6635 + int version_id,
  6636 + SaveStateHandler *save_state,
  6637 + LoadStateHandler *load_state,
  6638 + void *opaque)
  6639 +{
  6640 + return register_savevm_live(idstr, instance_id, version_id,
  6641 + NULL, save_state, load_state, opaque);
  6642 +}
  6643 +
  6644 +#define QEMU_VM_FILE_MAGIC 0x5145564d
  6645 +#define QEMU_VM_FILE_VERSION_COMPAT 0x00000002
  6646 +#define QEMU_VM_FILE_VERSION 0x00000003
6629 6647  
6630   -static int qemu_savevm_state(QEMUFile *f)
  6648 +#define QEMU_VM_EOF 0x00
  6649 +#define QEMU_VM_SECTION_START 0x01
  6650 +#define QEMU_VM_SECTION_PART 0x02
  6651 +#define QEMU_VM_SECTION_END 0x03
  6652 +#define QEMU_VM_SECTION_FULL 0x04
  6653 +
  6654 +int qemu_savevm_state_begin(QEMUFile *f)
6631 6655 {
6632 6656 SaveStateEntry *se;
6633   - int len, ret;
6634   - int64_t cur_pos, len_pos, total_len_pos;
6635 6657  
6636 6658 qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
6637 6659 qemu_put_be32(f, QEMU_VM_FILE_VERSION);
6638   - total_len_pos = qemu_ftell(f);
6639   - qemu_put_be64(f, 0); /* total size */
  6660 +
  6661 + for (se = first_se; se != NULL; se = se->next) {
  6662 + int len;
  6663 +
  6664 + if (se->save_live_state == NULL)
  6665 + continue;
  6666 +
  6667 + /* Section type */
  6668 + qemu_put_byte(f, QEMU_VM_SECTION_START);
  6669 + qemu_put_be32(f, se->section_id);
  6670 +
  6671 + /* ID string */
  6672 + len = strlen(se->idstr);
  6673 + qemu_put_byte(f, len);
  6674 + qemu_put_buffer(f, (uint8_t *)se->idstr, len);
  6675 +
  6676 + qemu_put_be32(f, se->instance_id);
  6677 + qemu_put_be32(f, se->version_id);
  6678 +
  6679 + se->save_live_state(f, QEMU_VM_SECTION_START, se->opaque);
  6680 + }
  6681 +
  6682 + return 0;
  6683 +}
  6684 +
  6685 +int qemu_savevm_state_iterate(QEMUFile *f)
  6686 +{
  6687 + SaveStateEntry *se;
  6688 + int ret = 0;
  6689 +
  6690 + for (se = first_se; se != NULL; se = se->next) {
  6691 + if (se->save_live_state == NULL)
  6692 + continue;
  6693 +
  6694 + /* Section type */
  6695 + qemu_put_byte(f, QEMU_VM_SECTION_PART);
  6696 + qemu_put_be32(f, se->section_id);
  6697 +
  6698 + ret |= se->save_live_state(f, QEMU_VM_SECTION_PART, se->opaque);
  6699 + }
  6700 +
  6701 + if (ret)
  6702 + return 1;
  6703 +
  6704 + return 0;
  6705 +}
  6706 +
  6707 +int qemu_savevm_state_complete(QEMUFile *f)
  6708 +{
  6709 + SaveStateEntry *se;
  6710 +
  6711 + for (se = first_se; se != NULL; se = se->next) {
  6712 + if (se->save_live_state == NULL)
  6713 + continue;
  6714 +
  6715 + /* Section type */
  6716 + qemu_put_byte(f, QEMU_VM_SECTION_END);
  6717 + qemu_put_be32(f, se->section_id);
  6718 +
  6719 + se->save_live_state(f, QEMU_VM_SECTION_END, se->opaque);
  6720 + }
6640 6721  
6641 6722 for(se = first_se; se != NULL; se = se->next) {
  6723 + int len;
  6724 +
6642 6725 if (se->save_state == NULL)
6643   - /* this one has a loader only, for backwards compatibility */
6644 6726 continue;
6645 6727  
  6728 + /* Section type */
  6729 + qemu_put_byte(f, QEMU_VM_SECTION_FULL);
  6730 + qemu_put_be32(f, se->section_id);
  6731 +
6646 6732 /* ID string */
6647 6733 len = strlen(se->idstr);
6648 6734 qemu_put_byte(f, len);
... ... @@ -6651,24 +6737,37 @@ static int qemu_savevm_state(QEMUFile *f)
6651 6737 qemu_put_be32(f, se->instance_id);
6652 6738 qemu_put_be32(f, se->version_id);
6653 6739  
6654   - /* record size: filled later */
6655   - len_pos = qemu_ftell(f);
6656   - qemu_put_be32(f, 0);
6657 6740 se->save_state(f, se->opaque);
6658   -
6659   - /* fill record size */
6660   - cur_pos = qemu_ftell(f);
6661   - len = cur_pos - len_pos - 4;
6662   - qemu_fseek(f, len_pos, SEEK_SET);
6663   - qemu_put_be32(f, len);
6664   - qemu_fseek(f, cur_pos, SEEK_SET);
6665 6741 }
6666   - cur_pos = qemu_ftell(f);
6667   - qemu_fseek(f, total_len_pos, SEEK_SET);
6668   - qemu_put_be64(f, cur_pos - total_len_pos - 8);
6669   - qemu_fseek(f, cur_pos, SEEK_SET);
6670 6742  
6671   - ret = 0;
  6743 + qemu_put_byte(f, QEMU_VM_EOF);
  6744 +
  6745 + return 0;
  6746 +}
  6747 +
  6748 +int qemu_savevm_state(QEMUFile *f)
  6749 +{
  6750 + int saved_vm_running;
  6751 + int ret;
  6752 +
  6753 + saved_vm_running = vm_running;
  6754 + vm_stop(0);
  6755 +
  6756 + ret = qemu_savevm_state_begin(f);
  6757 + if (ret < 0)
  6758 + goto out;
  6759 +
  6760 + do {
  6761 + ret = qemu_savevm_state_iterate(f);
  6762 + if (ret < 0)
  6763 + goto out;
  6764 + } while (ret == 1);
  6765 +
  6766 + ret = qemu_savevm_state_complete(f);
  6767 +
  6768 +out:
  6769 + if (saved_vm_running)
  6770 + vm_start();
6672 6771 return ret;
6673 6772 }
6674 6773  
... ... @@ -6684,23 +6783,20 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id)
6684 6783 return NULL;
6685 6784 }
6686 6785  
6687   -static int qemu_loadvm_state(QEMUFile *f)
  6786 +typedef struct LoadStateEntry {
  6787 + SaveStateEntry *se;
  6788 + int section_id;
  6789 + int version_id;
  6790 + struct LoadStateEntry *next;
  6791 +} LoadStateEntry;
  6792 +
  6793 +static int qemu_loadvm_state_v2(QEMUFile *f)
6688 6794 {
6689 6795 SaveStateEntry *se;
6690 6796 int len, ret, instance_id, record_len, version_id;
6691 6797 int64_t total_len, end_pos, cur_pos;
6692   - unsigned int v;
6693 6798 char idstr[256];
6694 6799  
6695   - v = qemu_get_be32(f);
6696   - if (v != QEMU_VM_FILE_MAGIC)
6697   - goto fail;
6698   - v = qemu_get_be32(f);
6699   - if (v != QEMU_VM_FILE_VERSION) {
6700   - fail:
6701   - ret = -1;
6702   - goto the_end;
6703   - }
6704 6800 total_len = qemu_get_be64(f);
6705 6801 end_pos = total_len + qemu_ftell(f);
6706 6802 for(;;) {
... ... @@ -6712,10 +6808,6 @@ static int qemu_loadvm_state(QEMUFile *f)
6712 6808 instance_id = qemu_get_be32(f);
6713 6809 version_id = qemu_get_be32(f);
6714 6810 record_len = qemu_get_be32(f);
6715   -#if 0
6716   - printf("idstr=%s instance=0x%x version=%d len=%d\n",
6717   - idstr, instance_id, version_id, record_len);
6718   -#endif
6719 6811 cur_pos = qemu_ftell(f);
6720 6812 se = find_se(idstr, instance_id);
6721 6813 if (!se) {
... ... @@ -6731,8 +6823,104 @@ static int qemu_loadvm_state(QEMUFile *f)
6731 6823 /* always seek to exact end of record */
6732 6824 qemu_fseek(f, cur_pos + record_len, SEEK_SET);
6733 6825 }
  6826 + return 0;
  6827 +}
  6828 +
  6829 +int qemu_loadvm_state(QEMUFile *f)
  6830 +{
  6831 + LoadStateEntry *first_le = NULL;
  6832 + uint8_t section_type;
  6833 + unsigned int v;
  6834 + int ret;
  6835 +
  6836 + v = qemu_get_be32(f);
  6837 + if (v != QEMU_VM_FILE_MAGIC)
  6838 + return -EINVAL;
  6839 +
  6840 + v = qemu_get_be32(f);
  6841 + if (v == QEMU_VM_FILE_VERSION_COMPAT)
  6842 + return qemu_loadvm_state_v2(f);
  6843 + if (v != QEMU_VM_FILE_VERSION)
  6844 + return -ENOTSUP;
  6845 +
  6846 + while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
  6847 + uint32_t instance_id, version_id, section_id;
  6848 + LoadStateEntry *le;
  6849 + SaveStateEntry *se;
  6850 + char idstr[257];
  6851 + int len;
  6852 +
  6853 + switch (section_type) {
  6854 + case QEMU_VM_SECTION_START:
  6855 + case QEMU_VM_SECTION_FULL:
  6856 + /* Read section start */
  6857 + section_id = qemu_get_be32(f);
  6858 + len = qemu_get_byte(f);
  6859 + qemu_get_buffer(f, (uint8_t *)idstr, len);
  6860 + idstr[len] = 0;
  6861 + instance_id = qemu_get_be32(f);
  6862 + version_id = qemu_get_be32(f);
  6863 +
  6864 + /* Find savevm section */
  6865 + se = find_se(idstr, instance_id);
  6866 + if (se == NULL) {
  6867 + fprintf(stderr, "Unknown savevm section or instance '%s' %d\n", idstr, instance_id);
  6868 + ret = -EINVAL;
  6869 + goto out;
  6870 + }
  6871 +
  6872 + /* Validate version */
  6873 + if (version_id > se->version_id) {
  6874 + fprintf(stderr, "savevm: unsupported version %d for '%s' v%d\n",
  6875 + version_id, idstr, se->version_id);
  6876 + ret = -EINVAL;
  6877 + goto out;
  6878 + }
  6879 +
  6880 + /* Add entry */
  6881 + le = qemu_mallocz(sizeof(*le));
  6882 + if (le == NULL) {
  6883 + ret = -ENOMEM;
  6884 + goto out;
  6885 + }
  6886 +
  6887 + le->se = se;
  6888 + le->section_id = section_id;
  6889 + le->version_id = version_id;
  6890 + le->next = first_le;
  6891 + first_le = le;
  6892 +
  6893 + le->se->load_state(f, le->se->opaque, le->version_id);
  6894 + break;
  6895 + case QEMU_VM_SECTION_PART:
  6896 + case QEMU_VM_SECTION_END:
  6897 + section_id = qemu_get_be32(f);
  6898 +
  6899 + for (le = first_le; le && le->section_id != section_id; le = le->next);
  6900 + if (le == NULL) {
  6901 + fprintf(stderr, "Unknown savevm section %d\n", section_id);
  6902 + ret = -EINVAL;
  6903 + goto out;
  6904 + }
  6905 +
  6906 + le->se->load_state(f, le->se->opaque, le->version_id);
  6907 + break;
  6908 + default:
  6909 + fprintf(stderr, "Unknown savevm section type %d\n", section_type);
  6910 + ret = -EINVAL;
  6911 + goto out;
  6912 + }
  6913 + }
  6914 +
6734 6915 ret = 0;
6735   - the_end:
  6916 +
  6917 +out:
  6918 + while (first_le) {
  6919 + LoadStateEntry *le = first_le;
  6920 + first_le = first_le->next;
  6921 + qemu_free(le);
  6922 + }
  6923 +
6736 6924 return ret;
6737 6925 }
6738 6926  
... ...