Commit 585f8587ad3619cf070fdc19d2d0b8e7a2398d91

Authored by bellard
1 parent 9e46cfa4

new qcow2 disk image format


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@2083 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 1 changed file with 2236 additions and 0 deletions
block-qcow2.c 0 → 100644
  1 +/*
  2 + * Block driver for the QCOW version 2 format
  3 + *
  4 + * Copyright (c) 2004-2006 Fabrice Bellard
  5 + *
  6 + * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 + * of this software and associated documentation files (the "Software"), to deal
  8 + * in the Software without restriction, including without limitation the rights
  9 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 + * copies of the Software, and to permit persons to whom the Software is
  11 + * furnished to do so, subject to the following conditions:
  12 + *
  13 + * The above copyright notice and this permission notice shall be included in
  14 + * all copies or substantial portions of the Software.
  15 + *
  16 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 + * THE SOFTWARE.
  23 + */
  24 +#include "vl.h"
  25 +#include "block_int.h"
  26 +#include <zlib.h>
  27 +#include "aes.h"
  28 +#include <assert.h>
  29 +
  30 +/*
  31 + Differences with QCOW:
  32 +
  33 + - Support for multiple incremental snapshots.
  34 + - Memory management by reference counts.
  35 + - Clusters which have a reference count of one have the bit
  36 + QCOW_OFLAG_COPIED to optimize write performance.
  37 + - Size of compressed clusters is stored in sectors to reduce bit usage
  38 + in the cluster offsets.
  39 + - Support for storing additional data (such as the VM state) in the
  40 + snapshots.
  41 + - If a backing store is used, the cluster size is not constrained
  42 + (could be backported to QCOW).
  43 + - L2 tables have always a size of one cluster.
  44 +*/
  45 +
  46 +//#define DEBUG_ALLOC
  47 +//#define DEBUG_ALLOC2
  48 +
  49 +#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
  50 +#define QCOW_VERSION 2
  51 +
  52 +#define QCOW_CRYPT_NONE 0
  53 +#define QCOW_CRYPT_AES 1
  54 +
  55 +/* indicate that the refcount of the referenced cluster is exactly one. */
  56 +#define QCOW_OFLAG_COPIED (1LL << 63)
  57 +/* indicate that the cluster is compressed (they never have the copied flag) */
  58 +#define QCOW_OFLAG_COMPRESSED (1LL << 62)
  59 +
  60 +#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
  61 +
  62 +#ifndef offsetof
  63 +#define offsetof(type, field) ((size_t) &((type *)0)->field)
  64 +#endif
  65 +
  66 +typedef struct QCowHeader {
  67 + uint32_t magic;
  68 + uint32_t version;
  69 + uint64_t backing_file_offset;
  70 + uint32_t backing_file_size;
  71 + uint32_t cluster_bits;
  72 + uint64_t size; /* in bytes */
  73 + uint32_t crypt_method;
  74 + uint32_t l1_size; /* XXX: save number of clusters instead ? */
  75 + uint64_t l1_table_offset;
  76 + uint64_t refcount_table_offset;
  77 + uint32_t refcount_table_clusters;
  78 + uint32_t nb_snapshots;
  79 + uint64_t snapshots_offset;
  80 +} QCowHeader;
  81 +
  82 +typedef struct __attribute__((packed)) QCowSnapshotHeader {
  83 + /* header is 8 byte aligned */
  84 + uint64_t l1_table_offset;
  85 +
  86 + uint32_t l1_size;
  87 + uint16_t id_str_size;
  88 + uint16_t name_size;
  89 +
  90 + uint32_t date_sec;
  91 + uint32_t date_nsec;
  92 +
  93 + uint64_t vm_clock_nsec;
  94 +
  95 + uint32_t vm_state_size;
  96 + uint32_t extra_data_size; /* for extension */
  97 + /* extra data follows */
  98 + /* id_str follows */
  99 + /* name follows */
  100 +} QCowSnapshotHeader;
  101 +
  102 +#define L2_CACHE_SIZE 16
  103 +
  104 +typedef struct QCowSnapshot {
  105 + uint64_t l1_table_offset;
  106 + uint32_t l1_size;
  107 + char *id_str;
  108 + char *name;
  109 + uint32_t vm_state_size;
  110 + uint32_t date_sec;
  111 + uint32_t date_nsec;
  112 + uint64_t vm_clock_nsec;
  113 +} QCowSnapshot;
  114 +
  115 +typedef struct BDRVQcowState {
  116 + BlockDriverState *hd;
  117 + int cluster_bits;
  118 + int cluster_size;
  119 + int cluster_sectors;
  120 + int l2_bits;
  121 + int l2_size;
  122 + int l1_size;
  123 + int l1_vm_state_index;
  124 + int csize_shift;
  125 + int csize_mask;
  126 + uint64_t cluster_offset_mask;
  127 + uint64_t l1_table_offset;
  128 + uint64_t *l1_table;
  129 + uint64_t *l2_cache;
  130 + uint64_t l2_cache_offsets[L2_CACHE_SIZE];
  131 + uint32_t l2_cache_counts[L2_CACHE_SIZE];
  132 + uint8_t *cluster_cache;
  133 + uint8_t *cluster_data;
  134 + uint64_t cluster_cache_offset;
  135 +
  136 + uint64_t *refcount_table;
  137 + uint64_t refcount_table_offset;
  138 + uint32_t refcount_table_size;
  139 + uint64_t refcount_block_cache_offset;
  140 + uint16_t *refcount_block_cache;
  141 + int64_t free_cluster_index;
  142 + int64_t free_byte_offset;
  143 +
  144 + uint32_t crypt_method; /* current crypt method, 0 if no key yet */
  145 + uint32_t crypt_method_header;
  146 + AES_KEY aes_encrypt_key;
  147 + AES_KEY aes_decrypt_key;
  148 + uint64_t snapshots_offset;
  149 + int snapshots_size;
  150 + int nb_snapshots;
  151 + QCowSnapshot *snapshots;
  152 +} BDRVQcowState;
  153 +
  154 +static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
  155 +static int qcow_read(BlockDriverState *bs, int64_t sector_num,
  156 + uint8_t *buf, int nb_sectors);
  157 +static int qcow_read_snapshots(BlockDriverState *bs);
  158 +static void qcow_free_snapshots(BlockDriverState *bs);
  159 +static int refcount_init(BlockDriverState *bs);
  160 +static void refcount_close(BlockDriverState *bs);
  161 +static int get_refcount(BlockDriverState *bs, int64_t cluster_index);
  162 +static int update_cluster_refcount(BlockDriverState *bs,
  163 + int64_t cluster_index,
  164 + int addend);
  165 +static void update_refcount(BlockDriverState *bs,
  166 + int64_t offset, int64_t length,
  167 + int addend);
  168 +static int64_t alloc_clusters(BlockDriverState *bs, int64_t size);
  169 +static int64_t alloc_bytes(BlockDriverState *bs, int size);
  170 +static void free_clusters(BlockDriverState *bs,
  171 + int64_t offset, int64_t size);
  172 +#ifdef DEBUG_ALLOC
  173 +static void check_refcounts(BlockDriverState *bs);
  174 +#endif
  175 +
  176 +static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
  177 +{
  178 + const QCowHeader *cow_header = (const void *)buf;
  179 +
  180 + if (buf_size >= sizeof(QCowHeader) &&
  181 + be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
  182 + be32_to_cpu(cow_header->version) == QCOW_VERSION)
  183 + return 100;
  184 + else
  185 + return 0;
  186 +}
  187 +
  188 +static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
  189 +{
  190 + BDRVQcowState *s = bs->opaque;
  191 + int len, i, shift, ret;
  192 + QCowHeader header;
  193 +
  194 + ret = bdrv_file_open(&s->hd, filename, flags);
  195 + if (ret < 0)
  196 + return ret;
  197 + if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
  198 + goto fail;
  199 + be32_to_cpus(&header.magic);
  200 + be32_to_cpus(&header.version);
  201 + be64_to_cpus(&header.backing_file_offset);
  202 + be32_to_cpus(&header.backing_file_size);
  203 + be64_to_cpus(&header.size);
  204 + be32_to_cpus(&header.cluster_bits);
  205 + be32_to_cpus(&header.crypt_method);
  206 + be64_to_cpus(&header.l1_table_offset);
  207 + be32_to_cpus(&header.l1_size);
  208 + be64_to_cpus(&header.refcount_table_offset);
  209 + be32_to_cpus(&header.refcount_table_clusters);
  210 + be64_to_cpus(&header.snapshots_offset);
  211 + be32_to_cpus(&header.nb_snapshots);
  212 +
  213 + if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION)
  214 + goto fail;
  215 + if (header.size <= 1 ||
  216 + header.cluster_bits < 9 ||
  217 + header.cluster_bits > 16)
  218 + goto fail;
  219 + if (header.crypt_method > QCOW_CRYPT_AES)
  220 + goto fail;
  221 + s->crypt_method_header = header.crypt_method;
  222 + if (s->crypt_method_header)
  223 + bs->encrypted = 1;
  224 + s->cluster_bits = header.cluster_bits;
  225 + s->cluster_size = 1 << s->cluster_bits;
  226 + s->cluster_sectors = 1 << (s->cluster_bits - 9);
  227 + s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
  228 + s->l2_size = 1 << s->l2_bits;
  229 + bs->total_sectors = header.size / 512;
  230 + s->csize_shift = (62 - (s->cluster_bits - 8));
  231 + s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
  232 + s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
  233 + s->refcount_table_offset = header.refcount_table_offset;
  234 + s->refcount_table_size =
  235 + header.refcount_table_clusters << (s->cluster_bits - 3);
  236 +
  237 + s->snapshots_offset = header.snapshots_offset;
  238 + s->nb_snapshots = header.nb_snapshots;
  239 +
  240 + /* read the level 1 table */
  241 + s->l1_size = header.l1_size;
  242 + shift = s->cluster_bits + s->l2_bits;
  243 + s->l1_vm_state_index = (header.size + (1LL << shift) - 1) >> shift;
  244 + /* the L1 table must contain at least enough entries to put
  245 + header.size bytes */
  246 + if (s->l1_size < s->l1_vm_state_index)
  247 + goto fail;
  248 + s->l1_table_offset = header.l1_table_offset;
  249 + s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
  250 + if (!s->l1_table)
  251 + goto fail;
  252 + if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
  253 + s->l1_size * sizeof(uint64_t))
  254 + goto fail;
  255 + for(i = 0;i < s->l1_size; i++) {
  256 + be64_to_cpus(&s->l1_table[i]);
  257 + }
  258 + /* alloc L2 cache */
  259 + s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
  260 + if (!s->l2_cache)
  261 + goto fail;
  262 + s->cluster_cache = qemu_malloc(s->cluster_size);
  263 + if (!s->cluster_cache)
  264 + goto fail;
  265 + /* one more sector for decompressed data alignment */
  266 + s->cluster_data = qemu_malloc(s->cluster_size + 512);
  267 + if (!s->cluster_data)
  268 + goto fail;
  269 + s->cluster_cache_offset = -1;
  270 +
  271 + if (refcount_init(bs) < 0)
  272 + goto fail;
  273 +
  274 + /* read the backing file name */
  275 + if (header.backing_file_offset != 0) {
  276 + len = header.backing_file_size;
  277 + if (len > 1023)
  278 + len = 1023;
  279 + if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len)
  280 + goto fail;
  281 + bs->backing_file[len] = '\0';
  282 + }
  283 + if (qcow_read_snapshots(bs) < 0)
  284 + goto fail;
  285 +
  286 +#ifdef DEBUG_ALLOC
  287 + check_refcounts(bs);
  288 +#endif
  289 + return 0;
  290 +
  291 + fail:
  292 + qcow_free_snapshots(bs);
  293 + refcount_close(bs);
  294 + qemu_free(s->l1_table);
  295 + qemu_free(s->l2_cache);
  296 + qemu_free(s->cluster_cache);
  297 + qemu_free(s->cluster_data);
  298 + bdrv_delete(s->hd);
  299 + return -1;
  300 +}
  301 +
  302 +static int qcow_set_key(BlockDriverState *bs, const char *key)
  303 +{
  304 + BDRVQcowState *s = bs->opaque;
  305 + uint8_t keybuf[16];
  306 + int len, i;
  307 +
  308 + memset(keybuf, 0, 16);
  309 + len = strlen(key);
  310 + if (len > 16)
  311 + len = 16;
  312 + /* XXX: we could compress the chars to 7 bits to increase
  313 + entropy */
  314 + for(i = 0;i < len;i++) {
  315 + keybuf[i] = key[i];
  316 + }
  317 + s->crypt_method = s->crypt_method_header;
  318 +
  319 + if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
  320 + return -1;
  321 + if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
  322 + return -1;
  323 +#if 0
  324 + /* test */
  325 + {
  326 + uint8_t in[16];
  327 + uint8_t out[16];
  328 + uint8_t tmp[16];
  329 + for(i=0;i<16;i++)
  330 + in[i] = i;
  331 + AES_encrypt(in, tmp, &s->aes_encrypt_key);
  332 + AES_decrypt(tmp, out, &s->aes_decrypt_key);
  333 + for(i = 0; i < 16; i++)
  334 + printf(" %02x", tmp[i]);
  335 + printf("\n");
  336 + for(i = 0; i < 16; i++)
  337 + printf(" %02x", out[i]);
  338 + printf("\n");
  339 + }
  340 +#endif
  341 + return 0;
  342 +}
  343 +
  344 +/* The crypt function is compatible with the linux cryptoloop
  345 + algorithm for < 4 GB images. NOTE: out_buf == in_buf is
  346 + supported */
  347 +static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
  348 + uint8_t *out_buf, const uint8_t *in_buf,
  349 + int nb_sectors, int enc,
  350 + const AES_KEY *key)
  351 +{
  352 + union {
  353 + uint64_t ll[2];
  354 + uint8_t b[16];
  355 + } ivec;
  356 + int i;
  357 +
  358 + for(i = 0; i < nb_sectors; i++) {
  359 + ivec.ll[0] = cpu_to_le64(sector_num);
  360 + ivec.ll[1] = 0;
  361 + AES_cbc_encrypt(in_buf, out_buf, 512, key,
  362 + ivec.b, enc);
  363 + sector_num++;
  364 + in_buf += 512;
  365 + out_buf += 512;
  366 + }
  367 +}
  368 +
  369 +static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
  370 + uint64_t cluster_offset, int n_start, int n_end)
  371 +{
  372 + BDRVQcowState *s = bs->opaque;
  373 + int n, ret;
  374 +
  375 + n = n_end - n_start;
  376 + if (n <= 0)
  377 + return 0;
  378 + ret = qcow_read(bs, start_sect + n_start, s->cluster_data, n);
  379 + if (ret < 0)
  380 + return ret;
  381 + if (s->crypt_method) {
  382 + encrypt_sectors(s, start_sect + n_start,
  383 + s->cluster_data,
  384 + s->cluster_data, n, 1,
  385 + &s->aes_encrypt_key);
  386 + }
  387 + ret = bdrv_write(s->hd, (cluster_offset >> 9) + n_start,
  388 + s->cluster_data, n);
  389 + if (ret < 0)
  390 + return ret;
  391 + return 0;
  392 +}
  393 +
  394 +static void l2_cache_reset(BlockDriverState *bs)
  395 +{
  396 + BDRVQcowState *s = bs->opaque;
  397 +
  398 + memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
  399 + memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
  400 + memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
  401 +}
  402 +
  403 +static inline int l2_cache_new_entry(BlockDriverState *bs)
  404 +{
  405 + BDRVQcowState *s = bs->opaque;
  406 + uint32_t min_count;
  407 + int min_index, i;
  408 +
  409 + /* find a new entry in the least used one */
  410 + min_index = 0;
  411 + min_count = 0xffffffff;
  412 + for(i = 0; i < L2_CACHE_SIZE; i++) {
  413 + if (s->l2_cache_counts[i] < min_count) {
  414 + min_count = s->l2_cache_counts[i];
  415 + min_index = i;
  416 + }
  417 + }
  418 + return min_index;
  419 +}
  420 +
  421 +static int64_t align_offset(int64_t offset, int n)
  422 +{
  423 + offset = (offset + n - 1) & ~(n - 1);
  424 + return offset;
  425 +}
  426 +
  427 +static int grow_l1_table(BlockDriverState *bs, int min_size)
  428 +{
  429 + BDRVQcowState *s = bs->opaque;
  430 + int new_l1_size, new_l1_size2, ret, i;
  431 + uint64_t *new_l1_table;
  432 + uint64_t new_l1_table_offset;
  433 + uint64_t data64;
  434 + uint32_t data32;
  435 +
  436 + new_l1_size = s->l1_size;
  437 + if (min_size <= new_l1_size)
  438 + return 0;
  439 + while (min_size > new_l1_size) {
  440 + new_l1_size = (new_l1_size * 3 + 1) / 2;
  441 + }
  442 +#ifdef DEBUG_ALLOC2
  443 + printf("grow l1_table from %d to %d\n", s->l1_size, new_l1_size);
  444 +#endif
  445 +
  446 + new_l1_size2 = sizeof(uint64_t) * new_l1_size;
  447 + new_l1_table = qemu_mallocz(new_l1_size2);
  448 + if (!new_l1_table)
  449 + return -ENOMEM;
  450 + memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
  451 +
  452 + /* write new table (align to cluster) */
  453 + new_l1_table_offset = alloc_clusters(bs, new_l1_size2);
  454 +
  455 + for(i = 0; i < s->l1_size; i++)
  456 + new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
  457 + ret = bdrv_pwrite(s->hd, new_l1_table_offset, new_l1_table, new_l1_size2);
  458 + if (ret != new_l1_size2)
  459 + goto fail;
  460 + for(i = 0; i < s->l1_size; i++)
  461 + new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
  462 +
  463 + /* set new table */
  464 + data64 = cpu_to_be64(new_l1_table_offset);
  465 + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_table_offset),
  466 + &data64, sizeof(data64)) != sizeof(data64))
  467 + goto fail;
  468 + data32 = cpu_to_be32(new_l1_size);
  469 + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size),
  470 + &data32, sizeof(data32)) != sizeof(data32))
  471 + goto fail;
  472 + qemu_free(s->l1_table);
  473 + free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
  474 + s->l1_table_offset = new_l1_table_offset;
  475 + s->l1_table = new_l1_table;
  476 + s->l1_size = new_l1_size;
  477 + return 0;
  478 + fail:
  479 + qemu_free(s->l1_table);
  480 + return -EIO;
  481 +}
  482 +
  483 +/* 'allocate' is:
  484 + *
  485 + * 0 not to allocate.
  486 + *
  487 + * 1 to allocate a normal cluster (for sector indexes 'n_start' to
  488 + * 'n_end')
  489 + *
  490 + * 2 to allocate a compressed cluster of size
  491 + * 'compressed_size'. 'compressed_size' must be > 0 and <
  492 + * cluster_size
  493 + *
  494 + * return 0 if not allocated.
  495 + */
  496 +static uint64_t get_cluster_offset(BlockDriverState *bs,
  497 + uint64_t offset, int allocate,
  498 + int compressed_size,
  499 + int n_start, int n_end)
  500 +{
  501 + BDRVQcowState *s = bs->opaque;
  502 + int min_index, i, j, l1_index, l2_index, ret;
  503 + uint64_t l2_offset, *l2_table, cluster_offset, tmp, old_l2_offset;
  504 +
  505 + l1_index = offset >> (s->l2_bits + s->cluster_bits);
  506 + if (l1_index >= s->l1_size) {
  507 + /* outside l1 table is allowed: we grow the table if needed */
  508 + if (!allocate)
  509 + return 0;
  510 + if (grow_l1_table(bs, l1_index + 1) < 0)
  511 + return 0;
  512 + }
  513 + l2_offset = s->l1_table[l1_index];
  514 + if (!l2_offset) {
  515 + if (!allocate)
  516 + return 0;
  517 + l2_allocate:
  518 + old_l2_offset = l2_offset;
  519 + /* allocate a new l2 entry */
  520 + l2_offset = alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
  521 + /* update the L1 entry */
  522 + s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
  523 + tmp = cpu_to_be64(l2_offset | QCOW_OFLAG_COPIED);
  524 + if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp),
  525 + &tmp, sizeof(tmp)) != sizeof(tmp))
  526 + return 0;
  527 + min_index = l2_cache_new_entry(bs);
  528 + l2_table = s->l2_cache + (min_index << s->l2_bits);
  529 +
  530 + if (old_l2_offset == 0) {
  531 + memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
  532 + } else {
  533 + if (bdrv_pread(s->hd, old_l2_offset,
  534 + l2_table, s->l2_size * sizeof(uint64_t)) !=
  535 + s->l2_size * sizeof(uint64_t))
  536 + return 0;
  537 + }
  538 + if (bdrv_pwrite(s->hd, l2_offset,
  539 + l2_table, s->l2_size * sizeof(uint64_t)) !=
  540 + s->l2_size * sizeof(uint64_t))
  541 + return 0;
  542 + } else {
  543 + if (!(l2_offset & QCOW_OFLAG_COPIED)) {
  544 + if (allocate) {
  545 + free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
  546 + goto l2_allocate;
  547 + }
  548 + } else {
  549 + l2_offset &= ~QCOW_OFLAG_COPIED;
  550 + }
  551 + for(i = 0; i < L2_CACHE_SIZE; i++) {
  552 + if (l2_offset == s->l2_cache_offsets[i]) {
  553 + /* increment the hit count */
  554 + if (++s->l2_cache_counts[i] == 0xffffffff) {
  555 + for(j = 0; j < L2_CACHE_SIZE; j++) {
  556 + s->l2_cache_counts[j] >>= 1;
  557 + }
  558 + }
  559 + l2_table = s->l2_cache + (i << s->l2_bits);
  560 + goto found;
  561 + }
  562 + }
  563 + /* not found: load a new entry in the least used one */
  564 + min_index = l2_cache_new_entry(bs);
  565 + l2_table = s->l2_cache + (min_index << s->l2_bits);
  566 + if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
  567 + s->l2_size * sizeof(uint64_t))
  568 + return 0;
  569 + }
  570 + s->l2_cache_offsets[min_index] = l2_offset;
  571 + s->l2_cache_counts[min_index] = 1;
  572 + found:
  573 + l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
  574 + cluster_offset = be64_to_cpu(l2_table[l2_index]);
  575 + if (!cluster_offset) {
  576 + if (!allocate)
  577 + return cluster_offset;
  578 + } else if (!(cluster_offset & QCOW_OFLAG_COPIED)) {
  579 + if (!allocate)
  580 + return cluster_offset;
  581 + /* free the cluster */
  582 + if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
  583 + int nb_csectors;
  584 + nb_csectors = ((cluster_offset >> s->csize_shift) &
  585 + s->csize_mask) + 1;
  586 + free_clusters(bs, (cluster_offset & s->cluster_offset_mask) & ~511,
  587 + nb_csectors * 512);
  588 + } else {
  589 + free_clusters(bs, cluster_offset, s->cluster_size);
  590 + }
  591 + } else {
  592 + cluster_offset &= ~QCOW_OFLAG_COPIED;
  593 + return cluster_offset;
  594 + }
  595 + if (allocate == 1) {
  596 + /* allocate a new cluster */
  597 + cluster_offset = alloc_clusters(bs, s->cluster_size);
  598 +
  599 + /* we must initialize the cluster content which won't be
  600 + written */
  601 + if ((n_end - n_start) < s->cluster_sectors) {
  602 + uint64_t start_sect;
  603 +
  604 + start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
  605 + ret = copy_sectors(bs, start_sect,
  606 + cluster_offset, 0, n_start);
  607 + if (ret < 0)
  608 + return 0;
  609 + ret = copy_sectors(bs, start_sect,
  610 + cluster_offset, n_end, s->cluster_sectors);
  611 + if (ret < 0)
  612 + return 0;
  613 + }
  614 + tmp = cpu_to_be64(cluster_offset | QCOW_OFLAG_COPIED);
  615 + } else {
  616 + int nb_csectors;
  617 + cluster_offset = alloc_bytes(bs, compressed_size);
  618 + nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
  619 + (cluster_offset >> 9);
  620 + cluster_offset |= QCOW_OFLAG_COMPRESSED |
  621 + ((uint64_t)nb_csectors << s->csize_shift);
  622 + /* compressed clusters never have the copied flag */
  623 + tmp = cpu_to_be64(cluster_offset);
  624 + }
  625 + /* update L2 table */
  626 + l2_table[l2_index] = tmp;
  627 + if (bdrv_pwrite(s->hd,
  628 + l2_offset + l2_index * sizeof(tmp), &tmp, sizeof(tmp)) != sizeof(tmp))
  629 + return 0;
  630 + return cluster_offset;
  631 +}
  632 +
  633 +static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
  634 + int nb_sectors, int *pnum)
  635 +{
  636 + BDRVQcowState *s = bs->opaque;
  637 + int index_in_cluster, n;
  638 + uint64_t cluster_offset;
  639 +
  640 + cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
  641 + index_in_cluster = sector_num & (s->cluster_sectors - 1);
  642 + n = s->cluster_sectors - index_in_cluster;
  643 + if (n > nb_sectors)
  644 + n = nb_sectors;
  645 + *pnum = n;
  646 + return (cluster_offset != 0);
  647 +}
  648 +
  649 +static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
  650 + const uint8_t *buf, int buf_size)
  651 +{
  652 + z_stream strm1, *strm = &strm1;
  653 + int ret, out_len;
  654 +
  655 + memset(strm, 0, sizeof(*strm));
  656 +
  657 + strm->next_in = (uint8_t *)buf;
  658 + strm->avail_in = buf_size;
  659 + strm->next_out = out_buf;
  660 + strm->avail_out = out_buf_size;
  661 +
  662 + ret = inflateInit2(strm, -12);
  663 + if (ret != Z_OK)
  664 + return -1;
  665 + ret = inflate(strm, Z_FINISH);
  666 + out_len = strm->next_out - out_buf;
  667 + if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
  668 + out_len != out_buf_size) {
  669 + inflateEnd(strm);
  670 + return -1;
  671 + }
  672 + inflateEnd(strm);
  673 + return 0;
  674 +}
  675 +
  676 +static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
  677 +{
  678 + int ret, csize, nb_csectors, sector_offset;
  679 + uint64_t coffset;
  680 +
  681 + coffset = cluster_offset & s->cluster_offset_mask;
  682 + if (s->cluster_cache_offset != coffset) {
  683 + nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
  684 + sector_offset = coffset & 511;
  685 + csize = nb_csectors * 512 - sector_offset;
  686 + ret = bdrv_read(s->hd, coffset >> 9, s->cluster_data, nb_csectors);
  687 + if (ret < 0) {
  688 + return -1;
  689 + }
  690 + if (decompress_buffer(s->cluster_cache, s->cluster_size,
  691 + s->cluster_data + sector_offset, csize) < 0) {
  692 + return -1;
  693 + }
  694 + s->cluster_cache_offset = coffset;
  695 + }
  696 + return 0;
  697 +}
  698 +
  699 +static int qcow_read(BlockDriverState *bs, int64_t sector_num,
  700 + uint8_t *buf, int nb_sectors)
  701 +{
  702 + BDRVQcowState *s = bs->opaque;
  703 + int ret, index_in_cluster, n;
  704 + uint64_t cluster_offset;
  705 +
  706 + while (nb_sectors > 0) {
  707 + cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
  708 + index_in_cluster = sector_num & (s->cluster_sectors - 1);
  709 + n = s->cluster_sectors - index_in_cluster;
  710 + if (n > nb_sectors)
  711 + n = nb_sectors;
  712 + if (!cluster_offset) {
  713 + if (bs->backing_hd) {
  714 + /* read from the base image */
  715 + ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
  716 + if (ret < 0)
  717 + return -1;
  718 + } else {
  719 + memset(buf, 0, 512 * n);
  720 + }
  721 + } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
  722 + if (decompress_cluster(s, cluster_offset) < 0)
  723 + return -1;
  724 + memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
  725 + } else {
  726 + ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
  727 + if (ret != n * 512)
  728 + return -1;
  729 + if (s->crypt_method) {
  730 + encrypt_sectors(s, sector_num, buf, buf, n, 0,
  731 + &s->aes_decrypt_key);
  732 + }
  733 + }
  734 + nb_sectors -= n;
  735 + sector_num += n;
  736 + buf += n * 512;
  737 + }
  738 + return 0;
  739 +}
  740 +
  741 +static int qcow_write(BlockDriverState *bs, int64_t sector_num,
  742 + const uint8_t *buf, int nb_sectors)
  743 +{
  744 + BDRVQcowState *s = bs->opaque;
  745 + int ret, index_in_cluster, n;
  746 + uint64_t cluster_offset;
  747 +
  748 + while (nb_sectors > 0) {
  749 + index_in_cluster = sector_num & (s->cluster_sectors - 1);
  750 + n = s->cluster_sectors - index_in_cluster;
  751 + if (n > nb_sectors)
  752 + n = nb_sectors;
  753 + cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
  754 + index_in_cluster,
  755 + index_in_cluster + n);
  756 + if (!cluster_offset)
  757 + return -1;
  758 + if (s->crypt_method) {
  759 + encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1,
  760 + &s->aes_encrypt_key);
  761 + ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512,
  762 + s->cluster_data, n * 512);
  763 + } else {
  764 + ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
  765 + }
  766 + if (ret != n * 512)
  767 + return -1;
  768 + nb_sectors -= n;
  769 + sector_num += n;
  770 + buf += n * 512;
  771 + }
  772 + s->cluster_cache_offset = -1; /* disable compressed cache */
  773 + return 0;
  774 +}
  775 +
  776 +typedef struct {
  777 + int64_t sector_num;
  778 + uint8_t *buf;
  779 + int nb_sectors;
  780 + int n;
  781 + uint64_t cluster_offset;
  782 + uint8_t *cluster_data;
  783 + BlockDriverAIOCB *hd_aiocb;
  784 + BlockDriverAIOCB *backing_hd_aiocb;
  785 +} QCowAIOCB;
  786 +
  787 +static void qcow_aio_delete(BlockDriverAIOCB *acb);
  788 +
  789 +static int qcow_aio_new(BlockDriverAIOCB *acb)
  790 +{
  791 + BlockDriverState *bs = acb->bs;
  792 + BDRVQcowState *s = bs->opaque;
  793 + QCowAIOCB *acb1;
  794 + acb1 = qemu_mallocz(sizeof(QCowAIOCB));
  795 + if (!acb1)
  796 + return -1;
  797 + acb->opaque = acb1;
  798 + acb1->hd_aiocb = bdrv_aio_new(s->hd);
  799 + if (!acb1->hd_aiocb)
  800 + goto fail;
  801 + if (bs->backing_hd) {
  802 + acb1->backing_hd_aiocb = bdrv_aio_new(bs->backing_hd);
  803 + if (!acb1->backing_hd_aiocb)
  804 + goto fail;
  805 + }
  806 + return 0;
  807 + fail:
  808 + qcow_aio_delete(acb);
  809 + return -1;
  810 +}
  811 +
  812 +static void qcow_aio_read_cb(void *opaque, int ret)
  813 +{
  814 + BlockDriverAIOCB *acb = opaque;
  815 + BlockDriverState *bs = acb->bs;
  816 + BDRVQcowState *s = bs->opaque;
  817 + QCowAIOCB *acb1 = acb->opaque;
  818 + int index_in_cluster;
  819 +
  820 + if (ret < 0) {
  821 + fail:
  822 + acb->cb(acb->cb_opaque, ret);
  823 + return;
  824 + }
  825 +
  826 + redo:
  827 + /* post process the read buffer */
  828 + if (!acb1->cluster_offset) {
  829 + /* nothing to do */
  830 + } else if (acb1->cluster_offset & QCOW_OFLAG_COMPRESSED) {
  831 + /* nothing to do */
  832 + } else {
  833 + if (s->crypt_method) {
  834 + encrypt_sectors(s, acb1->sector_num, acb1->buf, acb1->buf,
  835 + acb1->n, 0,
  836 + &s->aes_decrypt_key);
  837 + }
  838 + }
  839 +
  840 + acb1->nb_sectors -= acb1->n;
  841 + acb1->sector_num += acb1->n;
  842 + acb1->buf += acb1->n * 512;
  843 +
  844 + if (acb1->nb_sectors == 0) {
  845 + /* request completed */
  846 + acb->cb(acb->cb_opaque, 0);
  847 + return;
  848 + }
  849 +
  850 + /* prepare next AIO request */
  851 + acb1->cluster_offset = get_cluster_offset(bs,
  852 + acb1->sector_num << 9,
  853 + 0, 0, 0, 0);
  854 + index_in_cluster = acb1->sector_num & (s->cluster_sectors - 1);
  855 + acb1->n = s->cluster_sectors - index_in_cluster;
  856 + if (acb1->n > acb1->nb_sectors)
  857 + acb1->n = acb1->nb_sectors;
  858 +
  859 + if (!acb1->cluster_offset) {
  860 + if (bs->backing_hd) {
  861 + /* read from the base image */
  862 + ret = bdrv_aio_read(acb1->backing_hd_aiocb, acb1->sector_num,
  863 + acb1->buf, acb1->n, qcow_aio_read_cb, acb);
  864 + if (ret < 0)
  865 + goto fail;
  866 + } else {
  867 + /* Note: in this case, no need to wait */
  868 + memset(acb1->buf, 0, 512 * acb1->n);
  869 + goto redo;
  870 + }
  871 + } else if (acb1->cluster_offset & QCOW_OFLAG_COMPRESSED) {
  872 + /* add AIO support for compressed blocks ? */
  873 + if (decompress_cluster(s, acb1->cluster_offset) < 0)
  874 + goto fail;
  875 + memcpy(acb1->buf,
  876 + s->cluster_cache + index_in_cluster * 512, 512 * acb1->n);
  877 + goto redo;
  878 + } else {
  879 + if ((acb1->cluster_offset & 511) != 0) {
  880 + ret = -EIO;
  881 + goto fail;
  882 + }
  883 + ret = bdrv_aio_read(acb1->hd_aiocb,
  884 + (acb1->cluster_offset >> 9) + index_in_cluster,
  885 + acb1->buf, acb1->n, qcow_aio_read_cb, acb);
  886 + if (ret < 0)
  887 + goto fail;
  888 + }
  889 +}
  890 +
  891 +static int qcow_aio_read(BlockDriverAIOCB *acb, int64_t sector_num,
  892 + uint8_t *buf, int nb_sectors)
  893 +{
  894 + QCowAIOCB *acb1 = acb->opaque;
  895 +
  896 + acb1->sector_num = sector_num;
  897 + acb1->buf = buf;
  898 + acb1->nb_sectors = nb_sectors;
  899 + acb1->n = 0;
  900 + acb1->cluster_offset = 0;
  901 +
  902 + qcow_aio_read_cb(acb, 0);
  903 + return 0;
  904 +}
  905 +
  906 +static void qcow_aio_write_cb(void *opaque, int ret)
  907 +{
  908 + BlockDriverAIOCB *acb = opaque;
  909 + BlockDriverState *bs = acb->bs;
  910 + BDRVQcowState *s = bs->opaque;
  911 + QCowAIOCB *acb1 = acb->opaque;
  912 + int index_in_cluster;
  913 + uint64_t cluster_offset;
  914 + const uint8_t *src_buf;
  915 +
  916 + if (ret < 0) {
  917 + fail:
  918 + acb->cb(acb->cb_opaque, ret);
  919 + return;
  920 + }
  921 +
  922 + acb1->nb_sectors -= acb1->n;
  923 + acb1->sector_num += acb1->n;
  924 + acb1->buf += acb1->n * 512;
  925 +
  926 + if (acb1->nb_sectors == 0) {
  927 + /* request completed */
  928 + acb->cb(acb->cb_opaque, 0);
  929 + return;
  930 + }
  931 +
  932 + index_in_cluster = acb1->sector_num & (s->cluster_sectors - 1);
  933 + acb1->n = s->cluster_sectors - index_in_cluster;
  934 + if (acb1->n > acb1->nb_sectors)
  935 + acb1->n = acb1->nb_sectors;
  936 + cluster_offset = get_cluster_offset(bs, acb1->sector_num << 9, 1, 0,
  937 + index_in_cluster,
  938 + index_in_cluster + acb1->n);
  939 + if (!cluster_offset || (cluster_offset & 511) != 0) {
  940 + ret = -EIO;
  941 + goto fail;
  942 + }
  943 + if (s->crypt_method) {
  944 + if (!acb1->cluster_data) {
  945 + acb1->cluster_data = qemu_mallocz(s->cluster_size);
  946 + if (!acb1->cluster_data) {
  947 + ret = -ENOMEM;
  948 + goto fail;
  949 + }
  950 + }
  951 + encrypt_sectors(s, acb1->sector_num, acb1->cluster_data, acb1->buf,
  952 + acb1->n, 1, &s->aes_encrypt_key);
  953 + src_buf = acb1->cluster_data;
  954 + } else {
  955 + src_buf = acb1->buf;
  956 + }
  957 + ret = bdrv_aio_write(acb1->hd_aiocb,
  958 + (cluster_offset >> 9) + index_in_cluster,
  959 + src_buf, acb1->n,
  960 + qcow_aio_write_cb, acb);
  961 + if (ret < 0)
  962 + goto fail;
  963 +}
  964 +
  965 +static int qcow_aio_write(BlockDriverAIOCB *acb, int64_t sector_num,
  966 + const uint8_t *buf, int nb_sectors)
  967 +{
  968 + QCowAIOCB *acb1 = acb->opaque;
  969 + BlockDriverState *bs = acb->bs;
  970 + BDRVQcowState *s = bs->opaque;
  971 +
  972 + s->cluster_cache_offset = -1; /* disable compressed cache */
  973 +
  974 + acb1->sector_num = sector_num;
  975 + acb1->buf = (uint8_t *)buf;
  976 + acb1->nb_sectors = nb_sectors;
  977 + acb1->n = 0;
  978 +
  979 + qcow_aio_write_cb(acb, 0);
  980 + return 0;
  981 +}
  982 +
  983 +static void qcow_aio_cancel(BlockDriverAIOCB *acb)
  984 +{
  985 + QCowAIOCB *acb1 = acb->opaque;
  986 + if (acb1->hd_aiocb)
  987 + bdrv_aio_cancel(acb1->hd_aiocb);
  988 + if (acb1->backing_hd_aiocb)
  989 + bdrv_aio_cancel(acb1->backing_hd_aiocb);
  990 +}
  991 +
  992 +static void qcow_aio_delete(BlockDriverAIOCB *acb)
  993 +{
  994 + QCowAIOCB *acb1 = acb->opaque;
  995 + if (acb1->hd_aiocb)
  996 + bdrv_aio_delete(acb1->hd_aiocb);
  997 + if (acb1->backing_hd_aiocb)
  998 + bdrv_aio_delete(acb1->backing_hd_aiocb);
  999 + qemu_free(acb1->cluster_data);
  1000 + qemu_free(acb1);
  1001 +}
  1002 +
  1003 +static void qcow_close(BlockDriverState *bs)
  1004 +{
  1005 + BDRVQcowState *s = bs->opaque;
  1006 + qemu_free(s->l1_table);
  1007 + qemu_free(s->l2_cache);
  1008 + qemu_free(s->cluster_cache);
  1009 + qemu_free(s->cluster_data);
  1010 + refcount_close(bs);
  1011 + bdrv_delete(s->hd);
  1012 +}
  1013 +
  1014 +/* XXX: use std qcow open function ? */
  1015 +typedef struct QCowCreateState {
  1016 + int cluster_size;
  1017 + int cluster_bits;
  1018 + uint16_t *refcount_block;
  1019 + uint64_t *refcount_table;
  1020 + int64_t l1_table_offset;
  1021 + int64_t refcount_table_offset;
  1022 + int64_t refcount_block_offset;
  1023 +} QCowCreateState;
  1024 +
  1025 +static void create_refcount_update(QCowCreateState *s,
  1026 + int64_t offset, int64_t size)
  1027 +{
  1028 + int refcount;
  1029 + int64_t start, last, cluster_offset;
  1030 + uint16_t *p;
  1031 +
  1032 + start = offset & ~(s->cluster_size - 1);
  1033 + last = (offset + size - 1) & ~(s->cluster_size - 1);
  1034 + for(cluster_offset = start; cluster_offset <= last;
  1035 + cluster_offset += s->cluster_size) {
  1036 + p = &s->refcount_block[cluster_offset >> s->cluster_bits];
  1037 + refcount = be16_to_cpu(*p);
  1038 + refcount++;
  1039 + *p = cpu_to_be16(refcount);
  1040 + }
  1041 +}
  1042 +
  1043 +static int qcow_create(const char *filename, int64_t total_size,
  1044 + const char *backing_file, int flags)
  1045 +{
  1046 + int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits;
  1047 + QCowHeader header;
  1048 + uint64_t tmp, offset;
  1049 + QCowCreateState s1, *s = &s1;
  1050 +
  1051 + memset(s, 0, sizeof(*s));
  1052 +
  1053 + fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
  1054 + if (fd < 0)
  1055 + return -1;
  1056 + memset(&header, 0, sizeof(header));
  1057 + header.magic = cpu_to_be32(QCOW_MAGIC);
  1058 + header.version = cpu_to_be32(QCOW_VERSION);
  1059 + header.size = cpu_to_be64(total_size * 512);
  1060 + header_size = sizeof(header);
  1061 + backing_filename_len = 0;
  1062 + if (backing_file) {
  1063 + header.backing_file_offset = cpu_to_be64(header_size);
  1064 + backing_filename_len = strlen(backing_file);
  1065 + header.backing_file_size = cpu_to_be32(backing_filename_len);
  1066 + header_size += backing_filename_len;
  1067 + }
  1068 + s->cluster_bits = 12; /* 4 KB clusters */
  1069 + s->cluster_size = 1 << s->cluster_bits;
  1070 + header.cluster_bits = cpu_to_be32(s->cluster_bits);
  1071 + header_size = (header_size + 7) & ~7;
  1072 + if (flags) {
  1073 + header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
  1074 + } else {
  1075 + header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
  1076 + }
  1077 + l2_bits = s->cluster_bits - 3;
  1078 + shift = s->cluster_bits + l2_bits;
  1079 + l1_size = (((total_size * 512) + (1LL << shift) - 1) >> shift);
  1080 + offset = align_offset(header_size, s->cluster_size);
  1081 + s->l1_table_offset = offset;
  1082 + header.l1_table_offset = cpu_to_be64(s->l1_table_offset);
  1083 + header.l1_size = cpu_to_be32(l1_size);
  1084 + offset += align_offset(l1_size, s->cluster_size);
  1085 +
  1086 + s->refcount_table = qemu_mallocz(s->cluster_size);
  1087 + if (!s->refcount_table)
  1088 + goto fail;
  1089 + s->refcount_block = qemu_mallocz(s->cluster_size);
  1090 + if (!s->refcount_block)
  1091 + goto fail;
  1092 +
  1093 + s->refcount_table_offset = offset;
  1094 + header.refcount_table_offset = cpu_to_be64(offset);
  1095 + header.refcount_table_clusters = cpu_to_be32(1);
  1096 + offset += s->cluster_size;
  1097 +
  1098 + s->refcount_table[0] = cpu_to_be64(offset);
  1099 + s->refcount_block_offset = offset;
  1100 + offset += s->cluster_size;
  1101 +
  1102 + /* update refcounts */
  1103 + create_refcount_update(s, 0, header_size);
  1104 + create_refcount_update(s, s->l1_table_offset, l1_size);
  1105 + create_refcount_update(s, s->refcount_table_offset, s->cluster_size);
  1106 + create_refcount_update(s, s->refcount_block_offset, s->cluster_size);
  1107 +
  1108 + /* write all the data */
  1109 + write(fd, &header, sizeof(header));
  1110 + if (backing_file) {
  1111 + write(fd, backing_file, backing_filename_len);
  1112 + }
  1113 + lseek(fd, s->l1_table_offset, SEEK_SET);
  1114 + tmp = 0;
  1115 + for(i = 0;i < l1_size; i++) {
  1116 + write(fd, &tmp, sizeof(tmp));
  1117 + }
  1118 + lseek(fd, s->refcount_table_offset, SEEK_SET);
  1119 + write(fd, s->refcount_table, s->cluster_size);
  1120 +
  1121 + lseek(fd, s->refcount_block_offset, SEEK_SET);
  1122 + write(fd, s->refcount_block, s->cluster_size);
  1123 +
  1124 + qemu_free(s->refcount_table);
  1125 + qemu_free(s->refcount_block);
  1126 + close(fd);
  1127 + return 0;
  1128 + fail:
  1129 + qemu_free(s->refcount_table);
  1130 + qemu_free(s->refcount_block);
  1131 + close(fd);
  1132 + return -ENOMEM;
  1133 +}
  1134 +
  1135 +static int qcow_make_empty(BlockDriverState *bs)
  1136 +{
  1137 +#if 0
  1138 + /* XXX: not correct */
  1139 + BDRVQcowState *s = bs->opaque;
  1140 + uint32_t l1_length = s->l1_size * sizeof(uint64_t);
  1141 + int ret;
  1142 +
  1143 + memset(s->l1_table, 0, l1_length);
  1144 + if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0)
  1145 + return -1;
  1146 + ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length);
  1147 + if (ret < 0)
  1148 + return ret;
  1149 +
  1150 + l2_cache_reset(bs);
  1151 +#endif
  1152 + return 0;
  1153 +}
  1154 +
  1155 +/* XXX: put compressed sectors first, then all the cluster aligned
  1156 + tables to avoid losing bytes in alignment */
  1157 +static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
  1158 + const uint8_t *buf, int nb_sectors)
  1159 +{
  1160 + BDRVQcowState *s = bs->opaque;
  1161 + z_stream strm;
  1162 + int ret, out_len;
  1163 + uint8_t *out_buf;
  1164 + uint64_t cluster_offset;
  1165 +
  1166 + if (nb_sectors == 0) {
  1167 + /* align end of file to a sector boundary to ease reading with
  1168 + sector based I/Os */
  1169 + cluster_offset = bdrv_getlength(s->hd);
  1170 + cluster_offset = (cluster_offset + 511) & ~511;
  1171 + bdrv_truncate(s->hd, cluster_offset);
  1172 + return 0;
  1173 + }
  1174 +
  1175 + if (nb_sectors != s->cluster_sectors)
  1176 + return -EINVAL;
  1177 +
  1178 + out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
  1179 + if (!out_buf)
  1180 + return -ENOMEM;
  1181 +
  1182 + /* best compression, small window, no zlib header */
  1183 + memset(&strm, 0, sizeof(strm));
  1184 + ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
  1185 + Z_DEFLATED, -12,
  1186 + 9, Z_DEFAULT_STRATEGY);
  1187 + if (ret != 0) {
  1188 + qemu_free(out_buf);
  1189 + return -1;
  1190 + }
  1191 +
  1192 + strm.avail_in = s->cluster_size;
  1193 + strm.next_in = (uint8_t *)buf;
  1194 + strm.avail_out = s->cluster_size;
  1195 + strm.next_out = out_buf;
  1196 +
  1197 + ret = deflate(&strm, Z_FINISH);
  1198 + if (ret != Z_STREAM_END && ret != Z_OK) {
  1199 + qemu_free(out_buf);
  1200 + deflateEnd(&strm);
  1201 + return -1;
  1202 + }
  1203 + out_len = strm.next_out - out_buf;
  1204 +
  1205 + deflateEnd(&strm);
  1206 +
  1207 + if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
  1208 + /* could not compress: write normal cluster */
  1209 + qcow_write(bs, sector_num, buf, s->cluster_sectors);
  1210 + } else {
  1211 + cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
  1212 + out_len, 0, 0);
  1213 + cluster_offset &= s->cluster_offset_mask;
  1214 + if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len) {
  1215 + qemu_free(out_buf);
  1216 + return -1;
  1217 + }
  1218 + }
  1219 +
  1220 + qemu_free(out_buf);
  1221 + return 0;
  1222 +}
  1223 +
  1224 +static void qcow_flush(BlockDriverState *bs)
  1225 +{
  1226 + BDRVQcowState *s = bs->opaque;
  1227 + bdrv_flush(s->hd);
  1228 +}
  1229 +
  1230 +static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
  1231 +{
  1232 + BDRVQcowState *s = bs->opaque;
  1233 + bdi->cluster_size = s->cluster_size;
  1234 + bdi->vm_state_offset = (int64_t)s->l1_vm_state_index <<
  1235 + (s->cluster_bits + s->l2_bits);
  1236 + return 0;
  1237 +}
  1238 +
  1239 +/*********************************************************/
  1240 +/* snapshot support */
  1241 +
  1242 +/* update the refcounts of snapshots and the copied flag */
  1243 +static int update_snapshot_refcount(BlockDriverState *bs,
  1244 + int64_t l1_table_offset,
  1245 + int l1_size,
  1246 + int addend)
  1247 +{
  1248 + BDRVQcowState *s = bs->opaque;
  1249 + uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
  1250 + int64_t old_offset, old_l2_offset;
  1251 + int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;
  1252 +
  1253 + l2_cache_reset(bs);
  1254 +
  1255 + l2_table = NULL;
  1256 + l1_table = NULL;
  1257 + l1_size2 = l1_size * sizeof(uint64_t);
  1258 + l1_allocated = 0;
  1259 + if (l1_table_offset != s->l1_table_offset) {
  1260 + l1_table = qemu_malloc(l1_size2);
  1261 + if (!l1_table)
  1262 + goto fail;
  1263 + l1_allocated = 1;
  1264 + if (bdrv_pread(s->hd, l1_table_offset,
  1265 + l1_table, l1_size2) != l1_size2)
  1266 + goto fail;
  1267 + for(i = 0;i < l1_size; i++)
  1268 + be64_to_cpus(&l1_table[i]);
  1269 + } else {
  1270 + assert(l1_size == s->l1_size);
  1271 + l1_table = s->l1_table;
  1272 + l1_allocated = 0;
  1273 + }
  1274 +
  1275 + l2_size = s->l2_size * sizeof(uint64_t);
  1276 + l2_table = qemu_malloc(l2_size);
  1277 + if (!l2_table)
  1278 + goto fail;
  1279 + l1_modified = 0;
  1280 + for(i = 0; i < l1_size; i++) {
  1281 + l2_offset = l1_table[i];
  1282 + if (l2_offset) {
  1283 + old_l2_offset = l2_offset;
  1284 + l2_offset &= ~QCOW_OFLAG_COPIED;
  1285 + l2_modified = 0;
  1286 + if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
  1287 + goto fail;
  1288 + for(j = 0; j < s->l2_size; j++) {
  1289 + offset = be64_to_cpu(l2_table[j]);
  1290 + if (offset != 0) {
  1291 + old_offset = offset;
  1292 + offset &= ~QCOW_OFLAG_COPIED;
  1293 + if (offset & QCOW_OFLAG_COMPRESSED) {
  1294 + nb_csectors = ((offset >> s->csize_shift) &
  1295 + s->csize_mask) + 1;
  1296 + if (addend != 0)
  1297 + update_refcount(bs, (offset & s->cluster_offset_mask) & ~511,
  1298 + nb_csectors * 512, addend);
  1299 + /* compressed clusters are never modified */
  1300 + refcount = 2;
  1301 + } else {
  1302 + if (addend != 0) {
  1303 + refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
  1304 + } else {
  1305 + refcount = get_refcount(bs, offset >> s->cluster_bits);
  1306 + }
  1307 + }
  1308 +
  1309 + if (refcount == 1) {
  1310 + offset |= QCOW_OFLAG_COPIED;
  1311 + }
  1312 + if (offset != old_offset) {
  1313 + l2_table[j] = cpu_to_be64(offset);
  1314 + l2_modified = 1;
  1315 + }
  1316 + }
  1317 + }
  1318 + if (l2_modified) {
  1319 + if (bdrv_pwrite(s->hd,
  1320 + l2_offset, l2_table, l2_size) != l2_size)
  1321 + goto fail;
  1322 + }
  1323 +
  1324 + if (addend != 0) {
  1325 + refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
  1326 + } else {
  1327 + refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
  1328 + }
  1329 + if (refcount == 1) {
  1330 + l2_offset |= QCOW_OFLAG_COPIED;
  1331 + }
  1332 + if (l2_offset != old_l2_offset) {
  1333 + l1_table[i] = l2_offset;
  1334 + l1_modified = 1;
  1335 + }
  1336 + }
  1337 + }
  1338 + if (l1_modified) {
  1339 + for(i = 0; i < l1_size; i++)
  1340 + cpu_to_be64s(&l1_table[i]);
  1341 + if (bdrv_pwrite(s->hd, l1_table_offset, l1_table,
  1342 + l1_size2) != l1_size2)
  1343 + goto fail;
  1344 + for(i = 0; i < l1_size; i++)
  1345 + be64_to_cpus(&l1_table[i]);
  1346 + }
  1347 + if (l1_allocated)
  1348 + qemu_free(l1_table);
  1349 + qemu_free(l2_table);
  1350 + return 0;
  1351 + fail:
  1352 + if (l1_allocated)
  1353 + qemu_free(l1_table);
  1354 + qemu_free(l2_table);
  1355 + return -EIO;
  1356 +}
  1357 +
  1358 +static void qcow_free_snapshots(BlockDriverState *bs)
  1359 +{
  1360 + BDRVQcowState *s = bs->opaque;
  1361 + int i;
  1362 +
  1363 + for(i = 0; i < s->nb_snapshots; i++) {
  1364 + qemu_free(s->snapshots[i].name);
  1365 + qemu_free(s->snapshots[i].id_str);
  1366 + }
  1367 + qemu_free(s->snapshots);
  1368 + s->snapshots = NULL;
  1369 + s->nb_snapshots = 0;
  1370 +}
  1371 +
  1372 +static int qcow_read_snapshots(BlockDriverState *bs)
  1373 +{
  1374 + BDRVQcowState *s = bs->opaque;
  1375 + QCowSnapshotHeader h;
  1376 + QCowSnapshot *sn;
  1377 + int i, id_str_size, name_size;
  1378 + int64_t offset;
  1379 + uint32_t extra_data_size;
  1380 +
  1381 + offset = s->snapshots_offset;
  1382 + s->snapshots = qemu_mallocz(s->nb_snapshots * sizeof(QCowSnapshot));
  1383 + if (!s->snapshots)
  1384 + goto fail;
  1385 + for(i = 0; i < s->nb_snapshots; i++) {
  1386 + offset = align_offset(offset, 8);
  1387 + if (bdrv_pread(s->hd, offset, &h, sizeof(h)) != sizeof(h))
  1388 + goto fail;
  1389 + offset += sizeof(h);
  1390 + sn = s->snapshots + i;
  1391 + sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
  1392 + sn->l1_size = be32_to_cpu(h.l1_size);
  1393 + sn->vm_state_size = be32_to_cpu(h.vm_state_size);
  1394 + sn->date_sec = be32_to_cpu(h.date_sec);
  1395 + sn->date_nsec = be32_to_cpu(h.date_nsec);
  1396 + sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
  1397 + extra_data_size = be32_to_cpu(h.extra_data_size);
  1398 +
  1399 + id_str_size = be16_to_cpu(h.id_str_size);
  1400 + name_size = be16_to_cpu(h.name_size);
  1401 +
  1402 + offset += extra_data_size;
  1403 +
  1404 + sn->id_str = qemu_malloc(id_str_size + 1);
  1405 + if (!sn->id_str)
  1406 + goto fail;
  1407 + if (bdrv_pread(s->hd, offset, sn->id_str, id_str_size) != id_str_size)
  1408 + goto fail;
  1409 + offset += id_str_size;
  1410 + sn->id_str[id_str_size] = '\0';
  1411 +
  1412 + sn->name = qemu_malloc(name_size + 1);
  1413 + if (!sn->name)
  1414 + goto fail;
  1415 + if (bdrv_pread(s->hd, offset, sn->name, name_size) != name_size)
  1416 + goto fail;
  1417 + offset += name_size;
  1418 + sn->name[name_size] = '\0';
  1419 + }
  1420 + s->snapshots_size = offset - s->snapshots_offset;
  1421 + return 0;
  1422 + fail:
  1423 + qcow_free_snapshots(bs);
  1424 + return -1;
  1425 +}
  1426 +
  1427 +/* add at the end of the file a new list of snapshots */
  1428 +static int qcow_write_snapshots(BlockDriverState *bs)
  1429 +{
  1430 + BDRVQcowState *s = bs->opaque;
  1431 + QCowSnapshot *sn;
  1432 + QCowSnapshotHeader h;
  1433 + int i, name_size, id_str_size, snapshots_size;
  1434 + uint64_t data64;
  1435 + uint32_t data32;
  1436 + int64_t offset, snapshots_offset;
  1437 +
  1438 + /* compute the size of the snapshots */
  1439 + offset = 0;
  1440 + for(i = 0; i < s->nb_snapshots; i++) {
  1441 + sn = s->snapshots + i;
  1442 + offset = align_offset(offset, 8);
  1443 + offset += sizeof(h);
  1444 + offset += strlen(sn->id_str);
  1445 + offset += strlen(sn->name);
  1446 + }
  1447 + snapshots_size = offset;
  1448 +
  1449 + snapshots_offset = alloc_clusters(bs, snapshots_size);
  1450 + offset = snapshots_offset;
  1451 +
  1452 + for(i = 0; i < s->nb_snapshots; i++) {
  1453 + sn = s->snapshots + i;
  1454 + memset(&h, 0, sizeof(h));
  1455 + h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
  1456 + h.l1_size = cpu_to_be32(sn->l1_size);
  1457 + h.vm_state_size = cpu_to_be32(sn->vm_state_size);
  1458 + h.date_sec = cpu_to_be32(sn->date_sec);
  1459 + h.date_nsec = cpu_to_be32(sn->date_nsec);
  1460 + h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
  1461 +
  1462 + id_str_size = strlen(sn->id_str);
  1463 + name_size = strlen(sn->name);
  1464 + h.id_str_size = cpu_to_be16(id_str_size);
  1465 + h.name_size = cpu_to_be16(name_size);
  1466 + offset = align_offset(offset, 8);
  1467 + if (bdrv_pwrite(s->hd, offset, &h, sizeof(h)) != sizeof(h))
  1468 + goto fail;
  1469 + offset += sizeof(h);
  1470 + if (bdrv_pwrite(s->hd, offset, sn->id_str, id_str_size) != id_str_size)
  1471 + goto fail;
  1472 + offset += id_str_size;
  1473 + if (bdrv_pwrite(s->hd, offset, sn->name, name_size) != name_size)
  1474 + goto fail;
  1475 + offset += name_size;
  1476 + }
  1477 +
  1478 + /* update the various header fields */
  1479 + data64 = cpu_to_be64(snapshots_offset);
  1480 + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, snapshots_offset),
  1481 + &data64, sizeof(data64)) != sizeof(data64))
  1482 + goto fail;
  1483 + data32 = cpu_to_be32(s->nb_snapshots);
  1484 + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, nb_snapshots),
  1485 + &data32, sizeof(data32)) != sizeof(data32))
  1486 + goto fail;
  1487 +
  1488 + /* free the old snapshot table */
  1489 + free_clusters(bs, s->snapshots_offset, s->snapshots_size);
  1490 + s->snapshots_offset = snapshots_offset;
  1491 + s->snapshots_size = snapshots_size;
  1492 + return 0;
  1493 + fail:
  1494 + return -1;
  1495 +}
  1496 +
  1497 +static void find_new_snapshot_id(BlockDriverState *bs,
  1498 + char *id_str, int id_str_size)
  1499 +{
  1500 + BDRVQcowState *s = bs->opaque;
  1501 + QCowSnapshot *sn;
  1502 + int i, id, id_max = 0;
  1503 +
  1504 + for(i = 0; i < s->nb_snapshots; i++) {
  1505 + sn = s->snapshots + i;
  1506 + id = strtoul(sn->id_str, NULL, 10);
  1507 + if (id > id_max)
  1508 + id_max = id;
  1509 + }
  1510 + snprintf(id_str, id_str_size, "%d", id_max + 1);
  1511 +}
  1512 +
  1513 +static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
  1514 +{
  1515 + BDRVQcowState *s = bs->opaque;
  1516 + int i;
  1517 +
  1518 + for(i = 0; i < s->nb_snapshots; i++) {
  1519 + if (!strcmp(s->snapshots[i].id_str, id_str))
  1520 + return i;
  1521 + }
  1522 + return -1;
  1523 +}
  1524 +
  1525 +static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
  1526 +{
  1527 + BDRVQcowState *s = bs->opaque;
  1528 + int i, ret;
  1529 +
  1530 + ret = find_snapshot_by_id(bs, name);
  1531 + if (ret >= 0)
  1532 + return ret;
  1533 + for(i = 0; i < s->nb_snapshots; i++) {
  1534 + if (!strcmp(s->snapshots[i].name, name))
  1535 + return i;
  1536 + }
  1537 + return -1;
  1538 +}
  1539 +
  1540 +/* if no id is provided, a new one is constructed */
  1541 +static int qcow_snapshot_create(BlockDriverState *bs,
  1542 + QEMUSnapshotInfo *sn_info)
  1543 +{
  1544 + BDRVQcowState *s = bs->opaque;
  1545 + QCowSnapshot *snapshots1, sn1, *sn = &sn1;
  1546 + int i, ret;
  1547 + uint64_t *l1_table = NULL;
  1548 +
  1549 + memset(sn, 0, sizeof(*sn));
  1550 +
  1551 + if (sn_info->id_str[0] == '\0') {
  1552 + /* compute a new id */
  1553 + find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
  1554 + }
  1555 +
  1556 + /* check that the ID is unique */
  1557 + if (find_snapshot_by_id(bs, sn_info->id_str) >= 0)
  1558 + return -ENOENT;
  1559 +
  1560 + sn->id_str = qemu_strdup(sn_info->id_str);
  1561 + if (!sn->id_str)
  1562 + goto fail;
  1563 + sn->name = qemu_strdup(sn_info->name);
  1564 + if (!sn->name)
  1565 + goto fail;
  1566 + sn->vm_state_size = sn_info->vm_state_size;
  1567 + sn->date_sec = sn_info->date_sec;
  1568 + sn->date_nsec = sn_info->date_nsec;
  1569 + sn->vm_clock_nsec = sn_info->vm_clock_nsec;
  1570 +
  1571 + ret = update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
  1572 + if (ret < 0)
  1573 + goto fail;
  1574 +
  1575 + /* create the L1 table of the snapshot */
  1576 + sn->l1_table_offset = alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
  1577 + sn->l1_size = s->l1_size;
  1578 +
  1579 + l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
  1580 + if (!l1_table)
  1581 + goto fail;
  1582 + for(i = 0; i < s->l1_size; i++) {
  1583 + l1_table[i] = cpu_to_be64(s->l1_table[i]);
  1584 + }
  1585 + if (bdrv_pwrite(s->hd, sn->l1_table_offset,
  1586 + l1_table, s->l1_size * sizeof(uint64_t)) !=
  1587 + (s->l1_size * sizeof(uint64_t)))
  1588 + goto fail;
  1589 + qemu_free(l1_table);
  1590 + l1_table = NULL;
  1591 +
  1592 + snapshots1 = qemu_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
  1593 + if (!snapshots1)
  1594 + goto fail;
  1595 + memcpy(snapshots1, s->snapshots, s->nb_snapshots * sizeof(QCowSnapshot));
  1596 + s->snapshots = snapshots1;
  1597 + s->snapshots[s->nb_snapshots++] = *sn;
  1598 +
  1599 + if (qcow_write_snapshots(bs) < 0)
  1600 + goto fail;
  1601 +#ifdef DEBUG_ALLOC
  1602 + check_refcounts(bs);
  1603 +#endif
  1604 + return 0;
  1605 + fail:
  1606 + qemu_free(sn->name);
  1607 + qemu_free(l1_table);
  1608 + return -1;
  1609 +}
  1610 +
  1611 +/* copy the snapshot 'snapshot_name' into the current disk image */
  1612 +static int qcow_snapshot_goto(BlockDriverState *bs,
  1613 + const char *snapshot_id)
  1614 +{
  1615 + BDRVQcowState *s = bs->opaque;
  1616 + QCowSnapshot *sn;
  1617 + int i, snapshot_index, l1_size2;
  1618 +
  1619 + snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
  1620 + if (snapshot_index < 0)
  1621 + return -ENOENT;
  1622 + sn = &s->snapshots[snapshot_index];
  1623 +
  1624 + if (update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, -1) < 0)
  1625 + goto fail;
  1626 +
  1627 + if (grow_l1_table(bs, sn->l1_size) < 0)
  1628 + goto fail;
  1629 +
  1630 + s->l1_size = sn->l1_size;
  1631 + l1_size2 = s->l1_size * sizeof(uint64_t);
  1632 + /* copy the snapshot l1 table to the current l1 table */
  1633 + if (bdrv_pread(s->hd, sn->l1_table_offset,
  1634 + s->l1_table, l1_size2) != l1_size2)
  1635 + goto fail;
  1636 + if (bdrv_pwrite(s->hd, s->l1_table_offset,
  1637 + s->l1_table, l1_size2) != l1_size2)
  1638 + goto fail;
  1639 + for(i = 0;i < s->l1_size; i++) {
  1640 + be64_to_cpus(&s->l1_table[i]);
  1641 + }
  1642 +
  1643 + if (update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1) < 0)
  1644 + goto fail;
  1645 +
  1646 +#ifdef DEBUG_ALLOC
  1647 + check_refcounts(bs);
  1648 +#endif
  1649 + return 0;
  1650 + fail:
  1651 + return -EIO;
  1652 +}
  1653 +
  1654 +static int qcow_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
  1655 +{
  1656 + BDRVQcowState *s = bs->opaque;
  1657 + QCowSnapshot *sn;
  1658 + int snapshot_index, ret;
  1659 +
  1660 + snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
  1661 + if (snapshot_index < 0)
  1662 + return -ENOENT;
  1663 + sn = &s->snapshots[snapshot_index];
  1664 +
  1665 + ret = update_snapshot_refcount(bs, sn->l1_table_offset, sn->l1_size, -1);
  1666 + if (ret < 0)
  1667 + return ret;
  1668 + /* must update the copied flag on the current cluster offsets */
  1669 + ret = update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
  1670 + if (ret < 0)
  1671 + return ret;
  1672 + free_clusters(bs, sn->l1_table_offset, sn->l1_size * sizeof(uint64_t));
  1673 +
  1674 + qemu_free(sn->id_str);
  1675 + qemu_free(sn->name);
  1676 + memmove(sn, sn + 1, (s->nb_snapshots - snapshot_index - 1) * sizeof(*sn));
  1677 + s->nb_snapshots--;
  1678 + ret = qcow_write_snapshots(bs);
  1679 + if (ret < 0) {
  1680 + /* XXX: restore snapshot if error ? */
  1681 + return ret;
  1682 + }
  1683 +#ifdef DEBUG_ALLOC
  1684 + check_refcounts(bs);
  1685 +#endif
  1686 + return 0;
  1687 +}
  1688 +
  1689 +static int qcow_snapshot_list(BlockDriverState *bs,
  1690 + QEMUSnapshotInfo **psn_tab)
  1691 +{
  1692 + BDRVQcowState *s = bs->opaque;
  1693 + QEMUSnapshotInfo *sn_tab, *sn_info;
  1694 + QCowSnapshot *sn;
  1695 + int i;
  1696 +
  1697 + sn_tab = qemu_mallocz(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
  1698 + if (!sn_tab)
  1699 + goto fail;
  1700 + for(i = 0; i < s->nb_snapshots; i++) {
  1701 + sn_info = sn_tab + i;
  1702 + sn = s->snapshots + i;
  1703 + pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
  1704 + sn->id_str);
  1705 + pstrcpy(sn_info->name, sizeof(sn_info->name),
  1706 + sn->name);
  1707 + sn_info->vm_state_size = sn->vm_state_size;
  1708 + sn_info->date_sec = sn->date_sec;
  1709 + sn_info->date_nsec = sn->date_nsec;
  1710 + sn_info->vm_clock_nsec = sn->vm_clock_nsec;
  1711 + }
  1712 + *psn_tab = sn_tab;
  1713 + return s->nb_snapshots;
  1714 + fail:
  1715 + qemu_free(sn_tab);
  1716 + *psn_tab = NULL;
  1717 + return -ENOMEM;
  1718 +}
  1719 +
  1720 +/*********************************************************/
  1721 +/* refcount handling */
  1722 +
  1723 +static int refcount_init(BlockDriverState *bs)
  1724 +{
  1725 + BDRVQcowState *s = bs->opaque;
  1726 + int ret, refcount_table_size2, i;
  1727 +
  1728 + s->refcount_block_cache = qemu_malloc(s->cluster_size);
  1729 + if (!s->refcount_block_cache)
  1730 + goto fail;
  1731 + refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
  1732 + s->refcount_table = qemu_malloc(refcount_table_size2);
  1733 + if (!s->refcount_table)
  1734 + goto fail;
  1735 + if (s->refcount_table_size > 0) {
  1736 + ret = bdrv_pread(s->hd, s->refcount_table_offset,
  1737 + s->refcount_table, refcount_table_size2);
  1738 + if (ret != refcount_table_size2)
  1739 + goto fail;
  1740 + for(i = 0; i < s->refcount_table_size; i++)
  1741 + be64_to_cpus(&s->refcount_table[i]);
  1742 + }
  1743 + return 0;
  1744 + fail:
  1745 + return -ENOMEM;
  1746 +}
  1747 +
  1748 +static void refcount_close(BlockDriverState *bs)
  1749 +{
  1750 + BDRVQcowState *s = bs->opaque;
  1751 + qemu_free(s->refcount_block_cache);
  1752 + qemu_free(s->refcount_table);
  1753 +}
  1754 +
  1755 +
  1756 +static int load_refcount_block(BlockDriverState *bs,
  1757 + int64_t refcount_block_offset)
  1758 +{
  1759 + BDRVQcowState *s = bs->opaque;
  1760 + int ret;
  1761 + ret = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache,
  1762 + s->cluster_size);
  1763 + if (ret != s->cluster_size)
  1764 + return -EIO;
  1765 + s->refcount_block_cache_offset = refcount_block_offset;
  1766 + return 0;
  1767 +}
  1768 +
  1769 +static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
  1770 +{
  1771 + BDRVQcowState *s = bs->opaque;
  1772 + int refcount_table_index, block_index;
  1773 + int64_t refcount_block_offset;
  1774 +
  1775 + refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
  1776 + if (refcount_table_index >= s->refcount_table_size)
  1777 + return 0;
  1778 + refcount_block_offset = s->refcount_table[refcount_table_index];
  1779 + if (!refcount_block_offset)
  1780 + return 0;
  1781 + if (refcount_block_offset != s->refcount_block_cache_offset) {
  1782 + /* better than nothing: return allocated if read error */
  1783 + if (load_refcount_block(bs, refcount_block_offset) < 0)
  1784 + return 1;
  1785 + }
  1786 + block_index = cluster_index &
  1787 + ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
  1788 + return be16_to_cpu(s->refcount_block_cache[block_index]);
  1789 +}
  1790 +
  1791 +/* return < 0 if error */
  1792 +static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
  1793 +{
  1794 + BDRVQcowState *s = bs->opaque;
  1795 + int i, nb_clusters;
  1796 +
  1797 + nb_clusters = (size + s->cluster_size - 1) >> s->cluster_bits;
  1798 + for(;;) {
  1799 + if (get_refcount(bs, s->free_cluster_index) == 0) {
  1800 + s->free_cluster_index++;
  1801 + for(i = 1; i < nb_clusters; i++) {
  1802 + if (get_refcount(bs, s->free_cluster_index) != 0)
  1803 + goto not_found;
  1804 + s->free_cluster_index++;
  1805 + }
  1806 +#ifdef DEBUG_ALLOC2
  1807 + printf("alloc_clusters: size=%lld -> %lld\n",
  1808 + size,
  1809 + (s->free_cluster_index - nb_clusters) << s->cluster_bits);
  1810 +#endif
  1811 + return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
  1812 + } else {
  1813 + not_found:
  1814 + s->free_cluster_index++;
  1815 + }
  1816 + }
  1817 +}
  1818 +
  1819 +static int64_t alloc_clusters(BlockDriverState *bs, int64_t size)
  1820 +{
  1821 + int64_t offset;
  1822 +
  1823 + offset = alloc_clusters_noref(bs, size);
  1824 + update_refcount(bs, offset, size, 1);
  1825 + return offset;
  1826 +}
  1827 +
  1828 +/* only used to allocate compressed sectors. We try to allocate
  1829 + contiguous sectors. size must be <= cluster_size */
  1830 +static int64_t alloc_bytes(BlockDriverState *bs, int size)
  1831 +{
  1832 + BDRVQcowState *s = bs->opaque;
  1833 + int64_t offset, cluster_offset;
  1834 + int free_in_cluster;
  1835 +
  1836 + assert(size > 0 && size <= s->cluster_size);
  1837 + if (s->free_byte_offset == 0) {
  1838 + s->free_byte_offset = alloc_clusters(bs, s->cluster_size);
  1839 + }
  1840 + redo:
  1841 + free_in_cluster = s->cluster_size -
  1842 + (s->free_byte_offset & (s->cluster_size - 1));
  1843 + if (size <= free_in_cluster) {
  1844 + /* enough space in current cluster */
  1845 + offset = s->free_byte_offset;
  1846 + s->free_byte_offset += size;
  1847 + free_in_cluster -= size;
  1848 + if (free_in_cluster == 0)
  1849 + s->free_byte_offset = 0;
  1850 + if ((offset & (s->cluster_size - 1)) != 0)
  1851 + update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
  1852 + } else {
  1853 + offset = alloc_clusters(bs, s->cluster_size);
  1854 + cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
  1855 + if ((cluster_offset + s->cluster_size) == offset) {
  1856 + /* we are lucky: contiguous data */
  1857 + offset = s->free_byte_offset;
  1858 + update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
  1859 + s->free_byte_offset += size;
  1860 + } else {
  1861 + s->free_byte_offset = offset;
  1862 + goto redo;
  1863 + }
  1864 + }
  1865 + return offset;
  1866 +}
  1867 +
  1868 +static void free_clusters(BlockDriverState *bs,
  1869 + int64_t offset, int64_t size)
  1870 +{
  1871 + update_refcount(bs, offset, size, -1);
  1872 +}
  1873 +
  1874 +static int grow_refcount_table(BlockDriverState *bs, int min_size)
  1875 +{
  1876 + BDRVQcowState *s = bs->opaque;
  1877 + int new_table_size, new_table_size2, refcount_table_clusters, i, ret;
  1878 + uint64_t *new_table;
  1879 + int64_t table_offset;
  1880 + uint64_t data64;
  1881 + uint32_t data32;
  1882 +
  1883 + if (min_size <= s->refcount_table_size)
  1884 + return 0;
  1885 + /* compute new table size */
  1886 + refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
  1887 + for(;;) {
  1888 + if (refcount_table_clusters == 0) {
  1889 + refcount_table_clusters = 1;
  1890 + } else {
  1891 + refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
  1892 + }
  1893 + new_table_size = refcount_table_clusters << (s->cluster_bits - 3);
  1894 + if (min_size <= new_table_size)
  1895 + break;
  1896 + }
  1897 +
  1898 + new_table_size2 = new_table_size * sizeof(uint64_t);
  1899 + new_table = qemu_mallocz(new_table_size2);
  1900 + if (!new_table)
  1901 + return -ENOMEM;
  1902 + memcpy(new_table, s->refcount_table,
  1903 + s->refcount_table_size * sizeof(uint64_t));
  1904 + for(i = 0; i < s->refcount_table_size; i++)
  1905 + cpu_to_be64s(&new_table[i]);
  1906 + /* Note: we cannot update the refcount now to avoid recursion */
  1907 + table_offset = alloc_clusters_noref(bs, new_table_size2);
  1908 + ret = bdrv_pwrite(s->hd, table_offset, new_table, new_table_size2);
  1909 + if (ret != new_table_size2)
  1910 + goto fail;
  1911 + for(i = 0; i < s->refcount_table_size; i++)
  1912 + be64_to_cpus(&new_table[i]);
  1913 +
  1914 + data64 = cpu_to_be64(table_offset);
  1915 + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
  1916 + &data64, sizeof(data64)) != sizeof(data64))
  1917 + goto fail;
  1918 + data32 = cpu_to_be32(refcount_table_clusters);
  1919 + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_clusters),
  1920 + &data32, sizeof(data32)) != sizeof(data32))
  1921 + goto fail;
  1922 + qemu_free(s->refcount_table);
  1923 + s->refcount_table = new_table;
  1924 + s->refcount_table_size = new_table_size;
  1925 +
  1926 + update_refcount(bs, table_offset, new_table_size2, 1);
  1927 + return 0;
  1928 + fail:
  1929 + free_clusters(bs, table_offset, new_table_size2);
  1930 + qemu_free(new_table);
  1931 + return -EIO;
  1932 +}
  1933 +
  1934 +/* addend must be 1 or -1 */
  1935 +/* XXX: cache several refcount block clusters ? */
  1936 +static int update_cluster_refcount(BlockDriverState *bs,
  1937 + int64_t cluster_index,
  1938 + int addend)
  1939 +{
  1940 + BDRVQcowState *s = bs->opaque;
  1941 + int64_t offset, refcount_block_offset;
  1942 + int ret, refcount_table_index, block_index, refcount;
  1943 + uint64_t data64;
  1944 +
  1945 + refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
  1946 + if (refcount_table_index >= s->refcount_table_size) {
  1947 + if (addend < 0)
  1948 + return -EINVAL;
  1949 + ret = grow_refcount_table(bs, refcount_table_index + 1);
  1950 + if (ret < 0)
  1951 + return ret;
  1952 + }
  1953 + refcount_block_offset = s->refcount_table[refcount_table_index];
  1954 + if (!refcount_block_offset) {
  1955 + if (addend < 0)
  1956 + return -EINVAL;
  1957 + /* create a new refcount block */
  1958 + /* Note: we cannot update the refcount now to avoid recursion */
  1959 + offset = alloc_clusters_noref(bs, s->cluster_size);
  1960 + memset(s->refcount_block_cache, 0, s->cluster_size);
  1961 + ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size);
  1962 + if (ret != s->cluster_size)
  1963 + return -EINVAL;
  1964 + s->refcount_table[refcount_table_index] = offset;
  1965 + data64 = cpu_to_be64(offset);
  1966 + ret = bdrv_pwrite(s->hd, s->refcount_table_offset +
  1967 + refcount_table_index * sizeof(uint64_t),
  1968 + &data64, sizeof(data64));
  1969 + if (ret != sizeof(data64))
  1970 + return -EINVAL;
  1971 +
  1972 + refcount_block_offset = offset;
  1973 + s->refcount_block_cache_offset = offset;
  1974 + update_refcount(bs, offset, s->cluster_size, 1);
  1975 + } else {
  1976 + if (refcount_block_offset != s->refcount_block_cache_offset) {
  1977 + if (load_refcount_block(bs, refcount_block_offset) < 0)
  1978 + return -EIO;
  1979 + }
  1980 + }
  1981 + /* we can update the count and save it */
  1982 + block_index = cluster_index &
  1983 + ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
  1984 + refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
  1985 + refcount += addend;
  1986 + if (refcount < 0 || refcount > 0xffff)
  1987 + return -EINVAL;
  1988 + if (refcount == 0 && cluster_index < s->free_cluster_index) {
  1989 + s->free_cluster_index = cluster_index;
  1990 + }
  1991 + s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
  1992 + if (bdrv_pwrite(s->hd,
  1993 + refcount_block_offset + (block_index << REFCOUNT_SHIFT),
  1994 + &s->refcount_block_cache[block_index], 2) != 2)
  1995 + return -EIO;
  1996 + return refcount;
  1997 +}
  1998 +
  1999 +static void update_refcount(BlockDriverState *bs,
  2000 + int64_t offset, int64_t length,
  2001 + int addend)
  2002 +{
  2003 + BDRVQcowState *s = bs->opaque;
  2004 + int64_t start, last, cluster_offset;
  2005 +
  2006 +#ifdef DEBUG_ALLOC2
  2007 + printf("update_refcount: offset=%lld size=%lld addend=%d\n",
  2008 + offset, length, addend);
  2009 +#endif
  2010 + if (length <= 0)
  2011 + return;
  2012 + start = offset & ~(s->cluster_size - 1);
  2013 + last = (offset + length - 1) & ~(s->cluster_size - 1);
  2014 + for(cluster_offset = start; cluster_offset <= last;
  2015 + cluster_offset += s->cluster_size) {
  2016 + update_cluster_refcount(bs, cluster_offset >> s->cluster_bits, addend);
  2017 + }
  2018 +}
  2019 +
  2020 +#ifdef DEBUG_ALLOC
  2021 +static void inc_refcounts(BlockDriverState *bs,
  2022 + uint16_t *refcount_table,
  2023 + int refcount_table_size,
  2024 + int64_t offset, int64_t size)
  2025 +{
  2026 + BDRVQcowState *s = bs->opaque;
  2027 + int64_t start, last, cluster_offset;
  2028 + int k;
  2029 +
  2030 + if (size <= 0)
  2031 + return;
  2032 +
  2033 + start = offset & ~(s->cluster_size - 1);
  2034 + last = (offset + size - 1) & ~(s->cluster_size - 1);
  2035 + for(cluster_offset = start; cluster_offset <= last;
  2036 + cluster_offset += s->cluster_size) {
  2037 + k = cluster_offset >> s->cluster_bits;
  2038 + if (k < 0 || k >= refcount_table_size) {
  2039 + printf("ERROR: invalid cluster offset=0x%llx\n", cluster_offset);
  2040 + } else {
  2041 + if (++refcount_table[k] == 0) {
  2042 + printf("ERROR: overflow cluster offset=0x%llx\n", cluster_offset);
  2043 + }
  2044 + }
  2045 + }
  2046 +}
  2047 +
  2048 +static int check_refcounts_l1(BlockDriverState *bs,
  2049 + uint16_t *refcount_table,
  2050 + int refcount_table_size,
  2051 + int64_t l1_table_offset, int l1_size,
  2052 + int check_copied)
  2053 +{
  2054 + BDRVQcowState *s = bs->opaque;
  2055 + uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
  2056 + int l2_size, i, j, nb_csectors, refcount;
  2057 +
  2058 + l2_table = NULL;
  2059 + l1_size2 = l1_size * sizeof(uint64_t);
  2060 +
  2061 + inc_refcounts(bs, refcount_table, refcount_table_size,
  2062 + l1_table_offset, l1_size2);
  2063 +
  2064 + l1_table = qemu_malloc(l1_size2);
  2065 + if (!l1_table)
  2066 + goto fail;
  2067 + if (bdrv_pread(s->hd, l1_table_offset,
  2068 + l1_table, l1_size2) != l1_size2)
  2069 + goto fail;
  2070 + for(i = 0;i < l1_size; i++)
  2071 + be64_to_cpus(&l1_table[i]);
  2072 +
  2073 + l2_size = s->l2_size * sizeof(uint64_t);
  2074 + l2_table = qemu_malloc(l2_size);
  2075 + if (!l2_table)
  2076 + goto fail;
  2077 + for(i = 0; i < l1_size; i++) {
  2078 + l2_offset = l1_table[i];
  2079 + if (l2_offset) {
  2080 + if (check_copied) {
  2081 + refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED) >> s->cluster_bits);
  2082 + if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
  2083 + printf("ERROR OFLAG_COPIED: l2_offset=%llx refcount=%d\n",
  2084 + l2_offset, refcount);
  2085 + }
  2086 + }
  2087 + l2_offset &= ~QCOW_OFLAG_COPIED;
  2088 + if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
  2089 + goto fail;
  2090 + for(j = 0; j < s->l2_size; j++) {
  2091 + offset = be64_to_cpu(l2_table[j]);
  2092 + if (offset != 0) {
  2093 + if (offset & QCOW_OFLAG_COMPRESSED) {
  2094 + if (offset & QCOW_OFLAG_COPIED) {
  2095 + printf("ERROR: cluster %lld: copied flag must never be set for compressed clusters\n",
  2096 + offset >> s->cluster_bits);
  2097 + offset &= ~QCOW_OFLAG_COPIED;
  2098 + }
  2099 + nb_csectors = ((offset >> s->csize_shift) &
  2100 + s->csize_mask) + 1;
  2101 + offset &= s->cluster_offset_mask;
  2102 + inc_refcounts(bs, refcount_table,
  2103 + refcount_table_size,
  2104 + offset & ~511, nb_csectors * 512);
  2105 + } else {
  2106 + if (check_copied) {
  2107 + refcount = get_refcount(bs, (offset & ~QCOW_OFLAG_COPIED) >> s->cluster_bits);
  2108 + if ((refcount == 1) != ((offset & QCOW_OFLAG_COPIED) != 0)) {
  2109 + printf("ERROR OFLAG_COPIED: offset=%llx refcount=%d\n",
  2110 + offset, refcount);
  2111 + }
  2112 + }
  2113 + offset &= ~QCOW_OFLAG_COPIED;
  2114 + inc_refcounts(bs, refcount_table,
  2115 + refcount_table_size,
  2116 + offset, s->cluster_size);
  2117 + }
  2118 + }
  2119 + }
  2120 + inc_refcounts(bs, refcount_table,
  2121 + refcount_table_size,
  2122 + l2_offset,
  2123 + s->cluster_size);
  2124 + }
  2125 + }
  2126 + qemu_free(l1_table);
  2127 + qemu_free(l2_table);
  2128 + return 0;
  2129 + fail:
  2130 + printf("ERROR: I/O error in check_refcounts_l1\n");
  2131 + qemu_free(l1_table);
  2132 + qemu_free(l2_table);
  2133 + return -EIO;
  2134 +}
  2135 +
  2136 +static void check_refcounts(BlockDriverState *bs)
  2137 +{
  2138 + BDRVQcowState *s = bs->opaque;
  2139 + int64_t size;
  2140 + int nb_clusters, refcount1, refcount2, i;
  2141 + QCowSnapshot *sn;
  2142 + uint16_t *refcount_table;
  2143 +
  2144 + size = bdrv_getlength(s->hd);
  2145 + nb_clusters = (size + s->cluster_size - 1) >> s->cluster_bits;
  2146 + refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
  2147 +
  2148 + /* header */
  2149 + inc_refcounts(bs, refcount_table, nb_clusters,
  2150 + 0, s->cluster_size);
  2151 +
  2152 + check_refcounts_l1(bs, refcount_table, nb_clusters,
  2153 + s->l1_table_offset, s->l1_size, 1);
  2154 +
  2155 + /* snapshots */
  2156 + for(i = 0; i < s->nb_snapshots; i++) {
  2157 + sn = s->snapshots + i;
  2158 + check_refcounts_l1(bs, refcount_table, nb_clusters,
  2159 + sn->l1_table_offset, sn->l1_size, 0);
  2160 + }
  2161 + inc_refcounts(bs, refcount_table, nb_clusters,
  2162 + s->snapshots_offset, s->snapshots_size);
  2163 +
  2164 + /* refcount data */
  2165 + inc_refcounts(bs, refcount_table, nb_clusters,
  2166 + s->refcount_table_offset,
  2167 + s->refcount_table_size * sizeof(uint64_t));
  2168 + for(i = 0; i < s->refcount_table_size; i++) {
  2169 + int64_t offset;
  2170 + offset = s->refcount_table[i];
  2171 + if (offset != 0) {
  2172 + inc_refcounts(bs, refcount_table, nb_clusters,
  2173 + offset, s->cluster_size);
  2174 + }
  2175 + }
  2176 +
  2177 + /* compare ref counts */
  2178 + for(i = 0; i < nb_clusters; i++) {
  2179 + refcount1 = get_refcount(bs, i);
  2180 + refcount2 = refcount_table[i];
  2181 + if (refcount1 != refcount2)
  2182 + printf("ERROR cluster %d refcount=%d reference=%d\n",
  2183 + i, refcount1, refcount2);
  2184 + }
  2185 +
  2186 + qemu_free(refcount_table);
  2187 +}
  2188 +
  2189 +#if 0
  2190 +static void dump_refcounts(BlockDriverState *bs)
  2191 +{
  2192 + BDRVQcowState *s = bs->opaque;
  2193 + int64_t nb_clusters, k, k1, size;
  2194 + int refcount;
  2195 +
  2196 + size = bdrv_getlength(s->hd);
  2197 + nb_clusters = (size + s->cluster_size - 1) >> s->cluster_bits;
  2198 + for(k = 0; k < nb_clusters;) {
  2199 + k1 = k;
  2200 + refcount = get_refcount(bs, k);
  2201 + k++;
  2202 + while (k < nb_clusters && get_refcount(bs, k) == refcount)
  2203 + k++;
  2204 + printf("%lld: refcount=%d nb=%lld\n", k, refcount, k - k1);
  2205 + }
  2206 +}
  2207 +#endif
  2208 +#endif
  2209 +
  2210 +BlockDriver bdrv_qcow2 = {
  2211 + "qcow2",
  2212 + sizeof(BDRVQcowState),
  2213 + qcow_probe,
  2214 + qcow_open,
  2215 + NULL,
  2216 + NULL,
  2217 + qcow_close,
  2218 + qcow_create,
  2219 + qcow_flush,
  2220 + qcow_is_allocated,
  2221 + qcow_set_key,
  2222 + qcow_make_empty,
  2223 +
  2224 + .bdrv_aio_new = qcow_aio_new,
  2225 + .bdrv_aio_read = qcow_aio_read,
  2226 + .bdrv_aio_write = qcow_aio_write,
  2227 + .bdrv_aio_cancel = qcow_aio_cancel,
  2228 + .bdrv_aio_delete = qcow_aio_delete,
  2229 + .bdrv_write_compressed = qcow_write_compressed,
  2230 +
  2231 + .bdrv_snapshot_create = qcow_snapshot_create,
  2232 + .bdrv_snapshot_goto = qcow_snapshot_goto,
  2233 + .bdrv_snapshot_delete = qcow_snapshot_delete,
  2234 + .bdrv_snapshot_list = qcow_snapshot_list,
  2235 + .bdrv_get_info = qcow_get_info,
  2236 +};
... ...