Commit f7d0fe0239e438c396f5602e18aaf1d615998dfe

Authored by Kevin Wolf
Committed by Anthony Liguori
1 parent 9ccb258e

qcow2: Split out refcount handling

qcow2-refcount.c contains all functions which are related to cluster
allocation and management in the image file. A large part of this is the
reference counting of these clusters.

Also a header file qcow2.h is introduced which will contain the interface of
the split qcow2 modules.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Makefile
@@ -68,7 +68,8 @@ recurse-all: $(SUBDIR_RULES) @@ -68,7 +68,8 @@ recurse-all: $(SUBDIR_RULES)
68 BLOCK_OBJS=cutils.o cache-utils.o qemu-malloc.o qemu-option.o module.o 68 BLOCK_OBJS=cutils.o cache-utils.o qemu-malloc.o qemu-option.o module.o
69 BLOCK_OBJS+=block/cow.o block/qcow.o aes.o block/vmdk.o block/cloop.o 69 BLOCK_OBJS+=block/cow.o block/qcow.o aes.o block/vmdk.o block/cloop.o
70 BLOCK_OBJS+=block/dmg.o block/bochs.o block/vpc.o block/vvfat.o 70 BLOCK_OBJS+=block/dmg.o block/bochs.o block/vpc.o block/vvfat.o
71 -BLOCK_OBJS+=block/qcow2.o block/parallels.o block/nbd.o 71 +BLOCK_OBJS+=block/qcow2.o block/qcow2-refcount.o
  72 +BLOCK_OBJS+=block/parallels.o block/nbd.o
72 BLOCK_OBJS+=nbd.o block.o aio.o 73 BLOCK_OBJS+=nbd.o block.o aio.o
73 74
74 ifdef CONFIG_WIN32 75 ifdef CONFIG_WIN32
block/qcow2-refcount.c 0 → 100644
  1 +/*
  2 + * Block driver for the QCOW version 2 format
  3 + *
  4 + * Copyright (c) 2004-2006 Fabrice Bellard
  5 + *
  6 + * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 + * of this software and associated documentation files (the "Software"), to deal
  8 + * in the Software without restriction, including without limitation the rights
  9 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 + * copies of the Software, and to permit persons to whom the Software is
  11 + * furnished to do so, subject to the following conditions:
  12 + *
  13 + * The above copyright notice and this permission notice shall be included in
  14 + * all copies or substantial portions of the Software.
  15 + *
  16 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 + * THE SOFTWARE.
  23 + */
  24 +
  25 +#include "qemu-common.h"
  26 +#include "block_int.h"
  27 +#include "block/qcow2.h"
  28 +
  29 +static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
  30 +static int update_refcount(BlockDriverState *bs,
  31 + int64_t offset, int64_t length,
  32 + int addend);
  33 +
  34 +/*********************************************************/
  35 +/* refcount handling */
  36 +
  37 +int refcount_init(BlockDriverState *bs)
  38 +{
  39 + BDRVQcowState *s = bs->opaque;
  40 + int ret, refcount_table_size2, i;
  41 +
  42 + s->refcount_block_cache = qemu_malloc(s->cluster_size);
  43 + refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
  44 + s->refcount_table = qemu_malloc(refcount_table_size2);
  45 + if (s->refcount_table_size > 0) {
  46 + ret = bdrv_pread(s->hd, s->refcount_table_offset,
  47 + s->refcount_table, refcount_table_size2);
  48 + if (ret != refcount_table_size2)
  49 + goto fail;
  50 + for(i = 0; i < s->refcount_table_size; i++)
  51 + be64_to_cpus(&s->refcount_table[i]);
  52 + }
  53 + return 0;
  54 + fail:
  55 + return -ENOMEM;
  56 +}
  57 +
  58 +void refcount_close(BlockDriverState *bs)
  59 +{
  60 + BDRVQcowState *s = bs->opaque;
  61 + qemu_free(s->refcount_block_cache);
  62 + qemu_free(s->refcount_table);
  63 +}
  64 +
  65 +
  66 +static int load_refcount_block(BlockDriverState *bs,
  67 + int64_t refcount_block_offset)
  68 +{
  69 + BDRVQcowState *s = bs->opaque;
  70 + int ret;
  71 + ret = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache,
  72 + s->cluster_size);
  73 + if (ret != s->cluster_size)
  74 + return -EIO;
  75 + s->refcount_block_cache_offset = refcount_block_offset;
  76 + return 0;
  77 +}
  78 +
  79 +static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
  80 +{
  81 + BDRVQcowState *s = bs->opaque;
  82 + int refcount_table_index, block_index;
  83 + int64_t refcount_block_offset;
  84 +
  85 + refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
  86 + if (refcount_table_index >= s->refcount_table_size)
  87 + return 0;
  88 + refcount_block_offset = s->refcount_table[refcount_table_index];
  89 + if (!refcount_block_offset)
  90 + return 0;
  91 + if (refcount_block_offset != s->refcount_block_cache_offset) {
  92 + /* better than nothing: return allocated if read error */
  93 + if (load_refcount_block(bs, refcount_block_offset) < 0)
  94 + return 1;
  95 + }
  96 + block_index = cluster_index &
  97 + ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
  98 + return be16_to_cpu(s->refcount_block_cache[block_index]);
  99 +}
  100 +
  101 +static int grow_refcount_table(BlockDriverState *bs, int min_size)
  102 +{
  103 + BDRVQcowState *s = bs->opaque;
  104 + int new_table_size, new_table_size2, refcount_table_clusters, i, ret;
  105 + uint64_t *new_table;
  106 + int64_t table_offset;
  107 + uint8_t data[12];
  108 + int old_table_size;
  109 + int64_t old_table_offset;
  110 +
  111 + if (min_size <= s->refcount_table_size)
  112 + return 0;
  113 + /* compute new table size */
  114 + refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
  115 + for(;;) {
  116 + if (refcount_table_clusters == 0) {
  117 + refcount_table_clusters = 1;
  118 + } else {
  119 + refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
  120 + }
  121 + new_table_size = refcount_table_clusters << (s->cluster_bits - 3);
  122 + if (min_size <= new_table_size)
  123 + break;
  124 + }
  125 +#ifdef DEBUG_ALLOC2
  126 + printf("grow_refcount_table from %d to %d\n",
  127 + s->refcount_table_size,
  128 + new_table_size);
  129 +#endif
  130 + new_table_size2 = new_table_size * sizeof(uint64_t);
  131 + new_table = qemu_mallocz(new_table_size2);
  132 + memcpy(new_table, s->refcount_table,
  133 + s->refcount_table_size * sizeof(uint64_t));
  134 + for(i = 0; i < s->refcount_table_size; i++)
  135 + cpu_to_be64s(&new_table[i]);
  136 + /* Note: we cannot update the refcount now to avoid recursion */
  137 + table_offset = alloc_clusters_noref(bs, new_table_size2);
  138 + ret = bdrv_pwrite(s->hd, table_offset, new_table, new_table_size2);
  139 + if (ret != new_table_size2)
  140 + goto fail;
  141 + for(i = 0; i < s->refcount_table_size; i++)
  142 + be64_to_cpus(&new_table[i]);
  143 +
  144 + cpu_to_be64w((uint64_t*)data, table_offset);
  145 + cpu_to_be32w((uint32_t*)(data + 8), refcount_table_clusters);
  146 + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
  147 + data, sizeof(data)) != sizeof(data))
  148 + goto fail;
  149 + qemu_free(s->refcount_table);
  150 + old_table_offset = s->refcount_table_offset;
  151 + old_table_size = s->refcount_table_size;
  152 + s->refcount_table = new_table;
  153 + s->refcount_table_size = new_table_size;
  154 + s->refcount_table_offset = table_offset;
  155 +
  156 + update_refcount(bs, table_offset, new_table_size2, 1);
  157 + free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
  158 + return 0;
  159 + fail:
  160 + free_clusters(bs, table_offset, new_table_size2);
  161 + qemu_free(new_table);
  162 + return -EIO;
  163 +}
  164 +
  165 +
  166 +static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
  167 +{
  168 + BDRVQcowState *s = bs->opaque;
  169 + int64_t offset, refcount_block_offset;
  170 + int ret, refcount_table_index;
  171 + uint64_t data64;
  172 +
  173 + /* Find L1 index and grow refcount table if needed */
  174 + refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
  175 + if (refcount_table_index >= s->refcount_table_size) {
  176 + ret = grow_refcount_table(bs, refcount_table_index + 1);
  177 + if (ret < 0)
  178 + return ret;
  179 + }
  180 +
  181 + /* Load or allocate the refcount block */
  182 + refcount_block_offset = s->refcount_table[refcount_table_index];
  183 + if (!refcount_block_offset) {
  184 + /* create a new refcount block */
  185 + /* Note: we cannot update the refcount now to avoid recursion */
  186 + offset = alloc_clusters_noref(bs, s->cluster_size);
  187 + memset(s->refcount_block_cache, 0, s->cluster_size);
  188 + ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size);
  189 + if (ret != s->cluster_size)
  190 + return -EINVAL;
  191 + s->refcount_table[refcount_table_index] = offset;
  192 + data64 = cpu_to_be64(offset);
  193 + ret = bdrv_pwrite(s->hd, s->refcount_table_offset +
  194 + refcount_table_index * sizeof(uint64_t),
  195 + &data64, sizeof(data64));
  196 + if (ret != sizeof(data64))
  197 + return -EINVAL;
  198 +
  199 + refcount_block_offset = offset;
  200 + s->refcount_block_cache_offset = offset;
  201 + update_refcount(bs, offset, s->cluster_size, 1);
  202 + } else {
  203 + if (refcount_block_offset != s->refcount_block_cache_offset) {
  204 + if (load_refcount_block(bs, refcount_block_offset) < 0)
  205 + return -EIO;
  206 + }
  207 + }
  208 +
  209 + return refcount_block_offset;
  210 +}
  211 +
  212 +/* XXX: cache several refcount block clusters ? */
  213 +static int update_refcount(BlockDriverState *bs,
  214 + int64_t offset, int64_t length,
  215 + int addend)
  216 +{
  217 + BDRVQcowState *s = bs->opaque;
  218 + int64_t start, last, cluster_offset;
  219 + int64_t refcount_block_offset = 0;
  220 + int64_t table_index = -1, old_table_index;
  221 + int first_index = -1, last_index = -1;
  222 +
  223 +#ifdef DEBUG_ALLOC2
  224 + printf("update_refcount: offset=%lld size=%lld addend=%d\n",
  225 + offset, length, addend);
  226 +#endif
  227 + if (length <= 0)
  228 + return -EINVAL;
  229 + start = offset & ~(s->cluster_size - 1);
  230 + last = (offset + length - 1) & ~(s->cluster_size - 1);
  231 + for(cluster_offset = start; cluster_offset <= last;
  232 + cluster_offset += s->cluster_size)
  233 + {
  234 + int block_index, refcount;
  235 + int64_t cluster_index = cluster_offset >> s->cluster_bits;
  236 +
  237 + /* Only write refcount block to disk when we are done with it */
  238 + old_table_index = table_index;
  239 + table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
  240 + if ((old_table_index >= 0) && (table_index != old_table_index)) {
  241 + size_t size = (last_index - first_index + 1) << REFCOUNT_SHIFT;
  242 + if (bdrv_pwrite(s->hd,
  243 + refcount_block_offset + (first_index << REFCOUNT_SHIFT),
  244 + &s->refcount_block_cache[first_index], size) != size)
  245 + {
  246 + return -EIO;
  247 + }
  248 +
  249 + first_index = -1;
  250 + last_index = -1;
  251 + }
  252 +
  253 + /* Load the refcount block and allocate it if needed */
  254 + refcount_block_offset = alloc_refcount_block(bs, cluster_index);
  255 + if (refcount_block_offset < 0) {
  256 + return refcount_block_offset;
  257 + }
  258 +
  259 + /* we can update the count and save it */
  260 + block_index = cluster_index &
  261 + ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
  262 + if (first_index == -1 || block_index < first_index) {
  263 + first_index = block_index;
  264 + }
  265 + if (block_index > last_index) {
  266 + last_index = block_index;
  267 + }
  268 +
  269 + refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
  270 + refcount += addend;
  271 + if (refcount < 0 || refcount > 0xffff)
  272 + return -EINVAL;
  273 + if (refcount == 0 && cluster_index < s->free_cluster_index) {
  274 + s->free_cluster_index = cluster_index;
  275 + }
  276 + s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
  277 + }
  278 +
  279 + /* Write last changed block to disk */
  280 + if (refcount_block_offset != 0) {
  281 + size_t size = (last_index - first_index + 1) << REFCOUNT_SHIFT;
  282 + if (bdrv_pwrite(s->hd,
  283 + refcount_block_offset + (first_index << REFCOUNT_SHIFT),
  284 + &s->refcount_block_cache[first_index], size) != size)
  285 + {
  286 + return -EIO;
  287 + }
  288 + }
  289 +
  290 + return 0;
  291 +}
  292 +
  293 +/* addend must be 1 or -1 */
  294 +static int update_cluster_refcount(BlockDriverState *bs,
  295 + int64_t cluster_index,
  296 + int addend)
  297 +{
  298 + BDRVQcowState *s = bs->opaque;
  299 + int ret;
  300 +
  301 + ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend);
  302 + if (ret < 0) {
  303 + return ret;
  304 + }
  305 +
  306 + return get_refcount(bs, cluster_index);
  307 +}
  308 +
  309 +
  310 +
  311 +/*********************************************************/
  312 +/* cluster allocation functions */
  313 +
  314 +
  315 +
  316 +/* return < 0 if error */
  317 +static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
  318 +{
  319 + BDRVQcowState *s = bs->opaque;
  320 + int i, nb_clusters;
  321 +
  322 + nb_clusters = size_to_clusters(s, size);
  323 +retry:
  324 + for(i = 0; i < nb_clusters; i++) {
  325 + int64_t i = s->free_cluster_index++;
  326 + if (get_refcount(bs, i) != 0)
  327 + goto retry;
  328 + }
  329 +#ifdef DEBUG_ALLOC2
  330 + printf("alloc_clusters: size=%lld -> %lld\n",
  331 + size,
  332 + (s->free_cluster_index - nb_clusters) << s->cluster_bits);
  333 +#endif
  334 + return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
  335 +}
  336 +
  337 +int64_t alloc_clusters(BlockDriverState *bs, int64_t size)
  338 +{
  339 + int64_t offset;
  340 +
  341 + offset = alloc_clusters_noref(bs, size);
  342 + update_refcount(bs, offset, size, 1);
  343 + return offset;
  344 +}
  345 +
  346 +/* only used to allocate compressed sectors. We try to allocate
  347 + contiguous sectors. size must be <= cluster_size */
  348 +int64_t alloc_bytes(BlockDriverState *bs, int size)
  349 +{
  350 + BDRVQcowState *s = bs->opaque;
  351 + int64_t offset, cluster_offset;
  352 + int free_in_cluster;
  353 +
  354 + assert(size > 0 && size <= s->cluster_size);
  355 + if (s->free_byte_offset == 0) {
  356 + s->free_byte_offset = alloc_clusters(bs, s->cluster_size);
  357 + }
  358 + redo:
  359 + free_in_cluster = s->cluster_size -
  360 + (s->free_byte_offset & (s->cluster_size - 1));
  361 + if (size <= free_in_cluster) {
  362 + /* enough space in current cluster */
  363 + offset = s->free_byte_offset;
  364 + s->free_byte_offset += size;
  365 + free_in_cluster -= size;
  366 + if (free_in_cluster == 0)
  367 + s->free_byte_offset = 0;
  368 + if ((offset & (s->cluster_size - 1)) != 0)
  369 + update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
  370 + } else {
  371 + offset = alloc_clusters(bs, s->cluster_size);
  372 + cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
  373 + if ((cluster_offset + s->cluster_size) == offset) {
  374 + /* we are lucky: contiguous data */
  375 + offset = s->free_byte_offset;
  376 + update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
  377 + s->free_byte_offset += size;
  378 + } else {
  379 + s->free_byte_offset = offset;
  380 + goto redo;
  381 + }
  382 + }
  383 + return offset;
  384 +}
  385 +
  386 +void free_clusters(BlockDriverState *bs,
  387 + int64_t offset, int64_t size)
  388 +{
  389 + update_refcount(bs, offset, size, -1);
  390 +}
  391 +
  392 +
  393 +
  394 +/*********************************************************/
  395 +/* snapshots and image creation */
  396 +
  397 +
  398 +
  399 +void create_refcount_update(QCowCreateState *s, int64_t offset, int64_t size)
  400 +{
  401 + int refcount;
  402 + int64_t start, last, cluster_offset;
  403 + uint16_t *p;
  404 +
  405 + start = offset & ~(s->cluster_size - 1);
  406 + last = (offset + size - 1) & ~(s->cluster_size - 1);
  407 + for(cluster_offset = start; cluster_offset <= last;
  408 + cluster_offset += s->cluster_size) {
  409 + p = &s->refcount_block[cluster_offset >> s->cluster_bits];
  410 + refcount = be16_to_cpu(*p);
  411 + refcount++;
  412 + *p = cpu_to_be16(refcount);
  413 + }
  414 +}
  415 +
  416 +/* update the refcounts of snapshots and the copied flag */
  417 +int update_snapshot_refcount(BlockDriverState *bs,
  418 + int64_t l1_table_offset,
  419 + int l1_size,
  420 + int addend)
  421 +{
  422 + BDRVQcowState *s = bs->opaque;
  423 + uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
  424 + int64_t old_offset, old_l2_offset;
  425 + int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;
  426 +
  427 + l2_cache_reset(bs);
  428 +
  429 + l2_table = NULL;
  430 + l1_table = NULL;
  431 + l1_size2 = l1_size * sizeof(uint64_t);
  432 + l1_allocated = 0;
  433 + if (l1_table_offset != s->l1_table_offset) {
  434 + l1_table = qemu_malloc(l1_size2);
  435 + l1_allocated = 1;
  436 + if (bdrv_pread(s->hd, l1_table_offset,
  437 + l1_table, l1_size2) != l1_size2)
  438 + goto fail;
  439 + for(i = 0;i < l1_size; i++)
  440 + be64_to_cpus(&l1_table[i]);
  441 + } else {
  442 + assert(l1_size == s->l1_size);
  443 + l1_table = s->l1_table;
  444 + l1_allocated = 0;
  445 + }
  446 +
  447 + l2_size = s->l2_size * sizeof(uint64_t);
  448 + l2_table = qemu_malloc(l2_size);
  449 + l1_modified = 0;
  450 + for(i = 0; i < l1_size; i++) {
  451 + l2_offset = l1_table[i];
  452 + if (l2_offset) {
  453 + old_l2_offset = l2_offset;
  454 + l2_offset &= ~QCOW_OFLAG_COPIED;
  455 + l2_modified = 0;
  456 + if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
  457 + goto fail;
  458 + for(j = 0; j < s->l2_size; j++) {
  459 + offset = be64_to_cpu(l2_table[j]);
  460 + if (offset != 0) {
  461 + old_offset = offset;
  462 + offset &= ~QCOW_OFLAG_COPIED;
  463 + if (offset & QCOW_OFLAG_COMPRESSED) {
  464 + nb_csectors = ((offset >> s->csize_shift) &
  465 + s->csize_mask) + 1;
  466 + if (addend != 0)
  467 + update_refcount(bs, (offset & s->cluster_offset_mask) & ~511,
  468 + nb_csectors * 512, addend);
  469 + /* compressed clusters are never modified */
  470 + refcount = 2;
  471 + } else {
  472 + if (addend != 0) {
  473 + refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
  474 + } else {
  475 + refcount = get_refcount(bs, offset >> s->cluster_bits);
  476 + }
  477 + }
  478 +
  479 + if (refcount == 1) {
  480 + offset |= QCOW_OFLAG_COPIED;
  481 + }
  482 + if (offset != old_offset) {
  483 + l2_table[j] = cpu_to_be64(offset);
  484 + l2_modified = 1;
  485 + }
  486 + }
  487 + }
  488 + if (l2_modified) {
  489 + if (bdrv_pwrite(s->hd,
  490 + l2_offset, l2_table, l2_size) != l2_size)
  491 + goto fail;
  492 + }
  493 +
  494 + if (addend != 0) {
  495 + refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
  496 + } else {
  497 + refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
  498 + }
  499 + if (refcount == 1) {
  500 + l2_offset |= QCOW_OFLAG_COPIED;
  501 + }
  502 + if (l2_offset != old_l2_offset) {
  503 + l1_table[i] = l2_offset;
  504 + l1_modified = 1;
  505 + }
  506 + }
  507 + }
  508 + if (l1_modified) {
  509 + for(i = 0; i < l1_size; i++)
  510 + cpu_to_be64s(&l1_table[i]);
  511 + if (bdrv_pwrite(s->hd, l1_table_offset, l1_table,
  512 + l1_size2) != l1_size2)
  513 + goto fail;
  514 + for(i = 0; i < l1_size; i++)
  515 + be64_to_cpus(&l1_table[i]);
  516 + }
  517 + if (l1_allocated)
  518 + qemu_free(l1_table);
  519 + qemu_free(l2_table);
  520 + return 0;
  521 + fail:
  522 + if (l1_allocated)
  523 + qemu_free(l1_table);
  524 + qemu_free(l2_table);
  525 + return -EIO;
  526 +}
  527 +
  528 +
  529 +
  530 +
  531 +/*********************************************************/
  532 +/* refcount checking functions */
  533 +
  534 +
  535 +
  536 +/*
  537 + * Increases the refcount for a range of clusters in a given refcount table.
  538 + * This is used to construct a temporary refcount table out of L1 and L2 tables
  539 + * which can be compared the the refcount table saved in the image.
  540 + *
  541 + * Returns the number of errors in the image that were found
  542 + */
  543 +static int inc_refcounts(BlockDriverState *bs,
  544 + uint16_t *refcount_table,
  545 + int refcount_table_size,
  546 + int64_t offset, int64_t size)
  547 +{
  548 + BDRVQcowState *s = bs->opaque;
  549 + int64_t start, last, cluster_offset;
  550 + int k;
  551 + int errors = 0;
  552 +
  553 + if (size <= 0)
  554 + return 0;
  555 +
  556 + start = offset & ~(s->cluster_size - 1);
  557 + last = (offset + size - 1) & ~(s->cluster_size - 1);
  558 + for(cluster_offset = start; cluster_offset <= last;
  559 + cluster_offset += s->cluster_size) {
  560 + k = cluster_offset >> s->cluster_bits;
  561 + if (k < 0 || k >= refcount_table_size) {
  562 + fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
  563 + cluster_offset);
  564 + errors++;
  565 + } else {
  566 + if (++refcount_table[k] == 0) {
  567 + fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
  568 + "\n", cluster_offset);
  569 + errors++;
  570 + }
  571 + }
  572 + }
  573 +
  574 + return errors;
  575 +}
  576 +
  577 +/*
  578 + * Increases the refcount in the given refcount table for the all clusters
  579 + * referenced in the L2 table. While doing so, performs some checks on L2
  580 + * entries.
  581 + *
  582 + * Returns the number of errors found by the checks or -errno if an internal
  583 + * error occurred.
  584 + */
  585 +static int check_refcounts_l2(BlockDriverState *bs,
  586 + uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
  587 + int check_copied)
  588 +{
  589 + BDRVQcowState *s = bs->opaque;
  590 + uint64_t *l2_table, offset;
  591 + int i, l2_size, nb_csectors, refcount;
  592 + int errors = 0;
  593 +
  594 + /* Read L2 table from disk */
  595 + l2_size = s->l2_size * sizeof(uint64_t);
  596 + l2_table = qemu_malloc(l2_size);
  597 +
  598 + if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
  599 + goto fail;
  600 +
  601 + /* Do the actual checks */
  602 + for(i = 0; i < s->l2_size; i++) {
  603 + offset = be64_to_cpu(l2_table[i]);
  604 + if (offset != 0) {
  605 + if (offset & QCOW_OFLAG_COMPRESSED) {
  606 + /* Compressed clusters don't have QCOW_OFLAG_COPIED */
  607 + if (offset & QCOW_OFLAG_COPIED) {
  608 + fprintf(stderr, "ERROR: cluster %" PRId64 ": "
  609 + "copied flag must never be set for compressed "
  610 + "clusters\n", offset >> s->cluster_bits);
  611 + offset &= ~QCOW_OFLAG_COPIED;
  612 + errors++;
  613 + }
  614 +
  615 + /* Mark cluster as used */
  616 + nb_csectors = ((offset >> s->csize_shift) &
  617 + s->csize_mask) + 1;
  618 + offset &= s->cluster_offset_mask;
  619 + errors += inc_refcounts(bs, refcount_table,
  620 + refcount_table_size,
  621 + offset & ~511, nb_csectors * 512);
  622 + } else {
  623 + /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
  624 + if (check_copied) {
  625 + uint64_t entry = offset;
  626 + offset &= ~QCOW_OFLAG_COPIED;
  627 + refcount = get_refcount(bs, offset >> s->cluster_bits);
  628 + if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
  629 + fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
  630 + PRIx64 " refcount=%d\n", entry, refcount);
  631 + errors++;
  632 + }
  633 + }
  634 +
  635 + /* Mark cluster as used */
  636 + offset &= ~QCOW_OFLAG_COPIED;
  637 + errors += inc_refcounts(bs, refcount_table,
  638 + refcount_table_size,
  639 + offset, s->cluster_size);
  640 +
  641 + /* Correct offsets are cluster aligned */
  642 + if (offset & (s->cluster_size - 1)) {
  643 + fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
  644 + "properly aligned; L2 entry corrupted.\n", offset);
  645 + errors++;
  646 + }
  647 + }
  648 + }
  649 + }
  650 +
  651 + qemu_free(l2_table);
  652 + return errors;
  653 +
  654 +fail:
  655 + fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
  656 + qemu_free(l2_table);
  657 + return -EIO;
  658 +}
  659 +
  660 +/*
  661 + * Increases the refcount for the L1 table, its L2 tables and all referenced
  662 + * clusters in the given refcount table. While doing so, performs some checks
  663 + * on L1 and L2 entries.
  664 + *
  665 + * Returns the number of errors found by the checks or -errno if an internal
  666 + * error occurred.
  667 + */
  668 +static int check_refcounts_l1(BlockDriverState *bs,
  669 + uint16_t *refcount_table,
  670 + int refcount_table_size,
  671 + int64_t l1_table_offset, int l1_size,
  672 + int check_copied)
  673 +{
  674 + BDRVQcowState *s = bs->opaque;
  675 + uint64_t *l1_table, l2_offset, l1_size2;
  676 + int i, refcount, ret;
  677 + int errors = 0;
  678 +
  679 + l1_size2 = l1_size * sizeof(uint64_t);
  680 +
  681 + /* Mark L1 table as used */
  682 + errors += inc_refcounts(bs, refcount_table, refcount_table_size,
  683 + l1_table_offset, l1_size2);
  684 +
  685 + /* Read L1 table entries from disk */
  686 + l1_table = qemu_malloc(l1_size2);
  687 + if (bdrv_pread(s->hd, l1_table_offset,
  688 + l1_table, l1_size2) != l1_size2)
  689 + goto fail;
  690 + for(i = 0;i < l1_size; i++)
  691 + be64_to_cpus(&l1_table[i]);
  692 +
  693 + /* Do the actual checks */
  694 + for(i = 0; i < l1_size; i++) {
  695 + l2_offset = l1_table[i];
  696 + if (l2_offset) {
  697 + /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
  698 + if (check_copied) {
  699 + refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
  700 + >> s->cluster_bits);
  701 + if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
  702 + fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
  703 + " refcount=%d\n", l2_offset, refcount);
  704 + errors++;
  705 + }
  706 + }
  707 +
  708 + /* Mark L2 table as used */
  709 + l2_offset &= ~QCOW_OFLAG_COPIED;
  710 + errors += inc_refcounts(bs, refcount_table,
  711 + refcount_table_size,
  712 + l2_offset,
  713 + s->cluster_size);
  714 +
  715 + /* L2 tables are cluster aligned */
  716 + if (l2_offset & (s->cluster_size - 1)) {
  717 + fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
  718 + "cluster aligned; L1 entry corrupted\n", l2_offset);
  719 + errors++;
  720 + }
  721 +
  722 + /* Process and check L2 entries */
  723 + ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
  724 + l2_offset, check_copied);
  725 + if (ret < 0) {
  726 + goto fail;
  727 + }
  728 + errors += ret;
  729 + }
  730 + }
  731 + qemu_free(l1_table);
  732 + return errors;
  733 +
  734 +fail:
  735 + fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
  736 + qemu_free(l1_table);
  737 + return -EIO;
  738 +}
  739 +
  740 +/*
  741 + * Checks an image for refcount consistency.
  742 + *
  743 + * Returns 0 if no errors are found, the number of errors in case the image is
  744 + * detected as corrupted, and -errno when an internal error occured.
  745 + */
  746 +int check_refcounts(BlockDriverState *bs)
  747 +{
  748 + BDRVQcowState *s = bs->opaque;
  749 + int64_t size;
  750 + int nb_clusters, refcount1, refcount2, i;
  751 + QCowSnapshot *sn;
  752 + uint16_t *refcount_table;
  753 + int ret, errors = 0;
  754 +
  755 + size = bdrv_getlength(s->hd);
  756 + nb_clusters = size_to_clusters(s, size);
  757 + refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
  758 +
  759 + /* header */
  760 + errors += inc_refcounts(bs, refcount_table, nb_clusters,
  761 + 0, s->cluster_size);
  762 +
  763 + /* current L1 table */
  764 + ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
  765 + s->l1_table_offset, s->l1_size, 1);
  766 + if (ret < 0) {
  767 + return ret;
  768 + }
  769 + errors += ret;
  770 +
  771 + /* snapshots */
  772 + for(i = 0; i < s->nb_snapshots; i++) {
  773 + sn = s->snapshots + i;
  774 + check_refcounts_l1(bs, refcount_table, nb_clusters,
  775 + sn->l1_table_offset, sn->l1_size, 0);
  776 + }
  777 + errors += inc_refcounts(bs, refcount_table, nb_clusters,
  778 + s->snapshots_offset, s->snapshots_size);
  779 +
  780 + /* refcount data */
  781 + errors += inc_refcounts(bs, refcount_table, nb_clusters,
  782 + s->refcount_table_offset,
  783 + s->refcount_table_size * sizeof(uint64_t));
  784 + for(i = 0; i < s->refcount_table_size; i++) {
  785 + int64_t offset;
  786 + offset = s->refcount_table[i];
  787 + if (offset != 0) {
  788 + errors += inc_refcounts(bs, refcount_table, nb_clusters,
  789 + offset, s->cluster_size);
  790 + }
  791 + }
  792 +
  793 + /* compare ref counts */
  794 + for(i = 0; i < nb_clusters; i++) {
  795 + refcount1 = get_refcount(bs, i);
  796 + refcount2 = refcount_table[i];
  797 + if (refcount1 != refcount2) {
  798 + fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
  799 + i, refcount1, refcount2);
  800 + errors++;
  801 + }
  802 + }
  803 +
  804 + qemu_free(refcount_table);
  805 +
  806 + return errors;
  807 +}
  808 +
block/qcow2.c
@@ -26,6 +26,7 @@ @@ -26,6 +26,7 @@
26 #include "module.h" 26 #include "module.h"
27 #include <zlib.h> 27 #include <zlib.h>
28 #include "aes.h" 28 #include "aes.h"
  29 +#include "block/qcow2.h"
29 30
30 /* 31 /*
31 Differences with QCOW: 32 Differences with QCOW:
@@ -47,40 +48,6 @@ @@ -47,40 +48,6 @@
47 //#define DEBUG_ALLOC2 48 //#define DEBUG_ALLOC2
48 //#define DEBUG_EXT 49 //#define DEBUG_EXT
49 50
50 -#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)  
51 -#define QCOW_VERSION 2  
52 -  
53 -#define QCOW_CRYPT_NONE 0  
54 -#define QCOW_CRYPT_AES 1  
55 -  
56 -#define QCOW_MAX_CRYPT_CLUSTERS 32  
57 -  
58 -/* indicate that the refcount of the referenced cluster is exactly one. */  
59 -#define QCOW_OFLAG_COPIED (1LL << 63)  
60 -/* indicate that the cluster is compressed (they never have the copied flag) */  
61 -#define QCOW_OFLAG_COMPRESSED (1LL << 62)  
62 -  
63 -#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */  
64 -  
65 -#define MIN_CLUSTER_BITS 9  
66 -#define MAX_CLUSTER_BITS 16  
67 -  
68 -typedef struct QCowHeader {  
69 - uint32_t magic;  
70 - uint32_t version;  
71 - uint64_t backing_file_offset;  
72 - uint32_t backing_file_size;  
73 - uint32_t cluster_bits;  
74 - uint64_t size; /* in bytes */  
75 - uint32_t crypt_method;  
76 - uint32_t l1_size; /* XXX: save number of clusters instead ? */  
77 - uint64_t l1_table_offset;  
78 - uint64_t refcount_table_offset;  
79 - uint32_t refcount_table_clusters;  
80 - uint32_t nb_snapshots;  
81 - uint64_t snapshots_offset;  
82 -} QCowHeader;  
83 -  
84 51
85 typedef struct { 52 typedef struct {
86 uint32_t magic; 53 uint32_t magic;
@@ -110,77 +77,12 @@ typedef struct __attribute__((packed)) QCowSnapshotHeader { @@ -110,77 +77,12 @@ typedef struct __attribute__((packed)) QCowSnapshotHeader {
110 /* name follows */ 77 /* name follows */
111 } QCowSnapshotHeader; 78 } QCowSnapshotHeader;
112 79
113 -#define L2_CACHE_SIZE 16  
114 -  
115 -typedef struct QCowSnapshot {  
116 - uint64_t l1_table_offset;  
117 - uint32_t l1_size;  
118 - char *id_str;  
119 - char *name;  
120 - uint32_t vm_state_size;  
121 - uint32_t date_sec;  
122 - uint32_t date_nsec;  
123 - uint64_t vm_clock_nsec;  
124 -} QCowSnapshot;  
125 -  
126 -typedef struct BDRVQcowState {  
127 - BlockDriverState *hd;  
128 - int cluster_bits;  
129 - int cluster_size;  
130 - int cluster_sectors;  
131 - int l2_bits;  
132 - int l2_size;  
133 - int l1_size;  
134 - int l1_vm_state_index;  
135 - int csize_shift;  
136 - int csize_mask;  
137 - uint64_t cluster_offset_mask;  
138 - uint64_t l1_table_offset;  
139 - uint64_t *l1_table;  
140 - uint64_t *l2_cache;  
141 - uint64_t l2_cache_offsets[L2_CACHE_SIZE];  
142 - uint32_t l2_cache_counts[L2_CACHE_SIZE];  
143 - uint8_t *cluster_cache;  
144 - uint8_t *cluster_data;  
145 - uint64_t cluster_cache_offset;  
146 -  
147 - uint64_t *refcount_table;  
148 - uint64_t refcount_table_offset;  
149 - uint32_t refcount_table_size;  
150 - uint64_t refcount_block_cache_offset;  
151 - uint16_t *refcount_block_cache;  
152 - int64_t free_cluster_index;  
153 - int64_t free_byte_offset;  
154 -  
155 - uint32_t crypt_method; /* current crypt method, 0 if no key yet */  
156 - uint32_t crypt_method_header;  
157 - AES_KEY aes_encrypt_key;  
158 - AES_KEY aes_decrypt_key;  
159 - uint64_t snapshots_offset;  
160 - int snapshots_size;  
161 - int nb_snapshots;  
162 - QCowSnapshot *snapshots;  
163 -} BDRVQcowState;  
164 80
165 static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset); 81 static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
166 static int qcow_read(BlockDriverState *bs, int64_t sector_num, 82 static int qcow_read(BlockDriverState *bs, int64_t sector_num,
167 uint8_t *buf, int nb_sectors); 83 uint8_t *buf, int nb_sectors);
168 static int qcow_read_snapshots(BlockDriverState *bs); 84 static int qcow_read_snapshots(BlockDriverState *bs);
169 static void qcow_free_snapshots(BlockDriverState *bs); 85 static void qcow_free_snapshots(BlockDriverState *bs);
170 -static int refcount_init(BlockDriverState *bs);  
171 -static void refcount_close(BlockDriverState *bs);  
172 -static int get_refcount(BlockDriverState *bs, int64_t cluster_index);  
173 -static int update_cluster_refcount(BlockDriverState *bs,  
174 - int64_t cluster_index,  
175 - int addend);  
176 -static int update_refcount(BlockDriverState *bs,  
177 - int64_t offset, int64_t length,  
178 - int addend);  
179 -static int64_t alloc_clusters(BlockDriverState *bs, int64_t size);  
180 -static int64_t alloc_bytes(BlockDriverState *bs, int size);  
181 -static void free_clusters(BlockDriverState *bs,  
182 - int64_t offset, int64_t size);  
183 -static int check_refcounts(BlockDriverState *bs);  
184 86
185 static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) 87 static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
186 { 88 {
@@ -482,7 +384,7 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect, @@ -482,7 +384,7 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
482 return 0; 384 return 0;
483 } 385 }
484 386
485 -static void l2_cache_reset(BlockDriverState *bs) 387 +void l2_cache_reset(BlockDriverState *bs)
486 { 388 {
487 BDRVQcowState *s = bs->opaque; 389 BDRVQcowState *s = bs->opaque;
488 390
@@ -691,11 +593,6 @@ static uint64_t *l2_allocate(BlockDriverState *bs, int l1_index) @@ -691,11 +593,6 @@ static uint64_t *l2_allocate(BlockDriverState *bs, int l1_index)
691 return l2_table; 593 return l2_table;
692 } 594 }
693 595
694 -static int size_to_clusters(BDRVQcowState *s, int64_t size)  
695 -{  
696 - return (size + (s->cluster_size - 1)) >> s->cluster_bits;  
697 -}  
698 -  
699 static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size, 596 static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
700 uint64_t *l2_table, uint64_t start, uint64_t mask) 597 uint64_t *l2_table, uint64_t start, uint64_t mask)
701 { 598 {
@@ -1522,35 +1419,6 @@ static void qcow_close(BlockDriverState *bs) @@ -1522,35 +1419,6 @@ static void qcow_close(BlockDriverState *bs)
1522 bdrv_delete(s->hd); 1419 bdrv_delete(s->hd);
1523 } 1420 }
1524 1421
1525 -/* XXX: use std qcow open function ? */  
1526 -typedef struct QCowCreateState {  
1527 - int cluster_size;  
1528 - int cluster_bits;  
1529 - uint16_t *refcount_block;  
1530 - uint64_t *refcount_table;  
1531 - int64_t l1_table_offset;  
1532 - int64_t refcount_table_offset;  
1533 - int64_t refcount_block_offset;  
1534 -} QCowCreateState;  
1535 -  
1536 -static void create_refcount_update(QCowCreateState *s,  
1537 - int64_t offset, int64_t size)  
1538 -{  
1539 - int refcount;  
1540 - int64_t start, last, cluster_offset;  
1541 - uint16_t *p;  
1542 -  
1543 - start = offset & ~(s->cluster_size - 1);  
1544 - last = (offset + size - 1) & ~(s->cluster_size - 1);  
1545 - for(cluster_offset = start; cluster_offset <= last;  
1546 - cluster_offset += s->cluster_size) {  
1547 - p = &s->refcount_block[cluster_offset >> s->cluster_bits];  
1548 - refcount = be16_to_cpu(*p);  
1549 - refcount++;  
1550 - *p = cpu_to_be16(refcount);  
1551 - }  
1552 -}  
1553 -  
1554 static int get_bits_from_size(size_t size) 1422 static int get_bits_from_size(size_t size)
1555 { 1423 {
1556 int res = 0; 1424 int res = 0;
@@ -1834,117 +1702,6 @@ static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) @@ -1834,117 +1702,6 @@ static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1834 /*********************************************************/ 1702 /*********************************************************/
1835 /* snapshot support */ 1703 /* snapshot support */
1836 1704
1837 -/* update the refcounts of snapshots and the copied flag */  
1838 -static int update_snapshot_refcount(BlockDriverState *bs,  
1839 - int64_t l1_table_offset,  
1840 - int l1_size,  
1841 - int addend)  
1842 -{  
1843 - BDRVQcowState *s = bs->opaque;  
1844 - uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;  
1845 - int64_t old_offset, old_l2_offset;  
1846 - int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;  
1847 -  
1848 - l2_cache_reset(bs);  
1849 -  
1850 - l2_table = NULL;  
1851 - l1_table = NULL;  
1852 - l1_size2 = l1_size * sizeof(uint64_t);  
1853 - l1_allocated = 0;  
1854 - if (l1_table_offset != s->l1_table_offset) {  
1855 - l1_table = qemu_malloc(l1_size2);  
1856 - l1_allocated = 1;  
1857 - if (bdrv_pread(s->hd, l1_table_offset,  
1858 - l1_table, l1_size2) != l1_size2)  
1859 - goto fail;  
1860 - for(i = 0;i < l1_size; i++)  
1861 - be64_to_cpus(&l1_table[i]);  
1862 - } else {  
1863 - assert(l1_size == s->l1_size);  
1864 - l1_table = s->l1_table;  
1865 - l1_allocated = 0;  
1866 - }  
1867 -  
1868 - l2_size = s->l2_size * sizeof(uint64_t);  
1869 - l2_table = qemu_malloc(l2_size);  
1870 - l1_modified = 0;  
1871 - for(i = 0; i < l1_size; i++) {  
1872 - l2_offset = l1_table[i];  
1873 - if (l2_offset) {  
1874 - old_l2_offset = l2_offset;  
1875 - l2_offset &= ~QCOW_OFLAG_COPIED;  
1876 - l2_modified = 0;  
1877 - if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)  
1878 - goto fail;  
1879 - for(j = 0; j < s->l2_size; j++) {  
1880 - offset = be64_to_cpu(l2_table[j]);  
1881 - if (offset != 0) {  
1882 - old_offset = offset;  
1883 - offset &= ~QCOW_OFLAG_COPIED;  
1884 - if (offset & QCOW_OFLAG_COMPRESSED) {  
1885 - nb_csectors = ((offset >> s->csize_shift) &  
1886 - s->csize_mask) + 1;  
1887 - if (addend != 0)  
1888 - update_refcount(bs, (offset & s->cluster_offset_mask) & ~511,  
1889 - nb_csectors * 512, addend);  
1890 - /* compressed clusters are never modified */  
1891 - refcount = 2;  
1892 - } else {  
1893 - if (addend != 0) {  
1894 - refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);  
1895 - } else {  
1896 - refcount = get_refcount(bs, offset >> s->cluster_bits);  
1897 - }  
1898 - }  
1899 -  
1900 - if (refcount == 1) {  
1901 - offset |= QCOW_OFLAG_COPIED;  
1902 - }  
1903 - if (offset != old_offset) {  
1904 - l2_table[j] = cpu_to_be64(offset);  
1905 - l2_modified = 1;  
1906 - }  
1907 - }  
1908 - }  
1909 - if (l2_modified) {  
1910 - if (bdrv_pwrite(s->hd,  
1911 - l2_offset, l2_table, l2_size) != l2_size)  
1912 - goto fail;  
1913 - }  
1914 -  
1915 - if (addend != 0) {  
1916 - refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);  
1917 - } else {  
1918 - refcount = get_refcount(bs, l2_offset >> s->cluster_bits);  
1919 - }  
1920 - if (refcount == 1) {  
1921 - l2_offset |= QCOW_OFLAG_COPIED;  
1922 - }  
1923 - if (l2_offset != old_l2_offset) {  
1924 - l1_table[i] = l2_offset;  
1925 - l1_modified = 1;  
1926 - }  
1927 - }  
1928 - }  
1929 - if (l1_modified) {  
1930 - for(i = 0; i < l1_size; i++)  
1931 - cpu_to_be64s(&l1_table[i]);  
1932 - if (bdrv_pwrite(s->hd, l1_table_offset, l1_table,  
1933 - l1_size2) != l1_size2)  
1934 - goto fail;  
1935 - for(i = 0; i < l1_size; i++)  
1936 - be64_to_cpus(&l1_table[i]);  
1937 - }  
1938 - if (l1_allocated)  
1939 - qemu_free(l1_table);  
1940 - qemu_free(l2_table);  
1941 - return 0;  
1942 - fail:  
1943 - if (l1_allocated)  
1944 - qemu_free(l1_table);  
1945 - qemu_free(l2_table);  
1946 - return -EIO;  
1947 -}  
1948 1705
1949 static void qcow_free_snapshots(BlockDriverState *bs) 1706 static void qcow_free_snapshots(BlockDriverState *bs)
1950 { 1707 {
@@ -2306,630 +2063,6 @@ static int qcow_snapshot_list(BlockDriverState *bs, @@ -2306,630 +2063,6 @@ static int qcow_snapshot_list(BlockDriverState *bs,
2306 return s->nb_snapshots; 2063 return s->nb_snapshots;
2307 } 2064 }
2308 2065
2309 -/*********************************************************/  
2310 -/* refcount handling */  
2311 -  
2312 -static int refcount_init(BlockDriverState *bs)  
2313 -{  
2314 - BDRVQcowState *s = bs->opaque;  
2315 - int ret, refcount_table_size2, i;  
2316 -  
2317 - s->refcount_block_cache = qemu_malloc(s->cluster_size);  
2318 - refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);  
2319 - s->refcount_table = qemu_malloc(refcount_table_size2);  
2320 - if (s->refcount_table_size > 0) {  
2321 - ret = bdrv_pread(s->hd, s->refcount_table_offset,  
2322 - s->refcount_table, refcount_table_size2);  
2323 - if (ret != refcount_table_size2)  
2324 - goto fail;  
2325 - for(i = 0; i < s->refcount_table_size; i++)  
2326 - be64_to_cpus(&s->refcount_table[i]);  
2327 - }  
2328 - return 0;  
2329 - fail:  
2330 - return -ENOMEM;  
2331 -}  
2332 -  
2333 -static void refcount_close(BlockDriverState *bs)  
2334 -{  
2335 - BDRVQcowState *s = bs->opaque;  
2336 - qemu_free(s->refcount_block_cache);  
2337 - qemu_free(s->refcount_table);  
2338 -}  
2339 -  
2340 -  
2341 -static int load_refcount_block(BlockDriverState *bs,  
2342 - int64_t refcount_block_offset)  
2343 -{  
2344 - BDRVQcowState *s = bs->opaque;  
2345 - int ret;  
2346 - ret = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache,  
2347 - s->cluster_size);  
2348 - if (ret != s->cluster_size)  
2349 - return -EIO;  
2350 - s->refcount_block_cache_offset = refcount_block_offset;  
2351 - return 0;  
2352 -}  
2353 -  
2354 -static int get_refcount(BlockDriverState *bs, int64_t cluster_index)  
2355 -{  
2356 - BDRVQcowState *s = bs->opaque;  
2357 - int refcount_table_index, block_index;  
2358 - int64_t refcount_block_offset;  
2359 -  
2360 - refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);  
2361 - if (refcount_table_index >= s->refcount_table_size)  
2362 - return 0;  
2363 - refcount_block_offset = s->refcount_table[refcount_table_index];  
2364 - if (!refcount_block_offset)  
2365 - return 0;  
2366 - if (refcount_block_offset != s->refcount_block_cache_offset) {  
2367 - /* better than nothing: return allocated if read error */  
2368 - if (load_refcount_block(bs, refcount_block_offset) < 0)  
2369 - return 1;  
2370 - }  
2371 - block_index = cluster_index &  
2372 - ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);  
2373 - return be16_to_cpu(s->refcount_block_cache[block_index]);  
2374 -}  
2375 -  
2376 -/* return < 0 if error */  
2377 -static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)  
2378 -{  
2379 - BDRVQcowState *s = bs->opaque;  
2380 - int i, nb_clusters;  
2381 -  
2382 - nb_clusters = size_to_clusters(s, size);  
2383 -retry:  
2384 - for(i = 0; i < nb_clusters; i++) {  
2385 - int64_t i = s->free_cluster_index++;  
2386 - if (get_refcount(bs, i) != 0)  
2387 - goto retry;  
2388 - }  
2389 -#ifdef DEBUG_ALLOC2  
2390 - printf("alloc_clusters: size=%lld -> %lld\n",  
2391 - size,  
2392 - (s->free_cluster_index - nb_clusters) << s->cluster_bits);  
2393 -#endif  
2394 - return (s->free_cluster_index - nb_clusters) << s->cluster_bits;  
2395 -}  
2396 -  
2397 -static int64_t alloc_clusters(BlockDriverState *bs, int64_t size)  
2398 -{  
2399 - int64_t offset;  
2400 -  
2401 - offset = alloc_clusters_noref(bs, size);  
2402 - update_refcount(bs, offset, size, 1);  
2403 - return offset;  
2404 -}  
2405 -  
2406 -/* only used to allocate compressed sectors. We try to allocate  
2407 - contiguous sectors. size must be <= cluster_size */  
2408 -static int64_t alloc_bytes(BlockDriverState *bs, int size)  
2409 -{  
2410 - BDRVQcowState *s = bs->opaque;  
2411 - int64_t offset, cluster_offset;  
2412 - int free_in_cluster;  
2413 -  
2414 - assert(size > 0 && size <= s->cluster_size);  
2415 - if (s->free_byte_offset == 0) {  
2416 - s->free_byte_offset = alloc_clusters(bs, s->cluster_size);  
2417 - }  
2418 - redo:  
2419 - free_in_cluster = s->cluster_size -  
2420 - (s->free_byte_offset & (s->cluster_size - 1));  
2421 - if (size <= free_in_cluster) {  
2422 - /* enough space in current cluster */  
2423 - offset = s->free_byte_offset;  
2424 - s->free_byte_offset += size;  
2425 - free_in_cluster -= size;  
2426 - if (free_in_cluster == 0)  
2427 - s->free_byte_offset = 0;  
2428 - if ((offset & (s->cluster_size - 1)) != 0)  
2429 - update_cluster_refcount(bs, offset >> s->cluster_bits, 1);  
2430 - } else {  
2431 - offset = alloc_clusters(bs, s->cluster_size);  
2432 - cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);  
2433 - if ((cluster_offset + s->cluster_size) == offset) {  
2434 - /* we are lucky: contiguous data */  
2435 - offset = s->free_byte_offset;  
2436 - update_cluster_refcount(bs, offset >> s->cluster_bits, 1);  
2437 - s->free_byte_offset += size;  
2438 - } else {  
2439 - s->free_byte_offset = offset;  
2440 - goto redo;  
2441 - }  
2442 - }  
2443 - return offset;  
2444 -}  
2445 -  
2446 -static void free_clusters(BlockDriverState *bs,  
2447 - int64_t offset, int64_t size)  
2448 -{  
2449 - update_refcount(bs, offset, size, -1);  
2450 -}  
2451 -  
2452 -static int grow_refcount_table(BlockDriverState *bs, int min_size)  
2453 -{  
2454 - BDRVQcowState *s = bs->opaque;  
2455 - int new_table_size, new_table_size2, refcount_table_clusters, i, ret;  
2456 - uint64_t *new_table;  
2457 - int64_t table_offset;  
2458 - uint8_t data[12];  
2459 - int old_table_size;  
2460 - int64_t old_table_offset;  
2461 -  
2462 - if (min_size <= s->refcount_table_size)  
2463 - return 0;  
2464 - /* compute new table size */  
2465 - refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);  
2466 - for(;;) {  
2467 - if (refcount_table_clusters == 0) {  
2468 - refcount_table_clusters = 1;  
2469 - } else {  
2470 - refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;  
2471 - }  
2472 - new_table_size = refcount_table_clusters << (s->cluster_bits - 3);  
2473 - if (min_size <= new_table_size)  
2474 - break;  
2475 - }  
2476 -#ifdef DEBUG_ALLOC2  
2477 - printf("grow_refcount_table from %d to %d\n",  
2478 - s->refcount_table_size,  
2479 - new_table_size);  
2480 -#endif  
2481 - new_table_size2 = new_table_size * sizeof(uint64_t);  
2482 - new_table = qemu_mallocz(new_table_size2);  
2483 - memcpy(new_table, s->refcount_table,  
2484 - s->refcount_table_size * sizeof(uint64_t));  
2485 - for(i = 0; i < s->refcount_table_size; i++)  
2486 - cpu_to_be64s(&new_table[i]);  
2487 - /* Note: we cannot update the refcount now to avoid recursion */  
2488 - table_offset = alloc_clusters_noref(bs, new_table_size2);  
2489 - ret = bdrv_pwrite(s->hd, table_offset, new_table, new_table_size2);  
2490 - if (ret != new_table_size2)  
2491 - goto fail;  
2492 - for(i = 0; i < s->refcount_table_size; i++)  
2493 - be64_to_cpus(&new_table[i]);  
2494 -  
2495 - cpu_to_be64w((uint64_t*)data, table_offset);  
2496 - cpu_to_be32w((uint32_t*)(data + 8), refcount_table_clusters);  
2497 - if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),  
2498 - data, sizeof(data)) != sizeof(data))  
2499 - goto fail;  
2500 - qemu_free(s->refcount_table);  
2501 - old_table_offset = s->refcount_table_offset;  
2502 - old_table_size = s->refcount_table_size;  
2503 - s->refcount_table = new_table;  
2504 - s->refcount_table_size = new_table_size;  
2505 - s->refcount_table_offset = table_offset;  
2506 -  
2507 - update_refcount(bs, table_offset, new_table_size2, 1);  
2508 - free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));  
2509 - return 0;  
2510 - fail:  
2511 - free_clusters(bs, table_offset, new_table_size2);  
2512 - qemu_free(new_table);  
2513 - return -EIO;  
2514 -}  
2515 -  
2516 -  
2517 -static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)  
2518 -{  
2519 - BDRVQcowState *s = bs->opaque;  
2520 - int64_t offset, refcount_block_offset;  
2521 - int ret, refcount_table_index;  
2522 - uint64_t data64;  
2523 -  
2524 - /* Find L1 index and grow refcount table if needed */  
2525 - refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);  
2526 - if (refcount_table_index >= s->refcount_table_size) {  
2527 - ret = grow_refcount_table(bs, refcount_table_index + 1);  
2528 - if (ret < 0)  
2529 - return ret;  
2530 - }  
2531 -  
2532 - /* Load or allocate the refcount block */  
2533 - refcount_block_offset = s->refcount_table[refcount_table_index];  
2534 - if (!refcount_block_offset) {  
2535 - /* create a new refcount block */  
2536 - /* Note: we cannot update the refcount now to avoid recursion */  
2537 - offset = alloc_clusters_noref(bs, s->cluster_size);  
2538 - memset(s->refcount_block_cache, 0, s->cluster_size);  
2539 - ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size);  
2540 - if (ret != s->cluster_size)  
2541 - return -EINVAL;  
2542 - s->refcount_table[refcount_table_index] = offset;  
2543 - data64 = cpu_to_be64(offset);  
2544 - ret = bdrv_pwrite(s->hd, s->refcount_table_offset +  
2545 - refcount_table_index * sizeof(uint64_t),  
2546 - &data64, sizeof(data64));  
2547 - if (ret != sizeof(data64))  
2548 - return -EINVAL;  
2549 -  
2550 - refcount_block_offset = offset;  
2551 - s->refcount_block_cache_offset = offset;  
2552 - update_refcount(bs, offset, s->cluster_size, 1);  
2553 - } else {  
2554 - if (refcount_block_offset != s->refcount_block_cache_offset) {  
2555 - if (load_refcount_block(bs, refcount_block_offset) < 0)  
2556 - return -EIO;  
2557 - }  
2558 - }  
2559 -  
2560 - return refcount_block_offset;  
2561 -}  
2562 -  
2563 -/* addend must be 1 or -1 */  
2564 -static int update_cluster_refcount(BlockDriverState *bs,  
2565 - int64_t cluster_index,  
2566 - int addend)  
2567 -{  
2568 - BDRVQcowState *s = bs->opaque;  
2569 - int ret;  
2570 -  
2571 - ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend);  
2572 - if (ret < 0) {  
2573 - return ret;  
2574 - }  
2575 -  
2576 - return get_refcount(bs, cluster_index);  
2577 -}  
2578 -  
2579 -/* XXX: cache several refcount block clusters ? */  
2580 -static int update_refcount(BlockDriverState *bs,  
2581 - int64_t offset, int64_t length,  
2582 - int addend)  
2583 -{  
2584 - BDRVQcowState *s = bs->opaque;  
2585 - int64_t start, last, cluster_offset;  
2586 - int64_t refcount_block_offset = 0;  
2587 - int64_t table_index = -1, old_table_index;  
2588 - int first_index = -1, last_index = -1;  
2589 -  
2590 -#ifdef DEBUG_ALLOC2  
2591 - printf("update_refcount: offset=%lld size=%lld addend=%d\n",  
2592 - offset, length, addend);  
2593 -#endif  
2594 - if (length <= 0)  
2595 - return -EINVAL;  
2596 - start = offset & ~(s->cluster_size - 1);  
2597 - last = (offset + length - 1) & ~(s->cluster_size - 1);  
2598 - for(cluster_offset = start; cluster_offset <= last;  
2599 - cluster_offset += s->cluster_size)  
2600 - {  
2601 - int block_index, refcount;  
2602 - int64_t cluster_index = cluster_offset >> s->cluster_bits;  
2603 -  
2604 - /* Only write refcount block to disk when we are done with it */  
2605 - old_table_index = table_index;  
2606 - table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);  
2607 - if ((old_table_index >= 0) && (table_index != old_table_index)) {  
2608 - size_t size = (last_index - first_index + 1) << REFCOUNT_SHIFT;  
2609 - if (bdrv_pwrite(s->hd,  
2610 - refcount_block_offset + (first_index << REFCOUNT_SHIFT),  
2611 - &s->refcount_block_cache[first_index], size) != size)  
2612 - {  
2613 - return -EIO;  
2614 - }  
2615 -  
2616 - first_index = -1;  
2617 - last_index = -1;  
2618 - }  
2619 -  
2620 - /* Load the refcount block and allocate it if needed */  
2621 - refcount_block_offset = alloc_refcount_block(bs, cluster_index);  
2622 - if (refcount_block_offset < 0) {  
2623 - return refcount_block_offset;  
2624 - }  
2625 -  
2626 - /* we can update the count and save it */  
2627 - block_index = cluster_index &  
2628 - ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);  
2629 - if (first_index == -1 || block_index < first_index) {  
2630 - first_index = block_index;  
2631 - }  
2632 - if (block_index > last_index) {  
2633 - last_index = block_index;  
2634 - }  
2635 -  
2636 - refcount = be16_to_cpu(s->refcount_block_cache[block_index]);  
2637 - refcount += addend;  
2638 - if (refcount < 0 || refcount > 0xffff)  
2639 - return -EINVAL;  
2640 - if (refcount == 0 && cluster_index < s->free_cluster_index) {  
2641 - s->free_cluster_index = cluster_index;  
2642 - }  
2643 - s->refcount_block_cache[block_index] = cpu_to_be16(refcount);  
2644 - }  
2645 -  
2646 - /* Write last changed block to disk */  
2647 - if (refcount_block_offset != 0) {  
2648 - size_t size = (last_index - first_index + 1) << REFCOUNT_SHIFT;  
2649 - if (bdrv_pwrite(s->hd,  
2650 - refcount_block_offset + (first_index << REFCOUNT_SHIFT),  
2651 - &s->refcount_block_cache[first_index], size) != size)  
2652 - {  
2653 - return -EIO;  
2654 - }  
2655 - }  
2656 -  
2657 - return 0;  
2658 -}  
2659 -  
2660 -/*  
2661 - * Increases the refcount for a range of clusters in a given refcount table.  
2662 - * This is used to construct a temporary refcount table out of L1 and L2 tables  
2663 - * which can be compared the the refcount table saved in the image.  
2664 - *  
2665 - * Returns the number of errors in the image that were found  
2666 - */  
2667 -static int inc_refcounts(BlockDriverState *bs,  
2668 - uint16_t *refcount_table,  
2669 - int refcount_table_size,  
2670 - int64_t offset, int64_t size)  
2671 -{  
2672 - BDRVQcowState *s = bs->opaque;  
2673 - int64_t start, last, cluster_offset;  
2674 - int k;  
2675 - int errors = 0;  
2676 -  
2677 - if (size <= 0)  
2678 - return 0;  
2679 -  
2680 - start = offset & ~(s->cluster_size - 1);  
2681 - last = (offset + size - 1) & ~(s->cluster_size - 1);  
2682 - for(cluster_offset = start; cluster_offset <= last;  
2683 - cluster_offset += s->cluster_size) {  
2684 - k = cluster_offset >> s->cluster_bits;  
2685 - if (k < 0 || k >= refcount_table_size) {  
2686 - fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",  
2687 - cluster_offset);  
2688 - errors++;  
2689 - } else {  
2690 - if (++refcount_table[k] == 0) {  
2691 - fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64  
2692 - "\n", cluster_offset);  
2693 - errors++;  
2694 - }  
2695 - }  
2696 - }  
2697 -  
2698 - return errors;  
2699 -}  
2700 -  
2701 -/*  
2702 - * Increases the refcount in the given refcount table for the all clusters  
2703 - * referenced in the L2 table. While doing so, performs some checks on L2  
2704 - * entries.  
2705 - *  
2706 - * Returns the number of errors found by the checks or -errno if an internal  
2707 - * error occurred.  
2708 - */  
2709 -static int check_refcounts_l2(BlockDriverState *bs,  
2710 - uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,  
2711 - int check_copied)  
2712 -{  
2713 - BDRVQcowState *s = bs->opaque;  
2714 - uint64_t *l2_table, offset;  
2715 - int i, l2_size, nb_csectors, refcount;  
2716 - int errors = 0;  
2717 -  
2718 - /* Read L2 table from disk */  
2719 - l2_size = s->l2_size * sizeof(uint64_t);  
2720 - l2_table = qemu_malloc(l2_size);  
2721 -  
2722 - if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)  
2723 - goto fail;  
2724 -  
2725 - /* Do the actual checks */  
2726 - for(i = 0; i < s->l2_size; i++) {  
2727 - offset = be64_to_cpu(l2_table[i]);  
2728 - if (offset != 0) {  
2729 - if (offset & QCOW_OFLAG_COMPRESSED) {  
2730 - /* Compressed clusters don't have QCOW_OFLAG_COPIED */  
2731 - if (offset & QCOW_OFLAG_COPIED) {  
2732 - fprintf(stderr, "ERROR: cluster %" PRId64 ": "  
2733 - "copied flag must never be set for compressed "  
2734 - "clusters\n", offset >> s->cluster_bits);  
2735 - offset &= ~QCOW_OFLAG_COPIED;  
2736 - errors++;  
2737 - }  
2738 -  
2739 - /* Mark cluster as used */  
2740 - nb_csectors = ((offset >> s->csize_shift) &  
2741 - s->csize_mask) + 1;  
2742 - offset &= s->cluster_offset_mask;  
2743 - errors += inc_refcounts(bs, refcount_table,  
2744 - refcount_table_size,  
2745 - offset & ~511, nb_csectors * 512);  
2746 - } else {  
2747 - /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */  
2748 - if (check_copied) {  
2749 - uint64_t entry = offset;  
2750 - offset &= ~QCOW_OFLAG_COPIED;  
2751 - refcount = get_refcount(bs, offset >> s->cluster_bits);  
2752 - if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {  
2753 - fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"  
2754 - PRIx64 " refcount=%d\n", entry, refcount);  
2755 - errors++;  
2756 - }  
2757 - }  
2758 -  
2759 - /* Mark cluster as used */  
2760 - offset &= ~QCOW_OFLAG_COPIED;  
2761 - errors += inc_refcounts(bs, refcount_table,  
2762 - refcount_table_size,  
2763 - offset, s->cluster_size);  
2764 -  
2765 - /* Correct offsets are cluster aligned */  
2766 - if (offset & (s->cluster_size - 1)) {  
2767 - fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "  
2768 - "properly aligned; L2 entry corrupted.\n", offset);  
2769 - errors++;  
2770 - }  
2771 - }  
2772 - }  
2773 - }  
2774 -  
2775 - qemu_free(l2_table);  
2776 - return errors;  
2777 -  
2778 -fail:  
2779 - fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");  
2780 - qemu_free(l2_table);  
2781 - return -EIO;  
2782 -}  
2783 -  
2784 -/*  
2785 - * Increases the refcount for the L1 table, its L2 tables and all referenced  
2786 - * clusters in the given refcount table. While doing so, performs some checks  
2787 - * on L1 and L2 entries.  
2788 - *  
2789 - * Returns the number of errors found by the checks or -errno if an internal  
2790 - * error occurred.  
2791 - */  
2792 -static int check_refcounts_l1(BlockDriverState *bs,  
2793 - uint16_t *refcount_table,  
2794 - int refcount_table_size,  
2795 - int64_t l1_table_offset, int l1_size,  
2796 - int check_copied)  
2797 -{  
2798 - BDRVQcowState *s = bs->opaque;  
2799 - uint64_t *l1_table, l2_offset, l1_size2;  
2800 - int i, refcount, ret;  
2801 - int errors = 0;  
2802 -  
2803 - l1_size2 = l1_size * sizeof(uint64_t);  
2804 -  
2805 - /* Mark L1 table as used */  
2806 - errors += inc_refcounts(bs, refcount_table, refcount_table_size,  
2807 - l1_table_offset, l1_size2);  
2808 -  
2809 - /* Read L1 table entries from disk */  
2810 - l1_table = qemu_malloc(l1_size2);  
2811 - if (bdrv_pread(s->hd, l1_table_offset,  
2812 - l1_table, l1_size2) != l1_size2)  
2813 - goto fail;  
2814 - for(i = 0;i < l1_size; i++)  
2815 - be64_to_cpus(&l1_table[i]);  
2816 -  
2817 - /* Do the actual checks */  
2818 - for(i = 0; i < l1_size; i++) {  
2819 - l2_offset = l1_table[i];  
2820 - if (l2_offset) {  
2821 - /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */  
2822 - if (check_copied) {  
2823 - refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)  
2824 - >> s->cluster_bits);  
2825 - if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {  
2826 - fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64  
2827 - " refcount=%d\n", l2_offset, refcount);  
2828 - errors++;  
2829 - }  
2830 - }  
2831 -  
2832 - /* Mark L2 table as used */  
2833 - l2_offset &= ~QCOW_OFLAG_COPIED;  
2834 - errors += inc_refcounts(bs, refcount_table,  
2835 - refcount_table_size,  
2836 - l2_offset,  
2837 - s->cluster_size);  
2838 -  
2839 - /* L2 tables are cluster aligned */  
2840 - if (l2_offset & (s->cluster_size - 1)) {  
2841 - fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "  
2842 - "cluster aligned; L1 entry corrupted\n", l2_offset);  
2843 - errors++;  
2844 - }  
2845 -  
2846 - /* Process and check L2 entries */  
2847 - ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,  
2848 - l2_offset, check_copied);  
2849 - if (ret < 0) {  
2850 - goto fail;  
2851 - }  
2852 - errors += ret;  
2853 - }  
2854 - }  
2855 - qemu_free(l1_table);  
2856 - return errors;  
2857 -  
2858 -fail:  
2859 - fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");  
2860 - qemu_free(l1_table);  
2861 - return -EIO;  
2862 -}  
2863 -  
2864 -/*  
2865 - * Checks an image for refcount consistency.  
2866 - *  
2867 - * Returns 0 if no errors are found, the number of errors in case the image is  
2868 - * detected as corrupted, and -errno when an internal error occured.  
2869 - */  
2870 -static int check_refcounts(BlockDriverState *bs)  
2871 -{  
2872 - BDRVQcowState *s = bs->opaque;  
2873 - int64_t size;  
2874 - int nb_clusters, refcount1, refcount2, i;  
2875 - QCowSnapshot *sn;  
2876 - uint16_t *refcount_table;  
2877 - int ret, errors = 0;  
2878 -  
2879 - size = bdrv_getlength(s->hd);  
2880 - nb_clusters = size_to_clusters(s, size);  
2881 - refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));  
2882 -  
2883 - /* header */  
2884 - errors += inc_refcounts(bs, refcount_table, nb_clusters,  
2885 - 0, s->cluster_size);  
2886 -  
2887 - /* current L1 table */  
2888 - ret = check_refcounts_l1(bs, refcount_table, nb_clusters,  
2889 - s->l1_table_offset, s->l1_size, 1);  
2890 - if (ret < 0) {  
2891 - return ret;  
2892 - }  
2893 - errors += ret;  
2894 -  
2895 - /* snapshots */  
2896 - for(i = 0; i < s->nb_snapshots; i++) {  
2897 - sn = s->snapshots + i;  
2898 - check_refcounts_l1(bs, refcount_table, nb_clusters,  
2899 - sn->l1_table_offset, sn->l1_size, 0);  
2900 - }  
2901 - errors += inc_refcounts(bs, refcount_table, nb_clusters,  
2902 - s->snapshots_offset, s->snapshots_size);  
2903 -  
2904 - /* refcount data */  
2905 - errors += inc_refcounts(bs, refcount_table, nb_clusters,  
2906 - s->refcount_table_offset,  
2907 - s->refcount_table_size * sizeof(uint64_t));  
2908 - for(i = 0; i < s->refcount_table_size; i++) {  
2909 - int64_t offset;  
2910 - offset = s->refcount_table[i];  
2911 - if (offset != 0) {  
2912 - errors += inc_refcounts(bs, refcount_table, nb_clusters,  
2913 - offset, s->cluster_size);  
2914 - }  
2915 - }  
2916 -  
2917 - /* compare ref counts */  
2918 - for(i = 0; i < nb_clusters; i++) {  
2919 - refcount1 = get_refcount(bs, i);  
2920 - refcount2 = refcount_table[i];  
2921 - if (refcount1 != refcount2) {  
2922 - fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",  
2923 - i, refcount1, refcount2);  
2924 - errors++;  
2925 - }  
2926 - }  
2927 -  
2928 - qemu_free(refcount_table);  
2929 -  
2930 - return errors;  
2931 -}  
2932 -  
2933 static int qcow_check(BlockDriverState *bs) 2066 static int qcow_check(BlockDriverState *bs)
2934 { 2067 {
2935 return check_refcounts(bs); 2068 return check_refcounts(bs);
block/qcow2.h 0 → 100644
  1 +/*
  2 + * Block driver for the QCOW version 2 format
  3 + *
  4 + * Copyright (c) 2004-2006 Fabrice Bellard
  5 + *
  6 + * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 + * of this software and associated documentation files (the "Software"), to deal
  8 + * in the Software without restriction, including without limitation the rights
  9 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 + * copies of the Software, and to permit persons to whom the Software is
  11 + * furnished to do so, subject to the following conditions:
  12 + *
  13 + * The above copyright notice and this permission notice shall be included in
  14 + * all copies or substantial portions of the Software.
  15 + *
  16 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 + * THE SOFTWARE.
  23 + */
  24 +
  25 +#ifndef BLOCK_QCOW2_H
  26 +#define BLOCK_QCOW2_H
  27 +
  28 +#include "aes.h"
  29 +
  30 +#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
  31 +#define QCOW_VERSION 2
  32 +
  33 +#define QCOW_CRYPT_NONE 0
  34 +#define QCOW_CRYPT_AES 1
  35 +
  36 +#define QCOW_MAX_CRYPT_CLUSTERS 32
  37 +
  38 +/* indicate that the refcount of the referenced cluster is exactly one. */
  39 +#define QCOW_OFLAG_COPIED (1LL << 63)
  40 +/* indicate that the cluster is compressed (they never have the copied flag) */
  41 +#define QCOW_OFLAG_COMPRESSED (1LL << 62)
  42 +
  43 +#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
  44 +
  45 +#define MIN_CLUSTER_BITS 9
  46 +#define MAX_CLUSTER_BITS 16
  47 +
  48 +#define L2_CACHE_SIZE 16
  49 +
  50 +typedef struct QCowHeader {
  51 + uint32_t magic;
  52 + uint32_t version;
  53 + uint64_t backing_file_offset;
  54 + uint32_t backing_file_size;
  55 + uint32_t cluster_bits;
  56 + uint64_t size; /* in bytes */
  57 + uint32_t crypt_method;
  58 + uint32_t l1_size; /* XXX: save number of clusters instead ? */
  59 + uint64_t l1_table_offset;
  60 + uint64_t refcount_table_offset;
  61 + uint32_t refcount_table_clusters;
  62 + uint32_t nb_snapshots;
  63 + uint64_t snapshots_offset;
  64 +} QCowHeader;
  65 +
  66 +typedef struct QCowSnapshot {
  67 + uint64_t l1_table_offset;
  68 + uint32_t l1_size;
  69 + char *id_str;
  70 + char *name;
  71 + uint32_t vm_state_size;
  72 + uint32_t date_sec;
  73 + uint32_t date_nsec;
  74 + uint64_t vm_clock_nsec;
  75 +} QCowSnapshot;
  76 +
  77 +typedef struct BDRVQcowState {
  78 + BlockDriverState *hd;
  79 + int cluster_bits;
  80 + int cluster_size;
  81 + int cluster_sectors;
  82 + int l2_bits;
  83 + int l2_size;
  84 + int l1_size;
  85 + int l1_vm_state_index;
  86 + int csize_shift;
  87 + int csize_mask;
  88 + uint64_t cluster_offset_mask;
  89 + uint64_t l1_table_offset;
  90 + uint64_t *l1_table;
  91 + uint64_t *l2_cache;
  92 + uint64_t l2_cache_offsets[L2_CACHE_SIZE];
  93 + uint32_t l2_cache_counts[L2_CACHE_SIZE];
  94 + uint8_t *cluster_cache;
  95 + uint8_t *cluster_data;
  96 + uint64_t cluster_cache_offset;
  97 +
  98 + uint64_t *refcount_table;
  99 + uint64_t refcount_table_offset;
  100 + uint32_t refcount_table_size;
  101 + uint64_t refcount_block_cache_offset;
  102 + uint16_t *refcount_block_cache;
  103 + int64_t free_cluster_index;
  104 + int64_t free_byte_offset;
  105 +
  106 + uint32_t crypt_method; /* current crypt method, 0 if no key yet */
  107 + uint32_t crypt_method_header;
  108 + AES_KEY aes_encrypt_key;
  109 + AES_KEY aes_decrypt_key;
  110 + uint64_t snapshots_offset;
  111 + int snapshots_size;
  112 + int nb_snapshots;
  113 + QCowSnapshot *snapshots;
  114 +} BDRVQcowState;
  115 +
  116 +/* XXX: use std qcow open function ? */
  117 +typedef struct QCowCreateState {
  118 + int cluster_size;
  119 + int cluster_bits;
  120 + uint16_t *refcount_block;
  121 + uint64_t *refcount_table;
  122 + int64_t l1_table_offset;
  123 + int64_t refcount_table_offset;
  124 + int64_t refcount_block_offset;
  125 +} QCowCreateState;
  126 +
  127 +static int size_to_clusters(BDRVQcowState *s, int64_t size)
  128 +{
  129 + return (size + (s->cluster_size - 1)) >> s->cluster_bits;
  130 +}
  131 +
  132 +// FIXME Need qcow2_ prefix to global functions
  133 +
  134 +/* qcow2.c functions */
  135 +void l2_cache_reset(BlockDriverState *bs);
  136 +
  137 +/* qcow2-refcount.c functions */
  138 +int refcount_init(BlockDriverState *bs);
  139 +void refcount_close(BlockDriverState *bs);
  140 +
  141 +int64_t alloc_clusters(BlockDriverState *bs, int64_t size);
  142 +int64_t alloc_bytes(BlockDriverState *bs, int size);
  143 +void free_clusters(BlockDriverState *bs,
  144 + int64_t offset, int64_t size);
  145 +
  146 +void create_refcount_update(QCowCreateState *s, int64_t offset, int64_t size);
  147 +int update_snapshot_refcount(BlockDriverState *bs,
  148 + int64_t l1_table_offset,
  149 + int l1_size,
  150 + int addend);
  151 +
  152 +int check_refcounts(BlockDriverState *bs);
  153 +
  154 +#endif