Commit 585f8587ad3619cf070fdc19d2d0b8e7a2398d91
1 parent
9e46cfa4
new qcow2 disk image format
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@2083 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
1 changed file
with
2236 additions
and
0 deletions
block-qcow2.c
0 → 100644
1 | +/* | ||
2 | + * Block driver for the QCOW version 2 format | ||
3 | + * | ||
4 | + * Copyright (c) 2004-2006 Fabrice Bellard | ||
5 | + * | ||
6 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
7 | + * of this software and associated documentation files (the "Software"), to deal | ||
8 | + * in the Software without restriction, including without limitation the rights | ||
9 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
10 | + * copies of the Software, and to permit persons to whom the Software is | ||
11 | + * furnished to do so, subject to the following conditions: | ||
12 | + * | ||
13 | + * The above copyright notice and this permission notice shall be included in | ||
14 | + * all copies or substantial portions of the Software. | ||
15 | + * | ||
16 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
21 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
22 | + * THE SOFTWARE. | ||
23 | + */ | ||
24 | +#include "vl.h" | ||
25 | +#include "block_int.h" | ||
26 | +#include <zlib.h> | ||
27 | +#include "aes.h" | ||
28 | +#include <assert.h> | ||
29 | + | ||
30 | +/* | ||
31 | + Differences with QCOW: | ||
32 | + | ||
33 | + - Support for multiple incremental snapshots. | ||
34 | + - Memory management by reference counts. | ||
35 | + - Clusters which have a reference count of one have the bit | ||
36 | + QCOW_OFLAG_COPIED to optimize write performance. | ||
37 | + - Size of compressed clusters is stored in sectors to reduce bit usage | ||
38 | + in the cluster offsets. | ||
39 | + - Support for storing additional data (such as the VM state) in the | ||
40 | + snapshots. | ||
41 | + - If a backing store is used, the cluster size is not constrained | ||
42 | + (could be backported to QCOW). | ||
43 | + - L2 tables always have a size of one cluster. | ||
44 | +*/ | ||
45 | + | ||
46 | +//#define DEBUG_ALLOC | ||
47 | +//#define DEBUG_ALLOC2 | ||
48 | + | ||
/* Image signature: the bytes 'Q', 'F', 'I', 0xfb packed into 32 bits. */
#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
#define QCOW_VERSION 2

#define QCOW_CRYPT_NONE 0
#define QCOW_CRYPT_AES  1

/* indicate that the refcount of the referenced cluster is exactly one.
   The constant is unsigned: shifting a signed 1 into bit 63 is undefined
   behaviour, and the flag is only ever combined with 64 bit offsets. */
#define QCOW_OFLAG_COPIED     (1ULL << 63)
/* indicate that the cluster is compressed (they never have the copied flag) */
#define QCOW_OFLAG_COMPRESSED (1ULL << 62)

#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */

/* fallback for environments whose headers do not provide offsetof() */
#ifndef offsetof
#define offsetof(type, field) ((size_t) &((type *)0)->field)
#endif
65 | + | ||
/* Image header as stored at offset 0 of the image file.  qcow_open()
   reads it in one piece and converts every field from big endian. */
typedef struct QCowHeader {
    uint32_t magic;                 /* must be QCOW_MAGIC */
    uint32_t version;               /* must be QCOW_VERSION */
    uint64_t backing_file_offset;   /* file offset of the backing file name, 0 if none */
    uint32_t backing_file_size;     /* length in bytes of the backing file name */
    uint32_t cluster_bits;          /* log2 of the cluster size, 9..16 accepted by qcow_open() */
    uint64_t size; /* in bytes */
    uint32_t crypt_method;          /* QCOW_CRYPT_NONE or QCOW_CRYPT_AES */
    uint32_t l1_size; /* XXX: save number of clusters instead ? */
    uint64_t l1_table_offset;       /* file offset of the L1 table */
    uint64_t refcount_table_offset; /* file offset of the refcount table */
    uint32_t refcount_table_clusters; /* size of the refcount table, in clusters */
    uint32_t nb_snapshots;
    uint64_t snapshots_offset;
} QCowHeader;
81 | + | ||
/* Fixed size part of a snapshot record.  The structure is packed so that
   the compiler inserts no padding; the variable length parts listed at
   the end follow it directly. */
typedef struct __attribute__((packed)) QCowSnapshotHeader {
    /* header is 8 byte aligned */
    uint64_t l1_table_offset;

    uint32_t l1_size;
    uint16_t id_str_size;   /* length of the id string that follows */
    uint16_t name_size;     /* length of the name that follows */

    uint32_t date_sec;
    uint32_t date_nsec;

    uint64_t vm_clock_nsec;

    uint32_t vm_state_size;
    uint32_t extra_data_size; /* for extension */
    /* extra data follows */
    /* id_str follows */
    /* name follows */
} QCowSnapshotHeader;
101 | + | ||
102 | +#define L2_CACHE_SIZE 16 | ||
103 | + | ||
/* One entry of the BDRVQcowState.snapshots array.  The fields mirror
   those of QCowSnapshotHeader, with the two strings held through
   pointers (presumably heap allocated by the snapshot reader, which is
   not part of this section -- confirm there). */
typedef struct QCowSnapshot {
    uint64_t l1_table_offset;
    uint32_t l1_size;
    char *id_str;
    char *name;
    uint32_t vm_state_size;
    uint32_t date_sec;
    uint32_t date_nsec;
    uint64_t vm_clock_nsec;
} QCowSnapshot;
114 | + | ||
/* Per image driver state, reached through BlockDriverState.opaque.
   Most geometry fields are derived from the header in qcow_open(). */
typedef struct BDRVQcowState {
    BlockDriverState *hd;       /* underlying image file */
    int cluster_bits;           /* log2 of cluster_size */
    int cluster_size;           /* in bytes: 1 << cluster_bits */
    int cluster_sectors;        /* in 512 byte sectors: 1 << (cluster_bits - 9) */
    int l2_bits;                /* cluster_bits - 3: an L2 table fills one cluster */
    int l2_size;                /* number of entries per L2 table: 1 << l2_bits */
    int l1_size;                /* number of entries in l1_table */
    int l1_vm_state_index;      /* number of L1 entries needed to map header.size bytes */
    int csize_shift;            /* bit position of the sector count in a compressed entry */
    int csize_mask;             /* mask for that sector count, after shifting */
    uint64_t cluster_offset_mask; /* mask for the file offset in a compressed entry */
    uint64_t l1_table_offset;   /* file offset of the L1 table */
    uint64_t *l1_table;         /* L1 table, kept in host byte order */
    uint64_t *l2_cache;         /* L2_CACHE_SIZE tables of l2_size entries, big endian */
    uint64_t l2_cache_offsets[L2_CACHE_SIZE]; /* file offset cached in each slot, 0 if empty */
    uint32_t l2_cache_counts[L2_CACHE_SIZE];  /* hit counters used to pick a slot to evict */
    uint8_t *cluster_cache;     /* last decompressed cluster (cluster_size bytes) */
    uint8_t *cluster_data;      /* scratch buffer of cluster_size + 512 bytes */
    uint64_t cluster_cache_offset; /* file offset held in cluster_cache, -1 if none */

    uint64_t *refcount_table;
    uint64_t refcount_table_offset;
    uint32_t refcount_table_size;   /* in entries */
    uint64_t refcount_block_cache_offset;
    uint16_t *refcount_block_cache;
    int64_t free_cluster_index;
    int64_t free_byte_offset;

    uint32_t crypt_method; /* current crypt method, 0 if no key yet */
    uint32_t crypt_method_header;   /* crypt method announced by the image header */
    AES_KEY aes_encrypt_key;
    AES_KEY aes_decrypt_key;
    uint64_t snapshots_offset;
    int snapshots_size;
    int nb_snapshots;
    QCowSnapshot *snapshots;
} BDRVQcowState;
153 | + | ||
/* Forward declarations of helpers that are used before their definition. */
static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
static int qcow_read(BlockDriverState *bs, int64_t sector_num,
                     uint8_t *buf, int nb_sectors);
static int qcow_read_snapshots(BlockDriverState *bs);
static void qcow_free_snapshots(BlockDriverState *bs);
static int refcount_init(BlockDriverState *bs);
static void refcount_close(BlockDriverState *bs);
static int get_refcount(BlockDriverState *bs, int64_t cluster_index);
static int update_cluster_refcount(BlockDriverState *bs,
                                   int64_t cluster_index,
                                   int addend);
static void update_refcount(BlockDriverState *bs,
                            int64_t offset, int64_t length,
                            int addend);
static int64_t alloc_clusters(BlockDriverState *bs, int64_t size);
static int64_t alloc_bytes(BlockDriverState *bs, int size);
static void free_clusters(BlockDriverState *bs,
                          int64_t offset, int64_t size);
/* the consistency checker only exists in DEBUG_ALLOC builds */
#ifdef DEBUG_ALLOC
static void check_refcounts(BlockDriverState *bs);
#endif
175 | + | ||
176 | +static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) | ||
177 | +{ | ||
178 | + const QCowHeader *cow_header = (const void *)buf; | ||
179 | + | ||
180 | + if (buf_size >= sizeof(QCowHeader) && | ||
181 | + be32_to_cpu(cow_header->magic) == QCOW_MAGIC && | ||
182 | + be32_to_cpu(cow_header->version) == QCOW_VERSION) | ||
183 | + return 100; | ||
184 | + else | ||
185 | + return 0; | ||
186 | +} | ||
187 | + | ||
188 | +static int qcow_open(BlockDriverState *bs, const char *filename, int flags) | ||
189 | +{ | ||
190 | + BDRVQcowState *s = bs->opaque; | ||
191 | + int len, i, shift, ret; | ||
192 | + QCowHeader header; | ||
193 | + | ||
194 | + ret = bdrv_file_open(&s->hd, filename, flags); | ||
195 | + if (ret < 0) | ||
196 | + return ret; | ||
197 | + if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header)) | ||
198 | + goto fail; | ||
199 | + be32_to_cpus(&header.magic); | ||
200 | + be32_to_cpus(&header.version); | ||
201 | + be64_to_cpus(&header.backing_file_offset); | ||
202 | + be32_to_cpus(&header.backing_file_size); | ||
203 | + be64_to_cpus(&header.size); | ||
204 | + be32_to_cpus(&header.cluster_bits); | ||
205 | + be32_to_cpus(&header.crypt_method); | ||
206 | + be64_to_cpus(&header.l1_table_offset); | ||
207 | + be32_to_cpus(&header.l1_size); | ||
208 | + be64_to_cpus(&header.refcount_table_offset); | ||
209 | + be32_to_cpus(&header.refcount_table_clusters); | ||
210 | + be64_to_cpus(&header.snapshots_offset); | ||
211 | + be32_to_cpus(&header.nb_snapshots); | ||
212 | + | ||
213 | + if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION) | ||
214 | + goto fail; | ||
215 | + if (header.size <= 1 || | ||
216 | + header.cluster_bits < 9 || | ||
217 | + header.cluster_bits > 16) | ||
218 | + goto fail; | ||
219 | + if (header.crypt_method > QCOW_CRYPT_AES) | ||
220 | + goto fail; | ||
221 | + s->crypt_method_header = header.crypt_method; | ||
222 | + if (s->crypt_method_header) | ||
223 | + bs->encrypted = 1; | ||
224 | + s->cluster_bits = header.cluster_bits; | ||
225 | + s->cluster_size = 1 << s->cluster_bits; | ||
226 | + s->cluster_sectors = 1 << (s->cluster_bits - 9); | ||
227 | + s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */ | ||
228 | + s->l2_size = 1 << s->l2_bits; | ||
229 | + bs->total_sectors = header.size / 512; | ||
230 | + s->csize_shift = (62 - (s->cluster_bits - 8)); | ||
231 | + s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; | ||
232 | + s->cluster_offset_mask = (1LL << s->csize_shift) - 1; | ||
233 | + s->refcount_table_offset = header.refcount_table_offset; | ||
234 | + s->refcount_table_size = | ||
235 | + header.refcount_table_clusters << (s->cluster_bits - 3); | ||
236 | + | ||
237 | + s->snapshots_offset = header.snapshots_offset; | ||
238 | + s->nb_snapshots = header.nb_snapshots; | ||
239 | + | ||
240 | + /* read the level 1 table */ | ||
241 | + s->l1_size = header.l1_size; | ||
242 | + shift = s->cluster_bits + s->l2_bits; | ||
243 | + s->l1_vm_state_index = (header.size + (1LL << shift) - 1) >> shift; | ||
244 | + /* the L1 table must contain at least enough entries to put | ||
245 | + header.size bytes */ | ||
246 | + if (s->l1_size < s->l1_vm_state_index) | ||
247 | + goto fail; | ||
248 | + s->l1_table_offset = header.l1_table_offset; | ||
249 | + s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t)); | ||
250 | + if (!s->l1_table) | ||
251 | + goto fail; | ||
252 | + if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) != | ||
253 | + s->l1_size * sizeof(uint64_t)) | ||
254 | + goto fail; | ||
255 | + for(i = 0;i < s->l1_size; i++) { | ||
256 | + be64_to_cpus(&s->l1_table[i]); | ||
257 | + } | ||
258 | + /* alloc L2 cache */ | ||
259 | + s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); | ||
260 | + if (!s->l2_cache) | ||
261 | + goto fail; | ||
262 | + s->cluster_cache = qemu_malloc(s->cluster_size); | ||
263 | + if (!s->cluster_cache) | ||
264 | + goto fail; | ||
265 | + /* one more sector for decompressed data alignment */ | ||
266 | + s->cluster_data = qemu_malloc(s->cluster_size + 512); | ||
267 | + if (!s->cluster_data) | ||
268 | + goto fail; | ||
269 | + s->cluster_cache_offset = -1; | ||
270 | + | ||
271 | + if (refcount_init(bs) < 0) | ||
272 | + goto fail; | ||
273 | + | ||
274 | + /* read the backing file name */ | ||
275 | + if (header.backing_file_offset != 0) { | ||
276 | + len = header.backing_file_size; | ||
277 | + if (len > 1023) | ||
278 | + len = 1023; | ||
279 | + if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len) | ||
280 | + goto fail; | ||
281 | + bs->backing_file[len] = '\0'; | ||
282 | + } | ||
283 | + if (qcow_read_snapshots(bs) < 0) | ||
284 | + goto fail; | ||
285 | + | ||
286 | +#ifdef DEBUG_ALLOC | ||
287 | + check_refcounts(bs); | ||
288 | +#endif | ||
289 | + return 0; | ||
290 | + | ||
291 | + fail: | ||
292 | + qcow_free_snapshots(bs); | ||
293 | + refcount_close(bs); | ||
294 | + qemu_free(s->l1_table); | ||
295 | + qemu_free(s->l2_cache); | ||
296 | + qemu_free(s->cluster_cache); | ||
297 | + qemu_free(s->cluster_data); | ||
298 | + bdrv_delete(s->hd); | ||
299 | + return -1; | ||
300 | +} | ||
301 | + | ||
302 | +static int qcow_set_key(BlockDriverState *bs, const char *key) | ||
303 | +{ | ||
304 | + BDRVQcowState *s = bs->opaque; | ||
305 | + uint8_t keybuf[16]; | ||
306 | + int len, i; | ||
307 | + | ||
308 | + memset(keybuf, 0, 16); | ||
309 | + len = strlen(key); | ||
310 | + if (len > 16) | ||
311 | + len = 16; | ||
312 | + /* XXX: we could compress the chars to 7 bits to increase | ||
313 | + entropy */ | ||
314 | + for(i = 0;i < len;i++) { | ||
315 | + keybuf[i] = key[i]; | ||
316 | + } | ||
317 | + s->crypt_method = s->crypt_method_header; | ||
318 | + | ||
319 | + if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) | ||
320 | + return -1; | ||
321 | + if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) | ||
322 | + return -1; | ||
323 | +#if 0 | ||
324 | + /* test */ | ||
325 | + { | ||
326 | + uint8_t in[16]; | ||
327 | + uint8_t out[16]; | ||
328 | + uint8_t tmp[16]; | ||
329 | + for(i=0;i<16;i++) | ||
330 | + in[i] = i; | ||
331 | + AES_encrypt(in, tmp, &s->aes_encrypt_key); | ||
332 | + AES_decrypt(tmp, out, &s->aes_decrypt_key); | ||
333 | + for(i = 0; i < 16; i++) | ||
334 | + printf(" %02x", tmp[i]); | ||
335 | + printf("\n"); | ||
336 | + for(i = 0; i < 16; i++) | ||
337 | + printf(" %02x", out[i]); | ||
338 | + printf("\n"); | ||
339 | + } | ||
340 | +#endif | ||
341 | + return 0; | ||
342 | +} | ||
343 | + | ||
344 | +/* The crypt function is compatible with the linux cryptoloop | ||
345 | + algorithm for < 4 GB images. NOTE: out_buf == in_buf is | ||
346 | + supported */ | ||
347 | +static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num, | ||
348 | + uint8_t *out_buf, const uint8_t *in_buf, | ||
349 | + int nb_sectors, int enc, | ||
350 | + const AES_KEY *key) | ||
351 | +{ | ||
352 | + union { | ||
353 | + uint64_t ll[2]; | ||
354 | + uint8_t b[16]; | ||
355 | + } ivec; | ||
356 | + int i; | ||
357 | + | ||
358 | + for(i = 0; i < nb_sectors; i++) { | ||
359 | + ivec.ll[0] = cpu_to_le64(sector_num); | ||
360 | + ivec.ll[1] = 0; | ||
361 | + AES_cbc_encrypt(in_buf, out_buf, 512, key, | ||
362 | + ivec.b, enc); | ||
363 | + sector_num++; | ||
364 | + in_buf += 512; | ||
365 | + out_buf += 512; | ||
366 | + } | ||
367 | +} | ||
368 | + | ||
369 | +static int copy_sectors(BlockDriverState *bs, uint64_t start_sect, | ||
370 | + uint64_t cluster_offset, int n_start, int n_end) | ||
371 | +{ | ||
372 | + BDRVQcowState *s = bs->opaque; | ||
373 | + int n, ret; | ||
374 | + | ||
375 | + n = n_end - n_start; | ||
376 | + if (n <= 0) | ||
377 | + return 0; | ||
378 | + ret = qcow_read(bs, start_sect + n_start, s->cluster_data, n); | ||
379 | + if (ret < 0) | ||
380 | + return ret; | ||
381 | + if (s->crypt_method) { | ||
382 | + encrypt_sectors(s, start_sect + n_start, | ||
383 | + s->cluster_data, | ||
384 | + s->cluster_data, n, 1, | ||
385 | + &s->aes_encrypt_key); | ||
386 | + } | ||
387 | + ret = bdrv_write(s->hd, (cluster_offset >> 9) + n_start, | ||
388 | + s->cluster_data, n); | ||
389 | + if (ret < 0) | ||
390 | + return ret; | ||
391 | + return 0; | ||
392 | +} | ||
393 | + | ||
394 | +static void l2_cache_reset(BlockDriverState *bs) | ||
395 | +{ | ||
396 | + BDRVQcowState *s = bs->opaque; | ||
397 | + | ||
398 | + memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); | ||
399 | + memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t)); | ||
400 | + memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t)); | ||
401 | +} | ||
402 | + | ||
403 | +static inline int l2_cache_new_entry(BlockDriverState *bs) | ||
404 | +{ | ||
405 | + BDRVQcowState *s = bs->opaque; | ||
406 | + uint32_t min_count; | ||
407 | + int min_index, i; | ||
408 | + | ||
409 | + /* find a new entry in the least used one */ | ||
410 | + min_index = 0; | ||
411 | + min_count = 0xffffffff; | ||
412 | + for(i = 0; i < L2_CACHE_SIZE; i++) { | ||
413 | + if (s->l2_cache_counts[i] < min_count) { | ||
414 | + min_count = s->l2_cache_counts[i]; | ||
415 | + min_index = i; | ||
416 | + } | ||
417 | + } | ||
418 | + return min_index; | ||
419 | +} | ||
420 | + | ||
/* Round 'offset' up to the next multiple of 'n'.  'n' must be a power
   of two. */
static int64_t align_offset(int64_t offset, int n)
{
    const int64_t low_bits = (int64_t)n - 1;

    return (offset + low_bits) & ~low_bits;
}
426 | + | ||
427 | +static int grow_l1_table(BlockDriverState *bs, int min_size) | ||
428 | +{ | ||
429 | + BDRVQcowState *s = bs->opaque; | ||
430 | + int new_l1_size, new_l1_size2, ret, i; | ||
431 | + uint64_t *new_l1_table; | ||
432 | + uint64_t new_l1_table_offset; | ||
433 | + uint64_t data64; | ||
434 | + uint32_t data32; | ||
435 | + | ||
436 | + new_l1_size = s->l1_size; | ||
437 | + if (min_size <= new_l1_size) | ||
438 | + return 0; | ||
439 | + while (min_size > new_l1_size) { | ||
440 | + new_l1_size = (new_l1_size * 3 + 1) / 2; | ||
441 | + } | ||
442 | +#ifdef DEBUG_ALLOC2 | ||
443 | + printf("grow l1_table from %d to %d\n", s->l1_size, new_l1_size); | ||
444 | +#endif | ||
445 | + | ||
446 | + new_l1_size2 = sizeof(uint64_t) * new_l1_size; | ||
447 | + new_l1_table = qemu_mallocz(new_l1_size2); | ||
448 | + if (!new_l1_table) | ||
449 | + return -ENOMEM; | ||
450 | + memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t)); | ||
451 | + | ||
452 | + /* write new table (align to cluster) */ | ||
453 | + new_l1_table_offset = alloc_clusters(bs, new_l1_size2); | ||
454 | + | ||
455 | + for(i = 0; i < s->l1_size; i++) | ||
456 | + new_l1_table[i] = cpu_to_be64(new_l1_table[i]); | ||
457 | + ret = bdrv_pwrite(s->hd, new_l1_table_offset, new_l1_table, new_l1_size2); | ||
458 | + if (ret != new_l1_size2) | ||
459 | + goto fail; | ||
460 | + for(i = 0; i < s->l1_size; i++) | ||
461 | + new_l1_table[i] = be64_to_cpu(new_l1_table[i]); | ||
462 | + | ||
463 | + /* set new table */ | ||
464 | + data64 = cpu_to_be64(new_l1_table_offset); | ||
465 | + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_table_offset), | ||
466 | + &data64, sizeof(data64)) != sizeof(data64)) | ||
467 | + goto fail; | ||
468 | + data32 = cpu_to_be32(new_l1_size); | ||
469 | + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size), | ||
470 | + &data32, sizeof(data32)) != sizeof(data32)) | ||
471 | + goto fail; | ||
472 | + qemu_free(s->l1_table); | ||
473 | + free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t)); | ||
474 | + s->l1_table_offset = new_l1_table_offset; | ||
475 | + s->l1_table = new_l1_table; | ||
476 | + s->l1_size = new_l1_size; | ||
477 | + return 0; | ||
478 | + fail: | ||
479 | + qemu_free(s->l1_table); | ||
480 | + return -EIO; | ||
481 | +} | ||
482 | + | ||
483 | +/* 'allocate' is: | ||
484 | + * | ||
485 | + * 0 not to allocate. | ||
486 | + * | ||
487 | + * 1 to allocate a normal cluster (for sector indexes 'n_start' to | ||
488 | + * 'n_end') | ||
489 | + * | ||
490 | + * 2 to allocate a compressed cluster of size | ||
491 | + * 'compressed_size'. 'compressed_size' must be > 0 and < | ||
492 | + * cluster_size | ||
493 | + * | ||
494 | + * return 0 if not allocated. | ||
495 | + */ | ||
496 | +static uint64_t get_cluster_offset(BlockDriverState *bs, | ||
497 | + uint64_t offset, int allocate, | ||
498 | + int compressed_size, | ||
499 | + int n_start, int n_end) | ||
500 | +{ | ||
501 | + BDRVQcowState *s = bs->opaque; | ||
502 | + int min_index, i, j, l1_index, l2_index, ret; | ||
503 | + uint64_t l2_offset, *l2_table, cluster_offset, tmp, old_l2_offset; | ||
504 | + | ||
505 | + l1_index = offset >> (s->l2_bits + s->cluster_bits); | ||
506 | + if (l1_index >= s->l1_size) { | ||
507 | + /* outside l1 table is allowed: we grow the table if needed */ | ||
508 | + if (!allocate) | ||
509 | + return 0; | ||
510 | + if (grow_l1_table(bs, l1_index + 1) < 0) | ||
511 | + return 0; | ||
512 | + } | ||
513 | + l2_offset = s->l1_table[l1_index]; | ||
514 | + if (!l2_offset) { | ||
515 | + if (!allocate) | ||
516 | + return 0; | ||
517 | + l2_allocate: | ||
518 | + old_l2_offset = l2_offset; | ||
519 | + /* allocate a new l2 entry */ | ||
520 | + l2_offset = alloc_clusters(bs, s->l2_size * sizeof(uint64_t)); | ||
521 | + /* update the L1 entry */ | ||
522 | + s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; | ||
523 | + tmp = cpu_to_be64(l2_offset | QCOW_OFLAG_COPIED); | ||
524 | + if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp), | ||
525 | + &tmp, sizeof(tmp)) != sizeof(tmp)) | ||
526 | + return 0; | ||
527 | + min_index = l2_cache_new_entry(bs); | ||
528 | + l2_table = s->l2_cache + (min_index << s->l2_bits); | ||
529 | + | ||
530 | + if (old_l2_offset == 0) { | ||
531 | + memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); | ||
532 | + } else { | ||
533 | + if (bdrv_pread(s->hd, old_l2_offset, | ||
534 | + l2_table, s->l2_size * sizeof(uint64_t)) != | ||
535 | + s->l2_size * sizeof(uint64_t)) | ||
536 | + return 0; | ||
537 | + } | ||
538 | + if (bdrv_pwrite(s->hd, l2_offset, | ||
539 | + l2_table, s->l2_size * sizeof(uint64_t)) != | ||
540 | + s->l2_size * sizeof(uint64_t)) | ||
541 | + return 0; | ||
542 | + } else { | ||
543 | + if (!(l2_offset & QCOW_OFLAG_COPIED)) { | ||
544 | + if (allocate) { | ||
545 | + free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t)); | ||
546 | + goto l2_allocate; | ||
547 | + } | ||
548 | + } else { | ||
549 | + l2_offset &= ~QCOW_OFLAG_COPIED; | ||
550 | + } | ||
551 | + for(i = 0; i < L2_CACHE_SIZE; i++) { | ||
552 | + if (l2_offset == s->l2_cache_offsets[i]) { | ||
553 | + /* increment the hit count */ | ||
554 | + if (++s->l2_cache_counts[i] == 0xffffffff) { | ||
555 | + for(j = 0; j < L2_CACHE_SIZE; j++) { | ||
556 | + s->l2_cache_counts[j] >>= 1; | ||
557 | + } | ||
558 | + } | ||
559 | + l2_table = s->l2_cache + (i << s->l2_bits); | ||
560 | + goto found; | ||
561 | + } | ||
562 | + } | ||
563 | + /* not found: load a new entry in the least used one */ | ||
564 | + min_index = l2_cache_new_entry(bs); | ||
565 | + l2_table = s->l2_cache + (min_index << s->l2_bits); | ||
566 | + if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != | ||
567 | + s->l2_size * sizeof(uint64_t)) | ||
568 | + return 0; | ||
569 | + } | ||
570 | + s->l2_cache_offsets[min_index] = l2_offset; | ||
571 | + s->l2_cache_counts[min_index] = 1; | ||
572 | + found: | ||
573 | + l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); | ||
574 | + cluster_offset = be64_to_cpu(l2_table[l2_index]); | ||
575 | + if (!cluster_offset) { | ||
576 | + if (!allocate) | ||
577 | + return cluster_offset; | ||
578 | + } else if (!(cluster_offset & QCOW_OFLAG_COPIED)) { | ||
579 | + if (!allocate) | ||
580 | + return cluster_offset; | ||
581 | + /* free the cluster */ | ||
582 | + if (cluster_offset & QCOW_OFLAG_COMPRESSED) { | ||
583 | + int nb_csectors; | ||
584 | + nb_csectors = ((cluster_offset >> s->csize_shift) & | ||
585 | + s->csize_mask) + 1; | ||
586 | + free_clusters(bs, (cluster_offset & s->cluster_offset_mask) & ~511, | ||
587 | + nb_csectors * 512); | ||
588 | + } else { | ||
589 | + free_clusters(bs, cluster_offset, s->cluster_size); | ||
590 | + } | ||
591 | + } else { | ||
592 | + cluster_offset &= ~QCOW_OFLAG_COPIED; | ||
593 | + return cluster_offset; | ||
594 | + } | ||
595 | + if (allocate == 1) { | ||
596 | + /* allocate a new cluster */ | ||
597 | + cluster_offset = alloc_clusters(bs, s->cluster_size); | ||
598 | + | ||
599 | + /* we must initialize the cluster content which won't be | ||
600 | + written */ | ||
601 | + if ((n_end - n_start) < s->cluster_sectors) { | ||
602 | + uint64_t start_sect; | ||
603 | + | ||
604 | + start_sect = (offset & ~(s->cluster_size - 1)) >> 9; | ||
605 | + ret = copy_sectors(bs, start_sect, | ||
606 | + cluster_offset, 0, n_start); | ||
607 | + if (ret < 0) | ||
608 | + return 0; | ||
609 | + ret = copy_sectors(bs, start_sect, | ||
610 | + cluster_offset, n_end, s->cluster_sectors); | ||
611 | + if (ret < 0) | ||
612 | + return 0; | ||
613 | + } | ||
614 | + tmp = cpu_to_be64(cluster_offset | QCOW_OFLAG_COPIED); | ||
615 | + } else { | ||
616 | + int nb_csectors; | ||
617 | + cluster_offset = alloc_bytes(bs, compressed_size); | ||
618 | + nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) - | ||
619 | + (cluster_offset >> 9); | ||
620 | + cluster_offset |= QCOW_OFLAG_COMPRESSED | | ||
621 | + ((uint64_t)nb_csectors << s->csize_shift); | ||
622 | + /* compressed clusters never have the copied flag */ | ||
623 | + tmp = cpu_to_be64(cluster_offset); | ||
624 | + } | ||
625 | + /* update L2 table */ | ||
626 | + l2_table[l2_index] = tmp; | ||
627 | + if (bdrv_pwrite(s->hd, | ||
628 | + l2_offset + l2_index * sizeof(tmp), &tmp, sizeof(tmp)) != sizeof(tmp)) | ||
629 | + return 0; | ||
630 | + return cluster_offset; | ||
631 | +} | ||
632 | + | ||
633 | +static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num, | ||
634 | + int nb_sectors, int *pnum) | ||
635 | +{ | ||
636 | + BDRVQcowState *s = bs->opaque; | ||
637 | + int index_in_cluster, n; | ||
638 | + uint64_t cluster_offset; | ||
639 | + | ||
640 | + cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); | ||
641 | + index_in_cluster = sector_num & (s->cluster_sectors - 1); | ||
642 | + n = s->cluster_sectors - index_in_cluster; | ||
643 | + if (n > nb_sectors) | ||
644 | + n = nb_sectors; | ||
645 | + *pnum = n; | ||
646 | + return (cluster_offset != 0); | ||
647 | +} | ||
648 | + | ||
649 | +static int decompress_buffer(uint8_t *out_buf, int out_buf_size, | ||
650 | + const uint8_t *buf, int buf_size) | ||
651 | +{ | ||
652 | + z_stream strm1, *strm = &strm1; | ||
653 | + int ret, out_len; | ||
654 | + | ||
655 | + memset(strm, 0, sizeof(*strm)); | ||
656 | + | ||
657 | + strm->next_in = (uint8_t *)buf; | ||
658 | + strm->avail_in = buf_size; | ||
659 | + strm->next_out = out_buf; | ||
660 | + strm->avail_out = out_buf_size; | ||
661 | + | ||
662 | + ret = inflateInit2(strm, -12); | ||
663 | + if (ret != Z_OK) | ||
664 | + return -1; | ||
665 | + ret = inflate(strm, Z_FINISH); | ||
666 | + out_len = strm->next_out - out_buf; | ||
667 | + if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || | ||
668 | + out_len != out_buf_size) { | ||
669 | + inflateEnd(strm); | ||
670 | + return -1; | ||
671 | + } | ||
672 | + inflateEnd(strm); | ||
673 | + return 0; | ||
674 | +} | ||
675 | + | ||
676 | +static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset) | ||
677 | +{ | ||
678 | + int ret, csize, nb_csectors, sector_offset; | ||
679 | + uint64_t coffset; | ||
680 | + | ||
681 | + coffset = cluster_offset & s->cluster_offset_mask; | ||
682 | + if (s->cluster_cache_offset != coffset) { | ||
683 | + nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1; | ||
684 | + sector_offset = coffset & 511; | ||
685 | + csize = nb_csectors * 512 - sector_offset; | ||
686 | + ret = bdrv_read(s->hd, coffset >> 9, s->cluster_data, nb_csectors); | ||
687 | + if (ret < 0) { | ||
688 | + return -1; | ||
689 | + } | ||
690 | + if (decompress_buffer(s->cluster_cache, s->cluster_size, | ||
691 | + s->cluster_data + sector_offset, csize) < 0) { | ||
692 | + return -1; | ||
693 | + } | ||
694 | + s->cluster_cache_offset = coffset; | ||
695 | + } | ||
696 | + return 0; | ||
697 | +} | ||
698 | + | ||
699 | +static int qcow_read(BlockDriverState *bs, int64_t sector_num, | ||
700 | + uint8_t *buf, int nb_sectors) | ||
701 | +{ | ||
702 | + BDRVQcowState *s = bs->opaque; | ||
703 | + int ret, index_in_cluster, n; | ||
704 | + uint64_t cluster_offset; | ||
705 | + | ||
706 | + while (nb_sectors > 0) { | ||
707 | + cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); | ||
708 | + index_in_cluster = sector_num & (s->cluster_sectors - 1); | ||
709 | + n = s->cluster_sectors - index_in_cluster; | ||
710 | + if (n > nb_sectors) | ||
711 | + n = nb_sectors; | ||
712 | + if (!cluster_offset) { | ||
713 | + if (bs->backing_hd) { | ||
714 | + /* read from the base image */ | ||
715 | + ret = bdrv_read(bs->backing_hd, sector_num, buf, n); | ||
716 | + if (ret < 0) | ||
717 | + return -1; | ||
718 | + } else { | ||
719 | + memset(buf, 0, 512 * n); | ||
720 | + } | ||
721 | + } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { | ||
722 | + if (decompress_cluster(s, cluster_offset) < 0) | ||
723 | + return -1; | ||
724 | + memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n); | ||
725 | + } else { | ||
726 | + ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512); | ||
727 | + if (ret != n * 512) | ||
728 | + return -1; | ||
729 | + if (s->crypt_method) { | ||
730 | + encrypt_sectors(s, sector_num, buf, buf, n, 0, | ||
731 | + &s->aes_decrypt_key); | ||
732 | + } | ||
733 | + } | ||
734 | + nb_sectors -= n; | ||
735 | + sector_num += n; | ||
736 | + buf += n * 512; | ||
737 | + } | ||
738 | + return 0; | ||
739 | +} | ||
740 | + | ||
741 | +static int qcow_write(BlockDriverState *bs, int64_t sector_num, | ||
742 | + const uint8_t *buf, int nb_sectors) | ||
743 | +{ | ||
744 | + BDRVQcowState *s = bs->opaque; | ||
745 | + int ret, index_in_cluster, n; | ||
746 | + uint64_t cluster_offset; | ||
747 | + | ||
748 | + while (nb_sectors > 0) { | ||
749 | + index_in_cluster = sector_num & (s->cluster_sectors - 1); | ||
750 | + n = s->cluster_sectors - index_in_cluster; | ||
751 | + if (n > nb_sectors) | ||
752 | + n = nb_sectors; | ||
753 | + cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0, | ||
754 | + index_in_cluster, | ||
755 | + index_in_cluster + n); | ||
756 | + if (!cluster_offset) | ||
757 | + return -1; | ||
758 | + if (s->crypt_method) { | ||
759 | + encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1, | ||
760 | + &s->aes_encrypt_key); | ||
761 | + ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, | ||
762 | + s->cluster_data, n * 512); | ||
763 | + } else { | ||
764 | + ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512); | ||
765 | + } | ||
766 | + if (ret != n * 512) | ||
767 | + return -1; | ||
768 | + nb_sectors -= n; | ||
769 | + sector_num += n; | ||
770 | + buf += n * 512; | ||
771 | + } | ||
772 | + s->cluster_cache_offset = -1; /* disable compressed cache */ | ||
773 | + return 0; | ||
774 | +} | ||
775 | + | ||
/* Driver private part of an asynchronous request, attached to
   BlockDriverAIOCB.opaque by qcow_aio_new(). */
typedef struct {
    int64_t sector_num;     /* next sector to process */
    uint8_t *buf;           /* where the data for sector_num goes */
    int nb_sectors;         /* sectors still to process */
    int n;                  /* sectors covered by the step in progress */
    uint64_t cluster_offset; /* L2 entry of the cluster of the step in progress */
    uint8_t *cluster_data;
    BlockDriverAIOCB *hd_aiocb;         /* request on the image file */
    BlockDriverAIOCB *backing_hd_aiocb; /* request on the backing image, if there is one */
} QCowAIOCB;
786 | + | ||
787 | +static void qcow_aio_delete(BlockDriverAIOCB *acb); | ||
788 | + | ||
789 | +static int qcow_aio_new(BlockDriverAIOCB *acb) | ||
790 | +{ | ||
791 | + BlockDriverState *bs = acb->bs; | ||
792 | + BDRVQcowState *s = bs->opaque; | ||
793 | + QCowAIOCB *acb1; | ||
794 | + acb1 = qemu_mallocz(sizeof(QCowAIOCB)); | ||
795 | + if (!acb1) | ||
796 | + return -1; | ||
797 | + acb->opaque = acb1; | ||
798 | + acb1->hd_aiocb = bdrv_aio_new(s->hd); | ||
799 | + if (!acb1->hd_aiocb) | ||
800 | + goto fail; | ||
801 | + if (bs->backing_hd) { | ||
802 | + acb1->backing_hd_aiocb = bdrv_aio_new(bs->backing_hd); | ||
803 | + if (!acb1->backing_hd_aiocb) | ||
804 | + goto fail; | ||
805 | + } | ||
806 | + return 0; | ||
807 | + fail: | ||
808 | + qcow_aio_delete(acb); | ||
809 | + return -1; | ||
810 | +} | ||
811 | + | ||
812 | +static void qcow_aio_read_cb(void *opaque, int ret) | ||
813 | +{ | ||
814 | + BlockDriverAIOCB *acb = opaque; | ||
815 | + BlockDriverState *bs = acb->bs; | ||
816 | + BDRVQcowState *s = bs->opaque; | ||
817 | + QCowAIOCB *acb1 = acb->opaque; | ||
818 | + int index_in_cluster; | ||
819 | + | ||
820 | + if (ret < 0) { | ||
821 | + fail: | ||
822 | + acb->cb(acb->cb_opaque, ret); | ||
823 | + return; | ||
824 | + } | ||
825 | + | ||
826 | + redo: | ||
827 | + /* post process the read buffer */ | ||
828 | + if (!acb1->cluster_offset) { | ||
829 | + /* nothing to do */ | ||
830 | + } else if (acb1->cluster_offset & QCOW_OFLAG_COMPRESSED) { | ||
831 | + /* nothing to do */ | ||
832 | + } else { | ||
833 | + if (s->crypt_method) { | ||
834 | + encrypt_sectors(s, acb1->sector_num, acb1->buf, acb1->buf, | ||
835 | + acb1->n, 0, | ||
836 | + &s->aes_decrypt_key); | ||
837 | + } | ||
838 | + } | ||
839 | + | ||
840 | + acb1->nb_sectors -= acb1->n; | ||
841 | + acb1->sector_num += acb1->n; | ||
842 | + acb1->buf += acb1->n * 512; | ||
843 | + | ||
844 | + if (acb1->nb_sectors == 0) { | ||
845 | + /* request completed */ | ||
846 | + acb->cb(acb->cb_opaque, 0); | ||
847 | + return; | ||
848 | + } | ||
849 | + | ||
850 | + /* prepare next AIO request */ | ||
851 | + acb1->cluster_offset = get_cluster_offset(bs, | ||
852 | + acb1->sector_num << 9, | ||
853 | + 0, 0, 0, 0); | ||
854 | + index_in_cluster = acb1->sector_num & (s->cluster_sectors - 1); | ||
855 | + acb1->n = s->cluster_sectors - index_in_cluster; | ||
856 | + if (acb1->n > acb1->nb_sectors) | ||
857 | + acb1->n = acb1->nb_sectors; | ||
858 | + | ||
859 | + if (!acb1->cluster_offset) { | ||
860 | + if (bs->backing_hd) { | ||
861 | + /* read from the base image */ | ||
862 | + ret = bdrv_aio_read(acb1->backing_hd_aiocb, acb1->sector_num, | ||
863 | + acb1->buf, acb1->n, qcow_aio_read_cb, acb); | ||
864 | + if (ret < 0) | ||
865 | + goto fail; | ||
866 | + } else { | ||
867 | + /* Note: in this case, no need to wait */ | ||
868 | + memset(acb1->buf, 0, 512 * acb1->n); | ||
869 | + goto redo; | ||
870 | + } | ||
871 | + } else if (acb1->cluster_offset & QCOW_OFLAG_COMPRESSED) { | ||
872 | + /* add AIO support for compressed blocks ? */ | ||
873 | + if (decompress_cluster(s, acb1->cluster_offset) < 0) | ||
874 | + goto fail; | ||
875 | + memcpy(acb1->buf, | ||
876 | + s->cluster_cache + index_in_cluster * 512, 512 * acb1->n); | ||
877 | + goto redo; | ||
878 | + } else { | ||
879 | + if ((acb1->cluster_offset & 511) != 0) { | ||
880 | + ret = -EIO; | ||
881 | + goto fail; | ||
882 | + } | ||
883 | + ret = bdrv_aio_read(acb1->hd_aiocb, | ||
884 | + (acb1->cluster_offset >> 9) + index_in_cluster, | ||
885 | + acb1->buf, acb1->n, qcow_aio_read_cb, acb); | ||
886 | + if (ret < 0) | ||
887 | + goto fail; | ||
888 | + } | ||
889 | +} | ||
890 | + | ||
891 | +static int qcow_aio_read(BlockDriverAIOCB *acb, int64_t sector_num, | ||
892 | + uint8_t *buf, int nb_sectors) | ||
893 | +{ | ||
894 | + QCowAIOCB *acb1 = acb->opaque; | ||
895 | + | ||
896 | + acb1->sector_num = sector_num; | ||
897 | + acb1->buf = buf; | ||
898 | + acb1->nb_sectors = nb_sectors; | ||
899 | + acb1->n = 0; | ||
900 | + acb1->cluster_offset = 0; | ||
901 | + | ||
902 | + qcow_aio_read_cb(acb, 0); | ||
903 | + return 0; | ||
904 | +} | ||
905 | + | ||
906 | +static void qcow_aio_write_cb(void *opaque, int ret) | ||
907 | +{ | ||
908 | + BlockDriverAIOCB *acb = opaque; | ||
909 | + BlockDriverState *bs = acb->bs; | ||
910 | + BDRVQcowState *s = bs->opaque; | ||
911 | + QCowAIOCB *acb1 = acb->opaque; | ||
912 | + int index_in_cluster; | ||
913 | + uint64_t cluster_offset; | ||
914 | + const uint8_t *src_buf; | ||
915 | + | ||
916 | + if (ret < 0) { | ||
917 | + fail: | ||
918 | + acb->cb(acb->cb_opaque, ret); | ||
919 | + return; | ||
920 | + } | ||
921 | + | ||
922 | + acb1->nb_sectors -= acb1->n; | ||
923 | + acb1->sector_num += acb1->n; | ||
924 | + acb1->buf += acb1->n * 512; | ||
925 | + | ||
926 | + if (acb1->nb_sectors == 0) { | ||
927 | + /* request completed */ | ||
928 | + acb->cb(acb->cb_opaque, 0); | ||
929 | + return; | ||
930 | + } | ||
931 | + | ||
932 | + index_in_cluster = acb1->sector_num & (s->cluster_sectors - 1); | ||
933 | + acb1->n = s->cluster_sectors - index_in_cluster; | ||
934 | + if (acb1->n > acb1->nb_sectors) | ||
935 | + acb1->n = acb1->nb_sectors; | ||
936 | + cluster_offset = get_cluster_offset(bs, acb1->sector_num << 9, 1, 0, | ||
937 | + index_in_cluster, | ||
938 | + index_in_cluster + acb1->n); | ||
939 | + if (!cluster_offset || (cluster_offset & 511) != 0) { | ||
940 | + ret = -EIO; | ||
941 | + goto fail; | ||
942 | + } | ||
943 | + if (s->crypt_method) { | ||
944 | + if (!acb1->cluster_data) { | ||
945 | + acb1->cluster_data = qemu_mallocz(s->cluster_size); | ||
946 | + if (!acb1->cluster_data) { | ||
947 | + ret = -ENOMEM; | ||
948 | + goto fail; | ||
949 | + } | ||
950 | + } | ||
951 | + encrypt_sectors(s, acb1->sector_num, acb1->cluster_data, acb1->buf, | ||
952 | + acb1->n, 1, &s->aes_encrypt_key); | ||
953 | + src_buf = acb1->cluster_data; | ||
954 | + } else { | ||
955 | + src_buf = acb1->buf; | ||
956 | + } | ||
957 | + ret = bdrv_aio_write(acb1->hd_aiocb, | ||
958 | + (cluster_offset >> 9) + index_in_cluster, | ||
959 | + src_buf, acb1->n, | ||
960 | + qcow_aio_write_cb, acb); | ||
961 | + if (ret < 0) | ||
962 | + goto fail; | ||
963 | +} | ||
964 | + | ||
965 | +static int qcow_aio_write(BlockDriverAIOCB *acb, int64_t sector_num, | ||
966 | + const uint8_t *buf, int nb_sectors) | ||
967 | +{ | ||
968 | + QCowAIOCB *acb1 = acb->opaque; | ||
969 | + BlockDriverState *bs = acb->bs; | ||
970 | + BDRVQcowState *s = bs->opaque; | ||
971 | + | ||
972 | + s->cluster_cache_offset = -1; /* disable compressed cache */ | ||
973 | + | ||
974 | + acb1->sector_num = sector_num; | ||
975 | + acb1->buf = (uint8_t *)buf; | ||
976 | + acb1->nb_sectors = nb_sectors; | ||
977 | + acb1->n = 0; | ||
978 | + | ||
979 | + qcow_aio_write_cb(acb, 0); | ||
980 | + return 0; | ||
981 | +} | ||
982 | + | ||
983 | +static void qcow_aio_cancel(BlockDriverAIOCB *acb) | ||
984 | +{ | ||
985 | + QCowAIOCB *acb1 = acb->opaque; | ||
986 | + if (acb1->hd_aiocb) | ||
987 | + bdrv_aio_cancel(acb1->hd_aiocb); | ||
988 | + if (acb1->backing_hd_aiocb) | ||
989 | + bdrv_aio_cancel(acb1->backing_hd_aiocb); | ||
990 | +} | ||
991 | + | ||
992 | +static void qcow_aio_delete(BlockDriverAIOCB *acb) | ||
993 | +{ | ||
994 | + QCowAIOCB *acb1 = acb->opaque; | ||
995 | + if (acb1->hd_aiocb) | ||
996 | + bdrv_aio_delete(acb1->hd_aiocb); | ||
997 | + if (acb1->backing_hd_aiocb) | ||
998 | + bdrv_aio_delete(acb1->backing_hd_aiocb); | ||
999 | + qemu_free(acb1->cluster_data); | ||
1000 | + qemu_free(acb1); | ||
1001 | +} | ||
1002 | + | ||
1003 | +static void qcow_close(BlockDriverState *bs) | ||
1004 | +{ | ||
1005 | + BDRVQcowState *s = bs->opaque; | ||
1006 | + qemu_free(s->l1_table); | ||
1007 | + qemu_free(s->l2_cache); | ||
1008 | + qemu_free(s->cluster_cache); | ||
1009 | + qemu_free(s->cluster_data); | ||
1010 | + refcount_close(bs); | ||
1011 | + bdrv_delete(s->hd); | ||
1012 | +} | ||
1013 | + | ||
/* XXX: use std qcow open function ? */
/* Scratch state used by qcow_create() while laying out a new image. */
typedef struct QCowCreateState {
    /* cluster geometry: cluster_size == 1 << cluster_bits */
    int cluster_bits;
    int cluster_size;

    /* byte offsets of the metadata areas inside the new file */
    int64_t l1_table_offset;
    int64_t refcount_table_offset;
    int64_t refcount_block_offset;

    /* in-memory images of the refcount structures, one cluster each,
       with entries stored big-endian ready to be written out */
    uint64_t *refcount_table;
    uint16_t *refcount_block;
} QCowCreateState;
1024 | + | ||
1025 | +static void create_refcount_update(QCowCreateState *s, | ||
1026 | + int64_t offset, int64_t size) | ||
1027 | +{ | ||
1028 | + int refcount; | ||
1029 | + int64_t start, last, cluster_offset; | ||
1030 | + uint16_t *p; | ||
1031 | + | ||
1032 | + start = offset & ~(s->cluster_size - 1); | ||
1033 | + last = (offset + size - 1) & ~(s->cluster_size - 1); | ||
1034 | + for(cluster_offset = start; cluster_offset <= last; | ||
1035 | + cluster_offset += s->cluster_size) { | ||
1036 | + p = &s->refcount_block[cluster_offset >> s->cluster_bits]; | ||
1037 | + refcount = be16_to_cpu(*p); | ||
1038 | + refcount++; | ||
1039 | + *p = cpu_to_be16(refcount); | ||
1040 | + } | ||
1041 | +} | ||
1042 | + | ||
1043 | +static int qcow_create(const char *filename, int64_t total_size, | ||
1044 | + const char *backing_file, int flags) | ||
1045 | +{ | ||
1046 | + int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits; | ||
1047 | + QCowHeader header; | ||
1048 | + uint64_t tmp, offset; | ||
1049 | + QCowCreateState s1, *s = &s1; | ||
1050 | + | ||
1051 | + memset(s, 0, sizeof(*s)); | ||
1052 | + | ||
1053 | + fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); | ||
1054 | + if (fd < 0) | ||
1055 | + return -1; | ||
1056 | + memset(&header, 0, sizeof(header)); | ||
1057 | + header.magic = cpu_to_be32(QCOW_MAGIC); | ||
1058 | + header.version = cpu_to_be32(QCOW_VERSION); | ||
1059 | + header.size = cpu_to_be64(total_size * 512); | ||
1060 | + header_size = sizeof(header); | ||
1061 | + backing_filename_len = 0; | ||
1062 | + if (backing_file) { | ||
1063 | + header.backing_file_offset = cpu_to_be64(header_size); | ||
1064 | + backing_filename_len = strlen(backing_file); | ||
1065 | + header.backing_file_size = cpu_to_be32(backing_filename_len); | ||
1066 | + header_size += backing_filename_len; | ||
1067 | + } | ||
1068 | + s->cluster_bits = 12; /* 4 KB clusters */ | ||
1069 | + s->cluster_size = 1 << s->cluster_bits; | ||
1070 | + header.cluster_bits = cpu_to_be32(s->cluster_bits); | ||
1071 | + header_size = (header_size + 7) & ~7; | ||
1072 | + if (flags) { | ||
1073 | + header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); | ||
1074 | + } else { | ||
1075 | + header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); | ||
1076 | + } | ||
1077 | + l2_bits = s->cluster_bits - 3; | ||
1078 | + shift = s->cluster_bits + l2_bits; | ||
1079 | + l1_size = (((total_size * 512) + (1LL << shift) - 1) >> shift); | ||
1080 | + offset = align_offset(header_size, s->cluster_size); | ||
1081 | + s->l1_table_offset = offset; | ||
1082 | + header.l1_table_offset = cpu_to_be64(s->l1_table_offset); | ||
1083 | + header.l1_size = cpu_to_be32(l1_size); | ||
1084 | + offset += align_offset(l1_size, s->cluster_size); | ||
1085 | + | ||
1086 | + s->refcount_table = qemu_mallocz(s->cluster_size); | ||
1087 | + if (!s->refcount_table) | ||
1088 | + goto fail; | ||
1089 | + s->refcount_block = qemu_mallocz(s->cluster_size); | ||
1090 | + if (!s->refcount_block) | ||
1091 | + goto fail; | ||
1092 | + | ||
1093 | + s->refcount_table_offset = offset; | ||
1094 | + header.refcount_table_offset = cpu_to_be64(offset); | ||
1095 | + header.refcount_table_clusters = cpu_to_be32(1); | ||
1096 | + offset += s->cluster_size; | ||
1097 | + | ||
1098 | + s->refcount_table[0] = cpu_to_be64(offset); | ||
1099 | + s->refcount_block_offset = offset; | ||
1100 | + offset += s->cluster_size; | ||
1101 | + | ||
1102 | + /* update refcounts */ | ||
1103 | + create_refcount_update(s, 0, header_size); | ||
1104 | + create_refcount_update(s, s->l1_table_offset, l1_size); | ||
1105 | + create_refcount_update(s, s->refcount_table_offset, s->cluster_size); | ||
1106 | + create_refcount_update(s, s->refcount_block_offset, s->cluster_size); | ||
1107 | + | ||
1108 | + /* write all the data */ | ||
1109 | + write(fd, &header, sizeof(header)); | ||
1110 | + if (backing_file) { | ||
1111 | + write(fd, backing_file, backing_filename_len); | ||
1112 | + } | ||
1113 | + lseek(fd, s->l1_table_offset, SEEK_SET); | ||
1114 | + tmp = 0; | ||
1115 | + for(i = 0;i < l1_size; i++) { | ||
1116 | + write(fd, &tmp, sizeof(tmp)); | ||
1117 | + } | ||
1118 | + lseek(fd, s->refcount_table_offset, SEEK_SET); | ||
1119 | + write(fd, s->refcount_table, s->cluster_size); | ||
1120 | + | ||
1121 | + lseek(fd, s->refcount_block_offset, SEEK_SET); | ||
1122 | + write(fd, s->refcount_block, s->cluster_size); | ||
1123 | + | ||
1124 | + qemu_free(s->refcount_table); | ||
1125 | + qemu_free(s->refcount_block); | ||
1126 | + close(fd); | ||
1127 | + return 0; | ||
1128 | + fail: | ||
1129 | + qemu_free(s->refcount_table); | ||
1130 | + qemu_free(s->refcount_block); | ||
1131 | + close(fd); | ||
1132 | + return -ENOMEM; | ||
1133 | +} | ||
1134 | + | ||
1135 | +static int qcow_make_empty(BlockDriverState *bs) | ||
1136 | +{ | ||
1137 | +#if 0 | ||
1138 | + /* XXX: not correct */ | ||
1139 | + BDRVQcowState *s = bs->opaque; | ||
1140 | + uint32_t l1_length = s->l1_size * sizeof(uint64_t); | ||
1141 | + int ret; | ||
1142 | + | ||
1143 | + memset(s->l1_table, 0, l1_length); | ||
1144 | + if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0) | ||
1145 | + return -1; | ||
1146 | + ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length); | ||
1147 | + if (ret < 0) | ||
1148 | + return ret; | ||
1149 | + | ||
1150 | + l2_cache_reset(bs); | ||
1151 | +#endif | ||
1152 | + return 0; | ||
1153 | +} | ||
1154 | + | ||
1155 | +/* XXX: put compressed sectors first, then all the cluster aligned | ||
1156 | + tables to avoid losing bytes in alignment */ | ||
1157 | +static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, | ||
1158 | + const uint8_t *buf, int nb_sectors) | ||
1159 | +{ | ||
1160 | + BDRVQcowState *s = bs->opaque; | ||
1161 | + z_stream strm; | ||
1162 | + int ret, out_len; | ||
1163 | + uint8_t *out_buf; | ||
1164 | + uint64_t cluster_offset; | ||
1165 | + | ||
1166 | + if (nb_sectors == 0) { | ||
1167 | + /* align end of file to a sector boundary to ease reading with | ||
1168 | + sector based I/Os */ | ||
1169 | + cluster_offset = bdrv_getlength(s->hd); | ||
1170 | + cluster_offset = (cluster_offset + 511) & ~511; | ||
1171 | + bdrv_truncate(s->hd, cluster_offset); | ||
1172 | + return 0; | ||
1173 | + } | ||
1174 | + | ||
1175 | + if (nb_sectors != s->cluster_sectors) | ||
1176 | + return -EINVAL; | ||
1177 | + | ||
1178 | + out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); | ||
1179 | + if (!out_buf) | ||
1180 | + return -ENOMEM; | ||
1181 | + | ||
1182 | + /* best compression, small window, no zlib header */ | ||
1183 | + memset(&strm, 0, sizeof(strm)); | ||
1184 | + ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, | ||
1185 | + Z_DEFLATED, -12, | ||
1186 | + 9, Z_DEFAULT_STRATEGY); | ||
1187 | + if (ret != 0) { | ||
1188 | + qemu_free(out_buf); | ||
1189 | + return -1; | ||
1190 | + } | ||
1191 | + | ||
1192 | + strm.avail_in = s->cluster_size; | ||
1193 | + strm.next_in = (uint8_t *)buf; | ||
1194 | + strm.avail_out = s->cluster_size; | ||
1195 | + strm.next_out = out_buf; | ||
1196 | + | ||
1197 | + ret = deflate(&strm, Z_FINISH); | ||
1198 | + if (ret != Z_STREAM_END && ret != Z_OK) { | ||
1199 | + qemu_free(out_buf); | ||
1200 | + deflateEnd(&strm); | ||
1201 | + return -1; | ||
1202 | + } | ||
1203 | + out_len = strm.next_out - out_buf; | ||
1204 | + | ||
1205 | + deflateEnd(&strm); | ||
1206 | + | ||
1207 | + if (ret != Z_STREAM_END || out_len >= s->cluster_size) { | ||
1208 | + /* could not compress: write normal cluster */ | ||
1209 | + qcow_write(bs, sector_num, buf, s->cluster_sectors); | ||
1210 | + } else { | ||
1211 | + cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, | ||
1212 | + out_len, 0, 0); | ||
1213 | + cluster_offset &= s->cluster_offset_mask; | ||
1214 | + if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len) { | ||
1215 | + qemu_free(out_buf); | ||
1216 | + return -1; | ||
1217 | + } | ||
1218 | + } | ||
1219 | + | ||
1220 | + qemu_free(out_buf); | ||
1221 | + return 0; | ||
1222 | +} | ||
1223 | + | ||
1224 | +static void qcow_flush(BlockDriverState *bs) | ||
1225 | +{ | ||
1226 | + BDRVQcowState *s = bs->opaque; | ||
1227 | + bdrv_flush(s->hd); | ||
1228 | +} | ||
1229 | + | ||
1230 | +static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) | ||
1231 | +{ | ||
1232 | + BDRVQcowState *s = bs->opaque; | ||
1233 | + bdi->cluster_size = s->cluster_size; | ||
1234 | + bdi->vm_state_offset = (int64_t)s->l1_vm_state_index << | ||
1235 | + (s->cluster_bits + s->l2_bits); | ||
1236 | + return 0; | ||
1237 | +} | ||
1238 | + | ||
1239 | +/*********************************************************/ | ||
1240 | +/* snapshot support */ | ||
1241 | + | ||
1242 | +/* update the refcounts of snapshots and the copied flag */ | ||
1243 | +static int update_snapshot_refcount(BlockDriverState *bs, | ||
1244 | + int64_t l1_table_offset, | ||
1245 | + int l1_size, | ||
1246 | + int addend) | ||
1247 | +{ | ||
1248 | + BDRVQcowState *s = bs->opaque; | ||
1249 | + uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated; | ||
1250 | + int64_t old_offset, old_l2_offset; | ||
1251 | + int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount; | ||
1252 | + | ||
1253 | + l2_cache_reset(bs); | ||
1254 | + | ||
1255 | + l2_table = NULL; | ||
1256 | + l1_table = NULL; | ||
1257 | + l1_size2 = l1_size * sizeof(uint64_t); | ||
1258 | + l1_allocated = 0; | ||
1259 | + if (l1_table_offset != s->l1_table_offset) { | ||
1260 | + l1_table = qemu_malloc(l1_size2); | ||
1261 | + if (!l1_table) | ||
1262 | + goto fail; | ||
1263 | + l1_allocated = 1; | ||
1264 | + if (bdrv_pread(s->hd, l1_table_offset, | ||
1265 | + l1_table, l1_size2) != l1_size2) | ||
1266 | + goto fail; | ||
1267 | + for(i = 0;i < l1_size; i++) | ||
1268 | + be64_to_cpus(&l1_table[i]); | ||
1269 | + } else { | ||
1270 | + assert(l1_size == s->l1_size); | ||
1271 | + l1_table = s->l1_table; | ||
1272 | + l1_allocated = 0; | ||
1273 | + } | ||
1274 | + | ||
1275 | + l2_size = s->l2_size * sizeof(uint64_t); | ||
1276 | + l2_table = qemu_malloc(l2_size); | ||
1277 | + if (!l2_table) | ||
1278 | + goto fail; | ||
1279 | + l1_modified = 0; | ||
1280 | + for(i = 0; i < l1_size; i++) { | ||
1281 | + l2_offset = l1_table[i]; | ||
1282 | + if (l2_offset) { | ||
1283 | + old_l2_offset = l2_offset; | ||
1284 | + l2_offset &= ~QCOW_OFLAG_COPIED; | ||
1285 | + l2_modified = 0; | ||
1286 | + if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size) | ||
1287 | + goto fail; | ||
1288 | + for(j = 0; j < s->l2_size; j++) { | ||
1289 | + offset = be64_to_cpu(l2_table[j]); | ||
1290 | + if (offset != 0) { | ||
1291 | + old_offset = offset; | ||
1292 | + offset &= ~QCOW_OFLAG_COPIED; | ||
1293 | + if (offset & QCOW_OFLAG_COMPRESSED) { | ||
1294 | + nb_csectors = ((offset >> s->csize_shift) & | ||
1295 | + s->csize_mask) + 1; | ||
1296 | + if (addend != 0) | ||
1297 | + update_refcount(bs, (offset & s->cluster_offset_mask) & ~511, | ||
1298 | + nb_csectors * 512, addend); | ||
1299 | + /* compressed clusters are never modified */ | ||
1300 | + refcount = 2; | ||
1301 | + } else { | ||
1302 | + if (addend != 0) { | ||
1303 | + refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend); | ||
1304 | + } else { | ||
1305 | + refcount = get_refcount(bs, offset >> s->cluster_bits); | ||
1306 | + } | ||
1307 | + } | ||
1308 | + | ||
1309 | + if (refcount == 1) { | ||
1310 | + offset |= QCOW_OFLAG_COPIED; | ||
1311 | + } | ||
1312 | + if (offset != old_offset) { | ||
1313 | + l2_table[j] = cpu_to_be64(offset); | ||
1314 | + l2_modified = 1; | ||
1315 | + } | ||
1316 | + } | ||
1317 | + } | ||
1318 | + if (l2_modified) { | ||
1319 | + if (bdrv_pwrite(s->hd, | ||
1320 | + l2_offset, l2_table, l2_size) != l2_size) | ||
1321 | + goto fail; | ||
1322 | + } | ||
1323 | + | ||
1324 | + if (addend != 0) { | ||
1325 | + refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend); | ||
1326 | + } else { | ||
1327 | + refcount = get_refcount(bs, l2_offset >> s->cluster_bits); | ||
1328 | + } | ||
1329 | + if (refcount == 1) { | ||
1330 | + l2_offset |= QCOW_OFLAG_COPIED; | ||
1331 | + } | ||
1332 | + if (l2_offset != old_l2_offset) { | ||
1333 | + l1_table[i] = l2_offset; | ||
1334 | + l1_modified = 1; | ||
1335 | + } | ||
1336 | + } | ||
1337 | + } | ||
1338 | + if (l1_modified) { | ||
1339 | + for(i = 0; i < l1_size; i++) | ||
1340 | + cpu_to_be64s(&l1_table[i]); | ||
1341 | + if (bdrv_pwrite(s->hd, l1_table_offset, l1_table, | ||
1342 | + l1_size2) != l1_size2) | ||
1343 | + goto fail; | ||
1344 | + for(i = 0; i < l1_size; i++) | ||
1345 | + be64_to_cpus(&l1_table[i]); | ||
1346 | + } | ||
1347 | + if (l1_allocated) | ||
1348 | + qemu_free(l1_table); | ||
1349 | + qemu_free(l2_table); | ||
1350 | + return 0; | ||
1351 | + fail: | ||
1352 | + if (l1_allocated) | ||
1353 | + qemu_free(l1_table); | ||
1354 | + qemu_free(l2_table); | ||
1355 | + return -EIO; | ||
1356 | +} | ||
1357 | + | ||
1358 | +static void qcow_free_snapshots(BlockDriverState *bs) | ||
1359 | +{ | ||
1360 | + BDRVQcowState *s = bs->opaque; | ||
1361 | + int i; | ||
1362 | + | ||
1363 | + for(i = 0; i < s->nb_snapshots; i++) { | ||
1364 | + qemu_free(s->snapshots[i].name); | ||
1365 | + qemu_free(s->snapshots[i].id_str); | ||
1366 | + } | ||
1367 | + qemu_free(s->snapshots); | ||
1368 | + s->snapshots = NULL; | ||
1369 | + s->nb_snapshots = 0; | ||
1370 | +} | ||
1371 | + | ||
1372 | +static int qcow_read_snapshots(BlockDriverState *bs) | ||
1373 | +{ | ||
1374 | + BDRVQcowState *s = bs->opaque; | ||
1375 | + QCowSnapshotHeader h; | ||
1376 | + QCowSnapshot *sn; | ||
1377 | + int i, id_str_size, name_size; | ||
1378 | + int64_t offset; | ||
1379 | + uint32_t extra_data_size; | ||
1380 | + | ||
1381 | + offset = s->snapshots_offset; | ||
1382 | + s->snapshots = qemu_mallocz(s->nb_snapshots * sizeof(QCowSnapshot)); | ||
1383 | + if (!s->snapshots) | ||
1384 | + goto fail; | ||
1385 | + for(i = 0; i < s->nb_snapshots; i++) { | ||
1386 | + offset = align_offset(offset, 8); | ||
1387 | + if (bdrv_pread(s->hd, offset, &h, sizeof(h)) != sizeof(h)) | ||
1388 | + goto fail; | ||
1389 | + offset += sizeof(h); | ||
1390 | + sn = s->snapshots + i; | ||
1391 | + sn->l1_table_offset = be64_to_cpu(h.l1_table_offset); | ||
1392 | + sn->l1_size = be32_to_cpu(h.l1_size); | ||
1393 | + sn->vm_state_size = be32_to_cpu(h.vm_state_size); | ||
1394 | + sn->date_sec = be32_to_cpu(h.date_sec); | ||
1395 | + sn->date_nsec = be32_to_cpu(h.date_nsec); | ||
1396 | + sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec); | ||
1397 | + extra_data_size = be32_to_cpu(h.extra_data_size); | ||
1398 | + | ||
1399 | + id_str_size = be16_to_cpu(h.id_str_size); | ||
1400 | + name_size = be16_to_cpu(h.name_size); | ||
1401 | + | ||
1402 | + offset += extra_data_size; | ||
1403 | + | ||
1404 | + sn->id_str = qemu_malloc(id_str_size + 1); | ||
1405 | + if (!sn->id_str) | ||
1406 | + goto fail; | ||
1407 | + if (bdrv_pread(s->hd, offset, sn->id_str, id_str_size) != id_str_size) | ||
1408 | + goto fail; | ||
1409 | + offset += id_str_size; | ||
1410 | + sn->id_str[id_str_size] = '\0'; | ||
1411 | + | ||
1412 | + sn->name = qemu_malloc(name_size + 1); | ||
1413 | + if (!sn->name) | ||
1414 | + goto fail; | ||
1415 | + if (bdrv_pread(s->hd, offset, sn->name, name_size) != name_size) | ||
1416 | + goto fail; | ||
1417 | + offset += name_size; | ||
1418 | + sn->name[name_size] = '\0'; | ||
1419 | + } | ||
1420 | + s->snapshots_size = offset - s->snapshots_offset; | ||
1421 | + return 0; | ||
1422 | + fail: | ||
1423 | + qcow_free_snapshots(bs); | ||
1424 | + return -1; | ||
1425 | +} | ||
1426 | + | ||
1427 | +/* add at the end of the file a new list of snapshots */ | ||
1428 | +static int qcow_write_snapshots(BlockDriverState *bs) | ||
1429 | +{ | ||
1430 | + BDRVQcowState *s = bs->opaque; | ||
1431 | + QCowSnapshot *sn; | ||
1432 | + QCowSnapshotHeader h; | ||
1433 | + int i, name_size, id_str_size, snapshots_size; | ||
1434 | + uint64_t data64; | ||
1435 | + uint32_t data32; | ||
1436 | + int64_t offset, snapshots_offset; | ||
1437 | + | ||
1438 | + /* compute the size of the snapshots */ | ||
1439 | + offset = 0; | ||
1440 | + for(i = 0; i < s->nb_snapshots; i++) { | ||
1441 | + sn = s->snapshots + i; | ||
1442 | + offset = align_offset(offset, 8); | ||
1443 | + offset += sizeof(h); | ||
1444 | + offset += strlen(sn->id_str); | ||
1445 | + offset += strlen(sn->name); | ||
1446 | + } | ||
1447 | + snapshots_size = offset; | ||
1448 | + | ||
1449 | + snapshots_offset = alloc_clusters(bs, snapshots_size); | ||
1450 | + offset = snapshots_offset; | ||
1451 | + | ||
1452 | + for(i = 0; i < s->nb_snapshots; i++) { | ||
1453 | + sn = s->snapshots + i; | ||
1454 | + memset(&h, 0, sizeof(h)); | ||
1455 | + h.l1_table_offset = cpu_to_be64(sn->l1_table_offset); | ||
1456 | + h.l1_size = cpu_to_be32(sn->l1_size); | ||
1457 | + h.vm_state_size = cpu_to_be32(sn->vm_state_size); | ||
1458 | + h.date_sec = cpu_to_be32(sn->date_sec); | ||
1459 | + h.date_nsec = cpu_to_be32(sn->date_nsec); | ||
1460 | + h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec); | ||
1461 | + | ||
1462 | + id_str_size = strlen(sn->id_str); | ||
1463 | + name_size = strlen(sn->name); | ||
1464 | + h.id_str_size = cpu_to_be16(id_str_size); | ||
1465 | + h.name_size = cpu_to_be16(name_size); | ||
1466 | + offset = align_offset(offset, 8); | ||
1467 | + if (bdrv_pwrite(s->hd, offset, &h, sizeof(h)) != sizeof(h)) | ||
1468 | + goto fail; | ||
1469 | + offset += sizeof(h); | ||
1470 | + if (bdrv_pwrite(s->hd, offset, sn->id_str, id_str_size) != id_str_size) | ||
1471 | + goto fail; | ||
1472 | + offset += id_str_size; | ||
1473 | + if (bdrv_pwrite(s->hd, offset, sn->name, name_size) != name_size) | ||
1474 | + goto fail; | ||
1475 | + offset += name_size; | ||
1476 | + } | ||
1477 | + | ||
1478 | + /* update the various header fields */ | ||
1479 | + data64 = cpu_to_be64(snapshots_offset); | ||
1480 | + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, snapshots_offset), | ||
1481 | + &data64, sizeof(data64)) != sizeof(data64)) | ||
1482 | + goto fail; | ||
1483 | + data32 = cpu_to_be32(s->nb_snapshots); | ||
1484 | + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, nb_snapshots), | ||
1485 | + &data32, sizeof(data32)) != sizeof(data32)) | ||
1486 | + goto fail; | ||
1487 | + | ||
1488 | + /* free the old snapshot table */ | ||
1489 | + free_clusters(bs, s->snapshots_offset, s->snapshots_size); | ||
1490 | + s->snapshots_offset = snapshots_offset; | ||
1491 | + s->snapshots_size = snapshots_size; | ||
1492 | + return 0; | ||
1493 | + fail: | ||
1494 | + return -1; | ||
1495 | +} | ||
1496 | + | ||
1497 | +static void find_new_snapshot_id(BlockDriverState *bs, | ||
1498 | + char *id_str, int id_str_size) | ||
1499 | +{ | ||
1500 | + BDRVQcowState *s = bs->opaque; | ||
1501 | + QCowSnapshot *sn; | ||
1502 | + int i, id, id_max = 0; | ||
1503 | + | ||
1504 | + for(i = 0; i < s->nb_snapshots; i++) { | ||
1505 | + sn = s->snapshots + i; | ||
1506 | + id = strtoul(sn->id_str, NULL, 10); | ||
1507 | + if (id > id_max) | ||
1508 | + id_max = id; | ||
1509 | + } | ||
1510 | + snprintf(id_str, id_str_size, "%d", id_max + 1); | ||
1511 | +} | ||
1512 | + | ||
1513 | +static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str) | ||
1514 | +{ | ||
1515 | + BDRVQcowState *s = bs->opaque; | ||
1516 | + int i; | ||
1517 | + | ||
1518 | + for(i = 0; i < s->nb_snapshots; i++) { | ||
1519 | + if (!strcmp(s->snapshots[i].id_str, id_str)) | ||
1520 | + return i; | ||
1521 | + } | ||
1522 | + return -1; | ||
1523 | +} | ||
1524 | + | ||
1525 | +static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name) | ||
1526 | +{ | ||
1527 | + BDRVQcowState *s = bs->opaque; | ||
1528 | + int i, ret; | ||
1529 | + | ||
1530 | + ret = find_snapshot_by_id(bs, name); | ||
1531 | + if (ret >= 0) | ||
1532 | + return ret; | ||
1533 | + for(i = 0; i < s->nb_snapshots; i++) { | ||
1534 | + if (!strcmp(s->snapshots[i].name, name)) | ||
1535 | + return i; | ||
1536 | + } | ||
1537 | + return -1; | ||
1538 | +} | ||
1539 | + | ||
1540 | +/* if no id is provided, a new one is constructed */ | ||
1541 | +static int qcow_snapshot_create(BlockDriverState *bs, | ||
1542 | + QEMUSnapshotInfo *sn_info) | ||
1543 | +{ | ||
1544 | + BDRVQcowState *s = bs->opaque; | ||
1545 | + QCowSnapshot *snapshots1, sn1, *sn = &sn1; | ||
1546 | + int i, ret; | ||
1547 | + uint64_t *l1_table = NULL; | ||
1548 | + | ||
1549 | + memset(sn, 0, sizeof(*sn)); | ||
1550 | + | ||
1551 | + if (sn_info->id_str[0] == '\0') { | ||
1552 | + /* compute a new id */ | ||
1553 | + find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str)); | ||
1554 | + } | ||
1555 | + | ||
1556 | + /* check that the ID is unique */ | ||
1557 | + if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) | ||
1558 | + return -ENOENT; | ||
1559 | + | ||
1560 | + sn->id_str = qemu_strdup(sn_info->id_str); | ||
1561 | + if (!sn->id_str) | ||
1562 | + goto fail; | ||
1563 | + sn->name = qemu_strdup(sn_info->name); | ||
1564 | + if (!sn->name) | ||
1565 | + goto fail; | ||
1566 | + sn->vm_state_size = sn_info->vm_state_size; | ||
1567 | + sn->date_sec = sn_info->date_sec; | ||
1568 | + sn->date_nsec = sn_info->date_nsec; | ||
1569 | + sn->vm_clock_nsec = sn_info->vm_clock_nsec; | ||
1570 | + | ||
1571 | + ret = update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1); | ||
1572 | + if (ret < 0) | ||
1573 | + goto fail; | ||
1574 | + | ||
1575 | + /* create the L1 table of the snapshot */ | ||
1576 | + sn->l1_table_offset = alloc_clusters(bs, s->l1_size * sizeof(uint64_t)); | ||
1577 | + sn->l1_size = s->l1_size; | ||
1578 | + | ||
1579 | + l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t)); | ||
1580 | + if (!l1_table) | ||
1581 | + goto fail; | ||
1582 | + for(i = 0; i < s->l1_size; i++) { | ||
1583 | + l1_table[i] = cpu_to_be64(s->l1_table[i]); | ||
1584 | + } | ||
1585 | + if (bdrv_pwrite(s->hd, sn->l1_table_offset, | ||
1586 | + l1_table, s->l1_size * sizeof(uint64_t)) != | ||
1587 | + (s->l1_size * sizeof(uint64_t))) | ||
1588 | + goto fail; | ||
1589 | + qemu_free(l1_table); | ||
1590 | + l1_table = NULL; | ||
1591 | + | ||
1592 | + snapshots1 = qemu_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot)); | ||
1593 | + if (!snapshots1) | ||
1594 | + goto fail; | ||
1595 | + memcpy(snapshots1, s->snapshots, s->nb_snapshots * sizeof(QCowSnapshot)); | ||
1596 | + s->snapshots = snapshots1; | ||
1597 | + s->snapshots[s->nb_snapshots++] = *sn; | ||
1598 | + | ||
1599 | + if (qcow_write_snapshots(bs) < 0) | ||
1600 | + goto fail; | ||
1601 | +#ifdef DEBUG_ALLOC | ||
1602 | + check_refcounts(bs); | ||
1603 | +#endif | ||
1604 | + return 0; | ||
1605 | + fail: | ||
1606 | + qemu_free(sn->name); | ||
1607 | + qemu_free(l1_table); | ||
1608 | + return -1; | ||
1609 | +} | ||
1610 | + | ||
1611 | +/* copy the snapshot 'snapshot_name' into the current disk image */ | ||
1612 | +static int qcow_snapshot_goto(BlockDriverState *bs, | ||
1613 | + const char *snapshot_id) | ||
1614 | +{ | ||
1615 | + BDRVQcowState *s = bs->opaque; | ||
1616 | + QCowSnapshot *sn; | ||
1617 | + int i, snapshot_index, l1_size2; | ||
1618 | + | ||
1619 | + snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); | ||
1620 | + if (snapshot_index < 0) | ||
1621 | + return -ENOENT; | ||
1622 | + sn = &s->snapshots[snapshot_index]; | ||
1623 | + | ||
1624 | + if (update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, -1) < 0) | ||
1625 | + goto fail; | ||
1626 | + | ||
1627 | + if (grow_l1_table(bs, sn->l1_size) < 0) | ||
1628 | + goto fail; | ||
1629 | + | ||
1630 | + s->l1_size = sn->l1_size; | ||
1631 | + l1_size2 = s->l1_size * sizeof(uint64_t); | ||
1632 | + /* copy the snapshot l1 table to the current l1 table */ | ||
1633 | + if (bdrv_pread(s->hd, sn->l1_table_offset, | ||
1634 | + s->l1_table, l1_size2) != l1_size2) | ||
1635 | + goto fail; | ||
1636 | + if (bdrv_pwrite(s->hd, s->l1_table_offset, | ||
1637 | + s->l1_table, l1_size2) != l1_size2) | ||
1638 | + goto fail; | ||
1639 | + for(i = 0;i < s->l1_size; i++) { | ||
1640 | + be64_to_cpus(&s->l1_table[i]); | ||
1641 | + } | ||
1642 | + | ||
1643 | + if (update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1) < 0) | ||
1644 | + goto fail; | ||
1645 | + | ||
1646 | +#ifdef DEBUG_ALLOC | ||
1647 | + check_refcounts(bs); | ||
1648 | +#endif | ||
1649 | + return 0; | ||
1650 | + fail: | ||
1651 | + return -EIO; | ||
1652 | +} | ||
1653 | + | ||
1654 | +static int qcow_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) | ||
1655 | +{ | ||
1656 | + BDRVQcowState *s = bs->opaque; | ||
1657 | + QCowSnapshot *sn; | ||
1658 | + int snapshot_index, ret; | ||
1659 | + | ||
1660 | + snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); | ||
1661 | + if (snapshot_index < 0) | ||
1662 | + return -ENOENT; | ||
1663 | + sn = &s->snapshots[snapshot_index]; | ||
1664 | + | ||
1665 | + ret = update_snapshot_refcount(bs, sn->l1_table_offset, sn->l1_size, -1); | ||
1666 | + if (ret < 0) | ||
1667 | + return ret; | ||
1668 | + /* must update the copied flag on the current cluster offsets */ | ||
1669 | + ret = update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0); | ||
1670 | + if (ret < 0) | ||
1671 | + return ret; | ||
1672 | + free_clusters(bs, sn->l1_table_offset, sn->l1_size * sizeof(uint64_t)); | ||
1673 | + | ||
1674 | + qemu_free(sn->id_str); | ||
1675 | + qemu_free(sn->name); | ||
1676 | + memmove(sn, sn + 1, (s->nb_snapshots - snapshot_index - 1) * sizeof(*sn)); | ||
1677 | + s->nb_snapshots--; | ||
1678 | + ret = qcow_write_snapshots(bs); | ||
1679 | + if (ret < 0) { | ||
1680 | + /* XXX: restore snapshot if error ? */ | ||
1681 | + return ret; | ||
1682 | + } | ||
1683 | +#ifdef DEBUG_ALLOC | ||
1684 | + check_refcounts(bs); | ||
1685 | +#endif | ||
1686 | + return 0; | ||
1687 | +} | ||
1688 | + | ||
1689 | +static int qcow_snapshot_list(BlockDriverState *bs, | ||
1690 | + QEMUSnapshotInfo **psn_tab) | ||
1691 | +{ | ||
1692 | + BDRVQcowState *s = bs->opaque; | ||
1693 | + QEMUSnapshotInfo *sn_tab, *sn_info; | ||
1694 | + QCowSnapshot *sn; | ||
1695 | + int i; | ||
1696 | + | ||
1697 | + sn_tab = qemu_mallocz(s->nb_snapshots * sizeof(QEMUSnapshotInfo)); | ||
1698 | + if (!sn_tab) | ||
1699 | + goto fail; | ||
1700 | + for(i = 0; i < s->nb_snapshots; i++) { | ||
1701 | + sn_info = sn_tab + i; | ||
1702 | + sn = s->snapshots + i; | ||
1703 | + pstrcpy(sn_info->id_str, sizeof(sn_info->id_str), | ||
1704 | + sn->id_str); | ||
1705 | + pstrcpy(sn_info->name, sizeof(sn_info->name), | ||
1706 | + sn->name); | ||
1707 | + sn_info->vm_state_size = sn->vm_state_size; | ||
1708 | + sn_info->date_sec = sn->date_sec; | ||
1709 | + sn_info->date_nsec = sn->date_nsec; | ||
1710 | + sn_info->vm_clock_nsec = sn->vm_clock_nsec; | ||
1711 | + } | ||
1712 | + *psn_tab = sn_tab; | ||
1713 | + return s->nb_snapshots; | ||
1714 | + fail: | ||
1715 | + qemu_free(sn_tab); | ||
1716 | + *psn_tab = NULL; | ||
1717 | + return -ENOMEM; | ||
1718 | +} | ||
1719 | + | ||
1720 | +/*********************************************************/ | ||
1721 | +/* refcount handling */ | ||
1722 | + | ||
1723 | +static int refcount_init(BlockDriverState *bs) | ||
1724 | +{ | ||
1725 | + BDRVQcowState *s = bs->opaque; | ||
1726 | + int ret, refcount_table_size2, i; | ||
1727 | + | ||
1728 | + s->refcount_block_cache = qemu_malloc(s->cluster_size); | ||
1729 | + if (!s->refcount_block_cache) | ||
1730 | + goto fail; | ||
1731 | + refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t); | ||
1732 | + s->refcount_table = qemu_malloc(refcount_table_size2); | ||
1733 | + if (!s->refcount_table) | ||
1734 | + goto fail; | ||
1735 | + if (s->refcount_table_size > 0) { | ||
1736 | + ret = bdrv_pread(s->hd, s->refcount_table_offset, | ||
1737 | + s->refcount_table, refcount_table_size2); | ||
1738 | + if (ret != refcount_table_size2) | ||
1739 | + goto fail; | ||
1740 | + for(i = 0; i < s->refcount_table_size; i++) | ||
1741 | + be64_to_cpus(&s->refcount_table[i]); | ||
1742 | + } | ||
1743 | + return 0; | ||
1744 | + fail: | ||
1745 | + return -ENOMEM; | ||
1746 | +} | ||
1747 | + | ||
1748 | +static void refcount_close(BlockDriverState *bs) | ||
1749 | +{ | ||
1750 | + BDRVQcowState *s = bs->opaque; | ||
1751 | + qemu_free(s->refcount_block_cache); | ||
1752 | + qemu_free(s->refcount_table); | ||
1753 | +} | ||
1754 | + | ||
1755 | + | ||
1756 | +static int load_refcount_block(BlockDriverState *bs, | ||
1757 | + int64_t refcount_block_offset) | ||
1758 | +{ | ||
1759 | + BDRVQcowState *s = bs->opaque; | ||
1760 | + int ret; | ||
1761 | + ret = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache, | ||
1762 | + s->cluster_size); | ||
1763 | + if (ret != s->cluster_size) | ||
1764 | + return -EIO; | ||
1765 | + s->refcount_block_cache_offset = refcount_block_offset; | ||
1766 | + return 0; | ||
1767 | +} | ||
1768 | + | ||
1769 | +static int get_refcount(BlockDriverState *bs, int64_t cluster_index) | ||
1770 | +{ | ||
1771 | + BDRVQcowState *s = bs->opaque; | ||
1772 | + int refcount_table_index, block_index; | ||
1773 | + int64_t refcount_block_offset; | ||
1774 | + | ||
1775 | + refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); | ||
1776 | + if (refcount_table_index >= s->refcount_table_size) | ||
1777 | + return 0; | ||
1778 | + refcount_block_offset = s->refcount_table[refcount_table_index]; | ||
1779 | + if (!refcount_block_offset) | ||
1780 | + return 0; | ||
1781 | + if (refcount_block_offset != s->refcount_block_cache_offset) { | ||
1782 | + /* better than nothing: return allocated if read error */ | ||
1783 | + if (load_refcount_block(bs, refcount_block_offset) < 0) | ||
1784 | + return 1; | ||
1785 | + } | ||
1786 | + block_index = cluster_index & | ||
1787 | + ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); | ||
1788 | + return be16_to_cpu(s->refcount_block_cache[block_index]); | ||
1789 | +} | ||
1790 | + | ||
1791 | +/* return < 0 if error */ | ||
1792 | +static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size) | ||
1793 | +{ | ||
1794 | + BDRVQcowState *s = bs->opaque; | ||
1795 | + int i, nb_clusters; | ||
1796 | + | ||
1797 | + nb_clusters = (size + s->cluster_size - 1) >> s->cluster_bits; | ||
1798 | + for(;;) { | ||
1799 | + if (get_refcount(bs, s->free_cluster_index) == 0) { | ||
1800 | + s->free_cluster_index++; | ||
1801 | + for(i = 1; i < nb_clusters; i++) { | ||
1802 | + if (get_refcount(bs, s->free_cluster_index) != 0) | ||
1803 | + goto not_found; | ||
1804 | + s->free_cluster_index++; | ||
1805 | + } | ||
1806 | +#ifdef DEBUG_ALLOC2 | ||
1807 | + printf("alloc_clusters: size=%lld -> %lld\n", | ||
1808 | + size, | ||
1809 | + (s->free_cluster_index - nb_clusters) << s->cluster_bits); | ||
1810 | +#endif | ||
1811 | + return (s->free_cluster_index - nb_clusters) << s->cluster_bits; | ||
1812 | + } else { | ||
1813 | + not_found: | ||
1814 | + s->free_cluster_index++; | ||
1815 | + } | ||
1816 | + } | ||
1817 | +} | ||
1818 | + | ||
1819 | +static int64_t alloc_clusters(BlockDriverState *bs, int64_t size) | ||
1820 | +{ | ||
1821 | + int64_t offset; | ||
1822 | + | ||
1823 | + offset = alloc_clusters_noref(bs, size); | ||
1824 | + update_refcount(bs, offset, size, 1); | ||
1825 | + return offset; | ||
1826 | +} | ||
1827 | + | ||
1828 | +/* only used to allocate compressed sectors. We try to allocate | ||
1829 | + contiguous sectors. size must be <= cluster_size */ | ||
1830 | +static int64_t alloc_bytes(BlockDriverState *bs, int size) | ||
1831 | +{ | ||
1832 | + BDRVQcowState *s = bs->opaque; | ||
1833 | + int64_t offset, cluster_offset; | ||
1834 | + int free_in_cluster; | ||
1835 | + | ||
1836 | + assert(size > 0 && size <= s->cluster_size); | ||
1837 | + if (s->free_byte_offset == 0) { | ||
1838 | + s->free_byte_offset = alloc_clusters(bs, s->cluster_size); | ||
1839 | + } | ||
1840 | + redo: | ||
1841 | + free_in_cluster = s->cluster_size - | ||
1842 | + (s->free_byte_offset & (s->cluster_size - 1)); | ||
1843 | + if (size <= free_in_cluster) { | ||
1844 | + /* enough space in current cluster */ | ||
1845 | + offset = s->free_byte_offset; | ||
1846 | + s->free_byte_offset += size; | ||
1847 | + free_in_cluster -= size; | ||
1848 | + if (free_in_cluster == 0) | ||
1849 | + s->free_byte_offset = 0; | ||
1850 | + if ((offset & (s->cluster_size - 1)) != 0) | ||
1851 | + update_cluster_refcount(bs, offset >> s->cluster_bits, 1); | ||
1852 | + } else { | ||
1853 | + offset = alloc_clusters(bs, s->cluster_size); | ||
1854 | + cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1); | ||
1855 | + if ((cluster_offset + s->cluster_size) == offset) { | ||
1856 | + /* we are lucky: contiguous data */ | ||
1857 | + offset = s->free_byte_offset; | ||
1858 | + update_cluster_refcount(bs, offset >> s->cluster_bits, 1); | ||
1859 | + s->free_byte_offset += size; | ||
1860 | + } else { | ||
1861 | + s->free_byte_offset = offset; | ||
1862 | + goto redo; | ||
1863 | + } | ||
1864 | + } | ||
1865 | + return offset; | ||
1866 | +} | ||
1867 | + | ||
1868 | +static void free_clusters(BlockDriverState *bs, | ||
1869 | + int64_t offset, int64_t size) | ||
1870 | +{ | ||
1871 | + update_refcount(bs, offset, size, -1); | ||
1872 | +} | ||
1873 | + | ||
1874 | +static int grow_refcount_table(BlockDriverState *bs, int min_size) | ||
1875 | +{ | ||
1876 | + BDRVQcowState *s = bs->opaque; | ||
1877 | + int new_table_size, new_table_size2, refcount_table_clusters, i, ret; | ||
1878 | + uint64_t *new_table; | ||
1879 | + int64_t table_offset; | ||
1880 | + uint64_t data64; | ||
1881 | + uint32_t data32; | ||
1882 | + | ||
1883 | + if (min_size <= s->refcount_table_size) | ||
1884 | + return 0; | ||
1885 | + /* compute new table size */ | ||
1886 | + refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); | ||
1887 | + for(;;) { | ||
1888 | + if (refcount_table_clusters == 0) { | ||
1889 | + refcount_table_clusters = 1; | ||
1890 | + } else { | ||
1891 | + refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2; | ||
1892 | + } | ||
1893 | + new_table_size = refcount_table_clusters << (s->cluster_bits - 3); | ||
1894 | + if (min_size <= new_table_size) | ||
1895 | + break; | ||
1896 | + } | ||
1897 | + | ||
1898 | + new_table_size2 = new_table_size * sizeof(uint64_t); | ||
1899 | + new_table = qemu_mallocz(new_table_size2); | ||
1900 | + if (!new_table) | ||
1901 | + return -ENOMEM; | ||
1902 | + memcpy(new_table, s->refcount_table, | ||
1903 | + s->refcount_table_size * sizeof(uint64_t)); | ||
1904 | + for(i = 0; i < s->refcount_table_size; i++) | ||
1905 | + cpu_to_be64s(&new_table[i]); | ||
1906 | + /* Note: we cannot update the refcount now to avoid recursion */ | ||
1907 | + table_offset = alloc_clusters_noref(bs, new_table_size2); | ||
1908 | + ret = bdrv_pwrite(s->hd, table_offset, new_table, new_table_size2); | ||
1909 | + if (ret != new_table_size2) | ||
1910 | + goto fail; | ||
1911 | + for(i = 0; i < s->refcount_table_size; i++) | ||
1912 | + be64_to_cpus(&new_table[i]); | ||
1913 | + | ||
1914 | + data64 = cpu_to_be64(table_offset); | ||
1915 | + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset), | ||
1916 | + &data64, sizeof(data64)) != sizeof(data64)) | ||
1917 | + goto fail; | ||
1918 | + data32 = cpu_to_be32(refcount_table_clusters); | ||
1919 | + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_clusters), | ||
1920 | + &data32, sizeof(data32)) != sizeof(data32)) | ||
1921 | + goto fail; | ||
1922 | + qemu_free(s->refcount_table); | ||
1923 | + s->refcount_table = new_table; | ||
1924 | + s->refcount_table_size = new_table_size; | ||
1925 | + | ||
1926 | + update_refcount(bs, table_offset, new_table_size2, 1); | ||
1927 | + return 0; | ||
1928 | + fail: | ||
1929 | + free_clusters(bs, table_offset, new_table_size2); | ||
1930 | + qemu_free(new_table); | ||
1931 | + return -EIO; | ||
1932 | +} | ||
1933 | + | ||
1934 | +/* addend must be 1 or -1 */ | ||
1935 | +/* XXX: cache several refcount block clusters ? */ | ||
1936 | +static int update_cluster_refcount(BlockDriverState *bs, | ||
1937 | + int64_t cluster_index, | ||
1938 | + int addend) | ||
1939 | +{ | ||
1940 | + BDRVQcowState *s = bs->opaque; | ||
1941 | + int64_t offset, refcount_block_offset; | ||
1942 | + int ret, refcount_table_index, block_index, refcount; | ||
1943 | + uint64_t data64; | ||
1944 | + | ||
1945 | + refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); | ||
1946 | + if (refcount_table_index >= s->refcount_table_size) { | ||
1947 | + if (addend < 0) | ||
1948 | + return -EINVAL; | ||
1949 | + ret = grow_refcount_table(bs, refcount_table_index + 1); | ||
1950 | + if (ret < 0) | ||
1951 | + return ret; | ||
1952 | + } | ||
1953 | + refcount_block_offset = s->refcount_table[refcount_table_index]; | ||
1954 | + if (!refcount_block_offset) { | ||
1955 | + if (addend < 0) | ||
1956 | + return -EINVAL; | ||
1957 | + /* create a new refcount block */ | ||
1958 | + /* Note: we cannot update the refcount now to avoid recursion */ | ||
1959 | + offset = alloc_clusters_noref(bs, s->cluster_size); | ||
1960 | + memset(s->refcount_block_cache, 0, s->cluster_size); | ||
1961 | + ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size); | ||
1962 | + if (ret != s->cluster_size) | ||
1963 | + return -EINVAL; | ||
1964 | + s->refcount_table[refcount_table_index] = offset; | ||
1965 | + data64 = cpu_to_be64(offset); | ||
1966 | + ret = bdrv_pwrite(s->hd, s->refcount_table_offset + | ||
1967 | + refcount_table_index * sizeof(uint64_t), | ||
1968 | + &data64, sizeof(data64)); | ||
1969 | + if (ret != sizeof(data64)) | ||
1970 | + return -EINVAL; | ||
1971 | + | ||
1972 | + refcount_block_offset = offset; | ||
1973 | + s->refcount_block_cache_offset = offset; | ||
1974 | + update_refcount(bs, offset, s->cluster_size, 1); | ||
1975 | + } else { | ||
1976 | + if (refcount_block_offset != s->refcount_block_cache_offset) { | ||
1977 | + if (load_refcount_block(bs, refcount_block_offset) < 0) | ||
1978 | + return -EIO; | ||
1979 | + } | ||
1980 | + } | ||
1981 | + /* we can update the count and save it */ | ||
1982 | + block_index = cluster_index & | ||
1983 | + ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); | ||
1984 | + refcount = be16_to_cpu(s->refcount_block_cache[block_index]); | ||
1985 | + refcount += addend; | ||
1986 | + if (refcount < 0 || refcount > 0xffff) | ||
1987 | + return -EINVAL; | ||
1988 | + if (refcount == 0 && cluster_index < s->free_cluster_index) { | ||
1989 | + s->free_cluster_index = cluster_index; | ||
1990 | + } | ||
1991 | + s->refcount_block_cache[block_index] = cpu_to_be16(refcount); | ||
1992 | + if (bdrv_pwrite(s->hd, | ||
1993 | + refcount_block_offset + (block_index << REFCOUNT_SHIFT), | ||
1994 | + &s->refcount_block_cache[block_index], 2) != 2) | ||
1995 | + return -EIO; | ||
1996 | + return refcount; | ||
1997 | +} | ||
1998 | + | ||
1999 | +static void update_refcount(BlockDriverState *bs, | ||
2000 | + int64_t offset, int64_t length, | ||
2001 | + int addend) | ||
2002 | +{ | ||
2003 | + BDRVQcowState *s = bs->opaque; | ||
2004 | + int64_t start, last, cluster_offset; | ||
2005 | + | ||
2006 | +#ifdef DEBUG_ALLOC2 | ||
2007 | + printf("update_refcount: offset=%lld size=%lld addend=%d\n", | ||
2008 | + offset, length, addend); | ||
2009 | +#endif | ||
2010 | + if (length <= 0) | ||
2011 | + return; | ||
2012 | + start = offset & ~(s->cluster_size - 1); | ||
2013 | + last = (offset + length - 1) & ~(s->cluster_size - 1); | ||
2014 | + for(cluster_offset = start; cluster_offset <= last; | ||
2015 | + cluster_offset += s->cluster_size) { | ||
2016 | + update_cluster_refcount(bs, cluster_offset >> s->cluster_bits, addend); | ||
2017 | + } | ||
2018 | +} | ||
2019 | + | ||
2020 | +#ifdef DEBUG_ALLOC | ||
2021 | +static void inc_refcounts(BlockDriverState *bs, | ||
2022 | + uint16_t *refcount_table, | ||
2023 | + int refcount_table_size, | ||
2024 | + int64_t offset, int64_t size) | ||
2025 | +{ | ||
2026 | + BDRVQcowState *s = bs->opaque; | ||
2027 | + int64_t start, last, cluster_offset; | ||
2028 | + int k; | ||
2029 | + | ||
2030 | + if (size <= 0) | ||
2031 | + return; | ||
2032 | + | ||
2033 | + start = offset & ~(s->cluster_size - 1); | ||
2034 | + last = (offset + size - 1) & ~(s->cluster_size - 1); | ||
2035 | + for(cluster_offset = start; cluster_offset <= last; | ||
2036 | + cluster_offset += s->cluster_size) { | ||
2037 | + k = cluster_offset >> s->cluster_bits; | ||
2038 | + if (k < 0 || k >= refcount_table_size) { | ||
2039 | + printf("ERROR: invalid cluster offset=0x%llx\n", cluster_offset); | ||
2040 | + } else { | ||
2041 | + if (++refcount_table[k] == 0) { | ||
2042 | + printf("ERROR: overflow cluster offset=0x%llx\n", cluster_offset); | ||
2043 | + } | ||
2044 | + } | ||
2045 | + } | ||
2046 | +} | ||
2047 | + | ||
2048 | +static int check_refcounts_l1(BlockDriverState *bs, | ||
2049 | + uint16_t *refcount_table, | ||
2050 | + int refcount_table_size, | ||
2051 | + int64_t l1_table_offset, int l1_size, | ||
2052 | + int check_copied) | ||
2053 | +{ | ||
2054 | + BDRVQcowState *s = bs->opaque; | ||
2055 | + uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2; | ||
2056 | + int l2_size, i, j, nb_csectors, refcount; | ||
2057 | + | ||
2058 | + l2_table = NULL; | ||
2059 | + l1_size2 = l1_size * sizeof(uint64_t); | ||
2060 | + | ||
2061 | + inc_refcounts(bs, refcount_table, refcount_table_size, | ||
2062 | + l1_table_offset, l1_size2); | ||
2063 | + | ||
2064 | + l1_table = qemu_malloc(l1_size2); | ||
2065 | + if (!l1_table) | ||
2066 | + goto fail; | ||
2067 | + if (bdrv_pread(s->hd, l1_table_offset, | ||
2068 | + l1_table, l1_size2) != l1_size2) | ||
2069 | + goto fail; | ||
2070 | + for(i = 0;i < l1_size; i++) | ||
2071 | + be64_to_cpus(&l1_table[i]); | ||
2072 | + | ||
2073 | + l2_size = s->l2_size * sizeof(uint64_t); | ||
2074 | + l2_table = qemu_malloc(l2_size); | ||
2075 | + if (!l2_table) | ||
2076 | + goto fail; | ||
2077 | + for(i = 0; i < l1_size; i++) { | ||
2078 | + l2_offset = l1_table[i]; | ||
2079 | + if (l2_offset) { | ||
2080 | + if (check_copied) { | ||
2081 | + refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED) >> s->cluster_bits); | ||
2082 | + if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) { | ||
2083 | + printf("ERROR OFLAG_COPIED: l2_offset=%llx refcount=%d\n", | ||
2084 | + l2_offset, refcount); | ||
2085 | + } | ||
2086 | + } | ||
2087 | + l2_offset &= ~QCOW_OFLAG_COPIED; | ||
2088 | + if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size) | ||
2089 | + goto fail; | ||
2090 | + for(j = 0; j < s->l2_size; j++) { | ||
2091 | + offset = be64_to_cpu(l2_table[j]); | ||
2092 | + if (offset != 0) { | ||
2093 | + if (offset & QCOW_OFLAG_COMPRESSED) { | ||
2094 | + if (offset & QCOW_OFLAG_COPIED) { | ||
2095 | + printf("ERROR: cluster %lld: copied flag must never be set for compressed clusters\n", | ||
2096 | + offset >> s->cluster_bits); | ||
2097 | + offset &= ~QCOW_OFLAG_COPIED; | ||
2098 | + } | ||
2099 | + nb_csectors = ((offset >> s->csize_shift) & | ||
2100 | + s->csize_mask) + 1; | ||
2101 | + offset &= s->cluster_offset_mask; | ||
2102 | + inc_refcounts(bs, refcount_table, | ||
2103 | + refcount_table_size, | ||
2104 | + offset & ~511, nb_csectors * 512); | ||
2105 | + } else { | ||
2106 | + if (check_copied) { | ||
2107 | + refcount = get_refcount(bs, (offset & ~QCOW_OFLAG_COPIED) >> s->cluster_bits); | ||
2108 | + if ((refcount == 1) != ((offset & QCOW_OFLAG_COPIED) != 0)) { | ||
2109 | + printf("ERROR OFLAG_COPIED: offset=%llx refcount=%d\n", | ||
2110 | + offset, refcount); | ||
2111 | + } | ||
2112 | + } | ||
2113 | + offset &= ~QCOW_OFLAG_COPIED; | ||
2114 | + inc_refcounts(bs, refcount_table, | ||
2115 | + refcount_table_size, | ||
2116 | + offset, s->cluster_size); | ||
2117 | + } | ||
2118 | + } | ||
2119 | + } | ||
2120 | + inc_refcounts(bs, refcount_table, | ||
2121 | + refcount_table_size, | ||
2122 | + l2_offset, | ||
2123 | + s->cluster_size); | ||
2124 | + } | ||
2125 | + } | ||
2126 | + qemu_free(l1_table); | ||
2127 | + qemu_free(l2_table); | ||
2128 | + return 0; | ||
2129 | + fail: | ||
2130 | + printf("ERROR: I/O error in check_refcounts_l1\n"); | ||
2131 | + qemu_free(l1_table); | ||
2132 | + qemu_free(l2_table); | ||
2133 | + return -EIO; | ||
2134 | +} | ||
2135 | + | ||
2136 | +static void check_refcounts(BlockDriverState *bs) | ||
2137 | +{ | ||
2138 | + BDRVQcowState *s = bs->opaque; | ||
2139 | + int64_t size; | ||
2140 | + int nb_clusters, refcount1, refcount2, i; | ||
2141 | + QCowSnapshot *sn; | ||
2142 | + uint16_t *refcount_table; | ||
2143 | + | ||
2144 | + size = bdrv_getlength(s->hd); | ||
2145 | + nb_clusters = (size + s->cluster_size - 1) >> s->cluster_bits; | ||
2146 | + refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t)); | ||
2147 | + | ||
2148 | + /* header */ | ||
2149 | + inc_refcounts(bs, refcount_table, nb_clusters, | ||
2150 | + 0, s->cluster_size); | ||
2151 | + | ||
2152 | + check_refcounts_l1(bs, refcount_table, nb_clusters, | ||
2153 | + s->l1_table_offset, s->l1_size, 1); | ||
2154 | + | ||
2155 | + /* snapshots */ | ||
2156 | + for(i = 0; i < s->nb_snapshots; i++) { | ||
2157 | + sn = s->snapshots + i; | ||
2158 | + check_refcounts_l1(bs, refcount_table, nb_clusters, | ||
2159 | + sn->l1_table_offset, sn->l1_size, 0); | ||
2160 | + } | ||
2161 | + inc_refcounts(bs, refcount_table, nb_clusters, | ||
2162 | + s->snapshots_offset, s->snapshots_size); | ||
2163 | + | ||
2164 | + /* refcount data */ | ||
2165 | + inc_refcounts(bs, refcount_table, nb_clusters, | ||
2166 | + s->refcount_table_offset, | ||
2167 | + s->refcount_table_size * sizeof(uint64_t)); | ||
2168 | + for(i = 0; i < s->refcount_table_size; i++) { | ||
2169 | + int64_t offset; | ||
2170 | + offset = s->refcount_table[i]; | ||
2171 | + if (offset != 0) { | ||
2172 | + inc_refcounts(bs, refcount_table, nb_clusters, | ||
2173 | + offset, s->cluster_size); | ||
2174 | + } | ||
2175 | + } | ||
2176 | + | ||
2177 | + /* compare ref counts */ | ||
2178 | + for(i = 0; i < nb_clusters; i++) { | ||
2179 | + refcount1 = get_refcount(bs, i); | ||
2180 | + refcount2 = refcount_table[i]; | ||
2181 | + if (refcount1 != refcount2) | ||
2182 | + printf("ERROR cluster %d refcount=%d reference=%d\n", | ||
2183 | + i, refcount1, refcount2); | ||
2184 | + } | ||
2185 | + | ||
2186 | + qemu_free(refcount_table); | ||
2187 | +} | ||
2188 | + | ||
2189 | +#if 0 | ||
2190 | +static void dump_refcounts(BlockDriverState *bs) | ||
2191 | +{ | ||
2192 | + BDRVQcowState *s = bs->opaque; | ||
2193 | + int64_t nb_clusters, k, k1, size; | ||
2194 | + int refcount; | ||
2195 | + | ||
2196 | + size = bdrv_getlength(s->hd); | ||
2197 | + nb_clusters = (size + s->cluster_size - 1) >> s->cluster_bits; | ||
2198 | + for(k = 0; k < nb_clusters;) { | ||
2199 | + k1 = k; | ||
2200 | + refcount = get_refcount(bs, k); | ||
2201 | + k++; | ||
2202 | + while (k < nb_clusters && get_refcount(bs, k) == refcount) | ||
2203 | + k++; | ||
2204 | + printf("%lld: refcount=%d nb=%lld\n", k, refcount, k - k1); | ||
2205 | + } | ||
2206 | +} | ||
2207 | +#endif | ||
2208 | +#endif | ||
2209 | + | ||
2210 | +BlockDriver bdrv_qcow2 = { | ||
2211 | + "qcow2", | ||
2212 | + sizeof(BDRVQcowState), | ||
2213 | + qcow_probe, | ||
2214 | + qcow_open, | ||
2215 | + NULL, | ||
2216 | + NULL, | ||
2217 | + qcow_close, | ||
2218 | + qcow_create, | ||
2219 | + qcow_flush, | ||
2220 | + qcow_is_allocated, | ||
2221 | + qcow_set_key, | ||
2222 | + qcow_make_empty, | ||
2223 | + | ||
2224 | + .bdrv_aio_new = qcow_aio_new, | ||
2225 | + .bdrv_aio_read = qcow_aio_read, | ||
2226 | + .bdrv_aio_write = qcow_aio_write, | ||
2227 | + .bdrv_aio_cancel = qcow_aio_cancel, | ||
2228 | + .bdrv_aio_delete = qcow_aio_delete, | ||
2229 | + .bdrv_write_compressed = qcow_write_compressed, | ||
2230 | + | ||
2231 | + .bdrv_snapshot_create = qcow_snapshot_create, | ||
2232 | + .bdrv_snapshot_goto = qcow_snapshot_goto, | ||
2233 | + .bdrv_snapshot_delete = qcow_snapshot_delete, | ||
2234 | + .bdrv_snapshot_list = qcow_snapshot_list, | ||
2235 | + .bdrv_get_info = qcow_get_info, | ||
2236 | +}; |