Commit 630530a6529bc3da9ab8aead7053dc753cb9ac77
1 parent
e5686953
Fix a race condition and non-leaf images growing in VMDK chains, by Igor
Lvovsky. git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@2987 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 1 changed file with 117 additions and 32 deletions
block-vmdk.c
@@ -75,8 +75,25 @@ typedef struct BDRVVmdkState { | @@ -75,8 +75,25 @@ typedef struct BDRVVmdkState { | ||
75 | 75 | ||
76 | unsigned int cluster_sectors; | 76 | unsigned int cluster_sectors; |
77 | uint32_t parent_cid; | 77 | uint32_t parent_cid; |
78 | + int is_parent; | ||
78 | } BDRVVmdkState; | 79 | } BDRVVmdkState; |
79 | 80 | ||
81 | +typedef struct VmdkMetaData { | ||
82 | + uint32_t offset; | ||
83 | + unsigned int l1_index; | ||
84 | + unsigned int l2_index; | ||
85 | + unsigned int l2_offset; | ||
86 | + int valid; | ||
87 | +} VmdkMetaData; | ||
88 | + | ||
89 | +typedef struct ActiveBDRVState{ | ||
90 | + BlockDriverState *hd; // active image handler | ||
91 | + uint64_t cluster_offset; // current write offset | ||
92 | +}ActiveBDRVState; | ||
93 | + | ||
94 | +static ActiveBDRVState activeBDRV; | ||
95 | + | ||
96 | + | ||
80 | static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) | 97 | static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) |
81 | { | 98 | { |
82 | uint32_t magic; | 99 | uint32_t magic; |
@@ -305,7 +322,7 @@ static void vmdk_parent_close(BlockDriverState *bs) | @@ -305,7 +322,7 @@ static void vmdk_parent_close(BlockDriverState *bs) | ||
305 | bdrv_close(bs->backing_hd); | 322 | bdrv_close(bs->backing_hd); |
306 | } | 323 | } |
307 | 324 | ||
308 | - | 325 | +int parent_open = 0; |
309 | static int vmdk_parent_open(BlockDriverState *bs, const char * filename) | 326 | static int vmdk_parent_open(BlockDriverState *bs, const char * filename) |
310 | { | 327 | { |
311 | BDRVVmdkState *s = bs->opaque; | 328 | BDRVVmdkState *s = bs->opaque; |
@@ -339,8 +356,10 @@ static int vmdk_parent_open(BlockDriverState *bs, const char * filename) | @@ -339,8 +356,10 @@ static int vmdk_parent_open(BlockDriverState *bs, const char * filename) | ||
339 | bdrv_close(s->hd); | 356 | bdrv_close(s->hd); |
340 | return -1; | 357 | return -1; |
341 | } | 358 | } |
342 | - if (bdrv_open(s->hd->backing_hd, parent_img_name, 0) < 0) | 359 | + parent_open = 1; |
360 | + if (bdrv_open(s->hd->backing_hd, parent_img_name, BDRV_O_RDONLY) < 0) | ||
343 | goto failure; | 361 | goto failure; |
362 | + parent_open = 0; | ||
344 | } | 363 | } |
345 | 364 | ||
346 | return 0; | 365 | return 0; |
@@ -352,6 +371,11 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) | @@ -352,6 +371,11 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) | ||
352 | uint32_t magic; | 371 | uint32_t magic; |
353 | int l1_size, i, ret; | 372 | int l1_size, i, ret; |
354 | 373 | ||
374 | + if (parent_open) | ||
375 | + // Parent must be opened as RO. | ||
376 | + flags = BDRV_O_RDONLY; | ||
377 | + fprintf(stderr, "(VMDK) image open: flags=0x%x filename=%s\n", flags, bs->filename); | ||
378 | + | ||
355 | ret = bdrv_file_open(&s->hd, filename, flags); | 379 | ret = bdrv_file_open(&s->hd, filename, flags); |
356 | if (ret < 0) | 380 | if (ret < 0) |
357 | return ret; | 381 | return ret; |
@@ -387,6 +411,11 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) | @@ -387,6 +411,11 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) | ||
387 | s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9; | 411 | s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9; |
388 | s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9; | 412 | s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9; |
389 | 413 | ||
414 | + if (parent_open) | ||
415 | + s->is_parent = 1; | ||
416 | + else | ||
417 | + s->is_parent = 0; | ||
418 | + | ||
390 | // try to open parent images, if exist | 419 | // try to open parent images, if exist |
391 | if (vmdk_parent_open(bs, filename) != 0) | 420 | if (vmdk_parent_open(bs, filename) != 0) |
392 | goto fail; | 421 | goto fail; |
@@ -430,7 +459,8 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) | @@ -430,7 +459,8 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) | ||
430 | return -1; | 459 | return -1; |
431 | } | 460 | } |
432 | 461 | ||
433 | -static uint64_t get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate); | 462 | +static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data, |
463 | + uint64_t offset, int allocate); | ||
434 | 464 | ||
435 | static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset, | 465 | static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset, |
436 | uint64_t offset, int allocate) | 466 | uint64_t offset, int allocate) |
@@ -446,27 +476,55 @@ static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset, | @@ -446,27 +476,55 @@ static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset, | ||
446 | 476 | ||
447 | if (!vmdk_is_cid_valid(bs)) | 477 | if (!vmdk_is_cid_valid(bs)) |
448 | return -1; | 478 | return -1; |
449 | - parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, offset, allocate); | ||
450 | - if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != | ||
451 | - ps->cluster_sectors*512) | ||
452 | - return -1; | ||
453 | 479 | ||
454 | - if (bdrv_pwrite(s->hd, cluster_offset << 9, whole_grain, sizeof(whole_grain)) != | ||
455 | - sizeof(whole_grain)) | 480 | + parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, NULL, offset, allocate); |
481 | + | ||
482 | + if (parent_cluster_offset) { | ||
483 | + BDRVVmdkState *act_s = activeBDRV.hd->opaque; | ||
484 | + | ||
485 | + if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != ps->cluster_sectors*512) | ||
486 | + return -1; | ||
487 | + | ||
488 | + //Write grain only into the active image | ||
489 | + if (bdrv_pwrite(act_s->hd, activeBDRV.cluster_offset << 9, whole_grain, sizeof(whole_grain)) != sizeof(whole_grain)) | ||
490 | + return -1; | ||
491 | + } | ||
492 | + } | ||
493 | + return 0; | ||
494 | +} | ||
495 | + | ||
496 | +static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data) | ||
497 | +{ | ||
498 | + BDRVVmdkState *s = bs->opaque; | ||
499 | + | ||
500 | + /* update L2 table */ | ||
501 | + if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), | ||
502 | + &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset)) | ||
503 | + return -1; | ||
504 | + /* update backup L2 table */ | ||
505 | + if (s->l1_backup_table_offset != 0) { | ||
506 | + m_data->l2_offset = s->l1_backup_table[m_data->l1_index]; | ||
507 | + if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), | ||
508 | + &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset)) | ||
456 | return -1; | 509 | return -1; |
457 | } | 510 | } |
511 | + | ||
458 | return 0; | 512 | return 0; |
459 | } | 513 | } |
460 | 514 | ||
461 | -static uint64_t get_cluster_offset(BlockDriverState *bs, | 515 | +static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data, |
462 | uint64_t offset, int allocate) | 516 | uint64_t offset, int allocate) |
463 | { | 517 | { |
464 | BDRVVmdkState *s = bs->opaque; | 518 | BDRVVmdkState *s = bs->opaque; |
465 | unsigned int l1_index, l2_offset, l2_index; | 519 | unsigned int l1_index, l2_offset, l2_index; |
466 | int min_index, i, j; | 520 | int min_index, i, j; |
467 | - uint32_t min_count, *l2_table, tmp; | 521 | + uint32_t min_count, *l2_table, tmp = 0; |
468 | uint64_t cluster_offset; | 522 | uint64_t cluster_offset; |
469 | - | 523 | + int status; |
524 | + | ||
525 | + if (m_data) | ||
526 | + m_data->valid = 0; | ||
527 | + | ||
470 | l1_index = (offset >> 9) / s->l1_entry_sectors; | 528 | l1_index = (offset >> 9) / s->l1_entry_sectors; |
471 | if (l1_index >= s->l1_size) | 529 | if (l1_index >= s->l1_size) |
472 | return 0; | 530 | return 0; |
@@ -504,32 +562,45 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, | @@ -504,32 +562,45 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, | ||
504 | found: | 562 | found: |
505 | l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size; | 563 | l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size; |
506 | cluster_offset = le32_to_cpu(l2_table[l2_index]); | 564 | cluster_offset = le32_to_cpu(l2_table[l2_index]); |
565 | + | ||
507 | if (!cluster_offset) { | 566 | if (!cluster_offset) { |
508 | struct stat file_buf; | 567 | struct stat file_buf; |
509 | 568 | ||
510 | if (!allocate) | 569 | if (!allocate) |
511 | return 0; | 570 | return 0; |
512 | - stat(s->hd->filename, &file_buf); | ||
513 | - cluster_offset = file_buf.st_size; | ||
514 | - bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9)); | ||
515 | - | ||
516 | - cluster_offset >>= 9; | ||
517 | - /* update L2 table */ | ||
518 | - tmp = cpu_to_le32(cluster_offset); | ||
519 | - l2_table[l2_index] = tmp; | ||
520 | - if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)), | ||
521 | - &tmp, sizeof(tmp)) != sizeof(tmp)) | ||
522 | - return 0; | ||
523 | - /* update backup L2 table */ | ||
524 | - if (s->l1_backup_table_offset != 0) { | ||
525 | - l2_offset = s->l1_backup_table[l1_index]; | ||
526 | - if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)), | ||
527 | - &tmp, sizeof(tmp)) != sizeof(tmp)) | 571 | + // Avoid the L2 tables update for the images that have snapshots. |
572 | + if (!s->is_parent) { | ||
573 | + status = stat(s->hd->filename, &file_buf); | ||
574 | + if (status == -1) { | ||
575 | + fprintf(stderr, "(VMDK) Fail file stat: filename =%s size=0x%lx errno=%s\n", | ||
576 | + s->hd->filename, (uint64_t)file_buf.st_size, strerror(errno)); | ||
528 | return 0; | 577 | return 0; |
578 | + } | ||
579 | + cluster_offset = file_buf.st_size; | ||
580 | + bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9)); | ||
581 | + | ||
582 | + cluster_offset >>= 9; | ||
583 | + tmp = cpu_to_le32(cluster_offset); | ||
584 | + l2_table[l2_index] = tmp; | ||
585 | + // Save the active image state | ||
586 | + activeBDRV.cluster_offset = cluster_offset; | ||
587 | + activeBDRV.hd = bs; | ||
529 | } | 588 | } |
530 | - | 589 | + /* First of all we write grain itself, to avoid race condition |
590 | + * that may to corrupt the image. | ||
591 | + * This problem may occur because of insufficient space on host disk | ||
592 | + * or inappropriate VM shutdown. | ||
593 | + */ | ||
531 | if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1) | 594 | if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1) |
532 | return 0; | 595 | return 0; |
596 | + | ||
597 | + if (m_data) { | ||
598 | + m_data->offset = tmp; | ||
599 | + m_data->l1_index = l1_index; | ||
600 | + m_data->l2_index = l2_index; | ||
601 | + m_data->l2_offset = l2_offset; | ||
602 | + m_data->valid = 1; | ||
603 | + } | ||
533 | } | 604 | } |
534 | cluster_offset <<= 9; | 605 | cluster_offset <<= 9; |
535 | return cluster_offset; | 606 | return cluster_offset; |
@@ -542,7 +613,7 @@ static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num, | @@ -542,7 +613,7 @@ static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num, | ||
542 | int index_in_cluster, n; | 613 | int index_in_cluster, n; |
543 | uint64_t cluster_offset; | 614 | uint64_t cluster_offset; |
544 | 615 | ||
545 | - cluster_offset = get_cluster_offset(bs, sector_num << 9, 0); | 616 | + cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0); |
546 | index_in_cluster = sector_num % s->cluster_sectors; | 617 | index_in_cluster = sector_num % s->cluster_sectors; |
547 | n = s->cluster_sectors - index_in_cluster; | 618 | n = s->cluster_sectors - index_in_cluster; |
548 | if (n > nb_sectors) | 619 | if (n > nb_sectors) |
@@ -559,7 +630,7 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, | @@ -559,7 +630,7 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, | ||
559 | uint64_t cluster_offset; | 630 | uint64_t cluster_offset; |
560 | 631 | ||
561 | while (nb_sectors > 0) { | 632 | while (nb_sectors > 0) { |
562 | - cluster_offset = get_cluster_offset(bs, sector_num << 9, 0); | 633 | + cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0); |
563 | index_in_cluster = sector_num % s->cluster_sectors; | 634 | index_in_cluster = sector_num % s->cluster_sectors; |
564 | n = s->cluster_sectors - index_in_cluster; | 635 | n = s->cluster_sectors - index_in_cluster; |
565 | if (n > nb_sectors) | 636 | if (n > nb_sectors) |
@@ -590,20 +661,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, | @@ -590,20 +661,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, | ||
590 | const uint8_t *buf, int nb_sectors) | 661 | const uint8_t *buf, int nb_sectors) |
591 | { | 662 | { |
592 | BDRVVmdkState *s = bs->opaque; | 663 | BDRVVmdkState *s = bs->opaque; |
664 | + VmdkMetaData m_data; | ||
593 | int index_in_cluster, n; | 665 | int index_in_cluster, n; |
594 | uint64_t cluster_offset; | 666 | uint64_t cluster_offset; |
595 | static int cid_update = 0; | 667 | static int cid_update = 0; |
596 | 668 | ||
669 | + if (sector_num > bs->total_sectors) { | ||
670 | + fprintf(stderr, | ||
671 | + "(VMDK) Wrong offset: sector_num=0x%lx total_sectors=0x%lx\n", | ||
672 | + sector_num, bs->total_sectors); | ||
673 | + return -1; | ||
674 | + } | ||
675 | + | ||
597 | while (nb_sectors > 0) { | 676 | while (nb_sectors > 0) { |
598 | index_in_cluster = sector_num & (s->cluster_sectors - 1); | 677 | index_in_cluster = sector_num & (s->cluster_sectors - 1); |
599 | n = s->cluster_sectors - index_in_cluster; | 678 | n = s->cluster_sectors - index_in_cluster; |
600 | if (n > nb_sectors) | 679 | if (n > nb_sectors) |
601 | n = nb_sectors; | 680 | n = nb_sectors; |
602 | - cluster_offset = get_cluster_offset(bs, sector_num << 9, 1); | 681 | + cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1); |
603 | if (!cluster_offset) | 682 | if (!cluster_offset) |
604 | return -1; | 683 | return -1; |
684 | + | ||
605 | if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512) | 685 | if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512) |
606 | return -1; | 686 | return -1; |
687 | + if (m_data.valid) { | ||
688 | + /* update L2 tables */ | ||
689 | + if (vmdk_L2update(bs, &m_data) == -1) | ||
690 | + return -1; | ||
691 | + } | ||
607 | nb_sectors -= n; | 692 | nb_sectors -= n; |
608 | sector_num += n; | 693 | sector_num += n; |
609 | buf += n * 512; | 694 | buf += n * 512; |