Commit 630530a6529bc3da9ab8aead7053dc753cb9ac77
1 parent
e5686953
Fix a race condition and non-leaf images growing in VMDK chains, by Igor
Lvovsky. git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@2987 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
1 changed file
with
117 additions
and
32 deletions
block-vmdk.c
... | ... | @@ -75,8 +75,25 @@ typedef struct BDRVVmdkState { |
75 | 75 | |
76 | 76 | unsigned int cluster_sectors; |
77 | 77 | uint32_t parent_cid; |
78 | + int is_parent; | |
78 | 79 | } BDRVVmdkState; |
79 | 80 | |
81 | +typedef struct VmdkMetaData { | |
82 | + uint32_t offset; | |
83 | + unsigned int l1_index; | |
84 | + unsigned int l2_index; | |
85 | + unsigned int l2_offset; | |
86 | + int valid; | |
87 | +} VmdkMetaData; | |
88 | + | |
89 | +typedef struct ActiveBDRVState{ | |
90 | + BlockDriverState *hd; // active image handler | |
91 | + uint64_t cluster_offset; // current write offset | |
92 | +}ActiveBDRVState; | |
93 | + | |
94 | +static ActiveBDRVState activeBDRV; | |
95 | + | |
96 | + | |
80 | 97 | static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) |
81 | 98 | { |
82 | 99 | uint32_t magic; |
... | ... | @@ -305,7 +322,7 @@ static void vmdk_parent_close(BlockDriverState *bs) |
305 | 322 | bdrv_close(bs->backing_hd); |
306 | 323 | } |
307 | 324 | |
308 | - | |
325 | +int parent_open = 0; | |
309 | 326 | static int vmdk_parent_open(BlockDriverState *bs, const char * filename) |
310 | 327 | { |
311 | 328 | BDRVVmdkState *s = bs->opaque; |
... | ... | @@ -339,8 +356,10 @@ static int vmdk_parent_open(BlockDriverState *bs, const char * filename) |
339 | 356 | bdrv_close(s->hd); |
340 | 357 | return -1; |
341 | 358 | } |
342 | - if (bdrv_open(s->hd->backing_hd, parent_img_name, 0) < 0) | |
359 | + parent_open = 1; | |
360 | + if (bdrv_open(s->hd->backing_hd, parent_img_name, BDRV_O_RDONLY) < 0) | |
343 | 361 | goto failure; |
362 | + parent_open = 0; | |
344 | 363 | } |
345 | 364 | |
346 | 365 | return 0; |
... | ... | @@ -352,6 +371,11 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) |
352 | 371 | uint32_t magic; |
353 | 372 | int l1_size, i, ret; |
354 | 373 | |
374 | + if (parent_open) | |
375 | + // Parent must be opened as RO. | |
376 | + flags = BDRV_O_RDONLY; | |
377 | + fprintf(stderr, "(VMDK) image open: flags=0x%x filename=%s\n", flags, bs->filename); | |
378 | + | |
355 | 379 | ret = bdrv_file_open(&s->hd, filename, flags); |
356 | 380 | if (ret < 0) |
357 | 381 | return ret; |
... | ... | @@ -387,6 +411,11 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) |
387 | 411 | s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9; |
388 | 412 | s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9; |
389 | 413 | |
414 | + if (parent_open) | |
415 | + s->is_parent = 1; | |
416 | + else | |
417 | + s->is_parent = 0; | |
418 | + | |
390 | 419 | // try to open parent images, if exist |
391 | 420 | if (vmdk_parent_open(bs, filename) != 0) |
392 | 421 | goto fail; |
... | ... | @@ -430,7 +459,8 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) |
430 | 459 | return -1; |
431 | 460 | } |
432 | 461 | |
433 | -static uint64_t get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate); | |
462 | +static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data, | |
463 | + uint64_t offset, int allocate); | |
434 | 464 | |
435 | 465 | static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset, |
436 | 466 | uint64_t offset, int allocate) |
... | ... | @@ -446,27 +476,55 @@ static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset, |
446 | 476 | |
447 | 477 | if (!vmdk_is_cid_valid(bs)) |
448 | 478 | return -1; |
449 | - parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, offset, allocate); | |
450 | - if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != | |
451 | - ps->cluster_sectors*512) | |
452 | - return -1; | |
453 | 479 | |
454 | - if (bdrv_pwrite(s->hd, cluster_offset << 9, whole_grain, sizeof(whole_grain)) != | |
455 | - sizeof(whole_grain)) | |
480 | + parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, NULL, offset, allocate); | |
481 | + | |
482 | + if (parent_cluster_offset) { | |
483 | + BDRVVmdkState *act_s = activeBDRV.hd->opaque; | |
484 | + | |
485 | + if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != ps->cluster_sectors*512) | |
486 | + return -1; | |
487 | + | |
488 | + //Write grain only into the active image | |
489 | + if (bdrv_pwrite(act_s->hd, activeBDRV.cluster_offset << 9, whole_grain, sizeof(whole_grain)) != sizeof(whole_grain)) | |
490 | + return -1; | |
491 | + } | |
492 | + } | |
493 | + return 0; | |
494 | +} | |
495 | + | |
496 | +static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data) | |
497 | +{ | |
498 | + BDRVVmdkState *s = bs->opaque; | |
499 | + | |
500 | + /* update L2 table */ | |
501 | + if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), | |
502 | + &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset)) | |
503 | + return -1; | |
504 | + /* update backup L2 table */ | |
505 | + if (s->l1_backup_table_offset != 0) { | |
506 | + m_data->l2_offset = s->l1_backup_table[m_data->l1_index]; | |
507 | + if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), | |
508 | + &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset)) | |
456 | 509 | return -1; |
457 | 510 | } |
511 | + | |
458 | 512 | return 0; |
459 | 513 | } |
460 | 514 | |
461 | -static uint64_t get_cluster_offset(BlockDriverState *bs, | |
515 | +static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data, | |
462 | 516 | uint64_t offset, int allocate) |
463 | 517 | { |
464 | 518 | BDRVVmdkState *s = bs->opaque; |
465 | 519 | unsigned int l1_index, l2_offset, l2_index; |
466 | 520 | int min_index, i, j; |
467 | - uint32_t min_count, *l2_table, tmp; | |
521 | + uint32_t min_count, *l2_table, tmp = 0; | |
468 | 522 | uint64_t cluster_offset; |
469 | - | |
523 | + int status; | |
524 | + | |
525 | + if (m_data) | |
526 | + m_data->valid = 0; | |
527 | + | |
470 | 528 | l1_index = (offset >> 9) / s->l1_entry_sectors; |
471 | 529 | if (l1_index >= s->l1_size) |
472 | 530 | return 0; |
... | ... | @@ -504,32 +562,45 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, |
504 | 562 | found: |
505 | 563 | l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size; |
506 | 564 | cluster_offset = le32_to_cpu(l2_table[l2_index]); |
565 | + | |
507 | 566 | if (!cluster_offset) { |
508 | 567 | struct stat file_buf; |
509 | 568 | |
510 | 569 | if (!allocate) |
511 | 570 | return 0; |
512 | - stat(s->hd->filename, &file_buf); | |
513 | - cluster_offset = file_buf.st_size; | |
514 | - bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9)); | |
515 | - | |
516 | - cluster_offset >>= 9; | |
517 | - /* update L2 table */ | |
518 | - tmp = cpu_to_le32(cluster_offset); | |
519 | - l2_table[l2_index] = tmp; | |
520 | - if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)), | |
521 | - &tmp, sizeof(tmp)) != sizeof(tmp)) | |
522 | - return 0; | |
523 | - /* update backup L2 table */ | |
524 | - if (s->l1_backup_table_offset != 0) { | |
525 | - l2_offset = s->l1_backup_table[l1_index]; | |
526 | - if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)), | |
527 | - &tmp, sizeof(tmp)) != sizeof(tmp)) | |
571 | + // Avoid the L2 tables update for the images that have snapshots. | |
572 | + if (!s->is_parent) { | |
573 | + status = stat(s->hd->filename, &file_buf); | |
574 | + if (status == -1) { | |
575 | + fprintf(stderr, "(VMDK) Fail file stat: filename =%s size=0x%lx errno=%s\n", | |
576 | + s->hd->filename, (uint64_t)file_buf.st_size, strerror(errno)); | |
528 | 577 | return 0; |
578 | + } | |
579 | + cluster_offset = file_buf.st_size; | |
580 | + bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9)); | |
581 | + | |
582 | + cluster_offset >>= 9; | |
583 | + tmp = cpu_to_le32(cluster_offset); | |
584 | + l2_table[l2_index] = tmp; | |
585 | + // Save the active image state | |
586 | + activeBDRV.cluster_offset = cluster_offset; | |
587 | + activeBDRV.hd = bs; | |
529 | 588 | } |
530 | - | |
589 | + /* First of all we write grain itself, to avoid race condition | |
590 | + * that may to corrupt the image. | |
591 | + * This problem may occur because of insufficient space on host disk | |
592 | + * or inappropriate VM shutdown. | |
593 | + */ | |
531 | 594 | if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1) |
532 | 595 | return 0; |
596 | + | |
597 | + if (m_data) { | |
598 | + m_data->offset = tmp; | |
599 | + m_data->l1_index = l1_index; | |
600 | + m_data->l2_index = l2_index; | |
601 | + m_data->l2_offset = l2_offset; | |
602 | + m_data->valid = 1; | |
603 | + } | |
533 | 604 | } |
534 | 605 | cluster_offset <<= 9; |
535 | 606 | return cluster_offset; |
... | ... | @@ -542,7 +613,7 @@ static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num, |
542 | 613 | int index_in_cluster, n; |
543 | 614 | uint64_t cluster_offset; |
544 | 615 | |
545 | - cluster_offset = get_cluster_offset(bs, sector_num << 9, 0); | |
616 | + cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0); | |
546 | 617 | index_in_cluster = sector_num % s->cluster_sectors; |
547 | 618 | n = s->cluster_sectors - index_in_cluster; |
548 | 619 | if (n > nb_sectors) |
... | ... | @@ -559,7 +630,7 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, |
559 | 630 | uint64_t cluster_offset; |
560 | 631 | |
561 | 632 | while (nb_sectors > 0) { |
562 | - cluster_offset = get_cluster_offset(bs, sector_num << 9, 0); | |
633 | + cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0); | |
563 | 634 | index_in_cluster = sector_num % s->cluster_sectors; |
564 | 635 | n = s->cluster_sectors - index_in_cluster; |
565 | 636 | if (n > nb_sectors) |
... | ... | @@ -590,20 +661,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, |
590 | 661 | const uint8_t *buf, int nb_sectors) |
591 | 662 | { |
592 | 663 | BDRVVmdkState *s = bs->opaque; |
664 | + VmdkMetaData m_data; | |
593 | 665 | int index_in_cluster, n; |
594 | 666 | uint64_t cluster_offset; |
595 | 667 | static int cid_update = 0; |
596 | 668 | |
669 | + if (sector_num > bs->total_sectors) { | |
670 | + fprintf(stderr, | |
671 | + "(VMDK) Wrong offset: sector_num=0x%lx total_sectors=0x%lx\n", | |
672 | + sector_num, bs->total_sectors); | |
673 | + return -1; | |
674 | + } | |
675 | + | |
597 | 676 | while (nb_sectors > 0) { |
598 | 677 | index_in_cluster = sector_num & (s->cluster_sectors - 1); |
599 | 678 | n = s->cluster_sectors - index_in_cluster; |
600 | 679 | if (n > nb_sectors) |
601 | 680 | n = nb_sectors; |
602 | - cluster_offset = get_cluster_offset(bs, sector_num << 9, 1); | |
681 | + cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1); | |
603 | 682 | if (!cluster_offset) |
604 | 683 | return -1; |
684 | + | |
605 | 685 | if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512) |
606 | 686 | return -1; |
687 | + if (m_data.valid) { | |
688 | + /* update L2 tables */ | |
689 | + if (vmdk_L2update(bs, &m_data) == -1) | |
690 | + return -1; | |
691 | + } | |
607 | 692 | nb_sectors -= n; |
608 | 693 | sector_num += n; |
609 | 694 | buf += n * 512; | ... | ... |