Commit 630530a6529bc3da9ab8aead7053dc753cb9ac77
1 parent
e5686953
Fix a race condition and non-leaf images growing in VMDK chains, by Igor
Lvovsky. git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@2987 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
1 changed file
with
117 additions
and
32 deletions
block-vmdk.c
| ... | ... | @@ -75,8 +75,25 @@ typedef struct BDRVVmdkState { |
| 75 | 75 | |
| 76 | 76 | unsigned int cluster_sectors; |
| 77 | 77 | uint32_t parent_cid; |
| 78 | + int is_parent; | |
| 78 | 79 | } BDRVVmdkState; |
| 79 | 80 | |
| 81 | +typedef struct VmdkMetaData { | |
| 82 | + uint32_t offset; | |
| 83 | + unsigned int l1_index; | |
| 84 | + unsigned int l2_index; | |
| 85 | + unsigned int l2_offset; | |
| 86 | + int valid; | |
| 87 | +} VmdkMetaData; | |
| 88 | + | |
| 89 | +typedef struct ActiveBDRVState{ | |
| 90 | + BlockDriverState *hd; // active image handler | |
| 91 | + uint64_t cluster_offset; // current write offset | |
| 92 | +}ActiveBDRVState; | |
| 93 | + | |
| 94 | +static ActiveBDRVState activeBDRV; | |
| 95 | + | |
| 96 | + | |
| 80 | 97 | static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) |
| 81 | 98 | { |
| 82 | 99 | uint32_t magic; |
| ... | ... | @@ -305,7 +322,7 @@ static void vmdk_parent_close(BlockDriverState *bs) |
| 305 | 322 | bdrv_close(bs->backing_hd); |
| 306 | 323 | } |
| 307 | 324 | |
| 308 | - | |
| 325 | +int parent_open = 0; | |
| 309 | 326 | static int vmdk_parent_open(BlockDriverState *bs, const char * filename) |
| 310 | 327 | { |
| 311 | 328 | BDRVVmdkState *s = bs->opaque; |
| ... | ... | @@ -339,8 +356,10 @@ static int vmdk_parent_open(BlockDriverState *bs, const char * filename) |
| 339 | 356 | bdrv_close(s->hd); |
| 340 | 357 | return -1; |
| 341 | 358 | } |
| 342 | - if (bdrv_open(s->hd->backing_hd, parent_img_name, 0) < 0) | |
| 359 | + parent_open = 1; | |
| 360 | + if (bdrv_open(s->hd->backing_hd, parent_img_name, BDRV_O_RDONLY) < 0) | |
| 343 | 361 | goto failure; |
| 362 | + parent_open = 0; | |
| 344 | 363 | } |
| 345 | 364 | |
| 346 | 365 | return 0; |
| ... | ... | @@ -352,6 +371,11 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) |
| 352 | 371 | uint32_t magic; |
| 353 | 372 | int l1_size, i, ret; |
| 354 | 373 | |
| 374 | + if (parent_open) | |
| 375 | + // Parent must be opened as RO. | |
| 376 | + flags = BDRV_O_RDONLY; | |
| 377 | + fprintf(stderr, "(VMDK) image open: flags=0x%x filename=%s\n", flags, bs->filename); | |
| 378 | + | |
| 355 | 379 | ret = bdrv_file_open(&s->hd, filename, flags); |
| 356 | 380 | if (ret < 0) |
| 357 | 381 | return ret; |
| ... | ... | @@ -387,6 +411,11 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) |
| 387 | 411 | s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9; |
| 388 | 412 | s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9; |
| 389 | 413 | |
| 414 | + if (parent_open) | |
| 415 | + s->is_parent = 1; | |
| 416 | + else | |
| 417 | + s->is_parent = 0; | |
| 418 | + | |
| 390 | 419 | // try to open parent images, if exist |
| 391 | 420 | if (vmdk_parent_open(bs, filename) != 0) |
| 392 | 421 | goto fail; |
| ... | ... | @@ -430,7 +459,8 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) |
| 430 | 459 | return -1; |
| 431 | 460 | } |
| 432 | 461 | |
| 433 | -static uint64_t get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate); | |
| 462 | +static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data, | |
| 463 | + uint64_t offset, int allocate); | |
| 434 | 464 | |
| 435 | 465 | static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset, |
| 436 | 466 | uint64_t offset, int allocate) |
| ... | ... | @@ -446,27 +476,55 @@ static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset, |
| 446 | 476 | |
| 447 | 477 | if (!vmdk_is_cid_valid(bs)) |
| 448 | 478 | return -1; |
| 449 | - parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, offset, allocate); | |
| 450 | - if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != | |
| 451 | - ps->cluster_sectors*512) | |
| 452 | - return -1; | |
| 453 | 479 | |
| 454 | - if (bdrv_pwrite(s->hd, cluster_offset << 9, whole_grain, sizeof(whole_grain)) != | |
| 455 | - sizeof(whole_grain)) | |
| 480 | + parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, NULL, offset, allocate); | |
| 481 | + | |
| 482 | + if (parent_cluster_offset) { | |
| 483 | + BDRVVmdkState *act_s = activeBDRV.hd->opaque; | |
| 484 | + | |
| 485 | + if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != ps->cluster_sectors*512) | |
| 486 | + return -1; | |
| 487 | + | |
| 488 | + //Write grain only into the active image | |
| 489 | + if (bdrv_pwrite(act_s->hd, activeBDRV.cluster_offset << 9, whole_grain, sizeof(whole_grain)) != sizeof(whole_grain)) | |
| 490 | + return -1; | |
| 491 | + } | |
| 492 | + } | |
| 493 | + return 0; | |
| 494 | +} | |
| 495 | + | |
| 496 | +static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data) | |
| 497 | +{ | |
| 498 | + BDRVVmdkState *s = bs->opaque; | |
| 499 | + | |
| 500 | + /* update L2 table */ | |
| 501 | + if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), | |
| 502 | + &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset)) | |
| 503 | + return -1; | |
| 504 | + /* update backup L2 table */ | |
| 505 | + if (s->l1_backup_table_offset != 0) { | |
| 506 | + m_data->l2_offset = s->l1_backup_table[m_data->l1_index]; | |
| 507 | + if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), | |
| 508 | + &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset)) | |
| 456 | 509 | return -1; |
| 457 | 510 | } |
| 511 | + | |
| 458 | 512 | return 0; |
| 459 | 513 | } |
| 460 | 514 | |
| 461 | -static uint64_t get_cluster_offset(BlockDriverState *bs, | |
| 515 | +static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data, | |
| 462 | 516 | uint64_t offset, int allocate) |
| 463 | 517 | { |
| 464 | 518 | BDRVVmdkState *s = bs->opaque; |
| 465 | 519 | unsigned int l1_index, l2_offset, l2_index; |
| 466 | 520 | int min_index, i, j; |
| 467 | - uint32_t min_count, *l2_table, tmp; | |
| 521 | + uint32_t min_count, *l2_table, tmp = 0; | |
| 468 | 522 | uint64_t cluster_offset; |
| 469 | - | |
| 523 | + int status; | |
| 524 | + | |
| 525 | + if (m_data) | |
| 526 | + m_data->valid = 0; | |
| 527 | + | |
| 470 | 528 | l1_index = (offset >> 9) / s->l1_entry_sectors; |
| 471 | 529 | if (l1_index >= s->l1_size) |
| 472 | 530 | return 0; |
| ... | ... | @@ -504,32 +562,45 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, |
| 504 | 562 | found: |
| 505 | 563 | l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size; |
| 506 | 564 | cluster_offset = le32_to_cpu(l2_table[l2_index]); |
| 565 | + | |
| 507 | 566 | if (!cluster_offset) { |
| 508 | 567 | struct stat file_buf; |
| 509 | 568 | |
| 510 | 569 | if (!allocate) |
| 511 | 570 | return 0; |
| 512 | - stat(s->hd->filename, &file_buf); | |
| 513 | - cluster_offset = file_buf.st_size; | |
| 514 | - bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9)); | |
| 515 | - | |
| 516 | - cluster_offset >>= 9; | |
| 517 | - /* update L2 table */ | |
| 518 | - tmp = cpu_to_le32(cluster_offset); | |
| 519 | - l2_table[l2_index] = tmp; | |
| 520 | - if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)), | |
| 521 | - &tmp, sizeof(tmp)) != sizeof(tmp)) | |
| 522 | - return 0; | |
| 523 | - /* update backup L2 table */ | |
| 524 | - if (s->l1_backup_table_offset != 0) { | |
| 525 | - l2_offset = s->l1_backup_table[l1_index]; | |
| 526 | - if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)), | |
| 527 | - &tmp, sizeof(tmp)) != sizeof(tmp)) | |
| 571 | + // Avoid the L2 tables update for the images that have snapshots. | |
| 572 | + if (!s->is_parent) { | |
| 573 | + status = stat(s->hd->filename, &file_buf); | |
| 574 | + if (status == -1) { | |
| 575 | + fprintf(stderr, "(VMDK) Fail file stat: filename =%s size=0x%lx errno=%s\n", | |
| 576 | + s->hd->filename, (uint64_t)file_buf.st_size, strerror(errno)); | |
| 528 | 577 | return 0; |
| 578 | + } | |
| 579 | + cluster_offset = file_buf.st_size; | |
| 580 | + bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9)); | |
| 581 | + | |
| 582 | + cluster_offset >>= 9; | |
| 583 | + tmp = cpu_to_le32(cluster_offset); | |
| 584 | + l2_table[l2_index] = tmp; | |
| 585 | + // Save the active image state | |
| 586 | + activeBDRV.cluster_offset = cluster_offset; | |
| 587 | + activeBDRV.hd = bs; | |
| 529 | 588 | } |
| 530 | - | |
| 589 | + /* First of all we write grain itself, to avoid race condition | |
| 590 | + * that may to corrupt the image. | |
| 591 | + * This problem may occur because of insufficient space on host disk | |
| 592 | + * or inappropriate VM shutdown. | |
| 593 | + */ | |
| 531 | 594 | if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1) |
| 532 | 595 | return 0; |
| 596 | + | |
| 597 | + if (m_data) { | |
| 598 | + m_data->offset = tmp; | |
| 599 | + m_data->l1_index = l1_index; | |
| 600 | + m_data->l2_index = l2_index; | |
| 601 | + m_data->l2_offset = l2_offset; | |
| 602 | + m_data->valid = 1; | |
| 603 | + } | |
| 533 | 604 | } |
| 534 | 605 | cluster_offset <<= 9; |
| 535 | 606 | return cluster_offset; |
| ... | ... | @@ -542,7 +613,7 @@ static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num, |
| 542 | 613 | int index_in_cluster, n; |
| 543 | 614 | uint64_t cluster_offset; |
| 544 | 615 | |
| 545 | - cluster_offset = get_cluster_offset(bs, sector_num << 9, 0); | |
| 616 | + cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0); | |
| 546 | 617 | index_in_cluster = sector_num % s->cluster_sectors; |
| 547 | 618 | n = s->cluster_sectors - index_in_cluster; |
| 548 | 619 | if (n > nb_sectors) |
| ... | ... | @@ -559,7 +630,7 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, |
| 559 | 630 | uint64_t cluster_offset; |
| 560 | 631 | |
| 561 | 632 | while (nb_sectors > 0) { |
| 562 | - cluster_offset = get_cluster_offset(bs, sector_num << 9, 0); | |
| 633 | + cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0); | |
| 563 | 634 | index_in_cluster = sector_num % s->cluster_sectors; |
| 564 | 635 | n = s->cluster_sectors - index_in_cluster; |
| 565 | 636 | if (n > nb_sectors) |
| ... | ... | @@ -590,20 +661,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, |
| 590 | 661 | const uint8_t *buf, int nb_sectors) |
| 591 | 662 | { |
| 592 | 663 | BDRVVmdkState *s = bs->opaque; |
| 664 | + VmdkMetaData m_data; | |
| 593 | 665 | int index_in_cluster, n; |
| 594 | 666 | uint64_t cluster_offset; |
| 595 | 667 | static int cid_update = 0; |
| 596 | 668 | |
| 669 | + if (sector_num > bs->total_sectors) { | |
| 670 | + fprintf(stderr, | |
| 671 | + "(VMDK) Wrong offset: sector_num=0x%lx total_sectors=0x%lx\n", | |
| 672 | + sector_num, bs->total_sectors); | |
| 673 | + return -1; | |
| 674 | + } | |
| 675 | + | |
| 597 | 676 | while (nb_sectors > 0) { |
| 598 | 677 | index_in_cluster = sector_num & (s->cluster_sectors - 1); |
| 599 | 678 | n = s->cluster_sectors - index_in_cluster; |
| 600 | 679 | if (n > nb_sectors) |
| 601 | 680 | n = nb_sectors; |
| 602 | - cluster_offset = get_cluster_offset(bs, sector_num << 9, 1); | |
| 681 | + cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1); | |
| 603 | 682 | if (!cluster_offset) |
| 604 | 683 | return -1; |
| 684 | + | |
| 605 | 685 | if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512) |
| 606 | 686 | return -1; |
| 687 | + if (m_data.valid) { | |
| 688 | + /* update L2 tables */ | |
| 689 | + if (vmdk_L2update(bs, &m_data) == -1) | |
| 690 | + return -1; | |
| 691 | + } | |
| 607 | 692 | nb_sectors -= n; |
| 608 | 693 | sector_num += n; |
| 609 | 694 | buf += n * 512; | ... | ... |