Commit 630530a6529bc3da9ab8aead7053dc753cb9ac77

Authored by ths
1 parent e5686953

Fix a race condition and non-leaf images growing in VMDK chains, by Igor Lvovsky.


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@2987 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 1 changed file with 117 additions and 32 deletions
block-vmdk.c
... ... @@ -75,8 +75,25 @@ typedef struct BDRVVmdkState {
75 75  
76 76 unsigned int cluster_sectors;
77 77 uint32_t parent_cid;
  78 + int is_parent;
78 79 } BDRVVmdkState;
79 80  
  81 +typedef struct VmdkMetaData {
  82 + uint32_t offset;
  83 + unsigned int l1_index;
  84 + unsigned int l2_index;
  85 + unsigned int l2_offset;
  86 + int valid;
  87 +} VmdkMetaData;
  88 +
  89 +typedef struct ActiveBDRVState{
  90 + BlockDriverState *hd; // active image handler
  91 + uint64_t cluster_offset; // current write offset
  92 +}ActiveBDRVState;
  93 +
  94 +static ActiveBDRVState activeBDRV;
  95 +
  96 +
80 97 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
81 98 {
82 99 uint32_t magic;
... ... @@ -305,7 +322,7 @@ static void vmdk_parent_close(BlockDriverState *bs)
305 322 bdrv_close(bs->backing_hd);
306 323 }
307 324  
308   -
  325 +int parent_open = 0;
309 326 static int vmdk_parent_open(BlockDriverState *bs, const char * filename)
310 327 {
311 328 BDRVVmdkState *s = bs->opaque;
... ... @@ -339,8 +356,10 @@ static int vmdk_parent_open(BlockDriverState *bs, const char * filename)
339 356 bdrv_close(s->hd);
340 357 return -1;
341 358 }
342   - if (bdrv_open(s->hd->backing_hd, parent_img_name, 0) < 0)
  359 + parent_open = 1;
  360 + if (bdrv_open(s->hd->backing_hd, parent_img_name, BDRV_O_RDONLY) < 0)
343 361 goto failure;
  362 + parent_open = 0;
344 363 }
345 364  
346 365 return 0;
... ... @@ -352,6 +371,11 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
352 371 uint32_t magic;
353 372 int l1_size, i, ret;
354 373  
  374 + if (parent_open)
  375 + // Parent must be opened as RO.
  376 + flags = BDRV_O_RDONLY;
  377 + fprintf(stderr, "(VMDK) image open: flags=0x%x filename=%s\n", flags, bs->filename);
  378 +
355 379 ret = bdrv_file_open(&s->hd, filename, flags);
356 380 if (ret < 0)
357 381 return ret;
... ... @@ -387,6 +411,11 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
387 411 s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
388 412 s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
389 413  
  414 + if (parent_open)
  415 + s->is_parent = 1;
  416 + else
  417 + s->is_parent = 0;
  418 +
390 419 // try to open parent images, if exist
391 420 if (vmdk_parent_open(bs, filename) != 0)
392 421 goto fail;
... ... @@ -430,7 +459,8 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
430 459 return -1;
431 460 }
432 461  
433   -static uint64_t get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate);
  462 +static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
  463 + uint64_t offset, int allocate);
434 464  
435 465 static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
436 466 uint64_t offset, int allocate)
... ... @@ -446,27 +476,55 @@ static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
446 476  
447 477 if (!vmdk_is_cid_valid(bs))
448 478 return -1;
449   - parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, offset, allocate);
450   - if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) !=
451   - ps->cluster_sectors*512)
452   - return -1;
453 479  
454   - if (bdrv_pwrite(s->hd, cluster_offset << 9, whole_grain, sizeof(whole_grain)) !=
455   - sizeof(whole_grain))
  480 + parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, NULL, offset, allocate);
  481 +
  482 + if (parent_cluster_offset) {
  483 + BDRVVmdkState *act_s = activeBDRV.hd->opaque;
  484 +
  485 + if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != ps->cluster_sectors*512)
  486 + return -1;
  487 +
  488 + //Write grain only into the active image
  489 + if (bdrv_pwrite(act_s->hd, activeBDRV.cluster_offset << 9, whole_grain, sizeof(whole_grain)) != sizeof(whole_grain))
  490 + return -1;
  491 + }
  492 + }
  493 + return 0;
  494 +}
  495 +
  496 +static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
  497 +{
  498 + BDRVVmdkState *s = bs->opaque;
  499 +
  500 + /* update L2 table */
  501 + if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
  502 + &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
  503 + return -1;
  504 + /* update backup L2 table */
  505 + if (s->l1_backup_table_offset != 0) {
  506 + m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
  507 + if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
  508 + &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
456 509 return -1;
457 510 }
  511 +
458 512 return 0;
459 513 }
460 514  
461   -static uint64_t get_cluster_offset(BlockDriverState *bs,
  515 +static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
462 516 uint64_t offset, int allocate)
463 517 {
464 518 BDRVVmdkState *s = bs->opaque;
465 519 unsigned int l1_index, l2_offset, l2_index;
466 520 int min_index, i, j;
467   - uint32_t min_count, *l2_table, tmp;
  521 + uint32_t min_count, *l2_table, tmp = 0;
468 522 uint64_t cluster_offset;
469   -
  523 + int status;
  524 +
  525 + if (m_data)
  526 + m_data->valid = 0;
  527 +
470 528 l1_index = (offset >> 9) / s->l1_entry_sectors;
471 529 if (l1_index >= s->l1_size)
472 530 return 0;
... ... @@ -504,32 +562,45 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
504 562 found:
505 563 l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
506 564 cluster_offset = le32_to_cpu(l2_table[l2_index]);
  565 +
507 566 if (!cluster_offset) {
508 567 struct stat file_buf;
509 568  
510 569 if (!allocate)
511 570 return 0;
512   - stat(s->hd->filename, &file_buf);
513   - cluster_offset = file_buf.st_size;
514   - bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));
515   -
516   - cluster_offset >>= 9;
517   - /* update L2 table */
518   - tmp = cpu_to_le32(cluster_offset);
519   - l2_table[l2_index] = tmp;
520   - if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)),
521   - &tmp, sizeof(tmp)) != sizeof(tmp))
522   - return 0;
523   - /* update backup L2 table */
524   - if (s->l1_backup_table_offset != 0) {
525   - l2_offset = s->l1_backup_table[l1_index];
526   - if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)),
527   - &tmp, sizeof(tmp)) != sizeof(tmp))
  571 + // Avoid the L2 tables update for the images that have snapshots.
  572 + if (!s->is_parent) {
  573 + status = stat(s->hd->filename, &file_buf);
  574 + if (status == -1) {
  575 + fprintf(stderr, "(VMDK) Fail file stat: filename =%s size=0x%lx errno=%s\n",
  576 + s->hd->filename, (uint64_t)file_buf.st_size, strerror(errno));
528 577 return 0;
  578 + }
  579 + cluster_offset = file_buf.st_size;
  580 + bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));
  581 +
  582 + cluster_offset >>= 9;
  583 + tmp = cpu_to_le32(cluster_offset);
  584 + l2_table[l2_index] = tmp;
  585 + // Save the active image state
  586 + activeBDRV.cluster_offset = cluster_offset;
  587 + activeBDRV.hd = bs;
529 588 }
530   -
  589 + /* First of all we write grain itself, to avoid race condition
  590 + * that may to corrupt the image.
  591 + * This problem may occur because of insufficient space on host disk
  592 + * or inappropriate VM shutdown.
  593 + */
531 594 if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
532 595 return 0;
  596 +
  597 + if (m_data) {
  598 + m_data->offset = tmp;
  599 + m_data->l1_index = l1_index;
  600 + m_data->l2_index = l2_index;
  601 + m_data->l2_offset = l2_offset;
  602 + m_data->valid = 1;
  603 + }
533 604 }
534 605 cluster_offset <<= 9;
535 606 return cluster_offset;
... ... @@ -542,7 +613,7 @@ static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
542 613 int index_in_cluster, n;
543 614 uint64_t cluster_offset;
544 615  
545   - cluster_offset = get_cluster_offset(bs, sector_num << 9, 0);
  616 + cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
546 617 index_in_cluster = sector_num % s->cluster_sectors;
547 618 n = s->cluster_sectors - index_in_cluster;
548 619 if (n > nb_sectors)
... ... @@ -559,7 +630,7 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
559 630 uint64_t cluster_offset;
560 631  
561 632 while (nb_sectors > 0) {
562   - cluster_offset = get_cluster_offset(bs, sector_num << 9, 0);
  633 + cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
563 634 index_in_cluster = sector_num % s->cluster_sectors;
564 635 n = s->cluster_sectors - index_in_cluster;
565 636 if (n > nb_sectors)
... ... @@ -590,20 +661,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
590 661 const uint8_t *buf, int nb_sectors)
591 662 {
592 663 BDRVVmdkState *s = bs->opaque;
  664 + VmdkMetaData m_data;
593 665 int index_in_cluster, n;
594 666 uint64_t cluster_offset;
595 667 static int cid_update = 0;
596 668  
  669 + if (sector_num > bs->total_sectors) {
  670 + fprintf(stderr,
  671 + "(VMDK) Wrong offset: sector_num=0x%lx total_sectors=0x%lx\n",
  672 + sector_num, bs->total_sectors);
  673 + return -1;
  674 + }
  675 +
597 676 while (nb_sectors > 0) {
598 677 index_in_cluster = sector_num & (s->cluster_sectors - 1);
599 678 n = s->cluster_sectors - index_in_cluster;
600 679 if (n > nb_sectors)
601 680 n = nb_sectors;
602   - cluster_offset = get_cluster_offset(bs, sector_num << 9, 1);
  681 + cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
603 682 if (!cluster_offset)
604 683 return -1;
  684 +
605 685 if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
606 686 return -1;
  687 + if (m_data.valid) {
  688 + /* update L2 tables */
  689 + if (vmdk_L2update(bs, &m_data) == -1)
  690 + return -1;
  691 + }
607 692 nb_sectors -= n;
608 693 sector_num += n;
609 694 buf += n * 512;
... ...