Commit f141eafe286c785f7e2c1e312a73f90d66bdfb90

Authored by aliguori
1 parent c87c0672

push down vector linearization to posix-aio-compat.c (Christoph Hellwig)

Make all AIO requests vectored and defer linearization until the actual
I/O thread.  This prepares for using native preadv/pwritev.

Also enables asynchronous direct I/O by handling that case in the I/O thread.

Qcow and qcow2 probably want to be adapted to deal directly with multi-segment
requests, but that can be implemented later.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@7020 c046a42c-6fe2-441c-8c8c-71466251a162
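
The idea, in isolation: when a vectored request cannot be submitted as-is
(a segment is misaligned for O_DIRECT, or no native preadv/pwritev exists),
the worker thread gathers all segments into one aligned bounce buffer, does
a single linear transfer, and for reads scatters the result back into the
segments afterwards. The sketch below is illustrative only and not part of
this commit; the helper name linear_pwritev() is made up, and the real
gather/scatter lives in handle_aiocb_rw() further down.

    #include <stdlib.h>
    #include <string.h>
    #include <sys/uio.h>
    #include <unistd.h>

    /* Illustrative sketch: emulate pwritev() by linearizing an iovec into
     * one 512-byte-aligned bounce buffer and issuing a single pwrite(). */
    static ssize_t linear_pwritev(int fd, const struct iovec *iov, int niov,
                                  off_t offset)
    {
        size_t total = 0, pos = 0;
        void *bounce;
        ssize_t ret;
        int i;

        for (i = 0; i < niov; i++)
            total += iov[i].iov_len;

        /* sector alignment keeps the buffer usable with O_DIRECT files */
        if (posix_memalign(&bounce, 512, total))
            return -1;

        for (i = 0; i < niov; i++) {        /* gather all segments */
            memcpy((char *)bounce + pos, iov[i].iov_base, iov[i].iov_len);
            pos += iov[i].iov_len;
        }

        ret = pwrite(fd, bounce, total, offset);
        free(bounce);
        return ret;
    }

The read path is the mirror image: pread() into the bounce buffer, then copy
each segment back out, which is exactly what the new done: paths in the qcow
drivers and handle_aiocb_rw() below do.
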
block-qcow.c
@@ -525,7 +525,9 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num,
 typedef struct QCowAIOCB {
     BlockDriverAIOCB common;
     int64_t sector_num;
+    QEMUIOVector *qiov;
     uint8_t *buf;
+    void *orig_buf;
     int nb_sectors;
     int n;
     uint64_t cluster_offset;
@@ -543,12 +545,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
     int index_in_cluster;
 
     acb->hd_aiocb = NULL;
-    if (ret < 0) {
-    fail:
-        acb->common.cb(acb->common.opaque, ret);
-        qemu_aio_release(acb);
-        return;
-    }
+    if (ret < 0)
+        goto done;
 
  redo:
     /* post process the read buffer */
@@ -570,9 +568,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        acb->common.cb(acb->common.opaque, 0);
-        qemu_aio_release(acb);
-        return;
+        ret = 0;
+        goto done;
     }
 
     /* prepare next AIO request */
@@ -592,7 +589,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
             acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
                 &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
             if (acb->hd_aiocb == NULL)
-                goto fail;
+                goto done;
         } else {
             /* Note: in this case, no need to wait */
             memset(acb->buf, 0, 512 * acb->n);
@@ -601,14 +598,14 @@ static void qcow_aio_read_cb(void *opaque, int ret)
     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
         /* add AIO support for compressed blocks ? */
         if (decompress_cluster(s, acb->cluster_offset) < 0)
-            goto fail;
+            goto done;
         memcpy(acb->buf,
                s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
         goto redo;
     } else {
         if ((acb->cluster_offset & 511) != 0) {
             ret = -EIO;
-            goto fail;
+            goto done;
         }
         acb->hd_iov.iov_base = acb->buf;
         acb->hd_iov.iov_len = acb->n * 512;
@@ -617,12 +614,22 @@ static void qcow_aio_read_cb(void *opaque, int ret)
             (acb->cluster_offset >> 9) + index_in_cluster,
             &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
         if (acb->hd_aiocb == NULL)
-            goto fail;
+            goto done;
+    }
+
+    return;
+
+done:
+    if (acb->qiov->niov > 1) {
+        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
+        qemu_vfree(acb->orig_buf);
     }
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
 }
 
-static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     QCowAIOCB *acb;
@@ -632,7 +639,11 @@ static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
         return NULL;
     acb->hd_aiocb = NULL;
     acb->sector_num = sector_num;
-    acb->buf = buf;
+    acb->qiov = qiov;
+    if (qiov->niov > 1)
+        acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
+    else
+        acb->buf = qiov->iov->iov_base;
     acb->nb_sectors = nb_sectors;
     acb->n = 0;
     acb->cluster_offset = 0;
@@ -652,12 +663,8 @@ static void qcow_aio_write_cb(void *opaque, int ret)
 
     acb->hd_aiocb = NULL;
 
-    if (ret < 0) {
-    fail:
-        acb->common.cb(acb->common.opaque, ret);
-        qemu_aio_release(acb);
-        return;
-    }
+    if (ret < 0)
+        goto done;
 
     acb->nb_sectors -= acb->n;
     acb->sector_num += acb->n;
@@ -665,9 +672,8 @@ static void qcow_aio_write_cb(void *opaque, int ret)
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        acb->common.cb(acb->common.opaque, 0);
-        qemu_aio_release(acb);
-        return;
+        ret = 0;
+        goto done;
     }
 
     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -679,14 +685,14 @@ static void qcow_aio_write_cb(void *opaque, int ret)
                                 index_in_cluster + acb->n);
     if (!cluster_offset || (cluster_offset & 511) != 0) {
         ret = -EIO;
-        goto fail;
+        goto done;
     }
     if (s->crypt_method) {
         if (!acb->cluster_data) {
             acb->cluster_data = qemu_mallocz(s->cluster_size);
             if (!acb->cluster_data) {
                 ret = -ENOMEM;
-                goto fail;
+                goto done;
             }
         }
         encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
@@ -704,11 +710,18 @@ static void qcow_aio_write_cb(void *opaque, int ret)
                                     &acb->hd_qiov, acb->n,
                                     qcow_aio_write_cb, acb);
     if (acb->hd_aiocb == NULL)
-        goto fail;
+        goto done;
+    return;
+
+done:
+    if (acb->qiov->niov > 1)
+        qemu_vfree(acb->orig_buf);
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
 }
 
-static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVQcowState *s = bs->opaque;
@@ -721,7 +734,12 @@ static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
         return NULL;
     acb->hd_aiocb = NULL;
     acb->sector_num = sector_num;
-    acb->buf = (uint8_t *)buf;
+    acb->qiov = qiov;
+    if (qiov->niov > 1) {
+        acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
+        qemu_iovec_to_buffer(qiov, acb->buf);
+    } else
+        acb->buf = qiov->iov->iov_base;
     acb->nb_sectors = nb_sectors;
     acb->n = 0;
 
@@ -909,8 +927,8 @@ BlockDriver bdrv_qcow = {
     .bdrv_is_allocated = qcow_is_allocated,
     .bdrv_set_key = qcow_set_key,
     .bdrv_make_empty = qcow_make_empty,
-    .bdrv_aio_read = qcow_aio_read,
-    .bdrv_aio_write = qcow_aio_write,
+    .bdrv_aio_readv = qcow_aio_readv,
+    .bdrv_aio_writev = qcow_aio_writev,
     .bdrv_aio_cancel = qcow_aio_cancel,
    .aiocb_size = sizeof(QCowAIOCB),
    .bdrv_write_compressed = qcow_write_compressed,
block-qcow2.c
@@ -1264,7 +1264,9 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num,
 typedef struct QCowAIOCB {
     BlockDriverAIOCB common;
     int64_t sector_num;
+    QEMUIOVector *qiov;
     uint8_t *buf;
+    void *orig_buf;
     int nb_sectors;
     int n;
     uint64_t cluster_offset;
@@ -1307,12 +1309,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
     int index_in_cluster, n1;
 
     acb->hd_aiocb = NULL;
-    if (ret < 0) {
-fail:
-        acb->common.cb(acb->common.opaque, ret);
-        qemu_aio_release(acb);
-        return;
-    }
+    if (ret < 0)
+        goto done;
 
     /* post process the read buffer */
     if (!acb->cluster_offset) {
@@ -1333,9 +1331,8 @@ fail:
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        acb->common.cb(acb->common.opaque, 0);
-        qemu_aio_release(acb);
-        return;
+        ret = 0;
+        goto done;
     }
 
     /* prepare next AIO request */
@@ -1356,32 +1353,32 @@ fail:
                     &acb->hd_qiov, acb->n,
                     qcow_aio_read_cb, acb);
                 if (acb->hd_aiocb == NULL)
-                    goto fail;
+                    goto done;
             } else {
                 ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
                 if (ret < 0)
-                    goto fail;
+                    goto done;
             }
         } else {
             /* Note: in this case, no need to wait */
             memset(acb->buf, 0, 512 * acb->n);
             ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
             if (ret < 0)
-                goto fail;
+                goto done;
         }
     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
         /* add AIO support for compressed blocks ? */
         if (decompress_cluster(s, acb->cluster_offset) < 0)
-            goto fail;
+            goto done;
         memcpy(acb->buf,
                s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
         ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
         if (ret < 0)
-            goto fail;
+            goto done;
     } else {
         if ((acb->cluster_offset & 511) != 0) {
             ret = -EIO;
-            goto fail;
+            goto done;
         }
 
         acb->hd_iov.iov_base = acb->buf;
@@ -1391,13 +1388,22 @@ fail:
             (acb->cluster_offset >> 9) + index_in_cluster,
             &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
         if (acb->hd_aiocb == NULL)
-            goto fail;
+            goto done;
+    }
+
+    return;
+done:
+    if (acb->qiov->niov > 1) {
+        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
+        qemu_vfree(acb->orig_buf);
     }
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
 }
 
 static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int is_write)
 {
     QCowAIOCB *acb;
 
@@ -1406,7 +1412,13 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
         return NULL;
     acb->hd_aiocb = NULL;
     acb->sector_num = sector_num;
-    acb->buf = buf;
+    acb->qiov = qiov;
+    if (qiov->niov > 1) {
+        acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
+        if (is_write)
+            qemu_iovec_to_buffer(qiov, acb->buf);
+    } else
+        acb->buf = qiov->iov->iov_base;
     acb->nb_sectors = nb_sectors;
     acb->n = 0;
     acb->cluster_offset = 0;
@@ -1414,13 +1426,13 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
     return acb;
 }
 
-static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     QCowAIOCB *acb;
 
-    acb = qcow_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
+    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
     if (!acb)
         return NULL;
 
@@ -1439,16 +1451,12 @@ static void qcow_aio_write_cb(void *opaque, int ret)
 
     acb->hd_aiocb = NULL;
 
-    if (ret < 0) {
-    fail:
-        acb->common.cb(acb->common.opaque, ret);
-        qemu_aio_release(acb);
-        return;
-    }
+    if (ret < 0)
+        goto done;
 
     if (alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) {
         free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters);
-        goto fail;
+        goto done;
     }
 
     acb->nb_sectors -= acb->n;
@@ -1457,9 +1465,8 @@ static void qcow_aio_write_cb(void *opaque, int ret)
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        acb->common.cb(acb->common.opaque, 0);
-        qemu_aio_release(acb);
-        return;
+        ret = 0;
+        goto done;
     }
 
     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -1473,7 +1480,7 @@ static void qcow_aio_write_cb(void *opaque, int ret)
                                       n_end, &acb->n, &acb->l2meta);
     if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) {
         ret = -EIO;
-        goto fail;
+        goto done;
     }
     if (s->crypt_method) {
         if (!acb->cluster_data) {
@@ -1494,11 +1501,19 @@ static void qcow_aio_write_cb(void *opaque, int ret)
                                     &acb->hd_qiov, acb->n,
                                     qcow_aio_write_cb, acb);
     if (acb->hd_aiocb == NULL)
-        goto fail;
+        goto done;
+
+    return;
+
+done:
+    if (acb->qiov->niov > 1)
+        qemu_vfree(acb->orig_buf);
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
 }
 
-static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVQcowState *s = bs->opaque;
@@ -1506,7 +1521,7 @@ static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
 
     s->cluster_cache_offset = -1; /* disable compressed cache */
 
-    acb = qcow_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
+    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
     if (!acb)
         return NULL;
 
@@ -2771,8 +2786,8 @@ BlockDriver bdrv_qcow2 = {
     .bdrv_set_key = qcow_set_key,
     .bdrv_make_empty = qcow_make_empty,
 
-    .bdrv_aio_read = qcow_aio_read,
-    .bdrv_aio_write = qcow_aio_write,
+    .bdrv_aio_readv = qcow_aio_readv,
+    .bdrv_aio_writev = qcow_aio_writev,
     .bdrv_aio_cancel = qcow_aio_cancel,
     .aiocb_size = sizeof(QCowAIOCB),
     .bdrv_write_compressed = qcow_write_compressed,
block-raw-posix.c
@@ -599,8 +599,8 @@ static int posix_aio_init(void)
     return 0;
 }
 
-static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num,
+        QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
@@ -614,24 +614,25 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
         return NULL;
     acb->aiocb.aio_fildes = s->fd;
     acb->aiocb.ev_signo = SIGUSR2;
-    acb->aiocb.aio_buf = buf;
-    if (nb_sectors < 0)
-        acb->aiocb.aio_nbytes = -nb_sectors;
-    else
-        acb->aiocb.aio_nbytes = nb_sectors * 512;
+    acb->aiocb.aio_iov = qiov->iov;
+    acb->aiocb.aio_niov = qiov->niov;
+    acb->aiocb.aio_nbytes = nb_sectors * 512;
     acb->aiocb.aio_offset = sector_num * 512;
+    acb->aiocb.aio_flags = 0;
+
+    /*
+     * If O_DIRECT is used the buffer needs to be aligned on a sector
+     * boundary. Tell the low level code to ensure that in case it's
+     * not done yet.
+     */
+    if (s->aligned_buf)
+        acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED;
+
     acb->next = posix_aio_state->first_aio;
     posix_aio_state->first_aio = acb;
     return acb;
 }
 
-static void raw_aio_em_cb(void* opaque)
-{
-    RawAIOCB *acb = opaque;
-    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_release(acb);
-}
-
 static void raw_aio_remove(RawAIOCB *acb)
 {
     RawAIOCB **pacb;
@@ -651,28 +652,13 @@ static void raw_aio_remove(RawAIOCB *acb)
     }
 }
 
-static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     RawAIOCB *acb;
 
-    /*
-     * If O_DIRECT is used and the buffer is not aligned fall back
-     * to synchronous IO.
-     */
-    BDRVRawState *s = bs->opaque;
-
-    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
-        QEMUBH *bh;
-        acb = qemu_aio_get(bs, cb, opaque);
-        acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
-        bh = qemu_bh_new(raw_aio_em_cb, acb);
-        qemu_bh_schedule(bh);
-        return &acb->common;
-    }
-
-    acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
+    acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
     if (!acb)
         return NULL;
     if (qemu_paio_read(&acb->aiocb) < 0) {
@@ -682,28 +668,13 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
     return &acb->common;
 }
 
-static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     RawAIOCB *acb;
 
-    /*
-     * If O_DIRECT is used and the buffer is not aligned fall back
-     * to synchronous IO.
-     */
-    BDRVRawState *s = bs->opaque;
-
-    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
-        QEMUBH *bh;
-        acb = qemu_aio_get(bs, cb, opaque);
-        acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
-        bh = qemu_bh_new(raw_aio_em_cb, acb);
-        qemu_bh_schedule(bh);
-        return &acb->common;
-    }
-
-    acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
+    acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
     if (!acb)
         return NULL;
     if (qemu_paio_write(&acb->aiocb) < 0) {
@@ -887,8 +858,8 @@ BlockDriver bdrv_raw = {
     .bdrv_flush = raw_flush,
 
 #ifdef CONFIG_AIO
-    .bdrv_aio_read = raw_aio_read,
-    .bdrv_aio_write = raw_aio_write,
+    .bdrv_aio_readv = raw_aio_readv,
+    .bdrv_aio_writev = raw_aio_writev,
     .bdrv_aio_cancel = raw_aio_cancel,
     .aiocb_size = sizeof(RawAIOCB),
 #endif
@@ -1215,12 +1186,24 @@ static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
         unsigned long int req, void *buf,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
+    BDRVRawState *s = bs->opaque;
     RawAIOCB *acb;
 
-    acb = raw_aio_setup(bs, 0, buf, 0, cb, opaque);
+    if (fd_open(bs) < 0)
+        return NULL;
+
+    acb = qemu_aio_get(bs, cb, opaque);
     if (!acb)
         return NULL;
+    acb->aiocb.aio_fildes = s->fd;
+    acb->aiocb.ev_signo = SIGUSR2;
+    acb->aiocb.aio_offset = 0;
+    acb->aiocb.aio_flags = 0;
+
+    acb->next = posix_aio_state->first_aio;
+    posix_aio_state->first_aio = acb;
 
+    acb->aiocb.aio_ioctl_buf = buf;
     acb->aiocb.aio_ioctl_cmd = req;
     if (qemu_paio_ioctl(&acb->aiocb) < 0) {
         raw_aio_remove(acb);
@@ -1424,8 +1407,8 @@ BlockDriver bdrv_host_device = {
     .bdrv_flush = raw_flush,
 
 #ifdef CONFIG_AIO
-    .bdrv_aio_read = raw_aio_read,
-    .bdrv_aio_write = raw_aio_write,
+    .bdrv_aio_readv = raw_aio_readv,
+    .bdrv_aio_writev = raw_aio_writev,
     .bdrv_aio_cancel = raw_aio_cancel,
     .aiocb_size = sizeof(RawAIOCB),
 #endif
block.c
@@ -47,25 +47,21 @@
 #define SECTOR_BITS 9
 #define SECTOR_SIZE (1 << SECTOR_BITS)
 
-static AIOPool vectored_aio_pool;
-
 typedef struct BlockDriverAIOCBSync {
     BlockDriverAIOCB common;
     QEMUBH *bh;
     int ret;
+    /* vector translation state */
+    QEMUIOVector *qiov;
+    uint8_t *bounce;
+    int is_write;
 } BlockDriverAIOCBSync;
 
-static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
@@ -144,10 +140,10 @@ void path_combine(char *dest, int dest_size,
 
 static void bdrv_register(BlockDriver *bdrv)
 {
-    if (!bdrv->bdrv_aio_read) {
+    if (!bdrv->bdrv_aio_readv) {
         /* add AIO emulation layer */
-        bdrv->bdrv_aio_read = bdrv_aio_read_em;
-        bdrv->bdrv_aio_write = bdrv_aio_write_em;
+        bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
+        bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
         bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
         bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
     } else if (!bdrv->bdrv_read) {
@@ -1295,91 +1291,10 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
 /**************************************************************/
 /* async I/Os */
 
-typedef struct VectorTranslationAIOCB {
-    BlockDriverAIOCB common;
-    QEMUIOVector *iov;
-    uint8_t *bounce;
-    int is_write;
-    BlockDriverAIOCB *aiocb;
-} VectorTranslationAIOCB;
-
-static void bdrv_aio_cancel_vector(BlockDriverAIOCB *_acb)
-{
-    VectorTranslationAIOCB *acb
-        = container_of(_acb, VectorTranslationAIOCB, common);
-
-    bdrv_aio_cancel(acb->aiocb);
-}
-
-static void bdrv_aio_rw_vector_cb(void *opaque, int ret)
-{
-    VectorTranslationAIOCB *s = (VectorTranslationAIOCB *)opaque;
-
-    if (!s->is_write) {
-        qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size);
-    }
-    qemu_vfree(s->bounce);
-    s->common.cb(s->common.opaque, ret);
-    qemu_aio_release(s);
-}
-
-static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
-                                            int64_t sector_num,
-                                            QEMUIOVector *iov,
-                                            int nb_sectors,
-                                            BlockDriverCompletionFunc *cb,
-                                            void *opaque,
-                                            int is_write)
-
-{
-    VectorTranslationAIOCB *s = qemu_aio_get_pool(&vectored_aio_pool, bs,
-                                                  cb, opaque);
-
-    s->iov = iov;
-    s->bounce = qemu_memalign(512, nb_sectors * 512);
-    s->is_write = is_write;
-    if (is_write) {
-        qemu_iovec_to_buffer(s->iov, s->bounce);
-        s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
-                                  bdrv_aio_rw_vector_cb, s);
-    } else {
-        s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors,
-                                 bdrv_aio_rw_vector_cb, s);
-    }
-    if (!s->aiocb) {
-        qemu_vfree(s->bounce);
-        qemu_aio_release(s);
-        return NULL;
-    }
-    return &s->common;
-}
-
 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
-                                 QEMUIOVector *iov, int nb_sectors,
+                                 QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
 {
-    if (bdrv_check_request(bs, sector_num, nb_sectors))
-        return NULL;
-
-    return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
-                              cb, opaque, 0);
-}
-
-BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
-                                  QEMUIOVector *iov, int nb_sectors,
-                                  BlockDriverCompletionFunc *cb, void *opaque)
-{
-    if (bdrv_check_request(bs, sector_num, nb_sectors))
-        return NULL;
-
-    return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
-                              cb, opaque, 1);
-}
-
-static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
-                                       uint8_t *buf, int nb_sectors,
-                                       BlockDriverCompletionFunc *cb, void *opaque)
-{
     BlockDriver *drv = bs->drv;
     BlockDriverAIOCB *ret;
 
@@ -1388,7 +1303,8 @@ static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
     if (bdrv_check_request(bs, sector_num, nb_sectors))
         return NULL;
 
-    ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
+    ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
+                              cb, opaque);
 
     if (ret) {
         /* Update stats even though technically transfer has not happened. */
@@ -1399,9 +1315,9 @@ static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
     return ret;
 }
 
-static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
-                                        const uint8_t *buf, int nb_sectors,
-                                        BlockDriverCompletionFunc *cb, void *opaque)
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+                                  QEMUIOVector *qiov, int nb_sectors,
+                                  BlockDriverCompletionFunc *cb, void *opaque)
 {
     BlockDriver *drv = bs->drv;
     BlockDriverAIOCB *ret;
@@ -1413,7 +1329,8 @@ static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num
     if (bdrv_check_request(bs, sector_num, nb_sectors))
         return NULL;
 
-    ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
+    ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
+                               cb, opaque);
 
     if (ret) {
         /* Update stats even though technically transfer has not happened. */
@@ -1436,42 +1353,62 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb)
 static void bdrv_aio_bh_cb(void *opaque)
 {
     BlockDriverAIOCBSync *acb = opaque;
+
+    if (!acb->is_write)
+        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
+    qemu_vfree(acb->bounce);
     acb->common.cb(acb->common.opaque, acb->ret);
+
     qemu_aio_release(acb);
 }
 
-static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
+                                            int64_t sector_num,
+                                            QEMUIOVector *qiov,
+                                            int nb_sectors,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque,
+                                            int is_write)
+
 {
     BlockDriverAIOCBSync *acb;
-    int ret;
 
     acb = qemu_aio_get(bs, cb, opaque);
+    acb->is_write = is_write;
+    acb->qiov = qiov;
+    acb->bounce = qemu_memalign(512, qiov->size);
+
     if (!acb->bh)
         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
-    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
-    acb->ret = ret;
+
+    if (is_write) {
+        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
+        acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
+    } else {
+        acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
+    }
+
     qemu_bh_schedule(acb->bh);
+
     return &acb->common;
 }
 
-static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
-    BlockDriverAIOCBSync *acb;
-    int ret;
+    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
 
-    acb = qemu_aio_get(bs, cb, opaque);
-    if (!acb->bh)
-        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
-    ret = bdrv_write(bs, sector_num, buf, nb_sectors);
-    acb->ret = ret;
-    qemu_bh_schedule(acb->bh);
-    return &acb->common;
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
 }
 
+
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
 {
     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
@@ -1494,10 +1431,15 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
 {
     int async_ret;
     BlockDriverAIOCB *acb;
+    struct iovec iov;
+    QEMUIOVector qiov;
 
     async_ret = NOT_DONE;
-    acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
-                        bdrv_rw_em_cb, &async_ret);
+    iov.iov_base = buf;
+    iov.iov_len = nb_sectors * 512;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
+                         bdrv_rw_em_cb, &async_ret);
     if (acb == NULL)
         return -1;
 
@@ -1513,10 +1455,15 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
 {
     int async_ret;
     BlockDriverAIOCB *acb;
+    struct iovec iov;
+    QEMUIOVector qiov;
 
     async_ret = NOT_DONE;
-    acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
-                         bdrv_rw_em_cb, &async_ret);
+    iov.iov_base = (void *)buf;
+    iov.iov_len = nb_sectors * 512;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
+                          bdrv_rw_em_cb, &async_ret);
     if (acb == NULL)
         return -1;
     while (async_ret == NOT_DONE) {
@@ -1527,9 +1474,6 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
 
 void bdrv_init(void)
 {
-    aio_pool_init(&vectored_aio_pool, sizeof(VectorTranslationAIOCB),
-                  bdrv_aio_cancel_vector);
-
     bdrv_register(&bdrv_raw);
     bdrv_register(&bdrv_host_device);
 #ifndef _WIN32
block_int.h
@@ -54,11 +54,11 @@ struct BlockDriver {
     int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
     int (*bdrv_make_empty)(BlockDriverState *bs);
     /* aio */
-    BlockDriverAIOCB *(*bdrv_aio_read)(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+    BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
-    BlockDriverAIOCB *(*bdrv_aio_write)(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+    BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
     void (*bdrv_aio_cancel)(BlockDriverAIOCB *acb);
     int aiocb_size;
posix-aio-compat.c
@@ -20,6 +20,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "osdep.h"
+#include "qemu-common.h"
 
 #include "posix-aio-compat.h"
 
@@ -76,45 +77,110 @@ static void thread_create(pthread_t *thread, pthread_attr_t *attr,
     if (ret) die2(ret, "pthread_create");
 }
 
-static size_t handle_aiocb_readwrite(struct qemu_paiocb *aiocb)
+static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
+{
+    int ret;
+
+    ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
+    if (ret == -1)
+        return -errno;
+    return ret;
+}
+
+/*
+ * Check if we need to copy the data in the aiocb into a new
+ * properly aligned buffer.
+ */
+static int aiocb_needs_copy(struct qemu_paiocb *aiocb)
+{
+    if (aiocb->aio_flags & QEMU_AIO_SECTOR_ALIGNED) {
+        int i;
+
+        for (i = 0; i < aiocb->aio_niov; i++)
+            if ((uintptr_t) aiocb->aio_iov[i].iov_base % 512)
+                return 1;
+    }
+
+    return 0;
+}
+
+static size_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
 {
     size_t offset = 0;
-    ssize_t len;
+    size_t len;
 
     while (offset < aiocb->aio_nbytes) {
         if (aiocb->aio_type == QEMU_PAIO_WRITE)
             len = pwrite(aiocb->aio_fildes,
-                         (const char *)aiocb->aio_buf + offset,
+                         (const char *)buf + offset,
                          aiocb->aio_nbytes - offset,
                          aiocb->aio_offset + offset);
         else
             len = pread(aiocb->aio_fildes,
-                        (char *)aiocb->aio_buf + offset,
+                        buf + offset,
                         aiocb->aio_nbytes - offset,
                         aiocb->aio_offset + offset);
 
         if (len == -1 && errno == EINTR)
             continue;
         else if (len == -1) {
            offset = -errno;
            break;
        } else if (len == 0)
            break;

        offset += len;
    }

    return offset;
}

-static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
+static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
 {
-    int ret;
+    size_t nbytes;
+    char *buf;
 
-    ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_buf);
-    if (ret == -1)
-        return -errno;
-    return ret;
+    if (!aiocb_needs_copy(aiocb) && aiocb->aio_niov == 1) {
+        /*
+         * If there is just a single buffer, and it is properly aligned
+         * we can just use plain pread/pwrite without any problems.
+         */
+        return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
+    }
+
+    /*
+     * Ok, we have to do it the hard way, copy all segments into
+     * a single aligned buffer.
+     */
+    buf = qemu_memalign(512, aiocb->aio_nbytes);
+    if (aiocb->aio_type == QEMU_PAIO_WRITE) {
+        char *p = buf;
+        int i;
+
+        for (i = 0; i < aiocb->aio_niov; ++i) {
+            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
+            p += aiocb->aio_iov[i].iov_len;
+        }
+    }
+
+    nbytes = handle_aiocb_rw_linear(aiocb, buf);
+    if (aiocb->aio_type != QEMU_PAIO_WRITE) {
+        char *p = buf;
+        size_t count = aiocb->aio_nbytes, copy;
+        int i;
+
+        for (i = 0; i < aiocb->aio_niov && count; ++i) {
+            copy = count;
+            if (copy > aiocb->aio_iov[i].iov_len)
+                copy = aiocb->aio_iov[i].iov_len;
+            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
+            p += copy;
+            count -= copy;
+        }
+    }
+    qemu_vfree(buf);
+
+    return nbytes;
 }
 
 static void *aio_thread(void *unused)
@@ -157,7 +223,7 @@ static void *aio_thread(void *unused)
     switch (aiocb->aio_type) {
     case QEMU_PAIO_READ:
     case QEMU_PAIO_WRITE:
-        ret = handle_aiocb_readwrite(aiocb);
+        ret = handle_aiocb_rw(aiocb);
         break;
     case QEMU_PAIO_IOCTL:
         ret = handle_aiocb_ioctl(aiocb);
posix-aio-compat.h
@@ -27,11 +27,18 @@
 struct qemu_paiocb
 {
     int aio_fildes;
-    void *aio_buf;
+    union {
+        struct iovec *aio_iov;
+        void *aio_ioctl_buf;
+    };
+    int aio_niov;
     size_t aio_nbytes;
 #define aio_ioctl_cmd aio_nbytes /* for QEMU_PAIO_IOCTL */
     int ev_signo;
     off_t aio_offset;
+    unsigned aio_flags;
+/* 512 byte alignment required for buffer, offset and length */
+#define QEMU_AIO_SECTOR_ALIGNED 0x01
 
     /* private */
     TAILQ_ENTRY(qemu_paiocb) node;
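
For context, a caller-side sketch of how a vectored request is issued through
the new interface. This is not part of the commit: submit_scattered_read(),
read_done() and the buffers are hypothetical, while qemu_iovec_init_external()
and bdrv_aio_readv() are the interfaces visible in the diffs above.

    /* Hypothetical caller: read two sectors into two scattered buffers with
     * one vectored request.  The iovec/QEMUIOVector must stay valid until the
     * completion callback runs; static storage keeps the sketch simple. */
    static struct iovec iov[2];
    static QEMUIOVector qiov;

    static void read_done(void *opaque, int ret)
    {
        /* on success the data has already been scattered into iov[0]/iov[1],
         * either by a future native preadv() or by the bounce-buffer path */
    }

    static void submit_scattered_read(BlockDriverState *bs, int64_t sector_num,
                                      uint8_t *buf0, uint8_t *buf1)
    {
        iov[0].iov_base = buf0;
        iov[0].iov_len = 512;
        iov[1].iov_base = buf1;
        iov[1].iov_len = 512;
        qemu_iovec_init_external(&qiov, iov, 2);

        bdrv_aio_readv(bs, sector_num, &qiov, 2, read_done, NULL);
    }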