Commit f141eafe286c785f7e2c1e312a73f90d66bdfb90
1 parent: c87c0672
push down vector linearization to posix-aio-compat.c (Christoph Hellwig)
Make all AIO requests vectored and defer linearization until the actual I/O thread. This prepares for using native preadv/pwritev. It also enables asynchronous direct I/O by handling that case in the I/O thread. Qcow and qcow2 probably want to be adapted to deal directly with multi-segment requests, but that can be implemented later.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@7020 c046a42c-6fe2-441c-8c8c-71466251a162
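For readers skimming the diff: "linearization" here means gathering a multi-segment I/O vector into one contiguous buffer before a write, and scattering a contiguous buffer back into the segments after a read. A minimal sketch of the idea, with made-up helper names (the real logic lives in handle_aiocb_rw() in posix-aio-compat.c below):

#include <string.h>
#include <sys/uio.h>

/* gather: copy each segment, in order, into one linear buffer */
static void iov_gather(char *dst, const struct iovec *iov, int niov)
{
    int i;

    for (i = 0; i < niov; i++) {
        memcpy(dst, iov[i].iov_base, iov[i].iov_len);
        dst += iov[i].iov_len;
    }
}

/* scatter: copy up to count bytes of a linear buffer back into segments */
static void iov_scatter(const char *src, const struct iovec *iov,
                        int niov, size_t count)
{
    int i;

    for (i = 0; i < niov && count; i++) {
        size_t n = count < iov[i].iov_len ? count : iov[i].iov_len;

        memcpy(iov[i].iov_base, src, n);
        src += n;
        count -= n;
    }
}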
Showing 7 changed files with 317 additions and 284 deletions
block-qcow.c
... | ... | @@ -525,7 +525,9 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num, |
525 | 525 | typedef struct QCowAIOCB { |
526 | 526 | BlockDriverAIOCB common; |
527 | 527 | int64_t sector_num; |
528 | + QEMUIOVector *qiov; | |
528 | 529 | uint8_t *buf; |
530 | + void *orig_buf; | |
529 | 531 | int nb_sectors; |
530 | 532 | int n; |
531 | 533 | uint64_t cluster_offset; |
... | ... | @@ -543,12 +545,8 @@ static void qcow_aio_read_cb(void *opaque, int ret) |
543 | 545 | int index_in_cluster; |
544 | 546 | |
545 | 547 | acb->hd_aiocb = NULL; |
546 | - if (ret < 0) { | |
547 | - fail: | |
548 | - acb->common.cb(acb->common.opaque, ret); | |
549 | - qemu_aio_release(acb); | |
550 | - return; | |
551 | - } | |
548 | + if (ret < 0) | |
549 | + goto done; | |
552 | 550 | |
553 | 551 | redo: |
554 | 552 | /* post process the read buffer */ |
... | ... | @@ -570,9 +568,8 @@ static void qcow_aio_read_cb(void *opaque, int ret) |
570 | 568 | |
571 | 569 | if (acb->nb_sectors == 0) { |
572 | 570 | /* request completed */ |
573 | - acb->common.cb(acb->common.opaque, 0); | |
574 | - qemu_aio_release(acb); | |
575 | - return; | |
571 | + ret = 0; | |
572 | + goto done; | |
576 | 573 | } |
577 | 574 | |
578 | 575 | /* prepare next AIO request */ |
... | ... | @@ -592,7 +589,7 @@ static void qcow_aio_read_cb(void *opaque, int ret) |
592 | 589 | acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num, |
593 | 590 | &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); |
594 | 591 | if (acb->hd_aiocb == NULL) |
595 | - goto fail; | |
592 | + goto done; | |
596 | 593 | } else { |
597 | 594 | /* Note: in this case, no need to wait */ |
598 | 595 | memset(acb->buf, 0, 512 * acb->n); |
... | ... | @@ -601,14 +598,14 @@ static void qcow_aio_read_cb(void *opaque, int ret) |
601 | 598 | } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { |
602 | 599 | /* add AIO support for compressed blocks ? */ |
603 | 600 | if (decompress_cluster(s, acb->cluster_offset) < 0) |
604 | - goto fail; | |
601 | + goto done; | |
605 | 602 | memcpy(acb->buf, |
606 | 603 | s->cluster_cache + index_in_cluster * 512, 512 * acb->n); |
607 | 604 | goto redo; |
608 | 605 | } else { |
609 | 606 | if ((acb->cluster_offset & 511) != 0) { |
610 | 607 | ret = -EIO; |
611 | - goto fail; | |
608 | + goto done; | |
612 | 609 | } |
613 | 610 | acb->hd_iov.iov_base = acb->buf; |
614 | 611 | acb->hd_iov.iov_len = acb->n * 512; |
... | ... | @@ -617,12 +614,22 @@ static void qcow_aio_read_cb(void *opaque, int ret) |
617 | 614 | (acb->cluster_offset >> 9) + index_in_cluster, |
618 | 615 | &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); |
619 | 616 | if (acb->hd_aiocb == NULL) |
620 | - goto fail; | |
617 | + goto done; | |
618 | + } | |
619 | + | |
620 | + return; | |
621 | + | |
622 | +done: | |
623 | + if (acb->qiov->niov > 1) { | |
624 | + qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); | |
625 | + qemu_vfree(acb->orig_buf); | |
621 | 626 | } |
627 | + acb->common.cb(acb->common.opaque, ret); | |
628 | + qemu_aio_release(acb); | |
622 | 629 | } |
623 | 630 | |
624 | -static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs, | |
625 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
631 | +static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs, | |
632 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
626 | 633 | BlockDriverCompletionFunc *cb, void *opaque) |
627 | 634 | { |
628 | 635 | QCowAIOCB *acb; |
... | ... | @@ -632,7 +639,11 @@ static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs, |
632 | 639 | return NULL; |
633 | 640 | acb->hd_aiocb = NULL; |
634 | 641 | acb->sector_num = sector_num; |
635 | - acb->buf = buf; | |
642 | + acb->qiov = qiov; | |
643 | + if (qiov->niov > 1) | |
644 | + acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size); | |
645 | + else | |
646 | + acb->buf = qiov->iov->iov_base; | |
636 | 647 | acb->nb_sectors = nb_sectors; |
637 | 648 | acb->n = 0; |
638 | 649 | acb->cluster_offset = 0; |
... | ... | @@ -652,12 +663,8 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
652 | 663 | |
653 | 664 | acb->hd_aiocb = NULL; |
654 | 665 | |
655 | - if (ret < 0) { | |
656 | - fail: | |
657 | - acb->common.cb(acb->common.opaque, ret); | |
658 | - qemu_aio_release(acb); | |
659 | - return; | |
660 | - } | |
666 | + if (ret < 0) | |
667 | + goto done; | |
661 | 668 | |
662 | 669 | acb->nb_sectors -= acb->n; |
663 | 670 | acb->sector_num += acb->n; |
... | ... | @@ -665,9 +672,8 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
665 | 672 | |
666 | 673 | if (acb->nb_sectors == 0) { |
667 | 674 | /* request completed */ |
668 | - acb->common.cb(acb->common.opaque, 0); | |
669 | - qemu_aio_release(acb); | |
670 | - return; | |
675 | + ret = 0; | |
676 | + goto done; | |
671 | 677 | } |
672 | 678 | |
673 | 679 | index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); |
... | ... | @@ -679,14 +685,14 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
679 | 685 | index_in_cluster + acb->n); |
680 | 686 | if (!cluster_offset || (cluster_offset & 511) != 0) { |
681 | 687 | ret = -EIO; |
682 | - goto fail; | |
688 | + goto done; | |
683 | 689 | } |
684 | 690 | if (s->crypt_method) { |
685 | 691 | if (!acb->cluster_data) { |
686 | 692 | acb->cluster_data = qemu_mallocz(s->cluster_size); |
687 | 693 | if (!acb->cluster_data) { |
688 | 694 | ret = -ENOMEM; |
689 | - goto fail; | |
695 | + goto done; | |
690 | 696 | } |
691 | 697 | } |
692 | 698 | encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf, |
... | ... | @@ -704,11 +710,18 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
704 | 710 | &acb->hd_qiov, acb->n, |
705 | 711 | qcow_aio_write_cb, acb); |
706 | 712 | if (acb->hd_aiocb == NULL) |
707 | - goto fail; | |
713 | + goto done; | |
714 | + return; | |
715 | + | |
716 | +done: | |
717 | + if (acb->qiov->niov > 1) | |
718 | + qemu_vfree(acb->orig_buf); | |
719 | + acb->common.cb(acb->common.opaque, ret); | |
720 | + qemu_aio_release(acb); | |
708 | 721 | } |
709 | 722 | |
710 | -static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs, | |
711 | - int64_t sector_num, const uint8_t *buf, int nb_sectors, | |
723 | +static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, | |
724 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
712 | 725 | BlockDriverCompletionFunc *cb, void *opaque) |
713 | 726 | { |
714 | 727 | BDRVQcowState *s = bs->opaque; |
... | ... | @@ -721,7 +734,12 @@ static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs, |
721 | 734 | return NULL; |
722 | 735 | acb->hd_aiocb = NULL; |
723 | 736 | acb->sector_num = sector_num; |
724 | - acb->buf = (uint8_t *)buf; | |
737 | + acb->qiov = qiov; | |
738 | + if (qiov->niov > 1) { | |
739 | + acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size); | |
740 | + qemu_iovec_to_buffer(qiov, acb->buf); | |
741 | + } else | |
742 | + acb->buf = qiov->iov->iov_base; | |
725 | 743 | acb->nb_sectors = nb_sectors; |
726 | 744 | acb->n = 0; |
727 | 745 | |
... | ... | @@ -909,8 +927,8 @@ BlockDriver bdrv_qcow = { |
909 | 927 | .bdrv_is_allocated = qcow_is_allocated, |
910 | 928 | .bdrv_set_key = qcow_set_key, |
911 | 929 | .bdrv_make_empty = qcow_make_empty, |
912 | - .bdrv_aio_read = qcow_aio_read, | |
913 | - .bdrv_aio_write = qcow_aio_write, | |
930 | + .bdrv_aio_readv = qcow_aio_readv, | |
931 | + .bdrv_aio_writev = qcow_aio_writev, | |
914 | 932 | .bdrv_aio_cancel = qcow_aio_cancel, |
915 | 933 | .aiocb_size = sizeof(QCowAIOCB), |
916 | 934 | .bdrv_write_compressed = qcow_write_compressed, |
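The setup pattern that qcow_aio_readv()/qcow_aio_writev() introduce above is easy to miss among the goto changes. Condensed into one hypothetical helper (qcow_setup_buf is not part of the patch; qemu_memalign and qemu_iovec_to_buffer are the helpers the patch itself uses):

/* single-segment requests stay zero-copy; multi-segment requests get a
 * 512-byte-aligned bounce buffer, pre-filled for writes */
static uint8_t *qcow_setup_buf(QEMUIOVector *qiov, void **orig_buf,
                               int is_write)
{
    if (qiov->niov > 1) {
        uint8_t *buf = qemu_memalign(512, qiov->size);

        if (is_write)
            qemu_iovec_to_buffer(qiov, buf);
        *orig_buf = buf;
        return buf;
    }
    *orig_buf = NULL;
    return qiov->iov->iov_base;
}

The copy is thus only paid for genuinely vectored requests, and the unified done: label undoes it (scatter back for reads, free for writes) exactly once per request.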
block-qcow2.c
... | ... | @@ -1264,7 +1264,9 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num, |
1264 | 1264 | typedef struct QCowAIOCB { |
1265 | 1265 | BlockDriverAIOCB common; |
1266 | 1266 | int64_t sector_num; |
1267 | + QEMUIOVector *qiov; | |
1267 | 1268 | uint8_t *buf; |
1269 | + void *orig_buf; | |
1268 | 1270 | int nb_sectors; |
1269 | 1271 | int n; |
1270 | 1272 | uint64_t cluster_offset; |
... | ... | @@ -1307,12 +1309,8 @@ static void qcow_aio_read_cb(void *opaque, int ret) |
1307 | 1309 | int index_in_cluster, n1; |
1308 | 1310 | |
1309 | 1311 | acb->hd_aiocb = NULL; |
1310 | - if (ret < 0) { | |
1311 | -fail: | |
1312 | - acb->common.cb(acb->common.opaque, ret); | |
1313 | - qemu_aio_release(acb); | |
1314 | - return; | |
1315 | - } | |
1312 | + if (ret < 0) | |
1313 | + goto done; | |
1316 | 1314 | |
1317 | 1315 | /* post process the read buffer */ |
1318 | 1316 | if (!acb->cluster_offset) { |
... | ... | @@ -1333,9 +1331,8 @@ fail: |
1333 | 1331 | |
1334 | 1332 | if (acb->nb_sectors == 0) { |
1335 | 1333 | /* request completed */ |
1336 | - acb->common.cb(acb->common.opaque, 0); | |
1337 | - qemu_aio_release(acb); | |
1338 | - return; | |
1334 | + ret = 0; | |
1335 | + goto done; | |
1339 | 1336 | } |
1340 | 1337 | |
1341 | 1338 | /* prepare next AIO request */ |
... | ... | @@ -1356,32 +1353,32 @@ fail: |
1356 | 1353 | &acb->hd_qiov, acb->n, |
1357 | 1354 | qcow_aio_read_cb, acb); |
1358 | 1355 | if (acb->hd_aiocb == NULL) |
1359 | - goto fail; | |
1356 | + goto done; | |
1360 | 1357 | } else { |
1361 | 1358 | ret = qcow_schedule_bh(qcow_aio_read_bh, acb); |
1362 | 1359 | if (ret < 0) |
1363 | - goto fail; | |
1360 | + goto done; | |
1364 | 1361 | } |
1365 | 1362 | } else { |
1366 | 1363 | /* Note: in this case, no need to wait */ |
1367 | 1364 | memset(acb->buf, 0, 512 * acb->n); |
1368 | 1365 | ret = qcow_schedule_bh(qcow_aio_read_bh, acb); |
1369 | 1366 | if (ret < 0) |
1370 | - goto fail; | |
1367 | + goto done; | |
1371 | 1368 | } |
1372 | 1369 | } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { |
1373 | 1370 | /* add AIO support for compressed blocks ? */ |
1374 | 1371 | if (decompress_cluster(s, acb->cluster_offset) < 0) |
1375 | - goto fail; | |
1372 | + goto done; | |
1376 | 1373 | memcpy(acb->buf, |
1377 | 1374 | s->cluster_cache + index_in_cluster * 512, 512 * acb->n); |
1378 | 1375 | ret = qcow_schedule_bh(qcow_aio_read_bh, acb); |
1379 | 1376 | if (ret < 0) |
1380 | - goto fail; | |
1377 | + goto done; | |
1381 | 1378 | } else { |
1382 | 1379 | if ((acb->cluster_offset & 511) != 0) { |
1383 | 1380 | ret = -EIO; |
1384 | - goto fail; | |
1381 | + goto done; | |
1385 | 1382 | } |
1386 | 1383 | |
1387 | 1384 | acb->hd_iov.iov_base = acb->buf; |
... | ... | @@ -1391,13 +1388,22 @@ fail: |
1391 | 1388 | (acb->cluster_offset >> 9) + index_in_cluster, |
1392 | 1389 | &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); |
1393 | 1390 | if (acb->hd_aiocb == NULL) |
1394 | - goto fail; | |
1391 | + goto done; | |
1392 | + } | |
1393 | + | |
1394 | + return; | |
1395 | +done: | |
1396 | + if (acb->qiov->niov > 1) { | |
1397 | + qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); | |
1398 | + qemu_vfree(acb->orig_buf); | |
1395 | 1399 | } |
1400 | + acb->common.cb(acb->common.opaque, ret); | |
1401 | + qemu_aio_release(acb); | |
1396 | 1402 | } |
1397 | 1403 | |
1398 | 1404 | static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, |
1399 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
1400 | - BlockDriverCompletionFunc *cb, void *opaque) | |
1405 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
1406 | + BlockDriverCompletionFunc *cb, void *opaque, int is_write) | |
1401 | 1407 | { |
1402 | 1408 | QCowAIOCB *acb; |
1403 | 1409 | |
... | ... | @@ -1406,7 +1412,13 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, |
1406 | 1412 | return NULL; |
1407 | 1413 | acb->hd_aiocb = NULL; |
1408 | 1414 | acb->sector_num = sector_num; |
1409 | - acb->buf = buf; | |
1415 | + acb->qiov = qiov; | |
1416 | + if (qiov->niov > 1) { | |
1417 | + acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size); | |
1418 | + if (is_write) | |
1419 | + qemu_iovec_to_buffer(qiov, acb->buf); | |
1420 | + } else | |
1421 | + acb->buf = qiov->iov->iov_base; | |
1410 | 1422 | acb->nb_sectors = nb_sectors; |
1411 | 1423 | acb->n = 0; |
1412 | 1424 | acb->cluster_offset = 0; |
... | ... | @@ -1414,13 +1426,13 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, |
1414 | 1426 | return acb; |
1415 | 1427 | } |
1416 | 1428 | |
1417 | -static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs, | |
1418 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
1429 | +static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs, | |
1430 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
1419 | 1431 | BlockDriverCompletionFunc *cb, void *opaque) |
1420 | 1432 | { |
1421 | 1433 | QCowAIOCB *acb; |
1422 | 1434 | |
1423 | - acb = qcow_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); | |
1435 | + acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); | |
1424 | 1436 | if (!acb) |
1425 | 1437 | return NULL; |
1426 | 1438 | |
... | ... | @@ -1439,16 +1451,12 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
1439 | 1451 | |
1440 | 1452 | acb->hd_aiocb = NULL; |
1441 | 1453 | |
1442 | - if (ret < 0) { | |
1443 | - fail: | |
1444 | - acb->common.cb(acb->common.opaque, ret); | |
1445 | - qemu_aio_release(acb); | |
1446 | - return; | |
1447 | - } | |
1454 | + if (ret < 0) | |
1455 | + goto done; | |
1448 | 1456 | |
1449 | 1457 | if (alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) { |
1450 | 1458 | free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters); |
1451 | - goto fail; | |
1459 | + goto done; | |
1452 | 1460 | } |
1453 | 1461 | |
1454 | 1462 | acb->nb_sectors -= acb->n; |
... | ... | @@ -1457,9 +1465,8 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
1457 | 1465 | |
1458 | 1466 | if (acb->nb_sectors == 0) { |
1459 | 1467 | /* request completed */ |
1460 | - acb->common.cb(acb->common.opaque, 0); | |
1461 | - qemu_aio_release(acb); | |
1462 | - return; | |
1468 | + ret = 0; | |
1469 | + goto done; | |
1463 | 1470 | } |
1464 | 1471 | |
1465 | 1472 | index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); |
... | ... | @@ -1473,7 +1480,7 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
1473 | 1480 | n_end, &acb->n, &acb->l2meta); |
1474 | 1481 | if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) { |
1475 | 1482 | ret = -EIO; |
1476 | - goto fail; | |
1483 | + goto done; | |
1477 | 1484 | } |
1478 | 1485 | if (s->crypt_method) { |
1479 | 1486 | if (!acb->cluster_data) { |
... | ... | @@ -1494,11 +1501,19 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
1494 | 1501 | &acb->hd_qiov, acb->n, |
1495 | 1502 | qcow_aio_write_cb, acb); |
1496 | 1503 | if (acb->hd_aiocb == NULL) |
1497 | - goto fail; | |
1504 | + goto done; | |
1505 | + | |
1506 | + return; | |
1507 | + | |
1508 | +done: | |
1509 | + if (acb->qiov->niov > 1) | |
1510 | + qemu_vfree(acb->orig_buf); | |
1511 | + acb->common.cb(acb->common.opaque, ret); | |
1512 | + qemu_aio_release(acb); | |
1498 | 1513 | } |
1499 | 1514 | |
1500 | -static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs, | |
1501 | - int64_t sector_num, const uint8_t *buf, int nb_sectors, | |
1515 | +static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, | |
1516 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
1502 | 1517 | BlockDriverCompletionFunc *cb, void *opaque) |
1503 | 1518 | { |
1504 | 1519 | BDRVQcowState *s = bs->opaque; |
... | ... | @@ -1506,7 +1521,7 @@ static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs, |
1506 | 1521 | |
1507 | 1522 | s->cluster_cache_offset = -1; /* disable compressed cache */ |
1508 | 1523 | |
1509 | - acb = qcow_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); | |
1524 | + acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); | |
1510 | 1525 | if (!acb) |
1511 | 1526 | return NULL; |
1512 | 1527 | |
... | ... | @@ -2771,8 +2786,8 @@ BlockDriver bdrv_qcow2 = { |
2771 | 2786 | .bdrv_set_key = qcow_set_key, |
2772 | 2787 | .bdrv_make_empty = qcow_make_empty, |
2773 | 2788 | |
2774 | - .bdrv_aio_read = qcow_aio_read, | |
2775 | - .bdrv_aio_write = qcow_aio_write, | |
2789 | + .bdrv_aio_readv = qcow_aio_readv, | |
2790 | + .bdrv_aio_writev = qcow_aio_writev, | |
2776 | 2791 | .bdrv_aio_cancel = qcow_aio_cancel, |
2777 | 2792 | .aiocb_size = sizeof(QCowAIOCB), |
2778 | 2793 | .bdrv_write_compressed = qcow_write_compressed, |
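From the caller's side, the new entry points take a QEMUIOVector that must stay valid until the completion callback runs, because the drivers stash the pointer in their ACB and use it again at completion. A hypothetical device-model caller (MyRequest, read_done and the 4 KB buffers are invented; qemu_iovec_init/add/destroy are existing helpers):

typedef struct MyRequest {
    QEMUIOVector qiov;   /* must outlive the request, so not on the stack */
} MyRequest;

static void read_done(void *opaque, int ret)
{
    MyRequest *req = opaque;

    /* on success the data has been scattered into the segments by now */
    qemu_iovec_destroy(&req->qiov);
}

static void submit_read(BlockDriverState *bs, MyRequest *req,
                        int64_t sector_num, void *buf_a, void *buf_b)
{
    qemu_iovec_init(&req->qiov, 2);
    qemu_iovec_add(&req->qiov, buf_a, 4096);
    qemu_iovec_add(&req->qiov, buf_b, 4096);

    /* 8192 bytes == 16 sectors of 512 bytes */
    bdrv_aio_readv(bs, sector_num, &req->qiov, 16, read_done, req);
}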
block-raw-posix.c
... | ... | @@ -599,8 +599,8 @@ static int posix_aio_init(void) |
599 | 599 | return 0; |
600 | 600 | } |
601 | 601 | |
602 | -static RawAIOCB *raw_aio_setup(BlockDriverState *bs, | |
603 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
602 | +static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num, | |
603 | + QEMUIOVector *qiov, int nb_sectors, | |
604 | 604 | BlockDriverCompletionFunc *cb, void *opaque) |
605 | 605 | { |
606 | 606 | BDRVRawState *s = bs->opaque; |
... | ... | @@ -614,24 +614,25 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs, |
614 | 614 | return NULL; |
615 | 615 | acb->aiocb.aio_fildes = s->fd; |
616 | 616 | acb->aiocb.ev_signo = SIGUSR2; |
617 | - acb->aiocb.aio_buf = buf; | |
618 | - if (nb_sectors < 0) | |
619 | - acb->aiocb.aio_nbytes = -nb_sectors; | |
620 | - else | |
621 | - acb->aiocb.aio_nbytes = nb_sectors * 512; | |
617 | + acb->aiocb.aio_iov = qiov->iov; | |
618 | + acb->aiocb.aio_niov = qiov->niov; | |
619 | + acb->aiocb.aio_nbytes = nb_sectors * 512; | |
622 | 620 | acb->aiocb.aio_offset = sector_num * 512; |
621 | + acb->aiocb.aio_flags = 0; | |
622 | + | |
623 | + /* | |
624 | + * If O_DIRECT is used the buffer needs to be aligned on a sector | |
625 | + * boundary. Tell the low level code to ensure that in case it's | |
626 | + * not done yet. | |
627 | + */ | |
628 | + if (s->aligned_buf) | |
629 | + acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED; | |
630 | + | |
623 | 631 | acb->next = posix_aio_state->first_aio; |
624 | 632 | posix_aio_state->first_aio = acb; |
625 | 633 | return acb; |
626 | 634 | } |
627 | 635 | |
628 | -static void raw_aio_em_cb(void* opaque) | |
629 | -{ | |
630 | - RawAIOCB *acb = opaque; | |
631 | - acb->common.cb(acb->common.opaque, acb->ret); | |
632 | - qemu_aio_release(acb); | |
633 | -} | |
634 | - | |
635 | 636 | static void raw_aio_remove(RawAIOCB *acb) |
636 | 637 | { |
637 | 638 | RawAIOCB **pacb; |
... | ... | @@ -651,28 +652,13 @@ static void raw_aio_remove(RawAIOCB *acb) |
651 | 652 | } |
652 | 653 | } |
653 | 654 | |
654 | -static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, | |
655 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
655 | +static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs, | |
656 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
656 | 657 | BlockDriverCompletionFunc *cb, void *opaque) |
657 | 658 | { |
658 | 659 | RawAIOCB *acb; |
659 | 660 | |
660 | - /* | |
661 | - * If O_DIRECT is used and the buffer is not aligned fall back | |
662 | - * to synchronous IO. | |
663 | - */ | |
664 | - BDRVRawState *s = bs->opaque; | |
665 | - | |
666 | - if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { | |
667 | - QEMUBH *bh; | |
668 | - acb = qemu_aio_get(bs, cb, opaque); | |
669 | - acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors); | |
670 | - bh = qemu_bh_new(raw_aio_em_cb, acb); | |
671 | - qemu_bh_schedule(bh); | |
672 | - return &acb->common; | |
673 | - } | |
674 | - | |
675 | - acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); | |
661 | + acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque); | |
676 | 662 | if (!acb) |
677 | 663 | return NULL; |
678 | 664 | if (qemu_paio_read(&acb->aiocb) < 0) { |
... | ... | @@ -682,28 +668,13 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, |
682 | 668 | return &acb->common; |
683 | 669 | } |
684 | 670 | |
685 | -static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, | |
686 | - int64_t sector_num, const uint8_t *buf, int nb_sectors, | |
671 | +static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs, | |
672 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
687 | 673 | BlockDriverCompletionFunc *cb, void *opaque) |
688 | 674 | { |
689 | 675 | RawAIOCB *acb; |
690 | 676 | |
691 | - /* | |
692 | - * If O_DIRECT is used and the buffer is not aligned fall back | |
693 | - * to synchronous IO. | |
694 | - */ | |
695 | - BDRVRawState *s = bs->opaque; | |
696 | - | |
697 | - if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { | |
698 | - QEMUBH *bh; | |
699 | - acb = qemu_aio_get(bs, cb, opaque); | |
700 | - acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors); | |
701 | - bh = qemu_bh_new(raw_aio_em_cb, acb); | |
702 | - qemu_bh_schedule(bh); | |
703 | - return &acb->common; | |
704 | - } | |
705 | - | |
706 | - acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); | |
677 | + acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque); | |
707 | 678 | if (!acb) |
708 | 679 | return NULL; |
709 | 680 | if (qemu_paio_write(&acb->aiocb) < 0) { |
... | ... | @@ -887,8 +858,8 @@ BlockDriver bdrv_raw = { |
887 | 858 | .bdrv_flush = raw_flush, |
888 | 859 | |
889 | 860 | #ifdef CONFIG_AIO |
890 | - .bdrv_aio_read = raw_aio_read, | |
891 | - .bdrv_aio_write = raw_aio_write, | |
861 | + .bdrv_aio_readv = raw_aio_readv, | |
862 | + .bdrv_aio_writev = raw_aio_writev, | |
892 | 863 | .bdrv_aio_cancel = raw_aio_cancel, |
893 | 864 | .aiocb_size = sizeof(RawAIOCB), |
894 | 865 | #endif |
... | ... | @@ -1215,12 +1186,24 @@ static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs, |
1215 | 1186 | unsigned long int req, void *buf, |
1216 | 1187 | BlockDriverCompletionFunc *cb, void *opaque) |
1217 | 1188 | { |
1189 | + BDRVRawState *s = bs->opaque; | |
1218 | 1190 | RawAIOCB *acb; |
1219 | 1191 | |
1220 | - acb = raw_aio_setup(bs, 0, buf, 0, cb, opaque); | |
1192 | + if (fd_open(bs) < 0) | |
1193 | + return NULL; | |
1194 | + | |
1195 | + acb = qemu_aio_get(bs, cb, opaque); | |
1221 | 1196 | if (!acb) |
1222 | 1197 | return NULL; |
1198 | + acb->aiocb.aio_fildes = s->fd; | |
1199 | + acb->aiocb.ev_signo = SIGUSR2; | |
1200 | + acb->aiocb.aio_offset = 0; | |
1201 | + acb->aiocb.aio_flags = 0; | |
1202 | + | |
1203 | + acb->next = posix_aio_state->first_aio; | |
1204 | + posix_aio_state->first_aio = acb; | |
1223 | 1205 | |
1206 | + acb->aiocb.aio_ioctl_buf = buf; | |
1224 | 1207 | acb->aiocb.aio_ioctl_cmd = req; |
1225 | 1208 | if (qemu_paio_ioctl(&acb->aiocb) < 0) { |
1226 | 1209 | raw_aio_remove(acb); |
... | ... | @@ -1424,8 +1407,8 @@ BlockDriver bdrv_host_device = { |
1424 | 1407 | .bdrv_flush = raw_flush, |
1425 | 1408 | |
1426 | 1409 | #ifdef CONFIG_AIO |
1427 | - .bdrv_aio_read = raw_aio_read, | |
1428 | - .bdrv_aio_write = raw_aio_write, | |
1410 | + .bdrv_aio_readv = raw_aio_readv, | |
1411 | + .bdrv_aio_writev = raw_aio_writev, | |
1429 | 1412 | .bdrv_aio_cancel = raw_aio_cancel, |
1430 | 1413 | .aiocb_size = sizeof(RawAIOCB), |
1431 | 1414 | #endif |
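Previously a misaligned buffer under O_DIRECT forced a synchronous fallback right here in the caller; now raw_aio_setup() merely tags the request with QEMU_AIO_SECTOR_ALIGNED and the decision moves into the I/O thread. The test the thread applies mirrors aiocb_needs_copy() in posix-aio-compat.c (needs_bounce is an illustrative name):

#include <stdint.h>
#include <sys/uio.h>

/* with O_DIRECT every segment must start on a 512-byte boundary,
 * otherwise the I/O thread has to bounce the request */
static int needs_bounce(const struct iovec *iov, int niov)
{
    int i;

    for (i = 0; i < niov; i++)
        if ((uintptr_t)iov[i].iov_base % 512)
            return 1;
    return 0;
}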
block.c
... | ... | @@ -47,25 +47,21 @@ |
47 | 47 | #define SECTOR_BITS 9 |
48 | 48 | #define SECTOR_SIZE (1 << SECTOR_BITS) |
49 | 49 | |
50 | -static AIOPool vectored_aio_pool; | |
51 | - | |
52 | 50 | typedef struct BlockDriverAIOCBSync { |
53 | 51 | BlockDriverAIOCB common; |
54 | 52 | QEMUBH *bh; |
55 | 53 | int ret; |
54 | + /* vector translation state */ | |
55 | + QEMUIOVector *qiov; | |
56 | + uint8_t *bounce; | |
57 | + int is_write; | |
56 | 58 | } BlockDriverAIOCBSync; |
57 | 59 | |
58 | -static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, | |
59 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
60 | +static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, | |
61 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
60 | 62 | BlockDriverCompletionFunc *cb, void *opaque); |
61 | -static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, | |
62 | - int64_t sector_num, const uint8_t *buf, int nb_sectors, | |
63 | - BlockDriverCompletionFunc *cb, void *opaque); | |
64 | -static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs, | |
65 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
66 | - BlockDriverCompletionFunc *cb, void *opaque); | |
67 | -static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs, | |
68 | - int64_t sector_num, const uint8_t *buf, int nb_sectors, | |
63 | +static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, | |
64 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
69 | 65 | BlockDriverCompletionFunc *cb, void *opaque); |
70 | 66 | static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb); |
71 | 67 | static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, |
... | ... | @@ -144,10 +140,10 @@ void path_combine(char *dest, int dest_size, |
144 | 140 | |
145 | 141 | static void bdrv_register(BlockDriver *bdrv) |
146 | 142 | { |
147 | - if (!bdrv->bdrv_aio_read) { | |
143 | + if (!bdrv->bdrv_aio_readv) { | |
148 | 144 | /* add AIO emulation layer */ |
149 | - bdrv->bdrv_aio_read = bdrv_aio_read_em; | |
150 | - bdrv->bdrv_aio_write = bdrv_aio_write_em; | |
145 | + bdrv->bdrv_aio_readv = bdrv_aio_readv_em; | |
146 | + bdrv->bdrv_aio_writev = bdrv_aio_writev_em; | |
151 | 147 | bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em; |
152 | 148 | bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync); |
153 | 149 | } else if (!bdrv->bdrv_read) { |
... | ... | @@ -1295,91 +1291,10 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn) |
1295 | 1291 | /**************************************************************/ |
1296 | 1292 | /* async I/Os */ |
1297 | 1293 | |
1298 | -typedef struct VectorTranslationAIOCB { | |
1299 | - BlockDriverAIOCB common; | |
1300 | - QEMUIOVector *iov; | |
1301 | - uint8_t *bounce; | |
1302 | - int is_write; | |
1303 | - BlockDriverAIOCB *aiocb; | |
1304 | -} VectorTranslationAIOCB; | |
1305 | - | |
1306 | -static void bdrv_aio_cancel_vector(BlockDriverAIOCB *_acb) | |
1307 | -{ | |
1308 | - VectorTranslationAIOCB *acb | |
1309 | - = container_of(_acb, VectorTranslationAIOCB, common); | |
1310 | - | |
1311 | - bdrv_aio_cancel(acb->aiocb); | |
1312 | -} | |
1313 | - | |
1314 | -static void bdrv_aio_rw_vector_cb(void *opaque, int ret) | |
1315 | -{ | |
1316 | - VectorTranslationAIOCB *s = (VectorTranslationAIOCB *)opaque; | |
1317 | - | |
1318 | - if (!s->is_write) { | |
1319 | - qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size); | |
1320 | - } | |
1321 | - qemu_vfree(s->bounce); | |
1322 | - s->common.cb(s->common.opaque, ret); | |
1323 | - qemu_aio_release(s); | |
1324 | -} | |
1325 | - | |
1326 | -static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, | |
1327 | - int64_t sector_num, | |
1328 | - QEMUIOVector *iov, | |
1329 | - int nb_sectors, | |
1330 | - BlockDriverCompletionFunc *cb, | |
1331 | - void *opaque, | |
1332 | - int is_write) | |
1333 | - | |
1334 | -{ | |
1335 | - VectorTranslationAIOCB *s = qemu_aio_get_pool(&vectored_aio_pool, bs, | |
1336 | - cb, opaque); | |
1337 | - | |
1338 | - s->iov = iov; | |
1339 | - s->bounce = qemu_memalign(512, nb_sectors * 512); | |
1340 | - s->is_write = is_write; | |
1341 | - if (is_write) { | |
1342 | - qemu_iovec_to_buffer(s->iov, s->bounce); | |
1343 | - s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors, | |
1344 | - bdrv_aio_rw_vector_cb, s); | |
1345 | - } else { | |
1346 | - s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors, | |
1347 | - bdrv_aio_rw_vector_cb, s); | |
1348 | - } | |
1349 | - if (!s->aiocb) { | |
1350 | - qemu_vfree(s->bounce); | |
1351 | - qemu_aio_release(s); | |
1352 | - return NULL; | |
1353 | - } | |
1354 | - return &s->common; | |
1355 | -} | |
1356 | - | |
1357 | 1294 | BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, |
1358 | - QEMUIOVector *iov, int nb_sectors, | |
1295 | + QEMUIOVector *qiov, int nb_sectors, | |
1359 | 1296 | BlockDriverCompletionFunc *cb, void *opaque) |
1360 | 1297 | { |
1361 | - if (bdrv_check_request(bs, sector_num, nb_sectors)) | |
1362 | - return NULL; | |
1363 | - | |
1364 | - return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors, | |
1365 | - cb, opaque, 0); | |
1366 | -} | |
1367 | - | |
1368 | -BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, | |
1369 | - QEMUIOVector *iov, int nb_sectors, | |
1370 | - BlockDriverCompletionFunc *cb, void *opaque) | |
1371 | -{ | |
1372 | - if (bdrv_check_request(bs, sector_num, nb_sectors)) | |
1373 | - return NULL; | |
1374 | - | |
1375 | - return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors, | |
1376 | - cb, opaque, 1); | |
1377 | -} | |
1378 | - | |
1379 | -static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num, | |
1380 | - uint8_t *buf, int nb_sectors, | |
1381 | - BlockDriverCompletionFunc *cb, void *opaque) | |
1382 | -{ | |
1383 | 1298 | BlockDriver *drv = bs->drv; |
1384 | 1299 | BlockDriverAIOCB *ret; |
1385 | 1300 | |
... | ... | @@ -1388,7 +1303,8 @@ static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num, |
1388 | 1303 | if (bdrv_check_request(bs, sector_num, nb_sectors)) |
1389 | 1304 | return NULL; |
1390 | 1305 | |
1391 | - ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque); | |
1306 | + ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors, | |
1307 | + cb, opaque); | |
1392 | 1308 | |
1393 | 1309 | if (ret) { |
1394 | 1310 | /* Update stats even though technically transfer has not happened. */ |
... | ... | @@ -1399,9 +1315,9 @@ static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num, |
1399 | 1315 | return ret; |
1400 | 1316 | } |
1401 | 1317 | |
1402 | -static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num, | |
1403 | - const uint8_t *buf, int nb_sectors, | |
1404 | - BlockDriverCompletionFunc *cb, void *opaque) | |
1318 | +BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, | |
1319 | + QEMUIOVector *qiov, int nb_sectors, | |
1320 | + BlockDriverCompletionFunc *cb, void *opaque) | |
1405 | 1321 | { |
1406 | 1322 | BlockDriver *drv = bs->drv; |
1407 | 1323 | BlockDriverAIOCB *ret; |
... | ... | @@ -1413,7 +1329,8 @@ static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num |
1413 | 1329 | if (bdrv_check_request(bs, sector_num, nb_sectors)) |
1414 | 1330 | return NULL; |
1415 | 1331 | |
1416 | - ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque); | |
1332 | + ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors, | |
1333 | + cb, opaque); | |
1417 | 1334 | |
1418 | 1335 | if (ret) { |
1419 | 1336 | /* Update stats even though technically transfer has not happened. */ |
... | ... | @@ -1436,42 +1353,62 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb) |
1436 | 1353 | static void bdrv_aio_bh_cb(void *opaque) |
1437 | 1354 | { |
1438 | 1355 | BlockDriverAIOCBSync *acb = opaque; |
1356 | + | |
1357 | + qemu_vfree(acb->bounce); | |
1358 | + | |
1359 | + if (!acb->is_write) | |
1360 | + qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size); | |
1439 | 1361 | acb->common.cb(acb->common.opaque, acb->ret); |
1362 | + | |
1440 | 1363 | qemu_aio_release(acb); |
1441 | 1364 | } |
1442 | 1365 | |
1443 | -static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs, | |
1444 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
1445 | - BlockDriverCompletionFunc *cb, void *opaque) | |
1366 | +static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, | |
1367 | + int64_t sector_num, | |
1368 | + QEMUIOVector *qiov, | |
1369 | + int nb_sectors, | |
1370 | + BlockDriverCompletionFunc *cb, | |
1371 | + void *opaque, | |
1372 | + int is_write) | |
1373 | + | |
1446 | 1374 | { |
1447 | 1375 | BlockDriverAIOCBSync *acb; |
1448 | - int ret; | |
1449 | 1376 | |
1450 | 1377 | acb = qemu_aio_get(bs, cb, opaque); |
1378 | + acb->is_write = is_write; | |
1379 | + acb->qiov = qiov; | |
1380 | + acb->bounce = qemu_memalign(512, qiov->size); | |
1381 | + | |
1451 | 1382 | if (!acb->bh) |
1452 | 1383 | acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); |
1453 | - ret = bdrv_read(bs, sector_num, buf, nb_sectors); | |
1454 | - acb->ret = ret; | |
1384 | + | |
1385 | + if (is_write) { | |
1386 | + qemu_iovec_to_buffer(acb->qiov, acb->bounce); | |
1387 | + acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors); | |
1388 | + } else { | |
1389 | + acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors); | |
1390 | + } | |
1391 | + | |
1455 | 1392 | qemu_bh_schedule(acb->bh); |
1393 | + | |
1456 | 1394 | return &acb->common; |
1457 | 1395 | } |
1458 | 1396 | |
1459 | -static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs, | |
1460 | - int64_t sector_num, const uint8_t *buf, int nb_sectors, | |
1397 | +static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, | |
1398 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
1461 | 1399 | BlockDriverCompletionFunc *cb, void *opaque) |
1462 | 1400 | { |
1463 | - BlockDriverAIOCBSync *acb; | |
1464 | - int ret; | |
1401 | + return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); | |
1402 | +} | |
1465 | 1403 | |
1466 | - acb = qemu_aio_get(bs, cb, opaque); | |
1467 | - if (!acb->bh) | |
1468 | - acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); | |
1469 | - ret = bdrv_write(bs, sector_num, buf, nb_sectors); | |
1470 | - acb->ret = ret; | |
1471 | - qemu_bh_schedule(acb->bh); | |
1472 | - return &acb->common; | |
1404 | +static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, | |
1405 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
1406 | + BlockDriverCompletionFunc *cb, void *opaque) | |
1407 | +{ | |
1408 | + return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); | |
1473 | 1409 | } |
1474 | 1410 | |
1411 | + | |
1475 | 1412 | static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb) |
1476 | 1413 | { |
1477 | 1414 | BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb; |
... | ... | @@ -1494,10 +1431,15 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, |
1494 | 1431 | { |
1495 | 1432 | int async_ret; |
1496 | 1433 | BlockDriverAIOCB *acb; |
1434 | + struct iovec iov; | |
1435 | + QEMUIOVector qiov; | |
1497 | 1436 | |
1498 | 1437 | async_ret = NOT_DONE; |
1499 | - acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors, | |
1500 | - bdrv_rw_em_cb, &async_ret); | |
1438 | + iov.iov_base = buf; | |
1439 | + iov.iov_len = nb_sectors * 512; | |
1440 | + qemu_iovec_init_external(&qiov, &iov, 1); | |
1441 | + acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors, | |
1442 | + bdrv_rw_em_cb, &async_ret); | |
1501 | 1443 | if (acb == NULL) |
1502 | 1444 | return -1; |
1503 | 1445 | |
... | ... | @@ -1513,10 +1455,15 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, |
1513 | 1455 | { |
1514 | 1456 | int async_ret; |
1515 | 1457 | BlockDriverAIOCB *acb; |
1458 | + struct iovec iov; | |
1459 | + QEMUIOVector qiov; | |
1516 | 1460 | |
1517 | 1461 | async_ret = NOT_DONE; |
1518 | - acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors, | |
1519 | - bdrv_rw_em_cb, &async_ret); | |
1462 | + iov.iov_base = (void *)buf; | |
1463 | + iov.iov_len = nb_sectors * 512; | |
1464 | + qemu_iovec_init_external(&qiov, &iov, 1); | |
1465 | + acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors, | |
1466 | + bdrv_rw_em_cb, &async_ret); | |
1520 | 1467 | if (acb == NULL) |
1521 | 1468 | return -1; |
1522 | 1469 | while (async_ret == NOT_DONE) { |
... | ... | @@ -1527,9 +1474,6 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, |
1527 | 1474 | |
1528 | 1475 | void bdrv_init(void) |
1529 | 1476 | { |
1530 | - aio_pool_init(&vectored_aio_pool, sizeof(VectorTranslationAIOCB), | |
1531 | - bdrv_aio_cancel_vector); | |
1532 | - | |
1533 | 1477 | bdrv_register(&bdrv_raw); |
1534 | 1478 | bdrv_register(&bdrv_host_device); |
1535 | 1479 | #ifndef _WIN32 |
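Note how bdrv_read_em()/bdrv_write_em() adapt a flat buffer to the vectored API: qemu_iovec_init_external() wraps an existing iovec array without allocating, and the stack-allocated vector is safe only because the function blocks until completion before returning. The same pattern, condensed into one hypothetical function:

static int read_flat_sync(BlockDriverState *bs, int64_t sector_num,
                          uint8_t *buf, int nb_sectors)
{
    int async_ret = NOT_DONE;
    struct iovec iov = {
        .iov_base = buf,
        .iov_len  = nb_sectors * 512,
    };
    QEMUIOVector qiov;
    BlockDriverAIOCB *acb;

    qemu_iovec_init_external(&qiov, &iov, 1);   /* wraps iov, no malloc */
    acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
                         bdrv_rw_em_cb, &async_ret);
    if (acb == NULL)
        return -1;
    while (async_ret == NOT_DONE)
        qemu_aio_wait();   /* stack iov/qiov stay valid while we block */
    return async_ret;
}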
block_int.h
... | ... | @@ -54,11 +54,11 @@ struct BlockDriver { |
54 | 54 | int (*bdrv_set_key)(BlockDriverState *bs, const char *key); |
55 | 55 | int (*bdrv_make_empty)(BlockDriverState *bs); |
56 | 56 | /* aio */ |
57 | - BlockDriverAIOCB *(*bdrv_aio_read)(BlockDriverState *bs, | |
58 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
57 | + BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs, | |
58 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
59 | 59 | BlockDriverCompletionFunc *cb, void *opaque); |
60 | - BlockDriverAIOCB *(*bdrv_aio_write)(BlockDriverState *bs, | |
61 | - int64_t sector_num, const uint8_t *buf, int nb_sectors, | |
60 | + BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs, | |
61 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
62 | 62 | BlockDriverCompletionFunc *cb, void *opaque); |
63 | 63 | void (*bdrv_aio_cancel)(BlockDriverAIOCB *acb); |
64 | 64 | int aiocb_size; |
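With the method table changed, a driver now provides the vectored hooks directly; if it offers only synchronous bdrv_read/bdrv_write, bdrv_register() in block.c installs the bdrv_aio_readv_em/bdrv_aio_writev_em bounce emulation. A hypothetical minimal driver table (all mydrv_* names invented):

BlockDriver bdrv_mydrv = {
    .format_name     = "mydrv",
    .instance_size   = sizeof(BDRVMyDrvState),
    .bdrv_open       = mydrv_open,
    .bdrv_aio_readv  = mydrv_aio_readv,
    .bdrv_aio_writev = mydrv_aio_writev,
    .bdrv_aio_cancel = mydrv_aio_cancel,
    .aiocb_size      = sizeof(MyDrvAIOCB),
};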
posix-aio-compat.c
... | ... | @@ -20,6 +20,7 @@ |
20 | 20 | #include <stdlib.h> |
21 | 21 | #include <stdio.h> |
22 | 22 | #include "osdep.h" |
23 | +#include "qemu-common.h" | |
23 | 24 | |
24 | 25 | #include "posix-aio-compat.h" |
25 | 26 | |
... | ... | @@ -76,45 +77,110 @@ static void thread_create(pthread_t *thread, pthread_attr_t *attr, |
76 | 77 | if (ret) die2(ret, "pthread_create"); |
77 | 78 | } |
78 | 79 | |
79 | -static size_t handle_aiocb_readwrite(struct qemu_paiocb *aiocb) | |
80 | +static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb) | |
81 | +{ | |
82 | + int ret; | |
83 | + | |
84 | + ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf); | |
85 | + if (ret == -1) | |
86 | + return -errno; | |
87 | + return ret; | |
88 | +} | |
89 | + | |
90 | +/* | |
91 | + * Check if we need to copy the data in the aiocb into a new | |
92 | + * properly aligned buffer. | |
93 | + */ | |
94 | +static int aiocb_needs_copy(struct qemu_paiocb *aiocb) | |
95 | +{ | |
96 | + if (aiocb->aio_flags & QEMU_AIO_SECTOR_ALIGNED) { | |
97 | + int i; | |
98 | + | |
99 | + for (i = 0; i < aiocb->aio_niov; i++) | |
100 | + if ((uintptr_t) aiocb->aio_iov[i].iov_base % 512) | |
101 | + return 1; | |
102 | + } | |
103 | + | |
104 | + return 0; | |
105 | +} | |
106 | + | |
107 | +static size_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf) | |
80 | 108 | { |
81 | 109 | size_t offset = 0; |
82 | - ssize_t len; | |
110 | + size_t len; | |
83 | 111 | |
84 | 112 | while (offset < aiocb->aio_nbytes) { |
85 | - if (aiocb->aio_type == QEMU_PAIO_WRITE) | |
86 | - len = pwrite(aiocb->aio_fildes, | |
87 | - (const char *)aiocb->aio_buf + offset, | |
113 | + if (aiocb->aio_type == QEMU_PAIO_WRITE) | |
114 | + len = pwrite(aiocb->aio_fildes, | |
115 | + (const char *)buf + offset, | |
116 | + aiocb->aio_nbytes - offset, | |
117 | + aiocb->aio_offset + offset); | |
118 | + else | |
119 | + len = pread(aiocb->aio_fildes, | |
120 | + buf + offset, | |
88 | 121 | aiocb->aio_nbytes - offset, |
89 | 122 | aiocb->aio_offset + offset); |
90 | - else | |
91 | - len = pread(aiocb->aio_fildes, | |
92 | - (char *)aiocb->aio_buf + offset, | |
93 | - aiocb->aio_nbytes - offset, | |
94 | - aiocb->aio_offset + offset); | |
95 | - | |
96 | - if (len == -1 && errno == EINTR) | |
97 | - continue; | |
98 | - else if (len == -1) { | |
99 | - offset = -errno; | |
100 | - break; | |
101 | - } else if (len == 0) | |
102 | - break; | |
103 | 123 | |
104 | - offset += len; | |
124 | + if (len == -1 && errno == EINTR) | |
125 | + continue; | |
126 | + else if (len == -1) { | |
127 | + offset = -errno; | |
128 | + break; | |
129 | + } else if (len == 0) | |
130 | + break; | |
131 | + | |
132 | + offset += len; | |
105 | 133 | } |
106 | 134 | |
107 | 135 | return offset; |
108 | 136 | } |
109 | 137 | |
110 | -static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb) | |
138 | +static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb) | |
111 | 139 | { |
112 | - int ret; | |
140 | + size_t nbytes; | |
141 | + char *buf; | |
142 | + | |
143 | + if (!aiocb_needs_copy(aiocb) && aiocb->aio_niov == 1) { | |
144 | + /* | |
145 | + * If there is just a single buffer, and it is properly aligned | |
146 | + * we can just use plain pread/pwrite without any problems. | |
147 | + */ | |
148 | + return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base); | |
149 | + } | |
113 | 150 | |
114 | - ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_buf); | |
115 | - if (ret == -1) | |
116 | - return -errno; | |
117 | - return ret; | |
151 | + /* | |
152 | + * Ok, we have to do it the hard way, copy all segments into | |
153 | + * a single aligned buffer. | |
154 | + */ | |
155 | + buf = qemu_memalign(512, aiocb->aio_nbytes); | |
156 | + if (aiocb->aio_type == QEMU_PAIO_WRITE) { | |
157 | + char *p = buf; | |
158 | + int i; | |
159 | + | |
160 | + for (i = 0; i < aiocb->aio_niov; ++i) { | |
161 | + memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len); | |
162 | + p += aiocb->aio_iov[i].iov_len; | |
163 | + } | |
164 | + } | |
165 | + | |
166 | + nbytes = handle_aiocb_rw_linear(aiocb, buf); | |
167 | + if (aiocb->aio_type != QEMU_PAIO_WRITE) { | |
168 | + char *p = buf; | |
169 | + size_t count = aiocb->aio_nbytes, copy; | |
170 | + int i; | |
171 | + | |
172 | + for (i = 0; i < aiocb->aio_niov && count; ++i) { | |
173 | + copy = count; | |
174 | + if (copy > aiocb->aio_iov[i].iov_len) | |
175 | + copy = aiocb->aio_iov[i].iov_len; | |
176 | + memcpy(aiocb->aio_iov[i].iov_base, p, copy); | |
177 | + p += copy; | |
178 | + count -= copy; | |
179 | + } | |
180 | + } | |
181 | + qemu_vfree(buf); | |
182 | + | |
183 | + return nbytes; | |
118 | 184 | } |
119 | 185 | |
120 | 186 | static void *aio_thread(void *unused) |
... | ... | @@ -157,7 +223,7 @@ static void *aio_thread(void *unused) |
157 | 223 | switch (aiocb->aio_type) { |
158 | 224 | case QEMU_PAIO_READ: |
159 | 225 | case QEMU_PAIO_WRITE: |
160 | - ret = handle_aiocb_readwrite(aiocb); | |
226 | + ret = handle_aiocb_rw(aiocb); | |
161 | 227 | break; |
162 | 228 | case QEMU_PAIO_IOCTL: |
163 | 229 | ret = handle_aiocb_ioctl(aiocb); |
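With the vector now visible inside the I/O thread, the preadv/pwritev support the commit message mentions becomes a small follow-up: the aligned multi-segment case can be submitted directly instead of bounced. A sketch of that future path (not in this patch; assumes _GNU_SOURCE and a libc that provides preadv/pwritev, and omits the EINTR/short-transfer retry loop that handle_aiocb_rw_linear() performs):

#define _GNU_SOURCE
#include <sys/uio.h>

static ssize_t submit_vector_native(struct qemu_paiocb *aiocb)
{
    if (aiocb->aio_type == QEMU_PAIO_WRITE)
        return pwritev(aiocb->aio_fildes, aiocb->aio_iov,
                       aiocb->aio_niov, aiocb->aio_offset);
    return preadv(aiocb->aio_fildes, aiocb->aio_iov,
                  aiocb->aio_niov, aiocb->aio_offset);
}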
posix-aio-compat.h
... | ... | @@ -27,11 +27,18 @@ |
27 | 27 | struct qemu_paiocb |
28 | 28 | { |
29 | 29 | int aio_fildes; |
30 | - void *aio_buf; | |
30 | + union { | |
31 | + struct iovec *aio_iov; | |
32 | + void *aio_ioctl_buf; | |
33 | + }; | |
34 | + int aio_niov; | |
31 | 35 | size_t aio_nbytes; |
32 | 36 | #define aio_ioctl_cmd aio_nbytes /* for QEMU_PAIO_IOCTL */ |
33 | 37 | int ev_signo; |
34 | 38 | off_t aio_offset; |
39 | + unsigned aio_flags; | |
40 | +/* 512 byte alignment required for buffer, offset and length */ | |
41 | +#define QEMU_AIO_SECTOR_ALIGNED 0x01 | |
35 | 42 | |
36 | 43 | /* private */ |
37 | 44 | TAILQ_ENTRY(qemu_paiocb) node; | ... | ... |