Commit f141eafe286c785f7e2c1e312a73f90d66bdfb90
1 parent
c87c0672
push down vector linearization to posix-aio-compat.c (Christoph Hellwig)
Make all AIO requests vectored and defer linearization until the actual I/O thread. This prepares for using native preadv/pwritev. Also enables asynchronous direct I/O by handling that case in the I/O thread. Qcow and qcow2 probably want to be adapted to directly deal with multi-segment requests, but that can be implemented later. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@7020 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
7 changed files
with
317 additions
and
284 deletions
block-qcow.c
| ... | ... | @@ -525,7 +525,9 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num, |
| 525 | 525 | typedef struct QCowAIOCB { |
| 526 | 526 | BlockDriverAIOCB common; |
| 527 | 527 | int64_t sector_num; |
| 528 | + QEMUIOVector *qiov; | |
| 528 | 529 | uint8_t *buf; |
| 530 | + void *orig_buf; | |
| 529 | 531 | int nb_sectors; |
| 530 | 532 | int n; |
| 531 | 533 | uint64_t cluster_offset; |
| ... | ... | @@ -543,12 +545,8 @@ static void qcow_aio_read_cb(void *opaque, int ret) |
| 543 | 545 | int index_in_cluster; |
| 544 | 546 | |
| 545 | 547 | acb->hd_aiocb = NULL; |
| 546 | - if (ret < 0) { | |
| 547 | - fail: | |
| 548 | - acb->common.cb(acb->common.opaque, ret); | |
| 549 | - qemu_aio_release(acb); | |
| 550 | - return; | |
| 551 | - } | |
| 548 | + if (ret < 0) | |
| 549 | + goto done; | |
| 552 | 550 | |
| 553 | 551 | redo: |
| 554 | 552 | /* post process the read buffer */ |
| ... | ... | @@ -570,9 +568,8 @@ static void qcow_aio_read_cb(void *opaque, int ret) |
| 570 | 568 | |
| 571 | 569 | if (acb->nb_sectors == 0) { |
| 572 | 570 | /* request completed */ |
| 573 | - acb->common.cb(acb->common.opaque, 0); | |
| 574 | - qemu_aio_release(acb); | |
| 575 | - return; | |
| 571 | + ret = 0; | |
| 572 | + goto done; | |
| 576 | 573 | } |
| 577 | 574 | |
| 578 | 575 | /* prepare next AIO request */ |
| ... | ... | @@ -592,7 +589,7 @@ static void qcow_aio_read_cb(void *opaque, int ret) |
| 592 | 589 | acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num, |
| 593 | 590 | &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); |
| 594 | 591 | if (acb->hd_aiocb == NULL) |
| 595 | - goto fail; | |
| 592 | + goto done; | |
| 596 | 593 | } else { |
| 597 | 594 | /* Note: in this case, no need to wait */ |
| 598 | 595 | memset(acb->buf, 0, 512 * acb->n); |
| ... | ... | @@ -601,14 +598,14 @@ static void qcow_aio_read_cb(void *opaque, int ret) |
| 601 | 598 | } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { |
| 602 | 599 | /* add AIO support for compressed blocks ? */ |
| 603 | 600 | if (decompress_cluster(s, acb->cluster_offset) < 0) |
| 604 | - goto fail; | |
| 601 | + goto done; | |
| 605 | 602 | memcpy(acb->buf, |
| 606 | 603 | s->cluster_cache + index_in_cluster * 512, 512 * acb->n); |
| 607 | 604 | goto redo; |
| 608 | 605 | } else { |
| 609 | 606 | if ((acb->cluster_offset & 511) != 0) { |
| 610 | 607 | ret = -EIO; |
| 611 | - goto fail; | |
| 608 | + goto done; | |
| 612 | 609 | } |
| 613 | 610 | acb->hd_iov.iov_base = acb->buf; |
| 614 | 611 | acb->hd_iov.iov_len = acb->n * 512; |
| ... | ... | @@ -617,12 +614,22 @@ static void qcow_aio_read_cb(void *opaque, int ret) |
| 617 | 614 | (acb->cluster_offset >> 9) + index_in_cluster, |
| 618 | 615 | &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); |
| 619 | 616 | if (acb->hd_aiocb == NULL) |
| 620 | - goto fail; | |
| 617 | + goto done; | |
| 618 | + } | |
| 619 | + | |
| 620 | + return; | |
| 621 | + | |
| 622 | +done: | |
| 623 | + if (acb->qiov->niov > 1) { | |
| 624 | + qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); | |
| 625 | + qemu_vfree(acb->orig_buf); | |
| 621 | 626 | } |
| 627 | + acb->common.cb(acb->common.opaque, ret); | |
| 628 | + qemu_aio_release(acb); | |
| 622 | 629 | } |
| 623 | 630 | |
| 624 | -static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs, | |
| 625 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
| 631 | +static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs, | |
| 632 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
| 626 | 633 | BlockDriverCompletionFunc *cb, void *opaque) |
| 627 | 634 | { |
| 628 | 635 | QCowAIOCB *acb; |
| ... | ... | @@ -632,7 +639,11 @@ static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs, |
| 632 | 639 | return NULL; |
| 633 | 640 | acb->hd_aiocb = NULL; |
| 634 | 641 | acb->sector_num = sector_num; |
| 635 | - acb->buf = buf; | |
| 642 | + acb->qiov = qiov; | |
| 643 | + if (qiov->niov > 1) | |
| 644 | + acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size); | |
| 645 | + else | |
| 646 | + acb->buf = qiov->iov->iov_base; | |
| 636 | 647 | acb->nb_sectors = nb_sectors; |
| 637 | 648 | acb->n = 0; |
| 638 | 649 | acb->cluster_offset = 0; |
| ... | ... | @@ -652,12 +663,8 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
| 652 | 663 | |
| 653 | 664 | acb->hd_aiocb = NULL; |
| 654 | 665 | |
| 655 | - if (ret < 0) { | |
| 656 | - fail: | |
| 657 | - acb->common.cb(acb->common.opaque, ret); | |
| 658 | - qemu_aio_release(acb); | |
| 659 | - return; | |
| 660 | - } | |
| 666 | + if (ret < 0) | |
| 667 | + goto done; | |
| 661 | 668 | |
| 662 | 669 | acb->nb_sectors -= acb->n; |
| 663 | 670 | acb->sector_num += acb->n; |
| ... | ... | @@ -665,9 +672,8 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
| 665 | 672 | |
| 666 | 673 | if (acb->nb_sectors == 0) { |
| 667 | 674 | /* request completed */ |
| 668 | - acb->common.cb(acb->common.opaque, 0); | |
| 669 | - qemu_aio_release(acb); | |
| 670 | - return; | |
| 675 | + ret = 0; | |
| 676 | + goto done; | |
| 671 | 677 | } |
| 672 | 678 | |
| 673 | 679 | index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); |
| ... | ... | @@ -679,14 +685,14 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
| 679 | 685 | index_in_cluster + acb->n); |
| 680 | 686 | if (!cluster_offset || (cluster_offset & 511) != 0) { |
| 681 | 687 | ret = -EIO; |
| 682 | - goto fail; | |
| 688 | + goto done; | |
| 683 | 689 | } |
| 684 | 690 | if (s->crypt_method) { |
| 685 | 691 | if (!acb->cluster_data) { |
| 686 | 692 | acb->cluster_data = qemu_mallocz(s->cluster_size); |
| 687 | 693 | if (!acb->cluster_data) { |
| 688 | 694 | ret = -ENOMEM; |
| 689 | - goto fail; | |
| 695 | + goto done; | |
| 690 | 696 | } |
| 691 | 697 | } |
| 692 | 698 | encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf, |
| ... | ... | @@ -704,11 +710,18 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
| 704 | 710 | &acb->hd_qiov, acb->n, |
| 705 | 711 | qcow_aio_write_cb, acb); |
| 706 | 712 | if (acb->hd_aiocb == NULL) |
| 707 | - goto fail; | |
| 713 | + goto done; | |
| 714 | + return; | |
| 715 | + | |
| 716 | +done: | |
| 717 | + if (acb->qiov->niov > 1) | |
| 718 | + qemu_vfree(acb->orig_buf); | |
| 719 | + acb->common.cb(acb->common.opaque, ret); | |
| 720 | + qemu_aio_release(acb); | |
| 708 | 721 | } |
| 709 | 722 | |
| 710 | -static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs, | |
| 711 | - int64_t sector_num, const uint8_t *buf, int nb_sectors, | |
| 723 | +static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, | |
| 724 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
| 712 | 725 | BlockDriverCompletionFunc *cb, void *opaque) |
| 713 | 726 | { |
| 714 | 727 | BDRVQcowState *s = bs->opaque; |
| ... | ... | @@ -721,7 +734,12 @@ static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs, |
| 721 | 734 | return NULL; |
| 722 | 735 | acb->hd_aiocb = NULL; |
| 723 | 736 | acb->sector_num = sector_num; |
| 724 | - acb->buf = (uint8_t *)buf; | |
| 737 | + acb->qiov = qiov; | |
| 738 | + if (qiov->niov > 1) { | |
| 739 | + acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size); | |
| 740 | + qemu_iovec_to_buffer(qiov, acb->buf); | |
| 741 | + } else | |
| 742 | + acb->buf = qiov->iov->iov_base; | |
| 725 | 743 | acb->nb_sectors = nb_sectors; |
| 726 | 744 | acb->n = 0; |
| 727 | 745 | |
| ... | ... | @@ -909,8 +927,8 @@ BlockDriver bdrv_qcow = { |
| 909 | 927 | .bdrv_is_allocated = qcow_is_allocated, |
| 910 | 928 | .bdrv_set_key = qcow_set_key, |
| 911 | 929 | .bdrv_make_empty = qcow_make_empty, |
| 912 | - .bdrv_aio_read = qcow_aio_read, | |
| 913 | - .bdrv_aio_write = qcow_aio_write, | |
| 930 | + .bdrv_aio_readv = qcow_aio_readv, | |
| 931 | + .bdrv_aio_writev = qcow_aio_writev, | |
| 914 | 932 | .bdrv_aio_cancel = qcow_aio_cancel, |
| 915 | 933 | .aiocb_size = sizeof(QCowAIOCB), |
| 916 | 934 | .bdrv_write_compressed = qcow_write_compressed, | ... | ... |
block-qcow2.c
| ... | ... | @@ -1264,7 +1264,9 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num, |
| 1264 | 1264 | typedef struct QCowAIOCB { |
| 1265 | 1265 | BlockDriverAIOCB common; |
| 1266 | 1266 | int64_t sector_num; |
| 1267 | + QEMUIOVector *qiov; | |
| 1267 | 1268 | uint8_t *buf; |
| 1269 | + void *orig_buf; | |
| 1268 | 1270 | int nb_sectors; |
| 1269 | 1271 | int n; |
| 1270 | 1272 | uint64_t cluster_offset; |
| ... | ... | @@ -1307,12 +1309,8 @@ static void qcow_aio_read_cb(void *opaque, int ret) |
| 1307 | 1309 | int index_in_cluster, n1; |
| 1308 | 1310 | |
| 1309 | 1311 | acb->hd_aiocb = NULL; |
| 1310 | - if (ret < 0) { | |
| 1311 | -fail: | |
| 1312 | - acb->common.cb(acb->common.opaque, ret); | |
| 1313 | - qemu_aio_release(acb); | |
| 1314 | - return; | |
| 1315 | - } | |
| 1312 | + if (ret < 0) | |
| 1313 | + goto done; | |
| 1316 | 1314 | |
| 1317 | 1315 | /* post process the read buffer */ |
| 1318 | 1316 | if (!acb->cluster_offset) { |
| ... | ... | @@ -1333,9 +1331,8 @@ fail: |
| 1333 | 1331 | |
| 1334 | 1332 | if (acb->nb_sectors == 0) { |
| 1335 | 1333 | /* request completed */ |
| 1336 | - acb->common.cb(acb->common.opaque, 0); | |
| 1337 | - qemu_aio_release(acb); | |
| 1338 | - return; | |
| 1334 | + ret = 0; | |
| 1335 | + goto done; | |
| 1339 | 1336 | } |
| 1340 | 1337 | |
| 1341 | 1338 | /* prepare next AIO request */ |
| ... | ... | @@ -1356,32 +1353,32 @@ fail: |
| 1356 | 1353 | &acb->hd_qiov, acb->n, |
| 1357 | 1354 | qcow_aio_read_cb, acb); |
| 1358 | 1355 | if (acb->hd_aiocb == NULL) |
| 1359 | - goto fail; | |
| 1356 | + goto done; | |
| 1360 | 1357 | } else { |
| 1361 | 1358 | ret = qcow_schedule_bh(qcow_aio_read_bh, acb); |
| 1362 | 1359 | if (ret < 0) |
| 1363 | - goto fail; | |
| 1360 | + goto done; | |
| 1364 | 1361 | } |
| 1365 | 1362 | } else { |
| 1366 | 1363 | /* Note: in this case, no need to wait */ |
| 1367 | 1364 | memset(acb->buf, 0, 512 * acb->n); |
| 1368 | 1365 | ret = qcow_schedule_bh(qcow_aio_read_bh, acb); |
| 1369 | 1366 | if (ret < 0) |
| 1370 | - goto fail; | |
| 1367 | + goto done; | |
| 1371 | 1368 | } |
| 1372 | 1369 | } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { |
| 1373 | 1370 | /* add AIO support for compressed blocks ? */ |
| 1374 | 1371 | if (decompress_cluster(s, acb->cluster_offset) < 0) |
| 1375 | - goto fail; | |
| 1372 | + goto done; | |
| 1376 | 1373 | memcpy(acb->buf, |
| 1377 | 1374 | s->cluster_cache + index_in_cluster * 512, 512 * acb->n); |
| 1378 | 1375 | ret = qcow_schedule_bh(qcow_aio_read_bh, acb); |
| 1379 | 1376 | if (ret < 0) |
| 1380 | - goto fail; | |
| 1377 | + goto done; | |
| 1381 | 1378 | } else { |
| 1382 | 1379 | if ((acb->cluster_offset & 511) != 0) { |
| 1383 | 1380 | ret = -EIO; |
| 1384 | - goto fail; | |
| 1381 | + goto done; | |
| 1385 | 1382 | } |
| 1386 | 1383 | |
| 1387 | 1384 | acb->hd_iov.iov_base = acb->buf; |
| ... | ... | @@ -1391,13 +1388,22 @@ fail: |
| 1391 | 1388 | (acb->cluster_offset >> 9) + index_in_cluster, |
| 1392 | 1389 | &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); |
| 1393 | 1390 | if (acb->hd_aiocb == NULL) |
| 1394 | - goto fail; | |
| 1391 | + goto done; | |
| 1392 | + } | |
| 1393 | + | |
| 1394 | + return; | |
| 1395 | +done: | |
| 1396 | + if (acb->qiov->niov > 1) { | |
| 1397 | + qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); | |
| 1398 | + qemu_vfree(acb->orig_buf); | |
| 1395 | 1399 | } |
| 1400 | + acb->common.cb(acb->common.opaque, ret); | |
| 1401 | + qemu_aio_release(acb); | |
| 1396 | 1402 | } |
| 1397 | 1403 | |
| 1398 | 1404 | static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, |
| 1399 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
| 1400 | - BlockDriverCompletionFunc *cb, void *opaque) | |
| 1405 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
| 1406 | + BlockDriverCompletionFunc *cb, void *opaque, int is_write) | |
| 1401 | 1407 | { |
| 1402 | 1408 | QCowAIOCB *acb; |
| 1403 | 1409 | |
| ... | ... | @@ -1406,7 +1412,13 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, |
| 1406 | 1412 | return NULL; |
| 1407 | 1413 | acb->hd_aiocb = NULL; |
| 1408 | 1414 | acb->sector_num = sector_num; |
| 1409 | - acb->buf = buf; | |
| 1415 | + acb->qiov = qiov; | |
| 1416 | + if (qiov->niov > 1) { | |
| 1417 | + acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size); | |
| 1418 | + if (is_write) | |
| 1419 | + qemu_iovec_to_buffer(qiov, acb->buf); | |
| 1420 | + } else | |
| 1421 | + acb->buf = qiov->iov->iov_base; | |
| 1410 | 1422 | acb->nb_sectors = nb_sectors; |
| 1411 | 1423 | acb->n = 0; |
| 1412 | 1424 | acb->cluster_offset = 0; |
| ... | ... | @@ -1414,13 +1426,13 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, |
| 1414 | 1426 | return acb; |
| 1415 | 1427 | } |
| 1416 | 1428 | |
| 1417 | -static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs, | |
| 1418 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
| 1429 | +static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs, | |
| 1430 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
| 1419 | 1431 | BlockDriverCompletionFunc *cb, void *opaque) |
| 1420 | 1432 | { |
| 1421 | 1433 | QCowAIOCB *acb; |
| 1422 | 1434 | |
| 1423 | - acb = qcow_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); | |
| 1435 | + acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); | |
| 1424 | 1436 | if (!acb) |
| 1425 | 1437 | return NULL; |
| 1426 | 1438 | |
| ... | ... | @@ -1439,16 +1451,12 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
| 1439 | 1451 | |
| 1440 | 1452 | acb->hd_aiocb = NULL; |
| 1441 | 1453 | |
| 1442 | - if (ret < 0) { | |
| 1443 | - fail: | |
| 1444 | - acb->common.cb(acb->common.opaque, ret); | |
| 1445 | - qemu_aio_release(acb); | |
| 1446 | - return; | |
| 1447 | - } | |
| 1454 | + if (ret < 0) | |
| 1455 | + goto done; | |
| 1448 | 1456 | |
| 1449 | 1457 | if (alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) { |
| 1450 | 1458 | free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters); |
| 1451 | - goto fail; | |
| 1459 | + goto done; | |
| 1452 | 1460 | } |
| 1453 | 1461 | |
| 1454 | 1462 | acb->nb_sectors -= acb->n; |
| ... | ... | @@ -1457,9 +1465,8 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
| 1457 | 1465 | |
| 1458 | 1466 | if (acb->nb_sectors == 0) { |
| 1459 | 1467 | /* request completed */ |
| 1460 | - acb->common.cb(acb->common.opaque, 0); | |
| 1461 | - qemu_aio_release(acb); | |
| 1462 | - return; | |
| 1468 | + ret = 0; | |
| 1469 | + goto done; | |
| 1463 | 1470 | } |
| 1464 | 1471 | |
| 1465 | 1472 | index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); |
| ... | ... | @@ -1473,7 +1480,7 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
| 1473 | 1480 | n_end, &acb->n, &acb->l2meta); |
| 1474 | 1481 | if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) { |
| 1475 | 1482 | ret = -EIO; |
| 1476 | - goto fail; | |
| 1483 | + goto done; | |
| 1477 | 1484 | } |
| 1478 | 1485 | if (s->crypt_method) { |
| 1479 | 1486 | if (!acb->cluster_data) { |
| ... | ... | @@ -1494,11 +1501,19 @@ static void qcow_aio_write_cb(void *opaque, int ret) |
| 1494 | 1501 | &acb->hd_qiov, acb->n, |
| 1495 | 1502 | qcow_aio_write_cb, acb); |
| 1496 | 1503 | if (acb->hd_aiocb == NULL) |
| 1497 | - goto fail; | |
| 1504 | + goto done; | |
| 1505 | + | |
| 1506 | + return; | |
| 1507 | + | |
| 1508 | +done: | |
| 1509 | + if (acb->qiov->niov > 1) | |
| 1510 | + qemu_vfree(acb->orig_buf); | |
| 1511 | + acb->common.cb(acb->common.opaque, ret); | |
| 1512 | + qemu_aio_release(acb); | |
| 1498 | 1513 | } |
| 1499 | 1514 | |
| 1500 | -static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs, | |
| 1501 | - int64_t sector_num, const uint8_t *buf, int nb_sectors, | |
| 1515 | +static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, | |
| 1516 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
| 1502 | 1517 | BlockDriverCompletionFunc *cb, void *opaque) |
| 1503 | 1518 | { |
| 1504 | 1519 | BDRVQcowState *s = bs->opaque; |
| ... | ... | @@ -1506,7 +1521,7 @@ static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs, |
| 1506 | 1521 | |
| 1507 | 1522 | s->cluster_cache_offset = -1; /* disable compressed cache */ |
| 1508 | 1523 | |
| 1509 | - acb = qcow_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); | |
| 1524 | + acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); | |
| 1510 | 1525 | if (!acb) |
| 1511 | 1526 | return NULL; |
| 1512 | 1527 | |
| ... | ... | @@ -2771,8 +2786,8 @@ BlockDriver bdrv_qcow2 = { |
| 2771 | 2786 | .bdrv_set_key = qcow_set_key, |
| 2772 | 2787 | .bdrv_make_empty = qcow_make_empty, |
| 2773 | 2788 | |
| 2774 | - .bdrv_aio_read = qcow_aio_read, | |
| 2775 | - .bdrv_aio_write = qcow_aio_write, | |
| 2789 | + .bdrv_aio_readv = qcow_aio_readv, | |
| 2790 | + .bdrv_aio_writev = qcow_aio_writev, | |
| 2776 | 2791 | .bdrv_aio_cancel = qcow_aio_cancel, |
| 2777 | 2792 | .aiocb_size = sizeof(QCowAIOCB), |
| 2778 | 2793 | .bdrv_write_compressed = qcow_write_compressed, | ... | ... |
block-raw-posix.c
| ... | ... | @@ -599,8 +599,8 @@ static int posix_aio_init(void) |
| 599 | 599 | return 0; |
| 600 | 600 | } |
| 601 | 601 | |
| 602 | -static RawAIOCB *raw_aio_setup(BlockDriverState *bs, | |
| 603 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
| 602 | +static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num, | |
| 603 | + QEMUIOVector *qiov, int nb_sectors, | |
| 604 | 604 | BlockDriverCompletionFunc *cb, void *opaque) |
| 605 | 605 | { |
| 606 | 606 | BDRVRawState *s = bs->opaque; |
| ... | ... | @@ -614,24 +614,25 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs, |
| 614 | 614 | return NULL; |
| 615 | 615 | acb->aiocb.aio_fildes = s->fd; |
| 616 | 616 | acb->aiocb.ev_signo = SIGUSR2; |
| 617 | - acb->aiocb.aio_buf = buf; | |
| 618 | - if (nb_sectors < 0) | |
| 619 | - acb->aiocb.aio_nbytes = -nb_sectors; | |
| 620 | - else | |
| 621 | - acb->aiocb.aio_nbytes = nb_sectors * 512; | |
| 617 | + acb->aiocb.aio_iov = qiov->iov; | |
| 618 | + acb->aiocb.aio_niov = qiov->niov; | |
| 619 | + acb->aiocb.aio_nbytes = nb_sectors * 512; | |
| 622 | 620 | acb->aiocb.aio_offset = sector_num * 512; |
| 621 | + acb->aiocb.aio_flags = 0; | |
| 622 | + | |
| 623 | + /* | |
| 624 | + * If O_DIRECT is used the buffer needs to be aligned on a sector | |
| 625 | + * boundary. Tell the low level code to ensure that in case it's | |
| 626 | + * not done yet. | |
| 627 | + */ | |
| 628 | + if (s->aligned_buf) | |
| 629 | + acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED; | |
| 630 | + | |
| 623 | 631 | acb->next = posix_aio_state->first_aio; |
| 624 | 632 | posix_aio_state->first_aio = acb; |
| 625 | 633 | return acb; |
| 626 | 634 | } |
| 627 | 635 | |
| 628 | -static void raw_aio_em_cb(void* opaque) | |
| 629 | -{ | |
| 630 | - RawAIOCB *acb = opaque; | |
| 631 | - acb->common.cb(acb->common.opaque, acb->ret); | |
| 632 | - qemu_aio_release(acb); | |
| 633 | -} | |
| 634 | - | |
| 635 | 636 | static void raw_aio_remove(RawAIOCB *acb) |
| 636 | 637 | { |
| 637 | 638 | RawAIOCB **pacb; |
| ... | ... | @@ -651,28 +652,13 @@ static void raw_aio_remove(RawAIOCB *acb) |
| 651 | 652 | } |
| 652 | 653 | } |
| 653 | 654 | |
| 654 | -static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, | |
| 655 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
| 655 | +static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs, | |
| 656 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
| 656 | 657 | BlockDriverCompletionFunc *cb, void *opaque) |
| 657 | 658 | { |
| 658 | 659 | RawAIOCB *acb; |
| 659 | 660 | |
| 660 | - /* | |
| 661 | - * If O_DIRECT is used and the buffer is not aligned fall back | |
| 662 | - * to synchronous IO. | |
| 663 | - */ | |
| 664 | - BDRVRawState *s = bs->opaque; | |
| 665 | - | |
| 666 | - if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { | |
| 667 | - QEMUBH *bh; | |
| 668 | - acb = qemu_aio_get(bs, cb, opaque); | |
| 669 | - acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors); | |
| 670 | - bh = qemu_bh_new(raw_aio_em_cb, acb); | |
| 671 | - qemu_bh_schedule(bh); | |
| 672 | - return &acb->common; | |
| 673 | - } | |
| 674 | - | |
| 675 | - acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); | |
| 661 | + acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque); | |
| 676 | 662 | if (!acb) |
| 677 | 663 | return NULL; |
| 678 | 664 | if (qemu_paio_read(&acb->aiocb) < 0) { |
| ... | ... | @@ -682,28 +668,13 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, |
| 682 | 668 | return &acb->common; |
| 683 | 669 | } |
| 684 | 670 | |
| 685 | -static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, | |
| 686 | - int64_t sector_num, const uint8_t *buf, int nb_sectors, | |
| 671 | +static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs, | |
| 672 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
| 687 | 673 | BlockDriverCompletionFunc *cb, void *opaque) |
| 688 | 674 | { |
| 689 | 675 | RawAIOCB *acb; |
| 690 | 676 | |
| 691 | - /* | |
| 692 | - * If O_DIRECT is used and the buffer is not aligned fall back | |
| 693 | - * to synchronous IO. | |
| 694 | - */ | |
| 695 | - BDRVRawState *s = bs->opaque; | |
| 696 | - | |
| 697 | - if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { | |
| 698 | - QEMUBH *bh; | |
| 699 | - acb = qemu_aio_get(bs, cb, opaque); | |
| 700 | - acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors); | |
| 701 | - bh = qemu_bh_new(raw_aio_em_cb, acb); | |
| 702 | - qemu_bh_schedule(bh); | |
| 703 | - return &acb->common; | |
| 704 | - } | |
| 705 | - | |
| 706 | - acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); | |
| 677 | + acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque); | |
| 707 | 678 | if (!acb) |
| 708 | 679 | return NULL; |
| 709 | 680 | if (qemu_paio_write(&acb->aiocb) < 0) { |
| ... | ... | @@ -887,8 +858,8 @@ BlockDriver bdrv_raw = { |
| 887 | 858 | .bdrv_flush = raw_flush, |
| 888 | 859 | |
| 889 | 860 | #ifdef CONFIG_AIO |
| 890 | - .bdrv_aio_read = raw_aio_read, | |
| 891 | - .bdrv_aio_write = raw_aio_write, | |
| 861 | + .bdrv_aio_readv = raw_aio_readv, | |
| 862 | + .bdrv_aio_writev = raw_aio_writev, | |
| 892 | 863 | .bdrv_aio_cancel = raw_aio_cancel, |
| 893 | 864 | .aiocb_size = sizeof(RawAIOCB), |
| 894 | 865 | #endif |
| ... | ... | @@ -1215,12 +1186,24 @@ static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs, |
| 1215 | 1186 | unsigned long int req, void *buf, |
| 1216 | 1187 | BlockDriverCompletionFunc *cb, void *opaque) |
| 1217 | 1188 | { |
| 1189 | + BDRVRawState *s = bs->opaque; | |
| 1218 | 1190 | RawAIOCB *acb; |
| 1219 | 1191 | |
| 1220 | - acb = raw_aio_setup(bs, 0, buf, 0, cb, opaque); | |
| 1192 | + if (fd_open(bs) < 0) | |
| 1193 | + return NULL; | |
| 1194 | + | |
| 1195 | + acb = qemu_aio_get(bs, cb, opaque); | |
| 1221 | 1196 | if (!acb) |
| 1222 | 1197 | return NULL; |
| 1198 | + acb->aiocb.aio_fildes = s->fd; | |
| 1199 | + acb->aiocb.ev_signo = SIGUSR2; | |
| 1200 | + acb->aiocb.aio_offset = 0; | |
| 1201 | + acb->aiocb.aio_flags = 0; | |
| 1202 | + | |
| 1203 | + acb->next = posix_aio_state->first_aio; | |
| 1204 | + posix_aio_state->first_aio = acb; | |
| 1223 | 1205 | |
| 1206 | + acb->aiocb.aio_ioctl_buf = buf; | |
| 1224 | 1207 | acb->aiocb.aio_ioctl_cmd = req; |
| 1225 | 1208 | if (qemu_paio_ioctl(&acb->aiocb) < 0) { |
| 1226 | 1209 | raw_aio_remove(acb); |
| ... | ... | @@ -1424,8 +1407,8 @@ BlockDriver bdrv_host_device = { |
| 1424 | 1407 | .bdrv_flush = raw_flush, |
| 1425 | 1408 | |
| 1426 | 1409 | #ifdef CONFIG_AIO |
| 1427 | - .bdrv_aio_read = raw_aio_read, | |
| 1428 | - .bdrv_aio_write = raw_aio_write, | |
| 1410 | + .bdrv_aio_readv = raw_aio_readv, | |
| 1411 | + .bdrv_aio_writev = raw_aio_writev, | |
| 1429 | 1412 | .bdrv_aio_cancel = raw_aio_cancel, |
| 1430 | 1413 | .aiocb_size = sizeof(RawAIOCB), |
| 1431 | 1414 | #endif | ... | ... |
block.c
| ... | ... | @@ -47,25 +47,21 @@ |
| 47 | 47 | #define SECTOR_BITS 9 |
| 48 | 48 | #define SECTOR_SIZE (1 << SECTOR_BITS) |
| 49 | 49 | |
| 50 | -static AIOPool vectored_aio_pool; | |
| 51 | - | |
| 52 | 50 | typedef struct BlockDriverAIOCBSync { |
| 53 | 51 | BlockDriverAIOCB common; |
| 54 | 52 | QEMUBH *bh; |
| 55 | 53 | int ret; |
| 54 | + /* vector translation state */ | |
| 55 | + QEMUIOVector *qiov; | |
| 56 | + uint8_t *bounce; | |
| 57 | + int is_write; | |
| 56 | 58 | } BlockDriverAIOCBSync; |
| 57 | 59 | |
| 58 | -static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, | |
| 59 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
| 60 | +static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, | |
| 61 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
| 60 | 62 | BlockDriverCompletionFunc *cb, void *opaque); |
| 61 | -static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, | |
| 62 | - int64_t sector_num, const uint8_t *buf, int nb_sectors, | |
| 63 | - BlockDriverCompletionFunc *cb, void *opaque); | |
| 64 | -static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs, | |
| 65 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
| 66 | - BlockDriverCompletionFunc *cb, void *opaque); | |
| 67 | -static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs, | |
| 68 | - int64_t sector_num, const uint8_t *buf, int nb_sectors, | |
| 63 | +static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, | |
| 64 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
| 69 | 65 | BlockDriverCompletionFunc *cb, void *opaque); |
| 70 | 66 | static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb); |
| 71 | 67 | static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, |
| ... | ... | @@ -144,10 +140,10 @@ void path_combine(char *dest, int dest_size, |
| 144 | 140 | |
| 145 | 141 | static void bdrv_register(BlockDriver *bdrv) |
| 146 | 142 | { |
| 147 | - if (!bdrv->bdrv_aio_read) { | |
| 143 | + if (!bdrv->bdrv_aio_readv) { | |
| 148 | 144 | /* add AIO emulation layer */ |
| 149 | - bdrv->bdrv_aio_read = bdrv_aio_read_em; | |
| 150 | - bdrv->bdrv_aio_write = bdrv_aio_write_em; | |
| 145 | + bdrv->bdrv_aio_readv = bdrv_aio_readv_em; | |
| 146 | + bdrv->bdrv_aio_writev = bdrv_aio_writev_em; | |
| 151 | 147 | bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em; |
| 152 | 148 | bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync); |
| 153 | 149 | } else if (!bdrv->bdrv_read) { |
| ... | ... | @@ -1295,91 +1291,10 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn) |
| 1295 | 1291 | /**************************************************************/ |
| 1296 | 1292 | /* async I/Os */ |
| 1297 | 1293 | |
| 1298 | -typedef struct VectorTranslationAIOCB { | |
| 1299 | - BlockDriverAIOCB common; | |
| 1300 | - QEMUIOVector *iov; | |
| 1301 | - uint8_t *bounce; | |
| 1302 | - int is_write; | |
| 1303 | - BlockDriverAIOCB *aiocb; | |
| 1304 | -} VectorTranslationAIOCB; | |
| 1305 | - | |
| 1306 | -static void bdrv_aio_cancel_vector(BlockDriverAIOCB *_acb) | |
| 1307 | -{ | |
| 1308 | - VectorTranslationAIOCB *acb | |
| 1309 | - = container_of(_acb, VectorTranslationAIOCB, common); | |
| 1310 | - | |
| 1311 | - bdrv_aio_cancel(acb->aiocb); | |
| 1312 | -} | |
| 1313 | - | |
| 1314 | -static void bdrv_aio_rw_vector_cb(void *opaque, int ret) | |
| 1315 | -{ | |
| 1316 | - VectorTranslationAIOCB *s = (VectorTranslationAIOCB *)opaque; | |
| 1317 | - | |
| 1318 | - if (!s->is_write) { | |
| 1319 | - qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size); | |
| 1320 | - } | |
| 1321 | - qemu_vfree(s->bounce); | |
| 1322 | - s->common.cb(s->common.opaque, ret); | |
| 1323 | - qemu_aio_release(s); | |
| 1324 | -} | |
| 1325 | - | |
| 1326 | -static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, | |
| 1327 | - int64_t sector_num, | |
| 1328 | - QEMUIOVector *iov, | |
| 1329 | - int nb_sectors, | |
| 1330 | - BlockDriverCompletionFunc *cb, | |
| 1331 | - void *opaque, | |
| 1332 | - int is_write) | |
| 1333 | - | |
| 1334 | -{ | |
| 1335 | - VectorTranslationAIOCB *s = qemu_aio_get_pool(&vectored_aio_pool, bs, | |
| 1336 | - cb, opaque); | |
| 1337 | - | |
| 1338 | - s->iov = iov; | |
| 1339 | - s->bounce = qemu_memalign(512, nb_sectors * 512); | |
| 1340 | - s->is_write = is_write; | |
| 1341 | - if (is_write) { | |
| 1342 | - qemu_iovec_to_buffer(s->iov, s->bounce); | |
| 1343 | - s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors, | |
| 1344 | - bdrv_aio_rw_vector_cb, s); | |
| 1345 | - } else { | |
| 1346 | - s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors, | |
| 1347 | - bdrv_aio_rw_vector_cb, s); | |
| 1348 | - } | |
| 1349 | - if (!s->aiocb) { | |
| 1350 | - qemu_vfree(s->bounce); | |
| 1351 | - qemu_aio_release(s); | |
| 1352 | - return NULL; | |
| 1353 | - } | |
| 1354 | - return &s->common; | |
| 1355 | -} | |
| 1356 | - | |
| 1357 | 1294 | BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, |
| 1358 | - QEMUIOVector *iov, int nb_sectors, | |
| 1295 | + QEMUIOVector *qiov, int nb_sectors, | |
| 1359 | 1296 | BlockDriverCompletionFunc *cb, void *opaque) |
| 1360 | 1297 | { |
| 1361 | - if (bdrv_check_request(bs, sector_num, nb_sectors)) | |
| 1362 | - return NULL; | |
| 1363 | - | |
| 1364 | - return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors, | |
| 1365 | - cb, opaque, 0); | |
| 1366 | -} | |
| 1367 | - | |
| 1368 | -BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, | |
| 1369 | - QEMUIOVector *iov, int nb_sectors, | |
| 1370 | - BlockDriverCompletionFunc *cb, void *opaque) | |
| 1371 | -{ | |
| 1372 | - if (bdrv_check_request(bs, sector_num, nb_sectors)) | |
| 1373 | - return NULL; | |
| 1374 | - | |
| 1375 | - return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors, | |
| 1376 | - cb, opaque, 1); | |
| 1377 | -} | |
| 1378 | - | |
| 1379 | -static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num, | |
| 1380 | - uint8_t *buf, int nb_sectors, | |
| 1381 | - BlockDriverCompletionFunc *cb, void *opaque) | |
| 1382 | -{ | |
| 1383 | 1298 | BlockDriver *drv = bs->drv; |
| 1384 | 1299 | BlockDriverAIOCB *ret; |
| 1385 | 1300 | |
| ... | ... | @@ -1388,7 +1303,8 @@ static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num, |
| 1388 | 1303 | if (bdrv_check_request(bs, sector_num, nb_sectors)) |
| 1389 | 1304 | return NULL; |
| 1390 | 1305 | |
| 1391 | - ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque); | |
| 1306 | + ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors, | |
| 1307 | + cb, opaque); | |
| 1392 | 1308 | |
| 1393 | 1309 | if (ret) { |
| 1394 | 1310 | /* Update stats even though technically transfer has not happened. */ |
| ... | ... | @@ -1399,9 +1315,9 @@ static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num, |
| 1399 | 1315 | return ret; |
| 1400 | 1316 | } |
| 1401 | 1317 | |
| 1402 | -static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num, | |
| 1403 | - const uint8_t *buf, int nb_sectors, | |
| 1404 | - BlockDriverCompletionFunc *cb, void *opaque) | |
| 1318 | +BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, | |
| 1319 | + QEMUIOVector *qiov, int nb_sectors, | |
| 1320 | + BlockDriverCompletionFunc *cb, void *opaque) | |
| 1405 | 1321 | { |
| 1406 | 1322 | BlockDriver *drv = bs->drv; |
| 1407 | 1323 | BlockDriverAIOCB *ret; |
| ... | ... | @@ -1413,7 +1329,8 @@ static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num |
| 1413 | 1329 | if (bdrv_check_request(bs, sector_num, nb_sectors)) |
| 1414 | 1330 | return NULL; |
| 1415 | 1331 | |
| 1416 | - ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque); | |
| 1332 | + ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors, | |
| 1333 | + cb, opaque); | |
| 1417 | 1334 | |
| 1418 | 1335 | if (ret) { |
| 1419 | 1336 | /* Update stats even though technically transfer has not happened. */ |
| ... | ... | @@ -1436,42 +1353,62 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb) |
| 1436 | 1353 | static void bdrv_aio_bh_cb(void *opaque) |
| 1437 | 1354 | { |
| 1438 | 1355 | BlockDriverAIOCBSync *acb = opaque; |
| 1356 | + | |
| 1357 | + qemu_vfree(acb->bounce); | |
| 1358 | + | |
| 1359 | + if (!acb->is_write) | |
| 1360 | + qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size); | |
| 1439 | 1361 | acb->common.cb(acb->common.opaque, acb->ret); |
| 1362 | + | |
| 1440 | 1363 | qemu_aio_release(acb); |
| 1441 | 1364 | } |
| 1442 | 1365 | |
| 1443 | -static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs, | |
| 1444 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
| 1445 | - BlockDriverCompletionFunc *cb, void *opaque) | |
| 1366 | +static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, | |
| 1367 | + int64_t sector_num, | |
| 1368 | + QEMUIOVector *qiov, | |
| 1369 | + int nb_sectors, | |
| 1370 | + BlockDriverCompletionFunc *cb, | |
| 1371 | + void *opaque, | |
| 1372 | + int is_write) | |
| 1373 | + | |
| 1446 | 1374 | { |
| 1447 | 1375 | BlockDriverAIOCBSync *acb; |
| 1448 | - int ret; | |
| 1449 | 1376 | |
| 1450 | 1377 | acb = qemu_aio_get(bs, cb, opaque); |
| 1378 | + acb->is_write = is_write; | |
| 1379 | + acb->qiov = qiov; | |
| 1380 | + acb->bounce = qemu_memalign(512, qiov->size); | |
| 1381 | + | |
| 1451 | 1382 | if (!acb->bh) |
| 1452 | 1383 | acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); |
| 1453 | - ret = bdrv_read(bs, sector_num, buf, nb_sectors); | |
| 1454 | - acb->ret = ret; | |
| 1384 | + | |
| 1385 | + if (is_write) { | |
| 1386 | + qemu_iovec_to_buffer(acb->qiov, acb->bounce); | |
| 1387 | + acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors); | |
| 1388 | + } else { | |
| 1389 | + acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors); | |
| 1390 | + } | |
| 1391 | + | |
| 1455 | 1392 | qemu_bh_schedule(acb->bh); |
| 1393 | + | |
| 1456 | 1394 | return &acb->common; |
| 1457 | 1395 | } |
| 1458 | 1396 | |
| 1459 | -static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs, | |
| 1460 | - int64_t sector_num, const uint8_t *buf, int nb_sectors, | |
| 1397 | +static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, | |
| 1398 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
| 1461 | 1399 | BlockDriverCompletionFunc *cb, void *opaque) |
| 1462 | 1400 | { |
| 1463 | - BlockDriverAIOCBSync *acb; | |
| 1464 | - int ret; | |
| 1401 | + return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); | |
| 1402 | +} | |
| 1465 | 1403 | |
| 1466 | - acb = qemu_aio_get(bs, cb, opaque); | |
| 1467 | - if (!acb->bh) | |
| 1468 | - acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); | |
| 1469 | - ret = bdrv_write(bs, sector_num, buf, nb_sectors); | |
| 1470 | - acb->ret = ret; | |
| 1471 | - qemu_bh_schedule(acb->bh); | |
| 1472 | - return &acb->common; | |
| 1404 | +static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, | |
| 1405 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
| 1406 | + BlockDriverCompletionFunc *cb, void *opaque) | |
| 1407 | +{ | |
| 1408 | + return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); | |
| 1473 | 1409 | } |
| 1474 | 1410 | |
| 1411 | + | |
| 1475 | 1412 | static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb) |
| 1476 | 1413 | { |
| 1477 | 1414 | BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb; |
| ... | ... | @@ -1494,10 +1431,15 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, |
| 1494 | 1431 | { |
| 1495 | 1432 | int async_ret; |
| 1496 | 1433 | BlockDriverAIOCB *acb; |
| 1434 | + struct iovec iov; | |
| 1435 | + QEMUIOVector qiov; | |
| 1497 | 1436 | |
| 1498 | 1437 | async_ret = NOT_DONE; |
| 1499 | - acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors, | |
| 1500 | - bdrv_rw_em_cb, &async_ret); | |
| 1438 | + iov.iov_base = buf; | |
| 1439 | + iov.iov_len = nb_sectors * 512; | |
| 1440 | + qemu_iovec_init_external(&qiov, &iov, 1); | |
| 1441 | + acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors, | |
| 1442 | + bdrv_rw_em_cb, &async_ret); | |
| 1501 | 1443 | if (acb == NULL) |
| 1502 | 1444 | return -1; |
| 1503 | 1445 | |
| ... | ... | @@ -1513,10 +1455,15 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, |
| 1513 | 1455 | { |
| 1514 | 1456 | int async_ret; |
| 1515 | 1457 | BlockDriverAIOCB *acb; |
| 1458 | + struct iovec iov; | |
| 1459 | + QEMUIOVector qiov; | |
| 1516 | 1460 | |
| 1517 | 1461 | async_ret = NOT_DONE; |
| 1518 | - acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors, | |
| 1519 | - bdrv_rw_em_cb, &async_ret); | |
| 1462 | + iov.iov_base = (void *)buf; | |
| 1463 | + iov.iov_len = nb_sectors * 512; | |
| 1464 | + qemu_iovec_init_external(&qiov, &iov, 1); | |
| 1465 | + acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors, | |
| 1466 | + bdrv_rw_em_cb, &async_ret); | |
| 1520 | 1467 | if (acb == NULL) |
| 1521 | 1468 | return -1; |
| 1522 | 1469 | while (async_ret == NOT_DONE) { |
| ... | ... | @@ -1527,9 +1474,6 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, |
| 1527 | 1474 | |
| 1528 | 1475 | void bdrv_init(void) |
| 1529 | 1476 | { |
| 1530 | - aio_pool_init(&vectored_aio_pool, sizeof(VectorTranslationAIOCB), | |
| 1531 | - bdrv_aio_cancel_vector); | |
| 1532 | - | |
| 1533 | 1477 | bdrv_register(&bdrv_raw); |
| 1534 | 1478 | bdrv_register(&bdrv_host_device); |
| 1535 | 1479 | #ifndef _WIN32 | ... | ... |
block_int.h
| ... | ... | @@ -54,11 +54,11 @@ struct BlockDriver { |
| 54 | 54 | int (*bdrv_set_key)(BlockDriverState *bs, const char *key); |
| 55 | 55 | int (*bdrv_make_empty)(BlockDriverState *bs); |
| 56 | 56 | /* aio */ |
| 57 | - BlockDriverAIOCB *(*bdrv_aio_read)(BlockDriverState *bs, | |
| 58 | - int64_t sector_num, uint8_t *buf, int nb_sectors, | |
| 57 | + BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs, | |
| 58 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
| 59 | 59 | BlockDriverCompletionFunc *cb, void *opaque); |
| 60 | - BlockDriverAIOCB *(*bdrv_aio_write)(BlockDriverState *bs, | |
| 61 | - int64_t sector_num, const uint8_t *buf, int nb_sectors, | |
| 60 | + BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs, | |
| 61 | + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | |
| 62 | 62 | BlockDriverCompletionFunc *cb, void *opaque); |
| 63 | 63 | void (*bdrv_aio_cancel)(BlockDriverAIOCB *acb); |
| 64 | 64 | int aiocb_size; | ... | ... |
posix-aio-compat.c
| ... | ... | @@ -20,6 +20,7 @@ |
| 20 | 20 | #include <stdlib.h> |
| 21 | 21 | #include <stdio.h> |
| 22 | 22 | #include "osdep.h" |
| 23 | +#include "qemu-common.h" | |
| 23 | 24 | |
| 24 | 25 | #include "posix-aio-compat.h" |
| 25 | 26 | |
| ... | ... | @@ -76,45 +77,110 @@ static void thread_create(pthread_t *thread, pthread_attr_t *attr, |
| 76 | 77 | if (ret) die2(ret, "pthread_create"); |
| 77 | 78 | } |
| 78 | 79 | |
| 79 | -static size_t handle_aiocb_readwrite(struct qemu_paiocb *aiocb) | |
| 80 | +static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb) | |
| 81 | +{ | |
| 82 | + int ret; | |
| 83 | + | |
| 84 | + ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf); | |
| 85 | + if (ret == -1) | |
| 86 | + return -errno; | |
| 87 | + return ret; | |
| 88 | +} | |
| 89 | + | |
| 90 | +/* | |
| 91 | + * Check if we need to copy the data in the aiocb into a new | |
| 92 | + * properly aligned buffer. | |
| 93 | + */ | |
| 94 | +static int aiocb_needs_copy(struct qemu_paiocb *aiocb) | |
| 95 | +{ | |
| 96 | + if (aiocb->aio_flags & QEMU_AIO_SECTOR_ALIGNED) { | |
| 97 | + int i; | |
| 98 | + | |
| 99 | + for (i = 0; i < aiocb->aio_niov; i++) | |
| 100 | + if ((uintptr_t) aiocb->aio_iov[i].iov_base % 512) | |
| 101 | + return 1; | |
| 102 | + } | |
| 103 | + | |
| 104 | + return 0; | |
| 105 | +} | |
| 106 | + | |
| 107 | +static size_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf) | |
| 80 | 108 | { |
| 81 | 109 | size_t offset = 0; |
| 82 | - ssize_t len; | |
| 110 | + size_t len; | |
| 83 | 111 | |
| 84 | 112 | while (offset < aiocb->aio_nbytes) { |
| 85 | - if (aiocb->aio_type == QEMU_PAIO_WRITE) | |
| 86 | - len = pwrite(aiocb->aio_fildes, | |
| 87 | - (const char *)aiocb->aio_buf + offset, | |
| 113 | + if (aiocb->aio_type == QEMU_PAIO_WRITE) | |
| 114 | + len = pwrite(aiocb->aio_fildes, | |
| 115 | + (const char *)buf + offset, | |
| 116 | + aiocb->aio_nbytes - offset, | |
| 117 | + aiocb->aio_offset + offset); | |
| 118 | + else | |
| 119 | + len = pread(aiocb->aio_fildes, | |
| 120 | + buf + offset, | |
| 88 | 121 | aiocb->aio_nbytes - offset, |
| 89 | 122 | aiocb->aio_offset + offset); |
| 90 | - else | |
| 91 | - len = pread(aiocb->aio_fildes, | |
| 92 | - (char *)aiocb->aio_buf + offset, | |
| 93 | - aiocb->aio_nbytes - offset, | |
| 94 | - aiocb->aio_offset + offset); | |
| 95 | - | |
| 96 | - if (len == -1 && errno == EINTR) | |
| 97 | - continue; | |
| 98 | - else if (len == -1) { | |
| 99 | - offset = -errno; | |
| 100 | - break; | |
| 101 | - } else if (len == 0) | |
| 102 | - break; | |
| 103 | 123 | |
| 104 | - offset += len; | |
| 124 | + if (len == -1 && errno == EINTR) | |
| 125 | + continue; | |
| 126 | + else if (len == -1) { | |
| 127 | + offset = -errno; | |
| 128 | + break; | |
| 129 | + } else if (len == 0) | |
| 130 | + break; | |
| 131 | + | |
| 132 | + offset += len; | |
| 105 | 133 | } |
| 106 | 134 | |
| 107 | 135 | return offset; |
| 108 | 136 | } |
| 109 | 137 | |
| 110 | -static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb) | |
| 138 | +static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb) | |
| 111 | 139 | { |
| 112 | - int ret; | |
| 140 | + size_t nbytes; | |
| 141 | + char *buf; | |
| 142 | + | |
| 143 | + if (!aiocb_needs_copy(aiocb) && aiocb->aio_niov == 1) { | |
| 144 | + /* | |
| 145 | + * If there is just a single buffer, and it is properly aligned | |
| 146 | + * we can just use plain pread/pwrite without any problems. | |
| 147 | + */ | |
| 148 | + return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base); | |
| 149 | + } | |
| 113 | 150 | |
| 114 | - ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_buf); | |
| 115 | - if (ret == -1) | |
| 116 | - return -errno; | |
| 117 | - return ret; | |
| 151 | + /* | |
| 152 | + * Ok, we have to do it the hard way, copy all segments into | |
| 153 | + * a single aligned buffer. | |
| 154 | + */ | |
| 155 | + buf = qemu_memalign(512, aiocb->aio_nbytes); | |
| 156 | + if (aiocb->aio_type == QEMU_PAIO_WRITE) { | |
| 157 | + char *p = buf; | |
| 158 | + int i; | |
| 159 | + | |
| 160 | + for (i = 0; i < aiocb->aio_niov; ++i) { | |
| 161 | + memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len); | |
| 162 | + p += aiocb->aio_iov[i].iov_len; | |
| 163 | + } | |
| 164 | + } | |
| 165 | + | |
| 166 | + nbytes = handle_aiocb_rw_linear(aiocb, buf); | |
| 167 | + if (aiocb->aio_type != QEMU_PAIO_WRITE) { | |
| 168 | + char *p = buf; | |
| 169 | + size_t count = aiocb->aio_nbytes, copy; | |
| 170 | + int i; | |
| 171 | + | |
| 172 | + for (i = 0; i < aiocb->aio_niov && count; ++i) { | |
| 173 | + copy = count; | |
| 174 | + if (copy > aiocb->aio_iov[i].iov_len) | |
| 175 | + copy = aiocb->aio_iov[i].iov_len; | |
| 176 | + memcpy(aiocb->aio_iov[i].iov_base, p, copy); | |
| 177 | + p += copy; | |
| 178 | + count -= copy; | |
| 179 | + } | |
| 180 | + } | |
| 181 | + qemu_vfree(buf); | |
| 182 | + | |
| 183 | + return nbytes; | |
| 118 | 184 | } |
| 119 | 185 | |
| 120 | 186 | static void *aio_thread(void *unused) |
| ... | ... | @@ -157,7 +223,7 @@ static void *aio_thread(void *unused) |
| 157 | 223 | switch (aiocb->aio_type) { |
| 158 | 224 | case QEMU_PAIO_READ: |
| 159 | 225 | case QEMU_PAIO_WRITE: |
| 160 | - ret = handle_aiocb_readwrite(aiocb); | |
| 226 | + ret = handle_aiocb_rw(aiocb); | |
| 161 | 227 | break; |
| 162 | 228 | case QEMU_PAIO_IOCTL: |
| 163 | 229 | ret = handle_aiocb_ioctl(aiocb); | ... | ... |
posix-aio-compat.h
| ... | ... | @@ -27,11 +27,18 @@ |
| 27 | 27 | struct qemu_paiocb |
| 28 | 28 | { |
| 29 | 29 | int aio_fildes; |
| 30 | - void *aio_buf; | |
| 30 | + union { | |
| 31 | + struct iovec *aio_iov; | |
| 32 | + void *aio_ioctl_buf; | |
| 33 | + }; | |
| 34 | + int aio_niov; | |
| 31 | 35 | size_t aio_nbytes; |
| 32 | 36 | #define aio_ioctl_cmd aio_nbytes /* for QEMU_PAIO_IOCTL */ |
| 33 | 37 | int ev_signo; |
| 34 | 38 | off_t aio_offset; |
| 39 | + unsigned aio_flags; | |
| 40 | +/* 512 byte alignment required for buffer, offset and length */ | |
| 41 | +#define QEMU_AIO_SECTOR_ALIGNED 0x01 | |
| 35 | 42 | |
| 36 | 43 | /* private */ |
| 37 | 44 | TAILQ_ENTRY(qemu_paiocb) node; | ... | ... |