Commit 5353872545861d8d21bf9fcc64a25cbfc8cd2eac
1 parent
997306fc
Implement an fd pool to get real AIO with posix-aio
This patch implements a simple fd pool to allow many AIO requests with posix-aio. The result is significantly improved performance (identical to that reported for linux-aio) for both cache=on and cache=off.

The fundamental problem with posix-aio is that it limits itself to one thread per file descriptor. I don't know why this is, but this patch provides a simple mechanism to work around this (duplicating the file descriptor).

This isn't a great solution, but it seems like a reasonable intermediate step between posix-aio and a custom thread-pool to replace it.

Ryan Harper will be posting some performance analysis he did comparing posix-aio with fd pooling against linux-aio. The size of the posix-aio thread pool and the fd pool were largely determined by him based on this analysis.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5323 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
1 changed file
with
65 additions
and
3 deletions
block-raw-posix.c
| ... | ... | @@ -84,10 +84,16 @@ |
| 84 | 84 | reopen it to see if the disk has been changed */ |
| 85 | 85 | #define FD_OPEN_TIMEOUT 1000 |
| 86 | 86 | |
| 87 | +/* posix-aio doesn't allow multiple outstanding requests to a single file | |
| 88 | + * descriptor. we implement a pool of dup()'d file descriptors to work | |
| 89 | + * around this */ | |
| 90 | +#define RAW_FD_POOL_SIZE 64 | |
| 91 | + | |
| 87 | 92 | typedef struct BDRVRawState { |
| 88 | 93 | int fd; |
| 89 | 94 | int type; |
| 90 | 95 | unsigned int lseek_err_cnt; |
| 96 | + int fd_pool[RAW_FD_POOL_SIZE]; | |
| 91 | 97 | #if defined(__linux__) |
| 92 | 98 | /* linux floppy specific */ |
| 93 | 99 | int fd_open_flags; |
| ... | ... | @@ -109,6 +115,7 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) |
| 109 | 115 | { |
| 110 | 116 | BDRVRawState *s = bs->opaque; |
| 111 | 117 | int fd, open_flags, ret; |
| 118 | + int i; | |
| 112 | 119 | |
| 113 | 120 | posix_aio_init(); |
| 114 | 121 | |
| ... | ... | @@ -138,6 +145,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) |
| 138 | 145 | return ret; |
| 139 | 146 | } |
| 140 | 147 | s->fd = fd; |
| 148 | + for (i = 0; i < RAW_FD_POOL_SIZE; i++) | |
| 149 | + s->fd_pool[i] = -1; | |
| 141 | 150 | #if defined(O_DIRECT) |
| 142 | 151 | s->aligned_buf = NULL; |
| 143 | 152 | if (flags & BDRV_O_DIRECT) { |
| ... | ... | @@ -436,6 +445,7 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset, |
| 436 | 445 | |
| 437 | 446 | typedef struct RawAIOCB { |
| 438 | 447 | BlockDriverAIOCB common; |
| 448 | + int fd; | |
| 439 | 449 | struct aiocb aiocb; |
| 440 | 450 | struct RawAIOCB *next; |
| 441 | 451 | int ret; |
| ... | ... | @@ -447,6 +457,38 @@ typedef struct PosixAioState |
| 447 | 457 | RawAIOCB *first_aio; |
| 448 | 458 | } PosixAioState; |
| 449 | 459 | |
| 460 | +static int raw_fd_pool_get(BDRVRawState *s) | |
| 461 | +{ | |
| 462 | + int i; | |
| 463 | + | |
| 464 | + for (i = 0; i < RAW_FD_POOL_SIZE; i++) { | |
| 465 | + /* already in use */ | |
| 466 | + if (s->fd_pool[i] != -1) | |
| 467 | + continue; | |
| 468 | + | |
| 469 | + /* try to dup file descriptor */ | |
| 470 | + s->fd_pool[i] = dup(s->fd); | |
| 471 | + if (s->fd_pool[i] != -1) | |
| 472 | + return s->fd_pool[i]; | |
| 473 | + } | |
| 474 | + | |
| 475 | + /* we couldn't dup the file descriptor so just use the main one */ | |
| 476 | + return s->fd; | |
| 477 | +} | |
| 478 | + | |
| 479 | +static void raw_fd_pool_put(RawAIOCB *acb) | |
| 480 | +{ | |
| 481 | + BDRVRawState *s = acb->common.bs->opaque; | |
| 482 | + int i; | |
| 483 | + | |
| 484 | + for (i = 0; i < RAW_FD_POOL_SIZE; i++) { | |
| 485 | + if (s->fd_pool[i] == acb->fd) { | |
| 486 | + close(s->fd_pool[i]); | |
| 487 | + s->fd_pool[i] = -1; | |
| 488 | + } | |
| 489 | + } | |
| 490 | +} | |
| 491 | + | |
| 450 | 492 | static void posix_aio_read(void *opaque) |
| 451 | 493 | { |
| 452 | 494 | PosixAioState *s = opaque; |
| ... | ... | @@ -487,6 +529,7 @@ static void posix_aio_read(void *opaque) |
| 487 | 529 | if (ret == ECANCELED) { |
| 488 | 530 | /* remove the request */ |
| 489 | 531 | *pacb = acb->next; |
| 532 | + raw_fd_pool_put(acb); | |
| 490 | 533 | qemu_aio_release(acb); |
| 491 | 534 | } else if (ret != EINPROGRESS) { |
| 492 | 535 | /* end of aio */ |
| ... | ... | @@ -503,6 +546,7 @@ static void posix_aio_read(void *opaque) |
| 503 | 546 | *pacb = acb->next; |
| 504 | 547 | /* call the callback */ |
| 505 | 548 | acb->common.cb(acb->common.opaque, ret); |
| 549 | + raw_fd_pool_put(acb); | |
| 506 | 550 | qemu_aio_release(acb); |
| 507 | 551 | break; |
| 508 | 552 | } else { |
| ... | ... | @@ -577,7 +621,8 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs, |
| 577 | 621 | acb = qemu_aio_get(bs, cb, opaque); |
| 578 | 622 | if (!acb) |
| 579 | 623 | return NULL; |
| 580 | - acb->aiocb.aio_fildes = s->fd; | |
| 624 | + acb->fd = raw_fd_pool_get(s); | |
| 625 | + acb->aiocb.aio_fildes = acb->fd; | |
| 581 | 626 | acb->aiocb.aio_sigevent.sigev_signo = SIGUSR2; |
| 582 | 627 | acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL; |
| 583 | 628 | acb->aiocb.aio_buf = buf; |
| ... | ... | @@ -684,6 +729,7 @@ static void raw_aio_cancel(BlockDriverAIOCB *blockacb) |
| 684 | 729 | break; |
| 685 | 730 | } else if (*pacb == acb) { |
| 686 | 731 | *pacb = acb->next; |
| 732 | + raw_fd_pool_put(acb); | |
| 687 | 733 | qemu_aio_release(acb); |
| 688 | 734 | break; |
| 689 | 735 | } |
| ... | ... | @@ -697,6 +743,18 @@ static int posix_aio_init(void) |
| 697 | 743 | } |
| 698 | 744 | #endif /* CONFIG_AIO */ |
| 699 | 745 | |
| 746 | +static void raw_close_fd_pool(BDRVRawState *s) | |
| 747 | +{ | |
| 748 | + int i; | |
| 749 | + | |
| 750 | + for (i = 0; i < RAW_FD_POOL_SIZE; i++) { | |
| 751 | + if (s->fd_pool[i] != -1) { | |
| 752 | + close(s->fd_pool[i]); | |
| 753 | + s->fd_pool[i] = -1; | |
| 754 | + } | |
| 755 | + } | |
| 756 | +} | |
| 757 | + | |
| 700 | 758 | static void raw_close(BlockDriverState *bs) |
| 701 | 759 | { |
| 702 | 760 | BDRVRawState *s = bs->opaque; |
| ... | ... | @@ -708,6 +766,7 @@ static void raw_close(BlockDriverState *bs) |
| 708 | 766 | qemu_free(s->aligned_buf); |
| 709 | 767 | #endif |
| 710 | 768 | } |
| 769 | + raw_close_fd_pool(s); | |
| 711 | 770 | } |
| 712 | 771 | |
| 713 | 772 | static int raw_truncate(BlockDriverState *bs, int64_t offset) |
| ... | ... | @@ -898,7 +957,7 @@ kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex ma |
| 898 | 957 | static int hdev_open(BlockDriverState *bs, const char *filename, int flags) |
| 899 | 958 | { |
| 900 | 959 | BDRVRawState *s = bs->opaque; |
| 901 | - int fd, open_flags, ret; | |
| 960 | + int fd, open_flags, ret, i; | |
| 902 | 961 | |
| 903 | 962 | posix_aio_init(); |
| 904 | 963 | |
| ... | ... | @@ -963,6 +1022,8 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) |
| 963 | 1022 | return ret; |
| 964 | 1023 | } |
| 965 | 1024 | s->fd = fd; |
| 1025 | + for (i = 0; i < RAW_FD_POOL_SIZE; i++) | |
| 1026 | + s->fd_pool[i] = -1; | |
| 966 | 1027 | #if defined(__linux__) |
| 967 | 1028 | /* close fd so that we can reopen it as needed */ |
| 968 | 1029 | if (s->type == FTYPE_FD) { |
| ... | ... | @@ -975,7 +1036,6 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) |
| 975 | 1036 | } |
| 976 | 1037 | |
| 977 | 1038 | #if defined(__linux__) |
| 978 | - | |
| 979 | 1039 | /* Note: we do not have a reliable method to detect if the floppy is |
| 980 | 1040 | present. The current method is to try to open the floppy at every |
| 981 | 1041 | I/O and to keep it opened during a few hundreds of ms. */ |
| ... | ... | @@ -991,6 +1051,7 @@ static int fd_open(BlockDriverState *bs) |
| 991 | 1051 | (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) { |
| 992 | 1052 | close(s->fd); |
| 993 | 1053 | s->fd = -1; |
| 1054 | + raw_close_fd_pool(s); | |
| 994 | 1055 | #ifdef DEBUG_FLOPPY |
| 995 | 1056 | printf("Floppy closed\n"); |
| 996 | 1057 | #endif |
| ... | ... | @@ -1091,6 +1152,7 @@ static int raw_eject(BlockDriverState *bs, int eject_flag) |
| 1091 | 1152 | if (s->fd >= 0) { |
| 1092 | 1153 | close(s->fd); |
| 1093 | 1154 | s->fd = -1; |
| 1155 | + raw_close_fd_pool(s); | |
| 1094 | 1156 | } |
| 1095 | 1157 | fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK); |
| 1096 | 1158 | if (fd >= 0) { | ... | ... |