Commit 5353872545861d8d21bf9fcc64a25cbfc8cd2eac
1 parent
997306fc
Implement an fd pool to get real AIO with posix-aio
This patch implements a simple fd pool to allow many AIO requests with posix-aio. The result is significantly improved performance (identical to that reported for linux-aio) for both cache=on and cache=off. The fundamental problem with posix-aio is that it limits itself to one thread per file descriptor. I don't know why this is, but this patch provides a simple mechanism to work around that limit (duplicating the file descriptor). This isn't a great solution, but it seems like a reasonable intermediate step between posix-aio and a custom thread-pool to replace it. Ryan Harper will be posting some performance analysis he did comparing posix-aio with fd pooling against linux-aio. The size of the posix-aio thread pool and the fd pool were largely determined by him based on this analysis. Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5323 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
1 changed file
with
65 additions
and
3 deletions
block-raw-posix.c
... | ... | @@ -84,10 +84,16 @@ |
84 | 84 | reopen it to see if the disk has been changed */ |
85 | 85 | #define FD_OPEN_TIMEOUT 1000 |
86 | 86 | |
87 | +/* posix-aio doesn't allow multiple outstanding requests to a single file | |
88 | + * descriptor. we implement a pool of dup()'d file descriptors to work | |
89 | + * around this */ | |
90 | +#define RAW_FD_POOL_SIZE 64 | |
91 | + | |
87 | 92 | typedef struct BDRVRawState { |
88 | 93 | int fd; |
89 | 94 | int type; |
90 | 95 | unsigned int lseek_err_cnt; |
96 | + int fd_pool[RAW_FD_POOL_SIZE]; | |
91 | 97 | #if defined(__linux__) |
92 | 98 | /* linux floppy specific */ |
93 | 99 | int fd_open_flags; |
... | ... | @@ -109,6 +115,7 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) |
109 | 115 | { |
110 | 116 | BDRVRawState *s = bs->opaque; |
111 | 117 | int fd, open_flags, ret; |
118 | + int i; | |
112 | 119 | |
113 | 120 | posix_aio_init(); |
114 | 121 | |
... | ... | @@ -138,6 +145,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) |
138 | 145 | return ret; |
139 | 146 | } |
140 | 147 | s->fd = fd; |
148 | + for (i = 0; i < RAW_FD_POOL_SIZE; i++) | |
149 | + s->fd_pool[i] = -1; | |
141 | 150 | #if defined(O_DIRECT) |
142 | 151 | s->aligned_buf = NULL; |
143 | 152 | if (flags & BDRV_O_DIRECT) { |
... | ... | @@ -436,6 +445,7 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset, |
436 | 445 | |
437 | 446 | typedef struct RawAIOCB { |
438 | 447 | BlockDriverAIOCB common; |
448 | + int fd; | |
439 | 449 | struct aiocb aiocb; |
440 | 450 | struct RawAIOCB *next; |
441 | 451 | int ret; |
... | ... | @@ -447,6 +457,38 @@ typedef struct PosixAioState |
447 | 457 | RawAIOCB *first_aio; |
448 | 458 | } PosixAioState; |
449 | 459 | |
460 | +static int raw_fd_pool_get(BDRVRawState *s) | |
461 | +{ | |
462 | + int i; | |
463 | + | |
464 | + for (i = 0; i < RAW_FD_POOL_SIZE; i++) { | |
465 | + /* already in use */ | |
466 | + if (s->fd_pool[i] != -1) | |
467 | + continue; | |
468 | + | |
469 | + /* try to dup file descriptor */ | |
470 | + s->fd_pool[i] = dup(s->fd); | |
471 | + if (s->fd_pool[i] != -1) | |
472 | + return s->fd_pool[i]; | |
473 | + } | |
474 | + | |
475 | + /* we couldn't dup the file descriptor so just use the main one */ | |
476 | + return s->fd; | |
477 | +} | |
478 | + | |
479 | +static void raw_fd_pool_put(RawAIOCB *acb) | |
480 | +{ | |
481 | + BDRVRawState *s = acb->common.bs->opaque; | |
482 | + int i; | |
483 | + | |
484 | + for (i = 0; i < RAW_FD_POOL_SIZE; i++) { | |
485 | + if (s->fd_pool[i] == acb->fd) { | |
486 | + close(s->fd_pool[i]); | |
487 | + s->fd_pool[i] = -1; | |
488 | + } | |
489 | + } | |
490 | +} | |
491 | + | |
450 | 492 | static void posix_aio_read(void *opaque) |
451 | 493 | { |
452 | 494 | PosixAioState *s = opaque; |
... | ... | @@ -487,6 +529,7 @@ static void posix_aio_read(void *opaque) |
487 | 529 | if (ret == ECANCELED) { |
488 | 530 | /* remove the request */ |
489 | 531 | *pacb = acb->next; |
532 | + raw_fd_pool_put(acb); | |
490 | 533 | qemu_aio_release(acb); |
491 | 534 | } else if (ret != EINPROGRESS) { |
492 | 535 | /* end of aio */ |
... | ... | @@ -503,6 +546,7 @@ static void posix_aio_read(void *opaque) |
503 | 546 | *pacb = acb->next; |
504 | 547 | /* call the callback */ |
505 | 548 | acb->common.cb(acb->common.opaque, ret); |
549 | + raw_fd_pool_put(acb); | |
506 | 550 | qemu_aio_release(acb); |
507 | 551 | break; |
508 | 552 | } else { |
... | ... | @@ -577,7 +621,8 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs, |
577 | 621 | acb = qemu_aio_get(bs, cb, opaque); |
578 | 622 | if (!acb) |
579 | 623 | return NULL; |
580 | - acb->aiocb.aio_fildes = s->fd; | |
624 | + acb->fd = raw_fd_pool_get(s); | |
625 | + acb->aiocb.aio_fildes = acb->fd; | |
581 | 626 | acb->aiocb.aio_sigevent.sigev_signo = SIGUSR2; |
582 | 627 | acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL; |
583 | 628 | acb->aiocb.aio_buf = buf; |
... | ... | @@ -684,6 +729,7 @@ static void raw_aio_cancel(BlockDriverAIOCB *blockacb) |
684 | 729 | break; |
685 | 730 | } else if (*pacb == acb) { |
686 | 731 | *pacb = acb->next; |
732 | + raw_fd_pool_put(acb); | |
687 | 733 | qemu_aio_release(acb); |
688 | 734 | break; |
689 | 735 | } |
... | ... | @@ -697,6 +743,18 @@ static int posix_aio_init(void) |
697 | 743 | } |
698 | 744 | #endif /* CONFIG_AIO */ |
699 | 745 | |
746 | +static void raw_close_fd_pool(BDRVRawState *s) | |
747 | +{ | |
748 | + int i; | |
749 | + | |
750 | + for (i = 0; i < RAW_FD_POOL_SIZE; i++) { | |
751 | + if (s->fd_pool[i] != -1) { | |
752 | + close(s->fd_pool[i]); | |
753 | + s->fd_pool[i] = -1; | |
754 | + } | |
755 | + } | |
756 | +} | |
757 | + | |
700 | 758 | static void raw_close(BlockDriverState *bs) |
701 | 759 | { |
702 | 760 | BDRVRawState *s = bs->opaque; |
... | ... | @@ -708,6 +766,7 @@ static void raw_close(BlockDriverState *bs) |
708 | 766 | qemu_free(s->aligned_buf); |
709 | 767 | #endif |
710 | 768 | } |
769 | + raw_close_fd_pool(s); | |
711 | 770 | } |
712 | 771 | |
713 | 772 | static int raw_truncate(BlockDriverState *bs, int64_t offset) |
... | ... | @@ -898,7 +957,7 @@ kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex ma |
898 | 957 | static int hdev_open(BlockDriverState *bs, const char *filename, int flags) |
899 | 958 | { |
900 | 959 | BDRVRawState *s = bs->opaque; |
901 | - int fd, open_flags, ret; | |
960 | + int fd, open_flags, ret, i; | |
902 | 961 | |
903 | 962 | posix_aio_init(); |
904 | 963 | |
... | ... | @@ -963,6 +1022,8 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) |
963 | 1022 | return ret; |
964 | 1023 | } |
965 | 1024 | s->fd = fd; |
1025 | + for (i = 0; i < RAW_FD_POOL_SIZE; i++) | |
1026 | + s->fd_pool[i] = -1; | |
966 | 1027 | #if defined(__linux__) |
967 | 1028 | /* close fd so that we can reopen it as needed */ |
968 | 1029 | if (s->type == FTYPE_FD) { |
... | ... | @@ -975,7 +1036,6 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) |
975 | 1036 | } |
976 | 1037 | |
977 | 1038 | #if defined(__linux__) |
978 | - | |
979 | 1039 | /* Note: we do not have a reliable method to detect if the floppy is |
980 | 1040 | present. The current method is to try to open the floppy at every |
981 | 1041 | I/O and to keep it opened during a few hundreds of ms. */ |
... | ... | @@ -991,6 +1051,7 @@ static int fd_open(BlockDriverState *bs) |
991 | 1051 | (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) { |
992 | 1052 | close(s->fd); |
993 | 1053 | s->fd = -1; |
1054 | + raw_close_fd_pool(s); | |
994 | 1055 | #ifdef DEBUG_FLOPPY |
995 | 1056 | printf("Floppy closed\n"); |
996 | 1057 | #endif |
... | ... | @@ -1091,6 +1152,7 @@ static int raw_eject(BlockDriverState *bs, int eject_flag) |
1091 | 1152 | if (s->fd >= 0) { |
1092 | 1153 | close(s->fd); |
1093 | 1154 | s->fd = -1; |
1155 | + raw_close_fd_pool(s); | |
1094 | 1156 | } |
1095 | 1157 | fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK); |
1096 | 1158 | if (fd >= 0) { | ... | ... |