Commit 5353872545861d8d21bf9fcc64a25cbfc8cd2eac

Authored by aliguori
1 parent 997306fc

Implement an fd pool to get real AIO with posix-aio

This patch implements a simple fd pool to allow many AIO requests with
posix-aio.  The result is significantly improved performance (identical to that
reported for linux-aio) for both cache=on and cache=off.

The fundamental problem with posix-aio is that it limits itself to one thread
per file descriptor.  I don't know why this is, but this patch provides a simple
mechanism to work around it: duplicating the file descriptor for each request.

This isn't a great solution, but it seems like a reasonable intermediate step
between posix-aio and a custom thread-pool to replace it.

Ryan Harper will be posting some performance analysis he did comparing posix-aio
with fd pooling against linux-aio.  The sizes of the posix-aio thread pool and
the fd pool were largely determined by him based on this analysis.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5323 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 1 changed file with 65 additions and 3 deletions
block-raw-posix.c
... ... @@ -84,10 +84,16 @@
84 84 reopen it to see if the disk has been changed */
85 85 #define FD_OPEN_TIMEOUT 1000
86 86  
  87 +/* posix-aio uses at most one thread per file descriptor, so requests to
  88 + * the same descriptor are not serviced in parallel. We implement a pool of
  89 + * dup()'d file descriptors to work around this. */
  90 +#define RAW_FD_POOL_SIZE 64
  91 +
87 92 typedef struct BDRVRawState {
88 93 int fd;
89 94 int type;
90 95 unsigned int lseek_err_cnt;
  96 + int fd_pool[RAW_FD_POOL_SIZE];
91 97 #if defined(__linux__)
92 98 /* linux floppy specific */
93 99 int fd_open_flags;
... ... @@ -109,6 +115,7 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
109 115 {
110 116 BDRVRawState *s = bs->opaque;
111 117 int fd, open_flags, ret;
  118 + int i;
112 119  
113 120 posix_aio_init();
114 121  
... ... @@ -138,6 +145,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
138 145 return ret;
139 146 }
140 147 s->fd = fd;
  148 + for (i = 0; i < RAW_FD_POOL_SIZE; i++)
  149 + s->fd_pool[i] = -1;
141 150 #if defined(O_DIRECT)
142 151 s->aligned_buf = NULL;
143 152 if (flags & BDRV_O_DIRECT) {
... ... @@ -436,6 +445,7 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset,
436 445  
437 446 typedef struct RawAIOCB {
438 447 BlockDriverAIOCB common;
  448 + int fd;
439 449 struct aiocb aiocb;
440 450 struct RawAIOCB *next;
441 451 int ret;
... ... @@ -447,6 +457,38 @@ typedef struct PosixAioState
447 457 RawAIOCB *first_aio;
448 458 } PosixAioState;
449 459  
  460 +static int raw_fd_pool_get(BDRVRawState *s)
  461 +{
  462 + int i;
  463 +
  464 + for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
  465 + /* already in use */
  466 + if (s->fd_pool[i] != -1)
  467 + continue;
  468 +
  469 + /* try to dup file descriptor */
  470 + s->fd_pool[i] = dup(s->fd);
  471 + if (s->fd_pool[i] != -1)
  472 + return s->fd_pool[i];
  473 + }
  474 +
  475 + /* we couldn't dup the file descriptor so just use the main one */
  476 + return s->fd;
  477 +}
  478 +
  479 +static void raw_fd_pool_put(RawAIOCB *acb)
  480 +{
  481 + BDRVRawState *s = acb->common.bs->opaque;
  482 + int i;
  483 +
  484 + for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
  485 + if (s->fd_pool[i] == acb->fd) {
  486 + close(s->fd_pool[i]);
  487 + s->fd_pool[i] = -1;
  488 + }
  489 + }
  490 +}
  491 +
450 492 static void posix_aio_read(void *opaque)
451 493 {
452 494 PosixAioState *s = opaque;
... ... @@ -487,6 +529,7 @@ static void posix_aio_read(void *opaque)
487 529 if (ret == ECANCELED) {
488 530 /* remove the request */
489 531 *pacb = acb->next;
  532 + raw_fd_pool_put(acb);
490 533 qemu_aio_release(acb);
491 534 } else if (ret != EINPROGRESS) {
492 535 /* end of aio */
... ... @@ -503,6 +546,7 @@ static void posix_aio_read(void *opaque)
503 546 *pacb = acb->next;
504 547 /* call the callback */
505 548 acb->common.cb(acb->common.opaque, ret);
  549 + raw_fd_pool_put(acb);
506 550 qemu_aio_release(acb);
507 551 break;
508 552 } else {
... ... @@ -577,7 +621,8 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
577 621 acb = qemu_aio_get(bs, cb, opaque);
578 622 if (!acb)
579 623 return NULL;
580   - acb->aiocb.aio_fildes = s->fd;
  624 + acb->fd = raw_fd_pool_get(s);
  625 + acb->aiocb.aio_fildes = acb->fd;
581 626 acb->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
582 627 acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
583 628 acb->aiocb.aio_buf = buf;
... ... @@ -684,6 +729,7 @@ static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
684 729 break;
685 730 } else if (*pacb == acb) {
686 731 *pacb = acb->next;
  732 + raw_fd_pool_put(acb);
687 733 qemu_aio_release(acb);
688 734 break;
689 735 }
... ... @@ -697,6 +743,18 @@ static int posix_aio_init(void)
697 743 }
698 744 #endif /* CONFIG_AIO */
699 745  
  746 +static void raw_close_fd_pool(BDRVRawState *s)
  747 +{
  748 + int i;
  749 +
  750 + for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
  751 + if (s->fd_pool[i] != -1) {
  752 + close(s->fd_pool[i]);
  753 + s->fd_pool[i] = -1;
  754 + }
  755 + }
  756 +}
  757 +
700 758 static void raw_close(BlockDriverState *bs)
701 759 {
702 760 BDRVRawState *s = bs->opaque;
... ... @@ -708,6 +766,7 @@ static void raw_close(BlockDriverState *bs)
708 766 qemu_free(s->aligned_buf);
709 767 #endif
710 768 }
  769 + raw_close_fd_pool(s);
711 770 }
712 771  
713 772 static int raw_truncate(BlockDriverState *bs, int64_t offset)
... ... @@ -898,7 +957,7 @@ kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex ma
898 957 static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
899 958 {
900 959 BDRVRawState *s = bs->opaque;
901   - int fd, open_flags, ret;
  960 + int fd, open_flags, ret, i;
902 961  
903 962 posix_aio_init();
904 963  
... ... @@ -963,6 +1022,8 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
963 1022 return ret;
964 1023 }
965 1024 s->fd = fd;
  1025 + for (i = 0; i < RAW_FD_POOL_SIZE; i++)
  1026 + s->fd_pool[i] = -1;
966 1027 #if defined(__linux__)
967 1028 /* close fd so that we can reopen it as needed */
968 1029 if (s->type == FTYPE_FD) {
... ... @@ -975,7 +1036,6 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
975 1036 }
976 1037  
977 1038 #if defined(__linux__)
978   -
979 1039 /* Note: we do not have a reliable method to detect if the floppy is
980 1040 present. The current method is to try to open the floppy at every
981 1041 I/O and to keep it opened during a few hundreds of ms. */
... ... @@ -991,6 +1051,7 @@ static int fd_open(BlockDriverState *bs)
991 1051 (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
992 1052 close(s->fd);
993 1053 s->fd = -1;
  1054 + raw_close_fd_pool(s);
994 1055 #ifdef DEBUG_FLOPPY
995 1056 printf("Floppy closed\n");
996 1057 #endif
... ... @@ -1091,6 +1152,7 @@ static int raw_eject(BlockDriverState *bs, int eject_flag)
1091 1152 if (s->fd >= 0) {
1092 1153 close(s->fd);
1093 1154 s->fd = -1;
  1155 + raw_close_fd_pool(s);
1094 1156 }
1095 1157 fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
1096 1158 if (fd >= 0) {
... ...