Commit 9e472e101f37233f4e32d181d2fee29014c1cf2f
1 parent
235262cf
Fix IO performance regression in sparc
Replace signalfd with signal handler/pipe. There is no way to interrupt
the CPU execution loop when a file descriptor becomes readable. This
results in a large performance regression in sparc emulation during
bootup.
This patch switches us to signal handler/pipe which was originally
suggested by Ian Jackson. The signal handler lets us interrupt the
CPU emulation loop while the write to a pipe lets us avoid the
select/signal race condition.
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5451 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 9 changed files with 55 additions and 233 deletions
Makefile
| ... | ... | @@ -59,10 +59,6 @@ else |
| 59 | 59 | BLOCK_OBJS += block-raw-posix.o |
| 60 | 60 | endif |
| 61 | 61 | |
| 62 | -ifdef CONFIG_AIO | |
| 63 | -BLOCK_OBJS += compatfd.o | |
| 64 | -endif | |
| 65 | - | |
| 66 | 62 | ###################################################################### |
| 67 | 63 | # libqemu_common.a: Target independent part of system emulation. The |
| 68 | 64 | # long term path is to suppress *all* target specific code in case of | ... | ... |
Makefile.target
block-raw-posix.c
| ... | ... | @@ -25,7 +25,6 @@ |
| 25 | 25 | #include "qemu-timer.h" |
| 26 | 26 | #include "qemu-char.h" |
| 27 | 27 | #include "block_int.h" |
| 28 | -#include "compatfd.h" | |
| 29 | 28 | #include <assert.h> |
| 30 | 29 | #ifdef CONFIG_AIO |
| 31 | 30 | #include <aio.h> |
| ... | ... | @@ -453,7 +452,7 @@ typedef struct RawAIOCB { |
| 453 | 452 | |
| 454 | 453 | typedef struct PosixAioState |
| 455 | 454 | { |
| 456 | - int fd; | |
| 455 | + int rfd, wfd; | |
| 457 | 456 | RawAIOCB *first_aio; |
| 458 | 457 | } PosixAioState; |
| 459 | 458 | |
| ... | ... | @@ -494,30 +493,17 @@ static void posix_aio_read(void *opaque) |
| 494 | 493 | PosixAioState *s = opaque; |
| 495 | 494 | RawAIOCB *acb, **pacb; |
| 496 | 495 | int ret; |
| 497 | - size_t offset; | |
| 498 | - union { | |
| 499 | - struct qemu_signalfd_siginfo siginfo; | |
| 500 | - char buf[128]; | |
| 501 | - } sig; | |
| 502 | - | |
| 503 | - /* try to read from signalfd, don't freak out if we can't read anything */ | |
| 504 | - offset = 0; | |
| 505 | - while (offset < 128) { | |
| 506 | - ssize_t len; | |
| 507 | - | |
| 508 | - len = read(s->fd, sig.buf + offset, 128 - offset); | |
| 496 | + ssize_t len; | |
| 497 | + | |
| 498 | + do { | |
| 499 | + char byte; | |
| 500 | + | |
| 501 | + len = read(s->rfd, &byte, 1); | |
| 509 | 502 | if (len == -1 && errno == EINTR) |
| 510 | 503 | continue; |
| 511 | - if (len == -1 && errno == EAGAIN) { | |
| 512 | - /* there is no natural reason for this to happen, | |
| 513 | - * so we'll spin hard until we get everything just | |
| 514 | - * to be on the safe side. */ | |
| 515 | - if (offset > 0) | |
| 516 | - continue; | |
| 517 | - } | |
| 518 | - | |
| 519 | - offset += len; | |
| 520 | - } | |
| 504 | + if (len == -1 && errno == EAGAIN) | |
| 505 | + break; | |
| 506 | + } while (len == -1); | |
| 521 | 507 | |
| 522 | 508 | for(;;) { |
| 523 | 509 | pacb = &s->first_aio; |
| ... | ... | @@ -565,10 +551,22 @@ static int posix_aio_flush(void *opaque) |
| 565 | 551 | |
| 566 | 552 | static PosixAioState *posix_aio_state; |
| 567 | 553 | |
| 554 | +static void aio_signal_handler(int signum) | |
| 555 | +{ | |
| 556 | + if (posix_aio_state) { | |
| 557 | + char byte = 0; | |
| 558 | + | |
| 559 | + write(posix_aio_state->wfd, &byte, sizeof(byte)); | |
| 560 | + } | |
| 561 | + | |
| 562 | + qemu_service_io(); | |
| 563 | +} | |
| 564 | + | |
| 568 | 565 | static int posix_aio_init(void) |
| 569 | 566 | { |
| 570 | - sigset_t mask; | |
| 567 | + struct sigaction act; | |
| 571 | 568 | PosixAioState *s; |
| 569 | + int fds[2]; | |
| 572 | 570 | |
| 573 | 571 | if (posix_aio_state) |
| 574 | 572 | return 0; |
| ... | ... | @@ -577,21 +575,23 @@ static int posix_aio_init(void) |
| 577 | 575 | if (s == NULL) |
| 578 | 576 | return -ENOMEM; |
| 579 | 577 | |
| 580 | - /* Make sure to block AIO signal */ | |
| 581 | - sigemptyset(&mask); | |
| 582 | - sigaddset(&mask, SIGUSR2); | |
| 583 | - sigprocmask(SIG_BLOCK, &mask, NULL); | |
| 584 | - | |
| 578 | + sigfillset(&act.sa_mask); | |
| 579 | + act.sa_flags = 0; /* do not restart syscalls to interrupt select() */ | |
| 580 | + act.sa_handler = aio_signal_handler; | |
| 581 | + sigaction(SIGUSR2, &act, NULL); | |
| 582 | + | |
| 585 | 583 | s->first_aio = NULL; |
| 586 | - s->fd = qemu_signalfd(&mask); | |
| 587 | - if (s->fd == -1) { | |
| 588 | - fprintf(stderr, "failed to create signalfd\n"); | |
| 584 | + if (pipe(fds) == -1) { | |
| 585 | + fprintf(stderr, "failed to create pipe\n"); | |
| 589 | 586 | return -errno; |
| 590 | 587 | } |
| 591 | 588 | |
| 592 | - fcntl(s->fd, F_SETFL, O_NONBLOCK); | |
| 589 | + s->rfd = fds[0]; | |
| 590 | + s->wfd = fds[1]; | |
| 591 | + | |
| 592 | + fcntl(s->wfd, F_SETFL, O_NONBLOCK); | |
| 593 | 593 | |
| 594 | - qemu_aio_set_fd_handler(s->fd, posix_aio_read, NULL, posix_aio_flush, s); | |
| 594 | + qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s); | |
| 595 | 595 | |
| 596 | 596 | #if defined(__linux__) |
| 597 | 597 | { | ... | ... |
compatfd.c deleted
100644 → 0
| 1 | -/* | |
| 2 | - * signalfd/eventfd compatibility | |
| 3 | - * | |
| 4 | - * Copyright IBM, Corp. 2008 | |
| 5 | - * | |
| 6 | - * Authors: | |
| 7 | - * Anthony Liguori <aliguori@us.ibm.com> | |
| 8 | - * | |
| 9 | - * This work is licensed under the terms of the GNU GPL, version 2. See | |
| 10 | - * the COPYING file in the top-level directory. | |
| 11 | - * | |
| 12 | - */ | |
| 13 | - | |
| 14 | -#include "qemu-common.h" | |
| 15 | -#include "compatfd.h" | |
| 16 | - | |
| 17 | -#include <sys/syscall.h> | |
| 18 | -#include <pthread.h> | |
| 19 | - | |
| 20 | -struct sigfd_compat_info | |
| 21 | -{ | |
| 22 | - sigset_t mask; | |
| 23 | - int fd; | |
| 24 | -}; | |
| 25 | - | |
| 26 | -static void *sigwait_compat(void *opaque) | |
| 27 | -{ | |
| 28 | - struct sigfd_compat_info *info = opaque; | |
| 29 | - int err; | |
| 30 | - sigset_t all; | |
| 31 | - | |
| 32 | - sigfillset(&all); | |
| 33 | - sigprocmask(SIG_BLOCK, &all, NULL); | |
| 34 | - | |
| 35 | - do { | |
| 36 | - siginfo_t siginfo; | |
| 37 | - | |
| 38 | - err = sigwaitinfo(&info->mask, &siginfo); | |
| 39 | - if (err == -1 && errno == EINTR) { | |
| 40 | - err = 0; | |
| 41 | - continue; | |
| 42 | - } | |
| 43 | - | |
| 44 | - if (err > 0) { | |
| 45 | - char buffer[128]; | |
| 46 | - size_t offset = 0; | |
| 47 | - | |
| 48 | - memcpy(buffer, &err, sizeof(err)); | |
| 49 | - while (offset < sizeof(buffer)) { | |
| 50 | - ssize_t len; | |
| 51 | - | |
| 52 | - len = write(info->fd, buffer + offset, | |
| 53 | - sizeof(buffer) - offset); | |
| 54 | - if (len == -1 && errno == EINTR) | |
| 55 | - continue; | |
| 56 | - | |
| 57 | - if (len <= 0) { | |
| 58 | - err = -1; | |
| 59 | - break; | |
| 60 | - } | |
| 61 | - | |
| 62 | - offset += len; | |
| 63 | - } | |
| 64 | - } | |
| 65 | - } while (err >= 0); | |
| 66 | - | |
| 67 | - return NULL; | |
| 68 | -} | |
| 69 | - | |
| 70 | -static int qemu_signalfd_compat(const sigset_t *mask) | |
| 71 | -{ | |
| 72 | - pthread_attr_t attr; | |
| 73 | - pthread_t tid; | |
| 74 | - struct sigfd_compat_info *info; | |
| 75 | - int fds[2]; | |
| 76 | - | |
| 77 | - info = malloc(sizeof(*info)); | |
| 78 | - if (info == NULL) { | |
| 79 | - errno = ENOMEM; | |
| 80 | - return -1; | |
| 81 | - } | |
| 82 | - | |
| 83 | - if (pipe(fds) == -1) { | |
| 84 | - free(info); | |
| 85 | - return -1; | |
| 86 | - } | |
| 87 | - | |
| 88 | - memcpy(&info->mask, mask, sizeof(*mask)); | |
| 89 | - info->fd = fds[1]; | |
| 90 | - | |
| 91 | - pthread_attr_init(&attr); | |
| 92 | - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); | |
| 93 | - | |
| 94 | - pthread_create(&tid, &attr, sigwait_compat, info); | |
| 95 | - | |
| 96 | - pthread_attr_destroy(&attr); | |
| 97 | - | |
| 98 | - return fds[0]; | |
| 99 | -} | |
| 100 | - | |
| 101 | -int qemu_signalfd(const sigset_t *mask) | |
| 102 | -{ | |
| 103 | -#if defined(CONFIG_signalfd) | |
| 104 | - int ret; | |
| 105 | - | |
| 106 | - ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8); | |
| 107 | - if (ret != -1) | |
| 108 | - return ret; | |
| 109 | -#endif | |
| 110 | - | |
| 111 | - return qemu_signalfd_compat(mask); | |
| 112 | -} | |
| 113 | - | |
| 114 | -int qemu_eventfd(int *fds) | |
| 115 | -{ | |
| 116 | -#if defined(CONFIG_eventfd) | |
| 117 | - int ret; | |
| 118 | - | |
| 119 | - ret = syscall(SYS_eventfd, 0); | |
| 120 | - if (ret >= 0) { | |
| 121 | - fds[0] = fds[1] = ret; | |
| 122 | - return 0; | |
| 123 | - } | |
| 124 | -#endif | |
| 125 | - | |
| 126 | - return pipe(fds); | |
| 127 | -} |
compatfd.h deleted
100644 → 0
| 1 | -/* | |
| 2 | - * signalfd/eventfd compatibility | |
| 3 | - * | |
| 4 | - * Copyright IBM, Corp. 2008 | |
| 5 | - * | |
| 6 | - * Authors: | |
| 7 | - * Anthony Liguori <aliguori@us.ibm.com> | |
| 8 | - * | |
| 9 | - * This work is licensed under the terms of the GNU GPL, version 2. See | |
| 10 | - * the COPYING file in the top-level directory. | |
| 11 | - * | |
| 12 | - */ | |
| 13 | - | |
| 14 | -#ifndef QEMU_COMPATFD_H | |
| 15 | -#define QEMU_COMPATFD_H | |
| 16 | - | |
| 17 | -#include <signal.h> | |
| 18 | - | |
| 19 | -struct qemu_signalfd_siginfo { | |
| 20 | - uint32_t ssi_signo; | |
| 21 | - uint8_t pad[124]; | |
| 22 | -}; | |
| 23 | - | |
| 24 | -int qemu_signalfd(const sigset_t *mask); | |
| 25 | - | |
| 26 | -int qemu_eventfd(int *fds); | |
| 27 | - | |
| 28 | -#endif |
configure
| ... | ... | @@ -113,8 +113,6 @@ aio="yes" |
| 113 | 113 | nptl="yes" |
| 114 | 114 | mixemu="no" |
| 115 | 115 | bluez="yes" |
| 116 | -signalfd="no" | |
| 117 | -eventfd="no" | |
| 118 | 116 | |
| 119 | 117 | # OS specific |
| 120 | 118 | targetos=`uname -s` |
| ... | ... | @@ -930,33 +928,6 @@ EOF |
| 930 | 928 | fi |
| 931 | 929 | fi |
| 932 | 930 | |
| 933 | -########################################## | |
| 934 | -# signalfd probe | |
| 935 | -cat > $TMPC << EOF | |
| 936 | -#define _GNU_SOURCE | |
| 937 | -#include <unistd.h> | |
| 938 | -#include <sys/syscall.h> | |
| 939 | -#include <signal.h> | |
| 940 | -int main(void) { return syscall(SYS_signalfd, -1, NULL, _NSIG / 8); } | |
| 941 | -EOF | |
| 942 | - | |
| 943 | -if $cc $ARCH_CFLAGS -o $TMPE $TMPC 2> /dev/null ; then | |
| 944 | - signalfd=yes | |
| 945 | -fi | |
| 946 | - | |
| 947 | -########################################## | |
| 948 | -# eventfd probe | |
| 949 | -cat > $TMPC << EOF | |
| 950 | -#define _GNU_SOURCE | |
| 951 | -#include <unistd.h> | |
| 952 | -#include <sys/syscall.h> | |
| 953 | -int main(void) { return syscall(SYS_eventfd, 0); } | |
| 954 | -EOF | |
| 955 | - | |
| 956 | -if $cc $ARCH_CFLAGS -o $TMPE $TMPC 2> /dev/null ; then | |
| 957 | - eventfd=yes | |
| 958 | -fi | |
| 959 | - | |
| 960 | 931 | # Check if tools are available to build documentation. |
| 961 | 932 | if [ -x "`which texi2html 2>/dev/null`" ] && \ |
| 962 | 933 | [ -x "`which pod2man 2>/dev/null`" ]; then |
| ... | ... | @@ -1297,12 +1268,6 @@ if test "$aio" = "yes" ; then |
| 1297 | 1268 | echo "#define CONFIG_AIO 1" >> $config_h |
| 1298 | 1269 | echo "CONFIG_AIO=yes" >> $config_mak |
| 1299 | 1270 | fi |
| 1300 | -if test "$signalfd" = "yes" ; then | |
| 1301 | - echo "#define CONFIG_signalfd 1" >> $config_h | |
| 1302 | -fi | |
| 1303 | -if test "$eventfd" = "yes" ; then | |
| 1304 | - echo "#define CONFIG_eventfd 1" >> $config_h | |
| 1305 | -fi | |
| 1306 | 1271 | |
| 1307 | 1272 | # XXX: suppress that |
| 1308 | 1273 | if [ "$bsd" = "yes" ] ; then | ... | ... |
qemu-common.h
qemu-tool.c
vl.c
| ... | ... | @@ -7475,6 +7475,19 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) |
| 7475 | 7475 | return 0; |
| 7476 | 7476 | } |
| 7477 | 7477 | |
| 7478 | +void qemu_service_io(void) | |
| 7479 | +{ | |
| 7480 | + CPUState *env = cpu_single_env; | |
| 7481 | + if (env) { | |
| 7482 | + cpu_interrupt(env, CPU_INTERRUPT_EXIT); | |
| 7483 | +#ifdef USE_KQEMU | |
| 7484 | + if (env->kqemu_enabled) { | |
| 7485 | + kqemu_cpu_interrupt(env); | |
| 7486 | + } | |
| 7487 | +#endif | |
| 7488 | + } | |
| 7489 | +} | |
| 7490 | + | |
| 7478 | 7491 | /***********************************************************/ |
| 7479 | 7492 | /* bottom halves (can be seen as timers which expire ASAP) */ |
| 7480 | 7493 | ... | ... |