Commit baf35cb90204d75404892aa4e52628ae7a00669b
1 parent
27982661
Use signalfd() to work around signal/select race
This patch introduces signalfd() to work around the signal/select race in checking for AIO completions. For platforms that don't support signalfd(), we emulate it with threads. There was a long discussion about this approach. I don't believe there are any fundamental problems with this approach and I believe eliminating the use of signals is a good thing. I've tested Windows and Linux using Windows and Linux guests. I've also checked for disk IO performance regressions. Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5187 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
7 changed files
with
55 additions
and
110 deletions
Makefile
| @@ -177,7 +177,7 @@ QEMU_IMG_BLOCK_OBJS = $(BLOCK_OBJS) | @@ -177,7 +177,7 @@ QEMU_IMG_BLOCK_OBJS = $(BLOCK_OBJS) | ||
| 177 | ifdef CONFIG_WIN32 | 177 | ifdef CONFIG_WIN32 |
| 178 | QEMU_IMG_BLOCK_OBJS += qemu-img-block-raw-win32.o | 178 | QEMU_IMG_BLOCK_OBJS += qemu-img-block-raw-win32.o |
| 179 | else | 179 | else |
| 180 | -QEMU_IMG_BLOCK_OBJS += nbd.o qemu-img-block-raw-posix.o | 180 | +QEMU_IMG_BLOCK_OBJS += nbd.o qemu-img-block-raw-posix.o compatfd.o |
| 181 | endif | 181 | endif |
| 182 | 182 | ||
| 183 | ###################################################################### | 183 | ###################################################################### |
| @@ -195,7 +195,7 @@ qemu-nbd-%.o: %.c | @@ -195,7 +195,7 @@ qemu-nbd-%.o: %.c | ||
| 195 | $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_NBD -c -o $@ $< | 195 | $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_NBD -c -o $@ $< |
| 196 | 196 | ||
| 197 | qemu-nbd$(EXESUF): qemu-nbd.o qemu-nbd-nbd.o qemu-img-block.o \ | 197 | qemu-nbd$(EXESUF): qemu-nbd.o qemu-nbd-nbd.o qemu-img-block.o \ |
| 198 | - osdep.o qemu-nbd-block-raw-posix.o $(BLOCK_OBJS) | 198 | + osdep.o qemu-nbd-block-raw-posix.o compatfd.o $(BLOCK_OBJS) |
| 199 | $(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS) | 199 | $(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS) |
| 200 | 200 | ||
| 201 | # dyngen host tool | 201 | # dyngen host tool |
Makefile.target
| @@ -476,7 +476,7 @@ OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o | @@ -476,7 +476,7 @@ OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o | ||
| 476 | ifdef CONFIG_WIN32 | 476 | ifdef CONFIG_WIN32 |
| 477 | OBJS+=block-raw-win32.o | 477 | OBJS+=block-raw-win32.o |
| 478 | else | 478 | else |
| 479 | -OBJS+=block-raw-posix.o | 479 | +OBJS+=block-raw-posix.o compatfd.o |
| 480 | endif | 480 | endif |
| 481 | 481 | ||
| 482 | LIBS+=-lz | 482 | LIBS+=-lz |
block-raw-posix.c
| @@ -25,8 +25,10 @@ | @@ -25,8 +25,10 @@ | ||
| 25 | #if !defined(QEMU_IMG) && !defined(QEMU_NBD) | 25 | #if !defined(QEMU_IMG) && !defined(QEMU_NBD) |
| 26 | #include "qemu-timer.h" | 26 | #include "qemu-timer.h" |
| 27 | #include "exec-all.h" | 27 | #include "exec-all.h" |
| 28 | +#include "qemu-char.h" | ||
| 28 | #endif | 29 | #endif |
| 29 | #include "block_int.h" | 30 | #include "block_int.h" |
| 31 | +#include "compatfd.h" | ||
| 30 | #include <assert.h> | 32 | #include <assert.h> |
| 31 | #ifdef CONFIG_AIO | 33 | #ifdef CONFIG_AIO |
| 32 | #include <aio.h> | 34 | #include <aio.h> |
| @@ -438,52 +440,12 @@ typedef struct RawAIOCB { | @@ -438,52 +440,12 @@ typedef struct RawAIOCB { | ||
| 438 | int ret; | 440 | int ret; |
| 439 | } RawAIOCB; | 441 | } RawAIOCB; |
| 440 | 442 | ||
| 443 | +static int aio_sig_fd = -1; | ||
| 441 | static int aio_sig_num = SIGUSR2; | 444 | static int aio_sig_num = SIGUSR2; |
| 442 | static RawAIOCB *first_aio; /* AIO issued */ | 445 | static RawAIOCB *first_aio; /* AIO issued */ |
| 443 | static int aio_initialized = 0; | 446 | static int aio_initialized = 0; |
| 444 | 447 | ||
| 445 | -static void aio_signal_handler(int signum) | ||
| 446 | -{ | ||
| 447 | -#if !defined(QEMU_IMG) && !defined(QEMU_NBD) | ||
| 448 | - CPUState *env = cpu_single_env; | ||
| 449 | - if (env) { | ||
| 450 | - /* stop the currently executing cpu because a timer occured */ | ||
| 451 | - cpu_interrupt(env, CPU_INTERRUPT_EXIT); | ||
| 452 | -#ifdef USE_KQEMU | ||
| 453 | - if (env->kqemu_enabled) { | ||
| 454 | - kqemu_cpu_interrupt(env); | ||
| 455 | - } | ||
| 456 | -#endif | ||
| 457 | - } | ||
| 458 | -#endif | ||
| 459 | -} | ||
| 460 | - | ||
| 461 | -void qemu_aio_init(void) | ||
| 462 | -{ | ||
| 463 | - struct sigaction act; | ||
| 464 | - | ||
| 465 | - aio_initialized = 1; | ||
| 466 | - | ||
| 467 | - sigfillset(&act.sa_mask); | ||
| 468 | - act.sa_flags = 0; /* do not restart syscalls to interrupt select() */ | ||
| 469 | - act.sa_handler = aio_signal_handler; | ||
| 470 | - sigaction(aio_sig_num, &act, NULL); | ||
| 471 | - | ||
| 472 | -#if defined(__GLIBC__) && defined(__linux__) | ||
| 473 | - { | ||
| 474 | - /* XXX: aio thread exit seems to hang on RedHat 9 and this init | ||
| 475 | - seems to fix the problem. */ | ||
| 476 | - struct aioinit ai; | ||
| 477 | - memset(&ai, 0, sizeof(ai)); | ||
| 478 | - ai.aio_threads = 1; | ||
| 479 | - ai.aio_num = 1; | ||
| 480 | - ai.aio_idle_time = 365 * 100000; | ||
| 481 | - aio_init(&ai); | ||
| 482 | - } | ||
| 483 | -#endif | ||
| 484 | -} | ||
| 485 | - | ||
| 486 | -void qemu_aio_poll(void) | 448 | +static void qemu_aio_poll(void *opaque) |
| 487 | { | 449 | { |
| 488 | RawAIOCB *acb, **pacb; | 450 | RawAIOCB *acb, **pacb; |
| 489 | int ret; | 451 | int ret; |
| @@ -524,49 +486,66 @@ void qemu_aio_poll(void) | @@ -524,49 +486,66 @@ void qemu_aio_poll(void) | ||
| 524 | the_end: ; | 486 | the_end: ; |
| 525 | } | 487 | } |
| 526 | 488 | ||
| 489 | +void qemu_aio_init(void) | ||
| 490 | +{ | ||
| 491 | + sigset_t mask; | ||
| 492 | + | ||
| 493 | + aio_initialized = 1; | ||
| 494 | + | ||
| 495 | + /* Make sure to block AIO signal */ | ||
| 496 | + sigemptyset(&mask); | ||
| 497 | + sigaddset(&mask, aio_sig_num); | ||
| 498 | + sigprocmask(SIG_BLOCK, &mask, NULL); | ||
| 499 | + | ||
| 500 | + aio_sig_fd = qemu_signalfd(&mask); | ||
| 501 | +#if !defined(QEMU_IMG) && !defined(QEMU_NBD) | ||
| 502 | + qemu_set_fd_handler2(aio_sig_fd, NULL, qemu_aio_poll, NULL, NULL); | ||
| 503 | +#endif | ||
| 504 | + | ||
| 505 | +#if defined(__GLIBC__) && defined(__linux__) | ||
| 506 | + { | ||
| 507 | + /* XXX: aio thread exit seems to hang on RedHat 9 and this init | ||
| 508 | + seems to fix the problem. */ | ||
| 509 | + struct aioinit ai; | ||
| 510 | + memset(&ai, 0, sizeof(ai)); | ||
| 511 | + ai.aio_threads = 1; | ||
| 512 | + ai.aio_num = 1; | ||
| 513 | + ai.aio_idle_time = 365 * 100000; | ||
| 514 | + aio_init(&ai); | ||
| 515 | + } | ||
| 516 | +#endif | ||
| 517 | +} | ||
| 518 | + | ||
| 527 | /* Wait for all IO requests to complete. */ | 519 | /* Wait for all IO requests to complete. */ |
| 528 | void qemu_aio_flush(void) | 520 | void qemu_aio_flush(void) |
| 529 | { | 521 | { |
| 530 | - qemu_aio_wait_start(); | ||
| 531 | - qemu_aio_poll(); | 522 | + qemu_aio_poll(NULL); |
| 532 | while (first_aio) { | 523 | while (first_aio) { |
| 533 | qemu_aio_wait(); | 524 | qemu_aio_wait(); |
| 534 | } | 525 | } |
| 535 | - qemu_aio_wait_end(); | ||
| 536 | -} | ||
| 537 | - | ||
| 538 | -/* wait until at least one AIO was handled */ | ||
| 539 | -static sigset_t wait_oset; | ||
| 540 | - | ||
| 541 | -void qemu_aio_wait_start(void) | ||
| 542 | -{ | ||
| 543 | - sigset_t set; | ||
| 544 | - | ||
| 545 | - if (!aio_initialized) | ||
| 546 | - qemu_aio_init(); | ||
| 547 | - sigemptyset(&set); | ||
| 548 | - sigaddset(&set, aio_sig_num); | ||
| 549 | - sigprocmask(SIG_BLOCK, &set, &wait_oset); | ||
| 550 | } | 526 | } |
| 551 | 527 | ||
| 552 | void qemu_aio_wait(void) | 528 | void qemu_aio_wait(void) |
| 553 | { | 529 | { |
| 554 | - sigset_t set; | ||
| 555 | - int nb_sigs; | 530 | + int ret; |
| 556 | 531 | ||
| 557 | #if !defined(QEMU_IMG) && !defined(QEMU_NBD) | 532 | #if !defined(QEMU_IMG) && !defined(QEMU_NBD) |
| 558 | if (qemu_bh_poll()) | 533 | if (qemu_bh_poll()) |
| 559 | return; | 534 | return; |
| 560 | #endif | 535 | #endif |
| 561 | - sigemptyset(&set); | ||
| 562 | - sigaddset(&set, aio_sig_num); | ||
| 563 | - sigwait(&set, &nb_sigs); | ||
| 564 | - qemu_aio_poll(); | ||
| 565 | -} | ||
| 566 | 536 | ||
| 567 | -void qemu_aio_wait_end(void) | ||
| 568 | -{ | ||
| 569 | - sigprocmask(SIG_SETMASK, &wait_oset, NULL); | 537 | + do { |
| 538 | + fd_set rdfds; | ||
| 539 | + | ||
| 540 | + FD_ZERO(&rdfds); | ||
| 541 | + FD_SET(aio_sig_fd, &rdfds); | ||
| 542 | + | ||
| 543 | + ret = select(aio_sig_fd + 1, &rdfds, NULL, NULL, NULL); | ||
| 544 | + if (ret == -1 && errno == EINTR) | ||
| 545 | + continue; | ||
| 546 | + } while (ret == 0); | ||
| 547 | + | ||
| 548 | + qemu_aio_poll(NULL); | ||
| 570 | } | 549 | } |
| 571 | 550 | ||
| 572 | static RawAIOCB *raw_aio_setup(BlockDriverState *bs, | 551 | static RawAIOCB *raw_aio_setup(BlockDriverState *bs, |
| @@ -704,18 +683,10 @@ void qemu_aio_init(void) | @@ -704,18 +683,10 @@ void qemu_aio_init(void) | ||
| 704 | { | 683 | { |
| 705 | } | 684 | } |
| 706 | 685 | ||
| 707 | -void qemu_aio_poll(void) | ||
| 708 | -{ | ||
| 709 | -} | ||
| 710 | - | ||
| 711 | void qemu_aio_flush(void) | 686 | void qemu_aio_flush(void) |
| 712 | { | 687 | { |
| 713 | } | 688 | } |
| 714 | 689 | ||
| 715 | -void qemu_aio_wait_start(void) | ||
| 716 | -{ | ||
| 717 | -} | ||
| 718 | - | ||
| 719 | void qemu_aio_wait(void) | 690 | void qemu_aio_wait(void) |
| 720 | { | 691 | { |
| 721 | #if !defined(QEMU_IMG) && !defined(QEMU_NBD) | 692 | #if !defined(QEMU_IMG) && !defined(QEMU_NBD) |
| @@ -723,10 +694,6 @@ void qemu_aio_wait(void) | @@ -723,10 +694,6 @@ void qemu_aio_wait(void) | ||
| 723 | #endif | 694 | #endif |
| 724 | } | 695 | } |
| 725 | 696 | ||
| 726 | -void qemu_aio_wait_end(void) | ||
| 727 | -{ | ||
| 728 | -} | ||
| 729 | - | ||
| 730 | #endif /* CONFIG_AIO */ | 697 | #endif /* CONFIG_AIO */ |
| 731 | 698 | ||
| 732 | static void raw_close(BlockDriverState *bs) | 699 | static void raw_close(BlockDriverState *bs) |
block-raw-win32.c
| @@ -350,18 +350,10 @@ void qemu_aio_init(void) | @@ -350,18 +350,10 @@ void qemu_aio_init(void) | ||
| 350 | { | 350 | { |
| 351 | } | 351 | } |
| 352 | 352 | ||
| 353 | -void qemu_aio_poll(void) | ||
| 354 | -{ | ||
| 355 | -} | ||
| 356 | - | ||
| 357 | void qemu_aio_flush(void) | 353 | void qemu_aio_flush(void) |
| 358 | { | 354 | { |
| 359 | } | 355 | } |
| 360 | 356 | ||
| 361 | -void qemu_aio_wait_start(void) | ||
| 362 | -{ | ||
| 363 | -} | ||
| 364 | - | ||
| 365 | void qemu_aio_wait(void) | 357 | void qemu_aio_wait(void) |
| 366 | { | 358 | { |
| 367 | #ifndef QEMU_IMG | 359 | #ifndef QEMU_IMG |
| @@ -369,10 +361,6 @@ void qemu_aio_wait(void) | @@ -369,10 +361,6 @@ void qemu_aio_wait(void) | ||
| 369 | #endif | 361 | #endif |
| 370 | } | 362 | } |
| 371 | 363 | ||
| 372 | -void qemu_aio_wait_end(void) | ||
| 373 | -{ | ||
| 374 | -} | ||
| 375 | - | ||
| 376 | BlockDriver bdrv_raw = { | 364 | BlockDriver bdrv_raw = { |
| 377 | "raw", | 365 | "raw", |
| 378 | sizeof(BDRVRawState), | 366 | sizeof(BDRVRawState), |
block.c
| @@ -1280,17 +1280,15 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, | @@ -1280,17 +1280,15 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, | ||
| 1280 | BlockDriverAIOCB *acb; | 1280 | BlockDriverAIOCB *acb; |
| 1281 | 1281 | ||
| 1282 | async_ret = NOT_DONE; | 1282 | async_ret = NOT_DONE; |
| 1283 | - qemu_aio_wait_start(); | ||
| 1284 | acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors, | 1283 | acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors, |
| 1285 | bdrv_rw_em_cb, &async_ret); | 1284 | bdrv_rw_em_cb, &async_ret); |
| 1286 | - if (acb == NULL) { | ||
| 1287 | - qemu_aio_wait_end(); | 1285 | + if (acb == NULL) |
| 1288 | return -1; | 1286 | return -1; |
| 1289 | - } | 1287 | + |
| 1290 | while (async_ret == NOT_DONE) { | 1288 | while (async_ret == NOT_DONE) { |
| 1291 | qemu_aio_wait(); | 1289 | qemu_aio_wait(); |
| 1292 | } | 1290 | } |
| 1293 | - qemu_aio_wait_end(); | 1291 | + |
| 1294 | return async_ret; | 1292 | return async_ret; |
| 1295 | } | 1293 | } |
| 1296 | 1294 | ||
| @@ -1301,17 +1299,13 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, | @@ -1301,17 +1299,13 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, | ||
| 1301 | BlockDriverAIOCB *acb; | 1299 | BlockDriverAIOCB *acb; |
| 1302 | 1300 | ||
| 1303 | async_ret = NOT_DONE; | 1301 | async_ret = NOT_DONE; |
| 1304 | - qemu_aio_wait_start(); | ||
| 1305 | acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors, | 1302 | acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors, |
| 1306 | bdrv_rw_em_cb, &async_ret); | 1303 | bdrv_rw_em_cb, &async_ret); |
| 1307 | - if (acb == NULL) { | ||
| 1308 | - qemu_aio_wait_end(); | 1304 | + if (acb == NULL) |
| 1309 | return -1; | 1305 | return -1; |
| 1310 | - } | ||
| 1311 | while (async_ret == NOT_DONE) { | 1306 | while (async_ret == NOT_DONE) { |
| 1312 | qemu_aio_wait(); | 1307 | qemu_aio_wait(); |
| 1313 | } | 1308 | } |
| 1314 | - qemu_aio_wait_end(); | ||
| 1315 | return async_ret; | 1309 | return async_ret; |
| 1316 | } | 1310 | } |
| 1317 | 1311 |
block.h
| @@ -90,11 +90,8 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num, | @@ -90,11 +90,8 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num, | ||
| 90 | void bdrv_aio_cancel(BlockDriverAIOCB *acb); | 90 | void bdrv_aio_cancel(BlockDriverAIOCB *acb); |
| 91 | 91 | ||
| 92 | void qemu_aio_init(void); | 92 | void qemu_aio_init(void); |
| 93 | -void qemu_aio_poll(void); | ||
| 94 | void qemu_aio_flush(void); | 93 | void qemu_aio_flush(void); |
| 95 | -void qemu_aio_wait_start(void); | ||
| 96 | void qemu_aio_wait(void); | 94 | void qemu_aio_wait(void); |
| 97 | -void qemu_aio_wait_end(void); | ||
| 98 | 95 | ||
| 99 | int qemu_key_check(BlockDriverState *bs, const char *name); | 96 | int qemu_key_check(BlockDriverState *bs, const char *name); |
| 100 | 97 |
vl.c
| @@ -7482,7 +7482,6 @@ void main_loop_wait(int timeout) | @@ -7482,7 +7482,6 @@ void main_loop_wait(int timeout) | ||
| 7482 | slirp_select_poll(&rfds, &wfds, &xfds); | 7482 | slirp_select_poll(&rfds, &wfds, &xfds); |
| 7483 | } | 7483 | } |
| 7484 | #endif | 7484 | #endif |
| 7485 | - qemu_aio_poll(); | ||
| 7486 | 7485 | ||
| 7487 | if (vm_running) { | 7486 | if (vm_running) { |
| 7488 | if (likely(!(cur_cpu->singlestep_enabled & SSTEP_NOTIMER))) | 7487 | if (likely(!(cur_cpu->singlestep_enabled & SSTEP_NOTIMER))) |