Commit baf35cb90204d75404892aa4e52628ae7a00669b
1 parent
27982661
Use signalfd() to work around signal/select race
This patch introduces signalfd() to work around the signal/select race in checking for AIO completions. For platforms that don't support signalfd(), we emulate it with threads. There was a long discussion about this approach. I don't believe there are any fundamental problems with this approach and I believe eliminating the use of signals is a good thing. I've tested Windows and Linux using Windows and Linux guests. I've also checked for disk IO performance regressions. Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5187 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
7 changed files
with
55 additions
and
110 deletions
Makefile
... | ... | @@ -177,7 +177,7 @@ QEMU_IMG_BLOCK_OBJS = $(BLOCK_OBJS) |
177 | 177 | ifdef CONFIG_WIN32 |
178 | 178 | QEMU_IMG_BLOCK_OBJS += qemu-img-block-raw-win32.o |
179 | 179 | else |
180 | -QEMU_IMG_BLOCK_OBJS += nbd.o qemu-img-block-raw-posix.o | |
180 | +QEMU_IMG_BLOCK_OBJS += nbd.o qemu-img-block-raw-posix.o compatfd.o | |
181 | 181 | endif |
182 | 182 | |
183 | 183 | ###################################################################### |
... | ... | @@ -195,7 +195,7 @@ qemu-nbd-%.o: %.c |
195 | 195 | $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_NBD -c -o $@ $< |
196 | 196 | |
197 | 197 | qemu-nbd$(EXESUF): qemu-nbd.o qemu-nbd-nbd.o qemu-img-block.o \ |
198 | - osdep.o qemu-nbd-block-raw-posix.o $(BLOCK_OBJS) | |
198 | + osdep.o qemu-nbd-block-raw-posix.o compatfd.o $(BLOCK_OBJS) | |
199 | 199 | $(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS) |
200 | 200 | |
201 | 201 | # dyngen host tool | ... | ... |
Makefile.target
block-raw-posix.c
... | ... | @@ -25,8 +25,10 @@ |
25 | 25 | #if !defined(QEMU_IMG) && !defined(QEMU_NBD) |
26 | 26 | #include "qemu-timer.h" |
27 | 27 | #include "exec-all.h" |
28 | +#include "qemu-char.h" | |
28 | 29 | #endif |
29 | 30 | #include "block_int.h" |
31 | +#include "compatfd.h" | |
30 | 32 | #include <assert.h> |
31 | 33 | #ifdef CONFIG_AIO |
32 | 34 | #include <aio.h> |
... | ... | @@ -438,52 +440,12 @@ typedef struct RawAIOCB { |
438 | 440 | int ret; |
439 | 441 | } RawAIOCB; |
440 | 442 | |
443 | +static int aio_sig_fd = -1; | |
441 | 444 | static int aio_sig_num = SIGUSR2; |
442 | 445 | static RawAIOCB *first_aio; /* AIO issued */ |
443 | 446 | static int aio_initialized = 0; |
444 | 447 | |
445 | -static void aio_signal_handler(int signum) | |
446 | -{ | |
447 | -#if !defined(QEMU_IMG) && !defined(QEMU_NBD) | |
448 | - CPUState *env = cpu_single_env; | |
449 | - if (env) { | |
450 | - /* stop the currently executing cpu because a timer occured */ | |
451 | - cpu_interrupt(env, CPU_INTERRUPT_EXIT); | |
452 | -#ifdef USE_KQEMU | |
453 | - if (env->kqemu_enabled) { | |
454 | - kqemu_cpu_interrupt(env); | |
455 | - } | |
456 | -#endif | |
457 | - } | |
458 | -#endif | |
459 | -} | |
460 | - | |
461 | -void qemu_aio_init(void) | |
462 | -{ | |
463 | - struct sigaction act; | |
464 | - | |
465 | - aio_initialized = 1; | |
466 | - | |
467 | - sigfillset(&act.sa_mask); | |
468 | - act.sa_flags = 0; /* do not restart syscalls to interrupt select() */ | |
469 | - act.sa_handler = aio_signal_handler; | |
470 | - sigaction(aio_sig_num, &act, NULL); | |
471 | - | |
472 | -#if defined(__GLIBC__) && defined(__linux__) | |
473 | - { | |
474 | - /* XXX: aio thread exit seems to hang on RedHat 9 and this init | |
475 | - seems to fix the problem. */ | |
476 | - struct aioinit ai; | |
477 | - memset(&ai, 0, sizeof(ai)); | |
478 | - ai.aio_threads = 1; | |
479 | - ai.aio_num = 1; | |
480 | - ai.aio_idle_time = 365 * 100000; | |
481 | - aio_init(&ai); | |
482 | - } | |
483 | -#endif | |
484 | -} | |
485 | - | |
486 | -void qemu_aio_poll(void) | |
448 | +static void qemu_aio_poll(void *opaque) | |
487 | 449 | { |
488 | 450 | RawAIOCB *acb, **pacb; |
489 | 451 | int ret; |
... | ... | @@ -524,49 +486,66 @@ void qemu_aio_poll(void) |
524 | 486 | the_end: ; |
525 | 487 | } |
526 | 488 | |
489 | +void qemu_aio_init(void) | |
490 | +{ | |
491 | + sigset_t mask; | |
492 | + | |
493 | + aio_initialized = 1; | |
494 | + | |
495 | + /* Make sure to block AIO signal */ | |
496 | + sigemptyset(&mask); | |
497 | + sigaddset(&mask, aio_sig_num); | |
498 | + sigprocmask(SIG_BLOCK, &mask, NULL); | |
499 | + | |
500 | + aio_sig_fd = qemu_signalfd(&mask); | |
501 | +#if !defined(QEMU_IMG) && !defined(QEMU_NBD) | |
502 | + qemu_set_fd_handler2(aio_sig_fd, NULL, qemu_aio_poll, NULL, NULL); | |
503 | +#endif | |
504 | + | |
505 | +#if defined(__GLIBC__) && defined(__linux__) | |
506 | + { | |
507 | + /* XXX: aio thread exit seems to hang on RedHat 9 and this init | |
508 | + seems to fix the problem. */ | |
509 | + struct aioinit ai; | |
510 | + memset(&ai, 0, sizeof(ai)); | |
511 | + ai.aio_threads = 1; | |
512 | + ai.aio_num = 1; | |
513 | + ai.aio_idle_time = 365 * 100000; | |
514 | + aio_init(&ai); | |
515 | + } | |
516 | +#endif | |
517 | +} | |
518 | + | |
527 | 519 | /* Wait for all IO requests to complete. */ |
528 | 520 | void qemu_aio_flush(void) |
529 | 521 | { |
530 | - qemu_aio_wait_start(); | |
531 | - qemu_aio_poll(); | |
522 | + qemu_aio_poll(NULL); | |
532 | 523 | while (first_aio) { |
533 | 524 | qemu_aio_wait(); |
534 | 525 | } |
535 | - qemu_aio_wait_end(); | |
536 | -} | |
537 | - | |
538 | -/* wait until at least one AIO was handled */ | |
539 | -static sigset_t wait_oset; | |
540 | - | |
541 | -void qemu_aio_wait_start(void) | |
542 | -{ | |
543 | - sigset_t set; | |
544 | - | |
545 | - if (!aio_initialized) | |
546 | - qemu_aio_init(); | |
547 | - sigemptyset(&set); | |
548 | - sigaddset(&set, aio_sig_num); | |
549 | - sigprocmask(SIG_BLOCK, &set, &wait_oset); | |
550 | 526 | } |
551 | 527 | |
552 | 528 | void qemu_aio_wait(void) |
553 | 529 | { |
554 | - sigset_t set; | |
555 | - int nb_sigs; | |
530 | + int ret; | |
556 | 531 | |
557 | 532 | #if !defined(QEMU_IMG) && !defined(QEMU_NBD) |
558 | 533 | if (qemu_bh_poll()) |
559 | 534 | return; |
560 | 535 | #endif |
561 | - sigemptyset(&set); | |
562 | - sigaddset(&set, aio_sig_num); | |
563 | - sigwait(&set, &nb_sigs); | |
564 | - qemu_aio_poll(); | |
565 | -} | |
566 | 536 | |
567 | -void qemu_aio_wait_end(void) | |
568 | -{ | |
569 | - sigprocmask(SIG_SETMASK, &wait_oset, NULL); | |
537 | + do { | |
538 | + fd_set rdfds; | |
539 | + | |
540 | + FD_ZERO(&rdfds); | |
541 | + FD_SET(aio_sig_fd, &rdfds); | |
542 | + | |
543 | + ret = select(aio_sig_fd + 1, &rdfds, NULL, NULL, NULL); | |
544 | + if (ret == -1 && errno == EINTR) | |
545 | + continue; | |
546 | + } while (ret == 0); | |
547 | + | |
548 | + qemu_aio_poll(NULL); | |
570 | 549 | } |
571 | 550 | |
572 | 551 | static RawAIOCB *raw_aio_setup(BlockDriverState *bs, |
... | ... | @@ -704,18 +683,10 @@ void qemu_aio_init(void) |
704 | 683 | { |
705 | 684 | } |
706 | 685 | |
707 | -void qemu_aio_poll(void) | |
708 | -{ | |
709 | -} | |
710 | - | |
711 | 686 | void qemu_aio_flush(void) |
712 | 687 | { |
713 | 688 | } |
714 | 689 | |
715 | -void qemu_aio_wait_start(void) | |
716 | -{ | |
717 | -} | |
718 | - | |
719 | 690 | void qemu_aio_wait(void) |
720 | 691 | { |
721 | 692 | #if !defined(QEMU_IMG) && !defined(QEMU_NBD) |
... | ... | @@ -723,10 +694,6 @@ void qemu_aio_wait(void) |
723 | 694 | #endif |
724 | 695 | } |
725 | 696 | |
726 | -void qemu_aio_wait_end(void) | |
727 | -{ | |
728 | -} | |
729 | - | |
730 | 697 | #endif /* CONFIG_AIO */ |
731 | 698 | |
732 | 699 | static void raw_close(BlockDriverState *bs) | ... | ... |
block-raw-win32.c
... | ... | @@ -350,18 +350,10 @@ void qemu_aio_init(void) |
350 | 350 | { |
351 | 351 | } |
352 | 352 | |
353 | -void qemu_aio_poll(void) | |
354 | -{ | |
355 | -} | |
356 | - | |
357 | 353 | void qemu_aio_flush(void) |
358 | 354 | { |
359 | 355 | } |
360 | 356 | |
361 | -void qemu_aio_wait_start(void) | |
362 | -{ | |
363 | -} | |
364 | - | |
365 | 357 | void qemu_aio_wait(void) |
366 | 358 | { |
367 | 359 | #ifndef QEMU_IMG |
... | ... | @@ -369,10 +361,6 @@ void qemu_aio_wait(void) |
369 | 361 | #endif |
370 | 362 | } |
371 | 363 | |
372 | -void qemu_aio_wait_end(void) | |
373 | -{ | |
374 | -} | |
375 | - | |
376 | 364 | BlockDriver bdrv_raw = { |
377 | 365 | "raw", |
378 | 366 | sizeof(BDRVRawState), | ... | ... |
block.c
... | ... | @@ -1280,17 +1280,15 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, |
1280 | 1280 | BlockDriverAIOCB *acb; |
1281 | 1281 | |
1282 | 1282 | async_ret = NOT_DONE; |
1283 | - qemu_aio_wait_start(); | |
1284 | 1283 | acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors, |
1285 | 1284 | bdrv_rw_em_cb, &async_ret); |
1286 | - if (acb == NULL) { | |
1287 | - qemu_aio_wait_end(); | |
1285 | + if (acb == NULL) | |
1288 | 1286 | return -1; |
1289 | - } | |
1287 | + | |
1290 | 1288 | while (async_ret == NOT_DONE) { |
1291 | 1289 | qemu_aio_wait(); |
1292 | 1290 | } |
1293 | - qemu_aio_wait_end(); | |
1291 | + | |
1294 | 1292 | return async_ret; |
1295 | 1293 | } |
1296 | 1294 | |
... | ... | @@ -1301,17 +1299,13 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, |
1301 | 1299 | BlockDriverAIOCB *acb; |
1302 | 1300 | |
1303 | 1301 | async_ret = NOT_DONE; |
1304 | - qemu_aio_wait_start(); | |
1305 | 1302 | acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors, |
1306 | 1303 | bdrv_rw_em_cb, &async_ret); |
1307 | - if (acb == NULL) { | |
1308 | - qemu_aio_wait_end(); | |
1304 | + if (acb == NULL) | |
1309 | 1305 | return -1; |
1310 | - } | |
1311 | 1306 | while (async_ret == NOT_DONE) { |
1312 | 1307 | qemu_aio_wait(); |
1313 | 1308 | } |
1314 | - qemu_aio_wait_end(); | |
1315 | 1309 | return async_ret; |
1316 | 1310 | } |
1317 | 1311 | ... | ... |
block.h
... | ... | @@ -90,11 +90,8 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num, |
90 | 90 | void bdrv_aio_cancel(BlockDriverAIOCB *acb); |
91 | 91 | |
92 | 92 | void qemu_aio_init(void); |
93 | -void qemu_aio_poll(void); | |
94 | 93 | void qemu_aio_flush(void); |
95 | -void qemu_aio_wait_start(void); | |
96 | 94 | void qemu_aio_wait(void); |
97 | -void qemu_aio_wait_end(void); | |
98 | 95 | |
99 | 96 | int qemu_key_check(BlockDriverState *bs, const char *name); |
100 | 97 | ... | ... |
vl.c