Commit baf35cb90204d75404892aa4e52628ae7a00669b

Authored by aliguori
1 parent 27982661

Use signalfd() to work around signal/select race

This patch introduces signalfd() to work around the signal/select race in
checking for AIO completions.  For platforms that don't support signalfd(), we
emulate it with threads.

There was a long discussion about this approach.  I don't believe there are any
fundamental problems with this approach and I believe eliminating the use of
signals is a good thing.

I've tested Windows and Linux using Windows and Linux guests.  I've also checked
for disk IO performance regressions.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5187 c046a42c-6fe2-441c-8c8c-71466251a162
Makefile
@@ -177,7 +177,7 @@ QEMU_IMG_BLOCK_OBJS = $(BLOCK_OBJS) @@ -177,7 +177,7 @@ QEMU_IMG_BLOCK_OBJS = $(BLOCK_OBJS)
177 ifdef CONFIG_WIN32 177 ifdef CONFIG_WIN32
178 QEMU_IMG_BLOCK_OBJS += qemu-img-block-raw-win32.o 178 QEMU_IMG_BLOCK_OBJS += qemu-img-block-raw-win32.o
179 else 179 else
180 -QEMU_IMG_BLOCK_OBJS += nbd.o qemu-img-block-raw-posix.o 180 +QEMU_IMG_BLOCK_OBJS += nbd.o qemu-img-block-raw-posix.o compatfd.o
181 endif 181 endif
182 182
183 ###################################################################### 183 ######################################################################
@@ -195,7 +195,7 @@ qemu-nbd-%.o: %.c @@ -195,7 +195,7 @@ qemu-nbd-%.o: %.c
195 $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_NBD -c -o $@ $< 195 $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_NBD -c -o $@ $<
196 196
197 qemu-nbd$(EXESUF): qemu-nbd.o qemu-nbd-nbd.o qemu-img-block.o \ 197 qemu-nbd$(EXESUF): qemu-nbd.o qemu-nbd-nbd.o qemu-img-block.o \
198 - osdep.o qemu-nbd-block-raw-posix.o $(BLOCK_OBJS) 198 + osdep.o qemu-nbd-block-raw-posix.o compatfd.o $(BLOCK_OBJS)
199 $(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS) 199 $(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS)
200 200
201 # dyngen host tool 201 # dyngen host tool
Makefile.target
@@ -476,7 +476,7 @@ OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o @@ -476,7 +476,7 @@ OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o
476 ifdef CONFIG_WIN32 476 ifdef CONFIG_WIN32
477 OBJS+=block-raw-win32.o 477 OBJS+=block-raw-win32.o
478 else 478 else
479 -OBJS+=block-raw-posix.o 479 +OBJS+=block-raw-posix.o compatfd.o
480 endif 480 endif
481 481
482 LIBS+=-lz 482 LIBS+=-lz
block-raw-posix.c
@@ -25,8 +25,10 @@ @@ -25,8 +25,10 @@
25 #if !defined(QEMU_IMG) && !defined(QEMU_NBD) 25 #if !defined(QEMU_IMG) && !defined(QEMU_NBD)
26 #include "qemu-timer.h" 26 #include "qemu-timer.h"
27 #include "exec-all.h" 27 #include "exec-all.h"
  28 +#include "qemu-char.h"
28 #endif 29 #endif
29 #include "block_int.h" 30 #include "block_int.h"
  31 +#include "compatfd.h"
30 #include <assert.h> 32 #include <assert.h>
31 #ifdef CONFIG_AIO 33 #ifdef CONFIG_AIO
32 #include <aio.h> 34 #include <aio.h>
@@ -438,52 +440,12 @@ typedef struct RawAIOCB { @@ -438,52 +440,12 @@ typedef struct RawAIOCB {
438 int ret; 440 int ret;
439 } RawAIOCB; 441 } RawAIOCB;
440 442
  443 +static int aio_sig_fd = -1;
441 static int aio_sig_num = SIGUSR2; 444 static int aio_sig_num = SIGUSR2;
442 static RawAIOCB *first_aio; /* AIO issued */ 445 static RawAIOCB *first_aio; /* AIO issued */
443 static int aio_initialized = 0; 446 static int aio_initialized = 0;
444 447
445 -static void aio_signal_handler(int signum)  
446 -{  
447 -#if !defined(QEMU_IMG) && !defined(QEMU_NBD)  
448 - CPUState *env = cpu_single_env;  
449 - if (env) {  
450 - /* stop the currently executing cpu because a timer occured */  
451 - cpu_interrupt(env, CPU_INTERRUPT_EXIT);  
452 -#ifdef USE_KQEMU  
453 - if (env->kqemu_enabled) {  
454 - kqemu_cpu_interrupt(env);  
455 - }  
456 -#endif  
457 - }  
458 -#endif  
459 -}  
460 -  
461 -void qemu_aio_init(void)  
462 -{  
463 - struct sigaction act;  
464 -  
465 - aio_initialized = 1;  
466 -  
467 - sigfillset(&act.sa_mask);  
468 - act.sa_flags = 0; /* do not restart syscalls to interrupt select() */  
469 - act.sa_handler = aio_signal_handler;  
470 - sigaction(aio_sig_num, &act, NULL);  
471 -  
472 -#if defined(__GLIBC__) && defined(__linux__)  
473 - {  
474 - /* XXX: aio thread exit seems to hang on RedHat 9 and this init  
475 - seems to fix the problem. */  
476 - struct aioinit ai;  
477 - memset(&ai, 0, sizeof(ai));  
478 - ai.aio_threads = 1;  
479 - ai.aio_num = 1;  
480 - ai.aio_idle_time = 365 * 100000;  
481 - aio_init(&ai);  
482 - }  
483 -#endif  
484 -}  
485 -  
486 -void qemu_aio_poll(void) 448 +static void qemu_aio_poll(void *opaque)
487 { 449 {
488 RawAIOCB *acb, **pacb; 450 RawAIOCB *acb, **pacb;
489 int ret; 451 int ret;
@@ -524,49 +486,66 @@ void qemu_aio_poll(void) @@ -524,49 +486,66 @@ void qemu_aio_poll(void)
524 the_end: ; 486 the_end: ;
525 } 487 }
526 488
  489 +void qemu_aio_init(void)
  490 +{
  491 + sigset_t mask;
  492 +
  493 + aio_initialized = 1;
  494 +
  495 + /* Make sure to block AIO signal */
  496 + sigemptyset(&mask);
  497 + sigaddset(&mask, aio_sig_num);
  498 + sigprocmask(SIG_BLOCK, &mask, NULL);
  499 +
  500 + aio_sig_fd = qemu_signalfd(&mask);
  501 +#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
  502 + qemu_set_fd_handler2(aio_sig_fd, NULL, qemu_aio_poll, NULL, NULL);
  503 +#endif
  504 +
  505 +#if defined(__GLIBC__) && defined(__linux__)
  506 + {
  507 + /* XXX: aio thread exit seems to hang on RedHat 9 and this init
  508 + seems to fix the problem. */
  509 + struct aioinit ai;
  510 + memset(&ai, 0, sizeof(ai));
  511 + ai.aio_threads = 1;
  512 + ai.aio_num = 1;
  513 + ai.aio_idle_time = 365 * 100000;
  514 + aio_init(&ai);
  515 + }
  516 +#endif
  517 +}
  518 +
527 /* Wait for all IO requests to complete. */ 519 /* Wait for all IO requests to complete. */
528 void qemu_aio_flush(void) 520 void qemu_aio_flush(void)
529 { 521 {
530 - qemu_aio_wait_start();  
531 - qemu_aio_poll(); 522 + qemu_aio_poll(NULL);
532 while (first_aio) { 523 while (first_aio) {
533 qemu_aio_wait(); 524 qemu_aio_wait();
534 } 525 }
535 - qemu_aio_wait_end();  
536 -}  
537 -  
538 -/* wait until at least one AIO was handled */  
539 -static sigset_t wait_oset;  
540 -  
541 -void qemu_aio_wait_start(void)  
542 -{  
543 - sigset_t set;  
544 -  
545 - if (!aio_initialized)  
546 - qemu_aio_init();  
547 - sigemptyset(&set);  
548 - sigaddset(&set, aio_sig_num);  
549 - sigprocmask(SIG_BLOCK, &set, &wait_oset);  
550 } 526 }
551 527
552 void qemu_aio_wait(void) 528 void qemu_aio_wait(void)
553 { 529 {
554 - sigset_t set;  
555 - int nb_sigs; 530 + int ret;
556 531
557 #if !defined(QEMU_IMG) && !defined(QEMU_NBD) 532 #if !defined(QEMU_IMG) && !defined(QEMU_NBD)
558 if (qemu_bh_poll()) 533 if (qemu_bh_poll())
559 return; 534 return;
560 #endif 535 #endif
561 - sigemptyset(&set);  
562 - sigaddset(&set, aio_sig_num);  
563 - sigwait(&set, &nb_sigs);  
564 - qemu_aio_poll();  
565 -}  
566 536
567 -void qemu_aio_wait_end(void)  
568 -{  
569 - sigprocmask(SIG_SETMASK, &wait_oset, NULL); 537 + do {
  538 + fd_set rdfds;
  539 +
  540 + FD_ZERO(&rdfds);
  541 + FD_SET(aio_sig_fd, &rdfds);
  542 +
  543 + ret = select(aio_sig_fd + 1, &rdfds, NULL, NULL, NULL);
  544 + if (ret == -1 && errno == EINTR)
  545 + continue;
  546 + } while (ret == 0);
  547 +
  548 + qemu_aio_poll(NULL);
570 } 549 }
571 550
572 static RawAIOCB *raw_aio_setup(BlockDriverState *bs, 551 static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
@@ -704,18 +683,10 @@ void qemu_aio_init(void) @@ -704,18 +683,10 @@ void qemu_aio_init(void)
704 { 683 {
705 } 684 }
706 685
707 -void qemu_aio_poll(void)  
708 -{  
709 -}  
710 -  
711 void qemu_aio_flush(void) 686 void qemu_aio_flush(void)
712 { 687 {
713 } 688 }
714 689
715 -void qemu_aio_wait_start(void)  
716 -{  
717 -}  
718 -  
719 void qemu_aio_wait(void) 690 void qemu_aio_wait(void)
720 { 691 {
721 #if !defined(QEMU_IMG) && !defined(QEMU_NBD) 692 #if !defined(QEMU_IMG) && !defined(QEMU_NBD)
@@ -723,10 +694,6 @@ void qemu_aio_wait(void) @@ -723,10 +694,6 @@ void qemu_aio_wait(void)
723 #endif 694 #endif
724 } 695 }
725 696
726 -void qemu_aio_wait_end(void)  
727 -{  
728 -}  
729 -  
730 #endif /* CONFIG_AIO */ 697 #endif /* CONFIG_AIO */
731 698
732 static void raw_close(BlockDriverState *bs) 699 static void raw_close(BlockDriverState *bs)
block-raw-win32.c
@@ -350,18 +350,10 @@ void qemu_aio_init(void) @@ -350,18 +350,10 @@ void qemu_aio_init(void)
350 { 350 {
351 } 351 }
352 352
353 -void qemu_aio_poll(void)  
354 -{  
355 -}  
356 -  
357 void qemu_aio_flush(void) 353 void qemu_aio_flush(void)
358 { 354 {
359 } 355 }
360 356
361 -void qemu_aio_wait_start(void)  
362 -{  
363 -}  
364 -  
365 void qemu_aio_wait(void) 357 void qemu_aio_wait(void)
366 { 358 {
367 #ifndef QEMU_IMG 359 #ifndef QEMU_IMG
@@ -369,10 +361,6 @@ void qemu_aio_wait(void) @@ -369,10 +361,6 @@ void qemu_aio_wait(void)
369 #endif 361 #endif
370 } 362 }
371 363
372 -void qemu_aio_wait_end(void)  
373 -{  
374 -}  
375 -  
376 BlockDriver bdrv_raw = { 364 BlockDriver bdrv_raw = {
377 "raw", 365 "raw",
378 sizeof(BDRVRawState), 366 sizeof(BDRVRawState),
@@ -1280,17 +1280,15 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, @@ -1280,17 +1280,15 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1280 BlockDriverAIOCB *acb; 1280 BlockDriverAIOCB *acb;
1281 1281
1282 async_ret = NOT_DONE; 1282 async_ret = NOT_DONE;
1283 - qemu_aio_wait_start();  
1284 acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors, 1283 acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
1285 bdrv_rw_em_cb, &async_ret); 1284 bdrv_rw_em_cb, &async_ret);
1286 - if (acb == NULL) {  
1287 - qemu_aio_wait_end(); 1285 + if (acb == NULL)
1288 return -1; 1286 return -1;
1289 - } 1287 +
1290 while (async_ret == NOT_DONE) { 1288 while (async_ret == NOT_DONE) {
1291 qemu_aio_wait(); 1289 qemu_aio_wait();
1292 } 1290 }
1293 - qemu_aio_wait_end(); 1291 +
1294 return async_ret; 1292 return async_ret;
1295 } 1293 }
1296 1294
@@ -1301,17 +1299,13 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, @@ -1301,17 +1299,13 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1301 BlockDriverAIOCB *acb; 1299 BlockDriverAIOCB *acb;
1302 1300
1303 async_ret = NOT_DONE; 1301 async_ret = NOT_DONE;
1304 - qemu_aio_wait_start();  
1305 acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors, 1302 acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
1306 bdrv_rw_em_cb, &async_ret); 1303 bdrv_rw_em_cb, &async_ret);
1307 - if (acb == NULL) {  
1308 - qemu_aio_wait_end(); 1304 + if (acb == NULL)
1309 return -1; 1305 return -1;
1310 - }  
1311 while (async_ret == NOT_DONE) { 1306 while (async_ret == NOT_DONE) {
1312 qemu_aio_wait(); 1307 qemu_aio_wait();
1313 } 1308 }
1314 - qemu_aio_wait_end();  
1315 return async_ret; 1309 return async_ret;
1316 } 1310 }
1317 1311
@@ -90,11 +90,8 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num, @@ -90,11 +90,8 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
90 void bdrv_aio_cancel(BlockDriverAIOCB *acb); 90 void bdrv_aio_cancel(BlockDriverAIOCB *acb);
91 91
92 void qemu_aio_init(void); 92 void qemu_aio_init(void);
93 -void qemu_aio_poll(void);  
94 void qemu_aio_flush(void); 93 void qemu_aio_flush(void);
95 -void qemu_aio_wait_start(void);  
96 void qemu_aio_wait(void); 94 void qemu_aio_wait(void);
97 -void qemu_aio_wait_end(void);  
98 95
99 int qemu_key_check(BlockDriverState *bs, const char *name); 96 int qemu_key_check(BlockDriverState *bs, const char *name);
100 97
@@ -7482,7 +7482,6 @@ void main_loop_wait(int timeout) @@ -7482,7 +7482,6 @@ void main_loop_wait(int timeout)
7482 slirp_select_poll(&rfds, &wfds, &xfds); 7482 slirp_select_poll(&rfds, &wfds, &xfds);
7483 } 7483 }
7484 #endif 7484 #endif
7485 - qemu_aio_poll();  
7486 7485
7487 if (vm_running) { 7486 if (vm_running) {
7488 if (likely(!(cur_cpu->singlestep_enabled & SSTEP_NOTIMER))) 7487 if (likely(!(cur_cpu->singlestep_enabled & SSTEP_NOTIMER)))