Commit 9f7965c7e965c8b80da27048017a360b3c57c4af
1 parent
eeb438c1
Expand cache= option and use write-through caching by default
This patch changes the cache= option to accept none, writeback, or writethrough to control the host page cache behavior. By default, writethrough caching is now used, which internally is implemented by using O_DSYNC to open the disk images. When using -snapshot, writeback is used by default since data integrity is not at all an issue. cache=none has the same behavior as cache=off previously. The latter syntax is still supported but now deprecated. I also cleaned up the O_DIRECT implementation to avoid many of the #ifdefs. Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5485 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
7 changed files
with
59 additions
and
38 deletions
block-raw-posix.c
| ... | ... | @@ -73,6 +73,11 @@ |
| 73 | 73 | #define DEBUG_BLOCK_PRINT(formatCstr, args...) |
| 74 | 74 | #endif |
| 75 | 75 | |
| 76 | +/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */ | |
| 77 | +#ifndef O_DIRECT | |
| 78 | +#define O_DIRECT O_DSYNC | |
| 79 | +#endif | |
| 80 | + | |
| 76 | 81 | #define FTYPE_FILE 0 |
| 77 | 82 | #define FTYPE_CD 1 |
| 78 | 83 | #define FTYPE_FD 2 |
| ... | ... | @@ -101,9 +106,7 @@ typedef struct BDRVRawState { |
| 101 | 106 | int fd_got_error; |
| 102 | 107 | int fd_media_changed; |
| 103 | 108 | #endif |
| 104 | -#if defined(O_DIRECT) | |
| 105 | 109 | uint8_t* aligned_buf; |
| 106 | -#endif | |
| 107 | 110 | } BDRVRawState; |
| 108 | 111 | |
| 109 | 112 | static int posix_aio_init(void); |
| ... | ... | @@ -129,10 +132,13 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) |
| 129 | 132 | } |
| 130 | 133 | if (flags & BDRV_O_CREAT) |
| 131 | 134 | open_flags |= O_CREAT | O_TRUNC; |
| 132 | -#ifdef O_DIRECT | |
| 133 | - if (flags & BDRV_O_DIRECT) | |
| 135 | + | |
| 136 | + /* Use O_DSYNC for write-through caching, no flags for write-back caching, | |
| 137 | + * and O_DIRECT for no caching. */ | |
| 138 | + if ((flags & BDRV_O_NOCACHE)) | |
| 134 | 139 | open_flags |= O_DIRECT; |
| 135 | -#endif | |
| 140 | + else if (!(flags & BDRV_O_CACHE_WB)) | |
| 141 | + open_flags |= O_DSYNC; | |
| 136 | 142 | |
| 137 | 143 | s->type = FTYPE_FILE; |
| 138 | 144 | |
| ... | ... | @@ -146,9 +152,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) |
| 146 | 152 | s->fd = fd; |
| 147 | 153 | for (i = 0; i < RAW_FD_POOL_SIZE; i++) |
| 148 | 154 | s->fd_pool[i] = -1; |
| 149 | -#if defined(O_DIRECT) | |
| 150 | 155 | s->aligned_buf = NULL; |
| 151 | - if (flags & BDRV_O_DIRECT) { | |
| 156 | + if ((flags & BDRV_O_NOCACHE)) { | |
| 152 | 157 | s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE); |
| 153 | 158 | if (s->aligned_buf == NULL) { |
| 154 | 159 | ret = -errno; |
| ... | ... | @@ -156,7 +161,6 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) |
| 156 | 161 | return ret; |
| 157 | 162 | } |
| 158 | 163 | } |
| 159 | -#endif | |
| 160 | 164 | return 0; |
| 161 | 165 | } |
| 162 | 166 | |
| ... | ... | @@ -281,7 +285,6 @@ label__raw_write__success: |
| 281 | 285 | } |
| 282 | 286 | |
| 283 | 287 | |
| 284 | -#if defined(O_DIRECT) | |
| 285 | 288 | /* |
| 286 | 289 | * offset and count are in bytes and possibly not aligned. For files opened |
| 287 | 290 | * with O_DIRECT, necessary alignments are ensured before calling |
| ... | ... | @@ -432,12 +435,6 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset, |
| 432 | 435 | return raw_pwrite_aligned(bs, offset, buf, count) + sum; |
| 433 | 436 | } |
| 434 | 437 | |
| 435 | -#else | |
| 436 | -#define raw_pread raw_pread_aligned | |
| 437 | -#define raw_pwrite raw_pwrite_aligned | |
| 438 | -#endif | |
| 439 | - | |
| 440 | - | |
| 441 | 438 | #ifdef CONFIG_AIO |
| 442 | 439 | /***********************************************************/ |
| 443 | 440 | /* Unix AIO using POSIX AIO */ |
| ... | ... | @@ -661,7 +658,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, |
| 661 | 658 | * If O_DIRECT is used and the buffer is not aligned fall back |
| 662 | 659 | * to synchronous IO. |
| 663 | 660 | */ |
| 664 | -#if defined(O_DIRECT) | |
| 665 | 661 | BDRVRawState *s = bs->opaque; |
| 666 | 662 | |
| 667 | 663 | if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { |
| ... | ... | @@ -672,7 +668,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, |
| 672 | 668 | qemu_bh_schedule(bh); |
| 673 | 669 | return &acb->common; |
| 674 | 670 | } |
| 675 | -#endif | |
| 676 | 671 | |
| 677 | 672 | acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); |
| 678 | 673 | if (!acb) |
| ... | ... | @@ -694,7 +689,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, |
| 694 | 689 | * If O_DIRECT is used and the buffer is not aligned fall back |
| 695 | 690 | * to synchronous IO. |
| 696 | 691 | */ |
| 697 | -#if defined(O_DIRECT) | |
| 698 | 692 | BDRVRawState *s = bs->opaque; |
| 699 | 693 | |
| 700 | 694 | if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { |
| ... | ... | @@ -705,7 +699,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, |
| 705 | 699 | qemu_bh_schedule(bh); |
| 706 | 700 | return &acb->common; |
| 707 | 701 | } |
| 708 | -#endif | |
| 709 | 702 | |
| 710 | 703 | acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); |
| 711 | 704 | if (!acb) |
| ... | ... | @@ -770,10 +763,8 @@ static void raw_close(BlockDriverState *bs) |
| 770 | 763 | if (s->fd >= 0) { |
| 771 | 764 | close(s->fd); |
| 772 | 765 | s->fd = -1; |
| 773 | -#if defined(O_DIRECT) | |
| 774 | 766 | if (s->aligned_buf != NULL) |
| 775 | 767 | qemu_free(s->aligned_buf); |
| 776 | -#endif | |
| 777 | 768 | } |
| 778 | 769 | raw_close_fd_pool(s); |
| 779 | 770 | } |
| ... | ... | @@ -1003,10 +994,12 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) |
| 1003 | 994 | open_flags |= O_RDONLY; |
| 1004 | 995 | bs->read_only = 1; |
| 1005 | 996 | } |
| 1006 | -#ifdef O_DIRECT | |
| 1007 | - if (flags & BDRV_O_DIRECT) | |
| 997 | + /* Use O_DSYNC for write-through caching, no flags for write-back caching, | |
| 998 | + * and O_DIRECT for no caching. */ | |
| 999 | + if ((flags & BDRV_O_NOCACHE)) | |
| 1008 | 1000 | open_flags |= O_DIRECT; |
| 1009 | -#endif | |
| 1001 | + else if (!(flags & BDRV_O_CACHE_WB)) | |
| 1002 | + open_flags |= O_DSYNC; | |
| 1010 | 1003 | |
| 1011 | 1004 | s->type = FTYPE_FILE; |
| 1012 | 1005 | #if defined(__linux__) | ... | ... |
block-raw-win32.c
| ... | ... | @@ -104,8 +104,10 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) |
| 104 | 104 | #else |
| 105 | 105 | overlapped = FILE_ATTRIBUTE_NORMAL; |
| 106 | 106 | #endif |
| 107 | - if (flags & BDRV_O_DIRECT) | |
| 107 | + if ((flags & BDRV_O_NOCACHE)) | |
| 108 | 108 | overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; |
| 109 | + else if (!(flags & BDRV_O_CACHE_WB)) | |
| 110 | + overlapped |= FILE_FLAG_WRITE_THROUGH; | |
| 109 | 111 | s->hfile = CreateFile(filename, access_flags, |
| 110 | 112 | FILE_SHARE_READ, NULL, |
| 111 | 113 | create_flags, overlapped, NULL); |
| ... | ... | @@ -440,8 +442,10 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) |
| 440 | 442 | #else |
| 441 | 443 | overlapped = FILE_ATTRIBUTE_NORMAL; |
| 442 | 444 | #endif |
| 443 | - if (flags & BDRV_O_DIRECT) | |
| 445 | + if ((flags & BDRV_O_NOCACHE)) | |
| 444 | 446 | overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; |
| 447 | + else if (!(flags & BDRV_O_CACHE_WB)) | |
| 448 | + overlapped |= FILE_FLAG_WRITE_THROUGH; | |
| 445 | 449 | s->hfile = CreateFile(filename, access_flags, |
| 446 | 450 | FILE_SHARE_READ, NULL, |
| 447 | 451 | create_flags, overlapped, NULL); | ... | ... |
block.c
| ... | ... | @@ -395,12 +395,12 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, |
| 395 | 395 | /* Note: for compatibility, we open disk image files as RDWR, and |
| 396 | 396 | RDONLY as fallback */ |
| 397 | 397 | if (!(flags & BDRV_O_FILE)) |
| 398 | - open_flags = BDRV_O_RDWR | (flags & BDRV_O_DIRECT); | |
| 398 | + open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK); | |
| 399 | 399 | else |
| 400 | 400 | open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT); |
| 401 | 401 | ret = drv->bdrv_open(bs, filename, open_flags); |
| 402 | 402 | if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) { |
| 403 | - ret = drv->bdrv_open(bs, filename, BDRV_O_RDONLY); | |
| 403 | + ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR); | |
| 404 | 404 | bs->read_only = 1; |
| 405 | 405 | } |
| 406 | 406 | if (ret < 0) { |
| ... | ... | @@ -427,7 +427,7 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, |
| 427 | 427 | } |
| 428 | 428 | path_combine(backing_filename, sizeof(backing_filename), |
| 429 | 429 | filename, bs->backing_file); |
| 430 | - if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0) | |
| 430 | + if (bdrv_open(bs->backing_hd, backing_filename, open_flags) < 0) | |
| 431 | 431 | goto fail; |
| 432 | 432 | } |
| 433 | 433 | ... | ... |
block.h
| ... | ... | @@ -47,7 +47,10 @@ typedef struct QEMUSnapshotInfo { |
| 47 | 47 | use a disk image format on top of |
| 48 | 48 | it (default for |
| 49 | 49 | bdrv_file_open()) */ |
| 50 | -#define BDRV_O_DIRECT 0x0020 | |
| 50 | +#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ | |
| 51 | +#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */ | |
| 52 | + | |
| 53 | +#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB) | |
| 51 | 54 | |
| 52 | 55 | void bdrv_info(void); |
| 53 | 56 | void bdrv_info_stats(void); | ... | ... |
qemu-doc.texi
| ... | ... | @@ -267,13 +267,28 @@ These options have the same definition as they have in @option{-hdachs}. |
| 267 | 267 | @item snapshot=@var{snapshot} |
| 268 | 268 | @var{snapshot} is "on" or "off" and allows to enable snapshot for given drive (see @option{-snapshot}). |
| 269 | 269 | @item cache=@var{cache} |
| 270 | -@var{cache} is "on" or "off" and allows to disable host cache to access data. | |
| 270 | +@var{cache} is "none", "writeback", or "writethrough" and controls how the host cache is used to access block data. | |
| 271 | 271 | @item format=@var{format} |
| 272 | 272 | Specify which disk @var{format} will be used rather than detecting |
| 273 | 273 | the format. Can be used to specify format=raw to avoid interpreting |
| 274 | 274 | an untrusted format header. |
| 275 | 275 | @end table |
| 276 | 276 | |
| | 277 | +By default, writethrough caching is used for all block devices. This means that | |
| 278 | +the host page cache will be used to read and write data but write notification | |
| 279 | +will be sent to the guest only when the data has been reported as written by | |
| 280 | +the storage subsystem. | |
| 281 | + | |
| 282 | +Writeback caching will report data writes as completed as soon as the data is | |
| 283 | +present in the host page cache. This is safe as long as you trust your host. | |
| 284 | +If your host crashes or loses power, then the guest may experience data | |
| 285 | +corruption. When using the @option{-snapshot} option, writeback caching is | |
| 286 | +used by default. | |
| 287 | + | |
| | 288 | +The host page cache can be avoided entirely with @option{cache=none}. This will | |
| | 289 | +attempt to do disk IO directly to the guest's memory. QEMU may still perform | |
| 290 | +an internal copy of the data. | |
| 291 | + | |
| 277 | 292 | Instead of @option{-cdrom} you can use: |
| 278 | 293 | @example |
| 279 | 294 | qemu -drive file=file,index=2,media=cdrom | ... | ... |
qemu-nbd.c
vl.c
| ... | ... | @@ -5648,10 +5648,12 @@ static int drive_init(struct drive_opt *arg, int snapshot, |
| 5648 | 5648 | } |
| 5649 | 5649 | |
| 5650 | 5650 | if (get_param_value(buf, sizeof(buf), "cache", str)) { |
| 5651 | - if (!strcmp(buf, "off")) | |
| 5651 | + if (!strcmp(buf, "off") || !strcmp(buf, "none")) | |
| 5652 | 5652 | cache = 0; |
| 5653 | - else if (!strcmp(buf, "on")) | |
| 5653 | + else if (!strcmp(buf, "writethrough")) | |
| 5654 | 5654 | cache = 1; |
| 5655 | + else if (!strcmp(buf, "writeback")) | |
| 5656 | + cache = 2; | |
| 5655 | 5657 | else { |
| 5656 | 5658 | fprintf(stderr, "qemu: invalid cache option\n"); |
| 5657 | 5659 | return -1; |
| ... | ... | @@ -5770,10 +5772,14 @@ static int drive_init(struct drive_opt *arg, int snapshot, |
| 5770 | 5772 | if (!file[0]) |
| 5771 | 5773 | return 0; |
| 5772 | 5774 | bdrv_flags = 0; |
| 5773 | - if (snapshot) | |
| 5775 | + if (snapshot) { | |
| 5774 | 5776 | bdrv_flags |= BDRV_O_SNAPSHOT; |
| 5775 | - if (!cache) | |
| 5776 | - bdrv_flags |= BDRV_O_DIRECT; | |
| 5777 | + cache = 2; /* always use write-back with snapshot */ | |
| 5778 | + } | |
| 5779 | + if (cache == 0) /* no caching */ | |
| 5780 | + bdrv_flags |= BDRV_O_NOCACHE; | |
| 5781 | + else if (cache == 2) /* write-back */ | |
| 5782 | + bdrv_flags |= BDRV_O_CACHE_WB; | |
| 5777 | 5783 | if (bdrv_open2(bdrv, file, bdrv_flags, drv) < 0 || qemu_key_check(bdrv, file)) { |
| 5778 | 5784 | fprintf(stderr, "qemu: could not open disk image %s\n", |
| 5779 | 5785 | file); |
| ... | ... | @@ -8145,7 +8151,7 @@ static void help(int exitcode) |
| 8145 | 8151 | "-cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master)\n" |
| 8146 | 8152 | "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n" |
| 8147 | 8153 | " [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n" |
| 8148 | - " [,cache=on|off][,format=f]\n" | |
| 8154 | + " [,cache=writethrough|writeback|none][,format=f]\n" | |
| 8149 | 8155 | " use 'file' as a drive image\n" |
| 8150 | 8156 | "-mtdblock file use 'file' as on-board Flash memory image\n" |
| 8151 | 8157 | "-sd file use 'file' as SecureDigital card image\n" | ... | ... |