Commit 9f7965c7e965c8b80da27048017a360b3c57c4af
1 parent
eeb438c1
Expand cache= option and use write-through caching by default
This patch changes the cache= option to accept none, writeback, or writethrough to control the host page cache behavior. By default, writethrough caching is now used which internally is implemented by using O_DSYNC to open the disk images. When using -snapshot, writeback is used by default since data integrity is not at all an issue. cache=none has the same behavior as cache=off previously. The latter syntax is still supported but now deprecated. I also cleaned up the O_DIRECT implementation to avoid many of the #ifdefs. Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5485 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
7 changed files
with
59 additions
and
38 deletions
block-raw-posix.c
... | ... | @@ -73,6 +73,11 @@ |
73 | 73 | #define DEBUG_BLOCK_PRINT(formatCstr, args...) |
74 | 74 | #endif |
75 | 75 | |
76 | +/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */ | |
77 | +#ifndef O_DIRECT | |
78 | +#define O_DIRECT O_DSYNC | |
79 | +#endif | |
80 | + | |
76 | 81 | #define FTYPE_FILE 0 |
77 | 82 | #define FTYPE_CD 1 |
78 | 83 | #define FTYPE_FD 2 |
... | ... | @@ -101,9 +106,7 @@ typedef struct BDRVRawState { |
101 | 106 | int fd_got_error; |
102 | 107 | int fd_media_changed; |
103 | 108 | #endif |
104 | -#if defined(O_DIRECT) | |
105 | 109 | uint8_t* aligned_buf; |
106 | -#endif | |
107 | 110 | } BDRVRawState; |
108 | 111 | |
109 | 112 | static int posix_aio_init(void); |
... | ... | @@ -129,10 +132,13 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) |
129 | 132 | } |
130 | 133 | if (flags & BDRV_O_CREAT) |
131 | 134 | open_flags |= O_CREAT | O_TRUNC; |
132 | -#ifdef O_DIRECT | |
133 | - if (flags & BDRV_O_DIRECT) | |
135 | + | |
136 | + /* Use O_DSYNC for write-through caching, no flags for write-back caching, | |
137 | + * and O_DIRECT for no caching. */ | |
138 | + if ((flags & BDRV_O_NOCACHE)) | |
134 | 139 | open_flags |= O_DIRECT; |
135 | -#endif | |
140 | + else if (!(flags & BDRV_O_CACHE_WB)) | |
141 | + open_flags |= O_DSYNC; | |
136 | 142 | |
137 | 143 | s->type = FTYPE_FILE; |
138 | 144 | |
... | ... | @@ -146,9 +152,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) |
146 | 152 | s->fd = fd; |
147 | 153 | for (i = 0; i < RAW_FD_POOL_SIZE; i++) |
148 | 154 | s->fd_pool[i] = -1; |
149 | -#if defined(O_DIRECT) | |
150 | 155 | s->aligned_buf = NULL; |
151 | - if (flags & BDRV_O_DIRECT) { | |
156 | + if ((flags & BDRV_O_NOCACHE)) { | |
152 | 157 | s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE); |
153 | 158 | if (s->aligned_buf == NULL) { |
154 | 159 | ret = -errno; |
... | ... | @@ -156,7 +161,6 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) |
156 | 161 | return ret; |
157 | 162 | } |
158 | 163 | } |
159 | -#endif | |
160 | 164 | return 0; |
161 | 165 | } |
162 | 166 | |
... | ... | @@ -281,7 +285,6 @@ label__raw_write__success: |
281 | 285 | } |
282 | 286 | |
283 | 287 | |
284 | -#if defined(O_DIRECT) | |
285 | 288 | /* |
286 | 289 | * offset and count are in bytes and possibly not aligned. For files opened |
287 | 290 | * with O_DIRECT, necessary alignments are ensured before calling |
... | ... | @@ -432,12 +435,6 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset, |
432 | 435 | return raw_pwrite_aligned(bs, offset, buf, count) + sum; |
433 | 436 | } |
434 | 437 | |
435 | -#else | |
436 | -#define raw_pread raw_pread_aligned | |
437 | -#define raw_pwrite raw_pwrite_aligned | |
438 | -#endif | |
439 | - | |
440 | - | |
441 | 438 | #ifdef CONFIG_AIO |
442 | 439 | /***********************************************************/ |
443 | 440 | /* Unix AIO using POSIX AIO */ |
... | ... | @@ -661,7 +658,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, |
661 | 658 | * If O_DIRECT is used and the buffer is not aligned fall back |
662 | 659 | * to synchronous IO. |
663 | 660 | */ |
664 | -#if defined(O_DIRECT) | |
665 | 661 | BDRVRawState *s = bs->opaque; |
666 | 662 | |
667 | 663 | if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { |
... | ... | @@ -672,7 +668,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, |
672 | 668 | qemu_bh_schedule(bh); |
673 | 669 | return &acb->common; |
674 | 670 | } |
675 | -#endif | |
676 | 671 | |
677 | 672 | acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); |
678 | 673 | if (!acb) |
... | ... | @@ -694,7 +689,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, |
694 | 689 | * If O_DIRECT is used and the buffer is not aligned fall back |
695 | 690 | * to synchronous IO. |
696 | 691 | */ |
697 | -#if defined(O_DIRECT) | |
698 | 692 | BDRVRawState *s = bs->opaque; |
699 | 693 | |
700 | 694 | if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { |
... | ... | @@ -705,7 +699,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, |
705 | 699 | qemu_bh_schedule(bh); |
706 | 700 | return &acb->common; |
707 | 701 | } |
708 | -#endif | |
709 | 702 | |
710 | 703 | acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); |
711 | 704 | if (!acb) |
... | ... | @@ -770,10 +763,8 @@ static void raw_close(BlockDriverState *bs) |
770 | 763 | if (s->fd >= 0) { |
771 | 764 | close(s->fd); |
772 | 765 | s->fd = -1; |
773 | -#if defined(O_DIRECT) | |
774 | 766 | if (s->aligned_buf != NULL) |
775 | 767 | qemu_free(s->aligned_buf); |
776 | -#endif | |
777 | 768 | } |
778 | 769 | raw_close_fd_pool(s); |
779 | 770 | } |
... | ... | @@ -1003,10 +994,12 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) |
1003 | 994 | open_flags |= O_RDONLY; |
1004 | 995 | bs->read_only = 1; |
1005 | 996 | } |
1006 | -#ifdef O_DIRECT | |
1007 | - if (flags & BDRV_O_DIRECT) | |
997 | + /* Use O_DSYNC for write-through caching, no flags for write-back caching, | |
998 | + * and O_DIRECT for no caching. */ | |
999 | + if ((flags & BDRV_O_NOCACHE)) | |
1008 | 1000 | open_flags |= O_DIRECT; |
1009 | -#endif | |
1001 | + else if (!(flags & BDRV_O_CACHE_WB)) | |
1002 | + open_flags |= O_DSYNC; | |
1010 | 1003 | |
1011 | 1004 | s->type = FTYPE_FILE; |
1012 | 1005 | #if defined(__linux__) | ... | ... |
block-raw-win32.c
... | ... | @@ -104,8 +104,10 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) |
104 | 104 | #else |
105 | 105 | overlapped = FILE_ATTRIBUTE_NORMAL; |
106 | 106 | #endif |
107 | - if (flags & BDRV_O_DIRECT) | |
107 | + if ((flags & BDRV_O_NOCACHE)) | |
108 | 108 | overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; |
109 | + else if (!(flags & BDRV_O_CACHE_WB)) | |
110 | + overlapped |= FILE_FLAG_WRITE_THROUGH; | |
109 | 111 | s->hfile = CreateFile(filename, access_flags, |
110 | 112 | FILE_SHARE_READ, NULL, |
111 | 113 | create_flags, overlapped, NULL); |
... | ... | @@ -440,8 +442,10 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) |
440 | 442 | #else |
441 | 443 | overlapped = FILE_ATTRIBUTE_NORMAL; |
442 | 444 | #endif |
443 | - if (flags & BDRV_O_DIRECT) | |
445 | + if ((flags & BDRV_O_NOCACHE)) | |
444 | 446 | overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; |
447 | + else if (!(flags & BDRV_O_CACHE_WB)) | |
448 | + overlapped |= FILE_FLAG_WRITE_THROUGH; | |
445 | 449 | s->hfile = CreateFile(filename, access_flags, |
446 | 450 | FILE_SHARE_READ, NULL, |
447 | 451 | create_flags, overlapped, NULL); | ... | ... |
block.c
... | ... | @@ -395,12 +395,12 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, |
395 | 395 | /* Note: for compatibility, we open disk image files as RDWR, and |
396 | 396 | RDONLY as fallback */ |
397 | 397 | if (!(flags & BDRV_O_FILE)) |
398 | - open_flags = BDRV_O_RDWR | (flags & BDRV_O_DIRECT); | |
398 | + open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK); | |
399 | 399 | else |
400 | 400 | open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT); |
401 | 401 | ret = drv->bdrv_open(bs, filename, open_flags); |
402 | 402 | if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) { |
403 | - ret = drv->bdrv_open(bs, filename, BDRV_O_RDONLY); | |
403 | + ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR); | |
404 | 404 | bs->read_only = 1; |
405 | 405 | } |
406 | 406 | if (ret < 0) { |
... | ... | @@ -427,7 +427,7 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, |
427 | 427 | } |
428 | 428 | path_combine(backing_filename, sizeof(backing_filename), |
429 | 429 | filename, bs->backing_file); |
430 | - if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0) | |
430 | + if (bdrv_open(bs->backing_hd, backing_filename, open_flags) < 0) | |
431 | 431 | goto fail; |
432 | 432 | } |
433 | 433 | ... | ... |
block.h
... | ... | @@ -47,7 +47,10 @@ typedef struct QEMUSnapshotInfo { |
47 | 47 | use a disk image format on top of |
48 | 48 | it (default for |
49 | 49 | bdrv_file_open()) */ |
50 | -#define BDRV_O_DIRECT 0x0020 | |
50 | +#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ | |
51 | +#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */ | |
52 | + | |
53 | +#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB) | |
51 | 54 | |
52 | 55 | void bdrv_info(void); |
53 | 56 | void bdrv_info_stats(void); | ... | ... |
qemu-doc.texi
... | ... | @@ -267,13 +267,28 @@ These options have the same definition as they have in @option{-hdachs}. |
267 | 267 | @item snapshot=@var{snapshot} |
268 | 268 | @var{snapshot} is "on" or "off" and allows to enable snapshot for given drive (see @option{-snapshot}). |
269 | 269 | @item cache=@var{cache} |
270 | -@var{cache} is "on" or "off" and allows to disable host cache to access data. | |
270 | +@var{cache} is "none", "writeback", or "writethrough" and controls how the host cache is used to access block data. | |
271 | 271 | @item format=@var{format} |
272 | 272 | Specify which disk @var{format} will be used rather than detecting |
273 | 273 | the format. Can be used to specify format=raw to avoid interpreting |
274 | 274 | an untrusted format header. |
275 | 275 | @end table |
276 | 276 | |
277 | +By default, writethrough caching is used for all block devices. This means that | |
278 | +the host page cache will be used to read and write data but write notification | |
279 | +will be sent to the guest only when the data has been reported as written by | |
280 | +the storage subsystem. | |
281 | + | |
282 | +Writeback caching will report data writes as completed as soon as the data is | |
283 | +present in the host page cache. This is safe as long as you trust your host. | |
284 | +If your host crashes or loses power, then the guest may experience data | |
285 | +corruption. When using the @option{-snapshot} option, writeback caching is | |
286 | +used by default. | |
287 | + | |
288 | +The host page cache can be avoided entirely with @option{cache=none}. This will | |
289 | +attempt to do disk IO directly to the guest's memory. QEMU may still perform | |
290 | +an internal copy of the data. | |
291 | + | |
277 | 292 | Instead of @option{-cdrom} you can use: |
278 | 293 | @example |
279 | 294 | qemu -drive file=file,index=2,media=cdrom | ... | ... |
qemu-nbd.c
vl.c
... | ... | @@ -5648,10 +5648,12 @@ static int drive_init(struct drive_opt *arg, int snapshot, |
5648 | 5648 | } |
5649 | 5649 | |
5650 | 5650 | if (get_param_value(buf, sizeof(buf), "cache", str)) { |
5651 | - if (!strcmp(buf, "off")) | |
5651 | + if (!strcmp(buf, "off") || !strcmp(buf, "none")) | |
5652 | 5652 | cache = 0; |
5653 | - else if (!strcmp(buf, "on")) | |
5653 | + else if (!strcmp(buf, "writethrough")) | |
5654 | 5654 | cache = 1; |
5655 | + else if (!strcmp(buf, "writeback")) | |
5656 | + cache = 2; | |
5655 | 5657 | else { |
5656 | 5658 | fprintf(stderr, "qemu: invalid cache option\n"); |
5657 | 5659 | return -1; |
... | ... | @@ -5770,10 +5772,14 @@ static int drive_init(struct drive_opt *arg, int snapshot, |
5770 | 5772 | if (!file[0]) |
5771 | 5773 | return 0; |
5772 | 5774 | bdrv_flags = 0; |
5773 | - if (snapshot) | |
5775 | + if (snapshot) { | |
5774 | 5776 | bdrv_flags |= BDRV_O_SNAPSHOT; |
5775 | - if (!cache) | |
5776 | - bdrv_flags |= BDRV_O_DIRECT; | |
5777 | + cache = 2; /* always use write-back with snapshot */ | |
5778 | + } | |
5779 | + if (cache == 0) /* no caching */ | |
5780 | + bdrv_flags |= BDRV_O_NOCACHE; | |
5781 | + else if (cache == 2) /* write-back */ | |
5782 | + bdrv_flags |= BDRV_O_CACHE_WB; | |
5777 | 5783 | if (bdrv_open2(bdrv, file, bdrv_flags, drv) < 0 || qemu_key_check(bdrv, file)) { |
5778 | 5784 | fprintf(stderr, "qemu: could not open disk image %s\n", |
5779 | 5785 | file); |
... | ... | @@ -8145,7 +8151,7 @@ static void help(int exitcode) |
8145 | 8151 | "-cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master)\n" |
8146 | 8152 | "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n" |
8147 | 8153 | " [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n" |
8148 | - " [,cache=on|off][,format=f]\n" | |
8154 | + " [,cache=writethrough|writeback|none][,format=f]\n" | |
8149 | 8155 | " use 'file' as a drive image\n" |
8150 | 8156 | "-mtdblock file use 'file' as on-board Flash memory image\n" |
8151 | 8157 | "-sd file use 'file' as SecureDigital card image\n" | ... | ... |