Commit 9f7965c7e965c8b80da27048017a360b3c57c4af

Authored by aliguori
1 parent eeb438c1

Expand cache= option and use write-through caching by default

This patch changes the cache= option to accept none, writeback, or writethrough
to control the host page cache behavior.  By default, writethrough caching is
now used, which internally is implemented by using O_DSYNC to open the disk
images.  When using -snapshot, writeback is used by default since data integrity
is not at all an issue.

cache=none has the same behavior as cache=off previously.  The latter syntax is
still supported but now deprecated.  I also cleaned up the O_DIRECT
implementation to avoid many of the #ifdefs.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5485 c046a42c-6fe2-441c-8c8c-71466251a162
block-raw-posix.c
@@ -73,6 +73,11 @@ @@ -73,6 +73,11 @@
73 #define DEBUG_BLOCK_PRINT(formatCstr, args...) 73 #define DEBUG_BLOCK_PRINT(formatCstr, args...)
74 #endif 74 #endif
75 75
  76 +/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
  77 +#ifndef O_DIRECT
  78 +#define O_DIRECT O_DSYNC
  79 +#endif
  80 +
76 #define FTYPE_FILE 0 81 #define FTYPE_FILE 0
77 #define FTYPE_CD 1 82 #define FTYPE_CD 1
78 #define FTYPE_FD 2 83 #define FTYPE_FD 2
@@ -101,9 +106,7 @@ typedef struct BDRVRawState { @@ -101,9 +106,7 @@ typedef struct BDRVRawState {
101 int fd_got_error; 106 int fd_got_error;
102 int fd_media_changed; 107 int fd_media_changed;
103 #endif 108 #endif
104 -#if defined(O_DIRECT)  
105 uint8_t* aligned_buf; 109 uint8_t* aligned_buf;
106 -#endif  
107 } BDRVRawState; 110 } BDRVRawState;
108 111
109 static int posix_aio_init(void); 112 static int posix_aio_init(void);
@@ -129,10 +132,13 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) @@ -129,10 +132,13 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
129 } 132 }
130 if (flags & BDRV_O_CREAT) 133 if (flags & BDRV_O_CREAT)
131 open_flags |= O_CREAT | O_TRUNC; 134 open_flags |= O_CREAT | O_TRUNC;
132 -#ifdef O_DIRECT  
133 - if (flags & BDRV_O_DIRECT) 135 +
  136 + /* Use O_DSYNC for write-through caching, no flags for write-back caching,
  137 + * and O_DIRECT for no caching. */
  138 + if ((flags & BDRV_O_NOCACHE))
134 open_flags |= O_DIRECT; 139 open_flags |= O_DIRECT;
135 -#endif 140 + else if (!(flags & BDRV_O_CACHE_WB))
  141 + open_flags |= O_DSYNC;
136 142
137 s->type = FTYPE_FILE; 143 s->type = FTYPE_FILE;
138 144
@@ -146,9 +152,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) @@ -146,9 +152,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
146 s->fd = fd; 152 s->fd = fd;
147 for (i = 0; i < RAW_FD_POOL_SIZE; i++) 153 for (i = 0; i < RAW_FD_POOL_SIZE; i++)
148 s->fd_pool[i] = -1; 154 s->fd_pool[i] = -1;
149 -#if defined(O_DIRECT)  
150 s->aligned_buf = NULL; 155 s->aligned_buf = NULL;
151 - if (flags & BDRV_O_DIRECT) { 156 + if ((flags & BDRV_O_NOCACHE)) {
152 s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE); 157 s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
153 if (s->aligned_buf == NULL) { 158 if (s->aligned_buf == NULL) {
154 ret = -errno; 159 ret = -errno;
@@ -156,7 +161,6 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) @@ -156,7 +161,6 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
156 return ret; 161 return ret;
157 } 162 }
158 } 163 }
159 -#endif  
160 return 0; 164 return 0;
161 } 165 }
162 166
@@ -281,7 +285,6 @@ label__raw_write__success: @@ -281,7 +285,6 @@ label__raw_write__success:
281 } 285 }
282 286
283 287
284 -#if defined(O_DIRECT)  
285 /* 288 /*
286 * offset and count are in bytes and possibly not aligned. For files opened 289 * offset and count are in bytes and possibly not aligned. For files opened
287 * with O_DIRECT, necessary alignments are ensured before calling 290 * with O_DIRECT, necessary alignments are ensured before calling
@@ -432,12 +435,6 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset, @@ -432,12 +435,6 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset,
432 return raw_pwrite_aligned(bs, offset, buf, count) + sum; 435 return raw_pwrite_aligned(bs, offset, buf, count) + sum;
433 } 436 }
434 437
435 -#else  
436 -#define raw_pread raw_pread_aligned  
437 -#define raw_pwrite raw_pwrite_aligned  
438 -#endif  
439 -  
440 -  
441 #ifdef CONFIG_AIO 438 #ifdef CONFIG_AIO
442 /***********************************************************/ 439 /***********************************************************/
443 /* Unix AIO using POSIX AIO */ 440 /* Unix AIO using POSIX AIO */
@@ -661,7 +658,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, @@ -661,7 +658,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
661 * If O_DIRECT is used and the buffer is not aligned fall back 658 * If O_DIRECT is used and the buffer is not aligned fall back
662 * to synchronous IO. 659 * to synchronous IO.
663 */ 660 */
664 -#if defined(O_DIRECT)  
665 BDRVRawState *s = bs->opaque; 661 BDRVRawState *s = bs->opaque;
666 662
667 if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { 663 if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
@@ -672,7 +668,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, @@ -672,7 +668,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
672 qemu_bh_schedule(bh); 668 qemu_bh_schedule(bh);
673 return &acb->common; 669 return &acb->common;
674 } 670 }
675 -#endif  
676 671
677 acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); 672 acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
678 if (!acb) 673 if (!acb)
@@ -694,7 +689,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, @@ -694,7 +689,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
694 * If O_DIRECT is used and the buffer is not aligned fall back 689 * If O_DIRECT is used and the buffer is not aligned fall back
695 * to synchronous IO. 690 * to synchronous IO.
696 */ 691 */
697 -#if defined(O_DIRECT)  
698 BDRVRawState *s = bs->opaque; 692 BDRVRawState *s = bs->opaque;
699 693
700 if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { 694 if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
@@ -705,7 +699,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, @@ -705,7 +699,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
705 qemu_bh_schedule(bh); 699 qemu_bh_schedule(bh);
706 return &acb->common; 700 return &acb->common;
707 } 701 }
708 -#endif  
709 702
710 acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); 703 acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
711 if (!acb) 704 if (!acb)
@@ -770,10 +763,8 @@ static void raw_close(BlockDriverState *bs) @@ -770,10 +763,8 @@ static void raw_close(BlockDriverState *bs)
770 if (s->fd >= 0) { 763 if (s->fd >= 0) {
771 close(s->fd); 764 close(s->fd);
772 s->fd = -1; 765 s->fd = -1;
773 -#if defined(O_DIRECT)  
774 if (s->aligned_buf != NULL) 766 if (s->aligned_buf != NULL)
775 qemu_free(s->aligned_buf); 767 qemu_free(s->aligned_buf);
776 -#endif  
777 } 768 }
778 raw_close_fd_pool(s); 769 raw_close_fd_pool(s);
779 } 770 }
@@ -1003,10 +994,12 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) @@ -1003,10 +994,12 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
1003 open_flags |= O_RDONLY; 994 open_flags |= O_RDONLY;
1004 bs->read_only = 1; 995 bs->read_only = 1;
1005 } 996 }
1006 -#ifdef O_DIRECT  
1007 - if (flags & BDRV_O_DIRECT) 997 + /* Use O_DSYNC for write-through caching, no flags for write-back caching,
  998 + * and O_DIRECT for no caching. */
  999 + if ((flags & BDRV_O_NOCACHE))
1008 open_flags |= O_DIRECT; 1000 open_flags |= O_DIRECT;
1009 -#endif 1001 + else if (!(flags & BDRV_O_CACHE_WB))
  1002 + open_flags |= O_DSYNC;
1010 1003
1011 s->type = FTYPE_FILE; 1004 s->type = FTYPE_FILE;
1012 #if defined(__linux__) 1005 #if defined(__linux__)
block-raw-win32.c
@@ -104,8 +104,10 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) @@ -104,8 +104,10 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
104 #else 104 #else
105 overlapped = FILE_ATTRIBUTE_NORMAL; 105 overlapped = FILE_ATTRIBUTE_NORMAL;
106 #endif 106 #endif
107 - if (flags & BDRV_O_DIRECT) 107 + if ((flags & BDRV_O_NOCACHE))
108 overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; 108 overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
  109 + else if (!(flags & BDRV_O_CACHE_WB))
  110 + overlapped |= FILE_FLAG_WRITE_THROUGH;
109 s->hfile = CreateFile(filename, access_flags, 111 s->hfile = CreateFile(filename, access_flags,
110 FILE_SHARE_READ, NULL, 112 FILE_SHARE_READ, NULL,
111 create_flags, overlapped, NULL); 113 create_flags, overlapped, NULL);
@@ -440,8 +442,10 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) @@ -440,8 +442,10 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
440 #else 442 #else
441 overlapped = FILE_ATTRIBUTE_NORMAL; 443 overlapped = FILE_ATTRIBUTE_NORMAL;
442 #endif 444 #endif
443 - if (flags & BDRV_O_DIRECT) 445 + if ((flags & BDRV_O_NOCACHE))
444 overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; 446 overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
  447 + else if (!(flags & BDRV_O_CACHE_WB))
  448 + overlapped |= FILE_FLAG_WRITE_THROUGH;
445 s->hfile = CreateFile(filename, access_flags, 449 s->hfile = CreateFile(filename, access_flags,
446 FILE_SHARE_READ, NULL, 450 FILE_SHARE_READ, NULL,
447 create_flags, overlapped, NULL); 451 create_flags, overlapped, NULL);
@@ -395,12 +395,12 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, @@ -395,12 +395,12 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
395 /* Note: for compatibility, we open disk image files as RDWR, and 395 /* Note: for compatibility, we open disk image files as RDWR, and
396 RDONLY as fallback */ 396 RDONLY as fallback */
397 if (!(flags & BDRV_O_FILE)) 397 if (!(flags & BDRV_O_FILE))
398 - open_flags = BDRV_O_RDWR | (flags & BDRV_O_DIRECT); 398 + open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
399 else 399 else
400 open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT); 400 open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
401 ret = drv->bdrv_open(bs, filename, open_flags); 401 ret = drv->bdrv_open(bs, filename, open_flags);
402 if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) { 402 if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
403 - ret = drv->bdrv_open(bs, filename, BDRV_O_RDONLY); 403 + ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
404 bs->read_only = 1; 404 bs->read_only = 1;
405 } 405 }
406 if (ret < 0) { 406 if (ret < 0) {
@@ -427,7 +427,7 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, @@ -427,7 +427,7 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
427 } 427 }
428 path_combine(backing_filename, sizeof(backing_filename), 428 path_combine(backing_filename, sizeof(backing_filename),
429 filename, bs->backing_file); 429 filename, bs->backing_file);
430 - if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0) 430 + if (bdrv_open(bs->backing_hd, backing_filename, open_flags) < 0)
431 goto fail; 431 goto fail;
432 } 432 }
433 433
@@ -47,7 +47,10 @@ typedef struct QEMUSnapshotInfo { @@ -47,7 +47,10 @@ typedef struct QEMUSnapshotInfo {
47 use a disk image format on top of 47 use a disk image format on top of
48 it (default for 48 it (default for
49 bdrv_file_open()) */ 49 bdrv_file_open()) */
50 -#define BDRV_O_DIRECT 0x0020 50 +#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
  51 +#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */
  52 +
  53 +#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB)
51 54
52 void bdrv_info(void); 55 void bdrv_info(void);
53 void bdrv_info_stats(void); 56 void bdrv_info_stats(void);
qemu-doc.texi
@@ -267,13 +267,28 @@ These options have the same definition as they have in @option{-hdachs}. @@ -267,13 +267,28 @@ These options have the same definition as they have in @option{-hdachs}.
267 @item snapshot=@var{snapshot} 267 @item snapshot=@var{snapshot}
268 @var{snapshot} is "on" or "off" and allows to enable snapshot for given drive (see @option{-snapshot}). 268 @var{snapshot} is "on" or "off" and allows to enable snapshot for given drive (see @option{-snapshot}).
269 @item cache=@var{cache} 269 @item cache=@var{cache}
270 -@var{cache} is "on" or "off" and allows to disable host cache to access data. 270 +@var{cache} is "none", "writeback", or "writethrough" and controls how the host cache is used to access block data.
271 @item format=@var{format} 271 @item format=@var{format}
272 Specify which disk @var{format} will be used rather than detecting 272 Specify which disk @var{format} will be used rather than detecting
273 the format. Can be used to specifiy format=raw to avoid interpreting 273 the format. Can be used to specifiy format=raw to avoid interpreting
274 an untrusted format header. 274 an untrusted format header.
275 @end table 275 @end table
276 276
  277 +By default, writethrough caching is used for all block devices. This means that
  278 +the host page cache will be used to read and write data but write notification
  279 +will be sent to the guest only when the data has been reported as written by
  280 +the storage subsystem.
  281 +
  282 +Writeback caching will report data writes as completed as soon as the data is
  283 +present in the host page cache. This is safe as long as you trust your host.
  284 +If your host crashes or loses power, then the guest may experience data
  285 +corruption. When using the @option{-snapshot} option, writeback caching is
  286 +used by default.
  287 +
  288 +The host page cache can be avoided entirely with @option{cache=none}. This will
  289 +attempt to do disk IO directly to the guest's memory. QEMU may still perform
  290 +an internal copy of the data.
  291 +
277 Instead of @option{-cdrom} you can use: 292 Instead of @option{-cdrom} you can use:
278 @example 293 @example
279 qemu -drive file=file,index=2,media=cdrom 294 qemu -drive file=file,index=2,media=cdrom
qemu-nbd.c
@@ -232,7 +232,7 @@ int main(int argc, char **argv) @@ -232,7 +232,7 @@ int main(int argc, char **argv)
232 flags |= BDRV_O_SNAPSHOT; 232 flags |= BDRV_O_SNAPSHOT;
233 break; 233 break;
234 case 'n': 234 case 'n':
235 - flags |= BDRV_O_DIRECT; 235 + flags |= BDRV_O_NOCACHE;
236 break; 236 break;
237 case 'b': 237 case 'b':
238 bindto = optarg; 238 bindto = optarg;
@@ -5648,10 +5648,12 @@ static int drive_init(struct drive_opt *arg, int snapshot, @@ -5648,10 +5648,12 @@ static int drive_init(struct drive_opt *arg, int snapshot,
5648 } 5648 }
5649 5649
5650 if (get_param_value(buf, sizeof(buf), "cache", str)) { 5650 if (get_param_value(buf, sizeof(buf), "cache", str)) {
5651 - if (!strcmp(buf, "off")) 5651 + if (!strcmp(buf, "off") || !strcmp(buf, "none"))
5652 cache = 0; 5652 cache = 0;
5653 - else if (!strcmp(buf, "on")) 5653 + else if (!strcmp(buf, "writethrough"))
5654 cache = 1; 5654 cache = 1;
  5655 + else if (!strcmp(buf, "writeback"))
  5656 + cache = 2;
5655 else { 5657 else {
5656 fprintf(stderr, "qemu: invalid cache option\n"); 5658 fprintf(stderr, "qemu: invalid cache option\n");
5657 return -1; 5659 return -1;
@@ -5770,10 +5772,14 @@ static int drive_init(struct drive_opt *arg, int snapshot, @@ -5770,10 +5772,14 @@ static int drive_init(struct drive_opt *arg, int snapshot,
5770 if (!file[0]) 5772 if (!file[0])
5771 return 0; 5773 return 0;
5772 bdrv_flags = 0; 5774 bdrv_flags = 0;
5773 - if (snapshot) 5775 + if (snapshot) {
5774 bdrv_flags |= BDRV_O_SNAPSHOT; 5776 bdrv_flags |= BDRV_O_SNAPSHOT;
5775 - if (!cache)  
5776 - bdrv_flags |= BDRV_O_DIRECT; 5777 + cache = 2; /* always use write-back with snapshot */
  5778 + }
  5779 + if (cache == 0) /* no caching */
  5780 + bdrv_flags |= BDRV_O_NOCACHE;
  5781 + else if (cache == 2) /* write-back */
  5782 + bdrv_flags |= BDRV_O_CACHE_WB;
5777 if (bdrv_open2(bdrv, file, bdrv_flags, drv) < 0 || qemu_key_check(bdrv, file)) { 5783 if (bdrv_open2(bdrv, file, bdrv_flags, drv) < 0 || qemu_key_check(bdrv, file)) {
5778 fprintf(stderr, "qemu: could not open disk image %s\n", 5784 fprintf(stderr, "qemu: could not open disk image %s\n",
5779 file); 5785 file);
@@ -8145,7 +8151,7 @@ static void help(int exitcode) @@ -8145,7 +8151,7 @@ static void help(int exitcode)
8145 "-cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master)\n" 8151 "-cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master)\n"
8146 "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n" 8152 "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
8147 " [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n" 8153 " [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n"
8148 - " [,cache=on|off][,format=f]\n" 8154 + " [,cache=writethrough|writeback|none][,format=f]\n"
8149 " use 'file' as a drive image\n" 8155 " use 'file' as a drive image\n"
8150 "-mtdblock file use 'file' as on-board Flash memory image\n" 8156 "-mtdblock file use 'file' as on-board Flash memory image\n"
8151 "-sd file use 'file' as SecureDigital card image\n" 8157 "-sd file use 'file' as SecureDigital card image\n"