Commit 6e02c38dadfe4cf02b0da6135adfd8d9352b90e1

Authored by aliguori
1 parent 967f97fa

Add virtio-blk support

Virtio-blk is a paravirtual block device based on VirtIO.  It can be used by
passing if=virtio to the -drive option.

When using -enable-kvm, it can achieve very good performance compared to IDE or
SCSI.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5870 c046a42c-6fe2-441c-8c8c-71466251a162
Makefile.target
@@ -665,7 +665,7 @@ OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o @@ -665,7 +665,7 @@ OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
665 OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o 665 OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
666 OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o 666 OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o
667 # virtio support 667 # virtio support
668 -OBJS+= virtio.o 668 +OBJS+= virtio.o virtio-blk.o
669 CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE 669 CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE
670 endif 670 endif
671 ifeq ($(TARGET_BASE_ARCH), ppc) 671 ifeq ($(TARGET_BASE_ARCH), ppc)
@@ -684,7 +684,7 @@ OBJS+= unin_pci.o ppc_chrp.o @@ -684,7 +684,7 @@ OBJS+= unin_pci.o ppc_chrp.o
684 # PowerPC 4xx boards 684 # PowerPC 4xx boards
685 OBJS+= pflash_cfi02.o ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o 685 OBJS+= pflash_cfi02.o ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
686 # virtio support 686 # virtio support
687 -OBJS+= virtio.o 687 +OBJS+= virtio.o virtio-blk.o
688 endif 688 endif
689 ifeq ($(TARGET_BASE_ARCH), mips) 689 ifeq ($(TARGET_BASE_ARCH), mips)
690 OBJS+= mips_r4k.o mips_jazz.o mips_malta.o mips_mipssim.o 690 OBJS+= mips_r4k.o mips_jazz.o mips_malta.o mips_mipssim.o
@@ -33,6 +33,7 @@ @@ -33,6 +33,7 @@
33 #include "boards.h" 33 #include "boards.h"
34 #include "console.h" 34 #include "console.h"
35 #include "fw_cfg.h" 35 #include "fw_cfg.h"
  36 +#include "virtio-blk.h"
36 37
37 /* output Bochs bios info messages */ 38 /* output Bochs bios info messages */
38 //#define DEBUG_BIOS 39 //#define DEBUG_BIOS
@@ -1092,6 +1093,18 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size, @@ -1092,6 +1093,18 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
1092 } 1093 }
1093 } 1094 }
1094 } 1095 }
  1096 +
  1097 + /* Add virtio block devices */
  1098 + if (pci_enabled) {
  1099 + int index;
  1100 + int unit_id = 0;
  1101 +
  1102 + while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) {
  1103 + virtio_blk_init(pci_bus, 0x1AF4, 0x1001,
  1104 + drives_table[index].bdrv);
  1105 + unit_id++;
  1106 + }
  1107 + }
1095 } 1108 }
1096 1109
1097 static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size, 1110 static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size,
hw/virtio-blk.c 0 → 100644
  1 +/*
  2 + * Virtio Block Device
  3 + *
  4 + * Copyright IBM, Corp. 2007
  5 + *
  6 + * Authors:
  7 + * Anthony Liguori <aliguori@us.ibm.com>
  8 + *
  9 + * This work is licensed under the terms of the GNU GPL, version 2. See
  10 + * the COPYING file in the top-level directory.
  11 + *
  12 + */
  13 +
  14 +#include "virtio-blk.h"
  15 +#include "block_int.h"
  16 +
  17 +typedef struct VirtIOBlock
  18 +{
  19 + VirtIODevice vdev;
  20 + BlockDriverState *bs;
  21 + VirtQueue *vq;
  22 +} VirtIOBlock;
  23 +
  24 +static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
  25 +{
  26 + return (VirtIOBlock *)vdev;
  27 +}
  28 +
  29 +typedef struct VirtIOBlockReq
  30 +{
  31 + VirtIOBlock *dev;
  32 + VirtQueueElement elem;
  33 + struct virtio_blk_inhdr *in;
  34 + struct virtio_blk_outhdr *out;
  35 + size_t size;
  36 + uint8_t *buffer;
  37 +} VirtIOBlockReq;
  38 +
  39 +static void virtio_blk_rw_complete(void *opaque, int ret)
  40 +{
  41 + VirtIOBlockReq *req = opaque;
  42 + VirtIOBlock *s = req->dev;
  43 +
  44 + /* Copy read data to the guest */
  45 + if (!ret && !(req->out->type & VIRTIO_BLK_T_OUT)) {
  46 + size_t offset = 0;
  47 + int i;
  48 +
  49 + for (i = 0; i < req->elem.in_num - 1; i++) {
  50 + size_t len;
  51 +
  52 + /* Be pretty defensive wrt malicious guests */
  53 + len = MIN(req->elem.in_sg[i].iov_len,
  54 + req->size - offset);
  55 +
  56 + memcpy(req->elem.in_sg[i].iov_base,
  57 + req->buffer + offset,
  58 + len);
  59 + offset += len;
  60 + }
  61 + }
  62 +
  63 + req->in->status = ret ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
  64 + virtqueue_push(s->vq, &req->elem, req->size + sizeof(*req->in));
  65 + virtio_notify(&s->vdev, s->vq);
  66 +
  67 + qemu_free(req->buffer);
  68 + qemu_free(req);
  69 +}
  70 +
  71 +static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
  72 +{
  73 + VirtIOBlockReq *req;
  74 +
  75 + req = qemu_mallocz(sizeof(*req));
  76 + if (req == NULL)
  77 + return NULL;
  78 +
  79 + req->dev = s;
  80 + if (!virtqueue_pop(s->vq, &req->elem)) {
  81 + qemu_free(req);
  82 + return NULL;
  83 + }
  84 +
  85 + return req;
  86 +}
  87 +
  88 +static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
  89 +{
  90 + VirtIOBlock *s = to_virtio_blk(vdev);
  91 + VirtIOBlockReq *req;
  92 +
  93 + while ((req = virtio_blk_get_request(s))) {
  94 + int i;
  95 +
  96 + if (req->elem.out_num < 1 || req->elem.in_num < 1) {
  97 + fprintf(stderr, "virtio-blk missing headers\n");
  98 + exit(1);
  99 + }
  100 +
  101 + if (req->elem.out_sg[0].iov_len < sizeof(*req->out) ||
  102 + req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) {
  103 + fprintf(stderr, "virtio-blk header not in correct element\n");
  104 + exit(1);
  105 + }
  106 +
  107 + req->out = (void *)req->elem.out_sg[0].iov_base;
  108 + req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base;
  109 +
  110 + if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) {
  111 + unsigned int len = sizeof(*req->in);
  112 +
  113 + req->in->status = VIRTIO_BLK_S_UNSUPP;
  114 + virtqueue_push(vq, &req->elem, len);
  115 + virtio_notify(vdev, vq);
  116 + qemu_free(req);
  117 + } else if (req->out->type & VIRTIO_BLK_T_OUT) {
  118 + size_t offset;
  119 +
  120 + for (i = 1; i < req->elem.out_num; i++)
  121 + req->size += req->elem.out_sg[i].iov_len;
  122 +
  123 + req->buffer = qemu_memalign(512, req->size);
  124 + if (req->buffer == NULL) {
  125 + qemu_free(req);
  126 + break;
  127 + }
  128 +
  129 + /* We copy the data from the SG list to avoid splitting up the request. This helps
  130 + performance a lot until we can pass full sg lists as AIO operations */
  131 + offset = 0;
  132 + for (i = 1; i < req->elem.out_num; i++) {
  133 + size_t len;
  134 +
  135 + len = MIN(req->elem.out_sg[i].iov_len,
  136 + req->size - offset);
  137 + memcpy(req->buffer + offset,
  138 + req->elem.out_sg[i].iov_base,
  139 + len);
  140 + offset += len;
  141 + }
  142 +
  143 + bdrv_aio_write(s->bs, req->out->sector,
  144 + req->buffer,
  145 + req->size / 512,
  146 + virtio_blk_rw_complete,
  147 + req);
  148 + } else {
  149 + for (i = 0; i < req->elem.in_num - 1; i++)
  150 + req->size += req->elem.in_sg[i].iov_len;
  151 +
  152 + req->buffer = qemu_memalign(512, req->size);
  153 + if (req->buffer == NULL) {
  154 + qemu_free(req);
  155 + break;
  156 + }
  157 +
  158 + bdrv_aio_read(s->bs, req->out->sector,
  159 + req->buffer,
  160 + req->size / 512,
  161 + virtio_blk_rw_complete,
  162 + req);
  163 + }
  164 + }
  165 + /*
  166 + * FIXME: Want to check for completions before returning to guest mode,
  167 + * so cached reads and writes are reported as quickly as possible. But
  168 + * that should be done in the generic block layer.
  169 + */
  170 +}
  171 +
  172 +static void virtio_blk_reset(VirtIODevice *vdev)
  173 +{
  174 + /*
  175 + * This should cancel pending requests, but can't do nicely until there
  176 + * are per-device request lists.
  177 + */
  178 + qemu_aio_flush();
  179 +}
  180 +
  181 +static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
  182 +{
  183 + VirtIOBlock *s = to_virtio_blk(vdev);
  184 + struct virtio_blk_config blkcfg;
  185 + uint64_t capacity;
  186 + int cylinders, heads, secs;
  187 +
  188 + bdrv_get_geometry(s->bs, &capacity);
  189 + bdrv_get_geometry_hint(s->bs, &cylinders, &heads, &secs);
  190 + stq_raw(&blkcfg.capacity, capacity);
  191 + stl_raw(&blkcfg.seg_max, 128 - 2);
  192 + stw_raw(&blkcfg.cylinders, cylinders);
  193 + blkcfg.heads = heads;
  194 + blkcfg.sectors = secs;
  195 + memcpy(config, &blkcfg, sizeof(blkcfg));
  196 +}
  197 +
  198 +static uint32_t virtio_blk_get_features(VirtIODevice *vdev)
  199 +{
  200 + return (1 << VIRTIO_BLK_F_SEG_MAX | 1 << VIRTIO_BLK_F_GEOMETRY);
  201 +}
  202 +
  203 +static void virtio_blk_save(QEMUFile *f, void *opaque)
  204 +{
  205 + VirtIOBlock *s = opaque;
  206 + virtio_save(&s->vdev, f);
  207 +}
  208 +
  209 +static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
  210 +{
  211 + VirtIOBlock *s = opaque;
  212 +
  213 + if (version_id != 1)
  214 + return -EINVAL;
  215 +
  216 + virtio_load(&s->vdev, f);
  217 +
  218 + return 0;
  219 +}
  220 +
  221 +void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
  222 + BlockDriverState *bs)
  223 +{
  224 + VirtIOBlock *s;
  225 + int cylinders, heads, secs;
  226 + static int virtio_blk_id;
  227 +
  228 + s = (VirtIOBlock *)virtio_init_pci(bus, "virtio-blk", vendor, device,
  229 + 0, VIRTIO_ID_BLOCK,
  230 + 0x01, 0x80, 0x00,
  231 + sizeof(struct virtio_blk_config), sizeof(VirtIOBlock));
  232 + if (!s)
  233 + return NULL;
  234 +
  235 + s->vdev.get_config = virtio_blk_update_config;
  236 + s->vdev.get_features = virtio_blk_get_features;
  237 + s->vdev.reset = virtio_blk_reset;
  238 + s->bs = bs;
  239 + bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs);
  240 + bdrv_set_geometry_hint(s->bs, cylinders, heads, secs);
  241 +
  242 + s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);
  243 +
  244 + register_savevm("virtio-blk", virtio_blk_id++, 1,
  245 + virtio_blk_save, virtio_blk_load, s);
  246 +
  247 + return s;
  248 +}
hw/virtio-blk.h 0 → 100644
  1 +/*
  2 + * Virtio Block Device
  3 + *
  4 + * Copyright IBM, Corp. 2007
  5 + *
  6 + * Authors:
  7 + * Anthony Liguori <aliguori@us.ibm.com>
  8 + *
  9 + * This work is licensed under the terms of the GNU GPL, version 2. See
  10 + * the COPYING file in the top-level directory.
  11 + *
  12 + */
  13 +
  14 +#ifndef _QEMU_VIRTIO_BLK_H
  15 +#define _QEMU_VIRTIO_BLK_H
  16 +
  17 +#include "virtio.h"
  18 +#include "block.h"
  19 +#include "pci.h"
  20 +
  21 +/* from Linux's linux/virtio_blk.h */
  22 +
  23 +/* The ID for virtio_block */
  24 +#define VIRTIO_ID_BLOCK 2
  25 +
  26 +/* Feature bits */
  27 +#define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */
  28 +#define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */
  29 +#define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */
  30 +#define VIRTIO_BLK_F_GEOMETRY 4 /* Indicates support of legacy geometry */
  31 +
  32 +struct virtio_blk_config
  33 +{
  34 + uint64_t capacity;
  35 + uint32_t size_max;
  36 + uint32_t seg_max;
  37 + uint16_t cylinders;
  38 + uint8_t heads;
  39 + uint8_t sectors;
  40 +} __attribute__((packed));
  41 +
  42 +/* These two define direction. */
  43 +#define VIRTIO_BLK_T_IN 0
  44 +#define VIRTIO_BLK_T_OUT 1
  45 +
  46 +/* This bit says it's a scsi command, not an actual read or write. */
  47 +#define VIRTIO_BLK_T_SCSI_CMD 2
  48 +
  49 +/* Barrier before this op. */
  50 +#define VIRTIO_BLK_T_BARRIER 0x80000000
  51 +
  52 +/* This is the first element of the read scatter-gather list. */
  53 +struct virtio_blk_outhdr
  54 +{
  55 + /* VIRTIO_BLK_T* */
  56 + uint32_t type;
  57 + /* io priority. */
  58 + uint32_t ioprio;
  59 + /* Sector (ie. 512 byte offset) */
  60 + uint64_t sector;
  61 +};
  62 +
  63 +#define VIRTIO_BLK_S_OK 0
  64 +#define VIRTIO_BLK_S_IOERR 1
  65 +#define VIRTIO_BLK_S_UNSUPP 2
  66 +
  67 +/* This is the first element of the write scatter-gather list */
  68 +struct virtio_blk_inhdr
  69 +{
  70 + unsigned char status;
  71 +};
  72 +
  73 +void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
  74 + BlockDriverState *bs);
  75 +
  76 +#endif
qemu-doc.texi
@@ -253,7 +253,7 @@ this drive. If the filename contains comma, you must double it @@ -253,7 +253,7 @@ this drive. If the filename contains comma, you must double it
253 (for instance, "file=my,,file" to use file "my,file"). 253 (for instance, "file=my,,file" to use file "my,file").
254 @item if=@var{interface} 254 @item if=@var{interface}
255 This option defines on which type of interface the drive is connected. 255 This option defines on which type of interface the drive is connected.
256 -Available types are: ide, scsi, sd, mtd, floppy, pflash. 256 +Available types are: ide, scsi, sd, mtd, floppy, pflash, virtio.
257 @item bus=@var{bus},unit=@var{unit} 257 @item bus=@var{bus},unit=@var{unit}
258 These options define where the drive is connected by defining the bus number and 258 These options define where the drive is connected by defining the bus number and
259 the unit id. 259 the unit id.
sysemu.h
@@ -123,7 +123,7 @@ extern unsigned int nb_prom_envs; @@ -123,7 +123,7 @@ extern unsigned int nb_prom_envs;
123 #endif 123 #endif
124 124
125 typedef enum { 125 typedef enum {
126 - IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD 126 + IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO
127 } BlockInterfaceType; 127 } BlockInterfaceType;
128 128
129 typedef struct DriveInfo { 129 typedef struct DriveInfo {
@@ -2267,7 +2267,10 @@ static int drive_init(struct drive_opt *arg, int snapshot, @@ -2267,7 +2267,10 @@ static int drive_init(struct drive_opt *arg, int snapshot,
2267 } else if (!strcmp(buf, "sd")) { 2267 } else if (!strcmp(buf, "sd")) {
2268 type = IF_SD; 2268 type = IF_SD;
2269 max_devs = 0; 2269 max_devs = 0;
2270 - } else { 2270 + } else if (!strcmp(buf, "virtio")) {
  2271 + type = IF_VIRTIO;
  2272 + max_devs = 0;
  2273 + } else {
2271 fprintf(stderr, "qemu: '%s' unsupported bus type '%s'\n", str, buf); 2274 fprintf(stderr, "qemu: '%s' unsupported bus type '%s'\n", str, buf);
2272 return -1; 2275 return -1;
2273 } 2276 }
@@ -2474,6 +2477,7 @@ static int drive_init(struct drive_opt *arg, int snapshot, @@ -2474,6 +2477,7 @@ static int drive_init(struct drive_opt *arg, int snapshot,
2474 break; 2477 break;
2475 case IF_PFLASH: 2478 case IF_PFLASH:
2476 case IF_MTD: 2479 case IF_MTD:
  2480 + case IF_VIRTIO:
2477 break; 2481 break;
2478 } 2482 }
2479 if (!file[0]) 2483 if (!file[0])