Commit 6e02c38dadfe4cf02b0da6135adfd8d9352b90e1

Authored by aliguori
1 parent 967f97fa

Add virtio-blk support

Virtio-blk is a paravirtual block device based on VirtIO.  It is enabled by
passing if=virtio to the -drive option.

When using -enable-kvm, it can achieve very good performance compared to IDE or
SCSI.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5870 c046a42c-6fe2-441c-8c8c-71466251a162
Makefile.target
... ... @@ -665,7 +665,7 @@ OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
665 665 OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
666 666 OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o
667 667 # virtio support
668   -OBJS+= virtio.o
  668 +OBJS+= virtio.o virtio-blk.o
669 669 CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE
670 670 endif
671 671 ifeq ($(TARGET_BASE_ARCH), ppc)
... ... @@ -684,7 +684,7 @@ OBJS+= unin_pci.o ppc_chrp.o
684 684 # PowerPC 4xx boards
685 685 OBJS+= pflash_cfi02.o ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
686 686 # virtio support
687   -OBJS+= virtio.o
  687 +OBJS+= virtio.o virtio-blk.o
688 688 endif
689 689 ifeq ($(TARGET_BASE_ARCH), mips)
690 690 OBJS+= mips_r4k.o mips_jazz.o mips_malta.o mips_mipssim.o
... ...
... ... @@ -33,6 +33,7 @@
33 33 #include "boards.h"
34 34 #include "console.h"
35 35 #include "fw_cfg.h"
  36 +#include "virtio-blk.h"
36 37  
37 38 /* output Bochs bios info messages */
38 39 //#define DEBUG_BIOS
... ... @@ -1092,6 +1093,18 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
1092 1093 }
1093 1094 }
1094 1095 }
  1096 +
  1097 + /* Add virtio block devices */
  1098 + if (pci_enabled) {
  1099 + int index;
  1100 + int unit_id = 0;
  1101 +
  1102 + while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) {
  1103 + virtio_blk_init(pci_bus, 0x1AF4, 0x1001,
  1104 + drives_table[index].bdrv);
  1105 + unit_id++;
  1106 + }
  1107 + }
1095 1108 }
1096 1109  
1097 1110 static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size,
... ...
hw/virtio-blk.c 0 → 100644
  1 +/*
  2 + * Virtio Block Device
  3 + *
  4 + * Copyright IBM, Corp. 2007
  5 + *
  6 + * Authors:
  7 + * Anthony Liguori <aliguori@us.ibm.com>
  8 + *
  9 + * This work is licensed under the terms of the GNU GPL, version 2. See
  10 + * the COPYING file in the top-level directory.
  11 + *
  12 + */
  13 +
  14 +#include "virtio-blk.h"
  15 +#include "block_int.h"
  16 +
  17 +typedef struct VirtIOBlock
  18 +{
  19 + VirtIODevice vdev;
  20 + BlockDriverState *bs;
  21 + VirtQueue *vq;
  22 +} VirtIOBlock;
  23 +
  24 +static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
  25 +{
  26 + return (VirtIOBlock *)vdev;
  27 +}
  28 +
  29 +typedef struct VirtIOBlockReq
  30 +{
  31 + VirtIOBlock *dev;
  32 + VirtQueueElement elem;
  33 + struct virtio_blk_inhdr *in;
  34 + struct virtio_blk_outhdr *out;
  35 + size_t size;
  36 + uint8_t *buffer;
  37 +} VirtIOBlockReq;
  38 +
  39 +static void virtio_blk_rw_complete(void *opaque, int ret)
  40 +{
  41 + VirtIOBlockReq *req = opaque;
  42 + VirtIOBlock *s = req->dev;
  43 +
  44 + /* Copy read data to the guest */
  45 + if (!ret && !(req->out->type & VIRTIO_BLK_T_OUT)) {
  46 + size_t offset = 0;
  47 + int i;
  48 +
  49 + for (i = 0; i < req->elem.in_num - 1; i++) {
  50 + size_t len;
  51 +
  52 + /* Be pretty defensive wrt malicious guests */
  53 + len = MIN(req->elem.in_sg[i].iov_len,
  54 + req->size - offset);
  55 +
  56 + memcpy(req->elem.in_sg[i].iov_base,
  57 + req->buffer + offset,
  58 + len);
  59 + offset += len;
  60 + }
  61 + }
  62 +
  63 + req->in->status = ret ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
  64 + virtqueue_push(s->vq, &req->elem, req->size + sizeof(*req->in));
  65 + virtio_notify(&s->vdev, s->vq);
  66 +
  67 + qemu_free(req->buffer);
  68 + qemu_free(req);
  69 +}
  70 +
  71 +static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
  72 +{
  73 + VirtIOBlockReq *req;
  74 +
  75 + req = qemu_mallocz(sizeof(*req));
  76 + if (req == NULL)
  77 + return NULL;
  78 +
  79 + req->dev = s;
  80 + if (!virtqueue_pop(s->vq, &req->elem)) {
  81 + qemu_free(req);
  82 + return NULL;
  83 + }
  84 +
  85 + return req;
  86 +}
  87 +
  88 +static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
  89 +{
  90 + VirtIOBlock *s = to_virtio_blk(vdev);
  91 + VirtIOBlockReq *req;
  92 +
  93 + while ((req = virtio_blk_get_request(s))) {
  94 + int i;
  95 +
  96 + if (req->elem.out_num < 1 || req->elem.in_num < 1) {
  97 + fprintf(stderr, "virtio-blk missing headers\n");
  98 + exit(1);
  99 + }
  100 +
  101 + if (req->elem.out_sg[0].iov_len < sizeof(*req->out) ||
  102 + req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) {
  103 + fprintf(stderr, "virtio-blk header not in correct element\n");
  104 + exit(1);
  105 + }
  106 +
  107 + req->out = (void *)req->elem.out_sg[0].iov_base;
  108 + req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base;
  109 +
  110 + if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) {
  111 + unsigned int len = sizeof(*req->in);
  112 +
  113 + req->in->status = VIRTIO_BLK_S_UNSUPP;
  114 + virtqueue_push(vq, &req->elem, len);
  115 + virtio_notify(vdev, vq);
  116 + qemu_free(req);
  117 + } else if (req->out->type & VIRTIO_BLK_T_OUT) {
  118 + size_t offset;
  119 +
  120 + for (i = 1; i < req->elem.out_num; i++)
  121 + req->size += req->elem.out_sg[i].iov_len;
  122 +
  123 + req->buffer = qemu_memalign(512, req->size);
  124 + if (req->buffer == NULL) {
  125 + qemu_free(req);
  126 + break;
  127 + }
  128 +
  129 + /* We copy the data from the SG list to avoid splitting up the request. This helps
  130 + performance a lot until we can pass full sg lists as AIO operations */
  131 + offset = 0;
  132 + for (i = 1; i < req->elem.out_num; i++) {
  133 + size_t len;
  134 +
  135 + len = MIN(req->elem.out_sg[i].iov_len,
  136 + req->size - offset);
  137 + memcpy(req->buffer + offset,
  138 + req->elem.out_sg[i].iov_base,
  139 + len);
  140 + offset += len;
  141 + }
  142 +
  143 + bdrv_aio_write(s->bs, req->out->sector,
  144 + req->buffer,
  145 + req->size / 512,
  146 + virtio_blk_rw_complete,
  147 + req);
  148 + } else {
  149 + for (i = 0; i < req->elem.in_num - 1; i++)
  150 + req->size += req->elem.in_sg[i].iov_len;
  151 +
  152 + req->buffer = qemu_memalign(512, req->size);
  153 + if (req->buffer == NULL) {
  154 + qemu_free(req);
  155 + break;
  156 + }
  157 +
  158 + bdrv_aio_read(s->bs, req->out->sector,
  159 + req->buffer,
  160 + req->size / 512,
  161 + virtio_blk_rw_complete,
  162 + req);
  163 + }
  164 + }
  165 + /*
  166 + * FIXME: Want to check for completions before returning to guest mode,
  167 + * so cached reads and writes are reported as quickly as possible. But
  168 + * that should be done in the generic block layer.
  169 + */
  170 +}
  171 +
  172 +static void virtio_blk_reset(VirtIODevice *vdev)
  173 +{
  174 + /*
  175 + * This should cancel pending requests, but can't do nicely until there
  176 + * are per-device request lists.
  177 + */
  178 + qemu_aio_flush();
  179 +}
  180 +
  181 +static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
  182 +{
  183 + VirtIOBlock *s = to_virtio_blk(vdev);
  184 + struct virtio_blk_config blkcfg;
  185 + uint64_t capacity;
  186 + int cylinders, heads, secs;
  187 +
  188 + bdrv_get_geometry(s->bs, &capacity);
  189 + bdrv_get_geometry_hint(s->bs, &cylinders, &heads, &secs);
  190 + stq_raw(&blkcfg.capacity, capacity);
  191 + stl_raw(&blkcfg.seg_max, 128 - 2);
  192 + stw_raw(&blkcfg.cylinders, cylinders);
  193 + blkcfg.heads = heads;
  194 + blkcfg.sectors = secs;
  195 + memcpy(config, &blkcfg, sizeof(blkcfg));
  196 +}
  197 +
  198 +static uint32_t virtio_blk_get_features(VirtIODevice *vdev)
  199 +{
  200 + return (1 << VIRTIO_BLK_F_SEG_MAX | 1 << VIRTIO_BLK_F_GEOMETRY);
  201 +}
  202 +
  203 +static void virtio_blk_save(QEMUFile *f, void *opaque)
  204 +{
  205 + VirtIOBlock *s = opaque;
  206 + virtio_save(&s->vdev, f);
  207 +}
  208 +
  209 +static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
  210 +{
  211 + VirtIOBlock *s = opaque;
  212 +
  213 + if (version_id != 1)
  214 + return -EINVAL;
  215 +
  216 + virtio_load(&s->vdev, f);
  217 +
  218 + return 0;
  219 +}
  220 +
  221 +void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
  222 + BlockDriverState *bs)
  223 +{
  224 + VirtIOBlock *s;
  225 + int cylinders, heads, secs;
  226 + static int virtio_blk_id;
  227 +
  228 + s = (VirtIOBlock *)virtio_init_pci(bus, "virtio-blk", vendor, device,
  229 + 0, VIRTIO_ID_BLOCK,
  230 + 0x01, 0x80, 0x00,
  231 + sizeof(struct virtio_blk_config), sizeof(VirtIOBlock));
  232 + if (!s)
  233 + return NULL;
  234 +
  235 + s->vdev.get_config = virtio_blk_update_config;
  236 + s->vdev.get_features = virtio_blk_get_features;
  237 + s->vdev.reset = virtio_blk_reset;
  238 + s->bs = bs;
  239 + bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs);
  240 + bdrv_set_geometry_hint(s->bs, cylinders, heads, secs);
  241 +
  242 + s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);
  243 +
  244 + register_savevm("virtio-blk", virtio_blk_id++, 1,
  245 + virtio_blk_save, virtio_blk_load, s);
  246 +
  247 + return s;
  248 +}
... ...
hw/virtio-blk.h 0 → 100644
  1 +/*
  2 + * Virtio Block Device
  3 + *
  4 + * Copyright IBM, Corp. 2007
  5 + *
  6 + * Authors:
  7 + * Anthony Liguori <aliguori@us.ibm.com>
  8 + *
  9 + * This work is licensed under the terms of the GNU GPL, version 2. See
  10 + * the COPYING file in the top-level directory.
  11 + *
  12 + */
  13 +
  14 +#ifndef _QEMU_VIRTIO_BLK_H
  15 +#define _QEMU_VIRTIO_BLK_H
  16 +
  17 +#include "virtio.h"
  18 +#include "block.h"
  19 +#include "pci.h"
  20 +
/* The definitions below come from Linux's linux/virtio_blk.h */

/* The ID for virtio_block */
#define VIRTIO_ID_BLOCK 2

/* Feature bits; each value is a bit position in the feature word */
#define VIRTIO_BLK_F_BARRIER    0   /* Does host support barriers? */
#define VIRTIO_BLK_F_SIZE_MAX   1   /* Indicates maximum segment size */
#define VIRTIO_BLK_F_SEG_MAX    2   /* Indicates maximum # of segments */
#define VIRTIO_BLK_F_GEOMETRY   4   /* Indicates support of legacy geometry */

/* Device configuration space as seen by the guest; packed, no padding. */
struct virtio_blk_config {
    uint64_t capacity;      /* device capacity */
    uint32_t size_max;      /* maximum segment size */
    uint32_t seg_max;       /* maximum number of segments */
    uint16_t cylinders;     /* legacy geometry: cylinders */
    uint8_t heads;          /* legacy geometry: heads */
    uint8_t sectors;        /* legacy geometry: sectors */
} __attribute__((packed));
  41 +
/* Transfer direction of a request. */
#define VIRTIO_BLK_T_IN         0
#define VIRTIO_BLK_T_OUT        1

/* This bit says it's a scsi command, not an actual read or write. */
#define VIRTIO_BLK_T_SCSI_CMD   2

/* Barrier before this op. */
#define VIRTIO_BLK_T_BARRIER    0x80000000

/*
 * Request header supplied by the guest.  It is the first element of the
 * scatter-gather list that the device reads from.
 */
struct virtio_blk_outhdr {
    uint32_t type;      /* VIRTIO_BLK_T* */
    uint32_t ioprio;    /* io priority. */
    uint64_t sector;    /* Sector (ie. 512 byte offset) */
};
  62 +
/* Values of virtio_blk_inhdr.status */
#define VIRTIO_BLK_S_OK         0
#define VIRTIO_BLK_S_IOERR      1
#define VIRTIO_BLK_S_UNSUPP     2

/*
 * Status header written by the device when a request completes.  The device
 * expects it in the last element of the scatter-gather list it writes to;
 * any read data buffers come before it.
 */
struct virtio_blk_inhdr {
    uint8_t status;     /* VIRTIO_BLK_S_* */
};
  72 +
  73 +void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
  74 + BlockDriverState *bs);
  75 +
  76 +#endif
... ...
qemu-doc.texi
... ... @@ -253,7 +253,7 @@ this drive. If the filename contains comma, you must double it
253 253 (for instance, "file=my,,file" to use file "my,file").
254 254 @item if=@var{interface}
255 255 This option defines the type of interface to which the drive is connected.
256   -Available types are: ide, scsi, sd, mtd, floppy, pflash.
  256 +Available types are: ide, scsi, sd, mtd, floppy, pflash, virtio.
257 257 @item bus=@var{bus},unit=@var{unit}
258 258 These options define where the drive is connected by specifying the bus number and
259 259 the unit id.
... ...
sysemu.h
... ... @@ -123,7 +123,7 @@ extern unsigned int nb_prom_envs;
123 123 #endif
124 124  
125 125 typedef enum {
126   - IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD
  126 + IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO
127 127 } BlockInterfaceType;
128 128  
129 129 typedef struct DriveInfo {
... ...
... ... @@ -2267,7 +2267,10 @@ static int drive_init(struct drive_opt *arg, int snapshot,
2267 2267 } else if (!strcmp(buf, "sd")) {
2268 2268 type = IF_SD;
2269 2269 max_devs = 0;
2270   - } else {
  2270 + } else if (!strcmp(buf, "virtio")) {
  2271 + type = IF_VIRTIO;
  2272 + max_devs = 0;
  2273 + } else {
2271 2274 fprintf(stderr, "qemu: '%s' unsupported bus type '%s'\n", str, buf);
2272 2275 return -1;
2273 2276 }
... ... @@ -2474,6 +2477,7 @@ static int drive_init(struct drive_opt *arg, int snapshot,
2474 2477 break;
2475 2478 case IF_PFLASH:
2476 2479 case IF_MTD:
  2480 + case IF_VIRTIO:
2477 2481 break;
2478 2482 }
2479 2483 if (!file[0])
... ...