Commit 967f97fa00b2d301584d08c614d446423c65c15a
Author: Anthony Liguori <aliguori@us.ibm.com>
Parent: b39ade83

Virtio core support

This patch adds core support for VirtIO.  VirtIO is a paravirtualization
framework that has been in Linux since 2.6.21.  A PCI transport has been
available since 2.6.25.  Network drivers are also available for Windows.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5869 c046a42c-6fe2-441c-8c8c-71466251a162
Makefile.target
@@ -664,6 +664,8 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
 OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
 OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
 OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o
+# virtio support
+OBJS+= virtio.o
 CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE
 endif
 ifeq ($(TARGET_BASE_ARCH), ppc)
@@ -681,6 +683,8 @@ OBJS+= heathrow_pic.o grackle_pci.o ppc_oldworld.o
 OBJS+= unin_pci.o ppc_chrp.o
 # PowerPC 4xx boards
 OBJS+= pflash_cfi02.o ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
+# virtio support
+OBJS+= virtio.o
 endif
 ifeq ($(TARGET_BASE_ARCH), mips)
 OBJS+= mips_r4k.o mips_jazz.o mips_malta.o mips_mipssim.o

hw/virtio.c (new file, mode 100644)
/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <inttypes.h>
#include <err.h>

#include "virtio.h"
#include "sysemu.h"

//#define VIRTIO_ZERO_COPY

/* from Linux's linux/virtio_pci.h */

/* A 32-bit r/o bitmask of the features supported by the host */
#define VIRTIO_PCI_HOST_FEATURES 0

/* A 32-bit r/w bitmask of features activated by the guest */
#define VIRTIO_PCI_GUEST_FEATURES 4

/* A 32-bit r/w PFN for the currently selected queue */
#define VIRTIO_PCI_QUEUE_PFN 8

/* A 16-bit r/o queue size for the currently selected queue */
#define VIRTIO_PCI_QUEUE_NUM 12

/* A 16-bit r/w queue selector */
#define VIRTIO_PCI_QUEUE_SEL 14

/* A 16-bit r/w queue notifier */
#define VIRTIO_PCI_QUEUE_NOTIFY 16

/* An 8-bit device status register. */
#define VIRTIO_PCI_STATUS 18

/* An 8-bit r/o interrupt status register. Reading the value will return the
 * current contents of the ISR and will also clear it. This is effectively
 * a read-and-acknowledge. */
#define VIRTIO_PCI_ISR 19

#define VIRTIO_PCI_CONFIG 20

/* Virtio ABI version; if we increment this, we break the guest driver. */
#define VIRTIO_PCI_ABI_VERSION 0
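
/* The register map implied by the offsets above:
 *
 *    0.. 3   host features    (32-bit, r/o)
 *    4.. 7   guest features   (32-bit, r/w)
 *    8..11   queue PFN        (32-bit, r/w)
 *   12..13   queue size       (16-bit, r/o)
 *   14..15   queue selector   (16-bit, r/w)
 *   16..17   queue notify     (16-bit, r/w)
 *   18       device status    ( 8-bit, r/w)
 *   19       ISR status       ( 8-bit, r/o, read clears)
 *   20..     device-specific configuration space
 */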

/* QEMU doesn't strictly need write barriers since everything runs in
 * lock-step. We'll leave the calls to wmb() in though to make it obvious for
 * KVM or if kqemu gets SMP support.
 */
#define wmb() do { } while (0)

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    target_phys_addr_t desc;
    target_phys_addr_t avail;
    target_phys_addr_t used;
} VRing;
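
/* All three areas live contiguously in guest physical memory, in this order:
 * the descriptor table, then the avail ring (written by the guest to offer
 * buffers), then the used ring (written by the host to return them); see
 * virtqueue_init() below for the exact layout. */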

struct VirtQueue
{
    VRing vring;
    uint32_t pfn;
    uint16_t last_avail_idx;
    int inuse;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
};

#define VIRTIO_PCI_QUEUE_MAX 16

/* virt queue functions */
#ifdef VIRTIO_ZERO_COPY
static void *virtio_map_gpa(target_phys_addr_t addr, size_t size)
{
    ram_addr_t off;
    target_phys_addr_t addr1;

    off = cpu_get_physical_page_desc(addr);
    if ((off & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        fprintf(stderr, "virtio DMA to IO ram\n");
        exit(1);
    }

    off = (off & TARGET_PAGE_MASK) | (addr & ~TARGET_PAGE_MASK);

    for (addr1 = addr + TARGET_PAGE_SIZE;
         addr1 < TARGET_PAGE_ALIGN(addr + size);
         addr1 += TARGET_PAGE_SIZE) {
        ram_addr_t off1;

        off1 = cpu_get_physical_page_desc(addr1);
        if ((off1 & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
            fprintf(stderr, "virtio DMA to IO ram\n");
            exit(1);
        }

        off1 = (off1 & TARGET_PAGE_MASK) | (addr1 & ~TARGET_PAGE_MASK);

        if (off1 != (off + (addr1 - addr))) {
            fprintf(stderr, "discontiguous virtio memory\n");
            exit(1);
        }
    }

    return phys_ram_base + off;
}
#endif

static void virtqueue_init(VirtQueue *vq, target_phys_addr_t pa)
{
    vq->vring.desc = pa;
    vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = TARGET_PAGE_ALIGN(vq->vring.avail +
                                       offsetof(VRingAvail, ring[vq->vring.num]));
}
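
/* Worked example, assuming 4K target pages: with num = 256, the descriptor
 * table takes 256 * sizeof(VRingDesc) = 4096 bytes, so avail starts at
 * pa + 4096; avail itself needs offsetof(VRingAvail, ring[256]) = 516 bytes,
 * and used is then page-aligned up to pa + 8192. */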

static inline uint64_t vring_desc_addr(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
    return ldq_phys(pa);
}

static inline uint32_t vring_desc_len(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
    return ldl_phys(pa);
}

static inline uint16_t vring_desc_flags(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_desc_next(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return lduw_phys(pa);
}

static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
    stl_phys(pa, val);
}

static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
    stl_phys(pa, val);
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return lduw_phys(pa);
}

static inline void vring_used_idx_increment(VirtQueue *vq, uint16_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    stw_phys(pa, vring_used_idx(vq) + val);
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) & ~mask);
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    if (enable)
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    else
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

int virtio_queue_empty(VirtQueue *vq)
{
    return vring_avail_idx(vq) == vq->last_avail_idx;
}

void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int offset;
    int i;

#ifndef VIRTIO_ZERO_COPY
    for (i = 0; i < elem->out_num; i++)
        qemu_free(elem->out_sg[i].iov_base);
#endif

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

#ifdef VIRTIO_ZERO_COPY
        if (size) {
            ram_addr_t addr = (uint8_t *)elem->in_sg[i].iov_base - phys_ram_base;
            ram_addr_t off;

            for (off = 0; off < size; off += TARGET_PAGE_SIZE)
                cpu_physical_memory_set_dirty(addr + off);
        }
#else
        if (size)
            cpu_physical_memory_write(elem->in_addr[i],
                                      elem->in_sg[i].iov_base,
                                      size);

        qemu_free(elem->in_sg[i].iov_base);
#endif

        offset += size;
    }

    idx = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Write the element into the next free entry of the used ring. */
    vring_used_ring_id(vq, idx, elem->index);
    vring_used_ring_len(vq, idx, len);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    /* Make sure buffer is written before we update index. */
    wmb();
    vring_used_idx_increment(vq, count);
    vq->inuse -= count;
}
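
/* Devices may call virtqueue_fill() for several completed elements and then
 * publish them all at once with a single virtqueue_flush(count);
 * virtqueue_push() below is the single-element shorthand. */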

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num)
        errx(1, "Guest moved used index from %u to %u",
             idx, vring_avail_idx(vq));

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num)
        errx(1, "Guest says index %u is available", head);

    return head;
}

static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT))
        return vq->vring.num;

    /* Check they're not leading us off the end of the descriptor table. */
    next = vring_desc_next(vq, i);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    wmb();

    if (next >= vq->vring.num)
        errx(1, "Desc next is %u", next);

    return next;
}

int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
{
    unsigned int idx;
    int num_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    num_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        int i;

        i = virtqueue_get_head(vq, idx++);
        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > vq->vring.num)
                errx(1, "Looped descriptor");

            if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) {
                if (in_bytes > 0 &&
                    (in_total += vring_desc_len(vq, i)) >= in_bytes)
                    return 1;
            } else {
                if (out_bytes > 0 &&
                    (out_total += vring_desc_len(vq, i)) >= out_bytes)
                    return 1;
            }
        } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);
    }

    return 0;
}
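
/* This lets a device ask, without consuming anything, whether the guest has
 * queued enough writable (in) or readable (out) descriptor space, e.g. room
 * for an incoming network packet of a given size. */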

int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are neither input nor output buffers. */
    elem->out_num = elem->in_num = 0;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    do {
        struct iovec *sg;

        if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) {
            elem->in_addr[elem->in_num] = vring_desc_addr(vq, i);
            sg = &elem->in_sg[elem->in_num++];
        } else
            sg = &elem->out_sg[elem->out_num++];

        /* Grab the descriptor's length, and check it's OK. */
        sg->iov_len = vring_desc_len(vq, i);

#ifdef VIRTIO_ZERO_COPY
        sg->iov_base = virtio_map_gpa(vring_desc_addr(vq, i), sg->iov_len);
#else
        /* Cap individual scatter element size to prevent unbounded
         * allocations of memory from the guest.  Practically speaking, no
         * virtio driver will ever pass more than a page in each element.
         * We set the cap to 2MB in case for some reason a large page makes
         * its way into the sg list.  When we implement a zero copy API,
         * this limitation will disappear. */
        if (sg->iov_len > (2 << 20))
            sg->iov_len = 2 << 20;

        sg->iov_base = qemu_malloc(sg->iov_len);
        if (sg->iov_base &&
            !(vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)) {
            cpu_physical_memory_read(vring_desc_addr(vq, i),
                                     sg->iov_base,
                                     sg->iov_len);
        }
#endif
        if (sg->iov_base == NULL)
            errx(1, "Invalid mapping");

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > vq->vring.num)
            errx(1, "Looped descriptor");
    } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);

    elem->index = head;

    vq->inuse++;

    return elem->in_num + elem->out_num;
}
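
/* A sketch of the typical device-side consumption loop built from the
 * primitives above (my_process_buffers() is hypothetical, standing in for
 * whatever work the device does with a request):
 *
 *   static void my_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 *   {
 *       VirtQueueElement elem;
 *
 *       while (virtqueue_pop(vq, &elem)) {
 *           // read the request from elem.out_sg, write the reply to
 *           // elem.in_sg; len is the number of bytes written to in_sg
 *           unsigned int len = my_process_buffers(&elem);
 *           virtqueue_push(vq, &elem, len);
 *       }
 *       virtio_notify(vdev, vq);
 *   }
 */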

/* virtio device */

static VirtIODevice *to_virtio_device(PCIDevice *pci_dev)
{
    return (VirtIODevice *)pci_dev;
}

static void virtio_update_irq(VirtIODevice *vdev)
{
    qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1);
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    if (vdev->reset)
        vdev->reset(vdev);

    vdev->features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    virtio_update_irq(vdev);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pfn = 0;
    }
}

static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
    VirtIODevice *vdev = to_virtio_device(opaque);
    ram_addr_t pa;

    addr -= vdev->addr;

    switch (addr) {
    case VIRTIO_PCI_GUEST_FEATURES:
        if (vdev->set_features)
            vdev->set_features(vdev, val);
        vdev->features = val;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        pa = (ram_addr_t)val << TARGET_PAGE_BITS;
        vdev->vq[vdev->queue_sel].pfn = val;
        if (pa == 0) {
            virtio_reset(vdev);
        } else {
            virtqueue_init(&vdev->vq[vdev->queue_sel], pa);
        }
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        if (val < VIRTIO_PCI_QUEUE_MAX)
            vdev->queue_sel = val;
        break;
    case VIRTIO_PCI_QUEUE_NOTIFY:
        if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc)
            vdev->vq[val].handle_output(vdev, &vdev->vq[val]);
        break;
    case VIRTIO_PCI_STATUS:
        vdev->status = val & 0xFF;
        if (vdev->status == 0)
            virtio_reset(vdev);
        break;
    }
}

static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = to_virtio_device(opaque);
    uint32_t ret = 0xFFFFFFFF;

    addr -= vdev->addr;

    switch (addr) {
    case VIRTIO_PCI_HOST_FEATURES:
        ret = vdev->get_features(vdev);
        ret |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY);
        break;
    case VIRTIO_PCI_GUEST_FEATURES:
        ret = vdev->features;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        ret = vdev->vq[vdev->queue_sel].pfn;
        break;
    case VIRTIO_PCI_QUEUE_NUM:
        ret = vdev->vq[vdev->queue_sel].vring.num;
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        ret = vdev->queue_sel;
        break;
    case VIRTIO_PCI_STATUS:
        ret = vdev->status;
        break;
    case VIRTIO_PCI_ISR:
        /* reading from the ISR also clears it. */
        ret = vdev->isr;
        vdev->isr = 0;
        virtio_update_irq(vdev);
        break;
    default:
        break;
    }

    return ret;
}

static uint32_t virtio_config_readb(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint8_t val;

    vdev->get_config(vdev, vdev->config);

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static uint32_t virtio_config_readw(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint16_t val;

    vdev->get_config(vdev, vdev->config);

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static uint32_t virtio_config_readl(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint32_t val;

    vdev->get_config(vdev, vdev->config);

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint8_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint16_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint32_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

static void virtio_map(PCIDevice *pci_dev, int region_num,
                       uint32_t addr, uint32_t size, int type)
{
    VirtIODevice *vdev = to_virtio_device(pci_dev);
    int i;

    vdev->addr = addr;
    for (i = 0; i < 3; i++) {
        register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev);
        register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev);
    }

    if (vdev->config_len) {
        register_ioport_write(addr + 20, vdev->config_len, 1,
                              virtio_config_writeb, vdev);
        register_ioport_write(addr + 20, vdev->config_len, 2,
                              virtio_config_writew, vdev);
        register_ioport_write(addr + 20, vdev->config_len, 4,
                              virtio_config_writel, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 1,
                             virtio_config_readb, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 2,
                             virtio_config_readw, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 4,
                             virtio_config_readl, vdev);

        vdev->get_config(vdev, vdev->config);
    }
}

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    /* Always notify when the ring is empty (VIRTIO_F_NOTIFY_ON_EMPTY);
     * otherwise honour the guest's VRING_AVAIL_F_NO_INTERRUPT suppression. */
    if ((vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx) &&
        (vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT))
        return;

    vdev->isr |= 0x01;
    virtio_update_irq(vdev);
}

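/* The ISR uses bit 0x1 for a virtqueue interrupt and bit 0x2 for a
 * configuration change.  A config notification raises both because
 * virtio_update_irq() asserts the interrupt line from bit 0 alone. */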
void virtio_notify_config(VirtIODevice *vdev)
{
    vdev->isr |= 0x03;
    virtio_update_irq(vdev);
}

void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    int i;

    pci_device_save(&vdev->pci_dev, f);

    qemu_put_be32s(f, &vdev->addr);
    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        qemu_put_be32s(f, &vdev->vq[i].pfn);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
    }
}

void virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i;

    pci_device_load(&vdev->pci_dev, f);

    qemu_get_be32s(f, &vdev->addr);
    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &vdev->features);
    vdev->config_len = qemu_get_be32(f);
    qemu_get_buffer(f, vdev->config, vdev->config_len);

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        qemu_get_be32s(f, &vdev->vq[i].pfn);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);

        if (vdev->vq[i].pfn) {
            target_phys_addr_t pa;

            pa = (ram_addr_t)vdev->vq[i].pfn << TARGET_PAGE_BITS;
            virtqueue_init(&vdev->vq[i], pa);
        }
    }

    virtio_update_irq(vdev);
}

VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
                              uint16_t vendor, uint16_t device,
                              uint16_t subvendor, uint16_t subdevice,
                              uint8_t class_code, uint8_t subclass_code,
                              uint8_t pif, size_t config_size,
                              size_t struct_size)
{
    VirtIODevice *vdev;
    PCIDevice *pci_dev;
    uint8_t *config;
    uint32_t size;

    pci_dev = pci_register_device(bus, name, struct_size,
                                  -1, NULL, NULL);
    if (!pci_dev)
        return NULL;

    vdev = to_virtio_device(pci_dev);

    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);

    config = pci_dev->config;
    config[0x00] = vendor & 0xFF;
    config[0x01] = (vendor >> 8) & 0xFF;
    config[0x02] = device & 0xFF;
    config[0x03] = (device >> 8) & 0xFF;

    config[0x08] = VIRTIO_PCI_ABI_VERSION;

    config[0x09] = pif;
    config[0x0a] = subclass_code;
    config[0x0b] = class_code;
    config[0x0e] = 0x00;

    config[0x2c] = subvendor & 0xFF;
    config[0x2d] = (subvendor >> 8) & 0xFF;
    config[0x2e] = subdevice & 0xFF;
    config[0x2f] = (subdevice >> 8) & 0xFF;

    config[0x3d] = 1;

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len)
        vdev->config = qemu_mallocz(config_size);
    else
        vdev->config = NULL;

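    /* The I/O region covers the 20-byte common header plus the device
     * config; PCI I/O region sizes must be powers of two, so round up
     * (e.g. a 40-byte config gives 20 + 40 = 60 bytes, rounded to 64). */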
    size = 20 + config_size;
    if (size & (size - 1))
        size = 1 << fls(size);

    pci_register_io_region(pci_dev, 0, size, PCI_ADDRESS_SPACE_IO,
                           virtio_map);
    qemu_register_reset(virtio_reset, vdev);

    return vdev;
}
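
/* A minimal sketch of how a device might build on this core (the names and
 * PCI IDs below are illustrative, not part of this patch):
 *
 *   typedef struct MyDevice {
 *       VirtIODevice vdev;    // must come first: the core casts the
 *       VirtQueue *vq;        // PCIDevice to a VirtIODevice
 *   } MyDevice;
 *
 *   VirtIODevice *vdev = virtio_init_pci(bus, "my-device",
 *                                        0x1af4, 0x1000,   // vendor, device
 *                                        0x1af4, 0x0001,   // subsystem IDs
 *                                        0x02, 0x00, 0x00, // class, subclass, pif
 *                                        my_config_len, sizeof(MyDevice));
 *   vdev->get_features = my_get_features;  // required: read unconditionally
 *   vdev->get_config = my_get_config;      // required when my_config_len > 0
 *   virtio_add_queue(vdev, 256, my_handle_output);
 */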

hw/virtio.h (new file, mode 100644)
/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#ifndef _QEMU_VIRTIO_H
#define _QEMU_VIRTIO_H

#include <sys/uio.h>
#include "hw.h"
#include "pci.h"

/* from Linux's linux/virtio_config.h */

/* Status byte for guest to report progress, and synchronize features. */
/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */
#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1
/* We have found a driver for the device. */
#define VIRTIO_CONFIG_S_DRIVER 2
/* Driver has used its parts of the config, and is happy */
#define VIRTIO_CONFIG_S_DRIVER_OK 4
/* We've given up on this device. */
#define VIRTIO_CONFIG_S_FAILED 0x80

/* We notify when the ring is completely used, even if the guest is
 * suppressing callbacks */
#define VIRTIO_F_NOTIFY_ON_EMPTY 24

/* from Linux's linux/virtio_ring.h */

/* This marks a buffer as continuing via the next field. */
#define VRING_DESC_F_NEXT 1
/* This marks a buffer as write-only (otherwise read-only). */
#define VRING_DESC_F_WRITE 2

/* This means don't notify other side when buffer added. */
#define VRING_USED_F_NO_NOTIFY 1
/* This means don't interrupt guest when buffer consumed. */
#define VRING_AVAIL_F_NO_INTERRUPT 1

struct VirtQueue;

typedef struct VirtQueue VirtQueue;
typedef struct VirtIODevice VirtIODevice;

#define VIRTQUEUE_MAX_SIZE 1024

typedef struct VirtQueueElement
{
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    target_phys_addr_t in_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElement;
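
/* out_sg[] carries guest-to-device (read-only) buffers and in_sg[] carries
 * device-to-guest (writable) buffers; in_addr[] remembers the guest physical
 * address of each in_sg entry so completions can be copied back. */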

#define VIRTIO_PCI_QUEUE_MAX 16

struct VirtIODevice
{
    PCIDevice pci_dev;
    const char *name;
    uint32_t addr;
    uint8_t status;
    uint8_t isr;
    uint16_t queue_sel;
    uint32_t features;
    size_t config_len;
    void *config;
    uint32_t (*get_features)(VirtIODevice *vdev);
    void (*set_features)(VirtIODevice *vdev, uint32_t val);
    void (*get_config)(VirtIODevice *vdev, uint8_t *config);
    void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
    void (*reset)(VirtIODevice *vdev);
    VirtQueue *vq;
};
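
/* get_features is called unconditionally and must be set; get_config is
 * required whenever config_len is non-zero.  set_features, set_config and
 * reset are optional and checked for NULL before use. */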

VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
                              uint16_t vendor, uint16_t device,
                              uint16_t subvendor, uint16_t subdevice,
                              uint8_t class_code, uint8_t subclass_code,
                              uint8_t pif, size_t config_size,
                              size_t struct_size);

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *,
                                                  VirtQueue *));

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len);
void virtqueue_flush(VirtQueue *vq, unsigned int count);
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx);

int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes);

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);

void virtio_save(VirtIODevice *vdev, QEMUFile *f);

void virtio_load(VirtIODevice *vdev, QEMUFile *f);

void virtio_notify_config(VirtIODevice *vdev);

void virtio_queue_set_notification(VirtQueue *vq, int enable);

int virtio_queue_ready(VirtQueue *vq);

int virtio_queue_empty(VirtQueue *vq);

#endif