Commit 967f97fa00b2d301584d08c614d446423c65c15a
1 parent b39ade83
Virtio core support
This patch adds core support for VirtIO. VirtIO is a paravirtualization framework that has been in Linux since 2.6.21. A PCI transport has been available since 2.6.25. Network drivers are also available for Windows.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5869 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 3 changed files with 972 additions and 0 deletions
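Taken together, the new files give QEMU device models a small backend API: virtio_init_pci() registers the PCI shell, virtio_add_queue() attaches a virtqueue with an output handler, and the virtqueue_pop()/virtqueue_push()/virtio_notify() trio moves buffers between guest and host. Below is a minimal sketch of how a later device model might sit on top of this core; the device name, PCI IDs, and handler body are illustrative only and are not part of this commit.

    /* Hypothetical device built on the API this commit introduces.  The
     * virtio_* calls come from hw/virtio.h; everything named *foo* is
     * made up for illustration. */
    #include "virtio.h"

    typedef struct VirtIOFoo {
        VirtIODevice vdev;  /* must come first: the core casts PCIDevice* to VirtIODevice* */
        VirtQueue *vq;
    } VirtIOFoo;

    static uint32_t virtio_foo_get_features(VirtIODevice *vdev)
    {
        return 0;  /* no optional feature bits */
    }

    /* Runs when the guest writes this queue's index to VIRTIO_PCI_QUEUE_NOTIFY. */
    static void virtio_foo_handle_output(VirtIODevice *vdev, VirtQueue *vq)
    {
        VirtQueueElement elem;

        while (virtqueue_pop(vq, &elem)) {
            /* consume elem.out_sg[], fill elem.in_sg[], then return the
             * element with the number of bytes written to in_sg */
            virtqueue_push(vq, &elem, 0);
        }
        virtio_notify(vdev, vq);
    }

    VirtIODevice *virtio_foo_init(PCIBus *bus)
    {
        VirtIOFoo *s;

        /* PCI IDs below are placeholders */
        s = (VirtIOFoo *)virtio_init_pci(bus, "virtio-foo",
                                         0x1af4, 0x1000, 0x1af4, 0x1000,
                                         0x00, 0x00, 0x00,
                                         0 /* config_size */, sizeof(VirtIOFoo));
        if (!s)
            return NULL;

        s->vdev.get_features = virtio_foo_get_features;
        s->vq = virtio_add_queue(&s->vdev, 128, virtio_foo_handle_output);
        return &s->vdev;
    }

Note that get_features must always be set: virtio_ioport_read() calls it unconditionally when the guest reads VIRTIO_PCI_HOST_FEATURES.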
Makefile.target
... | ... | @@ -664,6 +664,8 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o |
664 | 664 | OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o |
665 | 665 | OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o |
666 | 666 | OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o |
667 | +# virtio support | |
668 | +OBJS+= virtio.o | |
667 | 669 | CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE |
668 | 670 | endif |
669 | 671 | ifeq ($(TARGET_BASE_ARCH), ppc) |
... | ... | @@ -681,6 +683,8 @@ OBJS+= heathrow_pic.o grackle_pci.o ppc_oldworld.o |
681 | 683 | OBJS+= unin_pci.o ppc_chrp.o |
682 | 684 | # PowerPC 4xx boards |
683 | 685 | OBJS+= pflash_cfi02.o ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o |
686 | +# virtio support | |
687 | +OBJS+= virtio.o | |
684 | 688 | endif |
685 | 689 | ifeq ($(TARGET_BASE_ARCH), mips) |
686 | 690 | OBJS+= mips_r4k.o mips_jazz.o mips_malta.o mips_mipssim.o | ... | ... |
hw/virtio.c
0 → 100644
1 | +/* | |
2 | + * Virtio Support | |
3 | + * | |
4 | + * Copyright IBM, Corp. 2007 | |
5 | + * | |
6 | + * Authors: | |
7 | + * Anthony Liguori <aliguori@us.ibm.com> | |
8 | + * | |
9 | + * This work is licensed under the terms of the GNU GPL, version 2. See | |
10 | + * the COPYING file in the top-level directory. | |
11 | + * | |
12 | + */ | |
13 | + | |
14 | +#include <inttypes.h> | |
15 | +#include <err.h> | |
16 | + | |
17 | +#include "virtio.h" | |
18 | +#include "sysemu.h" | |
19 | + | |
20 | +//#define VIRTIO_ZERO_COPY | |
21 | + | |
22 | +/* from Linux's linux/virtio_pci.h */ | |
23 | + | |
24 | +/* A 32-bit r/o bitmask of the features supported by the host */ | |
25 | +#define VIRTIO_PCI_HOST_FEATURES 0 | |
26 | + | |
27 | +/* A 32-bit r/w bitmask of features activated by the guest */ | |
28 | +#define VIRTIO_PCI_GUEST_FEATURES 4 | |
29 | + | |
30 | +/* A 32-bit r/w PFN for the currently selected queue */ | |
31 | +#define VIRTIO_PCI_QUEUE_PFN 8 | |
32 | + | |
33 | +/* A 16-bit r/o queue size for the currently selected queue */ | |
34 | +#define VIRTIO_PCI_QUEUE_NUM 12 | |
35 | + | |
36 | +/* A 16-bit r/w queue selector */ | |
37 | +#define VIRTIO_PCI_QUEUE_SEL 14 | |
38 | + | |
39 | +/* A 16-bit r/w queue notifier */ | |
40 | +#define VIRTIO_PCI_QUEUE_NOTIFY 16 | |
41 | + | |
42 | +/* An 8-bit device status register. */ | |
43 | +#define VIRTIO_PCI_STATUS 18 | |
44 | + | |
45 | +/* An 8-bit r/o interrupt status register. Reading the value will return the | |
46 | + * current contents of the ISR and will also clear it. This is effectively | |
47 | + * a read-and-acknowledge. */ | |
48 | +#define VIRTIO_PCI_ISR 19 | |
49 | + | |
50 | +#define VIRTIO_PCI_CONFIG 20 | |
51 | + | |
52 | +/* Virtio ABI version; if we increment this, we break the guest driver. */ | |
53 | +#define VIRTIO_PCI_ABI_VERSION 0 | |
54 | + | |
55 | +/* QEMU doesn't strictly need write barriers since everything runs in | |
56 | + * lock-step. We'll leave the wmb() calls in, though, to make it obvious for | |
57 | + * KVM or if kqemu gets SMP support. | |
58 | + */ | |
59 | +#define wmb() do { } while (0) | |
60 | + | |
61 | +typedef struct VRingDesc | |
62 | +{ | |
63 | + uint64_t addr; | |
64 | + uint32_t len; | |
65 | + uint16_t flags; | |
66 | + uint16_t next; | |
67 | +} VRingDesc; | |
68 | + | |
69 | +typedef struct VRingAvail | |
70 | +{ | |
71 | + uint16_t flags; | |
72 | + uint16_t idx; | |
73 | + uint16_t ring[0]; | |
74 | +} VRingAvail; | |
75 | + | |
76 | +typedef struct VRingUsedElem | |
77 | +{ | |
78 | + uint32_t id; | |
79 | + uint32_t len; | |
80 | +} VRingUsedElem; | |
81 | + | |
82 | +typedef struct VRingUsed | |
83 | +{ | |
84 | + uint16_t flags; | |
85 | + uint16_t idx; | |
86 | + VRingUsedElem ring[0]; | |
87 | +} VRingUsed; | |
88 | + | |
89 | +typedef struct VRing | |
90 | +{ | |
91 | + unsigned int num; | |
92 | + target_phys_addr_t desc; | |
93 | + target_phys_addr_t avail; | |
94 | + target_phys_addr_t used; | |
95 | +} VRing; | |
96 | + | |
97 | +struct VirtQueue | |
98 | +{ | |
99 | + VRing vring; | |
100 | + uint32_t pfn; | |
101 | + uint16_t last_avail_idx; | |
102 | + int inuse; | |
103 | + void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); | |
104 | +}; | |
105 | + | |
106 | +#define VIRTIO_PCI_QUEUE_MAX 16 | |
107 | + | |
108 | +/* virt queue functions */ | |
109 | +#ifdef VIRTIO_ZERO_COPY | |
110 | +static void *virtio_map_gpa(target_phys_addr_t addr, size_t size) | |
111 | +{ | |
112 | + ram_addr_t off; | |
113 | + target_phys_addr_t addr1; | |
114 | + | |
115 | + off = cpu_get_physical_page_desc(addr); | |
116 | + if ((off & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { | |
117 | + fprintf(stderr, "virtio DMA to IO ram\n"); | |
118 | + exit(1); | |
119 | + } | |
120 | + | |
121 | + off = (off & TARGET_PAGE_MASK) | (addr & ~TARGET_PAGE_MASK); | |
122 | + | |
123 | + for (addr1 = addr + TARGET_PAGE_SIZE; | |
124 | + addr1 < TARGET_PAGE_ALIGN(addr + size); | |
125 | + addr1 += TARGET_PAGE_SIZE) { | |
126 | + ram_addr_t off1; | |
127 | + | |
128 | + off1 = cpu_get_physical_page_desc(addr1); | |
129 | + if ((off1 & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { | |
130 | + fprintf(stderr, "virtio DMA to IO ram\n"); | |
131 | + exit(1); | |
132 | + } | |
133 | + | |
134 | + off1 = (off1 & TARGET_PAGE_MASK) | (addr1 & ~TARGET_PAGE_MASK); | |
135 | + | |
136 | + if (off1 != (off + (addr1 - addr))) { | |
137 | +             fprintf(stderr, "discontiguous virtio memory\n"); | |
138 | + exit(1); | |
139 | + } | |
140 | + } | |
141 | + | |
142 | + return phys_ram_base + off; | |
143 | +} | |
144 | +#endif | |
145 | + | |
146 | +static void virtqueue_init(VirtQueue *vq, target_phys_addr_t pa) | |
147 | +{ | |
148 | + vq->vring.desc = pa; | |
149 | + vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc); | |
150 | + vq->vring.used = TARGET_PAGE_ALIGN(vq->vring.avail + offsetof(VRingAvail, ring[vq->vring.num])); | |
151 | +} | |
152 | + | |
153 | +static inline uint64_t vring_desc_addr(VirtQueue *vq, int i) | |
154 | +{ | |
155 | + target_phys_addr_t pa; | |
156 | + pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr); | |
157 | + return ldq_phys(pa); | |
158 | +} | |
159 | + | |
160 | +static inline uint32_t vring_desc_len(VirtQueue *vq, int i) | |
161 | +{ | |
162 | + target_phys_addr_t pa; | |
163 | + pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, len); | |
164 | + return ldl_phys(pa); | |
165 | +} | |
166 | + | |
167 | +static inline uint16_t vring_desc_flags(VirtQueue *vq, int i) | |
168 | +{ | |
169 | + target_phys_addr_t pa; | |
170 | + pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags); | |
171 | + return lduw_phys(pa); | |
172 | +} | |
173 | + | |
174 | +static inline uint16_t vring_desc_next(VirtQueue *vq, int i) | |
175 | +{ | |
176 | + target_phys_addr_t pa; | |
177 | + pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, next); | |
178 | + return lduw_phys(pa); | |
179 | +} | |
180 | + | |
181 | +static inline uint16_t vring_avail_flags(VirtQueue *vq) | |
182 | +{ | |
183 | + target_phys_addr_t pa; | |
184 | + pa = vq->vring.avail + offsetof(VRingAvail, flags); | |
185 | + return lduw_phys(pa); | |
186 | +} | |
187 | + | |
188 | +static inline uint16_t vring_avail_idx(VirtQueue *vq) | |
189 | +{ | |
190 | + target_phys_addr_t pa; | |
191 | + pa = vq->vring.avail + offsetof(VRingAvail, idx); | |
192 | + return lduw_phys(pa); | |
193 | +} | |
194 | + | |
195 | +static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) | |
196 | +{ | |
197 | + target_phys_addr_t pa; | |
198 | + pa = vq->vring.avail + offsetof(VRingAvail, ring[i]); | |
199 | + return lduw_phys(pa); | |
200 | +} | |
201 | + | |
202 | +static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val) | |
203 | +{ | |
204 | + target_phys_addr_t pa; | |
205 | + pa = vq->vring.used + offsetof(VRingUsed, ring[i].id); | |
206 | + stl_phys(pa, val); | |
207 | +} | |
208 | + | |
209 | +static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val) | |
210 | +{ | |
211 | + target_phys_addr_t pa; | |
212 | + pa = vq->vring.used + offsetof(VRingUsed, ring[i].len); | |
213 | + stl_phys(pa, val); | |
214 | +} | |
215 | + | |
216 | +static uint16_t vring_used_idx(VirtQueue *vq) | |
217 | +{ | |
218 | + target_phys_addr_t pa; | |
219 | + pa = vq->vring.used + offsetof(VRingUsed, idx); | |
220 | + return lduw_phys(pa); | |
221 | +} | |
222 | + | |
223 | +static inline void vring_used_idx_increment(VirtQueue *vq, uint16_t val) | |
224 | +{ | |
225 | + target_phys_addr_t pa; | |
226 | + pa = vq->vring.used + offsetof(VRingUsed, idx); | |
227 | + stw_phys(pa, vring_used_idx(vq) + val); | |
228 | +} | |
229 | + | |
230 | +static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) | |
231 | +{ | |
232 | + target_phys_addr_t pa; | |
233 | + pa = vq->vring.used + offsetof(VRingUsed, flags); | |
234 | + stw_phys(pa, lduw_phys(pa) | mask); | |
235 | +} | |
236 | + | |
237 | +static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) | |
238 | +{ | |
239 | + target_phys_addr_t pa; | |
240 | + pa = vq->vring.used + offsetof(VRingUsed, flags); | |
241 | + stw_phys(pa, lduw_phys(pa) & ~mask); | |
242 | +} | |
243 | + | |
244 | +void virtio_queue_set_notification(VirtQueue *vq, int enable) | |
245 | +{ | |
246 | + if (enable) | |
247 | + vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY); | |
248 | + else | |
249 | + vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY); | |
250 | +} | |
251 | + | |
252 | +int virtio_queue_ready(VirtQueue *vq) | |
253 | +{ | |
254 | + return vq->vring.avail != 0; | |
255 | +} | |
256 | + | |
257 | +int virtio_queue_empty(VirtQueue *vq) | |
258 | +{ | |
259 | + return vring_avail_idx(vq) == vq->last_avail_idx; | |
260 | +} | |
261 | + | |
262 | +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, | |
263 | + unsigned int len, unsigned int idx) | |
264 | +{ | |
265 | + unsigned int offset; | |
266 | + int i; | |
267 | + | |
268 | +#ifndef VIRTIO_ZERO_COPY | |
269 | + for (i = 0; i < elem->out_num; i++) | |
270 | + qemu_free(elem->out_sg[i].iov_base); | |
271 | +#endif | |
272 | + | |
273 | + offset = 0; | |
274 | + for (i = 0; i < elem->in_num; i++) { | |
275 | + size_t size = MIN(len - offset, elem->in_sg[i].iov_len); | |
276 | + | |
277 | +#ifdef VIRTIO_ZERO_COPY | |
278 | + if (size) { | |
279 | + ram_addr_t addr = (uint8_t *)elem->in_sg[i].iov_base - phys_ram_base; | |
280 | + ram_addr_t off; | |
281 | + | |
282 | + for (off = 0; off < size; off += TARGET_PAGE_SIZE) | |
283 | + cpu_physical_memory_set_dirty(addr + off); | |
284 | + } | |
285 | +#else | |
286 | + if (size) | |
287 | + cpu_physical_memory_write(elem->in_addr[i], | |
288 | + elem->in_sg[i].iov_base, | |
289 | + size); | |
290 | + | |
291 | + qemu_free(elem->in_sg[i].iov_base); | |
292 | +#endif | |
293 | + | |
294 | + offset += size; | |
295 | + } | |
296 | + | |
297 | + idx = (idx + vring_used_idx(vq)) % vq->vring.num; | |
298 | + | |
299 | +    /* Fill in the next entry in the used ring. */ | |
300 | + vring_used_ring_id(vq, idx, elem->index); | |
301 | + vring_used_ring_len(vq, idx, len); | |
302 | +} | |
303 | + | |
304 | +void virtqueue_flush(VirtQueue *vq, unsigned int count) | |
305 | +{ | |
306 | + /* Make sure buffer is written before we update index. */ | |
307 | + wmb(); | |
308 | + vring_used_idx_increment(vq, count); | |
309 | + vq->inuse -= count; | |
310 | +} | |
311 | + | |
312 | +void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, | |
313 | + unsigned int len) | |
314 | +{ | |
315 | + virtqueue_fill(vq, elem, len, 0); | |
316 | + virtqueue_flush(vq, 1); | |
317 | +} | |
318 | + | |
319 | +static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) | |
320 | +{ | |
321 | + uint16_t num_heads = vring_avail_idx(vq) - idx; | |
322 | + | |
323 | + /* Check it isn't doing very strange things with descriptor numbers. */ | |
324 | + if (num_heads > vq->vring.num) | |
325 | + errx(1, "Guest moved used index from %u to %u", | |
326 | + idx, vring_avail_idx(vq)); | |
327 | + | |
328 | + return num_heads; | |
329 | +} | |
330 | + | |
331 | +static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx) | |
332 | +{ | |
333 | + unsigned int head; | |
334 | + | |
335 | + /* Grab the next descriptor number they're advertising, and increment | |
336 | + * the index we've seen. */ | |
337 | + head = vring_avail_ring(vq, idx % vq->vring.num); | |
338 | + | |
339 | + /* If their number is silly, that's a fatal mistake. */ | |
340 | + if (head >= vq->vring.num) | |
341 | + errx(1, "Guest says index %u is available", head); | |
342 | + | |
343 | + return head; | |
344 | +} | |
345 | + | |
346 | +static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) | |
347 | +{ | |
348 | + unsigned int next; | |
349 | + | |
350 | + /* If this descriptor says it doesn't chain, we're done. */ | |
351 | + if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT)) | |
352 | + return vq->vring.num; | |
353 | + | |
354 | +    /* Check they're not leading us off the end of the descriptor table. */ | |
355 | + next = vring_desc_next(vq, i); | |
356 | + /* Make sure compiler knows to grab that: we don't want it changing! */ | |
357 | + wmb(); | |
358 | + | |
359 | + if (next >= vq->vring.num) | |
360 | + errx(1, "Desc next is %u", next); | |
361 | + | |
362 | + return next; | |
363 | +} | |
364 | + | |
365 | +int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) | |
366 | +{ | |
367 | + unsigned int idx; | |
368 | + int num_bufs, in_total, out_total; | |
369 | + | |
370 | + idx = vq->last_avail_idx; | |
371 | + | |
372 | + num_bufs = in_total = out_total = 0; | |
373 | + while (virtqueue_num_heads(vq, idx)) { | |
374 | + int i; | |
375 | + | |
376 | + i = virtqueue_get_head(vq, idx++); | |
377 | + do { | |
378 | + /* If we've got too many, that implies a descriptor loop. */ | |
379 | + if (++num_bufs > vq->vring.num) | |
380 | + errx(1, "Looped descriptor"); | |
381 | + | |
382 | + if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) { | |
383 | + if (in_bytes > 0 && | |
384 | + (in_total += vring_desc_len(vq, i)) >= in_bytes) | |
385 | + return 1; | |
386 | + } else { | |
387 | + if (out_bytes > 0 && | |
388 | + (out_total += vring_desc_len(vq, i)) >= out_bytes) | |
389 | + return 1; | |
390 | + } | |
391 | + } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); | |
392 | + } | |
393 | + | |
394 | + return 0; | |
395 | +} | |
396 | + | |
397 | +int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) | |
398 | +{ | |
399 | + unsigned int i, head; | |
400 | + | |
401 | + if (!virtqueue_num_heads(vq, vq->last_avail_idx)) | |
402 | + return 0; | |
403 | + | |
404 | +    /* When we start there are no input or output buffers. */ | |
405 | + elem->out_num = elem->in_num = 0; | |
406 | + | |
407 | + i = head = virtqueue_get_head(vq, vq->last_avail_idx++); | |
408 | + do { | |
409 | + struct iovec *sg; | |
410 | + | |
411 | + if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) { | |
412 | + elem->in_addr[elem->in_num] = vring_desc_addr(vq, i); | |
413 | + sg = &elem->in_sg[elem->in_num++]; | |
414 | + } else | |
415 | + sg = &elem->out_sg[elem->out_num++]; | |
416 | + | |
417 | + /* Grab the first descriptor, and check it's OK. */ | |
418 | + sg->iov_len = vring_desc_len(vq, i); | |
419 | + | |
420 | +#ifdef VIRTIO_ZERO_COPY | |
421 | + sg->iov_base = virtio_map_gpa(vring_desc_addr(vq, i), sg->iov_len); | |
422 | +#else | |
423 | +        /* Cap the individual scatter element size to prevent unbounded | |
424 | +           allocation of memory from the guest. Practically speaking, no virtio | |
425 | +           driver will ever pass more than a page in each element. We set the | |
426 | +           cap to 2MB in case a large page somehow makes its way into the | |
427 | +           sg list. When we implement a zero-copy API, this limitation will | |
428 | +           disappear. */ | |
429 | + if (sg->iov_len > (2 << 20)) | |
430 | + sg->iov_len = 2 << 20; | |
431 | + | |
432 | + sg->iov_base = qemu_malloc(sg->iov_len); | |
433 | + if (sg->iov_base && | |
434 | + !(vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)) { | |
435 | + cpu_physical_memory_read(vring_desc_addr(vq, i), | |
436 | + sg->iov_base, | |
437 | + sg->iov_len); | |
438 | + } | |
439 | +#endif | |
440 | + if (sg->iov_base == NULL) | |
441 | +        errx(1, "Invalid mapping"); | |
442 | + | |
443 | + /* If we've got too many, that implies a descriptor loop. */ | |
444 | + if ((elem->in_num + elem->out_num) > vq->vring.num) | |
445 | + errx(1, "Looped descriptor"); | |
446 | + } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); | |
447 | + | |
448 | + elem->index = head; | |
449 | + | |
450 | + vq->inuse++; | |
451 | + | |
452 | + return elem->in_num + elem->out_num; | |
453 | +} | |
454 | + | |
455 | +/* virtio device */ | |
456 | + | |
457 | +static VirtIODevice *to_virtio_device(PCIDevice *pci_dev) | |
458 | +{ | |
459 | + return (VirtIODevice *)pci_dev; | |
460 | +} | |
461 | + | |
462 | +static void virtio_update_irq(VirtIODevice *vdev) | |
463 | +{ | |
464 | + qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1); | |
465 | +} | |
466 | + | |
467 | +void virtio_reset(void *opaque) | |
468 | +{ | |
469 | + VirtIODevice *vdev = opaque; | |
470 | + int i; | |
471 | + | |
472 | + if (vdev->reset) | |
473 | + vdev->reset(vdev); | |
474 | + | |
475 | + vdev->features = 0; | |
476 | + vdev->queue_sel = 0; | |
477 | + vdev->status = 0; | |
478 | + vdev->isr = 0; | |
479 | + virtio_update_irq(vdev); | |
480 | + | |
481 | + for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
482 | + vdev->vq[i].vring.desc = 0; | |
483 | + vdev->vq[i].vring.avail = 0; | |
484 | + vdev->vq[i].vring.used = 0; | |
485 | + vdev->vq[i].last_avail_idx = 0; | |
486 | + vdev->vq[i].pfn = 0; | |
487 | + } | |
488 | +} | |
489 | + | |
490 | +static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) | |
491 | +{ | |
492 | + VirtIODevice *vdev = to_virtio_device(opaque); | |
493 | + ram_addr_t pa; | |
494 | + | |
495 | + addr -= vdev->addr; | |
496 | + | |
497 | + switch (addr) { | |
498 | + case VIRTIO_PCI_GUEST_FEATURES: | |
499 | + if (vdev->set_features) | |
500 | + vdev->set_features(vdev, val); | |
501 | + vdev->features = val; | |
502 | + break; | |
503 | + case VIRTIO_PCI_QUEUE_PFN: | |
504 | + pa = (ram_addr_t)val << TARGET_PAGE_BITS; | |
505 | + vdev->vq[vdev->queue_sel].pfn = val; | |
506 | + if (pa == 0) { | |
507 | + virtio_reset(vdev); | |
508 | + } else { | |
509 | + virtqueue_init(&vdev->vq[vdev->queue_sel], pa); | |
510 | + } | |
511 | + break; | |
512 | + case VIRTIO_PCI_QUEUE_SEL: | |
513 | + if (val < VIRTIO_PCI_QUEUE_MAX) | |
514 | + vdev->queue_sel = val; | |
515 | + break; | |
516 | + case VIRTIO_PCI_QUEUE_NOTIFY: | |
517 | + if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc) | |
518 | + vdev->vq[val].handle_output(vdev, &vdev->vq[val]); | |
519 | + break; | |
520 | + case VIRTIO_PCI_STATUS: | |
521 | + vdev->status = val & 0xFF; | |
522 | + if (vdev->status == 0) | |
523 | + virtio_reset(vdev); | |
524 | + break; | |
525 | + } | |
526 | +} | |
527 | + | |
528 | +static uint32_t virtio_ioport_read(void *opaque, uint32_t addr) | |
529 | +{ | |
530 | + VirtIODevice *vdev = to_virtio_device(opaque); | |
531 | + uint32_t ret = 0xFFFFFFFF; | |
532 | + | |
533 | + addr -= vdev->addr; | |
534 | + | |
535 | + switch (addr) { | |
536 | + case VIRTIO_PCI_HOST_FEATURES: | |
537 | + ret = vdev->get_features(vdev); | |
538 | + ret |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY); | |
539 | + break; | |
540 | + case VIRTIO_PCI_GUEST_FEATURES: | |
541 | + ret = vdev->features; | |
542 | + break; | |
543 | + case VIRTIO_PCI_QUEUE_PFN: | |
544 | + ret = vdev->vq[vdev->queue_sel].pfn; | |
545 | + break; | |
546 | + case VIRTIO_PCI_QUEUE_NUM: | |
547 | + ret = vdev->vq[vdev->queue_sel].vring.num; | |
548 | + break; | |
549 | + case VIRTIO_PCI_QUEUE_SEL: | |
550 | + ret = vdev->queue_sel; | |
551 | + break; | |
552 | + case VIRTIO_PCI_STATUS: | |
553 | + ret = vdev->status; | |
554 | + break; | |
555 | + case VIRTIO_PCI_ISR: | |
556 | + /* reading from the ISR also clears it. */ | |
557 | + ret = vdev->isr; | |
558 | + vdev->isr = 0; | |
559 | + virtio_update_irq(vdev); | |
560 | + break; | |
561 | + default: | |
562 | + break; | |
563 | + } | |
564 | + | |
565 | + return ret; | |
566 | +} | |
567 | + | |
568 | +static uint32_t virtio_config_readb(void *opaque, uint32_t addr) | |
569 | +{ | |
570 | + VirtIODevice *vdev = opaque; | |
571 | + uint8_t val; | |
572 | + | |
573 | + vdev->get_config(vdev, vdev->config); | |
574 | + | |
575 | + addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
576 | + if (addr > (vdev->config_len - sizeof(val))) | |
577 | + return (uint32_t)-1; | |
578 | + | |
579 | + memcpy(&val, vdev->config + addr, sizeof(val)); | |
580 | + return val; | |
581 | +} | |
582 | + | |
583 | +static uint32_t virtio_config_readw(void *opaque, uint32_t addr) | |
584 | +{ | |
585 | + VirtIODevice *vdev = opaque; | |
586 | + uint16_t val; | |
587 | + | |
588 | + vdev->get_config(vdev, vdev->config); | |
589 | + | |
590 | + addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
591 | + if (addr > (vdev->config_len - sizeof(val))) | |
592 | + return (uint32_t)-1; | |
593 | + | |
594 | + memcpy(&val, vdev->config + addr, sizeof(val)); | |
595 | + return val; | |
596 | +} | |
597 | + | |
598 | +static uint32_t virtio_config_readl(void *opaque, uint32_t addr) | |
599 | +{ | |
600 | + VirtIODevice *vdev = opaque; | |
601 | + uint32_t val; | |
602 | + | |
603 | + vdev->get_config(vdev, vdev->config); | |
604 | + | |
605 | + addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
606 | + if (addr > (vdev->config_len - sizeof(val))) | |
607 | + return (uint32_t)-1; | |
608 | + | |
609 | + memcpy(&val, vdev->config + addr, sizeof(val)); | |
610 | + return val; | |
611 | +} | |
612 | + | |
613 | +static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data) | |
614 | +{ | |
615 | + VirtIODevice *vdev = opaque; | |
616 | + uint8_t val = data; | |
617 | + | |
618 | + addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
619 | + if (addr > (vdev->config_len - sizeof(val))) | |
620 | + return; | |
621 | + | |
622 | + memcpy(vdev->config + addr, &val, sizeof(val)); | |
623 | + | |
624 | + if (vdev->set_config) | |
625 | + vdev->set_config(vdev, vdev->config); | |
626 | +} | |
627 | + | |
628 | +static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data) | |
629 | +{ | |
630 | + VirtIODevice *vdev = opaque; | |
631 | + uint16_t val = data; | |
632 | + | |
633 | + addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
634 | + if (addr > (vdev->config_len - sizeof(val))) | |
635 | + return; | |
636 | + | |
637 | + memcpy(vdev->config + addr, &val, sizeof(val)); | |
638 | + | |
639 | + if (vdev->set_config) | |
640 | + vdev->set_config(vdev, vdev->config); | |
641 | +} | |
642 | + | |
643 | +static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data) | |
644 | +{ | |
645 | + VirtIODevice *vdev = opaque; | |
646 | + uint32_t val = data; | |
647 | + | |
648 | + addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
649 | + if (addr > (vdev->config_len - sizeof(val))) | |
650 | + return; | |
651 | + | |
652 | + memcpy(vdev->config + addr, &val, sizeof(val)); | |
653 | + | |
654 | + if (vdev->set_config) | |
655 | + vdev->set_config(vdev, vdev->config); | |
656 | +} | |
657 | + | |
658 | +static void virtio_map(PCIDevice *pci_dev, int region_num, | |
659 | + uint32_t addr, uint32_t size, int type) | |
660 | +{ | |
661 | + VirtIODevice *vdev = to_virtio_device(pci_dev); | |
662 | + int i; | |
663 | + | |
664 | + vdev->addr = addr; | |
665 | + for (i = 0; i < 3; i++) { | |
666 | + register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev); | |
667 | + register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev); | |
668 | + } | |
669 | + | |
670 | + if (vdev->config_len) { | |
671 | + register_ioport_write(addr + 20, vdev->config_len, 1, | |
672 | + virtio_config_writeb, vdev); | |
673 | + register_ioport_write(addr + 20, vdev->config_len, 2, | |
674 | + virtio_config_writew, vdev); | |
675 | + register_ioport_write(addr + 20, vdev->config_len, 4, | |
676 | + virtio_config_writel, vdev); | |
677 | + register_ioport_read(addr + 20, vdev->config_len, 1, | |
678 | + virtio_config_readb, vdev); | |
679 | + register_ioport_read(addr + 20, vdev->config_len, 2, | |
680 | + virtio_config_readw, vdev); | |
681 | + register_ioport_read(addr + 20, vdev->config_len, 4, | |
682 | + virtio_config_readl, vdev); | |
683 | + | |
684 | + vdev->get_config(vdev, vdev->config); | |
685 | + } | |
686 | +} | |
687 | + | |
688 | +VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, | |
689 | + void (*handle_output)(VirtIODevice *, VirtQueue *)) | |
690 | +{ | |
691 | + int i; | |
692 | + | |
693 | + for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
694 | + if (vdev->vq[i].vring.num == 0) | |
695 | + break; | |
696 | + } | |
697 | + | |
698 | + if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) | |
699 | + abort(); | |
700 | + | |
701 | + vdev->vq[i].vring.num = queue_size; | |
702 | + vdev->vq[i].handle_output = handle_output; | |
703 | + | |
704 | + return &vdev->vq[i]; | |
705 | +} | |
706 | + | |
707 | +void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) | |
708 | +{ | |
709 | + /* Always notify when queue is empty */ | |
710 | + if ((vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx) && | |
711 | + (vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT)) | |
712 | + return; | |
713 | + | |
714 | + vdev->isr |= 0x01; | |
715 | + virtio_update_irq(vdev); | |
716 | +} | |
717 | + | |
718 | +void virtio_notify_config(VirtIODevice *vdev) | |
719 | +{ | |
720 | + vdev->isr |= 0x03; | |
721 | + virtio_update_irq(vdev); | |
722 | +} | |
723 | + | |
724 | +void virtio_save(VirtIODevice *vdev, QEMUFile *f) | |
725 | +{ | |
726 | + int i; | |
727 | + | |
728 | + pci_device_save(&vdev->pci_dev, f); | |
729 | + | |
730 | + qemu_put_be32s(f, &vdev->addr); | |
731 | + qemu_put_8s(f, &vdev->status); | |
732 | + qemu_put_8s(f, &vdev->isr); | |
733 | + qemu_put_be16s(f, &vdev->queue_sel); | |
734 | + qemu_put_be32s(f, &vdev->features); | |
735 | + qemu_put_be32(f, vdev->config_len); | |
736 | + qemu_put_buffer(f, vdev->config, vdev->config_len); | |
737 | + | |
738 | + for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
739 | + if (vdev->vq[i].vring.num == 0) | |
740 | + break; | |
741 | + } | |
742 | + | |
743 | + qemu_put_be32(f, i); | |
744 | + | |
745 | + for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
746 | + if (vdev->vq[i].vring.num == 0) | |
747 | + break; | |
748 | + | |
749 | + qemu_put_be32(f, vdev->vq[i].vring.num); | |
750 | + qemu_put_be32s(f, &vdev->vq[i].pfn); | |
751 | + qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); | |
752 | + } | |
753 | +} | |
754 | + | |
755 | +void virtio_load(VirtIODevice *vdev, QEMUFile *f) | |
756 | +{ | |
757 | + int num, i; | |
758 | + | |
759 | + pci_device_load(&vdev->pci_dev, f); | |
760 | + | |
761 | + qemu_get_be32s(f, &vdev->addr); | |
762 | + qemu_get_8s(f, &vdev->status); | |
763 | + qemu_get_8s(f, &vdev->isr); | |
764 | + qemu_get_be16s(f, &vdev->queue_sel); | |
765 | + qemu_get_be32s(f, &vdev->features); | |
766 | + vdev->config_len = qemu_get_be32(f); | |
767 | + qemu_get_buffer(f, vdev->config, vdev->config_len); | |
768 | + | |
769 | + num = qemu_get_be32(f); | |
770 | + | |
771 | + for (i = 0; i < num; i++) { | |
772 | + vdev->vq[i].vring.num = qemu_get_be32(f); | |
773 | + qemu_get_be32s(f, &vdev->vq[i].pfn); | |
774 | + qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); | |
775 | + | |
776 | + if (vdev->vq[i].pfn) { | |
777 | + target_phys_addr_t pa; | |
778 | + | |
779 | + pa = (ram_addr_t)vdev->vq[i].pfn << TARGET_PAGE_BITS; | |
780 | + virtqueue_init(&vdev->vq[i], pa); | |
781 | + } | |
782 | + } | |
783 | + | |
784 | + virtio_update_irq(vdev); | |
785 | +} | |
786 | + | |
787 | +VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name, | |
788 | + uint16_t vendor, uint16_t device, | |
789 | + uint16_t subvendor, uint16_t subdevice, | |
790 | + uint8_t class_code, uint8_t subclass_code, | |
791 | + uint8_t pif, size_t config_size, | |
792 | + size_t struct_size) | |
793 | +{ | |
794 | + VirtIODevice *vdev; | |
795 | + PCIDevice *pci_dev; | |
796 | + uint8_t *config; | |
797 | + uint32_t size; | |
798 | + | |
799 | + pci_dev = pci_register_device(bus, name, struct_size, | |
800 | + -1, NULL, NULL); | |
801 | + if (!pci_dev) | |
802 | + return NULL; | |
803 | + | |
804 | + vdev = to_virtio_device(pci_dev); | |
805 | + | |
806 | + vdev->status = 0; | |
807 | + vdev->isr = 0; | |
808 | + vdev->queue_sel = 0; | |
809 | + vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX); | |
810 | + | |
811 | + config = pci_dev->config; | |
812 | + config[0x00] = vendor & 0xFF; | |
813 | + config[0x01] = (vendor >> 8) & 0xFF; | |
814 | + config[0x02] = device & 0xFF; | |
815 | + config[0x03] = (device >> 8) & 0xFF; | |
816 | + | |
817 | + config[0x08] = VIRTIO_PCI_ABI_VERSION; | |
818 | + | |
819 | + config[0x09] = pif; | |
820 | + config[0x0a] = subclass_code; | |
821 | + config[0x0b] = class_code; | |
822 | + config[0x0e] = 0x00; | |
823 | + | |
824 | + config[0x2c] = subvendor & 0xFF; | |
825 | + config[0x2d] = (subvendor >> 8) & 0xFF; | |
826 | + config[0x2e] = subdevice & 0xFF; | |
827 | + config[0x2f] = (subdevice >> 8) & 0xFF; | |
828 | + | |
829 | + config[0x3d] = 1; | |
830 | + | |
831 | + vdev->name = name; | |
832 | + vdev->config_len = config_size; | |
833 | + if (vdev->config_len) | |
834 | + vdev->config = qemu_mallocz(config_size); | |
835 | + else | |
836 | + vdev->config = NULL; | |
837 | + | |
838 | + size = 20 + config_size; | |
839 | + if (size & (size-1)) | |
840 | + size = 1 << fls(size); | |
841 | + | |
842 | + pci_register_io_region(pci_dev, 0, size, PCI_ADDRESS_SPACE_IO, | |
843 | + virtio_map); | |
844 | + qemu_register_reset(virtio_reset, vdev); | |
845 | + | |
846 | + return vdev; | |
847 | +} | ... | ... |
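One detail worth tracing is virtqueue_init(): the guest hands over a single page frame number, and the device derives all three vring regions from it — the descriptor table first, then the avail ring, then the used ring rounded up to the next page boundary. A standalone sketch of that arithmetic, assuming a 4 KiB TARGET_PAGE_SIZE and a queue of 256 entries (both values are illustrative):

    /* Mirrors the layout math in virtqueue_init(); page size assumed. */
    #include <assert.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096u
    #define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(uint64_t)(PAGE_SIZE - 1))

    int main(void)
    {
        unsigned num = 256;          /* queue size */
        uint64_t pa = 0x10000;       /* pfn << TARGET_PAGE_BITS from the guest */

        uint64_t desc  = pa;                              /* num 16-byte VRingDesc entries */
        uint64_t avail = desc + num * 16;                 /* flags + idx + uint16_t ring[num] */
        uint64_t used  = PAGE_ALIGN(avail + 4 + 2 * num); /* used ring starts page-aligned */

        assert(avail == pa + 4096);  /* 256 * 16 */
        assert(used  == pa + 8192);  /* align(pa + 4096 + 516) */
        return 0;
    }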
hw/virtio.h
0 → 100644
1 | +/* | |
2 | + * Virtio Support | |
3 | + * | |
4 | + * Copyright IBM, Corp. 2007 | |
5 | + * | |
6 | + * Authors: | |
7 | + * Anthony Liguori <aliguori@us.ibm.com> | |
8 | + * | |
9 | + * This work is licensed under the terms of the GNU GPL, version 2. See | |
10 | + * the COPYING file in the top-level directory. | |
11 | + * | |
12 | + */ | |
13 | + | |
14 | +#ifndef _QEMU_VIRTIO_H | |
15 | +#define _QEMU_VIRTIO_H | |
16 | + | |
17 | +#include <sys/uio.h> | |
18 | +#include "hw.h" | |
19 | +#include "pci.h" | |
20 | + | |
21 | +/* from Linux's linux/virtio_config.h */ | |
22 | + | |
23 | +/* Status byte for guest to report progress, and synchronize features. */ | |
24 | +/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */ | |
25 | +#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 | |
26 | +/* We have found a driver for the device. */ | |
27 | +#define VIRTIO_CONFIG_S_DRIVER 2 | |
28 | +/* Driver has used its parts of the config, and is happy */ | |
29 | +#define VIRTIO_CONFIG_S_DRIVER_OK 4 | |
30 | +/* We've given up on this device. */ | |
31 | +#define VIRTIO_CONFIG_S_FAILED 0x80 | |
32 | + | |
33 | +/* We notify when the ring is completely used, even if the guest is suppressing | |
34 | + * callbacks */ | |
35 | +#define VIRTIO_F_NOTIFY_ON_EMPTY 24 | |
36 | + | |
37 | +/* from Linux's linux/virtio_ring.h */ | |
38 | + | |
39 | +/* This marks a buffer as continuing via the next field. */ | |
40 | +#define VRING_DESC_F_NEXT 1 | |
41 | +/* This marks a buffer as write-only (otherwise read-only). */ | |
42 | +#define VRING_DESC_F_WRITE 2 | |
43 | + | |
44 | +/* This means don't notify other side when buffer added. */ | |
45 | +#define VRING_USED_F_NO_NOTIFY 1 | |
46 | +/* This means don't interrupt guest when buffer consumed. */ | |
47 | +#define VRING_AVAIL_F_NO_INTERRUPT 1 | |
48 | + | |
49 | +struct VirtQueue; | |
50 | + | |
51 | +typedef struct VirtQueue VirtQueue; | |
52 | +typedef struct VirtIODevice VirtIODevice; | |
53 | + | |
54 | +#define VIRTQUEUE_MAX_SIZE 1024 | |
55 | + | |
56 | +typedef struct VirtQueueElement | |
57 | +{ | |
58 | + unsigned int index; | |
59 | + unsigned int out_num; | |
60 | + unsigned int in_num; | |
61 | + target_phys_addr_t in_addr[VIRTQUEUE_MAX_SIZE]; | |
62 | + struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; | |
63 | + struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; | |
64 | +} VirtQueueElement; | |
65 | + | |
66 | +#define VIRTIO_PCI_QUEUE_MAX 16 | |
67 | + | |
68 | +struct VirtIODevice | |
69 | +{ | |
70 | + PCIDevice pci_dev; | |
71 | + const char *name; | |
72 | + uint32_t addr; | |
73 | + uint8_t status; | |
74 | + uint8_t isr; | |
75 | + uint16_t queue_sel; | |
76 | + uint32_t features; | |
77 | + size_t config_len; | |
78 | + void *config; | |
79 | + uint32_t (*get_features)(VirtIODevice *vdev); | |
80 | + void (*set_features)(VirtIODevice *vdev, uint32_t val); | |
81 | + void (*get_config)(VirtIODevice *vdev, uint8_t *config); | |
82 | + void (*set_config)(VirtIODevice *vdev, const uint8_t *config); | |
83 | + void (*reset)(VirtIODevice *vdev); | |
84 | + VirtQueue *vq; | |
85 | +}; | |
86 | + | |
87 | +VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name, | |
88 | + uint16_t vendor, uint16_t device, | |
89 | + uint16_t subvendor, uint16_t subdevice, | |
90 | + uint8_t class_code, uint8_t subclass_code, | |
91 | + uint8_t pif, size_t config_size, | |
92 | + size_t struct_size); | |
93 | + | |
94 | +VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, | |
95 | + void (*handle_output)(VirtIODevice *, | |
96 | + VirtQueue *)); | |
97 | + | |
98 | +void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, | |
99 | + unsigned int len); | |
100 | +void virtqueue_flush(VirtQueue *vq, unsigned int count); | |
101 | +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, | |
102 | + unsigned int len, unsigned int idx); | |
103 | + | |
104 | +int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem); | |
105 | +int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes); | |
106 | + | |
107 | +void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); | |
108 | + | |
109 | +void virtio_save(VirtIODevice *vdev, QEMUFile *f); | |
110 | + | |
111 | +void virtio_load(VirtIODevice *vdev, QEMUFile *f); | |
112 | + | |
113 | +void virtio_notify_config(VirtIODevice *vdev); | |
114 | + | |
115 | +void virtio_queue_set_notification(VirtQueue *vq, int enable); | |
116 | + | |
117 | +int virtio_queue_ready(VirtQueue *vq); | |
118 | + | |
119 | +int virtio_queue_empty(VirtQueue *vq); | |
120 | + | |
121 | +#endif | ... | ... |
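To round out the picture, here is the bring-up sequence a guest driver performs against the BAR0 register layout defined at the top of hw/virtio.c (the numeric offsets match the VIRTIO_PCI_* constants there). The inl/outl-style port helpers are assumed to exist on the guest side; none of this code is part of the patch:

    #include <stdint.h>

    /* Guest-side port I/O helpers, assumed provided by the environment. */
    extern uint32_t inl(uint16_t port);
    extern uint16_t inw(uint16_t port);
    extern void outl(uint16_t port, uint32_t val);
    extern void outw(uint16_t port, uint16_t val);
    extern void outb(uint16_t port, uint8_t val);

    void virtio_pci_bringup(uint16_t base, uint32_t ring_pfn)
    {
        /* VIRTIO_PCI_STATUS: ACKNOWLEDGE | DRIVER */
        outb(base + 18, 0x01 | 0x02);

        uint32_t host = inl(base + 0);   /* VIRTIO_PCI_HOST_FEATURES */
        (void)host;
        outl(base + 4, 0);               /* VIRTIO_PCI_GUEST_FEATURES: accept none (example) */

        outw(base + 14, 0);              /* VIRTIO_PCI_QUEUE_SEL: operate on queue 0 */
        uint16_t num = inw(base + 12);   /* VIRTIO_PCI_QUEUE_NUM: ring size */
        (void)num;                       /* guest sizes its vring from this */
        outl(base + 8, ring_pfn);        /* VIRTIO_PCI_QUEUE_PFN: publish the ring */

        /* VIRTIO_PCI_STATUS: ... | DRIVER_OK */
        outb(base + 18, 0x01 | 0x02 | 0x04);
    }

Writing zero to VIRTIO_PCI_QUEUE_PFN, or zero to VIRTIO_PCI_STATUS, resets the device — exactly the path virtio_ioport_write() takes into virtio_reset().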