Commit efeea6d048756bc42ad39f0acce6bede4b74177a

Authored by Mark McLoughlin
Committed by Anthony Liguori
1 parent 5774cf98

virtio: add support for indirect ring entries

Support a new feature flag for indirect ring entries. These are ring
entries which point to a table of buffer descriptors.

The idea here is to increase the effective ring capacity: with
indirect entries, the ring size dictates only the number of requests
that may be outstanding, rather than the size of those requests.

This should be most effective in the case of block I/O, where we can
potentially benefit by concurrently dispatching a large number of
large requests. Even in the simple case of single-segment block
requests, this results in a threefold increase in ring capacity.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
hw/virtio-pci.c
@@ -136,7 +136,9 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr) @@ -136,7 +136,9 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
136 switch (addr) { 136 switch (addr) {
137 case VIRTIO_PCI_HOST_FEATURES: 137 case VIRTIO_PCI_HOST_FEATURES:
138 ret = vdev->get_features(vdev); 138 ret = vdev->get_features(vdev);
139 - ret |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY) | (1 << VIRTIO_F_BAD_FEATURE); 139 + ret |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY);
  140 + ret |= (1 << VIRTIO_RING_F_INDIRECT_DESC);
  141 + ret |= (1 << VIRTIO_F_BAD_FEATURE);
140 break; 142 break;
141 case VIRTIO_PCI_GUEST_FEATURES: 143 case VIRTIO_PCI_GUEST_FEATURES:
142 ret = vdev->features; 144 ret = vdev->features;
hw/virtio.c
@@ -293,18 +293,41 @@ static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa, @@ -293,18 +293,41 @@ static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa,
293 293
294 int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) 294 int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
295 { 295 {
296 - target_phys_addr_t desc_pa = vq->vring.desc;  
297 - unsigned int idx, max;  
298 - int num_bufs, in_total, out_total; 296 + unsigned int idx;
  297 + int total_bufs, in_total, out_total;
299 298
300 idx = vq->last_avail_idx; 299 idx = vq->last_avail_idx;
301 - max = vq->vring.num;  
302 300
303 - num_bufs = in_total = out_total = 0; 301 + total_bufs = in_total = out_total = 0;
304 while (virtqueue_num_heads(vq, idx)) { 302 while (virtqueue_num_heads(vq, idx)) {
  303 + unsigned int max, num_bufs, indirect = 0;
  304 + target_phys_addr_t desc_pa;
305 int i; 305 int i;
306 306
  307 + max = vq->vring.num;
  308 + num_bufs = total_bufs;
307 i = virtqueue_get_head(vq, idx++); 309 i = virtqueue_get_head(vq, idx++);
  310 + desc_pa = vq->vring.desc;
  311 +
  312 + if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
  313 + if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
  314 + fprintf(stderr, "Invalid size for indirect buffer table\n");
  315 + exit(1);
  316 + }
  317 +
  318 + /* If we've got too many, that implies a descriptor loop. */
  319 + if (num_bufs >= max) {
  320 + fprintf(stderr, "Looped descriptor");
  321 + exit(1);
  322 + }
  323 +
  324 + /* loop over the indirect descriptor table */
  325 + indirect = 1;
  326 + max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
  327 + num_bufs = i = 0;
  328 + desc_pa = vring_desc_addr(desc_pa, i);
  329 + }
  330 +
308 do { 331 do {
309 /* If we've got too many, that implies a descriptor loop. */ 332 /* If we've got too many, that implies a descriptor loop. */
310 if (++num_bufs > max) { 333 if (++num_bufs > max) {
@@ -322,6 +345,11 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) @@ -322,6 +345,11 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
322 return 1; 345 return 1;
323 } 346 }
324 } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max); 347 } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
  348 +
  349 + if (!indirect)
  350 + total_bufs = num_bufs;
  351 + else
  352 + total_bufs++;
325 } 353 }
326 354
327 return 0; 355 return 0;
@@ -342,6 +370,19 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) @@ -342,6 +370,19 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
342 max = vq->vring.num; 370 max = vq->vring.num;
343 371
344 i = head = virtqueue_get_head(vq, vq->last_avail_idx++); 372 i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
  373 +
  374 + if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
  375 + if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
  376 + fprintf(stderr, "Invalid size for indirect buffer table\n");
  377 + exit(1);
  378 + }
  379 +
  380 + /* loop over the indirect descriptor table */
  381 + max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
  382 + desc_pa = vring_desc_addr(desc_pa, i);
  383 + i = 0;
  384 + }
  385 +
345 do { 386 do {
346 struct iovec *sg; 387 struct iovec *sg;
347 int is_write = 0; 388 int is_write = 0;
hw/virtio.h
@@ -32,6 +32,8 @@ @@ -32,6 +32,8 @@
32 /* We notify when the ring is completely used, even if the guest is suppressing 32 /* We notify when the ring is completely used, even if the guest is suppressing
33 * callbacks */ 33 * callbacks */
34 #define VIRTIO_F_NOTIFY_ON_EMPTY 24 34 #define VIRTIO_F_NOTIFY_ON_EMPTY 24
  35 +/* We support indirect buffer descriptors */
  36 +#define VIRTIO_RING_F_INDIRECT_DESC 28
35 /* A guest should never accept this. It implies negotiation is broken. */ 37 /* A guest should never accept this. It implies negotiation is broken. */
36 #define VIRTIO_F_BAD_FEATURE 30 38 #define VIRTIO_F_BAD_FEATURE 30
37 39
@@ -41,6 +43,8 @@ @@ -41,6 +43,8 @@
41 #define VRING_DESC_F_NEXT 1 43 #define VRING_DESC_F_NEXT 1
42 /* This marks a buffer as write-only (otherwise read-only). */ 44 /* This marks a buffer as write-only (otherwise read-only). */
43 #define VRING_DESC_F_WRITE 2 45 #define VRING_DESC_F_WRITE 2
  46 +/* This means the buffer contains a list of buffer descriptors. */
  47 +#define VRING_DESC_F_INDIRECT 4
44 48
45 /* This means don't notify other side when buffer added. */ 49 /* This means don't notify other side when buffer added. */
46 #define VRING_USED_F_NO_NOTIFY 1 50 #define VRING_USED_F_NO_NOTIFY 1