Commit f65ed4c1529f29a7d62d6733eaa50bed24a4b2ed

Authored by aliguori
1 parent d85dc283

KVM: Coalesced MMIO support

MMIO exits are more expensive in KVM or Xen than in QEMU because they 
involve, at least, privilege transitions.  However, MMIO write 
operations can be effectively batched if those writes do not have side 
effects.

Good examples of this include VGA pixel operations when in a planar 
mode.  As it turns out, we can get a nice boost in other areas too.  
Laurent mentioned a 9.7% performance boost in iperf with the coalesced 
MMIO changes for the e1000 when he originally posted this work for KVM.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5961 c046a42c-6fe2-441c-8c8c-71466251a162
cpu-all.h
@@ -973,6 +973,15 @@ void cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_a @@ -973,6 +973,15 @@ void cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_a
973 void dump_exec_info(FILE *f, 973 void dump_exec_info(FILE *f,
974 int (*cpu_fprintf)(FILE *f, const char *fmt, ...)); 974 int (*cpu_fprintf)(FILE *f, const char *fmt, ...));
975 975
  976 +/* Coalesced MMIO regions are areas where write operations can be reordered.
  977 + * This usually implies that write operations are side-effect free. This allows
  978 + * batching, which can have a major impact on performance when using
  979 + * virtualization.
  980 + */
  981 +void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
  982 +
  983 +void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
  984 +
976 /*******************************************/ 985 /*******************************************/
977 /* host CPU ticks (if available) */ 986 /* host CPU ticks (if available) */
978 987
@@ -2344,6 +2344,18 @@ ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr) @@ -2344,6 +2344,18 @@ ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2344 return p->phys_offset; 2344 return p->phys_offset;
2345 } 2345 }
2346 2346
  2347 +void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size) /* Forward addr/size to kvm_coalesce_mmio_region() when kvm_enabled() is true; otherwise do nothing. */
  2348 +{
  2349 + if (kvm_enabled()) /* no work is done on the non-KVM path */
  2350 + kvm_coalesce_mmio_region(addr, size); /* the int result is discarded, so a failed registration is not reported to the caller */
  2351 +}
  2352 +
  2353 +void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size) /* Forward addr/size to kvm_uncoalesce_mmio_region() when kvm_enabled() is true; otherwise do nothing. */
  2354 +{
  2355 + if (kvm_enabled()) /* no work is done on the non-KVM path */
  2356 + kvm_uncoalesce_mmio_region(addr, size); /* the int result is discarded, so a failed unregistration is not reported to the caller */
  2357 +}
  2358 +
2347 /* XXX: better than nothing */ 2359 /* XXX: better than nothing */
2348 ram_addr_t qemu_ram_alloc(ram_addr_t size) 2360 ram_addr_t qemu_ram_alloc(ram_addr_t size)
2349 { 2361 {
hw/cirrus_vga.c
@@ -3220,6 +3220,7 @@ static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci) @@ -3220,6 +3220,7 @@ static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci)
3220 cirrus_vga_mem_write, s); 3220 cirrus_vga_mem_write, s);
3221 cpu_register_physical_memory(isa_mem_base + 0x000a0000, 0x20000, 3221 cpu_register_physical_memory(isa_mem_base + 0x000a0000, 0x20000,
3222 s->vga_io_memory); 3222 s->vga_io_memory);
  3223 + qemu_register_coalesced_mmio(isa_mem_base + 0x000a0000, 0x20000);
3223 3224
3224 s->sr[0x06] = 0x0f; 3225 s->sr[0x06] = 0x0f;
3225 if (device_id == CIRRUS_ID_CLGD5446) { 3226 if (device_id == CIRRUS_ID_CLGD5446) {
hw/e1000.c
@@ -1001,10 +1001,22 @@ e1000_mmio_map(PCIDevice *pci_dev, int region_num, @@ -1001,10 +1001,22 @@ e1000_mmio_map(PCIDevice *pci_dev, int region_num,
1001 uint32_t addr, uint32_t size, int type) 1001 uint32_t addr, uint32_t size, int type)
1002 { 1002 {
1003 E1000State *d = (E1000State *)pci_dev; 1003 E1000State *d = (E1000State *)pci_dev;
  1004 + int i;
  1005 + const uint32_t excluded_regs[] = {
  1006 + E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
  1007 + E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
  1008 + };
  1009 +
1004 1010
1005 DBGOUT(MMIO, "e1000_mmio_map addr=0x%08x 0x%08x\n", addr, size); 1011 DBGOUT(MMIO, "e1000_mmio_map addr=0x%08x 0x%08x\n", addr, size);
1006 1012
1007 cpu_register_physical_memory(addr, PNPMMIO_SIZE, d->mmio_index); 1013 cpu_register_physical_memory(addr, PNPMMIO_SIZE, d->mmio_index);
  1014 + qemu_register_coalesced_mmio(addr, excluded_regs[0]);
  1015 +
  1016 + for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
  1017 + qemu_register_coalesced_mmio(addr + excluded_regs[i] + 4,
  1018 + excluded_regs[i + 1] -
  1019 + excluded_regs[i] - 4);
1008 } 1020 }
1009 1021
1010 void 1022 void
hw/pci.c
@@ -279,6 +279,7 @@ static void pci_update_mappings(PCIDevice *d) @@ -279,6 +279,7 @@ static void pci_update_mappings(PCIDevice *d)
279 cpu_register_physical_memory(pci_to_cpu_addr(r->addr), 279 cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
280 r->size, 280 r->size,
281 IO_MEM_UNASSIGNED); 281 IO_MEM_UNASSIGNED);
  282 + qemu_unregister_coalesced_mmio(r->addr, r->size);
282 } 283 }
283 } 284 }
284 r->addr = new_addr; 285 r->addr = new_addr;
hw/vga.c
@@ -2256,6 +2256,7 @@ void vga_init(VGAState *s) @@ -2256,6 +2256,7 @@ void vga_init(VGAState *s)
2256 vga_io_memory = cpu_register_io_memory(0, vga_mem_read, vga_mem_write, s); 2256 vga_io_memory = cpu_register_io_memory(0, vga_mem_read, vga_mem_write, s);
2257 cpu_register_physical_memory(isa_mem_base + 0x000a0000, 0x20000, 2257 cpu_register_physical_memory(isa_mem_base + 0x000a0000, 0x20000,
2258 vga_io_memory); 2258 vga_io_memory);
  2259 + qemu_register_coalesced_mmio(isa_mem_base + 0x000a0000, 0x20000);
2259 } 2260 }
2260 2261
2261 /* Memory mapped interface */ 2262 /* Memory mapped interface */
@@ -2330,6 +2331,7 @@ static void vga_mm_init(VGAState *s, target_phys_addr_t vram_base, @@ -2330,6 +2331,7 @@ static void vga_mm_init(VGAState *s, target_phys_addr_t vram_base,
2330 cpu_register_physical_memory(ctrl_base, 0x100000, s_ioport_ctrl); 2331 cpu_register_physical_memory(ctrl_base, 0x100000, s_ioport_ctrl);
2331 s->bank_offset = 0; 2332 s->bank_offset = 0;
2332 cpu_register_physical_memory(vram_base + 0x000a0000, 0x20000, vga_io_memory); 2333 cpu_register_physical_memory(vram_base + 0x000a0000, 0x20000, vga_io_memory);
  2334 + qemu_register_coalesced_mmio(vram_base + 0x000a0000, 0x20000);
2333 } 2335 }
2334 2336
2335 int isa_vga_init(DisplayState *ds, uint8_t *vga_ram_base, 2337 int isa_vga_init(DisplayState *ds, uint8_t *vga_ram_base,
kvm-all.c
@@ -24,6 +24,9 @@ @@ -24,6 +24,9 @@
24 #include "sysemu.h" 24 #include "sysemu.h"
25 #include "kvm.h" 25 #include "kvm.h"
26 26
  27 +/* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX */
  28 +#define PAGE_SIZE TARGET_PAGE_SIZE
  29 +
27 //#define DEBUG_KVM 30 //#define DEBUG_KVM
28 31
29 #ifdef DEBUG_KVM 32 #ifdef DEBUG_KVM
@@ -52,6 +55,7 @@ struct KVMState @@ -52,6 +55,7 @@ struct KVMState
52 KVMSlot slots[32]; 55 KVMSlot slots[32];
53 int fd; 56 int fd;
54 int vmfd; 57 int vmfd;
  58 + int coalesced_mmio;
55 }; 59 };
56 60
57 static KVMState *kvm_state; 61 static KVMState *kvm_state;
@@ -228,6 +232,44 @@ out: @@ -228,6 +232,44 @@ out:
228 qemu_free(d.dirty_bitmap); 232 qemu_free(d.dirty_bitmap);
229 } 233 }
230 234
  235 +int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) /* Issue KVM_REGISTER_COALESCED_MMIO for (start, size); returns the kvm_vm_ioctl() result, or -ENOSYS when the ioctl is not issued. */
  236 +{
  237 + int ret = -ENOSYS; /* returned unchanged if KVM_CAP_COALESCED_MMIO is not defined at build time or s->coalesced_mmio is 0 */
  238 +#ifdef KVM_CAP_COALESCED_MMIO
  239 + KVMState *s = kvm_state;
  240 +
  241 + if (s->coalesced_mmio) { /* non-zero only when kvm_init() got a positive KVM_CHECK_EXTENSION result */
  242 + struct kvm_coalesced_mmio_zone zone; /* NOTE(review): only addr and size are assigned below; any other members of this struct would reach the kernel uninitialized -- confirm against the struct definition */
  243 +
  244 + zone.addr = start;
  245 + zone.size = size;
  246 +
  247 + ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
  248 + }
  249 +#endif
  250 +
  251 + return ret;
  252 +}
  253 +
  254 +int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) /* Issue KVM_UNREGISTER_COALESCED_MMIO for (start, size); returns the kvm_vm_ioctl() result, or -ENOSYS when the ioctl is not issued. */
  255 +{
  256 + int ret = -ENOSYS; /* returned unchanged if KVM_CAP_COALESCED_MMIO is not defined at build time or s->coalesced_mmio is 0 */
  257 +#ifdef KVM_CAP_COALESCED_MMIO
  258 + KVMState *s = kvm_state;
  259 +
  260 + if (s->coalesced_mmio) { /* non-zero only when kvm_init() got a positive KVM_CHECK_EXTENSION result */
  261 + struct kvm_coalesced_mmio_zone zone; /* NOTE(review): only addr and size are assigned below; any other members of this struct would reach the kernel uninitialized -- confirm against the struct definition */
  262 +
  263 + zone.addr = start;
  264 + zone.size = size;
  265 +
  266 + ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
  267 + }
  268 +#endif
  269 +
  270 + return ret;
  271 +}
  272 +
231 int kvm_init(int smp_cpus) 273 int kvm_init(int smp_cpus)
232 { 274 {
233 KVMState *s; 275 KVMState *s;
@@ -298,6 +340,13 @@ int kvm_init(int smp_cpus) @@ -298,6 +340,13 @@ int kvm_init(int smp_cpus)
298 goto err; 340 goto err;
299 } 341 }
300 342
  343 + s->coalesced_mmio = 0;
  344 +#ifdef KVM_CAP_COALESCED_MMIO
  345 + ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
  346 + if (ret > 0)
  347 + s->coalesced_mmio = ret;
  348 +#endif
  349 +
301 ret = kvm_arch_init(s, smp_cpus); 350 ret = kvm_arch_init(s, smp_cpus);
302 if (ret < 0) 351 if (ret < 0)
303 goto err; 352 goto err;
@@ -357,6 +406,27 @@ static int kvm_handle_io(CPUState *env, uint16_t port, void *data, @@ -357,6 +406,27 @@ static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
357 return 1; 406 return 1;
358 } 407 }
359 408
  409 +static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run) /* Drain the coalesced MMIO ring that sits after 'run', replaying each entry via cpu_physical_memory_write(); 'env' is not used in the body. */
  410 +{
  411 +#ifdef KVM_CAP_COALESCED_MMIO
  412 + KVMState *s = kvm_state;
  413 + if (s->coalesced_mmio) { /* zero means the capability was not reported, so there is nothing to drain */
  414 + struct kvm_coalesced_mmio_ring *ring;
  415 +
  416 + ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE); /* s->coalesced_mmio is used as an offset, in TARGET_PAGE_SIZE units, from 'run'; arithmetic on void * is a GNU C extension */
  417 + while (ring->first != ring->last) { /* first == last is treated as an empty ring */
  418 + struct kvm_coalesced_mmio *ent;
  419 +
  420 + ent = &ring->coalesced_mmio[ring->first];
  421 +
  422 + cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len); /* replay the recorded write */
  423 + /* FIXME smp_wmb() */
  424 + ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX; /* consume the entry, wrapping at KVM_COALESCED_MMIO_MAX */
  425 + }
  426 + }
  427 +#endif
  428 +}
  429 +
360 int kvm_cpu_exec(CPUState *env) 430 int kvm_cpu_exec(CPUState *env)
361 { 431 {
362 struct kvm_run *run = env->kvm_run; 432 struct kvm_run *run = env->kvm_run;
@@ -387,6 +457,8 @@ int kvm_cpu_exec(CPUState *env) @@ -387,6 +457,8 @@ int kvm_cpu_exec(CPUState *env)
387 abort(); 457 abort();
388 } 458 }
389 459
  460 + kvm_run_coalesced_mmio(env, run);
  461 +
390 ret = 0; /* exit loop */ 462 ret = 0; /* exit loop */
391 switch (run->exit_reason) { 463 switch (run->exit_reason) {
392 case KVM_EXIT_IO: 464 case KVM_EXIT_IO:
@@ -45,6 +45,9 @@ int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len); @@ -45,6 +45,9 @@ int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len);
45 45
46 int kvm_has_sync_mmu(void); 46 int kvm_has_sync_mmu(void);
47 47
  48 +int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
  49 +int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
  50 +
48 /* internal API */ 51 /* internal API */
49 52
50 struct KVMState; 53 struct KVMState;