Commit 7ba1e61953f4592606e60b2e7507ff6a6faf861a

Authored by aliguori
1 parent 6fd805e1

Add KVM support to QEMU

This patch adds very basic KVM support.  KVM is a kernel module for Linux that
allows userspace programs to make use of hardware virtualization support.  It
currently supports x86 hardware virtualization using Intel VT-x or AMD-V.  It
also supports IA64 VT-i, PPC 440, and S390.

This patch only implements the bare minimum support to get a guest booting.  It
has very little impact on the rest of QEMU and attempts to integrate nicely with
the rest of QEMU.

Even though this implementation is basic, it is significantly faster than TCG.
Booting and shutting down a Linux guest:

w/TCG:  1:32.36 elapsed  84% CPU

w/KVM:  0:31.14 elapsed  59% CPU

Right now, KVM is disabled by default and must be explicitly enabled with
 -enable-kvm.  We can enable it by default later when we have had better
testing.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5627 c046a42c-6fe2-441c-8c8c-71466251a162
Makefile.target
... ... @@ -183,6 +183,9 @@ CFLAGS+=-I/opt/SUNWspro/prod/include/cc
183 183 endif
184 184 endif
185 185  
  186 +kvm.o: CFLAGS+=$(KVM_CFLAGS)
  187 +kvm-all.o: CFLAGS+=$(KVM_CFLAGS)
  188 +
186 189 all: $(PROGS)
187 190  
188 191 #########################################################
... ... @@ -581,6 +584,9 @@ ifndef CONFIG_USER_ONLY
581 584 OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o
582 585 OBJS+=fw_cfg.o aio.o buffered_file.o migration.o migration-tcp.o qemu-char.o
583 586 OBJS+=net.o
  587 +ifdef CONFIG_KVM
  588 +OBJS+=kvm.o kvm-all.o
  589 +endif
584 590 ifdef CONFIG_WIN32
585 591 OBJS+=block-raw-win32.o
586 592 else
... ...
configure
... ... @@ -115,6 +115,7 @@ aio="yes"
115 115 nptl="yes"
116 116 mixemu="no"
117 117 bluez="yes"
  118 +kvm="yes"
118 119  
119 120 # OS specific
120 121 targetos=`uname -s`
... ... @@ -303,6 +304,8 @@ for opt do
303 304 ;;
304 305 --disable-bluez) bluez="no"
305 306 ;;
  307 + --disable-kvm) kvm="no"
  308 + ;;
306 309 --enable-profiler) profiler="yes"
307 310 ;;
308 311 --enable-cocoa)
... ... @@ -448,6 +451,7 @@ echo " --disable-brlapi disable BrlAPI"
448 451 echo " --disable-vnc-tls disable TLS encryption for VNC server"
449 452 echo " --disable-curses disable curses output"
450 453 echo " --disable-bluez disable bluez stack connectivity"
  454 +echo " --disable-kvm disable KVM acceleration support"
451 455 echo " --disable-nptl disable usermode NPTL support"
452 456 echo " --enable-system enable all system emulation targets"
453 457 echo " --disable-system disable all system emulation targets"
... ... @@ -951,6 +955,30 @@ EOF
951 955 fi
952 956  
953 957 ##########################################
  958 +# kvm probe
  959 +if test "$kvm" = "yes" ; then # only probe when KVM was not disabled via --disable-kvm
  960 + cat > $TMPC <<EOF
  961 +#include <linux/kvm.h>
  962 +#if !defined(KVM_API_VERSION) || \
  963 + KVM_API_VERSION < 12 || \
  964 + KVM_API_VERSION > 12 || \
  965 + !defined(KVM_CAP_USER_MEMORY) || \
  966 + !defined(KVM_CAP_SET_TSS_ADDR)
  967 +#error Invalid KVM version
  968 +#endif
  969 +int main(void) { return 0; }
  970 +EOF
  971 + # FIXME make this configurable
  972 + kvm_cflags=-I/lib/modules/`uname -r`/build/include # look for linux/kvm.h in the running kernel's build tree
  973 + if $cc $ARCH_CFLAGS -o $TMPE ${OS_CFLAGS} $kvm_cflags $TMPC \
  974 + 2>/dev/null ; then
  975 + : # test program compiled: header present, API version is exactly 12, both capabilities defined
  976 + else
  977 + kvm="no" # compile failed (the cause is hidden by 2>/dev/null): build without KVM
  978 + fi
  979 +fi
  980 +
  981 +##########################################
954 982 # AIO probe
955 983 if test "$aio" = "yes" ; then
956 984 aio=no
... ... @@ -1036,6 +1064,7 @@ echo "uname -r $uname_release"
1036 1064 echo "NPTL support $nptl"
1037 1065 echo "vde support $vde"
1038 1066 echo "AIO support $aio"
  1067 +echo "KVM support $kvm"
1039 1068  
1040 1069 if test $sdl_too_old = "yes"; then
1041 1070 echo "-> Your SDL version is too old - please upgrade to have SDL support"
... ... @@ -1411,6 +1440,15 @@ interp_prefix1=`echo "$interp_prefix" | sed "s/%M/$target_cpu/g"`
1411 1440 echo "#define CONFIG_QEMU_PREFIX \"$interp_prefix1\"" >> $config_h
1412 1441 gdb_xml_files=""
1413 1442  
  1443 +# FIXME allow i386 to build on x86_64 and vice versa
  1444 +if test "$kvm" = "yes" -a "$target_cpu" != "$cpu" ; then
  1445 + kvm="no"
  1446 +fi
  1447 +# Disable KVM for linux-user
  1448 +if test "$kvm" = "yes" -a "$target_softmmu" = "no" ; then
  1449 + kvm="no"
  1450 +fi
  1451 +
1414 1452 case "$target_cpu" in
1415 1453 i386)
1416 1454 echo "TARGET_ARCH=i386" >> $config_mak
... ... @@ -1420,6 +1458,11 @@ case "$target_cpu" in
1420 1458 then
1421 1459 echo "#define USE_KQEMU 1" >> $config_h
1422 1460 fi
  1461 + if test "$kvm" = "yes" ; then
  1462 + echo "CONFIG_KVM=yes" >> $config_mak
  1463 + echo "KVM_CFLAGS=$kvm_cflags" >> $config_mak
  1464 + echo "#define CONFIG_KVM" >> $config_h
  1465 + fi
1423 1466 gcc3minver=`$cc --version 2> /dev/null| fgrep "(GCC) 3." | awk '{ print $3 }' | cut -f2 -d.`
1424 1467 if test -n "$gcc3minver" && test $gcc3minver -gt 3
1425 1468 then
... ... @@ -1437,6 +1480,11 @@ case "$target_cpu" in
1437 1480 then
1438 1481 echo "#define USE_KQEMU 1" >> $config_h
1439 1482 fi
  1483 + if test "$kvm" = "yes" ; then
  1484 + echo "CONFIG_KVM=yes" >> $config_mak
  1485 + echo "KVM_CFLAGS=$kvm_cflags" >> $config_mak
  1486 + echo "#define CONFIG_KVM 1" >> $config_h
  1487 + fi
1440 1488 ;;
1441 1489 alpha)
1442 1490 echo "TARGET_ARCH=alpha" >> $config_mak
... ...
cpu-defs.h
... ... @@ -142,6 +142,9 @@ typedef struct icount_decr_u16 {
142 142 } icount_decr_u16;
143 143 #endif
144 144  
  145 +struct kvm_run;
  146 +struct KVMState;
  147 +
145 148 #define CPU_TEMP_BUF_NLONGS 128
146 149 #define CPU_COMMON \
147 150 struct TranslationBlock *current_tb; /* currently executing TB */ \
... ... @@ -199,6 +202,9 @@ typedef struct icount_decr_u16 {
199 202 /* user data */ \
200 203 void *opaque; \
201 204 \
202   - const char *cpu_model_str;
  205 + const char *cpu_model_str; \
  206 + struct KVMState *kvm_state; \
  207 + struct kvm_run *kvm_run; \
  208 + int kvm_fd;
203 209  
204 210 #endif
... ...
cpu-exec.c
... ... @@ -22,6 +22,7 @@
22 22 #include "exec.h"
23 23 #include "disas.h"
24 24 #include "tcg.h"
  25 +#include "kvm.h"
25 26  
26 27 #if !defined(CONFIG_SOFTMMU)
27 28 #undef EAX
... ... @@ -371,6 +372,19 @@ int cpu_exec(CPUState *env1)
371 372 }
372 373 #endif
373 374  
  375 + if (kvm_enabled()) {
  376 + int ret;
  377 + ret = kvm_cpu_exec(env);
  378 + if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) {
  379 + env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
  380 + env->exception_index = EXCP_INTERRUPT;
  381 + cpu_loop_exit();
  382 + } else if (env->halted) {
  383 + cpu_loop_exit();
  384 + } else
  385 + longjmp(env->jmp_env, 1);
  386 + }
  387 +
374 388 next_tb = 0; /* force lookup of first TB */
375 389 for(;;) {
376 390 interrupt_request = env->interrupt_request;
... ...
... ... @@ -39,6 +39,7 @@
39 39 #include "tcg.h"
40 40 #include "hw/hw.h"
41 41 #include "osdep.h"
  42 +#include "kvm.h"
42 43 #if defined(CONFIG_USER_ONLY)
43 44 #include <qemu.h>
44 45 #endif
... ... @@ -2212,6 +2213,9 @@ void cpu_register_physical_memory(target_phys_addr_t start_addr,
2212 2213 kqemu_set_phys_mem(start_addr, size, phys_offset);
2213 2214 }
2214 2215 #endif
  2216 + if (kvm_enabled())
  2217 + kvm_set_phys_mem(start_addr, size, phys_offset);
  2218 +
2215 2219 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2216 2220 end_addr = start_addr + (target_phys_addr_t)size;
2217 2221 for(addr = start_addr; addr != end_addr; addr += TARGET_PAGE_SIZE) {
... ...
hw/acpi.c
... ... @@ -23,6 +23,7 @@
23 23 #include "sysemu.h"
24 24 #include "i2c.h"
25 25 #include "smbus.h"
  26 +#include "kvm.h"
26 27  
27 28 //#define DEBUG
28 29  
... ... @@ -501,6 +502,12 @@ i2c_bus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
501 502  
502 503 register_ioport_write(ACPI_DBG_IO_ADDR, 4, 4, acpi_dbg_writel, s);
503 504  
  505 + if (kvm_enabled()) {
  506 + /* Mark SMM as already inited to prevent SMM from running. KVM does not
  507 + * support SMM mode. */
  508 + pci_conf[0x5B] = 0x02;
  509 + }
  510 +
504 511 /* XXX: which specification is used ? The i82731AB has different
505 512 mappings */
506 513 pci_conf[0x5f] = (parallel_hds[0] != NULL ? 0x80 : 0) | 0x10;
... ...
monitor.c
... ... @@ -37,6 +37,7 @@
37 37 #include <dirent.h>
38 38 #include "qemu-timer.h"
39 39 #include "migration.h"
  40 +#include "kvm.h"
40 41  
41 42 //#define DEBUG
42 43 //#define DEBUG_COMPLETION
... ... @@ -1263,6 +1264,19 @@ static void do_info_kqemu(void)
1263 1264 #endif
1264 1265 }
1265 1266  
  1267 +static void do_info_kvm(void) /* monitor "info kvm" handler: prints one line describing KVM support */
  1268 +{
  1269 +#ifdef CONFIG_KVM /* set by configure when KVM support is built in */
  1270 + term_printf("kvm support: ");
  1271 + if (kvm_enabled()) /* NOTE(review): presumably true only after -enable-kvm; kvm_enabled() is defined in kvm.h, not shown here */
  1272 + term_printf("enabled\n");
  1273 + else
  1274 + term_printf("disabled\n");
  1275 +#else /* built without CONFIG_KVM */
  1276 + term_printf("kvm support: not compiled\n");
  1277 +#endif
  1278 +}
  1279 +
1266 1280 #ifdef CONFIG_PROFILER
1267 1281  
1268 1282 int64_t kqemu_time;
... ... @@ -1497,6 +1511,8 @@ static const term_cmd_t info_cmds[] = {
1497 1511 "", "show dynamic compiler info", },
1498 1512 { "kqemu", "", do_info_kqemu,
1499 1513 "", "show kqemu information", },
  1514 + { "kvm", "", do_info_kvm,
  1515 + "", "show kvm information", },
1500 1516 { "usb", "", usb_info,
1501 1517 "", "show guest USB devices", },
1502 1518 { "usbhost", "", usb_host_info,
... ...
target-i386/cpu.h
... ... @@ -587,6 +587,8 @@ typedef struct CPUX86State {
587 587 target_ulong kernelgsbase;
588 588 #endif
589 589  
  590 + uint64_t tsc;
  591 +
590 592 uint64_t pat;
591 593  
592 594 /* exception/interrupt handling */
... ... @@ -617,6 +619,10 @@ typedef struct CPUX86State {
617 619 int kqemu_enabled;
618 620 int last_io_time;
619 621 #endif
  622 +
  623 + /* For KVM */
  624 + uint64_t interrupt_bitmap[256 / 64];
  625 +
620 626 /* in order to simplify APIC support, we leave this pointer to the
621 627 user */
622 628 struct APICState *apic_state;
... ...
target-i386/helper.c
... ... @@ -29,6 +29,7 @@
29 29 #include "exec-all.h"
30 30 #include "svm.h"
31 31 #include "qemu-common.h"
  32 +#include "kvm.h"
32 33  
33 34 //#define DEBUG_MMU
34 35  
... ... @@ -115,6 +116,8 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
115 116 #ifdef USE_KQEMU
116 117 kqemu_init(env);
117 118 #endif
  119 + if (kvm_enabled())
  120 + kvm_init_vcpu(env);
118 121 return env;
119 122 }
120 123  
... ... @@ -1288,6 +1291,40 @@ target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
1288 1291 }
1289 1292 #endif /* !CONFIG_USER_ONLY */
1290 1293  
  1294 +#if defined(CONFIG_KVM) /* host_cpuid() is only compiled when KVM support is configured */
  1295 +static void host_cpuid(uint32_t function, uint32_t *eax, uint32_t *ebx, /* execute CPUID leaf 'function' on the host CPU */
  1296 + uint32_t *ecx, uint32_t *edx) /* each output pointer may be NULL to skip that register */
  1297 +{
  1298 + uint32_t vec[4]; /* raw results in the order eax, ebx, ecx, edx */
  1299 +
  1300 +#ifdef __x86_64__ /* 64-bit host: bind the four result registers directly as asm outputs */
  1301 + asm volatile("cpuid"
  1302 + : "=a"(vec[0]), "=b"(vec[1]),
  1303 + "=c"(vec[2]), "=d"(vec[3])
  1304 + : "0"(function) : "cc"); /* "0" places 'function' in the same register as output 0 (eax) */
  1305 +#else /* 32-bit host: save and restore every register around cpuid; presumably so ebx stays intact for PIC builds - TODO confirm */
  1306 + asm volatile("pusha \n\t"
  1307 + "cpuid \n\t"
  1308 + "mov %%eax, 0(%1) \n\t"
  1309 + "mov %%ebx, 4(%1) \n\t"
  1310 + "mov %%ecx, 8(%1) \n\t"
  1311 + "mov %%edx, 12(%1) \n\t"
  1312 + "popa"
  1313 + : : "a"(function), "S"(vec) /* %1 is vec held in esi; results are stored through it before popa restores the registers */
  1314 + : "memory", "cc"); /* "memory" clobber: vec[] is written by the asm without being declared as an output */
  1315 +#endif
  1316 +
  1317 + if (eax) /* copy out only the registers the caller asked for */
  1318 + *eax = vec[0];
  1319 + if (ebx)
  1320 + *ebx = vec[1];
  1321 + if (ecx)
  1322 + *ecx = vec[2];
  1323 + if (edx)
  1324 + *edx = vec[3];
  1325 +}
  1326 +#endif
  1327 +
1291 1328 void cpu_x86_cpuid(CPUX86State *env, uint32_t index,
1292 1329 uint32_t *eax, uint32_t *ebx,
1293 1330 uint32_t *ecx, uint32_t *edx)
... ... @@ -1307,12 +1344,23 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index,
1307 1344 *ebx = env->cpuid_vendor1;
1308 1345 *edx = env->cpuid_vendor2;
1309 1346 *ecx = env->cpuid_vendor3;
  1347 +
  1348 + /* sysenter isn't supported on compatibility mode on AMD. and syscall
  1349 + * isn't supported in compatibility mode on Intel. so advertise the
  1350 + * actuall cpu, and say goodbye to migration between different vendors
  1351 + * is you use compatibility mode. */
  1352 + if (kvm_enabled())
  1353 + host_cpuid(0, NULL, ebx, ecx, edx);
1310 1354 break;
1311 1355 case 1:
1312 1356 *eax = env->cpuid_version;
1313 1357 *ebx = (env->cpuid_apic_id << 24) | 8 << 8; /* CLFLUSH size in quad words, Linux wants it. */
1314 1358 *ecx = env->cpuid_ext_features;
1315 1359 *edx = env->cpuid_features;
  1360 +
  1361 + /* "Hypervisor present" bit required for Microsoft SVVP */
  1362 + if (kvm_enabled())
  1363 + *ecx |= (1 << 31);
1316 1364 break;
1317 1365 case 2:
1318 1366 /* cache info: needed for Pentium Pro compatibility */
... ... @@ -1390,6 +1438,31 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index,
1390 1438 *ebx = 0;
1391 1439 *ecx = env->cpuid_ext3_features;
1392 1440 *edx = env->cpuid_ext2_features;
  1441 +
  1442 + if (kvm_enabled()) {
  1443 + uint32_t h_eax, h_edx;
  1444 +
  1445 + host_cpuid(0x80000001, &h_eax, NULL, NULL, &h_edx);
  1446 +
  1447 + /* disable CPU features that the host does not support */
  1448 +
  1449 + /* long mode */
  1450 + if ((h_edx & 0x20000000) == 0 /* || !lm_capable_kernel */)
  1451 + *edx &= ~0x20000000;
  1452 + /* syscall */
  1453 + if ((h_edx & 0x00000800) == 0)
  1454 + *edx &= ~0x00000800;
  1455 + /* nx */
  1456 + if ((h_edx & 0x00100000) == 0)
  1457 + *edx &= ~0x00100000;
  1458 +
  1459 + /* disable CPU features that KVM cannot support */
  1460 +
  1461 + /* svm */
  1462 + *ecx &= ~4UL;
  1463 + /* 3dnow */
  1464 + *edx = ~0xc0000000;
  1465 + }
1393 1466 break;
1394 1467 case 0x80000002:
1395 1468 case 0x80000003:
... ...
... ... @@ -39,6 +39,7 @@
39 39 #include "block.h"
40 40 #include "audio/audio.h"
41 41 #include "migration.h"
  42 +#include "kvm.h"
42 43  
43 44 #include <unistd.h>
44 45 #include <fcntl.h>
... ... @@ -4782,6 +4783,9 @@ static void help(int exitcode)
4782 4783 "-kernel-kqemu enable KQEMU full virtualization (default is user mode only)\n"
4783 4784 "-no-kqemu disable KQEMU kernel module usage\n"
4784 4785 #endif
  4786 +#ifdef CONFIG_KVM
  4787 + "-enable-kvm enable KVM full virtualization support\n"
  4788 +#endif
4785 4789 #ifdef TARGET_I386
4786 4790 "-no-acpi disable ACPI\n"
4787 4791 #endif
... ... @@ -4887,6 +4891,7 @@ enum {
4887 4891 QEMU_OPTION_pidfile,
4888 4892 QEMU_OPTION_no_kqemu,
4889 4893 QEMU_OPTION_kernel_kqemu,
  4894 + QEMU_OPTION_enable_kvm,
4890 4895 QEMU_OPTION_win2k_hack,
4891 4896 QEMU_OPTION_usb,
4892 4897 QEMU_OPTION_usbdevice,
... ... @@ -4973,6 +4978,9 @@ static const QEMUOption qemu_options[] = {
4973 4978 { "no-kqemu", 0, QEMU_OPTION_no_kqemu },
4974 4979 { "kernel-kqemu", 0, QEMU_OPTION_kernel_kqemu },
4975 4980 #endif
  4981 +#ifdef CONFIG_KVM
  4982 + { "enable-kvm", 0, QEMU_OPTION_enable_kvm },
  4983 +#endif
4976 4984 #if defined(TARGET_PPC) || defined(TARGET_SPARC)
4977 4985 { "g", 1, QEMU_OPTION_g },
4978 4986 #endif
... ... @@ -5794,6 +5802,14 @@ int main(int argc, char **argv)
5794 5802 kqemu_allowed = 2;
5795 5803 break;
5796 5804 #endif
  5805 +#ifdef CONFIG_KVM
  5806 + case QEMU_OPTION_enable_kvm:
  5807 + kvm_allowed = 1;
  5808 +#ifdef USE_KQEMU
  5809 + kqemu_allowed = 0;
  5810 +#endif
  5811 + break;
  5812 +#endif
5797 5813 case QEMU_OPTION_usb:
5798 5814 usb_enabled = 1;
5799 5815 break;
... ... @@ -5928,6 +5944,14 @@ int main(int argc, char **argv)
5928 5944 }
5929 5945 }
5930 5946  
  5947 +#if defined(CONFIG_KVM) && defined(USE_KQEMU)
  5948 + if (kvm_allowed && kqemu_allowed) {
  5949 + fprintf(stderr,
  5950 + "You can not enable both KVM and kqemu at the same time\n");
  5951 + exit(1);
  5952 + }
  5953 +#endif
  5954 +
5931 5955 machine->max_cpus = machine->max_cpus ?: 1; /* Default to UP */
5932 5956 if (smp_cpus > machine->max_cpus) {
5933 5957 fprintf(stderr, "Number of SMP cpus requested (%d), exceeds max cpus "
... ... @@ -6229,6 +6253,16 @@ int main(int argc, char **argv)
6229 6253 }
6230 6254 }
6231 6255  
  6256 + if (kvm_enabled()) {
  6257 + int ret;
  6258 +
  6259 + ret = kvm_init(smp_cpus);
  6260 + if (ret < 0) {
  6261 + fprintf(stderr, "failed to initialize KVM\n");
  6262 + exit(1);
  6263 + }
  6264 + }
  6265 +
6232 6266 machine->init(ram_size, vga_ram_size, boot_devices, ds,
6233 6267 kernel_filename, kernel_cmdline, initrd_filename, cpu_model);
6234 6268  
... ...