Commit d4e8164f7e9342d692c1d6f1c848ed05f8007ece
1 parent
08351fb3
direct chaining for PowerPC and i386
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@183 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
9 changed files
with
620 additions
and
210 deletions
dyngen.c
| ... | ... | @@ -170,7 +170,16 @@ void elf_swap_phdr(struct elf_phdr *h) |
| 170 | 170 | swabls(&h->p_align); /* Segment alignment */ |
| 171 | 171 | } |
| 172 | 172 | |
| 173 | +/* ELF file info */ | |
| 173 | 174 | int do_swap; |
| 175 | +struct elf_shdr *shdr; | |
| 176 | +struct elfhdr ehdr; | |
| 177 | +ElfW(Sym) *symtab; | |
| 178 | +int nb_syms; | |
| 179 | +char *strtab; | |
| 180 | +/* data section */ | |
| 181 | +uint8_t *data_data; | |
| 182 | +int data_shndx; | |
| 174 | 183 | |
| 175 | 184 | uint16_t get16(uint16_t *p) |
| 176 | 185 | { |
| ... | ... | @@ -270,7 +279,7 @@ int strstart(const char *str, const char *val, const char **ptr) |
| 270 | 279 | /* generate op code */ |
| 271 | 280 | void gen_code(const char *name, host_ulong offset, host_ulong size, |
| 272 | 281 | FILE *outfile, uint8_t *text, ELF_RELOC *relocs, int nb_relocs, int reloc_sh_type, |
| 273 | - ElfW(Sym) *symtab, char *strtab, int gen_switch) | |
| 282 | + int gen_switch) | |
| 274 | 283 | { |
| 275 | 284 | int copy_size = 0; |
| 276 | 285 | uint8_t *p_start, *p_end; |
| ... | ... | @@ -291,13 +300,16 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, |
| 291 | 300 | switch(ELF_ARCH) { |
| 292 | 301 | case EM_386: |
| 293 | 302 | { |
| 294 | - uint8_t *p; | |
| 295 | - p = p_end - 1; | |
| 296 | - if (p == p_start) | |
| 303 | + int len; | |
| 304 | + len = p_end - p_start; | |
| 305 | + if (len == 0) | |
| 297 | 306 | error("empty code for %s", name); |
| 298 | - if (p[0] != 0xc3) | |
| 299 | - error("ret expected at the end of %s", name); | |
| 300 | - copy_size = p - p_start; | |
| 307 | + if (p_end[-1] == 0xc3) { | |
| 308 | + len--; | |
| 309 | + } else { | |
| 310 | + error("ret or jmp expected at the end of %s", name); | |
| 311 | + } | |
| 312 | + copy_size = len; | |
| 301 | 313 | } |
| 302 | 314 | break; |
| 303 | 315 | case EM_PPC: |
| ... | ... | @@ -423,7 +435,7 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, |
| 423 | 435 | sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name; |
| 424 | 436 | if (strstart(sym_name, "__op_param", &p)) { |
| 425 | 437 | n = strtoul(p, NULL, 10); |
| 426 | - if (n >= MAX_ARGS) | |
| 438 | + if (n > MAX_ARGS) | |
| 427 | 439 | error("too many arguments in %s", name); |
| 428 | 440 | args_present[n - 1] = 1; |
| 429 | 441 | } |
| ... | ... | @@ -459,7 +471,9 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, |
| 459 | 471 | if (rel->r_offset >= start_offset && |
| 460 | 472 | rel->r_offset < start_offset + copy_size) { |
| 461 | 473 | sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name; |
| 462 | - if (*sym_name && !strstart(sym_name, "__op_param", &p)) { | |
| 474 | + if (*sym_name && | |
| 475 | + !strstart(sym_name, "__op_param", NULL) && | |
| 476 | + !strstart(sym_name, "__op_jmp", NULL)) { | |
| 463 | 477 | #if defined(HOST_SPARC) |
| 464 | 478 | if (sym_name[0] == '.') { |
| 465 | 479 | fprintf(outfile, |
| ... | ... | @@ -474,6 +488,31 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, |
| 474 | 488 | } |
| 475 | 489 | |
| 476 | 490 | fprintf(outfile, " memcpy(gen_code_ptr, (void *)((char *)&%s+%d), %d);\n", name, start_offset - offset, copy_size); |
| 491 | + | |
| 492 | + /* emit code offset information */ | |
| 493 | + { | |
| 494 | + ElfW(Sym) *sym; | |
| 495 | + const char *sym_name, *p; | |
| 496 | + target_ulong val; | |
| 497 | + int n; | |
| 498 | + | |
| 499 | + for(i = 0, sym = symtab; i < nb_syms; i++, sym++) { | |
| 500 | + sym_name = strtab + sym->st_name; | |
| 501 | + if (strstart(sym_name, "__op_label", &p)) { | |
| 502 | + /* test if the variable refers to a label inside | |
| 503 | + the code we are generating */ | |
| 504 | + if (sym->st_shndx != data_shndx) | |
| 505 | + error("__op_labelN symbols must be in .data or .sdata section"); | |
| 506 | + val = *(target_ulong *)(data_data + sym->st_value); | |
| 507 | + if (val >= start_offset && val < start_offset + copy_size) { | |
| 508 | + n = strtol(p, NULL, 10); | |
| 509 | + fprintf(outfile, " label_offsets[%d] = %d + (gen_code_ptr - gen_code_buf);\n", n, val - start_offset); | |
| 510 | + } | |
| 511 | + } | |
| 512 | + } | |
| 513 | + } | |
| 514 | + | |
| 515 | + /* load parameters into variables */ | |
| 477 | 516 | for(i = 0; i < nb_args; i++) { |
| 478 | 517 | fprintf(outfile, " param%d = *opparam_ptr++;\n", i + 1); |
| 479 | 518 | } |
| ... | ... | @@ -519,6 +558,18 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, |
| 519 | 558 | if (rel->r_offset >= start_offset && |
| 520 | 559 | rel->r_offset < start_offset + copy_size) { |
| 521 | 560 | sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name; |
| 561 | + if (strstart(sym_name, "__op_jmp", &p)) { | |
| 562 | + int n; | |
| 563 | + n = strtol(p, NULL, 10); | |
| 564 | + /* __op_jmp relocations are done at | |
| 565 | + runtime to do translated block | |
| 566 | + chaining: the offset of the instruction | |
| 567 | + needs to be stored */ | |
| 568 | + fprintf(outfile, " jmp_offsets[%d] = %d + (gen_code_ptr - gen_code_buf);\n", | |
| 569 | + n, rel->r_offset - start_offset); | |
| 570 | + continue; | |
| 571 | + } | |
| 572 | + | |
| 522 | 573 | if (strstart(sym_name, "__op_param", &p)) { |
| 523 | 574 | snprintf(name, sizeof(name), "param%s", p); |
| 524 | 575 | } else { |
| ... | ... | @@ -824,11 +875,10 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, |
| 824 | 875 | int load_elf(const char *filename, FILE *outfile, int do_print_enum) |
| 825 | 876 | { |
| 826 | 877 | int fd; |
| 827 | - struct elfhdr ehdr; | |
| 828 | - struct elf_shdr *sec, *shdr, *symtab_sec, *strtab_sec, *text_sec; | |
| 829 | - int i, j, nb_syms; | |
| 830 | - ElfW(Sym) *symtab, *sym; | |
| 831 | - char *shstr, *strtab; | |
| 878 | + struct elf_shdr *sec, *symtab_sec, *strtab_sec, *text_sec; | |
| 879 | + int i, j; | |
| 880 | + ElfW(Sym) *sym; | |
| 881 | + char *shstr, *data_name; | |
| 832 | 882 | uint8_t *text; |
| 833 | 883 | void *relocs; |
| 834 | 884 | int nb_relocs, reloc_sh_type; |
| ... | ... | @@ -880,6 +930,17 @@ int load_elf(const char *filename, FILE *outfile, int do_print_enum) |
| 880 | 930 | error("could not find .text section"); |
| 881 | 931 | text = load_data(fd, text_sec->sh_offset, text_sec->sh_size); |
| 882 | 932 | |
| 933 | +#if defined(HOST_PPC) | |
| 934 | + data_name = ".sdata"; | |
| 935 | +#else | |
| 936 | + data_name = ".data"; | |
| 937 | +#endif | |
| 938 | + sec = find_elf_section(shdr, ehdr.e_shnum, shstr, data_name); | |
| 939 | + if (!sec) | |
| 940 | + error("could not find %s section", data_name); | |
| 941 | + data_shndx = sec - shdr; | |
| 942 | + data_data = load_data(fd, sec->sh_offset, sec->sh_size); | |
| 943 | + | |
| 883 | 944 | /* find text relocations, if any */ |
| 884 | 945 | nb_relocs = 0; |
| 885 | 946 | relocs = NULL; |
| ... | ... | @@ -936,7 +997,7 @@ int load_elf(const char *filename, FILE *outfile, int do_print_enum) |
| 936 | 997 | name = strtab + sym->st_name; |
| 937 | 998 | if (strstart(name, OP_PREFIX, &p)) { |
| 938 | 999 | gen_code(name, sym->st_value, sym->st_size, outfile, |
| 939 | - text, relocs, nb_relocs, reloc_sh_type, symtab, strtab, 2); | |
| 1000 | + text, relocs, nb_relocs, reloc_sh_type, 2); | |
| 940 | 1001 | } |
| 941 | 1002 | } |
| 942 | 1003 | } else { |
| ... | ... | @@ -963,6 +1024,7 @@ fprintf(outfile, |
| 963 | 1024 | #endif |
| 964 | 1025 | fprintf(outfile, |
| 965 | 1026 | "int dyngen_code(uint8_t *gen_code_buf,\n" |
| 1027 | +" uint16_t *label_offsets, uint16_t *jmp_offsets,\n" | |
| 966 | 1028 | " const uint16_t *opc_buf, const uint32_t *opparam_buf)\n" |
| 967 | 1029 | "{\n" |
| 968 | 1030 | " uint8_t *gen_code_ptr;\n" |
| ... | ... | @@ -1001,7 +1063,7 @@ fprintf(outfile, |
| 1001 | 1063 | if (sym->st_shndx != (text_sec - shdr)) |
| 1002 | 1064 | error("invalid section for opcode (0x%x)", sym->st_shndx); |
| 1003 | 1065 | gen_code(name, sym->st_value, sym->st_size, outfile, |
| 1004 | - text, relocs, nb_relocs, reloc_sh_type, symtab, strtab, 1); | |
| 1066 | + text, relocs, nb_relocs, reloc_sh_type, 1); | |
| 1005 | 1067 | } |
| 1006 | 1068 | } |
| 1007 | 1069 | |
| ... | ... | @@ -1056,7 +1118,7 @@ fprintf(outfile, |
| 1056 | 1118 | if (sym->st_shndx != (text_sec - shdr)) |
| 1057 | 1119 | error("invalid section for opcode (0x%x)", sym->st_shndx); |
| 1058 | 1120 | gen_code(name, sym->st_value, sym->st_size, outfile, |
| 1059 | - text, relocs, nb_relocs, reloc_sh_type, symtab, strtab, 0); | |
| 1121 | + text, relocs, nb_relocs, reloc_sh_type, 0); | |
| 1060 | 1122 | } |
| 1061 | 1123 | } |
| 1062 | 1124 | } | ... | ... |
exec-i386.c
| ... | ... | @@ -120,7 +120,7 @@ int cpu_x86_exec(CPUX86State *env1) |
| 120 | 120 | TranslationBlock *tb, **ptb; |
| 121 | 121 | uint8_t *tc_ptr, *cs_base, *pc; |
| 122 | 122 | unsigned int flags; |
| 123 | - | |
| 123 | + | |
| 124 | 124 | /* first we save global registers */ |
| 125 | 125 | saved_T0 = T0; |
| 126 | 126 | saved_T1 = T1; |
| ... | ... | @@ -169,6 +169,7 @@ int cpu_x86_exec(CPUX86State *env1) |
| 169 | 169 | |
| 170 | 170 | /* prepare setjmp context for exception handling */ |
| 171 | 171 | if (setjmp(env->jmp_env) == 0) { |
| 172 | + T0 = 0; /* force lookup of first TB */ | |
| 172 | 173 | for(;;) { |
| 173 | 174 | if (env->interrupt_request) { |
| 174 | 175 | raise_exception(EXCP_INTERRUPT); |
| ... | ... | @@ -209,30 +210,40 @@ int cpu_x86_exec(CPUX86State *env1) |
| 209 | 210 | flags |= (env->eflags & TF_MASK) << (GEN_FLAG_TF_SHIFT - 8); |
| 210 | 211 | cs_base = env->seg_cache[R_CS].base; |
| 211 | 212 | pc = cs_base + env->eip; |
| 213 | + spin_lock(&tb_lock); | |
| 212 | 214 | tb = tb_find(&ptb, (unsigned long)pc, (unsigned long)cs_base, |
| 213 | 215 | flags); |
| 214 | 216 | if (!tb) { |
| 215 | 217 | /* if no translated code available, then translate it now */ |
| 216 | - /* very inefficient but safe: we lock all the cpus | |
| 217 | - when generating code */ | |
| 218 | - spin_lock(&tb_lock); | |
| 218 | + tb = tb_alloc((unsigned long)pc); | |
| 219 | + if (!tb) { | |
| 220 | + /* flush must be done */ | |
| 221 | + tb_flush(); | |
| 222 | + /* cannot fail at this point */ | |
| 223 | + tb = tb_alloc((unsigned long)pc); | |
| 224 | + /* don't forget to invalidate previous TB info */ | |
| 225 | + ptb = &tb_hash[tb_hash_func((unsigned long)pc)]; | |
| 226 | + T0 = 0; | |
| 227 | + } | |
| 219 | 228 | tc_ptr = code_gen_ptr; |
| 229 | + tb->tc_ptr = tc_ptr; | |
| 220 | 230 | ret = cpu_x86_gen_code(code_gen_ptr, CODE_GEN_MAX_SIZE, |
| 221 | 231 | &code_gen_size, pc, cs_base, flags, |
| 222 | - &code_size); | |
| 232 | + &code_size, tb); | |
| 223 | 233 | /* if invalid instruction, signal it */ |
| 224 | 234 | if (ret != 0) { |
| 235 | + /* NOTE: the tb is allocated but not linked, so we | |
| 236 | + can leave it */ | |
| 225 | 237 | spin_unlock(&tb_lock); |
| 226 | 238 | raise_exception(EXCP06_ILLOP); |
| 227 | 239 | } |
| 228 | - tb = tb_alloc((unsigned long)pc, code_size); | |
| 229 | 240 | *ptb = tb; |
| 241 | + tb->size = code_size; | |
| 230 | 242 | tb->cs_base = (unsigned long)cs_base; |
| 231 | 243 | tb->flags = flags; |
| 232 | - tb->tc_ptr = tc_ptr; | |
| 233 | 244 | tb->hash_next = NULL; |
| 245 | + tb_link(tb); | |
| 234 | 246 | code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); |
| 235 | - spin_unlock(&tb_lock); | |
| 236 | 247 | } |
| 237 | 248 | #ifdef DEBUG_EXEC |
| 238 | 249 | if (loglevel) { |
| ... | ... | @@ -241,14 +252,21 @@ int cpu_x86_exec(CPUX86State *env1) |
| 241 | 252 | lookup_symbol((void *)tb->pc)); |
| 242 | 253 | } |
| 243 | 254 | #endif |
| 244 | - /* execute the generated code */ | |
| 255 | + | |
| 256 | + /* see if we can patch the calling TB */ | |
| 257 | + if (T0 != 0 && !(env->eflags & TF_MASK)) { | |
| 258 | + tb_add_jump((TranslationBlock *)(T0 & ~3), T0 & 3, tb); | |
| 259 | + } | |
| 245 | 260 | tc_ptr = tb->tc_ptr; |
| 261 | + spin_unlock(&tb_lock); | |
| 262 | + | |
| 263 | + /* execute the generated code */ | |
| 246 | 264 | gen_func = (void *)tc_ptr; |
| 247 | 265 | #ifdef __sparc__ |
| 248 | 266 | __asm__ __volatile__("call %0\n\t" |
| 249 | 267 | " mov %%o7,%%i0" |
| 250 | 268 | : /* no outputs */ |
| 251 | - : "r" (gen_func) | |
| 269 | + : "r" (gen_func) | |
| 252 | 270 | : "i0", "i1", "i2", "i3", "i4", "i5"); |
| 253 | 271 | #else |
| 254 | 272 | gen_func(); | ... | ... |
exec-i386.h
| ... | ... | @@ -205,8 +205,10 @@ extern int __op_param1, __op_param2, __op_param3; |
| 205 | 205 | #define PARAM2 ((long)(&__op_param2)) |
| 206 | 206 | #define PARAM3 ((long)(&__op_param3)) |
| 207 | 207 | #endif |
| 208 | +extern int __op_jmp0, __op_jmp1; | |
| 208 | 209 | |
| 209 | 210 | #include "cpu-i386.h" |
| 211 | +#include "exec.h" | |
| 210 | 212 | |
| 211 | 213 | typedef struct CCTable { |
| 212 | 214 | int (*compute_all)(void); /* return all the flags */ | ... | ... |
exec.c
| ... | ... | @@ -27,6 +27,7 @@ |
| 27 | 27 | #include <sys/mman.h> |
| 28 | 28 | |
| 29 | 29 | #include "cpu-i386.h" |
| 30 | +#include "exec.h" | |
| 30 | 31 | |
| 31 | 32 | //#define DEBUG_TB_INVALIDATE |
| 32 | 33 | #define DEBUG_FLUSH |
| ... | ... | @@ -212,6 +213,7 @@ static void page_flush_tb(void) |
| 212 | 213 | } |
| 213 | 214 | |
| 214 | 215 | /* flush all the translation blocks */ |
| 216 | +/* XXX: tb_flush is currently not thread safe */ | |
| 215 | 217 | void tb_flush(void) |
| 216 | 218 | { |
| 217 | 219 | int i; |
| ... | ... | @@ -226,7 +228,8 @@ void tb_flush(void) |
| 226 | 228 | tb_hash[i] = NULL; |
| 227 | 229 | page_flush_tb(); |
| 228 | 230 | code_gen_ptr = code_gen_buffer; |
| 229 | - /* XXX: flush processor icache at this point */ | |
| 231 | + /* XXX: flush processor icache at this point if cache flush is | |
| 232 | + expensive */ | |
| 230 | 233 | } |
| 231 | 234 | |
| 232 | 235 | #ifdef DEBUG_TB_CHECK |
| ... | ... | @@ -265,6 +268,26 @@ static void tb_page_check(void) |
| 265 | 268 | } |
| 266 | 269 | } |
| 267 | 270 | |
| 271 | +void tb_jmp_check(TranslationBlock *tb) | |
| 272 | +{ | |
| 273 | + TranslationBlock *tb1; | |
| 274 | + unsigned int n1; | |
| 275 | + | |
| 276 | + /* walk the circular list of TBs jumping to this TB */ | |
| 277 | + tb1 = tb->jmp_first; | |
| 278 | + for(;;) { | |
| 279 | + n1 = (long)tb1 & 3; | |
| 280 | + tb1 = (TranslationBlock *)((long)tb1 & ~3); | |
| 281 | + if (n1 == 2) | |
| 282 | + break; | |
| 283 | + tb1 = tb1->jmp_next[n1]; | |
| 284 | + } | |
| 285 | + /* check end of list */ | |
| 286 | + if (tb1 != tb) { | |
| 287 | + printf("ERROR: jmp_list from 0x%08lx\n", (long)tb); | |
| 288 | + } | |
| 289 | +} | |
| 290 | + | |
| 268 | 291 | #endif |
| 269 | 292 | |
| 270 | 293 | /* invalidate one TB */ |
| ... | ... | @@ -282,12 +305,48 @@ static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb, |
| 282 | 305 | } |
| 283 | 306 | } |
| 284 | 307 | |
| 308 | +static inline void tb_jmp_remove(TranslationBlock *tb, int n) | |
| 309 | +{ | |
| 310 | + TranslationBlock *tb1, **ptb; | |
| 311 | + unsigned int n1; | |
| 312 | + | |
| 313 | + ptb = &tb->jmp_next[n]; | |
| 314 | + tb1 = *ptb; | |
| 315 | + if (tb1) { | |
| 316 | + /* find tb(n) in circular list */ | |
| 317 | + for(;;) { | |
| 318 | + tb1 = *ptb; | |
| 319 | + n1 = (long)tb1 & 3; | |
| 320 | + tb1 = (TranslationBlock *)((long)tb1 & ~3); | |
| 321 | + if (n1 == n && tb1 == tb) | |
| 322 | + break; | |
| 323 | + if (n1 == 2) { | |
| 324 | + ptb = &tb1->jmp_first; | |
| 325 | + } else { | |
| 326 | + ptb = &tb1->jmp_next[n1]; | |
| 327 | + } | |
| 328 | + } | |
| 329 | + /* now we can suppress tb(n) from the list */ | |
| 330 | + *ptb = tb->jmp_next[n]; | |
| 331 | + | |
| 332 | + tb->jmp_next[n] = NULL; | |
| 333 | + } | |
| 334 | +} | |
| 335 | + | |
| 336 | +/* reset the jump entry 'n' of a TB so that it is not chained to | |
| 337 | + another TB */ | |
| 338 | +static inline void tb_reset_jump(TranslationBlock *tb, int n) | |
| 339 | +{ | |
| 340 | + tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n])); | |
| 341 | +} | |
| 342 | + | |
| 285 | 343 | static inline void tb_invalidate(TranslationBlock *tb, int parity) |
| 286 | 344 | { |
| 287 | 345 | PageDesc *p; |
| 288 | 346 | unsigned int page_index1, page_index2; |
| 289 | - unsigned int h; | |
| 290 | - | |
| 347 | + unsigned int h, n1; | |
| 348 | + TranslationBlock *tb1, *tb2; | |
| 349 | + | |
| 291 | 350 | /* remove the TB from the hash list */ |
| 292 | 351 | h = tb_hash_func(tb->pc); |
| 293 | 352 | tb_remove(&tb_hash[h], tb, |
| ... | ... | @@ -305,6 +364,24 @@ static inline void tb_invalidate(TranslationBlock *tb, int parity) |
| 305 | 364 | tb_remove(&p->first_tb, tb, |
| 306 | 365 | offsetof(TranslationBlock, page_next[page_index2 & 1])); |
| 307 | 366 | } |
| 367 | + | |
| 368 | + /* suppress this TB from the two jump lists */ | |
| 369 | + tb_jmp_remove(tb, 0); | |
| 370 | + tb_jmp_remove(tb, 1); | |
| 371 | + | |
| 372 | + /* suppress any remaining jumps to this TB */ | |
| 373 | + tb1 = tb->jmp_first; | |
| 374 | + for(;;) { | |
| 375 | + n1 = (long)tb1 & 3; | |
| 376 | + if (n1 == 2) | |
| 377 | + break; | |
| 378 | + tb1 = (TranslationBlock *)((long)tb1 & ~3); | |
| 379 | + tb2 = tb1->jmp_next[n1]; | |
| 380 | + tb_reset_jump(tb1, n1); | |
| 381 | + tb1->jmp_next[n1] = NULL; | |
| 382 | + tb1 = tb2; | |
| 383 | + } | |
| 384 | + tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */ | |
| 308 | 385 | } |
| 309 | 386 | |
| 310 | 387 | /* invalidate all TBs which intersect with the target page starting at addr */ |
| ... | ... | @@ -367,27 +444,39 @@ static inline void tb_alloc_page(TranslationBlock *tb, unsigned int page_index) |
| 367 | 444 | |
| 368 | 445 | /* Allocate a new translation block. Flush the translation buffer if |
| 369 | 446 | too many translation blocks or too much generated code. */ |
| 370 | -TranslationBlock *tb_alloc(unsigned long pc, | |
| 371 | - unsigned long size) | |
| 447 | +TranslationBlock *tb_alloc(unsigned long pc) | |
| 372 | 448 | { |
| 373 | 449 | TranslationBlock *tb; |
| 374 | - unsigned int page_index1, page_index2; | |
| 375 | 450 | |
| 376 | 451 | if (nb_tbs >= CODE_GEN_MAX_BLOCKS || |
| 377 | 452 | (code_gen_ptr - code_gen_buffer) >= CODE_GEN_BUFFER_MAX_SIZE) |
| 378 | - tb_flush(); | |
| 453 | + return NULL; | |
| 379 | 454 | tb = &tbs[nb_tbs++]; |
| 380 | 455 | tb->pc = pc; |
| 381 | - tb->size = size; | |
| 456 | + return tb; | |
| 457 | +} | |
| 458 | + | |
| 459 | +/* link the tb with the other TBs */ | |
| 460 | +void tb_link(TranslationBlock *tb) | |
| 461 | +{ | |
| 462 | + unsigned int page_index1, page_index2; | |
| 382 | 463 | |
| 383 | 464 | /* add in the page list */ |
| 384 | - page_index1 = pc >> TARGET_PAGE_BITS; | |
| 465 | + page_index1 = tb->pc >> TARGET_PAGE_BITS; | |
| 385 | 466 | tb_alloc_page(tb, page_index1); |
| 386 | - page_index2 = (pc + size - 1) >> TARGET_PAGE_BITS; | |
| 467 | + page_index2 = (tb->pc + tb->size - 1) >> TARGET_PAGE_BITS; | |
| 387 | 468 | if (page_index2 != page_index1) { |
| 388 | 469 | tb_alloc_page(tb, page_index2); |
| 389 | 470 | } |
| 390 | - return tb; | |
| 471 | + tb->jmp_first = (TranslationBlock *)((long)tb | 2); | |
| 472 | + tb->jmp_next[0] = NULL; | |
| 473 | + tb->jmp_next[1] = NULL; | |
| 474 | + | |
| 475 | + /* init original jump addresses */ | |
| 476 | + if (tb->tb_next_offset[0] != 0xffff) | |
| 477 | + tb_reset_jump(tb, 0); | |
| 478 | + if (tb->tb_next_offset[1] != 0xffff) | |
| 479 | + tb_reset_jump(tb, 1); | |
| 391 | 480 | } |
| 392 | 481 | |
| 393 | 482 | /* called from signal handler: invalidate the code and unprotect the | ... | ... |
exec.h
0 → 100644
| 1 | +/* | |
| 2 | + * internal execution defines for qemu | |
| 3 | + * | |
| 4 | + * Copyright (c) 2003 Fabrice Bellard | |
| 5 | + * | |
| 6 | + * This library is free software; you can redistribute it and/or | |
| 7 | + * modify it under the terms of the GNU Lesser General Public | |
| 8 | + * License as published by the Free Software Foundation; either | |
| 9 | + * version 2 of the License, or (at your option) any later version. | |
| 10 | + * | |
| 11 | + * This library is distributed in the hope that it will be useful, | |
| 12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 14 | + * Lesser General Public License for more details. | |
| 15 | + * | |
| 16 | + * You should have received a copy of the GNU Lesser General Public | |
| 17 | + * License along with this library; if not, write to the Free Software | |
| 18 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
| 19 | + */ | |
| 20 | + | |
| 21 | +#define GEN_FLAG_CODE32_SHIFT 0 | |
| 22 | +#define GEN_FLAG_ADDSEG_SHIFT 1 | |
| 23 | +#define GEN_FLAG_SS32_SHIFT 2 | |
| 24 | +#define GEN_FLAG_VM_SHIFT 3 | |
| 25 | +#define GEN_FLAG_ST_SHIFT 4 | |
| 26 | +#define GEN_FLAG_CPL_SHIFT 7 | |
| 27 | +#define GEN_FLAG_IOPL_SHIFT 9 | |
| 28 | +#define GEN_FLAG_TF_SHIFT 11 | |
| 29 | + | |
| 30 | +struct TranslationBlock; | |
| 31 | +int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, | |
| 32 | + int *gen_code_size_ptr, | |
| 33 | + uint8_t *pc_start, uint8_t *cs_base, int flags, | |
| 34 | + int *code_size_ptr, struct TranslationBlock *tb); | |
| 35 | +void cpu_x86_tblocks_init(void); | |
| 36 | +void page_init(void); | |
| 37 | +int page_unprotect(unsigned long address); | |
| 38 | + | |
| 39 | +#define CODE_GEN_MAX_SIZE 65536 | |
| 40 | +#define CODE_GEN_ALIGN 16 /* must be >= the size of an icache line */ | |
| 41 | + | |
| 42 | +#define CODE_GEN_HASH_BITS 15 | |
| 43 | +#define CODE_GEN_HASH_SIZE (1 << CODE_GEN_HASH_BITS) | |
| 44 | + | |
| 45 | +/* maximum total translated code allocated */ | |
| 46 | +#define CODE_GEN_BUFFER_SIZE (2048 * 1024) | |
| 47 | +//#define CODE_GEN_BUFFER_SIZE (128 * 1024) | |
| 48 | + | |
| 49 | +#if defined(__powerpc__) | |
| 50 | +#define USE_DIRECT_JUMP | |
| 51 | +#endif | |
| 52 | + | |
| 53 | +typedef struct TranslationBlock { | |
| 54 | + unsigned long pc; /* simulated PC corresponding to this block (EIP + CS base) */ | |
| 55 | + unsigned long cs_base; /* CS base for this block */ | |
| 56 | + unsigned int flags; /* flags defining in which context the code was generated */ | |
| 57 | + uint16_t size; /* size of target code for this block (1 <= | |
| 58 | + size <= TARGET_PAGE_SIZE) */ | |
| 59 | + uint8_t *tc_ptr; /* pointer to the translated code */ | |
| 60 | + struct TranslationBlock *hash_next; /* next matching block */ | |
| 61 | + struct TranslationBlock *page_next[2]; /* next blocks in even/odd page */ | |
| 62 | + /* the following data are used to directly call another TB from | |
| 63 | + the code of this one. */ | |
| 64 | + uint16_t tb_next_offset[2]; /* offset of original jump target */ | |
| 65 | +#ifdef USE_DIRECT_JUMP | |
| 66 | + uint16_t tb_jmp_offset[2]; /* offset of jump instruction */ | |
| 67 | +#else | |
| 68 | + uint8_t *tb_next[2]; /* address of jump generated code */ | |
| 69 | +#endif | |
| 70 | + /* list of TBs jumping to this one. This is a circular list using | |
| 71 | + the two least significant bits of the pointers to tell what is | |
| 72 | + the next pointer: 0 = jmp_next[0], 1 = jmp_next[1], 2 = | |
| 73 | + jmp_first */ | |
| 74 | + struct TranslationBlock *jmp_next[2]; | |
| 75 | + struct TranslationBlock *jmp_first; | |
| 76 | +} TranslationBlock; | |
| 77 | + | |
| 78 | +static inline unsigned int tb_hash_func(unsigned long pc) | |
| 79 | +{ | |
| 80 | + return pc & (CODE_GEN_HASH_SIZE - 1); | |
| 81 | +} | |
| 82 | + | |
| 83 | +TranslationBlock *tb_alloc(unsigned long pc); | |
| 84 | +void tb_flush(void); | |
| 85 | +void tb_link(TranslationBlock *tb); | |
| 86 | + | |
| 87 | +extern TranslationBlock *tb_hash[CODE_GEN_HASH_SIZE]; | |
| 88 | + | |
| 89 | +extern uint8_t code_gen_buffer[CODE_GEN_BUFFER_SIZE]; | |
| 90 | +extern uint8_t *code_gen_ptr; | |
| 91 | + | |
| 92 | +/* find a translation block in the translation cache. If not found, | |
| 93 | + return NULL and the pointer to the last element of the list in pptb */ | |
| 94 | +static inline TranslationBlock *tb_find(TranslationBlock ***pptb, | |
| 95 | + unsigned long pc, | |
| 96 | + unsigned long cs_base, | |
| 97 | + unsigned int flags) | |
| 98 | +{ | |
| 99 | + TranslationBlock **ptb, *tb; | |
| 100 | + unsigned int h; | |
| 101 | + | |
| 102 | + h = tb_hash_func(pc); | |
| 103 | + ptb = &tb_hash[h]; | |
| 104 | + for(;;) { | |
| 105 | + tb = *ptb; | |
| 106 | + if (!tb) | |
| 107 | + break; | |
| 108 | + if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags) | |
| 109 | + return tb; | |
| 110 | + ptb = &tb->hash_next; | |
| 111 | + } | |
| 112 | + *pptb = ptb; | |
| 113 | + return NULL; | |
| 114 | +} | |
| 115 | + | |
| 116 | +#if defined(__powerpc__) | |
| 117 | + | |
| 118 | +static inline void tb_set_jmp_target(TranslationBlock *tb, | |
| 119 | + int n, unsigned long addr) | |
| 120 | +{ | |
| 121 | + uint32_t val, *ptr; | |
| 122 | + unsigned long offset; | |
| 123 | + | |
| 124 | + offset = (unsigned long)(tb->tc_ptr + tb->tb_jmp_offset[n]); | |
| 125 | + | |
| 126 | + /* patch the branch destination */ | |
| 127 | + ptr = (uint32_t *)offset; | |
| 128 | + val = *ptr; | |
| 129 | + val = (val & ~0x03fffffc) | ((addr - offset) & 0x03fffffc); | |
| 130 | + *ptr = val; | |
| 131 | + /* flush icache */ | |
| 132 | + asm volatile ("dcbst 0,%0" : : "r"(ptr) : "memory"); | |
| 133 | + asm volatile ("sync" : : : "memory"); | |
| 134 | + asm volatile ("icbi 0,%0" : : "r"(ptr) : "memory"); | |
| 135 | + asm volatile ("sync" : : : "memory"); | |
| 136 | + asm volatile ("isync" : : : "memory"); | |
| 137 | +} | |
| 138 | + | |
| 139 | +#else | |
| 140 | + | |
| 141 | +/* set the jump target */ | |
| 142 | +static inline void tb_set_jmp_target(TranslationBlock *tb, | |
| 143 | + int n, unsigned long addr) | |
| 144 | +{ | |
| 145 | + tb->tb_next[n] = (void *)addr; | |
| 146 | +} | |
| 147 | + | |
| 148 | +#endif | |
| 149 | + | |
| 150 | +static inline void tb_add_jump(TranslationBlock *tb, int n, | |
| 151 | + TranslationBlock *tb_next) | |
| 152 | +{ | |
| 153 | + /* patch the native jump address */ | |
| 154 | + tb_set_jmp_target(tb, n, (unsigned long)tb_next->tc_ptr); | |
| 155 | + | |
| 156 | + /* add in TB jmp circular list */ | |
| 157 | + tb->jmp_next[n] = tb_next->jmp_first; | |
| 158 | + tb_next->jmp_first = (TranslationBlock *)((long)(tb) | (n)); | |
| 159 | +} | |
| 160 | + | |
| 161 | +#ifndef offsetof | |
| 162 | +#define offsetof(type, field) ((size_t) &((type *)0)->field) | |
| 163 | +#endif | |
| 164 | + | |
| 165 | +#ifdef __powerpc__ | |
| 166 | +static inline int testandset (int *p) | |
| 167 | +{ | |
| 168 | + int ret; | |
| 169 | + __asm__ __volatile__ ( | |
| 170 | + "0: lwarx %0,0,%1 ;" | |
| 171 | + " xor. %0,%3,%0;" | |
| 172 | + " bne 1f;" | |
| 173 | + " stwcx. %2,0,%1;" | |
| 174 | + " bne- 0b;" | |
| 175 | + "1: " | |
| 176 | + : "=&r" (ret) | |
| 177 | + : "r" (p), "r" (1), "r" (0) | |
| 178 | + : "cr0", "memory"); | |
| 179 | + return ret; | |
| 180 | +} | |
| 181 | +#endif | |
| 182 | + | |
| 183 | +#ifdef __i386__ | |
| 184 | +static inline int testandset (int *p) | |
| 185 | +{ | |
| 186 | + char ret; | |
| 187 | + long int readval; | |
| 188 | + | |
| 189 | + __asm__ __volatile__ ("lock; cmpxchgl %3, %1; sete %0" | |
| 190 | + : "=q" (ret), "=m" (*p), "=a" (readval) | |
| 191 | + : "r" (1), "m" (*p), "a" (0) | |
| 192 | + : "memory"); | |
| 193 | + return ret; | |
| 194 | +} | |
| 195 | +#endif | |
| 196 | + | |
| 197 | +#ifdef __s390__ | |
| 198 | +static inline int testandset (int *p) | |
| 199 | +{ | |
| 200 | + int ret; | |
| 201 | + | |
| 202 | + __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n" | |
| 203 | + " jl 0b" | |
| 204 | + : "=&d" (ret) | |
| 205 | + : "r" (1), "a" (p), "0" (*p) | |
| 206 | + : "cc", "memory" ); | |
| 207 | + return ret; | |
| 208 | +} | |
| 209 | +#endif | |
| 210 | + | |
| 211 | +#ifdef __alpha__ | |
| 212 | +int testandset (int *p) | |
| 213 | +{ | |
| 214 | + int ret; | |
| 215 | + unsigned long one; | |
| 216 | + | |
| 217 | + __asm__ __volatile__ ("0: mov 1,%2\n" | |
| 218 | + " ldl_l %0,%1\n" | |
| 219 | + " stl_c %2,%1\n" | |
| 220 | + " beq %2,1f\n" | |
| 221 | + ".subsection 2\n" | |
| 222 | + "1: br 0b\n" | |
| 223 | + ".previous" | |
| 224 | + : "=r" (ret), "=m" (*p), "=r" (one) | |
| 225 | + : "m" (*p)); | |
| 226 | + return ret; | |
| 227 | +} | |
| 228 | +#endif | |
| 229 | + | |
| 230 | +#ifdef __sparc__ | |
| 231 | +static inline int testandset (int *p) | |
| 232 | +{ | |
| 233 | + int ret; | |
| 234 | + | |
| 235 | + __asm__ __volatile__("ldstub [%1], %0" | |
| 236 | + : "=r" (ret) | |
| 237 | + : "r" (p) | |
| 238 | + : "memory"); | |
| 239 | + | |
| 240 | + return (ret ? 1 : 0); | |
| 241 | +} | |
| 242 | +#endif | |
| 243 | + | |
| 244 | +typedef int spinlock_t; | |
| 245 | + | |
| 246 | +#define SPIN_LOCK_UNLOCKED 0 | |
| 247 | + | |
| 248 | +static inline void spin_lock(spinlock_t *lock) | |
| 249 | +{ | |
| 250 | + while (testandset(lock)); | |
| 251 | +} | |
| 252 | + | |
| 253 | +static inline void spin_unlock(spinlock_t *lock) | |
| 254 | +{ | |
| 255 | + *lock = 0; | |
| 256 | +} | |
| 257 | + | |
| 258 | +static inline int spin_trylock(spinlock_t *lock) | |
| 259 | +{ | |
| 260 | + return !testandset(lock); | |
| 261 | +} | |
| 262 | + | |
| 263 | +extern spinlock_t tb_lock; | |
| 264 | + | ... | ... |
op-i386.c
| ... | ... | @@ -709,7 +709,44 @@ void OPPROTO op_cmpxchg8b(void) |
| 709 | 709 | FORCE_RET(); |
| 710 | 710 | } |
| 711 | 711 | |
| 712 | -/* string ops */ | |
| 712 | +#if defined(__powerpc__) | |
| 713 | + | |
| 714 | +/* on PowerPC we patch the jump instruction directly */ | |
| 715 | +#define JUMP_TB(tbparam, n, eip)\ | |
| 716 | +do {\ | |
| 717 | + static void __attribute__((unused)) *__op_label ## n = &&label ## n;\ | |
| 718 | + asm volatile ("b %0" : : "i" (&__op_jmp ## n));\ | |
| 719 | +label ## n:\ | |
| 720 | + T0 = (long)(tbparam) + (n);\ | |
| 721 | + EIP = eip;\ | |
| 722 | +} while (0) | |
| 723 | + | |
| 724 | +#else | |
| 725 | + | |
| 726 | +/* jump to next block operations (more portable code, does not need | |
| 727 | + cache flushing, but slower because of indirect jump) */ | |
| 728 | +#define JUMP_TB(tbparam, n, eip)\ | |
| 729 | +do {\ | |
| 730 | + static void __attribute__((unused)) *__op_label ## n = &&label ## n;\ | |
| 731 | + goto *((TranslationBlock *)tbparam)->tb_next[n];\ | |
| 732 | +label ## n:\ | |
| 733 | + T0 = (long)(tbparam) + (n);\ | |
| 734 | + EIP = eip;\ | |
| 735 | +} while (0) | |
| 736 | + | |
| 737 | +#endif | |
| 738 | + | |
| 739 | +void OPPROTO op_jmp_tb_next(void) | |
| 740 | +{ | |
| 741 | + JUMP_TB(PARAM1, 0, PARAM2); | |
| 742 | +} | |
| 743 | + | |
| 744 | +void OPPROTO op_movl_T0_0(void) | |
| 745 | +{ | |
| 746 | + T0 = 0; | |
| 747 | +} | |
| 748 | + | |
| 749 | +/* multiple size ops */ | |
| 713 | 750 | |
| 714 | 751 | #define ldul ldl |
| 715 | 752 | |
| ... | ... | @@ -1199,90 +1236,15 @@ void OPPROTO op_lar(void) |
| 1199 | 1236 | |
| 1200 | 1237 | /* flags handling */ |
| 1201 | 1238 | |
| 1202 | -/* slow jumps cases (compute x86 flags) */ | |
| 1203 | -void OPPROTO op_jo_cc(void) | |
| 1204 | -{ | |
| 1205 | - int eflags; | |
| 1206 | - eflags = cc_table[CC_OP].compute_all(); | |
| 1207 | - if (eflags & CC_O) | |
| 1208 | - EIP = PARAM1; | |
| 1209 | - else | |
| 1210 | - EIP = PARAM2; | |
| 1211 | - FORCE_RET(); | |
| 1212 | -} | |
| 1213 | - | |
| 1214 | -void OPPROTO op_jb_cc(void) | |
| 1215 | -{ | |
| 1216 | - if (cc_table[CC_OP].compute_c()) | |
| 1217 | - EIP = PARAM1; | |
| 1218 | - else | |
| 1219 | - EIP = PARAM2; | |
| 1220 | - FORCE_RET(); | |
| 1221 | -} | |
| 1222 | - | |
| 1223 | -void OPPROTO op_jz_cc(void) | |
| 1224 | -{ | |
| 1225 | - int eflags; | |
| 1226 | - eflags = cc_table[CC_OP].compute_all(); | |
| 1227 | - if (eflags & CC_Z) | |
| 1228 | - EIP = PARAM1; | |
| 1229 | - else | |
| 1230 | - EIP = PARAM2; | |
| 1231 | - FORCE_RET(); | |
| 1232 | -} | |
| 1233 | - | |
| 1234 | -void OPPROTO op_jbe_cc(void) | |
| 1239 | +/* slow jump cases: in order to avoid calling a function through a | |
| 1240 | + pointer (which can generate a stack frame on PowerPC), we use | |
| 1241 | + op_setcc to set T0 and then call op_jcc. */ | |
| 1242 | +void OPPROTO op_jcc(void) | |
| 1235 | 1243 | { |
| 1236 | - int eflags; | |
| 1237 | - eflags = cc_table[CC_OP].compute_all(); | |
| 1238 | - if (eflags & (CC_Z | CC_C)) | |
| 1239 | - EIP = PARAM1; | |
| 1240 | - else | |
| 1241 | - EIP = PARAM2; | |
| 1242 | - FORCE_RET(); | |
| 1243 | -} | |
| 1244 | - | |
| 1245 | -void OPPROTO op_js_cc(void) | |
| 1246 | -{ | |
| 1247 | - int eflags; | |
| 1248 | - eflags = cc_table[CC_OP].compute_all(); | |
| 1249 | - if (eflags & CC_S) | |
| 1250 | - EIP = PARAM1; | |
| 1251 | - else | |
| 1252 | - EIP = PARAM2; | |
| 1253 | - FORCE_RET(); | |
| 1254 | -} | |
| 1255 | - | |
| 1256 | -void OPPROTO op_jp_cc(void) | |
| 1257 | -{ | |
| 1258 | - int eflags; | |
| 1259 | - eflags = cc_table[CC_OP].compute_all(); | |
| 1260 | - if (eflags & CC_P) | |
| 1261 | - EIP = PARAM1; | |
| 1262 | - else | |
| 1263 | - EIP = PARAM2; | |
| 1264 | - FORCE_RET(); | |
| 1265 | -} | |
| 1266 | - | |
| 1267 | -void OPPROTO op_jl_cc(void) | |
| 1268 | -{ | |
| 1269 | - int eflags; | |
| 1270 | - eflags = cc_table[CC_OP].compute_all(); | |
| 1271 | - if ((eflags ^ (eflags >> 4)) & 0x80) | |
| 1272 | - EIP = PARAM1; | |
| 1273 | - else | |
| 1274 | - EIP = PARAM2; | |
| 1275 | - FORCE_RET(); | |
| 1276 | -} | |
| 1277 | - | |
| 1278 | -void OPPROTO op_jle_cc(void) | |
| 1279 | -{ | |
| 1280 | - int eflags; | |
| 1281 | - eflags = cc_table[CC_OP].compute_all(); | |
| 1282 | - if (((eflags ^ (eflags >> 4)) & 0x80) || (eflags & CC_Z)) | |
| 1283 | - EIP = PARAM1; | |
| 1244 | + if (T0) | |
| 1245 | + JUMP_TB(PARAM1, 0, PARAM2); | |
| 1284 | 1246 | else |
| 1285 | - EIP = PARAM2; | |
| 1247 | + JUMP_TB(PARAM1, 1, PARAM3); | |
| 1286 | 1248 | FORCE_RET(); |
| 1287 | 1249 | } |
| 1288 | 1250 | ... | ... |
opc-i386.h
| ... | ... | @@ -231,18 +231,20 @@ DEF(jmp_T0, 0) |
| 231 | 231 | DEF(jmp_im, 1) |
| 232 | 232 | DEF(int_im, 2) |
| 233 | 233 | DEF(raise_exception, 1) |
| 234 | -DEF(into, 0) | |
| 234 | +DEF(into, 1) | |
| 235 | 235 | DEF(cli, 0) |
| 236 | 236 | DEF(sti, 0) |
| 237 | 237 | DEF(boundw, 0) |
| 238 | 238 | DEF(boundl, 0) |
| 239 | 239 | DEF(cmpxchg8b, 0) |
| 240 | -DEF(jb_subb, 2) | |
| 241 | -DEF(jz_subb, 2) | |
| 242 | -DEF(jbe_subb, 2) | |
| 243 | -DEF(js_subb, 2) | |
| 244 | -DEF(jl_subb, 2) | |
| 245 | -DEF(jle_subb, 2) | |
| 240 | +DEF(jmp_tb_next, 2) | |
| 241 | +DEF(movl_T0_0, 0) | |
| 242 | +DEF(jb_subb, 3) | |
| 243 | +DEF(jz_subb, 3) | |
| 244 | +DEF(jbe_subb, 3) | |
| 245 | +DEF(js_subb, 3) | |
| 246 | +DEF(jl_subb, 3) | |
| 247 | +DEF(jle_subb, 3) | |
| 246 | 248 | DEF(setb_T0_subb, 0) |
| 247 | 249 | DEF(setz_T0_subb, 0) |
| 248 | 250 | DEF(setbe_T0_subb, 0) |
| ... | ... | @@ -314,12 +316,12 @@ DEF(insb_a16, 0) |
| 314 | 316 | DEF(rep_insb_a16, 0) |
| 315 | 317 | DEF(outb_T0_T1, 0) |
| 316 | 318 | DEF(inb_T0_T1, 0) |
| 317 | -DEF(jb_subw, 2) | |
| 318 | -DEF(jz_subw, 2) | |
| 319 | -DEF(jbe_subw, 2) | |
| 320 | -DEF(js_subw, 2) | |
| 321 | -DEF(jl_subw, 2) | |
| 322 | -DEF(jle_subw, 2) | |
| 319 | +DEF(jb_subw, 3) | |
| 320 | +DEF(jz_subw, 3) | |
| 321 | +DEF(jbe_subw, 3) | |
| 322 | +DEF(js_subw, 3) | |
| 323 | +DEF(jl_subw, 3) | |
| 324 | +DEF(jle_subw, 3) | |
| 323 | 325 | DEF(loopnzw, 2) |
| 324 | 326 | DEF(loopzw, 2) |
| 325 | 327 | DEF(loopw, 2) |
| ... | ... | @@ -405,12 +407,12 @@ DEF(insw_a16, 0) |
| 405 | 407 | DEF(rep_insw_a16, 0) |
| 406 | 408 | DEF(outw_T0_T1, 0) |
| 407 | 409 | DEF(inw_T0_T1, 0) |
| 408 | -DEF(jb_subl, 2) | |
| 409 | -DEF(jz_subl, 2) | |
| 410 | -DEF(jbe_subl, 2) | |
| 411 | -DEF(js_subl, 2) | |
| 412 | -DEF(jl_subl, 2) | |
| 413 | -DEF(jle_subl, 2) | |
| 410 | +DEF(jb_subl, 3) | |
| 411 | +DEF(jz_subl, 3) | |
| 412 | +DEF(jbe_subl, 3) | |
| 413 | +DEF(js_subl, 3) | |
| 414 | +DEF(jl_subl, 3) | |
| 415 | +DEF(jle_subl, 3) | |
| 414 | 416 | DEF(loopnzl, 2) |
| 415 | 417 | DEF(loopzl, 2) |
| 416 | 418 | DEF(loopl, 2) |
| ... | ... | @@ -536,14 +538,7 @@ DEF(movl_A0_seg, 1) |
| 536 | 538 | DEF(addl_A0_seg, 1) |
| 537 | 539 | DEF(lsl, 0) |
| 538 | 540 | DEF(lar, 0) |
| 539 | -DEF(jo_cc, 2) | |
| 540 | -DEF(jb_cc, 2) | |
| 541 | -DEF(jz_cc, 2) | |
| 542 | -DEF(jbe_cc, 2) | |
| 543 | -DEF(js_cc, 2) | |
| 544 | -DEF(jp_cc, 2) | |
| 545 | -DEF(jl_cc, 2) | |
| 546 | -DEF(jle_cc, 2) | |
| 541 | +DEF(jcc, 3) | |
| 547 | 542 | DEF(seto_T0_cc, 0) |
| 548 | 543 | DEF(setb_T0_cc, 0) |
| 549 | 544 | DEF(setz_T0_cc, 0) | ... | ... |
ops_template.h
| ... | ... | @@ -238,18 +238,18 @@ void OPPROTO glue(op_jb_sub, SUFFIX)(void) |
| 238 | 238 | src2 = CC_SRC - CC_DST; |
| 239 | 239 | |
| 240 | 240 | if ((DATA_TYPE)src1 < (DATA_TYPE)src2) |
| 241 | - EIP = PARAM1; | |
| 241 | + JUMP_TB(PARAM1, 0, PARAM2); | |
| 242 | 242 | else |
| 243 | - EIP = PARAM2; | |
| 243 | + JUMP_TB(PARAM1, 1, PARAM3); | |
| 244 | 244 | FORCE_RET(); |
| 245 | 245 | } |
| 246 | 246 | |
| 247 | 247 | void OPPROTO glue(op_jz_sub, SUFFIX)(void) |
| 248 | 248 | { |
| 249 | 249 | if ((DATA_TYPE)CC_DST == 0) |
| 250 | - EIP = PARAM1; | |
| 250 | + JUMP_TB(PARAM1, 0, PARAM2); | |
| 251 | 251 | else |
| 252 | - EIP = PARAM2; | |
| 252 | + JUMP_TB(PARAM1, 1, PARAM3); | |
| 253 | 253 | FORCE_RET(); |
| 254 | 254 | } |
| 255 | 255 | |
| ... | ... | @@ -260,18 +260,18 @@ void OPPROTO glue(op_jbe_sub, SUFFIX)(void) |
| 260 | 260 | src2 = CC_SRC - CC_DST; |
| 261 | 261 | |
| 262 | 262 | if ((DATA_TYPE)src1 <= (DATA_TYPE)src2) |
| 263 | - EIP = PARAM1; | |
| 263 | + JUMP_TB(PARAM1, 0, PARAM2); | |
| 264 | 264 | else |
| 265 | - EIP = PARAM2; | |
| 265 | + JUMP_TB(PARAM1, 1, PARAM3); | |
| 266 | 266 | FORCE_RET(); |
| 267 | 267 | } |
| 268 | 268 | |
| 269 | 269 | void OPPROTO glue(op_js_sub, SUFFIX)(void) |
| 270 | 270 | { |
| 271 | 271 | if (CC_DST & SIGN_MASK) |
| 272 | - EIP = PARAM1; | |
| 272 | + JUMP_TB(PARAM1, 0, PARAM2); | |
| 273 | 273 | else |
| 274 | - EIP = PARAM2; | |
| 274 | + JUMP_TB(PARAM1, 1, PARAM3); | |
| 275 | 275 | FORCE_RET(); |
| 276 | 276 | } |
| 277 | 277 | |
| ... | ... | @@ -282,9 +282,9 @@ void OPPROTO glue(op_jl_sub, SUFFIX)(void) |
| 282 | 282 | src2 = CC_SRC - CC_DST; |
| 283 | 283 | |
| 284 | 284 | if ((DATA_STYPE)src1 < (DATA_STYPE)src2) |
| 285 | - EIP = PARAM1; | |
| 285 | + JUMP_TB(PARAM1, 0, PARAM2); | |
| 286 | 286 | else |
| 287 | - EIP = PARAM2; | |
| 287 | + JUMP_TB(PARAM1, 1, PARAM3); | |
| 288 | 288 | FORCE_RET(); |
| 289 | 289 | } |
| 290 | 290 | |
| ... | ... | @@ -295,9 +295,9 @@ void OPPROTO glue(op_jle_sub, SUFFIX)(void) |
| 295 | 295 | src2 = CC_SRC - CC_DST; |
| 296 | 296 | |
| 297 | 297 | if ((DATA_STYPE)src1 <= (DATA_STYPE)src2) |
| 298 | - EIP = PARAM1; | |
| 298 | + JUMP_TB(PARAM1, 0, PARAM2); | |
| 299 | 299 | else |
| 300 | - EIP = PARAM2; | |
| 300 | + JUMP_TB(PARAM1, 1, PARAM3); | |
| 301 | 301 | FORCE_RET(); |
| 302 | 302 | } |
| 303 | 303 | ... | ... |
translate-i386.c
| ... | ... | @@ -31,11 +31,15 @@ |
| 31 | 31 | |
| 32 | 32 | #define IN_OP_I386 |
| 33 | 33 | #include "cpu-i386.h" |
| 34 | +#include "exec.h" | |
| 34 | 35 | |
| 35 | 36 | /* XXX: move that elsewhere */ |
| 36 | 37 | static uint16_t *gen_opc_ptr; |
| 37 | 38 | static uint32_t *gen_opparam_ptr; |
| 38 | 39 | int __op_param1, __op_param2, __op_param3; |
| 40 | +#ifdef USE_DIRECT_JUMP | |
| 41 | +int __op_jmp0, __op_jmp1; | |
| 42 | +#endif | |
| 39 | 43 | |
| 40 | 44 | #ifdef __i386__ |
| 41 | 45 | static inline void flush_icache_range(unsigned long start, unsigned long stop) |
| ... | ... | @@ -67,14 +71,14 @@ static void inline flush_icache_range(unsigned long start, unsigned long stop) |
| 67 | 71 | stop = (stop + MIN_CACHE_LINE_SIZE - 1) & ~(MIN_CACHE_LINE_SIZE - 1); |
| 68 | 72 | |
| 69 | 73 | for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) { |
| 70 | - asm ("dcbst 0,%0;" : : "r"(p) : "memory"); | |
| 74 | + asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); | |
| 71 | 75 | } |
| 72 | - asm ("sync"); | |
| 76 | + asm volatile ("sync" : : : "memory"); | |
| 73 | 77 | for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) { |
| 74 | - asm ("icbi 0,%0; sync;" : : "r"(p) : "memory"); | |
| 78 | + asm volatile ("icbi 0,%0" : : "r"(p) : "memory"); | |
| 75 | 79 | } |
| 76 | - asm ("sync"); | |
| 77 | - asm ("isync"); | |
| 80 | + asm volatile ("sync" : : : "memory"); | |
| 81 | + asm volatile ("isync" : : : "memory"); | |
| 78 | 82 | } |
| 79 | 83 | #endif |
| 80 | 84 | |
| ... | ... | @@ -129,6 +133,7 @@ typedef struct DisasContext { |
| 129 | 133 | int cpl; |
| 130 | 134 | int iopl; |
| 131 | 135 | int tf; /* TF cpu flag */ |
| 136 | + TranslationBlock *tb; | |
| 132 | 137 | } DisasContext; |
| 133 | 138 | |
| 134 | 139 | /* i386 arith/logic operations */ |
| ... | ... | @@ -192,6 +197,7 @@ enum { |
| 192 | 197 | typedef void (GenOpFunc)(void); |
| 193 | 198 | typedef void (GenOpFunc1)(long); |
| 194 | 199 | typedef void (GenOpFunc2)(long, long); |
| 200 | +typedef void (GenOpFunc3)(long, long, long); | |
| 195 | 201 | |
| 196 | 202 | static GenOpFunc *gen_op_mov_reg_T0[3][8] = { |
| 197 | 203 | [OT_BYTE] = { |
| ... | ... | @@ -699,18 +705,7 @@ enum { |
| 699 | 705 | JCC_LE, |
| 700 | 706 | }; |
| 701 | 707 | |
| 702 | -static GenOpFunc2 *gen_jcc_slow[8] = { | |
| 703 | - gen_op_jo_cc, | |
| 704 | - gen_op_jb_cc, | |
| 705 | - gen_op_jz_cc, | |
| 706 | - gen_op_jbe_cc, | |
| 707 | - gen_op_js_cc, | |
| 708 | - gen_op_jp_cc, | |
| 709 | - gen_op_jl_cc, | |
| 710 | - gen_op_jle_cc, | |
| 711 | -}; | |
| 712 | - | |
| 713 | -static GenOpFunc2 *gen_jcc_sub[3][8] = { | |
| 708 | +static GenOpFunc3 *gen_jcc_sub[3][8] = { | |
| 714 | 709 | [OT_BYTE] = { |
| 715 | 710 | NULL, |
| 716 | 711 | gen_op_jb_subb, |
| ... | ... | @@ -1090,8 +1085,9 @@ static inline uint32_t insn_get(DisasContext *s, int ot) |
| 1090 | 1085 | |
| 1091 | 1086 | static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip) |
| 1092 | 1087 | { |
| 1088 | + TranslationBlock *tb; | |
| 1093 | 1089 | int inv, jcc_op; |
| 1094 | - GenOpFunc2 *func; | |
| 1090 | + GenOpFunc3 *func; | |
| 1095 | 1091 | |
| 1096 | 1092 | inv = b & 1; |
| 1097 | 1093 | jcc_op = (b >> 1) & 7; |
| ... | ... | @@ -1101,8 +1097,6 @@ static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip) |
| 1101 | 1097 | case CC_OP_SUBW: |
| 1102 | 1098 | case CC_OP_SUBL: |
| 1103 | 1099 | func = gen_jcc_sub[s->cc_op - CC_OP_SUBB][jcc_op]; |
| 1104 | - if (!func) | |
| 1105 | - goto slow_jcc; | |
| 1106 | 1100 | break; |
| 1107 | 1101 | |
| 1108 | 1102 | /* some jumps are easy to compute */ |
| ... | ... | @@ -1138,21 +1132,30 @@ static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip) |
| 1138 | 1132 | func = gen_jcc_sub[(s->cc_op - CC_OP_ADDB) % 3][jcc_op]; |
| 1139 | 1133 | break; |
| 1140 | 1134 | default: |
| 1141 | - goto slow_jcc; | |
| 1135 | + func = NULL; | |
| 1136 | + break; | |
| 1142 | 1137 | } |
| 1143 | 1138 | break; |
| 1144 | 1139 | default: |
| 1145 | - slow_jcc: | |
| 1146 | - if (s->cc_op != CC_OP_DYNAMIC) | |
| 1147 | - gen_op_set_cc_op(s->cc_op); | |
| 1148 | - func = gen_jcc_slow[jcc_op]; | |
| 1140 | + func = NULL; | |
| 1149 | 1141 | break; |
| 1150 | 1142 | } |
| 1143 | + | |
| 1144 | + if (s->cc_op != CC_OP_DYNAMIC) | |
| 1145 | + gen_op_set_cc_op(s->cc_op); | |
| 1146 | + | |
| 1147 | + if (!func) { | |
| 1148 | + gen_setcc_slow[jcc_op](); | |
| 1149 | + func = gen_op_jcc; | |
| 1150 | + } | |
| 1151 | + | |
| 1152 | + tb = s->tb; | |
| 1151 | 1153 | if (!inv) { |
| 1152 | - func(val, next_eip); | |
| 1154 | + func((long)tb, val, next_eip); | |
| 1153 | 1155 | } else { |
| 1154 | - func(next_eip, val); | |
| 1156 | + func((long)tb, next_eip, val); | |
| 1155 | 1157 | } |
| 1158 | + s->is_jmp = 3; | |
| 1156 | 1159 | } |
| 1157 | 1160 | |
| 1158 | 1161 | static void gen_setcc(DisasContext *s, int b) |
| ... | ... | @@ -1372,6 +1375,18 @@ static void gen_exception(DisasContext *s, int trapno, unsigned int cur_eip) |
| 1372 | 1375 | s->is_jmp = 1; |
| 1373 | 1376 | } |
| 1374 | 1377 | |
| 1378 | +/* generate a jump to eip. No segment change must happen before as a | |
| 1379 | + direct call to the next block may occur */ | |
| 1380 | +static void gen_jmp(DisasContext *s, unsigned int eip) | |
| 1381 | +{ | |
| 1382 | + TranslationBlock *tb = s->tb; | |
| 1383 | + | |
| 1384 | + if (s->cc_op != CC_OP_DYNAMIC) | |
| 1385 | + gen_op_set_cc_op(s->cc_op); | |
| 1386 | + gen_op_jmp_tb_next((long)tb, eip); | |
| 1387 | + s->is_jmp = 3; | |
| 1388 | +} | |
| 1389 | + | |
| 1375 | 1390 | /* return the next pc address. Return -1 if no insn found. *is_jmp_ptr |
| 1376 | 1391 | is set to true if the instruction sets the PC (last instruction of |
| 1377 | 1392 | a basic block) */ |
| ... | ... | @@ -2964,8 +2979,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start) |
| 2964 | 2979 | val &= 0xffff; |
| 2965 | 2980 | gen_op_movl_T0_im(next_eip); |
| 2966 | 2981 | gen_push_T0(s); |
| 2967 | - gen_op_jmp_im(val); | |
| 2968 | - s->is_jmp = 1; | |
| 2982 | + gen_jmp(s, val); | |
| 2969 | 2983 | } |
| 2970 | 2984 | break; |
| 2971 | 2985 | case 0x9a: /* lcall im */ |
| ... | ... | @@ -2996,8 +3010,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start) |
| 2996 | 3010 | val += s->pc - s->cs_base; |
| 2997 | 3011 | if (s->dflag == 0) |
| 2998 | 3012 | val = val & 0xffff; |
| 2999 | - gen_op_jmp_im(val); | |
| 3000 | - s->is_jmp = 1; | |
| 3013 | + gen_jmp(s, val); | |
| 3001 | 3014 | break; |
| 3002 | 3015 | case 0xea: /* ljmp im */ |
| 3003 | 3016 | { |
| ... | ... | @@ -3019,8 +3032,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start) |
| 3019 | 3032 | val += s->pc - s->cs_base; |
| 3020 | 3033 | if (s->dflag == 0) |
| 3021 | 3034 | val = val & 0xffff; |
| 3022 | - gen_op_jmp_im(val); | |
| 3023 | - s->is_jmp = 1; | |
| 3035 | + gen_jmp(s, val); | |
| 3024 | 3036 | break; |
| 3025 | 3037 | case 0x70 ... 0x7f: /* jcc Jb */ |
| 3026 | 3038 | val = (int8_t)insn_get(s, OT_BYTE); |
| ... | ... | @@ -3037,7 +3049,6 @@ long disas_insn(DisasContext *s, uint8_t *pc_start) |
| 3037 | 3049 | if (s->dflag == 0) |
| 3038 | 3050 | val &= 0xffff; |
| 3039 | 3051 | gen_jcc(s, b, val, next_eip); |
| 3040 | - s->is_jmp = 1; | |
| 3041 | 3052 | break; |
| 3042 | 3053 | |
| 3043 | 3054 | case 0x190 ... 0x19f: /* setcc Gv */ |
| ... | ... | @@ -3393,15 +3404,6 @@ static uint16_t opc_read_flags[NB_OPS] = { |
| 3393 | 3404 | |
| 3394 | 3405 | [INDEX_op_into] = CC_O, |
| 3395 | 3406 | |
| 3396 | - [INDEX_op_jo_cc] = CC_O, | |
| 3397 | - [INDEX_op_jb_cc] = CC_C, | |
| 3398 | - [INDEX_op_jz_cc] = CC_Z, | |
| 3399 | - [INDEX_op_jbe_cc] = CC_Z | CC_C, | |
| 3400 | - [INDEX_op_js_cc] = CC_S, | |
| 3401 | - [INDEX_op_jp_cc] = CC_P, | |
| 3402 | - [INDEX_op_jl_cc] = CC_O | CC_S, | |
| 3403 | - [INDEX_op_jle_cc] = CC_O | CC_S | CC_Z, | |
| 3404 | - | |
| 3405 | 3407 | [INDEX_op_jb_subb] = CC_C, |
| 3406 | 3408 | [INDEX_op_jb_subw] = CC_C, |
| 3407 | 3409 | [INDEX_op_jb_subl] = CC_C, |
| ... | ... | @@ -3730,7 +3732,7 @@ static uint32_t gen_opparam_buf[OPPARAM_BUF_SIZE]; |
| 3730 | 3732 | int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, |
| 3731 | 3733 | int *gen_code_size_ptr, |
| 3732 | 3734 | uint8_t *pc_start, uint8_t *cs_base, int flags, |
| 3733 | - int *code_size_ptr) | |
| 3735 | + int *code_size_ptr, TranslationBlock *tb) | |
| 3734 | 3736 | { |
| 3735 | 3737 | DisasContext dc1, *dc = &dc1; |
| 3736 | 3738 | uint8_t *pc_ptr; |
| ... | ... | @@ -3750,6 +3752,7 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, |
| 3750 | 3752 | dc->tf = (flags >> GEN_FLAG_TF_SHIFT) & 1; |
| 3751 | 3753 | dc->cc_op = CC_OP_DYNAMIC; |
| 3752 | 3754 | dc->cs_base = cs_base; |
| 3755 | + dc->tb = tb; | |
| 3753 | 3756 | |
| 3754 | 3757 | gen_opc_ptr = gen_opc_buf; |
| 3755 | 3758 | gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; |
| ... | ... | @@ -3776,15 +3779,21 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, |
| 3776 | 3779 | } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end && |
| 3777 | 3780 | (pc_ptr - pc_start) < (TARGET_PAGE_SIZE - 32)); |
| 3778 | 3781 | /* we must store the eflags state if it is not already done */ |
| 3779 | - if (dc->cc_op != CC_OP_DYNAMIC) | |
| 3780 | - gen_op_set_cc_op(dc->cc_op); | |
| 3781 | - if (dc->is_jmp != 1) { | |
| 3782 | - /* we add an additional jmp to update the simulated PC */ | |
| 3783 | - gen_op_jmp_im(ret - (unsigned long)dc->cs_base); | |
| 3782 | + if (dc->is_jmp != 3) { | |
| 3783 | + if (dc->cc_op != CC_OP_DYNAMIC) | |
| 3784 | + gen_op_set_cc_op(dc->cc_op); | |
| 3785 | + if (dc->is_jmp != 1) { | |
| 3786 | + /* we add an additional jmp to update the simulated PC */ | |
| 3787 | + gen_op_jmp_im(ret - (unsigned long)dc->cs_base); | |
| 3788 | + } | |
| 3784 | 3789 | } |
| 3785 | 3790 | if (dc->tf) { |
| 3786 | 3791 | gen_op_raise_exception(EXCP01_SSTP); |
| 3787 | 3792 | } |
| 3793 | + if (dc->is_jmp != 3) { | |
| 3794 | + /* indicate that the hash table must be used to find the next TB */ | |
| 3795 | + gen_op_movl_T0_0(); | |
| 3796 | + } | |
| 3788 | 3797 | |
| 3789 | 3798 | *gen_opc_ptr = INDEX_op_end; |
| 3790 | 3799 | |
| ... | ... | @@ -3814,8 +3823,17 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, |
| 3814 | 3823 | #endif |
| 3815 | 3824 | |
| 3816 | 3825 | /* generate machine code */ |
| 3817 | - gen_code_size = dyngen_code(gen_code_buf, gen_opc_buf, gen_opparam_buf); | |
| 3826 | + tb->tb_next_offset[0] = 0xffff; | |
| 3827 | + tb->tb_next_offset[1] = 0xffff; | |
| 3828 | + gen_code_size = dyngen_code(gen_code_buf, tb->tb_next_offset, | |
| 3829 | +#ifdef USE_DIRECT_JUMP | |
| 3830 | + tb->tb_jmp_offset, | |
| 3831 | +#else | |
| 3832 | + NULL, | |
| 3833 | +#endif | |
| 3834 | + gen_opc_buf, gen_opparam_buf); | |
| 3818 | 3835 | flush_icache_range((unsigned long)gen_code_buf, (unsigned long)(gen_code_buf + gen_code_size)); |
| 3836 | + | |
| 3819 | 3837 | *gen_code_size_ptr = gen_code_size; |
| 3820 | 3838 | *code_size_ptr = pc_ptr - pc_start; |
| 3821 | 3839 | #ifdef DEBUG_DISAS | ... | ... |