Commit d4e8164f7e9342d692c1d6f1c848ed05f8007ece
1 parent
08351fb3
direct chaining for PowerPC and i386
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@183 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
9 changed files
with
620 additions
and
210 deletions
dyngen.c
... | ... | @@ -170,7 +170,16 @@ void elf_swap_phdr(struct elf_phdr *h) |
170 | 170 | swabls(&h->p_align); /* Segment alignment */ |
171 | 171 | } |
172 | 172 | |
173 | +/* ELF file info */ | |
173 | 174 | int do_swap; |
175 | +struct elf_shdr *shdr; | |
176 | +struct elfhdr ehdr; | |
177 | +ElfW(Sym) *symtab; | |
178 | +int nb_syms; | |
179 | +char *strtab; | |
180 | +/* data section */ | |
181 | +uint8_t *data_data; | |
182 | +int data_shndx; | |
174 | 183 | |
175 | 184 | uint16_t get16(uint16_t *p) |
176 | 185 | { |
... | ... | @@ -270,7 +279,7 @@ int strstart(const char *str, const char *val, const char **ptr) |
270 | 279 | /* generate op code */ |
271 | 280 | void gen_code(const char *name, host_ulong offset, host_ulong size, |
272 | 281 | FILE *outfile, uint8_t *text, ELF_RELOC *relocs, int nb_relocs, int reloc_sh_type, |
273 | - ElfW(Sym) *symtab, char *strtab, int gen_switch) | |
282 | + int gen_switch) | |
274 | 283 | { |
275 | 284 | int copy_size = 0; |
276 | 285 | uint8_t *p_start, *p_end; |
... | ... | @@ -291,13 +300,16 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, |
291 | 300 | switch(ELF_ARCH) { |
292 | 301 | case EM_386: |
293 | 302 | { |
294 | - uint8_t *p; | |
295 | - p = p_end - 1; | |
296 | - if (p == p_start) | |
303 | + int len; | |
304 | + len = p_end - p_start; | |
305 | + if (len == 0) | |
297 | 306 | error("empty code for %s", name); |
298 | - if (p[0] != 0xc3) | |
299 | - error("ret expected at the end of %s", name); | |
300 | - copy_size = p - p_start; | |
307 | + if (p_end[-1] == 0xc3) { | |
308 | + len--; | |
309 | + } else { | |
310 | + error("ret or jmp expected at the end of %s", name); | |
311 | + } | |
312 | + copy_size = len; | |
301 | 313 | } |
302 | 314 | break; |
303 | 315 | case EM_PPC: |
... | ... | @@ -423,7 +435,7 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, |
423 | 435 | sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name; |
424 | 436 | if (strstart(sym_name, "__op_param", &p)) { |
425 | 437 | n = strtoul(p, NULL, 10); |
426 | - if (n >= MAX_ARGS) | |
438 | + if (n > MAX_ARGS) | |
427 | 439 | error("too many arguments in %s", name); |
428 | 440 | args_present[n - 1] = 1; |
429 | 441 | } |
... | ... | @@ -459,7 +471,9 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, |
459 | 471 | if (rel->r_offset >= start_offset && |
460 | 472 | rel->r_offset < start_offset + copy_size) { |
461 | 473 | sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name; |
462 | - if (*sym_name && !strstart(sym_name, "__op_param", &p)) { | |
474 | + if (*sym_name && | |
475 | + !strstart(sym_name, "__op_param", NULL) && | |
476 | + !strstart(sym_name, "__op_jmp", NULL)) { | |
463 | 477 | #if defined(HOST_SPARC) |
464 | 478 | if (sym_name[0] == '.') { |
465 | 479 | fprintf(outfile, |
... | ... | @@ -474,6 +488,31 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, |
474 | 488 | } |
475 | 489 | |
476 | 490 | fprintf(outfile, " memcpy(gen_code_ptr, (void *)((char *)&%s+%d), %d);\n", name, start_offset - offset, copy_size); |
491 | + | |
492 | + /* emit code offset information */ | |
493 | + { | |
494 | + ElfW(Sym) *sym; | |
495 | + const char *sym_name, *p; | |
496 | + target_ulong val; | |
497 | + int n; | |
498 | + | |
499 | + for(i = 0, sym = symtab; i < nb_syms; i++, sym++) { | |
500 | + sym_name = strtab + sym->st_name; | |
501 | + if (strstart(sym_name, "__op_label", &p)) { | |
502 | + /* test if the variable refers to a label inside | |
503 | + the code we are generating */ | |
504 | + if (sym->st_shndx != data_shndx) | |
505 | + error("__op_labelN symbols must be in .data or .sdata section"); | |
506 | + val = *(target_ulong *)(data_data + sym->st_value); | |
507 | + if (val >= start_offset && val < start_offset + copy_size) { | |
508 | + n = strtol(p, NULL, 10); | |
509 | + fprintf(outfile, " label_offsets[%d] = %d + (gen_code_ptr - gen_code_buf);\n", n, val - start_offset); | |
510 | + } | |
511 | + } | |
512 | + } | |
513 | + } | |
514 | + | |
515 | + /* load parameters in variables */ | |
477 | 516 | for(i = 0; i < nb_args; i++) { |
478 | 517 | fprintf(outfile, " param%d = *opparam_ptr++;\n", i + 1); |
479 | 518 | } |
... | ... | @@ -519,6 +558,18 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, |
519 | 558 | if (rel->r_offset >= start_offset && |
520 | 559 | rel->r_offset < start_offset + copy_size) { |
521 | 560 | sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name; |
561 | + if (strstart(sym_name, "__op_jmp", &p)) { | |
562 | + int n; | |
563 | + n = strtol(p, NULL, 10); | |
564 | + /* __op_jmp relocations are done at | |
565 | + runtime to do translated block | |
566 | + chaining: the offset of the instruction | |
567 | + needs to be stored */ | |
568 | + fprintf(outfile, " jmp_offsets[%d] = %d + (gen_code_ptr - gen_code_buf);\n", | |
569 | + n, rel->r_offset - start_offset); | |
570 | + continue; | |
571 | + } | |
572 | + | |
522 | 573 | if (strstart(sym_name, "__op_param", &p)) { |
523 | 574 | snprintf(name, sizeof(name), "param%s", p); |
524 | 575 | } else { |
... | ... | @@ -824,11 +875,10 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, |
824 | 875 | int load_elf(const char *filename, FILE *outfile, int do_print_enum) |
825 | 876 | { |
826 | 877 | int fd; |
827 | - struct elfhdr ehdr; | |
828 | - struct elf_shdr *sec, *shdr, *symtab_sec, *strtab_sec, *text_sec; | |
829 | - int i, j, nb_syms; | |
830 | - ElfW(Sym) *symtab, *sym; | |
831 | - char *shstr, *strtab; | |
878 | + struct elf_shdr *sec, *symtab_sec, *strtab_sec, *text_sec; | |
879 | + int i, j; | |
880 | + ElfW(Sym) *sym; | |
881 | + char *shstr, *data_name; | |
832 | 882 | uint8_t *text; |
833 | 883 | void *relocs; |
834 | 884 | int nb_relocs, reloc_sh_type; |
... | ... | @@ -880,6 +930,17 @@ int load_elf(const char *filename, FILE *outfile, int do_print_enum) |
880 | 930 | error("could not find .text section"); |
881 | 931 | text = load_data(fd, text_sec->sh_offset, text_sec->sh_size); |
882 | 932 | |
933 | +#if defined(HOST_PPC) | |
934 | + data_name = ".sdata"; | |
935 | +#else | |
936 | + data_name = ".data"; | |
937 | +#endif | |
938 | + sec = find_elf_section(shdr, ehdr.e_shnum, shstr, data_name); | |
939 | + if (!sec) | |
940 | + error("could not find %s section", data_name); | |
941 | + data_shndx = sec - shdr; | |
942 | + data_data = load_data(fd, sec->sh_offset, sec->sh_size); | |
943 | + | |
883 | 944 | /* find text relocations, if any */ |
884 | 945 | nb_relocs = 0; |
885 | 946 | relocs = NULL; |
... | ... | @@ -936,7 +997,7 @@ int load_elf(const char *filename, FILE *outfile, int do_print_enum) |
936 | 997 | name = strtab + sym->st_name; |
937 | 998 | if (strstart(name, OP_PREFIX, &p)) { |
938 | 999 | gen_code(name, sym->st_value, sym->st_size, outfile, |
939 | - text, relocs, nb_relocs, reloc_sh_type, symtab, strtab, 2); | |
1000 | + text, relocs, nb_relocs, reloc_sh_type, 2); | |
940 | 1001 | } |
941 | 1002 | } |
942 | 1003 | } else { |
... | ... | @@ -963,6 +1024,7 @@ fprintf(outfile, |
963 | 1024 | #endif |
964 | 1025 | fprintf(outfile, |
965 | 1026 | "int dyngen_code(uint8_t *gen_code_buf,\n" |
1027 | +" uint16_t *label_offsets, uint16_t *jmp_offsets,\n" | |
966 | 1028 | " const uint16_t *opc_buf, const uint32_t *opparam_buf)\n" |
967 | 1029 | "{\n" |
968 | 1030 | " uint8_t *gen_code_ptr;\n" |
... | ... | @@ -1001,7 +1063,7 @@ fprintf(outfile, |
1001 | 1063 | if (sym->st_shndx != (text_sec - shdr)) |
1002 | 1064 | error("invalid section for opcode (0x%x)", sym->st_shndx); |
1003 | 1065 | gen_code(name, sym->st_value, sym->st_size, outfile, |
1004 | - text, relocs, nb_relocs, reloc_sh_type, symtab, strtab, 1); | |
1066 | + text, relocs, nb_relocs, reloc_sh_type, 1); | |
1005 | 1067 | } |
1006 | 1068 | } |
1007 | 1069 | |
... | ... | @@ -1056,7 +1118,7 @@ fprintf(outfile, |
1056 | 1118 | if (sym->st_shndx != (text_sec - shdr)) |
1057 | 1119 | error("invalid section for opcode (0x%x)", sym->st_shndx); |
1058 | 1120 | gen_code(name, sym->st_value, sym->st_size, outfile, |
1059 | - text, relocs, nb_relocs, reloc_sh_type, symtab, strtab, 0); | |
1121 | + text, relocs, nb_relocs, reloc_sh_type, 0); | |
1060 | 1122 | } |
1061 | 1123 | } |
1062 | 1124 | } | ... | ... |
exec-i386.c
... | ... | @@ -120,7 +120,7 @@ int cpu_x86_exec(CPUX86State *env1) |
120 | 120 | TranslationBlock *tb, **ptb; |
121 | 121 | uint8_t *tc_ptr, *cs_base, *pc; |
122 | 122 | unsigned int flags; |
123 | - | |
123 | + | |
124 | 124 | /* first we save global registers */ |
125 | 125 | saved_T0 = T0; |
126 | 126 | saved_T1 = T1; |
... | ... | @@ -169,6 +169,7 @@ int cpu_x86_exec(CPUX86State *env1) |
169 | 169 | |
170 | 170 | /* prepare setjmp context for exception handling */ |
171 | 171 | if (setjmp(env->jmp_env) == 0) { |
172 | + T0 = 0; /* force lookup of first TB */ | |
172 | 173 | for(;;) { |
173 | 174 | if (env->interrupt_request) { |
174 | 175 | raise_exception(EXCP_INTERRUPT); |
... | ... | @@ -209,30 +210,40 @@ int cpu_x86_exec(CPUX86State *env1) |
209 | 210 | flags |= (env->eflags & TF_MASK) << (GEN_FLAG_TF_SHIFT - 8); |
210 | 211 | cs_base = env->seg_cache[R_CS].base; |
211 | 212 | pc = cs_base + env->eip; |
213 | + spin_lock(&tb_lock); | |
212 | 214 | tb = tb_find(&ptb, (unsigned long)pc, (unsigned long)cs_base, |
213 | 215 | flags); |
214 | 216 | if (!tb) { |
215 | 217 | /* if no translated code available, then translate it now */ |
216 | - /* very inefficient but safe: we lock all the cpus | |
217 | - when generating code */ | |
218 | - spin_lock(&tb_lock); | |
218 | + tb = tb_alloc((unsigned long)pc); | |
219 | + if (!tb) { | |
220 | + /* flush must be done */ | |
221 | + tb_flush(); | |
222 | + /* cannot fail at this point */ | |
223 | + tb = tb_alloc((unsigned long)pc); | |
224 | + /* don't forget to invalidate previous TB info */ | |
225 | + ptb = &tb_hash[tb_hash_func((unsigned long)pc)]; | |
226 | + T0 = 0; | |
227 | + } | |
219 | 228 | tc_ptr = code_gen_ptr; |
229 | + tb->tc_ptr = tc_ptr; | |
220 | 230 | ret = cpu_x86_gen_code(code_gen_ptr, CODE_GEN_MAX_SIZE, |
221 | 231 | &code_gen_size, pc, cs_base, flags, |
222 | - &code_size); | |
232 | + &code_size, tb); | |
223 | 233 | /* if invalid instruction, signal it */ |
224 | 234 | if (ret != 0) { |
235 | + /* NOTE: the tb is allocated but not linked, so we | |
236 | + can leave it */ | |
225 | 237 | spin_unlock(&tb_lock); |
226 | 238 | raise_exception(EXCP06_ILLOP); |
227 | 239 | } |
228 | - tb = tb_alloc((unsigned long)pc, code_size); | |
229 | 240 | *ptb = tb; |
241 | + tb->size = code_size; | |
230 | 242 | tb->cs_base = (unsigned long)cs_base; |
231 | 243 | tb->flags = flags; |
232 | - tb->tc_ptr = tc_ptr; | |
233 | 244 | tb->hash_next = NULL; |
245 | + tb_link(tb); | |
234 | 246 | code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); |
235 | - spin_unlock(&tb_lock); | |
236 | 247 | } |
237 | 248 | #ifdef DEBUG_EXEC |
238 | 249 | if (loglevel) { |
... | ... | @@ -241,14 +252,21 @@ int cpu_x86_exec(CPUX86State *env1) |
241 | 252 | lookup_symbol((void *)tb->pc)); |
242 | 253 | } |
243 | 254 | #endif |
244 | - /* execute the generated code */ | |
255 | + | |
256 | + /* see if we can patch the calling TB */ | |
257 | + if (T0 != 0 && !(env->eflags & TF_MASK)) { | |
258 | + tb_add_jump((TranslationBlock *)(T0 & ~3), T0 & 3, tb); | |
259 | + } | |
245 | 260 | tc_ptr = tb->tc_ptr; |
261 | + spin_unlock(&tb_lock); | |
262 | + | |
263 | + /* execute the generated code */ | |
246 | 264 | gen_func = (void *)tc_ptr; |
247 | 265 | #ifdef __sparc__ |
248 | 266 | __asm__ __volatile__("call %0\n\t" |
249 | 267 | " mov %%o7,%%i0" |
250 | 268 | : /* no outputs */ |
251 | - : "r" (gen_func) | |
269 | + : "r" (gen_func) | |
252 | 270 | : "i0", "i1", "i2", "i3", "i4", "i5"); |
253 | 271 | #else |
254 | 272 | gen_func(); | ... | ... |
exec-i386.h
... | ... | @@ -205,8 +205,10 @@ extern int __op_param1, __op_param2, __op_param3; |
205 | 205 | #define PARAM2 ((long)(&__op_param2)) |
206 | 206 | #define PARAM3 ((long)(&__op_param3)) |
207 | 207 | #endif |
208 | +extern int __op_jmp0, __op_jmp1; | |
208 | 209 | |
209 | 210 | #include "cpu-i386.h" |
211 | +#include "exec.h" | |
210 | 212 | |
211 | 213 | typedef struct CCTable { |
212 | 214 | int (*compute_all)(void); /* return all the flags */ | ... | ... |
exec.c
... | ... | @@ -27,6 +27,7 @@ |
27 | 27 | #include <sys/mman.h> |
28 | 28 | |
29 | 29 | #include "cpu-i386.h" |
30 | +#include "exec.h" | |
30 | 31 | |
31 | 32 | //#define DEBUG_TB_INVALIDATE |
32 | 33 | #define DEBUG_FLUSH |
... | ... | @@ -212,6 +213,7 @@ static void page_flush_tb(void) |
212 | 213 | } |
213 | 214 | |
214 | 215 | /* flush all the translation blocks */ |
216 | +/* XXX: tb_flush is currently not thread safe */ | |
215 | 217 | void tb_flush(void) |
216 | 218 | { |
217 | 219 | int i; |
... | ... | @@ -226,7 +228,8 @@ void tb_flush(void) |
226 | 228 | tb_hash[i] = NULL; |
227 | 229 | page_flush_tb(); |
228 | 230 | code_gen_ptr = code_gen_buffer; |
229 | - /* XXX: flush processor icache at this point */ | |
231 | + /* XXX: flush processor icache at this point if cache flush is | |
232 | + expensive */ | |
230 | 233 | } |
231 | 234 | |
232 | 235 | #ifdef DEBUG_TB_CHECK |
... | ... | @@ -265,6 +268,26 @@ static void tb_page_check(void) |
265 | 268 | } |
266 | 269 | } |
267 | 270 | |
271 | +void tb_jmp_check(TranslationBlock *tb) | |
272 | +{ | |
273 | + TranslationBlock *tb1; | |
274 | + unsigned int n1; | |
275 | + | |
276 | + /* walk the circular list of jumps to this TB up to its end marker */ | |
277 | + tb1 = tb->jmp_first; | |
278 | + for(;;) { | |
279 | + n1 = (long)tb1 & 3; | |
280 | + tb1 = (TranslationBlock *)((long)tb1 & ~3); | |
281 | + if (n1 == 2) | |
282 | + break; | |
283 | + tb1 = tb1->jmp_next[n1]; | |
284 | + } | |
285 | + /* check end of list */ | |
286 | + if (tb1 != tb) { | |
287 | + printf("ERROR: jmp_list from 0x%08lx\n", (long)tb); | |
288 | + } | |
289 | +} | |
290 | + | |
268 | 291 | #endif |
269 | 292 | |
270 | 293 | /* invalidate one TB */ |
... | ... | @@ -282,12 +305,48 @@ static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb, |
282 | 305 | } |
283 | 306 | } |
284 | 307 | |
308 | +static inline void tb_jmp_remove(TranslationBlock *tb, int n) | |
309 | +{ | |
310 | + TranslationBlock *tb1, **ptb; | |
311 | + unsigned int n1; | |
312 | + | |
313 | + ptb = &tb->jmp_next[n]; | |
314 | + tb1 = *ptb; | |
315 | + if (tb1) { | |
316 | + /* find tb(n) in circular list */ | |
317 | + for(;;) { | |
318 | + tb1 = *ptb; | |
319 | + n1 = (long)tb1 & 3; | |
320 | + tb1 = (TranslationBlock *)((long)tb1 & ~3); | |
321 | + if (n1 == n && tb1 == tb) | |
322 | + break; | |
323 | + if (n1 == 2) { | |
324 | + ptb = &tb1->jmp_first; | |
325 | + } else { | |
326 | + ptb = &tb1->jmp_next[n1]; | |
327 | + } | |
328 | + } | |
329 | + /* now we can suppress tb(n) from the list */ | |
330 | + *ptb = tb->jmp_next[n]; | |
331 | + | |
332 | + tb->jmp_next[n] = NULL; | |
333 | + } | |
334 | +} | |
335 | + | |
336 | +/* reset the jump entry 'n' of a TB so that it is not chained to | |
337 | + another TB */ | |
338 | +static inline void tb_reset_jump(TranslationBlock *tb, int n) | |
339 | +{ | |
340 | + tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n])); | |
341 | +} | |
342 | + | |
285 | 343 | static inline void tb_invalidate(TranslationBlock *tb, int parity) |
286 | 344 | { |
287 | 345 | PageDesc *p; |
288 | 346 | unsigned int page_index1, page_index2; |
289 | - unsigned int h; | |
290 | - | |
347 | + unsigned int h, n1; | |
348 | + TranslationBlock *tb1, *tb2; | |
349 | + | |
291 | 350 | /* remove the TB from the hash list */ |
292 | 351 | h = tb_hash_func(tb->pc); |
293 | 352 | tb_remove(&tb_hash[h], tb, |
... | ... | @@ -305,6 +364,24 @@ static inline void tb_invalidate(TranslationBlock *tb, int parity) |
305 | 364 | tb_remove(&p->first_tb, tb, |
306 | 365 | offsetof(TranslationBlock, page_next[page_index2 & 1])); |
307 | 366 | } |
367 | + | |
368 | + /* suppress this TB from the two jump lists */ | |
369 | + tb_jmp_remove(tb, 0); | |
370 | + tb_jmp_remove(tb, 1); | |
371 | + | |
372 | + /* suppress any remaining jumps to this TB */ | |
373 | + tb1 = tb->jmp_first; | |
374 | + for(;;) { | |
375 | + n1 = (long)tb1 & 3; | |
376 | + if (n1 == 2) | |
377 | + break; | |
378 | + tb1 = (TranslationBlock *)((long)tb1 & ~3); | |
379 | + tb2 = tb1->jmp_next[n1]; | |
380 | + tb_reset_jump(tb1, n1); | |
381 | + tb1->jmp_next[n1] = NULL; | |
382 | + tb1 = tb2; | |
383 | + } | |
384 | + tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */ | |
308 | 385 | } |
309 | 386 | |
310 | 387 | /* invalidate all TBs which intersect with the target page starting at addr */ |
... | ... | @@ -367,27 +444,39 @@ static inline void tb_alloc_page(TranslationBlock *tb, unsigned int page_index) |
367 | 444 | |
368 | 445 | /* Allocate a new translation block. Flush the translation buffer if |
369 | 446 | too many translation blocks or too much generated code. */ |
370 | -TranslationBlock *tb_alloc(unsigned long pc, | |
371 | - unsigned long size) | |
447 | +TranslationBlock *tb_alloc(unsigned long pc) | |
372 | 448 | { |
373 | 449 | TranslationBlock *tb; |
374 | - unsigned int page_index1, page_index2; | |
375 | 450 | |
376 | 451 | if (nb_tbs >= CODE_GEN_MAX_BLOCKS || |
377 | 452 | (code_gen_ptr - code_gen_buffer) >= CODE_GEN_BUFFER_MAX_SIZE) |
378 | - tb_flush(); | |
453 | + return NULL; | |
379 | 454 | tb = &tbs[nb_tbs++]; |
380 | 455 | tb->pc = pc; |
381 | - tb->size = size; | |
456 | + return tb; | |
457 | +} | |
458 | + | |
459 | +/* link the tb with the other TBs */ | |
460 | +void tb_link(TranslationBlock *tb) | |
461 | +{ | |
462 | + unsigned int page_index1, page_index2; | |
382 | 463 | |
383 | 464 | /* add in the page list */ |
384 | - page_index1 = pc >> TARGET_PAGE_BITS; | |
465 | + page_index1 = tb->pc >> TARGET_PAGE_BITS; | |
385 | 466 | tb_alloc_page(tb, page_index1); |
386 | - page_index2 = (pc + size - 1) >> TARGET_PAGE_BITS; | |
467 | + page_index2 = (tb->pc + tb->size - 1) >> TARGET_PAGE_BITS; | |
387 | 468 | if (page_index2 != page_index1) { |
388 | 469 | tb_alloc_page(tb, page_index2); |
389 | 470 | } |
390 | - return tb; | |
471 | + tb->jmp_first = (TranslationBlock *)((long)tb | 2); | |
472 | + tb->jmp_next[0] = NULL; | |
473 | + tb->jmp_next[1] = NULL; | |
474 | + | |
475 | + /* init original jump addresses */ | |
476 | + if (tb->tb_next_offset[0] != 0xffff) | |
477 | + tb_reset_jump(tb, 0); | |
478 | + if (tb->tb_next_offset[1] != 0xffff) | |
479 | + tb_reset_jump(tb, 1); | |
391 | 480 | } |
392 | 481 | |
393 | 482 | /* called from signal handler: invalidate the code and unprotect the | ... | ... |
exec.h
0 → 100644
1 | +/* | |
2 | + * internal execution defines for qemu | |
3 | + * | |
4 | + * Copyright (c) 2003 Fabrice Bellard | |
5 | + * | |
6 | + * This library is free software; you can redistribute it and/or | |
7 | + * modify it under the terms of the GNU Lesser General Public | |
8 | + * License as published by the Free Software Foundation; either | |
9 | + * version 2 of the License, or (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | + * Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public | |
17 | + * License along with this library; if not, write to the Free Software | |
18 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
19 | + */ | |
20 | + | |
21 | +#define GEN_FLAG_CODE32_SHIFT 0 | |
22 | +#define GEN_FLAG_ADDSEG_SHIFT 1 | |
23 | +#define GEN_FLAG_SS32_SHIFT 2 | |
24 | +#define GEN_FLAG_VM_SHIFT 3 | |
25 | +#define GEN_FLAG_ST_SHIFT 4 | |
26 | +#define GEN_FLAG_CPL_SHIFT 7 | |
27 | +#define GEN_FLAG_IOPL_SHIFT 9 | |
28 | +#define GEN_FLAG_TF_SHIFT 11 | |
29 | + | |
30 | +struct TranslationBlock; | |
31 | +int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, | |
32 | + int *gen_code_size_ptr, | |
33 | + uint8_t *pc_start, uint8_t *cs_base, int flags, | |
34 | + int *code_size_ptr, struct TranslationBlock *tb); | |
35 | +void cpu_x86_tblocks_init(void); | |
36 | +void page_init(void); | |
37 | +int page_unprotect(unsigned long address); | |
38 | + | |
39 | +#define CODE_GEN_MAX_SIZE 65536 | |
40 | +#define CODE_GEN_ALIGN 16 /* must be >= the size of an icache line */ | |
41 | + | |
42 | +#define CODE_GEN_HASH_BITS 15 | |
43 | +#define CODE_GEN_HASH_SIZE (1 << CODE_GEN_HASH_BITS) | |
44 | + | |
45 | +/* maximum total translated code allocated */ | |
46 | +#define CODE_GEN_BUFFER_SIZE (2048 * 1024) | |
47 | +//#define CODE_GEN_BUFFER_SIZE (128 * 1024) | |
48 | + | |
49 | +#if defined(__powerpc__) | |
50 | +#define USE_DIRECT_JUMP | |
51 | +#endif | |
52 | + | |
53 | +typedef struct TranslationBlock { | |
54 | + unsigned long pc; /* simulated PC corresponding to this block (EIP + CS base) */ | |
55 | + unsigned long cs_base; /* CS base for this block */ | |
56 | + unsigned int flags; /* flags defining in which context the code was generated */ | |
57 | + uint16_t size; /* size of target code for this block (1 <= | |
58 | + size <= TARGET_PAGE_SIZE) */ | |
59 | + uint8_t *tc_ptr; /* pointer to the translated code */ | |
60 | + struct TranslationBlock *hash_next; /* next matching block */ | |
61 | + struct TranslationBlock *page_next[2]; /* next blocks in even/odd page */ | |
62 | + /* the following data are used to directly call another TB from | |
63 | + the code of this one. */ | |
64 | + uint16_t tb_next_offset[2]; /* offset of original jump target */ | |
65 | +#ifdef USE_DIRECT_JUMP | |
66 | + uint16_t tb_jmp_offset[2]; /* offset of jump instruction */ | |
67 | +#else | |
68 | + uint8_t *tb_next[2]; /* address of jump generated code */ | |
69 | +#endif | |
70 | + /* list of TBs jumping to this one. This is a circular list using | |
71 | + the two least significant bits of the pointers to tell what is | |
72 | + the next pointer: 0 = jmp_next[0], 1 = jmp_next[1], 2 = | |
73 | + jmp_first */ | |
74 | + struct TranslationBlock *jmp_next[2]; | |
75 | + struct TranslationBlock *jmp_first; | |
76 | +} TranslationBlock; | |
77 | + | |
78 | +static inline unsigned int tb_hash_func(unsigned long pc) | |
79 | +{ | |
80 | + return pc & (CODE_GEN_HASH_SIZE - 1); | |
81 | +} | |
82 | + | |
83 | +TranslationBlock *tb_alloc(unsigned long pc); | |
84 | +void tb_flush(void); | |
85 | +void tb_link(TranslationBlock *tb); | |
86 | + | |
87 | +extern TranslationBlock *tb_hash[CODE_GEN_HASH_SIZE]; | |
88 | + | |
89 | +extern uint8_t code_gen_buffer[CODE_GEN_BUFFER_SIZE]; | |
90 | +extern uint8_t *code_gen_ptr; | |
91 | + | |
92 | +/* find a translation block in the translation cache. If not found, | |
93 | + return NULL and the pointer to the last element of the list in pptb */ | |
94 | +static inline TranslationBlock *tb_find(TranslationBlock ***pptb, | |
95 | + unsigned long pc, | |
96 | + unsigned long cs_base, | |
97 | + unsigned int flags) | |
98 | +{ | |
99 | + TranslationBlock **ptb, *tb; | |
100 | + unsigned int h; | |
101 | + | |
102 | + h = tb_hash_func(pc); | |
103 | + ptb = &tb_hash[h]; | |
104 | + for(;;) { | |
105 | + tb = *ptb; | |
106 | + if (!tb) | |
107 | + break; | |
108 | + if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags) | |
109 | + return tb; | |
110 | + ptb = &tb->hash_next; | |
111 | + } | |
112 | + *pptb = ptb; | |
113 | + return NULL; | |
114 | +} | |
115 | + | |
116 | +#if defined(__powerpc__) | |
117 | + | |
118 | +static inline void tb_set_jmp_target(TranslationBlock *tb, | |
119 | + int n, unsigned long addr) | |
120 | +{ | |
121 | + uint32_t val, *ptr; | |
122 | + unsigned long offset; | |
123 | + | |
124 | + offset = (unsigned long)(tb->tc_ptr + tb->tb_jmp_offset[n]); | |
125 | + | |
126 | + /* patch the branch destination */ | |
127 | + ptr = (uint32_t *)offset; | |
128 | + val = *ptr; | |
129 | + val = (val & ~0x03fffffc) | ((addr - offset) & 0x03fffffc); | |
130 | + *ptr = val; | |
131 | + /* flush icache */ | |
132 | + asm volatile ("dcbst 0,%0" : : "r"(ptr) : "memory"); | |
133 | + asm volatile ("sync" : : : "memory"); | |
134 | + asm volatile ("icbi 0,%0" : : "r"(ptr) : "memory"); | |
135 | + asm volatile ("sync" : : : "memory"); | |
136 | + asm volatile ("isync" : : : "memory"); | |
137 | +} | |
138 | + | |
139 | +#else | |
140 | + | |
141 | +/* set the jump target */ | |
142 | +static inline void tb_set_jmp_target(TranslationBlock *tb, | |
143 | + int n, unsigned long addr) | |
144 | +{ | |
145 | + tb->tb_next[n] = (void *)addr; | |
146 | +} | |
147 | + | |
148 | +#endif | |
149 | + | |
150 | +static inline void tb_add_jump(TranslationBlock *tb, int n, | |
151 | + TranslationBlock *tb_next) | |
152 | +{ | |
153 | + /* patch the native jump address */ | |
154 | + tb_set_jmp_target(tb, n, (unsigned long)tb_next->tc_ptr); | |
155 | + | |
156 | + /* add in TB jmp circular list */ | |
157 | + tb->jmp_next[n] = tb_next->jmp_first; | |
158 | + tb_next->jmp_first = (TranslationBlock *)((long)(tb) | (n)); | |
159 | +} | |
160 | + | |
161 | +#ifndef offsetof | |
162 | +#define offsetof(type, field) ((size_t) &((type *)0)->field) | |
163 | +#endif | |
164 | + | |
165 | +#ifdef __powerpc__ | |
166 | +static inline int testandset (int *p) | |
167 | +{ | |
168 | + int ret; | |
169 | + __asm__ __volatile__ ( | |
170 | + "0: lwarx %0,0,%1 ;" | |
171 | + " xor. %0,%3,%0;" | |
172 | + " bne 1f;" | |
173 | + " stwcx. %2,0,%1;" | |
174 | + " bne- 0b;" | |
175 | + "1: " | |
176 | + : "=&r" (ret) | |
177 | + : "r" (p), "r" (1), "r" (0) | |
178 | + : "cr0", "memory"); | |
179 | + return ret; | |
180 | +} | |
181 | +#endif | |
182 | + | |
183 | +#ifdef __i386__ | |
184 | +static inline int testandset (int *p) | |
185 | +{ | |
186 | + char ret; | |
187 | + long int readval; | |
188 | + | |
189 | + __asm__ __volatile__ ("lock; cmpxchgl %3, %1; sete %0" | |
190 | + : "=q" (ret), "=m" (*p), "=a" (readval) | |
191 | + : "r" (1), "m" (*p), "a" (0) | |
192 | + : "memory"); | |
193 | + return ret; | |
194 | +} | |
195 | +#endif | |
196 | + | |
197 | +#ifdef __s390__ | |
198 | +static inline int testandset (int *p) | |
199 | +{ | |
200 | + int ret; | |
201 | + | |
202 | + __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n" | |
203 | + " jl 0b" | |
204 | + : "=&d" (ret) | |
205 | + : "r" (1), "a" (p), "0" (*p) | |
206 | + : "cc", "memory" ); | |
207 | + return ret; | |
208 | +} | |
209 | +#endif | |
210 | + | |
211 | +#ifdef __alpha__ | |
212 | +int testandset (int *p) | |
213 | +{ | |
214 | + int ret; | |
215 | + unsigned long one; | |
216 | + | |
217 | + __asm__ __volatile__ ("0: mov 1,%2\n" | |
218 | + " ldl_l %0,%1\n" | |
219 | + " stl_c %2,%1\n" | |
220 | + " beq %2,1f\n" | |
221 | + ".subsection 2\n" | |
222 | + "1: br 0b\n" | |
223 | + ".previous" | |
224 | + : "=r" (ret), "=m" (*p), "=r" (one) | |
225 | + : "m" (*p)); | |
226 | + return ret; | |
227 | +} | |
228 | +#endif | |
229 | + | |
230 | +#ifdef __sparc__ | |
231 | +static inline int testandset (int *p) | |
232 | +{ | |
233 | + int ret; | |
234 | + | |
235 | + __asm__ __volatile__("ldstub [%1], %0" | |
236 | + : "=r" (ret) | |
237 | + : "r" (p) | |
238 | + : "memory"); | |
239 | + | |
240 | + return (ret ? 1 : 0); | |
241 | +} | |
242 | +#endif | |
243 | + | |
244 | +typedef int spinlock_t; | |
245 | + | |
246 | +#define SPIN_LOCK_UNLOCKED 0 | |
247 | + | |
248 | +static inline void spin_lock(spinlock_t *lock) | |
249 | +{ | |
250 | + while (testandset(lock)); | |
251 | +} | |
252 | + | |
253 | +static inline void spin_unlock(spinlock_t *lock) | |
254 | +{ | |
255 | + *lock = 0; | |
256 | +} | |
257 | + | |
258 | +static inline int spin_trylock(spinlock_t *lock) | |
259 | +{ | |
260 | + return !testandset(lock); | |
261 | +} | |
262 | + | |
263 | +extern spinlock_t tb_lock; | |
264 | + | ... | ... |
op-i386.c
... | ... | @@ -709,7 +709,44 @@ void OPPROTO op_cmpxchg8b(void) |
709 | 709 | FORCE_RET(); |
710 | 710 | } |
711 | 711 | |
712 | -/* string ops */ | |
712 | +#if defined(__powerpc__) | |
713 | + | |
714 | +/* on PowerPC we patch the jump instruction directly */ | |
715 | +#define JUMP_TB(tbparam, n, eip)\ | |
716 | +do {\ | |
717 | + static void __attribute__((unused)) *__op_label ## n = &&label ## n;\ | |
718 | + asm volatile ("b %0" : : "i" (&__op_jmp ## n));\ | |
719 | +label ## n:\ | |
720 | + T0 = (long)(tbparam) + (n);\ | |
721 | + EIP = eip;\ | |
722 | +} while (0) | |
723 | + | |
724 | +#else | |
725 | + | |
726 | +/* jump to next block operations (more portable code, does not need | |
727 | + cache flushing, but slower because of indirect jump) */ | |
728 | +#define JUMP_TB(tbparam, n, eip)\ | |
729 | +do {\ | |
730 | + static void __attribute__((unused)) *__op_label ## n = &&label ## n;\ | |
731 | + goto *((TranslationBlock *)tbparam)->tb_next[n];\ | |
732 | +label ## n:\ | |
733 | + T0 = (long)(tbparam) + (n);\ | |
734 | + EIP = eip;\ | |
735 | +} while (0) | |
736 | + | |
737 | +#endif | |
738 | + | |
739 | +void OPPROTO op_jmp_tb_next(void) | |
740 | +{ | |
741 | + JUMP_TB(PARAM1, 0, PARAM2); | |
742 | +} | |
743 | + | |
744 | +void OPPROTO op_movl_T0_0(void) | |
745 | +{ | |
746 | + T0 = 0; | |
747 | +} | |
748 | + | |
749 | +/* multiple size ops */ | |
713 | 750 | |
714 | 751 | #define ldul ldl |
715 | 752 | |
... | ... | @@ -1199,90 +1236,15 @@ void OPPROTO op_lar(void) |
1199 | 1236 | |
1200 | 1237 | /* flags handling */ |
1201 | 1238 | |
1202 | -/* slow jumps cases (compute x86 flags) */ | |
1203 | -void OPPROTO op_jo_cc(void) | |
1204 | -{ | |
1205 | - int eflags; | |
1206 | - eflags = cc_table[CC_OP].compute_all(); | |
1207 | - if (eflags & CC_O) | |
1208 | - EIP = PARAM1; | |
1209 | - else | |
1210 | - EIP = PARAM2; | |
1211 | - FORCE_RET(); | |
1212 | -} | |
1213 | - | |
1214 | -void OPPROTO op_jb_cc(void) | |
1215 | -{ | |
1216 | - if (cc_table[CC_OP].compute_c()) | |
1217 | - EIP = PARAM1; | |
1218 | - else | |
1219 | - EIP = PARAM2; | |
1220 | - FORCE_RET(); | |
1221 | -} | |
1222 | - | |
1223 | -void OPPROTO op_jz_cc(void) | |
1224 | -{ | |
1225 | - int eflags; | |
1226 | - eflags = cc_table[CC_OP].compute_all(); | |
1227 | - if (eflags & CC_Z) | |
1228 | - EIP = PARAM1; | |
1229 | - else | |
1230 | - EIP = PARAM2; | |
1231 | - FORCE_RET(); | |
1232 | -} | |
1233 | - | |
1234 | -void OPPROTO op_jbe_cc(void) | |
1239 | +/* slow jumps cases : in order to avoid calling a function with a | |
1240 | + pointer (which can generate a stack frame on PowerPC), we use | |
1241 | + op_setcc to set T0 and then call op_jcc. */ | |
1242 | +void OPPROTO op_jcc(void) | |
1235 | 1243 | { |
1236 | - int eflags; | |
1237 | - eflags = cc_table[CC_OP].compute_all(); | |
1238 | - if (eflags & (CC_Z | CC_C)) | |
1239 | - EIP = PARAM1; | |
1240 | - else | |
1241 | - EIP = PARAM2; | |
1242 | - FORCE_RET(); | |
1243 | -} | |
1244 | - | |
1245 | -void OPPROTO op_js_cc(void) | |
1246 | -{ | |
1247 | - int eflags; | |
1248 | - eflags = cc_table[CC_OP].compute_all(); | |
1249 | - if (eflags & CC_S) | |
1250 | - EIP = PARAM1; | |
1251 | - else | |
1252 | - EIP = PARAM2; | |
1253 | - FORCE_RET(); | |
1254 | -} | |
1255 | - | |
1256 | -void OPPROTO op_jp_cc(void) | |
1257 | -{ | |
1258 | - int eflags; | |
1259 | - eflags = cc_table[CC_OP].compute_all(); | |
1260 | - if (eflags & CC_P) | |
1261 | - EIP = PARAM1; | |
1262 | - else | |
1263 | - EIP = PARAM2; | |
1264 | - FORCE_RET(); | |
1265 | -} | |
1266 | - | |
1267 | -void OPPROTO op_jl_cc(void) | |
1268 | -{ | |
1269 | - int eflags; | |
1270 | - eflags = cc_table[CC_OP].compute_all(); | |
1271 | - if ((eflags ^ (eflags >> 4)) & 0x80) | |
1272 | - EIP = PARAM1; | |
1273 | - else | |
1274 | - EIP = PARAM2; | |
1275 | - FORCE_RET(); | |
1276 | -} | |
1277 | - | |
1278 | -void OPPROTO op_jle_cc(void) | |
1279 | -{ | |
1280 | - int eflags; | |
1281 | - eflags = cc_table[CC_OP].compute_all(); | |
1282 | - if (((eflags ^ (eflags >> 4)) & 0x80) || (eflags & CC_Z)) | |
1283 | - EIP = PARAM1; | |
1244 | + if (T0) | |
1245 | + JUMP_TB(PARAM1, 0, PARAM2); | |
1284 | 1246 | else |
1285 | - EIP = PARAM2; | |
1247 | + JUMP_TB(PARAM1, 1, PARAM3); | |
1286 | 1248 | FORCE_RET(); |
1287 | 1249 | } |
1288 | 1250 | ... | ... |
opc-i386.h
... | ... | @@ -231,18 +231,20 @@ DEF(jmp_T0, 0) |
231 | 231 | DEF(jmp_im, 1) |
232 | 232 | DEF(int_im, 2) |
233 | 233 | DEF(raise_exception, 1) |
234 | -DEF(into, 0) | |
234 | +DEF(into, 1) | |
235 | 235 | DEF(cli, 0) |
236 | 236 | DEF(sti, 0) |
237 | 237 | DEF(boundw, 0) |
238 | 238 | DEF(boundl, 0) |
239 | 239 | DEF(cmpxchg8b, 0) |
240 | -DEF(jb_subb, 2) | |
241 | -DEF(jz_subb, 2) | |
242 | -DEF(jbe_subb, 2) | |
243 | -DEF(js_subb, 2) | |
244 | -DEF(jl_subb, 2) | |
245 | -DEF(jle_subb, 2) | |
240 | +DEF(jmp_tb_next, 2) | |
241 | +DEF(movl_T0_0, 0) | |
242 | +DEF(jb_subb, 3) | |
243 | +DEF(jz_subb, 3) | |
244 | +DEF(jbe_subb, 3) | |
245 | +DEF(js_subb, 3) | |
246 | +DEF(jl_subb, 3) | |
247 | +DEF(jle_subb, 3) | |
246 | 248 | DEF(setb_T0_subb, 0) |
247 | 249 | DEF(setz_T0_subb, 0) |
248 | 250 | DEF(setbe_T0_subb, 0) |
... | ... | @@ -314,12 +316,12 @@ DEF(insb_a16, 0) |
314 | 316 | DEF(rep_insb_a16, 0) |
315 | 317 | DEF(outb_T0_T1, 0) |
316 | 318 | DEF(inb_T0_T1, 0) |
317 | -DEF(jb_subw, 2) | |
318 | -DEF(jz_subw, 2) | |
319 | -DEF(jbe_subw, 2) | |
320 | -DEF(js_subw, 2) | |
321 | -DEF(jl_subw, 2) | |
322 | -DEF(jle_subw, 2) | |
319 | +DEF(jb_subw, 3) | |
320 | +DEF(jz_subw, 3) | |
321 | +DEF(jbe_subw, 3) | |
322 | +DEF(js_subw, 3) | |
323 | +DEF(jl_subw, 3) | |
324 | +DEF(jle_subw, 3) | |
323 | 325 | DEF(loopnzw, 2) |
324 | 326 | DEF(loopzw, 2) |
325 | 327 | DEF(loopw, 2) |
... | ... | @@ -405,12 +407,12 @@ DEF(insw_a16, 0) |
405 | 407 | DEF(rep_insw_a16, 0) |
406 | 408 | DEF(outw_T0_T1, 0) |
407 | 409 | DEF(inw_T0_T1, 0) |
408 | -DEF(jb_subl, 2) | |
409 | -DEF(jz_subl, 2) | |
410 | -DEF(jbe_subl, 2) | |
411 | -DEF(js_subl, 2) | |
412 | -DEF(jl_subl, 2) | |
413 | -DEF(jle_subl, 2) | |
410 | +DEF(jb_subl, 3) | |
411 | +DEF(jz_subl, 3) | |
412 | +DEF(jbe_subl, 3) | |
413 | +DEF(js_subl, 3) | |
414 | +DEF(jl_subl, 3) | |
415 | +DEF(jle_subl, 3) | |
414 | 416 | DEF(loopnzl, 2) |
415 | 417 | DEF(loopzl, 2) |
416 | 418 | DEF(loopl, 2) |
... | ... | @@ -536,14 +538,7 @@ DEF(movl_A0_seg, 1) |
536 | 538 | DEF(addl_A0_seg, 1) |
537 | 539 | DEF(lsl, 0) |
538 | 540 | DEF(lar, 0) |
539 | -DEF(jo_cc, 2) | |
540 | -DEF(jb_cc, 2) | |
541 | -DEF(jz_cc, 2) | |
542 | -DEF(jbe_cc, 2) | |
543 | -DEF(js_cc, 2) | |
544 | -DEF(jp_cc, 2) | |
545 | -DEF(jl_cc, 2) | |
546 | -DEF(jle_cc, 2) | |
541 | +DEF(jcc, 3) | |
547 | 542 | DEF(seto_T0_cc, 0) |
548 | 543 | DEF(setb_T0_cc, 0) |
549 | 544 | DEF(setz_T0_cc, 0) | ... | ... |
ops_template.h
... | ... | @@ -238,18 +238,18 @@ void OPPROTO glue(op_jb_sub, SUFFIX)(void) |
238 | 238 | src2 = CC_SRC - CC_DST; |
239 | 239 | |
240 | 240 | if ((DATA_TYPE)src1 < (DATA_TYPE)src2) |
241 | - EIP = PARAM1; | |
241 | + JUMP_TB(PARAM1, 0, PARAM2); | |
242 | 242 | else |
243 | - EIP = PARAM2; | |
243 | + JUMP_TB(PARAM1, 1, PARAM3); | |
244 | 244 | FORCE_RET(); |
245 | 245 | } |
246 | 246 | |
247 | 247 | void OPPROTO glue(op_jz_sub, SUFFIX)(void) |
248 | 248 | { |
249 | 249 | if ((DATA_TYPE)CC_DST == 0) |
250 | - EIP = PARAM1; | |
250 | + JUMP_TB(PARAM1, 0, PARAM2); | |
251 | 251 | else |
252 | - EIP = PARAM2; | |
252 | + JUMP_TB(PARAM1, 1, PARAM3); | |
253 | 253 | FORCE_RET(); |
254 | 254 | } |
255 | 255 | |
... | ... | @@ -260,18 +260,18 @@ void OPPROTO glue(op_jbe_sub, SUFFIX)(void) |
260 | 260 | src2 = CC_SRC - CC_DST; |
261 | 261 | |
262 | 262 | if ((DATA_TYPE)src1 <= (DATA_TYPE)src2) |
263 | - EIP = PARAM1; | |
263 | + JUMP_TB(PARAM1, 0, PARAM2); | |
264 | 264 | else |
265 | - EIP = PARAM2; | |
265 | + JUMP_TB(PARAM1, 1, PARAM3); | |
266 | 266 | FORCE_RET(); |
267 | 267 | } |
268 | 268 | |
269 | 269 | void OPPROTO glue(op_js_sub, SUFFIX)(void) |
270 | 270 | { |
271 | 271 | if (CC_DST & SIGN_MASK) |
272 | - EIP = PARAM1; | |
272 | + JUMP_TB(PARAM1, 0, PARAM2); | |
273 | 273 | else |
274 | - EIP = PARAM2; | |
274 | + JUMP_TB(PARAM1, 1, PARAM3); | |
275 | 275 | FORCE_RET(); |
276 | 276 | } |
277 | 277 | |
... | ... | @@ -282,9 +282,9 @@ void OPPROTO glue(op_jl_sub, SUFFIX)(void) |
282 | 282 | src2 = CC_SRC - CC_DST; |
283 | 283 | |
284 | 284 | if ((DATA_STYPE)src1 < (DATA_STYPE)src2) |
285 | - EIP = PARAM1; | |
285 | + JUMP_TB(PARAM1, 0, PARAM2); | |
286 | 286 | else |
287 | - EIP = PARAM2; | |
287 | + JUMP_TB(PARAM1, 1, PARAM3); | |
288 | 288 | FORCE_RET(); |
289 | 289 | } |
290 | 290 | |
... | ... | @@ -295,9 +295,9 @@ void OPPROTO glue(op_jle_sub, SUFFIX)(void) |
295 | 295 | src2 = CC_SRC - CC_DST; |
296 | 296 | |
297 | 297 | if ((DATA_STYPE)src1 <= (DATA_STYPE)src2) |
298 | - EIP = PARAM1; | |
298 | + JUMP_TB(PARAM1, 0, PARAM2); | |
299 | 299 | else |
300 | - EIP = PARAM2; | |
300 | + JUMP_TB(PARAM1, 1, PARAM3); | |
301 | 301 | FORCE_RET(); |
302 | 302 | } |
303 | 303 | ... | ... |
translate-i386.c
... | ... | @@ -31,11 +31,15 @@ |
31 | 31 | |
32 | 32 | #define IN_OP_I386 |
33 | 33 | #include "cpu-i386.h" |
34 | +#include "exec.h" | |
34 | 35 | |
35 | 36 | /* XXX: move that elsewhere */ |
36 | 37 | static uint16_t *gen_opc_ptr; |
37 | 38 | static uint32_t *gen_opparam_ptr; |
38 | 39 | int __op_param1, __op_param2, __op_param3; |
40 | +#ifdef USE_DIRECT_JUMP | |
41 | +int __op_jmp0, __op_jmp1; | |
42 | +#endif | |
39 | 43 | |
40 | 44 | #ifdef __i386__ |
41 | 45 | static inline void flush_icache_range(unsigned long start, unsigned long stop) |
... | ... | @@ -67,14 +71,14 @@ static void inline flush_icache_range(unsigned long start, unsigned long stop) |
67 | 71 | stop = (stop + MIN_CACHE_LINE_SIZE - 1) & ~(MIN_CACHE_LINE_SIZE - 1); |
68 | 72 | |
69 | 73 | for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) { |
70 | - asm ("dcbst 0,%0;" : : "r"(p) : "memory"); | |
74 | + asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); | |
71 | 75 | } |
72 | - asm ("sync"); | |
76 | + asm volatile ("sync" : : : "memory"); | |
73 | 77 | for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) { |
74 | - asm ("icbi 0,%0; sync;" : : "r"(p) : "memory"); | |
78 | + asm volatile ("icbi 0,%0" : : "r"(p) : "memory"); | |
75 | 79 | } |
76 | - asm ("sync"); | |
77 | - asm ("isync"); | |
80 | + asm volatile ("sync" : : : "memory"); | |
81 | + asm volatile ("isync" : : : "memory"); | |
78 | 82 | } |
79 | 83 | #endif |
80 | 84 | |
... | ... | @@ -129,6 +133,7 @@ typedef struct DisasContext { |
129 | 133 | int cpl; |
130 | 134 | int iopl; |
131 | 135 | int tf; /* TF cpu flag */ |
136 | + TranslationBlock *tb; | |
132 | 137 | } DisasContext; |
133 | 138 | |
134 | 139 | /* i386 arith/logic operations */ |
... | ... | @@ -192,6 +197,7 @@ enum { |
192 | 197 | typedef void (GenOpFunc)(void); |
193 | 198 | typedef void (GenOpFunc1)(long); |
194 | 199 | typedef void (GenOpFunc2)(long, long); |
200 | +typedef void (GenOpFunc3)(long, long, long); | |
195 | 201 | |
196 | 202 | static GenOpFunc *gen_op_mov_reg_T0[3][8] = { |
197 | 203 | [OT_BYTE] = { |
... | ... | @@ -699,18 +705,7 @@ enum { |
699 | 705 | JCC_LE, |
700 | 706 | }; |
701 | 707 | |
702 | -static GenOpFunc2 *gen_jcc_slow[8] = { | |
703 | - gen_op_jo_cc, | |
704 | - gen_op_jb_cc, | |
705 | - gen_op_jz_cc, | |
706 | - gen_op_jbe_cc, | |
707 | - gen_op_js_cc, | |
708 | - gen_op_jp_cc, | |
709 | - gen_op_jl_cc, | |
710 | - gen_op_jle_cc, | |
711 | -}; | |
712 | - | |
713 | -static GenOpFunc2 *gen_jcc_sub[3][8] = { | |
708 | +static GenOpFunc3 *gen_jcc_sub[3][8] = { | |
714 | 709 | [OT_BYTE] = { |
715 | 710 | NULL, |
716 | 711 | gen_op_jb_subb, |
... | ... | @@ -1090,8 +1085,9 @@ static inline uint32_t insn_get(DisasContext *s, int ot) |
1090 | 1085 | |
1091 | 1086 | static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip) |
1092 | 1087 | { |
1088 | + TranslationBlock *tb; | |
1093 | 1089 | int inv, jcc_op; |
1094 | - GenOpFunc2 *func; | |
1090 | + GenOpFunc3 *func; | |
1095 | 1091 | |
1096 | 1092 | inv = b & 1; |
1097 | 1093 | jcc_op = (b >> 1) & 7; |
... | ... | @@ -1101,8 +1097,6 @@ static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip) |
1101 | 1097 | case CC_OP_SUBW: |
1102 | 1098 | case CC_OP_SUBL: |
1103 | 1099 | func = gen_jcc_sub[s->cc_op - CC_OP_SUBB][jcc_op]; |
1104 | - if (!func) | |
1105 | - goto slow_jcc; | |
1106 | 1100 | break; |
1107 | 1101 | |
1108 | 1102 | /* some jumps are easy to compute */ |
... | ... | @@ -1138,21 +1132,30 @@ static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip) |
1138 | 1132 | func = gen_jcc_sub[(s->cc_op - CC_OP_ADDB) % 3][jcc_op]; |
1139 | 1133 | break; |
1140 | 1134 | default: |
1141 | - goto slow_jcc; | |
1135 | + func = NULL; | |
1136 | + break; | |
1142 | 1137 | } |
1143 | 1138 | break; |
1144 | 1139 | default: |
1145 | - slow_jcc: | |
1146 | - if (s->cc_op != CC_OP_DYNAMIC) | |
1147 | - gen_op_set_cc_op(s->cc_op); | |
1148 | - func = gen_jcc_slow[jcc_op]; | |
1140 | + func = NULL; | |
1149 | 1141 | break; |
1150 | 1142 | } |
1143 | + | |
1144 | + if (s->cc_op != CC_OP_DYNAMIC) | |
1145 | + gen_op_set_cc_op(s->cc_op); | |
1146 | + | |
1147 | + if (!func) { | |
1148 | + gen_setcc_slow[jcc_op](); | |
1149 | + func = gen_op_jcc; | |
1150 | + } | |
1151 | + | |
1152 | + tb = s->tb; | |
1151 | 1153 | if (!inv) { |
1152 | - func(val, next_eip); | |
1154 | + func((long)tb, val, next_eip); | |
1153 | 1155 | } else { |
1154 | - func(next_eip, val); | |
1156 | + func((long)tb, next_eip, val); | |
1155 | 1157 | } |
1158 | + s->is_jmp = 3; | |
1156 | 1159 | } |
1157 | 1160 | |
1158 | 1161 | static void gen_setcc(DisasContext *s, int b) |
... | ... | @@ -1372,6 +1375,18 @@ static void gen_exception(DisasContext *s, int trapno, unsigned int cur_eip) |
1372 | 1375 | s->is_jmp = 1; |
1373 | 1376 | } |
1374 | 1377 | |
1378 | +/* generate a jump to eip. No segment change must happen before as a | |
1379 | + direct call to the next block may occur */ | |
1380 | +static void gen_jmp(DisasContext *s, unsigned int eip) | |
1381 | +{ | |
1382 | + TranslationBlock *tb = s->tb; | |
1383 | + | |
1384 | + if (s->cc_op != CC_OP_DYNAMIC) | |
1385 | + gen_op_set_cc_op(s->cc_op); | |
1386 | + gen_op_jmp_tb_next((long)tb, eip); | |
1387 | + s->is_jmp = 3; | |
1388 | +} | |
1389 | + | |
1375 | 1390 | /* return the next pc address. Return -1 if no insn found. *is_jmp_ptr |
1376 | 1391 | is set to true if the instruction sets the PC (last instruction of |
1377 | 1392 | a basic block) */ |
... | ... | @@ -2964,8 +2979,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start) |
2964 | 2979 | val &= 0xffff; |
2965 | 2980 | gen_op_movl_T0_im(next_eip); |
2966 | 2981 | gen_push_T0(s); |
2967 | - gen_op_jmp_im(val); | |
2968 | - s->is_jmp = 1; | |
2982 | + gen_jmp(s, val); | |
2969 | 2983 | } |
2970 | 2984 | break; |
2971 | 2985 | case 0x9a: /* lcall im */ |
... | ... | @@ -2996,8 +3010,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start) |
2996 | 3010 | val += s->pc - s->cs_base; |
2997 | 3011 | if (s->dflag == 0) |
2998 | 3012 | val = val & 0xffff; |
2999 | - gen_op_jmp_im(val); | |
3000 | - s->is_jmp = 1; | |
3013 | + gen_jmp(s, val); | |
3001 | 3014 | break; |
3002 | 3015 | case 0xea: /* ljmp im */ |
3003 | 3016 | { |
... | ... | @@ -3019,8 +3032,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start) |
3019 | 3032 | val += s->pc - s->cs_base; |
3020 | 3033 | if (s->dflag == 0) |
3021 | 3034 | val = val & 0xffff; |
3022 | - gen_op_jmp_im(val); | |
3023 | - s->is_jmp = 1; | |
3035 | + gen_jmp(s, val); | |
3024 | 3036 | break; |
3025 | 3037 | case 0x70 ... 0x7f: /* jcc Jb */ |
3026 | 3038 | val = (int8_t)insn_get(s, OT_BYTE); |
... | ... | @@ -3037,7 +3049,6 @@ long disas_insn(DisasContext *s, uint8_t *pc_start) |
3037 | 3049 | if (s->dflag == 0) |
3038 | 3050 | val &= 0xffff; |
3039 | 3051 | gen_jcc(s, b, val, next_eip); |
3040 | - s->is_jmp = 1; | |
3041 | 3052 | break; |
3042 | 3053 | |
3043 | 3054 | case 0x190 ... 0x19f: /* setcc Gv */ |
... | ... | @@ -3393,15 +3404,6 @@ static uint16_t opc_read_flags[NB_OPS] = { |
3393 | 3404 | |
3394 | 3405 | [INDEX_op_into] = CC_O, |
3395 | 3406 | |
3396 | - [INDEX_op_jo_cc] = CC_O, | |
3397 | - [INDEX_op_jb_cc] = CC_C, | |
3398 | - [INDEX_op_jz_cc] = CC_Z, | |
3399 | - [INDEX_op_jbe_cc] = CC_Z | CC_C, | |
3400 | - [INDEX_op_js_cc] = CC_S, | |
3401 | - [INDEX_op_jp_cc] = CC_P, | |
3402 | - [INDEX_op_jl_cc] = CC_O | CC_S, | |
3403 | - [INDEX_op_jle_cc] = CC_O | CC_S | CC_Z, | |
3404 | - | |
3405 | 3407 | [INDEX_op_jb_subb] = CC_C, |
3406 | 3408 | [INDEX_op_jb_subw] = CC_C, |
3407 | 3409 | [INDEX_op_jb_subl] = CC_C, |
... | ... | @@ -3730,7 +3732,7 @@ static uint32_t gen_opparam_buf[OPPARAM_BUF_SIZE]; |
3730 | 3732 | int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, |
3731 | 3733 | int *gen_code_size_ptr, |
3732 | 3734 | uint8_t *pc_start, uint8_t *cs_base, int flags, |
3733 | - int *code_size_ptr) | |
3735 | + int *code_size_ptr, TranslationBlock *tb) | |
3734 | 3736 | { |
3735 | 3737 | DisasContext dc1, *dc = &dc1; |
3736 | 3738 | uint8_t *pc_ptr; |
... | ... | @@ -3750,6 +3752,7 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, |
3750 | 3752 | dc->tf = (flags >> GEN_FLAG_TF_SHIFT) & 1; |
3751 | 3753 | dc->cc_op = CC_OP_DYNAMIC; |
3752 | 3754 | dc->cs_base = cs_base; |
3755 | + dc->tb = tb; | |
3753 | 3756 | |
3754 | 3757 | gen_opc_ptr = gen_opc_buf; |
3755 | 3758 | gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; |
... | ... | @@ -3776,15 +3779,21 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, |
3776 | 3779 | } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end && |
3777 | 3780 | (pc_ptr - pc_start) < (TARGET_PAGE_SIZE - 32)); |
3778 | 3781 | /* we must store the eflags state if it is not already done */ |
3779 | - if (dc->cc_op != CC_OP_DYNAMIC) | |
3780 | - gen_op_set_cc_op(dc->cc_op); | |
3781 | - if (dc->is_jmp != 1) { | |
3782 | - /* we add an additionnal jmp to update the simulated PC */ | |
3783 | - gen_op_jmp_im(ret - (unsigned long)dc->cs_base); | |
3782 | + if (dc->is_jmp != 3) { | |
3783 | + if (dc->cc_op != CC_OP_DYNAMIC) | |
3784 | + gen_op_set_cc_op(dc->cc_op); | |
3785 | + if (dc->is_jmp != 1) { | |
3786 | + /* we add an additional jmp to update the simulated PC */ | |
3787 | + gen_op_jmp_im(ret - (unsigned long)dc->cs_base); | |
3788 | + } | |
3784 | 3789 | } |
3785 | 3790 | if (dc->tf) { |
3786 | 3791 | gen_op_raise_exception(EXCP01_SSTP); |
3787 | 3792 | } |
3793 | + if (dc->is_jmp != 3) { | |
3794 | + /* indicate that the hash table must be used to find the next TB */ | |
3795 | + gen_op_movl_T0_0(); | |
3796 | + } | |
3788 | 3797 | |
3789 | 3798 | *gen_opc_ptr = INDEX_op_end; |
3790 | 3799 | |
... | ... | @@ -3814,8 +3823,17 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, |
3814 | 3823 | #endif |
3815 | 3824 | |
3816 | 3825 | /* generate machine code */ |
3817 | - gen_code_size = dyngen_code(gen_code_buf, gen_opc_buf, gen_opparam_buf); | |
3826 | + tb->tb_next_offset[0] = 0xffff; | |
3827 | + tb->tb_next_offset[1] = 0xffff; | |
3828 | + gen_code_size = dyngen_code(gen_code_buf, tb->tb_next_offset, | |
3829 | +#ifdef USE_DIRECT_JUMP | |
3830 | + tb->tb_jmp_offset, | |
3831 | +#else | |
3832 | + NULL, | |
3833 | +#endif | |
3834 | + gen_opc_buf, gen_opparam_buf); | |
3818 | 3835 | flush_icache_range((unsigned long)gen_code_buf, (unsigned long)(gen_code_buf + gen_code_size)); |
3836 | + | |
3819 | 3837 | *gen_code_size_ptr = gen_code_size; |
3820 | 3838 | *code_size_ptr = pc_ptr - pc_start; |
3821 | 3839 | #ifdef DEBUG_DISAS | ... | ... |