Commit d4e8164f7e9342d692c1d6f1c848ed05f8007ece

Authored by bellard
1 parent 08351fb3

direct chaining for PowerPC and i386


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@183 c046a42c-6fe2-441c-8c8c-71466251a162
dyngen.c
@@ -170,7 +170,16 @@ void elf_swap_phdr(struct elf_phdr *h) @@ -170,7 +170,16 @@ void elf_swap_phdr(struct elf_phdr *h)
170 swabls(&h->p_align); /* Segment alignment */ 170 swabls(&h->p_align); /* Segment alignment */
171 } 171 }
172 172
  173 +/* ELF file info */
173 int do_swap; 174 int do_swap;
  175 +struct elf_shdr *shdr;
  176 +struct elfhdr ehdr;
  177 +ElfW(Sym) *symtab;
  178 +int nb_syms;
  179 +char *strtab;
  180 +/* data section */
  181 +uint8_t *data_data;
  182 +int data_shndx;
174 183
175 uint16_t get16(uint16_t *p) 184 uint16_t get16(uint16_t *p)
176 { 185 {
@@ -270,7 +279,7 @@ int strstart(const char *str, const char *val, const char **ptr) @@ -270,7 +279,7 @@ int strstart(const char *str, const char *val, const char **ptr)
270 /* generate op code */ 279 /* generate op code */
271 void gen_code(const char *name, host_ulong offset, host_ulong size, 280 void gen_code(const char *name, host_ulong offset, host_ulong size,
272 FILE *outfile, uint8_t *text, ELF_RELOC *relocs, int nb_relocs, int reloc_sh_type, 281 FILE *outfile, uint8_t *text, ELF_RELOC *relocs, int nb_relocs, int reloc_sh_type,
273 - ElfW(Sym) *symtab, char *strtab, int gen_switch) 282 + int gen_switch)
274 { 283 {
275 int copy_size = 0; 284 int copy_size = 0;
276 uint8_t *p_start, *p_end; 285 uint8_t *p_start, *p_end;
@@ -291,13 +300,16 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, @@ -291,13 +300,16 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
291 switch(ELF_ARCH) { 300 switch(ELF_ARCH) {
292 case EM_386: 301 case EM_386:
293 { 302 {
294 - uint8_t *p;  
295 - p = p_end - 1;  
296 - if (p == p_start) 303 + int len;
  304 + len = p_end - p_start;
  305 + if (len == 0)
297 error("empty code for %s", name); 306 error("empty code for %s", name);
298 - if (p[0] != 0xc3)  
299 - error("ret expected at the end of %s", name);  
300 - copy_size = p - p_start; 307 + if (p_end[-1] == 0xc3) {
  308 + len--;
  309 + } else {
  310 + error("ret or jmp expected at the end of %s", name);
  311 + }
  312 + copy_size = len;
301 } 313 }
302 break; 314 break;
303 case EM_PPC: 315 case EM_PPC:
@@ -423,7 +435,7 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, @@ -423,7 +435,7 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
423 sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name; 435 sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
424 if (strstart(sym_name, "__op_param", &p)) { 436 if (strstart(sym_name, "__op_param", &p)) {
425 n = strtoul(p, NULL, 10); 437 n = strtoul(p, NULL, 10);
426 - if (n >= MAX_ARGS) 438 + if (n > MAX_ARGS)
427 error("too many arguments in %s", name); 439 error("too many arguments in %s", name);
428 args_present[n - 1] = 1; 440 args_present[n - 1] = 1;
429 } 441 }
@@ -459,7 +471,9 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, @@ -459,7 +471,9 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
459 if (rel->r_offset >= start_offset && 471 if (rel->r_offset >= start_offset &&
460 rel->r_offset < start_offset + copy_size) { 472 rel->r_offset < start_offset + copy_size) {
461 sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name; 473 sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
462 - if (*sym_name && !strstart(sym_name, "__op_param", &p)) { 474 + if (*sym_name &&
  475 + !strstart(sym_name, "__op_param", NULL) &&
  476 + !strstart(sym_name, "__op_jmp", NULL)) {
463 #if defined(HOST_SPARC) 477 #if defined(HOST_SPARC)
464 if (sym_name[0] == '.') { 478 if (sym_name[0] == '.') {
465 fprintf(outfile, 479 fprintf(outfile,
@@ -474,6 +488,31 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, @@ -474,6 +488,31 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
474 } 488 }
475 489
476 fprintf(outfile, " memcpy(gen_code_ptr, (void *)((char *)&%s+%d), %d);\n", name, start_offset - offset, copy_size); 490 fprintf(outfile, " memcpy(gen_code_ptr, (void *)((char *)&%s+%d), %d);\n", name, start_offset - offset, copy_size);
  491 +
  492 + /* emit code offset information */
  493 + {
  494 + ElfW(Sym) *sym;
  495 + const char *sym_name, *p;
  496 + target_ulong val;
  497 + int n;
  498 +
  499 + for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
  500 + sym_name = strtab + sym->st_name;
  501 + if (strstart(sym_name, "__op_label", &p)) {
  502 + /* test if the variable refers to a label inside
  503 + the code we are generating */
  504 + if (sym->st_shndx != data_shndx)
  505 + error("__op_labelN symbols must be in .data or .sdata section");
  506 + val = *(target_ulong *)(data_data + sym->st_value);
  507 + if (val >= start_offset && val < start_offset + copy_size) {
  508 + n = strtol(p, NULL, 10);
  509 + fprintf(outfile, " label_offsets[%d] = %d + (gen_code_ptr - gen_code_buf);\n", n, val - start_offset);
  510 + }
  511 + }
  512 + }
  513 + }
  514 +
  515 + /* load parameters in variables */
477 for(i = 0; i < nb_args; i++) { 516 for(i = 0; i < nb_args; i++) {
478 fprintf(outfile, " param%d = *opparam_ptr++;\n", i + 1); 517 fprintf(outfile, " param%d = *opparam_ptr++;\n", i + 1);
479 } 518 }
@@ -519,6 +558,18 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, @@ -519,6 +558,18 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
519 if (rel->r_offset >= start_offset && 558 if (rel->r_offset >= start_offset &&
520 rel->r_offset < start_offset + copy_size) { 559 rel->r_offset < start_offset + copy_size) {
521 sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name; 560 sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
  561 + if (strstart(sym_name, "__op_jmp", &p)) {
  562 + int n;
  563 + n = strtol(p, NULL, 10);
  564 + /* __op_jmp relocations are done at
  565 + runtime to do translated block
  566 + chaining: the offset of the instruction
  567 + needs to be stored */
  568 + fprintf(outfile, " jmp_offsets[%d] = %d + (gen_code_ptr - gen_code_buf);\n",
  569 + n, rel->r_offset - start_offset);
  570 + continue;
  571 + }
  572 +
522 if (strstart(sym_name, "__op_param", &p)) { 573 if (strstart(sym_name, "__op_param", &p)) {
523 snprintf(name, sizeof(name), "param%s", p); 574 snprintf(name, sizeof(name), "param%s", p);
524 } else { 575 } else {
@@ -824,11 +875,10 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, @@ -824,11 +875,10 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
824 int load_elf(const char *filename, FILE *outfile, int do_print_enum) 875 int load_elf(const char *filename, FILE *outfile, int do_print_enum)
825 { 876 {
826 int fd; 877 int fd;
827 - struct elfhdr ehdr;  
828 - struct elf_shdr *sec, *shdr, *symtab_sec, *strtab_sec, *text_sec;  
829 - int i, j, nb_syms;  
830 - ElfW(Sym) *symtab, *sym;  
831 - char *shstr, *strtab; 878 + struct elf_shdr *sec, *symtab_sec, *strtab_sec, *text_sec;
  879 + int i, j;
  880 + ElfW(Sym) *sym;
  881 + char *shstr, *data_name;
832 uint8_t *text; 882 uint8_t *text;
833 void *relocs; 883 void *relocs;
834 int nb_relocs, reloc_sh_type; 884 int nb_relocs, reloc_sh_type;
@@ -880,6 +930,17 @@ int load_elf(const char *filename, FILE *outfile, int do_print_enum) @@ -880,6 +930,17 @@ int load_elf(const char *filename, FILE *outfile, int do_print_enum)
880 error("could not find .text section"); 930 error("could not find .text section");
881 text = load_data(fd, text_sec->sh_offset, text_sec->sh_size); 931 text = load_data(fd, text_sec->sh_offset, text_sec->sh_size);
882 932
  933 +#if defined(HOST_PPC)
  934 + data_name = ".sdata";
  935 +#else
  936 + data_name = ".data";
  937 +#endif
  938 + sec = find_elf_section(shdr, ehdr.e_shnum, shstr, data_name);
  939 + if (!sec)
  940 + error("could not find %s section", data_name);
  941 + data_shndx = sec - shdr;
  942 + data_data = load_data(fd, sec->sh_offset, sec->sh_size);
  943 +
883 /* find text relocations, if any */ 944 /* find text relocations, if any */
884 nb_relocs = 0; 945 nb_relocs = 0;
885 relocs = NULL; 946 relocs = NULL;
@@ -936,7 +997,7 @@ int load_elf(const char *filename, FILE *outfile, int do_print_enum) @@ -936,7 +997,7 @@ int load_elf(const char *filename, FILE *outfile, int do_print_enum)
936 name = strtab + sym->st_name; 997 name = strtab + sym->st_name;
937 if (strstart(name, OP_PREFIX, &p)) { 998 if (strstart(name, OP_PREFIX, &p)) {
938 gen_code(name, sym->st_value, sym->st_size, outfile, 999 gen_code(name, sym->st_value, sym->st_size, outfile,
939 - text, relocs, nb_relocs, reloc_sh_type, symtab, strtab, 2); 1000 + text, relocs, nb_relocs, reloc_sh_type, 2);
940 } 1001 }
941 } 1002 }
942 } else { 1003 } else {
@@ -963,6 +1024,7 @@ fprintf(outfile, @@ -963,6 +1024,7 @@ fprintf(outfile,
963 #endif 1024 #endif
964 fprintf(outfile, 1025 fprintf(outfile,
965 "int dyngen_code(uint8_t *gen_code_buf,\n" 1026 "int dyngen_code(uint8_t *gen_code_buf,\n"
  1027 +" uint16_t *label_offsets, uint16_t *jmp_offsets,\n"
966 " const uint16_t *opc_buf, const uint32_t *opparam_buf)\n" 1028 " const uint16_t *opc_buf, const uint32_t *opparam_buf)\n"
967 "{\n" 1029 "{\n"
968 " uint8_t *gen_code_ptr;\n" 1030 " uint8_t *gen_code_ptr;\n"
@@ -1001,7 +1063,7 @@ fprintf(outfile, @@ -1001,7 +1063,7 @@ fprintf(outfile,
1001 if (sym->st_shndx != (text_sec - shdr)) 1063 if (sym->st_shndx != (text_sec - shdr))
1002 error("invalid section for opcode (0x%x)", sym->st_shndx); 1064 error("invalid section for opcode (0x%x)", sym->st_shndx);
1003 gen_code(name, sym->st_value, sym->st_size, outfile, 1065 gen_code(name, sym->st_value, sym->st_size, outfile,
1004 - text, relocs, nb_relocs, reloc_sh_type, symtab, strtab, 1); 1066 + text, relocs, nb_relocs, reloc_sh_type, 1);
1005 } 1067 }
1006 } 1068 }
1007 1069
@@ -1056,7 +1118,7 @@ fprintf(outfile, @@ -1056,7 +1118,7 @@ fprintf(outfile,
1056 if (sym->st_shndx != (text_sec - shdr)) 1118 if (sym->st_shndx != (text_sec - shdr))
1057 error("invalid section for opcode (0x%x)", sym->st_shndx); 1119 error("invalid section for opcode (0x%x)", sym->st_shndx);
1058 gen_code(name, sym->st_value, sym->st_size, outfile, 1120 gen_code(name, sym->st_value, sym->st_size, outfile,
1059 - text, relocs, nb_relocs, reloc_sh_type, symtab, strtab, 0); 1121 + text, relocs, nb_relocs, reloc_sh_type, 0);
1060 } 1122 }
1061 } 1123 }
1062 } 1124 }
exec-i386.c
@@ -120,7 +120,7 @@ int cpu_x86_exec(CPUX86State *env1) @@ -120,7 +120,7 @@ int cpu_x86_exec(CPUX86State *env1)
120 TranslationBlock *tb, **ptb; 120 TranslationBlock *tb, **ptb;
121 uint8_t *tc_ptr, *cs_base, *pc; 121 uint8_t *tc_ptr, *cs_base, *pc;
122 unsigned int flags; 122 unsigned int flags;
123 - 123 +
124 /* first we save global registers */ 124 /* first we save global registers */
125 saved_T0 = T0; 125 saved_T0 = T0;
126 saved_T1 = T1; 126 saved_T1 = T1;
@@ -169,6 +169,7 @@ int cpu_x86_exec(CPUX86State *env1) @@ -169,6 +169,7 @@ int cpu_x86_exec(CPUX86State *env1)
169 169
170 /* prepare setjmp context for exception handling */ 170 /* prepare setjmp context for exception handling */
171 if (setjmp(env->jmp_env) == 0) { 171 if (setjmp(env->jmp_env) == 0) {
  172 + T0 = 0; /* force lookup of first TB */
172 for(;;) { 173 for(;;) {
173 if (env->interrupt_request) { 174 if (env->interrupt_request) {
174 raise_exception(EXCP_INTERRUPT); 175 raise_exception(EXCP_INTERRUPT);
@@ -209,30 +210,40 @@ int cpu_x86_exec(CPUX86State *env1) @@ -209,30 +210,40 @@ int cpu_x86_exec(CPUX86State *env1)
209 flags |= (env->eflags & TF_MASK) << (GEN_FLAG_TF_SHIFT - 8); 210 flags |= (env->eflags & TF_MASK) << (GEN_FLAG_TF_SHIFT - 8);
210 cs_base = env->seg_cache[R_CS].base; 211 cs_base = env->seg_cache[R_CS].base;
211 pc = cs_base + env->eip; 212 pc = cs_base + env->eip;
  213 + spin_lock(&tb_lock);
212 tb = tb_find(&ptb, (unsigned long)pc, (unsigned long)cs_base, 214 tb = tb_find(&ptb, (unsigned long)pc, (unsigned long)cs_base,
213 flags); 215 flags);
214 if (!tb) { 216 if (!tb) {
215 /* if no translated code available, then translate it now */ 217 /* if no translated code available, then translate it now */
216 - /* very inefficient but safe: we lock all the cpus  
217 - when generating code */  
218 - spin_lock(&tb_lock); 218 + tb = tb_alloc((unsigned long)pc);
  219 + if (!tb) {
  220 + /* flush must be done */
  221 + tb_flush();
  222 + /* cannot fail at this point */
  223 + tb = tb_alloc((unsigned long)pc);
  224 + /* don't forget to invalidate previous TB info */
  225 + ptb = &tb_hash[tb_hash_func((unsigned long)pc)];
  226 + T0 = 0;
  227 + }
219 tc_ptr = code_gen_ptr; 228 tc_ptr = code_gen_ptr;
  229 + tb->tc_ptr = tc_ptr;
220 ret = cpu_x86_gen_code(code_gen_ptr, CODE_GEN_MAX_SIZE, 230 ret = cpu_x86_gen_code(code_gen_ptr, CODE_GEN_MAX_SIZE,
221 &code_gen_size, pc, cs_base, flags, 231 &code_gen_size, pc, cs_base, flags,
222 - &code_size); 232 + &code_size, tb);
223 /* if invalid instruction, signal it */ 233 /* if invalid instruction, signal it */
224 if (ret != 0) { 234 if (ret != 0) {
  235 + /* NOTE: the tb is allocated but not linked, so we
  236 + can leave it */
225 spin_unlock(&tb_lock); 237 spin_unlock(&tb_lock);
226 raise_exception(EXCP06_ILLOP); 238 raise_exception(EXCP06_ILLOP);
227 } 239 }
228 - tb = tb_alloc((unsigned long)pc, code_size);  
229 *ptb = tb; 240 *ptb = tb;
  241 + tb->size = code_size;
230 tb->cs_base = (unsigned long)cs_base; 242 tb->cs_base = (unsigned long)cs_base;
231 tb->flags = flags; 243 tb->flags = flags;
232 - tb->tc_ptr = tc_ptr;  
233 tb->hash_next = NULL; 244 tb->hash_next = NULL;
  245 + tb_link(tb);
234 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); 246 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
235 - spin_unlock(&tb_lock);  
236 } 247 }
237 #ifdef DEBUG_EXEC 248 #ifdef DEBUG_EXEC
238 if (loglevel) { 249 if (loglevel) {
@@ -241,14 +252,21 @@ int cpu_x86_exec(CPUX86State *env1) @@ -241,14 +252,21 @@ int cpu_x86_exec(CPUX86State *env1)
241 lookup_symbol((void *)tb->pc)); 252 lookup_symbol((void *)tb->pc));
242 } 253 }
243 #endif 254 #endif
244 - /* execute the generated code */ 255 +
  256 + /* see if we can patch the calling TB */
  257 + if (T0 != 0 && !(env->eflags & TF_MASK)) {
  258 + tb_add_jump((TranslationBlock *)(T0 & ~3), T0 & 3, tb);
  259 + }
245 tc_ptr = tb->tc_ptr; 260 tc_ptr = tb->tc_ptr;
  261 + spin_unlock(&tb_lock);
  262 +
  263 + /* execute the generated code */
246 gen_func = (void *)tc_ptr; 264 gen_func = (void *)tc_ptr;
247 #ifdef __sparc__ 265 #ifdef __sparc__
248 __asm__ __volatile__("call %0\n\t" 266 __asm__ __volatile__("call %0\n\t"
249 " mov %%o7,%%i0" 267 " mov %%o7,%%i0"
250 : /* no outputs */ 268 : /* no outputs */
251 - : "r" (gen_func) 269 + : "r" (gen_func)
252 : "i0", "i1", "i2", "i3", "i4", "i5"); 270 : "i0", "i1", "i2", "i3", "i4", "i5");
253 #else 271 #else
254 gen_func(); 272 gen_func();
exec-i386.h
@@ -205,8 +205,10 @@ extern int __op_param1, __op_param2, __op_param3; @@ -205,8 +205,10 @@ extern int __op_param1, __op_param2, __op_param3;
205 #define PARAM2 ((long)(&__op_param2)) 205 #define PARAM2 ((long)(&__op_param2))
206 #define PARAM3 ((long)(&__op_param3)) 206 #define PARAM3 ((long)(&__op_param3))
207 #endif 207 #endif
  208 +extern int __op_jmp0, __op_jmp1;
208 209
209 #include "cpu-i386.h" 210 #include "cpu-i386.h"
  211 +#include "exec.h"
210 212
211 typedef struct CCTable { 213 typedef struct CCTable {
212 int (*compute_all)(void); /* return all the flags */ 214 int (*compute_all)(void); /* return all the flags */
@@ -27,6 +27,7 @@ @@ -27,6 +27,7 @@
27 #include <sys/mman.h> 27 #include <sys/mman.h>
28 28
29 #include "cpu-i386.h" 29 #include "cpu-i386.h"
  30 +#include "exec.h"
30 31
31 //#define DEBUG_TB_INVALIDATE 32 //#define DEBUG_TB_INVALIDATE
32 #define DEBUG_FLUSH 33 #define DEBUG_FLUSH
@@ -212,6 +213,7 @@ static void page_flush_tb(void) @@ -212,6 +213,7 @@ static void page_flush_tb(void)
212 } 213 }
213 214
214 /* flush all the translation blocks */ 215 /* flush all the translation blocks */
  216 +/* XXX: tb_flush is currently not thread safe */
215 void tb_flush(void) 217 void tb_flush(void)
216 { 218 {
217 int i; 219 int i;
@@ -226,7 +228,8 @@ void tb_flush(void) @@ -226,7 +228,8 @@ void tb_flush(void)
226 tb_hash[i] = NULL; 228 tb_hash[i] = NULL;
227 page_flush_tb(); 229 page_flush_tb();
228 code_gen_ptr = code_gen_buffer; 230 code_gen_ptr = code_gen_buffer;
229 - /* XXX: flush processor icache at this point */ 231 + /* XXX: flush processor icache at this point if cache flush is
  232 + expensive */
230 } 233 }
231 234
232 #ifdef DEBUG_TB_CHECK 235 #ifdef DEBUG_TB_CHECK
@@ -265,6 +268,26 @@ static void tb_page_check(void) @@ -265,6 +268,26 @@ static void tb_page_check(void)
265 } 268 }
266 } 269 }
267 270
  271 +void tb_jmp_check(TranslationBlock *tb)
  272 +{
  273 + TranslationBlock *tb1;
  274 + unsigned int n1;
  275 +
  276 + /* suppress any remaining jumps to this TB */
  277 + tb1 = tb->jmp_first;
  278 + for(;;) {
  279 + n1 = (long)tb1 & 3;
  280 + tb1 = (TranslationBlock *)((long)tb1 & ~3);
  281 + if (n1 == 2)
  282 + break;
  283 + tb1 = tb1->jmp_next[n1];
  284 + }
  285 + /* check end of list */
  286 + if (tb1 != tb) {
  287 + printf("ERROR: jmp_list from 0x%08lx\n", (long)tb);
  288 + }
  289 +}
  290 +
268 #endif 291 #endif
269 292
270 /* invalidate one TB */ 293 /* invalidate one TB */
@@ -282,12 +305,48 @@ static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb, @@ -282,12 +305,48 @@ static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
282 } 305 }
283 } 306 }
284 307
  308 +static inline void tb_jmp_remove(TranslationBlock *tb, int n)
  309 +{
  310 + TranslationBlock *tb1, **ptb;
  311 + unsigned int n1;
  312 +
  313 + ptb = &tb->jmp_next[n];
  314 + tb1 = *ptb;
  315 + if (tb1) {
  316 + /* find tb(n) in circular list */
  317 + for(;;) {
  318 + tb1 = *ptb;
  319 + n1 = (long)tb1 & 3;
  320 + tb1 = (TranslationBlock *)((long)tb1 & ~3);
  321 + if (n1 == n && tb1 == tb)
  322 + break;
  323 + if (n1 == 2) {
  324 + ptb = &tb1->jmp_first;
  325 + } else {
  326 + ptb = &tb1->jmp_next[n1];
  327 + }
  328 + }
  329 + /* now we can suppress tb(n) from the list */
  330 + *ptb = tb->jmp_next[n];
  331 +
  332 + tb->jmp_next[n] = NULL;
  333 + }
  334 +}
  335 +
  336 +/* reset the jump entry 'n' of a TB so that it is not chained to
  337 + another TB */
  338 +static inline void tb_reset_jump(TranslationBlock *tb, int n)
  339 +{
  340 + tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
  341 +}
  342 +
285 static inline void tb_invalidate(TranslationBlock *tb, int parity) 343 static inline void tb_invalidate(TranslationBlock *tb, int parity)
286 { 344 {
287 PageDesc *p; 345 PageDesc *p;
288 unsigned int page_index1, page_index2; 346 unsigned int page_index1, page_index2;
289 - unsigned int h;  
290 - 347 + unsigned int h, n1;
  348 + TranslationBlock *tb1, *tb2;
  349 +
291 /* remove the TB from the hash list */ 350 /* remove the TB from the hash list */
292 h = tb_hash_func(tb->pc); 351 h = tb_hash_func(tb->pc);
293 tb_remove(&tb_hash[h], tb, 352 tb_remove(&tb_hash[h], tb,
@@ -305,6 +364,24 @@ static inline void tb_invalidate(TranslationBlock *tb, int parity) @@ -305,6 +364,24 @@ static inline void tb_invalidate(TranslationBlock *tb, int parity)
305 tb_remove(&p->first_tb, tb, 364 tb_remove(&p->first_tb, tb,
306 offsetof(TranslationBlock, page_next[page_index2 & 1])); 365 offsetof(TranslationBlock, page_next[page_index2 & 1]));
307 } 366 }
  367 +
  368 + /* suppress this TB from the two jump lists */
  369 + tb_jmp_remove(tb, 0);
  370 + tb_jmp_remove(tb, 1);
  371 +
  372 + /* suppress any remaining jumps to this TB */
  373 + tb1 = tb->jmp_first;
  374 + for(;;) {
  375 + n1 = (long)tb1 & 3;
  376 + if (n1 == 2)
  377 + break;
  378 + tb1 = (TranslationBlock *)((long)tb1 & ~3);
  379 + tb2 = tb1->jmp_next[n1];
  380 + tb_reset_jump(tb1, n1);
  381 + tb1->jmp_next[n1] = NULL;
  382 + tb1 = tb2;
  383 + }
  384 + tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
308 } 385 }
309 386
310 /* invalidate all TBs which intersect with the target page starting at addr */ 387 /* invalidate all TBs which intersect with the target page starting at addr */
@@ -367,27 +444,39 @@ static inline void tb_alloc_page(TranslationBlock *tb, unsigned int page_index) @@ -367,27 +444,39 @@ static inline void tb_alloc_page(TranslationBlock *tb, unsigned int page_index)
367 444
368 /* Allocate a new translation block. Flush the translation buffer if 445 /* Allocate a new translation block. Flush the translation buffer if
369 too many translation blocks or too much generated code. */ 446 too many translation blocks or too much generated code. */
370 -TranslationBlock *tb_alloc(unsigned long pc,  
371 - unsigned long size) 447 +TranslationBlock *tb_alloc(unsigned long pc)
372 { 448 {
373 TranslationBlock *tb; 449 TranslationBlock *tb;
374 - unsigned int page_index1, page_index2;  
375 450
376 if (nb_tbs >= CODE_GEN_MAX_BLOCKS || 451 if (nb_tbs >= CODE_GEN_MAX_BLOCKS ||
377 (code_gen_ptr - code_gen_buffer) >= CODE_GEN_BUFFER_MAX_SIZE) 452 (code_gen_ptr - code_gen_buffer) >= CODE_GEN_BUFFER_MAX_SIZE)
378 - tb_flush(); 453 + return NULL;
379 tb = &tbs[nb_tbs++]; 454 tb = &tbs[nb_tbs++];
380 tb->pc = pc; 455 tb->pc = pc;
381 - tb->size = size; 456 + return tb;
  457 +}
  458 +
  459 +/* link the tb with the other TBs */
  460 +void tb_link(TranslationBlock *tb)
  461 +{
  462 + unsigned int page_index1, page_index2;
382 463
383 /* add in the page list */ 464 /* add in the page list */
384 - page_index1 = pc >> TARGET_PAGE_BITS; 465 + page_index1 = tb->pc >> TARGET_PAGE_BITS;
385 tb_alloc_page(tb, page_index1); 466 tb_alloc_page(tb, page_index1);
386 - page_index2 = (pc + size - 1) >> TARGET_PAGE_BITS; 467 + page_index2 = (tb->pc + tb->size - 1) >> TARGET_PAGE_BITS;
387 if (page_index2 != page_index1) { 468 if (page_index2 != page_index1) {
388 tb_alloc_page(tb, page_index2); 469 tb_alloc_page(tb, page_index2);
389 } 470 }
390 - return tb; 471 + tb->jmp_first = (TranslationBlock *)((long)tb | 2);
  472 + tb->jmp_next[0] = NULL;
  473 + tb->jmp_next[1] = NULL;
  474 +
  475 + /* init original jump addresses */
  476 + if (tb->tb_next_offset[0] != 0xffff)
  477 + tb_reset_jump(tb, 0);
  478 + if (tb->tb_next_offset[1] != 0xffff)
  479 + tb_reset_jump(tb, 1);
391 } 480 }
392 481
393 /* called from signal handler: invalidate the code and unprotect the 482 /* called from signal handler: invalidate the code and unprotect the
exec.h 0 → 100644
  1 +/*
  2 + * internal execution defines for qemu
  3 + *
  4 + * Copyright (c) 2003 Fabrice Bellard
  5 + *
  6 + * This library is free software; you can redistribute it and/or
  7 + * modify it under the terms of the GNU Lesser General Public
  8 + * License as published by the Free Software Foundation; either
  9 + * version 2 of the License, or (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14 + * Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public
  17 + * License along with this library; if not, write to the Free Software
  18 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  19 + */
  20 +
  21 +#define GEN_FLAG_CODE32_SHIFT 0
  22 +#define GEN_FLAG_ADDSEG_SHIFT 1
  23 +#define GEN_FLAG_SS32_SHIFT 2
  24 +#define GEN_FLAG_VM_SHIFT 3
  25 +#define GEN_FLAG_ST_SHIFT 4
  26 +#define GEN_FLAG_CPL_SHIFT 7
  27 +#define GEN_FLAG_IOPL_SHIFT 9
  28 +#define GEN_FLAG_TF_SHIFT 11
  29 +
  30 +struct TranslationBlock;
  31 +int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size,
  32 + int *gen_code_size_ptr,
  33 + uint8_t *pc_start, uint8_t *cs_base, int flags,
  34 + int *code_size_ptr, struct TranslationBlock *tb);
  35 +void cpu_x86_tblocks_init(void);
  36 +void page_init(void);
  37 +int page_unprotect(unsigned long address);
  38 +
  39 +#define CODE_GEN_MAX_SIZE 65536
  40 +#define CODE_GEN_ALIGN 16 /* must be >= the size of an icache line */
  41 +
  42 +#define CODE_GEN_HASH_BITS 15
  43 +#define CODE_GEN_HASH_SIZE (1 << CODE_GEN_HASH_BITS)
  44 +
  45 +/* maximum total translated code allocated */
  46 +#define CODE_GEN_BUFFER_SIZE (2048 * 1024)
  47 +//#define CODE_GEN_BUFFER_SIZE (128 * 1024)
  48 +
  49 +#if defined(__powerpc__)
  50 +#define USE_DIRECT_JUMP
  51 +#endif
  52 +
/* Descriptor of one block of translated target code. */
typedef struct TranslationBlock {
    unsigned long pc;      /* simulated PC of the block (EIP + CS base) */
    unsigned long cs_base; /* CS base the block was translated for */
    unsigned int flags;    /* context in which the code was generated */
    uint16_t size;         /* size of the target code covered by the block
                              (1 <= size <= TARGET_PAGE_SIZE) */
    uint8_t *tc_ptr;       /* start of the translated code */
    struct TranslationBlock *hash_next;    /* next block in the hash chain */
    struct TranslationBlock *page_next[2]; /* next blocks in even/odd page */

    /* Data used to jump directly from this block's code into another
       TB.  Each block has up to two such jumps, indexed 0 and 1. */
    uint16_t tb_next_offset[2]; /* offset of original jump target */
#ifdef USE_DIRECT_JUMP
    uint16_t tb_jmp_offset[2];  /* offset of jump instruction */
#else
    uint8_t *tb_next[2];        /* address of jump generated code */
#endif

    /* Circular list of the TBs jumping to this one.  The two least
       significant bits of each pointer tell which field holds the next
       link: 0 = jmp_next[0], 1 = jmp_next[1], 2 = jmp_first (the entry
       that points back at the TB owning the list). */
    struct TranslationBlock *jmp_next[2];
    struct TranslationBlock *jmp_first;
} TranslationBlock;
  77 +
  78 +static inline unsigned int tb_hash_func(unsigned long pc)
  79 +{
  80 + return pc & (CODE_GEN_HASH_SIZE - 1);
  81 +}
  82 +
  83 +TranslationBlock *tb_alloc(unsigned long pc);
  84 +void tb_flush(void);
  85 +void tb_link(TranslationBlock *tb);
  86 +
  87 +extern TranslationBlock *tb_hash[CODE_GEN_HASH_SIZE];
  88 +
  89 +extern uint8_t code_gen_buffer[CODE_GEN_BUFFER_SIZE];
  90 +extern uint8_t *code_gen_ptr;
  91 +
  92 +/* find a translation block in the translation cache. If not found,
  93 + return NULL and the pointer to the last element of the list in pptb */
  94 +static inline TranslationBlock *tb_find(TranslationBlock ***pptb,
  95 + unsigned long pc,
  96 + unsigned long cs_base,
  97 + unsigned int flags)
  98 +{
  99 + TranslationBlock **ptb, *tb;
  100 + unsigned int h;
  101 +
  102 + h = tb_hash_func(pc);
  103 + ptb = &tb_hash[h];
  104 + for(;;) {
  105 + tb = *ptb;
  106 + if (!tb)
  107 + break;
  108 + if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags)
  109 + return tb;
  110 + ptb = &tb->hash_next;
  111 + }
  112 + *pptb = ptb;
  113 + return NULL;
  114 +}
  115 +
  116 +#if defined(__powerpc__)
  117 +
  118 +static inline void tb_set_jmp_target(TranslationBlock *tb,
  119 + int n, unsigned long addr)
  120 +{
  121 + uint32_t val, *ptr;
  122 + unsigned long offset;
  123 +
  124 + offset = (unsigned long)(tb->tc_ptr + tb->tb_jmp_offset[n]);
  125 +
  126 + /* patch the branch destination */
  127 + ptr = (uint32_t *)offset;
  128 + val = *ptr;
  129 + val = (val & ~0x03fffffc) | ((addr - offset) & 0x03fffffc);
  130 + *ptr = val;
  131 + /* flush icache */
  132 + asm volatile ("dcbst 0,%0" : : "r"(ptr) : "memory");
  133 + asm volatile ("sync" : : : "memory");
  134 + asm volatile ("icbi 0,%0" : : "r"(ptr) : "memory");
  135 + asm volatile ("sync" : : : "memory");
  136 + asm volatile ("isync" : : : "memory");
  137 +}
  138 +
  139 +#else
  140 +
  141 +/* set the jump target */
  142 +static inline void tb_set_jmp_target(TranslationBlock *tb,
  143 + int n, unsigned long addr)
  144 +{
  145 + tb->tb_next[n] = (void *)addr;
  146 +}
  147 +
  148 +#endif
  149 +
  150 +static inline void tb_add_jump(TranslationBlock *tb, int n,
  151 + TranslationBlock *tb_next)
  152 +{
  153 + /* patch the native jump address */
  154 + tb_set_jmp_target(tb, n, (unsigned long)tb_next->tc_ptr);
  155 +
  156 + /* add in TB jmp circular list */
  157 + tb->jmp_next[n] = tb_next->jmp_first;
  158 + tb_next->jmp_first = (TranslationBlock *)((long)(tb) | (n));
  159 +}
  160 +
  161 +#ifndef offsetof
  162 +#define offsetof(type, field) ((size_t) &((type *)0)->field)
  163 +#endif
  164 +
#ifdef __powerpc__
/* Atomic test-and-set for PowerPC.  Loads *p with lwarx; if the value
   is non-zero it is returned unchanged, otherwise 1 is stored with
   stwcx. (retrying from the load when the conditional store fails).
   Returns the value found in *p: 0 means the caller set it to 1. */
static inline int testandset (int *p)
{
    int old_val;
    __asm__ __volatile__ (
                          "0: lwarx %0,0,%1 ;"
                          " xor. %0,%3,%0;"
                          " bne 1f;"
                          " stwcx. %2,0,%1;"
                          " bne- 0b;"
                          "1: "
                          : "=&r" (old_val)
                          : "r" (p), "r" (1), "r" (0)
                          : "cr0", "memory");
    return old_val;
}
#endif
  182 +
#if defined(__i386__) || defined(__x86_64__)
/* Atomic test-and-set for x86.  Atomically stores 1 into *p if it
   currently holds 0 (lock; cmpxchgl with EAX = 0) and returns the value
   *p held before the operation: 0 means the caller just acquired the
   lock, non-zero means it was already taken.

   This is the convention used by the other architectures and relied on
   by spin_lock() ("spin while non-zero") and spin_trylock() ("success
   when zero").  Returning the cmpxchg success flag instead would invert
   it and let spin_lock() fall through without owning a contended lock.

   cmpxchgl on a 32-bit int is equally valid in 64-bit mode, hence the
   __x86_64__ guard. */
static inline int testandset (int *p)
{
    int readval = 0; /* comparand in EAX; receives old *p on failure */

    __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
                          : "+m" (*p), "+a" (readval)
                          : "r" (1)
                          : "cc", "memory");
    return readval;
}
#endif
  196 +
#ifdef __s390__
/* Atomic test-and-set for s390.  Uses a compare-and-swap (cs) loop,
   seeded with the current *p, that retries until 1 has been stored.
   Returns the value *p held just before the successful swap. */
static inline int testandset (int *p)
{
    int prev;

    __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n"
                          " jl 0b"
                          : "=&d" (prev)
                          : "r" (1), "a" (p), "0" (*p)
                          : "cc", "memory" );
    return prev;
}
#endif
  210 +
#ifdef __alpha__
/* Atomic test-and-set for Alpha.  Loads *p with ldl_l, stores 1 with
   stl_c and retries from the start when the conditional store reports
   failure.  Returns the value loaded from *p (0 means the lock was
   free).

   Declared static inline like the other variants: this header is
   included from several source files, and an external definition in
   each of them would clash at link time. */
static inline int testandset (int *p)
{
    int ret;
    unsigned long one;

    __asm__ __volatile__ ("0: mov 1,%2\n"
                          " ldl_l %0,%1\n"
                          " stl_c %2,%1\n"
                          " beq %2,1f\n"
                          ".subsection 2\n"
                          "1: br 0b\n"
                          ".previous"
                          : "=r" (ret), "=m" (*p), "=r" (one)
                          : "m" (*p));
    return ret;
}
#endif
  229 +
#ifdef __sparc__
/* Atomic test-and-set for SPARC, built on the ldstub instruction
   applied to the byte at address p.  Returns 1 if the byte fetched by
   ldstub was non-zero (already set), 0 otherwise. */
static inline int testandset (int *p)
{
    int fetched;

    __asm__ __volatile__("ldstub [%1], %0"
                         : "=r" (fetched)
                         : "r" (p)
                         : "memory");

    return fetched != 0;
}
#endif
  243 +
/* Busy-waiting lock built on testandset(); the lock word is 0 when
   free. */
typedef int spinlock_t;

#define SPIN_LOCK_UNLOCKED 0

/* Spin until testandset() reports that the lock word was free. */
static inline void spin_lock(spinlock_t *lock)
{
    for (;;) {
        if (!testandset(lock))
            break;
    }
}

/* Release the lock by writing the unlocked value back. */
static inline void spin_unlock(spinlock_t *lock)
{
    *lock = SPIN_LOCK_UNLOCKED;
}

/* Single acquisition attempt: returns 1 if the lock was taken by this
   call, 0 if it was already held. */
static inline int spin_trylock(spinlock_t *lock)
{
    return testandset(lock) == 0;
}
  262 +
  263 +extern spinlock_t tb_lock;
  264 +
op-i386.c
@@ -709,7 +709,44 @@ void OPPROTO op_cmpxchg8b(void) @@ -709,7 +709,44 @@ void OPPROTO op_cmpxchg8b(void)
709 FORCE_RET(); 709 FORCE_RET();
710 } 710 }
711 711
712 -/* string ops */ 712 +#if defined(__powerpc__)
  713 +
  714 +/* on PowerPC we patch the jump instruction directly */
  715 +#define JUMP_TB(tbparam, n, eip)\
  716 +do {\
  717 + static void __attribute__((unused)) *__op_label ## n = &&label ## n;\
  718 + asm volatile ("b %0" : : "i" (&__op_jmp ## n));\
  719 +label ## n:\
  720 + T0 = (long)(tbparam) + (n);\
  721 + EIP = eip;\
  722 +} while (0)
  723 +
  724 +#else
  725 +
  726 +/* jump to next block operations (more portable code, does not need
  727 + cache flushing, but slower because of indirect jump) */
  728 +#define JUMP_TB(tbparam, n, eip)\
  729 +do {\
  730 + static void __attribute__((unused)) *__op_label ## n = &&label ## n;\
  731 + goto *((TranslationBlock *)tbparam)->tb_next[n];\
  732 +label ## n:\
  733 + T0 = (long)(tbparam) + (n);\
  734 + EIP = eip;\
  735 +} while (0)
  736 +
  737 +#endif
  738 +
  739 +void OPPROTO op_jmp_tb_next(void)
  740 +{
  741 + JUMP_TB(PARAM1, 0, PARAM2);
  742 +}
  743 +
  744 +void OPPROTO op_movl_T0_0(void)
  745 +{
  746 + T0 = 0;
  747 +}
  748 +
  749 +/* multiple size ops */
713 750
714 #define ldul ldl 751 #define ldul ldl
715 752
@@ -1199,90 +1236,15 @@ void OPPROTO op_lar(void) @@ -1199,90 +1236,15 @@ void OPPROTO op_lar(void)
1199 1236
1200 /* flags handling */ 1237 /* flags handling */
1201 1238
1202 -/* slow jumps cases (compute x86 flags) */  
1203 -void OPPROTO op_jo_cc(void)  
1204 -{  
1205 - int eflags;  
1206 - eflags = cc_table[CC_OP].compute_all();  
1207 - if (eflags & CC_O)  
1208 - EIP = PARAM1;  
1209 - else  
1210 - EIP = PARAM2;  
1211 - FORCE_RET();  
1212 -}  
1213 -  
1214 -void OPPROTO op_jb_cc(void)  
1215 -{  
1216 - if (cc_table[CC_OP].compute_c())  
1217 - EIP = PARAM1;  
1218 - else  
1219 - EIP = PARAM2;  
1220 - FORCE_RET();  
1221 -}  
1222 -  
1223 -void OPPROTO op_jz_cc(void)  
1224 -{  
1225 - int eflags;  
1226 - eflags = cc_table[CC_OP].compute_all();  
1227 - if (eflags & CC_Z)  
1228 - EIP = PARAM1;  
1229 - else  
1230 - EIP = PARAM2;  
1231 - FORCE_RET();  
1232 -}  
1233 -  
1234 -void OPPROTO op_jbe_cc(void) 1239 +/* slow jumps cases : in order to avoid calling a function with a
  1240 + pointer (which can generate a stack frame on PowerPC), we use
  1241 + op_setcc to set T0 and then call op_jcc. */
  1242 +void OPPROTO op_jcc(void) /* PARAM1 = TranslationBlock address; PARAM2 / PARAM3 = EIP used when T0 is nonzero / zero (see gen_jcc() in translate-i386.c). */
1235 { 1243 {
1236 - int eflags;  
1237 - eflags = cc_table[CC_OP].compute_all();  
1238 - if (eflags & (CC_Z | CC_C))  
1239 - EIP = PARAM1;  
1240 - else  
1241 - EIP = PARAM2;  
1242 - FORCE_RET();  
1243 -}  
1244 -  
1245 -void OPPROTO op_js_cc(void)  
1246 -{  
1247 - int eflags;  
1248 - eflags = cc_table[CC_OP].compute_all();  
1249 - if (eflags & CC_S)  
1250 - EIP = PARAM1;  
1251 - else  
1252 - EIP = PARAM2;  
1253 - FORCE_RET();  
1254 -}  
1255 -  
1256 -void OPPROTO op_jp_cc(void)  
1257 -{  
1258 - int eflags;  
1259 - eflags = cc_table[CC_OP].compute_all();  
1260 - if (eflags & CC_P)  
1261 - EIP = PARAM1;  
1262 - else  
1263 - EIP = PARAM2;  
1264 - FORCE_RET();  
1265 -}  
1266 -  
1267 -void OPPROTO op_jl_cc(void)  
1268 -{  
1269 - int eflags;  
1270 - eflags = cc_table[CC_OP].compute_all();  
1271 - if ((eflags ^ (eflags >> 4)) & 0x80)  
1272 - EIP = PARAM1;  
1273 - else  
1274 - EIP = PARAM2;  
1275 - FORCE_RET();  
1276 -}  
1277 -  
1278 -void OPPROTO op_jle_cc(void)  
1279 -{  
1280 - int eflags;  
1281 - eflags = cc_table[CC_OP].compute_all();  
1282 - if (((eflags ^ (eflags >> 4)) & 0x80) || (eflags & CC_Z))  
1283 - EIP = PARAM1; 1244 + if (T0) /* T0 holds the condition result left by the setcc op that gen_jcc() emits just before this one */
  1245 + JUMP_TB(PARAM1, 0, PARAM2); /* T0 != 0: exit slot 0, EIP = PARAM2 */
1284 else 1246 else
1285 - EIP = PARAM2; 1247 + JUMP_TB(PARAM1, 1, PARAM3); /* T0 == 0: exit slot 1, EIP = PARAM3 */
1286 FORCE_RET(); 1248 FORCE_RET();
1287 } 1249 }
1288 1250
opc-i386.h
@@ -231,18 +231,20 @@ DEF(jmp_T0, 0) @@ -231,18 +231,20 @@ DEF(jmp_T0, 0)
231 DEF(jmp_im, 1) 231 DEF(jmp_im, 1)
232 DEF(int_im, 2) 232 DEF(int_im, 2)
233 DEF(raise_exception, 1) 233 DEF(raise_exception, 1)
234 -DEF(into, 0) 234 +DEF(into, 1)
235 DEF(cli, 0) 235 DEF(cli, 0)
236 DEF(sti, 0) 236 DEF(sti, 0)
237 DEF(boundw, 0) 237 DEF(boundw, 0)
238 DEF(boundl, 0) 238 DEF(boundl, 0)
239 DEF(cmpxchg8b, 0) 239 DEF(cmpxchg8b, 0)
240 -DEF(jb_subb, 2)  
241 -DEF(jz_subb, 2)  
242 -DEF(jbe_subb, 2)  
243 -DEF(js_subb, 2)  
244 -DEF(jl_subb, 2)  
245 -DEF(jle_subb, 2) 240 +DEF(jmp_tb_next, 2)
  241 +DEF(movl_T0_0, 0)
  242 +DEF(jb_subb, 3)
  243 +DEF(jz_subb, 3)
  244 +DEF(jbe_subb, 3)
  245 +DEF(js_subb, 3)
  246 +DEF(jl_subb, 3)
  247 +DEF(jle_subb, 3)
246 DEF(setb_T0_subb, 0) 248 DEF(setb_T0_subb, 0)
247 DEF(setz_T0_subb, 0) 249 DEF(setz_T0_subb, 0)
248 DEF(setbe_T0_subb, 0) 250 DEF(setbe_T0_subb, 0)
@@ -314,12 +316,12 @@ DEF(insb_a16, 0) @@ -314,12 +316,12 @@ DEF(insb_a16, 0)
314 DEF(rep_insb_a16, 0) 316 DEF(rep_insb_a16, 0)
315 DEF(outb_T0_T1, 0) 317 DEF(outb_T0_T1, 0)
316 DEF(inb_T0_T1, 0) 318 DEF(inb_T0_T1, 0)
317 -DEF(jb_subw, 2)  
318 -DEF(jz_subw, 2)  
319 -DEF(jbe_subw, 2)  
320 -DEF(js_subw, 2)  
321 -DEF(jl_subw, 2)  
322 -DEF(jle_subw, 2) 319 +DEF(jb_subw, 3)
  320 +DEF(jz_subw, 3)
  321 +DEF(jbe_subw, 3)
  322 +DEF(js_subw, 3)
  323 +DEF(jl_subw, 3)
  324 +DEF(jle_subw, 3)
323 DEF(loopnzw, 2) 325 DEF(loopnzw, 2)
324 DEF(loopzw, 2) 326 DEF(loopzw, 2)
325 DEF(loopw, 2) 327 DEF(loopw, 2)
@@ -405,12 +407,12 @@ DEF(insw_a16, 0) @@ -405,12 +407,12 @@ DEF(insw_a16, 0)
405 DEF(rep_insw_a16, 0) 407 DEF(rep_insw_a16, 0)
406 DEF(outw_T0_T1, 0) 408 DEF(outw_T0_T1, 0)
407 DEF(inw_T0_T1, 0) 409 DEF(inw_T0_T1, 0)
408 -DEF(jb_subl, 2)  
409 -DEF(jz_subl, 2)  
410 -DEF(jbe_subl, 2)  
411 -DEF(js_subl, 2)  
412 -DEF(jl_subl, 2)  
413 -DEF(jle_subl, 2) 410 +DEF(jb_subl, 3)
  411 +DEF(jz_subl, 3)
  412 +DEF(jbe_subl, 3)
  413 +DEF(js_subl, 3)
  414 +DEF(jl_subl, 3)
  415 +DEF(jle_subl, 3)
414 DEF(loopnzl, 2) 416 DEF(loopnzl, 2)
415 DEF(loopzl, 2) 417 DEF(loopzl, 2)
416 DEF(loopl, 2) 418 DEF(loopl, 2)
@@ -536,14 +538,7 @@ DEF(movl_A0_seg, 1) @@ -536,14 +538,7 @@ DEF(movl_A0_seg, 1)
536 DEF(addl_A0_seg, 1) 538 DEF(addl_A0_seg, 1)
537 DEF(lsl, 0) 539 DEF(lsl, 0)
538 DEF(lar, 0) 540 DEF(lar, 0)
539 -DEF(jo_cc, 2)  
540 -DEF(jb_cc, 2)  
541 -DEF(jz_cc, 2)  
542 -DEF(jbe_cc, 2)  
543 -DEF(js_cc, 2)  
544 -DEF(jp_cc, 2)  
545 -DEF(jl_cc, 2)  
546 -DEF(jle_cc, 2) 541 +DEF(jcc, 3)
547 DEF(seto_T0_cc, 0) 542 DEF(seto_T0_cc, 0)
548 DEF(setb_T0_cc, 0) 543 DEF(setb_T0_cc, 0)
549 DEF(setz_T0_cc, 0) 544 DEF(setz_T0_cc, 0)
ops_template.h
@@ -238,18 +238,18 @@ void OPPROTO glue(op_jb_sub, SUFFIX)(void) @@ -238,18 +238,18 @@ void OPPROTO glue(op_jb_sub, SUFFIX)(void)
238 src2 = CC_SRC - CC_DST; 238 src2 = CC_SRC - CC_DST;
239 239
240 if ((DATA_TYPE)src1 < (DATA_TYPE)src2) 240 if ((DATA_TYPE)src1 < (DATA_TYPE)src2)
241 - EIP = PARAM1; 241 + JUMP_TB(PARAM1, 0, PARAM2);
242 else 242 else
243 - EIP = PARAM2; 243 + JUMP_TB(PARAM1, 1, PARAM3);
244 FORCE_RET(); 244 FORCE_RET();
245 } 245 }
246 246
247 void OPPROTO glue(op_jz_sub, SUFFIX)(void) 247 void OPPROTO glue(op_jz_sub, SUFFIX)(void) /* Two-way block exit on whether CC_DST, cast to DATA_TYPE, is zero; PARAM1 is forwarded to JUMP_TB as the TranslationBlock address. */
248 { 248 {
249 if ((DATA_TYPE)CC_DST == 0) 249 if ((DATA_TYPE)CC_DST == 0)
250 - EIP = PARAM1; 250 + JUMP_TB(PARAM1, 0, PARAM2); /* zero: exit slot 0, EIP = PARAM2 */
251 else 251 else
252 - EIP = PARAM2; 252 + JUMP_TB(PARAM1, 1, PARAM3); /* nonzero: exit slot 1, EIP = PARAM3 */
253 FORCE_RET(); 253 FORCE_RET();
254 } 254 }
255 255
@@ -260,18 +260,18 @@ void OPPROTO glue(op_jbe_sub, SUFFIX)(void) @@ -260,18 +260,18 @@ void OPPROTO glue(op_jbe_sub, SUFFIX)(void)
260 src2 = CC_SRC - CC_DST; 260 src2 = CC_SRC - CC_DST;
261 261
262 if ((DATA_TYPE)src1 <= (DATA_TYPE)src2) 262 if ((DATA_TYPE)src1 <= (DATA_TYPE)src2)
263 - EIP = PARAM1; 263 + JUMP_TB(PARAM1, 0, PARAM2);
264 else 264 else
265 - EIP = PARAM2; 265 + JUMP_TB(PARAM1, 1, PARAM3);
266 FORCE_RET(); 266 FORCE_RET();
267 } 267 }
268 268
269 void OPPROTO glue(op_js_sub, SUFFIX)(void) 269 void OPPROTO glue(op_js_sub, SUFFIX)(void) /* Two-way block exit on whether CC_DST has any SIGN_MASK bit set; PARAM1 is forwarded to JUMP_TB as the TranslationBlock address. */
270 { 270 {
271 if (CC_DST & SIGN_MASK) 271 if (CC_DST & SIGN_MASK)
272 - EIP = PARAM1; 272 + JUMP_TB(PARAM1, 0, PARAM2); /* sign bit set: exit slot 0, EIP = PARAM2 */
273 else 273 else
274 - EIP = PARAM2; 274 + JUMP_TB(PARAM1, 1, PARAM3); /* sign bit clear: exit slot 1, EIP = PARAM3 */
275 FORCE_RET(); 275 FORCE_RET();
276 } 276 }
277 277
@@ -282,9 +282,9 @@ void OPPROTO glue(op_jl_sub, SUFFIX)(void) @@ -282,9 +282,9 @@ void OPPROTO glue(op_jl_sub, SUFFIX)(void)
282 src2 = CC_SRC - CC_DST; 282 src2 = CC_SRC - CC_DST;
283 283
284 if ((DATA_STYPE)src1 < (DATA_STYPE)src2) 284 if ((DATA_STYPE)src1 < (DATA_STYPE)src2)
285 - EIP = PARAM1; 285 + JUMP_TB(PARAM1, 0, PARAM2);
286 else 286 else
287 - EIP = PARAM2; 287 + JUMP_TB(PARAM1, 1, PARAM3);
288 FORCE_RET(); 288 FORCE_RET();
289 } 289 }
290 290
@@ -295,9 +295,9 @@ void OPPROTO glue(op_jle_sub, SUFFIX)(void) @@ -295,9 +295,9 @@ void OPPROTO glue(op_jle_sub, SUFFIX)(void)
295 src2 = CC_SRC - CC_DST; 295 src2 = CC_SRC - CC_DST;
296 296
297 if ((DATA_STYPE)src1 <= (DATA_STYPE)src2) 297 if ((DATA_STYPE)src1 <= (DATA_STYPE)src2)
298 - EIP = PARAM1; 298 + JUMP_TB(PARAM1, 0, PARAM2);
299 else 299 else
300 - EIP = PARAM2; 300 + JUMP_TB(PARAM1, 1, PARAM3);
301 FORCE_RET(); 301 FORCE_RET();
302 } 302 }
303 303
translate-i386.c
@@ -31,11 +31,15 @@ @@ -31,11 +31,15 @@
31 31
32 #define IN_OP_I386 32 #define IN_OP_I386
33 #include "cpu-i386.h" 33 #include "cpu-i386.h"
  34 +#include "exec.h"
34 35
35 /* XXX: move that elsewhere */ 36 /* XXX: move that elsewhere */
36 static uint16_t *gen_opc_ptr; 37 static uint16_t *gen_opc_ptr;
37 static uint32_t *gen_opparam_ptr; 38 static uint32_t *gen_opparam_ptr;
38 int __op_param1, __op_param2, __op_param3; 39 int __op_param1, __op_param2, __op_param3;
  40 +#ifdef USE_DIRECT_JUMP
  41 +int __op_jmp0, __op_jmp1;
  42 +#endif
39 43
40 #ifdef __i386__ 44 #ifdef __i386__
41 static inline void flush_icache_range(unsigned long start, unsigned long stop) 45 static inline void flush_icache_range(unsigned long start, unsigned long stop)
@@ -67,14 +71,14 @@ static void inline flush_icache_range(unsigned long start, unsigned long stop) @@ -67,14 +71,14 @@ static void inline flush_icache_range(unsigned long start, unsigned long stop)
67 stop = (stop + MIN_CACHE_LINE_SIZE - 1) & ~(MIN_CACHE_LINE_SIZE - 1); 71 stop = (stop + MIN_CACHE_LINE_SIZE - 1) & ~(MIN_CACHE_LINE_SIZE - 1);
68 72
69 for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) { 73 for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) {
70 - asm ("dcbst 0,%0;" : : "r"(p) : "memory"); 74 + asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
71 } 75 }
72 - asm ("sync"); 76 + asm volatile ("sync" : : : "memory");
73 for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) { 77 for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) {
74 - asm ("icbi 0,%0; sync;" : : "r"(p) : "memory"); 78 + asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
75 } 79 }
76 - asm ("sync");  
77 - asm ("isync"); 80 + asm volatile ("sync" : : : "memory");
  81 + asm volatile ("isync" : : : "memory");
78 } 82 }
79 #endif 83 #endif
80 84
@@ -129,6 +133,7 @@ typedef struct DisasContext { @@ -129,6 +133,7 @@ typedef struct DisasContext {
129 int cpl; 133 int cpl;
130 int iopl; 134 int iopl;
131 int tf; /* TF cpu flag */ 135 int tf; /* TF cpu flag */
  136 + TranslationBlock *tb;
132 } DisasContext; 137 } DisasContext;
133 138
134 /* i386 arith/logic operations */ 139 /* i386 arith/logic operations */
@@ -192,6 +197,7 @@ enum { @@ -192,6 +197,7 @@ enum {
192 typedef void (GenOpFunc)(void); 197 typedef void (GenOpFunc)(void);
193 typedef void (GenOpFunc1)(long); 198 typedef void (GenOpFunc1)(long);
194 typedef void (GenOpFunc2)(long, long); 199 typedef void (GenOpFunc2)(long, long);
  200 +typedef void (GenOpFunc3)(long, long, long);
195 201
196 static GenOpFunc *gen_op_mov_reg_T0[3][8] = { 202 static GenOpFunc *gen_op_mov_reg_T0[3][8] = {
197 [OT_BYTE] = { 203 [OT_BYTE] = {
@@ -699,18 +705,7 @@ enum { @@ -699,18 +705,7 @@ enum {
699 JCC_LE, 705 JCC_LE,
700 }; 706 };
701 707
702 -static GenOpFunc2 *gen_jcc_slow[8] = {  
703 - gen_op_jo_cc,  
704 - gen_op_jb_cc,  
705 - gen_op_jz_cc,  
706 - gen_op_jbe_cc,  
707 - gen_op_js_cc,  
708 - gen_op_jp_cc,  
709 - gen_op_jl_cc,  
710 - gen_op_jle_cc,  
711 -};  
712 -  
713 -static GenOpFunc2 *gen_jcc_sub[3][8] = { 708 +static GenOpFunc3 *gen_jcc_sub[3][8] = {
714 [OT_BYTE] = { 709 [OT_BYTE] = {
715 NULL, 710 NULL,
716 gen_op_jb_subb, 711 gen_op_jb_subb,
@@ -1090,8 +1085,9 @@ static inline uint32_t insn_get(DisasContext *s, int ot) @@ -1090,8 +1085,9 @@ static inline uint32_t insn_get(DisasContext *s, int ot)
1090 1085
1091 static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip) 1086 static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip)
1092 { 1087 {
  1088 + TranslationBlock *tb;
1093 int inv, jcc_op; 1089 int inv, jcc_op;
1094 - GenOpFunc2 *func; 1090 + GenOpFunc3 *func;
1095 1091
1096 inv = b & 1; 1092 inv = b & 1;
1097 jcc_op = (b >> 1) & 7; 1093 jcc_op = (b >> 1) & 7;
@@ -1101,8 +1097,6 @@ static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip) @@ -1101,8 +1097,6 @@ static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip)
1101 case CC_OP_SUBW: 1097 case CC_OP_SUBW:
1102 case CC_OP_SUBL: 1098 case CC_OP_SUBL:
1103 func = gen_jcc_sub[s->cc_op - CC_OP_SUBB][jcc_op]; 1099 func = gen_jcc_sub[s->cc_op - CC_OP_SUBB][jcc_op];
1104 - if (!func)  
1105 - goto slow_jcc;  
1106 break; 1100 break;
1107 1101
1108 /* some jumps are easy to compute */ 1102 /* some jumps are easy to compute */
@@ -1138,21 +1132,30 @@ static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip) @@ -1138,21 +1132,30 @@ static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip)
1138 func = gen_jcc_sub[(s->cc_op - CC_OP_ADDB) % 3][jcc_op]; 1132 func = gen_jcc_sub[(s->cc_op - CC_OP_ADDB) % 3][jcc_op];
1139 break; 1133 break;
1140 default: 1134 default:
1141 - goto slow_jcc; 1135 + func = NULL;
  1136 + break;
1142 } 1137 }
1143 break; 1138 break;
1144 default: 1139 default:
1145 - slow_jcc:  
1146 - if (s->cc_op != CC_OP_DYNAMIC)  
1147 - gen_op_set_cc_op(s->cc_op);  
1148 - func = gen_jcc_slow[jcc_op]; 1140 + func = NULL;
1149 break; 1141 break;
1150 } 1142 }
  1143 +
  1144 + if (s->cc_op != CC_OP_DYNAMIC)
  1145 + gen_op_set_cc_op(s->cc_op);
  1146 +
  1147 + if (!func) {
  1148 + gen_setcc_slow[jcc_op]();
  1149 + func = gen_op_jcc;
  1150 + }
  1151 +
  1152 + tb = s->tb;
1151 if (!inv) { 1153 if (!inv) {
1152 - func(val, next_eip); 1154 + func((long)tb, val, next_eip);
1153 } else { 1155 } else {
1154 - func(next_eip, val); 1156 + func((long)tb, next_eip, val);
1155 } 1157 }
  1158 + s->is_jmp = 3;
1156 } 1159 }
1157 1160
1158 static void gen_setcc(DisasContext *s, int b) 1161 static void gen_setcc(DisasContext *s, int b)
@@ -1372,6 +1375,18 @@ static void gen_exception(DisasContext *s, int trapno, unsigned int cur_eip) @@ -1372,6 +1375,18 @@ static void gen_exception(DisasContext *s, int trapno, unsigned int cur_eip)
1372 s->is_jmp = 1; 1375 s->is_jmp = 1;
1373 } 1376 }
1374 1377
  1378 +/* generate a jump to eip. No segment change must happen before as a
  1379 + direct call to the next block may occur */
  1380 +static void gen_jmp(DisasContext *s, unsigned int eip)
  1381 +{
  1382 + TranslationBlock *tb = s->tb; /* block being translated; its address becomes the first parameter of the emitted op */
  1383 +
  1384 + if (s->cc_op != CC_OP_DYNAMIC) /* store cc_op here: cpu_x86_gen_code() skips its own store when is_jmp == 3 */
  1385 + gen_op_set_cc_op(s->cc_op);
  1386 + gen_op_jmp_tb_next((long)tb, eip); /* emits op_jmp_tb_next with PARAM1 = tb, PARAM2 = eip */
  1387 + s->is_jmp = 3; /* 3 = block ends with a JUMP_TB op; cpu_x86_gen_code() then adds neither the trailing jmp_im nor op_movl_T0_0 */
  1388 +}
  1389 +
1375 /* return the next pc address. Return -1 if no insn found. *is_jmp_ptr 1390 /* return the next pc address. Return -1 if no insn found. *is_jmp_ptr
1376 is set to true if the instruction sets the PC (last instruction of 1391 is set to true if the instruction sets the PC (last instruction of
1377 a basic block) */ 1392 a basic block) */
@@ -2964,8 +2979,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start) @@ -2964,8 +2979,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start)
2964 val &= 0xffff; 2979 val &= 0xffff;
2965 gen_op_movl_T0_im(next_eip); 2980 gen_op_movl_T0_im(next_eip);
2966 gen_push_T0(s); 2981 gen_push_T0(s);
2967 - gen_op_jmp_im(val);  
2968 - s->is_jmp = 1; 2982 + gen_jmp(s, val);
2969 } 2983 }
2970 break; 2984 break;
2971 case 0x9a: /* lcall im */ 2985 case 0x9a: /* lcall im */
@@ -2996,8 +3010,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start) @@ -2996,8 +3010,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start)
2996 val += s->pc - s->cs_base; 3010 val += s->pc - s->cs_base;
2997 if (s->dflag == 0) 3011 if (s->dflag == 0)
2998 val = val & 0xffff; 3012 val = val & 0xffff;
2999 - gen_op_jmp_im(val);  
3000 - s->is_jmp = 1; 3013 + gen_jmp(s, val);
3001 break; 3014 break;
3002 case 0xea: /* ljmp im */ 3015 case 0xea: /* ljmp im */
3003 { 3016 {
@@ -3019,8 +3032,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start) @@ -3019,8 +3032,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start)
3019 val += s->pc - s->cs_base; 3032 val += s->pc - s->cs_base;
3020 if (s->dflag == 0) 3033 if (s->dflag == 0)
3021 val = val & 0xffff; 3034 val = val & 0xffff;
3022 - gen_op_jmp_im(val);  
3023 - s->is_jmp = 1; 3035 + gen_jmp(s, val);
3024 break; 3036 break;
3025 case 0x70 ... 0x7f: /* jcc Jb */ 3037 case 0x70 ... 0x7f: /* jcc Jb */
3026 val = (int8_t)insn_get(s, OT_BYTE); 3038 val = (int8_t)insn_get(s, OT_BYTE);
@@ -3037,7 +3049,6 @@ long disas_insn(DisasContext *s, uint8_t *pc_start) @@ -3037,7 +3049,6 @@ long disas_insn(DisasContext *s, uint8_t *pc_start)
3037 if (s->dflag == 0) 3049 if (s->dflag == 0)
3038 val &= 0xffff; 3050 val &= 0xffff;
3039 gen_jcc(s, b, val, next_eip); 3051 gen_jcc(s, b, val, next_eip);
3040 - s->is_jmp = 1;  
3041 break; 3052 break;
3042 3053
3043 case 0x190 ... 0x19f: /* setcc Gv */ 3054 case 0x190 ... 0x19f: /* setcc Gv */
@@ -3393,15 +3404,6 @@ static uint16_t opc_read_flags[NB_OPS] = { @@ -3393,15 +3404,6 @@ static uint16_t opc_read_flags[NB_OPS] = {
3393 3404
3394 [INDEX_op_into] = CC_O, 3405 [INDEX_op_into] = CC_O,
3395 3406
3396 - [INDEX_op_jo_cc] = CC_O,  
3397 - [INDEX_op_jb_cc] = CC_C,  
3398 - [INDEX_op_jz_cc] = CC_Z,  
3399 - [INDEX_op_jbe_cc] = CC_Z | CC_C,  
3400 - [INDEX_op_js_cc] = CC_S,  
3401 - [INDEX_op_jp_cc] = CC_P,  
3402 - [INDEX_op_jl_cc] = CC_O | CC_S,  
3403 - [INDEX_op_jle_cc] = CC_O | CC_S | CC_Z,  
3404 -  
3405 [INDEX_op_jb_subb] = CC_C, 3407 [INDEX_op_jb_subb] = CC_C,
3406 [INDEX_op_jb_subw] = CC_C, 3408 [INDEX_op_jb_subw] = CC_C,
3407 [INDEX_op_jb_subl] = CC_C, 3409 [INDEX_op_jb_subl] = CC_C,
@@ -3730,7 +3732,7 @@ static uint32_t gen_opparam_buf[OPPARAM_BUF_SIZE]; @@ -3730,7 +3732,7 @@ static uint32_t gen_opparam_buf[OPPARAM_BUF_SIZE];
3730 int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, 3732 int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size,
3731 int *gen_code_size_ptr, 3733 int *gen_code_size_ptr,
3732 uint8_t *pc_start, uint8_t *cs_base, int flags, 3734 uint8_t *pc_start, uint8_t *cs_base, int flags,
3733 - int *code_size_ptr) 3735 + int *code_size_ptr, TranslationBlock *tb)
3734 { 3736 {
3735 DisasContext dc1, *dc = &dc1; 3737 DisasContext dc1, *dc = &dc1;
3736 uint8_t *pc_ptr; 3738 uint8_t *pc_ptr;
@@ -3750,6 +3752,7 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, @@ -3750,6 +3752,7 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size,
3750 dc->tf = (flags >> GEN_FLAG_TF_SHIFT) & 1; 3752 dc->tf = (flags >> GEN_FLAG_TF_SHIFT) & 1;
3751 dc->cc_op = CC_OP_DYNAMIC; 3753 dc->cc_op = CC_OP_DYNAMIC;
3752 dc->cs_base = cs_base; 3754 dc->cs_base = cs_base;
  3755 + dc->tb = tb;
3753 3756
3754 gen_opc_ptr = gen_opc_buf; 3757 gen_opc_ptr = gen_opc_buf;
3755 gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; 3758 gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
@@ -3776,15 +3779,21 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, @@ -3776,15 +3779,21 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size,
3776 } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end && 3779 } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end &&
3777 (pc_ptr - pc_start) < (TARGET_PAGE_SIZE - 32)); 3780 (pc_ptr - pc_start) < (TARGET_PAGE_SIZE - 32));
3778 /* we must store the eflags state if it is not already done */ 3781 /* we must store the eflags state if it is not already done */
3779 - if (dc->cc_op != CC_OP_DYNAMIC)  
3780 - gen_op_set_cc_op(dc->cc_op);  
3781 - if (dc->is_jmp != 1) {  
3782 - /* we add an additionnal jmp to update the simulated PC */  
3783 - gen_op_jmp_im(ret - (unsigned long)dc->cs_base); 3782 + if (dc->is_jmp != 3) {
  3783 + if (dc->cc_op != CC_OP_DYNAMIC)
  3784 + gen_op_set_cc_op(dc->cc_op);
  3785 + if (dc->is_jmp != 1) {
  3786 + /* we add an additional jmp to update the simulated PC */
  3787 + gen_op_jmp_im(ret - (unsigned long)dc->cs_base);
  3788 + }
3784 } 3789 }
3785 if (dc->tf) { 3790 if (dc->tf) {
3786 gen_op_raise_exception(EXCP01_SSTP); 3791 gen_op_raise_exception(EXCP01_SSTP);
3787 } 3792 }
  3793 + if (dc->is_jmp != 3) {
  3794 + /* indicate that the hash table must be used to find the next TB */
  3795 + gen_op_movl_T0_0();
  3796 + }
3788 3797
3789 *gen_opc_ptr = INDEX_op_end; 3798 *gen_opc_ptr = INDEX_op_end;
3790 3799
@@ -3814,8 +3823,17 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, @@ -3814,8 +3823,17 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size,
3814 #endif 3823 #endif
3815 3824
3816 /* generate machine code */ 3825 /* generate machine code */
3817 - gen_code_size = dyngen_code(gen_code_buf, gen_opc_buf, gen_opparam_buf); 3826 + tb->tb_next_offset[0] = 0xffff;
  3827 + tb->tb_next_offset[1] = 0xffff;
  3828 + gen_code_size = dyngen_code(gen_code_buf, tb->tb_next_offset,
  3829 +#ifdef USE_DIRECT_JUMP
  3830 + tb->tb_jmp_offset,
  3831 +#else
  3832 + NULL,
  3833 +#endif
  3834 + gen_opc_buf, gen_opparam_buf);
3818 flush_icache_range((unsigned long)gen_code_buf, (unsigned long)(gen_code_buf + gen_code_size)); 3835 flush_icache_range((unsigned long)gen_code_buf, (unsigned long)(gen_code_buf + gen_code_size));
  3836 +
3819 *gen_code_size_ptr = gen_code_size; 3837 *gen_code_size_ptr = gen_code_size;
3820 *code_size_ptr = pc_ptr - pc_start; 3838 *code_size_ptr = pc_ptr - pc_start;
3821 #ifdef DEBUG_DISAS 3839 #ifdef DEBUG_DISAS