Commit d4e8164f7e9342d692c1d6f1c848ed05f8007ece

Authored by bellard
1 parent 08351fb3

direct chaining for PowerPC and i386


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@183 c046a42c-6fe2-441c-8c8c-71466251a162
dyngen.c
... ... @@ -170,7 +170,16 @@ void elf_swap_phdr(struct elf_phdr *h)
170 170 swabls(&h->p_align); /* Segment alignment */
171 171 }
172 172  
  173 +/* ELF file info */
173 174 int do_swap;
  175 +struct elf_shdr *shdr;
  176 +struct elfhdr ehdr;
  177 +ElfW(Sym) *symtab;
  178 +int nb_syms;
  179 +char *strtab;
  180 +/* data section */
  181 +uint8_t *data_data;
  182 +int data_shndx;
174 183  
175 184 uint16_t get16(uint16_t *p)
176 185 {
... ... @@ -270,7 +279,7 @@ int strstart(const char *str, const char *val, const char **ptr)
270 279 /* generate op code */
271 280 void gen_code(const char *name, host_ulong offset, host_ulong size,
272 281 FILE *outfile, uint8_t *text, ELF_RELOC *relocs, int nb_relocs, int reloc_sh_type,
273   - ElfW(Sym) *symtab, char *strtab, int gen_switch)
  282 + int gen_switch)
274 283 {
275 284 int copy_size = 0;
276 285 uint8_t *p_start, *p_end;
... ... @@ -291,13 +300,16 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
291 300 switch(ELF_ARCH) {
292 301 case EM_386:
293 302 {
294   - uint8_t *p;
295   - p = p_end - 1;
296   - if (p == p_start)
  303 + int len;
  304 + len = p_end - p_start;
  305 + if (len == 0)
297 306 error("empty code for %s", name);
298   - if (p[0] != 0xc3)
299   - error("ret expected at the end of %s", name);
300   - copy_size = p - p_start;
  307 + if (p_end[-1] == 0xc3) {
  308 + len--;
  309 + } else {
  310 + error("ret or jmp expected at the end of %s", name);
  311 + }
  312 + copy_size = len;
301 313 }
302 314 break;
303 315 case EM_PPC:
... ... @@ -423,7 +435,7 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
423 435 sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
424 436 if (strstart(sym_name, "__op_param", &p)) {
425 437 n = strtoul(p, NULL, 10);
426   - if (n >= MAX_ARGS)
  438 + if (n > MAX_ARGS)
427 439 error("too many arguments in %s", name);
428 440 args_present[n - 1] = 1;
429 441 }
... ... @@ -459,7 +471,9 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
459 471 if (rel->r_offset >= start_offset &&
460 472 rel->r_offset < start_offset + copy_size) {
461 473 sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
462   - if (*sym_name && !strstart(sym_name, "__op_param", &p)) {
  474 + if (*sym_name &&
  475 + !strstart(sym_name, "__op_param", NULL) &&
  476 + !strstart(sym_name, "__op_jmp", NULL)) {
463 477 #if defined(HOST_SPARC)
464 478 if (sym_name[0] == '.') {
465 479 fprintf(outfile,
... ... @@ -474,6 +488,31 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
474 488 }
475 489  
476 490 fprintf(outfile, " memcpy(gen_code_ptr, (void *)((char *)&%s+%d), %d);\n", name, start_offset - offset, copy_size);
  491 +
  492 + /* emit code offset information */
  493 + {
  494 + ElfW(Sym) *sym;
  495 + const char *sym_name, *p;
  496 + target_ulong val;
  497 + int n;
  498 +
  499 + for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
  500 + sym_name = strtab + sym->st_name;
  501 + if (strstart(sym_name, "__op_label", &p)) {
  502 + /* test if the variable refers to a label inside
  503 + the code we are generating */
  504 + if (sym->st_shndx != data_shndx)
  505 + error("__op_labelN symbols must be in .data or .sdata section");
  506 + val = *(target_ulong *)(data_data + sym->st_value);
  507 + if (val >= start_offset && val < start_offset + copy_size) {
  508 + n = strtol(p, NULL, 10);
  509 + fprintf(outfile, " label_offsets[%d] = %d + (gen_code_ptr - gen_code_buf);\n", n, val - start_offset);
  510 + }
  511 + }
  512 + }
  513 + }
  514 +
  515 + /* load parameters in variables */
477 516 for(i = 0; i < nb_args; i++) {
478 517 fprintf(outfile, " param%d = *opparam_ptr++;\n", i + 1);
479 518 }
... ... @@ -519,6 +558,18 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
519 558 if (rel->r_offset >= start_offset &&
520 559 rel->r_offset < start_offset + copy_size) {
521 560 sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
  561 + if (strstart(sym_name, "__op_jmp", &p)) {
  562 + int n;
  563 + n = strtol(p, NULL, 10);
  564 + /* __op_jmp relocations are done at
  565 + runtime to do translated block
  566 + chaining: the offset of the instruction
  567 + needs to be stored */
  568 + fprintf(outfile, " jmp_offsets[%d] = %d + (gen_code_ptr - gen_code_buf);\n",
  569 + n, rel->r_offset - start_offset);
  570 + continue;
  571 + }
  572 +
522 573 if (strstart(sym_name, "__op_param", &p)) {
523 574 snprintf(name, sizeof(name), "param%s", p);
524 575 } else {
... ... @@ -824,11 +875,10 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
824 875 int load_elf(const char *filename, FILE *outfile, int do_print_enum)
825 876 {
826 877 int fd;
827   - struct elfhdr ehdr;
828   - struct elf_shdr *sec, *shdr, *symtab_sec, *strtab_sec, *text_sec;
829   - int i, j, nb_syms;
830   - ElfW(Sym) *symtab, *sym;
831   - char *shstr, *strtab;
  878 + struct elf_shdr *sec, *symtab_sec, *strtab_sec, *text_sec;
  879 + int i, j;
  880 + ElfW(Sym) *sym;
  881 + char *shstr, *data_name;
832 882 uint8_t *text;
833 883 void *relocs;
834 884 int nb_relocs, reloc_sh_type;
... ... @@ -880,6 +930,17 @@ int load_elf(const char *filename, FILE *outfile, int do_print_enum)
880 930 error("could not find .text section");
881 931 text = load_data(fd, text_sec->sh_offset, text_sec->sh_size);
882 932  
  933 +#if defined(HOST_PPC)
  934 + data_name = ".sdata";
  935 +#else
  936 + data_name = ".data";
  937 +#endif
  938 + sec = find_elf_section(shdr, ehdr.e_shnum, shstr, data_name);
  939 + if (!sec)
  940 + error("could not find %s section", data_name);
  941 + data_shndx = sec - shdr;
  942 + data_data = load_data(fd, sec->sh_offset, sec->sh_size);
  943 +
883 944 /* find text relocations, if any */
884 945 nb_relocs = 0;
885 946 relocs = NULL;
... ... @@ -936,7 +997,7 @@ int load_elf(const char *filename, FILE *outfile, int do_print_enum)
936 997 name = strtab + sym->st_name;
937 998 if (strstart(name, OP_PREFIX, &p)) {
938 999 gen_code(name, sym->st_value, sym->st_size, outfile,
939   - text, relocs, nb_relocs, reloc_sh_type, symtab, strtab, 2);
  1000 + text, relocs, nb_relocs, reloc_sh_type, 2);
940 1001 }
941 1002 }
942 1003 } else {
... ... @@ -963,6 +1024,7 @@ fprintf(outfile,
963 1024 #endif
964 1025 fprintf(outfile,
965 1026 "int dyngen_code(uint8_t *gen_code_buf,\n"
  1027 +" uint16_t *label_offsets, uint16_t *jmp_offsets,\n"
966 1028 " const uint16_t *opc_buf, const uint32_t *opparam_buf)\n"
967 1029 "{\n"
968 1030 " uint8_t *gen_code_ptr;\n"
... ... @@ -1001,7 +1063,7 @@ fprintf(outfile,
1001 1063 if (sym->st_shndx != (text_sec - shdr))
1002 1064 error("invalid section for opcode (0x%x)", sym->st_shndx);
1003 1065 gen_code(name, sym->st_value, sym->st_size, outfile,
1004   - text, relocs, nb_relocs, reloc_sh_type, symtab, strtab, 1);
  1066 + text, relocs, nb_relocs, reloc_sh_type, 1);
1005 1067 }
1006 1068 }
1007 1069  
... ... @@ -1056,7 +1118,7 @@ fprintf(outfile,
1056 1118 if (sym->st_shndx != (text_sec - shdr))
1057 1119 error("invalid section for opcode (0x%x)", sym->st_shndx);
1058 1120 gen_code(name, sym->st_value, sym->st_size, outfile,
1059   - text, relocs, nb_relocs, reloc_sh_type, symtab, strtab, 0);
  1121 + text, relocs, nb_relocs, reloc_sh_type, 0);
1060 1122 }
1061 1123 }
1062 1124 }
... ...
exec-i386.c
... ... @@ -120,7 +120,7 @@ int cpu_x86_exec(CPUX86State *env1)
120 120 TranslationBlock *tb, **ptb;
121 121 uint8_t *tc_ptr, *cs_base, *pc;
122 122 unsigned int flags;
123   -
  123 +
124 124 /* first we save global registers */
125 125 saved_T0 = T0;
126 126 saved_T1 = T1;
... ... @@ -169,6 +169,7 @@ int cpu_x86_exec(CPUX86State *env1)
169 169  
170 170 /* prepare setjmp context for exception handling */
171 171 if (setjmp(env->jmp_env) == 0) {
  172 + T0 = 0; /* force lookup of first TB */
172 173 for(;;) {
173 174 if (env->interrupt_request) {
174 175 raise_exception(EXCP_INTERRUPT);
... ... @@ -209,30 +210,40 @@ int cpu_x86_exec(CPUX86State *env1)
209 210 flags |= (env->eflags & TF_MASK) << (GEN_FLAG_TF_SHIFT - 8);
210 211 cs_base = env->seg_cache[R_CS].base;
211 212 pc = cs_base + env->eip;
  213 + spin_lock(&tb_lock);
212 214 tb = tb_find(&ptb, (unsigned long)pc, (unsigned long)cs_base,
213 215 flags);
214 216 if (!tb) {
215 217 /* if no translated code available, then translate it now */
216   - /* very inefficient but safe: we lock all the cpus
217   - when generating code */
218   - spin_lock(&tb_lock);
  218 + tb = tb_alloc((unsigned long)pc);
  219 + if (!tb) {
  220 + /* flush must be done */
  221 + tb_flush();
  222 + /* cannot fail at this point */
  223 + tb = tb_alloc((unsigned long)pc);
  224 + /* don't forget to invalidate previous TB info */
  225 + ptb = &tb_hash[tb_hash_func((unsigned long)pc)];
  226 + T0 = 0;
  227 + }
219 228 tc_ptr = code_gen_ptr;
  229 + tb->tc_ptr = tc_ptr;
220 230 ret = cpu_x86_gen_code(code_gen_ptr, CODE_GEN_MAX_SIZE,
221 231 &code_gen_size, pc, cs_base, flags,
222   - &code_size);
  232 + &code_size, tb);
223 233 /* if invalid instruction, signal it */
224 234 if (ret != 0) {
  235 + /* NOTE: the tb is allocated but not linked, so we
  236 + can leave it */
225 237 spin_unlock(&tb_lock);
226 238 raise_exception(EXCP06_ILLOP);
227 239 }
228   - tb = tb_alloc((unsigned long)pc, code_size);
229 240 *ptb = tb;
  241 + tb->size = code_size;
230 242 tb->cs_base = (unsigned long)cs_base;
231 243 tb->flags = flags;
232   - tb->tc_ptr = tc_ptr;
233 244 tb->hash_next = NULL;
  245 + tb_link(tb);
234 246 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
235   - spin_unlock(&tb_lock);
236 247 }
237 248 #ifdef DEBUG_EXEC
238 249 if (loglevel) {
... ... @@ -241,14 +252,21 @@ int cpu_x86_exec(CPUX86State *env1)
241 252 lookup_symbol((void *)tb->pc));
242 253 }
243 254 #endif
244   - /* execute the generated code */
  255 +
  256 + /* see if we can patch the calling TB */
  257 + if (T0 != 0 && !(env->eflags & TF_MASK)) {
  258 + tb_add_jump((TranslationBlock *)(T0 & ~3), T0 & 3, tb);
  259 + }
245 260 tc_ptr = tb->tc_ptr;
  261 + spin_unlock(&tb_lock);
  262 +
  263 + /* execute the generated code */
246 264 gen_func = (void *)tc_ptr;
247 265 #ifdef __sparc__
248 266 __asm__ __volatile__("call %0\n\t"
249 267 " mov %%o7,%%i0"
250 268 : /* no outputs */
251   - : "r" (gen_func)
  269 + : "r" (gen_func)
252 270 : "i0", "i1", "i2", "i3", "i4", "i5");
253 271 #else
254 272 gen_func();
... ...
exec-i386.h
... ... @@ -205,8 +205,10 @@ extern int __op_param1, __op_param2, __op_param3;
205 205 #define PARAM2 ((long)(&__op_param2))
206 206 #define PARAM3 ((long)(&__op_param3))
207 207 #endif
  208 +extern int __op_jmp0, __op_jmp1;
208 209  
209 210 #include "cpu-i386.h"
  211 +#include "exec.h"
210 212  
211 213 typedef struct CCTable {
212 214 int (*compute_all)(void); /* return all the flags */
... ...
... ... @@ -27,6 +27,7 @@
27 27 #include <sys/mman.h>
28 28  
29 29 #include "cpu-i386.h"
  30 +#include "exec.h"
30 31  
31 32 //#define DEBUG_TB_INVALIDATE
32 33 #define DEBUG_FLUSH
... ... @@ -212,6 +213,7 @@ static void page_flush_tb(void)
212 213 }
213 214  
214 215 /* flush all the translation blocks */
  216 +/* XXX: tb_flush is currently not thread safe */
215 217 void tb_flush(void)
216 218 {
217 219 int i;
... ... @@ -226,7 +228,8 @@ void tb_flush(void)
226 228 tb_hash[i] = NULL;
227 229 page_flush_tb();
228 230 code_gen_ptr = code_gen_buffer;
229   - /* XXX: flush processor icache at this point */
  231 + /* XXX: flush processor icache at this point if cache flush is
  232 + expensive */
230 233 }
231 234  
232 235 #ifdef DEBUG_TB_CHECK
... ... @@ -265,6 +268,26 @@ static void tb_page_check(void)
265 268 }
266 269 }
267 270  
  271 +void tb_jmp_check(TranslationBlock *tb)
  272 +{
  273 + TranslationBlock *tb1;
  274 + unsigned int n1;
  275 +
  276 + /* suppress any remaining jumps to this TB */
  277 + tb1 = tb->jmp_first;
  278 + for(;;) {
  279 + n1 = (long)tb1 & 3;
  280 + tb1 = (TranslationBlock *)((long)tb1 & ~3);
  281 + if (n1 == 2)
  282 + break;
  283 + tb1 = tb1->jmp_next[n1];
  284 + }
  285 + /* check end of list */
  286 + if (tb1 != tb) {
  287 + printf("ERROR: jmp_list from 0x%08lx\n", (long)tb);
  288 + }
  289 +}
  290 +
268 291 #endif
269 292  
270 293 /* invalidate one TB */
... ... @@ -282,12 +305,48 @@ static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
282 305 }
283 306 }
284 307  
  308 +static inline void tb_jmp_remove(TranslationBlock *tb, int n)
  309 +{
  310 + TranslationBlock *tb1, **ptb;
  311 + unsigned int n1;
  312 +
  313 + ptb = &tb->jmp_next[n];
  314 + tb1 = *ptb;
  315 + if (tb1) {
  316 + /* find tb(n) in circular list */
  317 + for(;;) {
  318 + tb1 = *ptb;
  319 + n1 = (long)tb1 & 3;
  320 + tb1 = (TranslationBlock *)((long)tb1 & ~3);
  321 + if (n1 == n && tb1 == tb)
  322 + break;
  323 + if (n1 == 2) {
  324 + ptb = &tb1->jmp_first;
  325 + } else {
  326 + ptb = &tb1->jmp_next[n1];
  327 + }
  328 + }
  329 + /* now we can suppress tb(n) from the list */
  330 + *ptb = tb->jmp_next[n];
  331 +
  332 + tb->jmp_next[n] = NULL;
  333 + }
  334 +}
  335 +
  336 +/* reset the jump entry 'n' of a TB so that it is not chained to
  337 + another TB */
  338 +static inline void tb_reset_jump(TranslationBlock *tb, int n)
  339 +{
  340 + tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
  341 +}
  342 +
285 343 static inline void tb_invalidate(TranslationBlock *tb, int parity)
286 344 {
287 345 PageDesc *p;
288 346 unsigned int page_index1, page_index2;
289   - unsigned int h;
290   -
  347 + unsigned int h, n1;
  348 + TranslationBlock *tb1, *tb2;
  349 +
291 350 /* remove the TB from the hash list */
292 351 h = tb_hash_func(tb->pc);
293 352 tb_remove(&tb_hash[h], tb,
... ... @@ -305,6 +364,24 @@ static inline void tb_invalidate(TranslationBlock *tb, int parity)
305 364 tb_remove(&p->first_tb, tb,
306 365 offsetof(TranslationBlock, page_next[page_index2 & 1]));
307 366 }
  367 +
  368 + /* suppress this TB from the two jump lists */
  369 + tb_jmp_remove(tb, 0);
  370 + tb_jmp_remove(tb, 1);
  371 +
  372 + /* suppress any remaining jumps to this TB */
  373 + tb1 = tb->jmp_first;
  374 + for(;;) {
  375 + n1 = (long)tb1 & 3;
  376 + if (n1 == 2)
  377 + break;
  378 + tb1 = (TranslationBlock *)((long)tb1 & ~3);
  379 + tb2 = tb1->jmp_next[n1];
  380 + tb_reset_jump(tb1, n1);
  381 + tb1->jmp_next[n1] = NULL;
  382 + tb1 = tb2;
  383 + }
  384 + tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
308 385 }
309 386  
310 387 /* invalidate all TBs which intersect with the target page starting at addr */
... ... @@ -367,27 +444,39 @@ static inline void tb_alloc_page(TranslationBlock *tb, unsigned int page_index)
367 444  
368 445 /* Allocate a new translation block. Flush the translation buffer if
369 446 too many translation blocks or too much generated code. */
370   -TranslationBlock *tb_alloc(unsigned long pc,
371   - unsigned long size)
  447 +TranslationBlock *tb_alloc(unsigned long pc)
372 448 {
373 449 TranslationBlock *tb;
374   - unsigned int page_index1, page_index2;
375 450  
376 451 if (nb_tbs >= CODE_GEN_MAX_BLOCKS ||
377 452 (code_gen_ptr - code_gen_buffer) >= CODE_GEN_BUFFER_MAX_SIZE)
378   - tb_flush();
  453 + return NULL;
379 454 tb = &tbs[nb_tbs++];
380 455 tb->pc = pc;
381   - tb->size = size;
  456 + return tb;
  457 +}
  458 +
  459 +/* link the tb with the other TBs */
  460 +void tb_link(TranslationBlock *tb)
  461 +{
  462 + unsigned int page_index1, page_index2;
382 463  
383 464 /* add in the page list */
384   - page_index1 = pc >> TARGET_PAGE_BITS;
  465 + page_index1 = tb->pc >> TARGET_PAGE_BITS;
385 466 tb_alloc_page(tb, page_index1);
386   - page_index2 = (pc + size - 1) >> TARGET_PAGE_BITS;
  467 + page_index2 = (tb->pc + tb->size - 1) >> TARGET_PAGE_BITS;
387 468 if (page_index2 != page_index1) {
388 469 tb_alloc_page(tb, page_index2);
389 470 }
390   - return tb;
  471 + tb->jmp_first = (TranslationBlock *)((long)tb | 2);
  472 + tb->jmp_next[0] = NULL;
  473 + tb->jmp_next[1] = NULL;
  474 +
  475 + /* init original jump addresses */
  476 + if (tb->tb_next_offset[0] != 0xffff)
  477 + tb_reset_jump(tb, 0);
  478 + if (tb->tb_next_offset[1] != 0xffff)
  479 + tb_reset_jump(tb, 1);
391 480 }
392 481  
393 482 /* called from signal handler: invalidate the code and unprotect the
... ...
exec.h 0 → 100644
  1 +/*
  2 + * internal execution defines for qemu
  3 + *
  4 + * Copyright (c) 2003 Fabrice Bellard
  5 + *
  6 + * This library is free software; you can redistribute it and/or
  7 + * modify it under the terms of the GNU Lesser General Public
  8 + * License as published by the Free Software Foundation; either
  9 + * version 2 of the License, or (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14 + * Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public
  17 + * License along with this library; if not, write to the Free Software
  18 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  19 + */
  20 +
  21 +#define GEN_FLAG_CODE32_SHIFT 0
  22 +#define GEN_FLAG_ADDSEG_SHIFT 1
  23 +#define GEN_FLAG_SS32_SHIFT 2
  24 +#define GEN_FLAG_VM_SHIFT 3
  25 +#define GEN_FLAG_ST_SHIFT 4
  26 +#define GEN_FLAG_CPL_SHIFT 7
  27 +#define GEN_FLAG_IOPL_SHIFT 9
  28 +#define GEN_FLAG_TF_SHIFT 11
  29 +
  30 +struct TranslationBlock;
  31 +int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size,
  32 + int *gen_code_size_ptr,
  33 + uint8_t *pc_start, uint8_t *cs_base, int flags,
  34 + int *code_size_ptr, struct TranslationBlock *tb);
  35 +void cpu_x86_tblocks_init(void);
  36 +void page_init(void);
  37 +int page_unprotect(unsigned long address);
  38 +
  39 +#define CODE_GEN_MAX_SIZE 65536
  40 +#define CODE_GEN_ALIGN 16 /* must be >= the size of an icache line */
  41 +
  42 +#define CODE_GEN_HASH_BITS 15
  43 +#define CODE_GEN_HASH_SIZE (1 << CODE_GEN_HASH_BITS)
  44 +
  45 +/* maximum total translated code allocated */
  46 +#define CODE_GEN_BUFFER_SIZE (2048 * 1024)
  47 +//#define CODE_GEN_BUFFER_SIZE (128 * 1024)
  48 +
  49 +#if defined(__powerpc__)
  50 +#define USE_DIRECT_JUMP
  51 +#endif
  52 +
  53 +typedef struct TranslationBlock {
  54 + unsigned long pc; /* simulated PC corresponding to this block (EIP + CS base) */
  55 + unsigned long cs_base; /* CS base for this block */
  56 + unsigned int flags; /* flags defining in which context the code was generated */
  57 + uint16_t size; /* size of target code for this block (1 <=
  58 + size <= TARGET_PAGE_SIZE) */
  59 + uint8_t *tc_ptr; /* pointer to the translated code */
  60 + struct TranslationBlock *hash_next; /* next matching block */
  61 + struct TranslationBlock *page_next[2]; /* next blocks in even/odd page */
  62 + /* the following data are used to directly call another TB from
  63 + the code of this one. */
  64 + uint16_t tb_next_offset[2]; /* offset of original jump target */
  65 +#ifdef USE_DIRECT_JUMP
  66 + uint16_t tb_jmp_offset[2]; /* offset of jump instruction */
  67 +#else
  68 + uint8_t *tb_next[2]; /* address of jump generated code */
  69 +#endif
  70 + /* list of TBs jumping to this one. This is a circular list using
  71 + the two least significant bits of the pointers to tell what is
  72 + the next pointer: 0 = jmp_next[0], 1 = jmp_next[1], 2 =
  73 + jmp_first */
  74 + struct TranslationBlock *jmp_next[2];
  75 + struct TranslationBlock *jmp_first;
  76 +} TranslationBlock;
  77 +
  78 +static inline unsigned int tb_hash_func(unsigned long pc)
  79 +{
  80 + return pc & (CODE_GEN_HASH_SIZE - 1);
  81 +}
  82 +
  83 +TranslationBlock *tb_alloc(unsigned long pc);
  84 +void tb_flush(void);
  85 +void tb_link(TranslationBlock *tb);
  86 +
  87 +extern TranslationBlock *tb_hash[CODE_GEN_HASH_SIZE];
  88 +
  89 +extern uint8_t code_gen_buffer[CODE_GEN_BUFFER_SIZE];
  90 +extern uint8_t *code_gen_ptr;
  91 +
  92 +/* find a translation block in the translation cache. If not found,
  93 + return NULL and the pointer to the last element of the list in pptb */
  94 +static inline TranslationBlock *tb_find(TranslationBlock ***pptb,
  95 + unsigned long pc,
  96 + unsigned long cs_base,
  97 + unsigned int flags)
  98 +{
  99 + TranslationBlock **ptb, *tb;
  100 + unsigned int h;
  101 +
  102 + h = tb_hash_func(pc);
  103 + ptb = &tb_hash[h];
  104 + for(;;) {
  105 + tb = *ptb;
  106 + if (!tb)
  107 + break;
  108 + if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags)
  109 + return tb;
  110 + ptb = &tb->hash_next;
  111 + }
  112 + *pptb = ptb;
  113 + return NULL;
  114 +}
  115 +
  116 +#if defined(__powerpc__)
  117 +
  118 +static inline void tb_set_jmp_target(TranslationBlock *tb,
  119 + int n, unsigned long addr)
  120 +{
  121 + uint32_t val, *ptr;
  122 + unsigned long offset;
  123 +
  124 + offset = (unsigned long)(tb->tc_ptr + tb->tb_jmp_offset[n]);
  125 +
  126 + /* patch the branch destination */
  127 + ptr = (uint32_t *)offset;
  128 + val = *ptr;
  129 + val = (val & ~0x03fffffc) | ((addr - offset) & 0x03fffffc);
  130 + *ptr = val;
  131 + /* flush icache */
  132 + asm volatile ("dcbst 0,%0" : : "r"(ptr) : "memory");
  133 + asm volatile ("sync" : : : "memory");
  134 + asm volatile ("icbi 0,%0" : : "r"(ptr) : "memory");
  135 + asm volatile ("sync" : : : "memory");
  136 + asm volatile ("isync" : : : "memory");
  137 +}
  138 +
  139 +#else
  140 +
  141 +/* set the jump target */
  142 +static inline void tb_set_jmp_target(TranslationBlock *tb,
  143 + int n, unsigned long addr)
  144 +{
  145 + tb->tb_next[n] = (void *)addr;
  146 +}
  147 +
  148 +#endif
  149 +
  150 +static inline void tb_add_jump(TranslationBlock *tb, int n,
  151 + TranslationBlock *tb_next)
  152 +{
  153 + /* patch the native jump address */
  154 + tb_set_jmp_target(tb, n, (unsigned long)tb_next->tc_ptr);
  155 +
  156 + /* add in TB jmp circular list */
  157 + tb->jmp_next[n] = tb_next->jmp_first;
  158 + tb_next->jmp_first = (TranslationBlock *)((long)(tb) | (n));
  159 +}
  160 +
  161 +#ifndef offsetof
  162 +#define offsetof(type, field) ((size_t) &((type *)0)->field)
  163 +#endif
  164 +
  165 +#ifdef __powerpc__
  166 +static inline int testandset (int *p)
  167 +{
  168 + int ret;
  169 + __asm__ __volatile__ (
  170 + "0: lwarx %0,0,%1 ;"
  171 + " xor. %0,%3,%0;"
  172 + " bne 1f;"
  173 + " stwcx. %2,0,%1;"
  174 + " bne- 0b;"
  175 + "1: "
  176 + : "=&r" (ret)
  177 + : "r" (p), "r" (1), "r" (0)
  178 + : "cr0", "memory");
  179 + return ret;
  180 +}
  181 +#endif
  182 +
  183 +#ifdef __i386__
  184 +static inline int testandset (int *p)
  185 +{
  186 + char ret;
  187 + long int readval;
  188 +
  189 + __asm__ __volatile__ ("lock; cmpxchgl %3, %1; sete %0"
  190 + : "=q" (ret), "=m" (*p), "=a" (readval)
  191 + : "r" (1), "m" (*p), "a" (0)
  192 + : "memory");
  193 + return ret;
  194 +}
  195 +#endif
  196 +
  197 +#ifdef __s390__
  198 +static inline int testandset (int *p)
  199 +{
  200 + int ret;
  201 +
  202 + __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n"
  203 + " jl 0b"
  204 + : "=&d" (ret)
  205 + : "r" (1), "a" (p), "0" (*p)
  206 + : "cc", "memory" );
  207 + return ret;
  208 +}
  209 +#endif
  210 +
  211 +#ifdef __alpha__
  212 +int testandset (int *p)
  213 +{
  214 + int ret;
  215 + unsigned long one;
  216 +
  217 + __asm__ __volatile__ ("0: mov 1,%2\n"
  218 + " ldl_l %0,%1\n"
  219 + " stl_c %2,%1\n"
  220 + " beq %2,1f\n"
  221 + ".subsection 2\n"
  222 + "1: br 0b\n"
  223 + ".previous"
  224 + : "=r" (ret), "=m" (*p), "=r" (one)
  225 + : "m" (*p));
  226 + return ret;
  227 +}
  228 +#endif
  229 +
  230 +#ifdef __sparc__
  231 +static inline int testandset (int *p)
  232 +{
  233 + int ret;
  234 +
  235 + __asm__ __volatile__("ldstub [%1], %0"
  236 + : "=r" (ret)
  237 + : "r" (p)
  238 + : "memory");
  239 +
  240 + return (ret ? 1 : 0);
  241 +}
  242 +#endif
  243 +
  244 +typedef int spinlock_t;
  245 +
  246 +#define SPIN_LOCK_UNLOCKED 0
  247 +
  248 +static inline void spin_lock(spinlock_t *lock)
  249 +{
  250 + while (testandset(lock));
  251 +}
  252 +
  253 +static inline void spin_unlock(spinlock_t *lock)
  254 +{
  255 + *lock = 0;
  256 +}
  257 +
  258 +static inline int spin_trylock(spinlock_t *lock)
  259 +{
  260 + return !testandset(lock);
  261 +}
  262 +
  263 +extern spinlock_t tb_lock;
  264 +
... ...
op-i386.c
... ... @@ -709,7 +709,44 @@ void OPPROTO op_cmpxchg8b(void)
709 709 FORCE_RET();
710 710 }
711 711  
712   -/* string ops */
  712 +#if defined(__powerpc__)
  713 +
  714 +/* on PowerPC we patch the jump instruction directly */
  715 +#define JUMP_TB(tbparam, n, eip)\
  716 +do {\
  717 + static void __attribute__((unused)) *__op_label ## n = &&label ## n;\
  718 + asm volatile ("b %0" : : "i" (&__op_jmp ## n));\
  719 +label ## n:\
  720 + T0 = (long)(tbparam) + (n);\
  721 + EIP = eip;\
  722 +} while (0)
  723 +
  724 +#else
  725 +
  726 +/* jump to next block operations (more portable code, does not need
  727 + cache flushing, but slower because of indirect jump) */
  728 +#define JUMP_TB(tbparam, n, eip)\
  729 +do {\
  730 + static void __attribute__((unused)) *__op_label ## n = &&label ## n;\
  731 + goto *((TranslationBlock *)tbparam)->tb_next[n];\
  732 +label ## n:\
  733 + T0 = (long)(tbparam) + (n);\
  734 + EIP = eip;\
  735 +} while (0)
  736 +
  737 +#endif
  738 +
  739 +void OPPROTO op_jmp_tb_next(void)
  740 +{
  741 + JUMP_TB(PARAM1, 0, PARAM2);
  742 +}
  743 +
  744 +void OPPROTO op_movl_T0_0(void)
  745 +{
  746 + T0 = 0;
  747 +}
  748 +
  749 +/* multiple size ops */
713 750  
714 751 #define ldul ldl
715 752  
... ... @@ -1199,90 +1236,15 @@ void OPPROTO op_lar(void)
1199 1236  
1200 1237 /* flags handling */
1201 1238  
1202   -/* slow jumps cases (compute x86 flags) */
1203   -void OPPROTO op_jo_cc(void)
1204   -{
1205   - int eflags;
1206   - eflags = cc_table[CC_OP].compute_all();
1207   - if (eflags & CC_O)
1208   - EIP = PARAM1;
1209   - else
1210   - EIP = PARAM2;
1211   - FORCE_RET();
1212   -}
1213   -
1214   -void OPPROTO op_jb_cc(void)
1215   -{
1216   - if (cc_table[CC_OP].compute_c())
1217   - EIP = PARAM1;
1218   - else
1219   - EIP = PARAM2;
1220   - FORCE_RET();
1221   -}
1222   -
1223   -void OPPROTO op_jz_cc(void)
1224   -{
1225   - int eflags;
1226   - eflags = cc_table[CC_OP].compute_all();
1227   - if (eflags & CC_Z)
1228   - EIP = PARAM1;
1229   - else
1230   - EIP = PARAM2;
1231   - FORCE_RET();
1232   -}
1233   -
1234   -void OPPROTO op_jbe_cc(void)
  1239 +/* slow jumps cases : in order to avoid calling a function with a
  1240 + pointer (which can generate a stack frame on PowerPC), we use
  1241 + op_setcc to set T0 and then call op_jcc. */
  1242 +void OPPROTO op_jcc(void)
1235 1243 {
1236   - int eflags;
1237   - eflags = cc_table[CC_OP].compute_all();
1238   - if (eflags & (CC_Z | CC_C))
1239   - EIP = PARAM1;
1240   - else
1241   - EIP = PARAM2;
1242   - FORCE_RET();
1243   -}
1244   -
1245   -void OPPROTO op_js_cc(void)
1246   -{
1247   - int eflags;
1248   - eflags = cc_table[CC_OP].compute_all();
1249   - if (eflags & CC_S)
1250   - EIP = PARAM1;
1251   - else
1252   - EIP = PARAM2;
1253   - FORCE_RET();
1254   -}
1255   -
1256   -void OPPROTO op_jp_cc(void)
1257   -{
1258   - int eflags;
1259   - eflags = cc_table[CC_OP].compute_all();
1260   - if (eflags & CC_P)
1261   - EIP = PARAM1;
1262   - else
1263   - EIP = PARAM2;
1264   - FORCE_RET();
1265   -}
1266   -
1267   -void OPPROTO op_jl_cc(void)
1268   -{
1269   - int eflags;
1270   - eflags = cc_table[CC_OP].compute_all();
1271   - if ((eflags ^ (eflags >> 4)) & 0x80)
1272   - EIP = PARAM1;
1273   - else
1274   - EIP = PARAM2;
1275   - FORCE_RET();
1276   -}
1277   -
1278   -void OPPROTO op_jle_cc(void)
1279   -{
1280   - int eflags;
1281   - eflags = cc_table[CC_OP].compute_all();
1282   - if (((eflags ^ (eflags >> 4)) & 0x80) || (eflags & CC_Z))
1283   - EIP = PARAM1;
  1244 + if (T0)
  1245 + JUMP_TB(PARAM1, 0, PARAM2);
1284 1246 else
1285   - EIP = PARAM2;
  1247 + JUMP_TB(PARAM1, 1, PARAM3);
1286 1248 FORCE_RET();
1287 1249 }
1288 1250  
... ...
opc-i386.h
... ... @@ -231,18 +231,20 @@ DEF(jmp_T0, 0)
231 231 DEF(jmp_im, 1)
232 232 DEF(int_im, 2)
233 233 DEF(raise_exception, 1)
234   -DEF(into, 0)
  234 +DEF(into, 1)
235 235 DEF(cli, 0)
236 236 DEF(sti, 0)
237 237 DEF(boundw, 0)
238 238 DEF(boundl, 0)
239 239 DEF(cmpxchg8b, 0)
240   -DEF(jb_subb, 2)
241   -DEF(jz_subb, 2)
242   -DEF(jbe_subb, 2)
243   -DEF(js_subb, 2)
244   -DEF(jl_subb, 2)
245   -DEF(jle_subb, 2)
  240 +DEF(jmp_tb_next, 2)
  241 +DEF(movl_T0_0, 0)
  242 +DEF(jb_subb, 3)
  243 +DEF(jz_subb, 3)
  244 +DEF(jbe_subb, 3)
  245 +DEF(js_subb, 3)
  246 +DEF(jl_subb, 3)
  247 +DEF(jle_subb, 3)
246 248 DEF(setb_T0_subb, 0)
247 249 DEF(setz_T0_subb, 0)
248 250 DEF(setbe_T0_subb, 0)
... ... @@ -314,12 +316,12 @@ DEF(insb_a16, 0)
314 316 DEF(rep_insb_a16, 0)
315 317 DEF(outb_T0_T1, 0)
316 318 DEF(inb_T0_T1, 0)
317   -DEF(jb_subw, 2)
318   -DEF(jz_subw, 2)
319   -DEF(jbe_subw, 2)
320   -DEF(js_subw, 2)
321   -DEF(jl_subw, 2)
322   -DEF(jle_subw, 2)
  319 +DEF(jb_subw, 3)
  320 +DEF(jz_subw, 3)
  321 +DEF(jbe_subw, 3)
  322 +DEF(js_subw, 3)
  323 +DEF(jl_subw, 3)
  324 +DEF(jle_subw, 3)
323 325 DEF(loopnzw, 2)
324 326 DEF(loopzw, 2)
325 327 DEF(loopw, 2)
... ... @@ -405,12 +407,12 @@ DEF(insw_a16, 0)
405 407 DEF(rep_insw_a16, 0)
406 408 DEF(outw_T0_T1, 0)
407 409 DEF(inw_T0_T1, 0)
408   -DEF(jb_subl, 2)
409   -DEF(jz_subl, 2)
410   -DEF(jbe_subl, 2)
411   -DEF(js_subl, 2)
412   -DEF(jl_subl, 2)
413   -DEF(jle_subl, 2)
  410 +DEF(jb_subl, 3)
  411 +DEF(jz_subl, 3)
  412 +DEF(jbe_subl, 3)
  413 +DEF(js_subl, 3)
  414 +DEF(jl_subl, 3)
  415 +DEF(jle_subl, 3)
414 416 DEF(loopnzl, 2)
415 417 DEF(loopzl, 2)
416 418 DEF(loopl, 2)
... ... @@ -536,14 +538,7 @@ DEF(movl_A0_seg, 1)
536 538 DEF(addl_A0_seg, 1)
537 539 DEF(lsl, 0)
538 540 DEF(lar, 0)
539   -DEF(jo_cc, 2)
540   -DEF(jb_cc, 2)
541   -DEF(jz_cc, 2)
542   -DEF(jbe_cc, 2)
543   -DEF(js_cc, 2)
544   -DEF(jp_cc, 2)
545   -DEF(jl_cc, 2)
546   -DEF(jle_cc, 2)
  541 +DEF(jcc, 3)
547 542 DEF(seto_T0_cc, 0)
548 543 DEF(setb_T0_cc, 0)
549 544 DEF(setz_T0_cc, 0)
... ...
ops_template.h
... ... @@ -238,18 +238,18 @@ void OPPROTO glue(op_jb_sub, SUFFIX)(void)
238 238 src2 = CC_SRC - CC_DST;
239 239  
240 240 if ((DATA_TYPE)src1 < (DATA_TYPE)src2)
241   - EIP = PARAM1;
  241 + JUMP_TB(PARAM1, 0, PARAM2);
242 242 else
243   - EIP = PARAM2;
  243 + JUMP_TB(PARAM1, 1, PARAM3);
244 244 FORCE_RET();
245 245 }
246 246  
247 247 void OPPROTO glue(op_jz_sub, SUFFIX)(void)
248 248 {
249 249 if ((DATA_TYPE)CC_DST == 0)
250   - EIP = PARAM1;
  250 + JUMP_TB(PARAM1, 0, PARAM2);
251 251 else
252   - EIP = PARAM2;
  252 + JUMP_TB(PARAM1, 1, PARAM3);
253 253 FORCE_RET();
254 254 }
255 255  
... ... @@ -260,18 +260,18 @@ void OPPROTO glue(op_jbe_sub, SUFFIX)(void)
260 260 src2 = CC_SRC - CC_DST;
261 261  
262 262 if ((DATA_TYPE)src1 <= (DATA_TYPE)src2)
263   - EIP = PARAM1;
  263 + JUMP_TB(PARAM1, 0, PARAM2);
264 264 else
265   - EIP = PARAM2;
  265 + JUMP_TB(PARAM1, 1, PARAM3);
266 266 FORCE_RET();
267 267 }
268 268  
269 269 void OPPROTO glue(op_js_sub, SUFFIX)(void)
270 270 {
271 271 if (CC_DST & SIGN_MASK)
272   - EIP = PARAM1;
  272 + JUMP_TB(PARAM1, 0, PARAM2);
273 273 else
274   - EIP = PARAM2;
  274 + JUMP_TB(PARAM1, 1, PARAM3);
275 275 FORCE_RET();
276 276 }
277 277  
... ... @@ -282,9 +282,9 @@ void OPPROTO glue(op_jl_sub, SUFFIX)(void)
282 282 src2 = CC_SRC - CC_DST;
283 283  
284 284 if ((DATA_STYPE)src1 < (DATA_STYPE)src2)
285   - EIP = PARAM1;
  285 + JUMP_TB(PARAM1, 0, PARAM2);
286 286 else
287   - EIP = PARAM2;
  287 + JUMP_TB(PARAM1, 1, PARAM3);
288 288 FORCE_RET();
289 289 }
290 290  
... ... @@ -295,9 +295,9 @@ void OPPROTO glue(op_jle_sub, SUFFIX)(void)
295 295 src2 = CC_SRC - CC_DST;
296 296  
297 297 if ((DATA_STYPE)src1 <= (DATA_STYPE)src2)
298   - EIP = PARAM1;
  298 + JUMP_TB(PARAM1, 0, PARAM2);
299 299 else
300   - EIP = PARAM2;
  300 + JUMP_TB(PARAM1, 1, PARAM3);
301 301 FORCE_RET();
302 302 }
303 303  
... ...
translate-i386.c
... ... @@ -31,11 +31,15 @@
31 31  
32 32 #define IN_OP_I386
33 33 #include "cpu-i386.h"
  34 +#include "exec.h"
34 35  
35 36 /* XXX: move that elsewhere */
36 37 static uint16_t *gen_opc_ptr;
37 38 static uint32_t *gen_opparam_ptr;
38 39 int __op_param1, __op_param2, __op_param3;
  40 +#ifdef USE_DIRECT_JUMP
  41 +int __op_jmp0, __op_jmp1;
  42 +#endif
39 43  
40 44 #ifdef __i386__
41 45 static inline void flush_icache_range(unsigned long start, unsigned long stop)
... ... @@ -67,14 +71,14 @@ static void inline flush_icache_range(unsigned long start, unsigned long stop)
67 71 stop = (stop + MIN_CACHE_LINE_SIZE - 1) & ~(MIN_CACHE_LINE_SIZE - 1);
68 72  
69 73 for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) {
70   - asm ("dcbst 0,%0;" : : "r"(p) : "memory");
  74 + asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
71 75 }
72   - asm ("sync");
  76 + asm volatile ("sync" : : : "memory");
73 77 for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) {
74   - asm ("icbi 0,%0; sync;" : : "r"(p) : "memory");
  78 + asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
75 79 }
76   - asm ("sync");
77   - asm ("isync");
  80 + asm volatile ("sync" : : : "memory");
  81 + asm volatile ("isync" : : : "memory");
78 82 }
79 83 #endif
80 84  
... ... @@ -129,6 +133,7 @@ typedef struct DisasContext {
129 133 int cpl;
130 134 int iopl;
131 135 int tf; /* TF cpu flag */
  136 + TranslationBlock *tb;
132 137 } DisasContext;
133 138  
134 139 /* i386 arith/logic operations */
... ... @@ -192,6 +197,7 @@ enum {
192 197 typedef void (GenOpFunc)(void);
193 198 typedef void (GenOpFunc1)(long);
194 199 typedef void (GenOpFunc2)(long, long);
  200 +typedef void (GenOpFunc3)(long, long, long);
195 201  
196 202 static GenOpFunc *gen_op_mov_reg_T0[3][8] = {
197 203 [OT_BYTE] = {
... ... @@ -699,18 +705,7 @@ enum {
699 705 JCC_LE,
700 706 };
701 707  
702   -static GenOpFunc2 *gen_jcc_slow[8] = {
703   - gen_op_jo_cc,
704   - gen_op_jb_cc,
705   - gen_op_jz_cc,
706   - gen_op_jbe_cc,
707   - gen_op_js_cc,
708   - gen_op_jp_cc,
709   - gen_op_jl_cc,
710   - gen_op_jle_cc,
711   -};
712   -
713   -static GenOpFunc2 *gen_jcc_sub[3][8] = {
  708 +static GenOpFunc3 *gen_jcc_sub[3][8] = {
714 709 [OT_BYTE] = {
715 710 NULL,
716 711 gen_op_jb_subb,
... ... @@ -1090,8 +1085,9 @@ static inline uint32_t insn_get(DisasContext *s, int ot)
1090 1085  
1091 1086 static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip)
1092 1087 {
  1088 + TranslationBlock *tb;
1093 1089 int inv, jcc_op;
1094   - GenOpFunc2 *func;
  1090 + GenOpFunc3 *func;
1095 1091  
1096 1092 inv = b & 1;
1097 1093 jcc_op = (b >> 1) & 7;
... ... @@ -1101,8 +1097,6 @@ static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip)
1101 1097 case CC_OP_SUBW:
1102 1098 case CC_OP_SUBL:
1103 1099 func = gen_jcc_sub[s->cc_op - CC_OP_SUBB][jcc_op];
1104   - if (!func)
1105   - goto slow_jcc;
1106 1100 break;
1107 1101  
1108 1102 /* some jumps are easy to compute */
... ... @@ -1138,21 +1132,30 @@ static inline void gen_jcc(DisasContext *s, int b, int val, int next_eip)
1138 1132 func = gen_jcc_sub[(s->cc_op - CC_OP_ADDB) % 3][jcc_op];
1139 1133 break;
1140 1134 default:
1141   - goto slow_jcc;
  1135 + func = NULL;
  1136 + break;
1142 1137 }
1143 1138 break;
1144 1139 default:
1145   - slow_jcc:
1146   - if (s->cc_op != CC_OP_DYNAMIC)
1147   - gen_op_set_cc_op(s->cc_op);
1148   - func = gen_jcc_slow[jcc_op];
  1140 + func = NULL;
1149 1141 break;
1150 1142 }
  1143 +
  1144 + if (s->cc_op != CC_OP_DYNAMIC)
  1145 + gen_op_set_cc_op(s->cc_op);
  1146 +
  1147 + if (!func) {
  1148 + gen_setcc_slow[jcc_op]();
  1149 + func = gen_op_jcc;
  1150 + }
  1151 +
  1152 + tb = s->tb;
1151 1153 if (!inv) {
1152   - func(val, next_eip);
  1154 + func((long)tb, val, next_eip);
1153 1155 } else {
1154   - func(next_eip, val);
  1156 + func((long)tb, next_eip, val);
1155 1157 }
  1158 + s->is_jmp = 3;
1156 1159 }
1157 1160  
1158 1161 static void gen_setcc(DisasContext *s, int b)
... ... @@ -1372,6 +1375,18 @@ static void gen_exception(DisasContext *s, int trapno, unsigned int cur_eip)
1372 1375 s->is_jmp = 1;
1373 1376 }
1374 1377  
  1378 +/* generate a jump to eip. No segment change must happen before as a
  1379 + direct call to the next block may occur */
  1380 +static void gen_jmp(DisasContext *s, unsigned int eip)
  1381 +{
  1382 + TranslationBlock *tb = s->tb;
  1383 +
  1384 + if (s->cc_op != CC_OP_DYNAMIC)
  1385 + gen_op_set_cc_op(s->cc_op);
  1386 + gen_op_jmp_tb_next((long)tb, eip);
  1387 + s->is_jmp = 3;
  1388 +}
  1389 +
1375 1390 /* return the next pc address. Return -1 if no insn found. *is_jmp_ptr
1376 1391 is set to true if the instruction sets the PC (last instruction of
1377 1392 a basic block) */
... ... @@ -2964,8 +2979,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start)
2964 2979 val &= 0xffff;
2965 2980 gen_op_movl_T0_im(next_eip);
2966 2981 gen_push_T0(s);
2967   - gen_op_jmp_im(val);
2968   - s->is_jmp = 1;
  2982 + gen_jmp(s, val);
2969 2983 }
2970 2984 break;
2971 2985 case 0x9a: /* lcall im */
... ... @@ -2996,8 +3010,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start)
2996 3010 val += s->pc - s->cs_base;
2997 3011 if (s->dflag == 0)
2998 3012 val = val & 0xffff;
2999   - gen_op_jmp_im(val);
3000   - s->is_jmp = 1;
  3013 + gen_jmp(s, val);
3001 3014 break;
3002 3015 case 0xea: /* ljmp im */
3003 3016 {
... ... @@ -3019,8 +3032,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start)
3019 3032 val += s->pc - s->cs_base;
3020 3033 if (s->dflag == 0)
3021 3034 val = val & 0xffff;
3022   - gen_op_jmp_im(val);
3023   - s->is_jmp = 1;
  3035 + gen_jmp(s, val);
3024 3036 break;
3025 3037 case 0x70 ... 0x7f: /* jcc Jb */
3026 3038 val = (int8_t)insn_get(s, OT_BYTE);
... ... @@ -3037,7 +3049,6 @@ long disas_insn(DisasContext *s, uint8_t *pc_start)
3037 3049 if (s->dflag == 0)
3038 3050 val &= 0xffff;
3039 3051 gen_jcc(s, b, val, next_eip);
3040   - s->is_jmp = 1;
3041 3052 break;
3042 3053  
3043 3054 case 0x190 ... 0x19f: /* setcc Gv */
... ... @@ -3393,15 +3404,6 @@ static uint16_t opc_read_flags[NB_OPS] = {
3393 3404  
3394 3405 [INDEX_op_into] = CC_O,
3395 3406  
3396   - [INDEX_op_jo_cc] = CC_O,
3397   - [INDEX_op_jb_cc] = CC_C,
3398   - [INDEX_op_jz_cc] = CC_Z,
3399   - [INDEX_op_jbe_cc] = CC_Z | CC_C,
3400   - [INDEX_op_js_cc] = CC_S,
3401   - [INDEX_op_jp_cc] = CC_P,
3402   - [INDEX_op_jl_cc] = CC_O | CC_S,
3403   - [INDEX_op_jle_cc] = CC_O | CC_S | CC_Z,
3404   -
3405 3407 [INDEX_op_jb_subb] = CC_C,
3406 3408 [INDEX_op_jb_subw] = CC_C,
3407 3409 [INDEX_op_jb_subl] = CC_C,
... ... @@ -3730,7 +3732,7 @@ static uint32_t gen_opparam_buf[OPPARAM_BUF_SIZE];
3730 3732 int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size,
3731 3733 int *gen_code_size_ptr,
3732 3734 uint8_t *pc_start, uint8_t *cs_base, int flags,
3733   - int *code_size_ptr)
  3735 + int *code_size_ptr, TranslationBlock *tb)
3734 3736 {
3735 3737 DisasContext dc1, *dc = &dc1;
3736 3738 uint8_t *pc_ptr;
... ... @@ -3750,6 +3752,7 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size,
3750 3752 dc->tf = (flags >> GEN_FLAG_TF_SHIFT) & 1;
3751 3753 dc->cc_op = CC_OP_DYNAMIC;
3752 3754 dc->cs_base = cs_base;
  3755 + dc->tb = tb;
3753 3756  
3754 3757 gen_opc_ptr = gen_opc_buf;
3755 3758 gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
... ... @@ -3776,15 +3779,21 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size,
3776 3779 } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end &&
3777 3780 (pc_ptr - pc_start) < (TARGET_PAGE_SIZE - 32));
3778 3781 /* we must store the eflags state if it is not already done */
3779   - if (dc->cc_op != CC_OP_DYNAMIC)
3780   - gen_op_set_cc_op(dc->cc_op);
3781   - if (dc->is_jmp != 1) {
3782   - /* we add an additionnal jmp to update the simulated PC */
3783   - gen_op_jmp_im(ret - (unsigned long)dc->cs_base);
  3782 + if (dc->is_jmp != 3) {
  3783 + if (dc->cc_op != CC_OP_DYNAMIC)
  3784 + gen_op_set_cc_op(dc->cc_op);
  3785 + if (dc->is_jmp != 1) {
  3786 + /* we add an additionnal jmp to update the simulated PC */
  3787 + gen_op_jmp_im(ret - (unsigned long)dc->cs_base);
  3788 + }
3784 3789 }
3785 3790 if (dc->tf) {
3786 3791 gen_op_raise_exception(EXCP01_SSTP);
3787 3792 }
  3793 + if (dc->is_jmp != 3) {
  3794 + /* indicate that the hash table must be used to find the next TB */
  3795 + gen_op_movl_T0_0();
  3796 + }
3788 3797  
3789 3798 *gen_opc_ptr = INDEX_op_end;
3790 3799  
... ... @@ -3814,8 +3823,17 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size,
3814 3823 #endif
3815 3824  
3816 3825 /* generate machine code */
3817   - gen_code_size = dyngen_code(gen_code_buf, gen_opc_buf, gen_opparam_buf);
  3826 + tb->tb_next_offset[0] = 0xffff;
  3827 + tb->tb_next_offset[1] = 0xffff;
  3828 + gen_code_size = dyngen_code(gen_code_buf, tb->tb_next_offset,
  3829 +#ifdef USE_DIRECT_JUMP
  3830 + tb->tb_jmp_offset,
  3831 +#else
  3832 + NULL,
  3833 +#endif
  3834 + gen_opc_buf, gen_opparam_buf);
3818 3835 flush_icache_range((unsigned long)gen_code_buf, (unsigned long)(gen_code_buf + gen_code_size));
  3836 +
3819 3837 *gen_code_size_ptr = gen_code_size;
3820 3838 *code_size_ptr = pc_ptr - pc_start;
3821 3839 #ifdef DEBUG_DISAS
... ...