Commit 1e4840bf40f1bcb08ed539cb644522707902a421
1 parent 641d5fbe
transformed TN into temporaries - add local temporaries usage when needed - optimized fcmovX
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4577 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 4 changed files with 259 additions and 266 deletions
target-i386/cpu.h
| ... | ... | @@ -472,12 +472,6 @@ typedef union { |
| 472 | 472 | #define NB_MMU_MODES 2 |
| 473 | 473 | |
| 474 | 474 | typedef struct CPUX86State { |
| 475 | -#if TARGET_LONG_BITS > HOST_LONG_BITS | |
| 476 | - /* temporaries if we cannot store them in host registers */ | |
| 477 | - target_ulong t0, t1, t2; | |
| 478 | -#endif | |
| 479 | - target_ulong t3; | |
| 480 | - | |
| 481 | 475 | /* standard registers */ |
| 482 | 476 | target_ulong regs[CPU_NB_REGS]; |
| 483 | 477 | target_ulong eip; |
| ... | ... | @@ -526,6 +520,7 @@ typedef struct CPUX86State { |
| 526 | 520 | XMMReg xmm_regs[CPU_NB_REGS]; |
| 527 | 521 | XMMReg xmm_t0; |
| 528 | 522 | MMXReg mmx_t0; |
| 523 | + target_ulong cc_tmp; /* temporary for rcr/rcl */ | |
| 529 | 524 | |
| 530 | 525 | /* sysenter registers */ |
| 531 | 526 | uint32_t sysenter_cs; |
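
Note on the cpu.h hunks: the per-CPU spill slots t0..t3 are removed and replaced by a single cc_tmp field, which the rcr/rcl helpers use to hand the updated eflags back to generated code. A minimal sketch of how the two sides meet, using only code that appears later in this commit (optimize_flags_init in translate.c and helper_template.h):

    /* translate.c: cc_tmp is exposed to generated code as a TCG global
       backed by the new CPUX86State field                               */
    cpu_cc_tmp = tcg_global_mem_new(TCG_TYPE_TL, TCG_AREG0,
                                    offsetof(CPUState, cc_tmp), "cc_tmp");

    /* helper_rcl/helper_rcr: write either the recomputed eflags, or -1
       as a "flags unchanged" marker                                      */
    env->cc_tmp = -1;
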
target-i386/exec.h
| ... | ... | @@ -29,60 +29,20 @@ |
| 29 | 29 | |
| 30 | 30 | #include "cpu-defs.h" |
| 31 | 31 | |
| 32 | -/* at least 4 register variables are defined */ | |
| 33 | 32 | register struct CPUX86State *env asm(AREG0); |
| 34 | 33 | |
| 35 | -#ifndef CPU_NO_GLOBAL_REGS | |
| 36 | - | |
| 37 | -#if TARGET_LONG_BITS > HOST_LONG_BITS | |
| 38 | - | |
| 39 | -/* no registers can be used */ | |
| 40 | -#define T0 (env->t0) | |
| 41 | -#define T1 (env->t1) | |
| 42 | -#define T2 (env->t2) | |
| 43 | - | |
| 44 | -#else | |
| 45 | - | |
| 46 | -/* XXX: use unsigned long instead of target_ulong - better code will | |
| 47 | - be generated for 64 bit CPUs */ | |
| 48 | -register target_ulong T0 asm(AREG1); | |
| 49 | -register target_ulong T1 asm(AREG2); | |
| 50 | -register target_ulong T2 asm(AREG3); | |
| 51 | - | |
| 52 | -#endif /* ! (TARGET_LONG_BITS > HOST_LONG_BITS) */ | |
| 53 | - | |
| 54 | -#endif /* ! CPU_NO_GLOBAL_REGS */ | |
| 55 | - | |
| 56 | -#define A0 T2 | |
| 57 | - | |
| 58 | 34 | extern FILE *logfile; |
| 59 | 35 | extern int loglevel; |
| 60 | 36 | |
| 61 | -#ifndef reg_EAX | |
| 62 | 37 | #define EAX (env->regs[R_EAX]) |
| 63 | -#endif | |
| 64 | -#ifndef reg_ECX | |
| 65 | 38 | #define ECX (env->regs[R_ECX]) |
| 66 | -#endif | |
| 67 | -#ifndef reg_EDX | |
| 68 | 39 | #define EDX (env->regs[R_EDX]) |
| 69 | -#endif | |
| 70 | -#ifndef reg_EBX | |
| 71 | 40 | #define EBX (env->regs[R_EBX]) |
| 72 | -#endif | |
| 73 | -#ifndef reg_ESP | |
| 74 | 41 | #define ESP (env->regs[R_ESP]) |
| 75 | -#endif | |
| 76 | -#ifndef reg_EBP | |
| 77 | 42 | #define EBP (env->regs[R_EBP]) |
| 78 | -#endif | |
| 79 | -#ifndef reg_ESI | |
| 80 | 43 | #define ESI (env->regs[R_ESI]) |
| 81 | -#endif | |
| 82 | -#ifndef reg_EDI | |
| 83 | 44 | #define EDI (env->regs[R_EDI]) |
| 84 | -#endif | |
| 85 | -#define EIP (env->eip) | |
| 45 | +#define EIP (env->eip) | |
| 86 | 46 | #define DF (env->df) |
| 87 | 47 | |
| 88 | 48 | #define CC_SRC (env->cc_src) |
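
In effect exec.h no longer reserves host registers or env slots for the translator's scratch values, and the reg_EAX-style overrides are gone. A short before/after sketch, assembled only from lines visible elsewhere in this commit:

    /* before: A0 was merely an alias for the fixed global T2 (removed above) */
    #define A0 T2

    /* after: translate.c creates its own temporaries once per translated
       block (see gen_intermediate_code_internal in the last hunk below)   */
    cpu_A0 = tcg_temp_new(TCG_TYPE_TL);
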
target-i386/helper_template.h
| ... | ... | @@ -287,11 +287,11 @@ target_ulong glue(helper_rcl, SUFFIX)(target_ulong t0, target_ulong t1) |
| 287 | 287 | if (count > 1) |
| 288 | 288 | res |= t0 >> (DATA_BITS + 1 - count); |
| 289 | 289 | t0 = res; |
| 290 | - env->t3 = (eflags & ~(CC_C | CC_O)) | | |
| 290 | + env->cc_tmp = (eflags & ~(CC_C | CC_O)) | | |
| 291 | 291 | (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) | |
| 292 | 292 | ((src >> (DATA_BITS - count)) & CC_C); |
| 293 | 293 | } else { |
| 294 | - env->t3 = -1; | |
| 294 | + env->cc_tmp = -1; | |
| 295 | 295 | } |
| 296 | 296 | return t0; |
| 297 | 297 | } |
| ... | ... | @@ -316,11 +316,11 @@ target_ulong glue(helper_rcr, SUFFIX)(target_ulong t0, target_ulong t1) |
| 316 | 316 | if (count > 1) |
| 317 | 317 | res |= t0 << (DATA_BITS + 1 - count); |
| 318 | 318 | t0 = res; |
| 319 | - env->t3 = (eflags & ~(CC_C | CC_O)) | | |
| 319 | + env->cc_tmp = (eflags & ~(CC_C | CC_O)) | | |
| 320 | 320 | (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) | |
| 321 | 321 | ((src >> (count - 1)) & CC_C); |
| 322 | 322 | } else { |
| 323 | - env->t3 = -1; | |
| 323 | + env->cc_tmp = -1; | |
| 324 | 324 | } |
| 325 | 325 | return t0; |
| 326 | 326 | } |
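
The helpers now report through env->cc_tmp: either the recomputed eflags, or -1 when the rotate count was zero and the flags must stay untouched. The consumer side lives in gen_rotc_rm_T1 in translate.c, condensed here from the hunk further down:

    label1 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1);  /* count == 0: skip  */
    tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);                   /* commit new flags  */
    tcg_gen_discard_tl(cpu_cc_dst);
    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
    gen_set_label(label1);
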
target-i386/translate.c
| ... | ... | @@ -58,8 +58,9 @@ |
| 58 | 58 | //#define MACRO_TEST 1 |
| 59 | 59 | |
| 60 | 60 | /* global register indexes */ |
| 61 | -static TCGv cpu_env, cpu_T[2], cpu_A0, cpu_cc_op, cpu_cc_src, cpu_cc_dst; | |
| 62 | -static TCGv cpu_T3; | |
| 61 | +static TCGv cpu_env, cpu_A0, cpu_cc_op, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp; | |
| 62 | +/* local temps */ | |
| 63 | +static TCGv cpu_T[2], cpu_T3; | |
| 63 | 64 | /* local register indexes (only used inside old micro ops) */ |
| 64 | 65 | static TCGv cpu_tmp0, cpu_tmp1_i64, cpu_tmp2_i32, cpu_tmp3_i32, cpu_tmp4, cpu_ptr0, cpu_ptr1; |
| 65 | 66 | static TCGv cpu_tmp5, cpu_tmp6; |
| ... | ... | @@ -260,34 +261,34 @@ static inline void gen_op_andl_A0_ffff(void) |
| 260 | 261 | #define REG_LH_OFFSET 4 |
| 261 | 262 | #endif |
| 262 | 263 | |
| 263 | -static inline void gen_op_mov_reg_TN(int ot, int t_index, int reg) | |
| 264 | +static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0) | |
| 264 | 265 | { |
| 265 | 266 | switch(ot) { |
| 266 | 267 | case OT_BYTE: |
| 267 | 268 | if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) { |
| 268 | - tcg_gen_st8_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET); | |
| 269 | + tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET); | |
| 269 | 270 | } else { |
| 270 | - tcg_gen_st8_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET); | |
| 271 | + tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET); | |
| 271 | 272 | } |
| 272 | 273 | break; |
| 273 | 274 | case OT_WORD: |
| 274 | - tcg_gen_st16_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET); | |
| 275 | + tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET); | |
| 275 | 276 | break; |
| 276 | 277 | #ifdef TARGET_X86_64 |
| 277 | 278 | case OT_LONG: |
| 278 | - tcg_gen_st32_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET); | |
| 279 | + tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET); | |
| 279 | 280 | /* high part of register set to zero */ |
| 280 | 281 | tcg_gen_movi_tl(cpu_tmp0, 0); |
| 281 | 282 | tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET); |
| 282 | 283 | break; |
| 283 | 284 | default: |
| 284 | 285 | case OT_QUAD: |
| 285 | - tcg_gen_st_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg])); | |
| 286 | + tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg])); | |
| 286 | 287 | break; |
| 287 | 288 | #else |
| 288 | 289 | default: |
| 289 | 290 | case OT_LONG: |
| 290 | - tcg_gen_st32_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET); | |
| 291 | + tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET); | |
| 291 | 292 | break; |
| 292 | 293 | #endif |
| 293 | 294 | } |
| ... | ... | @@ -295,12 +296,12 @@ static inline void gen_op_mov_reg_TN(int ot, int t_index, int reg) |
| 295 | 296 | |
| 296 | 297 | static inline void gen_op_mov_reg_T0(int ot, int reg) |
| 297 | 298 | { |
| 298 | - gen_op_mov_reg_TN(ot, 0, reg); | |
| 299 | + gen_op_mov_reg_v(ot, reg, cpu_T[0]); | |
| 299 | 300 | } |
| 300 | 301 | |
| 301 | 302 | static inline void gen_op_mov_reg_T1(int ot, int reg) |
| 302 | 303 | { |
| 303 | - gen_op_mov_reg_TN(ot, 1, reg); | |
| 304 | + gen_op_mov_reg_v(ot, reg, cpu_T[1]); | |
| 304 | 305 | } |
| 305 | 306 | |
| 306 | 307 | static inline void gen_op_mov_reg_A0(int size, int reg) |
| ... | ... | @@ -329,23 +330,28 @@ static inline void gen_op_mov_reg_A0(int size, int reg) |
| 329 | 330 | } |
| 330 | 331 | } |
| 331 | 332 | |
| 332 | -static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg) | |
| 333 | +static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg) | |
| 333 | 334 | { |
| 334 | 335 | switch(ot) { |
| 335 | 336 | case OT_BYTE: |
| 336 | 337 | if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) { |
| 337 | 338 | goto std_case; |
| 338 | 339 | } else { |
| 339 | - tcg_gen_ld8u_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET); | |
| 340 | + tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET); | |
| 340 | 341 | } |
| 341 | 342 | break; |
| 342 | 343 | default: |
| 343 | 344 | std_case: |
| 344 | - tcg_gen_ld_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg])); | |
| 345 | + tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg])); | |
| 345 | 346 | break; |
| 346 | 347 | } |
| 347 | 348 | } |
| 348 | 349 | |
| 350 | +static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg) | |
| 351 | +{ | |
| 352 | + gen_op_mov_v_reg(ot, cpu_T[t_index], reg); | |
| 353 | +} | |
| 354 | + | |
| 349 | 355 | static inline void gen_op_movl_A0_reg(int reg) |
| 350 | 356 | { |
| 351 | 357 | tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET); |
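
The rewrite pattern in this hunk recurs through the rest of translate.c: each gen_op_*_TN helper becomes a TCGv-taking primitive, with the old TN entry point kept as a thin wrapper, so call sites that now keep values in local temps (t0, t1, a0) can bypass cpu_T[] entirely. For instance, from the hunk above:

    static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
    {
        gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
    }
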
| ... | ... | @@ -511,90 +517,70 @@ static inline void gen_op_lds_T0_A0(int idx) |
| 511 | 517 | } |
| 512 | 518 | } |
| 513 | 519 | |
| 514 | -/* sign does not matter, except for lidt/lgdt call (TODO: fix it) */ | |
| 515 | -static inline void gen_op_ld_T0_A0(int idx) | |
| 520 | +static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0) | |
| 516 | 521 | { |
| 517 | 522 | int mem_index = (idx >> 2) - 1; |
| 518 | 523 | switch(idx & 3) { |
| 519 | 524 | case 0: |
| 520 | - tcg_gen_qemu_ld8u(cpu_T[0], cpu_A0, mem_index); | |
| 525 | + tcg_gen_qemu_ld8u(t0, a0, mem_index); | |
| 521 | 526 | break; |
| 522 | 527 | case 1: |
| 523 | - tcg_gen_qemu_ld16u(cpu_T[0], cpu_A0, mem_index); | |
| 528 | + tcg_gen_qemu_ld16u(t0, a0, mem_index); | |
| 524 | 529 | break; |
| 525 | 530 | case 2: |
| 526 | - tcg_gen_qemu_ld32u(cpu_T[0], cpu_A0, mem_index); | |
| 531 | + tcg_gen_qemu_ld32u(t0, a0, mem_index); | |
| 527 | 532 | break; |
| 528 | 533 | default: |
| 529 | 534 | case 3: |
| 530 | - tcg_gen_qemu_ld64(cpu_T[0], cpu_A0, mem_index); | |
| 535 | + tcg_gen_qemu_ld64(t0, a0, mem_index); | |
| 531 | 536 | break; |
| 532 | 537 | } |
| 533 | 538 | } |
| 534 | 539 | |
| 540 | +/* XXX: always use ldu or lds */ | |
| 541 | +static inline void gen_op_ld_T0_A0(int idx) | |
| 542 | +{ | |
| 543 | + gen_op_ld_v(idx, cpu_T[0], cpu_A0); | |
| 544 | +} | |
| 545 | + | |
| 535 | 546 | static inline void gen_op_ldu_T0_A0(int idx) |
| 536 | 547 | { |
| 537 | - gen_op_ld_T0_A0(idx); | |
| 548 | + gen_op_ld_v(idx, cpu_T[0], cpu_A0); | |
| 538 | 549 | } |
| 539 | 550 | |
| 540 | 551 | static inline void gen_op_ld_T1_A0(int idx) |
| 541 | 552 | { |
| 553 | + gen_op_ld_v(idx, cpu_T[1], cpu_A0); | |
| 554 | +} | |
| 555 | + | |
| 556 | +static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0) | |
| 557 | +{ | |
| 542 | 558 | int mem_index = (idx >> 2) - 1; |
| 543 | 559 | switch(idx & 3) { |
| 544 | 560 | case 0: |
| 545 | - tcg_gen_qemu_ld8u(cpu_T[1], cpu_A0, mem_index); | |
| 561 | + tcg_gen_qemu_st8(t0, a0, mem_index); | |
| 546 | 562 | break; |
| 547 | 563 | case 1: |
| 548 | - tcg_gen_qemu_ld16u(cpu_T[1], cpu_A0, mem_index); | |
| 564 | + tcg_gen_qemu_st16(t0, a0, mem_index); | |
| 549 | 565 | break; |
| 550 | 566 | case 2: |
| 551 | - tcg_gen_qemu_ld32u(cpu_T[1], cpu_A0, mem_index); | |
| 567 | + tcg_gen_qemu_st32(t0, a0, mem_index); | |
| 552 | 568 | break; |
| 553 | 569 | default: |
| 554 | 570 | case 3: |
| 555 | - tcg_gen_qemu_ld64(cpu_T[1], cpu_A0, mem_index); | |
| 571 | + tcg_gen_qemu_st64(t0, a0, mem_index); | |
| 556 | 572 | break; |
| 557 | 573 | } |
| 558 | 574 | } |
| 559 | 575 | |
| 560 | 576 | static inline void gen_op_st_T0_A0(int idx) |
| 561 | 577 | { |
| 562 | - int mem_index = (idx >> 2) - 1; | |
| 563 | - switch(idx & 3) { | |
| 564 | - case 0: | |
| 565 | - tcg_gen_qemu_st8(cpu_T[0], cpu_A0, mem_index); | |
| 566 | - break; | |
| 567 | - case 1: | |
| 568 | - tcg_gen_qemu_st16(cpu_T[0], cpu_A0, mem_index); | |
| 569 | - break; | |
| 570 | - case 2: | |
| 571 | - tcg_gen_qemu_st32(cpu_T[0], cpu_A0, mem_index); | |
| 572 | - break; | |
| 573 | - default: | |
| 574 | - case 3: | |
| 575 | - tcg_gen_qemu_st64(cpu_T[0], cpu_A0, mem_index); | |
| 576 | - break; | |
| 577 | - } | |
| 578 | + gen_op_st_v(idx, cpu_T[0], cpu_A0); | |
| 578 | 579 | } |
| 579 | 580 | |
| 580 | 581 | static inline void gen_op_st_T1_A0(int idx) |
| 581 | 582 | { |
| 582 | - int mem_index = (idx >> 2) - 1; | |
| 583 | - switch(idx & 3) { | |
| 584 | - case 0: | |
| 585 | - tcg_gen_qemu_st8(cpu_T[1], cpu_A0, mem_index); | |
| 586 | - break; | |
| 587 | - case 1: | |
| 588 | - tcg_gen_qemu_st16(cpu_T[1], cpu_A0, mem_index); | |
| 589 | - break; | |
| 590 | - case 2: | |
| 591 | - tcg_gen_qemu_st32(cpu_T[1], cpu_A0, mem_index); | |
| 592 | - break; | |
| 593 | - default: | |
| 594 | - case 3: | |
| 595 | - tcg_gen_qemu_st64(cpu_T[1], cpu_A0, mem_index); | |
| 596 | - break; | |
| 597 | - } | |
| 583 | + gen_op_st_v(idx, cpu_T[1], cpu_A0); | |
| 598 | 584 | } |
| 599 | 585 | |
| 600 | 586 | static inline void gen_jmp_im(target_ulong pc) |
| ... | ... | @@ -857,9 +843,11 @@ static void gen_compute_eflags(TCGv reg) |
| 857 | 843 | tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); |
| 858 | 844 | } |
| 859 | 845 | |
| 860 | -static inline void gen_setcc_slow_T0(int op) | |
| 846 | +static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) | |
| 861 | 847 | { |
| 862 | - switch(op) { | |
| 848 | + if (s->cc_op != CC_OP_DYNAMIC) | |
| 849 | + gen_op_set_cc_op(s->cc_op); | |
| 850 | + switch(jcc_op) { | |
| 863 | 851 | case JCC_O: |
| 864 | 852 | gen_compute_eflags(cpu_T[0]); |
| 865 | 853 | tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11); |
| ... | ... | @@ -1151,7 +1139,7 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) |
| 1151 | 1139 | break; |
| 1152 | 1140 | default: |
| 1153 | 1141 | slow_jcc: |
| 1154 | - gen_setcc_slow_T0(jcc_op); | |
| 1142 | + gen_setcc_slow_T0(s, jcc_op); | |
| 1155 | 1143 | tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, |
| 1156 | 1144 | cpu_T[0], 0, l1); |
| 1157 | 1145 | break; |
| ... | ... | @@ -1436,7 +1424,8 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, |
| 1436 | 1424 | { |
| 1437 | 1425 | target_ulong mask; |
| 1438 | 1426 | int shift_label; |
| 1439 | - | |
| 1427 | + TCGv t0, t1; | |
| 1428 | + | |
| 1440 | 1429 | if (ot == OT_QUAD) |
| 1441 | 1430 | mask = 0x3f; |
| 1442 | 1431 | else |
| ... | ... | @@ -1477,11 +1466,18 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, |
| 1477 | 1466 | if (s->cc_op != CC_OP_DYNAMIC) |
| 1478 | 1467 | gen_op_set_cc_op(s->cc_op); |
| 1479 | 1468 | |
| 1469 | + /* XXX: inefficient */ | |
| 1470 | + t0 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 1471 | + t1 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 1472 | + | |
| 1473 | + tcg_gen_mov_tl(t0, cpu_T[0]); | |
| 1474 | + tcg_gen_mov_tl(t1, cpu_T3); | |
| 1475 | + | |
| 1480 | 1476 | shift_label = gen_new_label(); |
| 1481 | 1477 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, shift_label); |
| 1482 | 1478 | |
| 1483 | - tcg_gen_mov_tl(cpu_cc_src, cpu_T3); | |
| 1484 | - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
| 1479 | + tcg_gen_mov_tl(cpu_cc_src, t1); | |
| 1480 | + tcg_gen_mov_tl(cpu_cc_dst, t0); | |
| 1485 | 1481 | if (is_right) |
| 1486 | 1482 | tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot); |
| 1487 | 1483 | else |
| ... | ... | @@ -1489,6 +1485,9 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, |
| 1489 | 1485 | |
| 1490 | 1486 | gen_set_label(shift_label); |
| 1491 | 1487 | s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ |
| 1488 | + | |
| 1489 | + tcg_temp_free(t0); | |
| 1490 | + tcg_temp_free(t1); | |
| 1492 | 1491 | } |
| 1493 | 1492 | |
| 1494 | 1493 | static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2, |
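
Why the extra copies: plain TCG temporaries do not keep their value across a branch/label, so anything read after gen_set_label() has to be snapshotted into a local temporary first (hence the /* XXX: inefficient */ notes). The idiom used throughout this commit, in sketch form:

    int l = gen_new_label();
    TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);   /* survives the branch       */
    tcg_gen_mov_tl(t0, cpu_T[0]);                /* snapshot before branching */
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, l);
    tcg_gen_mov_tl(cpu_cc_dst, t0);              /* t0 still valid here       */
    gen_set_label(l);
    tcg_temp_free(t0);
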
| ... | ... | @@ -1556,78 +1555,95 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, |
| 1556 | 1555 | { |
| 1557 | 1556 | target_ulong mask; |
| 1558 | 1557 | int label1, label2, data_bits; |
| 1559 | - | |
| 1558 | + TCGv t0, t1, t2, a0; | |
| 1559 | + | |
| 1560 | + /* XXX: inefficient, but we must use local temps */ | |
| 1561 | + t0 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 1562 | + t1 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 1563 | + t2 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 1564 | + a0 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 1565 | + | |
| 1560 | 1566 | if (ot == OT_QUAD) |
| 1561 | 1567 | mask = 0x3f; |
| 1562 | 1568 | else |
| 1563 | 1569 | mask = 0x1f; |
| 1564 | 1570 | |
| 1565 | 1571 | /* load */ |
| 1566 | - if (op1 == OR_TMP0) | |
| 1567 | - gen_op_ld_T0_A0(ot + s->mem_index); | |
| 1568 | - else | |
| 1569 | - gen_op_mov_TN_reg(ot, 0, op1); | |
| 1572 | + if (op1 == OR_TMP0) { | |
| 1573 | + tcg_gen_mov_tl(a0, cpu_A0); | |
| 1574 | + gen_op_ld_v(ot + s->mem_index, t0, a0); | |
| 1575 | + } else { | |
| 1576 | + gen_op_mov_v_reg(ot, t0, op1); | |
| 1577 | + } | |
| 1570 | 1578 | |
| 1571 | - tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask); | |
| 1579 | + tcg_gen_mov_tl(t1, cpu_T[1]); | |
| 1580 | + | |
| 1581 | + tcg_gen_andi_tl(t1, t1, mask); | |
| 1572 | 1582 | |
| 1573 | 1583 | /* Must test zero case to avoid using undefined behaviour in TCG |
| 1574 | 1584 | shifts. */ |
| 1575 | 1585 | label1 = gen_new_label(); |
| 1576 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, label1); | |
| 1586 | + tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1); | |
| 1577 | 1587 | |
| 1578 | 1588 | if (ot <= OT_WORD) |
| 1579 | - tcg_gen_andi_tl(cpu_tmp0, cpu_T[1], (1 << (3 + ot)) - 1); | |
| 1589 | + tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1); | |
| 1580 | 1590 | else |
| 1581 | - tcg_gen_mov_tl(cpu_tmp0, cpu_T[1]); | |
| 1591 | + tcg_gen_mov_tl(cpu_tmp0, t1); | |
| 1582 | 1592 | |
| 1583 | - gen_extu(ot, cpu_T[0]); | |
| 1584 | - tcg_gen_mov_tl(cpu_T3, cpu_T[0]); | |
| 1593 | + gen_extu(ot, t0); | |
| 1594 | + tcg_gen_mov_tl(t2, t0); | |
| 1585 | 1595 | |
| 1586 | 1596 | data_bits = 8 << ot; |
| 1587 | 1597 | /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX: |
| 1588 | 1598 | fix TCG definition) */ |
| 1589 | 1599 | if (is_right) { |
| 1590 | - tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_tmp0); | |
| 1600 | + tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0); | |
| 1591 | 1601 | tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0); |
| 1592 | - tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_tmp0); | |
| 1602 | + tcg_gen_shl_tl(t0, t0, cpu_tmp0); | |
| 1593 | 1603 | } else { |
| 1594 | - tcg_gen_shl_tl(cpu_tmp4, cpu_T[0], cpu_tmp0); | |
| 1604 | + tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0); | |
| 1595 | 1605 | tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0); |
| 1596 | - tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_tmp0); | |
| 1606 | + tcg_gen_shr_tl(t0, t0, cpu_tmp0); | |
| 1597 | 1607 | } |
| 1598 | - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp4); | |
| 1608 | + tcg_gen_or_tl(t0, t0, cpu_tmp4); | |
| 1599 | 1609 | |
| 1600 | 1610 | gen_set_label(label1); |
| 1601 | 1611 | /* store */ |
| 1602 | - if (op1 == OR_TMP0) | |
| 1603 | - gen_op_st_T0_A0(ot + s->mem_index); | |
| 1604 | - else | |
| 1605 | - gen_op_mov_reg_T0(ot, op1); | |
| 1612 | + if (op1 == OR_TMP0) { | |
| 1613 | + gen_op_st_v(ot + s->mem_index, t0, a0); | |
| 1614 | + } else { | |
| 1615 | + gen_op_mov_reg_v(ot, op1, t0); | |
| 1616 | + } | |
| 1606 | 1617 | |
| 1607 | 1618 | /* update eflags */ |
| 1608 | 1619 | if (s->cc_op != CC_OP_DYNAMIC) |
| 1609 | 1620 | gen_op_set_cc_op(s->cc_op); |
| 1610 | 1621 | |
| 1611 | 1622 | label2 = gen_new_label(); |
| 1612 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, label2); | |
| 1623 | + tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2); | |
| 1613 | 1624 | |
| 1614 | 1625 | gen_compute_eflags(cpu_cc_src); |
| 1615 | 1626 | tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C)); |
| 1616 | - tcg_gen_xor_tl(cpu_tmp0, cpu_T3, cpu_T[0]); | |
| 1627 | + tcg_gen_xor_tl(cpu_tmp0, t2, t0); | |
| 1617 | 1628 | tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1)); |
| 1618 | 1629 | tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O); |
| 1619 | 1630 | tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0); |
| 1620 | 1631 | if (is_right) { |
| 1621 | - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], data_bits - 1); | |
| 1632 | + tcg_gen_shri_tl(t0, t0, data_bits - 1); | |
| 1622 | 1633 | } |
| 1623 | - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_C); | |
| 1624 | - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]); | |
| 1634 | + tcg_gen_andi_tl(t0, t0, CC_C); | |
| 1635 | + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0); | |
| 1625 | 1636 | |
| 1626 | 1637 | tcg_gen_discard_tl(cpu_cc_dst); |
| 1627 | 1638 | tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS); |
| 1628 | 1639 | |
| 1629 | 1640 | gen_set_label(label2); |
| 1630 | 1641 | s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ |
| 1642 | + | |
| 1643 | + tcg_temp_free(t0); | |
| 1644 | + tcg_temp_free(t1); | |
| 1645 | + tcg_temp_free(t2); | |
| 1646 | + tcg_temp_free(a0); | |
| 1631 | 1647 | } |
| 1632 | 1648 | |
| 1633 | 1649 | static void *helper_rotc[8] = { |
| ... | ... | @@ -1666,9 +1682,9 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, |
| 1666 | 1682 | |
| 1667 | 1683 | /* update eflags */ |
| 1668 | 1684 | label1 = gen_new_label(); |
| 1669 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T3, -1, label1); | |
| 1685 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1); | |
| 1670 | 1686 | |
| 1671 | - tcg_gen_mov_tl(cpu_cc_src, cpu_T3); | |
| 1687 | + tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp); | |
| 1672 | 1688 | tcg_gen_discard_tl(cpu_cc_dst); |
| 1673 | 1689 | tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS); |
| 1674 | 1690 | |
| ... | ... | @@ -1682,6 +1698,12 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, |
| 1682 | 1698 | { |
| 1683 | 1699 | int label1, label2, data_bits; |
| 1684 | 1700 | target_ulong mask; |
| 1701 | + TCGv t0, t1, t2, a0; | |
| 1702 | + | |
| 1703 | + t0 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 1704 | + t1 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 1705 | + t2 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 1706 | + a0 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 1685 | 1707 | |
| 1686 | 1708 | if (ot == OT_QUAD) |
| 1687 | 1709 | mask = 0x3f; |
| ... | ... | @@ -1689,95 +1711,102 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, |
| 1689 | 1711 | mask = 0x1f; |
| 1690 | 1712 | |
| 1691 | 1713 | /* load */ |
| 1692 | - if (op1 == OR_TMP0) | |
| 1693 | - gen_op_ld_T0_A0(ot + s->mem_index); | |
| 1694 | - else | |
| 1695 | - gen_op_mov_TN_reg(ot, 0, op1); | |
| 1714 | + if (op1 == OR_TMP0) { | |
| 1715 | + tcg_gen_mov_tl(a0, cpu_A0); | |
| 1716 | + gen_op_ld_v(ot + s->mem_index, t0, a0); | |
| 1717 | + } else { | |
| 1718 | + gen_op_mov_v_reg(ot, t0, op1); | |
| 1719 | + } | |
| 1696 | 1720 | |
| 1697 | 1721 | tcg_gen_andi_tl(cpu_T3, cpu_T3, mask); |
| 1722 | + | |
| 1723 | + tcg_gen_mov_tl(t1, cpu_T[1]); | |
| 1724 | + tcg_gen_mov_tl(t2, cpu_T3); | |
| 1725 | + | |
| 1698 | 1726 | /* Must test zero case to avoid using undefined behaviour in TCG |
| 1699 | 1727 | shifts. */ |
| 1700 | 1728 | label1 = gen_new_label(); |
| 1701 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T3, 0, label1); | |
| 1729 | + tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1); | |
| 1702 | 1730 | |
| 1703 | - tcg_gen_addi_tl(cpu_tmp5, cpu_T3, -1); | |
| 1731 | + tcg_gen_addi_tl(cpu_tmp5, t2, -1); | |
| 1704 | 1732 | if (ot == OT_WORD) { |
| 1705 | 1733 | /* Note: we implement the Intel behaviour for shift count > 16 */ |
| 1706 | 1734 | if (is_right) { |
| 1707 | - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff); | |
| 1708 | - tcg_gen_shli_tl(cpu_tmp0, cpu_T[1], 16); | |
| 1709 | - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); | |
| 1710 | - tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]); | |
| 1735 | + tcg_gen_andi_tl(t0, t0, 0xffff); | |
| 1736 | + tcg_gen_shli_tl(cpu_tmp0, t1, 16); | |
| 1737 | + tcg_gen_or_tl(t0, t0, cpu_tmp0); | |
| 1738 | + tcg_gen_ext32u_tl(t0, t0); | |
| 1711 | 1739 | |
| 1712 | - tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_tmp5); | |
| 1740 | + tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5); | |
| 1713 | 1741 | |
| 1714 | 1742 | /* only needed if count > 16, but a test would complicate */ |
| 1715 | - tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), cpu_T3); | |
| 1716 | - tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp5); | |
| 1743 | + tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2); | |
| 1744 | + tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5); | |
| 1717 | 1745 | |
| 1718 | - tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T3); | |
| 1746 | + tcg_gen_shr_tl(t0, t0, t2); | |
| 1719 | 1747 | |
| 1720 | - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); | |
| 1748 | + tcg_gen_or_tl(t0, t0, cpu_tmp0); | |
| 1721 | 1749 | } else { |
| 1722 | 1750 | /* XXX: not optimal */ |
| 1723 | - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff); | |
| 1724 | - tcg_gen_shli_tl(cpu_T[1], cpu_T[1], 16); | |
| 1725 | - tcg_gen_or_tl(cpu_T[1], cpu_T[1], cpu_T[0]); | |
| 1726 | - tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]); | |
| 1751 | + tcg_gen_andi_tl(t0, t0, 0xffff); | |
| 1752 | + tcg_gen_shli_tl(t1, t1, 16); | |
| 1753 | + tcg_gen_or_tl(t1, t1, t0); | |
| 1754 | + tcg_gen_ext32u_tl(t1, t1); | |
| 1727 | 1755 | |
| 1728 | - tcg_gen_shl_tl(cpu_tmp4, cpu_T[0], cpu_tmp5); | |
| 1756 | + tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5); | |
| 1729 | 1757 | tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(32), cpu_tmp5); |
| 1730 | - tcg_gen_shr_tl(cpu_tmp6, cpu_T[1], cpu_tmp0); | |
| 1758 | + tcg_gen_shr_tl(cpu_tmp6, t1, cpu_tmp0); | |
| 1731 | 1759 | tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp6); |
| 1732 | 1760 | |
| 1733 | - tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T3); | |
| 1734 | - tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), cpu_T3); | |
| 1735 | - tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp5); | |
| 1736 | - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
| 1761 | + tcg_gen_shl_tl(t0, t0, t2); | |
| 1762 | + tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2); | |
| 1763 | + tcg_gen_shr_tl(t1, t1, cpu_tmp5); | |
| 1764 | + tcg_gen_or_tl(t0, t0, t1); | |
| 1737 | 1765 | } |
| 1738 | 1766 | } else { |
| 1739 | 1767 | data_bits = 8 << ot; |
| 1740 | 1768 | if (is_right) { |
| 1741 | 1769 | if (ot == OT_LONG) |
| 1742 | - tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]); | |
| 1770 | + tcg_gen_ext32u_tl(t0, t0); | |
| 1743 | 1771 | |
| 1744 | - tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_tmp5); | |
| 1772 | + tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5); | |
| 1745 | 1773 | |
| 1746 | - tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T3); | |
| 1747 | - tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), cpu_T3); | |
| 1748 | - tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_tmp5); | |
| 1749 | - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
| 1774 | + tcg_gen_shr_tl(t0, t0, t2); | |
| 1775 | + tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2); | |
| 1776 | + tcg_gen_shl_tl(t1, t1, cpu_tmp5); | |
| 1777 | + tcg_gen_or_tl(t0, t0, t1); | |
| 1750 | 1778 | |
| 1751 | 1779 | } else { |
| 1752 | 1780 | if (ot == OT_LONG) |
| 1753 | - tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]); | |
| 1781 | + tcg_gen_ext32u_tl(t1, t1); | |
| 1754 | 1782 | |
| 1755 | - tcg_gen_shl_tl(cpu_tmp4, cpu_T[0], cpu_tmp5); | |
| 1783 | + tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5); | |
| 1756 | 1784 | |
| 1757 | - tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T3); | |
| 1758 | - tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), cpu_T3); | |
| 1759 | - tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp5); | |
| 1760 | - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
| 1785 | + tcg_gen_shl_tl(t0, t0, t2); | |
| 1786 | + tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2); | |
| 1787 | + tcg_gen_shr_tl(t1, t1, cpu_tmp5); | |
| 1788 | + tcg_gen_or_tl(t0, t0, t1); | |
| 1761 | 1789 | } |
| 1762 | 1790 | } |
| 1763 | - tcg_gen_mov_tl(cpu_T[1], cpu_tmp4); | |
| 1791 | + tcg_gen_mov_tl(t1, cpu_tmp4); | |
| 1764 | 1792 | |
| 1765 | 1793 | gen_set_label(label1); |
| 1766 | 1794 | /* store */ |
| 1767 | - if (op1 == OR_TMP0) | |
| 1768 | - gen_op_st_T0_A0(ot + s->mem_index); | |
| 1769 | - else | |
| 1770 | - gen_op_mov_reg_T0(ot, op1); | |
| 1795 | + if (op1 == OR_TMP0) { | |
| 1796 | + gen_op_st_v(ot + s->mem_index, t0, a0); | |
| 1797 | + } else { | |
| 1798 | + gen_op_mov_reg_v(ot, op1, t0); | |
| 1799 | + } | |
| 1771 | 1800 | |
| 1772 | 1801 | /* update eflags */ |
| 1773 | 1802 | if (s->cc_op != CC_OP_DYNAMIC) |
| 1774 | 1803 | gen_op_set_cc_op(s->cc_op); |
| 1775 | 1804 | |
| 1776 | 1805 | label2 = gen_new_label(); |
| 1777 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T3, 0, label2); | |
| 1806 | + tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2); | |
| 1778 | 1807 | |
| 1779 | - tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); | |
| 1780 | - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
| 1808 | + tcg_gen_mov_tl(cpu_cc_src, t1); | |
| 1809 | + tcg_gen_mov_tl(cpu_cc_dst, t0); | |
| 1781 | 1810 | if (is_right) { |
| 1782 | 1811 | tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot); |
| 1783 | 1812 | } else { |
| ... | ... | @@ -1785,6 +1814,11 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, |
| 1785 | 1814 | } |
| 1786 | 1815 | gen_set_label(label2); |
| 1787 | 1816 | s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ |
| 1817 | + | |
| 1818 | + tcg_temp_free(t0); | |
| 1819 | + tcg_temp_free(t1); | |
| 1820 | + tcg_temp_free(t2); | |
| 1821 | + tcg_temp_free(a0); | |
| 1788 | 1822 | } |
| 1789 | 1823 | |
| 1790 | 1824 | static void gen_shift(DisasContext *s1, int op, int ot, int d, int s) |
| ... | ... | @@ -2217,23 +2251,26 @@ static inline void gen_jcc(DisasContext *s, int b, |
| 2217 | 2251 | static void gen_setcc(DisasContext *s, int b) |
| 2218 | 2252 | { |
| 2219 | 2253 | int inv, jcc_op, l1; |
| 2254 | + TCGv t0; | |
| 2220 | 2255 | |
| 2221 | 2256 | if (is_fast_jcc_case(s, b)) { |
| 2222 | 2257 | /* nominal case: we use a jump */ |
| 2223 | - tcg_gen_movi_tl(cpu_T[0], 0); | |
| 2258 | + /* XXX: make it faster by adding new instructions in TCG */ | |
| 2259 | + t0 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 2260 | + tcg_gen_movi_tl(t0, 0); | |
| 2224 | 2261 | l1 = gen_new_label(); |
| 2225 | 2262 | gen_jcc1(s, s->cc_op, b ^ 1, l1); |
| 2226 | - tcg_gen_movi_tl(cpu_T[0], 1); | |
| 2263 | + tcg_gen_movi_tl(t0, 1); | |
| 2227 | 2264 | gen_set_label(l1); |
| 2265 | + tcg_gen_mov_tl(cpu_T[0], t0); | |
| 2266 | + tcg_temp_free(t0); | |
| 2228 | 2267 | } else { |
| 2229 | 2268 | /* slow case: it is more efficient not to generate a jump, |
| 2230 | 2269 | although it is questionnable whether this optimization is |
| 2231 | 2270 | worth to */ |
| 2232 | 2271 | inv = b & 1; |
| 2233 | 2272 | jcc_op = (b >> 1) & 7; |
| 2234 | - if (s->cc_op != CC_OP_DYNAMIC) | |
| 2235 | - gen_op_set_cc_op(s->cc_op); | |
| 2236 | - gen_setcc_slow_T0(jcc_op); | |
| 2273 | + gen_setcc_slow_T0(s, jcc_op); | |
| 2237 | 2274 | if (inv) { |
| 2238 | 2275 | tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1); |
| 2239 | 2276 | } |
| ... | ... | @@ -4353,6 +4390,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 4353 | 4390 | case 0x1b1: /* cmpxchg Ev, Gv */ |
| 4354 | 4391 | { |
| 4355 | 4392 | int label1, label2; |
| 4393 | + TCGv t0, t1, t2, a0; | |
| 4356 | 4394 | |
| 4357 | 4395 | if ((b & 1) == 0) |
| 4358 | 4396 | ot = OT_BYTE; |
| ... | ... | @@ -4361,37 +4399,46 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 4361 | 4399 | modrm = ldub_code(s->pc++); |
| 4362 | 4400 | reg = ((modrm >> 3) & 7) | rex_r; |
| 4363 | 4401 | mod = (modrm >> 6) & 3; |
| 4364 | - gen_op_mov_TN_reg(ot, 1, reg); | |
| 4402 | + t0 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 4403 | + t1 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 4404 | + t2 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 4405 | + a0 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 4406 | + gen_op_mov_v_reg(ot, t1, reg); | |
| 4365 | 4407 | if (mod == 3) { |
| 4366 | 4408 | rm = (modrm & 7) | REX_B(s); |
| 4367 | - gen_op_mov_TN_reg(ot, 0, rm); | |
| 4409 | + gen_op_mov_v_reg(ot, t0, rm); | |
| 4368 | 4410 | } else { |
| 4369 | 4411 | gen_lea_modrm(s, modrm, ®_addr, &offset_addr); |
| 4370 | - gen_op_ld_T0_A0(ot + s->mem_index); | |
| 4412 | + tcg_gen_mov_tl(a0, cpu_A0); | |
| 4413 | + gen_op_ld_v(ot + s->mem_index, t0, a0); | |
| 4371 | 4414 | rm = 0; /* avoid warning */ |
| 4372 | 4415 | } |
| 4373 | 4416 | label1 = gen_new_label(); |
| 4374 | - tcg_gen_ld_tl(cpu_T3, cpu_env, offsetof(CPUState, regs[R_EAX])); | |
| 4375 | - tcg_gen_sub_tl(cpu_T3, cpu_T3, cpu_T[0]); | |
| 4376 | - gen_extu(ot, cpu_T3); | |
| 4377 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T3, 0, label1); | |
| 4417 | + tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUState, regs[R_EAX])); | |
| 4418 | + tcg_gen_sub_tl(t2, t2, t0); | |
| 4419 | + gen_extu(ot, t2); | |
| 4420 | + tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1); | |
| 4378 | 4421 | if (mod == 3) { |
| 4379 | 4422 | label2 = gen_new_label(); |
| 4380 | - gen_op_mov_reg_T0(ot, R_EAX); | |
| 4423 | + gen_op_mov_reg_v(ot, R_EAX, t0); | |
| 4381 | 4424 | tcg_gen_br(label2); |
| 4382 | 4425 | gen_set_label(label1); |
| 4383 | - gen_op_mov_reg_T1(ot, rm); | |
| 4426 | + gen_op_mov_reg_v(ot, rm, t1); | |
| 4384 | 4427 | gen_set_label(label2); |
| 4385 | 4428 | } else { |
| 4386 | - tcg_gen_mov_tl(cpu_T[1], cpu_T[0]); | |
| 4387 | - gen_op_mov_reg_T0(ot, R_EAX); | |
| 4429 | + tcg_gen_mov_tl(t1, t0); | |
| 4430 | + gen_op_mov_reg_v(ot, R_EAX, t0); | |
| 4388 | 4431 | gen_set_label(label1); |
| 4389 | 4432 | /* always store */ |
| 4390 | - gen_op_st_T1_A0(ot + s->mem_index); | |
| 4433 | + gen_op_st_v(ot + s->mem_index, t1, a0); | |
| 4391 | 4434 | } |
| 4392 | - tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); | |
| 4393 | - tcg_gen_mov_tl(cpu_cc_dst, cpu_T3); | |
| 4435 | + tcg_gen_mov_tl(cpu_cc_src, t0); | |
| 4436 | + tcg_gen_mov_tl(cpu_cc_dst, t2); | |
| 4394 | 4437 | s->cc_op = CC_OP_SUBB + ot; |
| 4438 | + tcg_temp_free(t0); | |
| 4439 | + tcg_temp_free(t1); | |
| 4440 | + tcg_temp_free(t2); | |
| 4441 | + tcg_temp_free(a0); | |
| 4395 | 4442 | } |
| 4396 | 4443 | break; |
| 4397 | 4444 | case 0x1c7: /* cmpxchg8b */ |
| ... | ... | @@ -5457,10 +5504,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 5457 | 5504 | (JCC_BE << 1), |
| 5458 | 5505 | (JCC_P << 1), |
| 5459 | 5506 | }; |
| 5460 | - op1 = fcmov_cc[op & 3] | ((op >> 3) & 1); | |
| 5461 | - gen_setcc(s, op1); | |
| 5507 | + op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1); | |
| 5462 | 5508 | l1 = gen_new_label(); |
| 5463 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[0], 0, l1); | |
| 5509 | + gen_jcc1(s, s->cc_op, op1, l1); | |
| 5464 | 5510 | tcg_gen_helper_0_1(helper_fmov_ST0_STN, tcg_const_i32(opreg)); |
| 5465 | 5511 | gen_set_label(l1); |
| 5466 | 5512 | } |
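
The "optimized fcmovX" part of the commit message refers to this hunk: instead of materializing the condition into T0 via gen_setcc() and then comparing it against zero, the translator inverts the condition and lets gen_jcc1() branch directly, dropping the setcc and the extra compare. The new sequence, condensed:

    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);   /* inverted condition   */
    l1 = gen_new_label();
    gen_jcc1(s, s->cc_op, op1, l1);   /* skip the fmov when the original
                                         condition is false                   */
    tcg_gen_helper_0_1(helper_fmov_ST0_STN, tcg_const_i32(opreg));
    gen_set_label(l1);
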
| ... | ... | @@ -5806,25 +5852,26 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 5806 | 5852 | case 0x140 ... 0x14f: /* cmov Gv, Ev */ |
| 5807 | 5853 | { |
| 5808 | 5854 | int l1; |
| 5855 | + TCGv t0; | |
| 5856 | + | |
| 5809 | 5857 | ot = dflag + OT_WORD; |
| 5810 | 5858 | modrm = ldub_code(s->pc++); |
| 5811 | 5859 | reg = ((modrm >> 3) & 7) | rex_r; |
| 5812 | 5860 | mod = (modrm >> 6) & 3; |
| 5861 | + t0 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 5813 | 5862 | if (mod != 3) { |
| 5814 | 5863 | gen_lea_modrm(s, modrm, ®_addr, &offset_addr); |
| 5815 | - gen_op_ld_T1_A0(ot + s->mem_index); | |
| 5864 | + gen_op_ld_v(ot + s->mem_index, t0, cpu_A0); | |
| 5816 | 5865 | } else { |
| 5817 | 5866 | rm = (modrm & 7) | REX_B(s); |
| 5818 | - gen_op_mov_TN_reg(ot, 1, rm); | |
| 5867 | + gen_op_mov_v_reg(ot, t0, rm); | |
| 5819 | 5868 | } |
| 5820 | - if (s->cc_op != CC_OP_DYNAMIC) | |
| 5821 | - gen_op_set_cc_op(s->cc_op); | |
| 5822 | 5869 | #ifdef TARGET_X86_64 |
| 5823 | 5870 | if (ot == OT_LONG) { |
| 5824 | 5871 | /* XXX: specific Intel behaviour ? */ |
| 5825 | 5872 | l1 = gen_new_label(); |
| 5826 | 5873 | gen_jcc1(s, s->cc_op, b ^ 1, l1); |
| 5827 | - tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET); | |
| 5874 | + tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET); | |
| 5828 | 5875 | gen_set_label(l1); |
| 5829 | 5876 | tcg_gen_movi_tl(cpu_tmp0, 0); |
| 5830 | 5877 | tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET); |
| ... | ... | @@ -5833,9 +5880,10 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 5833 | 5880 | { |
| 5834 | 5881 | l1 = gen_new_label(); |
| 5835 | 5882 | gen_jcc1(s, s->cc_op, b ^ 1, l1); |
| 5836 | - gen_op_mov_reg_T1(ot, reg); | |
| 5883 | + gen_op_mov_reg_v(ot, reg, t0); | |
| 5837 | 5884 | gen_set_label(l1); |
| 5838 | 5885 | } |
| 5886 | + tcg_temp_free(t0); | |
| 5839 | 5887 | } |
| 5840 | 5888 | break; |
| 5841 | 5889 | |
| ... | ... | @@ -6039,6 +6087,8 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 6039 | 6087 | case 0x1bd: /* bsr */ |
| 6040 | 6088 | { |
| 6041 | 6089 | int label1; |
| 6090 | + TCGv t0; | |
| 6091 | + | |
| 6042 | 6092 | ot = dflag + OT_WORD; |
| 6043 | 6093 | modrm = ldub_code(s->pc++); |
| 6044 | 6094 | reg = ((modrm >> 3) & 7) | rex_r; |
| ... | ... | @@ -6046,17 +6096,20 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 6046 | 6096 | gen_extu(ot, cpu_T[0]); |
| 6047 | 6097 | label1 = gen_new_label(); |
| 6048 | 6098 | tcg_gen_movi_tl(cpu_cc_dst, 0); |
| 6049 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[0], 0, label1); | |
| 6099 | + t0 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 6100 | + tcg_gen_mov_tl(t0, cpu_T[0]); | |
| 6101 | + tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1); | |
| 6050 | 6102 | if (b & 1) { |
| 6051 | - tcg_gen_helper_1_1(helper_bsr, cpu_T[0], cpu_T[0]); | |
| 6103 | + tcg_gen_helper_1_1(helper_bsr, cpu_T[0], t0); | |
| 6052 | 6104 | } else { |
| 6053 | - tcg_gen_helper_1_1(helper_bsf, cpu_T[0], cpu_T[0]); | |
| 6105 | + tcg_gen_helper_1_1(helper_bsf, cpu_T[0], t0); | |
| 6054 | 6106 | } |
| 6055 | 6107 | gen_op_mov_reg_T0(ot, reg); |
| 6056 | 6108 | tcg_gen_movi_tl(cpu_cc_dst, 1); |
| 6057 | 6109 | gen_set_label(label1); |
| 6058 | 6110 | tcg_gen_discard_tl(cpu_cc_src); |
| 6059 | 6111 | s->cc_op = CC_OP_LOGICB + ot; |
| 6112 | + tcg_temp_free(t0); | |
| 6060 | 6113 | } |
| 6061 | 6114 | break; |
| 6062 | 6115 | /************************/ |
| ... | ... | @@ -6725,8 +6778,13 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 6725 | 6778 | #endif |
| 6726 | 6779 | { |
| 6727 | 6780 | int label1; |
| 6781 | + TCGv t0, t1, t2; | |
| 6782 | + | |
| 6728 | 6783 | if (!s->pe || s->vm86) |
| 6729 | 6784 | goto illegal_op; |
| 6785 | + t0 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 6786 | + t1 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 6787 | + t2 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 6730 | 6788 | ot = OT_WORD; |
| 6731 | 6789 | modrm = ldub_code(s->pc++); |
| 6732 | 6790 | reg = (modrm >> 3) & 7; |
| ... | ... | @@ -6734,55 +6792,61 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 6734 | 6792 | rm = modrm & 7; |
| 6735 | 6793 | if (mod != 3) { |
| 6736 | 6794 | gen_lea_modrm(s, modrm, ®_addr, &offset_addr); |
| 6737 | - gen_op_ld_T0_A0(ot + s->mem_index); | |
| 6795 | + gen_op_ld_v(ot + s->mem_index, t0, cpu_A0); | |
| 6738 | 6796 | } else { |
| 6739 | - gen_op_mov_TN_reg(ot, 0, rm); | |
| 6797 | + gen_op_mov_v_reg(ot, t0, rm); | |
| 6740 | 6798 | } |
| 6741 | - gen_op_mov_TN_reg(ot, 1, reg); | |
| 6742 | - tcg_gen_andi_tl(cpu_tmp0, cpu_T[0], 3); | |
| 6743 | - tcg_gen_andi_tl(cpu_T[1], cpu_T[1], 3); | |
| 6744 | - tcg_gen_movi_tl(cpu_T3, 0); | |
| 6799 | + gen_op_mov_v_reg(ot, t1, reg); | |
| 6800 | + tcg_gen_andi_tl(cpu_tmp0, t0, 3); | |
| 6801 | + tcg_gen_andi_tl(t1, t1, 3); | |
| 6802 | + tcg_gen_movi_tl(t2, 0); | |
| 6745 | 6803 | label1 = gen_new_label(); |
| 6746 | - tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, cpu_T[1], label1); | |
| 6747 | - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], ~3); | |
| 6748 | - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
| 6749 | - tcg_gen_movi_tl(cpu_T3, CC_Z); | |
| 6804 | + tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1); | |
| 6805 | + tcg_gen_andi_tl(t0, t0, ~3); | |
| 6806 | + tcg_gen_or_tl(t0, t0, t1); | |
| 6807 | + tcg_gen_movi_tl(t2, CC_Z); | |
| 6750 | 6808 | gen_set_label(label1); |
| 6751 | 6809 | if (mod != 3) { |
| 6752 | - gen_op_st_T0_A0(ot + s->mem_index); | |
| 6810 | + gen_op_st_v(ot + s->mem_index, t0, cpu_A0); | |
| 6753 | 6811 | } else { |
| 6754 | - gen_op_mov_reg_T0(ot, rm); | |
| 6812 | + gen_op_mov_reg_v(ot, rm, t0); | |
| 6755 | 6813 | } |
| 6756 | 6814 | if (s->cc_op != CC_OP_DYNAMIC) |
| 6757 | 6815 | gen_op_set_cc_op(s->cc_op); |
| 6758 | 6816 | gen_compute_eflags(cpu_cc_src); |
| 6759 | 6817 | tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z); |
| 6760 | - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T3); | |
| 6818 | + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2); | |
| 6761 | 6819 | s->cc_op = CC_OP_EFLAGS; |
| 6820 | + tcg_temp_free(t0); | |
| 6821 | + tcg_temp_free(t1); | |
| 6822 | + tcg_temp_free(t2); | |
| 6762 | 6823 | } |
| 6763 | 6824 | break; |
| 6764 | 6825 | case 0x102: /* lar */ |
| 6765 | 6826 | case 0x103: /* lsl */ |
| 6766 | 6827 | { |
| 6767 | 6828 | int label1; |
| 6829 | + TCGv t0; | |
| 6768 | 6830 | if (!s->pe || s->vm86) |
| 6769 | 6831 | goto illegal_op; |
| 6770 | 6832 | ot = dflag ? OT_LONG : OT_WORD; |
| 6771 | 6833 | modrm = ldub_code(s->pc++); |
| 6772 | 6834 | reg = ((modrm >> 3) & 7) | rex_r; |
| 6773 | 6835 | gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); |
| 6836 | + t0 = tcg_temp_local_new(TCG_TYPE_TL); | |
| 6774 | 6837 | if (s->cc_op != CC_OP_DYNAMIC) |
| 6775 | 6838 | gen_op_set_cc_op(s->cc_op); |
| 6776 | 6839 | if (b == 0x102) |
| 6777 | - tcg_gen_helper_1_1(helper_lar, cpu_T[0], cpu_T[0]); | |
| 6840 | + tcg_gen_helper_1_1(helper_lar, t0, cpu_T[0]); | |
| 6778 | 6841 | else |
| 6779 | - tcg_gen_helper_1_1(helper_lsl, cpu_T[0], cpu_T[0]); | |
| 6842 | + tcg_gen_helper_1_1(helper_lsl, t0, cpu_T[0]); | |
| 6780 | 6843 | tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z); |
| 6781 | 6844 | label1 = gen_new_label(); |
| 6782 | 6845 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1); |
| 6783 | - gen_op_mov_reg_T0(ot, reg); | |
| 6846 | + gen_op_mov_reg_v(ot, reg, t0); | |
| 6784 | 6847 | gen_set_label(label1); |
| 6785 | 6848 | s->cc_op = CC_OP_EFLAGS; |
| 6849 | + tcg_temp_free(t0); | |
| 6786 | 6850 | } |
| 6787 | 6851 | break; |
| 6788 | 6852 | case 0x118: |
| ... | ... | @@ -7029,17 +7093,6 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 7029 | 7093 | return s->pc; |
| 7030 | 7094 | } |
| 7031 | 7095 | |
| 7032 | -static void tcg_macro_func(TCGContext *s, int macro_id, const int *dead_args) | |
| 7033 | -{ | |
| 7034 | - switch(macro_id) { | |
| 7035 | -#ifdef MACRO_TEST | |
| 7036 | - case MACRO_TEST: | |
| 7037 | - tcg_gen_helper_0_1(helper_divl_EAX_T0, cpu_T[0]); | |
| 7038 | - break; | |
| 7039 | -#endif | |
| 7040 | - } | |
| 7041 | -} | |
| 7042 | - | |
| 7043 | 7096 | void optimize_flags_init(void) |
| 7044 | 7097 | { |
| 7045 | 7098 | #if TCG_TARGET_REG_BITS == 32 |
| ... | ... | @@ -7047,33 +7100,15 @@ void optimize_flags_init(void) |
| 7047 | 7100 | #else |
| 7048 | 7101 | assert(sizeof(CCTable) == (1 << 4)); |
| 7049 | 7102 | #endif |
| 7050 | - tcg_set_macro_func(&tcg_ctx, tcg_macro_func); | |
| 7051 | - | |
| 7052 | 7103 | cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env"); |
| 7053 | -#if TARGET_LONG_BITS > HOST_LONG_BITS | |
| 7054 | - cpu_T[0] = tcg_global_mem_new(TCG_TYPE_TL, | |
| 7055 | - TCG_AREG0, offsetof(CPUState, t0), "T0"); | |
| 7056 | - cpu_T[1] = tcg_global_mem_new(TCG_TYPE_TL, | |
| 7057 | - TCG_AREG0, offsetof(CPUState, t1), "T1"); | |
| 7058 | - cpu_A0 = tcg_global_mem_new(TCG_TYPE_TL, | |
| 7059 | - TCG_AREG0, offsetof(CPUState, t2), "A0"); | |
| 7060 | -#else | |
| 7061 | - cpu_T[0] = tcg_global_reg_new(TCG_TYPE_TL, TCG_AREG1, "T0"); | |
| 7062 | - cpu_T[1] = tcg_global_reg_new(TCG_TYPE_TL, TCG_AREG2, "T1"); | |
| 7063 | - cpu_A0 = tcg_global_reg_new(TCG_TYPE_TL, TCG_AREG3, "A0"); | |
| 7064 | -#endif | |
| 7065 | - cpu_T3 = tcg_global_mem_new(TCG_TYPE_TL, | |
| 7066 | - TCG_AREG0, offsetof(CPUState, t3), "T3"); | |
| 7067 | -#if defined(__i386__) && (TARGET_LONG_BITS <= HOST_LONG_BITS) | |
| 7068 | - /* XXX: must be suppressed once there are less fixed registers */ | |
| 7069 | - cpu_tmp1_i64 = tcg_global_reg2_new_hack(TCG_TYPE_I64, TCG_AREG1, TCG_AREG2, "tmp1"); | |
| 7070 | -#endif | |
| 7071 | 7104 | cpu_cc_op = tcg_global_mem_new(TCG_TYPE_I32, |
| 7072 | 7105 | TCG_AREG0, offsetof(CPUState, cc_op), "cc_op"); |
| 7073 | 7106 | cpu_cc_src = tcg_global_mem_new(TCG_TYPE_TL, |
| 7074 | 7107 | TCG_AREG0, offsetof(CPUState, cc_src), "cc_src"); |
| 7075 | 7108 | cpu_cc_dst = tcg_global_mem_new(TCG_TYPE_TL, |
| 7076 | 7109 | TCG_AREG0, offsetof(CPUState, cc_dst), "cc_dst"); |
| 7110 | + cpu_cc_tmp = tcg_global_mem_new(TCG_TYPE_TL, | |
| 7111 | + TCG_AREG0, offsetof(CPUState, cc_tmp), "cc_tmp"); | |
| 7077 | 7112 | |
| 7078 | 7113 | /* register helpers */ |
| 7079 | 7114 | |
| ... | ... | @@ -7145,10 +7180,13 @@ static inline int gen_intermediate_code_internal(CPUState *env, |
| 7145 | 7180 | printf("ERROR addseg\n"); |
| 7146 | 7181 | #endif |
| 7147 | 7182 | |
| 7183 | + cpu_T[0] = tcg_temp_new(TCG_TYPE_TL); | |
| 7184 | + cpu_T[1] = tcg_temp_new(TCG_TYPE_TL); | |
| 7185 | + cpu_A0 = tcg_temp_new(TCG_TYPE_TL); | |
| 7186 | + cpu_T3 = tcg_temp_new(TCG_TYPE_TL); | |
| 7187 | + | |
| 7148 | 7188 | cpu_tmp0 = tcg_temp_new(TCG_TYPE_TL); |
| 7149 | -#if !(defined(__i386__) && (TARGET_LONG_BITS <= HOST_LONG_BITS)) | |
| 7150 | 7189 | cpu_tmp1_i64 = tcg_temp_new(TCG_TYPE_I64); |
| 7151 | -#endif | |
| 7152 | 7190 | cpu_tmp2_i32 = tcg_temp_new(TCG_TYPE_I32); |
| 7153 | 7191 | cpu_tmp3_i32 = tcg_temp_new(TCG_TYPE_I32); |
| 7154 | 7192 | cpu_tmp4 = tcg_temp_new(TCG_TYPE_TL); |