Commit 1e4840bf40f1bcb08ed539cb644522707902a421

Authored by bellard
1 parent 641d5fbe

transformed TN into temporaries - added use of local temporaries where needed - optimized fcmovX

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4577 c046a42c-6fe2-441c-8c8c-71466251a162
target-i386/cpu.h
... ... @@ -472,12 +472,6 @@ typedef union {
472 472 #define NB_MMU_MODES 2
473 473  
474 474 typedef struct CPUX86State {
475   -#if TARGET_LONG_BITS > HOST_LONG_BITS
476   - /* temporaries if we cannot store them in host registers */
477   - target_ulong t0, t1, t2;
478   -#endif
479   - target_ulong t3;
480   -
481 475 /* standard registers */
482 476 target_ulong regs[CPU_NB_REGS];
483 477 target_ulong eip;
... ... @@ -526,6 +520,7 @@ typedef struct CPUX86State {
526 520 XMMReg xmm_regs[CPU_NB_REGS];
527 521 XMMReg xmm_t0;
528 522 MMXReg mmx_t0;
  523 + target_ulong cc_tmp; /* temporary for rcr/rcl */
529 524  
530 525 /* sysenter registers */
531 526 uint32_t sysenter_cs;
... ...
target-i386/exec.h
... ... @@ -29,60 +29,20 @@
29 29  
30 30 #include "cpu-defs.h"
31 31  
32   -/* at least 4 register variables are defined */
33 32 register struct CPUX86State *env asm(AREG0);
34 33  
35   -#ifndef CPU_NO_GLOBAL_REGS
36   -
37   -#if TARGET_LONG_BITS > HOST_LONG_BITS
38   -
39   -/* no registers can be used */
40   -#define T0 (env->t0)
41   -#define T1 (env->t1)
42   -#define T2 (env->t2)
43   -
44   -#else
45   -
46   -/* XXX: use unsigned long instead of target_ulong - better code will
47   - be generated for 64 bit CPUs */
48   -register target_ulong T0 asm(AREG1);
49   -register target_ulong T1 asm(AREG2);
50   -register target_ulong T2 asm(AREG3);
51   -
52   -#endif /* ! (TARGET_LONG_BITS > HOST_LONG_BITS) */
53   -
54   -#endif /* ! CPU_NO_GLOBAL_REGS */
55   -
56   -#define A0 T2
57   -
58 34 extern FILE *logfile;
59 35 extern int loglevel;
60 36  
61   -#ifndef reg_EAX
62 37 #define EAX (env->regs[R_EAX])
63   -#endif
64   -#ifndef reg_ECX
65 38 #define ECX (env->regs[R_ECX])
66   -#endif
67   -#ifndef reg_EDX
68 39 #define EDX (env->regs[R_EDX])
69   -#endif
70   -#ifndef reg_EBX
71 40 #define EBX (env->regs[R_EBX])
72   -#endif
73   -#ifndef reg_ESP
74 41 #define ESP (env->regs[R_ESP])
75   -#endif
76   -#ifndef reg_EBP
77 42 #define EBP (env->regs[R_EBP])
78   -#endif
79   -#ifndef reg_ESI
80 43 #define ESI (env->regs[R_ESI])
81   -#endif
82   -#ifndef reg_EDI
83 44 #define EDI (env->regs[R_EDI])
84   -#endif
85   -#define EIP (env->eip)
  45 +#define EIP (env->eip)
86 46 #define DF (env->df)
87 47  
88 48 #define CC_SRC (env->cc_src)
... ...
target-i386/helper_template.h
... ... @@ -287,11 +287,11 @@ target_ulong glue(helper_rcl, SUFFIX)(target_ulong t0, target_ulong t1)
287 287 if (count > 1)
288 288 res |= t0 >> (DATA_BITS + 1 - count);
289 289 t0 = res;
290   - env->t3 = (eflags & ~(CC_C | CC_O)) |
  290 + env->cc_tmp = (eflags & ~(CC_C | CC_O)) |
291 291 (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
292 292 ((src >> (DATA_BITS - count)) & CC_C);
293 293 } else {
294   - env->t3 = -1;
  294 + env->cc_tmp = -1;
295 295 }
296 296 return t0;
297 297 }
... ... @@ -316,11 +316,11 @@ target_ulong glue(helper_rcr, SUFFIX)(target_ulong t0, target_ulong t1)
316 316 if (count > 1)
317 317 res |= t0 << (DATA_BITS + 1 - count);
318 318 t0 = res;
319   - env->t3 = (eflags & ~(CC_C | CC_O)) |
  319 + env->cc_tmp = (eflags & ~(CC_C | CC_O)) |
320 320 (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
321 321 ((src >> (count - 1)) & CC_C);
322 322 } else {
323   - env->t3 = -1;
  323 + env->cc_tmp = -1;
324 324 }
325 325 return t0;
326 326 }
... ...
target-i386/translate.c
... ... @@ -58,8 +58,9 @@
58 58 //#define MACRO_TEST 1
59 59  
60 60 /* global register indexes */
61   -static TCGv cpu_env, cpu_T[2], cpu_A0, cpu_cc_op, cpu_cc_src, cpu_cc_dst;
62   -static TCGv cpu_T3;
  61 +static TCGv cpu_env, cpu_A0, cpu_cc_op, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
  62 +/* local temps */
  63 +static TCGv cpu_T[2], cpu_T3;
63 64 /* local register indexes (only used inside old micro ops) */
64 65 static TCGv cpu_tmp0, cpu_tmp1_i64, cpu_tmp2_i32, cpu_tmp3_i32, cpu_tmp4, cpu_ptr0, cpu_ptr1;
65 66 static TCGv cpu_tmp5, cpu_tmp6;
... ... @@ -260,34 +261,34 @@ static inline void gen_op_andl_A0_ffff(void)
260 261 #define REG_LH_OFFSET 4
261 262 #endif
262 263  
263   -static inline void gen_op_mov_reg_TN(int ot, int t_index, int reg)
  264 +static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
264 265 {
265 266 switch(ot) {
266 267 case OT_BYTE:
267 268 if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
268   - tcg_gen_st8_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
  269 + tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
269 270 } else {
270   - tcg_gen_st8_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
  271 + tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
271 272 }
272 273 break;
273 274 case OT_WORD:
274   - tcg_gen_st16_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
  275 + tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
275 276 break;
276 277 #ifdef TARGET_X86_64
277 278 case OT_LONG:
278   - tcg_gen_st32_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
  279 + tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
279 280 /* high part of register set to zero */
280 281 tcg_gen_movi_tl(cpu_tmp0, 0);
281 282 tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
282 283 break;
283 284 default:
284 285 case OT_QUAD:
285   - tcg_gen_st_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]));
  286 + tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
286 287 break;
287 288 #else
288 289 default:
289 290 case OT_LONG:
290   - tcg_gen_st32_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
  291 + tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
291 292 break;
292 293 #endif
293 294 }
... ... @@ -295,12 +296,12 @@ static inline void gen_op_mov_reg_TN(int ot, int t_index, int reg)
295 296  
296 297 static inline void gen_op_mov_reg_T0(int ot, int reg)
297 298 {
298   - gen_op_mov_reg_TN(ot, 0, reg);
  299 + gen_op_mov_reg_v(ot, reg, cpu_T[0]);
299 300 }
300 301  
301 302 static inline void gen_op_mov_reg_T1(int ot, int reg)
302 303 {
303   - gen_op_mov_reg_TN(ot, 1, reg);
  304 + gen_op_mov_reg_v(ot, reg, cpu_T[1]);
304 305 }
305 306  
306 307 static inline void gen_op_mov_reg_A0(int size, int reg)
... ... @@ -329,23 +330,28 @@ static inline void gen_op_mov_reg_A0(int size, int reg)
329 330 }
330 331 }
331 332  
332   -static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
  333 +static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
333 334 {
334 335 switch(ot) {
335 336 case OT_BYTE:
336 337 if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
337 338 goto std_case;
338 339 } else {
339   - tcg_gen_ld8u_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
  340 + tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
340 341 }
341 342 break;
342 343 default:
343 344 std_case:
344   - tcg_gen_ld_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]));
  345 + tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
345 346 break;
346 347 }
347 348 }
348 349  
  350 +static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
  351 +{
  352 + gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
  353 +}
  354 +
349 355 static inline void gen_op_movl_A0_reg(int reg)
350 356 {
351 357 tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
... ... @@ -511,90 +517,70 @@ static inline void gen_op_lds_T0_A0(int idx)
511 517 }
512 518 }
513 519  
514   -/* sign does not matter, except for lidt/lgdt call (TODO: fix it) */
515   -static inline void gen_op_ld_T0_A0(int idx)
  520 +static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0)
516 521 {
517 522 int mem_index = (idx >> 2) - 1;
518 523 switch(idx & 3) {
519 524 case 0:
520   - tcg_gen_qemu_ld8u(cpu_T[0], cpu_A0, mem_index);
  525 + tcg_gen_qemu_ld8u(t0, a0, mem_index);
521 526 break;
522 527 case 1:
523   - tcg_gen_qemu_ld16u(cpu_T[0], cpu_A0, mem_index);
  528 + tcg_gen_qemu_ld16u(t0, a0, mem_index);
524 529 break;
525 530 case 2:
526   - tcg_gen_qemu_ld32u(cpu_T[0], cpu_A0, mem_index);
  531 + tcg_gen_qemu_ld32u(t0, a0, mem_index);
527 532 break;
528 533 default:
529 534 case 3:
530   - tcg_gen_qemu_ld64(cpu_T[0], cpu_A0, mem_index);
  535 + tcg_gen_qemu_ld64(t0, a0, mem_index);
531 536 break;
532 537 }
533 538 }
534 539  
  540 +/* XXX: always use ldu or lds */
  541 +static inline void gen_op_ld_T0_A0(int idx)
  542 +{
  543 + gen_op_ld_v(idx, cpu_T[0], cpu_A0);
  544 +}
  545 +
535 546 static inline void gen_op_ldu_T0_A0(int idx)
536 547 {
537   - gen_op_ld_T0_A0(idx);
  548 + gen_op_ld_v(idx, cpu_T[0], cpu_A0);
538 549 }
539 550  
540 551 static inline void gen_op_ld_T1_A0(int idx)
541 552 {
  553 + gen_op_ld_v(idx, cpu_T[1], cpu_A0);
  554 +}
  555 +
  556 +static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0)
  557 +{
542 558 int mem_index = (idx >> 2) - 1;
543 559 switch(idx & 3) {
544 560 case 0:
545   - tcg_gen_qemu_ld8u(cpu_T[1], cpu_A0, mem_index);
  561 + tcg_gen_qemu_st8(t0, a0, mem_index);
546 562 break;
547 563 case 1:
548   - tcg_gen_qemu_ld16u(cpu_T[1], cpu_A0, mem_index);
  564 + tcg_gen_qemu_st16(t0, a0, mem_index);
549 565 break;
550 566 case 2:
551   - tcg_gen_qemu_ld32u(cpu_T[1], cpu_A0, mem_index);
  567 + tcg_gen_qemu_st32(t0, a0, mem_index);
552 568 break;
553 569 default:
554 570 case 3:
555   - tcg_gen_qemu_ld64(cpu_T[1], cpu_A0, mem_index);
  571 + tcg_gen_qemu_st64(t0, a0, mem_index);
556 572 break;
557 573 }
558 574 }
559 575  
560 576 static inline void gen_op_st_T0_A0(int idx)
561 577 {
562   - int mem_index = (idx >> 2) - 1;
563   - switch(idx & 3) {
564   - case 0:
565   - tcg_gen_qemu_st8(cpu_T[0], cpu_A0, mem_index);
566   - break;
567   - case 1:
568   - tcg_gen_qemu_st16(cpu_T[0], cpu_A0, mem_index);
569   - break;
570   - case 2:
571   - tcg_gen_qemu_st32(cpu_T[0], cpu_A0, mem_index);
572   - break;
573   - default:
574   - case 3:
575   - tcg_gen_qemu_st64(cpu_T[0], cpu_A0, mem_index);
576   - break;
577   - }
  578 + gen_op_st_v(idx, cpu_T[0], cpu_A0);
578 579 }
579 580  
580 581 static inline void gen_op_st_T1_A0(int idx)
581 582 {
582   - int mem_index = (idx >> 2) - 1;
583   - switch(idx & 3) {
584   - case 0:
585   - tcg_gen_qemu_st8(cpu_T[1], cpu_A0, mem_index);
586   - break;
587   - case 1:
588   - tcg_gen_qemu_st16(cpu_T[1], cpu_A0, mem_index);
589   - break;
590   - case 2:
591   - tcg_gen_qemu_st32(cpu_T[1], cpu_A0, mem_index);
592   - break;
593   - default:
594   - case 3:
595   - tcg_gen_qemu_st64(cpu_T[1], cpu_A0, mem_index);
596   - break;
597   - }
  583 + gen_op_st_v(idx, cpu_T[1], cpu_A0);
598 584 }
599 585  
600 586 static inline void gen_jmp_im(target_ulong pc)
... ... @@ -857,9 +843,11 @@ static void gen_compute_eflags(TCGv reg)
857 843 tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
858 844 }
859 845  
860   -static inline void gen_setcc_slow_T0(int op)
  846 +static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
861 847 {
862   - switch(op) {
  848 + if (s->cc_op != CC_OP_DYNAMIC)
  849 + gen_op_set_cc_op(s->cc_op);
  850 + switch(jcc_op) {
863 851 case JCC_O:
864 852 gen_compute_eflags(cpu_T[0]);
865 853 tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
... ... @@ -1151,7 +1139,7 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
1151 1139 break;
1152 1140 default:
1153 1141 slow_jcc:
1154   - gen_setcc_slow_T0(jcc_op);
  1142 + gen_setcc_slow_T0(s, jcc_op);
1155 1143 tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
1156 1144 cpu_T[0], 0, l1);
1157 1145 break;
... ... @@ -1436,7 +1424,8 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
1436 1424 {
1437 1425 target_ulong mask;
1438 1426 int shift_label;
1439   -
  1427 + TCGv t0, t1;
  1428 +
1440 1429 if (ot == OT_QUAD)
1441 1430 mask = 0x3f;
1442 1431 else
... ... @@ -1477,11 +1466,18 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
1477 1466 if (s->cc_op != CC_OP_DYNAMIC)
1478 1467 gen_op_set_cc_op(s->cc_op);
1479 1468  
  1469 + /* XXX: inefficient */
  1470 + t0 = tcg_temp_local_new(TCG_TYPE_TL);
  1471 + t1 = tcg_temp_local_new(TCG_TYPE_TL);
  1472 +
  1473 + tcg_gen_mov_tl(t0, cpu_T[0]);
  1474 + tcg_gen_mov_tl(t1, cpu_T3);
  1475 +
1480 1476 shift_label = gen_new_label();
1481 1477 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, shift_label);
1482 1478  
1483   - tcg_gen_mov_tl(cpu_cc_src, cpu_T3);
1484   - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
  1479 + tcg_gen_mov_tl(cpu_cc_src, t1);
  1480 + tcg_gen_mov_tl(cpu_cc_dst, t0);
1485 1481 if (is_right)
1486 1482 tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
1487 1483 else
... ... @@ -1489,6 +1485,9 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
1489 1485  
1490 1486 gen_set_label(shift_label);
1491 1487 s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
  1488 +
  1489 + tcg_temp_free(t0);
  1490 + tcg_temp_free(t1);
1492 1491 }
1493 1492  
1494 1493 static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
... ... @@ -1556,78 +1555,95 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
1556 1555 {
1557 1556 target_ulong mask;
1558 1557 int label1, label2, data_bits;
1559   -
  1558 + TCGv t0, t1, t2, a0;
  1559 +
  1560 + /* XXX: inefficient, but we must use local temps */
  1561 + t0 = tcg_temp_local_new(TCG_TYPE_TL);
  1562 + t1 = tcg_temp_local_new(TCG_TYPE_TL);
  1563 + t2 = tcg_temp_local_new(TCG_TYPE_TL);
  1564 + a0 = tcg_temp_local_new(TCG_TYPE_TL);
  1565 +
1560 1566 if (ot == OT_QUAD)
1561 1567 mask = 0x3f;
1562 1568 else
1563 1569 mask = 0x1f;
1564 1570  
1565 1571 /* load */
1566   - if (op1 == OR_TMP0)
1567   - gen_op_ld_T0_A0(ot + s->mem_index);
1568   - else
1569   - gen_op_mov_TN_reg(ot, 0, op1);
  1572 + if (op1 == OR_TMP0) {
  1573 + tcg_gen_mov_tl(a0, cpu_A0);
  1574 + gen_op_ld_v(ot + s->mem_index, t0, a0);
  1575 + } else {
  1576 + gen_op_mov_v_reg(ot, t0, op1);
  1577 + }
1570 1578  
1571   - tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
  1579 + tcg_gen_mov_tl(t1, cpu_T[1]);
  1580 +
  1581 + tcg_gen_andi_tl(t1, t1, mask);
1572 1582  
1573 1583 /* Must test zero case to avoid using undefined behaviour in TCG
1574 1584 shifts. */
1575 1585 label1 = gen_new_label();
1576   - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, label1);
  1586 + tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1);
1577 1587  
1578 1588 if (ot <= OT_WORD)
1579   - tcg_gen_andi_tl(cpu_tmp0, cpu_T[1], (1 << (3 + ot)) - 1);
  1589 + tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1);
1580 1590 else
1581   - tcg_gen_mov_tl(cpu_tmp0, cpu_T[1]);
  1591 + tcg_gen_mov_tl(cpu_tmp0, t1);
1582 1592  
1583   - gen_extu(ot, cpu_T[0]);
1584   - tcg_gen_mov_tl(cpu_T3, cpu_T[0]);
  1593 + gen_extu(ot, t0);
  1594 + tcg_gen_mov_tl(t2, t0);
1585 1595  
1586 1596 data_bits = 8 << ot;
1587 1597 /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX:
1588 1598 fix TCG definition) */
1589 1599 if (is_right) {
1590   - tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_tmp0);
  1600 + tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0);
1591 1601 tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0);
1592   - tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
  1602 + tcg_gen_shl_tl(t0, t0, cpu_tmp0);
1593 1603 } else {
1594   - tcg_gen_shl_tl(cpu_tmp4, cpu_T[0], cpu_tmp0);
  1604 + tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0);
1595 1605 tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0);
1596   - tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
  1606 + tcg_gen_shr_tl(t0, t0, cpu_tmp0);
1597 1607 }
1598   - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
  1608 + tcg_gen_or_tl(t0, t0, cpu_tmp4);
1599 1609  
1600 1610 gen_set_label(label1);
1601 1611 /* store */
1602   - if (op1 == OR_TMP0)
1603   - gen_op_st_T0_A0(ot + s->mem_index);
1604   - else
1605   - gen_op_mov_reg_T0(ot, op1);
  1612 + if (op1 == OR_TMP0) {
  1613 + gen_op_st_v(ot + s->mem_index, t0, a0);
  1614 + } else {
  1615 + gen_op_mov_reg_v(ot, op1, t0);
  1616 + }
1606 1617  
1607 1618 /* update eflags */
1608 1619 if (s->cc_op != CC_OP_DYNAMIC)
1609 1620 gen_op_set_cc_op(s->cc_op);
1610 1621  
1611 1622 label2 = gen_new_label();
1612   - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, label2);
  1623 + tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);
1613 1624  
1614 1625 gen_compute_eflags(cpu_cc_src);
1615 1626 tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
1616   - tcg_gen_xor_tl(cpu_tmp0, cpu_T3, cpu_T[0]);
  1627 + tcg_gen_xor_tl(cpu_tmp0, t2, t0);
1617 1628 tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
1618 1629 tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
1619 1630 tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
1620 1631 if (is_right) {
1621   - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], data_bits - 1);
  1632 + tcg_gen_shri_tl(t0, t0, data_bits - 1);
1622 1633 }
1623   - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_C);
1624   - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
  1634 + tcg_gen_andi_tl(t0, t0, CC_C);
  1635 + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
1625 1636  
1626 1637 tcg_gen_discard_tl(cpu_cc_dst);
1627 1638 tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
1628 1639  
1629 1640 gen_set_label(label2);
1630 1641 s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
  1642 +
  1643 + tcg_temp_free(t0);
  1644 + tcg_temp_free(t1);
  1645 + tcg_temp_free(t2);
  1646 + tcg_temp_free(a0);
1631 1647 }
1632 1648  
1633 1649 static void *helper_rotc[8] = {
... ... @@ -1666,9 +1682,9 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
1666 1682  
1667 1683 /* update eflags */
1668 1684 label1 = gen_new_label();
1669   - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T3, -1, label1);
  1685 + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1);
1670 1686  
1671   - tcg_gen_mov_tl(cpu_cc_src, cpu_T3);
  1687 + tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
1672 1688 tcg_gen_discard_tl(cpu_cc_dst);
1673 1689 tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
1674 1690  
... ... @@ -1682,6 +1698,12 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
1682 1698 {
1683 1699 int label1, label2, data_bits;
1684 1700 target_ulong mask;
  1701 + TCGv t0, t1, t2, a0;
  1702 +
  1703 + t0 = tcg_temp_local_new(TCG_TYPE_TL);
  1704 + t1 = tcg_temp_local_new(TCG_TYPE_TL);
  1705 + t2 = tcg_temp_local_new(TCG_TYPE_TL);
  1706 + a0 = tcg_temp_local_new(TCG_TYPE_TL);
1685 1707  
1686 1708 if (ot == OT_QUAD)
1687 1709 mask = 0x3f;
... ... @@ -1689,95 +1711,102 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
1689 1711 mask = 0x1f;
1690 1712  
1691 1713 /* load */
1692   - if (op1 == OR_TMP0)
1693   - gen_op_ld_T0_A0(ot + s->mem_index);
1694   - else
1695   - gen_op_mov_TN_reg(ot, 0, op1);
  1714 + if (op1 == OR_TMP0) {
  1715 + tcg_gen_mov_tl(a0, cpu_A0);
  1716 + gen_op_ld_v(ot + s->mem_index, t0, a0);
  1717 + } else {
  1718 + gen_op_mov_v_reg(ot, t0, op1);
  1719 + }
1696 1720  
1697 1721 tcg_gen_andi_tl(cpu_T3, cpu_T3, mask);
  1722 +
  1723 + tcg_gen_mov_tl(t1, cpu_T[1]);
  1724 + tcg_gen_mov_tl(t2, cpu_T3);
  1725 +
1698 1726 /* Must test zero case to avoid using undefined behaviour in TCG
1699 1727 shifts. */
1700 1728 label1 = gen_new_label();
1701   - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T3, 0, label1);
  1729 + tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
1702 1730  
1703   - tcg_gen_addi_tl(cpu_tmp5, cpu_T3, -1);
  1731 + tcg_gen_addi_tl(cpu_tmp5, t2, -1);
1704 1732 if (ot == OT_WORD) {
1705 1733 /* Note: we implement the Intel behaviour for shift count > 16 */
1706 1734 if (is_right) {
1707   - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
1708   - tcg_gen_shli_tl(cpu_tmp0, cpu_T[1], 16);
1709   - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
1710   - tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
  1735 + tcg_gen_andi_tl(t0, t0, 0xffff);
  1736 + tcg_gen_shli_tl(cpu_tmp0, t1, 16);
  1737 + tcg_gen_or_tl(t0, t0, cpu_tmp0);
  1738 + tcg_gen_ext32u_tl(t0, t0);
1711 1739  
1712   - tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_tmp5);
  1740 + tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
1713 1741  
1714 1742 /* only needed if count > 16, but a test would complicate */
1715   - tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), cpu_T3);
1716   - tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp5);
  1743 + tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
  1744 + tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);
1717 1745  
1718   - tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T3);
  1746 + tcg_gen_shr_tl(t0, t0, t2);
1719 1747  
1720   - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
  1748 + tcg_gen_or_tl(t0, t0, cpu_tmp0);
1721 1749 } else {
1722 1750 /* XXX: not optimal */
1723   - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
1724   - tcg_gen_shli_tl(cpu_T[1], cpu_T[1], 16);
1725   - tcg_gen_or_tl(cpu_T[1], cpu_T[1], cpu_T[0]);
1726   - tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
  1751 + tcg_gen_andi_tl(t0, t0, 0xffff);
  1752 + tcg_gen_shli_tl(t1, t1, 16);
  1753 + tcg_gen_or_tl(t1, t1, t0);
  1754 + tcg_gen_ext32u_tl(t1, t1);
1727 1755  
1728   - tcg_gen_shl_tl(cpu_tmp4, cpu_T[0], cpu_tmp5);
  1756 + tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
1729 1757 tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(32), cpu_tmp5);
1730   - tcg_gen_shr_tl(cpu_tmp6, cpu_T[1], cpu_tmp0);
  1758 + tcg_gen_shr_tl(cpu_tmp6, t1, cpu_tmp0);
1731 1759 tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp6);
1732 1760  
1733   - tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T3);
1734   - tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), cpu_T3);
1735   - tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp5);
1736   - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
  1761 + tcg_gen_shl_tl(t0, t0, t2);
  1762 + tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
  1763 + tcg_gen_shr_tl(t1, t1, cpu_tmp5);
  1764 + tcg_gen_or_tl(t0, t0, t1);
1737 1765 }
1738 1766 } else {
1739 1767 data_bits = 8 << ot;
1740 1768 if (is_right) {
1741 1769 if (ot == OT_LONG)
1742   - tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
  1770 + tcg_gen_ext32u_tl(t0, t0);
1743 1771  
1744   - tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_tmp5);
  1772 + tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
1745 1773  
1746   - tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T3);
1747   - tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), cpu_T3);
1748   - tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_tmp5);
1749   - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
  1774 + tcg_gen_shr_tl(t0, t0, t2);
  1775 + tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
  1776 + tcg_gen_shl_tl(t1, t1, cpu_tmp5);
  1777 + tcg_gen_or_tl(t0, t0, t1);
1750 1778  
1751 1779 } else {
1752 1780 if (ot == OT_LONG)
1753   - tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
  1781 + tcg_gen_ext32u_tl(t1, t1);
1754 1782  
1755   - tcg_gen_shl_tl(cpu_tmp4, cpu_T[0], cpu_tmp5);
  1783 + tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
1756 1784  
1757   - tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T3);
1758   - tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), cpu_T3);
1759   - tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp5);
1760   - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
  1785 + tcg_gen_shl_tl(t0, t0, t2);
  1786 + tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
  1787 + tcg_gen_shr_tl(t1, t1, cpu_tmp5);
  1788 + tcg_gen_or_tl(t0, t0, t1);
1761 1789 }
1762 1790 }
1763   - tcg_gen_mov_tl(cpu_T[1], cpu_tmp4);
  1791 + tcg_gen_mov_tl(t1, cpu_tmp4);
1764 1792  
1765 1793 gen_set_label(label1);
1766 1794 /* store */
1767   - if (op1 == OR_TMP0)
1768   - gen_op_st_T0_A0(ot + s->mem_index);
1769   - else
1770   - gen_op_mov_reg_T0(ot, op1);
  1795 + if (op1 == OR_TMP0) {
  1796 + gen_op_st_v(ot + s->mem_index, t0, a0);
  1797 + } else {
  1798 + gen_op_mov_reg_v(ot, op1, t0);
  1799 + }
1771 1800  
1772 1801 /* update eflags */
1773 1802 if (s->cc_op != CC_OP_DYNAMIC)
1774 1803 gen_op_set_cc_op(s->cc_op);
1775 1804  
1776 1805 label2 = gen_new_label();
1777   - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T3, 0, label2);
  1806 + tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);
1778 1807  
1779   - tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
1780   - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
  1808 + tcg_gen_mov_tl(cpu_cc_src, t1);
  1809 + tcg_gen_mov_tl(cpu_cc_dst, t0);
1781 1810 if (is_right) {
1782 1811 tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
1783 1812 } else {
... ... @@ -1785,6 +1814,11 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
1785 1814 }
1786 1815 gen_set_label(label2);
1787 1816 s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
  1817 +
  1818 + tcg_temp_free(t0);
  1819 + tcg_temp_free(t1);
  1820 + tcg_temp_free(t2);
  1821 + tcg_temp_free(a0);
1788 1822 }
1789 1823  
1790 1824 static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
... ... @@ -2217,23 +2251,26 @@ static inline void gen_jcc(DisasContext *s, int b,
2217 2251 static void gen_setcc(DisasContext *s, int b)
2218 2252 {
2219 2253 int inv, jcc_op, l1;
  2254 + TCGv t0;
2220 2255  
2221 2256 if (is_fast_jcc_case(s, b)) {
2222 2257 /* nominal case: we use a jump */
2223   - tcg_gen_movi_tl(cpu_T[0], 0);
  2258 + /* XXX: make it faster by adding new instructions in TCG */
  2259 + t0 = tcg_temp_local_new(TCG_TYPE_TL);
  2260 + tcg_gen_movi_tl(t0, 0);
2224 2261 l1 = gen_new_label();
2225 2262 gen_jcc1(s, s->cc_op, b ^ 1, l1);
2226   - tcg_gen_movi_tl(cpu_T[0], 1);
  2263 + tcg_gen_movi_tl(t0, 1);
2227 2264 gen_set_label(l1);
  2265 + tcg_gen_mov_tl(cpu_T[0], t0);
  2266 + tcg_temp_free(t0);
2228 2267 } else {
2229 2268 /* slow case: it is more efficient not to generate a jump,
2230 2269 although it is questionnable whether this optimization is
2231 2270 worth to */
2232 2271 inv = b & 1;
2233 2272 jcc_op = (b >> 1) & 7;
2234   - if (s->cc_op != CC_OP_DYNAMIC)
2235   - gen_op_set_cc_op(s->cc_op);
2236   - gen_setcc_slow_T0(jcc_op);
  2273 + gen_setcc_slow_T0(s, jcc_op);
2237 2274 if (inv) {
2238 2275 tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
2239 2276 }
... ... @@ -4353,6 +4390,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
4353 4390 case 0x1b1: /* cmpxchg Ev, Gv */
4354 4391 {
4355 4392 int label1, label2;
  4393 + TCGv t0, t1, t2, a0;
4356 4394  
4357 4395 if ((b & 1) == 0)
4358 4396 ot = OT_BYTE;
... ... @@ -4361,37 +4399,46 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
4361 4399 modrm = ldub_code(s->pc++);
4362 4400 reg = ((modrm >> 3) & 7) | rex_r;
4363 4401 mod = (modrm >> 6) & 3;
4364   - gen_op_mov_TN_reg(ot, 1, reg);
  4402 + t0 = tcg_temp_local_new(TCG_TYPE_TL);
  4403 + t1 = tcg_temp_local_new(TCG_TYPE_TL);
  4404 + t2 = tcg_temp_local_new(TCG_TYPE_TL);
  4405 + a0 = tcg_temp_local_new(TCG_TYPE_TL);
  4406 + gen_op_mov_v_reg(ot, t1, reg);
4365 4407 if (mod == 3) {
4366 4408 rm = (modrm & 7) | REX_B(s);
4367   - gen_op_mov_TN_reg(ot, 0, rm);
  4409 + gen_op_mov_v_reg(ot, t0, rm);
4368 4410 } else {
4369 4411 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
4370   - gen_op_ld_T0_A0(ot + s->mem_index);
  4412 + tcg_gen_mov_tl(a0, cpu_A0);
  4413 + gen_op_ld_v(ot + s->mem_index, t0, a0);
4371 4414 rm = 0; /* avoid warning */
4372 4415 }
4373 4416 label1 = gen_new_label();
4374   - tcg_gen_ld_tl(cpu_T3, cpu_env, offsetof(CPUState, regs[R_EAX]));
4375   - tcg_gen_sub_tl(cpu_T3, cpu_T3, cpu_T[0]);
4376   - gen_extu(ot, cpu_T3);
4377   - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T3, 0, label1);
  4417 + tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUState, regs[R_EAX]));
  4418 + tcg_gen_sub_tl(t2, t2, t0);
  4419 + gen_extu(ot, t2);
  4420 + tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
4378 4421 if (mod == 3) {
4379 4422 label2 = gen_new_label();
4380   - gen_op_mov_reg_T0(ot, R_EAX);
  4423 + gen_op_mov_reg_v(ot, R_EAX, t0);
4381 4424 tcg_gen_br(label2);
4382 4425 gen_set_label(label1);
4383   - gen_op_mov_reg_T1(ot, rm);
  4426 + gen_op_mov_reg_v(ot, rm, t1);
4384 4427 gen_set_label(label2);
4385 4428 } else {
4386   - tcg_gen_mov_tl(cpu_T[1], cpu_T[0]);
4387   - gen_op_mov_reg_T0(ot, R_EAX);
  4429 + tcg_gen_mov_tl(t1, t0);
  4430 + gen_op_mov_reg_v(ot, R_EAX, t0);
4388 4431 gen_set_label(label1);
4389 4432 /* always store */
4390   - gen_op_st_T1_A0(ot + s->mem_index);
  4433 + gen_op_st_v(ot + s->mem_index, t1, a0);
4391 4434 }
4392   - tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
4393   - tcg_gen_mov_tl(cpu_cc_dst, cpu_T3);
  4435 + tcg_gen_mov_tl(cpu_cc_src, t0);
  4436 + tcg_gen_mov_tl(cpu_cc_dst, t2);
4394 4437 s->cc_op = CC_OP_SUBB + ot;
  4438 + tcg_temp_free(t0);
  4439 + tcg_temp_free(t1);
  4440 + tcg_temp_free(t2);
  4441 + tcg_temp_free(a0);
4395 4442 }
4396 4443 break;
4397 4444 case 0x1c7: /* cmpxchg8b */
... ... @@ -5457,10 +5504,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
5457 5504 (JCC_BE << 1),
5458 5505 (JCC_P << 1),
5459 5506 };
5460   - op1 = fcmov_cc[op & 3] | ((op >> 3) & 1);
5461   - gen_setcc(s, op1);
  5507 + op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
5462 5508 l1 = gen_new_label();
5463   - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[0], 0, l1);
  5509 + gen_jcc1(s, s->cc_op, op1, l1);
5464 5510 tcg_gen_helper_0_1(helper_fmov_ST0_STN, tcg_const_i32(opreg));
5465 5511 gen_set_label(l1);
5466 5512 }
... ... @@ -5806,25 +5852,26 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
5806 5852 case 0x140 ... 0x14f: /* cmov Gv, Ev */
5807 5853 {
5808 5854 int l1;
  5855 + TCGv t0;
  5856 +
5809 5857 ot = dflag + OT_WORD;
5810 5858 modrm = ldub_code(s->pc++);
5811 5859 reg = ((modrm >> 3) & 7) | rex_r;
5812 5860 mod = (modrm >> 6) & 3;
  5861 + t0 = tcg_temp_local_new(TCG_TYPE_TL);
5813 5862 if (mod != 3) {
5814 5863 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
5815   - gen_op_ld_T1_A0(ot + s->mem_index);
  5864 + gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
5816 5865 } else {
5817 5866 rm = (modrm & 7) | REX_B(s);
5818   - gen_op_mov_TN_reg(ot, 1, rm);
  5867 + gen_op_mov_v_reg(ot, t0, rm);
5819 5868 }
5820   - if (s->cc_op != CC_OP_DYNAMIC)
5821   - gen_op_set_cc_op(s->cc_op);
5822 5869 #ifdef TARGET_X86_64
5823 5870 if (ot == OT_LONG) {
5824 5871 /* XXX: specific Intel behaviour ? */
5825 5872 l1 = gen_new_label();
5826 5873 gen_jcc1(s, s->cc_op, b ^ 1, l1);
5827   - tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
  5874 + tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
5828 5875 gen_set_label(l1);
5829 5876 tcg_gen_movi_tl(cpu_tmp0, 0);
5830 5877 tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
... ... @@ -5833,9 +5880,10 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
5833 5880 {
5834 5881 l1 = gen_new_label();
5835 5882 gen_jcc1(s, s->cc_op, b ^ 1, l1);
5836   - gen_op_mov_reg_T1(ot, reg);
  5883 + gen_op_mov_reg_v(ot, reg, t0);
5837 5884 gen_set_label(l1);
5838 5885 }
  5886 + tcg_temp_free(t0);
5839 5887 }
5840 5888 break;
5841 5889  
... ... @@ -6039,6 +6087,8 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
6039 6087 case 0x1bd: /* bsr */
6040 6088 {
6041 6089 int label1;
  6090 + TCGv t0;
  6091 +
6042 6092 ot = dflag + OT_WORD;
6043 6093 modrm = ldub_code(s->pc++);
6044 6094 reg = ((modrm >> 3) & 7) | rex_r;
... ... @@ -6046,17 +6096,20 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
6046 6096 gen_extu(ot, cpu_T[0]);
6047 6097 label1 = gen_new_label();
6048 6098 tcg_gen_movi_tl(cpu_cc_dst, 0);
6049   - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[0], 0, label1);
  6099 + t0 = tcg_temp_local_new(TCG_TYPE_TL);
  6100 + tcg_gen_mov_tl(t0, cpu_T[0]);
  6101 + tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1);
6050 6102 if (b & 1) {
6051   - tcg_gen_helper_1_1(helper_bsr, cpu_T[0], cpu_T[0]);
  6103 + tcg_gen_helper_1_1(helper_bsr, cpu_T[0], t0);
6052 6104 } else {
6053   - tcg_gen_helper_1_1(helper_bsf, cpu_T[0], cpu_T[0]);
  6105 + tcg_gen_helper_1_1(helper_bsf, cpu_T[0], t0);
6054 6106 }
6055 6107 gen_op_mov_reg_T0(ot, reg);
6056 6108 tcg_gen_movi_tl(cpu_cc_dst, 1);
6057 6109 gen_set_label(label1);
6058 6110 tcg_gen_discard_tl(cpu_cc_src);
6059 6111 s->cc_op = CC_OP_LOGICB + ot;
  6112 + tcg_temp_free(t0);
6060 6113 }
6061 6114 break;
6062 6115 /************************/
... ... @@ -6725,8 +6778,13 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
6725 6778 #endif
6726 6779 {
6727 6780 int label1;
  6781 + TCGv t0, t1, t2;
  6782 +
6728 6783 if (!s->pe || s->vm86)
6729 6784 goto illegal_op;
  6785 + t0 = tcg_temp_local_new(TCG_TYPE_TL);
  6786 + t1 = tcg_temp_local_new(TCG_TYPE_TL);
  6787 + t2 = tcg_temp_local_new(TCG_TYPE_TL);
6730 6788 ot = OT_WORD;
6731 6789 modrm = ldub_code(s->pc++);
6732 6790 reg = (modrm >> 3) & 7;
... ... @@ -6734,55 +6792,61 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
6734 6792 rm = modrm & 7;
6735 6793 if (mod != 3) {
6736 6794 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
6737   - gen_op_ld_T0_A0(ot + s->mem_index);
  6795 + gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
6738 6796 } else {
6739   - gen_op_mov_TN_reg(ot, 0, rm);
  6797 + gen_op_mov_v_reg(ot, t0, rm);
6740 6798 }
6741   - gen_op_mov_TN_reg(ot, 1, reg);
6742   - tcg_gen_andi_tl(cpu_tmp0, cpu_T[0], 3);
6743   - tcg_gen_andi_tl(cpu_T[1], cpu_T[1], 3);
6744   - tcg_gen_movi_tl(cpu_T3, 0);
  6799 + gen_op_mov_v_reg(ot, t1, reg);
  6800 + tcg_gen_andi_tl(cpu_tmp0, t0, 3);
  6801 + tcg_gen_andi_tl(t1, t1, 3);
  6802 + tcg_gen_movi_tl(t2, 0);
6745 6803 label1 = gen_new_label();
6746   - tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, cpu_T[1], label1);
6747   - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], ~3);
6748   - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
6749   - tcg_gen_movi_tl(cpu_T3, CC_Z);
  6804 + tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
  6805 + tcg_gen_andi_tl(t0, t0, ~3);
  6806 + tcg_gen_or_tl(t0, t0, t1);
  6807 + tcg_gen_movi_tl(t2, CC_Z);
6750 6808 gen_set_label(label1);
6751 6809 if (mod != 3) {
6752   - gen_op_st_T0_A0(ot + s->mem_index);
  6810 + gen_op_st_v(ot + s->mem_index, t0, cpu_A0);
6753 6811 } else {
6754   - gen_op_mov_reg_T0(ot, rm);
  6812 + gen_op_mov_reg_v(ot, rm, t0);
6755 6813 }
6756 6814 if (s->cc_op != CC_OP_DYNAMIC)
6757 6815 gen_op_set_cc_op(s->cc_op);
6758 6816 gen_compute_eflags(cpu_cc_src);
6759 6817 tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
6760   - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T3);
  6818 + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
6761 6819 s->cc_op = CC_OP_EFLAGS;
  6820 + tcg_temp_free(t0);
  6821 + tcg_temp_free(t1);
  6822 + tcg_temp_free(t2);
6762 6823 }
6763 6824 break;
6764 6825 case 0x102: /* lar */
6765 6826 case 0x103: /* lsl */
6766 6827 {
6767 6828 int label1;
  6829 + TCGv t0;
6768 6830 if (!s->pe || s->vm86)
6769 6831 goto illegal_op;
6770 6832 ot = dflag ? OT_LONG : OT_WORD;
6771 6833 modrm = ldub_code(s->pc++);
6772 6834 reg = ((modrm >> 3) & 7) | rex_r;
6773 6835 gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
  6836 + t0 = tcg_temp_local_new(TCG_TYPE_TL);
6774 6837 if (s->cc_op != CC_OP_DYNAMIC)
6775 6838 gen_op_set_cc_op(s->cc_op);
6776 6839 if (b == 0x102)
6777   - tcg_gen_helper_1_1(helper_lar, cpu_T[0], cpu_T[0]);
  6840 + tcg_gen_helper_1_1(helper_lar, t0, cpu_T[0]);
6778 6841 else
6779   - tcg_gen_helper_1_1(helper_lsl, cpu_T[0], cpu_T[0]);
  6842 + tcg_gen_helper_1_1(helper_lsl, t0, cpu_T[0]);
6780 6843 tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
6781 6844 label1 = gen_new_label();
6782 6845 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
6783   - gen_op_mov_reg_T0(ot, reg);
  6846 + gen_op_mov_reg_v(ot, reg, t0);
6784 6847 gen_set_label(label1);
6785 6848 s->cc_op = CC_OP_EFLAGS;
  6849 + tcg_temp_free(t0);
6786 6850 }
6787 6851 break;
6788 6852 case 0x118:
... ... @@ -7029,17 +7093,6 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
7029 7093 return s->pc;
7030 7094 }
7031 7095  
7032   -static void tcg_macro_func(TCGContext *s, int macro_id, const int *dead_args)
7033   -{
7034   - switch(macro_id) {
7035   -#ifdef MACRO_TEST
7036   - case MACRO_TEST:
7037   - tcg_gen_helper_0_1(helper_divl_EAX_T0, cpu_T[0]);
7038   - break;
7039   -#endif
7040   - }
7041   -}
7042   -
7043 7096 void optimize_flags_init(void)
7044 7097 {
7045 7098 #if TCG_TARGET_REG_BITS == 32
... ... @@ -7047,33 +7100,15 @@ void optimize_flags_init(void)
7047 7100 #else
7048 7101 assert(sizeof(CCTable) == (1 << 4));
7049 7102 #endif
7050   - tcg_set_macro_func(&tcg_ctx, tcg_macro_func);
7051   -
7052 7103 cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");
7053   -#if TARGET_LONG_BITS > HOST_LONG_BITS
7054   - cpu_T[0] = tcg_global_mem_new(TCG_TYPE_TL,
7055   - TCG_AREG0, offsetof(CPUState, t0), "T0");
7056   - cpu_T[1] = tcg_global_mem_new(TCG_TYPE_TL,
7057   - TCG_AREG0, offsetof(CPUState, t1), "T1");
7058   - cpu_A0 = tcg_global_mem_new(TCG_TYPE_TL,
7059   - TCG_AREG0, offsetof(CPUState, t2), "A0");
7060   -#else
7061   - cpu_T[0] = tcg_global_reg_new(TCG_TYPE_TL, TCG_AREG1, "T0");
7062   - cpu_T[1] = tcg_global_reg_new(TCG_TYPE_TL, TCG_AREG2, "T1");
7063   - cpu_A0 = tcg_global_reg_new(TCG_TYPE_TL, TCG_AREG3, "A0");
7064   -#endif
7065   - cpu_T3 = tcg_global_mem_new(TCG_TYPE_TL,
7066   - TCG_AREG0, offsetof(CPUState, t3), "T3");
7067   -#if defined(__i386__) && (TARGET_LONG_BITS <= HOST_LONG_BITS)
7068   - /* XXX: must be suppressed once there are less fixed registers */
7069   - cpu_tmp1_i64 = tcg_global_reg2_new_hack(TCG_TYPE_I64, TCG_AREG1, TCG_AREG2, "tmp1");
7070   -#endif
7071 7104 cpu_cc_op = tcg_global_mem_new(TCG_TYPE_I32,
7072 7105 TCG_AREG0, offsetof(CPUState, cc_op), "cc_op");
7073 7106 cpu_cc_src = tcg_global_mem_new(TCG_TYPE_TL,
7074 7107 TCG_AREG0, offsetof(CPUState, cc_src), "cc_src");
7075 7108 cpu_cc_dst = tcg_global_mem_new(TCG_TYPE_TL,
7076 7109 TCG_AREG0, offsetof(CPUState, cc_dst), "cc_dst");
  7110 + cpu_cc_tmp = tcg_global_mem_new(TCG_TYPE_TL,
  7111 + TCG_AREG0, offsetof(CPUState, cc_tmp), "cc_tmp");
7077 7112  
7078 7113 /* register helpers */
7079 7114  
... ... @@ -7145,10 +7180,13 @@ static inline int gen_intermediate_code_internal(CPUState *env,
7145 7180 printf("ERROR addseg\n");
7146 7181 #endif
7147 7182  
  7183 + cpu_T[0] = tcg_temp_new(TCG_TYPE_TL);
  7184 + cpu_T[1] = tcg_temp_new(TCG_TYPE_TL);
  7185 + cpu_A0 = tcg_temp_new(TCG_TYPE_TL);
  7186 + cpu_T3 = tcg_temp_new(TCG_TYPE_TL);
  7187 +
7148 7188 cpu_tmp0 = tcg_temp_new(TCG_TYPE_TL);
7149   -#if !(defined(__i386__) && (TARGET_LONG_BITS <= HOST_LONG_BITS))
7150 7189 cpu_tmp1_i64 = tcg_temp_new(TCG_TYPE_I64);
7151   -#endif
7152 7190 cpu_tmp2_i32 = tcg_temp_new(TCG_TYPE_I32);
7153 7191 cpu_tmp3_i32 = tcg_temp_new(TCG_TYPE_I32);
7154 7192 cpu_tmp4 = tcg_temp_new(TCG_TYPE_TL);
... ...