Commit fb951ff5c0b598f7a4dff0fd59d5167345c3337a

Authored by Filip Navara
1 parent dc485d28

Convert disas_neon_data_insn and helpers not to use cpu_T.

Signed-off-by: Filip Navara <filip.navara@gmail.com>
Showing 1 changed file with 273 additions and 298 deletions
target-arm/translate.c
@@ -188,13 +188,9 @@ static void store_reg(DisasContext *s, int reg, TCGv var) @@ -188,13 +188,9 @@ static void store_reg(DisasContext *s, int reg, TCGv var)
188 188
189 #define gen_op_addl_T1_im(im) tcg_gen_addi_i32(cpu_T[1], cpu_T[1], im) 189 #define gen_op_addl_T1_im(im) tcg_gen_addi_i32(cpu_T[1], cpu_T[1], im)
190 #define gen_op_addl_T0_T1() tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1]) 190 #define gen_op_addl_T0_T1() tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1])
191 -#define gen_op_subl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[0], cpu_T[1])  
192 -#define gen_op_rsbl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[1], cpu_T[0])  
193 191
194 #define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1]) 192 #define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1])
195 -#define gen_op_xorl_T0_T1() tcg_gen_xor_i32(cpu_T[0], cpu_T[0], cpu_T[1])  
196 #define gen_op_orl_T0_T1() tcg_gen_or_i32(cpu_T[0], cpu_T[0], cpu_T[1]) 193 #define gen_op_orl_T0_T1() tcg_gen_or_i32(cpu_T[0], cpu_T[0], cpu_T[1])
197 -#define gen_op_notl_T0() tcg_gen_not_i32(cpu_T[0], cpu_T[0])  
198 #define gen_op_notl_T1() tcg_gen_not_i32(cpu_T[1], cpu_T[1]) 194 #define gen_op_notl_T1() tcg_gen_not_i32(cpu_T[1], cpu_T[1])
199 195
200 #define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im) 196 #define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im)
@@ -209,7 +205,6 @@ static void store_reg(DisasContext *s, int reg, TCGv var) @@ -209,7 +205,6 @@ static void store_reg(DisasContext *s, int reg, TCGv var)
209 #define gen_sxtb16(var) gen_helper_sxtb16(var, var) 205 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
210 #define gen_uxtb16(var) gen_helper_uxtb16(var, var) 206 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
211 207
212 -#define gen_op_mul_T0_T1() tcg_gen_mul_i32(cpu_T[0], cpu_T[0], cpu_T[1])  
213 208
214 #define gen_set_cpsr(var, mask) gen_helper_cpsr_write(var, tcg_const_i32(mask)) 209 #define gen_set_cpsr(var, mask) gen_helper_cpsr_write(var, tcg_const_i32(mask))
215 /* Set NZCV flags from the high 4 bits of var. */ 210 /* Set NZCV flags from the high 4 bits of var. */
@@ -1118,14 +1113,6 @@ neon_reg_offset (int reg, int n) @@ -1118,14 +1113,6 @@ neon_reg_offset (int reg, int n)
1118 return vfp_reg_offset(0, sreg); 1113 return vfp_reg_offset(0, sreg);
1119 } 1114 }
1120 1115
1121 -/* FIXME: Remove these. */  
1122 -#define neon_T0 cpu_T[0]  
1123 -#define neon_T1 cpu_T[1]  
1124 -#define NEON_GET_REG(T, reg, n) \  
1125 - tcg_gen_ld_i32(neon_##T, cpu_env, neon_reg_offset(reg, n))  
1126 -#define NEON_SET_REG(T, reg, n) \  
1127 - tcg_gen_st_i32(neon_##T, cpu_env, neon_reg_offset(reg, n))  
1128 -  
1129 static TCGv neon_load_reg(int reg, int pass) 1116 static TCGv neon_load_reg(int reg, int pass)
1130 { 1117 {
1131 TCGv tmp = new_tmp(); 1118 TCGv tmp = new_tmp();
@@ -3485,31 +3472,25 @@ static void gen_nop_hint(DisasContext *s, int val) @@ -3485,31 +3472,25 @@ static void gen_nop_hint(DisasContext *s, int val)
3485 } 3472 }
3486 } 3473 }
3487 3474
3488 -/* These macros help make the code more readable when migrating from the  
3489 - old dyngen helpers. They should probably be removed when  
3490 - T0/T1 are removed. */  
3491 -#define CPU_T001 cpu_T[0], cpu_T[0], cpu_T[1]  
3492 -#define CPU_T0E01 cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]  
3493 -  
3494 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1 3475 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
3495 3476
3496 -static inline int gen_neon_add(int size) 3477 +static inline int gen_neon_add(int size, TCGv t0, TCGv t1)
3497 { 3478 {
3498 switch (size) { 3479 switch (size) {
3499 - case 0: gen_helper_neon_add_u8(CPU_T001); break;  
3500 - case 1: gen_helper_neon_add_u16(CPU_T001); break;  
3501 - case 2: gen_op_addl_T0_T1(); break; 3480 + case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
  3481 + case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
  3482 + case 2: tcg_gen_add_i32(t0, t0, t1); break;
3502 default: return 1; 3483 default: return 1;
3503 } 3484 }
3504 return 0; 3485 return 0;
3505 } 3486 }
3506 3487
3507 -static inline void gen_neon_rsb(int size) 3488 +static inline void gen_neon_rsb(int size, TCGv t0, TCGv t1)
3508 { 3489 {
3509 switch (size) { 3490 switch (size) {
3510 - case 0: gen_helper_neon_sub_u8(cpu_T[0], cpu_T[1], cpu_T[0]); break;  
3511 - case 1: gen_helper_neon_sub_u16(cpu_T[0], cpu_T[1], cpu_T[0]); break;  
3512 - case 2: gen_op_rsbl_T0_T1(); break; 3491 + case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
  3492 + case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
  3493 + case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3513 default: return; 3494 default: return;
3514 } 3495 }
3515 } 3496 }
@@ -3529,22 +3510,22 @@ static inline void gen_neon_rsb(int size) @@ -3529,22 +3510,22 @@ static inline void gen_neon_rsb(int size)
3529 #define GEN_NEON_INTEGER_OP_ENV(name) do { \ 3510 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3530 switch ((size << 1) | u) { \ 3511 switch ((size << 1) | u) { \
3531 case 0: \ 3512 case 0: \
3532 - gen_helper_neon_##name##_s8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ 3513 + gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3533 break; \ 3514 break; \
3534 case 1: \ 3515 case 1: \
3535 - gen_helper_neon_##name##_u8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ 3516 + gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3536 break; \ 3517 break; \
3537 case 2: \ 3518 case 2: \
3538 - gen_helper_neon_##name##_s16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ 3519 + gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3539 break; \ 3520 break; \
3540 case 3: \ 3521 case 3: \
3541 - gen_helper_neon_##name##_u16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ 3522 + gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3542 break; \ 3523 break; \
3543 case 4: \ 3524 case 4: \
3544 - gen_helper_neon_##name##_s32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ 3525 + gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3545 break; \ 3526 break; \
3546 case 5: \ 3527 case 5: \
3547 - gen_helper_neon_##name##_u32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ 3528 + gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3548 break; \ 3529 break; \
3549 default: return 1; \ 3530 default: return 1; \
3550 }} while (0) 3531 }} while (0)
@@ -3552,73 +3533,53 @@ static inline void gen_neon_rsb(int size) @@ -3552,73 +3533,53 @@ static inline void gen_neon_rsb(int size)
3552 #define GEN_NEON_INTEGER_OP(name) do { \ 3533 #define GEN_NEON_INTEGER_OP(name) do { \
3553 switch ((size << 1) | u) { \ 3534 switch ((size << 1) | u) { \
3554 case 0: \ 3535 case 0: \
3555 - gen_helper_neon_##name##_s8(cpu_T[0], cpu_T[0], cpu_T[1]); \ 3536 + gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3556 break; \ 3537 break; \
3557 case 1: \ 3538 case 1: \
3558 - gen_helper_neon_##name##_u8(cpu_T[0], cpu_T[0], cpu_T[1]); \ 3539 + gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3559 break; \ 3540 break; \
3560 case 2: \ 3541 case 2: \
3561 - gen_helper_neon_##name##_s16(cpu_T[0], cpu_T[0], cpu_T[1]); \ 3542 + gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3562 break; \ 3543 break; \
3563 case 3: \ 3544 case 3: \
3564 - gen_helper_neon_##name##_u16(cpu_T[0], cpu_T[0], cpu_T[1]); \ 3545 + gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3565 break; \ 3546 break; \
3566 case 4: \ 3547 case 4: \
3567 - gen_helper_neon_##name##_s32(cpu_T[0], cpu_T[0], cpu_T[1]); \ 3548 + gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3568 break; \ 3549 break; \
3569 case 5: \ 3550 case 5: \
3570 - gen_helper_neon_##name##_u32(cpu_T[0], cpu_T[0], cpu_T[1]); \ 3551 + gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3571 break; \ 3552 break; \
3572 default: return 1; \ 3553 default: return 1; \
3573 }} while (0) 3554 }} while (0)
3574 3555
3575 -static inline void  
3576 -gen_neon_movl_scratch_T0(int scratch) 3556 +static TCGv neon_load_scratch(int scratch)
3577 { 3557 {
3578 - uint32_t offset;  
3579 -  
3580 - offset = offsetof(CPUARMState, vfp.scratch[scratch]);  
3581 - tcg_gen_st_i32(cpu_T[0], cpu_env, offset);  
3582 -}  
3583 -  
3584 -static inline void  
3585 -gen_neon_movl_scratch_T1(int scratch)  
3586 -{  
3587 - uint32_t offset;  
3588 -  
3589 - offset = offsetof(CPUARMState, vfp.scratch[scratch]);  
3590 - tcg_gen_st_i32(cpu_T[1], cpu_env, offset);  
3591 -}  
3592 -  
3593 -static inline void  
3594 -gen_neon_movl_T0_scratch(int scratch)  
3595 -{  
3596 - uint32_t offset;  
3597 -  
3598 - offset = offsetof(CPUARMState, vfp.scratch[scratch]);  
3599 - tcg_gen_ld_i32(cpu_T[0], cpu_env, offset); 3558 + TCGv tmp = new_tmp();
  3559 + tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
  3560 + return tmp;
3600 } 3561 }
3601 -  
3602 -static inline void  
3603 -gen_neon_movl_T1_scratch(int scratch) 3562 +
  3563 +static void neon_store_scratch(int scratch, TCGv var)
3604 { 3564 {
3605 - uint32_t offset;  
3606 -  
3607 - offset = offsetof(CPUARMState, vfp.scratch[scratch]);  
3608 - tcg_gen_ld_i32(cpu_T[1], cpu_env, offset); 3565 + tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
  3566 + dead_tmp(var);
3609 } 3567 }
3610 -  
3611 -static inline void gen_neon_get_scalar(int size, int reg) 3568 +
  3569 +static inline TCGv neon_get_scalar(int size, int reg)
3612 { 3570 {
  3571 + TCGv tmp;
3613 if (size == 1) { 3572 if (size == 1) {
3614 - NEON_GET_REG(T0, reg >> 1, reg & 1); 3573 + tmp = neon_load_reg(reg >> 1, reg & 1);
3615 } else { 3574 } else {
3616 - NEON_GET_REG(T0, reg >> 2, (reg >> 1) & 1);  
3617 - if (reg & 1)  
3618 - gen_neon_dup_low16(cpu_T[0]);  
3619 - else  
3620 - gen_neon_dup_high16(cpu_T[0]); 3575 + tmp = neon_load_reg(reg >> 2, (reg >> 1) & 1);
  3576 + if (reg & 1) {
  3577 + gen_neon_dup_low16(tmp);
  3578 + } else {
  3579 + gen_neon_dup_high16(tmp);
  3580 + }
3621 } 3581 }
  3582 + return tmp;
3622 } 3583 }
3623 3584
3624 static void gen_neon_unzip_u8(TCGv t0, TCGv t1) 3585 static void gen_neon_unzip_u8(TCGv t0, TCGv t1)
@@ -3715,18 +3676,19 @@ static void gen_neon_zip_u16(TCGv t0, TCGv t1) @@ -3715,18 +3676,19 @@ static void gen_neon_zip_u16(TCGv t0, TCGv t1)
3715 static void gen_neon_unzip(int reg, int q, int tmp, int size) 3676 static void gen_neon_unzip(int reg, int q, int tmp, int size)
3716 { 3677 {
3717 int n; 3678 int n;
  3679 + TCGv t0, t1;
3718 3680
3719 for (n = 0; n < q + 1; n += 2) { 3681 for (n = 0; n < q + 1; n += 2) {
3720 - NEON_GET_REG(T0, reg, n);  
3721 - NEON_GET_REG(T1, reg, n + 1); 3682 + t0 = neon_load_reg(reg, n);
  3683 + t1 = neon_load_reg(reg, n + 1);
3722 switch (size) { 3684 switch (size) {
3723 - case 0: gen_neon_unzip_u8(cpu_T[0], cpu_T[1]); break;  
3724 - case 1: gen_neon_zip_u16(cpu_T[0], cpu_T[1]); break; /* zip and unzip are the same. */ 3685 + case 0: gen_neon_unzip_u8(t0, t1); break;
  3686 + case 1: gen_neon_zip_u16(t0, t1); break; /* zip and unzip are the same. */
3725 case 2: /* no-op */; break; 3687 case 2: /* no-op */; break;
3726 default: abort(); 3688 default: abort();
3727 } 3689 }
3728 - gen_neon_movl_T0_scratch(tmp + n);  
3729 - gen_neon_movl_T1_scratch(tmp + n + 1); 3690 + neon_store_scratch(tmp + n, t0);
  3691 + neon_store_scratch(tmp + n + 1, t1);
3730 } 3692 }
3731 } 3693 }
3732 3694
@@ -4160,10 +4122,6 @@ static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u) @@ -4160,10 +4122,6 @@ static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u)
4160 break; 4122 break;
4161 default: abort(); 4123 default: abort();
4162 } 4124 }
4163 - if (size < 2) {  
4164 - dead_tmp(b);  
4165 - dead_tmp(a);  
4166 - }  
4167 } 4125 }
4168 4126
4169 /* Translate a NEON data processing instruction. Return nonzero if the 4127 /* Translate a NEON data processing instruction. Return nonzero if the
@@ -4296,6 +4254,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4296,6 +4254,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4296 pairwise = 0; 4254 pairwise = 0;
4297 break; 4255 break;
4298 } 4256 }
  4257 +
4299 for (pass = 0; pass < (q ? 4 : 2); pass++) { 4258 for (pass = 0; pass < (q ? 4 : 2); pass++) {
4300 4259
4301 if (pairwise) { 4260 if (pairwise) {
@@ -4305,16 +4264,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4305,16 +4264,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4305 else 4264 else
4306 n = 0; 4265 n = 0;
4307 if (pass < q + 1) { 4266 if (pass < q + 1) {
4308 - NEON_GET_REG(T0, rn, n);  
4309 - NEON_GET_REG(T1, rn, n + 1); 4267 + tmp = neon_load_reg(rn, n);
  4268 + tmp2 = neon_load_reg(rn, n + 1);
4310 } else { 4269 } else {
4311 - NEON_GET_REG(T0, rm, n);  
4312 - NEON_GET_REG(T1, rm, n + 1); 4270 + tmp = neon_load_reg(rm, n);
  4271 + tmp2 = neon_load_reg(rm, n + 1);
4313 } 4272 }
4314 } else { 4273 } else {
4315 /* Elementwise. */ 4274 /* Elementwise. */
4316 - NEON_GET_REG(T0, rn, pass);  
4317 - NEON_GET_REG(T1, rm, pass); 4275 + tmp = neon_load_reg(rn, pass);
  4276 + tmp2 = neon_load_reg(rm, pass);
4318 } 4277 }
4319 switch (op) { 4278 switch (op) {
4320 case 0: /* VHADD */ 4279 case 0: /* VHADD */
@@ -4329,35 +4288,35 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4329,35 +4288,35 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4329 case 3: /* Logic ops. */ 4288 case 3: /* Logic ops. */
4330 switch ((u << 2) | size) { 4289 switch ((u << 2) | size) {
4331 case 0: /* VAND */ 4290 case 0: /* VAND */
4332 - gen_op_andl_T0_T1(); 4291 + tcg_gen_and_i32(tmp, tmp, tmp2);
4333 break; 4292 break;
4334 case 1: /* BIC */ 4293 case 1: /* BIC */
4335 - gen_op_bicl_T0_T1(); 4294 + tcg_gen_bic_i32(tmp, tmp, tmp2);
4336 break; 4295 break;
4337 case 2: /* VORR */ 4296 case 2: /* VORR */
4338 - gen_op_orl_T0_T1(); 4297 + tcg_gen_or_i32(tmp, tmp, tmp2);
4339 break; 4298 break;
4340 case 3: /* VORN */ 4299 case 3: /* VORN */
4341 - gen_op_notl_T1();  
4342 - gen_op_orl_T0_T1(); 4300 + tcg_gen_not_i32(tmp2, tmp2);
  4301 + tcg_gen_or_i32(tmp, tmp, tmp2);
4343 break; 4302 break;
4344 case 4: /* VEOR */ 4303 case 4: /* VEOR */
4345 - gen_op_xorl_T0_T1(); 4304 + tcg_gen_xor_i32(tmp, tmp, tmp2);
4346 break; 4305 break;
4347 case 5: /* VBSL */ 4306 case 5: /* VBSL */
4348 - tmp = neon_load_reg(rd, pass);  
4349 - gen_neon_bsl(cpu_T[0], cpu_T[0], cpu_T[1], tmp);  
4350 - dead_tmp(tmp); 4307 + tmp3 = neon_load_reg(rd, pass);
  4308 + gen_neon_bsl(tmp, tmp, tmp2, tmp3);
  4309 + dead_tmp(tmp3);
4351 break; 4310 break;
4352 case 6: /* VBIT */ 4311 case 6: /* VBIT */
4353 - tmp = neon_load_reg(rd, pass);  
4354 - gen_neon_bsl(cpu_T[0], cpu_T[0], tmp, cpu_T[1]);  
4355 - dead_tmp(tmp); 4312 + tmp3 = neon_load_reg(rd, pass);
  4313 + gen_neon_bsl(tmp, tmp, tmp3, tmp2);
  4314 + dead_tmp(tmp3);
4356 break; 4315 break;
4357 case 7: /* VBIF */ 4316 case 7: /* VBIF */
4358 - tmp = neon_load_reg(rd, pass);  
4359 - gen_neon_bsl(cpu_T[0], tmp, cpu_T[0], cpu_T[1]);  
4360 - dead_tmp(tmp); 4317 + tmp3 = neon_load_reg(rd, pass);
  4318 + gen_neon_bsl(tmp, tmp3, tmp, tmp2);
  4319 + dead_tmp(tmp3);
4361 break; 4320 break;
4362 } 4321 }
4363 break; 4322 break;
@@ -4396,18 +4355,19 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4396,18 +4355,19 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4396 break; 4355 break;
4397 case 15: /* VABA */ 4356 case 15: /* VABA */
4398 GEN_NEON_INTEGER_OP(abd); 4357 GEN_NEON_INTEGER_OP(abd);
4399 - NEON_GET_REG(T1, rd, pass);  
4400 - gen_neon_add(size); 4358 + dead_tmp(tmp2);
  4359 + tmp2 = neon_load_reg(rd, pass);
  4360 + gen_neon_add(size, tmp, tmp2);
4401 break; 4361 break;
4402 case 16: 4362 case 16:
4403 if (!u) { /* VADD */ 4363 if (!u) { /* VADD */
4404 - if (gen_neon_add(size)) 4364 + if (gen_neon_add(size, tmp, tmp2))
4405 return 1; 4365 return 1;
4406 } else { /* VSUB */ 4366 } else { /* VSUB */
4407 switch (size) { 4367 switch (size) {
4408 - case 0: gen_helper_neon_sub_u8(CPU_T001); break;  
4409 - case 1: gen_helper_neon_sub_u16(CPU_T001); break;  
4410 - case 2: gen_op_subl_T0_T1(); break; 4368 + case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
  4369 + case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
  4370 + case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
4411 default: return 1; 4371 default: return 1;
4412 } 4372 }
4413 } 4373 }
@@ -4415,42 +4375,43 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4415,42 +4375,43 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4415 case 17: 4375 case 17:
4416 if (!u) { /* VTST */ 4376 if (!u) { /* VTST */
4417 switch (size) { 4377 switch (size) {
4418 - case 0: gen_helper_neon_tst_u8(CPU_T001); break;  
4419 - case 1: gen_helper_neon_tst_u16(CPU_T001); break;  
4420 - case 2: gen_helper_neon_tst_u32(CPU_T001); break; 4378 + case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
  4379 + case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
  4380 + case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
4421 default: return 1; 4381 default: return 1;
4422 } 4382 }
4423 } else { /* VCEQ */ 4383 } else { /* VCEQ */
4424 switch (size) { 4384 switch (size) {
4425 - case 0: gen_helper_neon_ceq_u8(CPU_T001); break;  
4426 - case 1: gen_helper_neon_ceq_u16(CPU_T001); break;  
4427 - case 2: gen_helper_neon_ceq_u32(CPU_T001); break; 4385 + case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
  4386 + case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
  4387 + case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
4428 default: return 1; 4388 default: return 1;
4429 } 4389 }
4430 } 4390 }
4431 break; 4391 break;
4432 case 18: /* Multiply. */ 4392 case 18: /* Multiply. */
4433 switch (size) { 4393 switch (size) {
4434 - case 0: gen_helper_neon_mul_u8(CPU_T001); break;  
4435 - case 1: gen_helper_neon_mul_u16(CPU_T001); break;  
4436 - case 2: gen_op_mul_T0_T1(); break; 4394 + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
  4395 + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
  4396 + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
4437 default: return 1; 4397 default: return 1;
4438 } 4398 }
4439 - NEON_GET_REG(T1, rd, pass); 4399 + dead_tmp(tmp2);
  4400 + tmp2 = neon_load_reg(rd, pass);
4440 if (u) { /* VMLS */ 4401 if (u) { /* VMLS */
4441 - gen_neon_rsb(size); 4402 + gen_neon_rsb(size, tmp, tmp2);
4442 } else { /* VMLA */ 4403 } else { /* VMLA */
4443 - gen_neon_add(size); 4404 + gen_neon_add(size, tmp, tmp2);
4444 } 4405 }
4445 break; 4406 break;
4446 case 19: /* VMUL */ 4407 case 19: /* VMUL */
4447 if (u) { /* polynomial */ 4408 if (u) { /* polynomial */
4448 - gen_helper_neon_mul_p8(CPU_T001); 4409 + gen_helper_neon_mul_p8(tmp, tmp, tmp2);
4449 } else { /* Integer */ 4410 } else { /* Integer */
4450 switch (size) { 4411 switch (size) {
4451 - case 0: gen_helper_neon_mul_u8(CPU_T001); break;  
4452 - case 1: gen_helper_neon_mul_u16(CPU_T001); break;  
4453 - case 2: gen_op_mul_T0_T1(); break; 4412 + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
  4413 + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
  4414 + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
4454 default: return 1; 4415 default: return 1;
4455 } 4416 }
4456 } 4417 }
@@ -4464,14 +4425,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4464,14 +4425,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4464 case 22: /* Multiply high. */ 4425 case 22: /* Multiply high. */
4465 if (!u) { /* VQDMULH */ 4426 if (!u) { /* VQDMULH */
4466 switch (size) { 4427 switch (size) {
4467 - case 1: gen_helper_neon_qdmulh_s16(CPU_T0E01); break;  
4468 - case 2: gen_helper_neon_qdmulh_s32(CPU_T0E01); break; 4428 + case 1: gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); break;
  4429 + case 2: gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); break;
4469 default: return 1; 4430 default: return 1;
4470 } 4431 }
4471 } else { /* VQRDMULH */ 4432 } else { /* VQRDMULH */
4472 switch (size) { 4433 switch (size) {
4473 - case 1: gen_helper_neon_qrdmulh_s16(CPU_T0E01); break;  
4474 - case 2: gen_helper_neon_qrdmulh_s32(CPU_T0E01); break; 4434 + case 1: gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); break;
  4435 + case 2: gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); break;
4475 default: return 1; 4436 default: return 1;
4476 } 4437 }
4477 } 4438 }
@@ -4480,88 +4441,91 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4480,88 +4441,91 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4480 if (u) 4441 if (u)
4481 return 1; 4442 return 1;
4482 switch (size) { 4443 switch (size) {
4483 - case 0: gen_helper_neon_padd_u8(CPU_T001); break;  
4484 - case 1: gen_helper_neon_padd_u16(CPU_T001); break;  
4485 - case 2: gen_op_addl_T0_T1(); break; 4444 + case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
  4445 + case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
  4446 + case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
4486 default: return 1; 4447 default: return 1;
4487 } 4448 }
4488 break; 4449 break;
4489 case 26: /* Floating point arithmetic. */ 4450 case 26: /* Floating point arithmetic. */
4490 switch ((u << 2) | size) { 4451 switch ((u << 2) | size) {
4491 case 0: /* VADD */ 4452 case 0: /* VADD */
4492 - gen_helper_neon_add_f32(CPU_T001); 4453 + gen_helper_neon_add_f32(tmp, tmp, tmp2);
4493 break; 4454 break;
4494 case 2: /* VSUB */ 4455 case 2: /* VSUB */
4495 - gen_helper_neon_sub_f32(CPU_T001); 4456 + gen_helper_neon_sub_f32(tmp, tmp, tmp2);
4496 break; 4457 break;
4497 case 4: /* VPADD */ 4458 case 4: /* VPADD */
4498 - gen_helper_neon_add_f32(CPU_T001); 4459 + gen_helper_neon_add_f32(tmp, tmp, tmp2);
4499 break; 4460 break;
4500 case 6: /* VABD */ 4461 case 6: /* VABD */
4501 - gen_helper_neon_abd_f32(CPU_T001); 4462 + gen_helper_neon_abd_f32(tmp, tmp, tmp2);
4502 break; 4463 break;
4503 default: 4464 default:
4504 return 1; 4465 return 1;
4505 } 4466 }
4506 break; 4467 break;
4507 case 27: /* Float multiply. */ 4468 case 27: /* Float multiply. */
4508 - gen_helper_neon_mul_f32(CPU_T001); 4469 + gen_helper_neon_mul_f32(tmp, tmp, tmp2);
4509 if (!u) { 4470 if (!u) {
4510 - NEON_GET_REG(T1, rd, pass); 4471 + dead_tmp(tmp2);
  4472 + tmp2 = neon_load_reg(rd, pass);
4511 if (size == 0) { 4473 if (size == 0) {
4512 - gen_helper_neon_add_f32(CPU_T001); 4474 + gen_helper_neon_add_f32(tmp, tmp, tmp2);
4513 } else { 4475 } else {
4514 - gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]); 4476 + gen_helper_neon_sub_f32(tmp, tmp2, tmp);
4515 } 4477 }
4516 } 4478 }
4517 break; 4479 break;
4518 case 28: /* Float compare. */ 4480 case 28: /* Float compare. */
4519 if (!u) { 4481 if (!u) {
4520 - gen_helper_neon_ceq_f32(CPU_T001); 4482 + gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
4521 } else { 4483 } else {
4522 if (size == 0) 4484 if (size == 0)
4523 - gen_helper_neon_cge_f32(CPU_T001); 4485 + gen_helper_neon_cge_f32(tmp, tmp, tmp2);
4524 else 4486 else
4525 - gen_helper_neon_cgt_f32(CPU_T001); 4487 + gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
4526 } 4488 }
4527 break; 4489 break;
4528 case 29: /* Float compare absolute. */ 4490 case 29: /* Float compare absolute. */
4529 if (!u) 4491 if (!u)
4530 return 1; 4492 return 1;
4531 if (size == 0) 4493 if (size == 0)
4532 - gen_helper_neon_acge_f32(CPU_T001); 4494 + gen_helper_neon_acge_f32(tmp, tmp, tmp2);
4533 else 4495 else
4534 - gen_helper_neon_acgt_f32(CPU_T001); 4496 + gen_helper_neon_acgt_f32(tmp, tmp, tmp2);
4535 break; 4497 break;
4536 case 30: /* Float min/max. */ 4498 case 30: /* Float min/max. */
4537 if (size == 0) 4499 if (size == 0)
4538 - gen_helper_neon_max_f32(CPU_T001); 4500 + gen_helper_neon_max_f32(tmp, tmp, tmp2);
4539 else 4501 else
4540 - gen_helper_neon_min_f32(CPU_T001); 4502 + gen_helper_neon_min_f32(tmp, tmp, tmp2);
4541 break; 4503 break;
4542 case 31: 4504 case 31:
4543 if (size == 0) 4505 if (size == 0)
4544 - gen_helper_recps_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env); 4506 + gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
4545 else 4507 else
4546 - gen_helper_rsqrts_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env); 4508 + gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
4547 break; 4509 break;
4548 default: 4510 default:
4549 abort(); 4511 abort();
4550 } 4512 }
  4513 + dead_tmp(tmp2);
  4514 +
4551 /* Save the result. For elementwise operations we can put it 4515 /* Save the result. For elementwise operations we can put it
4552 straight into the destination register. For pairwise operations 4516 straight into the destination register. For pairwise operations
4553 we have to be careful to avoid clobbering the source operands. */ 4517 we have to be careful to avoid clobbering the source operands. */
4554 if (pairwise && rd == rm) { 4518 if (pairwise && rd == rm) {
4555 - gen_neon_movl_scratch_T0(pass); 4519 + neon_store_scratch(pass, tmp);
4556 } else { 4520 } else {
4557 - NEON_SET_REG(T0, rd, pass); 4521 + neon_store_reg(rd, pass, tmp);
4558 } 4522 }
4559 4523
4560 } /* for pass */ 4524 } /* for pass */
4561 if (pairwise && rd == rm) { 4525 if (pairwise && rd == rm) {
4562 for (pass = 0; pass < (q ? 4 : 2); pass++) { 4526 for (pass = 0; pass < (q ? 4 : 2); pass++) {
4563 - gen_neon_movl_T0_scratch(pass);  
4564 - NEON_SET_REG(T0, rd, pass); 4527 + tmp = neon_load_scratch(pass);
  4528 + neon_store_reg(rd, pass, tmp);
4565 } 4529 }
4566 } 4530 }
4567 /* End of 3 register same size operations. */ 4531 /* End of 3 register same size operations. */
@@ -4658,8 +4622,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4658,8 +4622,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4658 neon_store_reg64(cpu_V0, rd + pass); 4622 neon_store_reg64(cpu_V0, rd + pass);
4659 } else { /* size < 3 */ 4623 } else { /* size < 3 */
4660 /* Operands in T0 and T1. */ 4624 /* Operands in T0 and T1. */
4661 - gen_op_movl_T1_im(imm);  
4662 - NEON_GET_REG(T0, rm, pass); 4625 + tmp = neon_load_reg(rm, pass);
  4626 + tmp2 = new_tmp();
  4627 + tcg_gen_movi_i32(tmp2, imm);
4663 switch (op) { 4628 switch (op) {
4664 case 0: /* VSHR */ 4629 case 0: /* VSHR */
4665 case 1: /* VSRA */ 4630 case 1: /* VSRA */
@@ -4676,9 +4641,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4676,9 +4641,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4676 break; 4641 break;
4677 case 5: /* VSHL, VSLI */ 4642 case 5: /* VSHL, VSLI */
4678 switch (size) { 4643 switch (size) {
4679 - case 0: gen_helper_neon_shl_u8(CPU_T001); break;  
4680 - case 1: gen_helper_neon_shl_u16(CPU_T001); break;  
4681 - case 2: gen_helper_neon_shl_u32(CPU_T001); break; 4644 + case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
  4645 + case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
  4646 + case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
4682 default: return 1; 4647 default: return 1;
4683 } 4648 }
4684 break; 4649 break;
@@ -4687,18 +4652,20 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4687,18 +4652,20 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4687 break; 4652 break;
4688 case 7: /* VQSHLU */ 4653 case 7: /* VQSHLU */
4689 switch (size) { 4654 switch (size) {
4690 - case 0: gen_helper_neon_qshl_u8(CPU_T0E01); break;  
4691 - case 1: gen_helper_neon_qshl_u16(CPU_T0E01); break;  
4692 - case 2: gen_helper_neon_qshl_u32(CPU_T0E01); break; 4655 + case 0: gen_helper_neon_qshl_u8(tmp, cpu_env, tmp, tmp2); break;
  4656 + case 1: gen_helper_neon_qshl_u16(tmp, cpu_env, tmp, tmp2); break;
  4657 + case 2: gen_helper_neon_qshl_u32(tmp, cpu_env, tmp, tmp2); break;
4693 default: return 1; 4658 default: return 1;
4694 } 4659 }
4695 break; 4660 break;
4696 } 4661 }
  4662 + dead_tmp(tmp2);
4697 4663
4698 if (op == 1 || op == 3) { 4664 if (op == 1 || op == 3) {
4699 /* Accumulate. */ 4665 /* Accumulate. */
4700 - NEON_GET_REG(T1, rd, pass);  
4701 - gen_neon_add(size); 4666 + tmp2 = neon_load_reg(rd, pass);
  4667 + gen_neon_add(size, tmp2, tmp);
  4668 + dead_tmp(tmp2);
4702 } else if (op == 4 || (op == 5 && u)) { 4669 } else if (op == 4 || (op == 5 && u)) {
4703 /* Insert */ 4670 /* Insert */
4704 switch (size) { 4671 switch (size) {
@@ -4726,12 +4693,13 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4726,12 +4693,13 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4726 default: 4693 default:
4727 abort(); 4694 abort();
4728 } 4695 }
4729 - tmp = neon_load_reg(rd, pass);  
4730 - tcg_gen_andi_i32(cpu_T[0], cpu_T[0], imm);  
4731 - tcg_gen_andi_i32(tmp, tmp, ~imm);  
4732 - tcg_gen_or_i32(cpu_T[0], cpu_T[0], tmp); 4696 + tmp2 = neon_load_reg(rd, pass);
  4697 + tcg_gen_andi_i32(tmp, tmp, imm);
  4698 + tcg_gen_andi_i32(tmp2, tmp2, ~imm);
  4699 + tcg_gen_or_i32(tmp, tmp, tmp2);
  4700 + dead_tmp(tmp2);
4733 } 4701 }
4734 - NEON_SET_REG(T0, rd, pass); 4702 + neon_store_reg(rd, pass, tmp);
4735 } 4703 }
4736 } /* for pass */ 4704 } /* for pass */
4737 } else if (op < 10) { 4705 } else if (op < 10) {
@@ -4893,9 +4861,6 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4893,9 +4861,6 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4893 if (invert) 4861 if (invert)
4894 imm = ~imm; 4862 imm = ~imm;
4895 4863
4896 - if (op != 14 || !invert)  
4897 - gen_op_movl_T1_im(imm);  
4898 -  
4899 for (pass = 0; pass < (q ? 4 : 2); pass++) { 4864 for (pass = 0; pass < (q ? 4 : 2); pass++) {
4900 if (op & 1 && op < 12) { 4865 if (op & 1 && op < 12) {
4901 tmp = neon_load_reg(rd, pass); 4866 tmp = neon_load_reg(rd, pass);
@@ -4962,11 +4927,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4962,11 +4927,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4962 always aligned so will never overlap with wide 4927 always aligned so will never overlap with wide
4963 destinations in problematic ways. */ 4928 destinations in problematic ways. */
4964 if (rd == rm && !src2_wide) { 4929 if (rd == rm && !src2_wide) {
4965 - NEON_GET_REG(T0, rm, 1);  
4966 - gen_neon_movl_scratch_T0(2); 4930 + tmp = neon_load_reg(rm, 1);
  4931 + neon_store_scratch(2, tmp);
4967 } else if (rd == rn && !src1_wide) { 4932 } else if (rd == rn && !src1_wide) {
4968 - NEON_GET_REG(T0, rn, 1);  
4969 - gen_neon_movl_scratch_T0(2); 4933 + tmp = neon_load_reg(rn, 1);
  4934 + neon_store_scratch(2, tmp);
4970 } 4935 }
4971 TCGV_UNUSED(tmp3); 4936 TCGV_UNUSED(tmp3);
4972 for (pass = 0; pass < 2; pass++) { 4937 for (pass = 0; pass < 2; pass++) {
@@ -4975,9 +4940,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4975,9 +4940,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4975 TCGV_UNUSED(tmp); 4940 TCGV_UNUSED(tmp);
4976 } else { 4941 } else {
4977 if (pass == 1 && rd == rn) { 4942 if (pass == 1 && rd == rn) {
4978 - gen_neon_movl_T0_scratch(2);  
4979 - tmp = new_tmp();  
4980 - tcg_gen_mov_i32(tmp, cpu_T[0]); 4943 + tmp = neon_load_scratch(2);
4981 } else { 4944 } else {
4982 tmp = neon_load_reg(rn, pass); 4945 tmp = neon_load_reg(rn, pass);
4983 } 4946 }
@@ -4990,9 +4953,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -4990,9 +4953,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4990 TCGV_UNUSED(tmp2); 4953 TCGV_UNUSED(tmp2);
4991 } else { 4954 } else {
4992 if (pass == 1 && rd == rm) { 4955 if (pass == 1 && rd == rm) {
4993 - gen_neon_movl_T0_scratch(2);  
4994 - tmp2 = new_tmp();  
4995 - tcg_gen_mov_i32(tmp2, cpu_T[0]); 4956 + tmp2 = neon_load_scratch(2);
4996 } else { 4957 } else {
4997 tmp2 = neon_load_reg(rm, pass); 4958 tmp2 = neon_load_reg(rm, pass);
4998 } 4959 }
@@ -5035,6 +4996,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -5035,6 +4996,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5035 case 8: case 9: case 10: case 11: case 12: case 13: 4996 case 8: case 9: case 10: case 11: case 12: case 13:
5036 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */ 4997 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
5037 gen_neon_mull(cpu_V0, tmp, tmp2, size, u); 4998 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
  4999 + dead_tmp(tmp2);
  5000 + dead_tmp(tmp);
5038 break; 5001 break;
5039 case 14: /* Polynomial VMULL */ 5002 case 14: /* Polynomial VMULL */
5040 cpu_abort(env, "Polynomial VMULL not implemented"); 5003 cpu_abort(env, "Polynomial VMULL not implemented");
@@ -5123,55 +5086,56 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -5123,55 +5086,56 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5123 case 9: /* Floating point VMUL scalar */ 5086 case 9: /* Floating point VMUL scalar */
5124 case 12: /* VQDMULH scalar */ 5087 case 12: /* VQDMULH scalar */
5125 case 13: /* VQRDMULH scalar */ 5088 case 13: /* VQRDMULH scalar */
5126 - gen_neon_get_scalar(size, rm);  
5127 - gen_neon_movl_scratch_T0(0); 5089 + tmp = neon_get_scalar(size, rm);
  5090 + neon_store_scratch(0, tmp);
5128 for (pass = 0; pass < (u ? 4 : 2); pass++) { 5091 for (pass = 0; pass < (u ? 4 : 2); pass++) {
5129 - if (pass != 0)  
5130 - gen_neon_movl_T0_scratch(0);  
5131 - NEON_GET_REG(T1, rn, pass); 5092 + tmp = neon_load_scratch(0);
  5093 + tmp2 = neon_load_reg(rn, pass);
5132 if (op == 12) { 5094 if (op == 12) {
5133 if (size == 1) { 5095 if (size == 1) {
5134 - gen_helper_neon_qdmulh_s16(CPU_T0E01); 5096 + gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5135 } else { 5097 } else {
5136 - gen_helper_neon_qdmulh_s32(CPU_T0E01); 5098 + gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5137 } 5099 }
5138 } else if (op == 13) { 5100 } else if (op == 13) {
5139 if (size == 1) { 5101 if (size == 1) {
5140 - gen_helper_neon_qrdmulh_s16(CPU_T0E01); 5102 + gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5141 } else { 5103 } else {
5142 - gen_helper_neon_qrdmulh_s32(CPU_T0E01); 5104 + gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5143 } 5105 }
5144 } else if (op & 1) { 5106 } else if (op & 1) {
5145 - gen_helper_neon_mul_f32(CPU_T001); 5107 + gen_helper_neon_mul_f32(tmp, tmp, tmp2);
5146 } else { 5108 } else {
5147 switch (size) { 5109 switch (size) {
5148 - case 0: gen_helper_neon_mul_u8(CPU_T001); break;  
5149 - case 1: gen_helper_neon_mul_u16(CPU_T001); break;  
5150 - case 2: gen_op_mul_T0_T1(); break; 5110 + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
  5111 + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
  5112 + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5151 default: return 1; 5113 default: return 1;
5152 } 5114 }
5153 } 5115 }
  5116 + dead_tmp(tmp2);
5154 if (op < 8) { 5117 if (op < 8) {
5155 /* Accumulate. */ 5118 /* Accumulate. */
5156 - NEON_GET_REG(T1, rd, pass); 5119 + tmp2 = neon_load_reg(rd, pass);
5157 switch (op) { 5120 switch (op) {
5158 case 0: 5121 case 0:
5159 - gen_neon_add(size); 5122 + gen_neon_add(size, tmp, tmp2);
5160 break; 5123 break;
5161 case 1: 5124 case 1:
5162 - gen_helper_neon_add_f32(CPU_T001); 5125 + gen_helper_neon_add_f32(tmp, tmp, tmp2);
5163 break; 5126 break;
5164 case 4: 5127 case 4:
5165 - gen_neon_rsb(size); 5128 + gen_neon_rsb(size, tmp, tmp2);
5166 break; 5129 break;
5167 case 5: 5130 case 5:
5168 - gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]); 5131 + gen_helper_neon_sub_f32(tmp, tmp2, tmp);
5169 break; 5132 break;
5170 default: 5133 default:
5171 abort(); 5134 abort();
5172 } 5135 }
  5136 + dead_tmp(tmp2);
5173 } 5137 }
5174 - NEON_SET_REG(T0, rd, pass); 5138 + neon_store_reg(rd, pass, tmp);
5175 } 5139 }
5176 break; 5140 break;
5177 case 2: /* VMLAL scalar */ 5141 case 2: /* VMLAL scalar */
@@ -5183,19 +5147,17 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -5183,19 +5147,17 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5183 if (size == 0 && (op == 3 || op == 7 || op == 11)) 5147 if (size == 0 && (op == 3 || op == 7 || op == 11))
5184 return 1; 5148 return 1;
5185 5149
5186 - gen_neon_get_scalar(size, rm);  
5187 - NEON_GET_REG(T1, rn, 1); 5150 + tmp2 = neon_get_scalar(size, rm);
  5151 + tmp3 = neon_load_reg(rn, 1);
5188 5152
5189 for (pass = 0; pass < 2; pass++) { 5153 for (pass = 0; pass < 2; pass++) {
5190 if (pass == 0) { 5154 if (pass == 0) {
5191 tmp = neon_load_reg(rn, 0); 5155 tmp = neon_load_reg(rn, 0);
5192 } else { 5156 } else {
5193 - tmp = new_tmp();  
5194 - tcg_gen_mov_i32(tmp, cpu_T[1]); 5157 + tmp = tmp3;
5195 } 5158 }
5196 - tmp2 = new_tmp();  
5197 - tcg_gen_mov_i32(tmp2, cpu_T[0]);  
5198 gen_neon_mull(cpu_V0, tmp, tmp2, size, u); 5159 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
  5160 + dead_tmp(tmp);
5199 if (op == 6 || op == 7) { 5161 if (op == 6 || op == 7) {
5200 gen_neon_negl(cpu_V0, size); 5162 gen_neon_negl(cpu_V0, size);
5201 } 5163 }
@@ -5221,6 +5183,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -5221,6 +5183,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5221 } 5183 }
5222 neon_store_reg64(cpu_V0, rd + pass); 5184 neon_store_reg64(cpu_V0, rd + pass);
5223 } 5185 }
  5186 +
  5187 + dead_tmp(tmp2);
  5188 +
5224 break; 5189 break;
5225 default: /* 14 and 15 are RESERVED */ 5190 default: /* 14 and 15 are RESERVED */
5226 return 1; 5191 return 1;
@@ -5287,25 +5252,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -5287,25 +5252,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5287 if (size == 3) 5252 if (size == 3)
5288 return 1; 5253 return 1;
5289 for (pass = 0; pass < (q ? 2 : 1); pass++) { 5254 for (pass = 0; pass < (q ? 2 : 1); pass++) {
5290 - NEON_GET_REG(T0, rm, pass * 2);  
5291 - NEON_GET_REG(T1, rm, pass * 2 + 1); 5255 + tmp = neon_load_reg(rm, pass * 2);
  5256 + tmp2 = neon_load_reg(rm, pass * 2 + 1);
5292 switch (size) { 5257 switch (size) {
5293 - case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;  
5294 - case 1: gen_swap_half(cpu_T[0]); break; 5258 + case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
  5259 + case 1: gen_swap_half(tmp); break;
5295 case 2: /* no-op */ break; 5260 case 2: /* no-op */ break;
5296 default: abort(); 5261 default: abort();
5297 } 5262 }
5298 - NEON_SET_REG(T0, rd, pass * 2 + 1); 5263 + neon_store_reg(rd, pass * 2 + 1, tmp);
5299 if (size == 2) { 5264 if (size == 2) {
5300 - NEON_SET_REG(T1, rd, pass * 2); 5265 + neon_store_reg(rd, pass * 2, tmp2);
5301 } else { 5266 } else {
5302 - gen_op_movl_T0_T1();  
5303 switch (size) { 5267 switch (size) {
5304 - case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;  
5305 - case 1: gen_swap_half(cpu_T[0]); break; 5268 + case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
  5269 + case 1: gen_swap_half(tmp2); break;
5306 default: abort(); 5270 default: abort();
5307 } 5271 }
5308 - NEON_SET_REG(T0, rd, pass * 2); 5272 + neon_store_reg(rd, pass * 2, tmp2);
5309 } 5273 }
5310 } 5274 }
5311 break; 5275 break;
@@ -5335,10 +5299,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -5335,10 +5299,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5335 case 33: /* VTRN */ 5299 case 33: /* VTRN */
5336 if (size == 2) { 5300 if (size == 2) {
5337 for (n = 0; n < (q ? 4 : 2); n += 2) { 5301 for (n = 0; n < (q ? 4 : 2); n += 2) {
5338 - NEON_GET_REG(T0, rm, n);  
5339 - NEON_GET_REG(T1, rd, n + 1);  
5340 - NEON_SET_REG(T1, rm, n);  
5341 - NEON_SET_REG(T0, rd, n + 1); 5302 + tmp = neon_load_reg(rm, n);
  5303 + tmp2 = neon_load_reg(rd, n + 1);
  5304 + neon_store_reg(rm, n, tmp2);
  5305 + neon_store_reg(rd, n + 1, tmp);
5342 } 5306 }
5343 } else { 5307 } else {
5344 goto elementwise; 5308 goto elementwise;
@@ -5358,16 +5322,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -5358,16 +5322,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5358 {0, 2, 4, 6, 1, 3, 5, 7}; 5322 {0, 2, 4, 6, 1, 3, 5, 7};
5359 for (n = 0; n < 8; n++) { 5323 for (n = 0; n < 8; n++) {
5360 int reg = (n < 4) ? rd : rm; 5324 int reg = (n < 4) ? rd : rm;
5361 - gen_neon_movl_T0_scratch(unzip_order_q[n]);  
5362 - NEON_SET_REG(T0, reg, n % 4); 5325 + tmp = neon_load_scratch(unzip_order_q[n]);
  5326 + neon_store_reg(reg, n % 4, tmp);
5363 } 5327 }
5364 } else { 5328 } else {
5365 static int unzip_order[4] = 5329 static int unzip_order[4] =
5366 {0, 4, 1, 5}; 5330 {0, 4, 1, 5};
5367 for (n = 0; n < 4; n++) { 5331 for (n = 0; n < 4; n++) {
5368 int reg = (n < 2) ? rd : rm; 5332 int reg = (n < 2) ? rd : rm;
5369 - gen_neon_movl_T0_scratch(unzip_order[n]);  
5370 - NEON_SET_REG(T0, reg, n % 2); 5333 + tmp = neon_load_scratch(unzip_order[n]);
  5334 + neon_store_reg(reg, n % 2, tmp);
5371 } 5335 }
5372 } 5336 }
5373 break; 5337 break;
@@ -5380,21 +5344,21 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -5380,21 +5344,21 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5380 return 1; 5344 return 1;
5381 count = (q ? 4 : 2); 5345 count = (q ? 4 : 2);
5382 for (n = 0; n < count; n++) { 5346 for (n = 0; n < count; n++) {
5383 - NEON_GET_REG(T0, rd, n);  
5384 - NEON_GET_REG(T1, rd, n); 5347 + tmp = neon_load_reg(rd, n);
  5348 + tmp2 = neon_load_reg(rd, n);
5385 switch (size) { 5349 switch (size) {
5386 - case 0: gen_neon_zip_u8(cpu_T[0], cpu_T[1]); break;  
5387 - case 1: gen_neon_zip_u16(cpu_T[0], cpu_T[1]); break; 5350 + case 0: gen_neon_zip_u8(tmp, tmp2); break;
  5351 + case 1: gen_neon_zip_u16(tmp, tmp2); break;
5388 case 2: /* no-op */; break; 5352 case 2: /* no-op */; break;
5389 default: abort(); 5353 default: abort();
5390 } 5354 }
5391 - gen_neon_movl_scratch_T0(n * 2);  
5392 - gen_neon_movl_scratch_T1(n * 2 + 1); 5355 + neon_store_scratch(n * 2, tmp);
  5356 + neon_store_scratch(n * 2 + 1, tmp2);
5393 } 5357 }
5394 for (n = 0; n < count * 2; n++) { 5358 for (n = 0; n < count * 2; n++) {
5395 int reg = (n < count) ? rd : rm; 5359 int reg = (n < count) ? rd : rm;
5396 - gen_neon_movl_T0_scratch(n);  
5397 - NEON_SET_REG(T0, reg, n % count); 5360 + tmp = neon_load_scratch(n);
  5361 + neon_store_reg(reg, n % count, tmp);
5398 } 5362 }
5399 break; 5363 break;
5400 case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */ 5364 case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */
@@ -5437,124 +5401,132 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -5437,124 +5401,132 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5437 if (op == 30 || op == 31 || op >= 58) { 5401 if (op == 30 || op == 31 || op >= 58) {
5438 tcg_gen_ld_f32(cpu_F0s, cpu_env, 5402 tcg_gen_ld_f32(cpu_F0s, cpu_env,
5439 neon_reg_offset(rm, pass)); 5403 neon_reg_offset(rm, pass));
  5404 + TCGV_UNUSED(tmp);
5440 } else { 5405 } else {
5441 - NEON_GET_REG(T0, rm, pass); 5406 + tmp = neon_load_reg(rm, pass);
5442 } 5407 }
5443 switch (op) { 5408 switch (op) {
5444 case 1: /* VREV32 */ 5409 case 1: /* VREV32 */
5445 switch (size) { 5410 switch (size) {
5446 - case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;  
5447 - case 1: gen_swap_half(cpu_T[0]); break; 5411 + case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
  5412 + case 1: gen_swap_half(tmp); break;
5448 default: return 1; 5413 default: return 1;
5449 } 5414 }
5450 break; 5415 break;
5451 case 2: /* VREV16 */ 5416 case 2: /* VREV16 */
5452 if (size != 0) 5417 if (size != 0)
5453 return 1; 5418 return 1;
5454 - gen_rev16(cpu_T[0]); 5419 + gen_rev16(tmp);
5455 break; 5420 break;
5456 case 8: /* CLS */ 5421 case 8: /* CLS */
5457 switch (size) { 5422 switch (size) {
5458 - case 0: gen_helper_neon_cls_s8(cpu_T[0], cpu_T[0]); break;  
5459 - case 1: gen_helper_neon_cls_s16(cpu_T[0], cpu_T[0]); break;  
5460 - case 2: gen_helper_neon_cls_s32(cpu_T[0], cpu_T[0]); break; 5423 + case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
  5424 + case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
  5425 + case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
5461 default: return 1; 5426 default: return 1;
5462 } 5427 }
5463 break; 5428 break;
5464 case 9: /* CLZ */ 5429 case 9: /* CLZ */
5465 switch (size) { 5430 switch (size) {
5466 - case 0: gen_helper_neon_clz_u8(cpu_T[0], cpu_T[0]); break;  
5467 - case 1: gen_helper_neon_clz_u16(cpu_T[0], cpu_T[0]); break;  
5468 - case 2: gen_helper_clz(cpu_T[0], cpu_T[0]); break; 5431 + case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
  5432 + case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
  5433 + case 2: gen_helper_clz(tmp, tmp); break;
5469 default: return 1; 5434 default: return 1;
5470 } 5435 }
5471 break; 5436 break;
5472 case 10: /* CNT */ 5437 case 10: /* CNT */
5473 if (size != 0) 5438 if (size != 0)
5474 return 1; 5439 return 1;
5475 - gen_helper_neon_cnt_u8(cpu_T[0], cpu_T[0]); 5440 + gen_helper_neon_cnt_u8(tmp, tmp);
5476 break; 5441 break;
5477 case 11: /* VNOT */ 5442 case 11: /* VNOT */
5478 if (size != 0) 5443 if (size != 0)
5479 return 1; 5444 return 1;
5480 - gen_op_notl_T0(); 5445 + tcg_gen_not_i32(tmp, tmp);
5481 break; 5446 break;
5482 case 14: /* VQABS */ 5447 case 14: /* VQABS */
5483 switch (size) { 5448 switch (size) {
5484 - case 0: gen_helper_neon_qabs_s8(cpu_T[0], cpu_env, cpu_T[0]); break;  
5485 - case 1: gen_helper_neon_qabs_s16(cpu_T[0], cpu_env, cpu_T[0]); break;  
5486 - case 2: gen_helper_neon_qabs_s32(cpu_T[0], cpu_env, cpu_T[0]); break; 5449 + case 0: gen_helper_neon_qabs_s8(tmp, cpu_env, tmp); break;
  5450 + case 1: gen_helper_neon_qabs_s16(tmp, cpu_env, tmp); break;
  5451 + case 2: gen_helper_neon_qabs_s32(tmp, cpu_env, tmp); break;
5487 default: return 1; 5452 default: return 1;
5488 } 5453 }
5489 break; 5454 break;
5490 case 15: /* VQNEG */ 5455 case 15: /* VQNEG */
5491 switch (size) { 5456 switch (size) {
5492 - case 0: gen_helper_neon_qneg_s8(cpu_T[0], cpu_env, cpu_T[0]); break;  
5493 - case 1: gen_helper_neon_qneg_s16(cpu_T[0], cpu_env, cpu_T[0]); break;  
5494 - case 2: gen_helper_neon_qneg_s32(cpu_T[0], cpu_env, cpu_T[0]); break; 5457 + case 0: gen_helper_neon_qneg_s8(tmp, cpu_env, tmp); break;
  5458 + case 1: gen_helper_neon_qneg_s16(tmp, cpu_env, tmp); break;
  5459 + case 2: gen_helper_neon_qneg_s32(tmp, cpu_env, tmp); break;
5495 default: return 1; 5460 default: return 1;
5496 } 5461 }
5497 break; 5462 break;
5498 case 16: case 19: /* VCGT #0, VCLE #0 */ 5463 case 16: case 19: /* VCGT #0, VCLE #0 */
5499 - gen_op_movl_T1_im(0); 5464 + tmp2 = tcg_const_i32(0);
5500 switch(size) { 5465 switch(size) {
5501 - case 0: gen_helper_neon_cgt_s8(CPU_T001); break;  
5502 - case 1: gen_helper_neon_cgt_s16(CPU_T001); break;  
5503 - case 2: gen_helper_neon_cgt_s32(CPU_T001); break; 5466 + case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
  5467 + case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
  5468 + case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
5504 default: return 1; 5469 default: return 1;
5505 } 5470 }
  5471 + tcg_temp_free(tmp2);
5506 if (op == 19) 5472 if (op == 19)
5507 - gen_op_notl_T0(); 5473 + tcg_gen_not_i32(tmp, tmp);
5508 break; 5474 break;
5509 case 17: case 20: /* VCGE #0, VCLT #0 */ 5475 case 17: case 20: /* VCGE #0, VCLT #0 */
5510 - gen_op_movl_T1_im(0); 5476 + tmp2 = tcg_const_i32(0);
5511 switch(size) { 5477 switch(size) {
5512 - case 0: gen_helper_neon_cge_s8(CPU_T001); break;  
5513 - case 1: gen_helper_neon_cge_s16(CPU_T001); break;  
5514 - case 2: gen_helper_neon_cge_s32(CPU_T001); break; 5478 + case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
  5479 + case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
  5480 + case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
5515 default: return 1; 5481 default: return 1;
5516 } 5482 }
  5483 + tcg_temp_free(tmp2);
5517 if (op == 20) 5484 if (op == 20)
5518 - gen_op_notl_T0(); 5485 + tcg_gen_not_i32(tmp, tmp);
5519 break; 5486 break;
5520 case 18: /* VCEQ #0 */ 5487 case 18: /* VCEQ #0 */
5521 - gen_op_movl_T1_im(0); 5488 + tmp2 = tcg_const_i32(0);
5522 switch(size) { 5489 switch(size) {
5523 - case 0: gen_helper_neon_ceq_u8(CPU_T001); break;  
5524 - case 1: gen_helper_neon_ceq_u16(CPU_T001); break;  
5525 - case 2: gen_helper_neon_ceq_u32(CPU_T001); break; 5490 + case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
  5491 + case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
  5492 + case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
5526 default: return 1; 5493 default: return 1;
5527 } 5494 }
  5495 + tcg_temp_free(tmp2);
5528 break; 5496 break;
5529 case 22: /* VABS */ 5497 case 22: /* VABS */
5530 switch(size) { 5498 switch(size) {
5531 - case 0: gen_helper_neon_abs_s8(cpu_T[0], cpu_T[0]); break;  
5532 - case 1: gen_helper_neon_abs_s16(cpu_T[0], cpu_T[0]); break;  
5533 - case 2: tcg_gen_abs_i32(cpu_T[0], cpu_T[0]); break; 5499 + case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
  5500 + case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
  5501 + case 2: tcg_gen_abs_i32(tmp, tmp); break;
5534 default: return 1; 5502 default: return 1;
5535 } 5503 }
5536 break; 5504 break;
5537 case 23: /* VNEG */ 5505 case 23: /* VNEG */
5538 - gen_op_movl_T1_im(0);  
5539 if (size == 3) 5506 if (size == 3)
5540 return 1; 5507 return 1;
5541 - gen_neon_rsb(size); 5508 + tmp2 = tcg_const_i32(0);
  5509 + gen_neon_rsb(size, tmp, tmp2);
  5510 + tcg_temp_free(tmp2);
5542 break; 5511 break;
5543 case 24: case 27: /* Float VCGT #0, Float VCLE #0 */ 5512 case 24: case 27: /* Float VCGT #0, Float VCLE #0 */
5544 - gen_op_movl_T1_im(0);  
5545 - gen_helper_neon_cgt_f32(CPU_T001); 5513 + tmp2 = tcg_const_i32(0);
  5514 + gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
  5515 + tcg_temp_free(tmp2);
5546 if (op == 27) 5516 if (op == 27)
5547 - gen_op_notl_T0(); 5517 + tcg_gen_not_i32(tmp, tmp);
5548 break; 5518 break;
5549 case 25: case 28: /* Float VCGE #0, Float VCLT #0 */ 5519 case 25: case 28: /* Float VCGE #0, Float VCLT #0 */
5550 - gen_op_movl_T1_im(0);  
5551 - gen_helper_neon_cge_f32(CPU_T001); 5520 + tmp2 = tcg_const_i32(0);
  5521 + gen_helper_neon_cge_f32(tmp, tmp, tmp2);
  5522 + tcg_temp_free(tmp2);
5552 if (op == 28) 5523 if (op == 28)
5553 - gen_op_notl_T0(); 5524 + tcg_gen_not_i32(tmp, tmp);
5554 break; 5525 break;
5555 case 26: /* Float VCEQ #0 */ 5526 case 26: /* Float VCEQ #0 */
5556 - gen_op_movl_T1_im(0);  
5557 - gen_helper_neon_ceq_f32(CPU_T001); 5527 + tmp2 = tcg_const_i32(0);
  5528 + gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
  5529 + tcg_temp_free(tmp2);
5558 break; 5530 break;
5559 case 30: /* Float VABS */ 5531 case 30: /* Float VABS */
5560 gen_vfp_abs(0); 5532 gen_vfp_abs(0);
@@ -5563,24 +5535,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -5563,24 +5535,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5563 gen_vfp_neg(0); 5535 gen_vfp_neg(0);
5564 break; 5536 break;
5565 case 32: /* VSWP */ 5537 case 32: /* VSWP */
5566 - NEON_GET_REG(T1, rd, pass);  
5567 - NEON_SET_REG(T1, rm, pass); 5538 + tmp2 = neon_load_reg(rd, pass);
  5539 + neon_store_reg(rm, pass, tmp2);
5568 break; 5540 break;
5569 case 33: /* VTRN */ 5541 case 33: /* VTRN */
5570 - NEON_GET_REG(T1, rd, pass); 5542 + tmp2 = neon_load_reg(rd, pass);
5571 switch (size) { 5543 switch (size) {
5572 - case 0: gen_neon_trn_u8(cpu_T[0], cpu_T[1]); break;  
5573 - case 1: gen_neon_trn_u16(cpu_T[0], cpu_T[1]); break; 5544 + case 0: gen_neon_trn_u8(tmp, tmp2); break;
  5545 + case 1: gen_neon_trn_u16(tmp, tmp2); break;
5574 case 2: abort(); 5546 case 2: abort();
5575 default: return 1; 5547 default: return 1;
5576 } 5548 }
5577 - NEON_SET_REG(T1, rm, pass); 5549 + neon_store_reg(rm, pass, tmp2);
5578 break; 5550 break;
5579 case 56: /* Integer VRECPE */ 5551 case 56: /* Integer VRECPE */
5580 - gen_helper_recpe_u32(cpu_T[0], cpu_T[0], cpu_env); 5552 + gen_helper_recpe_u32(tmp, tmp, cpu_env);
5581 break; 5553 break;
5582 case 57: /* Integer VRSQRTE */ 5554 case 57: /* Integer VRSQRTE */
5583 - gen_helper_rsqrte_u32(cpu_T[0], cpu_T[0], cpu_env); 5555 + gen_helper_rsqrte_u32(tmp, tmp, cpu_env);
5584 break; 5556 break;
5585 case 58: /* Float VRECPE */ 5557 case 58: /* Float VRECPE */
5586 gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env); 5558 gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env);
@@ -5608,7 +5580,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -5608,7 +5580,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5608 tcg_gen_st_f32(cpu_F0s, cpu_env, 5580 tcg_gen_st_f32(cpu_F0s, cpu_env,
5609 neon_reg_offset(rd, pass)); 5581 neon_reg_offset(rd, pass));
5610 } else { 5582 } else {
5611 - NEON_SET_REG(T0, rd, pass); 5583 + neon_store_reg(rd, pass, tmp);
5612 } 5584 }
5613 } 5585 }
5614 break; 5586 break;
@@ -5641,21 +5613,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) @@ -5641,21 +5613,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5641 } else if ((insn & 0x380) == 0) { 5613 } else if ((insn & 0x380) == 0) {
5642 /* VDUP */ 5614 /* VDUP */
5643 if (insn & (1 << 19)) { 5615 if (insn & (1 << 19)) {
5644 - NEON_SET_REG(T0, rm, 1); 5616 + tmp = neon_load_reg(rm, 1);
5645 } else { 5617 } else {
5646 - NEON_SET_REG(T0, rm, 0); 5618 + tmp = neon_load_reg(rm, 0);
5647 } 5619 }
5648 if (insn & (1 << 16)) { 5620 if (insn & (1 << 16)) {
5649 - gen_neon_dup_u8(cpu_T[0], ((insn >> 17) & 3) * 8); 5621 + gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
5650 } else if (insn & (1 << 17)) { 5622 } else if (insn & (1 << 17)) {
5651 if ((insn >> 18) & 1) 5623 if ((insn >> 18) & 1)
5652 - gen_neon_dup_high16(cpu_T[0]); 5624 + gen_neon_dup_high16(tmp);
5653 else 5625 else
5654 - gen_neon_dup_low16(cpu_T[0]); 5626 + gen_neon_dup_low16(tmp);
5655 } 5627 }
5656 for (pass = 0; pass < (q ? 4 : 2); pass++) { 5628 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5657 - NEON_SET_REG(T0, rd, pass); 5629 + tmp2 = new_tmp();
  5630 + tcg_gen_mov_i32(tmp2, tmp);
  5631 + neon_store_reg(rd, pass, tmp2);
5658 } 5632 }
  5633 + dead_tmp(tmp);
5659 } else { 5634 } else {
5660 return 1; 5635 return 1;
5661 } 5636 }