Commit fb951ff5c0b598f7a4dff0fd59d5167345c3337a

Authored by Filip Navara
1 parent dc485d28

Convert disas_neon_data_insn and helpers not to use cpu_T.

Signed-off-by: Filip Navara <filip.navara@gmail.com>
Showing 1 changed file with 273 additions and 298 deletions
target-arm/translate.c
... ... @@ -188,13 +188,9 @@ static void store_reg(DisasContext *s, int reg, TCGv var)
188 188  
189 189 #define gen_op_addl_T1_im(im) tcg_gen_addi_i32(cpu_T[1], cpu_T[1], im)
190 190 #define gen_op_addl_T0_T1() tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1])
191   -#define gen_op_subl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[0], cpu_T[1])
192   -#define gen_op_rsbl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[1], cpu_T[0])
193 191  
194 192 #define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1])
195   -#define gen_op_xorl_T0_T1() tcg_gen_xor_i32(cpu_T[0], cpu_T[0], cpu_T[1])
196 193 #define gen_op_orl_T0_T1() tcg_gen_or_i32(cpu_T[0], cpu_T[0], cpu_T[1])
197   -#define gen_op_notl_T0() tcg_gen_not_i32(cpu_T[0], cpu_T[0])
198 194 #define gen_op_notl_T1() tcg_gen_not_i32(cpu_T[1], cpu_T[1])
199 195  
200 196 #define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im)
... ... @@ -209,7 +205,6 @@ static void store_reg(DisasContext *s, int reg, TCGv var)
209 205 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
210 206 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
211 207  
212   -#define gen_op_mul_T0_T1() tcg_gen_mul_i32(cpu_T[0], cpu_T[0], cpu_T[1])
213 208  
214 209 #define gen_set_cpsr(var, mask) gen_helper_cpsr_write(var, tcg_const_i32(mask))
215 210 /* Set NZCV flags from the high 4 bits of var. */
... ... @@ -1118,14 +1113,6 @@ neon_reg_offset (int reg, int n)
1118 1113 return vfp_reg_offset(0, sreg);
1119 1114 }
1120 1115  
1121   -/* FIXME: Remove these. */
1122   -#define neon_T0 cpu_T[0]
1123   -#define neon_T1 cpu_T[1]
1124   -#define NEON_GET_REG(T, reg, n) \
1125   - tcg_gen_ld_i32(neon_##T, cpu_env, neon_reg_offset(reg, n))
1126   -#define NEON_SET_REG(T, reg, n) \
1127   - tcg_gen_st_i32(neon_##T, cpu_env, neon_reg_offset(reg, n))
1128   -
1129 1116 static TCGv neon_load_reg(int reg, int pass)
1130 1117 {
1131 1118 TCGv tmp = new_tmp();
... ... @@ -3485,31 +3472,25 @@ static void gen_nop_hint(DisasContext *s, int val)
3485 3472 }
3486 3473 }
3487 3474  
3488   -/* These macros help make the code more readable when migrating from the
3489   - old dyngen helpers. They should probably be removed when
3490   - T0/T1 are removed. */
3491   -#define CPU_T001 cpu_T[0], cpu_T[0], cpu_T[1]
3492   -#define CPU_T0E01 cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]
3493   -
3494 3475 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
3495 3476  
3496   -static inline int gen_neon_add(int size)
  3477 +static inline int gen_neon_add(int size, TCGv t0, TCGv t1)
3497 3478 {
3498 3479 switch (size) {
3499   - case 0: gen_helper_neon_add_u8(CPU_T001); break;
3500   - case 1: gen_helper_neon_add_u16(CPU_T001); break;
3501   - case 2: gen_op_addl_T0_T1(); break;
  3480 + case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
  3481 + case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
  3482 + case 2: tcg_gen_add_i32(t0, t0, t1); break;
3502 3483 default: return 1;
3503 3484 }
3504 3485 return 0;
3505 3486 }
3506 3487  
3507   -static inline void gen_neon_rsb(int size)
  3488 +static inline void gen_neon_rsb(int size, TCGv t0, TCGv t1)
3508 3489 {
3509 3490 switch (size) {
3510   - case 0: gen_helper_neon_sub_u8(cpu_T[0], cpu_T[1], cpu_T[0]); break;
3511   - case 1: gen_helper_neon_sub_u16(cpu_T[0], cpu_T[1], cpu_T[0]); break;
3512   - case 2: gen_op_rsbl_T0_T1(); break;
  3491 + case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
  3492 + case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
  3493 + case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3513 3494 default: return;
3514 3495 }
3515 3496 }
... ... @@ -3529,22 +3510,22 @@ static inline void gen_neon_rsb(int size)
3529 3510 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3530 3511 switch ((size << 1) | u) { \
3531 3512 case 0: \
3532   - gen_helper_neon_##name##_s8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
  3513 + gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3533 3514 break; \
3534 3515 case 1: \
3535   - gen_helper_neon_##name##_u8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
  3516 + gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3536 3517 break; \
3537 3518 case 2: \
3538   - gen_helper_neon_##name##_s16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
  3519 + gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3539 3520 break; \
3540 3521 case 3: \
3541   - gen_helper_neon_##name##_u16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
  3522 + gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3542 3523 break; \
3543 3524 case 4: \
3544   - gen_helper_neon_##name##_s32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
  3525 + gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3545 3526 break; \
3546 3527 case 5: \
3547   - gen_helper_neon_##name##_u32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
  3528 + gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3548 3529 break; \
3549 3530 default: return 1; \
3550 3531 }} while (0)
... ... @@ -3552,73 +3533,53 @@ static inline void gen_neon_rsb(int size)
3552 3533 #define GEN_NEON_INTEGER_OP(name) do { \
3553 3534 switch ((size << 1) | u) { \
3554 3535 case 0: \
3555   - gen_helper_neon_##name##_s8(cpu_T[0], cpu_T[0], cpu_T[1]); \
  3536 + gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3556 3537 break; \
3557 3538 case 1: \
3558   - gen_helper_neon_##name##_u8(cpu_T[0], cpu_T[0], cpu_T[1]); \
  3539 + gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3559 3540 break; \
3560 3541 case 2: \
3561   - gen_helper_neon_##name##_s16(cpu_T[0], cpu_T[0], cpu_T[1]); \
  3542 + gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3562 3543 break; \
3563 3544 case 3: \
3564   - gen_helper_neon_##name##_u16(cpu_T[0], cpu_T[0], cpu_T[1]); \
  3545 + gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3565 3546 break; \
3566 3547 case 4: \
3567   - gen_helper_neon_##name##_s32(cpu_T[0], cpu_T[0], cpu_T[1]); \
  3548 + gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3568 3549 break; \
3569 3550 case 5: \
3570   - gen_helper_neon_##name##_u32(cpu_T[0], cpu_T[0], cpu_T[1]); \
  3551 + gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3571 3552 break; \
3572 3553 default: return 1; \
3573 3554 }} while (0)
3574 3555  
3575   -static inline void
3576   -gen_neon_movl_scratch_T0(int scratch)
  3556 +static TCGv neon_load_scratch(int scratch)
3577 3557 {
3578   - uint32_t offset;
3579   -
3580   - offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3581   - tcg_gen_st_i32(cpu_T[0], cpu_env, offset);
3582   -}
3583   -
3584   -static inline void
3585   -gen_neon_movl_scratch_T1(int scratch)
3586   -{
3587   - uint32_t offset;
3588   -
3589   - offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3590   - tcg_gen_st_i32(cpu_T[1], cpu_env, offset);
3591   -}
3592   -
3593   -static inline void
3594   -gen_neon_movl_T0_scratch(int scratch)
3595   -{
3596   - uint32_t offset;
3597   -
3598   - offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3599   - tcg_gen_ld_i32(cpu_T[0], cpu_env, offset);
  3558 + TCGv tmp = new_tmp();
  3559 + tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
  3560 + return tmp;
3600 3561 }
3601   -
3602   -static inline void
3603   -gen_neon_movl_T1_scratch(int scratch)
  3562 +
  3563 +static void neon_store_scratch(int scratch, TCGv var)
3604 3564 {
3605   - uint32_t offset;
3606   -
3607   - offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3608   - tcg_gen_ld_i32(cpu_T[1], cpu_env, offset);
  3565 + tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
  3566 + dead_tmp(var);
3609 3567 }
3610   -
3611   -static inline void gen_neon_get_scalar(int size, int reg)
  3568 +
  3569 +static inline TCGv neon_get_scalar(int size, int reg)
3612 3570 {
  3571 + TCGv tmp;
3613 3572 if (size == 1) {
3614   - NEON_GET_REG(T0, reg >> 1, reg & 1);
  3573 + tmp = neon_load_reg(reg >> 1, reg & 1);
3615 3574 } else {
3616   - NEON_GET_REG(T0, reg >> 2, (reg >> 1) & 1);
3617   - if (reg & 1)
3618   - gen_neon_dup_low16(cpu_T[0]);
3619   - else
3620   - gen_neon_dup_high16(cpu_T[0]);
  3575 + tmp = neon_load_reg(reg >> 2, (reg >> 1) & 1);
  3576 + if (reg & 1) {
  3577 + gen_neon_dup_low16(tmp);
  3578 + } else {
  3579 + gen_neon_dup_high16(tmp);
  3580 + }
3621 3581 }
  3582 + return tmp;
3622 3583 }
3623 3584  
3624 3585 static void gen_neon_unzip_u8(TCGv t0, TCGv t1)
... ... @@ -3715,18 +3676,19 @@ static void gen_neon_zip_u16(TCGv t0, TCGv t1)
3715 3676 static void gen_neon_unzip(int reg, int q, int tmp, int size)
3716 3677 {
3717 3678 int n;
  3679 + TCGv t0, t1;
3718 3680  
3719 3681 for (n = 0; n < q + 1; n += 2) {
3720   - NEON_GET_REG(T0, reg, n);
3721   - NEON_GET_REG(T1, reg, n + 1);
  3682 + t0 = neon_load_reg(reg, n);
  3683 + t1 = neon_load_reg(reg, n + 1);
3722 3684 switch (size) {
3723   - case 0: gen_neon_unzip_u8(cpu_T[0], cpu_T[1]); break;
3724   - case 1: gen_neon_zip_u16(cpu_T[0], cpu_T[1]); break; /* zip and unzip are the same. */
  3685 + case 0: gen_neon_unzip_u8(t0, t1); break;
  3686 + case 1: gen_neon_zip_u16(t0, t1); break; /* zip and unzip are the same. */
3725 3687 case 2: /* no-op */; break;
3726 3688 default: abort();
3727 3689 }
3728   - gen_neon_movl_T0_scratch(tmp + n);
3729   - gen_neon_movl_T1_scratch(tmp + n + 1);
  3690 + neon_store_scratch(tmp + n, t0);
  3691 + neon_store_scratch(tmp + n + 1, t1);
3730 3692 }
3731 3693 }
3732 3694  
... ... @@ -4160,10 +4122,6 @@ static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u)
4160 4122 break;
4161 4123 default: abort();
4162 4124 }
4163   - if (size < 2) {
4164   - dead_tmp(b);
4165   - dead_tmp(a);
4166   - }
4167 4125 }
4168 4126  
4169 4127 /* Translate a NEON data processing instruction. Return nonzero if the
... ... @@ -4296,6 +4254,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4296 4254 pairwise = 0;
4297 4255 break;
4298 4256 }
  4257 +
4299 4258 for (pass = 0; pass < (q ? 4 : 2); pass++) {
4300 4259  
4301 4260 if (pairwise) {
... ... @@ -4305,16 +4264,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4305 4264 else
4306 4265 n = 0;
4307 4266 if (pass < q + 1) {
4308   - NEON_GET_REG(T0, rn, n);
4309   - NEON_GET_REG(T1, rn, n + 1);
  4267 + tmp = neon_load_reg(rn, n);
  4268 + tmp2 = neon_load_reg(rn, n + 1);
4310 4269 } else {
4311   - NEON_GET_REG(T0, rm, n);
4312   - NEON_GET_REG(T1, rm, n + 1);
  4270 + tmp = neon_load_reg(rm, n);
  4271 + tmp2 = neon_load_reg(rm, n + 1);
4313 4272 }
4314 4273 } else {
4315 4274 /* Elementwise. */
4316   - NEON_GET_REG(T0, rn, pass);
4317   - NEON_GET_REG(T1, rm, pass);
  4275 + tmp = neon_load_reg(rn, pass);
  4276 + tmp2 = neon_load_reg(rm, pass);
4318 4277 }
4319 4278 switch (op) {
4320 4279 case 0: /* VHADD */
... ... @@ -4329,35 +4288,35 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4329 4288 case 3: /* Logic ops. */
4330 4289 switch ((u << 2) | size) {
4331 4290 case 0: /* VAND */
4332   - gen_op_andl_T0_T1();
  4291 + tcg_gen_and_i32(tmp, tmp, tmp2);
4333 4292 break;
4334 4293 case 1: /* BIC */
4335   - gen_op_bicl_T0_T1();
  4294 + tcg_gen_bic_i32(tmp, tmp, tmp2);
4336 4295 break;
4337 4296 case 2: /* VORR */
4338   - gen_op_orl_T0_T1();
  4297 + tcg_gen_or_i32(tmp, tmp, tmp2);
4339 4298 break;
4340 4299 case 3: /* VORN */
4341   - gen_op_notl_T1();
4342   - gen_op_orl_T0_T1();
  4300 + tcg_gen_not_i32(tmp2, tmp2);
  4301 + tcg_gen_or_i32(tmp, tmp, tmp2);
4343 4302 break;
4344 4303 case 4: /* VEOR */
4345   - gen_op_xorl_T0_T1();
  4304 + tcg_gen_xor_i32(tmp, tmp, tmp2);
4346 4305 break;
4347 4306 case 5: /* VBSL */
4348   - tmp = neon_load_reg(rd, pass);
4349   - gen_neon_bsl(cpu_T[0], cpu_T[0], cpu_T[1], tmp);
4350   - dead_tmp(tmp);
  4307 + tmp3 = neon_load_reg(rd, pass);
  4308 + gen_neon_bsl(tmp, tmp, tmp2, tmp3);
  4309 + dead_tmp(tmp3);
4351 4310 break;
4352 4311 case 6: /* VBIT */
4353   - tmp = neon_load_reg(rd, pass);
4354   - gen_neon_bsl(cpu_T[0], cpu_T[0], tmp, cpu_T[1]);
4355   - dead_tmp(tmp);
  4312 + tmp3 = neon_load_reg(rd, pass);
  4313 + gen_neon_bsl(tmp, tmp, tmp3, tmp2);
  4314 + dead_tmp(tmp3);
4356 4315 break;
4357 4316 case 7: /* VBIF */
4358   - tmp = neon_load_reg(rd, pass);
4359   - gen_neon_bsl(cpu_T[0], tmp, cpu_T[0], cpu_T[1]);
4360   - dead_tmp(tmp);
  4317 + tmp3 = neon_load_reg(rd, pass);
  4318 + gen_neon_bsl(tmp, tmp3, tmp, tmp2);
  4319 + dead_tmp(tmp3);
4361 4320 break;
4362 4321 }
4363 4322 break;
... ... @@ -4396,18 +4355,19 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4396 4355 break;
4397 4356 case 15: /* VABA */
4398 4357 GEN_NEON_INTEGER_OP(abd);
4399   - NEON_GET_REG(T1, rd, pass);
4400   - gen_neon_add(size);
  4358 + dead_tmp(tmp2);
  4359 + tmp2 = neon_load_reg(rd, pass);
  4360 + gen_neon_add(size, tmp, tmp2);
4401 4361 break;
4402 4362 case 16:
4403 4363 if (!u) { /* VADD */
4404   - if (gen_neon_add(size))
  4364 + if (gen_neon_add(size, tmp, tmp2))
4405 4365 return 1;
4406 4366 } else { /* VSUB */
4407 4367 switch (size) {
4408   - case 0: gen_helper_neon_sub_u8(CPU_T001); break;
4409   - case 1: gen_helper_neon_sub_u16(CPU_T001); break;
4410   - case 2: gen_op_subl_T0_T1(); break;
  4368 + case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
  4369 + case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
  4370 + case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
4411 4371 default: return 1;
4412 4372 }
4413 4373 }
... ... @@ -4415,42 +4375,43 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4415 4375 case 17:
4416 4376 if (!u) { /* VTST */
4417 4377 switch (size) {
4418   - case 0: gen_helper_neon_tst_u8(CPU_T001); break;
4419   - case 1: gen_helper_neon_tst_u16(CPU_T001); break;
4420   - case 2: gen_helper_neon_tst_u32(CPU_T001); break;
  4378 + case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
  4379 + case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
  4380 + case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
4421 4381 default: return 1;
4422 4382 }
4423 4383 } else { /* VCEQ */
4424 4384 switch (size) {
4425   - case 0: gen_helper_neon_ceq_u8(CPU_T001); break;
4426   - case 1: gen_helper_neon_ceq_u16(CPU_T001); break;
4427   - case 2: gen_helper_neon_ceq_u32(CPU_T001); break;
  4385 + case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
  4386 + case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
  4387 + case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
4428 4388 default: return 1;
4429 4389 }
4430 4390 }
4431 4391 break;
4432 4392 case 18: /* Multiply. */
4433 4393 switch (size) {
4434   - case 0: gen_helper_neon_mul_u8(CPU_T001); break;
4435   - case 1: gen_helper_neon_mul_u16(CPU_T001); break;
4436   - case 2: gen_op_mul_T0_T1(); break;
  4394 + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
  4395 + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
  4396 + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
4437 4397 default: return 1;
4438 4398 }
4439   - NEON_GET_REG(T1, rd, pass);
  4399 + dead_tmp(tmp2);
  4400 + tmp2 = neon_load_reg(rd, pass);
4440 4401 if (u) { /* VMLS */
4441   - gen_neon_rsb(size);
  4402 + gen_neon_rsb(size, tmp, tmp2);
4442 4403 } else { /* VMLA */
4443   - gen_neon_add(size);
  4404 + gen_neon_add(size, tmp, tmp2);
4444 4405 }
4445 4406 break;
4446 4407 case 19: /* VMUL */
4447 4408 if (u) { /* polynomial */
4448   - gen_helper_neon_mul_p8(CPU_T001);
  4409 + gen_helper_neon_mul_p8(tmp, tmp, tmp2);
4449 4410 } else { /* Integer */
4450 4411 switch (size) {
4451   - case 0: gen_helper_neon_mul_u8(CPU_T001); break;
4452   - case 1: gen_helper_neon_mul_u16(CPU_T001); break;
4453   - case 2: gen_op_mul_T0_T1(); break;
  4412 + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
  4413 + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
  4414 + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
4454 4415 default: return 1;
4455 4416 }
4456 4417 }
... ... @@ -4464,14 +4425,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4464 4425 case 22: /* Multiply high. */
4465 4426 if (!u) { /* VQDMULH */
4466 4427 switch (size) {
4467   - case 1: gen_helper_neon_qdmulh_s16(CPU_T0E01); break;
4468   - case 2: gen_helper_neon_qdmulh_s32(CPU_T0E01); break;
  4428 + case 1: gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); break;
  4429 + case 2: gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); break;
4469 4430 default: return 1;
4470 4431 }
4471 4432 } else { /* VQRDMULH */
4472 4433 switch (size) {
4473   - case 1: gen_helper_neon_qrdmulh_s16(CPU_T0E01); break;
4474   - case 2: gen_helper_neon_qrdmulh_s32(CPU_T0E01); break;
  4434 + case 1: gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); break;
  4435 + case 2: gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); break;
4475 4436 default: return 1;
4476 4437 }
4477 4438 }
... ... @@ -4480,88 +4441,91 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4480 4441 if (u)
4481 4442 return 1;
4482 4443 switch (size) {
4483   - case 0: gen_helper_neon_padd_u8(CPU_T001); break;
4484   - case 1: gen_helper_neon_padd_u16(CPU_T001); break;
4485   - case 2: gen_op_addl_T0_T1(); break;
  4444 + case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
  4445 + case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
  4446 + case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
4486 4447 default: return 1;
4487 4448 }
4488 4449 break;
4489 4450 case 26: /* Floating point arithmetic. */
4490 4451 switch ((u << 2) | size) {
4491 4452 case 0: /* VADD */
4492   - gen_helper_neon_add_f32(CPU_T001);
  4453 + gen_helper_neon_add_f32(tmp, tmp, tmp2);
4493 4454 break;
4494 4455 case 2: /* VSUB */
4495   - gen_helper_neon_sub_f32(CPU_T001);
  4456 + gen_helper_neon_sub_f32(tmp, tmp, tmp2);
4496 4457 break;
4497 4458 case 4: /* VPADD */
4498   - gen_helper_neon_add_f32(CPU_T001);
  4459 + gen_helper_neon_add_f32(tmp, tmp, tmp2);
4499 4460 break;
4500 4461 case 6: /* VABD */
4501   - gen_helper_neon_abd_f32(CPU_T001);
  4462 + gen_helper_neon_abd_f32(tmp, tmp, tmp2);
4502 4463 break;
4503 4464 default:
4504 4465 return 1;
4505 4466 }
4506 4467 break;
4507 4468 case 27: /* Float multiply. */
4508   - gen_helper_neon_mul_f32(CPU_T001);
  4469 + gen_helper_neon_mul_f32(tmp, tmp, tmp2);
4509 4470 if (!u) {
4510   - NEON_GET_REG(T1, rd, pass);
  4471 + dead_tmp(tmp2);
  4472 + tmp2 = neon_load_reg(rd, pass);
4511 4473 if (size == 0) {
4512   - gen_helper_neon_add_f32(CPU_T001);
  4474 + gen_helper_neon_add_f32(tmp, tmp, tmp2);
4513 4475 } else {
4514   - gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]);
  4476 + gen_helper_neon_sub_f32(tmp, tmp2, tmp);
4515 4477 }
4516 4478 }
4517 4479 break;
4518 4480 case 28: /* Float compare. */
4519 4481 if (!u) {
4520   - gen_helper_neon_ceq_f32(CPU_T001);
  4482 + gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
4521 4483 } else {
4522 4484 if (size == 0)
4523   - gen_helper_neon_cge_f32(CPU_T001);
  4485 + gen_helper_neon_cge_f32(tmp, tmp, tmp2);
4524 4486 else
4525   - gen_helper_neon_cgt_f32(CPU_T001);
  4487 + gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
4526 4488 }
4527 4489 break;
4528 4490 case 29: /* Float compare absolute. */
4529 4491 if (!u)
4530 4492 return 1;
4531 4493 if (size == 0)
4532   - gen_helper_neon_acge_f32(CPU_T001);
  4494 + gen_helper_neon_acge_f32(tmp, tmp, tmp2);
4533 4495 else
4534   - gen_helper_neon_acgt_f32(CPU_T001);
  4496 + gen_helper_neon_acgt_f32(tmp, tmp, tmp2);
4535 4497 break;
4536 4498 case 30: /* Float min/max. */
4537 4499 if (size == 0)
4538   - gen_helper_neon_max_f32(CPU_T001);
  4500 + gen_helper_neon_max_f32(tmp, tmp, tmp2);
4539 4501 else
4540   - gen_helper_neon_min_f32(CPU_T001);
  4502 + gen_helper_neon_min_f32(tmp, tmp, tmp2);
4541 4503 break;
4542 4504 case 31:
4543 4505 if (size == 0)
4544   - gen_helper_recps_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env);
  4506 + gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
4545 4507 else
4546   - gen_helper_rsqrts_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env);
  4508 + gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
4547 4509 break;
4548 4510 default:
4549 4511 abort();
4550 4512 }
  4513 + dead_tmp(tmp2);
  4514 +
4551 4515 /* Save the result. For elementwise operations we can put it
4552 4516 straight into the destination register. For pairwise operations
4553 4517 we have to be careful to avoid clobbering the source operands. */
4554 4518 if (pairwise && rd == rm) {
4555   - gen_neon_movl_scratch_T0(pass);
  4519 + neon_store_scratch(pass, tmp);
4556 4520 } else {
4557   - NEON_SET_REG(T0, rd, pass);
  4521 + neon_store_reg(rd, pass, tmp);
4558 4522 }
4559 4523  
4560 4524 } /* for pass */
4561 4525 if (pairwise && rd == rm) {
4562 4526 for (pass = 0; pass < (q ? 4 : 2); pass++) {
4563   - gen_neon_movl_T0_scratch(pass);
4564   - NEON_SET_REG(T0, rd, pass);
  4527 + tmp = neon_load_scratch(pass);
  4528 + neon_store_reg(rd, pass, tmp);
4565 4529 }
4566 4530 }
4567 4531 /* End of 3 register same size operations. */
... ... @@ -4658,8 +4622,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4658 4622 neon_store_reg64(cpu_V0, rd + pass);
4659 4623 } else { /* size < 3 */
4660 4624 /* Operands in tmp and tmp2. */
4661   - gen_op_movl_T1_im(imm);
4662   - NEON_GET_REG(T0, rm, pass);
  4625 + tmp = neon_load_reg(rm, pass);
  4626 + tmp2 = new_tmp();
  4627 + tcg_gen_movi_i32(tmp2, imm);
4663 4628 switch (op) {
4664 4629 case 0: /* VSHR */
4665 4630 case 1: /* VSRA */
... ... @@ -4676,9 +4641,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4676 4641 break;
4677 4642 case 5: /* VSHL, VSLI */
4678 4643 switch (size) {
4679   - case 0: gen_helper_neon_shl_u8(CPU_T001); break;
4680   - case 1: gen_helper_neon_shl_u16(CPU_T001); break;
4681   - case 2: gen_helper_neon_shl_u32(CPU_T001); break;
  4644 + case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
  4645 + case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
  4646 + case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
4682 4647 default: return 1;
4683 4648 }
4684 4649 break;
... ... @@ -4687,18 +4652,20 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4687 4652 break;
4688 4653 case 7: /* VQSHLU */
4689 4654 switch (size) {
4690   - case 0: gen_helper_neon_qshl_u8(CPU_T0E01); break;
4691   - case 1: gen_helper_neon_qshl_u16(CPU_T0E01); break;
4692   - case 2: gen_helper_neon_qshl_u32(CPU_T0E01); break;
  4655 + case 0: gen_helper_neon_qshl_u8(tmp, cpu_env, tmp, tmp2); break;
  4656 + case 1: gen_helper_neon_qshl_u16(tmp, cpu_env, tmp, tmp2); break;
  4657 + case 2: gen_helper_neon_qshl_u32(tmp, cpu_env, tmp, tmp2); break;
4693 4658 default: return 1;
4694 4659 }
4695 4660 break;
4696 4661 }
  4662 + dead_tmp(tmp2);
4697 4663  
4698 4664 if (op == 1 || op == 3) {
4699 4665 /* Accumulate. */
4700   - NEON_GET_REG(T1, rd, pass);
4701   - gen_neon_add(size);
  4666 + tmp2 = neon_load_reg(rd, pass);
  4667 + gen_neon_add(size, tmp2, tmp);
  4668 + dead_tmp(tmp2);
4702 4669 } else if (op == 4 || (op == 5 && u)) {
4703 4670 /* Insert */
4704 4671 switch (size) {
... ... @@ -4726,12 +4693,13 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4726 4693 default:
4727 4694 abort();
4728 4695 }
4729   - tmp = neon_load_reg(rd, pass);
4730   - tcg_gen_andi_i32(cpu_T[0], cpu_T[0], imm);
4731   - tcg_gen_andi_i32(tmp, tmp, ~imm);
4732   - tcg_gen_or_i32(cpu_T[0], cpu_T[0], tmp);
  4696 + tmp2 = neon_load_reg(rd, pass);
  4697 + tcg_gen_andi_i32(tmp, tmp, imm);
  4698 + tcg_gen_andi_i32(tmp2, tmp2, ~imm);
  4699 + tcg_gen_or_i32(tmp, tmp, tmp2);
  4700 + dead_tmp(tmp2);
4733 4701 }
4734   - NEON_SET_REG(T0, rd, pass);
  4702 + neon_store_reg(rd, pass, tmp);
4735 4703 }
4736 4704 } /* for pass */
4737 4705 } else if (op < 10) {
... ... @@ -4893,9 +4861,6 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4893 4861 if (invert)
4894 4862 imm = ~imm;
4895 4863  
4896   - if (op != 14 || !invert)
4897   - gen_op_movl_T1_im(imm);
4898   -
4899 4864 for (pass = 0; pass < (q ? 4 : 2); pass++) {
4900 4865 if (op & 1 && op < 12) {
4901 4866 tmp = neon_load_reg(rd, pass);
... ... @@ -4962,11 +4927,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4962 4927 always aligned so will never overlap with wide
4963 4928 destinations in problematic ways. */
4964 4929 if (rd == rm && !src2_wide) {
4965   - NEON_GET_REG(T0, rm, 1);
4966   - gen_neon_movl_scratch_T0(2);
  4930 + tmp = neon_load_reg(rm, 1);
  4931 + neon_store_scratch(2, tmp);
4967 4932 } else if (rd == rn && !src1_wide) {
4968   - NEON_GET_REG(T0, rn, 1);
4969   - gen_neon_movl_scratch_T0(2);
  4933 + tmp = neon_load_reg(rn, 1);
  4934 + neon_store_scratch(2, tmp);
4970 4935 }
4971 4936 TCGV_UNUSED(tmp3);
4972 4937 for (pass = 0; pass < 2; pass++) {
... ... @@ -4975,9 +4940,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4975 4940 TCGV_UNUSED(tmp);
4976 4941 } else {
4977 4942 if (pass == 1 && rd == rn) {
4978   - gen_neon_movl_T0_scratch(2);
4979   - tmp = new_tmp();
4980   - tcg_gen_mov_i32(tmp, cpu_T[0]);
  4943 + tmp = neon_load_scratch(2);
4981 4944 } else {
4982 4945 tmp = neon_load_reg(rn, pass);
4983 4946 }
... ... @@ -4990,9 +4953,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4990 4953 TCGV_UNUSED(tmp2);
4991 4954 } else {
4992 4955 if (pass == 1 && rd == rm) {
4993   - gen_neon_movl_T0_scratch(2);
4994   - tmp2 = new_tmp();
4995   - tcg_gen_mov_i32(tmp2, cpu_T[0]);
  4956 + tmp2 = neon_load_scratch(2);
4996 4957 } else {
4997 4958 tmp2 = neon_load_reg(rm, pass);
4998 4959 }
... ... @@ -5035,6 +4996,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5035 4996 case 8: case 9: case 10: case 11: case 12: case 13:
5036 4997 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
5037 4998 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
  4999 + dead_tmp(tmp2);
  5000 + dead_tmp(tmp);
5038 5001 break;
5039 5002 case 14: /* Polynomial VMULL */
5040 5003 cpu_abort(env, "Polynomial VMULL not implemented");
... ... @@ -5123,55 +5086,56 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5123 5086 case 9: /* Floating point VMUL scalar */
5124 5087 case 12: /* VQDMULH scalar */
5125 5088 case 13: /* VQRDMULH scalar */
5126   - gen_neon_get_scalar(size, rm);
5127   - gen_neon_movl_scratch_T0(0);
  5089 + tmp = neon_get_scalar(size, rm);
  5090 + neon_store_scratch(0, tmp);
5128 5091 for (pass = 0; pass < (u ? 4 : 2); pass++) {
5129   - if (pass != 0)
5130   - gen_neon_movl_T0_scratch(0);
5131   - NEON_GET_REG(T1, rn, pass);
  5092 + tmp = neon_load_scratch(0);
  5093 + tmp2 = neon_load_reg(rn, pass);
5132 5094 if (op == 12) {
5133 5095 if (size == 1) {
5134   - gen_helper_neon_qdmulh_s16(CPU_T0E01);
  5096 + gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5135 5097 } else {
5136   - gen_helper_neon_qdmulh_s32(CPU_T0E01);
  5098 + gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5137 5099 }
5138 5100 } else if (op == 13) {
5139 5101 if (size == 1) {
5140   - gen_helper_neon_qrdmulh_s16(CPU_T0E01);
  5102 + gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5141 5103 } else {
5142   - gen_helper_neon_qrdmulh_s32(CPU_T0E01);
  5104 + gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5143 5105 }
5144 5106 } else if (op & 1) {
5145   - gen_helper_neon_mul_f32(CPU_T001);
  5107 + gen_helper_neon_mul_f32(tmp, tmp, tmp2);
5146 5108 } else {
5147 5109 switch (size) {
5148   - case 0: gen_helper_neon_mul_u8(CPU_T001); break;
5149   - case 1: gen_helper_neon_mul_u16(CPU_T001); break;
5150   - case 2: gen_op_mul_T0_T1(); break;
  5110 + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
  5111 + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
  5112 + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5151 5113 default: return 1;
5152 5114 }
5153 5115 }
  5116 + dead_tmp(tmp2);
5154 5117 if (op < 8) {
5155 5118 /* Accumulate. */
5156   - NEON_GET_REG(T1, rd, pass);
  5119 + tmp2 = neon_load_reg(rd, pass);
5157 5120 switch (op) {
5158 5121 case 0:
5159   - gen_neon_add(size);
  5122 + gen_neon_add(size, tmp, tmp2);
5160 5123 break;
5161 5124 case 1:
5162   - gen_helper_neon_add_f32(CPU_T001);
  5125 + gen_helper_neon_add_f32(tmp, tmp, tmp2);
5163 5126 break;
5164 5127 case 4:
5165   - gen_neon_rsb(size);
  5128 + gen_neon_rsb(size, tmp, tmp2);
5166 5129 break;
5167 5130 case 5:
5168   - gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]);
  5131 + gen_helper_neon_sub_f32(tmp, tmp2, tmp);
5169 5132 break;
5170 5133 default:
5171 5134 abort();
5172 5135 }
  5136 + dead_tmp(tmp2);
5173 5137 }
5174   - NEON_SET_REG(T0, rd, pass);
  5138 + neon_store_reg(rd, pass, tmp);
5175 5139 }
5176 5140 break;
5177 5141 case 2: /* VMLAL scalar */
... ... @@ -5183,19 +5147,17 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5183 5147 if (size == 0 && (op == 3 || op == 7 || op == 11))
5184 5148 return 1;
5185 5149  
5186   - gen_neon_get_scalar(size, rm);
5187   - NEON_GET_REG(T1, rn, 1);
  5150 + tmp2 = neon_get_scalar(size, rm);
  5151 + tmp3 = neon_load_reg(rn, 1);
5188 5152  
5189 5153 for (pass = 0; pass < 2; pass++) {
5190 5154 if (pass == 0) {
5191 5155 tmp = neon_load_reg(rn, 0);
5192 5156 } else {
5193   - tmp = new_tmp();
5194   - tcg_gen_mov_i32(tmp, cpu_T[1]);
  5157 + tmp = tmp3;
5195 5158 }
5196   - tmp2 = new_tmp();
5197   - tcg_gen_mov_i32(tmp2, cpu_T[0]);
5198 5159 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
  5160 + dead_tmp(tmp);
5199 5161 if (op == 6 || op == 7) {
5200 5162 gen_neon_negl(cpu_V0, size);
5201 5163 }
... ... @@ -5221,6 +5183,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5221 5183 }
5222 5184 neon_store_reg64(cpu_V0, rd + pass);
5223 5185 }
  5186 +
  5187 + dead_tmp(tmp2);
  5188 +
5224 5189 break;
5225 5190 default: /* 14 and 15 are RESERVED */
5226 5191 return 1;
... ... @@ -5287,25 +5252,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5287 5252 if (size == 3)
5288 5253 return 1;
5289 5254 for (pass = 0; pass < (q ? 2 : 1); pass++) {
5290   - NEON_GET_REG(T0, rm, pass * 2);
5291   - NEON_GET_REG(T1, rm, pass * 2 + 1);
  5255 + tmp = neon_load_reg(rm, pass * 2);
  5256 + tmp2 = neon_load_reg(rm, pass * 2 + 1);
5292 5257 switch (size) {
5293   - case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
5294   - case 1: gen_swap_half(cpu_T[0]); break;
  5258 + case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
  5259 + case 1: gen_swap_half(tmp); break;
5295 5260 case 2: /* no-op */ break;
5296 5261 default: abort();
5297 5262 }
5298   - NEON_SET_REG(T0, rd, pass * 2 + 1);
  5263 + neon_store_reg(rd, pass * 2 + 1, tmp);
5299 5264 if (size == 2) {
5300   - NEON_SET_REG(T1, rd, pass * 2);
  5265 + neon_store_reg(rd, pass * 2, tmp2);
5301 5266 } else {
5302   - gen_op_movl_T0_T1();
5303 5267 switch (size) {
5304   - case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
5305   - case 1: gen_swap_half(cpu_T[0]); break;
  5268 + case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
  5269 + case 1: gen_swap_half(tmp2); break;
5306 5270 default: abort();
5307 5271 }
5308   - NEON_SET_REG(T0, rd, pass * 2);
  5272 + neon_store_reg(rd, pass * 2, tmp2);
5309 5273 }
5310 5274 }
5311 5275 break;
... ... @@ -5335,10 +5299,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5335 5299 case 33: /* VTRN */
5336 5300 if (size == 2) {
5337 5301 for (n = 0; n < (q ? 4 : 2); n += 2) {
5338   - NEON_GET_REG(T0, rm, n);
5339   - NEON_GET_REG(T1, rd, n + 1);
5340   - NEON_SET_REG(T1, rm, n);
5341   - NEON_SET_REG(T0, rd, n + 1);
  5302 + tmp = neon_load_reg(rm, n);
  5303 + tmp2 = neon_load_reg(rd, n + 1);
  5304 + neon_store_reg(rm, n, tmp2);
  5305 + neon_store_reg(rd, n + 1, tmp);
5342 5306 }
5343 5307 } else {
5344 5308 goto elementwise;
... ... @@ -5358,16 +5322,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5358 5322 {0, 2, 4, 6, 1, 3, 5, 7};
5359 5323 for (n = 0; n < 8; n++) {
5360 5324 int reg = (n < 4) ? rd : rm;
5361   - gen_neon_movl_T0_scratch(unzip_order_q[n]);
5362   - NEON_SET_REG(T0, reg, n % 4);
  5325 + tmp = neon_load_scratch(unzip_order_q[n]);
  5326 + neon_store_reg(reg, n % 4, tmp);
5363 5327 }
5364 5328 } else {
5365 5329 static int unzip_order[4] =
5366 5330 {0, 4, 1, 5};
5367 5331 for (n = 0; n < 4; n++) {
5368 5332 int reg = (n < 2) ? rd : rm;
5369   - gen_neon_movl_T0_scratch(unzip_order[n]);
5370   - NEON_SET_REG(T0, reg, n % 2);
  5333 + tmp = neon_load_scratch(unzip_order[n]);
  5334 + neon_store_reg(reg, n % 2, tmp);
5371 5335 }
5372 5336 }
5373 5337 break;
... ... @@ -5380,21 +5344,21 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5380 5344 return 1;
5381 5345 count = (q ? 4 : 2);
5382 5346 for (n = 0; n < count; n++) {
5383   - NEON_GET_REG(T0, rd, n);
5384   - NEON_GET_REG(T1, rd, n);
  5347 + tmp = neon_load_reg(rd, n);
  5348 + tmp2 = neon_load_reg(rd, n);
5385 5349 switch (size) {
5386   - case 0: gen_neon_zip_u8(cpu_T[0], cpu_T[1]); break;
5387   - case 1: gen_neon_zip_u16(cpu_T[0], cpu_T[1]); break;
  5350 + case 0: gen_neon_zip_u8(tmp, tmp2); break;
  5351 + case 1: gen_neon_zip_u16(tmp, tmp2); break;
5388 5352 case 2: /* no-op */; break;
5389 5353 default: abort();
5390 5354 }
5391   - gen_neon_movl_scratch_T0(n * 2);
5392   - gen_neon_movl_scratch_T1(n * 2 + 1);
  5355 + neon_store_scratch(n * 2, tmp);
  5356 + neon_store_scratch(n * 2 + 1, tmp2);
5393 5357 }
5394 5358 for (n = 0; n < count * 2; n++) {
5395 5359 int reg = (n < count) ? rd : rm;
5396   - gen_neon_movl_T0_scratch(n);
5397   - NEON_SET_REG(T0, reg, n % count);
  5360 + tmp = neon_load_scratch(n);
  5361 + neon_store_reg(reg, n % count, tmp);
5398 5362 }
5399 5363 break;
5400 5364 case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */
... ... @@ -5437,124 +5401,132 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5437 5401 if (op == 30 || op == 31 || op >= 58) {
5438 5402 tcg_gen_ld_f32(cpu_F0s, cpu_env,
5439 5403 neon_reg_offset(rm, pass));
  5404 + TCGV_UNUSED(tmp);
5440 5405 } else {
5441   - NEON_GET_REG(T0, rm, pass);
  5406 + tmp = neon_load_reg(rm, pass);
5442 5407 }
5443 5408 switch (op) {
5444 5409 case 1: /* VREV32 */
5445 5410 switch (size) {
5446   - case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
5447   - case 1: gen_swap_half(cpu_T[0]); break;
  5411 + case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
  5412 + case 1: gen_swap_half(tmp); break;
5448 5413 default: return 1;
5449 5414 }
5450 5415 break;
5451 5416 case 2: /* VREV16 */
5452 5417 if (size != 0)
5453 5418 return 1;
5454   - gen_rev16(cpu_T[0]);
  5419 + gen_rev16(tmp);
5455 5420 break;
5456 5421 case 8: /* CLS */
5457 5422 switch (size) {
5458   - case 0: gen_helper_neon_cls_s8(cpu_T[0], cpu_T[0]); break;
5459   - case 1: gen_helper_neon_cls_s16(cpu_T[0], cpu_T[0]); break;
5460   - case 2: gen_helper_neon_cls_s32(cpu_T[0], cpu_T[0]); break;
  5423 + case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
  5424 + case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
  5425 + case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
5461 5426 default: return 1;
5462 5427 }
5463 5428 break;
5464 5429 case 9: /* CLZ */
5465 5430 switch (size) {
5466   - case 0: gen_helper_neon_clz_u8(cpu_T[0], cpu_T[0]); break;
5467   - case 1: gen_helper_neon_clz_u16(cpu_T[0], cpu_T[0]); break;
5468   - case 2: gen_helper_clz(cpu_T[0], cpu_T[0]); break;
  5431 + case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
  5432 + case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
  5433 + case 2: gen_helper_clz(tmp, tmp); break;
5469 5434 default: return 1;
5470 5435 }
5471 5436 break;
5472 5437 case 10: /* CNT */
5473 5438 if (size != 0)
5474 5439 return 1;
5475   - gen_helper_neon_cnt_u8(cpu_T[0], cpu_T[0]);
  5440 + gen_helper_neon_cnt_u8(tmp, tmp);
5476 5441 break;
5477 5442 case 11: /* VNOT */
5478 5443 if (size != 0)
5479 5444 return 1;
5480   - gen_op_notl_T0();
  5445 + tcg_gen_not_i32(tmp, tmp);
5481 5446 break;
5482 5447 case 14: /* VQABS */
5483 5448 switch (size) {
5484   - case 0: gen_helper_neon_qabs_s8(cpu_T[0], cpu_env, cpu_T[0]); break;
5485   - case 1: gen_helper_neon_qabs_s16(cpu_T[0], cpu_env, cpu_T[0]); break;
5486   - case 2: gen_helper_neon_qabs_s32(cpu_T[0], cpu_env, cpu_T[0]); break;
  5449 + case 0: gen_helper_neon_qabs_s8(tmp, cpu_env, tmp); break;
  5450 + case 1: gen_helper_neon_qabs_s16(tmp, cpu_env, tmp); break;
  5451 + case 2: gen_helper_neon_qabs_s32(tmp, cpu_env, tmp); break;
5487 5452 default: return 1;
5488 5453 }
5489 5454 break;
5490 5455 case 15: /* VQNEG */
5491 5456 switch (size) {
5492   - case 0: gen_helper_neon_qneg_s8(cpu_T[0], cpu_env, cpu_T[0]); break;
5493   - case 1: gen_helper_neon_qneg_s16(cpu_T[0], cpu_env, cpu_T[0]); break;
5494   - case 2: gen_helper_neon_qneg_s32(cpu_T[0], cpu_env, cpu_T[0]); break;
  5457 + case 0: gen_helper_neon_qneg_s8(tmp, cpu_env, tmp); break;
  5458 + case 1: gen_helper_neon_qneg_s16(tmp, cpu_env, tmp); break;
  5459 + case 2: gen_helper_neon_qneg_s32(tmp, cpu_env, tmp); break;
5495 5460 default: return 1;
5496 5461 }
5497 5462 break;
5498 5463 case 16: case 19: /* VCGT #0, VCLE #0 */
5499   - gen_op_movl_T1_im(0);
  5464 + tmp2 = tcg_const_i32(0);
5500 5465 switch(size) {
5501   - case 0: gen_helper_neon_cgt_s8(CPU_T001); break;
5502   - case 1: gen_helper_neon_cgt_s16(CPU_T001); break;
5503   - case 2: gen_helper_neon_cgt_s32(CPU_T001); break;
  5466 + case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
  5467 + case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
  5468 + case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
5504 5469 default: return 1;
5505 5470 }
  5471 + tcg_temp_free(tmp2);
5506 5472 if (op == 19)
5507   - gen_op_notl_T0();
  5473 + tcg_gen_not_i32(tmp, tmp);
5508 5474 break;
5509 5475 case 17: case 20: /* VCGE #0, VCLT #0 */
5510   - gen_op_movl_T1_im(0);
  5476 + tmp2 = tcg_const_i32(0);
5511 5477 switch(size) {
5512   - case 0: gen_helper_neon_cge_s8(CPU_T001); break;
5513   - case 1: gen_helper_neon_cge_s16(CPU_T001); break;
5514   - case 2: gen_helper_neon_cge_s32(CPU_T001); break;
  5478 + case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
  5479 + case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
  5480 + case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
5515 5481 default: return 1;
5516 5482 }
  5483 + tcg_temp_free(tmp2);
5517 5484 if (op == 20)
5518   - gen_op_notl_T0();
  5485 + tcg_gen_not_i32(tmp, tmp);
5519 5486 break;
5520 5487 case 18: /* VCEQ #0 */
5521   - gen_op_movl_T1_im(0);
  5488 + tmp2 = tcg_const_i32(0);
5522 5489 switch(size) {
5523   - case 0: gen_helper_neon_ceq_u8(CPU_T001); break;
5524   - case 1: gen_helper_neon_ceq_u16(CPU_T001); break;
5525   - case 2: gen_helper_neon_ceq_u32(CPU_T001); break;
  5490 + case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
  5491 + case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
  5492 + case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
5526 5493 default: return 1;
5527 5494 }
  5495 + tcg_temp_free(tmp2);
5528 5496 break;
5529 5497 case 22: /* VABS */
5530 5498 switch(size) {
5531   - case 0: gen_helper_neon_abs_s8(cpu_T[0], cpu_T[0]); break;
5532   - case 1: gen_helper_neon_abs_s16(cpu_T[0], cpu_T[0]); break;
5533   - case 2: tcg_gen_abs_i32(cpu_T[0], cpu_T[0]); break;
  5499 + case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
  5500 + case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
  5501 + case 2: tcg_gen_abs_i32(tmp, tmp); break;
5534 5502 default: return 1;
5535 5503 }
5536 5504 break;
5537 5505 case 23: /* VNEG */
5538   - gen_op_movl_T1_im(0);
5539 5506 if (size == 3)
5540 5507 return 1;
5541   - gen_neon_rsb(size);
  5508 + tmp2 = tcg_const_i32(0);
  5509 + gen_neon_rsb(size, tmp, tmp2);
  5510 + tcg_temp_free(tmp2);
5542 5511 break;
5543 5512 case 24: case 27: /* Float VCGT #0, Float VCLE #0 */
5544   - gen_op_movl_T1_im(0);
5545   - gen_helper_neon_cgt_f32(CPU_T001);
  5513 + tmp2 = tcg_const_i32(0);
  5514 + gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
  5515 + tcg_temp_free(tmp2);
5546 5516 if (op == 27)
5547   - gen_op_notl_T0();
  5517 + tcg_gen_not_i32(tmp, tmp);
5548 5518 break;
5549 5519 case 25: case 28: /* Float VCGE #0, Float VCLT #0 */
5550   - gen_op_movl_T1_im(0);
5551   - gen_helper_neon_cge_f32(CPU_T001);
  5520 + tmp2 = tcg_const_i32(0);
  5521 + gen_helper_neon_cge_f32(tmp, tmp, tmp2);
  5522 + tcg_temp_free(tmp2);
5552 5523 if (op == 28)
5553   - gen_op_notl_T0();
  5524 + tcg_gen_not_i32(tmp, tmp);
5554 5525 break;
5555 5526 case 26: /* Float VCEQ #0 */
5556   - gen_op_movl_T1_im(0);
5557   - gen_helper_neon_ceq_f32(CPU_T001);
  5527 + tmp2 = tcg_const_i32(0);
  5528 + gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
  5529 + tcg_temp_free(tmp2);
5558 5530 break;
5559 5531 case 30: /* Float VABS */
5560 5532 gen_vfp_abs(0);
... ... @@ -5563,24 +5535,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5563 5535 gen_vfp_neg(0);
5564 5536 break;
5565 5537 case 32: /* VSWP */
5566   - NEON_GET_REG(T1, rd, pass);
5567   - NEON_SET_REG(T1, rm, pass);
  5538 + tmp2 = neon_load_reg(rd, pass);
  5539 + neon_store_reg(rm, pass, tmp2);
5568 5540 break;
5569 5541 case 33: /* VTRN */
5570   - NEON_GET_REG(T1, rd, pass);
  5542 + tmp2 = neon_load_reg(rd, pass);
5571 5543 switch (size) {
5572   - case 0: gen_neon_trn_u8(cpu_T[0], cpu_T[1]); break;
5573   - case 1: gen_neon_trn_u16(cpu_T[0], cpu_T[1]); break;
  5544 + case 0: gen_neon_trn_u8(tmp, tmp2); break;
  5545 + case 1: gen_neon_trn_u16(tmp, tmp2); break;
5574 5546 case 2: abort();
5575 5547 default: return 1;
5576 5548 }
5577   - NEON_SET_REG(T1, rm, pass);
  5549 + neon_store_reg(rm, pass, tmp2);
5578 5550 break;
5579 5551 case 56: /* Integer VRECPE */
5580   - gen_helper_recpe_u32(cpu_T[0], cpu_T[0], cpu_env);
  5552 + gen_helper_recpe_u32(tmp, tmp, cpu_env);
5581 5553 break;
5582 5554 case 57: /* Integer VRSQRTE */
5583   - gen_helper_rsqrte_u32(cpu_T[0], cpu_T[0], cpu_env);
  5555 + gen_helper_rsqrte_u32(tmp, tmp, cpu_env);
5584 5556 break;
5585 5557 case 58: /* Float VRECPE */
5586 5558 gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env);
... ... @@ -5608,7 +5580,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5608 5580 tcg_gen_st_f32(cpu_F0s, cpu_env,
5609 5581 neon_reg_offset(rd, pass));
5610 5582 } else {
5611   - NEON_SET_REG(T0, rd, pass);
  5583 + neon_store_reg(rd, pass, tmp);
5612 5584 }
5613 5585 }
5614 5586 break;
... ... @@ -5641,21 +5613,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5641 5613 } else if ((insn & 0x380) == 0) {
5642 5614 /* VDUP */
5643 5615 if (insn & (1 << 19)) {
5644   - NEON_SET_REG(T0, rm, 1);
  5616 + tmp = neon_load_reg(rm, 1);
5645 5617 } else {
5646   - NEON_SET_REG(T0, rm, 0);
  5618 + tmp = neon_load_reg(rm, 0);
5647 5619 }
5648 5620 if (insn & (1 << 16)) {
5649   - gen_neon_dup_u8(cpu_T[0], ((insn >> 17) & 3) * 8);
  5621 + gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
5650 5622 } else if (insn & (1 << 17)) {
5651 5623 if ((insn >> 18) & 1)
5652   - gen_neon_dup_high16(cpu_T[0]);
  5624 + gen_neon_dup_high16(tmp);
5653 5625 else
5654   - gen_neon_dup_low16(cpu_T[0]);
  5626 + gen_neon_dup_low16(tmp);
5655 5627 }
5656 5628 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5657   - NEON_SET_REG(T0, rd, pass);
  5629 + tmp2 = new_tmp();
  5630 + tcg_gen_mov_i32(tmp2, tmp);
  5631 + neon_store_reg(rd, pass, tmp2);
5658 5632 }
  5633 + dead_tmp(tmp);
5659 5634 } else {
5660 5635 return 1;
5661 5636 }
... ...