Commit fb951ff5c0b598f7a4dff0fd59d5167345c3337a
1 parent dc485d28
Convert disas_neon_data_insn and helpers not to use cpu_T.
Signed-off-by: Filip Navara <filip.navara@gmail.com>
Showing 1 changed file with 273 additions and 298 deletions
target-arm/translate.c
... | ... | @@ -188,13 +188,9 @@ static void store_reg(DisasContext *s, int reg, TCGv var) |
188 | 188 | |
189 | 189 | #define gen_op_addl_T1_im(im) tcg_gen_addi_i32(cpu_T[1], cpu_T[1], im) |
190 | 190 | #define gen_op_addl_T0_T1() tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1]) |
191 | -#define gen_op_subl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[0], cpu_T[1]) | |
192 | -#define gen_op_rsbl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[1], cpu_T[0]) | |
193 | 191 | |
194 | 192 | #define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1]) |
195 | -#define gen_op_xorl_T0_T1() tcg_gen_xor_i32(cpu_T[0], cpu_T[0], cpu_T[1]) | |
196 | 193 | #define gen_op_orl_T0_T1() tcg_gen_or_i32(cpu_T[0], cpu_T[0], cpu_T[1]) |
197 | -#define gen_op_notl_T0() tcg_gen_not_i32(cpu_T[0], cpu_T[0]) | |
198 | 194 | #define gen_op_notl_T1() tcg_gen_not_i32(cpu_T[1], cpu_T[1]) |
199 | 195 | |
200 | 196 | #define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im) |
... | ... | @@ -209,7 +205,6 @@ static void store_reg(DisasContext *s, int reg, TCGv var) |
209 | 205 | #define gen_sxtb16(var) gen_helper_sxtb16(var, var) |
210 | 206 | #define gen_uxtb16(var) gen_helper_uxtb16(var, var) |
211 | 207 | |
212 | -#define gen_op_mul_T0_T1() tcg_gen_mul_i32(cpu_T[0], cpu_T[0], cpu_T[1]) | |
213 | 208 | |
214 | 209 | #define gen_set_cpsr(var, mask) gen_helper_cpsr_write(var, tcg_const_i32(mask)) |
215 | 210 | /* Set NZCV flags from the high 4 bits of var. */ |
... | ... | @@ -1118,14 +1113,6 @@ neon_reg_offset (int reg, int n) |
1118 | 1113 | return vfp_reg_offset(0, sreg); |
1119 | 1114 | } |
1120 | 1115 | |
1121 | -/* FIXME: Remove these. */ | |
1122 | -#define neon_T0 cpu_T[0] | |
1123 | -#define neon_T1 cpu_T[1] | |
1124 | -#define NEON_GET_REG(T, reg, n) \ | |
1125 | - tcg_gen_ld_i32(neon_##T, cpu_env, neon_reg_offset(reg, n)) | |
1126 | -#define NEON_SET_REG(T, reg, n) \ | |
1127 | - tcg_gen_st_i32(neon_##T, cpu_env, neon_reg_offset(reg, n)) | |
1128 | - | |
1129 | 1116 | static TCGv neon_load_reg(int reg, int pass) |
1130 | 1117 | { |
1131 | 1118 | TCGv tmp = new_tmp(); |
... | ... | @@ -3485,31 +3472,25 @@ static void gen_nop_hint(DisasContext *s, int val) |
3485 | 3472 | } |
3486 | 3473 | } |
3487 | 3474 | |
3488 | -/* These macros help make the code more readable when migrating from the | |
3489 | - old dyngen helpers. They should probably be removed when | |
3490 | - T0/T1 are removed. */ | |
3491 | -#define CPU_T001 cpu_T[0], cpu_T[0], cpu_T[1] | |
3492 | -#define CPU_T0E01 cpu_T[0], cpu_env, cpu_T[0], cpu_T[1] | |
3493 | - | |
3494 | 3475 | #define CPU_V001 cpu_V0, cpu_V0, cpu_V1 |
3495 | 3476 | |
3496 | -static inline int gen_neon_add(int size) | |
3477 | +static inline int gen_neon_add(int size, TCGv t0, TCGv t1) | |
3497 | 3478 | { |
3498 | 3479 | switch (size) { |
3499 | - case 0: gen_helper_neon_add_u8(CPU_T001); break; | |
3500 | - case 1: gen_helper_neon_add_u16(CPU_T001); break; | |
3501 | - case 2: gen_op_addl_T0_T1(); break; | |
3480 | + case 0: gen_helper_neon_add_u8(t0, t0, t1); break; | |
3481 | + case 1: gen_helper_neon_add_u16(t0, t0, t1); break; | |
3482 | + case 2: tcg_gen_add_i32(t0, t0, t1); break; | |
3502 | 3483 | default: return 1; |
3503 | 3484 | } |
3504 | 3485 | return 0; |
3505 | 3486 | } |
3506 | 3487 | |
3507 | -static inline void gen_neon_rsb(int size) | |
3488 | +static inline void gen_neon_rsb(int size, TCGv t0, TCGv t1) | |
3508 | 3489 | { |
3509 | 3490 | switch (size) { |
3510 | - case 0: gen_helper_neon_sub_u8(cpu_T[0], cpu_T[1], cpu_T[0]); break; | |
3511 | - case 1: gen_helper_neon_sub_u16(cpu_T[0], cpu_T[1], cpu_T[0]); break; | |
3512 | - case 2: gen_op_rsbl_T0_T1(); break; | |
3491 | + case 0: gen_helper_neon_sub_u8(t0, t1, t0); break; | |
3492 | + case 1: gen_helper_neon_sub_u16(t0, t1, t0); break; | |
3493 | + case 2: tcg_gen_sub_i32(t0, t1, t0); break; | |
3513 | 3494 | default: return; |
3514 | 3495 | } |
3515 | 3496 | } |
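Note (sketch, not part of the diff): with gen_neon_add() and gen_neon_rsb() now taking explicit operands, a caller owns its temporaries end to end. A minimal sketch of the resulting call pattern, assuming neon_store_reg() frees the value it stores (as neon_store_scratch() visibly does later in this patch):

    tmp  = neon_load_reg(rn, pass);        /* first operand, fresh temp     */
    tmp2 = neon_load_reg(rm, pass);        /* second operand                */
    if (gen_neon_add(size, tmp, tmp2))     /* result is left in tmp         */
        return 1;                          /* unsupported element size      */
    dead_tmp(tmp2);                        /* second operand no longer used */
    neon_store_reg(rd, pass, tmp);         /* store; tmp assumed freed here */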
... | ... | @@ -3529,22 +3510,22 @@ static inline void gen_neon_rsb(int size) |
3529 | 3510 | #define GEN_NEON_INTEGER_OP_ENV(name) do { \ |
3530 | 3511 | switch ((size << 1) | u) { \ |
3531 | 3512 | case 0: \ |
3532 | - gen_helper_neon_##name##_s8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ | |
3513 | + gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \ | |
3533 | 3514 | break; \ |
3534 | 3515 | case 1: \ |
3535 | - gen_helper_neon_##name##_u8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ | |
3516 | + gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \ | |
3536 | 3517 | break; \ |
3537 | 3518 | case 2: \ |
3538 | - gen_helper_neon_##name##_s16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ | |
3519 | + gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \ | |
3539 | 3520 | break; \ |
3540 | 3521 | case 3: \ |
3541 | - gen_helper_neon_##name##_u16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ | |
3522 | + gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \ | |
3542 | 3523 | break; \ |
3543 | 3524 | case 4: \ |
3544 | - gen_helper_neon_##name##_s32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ | |
3525 | + gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \ | |
3545 | 3526 | break; \ |
3546 | 3527 | case 5: \ |
3547 | - gen_helper_neon_##name##_u32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ | |
3528 | + gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \ | |
3548 | 3529 | break; \ |
3549 | 3530 | default: return 1; \ |
3550 | 3531 | }} while (0) |
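Note (sketch, not part of the diff): GEN_NEON_INTEGER_OP_ENV and its non-env twin below now expand against local variables named tmp and tmp2 instead of cpu_T[], so both operands must already be loaded and the enclosing function must return int (the macros bail out with "return 1" on an unsupported size). For example, the VQADD path reduces to roughly:

    tmp  = neon_load_reg(rn, pass);
    tmp2 = neon_load_reg(rm, pass);
    GEN_NEON_INTEGER_OP_ENV(qadd);     /* picks the s8/u8/.../u32 helper
                                          from (size << 1) | u; result in tmp */
    dead_tmp(tmp2);
    neon_store_reg(rd, pass, tmp);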
... | ... | @@ -3552,73 +3533,53 @@ static inline void gen_neon_rsb(int size) |
3552 | 3533 | #define GEN_NEON_INTEGER_OP(name) do { \ |
3553 | 3534 | switch ((size << 1) | u) { \ |
3554 | 3535 | case 0: \ |
3555 | - gen_helper_neon_##name##_s8(cpu_T[0], cpu_T[0], cpu_T[1]); \ | |
3536 | + gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \ | |
3556 | 3537 | break; \ |
3557 | 3538 | case 1: \ |
3558 | - gen_helper_neon_##name##_u8(cpu_T[0], cpu_T[0], cpu_T[1]); \ | |
3539 | + gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \ | |
3559 | 3540 | break; \ |
3560 | 3541 | case 2: \ |
3561 | - gen_helper_neon_##name##_s16(cpu_T[0], cpu_T[0], cpu_T[1]); \ | |
3542 | + gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \ | |
3562 | 3543 | break; \ |
3563 | 3544 | case 3: \ |
3564 | - gen_helper_neon_##name##_u16(cpu_T[0], cpu_T[0], cpu_T[1]); \ | |
3545 | + gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \ | |
3565 | 3546 | break; \ |
3566 | 3547 | case 4: \ |
3567 | - gen_helper_neon_##name##_s32(cpu_T[0], cpu_T[0], cpu_T[1]); \ | |
3548 | + gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \ | |
3568 | 3549 | break; \ |
3569 | 3550 | case 5: \ |
3570 | - gen_helper_neon_##name##_u32(cpu_T[0], cpu_T[0], cpu_T[1]); \ | |
3551 | + gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \ | |
3571 | 3552 | break; \ |
3572 | 3553 | default: return 1; \ |
3573 | 3554 | }} while (0) |
3574 | 3555 | |
3575 | -static inline void | |
3576 | -gen_neon_movl_scratch_T0(int scratch) | |
3556 | +static TCGv neon_load_scratch(int scratch) | |
3577 | 3557 | { |
3578 | - uint32_t offset; | |
3579 | - | |
3580 | - offset = offsetof(CPUARMState, vfp.scratch[scratch]); | |
3581 | - tcg_gen_st_i32(cpu_T[0], cpu_env, offset); | |
3582 | -} | |
3583 | - | |
3584 | -static inline void | |
3585 | -gen_neon_movl_scratch_T1(int scratch) | |
3586 | -{ | |
3587 | - uint32_t offset; | |
3588 | - | |
3589 | - offset = offsetof(CPUARMState, vfp.scratch[scratch]); | |
3590 | - tcg_gen_st_i32(cpu_T[1], cpu_env, offset); | |
3591 | -} | |
3592 | - | |
3593 | -static inline void | |
3594 | -gen_neon_movl_T0_scratch(int scratch) | |
3595 | -{ | |
3596 | - uint32_t offset; | |
3597 | - | |
3598 | - offset = offsetof(CPUARMState, vfp.scratch[scratch]); | |
3599 | - tcg_gen_ld_i32(cpu_T[0], cpu_env, offset); | |
3558 | + TCGv tmp = new_tmp(); | |
3559 | + tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); | |
3560 | + return tmp; | |
3600 | 3561 | } |
3601 | - | |
3602 | -static inline void | |
3603 | -gen_neon_movl_T1_scratch(int scratch) | |
3562 | + | |
3563 | +static void neon_store_scratch(int scratch, TCGv var) | |
3604 | 3564 | { |
3605 | - uint32_t offset; | |
3606 | - | |
3607 | - offset = offsetof(CPUARMState, vfp.scratch[scratch]); | |
3608 | - tcg_gen_ld_i32(cpu_T[1], cpu_env, offset); | |
3565 | + tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); | |
3566 | + dead_tmp(var); | |
3609 | 3567 | } |
3610 | - | |
3611 | -static inline void gen_neon_get_scalar(int size, int reg) | |
3568 | + | |
3569 | +static inline TCGv neon_get_scalar(int size, int reg) | |
3612 | 3570 | { |
3571 | + TCGv tmp; | |
3613 | 3572 | if (size == 1) { |
3614 | - NEON_GET_REG(T0, reg >> 1, reg & 1); | |
3573 | + tmp = neon_load_reg(reg >> 1, reg & 1); | |
3615 | 3574 | } else { |
3616 | - NEON_GET_REG(T0, reg >> 2, (reg >> 1) & 1); | |
3617 | - if (reg & 1) | |
3618 | - gen_neon_dup_low16(cpu_T[0]); | |
3619 | - else | |
3620 | - gen_neon_dup_high16(cpu_T[0]); | |
3575 | + tmp = neon_load_reg(reg >> 2, (reg >> 1) & 1); | |
3576 | + if (reg & 1) { | |
3577 | + gen_neon_dup_low16(tmp); | |
3578 | + } else { | |
3579 | + gen_neon_dup_high16(tmp); | |
3580 | + } | |
3621 | 3581 | } |
3582 | + return tmp; | |
3622 | 3583 | } |
3623 | 3584 | |
3624 | 3585 | static void gen_neon_unzip_u8(TCGv t0, TCGv t1) |
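Note (sketch, not part of the diff): the four gen_neon_movl_*_scratch helpers collapse into a load/store pair keyed by scratch slot. neon_store_scratch() frees the temporary it is handed (see the dead_tmp(var) above) and neon_load_scratch() returns a fresh one, so a typical spill across the pass loop looks like:

    tmp = neon_load_reg(rm, 1);       /* value that later writes would clobber */
    neon_store_scratch(2, tmp);       /* saved in vfp.scratch[2]; tmp is freed */
    /* ... on a later pass ... */
    tmp = neon_load_scratch(2);       /* new temporary holding the saved value */
    neon_store_reg(rd, pass, tmp);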
... | ... | @@ -3715,18 +3676,19 @@ static void gen_neon_zip_u16(TCGv t0, TCGv t1) |
3715 | 3676 | static void gen_neon_unzip(int reg, int q, int tmp, int size) |
3716 | 3677 | { |
3717 | 3678 | int n; |
3679 | + TCGv t0, t1; | |
3718 | 3680 | |
3719 | 3681 | for (n = 0; n < q + 1; n += 2) { |
3720 | - NEON_GET_REG(T0, reg, n); | |
3721 | - NEON_GET_REG(T1, reg, n + 1); | |
3682 | + t0 = neon_load_reg(reg, n); | |
3683 | + t1 = neon_load_reg(reg, n + 1); | |
3722 | 3684 | switch (size) { |
3723 | - case 0: gen_neon_unzip_u8(cpu_T[0], cpu_T[1]); break; | |
3724 | - case 1: gen_neon_zip_u16(cpu_T[0], cpu_T[1]); break; /* zip and unzip are the same. */ | |
3685 | + case 0: gen_neon_unzip_u8(t0, t1); break; | |
3686 | + case 1: gen_neon_zip_u16(t0, t1); break; /* zip and unzip are the same. */ | |
3725 | 3687 | case 2: /* no-op */; break; |
3726 | 3688 | default: abort(); |
3727 | 3689 | } |
3728 | - gen_neon_movl_T0_scratch(tmp + n); | |
3729 | - gen_neon_movl_T1_scratch(tmp + n + 1); | |
3690 | + neon_store_scratch(tmp + n, t0); | |
3691 | + neon_store_scratch(tmp + n + 1, t1); | |
3730 | 3692 | } |
3731 | 3693 | } |
3732 | 3694 | |
... | ... | @@ -4160,10 +4122,6 @@ static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u) |
4160 | 4122 | break; |
4161 | 4123 | default: abort(); |
4162 | 4124 | } |
4163 | - if (size < 2) { | |
4164 | - dead_tmp(b); | |
4165 | - dead_tmp(a); | |
4166 | - } | |
4167 | 4125 | } |
4168 | 4126 | |
4169 | 4127 | /* Translate a NEON data processing instruction. Return nonzero if the |
... | ... | @@ -4296,6 +4254,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4296 | 4254 | pairwise = 0; |
4297 | 4255 | break; |
4298 | 4256 | } |
4257 | + | |
4299 | 4258 | for (pass = 0; pass < (q ? 4 : 2); pass++) { |
4300 | 4259 | |
4301 | 4260 | if (pairwise) { |
... | ... | @@ -4305,16 +4264,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4305 | 4264 | else |
4306 | 4265 | n = 0; |
4307 | 4266 | if (pass < q + 1) { |
4308 | - NEON_GET_REG(T0, rn, n); | |
4309 | - NEON_GET_REG(T1, rn, n + 1); | |
4267 | + tmp = neon_load_reg(rn, n); | |
4268 | + tmp2 = neon_load_reg(rn, n + 1); | |
4310 | 4269 | } else { |
4311 | - NEON_GET_REG(T0, rm, n); | |
4312 | - NEON_GET_REG(T1, rm, n + 1); | |
4270 | + tmp = neon_load_reg(rm, n); | |
4271 | + tmp2 = neon_load_reg(rm, n + 1); | |
4313 | 4272 | } |
4314 | 4273 | } else { |
4315 | 4274 | /* Elementwise. */ |
4316 | - NEON_GET_REG(T0, rn, pass); | |
4317 | - NEON_GET_REG(T1, rm, pass); | |
4275 | + tmp = neon_load_reg(rn, pass); | |
4276 | + tmp2 = neon_load_reg(rm, pass); | |
4318 | 4277 | } |
4319 | 4278 | switch (op) { |
4320 | 4279 | case 0: /* VHADD */ |
... | ... | @@ -4329,35 +4288,35 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4329 | 4288 | case 3: /* Logic ops. */ |
4330 | 4289 | switch ((u << 2) | size) { |
4331 | 4290 | case 0: /* VAND */ |
4332 | - gen_op_andl_T0_T1(); | |
4291 | + tcg_gen_and_i32(tmp, tmp, tmp2); | |
4333 | 4292 | break; |
4334 | 4293 | case 1: /* BIC */ |
4335 | - gen_op_bicl_T0_T1(); | |
4294 | + tcg_gen_bic_i32(tmp, tmp, tmp2); | |
4336 | 4295 | break; |
4337 | 4296 | case 2: /* VORR */ |
4338 | - gen_op_orl_T0_T1(); | |
4297 | + tcg_gen_or_i32(tmp, tmp, tmp2); | |
4339 | 4298 | break; |
4340 | 4299 | case 3: /* VORN */ |
4341 | - gen_op_notl_T1(); | |
4342 | - gen_op_orl_T0_T1(); | |
4300 | + tcg_gen_not_i32(tmp2, tmp2); | |
4301 | + tcg_gen_or_i32(tmp, tmp, tmp2); | |
4343 | 4302 | break; |
4344 | 4303 | case 4: /* VEOR */ |
4345 | - gen_op_xorl_T0_T1(); | |
4304 | + tcg_gen_xor_i32(tmp, tmp, tmp2); | |
4346 | 4305 | break; |
4347 | 4306 | case 5: /* VBSL */ |
4348 | - tmp = neon_load_reg(rd, pass); | |
4349 | - gen_neon_bsl(cpu_T[0], cpu_T[0], cpu_T[1], tmp); | |
4350 | - dead_tmp(tmp); | |
4307 | + tmp3 = neon_load_reg(rd, pass); | |
4308 | + gen_neon_bsl(tmp, tmp, tmp2, tmp3); | |
4309 | + dead_tmp(tmp3); | |
4351 | 4310 | break; |
4352 | 4311 | case 6: /* VBIT */ |
4353 | - tmp = neon_load_reg(rd, pass); | |
4354 | - gen_neon_bsl(cpu_T[0], cpu_T[0], tmp, cpu_T[1]); | |
4355 | - dead_tmp(tmp); | |
4312 | + tmp3 = neon_load_reg(rd, pass); | |
4313 | + gen_neon_bsl(tmp, tmp, tmp3, tmp2); | |
4314 | + dead_tmp(tmp3); | |
4356 | 4315 | break; |
4357 | 4316 | case 7: /* VBIF */ |
4358 | - tmp = neon_load_reg(rd, pass); | |
4359 | - gen_neon_bsl(cpu_T[0], tmp, cpu_T[0], cpu_T[1]); | |
4360 | - dead_tmp(tmp); | |
4317 | + tmp3 = neon_load_reg(rd, pass); | |
4318 | + gen_neon_bsl(tmp, tmp3, tmp, tmp2); | |
4319 | + dead_tmp(tmp3); | |
4361 | 4320 | break; |
4362 | 4321 | } |
4363 | 4322 | break; |
... | ... | @@ -4396,18 +4355,19 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4396 | 4355 | break; |
4397 | 4356 | case 15: /* VABA */ |
4398 | 4357 | GEN_NEON_INTEGER_OP(abd); |
4399 | - NEON_GET_REG(T1, rd, pass); | |
4400 | - gen_neon_add(size); | |
4358 | + dead_tmp(tmp2); | |
4359 | + tmp2 = neon_load_reg(rd, pass); | |
4360 | + gen_neon_add(size, tmp, tmp2); | |
4401 | 4361 | break; |
4402 | 4362 | case 16: |
4403 | 4363 | if (!u) { /* VADD */ |
4404 | - if (gen_neon_add(size)) | |
4364 | + if (gen_neon_add(size, tmp, tmp2)) | |
4405 | 4365 | return 1; |
4406 | 4366 | } else { /* VSUB */ |
4407 | 4367 | switch (size) { |
4408 | - case 0: gen_helper_neon_sub_u8(CPU_T001); break; | |
4409 | - case 1: gen_helper_neon_sub_u16(CPU_T001); break; | |
4410 | - case 2: gen_op_subl_T0_T1(); break; | |
4368 | + case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break; | |
4369 | + case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break; | |
4370 | + case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break; | |
4411 | 4371 | default: return 1; |
4412 | 4372 | } |
4413 | 4373 | } |
... | ... | @@ -4415,42 +4375,43 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4415 | 4375 | case 17: |
4416 | 4376 | if (!u) { /* VTST */ |
4417 | 4377 | switch (size) { |
4418 | - case 0: gen_helper_neon_tst_u8(CPU_T001); break; | |
4419 | - case 1: gen_helper_neon_tst_u16(CPU_T001); break; | |
4420 | - case 2: gen_helper_neon_tst_u32(CPU_T001); break; | |
4378 | + case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break; | |
4379 | + case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break; | |
4380 | + case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break; | |
4421 | 4381 | default: return 1; |
4422 | 4382 | } |
4423 | 4383 | } else { /* VCEQ */ |
4424 | 4384 | switch (size) { |
4425 | - case 0: gen_helper_neon_ceq_u8(CPU_T001); break; | |
4426 | - case 1: gen_helper_neon_ceq_u16(CPU_T001); break; | |
4427 | - case 2: gen_helper_neon_ceq_u32(CPU_T001); break; | |
4385 | + case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; | |
4386 | + case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; | |
4387 | + case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; | |
4428 | 4388 | default: return 1; |
4429 | 4389 | } |
4430 | 4390 | } |
4431 | 4391 | break; |
4432 | 4392 | case 18: /* Multiply. */ |
4433 | 4393 | switch (size) { |
4434 | - case 0: gen_helper_neon_mul_u8(CPU_T001); break; | |
4435 | - case 1: gen_helper_neon_mul_u16(CPU_T001); break; | |
4436 | - case 2: gen_op_mul_T0_T1(); break; | |
4394 | + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; | |
4395 | + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; | |
4396 | + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; | |
4437 | 4397 | default: return 1; |
4438 | 4398 | } |
4439 | - NEON_GET_REG(T1, rd, pass); | |
4399 | + dead_tmp(tmp2); | |
4400 | + tmp2 = neon_load_reg(rd, pass); | |
4440 | 4401 | if (u) { /* VMLS */ |
4441 | - gen_neon_rsb(size); | |
4402 | + gen_neon_rsb(size, tmp, tmp2); | |
4442 | 4403 | } else { /* VMLA */ |
4443 | - gen_neon_add(size); | |
4404 | + gen_neon_add(size, tmp, tmp2); | |
4444 | 4405 | } |
4445 | 4406 | break; |
4446 | 4407 | case 19: /* VMUL */ |
4447 | 4408 | if (u) { /* polynomial */ |
4448 | - gen_helper_neon_mul_p8(CPU_T001); | |
4409 | + gen_helper_neon_mul_p8(tmp, tmp, tmp2); | |
4449 | 4410 | } else { /* Integer */ |
4450 | 4411 | switch (size) { |
4451 | - case 0: gen_helper_neon_mul_u8(CPU_T001); break; | |
4452 | - case 1: gen_helper_neon_mul_u16(CPU_T001); break; | |
4453 | - case 2: gen_op_mul_T0_T1(); break; | |
4412 | + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; | |
4413 | + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; | |
4414 | + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; | |
4454 | 4415 | default: return 1; |
4455 | 4416 | } |
4456 | 4417 | } |
... | ... | @@ -4464,14 +4425,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4464 | 4425 | case 22: /* Multiply high. */ |
4465 | 4426 | if (!u) { /* VQDMULH */ |
4466 | 4427 | switch (size) { |
4467 | - case 1: gen_helper_neon_qdmulh_s16(CPU_T0E01); break; | |
4468 | - case 2: gen_helper_neon_qdmulh_s32(CPU_T0E01); break; | |
4428 | + case 1: gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); break; | |
4429 | + case 2: gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); break; | |
4469 | 4430 | default: return 1; |
4470 | 4431 | } |
4471 | 4432 | } else { /* VQRDMULH */ |
4472 | 4433 | switch (size) { |
4473 | - case 1: gen_helper_neon_qrdmulh_s16(CPU_T0E01); break; | |
4474 | - case 2: gen_helper_neon_qrdmulh_s32(CPU_T0E01); break; | |
4434 | + case 1: gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); break; | |
4435 | + case 2: gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); break; | |
4475 | 4436 | default: return 1; |
4476 | 4437 | } |
4477 | 4438 | } |
... | ... | @@ -4480,88 +4441,91 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4480 | 4441 | if (u) |
4481 | 4442 | return 1; |
4482 | 4443 | switch (size) { |
4483 | - case 0: gen_helper_neon_padd_u8(CPU_T001); break; | |
4484 | - case 1: gen_helper_neon_padd_u16(CPU_T001); break; | |
4485 | - case 2: gen_op_addl_T0_T1(); break; | |
4444 | + case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break; | |
4445 | + case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break; | |
4446 | + case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break; | |
4486 | 4447 | default: return 1; |
4487 | 4448 | } |
4488 | 4449 | break; |
4489 | 4450 | case 26: /* Floating point arithmetic. */ |
4490 | 4451 | switch ((u << 2) | size) { |
4491 | 4452 | case 0: /* VADD */ |
4492 | - gen_helper_neon_add_f32(CPU_T001); | |
4453 | + gen_helper_neon_add_f32(tmp, tmp, tmp2); | |
4493 | 4454 | break; |
4494 | 4455 | case 2: /* VSUB */ |
4495 | - gen_helper_neon_sub_f32(CPU_T001); | |
4456 | + gen_helper_neon_sub_f32(tmp, tmp, tmp2); | |
4496 | 4457 | break; |
4497 | 4458 | case 4: /* VPADD */ |
4498 | - gen_helper_neon_add_f32(CPU_T001); | |
4459 | + gen_helper_neon_add_f32(tmp, tmp, tmp2); | |
4499 | 4460 | break; |
4500 | 4461 | case 6: /* VABD */ |
4501 | - gen_helper_neon_abd_f32(CPU_T001); | |
4462 | + gen_helper_neon_abd_f32(tmp, tmp, tmp2); | |
4502 | 4463 | break; |
4503 | 4464 | default: |
4504 | 4465 | return 1; |
4505 | 4466 | } |
4506 | 4467 | break; |
4507 | 4468 | case 27: /* Float multiply. */ |
4508 | - gen_helper_neon_mul_f32(CPU_T001); | |
4469 | + gen_helper_neon_mul_f32(tmp, tmp, tmp2); | |
4509 | 4470 | if (!u) { |
4510 | - NEON_GET_REG(T1, rd, pass); | |
4471 | + dead_tmp(tmp2); | |
4472 | + tmp2 = neon_load_reg(rd, pass); | |
4511 | 4473 | if (size == 0) { |
4512 | - gen_helper_neon_add_f32(CPU_T001); | |
4474 | + gen_helper_neon_add_f32(tmp, tmp, tmp2); | |
4513 | 4475 | } else { |
4514 | - gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]); | |
4476 | + gen_helper_neon_sub_f32(tmp, tmp2, tmp); | |
4515 | 4477 | } |
4516 | 4478 | } |
4517 | 4479 | break; |
4518 | 4480 | case 28: /* Float compare. */ |
4519 | 4481 | if (!u) { |
4520 | - gen_helper_neon_ceq_f32(CPU_T001); | |
4482 | + gen_helper_neon_ceq_f32(tmp, tmp, tmp2); | |
4521 | 4483 | } else { |
4522 | 4484 | if (size == 0) |
4523 | - gen_helper_neon_cge_f32(CPU_T001); | |
4485 | + gen_helper_neon_cge_f32(tmp, tmp, tmp2); | |
4524 | 4486 | else |
4525 | - gen_helper_neon_cgt_f32(CPU_T001); | |
4487 | + gen_helper_neon_cgt_f32(tmp, tmp, tmp2); | |
4526 | 4488 | } |
4527 | 4489 | break; |
4528 | 4490 | case 29: /* Float compare absolute. */ |
4529 | 4491 | if (!u) |
4530 | 4492 | return 1; |
4531 | 4493 | if (size == 0) |
4532 | - gen_helper_neon_acge_f32(CPU_T001); | |
4494 | + gen_helper_neon_acge_f32(tmp, tmp, tmp2); | |
4533 | 4495 | else |
4534 | - gen_helper_neon_acgt_f32(CPU_T001); | |
4496 | + gen_helper_neon_acgt_f32(tmp, tmp, tmp2); | |
4535 | 4497 | break; |
4536 | 4498 | case 30: /* Float min/max. */ |
4537 | 4499 | if (size == 0) |
4538 | - gen_helper_neon_max_f32(CPU_T001); | |
4500 | + gen_helper_neon_max_f32(tmp, tmp, tmp2); | |
4539 | 4501 | else |
4540 | - gen_helper_neon_min_f32(CPU_T001); | |
4502 | + gen_helper_neon_min_f32(tmp, tmp, tmp2); | |
4541 | 4503 | break; |
4542 | 4504 | case 31: |
4543 | 4505 | if (size == 0) |
4544 | - gen_helper_recps_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env); | |
4506 | + gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env); | |
4545 | 4507 | else |
4546 | - gen_helper_rsqrts_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env); | |
4508 | + gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env); | |
4547 | 4509 | break; |
4548 | 4510 | default: |
4549 | 4511 | abort(); |
4550 | 4512 | } |
4513 | + dead_tmp(tmp2); | |
4514 | + | |
4551 | 4515 | /* Save the result. For elementwise operations we can put it |
4552 | 4516 | straight into the destination register. For pairwise operations |
4553 | 4517 | we have to be careful to avoid clobbering the source operands. */ |
4554 | 4518 | if (pairwise && rd == rm) { |
4555 | - gen_neon_movl_scratch_T0(pass); | |
4519 | + neon_store_scratch(pass, tmp); | |
4556 | 4520 | } else { |
4557 | - NEON_SET_REG(T0, rd, pass); | |
4521 | + neon_store_reg(rd, pass, tmp); | |
4558 | 4522 | } |
4559 | 4523 | |
4560 | 4524 | } /* for pass */ |
4561 | 4525 | if (pairwise && rd == rm) { |
4562 | 4526 | for (pass = 0; pass < (q ? 4 : 2); pass++) { |
4563 | - gen_neon_movl_T0_scratch(pass); | |
4564 | - NEON_SET_REG(T0, rd, pass); | |
4527 | + tmp = neon_load_scratch(pass); | |
4528 | + neon_store_reg(rd, pass, tmp); | |
4565 | 4529 | } |
4566 | 4530 | } |
4567 | 4531 | /* End of 3 register same size operations. */ |
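Note (sketch, not part of the diff): for pairwise operations with rd == rm the per-pass results are now parked in scratch slots and only copied into rd after every source element has been read, which is what keeps the sources from being clobbered:

    /* per pass: */
    if (pairwise && rd == rm)
        neon_store_scratch(pass, tmp);      /* defer the write; helper frees tmp */
    else
        neon_store_reg(rd, pass, tmp);      /* elementwise: write rd directly    */
    /* after the pass loop, when writes were deferred: */
    for (pass = 0; pass < (q ? 4 : 2); pass++) {
        tmp = neon_load_scratch(pass);
        neon_store_reg(rd, pass, tmp);
    }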
... | ... | @@ -4658,8 +4622,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4658 | 4622 | neon_store_reg64(cpu_V0, rd + pass); |
4659 | 4623 | } else { /* size < 3 */ |
4660 | 4624 | /* Operands in T0 and T1. */ |
4661 | - gen_op_movl_T1_im(imm); | |
4662 | - NEON_GET_REG(T0, rm, pass); | |
4625 | + tmp = neon_load_reg(rm, pass); | |
4626 | + tmp2 = new_tmp(); | |
4627 | + tcg_gen_movi_i32(tmp2, imm); | |
4663 | 4628 | switch (op) { |
4664 | 4629 | case 0: /* VSHR */ |
4665 | 4630 | case 1: /* VSRA */ |
... | ... | @@ -4676,9 +4641,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4676 | 4641 | break; |
4677 | 4642 | case 5: /* VSHL, VSLI */ |
4678 | 4643 | switch (size) { |
4679 | - case 0: gen_helper_neon_shl_u8(CPU_T001); break; | |
4680 | - case 1: gen_helper_neon_shl_u16(CPU_T001); break; | |
4681 | - case 2: gen_helper_neon_shl_u32(CPU_T001); break; | |
4644 | + case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break; | |
4645 | + case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break; | |
4646 | + case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break; | |
4682 | 4647 | default: return 1; |
4683 | 4648 | } |
4684 | 4649 | break; |
... | ... | @@ -4687,18 +4652,20 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4687 | 4652 | break; |
4688 | 4653 | case 7: /* VQSHLU */ |
4689 | 4654 | switch (size) { |
4690 | - case 0: gen_helper_neon_qshl_u8(CPU_T0E01); break; | |
4691 | - case 1: gen_helper_neon_qshl_u16(CPU_T0E01); break; | |
4692 | - case 2: gen_helper_neon_qshl_u32(CPU_T0E01); break; | |
4655 | + case 0: gen_helper_neon_qshl_u8(tmp, cpu_env, tmp, tmp2); break; | |
4656 | + case 1: gen_helper_neon_qshl_u16(tmp, cpu_env, tmp, tmp2); break; | |
4657 | + case 2: gen_helper_neon_qshl_u32(tmp, cpu_env, tmp, tmp2); break; | |
4693 | 4658 | default: return 1; |
4694 | 4659 | } |
4695 | 4660 | break; |
4696 | 4661 | } |
4662 | + dead_tmp(tmp2); | |
4697 | 4663 | |
4698 | 4664 | if (op == 1 || op == 3) { |
4699 | 4665 | /* Accumulate. */ |
4700 | - NEON_GET_REG(T1, rd, pass); | |
4701 | - gen_neon_add(size); | |
4666 | + tmp2 = neon_load_reg(rd, pass); | |
4667 | + gen_neon_add(size, tmp2, tmp); | |
4668 | + dead_tmp(tmp2); | |
4702 | 4669 | } else if (op == 4 || (op == 5 && u)) { |
4703 | 4670 | /* Insert */ |
4704 | 4671 | switch (size) { |
... | ... | @@ -4726,12 +4693,13 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4726 | 4693 | default: |
4727 | 4694 | abort(); |
4728 | 4695 | } |
4729 | - tmp = neon_load_reg(rd, pass); | |
4730 | - tcg_gen_andi_i32(cpu_T[0], cpu_T[0], imm); | |
4731 | - tcg_gen_andi_i32(tmp, tmp, ~imm); | |
4732 | - tcg_gen_or_i32(cpu_T[0], cpu_T[0], tmp); | |
4696 | + tmp2 = neon_load_reg(rd, pass); | |
4697 | + tcg_gen_andi_i32(tmp, tmp, imm); | |
4698 | + tcg_gen_andi_i32(tmp2, tmp2, ~imm); | |
4699 | + tcg_gen_or_i32(tmp, tmp, tmp2); | |
4700 | + dead_tmp(tmp2); | |
4733 | 4701 | } |
4734 | - NEON_SET_REG(T0, rd, pass); | |
4702 | + neon_store_reg(rd, pass, tmp); | |
4735 | 4703 | } |
4736 | 4704 | } /* for pass */ |
4737 | 4705 | } else if (op < 10) { |
... | ... | @@ -4893,9 +4861,6 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4893 | 4861 | if (invert) |
4894 | 4862 | imm = ~imm; |
4895 | 4863 | |
4896 | - if (op != 14 || !invert) | |
4897 | - gen_op_movl_T1_im(imm); | |
4898 | - | |
4899 | 4864 | for (pass = 0; pass < (q ? 4 : 2); pass++) { |
4900 | 4865 | if (op & 1 && op < 12) { |
4901 | 4866 | tmp = neon_load_reg(rd, pass); |
... | ... | @@ -4962,11 +4927,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4962 | 4927 | always aligned so will never overlap with wide |
4963 | 4928 | destinations in problematic ways. */ |
4964 | 4929 | if (rd == rm && !src2_wide) { |
4965 | - NEON_GET_REG(T0, rm, 1); | |
4966 | - gen_neon_movl_scratch_T0(2); | |
4930 | + tmp = neon_load_reg(rm, 1); | |
4931 | + neon_store_scratch(2, tmp); | |
4967 | 4932 | } else if (rd == rn && !src1_wide) { |
4968 | - NEON_GET_REG(T0, rn, 1); | |
4969 | - gen_neon_movl_scratch_T0(2); | |
4933 | + tmp = neon_load_reg(rn, 1); | |
4934 | + neon_store_scratch(2, tmp); | |
4970 | 4935 | } |
4971 | 4936 | TCGV_UNUSED(tmp3); |
4972 | 4937 | for (pass = 0; pass < 2; pass++) { |
... | ... | @@ -4975,9 +4940,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4975 | 4940 | TCGV_UNUSED(tmp); |
4976 | 4941 | } else { |
4977 | 4942 | if (pass == 1 && rd == rn) { |
4978 | - gen_neon_movl_T0_scratch(2); | |
4979 | - tmp = new_tmp(); | |
4980 | - tcg_gen_mov_i32(tmp, cpu_T[0]); | |
4943 | + tmp = neon_load_scratch(2); | |
4981 | 4944 | } else { |
4982 | 4945 | tmp = neon_load_reg(rn, pass); |
4983 | 4946 | } |
... | ... | @@ -4990,9 +4953,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
4990 | 4953 | TCGV_UNUSED(tmp2); |
4991 | 4954 | } else { |
4992 | 4955 | if (pass == 1 && rd == rm) { |
4993 | - gen_neon_movl_T0_scratch(2); | |
4994 | - tmp2 = new_tmp(); | |
4995 | - tcg_gen_mov_i32(tmp2, cpu_T[0]); | |
4956 | + tmp2 = neon_load_scratch(2); | |
4996 | 4957 | } else { |
4997 | 4958 | tmp2 = neon_load_reg(rm, pass); |
4998 | 4959 | } |
... | ... | @@ -5035,6 +4996,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
5035 | 4996 | case 8: case 9: case 10: case 11: case 12: case 13: |
5036 | 4997 | /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */ |
5037 | 4998 | gen_neon_mull(cpu_V0, tmp, tmp2, size, u); |
4999 | + dead_tmp(tmp2); | |
5000 | + dead_tmp(tmp); | |
5038 | 5001 | break; |
5039 | 5002 | case 14: /* Polynomial VMULL */ |
5040 | 5003 | cpu_abort(env, "Polynomial VMULL not implemented"); |
... | ... | @@ -5123,55 +5086,56 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
5123 | 5086 | case 9: /* Floating point VMUL scalar */ |
5124 | 5087 | case 12: /* VQDMULH scalar */ |
5125 | 5088 | case 13: /* VQRDMULH scalar */ |
5126 | - gen_neon_get_scalar(size, rm); | |
5127 | - gen_neon_movl_scratch_T0(0); | |
5089 | + tmp = neon_get_scalar(size, rm); | |
5090 | + neon_store_scratch(0, tmp); | |
5128 | 5091 | for (pass = 0; pass < (u ? 4 : 2); pass++) { |
5129 | - if (pass != 0) | |
5130 | - gen_neon_movl_T0_scratch(0); | |
5131 | - NEON_GET_REG(T1, rn, pass); | |
5092 | + tmp = neon_load_scratch(0); | |
5093 | + tmp2 = neon_load_reg(rn, pass); | |
5132 | 5094 | if (op == 12) { |
5133 | 5095 | if (size == 1) { |
5134 | - gen_helper_neon_qdmulh_s16(CPU_T0E01); | |
5096 | + gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); | |
5135 | 5097 | } else { |
5136 | - gen_helper_neon_qdmulh_s32(CPU_T0E01); | |
5098 | + gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); | |
5137 | 5099 | } |
5138 | 5100 | } else if (op == 13) { |
5139 | 5101 | if (size == 1) { |
5140 | - gen_helper_neon_qrdmulh_s16(CPU_T0E01); | |
5102 | + gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); | |
5141 | 5103 | } else { |
5142 | - gen_helper_neon_qrdmulh_s32(CPU_T0E01); | |
5104 | + gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); | |
5143 | 5105 | } |
5144 | 5106 | } else if (op & 1) { |
5145 | - gen_helper_neon_mul_f32(CPU_T001); | |
5107 | + gen_helper_neon_mul_f32(tmp, tmp, tmp2); | |
5146 | 5108 | } else { |
5147 | 5109 | switch (size) { |
5148 | - case 0: gen_helper_neon_mul_u8(CPU_T001); break; | |
5149 | - case 1: gen_helper_neon_mul_u16(CPU_T001); break; | |
5150 | - case 2: gen_op_mul_T0_T1(); break; | |
5110 | + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; | |
5111 | + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; | |
5112 | + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; | |
5151 | 5113 | default: return 1; |
5152 | 5114 | } |
5153 | 5115 | } |
5116 | + dead_tmp(tmp2); | |
5154 | 5117 | if (op < 8) { |
5155 | 5118 | /* Accumulate. */ |
5156 | - NEON_GET_REG(T1, rd, pass); | |
5119 | + tmp2 = neon_load_reg(rd, pass); | |
5157 | 5120 | switch (op) { |
5158 | 5121 | case 0: |
5159 | - gen_neon_add(size); | |
5122 | + gen_neon_add(size, tmp, tmp2); | |
5160 | 5123 | break; |
5161 | 5124 | case 1: |
5162 | - gen_helper_neon_add_f32(CPU_T001); | |
5125 | + gen_helper_neon_add_f32(tmp, tmp, tmp2); | |
5163 | 5126 | break; |
5164 | 5127 | case 4: |
5165 | - gen_neon_rsb(size); | |
5128 | + gen_neon_rsb(size, tmp, tmp2); | |
5166 | 5129 | break; |
5167 | 5130 | case 5: |
5168 | - gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]); | |
5131 | + gen_helper_neon_sub_f32(tmp, tmp2, tmp); | |
5169 | 5132 | break; |
5170 | 5133 | default: |
5171 | 5134 | abort(); |
5172 | 5135 | } |
5136 | + dead_tmp(tmp2); | |
5173 | 5137 | } |
5174 | - NEON_SET_REG(T0, rd, pass); | |
5138 | + neon_store_reg(rd, pass, tmp); | |
5175 | 5139 | } |
5176 | 5140 | break; |
5177 | 5141 | case 2: /* VMLAL scalar */ |
... | ... | @@ -5183,19 +5147,17 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
5183 | 5147 | if (size == 0 && (op == 3 || op == 7 || op == 11)) |
5184 | 5148 | return 1; |
5185 | 5149 | |
5186 | - gen_neon_get_scalar(size, rm); | |
5187 | - NEON_GET_REG(T1, rn, 1); | |
5150 | + tmp2 = neon_get_scalar(size, rm); | |
5151 | + tmp3 = neon_load_reg(rn, 1); | |
5188 | 5152 | |
5189 | 5153 | for (pass = 0; pass < 2; pass++) { |
5190 | 5154 | if (pass == 0) { |
5191 | 5155 | tmp = neon_load_reg(rn, 0); |
5192 | 5156 | } else { |
5193 | - tmp = new_tmp(); | |
5194 | - tcg_gen_mov_i32(tmp, cpu_T[1]); | |
5157 | + tmp = tmp3; | |
5195 | 5158 | } |
5196 | - tmp2 = new_tmp(); | |
5197 | - tcg_gen_mov_i32(tmp2, cpu_T[0]); | |
5198 | 5159 | gen_neon_mull(cpu_V0, tmp, tmp2, size, u); |
5160 | + dead_tmp(tmp); | |
5199 | 5161 | if (op == 6 || op == 7) { |
5200 | 5162 | gen_neon_negl(cpu_V0, size); |
5201 | 5163 | } |
... | ... | @@ -5221,6 +5183,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
5221 | 5183 | } |
5222 | 5184 | neon_store_reg64(cpu_V0, rd + pass); |
5223 | 5185 | } |
5186 | + | |
5187 | + dead_tmp(tmp2); | |
5188 | + | |
5224 | 5189 | break; |
5225 | 5190 | default: /* 14 and 15 are RESERVED */ |
5226 | 5191 | return 1; |
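Note (sketch, not part of the diff): neon_get_scalar() now hands back a temporary with the scalar already broadcast, and since gen_neon_mull() no longer frees its inputs (see the earlier hunk), the widening-by-scalar loop keeps the scalar alive across both passes and frees everything explicitly:

    tmp2 = neon_get_scalar(size, rm);          /* scalar, duplicated in a temp  */
    tmp3 = neon_load_reg(rn, 1);               /* high half saved for pass 1    */
    for (pass = 0; pass < 2; pass++) {
        tmp = (pass == 0) ? neon_load_reg(rn, 0) : tmp3;
        gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
        dead_tmp(tmp);                         /* mull leaves inputs alive now  */
        neon_store_reg64(cpu_V0, rd + pass);   /* simplified: the negate and
                                                  accumulate steps are omitted  */
    }
    dead_tmp(tmp2);                            /* scalar freed after the loop   */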
... | ... | @@ -5287,25 +5252,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
5287 | 5252 | if (size == 3) |
5288 | 5253 | return 1; |
5289 | 5254 | for (pass = 0; pass < (q ? 2 : 1); pass++) { |
5290 | - NEON_GET_REG(T0, rm, pass * 2); | |
5291 | - NEON_GET_REG(T1, rm, pass * 2 + 1); | |
5255 | + tmp = neon_load_reg(rm, pass * 2); | |
5256 | + tmp2 = neon_load_reg(rm, pass * 2 + 1); | |
5292 | 5257 | switch (size) { |
5293 | - case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break; | |
5294 | - case 1: gen_swap_half(cpu_T[0]); break; | |
5258 | + case 0: tcg_gen_bswap32_i32(tmp, tmp); break; | |
5259 | + case 1: gen_swap_half(tmp); break; | |
5295 | 5260 | case 2: /* no-op */ break; |
5296 | 5261 | default: abort(); |
5297 | 5262 | } |
5298 | - NEON_SET_REG(T0, rd, pass * 2 + 1); | |
5263 | + neon_store_reg(rd, pass * 2 + 1, tmp); | |
5299 | 5264 | if (size == 2) { |
5300 | - NEON_SET_REG(T1, rd, pass * 2); | |
5265 | + neon_store_reg(rd, pass * 2, tmp2); | |
5301 | 5266 | } else { |
5302 | - gen_op_movl_T0_T1(); | |
5303 | 5267 | switch (size) { |
5304 | - case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break; | |
5305 | - case 1: gen_swap_half(cpu_T[0]); break; | |
5268 | + case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break; | |
5269 | + case 1: gen_swap_half(tmp2); break; | |
5306 | 5270 | default: abort(); |
5307 | 5271 | } |
5308 | - NEON_SET_REG(T0, rd, pass * 2); | |
5272 | + neon_store_reg(rd, pass * 2, tmp2); | |
5309 | 5273 | } |
5310 | 5274 | } |
5311 | 5275 | break; |
... | ... | @@ -5335,10 +5299,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
5335 | 5299 | case 33: /* VTRN */ |
5336 | 5300 | if (size == 2) { |
5337 | 5301 | for (n = 0; n < (q ? 4 : 2); n += 2) { |
5338 | - NEON_GET_REG(T0, rm, n); | |
5339 | - NEON_GET_REG(T1, rd, n + 1); | |
5340 | - NEON_SET_REG(T1, rm, n); | |
5341 | - NEON_SET_REG(T0, rd, n + 1); | |
5302 | + tmp = neon_load_reg(rm, n); | |
5303 | + tmp2 = neon_load_reg(rd, n + 1); | |
5304 | + neon_store_reg(rm, n, tmp2); | |
5305 | + neon_store_reg(rd, n + 1, tmp); | |
5342 | 5306 | } |
5343 | 5307 | } else { |
5344 | 5308 | goto elementwise; |
... | ... | @@ -5358,16 +5322,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
5358 | 5322 | {0, 2, 4, 6, 1, 3, 5, 7}; |
5359 | 5323 | for (n = 0; n < 8; n++) { |
5360 | 5324 | int reg = (n < 4) ? rd : rm; |
5361 | - gen_neon_movl_T0_scratch(unzip_order_q[n]); | |
5362 | - NEON_SET_REG(T0, reg, n % 4); | |
5325 | + tmp = neon_load_scratch(unzip_order_q[n]); | |
5326 | + neon_store_reg(reg, n % 4, tmp); | |
5363 | 5327 | } |
5364 | 5328 | } else { |
5365 | 5329 | static int unzip_order[4] = |
5366 | 5330 | {0, 4, 1, 5}; |
5367 | 5331 | for (n = 0; n < 4; n++) { |
5368 | 5332 | int reg = (n < 2) ? rd : rm; |
5369 | - gen_neon_movl_T0_scratch(unzip_order[n]); | |
5370 | - NEON_SET_REG(T0, reg, n % 2); | |
5333 | + tmp = neon_load_scratch(unzip_order[n]); | |
5334 | + neon_store_reg(reg, n % 2, tmp); | |
5371 | 5335 | } |
5372 | 5336 | } |
5373 | 5337 | break; |
... | ... | @@ -5380,21 +5344,21 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
5380 | 5344 | return 1; |
5381 | 5345 | count = (q ? 4 : 2); |
5382 | 5346 | for (n = 0; n < count; n++) { |
5383 | - NEON_GET_REG(T0, rd, n); | |
5384 | - NEON_GET_REG(T1, rd, n); | |
5347 | + tmp = neon_load_reg(rd, n); | |
5348 | + tmp2 = neon_load_reg(rd, n); | |
5385 | 5349 | switch (size) { |
5386 | - case 0: gen_neon_zip_u8(cpu_T[0], cpu_T[1]); break; | |
5387 | - case 1: gen_neon_zip_u16(cpu_T[0], cpu_T[1]); break; | |
5350 | + case 0: gen_neon_zip_u8(tmp, tmp2); break; | |
5351 | + case 1: gen_neon_zip_u16(tmp, tmp2); break; | |
5388 | 5352 | case 2: /* no-op */; break; |
5389 | 5353 | default: abort(); |
5390 | 5354 | } |
5391 | - gen_neon_movl_scratch_T0(n * 2); | |
5392 | - gen_neon_movl_scratch_T1(n * 2 + 1); | |
5355 | + neon_store_scratch(n * 2, tmp); | |
5356 | + neon_store_scratch(n * 2 + 1, tmp2); | |
5393 | 5357 | } |
5394 | 5358 | for (n = 0; n < count * 2; n++) { |
5395 | 5359 | int reg = (n < count) ? rd : rm; |
5396 | - gen_neon_movl_T0_scratch(n); | |
5397 | - NEON_SET_REG(T0, reg, n % count); | |
5360 | + tmp = neon_load_scratch(n); | |
5361 | + neon_store_reg(reg, n % count, tmp); | |
5398 | 5362 | } |
5399 | 5363 | break; |
5400 | 5364 | case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */ |
... | ... | @@ -5437,124 +5401,132 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
5437 | 5401 | if (op == 30 || op == 31 || op >= 58) { |
5438 | 5402 | tcg_gen_ld_f32(cpu_F0s, cpu_env, |
5439 | 5403 | neon_reg_offset(rm, pass)); |
5404 | + TCGV_UNUSED(tmp); | |
5440 | 5405 | } else { |
5441 | - NEON_GET_REG(T0, rm, pass); | |
5406 | + tmp = neon_load_reg(rm, pass); | |
5442 | 5407 | } |
5443 | 5408 | switch (op) { |
5444 | 5409 | case 1: /* VREV32 */ |
5445 | 5410 | switch (size) { |
5446 | - case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break; | |
5447 | - case 1: gen_swap_half(cpu_T[0]); break; | |
5411 | + case 0: tcg_gen_bswap32_i32(tmp, tmp); break; | |
5412 | + case 1: gen_swap_half(tmp); break; | |
5448 | 5413 | default: return 1; |
5449 | 5414 | } |
5450 | 5415 | break; |
5451 | 5416 | case 2: /* VREV16 */ |
5452 | 5417 | if (size != 0) |
5453 | 5418 | return 1; |
5454 | - gen_rev16(cpu_T[0]); | |
5419 | + gen_rev16(tmp); | |
5455 | 5420 | break; |
5456 | 5421 | case 8: /* CLS */ |
5457 | 5422 | switch (size) { |
5458 | - case 0: gen_helper_neon_cls_s8(cpu_T[0], cpu_T[0]); break; | |
5459 | - case 1: gen_helper_neon_cls_s16(cpu_T[0], cpu_T[0]); break; | |
5460 | - case 2: gen_helper_neon_cls_s32(cpu_T[0], cpu_T[0]); break; | |
5423 | + case 0: gen_helper_neon_cls_s8(tmp, tmp); break; | |
5424 | + case 1: gen_helper_neon_cls_s16(tmp, tmp); break; | |
5425 | + case 2: gen_helper_neon_cls_s32(tmp, tmp); break; | |
5461 | 5426 | default: return 1; |
5462 | 5427 | } |
5463 | 5428 | break; |
5464 | 5429 | case 9: /* CLZ */ |
5465 | 5430 | switch (size) { |
5466 | - case 0: gen_helper_neon_clz_u8(cpu_T[0], cpu_T[0]); break; | |
5467 | - case 1: gen_helper_neon_clz_u16(cpu_T[0], cpu_T[0]); break; | |
5468 | - case 2: gen_helper_clz(cpu_T[0], cpu_T[0]); break; | |
5431 | + case 0: gen_helper_neon_clz_u8(tmp, tmp); break; | |
5432 | + case 1: gen_helper_neon_clz_u16(tmp, tmp); break; | |
5433 | + case 2: gen_helper_clz(tmp, tmp); break; | |
5469 | 5434 | default: return 1; |
5470 | 5435 | } |
5471 | 5436 | break; |
5472 | 5437 | case 10: /* CNT */ |
5473 | 5438 | if (size != 0) |
5474 | 5439 | return 1; |
5475 | - gen_helper_neon_cnt_u8(cpu_T[0], cpu_T[0]); | |
5440 | + gen_helper_neon_cnt_u8(tmp, tmp); | |
5476 | 5441 | break; |
5477 | 5442 | case 11: /* VNOT */ |
5478 | 5443 | if (size != 0) |
5479 | 5444 | return 1; |
5480 | - gen_op_notl_T0(); | |
5445 | + tcg_gen_not_i32(tmp, tmp); | |
5481 | 5446 | break; |
5482 | 5447 | case 14: /* VQABS */ |
5483 | 5448 | switch (size) { |
5484 | - case 0: gen_helper_neon_qabs_s8(cpu_T[0], cpu_env, cpu_T[0]); break; | |
5485 | - case 1: gen_helper_neon_qabs_s16(cpu_T[0], cpu_env, cpu_T[0]); break; | |
5486 | - case 2: gen_helper_neon_qabs_s32(cpu_T[0], cpu_env, cpu_T[0]); break; | |
5449 | + case 0: gen_helper_neon_qabs_s8(tmp, cpu_env, tmp); break; | |
5450 | + case 1: gen_helper_neon_qabs_s16(tmp, cpu_env, tmp); break; | |
5451 | + case 2: gen_helper_neon_qabs_s32(tmp, cpu_env, tmp); break; | |
5487 | 5452 | default: return 1; |
5488 | 5453 | } |
5489 | 5454 | break; |
5490 | 5455 | case 15: /* VQNEG */ |
5491 | 5456 | switch (size) { |
5492 | - case 0: gen_helper_neon_qneg_s8(cpu_T[0], cpu_env, cpu_T[0]); break; | |
5493 | - case 1: gen_helper_neon_qneg_s16(cpu_T[0], cpu_env, cpu_T[0]); break; | |
5494 | - case 2: gen_helper_neon_qneg_s32(cpu_T[0], cpu_env, cpu_T[0]); break; | |
5457 | + case 0: gen_helper_neon_qneg_s8(tmp, cpu_env, tmp); break; | |
5458 | + case 1: gen_helper_neon_qneg_s16(tmp, cpu_env, tmp); break; | |
5459 | + case 2: gen_helper_neon_qneg_s32(tmp, cpu_env, tmp); break; | |
5495 | 5460 | default: return 1; |
5496 | 5461 | } |
5497 | 5462 | break; |
5498 | 5463 | case 16: case 19: /* VCGT #0, VCLE #0 */ |
5499 | - gen_op_movl_T1_im(0); | |
5464 | + tmp2 = tcg_const_i32(0); | |
5500 | 5465 | switch(size) { |
5501 | - case 0: gen_helper_neon_cgt_s8(CPU_T001); break; | |
5502 | - case 1: gen_helper_neon_cgt_s16(CPU_T001); break; | |
5503 | - case 2: gen_helper_neon_cgt_s32(CPU_T001); break; | |
5466 | + case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break; | |
5467 | + case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break; | |
5468 | + case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break; | |
5504 | 5469 | default: return 1; |
5505 | 5470 | } |
5471 | + tcg_temp_free(tmp2); | |
5506 | 5472 | if (op == 19) |
5507 | - gen_op_notl_T0(); | |
5473 | + tcg_gen_not_i32(tmp, tmp); | |
5508 | 5474 | break; |
5509 | 5475 | case 17: case 20: /* VCGE #0, VCLT #0 */ |
5510 | - gen_op_movl_T1_im(0); | |
5476 | + tmp2 = tcg_const_i32(0); | |
5511 | 5477 | switch(size) { |
5512 | - case 0: gen_helper_neon_cge_s8(CPU_T001); break; | |
5513 | - case 1: gen_helper_neon_cge_s16(CPU_T001); break; | |
5514 | - case 2: gen_helper_neon_cge_s32(CPU_T001); break; | |
5478 | + case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break; | |
5479 | + case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break; | |
5480 | + case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break; | |
5515 | 5481 | default: return 1; |
5516 | 5482 | } |
5483 | + tcg_temp_free(tmp2); | |
5517 | 5484 | if (op == 20) |
5518 | - gen_op_notl_T0(); | |
5485 | + tcg_gen_not_i32(tmp, tmp); | |
5519 | 5486 | break; |
5520 | 5487 | case 18: /* VCEQ #0 */ |
5521 | - gen_op_movl_T1_im(0); | |
5488 | + tmp2 = tcg_const_i32(0); | |
5522 | 5489 | switch(size) { |
5523 | - case 0: gen_helper_neon_ceq_u8(CPU_T001); break; | |
5524 | - case 1: gen_helper_neon_ceq_u16(CPU_T001); break; | |
5525 | - case 2: gen_helper_neon_ceq_u32(CPU_T001); break; | |
5490 | + case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; | |
5491 | + case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; | |
5492 | + case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; | |
5526 | 5493 | default: return 1; |
5527 | 5494 | } |
5495 | + tcg_temp_free(tmp2); | |
5528 | 5496 | break; |
5529 | 5497 | case 22: /* VABS */ |
5530 | 5498 | switch(size) { |
5531 | - case 0: gen_helper_neon_abs_s8(cpu_T[0], cpu_T[0]); break; | |
5532 | - case 1: gen_helper_neon_abs_s16(cpu_T[0], cpu_T[0]); break; | |
5533 | - case 2: tcg_gen_abs_i32(cpu_T[0], cpu_T[0]); break; | |
5499 | + case 0: gen_helper_neon_abs_s8(tmp, tmp); break; | |
5500 | + case 1: gen_helper_neon_abs_s16(tmp, tmp); break; | |
5501 | + case 2: tcg_gen_abs_i32(tmp, tmp); break; | |
5534 | 5502 | default: return 1; |
5535 | 5503 | } |
5536 | 5504 | break; |
5537 | 5505 | case 23: /* VNEG */ |
5538 | - gen_op_movl_T1_im(0); | |
5539 | 5506 | if (size == 3) |
5540 | 5507 | return 1; |
5541 | - gen_neon_rsb(size); | |
5508 | + tmp2 = tcg_const_i32(0); | |
5509 | + gen_neon_rsb(size, tmp, tmp2); | |
5510 | + tcg_temp_free(tmp2); | |
5542 | 5511 | break; |
5543 | 5512 | case 24: case 27: /* Float VCGT #0, Float VCLE #0 */ |
5544 | - gen_op_movl_T1_im(0); | |
5545 | - gen_helper_neon_cgt_f32(CPU_T001); | |
5513 | + tmp2 = tcg_const_i32(0); | |
5514 | + gen_helper_neon_cgt_f32(tmp, tmp, tmp2); | |
5515 | + tcg_temp_free(tmp2); | |
5546 | 5516 | if (op == 27) |
5547 | - gen_op_notl_T0(); | |
5517 | + tcg_gen_not_i32(tmp, tmp); | |
5548 | 5518 | break; |
5549 | 5519 | case 25: case 28: /* Float VCGE #0, Float VCLT #0 */ |
5550 | - gen_op_movl_T1_im(0); | |
5551 | - gen_helper_neon_cge_f32(CPU_T001); | |
5520 | + tmp2 = tcg_const_i32(0); | |
5521 | + gen_helper_neon_cge_f32(tmp, tmp, tmp2); | |
5522 | + tcg_temp_free(tmp2); | |
5552 | 5523 | if (op == 28) |
5553 | - gen_op_notl_T0(); | |
5524 | + tcg_gen_not_i32(tmp, tmp); | |
5554 | 5525 | break; |
5555 | 5526 | case 26: /* Float VCEQ #0 */ |
5556 | - gen_op_movl_T1_im(0); | |
5557 | - gen_helper_neon_ceq_f32(CPU_T001); | |
5527 | + tmp2 = tcg_const_i32(0); | |
5528 | + gen_helper_neon_ceq_f32(tmp, tmp, tmp2); | |
5529 | + tcg_temp_free(tmp2); | |
5558 | 5530 | break; |
5559 | 5531 | case 30: /* Float VABS */ |
5560 | 5532 | gen_vfp_abs(0); |
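Note (sketch, not part of the diff): the compare-with-zero cases above swap gen_op_movl_T1_im(0) for a throwaway TCG constant. Constants made with tcg_const_i32() are released with tcg_temp_free(), whereas temporaries from new_tmp()/neon_load_reg() go back through dead_tmp(); e.g. VCGT #0 / VCLE #0 reduces to:

    tmp2 = tcg_const_i32(0);
    gen_helper_neon_cgt_s32(tmp, tmp, tmp2);   /* size == 2 flavour shown */
    tcg_temp_free(tmp2);
    if (op == 19)                              /* VCLE #0 = NOT(VCGT #0)  */
        tcg_gen_not_i32(tmp, tmp);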
... | ... | @@ -5563,24 +5535,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
5563 | 5535 | gen_vfp_neg(0); |
5564 | 5536 | break; |
5565 | 5537 | case 32: /* VSWP */ |
5566 | - NEON_GET_REG(T1, rd, pass); | |
5567 | - NEON_SET_REG(T1, rm, pass); | |
5538 | + tmp2 = neon_load_reg(rd, pass); | |
5539 | + neon_store_reg(rm, pass, tmp2); | |
5568 | 5540 | break; |
5569 | 5541 | case 33: /* VTRN */ |
5570 | - NEON_GET_REG(T1, rd, pass); | |
5542 | + tmp2 = neon_load_reg(rd, pass); | |
5571 | 5543 | switch (size) { |
5572 | - case 0: gen_neon_trn_u8(cpu_T[0], cpu_T[1]); break; | |
5573 | - case 1: gen_neon_trn_u16(cpu_T[0], cpu_T[1]); break; | |
5544 | + case 0: gen_neon_trn_u8(tmp, tmp2); break; | |
5545 | + case 1: gen_neon_trn_u16(tmp, tmp2); break; | |
5574 | 5546 | case 2: abort(); |
5575 | 5547 | default: return 1; |
5576 | 5548 | } |
5577 | - NEON_SET_REG(T1, rm, pass); | |
5549 | + neon_store_reg(rm, pass, tmp2); | |
5578 | 5550 | break; |
5579 | 5551 | case 56: /* Integer VRECPE */ |
5580 | - gen_helper_recpe_u32(cpu_T[0], cpu_T[0], cpu_env); | |
5552 | + gen_helper_recpe_u32(tmp, tmp, cpu_env); | |
5581 | 5553 | break; |
5582 | 5554 | case 57: /* Integer VRSQRTE */ |
5583 | - gen_helper_rsqrte_u32(cpu_T[0], cpu_T[0], cpu_env); | |
5555 | + gen_helper_rsqrte_u32(tmp, tmp, cpu_env); | |
5584 | 5556 | break; |
5585 | 5557 | case 58: /* Float VRECPE */ |
5586 | 5558 | gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env); |
... | ... | @@ -5608,7 +5580,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
5608 | 5580 | tcg_gen_st_f32(cpu_F0s, cpu_env, |
5609 | 5581 | neon_reg_offset(rd, pass)); |
5610 | 5582 | } else { |
5611 | - NEON_SET_REG(T0, rd, pass); | |
5583 | + neon_store_reg(rd, pass, tmp); | |
5612 | 5584 | } |
5613 | 5585 | } |
5614 | 5586 | break; |
... | ... | @@ -5641,21 +5613,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
5641 | 5613 | } else if ((insn & 0x380) == 0) { |
5642 | 5614 | /* VDUP */ |
5643 | 5615 | if (insn & (1 << 19)) { |
5644 | - NEON_SET_REG(T0, rm, 1); | |
5616 | + tmp = neon_load_reg(rm, 1); | |
5645 | 5617 | } else { |
5646 | - NEON_SET_REG(T0, rm, 0); | |
5618 | + tmp = neon_load_reg(rm, 0); | |
5647 | 5619 | } |
5648 | 5620 | if (insn & (1 << 16)) { |
5649 | - gen_neon_dup_u8(cpu_T[0], ((insn >> 17) & 3) * 8); | |
5621 | + gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8); | |
5650 | 5622 | } else if (insn & (1 << 17)) { |
5651 | 5623 | if ((insn >> 18) & 1) |
5652 | - gen_neon_dup_high16(cpu_T[0]); | |
5624 | + gen_neon_dup_high16(tmp); | |
5653 | 5625 | else |
5654 | - gen_neon_dup_low16(cpu_T[0]); | |
5626 | + gen_neon_dup_low16(tmp); | |
5655 | 5627 | } |
5656 | 5628 | for (pass = 0; pass < (q ? 4 : 2); pass++) { |
5657 | - NEON_SET_REG(T0, rd, pass); | |
5629 | + tmp2 = new_tmp(); | |
5630 | + tcg_gen_mov_i32(tmp2, tmp); | |
5631 | + neon_store_reg(rd, pass, tmp2); | |
5658 | 5632 | } |
5633 | + dead_tmp(tmp); | |
5659 | 5634 | } else { |
5660 | 5635 | return 1; |
5661 | 5636 | } | ... | ... |