Commit 8f8e3aa45185dbb99a25edee384a18ca3d931132

Author: pbrook
Parent: 8984bd2e

ARM TCG conversion 13/16.

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4150 c046a42c-6fe2-441c-8c8c-71466251a162
target-arm/exec.h
... ... @@ -23,7 +23,6 @@
23 23 register struct CPUARMState *env asm(AREG0);
24 24 register uint32_t T0 asm(AREG1);
25 25 register uint32_t T1 asm(AREG2);
26   -register uint32_t T2 asm(AREG3);
27 26  
28 27 #define M0 env->iwmmxt.val
29 28  
... ... @@ -59,15 +58,8 @@ static inline int cpu_halted(CPUState *env) {
59 58 #include "softmmu_exec.h"
60 59 #endif
61 60  
62   -/* In op_helper.c */
63   -
64   -void helper_mark_exclusive(CPUARMState *, uint32_t addr);
65   -int helper_test_exclusive(CPUARMState *, uint32_t addr);
66   -void helper_clrex(CPUARMState *env);
67   -
68 61 void cpu_loop_exit(void);
69 62  
70 63 void raise_exception(int);
71 64  
72   -void helper_neon_tbl(int rn, int maxindex);
73 65 uint32_t helper_neon_mul_p8(uint32_t op1, uint32_t op2);
... ...
target-arm/helper.c
... ... @@ -432,7 +432,7 @@ static void flush_mmon(uint32_t addr)
432 432 }
433 433  
434 434 /* Mark an address for exclusive access. */
435   -void helper_mark_exclusive(CPUState *env, uint32_t addr)
  435 +void HELPER(mark_exclusive)(CPUState *env, uint32_t addr)
436 436 {
437 437 if (!env->mmon_entry)
438 438 allocate_mmon_state(env);
... ... @@ -443,7 +443,7 @@ void helper_mark_exclusive(CPUState *env, uint32_t addr)
443 443  
444 444 /* Test if an exclusive address is still exclusive. Returns zero
445 445 if the address is still exclusive. */
446   -int helper_test_exclusive(CPUState *env, uint32_t addr)
  446 +uint32_t HELPER(test_exclusive)(CPUState *env, uint32_t addr)
447 447 {
448 448 int res;
449 449  
... ... @@ -457,7 +457,7 @@ int helper_test_exclusive(CPUState *env, uint32_t addr)
457 457 return res;
458 458 }
459 459  
460   -void helper_clrex(CPUState *env)
  460 +void HELPER(clrex)(CPUState *env)
461 461 {
462 462 if (!(env->mmon_entry && env->mmon_entry->addr))
463 463 return;
... ... @@ -1176,17 +1176,17 @@ target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
1176 1176 /* Not really implemented. Need to figure out a sane way of doing this.
1177 1177 Maybe add generic watchpoint support and use that. */
1178 1178  
1179   -void helper_mark_exclusive(CPUState *env, uint32_t addr)
  1179 +void HELPER(mark_exclusive)(CPUState *env, uint32_t addr)
1180 1180 {
1181 1181 env->mmon_addr = addr;
1182 1182 }
1183 1183  
1184   -int helper_test_exclusive(CPUState *env, uint32_t addr)
  1184 +uint32_t HELPER(test_exclusive)(CPUState *env, uint32_t addr)
1185 1185 {
1186 1186 return (env->mmon_addr != addr);
1187 1187 }
1188 1188  
1189   -void helper_clrex(CPUState *env)
  1189 +void HELPER(clrex)(CPUState *env)
1190 1190 {
1191 1191 env->mmon_addr = -1;
1192 1192 }
... ... @@ -2496,6 +2496,8 @@ float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUState *env)
2496 2496 return float32_sub(three, float32_mul(a, b, s), s);
2497 2497 }
2498 2498  
  2499 +/* NEON helpers. */
  2500 +
2499 2501 /* TODO: The architecture specifies the value that the estimate functions
2500 2502 should return. We return the exact reciprocal/root instead. */
2501 2503 float32 HELPER(recpe_f32)(float32 a, CPUState *env)
... ...
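The second, simplified set of definitions above reduces the exclusive monitor to a single watched address. As a point of reference, here is a minimal standalone C sketch of that behaviour; the struct and wrapper names are illustrative stand-ins, not the QEMU definitions:

#include <assert.h>
#include <stdint.h>

/* Stand-in for the mmon_addr field of CPUARMState. */
struct monitor { uint32_t mmon_addr; };

static void mark_exclusive(struct monitor *m, uint32_t addr)
{
    m->mmon_addr = addr;               /* LDREX records the address */
}

/* Returns zero while the address is still exclusive, like helper_test_exclusive. */
static uint32_t test_exclusive(struct monitor *m, uint32_t addr)
{
    return m->mmon_addr != addr;
}

static void clrex(struct monitor *m)
{
    m->mmon_addr = -1;                 /* CLREX invalidates the monitor */
}

int main(void)
{
    struct monitor m = { .mmon_addr = -1 };
    mark_exclusive(&m, 0x1000);
    assert(test_exclusive(&m, 0x1000) == 0);   /* matching STREX would succeed */
    clrex(&m);
    assert(test_exclusive(&m, 0x1000) != 0);   /* after CLREX it fails */
    return 0;
}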
target-arm/helpers.h
... ... @@ -51,6 +51,13 @@ static inline void gen_helper_##name(TCGv ret, \
51 51 { \
52 52 tcg_gen_helper_1_3(helper_##name, ret, arg1, arg2, arg3); \
53 53 }
  54 +#define DEF_HELPER_1_4(name, ret, args) \
  55 +DEF_HELPER(name, ret, args) \
  56 +static inline void gen_helper_##name(TCGv ret, \
  57 + TCGv arg1, TCGv arg2, TCGv arg3, TCGv arg4) \
  58 +{ \
  59 + tcg_gen_helper_1_4(helper_##name, ret, arg1, arg2, arg3, arg4); \
  60 +}
54 61 #else /* !GEN_HELPER */
55 62 #define DEF_HELPER_0_0 DEF_HELPER
56 63 #define DEF_HELPER_0_1 DEF_HELPER
... ... @@ -60,6 +67,7 @@ static inline void gen_helper_##name(TCGv ret, \
60 67 #define DEF_HELPER_1_1 DEF_HELPER
61 68 #define DEF_HELPER_1_2 DEF_HELPER
62 69 #define DEF_HELPER_1_3 DEF_HELPER
  70 +#define DEF_HELPER_1_4 DEF_HELPER
63 71 #define HELPER(x) glue(helper_,x)
64 72 #endif
65 73  
... ... @@ -130,6 +138,10 @@ DEF_HELPER_1_2(get_cp, uint32_t, (CPUState *, uint32_t))
130 138 DEF_HELPER_1_2(get_r13_banked, uint32_t, (CPUState *, uint32_t))
131 139 DEF_HELPER_0_3(set_r13_banked, void, (CPUState *, uint32_t, uint32_t))
132 140  
  141 +DEF_HELPER_0_2(mark_exclusive, void, (CPUState *, uint32_t))
  142 +DEF_HELPER_1_2(test_exclusive, uint32_t, (CPUState *, uint32_t))
  143 +DEF_HELPER_0_1(clrex, void, (CPUState *))
  144 +
133 145 DEF_HELPER_1_1(get_user_reg, uint32_t, (uint32_t))
134 146 DEF_HELPER_0_2(set_user_reg, void, (uint32_t, uint32_t))
135 147  
... ... @@ -195,6 +207,7 @@ DEF_HELPER_1_2(recpe_f32, float32, (float32, CPUState *))
195 207 DEF_HELPER_1_2(rsqrte_f32, float32, (float32, CPUState *))
196 208 DEF_HELPER_1_2(recpe_u32, uint32_t, (uint32_t, CPUState *))
197 209 DEF_HELPER_1_2(rsqrte_u32, uint32_t, (uint32_t, CPUState *))
  210 +DEF_HELPER_1_4(neon_tbl, uint32_t, (uint32_t, uint32_t, uint32_t, uint32_t))
198 211  
199 212 DEF_HELPER_1_2(add_cc, uint32_t, (uint32_t, uint32_t))
200 213 DEF_HELPER_1_2(adc_cc, uint32_t, (uint32_t, uint32_t))
... ...
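In helpers.h, HELPER(x) simply pastes the helper_ prefix onto the name, so the HELPER(clrex) definitions in helper.c line up with the prototype generated by DEF_HELPER_0_1(clrex, ...). A trimmed-down, compilable illustration of that token pasting (glue is redefined inline here rather than taken from the QEMU headers):

#include <stdio.h>

#define glue(a, b) a##b
#define HELPER(name) glue(helper_, name)

/* Expands to: void helper_clrex(void) */
void HELPER(clrex)(void)
{
    printf("helper_clrex called\n");
}

int main(void)
{
    helper_clrex();    /* the pasted name can also be written out directly */
    return 0;
}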
target-arm/op_helper.c
... ... @@ -40,27 +40,26 @@ void cpu_unlock(void)
40 40 spin_unlock(&global_cpu_lock);
41 41 }
42 42  
43   -void helper_neon_tbl(int rn, int maxindex)
  43 +uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def,
  44 + uint32_t rn, uint32_t maxindex)
44 45 {
45 46 uint32_t val;
46   - uint32_t mask;
47 47 uint32_t tmp;
48 48 int index;
49 49 int shift;
50 50 uint64_t *table;
51 51 table = (uint64_t *)&env->vfp.regs[rn];
52 52 val = 0;
53   - mask = 0;
54 53 for (shift = 0; shift < 32; shift += 8) {
55   - index = (T1 >> shift) & 0xff;
56   - if (index <= maxindex) {
  54 + index = (ireg >> shift) & 0xff;
  55 + if (index < maxindex) {
57 56 tmp = (table[index >> 3] >> (index & 7)) & 0xff;
58 57 val |= tmp << shift;
59 58 } else {
60   - val |= T0 & (0xff << shift);
  59 + val |= def & (0xff << shift);
61 60 }
62 61 }
63   - T0 = val;
  62 + return val;
64 63 }
65 64  
66 65 #if !defined(CONFIG_USER_ONLY)
... ...
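The reworked helper no longer goes through T0/T1: the index bytes, the default value, the register number and the table length all arrive as arguments, and the result is returned. A standalone sketch of the byte-table lookup it performs, simplified to a flat byte array instead of the NEON register file (function and variable names here are illustrative):

#include <stdint.h>
#include <stdio.h>

/* Each byte of ireg selects a byte from table[] when it is below maxindex;
   otherwise the corresponding byte of def is kept (VTBX semantics; VTBL
   passes def == 0). */
static uint32_t neon_tbl_sketch(uint32_t ireg, uint32_t def,
                                const uint8_t *table, int maxindex)
{
    uint32_t val = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8) {
        int index = (ireg >> shift) & 0xff;
        if (index < maxindex)
            val |= (uint32_t)table[index] << shift;
        else
            val |= def & (0xffu << shift);
    }
    return val;
}

int main(void)
{
    uint8_t table[8] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 };

    /* Indices 0, 2, 4 hit the table; 0x80 is out of range, so the top
       byte comes from def. Prints dd141210. */
    printf("%08x\n", neon_tbl_sketch(0x80040200, 0xdd000000, table, 8));
    return 0;
}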
target-arm/op_mem.h
1 1 /* ARM memory operations. */
2 2  
3   -/* Load-locked, store exclusive. */
4   -#define EXCLUSIVE_OP(suffix, ldsuffix) \
5   -void OPPROTO glue(op_ld##suffix##ex,MEMSUFFIX)(void) \
6   -{ \
7   - cpu_lock(); \
8   - helper_mark_exclusive(env, T1); \
9   - T0 = glue(ld##ldsuffix,MEMSUFFIX)(T1); \
10   - cpu_unlock(); \
11   - FORCE_RET(); \
12   -} \
13   - \
14   -void OPPROTO glue(op_st##suffix##ex,MEMSUFFIX)(void) \
15   -{ \
16   - int failed; \
17   - cpu_lock(); \
18   - failed = helper_test_exclusive(env, T1); \
19   - /* ??? Is it safe to hold the cpu lock over a store? */ \
20   - if (!failed) { \
21   - glue(st##suffix,MEMSUFFIX)(T1, T0); \
22   - } \
23   - T0 = failed; \
24   - cpu_unlock(); \
25   - FORCE_RET(); \
26   -}
27   -
28   -EXCLUSIVE_OP(b, ub)
29   -EXCLUSIVE_OP(w, uw)
30   -EXCLUSIVE_OP(l, l)
31   -
32   -#undef EXCLUSIVE_OP
33   -
34   -/* Load exclusive T0:T1 from address T1. */
35   -void OPPROTO glue(op_ldqex,MEMSUFFIX)(void)
36   -{
37   - cpu_lock();
38   - helper_mark_exclusive(env, T1);
39   - T0 = glue(ldl,MEMSUFFIX)(T1);
40   - T1 = glue(ldl,MEMSUFFIX)((T1 + 4));
41   - cpu_unlock();
42   - FORCE_RET();
43   -}
44   -
45   -/* Store exclusive T0:T2 to address T1. */
46   -void OPPROTO glue(op_stqex,MEMSUFFIX)(void)
47   -{
48   - int failed;
49   - cpu_lock();
50   - failed = helper_test_exclusive(env, T1);
51   - /* ??? Is it safe to hold the cpu lock over a store? */
52   - if (!failed) {
53   - glue(stl,MEMSUFFIX)(T1, T0);
54   - glue(stl,MEMSUFFIX)((T1 + 4), T2);
55   - }
56   - T0 = failed;
57   - cpu_unlock();
58   - FORCE_RET();
59   -}
60   -
61 3 /* iwMMXt load/store. Address is in T1 */
62 4 #define MMX_MEM_OP(name, ldname) \
63 5 void OPPROTO glue(op_iwmmxt_ld##name,MEMSUFFIX)(void) \
... ...
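The removed EXCLUSIVE_OP sequences are now expanded inline by translate.c: LDREX marks the address and loads, STREX asks the monitor whether the address is still exclusive, stores only when the check passes, and returns the failure flag in Rd. A scalar sketch of that sequence, using simplified stand-ins for the helpers and a word-indexed memory array (all names here are hypothetical):

#include <stdint.h>
#include <stdio.h>

static uint32_t mmon_addr = ~0u;                   /* simplified monitor */

static void mark_exclusive(uint32_t addr) { mmon_addr = addr; }
static uint32_t test_exclusive(uint32_t addr) { return mmon_addr != addr; }

static uint32_t ldrex(const uint32_t *mem, uint32_t addr)
{
    mark_exclusive(addr);                          /* helper_mark_exclusive */
    return mem[addr / 4];                          /* 32-bit load */
}

static uint32_t strex(uint32_t *mem, uint32_t addr, uint32_t val)
{
    uint32_t failed = test_exclusive(addr);        /* helper_test_exclusive */
    if (!failed)                                   /* brcond skips the store */
        mem[addr / 4] = val;
    return failed;                                 /* Rd: 0 on success */
}

int main(void)
{
    uint32_t mem[4] = { 0 };
    uint32_t old = ldrex(mem, 8);
    uint32_t failed = strex(mem, 8, old + 1);
    printf("failed=%u mem[2]=%u\n", failed, mem[2]);
    return 0;
}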
target-arm/op_neon.h
... ... @@ -47,11 +47,6 @@ NEON_OP(getreg_T1)
47 47 T1 = *(uint32_t *)((char *) env + PARAM1);
48 48 }
49 49  
50   -NEON_OP(getreg_T2)
51   -{
52   - T2 = *(uint32_t *)((char *) env + PARAM1);
53   -}
54   -
55 50 NEON_OP(setreg_T0)
56 51 {
57 52 *(uint32_t *)((char *) env + PARAM1) = T0;
... ... @@ -62,11 +57,6 @@ NEON_OP(setreg_T1)
62 57 *(uint32_t *)((char *) env + PARAM1) = T1;
63 58 }
64 59  
65   -NEON_OP(setreg_T2)
66   -{
67   - *(uint32_t *)((char *) env + PARAM1) = T2;
68   -}
69   -
70 60 #define NEON_TYPE1(name, type) \
71 61 typedef struct \
72 62 { \
... ... @@ -293,28 +283,6 @@ NEON_OP(hsub_u32)
293 283 FORCE_RET();
294 284 }
295 285  
296   -/* ??? bsl, bif and bit are all the same op, just with the oparands in a
297   - differnet order. It's currently easier to have 3 differnt ops than
298   - rearange the operands. */
299   -
300   -/* Bitwise Select. */
301   -NEON_OP(bsl)
302   -{
303   - T0 = (T0 & T2) | (T1 & ~T2);
304   -}
305   -
306   -/* Bitwise Insert If True. */
307   -NEON_OP(bit)
308   -{
309   - T0 = (T0 & T1) | (T2 & ~T1);
310   -}
311   -
312   -/* Bitwise Insert If False. */
313   -NEON_OP(bif)
314   -{
315   - T0 = (T2 & T1) | (T0 & ~T1);
316   -}
317   -
318 286 #define NEON_USAT(dest, src1, src2, type) do { \
319 287 uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \
320 288 if (tmp != (type)tmp) { \
... ... @@ -423,7 +391,7 @@ NEON_VOP(shl_u32, neon_u32, 1)
423 391  
424 392 NEON_OP(shl_u64)
425 393 {
426   - int8_t shift = T2;
  394 + int8_t shift = env->vfp.scratch[0];
427 395 uint64_t val = T0 | ((uint64_t)T1 << 32);
428 396 if (shift < 0) {
429 397 val >>= -shift;
... ... @@ -437,7 +405,7 @@ NEON_OP(shl_u64)
437 405  
438 406 NEON_OP(shl_s64)
439 407 {
440   - int8_t shift = T2;
  408 + int8_t shift = env->vfp.scratch[0];
441 409 int64_t val = T0 | ((uint64_t)T1 << 32);
442 410 if (shift < 0) {
443 411 val >>= -shift;
... ... @@ -468,7 +436,7 @@ NEON_VOP(rshl_u32, neon_u32, 1)
468 436  
469 437 NEON_OP(rshl_u64)
470 438 {
471   - int8_t shift = T2;
  439 + int8_t shift = env->vfp.scratch[0];
472 440 uint64_t val = T0 | ((uint64_t)T1 << 32);
473 441 if (shift < 0) {
474 442 val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
... ... @@ -483,7 +451,7 @@ NEON_OP(rshl_u64)
483 451  
484 452 NEON_OP(rshl_s64)
485 453 {
486   - int8_t shift = T2;
  454 + int8_t shift = env->vfp.scratch[0];
487 455 int64_t val = T0 | ((uint64_t)T1 << 32);
488 456 if (shift < 0) {
489 457 val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
... ... @@ -514,7 +482,7 @@ NEON_VOP(qshl_s32, neon_s32, 1)
514 482  
515 483 NEON_OP(qshl_s64)
516 484 {
517   - int8_t shift = T2;
  485 + int8_t shift = env->vfp.scratch[0];
518 486 int64_t val = T0 | ((uint64_t)T1 << 32);
519 487 if (shift < 0) {
520 488 val >>= -shift;
... ... @@ -550,7 +518,7 @@ NEON_VOP(qshl_u32, neon_u32, 1)
550 518  
551 519 NEON_OP(qshl_u64)
552 520 {
553   - int8_t shift = T2;
  521 + int8_t shift = env->vfp.scratch[0];
554 522 uint64_t val = T0 | ((uint64_t)T1 << 32);
555 523 if (shift < 0) {
556 524 val >>= -shift;
... ... @@ -1713,12 +1681,6 @@ NEON_OP(zip_u16)
1713 1681 FORCE_RET();
1714 1682 }
1715 1683  
1716   -/* Table lookup. This accessed the register file directly. */
1717   -NEON_OP(tbl)
1718   -{
1719   - helper_neon_tbl(PARAM1, PARAM2);
1720   -}
1721   -
1722 1684 NEON_OP(dup_u8)
1723 1685 {
1724 1686 T0 = (T0 >> PARAM1) & 0xff;
... ... @@ -1726,20 +1688,3 @@ NEON_OP(dup_u8)
1726 1688 T0 |= T0 << 16;
1727 1689 FORCE_RET();
1728 1690 }
1729   -
1730   -/* Helpers for element load/store. */
1731   -NEON_OP(insert_elt)
1732   -{
1733   - int shift = PARAM1;
1734   - uint32_t mask = PARAM2;
1735   - T2 = (T2 & mask) | (T0 << shift);
1736   - FORCE_RET();
1737   -}
1738   -
1739   -NEON_OP(extract_elt)
1740   -{
1741   - int shift = PARAM1;
1742   - uint32_t mask = PARAM2;
1743   - T0 = (T2 & mask) >> shift;
1744   - FORCE_RET();
1745   -}
... ...
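The deleted bsl/bit/bif ops were all the same per-bit select with the operands permuted, which is how translate.c now generates them through a single gen_neon_bsl. A standalone C illustration of that select and of the three operand orders (plain C, outside QEMU):

#include <stdint.h>
#include <stdio.h>

/* Per-bit select, dest = c ? t : f -- the and/bic/or sequence gen_neon_bsl emits. */
static uint32_t bsel(uint32_t t, uint32_t f, uint32_t c)
{
    return (t & c) | (f & ~c);
}

int main(void)
{
    uint32_t d = 0xff00ff00;   /* old destination register value */
    uint32_t n = 0x11111111;   /* first source register */
    uint32_t m = 0x22222222;   /* second source register */

    printf("VBSL %08x\n", bsel(n, m, d));   /* d is the select mask */
    printf("VBIT %08x\n", bsel(n, d, m));   /* insert n where m is set */
    printf("VBIF %08x\n", bsel(d, n, m));   /* insert n where m is clear */
    return 0;
}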
target-arm/translate.c
... ... @@ -78,7 +78,7 @@ extern int loglevel;
78 78  
79 79 static TCGv cpu_env;
80 80 /* FIXME: These should be removed. */
81   -static TCGv cpu_T[3];
  81 +static TCGv cpu_T[2];
82 82 static TCGv cpu_F0s, cpu_F1s, cpu_F0d, cpu_F1d;
83 83  
84 84 /* initialize TCG globals. */
... ... @@ -88,7 +88,6 @@ void arm_translate_init(void)
88 88  
89 89 cpu_T[0] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG1, "T0");
90 90 cpu_T[1] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG2, "T1");
91   - cpu_T[2] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG3, "T2");
92 91 }
93 92  
94 93 /* The code generator doesn't like lots of temporaries, so maintain our own
... ... @@ -188,13 +187,9 @@ static void store_reg(DisasContext *s, int reg, TCGv var)
188 187  
189 188 /* Basic operations. */
190 189 #define gen_op_movl_T0_T1() tcg_gen_mov_i32(cpu_T[0], cpu_T[1])
191   -#define gen_op_movl_T0_T2() tcg_gen_mov_i32(cpu_T[0], cpu_T[2])
192 190 #define gen_op_movl_T1_T0() tcg_gen_mov_i32(cpu_T[1], cpu_T[0])
193   -#define gen_op_movl_T1_T2() tcg_gen_mov_i32(cpu_T[1], cpu_T[2])
194   -#define gen_op_movl_T2_T0() tcg_gen_mov_i32(cpu_T[2], cpu_T[0])
195 191 #define gen_op_movl_T0_im(im) tcg_gen_movi_i32(cpu_T[0], im)
196 192 #define gen_op_movl_T1_im(im) tcg_gen_movi_i32(cpu_T[1], im)
197   -#define gen_op_movl_T2_im(im) tcg_gen_movi_i32(cpu_T[2], im)
198 193  
199 194 #define gen_op_addl_T1_im(im) tcg_gen_addi_i32(cpu_T[1], cpu_T[1], im)
200 195 #define gen_op_addl_T0_T1() tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1])
... ... @@ -310,9 +305,9 @@ static void gen_sbfx(TCGv var, int shift, int width)
310 305 /* Bitfield insertion. Insert val into base. Clobbers base and val. */
311 306 static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask)
312 307 {
313   - tcg_gen_shli_i32(val, val, shift);
314 308 tcg_gen_andi_i32(val, val, mask);
315   - tcg_gen_andi_i32(base, base, ~mask);
  309 + tcg_gen_shli_i32(val, val, shift);
  310 + tcg_gen_andi_i32(base, base, ~(mask << shift));
316 311 tcg_gen_or_i32(dest, base, val);
317 312 }
318 313  
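After this change gen_bfi takes the unshifted field mask and shifts it itself, so the callers later in this patch pass plain 0xff, 0xffff or (1u << width) - 1. A scalar equivalent of the generated sequence (hypothetical helper name, outside QEMU):

#include <stdint.h>
#include <stdio.h>

/* Insert the low bits of val selected by mask into base at bit position
   shift -- the same and/shift/and/or sequence gen_bfi now emits. */
static uint32_t bfi(uint32_t base, uint32_t val, int shift, uint32_t mask)
{
    val &= mask;
    val <<= shift;
    base &= ~(mask << shift);
    return base | val;
}

int main(void)
{
    /* Insert the byte 0xab at bit 16 of 0x12345678: prints 12ab5678. */
    printf("%08x\n", bfi(0x12345678, 0xab, 16, 0xff));
    return 0;
}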
... ... @@ -460,6 +455,13 @@ static inline void tcg_gen_not_i32(TCGv t0, TCGv t1)
460 455  
461 456 /* T0 &= ~T1. Clobbers T1. */
462 457 /* FIXME: Implement bic natively. */
  458 +static inline void tcg_gen_bic_i32(TCGv dest, TCGv t0, TCGv t1)
  459 +{
  460 + TCGv tmp = new_tmp();
  461 + tcg_gen_not_i32(tmp, t1);
  462 + tcg_gen_and_i32(dest, t0, tmp);
  463 + dead_tmp(tmp);
  464 +}
463 465 static inline void gen_op_bicl_T0_T1(void)
464 466 {
465 467 gen_op_notl_T1();
... ... @@ -1167,6 +1169,19 @@ neon_reg_offset (int reg, int n)
1167 1169 #define NEON_GET_REG(T, reg, n) gen_op_neon_getreg_##T(neon_reg_offset(reg, n))
1168 1170 #define NEON_SET_REG(T, reg, n) gen_op_neon_setreg_##T(neon_reg_offset(reg, n))
1169 1171  
  1172 +static TCGv neon_load_reg(int reg, int pass)
  1173 +{
  1174 + TCGv tmp = new_tmp();
  1175 + tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
  1176 + return tmp;
  1177 +}
  1178 +
  1179 +static void neon_store_reg(int reg, int pass, TCGv var)
  1180 +{
  1181 + tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
  1182 + dead_tmp(var);
  1183 +}
  1184 +
1170 1185 #define tcg_gen_ld_f32 tcg_gen_ld_i32
1171 1186 #define tcg_gen_ld_f64 tcg_gen_ld_i64
1172 1187 #define tcg_gen_st_f32 tcg_gen_st_i32
... ... @@ -2500,19 +2515,14 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
2500 2515 /* VMOV */
2501 2516 switch (size) {
2502 2517 case 0:
2503   - NEON_GET_REG(T2, rn, pass);
2504   - gen_op_movl_T1_im(0xff);
2505   - gen_op_andl_T0_T1();
2506   - gen_op_neon_insert_elt(offset, ~(0xff << offset));
2507   - NEON_SET_REG(T2, rn, pass);
  2518 + tmp = neon_load_reg(rn, pass);
  2519 + gen_bfi(tmp, tmp, cpu_T[0], offset, 0xff);
  2520 + neon_store_reg(rn, pass, tmp);
2508 2521 break;
2509 2522 case 1:
2510   - NEON_GET_REG(T2, rn, pass);
2511   - gen_op_movl_T1_im(0xffff);
2512   - gen_op_andl_T0_T1();
2513   - bank_mask = offset ? 0xffff : 0xffff0000;
2514   - gen_op_neon_insert_elt(offset, bank_mask);
2515   - NEON_SET_REG(T2, rn, pass);
  2523 + tmp = neon_load_reg(rn, pass);
  2524 + gen_bfi(tmp, tmp, cpu_T[0], offset, 0xffff);
  2525 + neon_store_reg(rn, pass, tmp);
2516 2526 break;
2517 2527 case 2:
2518 2528 NEON_SET_REG(T0, rn, pass);
... ... @@ -3480,9 +3490,9 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
3480 3490 int pass;
3481 3491 int load;
3482 3492 int shift;
3483   - uint32_t mask;
3484 3493 int n;
3485 3494 TCGv tmp;
  3495 + TCGv tmp2;
3486 3496  
3487 3497 if (!vfp_enabled(env))
3488 3498 return 1;
... ... @@ -3525,60 +3535,47 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
3525 3535 } else if (size == 1) {
3526 3536 if (load) {
3527 3537 tmp = gen_ld16u(cpu_T[1], IS_USER(s));
3528   - tcg_gen_mov_i32(cpu_T[0], tmp);
3529   - dead_tmp(tmp);
3530 3538 gen_op_addl_T1_im(stride);
3531   - gen_op_movl_T2_T0();
3532   - tmp = gen_ld16u(cpu_T[1], IS_USER(s));
3533   - tcg_gen_mov_i32(cpu_T[0], tmp);
3534   - dead_tmp(tmp);
  3539 + tmp2 = gen_ld16u(cpu_T[1], IS_USER(s));
3535 3540 gen_op_addl_T1_im(stride);
3536   - gen_op_neon_insert_elt(16, 0xffff);
3537   - NEON_SET_REG(T2, rd, pass);
  3541 + gen_bfi(tmp, tmp, tmp2, 16, 0xffff);
  3542 + dead_tmp(tmp2);
  3543 + neon_store_reg(rd, pass, tmp);
3538 3544 } else {
3539   - NEON_GET_REG(T2, rd, pass);
3540   - gen_op_movl_T0_T2();
3541   - tmp = new_tmp();
3542   - tcg_gen_mov_i32(tmp, cpu_T[0]);
  3545 + tmp = neon_load_reg(rd, pass);
  3546 + tmp2 = new_tmp();
  3547 + tcg_gen_shri_i32(tmp2, tmp, 16);
3543 3548 gen_st16(tmp, cpu_T[1], IS_USER(s));
3544 3549 gen_op_addl_T1_im(stride);
3545   - gen_op_neon_extract_elt(16, 0xffff0000);
3546   - tmp = new_tmp();
3547   - tcg_gen_mov_i32(tmp, cpu_T[0]);
3548   - gen_st16(tmp, cpu_T[1], IS_USER(s));
  3550 + gen_st16(tmp2, cpu_T[1], IS_USER(s));
3549 3551 gen_op_addl_T1_im(stride);
3550 3552 }
3551 3553 } else /* size == 0 */ {
3552 3554 if (load) {
3553   - mask = 0xff;
3554 3555 for (n = 0; n < 4; n++) {
3555 3556 tmp = gen_ld8u(cpu_T[1], IS_USER(s));
3556   - tcg_gen_mov_i32(cpu_T[0], tmp);
3557   - dead_tmp(tmp);
3558 3557 gen_op_addl_T1_im(stride);
3559 3558 if (n == 0) {
3560   - gen_op_movl_T2_T0();
  3559 + tmp2 = tmp;
3561 3560 } else {
3562   - gen_op_neon_insert_elt(n * 8, ~mask);
  3561 + gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff);
  3562 + dead_tmp(tmp);
3563 3563 }
3564   - mask <<= 8;
3565 3564 }
3566   - NEON_SET_REG(T2, rd, pass);
  3565 + neon_store_reg(rd, pass, tmp2);
3567 3566 } else {
3568   - NEON_GET_REG(T2, rd, pass);
3569   - mask = 0xff;
  3567 + tmp2 = neon_load_reg(rd, pass);
3570 3568 for (n = 0; n < 4; n++) {
  3569 + tmp = new_tmp();
3571 3570 if (n == 0) {
3572   - gen_op_movl_T0_T2();
  3571 + tcg_gen_mov_i32(tmp, tmp2);
3573 3572 } else {
3574   - gen_op_neon_extract_elt(n * 8, mask);
  3573 + tcg_gen_shri_i32(tmp, tmp2, n * 8);
3575 3574 }
3576   - tmp = new_tmp();
3577   - tcg_gen_mov_i32(tmp, cpu_T[0]);
3578 3575 gen_st8(tmp, cpu_T[1], IS_USER(s));
3579 3576 gen_op_addl_T1_im(stride);
3580   - mask <<= 8;
3581 3577 }
  3578 + dead_tmp(tmp2);
3582 3579 }
3583 3580 }
3584 3581 }
... ... @@ -3629,17 +3626,14 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
3629 3626 switch (size) {
3630 3627 case 0:
3631 3628 shift = ((insn >> 5) & 3) * 8;
3632   - mask = 0xff << shift;
3633 3629 stride = 1;
3634 3630 break;
3635 3631 case 1:
3636 3632 shift = ((insn >> 6) & 1) * 16;
3637   - mask = shift ? 0xffff0000 : 0xffff;
3638 3633 stride = (insn & (1 << 5)) ? 2 : 1;
3639 3634 break;
3640 3635 case 2:
3641 3636 shift = 0;
3642   - mask = 0xffffffff;
3643 3637 stride = (insn & (1 << 6)) ? 2 : 1;
3644 3638 break;
3645 3639 default:
... ... @@ -3649,9 +3643,6 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
3649 3643 gen_movl_T1_reg(s, rn);
3650 3644 for (reg = 0; reg < nregs; reg++) {
3651 3645 if (load) {
3652   - if (size != 2) {
3653   - NEON_GET_REG(T2, rd, pass);
3654   - }
3655 3646 switch (size) {
3656 3647 case 0:
3657 3648 tmp = gen_ld8u(cpu_T[1], IS_USER(s));
... ... @@ -3663,23 +3654,16 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
3663 3654 tmp = gen_ld32(cpu_T[1], IS_USER(s));
3664 3655 break;
3665 3656 }
3666   - tcg_gen_mov_i32(cpu_T[0], tmp);
3667   - dead_tmp(tmp);
3668 3657 if (size != 2) {
3669   - gen_op_neon_insert_elt(shift, ~mask);
3670   - NEON_SET_REG(T0, rd, pass);
3671   - } else {
3672   - NEON_SET_REG(T0, rd, pass);
  3658 + tmp2 = neon_load_reg(rd, pass);
  3659 + gen_bfi(tmp, tmp2, tmp, shift, size ? 0xffff : 0xff);
  3660 + dead_tmp(tmp2);
3673 3661 }
  3662 + neon_store_reg(rd, pass, tmp);
3674 3663 } else { /* Store */
3675   - if (size == 2) {
3676   - NEON_GET_REG(T0, rd, pass);
3677   - } else {
3678   - NEON_GET_REG(T2, rd, pass);
3679   - gen_op_neon_extract_elt(shift, mask);
3680   - }
3681   - tmp = new_tmp();
3682   - tcg_gen_mov_i32(tmp, cpu_T[0]);
  3664 + tmp = neon_load_reg(rd, pass);
  3665 + if (shift)
  3666 + tcg_gen_shri_i32(tmp, tmp, shift);
3683 3667 switch (size) {
3684 3668 case 0:
3685 3669 gen_st8(tmp, cpu_T[1], IS_USER(s));
... ... @@ -3715,6 +3699,14 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
3715 3699 return 0;
3716 3700 }
3717 3701  
  3702 +/* Bitwise select. dest = c ? t : f. Clobbers T and F. */
  3703 +static void gen_neon_bsl(TCGv dest, TCGv t, TCGv f, TCGv c)
  3704 +{
  3705 + tcg_gen_and_i32(t, t, c);
  3706 + tcg_gen_bic_i32(f, f, c);
  3707 + tcg_gen_or_i32(dest, t, f);
  3708 +}
  3709 +
3718 3710 /* Translate a NEON data processing instruction. Return nonzero if the
3719 3711 instruction is invalid.
3720 3712 In general we process vectors in 32-bit chunks. This means we can reuse
... ... @@ -3735,6 +3727,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
3735 3727 int u;
3736 3728 int n;
3737 3729 uint32_t imm;
  3730 + TCGv tmp;
  3731 + TCGv tmp2;
  3732 + TCGv tmp3;
3738 3733  
3739 3734 if (!vfp_enabled(env))
3740 3735 return 1;
... ... @@ -3875,16 +3870,19 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
3875 3870 gen_op_xorl_T0_T1();
3876 3871 break;
3877 3872 case 5: /* VBSL */
3878   - NEON_GET_REG(T2, rd, pass);
3879   - gen_op_neon_bsl();
  3873 + tmp = neon_load_reg(rd, pass);
  3874 + gen_neon_bsl(cpu_T[0], cpu_T[0], cpu_T[1], tmp);
  3875 + dead_tmp(tmp);
3880 3876 break;
3881 3877 case 6: /* VBIT */
3882   - NEON_GET_REG(T2, rd, pass);
3883   - gen_op_neon_bit();
  3878 + tmp = neon_load_reg(rd, pass);
  3879 + gen_neon_bsl(cpu_T[0], cpu_T[0], tmp, cpu_T[1]);
  3880 + dead_tmp(tmp);
3884 3881 break;
3885 3882 case 7: /* VBIF */
3886   - NEON_GET_REG(T2, rd, pass);
3887   - gen_op_neon_bif();
  3883 + tmp = neon_load_reg(rd, pass);
  3884 + gen_neon_bsl(cpu_T[0], tmp, cpu_T[0], cpu_T[1]);
  3885 + dead_tmp(tmp);
3888 3886 break;
3889 3887 }
3890 3888 break;
... ... @@ -4190,8 +4188,6 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4190 4188 element size in bits. */
4191 4189 if (op <= 4)
4192 4190 shift = shift - (1 << (size + 3));
4193   - else
4194   - shift++;
4195 4191 if (size == 3) {
4196 4192 count = q + 1;
4197 4193 } else {
... ... @@ -4276,9 +4272,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4276 4272 default:
4277 4273 abort();
4278 4274 }
4279   - NEON_GET_REG(T1, rd, pass);
4280   - gen_op_movl_T2_im(imm);
4281   - gen_op_neon_bsl();
  4275 + tmp = neon_load_reg(rd, pass);
  4276 + tcg_gen_andi_i32(cpu_T[0], cpu_T[0], imm);
  4277 + tcg_gen_andi_i32(tmp, tmp, ~imm);
  4278 + tcg_gen_or_i32(cpu_T[0], cpu_T[0], tmp);
4282 4279 }
4283 4280 if (size == 3) {
4284 4281 NEON_SET_REG(T0, rd, pass * 2);
... ... @@ -4519,24 +4516,26 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4519 4516 /* Avoid overlapping operands. Wide source operands are
4520 4517 always aligned so will never overlap with wide
4521 4518 destinations in problematic ways. */
4522   - if (rd == rm) {
4523   - NEON_GET_REG(T2, rm, 1);
4524   - } else if (rd == rn) {
4525   - NEON_GET_REG(T2, rn, 1);
  4519 + if (rd == rm && !src2_wide) {
  4520 + NEON_GET_REG(T0, rm, 1);
  4521 + gen_neon_movl_scratch_T0(2);
  4522 + } else if (rd == rn && !src1_wide) {
  4523 + NEON_GET_REG(T0, rn, 1);
  4524 + gen_neon_movl_scratch_T0(2);
4526 4525 }
4527 4526 for (pass = 0; pass < 2; pass++) {
4528 4527 /* Load the second operand into env->vfp.scratch.
4529 4528 Also widen narrow operands. */
4530   - if (pass == 1 && rd == rm) {
4531   - if (prewiden) {
4532   - gen_op_movl_T0_T2();
4533   - } else {
4534   - gen_op_movl_T1_T2();
4535   - }
  4529 + if (src2_wide) {
  4530 + NEON_GET_REG(T0, rm, pass * 2);
  4531 + NEON_GET_REG(T1, rm, pass * 2 + 1);
4536 4532 } else {
4537   - if (src2_wide) {
4538   - NEON_GET_REG(T0, rm, pass * 2);
4539   - NEON_GET_REG(T1, rm, pass * 2 + 1);
  4533 + if (pass == 1 && rd == rm) {
  4534 + if (prewiden) {
  4535 + gen_neon_movl_T0_scratch(2);
  4536 + } else {
  4537 + gen_neon_movl_T1_scratch(2);
  4538 + }
4540 4539 } else {
4541 4540 if (prewiden) {
4542 4541 NEON_GET_REG(T0, rm, pass);
... ... @@ -4554,12 +4553,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4554 4553 }
4555 4554  
4556 4555 /* Load the first operand. */
4557   - if (pass == 1 && rd == rn) {
4558   - gen_op_movl_T0_T2();
  4556 + if (src1_wide) {
  4557 + NEON_GET_REG(T0, rn, pass * 2);
  4558 + NEON_GET_REG(T1, rn, pass * 2 + 1);
4559 4559 } else {
4560   - if (src1_wide) {
4561   - NEON_GET_REG(T0, rn, pass * 2);
4562   - NEON_GET_REG(T1, rn, pass * 2 + 1);
  4560 + if (pass == 1 && rd == rn) {
  4561 + gen_neon_movl_T0_scratch(2);
4563 4562 } else {
4564 4563 NEON_GET_REG(T0, rn, pass);
4565 4564 }
... ... @@ -4696,10 +4695,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4696 4695 case 12: /* VQDMULH scalar */
4697 4696 case 13: /* VQRDMULH scalar */
4698 4697 gen_neon_get_scalar(size, rm);
4699   - gen_op_movl_T2_T0();
  4698 + gen_neon_movl_scratch_T0(0);
4700 4699 for (pass = 0; pass < (u ? 4 : 2); pass++) {
4701 4700 if (pass != 0)
4702   - gen_op_movl_T0_T2();
  4701 + gen_neon_movl_T0_scratch(0);
4703 4702 NEON_GET_REG(T1, rn, pass);
4704 4703 if (op == 12) {
4705 4704 if (size == 1) {
... ... @@ -4764,10 +4763,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4764 4763 gen_neon_movl_scratch_T0(2);
4765 4764 }
4766 4765 gen_neon_get_scalar(size, rm);
4767   - gen_op_movl_T2_T0();
  4766 + gen_neon_movl_scratch_T0(3);
4768 4767 for (pass = 0; pass < 2; pass++) {
4769 4768 if (pass != 0) {
4770   - gen_op_movl_T0_T2();
  4769 + gen_neon_movl_T0_scratch(3);
4771 4770 }
4772 4771 if (pass != 0 && rd == rn) {
4773 4772 gen_neon_movl_T1_scratch(2);
... ... @@ -5025,11 +5024,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5025 5024 if (q)
5026 5025 return 1;
5027 5026 if (rm == rd) {
5028   - NEON_GET_REG(T2, rm, 1);
  5027 + NEON_GET_REG(T0, rm, 1);
  5028 + gen_neon_movl_scratch_T0(0);
5029 5029 }
5030 5030 for (pass = 0; pass < 2; pass++) {
5031 5031 if (pass == 1 && rm == rd) {
5032   - gen_op_movl_T0_T2();
  5032 + gen_neon_movl_T0_scratch(0);
5033 5033 } else {
5034 5034 NEON_GET_REG(T0, rm, pass);
5035 5035 }
... ... @@ -5253,23 +5253,26 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
5253 5253 } else if ((insn & (1 << 10)) == 0) {
5254 5254 /* VTBL, VTBX. */
5255 5255 n = (insn >> 5) & 0x18;
5256   - NEON_GET_REG(T1, rm, 0);
5257 5256 if (insn & (1 << 6)) {
5258   - NEON_GET_REG(T0, rd, 0);
  5257 + tmp = neon_load_reg(rd, 0);
5259 5258 } else {
5260   - gen_op_movl_T0_im(0);
  5259 + tmp = new_tmp();
  5260 + tcg_gen_movi_i32(tmp, 0);
5261 5261 }
5262   - gen_op_neon_tbl(rn, n);
5263   - gen_op_movl_T2_T0();
5264   - NEON_GET_REG(T1, rm, 1);
  5262 + tmp2 = neon_load_reg(rm, 0);
  5263 + gen_helper_neon_tbl(tmp2, tmp2, tmp, tcg_const_i32(rn),
  5264 + tcg_const_i32(n));
5265 5265 if (insn & (1 << 6)) {
5266   - NEON_GET_REG(T0, rd, 0);
  5266 + tmp = neon_load_reg(rd, 1);
5267 5267 } else {
5268   - gen_op_movl_T0_im(0);
  5268 + tmp = new_tmp();
  5269 + tcg_gen_movi_i32(tmp, 0);
5269 5270 }
5270   - gen_op_neon_tbl(rn, n);
5271   - NEON_SET_REG(T2, rd, 0);
5272   - NEON_SET_REG(T0, rd, 1);
  5271 + tmp3 = neon_load_reg(rm, 1);
  5272 + gen_helper_neon_tbl(tmp3, tmp3, tmp, tcg_const_i32(rn),
  5273 + tcg_const_i32(n));
  5274 + neon_store_reg(rd, 0, tmp2);
  5275 + neon_store_reg(rd, 1, tmp2);
5273 5276 } else if ((insn & 0x380) == 0) {
5274 5277 /* VDUP */
5275 5278 if (insn & (1 << 19)) {
... ... @@ -5430,7 +5433,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
5430 5433 switch ((insn >> 4) & 0xf) {
5431 5434 case 1: /* clrex */
5432 5435 ARCH(6K);
5433   - gen_op_clrex();
  5436 + gen_helper_clrex(cpu_env);
5434 5437 return;
5435 5438 case 4: /* dsb */
5436 5439 case 5: /* dmb */
... ... @@ -5977,13 +5980,19 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
5977 5980 /* load/store exclusive */
5978 5981 gen_movl_T1_reg(s, rn);
5979 5982 if (insn & (1 << 20)) {
5980   - gen_ldst(ldlex, s);
  5983 + gen_helper_mark_exclusive(cpu_env, cpu_T[1]);
  5984 + tmp = gen_ld32(addr, IS_USER(s));
  5985 + store_reg(s, rd, tmp);
5981 5986 } else {
  5987 + int label = gen_new_label();
5982 5988 rm = insn & 0xf;
5983   - gen_movl_T0_reg(s, rm);
5984   - gen_ldst(stlex, s);
  5989 + gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
  5990 + tcg_gen_brcond_i32(TCG_COND_NE, cpu_T[0],
  5991 + tcg_const_i32(0), label);
  5992 + tmp = load_reg(s,rm);
  5993 + gen_st32(tmp, cpu_T[1], IS_USER(s));
  5994 + gen_movl_reg_T0(s, rd);
5985 5995 }
5986   - gen_movl_reg_T0(s, rd);
5987 5996 } else {
5988 5997 /* SWP instruction */
5989 5998 rm = (insn) & 0xf;
... ... @@ -6287,8 +6296,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
6287 6296 }
6288 6297 if (i != 32) {
6289 6298 tmp2 = load_reg(s, rd);
6290   - gen_bfi(tmp, tmp2, tmp,
6291   - shift, ((1u << i) - 1) << shift);
  6299 + gen_bfi(tmp, tmp2, tmp, shift, (1u << i) - 1);
6292 6300 dead_tmp(tmp2);
6293 6301 }
6294 6302 store_reg(s, rd, tmp);
... ... @@ -6720,14 +6728,21 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
6720 6728 }
6721 6729 } else if ((insn & (1 << 23)) == 0) {
6722 6730 /* Load/store exclusive word. */
6723   - gen_movl_T0_reg(s, rd);
6724 6731 gen_movl_T1_reg(s, rn);
6725 6732 if (insn & (1 << 20)) {
6726   - gen_ldst(ldlex, s);
  6733 + gen_helper_mark_exclusive(cpu_env, cpu_T[1]);
  6734 + tmp = gen_ld32(addr, IS_USER(s));
  6735 + store_reg(s, rd, tmp);
6727 6736 } else {
6728   - gen_ldst(stlex, s);
  6737 + int label = gen_new_label();
  6738 + gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
  6739 + tcg_gen_brcond_i32(TCG_COND_NE, cpu_T[0],
  6740 + tcg_const_i32(0), label);
  6741 + tmp = load_reg(s, rs);
  6742 + gen_st32(tmp, cpu_T[1], IS_USER(s));
  6743 + gen_set_label(label);
  6744 + gen_movl_reg_T0(s, rd);
6729 6745 }
6730   - gen_movl_reg_T0(s, rd);
6731 6746 } else if ((insn & (1 << 6)) == 0) {
6732 6747 /* Table Branch. */
6733 6748 if (rn == 15) {
... ... @@ -6753,40 +6768,57 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
6753 6768 store_reg(s, 15, tmp);
6754 6769 } else {
6755 6770 /* Load/store exclusive byte/halfword/doubleword. */
  6771 + /* ??? These are not really atomic. However we know
  6772 + we never have multiple CPUs running in parallel,
  6773 + so it is good enough. */
6756 6774 op = (insn >> 4) & 0x3;
  6775 + /* Must use a global reg for the address because we have
  6776 + a conditional branch in the store instruction. */
6757 6777 gen_movl_T1_reg(s, rn);
  6778 + addr = cpu_T[1];
6758 6779 if (insn & (1 << 20)) {
  6780 + gen_helper_mark_exclusive(cpu_env, addr);
6759 6781 switch (op) {
6760 6782 case 0:
6761   - gen_ldst(ldbex, s);
  6783 + tmp = gen_ld8u(addr, IS_USER(s));
6762 6784 break;
6763 6785 case 1:
6764   - gen_ldst(ldwex, s);
  6786 + tmp = gen_ld16u(addr, IS_USER(s));
6765 6787 break;
6766 6788 case 3:
6767   - gen_ldst(ldqex, s);
6768   - gen_movl_reg_T1(s, rd);
  6789 + tmp = gen_ld32(addr, IS_USER(s));
  6790 + tcg_gen_addi_i32(addr, addr, 4);
  6791 + tmp2 = gen_ld32(addr, IS_USER(s));
  6792 + store_reg(s, rd, tmp2);
6769 6793 break;
6770 6794 default:
6771 6795 goto illegal_op;
6772 6796 }
6773   - gen_movl_reg_T0(s, rs);
  6797 + store_reg(s, rs, tmp);
6774 6798 } else {
6775   - gen_movl_T0_reg(s, rs);
  6799 + int label = gen_new_label();
  6800 + /* Must use a global that is not killed by the branch. */
  6801 + gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
  6802 + tcg_gen_brcond_i32(TCG_COND_NE, cpu_T[0], tcg_const_i32(0),
  6803 + label);
  6804 + tmp = load_reg(s, rs);
6776 6805 switch (op) {
6777 6806 case 0:
6778   - gen_ldst(stbex, s);
  6807 + gen_st8(tmp, addr, IS_USER(s));
6779 6808 break;
6780 6809 case 1:
6781   - gen_ldst(stwex, s);
  6810 + gen_st16(tmp, addr, IS_USER(s));
6782 6811 break;
6783 6812 case 3:
6784   - gen_movl_T2_reg(s, rd);
6785   - gen_ldst(stqex, s);
  6813 + gen_st32(tmp, addr, IS_USER(s));
  6814 + tcg_gen_addi_i32(addr, addr, 4);
  6815 + tmp = load_reg(s, rd);
  6816 + gen_st32(tmp, addr, IS_USER(s));
6786 6817 break;
6787 6818 default:
6788 6819 goto illegal_op;
6789 6820 }
  6821 + gen_set_label(label);
6790 6822 gen_movl_reg_T0(s, rm);
6791 6823 }
6792 6824 }
... ... @@ -7271,7 +7303,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
7271 7303 op = (insn >> 4) & 0xf;
7272 7304 switch (op) {
7273 7305 case 2: /* clrex */
7274   - gen_op_clrex();
  7306 + gen_helper_clrex(cpu_env);
7275 7307 break;
7276 7308 case 4: /* dsb */
7277 7309 case 5: /* dmb */
... ... @@ -7369,8 +7401,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
7369 7401 imm = imm + 1 - shift;
7370 7402 if (imm != 32) {
7371 7403 tmp2 = load_reg(s, rd);
7372   - gen_bfi(tmp, tmp2, tmp,
7373   - shift, ((1u << imm) - 1) << shift);
  7404 + gen_bfi(tmp, tmp2, tmp, shift, (1u << imm) - 1);
7374 7405 dead_tmp(tmp2);
7375 7406 }
7376 7407 break;
... ...