Commit 0211e5aff995ee55722148923a7fc317796e4114

Authored by bellard
1 parent 30898801

converted MUL/IMUL to TCG

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4508 c046a42c-6fe2-441c-8c8c-71466251a162
target-i386/helper.c
... ... @@ -1609,22 +1609,6 @@ void helper_rsm(void)
1609 1609 #endif /* !CONFIG_USER_ONLY */
1610 1610  
1611 1611  
1612   -#ifdef BUGGY_GCC_DIV64
1613   -/* gcc 2.95.4 on PowerPC does not seem to like using __udivdi3, so we
1614   - call it from another function */
1615   -uint32_t div32(uint64_t *q_ptr, uint64_t num, uint32_t den)
1616   -{
1617   - *q_ptr = num / den;
1618   - return num % den;
1619   -}
1620   -
1621   -int32_t idiv32(int64_t *q_ptr, int64_t num, int32_t den)
1622   -{
1623   - *q_ptr = num / den;
1624   - return num % den;
1625   -}
1626   -#endif
1627   -
1628 1612 /* division, flags are undefined */
1629 1613  
1630 1614 void helper_divb_AL(target_ulong t0)
... ... @@ -1707,12 +1691,8 @@ void helper_divl_EAX(target_ulong t0)
1707 1691 if (den == 0) {
1708 1692 raise_exception(EXCP00_DIVZ);
1709 1693 }
1710   -#ifdef BUGGY_GCC_DIV64
1711   - r = div32(&q, num, den);
1712   -#else
1713 1694 q = (num / den);
1714 1695 r = (num % den);
1715   -#endif
1716 1696 if (q > 0xffffffff)
1717 1697 raise_exception(EXCP00_DIVZ);
1718 1698 EAX = (uint32_t)q;
... ... @@ -1729,12 +1709,8 @@ void helper_idivl_EAX(target_ulong t0)
1729 1709 if (den == 0) {
1730 1710 raise_exception(EXCP00_DIVZ);
1731 1711 }
1732   -#ifdef BUGGY_GCC_DIV64
1733   - r = idiv32(&q, num, den);
1734   -#else
1735 1712 q = (num / den);
1736 1713 r = (num % den);
1737   -#endif
1738 1714 if (q != (int32_t)q)
1739 1715 raise_exception(EXCP00_DIVZ);
1740 1716 EAX = (uint32_t)q;
... ...
target-i386/op.c
... ... @@ -123,104 +123,6 @@
123 123  
124 124 #endif
125 125  
126   -/* multiply/divide */
127   -
128   -/* XXX: add eflags optimizations */
129   -/* XXX: add non P4 style flags */
130   -
131   -void OPPROTO op_mulb_AL_T0(void)
132   -{
133   - unsigned int res;
134   - res = (uint8_t)EAX * (uint8_t)T0;
135   - EAX = (EAX & ~0xffff) | res;
136   - CC_DST = res;
137   - CC_SRC = (res & 0xff00);
138   -}
139   -
140   -void OPPROTO op_imulb_AL_T0(void)
141   -{
142   - int res;
143   - res = (int8_t)EAX * (int8_t)T0;
144   - EAX = (EAX & ~0xffff) | (res & 0xffff);
145   - CC_DST = res;
146   - CC_SRC = (res != (int8_t)res);
147   -}
148   -
149   -void OPPROTO op_mulw_AX_T0(void)
150   -{
151   - unsigned int res;
152   - res = (uint16_t)EAX * (uint16_t)T0;
153   - EAX = (EAX & ~0xffff) | (res & 0xffff);
154   - EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff);
155   - CC_DST = res;
156   - CC_SRC = res >> 16;
157   -}
158   -
159   -void OPPROTO op_imulw_AX_T0(void)
160   -{
161   - int res;
162   - res = (int16_t)EAX * (int16_t)T0;
163   - EAX = (EAX & ~0xffff) | (res & 0xffff);
164   - EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff);
165   - CC_DST = res;
166   - CC_SRC = (res != (int16_t)res);
167   -}
168   -
169   -void OPPROTO op_mull_EAX_T0(void)
170   -{
171   - uint64_t res;
172   - res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0);
173   - EAX = (uint32_t)res;
174   - EDX = (uint32_t)(res >> 32);
175   - CC_DST = (uint32_t)res;
176   - CC_SRC = (uint32_t)(res >> 32);
177   -}
178   -
179   -void OPPROTO op_imull_EAX_T0(void)
180   -{
181   - int64_t res;
182   - res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T0);
183   - EAX = (uint32_t)(res);
184   - EDX = (uint32_t)(res >> 32);
185   - CC_DST = res;
186   - CC_SRC = (res != (int32_t)res);
187   -}
188   -
189   -void OPPROTO op_imulw_T0_T1(void)
190   -{
191   - int res;
192   - res = (int16_t)T0 * (int16_t)T1;
193   - T0 = res;
194   - CC_DST = res;
195   - CC_SRC = (res != (int16_t)res);
196   -}
197   -
198   -void OPPROTO op_imull_T0_T1(void)
199   -{
200   - int64_t res;
201   - res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1);
202   - T0 = res;
203   - CC_DST = res;
204   - CC_SRC = (res != (int32_t)res);
205   -}
206   -
207   -#ifdef TARGET_X86_64
208   -void OPPROTO op_mulq_EAX_T0(void)
209   -{
210   - helper_mulq_EAX_T0(T0);
211   -}
212   -
213   -void OPPROTO op_imulq_EAX_T0(void)
214   -{
215   - helper_imulq_EAX_T0(T0);
216   -}
217   -
218   -void OPPROTO op_imulq_T0_T1(void)
219   -{
220   - T0 = helper_imulq_T0_T1(T0, T1);
221   -}
222   -#endif
223   -
224 126 /* constant load & misc op */
225 127  
226 128 /* XXX: consistent names */
... ...
target-i386/translate.c
... ... @@ -3799,21 +3799,64 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
3799 3799 case 4: /* mul */
3800 3800 switch(ot) {
3801 3801 case OT_BYTE:
3802   - gen_op_mulb_AL_T0();
  3802 + gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
  3803 + tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
  3804 + tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
  3805 + /* XXX: use 32 bit mul which could be faster */
  3806 + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
  3807 + gen_op_mov_reg_T0(OT_WORD, R_EAX);
  3808 + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
  3809 + tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
3803 3810 s->cc_op = CC_OP_MULB;
3804 3811 break;
3805 3812 case OT_WORD:
3806   - gen_op_mulw_AX_T0();
  3813 + gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
  3814 + tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
  3815 + tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
  3816 + /* XXX: use 32 bit mul which could be faster */
  3817 + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
  3818 + gen_op_mov_reg_T0(OT_WORD, R_EAX);
  3819 + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
  3820 + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
  3821 + gen_op_mov_reg_T0(OT_WORD, R_EDX);
  3822 + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
3807 3823 s->cc_op = CC_OP_MULW;
3808 3824 break;
3809 3825 default:
3810 3826 case OT_LONG:
3811   - gen_op_mull_EAX_T0();
  3827 +#ifdef TARGET_X86_64
  3828 + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
  3829 + tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
  3830 + tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
  3831 + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
  3832 + gen_op_mov_reg_T0(OT_LONG, R_EAX);
  3833 + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
  3834 + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
  3835 + gen_op_mov_reg_T0(OT_LONG, R_EDX);
  3836 + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
  3837 +#else
  3838 + {
  3839 + TCGv t0, t1;
  3840 + t0 = tcg_temp_new(TCG_TYPE_I64);
  3841 + t1 = tcg_temp_new(TCG_TYPE_I64);
  3842 + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
  3843 + tcg_gen_extu_i32_i64(t0, cpu_T[0]);
  3844 + tcg_gen_extu_i32_i64(t1, cpu_T[1]);
  3845 + tcg_gen_mul_i64(t0, t0, t1);
  3846 + tcg_gen_trunc_i64_i32(cpu_T[0], t0);
  3847 + gen_op_mov_reg_T0(OT_LONG, R_EAX);
  3848 + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
  3849 + tcg_gen_shri_i64(t0, t0, 32);
  3850 + tcg_gen_trunc_i64_i32(cpu_T[0], t0);
  3851 + gen_op_mov_reg_T0(OT_LONG, R_EDX);
  3852 + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
  3853 + }
  3854 +#endif
3812 3855 s->cc_op = CC_OP_MULL;
3813 3856 break;
3814 3857 #ifdef TARGET_X86_64
3815 3858 case OT_QUAD:
3816   - gen_op_mulq_EAX_T0();
  3859 + tcg_gen_helper_0_1(helper_mulq_EAX_T0, cpu_T[0]);
3817 3860 s->cc_op = CC_OP_MULQ;
3818 3861 break;
3819 3862 #endif
... ... @@ -3822,21 +3865,68 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
3822 3865 case 5: /* imul */
3823 3866 switch(ot) {
3824 3867 case OT_BYTE:
3825   - gen_op_imulb_AL_T0();
  3868 + gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
  3869 + tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
  3870 + tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
  3871 + /* XXX: use 32 bit mul which could be faster */
  3872 + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
  3873 + gen_op_mov_reg_T0(OT_WORD, R_EAX);
  3874 + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
  3875 + tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
  3876 + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
3826 3877 s->cc_op = CC_OP_MULB;
3827 3878 break;
3828 3879 case OT_WORD:
3829   - gen_op_imulw_AX_T0();
  3880 + gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
  3881 + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
  3882 + tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
  3883 + /* XXX: use 32 bit mul which could be faster */
  3884 + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
  3885 + gen_op_mov_reg_T0(OT_WORD, R_EAX);
  3886 + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
  3887 + tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
  3888 + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
  3889 + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
  3890 + gen_op_mov_reg_T0(OT_WORD, R_EDX);
3830 3891 s->cc_op = CC_OP_MULW;
3831 3892 break;
3832 3893 default:
3833 3894 case OT_LONG:
3834   - gen_op_imull_EAX_T0();
  3895 +#ifdef TARGET_X86_64
  3896 + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
  3897 + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
  3898 + tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
  3899 + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
  3900 + gen_op_mov_reg_T0(OT_LONG, R_EAX);
  3901 + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
  3902 + tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
  3903 + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
  3904 + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
  3905 + gen_op_mov_reg_T0(OT_LONG, R_EDX);
  3906 +#else
  3907 + {
  3908 + TCGv t0, t1;
  3909 + t0 = tcg_temp_new(TCG_TYPE_I64);
  3910 + t1 = tcg_temp_new(TCG_TYPE_I64);
  3911 + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
  3912 + tcg_gen_ext_i32_i64(t0, cpu_T[0]);
  3913 + tcg_gen_ext_i32_i64(t1, cpu_T[1]);
  3914 + tcg_gen_mul_i64(t0, t0, t1);
  3915 + tcg_gen_trunc_i64_i32(cpu_T[0], t0);
  3916 + gen_op_mov_reg_T0(OT_LONG, R_EAX);
  3917 + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
  3918 + tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
  3919 + tcg_gen_shri_i64(t0, t0, 32);
  3920 + tcg_gen_trunc_i64_i32(cpu_T[0], t0);
  3921 + gen_op_mov_reg_T0(OT_LONG, R_EDX);
  3922 + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
  3923 + }
  3924 +#endif
3835 3925 s->cc_op = CC_OP_MULL;
3836 3926 break;
3837 3927 #ifdef TARGET_X86_64
3838 3928 case OT_QUAD:
3839   - gen_op_imulq_EAX_T0();
  3929 + tcg_gen_helper_0_1(helper_imulq_EAX_T0, cpu_T[0]);
3840 3930 s->cc_op = CC_OP_MULQ;
3841 3931 break;
3842 3932 #endif
... ... @@ -4104,13 +4194,41 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
4104 4194  
4105 4195 #ifdef TARGET_X86_64
4106 4196 if (ot == OT_QUAD) {
4107   - gen_op_imulq_T0_T1();
  4197 + tcg_gen_helper_1_2(helper_imulq_T0_T1, cpu_T[0], cpu_T[0], cpu_T[1]);
4108 4198 } else
4109 4199 #endif
4110 4200 if (ot == OT_LONG) {
4111   - gen_op_imull_T0_T1();
  4201 +#ifdef TARGET_X86_64
  4202 + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
  4203 + tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
  4204 + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
  4205 + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
  4206 + tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
  4207 + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
  4208 +#else
  4209 + {
  4210 + TCGv t0, t1;
  4211 + t0 = tcg_temp_new(TCG_TYPE_I64);
  4212 + t1 = tcg_temp_new(TCG_TYPE_I64);
  4213 + tcg_gen_ext_i32_i64(t0, cpu_T[0]);
  4214 + tcg_gen_ext_i32_i64(t1, cpu_T[1]);
  4215 + tcg_gen_mul_i64(t0, t0, t1);
  4216 + tcg_gen_trunc_i64_i32(cpu_T[0], t0);
  4217 + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
  4218 + tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
  4219 + tcg_gen_shri_i64(t0, t0, 32);
  4220 + tcg_gen_trunc_i64_i32(cpu_T[1], t0);
  4221 + tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
  4222 + }
  4223 +#endif
4112 4224 } else {
4113   - gen_op_imulw_T0_T1();
  4225 + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
  4226 + tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
  4227 + /* XXX: use 32 bit mul which could be faster */
  4228 + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
  4229 + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
  4230 + tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
  4231 + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
4114 4232 }
4115 4233 gen_op_mov_reg_T0(ot, reg);
4116 4234 s->cc_op = CC_OP_MULB + ot;
... ...