Commit 0211e5aff995ee55722148923a7fc317796e4114

Author: bellard
Parent: 30898801

converted MUL/IMUL to TCG

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4508 c046a42c-6fe2-441c-8c8c-71466251a162
target-i386/helper.c
@@ -1609,22 +1609,6 @@ void helper_rsm(void)
 #endif /* !CONFIG_USER_ONLY */
 
 
-#ifdef BUGGY_GCC_DIV64
-/* gcc 2.95.4 on PowerPC does not seem to like using __udivdi3, so we
-   call it from another function */
-uint32_t div32(uint64_t *q_ptr, uint64_t num, uint32_t den)
-{
-    *q_ptr = num / den;
-    return num % den;
-}
-
-int32_t idiv32(int64_t *q_ptr, int64_t num, int32_t den)
-{
-    *q_ptr = num / den;
-    return num % den;
-}
-#endif
-
 /* division, flags are undefined */
 
 void helper_divb_AL(target_ulong t0)
@@ -1707,12 +1691,8 @@ void helper_divl_EAX(target_ulong t0)
     if (den == 0) {
         raise_exception(EXCP00_DIVZ);
     }
-#ifdef BUGGY_GCC_DIV64
-    r = div32(&q, num, den);
-#else
     q = (num / den);
     r = (num % den);
-#endif
     if (q > 0xffffffff)
         raise_exception(EXCP00_DIVZ);
     EAX = (uint32_t)q;
@@ -1729,12 +1709,8 @@ void helper_idivl_EAX(target_ulong t0)
    if (den == 0) {
        raise_exception(EXCP00_DIVZ);
    }
-#ifdef BUGGY_GCC_DIV64
-    r = idiv32(&q, num, den);
-#else
    q = (num / den);
    r = (num % den);
-#endif
    if (q != (int32_t)q)
        raise_exception(EXCP00_DIVZ);
    EAX = (uint32_t)q;
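
Note on the helper.c change: with the BUGGY_GCC_DIV64 workaround for gcc 2.95.4 on PowerPC gone, helper_divl_EAX and helper_idivl_EAX now perform the 64-by-32 division inline. For the unsigned case, the semantics being implemented boil down to the following standalone sketch (the wrapper function and its signature are invented here for illustration, not QEMU code):

    #include <stdint.h>

    /* Sketch of DIV r/m32 semantics: divide EDX:EAX by a 32-bit divisor,
       raising #DE when the divisor is zero or the quotient does not fit
       in EAX. */
    static int divl_sketch(uint32_t *eax, uint32_t *edx, uint32_t den)
    {
        uint64_t num = ((uint64_t)*edx << 32) | *eax;
        uint64_t q, r;

        if (den == 0)
            return -1;                /* raise_exception(EXCP00_DIVZ) */
        q = num / den;
        r = num % den;
        if (q > 0xffffffff)
            return -1;                /* quotient overflow: also #DE */
        *eax = (uint32_t)q;
        *edx = (uint32_t)r;
        return 0;
    }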
target-i386/op.c
@@ -123,104 +123,6 @@
 
 #endif
 
-/* multiply/divide */
-
-/* XXX: add eflags optimizations */
-/* XXX: add non P4 style flags */
-
-void OPPROTO op_mulb_AL_T0(void)
-{
-    unsigned int res;
-    res = (uint8_t)EAX * (uint8_t)T0;
-    EAX = (EAX & ~0xffff) | res;
-    CC_DST = res;
-    CC_SRC = (res & 0xff00);
-}
-
-void OPPROTO op_imulb_AL_T0(void)
-{
-    int res;
-    res = (int8_t)EAX * (int8_t)T0;
-    EAX = (EAX & ~0xffff) | (res & 0xffff);
-    CC_DST = res;
-    CC_SRC = (res != (int8_t)res);
-}
-
-void OPPROTO op_mulw_AX_T0(void)
-{
-    unsigned int res;
-    res = (uint16_t)EAX * (uint16_t)T0;
-    EAX = (EAX & ~0xffff) | (res & 0xffff);
-    EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff);
-    CC_DST = res;
-    CC_SRC = res >> 16;
-}
-
-void OPPROTO op_imulw_AX_T0(void)
-{
-    int res;
-    res = (int16_t)EAX * (int16_t)T0;
-    EAX = (EAX & ~0xffff) | (res & 0xffff);
-    EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff);
-    CC_DST = res;
-    CC_SRC = (res != (int16_t)res);
-}
-
-void OPPROTO op_mull_EAX_T0(void)
-{
-    uint64_t res;
-    res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0);
-    EAX = (uint32_t)res;
-    EDX = (uint32_t)(res >> 32);
-    CC_DST = (uint32_t)res;
-    CC_SRC = (uint32_t)(res >> 32);
-}
-
-void OPPROTO op_imull_EAX_T0(void)
-{
-    int64_t res;
-    res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T0);
-    EAX = (uint32_t)(res);
-    EDX = (uint32_t)(res >> 32);
-    CC_DST = res;
-    CC_SRC = (res != (int32_t)res);
-}
-
-void OPPROTO op_imulw_T0_T1(void)
-{
-    int res;
-    res = (int16_t)T0 * (int16_t)T1;
-    T0 = res;
-    CC_DST = res;
-    CC_SRC = (res != (int16_t)res);
-}
-
-void OPPROTO op_imull_T0_T1(void)
-{
-    int64_t res;
-    res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1);
-    T0 = res;
-    CC_DST = res;
-    CC_SRC = (res != (int32_t)res);
-}
-
-#ifdef TARGET_X86_64
-void OPPROTO op_mulq_EAX_T0(void)
-{
-    helper_mulq_EAX_T0(T0);
-}
-
-void OPPROTO op_imulq_EAX_T0(void)
-{
-    helper_imulq_EAX_T0(T0);
-}
-
-void OPPROTO op_imulq_T0_T1(void)
-{
-    T0 = helper_imulq_T0_T1(T0, T1);
-}
-#endif
-
 /* constant load & misc op */
 
 /* XXX: consistent names */
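
Note on the op.c removals: the deleted micro-ops also document the flag convention the new TCG code has to reproduce. CC_DST receives the (low part of the) product and CC_SRC acts as the carry/overflow source: the high half for unsigned MUL, and an "overflowed" indicator for IMUL, so CF and OF end up set exactly when CC_SRC is non-zero. A small self-contained illustration of that rule (a sketch, not QEMU code):

    #include <assert.h>
    #include <stdint.h>

    /* CF/OF for 16-bit MUL: set iff the high half of the product is
       non-zero (what op_mulw_AX_T0 stored in CC_SRC). */
    static int mulw_cf(uint16_t a, uint16_t b)
    {
        uint32_t res = (uint32_t)a * b;
        return (res >> 16) != 0;
    }

    /* CF/OF for 16-bit IMUL: set iff the product does not fit in 16
       signed bits (what op_imulw_AX_T0 stored in CC_SRC). */
    static int imulw_cf(int16_t a, int16_t b)
    {
        int32_t res = (int32_t)a * b;
        return res != (int16_t)res;
    }

    int main(void)
    {
        assert(mulw_cf(2, 3) == 0 && mulw_cf(0x1234, 0x10) == 1);
        assert(imulw_cf(-100, 100) == 0 && imulw_cf(300, 300) == 1);
        return 0;
    }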
target-i386/translate.c
@@ -3799,21 +3799,64 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         case 4: /* mul */
             switch(ot) {
             case OT_BYTE:
-                gen_op_mulb_AL_T0();
+                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
+                tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
                 s->cc_op = CC_OP_MULB;
                 break;
             case OT_WORD:
-                gen_op_mulw_AX_T0();
+                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
+                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
+                gen_op_mov_reg_T0(OT_WORD, R_EDX);
+                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
                 s->cc_op = CC_OP_MULW;
                 break;
             default:
             case OT_LONG:
-                gen_op_mull_EAX_T0();
+#ifdef TARGET_X86_64
+                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
+                gen_op_mov_reg_T0(OT_LONG, R_EDX);
+                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+#else
+                {
+                    TCGv t0, t1;
+                    t0 = tcg_temp_new(TCG_TYPE_I64);
+                    t1 = tcg_temp_new(TCG_TYPE_I64);
+                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                    tcg_gen_extu_i32_i64(t0, cpu_T[0]);
+                    tcg_gen_extu_i32_i64(t1, cpu_T[1]);
+                    tcg_gen_mul_i64(t0, t0, t1);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    tcg_gen_shri_i64(t0, t0, 32);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
+                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+                }
+#endif
                 s->cc_op = CC_OP_MULL;
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
-                gen_op_mulq_EAX_T0();
+                tcg_gen_helper_0_1(helper_mulq_EAX_T0, cpu_T[0]);
                 s->cc_op = CC_OP_MULQ;
                 break;
 #endif
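
Note on the OT_LONG unsigned case above: 64-bit hosts can do the widening multiply directly with target_long-sized ops, while 32-bit hosts go through explicit 64-bit TCG temporaries. The net effect of the 32-bit-host sequence, written as plain C (a sketch; the wrapper name and parameters are made up for illustration):

    #include <stdint.h>

    /* What the generated code computes for "mul r/m32": EDX:EAX = EAX * src,
       with cc_dst = low half and cc_src = high half, so CF = OF = (high != 0). */
    static void mull_sketch(uint32_t *eax, uint32_t *edx, uint32_t src,
                            uint32_t *cc_dst, uint32_t *cc_src)
    {
        uint64_t t0 = (uint64_t)*eax * src;   /* extu_i32_i64 x2 + mul_i64 */
        *eax = (uint32_t)t0;                  /* trunc_i64_i32 -> EAX */
        *cc_dst = (uint32_t)t0;
        *edx = (uint32_t)(t0 >> 32);          /* shri_i64 32 + trunc -> EDX */
        *cc_src = (uint32_t)(t0 >> 32);
    }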
@@ -3822,21 +3865,68 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         case 5: /* imul */
             switch(ot) {
             case OT_BYTE:
-                gen_op_imulb_AL_T0();
+                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
+                tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                 s->cc_op = CC_OP_MULB;
                 break;
             case OT_WORD:
-                gen_op_imulw_AX_T0();
+                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
+                tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
+                gen_op_mov_reg_T0(OT_WORD, R_EDX);
                 s->cc_op = CC_OP_MULW;
                 break;
             default:
             case OT_LONG:
-                gen_op_imull_EAX_T0();
+#ifdef TARGET_X86_64
+                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
+                gen_op_mov_reg_T0(OT_LONG, R_EDX);
+#else
+                {
+                    TCGv t0, t1;
+                    t0 = tcg_temp_new(TCG_TYPE_I64);
+                    t1 = tcg_temp_new(TCG_TYPE_I64);
+                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
+                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
+                    tcg_gen_mul_i64(t0, t0, t1);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
+                    tcg_gen_shri_i64(t0, t0, 32);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
+                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+                }
+#endif
                 s->cc_op = CC_OP_MULL;
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
-                gen_op_imulq_EAX_T0();
+                tcg_gen_helper_0_1(helper_imulq_EAX_T0, cpu_T[0]);
                 s->cc_op = CC_OP_MULQ;
                 break;
 #endif
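
Note on the signed cases above: since the flags code only cares whether CC_SRC is zero, the TCG version does not need the old res != (intN_t)res comparison. It stores res - sext(res) instead (or, on 32-bit hosts, high_half - (low_half >> 31)), which is non-zero exactly when the product overflows the destination width. A self-contained check of that equivalence (a sketch, not QEMU code):

    #include <assert.h>
    #include <stdint.h>

    /* cc_src for 16-bit IMUL, computed the way the generated code does it. */
    static int32_t imulw_cc_src(int16_t a, int16_t b)
    {
        int32_t res = (int32_t)a * b;     /* ext16s x2 + mul */
        int32_t low = (int16_t)res;       /* ext16s of the result (cpu_tmp0) */
        return res - low;                 /* sub -> cpu_cc_src */
    }

    int main(void)
    {
        /* Non-zero exactly when the old test (res != (int16_t)res) fired. */
        assert(imulw_cc_src(100, 100) == 0);    /* 10000 fits: CF = OF = 0 */
        assert(imulw_cc_src(300, 300) != 0);    /* 90000 overflows: CF = OF = 1 */
        assert(imulw_cc_src(-200, 200) != 0);   /* -40000 overflows as well */
        return 0;
    }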
@@ -4104,13 +4194,41 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
 
 #ifdef TARGET_X86_64
         if (ot == OT_QUAD) {
-            gen_op_imulq_T0_T1();
+            tcg_gen_helper_1_2(helper_imulq_T0_T1, cpu_T[0], cpu_T[0], cpu_T[1]);
         } else
 #endif
         if (ot == OT_LONG) {
-            gen_op_imull_T0_T1();
+#ifdef TARGET_X86_64
+            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+            tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+            tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
+            tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+#else
+            {
+                TCGv t0, t1;
+                t0 = tcg_temp_new(TCG_TYPE_I64);
+                t1 = tcg_temp_new(TCG_TYPE_I64);
+                tcg_gen_ext_i32_i64(t0, cpu_T[0]);
+                tcg_gen_ext_i32_i64(t1, cpu_T[1]);
+                tcg_gen_mul_i64(t0, t0, t1);
+                tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
+                tcg_gen_shri_i64(t0, t0, 32);
+                tcg_gen_trunc_i64_i32(cpu_T[1], t0);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
+            }
+#endif
         } else {
-            gen_op_imulw_T0_T1();
+            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
+            /* XXX: use 32 bit mul which could be faster */
+            tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
+            tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
         }
         gen_op_mov_reg_T0(ot, reg);
         s->cc_op = CC_OP_MULB + ot;
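
Note on the last hunk: this is the two-operand IMUL path, which differs from the one-operand cases in that the truncated product goes to an arbitrary destination register (via gen_op_mov_reg_T0(ot, reg)) rather than to EAX/EDX, while the flag encoding stays the same. As plain C, the 16-bit case amounts to the following sketch (the wrapper and its parameters are invented for illustration):

    #include <stdint.h>

    /* imul r16, r/m16: the destination gets the low 16 bits of the product,
       cc_src is non-zero iff the full product did not fit. */
    static uint16_t imulw_r_rm(int16_t dst, int16_t src, int32_t *cc_src)
    {
        int32_t res = (int32_t)dst * src;     /* ext16s x2 + mul */
        *cc_src = res - (int16_t)res;         /* ext16s + sub */
        return (uint16_t)res;                 /* gen_op_mov_reg_T0(ot, reg) */
    }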