Commit 0211e5aff995ee55722148923a7fc317796e4114
1 parent
30898801
converted MUL/IMUL to TCG
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4508 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 3 changed files with 129 additions and 133 deletions
target-i386/helper.c
... | ... | @@ -1609,22 +1609,6 @@ void helper_rsm(void) |
1609 | 1609 | #endif /* !CONFIG_USER_ONLY */ |
1610 | 1610 | |
1611 | 1611 | |
1612 | -#ifdef BUGGY_GCC_DIV64 | |
1613 | -/* gcc 2.95.4 on PowerPC does not seem to like using __udivdi3, so we | |
1614 | - call it from another function */ | |
1615 | -uint32_t div32(uint64_t *q_ptr, uint64_t num, uint32_t den) | |
1616 | -{ | |
1617 | - *q_ptr = num / den; | |
1618 | - return num % den; | |
1619 | -} | |
1620 | - | |
1621 | -int32_t idiv32(int64_t *q_ptr, int64_t num, int32_t den) | |
1622 | -{ | |
1623 | - *q_ptr = num / den; | |
1624 | - return num % den; | |
1625 | -} | |
1626 | -#endif | |
1627 | - | |
1628 | 1612 | /* division, flags are undefined */ |
1629 | 1613 | |
1630 | 1614 | void helper_divb_AL(target_ulong t0) |
... | ... | @@ -1707,12 +1691,8 @@ void helper_divl_EAX(target_ulong t0) |
1707 | 1691 | if (den == 0) { |
1708 | 1692 | raise_exception(EXCP00_DIVZ); |
1709 | 1693 | } |
1710 | -#ifdef BUGGY_GCC_DIV64 | |
1711 | - r = div32(&q, num, den); | |
1712 | -#else | |
1713 | 1694 | q = (num / den); |
1714 | 1695 | r = (num % den); |
1715 | -#endif | |
1716 | 1696 | if (q > 0xffffffff) |
1717 | 1697 | raise_exception(EXCP00_DIVZ); |
1718 | 1698 | EAX = (uint32_t)q; |
... | ... | @@ -1729,12 +1709,8 @@ void helper_idivl_EAX(target_ulong t0) |
1729 | 1709 | if (den == 0) { |
1730 | 1710 | raise_exception(EXCP00_DIVZ); |
1731 | 1711 | } |
1732 | -#ifdef BUGGY_GCC_DIV64 | |
1733 | - r = idiv32(&q, num, den); | |
1734 | -#else | |
1735 | 1712 | q = (num / den); |
1736 | 1713 | r = (num % den); |
1737 | -#endif | |
1738 | 1714 | if (q != (int32_t)q) |
1739 | 1715 | raise_exception(EXCP00_DIVZ); |
1740 | 1716 | EAX = (uint32_t)q; | ... | ... |
target-i386/op.c
... | ... | @@ -123,104 +123,6 @@ |
123 | 123 | |
124 | 124 | #endif |
125 | 125 | |
126 | -/* multiply/divide */ | |
127 | - | |
128 | -/* XXX: add eflags optimizations */ | |
129 | -/* XXX: add non P4 style flags */ | |
130 | - | |
131 | -void OPPROTO op_mulb_AL_T0(void) | |
132 | -{ | |
133 | - unsigned int res; | |
134 | - res = (uint8_t)EAX * (uint8_t)T0; | |
135 | - EAX = (EAX & ~0xffff) | res; | |
136 | - CC_DST = res; | |
137 | - CC_SRC = (res & 0xff00); | |
138 | -} | |
139 | - | |
140 | -void OPPROTO op_imulb_AL_T0(void) | |
141 | -{ | |
142 | - int res; | |
143 | - res = (int8_t)EAX * (int8_t)T0; | |
144 | - EAX = (EAX & ~0xffff) | (res & 0xffff); | |
145 | - CC_DST = res; | |
146 | - CC_SRC = (res != (int8_t)res); | |
147 | -} | |
148 | - | |
149 | -void OPPROTO op_mulw_AX_T0(void) | |
150 | -{ | |
151 | - unsigned int res; | |
152 | - res = (uint16_t)EAX * (uint16_t)T0; | |
153 | - EAX = (EAX & ~0xffff) | (res & 0xffff); | |
154 | - EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff); | |
155 | - CC_DST = res; | |
156 | - CC_SRC = res >> 16; | |
157 | -} | |
158 | - | |
159 | -void OPPROTO op_imulw_AX_T0(void) | |
160 | -{ | |
161 | - int res; | |
162 | - res = (int16_t)EAX * (int16_t)T0; | |
163 | - EAX = (EAX & ~0xffff) | (res & 0xffff); | |
164 | - EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff); | |
165 | - CC_DST = res; | |
166 | - CC_SRC = (res != (int16_t)res); | |
167 | -} | |
168 | - | |
169 | -void OPPROTO op_mull_EAX_T0(void) | |
170 | -{ | |
171 | - uint64_t res; | |
172 | - res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0); | |
173 | - EAX = (uint32_t)res; | |
174 | - EDX = (uint32_t)(res >> 32); | |
175 | - CC_DST = (uint32_t)res; | |
176 | - CC_SRC = (uint32_t)(res >> 32); | |
177 | -} | |
178 | - | |
179 | -void OPPROTO op_imull_EAX_T0(void) | |
180 | -{ | |
181 | - int64_t res; | |
182 | - res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T0); | |
183 | - EAX = (uint32_t)(res); | |
184 | - EDX = (uint32_t)(res >> 32); | |
185 | - CC_DST = res; | |
186 | - CC_SRC = (res != (int32_t)res); | |
187 | -} | |
188 | - | |
189 | -void OPPROTO op_imulw_T0_T1(void) | |
190 | -{ | |
191 | - int res; | |
192 | - res = (int16_t)T0 * (int16_t)T1; | |
193 | - T0 = res; | |
194 | - CC_DST = res; | |
195 | - CC_SRC = (res != (int16_t)res); | |
196 | -} | |
197 | - | |
198 | -void OPPROTO op_imull_T0_T1(void) | |
199 | -{ | |
200 | - int64_t res; | |
201 | - res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1); | |
202 | - T0 = res; | |
203 | - CC_DST = res; | |
204 | - CC_SRC = (res != (int32_t)res); | |
205 | -} | |
206 | - | |
207 | -#ifdef TARGET_X86_64 | |
208 | -void OPPROTO op_mulq_EAX_T0(void) | |
209 | -{ | |
210 | - helper_mulq_EAX_T0(T0); | |
211 | -} | |
212 | - | |
213 | -void OPPROTO op_imulq_EAX_T0(void) | |
214 | -{ | |
215 | - helper_imulq_EAX_T0(T0); | |
216 | -} | |
217 | - | |
218 | -void OPPROTO op_imulq_T0_T1(void) | |
219 | -{ | |
220 | - T0 = helper_imulq_T0_T1(T0, T1); | |
221 | -} | |
222 | -#endif | |
223 | - | |
224 | 126 | /* constant load & misc op */ |
225 | 127 | |
226 | 128 | /* XXX: consistent names */ | ... | ... |
target-i386/translate.c
... | ... | @@ -3799,21 +3799,64 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
3799 | 3799 | case 4: /* mul */ |
3800 | 3800 | switch(ot) { |
3801 | 3801 | case OT_BYTE: |
3802 | - gen_op_mulb_AL_T0(); | |
3802 | + gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX); | |
3803 | + tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]); | |
3804 | + tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]); | |
3805 | + /* XXX: use 32 bit mul which could be faster */ | |
3806 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
3807 | + gen_op_mov_reg_T0(OT_WORD, R_EAX); | |
3808 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
3809 | + tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00); | |
3803 | 3810 | s->cc_op = CC_OP_MULB; |
3804 | 3811 | break; |
3805 | 3812 | case OT_WORD: |
3806 | - gen_op_mulw_AX_T0(); | |
3813 | + gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); | |
3814 | + tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]); | |
3815 | + tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]); | |
3816 | + /* XXX: use 32 bit mul which could be faster */ | |
3817 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
3818 | + gen_op_mov_reg_T0(OT_WORD, R_EAX); | |
3819 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
3820 | + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); | |
3821 | + gen_op_mov_reg_T0(OT_WORD, R_EDX); | |
3822 | + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); | |
3807 | 3823 | s->cc_op = CC_OP_MULW; |
3808 | 3824 | break; |
3809 | 3825 | default: |
3810 | 3826 | case OT_LONG: |
3811 | - gen_op_mull_EAX_T0(); | |
3827 | +#ifdef TARGET_X86_64 | |
3828 | + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); | |
3829 | + tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]); | |
3830 | + tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]); | |
3831 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
3832 | + gen_op_mov_reg_T0(OT_LONG, R_EAX); | |
3833 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
3834 | + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32); | |
3835 | + gen_op_mov_reg_T0(OT_LONG, R_EDX); | |
3836 | + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); | |
3837 | +#else | |
3838 | + { | |
3839 | + TCGv t0, t1; | |
3840 | + t0 = tcg_temp_new(TCG_TYPE_I64); | |
3841 | + t1 = tcg_temp_new(TCG_TYPE_I64); | |
3842 | + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); | |
3843 | + tcg_gen_extu_i32_i64(t0, cpu_T[0]); | |
3844 | + tcg_gen_extu_i32_i64(t1, cpu_T[1]); | |
3845 | + tcg_gen_mul_i64(t0, t0, t1); | |
3846 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | |
3847 | + gen_op_mov_reg_T0(OT_LONG, R_EAX); | |
3848 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
3849 | + tcg_gen_shri_i64(t0, t0, 32); | |
3850 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | |
3851 | + gen_op_mov_reg_T0(OT_LONG, R_EDX); | |
3852 | + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); | |
3853 | + } | |
3854 | +#endif | |
3812 | 3855 | s->cc_op = CC_OP_MULL; |
3813 | 3856 | break; |
3814 | 3857 | #ifdef TARGET_X86_64 |
3815 | 3858 | case OT_QUAD: |
3816 | - gen_op_mulq_EAX_T0(); | |
3859 | + tcg_gen_helper_0_1(helper_mulq_EAX_T0, cpu_T[0]); | |
3817 | 3860 | s->cc_op = CC_OP_MULQ; |
3818 | 3861 | break; |
3819 | 3862 | #endif |
... | ... | @@ -3822,21 +3865,68 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
3822 | 3865 | case 5: /* imul */ |
3823 | 3866 | switch(ot) { |
3824 | 3867 | case OT_BYTE: |
3825 | - gen_op_imulb_AL_T0(); | |
3868 | + gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX); | |
3869 | + tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]); | |
3870 | + tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]); | |
3871 | + /* XXX: use 32 bit mul which could be faster */ | |
3872 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
3873 | + gen_op_mov_reg_T0(OT_WORD, R_EAX); | |
3874 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
3875 | + tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]); | |
3876 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | |
3826 | 3877 | s->cc_op = CC_OP_MULB; |
3827 | 3878 | break; |
3828 | 3879 | case OT_WORD: |
3829 | - gen_op_imulw_AX_T0(); | |
3880 | + gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); | |
3881 | + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); | |
3882 | + tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]); | |
3883 | + /* XXX: use 32 bit mul which could be faster */ | |
3884 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
3885 | + gen_op_mov_reg_T0(OT_WORD, R_EAX); | |
3886 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
3887 | + tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]); | |
3888 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | |
3889 | + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); | |
3890 | + gen_op_mov_reg_T0(OT_WORD, R_EDX); | |
3830 | 3891 | s->cc_op = CC_OP_MULW; |
3831 | 3892 | break; |
3832 | 3893 | default: |
3833 | 3894 | case OT_LONG: |
3834 | - gen_op_imull_EAX_T0(); | |
3895 | +#ifdef TARGET_X86_64 | |
3896 | + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); | |
3897 | + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); | |
3898 | + tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]); | |
3899 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
3900 | + gen_op_mov_reg_T0(OT_LONG, R_EAX); | |
3901 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
3902 | + tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]); | |
3903 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | |
3904 | + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32); | |
3905 | + gen_op_mov_reg_T0(OT_LONG, R_EDX); | |
3906 | +#else | |
3907 | + { | |
3908 | + TCGv t0, t1; | |
3909 | + t0 = tcg_temp_new(TCG_TYPE_I64); | |
3910 | + t1 = tcg_temp_new(TCG_TYPE_I64); | |
3911 | + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); | |
3912 | + tcg_gen_ext_i32_i64(t0, cpu_T[0]); | |
3913 | + tcg_gen_ext_i32_i64(t1, cpu_T[1]); | |
3914 | + tcg_gen_mul_i64(t0, t0, t1); | |
3915 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | |
3916 | + gen_op_mov_reg_T0(OT_LONG, R_EAX); | |
3917 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
3918 | + tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31); | |
3919 | + tcg_gen_shri_i64(t0, t0, 32); | |
3920 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | |
3921 | + gen_op_mov_reg_T0(OT_LONG, R_EDX); | |
3922 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | |
3923 | + } | |
3924 | +#endif | |
3835 | 3925 | s->cc_op = CC_OP_MULL; |
3836 | 3926 | break; |
3837 | 3927 | #ifdef TARGET_X86_64 |
3838 | 3928 | case OT_QUAD: |
3839 | - gen_op_imulq_EAX_T0(); | |
3929 | + tcg_gen_helper_0_1(helper_imulq_EAX_T0, cpu_T[0]); | |
3840 | 3930 | s->cc_op = CC_OP_MULQ; |
3841 | 3931 | break; |
3842 | 3932 | #endif |
... | ... | @@ -4104,13 +4194,41 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
4104 | 4194 | |
4105 | 4195 | #ifdef TARGET_X86_64 |
4106 | 4196 | if (ot == OT_QUAD) { |
4107 | - gen_op_imulq_T0_T1(); | |
4197 | + tcg_gen_helper_1_2(helper_imulq_T0_T1, cpu_T[0], cpu_T[0], cpu_T[1]); | |
4108 | 4198 | } else |
4109 | 4199 | #endif |
4110 | 4200 | if (ot == OT_LONG) { |
4111 | - gen_op_imull_T0_T1(); | |
4201 | +#ifdef TARGET_X86_64 | |
4202 | + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); | |
4203 | + tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]); | |
4204 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
4205 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
4206 | + tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]); | |
4207 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | |
4208 | +#else | |
4209 | + { | |
4210 | + TCGv t0, t1; | |
4211 | + t0 = tcg_temp_new(TCG_TYPE_I64); | |
4212 | + t1 = tcg_temp_new(TCG_TYPE_I64); | |
4213 | + tcg_gen_ext_i32_i64(t0, cpu_T[0]); | |
4214 | + tcg_gen_ext_i32_i64(t1, cpu_T[1]); | |
4215 | + tcg_gen_mul_i64(t0, t0, t1); | |
4216 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | |
4217 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
4218 | + tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31); | |
4219 | + tcg_gen_shri_i64(t0, t0, 32); | |
4220 | + tcg_gen_trunc_i64_i32(cpu_T[1], t0); | |
4221 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0); | |
4222 | + } | |
4223 | +#endif | |
4112 | 4224 | } else { |
4113 | - gen_op_imulw_T0_T1(); | |
4225 | + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); | |
4226 | + tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]); | |
4227 | + /* XXX: use 32 bit mul which could be faster */ | |
4228 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
4229 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
4230 | + tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]); | |
4231 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | |
4114 | 4232 | } |
4115 | 4233 | gen_op_mov_reg_T0(ot, reg); |
4116 | 4234 | s->cc_op = CC_OP_MULB + ot; | ... | ... |