Commit 0211e5aff995ee55722148923a7fc317796e4114 (1 parent: 30898801)
converted MUL/IMUL to TCG
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4508 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 3 changed files with 129 additions and 133 deletions.
target-i386/helper.c
@@ -1609,22 +1609,6 @@ void helper_rsm(void) | @@ -1609,22 +1609,6 @@ void helper_rsm(void) | ||
1609 | #endif /* !CONFIG_USER_ONLY */ | 1609 | #endif /* !CONFIG_USER_ONLY */ |
1610 | 1610 | ||
1611 | 1611 | ||
1612 | -#ifdef BUGGY_GCC_DIV64 | ||
1613 | -/* gcc 2.95.4 on PowerPC does not seem to like using __udivdi3, so we | ||
1614 | - call it from another function */ | ||
1615 | -uint32_t div32(uint64_t *q_ptr, uint64_t num, uint32_t den) | ||
1616 | -{ | ||
1617 | - *q_ptr = num / den; | ||
1618 | - return num % den; | ||
1619 | -} | ||
1620 | - | ||
1621 | -int32_t idiv32(int64_t *q_ptr, int64_t num, int32_t den) | ||
1622 | -{ | ||
1623 | - *q_ptr = num / den; | ||
1624 | - return num % den; | ||
1625 | -} | ||
1626 | -#endif | ||
1627 | - | ||
1628 | /* division, flags are undefined */ | 1612 | /* division, flags are undefined */ |
1629 | 1613 | ||
1630 | void helper_divb_AL(target_ulong t0) | 1614 | void helper_divb_AL(target_ulong t0) |
@@ -1707,12 +1691,8 @@ void helper_divl_EAX(target_ulong t0) | @@ -1707,12 +1691,8 @@ void helper_divl_EAX(target_ulong t0) | ||
1707 | if (den == 0) { | 1691 | if (den == 0) { |
1708 | raise_exception(EXCP00_DIVZ); | 1692 | raise_exception(EXCP00_DIVZ); |
1709 | } | 1693 | } |
1710 | -#ifdef BUGGY_GCC_DIV64 | ||
1711 | - r = div32(&q, num, den); | ||
1712 | -#else | ||
1713 | q = (num / den); | 1694 | q = (num / den); |
1714 | r = (num % den); | 1695 | r = (num % den); |
1715 | -#endif | ||
1716 | if (q > 0xffffffff) | 1696 | if (q > 0xffffffff) |
1717 | raise_exception(EXCP00_DIVZ); | 1697 | raise_exception(EXCP00_DIVZ); |
1718 | EAX = (uint32_t)q; | 1698 | EAX = (uint32_t)q; |
@@ -1729,12 +1709,8 @@ void helper_idivl_EAX(target_ulong t0) | @@ -1729,12 +1709,8 @@ void helper_idivl_EAX(target_ulong t0) | ||
1729 | if (den == 0) { | 1709 | if (den == 0) { |
1730 | raise_exception(EXCP00_DIVZ); | 1710 | raise_exception(EXCP00_DIVZ); |
1731 | } | 1711 | } |
1732 | -#ifdef BUGGY_GCC_DIV64 | ||
1733 | - r = idiv32(&q, num, den); | ||
1734 | -#else | ||
1735 | q = (num / den); | 1712 | q = (num / den); |
1736 | r = (num % den); | 1713 | r = (num % den); |
1737 | -#endif | ||
1738 | if (q != (int32_t)q) | 1714 | if (q != (int32_t)q) |
1739 | raise_exception(EXCP00_DIVZ); | 1715 | raise_exception(EXCP00_DIVZ); |
1740 | EAX = (uint32_t)q; | 1716 | EAX = (uint32_t)q; |
target-i386/op.c
@@ -123,104 +123,6 @@ | @@ -123,104 +123,6 @@ | ||
123 | 123 | ||
124 | #endif | 124 | #endif |
125 | 125 | ||
126 | -/* multiply/divide */ | ||
127 | - | ||
128 | -/* XXX: add eflags optimizations */ | ||
129 | -/* XXX: add non P4 style flags */ | ||
130 | - | ||
131 | -void OPPROTO op_mulb_AL_T0(void) | ||
132 | -{ | ||
133 | - unsigned int res; | ||
134 | - res = (uint8_t)EAX * (uint8_t)T0; | ||
135 | - EAX = (EAX & ~0xffff) | res; | ||
136 | - CC_DST = res; | ||
137 | - CC_SRC = (res & 0xff00); | ||
138 | -} | ||
139 | - | ||
140 | -void OPPROTO op_imulb_AL_T0(void) | ||
141 | -{ | ||
142 | - int res; | ||
143 | - res = (int8_t)EAX * (int8_t)T0; | ||
144 | - EAX = (EAX & ~0xffff) | (res & 0xffff); | ||
145 | - CC_DST = res; | ||
146 | - CC_SRC = (res != (int8_t)res); | ||
147 | -} | ||
148 | - | ||
149 | -void OPPROTO op_mulw_AX_T0(void) | ||
150 | -{ | ||
151 | - unsigned int res; | ||
152 | - res = (uint16_t)EAX * (uint16_t)T0; | ||
153 | - EAX = (EAX & ~0xffff) | (res & 0xffff); | ||
154 | - EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff); | ||
155 | - CC_DST = res; | ||
156 | - CC_SRC = res >> 16; | ||
157 | -} | ||
158 | - | ||
159 | -void OPPROTO op_imulw_AX_T0(void) | ||
160 | -{ | ||
161 | - int res; | ||
162 | - res = (int16_t)EAX * (int16_t)T0; | ||
163 | - EAX = (EAX & ~0xffff) | (res & 0xffff); | ||
164 | - EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff); | ||
165 | - CC_DST = res; | ||
166 | - CC_SRC = (res != (int16_t)res); | ||
167 | -} | ||
168 | - | ||
169 | -void OPPROTO op_mull_EAX_T0(void) | ||
170 | -{ | ||
171 | - uint64_t res; | ||
172 | - res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0); | ||
173 | - EAX = (uint32_t)res; | ||
174 | - EDX = (uint32_t)(res >> 32); | ||
175 | - CC_DST = (uint32_t)res; | ||
176 | - CC_SRC = (uint32_t)(res >> 32); | ||
177 | -} | ||
178 | - | ||
179 | -void OPPROTO op_imull_EAX_T0(void) | ||
180 | -{ | ||
181 | - int64_t res; | ||
182 | - res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T0); | ||
183 | - EAX = (uint32_t)(res); | ||
184 | - EDX = (uint32_t)(res >> 32); | ||
185 | - CC_DST = res; | ||
186 | - CC_SRC = (res != (int32_t)res); | ||
187 | -} | ||
188 | - | ||
189 | -void OPPROTO op_imulw_T0_T1(void) | ||
190 | -{ | ||
191 | - int res; | ||
192 | - res = (int16_t)T0 * (int16_t)T1; | ||
193 | - T0 = res; | ||
194 | - CC_DST = res; | ||
195 | - CC_SRC = (res != (int16_t)res); | ||
196 | -} | ||
197 | - | ||
198 | -void OPPROTO op_imull_T0_T1(void) | ||
199 | -{ | ||
200 | - int64_t res; | ||
201 | - res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1); | ||
202 | - T0 = res; | ||
203 | - CC_DST = res; | ||
204 | - CC_SRC = (res != (int32_t)res); | ||
205 | -} | ||
206 | - | ||
207 | -#ifdef TARGET_X86_64 | ||
208 | -void OPPROTO op_mulq_EAX_T0(void) | ||
209 | -{ | ||
210 | - helper_mulq_EAX_T0(T0); | ||
211 | -} | ||
212 | - | ||
213 | -void OPPROTO op_imulq_EAX_T0(void) | ||
214 | -{ | ||
215 | - helper_imulq_EAX_T0(T0); | ||
216 | -} | ||
217 | - | ||
218 | -void OPPROTO op_imulq_T0_T1(void) | ||
219 | -{ | ||
220 | - T0 = helper_imulq_T0_T1(T0, T1); | ||
221 | -} | ||
222 | -#endif | ||
223 | - | ||
224 | /* constant load & misc op */ | 126 | /* constant load & misc op */ |
225 | 127 | ||
226 | /* XXX: consistent names */ | 128 | /* XXX: consistent names */ |
target-i386/translate.c
@@ -3799,21 +3799,64 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) | @@ -3799,21 +3799,64 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) | ||
3799 | case 4: /* mul */ | 3799 | case 4: /* mul */ |
3800 | switch(ot) { | 3800 | switch(ot) { |
3801 | case OT_BYTE: | 3801 | case OT_BYTE: |
3802 | - gen_op_mulb_AL_T0(); | 3802 | + gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX); |
3803 | + tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]); | ||
3804 | + tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]); | ||
3805 | + /* XXX: use 32 bit mul which could be faster */ | ||
3806 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | ||
3807 | + gen_op_mov_reg_T0(OT_WORD, R_EAX); | ||
3808 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | ||
3809 | + tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00); | ||
3803 | s->cc_op = CC_OP_MULB; | 3810 | s->cc_op = CC_OP_MULB; |
3804 | break; | 3811 | break; |
3805 | case OT_WORD: | 3812 | case OT_WORD: |
3806 | - gen_op_mulw_AX_T0(); | 3813 | + gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); |
3814 | + tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]); | ||
3815 | + tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]); | ||
3816 | + /* XXX: use 32 bit mul which could be faster */ | ||
3817 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | ||
3818 | + gen_op_mov_reg_T0(OT_WORD, R_EAX); | ||
3819 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | ||
3820 | + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); | ||
3821 | + gen_op_mov_reg_T0(OT_WORD, R_EDX); | ||
3822 | + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); | ||
3807 | s->cc_op = CC_OP_MULW; | 3823 | s->cc_op = CC_OP_MULW; |
3808 | break; | 3824 | break; |
3809 | default: | 3825 | default: |
3810 | case OT_LONG: | 3826 | case OT_LONG: |
3811 | - gen_op_mull_EAX_T0(); | 3827 | +#ifdef TARGET_X86_64 |
3828 | + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); | ||
3829 | + tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]); | ||
3830 | + tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]); | ||
3831 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | ||
3832 | + gen_op_mov_reg_T0(OT_LONG, R_EAX); | ||
3833 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | ||
3834 | + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32); | ||
3835 | + gen_op_mov_reg_T0(OT_LONG, R_EDX); | ||
3836 | + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); | ||
3837 | +#else | ||
3838 | + { | ||
3839 | + TCGv t0, t1; | ||
3840 | + t0 = tcg_temp_new(TCG_TYPE_I64); | ||
3841 | + t1 = tcg_temp_new(TCG_TYPE_I64); | ||
3842 | + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); | ||
3843 | + tcg_gen_extu_i32_i64(t0, cpu_T[0]); | ||
3844 | + tcg_gen_extu_i32_i64(t1, cpu_T[1]); | ||
3845 | + tcg_gen_mul_i64(t0, t0, t1); | ||
3846 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | ||
3847 | + gen_op_mov_reg_T0(OT_LONG, R_EAX); | ||
3848 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | ||
3849 | + tcg_gen_shri_i64(t0, t0, 32); | ||
3850 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | ||
3851 | + gen_op_mov_reg_T0(OT_LONG, R_EDX); | ||
3852 | + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); | ||
3853 | + } | ||
3854 | +#endif | ||
3812 | s->cc_op = CC_OP_MULL; | 3855 | s->cc_op = CC_OP_MULL; |
3813 | break; | 3856 | break; |
3814 | #ifdef TARGET_X86_64 | 3857 | #ifdef TARGET_X86_64 |
3815 | case OT_QUAD: | 3858 | case OT_QUAD: |
3816 | - gen_op_mulq_EAX_T0(); | 3859 | + tcg_gen_helper_0_1(helper_mulq_EAX_T0, cpu_T[0]); |
3817 | s->cc_op = CC_OP_MULQ; | 3860 | s->cc_op = CC_OP_MULQ; |
3818 | break; | 3861 | break; |
3819 | #endif | 3862 | #endif |
@@ -3822,21 +3865,68 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) | @@ -3822,21 +3865,68 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) | ||
3822 | case 5: /* imul */ | 3865 | case 5: /* imul */ |
3823 | switch(ot) { | 3866 | switch(ot) { |
3824 | case OT_BYTE: | 3867 | case OT_BYTE: |
3825 | - gen_op_imulb_AL_T0(); | 3868 | + gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX); |
3869 | + tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]); | ||
3870 | + tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]); | ||
3871 | + /* XXX: use 32 bit mul which could be faster */ | ||
3872 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | ||
3873 | + gen_op_mov_reg_T0(OT_WORD, R_EAX); | ||
3874 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | ||
3875 | + tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]); | ||
3876 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | ||
3826 | s->cc_op = CC_OP_MULB; | 3877 | s->cc_op = CC_OP_MULB; |
3827 | break; | 3878 | break; |
3828 | case OT_WORD: | 3879 | case OT_WORD: |
3829 | - gen_op_imulw_AX_T0(); | 3880 | + gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); |
3881 | + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); | ||
3882 | + tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]); | ||
3883 | + /* XXX: use 32 bit mul which could be faster */ | ||
3884 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | ||
3885 | + gen_op_mov_reg_T0(OT_WORD, R_EAX); | ||
3886 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | ||
3887 | + tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]); | ||
3888 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | ||
3889 | + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); | ||
3890 | + gen_op_mov_reg_T0(OT_WORD, R_EDX); | ||
3830 | s->cc_op = CC_OP_MULW; | 3891 | s->cc_op = CC_OP_MULW; |
3831 | break; | 3892 | break; |
3832 | default: | 3893 | default: |
3833 | case OT_LONG: | 3894 | case OT_LONG: |
3834 | - gen_op_imull_EAX_T0(); | 3895 | +#ifdef TARGET_X86_64 |
3896 | + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); | ||
3897 | + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); | ||
3898 | + tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]); | ||
3899 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | ||
3900 | + gen_op_mov_reg_T0(OT_LONG, R_EAX); | ||
3901 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | ||
3902 | + tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]); | ||
3903 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | ||
3904 | + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32); | ||
3905 | + gen_op_mov_reg_T0(OT_LONG, R_EDX); | ||
3906 | +#else | ||
3907 | + { | ||
3908 | + TCGv t0, t1; | ||
3909 | + t0 = tcg_temp_new(TCG_TYPE_I64); | ||
3910 | + t1 = tcg_temp_new(TCG_TYPE_I64); | ||
3911 | + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); | ||
3912 | + tcg_gen_ext_i32_i64(t0, cpu_T[0]); | ||
3913 | + tcg_gen_ext_i32_i64(t1, cpu_T[1]); | ||
3914 | + tcg_gen_mul_i64(t0, t0, t1); | ||
3915 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | ||
3916 | + gen_op_mov_reg_T0(OT_LONG, R_EAX); | ||
3917 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | ||
3918 | + tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31); | ||
3919 | + tcg_gen_shri_i64(t0, t0, 32); | ||
3920 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | ||
3921 | + gen_op_mov_reg_T0(OT_LONG, R_EDX); | ||
3922 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | ||
3923 | + } | ||
3924 | +#endif | ||
3835 | s->cc_op = CC_OP_MULL; | 3925 | s->cc_op = CC_OP_MULL; |
3836 | break; | 3926 | break; |
3837 | #ifdef TARGET_X86_64 | 3927 | #ifdef TARGET_X86_64 |
3838 | case OT_QUAD: | 3928 | case OT_QUAD: |
3839 | - gen_op_imulq_EAX_T0(); | 3929 | + tcg_gen_helper_0_1(helper_imulq_EAX_T0, cpu_T[0]); |
3840 | s->cc_op = CC_OP_MULQ; | 3930 | s->cc_op = CC_OP_MULQ; |
3841 | break; | 3931 | break; |
3842 | #endif | 3932 | #endif |
@@ -4104,13 +4194,41 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) | @@ -4104,13 +4194,41 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) | ||
4104 | 4194 | ||
4105 | #ifdef TARGET_X86_64 | 4195 | #ifdef TARGET_X86_64 |
4106 | if (ot == OT_QUAD) { | 4196 | if (ot == OT_QUAD) { |
4107 | - gen_op_imulq_T0_T1(); | 4197 | + tcg_gen_helper_1_2(helper_imulq_T0_T1, cpu_T[0], cpu_T[0], cpu_T[1]); |
4108 | } else | 4198 | } else |
4109 | #endif | 4199 | #endif |
4110 | if (ot == OT_LONG) { | 4200 | if (ot == OT_LONG) { |
4111 | - gen_op_imull_T0_T1(); | 4201 | +#ifdef TARGET_X86_64 |
4202 | + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); | ||
4203 | + tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]); | ||
4204 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | ||
4205 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | ||
4206 | + tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]); | ||
4207 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | ||
4208 | +#else | ||
4209 | + { | ||
4210 | + TCGv t0, t1; | ||
4211 | + t0 = tcg_temp_new(TCG_TYPE_I64); | ||
4212 | + t1 = tcg_temp_new(TCG_TYPE_I64); | ||
4213 | + tcg_gen_ext_i32_i64(t0, cpu_T[0]); | ||
4214 | + tcg_gen_ext_i32_i64(t1, cpu_T[1]); | ||
4215 | + tcg_gen_mul_i64(t0, t0, t1); | ||
4216 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | ||
4217 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | ||
4218 | + tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31); | ||
4219 | + tcg_gen_shri_i64(t0, t0, 32); | ||
4220 | + tcg_gen_trunc_i64_i32(cpu_T[1], t0); | ||
4221 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0); | ||
4222 | + } | ||
4223 | +#endif | ||
4112 | } else { | 4224 | } else { |
4113 | - gen_op_imulw_T0_T1(); | 4225 | + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); |
4226 | + tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]); | ||
4227 | + /* XXX: use 32 bit mul which could be faster */ | ||
4228 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | ||
4229 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | ||
4230 | + tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]); | ||
4231 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | ||
4114 | } | 4232 | } |
4115 | gen_op_mov_reg_T0(ot, reg); | 4233 | gen_op_mov_reg_T0(ot, reg); |
4116 | s->cc_op = CC_OP_MULB + ot; | 4234 | s->cc_op = CC_OP_MULB + ot; |