Commit 0211e5aff995ee55722148923a7fc317796e4114 (1 parent: 30898801)
converted MUL/IMUL to TCG
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4508 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 3 changed files with 129 additions and 133 deletions
target-i386/helper.c
| ... | ... | @@ -1609,22 +1609,6 @@ void helper_rsm(void) |
| 1609 | 1609 | #endif /* !CONFIG_USER_ONLY */ |
| 1610 | 1610 | |
| 1611 | 1611 | |
| 1612 | -#ifdef BUGGY_GCC_DIV64 | |
| 1613 | -/* gcc 2.95.4 on PowerPC does not seem to like using __udivdi3, so we | |
| 1614 | - call it from another function */ | |
| 1615 | -uint32_t div32(uint64_t *q_ptr, uint64_t num, uint32_t den) | |
| 1616 | -{ | |
| 1617 | - *q_ptr = num / den; | |
| 1618 | - return num % den; | |
| 1619 | -} | |
| 1620 | - | |
| 1621 | -int32_t idiv32(int64_t *q_ptr, int64_t num, int32_t den) | |
| 1622 | -{ | |
| 1623 | - *q_ptr = num / den; | |
| 1624 | - return num % den; | |
| 1625 | -} | |
| 1626 | -#endif | |
| 1627 | - | |
| 1628 | 1612 | /* division, flags are undefined */ |
| 1629 | 1613 | |
| 1630 | 1614 | void helper_divb_AL(target_ulong t0) |
| ... | ... | @@ -1707,12 +1691,8 @@ void helper_divl_EAX(target_ulong t0) |
| 1707 | 1691 | if (den == 0) { |
| 1708 | 1692 | raise_exception(EXCP00_DIVZ); |
| 1709 | 1693 | } |
| 1710 | -#ifdef BUGGY_GCC_DIV64 | |
| 1711 | - r = div32(&q, num, den); | |
| 1712 | -#else | |
| 1713 | 1694 | q = (num / den); |
| 1714 | 1695 | r = (num % den); |
| 1715 | -#endif | |
| 1716 | 1696 | if (q > 0xffffffff) |
| 1717 | 1697 | raise_exception(EXCP00_DIVZ); |
| 1718 | 1698 | EAX = (uint32_t)q; |
| ... | ... | @@ -1729,12 +1709,8 @@ void helper_idivl_EAX(target_ulong t0) |
| 1729 | 1709 | if (den == 0) { |
| 1730 | 1710 | raise_exception(EXCP00_DIVZ); |
| 1731 | 1711 | } |
| 1732 | -#ifdef BUGGY_GCC_DIV64 | |
| 1733 | - r = idiv32(&q, num, den); | |
| 1734 | -#else | |
| 1735 | 1712 | q = (num / den); |
| 1736 | 1713 | r = (num % den); |
| 1737 | -#endif | |
| 1738 | 1714 | if (q != (int32_t)q) |
| 1739 | 1715 | raise_exception(EXCP00_DIVZ); |
| 1740 | 1716 | EAX = (uint32_t)q; | ... | ... |
target-i386/op.c
| ... | ... | @@ -123,104 +123,6 @@ |
| 123 | 123 | |
| 124 | 124 | #endif |
| 125 | 125 | |
| 126 | -/* multiply/divide */ | |
| 127 | - | |
| 128 | -/* XXX: add eflags optimizations */ | |
| 129 | -/* XXX: add non P4 style flags */ | |
| 130 | - | |
| 131 | -void OPPROTO op_mulb_AL_T0(void) | |
| 132 | -{ | |
| 133 | - unsigned int res; | |
| 134 | - res = (uint8_t)EAX * (uint8_t)T0; | |
| 135 | - EAX = (EAX & ~0xffff) | res; | |
| 136 | - CC_DST = res; | |
| 137 | - CC_SRC = (res & 0xff00); | |
| 138 | -} | |
| 139 | - | |
| 140 | -void OPPROTO op_imulb_AL_T0(void) | |
| 141 | -{ | |
| 142 | - int res; | |
| 143 | - res = (int8_t)EAX * (int8_t)T0; | |
| 144 | - EAX = (EAX & ~0xffff) | (res & 0xffff); | |
| 145 | - CC_DST = res; | |
| 146 | - CC_SRC = (res != (int8_t)res); | |
| 147 | -} | |
| 148 | - | |
| 149 | -void OPPROTO op_mulw_AX_T0(void) | |
| 150 | -{ | |
| 151 | - unsigned int res; | |
| 152 | - res = (uint16_t)EAX * (uint16_t)T0; | |
| 153 | - EAX = (EAX & ~0xffff) | (res & 0xffff); | |
| 154 | - EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff); | |
| 155 | - CC_DST = res; | |
| 156 | - CC_SRC = res >> 16; | |
| 157 | -} | |
| 158 | - | |
| 159 | -void OPPROTO op_imulw_AX_T0(void) | |
| 160 | -{ | |
| 161 | - int res; | |
| 162 | - res = (int16_t)EAX * (int16_t)T0; | |
| 163 | - EAX = (EAX & ~0xffff) | (res & 0xffff); | |
| 164 | - EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff); | |
| 165 | - CC_DST = res; | |
| 166 | - CC_SRC = (res != (int16_t)res); | |
| 167 | -} | |
| 168 | - | |
| 169 | -void OPPROTO op_mull_EAX_T0(void) | |
| 170 | -{ | |
| 171 | - uint64_t res; | |
| 172 | - res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0); | |
| 173 | - EAX = (uint32_t)res; | |
| 174 | - EDX = (uint32_t)(res >> 32); | |
| 175 | - CC_DST = (uint32_t)res; | |
| 176 | - CC_SRC = (uint32_t)(res >> 32); | |
| 177 | -} | |
| 178 | - | |
| 179 | -void OPPROTO op_imull_EAX_T0(void) | |
| 180 | -{ | |
| 181 | - int64_t res; | |
| 182 | - res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T0); | |
| 183 | - EAX = (uint32_t)(res); | |
| 184 | - EDX = (uint32_t)(res >> 32); | |
| 185 | - CC_DST = res; | |
| 186 | - CC_SRC = (res != (int32_t)res); | |
| 187 | -} | |
| 188 | - | |
| 189 | -void OPPROTO op_imulw_T0_T1(void) | |
| 190 | -{ | |
| 191 | - int res; | |
| 192 | - res = (int16_t)T0 * (int16_t)T1; | |
| 193 | - T0 = res; | |
| 194 | - CC_DST = res; | |
| 195 | - CC_SRC = (res != (int16_t)res); | |
| 196 | -} | |
| 197 | - | |
| 198 | -void OPPROTO op_imull_T0_T1(void) | |
| 199 | -{ | |
| 200 | - int64_t res; | |
| 201 | - res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1); | |
| 202 | - T0 = res; | |
| 203 | - CC_DST = res; | |
| 204 | - CC_SRC = (res != (int32_t)res); | |
| 205 | -} | |
| 206 | - | |
| 207 | -#ifdef TARGET_X86_64 | |
| 208 | -void OPPROTO op_mulq_EAX_T0(void) | |
| 209 | -{ | |
| 210 | - helper_mulq_EAX_T0(T0); | |
| 211 | -} | |
| 212 | - | |
| 213 | -void OPPROTO op_imulq_EAX_T0(void) | |
| 214 | -{ | |
| 215 | - helper_imulq_EAX_T0(T0); | |
| 216 | -} | |
| 217 | - | |
| 218 | -void OPPROTO op_imulq_T0_T1(void) | |
| 219 | -{ | |
| 220 | - T0 = helper_imulq_T0_T1(T0, T1); | |
| 221 | -} | |
| 222 | -#endif | |
| 223 | - | |
| 224 | 126 | /* constant load & misc op */ |
| 225 | 127 | |
| 226 | 128 | /* XXX: consistent names */ | ... | ... |
target-i386/translate.c
| ... | ... | @@ -3799,21 +3799,64 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 3799 | 3799 | case 4: /* mul */ |
| 3800 | 3800 | switch(ot) { |
| 3801 | 3801 | case OT_BYTE: |
| 3802 | - gen_op_mulb_AL_T0(); | |
| 3802 | + gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX); | |
| 3803 | + tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]); | |
| 3804 | + tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]); | |
| 3805 | + /* XXX: use 32 bit mul which could be faster */ | |
| 3806 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
| 3807 | + gen_op_mov_reg_T0(OT_WORD, R_EAX); | |
| 3808 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
| 3809 | + tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00); | |
| 3803 | 3810 | s->cc_op = CC_OP_MULB; |
| 3804 | 3811 | break; |
| 3805 | 3812 | case OT_WORD: |
| 3806 | - gen_op_mulw_AX_T0(); | |
| 3813 | + gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); | |
| 3814 | + tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]); | |
| 3815 | + tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]); | |
| 3816 | + /* XXX: use 32 bit mul which could be faster */ | |
| 3817 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
| 3818 | + gen_op_mov_reg_T0(OT_WORD, R_EAX); | |
| 3819 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
| 3820 | + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); | |
| 3821 | + gen_op_mov_reg_T0(OT_WORD, R_EDX); | |
| 3822 | + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); | |
| 3807 | 3823 | s->cc_op = CC_OP_MULW; |
| 3808 | 3824 | break; |
| 3809 | 3825 | default: |
| 3810 | 3826 | case OT_LONG: |
| 3811 | - gen_op_mull_EAX_T0(); | |
| 3827 | +#ifdef TARGET_X86_64 | |
| 3828 | + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); | |
| 3829 | + tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]); | |
| 3830 | + tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]); | |
| 3831 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
| 3832 | + gen_op_mov_reg_T0(OT_LONG, R_EAX); | |
| 3833 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
| 3834 | + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32); | |
| 3835 | + gen_op_mov_reg_T0(OT_LONG, R_EDX); | |
| 3836 | + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); | |
| 3837 | +#else | |
| 3838 | + { | |
| 3839 | + TCGv t0, t1; | |
| 3840 | + t0 = tcg_temp_new(TCG_TYPE_I64); | |
| 3841 | + t1 = tcg_temp_new(TCG_TYPE_I64); | |
| 3842 | + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); | |
| 3843 | + tcg_gen_extu_i32_i64(t0, cpu_T[0]); | |
| 3844 | + tcg_gen_extu_i32_i64(t1, cpu_T[1]); | |
| 3845 | + tcg_gen_mul_i64(t0, t0, t1); | |
| 3846 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | |
| 3847 | + gen_op_mov_reg_T0(OT_LONG, R_EAX); | |
| 3848 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
| 3849 | + tcg_gen_shri_i64(t0, t0, 32); | |
| 3850 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | |
| 3851 | + gen_op_mov_reg_T0(OT_LONG, R_EDX); | |
| 3852 | + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); | |
| 3853 | + } | |
| 3854 | +#endif | |
| 3812 | 3855 | s->cc_op = CC_OP_MULL; |
| 3813 | 3856 | break; |
| 3814 | 3857 | #ifdef TARGET_X86_64 |
| 3815 | 3858 | case OT_QUAD: |
| 3816 | - gen_op_mulq_EAX_T0(); | |
| 3859 | + tcg_gen_helper_0_1(helper_mulq_EAX_T0, cpu_T[0]); | |
| 3817 | 3860 | s->cc_op = CC_OP_MULQ; |
| 3818 | 3861 | break; |
| 3819 | 3862 | #endif |
| ... | ... | @@ -3822,21 +3865,68 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 3822 | 3865 | case 5: /* imul */ |
| 3823 | 3866 | switch(ot) { |
| 3824 | 3867 | case OT_BYTE: |
| 3825 | - gen_op_imulb_AL_T0(); | |
| 3868 | + gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX); | |
| 3869 | + tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]); | |
| 3870 | + tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]); | |
| 3871 | + /* XXX: use 32 bit mul which could be faster */ | |
| 3872 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
| 3873 | + gen_op_mov_reg_T0(OT_WORD, R_EAX); | |
| 3874 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
| 3875 | + tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]); | |
| 3876 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | |
| 3826 | 3877 | s->cc_op = CC_OP_MULB; |
| 3827 | 3878 | break; |
| 3828 | 3879 | case OT_WORD: |
| 3829 | - gen_op_imulw_AX_T0(); | |
| 3880 | + gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); | |
| 3881 | + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); | |
| 3882 | + tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]); | |
| 3883 | + /* XXX: use 32 bit mul which could be faster */ | |
| 3884 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
| 3885 | + gen_op_mov_reg_T0(OT_WORD, R_EAX); | |
| 3886 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
| 3887 | + tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]); | |
| 3888 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | |
| 3889 | + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); | |
| 3890 | + gen_op_mov_reg_T0(OT_WORD, R_EDX); | |
| 3830 | 3891 | s->cc_op = CC_OP_MULW; |
| 3831 | 3892 | break; |
| 3832 | 3893 | default: |
| 3833 | 3894 | case OT_LONG: |
| 3834 | - gen_op_imull_EAX_T0(); | |
| 3895 | +#ifdef TARGET_X86_64 | |
| 3896 | + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); | |
| 3897 | + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); | |
| 3898 | + tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]); | |
| 3899 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
| 3900 | + gen_op_mov_reg_T0(OT_LONG, R_EAX); | |
| 3901 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
| 3902 | + tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]); | |
| 3903 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | |
| 3904 | + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32); | |
| 3905 | + gen_op_mov_reg_T0(OT_LONG, R_EDX); | |
| 3906 | +#else | |
| 3907 | + { | |
| 3908 | + TCGv t0, t1; | |
| 3909 | + t0 = tcg_temp_new(TCG_TYPE_I64); | |
| 3910 | + t1 = tcg_temp_new(TCG_TYPE_I64); | |
| 3911 | + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); | |
| 3912 | + tcg_gen_ext_i32_i64(t0, cpu_T[0]); | |
| 3913 | + tcg_gen_ext_i32_i64(t1, cpu_T[1]); | |
| 3914 | + tcg_gen_mul_i64(t0, t0, t1); | |
| 3915 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | |
| 3916 | + gen_op_mov_reg_T0(OT_LONG, R_EAX); | |
| 3917 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
| 3918 | + tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31); | |
| 3919 | + tcg_gen_shri_i64(t0, t0, 32); | |
| 3920 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | |
| 3921 | + gen_op_mov_reg_T0(OT_LONG, R_EDX); | |
| 3922 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | |
| 3923 | + } | |
| 3924 | +#endif | |
| 3835 | 3925 | s->cc_op = CC_OP_MULL; |
| 3836 | 3926 | break; |
| 3837 | 3927 | #ifdef TARGET_X86_64 |
| 3838 | 3928 | case OT_QUAD: |
| 3839 | - gen_op_imulq_EAX_T0(); | |
| 3929 | + tcg_gen_helper_0_1(helper_imulq_EAX_T0, cpu_T[0]); | |
| 3840 | 3930 | s->cc_op = CC_OP_MULQ; |
| 3841 | 3931 | break; |
| 3842 | 3932 | #endif |
| ... | ... | @@ -4104,13 +4194,41 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 4104 | 4194 | |
| 4105 | 4195 | #ifdef TARGET_X86_64 |
| 4106 | 4196 | if (ot == OT_QUAD) { |
| 4107 | - gen_op_imulq_T0_T1(); | |
| 4197 | + tcg_gen_helper_1_2(helper_imulq_T0_T1, cpu_T[0], cpu_T[0], cpu_T[1]); | |
| 4108 | 4198 | } else |
| 4109 | 4199 | #endif |
| 4110 | 4200 | if (ot == OT_LONG) { |
| 4111 | - gen_op_imull_T0_T1(); | |
| 4201 | +#ifdef TARGET_X86_64 | |
| 4202 | + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); | |
| 4203 | + tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]); | |
| 4204 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
| 4205 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
| 4206 | + tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]); | |
| 4207 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | |
| 4208 | +#else | |
| 4209 | + { | |
| 4210 | + TCGv t0, t1; | |
| 4211 | + t0 = tcg_temp_new(TCG_TYPE_I64); | |
| 4212 | + t1 = tcg_temp_new(TCG_TYPE_I64); | |
| 4213 | + tcg_gen_ext_i32_i64(t0, cpu_T[0]); | |
| 4214 | + tcg_gen_ext_i32_i64(t1, cpu_T[1]); | |
| 4215 | + tcg_gen_mul_i64(t0, t0, t1); | |
| 4216 | + tcg_gen_trunc_i64_i32(cpu_T[0], t0); | |
| 4217 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
| 4218 | + tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31); | |
| 4219 | + tcg_gen_shri_i64(t0, t0, 32); | |
| 4220 | + tcg_gen_trunc_i64_i32(cpu_T[1], t0); | |
| 4221 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0); | |
| 4222 | + } | |
| 4223 | +#endif | |
| 4112 | 4224 | } else { |
| 4113 | - gen_op_imulw_T0_T1(); | |
| 4225 | + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); | |
| 4226 | + tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]); | |
| 4227 | + /* XXX: use 32 bit mul which could be faster */ | |
| 4228 | + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); | |
| 4229 | + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); | |
| 4230 | + tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]); | |
| 4231 | + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); | |
| 4114 | 4232 | } |
| 4115 | 4233 | gen_op_mov_reg_T0(ot, reg); |
| 4116 | 4234 | s->cc_op = CC_OP_MULB + ot; | ... | ... |