Commit d36cd60e6c8c66e0279bad4b17e2d23833eb20b9
1 parent: 5e809a80
P4 style multiplication eflags
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@481 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 4 changed files with 49 additions and 24 deletions
target-i386/cpu.h
| @@ -184,7 +184,10 @@ | @@ -184,7 +184,10 @@ | ||
| 184 | enum { | 184 | enum { |
| 185 | CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ | 185 | CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ |
| 186 | CC_OP_EFLAGS, /* all cc are explicitely computed, CC_SRC = flags */ | 186 | CC_OP_EFLAGS, /* all cc are explicitely computed, CC_SRC = flags */ |
| 187 | - CC_OP_MUL, /* modify all flags, C, O = (CC_SRC != 0) */ | 187 | + |
| 188 | + CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */ | ||
| 189 | + CC_OP_MULW, | ||
| 190 | + CC_OP_MULL, | ||
| 188 | 191 | ||
| 189 | CC_OP_ADDB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ | 192 | CC_OP_ADDB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ |
| 190 | CC_OP_ADDW, | 193 | CC_OP_ADDW, |
target-i386/op.c
| @@ -169,11 +169,16 @@ void OPPROTO op_bswapl_T0(void) | @@ -169,11 +169,16 @@ void OPPROTO op_bswapl_T0(void) | ||
| 169 | } | 169 | } |
| 170 | 170 | ||
| 171 | /* multiply/divide */ | 171 | /* multiply/divide */ |
| 172 | + | ||
| 173 | +/* XXX: add eflags optimizations */ | ||
| 174 | +/* XXX: add non P4 style flags */ | ||
| 175 | + | ||
| 172 | void OPPROTO op_mulb_AL_T0(void) | 176 | void OPPROTO op_mulb_AL_T0(void) |
| 173 | { | 177 | { |
| 174 | unsigned int res; | 178 | unsigned int res; |
| 175 | res = (uint8_t)EAX * (uint8_t)T0; | 179 | res = (uint8_t)EAX * (uint8_t)T0; |
| 176 | EAX = (EAX & 0xffff0000) | res; | 180 | EAX = (EAX & 0xffff0000) | res; |
| 181 | + CC_DST = res; | ||
| 177 | CC_SRC = (res & 0xff00); | 182 | CC_SRC = (res & 0xff00); |
| 178 | } | 183 | } |
| 179 | 184 | ||
| @@ -182,6 +187,7 @@ void OPPROTO op_imulb_AL_T0(void) | @@ -182,6 +187,7 @@ void OPPROTO op_imulb_AL_T0(void) | ||
| 182 | int res; | 187 | int res; |
| 183 | res = (int8_t)EAX * (int8_t)T0; | 188 | res = (int8_t)EAX * (int8_t)T0; |
| 184 | EAX = (EAX & 0xffff0000) | (res & 0xffff); | 189 | EAX = (EAX & 0xffff0000) | (res & 0xffff); |
| 190 | + CC_DST = res; | ||
| 185 | CC_SRC = (res != (int8_t)res); | 191 | CC_SRC = (res != (int8_t)res); |
| 186 | } | 192 | } |
| 187 | 193 | ||
| @@ -191,6 +197,7 @@ void OPPROTO op_mulw_AX_T0(void) | @@ -191,6 +197,7 @@ void OPPROTO op_mulw_AX_T0(void) | ||
| 191 | res = (uint16_t)EAX * (uint16_t)T0; | 197 | res = (uint16_t)EAX * (uint16_t)T0; |
| 192 | EAX = (EAX & 0xffff0000) | (res & 0xffff); | 198 | EAX = (EAX & 0xffff0000) | (res & 0xffff); |
| 193 | EDX = (EDX & 0xffff0000) | ((res >> 16) & 0xffff); | 199 | EDX = (EDX & 0xffff0000) | ((res >> 16) & 0xffff); |
| 200 | + CC_DST = res; | ||
| 194 | CC_SRC = res >> 16; | 201 | CC_SRC = res >> 16; |
| 195 | } | 202 | } |
| 196 | 203 | ||
| @@ -200,6 +207,7 @@ void OPPROTO op_imulw_AX_T0(void) | @@ -200,6 +207,7 @@ void OPPROTO op_imulw_AX_T0(void) | ||
| 200 | res = (int16_t)EAX * (int16_t)T0; | 207 | res = (int16_t)EAX * (int16_t)T0; |
| 201 | EAX = (EAX & 0xffff0000) | (res & 0xffff); | 208 | EAX = (EAX & 0xffff0000) | (res & 0xffff); |
| 202 | EDX = (EDX & 0xffff0000) | ((res >> 16) & 0xffff); | 209 | EDX = (EDX & 0xffff0000) | ((res >> 16) & 0xffff); |
| 210 | + CC_DST = res; | ||
| 203 | CC_SRC = (res != (int16_t)res); | 211 | CC_SRC = (res != (int16_t)res); |
| 204 | } | 212 | } |
| 205 | 213 | ||
| @@ -209,6 +217,7 @@ void OPPROTO op_mull_EAX_T0(void) | @@ -209,6 +217,7 @@ void OPPROTO op_mull_EAX_T0(void) | ||
| 209 | res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0); | 217 | res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0); |
| 210 | EAX = res; | 218 | EAX = res; |
| 211 | EDX = res >> 32; | 219 | EDX = res >> 32; |
| 220 | + CC_DST = res; | ||
| 212 | CC_SRC = res >> 32; | 221 | CC_SRC = res >> 32; |
| 213 | } | 222 | } |
| 214 | 223 | ||
| @@ -218,6 +227,7 @@ void OPPROTO op_imull_EAX_T0(void) | @@ -218,6 +227,7 @@ void OPPROTO op_imull_EAX_T0(void) | ||
| 218 | res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T0); | 227 | res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T0); |
| 219 | EAX = res; | 228 | EAX = res; |
| 220 | EDX = res >> 32; | 229 | EDX = res >> 32; |
| 230 | + CC_DST = res; | ||
| 221 | CC_SRC = (res != (int32_t)res); | 231 | CC_SRC = (res != (int32_t)res); |
| 222 | } | 232 | } |
| 223 | 233 | ||
| @@ -226,6 +236,7 @@ void OPPROTO op_imulw_T0_T1(void) | @@ -226,6 +236,7 @@ void OPPROTO op_imulw_T0_T1(void) | ||
| 226 | int res; | 236 | int res; |
| 227 | res = (int16_t)T0 * (int16_t)T1; | 237 | res = (int16_t)T0 * (int16_t)T1; |
| 228 | T0 = res; | 238 | T0 = res; |
| 239 | + CC_DST = res; | ||
| 229 | CC_SRC = (res != (int16_t)res); | 240 | CC_SRC = (res != (int16_t)res); |
| 230 | } | 241 | } |
| 231 | 242 | ||
| @@ -234,6 +245,7 @@ void OPPROTO op_imull_T0_T1(void) | @@ -234,6 +245,7 @@ void OPPROTO op_imull_T0_T1(void) | ||
| 234 | int64_t res; | 245 | int64_t res; |
| 235 | res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1); | 246 | res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1); |
| 236 | T0 = res; | 247 | T0 = res; |
| 248 | + CC_DST = res; | ||
| 237 | CC_SRC = (res != (int32_t)res); | 249 | CC_SRC = (res != (int32_t)res); |
| 238 | } | 250 | } |
| 239 | 251 | ||
| @@ -1293,31 +1305,14 @@ static int compute_c_eflags(void) | @@ -1293,31 +1305,14 @@ static int compute_c_eflags(void) | ||
| 1293 | return CC_SRC & CC_C; | 1305 | return CC_SRC & CC_C; |
| 1294 | } | 1306 | } |
| 1295 | 1307 | ||
| 1296 | -static int compute_c_mul(void) | ||
| 1297 | -{ | ||
| 1298 | - int cf; | ||
| 1299 | - cf = (CC_SRC != 0); | ||
| 1300 | - return cf; | ||
| 1301 | -} | ||
| 1302 | - | ||
| 1303 | -static int compute_all_mul(void) | ||
| 1304 | -{ | ||
| 1305 | - int cf, pf, af, zf, sf, of; | ||
| 1306 | - cf = (CC_SRC != 0); | ||
| 1307 | - pf = 0; /* undefined */ | ||
| 1308 | - af = 0; /* undefined */ | ||
| 1309 | - zf = 0; /* undefined */ | ||
| 1310 | - sf = 0; /* undefined */ | ||
| 1311 | - of = cf << 11; | ||
| 1312 | - return cf | pf | af | zf | sf | of; | ||
| 1313 | -} | ||
| 1314 | - | ||
| 1315 | CCTable cc_table[CC_OP_NB] = { | 1308 | CCTable cc_table[CC_OP_NB] = { |
| 1316 | [CC_OP_DYNAMIC] = { /* should never happen */ }, | 1309 | [CC_OP_DYNAMIC] = { /* should never happen */ }, |
| 1317 | 1310 | ||
| 1318 | [CC_OP_EFLAGS] = { compute_all_eflags, compute_c_eflags }, | 1311 | [CC_OP_EFLAGS] = { compute_all_eflags, compute_c_eflags }, |
| 1319 | 1312 | ||
| 1320 | - [CC_OP_MUL] = { compute_all_mul, compute_c_mul }, | 1313 | + [CC_OP_MULB] = { compute_all_mulb, compute_c_mull }, |
| 1314 | + [CC_OP_MULW] = { compute_all_mulw, compute_c_mull }, | ||
| 1315 | + [CC_OP_MULL] = { compute_all_mull, compute_c_mull }, | ||
| 1321 | 1316 | ||
| 1322 | [CC_OP_ADDB] = { compute_all_addb, compute_c_addb }, | 1317 | [CC_OP_ADDB] = { compute_all_addb, compute_c_addb }, |
| 1323 | [CC_OP_ADDW] = { compute_all_addw, compute_c_addw }, | 1318 | [CC_OP_ADDW] = { compute_all_addw, compute_c_addw }, |
target-i386/ops_template.h
| @@ -229,6 +229,29 @@ static int glue(compute_all_sar, SUFFIX)(void) | @@ -229,6 +229,29 @@ static int glue(compute_all_sar, SUFFIX)(void) | ||
| 229 | return cf | pf | af | zf | sf | of; | 229 | return cf | pf | af | zf | sf | of; |
| 230 | } | 230 | } |
| 231 | 231 | ||
| 232 | +#if DATA_BITS == 32 | ||
| 233 | +static int glue(compute_c_mul, SUFFIX)(void) | ||
| 234 | +{ | ||
| 235 | + int cf; | ||
| 236 | + cf = (CC_SRC != 0); | ||
| 237 | + return cf; | ||
| 238 | +} | ||
| 239 | +#endif | ||
| 240 | + | ||
| 241 | +/* NOTE: we compute the flags like the P4. On olders CPUs, only OF and | ||
| 242 | + CF are modified and it is slower to do that. */ | ||
| 243 | +static int glue(compute_all_mul, SUFFIX)(void) | ||
| 244 | +{ | ||
| 245 | + int cf, pf, af, zf, sf, of; | ||
| 246 | + cf = (CC_SRC != 0); | ||
| 247 | + pf = parity_table[(uint8_t)CC_DST]; | ||
| 248 | + af = 0; /* undefined */ | ||
| 249 | + zf = ((DATA_TYPE)CC_DST == 0) << 6; | ||
| 250 | + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; | ||
| 251 | + of = cf << 11; | ||
| 252 | + return cf | pf | af | zf | sf | of; | ||
| 253 | +} | ||
| 254 | + | ||
| 232 | /* various optimized jumps cases */ | 255 | /* various optimized jumps cases */ |
| 233 | 256 | ||
| 234 | void OPPROTO glue(op_jb_sub, SUFFIX)(void) | 257 | void OPPROTO glue(op_jb_sub, SUFFIX)(void) |
target-i386/translate.c
| @@ -2016,31 +2016,35 @@ static uint8_t *disas_insn(DisasContext *s, uint8_t *pc_start) | @@ -2016,31 +2016,35 @@ static uint8_t *disas_insn(DisasContext *s, uint8_t *pc_start) | ||
| 2016 | switch(ot) { | 2016 | switch(ot) { |
| 2017 | case OT_BYTE: | 2017 | case OT_BYTE: |
| 2018 | gen_op_mulb_AL_T0(); | 2018 | gen_op_mulb_AL_T0(); |
| 2019 | + s->cc_op = CC_OP_MULB; | ||
| 2019 | break; | 2020 | break; |
| 2020 | case OT_WORD: | 2021 | case OT_WORD: |
| 2021 | gen_op_mulw_AX_T0(); | 2022 | gen_op_mulw_AX_T0(); |
| 2023 | + s->cc_op = CC_OP_MULW; | ||
| 2022 | break; | 2024 | break; |
| 2023 | default: | 2025 | default: |
| 2024 | case OT_LONG: | 2026 | case OT_LONG: |
| 2025 | gen_op_mull_EAX_T0(); | 2027 | gen_op_mull_EAX_T0(); |
| 2028 | + s->cc_op = CC_OP_MULL; | ||
| 2026 | break; | 2029 | break; |
| 2027 | } | 2030 | } |
| 2028 | - s->cc_op = CC_OP_MUL; | ||
| 2029 | break; | 2031 | break; |
| 2030 | case 5: /* imul */ | 2032 | case 5: /* imul */ |
| 2031 | switch(ot) { | 2033 | switch(ot) { |
| 2032 | case OT_BYTE: | 2034 | case OT_BYTE: |
| 2033 | gen_op_imulb_AL_T0(); | 2035 | gen_op_imulb_AL_T0(); |
| 2036 | + s->cc_op = CC_OP_MULB; | ||
| 2034 | break; | 2037 | break; |
| 2035 | case OT_WORD: | 2038 | case OT_WORD: |
| 2036 | gen_op_imulw_AX_T0(); | 2039 | gen_op_imulw_AX_T0(); |
| 2040 | + s->cc_op = CC_OP_MULW; | ||
| 2037 | break; | 2041 | break; |
| 2038 | default: | 2042 | default: |
| 2039 | case OT_LONG: | 2043 | case OT_LONG: |
| 2040 | gen_op_imull_EAX_T0(); | 2044 | gen_op_imull_EAX_T0(); |
| 2045 | + s->cc_op = CC_OP_MULL; | ||
| 2041 | break; | 2046 | break; |
| 2042 | } | 2047 | } |
| 2043 | - s->cc_op = CC_OP_MUL; | ||
| 2044 | break; | 2048 | break; |
| 2045 | case 6: /* div */ | 2049 | case 6: /* div */ |
| 2046 | switch(ot) { | 2050 | switch(ot) { |
| @@ -2235,7 +2239,7 @@ static uint8_t *disas_insn(DisasContext *s, uint8_t *pc_start) | @@ -2235,7 +2239,7 @@ static uint8_t *disas_insn(DisasContext *s, uint8_t *pc_start) | ||
| 2235 | gen_op_imulw_T0_T1(); | 2239 | gen_op_imulw_T0_T1(); |
| 2236 | } | 2240 | } |
| 2237 | gen_op_mov_reg_T0[ot][reg](); | 2241 | gen_op_mov_reg_T0[ot][reg](); |
| 2238 | - s->cc_op = CC_OP_MUL; | 2242 | + s->cc_op = CC_OP_MULB + ot; |
| 2239 | break; | 2243 | break; |
| 2240 | case 0x1c0: | 2244 | case 0x1c0: |
| 2241 | case 0x1c1: /* xadd Ev, Gv */ | 2245 | case 0x1c1: /* xadd Ev, Gv */ |