Commit d36cd60e6c8c66e0279bad4b17e2d23833eb20b9

Authored by bellard
1 parent 5e809a80

P4 style multiplication eflags


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@481 c046a42c-6fe2-441c-8c8c-71466251a162
target-i386/cpu.h
@@ -184,7 +184,10 @@ @@ -184,7 +184,10 @@
184 enum { 184 enum {
185 CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ 185 CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
186 CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */ 186 CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */
187 - CC_OP_MUL, /* modify all flags, C, O = (CC_SRC != 0) */ 187 +
  188 + CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */
  189 + CC_OP_MULW,
  190 + CC_OP_MULL,
188 191
189 CC_OP_ADDB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ 192 CC_OP_ADDB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
190 CC_OP_ADDW, 193 CC_OP_ADDW,
target-i386/op.c
@@ -169,11 +169,16 @@ void OPPROTO op_bswapl_T0(void) @@ -169,11 +169,16 @@ void OPPROTO op_bswapl_T0(void)
169 } 169 }
170 170
171 /* multiply/divide */ 171 /* multiply/divide */
  172 +
  173 +/* XXX: add eflags optimizations */
  174 +/* XXX: add non P4 style flags */
  175 +
172 void OPPROTO op_mulb_AL_T0(void) 176 void OPPROTO op_mulb_AL_T0(void)
173 { 177 {
174 unsigned int res; 178 unsigned int res;
175 res = (uint8_t)EAX * (uint8_t)T0; 179 res = (uint8_t)EAX * (uint8_t)T0;
176 EAX = (EAX & 0xffff0000) | res; 180 EAX = (EAX & 0xffff0000) | res;
  181 + CC_DST = res;
177 CC_SRC = (res & 0xff00); 182 CC_SRC = (res & 0xff00);
178 } 183 }
179 184
@@ -182,6 +187,7 @@ void OPPROTO op_imulb_AL_T0(void) @@ -182,6 +187,7 @@ void OPPROTO op_imulb_AL_T0(void)
182 int res; 187 int res;
183 res = (int8_t)EAX * (int8_t)T0; 188 res = (int8_t)EAX * (int8_t)T0;
184 EAX = (EAX & 0xffff0000) | (res & 0xffff); 189 EAX = (EAX & 0xffff0000) | (res & 0xffff);
  190 + CC_DST = res;
185 CC_SRC = (res != (int8_t)res); 191 CC_SRC = (res != (int8_t)res);
186 } 192 }
187 193
@@ -191,6 +197,7 @@ void OPPROTO op_mulw_AX_T0(void) @@ -191,6 +197,7 @@ void OPPROTO op_mulw_AX_T0(void)
191 res = (uint16_t)EAX * (uint16_t)T0; 197 res = (uint16_t)EAX * (uint16_t)T0;
192 EAX = (EAX & 0xffff0000) | (res & 0xffff); 198 EAX = (EAX & 0xffff0000) | (res & 0xffff);
193 EDX = (EDX & 0xffff0000) | ((res >> 16) & 0xffff); 199 EDX = (EDX & 0xffff0000) | ((res >> 16) & 0xffff);
  200 + CC_DST = res;
194 CC_SRC = res >> 16; 201 CC_SRC = res >> 16;
195 } 202 }
196 203
@@ -200,6 +207,7 @@ void OPPROTO op_imulw_AX_T0(void) @@ -200,6 +207,7 @@ void OPPROTO op_imulw_AX_T0(void)
200 res = (int16_t)EAX * (int16_t)T0; 207 res = (int16_t)EAX * (int16_t)T0;
201 EAX = (EAX & 0xffff0000) | (res & 0xffff); 208 EAX = (EAX & 0xffff0000) | (res & 0xffff);
202 EDX = (EDX & 0xffff0000) | ((res >> 16) & 0xffff); 209 EDX = (EDX & 0xffff0000) | ((res >> 16) & 0xffff);
  210 + CC_DST = res;
203 CC_SRC = (res != (int16_t)res); 211 CC_SRC = (res != (int16_t)res);
204 } 212 }
205 213
@@ -209,6 +217,7 @@ void OPPROTO op_mull_EAX_T0(void) @@ -209,6 +217,7 @@ void OPPROTO op_mull_EAX_T0(void)
209 res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0); 217 res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0);
210 EAX = res; 218 EAX = res;
211 EDX = res >> 32; 219 EDX = res >> 32;
  220 + CC_DST = res;
212 CC_SRC = res >> 32; 221 CC_SRC = res >> 32;
213 } 222 }
214 223
@@ -218,6 +227,7 @@ void OPPROTO op_imull_EAX_T0(void) @@ -218,6 +227,7 @@ void OPPROTO op_imull_EAX_T0(void)
218 res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T0); 227 res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T0);
219 EAX = res; 228 EAX = res;
220 EDX = res >> 32; 229 EDX = res >> 32;
  230 + CC_DST = res;
221 CC_SRC = (res != (int32_t)res); 231 CC_SRC = (res != (int32_t)res);
222 } 232 }
223 233
@@ -226,6 +236,7 @@ void OPPROTO op_imulw_T0_T1(void) @@ -226,6 +236,7 @@ void OPPROTO op_imulw_T0_T1(void)
226 int res; 236 int res;
227 res = (int16_t)T0 * (int16_t)T1; 237 res = (int16_t)T0 * (int16_t)T1;
228 T0 = res; 238 T0 = res;
  239 + CC_DST = res;
229 CC_SRC = (res != (int16_t)res); 240 CC_SRC = (res != (int16_t)res);
230 } 241 }
231 242
@@ -234,6 +245,7 @@ void OPPROTO op_imull_T0_T1(void) @@ -234,6 +245,7 @@ void OPPROTO op_imull_T0_T1(void)
234 int64_t res; 245 int64_t res;
235 res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1); 246 res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1);
236 T0 = res; 247 T0 = res;
  248 + CC_DST = res;
237 CC_SRC = (res != (int32_t)res); 249 CC_SRC = (res != (int32_t)res);
238 } 250 }
239 251
@@ -1293,31 +1305,14 @@ static int compute_c_eflags(void) @@ -1293,31 +1305,14 @@ static int compute_c_eflags(void)
1293 return CC_SRC & CC_C; 1305 return CC_SRC & CC_C;
1294 } 1306 }
1295 1307
1296 -static int compute_c_mul(void)  
1297 -{  
1298 - int cf;  
1299 - cf = (CC_SRC != 0);  
1300 - return cf;  
1301 -}  
1302 -  
1303 -static int compute_all_mul(void)  
1304 -{  
1305 - int cf, pf, af, zf, sf, of;  
1306 - cf = (CC_SRC != 0);  
1307 - pf = 0; /* undefined */  
1308 - af = 0; /* undefined */  
1309 - zf = 0; /* undefined */  
1310 - sf = 0; /* undefined */  
1311 - of = cf << 11;  
1312 - return cf | pf | af | zf | sf | of;  
1313 -}  
1314 -  
1315 CCTable cc_table[CC_OP_NB] = { 1308 CCTable cc_table[CC_OP_NB] = {
1316 [CC_OP_DYNAMIC] = { /* should never happen */ }, 1309 [CC_OP_DYNAMIC] = { /* should never happen */ },
1317 1310
1318 [CC_OP_EFLAGS] = { compute_all_eflags, compute_c_eflags }, 1311 [CC_OP_EFLAGS] = { compute_all_eflags, compute_c_eflags },
1319 1312
1320 - [CC_OP_MUL] = { compute_all_mul, compute_c_mul }, 1313 + [CC_OP_MULB] = { compute_all_mulb, compute_c_mull },
  1314 + [CC_OP_MULW] = { compute_all_mulw, compute_c_mull },
  1315 + [CC_OP_MULL] = { compute_all_mull, compute_c_mull },
1321 1316
1322 [CC_OP_ADDB] = { compute_all_addb, compute_c_addb }, 1317 [CC_OP_ADDB] = { compute_all_addb, compute_c_addb },
1323 [CC_OP_ADDW] = { compute_all_addw, compute_c_addw }, 1318 [CC_OP_ADDW] = { compute_all_addw, compute_c_addw },
target-i386/ops_template.h
@@ -229,6 +229,29 @@ static int glue(compute_all_sar, SUFFIX)(void) @@ -229,6 +229,29 @@ static int glue(compute_all_sar, SUFFIX)(void)
229 return cf | pf | af | zf | sf | of; 229 return cf | pf | af | zf | sf | of;
230 } 230 }
231 231
  232 +#if DATA_BITS == 32
  233 +static int glue(compute_c_mul, SUFFIX)(void)
  234 +{
  235 + int cf;
  236 + cf = (CC_SRC != 0);
  237 + return cf;
  238 +}
  239 +#endif
  240 +
  241 +/* NOTE: we compute the flags like the P4. On older CPUs, only OF and
  242 + CF are modified, and emulating that behavior is slower. */
  243 +static int glue(compute_all_mul, SUFFIX)(void)
  244 +{
  245 + int cf, pf, af, zf, sf, of;
  246 + cf = (CC_SRC != 0);
  247 + pf = parity_table[(uint8_t)CC_DST];
  248 + af = 0; /* undefined */
  249 + zf = ((DATA_TYPE)CC_DST == 0) << 6;
  250 + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
  251 + of = cf << 11;
  252 + return cf | pf | af | zf | sf | of;
  253 +}
  254 +
232 /* various optimized jumps cases */ 255 /* various optimized jumps cases */
233 256
234 void OPPROTO glue(op_jb_sub, SUFFIX)(void) 257 void OPPROTO glue(op_jb_sub, SUFFIX)(void)
target-i386/translate.c
@@ -2016,31 +2016,35 @@ static uint8_t *disas_insn(DisasContext *s, uint8_t *pc_start) @@ -2016,31 +2016,35 @@ static uint8_t *disas_insn(DisasContext *s, uint8_t *pc_start)
2016 switch(ot) { 2016 switch(ot) {
2017 case OT_BYTE: 2017 case OT_BYTE:
2018 gen_op_mulb_AL_T0(); 2018 gen_op_mulb_AL_T0();
  2019 + s->cc_op = CC_OP_MULB;
2019 break; 2020 break;
2020 case OT_WORD: 2021 case OT_WORD:
2021 gen_op_mulw_AX_T0(); 2022 gen_op_mulw_AX_T0();
  2023 + s->cc_op = CC_OP_MULW;
2022 break; 2024 break;
2023 default: 2025 default:
2024 case OT_LONG: 2026 case OT_LONG:
2025 gen_op_mull_EAX_T0(); 2027 gen_op_mull_EAX_T0();
  2028 + s->cc_op = CC_OP_MULL;
2026 break; 2029 break;
2027 } 2030 }
2028 - s->cc_op = CC_OP_MUL;  
2029 break; 2031 break;
2030 case 5: /* imul */ 2032 case 5: /* imul */
2031 switch(ot) { 2033 switch(ot) {
2032 case OT_BYTE: 2034 case OT_BYTE:
2033 gen_op_imulb_AL_T0(); 2035 gen_op_imulb_AL_T0();
  2036 + s->cc_op = CC_OP_MULB;
2034 break; 2037 break;
2035 case OT_WORD: 2038 case OT_WORD:
2036 gen_op_imulw_AX_T0(); 2039 gen_op_imulw_AX_T0();
  2040 + s->cc_op = CC_OP_MULW;
2037 break; 2041 break;
2038 default: 2042 default:
2039 case OT_LONG: 2043 case OT_LONG:
2040 gen_op_imull_EAX_T0(); 2044 gen_op_imull_EAX_T0();
  2045 + s->cc_op = CC_OP_MULL;
2041 break; 2046 break;
2042 } 2047 }
2043 - s->cc_op = CC_OP_MUL;  
2044 break; 2048 break;
2045 case 6: /* div */ 2049 case 6: /* div */
2046 switch(ot) { 2050 switch(ot) {
@@ -2235,7 +2239,7 @@ static uint8_t *disas_insn(DisasContext *s, uint8_t *pc_start) @@ -2235,7 +2239,7 @@ static uint8_t *disas_insn(DisasContext *s, uint8_t *pc_start)
2235 gen_op_imulw_T0_T1(); 2239 gen_op_imulw_T0_T1();
2236 } 2240 }
2237 gen_op_mov_reg_T0[ot][reg](); 2241 gen_op_mov_reg_T0[ot][reg]();
2238 - s->cc_op = CC_OP_MUL; 2242 + s->cc_op = CC_OP_MULB + ot;
2239 break; 2243 break;
2240 case 0x1c0: 2244 case 0x1c0:
2241 case 0x1c1: /* xadd Ev, Gv */ 2245 case 0x1c1: /* xadd Ev, Gv */