Commit a35f3ec76be8e0a5424349831127e538c6b91ef5

Authored by aurel32
1 parent 34c6f050

3DNow! instruction set emulation

(Michael Tross)


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4180 c046a42c-6fe2-441c-8c8c-71466251a162
target-i386/cpu.h
... ... @@ -428,8 +428,9 @@ typedef union {
428 428  
429 429 typedef union {
430 430 uint8_t _b[8];
431   - uint16_t _w[2];
432   - uint32_t _l[1];
  431 + uint16_t _w[4];
  432 + uint32_t _l[2];
  433 + float32 _s[2];
433 434 uint64_t q;
434 435 } MMXReg;
435 436  
... ... @@ -444,6 +445,7 @@ typedef union {
444 445 #define MMX_B(n) _b[7 - (n)]
445 446 #define MMX_W(n) _w[3 - (n)]
446 447 #define MMX_L(n) _l[1 - (n)]
  448 +#define MMX_S(n) _s[1 - (n)]
447 449 #else
448 450 #define XMM_B(n) _b[n]
449 451 #define XMM_W(n) _w[n]
... ... @@ -455,6 +457,7 @@ typedef union {
455 457 #define MMX_B(n) _b[n]
456 458 #define MMX_W(n) _w[n]
457 459 #define MMX_L(n) _l[n]
  460 +#define MMX_S(n) _s[n]
458 461 #endif
459 462 #define MMX_Q(n) q
460 463  
... ... @@ -520,6 +523,7 @@ typedef struct CPUX86State {
520 523 int64_t i64;
521 524 } fp_convert;
522 525  
  526 + float_status mmx_status; /* for 3DNow! float ops */
523 527 float_status sse_status;
524 528 uint32_t mxcsr;
525 529 XMMReg xmm_regs[CPU_NB_REGS];
... ...
target-i386/helper2.c
... ... @@ -150,7 +150,8 @@ static x86_def_t x86_defs[] = {
150 150 CPUID_PSE36,
151 151 .ext_features = CPUID_EXT_SSE3,
152 152 .ext2_features = (PPRO_FEATURES & 0x0183F3FF) |
153   - CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
  153 + CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX |
  154 + CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT,
154 155 .ext3_features = CPUID_EXT3_SVM,
155 156 .xlevel = 0x8000000A,
156 157 },
... ... @@ -201,6 +202,19 @@ static x86_def_t x86_defs[] = {
201 202 .features = 0x0383F9FF,
202 203 .xlevel = 0,
203 204 },
  205 + {
  206 + .name = "athlon",
  207 + .level = 2,
  208 + .vendor1 = 0x68747541, /* "Auth" */
  209 + .vendor2 = 0x69746e65, /* "enti" */
  210 + .vendor3 = 0x444d4163, /* "cAMD" */
  211 + .family = 6,
  212 + .model = 2,
  213 + .stepping = 3,
  214 + .features = PPRO_FEATURES | PPRO_FEATURES | CPUID_PSE36 | CPUID_VME | CPUID_MTRR | CPUID_MCA,
  215 + .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | CPUID_EXT2_MMXEXT | CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT,
  216 + .xlevel = 0x80000008,
  217 + },
204 218 };
205 219  
206 220 static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
... ...
target-i386/ops_sse.h
1 1 /*
2   - * MMX/SSE/SSE2/PNI support
  2 + * MMX/3DNow!/SSE/SSE2/SSE3 (PNI) support
3 3 *
4 4 * Copyright (c) 2005 Fabrice Bellard
5 5 *
... ... @@ -409,6 +409,7 @@ static inline int satsw(int x)
/*
 * Per-element operations handed to the SSE_OP_* generator macros.
 * Each replacement list is fully parenthesized so the expansion stays a
 * single operand whatever expression surrounds the macro call.
 */
#define FCMPEQ(a, b) ((a) == (b) ? -1 : 0)

/* low word of the product; multiply as uint32_t so 0xffff * 0xffff
   cannot overflow a signed int */
#define FMULLW(a, b) ((uint32_t)(a) * (uint32_t)(b))
/* pmulhrw: high word of the signed product, rounded by adding 0x8000
   before the shift */
#define FMULHRW(a, b) (((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16)
/* high word of the unsigned product, computed in uint32_t for the same
   overflow reason as FMULLW */
#define FMULHUW(a, b) ((uint32_t)(a) * (uint32_t)(b) >> 16)
/* high word of the signed product, truncated (no rounding) */
#define FMULHW(a, b) ((int16_t)(a) * (int16_t)(b) >> 16)
414 415  
... ... @@ -455,6 +456,9 @@ SSE_OP_W(op_pcmpeqw, FCMPEQ)
455 456 SSE_OP_L(op_pcmpeql, FCMPEQ)
456 457  
457 458 SSE_OP_W(op_pmullw, FMULLW)
  459 +#if SHIFT == 0
  460 +SSE_OP_W(op_pmulhrw, FMULHRW)
  461 +#endif
458 462 SSE_OP_W(op_pmulhuw, FMULHUW)
459 463 SSE_OP_W(op_pmulhw, FMULHW)
460 464  
... ... @@ -1383,6 +1387,175 @@ void OPPROTO glue(op_punpck ## base_name ## qdq, SUFFIX) (void) \
1383 1387 UNPCK_OP(l, 0)
1384 1388 UNPCK_OP(h, 1)
1385 1389  
  1390 +/* 3DNow! float ops */
  1391 +#if SHIFT == 0
  1392 +void OPPROTO op_pi2fd(void)
  1393 +{
  1394 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1395 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1396 + d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status);
  1397 + d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status);
  1398 +}
  1399 +
  1400 +void OPPROTO op_pi2fw(void)
  1401 +{
  1402 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1403 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1404 + d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status);
  1405 + d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status);
  1406 +}
  1407 +
  1408 +void OPPROTO op_pf2id(void)
  1409 +{
  1410 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1411 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1412 + d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status);
  1413 + d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status);
  1414 +}
  1415 +
  1416 +void OPPROTO op_pf2iw(void)
  1417 +{
  1418 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1419 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1420 + d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status));
  1421 + d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status));
  1422 +}
  1423 +
  1424 +void OPPROTO op_pfacc(void)
  1425 +{
  1426 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1427 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1428 + MMXReg r;
  1429 + r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
  1430 + r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
  1431 + *d = r;
  1432 +}
  1433 +
  1434 +void OPPROTO op_pfadd(void)
  1435 +{
  1436 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1437 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1438 + d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
  1439 + d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
  1440 +}
  1441 +
  1442 +void OPPROTO op_pfcmpeq(void)
  1443 +{
  1444 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1445 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1446 + d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0;
  1447 + d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0;
  1448 +}
  1449 +
  1450 +void OPPROTO op_pfcmpge(void)
  1451 +{
  1452 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1453 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1454 + d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
  1455 + d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
  1456 +}
  1457 +
  1458 +void OPPROTO op_pfcmpgt(void)
  1459 +{
  1460 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1461 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1462 + d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
  1463 + d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
  1464 +}
  1465 +
  1466 +void OPPROTO op_pfmax(void)
  1467 +{
  1468 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1469 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1470 + if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status))
  1471 + d->MMX_S(0) = s->MMX_S(0);
  1472 + if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status))
  1473 + d->MMX_S(1) = s->MMX_S(1);
  1474 +}
  1475 +
  1476 +void OPPROTO op_pfmin(void)
  1477 +{
  1478 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1479 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1480 + if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status))
  1481 + d->MMX_S(0) = s->MMX_S(0);
  1482 + if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status))
  1483 + d->MMX_S(1) = s->MMX_S(1);
  1484 +}
  1485 +
  1486 +void OPPROTO op_pfmul(void)
  1487 +{
  1488 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1489 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1490 + d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
  1491 + d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
  1492 +}
  1493 +
  1494 +void OPPROTO op_pfnacc(void)
  1495 +{
  1496 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1497 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1498 + MMXReg r;
  1499 + r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
  1500 + r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
  1501 + *d = r;
  1502 +}
  1503 +
  1504 +void OPPROTO op_pfpnacc(void)
  1505 +{
  1506 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1507 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1508 + MMXReg r;
  1509 + r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
  1510 + r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
  1511 + *d = r;
  1512 +}
  1513 +
  1514 +void OPPROTO op_pfrcp(void)
  1515 +{
  1516 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1517 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1518 + d->MMX_S(0) = approx_rcp(s->MMX_S(0));
  1519 + d->MMX_S(1) = d->MMX_S(0);
  1520 +}
  1521 +
  1522 +void OPPROTO op_pfrsqrt(void)
  1523 +{
  1524 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1525 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1526 + d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff;
  1527 + d->MMX_S(1) = approx_rsqrt(d->MMX_S(1));
  1528 + d->MMX_L(1) |= s->MMX_L(0) & 0x80000000;
  1529 + d->MMX_L(0) = d->MMX_L(1);
  1530 +}
  1531 +
  1532 +void OPPROTO op_pfsub(void)
  1533 +{
  1534 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1535 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1536 + d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
  1537 + d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
  1538 +}
  1539 +
  1540 +void OPPROTO op_pfsubr(void)
  1541 +{
  1542 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1543 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1544 + d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status);
  1545 + d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status);
  1546 +}
  1547 +
  1548 +void OPPROTO op_pswapd(void)
  1549 +{
  1550 + MMXReg *d = (MMXReg *)((char *)env + PARAM1);
  1551 + MMXReg *s = (MMXReg *)((char *)env + PARAM2);
  1552 + MMXReg r;
  1553 + r.MMX_L(0) = s->MMX_L(1);
  1554 + r.MMX_L(1) = s->MMX_L(0);
  1555 + *d = r;
  1556 +}
  1557 +#endif
  1558 +
1386 1559 #undef SHIFT
1387 1560 #undef XMM_ONLY
1388 1561 #undef Reg
... ...
target-i386/translate.c
... ... @@ -2408,12 +2408,16 @@ static GenOpFunc1 *gen_sto_env_A0[3] = {
2408 2408 };
2409 2409  
2410 2410 #define SSE_SPECIAL ((GenOpFunc2 *)1)
  2411 +#define SSE_DUMMY ((GenOpFunc2 *)2)
2411 2412  
2412 2413 #define MMX_OP2(x) { gen_op_ ## x ## _mmx, gen_op_ ## x ## _xmm }
2413 2414 #define SSE_FOP(x) { gen_op_ ## x ## ps, gen_op_ ## x ## pd, \
2414 2415 gen_op_ ## x ## ss, gen_op_ ## x ## sd, }
2415 2416  
2416 2417 static GenOpFunc2 *sse_op_table1[256][4] = {
  2418 + /* 3DNow! extensions */
  2419 + [0x0e] = { SSE_DUMMY }, /* femms */
  2420 + [0x0f] = { SSE_DUMMY }, /* pf... */
2417 2421 /* pure SSE operations */
2418 2422 [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2419 2423 [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
... ... @@ -2480,7 +2484,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2480 2484 [0x74] = MMX_OP2(pcmpeqb),
2481 2485 [0x75] = MMX_OP2(pcmpeqw),
2482 2486 [0x76] = MMX_OP2(pcmpeql),
2483   - [0x77] = { SSE_SPECIAL }, /* emms */
  2487 + [0x77] = { SSE_DUMMY }, /* emms */
2484 2488 [0x7c] = { NULL, gen_op_haddpd, NULL, gen_op_haddps },
2485 2489 [0x7d] = { NULL, gen_op_hsubpd, NULL, gen_op_hsubps },
2486 2490 [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
... ... @@ -2577,6 +2581,33 @@ static GenOpFunc2 *sse_op_table4[8][4] = {
2577 2581 SSE_FOP(cmpord),
2578 2582 };
2579 2583  
  2584 +static GenOpFunc2 *sse_op_table5[256] = {
  2585 + [0x0c] = gen_op_pi2fw,
  2586 + [0x0d] = gen_op_pi2fd,
  2587 + [0x1c] = gen_op_pf2iw,
  2588 + [0x1d] = gen_op_pf2id,
  2589 + [0x8a] = gen_op_pfnacc,
  2590 + [0x8e] = gen_op_pfpnacc,
  2591 + [0x90] = gen_op_pfcmpge,
  2592 + [0x94] = gen_op_pfmin,
  2593 + [0x96] = gen_op_pfrcp,
  2594 + [0x97] = gen_op_pfrsqrt,
  2595 + [0x9a] = gen_op_pfsub,
  2596 + [0x9e] = gen_op_pfadd,
  2597 + [0xa0] = gen_op_pfcmpgt,
  2598 + [0xa4] = gen_op_pfmax,
  2599 + [0xa6] = gen_op_movq, /* pfrcpit1; no need to actually increase precision */
  2600 + [0xa7] = gen_op_movq, /* pfrsqit1 */
  2601 + [0xaa] = gen_op_pfsubr,
  2602 + [0xae] = gen_op_pfacc,
  2603 + [0xb0] = gen_op_pfcmpeq,
  2604 + [0xb4] = gen_op_pfmul,
  2605 + [0xb6] = gen_op_movq, /* pfrcpit2 */
  2606 + [0xb7] = gen_op_pmulhrw_mmx,
  2607 + [0xbb] = gen_op_pswapd,
  2608 + [0xbf] = gen_op_pavgb_mmx /* pavgusb */
  2609 +};
  2610 +
2580 2611 static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2581 2612 {
2582 2613 int b1, op1_offset, op2_offset, is_xmm, val, ot;
... ... @@ -2596,7 +2627,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2596 2627 sse_op2 = sse_op_table1[b][b1];
2597 2628 if (!sse_op2)
2598 2629 goto illegal_op;
2599   - if (b <= 0x5f || b == 0xc6 || b == 0xc2) {
  2630 + if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
2600 2631 is_xmm = 1;
2601 2632 } else {
2602 2633 if (b1 == 0) {
... ... @@ -2618,8 +2649,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2618 2649 }
2619 2650 if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
2620 2651 goto illegal_op;
2621   - if (b == 0x77) {
2622   - /* emms */
  2652 + if (b == 0x77 || b == 0x0e) {
  2653 + /* emms or femms */
2623 2654 gen_op_emms();
2624 2655 return;
2625 2656 }
... ... @@ -3151,6 +3182,13 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3151 3182 }
3152 3183 }
3153 3184 switch(b) {
  3185 + case 0x0f: /* 3DNow! data insns */
  3186 + val = ldub_code(s->pc++);
  3187 + sse_op2 = sse_op_table5[val];
  3188 + if (!sse_op2)
  3189 + goto illegal_op;
  3190 + sse_op2(op1_offset, op2_offset);
  3191 + break;
3154 3192 case 0x70: /* pshufx insn */
3155 3193 case 0xc6: /* pshufx insn */
3156 3194 val = ldub_code(s->pc++);
... ... @@ -6148,7 +6186,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
6148 6186 gen_eob(s);
6149 6187 }
6150 6188 break;
6151   - /* MMX/SSE/SSE2/PNI support */
  6189 + /* MMX/3DNow!/SSE/SSE2/SSE3 support */
6152 6190 case 0x1c3: /* MOVNTI reg, mem */
6153 6191 if (!(s->cpuid_features & CPUID_SSE2))
6154 6192 goto illegal_op;
... ... @@ -6214,6 +6252,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
6214 6252 case 7: /* sfence / clflush */
6215 6253 if ((modrm & 0xc7) == 0xc0) {
6216 6254 /* sfence */
  6255 + /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_MMXEXT */
6217 6256 if (!(s->cpuid_features & CPUID_SSE))
6218 6257 goto illegal_op;
6219 6258 } else {
... ... @@ -6227,8 +6266,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
6227 6266 goto illegal_op;
6228 6267 }
6229 6268 break;
6230   - case 0x10d: /* prefetch */
  6269 + case 0x10d: /* 3DNow! prefetch(w) */
6231 6270 modrm = ldub_code(s->pc++);
  6271 + mod = (modrm >> 6) & 3;
  6272 + if (mod == 3)
  6273 + goto illegal_op;
6232 6274 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
6233 6275 /* ignore for now */
6234 6276 break;
... ... @@ -6245,6 +6287,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
6245 6287 gen_op_rsm();
6246 6288 gen_eob(s);
6247 6289 break;
  6290 + case 0x10e ... 0x10f:
  6291 + /* 3DNow! instructions, ignore prefixes */
  6292 + s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
6248 6293 case 0x110 ... 0x117:
6249 6294 case 0x128 ... 0x12f:
6250 6295 case 0x150 ... 0x177:
... ...