Commit a35f3ec76be8e0a5424349831127e538c6b91ef5
1 parent
34c6f050
3DNow! instruction set emulation
(Michael Tross) git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4180 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 4 changed files with 246 additions and 10 deletions
target-i386/cpu.h
| @@ -428,8 +428,9 @@ typedef union { | @@ -428,8 +428,9 @@ typedef union { | ||
| 428 | 428 | ||
| 429 | typedef union { | 429 | typedef union { /* one MMX register, viewable as bytes, words, dwords, float32 pairs or a single qword */ |
| 430 | uint8_t _b[8]; | 430 | uint8_t _b[8]; |
| 431 | - uint16_t _w[2]; | ||
| 432 | - uint32_t _l[1]; | 431 | + uint16_t _w[4]; /* four 16-bit lanes; the MMX_W() accessor indexes up to _w[3] */ |
| 432 | + uint32_t _l[2]; /* two 32-bit lanes; the MMX_L() accessor indexes up to _l[1] */ | ||
| 433 | + float32 _s[2]; /* two float32 lanes, reached through the MMX_S() accessor */ | ||
| 433 | uint64_t q; | 434 | uint64_t q; |
| 434 | } MMXReg; | 435 | } MMXReg; |
| 435 | 436 | ||
| @@ -444,6 +445,7 @@ typedef union { | @@ -444,6 +445,7 @@ typedef union { | ||
| 444 | #define MMX_B(n) _b[7 - (n)] | 445 | #define MMX_B(n) _b[7 - (n)] |
| 445 | #define MMX_W(n) _w[3 - (n)] | 446 | #define MMX_W(n) _w[3 - (n)] |
| 446 | #define MMX_L(n) _l[1 - (n)] | 447 | #define MMX_L(n) _l[1 - (n)] |
| 448 | +#define MMX_S(n) _s[1 - (n)] | ||
| 447 | #else | 449 | #else |
| 448 | #define XMM_B(n) _b[n] | 450 | #define XMM_B(n) _b[n] |
| 449 | #define XMM_W(n) _w[n] | 451 | #define XMM_W(n) _w[n] |
| @@ -455,6 +457,7 @@ typedef union { | @@ -455,6 +457,7 @@ typedef union { | ||
| 455 | #define MMX_B(n) _b[n] | 457 | #define MMX_B(n) _b[n] |
| 456 | #define MMX_W(n) _w[n] | 458 | #define MMX_W(n) _w[n] |
| 457 | #define MMX_L(n) _l[n] | 459 | #define MMX_L(n) _l[n] |
| 460 | +#define MMX_S(n) _s[n] | ||
| 458 | #endif | 461 | #endif |
| 459 | #define MMX_Q(n) q | 462 | #define MMX_Q(n) q |
| 460 | 463 | ||
| @@ -520,6 +523,7 @@ typedef struct CPUX86State { | @@ -520,6 +523,7 @@ typedef struct CPUX86State { | ||
| 520 | int64_t i64; | 523 | int64_t i64; |
| 521 | } fp_convert; | 524 | } fp_convert; |
| 522 | 525 | ||
| 526 | + float_status mmx_status; /* for 3DNow! float ops */ | ||
| 523 | float_status sse_status; | 527 | float_status sse_status; |
| 524 | uint32_t mxcsr; | 528 | uint32_t mxcsr; |
| 525 | XMMReg xmm_regs[CPU_NB_REGS]; | 529 | XMMReg xmm_regs[CPU_NB_REGS]; |
target-i386/helper2.c
| @@ -150,7 +150,8 @@ static x86_def_t x86_defs[] = { | @@ -150,7 +150,8 @@ static x86_def_t x86_defs[] = { | ||
| 150 | CPUID_PSE36, | 150 | CPUID_PSE36, |
| 151 | .ext_features = CPUID_EXT_SSE3, | 151 | .ext_features = CPUID_EXT_SSE3, |
| 152 | .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | | 152 | .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | |
| 153 | - CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, | 153 | + CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | |
| 154 | + CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT, | ||
| 154 | .ext3_features = CPUID_EXT3_SVM, | 155 | .ext3_features = CPUID_EXT3_SVM, |
| 155 | .xlevel = 0x8000000A, | 156 | .xlevel = 0x8000000A, |
| 156 | }, | 157 | }, |
| @@ -201,6 +202,19 @@ static x86_def_t x86_defs[] = { | @@ -201,6 +202,19 @@ static x86_def_t x86_defs[] = { | ||
| 201 | .features = 0x0383F9FF, | 202 | .features = 0x0383F9FF, |
| 202 | .xlevel = 0, | 203 | .xlevel = 0, |
| 203 | }, | 204 | }, |
| 205 | + { /* "athlon" model with the AuthenticAMD vendor string; ext2_features advertises MMXEXT, 3DNow! and extended 3DNow! */ | ||
| 206 | + .name = "athlon", | ||
| 207 | + .level = 2, | ||
| 208 | + .vendor1 = 0x68747541, /* "Auth" */ | ||
| 209 | + .vendor2 = 0x69746e65, /* "enti" */ | ||
| 210 | + .vendor3 = 0x444d4163, /* "cAMD" */ | ||
| 211 | + .family = 6, | ||
| 212 | + .model = 2, | ||
| 213 | + .stepping = 3, | ||
| 214 | + .features = PPRO_FEATURES | CPUID_PSE36 | CPUID_VME | CPUID_MTRR | CPUID_MCA, /* PPRO_FEATURES was OR-ed in twice; the duplicate is dropped, value unchanged */ | ||
| 215 | + .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | CPUID_EXT2_MMXEXT | CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT, | ||
| 216 | + .xlevel = 0x80000008, | ||
| 217 | + }, | ||
| 204 | }; | 218 | }; |
| 205 | 219 | ||
| 206 | static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model) | 220 | static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model) |
target-i386/ops_sse.h
| 1 | /* | 1 | /* |
| 2 | - * MMX/SSE/SSE2/PNI support | 2 | + * MMX/3DNow!/SSE/SSE2/SSE3/PNI support |
| 3 | * | 3 | * |
| 4 | * Copyright (c) 2005 Fabrice Bellard | 4 | * Copyright (c) 2005 Fabrice Bellard |
| 5 | * | 5 | * |
| @@ -409,6 +409,7 @@ static inline int satsw(int x) | @@ -409,6 +409,7 @@ static inline int satsw(int x) | ||
| 409 | #define FCMPEQ(a, b) (a) == (b) ? -1 : 0 | 409 | #define FCMPEQ(a, b) (a) == (b) ? -1 : 0 |
| 410 | 410 | ||
| 411 | #define FMULLW(a, b) (a) * (b) | 411 | #define FMULLW(a, b) (a) * (b) |
| 412 | +#define FMULHRW(a, b) ((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16 /* signed 16x16 product plus 0x8000 for rounding, keeping the bits above bit 15; the expansion has no outer parentheses, so use it only as a whole right-hand side */ | ||
| 412 | #define FMULHUW(a, b) (a) * (b) >> 16 | 413 | #define FMULHUW(a, b) (a) * (b) >> 16 |
| 413 | #define FMULHW(a, b) (int16_t)(a) * (int16_t)(b) >> 16 | 414 | #define FMULHW(a, b) (int16_t)(a) * (int16_t)(b) >> 16 |
| 414 | 415 | ||
| @@ -455,6 +456,9 @@ SSE_OP_W(op_pcmpeqw, FCMPEQ) | @@ -455,6 +456,9 @@ SSE_OP_W(op_pcmpeqw, FCMPEQ) | ||
| 455 | SSE_OP_L(op_pcmpeql, FCMPEQ) | 456 | SSE_OP_L(op_pcmpeql, FCMPEQ) |
| 456 | 457 | ||
| 457 | SSE_OP_W(op_pmullw, FMULLW) | 458 | SSE_OP_W(op_pmullw, FMULLW) |
| 459 | +#if SHIFT == 0 | ||
| 460 | +SSE_OP_W(op_pmulhrw, FMULHRW) /* word-wise op built from FMULHRW, emitted only when SHIFT == 0; NOTE(review): presumably the MMX pass, since the 3DNow! table refers to gen_op_pmulhrw_mmx -- confirm */ | ||
| 461 | +#endif | ||
| 458 | SSE_OP_W(op_pmulhuw, FMULHUW) | 462 | SSE_OP_W(op_pmulhuw, FMULHUW) |
| 459 | SSE_OP_W(op_pmulhw, FMULHW) | 463 | SSE_OP_W(op_pmulhw, FMULHW) |
| 460 | 464 | ||
| @@ -1383,6 +1387,175 @@ void OPPROTO glue(op_punpck ## base_name ## qdq, SUFFIX) (void) \ | @@ -1383,6 +1387,175 @@ void OPPROTO glue(op_punpck ## base_name ## qdq, SUFFIX) (void) \ | ||
| 1383 | UNPCK_OP(l, 0) | 1387 | UNPCK_OP(l, 0) |
| 1384 | UNPCK_OP(h, 1) | 1388 | UNPCK_OP(h, 1) |
| 1385 | 1389 | ||
| 1390 | +/* 3DNow! float ops */ | ||
| 1391 | +#if SHIFT == 0 | ||
| 1392 | +void OPPROTO op_pi2fd(void) /* convert each 32-bit lane of the source with int32_to_float32() and store the float32 results in the destination */ | ||
| 1393 | +{ | ||
| 1394 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); /* destination register: PARAM1 is a byte offset into env */ | ||
| 1395 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); /* source register: PARAM2 is a byte offset into env */ | ||
| 1396 | + d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status); | ||
| 1397 | + d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status); | ||
| 1398 | +} | ||
| 1399 | + | ||
| 1400 | +void OPPROTO op_pi2fw(void) /* convert source words 0 and 2, sign-extended by the int16_t cast, into float32 lanes 0 and 1 of the destination */ | ||
| 1401 | +{ | ||
| 1402 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1403 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1404 | + d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status); | ||
| 1405 | + d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status); /* word 2, not word 1: words 1 and 3 are ignored */ | ||
| 1406 | +} | ||
| 1407 | + | ||
| 1408 | +void OPPROTO op_pf2id(void) /* convert each float32 lane of the source to a 32-bit integer with the round-to-zero conversion */ | ||
| 1409 | +{ | ||
| 1410 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1411 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1412 | + d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status); | ||
| 1413 | + d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status); | ||
| 1414 | +} | ||
| 1415 | + | ||
| 1416 | +void OPPROTO op_pf2iw(void) /* round-to-zero float32-to-int32 conversion of each lane, passed through satsw() before being stored in the full 32-bit lane */ | ||
| 1417 | +{ | ||
| 1418 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1419 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1420 | + d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status)); /* NOTE(review): satsw() presumably clamps to the signed 16-bit range; its body is not shown here -- confirm */ | ||
| 1421 | + d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status)); | ||
| 1422 | +} | ||
| 1423 | + | ||
| 1424 | +void OPPROTO op_pfacc(void) /* horizontal add: result lane 0 = d0 + d1, result lane 1 = s0 + s1 */ | ||
| 1425 | +{ | ||
| 1426 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1427 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1428 | + MMXReg r; /* temporary: d is overwritten only after every input lane has been read */ | ||
| 1429 | + r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); | ||
| 1430 | + r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); | ||
| 1431 | + *d = r; | ||
| 1432 | +} | ||
| 1433 | + | ||
| 1434 | +void OPPROTO op_pfadd(void) /* lane-wise float32 addition: d[i] = d[i] + s[i] for both lanes */ | ||
| 1435 | +{ | ||
| 1436 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1437 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1438 | + d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); | ||
| 1439 | + d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); | ||
| 1440 | +} | ||
| 1441 | + | ||
| 1442 | +void OPPROTO op_pfcmpeq(void) /* lane-wise equality mask: each 32-bit lane of d becomes -1 (all bits set) when float32_eq(d, s) holds, otherwise 0 */ | ||
| 1443 | +{ | ||
| 1444 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1445 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1446 | + d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0; | ||
| 1447 | + d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0; | ||
| 1448 | +} | ||
| 1449 | + | ||
| 1450 | +void OPPROTO op_pfcmpge(void) /* lane-wise d >= s mask, written as float32_le(s, d): each lane of d becomes -1 when true, otherwise 0 */ | ||
| 1451 | +{ | ||
| 1452 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1453 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1454 | + d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; /* operands swapped on purpose: s <= d */ | ||
| 1455 | + d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0; | ||
| 1456 | +} | ||
| 1457 | + | ||
| 1458 | +void OPPROTO op_pfcmpgt(void) /* lane-wise d > s mask, written as float32_lt(s, d): each lane of d becomes -1 when true, otherwise 0 */ | ||
| 1459 | +{ | ||
| 1460 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1461 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1462 | + d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; /* operands swapped on purpose: s < d */ | ||
| 1463 | + d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0; | ||
| 1464 | +} | ||
| 1465 | + | ||
| 1466 | +void OPPROTO op_pfmax(void) /* lane-wise maximum: a lane of d is replaced by the matching lane of s only where float32_lt(d, s) holds */ | ||
| 1467 | +{ | ||
| 1468 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1469 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1470 | + if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status)) | ||
| 1471 | + d->MMX_S(0) = s->MMX_S(0); | ||
| 1472 | + if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status)) | ||
| 1473 | + d->MMX_S(1) = s->MMX_S(1); | ||
| 1474 | +} | ||
| 1475 | + | ||
| 1476 | +void OPPROTO op_pfmin(void) /* lane-wise minimum: a lane of d is replaced by the matching lane of s only where float32_lt(s, d) holds */ | ||
| 1477 | +{ | ||
| 1478 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1479 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1480 | + if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status)) | ||
| 1481 | + d->MMX_S(0) = s->MMX_S(0); | ||
| 1482 | + if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status)) | ||
| 1483 | + d->MMX_S(1) = s->MMX_S(1); | ||
| 1484 | +} | ||
| 1485 | + | ||
| 1486 | +void OPPROTO op_pfmul(void) /* lane-wise float32 multiplication: d[i] = d[i] * s[i] for both lanes */ | ||
| 1487 | +{ | ||
| 1488 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1489 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1490 | + d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); | ||
| 1491 | + d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); | ||
| 1492 | +} | ||
| 1493 | + | ||
| 1494 | +void OPPROTO op_pfnacc(void) /* horizontal subtract: result lane 0 = d0 - d1, result lane 1 = s0 - s1 */ | ||
| 1495 | +{ | ||
| 1496 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1497 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1498 | + MMXReg r; /* temporary: d is overwritten only after every input lane has been read */ | ||
| 1499 | + r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); | ||
| 1500 | + r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); | ||
| 1501 | + *d = r; | ||
| 1502 | +} | ||
| 1503 | + | ||
| 1504 | +void OPPROTO op_pfpnacc(void) /* mixed horizontal op: result lane 0 = d0 - d1, result lane 1 = s0 + s1 */ | ||
| 1505 | +{ | ||
| 1506 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1507 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1508 | + MMXReg r; /* temporary: d is overwritten only after every input lane has been read */ | ||
| 1509 | + r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); /* difference of the destination lanes */ | ||
| 1510 | + r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); /* sum of the source lanes */ | ||
| 1511 | + *d = r; | ||
| 1512 | +} | ||
| 1513 | + | ||
| 1514 | +void OPPROTO op_pfrcp(void) /* apply approx_rcp() to source lane 0 and write the result to both destination lanes; source lane 1 is not read */ | ||
| 1515 | +{ | ||
| 1516 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1517 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1518 | + d->MMX_S(0) = approx_rcp(s->MMX_S(0)); | ||
| 1519 | + d->MMX_S(1) = d->MMX_S(0); /* duplicate the result into the other lane */ | ||
| 1520 | +} | ||
| 1521 | + | ||
| 1522 | +void OPPROTO op_pfrsqrt(void) /* apply approx_rsqrt() to the magnitude of source lane 0, give the result the sign bit of the source, and write it to both destination lanes */ | ||
| 1523 | +{ | ||
| 1524 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1525 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1526 | + d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff; /* copy source lane 0 with bit 31 (the sign) cleared; lane 1 of d is used as scratch so source lane 0 stays intact even if d and s coincide */ | ||
| 1527 | + d->MMX_S(1) = approx_rsqrt(d->MMX_S(1)); | ||
| 1528 | + d->MMX_L(1) |= s->MMX_L(0) & 0x80000000; /* OR the original sign bit back into the result */ | ||
| 1529 | + d->MMX_L(0) = d->MMX_L(1); /* duplicate the result into lane 0 */ | ||
| 1530 | +} | ||
| 1531 | + | ||
| 1532 | +void OPPROTO op_pfsub(void) /* lane-wise float32 subtraction: d[i] = d[i] - s[i] for both lanes */ | ||
| 1533 | +{ | ||
| 1534 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1535 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1536 | + d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); | ||
| 1537 | + d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); | ||
| 1538 | +} | ||
| 1539 | + | ||
| 1540 | +void OPPROTO op_pfsubr(void) /* lane-wise reversed subtraction: d[i] = s[i] - d[i] for both lanes */ | ||
| 1541 | +{ | ||
| 1542 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1543 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1544 | + d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status); /* source is the minuend here */ | ||
| 1545 | + d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status); | ||
| 1546 | +} | ||
| 1547 | + | ||
| 1548 | +void OPPROTO op_pswapd(void) /* write the source to the destination with its two 32-bit lanes exchanged */ | ||
| 1549 | +{ | ||
| 1550 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | ||
| 1551 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | ||
| 1552 | + MMXReg r; /* temporary, so the swap is still right when d and s point at the same register */ | ||
| 1553 | + r.MMX_L(0) = s->MMX_L(1); | ||
| 1554 | + r.MMX_L(1) = s->MMX_L(0); | ||
| 1555 | + *d = r; | ||
| 1556 | +} | ||
| 1557 | +#endif | ||
| 1558 | + | ||
| 1386 | #undef SHIFT | 1559 | #undef SHIFT |
| 1387 | #undef XMM_ONLY | 1560 | #undef XMM_ONLY |
| 1388 | #undef Reg | 1561 | #undef Reg |
target-i386/translate.c
| @@ -2408,12 +2408,16 @@ static GenOpFunc1 *gen_sto_env_A0[3] = { | @@ -2408,12 +2408,16 @@ static GenOpFunc1 *gen_sto_env_A0[3] = { | ||
| 2408 | }; | 2408 | }; |
| 2409 | 2409 | ||
| 2410 | #define SSE_SPECIAL ((GenOpFunc2 *)1) | 2410 | #define SSE_SPECIAL ((GenOpFunc2 *)1) |
| 2411 | +#define SSE_DUMMY ((GenOpFunc2 *)2) /* non-NULL placeholder used in sse_op_table1 for femms, the 3DNow! 0x0f escape and emms, so the NULL check in gen_sse() does not reject them; NOTE(review): looks like a marker that is never called as a function -- confirm */ | ||
| 2411 | 2412 | ||
| 2412 | #define MMX_OP2(x) { gen_op_ ## x ## _mmx, gen_op_ ## x ## _xmm } | 2413 | #define MMX_OP2(x) { gen_op_ ## x ## _mmx, gen_op_ ## x ## _xmm } |
| 2413 | #define SSE_FOP(x) { gen_op_ ## x ## ps, gen_op_ ## x ## pd, \ | 2414 | #define SSE_FOP(x) { gen_op_ ## x ## ps, gen_op_ ## x ## pd, \ |
| 2414 | gen_op_ ## x ## ss, gen_op_ ## x ## sd, } | 2415 | gen_op_ ## x ## ss, gen_op_ ## x ## sd, } |
| 2415 | 2416 | ||
| 2416 | static GenOpFunc2 *sse_op_table1[256][4] = { | 2417 | static GenOpFunc2 *sse_op_table1[256][4] = { |
| 2418 | + /* 3DNow! extensions */ | ||
| 2419 | + [0x0e] = { SSE_DUMMY }, /* femms */ | ||
| 2420 | + [0x0f] = { SSE_DUMMY }, /* pf... */ | ||
| 2417 | /* pure SSE operations */ | 2421 | /* pure SSE operations */ |
| 2418 | [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ | 2422 | [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ |
| 2419 | [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ | 2423 | [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ |
| @@ -2480,7 +2484,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = { | @@ -2480,7 +2484,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = { | ||
| 2480 | [0x74] = MMX_OP2(pcmpeqb), | 2484 | [0x74] = MMX_OP2(pcmpeqb), |
| 2481 | [0x75] = MMX_OP2(pcmpeqw), | 2485 | [0x75] = MMX_OP2(pcmpeqw), |
| 2482 | [0x76] = MMX_OP2(pcmpeql), | 2486 | [0x76] = MMX_OP2(pcmpeql), |
| 2483 | - [0x77] = { SSE_SPECIAL }, /* emms */ | 2487 | + [0x77] = { SSE_DUMMY }, /* emms */ |
| 2484 | [0x7c] = { NULL, gen_op_haddpd, NULL, gen_op_haddps }, | 2488 | [0x7c] = { NULL, gen_op_haddpd, NULL, gen_op_haddps }, |
| 2485 | [0x7d] = { NULL, gen_op_hsubpd, NULL, gen_op_hsubps }, | 2489 | [0x7d] = { NULL, gen_op_hsubpd, NULL, gen_op_hsubps }, |
| 2486 | [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */ | 2490 | [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */ |
| @@ -2577,6 +2581,33 @@ static GenOpFunc2 *sse_op_table4[8][4] = { | @@ -2577,6 +2581,33 @@ static GenOpFunc2 *sse_op_table4[8][4] = { | ||
| 2577 | SSE_FOP(cmpord), | 2581 | SSE_FOP(cmpord), |
| 2578 | }; | 2582 | }; |
| 2579 | 2583 | ||
| 2584 | +static GenOpFunc2 *sse_op_table5[256] = { /* 3DNow! handlers, indexed by the opcode byte that gen_sse() fetches in its 0x0f case; slots left unset are NULL and are rejected there as illegal ops */ | ||
| 2585 | + [0x0c] = gen_op_pi2fw, | ||
| 2586 | + [0x0d] = gen_op_pi2fd, | ||
| 2587 | + [0x1c] = gen_op_pf2iw, | ||
| 2588 | + [0x1d] = gen_op_pf2id, | ||
| 2589 | + [0x8a] = gen_op_pfnacc, | ||
| 2590 | + [0x8e] = gen_op_pfpnacc, | ||
| 2591 | + [0x90] = gen_op_pfcmpge, | ||
| 2592 | + [0x94] = gen_op_pfmin, | ||
| 2593 | + [0x96] = gen_op_pfrcp, | ||
| 2594 | + [0x97] = gen_op_pfrsqrt, | ||
| 2595 | + [0x9a] = gen_op_pfsub, | ||
| 2596 | + [0x9e] = gen_op_pfadd, | ||
| 2597 | + [0xa0] = gen_op_pfcmpgt, | ||
| 2598 | + [0xa4] = gen_op_pfmax, | ||
| 2599 | + [0xa6] = gen_op_movq, /* pfrcpit1: mapped to a plain move; no need to actually increase precision */ | ||
| 2600 | + [0xa7] = gen_op_movq, /* pfrsqit1: likewise mapped to a plain move */ | ||
| 2601 | + [0xaa] = gen_op_pfsubr, | ||
| 2602 | + [0xae] = gen_op_pfacc, | ||
| 2603 | + [0xb0] = gen_op_pfcmpeq, | ||
| 2604 | + [0xb4] = gen_op_pfmul, | ||
| 2605 | + [0xb6] = gen_op_movq, /* pfrcpit2: likewise mapped to a plain move */ | ||
| 2606 | + [0xb7] = gen_op_pmulhrw_mmx, | ||
| 2607 | + [0xbb] = gen_op_pswapd, | ||
| 2608 | + [0xbf] = gen_op_pavgb_mmx /* pavgusb: reuses the existing MMX pavgb op */ | ||
| 2609 | +}; | ||
| 2610 | + | ||
| 2580 | static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) | 2611 | static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) |
| 2581 | { | 2612 | { |
| 2582 | int b1, op1_offset, op2_offset, is_xmm, val, ot; | 2613 | int b1, op1_offset, op2_offset, is_xmm, val, ot; |
| @@ -2596,7 +2627,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) | @@ -2596,7 +2627,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) | ||
| 2596 | sse_op2 = sse_op_table1[b][b1]; | 2627 | sse_op2 = sse_op_table1[b][b1]; |
| 2597 | if (!sse_op2) | 2628 | if (!sse_op2) |
| 2598 | goto illegal_op; | 2629 | goto illegal_op; |
| 2599 | - if (b <= 0x5f || b == 0xc6 || b == 0xc2) { | 2630 | + if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) { |
| 2600 | is_xmm = 1; | 2631 | is_xmm = 1; |
| 2601 | } else { | 2632 | } else { |
| 2602 | if (b1 == 0) { | 2633 | if (b1 == 0) { |
| @@ -2618,8 +2649,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) | @@ -2618,8 +2649,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) | ||
| 2618 | } | 2649 | } |
| 2619 | if (is_xmm && !(s->flags & HF_OSFXSR_MASK)) | 2650 | if (is_xmm && !(s->flags & HF_OSFXSR_MASK)) |
| 2620 | goto illegal_op; | 2651 | goto illegal_op; |
| 2621 | - if (b == 0x77) { | ||
| 2622 | - /* emms */ | 2652 | + if (b == 0x77 || b == 0x0e) { |
| 2653 | + /* emms or femms */ | ||
| 2623 | gen_op_emms(); | 2654 | gen_op_emms(); |
| 2624 | return; | 2655 | return; |
| 2625 | } | 2656 | } |
| @@ -3151,6 +3182,13 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) | @@ -3151,6 +3182,13 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) | ||
| 3151 | } | 3182 | } |
| 3152 | } | 3183 | } |
| 3153 | switch(b) { | 3184 | switch(b) { |
| 3185 | + case 0x0f: /* 3DNow! data insns */ | ||
| 3186 | + val = ldub_code(s->pc++); | ||
| 3187 | + sse_op2 = sse_op_table5[val]; | ||
| 3188 | + if (!sse_op2) | ||
| 3189 | + goto illegal_op; | ||
| 3190 | + sse_op2(op1_offset, op2_offset); | ||
| 3191 | + break; | ||
| 3154 | case 0x70: /* pshufx insn */ | 3192 | case 0x70: /* pshufx insn */ |
| 3155 | case 0xc6: /* pshufx insn */ | 3193 | case 0xc6: /* pshufx insn */ |
| 3156 | val = ldub_code(s->pc++); | 3194 | val = ldub_code(s->pc++); |
| @@ -6148,7 +6186,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) | @@ -6148,7 +6186,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) | ||
| 6148 | gen_eob(s); | 6186 | gen_eob(s); |
| 6149 | } | 6187 | } |
| 6150 | break; | 6188 | break; |
| 6151 | - /* MMX/SSE/SSE2/PNI support */ | 6189 | + /* MMX/3DNow!/SSE/SSE2/SSE3 support */ |
| 6152 | case 0x1c3: /* MOVNTI reg, mem */ | 6190 | case 0x1c3: /* MOVNTI reg, mem */ |
| 6153 | if (!(s->cpuid_features & CPUID_SSE2)) | 6191 | if (!(s->cpuid_features & CPUID_SSE2)) |
| 6154 | goto illegal_op; | 6192 | goto illegal_op; |
| @@ -6214,6 +6252,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) | @@ -6214,6 +6252,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) | ||
| 6214 | case 7: /* sfence / clflush */ | 6252 | case 7: /* sfence / clflush */ |
| 6215 | if ((modrm & 0xc7) == 0xc0) { | 6253 | if ((modrm & 0xc7) == 0xc0) { |
| 6216 | /* sfence */ | 6254 | /* sfence */ |
| 6255 | + /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_MMXEXT */ | ||
| 6217 | if (!(s->cpuid_features & CPUID_SSE)) | 6256 | if (!(s->cpuid_features & CPUID_SSE)) |
| 6218 | goto illegal_op; | 6257 | goto illegal_op; |
| 6219 | } else { | 6258 | } else { |
| @@ -6227,8 +6266,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) | @@ -6227,8 +6266,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) | ||
| 6227 | goto illegal_op; | 6266 | goto illegal_op; |
| 6228 | } | 6267 | } |
| 6229 | break; | 6268 | break; |
| 6230 | - case 0x10d: /* prefetch */ | 6269 | + case 0x10d: /* 3DNow! prefetch(w) */ |
| 6231 | modrm = ldub_code(s->pc++); | 6270 | modrm = ldub_code(s->pc++); |
| 6271 | + mod = (modrm >> 6) & 3; | ||
| 6272 | + if (mod == 3) | ||
| 6273 | + goto illegal_op; | ||
| 6232 | gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | 6274 | gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); |
| 6233 | /* ignore for now */ | 6275 | /* ignore for now */ |
| 6234 | break; | 6276 | break; |
| @@ -6245,6 +6287,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) | @@ -6245,6 +6287,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) | ||
| 6245 | gen_op_rsm(); | 6287 | gen_op_rsm(); |
| 6246 | gen_eob(s); | 6288 | gen_eob(s); |
| 6247 | break; | 6289 | break; |
| 6290 | + case 0x10e ... 0x10f: | ||
| 6291 | + /* 3DNow! instructions: ignore prefixes, then fall through to the cases below */ | ||
| 6292 | + s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA); | ||
| 6248 | case 0x110 ... 0x117: | 6293 | case 0x110 ... 0x117: |
| 6249 | case 0x128 ... 0x12f: | 6294 | case 0x128 ... 0x12f: |
| 6250 | case 0x150 ... 0x177: | 6295 | case 0x150 ... 0x177: |