Commit 4242b1bd8acc19aaaacffdaad4ac23213d72a72b
1 parent
bb332cb2
Implement x86 SSSE3 instructions.
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5315 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 3 changed files with 272 additions and 4 deletions
target-i386/ops_sse.h
1 | 1 | /* |
2 | - * MMX/3DNow!/SSE/SSE2/SSE3/PNI support | |
2 | + * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/PNI support | |
3 | 3 | * |
4 | 4 | * Copyright (c) 2005 Fabrice Bellard |
5 | 5 | * |
... | ... | @@ -1275,6 +1275,151 @@ void helper_pswapd(MMXReg *d, MMXReg *s) |
1275 | 1275 | } |
1276 | 1276 | #endif |
1277 | 1277 | |
1278 | +/* SSSE3 op helpers */ | |
1279 | +void glue(helper_pshufb, SUFFIX) (Reg *d, Reg *s) | |
1280 | +{ | |
1281 | + int i; | |
1282 | + Reg r; | |
1283 | + | |
1284 | + for (i = 0; i < (8 << SHIFT); i++) | |
1285 | + r.B(i) = (s->B(i) & 0x80) ? 0 : (d->B(s->B(i) & ((8 << SHIFT) - 1))); | |
1286 | + | |
1287 | + *d = r; | |
1288 | +} | |
1289 | + | |
1290 | +void glue(helper_phaddw, SUFFIX) (Reg *d, Reg *s) | |
1291 | +{ | |
1292 | + d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); | |
1293 | + d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); | |
1294 | + XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); | |
1295 | + XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); | |
1296 | + d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); | |
1297 | + d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); | |
1298 | + XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); | |
1299 | + XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); | |
1300 | +} | |
1301 | + | |
1302 | +void glue(helper_phaddd, SUFFIX) (Reg *d, Reg *s) | |
1303 | +{ | |
1304 | + d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); | |
1305 | + XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); | |
1306 | + d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); | |
1307 | + XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); | |
1308 | +} | |
1309 | + | |
1310 | +void glue(helper_phaddsw, SUFFIX) (Reg *d, Reg *s) | |
1311 | +{ | |
1312 | + d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); | |
1313 | + d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); | |
1314 | + XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); | |
1315 | + XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); | |
1316 | + d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); | |
1317 | + d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); | |
1318 | + XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); | |
1319 | + XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); | |
1320 | +} | |
1321 | + | |
1322 | +void glue(helper_pmaddubsw, SUFFIX) (Reg *d, Reg *s) | |
1323 | +{ | |
1324 | + d->W(0) = satsw((int8_t)s->B( 0) * (uint8_t)d->B( 0) + | |
1325 | + (int8_t)s->B( 1) * (uint8_t)d->B( 1)); | |
1326 | + d->W(1) = satsw((int8_t)s->B( 2) * (uint8_t)d->B( 2) + | |
1327 | + (int8_t)s->B( 3) * (uint8_t)d->B( 3)); | |
1328 | + d->W(2) = satsw((int8_t)s->B( 4) * (uint8_t)d->B( 4) + | |
1329 | + (int8_t)s->B( 5) * (uint8_t)d->B( 5)); | |
1330 | + d->W(3) = satsw((int8_t)s->B( 6) * (uint8_t)d->B( 6) + | |
1331 | + (int8_t)s->B( 7) * (uint8_t)d->B( 7)); | |
1332 | +#if SHIFT == 1 | |
1333 | + d->W(4) = satsw((int8_t)s->B( 8) * (uint8_t)d->B( 8) + | |
1334 | + (int8_t)s->B( 9) * (uint8_t)d->B( 9)); | |
1335 | + d->W(5) = satsw((int8_t)s->B(10) * (uint8_t)d->B(10) + | |
1336 | + (int8_t)s->B(11) * (uint8_t)d->B(11)); | |
1337 | + d->W(6) = satsw((int8_t)s->B(12) * (uint8_t)d->B(12) + | |
1338 | + (int8_t)s->B(13) * (uint8_t)d->B(13)); | |
1339 | + d->W(7) = satsw((int8_t)s->B(14) * (uint8_t)d->B(14) + | |
1340 | + (int8_t)s->B(15) * (uint8_t)d->B(15)); | |
1341 | +#endif | |
1342 | +} | |
1343 | + | |
1344 | +void glue(helper_phsubw, SUFFIX) (Reg *d, Reg *s) | |
1345 | +{ | |
1346 | + d->W(0) = (int16_t)d->W(0) - (int16_t)d->W(1); | |
1347 | + d->W(1) = (int16_t)d->W(2) - (int16_t)d->W(3); | |
1348 | + XMM_ONLY(d->W(2) = (int16_t)d->W(4) - (int16_t)d->W(5)); | |
1349 | + XMM_ONLY(d->W(3) = (int16_t)d->W(6) - (int16_t)d->W(7)); | |
1350 | + d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) - (int16_t)s->W(1); | |
1351 | + d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) - (int16_t)s->W(3); | |
1352 | + XMM_ONLY(d->W(6) = (int16_t)s->W(4) - (int16_t)s->W(5)); | |
1353 | + XMM_ONLY(d->W(7) = (int16_t)s->W(6) - (int16_t)s->W(7)); | |
1354 | +} | |
1355 | + | |
1356 | +void glue(helper_phsubd, SUFFIX) (Reg *d, Reg *s) | |
1357 | +{ | |
1358 | + d->L(0) = (int32_t)d->L(0) - (int32_t)d->L(1); | |
1359 | + XMM_ONLY(d->L(1) = (int32_t)d->L(2) - (int32_t)d->L(3)); | |
1360 | + d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) - (int32_t)s->L(1); | |
1361 | + XMM_ONLY(d->L(3) = (int32_t)s->L(2) - (int32_t)s->L(3)); | |
1362 | +} | |
1363 | + | |
1364 | +void glue(helper_phsubsw, SUFFIX) (Reg *d, Reg *s) | |
1365 | +{ | |
1366 | + d->W(0) = satsw((int16_t)d->W(0) - (int16_t)d->W(1)); | |
1367 | + d->W(1) = satsw((int16_t)d->W(2) - (int16_t)d->W(3)); | |
1368 | + XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) - (int16_t)d->W(5))); | |
1369 | + XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) - (int16_t)d->W(7))); | |
1370 | + d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) - (int16_t)s->W(1)); | |
1371 | + d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) - (int16_t)s->W(3)); | |
1372 | + XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) - (int16_t)s->W(5))); | |
1373 | + XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) - (int16_t)s->W(7))); | |
1374 | +} | |
1375 | + | |
1376 | +#define FABSB(_, x) x > INT8_MAX ? -(int8_t ) x : x | |
1377 | +#define FABSW(_, x) x > INT16_MAX ? -(int16_t) x : x | |
1378 | +#define FABSL(_, x) x > INT32_MAX ? -(int32_t) x : x | |
1379 | +SSE_HELPER_B(helper_pabsb, FABSB) | |
1380 | +SSE_HELPER_W(helper_pabsw, FABSW) | |
1381 | +SSE_HELPER_L(helper_pabsd, FABSL) | |
1382 | + | |
1383 | +#define FMULHRSW(d, s) ((int16_t) d * (int16_t) s + 0x4000) >> 15 | |
1384 | +SSE_HELPER_W(helper_pmulhrsw, FMULHRSW) | |
1385 | + | |
1386 | +#define FSIGNB(d, s) s <= INT8_MAX ? s ? d : 0 : -(int8_t ) d | |
1387 | +#define FSIGNW(d, s) s <= INT16_MAX ? s ? d : 0 : -(int16_t) d | |
1388 | +#define FSIGNL(d, s) s <= INT32_MAX ? s ? d : 0 : -(int32_t) d | |
1389 | +SSE_HELPER_B(helper_psignb, FSIGNB) | |
1390 | +SSE_HELPER_W(helper_psignw, FSIGNW) | |
1391 | +SSE_HELPER_L(helper_psignd, FSIGNL) | |
1392 | + | |
1393 | +void glue(helper_palignr, SUFFIX) (Reg *d, Reg *s, int32_t shift) | |
1394 | +{ | |
1395 | + Reg r; | |
1396 | + | |
1397 | + /* XXX could be checked during translation */ | |
1398 | + if (shift >= (16 << SHIFT)) { | |
1399 | + r.Q(0) = 0; | |
1400 | + XMM_ONLY(r.Q(1) = 0); | |
1401 | + } else { | |
1402 | + shift <<= 3; | |
1403 | +#define SHR(v, i) (i < 64 && i > -64 ? i > 0 ? v >> (i) : (v << -(i)) : 0) | |
1404 | +#if SHIFT == 0 | |
1405 | + r.Q(0) = SHR(s->Q(0), shift - 0) | | |
1406 | + SHR(d->Q(0), shift - 64); | |
1407 | +#else | |
1408 | + r.Q(0) = SHR(s->Q(0), shift - 0) | | |
1409 | + SHR(s->Q(1), shift - 64) | | |
1410 | + SHR(d->Q(0), shift - 128) | | |
1411 | + SHR(d->Q(1), shift - 192); | |
1412 | + r.Q(1) = SHR(s->Q(0), shift + 64) | | |
1413 | + SHR(s->Q(1), shift - 0) | | |
1414 | + SHR(d->Q(0), shift - 64) | | |
1415 | + SHR(d->Q(1), shift - 128); | |
1416 | +#endif | |
1417 | +#undef SHR | |
1418 | + } | |
1419 | + | |
1420 | + *d = r; | |
1421 | +} | |
1422 | + | |
1278 | 1423 | #undef SHIFT |
1279 | 1424 | #undef XMM_ONLY |
1280 | 1425 | #undef Reg | ... | ... |
target-i386/ops_sse_header.h
1 | 1 | /* |
2 | - * MMX/3DNow!/SSE/SSE2/SSE3/PNI support | |
2 | + * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/PNI support | |
3 | 3 | * |
4 | 4 | * Copyright (c) 2005 Fabrice Bellard |
5 | 5 | * |
... | ... | @@ -251,6 +251,24 @@ DEF_HELPER(void, helper_pfsubr, (MMXReg *d, MMXReg *s)) |
251 | 251 | DEF_HELPER(void, helper_pswapd, (MMXReg *d, MMXReg *s)) |
252 | 252 | #endif |
253 | 253 | |
254 | +/* SSSE3 op helpers */ | |
255 | +DEF_HELPER(void, glue(helper_phaddw, SUFFIX), (Reg *d, Reg *s)) | |
256 | +DEF_HELPER(void, glue(helper_phaddd, SUFFIX), (Reg *d, Reg *s)) | |
257 | +DEF_HELPER(void, glue(helper_phaddsw, SUFFIX), (Reg *d, Reg *s)) | |
258 | +DEF_HELPER(void, glue(helper_phsubw, SUFFIX), (Reg *d, Reg *s)) | |
259 | +DEF_HELPER(void, glue(helper_phsubd, SUFFIX), (Reg *d, Reg *s)) | |
260 | +DEF_HELPER(void, glue(helper_phsubsw, SUFFIX), (Reg *d, Reg *s)) | |
261 | +DEF_HELPER(void, glue(helper_pabsb, SUFFIX), (Reg *d, Reg *s)) | |
262 | +DEF_HELPER(void, glue(helper_pabsw, SUFFIX), (Reg *d, Reg *s)) | |
263 | +DEF_HELPER(void, glue(helper_pabsd, SUFFIX), (Reg *d, Reg *s)) | |
264 | +DEF_HELPER(void, glue(helper_pmaddubsw, SUFFIX), (Reg *d, Reg *s)) | |
265 | +DEF_HELPER(void, glue(helper_pmulhrsw, SUFFIX), (Reg *d, Reg *s)) | |
266 | +DEF_HELPER(void, glue(helper_pshufb, SUFFIX), (Reg *d, Reg *s)) | |
267 | +DEF_HELPER(void, glue(helper_psignb, SUFFIX), (Reg *d, Reg *s)) | |
268 | +DEF_HELPER(void, glue(helper_psignw, SUFFIX), (Reg *d, Reg *s)) | |
269 | +DEF_HELPER(void, glue(helper_psignd, SUFFIX), (Reg *d, Reg *s)) | |
270 | +DEF_HELPER(void, glue(helper_palignr, SUFFIX), (Reg *d, Reg *s, int32_t shift)) | |
271 | + | |
254 | 272 | #undef SHIFT |
255 | 273 | #undef Reg |
256 | 274 | #undef SUFFIX | ... | ... |
target-i386/translate.c
... | ... | @@ -2770,6 +2770,9 @@ static void *sse_op_table1[256][4] = { |
2770 | 2770 | [0xc2] = SSE_FOP(cmpeq), |
2771 | 2771 | [0xc6] = { helper_shufps, helper_shufpd }, |
2772 | 2772 | |
2773 | + [0x38] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3 */ | |
2774 | + [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3 */ | |
2775 | + | |
2773 | 2776 | /* MMX ops and their SSE extensions */ |
2774 | 2777 | [0x60] = MMX_OP2(punpcklbw), |
2775 | 2778 | [0x61] = MMX_OP2(punpcklwd), |
... | ... | @@ -2921,6 +2924,28 @@ static void *sse_op_table5[256] = { |
2921 | 2924 | [0xbf] = helper_pavgb_mmx /* pavgusb */ |
2922 | 2925 | }; |
2923 | 2926 | |
2927 | +static void *sse_op_table6[256][2] = { | |
2928 | + [0x00] = MMX_OP2(pshufb), | |
2929 | + [0x01] = MMX_OP2(phaddw), | |
2930 | + [0x02] = MMX_OP2(phaddd), | |
2931 | + [0x03] = MMX_OP2(phaddsw), | |
2932 | + [0x04] = MMX_OP2(pmaddubsw), | |
2933 | + [0x05] = MMX_OP2(phsubw), | |
2934 | + [0x06] = MMX_OP2(phsubd), | |
2935 | + [0x07] = MMX_OP2(phsubsw), | |
2936 | + [0x08] = MMX_OP2(psignb), | |
2937 | + [0x09] = MMX_OP2(psignw), | |
2938 | + [0x0a] = MMX_OP2(psignd), | |
2939 | + [0x0b] = MMX_OP2(pmulhrsw), | |
2940 | + [0x1c] = MMX_OP2(pabsb), | |
2941 | + [0x1d] = MMX_OP2(pabsw), | |
2942 | + [0x1e] = MMX_OP2(pabsd), | |
2943 | +}; | |
2944 | + | |
2945 | +static void *sse_op_table7[256][2] = { | |
2946 | + [0x0f] = MMX_OP2(palignr), | |
2947 | +}; | |
2948 | + | |
2924 | 2949 | static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) |
2925 | 2950 | { |
2926 | 2951 | int b1, op1_offset, op2_offset, is_xmm, val, ot; |
... | ... | @@ -2960,7 +2985,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) |
2960 | 2985 | return; |
2961 | 2986 | } |
2962 | 2987 | if (is_xmm && !(s->flags & HF_OSFXSR_MASK)) |
2963 | - goto illegal_op; | |
2988 | + if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA)) | |
2989 | + goto illegal_op; | |
2964 | 2990 | if (b == 0x0e) { |
2965 | 2991 | if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) |
2966 | 2992 | goto illegal_op; |
... | ... | @@ -3482,6 +3508,84 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) |
3482 | 3508 | reg = ((modrm >> 3) & 7) | rex_r; |
3483 | 3509 | gen_op_mov_reg_T0(OT_LONG, reg); |
3484 | 3510 | break; |
3511 | + case 0x038: | |
3512 | + case 0x138: | |
3513 | + if (!(s->cpuid_ext_features & CPUID_EXT_SSSE3)) | |
3514 | + goto illegal_op; | |
3515 | + | |
3516 | + b = modrm; | |
3517 | + modrm = ldub_code(s->pc++); | |
3518 | + rm = modrm & 7; | |
3519 | + reg = ((modrm >> 3) & 7) | rex_r; | |
3520 | + mod = (modrm >> 6) & 3; | |
3521 | + | |
3522 | + sse_op2 = sse_op_table6[b][b1]; | |
3523 | + if (!sse_op2) | |
3524 | + goto illegal_op; | |
3525 | + | |
3526 | + if (b1) { | |
3527 | + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); | |
3528 | + if (mod == 3) { | |
3529 | + op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]); | |
3530 | + } else { | |
3531 | + op2_offset = offsetof(CPUX86State,xmm_t0); | |
3532 | + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | |
3533 | + gen_ldo_env_A0(s->mem_index, op2_offset); | |
3534 | + } | |
3535 | + } else { | |
3536 | + op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); | |
3537 | + if (mod == 3) { | |
3538 | + op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); | |
3539 | + } else { | |
3540 | + op2_offset = offsetof(CPUX86State,mmx_t0); | |
3541 | + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | |
3542 | + gen_ldq_env_A0(s->mem_index, op2_offset); | |
3543 | + } | |
3544 | + } | |
3545 | + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); | |
3546 | + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); | |
3547 | + tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1); | |
3548 | + break; | |
3549 | + case 0x03a: | |
3550 | + case 0x13a: | |
3551 | + if (!(s->cpuid_ext_features & CPUID_EXT_SSSE3)) | |
3552 | + goto illegal_op; | |
3553 | + | |
3554 | + b = modrm; | |
3555 | + modrm = ldub_code(s->pc++); | |
3556 | + rm = modrm & 7; | |
3557 | + reg = ((modrm >> 3) & 7) | rex_r; | |
3558 | + mod = (modrm >> 6) & 3; | |
3559 | + | |
3560 | + sse_op2 = sse_op_table7[b][b1]; | |
3561 | + if (!sse_op2) | |
3562 | + goto illegal_op; | |
3563 | + | |
3564 | + if (b1) { | |
3565 | + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); | |
3566 | + if (mod == 3) { | |
3567 | + op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]); | |
3568 | + } else { | |
3569 | + op2_offset = offsetof(CPUX86State,xmm_t0); | |
3570 | + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | |
3571 | + gen_ldo_env_A0(s->mem_index, op2_offset); | |
3572 | + } | |
3573 | + } else { | |
3574 | + op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); | |
3575 | + if (mod == 3) { | |
3576 | + op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); | |
3577 | + } else { | |
3578 | + op2_offset = offsetof(CPUX86State,mmx_t0); | |
3579 | + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | |
3580 | + gen_ldq_env_A0(s->mem_index, op2_offset); | |
3581 | + } | |
3582 | + } | |
3583 | + val = ldub_code(s->pc++); | |
3584 | + | |
3585 | + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); | |
3586 | + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); | |
3587 | + tcg_gen_helper_0_3(sse_op2, cpu_ptr0, cpu_ptr1, tcg_const_i32(val)); | |
3588 | + break; | |
3485 | 3589 | default: |
3486 | 3590 | goto illegal_op; |
3487 | 3591 | } |
... | ... | @@ -6987,7 +7091,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
6987 | 7091 | gen_eob(s); |
6988 | 7092 | } |
6989 | 7093 | break; |
6990 | - /* MMX/3DNow!/SSE/SSE2/SSE3 support */ | |
7094 | + /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3 support */ | |
6991 | 7095 | case 0x1c3: /* MOVNTI reg, mem */ |
6992 | 7096 | if (!(s->cpuid_features & CPUID_SSE2)) |
6993 | 7097 | goto illegal_op; |
... | ... | @@ -7100,6 +7204,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
7100 | 7204 | s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA); |
7101 | 7205 | case 0x110 ... 0x117: |
7102 | 7206 | case 0x128 ... 0x12f: |
7207 | + case 0x138 ... 0x13a: | |
7103 | 7208 | case 0x150 ... 0x177: |
7104 | 7209 | case 0x17c ... 0x17f: |
7105 | 7210 | case 0x1c2: | ... | ... |