Commit 465e983875be3d7c8cb8f53628e090f15417b4f7

Authored by bellard
1 parent b854608e

SSE3 support (Joachim Henke)


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@1839 c046a42c-6fe2-441c-8c8c-71466251a162
target-i386/cpu.h
... ... @@ -265,7 +265,7 @@
265 265 #define CPUID_SSE (1 << 25)
266 266 #define CPUID_SSE2 (1 << 26)
267 267  
268   -#define CPUID_EXT_SS3 (1 << 0)
  268 +#define CPUID_EXT_SSE3 (1 << 0)
269 269 #define CPUID_EXT_MONITOR (1 << 3)
270 270 #define CPUID_EXT_CX16 (1 << 13)
271 271  
... ...
target-i386/exec.h
... ... @@ -260,6 +260,8 @@ static inline void stfl(target_ulong ptr, float v)
260 260 /* use long double functions */
261 261 #define floatx_to_int32 floatx80_to_int32
262 262 #define floatx_to_int64 floatx80_to_int64
  263 +#define floatx_to_int32_round_to_zero floatx80_to_int32_round_to_zero
  264 +#define floatx_to_int64_round_to_zero floatx80_to_int64_round_to_zero
263 265 #define floatx_abs floatx80_abs
264 266 #define floatx_chs floatx80_chs
265 267 #define floatx_round_to_int floatx80_round_to_int
... ... @@ -278,6 +280,8 @@ static inline void stfl(target_ulong ptr, float v)
278 280 #else
279 281 #define floatx_to_int32 float64_to_int32
280 282 #define floatx_to_int64 float64_to_int64
  283 +#define floatx_to_int32_round_to_zero float64_to_int32_round_to_zero
  284 +#define floatx_to_int64_round_to_zero float64_to_int64_round_to_zero
281 285 #define floatx_abs float64_abs
282 286 #define floatx_chs float64_chs
283 287 #define floatx_round_to_int float64_round_to_int
... ...
target-i386/helper2.c
... ... @@ -108,7 +108,7 @@ CPUX86State *cpu_x86_init(void)
108 108 CPUID_CX8 | CPUID_PGE | CPUID_CMOV |
109 109 CPUID_PAT);
110 110 env->pat = 0x0007040600070406ULL;
111   - env->cpuid_ext_features = 0;
  111 + env->cpuid_ext_features = CPUID_EXT_SSE3;
112 112 env->cpuid_features |= CPUID_FXSR | CPUID_MMX | CPUID_SSE | CPUID_SSE2 | CPUID_PAE | CPUID_SEP;
113 113 env->cpuid_xlevel = 0;
114 114 {
... ...
target-i386/op.c
... ... @@ -1911,6 +1911,53 @@ void OPPROTO op_fistll_ST0_A0(void)
1911 1911 FORCE_RET();
1912 1912 }
1913 1913  
  1914 +void OPPROTO op_fistt_ST0_A0(void) /* fisttp, 16-bit form: convert ST0 to an integer with round-to-zero and store it at A0; the FPU pop is emitted separately by the translator */
  1915 +{
  1916 +#if defined(__sparc__) && !defined(__sparc_v9__)
  1917 + register CPU86_LDouble d asm("o0"); /* non-v9 sparc host: d is pinned to register o0 -- NOTE(review): the reason is not visible in this hunk */
  1918 +#else
  1919 + CPU86_LDouble d;
  1920 +#endif
  1921 + int val;
  1922 +
  1923 + d = ST0;
  1924 + val = floatx_to_int32_round_to_zero(d, &env->fp_status); /* convert via the int32 round-to-zero helper, then range-check for 16 bits below */
  1925 + if (val != (int16_t)val) /* true when the converted value does not fit in a signed 16-bit integer */
  1926 + val = -32768; /* out of range: store 0x8000 (the x86 16-bit "integer indefinite" value) instead */
  1927 + stw(A0, val); /* presumably a 16-bit store to guest address A0 -- confirm against the stw definition */
  1928 + FORCE_RET();
  1929 +}
  1930 +
  1931 +void OPPROTO op_fisttl_ST0_A0(void) /* fisttp, 32-bit form: convert ST0 to an int32 with round-to-zero and store it at A0; the FPU pop is emitted separately by the translator */
  1932 +{
  1933 +#if defined(__sparc__) && !defined(__sparc_v9__)
  1934 + register CPU86_LDouble d asm("o0"); /* non-v9 sparc host: d is pinned to register o0 -- NOTE(review): the reason is not visible in this hunk */
  1935 +#else
  1936 + CPU86_LDouble d;
  1937 +#endif
  1938 + int val;
  1939 +
  1940 + d = ST0;
  1941 + val = floatx_to_int32_round_to_zero(d, &env->fp_status); /* no extra range check here: the helper's int32 result is stored as-is */
  1942 + stl(A0, val); /* presumably a 32-bit store to guest address A0 -- confirm against the stl definition */
  1943 + FORCE_RET();
  1944 +}
  1945 +
  1946 +void OPPROTO op_fisttll_ST0_A0(void) /* fisttp, 64-bit form: convert ST0 to an int64 with round-to-zero and store it at A0; the FPU pop is emitted separately by the translator */
  1947 +{
  1948 +#if defined(__sparc__) && !defined(__sparc_v9__)
  1949 + register CPU86_LDouble d asm("o0"); /* non-v9 sparc host: d is pinned to register o0 -- NOTE(review): the reason is not visible in this hunk */
  1950 +#else
  1951 + CPU86_LDouble d;
  1952 +#endif
  1953 + int64_t val;
  1954 +
  1955 + d = ST0;
  1956 + val = floatx_to_int64_round_to_zero(d, &env->fp_status); /* 64-bit variant of the round-to-zero conversion helper */
  1957 + stq(A0, val); /* presumably a 64-bit store to guest address A0 -- confirm against the stq definition */
  1958 + FORCE_RET();
  1959 +}
  1960 +
1914 1961 void OPPROTO op_fbld_ST0_A0(void)
1915 1962 {
1916 1963 helper_fbld_ST0_A0();
... ...
target-i386/translate.c
... ... @@ -2334,7 +2334,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2334 2334 /* pure SSE operations */
2335 2335 [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2336 2336 [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2337   - [0x12] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */
  2337 + [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2338 2338 [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */
2339 2339 [0x14] = { gen_op_punpckldq_xmm, gen_op_punpcklqdq_xmm },
2340 2340 [0x15] = { gen_op_punpckhdq_xmm, gen_op_punpckhqdq_xmm },
... ... @@ -2436,7 +2436,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2436 2436 [0xed] = MMX_OP2(paddsw),
2437 2437 [0xee] = MMX_OP2(pmaxsw),
2438 2438 [0xef] = MMX_OP2(pxor),
2439   - [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu (PNI) */
  2439 + [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2440 2440 [0xf1] = MMX_OP2(psllw),
2441 2441 [0xf2] = MMX_OP2(pslld),
2442 2442 [0xf3] = MMX_OP2(psllq),
... ... @@ -2563,8 +2563,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2563 2563 case 0x1e7: /* movntdq */
2564 2564 case 0x02b: /* movntps */
2565 2565 case 0x12b: /* movntpd */
2566   - case 0x2f0: /* lddqu */
2567   - if (mod == 3)
  2566 + case 0x3f0: /* lddqu */
  2567 + if (mod == 3)
2568 2568 goto illegal_op;
2569 2569 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
2570 2570 gen_sto_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg]));
... ... @@ -2642,6 +2642,34 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2642 2642 offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
2643 2643 }
2644 2644 break;
  2645 + case 0x212: /* movsldup */
  2646 + if (mod != 3) {
  2647 + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
  2648 + gen_ldo_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg]));
  2649 + } else {
  2650 + rm = (modrm & 7) | REX_B(s);
  2651 + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
  2652 + offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
  2653 + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
  2654 + offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
  2655 + }
  2656 + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
  2657 + offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
  2658 + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
  2659 + offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
  2660 + break;
  2661 + case 0x312: /* movddup */
  2662 + if (mod != 3) {
  2663 + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
  2664 + gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
  2665 + } else {
  2666 + rm = (modrm & 7) | REX_B(s);
  2667 + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
  2668 + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
  2669 + }
  2670 + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
  2671 + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
  2672 + break;
2645 2673 case 0x016: /* movhps */
2646 2674 case 0x116: /* movhpd */
2647 2675 if (mod != 3) {
... ... @@ -4278,16 +4306,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
4278 4306 case 0x08: /* flds */
4279 4307 case 0x0a: /* fsts */
4280 4308 case 0x0b: /* fstps */
4281   - case 0x18: /* fildl */
4282   - case 0x1a: /* fistl */
4283   - case 0x1b: /* fistpl */
4284   - case 0x28: /* fldl */
4285   - case 0x2a: /* fstl */
4286   - case 0x2b: /* fstpl */
4287   - case 0x38: /* filds */
4288   - case 0x3a: /* fists */
4289   - case 0x3b: /* fistps */
4290   -
  4309 + case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
  4310 + case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
  4311 + case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
4291 4312 switch(op & 7) {
4292 4313 case 0:
4293 4314 switch(op >> 4) {
... ... @@ -4306,6 +4327,20 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
4306 4327 break;
4307 4328 }
4308 4329 break;
  4330 + case 1:
  4331 + switch(op >> 4) {
  4332 + case 1:
  4333 + gen_op_fisttl_ST0_A0();
  4334 + break;
  4335 + case 2:
  4336 + gen_op_fisttll_ST0_A0();
  4337 + break;
  4338 + case 3:
  4339 + default:
  4340 + gen_op_fistt_ST0_A0();
  4341 + }
  4342 + gen_op_fpop();
  4343 + break;
4309 4344 default:
4310 4345 switch(op >> 4) {
4311 4346 case 0:
... ...