Commit 465e983875be3d7c8cb8f53628e090f15417b4f7

Authored by bellard
1 parent b854608e

SSE3 support (Joachim Henke)


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@1839 c046a42c-6fe2-441c-8c8c-71466251a162
target-i386/cpu.h
@@ -265,7 +265,7 @@ @@ -265,7 +265,7 @@
265 #define CPUID_SSE (1 << 25) 265 #define CPUID_SSE (1 << 25)
266 #define CPUID_SSE2 (1 << 26) 266 #define CPUID_SSE2 (1 << 26)
267 267
268 -#define CPUID_EXT_SS3 (1 << 0) 268 +#define CPUID_EXT_SSE3 (1 << 0)
269 #define CPUID_EXT_MONITOR (1 << 3) 269 #define CPUID_EXT_MONITOR (1 << 3)
270 #define CPUID_EXT_CX16 (1 << 13) 270 #define CPUID_EXT_CX16 (1 << 13)
271 271
target-i386/exec.h
@@ -260,6 +260,8 @@ static inline void stfl(target_ulong ptr, float v) @@ -260,6 +260,8 @@ static inline void stfl(target_ulong ptr, float v)
260 /* use long double functions */ 260 /* use long double functions */
261 #define floatx_to_int32 floatx80_to_int32 261 #define floatx_to_int32 floatx80_to_int32
262 #define floatx_to_int64 floatx80_to_int64 262 #define floatx_to_int64 floatx80_to_int64
  263 +#define floatx_to_int32_round_to_zero floatx80_to_int32_round_to_zero
  264 +#define floatx_to_int64_round_to_zero floatx80_to_int64_round_to_zero
263 #define floatx_abs floatx80_abs 265 #define floatx_abs floatx80_abs
264 #define floatx_chs floatx80_chs 266 #define floatx_chs floatx80_chs
265 #define floatx_round_to_int floatx80_round_to_int 267 #define floatx_round_to_int floatx80_round_to_int
@@ -278,6 +280,8 @@ static inline void stfl(target_ulong ptr, float v) @@ -278,6 +280,8 @@ static inline void stfl(target_ulong ptr, float v)
278 #else 280 #else
279 #define floatx_to_int32 float64_to_int32 281 #define floatx_to_int32 float64_to_int32
280 #define floatx_to_int64 float64_to_int64 282 #define floatx_to_int64 float64_to_int64
  283 +#define floatx_to_int32_round_to_zero float64_to_int32_round_to_zero
  284 +#define floatx_to_int64_round_to_zero float64_to_int64_round_to_zero
281 #define floatx_abs float64_abs 285 #define floatx_abs float64_abs
282 #define floatx_chs float64_chs 286 #define floatx_chs float64_chs
283 #define floatx_round_to_int float64_round_to_int 287 #define floatx_round_to_int float64_round_to_int
target-i386/helper2.c
@@ -108,7 +108,7 @@ CPUX86State *cpu_x86_init(void) @@ -108,7 +108,7 @@ CPUX86State *cpu_x86_init(void)
108 CPUID_CX8 | CPUID_PGE | CPUID_CMOV | 108 CPUID_CX8 | CPUID_PGE | CPUID_CMOV |
109 CPUID_PAT); 109 CPUID_PAT);
110 env->pat = 0x0007040600070406ULL; 110 env->pat = 0x0007040600070406ULL;
111 - env->cpuid_ext_features = 0; 111 + env->cpuid_ext_features = CPUID_EXT_SSE3;
112 env->cpuid_features |= CPUID_FXSR | CPUID_MMX | CPUID_SSE | CPUID_SSE2 | CPUID_PAE | CPUID_SEP; 112 env->cpuid_features |= CPUID_FXSR | CPUID_MMX | CPUID_SSE | CPUID_SSE2 | CPUID_PAE | CPUID_SEP;
113 env->cpuid_xlevel = 0; 113 env->cpuid_xlevel = 0;
114 { 114 {
target-i386/op.c
@@ -1911,6 +1911,53 @@ void OPPROTO op_fistll_ST0_A0(void) @@ -1911,6 +1911,53 @@ void OPPROTO op_fistll_ST0_A0(void)
1911 FORCE_RET(); 1911 FORCE_RET();
1912 } 1912 }
1913 1913
  1914 +void OPPROTO op_fistt_ST0_A0(void) /* 16-bit form: convert ST0 with the round-to-zero helper and store the result with stw() at A0 */
  1915 +{
  1916 +#if defined(__sparc__) && !defined(__sparc_v9__)
  1917 + register CPU86_LDouble d asm("o0"); /* non-v9 SPARC hosts: the temporary is pinned to register o0 (reason not visible in this hunk) */
  1918 +#else
  1919 + CPU86_LDouble d;
  1920 +#endif
  1921 + int val;
  1922 +
  1923 + d = ST0;
  1924 + val = floatx_to_int32_round_to_zero(d, &env->fp_status); /* converted at 32-bit width so the range check below can detect overflow of 16 bits */
  1925 + if (val != (int16_t)val) /* true when the converted value does not fit in int16_t */
  1926 + val = -32768; /* out-of-range results are replaced by -32768 before the store */
  1927 + stw(A0, val);
  1928 + FORCE_RET();
  1929 +}
  1930 +
  1931 +void OPPROTO op_fisttl_ST0_A0(void) /* 32-bit form: convert ST0 with the round-to-zero helper and store the result with stl() at A0 */
  1932 +{
  1933 +#if defined(__sparc__) && !defined(__sparc_v9__)
  1934 + register CPU86_LDouble d asm("o0"); /* non-v9 SPARC hosts: the temporary is pinned to register o0 (reason not visible in this hunk) */
  1935 +#else
  1936 + CPU86_LDouble d;
  1937 +#endif
  1938 + int val;
  1939 +
  1940 + d = ST0;
  1941 + val = floatx_to_int32_round_to_zero(d, &env->fp_status); /* uses the rounding-independent helper; env->fp_status is still passed for status reporting */
  1942 + stl(A0, val); /* no extra range check here: the helper's 32-bit result is stored as is */
  1943 + FORCE_RET();
  1944 +}
  1945 +
  1946 +void OPPROTO op_fisttll_ST0_A0(void) /* 64-bit form: convert ST0 with the round-to-zero helper and store the result with stq() at A0 */
  1947 +{
  1948 +#if defined(__sparc__) && !defined(__sparc_v9__)
  1949 + register CPU86_LDouble d asm("o0"); /* non-v9 SPARC hosts: the temporary is pinned to register o0 (reason not visible in this hunk) */
  1950 +#else
  1951 + CPU86_LDouble d;
  1952 +#endif
  1953 + int64_t val; /* 64-bit result, unlike the plain int used by the 16- and 32-bit variants */
  1954 +
  1955 + d = ST0;
  1956 + val = floatx_to_int64_round_to_zero(d, &env->fp_status);
  1957 + stq(A0, val); /* the helper's 64-bit result is stored as is */
  1958 + FORCE_RET();
  1959 +}
  1960 +
1914 void OPPROTO op_fbld_ST0_A0(void) 1961 void OPPROTO op_fbld_ST0_A0(void)
1915 { 1962 {
1916 helper_fbld_ST0_A0(); 1963 helper_fbld_ST0_A0();
target-i386/translate.c
@@ -2334,7 +2334,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = { @@ -2334,7 +2334,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2334 /* pure SSE operations */ 2334 /* pure SSE operations */
2335 [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ 2335 [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2336 [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ 2336 [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2337 - [0x12] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */ 2337 + [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2338 [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */ 2338 [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */
2339 [0x14] = { gen_op_punpckldq_xmm, gen_op_punpcklqdq_xmm }, 2339 [0x14] = { gen_op_punpckldq_xmm, gen_op_punpcklqdq_xmm },
2340 [0x15] = { gen_op_punpckhdq_xmm, gen_op_punpckhqdq_xmm }, 2340 [0x15] = { gen_op_punpckhdq_xmm, gen_op_punpckhqdq_xmm },
@@ -2436,7 +2436,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = { @@ -2436,7 +2436,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2436 [0xed] = MMX_OP2(paddsw), 2436 [0xed] = MMX_OP2(paddsw),
2437 [0xee] = MMX_OP2(pmaxsw), 2437 [0xee] = MMX_OP2(pmaxsw),
2438 [0xef] = MMX_OP2(pxor), 2438 [0xef] = MMX_OP2(pxor),
2439 - [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu (PNI) */ 2439 + [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2440 [0xf1] = MMX_OP2(psllw), 2440 [0xf1] = MMX_OP2(psllw),
2441 [0xf2] = MMX_OP2(pslld), 2441 [0xf2] = MMX_OP2(pslld),
2442 [0xf3] = MMX_OP2(psllq), 2442 [0xf3] = MMX_OP2(psllq),
@@ -2563,8 +2563,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -2563,8 +2563,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2563 case 0x1e7: /* movntdq */ 2563 case 0x1e7: /* movntdq */
2564 case 0x02b: /* movntps */ 2564 case 0x02b: /* movntps */
2565 case 0x12b: /* movntps */ 2565 case 0x12b: /* movntps */
2566 - case 0x2f0: /* lddqu */  
2567 - if (mod == 3) 2566 + case 0x3f0: /* lddqu */
  2567 + if (mod == 3)
2568 goto illegal_op; 2568 goto illegal_op;
2569 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); 2569 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
2570 gen_sto_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg])); 2570 gen_sto_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg]));
@@ -2642,6 +2642,34 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -2642,6 +2642,34 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2642 offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1))); 2642 offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
2643 } 2643 }
2644 break; 2644 break;
  2645 + case 0x212: /* movsldup */
  2646 + if (mod != 3) { /* memory source operand */
  2647 + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
  2648 + gen_ldo_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg])); /* presumably loads the full 128-bit operand into xmm_regs[reg] -- confirm against gen_ldo_env_A0 */
  2649 + } else { /* register source operand: only dwords 0 and 2 are copied from rm */
  2650 + rm = (modrm & 7) | REX_B(s);
  2651 + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
  2652 + offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
  2653 + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
  2654 + offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
  2655 + }
  2656 + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
  2657 + offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); /* both paths: duplicate dword 0 of reg into dword 1 (first offset taken to be the destination) */
  2658 + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
  2659 + offsetof(CPUX86State,xmm_regs[reg].XMM_L(2))); /* both paths: duplicate dword 2 of reg into dword 3 */
  2660 + break;
  2661 + case 0x312: /* movddup */
  2662 + if (mod != 3) {
  2663 + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
  2664 + gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
  2665 + } else {
  2666 + rm = (modrm & 7) | REX_B(s);
  2667 + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
  2668 + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
  2669 + }
  2670 + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
  2671 + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
  2672 + break;
2645 case 0x016: /* movhps */ 2673 case 0x016: /* movhps */
2646 case 0x116: /* movhpd */ 2674 case 0x116: /* movhpd */
2647 if (mod != 3) { 2675 if (mod != 3) {
@@ -4278,16 +4306,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) @@ -4278,16 +4306,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
4278 case 0x08: /* flds */ 4306 case 0x08: /* flds */
4279 case 0x0a: /* fsts */ 4307 case 0x0a: /* fsts */
4280 case 0x0b: /* fstps */ 4308 case 0x0b: /* fstps */
4281 - case 0x18: /* fildl */  
4282 - case 0x1a: /* fistl */  
4283 - case 0x1b: /* fistpl */  
4284 - case 0x28: /* fldl */  
4285 - case 0x2a: /* fstl */  
4286 - case 0x2b: /* fstpl */  
4287 - case 0x38: /* filds */  
4288 - case 0x3a: /* fists */  
4289 - case 0x3b: /* fistps */  
4290 - 4309 + case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
  4310 + case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
  4311 + case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
4291 switch(op & 7) { 4312 switch(op & 7) {
4292 case 0: 4313 case 0:
4293 switch(op >> 4) { 4314 switch(op >> 4) {
@@ -4306,6 +4327,20 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) @@ -4306,6 +4327,20 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
4306 break; 4327 break;
4307 } 4328 }
4308 break; 4329 break;
  4330 + case 1: /* (op & 7) == 1: the fisttp forms listed in the case labels above (0x19, 0x29, 0x39) */
  4331 + switch(op >> 4) { /* high nibble of op picks the operand width */
  4332 + case 1:
  4333 + gen_op_fisttl_ST0_A0(); /* 0x19: fisttpl */
  4334 + break;
  4335 + case 2:
  4336 + gen_op_fisttll_ST0_A0(); /* 0x29: fisttpll */
  4337 + break;
  4338 + case 3:
  4339 + default:
  4340 + gen_op_fistt_ST0_A0(); /* 0x39: fisttps; last arm of the inner switch, so no break is needed */
  4341 + }
  4342 + gen_op_fpop(); /* every width is followed by gen_op_fpop() */
  4343 + break;
4309 default: 4344 default:
4310 switch(op >> 4) { 4345 switch(op >> 4) {
4311 case 0: 4346 case 0: