Commit 664e0f195adda3cf01b40d8d1aa79bbc24ad5fab
1 parent
085339a1
MMX/SSE support
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@1205 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
12 changed files
with
2402 additions
and
171 deletions
Changelog
Makefile.target
| ... | ... | @@ -392,7 +392,7 @@ helper.o: helper.c |
| 392 | 392 | $(CC) $(HELPER_CFLAGS) $(DEFINES) -c -o $@ $< |
| 393 | 393 | |
| 394 | 394 | ifeq ($(TARGET_BASE_ARCH), i386) |
| 395 | -op.o: op.c opreg_template.h ops_template.h ops_template_mem.h ops_mem.h | |
| 395 | +op.o: op.c opreg_template.h ops_template.h ops_template_mem.h ops_mem.h ops_sse.h | |
| 396 | 396 | endif |
| 397 | 397 | |
| 398 | 398 | ifeq ($(TARGET_ARCH), arm) | ... | ... |
linux-user/main.c
| ... | ... | @@ -1052,8 +1052,8 @@ int main(int argc, char **argv) |
| 1052 | 1052 | cpu_x86_set_cpl(env, 3); |
| 1053 | 1053 | |
| 1054 | 1054 | env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; |
| 1055 | - env->hflags |= HF_PE_MASK; | |
| 1056 | - | |
| 1055 | + env->hflags |= HF_PE_MASK | HF_OSFXSR_MASK; | |
| 1056 | + | |
| 1057 | 1057 | /* flags setup : we activate the IRQs by default as in user mode */ |
| 1058 | 1058 | env->eflags |= IF_MASK; |
| 1059 | 1059 | ... | ... |
target-i386/cpu.h
| ... | ... | @@ -135,6 +135,7 @@ |
| 135 | 135 | #define HF_IOPL_SHIFT 12 /* must be same as eflags */ |
| 136 | 136 | #define HF_LMA_SHIFT 14 /* only used on x86_64: long mode active */ |
| 137 | 137 | #define HF_CS64_SHIFT 15 /* only used on x86_64: 64 bit code segment */ |
| 138 | +#define HF_OSFXSR_SHIFT 16 /* CR4.OSFXSR */ | |
| 138 | 139 | #define HF_VM_SHIFT 17 /* must be same as eflags */ |
| 139 | 140 | |
| 140 | 141 | #define HF_CPL_MASK (3 << HF_CPL_SHIFT) |
| ... | ... | @@ -150,6 +151,7 @@ |
| 150 | 151 | #define HF_TS_MASK (1 << HF_TS_SHIFT) |
| 151 | 152 | #define HF_LMA_MASK (1 << HF_LMA_SHIFT) |
| 152 | 153 | #define HF_CS64_MASK (1 << HF_CS64_SHIFT) |
| 154 | +#define HF_OSFXSR_MASK (1 << HF_OSFXSR_SHIFT) | |
| 153 | 155 | |
| 154 | 156 | #define CR0_PE_MASK (1 << 0) |
| 155 | 157 | #define CR0_MP_MASK (1 << 1) |
| ... | ... | @@ -340,10 +342,12 @@ typedef struct SegmentCache { |
| 340 | 342 | } SegmentCache; |
| 341 | 343 | |
| 342 | 344 | typedef union { |
| 343 | - uint8_t _b[16]; | |
| 344 | - uint16_t _w[8]; | |
| 345 | - uint32_t _l[4]; | |
| 346 | - uint64_t _q[2]; | |
| 345 | + uint8_t _b[16]; | |
| 346 | + uint16_t _w[8]; | |
| 347 | + uint32_t _l[4]; | |
| 348 | + uint64_t _q[2]; | |
| 349 | + float _s[4]; | |
| 350 | + double _d[2]; | |
| 347 | 351 | } XMMReg; |
| 348 | 352 | |
| 349 | 353 | typedef union { |
| ... | ... | @@ -357,7 +361,9 @@ typedef union { |
| 357 | 361 | #define XMM_B(n) _b[15 - (n)] |
| 358 | 362 | #define XMM_W(n) _w[7 - (n)] |
| 359 | 363 | #define XMM_L(n) _l[3 - (n)] |
| 364 | +#define XMM_S(n) _s[3 - (n)] | |
| 360 | 365 | #define XMM_Q(n) _q[1 - (n)] |
| 366 | +#define XMM_D(n) _d[1 - (n)] | |
| 361 | 367 | |
| 362 | 368 | #define MMX_B(n) _b[7 - (n)] |
| 363 | 369 | #define MMX_W(n) _w[3 - (n)] |
| ... | ... | @@ -366,12 +372,15 @@ typedef union { |
| 366 | 372 | #define XMM_B(n) _b[n] |
| 367 | 373 | #define XMM_W(n) _w[n] |
| 368 | 374 | #define XMM_L(n) _l[n] |
| 375 | +#define XMM_S(n) _s[n] | |
| 369 | 376 | #define XMM_Q(n) _q[n] |
| 377 | +#define XMM_D(n) _d[n] | |
| 370 | 378 | |
| 371 | 379 | #define MMX_B(n) _b[n] |
| 372 | 380 | #define MMX_W(n) _w[n] |
| 373 | 381 | #define MMX_L(n) _l[n] |
| 374 | 382 | #endif |
| 383 | +#define MMX_Q(n) q | |
| 375 | 384 | |
| 376 | 385 | #ifdef TARGET_X86_64 |
| 377 | 386 | #define CPU_NB_REGS 16 |
| ... | ... | @@ -404,7 +413,14 @@ typedef struct CPUX86State { |
| 404 | 413 | unsigned int fpus; |
| 405 | 414 | unsigned int fpuc; |
| 406 | 415 | uint8_t fptags[8]; /* 0 = valid, 1 = empty */ |
| 407 | - CPU86_LDouble fpregs[8]; | |
| 416 | + union { | |
| 417 | +#ifdef USE_X86LDOUBLE | |
| 418 | + CPU86_LDouble d __attribute__((aligned(16))); | |
| 419 | +#else | |
| 420 | + CPU86_LDouble d; | |
| 421 | +#endif | |
| 422 | + MMXReg mmx; | |
| 423 | + } fpregs[8]; | |
| 408 | 424 | |
| 409 | 425 | /* emulator internal variables */ |
| 410 | 426 | CPU86_LDouble ft0; |
| ... | ... | @@ -421,9 +437,11 @@ typedef struct CPUX86State { |
| 421 | 437 | SegmentCache tr; |
| 422 | 438 | SegmentCache gdt; /* only base and limit are used */ |
| 423 | 439 | SegmentCache idt; /* only base and limit are used */ |
| 424 | - | |
| 440 | + | |
| 441 | + uint32_t mxcsr; | |
| 425 | 442 | XMMReg xmm_regs[CPU_NB_REGS]; |
| 426 | 443 | XMMReg xmm_t0; |
| 444 | + MMXReg mmx_t0; | |
| 427 | 445 | |
| 428 | 446 | /* sysenter registers */ |
| 429 | 447 | uint32_t sysenter_cs; | ... | ... |
target-i386/exec.h
| ... | ... | @@ -131,8 +131,8 @@ extern int loglevel; |
| 131 | 131 | |
| 132 | 132 | /* float macros */ |
| 133 | 133 | #define FT0 (env->ft0) |
| 134 | -#define ST0 (env->fpregs[env->fpstt]) | |
| 135 | -#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7]) | |
| 134 | +#define ST0 (env->fpregs[env->fpstt].d) | |
| 135 | +#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) | |
| 136 | 136 | #define ST1 ST(1) |
| 137 | 137 | |
| 138 | 138 | #ifdef USE_FP_CONVERT |
| ... | ... | @@ -459,7 +459,7 @@ static inline CPU86_LDouble helper_fldt(target_ulong ptr) |
| 459 | 459 | return temp.d; |
| 460 | 460 | } |
| 461 | 461 | |
| 462 | -static inline void helper_fstt(CPU86_LDouble f, uint8_t *ptr) | |
| 462 | +static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr) | |
| 463 | 463 | { |
| 464 | 464 | CPU86_LDoubleU temp; |
| 465 | 465 | int e; |
| ... | ... | @@ -557,6 +557,9 @@ void helper_fxsave(target_ulong ptr, int data64); |
| 557 | 557 | void helper_fxrstor(target_ulong ptr, int data64); |
| 558 | 558 | void restore_native_fp_state(CPUState *env); |
| 559 | 559 | void save_native_fp_state(CPUState *env); |
| 560 | +float approx_rsqrt(float a); | |
| 561 | +float approx_rcp(float a); | |
| 562 | +int fpu_isnan(double a); | |
| 560 | 563 | |
| 561 | 564 | extern const uint8_t parity_table[256]; |
| 562 | 565 | extern const uint8_t rclw_table[32]; | ... | ... |
target-i386/helper.c
| ... | ... | @@ -2444,7 +2444,7 @@ void helper_fldt_ST0_A0(void) |
| 2444 | 2444 | { |
| 2445 | 2445 | int new_fpstt; |
| 2446 | 2446 | new_fpstt = (env->fpstt - 1) & 7; |
| 2447 | - env->fpregs[new_fpstt] = helper_fldt(A0); | |
| 2447 | + env->fpregs[new_fpstt].d = helper_fldt(A0); | |
| 2448 | 2448 | env->fpstt = new_fpstt; |
| 2449 | 2449 | env->fptags[new_fpstt] = 0; /* validate stack entry */ |
| 2450 | 2450 | } |
| ... | ... | @@ -2804,9 +2804,10 @@ void helper_fstenv(target_ulong ptr, int data32) |
| 2804 | 2804 | if (env->fptags[i]) { |
| 2805 | 2805 | fptag |= 3; |
| 2806 | 2806 | } else { |
| 2807 | - tmp.d = env->fpregs[i]; | |
| 2807 | + tmp.d = env->fpregs[i].d; | |
| 2808 | 2808 | exp = EXPD(tmp); |
| 2809 | 2809 | mant = MANTD(tmp); |
| 2810 | + /* exp and mant drive the tag classification that follows; nothing is printed on this path */ | |
| 2810 | 2811 | if (exp == 0 && mant == 0) { |
| 2811 | 2812 | /* zero */ |
| 2812 | 2813 | fptag |= 1; |
| ... | ... | @@ -2930,7 +2931,7 @@ void helper_fxsave(target_ulong ptr, int data64) |
| 2930 | 2931 | |
| 2931 | 2932 | if (env->cr[4] & CR4_OSFXSR_MASK) { |
| 2932 | 2933 | /* XXX: finish it */ |
| 2933 | - stl(ptr + 0x18, 0); /* mxcsr */ | |
| 2934 | + stl(ptr + 0x18, env->mxcsr); /* mxcsr */ | |
| 2934 | 2935 | stl(ptr + 0x1c, 0); /* mxcsr_mask */ |
| 2935 | 2936 | nb_xmm_regs = 8 << data64; |
| 2936 | 2937 | addr = ptr + 0xa0; |
| ... | ... | @@ -2967,7 +2968,7 @@ void helper_fxrstor(target_ulong ptr, int data64) |
| 2967 | 2968 | |
| 2968 | 2969 | if (env->cr[4] & CR4_OSFXSR_MASK) { |
| 2969 | 2970 | /* XXX: finish it, endianness */ |
| 2970 | - //ldl(ptr + 0x18); | |
| 2971 | + env->mxcsr = ldl(ptr + 0x18); | |
| 2971 | 2972 | //ldl(ptr + 0x1c); |
| 2972 | 2973 | nb_xmm_regs = 8 << data64; |
| 2973 | 2974 | addr = ptr + 0xa0; |
| ... | ... | @@ -3209,6 +3210,23 @@ void helper_idivq_EAX_T0(void) |
| 3209 | 3210 | |
| 3210 | 3211 | #endif |
| 3211 | 3212 | |
| 3213 | +/* Return nonzero iff a is a NaN: a NaN is the only value that compares unequal to itself. */ | |
| 3214 | +int fpu_isnan(double a) | |
| 3215 | +{ | |
| 3216 | + return a != a; | |
| 3217 | +} | |
| 3218 | +/* Reciprocal square root of a, evaluated as 1.0 / sqrt(a) and narrowed to float by the return. */ | |
| 3219 | +float approx_rsqrt(float a) | |
| 3220 | +{ | |
| 3221 | + return 1.0 / sqrt(a); | |
| 3222 | +} | |
| 3223 | +/* Reciprocal of a, evaluated as 1.0 / a and narrowed to float by the return. */ | |
| 3224 | +float approx_rcp(float a) | |
| 3225 | +{ | |
| 3226 | + return 1.0 / a; | |
| 3227 | +} | |
| 3228 | + | |
| 3229 | + | |
| 3212 | 3230 | #if !defined(CONFIG_USER_ONLY) |
| 3213 | 3231 | |
| 3214 | 3232 | #define MMUSUFFIX _mmu | ... | ... |
target-i386/helper2.c
| ... | ... | @@ -158,6 +158,8 @@ void cpu_reset(CPUX86State *env) |
| 158 | 158 | for(i = 0;i < 8; i++) |
| 159 | 159 | env->fptags[i] = 1; |
| 160 | 160 | env->fpuc = 0x37f; |
| 161 | + | |
| 162 | + env->mxcsr = 0x1f80; | |
| 161 | 163 | } |
| 162 | 164 | |
| 163 | 165 | void cpu_x86_close(CPUX86State *env) |
| ... | ... | @@ -376,15 +378,15 @@ void cpu_dump_state(CPUState *env, FILE *f, |
| 376 | 378 | } |
| 377 | 379 | if (flags & X86_DUMP_FPU) { |
| 378 | 380 | cpu_fprintf(f, "ST0=%f ST1=%f ST2=%f ST3=%f\n", |
| 379 | - (double)env->fpregs[0], | |
| 380 | - (double)env->fpregs[1], | |
| 381 | - (double)env->fpregs[2], | |
| 382 | - (double)env->fpregs[3]); | |
| 381 | + (double)env->fpregs[0].d, | |
| 382 | + (double)env->fpregs[1].d, | |
| 383 | + (double)env->fpregs[2].d, | |
| 384 | + (double)env->fpregs[3].d); | |
| 383 | 385 | cpu_fprintf(f, "ST4=%f ST5=%f ST6=%f ST7=%f\n", |
| 384 | - (double)env->fpregs[4], | |
| 385 | - (double)env->fpregs[5], | |
| 386 | - (double)env->fpregs[7], | |
| 387 | - (double)env->fpregs[8]); | |
| 386 | + (double)env->fpregs[4].d, | |
| 387 | + (double)env->fpregs[5].d, | |
| 388 | + (double)env->fpregs[6].d, /* value printed under the ST6 label */ | |
| 389 | + (double)env->fpregs[7].d); /* ST7: index 7 is the last of the 8 fpregs[] entries */ | |
| 388 | 390 | } |
| 389 | 391 | } |
| 390 | 392 | |
| ... | ... | @@ -471,6 +473,14 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) |
| 471 | 473 | (env->cr[4] & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK))) { |
| 472 | 474 | tlb_flush(env, 1); |
| 473 | 475 | } |
| 476 | + /* SSE handling */ | |
| 477 | + if (!(env->cpuid_features & CPUID_SSE)) | |
| 478 | + new_cr4 &= ~CR4_OSFXSR_MASK; | |
| 479 | + if (new_cr4 & CR4_OSFXSR_MASK) | |
| 480 | + env->hflags |= HF_OSFXSR_MASK; | |
| 481 | + else | |
| 482 | + env->hflags &= ~HF_OSFXSR_MASK; | |
| 483 | + | |
| 474 | 484 | env->cr[4] = new_cr4; |
| 475 | 485 | } |
| 476 | 486 | |
| ... | ... | @@ -800,7 +810,7 @@ void restore_native_fp_state(CPUState *env) |
| 800 | 810 | fp->fptag = fptag; |
| 801 | 811 | j = env->fpstt; |
| 802 | 812 | for(i = 0;i < 8; i++) { |
| 803 | - memcpy(&fp->fpregs1[i * 10], &env->fpregs[j], 10); | |
| 813 | + memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10); | |
| 804 | 814 | j = (j + 1) & 7; |
| 805 | 815 | } |
| 806 | 816 | asm volatile ("frstor %0" : "=m" (*fp)); |
| ... | ... | @@ -824,7 +834,7 @@ void save_native_fp_state(CPUState *env) |
| 824 | 834 | } |
| 825 | 835 | j = env->fpstt; |
| 826 | 836 | for(i = 0;i < 8; i++) { |
| 827 | - memcpy(&env->fpregs[j], &fp->fpregs1[i * 10], 10); | |
| 837 | + memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10); | |
| 828 | 838 | j = (j + 1) & 7; |
| 829 | 839 | } |
| 830 | 840 | /* we must restore the default rounding state */ | ... | ... |
target-i386/op.c
| ... | ... | @@ -752,11 +752,6 @@ void OPPROTO op_movswl_T0_T0(void) |
| 752 | 752 | T0 = (int16_t)T0; |
| 753 | 753 | } |
| 754 | 754 | |
| 755 | -void OPPROTO op_movslq_T0_T0(void) | |
| 756 | -{ | |
| 757 | - T0 = (int32_t)T0; | |
| 758 | -} | |
| 759 | - | |
| 760 | 755 | void OPPROTO op_movzwl_T0_T0(void) |
| 761 | 756 | { |
| 762 | 757 | T0 = (uint16_t)T0; |
| ... | ... | @@ -768,6 +763,11 @@ void OPPROTO op_movswl_EAX_AX(void) |
| 768 | 763 | } |
| 769 | 764 | |
| 770 | 765 | #ifdef TARGET_X86_64 |
| 766 | +void OPPROTO op_movslq_T0_T0(void) | |
| 767 | +{ | |
| 768 | + T0 = (int32_t)T0; /* narrow T0 to int32_t and assign it back; sign-extends if T0 is wider (its type is declared outside this hunk) */ | |
| 769 | +} | |
| 770 | + | |
| 771 | 771 | void OPPROTO op_movslq_RAX_EAX(void) |
| 772 | 772 | { |
| 773 | 773 | EAX = (int32_t)EAX; |
| ... | ... | @@ -1695,9 +1695,9 @@ void OPPROTO op_flds_ST0_A0(void) |
| 1695 | 1695 | new_fpstt = (env->fpstt - 1) & 7; |
| 1696 | 1696 | #ifdef USE_FP_CONVERT |
| 1697 | 1697 | FP_CONVERT.i32 = ldl(A0); |
| 1698 | - env->fpregs[new_fpstt] = FP_CONVERT.f; | |
| 1698 | + env->fpregs[new_fpstt].d = FP_CONVERT.f; | |
| 1699 | 1699 | #else |
| 1700 | - env->fpregs[new_fpstt] = ldfl(A0); | |
| 1700 | + env->fpregs[new_fpstt].d = ldfl(A0); | |
| 1701 | 1701 | #endif |
| 1702 | 1702 | env->fpstt = new_fpstt; |
| 1703 | 1703 | env->fptags[new_fpstt] = 0; /* validate stack entry */ |
| ... | ... | @@ -1709,9 +1709,9 @@ void OPPROTO op_fldl_ST0_A0(void) |
| 1709 | 1709 | new_fpstt = (env->fpstt - 1) & 7; |
| 1710 | 1710 | #ifdef USE_FP_CONVERT |
| 1711 | 1711 | FP_CONVERT.i64 = ldq(A0); |
| 1712 | - env->fpregs[new_fpstt] = FP_CONVERT.d; | |
| 1712 | + env->fpregs[new_fpstt].d = FP_CONVERT.d; | |
| 1713 | 1713 | #else |
| 1714 | - env->fpregs[new_fpstt] = ldfq(A0); | |
| 1714 | + env->fpregs[new_fpstt].d = ldfq(A0); | |
| 1715 | 1715 | #endif |
| 1716 | 1716 | env->fpstt = new_fpstt; |
| 1717 | 1717 | env->fptags[new_fpstt] = 0; /* validate stack entry */ |
| ... | ... | @@ -1729,7 +1729,7 @@ void helper_fild_ST0_A0(void) |
| 1729 | 1729 | { |
| 1730 | 1730 | int new_fpstt; |
| 1731 | 1731 | new_fpstt = (env->fpstt - 1) & 7; |
| 1732 | - env->fpregs[new_fpstt] = (CPU86_LDouble)ldsw(A0); | |
| 1732 | + env->fpregs[new_fpstt].d = (CPU86_LDouble)ldsw(A0); | |
| 1733 | 1733 | env->fpstt = new_fpstt; |
| 1734 | 1734 | env->fptags[new_fpstt] = 0; /* validate stack entry */ |
| 1735 | 1735 | } |
| ... | ... | @@ -1738,7 +1738,7 @@ void helper_fildl_ST0_A0(void) |
| 1738 | 1738 | { |
| 1739 | 1739 | int new_fpstt; |
| 1740 | 1740 | new_fpstt = (env->fpstt - 1) & 7; |
| 1741 | - env->fpregs[new_fpstt] = (CPU86_LDouble)((int32_t)ldl(A0)); | |
| 1741 | + env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0)); | |
| 1742 | 1742 | env->fpstt = new_fpstt; |
| 1743 | 1743 | env->fptags[new_fpstt] = 0; /* validate stack entry */ |
| 1744 | 1744 | } |
| ... | ... | @@ -1747,7 +1747,7 @@ void helper_fildll_ST0_A0(void) |
| 1747 | 1747 | { |
| 1748 | 1748 | int new_fpstt; |
| 1749 | 1749 | new_fpstt = (env->fpstt - 1) & 7; |
| 1750 | - env->fpregs[new_fpstt] = (CPU86_LDouble)((int64_t)ldq(A0)); | |
| 1750 | + env->fpregs[new_fpstt].d = (CPU86_LDouble)((int64_t)ldq(A0)); | |
| 1751 | 1751 | env->fpstt = new_fpstt; |
| 1752 | 1752 | env->fptags[new_fpstt] = 0; /* validate stack entry */ |
| 1753 | 1753 | } |
| ... | ... | @@ -1775,9 +1775,9 @@ void OPPROTO op_fild_ST0_A0(void) |
| 1775 | 1775 | new_fpstt = (env->fpstt - 1) & 7; |
| 1776 | 1776 | #ifdef USE_FP_CONVERT |
| 1777 | 1777 | FP_CONVERT.i32 = ldsw(A0); |
| 1778 | - env->fpregs[new_fpstt] = (CPU86_LDouble)FP_CONVERT.i32; | |
| 1778 | + env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i32; | |
| 1779 | 1779 | #else |
| 1780 | - env->fpregs[new_fpstt] = (CPU86_LDouble)ldsw(A0); | |
| 1780 | + env->fpregs[new_fpstt].d = (CPU86_LDouble)ldsw(A0); | |
| 1781 | 1781 | #endif |
| 1782 | 1782 | env->fpstt = new_fpstt; |
| 1783 | 1783 | env->fptags[new_fpstt] = 0; /* validate stack entry */ |
| ... | ... | @@ -1789,9 +1789,9 @@ void OPPROTO op_fildl_ST0_A0(void) |
| 1789 | 1789 | new_fpstt = (env->fpstt - 1) & 7; |
| 1790 | 1790 | #ifdef USE_FP_CONVERT |
| 1791 | 1791 | FP_CONVERT.i32 = (int32_t) ldl(A0); |
| 1792 | - env->fpregs[new_fpstt] = (CPU86_LDouble)FP_CONVERT.i32; | |
| 1792 | + env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i32; | |
| 1793 | 1793 | #else |
| 1794 | - env->fpregs[new_fpstt] = (CPU86_LDouble)((int32_t)ldl(A0)); | |
| 1794 | + env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0)); | |
| 1795 | 1795 | #endif |
| 1796 | 1796 | env->fpstt = new_fpstt; |
| 1797 | 1797 | env->fptags[new_fpstt] = 0; /* validate stack entry */ |
| ... | ... | @@ -1803,9 +1803,9 @@ void OPPROTO op_fildll_ST0_A0(void) |
| 1803 | 1803 | new_fpstt = (env->fpstt - 1) & 7; |
| 1804 | 1804 | #ifdef USE_FP_CONVERT |
| 1805 | 1805 | FP_CONVERT.i64 = (int64_t) ldq(A0); |
| 1806 | - env->fpregs[new_fpstt] = (CPU86_LDouble)FP_CONVERT.i64; | |
| 1806 | + env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i64; | |
| 1807 | 1807 | #else |
| 1808 | - env->fpregs[new_fpstt] = (CPU86_LDouble)((int64_t)ldq(A0)); | |
| 1808 | + env->fpregs[new_fpstt].d = (CPU86_LDouble)((int64_t)ldq(A0)); | |
| 1809 | 1809 | #endif |
| 1810 | 1810 | env->fpstt = new_fpstt; |
| 1811 | 1811 | env->fptags[new_fpstt] = 0; /* validate stack entry */ |
| ... | ... | @@ -2322,6 +2322,29 @@ void OPPROTO op_movo(void) |
| 2322 | 2322 | memcpy16(d, s); |
| 2323 | 2323 | } |
| 2324 | 2324 | |
| 2325 | +void OPPROTO op_movq(void) /* copy one uint64_t from env + PARAM2 to env + PARAM1 */ | |
| 2326 | +{ | |
| 2327 | + uint64_t *d, *s; | |
| 2328 | + d = (uint64_t *)((char *)env + PARAM1); /* destination: byte offset PARAM1 from env */ | |
| 2329 | + s = (uint64_t *)((char *)env + PARAM2); /* source: byte offset PARAM2 from env */ | |
| 2330 | + *d = *s; | |
| 2331 | +} | |
| 2332 | +/* Copy one uint32_t from byte offset PARAM2 of env to byte offset PARAM1 of env. */ | |
| 2333 | +void OPPROTO op_movl(void) | |
| 2334 | +{ | |
| 2335 | + uint32_t *d, *s; | |
| 2336 | + d = (uint32_t *)((char *)env + PARAM1); | |
| 2337 | + s = (uint32_t *)((char *)env + PARAM2); | |
| 2338 | + *d = *s; | |
| 2339 | +} | |
| 2340 | +/* Store a 64-bit zero at byte offset PARAM1 of env. */ | |
| 2341 | +void OPPROTO op_movq_env_0(void) | |
| 2342 | +{ | |
| 2343 | + uint64_t *d; | |
| 2344 | + d = (uint64_t *)((char *)env + PARAM1); | |
| 2345 | + *d = 0; | |
| 2346 | +} | |
| 2347 | + | |
| 2325 | 2348 | void OPPROTO op_fxsave_A0(void) |
| 2326 | 2349 | { |
| 2327 | 2350 | helper_fxsave(A0, PARAM1); |
| ... | ... | @@ -2331,3 +2354,24 @@ void OPPROTO op_fxrstor_A0(void) |
| 2331 | 2354 | { |
| 2332 | 2355 | helper_fxrstor(A0, PARAM1); |
| 2333 | 2356 | } |
| 2357 | + | |
| 2358 | +/* Reset env->fpstt to 0 and clear all eight fptags bytes. XXX: optimize by storing fptt and fptags in the static cpu state */ | |
| 2359 | +void OPPROTO op_enter_mmx(void) | |
| 2360 | +{ | |
| 2361 | + env->fpstt = 0; | |
| 2362 | + *(uint32_t *)(env->fptags) = 0; /* fptags[0..3] = 0 with a single 32-bit store */ | |
| 2363 | + *(uint32_t *)(env->fptags + 4) = 0; /* fptags[4..7] = 0 */ | |
| 2364 | +} | |
| 2365 | +/* Write the byte value 1 into each of the eight env->fptags entries, using two 32-bit stores. */ | |
| 2366 | +void OPPROTO op_emms(void) | |
| 2367 | +{ | |
| 2368 | + /* set to empty state */ | |
| 2369 | + *(uint32_t *)(env->fptags) = 0x01010101; | |
| 2370 | + *(uint32_t *)(env->fptags + 4) = 0x01010101; | |
| 2371 | +} | |
| 2372 | + | |
| 2373 | +#define SHIFT 0 | |
| 2374 | +#include "ops_sse.h" | |
| 2375 | + | |
| 2376 | +#define SHIFT 1 | |
| 2377 | +#include "ops_sse.h" | ... | ... |
target-i386/ops_mem.h
| ... | ... | @@ -80,7 +80,21 @@ void OPPROTO glue(glue(op_stl, MEMSUFFIX), _T1_A0)(void) |
| 80 | 80 | glue(stl, MEMSUFFIX)(A0, T1); |
| 81 | 81 | } |
| 82 | 82 | |
| 83 | -/* SSE support */ | |
| 83 | +/* SSE/MMX support */ | |
| 84 | +void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _env_A0)(void) /* load 64 bits from address A0 into env + PARAM1 */ | |
| 85 | +{ | |
| 86 | + uint64_t *p; | |
| 87 | + p = (uint64_t *)((char *)env + PARAM1); /* destination field: byte offset PARAM1 from env */ | |
| 88 | + *p = glue(ldq, MEMSUFFIX)(A0); /* the ldq accessor is chosen by MEMSUFFIX */ | |
| 89 | +} | |
| 90 | +/* Store the 64-bit value found at byte offset PARAM1 of env to address A0, via the stq accessor chosen by MEMSUFFIX. */ | |
| 91 | +void OPPROTO glue(glue(op_stq, MEMSUFFIX), _env_A0)(void) | |
| 92 | +{ | |
| 93 | + uint64_t *p; | |
| 94 | + p = (uint64_t *)((char *)env + PARAM1); | |
| 95 | + glue(stq, MEMSUFFIX)(A0, *p); | |
| 96 | +} | |
| 97 | + | |
| 84 | 98 | void OPPROTO glue(glue(op_ldo, MEMSUFFIX), _env_A0)(void) |
| 85 | 99 | { |
| 86 | 100 | XMMReg *p; | ... | ... |
target-i386/ops_sse.h
0 → 100644
| 1 | +/* | |
| 2 | + * MMX/SSE/SSE2/PNI support | |
| 3 | + * | |
| 4 | + * Copyright (c) 2005 Fabrice Bellard | |
| 5 | + * | |
| 6 | + * This library is free software; you can redistribute it and/or | |
| 7 | + * modify it under the terms of the GNU Lesser General Public | |
| 8 | + * License as published by the Free Software Foundation; either | |
| 9 | + * version 2 of the License, or (at your option) any later version. | |
| 10 | + * | |
| 11 | + * This library is distributed in the hope that it will be useful, | |
| 12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 14 | + * Lesser General Public License for more details. | |
| 15 | + * | |
| 16 | + * You should have received a copy of the GNU Lesser General Public | |
| 17 | + * License along with this library; if not, write to the Free Software | |
| 18 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
| 19 | + */ | |
| 20 | +#if SHIFT == 0 /* MMX flavour: ops work on MMXReg and are named with the _mmx suffix */ | |
| 21 | +#define Reg MMXReg | |
| 22 | +#define XMM_ONLY(x...) /* expands to nothing, dropping the statements passed to it */ | |
| 23 | +#define B(n) MMX_B(n) | |
| 24 | +#define W(n) MMX_W(n) | |
| 25 | +#define L(n) MMX_L(n) | |
| 26 | +#define Q(n) q /* every index maps to the single member q */ | |
| 27 | +#define SUFFIX _mmx | |
| 28 | +#else /* XMM flavour: ops work on XMMReg and are named with the _xmm suffix */ | |
| 29 | +#define Reg XMMReg | |
| 30 | +#define XMM_ONLY(x...) x /* keeps the statements passed to it */ | |
| 31 | +#define B(n) XMM_B(n) | |
| 32 | +#define W(n) XMM_W(n) | |
| 33 | +#define L(n) XMM_L(n) | |
| 34 | +#define Q(n) XMM_Q(n) | |
| 35 | +#define SUFFIX _xmm | |
| 36 | +#endif | |
| 37 | +/* Right-shift every W lane of d (at env + PARAM1) by the count held in s (at env + PARAM2); a count above 15 clears d. */ | |
| 38 | +void OPPROTO glue(op_psrlw, SUFFIX)(void) | |
| 39 | +{ | |
| 40 | + Reg *d, *s; | |
| 41 | + int shift; | |
| 42 | + | |
| 43 | + d = (Reg *)((char *)env + PARAM1); | |
| 44 | + s = (Reg *)((char *)env + PARAM2); | |
| 45 | + | |
| 46 | + if (s->Q(0) > 15) { /* the count is read as the whole Q(0) element of s */ | |
| 47 | + d->Q(0) = 0; | |
| 48 | +#if SHIFT == 1 | |
| 49 | + d->Q(1) = 0; | |
| 50 | +#endif | |
| 51 | + } else { | |
| 52 | + shift = s->B(0); /* Q(0) <= 15 here, so byte 0 carries the full count */ | |
| 53 | + d->W(0) >>= shift; | |
| 54 | + d->W(1) >>= shift; | |
| 55 | + d->W(2) >>= shift; | |
| 56 | + d->W(3) >>= shift; | |
| 57 | +#if SHIFT == 1 | |
| 58 | + d->W(4) >>= shift; | |
| 59 | + d->W(5) >>= shift; | |
| 60 | + d->W(6) >>= shift; | |
| 61 | + d->W(7) >>= shift; | |
| 62 | +#endif | |
| 63 | + } | |
| 64 | +} | |
| 65 | +/* Right-shift every W lane of d as an int16_t by the count held in s; a count above 15 is replaced by 15. */ | |
| 66 | +void OPPROTO glue(op_psraw, SUFFIX)(void) | |
| 67 | +{ | |
| 68 | + Reg *d, *s; | |
| 69 | + int shift; | |
| 70 | + | |
| 71 | + d = (Reg *)((char *)env + PARAM1); | |
| 72 | + s = (Reg *)((char *)env + PARAM2); | |
| 73 | + | |
| 74 | + if (s->Q(0) > 15) { | |
| 75 | + shift = 15; /* oversized counts shift by lane width minus one instead of clearing */ | |
| 76 | + } else { | |
| 77 | + shift = s->B(0); | |
| 78 | + } | |
| 79 | + d->W(0) = (int16_t)d->W(0) >> shift; /* the int16_t cast makes the shift operate on a signed value */ | |
| 80 | + d->W(1) = (int16_t)d->W(1) >> shift; | |
| 81 | + d->W(2) = (int16_t)d->W(2) >> shift; | |
| 82 | + d->W(3) = (int16_t)d->W(3) >> shift; | |
| 83 | +#if SHIFT == 1 | |
| 84 | + d->W(4) = (int16_t)d->W(4) >> shift; | |
| 85 | + d->W(5) = (int16_t)d->W(5) >> shift; | |
| 86 | + d->W(6) = (int16_t)d->W(6) >> shift; | |
| 87 | + d->W(7) = (int16_t)d->W(7) >> shift; | |
| 88 | +#endif | |
| 89 | +} | |
| 90 | +/* Left-shift every W lane of d (at env + PARAM1) by the count held in s (at env + PARAM2); a count above 15 clears d. */ | |
| 91 | +void OPPROTO glue(op_psllw, SUFFIX)(void) | |
| 92 | +{ | |
| 93 | + Reg *d, *s; | |
| 94 | + int shift; | |
| 95 | + | |
| 96 | + d = (Reg *)((char *)env + PARAM1); | |
| 97 | + s = (Reg *)((char *)env + PARAM2); | |
| 98 | + | |
| 99 | + if (s->Q(0) > 15) { | |
| 100 | + d->Q(0) = 0; | |
| 101 | +#if SHIFT == 1 | |
| 102 | + d->Q(1) = 0; | |
| 103 | +#endif | |
| 104 | + } else { | |
| 105 | + shift = s->B(0); /* Q(0) <= 15 here, so byte 0 carries the full count */ | |
| 106 | + d->W(0) <<= shift; | |
| 107 | + d->W(1) <<= shift; | |
| 108 | + d->W(2) <<= shift; | |
| 109 | + d->W(3) <<= shift; | |
| 110 | +#if SHIFT == 1 | |
| 111 | + d->W(4) <<= shift; | |
| 112 | + d->W(5) <<= shift; | |
| 113 | + d->W(6) <<= shift; | |
| 114 | + d->W(7) <<= shift; | |
| 115 | +#endif | |
| 116 | + } | |
| 117 | +} | |
| 118 | +/* Right-shift every L lane of d (at env + PARAM1) by the count held in s (at env + PARAM2); a count above 31 clears d. */ | |
| 119 | +void OPPROTO glue(op_psrld, SUFFIX)(void) | |
| 120 | +{ | |
| 121 | + Reg *d, *s; | |
| 122 | + int shift; | |
| 123 | + | |
| 124 | + d = (Reg *)((char *)env + PARAM1); | |
| 125 | + s = (Reg *)((char *)env + PARAM2); | |
| 126 | + | |
| 127 | + if (s->Q(0) > 31) { | |
| 128 | + d->Q(0) = 0; | |
| 129 | +#if SHIFT == 1 | |
| 130 | + d->Q(1) = 0; | |
| 131 | +#endif | |
| 132 | + } else { | |
| 133 | + shift = s->B(0); /* Q(0) <= 31 here, so byte 0 carries the full count */ | |
| 134 | + d->L(0) >>= shift; | |
| 135 | + d->L(1) >>= shift; | |
| 136 | +#if SHIFT == 1 | |
| 137 | + d->L(2) >>= shift; | |
| 138 | + d->L(3) >>= shift; | |
| 139 | +#endif | |
| 140 | + } | |
| 141 | +} | |
| 142 | +/* Right-shift every L lane of d as an int32_t by the count held in s; a count above 31 is replaced by 31. */ | |
| 143 | +void OPPROTO glue(op_psrad, SUFFIX)(void) | |
| 144 | +{ | |
| 145 | + Reg *d, *s; | |
| 146 | + int shift; | |
| 147 | + | |
| 148 | + d = (Reg *)((char *)env + PARAM1); | |
| 149 | + s = (Reg *)((char *)env + PARAM2); | |
| 150 | + | |
| 151 | + if (s->Q(0) > 31) { | |
| 152 | + shift = 31; /* oversized counts shift by lane width minus one instead of clearing */ | |
| 153 | + } else { | |
| 154 | + shift = s->B(0); | |
| 155 | + } | |
| 156 | + d->L(0) = (int32_t)d->L(0) >> shift; /* the int32_t cast makes the shift operate on a signed value */ | |
| 157 | + d->L(1) = (int32_t)d->L(1) >> shift; | |
| 158 | +#if SHIFT == 1 | |
| 159 | + d->L(2) = (int32_t)d->L(2) >> shift; | |
| 160 | + d->L(3) = (int32_t)d->L(3) >> shift; | |
| 161 | +#endif | |
| 162 | +} | |
| 163 | +/* Left-shift every L lane of d (at env + PARAM1) by the count held in s (at env + PARAM2); a count above 31 clears d. */ | |
| 164 | +void OPPROTO glue(op_pslld, SUFFIX)(void) | |
| 165 | +{ | |
| 166 | + Reg *d, *s; | |
| 167 | + int shift; | |
| 168 | + | |
| 169 | + d = (Reg *)((char *)env + PARAM1); | |
| 170 | + s = (Reg *)((char *)env + PARAM2); | |
| 171 | + | |
| 172 | + if (s->Q(0) > 31) { | |
| 173 | + d->Q(0) = 0; | |
| 174 | +#if SHIFT == 1 | |
| 175 | + d->Q(1) = 0; | |
| 176 | +#endif | |
| 177 | + } else { | |
| 178 | + shift = s->B(0); /* Q(0) <= 31 here, so byte 0 carries the full count */ | |
| 179 | + d->L(0) <<= shift; | |
| 180 | + d->L(1) <<= shift; | |
| 181 | +#if SHIFT == 1 | |
| 182 | + d->L(2) <<= shift; | |
| 183 | + d->L(3) <<= shift; | |
| 184 | +#endif | |
| 185 | + } | |
| 186 | +} | |
| 187 | +/* Right-shift every Q lane of d (at env + PARAM1) by the count held in s (at env + PARAM2); a count above 63 clears d. */ | |
| 188 | +void OPPROTO glue(op_psrlq, SUFFIX)(void) | |
| 189 | +{ | |
| 190 | + Reg *d, *s; | |
| 191 | + int shift; | |
| 192 | + | |
| 193 | + d = (Reg *)((char *)env + PARAM1); | |
| 194 | + s = (Reg *)((char *)env + PARAM2); | |
| 195 | + | |
| 196 | + if (s->Q(0) > 63) { | |
| 197 | + d->Q(0) = 0; | |
| 198 | +#if SHIFT == 1 | |
| 199 | + d->Q(1) = 0; | |
| 200 | +#endif | |
| 201 | + } else { | |
| 202 | + shift = s->B(0); /* Q(0) <= 63 here, so byte 0 carries the full count */ | |
| 203 | + d->Q(0) >>= shift; | |
| 204 | +#if SHIFT == 1 | |
| 205 | + d->Q(1) >>= shift; | |
| 206 | +#endif | |
| 207 | + } | |
| 208 | +} | |
| 209 | +/* Left-shift every Q lane of d (at env + PARAM1) by the count held in s (at env + PARAM2); a count above 63 clears d. */ | |
| 210 | +void OPPROTO glue(op_psllq, SUFFIX)(void) | |
| 211 | +{ | |
| 212 | + Reg *d, *s; | |
| 213 | + int shift; | |
| 214 | + | |
| 215 | + d = (Reg *)((char *)env + PARAM1); | |
| 216 | + s = (Reg *)((char *)env + PARAM2); | |
| 217 | + | |
| 218 | + if (s->Q(0) > 63) { | |
| 219 | + d->Q(0) = 0; | |
| 220 | +#if SHIFT == 1 | |
| 221 | + d->Q(1) = 0; | |
| 222 | +#endif | |
| 223 | + } else { | |
| 224 | + shift = s->B(0); /* Q(0) <= 63 here, so byte 0 carries the full count */ | |
| 225 | + d->Q(0) <<= shift; | |
| 226 | +#if SHIFT == 1 | |
| 227 | + d->Q(1) <<= shift; | |
| 228 | +#endif | |
| 229 | + } | |
| 230 | +} | |
| 231 | + | |
| 232 | +#if SHIFT == 1 | |
| 233 | +void OPPROTO glue(op_psrldq, SUFFIX)(void) | |
| 234 | +{ /* move the 16 bytes of d down by s->L(0) byte positions and zero the vacated top bytes; counts above 16 act as 16 */ | |
| 235 | + Reg *d, *s; | |
| 236 | + int shift, i; | |
| 237 | + | |
| 238 | + d = (Reg *)((char *)env + PARAM1); | |
| 239 | + s = (Reg *)((char *)env + PARAM2); | |
| 240 | + shift = s->L(0); | |
| 241 | + if (s->L(0) > 16) /* compare the unsigned lane itself: a count >= 2^31 looks negative once stored in 'shift' and would slip past a signed test */ | |
| 242 | + shift = 16; | |
| 243 | + for(i = 0; i < 16 - shift; i++) | |
| 244 | + d->B(i) = d->B(i + shift); /* ascending order, so each source byte is read before it is overwritten */ | |
| 245 | + for(i = 16 - shift; i < 16; i++) | |
| 246 | + d->B(i) = 0; | |
| 247 | + FORCE_RET(); | |
| 248 | +} | |
| 249 | +/* Move the 16 bytes of d up by s->L(0) byte positions and zero the vacated low bytes; counts above 16 act as 16. */ | |
| 250 | +void OPPROTO glue(op_pslldq, SUFFIX)(void) | |
| 251 | +{ | |
| 252 | + Reg *d, *s; | |
| 253 | + int shift, i; | |
| 254 | + | |
| 255 | + d = (Reg *)((char *)env + PARAM1); | |
| 256 | + s = (Reg *)((char *)env + PARAM2); | |
| 257 | + shift = s->L(0); | |
| 258 | + if (s->L(0) > 16) /* compare the unsigned lane itself: a count >= 2^31 looks negative once stored in 'shift' and would slip past a signed test */ | |
| 259 | + shift = 16; | |
| 260 | + for(i = 15; i >= shift; i--) | |
| 261 | + d->B(i) = d->B(i - shift); /* descending order, so each source byte is read before it is overwritten */ | |
| 262 | + for(i = 0; i < shift; i++) | |
| 263 | + d->B(i) = 0; | |
| 264 | + FORCE_RET(); | |
| 265 | +} | |
| 266 | +#endif | |
| 267 | +/* SSE_OP_B(name, F): define op name##SUFFIX that sets each B lane of d to F(d lane, s lane); lanes 8-15 are emitted only through XMM_ONLY. */ | |
| 268 | +#define SSE_OP_B(name, F)\ | |
| 269 | +void OPPROTO glue(name, SUFFIX) (void)\ | |
| 270 | +{\ | |
| 271 | + Reg *d, *s;\ | |
| 272 | + d = (Reg *)((char *)env + PARAM1);\ | |
| 273 | + s = (Reg *)((char *)env + PARAM2);\ | |
| 274 | + d->B(0) = F(d->B(0), s->B(0));\ | |
| 275 | + d->B(1) = F(d->B(1), s->B(1));\ | |
| 276 | + d->B(2) = F(d->B(2), s->B(2));\ | |
| 277 | + d->B(3) = F(d->B(3), s->B(3));\ | |
| 278 | + d->B(4) = F(d->B(4), s->B(4));\ | |
| 279 | + d->B(5) = F(d->B(5), s->B(5));\ | |
| 280 | + d->B(6) = F(d->B(6), s->B(6));\ | |
| 281 | + d->B(7) = F(d->B(7), s->B(7));\ | |
| 282 | + XMM_ONLY(\ | |
| 283 | + d->B(8) = F(d->B(8), s->B(8));\ | |
| 284 | + d->B(9) = F(d->B(9), s->B(9));\ | |
| 285 | + d->B(10) = F(d->B(10), s->B(10));\ | |
| 286 | + d->B(11) = F(d->B(11), s->B(11));\ | |
| 287 | + d->B(12) = F(d->B(12), s->B(12));\ | |
| 288 | + d->B(13) = F(d->B(13), s->B(13));\ | |
| 289 | + d->B(14) = F(d->B(14), s->B(14));\ | |
| 290 | + d->B(15) = F(d->B(15), s->B(15));\ | |
| 291 | + )\ | |
| 292 | +} | |
| 293 | +/* SSE_OP_W(name, F): define op name##SUFFIX that sets each W lane of d to F(d lane, s lane); lanes 4-7 are emitted only through XMM_ONLY. */ | |
| 294 | +#define SSE_OP_W(name, F)\ | |
| 295 | +void OPPROTO glue(name, SUFFIX) (void)\ | |
| 296 | +{\ | |
| 297 | + Reg *d, *s;\ | |
| 298 | + d = (Reg *)((char *)env + PARAM1);\ | |
| 299 | + s = (Reg *)((char *)env + PARAM2);\ | |
| 300 | + d->W(0) = F(d->W(0), s->W(0));\ | |
| 301 | + d->W(1) = F(d->W(1), s->W(1));\ | |
| 302 | + d->W(2) = F(d->W(2), s->W(2));\ | |
| 303 | + d->W(3) = F(d->W(3), s->W(3));\ | |
| 304 | + XMM_ONLY(\ | |
| 305 | + d->W(4) = F(d->W(4), s->W(4));\ | |
| 306 | + d->W(5) = F(d->W(5), s->W(5));\ | |
| 307 | + d->W(6) = F(d->W(6), s->W(6));\ | |
| 308 | + d->W(7) = F(d->W(7), s->W(7));\ | |
| 309 | + )\ | |
| 310 | +} | |
| 311 | +/* SSE_OP_L(name, F): define op name##SUFFIX that sets each L lane of d to F(d lane, s lane); lanes 2-3 are emitted only through XMM_ONLY. */ | |
| 312 | +#define SSE_OP_L(name, F)\ | |
| 313 | +void OPPROTO glue(name, SUFFIX) (void)\ | |
| 314 | +{\ | |
| 315 | + Reg *d, *s;\ | |
| 316 | + d = (Reg *)((char *)env + PARAM1);\ | |
| 317 | + s = (Reg *)((char *)env + PARAM2);\ | |
| 318 | + d->L(0) = F(d->L(0), s->L(0));\ | |
| 319 | + d->L(1) = F(d->L(1), s->L(1));\ | |
| 320 | + XMM_ONLY(\ | |
| 321 | + d->L(2) = F(d->L(2), s->L(2));\ | |
| 322 | + d->L(3) = F(d->L(3), s->L(3));\ | |
| 323 | + )\ | |
| 324 | +} | |
| 325 | +/* SSE_OP_Q(name, F): define op name##SUFFIX that sets Q(0) of d to F(d lane, s lane); Q(1) is emitted only through XMM_ONLY. */ | |
| 326 | +#define SSE_OP_Q(name, F)\ | |
| 327 | +void OPPROTO glue(name, SUFFIX) (void)\ | |
| 328 | +{\ | |
| 329 | + Reg *d, *s;\ | |
| 330 | + d = (Reg *)((char *)env + PARAM1);\ | |
| 331 | + s = (Reg *)((char *)env + PARAM2);\ | |
| 332 | + d->Q(0) = F(d->Q(0), s->Q(0));\ | |
| 333 | + XMM_ONLY(\ | |
| 334 | + d->Q(1) = F(d->Q(1), s->Q(1));\ | |
| 335 | + )\ | |
| 336 | +} | |
| 337 | + | |
| 338 | +#if SHIFT == 0 | |
| 339 | +static inline int satub(int x) /* clamp x to the range [0, 255] */ | |
| 340 | +{ | |
| 341 | + if (x < 0) | |
| 342 | + return 0; | |
| 343 | + else if (x > 255) | |
| 344 | + return 255; | |
| 345 | + else | |
| 346 | + return x; | |
| 347 | +} | |
| 348 | +/* Clamp x to the range [0, 65535]. */ | |
| 349 | +static inline int satuw(int x) | |
| 350 | +{ | |
| 351 | + if (x < 0) | |
| 352 | + return 0; | |
| 353 | + else if (x > 65535) | |
| 354 | + return 65535; | |
| 355 | + else | |
| 356 | + return x; | |
| 357 | +} | |
| 358 | +/* Clamp x to the range [-128, 127]. */ | |
| 359 | +static inline int satsb(int x) | |
| 360 | +{ | |
| 361 | + if (x < -128) | |
| 362 | + return -128; | |
| 363 | + else if (x > 127) | |
| 364 | + return 127; | |
| 365 | + else | |
| 366 | + return x; | |
| 367 | +} | |
| 368 | +/* Clamp x to the range [-32768, 32767]. */ | |
| 369 | +static inline int satsw(int x) | |
| 370 | +{ | |
| 371 | + if (x < -32768) | |
| 372 | + return -32768; | |
| 373 | + else if (x > 32767) | |
| 374 | + return 32767; | |
| 375 | + else | |
| 376 | + return x; | |
| 377 | +} | |
| 378 | +/* Per-lane operations passed as F to the SSE_OP_* generators; every expansion is one parenthesised expression. */ | |
| 379 | +#define FADD(a, b) ((a) + (b)) | |
| 380 | +#define FADDUB(a, b) satub((a) + (b)) | |
| 381 | +#define FADDUW(a, b) satuw((a) + (b)) | |
| 382 | +#define FADDSB(a, b) satsb((int8_t)(a) + (int8_t)(b)) | |
| 383 | +#define FADDSW(a, b) satsw((int16_t)(a) + (int16_t)(b)) | |
| 384 | + | |
| 385 | +#define FSUB(a, b) ((a) - (b)) | |
| 386 | +#define FSUBUB(a, b) satub((a) - (b)) | |
| 387 | +#define FSUBUW(a, b) satuw((a) - (b)) | |
| 388 | +#define FSUBSB(a, b) satsb((int8_t)(a) - (int8_t)(b)) | |
| 389 | +#define FSUBSW(a, b) satsw((int16_t)(a) - (int16_t)(b)) | |
| 390 | +#define FMINUB(a, b) (((a) < (b)) ? (a) : (b)) | |
| 391 | +#define FMINSW(a, b) (((int16_t)(a) < (int16_t)(b)) ? (a) : (b)) | |
| 392 | +#define FMAXUB(a, b) (((a) > (b)) ? (a) : (b)) | |
| 393 | +#define FMAXSW(a, b) (((int16_t)(a) > (int16_t)(b)) ? (a) : (b)) | |
| 394 | + | |
| 395 | +#define FAND(a, b) ((a) & (b)) | |
| 396 | +#define FANDN(a, b) ((~(a)) & (b)) | |
| 397 | +#define FOR(a, b) ((a) | (b)) | |
| 398 | +#define FXOR(a, b) ((a) ^ (b)) | |
| 399 | + | |
| 400 | +#define FCMPGTB(a, b) ((int8_t)(a) > (int8_t)(b) ? -1 : 0) | |
| 401 | +#define FCMPGTW(a, b) ((int16_t)(a) > (int16_t)(b) ? -1 : 0) | |
| 402 | +#define FCMPGTL(a, b) ((int32_t)(a) > (int32_t)(b) ? -1 : 0) | |
| 403 | +#define FCMPEQ(a, b) ((a) == (b) ? -1 : 0) | |
| 404 | + | |
| 405 | +#define FMULLW(a, b) ((uint32_t)(a) * (uint32_t)(b)) /* multiply as unsigned: promoted uint16_t operands would overflow a signed int */ | |
| 406 | +#define FMULHUW(a, b) (((uint32_t)(a) * (uint32_t)(b)) >> 16) /* high half of the unsigned 16x16 product */ | |
| 407 | +#define FMULHW(a, b) (((int16_t)(a) * (int16_t)(b)) >> 16) /* the signed 16x16 product always fits in int */ | |
| 408 | + | |
| 409 | +#define FAVG(a, b) (((a) + (b) + 1) >> 1) | |
| 410 | +#endif | |
| 411 | + | |
/* wrapping add, per byte / word / doubleword / quadword element */
SSE_OP_B(op_paddb, FADD)
SSE_OP_W(op_paddw, FADD)
SSE_OP_L(op_paddl, FADD)
SSE_OP_Q(op_paddq, FADD)

/* wrapping subtract, per byte / word / doubleword / quadword element */
SSE_OP_B(op_psubb, FSUB)
SSE_OP_W(op_psubw, FSUB)
SSE_OP_L(op_psubl, FSUB)
SSE_OP_Q(op_psubq, FSUB)

/* saturating byte add/subtract (unsigned and signed) */
SSE_OP_B(op_paddusb, FADDUB)
SSE_OP_B(op_paddsb, FADDSB)
SSE_OP_B(op_psubusb, FSUBUB)
SSE_OP_B(op_psubsb, FSUBSB)

/* saturating word add/subtract (unsigned and signed) */
SSE_OP_W(op_paddusw, FADDUW)
SSE_OP_W(op_paddsw, FADDSW)
SSE_OP_W(op_psubusw, FSUBUW)
SSE_OP_W(op_psubsw, FSUBSW)

/* unsigned byte min/max */
SSE_OP_B(op_pminub, FMINUB)
SSE_OP_B(op_pmaxub, FMAXUB)

/* signed word min/max */
SSE_OP_W(op_pminsw, FMINSW)
SSE_OP_W(op_pmaxsw, FMAXSW)

/* bitwise logic, applied one quadword at a time */
SSE_OP_Q(op_pand, FAND)
SSE_OP_Q(op_pandn, FANDN)
SSE_OP_Q(op_por, FOR)
SSE_OP_Q(op_pxor, FXOR)

/* signed greater-than compare; result element is all ones or zero */
SSE_OP_B(op_pcmpgtb, FCMPGTB)
SSE_OP_W(op_pcmpgtw, FCMPGTW)
SSE_OP_L(op_pcmpgtl, FCMPGTL)

/* equality compare; result element is all ones or zero */
SSE_OP_B(op_pcmpeqb, FCMPEQ)
SSE_OP_W(op_pcmpeqw, FCMPEQ)
SSE_OP_L(op_pcmpeql, FCMPEQ)

/* word multiply: low half, unsigned high half, signed high half */
SSE_OP_W(op_pmullw, FMULLW)
SSE_OP_W(op_pmulhuw, FMULHUW)
SSE_OP_W(op_pmulhw, FMULHW)

/* rounded average of bytes / words */
SSE_OP_B(op_pavgb, FAVG)
SSE_OP_W(op_pavgw, FAVG)
| 457 | + | |
| 458 | +void OPPROTO glue(op_pmuludq, SUFFIX) (void) | |
| 459 | +{ | |
| 460 | + Reg *d, *s; | |
| 461 | + d = (Reg *)((char *)env + PARAM1); | |
| 462 | + s = (Reg *)((char *)env + PARAM2); | |
| 463 | + | |
| 464 | + d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0); | |
| 465 | +#if SHIFT == 1 | |
| 466 | + d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2); | |
| 467 | +#endif | |
| 468 | +} | |
| 469 | + | |
| 470 | +void OPPROTO glue(op_pmaddwd, SUFFIX) (void) | |
| 471 | +{ | |
| 472 | + int i; | |
| 473 | + Reg *d, *s; | |
| 474 | + d = (Reg *)((char *)env + PARAM1); | |
| 475 | + s = (Reg *)((char *)env + PARAM2); | |
| 476 | + | |
| 477 | + for(i = 0; i < (2 << SHIFT); i++) { | |
| 478 | + d->L(i) = (int16_t)s->W(2*i) * (int16_t)d->W(2*i) + | |
| 479 | + (int16_t)s->W(2*i+1) * (int16_t)d->W(2*i+1); | |
| 480 | + } | |
| 481 | +} | |
| 482 | + | |
| 483 | +#if SHIFT == 0 | |
/* Absolute value of a; used for the byte differences in op_psadbw. */
static inline int abs1(int a)
{
    return (a < 0) ? -a : a;
}
| 491 | +#endif | |
| 492 | +void OPPROTO glue(op_psadbw, SUFFIX) (void) | |
| 493 | +{ | |
| 494 | + unsigned int val; | |
| 495 | + Reg *d, *s; | |
| 496 | + d = (Reg *)((char *)env + PARAM1); | |
| 497 | + s = (Reg *)((char *)env + PARAM2); | |
| 498 | + | |
| 499 | + val = 0; | |
| 500 | + val += abs1(d->B(0) - s->B(0)); | |
| 501 | + val += abs1(d->B(1) - s->B(1)); | |
| 502 | + val += abs1(d->B(2) - s->B(2)); | |
| 503 | + val += abs1(d->B(3) - s->B(3)); | |
| 504 | + val += abs1(d->B(4) - s->B(4)); | |
| 505 | + val += abs1(d->B(5) - s->B(5)); | |
| 506 | + val += abs1(d->B(6) - s->B(6)); | |
| 507 | + val += abs1(d->B(7) - s->B(7)); | |
| 508 | + d->Q(0) = val; | |
| 509 | +#if SHIFT == 1 | |
| 510 | + val = 0; | |
| 511 | + val += abs1(d->B(8) - s->B(8)); | |
| 512 | + val += abs1(d->B(9) - s->B(9)); | |
| 513 | + val += abs1(d->B(10) - s->B(10)); | |
| 514 | + val += abs1(d->B(11) - s->B(11)); | |
| 515 | + val += abs1(d->B(12) - s->B(12)); | |
| 516 | + val += abs1(d->B(13) - s->B(13)); | |
| 517 | + val += abs1(d->B(14) - s->B(14)); | |
| 518 | + val += abs1(d->B(15) - s->B(15)); | |
| 519 | + d->Q(1) = val; | |
| 520 | +#endif | |
| 521 | +} | |
| 522 | + | |
| 523 | +void OPPROTO glue(op_maskmov, SUFFIX) (void) | |
| 524 | +{ | |
| 525 | + int i; | |
| 526 | + Reg *d, *s; | |
| 527 | + d = (Reg *)((char *)env + PARAM1); | |
| 528 | + s = (Reg *)((char *)env + PARAM2); | |
| 529 | + for(i = 0; i < (8 << SHIFT); i++) { | |
| 530 | + if (s->B(i) & 0x80) | |
| 531 | + stb(A0, d->B(i)); | |
| 532 | + } | |
| 533 | +} | |
| 534 | + | |
| 535 | +void OPPROTO glue(op_movl_mm_T0, SUFFIX) (void) | |
| 536 | +{ | |
| 537 | + Reg *d; | |
| 538 | + d = (Reg *)((char *)env + PARAM1); | |
| 539 | + d->L(0) = T0; | |
| 540 | + d->L(1) = 0; | |
| 541 | +#if SHIFT == 1 | |
| 542 | + d->Q(1) = 0; | |
| 543 | +#endif | |
| 544 | +} | |
| 545 | + | |
| 546 | +void OPPROTO glue(op_movl_T0_mm, SUFFIX) (void) | |
| 547 | +{ | |
| 548 | + Reg *s; | |
| 549 | + s = (Reg *)((char *)env + PARAM1); | |
| 550 | + T0 = s->L(0); | |
| 551 | +} | |
| 552 | + | |
| 553 | +#if SHIFT == 0 | |
| 554 | +void OPPROTO glue(op_pshufw, SUFFIX) (void) | |
| 555 | +{ | |
| 556 | + Reg r, *d, *s; | |
| 557 | + int order; | |
| 558 | + d = (Reg *)((char *)env + PARAM1); | |
| 559 | + s = (Reg *)((char *)env + PARAM2); | |
| 560 | + order = PARAM3; | |
| 561 | + r.W(0) = s->W(order & 3); | |
| 562 | + r.W(1) = s->W((order >> 2) & 3); | |
| 563 | + r.W(2) = s->W((order >> 4) & 3); | |
| 564 | + r.W(3) = s->W((order >> 6) & 3); | |
| 565 | + *d = r; | |
| 566 | +} | |
| 567 | +#else | |
| 568 | +void OPPROTO op_shufpd(void) | |
| 569 | +{ | |
| 570 | + Reg r, *d, *s; | |
| 571 | + int order; | |
| 572 | + d = (Reg *)((char *)env + PARAM1); | |
| 573 | + s = (Reg *)((char *)env + PARAM2); | |
| 574 | + order = PARAM3; | |
| 575 | + r.Q(0) = s->Q(order & 1); | |
| 576 | + r.Q(1) = s->Q((order >> 1) & 1); | |
| 577 | + *d = r; | |
| 578 | +} | |
| 579 | + | |
| 580 | +void OPPROTO glue(op_pshufd, SUFFIX) (void) | |
| 581 | +{ | |
| 582 | + Reg r, *d, *s; | |
| 583 | + int order; | |
| 584 | + d = (Reg *)((char *)env + PARAM1); | |
| 585 | + s = (Reg *)((char *)env + PARAM2); | |
| 586 | + order = PARAM3; | |
| 587 | + r.L(0) = s->L(order & 3); | |
| 588 | + r.L(1) = s->L((order >> 2) & 3); | |
| 589 | + r.L(2) = s->L((order >> 4) & 3); | |
| 590 | + r.L(3) = s->L((order >> 6) & 3); | |
| 591 | + *d = r; | |
| 592 | +} | |
| 593 | + | |
| 594 | +void OPPROTO glue(op_pshuflw, SUFFIX) (void) | |
| 595 | +{ | |
| 596 | + Reg r, *d, *s; | |
| 597 | + int order; | |
| 598 | + d = (Reg *)((char *)env + PARAM1); | |
| 599 | + s = (Reg *)((char *)env + PARAM2); | |
| 600 | + order = PARAM3; | |
| 601 | + r.W(0) = s->W(order & 3); | |
| 602 | + r.W(1) = s->W((order >> 2) & 3); | |
| 603 | + r.W(2) = s->W((order >> 4) & 3); | |
| 604 | + r.W(3) = s->W((order >> 6) & 3); | |
| 605 | + r.Q(1) = s->Q(1); | |
| 606 | + *d = r; | |
| 607 | +} | |
| 608 | + | |
| 609 | +void OPPROTO glue(op_pshufhw, SUFFIX) (void) | |
| 610 | +{ | |
| 611 | + Reg r, *d, *s; | |
| 612 | + int order; | |
| 613 | + d = (Reg *)((char *)env + PARAM1); | |
| 614 | + s = (Reg *)((char *)env + PARAM2); | |
| 615 | + order = PARAM3; | |
| 616 | + r.Q(0) = s->Q(0); | |
| 617 | + r.W(4) = s->W(4 + (order & 3)); | |
| 618 | + r.W(5) = s->W(4 + ((order >> 2) & 3)); | |
| 619 | + r.W(6) = s->W(4 + ((order >> 4) & 3)); | |
| 620 | + r.W(7) = s->W(4 + ((order >> 6) & 3)); | |
| 621 | + *d = r; | |
| 622 | +} | |
| 623 | +#endif | |
| 624 | + | |
| 625 | +#if SHIFT == 1 | |
| 626 | +/* FPU ops */ | |
| 627 | +/* XXX: not accurate */ | |
| 628 | + | |
/*
 * SSE_OP_S(name, F) emits four ops built on the binary operator F:
 *   op_<name>ps - F on all four single-precision elements
 *   op_<name>ss - F on single-precision element 0 only
 *   op_<name>pd - F on both double-precision elements
 *   op_<name>sd - F on double-precision element 0 only
 * F is called as F(destination element, source element) and the result
 * replaces the destination element; untouched elements keep their value.
 */
#define SSE_OP_S(name, F)\
void OPPROTO op_ ## name ## ps (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_S(0) = F(dst->XMM_S(0), src->XMM_S(0));\
    dst->XMM_S(1) = F(dst->XMM_S(1), src->XMM_S(1));\
    dst->XMM_S(2) = F(dst->XMM_S(2), src->XMM_S(2));\
    dst->XMM_S(3) = F(dst->XMM_S(3), src->XMM_S(3));\
}\
\
void OPPROTO op_ ## name ## ss (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_S(0) = F(dst->XMM_S(0), src->XMM_S(0));\
}\
void OPPROTO op_ ## name ## pd (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_D(0) = F(dst->XMM_D(0), src->XMM_D(0));\
    dst->XMM_D(1) = F(dst->XMM_D(1), src->XMM_D(1));\
}\
\
void OPPROTO op_ ## name ## sd (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_D(0) = F(dst->XMM_D(0), src->XMM_D(0));\
}
| 664 | + | |
/*
 * Element operators for SSE_OP_S; a is the destination element and b
 * the source element.  Expansions are fully parenthesized so they keep
 * their meaning inside a larger expression.
 *
 * FPU_MIN / FPU_MAX yield b whenever the comparison is false, which
 * includes the case where either operand is a NaN.
 * FPU_SQRT ignores a and takes the square root of the source element.
 */
#define FPU_ADD(a, b) ((a) + (b))
#define FPU_SUB(a, b) ((a) - (b))
#define FPU_MUL(a, b) ((a) * (b))
#define FPU_DIV(a, b) ((a) / (b))
#define FPU_MIN(a, b) (((a) < (b)) ? (a) : (b))
#define FPU_MAX(a, b) (((a) > (b)) ? (a) : (b))
#define FPU_SQRT(a, b) (sqrt(b))
| 672 | + | |
/* each line emits the ps, ss, pd and sd variants of one operation */
SSE_OP_S(add, FPU_ADD)
SSE_OP_S(sub, FPU_SUB)
SSE_OP_S(mul, FPU_MUL)
SSE_OP_S(div, FPU_DIV)
SSE_OP_S(min, FPU_MIN)
SSE_OP_S(max, FPU_MAX)
/* FPU_SQRT uses only the source element */
SSE_OP_S(sqrt, FPU_SQRT)
| 680 | + | |
| 681 | + | |
| 682 | +/* float to float conversions */ | |
| 683 | +void OPPROTO op_cvtps2pd(void) | |
| 684 | +{ | |
| 685 | + float s0, s1; | |
| 686 | + Reg *d, *s; | |
| 687 | + d = (Reg *)((char *)env + PARAM1); | |
| 688 | + s = (Reg *)((char *)env + PARAM2); | |
| 689 | + s0 = s->XMM_S(0); | |
| 690 | + s1 = s->XMM_S(1); | |
| 691 | + d->XMM_D(0) = s0; | |
| 692 | + d->XMM_D(1) = s1; | |
| 693 | +} | |
| 694 | + | |
| 695 | +void OPPROTO op_cvtpd2ps(void) | |
| 696 | +{ | |
| 697 | + Reg *d, *s; | |
| 698 | + d = (Reg *)((char *)env + PARAM1); | |
| 699 | + s = (Reg *)((char *)env + PARAM2); | |
| 700 | + d->XMM_S(0) = s->XMM_D(0); | |
| 701 | + d->XMM_S(1) = s->XMM_D(1); | |
| 702 | + d->Q(1) = 0; | |
| 703 | +} | |
| 704 | + | |
| 705 | +void OPPROTO op_cvtss2sd(void) | |
| 706 | +{ | |
| 707 | + Reg *d, *s; | |
| 708 | + d = (Reg *)((char *)env + PARAM1); | |
| 709 | + s = (Reg *)((char *)env + PARAM2); | |
| 710 | + d->XMM_D(0) = s->XMM_S(0); | |
| 711 | +} | |
| 712 | + | |
| 713 | +void OPPROTO op_cvtsd2ss(void) | |
| 714 | +{ | |
| 715 | + Reg *d, *s; | |
| 716 | + d = (Reg *)((char *)env + PARAM1); | |
| 717 | + s = (Reg *)((char *)env + PARAM2); | |
| 718 | + d->XMM_S(0) = s->XMM_D(0); | |
| 719 | +} | |
| 720 | + | |
| 721 | +/* integer to float */ | |
| 722 | +void OPPROTO op_cvtdq2ps(void) | |
| 723 | +{ | |
| 724 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 725 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 726 | + d->XMM_S(0) = (int32_t)s->XMM_L(0); | |
| 727 | + d->XMM_S(1) = (int32_t)s->XMM_L(1); | |
| 728 | + d->XMM_S(2) = (int32_t)s->XMM_L(2); | |
| 729 | + d->XMM_S(3) = (int32_t)s->XMM_L(3); | |
| 730 | +} | |
| 731 | + | |
| 732 | +void OPPROTO op_cvtdq2pd(void) | |
| 733 | +{ | |
| 734 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 735 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 736 | + int32_t l0, l1; | |
| 737 | + l0 = (int32_t)s->XMM_L(0); | |
| 738 | + l1 = (int32_t)s->XMM_L(1); | |
| 739 | + d->XMM_D(0) = l0; | |
| 740 | + d->XMM_D(1) = l1; | |
| 741 | +} | |
| 742 | + | |
| 743 | +void OPPROTO op_cvtpi2ps(void) | |
| 744 | +{ | |
| 745 | + XMMReg *d = (Reg *)((char *)env + PARAM1); | |
| 746 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | |
| 747 | + d->XMM_S(0) = (int32_t)s->MMX_L(0); | |
| 748 | + d->XMM_S(1) = (int32_t)s->MMX_L(1); | |
| 749 | +} | |
| 750 | + | |
| 751 | +void OPPROTO op_cvtpi2pd(void) | |
| 752 | +{ | |
| 753 | + XMMReg *d = (Reg *)((char *)env + PARAM1); | |
| 754 | + MMXReg *s = (MMXReg *)((char *)env + PARAM2); | |
| 755 | + d->XMM_D(0) = (int32_t)s->MMX_L(0); | |
| 756 | + d->XMM_D(1) = (int32_t)s->MMX_L(1); | |
| 757 | +} | |
| 758 | + | |
| 759 | +void OPPROTO op_cvtsi2ss(void) | |
| 760 | +{ | |
| 761 | + XMMReg *d = (Reg *)((char *)env + PARAM1); | |
| 762 | + d->XMM_S(0) = (int32_t)T0; | |
| 763 | +} | |
| 764 | + | |
| 765 | +void OPPROTO op_cvtsi2sd(void) | |
| 766 | +{ | |
| 767 | + XMMReg *d = (Reg *)((char *)env + PARAM1); | |
| 768 | + d->XMM_D(0) = (int32_t)T0; | |
| 769 | +} | |
| 770 | + | |
| 771 | +#ifdef TARGET_X86_64 | |
| 772 | +void OPPROTO op_cvtsq2ss(void) | |
| 773 | +{ | |
| 774 | + XMMReg *d = (Reg *)((char *)env + PARAM1); | |
| 775 | + d->XMM_S(0) = (int64_t)T0; | |
| 776 | +} | |
| 777 | + | |
| 778 | +void OPPROTO op_cvtsq2sd(void) | |
| 779 | +{ | |
| 780 | + XMMReg *d = (Reg *)((char *)env + PARAM1); | |
| 781 | + d->XMM_D(0) = (int64_t)T0; | |
| 782 | +} | |
| 783 | +#endif | |
| 784 | + | |
| 785 | +/* float to integer */ | |
| 786 | +void OPPROTO op_cvtps2dq(void) | |
| 787 | +{ | |
| 788 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 789 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 790 | + d->XMM_L(0) = lrint(s->XMM_S(0)); | |
| 791 | + d->XMM_L(1) = lrint(s->XMM_S(1)); | |
| 792 | + d->XMM_L(2) = lrint(s->XMM_S(2)); | |
| 793 | + d->XMM_L(3) = lrint(s->XMM_S(3)); | |
| 794 | +} | |
| 795 | + | |
| 796 | +void OPPROTO op_cvtpd2dq(void) | |
| 797 | +{ | |
| 798 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 799 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 800 | + d->XMM_L(0) = lrint(s->XMM_D(0)); | |
| 801 | + d->XMM_L(1) = lrint(s->XMM_D(1)); | |
| 802 | + d->XMM_Q(1) = 0; | |
| 803 | +} | |
| 804 | + | |
| 805 | +void OPPROTO op_cvtps2pi(void) | |
| 806 | +{ | |
| 807 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | |
| 808 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 809 | + d->MMX_L(0) = lrint(s->XMM_S(0)); | |
| 810 | + d->MMX_L(1) = lrint(s->XMM_S(1)); | |
| 811 | +} | |
| 812 | + | |
| 813 | +void OPPROTO op_cvtpd2pi(void) | |
| 814 | +{ | |
| 815 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | |
| 816 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 817 | + d->MMX_L(0) = lrint(s->XMM_D(0)); | |
| 818 | + d->MMX_L(1) = lrint(s->XMM_D(1)); | |
| 819 | +} | |
| 820 | + | |
| 821 | +void OPPROTO op_cvtss2si(void) | |
| 822 | +{ | |
| 823 | + XMMReg *s = (XMMReg *)((char *)env + PARAM1); | |
| 824 | + T0 = (int32_t)lrint(s->XMM_S(0)); | |
| 825 | +} | |
| 826 | + | |
| 827 | +void OPPROTO op_cvtsd2si(void) | |
| 828 | +{ | |
| 829 | + XMMReg *s = (XMMReg *)((char *)env + PARAM1); | |
| 830 | + T0 = (int32_t)lrint(s->XMM_D(0)); | |
| 831 | +} | |
| 832 | + | |
| 833 | +#ifdef TARGET_X86_64 | |
| 834 | +void OPPROTO op_cvtss2sq(void) | |
| 835 | +{ | |
| 836 | + XMMReg *s = (XMMReg *)((char *)env + PARAM1); | |
| 837 | + T0 = llrint(s->XMM_S(0)); | |
| 838 | +} | |
| 839 | + | |
| 840 | +void OPPROTO op_cvtsd2sq(void) | |
| 841 | +{ | |
| 842 | + XMMReg *s = (XMMReg *)((char *)env + PARAM1); | |
| 843 | + T0 = llrint(s->XMM_D(0)); | |
| 844 | +} | |
| 845 | +#endif | |
| 846 | + | |
| 847 | +/* float to integer truncated */ | |
| 848 | +void OPPROTO op_cvttps2dq(void) | |
| 849 | +{ | |
| 850 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 851 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 852 | + d->XMM_L(0) = (int32_t)s->XMM_S(0); | |
| 853 | + d->XMM_L(1) = (int32_t)s->XMM_S(1); | |
| 854 | + d->XMM_L(2) = (int32_t)s->XMM_S(2); | |
| 855 | + d->XMM_L(3) = (int32_t)s->XMM_S(3); | |
| 856 | +} | |
| 857 | + | |
| 858 | +void OPPROTO op_cvttpd2dq(void) | |
| 859 | +{ | |
| 860 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 861 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 862 | + d->XMM_L(0) = (int32_t)s->XMM_D(0); | |
| 863 | + d->XMM_L(1) = (int32_t)s->XMM_D(1); | |
| 864 | + d->XMM_Q(1) = 0; | |
| 865 | +} | |
| 866 | + | |
| 867 | +void OPPROTO op_cvttps2pi(void) | |
| 868 | +{ | |
| 869 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | |
| 870 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 871 | + d->MMX_L(0) = (int32_t)(s->XMM_S(0)); | |
| 872 | + d->MMX_L(1) = (int32_t)(s->XMM_S(1)); | |
| 873 | +} | |
| 874 | + | |
| 875 | +void OPPROTO op_cvttpd2pi(void) | |
| 876 | +{ | |
| 877 | + MMXReg *d = (MMXReg *)((char *)env + PARAM1); | |
| 878 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 879 | + d->MMX_L(0) = (int32_t)(s->XMM_D(0)); | |
| 880 | + d->MMX_L(1) = (int32_t)(s->XMM_D(1)); | |
| 881 | +} | |
| 882 | + | |
| 883 | +void OPPROTO op_cvttss2si(void) | |
| 884 | +{ | |
| 885 | + XMMReg *s = (XMMReg *)((char *)env + PARAM1); | |
| 886 | + T0 = (int32_t)(s->XMM_S(0)); | |
| 887 | +} | |
| 888 | + | |
| 889 | +void OPPROTO op_cvttsd2si(void) | |
| 890 | +{ | |
| 891 | + XMMReg *s = (XMMReg *)((char *)env + PARAM1); | |
| 892 | + T0 = (int32_t)(s->XMM_D(0)); | |
| 893 | +} | |
| 894 | + | |
| 895 | +#ifdef TARGET_X86_64 | |
| 896 | +void OPPROTO op_cvttss2sq(void) | |
| 897 | +{ | |
| 898 | + XMMReg *s = (XMMReg *)((char *)env + PARAM1); | |
| 899 | + T0 = (int64_t)(s->XMM_S(0)); | |
| 900 | +} | |
| 901 | + | |
| 902 | +void OPPROTO op_cvttsd2sq(void) | |
| 903 | +{ | |
| 904 | + XMMReg *s = (XMMReg *)((char *)env + PARAM1); | |
| 905 | + T0 = (int64_t)(s->XMM_D(0)); | |
| 906 | +} | |
| 907 | +#endif | |
| 908 | + | |
| 909 | +void OPPROTO op_rsqrtps(void) | |
| 910 | +{ | |
| 911 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 912 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 913 | + d->XMM_S(0) = approx_rsqrt(s->XMM_S(0)); | |
| 914 | + d->XMM_S(1) = approx_rsqrt(s->XMM_S(1)); | |
| 915 | + d->XMM_S(2) = approx_rsqrt(s->XMM_S(2)); | |
| 916 | + d->XMM_S(3) = approx_rsqrt(s->XMM_S(3)); | |
| 917 | +} | |
| 918 | + | |
| 919 | +void OPPROTO op_rsqrtss(void) | |
| 920 | +{ | |
| 921 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 922 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 923 | + d->XMM_S(0) = approx_rsqrt(s->XMM_S(0)); | |
| 924 | +} | |
| 925 | + | |
| 926 | +void OPPROTO op_rcpps(void) | |
| 927 | +{ | |
| 928 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 929 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 930 | + d->XMM_S(0) = approx_rcp(s->XMM_S(0)); | |
| 931 | + d->XMM_S(1) = approx_rcp(s->XMM_S(1)); | |
| 932 | + d->XMM_S(2) = approx_rcp(s->XMM_S(2)); | |
| 933 | + d->XMM_S(3) = approx_rcp(s->XMM_S(3)); | |
| 934 | +} | |
| 935 | + | |
| 936 | +void OPPROTO op_rcpss(void) | |
| 937 | +{ | |
| 938 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 939 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 940 | + d->XMM_S(0) = approx_rcp(s->XMM_S(0)); | |
| 941 | +} | |
| 942 | + | |
| 943 | +void OPPROTO op_haddps(void) | |
| 944 | +{ | |
| 945 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 946 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 947 | + XMMReg r; | |
| 948 | + r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1); | |
| 949 | + r.XMM_S(1) = d->XMM_S(2) + d->XMM_S(3); | |
| 950 | + r.XMM_S(2) = s->XMM_S(0) + s->XMM_S(1); | |
| 951 | + r.XMM_S(3) = s->XMM_S(2) + s->XMM_S(3); | |
| 952 | + *d = r; | |
| 953 | +} | |
| 954 | + | |
| 955 | +void OPPROTO op_haddpd(void) | |
| 956 | +{ | |
| 957 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 958 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 959 | + XMMReg r; | |
| 960 | + r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1); | |
| 961 | + r.XMM_D(1) = s->XMM_D(0) + s->XMM_D(1); | |
| 962 | + *d = r; | |
| 963 | +} | |
| 964 | + | |
| 965 | +void OPPROTO op_hsubps(void) | |
| 966 | +{ | |
| 967 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 968 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 969 | + XMMReg r; | |
| 970 | + r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1); | |
| 971 | + r.XMM_S(1) = d->XMM_S(2) - d->XMM_S(3); | |
| 972 | + r.XMM_S(2) = s->XMM_S(0) - s->XMM_S(1); | |
| 973 | + r.XMM_S(3) = s->XMM_S(2) - s->XMM_S(3); | |
| 974 | + *d = r; | |
| 975 | +} | |
| 976 | + | |
| 977 | +void OPPROTO op_hsubpd(void) | |
| 978 | +{ | |
| 979 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 980 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 981 | + XMMReg r; | |
| 982 | + r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1); | |
| 983 | + r.XMM_D(1) = s->XMM_D(0) - s->XMM_D(1); | |
| 984 | + *d = r; | |
| 985 | +} | |
| 986 | + | |
| 987 | +void OPPROTO op_addsubps(void) | |
| 988 | +{ | |
| 989 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 990 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 991 | + d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0); | |
| 992 | + d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1); | |
| 993 | + d->XMM_S(2) = d->XMM_S(2) - s->XMM_S(2); | |
| 994 | + d->XMM_S(3) = d->XMM_S(3) + s->XMM_S(3); | |
| 995 | +} | |
| 996 | + | |
| 997 | +void OPPROTO op_addsubpd(void) | |
| 998 | +{ | |
| 999 | + XMMReg *d = (XMMReg *)((char *)env + PARAM1); | |
| 1000 | + XMMReg *s = (XMMReg *)((char *)env + PARAM2); | |
| 1001 | + d->XMM_D(0) = d->XMM_D(0) - s->XMM_D(0); | |
| 1002 | + d->XMM_D(1) = d->XMM_D(1) + s->XMM_D(1); | |
| 1003 | +} | |
| 1004 | + | |
| 1005 | +/* XXX: unordered */ | |
/*
 * SSE_OP_CMP(name, F) emits op_<name>ps, op_<name>ss, op_<name>pd and
 * op_<name>sd.  F(destination element, source element) is evaluated on
 * the floating-point values and its integer result is written over the
 * destination element through the same-sized integer view (XMM_L for
 * singles, XMM_Q for doubles).
 */
#define SSE_OP_CMP(name, F)\
void OPPROTO op_ ## name ## ps (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_L(0) = F(dst->XMM_S(0), src->XMM_S(0));\
    dst->XMM_L(1) = F(dst->XMM_S(1), src->XMM_S(1));\
    dst->XMM_L(2) = F(dst->XMM_S(2), src->XMM_S(2));\
    dst->XMM_L(3) = F(dst->XMM_S(3), src->XMM_S(3));\
}\
\
void OPPROTO op_ ## name ## ss (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_L(0) = F(dst->XMM_S(0), src->XMM_S(0));\
}\
void OPPROTO op_ ## name ## pd (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_Q(0) = F(dst->XMM_D(0), src->XMM_D(0));\
    dst->XMM_Q(1) = F(dst->XMM_D(1), src->XMM_D(1));\
}\
\
void OPPROTO op_ ## name ## sd (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_Q(0) = F(dst->XMM_D(0), src->XMM_D(0));\
}
| 1041 | + | |
/*
 * Comparison predicates for SSE_OP_CMP.  Each yields -1 (all bits set
 * once stored in the result element) when the predicate holds, else 0.
 * Expansions are fully parenthesized so they keep their meaning inside
 * a larger expression.
 *
 * The negated forms test the plain comparison and swap the results, so
 * they yield -1 when either operand is a NaN.
 */
#define FPU_CMPEQ(a, b) (((a) == (b)) ? -1 : 0)
#define FPU_CMPLT(a, b) (((a) < (b)) ? -1 : 0)
#define FPU_CMPLE(a, b) (((a) <= (b)) ? -1 : 0)
#define FPU_CMPUNORD(a, b) ((fpu_isnan(a) || fpu_isnan(b)) ? -1 : 0)
#define FPU_CMPNEQ(a, b) (((a) == (b)) ? 0 : -1)
#define FPU_CMPNLT(a, b) (((a) < (b)) ? 0 : -1)
#define FPU_CMPNLE(a, b) (((a) <= (b)) ? 0 : -1)
#define FPU_CMPORD(a, b) ((!fpu_isnan(a) && !fpu_isnan(b)) ? -1 : 0)
| 1050 | + | |
/* each line emits the ps, ss, pd and sd variants of one predicate */
SSE_OP_CMP(cmpeq, FPU_CMPEQ)
SSE_OP_CMP(cmplt, FPU_CMPLT)
SSE_OP_CMP(cmple, FPU_CMPLE)
SSE_OP_CMP(cmpunord, FPU_CMPUNORD)
/* negated predicates */
SSE_OP_CMP(cmpneq, FPU_CMPNEQ)
SSE_OP_CMP(cmpnlt, FPU_CMPNLT)
SSE_OP_CMP(cmpnle, FPU_CMPNLE)
SSE_OP_CMP(cmpord, FPU_CMPORD)
| 1059 | + | |
| 1060 | +void OPPROTO op_ucomiss(void) | |
| 1061 | +{ | |
| 1062 | + int eflags; | |
| 1063 | + float s0, s1; | |
| 1064 | + Reg *d, *s; | |
| 1065 | + d = (Reg *)((char *)env + PARAM1); | |
| 1066 | + s = (Reg *)((char *)env + PARAM2); | |
| 1067 | + | |
| 1068 | + s0 = d->XMM_S(0); | |
| 1069 | + s1 = s->XMM_S(0); | |
| 1070 | + if (s0 < s1) | |
| 1071 | + eflags = CC_C; | |
| 1072 | + else if (s0 == s1) | |
| 1073 | + eflags = CC_Z; | |
| 1074 | + else | |
| 1075 | + eflags = 0; | |
| 1076 | + CC_SRC = eflags; | |
| 1077 | + FORCE_RET(); | |
| 1078 | +} | |
| 1079 | + | |
| 1080 | +void OPPROTO op_comiss(void) | |
| 1081 | +{ | |
| 1082 | + int eflags; | |
| 1083 | + float s0, s1; | |
| 1084 | + Reg *d, *s; | |
| 1085 | + d = (Reg *)((char *)env + PARAM1); | |
| 1086 | + s = (Reg *)((char *)env + PARAM2); | |
| 1087 | + | |
| 1088 | + s0 = d->XMM_S(0); | |
| 1089 | + s1 = s->XMM_S(0); | |
| 1090 | + if (s0 < s1) | |
| 1091 | + eflags = CC_C; | |
| 1092 | + else if (s0 == s1) | |
| 1093 | + eflags = CC_Z; | |
| 1094 | + else | |
| 1095 | + eflags = 0; | |
| 1096 | + CC_SRC = eflags; | |
| 1097 | + FORCE_RET(); | |
| 1098 | +} | |
| 1099 | + | |
| 1100 | +void OPPROTO op_ucomisd(void) | |
| 1101 | +{ | |
| 1102 | + int eflags; | |
| 1103 | + double d0, d1; | |
| 1104 | + Reg *d, *s; | |
| 1105 | + d = (Reg *)((char *)env + PARAM1); | |
| 1106 | + s = (Reg *)((char *)env + PARAM2); | |
| 1107 | + | |
| 1108 | + d0 = d->XMM_D(0); | |
| 1109 | + d1 = s->XMM_D(0); | |
| 1110 | + if (d0 < d1) | |
| 1111 | + eflags = CC_C; | |
| 1112 | + else if (d0 == d1) | |
| 1113 | + eflags = CC_Z; | |
| 1114 | + else | |
| 1115 | + eflags = 0; | |
| 1116 | + CC_SRC = eflags; | |
| 1117 | + FORCE_RET(); | |
| 1118 | +} | |
| 1119 | + | |
| 1120 | +void OPPROTO op_comisd(void) | |
| 1121 | +{ | |
| 1122 | + int eflags; | |
| 1123 | + double d0, d1; | |
| 1124 | + Reg *d, *s; | |
| 1125 | + d = (Reg *)((char *)env + PARAM1); | |
| 1126 | + s = (Reg *)((char *)env + PARAM2); | |
| 1127 | + | |
| 1128 | + d0 = d->XMM_D(0); | |
| 1129 | + d1 = s->XMM_D(0); | |
| 1130 | + if (d0 < d1) | |
| 1131 | + eflags = CC_C; | |
| 1132 | + else if (d0 == d1) | |
| 1133 | + eflags = CC_Z; | |
| 1134 | + else | |
| 1135 | + eflags = 0; | |
| 1136 | + CC_SRC = eflags; | |
| 1137 | + FORCE_RET(); | |
| 1138 | +} | |
| 1139 | + | |
| 1140 | +void OPPROTO op_movmskps(void) | |
| 1141 | +{ | |
| 1142 | + int b0, b1, b2, b3; | |
| 1143 | + Reg *s; | |
| 1144 | + s = (Reg *)((char *)env + PARAM1); | |
| 1145 | + b0 = s->XMM_L(0) >> 31; | |
| 1146 | + b1 = s->XMM_L(1) >> 31; | |
| 1147 | + b2 = s->XMM_L(2) >> 31; | |
| 1148 | + b3 = s->XMM_L(3) >> 31; | |
| 1149 | + T0 = b0 | (b1 << 1) | (b2 << 2) | (b3 << 3); | |
| 1150 | +} | |
| 1151 | + | |
| 1152 | +void OPPROTO op_movmskpd(void) | |
| 1153 | +{ | |
| 1154 | + int b0, b1; | |
| 1155 | + Reg *s; | |
| 1156 | + s = (Reg *)((char *)env + PARAM1); | |
| 1157 | + b0 = s->XMM_L(1) >> 31; | |
| 1158 | + b1 = s->XMM_L(3) >> 31; | |
| 1159 | + T0 = b0 | (b1 << 1); | |
| 1160 | +} | |
| 1161 | + | |
| 1162 | +#endif | |
| 1163 | + | |
| 1164 | +void OPPROTO glue(op_pmovmskb, SUFFIX)(void) | |
| 1165 | +{ | |
| 1166 | + Reg *s; | |
| 1167 | + s = (Reg *)((char *)env + PARAM1); | |
| 1168 | + T0 = 0; | |
| 1169 | + T0 |= (s->XMM_B(0) >> 7); | |
| 1170 | + T0 |= (s->XMM_B(1) >> 6) & 0x02; | |
| 1171 | + T0 |= (s->XMM_B(2) >> 5) & 0x04; | |
| 1172 | + T0 |= (s->XMM_B(3) >> 4) & 0x08; | |
| 1173 | + T0 |= (s->XMM_B(4) >> 3) & 0x10; | |
| 1174 | + T0 |= (s->XMM_B(5) >> 2) & 0x20; | |
| 1175 | + T0 |= (s->XMM_B(6) >> 1) & 0x40; | |
| 1176 | + T0 |= (s->XMM_B(7)) & 0x80; | |
| 1177 | +#if SHIFT == 1 | |
| 1178 | + T0 |= (s->XMM_B(8) << 1) & 0x0100; | |
| 1179 | + T0 |= (s->XMM_B(9) << 2) & 0x0200; | |
| 1180 | + T0 |= (s->XMM_B(10) << 3) & 0x0400; | |
| 1181 | + T0 |= (s->XMM_B(11) << 4) & 0x0800; | |
| 1182 | + T0 |= (s->XMM_B(12) << 5) & 0x1000; | |
| 1183 | + T0 |= (s->XMM_B(13) << 6) & 0x2000; | |
| 1184 | + T0 |= (s->XMM_B(14) << 7) & 0x4000; | |
| 1185 | + T0 |= (s->XMM_B(15) << 8) & 0x8000; | |
| 1186 | +#endif | |
| 1187 | +} | |
| 1188 | + | |
| 1189 | +void OPPROTO glue(op_pinsrw, SUFFIX) (void) | |
| 1190 | +{ | |
| 1191 | + Reg *d = (Reg *)((char *)env + PARAM1); | |
| 1192 | + int pos = PARAM2; | |
| 1193 | + | |
| 1194 | + d->W(pos) = T0; | |
| 1195 | +} | |
| 1196 | + | |
| 1197 | +void OPPROTO glue(op_pextrw, SUFFIX) (void) | |
| 1198 | +{ | |
| 1199 | + Reg *s = (Reg *)((char *)env + PARAM1); | |
| 1200 | + int pos = PARAM2; | |
| 1201 | + | |
| 1202 | + T0 = s->W(pos); | |
| 1203 | +} | |
| 1204 | + | |
| 1205 | +void OPPROTO glue(op_packsswb, SUFFIX) (void) | |
| 1206 | +{ | |
| 1207 | + Reg r, *d, *s; | |
| 1208 | + d = (Reg *)((char *)env + PARAM1); | |
| 1209 | + s = (Reg *)((char *)env + PARAM2); | |
| 1210 | + | |
| 1211 | + r.B(0) = satsb((int16_t)d->W(0)); | |
| 1212 | + r.B(1) = satsb((int16_t)d->W(1)); | |
| 1213 | + r.B(2) = satsb((int16_t)d->W(2)); | |
| 1214 | + r.B(3) = satsb((int16_t)d->W(3)); | |
| 1215 | +#if SHIFT == 1 | |
| 1216 | + r.B(4) = satsb((int16_t)d->W(4)); | |
| 1217 | + r.B(5) = satsb((int16_t)d->W(5)); | |
| 1218 | + r.B(6) = satsb((int16_t)d->W(6)); | |
| 1219 | + r.B(7) = satsb((int16_t)d->W(7)); | |
| 1220 | +#endif | |
| 1221 | + r.B((4 << SHIFT) + 0) = satsb((int16_t)s->W(0)); | |
| 1222 | + r.B((4 << SHIFT) + 1) = satsb((int16_t)s->W(1)); | |
| 1223 | + r.B((4 << SHIFT) + 2) = satsb((int16_t)s->W(2)); | |
| 1224 | + r.B((4 << SHIFT) + 3) = satsb((int16_t)s->W(3)); | |
| 1225 | +#if SHIFT == 1 | |
| 1226 | + r.B(12) = satsb((int16_t)s->W(4)); | |
| 1227 | + r.B(13) = satsb((int16_t)s->W(5)); | |
| 1228 | + r.B(14) = satsb((int16_t)s->W(6)); | |
| 1229 | + r.B(15) = satsb((int16_t)s->W(7)); | |
| 1230 | +#endif | |
| 1231 | + *d = r; | |
| 1232 | +} | |
| 1233 | + | |
| 1234 | +void OPPROTO glue(op_packuswb, SUFFIX) (void) | |
| 1235 | +{ | |
| 1236 | + Reg r, *d, *s; | |
| 1237 | + d = (Reg *)((char *)env + PARAM1); | |
| 1238 | + s = (Reg *)((char *)env + PARAM2); | |
| 1239 | + | |
| 1240 | + r.B(0) = satub((int16_t)d->W(0)); | |
| 1241 | + r.B(1) = satub((int16_t)d->W(1)); | |
| 1242 | + r.B(2) = satub((int16_t)d->W(2)); | |
| 1243 | + r.B(3) = satub((int16_t)d->W(3)); | |
| 1244 | +#if SHIFT == 1 | |
| 1245 | + r.B(4) = satub((int16_t)d->W(4)); | |
| 1246 | + r.B(5) = satub((int16_t)d->W(5)); | |
| 1247 | + r.B(6) = satub((int16_t)d->W(6)); | |
| 1248 | + r.B(7) = satub((int16_t)d->W(7)); | |
| 1249 | +#endif | |
| 1250 | + r.B((4 << SHIFT) + 0) = satub((int16_t)s->W(0)); | |
| 1251 | + r.B((4 << SHIFT) + 1) = satub((int16_t)s->W(1)); | |
| 1252 | + r.B((4 << SHIFT) + 2) = satub((int16_t)s->W(2)); | |
| 1253 | + r.B((4 << SHIFT) + 3) = satub((int16_t)s->W(3)); | |
| 1254 | +#if SHIFT == 1 | |
| 1255 | + r.B(12) = satub((int16_t)s->W(4)); | |
| 1256 | + r.B(13) = satub((int16_t)s->W(5)); | |
| 1257 | + r.B(14) = satub((int16_t)s->W(6)); | |
| 1258 | + r.B(15) = satub((int16_t)s->W(7)); | |
| 1259 | +#endif | |
| 1260 | + *d = r; | |
| 1261 | +} | |
| 1262 | + | |
| 1263 | +void OPPROTO glue(op_packssdw, SUFFIX) (void) | |
| 1264 | +{ | |
| 1265 | + Reg r, *d, *s; | |
| 1266 | + d = (Reg *)((char *)env + PARAM1); | |
| 1267 | + s = (Reg *)((char *)env + PARAM2); | |
| 1268 | + | |
| 1269 | + r.W(0) = satsw(d->L(0)); | |
| 1270 | + r.W(1) = satsw(d->L(1)); | |
| 1271 | +#if SHIFT == 1 | |
| 1272 | + r.W(2) = satsw(d->L(2)); | |
| 1273 | + r.W(3) = satsw(d->L(3)); | |
| 1274 | +#endif | |
| 1275 | + r.W((2 << SHIFT) + 0) = satsw(s->L(0)); | |
| 1276 | + r.W((2 << SHIFT) + 1) = satsw(s->L(1)); | |
| 1277 | +#if SHIFT == 1 | |
| 1278 | + r.W(6) = satsw(s->L(2)); | |
| 1279 | + r.W(7) = satsw(s->L(3)); | |
| 1280 | +#endif | |
| 1281 | + *d = r; | |
| 1282 | +} | |
| 1283 | + | |
| 1284 | +#define UNPCK_OP(base_name, base) \ | |
| 1285 | + \ | |
| 1286 | +void OPPROTO glue(op_punpck ## base_name ## bw, SUFFIX) (void) \ | |
| 1287 | +{ \ | |
| 1288 | + Reg r, *d, *s; \ | |
| 1289 | + d = (Reg *)((char *)env + PARAM1); \ | |
| 1290 | + s = (Reg *)((char *)env + PARAM2); \ | |
| 1291 | + \ | |
| 1292 | + r.B(0) = d->B((base << (SHIFT + 2)) + 0); \ | |
| 1293 | + r.B(1) = s->B((base << (SHIFT + 2)) + 0); \ | |
| 1294 | + r.B(2) = d->B((base << (SHIFT + 2)) + 1); \ | |
| 1295 | + r.B(3) = s->B((base << (SHIFT + 2)) + 1); \ | |
| 1296 | + r.B(4) = d->B((base << (SHIFT + 2)) + 2); \ | |
| 1297 | + r.B(5) = s->B((base << (SHIFT + 2)) + 2); \ | |
| 1298 | + r.B(6) = d->B((base << (SHIFT + 2)) + 3); \ | |
| 1299 | + r.B(7) = s->B((base << (SHIFT + 2)) + 3); \ | |
| 1300 | +XMM_ONLY( \ | |
| 1301 | + r.B(8) = d->B((base << (SHIFT + 2)) + 4); \ | |
| 1302 | + r.B(9) = s->B((base << (SHIFT + 2)) + 4); \ | |
| 1303 | + r.B(10) = d->B((base << (SHIFT + 2)) + 5); \ | |
| 1304 | + r.B(11) = s->B((base << (SHIFT + 2)) + 5); \ | |
| 1305 | + r.B(12) = d->B((base << (SHIFT + 2)) + 6); \ | |
| 1306 | + r.B(13) = s->B((base << (SHIFT + 2)) + 6); \ | |
| 1307 | + r.B(14) = d->B((base << (SHIFT + 2)) + 7); \ | |
| 1308 | + r.B(15) = s->B((base << (SHIFT + 2)) + 7); \ | |
| 1309 | +) \ | |
| 1310 | + *d = r; \ | |
| 1311 | +} \ | |
| 1312 | + \ | |
| 1313 | +void OPPROTO glue(op_punpck ## base_name ## wd, SUFFIX) (void) \ | |
| 1314 | +{ \ | |
| 1315 | + Reg r, *d, *s; \ | |
| 1316 | + d = (Reg *)((char *)env + PARAM1); \ | |
| 1317 | + s = (Reg *)((char *)env + PARAM2); \ | |
| 1318 | + \ | |
| 1319 | + r.W(0) = d->W((base << (SHIFT + 1)) + 0); \ | |
| 1320 | + r.W(1) = s->W((base << (SHIFT + 1)) + 0); \ | |
| 1321 | + r.W(2) = d->W((base << (SHIFT + 1)) + 1); \ | |
| 1322 | + r.W(3) = s->W((base << (SHIFT + 1)) + 1); \ | |
| 1323 | +XMM_ONLY( \ | |
| 1324 | + r.W(4) = d->W((base << (SHIFT + 1)) + 2); \ | |
| 1325 | + r.W(5) = s->W((base << (SHIFT + 1)) + 2); \ | |
| 1326 | + r.W(6) = d->W((base << (SHIFT + 1)) + 3); \ | |
| 1327 | + r.W(7) = s->W((base << (SHIFT + 1)) + 3); \ | |
| 1328 | +) \ | |
| 1329 | + *d = r; \ | |
| 1330 | +} \ | |
| 1331 | + \ | |
| 1332 | +void OPPROTO glue(op_punpck ## base_name ## dq, SUFFIX) (void) \ | |
| 1333 | +{ \ | |
| 1334 | + Reg r, *d, *s; \ | |
| 1335 | + d = (Reg *)((char *)env + PARAM1); \ | |
| 1336 | + s = (Reg *)((char *)env + PARAM2); \ | |
| 1337 | + \ | |
| 1338 | + r.L(0) = d->L((base << SHIFT) + 0); \ | |
| 1339 | + r.L(1) = s->L((base << SHIFT) + 0); \ | |
| 1340 | +XMM_ONLY( \ | |
| 1341 | + r.L(2) = d->L((base << SHIFT) + 1); \ | |
| 1342 | + r.L(3) = s->L((base << SHIFT) + 1); \ | |
| 1343 | +) \ | |
| 1344 | + *d = r; \ | |
| 1345 | +} \ | |
| 1346 | + \ | |
| 1347 | +XMM_ONLY( \ | |
| 1348 | +void OPPROTO glue(op_punpck ## base_name ## qdq, SUFFIX) (void) \ | |
| 1349 | +{ \ | |
| 1350 | + Reg r, *d, *s; \ | |
| 1351 | + d = (Reg *)((char *)env + PARAM1); \ | |
| 1352 | + s = (Reg *)((char *)env + PARAM2); \ | |
| 1353 | + \ | |
| 1354 | + r.Q(0) = d->Q(base); \ | |
| 1355 | + r.Q(1) = s->Q(base); \ | |
| 1356 | + *d = r; \ | |
| 1357 | +} \ | |
| 1358 | +) | |
| 1359 | + | |
| 1360 | +UNPCK_OP(l, 0) | |
| 1361 | +UNPCK_OP(h, 1) | |
| 1362 | + | |
| 1363 | +#undef SHIFT | |
| 1364 | +#undef XMM_ONLY | |
| 1365 | +#undef Reg | |
| 1366 | +#undef B | |
| 1367 | +#undef W | |
| 1368 | +#undef L | |
| 1369 | +#undef Q | |
| 1370 | +#undef SUFFIX | ... | ... |
target-i386/translate.c
| ... | ... | @@ -1606,6 +1606,23 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ |
| 1606 | 1606 | *offset_ptr = disp; |
| 1607 | 1607 | } |
| 1608 | 1608 | |
| 1609 | +/* used for LEA and MOV AX, mem */ | |
| 1610 | +static void gen_add_A0_ds_seg(DisasContext *s) | |
| 1611 | +{ | |
| 1612 | + int override, must_add_seg; | |
| 1613 | + must_add_seg = s->addseg; | |
| 1614 | + override = R_DS; | |
| 1615 | + if (s->override >= 0) { | |
| 1616 | + override = s->override; | |
| 1617 | + must_add_seg = 1; | |
| 1618 | + } else { | |
| 1619 | + override = R_DS; | |
| 1620 | + } | |
| 1621 | + if (must_add_seg) { | |
| 1622 | + gen_op_addl_A0_seg(offsetof(CPUX86State,segs[override].base)); | |
| 1623 | + } | |
| 1624 | +} | |
| 1625 | + | |
| 1609 | 1626 | /* generate modrm memory load or store of 'reg'. TMP0 is used if reg != |
| 1610 | 1627 | OR_TMP0 */ |
| 1611 | 1628 | static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store) |
| ... | ... | @@ -2193,6 +2210,22 @@ static void gen_movtl_T0_im(target_ulong val) |
| 2193 | 2210 | #endif |
| 2194 | 2211 | } |
| 2195 | 2212 | |
| 2213 | +static GenOpFunc1 *gen_ldq_env_A0[3] = { | |
| 2214 | + gen_op_ldq_raw_env_A0, | |
| 2215 | +#ifndef CONFIG_USER_ONLY | |
| 2216 | + gen_op_ldq_kernel_env_A0, | |
| 2217 | + gen_op_ldq_user_env_A0, | |
| 2218 | +#endif | |
| 2219 | +}; | |
| 2220 | + | |
| 2221 | +static GenOpFunc1 *gen_stq_env_A0[3] = { | |
| 2222 | + gen_op_stq_raw_env_A0, | |
| 2223 | +#ifndef CONFIG_USER_ONLY | |
| 2224 | + gen_op_stq_kernel_env_A0, | |
| 2225 | + gen_op_stq_user_env_A0, | |
| 2226 | +#endif | |
| 2227 | +}; | |
| 2228 | + | |
| 2196 | 2229 | static GenOpFunc1 *gen_ldo_env_A0[3] = { |
| 2197 | 2230 | gen_op_ldo_raw_env_A0, |
| 2198 | 2231 | #ifndef CONFIG_USER_ONLY |
| ... | ... | @@ -2209,6 +2242,693 @@ static GenOpFunc1 *gen_sto_env_A0[3] = { |
| 2209 | 2242 | #endif |
| 2210 | 2243 | }; |
| 2211 | 2244 | |
/* Non-NULL marker for table entries that gen_sse() decodes by hand in
   its switch statement; it is compared against, never called. */
#define SSE_SPECIAL ((GenOpFunc2 *)1)

/* Table row for an op that has both an MMX and an XMM generator:
   column 0 is the _mmx variant, column 1 the _xmm variant. */
#define MMX_OP2(x) { [0] = gen_op_ ## x ## _mmx, \
                     [1] = gen_op_ ## x ## _xmm }

/* Table row for a floating point op with ps, pd, ss and sd variants,
   in that column order. */
#define SSE_FOP(x) { [0] = gen_op_ ## x ## ps, \
                     [1] = gen_op_ ## x ## pd, \
                     [2] = gen_op_ ## x ## ss, \
                     [3] = gen_op_ ## x ## sd }
| 2250 | + | |
| 2251 | +static GenOpFunc2 *sse_op_table1[256][4] = { | |
| 2252 | + /* pure SSE operations */ | |
| 2253 | + [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ | |
| 2254 | + [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ | |
| 2255 | + [0x12] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */ | |
| 2256 | + [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */ | |
| 2257 | + [0x14] = { gen_op_punpckldq_xmm, gen_op_punpcklqdq_xmm }, | |
| 2258 | + [0x15] = { gen_op_punpckhdq_xmm, gen_op_punpckhqdq_xmm }, | |
| 2259 | + [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */ | |
| 2260 | + [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */ | |
| 2261 | + | |
| 2262 | + [0x28] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */ | |
| 2263 | + [0x29] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */ | |
| 2264 | + [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */ | |
| 2265 | + [0x2b] = { SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd */ | |
| 2266 | + [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */ | |
| 2267 | + [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */ | |
| 2268 | + [0x2e] = { gen_op_ucomiss, gen_op_ucomisd }, | |
| 2269 | + [0x2f] = { gen_op_comiss, gen_op_comisd }, | |
| 2270 | + [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */ | |
| 2271 | + [0x51] = SSE_FOP(sqrt), | |
| 2272 | + [0x52] = { gen_op_rsqrtps, NULL, gen_op_rsqrtss, NULL }, | |
| 2273 | + [0x53] = { gen_op_rcpps, NULL, gen_op_rcpss, NULL }, | |
| 2274 | + [0x54] = { gen_op_pand_xmm, gen_op_pand_xmm }, /* andps, andpd */ | |
| 2275 | + [0x55] = { gen_op_pandn_xmm, gen_op_pandn_xmm }, /* andnps, andnpd */ | |
| 2276 | + [0x56] = { gen_op_por_xmm, gen_op_por_xmm }, /* orps, orpd */ | |
| 2277 | + [0x57] = { gen_op_pxor_xmm, gen_op_pxor_xmm }, /* xorps, xorpd */ | |
| 2278 | + [0x58] = SSE_FOP(add), | |
| 2279 | + [0x59] = SSE_FOP(mul), | |
| 2280 | + [0x5a] = { gen_op_cvtps2pd, gen_op_cvtpd2ps, | |
| 2281 | + gen_op_cvtss2sd, gen_op_cvtsd2ss }, | |
| 2282 | + [0x5b] = { gen_op_cvtdq2ps, gen_op_cvtps2dq, gen_op_cvttps2dq }, | |
| 2283 | + [0x5c] = SSE_FOP(sub), | |
| 2284 | + [0x5d] = SSE_FOP(min), | |
| 2285 | + [0x5e] = SSE_FOP(div), | |
| 2286 | + [0x5f] = SSE_FOP(max), | |
| 2287 | + | |
| 2288 | + [0xc2] = SSE_FOP(cmpeq), | |
| 2289 | + [0xc6] = { (GenOpFunc2 *)gen_op_pshufd_xmm, (GenOpFunc2 *)gen_op_shufpd }, | |
| 2290 | + | |
| 2291 | + /* MMX ops and their SSE extensions */ | |
| 2292 | + [0x60] = MMX_OP2(punpcklbw), | |
| 2293 | + [0x61] = MMX_OP2(punpcklwd), | |
| 2294 | + [0x62] = MMX_OP2(punpckldq), | |
| 2295 | + [0x63] = MMX_OP2(packsswb), | |
| 2296 | + [0x64] = MMX_OP2(pcmpgtb), | |
| 2297 | + [0x65] = MMX_OP2(pcmpgtw), | |
| 2298 | + [0x66] = MMX_OP2(pcmpgtl), | |
| 2299 | + [0x67] = MMX_OP2(packuswb), | |
| 2300 | + [0x68] = MMX_OP2(punpckhbw), | |
| 2301 | + [0x69] = MMX_OP2(punpckhwd), | |
| 2302 | + [0x6a] = MMX_OP2(punpckhdq), | |
| 2303 | + [0x6b] = MMX_OP2(packssdw), | |
| 2304 | + [0x6c] = { NULL, gen_op_punpcklqdq_xmm }, | |
| 2305 | + [0x6d] = { NULL, gen_op_punpckhqdq_xmm }, | |
| 2306 | + [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */ | |
| 2307 | + [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movqdu */ | |
| 2308 | + [0x70] = { (GenOpFunc2 *)gen_op_pshufw_mmx, | |
| 2309 | + (GenOpFunc2 *)gen_op_pshufd_xmm, | |
| 2310 | + (GenOpFunc2 *)gen_op_pshufhw_xmm, | |
| 2311 | + (GenOpFunc2 *)gen_op_pshuflw_xmm }, | |
| 2312 | + [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */ | |
| 2313 | + [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */ | |
| 2314 | + [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */ | |
| 2315 | + [0x74] = MMX_OP2(pcmpeqb), | |
| 2316 | + [0x75] = MMX_OP2(pcmpeqw), | |
| 2317 | + [0x76] = MMX_OP2(pcmpeql), | |
| 2318 | + [0x77] = { SSE_SPECIAL }, /* emms */ | |
| 2319 | + [0x7c] = { NULL, gen_op_haddpd, NULL, gen_op_haddps }, | |
| 2320 | + [0x7d] = { NULL, gen_op_hsubpd, NULL, gen_op_hsubps }, | |
| 2321 | + [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */ | |
| 2322 | + [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */ | |
| 2323 | + [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */ | |
| 2324 | + [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */ | |
| 2325 | + [0xd0] = { NULL, gen_op_addsubpd, NULL, gen_op_addsubps }, | |
| 2326 | + [0xd1] = MMX_OP2(psrlw), | |
| 2327 | + [0xd2] = MMX_OP2(psrld), | |
| 2328 | + [0xd3] = MMX_OP2(psrlq), | |
| 2329 | + [0xd4] = MMX_OP2(paddq), | |
| 2330 | + [0xd5] = MMX_OP2(pmullw), | |
| 2331 | + [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, | |
| 2332 | + [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */ | |
| 2333 | + [0xd8] = MMX_OP2(psubusb), | |
| 2334 | + [0xd9] = MMX_OP2(psubusw), | |
| 2335 | + [0xda] = MMX_OP2(pminub), | |
| 2336 | + [0xdb] = MMX_OP2(pand), | |
| 2337 | + [0xdc] = MMX_OP2(paddusb), | |
| 2338 | + [0xdd] = MMX_OP2(paddusw), | |
| 2339 | + [0xde] = MMX_OP2(pmaxub), | |
| 2340 | + [0xdf] = MMX_OP2(pandn), | |
| 2341 | + [0xe0] = MMX_OP2(pavgb), | |
| 2342 | + [0xe1] = MMX_OP2(psraw), | |
| 2343 | + [0xe2] = MMX_OP2(psrad), | |
| 2344 | + [0xe3] = MMX_OP2(pavgw), | |
| 2345 | + [0xe4] = MMX_OP2(pmulhuw), | |
| 2346 | + [0xe5] = MMX_OP2(pmulhw), | |
| 2347 | + [0xe6] = { NULL, gen_op_cvttpd2dq, gen_op_cvtdq2pd, gen_op_cvtpd2dq }, | |
| 2348 | + [0xe7] = { SSE_SPECIAL , SSE_SPECIAL }, /* movntq, movntq */ | |
| 2349 | + [0xe8] = MMX_OP2(psubsb), | |
| 2350 | + [0xe9] = MMX_OP2(psubsw), | |
| 2351 | + [0xea] = MMX_OP2(pminsw), | |
| 2352 | + [0xeb] = MMX_OP2(por), | |
| 2353 | + [0xec] = MMX_OP2(paddsb), | |
| 2354 | + [0xed] = MMX_OP2(paddsw), | |
| 2355 | + [0xee] = MMX_OP2(pmaxsw), | |
| 2356 | + [0xef] = MMX_OP2(pxor), | |
| 2357 | + [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu (PNI) */ | |
| 2358 | + [0xf1] = MMX_OP2(psllw), | |
| 2359 | + [0xf2] = MMX_OP2(pslld), | |
| 2360 | + [0xf3] = MMX_OP2(psllq), | |
| 2361 | + [0xf4] = MMX_OP2(pmuludq), | |
| 2362 | + [0xf5] = MMX_OP2(pmaddwd), | |
| 2363 | + [0xf6] = MMX_OP2(psadbw), | |
| 2364 | + [0xf7] = MMX_OP2(maskmov), | |
| 2365 | + [0xf8] = MMX_OP2(psubb), | |
| 2366 | + [0xf9] = MMX_OP2(psubw), | |
| 2367 | + [0xfa] = MMX_OP2(psubl), | |
| 2368 | + [0xfb] = MMX_OP2(psubq), | |
| 2369 | + [0xfc] = MMX_OP2(paddb), | |
| 2370 | + [0xfd] = MMX_OP2(paddw), | |
| 2371 | + [0xfe] = MMX_OP2(paddl), | |
| 2372 | +}; | |
| 2373 | + | |
| 2374 | +static GenOpFunc2 *sse_op_table2[3 * 8][2] = { | |
| 2375 | + [0 + 2] = MMX_OP2(psrlw), | |
| 2376 | + [0 + 4] = MMX_OP2(psraw), | |
| 2377 | + [0 + 6] = MMX_OP2(psllw), | |
| 2378 | + [8 + 2] = MMX_OP2(psrld), | |
| 2379 | + [8 + 4] = MMX_OP2(psrad), | |
| 2380 | + [8 + 6] = MMX_OP2(pslld), | |
| 2381 | + [16 + 2] = MMX_OP2(psrlq), | |
| 2382 | + [16 + 3] = { NULL, gen_op_psrldq_xmm }, | |
| 2383 | + [16 + 6] = MMX_OP2(psllq), | |
| 2384 | + [16 + 7] = { NULL, gen_op_pslldq_xmm }, | |
| 2385 | +}; | |
| 2386 | + | |
| 2387 | +static GenOpFunc1 *sse_op_table3[4 * 3] = { | |
| 2388 | + gen_op_cvtsi2ss, | |
| 2389 | + gen_op_cvtsi2sd, | |
| 2390 | + X86_64_ONLY(gen_op_cvtsq2ss), | |
| 2391 | + X86_64_ONLY(gen_op_cvtsq2sd), | |
| 2392 | + | |
| 2393 | + gen_op_cvttss2si, | |
| 2394 | + gen_op_cvttsd2si, | |
| 2395 | + X86_64_ONLY(gen_op_cvttss2sq), | |
| 2396 | + X86_64_ONLY(gen_op_cvttsd2sq), | |
| 2397 | + | |
| 2398 | + gen_op_cvtss2si, | |
| 2399 | + gen_op_cvtsd2si, | |
| 2400 | + X86_64_ONLY(gen_op_cvtss2sq), | |
| 2401 | + X86_64_ONLY(gen_op_cvtsd2sq), | |
| 2402 | +}; | |
| 2403 | + | |
| 2404 | +static GenOpFunc2 *sse_op_table4[8][4] = { | |
| 2405 | + SSE_FOP(cmpeq), | |
| 2406 | + SSE_FOP(cmplt), | |
| 2407 | + SSE_FOP(cmple), | |
| 2408 | + SSE_FOP(cmpunord), | |
| 2409 | + SSE_FOP(cmpneq), | |
| 2410 | + SSE_FOP(cmpnlt), | |
| 2411 | + SSE_FOP(cmpnle), | |
| 2412 | + SSE_FOP(cmpord), | |
| 2413 | +}; | |
| 2414 | + | |
| 2415 | +static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) | |
| 2416 | +{ | |
| 2417 | + int b1, op1_offset, op2_offset, is_xmm, val, ot; | |
| 2418 | + int modrm, mod, rm, reg, reg_addr, offset_addr; | |
| 2419 | + GenOpFunc2 *sse_op2; | |
| 2420 | + GenOpFunc3 *sse_op3; | |
| 2421 | + | |
| 2422 | + b &= 0xff; | |
| 2423 | + if (s->prefix & PREFIX_DATA) | |
| 2424 | + b1 = 1; | |
| 2425 | + else if (s->prefix & PREFIX_REPZ) | |
| 2426 | + b1 = 2; | |
| 2427 | + else if (s->prefix & PREFIX_REPNZ) | |
| 2428 | + b1 = 3; | |
| 2429 | + else | |
| 2430 | + b1 = 0; | |
| 2431 | + sse_op2 = sse_op_table1[b][b1]; | |
| 2432 | + if (!sse_op2) | |
| 2433 | + goto illegal_op; | |
| 2434 | + if (b <= 0x5f || b == 0xc6 || b == 0xc2) { | |
| 2435 | + is_xmm = 1; | |
| 2436 | + } else { | |
| 2437 | + if (b1 == 0) { | |
| 2438 | + /* MMX case */ | |
| 2439 | + is_xmm = 0; | |
| 2440 | + } else { | |
| 2441 | + is_xmm = 1; | |
| 2442 | + } | |
| 2443 | + } | |
| 2444 | + /* simple MMX/SSE operation */ | |
| 2445 | + if (s->flags & HF_TS_MASK) { | |
| 2446 | + gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); | |
| 2447 | + return; | |
| 2448 | + } | |
| 2449 | + if (s->flags & HF_EM_MASK) { | |
| 2450 | + illegal_op: | |
| 2451 | + gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base); | |
| 2452 | + return; | |
| 2453 | + } | |
| 2454 | + if (is_xmm && !(s->flags & HF_OSFXSR_MASK)) | |
| 2455 | + goto illegal_op; | |
| 2456 | + if (b == 0x77) { | |
| 2457 | + /* emms */ | |
| 2458 | + gen_op_emms(); | |
| 2459 | + return; | |
| 2460 | + } | |
| 2461 | + /* prepare MMX state (XXX: optimize by storing fptt and fptags in | |
| 2462 | + the static cpu state) */ | |
| 2463 | + if (!is_xmm) { | |
| 2464 | + gen_op_enter_mmx(); | |
| 2465 | + } | |
| 2466 | + | |
| 2467 | + modrm = ldub_code(s->pc++); | |
| 2468 | + reg = ((modrm >> 3) & 7); | |
| 2469 | + if (is_xmm) | |
| 2470 | + reg |= rex_r; | |
| 2471 | + mod = (modrm >> 6) & 3; | |
| 2472 | + if (sse_op2 == SSE_SPECIAL) { | |
| 2473 | + b |= (b1 << 8); | |
| 2474 | + switch(b) { | |
| 2475 | + case 0x0e7: /* movntq */ | |
| 2476 | + if (mod == 3) | |
| 2477 | + goto illegal_op; | |
| 2478 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2479 | + gen_stq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,fpregs[reg].mmx)); | |
| 2480 | + break; | |
| 2481 | + case 0x1e7: /* movntdq */ | |
| 2482 | + case 0x02b: /* movntps */ | |
| 2483 | + case 0x12b: /* movntpd */ | |
| 2484 | + case 0x2f0: /* lddqu */ | |
| 2485 | + if (mod == 3) | |
| 2486 | + goto illegal_op; | |
| 2487 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2488 | + gen_sto_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg])); | |
| 2489 | + break; | |
| 2490 | + case 0x6e: /* movd mm, ea */ | |
| 2491 | + gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0); | |
| 2492 | + gen_op_movl_mm_T0_mmx(offsetof(CPUX86State,fpregs[reg].mmx)); | |
| 2493 | + break; | |
| 2494 | + case 0x16e: /* movd xmm, ea */ | |
| 2495 | + gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0); | |
| 2496 | + gen_op_movl_mm_T0_xmm(offsetof(CPUX86State,xmm_regs[reg])); | |
| 2497 | + break; | |
| 2498 | + case 0x6f: /* movq mm, ea */ | |
| 2499 | + if (mod != 3) { | |
| 2500 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2501 | + gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,fpregs[reg].mmx)); | |
| 2502 | + } else { | |
| 2503 | + rm = (modrm & 7); | |
| 2504 | + gen_op_movq(offsetof(CPUX86State,fpregs[reg].mmx), | |
| 2505 | + offsetof(CPUX86State,fpregs[rm].mmx)); | |
| 2506 | + } | |
| 2507 | + break; | |
| 2508 | + case 0x010: /* movups */ | |
| 2509 | + case 0x110: /* movupd */ | |
| 2510 | + case 0x028: /* movaps */ | |
| 2511 | + case 0x128: /* movapd */ | |
| 2512 | + case 0x16f: /* movdqa xmm, ea */ | |
| 2513 | + case 0x26f: /* movdqu xmm, ea */ | |
| 2514 | + if (mod != 3) { | |
| 2515 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2516 | + gen_ldo_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg])); | |
| 2517 | + } else { | |
| 2518 | + rm = (modrm & 7) | REX_B(s); | |
| 2519 | + gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]), | |
| 2520 | + offsetof(CPUX86State,xmm_regs[rm])); | |
| 2521 | + } | |
| 2522 | + break; | |
| 2523 | + case 0x210: /* movss xmm, ea */ | |
| 2524 | + if (mod != 3) { | |
| 2525 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2526 | + gen_op_ld_T0_A0[OT_LONG + s->mem_index](); | |
| 2527 | + gen_op_movl_env_T0(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); | |
| 2528 | + gen_op_movl_T0_0(); | |
| 2529 | + gen_op_movl_env_T0(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1))); | |
| 2530 | + gen_op_movl_env_T0(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2))); | |
| 2531 | + gen_op_movl_env_T0(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3))); | |
| 2532 | + } else { | |
| 2533 | + rm = (modrm & 7) | REX_B(s); | |
| 2534 | + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)), | |
| 2535 | + offsetof(CPUX86State,xmm_regs[rm].XMM_L(0))); | |
| 2536 | + } | |
| 2537 | + break; | |
| 2538 | + case 0x310: /* movsd xmm, ea */ | |
| 2539 | + if (mod != 3) { | |
| 2540 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2541 | + gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); | |
| 2542 | + gen_op_movl_T0_0(); | |
| 2543 | + gen_op_movl_env_T0(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2))); | |
| 2544 | + gen_op_movl_env_T0(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3))); | |
| 2545 | + } else { | |
| 2546 | + rm = (modrm & 7) | REX_B(s); | |
| 2547 | + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), | |
| 2548 | + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); | |
| 2549 | + } | |
| 2550 | + break; | |
| 2551 | + case 0x012: /* movlps */ | |
| 2552 | + case 0x112: /* movlpd */ | |
| 2553 | + if (mod != 3) { | |
| 2554 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2555 | + gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); | |
| 2556 | + } else { | |
| 2557 | + /* movhlps */ | |
| 2558 | + rm = (modrm & 7) | REX_B(s); | |
| 2559 | + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), | |
| 2560 | + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1))); | |
| 2561 | + } | |
| 2562 | + break; | |
| 2563 | + case 0x016: /* movhps */ | |
| 2564 | + case 0x116: /* movhpd */ | |
| 2565 | + if (mod != 3) { | |
| 2566 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2567 | + gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1))); | |
| 2568 | + } else { | |
| 2569 | + /* movlhps */ | |
| 2570 | + rm = (modrm & 7) | REX_B(s); | |
| 2571 | + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)), | |
| 2572 | + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); | |
| 2573 | + } | |
| 2574 | + break; | |
| 2575 | + case 0x216: /* movshdup */ | |
| 2576 | + if (mod != 3) { | |
| 2577 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2578 | + gen_ldo_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg])); | |
| 2579 | + } else { | |
| 2580 | + rm = (modrm & 7) | REX_B(s); | |
| 2581 | + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)), | |
| 2582 | + offsetof(CPUX86State,xmm_regs[rm].XMM_L(1))); | |
| 2583 | + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)), | |
| 2584 | + offsetof(CPUX86State,xmm_regs[rm].XMM_L(3))); | |
| 2585 | + } | |
| 2586 | + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)), | |
| 2587 | + offsetof(CPUX86State,xmm_regs[reg].XMM_L(1))); | |
| 2588 | + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)), | |
| 2589 | + offsetof(CPUX86State,xmm_regs[reg].XMM_L(3))); | |
| 2590 | + break; | |
| 2591 | + case 0x7e: /* movd ea, mm */ | |
| 2592 | + gen_op_movl_T0_mm_mmx(offsetof(CPUX86State,fpregs[reg].mmx)); | |
| 2593 | + gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1); | |
| 2594 | + break; | |
| 2595 | + case 0x17e: /* movd ea, xmm */ | |
| 2596 | + gen_op_movl_T0_mm_xmm(offsetof(CPUX86State,xmm_regs[reg])); | |
| 2597 | + gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1); | |
| 2598 | + break; | |
| 2599 | + case 0x27e: /* movq xmm, ea */ | |
| 2600 | + if (mod != 3) { | |
| 2601 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2602 | + gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); | |
| 2603 | + } else { | |
| 2604 | + rm = (modrm & 7) | REX_B(s); | |
| 2605 | + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), | |
| 2606 | + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); | |
| 2607 | + } | |
| 2608 | + gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1))); | |
| 2609 | + break; | |
| 2610 | + case 0x7f: /* movq ea, mm */ | |
| 2611 | + if (mod != 3) { | |
| 2612 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2613 | + gen_stq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,fpregs[reg].mmx)); | |
| 2614 | + } else { | |
| 2615 | + rm = (modrm & 7); | |
| 2616 | + gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx), | |
| 2617 | + offsetof(CPUX86State,fpregs[reg].mmx)); | |
| 2618 | + } | |
| 2619 | + break; | |
| 2620 | + case 0x011: /* movups */ | |
| 2621 | + case 0x111: /* movupd */ | |
| 2622 | + case 0x029: /* movaps */ | |
| 2623 | + case 0x129: /* movapd */ | |
| 2624 | + case 0x17f: /* movdqa ea, xmm */ | |
| 2625 | + case 0x27f: /* movdqu ea, xmm */ | |
| 2626 | + if (mod != 3) { | |
| 2627 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2628 | + gen_sto_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg])); | |
| 2629 | + } else { | |
| 2630 | + rm = (modrm & 7) | REX_B(s); | |
| 2631 | + gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]), | |
| 2632 | + offsetof(CPUX86State,xmm_regs[reg])); | |
| 2633 | + } | |
| 2634 | + break; | |
| 2635 | + case 0x211: /* movss ea, xmm */ | |
| 2636 | + if (mod != 3) { | |
| 2637 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2638 | + gen_op_movl_T0_env(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); | |
| 2639 | + gen_op_st_T0_A0[OT_LONG + s->mem_index](); | |
| 2640 | + } else { | |
| 2641 | + rm = (modrm & 7) | REX_B(s); | |
| 2642 | + gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)), | |
| 2643 | + offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); | |
| 2644 | + } | |
| 2645 | + break; | |
| 2646 | + case 0x311: /* movsd ea, xmm */ | |
| 2647 | + if (mod != 3) { | |
| 2648 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2649 | + gen_stq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); | |
| 2650 | + } else { | |
| 2651 | + rm = (modrm & 7) | REX_B(s); | |
| 2652 | + gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)), | |
| 2653 | + offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); | |
| 2654 | + } | |
| 2655 | + break; | |
| 2656 | + case 0x013: /* movlps */ | |
| 2657 | + case 0x113: /* movlpd */ | |
| 2658 | + if (mod != 3) { | |
| 2659 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2660 | + gen_stq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); | |
| 2661 | + } else { | |
| 2662 | + goto illegal_op; | |
| 2663 | + } | |
| 2664 | + break; | |
| 2665 | + case 0x017: /* movhps */ | |
| 2666 | + case 0x117: /* movhpd */ | |
| 2667 | + if (mod != 3) { | |
| 2668 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2669 | + gen_stq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1))); | |
| 2670 | + } else { | |
| 2671 | + goto illegal_op; | |
| 2672 | + } | |
| 2673 | + break; | |
| 2674 | + case 0x71: /* shift mm, im */ | |
| 2675 | + case 0x72: | |
| 2676 | + case 0x73: | |
| 2677 | + case 0x171: /* shift xmm, im */ | |
| 2678 | + case 0x172: | |
| 2679 | + case 0x173: | |
| 2680 | + val = ldub_code(s->pc++); | |
| 2681 | + if (is_xmm) { | |
| 2682 | + gen_op_movl_T0_im(val); | |
| 2683 | + gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0))); | |
| 2684 | + gen_op_movl_T0_0(); | |
| 2685 | + gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(1))); | |
| 2686 | + op1_offset = offsetof(CPUX86State,xmm_t0); | |
| 2687 | + } else { | |
| 2688 | + gen_op_movl_T0_im(val); | |
| 2689 | + gen_op_movl_env_T0(offsetof(CPUX86State,mmx_t0.MMX_L(0))); | |
| 2690 | + gen_op_movl_T0_0(); | |
| 2691 | + gen_op_movl_env_T0(offsetof(CPUX86State,mmx_t0.MMX_L(1))); | |
| 2692 | + op1_offset = offsetof(CPUX86State,mmx_t0); | |
| 2693 | + } | |
| 2694 | + sse_op2 = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1]; | |
| 2695 | + if (!sse_op2) | |
| 2696 | + goto illegal_op; | |
| 2697 | + if (is_xmm) { | |
| 2698 | + rm = (modrm & 7) | REX_B(s); | |
| 2699 | + op2_offset = offsetof(CPUX86State,xmm_regs[rm]); | |
| 2700 | + } else { | |
| 2701 | + rm = (modrm & 7); | |
| 2702 | + op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); | |
| 2703 | + } | |
| 2704 | + sse_op2(op2_offset, op1_offset); | |
| 2705 | + break; | |
| 2706 | + case 0x050: /* movmskps */ | |
| 2707 | + gen_op_movmskps(offsetof(CPUX86State,xmm_regs[reg])); | |
| 2708 | + rm = (modrm & 7) | REX_B(s); | |
| 2709 | + gen_op_mov_reg_T0[OT_LONG][rm](); | |
| 2710 | + break; | |
| 2711 | + case 0x150: /* movmskpd */ | |
| 2712 | + gen_op_movmskpd(offsetof(CPUX86State,xmm_regs[reg])); | |
| 2713 | + rm = (modrm & 7) | REX_B(s); | |
| 2714 | + gen_op_mov_reg_T0[OT_LONG][rm](); | |
| 2715 | + break; | |
| 2716 | + case 0x02a: /* cvtpi2ps */ | |
| 2717 | + case 0x12a: /* cvtpi2pd */ | |
| 2718 | + gen_op_enter_mmx(); | |
| 2719 | + if (mod != 3) { | |
| 2720 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2721 | + op2_offset = offsetof(CPUX86State,mmx_t0); | |
| 2722 | + gen_ldq_env_A0[s->mem_index >> 2](op2_offset); | |
| 2723 | + } else { | |
| 2724 | + rm = (modrm & 7); | |
| 2725 | + op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); | |
| 2726 | + } | |
| 2727 | + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); | |
| 2728 | + switch(b >> 8) { | |
| 2729 | + case 0x0: | |
| 2730 | + gen_op_cvtpi2ps(op1_offset, op2_offset); | |
| 2731 | + break; | |
| 2732 | + default: | |
| 2733 | + case 0x1: | |
| 2734 | + gen_op_cvtpi2pd(op1_offset, op2_offset); | |
| 2735 | + break; | |
| 2736 | + } | |
| 2737 | + break; | |
| 2738 | + case 0x22a: /* cvtsi2ss */ | |
| 2739 | + case 0x32a: /* cvtsi2sd */ | |
| 2740 | + ot = (s->dflag == 2) ? OT_QUAD : OT_LONG; | |
| 2741 | + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); | |
| 2742 | + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); | |
| 2743 | + sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)](op1_offset); | |
| 2744 | + break; | |
| 2745 | + case 0x02c: /* cvttps2pi */ | |
| 2746 | + case 0x12c: /* cvttpd2pi */ | |
| 2747 | + case 0x02d: /* cvtps2pi */ | |
| 2748 | + case 0x12d: /* cvtpd2pi */ | |
| 2749 | + gen_op_enter_mmx(); | |
| 2750 | + if (mod != 3) { | |
| 2751 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2752 | + op2_offset = offsetof(CPUX86State,xmm_t0); | |
| 2753 | + gen_ldo_env_A0[s->mem_index >> 2](op2_offset); | |
| 2754 | + } else { | |
| 2755 | + rm = (modrm & 7) | REX_B(s); | |
| 2756 | + op2_offset = offsetof(CPUX86State,xmm_regs[rm]); | |
| 2757 | + } | |
| 2758 | + op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx); | |
| 2759 | + switch(b) { | |
| 2760 | + case 0x02c: | |
| 2761 | + gen_op_cvttps2pi(op1_offset, op2_offset); | |
| 2762 | + break; | |
| 2763 | + case 0x12c: | |
| 2764 | + gen_op_cvttpd2pi(op1_offset, op2_offset); | |
| 2765 | + break; | |
| 2766 | + case 0x02d: | |
| 2767 | + gen_op_cvtps2pi(op1_offset, op2_offset); | |
| 2768 | + break; | |
| 2769 | + case 0x12d: | |
| 2770 | + gen_op_cvtpd2pi(op1_offset, op2_offset); | |
| 2771 | + break; | |
| 2772 | + } | |
| 2773 | + break; | |
| 2774 | + case 0x22c: /* cvttss2si */ | |
| 2775 | + case 0x32c: /* cvttsd2si */ | |
| 2776 | + case 0x22d: /* cvtss2si */ | |
| 2777 | + case 0x32d: /* cvtsd2si */ | |
| 2778 | + ot = (s->dflag == 2) ? OT_QUAD : OT_LONG; | |
| 2779 | + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); | |
| 2780 | + sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 + | |
| 2781 | + (b & 1) * 4](op1_offset); | |
| 2782 | + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1); | |
| 2783 | + break; | |
| 2784 | + case 0xc4: /* pinsrw */ | |
| 2785 | + case 0x1c4: | |
| 2786 | + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); | |
| 2787 | + val = ldub_code(s->pc++); | |
| 2788 | + if (b1) { | |
| 2789 | + val &= 7; | |
| 2790 | + gen_op_pinsrw_xmm(offsetof(CPUX86State,xmm_regs[reg]), val); | |
| 2791 | + } else { | |
| 2792 | + val &= 3; | |
| 2793 | + gen_op_pinsrw_mmx(offsetof(CPUX86State,fpregs[reg].mmx), val); | |
| 2794 | + } | |
| 2795 | + break; | |
| 2796 | + case 0xc5: /* pextrw */ | |
| 2797 | + case 0x1c5: | |
| 2798 | + if (mod != 3) | |
| 2799 | + goto illegal_op; | |
| 2800 | + val = ldub_code(s->pc++); | |
| 2801 | + if (b1) { | |
| 2802 | + val &= 7; | |
| 2803 | + rm = (modrm & 7) | REX_B(s); | |
| 2804 | + gen_op_pextrw_xmm(offsetof(CPUX86State,xmm_regs[rm]), val); | |
| 2805 | + } else { | |
| 2806 | + val &= 3; | |
| 2807 | + rm = (modrm & 7); | |
| 2808 | + gen_op_pextrw_mmx(offsetof(CPUX86State,fpregs[rm].mmx), val); | |
| 2809 | + } | |
| 2810 | + reg = ((modrm >> 3) & 7) | rex_r; | |
| 2811 | + gen_op_mov_reg_T0[OT_LONG][reg](); | |
| 2812 | + break; | |
| 2813 | + case 0x1d6: /* movq ea, xmm */ | |
| 2814 | + if (mod != 3) { | |
| 2815 | + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); | |
| 2816 | + gen_stq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); | |
| 2817 | + } else { | |
| 2818 | + rm = (modrm & 7) | REX_B(s); | |
| 2819 | + gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)), | |
| 2820 | + offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); | |
| 2821 | + gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1))); | |
| 2822 | + } | |
| 2823 | + break; | |
| 2824 | + case 0x2d6: /* movq2dq */ | |
| 2825 | + gen_op_enter_mmx(); | |
| 2826 | + rm = (modrm & 7) | REX_B(s); | |
| 2827 | + gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)), | |
| 2828 | + offsetof(CPUX86State,fpregs[reg & 7].mmx)); | |
| 2829 | + gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1))); | |
| 2830 | + break; | |
| 2831 | + case 0x3d6: /* movdq2q */ | |
| 2832 | + gen_op_enter_mmx(); | |
| 2833 | + rm = (modrm & 7); | |
| 2834 | + gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx), | |
| 2835 | + offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); | |
| 2836 | + break; | |
| 2837 | + case 0xd7: /* pmovmskb */ | |
| 2838 | + case 0x1d7: | |
| 2839 | + if (mod != 3) | |
| 2840 | + goto illegal_op; | |
| 2841 | + if (b1) { | |
| 2842 | + rm = (modrm & 7) | REX_B(s); | |
| 2843 | + gen_op_pmovmskb_xmm(offsetof(CPUX86State,xmm_regs[rm])); | |
| 2844 | + } else { | |
| 2845 | + rm = (modrm & 7); | |
| 2846 | + gen_op_pmovmskb_mmx(offsetof(CPUX86State,fpregs[rm].mmx)); | |
| 2847 | + } | |
| 2848 | + reg = ((modrm >> 3) & 7) | rex_r; | |
| 2849 | + gen_op_mov_reg_T0[OT_LONG][reg](); | |
| 2850 | + break; | |
| 2851 | + default: | |
| 2852 | + goto illegal_op; | |
| 2853 | + } | |
| 2854 | + } else { | |
| 2855 | + /* generic MMX or SSE operation */ | |
| 2856 | + if (b == 0xf7) { | |
| 2857 | + /* maskmov : we must prepare A0 */ | |
| 2858 | + if (mod != 3) | |
| 2859 | + goto illegal_op; | |
| 2860 | +#ifdef TARGET_X86_64 | |
| 2861 | + if (CODE64(s)) { | |
| 2862 | + gen_op_movq_A0_reg[R_EDI](); | |
| 2863 | + } else | |
| 2864 | +#endif | |
| 2865 | + { | |
| 2866 | + gen_op_movl_A0_reg[R_EDI](); | |
| 2867 | + if (s->aflag == 0) | |
| 2868 | + gen_op_andl_A0_ffff(); | |
| 2869 | + } | |
| 2870 | + gen_add_A0_ds_seg(s); | |
| 2871 | + } | |
| 2872 | + if (is_xmm) { | |
| 2873 | + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); | |
| 2874 | + if (mod != 3) { | |
| 2875 | + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | |
| 2876 | + op2_offset = offsetof(CPUX86State,xmm_t0); | |
| 2877 | + if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f) || | |
| 2878 | + b == 0xc2)) { | |
| 2879 | + /* specific case for SSE single instructions */ | |
| 2880 | + if (b1 == 2) { | |
| 2881 | + /* 32 bit access */ | |
| 2882 | + gen_op_ld_T0_A0[OT_LONG + s->mem_index](); | |
| 2883 | + gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0))); | |
| 2884 | + } else { | |
| 2885 | + /* 64 bit access */ | |
| 2886 | + gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_t0.XMM_D(0))); | |
| 2887 | + } | |
| 2888 | + } else { | |
| 2889 | + gen_ldo_env_A0[s->mem_index >> 2](op2_offset); | |
| 2890 | + } | |
| 2891 | + } else { | |
| 2892 | + rm = (modrm & 7) | REX_B(s); | |
| 2893 | + op2_offset = offsetof(CPUX86State,xmm_regs[rm]); | |
| 2894 | + } | |
| 2895 | + } else { | |
| 2896 | + op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); | |
| 2897 | + if (mod != 3) { | |
| 2898 | + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | |
| 2899 | + op2_offset = offsetof(CPUX86State,mmx_t0); | |
| 2900 | + gen_ldq_env_A0[s->mem_index >> 2](op2_offset); | |
| 2901 | + } else { | |
| 2902 | + rm = (modrm & 7); | |
| 2903 | + op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); | |
| 2904 | + } | |
| 2905 | + } | |
| 2906 | + switch(b) { | |
| 2907 | + case 0x70: /* pshufx insn */ | |
| 2908 | + case 0xc6: /* pshufx insn */ | |
| 2909 | + val = ldub_code(s->pc++); | |
| 2910 | + sse_op3 = (GenOpFunc3 *)sse_op2; | |
| 2911 | + sse_op3(op1_offset, op2_offset, val); | |
| 2912 | + break; | |
| 2913 | + case 0xc2: | |
| 2914 | + /* compare insns */ | |
| 2915 | + val = ldub_code(s->pc++); | |
| 2916 | + if (val >= 8) | |
| 2917 | + goto illegal_op; | |
| 2918 | + sse_op2 = sse_op_table4[val][b1]; | |
| 2919 | + sse_op2(op1_offset, op2_offset); | |
| 2920 | + break; | |
| 2921 | + default: | |
| 2922 | + sse_op2(op1_offset, op2_offset); | |
| 2923 | + break; | |
| 2924 | + } | |
| 2925 | + if (b == 0x2e || b == 0x2f) { | |
| 2926 | + s->cc_op = CC_OP_EFLAGS; | |
| 2927 | + } | |
| 2928 | + } | |
| 2929 | +} | |
| 2930 | + | |
| 2931 | + | |
| 2212 | 2932 | /* convert one instruction. s->is_jmp is set if the translation must |
| 2213 | 2933 | be stopped. Return the next pc value */ |
| 2214 | 2934 | static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| ... | ... | @@ -3176,20 +3896,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 3176 | 3896 | } |
| 3177 | 3897 | gen_op_movl_A0_im(offset_addr); |
| 3178 | 3898 | } |
| 3179 | - /* handle override */ | |
| 3180 | - { | |
| 3181 | - int override, must_add_seg; | |
| 3182 | - must_add_seg = s->addseg; | |
| 3183 | - if (s->override >= 0) { | |
| 3184 | - override = s->override; | |
| 3185 | - must_add_seg = 1; | |
| 3186 | - } else { | |
| 3187 | - override = R_DS; | |
| 3188 | - } | |
| 3189 | - if (must_add_seg) { | |
| 3190 | - gen_op_addl_A0_seg(offsetof(CPUX86State,segs[override].base)); | |
| 3191 | - } | |
| 3192 | - } | |
| 3899 | + gen_add_A0_ds_seg(s); | |
| 3193 | 3900 | if ((b & 2) == 0) { |
| 3194 | 3901 | gen_op_ld_T0_A0[ot + s->mem_index](); |
| 3195 | 3902 | gen_op_mov_reg_T0[ot][R_EAX](); |
| ... | ... | @@ -3212,21 +3919,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 3212 | 3919 | if (s->aflag == 0) |
| 3213 | 3920 | gen_op_andl_A0_ffff(); |
| 3214 | 3921 | } |
| 3215 | - /* handle override */ | |
| 3216 | - { | |
| 3217 | - int override, must_add_seg; | |
| 3218 | - must_add_seg = s->addseg; | |
| 3219 | - override = R_DS; | |
| 3220 | - if (s->override >= 0) { | |
| 3221 | - override = s->override; | |
| 3222 | - must_add_seg = 1; | |
| 3223 | - } else { | |
| 3224 | - override = R_DS; | |
| 3225 | - } | |
| 3226 | - if (must_add_seg) { | |
| 3227 | - gen_op_addl_A0_seg(offsetof(CPUX86State,segs[override].base)); | |
| 3228 | - } | |
| 3229 | - } | |
| 3922 | + gen_add_A0_ds_seg(s); | |
| 3230 | 3923 | gen_op_ldu_T0_A0[OT_BYTE + s->mem_index](); |
| 3231 | 3924 | gen_op_mov_reg_T0[OT_BYTE][R_EAX](); |
| 3232 | 3925 | break; |
| ... | ... | @@ -4827,33 +5520,6 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 4827 | 5520 | /* nothing to do */ |
| 4828 | 5521 | } |
| 4829 | 5522 | break; |
| 4830 | - case 0x1ae: | |
| 4831 | - modrm = ldub_code(s->pc++); | |
| 4832 | - mod = (modrm >> 6) & 3; | |
| 4833 | - op = (modrm >> 3) & 7; | |
| 4834 | - switch(op) { | |
| 4835 | - case 0: /* fxsave */ | |
| 4836 | - if (mod == 3 || !(s->cpuid_features & CPUID_FXSR)) | |
| 4837 | - goto illegal_op; | |
| 4838 | - gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | |
| 4839 | - gen_op_fxsave_A0((s->dflag == 2)); | |
| 4840 | - break; | |
| 4841 | - case 1: /* fxrstor */ | |
| 4842 | - if (mod == 3 || !(s->cpuid_features & CPUID_FXSR)) | |
| 4843 | - goto illegal_op; | |
| 4844 | - gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | |
| 4845 | - gen_op_fxrstor_A0((s->dflag == 2)); | |
| 4846 | - break; | |
| 4847 | - case 5: /* lfence */ | |
| 4848 | - case 6: /* mfence */ | |
| 4849 | - case 7: /* sfence */ | |
| 4850 | - if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE)) | |
| 4851 | - goto illegal_op; | |
| 4852 | - break; | |
| 4853 | - default: | |
| 4854 | - goto illegal_op; | |
| 4855 | - } | |
| 4856 | - break; | |
| 4857 | 5523 | case 0x63: /* arpl or movslS (x86_64) */ |
| 4858 | 5524 | #ifdef TARGET_X86_64 |
| 4859 | 5525 | if (CODE64(s)) { |
| ... | ... | @@ -5018,65 +5684,73 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) |
| 5018 | 5684 | gen_eob(s); |
| 5019 | 5685 | } |
| 5020 | 5686 | break; |
| 5021 | - /* SSE support */ | |
| 5022 | - case 0x16f: | |
| 5023 | - if (prefixes & PREFIX_DATA) { | |
| 5024 | - /* movdqa xmm1, xmm2/mem128 */ | |
| 5025 | - if (!(s->cpuid_features & CPUID_SSE)) | |
| 5026 | - goto illegal_op; | |
| 5027 | - modrm = ldub_code(s->pc++); | |
| 5028 | - reg = ((modrm >> 3) & 7) | rex_r; | |
| 5029 | - mod = (modrm >> 6) & 3; | |
| 5030 | - if (mod != 3) { | |
| 5031 | - gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | |
| 5032 | - gen_ldo_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg])); | |
| 5033 | - } else { | |
| 5034 | - rm = (modrm & 7) | REX_B(s); | |
| 5035 | - gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]), | |
| 5036 | - offsetof(CPUX86State,xmm_regs[rm])); | |
| 5037 | - } | |
| 5038 | - } else { | |
| 5687 | + /* MMX/SSE/SSE2/PNI support */ | |
| 5688 | + case 0x1c3: /* MOVNTI reg, mem */ | |
| 5689 | + if (!(s->cpuid_features & CPUID_SSE2)) | |
| 5039 | 5690 | goto illegal_op; |
| 5040 | - } | |
| 5691 | + ot = s->dflag == 2 ? OT_QUAD : OT_LONG; | |
| 5692 | + modrm = ldub_code(s->pc++); | |
| 5693 | + mod = (modrm >> 6) & 3; | |
| 5694 | + if (mod == 3) | |
| 5695 | + goto illegal_op; | |
| 5696 | + reg = ((modrm >> 3) & 7) | rex_r; | |
| 5697 | + /* generate a generic store */ | |
| 5698 | + gen_ldst_modrm(s, modrm, ot, reg, 1); | |
| 5041 | 5699 | break; |
| 5042 | - case 0x1e7: | |
| 5043 | - if (prefixes & PREFIX_DATA) { | |
| 5044 | - /* movntdq mem128, xmm1 */ | |
| 5045 | - if (!(s->cpuid_features & CPUID_SSE)) | |
| 5700 | + case 0x1ae: | |
| 5701 | + modrm = ldub_code(s->pc++); | |
| 5702 | + mod = (modrm >> 6) & 3; | |
| 5703 | + op = (modrm >> 3) & 7; | |
| 5704 | + switch(op) { | |
| 5705 | + case 0: /* fxsave */ | |
| 5706 | + if (mod == 3 || !(s->cpuid_features & CPUID_FXSR)) | |
| 5046 | 5707 | goto illegal_op; |
| 5047 | - modrm = ldub_code(s->pc++); | |
| 5048 | - reg = ((modrm >> 3) & 7) | rex_r; | |
| 5049 | - mod = (modrm >> 6) & 3; | |
| 5050 | - if (mod != 3) { | |
| 5051 | - gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | |
| 5052 | - gen_sto_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg])); | |
| 5053 | - } else { | |
| 5708 | + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | |
| 5709 | + gen_op_fxsave_A0((s->dflag == 2)); | |
| 5710 | + break; | |
| 5711 | + case 1: /* fxrstor */ | |
| 5712 | + if (mod == 3 || !(s->cpuid_features & CPUID_FXSR)) | |
| 5054 | 5713 | goto illegal_op; |
| 5714 | + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | |
| 5715 | + gen_op_fxrstor_A0((s->dflag == 2)); | |
| 5716 | + break; | |
| 5717 | + case 2: /* ldmxcsr */ | |
| 5718 | + case 3: /* stmxcsr */ | |
| 5719 | + if (s->flags & HF_TS_MASK) { | |
| 5720 | + gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); | |
| 5721 | + break; | |
| 5055 | 5722 | } |
| 5056 | - } else { | |
| 5057 | - goto illegal_op; | |
| 5058 | - } | |
| 5059 | - break; | |
| 5060 | - case 0x17f: | |
| 5061 | - if (prefixes & PREFIX_DATA) { | |
| 5062 | - /* movdqa xmm2/mem128, xmm1 */ | |
| 5063 | - if (!(s->cpuid_features & CPUID_SSE)) | |
| 5723 | + if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) || | |
| 5724 | + mod == 3) | |
| 5064 | 5725 | goto illegal_op; |
| 5065 | - modrm = ldub_code(s->pc++); | |
| 5066 | - reg = ((modrm >> 3) & 7) | rex_r; | |
| 5067 | - mod = (modrm >> 6) & 3; | |
| 5068 | - if (mod != 3) { | |
| 5069 | - gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | |
| 5070 | - gen_sto_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg])); | |
| 5726 | + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); | |
| 5727 | + if (op == 2) { | |
| 5728 | + gen_op_ld_T0_A0[OT_LONG + s->mem_index](); | |
| 5729 | + gen_op_movl_env_T0(offsetof(CPUX86State, mxcsr)); | |
| 5071 | 5730 | } else { |
| 5072 | - rm = (modrm & 7) | REX_B(s); | |
| 5073 | - gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]), | |
| 5074 | - offsetof(CPUX86State,xmm_regs[reg])); | |
| 5731 | + gen_op_movl_T0_env(offsetof(CPUX86State, mxcsr)); | |
| 5732 | + gen_op_st_T0_A0[OT_LONG + s->mem_index](); | |
| 5075 | 5733 | } |
| 5076 | - } else { | |
| 5734 | + break; | |
| 5735 | + case 5: /* lfence */ | |
| 5736 | + case 6: /* mfence */ | |
| 5737 | + case 7: /* sfence */ | |
| 5738 | + if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE)) | |
| 5739 | + goto illegal_op; | |
| 5740 | + break; | |
| 5741 | + default: | |
| 5077 | 5742 | goto illegal_op; |
| 5078 | 5743 | } |
| 5079 | 5744 | break; |
| 5745 | + case 0x110 ... 0x117: | |
| 5746 | + case 0x128 ... 0x12f: | |
| 5747 | + case 0x150 ... 0x177: | |
| 5748 | + case 0x17c ... 0x17f: | |
| 5749 | + case 0x1c2: | |
| 5750 | + case 0x1c4 ... 0x1c6: | |
| 5751 | + case 0x1d0 ... 0x1fe: | |
| 5752 | + gen_sse(s, b, pc_start, rex_r); | |
| 5753 | + break; | |
| 5080 | 5754 | default: |
| 5081 | 5755 | goto illegal_op; |
| 5082 | 5756 | } |
| ... | ... | @@ -5250,6 +5924,12 @@ static uint16_t opc_write_flags[NB_OPS] = { |
| 5250 | 5924 | [INDEX_op_imull_T0_T1] = CC_OSZAPC, |
| 5251 | 5925 | X86_64_DEF([INDEX_op_imulq_T0_T1] = CC_OSZAPC,) |
| 5252 | 5926 | |
| 5927 | + /* sse */ | |
| 5928 | + [INDEX_op_ucomiss] = CC_OSZAPC, | |
| 5929 | + [INDEX_op_ucomisd] = CC_OSZAPC, | |
| 5930 | + [INDEX_op_comiss] = CC_OSZAPC, | |
| 5931 | + [INDEX_op_comisd] = CC_OSZAPC, | |
| 5932 | + | |
| 5253 | 5933 | /* bcd */ |
| 5254 | 5934 | [INDEX_op_aam] = CC_OSZAPC, |
| 5255 | 5935 | [INDEX_op_aad] = CC_OSZAPC, | ... | ... |
vl.c
| ... | ... | @@ -2082,15 +2082,14 @@ static void cpu_get_seg(QEMUFile *f, SegmentCache *dt) |
| 2082 | 2082 | void cpu_save(QEMUFile *f, void *opaque) |
| 2083 | 2083 | { |
| 2084 | 2084 | CPUState *env = opaque; |
| 2085 | - uint16_t fptag, fpus, fpuc; | |
| 2085 | + uint16_t fptag, fpus, fpuc, fpregs_format; | |
| 2086 | 2086 | uint32_t hflags; |
| 2087 | 2087 | int i; |
| 2088 | - | |
| 2088 | + | |
| 2089 | 2089 | for(i = 0; i < CPU_NB_REGS; i++) |
| 2090 | 2090 | qemu_put_betls(f, &env->regs[i]); |
| 2091 | 2091 | qemu_put_betls(f, &env->eip); |
| 2092 | 2092 | qemu_put_betls(f, &env->eflags); |
| 2093 | - qemu_put_betl(f, 0); /* XXX: suppress that */ | |
| 2094 | 2093 | hflags = env->hflags; /* XXX: suppress most of the redundant hflags */ |
| 2095 | 2094 | qemu_put_be32s(f, &hflags); |
| 2096 | 2095 | |
| ... | ... | @@ -2098,23 +2097,37 @@ void cpu_save(QEMUFile *f, void *opaque) |
| 2098 | 2097 | fpuc = env->fpuc; |
| 2099 | 2098 | fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; |
| 2100 | 2099 | fptag = 0; |
| 2101 | - for (i=7; i>=0; i--) { | |
| 2102 | - fptag <<= 2; | |
| 2103 | - if (env->fptags[i]) { | |
| 2104 | - fptag |= 3; | |
| 2105 | - } | |
| 2100 | + for(i = 0; i < 8; i++) { | |
| 2101 | + fptag |= ((!env->fptags[i]) << i); | |
| 2106 | 2102 | } |
| 2107 | 2103 | |
| 2108 | 2104 | qemu_put_be16s(f, &fpuc); |
| 2109 | 2105 | qemu_put_be16s(f, &fpus); |
| 2110 | 2106 | qemu_put_be16s(f, &fptag); |
| 2111 | 2107 | |
| 2108 | +#ifdef USE_X86LDOUBLE | |
| 2109 | + fpregs_format = 0; | |
| 2110 | +#else | |
| 2111 | + fpregs_format = 1; | |
| 2112 | +#endif | |
| 2113 | + qemu_put_be16s(f, &fpregs_format); | |
| 2114 | + | |
| 2112 | 2115 | for(i = 0; i < 8; i++) { |
| 2113 | 2116 | uint64_t mant; |
| 2114 | 2117 | uint16_t exp; |
| 2115 | - cpu_get_fp80(&mant, &exp, env->fpregs[i]); | |
| 2118 | +#ifdef USE_X86LDOUBLE | |
| 2119 | + /* we save the real CPU data (in case of MMX usage only 'mant' | |
| 2120 | + contains the MMX register */ | |
| 2121 | + cpu_get_fp80(&mant, &exp, env->fpregs[i].d); | |
| 2116 | 2122 | qemu_put_be64(f, mant); |
| 2117 | 2123 | qemu_put_be16(f, exp); |
| 2124 | +#else | |
| 2125 | + /* if we use doubles for float emulation, we save the doubles to | |
| 2126 | + avoid losing information in case of MMX usage. It can give | |
| 2127 | + problems if the image is restored on a CPU where long | |
| 2128 | + doubles are used instead. */ | |
| 2129 | + qemu_put_be64(f, env->fpregs[i].xmm.MMX_Q(0)); | |
| 2130 | +#endif | |
| 2118 | 2131 | } |
| 2119 | 2132 | |
| 2120 | 2133 | for(i = 0; i < 6; i++) |
| ... | ... | @@ -2139,12 +2152,14 @@ void cpu_save(QEMUFile *f, void *opaque) |
| 2139 | 2152 | /* MMU */ |
| 2140 | 2153 | qemu_put_be32s(f, &env->a20_mask); |
| 2141 | 2154 | |
| 2142 | -#ifdef TARGET_X86_64 | |
| 2155 | + /* XMM */ | |
| 2156 | + qemu_put_be32s(f, &env->mxcsr); | |
| 2143 | 2157 | for(i = 0; i < CPU_NB_REGS; i++) { |
| 2144 | 2158 | qemu_put_be64s(f, &env->xmm_regs[i].XMM_Q(0)); |
| 2145 | 2159 | qemu_put_be64s(f, &env->xmm_regs[i].XMM_Q(1)); |
| 2146 | 2160 | } |
| 2147 | 2161 | |
| 2162 | +#ifdef TARGET_X86_64 | |
| 2148 | 2163 | qemu_put_be64s(f, &env->efer); |
| 2149 | 2164 | qemu_put_be64s(f, &env->star); |
| 2150 | 2165 | qemu_put_be64s(f, &env->lstar); |
| ... | ... | @@ -2154,40 +2169,97 @@ void cpu_save(QEMUFile *f, void *opaque) |
| 2154 | 2169 | #endif |
| 2155 | 2170 | } |
| 2156 | 2171 | |
| 2172 | +/* XXX: add that in a FPU generic layer */ | |
| 2173 | +union x86_longdouble { | |
| 2174 | + uint64_t mant; | |
| 2175 | + uint16_t exp; | |
| 2176 | +}; | |
| 2177 | + | |
| 2178 | +#define MANTD1(fp) (fp & ((1LL << 52) - 1)) | |
| 2179 | +#define EXPBIAS1 1023 | |
| 2180 | +#define EXPD1(fp) ((fp >> 52) & 0x7FF) | |
| 2181 | +#define SIGND1(fp) ((fp >> 32) & 0x80000000) | |
| 2182 | + | |
| 2183 | +static void fp64_to_fp80(union x86_longdouble *p, uint64_t temp) | |
| 2184 | +{ | |
| 2185 | + int e; | |
| 2186 | + /* mantissa */ | |
| 2187 | + p->mant = (MANTD1(temp) << 11) | (1LL << 63); | |
| 2188 | + /* exponent + sign */ | |
| 2189 | + e = EXPD1(temp) - EXPBIAS1 + 16383; | |
| 2190 | + e |= SIGND1(temp) >> 16; | |
| 2191 | + p->exp = e; | |
| 2192 | +} | |
| 2193 | + | |
| 2157 | 2194 | int cpu_load(QEMUFile *f, void *opaque, int version_id) |
| 2158 | 2195 | { |
| 2159 | 2196 | CPUState *env = opaque; |
| 2160 | - int i; | |
| 2197 | + int i, guess_mmx; | |
| 2161 | 2198 | uint32_t hflags; |
| 2162 | - uint16_t fpus, fpuc, fptag; | |
| 2199 | + uint16_t fpus, fpuc, fptag, fpregs_format; | |
| 2163 | 2200 | |
| 2164 | - if (version_id != 2) | |
| 2201 | + if (version_id != 3) | |
| 2165 | 2202 | return -EINVAL; |
| 2166 | 2203 | for(i = 0; i < CPU_NB_REGS; i++) |
| 2167 | 2204 | qemu_get_betls(f, &env->regs[i]); |
| 2168 | 2205 | qemu_get_betls(f, &env->eip); |
| 2169 | 2206 | qemu_get_betls(f, &env->eflags); |
| 2170 | - qemu_get_betl(f); /* XXX: suppress that */ | |
| 2171 | 2207 | qemu_get_be32s(f, &hflags); |
| 2172 | 2208 | |
| 2173 | 2209 | qemu_get_be16s(f, &fpuc); |
| 2174 | 2210 | qemu_get_be16s(f, &fpus); |
| 2175 | 2211 | qemu_get_be16s(f, &fptag); |
| 2176 | - | |
| 2212 | + qemu_get_be16s(f, &fpregs_format); | |
| 2213 | + | |
| 2214 | + /* NOTE: we cannot always restore the FPU state if the image come | |
| 2215 | + from a host with a different 'USE_X86LDOUBLE' define. We guess | |
| 2216 | + if we are in an MMX state to restore correctly in that case. */ | |
| 2217 | + guess_mmx = ((fptag == 0xff) && (fpus & 0x3800) == 0); | |
| 2177 | 2218 | for(i = 0; i < 8; i++) { |
| 2178 | 2219 | uint64_t mant; |
| 2179 | 2220 | uint16_t exp; |
| 2180 | - mant = qemu_get_be64(f); | |
| 2181 | - exp = qemu_get_be16(f); | |
| 2182 | - env->fpregs[i] = cpu_set_fp80(mant, exp); | |
| 2221 | + union x86_longdouble *p; | |
| 2222 | + | |
| 2223 | + switch(fpregs_format) { | |
| 2224 | + case 0: | |
| 2225 | + mant = qemu_get_be64(f); | |
| 2226 | + exp = qemu_get_be16(f); | |
| 2227 | +#ifdef USE_X86LDOUBLE | |
| 2228 | + env->fpregs[i].d = cpu_set_fp80(mant, exp); | |
| 2229 | +#else | |
| 2230 | + /* difficult case */ | |
| 2231 | + if (guess_mmx) | |
| 2232 | + env->fpregs[i].xmm.MMX_Q(0) = mant; | |
| 2233 | + else | |
| 2234 | + env->fpregs[i].d = cpu_set_fp80(mant, exp); | |
| 2235 | +#endif | |
| 2236 | + break; | |
| 2237 | + case 1: | |
| 2238 | + mant = qemu_get_be64(f); | |
| 2239 | +#ifdef USE_X86LDOUBLE | |
| 2240 | + /* difficult case */ | |
| 2241 | + p = (void *)&env->fpregs[i]; | |
| 2242 | + if (guess_mmx) { | |
| 2243 | + p->mant = mant; | |
| 2244 | + p->exp = 0xffff; | |
| 2245 | + } else { | |
| 2246 | + fp64_to_fp80(p, mant); | |
| 2247 | + } | |
| 2248 | +#else | |
| 2249 | + env->fpregs[i].xmm.MMX_Q(0) = mant; | |
| 2250 | +#endif | |
| 2251 | + break; | |
| 2252 | + default: | |
| 2253 | + return -EINVAL; | |
| 2254 | + } | |
| 2183 | 2255 | } |
| 2184 | 2256 | |
| 2185 | 2257 | env->fpuc = fpuc; |
| 2186 | 2258 | env->fpstt = (fpus >> 11) & 7; |
| 2187 | 2259 | env->fpus = fpus & ~0x3800; |
| 2260 | + fptag ^= 0xff; | |
| 2188 | 2261 | for(i = 0; i < 8; i++) { |
| 2189 | - env->fptags[i] = ((fptag & 3) == 3); | |
| 2190 | - fptag >>= 2; | |
| 2262 | + env->fptags[i] = (fptag >> i) & 1; | |
| 2191 | 2263 | } |
| 2192 | 2264 | |
| 2193 | 2265 | for(i = 0; i < 6; i++) |
| ... | ... | @@ -2212,12 +2284,13 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) |
| 2212 | 2284 | /* MMU */ |
| 2213 | 2285 | qemu_get_be32s(f, &env->a20_mask); |
| 2214 | 2286 | |
| 2215 | -#ifdef TARGET_X86_64 | |
| 2287 | + qemu_get_be32s(f, &env->mxcsr); | |
| 2216 | 2288 | for(i = 0; i < CPU_NB_REGS; i++) { |
| 2217 | 2289 | qemu_get_be64s(f, &env->xmm_regs[i].XMM_Q(0)); |
| 2218 | 2290 | qemu_get_be64s(f, &env->xmm_regs[i].XMM_Q(1)); |
| 2219 | 2291 | } |
| 2220 | 2292 | |
| 2293 | +#ifdef TARGET_X86_64 | |
| 2221 | 2294 | qemu_get_be64s(f, &env->efer); |
| 2222 | 2295 | qemu_get_be64s(f, &env->star); |
| 2223 | 2296 | qemu_get_be64s(f, &env->lstar); |
| ... | ... | @@ -3433,7 +3506,7 @@ int main(int argc, char **argv) |
| 3433 | 3506 | cpu_single_env = env; |
| 3434 | 3507 | |
| 3435 | 3508 | register_savevm("timer", 0, 1, timer_save, timer_load, env); |
| 3436 | - register_savevm("cpu", 0, 2, cpu_save, cpu_load, env); | |
| 3509 | + register_savevm("cpu", 0, 3, cpu_save, cpu_load, env); | |
| 3437 | 3510 | register_savevm("ram", 0, 1, ram_save, ram_load, NULL); |
| 3438 | 3511 | qemu_register_reset(main_cpu_reset, global_env); |
| 3439 | 3512 | ... | ... |