Commit 5af451868c5bd93ea24672b0fa1b765cbd563cc6

Authored by bellard
1 parent 77f193da

converted SSE/MMX ops to TCG

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4441 c046a42c-6fe2-441c-8c8c-71466251a162
target-i386/helper.c
... ... @@ -4462,3 +4462,32 @@ void vmexit(uint64_t exit_code, uint64_t exit_info_1)
4462 4462 }
4463 4463  
4464 4464 #endif
  4465 +
  4466 +/* MMX/SSE */
  4467 +/* XXX: optimize by storing fpstt and fptags in the static cpu state */
  4468 +void helper_enter_mmx(void)
  4469 +{
  4470 + env->fpstt = 0;
  4471 + *(uint32_t *)(env->fptags) = 0;
  4472 + *(uint32_t *)(env->fptags + 4) = 0;
  4473 +}
  4474 +
  4475 +void helper_emms(void)
  4476 +{
  4477 + /* set to empty state */
  4478 + *(uint32_t *)(env->fptags) = 0x01010101;
  4479 + *(uint32_t *)(env->fptags + 4) = 0x01010101;
  4480 +}
  4481 +
  4482 +/* XXX: suppress */
  4483 +void helper_movq(uint64_t *d, uint64_t *s)
  4484 +{
  4485 + *d = *s;
  4486 +}
  4487 +
  4488 +#define SHIFT 0
  4489 +#include "ops_sse.h"
  4490 +
  4491 +#define SHIFT 1
  4492 +#include "ops_sse.h"
  4493 +
... ...
target-i386/helper.h
... ... @@ -2,3 +2,12 @@
2 2  
3 3 void TCG_HELPER_PROTO helper_divl_EAX_T0(target_ulong t0);
4 4 void TCG_HELPER_PROTO helper_idivl_EAX_T0(target_ulong t0);
  5 +void TCG_HELPER_PROTO helper_enter_mmx(void);
  6 +void TCG_HELPER_PROTO helper_emms(void);
  7 +void TCG_HELPER_PROTO helper_movq(uint64_t *d, uint64_t *s);
  8 +
  9 +#define SHIFT 0
  10 +#include "ops_sse_header.h"
  11 +#define SHIFT 1
  12 +#include "ops_sse_header.h"
  13 +
... ...
target-i386/op.c
... ... @@ -2144,44 +2144,9 @@ void OPPROTO op_unlock(void)
2144 2144 }
2145 2145  
2146 2146 /* SSE support */
2147   -static inline void memcpy16(void *d, void *s)
  2147 +void OPPROTO op_com_dummy(void)
2148 2148 {
2149   - ((uint32_t *)d)[0] = ((uint32_t *)s)[0];
2150   - ((uint32_t *)d)[1] = ((uint32_t *)s)[1];
2151   - ((uint32_t *)d)[2] = ((uint32_t *)s)[2];
2152   - ((uint32_t *)d)[3] = ((uint32_t *)s)[3];
2153   -}
2154   -
2155   -void OPPROTO op_movo(void)
2156   -{
2157   - /* XXX: badly generated code */
2158   - XMMReg *d, *s;
2159   - d = (XMMReg *)((char *)env + PARAM1);
2160   - s = (XMMReg *)((char *)env + PARAM2);
2161   - memcpy16(d, s);
2162   -}
2163   -
2164   -void OPPROTO op_movq(void)
2165   -{
2166   - uint64_t *d, *s;
2167   - d = (uint64_t *)((char *)env + PARAM1);
2168   - s = (uint64_t *)((char *)env + PARAM2);
2169   - *d = *s;
2170   -}
2171   -
2172   -void OPPROTO op_movl(void)
2173   -{
2174   - uint32_t *d, *s;
2175   - d = (uint32_t *)((char *)env + PARAM1);
2176   - s = (uint32_t *)((char *)env + PARAM2);
2177   - *d = *s;
2178   -}
2179   -
2180   -void OPPROTO op_movq_env_0(void)
2181   -{
2182   - uint64_t *d;
2183   - d = (uint64_t *)((char *)env + PARAM1);
2184   - *d = 0;
  2149 + T0 = 0;
2185 2150 }
2186 2151  
2187 2152 void OPPROTO op_fxsave_A0(void)
... ... @@ -2194,27 +2159,6 @@ void OPPROTO op_fxrstor_A0(void)
2194 2159 helper_fxrstor(A0, PARAM1);
2195 2160 }
2196 2161  
2197   -/* XXX: optimize by storing fpstt and fptags in the static cpu state */
2198   -void OPPROTO op_enter_mmx(void)
2199   -{
2200   - env->fpstt = 0;
2201   - *(uint32_t *)(env->fptags) = 0;
2202   - *(uint32_t *)(env->fptags + 4) = 0;
2203   -}
2204   -
2205   -void OPPROTO op_emms(void)
2206   -{
2207   - /* set to empty state */
2208   - *(uint32_t *)(env->fptags) = 0x01010101;
2209   - *(uint32_t *)(env->fptags + 4) = 0x01010101;
2210   -}
2211   -
2212   -#define SHIFT 0
2213   -#include "ops_sse.h"
2214   -
2215   -#define SHIFT 1
2216   -#include "ops_sse.h"
2217   -
2218 2162 /* Secure Virtual Machine ops */
2219 2163  
2220 2164 void OPPROTO op_vmrun(void)
... ...
target-i386/ops_sse.h
... ... @@ -35,14 +35,10 @@
35 35 #define SUFFIX _xmm
36 36 #endif
37 37  
38   -void OPPROTO glue(op_psrlw, SUFFIX)(void)
  38 +void glue(helper_psrlw, SUFFIX)(Reg *d, Reg *s)
39 39 {
40   - Reg *d, *s;
41 40 int shift;
42 41  
43   - d = (Reg *)((char *)env + PARAM1);
44   - s = (Reg *)((char *)env + PARAM2);
45   -
46 42 if (s->Q(0) > 15) {
47 43 d->Q(0) = 0;
48 44 #if SHIFT == 1
... ... @@ -64,14 +60,10 @@ void OPPROTO glue(op_psrlw, SUFFIX)(void)
64 60 FORCE_RET();
65 61 }
66 62  
67   -void OPPROTO glue(op_psraw, SUFFIX)(void)
  63 +void glue(helper_psraw, SUFFIX)(Reg *d, Reg *s)
68 64 {
69   - Reg *d, *s;
70 65 int shift;
71 66  
72   - d = (Reg *)((char *)env + PARAM1);
73   - s = (Reg *)((char *)env + PARAM2);
74   -
75 67 if (s->Q(0) > 15) {
76 68 shift = 15;
77 69 } else {
... ... @@ -89,14 +81,10 @@ void OPPROTO glue(op_psraw, SUFFIX)(void)
89 81 #endif
90 82 }
91 83  
92   -void OPPROTO glue(op_psllw, SUFFIX)(void)
  84 +void glue(helper_psllw, SUFFIX)(Reg *d, Reg *s)
93 85 {
94   - Reg *d, *s;
95 86 int shift;
96 87  
97   - d = (Reg *)((char *)env + PARAM1);
98   - s = (Reg *)((char *)env + PARAM2);
99   -
100 88 if (s->Q(0) > 15) {
101 89 d->Q(0) = 0;
102 90 #if SHIFT == 1
... ... @@ -118,14 +106,10 @@ void OPPROTO glue(op_psllw, SUFFIX)(void)
118 106 FORCE_RET();
119 107 }
120 108  
121   -void OPPROTO glue(op_psrld, SUFFIX)(void)
  109 +void glue(helper_psrld, SUFFIX)(Reg *d, Reg *s)
122 110 {
123   - Reg *d, *s;
124 111 int shift;
125 112  
126   - d = (Reg *)((char *)env + PARAM1);
127   - s = (Reg *)((char *)env + PARAM2);
128   -
129 113 if (s->Q(0) > 31) {
130 114 d->Q(0) = 0;
131 115 #if SHIFT == 1
... ... @@ -143,14 +127,10 @@ void OPPROTO glue(op_psrld, SUFFIX)(void)
143 127 FORCE_RET();
144 128 }
145 129  
146   -void OPPROTO glue(op_psrad, SUFFIX)(void)
  130 +void glue(helper_psrad, SUFFIX)(Reg *d, Reg *s)
147 131 {
148   - Reg *d, *s;
149 132 int shift;
150 133  
151   - d = (Reg *)((char *)env + PARAM1);
152   - s = (Reg *)((char *)env + PARAM2);
153   -
154 134 if (s->Q(0) > 31) {
155 135 shift = 31;
156 136 } else {
... ... @@ -164,14 +144,10 @@ void OPPROTO glue(op_psrad, SUFFIX)(void)
164 144 #endif
165 145 }
166 146  
167   -void OPPROTO glue(op_pslld, SUFFIX)(void)
  147 +void glue(helper_pslld, SUFFIX)(Reg *d, Reg *s)
168 148 {
169   - Reg *d, *s;
170 149 int shift;
171 150  
172   - d = (Reg *)((char *)env + PARAM1);
173   - s = (Reg *)((char *)env + PARAM2);
174   -
175 151 if (s->Q(0) > 31) {
176 152 d->Q(0) = 0;
177 153 #if SHIFT == 1
... ... @@ -189,14 +165,10 @@ void OPPROTO glue(op_pslld, SUFFIX)(void)
189 165 FORCE_RET();
190 166 }
191 167  
192   -void OPPROTO glue(op_psrlq, SUFFIX)(void)
  168 +void glue(helper_psrlq, SUFFIX)(Reg *d, Reg *s)
193 169 {
194   - Reg *d, *s;
195 170 int shift;
196 171  
197   - d = (Reg *)((char *)env + PARAM1);
198   - s = (Reg *)((char *)env + PARAM2);
199   -
200 172 if (s->Q(0) > 63) {
201 173 d->Q(0) = 0;
202 174 #if SHIFT == 1
... ... @@ -212,14 +184,10 @@ void OPPROTO glue(op_psrlq, SUFFIX)(void)
212 184 FORCE_RET();
213 185 }
214 186  
215   -void OPPROTO glue(op_psllq, SUFFIX)(void)
  187 +void glue(helper_psllq, SUFFIX)(Reg *d, Reg *s)
216 188 {
217   - Reg *d, *s;
218 189 int shift;
219 190  
220   - d = (Reg *)((char *)env + PARAM1);
221   - s = (Reg *)((char *)env + PARAM2);
222   -
223 191 if (s->Q(0) > 63) {
224 192 d->Q(0) = 0;
225 193 #if SHIFT == 1
... ... @@ -236,13 +204,10 @@ void OPPROTO glue(op_psllq, SUFFIX)(void)
236 204 }
237 205  
238 206 #if SHIFT == 1
239   -void OPPROTO glue(op_psrldq, SUFFIX)(void)
  207 +void glue(helper_psrldq, SUFFIX)(Reg *d, Reg *s)
240 208 {
241   - Reg *d, *s;
242 209 int shift, i;
243 210  
244   - d = (Reg *)((char *)env + PARAM1);
245   - s = (Reg *)((char *)env + PARAM2);
246 211 shift = s->L(0);
247 212 if (shift > 16)
248 213 shift = 16;
... ... @@ -253,13 +218,10 @@ void OPPROTO glue(op_psrldq, SUFFIX)(void)
253 218 FORCE_RET();
254 219 }
255 220  
256   -void OPPROTO glue(op_pslldq, SUFFIX)(void)
  221 +void glue(helper_pslldq, SUFFIX)(Reg *d, Reg *s)
257 222 {
258   - Reg *d, *s;
259 223 int shift, i;
260 224  
261   - d = (Reg *)((char *)env + PARAM1);
262   - s = (Reg *)((char *)env + PARAM2);
263 225 shift = s->L(0);
264 226 if (shift > 16)
265 227 shift = 16;
... ... @@ -271,12 +233,9 @@ void OPPROTO glue(op_pslldq, SUFFIX)(void)
271 233 }
272 234 #endif
273 235  
274   -#define SSE_OP_B(name, F)\
275   -void OPPROTO glue(name, SUFFIX) (void)\
  236 +#define SSE_HELPER_B(name, F)\
  237 +void glue(name, SUFFIX) (Reg *d, Reg *s)\
276 238 {\
277   - Reg *d, *s;\
278   - d = (Reg *)((char *)env + PARAM1);\
279   - s = (Reg *)((char *)env + PARAM2);\
280 239 d->B(0) = F(d->B(0), s->B(0));\
281 240 d->B(1) = F(d->B(1), s->B(1));\
282 241 d->B(2) = F(d->B(2), s->B(2));\
... ... @@ -297,12 +256,9 @@ void OPPROTO glue(name, SUFFIX) (void)\
297 256 )\
298 257 }
299 258  
300   -#define SSE_OP_W(name, F)\
301   -void OPPROTO glue(name, SUFFIX) (void)\
  259 +#define SSE_HELPER_W(name, F)\
  260 +void glue(name, SUFFIX) (Reg *d, Reg *s)\
302 261 {\
303   - Reg *d, *s;\
304   - d = (Reg *)((char *)env + PARAM1);\
305   - s = (Reg *)((char *)env + PARAM2);\
306 262 d->W(0) = F(d->W(0), s->W(0));\
307 263 d->W(1) = F(d->W(1), s->W(1));\
308 264 d->W(2) = F(d->W(2), s->W(2));\
... ... @@ -315,12 +271,9 @@ void OPPROTO glue(name, SUFFIX) (void)\
315 271 )\
316 272 }
317 273  
318   -#define SSE_OP_L(name, F)\
319   -void OPPROTO glue(name, SUFFIX) (void)\
  274 +#define SSE_HELPER_L(name, F)\
  275 +void glue(name, SUFFIX) (Reg *d, Reg *s)\
320 276 {\
321   - Reg *d, *s;\
322   - d = (Reg *)((char *)env + PARAM1);\
323   - s = (Reg *)((char *)env + PARAM2);\
324 277 d->L(0) = F(d->L(0), s->L(0));\
325 278 d->L(1) = F(d->L(1), s->L(1));\
326 279 XMM_ONLY(\
... ... @@ -329,12 +282,9 @@ void OPPROTO glue(name, SUFFIX) (void)\
329 282 )\
330 283 }
331 284  
332   -#define SSE_OP_Q(name, F)\
333   -void OPPROTO glue(name, SUFFIX) (void)\
  285 +#define SSE_HELPER_Q(name, F)\
  286 +void glue(name, SUFFIX) (Reg *d, Reg *s)\
334 287 {\
335   - Reg *d, *s;\
336   - d = (Reg *)((char *)env + PARAM1);\
337   - s = (Reg *)((char *)env + PARAM2);\
338 288 d->Q(0) = F(d->Q(0), s->Q(0));\
339 289 XMM_ONLY(\
340 290 d->Q(1) = F(d->Q(1), s->Q(1));\
... ... @@ -416,73 +366,66 @@ static inline int satsw(int x)
416 366 #define FAVG(a, b) ((a) + (b) + 1) >> 1
417 367 #endif
418 368  
419   -SSE_OP_B(op_paddb, FADD)
420   -SSE_OP_W(op_paddw, FADD)
421   -SSE_OP_L(op_paddl, FADD)
422   -SSE_OP_Q(op_paddq, FADD)
  369 +SSE_HELPER_B(helper_paddb, FADD)
  370 +SSE_HELPER_W(helper_paddw, FADD)
  371 +SSE_HELPER_L(helper_paddl, FADD)
  372 +SSE_HELPER_Q(helper_paddq, FADD)
423 373  
424   -SSE_OP_B(op_psubb, FSUB)
425   -SSE_OP_W(op_psubw, FSUB)
426   -SSE_OP_L(op_psubl, FSUB)
427   -SSE_OP_Q(op_psubq, FSUB)
  374 +SSE_HELPER_B(helper_psubb, FSUB)
  375 +SSE_HELPER_W(helper_psubw, FSUB)
  376 +SSE_HELPER_L(helper_psubl, FSUB)
  377 +SSE_HELPER_Q(helper_psubq, FSUB)
428 378  
429   -SSE_OP_B(op_paddusb, FADDUB)
430   -SSE_OP_B(op_paddsb, FADDSB)
431   -SSE_OP_B(op_psubusb, FSUBUB)
432   -SSE_OP_B(op_psubsb, FSUBSB)
  379 +SSE_HELPER_B(helper_paddusb, FADDUB)
  380 +SSE_HELPER_B(helper_paddsb, FADDSB)
  381 +SSE_HELPER_B(helper_psubusb, FSUBUB)
  382 +SSE_HELPER_B(helper_psubsb, FSUBSB)
433 383  
434   -SSE_OP_W(op_paddusw, FADDUW)
435   -SSE_OP_W(op_paddsw, FADDSW)
436   -SSE_OP_W(op_psubusw, FSUBUW)
437   -SSE_OP_W(op_psubsw, FSUBSW)
  384 +SSE_HELPER_W(helper_paddusw, FADDUW)
  385 +SSE_HELPER_W(helper_paddsw, FADDSW)
  386 +SSE_HELPER_W(helper_psubusw, FSUBUW)
  387 +SSE_HELPER_W(helper_psubsw, FSUBSW)
438 388  
439   -SSE_OP_B(op_pminub, FMINUB)
440   -SSE_OP_B(op_pmaxub, FMAXUB)
  389 +SSE_HELPER_B(helper_pminub, FMINUB)
  390 +SSE_HELPER_B(helper_pmaxub, FMAXUB)
441 391  
442   -SSE_OP_W(op_pminsw, FMINSW)
443   -SSE_OP_W(op_pmaxsw, FMAXSW)
  392 +SSE_HELPER_W(helper_pminsw, FMINSW)
  393 +SSE_HELPER_W(helper_pmaxsw, FMAXSW)
444 394  
445   -SSE_OP_Q(op_pand, FAND)
446   -SSE_OP_Q(op_pandn, FANDN)
447   -SSE_OP_Q(op_por, FOR)
448   -SSE_OP_Q(op_pxor, FXOR)
  395 +SSE_HELPER_Q(helper_pand, FAND)
  396 +SSE_HELPER_Q(helper_pandn, FANDN)
  397 +SSE_HELPER_Q(helper_por, FOR)
  398 +SSE_HELPER_Q(helper_pxor, FXOR)
449 399  
450   -SSE_OP_B(op_pcmpgtb, FCMPGTB)
451   -SSE_OP_W(op_pcmpgtw, FCMPGTW)
452   -SSE_OP_L(op_pcmpgtl, FCMPGTL)
  400 +SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
  401 +SSE_HELPER_W(helper_pcmpgtw, FCMPGTW)
  402 +SSE_HELPER_L(helper_pcmpgtl, FCMPGTL)
453 403  
454   -SSE_OP_B(op_pcmpeqb, FCMPEQ)
455   -SSE_OP_W(op_pcmpeqw, FCMPEQ)
456   -SSE_OP_L(op_pcmpeql, FCMPEQ)
  404 +SSE_HELPER_B(helper_pcmpeqb, FCMPEQ)
  405 +SSE_HELPER_W(helper_pcmpeqw, FCMPEQ)
  406 +SSE_HELPER_L(helper_pcmpeql, FCMPEQ)
457 407  
458   -SSE_OP_W(op_pmullw, FMULLW)
  408 +SSE_HELPER_W(helper_pmullw, FMULLW)
459 409 #if SHIFT == 0
460   -SSE_OP_W(op_pmulhrw, FMULHRW)
  410 +SSE_HELPER_W(helper_pmulhrw, FMULHRW)
461 411 #endif
462   -SSE_OP_W(op_pmulhuw, FMULHUW)
463   -SSE_OP_W(op_pmulhw, FMULHW)
  412 +SSE_HELPER_W(helper_pmulhuw, FMULHUW)
  413 +SSE_HELPER_W(helper_pmulhw, FMULHW)
464 414  
465   -SSE_OP_B(op_pavgb, FAVG)
466   -SSE_OP_W(op_pavgw, FAVG)
  415 +SSE_HELPER_B(helper_pavgb, FAVG)
  416 +SSE_HELPER_W(helper_pavgw, FAVG)
467 417  
468   -void OPPROTO glue(op_pmuludq, SUFFIX) (void)
  418 +void glue(helper_pmuludq, SUFFIX) (Reg *d, Reg *s)
469 419 {
470   - Reg *d, *s;
471   - d = (Reg *)((char *)env + PARAM1);
472   - s = (Reg *)((char *)env + PARAM2);
473   -
474 420 d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0);
475 421 #if SHIFT == 1
476 422 d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2);
477 423 #endif
478 424 }
479 425  
480   -void OPPROTO glue(op_pmaddwd, SUFFIX) (void)
  426 +void glue(helper_pmaddwd, SUFFIX) (Reg *d, Reg *s)
481 427 {
482 428 int i;
483   - Reg *d, *s;
484   - d = (Reg *)((char *)env + PARAM1);
485   - s = (Reg *)((char *)env + PARAM2);
486 429  
487 430 for(i = 0; i < (2 << SHIFT); i++) {
488 431 d->L(i) = (int16_t)s->W(2*i) * (int16_t)d->W(2*i) +
... ... @@ -500,12 +443,9 @@ static inline int abs1(int a)
500 443 return a;
501 444 }
502 445 #endif
503   -void OPPROTO glue(op_psadbw, SUFFIX) (void)
  446 +void glue(helper_psadbw, SUFFIX) (Reg *d, Reg *s)
504 447 {
505 448 unsigned int val;
506   - Reg *d, *s;
507   - d = (Reg *)((char *)env + PARAM1);
508   - s = (Reg *)((char *)env + PARAM2);
509 449  
510 450 val = 0;
511 451 val += abs1(d->B(0) - s->B(0));
... ... @@ -531,12 +471,9 @@ void OPPROTO glue(op_psadbw, SUFFIX) (void)
531 471 #endif
532 472 }
533 473  
534   -void OPPROTO glue(op_maskmov, SUFFIX) (void)
  474 +void glue(helper_maskmov, SUFFIX) (Reg *d, Reg *s)
535 475 {
536 476 int i;
537   - Reg *d, *s;
538   - d = (Reg *)((char *)env + PARAM1);
539   - s = (Reg *)((char *)env + PARAM2);
540 477 for(i = 0; i < (8 << SHIFT); i++) {
541 478 if (s->B(i) & 0x80)
542 479 stb(A0 + i, d->B(i));
... ... @@ -544,51 +481,29 @@ void OPPROTO glue(op_maskmov, SUFFIX) (void)
544 481 FORCE_RET();
545 482 }
546 483  
547   -void OPPROTO glue(op_movl_mm_T0, SUFFIX) (void)
  484 +void glue(helper_movl_mm_T0, SUFFIX) (Reg *d, uint32_t val)
548 485 {
549   - Reg *d;
550   - d = (Reg *)((char *)env + PARAM1);
551   - d->L(0) = T0;
  486 + d->L(0) = val;
552 487 d->L(1) = 0;
553 488 #if SHIFT == 1
554 489 d->Q(1) = 0;
555 490 #endif
556 491 }
557 492  
558   -void OPPROTO glue(op_movl_T0_mm, SUFFIX) (void)
559   -{
560   - Reg *s;
561   - s = (Reg *)((char *)env + PARAM1);
562   - T0 = s->L(0);
563   -}
564   -
565 493 #ifdef TARGET_X86_64
566   -void OPPROTO glue(op_movq_mm_T0, SUFFIX) (void)
  494 +void glue(helper_movq_mm_T0, SUFFIX) (Reg *d, uint64_t val)
567 495 {
568   - Reg *d;
569   - d = (Reg *)((char *)env + PARAM1);
570   - d->Q(0) = T0;
  496 + d->Q(0) = val;
571 497 #if SHIFT == 1
572 498 d->Q(1) = 0;
573 499 #endif
574 500 }
575   -
576   -void OPPROTO glue(op_movq_T0_mm, SUFFIX) (void)
577   -{
578   - Reg *s;
579   - s = (Reg *)((char *)env + PARAM1);
580   - T0 = s->Q(0);
581   -}
582 501 #endif
583 502  
584 503 #if SHIFT == 0
585   -void OPPROTO glue(op_pshufw, SUFFIX) (void)
  504 +void glue(helper_pshufw, SUFFIX) (Reg *d, Reg *s, int order)
586 505 {
587   - Reg r, *d, *s;
588   - int order;
589   - d = (Reg *)((char *)env + PARAM1);
590   - s = (Reg *)((char *)env + PARAM2);
591   - order = PARAM3;
  506 + Reg r;
592 507 r.W(0) = s->W(order & 3);
593 508 r.W(1) = s->W((order >> 2) & 3);
594 509 r.W(2) = s->W((order >> 4) & 3);
... ... @@ -596,13 +511,9 @@ void OPPROTO glue(op_pshufw, SUFFIX) (void)
596 511 *d = r;
597 512 }
598 513 #else
599   -void OPPROTO op_shufps(void)
  514 +void helper_shufps(Reg *d, Reg *s, int order)
600 515 {
601   - Reg r, *d, *s;
602   - int order;
603   - d = (Reg *)((char *)env + PARAM1);
604   - s = (Reg *)((char *)env + PARAM2);
605   - order = PARAM3;
  516 + Reg r;
606 517 r.L(0) = d->L(order & 3);
607 518 r.L(1) = d->L((order >> 2) & 3);
608 519 r.L(2) = s->L((order >> 4) & 3);
... ... @@ -610,25 +521,17 @@ void OPPROTO op_shufps(void)
610 521 *d = r;
611 522 }
612 523  
613   -void OPPROTO op_shufpd(void)
  524 +void helper_shufpd(Reg *d, Reg *s, int order)
614 525 {
615   - Reg r, *d, *s;
616   - int order;
617   - d = (Reg *)((char *)env + PARAM1);
618   - s = (Reg *)((char *)env + PARAM2);
619   - order = PARAM3;
  526 + Reg r;
620 527 r.Q(0) = d->Q(order & 1);
621 528 r.Q(1) = s->Q((order >> 1) & 1);
622 529 *d = r;
623 530 }
624 531  
625   -void OPPROTO glue(op_pshufd, SUFFIX) (void)
  532 +void glue(helper_pshufd, SUFFIX) (Reg *d, Reg *s, int order)
626 533 {
627   - Reg r, *d, *s;
628   - int order;
629   - d = (Reg *)((char *)env + PARAM1);
630   - s = (Reg *)((char *)env + PARAM2);
631   - order = PARAM3;
  534 + Reg r;
632 535 r.L(0) = s->L(order & 3);
633 536 r.L(1) = s->L((order >> 2) & 3);
634 537 r.L(2) = s->L((order >> 4) & 3);
... ... @@ -636,13 +539,9 @@ void OPPROTO glue(op_pshufd, SUFFIX) (void)
636 539 *d = r;
637 540 }
638 541  
639   -void OPPROTO glue(op_pshuflw, SUFFIX) (void)
  542 +void glue(helper_pshuflw, SUFFIX) (Reg *d, Reg *s, int order)
640 543 {
641   - Reg r, *d, *s;
642   - int order;
643   - d = (Reg *)((char *)env + PARAM1);
644   - s = (Reg *)((char *)env + PARAM2);
645   - order = PARAM3;
  544 + Reg r;
646 545 r.W(0) = s->W(order & 3);
647 546 r.W(1) = s->W((order >> 2) & 3);
648 547 r.W(2) = s->W((order >> 4) & 3);
... ... @@ -651,13 +550,9 @@ void OPPROTO glue(op_pshuflw, SUFFIX) (void)
651 550 *d = r;
652 551 }
653 552  
654   -void OPPROTO glue(op_pshufhw, SUFFIX) (void)
  553 +void glue(helper_pshufhw, SUFFIX) (Reg *d, Reg *s, int order)
655 554 {
656   - Reg r, *d, *s;
657   - int order;
658   - d = (Reg *)((char *)env + PARAM1);
659   - s = (Reg *)((char *)env + PARAM2);
660   - order = PARAM3;
  555 + Reg r;
661 556 r.Q(0) = s->Q(0);
662 557 r.W(4) = s->W(4 + (order & 3));
663 558 r.W(5) = s->W(4 + ((order >> 2) & 3));
... ... @@ -671,39 +566,27 @@ void OPPROTO glue(op_pshufhw, SUFFIX) (void)
671 566 /* FPU ops */
672 567 /* XXX: not accurate */
673 568  
674   -#define SSE_OP_S(name, F)\
675   -void OPPROTO op_ ## name ## ps (void)\
  569 +#define SSE_HELPER_S(name, F)\
  570 +void helper_ ## name ## ps (Reg *d, Reg *s)\
676 571 {\
677   - Reg *d, *s;\
678   - d = (Reg *)((char *)env + PARAM1);\
679   - s = (Reg *)((char *)env + PARAM2);\
680 572 d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
681 573 d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
682 574 d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
683 575 d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
684 576 }\
685 577 \
686   -void OPPROTO op_ ## name ## ss (void)\
  578 +void helper_ ## name ## ss (Reg *d, Reg *s)\
687 579 {\
688   - Reg *d, *s;\
689   - d = (Reg *)((char *)env + PARAM1);\
690   - s = (Reg *)((char *)env + PARAM2);\
691 580 d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
692 581 }\
693   -void OPPROTO op_ ## name ## pd (void)\
  582 +void helper_ ## name ## pd (Reg *d, Reg *s)\
694 583 {\
695   - Reg *d, *s;\
696   - d = (Reg *)((char *)env + PARAM1);\
697   - s = (Reg *)((char *)env + PARAM2);\
698 584 d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
699 585 d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
700 586 }\
701 587 \
702   -void OPPROTO op_ ## name ## sd (void)\
  588 +void helper_ ## name ## sd (Reg *d, Reg *s)\
703 589 {\
704   - Reg *d, *s;\
705   - d = (Reg *)((char *)env + PARAM1);\
706   - s = (Reg *)((char *)env + PARAM2);\
707 590 d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
708 591 }
709 592  
... ... @@ -715,69 +598,53 @@ void OPPROTO op_ ## name ## sd (void)\
715 598 #define FPU_MAX(size, a, b) (a) > (b) ? (a) : (b)
716 599 #define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status)
717 600  
718   -SSE_OP_S(add, FPU_ADD)
719   -SSE_OP_S(sub, FPU_SUB)
720   -SSE_OP_S(mul, FPU_MUL)
721   -SSE_OP_S(div, FPU_DIV)
722   -SSE_OP_S(min, FPU_MIN)
723   -SSE_OP_S(max, FPU_MAX)
724   -SSE_OP_S(sqrt, FPU_SQRT)
  601 +SSE_HELPER_S(add, FPU_ADD)
  602 +SSE_HELPER_S(sub, FPU_SUB)
  603 +SSE_HELPER_S(mul, FPU_MUL)
  604 +SSE_HELPER_S(div, FPU_DIV)
  605 +SSE_HELPER_S(min, FPU_MIN)
  606 +SSE_HELPER_S(max, FPU_MAX)
  607 +SSE_HELPER_S(sqrt, FPU_SQRT)
725 608  
726 609  
727 610 /* float to float conversions */
728   -void OPPROTO op_cvtps2pd(void)
  611 +void helper_cvtps2pd(Reg *d, Reg *s)
729 612 {
730 613 float32 s0, s1;
731   - Reg *d, *s;
732   - d = (Reg *)((char *)env + PARAM1);
733   - s = (Reg *)((char *)env + PARAM2);
734 614 s0 = s->XMM_S(0);
735 615 s1 = s->XMM_S(1);
736 616 d->XMM_D(0) = float32_to_float64(s0, &env->sse_status);
737 617 d->XMM_D(1) = float32_to_float64(s1, &env->sse_status);
738 618 }
739 619  
740   -void OPPROTO op_cvtpd2ps(void)
  620 +void helper_cvtpd2ps(Reg *d, Reg *s)
741 621 {
742   - Reg *d, *s;
743   - d = (Reg *)((char *)env + PARAM1);
744   - s = (Reg *)((char *)env + PARAM2);
745 622 d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
746 623 d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status);
747 624 d->Q(1) = 0;
748 625 }
749 626  
750   -void OPPROTO op_cvtss2sd(void)
  627 +void helper_cvtss2sd(Reg *d, Reg *s)
751 628 {
752   - Reg *d, *s;
753   - d = (Reg *)((char *)env + PARAM1);
754   - s = (Reg *)((char *)env + PARAM2);
755 629 d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status);
756 630 }
757 631  
758   -void OPPROTO op_cvtsd2ss(void)
  632 +void helper_cvtsd2ss(Reg *d, Reg *s)
759 633 {
760   - Reg *d, *s;
761   - d = (Reg *)((char *)env + PARAM1);
762   - s = (Reg *)((char *)env + PARAM2);
763 634 d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
764 635 }
765 636  
766 637 /* integer to float */
767   -void OPPROTO op_cvtdq2ps(void)
  638 +void helper_cvtdq2ps(Reg *d, Reg *s)
768 639 {
769   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
770   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
771 640 d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status);
772 641 d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status);
773 642 d->XMM_S(2) = int32_to_float32(s->XMM_L(2), &env->sse_status);
774 643 d->XMM_S(3) = int32_to_float32(s->XMM_L(3), &env->sse_status);
775 644 }
776 645  
777   -void OPPROTO op_cvtdq2pd(void)
  646 +void helper_cvtdq2pd(Reg *d, Reg *s)
778 647 {
779   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
780   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
781 648 int32_t l0, l1;
782 649 l0 = (int32_t)s->XMM_L(0);
783 650 l1 = (int32_t)s->XMM_L(1);
... ... @@ -785,210 +652,168 @@ void OPPROTO op_cvtdq2pd(void)
785 652 d->XMM_D(1) = int32_to_float64(l1, &env->sse_status);
786 653 }
787 654  
788   -void OPPROTO op_cvtpi2ps(void)
  655 +void helper_cvtpi2ps(XMMReg *d, MMXReg *s)
789 656 {
790   - XMMReg *d = (Reg *)((char *)env + PARAM1);
791   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
792 657 d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status);
793 658 d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status);
794 659 }
795 660  
796   -void OPPROTO op_cvtpi2pd(void)
  661 +void helper_cvtpi2pd(XMMReg *d, MMXReg *s)
797 662 {
798   - XMMReg *d = (Reg *)((char *)env + PARAM1);
799   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
800 663 d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status);
801 664 d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status);
802 665 }
803 666  
804   -void OPPROTO op_cvtsi2ss(void)
  667 +void helper_cvtsi2ss(XMMReg *d, uint32_t val)
805 668 {
806   - XMMReg *d = (Reg *)((char *)env + PARAM1);
807   - d->XMM_S(0) = int32_to_float32(T0, &env->sse_status);
  669 + d->XMM_S(0) = int32_to_float32(val, &env->sse_status);
808 670 }
809 671  
810   -void OPPROTO op_cvtsi2sd(void)
  672 +void helper_cvtsi2sd(XMMReg *d, uint32_t val)
811 673 {
812   - XMMReg *d = (Reg *)((char *)env + PARAM1);
813   - d->XMM_D(0) = int32_to_float64(T0, &env->sse_status);
  674 + d->XMM_D(0) = int32_to_float64(val, &env->sse_status);
814 675 }
815 676  
816 677 #ifdef TARGET_X86_64
817   -void OPPROTO op_cvtsq2ss(void)
  678 +void helper_cvtsq2ss(XMMReg *d, uint64_t val)
818 679 {
819   - XMMReg *d = (Reg *)((char *)env + PARAM1);
820   - d->XMM_S(0) = int64_to_float32(T0, &env->sse_status);
  680 + d->XMM_S(0) = int64_to_float32(val, &env->sse_status);
821 681 }
822 682  
823   -void OPPROTO op_cvtsq2sd(void)
  683 +void helper_cvtsq2sd(XMMReg *d, uint64_t val)
824 684 {
825   - XMMReg *d = (Reg *)((char *)env + PARAM1);
826   - d->XMM_D(0) = int64_to_float64(T0, &env->sse_status);
  685 + d->XMM_D(0) = int64_to_float64(val, &env->sse_status);
827 686 }
828 687 #endif
829 688  
830 689 /* float to integer */
831   -void OPPROTO op_cvtps2dq(void)
  690 +void helper_cvtps2dq(XMMReg *d, XMMReg *s)
832 691 {
833   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
834   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
835 692 d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
836 693 d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
837 694 d->XMM_L(2) = float32_to_int32(s->XMM_S(2), &env->sse_status);
838 695 d->XMM_L(3) = float32_to_int32(s->XMM_S(3), &env->sse_status);
839 696 }
840 697  
841   -void OPPROTO op_cvtpd2dq(void)
  698 +void helper_cvtpd2dq(XMMReg *d, XMMReg *s)
842 699 {
843   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
844   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
845 700 d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
846 701 d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
847 702 d->XMM_Q(1) = 0;
848 703 }
849 704  
850   -void OPPROTO op_cvtps2pi(void)
  705 +void helper_cvtps2pi(MMXReg *d, XMMReg *s)
851 706 {
852   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
853   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
854 707 d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
855 708 d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
856 709 }
857 710  
858   -void OPPROTO op_cvtpd2pi(void)
  711 +void helper_cvtpd2pi(MMXReg *d, XMMReg *s)
859 712 {
860   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
861   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
862 713 d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
863 714 d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
864 715 }
865 716  
866   -void OPPROTO op_cvtss2si(void)
  717 +int32_t helper_cvtss2si(XMMReg *s)
867 718 {
868   - XMMReg *s = (XMMReg *)((char *)env + PARAM1);
869   - T0 = float32_to_int32(s->XMM_S(0), &env->sse_status);
  719 + return float32_to_int32(s->XMM_S(0), &env->sse_status);
870 720 }
871 721  
872   -void OPPROTO op_cvtsd2si(void)
  722 +int32_t helper_cvtsd2si(XMMReg *s)
873 723 {
874   - XMMReg *s = (XMMReg *)((char *)env + PARAM1);
875   - T0 = float64_to_int32(s->XMM_D(0), &env->sse_status);
  724 + return float64_to_int32(s->XMM_D(0), &env->sse_status);
876 725 }
877 726  
878 727 #ifdef TARGET_X86_64
879   -void OPPROTO op_cvtss2sq(void)
  728 +int64_t helper_cvtss2sq(XMMReg *s)
880 729 {
881   - XMMReg *s = (XMMReg *)((char *)env + PARAM1);
882   - T0 = float32_to_int64(s->XMM_S(0), &env->sse_status);
  730 + return float32_to_int64(s->XMM_S(0), &env->sse_status);
883 731 }
884 732  
885   -void OPPROTO op_cvtsd2sq(void)
  733 +int64_t helper_cvtsd2sq(XMMReg *s)
886 734 {
887   - XMMReg *s = (XMMReg *)((char *)env + PARAM1);
888   - T0 = float64_to_int64(s->XMM_D(0), &env->sse_status);
  735 + return float64_to_int64(s->XMM_D(0), &env->sse_status);
889 736 }
890 737 #endif
891 738  
892 739 /* float to integer truncated */
893   -void OPPROTO op_cvttps2dq(void)
  740 +void helper_cvttps2dq(XMMReg *d, XMMReg *s)
894 741 {
895   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
896   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
897 742 d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
898 743 d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
899 744 d->XMM_L(2) = float32_to_int32_round_to_zero(s->XMM_S(2), &env->sse_status);
900 745 d->XMM_L(3) = float32_to_int32_round_to_zero(s->XMM_S(3), &env->sse_status);
901 746 }
902 747  
903   -void OPPROTO op_cvttpd2dq(void)
  748 +void helper_cvttpd2dq(XMMReg *d, XMMReg *s)
904 749 {
905   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
906   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
907 750 d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
908 751 d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
909 752 d->XMM_Q(1) = 0;
910 753 }
911 754  
912   -void OPPROTO op_cvttps2pi(void)
  755 +void helper_cvttps2pi(MMXReg *d, XMMReg *s)
913 756 {
914   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
915   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
916 757 d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
917 758 d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
918 759 }
919 760  
920   -void OPPROTO op_cvttpd2pi(void)
  761 +void helper_cvttpd2pi(MMXReg *d, XMMReg *s)
921 762 {
922   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
923   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
924 763 d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
925 764 d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
926 765 }
927 766  
928   -void OPPROTO op_cvttss2si(void)
  767 +int32_t helper_cvttss2si(XMMReg *s)
929 768 {
930   - XMMReg *s = (XMMReg *)((char *)env + PARAM1);
931   - T0 = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
  769 + return float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
932 770 }
933 771  
934   -void OPPROTO op_cvttsd2si(void)
  772 +int32_t helper_cvttsd2si(XMMReg *s)
935 773 {
936   - XMMReg *s = (XMMReg *)((char *)env + PARAM1);
937   - T0 = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
  774 + return float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
938 775 }
939 776  
940 777 #ifdef TARGET_X86_64
941   -void OPPROTO op_cvttss2sq(void)
  778 +int64_t helper_cvttss2sq(XMMReg *s)
942 779 {
943   - XMMReg *s = (XMMReg *)((char *)env + PARAM1);
944   - T0 = float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status);
  780 + return float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status);
945 781 }
946 782  
947   -void OPPROTO op_cvttsd2sq(void)
  783 +int64_t helper_cvttsd2sq(XMMReg *s)
948 784 {
949   - XMMReg *s = (XMMReg *)((char *)env + PARAM1);
950   - T0 = float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status);
  785 + return float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status);
951 786 }
952 787 #endif
953 788  
954   -void OPPROTO op_rsqrtps(void)
  789 +void helper_rsqrtps(XMMReg *d, XMMReg *s)
955 790 {
956   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
957   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
958 791 d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
959 792 d->XMM_S(1) = approx_rsqrt(s->XMM_S(1));
960 793 d->XMM_S(2) = approx_rsqrt(s->XMM_S(2));
961 794 d->XMM_S(3) = approx_rsqrt(s->XMM_S(3));
962 795 }
963 796  
964   -void OPPROTO op_rsqrtss(void)
  797 +void helper_rsqrtss(XMMReg *d, XMMReg *s)
965 798 {
966   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
967   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
968 799 d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
969 800 }
970 801  
971   -void OPPROTO op_rcpps(void)
  802 +void helper_rcpps(XMMReg *d, XMMReg *s)
972 803 {
973   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
974   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
975 804 d->XMM_S(0) = approx_rcp(s->XMM_S(0));
976 805 d->XMM_S(1) = approx_rcp(s->XMM_S(1));
977 806 d->XMM_S(2) = approx_rcp(s->XMM_S(2));
978 807 d->XMM_S(3) = approx_rcp(s->XMM_S(3));
979 808 }
980 809  
981   -void OPPROTO op_rcpss(void)
  810 +void helper_rcpss(XMMReg *d, XMMReg *s)
982 811 {
983   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
984   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
985 812 d->XMM_S(0) = approx_rcp(s->XMM_S(0));
986 813 }
987 814  
988   -void OPPROTO op_haddps(void)
  815 +void helper_haddps(XMMReg *d, XMMReg *s)
989 816 {
990   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
991   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
992 817 XMMReg r;
993 818 r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1);
994 819 r.XMM_S(1) = d->XMM_S(2) + d->XMM_S(3);
... ... @@ -997,20 +822,16 @@ void OPPROTO op_haddps(void)
997 822 *d = r;
998 823 }
999 824  
1000   -void OPPROTO op_haddpd(void)
  825 +void helper_haddpd(XMMReg *d, XMMReg *s)
1001 826 {
1002   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1003   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1004 827 XMMReg r;
1005 828 r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1);
1006 829 r.XMM_D(1) = s->XMM_D(0) + s->XMM_D(1);
1007 830 *d = r;
1008 831 }
1009 832  
1010   -void OPPROTO op_hsubps(void)
  833 +void helper_hsubps(XMMReg *d, XMMReg *s)
1011 834 {
1012   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1013   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1014 835 XMMReg r;
1015 836 r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1);
1016 837 r.XMM_S(1) = d->XMM_S(2) - d->XMM_S(3);
... ... @@ -1019,68 +840,50 @@ void OPPROTO op_hsubps(void)
1019 840 *d = r;
1020 841 }
1021 842  
1022   -void OPPROTO op_hsubpd(void)
  843 +void helper_hsubpd(XMMReg *d, XMMReg *s)
1023 844 {
1024   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1025   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1026 845 XMMReg r;
1027 846 r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1);
1028 847 r.XMM_D(1) = s->XMM_D(0) - s->XMM_D(1);
1029 848 *d = r;
1030 849 }
1031 850  
1032   -void OPPROTO op_addsubps(void)
  851 +void helper_addsubps(XMMReg *d, XMMReg *s)
1033 852 {
1034   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1035   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1036 853 d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0);
1037 854 d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1);
1038 855 d->XMM_S(2) = d->XMM_S(2) - s->XMM_S(2);
1039 856 d->XMM_S(3) = d->XMM_S(3) + s->XMM_S(3);
1040 857 }
1041 858  
1042   -void OPPROTO op_addsubpd(void)
  859 +void helper_addsubpd(XMMReg *d, XMMReg *s)
1043 860 {
1044   - XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1045   - XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1046 861 d->XMM_D(0) = d->XMM_D(0) - s->XMM_D(0);
1047 862 d->XMM_D(1) = d->XMM_D(1) + s->XMM_D(1);
1048 863 }
1049 864  
1050 865 /* XXX: unordered */
1051   -#define SSE_OP_CMP(name, F)\
1052   -void OPPROTO op_ ## name ## ps (void)\
  866 +#define SSE_HELPER_CMP(name, F)\
  867 +void helper_ ## name ## ps (Reg *d, Reg *s)\
1053 868 {\
1054   - Reg *d, *s;\
1055   - d = (Reg *)((char *)env + PARAM1);\
1056   - s = (Reg *)((char *)env + PARAM2);\
1057 869 d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
1058 870 d->XMM_L(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
1059 871 d->XMM_L(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
1060 872 d->XMM_L(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
1061 873 }\
1062 874 \
1063   -void OPPROTO op_ ## name ## ss (void)\
  875 +void helper_ ## name ## ss (Reg *d, Reg *s)\
1064 876 {\
1065   - Reg *d, *s;\
1066   - d = (Reg *)((char *)env + PARAM1);\
1067   - s = (Reg *)((char *)env + PARAM2);\
1068 877 d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
1069 878 }\
1070   -void OPPROTO op_ ## name ## pd (void)\
  879 +void helper_ ## name ## pd (Reg *d, Reg *s)\
1071 880 {\
1072   - Reg *d, *s;\
1073   - d = (Reg *)((char *)env + PARAM1);\
1074   - s = (Reg *)((char *)env + PARAM2);\
1075 881 d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
1076 882 d->XMM_Q(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
1077 883 }\
1078 884 \
1079   -void OPPROTO op_ ## name ## sd (void)\
  885 +void helper_ ## name ## sd (Reg *d, Reg *s)\
1080 886 {\
1081   - Reg *d, *s;\
1082   - d = (Reg *)((char *)env + PARAM1);\
1083   - s = (Reg *)((char *)env + PARAM2);\
1084 887 d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
1085 888 }
1086 889  
... ... @@ -1093,24 +896,21 @@ void OPPROTO op_ ## name ## sd (void)\
1093 896 #define FPU_CMPNLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? 0 : -1
1094 897 #define FPU_CMPORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? 0 : -1
1095 898  
1096   -SSE_OP_CMP(cmpeq, FPU_CMPEQ)
1097   -SSE_OP_CMP(cmplt, FPU_CMPLT)
1098   -SSE_OP_CMP(cmple, FPU_CMPLE)
1099   -SSE_OP_CMP(cmpunord, FPU_CMPUNORD)
1100   -SSE_OP_CMP(cmpneq, FPU_CMPNEQ)
1101   -SSE_OP_CMP(cmpnlt, FPU_CMPNLT)
1102   -SSE_OP_CMP(cmpnle, FPU_CMPNLE)
1103   -SSE_OP_CMP(cmpord, FPU_CMPORD)
  899 +SSE_HELPER_CMP(cmpeq, FPU_CMPEQ)
  900 +SSE_HELPER_CMP(cmplt, FPU_CMPLT)
  901 +SSE_HELPER_CMP(cmple, FPU_CMPLE)
  902 +SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD)
  903 +SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ)
  904 +SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT)
  905 +SSE_HELPER_CMP(cmpnle, FPU_CMPNLE)
  906 +SSE_HELPER_CMP(cmpord, FPU_CMPORD)
1104 907  
1105 908 const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
1106 909  
1107   -void OPPROTO op_ucomiss(void)
  910 +void helper_ucomiss(Reg *d, Reg *s)
1108 911 {
1109 912 int ret;
1110 913 float32 s0, s1;
1111   - Reg *d, *s;
1112   - d = (Reg *)((char *)env + PARAM1);
1113   - s = (Reg *)((char *)env + PARAM2);
1114 914  
1115 915 s0 = d->XMM_S(0);
1116 916 s1 = s->XMM_S(0);
... ... @@ -1119,13 +919,10 @@ void OPPROTO op_ucomiss(void)
1119 919 FORCE_RET();
1120 920 }
1121 921  
1122   -void OPPROTO op_comiss(void)
  922 +void helper_comiss(Reg *d, Reg *s)
1123 923 {
1124 924 int ret;
1125 925 float32 s0, s1;
1126   - Reg *d, *s;
1127   - d = (Reg *)((char *)env + PARAM1);
1128   - s = (Reg *)((char *)env + PARAM2);
1129 926  
1130 927 s0 = d->XMM_S(0);
1131 928 s1 = s->XMM_S(0);
... ... @@ -1134,13 +931,10 @@ void OPPROTO op_comiss(void)
1134 931 FORCE_RET();
1135 932 }
1136 933  
1137   -void OPPROTO op_ucomisd(void)
  934 +void helper_ucomisd(Reg *d, Reg *s)
1138 935 {
1139 936 int ret;
1140 937 float64 d0, d1;
1141   - Reg *d, *s;
1142   - d = (Reg *)((char *)env + PARAM1);
1143   - s = (Reg *)((char *)env + PARAM2);
1144 938  
1145 939 d0 = d->XMM_D(0);
1146 940 d1 = s->XMM_D(0);
... ... @@ -1149,13 +943,10 @@ void OPPROTO op_ucomisd(void)
1149 943 FORCE_RET();
1150 944 }
1151 945  
1152   -void OPPROTO op_comisd(void)
  946 +void helper_comisd(Reg *d, Reg *s)
1153 947 {
1154 948 int ret;
1155 949 float64 d0, d1;
1156   - Reg *d, *s;
1157   - d = (Reg *)((char *)env + PARAM1);
1158   - s = (Reg *)((char *)env + PARAM2);
1159 950  
1160 951 d0 = d->XMM_D(0);
1161 952 d1 = s->XMM_D(0);
... ... @@ -1164,76 +955,54 @@ void OPPROTO op_comisd(void)
1164 955 FORCE_RET();
1165 956 }
1166 957  
1167   -void OPPROTO op_movmskps(void)
  958 +uint32_t helper_movmskps(Reg *s)
1168 959 {
1169 960 int b0, b1, b2, b3;
1170   - Reg *s;
1171   - s = (Reg *)((char *)env + PARAM1);
1172 961 b0 = s->XMM_L(0) >> 31;
1173 962 b1 = s->XMM_L(1) >> 31;
1174 963 b2 = s->XMM_L(2) >> 31;
1175 964 b3 = s->XMM_L(3) >> 31;
1176   - T0 = b0 | (b1 << 1) | (b2 << 2) | (b3 << 3);
  965 + return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3);
1177 966 }
1178 967  
1179   -void OPPROTO op_movmskpd(void)
  968 +uint32_t helper_movmskpd(Reg *s)
1180 969 {
1181 970 int b0, b1;
1182   - Reg *s;
1183   - s = (Reg *)((char *)env + PARAM1);
1184 971 b0 = s->XMM_L(1) >> 31;
1185 972 b1 = s->XMM_L(3) >> 31;
1186   - T0 = b0 | (b1 << 1);
  973 + return b0 | (b1 << 1);
1187 974 }
1188 975  
1189 976 #endif
1190 977  
1191   -void OPPROTO glue(op_pmovmskb, SUFFIX)(void)
1192   -{
1193   - Reg *s;
1194   - s = (Reg *)((char *)env + PARAM1);
1195   - T0 = 0;
1196   - T0 |= (s->XMM_B(0) >> 7);
1197   - T0 |= (s->XMM_B(1) >> 6) & 0x02;
1198   - T0 |= (s->XMM_B(2) >> 5) & 0x04;
1199   - T0 |= (s->XMM_B(3) >> 4) & 0x08;
1200   - T0 |= (s->XMM_B(4) >> 3) & 0x10;
1201   - T0 |= (s->XMM_B(5) >> 2) & 0x20;
1202   - T0 |= (s->XMM_B(6) >> 1) & 0x40;
1203   - T0 |= (s->XMM_B(7)) & 0x80;
  978 +uint32_t glue(helper_pmovmskb, SUFFIX)(Reg *s)
  979 +{
  980 + uint32_t val;
  981 + val = 0;
  982 + val |= (s->XMM_B(0) >> 7);
  983 + val |= (s->XMM_B(1) >> 6) & 0x02;
  984 + val |= (s->XMM_B(2) >> 5) & 0x04;
  985 + val |= (s->XMM_B(3) >> 4) & 0x08;
  986 + val |= (s->XMM_B(4) >> 3) & 0x10;
  987 + val |= (s->XMM_B(5) >> 2) & 0x20;
  988 + val |= (s->XMM_B(6) >> 1) & 0x40;
  989 + val |= (s->XMM_B(7)) & 0x80;
1204 990 #if SHIFT == 1
1205   - T0 |= (s->XMM_B(8) << 1) & 0x0100;
1206   - T0 |= (s->XMM_B(9) << 2) & 0x0200;
1207   - T0 |= (s->XMM_B(10) << 3) & 0x0400;
1208   - T0 |= (s->XMM_B(11) << 4) & 0x0800;
1209   - T0 |= (s->XMM_B(12) << 5) & 0x1000;
1210   - T0 |= (s->XMM_B(13) << 6) & 0x2000;
1211   - T0 |= (s->XMM_B(14) << 7) & 0x4000;
1212   - T0 |= (s->XMM_B(15) << 8) & 0x8000;
  991 + val |= (s->XMM_B(8) << 1) & 0x0100;
  992 + val |= (s->XMM_B(9) << 2) & 0x0200;
  993 + val |= (s->XMM_B(10) << 3) & 0x0400;
  994 + val |= (s->XMM_B(11) << 4) & 0x0800;
  995 + val |= (s->XMM_B(12) << 5) & 0x1000;
  996 + val |= (s->XMM_B(13) << 6) & 0x2000;
  997 + val |= (s->XMM_B(14) << 7) & 0x4000;
  998 + val |= (s->XMM_B(15) << 8) & 0x8000;
1213 999 #endif
  1000 + return val;
1214 1001 }
1215 1002  
1216   -void OPPROTO glue(op_pinsrw, SUFFIX) (void)
1217   -{
1218   - Reg *d = (Reg *)((char *)env + PARAM1);
1219   - int pos = PARAM2;
1220   -
1221   - d->W(pos) = T0;
1222   -}
1223   -
1224   -void OPPROTO glue(op_pextrw, SUFFIX) (void)
1225   -{
1226   - Reg *s = (Reg *)((char *)env + PARAM1);
1227   - int pos = PARAM2;
1228   -
1229   - T0 = s->W(pos);
1230   -}
1231   -
1232   -void OPPROTO glue(op_packsswb, SUFFIX) (void)
  1003 +void glue(helper_packsswb, SUFFIX) (Reg *d, Reg *s)
1233 1004 {
1234   - Reg r, *d, *s;
1235   - d = (Reg *)((char *)env + PARAM1);
1236   - s = (Reg *)((char *)env + PARAM2);
  1005 + Reg r;
1237 1006  
1238 1007 r.B(0) = satsb((int16_t)d->W(0));
1239 1008 r.B(1) = satsb((int16_t)d->W(1));
... ... @@ -1258,11 +1027,9 @@ void OPPROTO glue(op_packsswb, SUFFIX) (void)
1258 1027 *d = r;
1259 1028 }
1260 1029  
1261   -void OPPROTO glue(op_packuswb, SUFFIX) (void)
  1030 +void glue(helper_packuswb, SUFFIX) (Reg *d, Reg *s)
1262 1031 {
1263   - Reg r, *d, *s;
1264   - d = (Reg *)((char *)env + PARAM1);
1265   - s = (Reg *)((char *)env + PARAM2);
  1032 + Reg r;
1266 1033  
1267 1034 r.B(0) = satub((int16_t)d->W(0));
1268 1035 r.B(1) = satub((int16_t)d->W(1));
... ... @@ -1287,11 +1054,9 @@ void OPPROTO glue(op_packuswb, SUFFIX) (void)
1287 1054 *d = r;
1288 1055 }
1289 1056  
1290   -void OPPROTO glue(op_packssdw, SUFFIX) (void)
  1057 +void glue(helper_packssdw, SUFFIX) (Reg *d, Reg *s)
1291 1058 {
1292   - Reg r, *d, *s;
1293   - d = (Reg *)((char *)env + PARAM1);
1294   - s = (Reg *)((char *)env + PARAM2);
  1059 + Reg r;
1295 1060  
1296 1061 r.W(0) = satsw(d->L(0));
1297 1062 r.W(1) = satsw(d->L(1));
... ... @@ -1310,11 +1075,9 @@ void OPPROTO glue(op_packssdw, SUFFIX) (void)
1310 1075  
1311 1076 #define UNPCK_OP(base_name, base) \
1312 1077 \
1313   -void OPPROTO glue(op_punpck ## base_name ## bw, SUFFIX) (void) \
  1078 +void glue(helper_punpck ## base_name ## bw, SUFFIX) (Reg *d, Reg *s) \
1314 1079 { \
1315   - Reg r, *d, *s; \
1316   - d = (Reg *)((char *)env + PARAM1); \
1317   - s = (Reg *)((char *)env + PARAM2); \
  1080 + Reg r; \
1318 1081 \
1319 1082 r.B(0) = d->B((base << (SHIFT + 2)) + 0); \
1320 1083 r.B(1) = s->B((base << (SHIFT + 2)) + 0); \
... ... @@ -1337,11 +1100,9 @@ XMM_ONLY( \
1337 1100 *d = r; \
1338 1101 } \
1339 1102 \
1340   -void OPPROTO glue(op_punpck ## base_name ## wd, SUFFIX) (void) \
  1103 +void glue(helper_punpck ## base_name ## wd, SUFFIX) (Reg *d, Reg *s) \
1341 1104 { \
1342   - Reg r, *d, *s; \
1343   - d = (Reg *)((char *)env + PARAM1); \
1344   - s = (Reg *)((char *)env + PARAM2); \
  1105 + Reg r; \
1345 1106 \
1346 1107 r.W(0) = d->W((base << (SHIFT + 1)) + 0); \
1347 1108 r.W(1) = s->W((base << (SHIFT + 1)) + 0); \
... ... @@ -1356,11 +1117,9 @@ XMM_ONLY( \
1356 1117 *d = r; \
1357 1118 } \
1358 1119 \
1359   -void OPPROTO glue(op_punpck ## base_name ## dq, SUFFIX) (void) \
  1120 +void glue(helper_punpck ## base_name ## dq, SUFFIX) (Reg *d, Reg *s) \
1360 1121 { \
1361   - Reg r, *d, *s; \
1362   - d = (Reg *)((char *)env + PARAM1); \
1363   - s = (Reg *)((char *)env + PARAM2); \
  1122 + Reg r; \
1364 1123 \
1365 1124 r.L(0) = d->L((base << SHIFT) + 0); \
1366 1125 r.L(1) = s->L((base << SHIFT) + 0); \
... ... @@ -1372,11 +1131,9 @@ XMM_ONLY( \
1372 1131 } \
1373 1132 \
1374 1133 XMM_ONLY( \
1375   -void OPPROTO glue(op_punpck ## base_name ## qdq, SUFFIX) (void) \
  1134 +void glue(helper_punpck ## base_name ## qdq, SUFFIX) (Reg *d, Reg *s) \
1376 1135 { \
1377   - Reg r, *d, *s; \
1378   - d = (Reg *)((char *)env + PARAM1); \
1379   - s = (Reg *)((char *)env + PARAM2); \
  1136 + Reg r; \
1380 1137 \
1381 1138 r.Q(0) = d->Q(base); \
1382 1139 r.Q(1) = s->Q(base); \
... ... @@ -1389,166 +1146,128 @@ UNPCK_OP(h, 1)
1389 1146  
1390 1147 /* 3DNow! float ops */
1391 1148 #if SHIFT == 0
1392   -void OPPROTO op_pi2fd(void)
  1149 +void helper_pi2fd(MMXReg *d, MMXReg *s)
1393 1150 {
1394   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1395   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1396 1151 d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status);
1397 1152 d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status);
1398 1153 }
1399 1154  
1400   -void OPPROTO op_pi2fw(void)
  1155 +void helper_pi2fw(MMXReg *d, MMXReg *s)
1401 1156 {
1402   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1403   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1404 1157 d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status);
1405 1158 d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status);
1406 1159 }
1407 1160  
1408   -void OPPROTO op_pf2id(void)
  1161 +void helper_pf2id(MMXReg *d, MMXReg *s)
1409 1162 {
1410   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1411   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1412 1163 d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status);
1413 1164 d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status);
1414 1165 }
1415 1166  
1416   -void OPPROTO op_pf2iw(void)
  1167 +void helper_pf2iw(MMXReg *d, MMXReg *s)
1417 1168 {
1418   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1419   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1420 1169 d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status));
1421 1170 d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status));
1422 1171 }
1423 1172  
1424   -void OPPROTO op_pfacc(void)
  1173 +void helper_pfacc(MMXReg *d, MMXReg *s)
1425 1174 {
1426   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1427   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1428 1175 MMXReg r;
1429 1176 r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
1430 1177 r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
1431 1178 *d = r;
1432 1179 }
1433 1180  
1434   -void OPPROTO op_pfadd(void)
  1181 +void helper_pfadd(MMXReg *d, MMXReg *s)
1435 1182 {
1436   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1437   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1438 1183 d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
1439 1184 d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
1440 1185 }
1441 1186  
1442   -void OPPROTO op_pfcmpeq(void)
  1187 +void helper_pfcmpeq(MMXReg *d, MMXReg *s)
1443 1188 {
1444   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1445   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1446 1189 d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0;
1447 1190 d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0;
1448 1191 }
1449 1192  
1450   -void OPPROTO op_pfcmpge(void)
  1193 +void helper_pfcmpge(MMXReg *d, MMXReg *s)
1451 1194 {
1452   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1453   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1454 1195 d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
1455 1196 d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
1456 1197 }
1457 1198  
1458   -void OPPROTO op_pfcmpgt(void)
  1199 +void helper_pfcmpgt(MMXReg *d, MMXReg *s)
1459 1200 {
1460   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1461   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1462 1201 d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
1463 1202 d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
1464 1203 }
1465 1204  
1466   -void OPPROTO op_pfmax(void)
  1205 +void helper_pfmax(MMXReg *d, MMXReg *s)
1467 1206 {
1468   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1469   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1470 1207 if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status))
1471 1208 d->MMX_S(0) = s->MMX_S(0);
1472 1209 if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status))
1473 1210 d->MMX_S(1) = s->MMX_S(1);
1474 1211 }
1475 1212  
1476   -void OPPROTO op_pfmin(void)
  1213 +void helper_pfmin(MMXReg *d, MMXReg *s)
1477 1214 {
1478   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1479   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1480 1215 if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status))
1481 1216 d->MMX_S(0) = s->MMX_S(0);
1482 1217 if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status))
1483 1218 d->MMX_S(1) = s->MMX_S(1);
1484 1219 }
1485 1220  
1486   -void OPPROTO op_pfmul(void)
  1221 +void helper_pfmul(MMXReg *d, MMXReg *s)
1487 1222 {
1488   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1489   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1490 1223 d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
1491 1224 d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
1492 1225 }
1493 1226  
1494   -void OPPROTO op_pfnacc(void)
  1227 +void helper_pfnacc(MMXReg *d, MMXReg *s)
1495 1228 {
1496   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1497   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1498 1229 MMXReg r;
1499 1230 r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
1500 1231 r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
1501 1232 *d = r;
1502 1233 }
1503 1234  
1504   -void OPPROTO op_pfpnacc(void)
  1235 +void helper_pfpnacc(MMXReg *d, MMXReg *s)
1505 1236 {
1506   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1507   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1508 1237 MMXReg r;
1509 1238 r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
1510 1239 r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
1511 1240 *d = r;
1512 1241 }
1513 1242  
1514   -void OPPROTO op_pfrcp(void)
  1243 +void helper_pfrcp(MMXReg *d, MMXReg *s)
1515 1244 {
1516   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1517   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1518 1245 d->MMX_S(0) = approx_rcp(s->MMX_S(0));
1519 1246 d->MMX_S(1) = d->MMX_S(0);
1520 1247 }
1521 1248  
1522   -void OPPROTO op_pfrsqrt(void)
  1249 +void helper_pfrsqrt(MMXReg *d, MMXReg *s)
1523 1250 {
1524   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1525   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1526 1251 d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff;
1527 1252 d->MMX_S(1) = approx_rsqrt(d->MMX_S(1));
1528 1253 d->MMX_L(1) |= s->MMX_L(0) & 0x80000000;
1529 1254 d->MMX_L(0) = d->MMX_L(1);
1530 1255 }
1531 1256  
1532   -void OPPROTO op_pfsub(void)
  1257 +void helper_pfsub(MMXReg *d, MMXReg *s)
1533 1258 {
1534   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1535   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1536 1259 d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
1537 1260 d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
1538 1261 }
1539 1262  
1540   -void OPPROTO op_pfsubr(void)
  1263 +void helper_pfsubr(MMXReg *d, MMXReg *s)
1541 1264 {
1542   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1543   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1544 1265 d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status);
1545 1266 d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status);
1546 1267 }
1547 1268  
1548   -void OPPROTO op_pswapd(void)
  1269 +void helper_pswapd(MMXReg *d, MMXReg *s)
1549 1270 {
1550   - MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1551   - MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1552 1271 MMXReg r;
1553 1272 r.MMX_L(0) = s->MMX_L(1);
1554 1273 r.MMX_L(1) = s->MMX_L(0);
... ...
target-i386/ops_sse_header.h 0 → 100644
  1 +/*
  2 + * MMX/3DNow!/SSE/SSE2/SSE3/PNI support
  3 + *
  4 + * Copyright (c) 2005 Fabrice Bellard
  5 + *
  6 + * This library is free software; you can redistribute it and/or
  7 + * modify it under the terms of the GNU Lesser General Public
  8 + * License as published by the Free Software Foundation; either
  9 + * version 2 of the License, or (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14 + * Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public
  17 + * License along with this library; if not, write to the Free Software
  18 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  19 + */
  20 +#if SHIFT == 0
  21 +#define Reg MMXReg
  22 +#define SUFFIX _mmx
  23 +#else
  24 +#define Reg XMMReg
  25 +#define SUFFIX _xmm
  26 +#endif
  27 +
  28 +void glue(helper_psrlw, SUFFIX)(Reg *d, Reg *s);
  29 +void glue(helper_psraw, SUFFIX)(Reg *d, Reg *s);
  30 +void glue(helper_psllw, SUFFIX)(Reg *d, Reg *s);
  31 +void glue(helper_psrld, SUFFIX)(Reg *d, Reg *s);
  32 +void glue(helper_psrad, SUFFIX)(Reg *d, Reg *s);
  33 +void glue(helper_pslld, SUFFIX)(Reg *d, Reg *s);
  34 +void glue(helper_psrlq, SUFFIX)(Reg *d, Reg *s);
  35 +void glue(helper_psllq, SUFFIX)(Reg *d, Reg *s);
  36 +
  37 +#if SHIFT == 1
  38 +void glue(helper_psrldq, SUFFIX)(Reg *d, Reg *s);
  39 +void glue(helper_pslldq, SUFFIX)(Reg *d, Reg *s);
  40 +#endif
  41 +
  42 +#define SSE_HELPER_B(name, F)\
  43 + void glue(name, SUFFIX) (Reg *d, Reg *s);
  44 +
  45 +#define SSE_HELPER_W(name, F)\
  46 + void glue(name, SUFFIX) (Reg *d, Reg *s);
  47 +
  48 +#define SSE_HELPER_L(name, F)\
  49 + void glue(name, SUFFIX) (Reg *d, Reg *s);
  50 +
  51 +#define SSE_HELPER_Q(name, F)\
  52 + void glue(name, SUFFIX) (Reg *d, Reg *s);
  53 +
  54 +SSE_HELPER_B(helper_paddb, FADD);
  55 +SSE_HELPER_W(helper_paddw, FADD);
  56 +SSE_HELPER_L(helper_paddl, FADD);
  57 +SSE_HELPER_Q(helper_paddq, FADD);
  58 +
  59 +SSE_HELPER_B(helper_psubb, FSUB);
  60 +SSE_HELPER_W(helper_psubw, FSUB);
  61 +SSE_HELPER_L(helper_psubl, FSUB);
  62 +SSE_HELPER_Q(helper_psubq, FSUB);
  63 +
  64 +SSE_HELPER_B(helper_paddusb, FADDUB);
  65 +SSE_HELPER_B(helper_paddsb, FADDSB);
  66 +SSE_HELPER_B(helper_psubusb, FSUBUB);
  67 +SSE_HELPER_B(helper_psubsb, FSUBSB);
  68 +
  69 +SSE_HELPER_W(helper_paddusw, FADDUW);
  70 +SSE_HELPER_W(helper_paddsw, FADDSW);
  71 +SSE_HELPER_W(helper_psubusw, FSUBUW);
  72 +SSE_HELPER_W(helper_psubsw, FSUBSW);
  73 +
  74 +SSE_HELPER_B(helper_pminub, FMINUB);
  75 +SSE_HELPER_B(helper_pmaxub, FMAXUB);
  76 +
  77 +SSE_HELPER_W(helper_pminsw, FMINSW);
  78 +SSE_HELPER_W(helper_pmaxsw, FMAXSW);
  79 +
  80 +SSE_HELPER_Q(helper_pand, FAND);
  81 +SSE_HELPER_Q(helper_pandn, FANDN);
  82 +SSE_HELPER_Q(helper_por, FOR);
  83 +SSE_HELPER_Q(helper_pxor, FXOR);
  84 +
  85 +SSE_HELPER_B(helper_pcmpgtb, FCMPGTB);
  86 +SSE_HELPER_W(helper_pcmpgtw, FCMPGTW);
  87 +SSE_HELPER_L(helper_pcmpgtl, FCMPGTL);
  88 +
  89 +SSE_HELPER_B(helper_pcmpeqb, FCMPEQ);
  90 +SSE_HELPER_W(helper_pcmpeqw, FCMPEQ);
  91 +SSE_HELPER_L(helper_pcmpeql, FCMPEQ);
  92 +
  93 +SSE_HELPER_W(helper_pmullw, FMULLW);
  94 +#if SHIFT == 0
  95 +SSE_HELPER_W(helper_pmulhrw, FMULHRW);
  96 +#endif
  97 +SSE_HELPER_W(helper_pmulhuw, FMULHUW);
  98 +SSE_HELPER_W(helper_pmulhw, FMULHW);
  99 +
  100 +SSE_HELPER_B(helper_pavgb, FAVG);
  101 +SSE_HELPER_W(helper_pavgw, FAVG);
  102 +
  103 +void glue(helper_pmuludq, SUFFIX) (Reg *d, Reg *s);
  104 +void glue(helper_pmaddwd, SUFFIX) (Reg *d, Reg *s);
  105 +
  106 +void glue(helper_psadbw, SUFFIX) (Reg *d, Reg *s);
  107 +void glue(helper_maskmov, SUFFIX) (Reg *d, Reg *s);
  108 +void glue(helper_movl_mm_T0, SUFFIX) (Reg *d, uint32_t val);
  109 +#ifdef TARGET_X86_64
  110 +void glue(helper_movq_mm_T0, SUFFIX) (Reg *d, uint64_t val);
  111 +#endif
  112 +
  113 +#if SHIFT == 0
  114 +void glue(helper_pshufw, SUFFIX) (Reg *d, Reg *s, int order);
  115 +#else
  116 +void helper_shufps(Reg *d, Reg *s, int order);
  117 +void helper_shufpd(Reg *d, Reg *s, int order);
  118 +void glue(helper_pshufd, SUFFIX) (Reg *d, Reg *s, int order);
  119 +void glue(helper_pshuflw, SUFFIX) (Reg *d, Reg *s, int order);
  120 +void glue(helper_pshufhw, SUFFIX) (Reg *d, Reg *s, int order);
  121 +#endif
  122 +
  123 +#if SHIFT == 1
  124 +/* FPU ops */
  125 +/* XXX: not accurate */
  126 +
  127 +#define SSE_HELPER_S(name, F)\
  128 + void helper_ ## name ## ps (Reg *d, Reg *s); \
  129 + void helper_ ## name ## ss (Reg *d, Reg *s); \
  130 + void helper_ ## name ## pd (Reg *d, Reg *s); \
  131 + void helper_ ## name ## sd (Reg *d, Reg *s);
  132 +
  133 +SSE_HELPER_S(add, FPU_ADD);
  134 +SSE_HELPER_S(sub, FPU_SUB);
  135 +SSE_HELPER_S(mul, FPU_MUL);
  136 +SSE_HELPER_S(div, FPU_DIV);
  137 +SSE_HELPER_S(min, FPU_MIN);
  138 +SSE_HELPER_S(max, FPU_MAX);
  139 +SSE_HELPER_S(sqrt, FPU_SQRT);
  140 +
  141 +
  142 +void helper_cvtps2pd(Reg *d, Reg *s);
  143 +void helper_cvtpd2ps(Reg *d, Reg *s);
  144 +void helper_cvtss2sd(Reg *d, Reg *s);
  145 +void helper_cvtsd2ss(Reg *d, Reg *s);
  146 +void helper_cvtdq2ps(Reg *d, Reg *s);
  147 +void helper_cvtdq2pd(Reg *d, Reg *s);
  148 +void helper_cvtpi2ps(XMMReg *d, MMXReg *s);
  149 +void helper_cvtpi2pd(XMMReg *d, MMXReg *s);
  150 +void helper_cvtsi2ss(XMMReg *d, uint32_t val);
  151 +void helper_cvtsi2sd(XMMReg *d, uint32_t val);
  152 +
  153 +#ifdef TARGET_X86_64
  154 +void helper_cvtsq2ss(XMMReg *d, uint64_t val);
  155 +void helper_cvtsq2sd(XMMReg *d, uint64_t val);
  156 +#endif
  157 +
  158 +void helper_cvtps2dq(XMMReg *d, XMMReg *s);
  159 +void helper_cvtpd2dq(XMMReg *d, XMMReg *s);
  160 +void helper_cvtps2pi(MMXReg *d, XMMReg *s);
  161 +void helper_cvtpd2pi(MMXReg *d, XMMReg *s);
  162 +int32_t helper_cvtss2si(XMMReg *s);
  163 +int32_t helper_cvtsd2si(XMMReg *s);
  164 +#ifdef TARGET_X86_64
  165 +int64_t helper_cvtss2sq(XMMReg *s);
  166 +int64_t helper_cvtsd2sq(XMMReg *s);
  167 +#endif
  168 +
  169 +void helper_cvttps2dq(XMMReg *d, XMMReg *s);
  170 +void helper_cvttpd2dq(XMMReg *d, XMMReg *s);
  171 +void helper_cvttps2pi(MMXReg *d, XMMReg *s);
  172 +void helper_cvttpd2pi(MMXReg *d, XMMReg *s);
  173 +int32_t helper_cvttss2si(XMMReg *s);
  174 +int32_t helper_cvttsd2si(XMMReg *s);
  175 +#ifdef TARGET_X86_64
  176 +int64_t helper_cvttss2sq(XMMReg *s);
  177 +int64_t helper_cvttsd2sq(XMMReg *s);
  178 +#endif
  179 +
  180 +void helper_rsqrtps(XMMReg *d, XMMReg *s);
  181 +void helper_rsqrtss(XMMReg *d, XMMReg *s);
  182 +void helper_rcpps(XMMReg *d, XMMReg *s);
  183 +void helper_rcpss(XMMReg *d, XMMReg *s);
  184 +void helper_haddps(XMMReg *d, XMMReg *s);
  185 +void helper_haddpd(XMMReg *d, XMMReg *s);
  186 +void helper_hsubps(XMMReg *d, XMMReg *s);
  187 +void helper_hsubpd(XMMReg *d, XMMReg *s);
  188 +void helper_addsubps(XMMReg *d, XMMReg *s);
  189 +void helper_addsubpd(XMMReg *d, XMMReg *s);
  190 +
  191 +#define SSE_HELPER_CMP(name, F)\
  192 + void helper_ ## name ## ps (Reg *d, Reg *s); \
  193 + void helper_ ## name ## ss (Reg *d, Reg *s); \
  194 + void helper_ ## name ## pd (Reg *d, Reg *s); \
  195 + void helper_ ## name ## sd (Reg *d, Reg *s);
  196 +
  197 +SSE_HELPER_CMP(cmpeq, FPU_CMPEQ);
  198 +SSE_HELPER_CMP(cmplt, FPU_CMPLT);
  199 +SSE_HELPER_CMP(cmple, FPU_CMPLE);
  200 +SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD);
  201 +SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ);
  202 +SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT);
  203 +SSE_HELPER_CMP(cmpnle, FPU_CMPNLE);
  204 +SSE_HELPER_CMP(cmpord, FPU_CMPORD);
  205 +
  206 +void helper_ucomiss(Reg *d, Reg *s);
  207 +void helper_comiss(Reg *d, Reg *s);
  208 +void helper_ucomisd(Reg *d, Reg *s);
  209 +void helper_comisd(Reg *d, Reg *s);
  210 +uint32_t helper_movmskps(Reg *s);
  211 +uint32_t helper_movmskpd(Reg *s);
  212 +#endif
  213 +
  214 +uint32_t glue(helper_pmovmskb, SUFFIX)(Reg *s);
  215 +void glue(helper_packsswb, SUFFIX) (Reg *d, Reg *s);
  216 +void glue(helper_packuswb, SUFFIX) (Reg *d, Reg *s);
  217 +void glue(helper_packssdw, SUFFIX) (Reg *d, Reg *s);
  218 +#define UNPCK_OP(base_name, base) \
  219 + void glue(helper_punpck ## base_name ## bw, SUFFIX) (Reg *d, Reg *s); \
  220 + void glue(helper_punpck ## base_name ## wd, SUFFIX) (Reg *d, Reg *s); \
  221 + void glue(helper_punpck ## base_name ## dq, SUFFIX) (Reg *d, Reg *s);
  222 +
  223 +UNPCK_OP(l, 0);
  224 +UNPCK_OP(h, 1);
  225 +
  226 +#if SHIFT == 1
  227 +void glue(helper_punpcklqdq, SUFFIX) (Reg *d, Reg *s);
  228 +void glue(helper_punpckhqdq, SUFFIX) (Reg *d, Reg *s);
  229 +#endif
  230 +
  231 +/* 3DNow! float ops */
  232 +#if SHIFT == 0
  233 +void helper_pi2fd(MMXReg *d, MMXReg *s);
  234 +void helper_pi2fw(MMXReg *d, MMXReg *s);
  235 +void helper_pf2id(MMXReg *d, MMXReg *s);
  236 +void helper_pf2iw(MMXReg *d, MMXReg *s);
  237 +void helper_pfacc(MMXReg *d, MMXReg *s);
  238 +void helper_pfadd(MMXReg *d, MMXReg *s);
  239 +void helper_pfcmpeq(MMXReg *d, MMXReg *s);
  240 +void helper_pfcmpge(MMXReg *d, MMXReg *s);
  241 +void helper_pfcmpgt(MMXReg *d, MMXReg *s);
  242 +void helper_pfmax(MMXReg *d, MMXReg *s);
  243 +void helper_pfmin(MMXReg *d, MMXReg *s);
  244 +void helper_pfmul(MMXReg *d, MMXReg *s);
  245 +void helper_pfnacc(MMXReg *d, MMXReg *s);
  246 +void helper_pfpnacc(MMXReg *d, MMXReg *s);
  247 +void helper_pfrcp(MMXReg *d, MMXReg *s);
  248 +void helper_pfrsqrt(MMXReg *d, MMXReg *s);
  249 +void helper_pfsub(MMXReg *d, MMXReg *s);
  250 +void helper_pfsubr(MMXReg *d, MMXReg *s);
  251 +void helper_pswapd(MMXReg *d, MMXReg *s);
  252 +#endif
  253 +
  254 +#undef SHIFT
  255 +#undef Reg
  256 +#undef SUFFIX
  257 +
  258 +#undef SSE_HELPER_B
  259 +#undef SSE_HELPER_W
  260 +#undef SSE_HELPER_L
  261 +#undef SSE_HELPER_Q
  262 +#undef SSE_HELPER_S
  263 +#undef SSE_HELPER_CMP
  264 +#undef UNPCK_OP
... ...
target-i386/translate.c
... ... @@ -60,7 +60,7 @@
60 60 /* global register indexes */
61 61 static TCGv cpu_env, cpu_T[2], cpu_A0;
62 62 /* local register indexes (only used inside old micro ops) */
63   -static TCGv cpu_tmp0, cpu_tmp1;
  63 +static TCGv cpu_tmp0, cpu_tmp1, cpu_tmp2, cpu_ptr0, cpu_ptr1;
64 64  
65 65 #ifdef TARGET_X86_64
66 66 static int x86_64_hregs;
... ... @@ -2410,14 +2410,40 @@ static inline void gen_sto_env_A0(int idx, int offset)
2410 2410 tcg_gen_qemu_st64(cpu_tmp1, cpu_tmp0, mem_index);
2411 2411 }
2412 2412  
2413   -#define SSE_SPECIAL ((GenOpFunc2 *)1)
2414   -#define SSE_DUMMY ((GenOpFunc2 *)2)
  2413 +static inline void gen_op_movo(int d_offset, int s_offset) /* emit a 16-byte copy within CPU state: env+s_offset -> env+d_offset */
  2414 +{
  2415 + tcg_gen_ld_i64(cpu_tmp1, cpu_env, s_offset); /* first 8 bytes, staged through cpu_tmp1 */
  2416 + tcg_gen_st_i64(cpu_tmp1, cpu_env, d_offset);
  2417 + tcg_gen_ld_i64(cpu_tmp1, cpu_env, s_offset + 8); /* following 8 bytes, same temp reused */
  2418 + tcg_gen_st_i64(cpu_tmp1, cpu_env, d_offset + 8);
  2419 +}
  2420 +
  2421 +static inline void gen_op_movq(int d_offset, int s_offset) /* emit an 8-byte copy within CPU state: env+s_offset -> env+d_offset */
  2422 +{
  2423 + tcg_gen_ld_i64(cpu_tmp1, cpu_env, s_offset); /* 64-bit load into the shared temp cpu_tmp1 */
  2424 + tcg_gen_st_i64(cpu_tmp1, cpu_env, d_offset); /* 64-bit store of that temp */
  2425 +}
  2426 +
  2427 +static inline void gen_op_movl(int d_offset, int s_offset) /* emit a 4-byte copy within CPU state: env+s_offset -> env+d_offset */
  2428 +{
  2429 + tcg_gen_ld_i32(cpu_tmp2, cpu_env, s_offset); /* cpu_tmp2 is the i32 temp allocated in gen_intermediate_code_internal */
  2430 + tcg_gen_st_i32(cpu_tmp2, cpu_env, d_offset);
  2431 +}
  2432 +
  2433 +static inline void gen_op_movq_env_0(int d_offset) /* emit a store of a 64-bit zero at env+d_offset */
  2434 +{
  2435 + tcg_gen_movi_i64(cpu_tmp1, 0); /* load the constant 0 into the shared temp */
  2436 + tcg_gen_st_i64(cpu_tmp1, cpu_env, d_offset);
  2437 +}
2415 2438  
2416   -#define MMX_OP2(x) { gen_op_ ## x ## _mmx, gen_op_ ## x ## _xmm }
2417   -#define SSE_FOP(x) { gen_op_ ## x ## ps, gen_op_ ## x ## pd, \
2418   - gen_op_ ## x ## ss, gen_op_ ## x ## sd, }
  2439 +#define SSE_SPECIAL ((void *)1)
  2440 +#define SSE_DUMMY ((void *)2)
2419 2441  
2420   -static GenOpFunc2 *sse_op_table1[256][4] = {
  2442 +#define MMX_OP2(x) { helper_ ## x ## _mmx, helper_ ## x ## _xmm }
  2443 +#define SSE_FOP(x) { helper_ ## x ## ps, helper_ ## x ## pd, \
  2444 + helper_ ## x ## ss, helper_ ## x ## sd, }
  2445 +
  2446 +static void *sse_op_table1[256][4] = {
2421 2447 /* 3DNow! extensions */
2422 2448 [0x0e] = { SSE_DUMMY }, /* femms */
2423 2449 [0x0f] = { SSE_DUMMY }, /* pf... */
... ... @@ -2426,8 +2452,8 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2426 2452 [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2427 2453 [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2428 2454 [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */
2429   - [0x14] = { gen_op_punpckldq_xmm, gen_op_punpcklqdq_xmm },
2430   - [0x15] = { gen_op_punpckhdq_xmm, gen_op_punpckhqdq_xmm },
  2455 + [0x14] = { helper_punpckldq_xmm, helper_punpcklqdq_xmm },
  2456 + [0x15] = { helper_punpckhdq_xmm, helper_punpckhqdq_xmm },
2431 2457 [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */
2432 2458 [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */
2433 2459  
... ... @@ -2437,28 +2463,28 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2437 2463 [0x2b] = { SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd */
2438 2464 [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2439 2465 [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2440   - [0x2e] = { gen_op_ucomiss, gen_op_ucomisd },
2441   - [0x2f] = { gen_op_comiss, gen_op_comisd },
  2466 + [0x2e] = { helper_ucomiss, helper_ucomisd },
  2467 + [0x2f] = { helper_comiss, helper_comisd },
2442 2468 [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2443 2469 [0x51] = SSE_FOP(sqrt),
2444   - [0x52] = { gen_op_rsqrtps, NULL, gen_op_rsqrtss, NULL },
2445   - [0x53] = { gen_op_rcpps, NULL, gen_op_rcpss, NULL },
2446   - [0x54] = { gen_op_pand_xmm, gen_op_pand_xmm }, /* andps, andpd */
2447   - [0x55] = { gen_op_pandn_xmm, gen_op_pandn_xmm }, /* andnps, andnpd */
2448   - [0x56] = { gen_op_por_xmm, gen_op_por_xmm }, /* orps, orpd */
2449   - [0x57] = { gen_op_pxor_xmm, gen_op_pxor_xmm }, /* xorps, xorpd */
  2470 + [0x52] = { helper_rsqrtps, NULL, helper_rsqrtss, NULL },
  2471 + [0x53] = { helper_rcpps, NULL, helper_rcpss, NULL },
  2472 + [0x54] = { helper_pand_xmm, helper_pand_xmm }, /* andps, andpd */
  2473 + [0x55] = { helper_pandn_xmm, helper_pandn_xmm }, /* andnps, andnpd */
  2474 + [0x56] = { helper_por_xmm, helper_por_xmm }, /* orps, orpd */
  2475 + [0x57] = { helper_pxor_xmm, helper_pxor_xmm }, /* xorps, xorpd */
2450 2476 [0x58] = SSE_FOP(add),
2451 2477 [0x59] = SSE_FOP(mul),
2452   - [0x5a] = { gen_op_cvtps2pd, gen_op_cvtpd2ps,
2453   - gen_op_cvtss2sd, gen_op_cvtsd2ss },
2454   - [0x5b] = { gen_op_cvtdq2ps, gen_op_cvtps2dq, gen_op_cvttps2dq },
  2478 + [0x5a] = { helper_cvtps2pd, helper_cvtpd2ps,
  2479 + helper_cvtss2sd, helper_cvtsd2ss },
  2480 + [0x5b] = { helper_cvtdq2ps, helper_cvtps2dq, helper_cvttps2dq },
2455 2481 [0x5c] = SSE_FOP(sub),
2456 2482 [0x5d] = SSE_FOP(min),
2457 2483 [0x5e] = SSE_FOP(div),
2458 2484 [0x5f] = SSE_FOP(max),
2459 2485  
2460 2486 [0xc2] = SSE_FOP(cmpeq),
2461   - [0xc6] = { (GenOpFunc2 *)gen_op_shufps, (GenOpFunc2 *)gen_op_shufpd },
  2487 + [0xc6] = { helper_shufps, helper_shufpd },
2462 2488  
2463 2489 /* MMX ops and their SSE extensions */
2464 2490 [0x60] = MMX_OP2(punpcklbw),
... ... @@ -2473,14 +2499,14 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2473 2499 [0x69] = MMX_OP2(punpckhwd),
2474 2500 [0x6a] = MMX_OP2(punpckhdq),
2475 2501 [0x6b] = MMX_OP2(packssdw),
2476   - [0x6c] = { NULL, gen_op_punpcklqdq_xmm },
2477   - [0x6d] = { NULL, gen_op_punpckhqdq_xmm },
  2502 + [0x6c] = { NULL, helper_punpcklqdq_xmm },
  2503 + [0x6d] = { NULL, helper_punpckhqdq_xmm },
2478 2504 [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2479 2505 [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2480   - [0x70] = { (GenOpFunc2 *)gen_op_pshufw_mmx,
2481   - (GenOpFunc2 *)gen_op_pshufd_xmm,
2482   - (GenOpFunc2 *)gen_op_pshufhw_xmm,
2483   - (GenOpFunc2 *)gen_op_pshuflw_xmm },
  2506 + [0x70] = { helper_pshufw_mmx,
  2507 + helper_pshufd_xmm,
  2508 + helper_pshufhw_xmm,
  2509 + helper_pshuflw_xmm },
2484 2510 [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2485 2511 [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2486 2512 [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
... ... @@ -2488,13 +2514,13 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2488 2514 [0x75] = MMX_OP2(pcmpeqw),
2489 2515 [0x76] = MMX_OP2(pcmpeql),
2490 2516 [0x77] = { SSE_DUMMY }, /* emms */
2491   - [0x7c] = { NULL, gen_op_haddpd, NULL, gen_op_haddps },
2492   - [0x7d] = { NULL, gen_op_hsubpd, NULL, gen_op_hsubps },
  2517 + [0x7c] = { NULL, helper_haddpd, NULL, helper_haddps },
  2518 + [0x7d] = { NULL, helper_hsubpd, NULL, helper_hsubps },
2493 2519 [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2494 2520 [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2495 2521 [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2496 2522 [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2497   - [0xd0] = { NULL, gen_op_addsubpd, NULL, gen_op_addsubps },
  2523 + [0xd0] = { NULL, helper_addsubpd, NULL, helper_addsubps },
2498 2524 [0xd1] = MMX_OP2(psrlw),
2499 2525 [0xd2] = MMX_OP2(psrld),
2500 2526 [0xd3] = MMX_OP2(psrlq),
... ... @@ -2516,7 +2542,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2516 2542 [0xe3] = MMX_OP2(pavgw),
2517 2543 [0xe4] = MMX_OP2(pmulhuw),
2518 2544 [0xe5] = MMX_OP2(pmulhw),
2519   - [0xe6] = { NULL, gen_op_cvttpd2dq, gen_op_cvtdq2pd, gen_op_cvtpd2dq },
  2545 + [0xe6] = { NULL, helper_cvttpd2dq, helper_cvtdq2pd, helper_cvtpd2dq },
2520 2546 [0xe7] = { SSE_SPECIAL , SSE_SPECIAL }, /* movntq, movntdq */
2521 2547 [0xe8] = MMX_OP2(psubsb),
2522 2548 [0xe9] = MMX_OP2(psubsw),
... ... @@ -2543,7 +2569,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2543 2569 [0xfe] = MMX_OP2(paddl),
2544 2570 };
2545 2571  
2546   -static GenOpFunc2 *sse_op_table2[3 * 8][2] = {
  2572 +static void *sse_op_table2[3 * 8][2] = {
2547 2573 [0 + 2] = MMX_OP2(psrlw),
2548 2574 [0 + 4] = MMX_OP2(psraw),
2549 2575 [0 + 6] = MMX_OP2(psllw),
... ... @@ -2551,29 +2577,29 @@ static GenOpFunc2 *sse_op_table2[3 * 8][2] = {
2551 2577 [8 + 4] = MMX_OP2(psrad),
2552 2578 [8 + 6] = MMX_OP2(pslld),
2553 2579 [16 + 2] = MMX_OP2(psrlq),
2554   - [16 + 3] = { NULL, gen_op_psrldq_xmm },
  2580 + [16 + 3] = { NULL, helper_psrldq_xmm },
2555 2581 [16 + 6] = MMX_OP2(psllq),
2556   - [16 + 7] = { NULL, gen_op_pslldq_xmm },
  2582 + [16 + 7] = { NULL, helper_pslldq_xmm },
2557 2583 };
2558 2584  
2559   -static GenOpFunc1 *sse_op_table3[4 * 3] = {
2560   - gen_op_cvtsi2ss,
2561   - gen_op_cvtsi2sd,
2562   - X86_64_ONLY(gen_op_cvtsq2ss),
2563   - X86_64_ONLY(gen_op_cvtsq2sd),
2564   -
2565   - gen_op_cvttss2si,
2566   - gen_op_cvttsd2si,
2567   - X86_64_ONLY(gen_op_cvttss2sq),
2568   - X86_64_ONLY(gen_op_cvttsd2sq),
2569   -
2570   - gen_op_cvtss2si,
2571   - gen_op_cvtsd2si,
2572   - X86_64_ONLY(gen_op_cvtss2sq),
2573   - X86_64_ONLY(gen_op_cvtsd2sq),
  2585 +static void *sse_op_table3[4 * 3] = { /* three groups of four; gen_sse indexes as group * 4 + (dflag == 2) * 2 + ((b >> 8) - 2) */
  2586 + helper_cvtsi2ss, /* group 0: integer -> scalar float; these take (dest pointer, integer value) at the call site */
  2587 + helper_cvtsi2sd,
  2588 + X86_64_ONLY(helper_cvtsq2ss), /* slots 2-3 of each group are the dflag == 2 (quad integer) forms */
  2589 + X86_64_ONLY(helper_cvtsq2sd),
  2590 +
  2591 + helper_cvttss2si, /* group 1: selected when (b & 1) == 0; called with a source pointer, returns the integer */
  2592 + helper_cvttsd2si,
  2593 + X86_64_ONLY(helper_cvttss2sq),
  2594 + X86_64_ONLY(helper_cvttsd2sq),
  2595 +
  2596 + helper_cvtss2si, /* group 2: selected when (b & 1) == 1; same calling shape as group 1 */
  2597 + helper_cvtsd2si,
  2598 + X86_64_ONLY(helper_cvtss2sq),
  2599 + X86_64_ONLY(helper_cvtsd2sq),
2574 2600 };
2575 2601  
2576   -static GenOpFunc2 *sse_op_table4[8][4] = {
  2602 +static void *sse_op_table4[8][4] = {
2577 2603 SSE_FOP(cmpeq),
2578 2604 SSE_FOP(cmplt),
2579 2605 SSE_FOP(cmple),
... ... @@ -2584,39 +2610,38 @@ static GenOpFunc2 *sse_op_table4[8][4] = {
2584 2610 SSE_FOP(cmpord),
2585 2611 };
2586 2612  
2587   -static GenOpFunc2 *sse_op_table5[256] = {
2588   - [0x0c] = gen_op_pi2fw,
2589   - [0x0d] = gen_op_pi2fd,
2590   - [0x1c] = gen_op_pf2iw,
2591   - [0x1d] = gen_op_pf2id,
2592   - [0x8a] = gen_op_pfnacc,
2593   - [0x8e] = gen_op_pfpnacc,
2594   - [0x90] = gen_op_pfcmpge,
2595   - [0x94] = gen_op_pfmin,
2596   - [0x96] = gen_op_pfrcp,
2597   - [0x97] = gen_op_pfrsqrt,
2598   - [0x9a] = gen_op_pfsub,
2599   - [0x9e] = gen_op_pfadd,
2600   - [0xa0] = gen_op_pfcmpgt,
2601   - [0xa4] = gen_op_pfmax,
2602   - [0xa6] = gen_op_movq, /* pfrcpit1; no need to actually increase precision */
2603   - [0xa7] = gen_op_movq, /* pfrsqit1 */
2604   - [0xaa] = gen_op_pfsubr,
2605   - [0xae] = gen_op_pfacc,
2606   - [0xb0] = gen_op_pfcmpeq,
2607   - [0xb4] = gen_op_pfmul,
2608   - [0xb6] = gen_op_movq, /* pfrcpit2 */
2609   - [0xb7] = gen_op_pmulhrw_mmx,
2610   - [0xbb] = gen_op_pswapd,
2611   - [0xbf] = gen_op_pavgb_mmx /* pavgusb */
  2613 +static void *sse_op_table5[256] = { /* looked up by the byte `val` in gen_sse; an unlisted (NULL) slot sends gen_sse to illegal_op */
  2614 + [0x0c] = helper_pi2fw,
  2615 + [0x0d] = helper_pi2fd,
  2616 + [0x1c] = helper_pf2iw,
  2617 + [0x1d] = helper_pf2id,
  2618 + [0x8a] = helper_pfnacc,
  2619 + [0x8e] = helper_pfpnacc,
  2620 + [0x90] = helper_pfcmpge,
  2621 + [0x94] = helper_pfmin,
  2622 + [0x96] = helper_pfrcp,
  2623 + [0x97] = helper_pfrsqrt,
  2624 + [0x9a] = helper_pfsub,
  2625 + [0x9e] = helper_pfadd,
  2626 + [0xa0] = helper_pfcmpgt,
  2627 + [0xa4] = helper_pfmax,
  2628 + [0xa6] = helper_movq, /* pfrcpit1; no need to actually increase precision */
  2629 + [0xa7] = helper_movq, /* pfrsqit1 */
  2630 + [0xaa] = helper_pfsubr,
  2631 + [0xae] = helper_pfacc,
  2632 + [0xb0] = helper_pfcmpeq,
  2633 + [0xb4] = helper_pfmul,
  2634 + [0xb6] = helper_movq, /* pfrcpit2 */
  2635 + [0xb7] = helper_pmulhrw_mmx,
  2636 + [0xbb] = helper_pswapd,
  2637 + [0xbf] = helper_pavgb_mmx /* pavgusb */
2612 2638 };
2613 2639  
2614 2640 static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2615 2641 {
2616 2642 int b1, op1_offset, op2_offset, is_xmm, val, ot;
2617 2643 int modrm, mod, rm, reg, reg_addr, offset_addr;
2618   - GenOpFunc2 *sse_op2;
2619   - GenOpFunc3 *sse_op3;
  2644 + void *sse_op2;
2620 2645  
2621 2646 b &= 0xff;
2622 2647 if (s->prefix & PREFIX_DATA)
... ... @@ -2656,18 +2681,18 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2656 2681 if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
2657 2682 goto illegal_op;
2658 2683 /* femms */
2659   - gen_op_emms();
  2684 + tcg_gen_helper_0_0(helper_emms);
2660 2685 return;
2661 2686 }
2662 2687 if (b == 0x77) {
2663 2688 /* emms */
2664   - gen_op_emms();
  2689 + tcg_gen_helper_0_0(helper_emms);
2665 2690 return;
2666 2691 }
2667 2692 /* prepare MMX state (XXX: optimize by storing fpstt and fptags in
2668 2693 the static cpu state) */
2669 2694 if (!is_xmm) {
2670   - gen_op_enter_mmx();
  2695 + tcg_gen_helper_0_0(helper_enter_mmx);
2671 2696 }
2672 2697  
2673 2698 modrm = ldub_code(s->pc++);
... ... @@ -2697,24 +2722,31 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2697 2722 #ifdef TARGET_X86_64
2698 2723 if (s->dflag == 2) {
2699 2724 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
2700   - gen_op_movq_mm_T0_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
  2725 + tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
2701 2726 } else
2702 2727 #endif
2703 2728 {
2704 2729 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
2705   - gen_op_movl_mm_T0_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
  2730 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx)); /* pointer to the destination MMX register */
  2731 + tcg_gen_trunc_tl_i32(cpu_tmp2, cpu_T[0]); /* narrow T0 to the i32 temp first, exactly as the xmm form of this movd does */
  2732 + tcg_gen_helper_0_2(helper_movl_mm_T0_mmx, cpu_ptr0, cpu_tmp2); /* 32-bit move helper gets a 32-bit value */
2706 2733 }
2707 2734 break;
2708 2735 case 0x16e: /* movd xmm, ea */
2709 2736 #ifdef TARGET_X86_64
2710 2737 if (s->dflag == 2) {
2711 2738 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
2712   - gen_op_movq_mm_T0_xmm(offsetof(CPUX86State,xmm_regs[reg]));
  2739 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
  2740 + offsetof(CPUX86State,xmm_regs[reg]));
  2741 + tcg_gen_helper_0_2(helper_movq_mm_T0_xmm, cpu_ptr0, cpu_T[0]);
2713 2742 } else
2714 2743 #endif
2715 2744 {
2716 2745 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
2717   - gen_op_movl_mm_T0_xmm(offsetof(CPUX86State,xmm_regs[reg]));
  2746 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
  2747 + offsetof(CPUX86State,xmm_regs[reg]));
  2748 + tcg_gen_trunc_tl_i32(cpu_tmp2, cpu_T[0]);
  2749 + tcg_gen_helper_0_2(helper_movl_mm_T0_xmm, cpu_ptr0, cpu_tmp2);
2718 2750 }
2719 2751 break;
2720 2752 case 0x6f: /* movq mm, ea */
... ... @@ -2723,8 +2755,10 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2723 2755 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
2724 2756 } else {
2725 2757 rm = (modrm & 7);
2726   - gen_op_movq(offsetof(CPUX86State,fpregs[reg].mmx),
2727   - offsetof(CPUX86State,fpregs[rm].mmx));
  2758 + tcg_gen_ld_i64(cpu_tmp1, cpu_env,
  2759 + offsetof(CPUX86State,fpregs[rm].mmx));
  2760 + tcg_gen_st_i64(cpu_tmp1, cpu_env,
  2761 + offsetof(CPUX86State,fpregs[reg].mmx));
2728 2762 }
2729 2763 break;
2730 2764 case 0x010: /* movups */
... ... @@ -2841,24 +2875,28 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2841 2875 case 0x7e: /* movd ea, mm */
2842 2876 #ifdef TARGET_X86_64
2843 2877 if (s->dflag == 2) {
2844   - gen_op_movq_T0_mm_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
  2878 + tcg_gen_ld_i64(cpu_T[0], cpu_env,
  2879 + offsetof(CPUX86State,fpregs[reg].mmx));
2845 2880 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
2846 2881 } else
2847 2882 #endif
2848 2883 {
2849   - gen_op_movl_T0_mm_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
  2884 + tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
  2885 + offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
2850 2886 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
2851 2887 }
2852 2888 break;
2853 2889 case 0x17e: /* movd ea, xmm */
2854 2890 #ifdef TARGET_X86_64
2855 2891 if (s->dflag == 2) {
2856   - gen_op_movq_T0_mm_xmm(offsetof(CPUX86State,xmm_regs[reg]));
  2892 + tcg_gen_ld_i64(cpu_T[0], cpu_env,
  2893 + offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
2857 2894 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
2858 2895 } else
2859 2896 #endif
2860 2897 {
2861   - gen_op_movl_T0_mm_xmm(offsetof(CPUX86State,xmm_regs[reg]));
  2898 + tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
  2899 + offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
2862 2900 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
2863 2901 }
2864 2902 break;
... ... @@ -2967,21 +3005,29 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2967 3005 rm = (modrm & 7);
2968 3006 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
2969 3007 }
2970   - sse_op2(op2_offset, op1_offset);
  3008 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
  3009 + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
  3010 + tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
2971 3011 break;
2972 3012 case 0x050: /* movmskps */
2973 3013 rm = (modrm & 7) | REX_B(s);
2974   - gen_op_movmskps(offsetof(CPUX86State,xmm_regs[rm]));
  3014 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
  3015 + offsetof(CPUX86State,xmm_regs[rm]));
  3016 + tcg_gen_helper_1_1(helper_movmskps, cpu_tmp2, cpu_ptr0); /* helper result goes to the i32 temp */
  3017 + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2); /* _tl rather than _i64: cpu_T[0] is target-long sized, matching the pmovmskb path */
2975 3018 gen_op_mov_reg_T0(OT_LONG, reg);
2976 3019 break;
2977 3020 case 0x150: /* movmskpd */
2978 3021 rm = (modrm & 7) | REX_B(s);
2979   - gen_op_movmskpd(offsetof(CPUX86State,xmm_regs[rm]));
  3022 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
  3023 + offsetof(CPUX86State,xmm_regs[rm]));
  3024 + tcg_gen_helper_1_1(helper_movmskpd, cpu_tmp2, cpu_ptr0); /* helper result goes to the i32 temp */
  3025 + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2); /* _tl rather than _i64: cpu_T[0] is target-long sized, matching the pmovmskb path */
2980 3026 gen_op_mov_reg_T0(OT_LONG, reg);
2981 3027 break;
2982 3028 case 0x02a: /* cvtpi2ps */
2983 3029 case 0x12a: /* cvtpi2pd */
2984   - gen_op_enter_mmx();
  3030 + tcg_gen_helper_0_0(helper_enter_mmx);
2985 3031 if (mod != 3) {
2986 3032 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
2987 3033 op2_offset = offsetof(CPUX86State,mmx_t0);
... ... @@ -2991,13 +3037,15 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2991 3037 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
2992 3038 }
2993 3039 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
  3040 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
  3041 + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
2994 3042 switch(b >> 8) {
2995 3043 case 0x0:
2996   - gen_op_cvtpi2ps(op1_offset, op2_offset);
  3044 + tcg_gen_helper_0_2(helper_cvtpi2ps, cpu_ptr0, cpu_ptr1);
2997 3045 break;
2998 3046 default:
2999 3047 case 0x1:
3000   - gen_op_cvtpi2pd(op1_offset, op2_offset);
  3048 + tcg_gen_helper_0_2(helper_cvtpi2pd, cpu_ptr0, cpu_ptr1);
3001 3049 break;
3002 3050 }
3003 3051 break;
... ... @@ -3006,13 +3054,16 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3006 3054 ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
3007 3055 gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
3008 3056 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3009   - sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)](op1_offset);
  3057 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); /* pointer to the destination XMM register */
  3058 + sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)]; /* dflag == 2 picks the cvtsq2ss/cvtsq2sd slots */
  3059 + if (ot == OT_LONG) tcg_gen_trunc_tl_i32(cpu_tmp2, cpu_T[0]); /* only the 32-bit forms take a narrowed operand */
  3060 + tcg_gen_helper_0_2(sse_op2, cpu_ptr0, (ot == OT_LONG) ? cpu_tmp2 : cpu_T[0]); /* quad forms receive the full T0, mirroring the cvt*2si result handling */
3010 3061 break;
3011 3062 case 0x02c: /* cvttps2pi */
3012 3063 case 0x12c: /* cvttpd2pi */
3013 3064 case 0x02d: /* cvtps2pi */
3014 3065 case 0x12d: /* cvtpd2pi */
3015   - gen_op_enter_mmx();
  3066 + tcg_gen_helper_0_0(helper_enter_mmx);
3016 3067 if (mod != 3) {
3017 3068 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3018 3069 op2_offset = offsetof(CPUX86State,xmm_t0);
... ... @@ -3022,18 +3073,20 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3022 3073 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3023 3074 }
3024 3075 op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
  3076 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
  3077 + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3025 3078 switch(b) {
3026 3079 case 0x02c:
3027   - gen_op_cvttps2pi(op1_offset, op2_offset);
  3080 + tcg_gen_helper_0_2(helper_cvttps2pi, cpu_ptr0, cpu_ptr1);
3028 3081 break;
3029 3082 case 0x12c:
3030   - gen_op_cvttpd2pi(op1_offset, op2_offset);
  3083 + tcg_gen_helper_0_2(helper_cvttpd2pi, cpu_ptr0, cpu_ptr1);
3031 3084 break;
3032 3085 case 0x02d:
3033   - gen_op_cvtps2pi(op1_offset, op2_offset);
  3086 + tcg_gen_helper_0_2(helper_cvtps2pi, cpu_ptr0, cpu_ptr1);
3034 3087 break;
3035 3088 case 0x12d:
3036   - gen_op_cvtpd2pi(op1_offset, op2_offset);
  3089 + tcg_gen_helper_0_2(helper_cvtpd2pi, cpu_ptr0, cpu_ptr1);
3037 3090 break;
3038 3091 }
3039 3092 break;
... ... @@ -3055,8 +3108,15 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3055 3108 rm = (modrm & 7) | REX_B(s);
3056 3109 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3057 3110 }
3058   - sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
3059   - (b & 1) * 4](op2_offset);
  3111 + sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
  3112 + (b & 1) * 4]; /* +4 skips the cvtsi2* group; (b & 1) * 4 picks truncating vs. non-truncating */
  3113 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
  3114 + if (ot == OT_LONG) {
  3115 + tcg_gen_helper_1_1(sse_op2, cpu_tmp2, cpu_ptr0); /* 32-bit result arrives in the i32 temp */
  3116 + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2); /* _tl rather than _i64: cpu_T[0] is target-long sized */
  3117 + } else {
  3118 + tcg_gen_helper_1_1(sse_op2, cpu_T[0], cpu_ptr0); /* quad result is written to T0 directly */
  3119 + }
3060 3120 gen_op_mov_reg_T0(ot, reg);
3061 3121 break;
3062 3122 case 0xc4: /* pinsrw */
... ... @@ -3066,10 +3126,12 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3066 3126 val = ldub_code(s->pc++);
3067 3127 if (b1) {
3068 3128 val &= 7;
3069   - gen_op_pinsrw_xmm(offsetof(CPUX86State,xmm_regs[reg]), val);
  3129 + tcg_gen_st16_tl(cpu_T[0], cpu_env,
  3130 + offsetof(CPUX86State,xmm_regs[reg].XMM_W(val)));
3070 3131 } else {
3071 3132 val &= 3;
3072   - gen_op_pinsrw_mmx(offsetof(CPUX86State,fpregs[reg].mmx), val);
  3133 + tcg_gen_st16_tl(cpu_T[0], cpu_env,
  3134 + offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3073 3135 }
3074 3136 break;
3075 3137 case 0xc5: /* pextrw */
... ... @@ -3080,11 +3142,13 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3080 3142 if (b1) {
3081 3143 val &= 7;
3082 3144 rm = (modrm & 7) | REX_B(s);
3083   - gen_op_pextrw_xmm(offsetof(CPUX86State,xmm_regs[rm]), val);
  3145 + tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
  3146 + offsetof(CPUX86State,xmm_regs[rm].XMM_W(val)));
3084 3147 } else {
3085 3148 val &= 3;
3086 3149 rm = (modrm & 7);
3087   - gen_op_pextrw_mmx(offsetof(CPUX86State,fpregs[rm].mmx), val);
  3150 + tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
  3151 + offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3088 3152 }
3089 3153 reg = ((modrm >> 3) & 7) | rex_r;
3090 3154 gen_op_mov_reg_T0(OT_LONG, reg);
... ... @@ -3101,14 +3165,14 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3101 3165 }
3102 3166 break;
3103 3167 case 0x2d6: /* movq2dq */
3104   - gen_op_enter_mmx();
  3168 + tcg_gen_helper_0_0(helper_enter_mmx);
3105 3169 rm = (modrm & 7);
3106 3170 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3107 3171 offsetof(CPUX86State,fpregs[rm].mmx));
3108 3172 gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
3109 3173 break;
3110 3174 case 0x3d6: /* movdq2q */
3111   - gen_op_enter_mmx();
  3175 + tcg_gen_helper_0_0(helper_enter_mmx);
3112 3176 rm = (modrm & 7) | REX_B(s);
3113 3177 gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
3114 3178 offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
... ... @@ -3119,11 +3183,14 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3119 3183 goto illegal_op;
3120 3184 if (b1) {
3121 3185 rm = (modrm & 7) | REX_B(s);
3122   - gen_op_pmovmskb_xmm(offsetof(CPUX86State,xmm_regs[rm]));
  3186 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
  3187 + tcg_gen_helper_1_1(helper_pmovmskb_xmm, cpu_tmp2, cpu_ptr0);
3123 3188 } else {
3124 3189 rm = (modrm & 7);
3125   - gen_op_pmovmskb_mmx(offsetof(CPUX86State,fpregs[rm].mmx));
  3190 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
  3191 + tcg_gen_helper_1_1(helper_pmovmskb_mmx, cpu_tmp2, cpu_ptr0);
3126 3192 }
  3193 + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2);
3127 3194 reg = ((modrm >> 3) & 7) | rex_r;
3128 3195 gen_op_mov_reg_T0(OT_LONG, reg);
3129 3196 break;
... ... @@ -3199,13 +3266,16 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3199 3266 sse_op2 = sse_op_table5[val];
3200 3267 if (!sse_op2)
3201 3268 goto illegal_op;
3202   - sse_op2(op1_offset, op2_offset);
  3269 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
  3270 + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
  3271 + tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
3203 3272 break;
3204 3273 case 0x70: /* pshufx insn */
3205 3274 case 0xc6: /* shufps/shufpd insn */
3206 3275 val = ldub_code(s->pc++);
3207   - sse_op3 = (GenOpFunc3 *)sse_op2;
3208   - sse_op3(op1_offset, op2_offset, val);
  3276 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
  3277 + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
  3278 + tcg_gen_helper_0_3(sse_op2, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
3209 3279 break;
3210 3280 case 0xc2:
3211 3281 /* compare insns */
... ... @@ -3213,13 +3283,19 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3213 3283 if (val >= 8)
3214 3284 goto illegal_op;
3215 3285 sse_op2 = sse_op_table4[val][b1];
3216   - sse_op2(op1_offset, op2_offset);
  3286 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
  3287 + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
  3288 + tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
3217 3289 break;
3218 3290 default:
3219   - sse_op2(op1_offset, op2_offset);
  3291 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
  3292 + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
  3293 + tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
3220 3294 break;
3221 3295 }
3222 3296 if (b == 0x2e || b == 0x2f) {
  3297 + /* just to keep the EFLAGS optimization correct */
  3298 + gen_op_com_dummy();
3223 3299 s->cc_op = CC_OP_EFLAGS;
3224 3300 }
3225 3301 }
... ... @@ -6485,10 +6561,10 @@ static uint16_t opc_write_flags[NB_OPS] = {
6485 6561 X86_64_DEF([INDEX_op_imulq_T0_T1] = CC_OSZAPC,)
6486 6562  
6487 6563 /* sse */
6488   - [INDEX_op_ucomiss] = CC_OSZAPC,
6489   - [INDEX_op_ucomisd] = CC_OSZAPC,
6490   - [INDEX_op_comiss] = CC_OSZAPC,
6491   - [INDEX_op_comisd] = CC_OSZAPC,
  6564 + [INDEX_op_com_dummy] = CC_OSZAPC, /* the dummy op emitted after ucomis/comis now carries the flag-write marking */
  6565 + [INDEX_op_com_dummy] = CC_OSZAPC, /* NOTE(review): this and the next two lines repeat the same designated initializer; one entry suffices */
  6566 + [INDEX_op_com_dummy] = CC_OSZAPC,
  6567 + [INDEX_op_com_dummy] = CC_OSZAPC,
6492 6568  
6493 6569 /* bcd */
6494 6570 [INDEX_op_aam] = CC_OSZAPC,
... ... @@ -6792,6 +6868,9 @@ static inline int gen_intermediate_code_internal(CPUState *env,
6792 6868 #if TARGET_LONG_BITS > HOST_LONG_BITS
6793 6869 cpu_tmp1 = tcg_temp_new(TCG_TYPE_I64);
6794 6870 #endif
  6871 + cpu_tmp2 = tcg_temp_new(TCG_TYPE_I32);
  6872 + cpu_ptr0 = tcg_temp_new(TCG_TYPE_PTR);
  6873 + cpu_ptr1 = tcg_temp_new(TCG_TYPE_PTR);
6795 6874  
6796 6875 gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
6797 6876  
... ...