Commit 5af451868c5bd93ea24672b0fa1b765cbd563cc6

Authored by bellard
1 parent 77f193da

converted SSE/MMX ops to TCG

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4441 c046a42c-6fe2-441c-8c8c-71466251a162
target-i386/helper.c
@@ -4462,3 +4462,32 @@ void vmexit(uint64_t exit_code, uint64_t exit_info_1) @@ -4462,3 +4462,32 @@ void vmexit(uint64_t exit_code, uint64_t exit_info_1)
4462 } 4462 }
4463 4463
4464 #endif 4464 #endif
  4465 +
  4466 +/* MMX/SSE */
  4467 +/* XXX: optimize by storing fptt and fptags in the static cpu state */
  4468 +void helper_enter_mmx(void)
  4469 +{
  4470 + env->fpstt = 0;
  4471 + *(uint32_t *)(env->fptags) = 0;
  4472 + *(uint32_t *)(env->fptags + 4) = 0;
  4473 +}
  4474 +
  4475 +void helper_emms(void)
  4476 +{
  4477 + /* set to empty state */
  4478 + *(uint32_t *)(env->fptags) = 0x01010101;
  4479 + *(uint32_t *)(env->fptags + 4) = 0x01010101;
  4480 +}
  4481 +
/* XXX: suppress */
/*
 * Copy one 64-bit value.
 *
 * d: destination, receives the value read from s.
 * s: source, read once and not modified.
 */
void helper_movq(uint64_t *d, uint64_t *s)
{
    const uint64_t value = *s;

    *d = value;
}
  4487 +
  4488 +#define SHIFT 0
  4489 +#include "ops_sse.h"
  4490 +
  4491 +#define SHIFT 1
  4492 +#include "ops_sse.h"
  4493 +
target-i386/helper.h
@@ -2,3 +2,12 @@ @@ -2,3 +2,12 @@
2 2
3 void TCG_HELPER_PROTO helper_divl_EAX_T0(target_ulong t0); 3 void TCG_HELPER_PROTO helper_divl_EAX_T0(target_ulong t0);
4 void TCG_HELPER_PROTO helper_idivl_EAX_T0(target_ulong t0); 4 void TCG_HELPER_PROTO helper_idivl_EAX_T0(target_ulong t0);
  5 +void TCG_HELPER_PROTO helper_enter_mmx(void);
  6 +void TCG_HELPER_PROTO helper_emms(void);
  7 +void TCG_HELPER_PROTO helper_movq(uint64_t *d, uint64_t *s);
  8 +
  9 +#define SHIFT 0
  10 +#include "ops_sse_header.h"
  11 +#define SHIFT 1
  12 +#include "ops_sse_header.h"
  13 +
target-i386/op.c
@@ -2144,44 +2144,9 @@ void OPPROTO op_unlock(void) @@ -2144,44 +2144,9 @@ void OPPROTO op_unlock(void)
2144 } 2144 }
2145 2145
2146 /* SSE support */ 2146 /* SSE support */
2147 -static inline void memcpy16(void *d, void *s) 2147 +void OPPROTO op_com_dummy(void)
2148 { 2148 {
2149 - ((uint32_t *)d)[0] = ((uint32_t *)s)[0];  
2150 - ((uint32_t *)d)[1] = ((uint32_t *)s)[1];  
2151 - ((uint32_t *)d)[2] = ((uint32_t *)s)[2];  
2152 - ((uint32_t *)d)[3] = ((uint32_t *)s)[3];  
2153 -}  
2154 -  
2155 -void OPPROTO op_movo(void)  
2156 -{  
2157 - /* XXX: badly generated code */  
2158 - XMMReg *d, *s;  
2159 - d = (XMMReg *)((char *)env + PARAM1);  
2160 - s = (XMMReg *)((char *)env + PARAM2);  
2161 - memcpy16(d, s);  
2162 -}  
2163 -  
2164 -void OPPROTO op_movq(void)  
2165 -{  
2166 - uint64_t *d, *s;  
2167 - d = (uint64_t *)((char *)env + PARAM1);  
2168 - s = (uint64_t *)((char *)env + PARAM2);  
2169 - *d = *s;  
2170 -}  
2171 -  
2172 -void OPPROTO op_movl(void)  
2173 -{  
2174 - uint32_t *d, *s;  
2175 - d = (uint32_t *)((char *)env + PARAM1);  
2176 - s = (uint32_t *)((char *)env + PARAM2);  
2177 - *d = *s;  
2178 -}  
2179 -  
2180 -void OPPROTO op_movq_env_0(void)  
2181 -{  
2182 - uint64_t *d;  
2183 - d = (uint64_t *)((char *)env + PARAM1);  
2184 - *d = 0; 2149 + T0 = 0;
2185 } 2150 }
2186 2151
2187 void OPPROTO op_fxsave_A0(void) 2152 void OPPROTO op_fxsave_A0(void)
@@ -2194,27 +2159,6 @@ void OPPROTO op_fxrstor_A0(void) @@ -2194,27 +2159,6 @@ void OPPROTO op_fxrstor_A0(void)
2194 helper_fxrstor(A0, PARAM1); 2159 helper_fxrstor(A0, PARAM1);
2195 } 2160 }
2196 2161
2197 -/* XXX: optimize by storing fptt and fptags in the static cpu state */  
2198 -void OPPROTO op_enter_mmx(void)  
2199 -{  
2200 - env->fpstt = 0;  
2201 - *(uint32_t *)(env->fptags) = 0;  
2202 - *(uint32_t *)(env->fptags + 4) = 0;  
2203 -}  
2204 -  
2205 -void OPPROTO op_emms(void)  
2206 -{  
2207 - /* set to empty state */  
2208 - *(uint32_t *)(env->fptags) = 0x01010101;  
2209 - *(uint32_t *)(env->fptags + 4) = 0x01010101;  
2210 -}  
2211 -  
2212 -#define SHIFT 0  
2213 -#include "ops_sse.h"  
2214 -  
2215 -#define SHIFT 1  
2216 -#include "ops_sse.h"  
2217 -  
2218 /* Secure Virtual Machine ops */ 2162 /* Secure Virtual Machine ops */
2219 2163
2220 void OPPROTO op_vmrun(void) 2164 void OPPROTO op_vmrun(void)
target-i386/ops_sse.h
@@ -35,14 +35,10 @@ @@ -35,14 +35,10 @@
35 #define SUFFIX _xmm 35 #define SUFFIX _xmm
36 #endif 36 #endif
37 37
38 -void OPPROTO glue(op_psrlw, SUFFIX)(void) 38 +void glue(helper_psrlw, SUFFIX)(Reg *d, Reg *s)
39 { 39 {
40 - Reg *d, *s;  
41 int shift; 40 int shift;
42 41
43 - d = (Reg *)((char *)env + PARAM1);  
44 - s = (Reg *)((char *)env + PARAM2);  
45 -  
46 if (s->Q(0) > 15) { 42 if (s->Q(0) > 15) {
47 d->Q(0) = 0; 43 d->Q(0) = 0;
48 #if SHIFT == 1 44 #if SHIFT == 1
@@ -64,14 +60,10 @@ void OPPROTO glue(op_psrlw, SUFFIX)(void) @@ -64,14 +60,10 @@ void OPPROTO glue(op_psrlw, SUFFIX)(void)
64 FORCE_RET(); 60 FORCE_RET();
65 } 61 }
66 62
67 -void OPPROTO glue(op_psraw, SUFFIX)(void) 63 +void glue(helper_psraw, SUFFIX)(Reg *d, Reg *s)
68 { 64 {
69 - Reg *d, *s;  
70 int shift; 65 int shift;
71 66
72 - d = (Reg *)((char *)env + PARAM1);  
73 - s = (Reg *)((char *)env + PARAM2);  
74 -  
75 if (s->Q(0) > 15) { 67 if (s->Q(0) > 15) {
76 shift = 15; 68 shift = 15;
77 } else { 69 } else {
@@ -89,14 +81,10 @@ void OPPROTO glue(op_psraw, SUFFIX)(void) @@ -89,14 +81,10 @@ void OPPROTO glue(op_psraw, SUFFIX)(void)
89 #endif 81 #endif
90 } 82 }
91 83
92 -void OPPROTO glue(op_psllw, SUFFIX)(void) 84 +void glue(helper_psllw, SUFFIX)(Reg *d, Reg *s)
93 { 85 {
94 - Reg *d, *s;  
95 int shift; 86 int shift;
96 87
97 - d = (Reg *)((char *)env + PARAM1);  
98 - s = (Reg *)((char *)env + PARAM2);  
99 -  
100 if (s->Q(0) > 15) { 88 if (s->Q(0) > 15) {
101 d->Q(0) = 0; 89 d->Q(0) = 0;
102 #if SHIFT == 1 90 #if SHIFT == 1
@@ -118,14 +106,10 @@ void OPPROTO glue(op_psllw, SUFFIX)(void) @@ -118,14 +106,10 @@ void OPPROTO glue(op_psllw, SUFFIX)(void)
118 FORCE_RET(); 106 FORCE_RET();
119 } 107 }
120 108
121 -void OPPROTO glue(op_psrld, SUFFIX)(void) 109 +void glue(helper_psrld, SUFFIX)(Reg *d, Reg *s)
122 { 110 {
123 - Reg *d, *s;  
124 int shift; 111 int shift;
125 112
126 - d = (Reg *)((char *)env + PARAM1);  
127 - s = (Reg *)((char *)env + PARAM2);  
128 -  
129 if (s->Q(0) > 31) { 113 if (s->Q(0) > 31) {
130 d->Q(0) = 0; 114 d->Q(0) = 0;
131 #if SHIFT == 1 115 #if SHIFT == 1
@@ -143,14 +127,10 @@ void OPPROTO glue(op_psrld, SUFFIX)(void) @@ -143,14 +127,10 @@ void OPPROTO glue(op_psrld, SUFFIX)(void)
143 FORCE_RET(); 127 FORCE_RET();
144 } 128 }
145 129
146 -void OPPROTO glue(op_psrad, SUFFIX)(void) 130 +void glue(helper_psrad, SUFFIX)(Reg *d, Reg *s)
147 { 131 {
148 - Reg *d, *s;  
149 int shift; 132 int shift;
150 133
151 - d = (Reg *)((char *)env + PARAM1);  
152 - s = (Reg *)((char *)env + PARAM2);  
153 -  
154 if (s->Q(0) > 31) { 134 if (s->Q(0) > 31) {
155 shift = 31; 135 shift = 31;
156 } else { 136 } else {
@@ -164,14 +144,10 @@ void OPPROTO glue(op_psrad, SUFFIX)(void) @@ -164,14 +144,10 @@ void OPPROTO glue(op_psrad, SUFFIX)(void)
164 #endif 144 #endif
165 } 145 }
166 146
167 -void OPPROTO glue(op_pslld, SUFFIX)(void) 147 +void glue(helper_pslld, SUFFIX)(Reg *d, Reg *s)
168 { 148 {
169 - Reg *d, *s;  
170 int shift; 149 int shift;
171 150
172 - d = (Reg *)((char *)env + PARAM1);  
173 - s = (Reg *)((char *)env + PARAM2);  
174 -  
175 if (s->Q(0) > 31) { 151 if (s->Q(0) > 31) {
176 d->Q(0) = 0; 152 d->Q(0) = 0;
177 #if SHIFT == 1 153 #if SHIFT == 1
@@ -189,14 +165,10 @@ void OPPROTO glue(op_pslld, SUFFIX)(void) @@ -189,14 +165,10 @@ void OPPROTO glue(op_pslld, SUFFIX)(void)
189 FORCE_RET(); 165 FORCE_RET();
190 } 166 }
191 167
192 -void OPPROTO glue(op_psrlq, SUFFIX)(void) 168 +void glue(helper_psrlq, SUFFIX)(Reg *d, Reg *s)
193 { 169 {
194 - Reg *d, *s;  
195 int shift; 170 int shift;
196 171
197 - d = (Reg *)((char *)env + PARAM1);  
198 - s = (Reg *)((char *)env + PARAM2);  
199 -  
200 if (s->Q(0) > 63) { 172 if (s->Q(0) > 63) {
201 d->Q(0) = 0; 173 d->Q(0) = 0;
202 #if SHIFT == 1 174 #if SHIFT == 1
@@ -212,14 +184,10 @@ void OPPROTO glue(op_psrlq, SUFFIX)(void) @@ -212,14 +184,10 @@ void OPPROTO glue(op_psrlq, SUFFIX)(void)
212 FORCE_RET(); 184 FORCE_RET();
213 } 185 }
214 186
215 -void OPPROTO glue(op_psllq, SUFFIX)(void) 187 +void glue(helper_psllq, SUFFIX)(Reg *d, Reg *s)
216 { 188 {
217 - Reg *d, *s;  
218 int shift; 189 int shift;
219 190
220 - d = (Reg *)((char *)env + PARAM1);  
221 - s = (Reg *)((char *)env + PARAM2);  
222 -  
223 if (s->Q(0) > 63) { 191 if (s->Q(0) > 63) {
224 d->Q(0) = 0; 192 d->Q(0) = 0;
225 #if SHIFT == 1 193 #if SHIFT == 1
@@ -236,13 +204,10 @@ void OPPROTO glue(op_psllq, SUFFIX)(void) @@ -236,13 +204,10 @@ void OPPROTO glue(op_psllq, SUFFIX)(void)
236 } 204 }
237 205
238 #if SHIFT == 1 206 #if SHIFT == 1
239 -void OPPROTO glue(op_psrldq, SUFFIX)(void) 207 +void glue(helper_psrldq, SUFFIX)(Reg *d, Reg *s)
240 { 208 {
241 - Reg *d, *s;  
242 int shift, i; 209 int shift, i;
243 210
244 - d = (Reg *)((char *)env + PARAM1);  
245 - s = (Reg *)((char *)env + PARAM2);  
246 shift = s->L(0); 211 shift = s->L(0);
247 if (shift > 16) 212 if (shift > 16)
248 shift = 16; 213 shift = 16;
@@ -253,13 +218,10 @@ void OPPROTO glue(op_psrldq, SUFFIX)(void) @@ -253,13 +218,10 @@ void OPPROTO glue(op_psrldq, SUFFIX)(void)
253 FORCE_RET(); 218 FORCE_RET();
254 } 219 }
255 220
256 -void OPPROTO glue(op_pslldq, SUFFIX)(void) 221 +void glue(helper_pslldq, SUFFIX)(Reg *d, Reg *s)
257 { 222 {
258 - Reg *d, *s;  
259 int shift, i; 223 int shift, i;
260 224
261 - d = (Reg *)((char *)env + PARAM1);  
262 - s = (Reg *)((char *)env + PARAM2);  
263 shift = s->L(0); 225 shift = s->L(0);
264 if (shift > 16) 226 if (shift > 16)
265 shift = 16; 227 shift = 16;
@@ -271,12 +233,9 @@ void OPPROTO glue(op_pslldq, SUFFIX)(void) @@ -271,12 +233,9 @@ void OPPROTO glue(op_pslldq, SUFFIX)(void)
271 } 233 }
272 #endif 234 #endif
273 235
274 -#define SSE_OP_B(name, F)\  
275 -void OPPROTO glue(name, SUFFIX) (void)\ 236 +#define SSE_HELPER_B(name, F)\
  237 +void glue(name, SUFFIX) (Reg *d, Reg *s)\
276 {\ 238 {\
277 - Reg *d, *s;\  
278 - d = (Reg *)((char *)env + PARAM1);\  
279 - s = (Reg *)((char *)env + PARAM2);\  
280 d->B(0) = F(d->B(0), s->B(0));\ 239 d->B(0) = F(d->B(0), s->B(0));\
281 d->B(1) = F(d->B(1), s->B(1));\ 240 d->B(1) = F(d->B(1), s->B(1));\
282 d->B(2) = F(d->B(2), s->B(2));\ 241 d->B(2) = F(d->B(2), s->B(2));\
@@ -297,12 +256,9 @@ void OPPROTO glue(name, SUFFIX) (void)\ @@ -297,12 +256,9 @@ void OPPROTO glue(name, SUFFIX) (void)\
297 )\ 256 )\
298 } 257 }
299 258
300 -#define SSE_OP_W(name, F)\  
301 -void OPPROTO glue(name, SUFFIX) (void)\ 259 +#define SSE_HELPER_W(name, F)\
  260 +void glue(name, SUFFIX) (Reg *d, Reg *s)\
302 {\ 261 {\
303 - Reg *d, *s;\  
304 - d = (Reg *)((char *)env + PARAM1);\  
305 - s = (Reg *)((char *)env + PARAM2);\  
306 d->W(0) = F(d->W(0), s->W(0));\ 262 d->W(0) = F(d->W(0), s->W(0));\
307 d->W(1) = F(d->W(1), s->W(1));\ 263 d->W(1) = F(d->W(1), s->W(1));\
308 d->W(2) = F(d->W(2), s->W(2));\ 264 d->W(2) = F(d->W(2), s->W(2));\
@@ -315,12 +271,9 @@ void OPPROTO glue(name, SUFFIX) (void)\ @@ -315,12 +271,9 @@ void OPPROTO glue(name, SUFFIX) (void)\
315 )\ 271 )\
316 } 272 }
317 273
318 -#define SSE_OP_L(name, F)\  
319 -void OPPROTO glue(name, SUFFIX) (void)\ 274 +#define SSE_HELPER_L(name, F)\
  275 +void glue(name, SUFFIX) (Reg *d, Reg *s)\
320 {\ 276 {\
321 - Reg *d, *s;\  
322 - d = (Reg *)((char *)env + PARAM1);\  
323 - s = (Reg *)((char *)env + PARAM2);\  
324 d->L(0) = F(d->L(0), s->L(0));\ 277 d->L(0) = F(d->L(0), s->L(0));\
325 d->L(1) = F(d->L(1), s->L(1));\ 278 d->L(1) = F(d->L(1), s->L(1));\
326 XMM_ONLY(\ 279 XMM_ONLY(\
@@ -329,12 +282,9 @@ void OPPROTO glue(name, SUFFIX) (void)\ @@ -329,12 +282,9 @@ void OPPROTO glue(name, SUFFIX) (void)\
329 )\ 282 )\
330 } 283 }
331 284
332 -#define SSE_OP_Q(name, F)\  
333 -void OPPROTO glue(name, SUFFIX) (void)\ 285 +#define SSE_HELPER_Q(name, F)\
  286 +void glue(name, SUFFIX) (Reg *d, Reg *s)\
334 {\ 287 {\
335 - Reg *d, *s;\  
336 - d = (Reg *)((char *)env + PARAM1);\  
337 - s = (Reg *)((char *)env + PARAM2);\  
338 d->Q(0) = F(d->Q(0), s->Q(0));\ 288 d->Q(0) = F(d->Q(0), s->Q(0));\
339 XMM_ONLY(\ 289 XMM_ONLY(\
340 d->Q(1) = F(d->Q(1), s->Q(1));\ 290 d->Q(1) = F(d->Q(1), s->Q(1));\
@@ -416,73 +366,66 @@ static inline int satsw(int x) @@ -416,73 +366,66 @@ static inline int satsw(int x)
/*
 * Rounded-up average of a and b: (a + b + 1) >> 1.
 * The whole expansion is parenthesized so the macro keeps its meaning
 * when used as an operand of a larger expression (e.g. 2 * FAVG(x, y)).
 * Each argument is evaluated exactly once.
 */
#define FAVG(a, b) (((a) + (b) + 1) >> 1)
417 #endif 367 #endif
418 368
419 -SSE_OP_B(op_paddb, FADD)  
420 -SSE_OP_W(op_paddw, FADD)  
421 -SSE_OP_L(op_paddl, FADD)  
422 -SSE_OP_Q(op_paddq, FADD) 369 +SSE_HELPER_B(helper_paddb, FADD)
  370 +SSE_HELPER_W(helper_paddw, FADD)
  371 +SSE_HELPER_L(helper_paddl, FADD)
  372 +SSE_HELPER_Q(helper_paddq, FADD)
423 373
424 -SSE_OP_B(op_psubb, FSUB)  
425 -SSE_OP_W(op_psubw, FSUB)  
426 -SSE_OP_L(op_psubl, FSUB)  
427 -SSE_OP_Q(op_psubq, FSUB) 374 +SSE_HELPER_B(helper_psubb, FSUB)
  375 +SSE_HELPER_W(helper_psubw, FSUB)
  376 +SSE_HELPER_L(helper_psubl, FSUB)
  377 +SSE_HELPER_Q(helper_psubq, FSUB)
428 378
429 -SSE_OP_B(op_paddusb, FADDUB)  
430 -SSE_OP_B(op_paddsb, FADDSB)  
431 -SSE_OP_B(op_psubusb, FSUBUB)  
432 -SSE_OP_B(op_psubsb, FSUBSB) 379 +SSE_HELPER_B(helper_paddusb, FADDUB)
  380 +SSE_HELPER_B(helper_paddsb, FADDSB)
  381 +SSE_HELPER_B(helper_psubusb, FSUBUB)
  382 +SSE_HELPER_B(helper_psubsb, FSUBSB)
433 383
434 -SSE_OP_W(op_paddusw, FADDUW)  
435 -SSE_OP_W(op_paddsw, FADDSW)  
436 -SSE_OP_W(op_psubusw, FSUBUW)  
437 -SSE_OP_W(op_psubsw, FSUBSW) 384 +SSE_HELPER_W(helper_paddusw, FADDUW)
  385 +SSE_HELPER_W(helper_paddsw, FADDSW)
  386 +SSE_HELPER_W(helper_psubusw, FSUBUW)
  387 +SSE_HELPER_W(helper_psubsw, FSUBSW)
438 388
439 -SSE_OP_B(op_pminub, FMINUB)  
440 -SSE_OP_B(op_pmaxub, FMAXUB) 389 +SSE_HELPER_B(helper_pminub, FMINUB)
  390 +SSE_HELPER_B(helper_pmaxub, FMAXUB)
441 391
442 -SSE_OP_W(op_pminsw, FMINSW)  
443 -SSE_OP_W(op_pmaxsw, FMAXSW) 392 +SSE_HELPER_W(helper_pminsw, FMINSW)
  393 +SSE_HELPER_W(helper_pmaxsw, FMAXSW)
444 394
445 -SSE_OP_Q(op_pand, FAND)  
446 -SSE_OP_Q(op_pandn, FANDN)  
447 -SSE_OP_Q(op_por, FOR)  
448 -SSE_OP_Q(op_pxor, FXOR) 395 +SSE_HELPER_Q(helper_pand, FAND)
  396 +SSE_HELPER_Q(helper_pandn, FANDN)
  397 +SSE_HELPER_Q(helper_por, FOR)
  398 +SSE_HELPER_Q(helper_pxor, FXOR)
449 399
450 -SSE_OP_B(op_pcmpgtb, FCMPGTB)  
451 -SSE_OP_W(op_pcmpgtw, FCMPGTW)  
452 -SSE_OP_L(op_pcmpgtl, FCMPGTL) 400 +SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
  401 +SSE_HELPER_W(helper_pcmpgtw, FCMPGTW)
  402 +SSE_HELPER_L(helper_pcmpgtl, FCMPGTL)
453 403
454 -SSE_OP_B(op_pcmpeqb, FCMPEQ)  
455 -SSE_OP_W(op_pcmpeqw, FCMPEQ)  
456 -SSE_OP_L(op_pcmpeql, FCMPEQ) 404 +SSE_HELPER_B(helper_pcmpeqb, FCMPEQ)
  405 +SSE_HELPER_W(helper_pcmpeqw, FCMPEQ)
  406 +SSE_HELPER_L(helper_pcmpeql, FCMPEQ)
457 407
458 -SSE_OP_W(op_pmullw, FMULLW) 408 +SSE_HELPER_W(helper_pmullw, FMULLW)
459 #if SHIFT == 0 409 #if SHIFT == 0
460 -SSE_OP_W(op_pmulhrw, FMULHRW) 410 +SSE_HELPER_W(helper_pmulhrw, FMULHRW)
461 #endif 411 #endif
462 -SSE_OP_W(op_pmulhuw, FMULHUW)  
463 -SSE_OP_W(op_pmulhw, FMULHW) 412 +SSE_HELPER_W(helper_pmulhuw, FMULHUW)
  413 +SSE_HELPER_W(helper_pmulhw, FMULHW)
464 414
465 -SSE_OP_B(op_pavgb, FAVG)  
466 -SSE_OP_W(op_pavgw, FAVG) 415 +SSE_HELPER_B(helper_pavgb, FAVG)
  416 +SSE_HELPER_W(helper_pavgw, FAVG)
467 417
468 -void OPPROTO glue(op_pmuludq, SUFFIX) (void) 418 +void glue(helper_pmuludq, SUFFIX) (Reg *d, Reg *s)
469 { 419 {
470 - Reg *d, *s;  
471 - d = (Reg *)((char *)env + PARAM1);  
472 - s = (Reg *)((char *)env + PARAM2);  
473 -  
474 d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0); 420 d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0);
475 #if SHIFT == 1 421 #if SHIFT == 1
476 d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2); 422 d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2);
477 #endif 423 #endif
478 } 424 }
479 425
480 -void OPPROTO glue(op_pmaddwd, SUFFIX) (void) 426 +void glue(helper_pmaddwd, SUFFIX) (Reg *d, Reg *s)
481 { 427 {
482 int i; 428 int i;
483 - Reg *d, *s;  
484 - d = (Reg *)((char *)env + PARAM1);  
485 - s = (Reg *)((char *)env + PARAM2);  
486 429
487 for(i = 0; i < (2 << SHIFT); i++) { 430 for(i = 0; i < (2 << SHIFT); i++) {
488 d->L(i) = (int16_t)s->W(2*i) * (int16_t)d->W(2*i) + 431 d->L(i) = (int16_t)s->W(2*i) * (int16_t)d->W(2*i) +
@@ -500,12 +443,9 @@ static inline int abs1(int a) @@ -500,12 +443,9 @@ static inline int abs1(int a)
500 return a; 443 return a;
501 } 444 }
502 #endif 445 #endif
503 -void OPPROTO glue(op_psadbw, SUFFIX) (void) 446 +void glue(helper_psadbw, SUFFIX) (Reg *d, Reg *s)
504 { 447 {
505 unsigned int val; 448 unsigned int val;
506 - Reg *d, *s;  
507 - d = (Reg *)((char *)env + PARAM1);  
508 - s = (Reg *)((char *)env + PARAM2);  
509 449
510 val = 0; 450 val = 0;
511 val += abs1(d->B(0) - s->B(0)); 451 val += abs1(d->B(0) - s->B(0));
@@ -531,12 +471,9 @@ void OPPROTO glue(op_psadbw, SUFFIX) (void) @@ -531,12 +471,9 @@ void OPPROTO glue(op_psadbw, SUFFIX) (void)
531 #endif 471 #endif
532 } 472 }
533 473
534 -void OPPROTO glue(op_maskmov, SUFFIX) (void) 474 +void glue(helper_maskmov, SUFFIX) (Reg *d, Reg *s)
535 { 475 {
536 int i; 476 int i;
537 - Reg *d, *s;  
538 - d = (Reg *)((char *)env + PARAM1);  
539 - s = (Reg *)((char *)env + PARAM2);  
540 for(i = 0; i < (8 << SHIFT); i++) { 477 for(i = 0; i < (8 << SHIFT); i++) {
541 if (s->B(i) & 0x80) 478 if (s->B(i) & 0x80)
542 stb(A0 + i, d->B(i)); 479 stb(A0 + i, d->B(i));
@@ -544,51 +481,29 @@ void OPPROTO glue(op_maskmov, SUFFIX) (void) @@ -544,51 +481,29 @@ void OPPROTO glue(op_maskmov, SUFFIX) (void)
544 FORCE_RET(); 481 FORCE_RET();
545 } 482 }
546 483
547 -void OPPROTO glue(op_movl_mm_T0, SUFFIX) (void) 484 +void glue(helper_movl_mm_T0, SUFFIX) (Reg *d, uint32_t val)
548 { 485 {
549 - Reg *d;  
550 - d = (Reg *)((char *)env + PARAM1);  
551 - d->L(0) = T0; 486 + d->L(0) = val;
552 d->L(1) = 0; 487 d->L(1) = 0;
553 #if SHIFT == 1 488 #if SHIFT == 1
554 d->Q(1) = 0; 489 d->Q(1) = 0;
555 #endif 490 #endif
556 } 491 }
557 492
558 -void OPPROTO glue(op_movl_T0_mm, SUFFIX) (void)  
559 -{  
560 - Reg *s;  
561 - s = (Reg *)((char *)env + PARAM1);  
562 - T0 = s->L(0);  
563 -}  
564 -  
565 #ifdef TARGET_X86_64 493 #ifdef TARGET_X86_64
566 -void OPPROTO glue(op_movq_mm_T0, SUFFIX) (void) 494 +void glue(helper_movq_mm_T0, SUFFIX) (Reg *d, uint64_t val)
567 { 495 {
568 - Reg *d;  
569 - d = (Reg *)((char *)env + PARAM1);  
570 - d->Q(0) = T0; 496 + d->Q(0) = val;
571 #if SHIFT == 1 497 #if SHIFT == 1
572 d->Q(1) = 0; 498 d->Q(1) = 0;
573 #endif 499 #endif
574 } 500 }
575 -  
576 -void OPPROTO glue(op_movq_T0_mm, SUFFIX) (void)  
577 -{  
578 - Reg *s;  
579 - s = (Reg *)((char *)env + PARAM1);  
580 - T0 = s->Q(0);  
581 -}  
582 #endif 501 #endif
583 502
584 #if SHIFT == 0 503 #if SHIFT == 0
585 -void OPPROTO glue(op_pshufw, SUFFIX) (void) 504 +void glue(helper_pshufw, SUFFIX) (Reg *d, Reg *s, int order)
586 { 505 {
587 - Reg r, *d, *s;  
588 - int order;  
589 - d = (Reg *)((char *)env + PARAM1);  
590 - s = (Reg *)((char *)env + PARAM2);  
591 - order = PARAM3; 506 + Reg r;
592 r.W(0) = s->W(order & 3); 507 r.W(0) = s->W(order & 3);
593 r.W(1) = s->W((order >> 2) & 3); 508 r.W(1) = s->W((order >> 2) & 3);
594 r.W(2) = s->W((order >> 4) & 3); 509 r.W(2) = s->W((order >> 4) & 3);
@@ -596,13 +511,9 @@ void OPPROTO glue(op_pshufw, SUFFIX) (void) @@ -596,13 +511,9 @@ void OPPROTO glue(op_pshufw, SUFFIX) (void)
596 *d = r; 511 *d = r;
597 } 512 }
598 #else 513 #else
599 -void OPPROTO op_shufps(void) 514 +void helper_shufps(Reg *d, Reg *s, int order)
600 { 515 {
601 - Reg r, *d, *s;  
602 - int order;  
603 - d = (Reg *)((char *)env + PARAM1);  
604 - s = (Reg *)((char *)env + PARAM2);  
605 - order = PARAM3; 516 + Reg r;
606 r.L(0) = d->L(order & 3); 517 r.L(0) = d->L(order & 3);
607 r.L(1) = d->L((order >> 2) & 3); 518 r.L(1) = d->L((order >> 2) & 3);
608 r.L(2) = s->L((order >> 4) & 3); 519 r.L(2) = s->L((order >> 4) & 3);
@@ -610,25 +521,17 @@ void OPPROTO op_shufps(void) @@ -610,25 +521,17 @@ void OPPROTO op_shufps(void)
610 *d = r; 521 *d = r;
611 } 522 }
612 523
613 -void OPPROTO op_shufpd(void) 524 +void helper_shufpd(Reg *d, Reg *s, int order)
614 { 525 {
615 - Reg r, *d, *s;  
616 - int order;  
617 - d = (Reg *)((char *)env + PARAM1);  
618 - s = (Reg *)((char *)env + PARAM2);  
619 - order = PARAM3; 526 + Reg r;
620 r.Q(0) = d->Q(order & 1); 527 r.Q(0) = d->Q(order & 1);
621 r.Q(1) = s->Q((order >> 1) & 1); 528 r.Q(1) = s->Q((order >> 1) & 1);
622 *d = r; 529 *d = r;
623 } 530 }
624 531
625 -void OPPROTO glue(op_pshufd, SUFFIX) (void) 532 +void glue(helper_pshufd, SUFFIX) (Reg *d, Reg *s, int order)
626 { 533 {
627 - Reg r, *d, *s;  
628 - int order;  
629 - d = (Reg *)((char *)env + PARAM1);  
630 - s = (Reg *)((char *)env + PARAM2);  
631 - order = PARAM3; 534 + Reg r;
632 r.L(0) = s->L(order & 3); 535 r.L(0) = s->L(order & 3);
633 r.L(1) = s->L((order >> 2) & 3); 536 r.L(1) = s->L((order >> 2) & 3);
634 r.L(2) = s->L((order >> 4) & 3); 537 r.L(2) = s->L((order >> 4) & 3);
@@ -636,13 +539,9 @@ void OPPROTO glue(op_pshufd, SUFFIX) (void) @@ -636,13 +539,9 @@ void OPPROTO glue(op_pshufd, SUFFIX) (void)
636 *d = r; 539 *d = r;
637 } 540 }
638 541
639 -void OPPROTO glue(op_pshuflw, SUFFIX) (void) 542 +void glue(helper_pshuflw, SUFFIX) (Reg *d, Reg *s, int order)
640 { 543 {
641 - Reg r, *d, *s;  
642 - int order;  
643 - d = (Reg *)((char *)env + PARAM1);  
644 - s = (Reg *)((char *)env + PARAM2);  
645 - order = PARAM3; 544 + Reg r;
646 r.W(0) = s->W(order & 3); 545 r.W(0) = s->W(order & 3);
647 r.W(1) = s->W((order >> 2) & 3); 546 r.W(1) = s->W((order >> 2) & 3);
648 r.W(2) = s->W((order >> 4) & 3); 547 r.W(2) = s->W((order >> 4) & 3);
@@ -651,13 +550,9 @@ void OPPROTO glue(op_pshuflw, SUFFIX) (void) @@ -651,13 +550,9 @@ void OPPROTO glue(op_pshuflw, SUFFIX) (void)
651 *d = r; 550 *d = r;
652 } 551 }
653 552
654 -void OPPROTO glue(op_pshufhw, SUFFIX) (void) 553 +void glue(helper_pshufhw, SUFFIX) (Reg *d, Reg *s, int order)
655 { 554 {
656 - Reg r, *d, *s;  
657 - int order;  
658 - d = (Reg *)((char *)env + PARAM1);  
659 - s = (Reg *)((char *)env + PARAM2);  
660 - order = PARAM3; 555 + Reg r;
661 r.Q(0) = s->Q(0); 556 r.Q(0) = s->Q(0);
662 r.W(4) = s->W(4 + (order & 3)); 557 r.W(4) = s->W(4 + (order & 3));
663 r.W(5) = s->W(4 + ((order >> 2) & 3)); 558 r.W(5) = s->W(4 + ((order >> 2) & 3));
@@ -671,39 +566,27 @@ void OPPROTO glue(op_pshufhw, SUFFIX) (void) @@ -671,39 +566,27 @@ void OPPROTO glue(op_pshufhw, SUFFIX) (void)
671 /* FPU ops */ 566 /* FPU ops */
672 /* XXX: not accurate */ 567 /* XXX: not accurate */
673 568
674 -#define SSE_OP_S(name, F)\  
675 -void OPPROTO op_ ## name ## ps (void)\ 569 +#define SSE_HELPER_S(name, F)\
  570 +void helper_ ## name ## ps (Reg *d, Reg *s)\
676 {\ 571 {\
677 - Reg *d, *s;\  
678 - d = (Reg *)((char *)env + PARAM1);\  
679 - s = (Reg *)((char *)env + PARAM2);\  
680 d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ 572 d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
681 d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));\ 573 d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
682 d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2));\ 574 d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
683 d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3));\ 575 d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
684 }\ 576 }\
685 \ 577 \
686 -void OPPROTO op_ ## name ## ss (void)\ 578 +void helper_ ## name ## ss (Reg *d, Reg *s)\
687 {\ 579 {\
688 - Reg *d, *s;\  
689 - d = (Reg *)((char *)env + PARAM1);\  
690 - s = (Reg *)((char *)env + PARAM2);\  
691 d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ 580 d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
692 }\ 581 }\
693 -void OPPROTO op_ ## name ## pd (void)\ 582 +void helper_ ## name ## pd (Reg *d, Reg *s)\
694 {\ 583 {\
695 - Reg *d, *s;\  
696 - d = (Reg *)((char *)env + PARAM1);\  
697 - s = (Reg *)((char *)env + PARAM2);\  
698 d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ 584 d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
699 d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));\ 585 d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
700 }\ 586 }\
701 \ 587 \
702 -void OPPROTO op_ ## name ## sd (void)\ 588 +void helper_ ## name ## sd (Reg *d, Reg *s)\
703 {\ 589 {\
704 - Reg *d, *s;\  
705 - d = (Reg *)((char *)env + PARAM1);\  
706 - s = (Reg *)((char *)env + PARAM2);\  
707 d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ 590 d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
708 } 591 }
709 592
@@ -715,69 +598,53 @@ void OPPROTO op_ ## name ## sd (void)\ @@ -715,69 +598,53 @@ void OPPROTO op_ ## name ## sd (void)\
/*
 * Larger of a and b, chosen with a plain C '>' comparison; the size
 * argument is not used. The expansion is fully parenthesized so it can
 * be embedded in a larger expression without the conditional operator
 * capturing neighbouring operands. Note that a and b may each be
 * evaluated twice, so do not pass expressions with side effects.
 */
#define FPU_MAX(size, a, b) ((a) > (b) ? (a) : (b))
/*
 * Square root of b, computed by float<size>_sqrt() with
 * env->sse_status as the status argument. The a argument is ignored.
 */
#define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status)
717 600
718 -SSE_OP_S(add, FPU_ADD)  
719 -SSE_OP_S(sub, FPU_SUB)  
720 -SSE_OP_S(mul, FPU_MUL)  
721 -SSE_OP_S(div, FPU_DIV)  
722 -SSE_OP_S(min, FPU_MIN)  
723 -SSE_OP_S(max, FPU_MAX)  
724 -SSE_OP_S(sqrt, FPU_SQRT) 601 +SSE_HELPER_S(add, FPU_ADD)
  602 +SSE_HELPER_S(sub, FPU_SUB)
  603 +SSE_HELPER_S(mul, FPU_MUL)
  604 +SSE_HELPER_S(div, FPU_DIV)
  605 +SSE_HELPER_S(min, FPU_MIN)
  606 +SSE_HELPER_S(max, FPU_MAX)
  607 +SSE_HELPER_S(sqrt, FPU_SQRT)
725 608
726 609
727 /* float to float conversions */ 610 /* float to float conversions */
728 -void OPPROTO op_cvtps2pd(void) 611 +void helper_cvtps2pd(Reg *d, Reg *s)
729 { 612 {
730 float32 s0, s1; 613 float32 s0, s1;
731 - Reg *d, *s;  
732 - d = (Reg *)((char *)env + PARAM1);  
733 - s = (Reg *)((char *)env + PARAM2);  
734 s0 = s->XMM_S(0); 614 s0 = s->XMM_S(0);
735 s1 = s->XMM_S(1); 615 s1 = s->XMM_S(1);
736 d->XMM_D(0) = float32_to_float64(s0, &env->sse_status); 616 d->XMM_D(0) = float32_to_float64(s0, &env->sse_status);
737 d->XMM_D(1) = float32_to_float64(s1, &env->sse_status); 617 d->XMM_D(1) = float32_to_float64(s1, &env->sse_status);
738 } 618 }
739 619
740 -void OPPROTO op_cvtpd2ps(void) 620 +void helper_cvtpd2ps(Reg *d, Reg *s)
741 { 621 {
742 - Reg *d, *s;  
743 - d = (Reg *)((char *)env + PARAM1);  
744 - s = (Reg *)((char *)env + PARAM2);  
745 d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status); 622 d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
746 d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status); 623 d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status);
747 d->Q(1) = 0; 624 d->Q(1) = 0;
748 } 625 }
749 626
750 -void OPPROTO op_cvtss2sd(void) 627 +void helper_cvtss2sd(Reg *d, Reg *s)
751 { 628 {
752 - Reg *d, *s;  
753 - d = (Reg *)((char *)env + PARAM1);  
754 - s = (Reg *)((char *)env + PARAM2);  
755 d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status); 629 d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status);
756 } 630 }
757 631
758 -void OPPROTO op_cvtsd2ss(void) 632 +void helper_cvtsd2ss(Reg *d, Reg *s)
759 { 633 {
760 - Reg *d, *s;  
761 - d = (Reg *)((char *)env + PARAM1);  
762 - s = (Reg *)((char *)env + PARAM2);  
763 d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status); 634 d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
764 } 635 }
765 636
766 /* integer to float */ 637 /* integer to float */
767 -void OPPROTO op_cvtdq2ps(void) 638 +void helper_cvtdq2ps(Reg *d, Reg *s)
768 { 639 {
769 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
770 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
771 d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status); 640 d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status);
772 d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status); 641 d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status);
773 d->XMM_S(2) = int32_to_float32(s->XMM_L(2), &env->sse_status); 642 d->XMM_S(2) = int32_to_float32(s->XMM_L(2), &env->sse_status);
774 d->XMM_S(3) = int32_to_float32(s->XMM_L(3), &env->sse_status); 643 d->XMM_S(3) = int32_to_float32(s->XMM_L(3), &env->sse_status);
775 } 644 }
776 645
777 -void OPPROTO op_cvtdq2pd(void) 646 +void helper_cvtdq2pd(Reg *d, Reg *s)
778 { 647 {
779 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
780 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
781 int32_t l0, l1; 648 int32_t l0, l1;
782 l0 = (int32_t)s->XMM_L(0); 649 l0 = (int32_t)s->XMM_L(0);
783 l1 = (int32_t)s->XMM_L(1); 650 l1 = (int32_t)s->XMM_L(1);
@@ -785,210 +652,168 @@ void OPPROTO op_cvtdq2pd(void) @@ -785,210 +652,168 @@ void OPPROTO op_cvtdq2pd(void)
785 d->XMM_D(1) = int32_to_float64(l1, &env->sse_status); 652 d->XMM_D(1) = int32_to_float64(l1, &env->sse_status);
786 } 653 }
787 654
788 -void OPPROTO op_cvtpi2ps(void) 655 +void helper_cvtpi2ps(XMMReg *d, MMXReg *s)
789 { 656 {
790 - XMMReg *d = (Reg *)((char *)env + PARAM1);  
791 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
792 d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status); 657 d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status);
793 d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status); 658 d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status);
794 } 659 }
795 660
796 -void OPPROTO op_cvtpi2pd(void) 661 +void helper_cvtpi2pd(XMMReg *d, MMXReg *s)
797 { 662 {
798 - XMMReg *d = (Reg *)((char *)env + PARAM1);  
799 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
800 d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status); 663 d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status);
801 d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status); 664 d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status);
802 } 665 }
803 666
804 -void OPPROTO op_cvtsi2ss(void) 667 +void helper_cvtsi2ss(XMMReg *d, uint32_t val)
805 { 668 {
806 - XMMReg *d = (Reg *)((char *)env + PARAM1);  
807 - d->XMM_S(0) = int32_to_float32(T0, &env->sse_status); 669 + d->XMM_S(0) = int32_to_float32(val, &env->sse_status);
808 } 670 }
809 671
810 -void OPPROTO op_cvtsi2sd(void) 672 +void helper_cvtsi2sd(XMMReg *d, uint32_t val)
811 { 673 {
812 - XMMReg *d = (Reg *)((char *)env + PARAM1);  
813 - d->XMM_D(0) = int32_to_float64(T0, &env->sse_status); 674 + d->XMM_D(0) = int32_to_float64(val, &env->sse_status);
814 } 675 }
815 676
816 #ifdef TARGET_X86_64 677 #ifdef TARGET_X86_64
817 -void OPPROTO op_cvtsq2ss(void) 678 +void helper_cvtsq2ss(XMMReg *d, uint64_t val)
818 { 679 {
819 - XMMReg *d = (Reg *)((char *)env + PARAM1);  
820 - d->XMM_S(0) = int64_to_float32(T0, &env->sse_status); 680 + d->XMM_S(0) = int64_to_float32(val, &env->sse_status);
821 } 681 }
822 682
823 -void OPPROTO op_cvtsq2sd(void) 683 +void helper_cvtsq2sd(XMMReg *d, uint64_t val)
824 { 684 {
825 - XMMReg *d = (Reg *)((char *)env + PARAM1);  
826 - d->XMM_D(0) = int64_to_float64(T0, &env->sse_status); 685 + d->XMM_D(0) = int64_to_float64(val, &env->sse_status);
827 } 686 }
828 #endif 687 #endif
829 688
830 /* float to integer */ 689 /* float to integer */
831 -void OPPROTO op_cvtps2dq(void) 690 +void helper_cvtps2dq(XMMReg *d, XMMReg *s)
832 { 691 {
833 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
834 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
835 d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status); 692 d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
836 d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status); 693 d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
837 d->XMM_L(2) = float32_to_int32(s->XMM_S(2), &env->sse_status); 694 d->XMM_L(2) = float32_to_int32(s->XMM_S(2), &env->sse_status);
838 d->XMM_L(3) = float32_to_int32(s->XMM_S(3), &env->sse_status); 695 d->XMM_L(3) = float32_to_int32(s->XMM_S(3), &env->sse_status);
839 } 696 }
840 697
841 -void OPPROTO op_cvtpd2dq(void) 698 +void helper_cvtpd2dq(XMMReg *d, XMMReg *s)
842 { 699 {
843 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
844 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
845 d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status); 700 d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
846 d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status); 701 d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
847 d->XMM_Q(1) = 0; 702 d->XMM_Q(1) = 0;
848 } 703 }
849 704
850 -void OPPROTO op_cvtps2pi(void) 705 +void helper_cvtps2pi(MMXReg *d, XMMReg *s)
851 { 706 {
852 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
853 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
854 d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status); 707 d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
855 d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status); 708 d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
856 } 709 }
857 710
858 -void OPPROTO op_cvtpd2pi(void) 711 +void helper_cvtpd2pi(MMXReg *d, XMMReg *s)
859 { 712 {
860 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
861 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
862 d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status); 713 d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
863 d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status); 714 d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
864 } 715 }
865 716
866 -void OPPROTO op_cvtss2si(void) 717 +int32_t helper_cvtss2si(XMMReg *s)
867 { 718 {
868 - XMMReg *s = (XMMReg *)((char *)env + PARAM1);  
869 - T0 = float32_to_int32(s->XMM_S(0), &env->sse_status); 719 + return float32_to_int32(s->XMM_S(0), &env->sse_status);
870 } 720 }
871 721
872 -void OPPROTO op_cvtsd2si(void) 722 +int32_t helper_cvtsd2si(XMMReg *s)
873 { 723 {
874 - XMMReg *s = (XMMReg *)((char *)env + PARAM1);  
875 - T0 = float64_to_int32(s->XMM_D(0), &env->sse_status); 724 + return float64_to_int32(s->XMM_D(0), &env->sse_status);
876 } 725 }
877 726
878 #ifdef TARGET_X86_64 727 #ifdef TARGET_X86_64
879 -void OPPROTO op_cvtss2sq(void) 728 +int64_t helper_cvtss2sq(XMMReg *s)
880 { 729 {
881 - XMMReg *s = (XMMReg *)((char *)env + PARAM1);  
882 - T0 = float32_to_int64(s->XMM_S(0), &env->sse_status); 730 + return float32_to_int64(s->XMM_S(0), &env->sse_status);
883 } 731 }
884 732
885 -void OPPROTO op_cvtsd2sq(void) 733 +int64_t helper_cvtsd2sq(XMMReg *s)
886 { 734 {
887 - XMMReg *s = (XMMReg *)((char *)env + PARAM1);  
888 - T0 = float64_to_int64(s->XMM_D(0), &env->sse_status); 735 + return float64_to_int64(s->XMM_D(0), &env->sse_status);
889 } 736 }
890 #endif 737 #endif
891 738
892 /* float to integer truncated */ 739 /* float to integer truncated */
893 -void OPPROTO op_cvttps2dq(void) 740 +void helper_cvttps2dq(XMMReg *d, XMMReg *s)
894 { 741 {
895 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
896 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
897 d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); 742 d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
898 d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status); 743 d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
899 d->XMM_L(2) = float32_to_int32_round_to_zero(s->XMM_S(2), &env->sse_status); 744 d->XMM_L(2) = float32_to_int32_round_to_zero(s->XMM_S(2), &env->sse_status);
900 d->XMM_L(3) = float32_to_int32_round_to_zero(s->XMM_S(3), &env->sse_status); 745 d->XMM_L(3) = float32_to_int32_round_to_zero(s->XMM_S(3), &env->sse_status);
901 } 746 }
902 747
903 -void OPPROTO op_cvttpd2dq(void) 748 +void helper_cvttpd2dq(XMMReg *d, XMMReg *s)
904 { 749 {
905 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
906 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
907 d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); 750 d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
908 d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status); 751 d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
909 d->XMM_Q(1) = 0; 752 d->XMM_Q(1) = 0;
910 } 753 }
911 754
912 -void OPPROTO op_cvttps2pi(void) 755 +void helper_cvttps2pi(MMXReg *d, XMMReg *s)
913 { 756 {
914 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
915 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
916 d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); 757 d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
917 d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status); 758 d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
918 } 759 }
919 760
920 -void OPPROTO op_cvttpd2pi(void) 761 +void helper_cvttpd2pi(MMXReg *d, XMMReg *s)
921 { 762 {
922 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
923 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
924 d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); 763 d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
925 d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status); 764 d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
926 } 765 }
927 766
928 -void OPPROTO op_cvttss2si(void) 767 +int32_t helper_cvttss2si(XMMReg *s)
929 { 768 {
930 - XMMReg *s = (XMMReg *)((char *)env + PARAM1);  
931 - T0 = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); 769 + return float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
932 } 770 }
933 771
934 -void OPPROTO op_cvttsd2si(void) 772 +int32_t helper_cvttsd2si(XMMReg *s)
935 { 773 {
936 - XMMReg *s = (XMMReg *)((char *)env + PARAM1);  
937 - T0 = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); 774 + return float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
938 } 775 }
939 776
940 #ifdef TARGET_X86_64 777 #ifdef TARGET_X86_64
941 -void OPPROTO op_cvttss2sq(void) 778 +int64_t helper_cvttss2sq(XMMReg *s)
942 { 779 {
943 - XMMReg *s = (XMMReg *)((char *)env + PARAM1);  
944 - T0 = float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status); 780 + return float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status);
945 } 781 }
946 782
947 -void OPPROTO op_cvttsd2sq(void) 783 +int64_t helper_cvttsd2sq(XMMReg *s)
948 { 784 {
949 - XMMReg *s = (XMMReg *)((char *)env + PARAM1);  
950 - T0 = float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status); 785 + return float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status);
951 } 786 }
952 #endif 787 #endif
953 788
954 -void OPPROTO op_rsqrtps(void) 789 +void helper_rsqrtps(XMMReg *d, XMMReg *s)
955 { 790 {
956 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
957 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
958 d->XMM_S(0) = approx_rsqrt(s->XMM_S(0)); 791 d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
959 d->XMM_S(1) = approx_rsqrt(s->XMM_S(1)); 792 d->XMM_S(1) = approx_rsqrt(s->XMM_S(1));
960 d->XMM_S(2) = approx_rsqrt(s->XMM_S(2)); 793 d->XMM_S(2) = approx_rsqrt(s->XMM_S(2));
961 d->XMM_S(3) = approx_rsqrt(s->XMM_S(3)); 794 d->XMM_S(3) = approx_rsqrt(s->XMM_S(3));
962 } 795 }
963 796
964 -void OPPROTO op_rsqrtss(void) 797 +void helper_rsqrtss(XMMReg *d, XMMReg *s)
965 { 798 {
966 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
967 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
968 d->XMM_S(0) = approx_rsqrt(s->XMM_S(0)); 799 d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
969 } 800 }
970 801
971 -void OPPROTO op_rcpps(void) 802 +void helper_rcpps(XMMReg *d, XMMReg *s)
972 { 803 {
973 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
974 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
975 d->XMM_S(0) = approx_rcp(s->XMM_S(0)); 804 d->XMM_S(0) = approx_rcp(s->XMM_S(0));
976 d->XMM_S(1) = approx_rcp(s->XMM_S(1)); 805 d->XMM_S(1) = approx_rcp(s->XMM_S(1));
977 d->XMM_S(2) = approx_rcp(s->XMM_S(2)); 806 d->XMM_S(2) = approx_rcp(s->XMM_S(2));
978 d->XMM_S(3) = approx_rcp(s->XMM_S(3)); 807 d->XMM_S(3) = approx_rcp(s->XMM_S(3));
979 } 808 }
980 809
981 -void OPPROTO op_rcpss(void) 810 +void helper_rcpss(XMMReg *d, XMMReg *s)
982 { 811 {
983 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
984 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
985 d->XMM_S(0) = approx_rcp(s->XMM_S(0)); 812 d->XMM_S(0) = approx_rcp(s->XMM_S(0));
986 } 813 }
987 814
988 -void OPPROTO op_haddps(void) 815 +void helper_haddps(XMMReg *d, XMMReg *s)
989 { 816 {
990 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
991 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
992 XMMReg r; 817 XMMReg r;
993 r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1); 818 r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1);
994 r.XMM_S(1) = d->XMM_S(2) + d->XMM_S(3); 819 r.XMM_S(1) = d->XMM_S(2) + d->XMM_S(3);
@@ -997,20 +822,16 @@ void OPPROTO op_haddps(void) @@ -997,20 +822,16 @@ void OPPROTO op_haddps(void)
997 *d = r; 822 *d = r;
998 } 823 }
999 824
1000 -void OPPROTO op_haddpd(void) 825 +void helper_haddpd(XMMReg *d, XMMReg *s)
1001 { 826 {
1002 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
1003 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
1004 XMMReg r; 827 XMMReg r;
1005 r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1); 828 r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1);
1006 r.XMM_D(1) = s->XMM_D(0) + s->XMM_D(1); 829 r.XMM_D(1) = s->XMM_D(0) + s->XMM_D(1);
1007 *d = r; 830 *d = r;
1008 } 831 }
1009 832
1010 -void OPPROTO op_hsubps(void) 833 +void helper_hsubps(XMMReg *d, XMMReg *s)
1011 { 834 {
1012 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
1013 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
1014 XMMReg r; 835 XMMReg r;
1015 r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1); 836 r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1);
1016 r.XMM_S(1) = d->XMM_S(2) - d->XMM_S(3); 837 r.XMM_S(1) = d->XMM_S(2) - d->XMM_S(3);
@@ -1019,68 +840,50 @@ void OPPROTO op_hsubps(void) @@ -1019,68 +840,50 @@ void OPPROTO op_hsubps(void)
1019 *d = r; 840 *d = r;
1020 } 841 }
1021 842
1022 -void OPPROTO op_hsubpd(void) 843 +void helper_hsubpd(XMMReg *d, XMMReg *s)
1023 { 844 {
1024 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
1025 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
1026 XMMReg r; 845 XMMReg r;
1027 r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1); 846 r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1);
1028 r.XMM_D(1) = s->XMM_D(0) - s->XMM_D(1); 847 r.XMM_D(1) = s->XMM_D(0) - s->XMM_D(1);
1029 *d = r; 848 *d = r;
1030 } 849 }
1031 850
1032 -void OPPROTO op_addsubps(void) 851 +void helper_addsubps(XMMReg *d, XMMReg *s)
1033 { 852 {
1034 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
1035 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
1036 d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0); 853 d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0);
1037 d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1); 854 d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1);
1038 d->XMM_S(2) = d->XMM_S(2) - s->XMM_S(2); 855 d->XMM_S(2) = d->XMM_S(2) - s->XMM_S(2);
1039 d->XMM_S(3) = d->XMM_S(3) + s->XMM_S(3); 856 d->XMM_S(3) = d->XMM_S(3) + s->XMM_S(3);
1040 } 857 }
1041 858
1042 -void OPPROTO op_addsubpd(void) 859 +void helper_addsubpd(XMMReg *d, XMMReg *s)
1043 { 860 {
1044 - XMMReg *d = (XMMReg *)((char *)env + PARAM1);  
1045 - XMMReg *s = (XMMReg *)((char *)env + PARAM2);  
1046 d->XMM_D(0) = d->XMM_D(0) - s->XMM_D(0); 861 d->XMM_D(0) = d->XMM_D(0) - s->XMM_D(0);
1047 d->XMM_D(1) = d->XMM_D(1) + s->XMM_D(1); 862 d->XMM_D(1) = d->XMM_D(1) + s->XMM_D(1);
1048 } 863 }
1049 864
1050 /* XXX: unordered */ 865 /* XXX: unordered */
1051 -#define SSE_OP_CMP(name, F)\  
1052 -void OPPROTO op_ ## name ## ps (void)\ 866 +#define SSE_HELPER_CMP(name, F)\
  867 +void helper_ ## name ## ps (Reg *d, Reg *s)\
1053 {\ 868 {\
1054 - Reg *d, *s;\  
1055 - d = (Reg *)((char *)env + PARAM1);\  
1056 - s = (Reg *)((char *)env + PARAM2);\  
1057 d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ 869 d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
1058 d->XMM_L(1) = F(32, d->XMM_S(1), s->XMM_S(1));\ 870 d->XMM_L(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
1059 d->XMM_L(2) = F(32, d->XMM_S(2), s->XMM_S(2));\ 871 d->XMM_L(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
1060 d->XMM_L(3) = F(32, d->XMM_S(3), s->XMM_S(3));\ 872 d->XMM_L(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
1061 }\ 873 }\
1062 \ 874 \
1063 -void OPPROTO op_ ## name ## ss (void)\ 875 +void helper_ ## name ## ss (Reg *d, Reg *s)\
1064 {\ 876 {\
1065 - Reg *d, *s;\  
1066 - d = (Reg *)((char *)env + PARAM1);\  
1067 - s = (Reg *)((char *)env + PARAM2);\  
1068 d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ 877 d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
1069 }\ 878 }\
1070 -void OPPROTO op_ ## name ## pd (void)\ 879 +void helper_ ## name ## pd (Reg *d, Reg *s)\
1071 {\ 880 {\
1072 - Reg *d, *s;\  
1073 - d = (Reg *)((char *)env + PARAM1);\  
1074 - s = (Reg *)((char *)env + PARAM2);\  
1075 d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ 881 d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
1076 d->XMM_Q(1) = F(64, d->XMM_D(1), s->XMM_D(1));\ 882 d->XMM_Q(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
1077 }\ 883 }\
1078 \ 884 \
1079 -void OPPROTO op_ ## name ## sd (void)\ 885 +void helper_ ## name ## sd (Reg *d, Reg *s)\
1080 {\ 886 {\
1081 - Reg *d, *s;\  
1082 - d = (Reg *)((char *)env + PARAM1);\  
1083 - s = (Reg *)((char *)env + PARAM2);\  
1084 d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ 887 d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
1085 } 888 }
1086 889
@@ -1093,24 +896,21 @@ void OPPROTO op_ ## name ## sd (void)\ @@ -1093,24 +896,21 @@ void OPPROTO op_ ## name ## sd (void)\
1093 #define FPU_CMPNLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? 0 : -1 896 #define FPU_CMPNLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? 0 : -1
1094 #define FPU_CMPORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? 0 : -1 897 #define FPU_CMPORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? 0 : -1
1095 898
1096 -SSE_OP_CMP(cmpeq, FPU_CMPEQ)  
1097 -SSE_OP_CMP(cmplt, FPU_CMPLT)  
1098 -SSE_OP_CMP(cmple, FPU_CMPLE)  
1099 -SSE_OP_CMP(cmpunord, FPU_CMPUNORD)  
1100 -SSE_OP_CMP(cmpneq, FPU_CMPNEQ)  
1101 -SSE_OP_CMP(cmpnlt, FPU_CMPNLT)  
1102 -SSE_OP_CMP(cmpnle, FPU_CMPNLE)  
1103 -SSE_OP_CMP(cmpord, FPU_CMPORD) 899 +SSE_HELPER_CMP(cmpeq, FPU_CMPEQ)
  900 +SSE_HELPER_CMP(cmplt, FPU_CMPLT)
  901 +SSE_HELPER_CMP(cmple, FPU_CMPLE)
  902 +SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD)
  903 +SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ)
  904 +SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT)
  905 +SSE_HELPER_CMP(cmpnle, FPU_CMPNLE)
  906 +SSE_HELPER_CMP(cmpord, FPU_CMPORD)
1104 907
1105 const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; 908 const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
1106 909
1107 -void OPPROTO op_ucomiss(void) 910 +void helper_ucomiss(Reg *d, Reg *s)
1108 { 911 {
1109 int ret; 912 int ret;
1110 float32 s0, s1; 913 float32 s0, s1;
1111 - Reg *d, *s;  
1112 - d = (Reg *)((char *)env + PARAM1);  
1113 - s = (Reg *)((char *)env + PARAM2);  
1114 914
1115 s0 = d->XMM_S(0); 915 s0 = d->XMM_S(0);
1116 s1 = s->XMM_S(0); 916 s1 = s->XMM_S(0);
@@ -1119,13 +919,10 @@ void OPPROTO op_ucomiss(void) @@ -1119,13 +919,10 @@ void OPPROTO op_ucomiss(void)
1119 FORCE_RET(); 919 FORCE_RET();
1120 } 920 }
1121 921
1122 -void OPPROTO op_comiss(void) 922 +void helper_comiss(Reg *d, Reg *s)
1123 { 923 {
1124 int ret; 924 int ret;
1125 float32 s0, s1; 925 float32 s0, s1;
1126 - Reg *d, *s;  
1127 - d = (Reg *)((char *)env + PARAM1);  
1128 - s = (Reg *)((char *)env + PARAM2);  
1129 926
1130 s0 = d->XMM_S(0); 927 s0 = d->XMM_S(0);
1131 s1 = s->XMM_S(0); 928 s1 = s->XMM_S(0);
@@ -1134,13 +931,10 @@ void OPPROTO op_comiss(void) @@ -1134,13 +931,10 @@ void OPPROTO op_comiss(void)
1134 FORCE_RET(); 931 FORCE_RET();
1135 } 932 }
1136 933
1137 -void OPPROTO op_ucomisd(void) 934 +void helper_ucomisd(Reg *d, Reg *s)
1138 { 935 {
1139 int ret; 936 int ret;
1140 float64 d0, d1; 937 float64 d0, d1;
1141 - Reg *d, *s;  
1142 - d = (Reg *)((char *)env + PARAM1);  
1143 - s = (Reg *)((char *)env + PARAM2);  
1144 938
1145 d0 = d->XMM_D(0); 939 d0 = d->XMM_D(0);
1146 d1 = s->XMM_D(0); 940 d1 = s->XMM_D(0);
@@ -1149,13 +943,10 @@ void OPPROTO op_ucomisd(void) @@ -1149,13 +943,10 @@ void OPPROTO op_ucomisd(void)
1149 FORCE_RET(); 943 FORCE_RET();
1150 } 944 }
1151 945
1152 -void OPPROTO op_comisd(void) 946 +void helper_comisd(Reg *d, Reg *s)
1153 { 947 {
1154 int ret; 948 int ret;
1155 float64 d0, d1; 949 float64 d0, d1;
1156 - Reg *d, *s;  
1157 - d = (Reg *)((char *)env + PARAM1);  
1158 - s = (Reg *)((char *)env + PARAM2);  
1159 950
1160 d0 = d->XMM_D(0); 951 d0 = d->XMM_D(0);
1161 d1 = s->XMM_D(0); 952 d1 = s->XMM_D(0);
@@ -1164,76 +955,54 @@ void OPPROTO op_comisd(void) @@ -1164,76 +955,54 @@ void OPPROTO op_comisd(void)
1164 FORCE_RET(); 955 FORCE_RET();
1165 } 956 }
1166 957
1167 -void OPPROTO op_movmskps(void) 958 +uint32_t helper_movmskps(Reg *s)
1168 { 959 {
1169 int b0, b1, b2, b3; 960 int b0, b1, b2, b3;
1170 - Reg *s;  
1171 - s = (Reg *)((char *)env + PARAM1);  
1172 b0 = s->XMM_L(0) >> 31; 961 b0 = s->XMM_L(0) >> 31;
1173 b1 = s->XMM_L(1) >> 31; 962 b1 = s->XMM_L(1) >> 31;
1174 b2 = s->XMM_L(2) >> 31; 963 b2 = s->XMM_L(2) >> 31;
1175 b3 = s->XMM_L(3) >> 31; 964 b3 = s->XMM_L(3) >> 31;
1176 - T0 = b0 | (b1 << 1) | (b2 << 2) | (b3 << 3); 965 + return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3);
1177 } 966 }
1178 967
1179 -void OPPROTO op_movmskpd(void) 968 +uint32_t helper_movmskpd(Reg *s)
1180 { 969 {
1181 int b0, b1; 970 int b0, b1;
1182 - Reg *s;  
1183 - s = (Reg *)((char *)env + PARAM1);  
1184 b0 = s->XMM_L(1) >> 31; 971 b0 = s->XMM_L(1) >> 31;
1185 b1 = s->XMM_L(3) >> 31; 972 b1 = s->XMM_L(3) >> 31;
1186 - T0 = b0 | (b1 << 1); 973 + return b0 | (b1 << 1);
1187 } 974 }
1188 975
1189 #endif 976 #endif
1190 977
1191 -void OPPROTO glue(op_pmovmskb, SUFFIX)(void)  
1192 -{  
1193 - Reg *s;  
1194 - s = (Reg *)((char *)env + PARAM1);  
1195 - T0 = 0;  
1196 - T0 |= (s->XMM_B(0) >> 7);  
1197 - T0 |= (s->XMM_B(1) >> 6) & 0x02;  
1198 - T0 |= (s->XMM_B(2) >> 5) & 0x04;  
1199 - T0 |= (s->XMM_B(3) >> 4) & 0x08;  
1200 - T0 |= (s->XMM_B(4) >> 3) & 0x10;  
1201 - T0 |= (s->XMM_B(5) >> 2) & 0x20;  
1202 - T0 |= (s->XMM_B(6) >> 1) & 0x40;  
1203 - T0 |= (s->XMM_B(7)) & 0x80; 978 +uint32_t glue(helper_pmovmskb, SUFFIX)(Reg *s)
  979 +{
  980 + uint32_t val;
  981 + val = 0;
  982 + val |= (s->XMM_B(0) >> 7);
  983 + val |= (s->XMM_B(1) >> 6) & 0x02;
  984 + val |= (s->XMM_B(2) >> 5) & 0x04;
  985 + val |= (s->XMM_B(3) >> 4) & 0x08;
  986 + val |= (s->XMM_B(4) >> 3) & 0x10;
  987 + val |= (s->XMM_B(5) >> 2) & 0x20;
  988 + val |= (s->XMM_B(6) >> 1) & 0x40;
  989 + val |= (s->XMM_B(7)) & 0x80;
1204 #if SHIFT == 1 990 #if SHIFT == 1
1205 - T0 |= (s->XMM_B(8) << 1) & 0x0100;  
1206 - T0 |= (s->XMM_B(9) << 2) & 0x0200;  
1207 - T0 |= (s->XMM_B(10) << 3) & 0x0400;  
1208 - T0 |= (s->XMM_B(11) << 4) & 0x0800;  
1209 - T0 |= (s->XMM_B(12) << 5) & 0x1000;  
1210 - T0 |= (s->XMM_B(13) << 6) & 0x2000;  
1211 - T0 |= (s->XMM_B(14) << 7) & 0x4000;  
1212 - T0 |= (s->XMM_B(15) << 8) & 0x8000; 991 + val |= (s->XMM_B(8) << 1) & 0x0100;
  992 + val |= (s->XMM_B(9) << 2) & 0x0200;
  993 + val |= (s->XMM_B(10) << 3) & 0x0400;
  994 + val |= (s->XMM_B(11) << 4) & 0x0800;
  995 + val |= (s->XMM_B(12) << 5) & 0x1000;
  996 + val |= (s->XMM_B(13) << 6) & 0x2000;
  997 + val |= (s->XMM_B(14) << 7) & 0x4000;
  998 + val |= (s->XMM_B(15) << 8) & 0x8000;
1213 #endif 999 #endif
  1000 + return val;
1214 } 1001 }
1215 1002
1216 -void OPPROTO glue(op_pinsrw, SUFFIX) (void)  
1217 -{  
1218 - Reg *d = (Reg *)((char *)env + PARAM1);  
1219 - int pos = PARAM2;  
1220 -  
1221 - d->W(pos) = T0;  
1222 -}  
1223 -  
1224 -void OPPROTO glue(op_pextrw, SUFFIX) (void)  
1225 -{  
1226 - Reg *s = (Reg *)((char *)env + PARAM1);  
1227 - int pos = PARAM2;  
1228 -  
1229 - T0 = s->W(pos);  
1230 -}  
1231 -  
1232 -void OPPROTO glue(op_packsswb, SUFFIX) (void) 1003 +void glue(helper_packsswb, SUFFIX) (Reg *d, Reg *s)
1233 { 1004 {
1234 - Reg r, *d, *s;  
1235 - d = (Reg *)((char *)env + PARAM1);  
1236 - s = (Reg *)((char *)env + PARAM2); 1005 + Reg r;
1237 1006
1238 r.B(0) = satsb((int16_t)d->W(0)); 1007 r.B(0) = satsb((int16_t)d->W(0));
1239 r.B(1) = satsb((int16_t)d->W(1)); 1008 r.B(1) = satsb((int16_t)d->W(1));
@@ -1258,11 +1027,9 @@ void OPPROTO glue(op_packsswb, SUFFIX) (void) @@ -1258,11 +1027,9 @@ void OPPROTO glue(op_packsswb, SUFFIX) (void)
1258 *d = r; 1027 *d = r;
1259 } 1028 }
1260 1029
1261 -void OPPROTO glue(op_packuswb, SUFFIX) (void) 1030 +void glue(helper_packuswb, SUFFIX) (Reg *d, Reg *s)
1262 { 1031 {
1263 - Reg r, *d, *s;  
1264 - d = (Reg *)((char *)env + PARAM1);  
1265 - s = (Reg *)((char *)env + PARAM2); 1032 + Reg r;
1266 1033
1267 r.B(0) = satub((int16_t)d->W(0)); 1034 r.B(0) = satub((int16_t)d->W(0));
1268 r.B(1) = satub((int16_t)d->W(1)); 1035 r.B(1) = satub((int16_t)d->W(1));
@@ -1287,11 +1054,9 @@ void OPPROTO glue(op_packuswb, SUFFIX) (void) @@ -1287,11 +1054,9 @@ void OPPROTO glue(op_packuswb, SUFFIX) (void)
1287 *d = r; 1054 *d = r;
1288 } 1055 }
1289 1056
1290 -void OPPROTO glue(op_packssdw, SUFFIX) (void) 1057 +void glue(helper_packssdw, SUFFIX) (Reg *d, Reg *s)
1291 { 1058 {
1292 - Reg r, *d, *s;  
1293 - d = (Reg *)((char *)env + PARAM1);  
1294 - s = (Reg *)((char *)env + PARAM2); 1059 + Reg r;
1295 1060
1296 r.W(0) = satsw(d->L(0)); 1061 r.W(0) = satsw(d->L(0));
1297 r.W(1) = satsw(d->L(1)); 1062 r.W(1) = satsw(d->L(1));
@@ -1310,11 +1075,9 @@ void OPPROTO glue(op_packssdw, SUFFIX) (void) @@ -1310,11 +1075,9 @@ void OPPROTO glue(op_packssdw, SUFFIX) (void)
1310 1075
1311 #define UNPCK_OP(base_name, base) \ 1076 #define UNPCK_OP(base_name, base) \
1312 \ 1077 \
1313 -void OPPROTO glue(op_punpck ## base_name ## bw, SUFFIX) (void) \ 1078 +void glue(helper_punpck ## base_name ## bw, SUFFIX) (Reg *d, Reg *s) \
1314 { \ 1079 { \
1315 - Reg r, *d, *s; \  
1316 - d = (Reg *)((char *)env + PARAM1); \  
1317 - s = (Reg *)((char *)env + PARAM2); \ 1080 + Reg r; \
1318 \ 1081 \
1319 r.B(0) = d->B((base << (SHIFT + 2)) + 0); \ 1082 r.B(0) = d->B((base << (SHIFT + 2)) + 0); \
1320 r.B(1) = s->B((base << (SHIFT + 2)) + 0); \ 1083 r.B(1) = s->B((base << (SHIFT + 2)) + 0); \
@@ -1337,11 +1100,9 @@ XMM_ONLY( \ @@ -1337,11 +1100,9 @@ XMM_ONLY( \
1337 *d = r; \ 1100 *d = r; \
1338 } \ 1101 } \
1339 \ 1102 \
1340 -void OPPROTO glue(op_punpck ## base_name ## wd, SUFFIX) (void) \ 1103 +void glue(helper_punpck ## base_name ## wd, SUFFIX) (Reg *d, Reg *s) \
1341 { \ 1104 { \
1342 - Reg r, *d, *s; \  
1343 - d = (Reg *)((char *)env + PARAM1); \  
1344 - s = (Reg *)((char *)env + PARAM2); \ 1105 + Reg r; \
1345 \ 1106 \
1346 r.W(0) = d->W((base << (SHIFT + 1)) + 0); \ 1107 r.W(0) = d->W((base << (SHIFT + 1)) + 0); \
1347 r.W(1) = s->W((base << (SHIFT + 1)) + 0); \ 1108 r.W(1) = s->W((base << (SHIFT + 1)) + 0); \
@@ -1356,11 +1117,9 @@ XMM_ONLY( \ @@ -1356,11 +1117,9 @@ XMM_ONLY( \
1356 *d = r; \ 1117 *d = r; \
1357 } \ 1118 } \
1358 \ 1119 \
1359 -void OPPROTO glue(op_punpck ## base_name ## dq, SUFFIX) (void) \ 1120 +void glue(helper_punpck ## base_name ## dq, SUFFIX) (Reg *d, Reg *s) \
1360 { \ 1121 { \
1361 - Reg r, *d, *s; \  
1362 - d = (Reg *)((char *)env + PARAM1); \  
1363 - s = (Reg *)((char *)env + PARAM2); \ 1122 + Reg r; \
1364 \ 1123 \
1365 r.L(0) = d->L((base << SHIFT) + 0); \ 1124 r.L(0) = d->L((base << SHIFT) + 0); \
1366 r.L(1) = s->L((base << SHIFT) + 0); \ 1125 r.L(1) = s->L((base << SHIFT) + 0); \
@@ -1372,11 +1131,9 @@ XMM_ONLY( \ @@ -1372,11 +1131,9 @@ XMM_ONLY( \
1372 } \ 1131 } \
1373 \ 1132 \
1374 XMM_ONLY( \ 1133 XMM_ONLY( \
1375 -void OPPROTO glue(op_punpck ## base_name ## qdq, SUFFIX) (void) \ 1134 +void glue(helper_punpck ## base_name ## qdq, SUFFIX) (Reg *d, Reg *s) \
1376 { \ 1135 { \
1377 - Reg r, *d, *s; \  
1378 - d = (Reg *)((char *)env + PARAM1); \  
1379 - s = (Reg *)((char *)env + PARAM2); \ 1136 + Reg r; \
1380 \ 1137 \
1381 r.Q(0) = d->Q(base); \ 1138 r.Q(0) = d->Q(base); \
1382 r.Q(1) = s->Q(base); \ 1139 r.Q(1) = s->Q(base); \
@@ -1389,166 +1146,128 @@ UNPCK_OP(h, 1) @@ -1389,166 +1146,128 @@ UNPCK_OP(h, 1)
1389 1146
1390 /* 3DNow! float ops */ 1147 /* 3DNow! float ops */
1391 #if SHIFT == 0 1148 #if SHIFT == 0
1392 -void OPPROTO op_pi2fd(void) 1149 +void helper_pi2fd(MMXReg *d, MMXReg *s)
1393 { 1150 {
1394 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1395 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1396 d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status); 1151 d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status);
1397 d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status); 1152 d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status);
1398 } 1153 }
1399 1154
1400 -void OPPROTO op_pi2fw(void) 1155 +void helper_pi2fw(MMXReg *d, MMXReg *s)
1401 { 1156 {
1402 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1403 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1404 d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status); 1157 d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status);
1405 d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status); 1158 d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status);
1406 } 1159 }
1407 1160
1408 -void OPPROTO op_pf2id(void) 1161 +void helper_pf2id(MMXReg *d, MMXReg *s)
1409 { 1162 {
1410 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1411 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1412 d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status); 1163 d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status);
1413 d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status); 1164 d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status);
1414 } 1165 }
1415 1166
1416 -void OPPROTO op_pf2iw(void) 1167 +void helper_pf2iw(MMXReg *d, MMXReg *s)
1417 { 1168 {
1418 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1419 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1420 d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status)); 1169 d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status));
1421 d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status)); 1170 d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status));
1422 } 1171 }
1423 1172
1424 -void OPPROTO op_pfacc(void) 1173 +void helper_pfacc(MMXReg *d, MMXReg *s)
1425 { 1174 {
1426 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1427 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1428 MMXReg r; 1175 MMXReg r;
1429 r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); 1176 r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
1430 r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); 1177 r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
1431 *d = r; 1178 *d = r;
1432 } 1179 }
1433 1180
1434 -void OPPROTO op_pfadd(void) 1181 +void helper_pfadd(MMXReg *d, MMXReg *s)
1435 { 1182 {
1436 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1437 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1438 d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); 1183 d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
1439 d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); 1184 d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
1440 } 1185 }
1441 1186
1442 -void OPPROTO op_pfcmpeq(void) 1187 +void helper_pfcmpeq(MMXReg *d, MMXReg *s)
1443 { 1188 {
1444 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1445 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1446 d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0; 1189 d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0;
1447 d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0; 1190 d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0;
1448 } 1191 }
1449 1192
1450 -void OPPROTO op_pfcmpge(void) 1193 +void helper_pfcmpge(MMXReg *d, MMXReg *s)
1451 { 1194 {
1452 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1453 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1454 d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; 1195 d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
1455 d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0; 1196 d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
1456 } 1197 }
1457 1198
1458 -void OPPROTO op_pfcmpgt(void) 1199 +void helper_pfcmpgt(MMXReg *d, MMXReg *s)
1459 { 1200 {
1460 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1461 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1462 d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; 1201 d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
1463 d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0; 1202 d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
1464 } 1203 }
1465 1204
1466 -void OPPROTO op_pfmax(void) 1205 +void helper_pfmax(MMXReg *d, MMXReg *s)
1467 { 1206 {
1468 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1469 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1470 if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status)) 1207 if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status))
1471 d->MMX_S(0) = s->MMX_S(0); 1208 d->MMX_S(0) = s->MMX_S(0);
1472 if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status)) 1209 if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status))
1473 d->MMX_S(1) = s->MMX_S(1); 1210 d->MMX_S(1) = s->MMX_S(1);
1474 } 1211 }
1475 1212
1476 -void OPPROTO op_pfmin(void) 1213 +void helper_pfmin(MMXReg *d, MMXReg *s)
1477 { 1214 {
1478 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1479 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1480 if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status)) 1215 if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status))
1481 d->MMX_S(0) = s->MMX_S(0); 1216 d->MMX_S(0) = s->MMX_S(0);
1482 if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status)) 1217 if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status))
1483 d->MMX_S(1) = s->MMX_S(1); 1218 d->MMX_S(1) = s->MMX_S(1);
1484 } 1219 }
1485 1220
1486 -void OPPROTO op_pfmul(void) 1221 +void helper_pfmul(MMXReg *d, MMXReg *s)
1487 { 1222 {
1488 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1489 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1490 d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); 1223 d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
1491 d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); 1224 d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
1492 } 1225 }
1493 1226
1494 -void OPPROTO op_pfnacc(void) 1227 +void helper_pfnacc(MMXReg *d, MMXReg *s)
1495 { 1228 {
1496 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1497 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1498 MMXReg r; 1229 MMXReg r;
1499 r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); 1230 r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
1500 r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); 1231 r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
1501 *d = r; 1232 *d = r;
1502 } 1233 }
1503 1234
1504 -void OPPROTO op_pfpnacc(void) 1235 +void helper_pfpnacc(MMXReg *d, MMXReg *s)
1505 { 1236 {
1506 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1507 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1508 MMXReg r; 1237 MMXReg r;
1509 r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); 1238 r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
1510 r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); 1239 r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
1511 *d = r; 1240 *d = r;
1512 } 1241 }
1513 1242
1514 -void OPPROTO op_pfrcp(void) 1243 +void helper_pfrcp(MMXReg *d, MMXReg *s)
1515 { 1244 {
1516 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1517 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1518 d->MMX_S(0) = approx_rcp(s->MMX_S(0)); 1245 d->MMX_S(0) = approx_rcp(s->MMX_S(0));
1519 d->MMX_S(1) = d->MMX_S(0); 1246 d->MMX_S(1) = d->MMX_S(0);
1520 } 1247 }
1521 1248
1522 -void OPPROTO op_pfrsqrt(void) 1249 +void helper_pfrsqrt(MMXReg *d, MMXReg *s)
1523 { 1250 {
1524 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1525 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1526 d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff; 1251 d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff;
1527 d->MMX_S(1) = approx_rsqrt(d->MMX_S(1)); 1252 d->MMX_S(1) = approx_rsqrt(d->MMX_S(1));
1528 d->MMX_L(1) |= s->MMX_L(0) & 0x80000000; 1253 d->MMX_L(1) |= s->MMX_L(0) & 0x80000000;
1529 d->MMX_L(0) = d->MMX_L(1); 1254 d->MMX_L(0) = d->MMX_L(1);
1530 } 1255 }
1531 1256
1532 -void OPPROTO op_pfsub(void) 1257 +void helper_pfsub(MMXReg *d, MMXReg *s)
1533 { 1258 {
1534 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1535 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1536 d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); 1259 d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
1537 d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); 1260 d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
1538 } 1261 }
1539 1262
1540 -void OPPROTO op_pfsubr(void) 1263 +void helper_pfsubr(MMXReg *d, MMXReg *s)
1541 { 1264 {
1542 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1543 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1544 d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status); 1265 d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status);
1545 d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status); 1266 d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status);
1546 } 1267 }
1547 1268
1548 -void OPPROTO op_pswapd(void) 1269 +void helper_pswapd(MMXReg *d, MMXReg *s)
1549 { 1270 {
1550 - MMXReg *d = (MMXReg *)((char *)env + PARAM1);  
1551 - MMXReg *s = (MMXReg *)((char *)env + PARAM2);  
1552 MMXReg r; 1271 MMXReg r;
1553 r.MMX_L(0) = s->MMX_L(1); 1272 r.MMX_L(0) = s->MMX_L(1);
1554 r.MMX_L(1) = s->MMX_L(0); 1273 r.MMX_L(1) = s->MMX_L(0);
target-i386/ops_sse_header.h 0 → 100644
  1 +/*
  2 + * MMX/3DNow!/SSE/SSE2/SSE3/PNI support
  3 + *
  4 + * Copyright (c) 2005 Fabrice Bellard
  5 + *
  6 + * This library is free software; you can redistribute it and/or
  7 + * modify it under the terms of the GNU Lesser General Public
  8 + * License as published by the Free Software Foundation; either
  9 + * version 2 of the License, or (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14 + * Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public
  17 + * License along with this library; if not, write to the Free Software
  18 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  19 + */
  20 +#if SHIFT == 0
  21 +#define Reg MMXReg
  22 +#define SUFFIX _mmx
  23 +#else
  24 +#define Reg XMMReg
  25 +#define SUFFIX _xmm
  26 +#endif
  27 +
  28 +void glue(helper_psrlw, SUFFIX)(Reg *d, Reg *s);
  29 +void glue(helper_psraw, SUFFIX)(Reg *d, Reg *s);
  30 +void glue(helper_psllw, SUFFIX)(Reg *d, Reg *s);
  31 +void glue(helper_psrld, SUFFIX)(Reg *d, Reg *s);
  32 +void glue(helper_psrad, SUFFIX)(Reg *d, Reg *s);
  33 +void glue(helper_pslld, SUFFIX)(Reg *d, Reg *s);
  34 +void glue(helper_psrlq, SUFFIX)(Reg *d, Reg *s);
  35 +void glue(helper_psllq, SUFFIX)(Reg *d, Reg *s);
  36 +
  37 +#if SHIFT == 1
  38 +void glue(helper_psrldq, SUFFIX)(Reg *d, Reg *s);
  39 +void glue(helper_pslldq, SUFFIX)(Reg *d, Reg *s);
  40 +#endif
  41 +
  42 +#define SSE_HELPER_B(name, F)\
  43 + void glue(name, SUFFIX) (Reg *d, Reg *s);
  44 +
  45 +#define SSE_HELPER_W(name, F)\
  46 + void glue(name, SUFFIX) (Reg *d, Reg *s);
  47 +
  48 +#define SSE_HELPER_L(name, F)\
  49 + void glue(name, SUFFIX) (Reg *d, Reg *s);
  50 +
  51 +#define SSE_HELPER_Q(name, F)\
  52 + void glue(name, SUFFIX) (Reg *d, Reg *s);
  53 +
  54 +SSE_HELPER_B(helper_paddb, FADD);
  55 +SSE_HELPER_W(helper_paddw, FADD);
  56 +SSE_HELPER_L(helper_paddl, FADD);
  57 +SSE_HELPER_Q(helper_paddq, FADD);
  58 +
  59 +SSE_HELPER_B(helper_psubb, FSUB);
  60 +SSE_HELPER_W(helper_psubw, FSUB);
  61 +SSE_HELPER_L(helper_psubl, FSUB);
  62 +SSE_HELPER_Q(helper_psubq, FSUB);
  63 +
  64 +SSE_HELPER_B(helper_paddusb, FADDUB);
  65 +SSE_HELPER_B(helper_paddsb, FADDSB);
  66 +SSE_HELPER_B(helper_psubusb, FSUBUB);
  67 +SSE_HELPER_B(helper_psubsb, FSUBSB);
  68 +
  69 +SSE_HELPER_W(helper_paddusw, FADDUW);
  70 +SSE_HELPER_W(helper_paddsw, FADDSW);
  71 +SSE_HELPER_W(helper_psubusw, FSUBUW);
  72 +SSE_HELPER_W(helper_psubsw, FSUBSW);
  73 +
  74 +SSE_HELPER_B(helper_pminub, FMINUB);
  75 +SSE_HELPER_B(helper_pmaxub, FMAXUB);
  76 +
  77 +SSE_HELPER_W(helper_pminsw, FMINSW);
  78 +SSE_HELPER_W(helper_pmaxsw, FMAXSW);
  79 +
  80 +SSE_HELPER_Q(helper_pand, FAND);
  81 +SSE_HELPER_Q(helper_pandn, FANDN);
  82 +SSE_HELPER_Q(helper_por, FOR);
  83 +SSE_HELPER_Q(helper_pxor, FXOR);
  84 +
  85 +SSE_HELPER_B(helper_pcmpgtb, FCMPGTB);
  86 +SSE_HELPER_W(helper_pcmpgtw, FCMPGTW);
  87 +SSE_HELPER_L(helper_pcmpgtl, FCMPGTL);
  88 +
  89 +SSE_HELPER_B(helper_pcmpeqb, FCMPEQ);
  90 +SSE_HELPER_W(helper_pcmpeqw, FCMPEQ);
  91 +SSE_HELPER_L(helper_pcmpeql, FCMPEQ);
  92 +
  93 +SSE_HELPER_W(helper_pmullw, FMULLW);
  94 +#if SHIFT == 0
  95 +SSE_HELPER_W(helper_pmulhrw, FMULHRW);
  96 +#endif
  97 +SSE_HELPER_W(helper_pmulhuw, FMULHUW);
  98 +SSE_HELPER_W(helper_pmulhw, FMULHW);
  99 +
  100 +SSE_HELPER_B(helper_pavgb, FAVG);
  101 +SSE_HELPER_W(helper_pavgw, FAVG);
  102 +
  103 +void glue(helper_pmuludq, SUFFIX) (Reg *d, Reg *s);
  104 +void glue(helper_pmaddwd, SUFFIX) (Reg *d, Reg *s);
  105 +
  106 +void glue(helper_psadbw, SUFFIX) (Reg *d, Reg *s);
  107 +void glue(helper_maskmov, SUFFIX) (Reg *d, Reg *s);
  108 +void glue(helper_movl_mm_T0, SUFFIX) (Reg *d, uint32_t val);
  109 +#ifdef TARGET_X86_64
  110 +void glue(helper_movq_mm_T0, SUFFIX) (Reg *d, uint64_t val);
  111 +#endif
  112 +
  113 +#if SHIFT == 0
  114 +void glue(helper_pshufw, SUFFIX) (Reg *d, Reg *s, int order);
  115 +#else
  116 +void helper_shufps(Reg *d, Reg *s, int order);
  117 +void helper_shufpd(Reg *d, Reg *s, int order);
  118 +void glue(helper_pshufd, SUFFIX) (Reg *d, Reg *s, int order);
  119 +void glue(helper_pshuflw, SUFFIX) (Reg *d, Reg *s, int order);
  120 +void glue(helper_pshufhw, SUFFIX) (Reg *d, Reg *s, int order);
  121 +#endif
  122 +
  123 +#if SHIFT == 1
  124 +/* FPU ops */
  125 +/* XXX: not accurate */
  126 +
  127 +#define SSE_HELPER_S(name, F)\
  128 + void helper_ ## name ## ps (Reg *d, Reg *s); \
  129 + void helper_ ## name ## ss (Reg *d, Reg *s); \
  130 + void helper_ ## name ## pd (Reg *d, Reg *s); \
  131 + void helper_ ## name ## sd (Reg *d, Reg *s);
  132 +
  133 +SSE_HELPER_S(add, FPU_ADD);
  134 +SSE_HELPER_S(sub, FPU_SUB);
  135 +SSE_HELPER_S(mul, FPU_MUL);
  136 +SSE_HELPER_S(div, FPU_DIV);
  137 +SSE_HELPER_S(min, FPU_MIN);
  138 +SSE_HELPER_S(max, FPU_MAX);
  139 +SSE_HELPER_S(sqrt, FPU_SQRT);
  140 +
  141 +
  142 +void helper_cvtps2pd(Reg *d, Reg *s);
  143 +void helper_cvtpd2ps(Reg *d, Reg *s);
  144 +void helper_cvtss2sd(Reg *d, Reg *s);
  145 +void helper_cvtsd2ss(Reg *d, Reg *s);
  146 +void helper_cvtdq2ps(Reg *d, Reg *s);
  147 +void helper_cvtdq2pd(Reg *d, Reg *s);
  148 +void helper_cvtpi2ps(XMMReg *d, MMXReg *s);
  149 +void helper_cvtpi2pd(XMMReg *d, MMXReg *s);
  150 +void helper_cvtsi2ss(XMMReg *d, uint32_t val);
  151 +void helper_cvtsi2sd(XMMReg *d, uint32_t val);
  152 +
  153 +#ifdef TARGET_X86_64
  154 +void helper_cvtsq2ss(XMMReg *d, uint64_t val);
  155 +void helper_cvtsq2sd(XMMReg *d, uint64_t val);
  156 +#endif
  157 +
  158 +void helper_cvtps2dq(XMMReg *d, XMMReg *s);
  159 +void helper_cvtpd2dq(XMMReg *d, XMMReg *s);
  160 +void helper_cvtps2pi(MMXReg *d, XMMReg *s);
  161 +void helper_cvtpd2pi(MMXReg *d, XMMReg *s);
  162 +int32_t helper_cvtss2si(XMMReg *s);
  163 +int32_t helper_cvtsd2si(XMMReg *s);
  164 +#ifdef TARGET_X86_64
  165 +int64_t helper_cvtss2sq(XMMReg *s);
  166 +int64_t helper_cvtsd2sq(XMMReg *s);
  167 +#endif
  168 +
  169 +void helper_cvttps2dq(XMMReg *d, XMMReg *s);
  170 +void helper_cvttpd2dq(XMMReg *d, XMMReg *s);
  171 +void helper_cvttps2pi(MMXReg *d, XMMReg *s);
  172 +void helper_cvttpd2pi(MMXReg *d, XMMReg *s);
  173 +int32_t helper_cvttss2si(XMMReg *s);
  174 +int32_t helper_cvttsd2si(XMMReg *s);
  175 +#ifdef TARGET_X86_64
  176 +int64_t helper_cvttss2sq(XMMReg *s);
  177 +int64_t helper_cvttsd2sq(XMMReg *s);
  178 +#endif
  179 +
  180 +void helper_rsqrtps(XMMReg *d, XMMReg *s);
  181 +void helper_rsqrtss(XMMReg *d, XMMReg *s);
  182 +void helper_rcpps(XMMReg *d, XMMReg *s);
  183 +void helper_rcpss(XMMReg *d, XMMReg *s);
  184 +void helper_haddps(XMMReg *d, XMMReg *s);
  185 +void helper_haddpd(XMMReg *d, XMMReg *s);
  186 +void helper_hsubps(XMMReg *d, XMMReg *s);
  187 +void helper_hsubpd(XMMReg *d, XMMReg *s);
  188 +void helper_addsubps(XMMReg *d, XMMReg *s);
  189 +void helper_addsubpd(XMMReg *d, XMMReg *s);
  190 +
  191 +#define SSE_HELPER_CMP(name, F)\
  192 + void helper_ ## name ## ps (Reg *d, Reg *s); \
  193 + void helper_ ## name ## ss (Reg *d, Reg *s); \
  194 + void helper_ ## name ## pd (Reg *d, Reg *s); \
  195 + void helper_ ## name ## sd (Reg *d, Reg *s);
  196 +
  197 +SSE_HELPER_CMP(cmpeq, FPU_CMPEQ);
  198 +SSE_HELPER_CMP(cmplt, FPU_CMPLT);
  199 +SSE_HELPER_CMP(cmple, FPU_CMPLE);
  200 +SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD);
  201 +SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ);
  202 +SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT);
  203 +SSE_HELPER_CMP(cmpnle, FPU_CMPNLE);
  204 +SSE_HELPER_CMP(cmpord, FPU_CMPORD);
  205 +
  206 +void helper_ucomiss(Reg *d, Reg *s);
  207 +void helper_comiss(Reg *d, Reg *s);
  208 +void helper_ucomisd(Reg *d, Reg *s);
  209 +void helper_comisd(Reg *d, Reg *s);
  210 +uint32_t helper_movmskps(Reg *s);
  211 +uint32_t helper_movmskpd(Reg *s);
  212 +#endif
  213 +
  214 +uint32_t glue(helper_pmovmskb, SUFFIX)(Reg *s);
  215 +void glue(helper_packsswb, SUFFIX) (Reg *d, Reg *s);
  216 +void glue(helper_packuswb, SUFFIX) (Reg *d, Reg *s);
  217 +void glue(helper_packssdw, SUFFIX) (Reg *d, Reg *s);
  218 +#define UNPCK_OP(base_name, base) \
  219 + void glue(helper_punpck ## base_name ## bw, SUFFIX) (Reg *d, Reg *s); \
  220 + void glue(helper_punpck ## base_name ## wd, SUFFIX) (Reg *d, Reg *s); \
  221 + void glue(helper_punpck ## base_name ## dq, SUFFIX) (Reg *d, Reg *s);
  222 +
  223 +UNPCK_OP(l, 0);
  224 +UNPCK_OP(h, 1);
  225 +
  226 +#if SHIFT == 1
  227 +void glue(helper_punpcklqdq, SUFFIX) (Reg *d, Reg *s);
  228 +void glue(helper_punpckhqdq, SUFFIX) (Reg *d, Reg *s);
  229 +#endif
  230 +
  231 +/* 3DNow! float ops */
  232 +#if SHIFT == 0
  233 +void helper_pi2fd(MMXReg *d, MMXReg *s);
  234 +void helper_pi2fw(MMXReg *d, MMXReg *s);
  235 +void helper_pf2id(MMXReg *d, MMXReg *s);
  236 +void helper_pf2iw(MMXReg *d, MMXReg *s);
  237 +void helper_pfacc(MMXReg *d, MMXReg *s);
  238 +void helper_pfadd(MMXReg *d, MMXReg *s);
  239 +void helper_pfcmpeq(MMXReg *d, MMXReg *s);
  240 +void helper_pfcmpge(MMXReg *d, MMXReg *s);
  241 +void helper_pfcmpgt(MMXReg *d, MMXReg *s);
  242 +void helper_pfmax(MMXReg *d, MMXReg *s);
  243 +void helper_pfmin(MMXReg *d, MMXReg *s);
  244 +void helper_pfmul(MMXReg *d, MMXReg *s);
  245 +void helper_pfnacc(MMXReg *d, MMXReg *s);
  246 +void helper_pfpnacc(MMXReg *d, MMXReg *s);
  247 +void helper_pfrcp(MMXReg *d, MMXReg *s);
  248 +void helper_pfrsqrt(MMXReg *d, MMXReg *s);
  249 +void helper_pfsub(MMXReg *d, MMXReg *s);
  250 +void helper_pfsubr(MMXReg *d, MMXReg *s);
  251 +void helper_pswapd(MMXReg *d, MMXReg *s);
  252 +#endif
  253 +
  254 +#undef SHIFT
  255 +#undef Reg
  256 +#undef SUFFIX
  257 +
  258 +#undef SSE_HELPER_B
  259 +#undef SSE_HELPER_W
  260 +#undef SSE_HELPER_L
  261 +#undef SSE_HELPER_Q
  262 +#undef SSE_HELPER_S
  263 +#undef SSE_HELPER_CMP
  264 +#undef UNPCK_OP
target-i386/translate.c
@@ -60,7 +60,7 @@ @@ -60,7 +60,7 @@
60 /* global register indexes */ 60 /* global register indexes */
61 static TCGv cpu_env, cpu_T[2], cpu_A0; 61 static TCGv cpu_env, cpu_T[2], cpu_A0;
62 /* local register indexes (only used inside old micro ops) */ 62 /* local register indexes (only used inside old micro ops) */
63 -static TCGv cpu_tmp0, cpu_tmp1; 63 +static TCGv cpu_tmp0, cpu_tmp1, cpu_tmp2, cpu_ptr0, cpu_ptr1;
64 64
65 #ifdef TARGET_X86_64 65 #ifdef TARGET_X86_64
66 static int x86_64_hregs; 66 static int x86_64_hregs;
@@ -2410,14 +2410,40 @@ static inline void gen_sto_env_A0(int idx, int offset) @@ -2410,14 +2410,40 @@ static inline void gen_sto_env_A0(int idx, int offset)
2410 tcg_gen_qemu_st64(cpu_tmp1, cpu_tmp0, mem_index); 2410 tcg_gen_qemu_st64(cpu_tmp1, cpu_tmp0, mem_index);
2411 } 2411 }
2412 2412
2413 -#define SSE_SPECIAL ((GenOpFunc2 *)1)  
2414 -#define SSE_DUMMY ((GenOpFunc2 *)2) 2413 +static inline void gen_op_movo(int d_offset, int s_offset)
  2414 +{
  2415 + tcg_gen_ld_i64(cpu_tmp1, cpu_env, s_offset);
  2416 + tcg_gen_st_i64(cpu_tmp1, cpu_env, d_offset);
  2417 + tcg_gen_ld_i64(cpu_tmp1, cpu_env, s_offset + 8);
  2418 + tcg_gen_st_i64(cpu_tmp1, cpu_env, d_offset + 8);
  2419 +}
  2420 +
  2421 +static inline void gen_op_movq(int d_offset, int s_offset)
  2422 +{
  2423 + tcg_gen_ld_i64(cpu_tmp1, cpu_env, s_offset);
  2424 + tcg_gen_st_i64(cpu_tmp1, cpu_env, d_offset);
  2425 +}
  2426 +
  2427 +static inline void gen_op_movl(int d_offset, int s_offset)
  2428 +{
  2429 + tcg_gen_ld_i32(cpu_tmp2, cpu_env, s_offset);
  2430 + tcg_gen_st_i32(cpu_tmp2, cpu_env, d_offset);
  2431 +}
  2432 +
  2433 +static inline void gen_op_movq_env_0(int d_offset)
  2434 +{
  2435 + tcg_gen_movi_i64(cpu_tmp1, 0);
  2436 + tcg_gen_st_i64(cpu_tmp1, cpu_env, d_offset);
  2437 +}
2415 2438
2416 -#define MMX_OP2(x) { gen_op_ ## x ## _mmx, gen_op_ ## x ## _xmm }  
2417 -#define SSE_FOP(x) { gen_op_ ## x ## ps, gen_op_ ## x ## pd, \  
2418 - gen_op_ ## x ## ss, gen_op_ ## x ## sd, } 2439 +#define SSE_SPECIAL ((void *)1)
  2440 +#define SSE_DUMMY ((void *)2)
2419 2441
2420 -static GenOpFunc2 *sse_op_table1[256][4] = { 2442 +#define MMX_OP2(x) { helper_ ## x ## _mmx, helper_ ## x ## _xmm }
  2443 +#define SSE_FOP(x) { helper_ ## x ## ps, helper_ ## x ## pd, \
  2444 + helper_ ## x ## ss, helper_ ## x ## sd, }
  2445 +
  2446 +static void *sse_op_table1[256][4] = {
2421 /* 3DNow! extensions */ 2447 /* 3DNow! extensions */
2422 [0x0e] = { SSE_DUMMY }, /* femms */ 2448 [0x0e] = { SSE_DUMMY }, /* femms */
2423 [0x0f] = { SSE_DUMMY }, /* pf... */ 2449 [0x0f] = { SSE_DUMMY }, /* pf... */
@@ -2426,8 +2452,8 @@ static GenOpFunc2 *sse_op_table1[256][4] = { @@ -2426,8 +2452,8 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2426 [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ 2452 [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2427 [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */ 2453 [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2428 [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */ 2454 [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */
2429 - [0x14] = { gen_op_punpckldq_xmm, gen_op_punpcklqdq_xmm },  
2430 - [0x15] = { gen_op_punpckhdq_xmm, gen_op_punpckhqdq_xmm }, 2455 + [0x14] = { helper_punpckldq_xmm, helper_punpcklqdq_xmm },
  2456 + [0x15] = { helper_punpckhdq_xmm, helper_punpckhqdq_xmm },
2431 [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */ 2457 [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */
2432 [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */ 2458 [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */
2433 2459
@@ -2437,28 +2463,28 @@ static GenOpFunc2 *sse_op_table1[256][4] = { @@ -2437,28 +2463,28 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2437 [0x2b] = { SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd */ 2463 [0x2b] = { SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd */
2438 [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */ 2464 [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2439 [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */ 2465 [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2440 - [0x2e] = { gen_op_ucomiss, gen_op_ucomisd },  
2441 - [0x2f] = { gen_op_comiss, gen_op_comisd }, 2466 + [0x2e] = { helper_ucomiss, helper_ucomisd },
  2467 + [0x2f] = { helper_comiss, helper_comisd },
2442 [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */ 2468 [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2443 [0x51] = SSE_FOP(sqrt), 2469 [0x51] = SSE_FOP(sqrt),
2444 - [0x52] = { gen_op_rsqrtps, NULL, gen_op_rsqrtss, NULL },  
2445 - [0x53] = { gen_op_rcpps, NULL, gen_op_rcpss, NULL },  
2446 - [0x54] = { gen_op_pand_xmm, gen_op_pand_xmm }, /* andps, andpd */  
2447 - [0x55] = { gen_op_pandn_xmm, gen_op_pandn_xmm }, /* andnps, andnpd */  
2448 - [0x56] = { gen_op_por_xmm, gen_op_por_xmm }, /* orps, orpd */  
2449 - [0x57] = { gen_op_pxor_xmm, gen_op_pxor_xmm }, /* xorps, xorpd */ 2470 + [0x52] = { helper_rsqrtps, NULL, helper_rsqrtss, NULL },
  2471 + [0x53] = { helper_rcpps, NULL, helper_rcpss, NULL },
  2472 + [0x54] = { helper_pand_xmm, helper_pand_xmm }, /* andps, andpd */
  2473 + [0x55] = { helper_pandn_xmm, helper_pandn_xmm }, /* andnps, andnpd */
  2474 + [0x56] = { helper_por_xmm, helper_por_xmm }, /* orps, orpd */
  2475 + [0x57] = { helper_pxor_xmm, helper_pxor_xmm }, /* xorps, xorpd */
2450 [0x58] = SSE_FOP(add), 2476 [0x58] = SSE_FOP(add),
2451 [0x59] = SSE_FOP(mul), 2477 [0x59] = SSE_FOP(mul),
2452 - [0x5a] = { gen_op_cvtps2pd, gen_op_cvtpd2ps,  
2453 - gen_op_cvtss2sd, gen_op_cvtsd2ss },  
2454 - [0x5b] = { gen_op_cvtdq2ps, gen_op_cvtps2dq, gen_op_cvttps2dq }, 2478 + [0x5a] = { helper_cvtps2pd, helper_cvtpd2ps,
  2479 + helper_cvtss2sd, helper_cvtsd2ss },
  2480 + [0x5b] = { helper_cvtdq2ps, helper_cvtps2dq, helper_cvttps2dq },
2455 [0x5c] = SSE_FOP(sub), 2481 [0x5c] = SSE_FOP(sub),
2456 [0x5d] = SSE_FOP(min), 2482 [0x5d] = SSE_FOP(min),
2457 [0x5e] = SSE_FOP(div), 2483 [0x5e] = SSE_FOP(div),
2458 [0x5f] = SSE_FOP(max), 2484 [0x5f] = SSE_FOP(max),
2459 2485
2460 [0xc2] = SSE_FOP(cmpeq), 2486 [0xc2] = SSE_FOP(cmpeq),
2461 - [0xc6] = { (GenOpFunc2 *)gen_op_shufps, (GenOpFunc2 *)gen_op_shufpd }, 2487 + [0xc6] = { helper_shufps, helper_shufpd },
2462 2488
2463 /* MMX ops and their SSE extensions */ 2489 /* MMX ops and their SSE extensions */
2464 [0x60] = MMX_OP2(punpcklbw), 2490 [0x60] = MMX_OP2(punpcklbw),
@@ -2473,14 +2499,14 @@ static GenOpFunc2 *sse_op_table1[256][4] = { @@ -2473,14 +2499,14 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2473 [0x69] = MMX_OP2(punpckhwd), 2499 [0x69] = MMX_OP2(punpckhwd),
2474 [0x6a] = MMX_OP2(punpckhdq), 2500 [0x6a] = MMX_OP2(punpckhdq),
2475 [0x6b] = MMX_OP2(packssdw), 2501 [0x6b] = MMX_OP2(packssdw),
2476 - [0x6c] = { NULL, gen_op_punpcklqdq_xmm },  
2477 - [0x6d] = { NULL, gen_op_punpckhqdq_xmm }, 2502 + [0x6c] = { NULL, helper_punpcklqdq_xmm },
  2503 + [0x6d] = { NULL, helper_punpckhqdq_xmm },
2478 [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */ 2504 [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2479 [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movqdu */ 2505 [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movqdu */
2480 - [0x70] = { (GenOpFunc2 *)gen_op_pshufw_mmx,  
2481 - (GenOpFunc2 *)gen_op_pshufd_xmm,  
2482 - (GenOpFunc2 *)gen_op_pshufhw_xmm,  
2483 - (GenOpFunc2 *)gen_op_pshuflw_xmm }, 2506 + [0x70] = { helper_pshufw_mmx,
  2507 + helper_pshufd_xmm,
  2508 + helper_pshufhw_xmm,
  2509 + helper_pshuflw_xmm },
2484 [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */ 2510 [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2485 [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */ 2511 [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2486 [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */ 2512 [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
@@ -2488,13 +2514,13 @@ static GenOpFunc2 *sse_op_table1[256][4] = { @@ -2488,13 +2514,13 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2488 [0x75] = MMX_OP2(pcmpeqw), 2514 [0x75] = MMX_OP2(pcmpeqw),
2489 [0x76] = MMX_OP2(pcmpeql), 2515 [0x76] = MMX_OP2(pcmpeql),
2490 [0x77] = { SSE_DUMMY }, /* emms */ 2516 [0x77] = { SSE_DUMMY }, /* emms */
2491 - [0x7c] = { NULL, gen_op_haddpd, NULL, gen_op_haddps },  
2492 - [0x7d] = { NULL, gen_op_hsubpd, NULL, gen_op_hsubps }, 2517 + [0x7c] = { NULL, helper_haddpd, NULL, helper_haddps },
  2518 + [0x7d] = { NULL, helper_hsubpd, NULL, helper_hsubps },
2493 [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */ 2519 [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
2494 [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */ 2520 [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2495 [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */ 2521 [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2496 [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */ 2522 [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2497 - [0xd0] = { NULL, gen_op_addsubpd, NULL, gen_op_addsubps }, 2523 + [0xd0] = { NULL, helper_addsubpd, NULL, helper_addsubps },
2498 [0xd1] = MMX_OP2(psrlw), 2524 [0xd1] = MMX_OP2(psrlw),
2499 [0xd2] = MMX_OP2(psrld), 2525 [0xd2] = MMX_OP2(psrld),
2500 [0xd3] = MMX_OP2(psrlq), 2526 [0xd3] = MMX_OP2(psrlq),
@@ -2516,7 +2542,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = { @@ -2516,7 +2542,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2516 [0xe3] = MMX_OP2(pavgw), 2542 [0xe3] = MMX_OP2(pavgw),
2517 [0xe4] = MMX_OP2(pmulhuw), 2543 [0xe4] = MMX_OP2(pmulhuw),
2518 [0xe5] = MMX_OP2(pmulhw), 2544 [0xe5] = MMX_OP2(pmulhw),
2519 - [0xe6] = { NULL, gen_op_cvttpd2dq, gen_op_cvtdq2pd, gen_op_cvtpd2dq }, 2545 + [0xe6] = { NULL, helper_cvttpd2dq, helper_cvtdq2pd, helper_cvtpd2dq },
2520 [0xe7] = { SSE_SPECIAL , SSE_SPECIAL }, /* movntq, movntq */ 2546 [0xe7] = { SSE_SPECIAL , SSE_SPECIAL }, /* movntq, movntq */
2521 [0xe8] = MMX_OP2(psubsb), 2547 [0xe8] = MMX_OP2(psubsb),
2522 [0xe9] = MMX_OP2(psubsw), 2548 [0xe9] = MMX_OP2(psubsw),
@@ -2543,7 +2569,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = { @@ -2543,7 +2569,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
2543 [0xfe] = MMX_OP2(paddl), 2569 [0xfe] = MMX_OP2(paddl),
2544 }; 2570 };
2545 2571
2546 -static GenOpFunc2 *sse_op_table2[3 * 8][2] = { 2572 +static void *sse_op_table2[3 * 8][2] = {
2547 [0 + 2] = MMX_OP2(psrlw), 2573 [0 + 2] = MMX_OP2(psrlw),
2548 [0 + 4] = MMX_OP2(psraw), 2574 [0 + 4] = MMX_OP2(psraw),
2549 [0 + 6] = MMX_OP2(psllw), 2575 [0 + 6] = MMX_OP2(psllw),
@@ -2551,29 +2577,29 @@ static GenOpFunc2 *sse_op_table2[3 * 8][2] = { @@ -2551,29 +2577,29 @@ static GenOpFunc2 *sse_op_table2[3 * 8][2] = {
2551 [8 + 4] = MMX_OP2(psrad), 2577 [8 + 4] = MMX_OP2(psrad),
2552 [8 + 6] = MMX_OP2(pslld), 2578 [8 + 6] = MMX_OP2(pslld),
2553 [16 + 2] = MMX_OP2(psrlq), 2579 [16 + 2] = MMX_OP2(psrlq),
2554 - [16 + 3] = { NULL, gen_op_psrldq_xmm }, 2580 + [16 + 3] = { NULL, helper_psrldq_xmm },
2555 [16 + 6] = MMX_OP2(psllq), 2581 [16 + 6] = MMX_OP2(psllq),
2556 - [16 + 7] = { NULL, gen_op_pslldq_xmm }, 2582 + [16 + 7] = { NULL, helper_pslldq_xmm },
2557 }; 2583 };
2558 2584
2559 -static GenOpFunc1 *sse_op_table3[4 * 3] = {  
2560 - gen_op_cvtsi2ss,  
2561 - gen_op_cvtsi2sd,  
2562 - X86_64_ONLY(gen_op_cvtsq2ss),  
2563 - X86_64_ONLY(gen_op_cvtsq2sd),  
2564 -  
2565 - gen_op_cvttss2si,  
2566 - gen_op_cvttsd2si,  
2567 - X86_64_ONLY(gen_op_cvttss2sq),  
2568 - X86_64_ONLY(gen_op_cvttsd2sq),  
2569 -  
2570 - gen_op_cvtss2si,  
2571 - gen_op_cvtsd2si,  
2572 - X86_64_ONLY(gen_op_cvtss2sq),  
2573 - X86_64_ONLY(gen_op_cvtsd2sq), 2585 +static void *sse_op_table3[4 * 3] = {
  2586 + helper_cvtsi2ss,
  2587 + helper_cvtsi2sd,
  2588 + X86_64_ONLY(helper_cvtsq2ss),
  2589 + X86_64_ONLY(helper_cvtsq2sd),
  2590 +
  2591 + helper_cvttss2si,
  2592 + helper_cvttsd2si,
  2593 + X86_64_ONLY(helper_cvttss2sq),
  2594 + X86_64_ONLY(helper_cvttsd2sq),
  2595 +
  2596 + helper_cvtss2si,
  2597 + helper_cvtsd2si,
  2598 + X86_64_ONLY(helper_cvtss2sq),
  2599 + X86_64_ONLY(helper_cvtsd2sq),
2574 }; 2600 };
2575 2601
2576 -static GenOpFunc2 *sse_op_table4[8][4] = { 2602 +static void *sse_op_table4[8][4] = {
2577 SSE_FOP(cmpeq), 2603 SSE_FOP(cmpeq),
2578 SSE_FOP(cmplt), 2604 SSE_FOP(cmplt),
2579 SSE_FOP(cmple), 2605 SSE_FOP(cmple),
@@ -2584,39 +2610,38 @@ static GenOpFunc2 *sse_op_table4[8][4] = { @@ -2584,39 +2610,38 @@ static GenOpFunc2 *sse_op_table4[8][4] = {
2584 SSE_FOP(cmpord), 2610 SSE_FOP(cmpord),
2585 }; 2611 };
2586 2612
2587 -static GenOpFunc2 *sse_op_table5[256] = {  
2588 - [0x0c] = gen_op_pi2fw,  
2589 - [0x0d] = gen_op_pi2fd,  
2590 - [0x1c] = gen_op_pf2iw,  
2591 - [0x1d] = gen_op_pf2id,  
2592 - [0x8a] = gen_op_pfnacc,  
2593 - [0x8e] = gen_op_pfpnacc,  
2594 - [0x90] = gen_op_pfcmpge,  
2595 - [0x94] = gen_op_pfmin,  
2596 - [0x96] = gen_op_pfrcp,  
2597 - [0x97] = gen_op_pfrsqrt,  
2598 - [0x9a] = gen_op_pfsub,  
2599 - [0x9e] = gen_op_pfadd,  
2600 - [0xa0] = gen_op_pfcmpgt,  
2601 - [0xa4] = gen_op_pfmax,  
2602 - [0xa6] = gen_op_movq, /* pfrcpit1; no need to actually increase precision */  
2603 - [0xa7] = gen_op_movq, /* pfrsqit1 */  
2604 - [0xaa] = gen_op_pfsubr,  
2605 - [0xae] = gen_op_pfacc,  
2606 - [0xb0] = gen_op_pfcmpeq,  
2607 - [0xb4] = gen_op_pfmul,  
2608 - [0xb6] = gen_op_movq, /* pfrcpit2 */  
2609 - [0xb7] = gen_op_pmulhrw_mmx,  
2610 - [0xbb] = gen_op_pswapd,  
2611 - [0xbf] = gen_op_pavgb_mmx /* pavgusb */ 2613 +static void *sse_op_table5[256] = {
  2614 + [0x0c] = helper_pi2fw,
  2615 + [0x0d] = helper_pi2fd,
  2616 + [0x1c] = helper_pf2iw,
  2617 + [0x1d] = helper_pf2id,
  2618 + [0x8a] = helper_pfnacc,
  2619 + [0x8e] = helper_pfpnacc,
  2620 + [0x90] = helper_pfcmpge,
  2621 + [0x94] = helper_pfmin,
  2622 + [0x96] = helper_pfrcp,
  2623 + [0x97] = helper_pfrsqrt,
  2624 + [0x9a] = helper_pfsub,
  2625 + [0x9e] = helper_pfadd,
  2626 + [0xa0] = helper_pfcmpgt,
  2627 + [0xa4] = helper_pfmax,
  2628 + [0xa6] = helper_movq, /* pfrcpit1; no need to actually increase precision */
  2629 + [0xa7] = helper_movq, /* pfrsqit1 */
  2630 + [0xaa] = helper_pfsubr,
  2631 + [0xae] = helper_pfacc,
  2632 + [0xb0] = helper_pfcmpeq,
  2633 + [0xb4] = helper_pfmul,
  2634 + [0xb6] = helper_movq, /* pfrcpit2 */
  2635 + [0xb7] = helper_pmulhrw_mmx,
  2636 + [0xbb] = helper_pswapd,
  2637 + [0xbf] = helper_pavgb_mmx /* pavgusb */
2612 }; 2638 };
2613 2639
2614 static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) 2640 static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2615 { 2641 {
2616 int b1, op1_offset, op2_offset, is_xmm, val, ot; 2642 int b1, op1_offset, op2_offset, is_xmm, val, ot;
2617 int modrm, mod, rm, reg, reg_addr, offset_addr; 2643 int modrm, mod, rm, reg, reg_addr, offset_addr;
2618 - GenOpFunc2 *sse_op2;  
2619 - GenOpFunc3 *sse_op3; 2644 + void *sse_op2;
2620 2645
2621 b &= 0xff; 2646 b &= 0xff;
2622 if (s->prefix & PREFIX_DATA) 2647 if (s->prefix & PREFIX_DATA)
@@ -2656,18 +2681,18 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -2656,18 +2681,18 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2656 if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) 2681 if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
2657 goto illegal_op; 2682 goto illegal_op;
2658 /* femms */ 2683 /* femms */
2659 - gen_op_emms(); 2684 + tcg_gen_helper_0_0(helper_emms);
2660 return; 2685 return;
2661 } 2686 }
2662 if (b == 0x77) { 2687 if (b == 0x77) {
2663 /* emms */ 2688 /* emms */
2664 - gen_op_emms(); 2689 + tcg_gen_helper_0_0(helper_emms);
2665 return; 2690 return;
2666 } 2691 }
2667 /* prepare MMX state (XXX: optimize by storing fpstt and fptags in 2692 /* prepare MMX state (XXX: optimize by storing fpstt and fptags in
2668 the static cpu state) */ 2693 the static cpu state) */
2669 if (!is_xmm) { 2694 if (!is_xmm) {
2670 - gen_op_enter_mmx(); 2695 + tcg_gen_helper_0_0(helper_enter_mmx);
2671 } 2696 }
2672 2697
2673 modrm = ldub_code(s->pc++); 2698 modrm = ldub_code(s->pc++);
@@ -2697,24 +2722,31 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -2697,24 +2722,31 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2697 #ifdef TARGET_X86_64 2722 #ifdef TARGET_X86_64
2698 if (s->dflag == 2) { 2723 if (s->dflag == 2) {
2699 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0); 2724 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
2700 - gen_op_movq_mm_T0_mmx(offsetof(CPUX86State,fpregs[reg].mmx)); 2725 + tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
2701 } else 2726 } else
2702 #endif 2727 #endif
2703 { 2728 {
2704 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0); 2729 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
2705 - gen_op_movl_mm_T0_mmx(offsetof(CPUX86State,fpregs[reg].mmx)); 2730 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
  2731 + offsetof(CPUX86State,fpregs[reg].mmx));
  2732 + tcg_gen_helper_0_2(helper_movl_mm_T0_mmx, cpu_ptr0, cpu_T[0]);
2706 } 2733 }
2707 break; 2734 break;
2708 case 0x16e: /* movd xmm, ea */ 2735 case 0x16e: /* movd xmm, ea */
2709 #ifdef TARGET_X86_64 2736 #ifdef TARGET_X86_64
2710 if (s->dflag == 2) { 2737 if (s->dflag == 2) {
2711 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0); 2738 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
2712 - gen_op_movq_mm_T0_xmm(offsetof(CPUX86State,xmm_regs[reg])); 2739 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
  2740 + offsetof(CPUX86State,xmm_regs[reg]));
  2741 + tcg_gen_helper_0_2(helper_movq_mm_T0_xmm, cpu_ptr0, cpu_T[0]);
2713 } else 2742 } else
2714 #endif 2743 #endif
2715 { 2744 {
2716 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0); 2745 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
2717 - gen_op_movl_mm_T0_xmm(offsetof(CPUX86State,xmm_regs[reg])); 2746 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
  2747 + offsetof(CPUX86State,xmm_regs[reg]));
  2748 + tcg_gen_trunc_tl_i32(cpu_tmp2, cpu_T[0]);
  2749 + tcg_gen_helper_0_2(helper_movl_mm_T0_xmm, cpu_ptr0, cpu_tmp2);
2718 } 2750 }
2719 break; 2751 break;
2720 case 0x6f: /* movq mm, ea */ 2752 case 0x6f: /* movq mm, ea */
@@ -2723,8 +2755,10 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -2723,8 +2755,10 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2723 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx)); 2755 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
2724 } else { 2756 } else {
2725 rm = (modrm & 7); 2757 rm = (modrm & 7);
2726 - gen_op_movq(offsetof(CPUX86State,fpregs[reg].mmx),  
2727 - offsetof(CPUX86State,fpregs[rm].mmx)); 2758 + tcg_gen_ld_i64(cpu_tmp1, cpu_env,
  2759 + offsetof(CPUX86State,fpregs[rm].mmx));
  2760 + tcg_gen_st_i64(cpu_tmp1, cpu_env,
  2761 + offsetof(CPUX86State,fpregs[reg].mmx));
2728 } 2762 }
2729 break; 2763 break;
2730 case 0x010: /* movups */ 2764 case 0x010: /* movups */
@@ -2841,24 +2875,28 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -2841,24 +2875,28 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2841 case 0x7e: /* movd ea, mm */ 2875 case 0x7e: /* movd ea, mm */
2842 #ifdef TARGET_X86_64 2876 #ifdef TARGET_X86_64
2843 if (s->dflag == 2) { 2877 if (s->dflag == 2) {
2844 - gen_op_movq_T0_mm_mmx(offsetof(CPUX86State,fpregs[reg].mmx)); 2878 + tcg_gen_ld_i64(cpu_T[0], cpu_env,
  2879 + offsetof(CPUX86State,fpregs[reg].mmx));
2845 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1); 2880 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
2846 } else 2881 } else
2847 #endif 2882 #endif
2848 { 2883 {
2849 - gen_op_movl_T0_mm_mmx(offsetof(CPUX86State,fpregs[reg].mmx)); 2884 + tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
  2885 + offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
2850 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1); 2886 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
2851 } 2887 }
2852 break; 2888 break;
2853 case 0x17e: /* movd ea, xmm */ 2889 case 0x17e: /* movd ea, xmm */
2854 #ifdef TARGET_X86_64 2890 #ifdef TARGET_X86_64
2855 if (s->dflag == 2) { 2891 if (s->dflag == 2) {
2856 - gen_op_movq_T0_mm_xmm(offsetof(CPUX86State,xmm_regs[reg])); 2892 + tcg_gen_ld_i64(cpu_T[0], cpu_env,
  2893 + offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
2857 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1); 2894 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
2858 } else 2895 } else
2859 #endif 2896 #endif
2860 { 2897 {
2861 - gen_op_movl_T0_mm_xmm(offsetof(CPUX86State,xmm_regs[reg])); 2898 + tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
  2899 + offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
2862 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1); 2900 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
2863 } 2901 }
2864 break; 2902 break;
@@ -2967,21 +3005,29 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -2967,21 +3005,29 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2967 rm = (modrm & 7); 3005 rm = (modrm & 7);
2968 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); 3006 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
2969 } 3007 }
2970 - sse_op2(op2_offset, op1_offset); 3008 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
  3009 + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
  3010 + tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
2971 break; 3011 break;
2972 case 0x050: /* movmskps */ 3012 case 0x050: /* movmskps */
2973 rm = (modrm & 7) | REX_B(s); 3013 rm = (modrm & 7) | REX_B(s);
2974 - gen_op_movmskps(offsetof(CPUX86State,xmm_regs[rm])); 3014 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
  3015 + offsetof(CPUX86State,xmm_regs[rm]));
  3016 + tcg_gen_helper_1_1(helper_movmskps, cpu_tmp2, cpu_ptr0);
  3017 + tcg_gen_extu_i32_i64(cpu_T[0], cpu_tmp2);
2975 gen_op_mov_reg_T0(OT_LONG, reg); 3018 gen_op_mov_reg_T0(OT_LONG, reg);
2976 break; 3019 break;
2977 case 0x150: /* movmskpd */ 3020 case 0x150: /* movmskpd */
2978 rm = (modrm & 7) | REX_B(s); 3021 rm = (modrm & 7) | REX_B(s);
2979 - gen_op_movmskpd(offsetof(CPUX86State,xmm_regs[rm])); 3022 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
  3023 + offsetof(CPUX86State,xmm_regs[rm]));
  3024 + tcg_gen_helper_1_1(helper_movmskpd, cpu_tmp2, cpu_ptr0);
  3025 + tcg_gen_extu_i32_i64(cpu_T[0], cpu_tmp2);
2980 gen_op_mov_reg_T0(OT_LONG, reg); 3026 gen_op_mov_reg_T0(OT_LONG, reg);
2981 break; 3027 break;
2982 case 0x02a: /* cvtpi2ps */ 3028 case 0x02a: /* cvtpi2ps */
2983 case 0x12a: /* cvtpi2pd */ 3029 case 0x12a: /* cvtpi2pd */
2984 - gen_op_enter_mmx(); 3030 + tcg_gen_helper_0_0(helper_enter_mmx);
2985 if (mod != 3) { 3031 if (mod != 3) {
2986 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); 3032 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
2987 op2_offset = offsetof(CPUX86State,mmx_t0); 3033 op2_offset = offsetof(CPUX86State,mmx_t0);
@@ -2991,13 +3037,15 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -2991,13 +3037,15 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2991 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); 3037 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
2992 } 3038 }
2993 op1_offset = offsetof(CPUX86State,xmm_regs[reg]); 3039 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
  3040 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
  3041 + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
2994 switch(b >> 8) { 3042 switch(b >> 8) {
2995 case 0x0: 3043 case 0x0:
2996 - gen_op_cvtpi2ps(op1_offset, op2_offset); 3044 + tcg_gen_helper_0_2(helper_cvtpi2ps, cpu_ptr0, cpu_ptr1);
2997 break; 3045 break;
2998 default: 3046 default:
2999 case 0x1: 3047 case 0x1:
3000 - gen_op_cvtpi2pd(op1_offset, op2_offset); 3048 + tcg_gen_helper_0_2(helper_cvtpi2pd, cpu_ptr0, cpu_ptr1);
3001 break; 3049 break;
3002 } 3050 }
3003 break; 3051 break;
@@ -3006,13 +3054,16 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -3006,13 +3054,16 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3006 ot = (s->dflag == 2) ? OT_QUAD : OT_LONG; 3054 ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
3007 gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); 3055 gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
3008 op1_offset = offsetof(CPUX86State,xmm_regs[reg]); 3056 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3009 - sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)](op1_offset); 3057 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
  3058 + sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)];
  3059 + tcg_gen_trunc_tl_i32(cpu_tmp2, cpu_T[0]);
  3060 + tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_tmp2);
3010 break; 3061 break;
3011 case 0x02c: /* cvttps2pi */ 3062 case 0x02c: /* cvttps2pi */
3012 case 0x12c: /* cvttpd2pi */ 3063 case 0x12c: /* cvttpd2pi */
3013 case 0x02d: /* cvtps2pi */ 3064 case 0x02d: /* cvtps2pi */
3014 case 0x12d: /* cvtpd2pi */ 3065 case 0x12d: /* cvtpd2pi */
3015 - gen_op_enter_mmx(); 3066 + tcg_gen_helper_0_0(helper_enter_mmx);
3016 if (mod != 3) { 3067 if (mod != 3) {
3017 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); 3068 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3018 op2_offset = offsetof(CPUX86State,xmm_t0); 3069 op2_offset = offsetof(CPUX86State,xmm_t0);
@@ -3022,18 +3073,20 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -3022,18 +3073,20 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3022 op2_offset = offsetof(CPUX86State,xmm_regs[rm]); 3073 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3023 } 3074 }
3024 op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx); 3075 op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
  3076 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
  3077 + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3025 switch(b) { 3078 switch(b) {
3026 case 0x02c: 3079 case 0x02c:
3027 - gen_op_cvttps2pi(op1_offset, op2_offset); 3080 + tcg_gen_helper_0_2(helper_cvttps2pi, cpu_ptr0, cpu_ptr1);
3028 break; 3081 break;
3029 case 0x12c: 3082 case 0x12c:
3030 - gen_op_cvttpd2pi(op1_offset, op2_offset); 3083 + tcg_gen_helper_0_2(helper_cvttpd2pi, cpu_ptr0, cpu_ptr1);
3031 break; 3084 break;
3032 case 0x02d: 3085 case 0x02d:
3033 - gen_op_cvtps2pi(op1_offset, op2_offset); 3086 + tcg_gen_helper_0_2(helper_cvtps2pi, cpu_ptr0, cpu_ptr1);
3034 break; 3087 break;
3035 case 0x12d: 3088 case 0x12d:
3036 - gen_op_cvtpd2pi(op1_offset, op2_offset); 3089 + tcg_gen_helper_0_2(helper_cvtpd2pi, cpu_ptr0, cpu_ptr1);
3037 break; 3090 break;
3038 } 3091 }
3039 break; 3092 break;
@@ -3055,8 +3108,15 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -3055,8 +3108,15 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3055 rm = (modrm & 7) | REX_B(s); 3108 rm = (modrm & 7) | REX_B(s);
3056 op2_offset = offsetof(CPUX86State,xmm_regs[rm]); 3109 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3057 } 3110 }
3058 - sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +  
3059 - (b & 1) * 4](op2_offset); 3111 + sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
  3112 + (b & 1) * 4];
  3113 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
  3114 + if (ot == OT_LONG) {
  3115 + tcg_gen_helper_1_1(sse_op2, cpu_tmp2, cpu_ptr0);
  3116 + tcg_gen_extu_i32_i64(cpu_T[0], cpu_tmp2);
  3117 + } else {
  3118 + tcg_gen_helper_1_1(sse_op2, cpu_T[0], cpu_ptr0);
  3119 + }
3060 gen_op_mov_reg_T0(ot, reg); 3120 gen_op_mov_reg_T0(ot, reg);
3061 break; 3121 break;
3062 case 0xc4: /* pinsrw */ 3122 case 0xc4: /* pinsrw */
@@ -3066,10 +3126,12 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -3066,10 +3126,12 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3066 val = ldub_code(s->pc++); 3126 val = ldub_code(s->pc++);
3067 if (b1) { 3127 if (b1) {
3068 val &= 7; 3128 val &= 7;
3069 - gen_op_pinsrw_xmm(offsetof(CPUX86State,xmm_regs[reg]), val); 3129 + tcg_gen_st16_tl(cpu_T[0], cpu_env,
  3130 + offsetof(CPUX86State,xmm_regs[reg].XMM_W(val)));
3070 } else { 3131 } else {
3071 val &= 3; 3132 val &= 3;
3072 - gen_op_pinsrw_mmx(offsetof(CPUX86State,fpregs[reg].mmx), val); 3133 + tcg_gen_st16_tl(cpu_T[0], cpu_env,
  3134 + offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3073 } 3135 }
3074 break; 3136 break;
3075 case 0xc5: /* pextrw */ 3137 case 0xc5: /* pextrw */
@@ -3080,11 +3142,13 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -3080,11 +3142,13 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3080 if (b1) { 3142 if (b1) {
3081 val &= 7; 3143 val &= 7;
3082 rm = (modrm & 7) | REX_B(s); 3144 rm = (modrm & 7) | REX_B(s);
3083 - gen_op_pextrw_xmm(offsetof(CPUX86State,xmm_regs[rm]), val); 3145 + tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
  3146 + offsetof(CPUX86State,xmm_regs[rm].XMM_W(val)));
3084 } else { 3147 } else {
3085 val &= 3; 3148 val &= 3;
3086 rm = (modrm & 7); 3149 rm = (modrm & 7);
3087 - gen_op_pextrw_mmx(offsetof(CPUX86State,fpregs[rm].mmx), val); 3150 + tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
  3151 + offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3088 } 3152 }
3089 reg = ((modrm >> 3) & 7) | rex_r; 3153 reg = ((modrm >> 3) & 7) | rex_r;
3090 gen_op_mov_reg_T0(OT_LONG, reg); 3154 gen_op_mov_reg_T0(OT_LONG, reg);
@@ -3101,14 +3165,14 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -3101,14 +3165,14 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3101 } 3165 }
3102 break; 3166 break;
3103 case 0x2d6: /* movq2dq */ 3167 case 0x2d6: /* movq2dq */
3104 - gen_op_enter_mmx(); 3168 + tcg_gen_helper_0_0(helper_enter_mmx);
3105 rm = (modrm & 7); 3169 rm = (modrm & 7);
3106 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), 3170 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3107 offsetof(CPUX86State,fpregs[rm].mmx)); 3171 offsetof(CPUX86State,fpregs[rm].mmx));
3108 gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1))); 3172 gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
3109 break; 3173 break;
3110 case 0x3d6: /* movdq2q */ 3174 case 0x3d6: /* movdq2q */
3111 - gen_op_enter_mmx(); 3175 + tcg_gen_helper_0_0(helper_enter_mmx);
3112 rm = (modrm & 7) | REX_B(s); 3176 rm = (modrm & 7) | REX_B(s);
3113 gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx), 3177 gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
3114 offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); 3178 offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
@@ -3119,11 +3183,14 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -3119,11 +3183,14 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3119 goto illegal_op; 3183 goto illegal_op;
3120 if (b1) { 3184 if (b1) {
3121 rm = (modrm & 7) | REX_B(s); 3185 rm = (modrm & 7) | REX_B(s);
3122 - gen_op_pmovmskb_xmm(offsetof(CPUX86State,xmm_regs[rm])); 3186 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
  3187 + tcg_gen_helper_1_1(helper_pmovmskb_xmm, cpu_tmp2, cpu_ptr0);
3123 } else { 3188 } else {
3124 rm = (modrm & 7); 3189 rm = (modrm & 7);
3125 - gen_op_pmovmskb_mmx(offsetof(CPUX86State,fpregs[rm].mmx)); 3190 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
  3191 + tcg_gen_helper_1_1(helper_pmovmskb_mmx, cpu_tmp2, cpu_ptr0);
3126 } 3192 }
  3193 + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2);
3127 reg = ((modrm >> 3) & 7) | rex_r; 3194 reg = ((modrm >> 3) & 7) | rex_r;
3128 gen_op_mov_reg_T0(OT_LONG, reg); 3195 gen_op_mov_reg_T0(OT_LONG, reg);
3129 break; 3196 break;
@@ -3199,13 +3266,16 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -3199,13 +3266,16 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3199 sse_op2 = sse_op_table5[val]; 3266 sse_op2 = sse_op_table5[val];
3200 if (!sse_op2) 3267 if (!sse_op2)
3201 goto illegal_op; 3268 goto illegal_op;
3202 - sse_op2(op1_offset, op2_offset); 3269 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
  3270 + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
  3271 + tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
3203 break; 3272 break;
3204 case 0x70: /* pshufx insn */ 3273 case 0x70: /* pshufx insn */
3205 case 0xc6: /* pshufx insn */ 3274 case 0xc6: /* pshufx insn */
3206 val = ldub_code(s->pc++); 3275 val = ldub_code(s->pc++);
3207 - sse_op3 = (GenOpFunc3 *)sse_op2;  
3208 - sse_op3(op1_offset, op2_offset, val); 3276 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
  3277 + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
  3278 + tcg_gen_helper_0_3(sse_op2, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
3209 break; 3279 break;
3210 case 0xc2: 3280 case 0xc2:
3211 /* compare insns */ 3281 /* compare insns */
@@ -3213,13 +3283,19 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) @@ -3213,13 +3283,19 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3213 if (val >= 8) 3283 if (val >= 8)
3214 goto illegal_op; 3284 goto illegal_op;
3215 sse_op2 = sse_op_table4[val][b1]; 3285 sse_op2 = sse_op_table4[val][b1];
3216 - sse_op2(op1_offset, op2_offset); 3286 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
  3287 + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
  3288 + tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
3217 break; 3289 break;
3218 default: 3290 default:
3219 - sse_op2(op1_offset, op2_offset); 3291 + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
  3292 + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
  3293 + tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
3220 break; 3294 break;
3221 } 3295 }
3222 if (b == 0x2e || b == 0x2f) { 3296 if (b == 0x2e || b == 0x2f) {
  3297 + /* just to keep the EFLAGS optimization correct */
  3298 + gen_op_com_dummy();
3223 s->cc_op = CC_OP_EFLAGS; 3299 s->cc_op = CC_OP_EFLAGS;
3224 } 3300 }
3225 } 3301 }
@@ -6485,10 +6561,10 @@ static uint16_t opc_write_flags[NB_OPS] = { @@ -6485,10 +6561,10 @@ static uint16_t opc_write_flags[NB_OPS] = {
6485 X86_64_DEF([INDEX_op_imulq_T0_T1] = CC_OSZAPC,) 6561 X86_64_DEF([INDEX_op_imulq_T0_T1] = CC_OSZAPC,)
6486 6562
6487 /* sse */ 6563 /* sse */
6488 - [INDEX_op_ucomiss] = CC_OSZAPC,  
6489 - [INDEX_op_ucomisd] = CC_OSZAPC,  
6490 - [INDEX_op_comiss] = CC_OSZAPC,  
6491 - [INDEX_op_comisd] = CC_OSZAPC, 6564 + [INDEX_op_com_dummy] = CC_OSZAPC,
  6565 + [INDEX_op_com_dummy] = CC_OSZAPC,
  6566 + [INDEX_op_com_dummy] = CC_OSZAPC,
  6567 + [INDEX_op_com_dummy] = CC_OSZAPC,
6492 6568
6493 /* bcd */ 6569 /* bcd */
6494 [INDEX_op_aam] = CC_OSZAPC, 6570 [INDEX_op_aam] = CC_OSZAPC,
@@ -6792,6 +6868,9 @@ static inline int gen_intermediate_code_internal(CPUState *env, @@ -6792,6 +6868,9 @@ static inline int gen_intermediate_code_internal(CPUState *env,
6792 #if TARGET_LONG_BITS > HOST_LONG_BITS 6868 #if TARGET_LONG_BITS > HOST_LONG_BITS
6793 cpu_tmp1 = tcg_temp_new(TCG_TYPE_I64); 6869 cpu_tmp1 = tcg_temp_new(TCG_TYPE_I64);
6794 #endif 6870 #endif
  6871 + cpu_tmp2 = tcg_temp_new(TCG_TYPE_I32);
  6872 + cpu_ptr0 = tcg_temp_new(TCG_TYPE_PTR);
  6873 + cpu_ptr1 = tcg_temp_new(TCG_TYPE_PTR);
6795 6874
6796 gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; 6875 gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
6797 6876