Commit ad69471ce5e1284e1cacd053bb0fe8d6175a2f9e
1 parent
8f8e3aa4
ARM TCG conversion 14/16.
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4151 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
8 changed files
with
2597 additions
and
2538 deletions
Makefile.target
| @@ -211,7 +211,7 @@ LIBOBJS+= op_helper.o helper.o | @@ -211,7 +211,7 @@ LIBOBJS+= op_helper.o helper.o | ||
| 211 | endif | 211 | endif |
| 212 | 212 | ||
| 213 | ifeq ($(TARGET_BASE_ARCH), arm) | 213 | ifeq ($(TARGET_BASE_ARCH), arm) |
| 214 | -LIBOBJS+= op_helper.o helper.o | 214 | +LIBOBJS+= op_helper.o helper.o neon_helper.o |
| 215 | endif | 215 | endif |
| 216 | 216 | ||
| 217 | ifeq ($(TARGET_BASE_ARCH), sh4) | 217 | ifeq ($(TARGET_BASE_ARCH), sh4) |
target-arm/helper.c
| @@ -256,30 +256,6 @@ void cpu_arm_close(CPUARMState *env) | @@ -256,30 +256,6 @@ void cpu_arm_close(CPUARMState *env) | ||
| 256 | free(env); | 256 | free(env); |
| 257 | } | 257 | } |
| 258 | 258 | ||
| 259 | -/* Polynomial multiplication is like integer multiplcation except the | ||
| 260 | - partial products are XORed, not added. */ | ||
| 261 | -uint32_t helper_neon_mul_p8(uint32_t op1, uint32_t op2) | ||
| 262 | -{ | ||
| 263 | - uint32_t mask; | ||
| 264 | - uint32_t result; | ||
| 265 | - result = 0; | ||
| 266 | - while (op1) { | ||
| 267 | - mask = 0; | ||
| 268 | - if (op1 & 1) | ||
| 269 | - mask |= 0xff; | ||
| 270 | - if (op1 & (1 << 8)) | ||
| 271 | - mask |= (0xff << 8); | ||
| 272 | - if (op1 & (1 << 16)) | ||
| 273 | - mask |= (0xff << 16); | ||
| 274 | - if (op1 & (1 << 24)) | ||
| 275 | - mask |= (0xff << 24); | ||
| 276 | - result ^= op2 & mask; | ||
| 277 | - op1 = (op1 >> 1) & 0x7f7f7f7f; | ||
| 278 | - op2 = (op2 << 1) & 0xfefefefe; | ||
| 279 | - } | ||
| 280 | - return result; | ||
| 281 | -} | ||
| 282 | - | ||
| 283 | uint32_t cpsr_read(CPUARMState *env) | 259 | uint32_t cpsr_read(CPUARMState *env) |
| 284 | { | 260 | { |
| 285 | int ZF; | 261 | int ZF; |
| @@ -376,6 +352,11 @@ uint32_t HELPER(rbit)(uint32_t x) | @@ -376,6 +352,11 @@ uint32_t HELPER(rbit)(uint32_t x) | ||
| 376 | return x; | 352 | return x; |
| 377 | } | 353 | } |
| 378 | 354 | ||
| 355 | +uint32_t HELPER(abs)(uint32_t x) | ||
| 356 | +{ | ||
| 357 | + return ((int32_t)x < 0) ? -x : x; | ||
| 358 | +} | ||
| 359 | + | ||
| 379 | #if defined(CONFIG_USER_ONLY) | 360 | #if defined(CONFIG_USER_ONLY) |
| 380 | 361 | ||
| 381 | void do_interrupt (CPUState *env) | 362 | void do_interrupt (CPUState *env) |
target-arm/helpers.h
| @@ -84,6 +84,7 @@ DEF_HELPER_1_1(double_saturate, uint32_t, (int32_t)) | @@ -84,6 +84,7 @@ DEF_HELPER_1_1(double_saturate, uint32_t, (int32_t)) | ||
| 84 | DEF_HELPER_1_2(sdiv, int32_t, (int32_t, int32_t)) | 84 | DEF_HELPER_1_2(sdiv, int32_t, (int32_t, int32_t)) |
| 85 | DEF_HELPER_1_2(udiv, uint32_t, (uint32_t, uint32_t)) | 85 | DEF_HELPER_1_2(udiv, uint32_t, (uint32_t, uint32_t)) |
| 86 | DEF_HELPER_1_1(rbit, uint32_t, (uint32_t)) | 86 | DEF_HELPER_1_1(rbit, uint32_t, (uint32_t)) |
| 87 | +DEF_HELPER_1_1(abs, uint32_t, (uint32_t)) | ||
| 87 | 88 | ||
| 88 | #define PAS_OP(pfx) \ | 89 | #define PAS_OP(pfx) \ |
| 89 | DEF_HELPER_1_3(pfx ## add8, uint32_t, (uint32_t, uint32_t, uint32_t *)) \ | 90 | DEF_HELPER_1_3(pfx ## add8, uint32_t, (uint32_t, uint32_t, uint32_t *)) \ |
| @@ -208,6 +209,10 @@ DEF_HELPER_1_2(rsqrte_f32, float32, (float32, CPUState *)) | @@ -208,6 +209,10 @@ DEF_HELPER_1_2(rsqrte_f32, float32, (float32, CPUState *)) | ||
| 208 | DEF_HELPER_1_2(recpe_u32, uint32_t, (uint32_t, CPUState *)) | 209 | DEF_HELPER_1_2(recpe_u32, uint32_t, (uint32_t, CPUState *)) |
| 209 | DEF_HELPER_1_2(rsqrte_u32, uint32_t, (uint32_t, CPUState *)) | 210 | DEF_HELPER_1_2(rsqrte_u32, uint32_t, (uint32_t, CPUState *)) |
| 210 | DEF_HELPER_1_4(neon_tbl, uint32_t, (uint32_t, uint32_t, uint32_t, uint32_t)) | 211 | DEF_HELPER_1_4(neon_tbl, uint32_t, (uint32_t, uint32_t, uint32_t, uint32_t)) |
| 212 | +DEF_HELPER_1_2(neon_add_saturate_u64, uint64_t, (uint64_t, uint64_t)) | ||
| 213 | +DEF_HELPER_1_2(neon_add_saturate_s64, uint64_t, (uint64_t, uint64_t)) | ||
| 214 | +DEF_HELPER_1_2(neon_sub_saturate_u64, uint64_t, (uint64_t, uint64_t)) | ||
| 215 | +DEF_HELPER_1_2(neon_sub_saturate_s64, uint64_t, (uint64_t, uint64_t)) | ||
| 211 | 216 | ||
| 212 | DEF_HELPER_1_2(add_cc, uint32_t, (uint32_t, uint32_t)) | 217 | DEF_HELPER_1_2(add_cc, uint32_t, (uint32_t, uint32_t)) |
| 213 | DEF_HELPER_1_2(adc_cc, uint32_t, (uint32_t, uint32_t)) | 218 | DEF_HELPER_1_2(adc_cc, uint32_t, (uint32_t, uint32_t)) |
| @@ -223,6 +228,209 @@ DEF_HELPER_1_2(shr_cc, uint32_t, (uint32_t, uint32_t)) | @@ -223,6 +228,209 @@ DEF_HELPER_1_2(shr_cc, uint32_t, (uint32_t, uint32_t)) | ||
| 223 | DEF_HELPER_1_2(sar_cc, uint32_t, (uint32_t, uint32_t)) | 228 | DEF_HELPER_1_2(sar_cc, uint32_t, (uint32_t, uint32_t)) |
| 224 | DEF_HELPER_1_2(ror_cc, uint32_t, (uint32_t, uint32_t)) | 229 | DEF_HELPER_1_2(ror_cc, uint32_t, (uint32_t, uint32_t)) |
| 225 | 230 | ||
| 231 | +/* neon_helper.c */ | ||
| 232 | +DEF_HELPER_1_3(neon_qadd_u8, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 233 | +DEF_HELPER_1_3(neon_qadd_s8, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 234 | +DEF_HELPER_1_3(neon_qadd_u16, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 235 | +DEF_HELPER_1_3(neon_qadd_s16, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 236 | +DEF_HELPER_1_3(neon_qsub_u8, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 237 | +DEF_HELPER_1_3(neon_qsub_s8, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 238 | +DEF_HELPER_1_3(neon_qsub_u16, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 239 | +DEF_HELPER_1_3(neon_qsub_s16, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 240 | + | ||
| 241 | +DEF_HELPER_1_2(neon_hadd_s8, uint32_t, (uint32_t, uint32_t)) | ||
| 242 | +DEF_HELPER_1_2(neon_hadd_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 243 | +DEF_HELPER_1_2(neon_hadd_s16, uint32_t, (uint32_t, uint32_t)) | ||
| 244 | +DEF_HELPER_1_2(neon_hadd_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 245 | +DEF_HELPER_1_2(neon_hadd_s32, int32_t, (int32_t, int32_t)) | ||
| 246 | +DEF_HELPER_1_2(neon_hadd_u32, uint32_t, (uint32_t, uint32_t)) | ||
| 247 | +DEF_HELPER_1_2(neon_rhadd_s8, uint32_t, (uint32_t, uint32_t)) | ||
| 248 | +DEF_HELPER_1_2(neon_rhadd_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 249 | +DEF_HELPER_1_2(neon_rhadd_s16, uint32_t, (uint32_t, uint32_t)) | ||
| 250 | +DEF_HELPER_1_2(neon_rhadd_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 251 | +DEF_HELPER_1_2(neon_rhadd_s32, int32_t, (int32_t, int32_t)) | ||
| 252 | +DEF_HELPER_1_2(neon_rhadd_u32, uint32_t, (uint32_t, uint32_t)) | ||
| 253 | +DEF_HELPER_1_2(neon_hsub_s8, uint32_t, (uint32_t, uint32_t)) | ||
| 254 | +DEF_HELPER_1_2(neon_hsub_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 255 | +DEF_HELPER_1_2(neon_hsub_s16, uint32_t, (uint32_t, uint32_t)) | ||
| 256 | +DEF_HELPER_1_2(neon_hsub_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 257 | +DEF_HELPER_1_2(neon_hsub_s32, int32_t, (int32_t, int32_t)) | ||
| 258 | +DEF_HELPER_1_2(neon_hsub_u32, uint32_t, (uint32_t, uint32_t)) | ||
| 259 | + | ||
| 260 | +DEF_HELPER_1_2(neon_cgt_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 261 | +DEF_HELPER_1_2(neon_cgt_s8, uint32_t, (uint32_t, uint32_t)) | ||
| 262 | +DEF_HELPER_1_2(neon_cgt_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 263 | +DEF_HELPER_1_2(neon_cgt_s16, uint32_t, (uint32_t, uint32_t)) | ||
| 264 | +DEF_HELPER_1_2(neon_cgt_u32, uint32_t, (uint32_t, uint32_t)) | ||
| 265 | +DEF_HELPER_1_2(neon_cgt_s32, uint32_t, (uint32_t, uint32_t)) | ||
| 266 | +DEF_HELPER_1_2(neon_cge_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 267 | +DEF_HELPER_1_2(neon_cge_s8, uint32_t, (uint32_t, uint32_t)) | ||
| 268 | +DEF_HELPER_1_2(neon_cge_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 269 | +DEF_HELPER_1_2(neon_cge_s16, uint32_t, (uint32_t, uint32_t)) | ||
| 270 | +DEF_HELPER_1_2(neon_cge_u32, uint32_t, (uint32_t, uint32_t)) | ||
| 271 | +DEF_HELPER_1_2(neon_cge_s32, uint32_t, (uint32_t, uint32_t)) | ||
| 272 | + | ||
| 273 | +DEF_HELPER_1_2(neon_min_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 274 | +DEF_HELPER_1_2(neon_min_s8, uint32_t, (uint32_t, uint32_t)) | ||
| 275 | +DEF_HELPER_1_2(neon_min_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 276 | +DEF_HELPER_1_2(neon_min_s16, uint32_t, (uint32_t, uint32_t)) | ||
| 277 | +DEF_HELPER_1_2(neon_min_u32, uint32_t, (uint32_t, uint32_t)) | ||
| 278 | +DEF_HELPER_1_2(neon_min_s32, uint32_t, (uint32_t, uint32_t)) | ||
| 279 | +DEF_HELPER_1_2(neon_max_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 280 | +DEF_HELPER_1_2(neon_max_s8, uint32_t, (uint32_t, uint32_t)) | ||
| 281 | +DEF_HELPER_1_2(neon_max_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 282 | +DEF_HELPER_1_2(neon_max_s16, uint32_t, (uint32_t, uint32_t)) | ||
| 283 | +DEF_HELPER_1_2(neon_max_u32, uint32_t, (uint32_t, uint32_t)) | ||
| 284 | +DEF_HELPER_1_2(neon_max_s32, uint32_t, (uint32_t, uint32_t)) | ||
| 285 | +DEF_HELPER_1_2(neon_pmin_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 286 | +DEF_HELPER_1_2(neon_pmin_s8, uint32_t, (uint32_t, uint32_t)) | ||
| 287 | +DEF_HELPER_1_2(neon_pmin_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 288 | +DEF_HELPER_1_2(neon_pmin_s16, uint32_t, (uint32_t, uint32_t)) | ||
| 289 | +DEF_HELPER_1_2(neon_pmin_u32, uint32_t, (uint32_t, uint32_t)) | ||
| 290 | +DEF_HELPER_1_2(neon_pmin_s32, uint32_t, (uint32_t, uint32_t)) | ||
| 291 | +DEF_HELPER_1_2(neon_pmax_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 292 | +DEF_HELPER_1_2(neon_pmax_s8, uint32_t, (uint32_t, uint32_t)) | ||
| 293 | +DEF_HELPER_1_2(neon_pmax_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 294 | +DEF_HELPER_1_2(neon_pmax_s16, uint32_t, (uint32_t, uint32_t)) | ||
| 295 | +DEF_HELPER_1_2(neon_pmax_u32, uint32_t, (uint32_t, uint32_t)) | ||
| 296 | +DEF_HELPER_1_2(neon_pmax_s32, uint32_t, (uint32_t, uint32_t)) | ||
| 297 | + | ||
| 298 | +DEF_HELPER_1_2(neon_abd_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 299 | +DEF_HELPER_1_2(neon_abd_s8, uint32_t, (uint32_t, uint32_t)) | ||
| 300 | +DEF_HELPER_1_2(neon_abd_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 301 | +DEF_HELPER_1_2(neon_abd_s16, uint32_t, (uint32_t, uint32_t)) | ||
| 302 | +DEF_HELPER_1_2(neon_abd_u32, uint32_t, (uint32_t, uint32_t)) | ||
| 303 | +DEF_HELPER_1_2(neon_abd_s32, uint32_t, (uint32_t, uint32_t)) | ||
| 304 | + | ||
| 305 | +DEF_HELPER_1_2(neon_shl_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 306 | +DEF_HELPER_1_2(neon_shl_s8, uint32_t, (uint32_t, uint32_t)) | ||
| 307 | +DEF_HELPER_1_2(neon_shl_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 308 | +DEF_HELPER_1_2(neon_shl_s16, uint32_t, (uint32_t, uint32_t)) | ||
| 309 | +DEF_HELPER_1_2(neon_shl_u32, uint32_t, (uint32_t, uint32_t)) | ||
| 310 | +DEF_HELPER_1_2(neon_shl_s32, uint32_t, (uint32_t, uint32_t)) | ||
| 311 | +DEF_HELPER_1_2(neon_shl_u64, uint64_t, (uint64_t, uint64_t)) | ||
| 312 | +DEF_HELPER_1_2(neon_shl_s64, uint64_t, (uint64_t, uint64_t)) | ||
| 313 | +DEF_HELPER_1_2(neon_rshl_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 314 | +DEF_HELPER_1_2(neon_rshl_s8, uint32_t, (uint32_t, uint32_t)) | ||
| 315 | +DEF_HELPER_1_2(neon_rshl_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 316 | +DEF_HELPER_1_2(neon_rshl_s16, uint32_t, (uint32_t, uint32_t)) | ||
| 317 | +DEF_HELPER_1_2(neon_rshl_u32, uint32_t, (uint32_t, uint32_t)) | ||
| 318 | +DEF_HELPER_1_2(neon_rshl_s32, uint32_t, (uint32_t, uint32_t)) | ||
| 319 | +DEF_HELPER_1_2(neon_rshl_u64, uint64_t, (uint64_t, uint64_t)) | ||
| 320 | +DEF_HELPER_1_2(neon_rshl_s64, uint64_t, (uint64_t, uint64_t)) | ||
| 321 | +DEF_HELPER_1_3(neon_qshl_u8, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 322 | +DEF_HELPER_1_3(neon_qshl_s8, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 323 | +DEF_HELPER_1_3(neon_qshl_u16, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 324 | +DEF_HELPER_1_3(neon_qshl_s16, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 325 | +DEF_HELPER_1_3(neon_qshl_u32, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 326 | +DEF_HELPER_1_3(neon_qshl_s32, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 327 | +DEF_HELPER_1_3(neon_qshl_u64, uint64_t, (CPUState *, uint64_t, uint64_t)) | ||
| 328 | +DEF_HELPER_1_3(neon_qshl_s64, uint64_t, (CPUState *, uint64_t, uint64_t)) | ||
| 329 | +DEF_HELPER_1_3(neon_qrshl_u8, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 330 | +DEF_HELPER_1_3(neon_qrshl_s8, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 331 | +DEF_HELPER_1_3(neon_qrshl_u16, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 332 | +DEF_HELPER_1_3(neon_qrshl_s16, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 333 | +DEF_HELPER_1_3(neon_qrshl_u32, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 334 | +DEF_HELPER_1_3(neon_qrshl_s32, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 335 | +DEF_HELPER_1_3(neon_qrshl_u64, uint64_t, (CPUState *, uint64_t, uint64_t)) | ||
| 336 | +DEF_HELPER_1_3(neon_qrshl_s64, uint64_t, (CPUState *, uint64_t, uint64_t)) | ||
| 337 | + | ||
| 338 | +DEF_HELPER_1_2(neon_add_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 339 | +DEF_HELPER_1_2(neon_add_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 340 | +DEF_HELPER_1_2(neon_padd_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 341 | +DEF_HELPER_1_2(neon_padd_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 342 | +DEF_HELPER_1_2(neon_sub_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 343 | +DEF_HELPER_1_2(neon_sub_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 344 | +DEF_HELPER_1_2(neon_mul_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 345 | +DEF_HELPER_1_2(neon_mul_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 346 | +DEF_HELPER_1_2(neon_mul_p8, uint32_t, (uint32_t, uint32_t)) | ||
| 347 | + | ||
| 348 | +DEF_HELPER_1_2(neon_tst_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 349 | +DEF_HELPER_1_2(neon_tst_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 350 | +DEF_HELPER_1_2(neon_tst_u32, uint32_t, (uint32_t, uint32_t)) | ||
| 351 | +DEF_HELPER_1_2(neon_ceq_u8, uint32_t, (uint32_t, uint32_t)) | ||
| 352 | +DEF_HELPER_1_2(neon_ceq_u16, uint32_t, (uint32_t, uint32_t)) | ||
| 353 | +DEF_HELPER_1_2(neon_ceq_u32, uint32_t, (uint32_t, uint32_t)) | ||
| 354 | + | ||
| 355 | +DEF_HELPER_1_1(neon_abs_s8, uint32_t, (uint32_t)) | ||
| 356 | +DEF_HELPER_1_1(neon_abs_s16, uint32_t, (uint32_t)) | ||
| 357 | +DEF_HELPER_1_1(neon_clz_u8, uint32_t, (uint32_t)) | ||
| 358 | +DEF_HELPER_1_1(neon_clz_u16, uint32_t, (uint32_t)) | ||
| 359 | +DEF_HELPER_1_1(neon_cls_s8, uint32_t, (uint32_t)) | ||
| 360 | +DEF_HELPER_1_1(neon_cls_s16, uint32_t, (uint32_t)) | ||
| 361 | +DEF_HELPER_1_1(neon_cls_s32, uint32_t, (uint32_t)) | ||
| 362 | +DEF_HELPER_1_1(neon_cnt_u8, uint32_t, (uint32_t)) | ||
| 363 | + | ||
| 364 | +DEF_HELPER_1_3(neon_qdmulh_s16, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 365 | +DEF_HELPER_1_3(neon_qrdmulh_s16, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 366 | +DEF_HELPER_1_3(neon_qdmulh_s32, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 367 | +DEF_HELPER_1_3(neon_qrdmulh_s32, uint32_t, (CPUState *, uint32_t, uint32_t)) | ||
| 368 | + | ||
| 369 | +DEF_HELPER_1_1(neon_narrow_u8, uint32_t, (uint64_t)) | ||
| 370 | +DEF_HELPER_1_1(neon_narrow_u16, uint32_t, (uint64_t)) | ||
| 371 | +DEF_HELPER_1_2(neon_narrow_sat_u8, uint32_t, (CPUState *, uint64_t)) | ||
| 372 | +DEF_HELPER_1_2(neon_narrow_sat_s8, uint32_t, (CPUState *, uint64_t)) | ||
| 373 | +DEF_HELPER_1_2(neon_narrow_sat_u16, uint32_t, (CPUState *, uint64_t)) | ||
| 374 | +DEF_HELPER_1_2(neon_narrow_sat_s16, uint32_t, (CPUState *, uint64_t)) | ||
| 375 | +DEF_HELPER_1_2(neon_narrow_sat_u32, uint32_t, (CPUState *, uint64_t)) | ||
| 376 | +DEF_HELPER_1_2(neon_narrow_sat_s32, uint32_t, (CPUState *, uint64_t)) | ||
| 377 | +DEF_HELPER_1_1(neon_narrow_high_u8, uint32_t, (uint64_t)) | ||
| 378 | +DEF_HELPER_1_1(neon_narrow_high_u16, uint32_t, (uint64_t)) | ||
| 379 | +DEF_HELPER_1_1(neon_narrow_round_high_u8, uint32_t, (uint64_t)) | ||
| 380 | +DEF_HELPER_1_1(neon_narrow_round_high_u16, uint32_t, (uint64_t)) | ||
| 381 | +DEF_HELPER_1_1(neon_widen_u8, uint64_t, (uint32_t)) | ||
| 382 | +DEF_HELPER_1_1(neon_widen_s8, uint64_t, (uint32_t)) | ||
| 383 | +DEF_HELPER_1_1(neon_widen_u16, uint64_t, (uint32_t)) | ||
| 384 | +DEF_HELPER_1_1(neon_widen_s16, uint64_t, (uint32_t)) | ||
| 385 | + | ||
| 386 | +DEF_HELPER_1_2(neon_addl_u16, uint64_t, (uint64_t, uint64_t)) | ||
| 387 | +DEF_HELPER_1_2(neon_addl_u32, uint64_t, (uint64_t, uint64_t)) | ||
| 388 | +DEF_HELPER_1_2(neon_paddl_u16, uint64_t, (uint64_t, uint64_t)) | ||
| 389 | +DEF_HELPER_1_2(neon_paddl_u32, uint64_t, (uint64_t, uint64_t)) | ||
| 390 | +DEF_HELPER_1_2(neon_subl_u16, uint64_t, (uint64_t, uint64_t)) | ||
| 391 | +DEF_HELPER_1_2(neon_subl_u32, uint64_t, (uint64_t, uint64_t)) | ||
| 392 | +DEF_HELPER_1_3(neon_addl_saturate_s32, uint64_t, (CPUState *, uint64_t, uint64_t)) | ||
| 393 | +DEF_HELPER_1_3(neon_addl_saturate_s64, uint64_t, (CPUState *, uint64_t, uint64_t)) | ||
| 394 | +DEF_HELPER_1_2(neon_abdl_u16, uint64_t, (uint32_t, uint32_t)) | ||
| 395 | +DEF_HELPER_1_2(neon_abdl_s16, uint64_t, (uint32_t, uint32_t)) | ||
| 396 | +DEF_HELPER_1_2(neon_abdl_u32, uint64_t, (uint32_t, uint32_t)) | ||
| 397 | +DEF_HELPER_1_2(neon_abdl_s32, uint64_t, (uint32_t, uint32_t)) | ||
| 398 | +DEF_HELPER_1_2(neon_abdl_u64, uint64_t, (uint32_t, uint32_t)) | ||
| 399 | +DEF_HELPER_1_2(neon_abdl_s64, uint64_t, (uint32_t, uint32_t)) | ||
| 400 | +DEF_HELPER_1_2(neon_mull_u8, uint64_t, (uint32_t, uint32_t)) | ||
| 401 | +DEF_HELPER_1_2(neon_mull_s8, uint64_t, (uint32_t, uint32_t)) | ||
| 402 | +DEF_HELPER_1_2(neon_mull_u16, uint64_t, (uint32_t, uint32_t)) | ||
| 403 | +DEF_HELPER_1_2(neon_mull_s16, uint64_t, (uint32_t, uint32_t)) | ||
| 404 | + | ||
| 405 | +DEF_HELPER_1_1(neon_negl_u16, uint64_t, (uint64_t)) | ||
| 406 | +DEF_HELPER_1_1(neon_negl_u32, uint64_t, (uint64_t)) | ||
| 407 | +DEF_HELPER_1_1(neon_negl_u64, uint64_t, (uint64_t)) | ||
| 408 | + | ||
| 409 | +DEF_HELPER_1_2(neon_qabs_s8, uint32_t, (CPUState *, uint32_t)) | ||
| 410 | +DEF_HELPER_1_2(neon_qabs_s16, uint32_t, (CPUState *, uint32_t)) | ||
| 411 | +DEF_HELPER_1_2(neon_qabs_s32, uint32_t, (CPUState *, uint32_t)) | ||
| 412 | +DEF_HELPER_1_2(neon_qneg_s8, uint32_t, (CPUState *, uint32_t)) | ||
| 413 | +DEF_HELPER_1_2(neon_qneg_s16, uint32_t, (CPUState *, uint32_t)) | ||
| 414 | +DEF_HELPER_1_2(neon_qneg_s32, uint32_t, (CPUState *, uint32_t)) | ||
| 415 | + | ||
| 416 | +DEF_HELPER_0_0(neon_trn_u8, void, (void)) | ||
| 417 | +DEF_HELPER_0_0(neon_trn_u16, void, (void)) | ||
| 418 | +DEF_HELPER_0_0(neon_unzip_u8, void, (void)) | ||
| 419 | +DEF_HELPER_0_0(neon_zip_u8, void, (void)) | ||
| 420 | +DEF_HELPER_0_0(neon_zip_u16, void, (void)) | ||
| 421 | + | ||
| 422 | +DEF_HELPER_1_2(neon_min_f32, uint32_t, (uint32_t, uint32_t)) | ||
| 423 | +DEF_HELPER_1_2(neon_max_f32, uint32_t, (uint32_t, uint32_t)) | ||
| 424 | +DEF_HELPER_1_2(neon_abd_f32, uint32_t, (uint32_t, uint32_t)) | ||
| 425 | +DEF_HELPER_1_2(neon_add_f32, uint32_t, (uint32_t, uint32_t)) | ||
| 426 | +DEF_HELPER_1_2(neon_sub_f32, uint32_t, (uint32_t, uint32_t)) | ||
| 427 | +DEF_HELPER_1_2(neon_mul_f32, uint32_t, (uint32_t, uint32_t)) | ||
| 428 | +DEF_HELPER_1_2(neon_ceq_f32, uint32_t, (uint32_t, uint32_t)) | ||
| 429 | +DEF_HELPER_1_2(neon_cge_f32, uint32_t, (uint32_t, uint32_t)) | ||
| 430 | +DEF_HELPER_1_2(neon_cgt_f32, uint32_t, (uint32_t, uint32_t)) | ||
| 431 | +DEF_HELPER_1_2(neon_acge_f32, uint32_t, (uint32_t, uint32_t)) | ||
| 432 | +DEF_HELPER_1_2(neon_acgt_f32, uint32_t, (uint32_t, uint32_t)) | ||
| 433 | + | ||
| 226 | #undef DEF_HELPER | 434 | #undef DEF_HELPER |
| 227 | #undef DEF_HELPER_0_0 | 435 | #undef DEF_HELPER_0_0 |
| 228 | #undef DEF_HELPER_0_1 | 436 | #undef DEF_HELPER_0_1 |
target-arm/neon_helper.c
0 → 100644
| 1 | +#include <stdlib.h> | ||
| 2 | +#include <stdio.h> | ||
| 3 | + | ||
| 4 | +#include "cpu.h" | ||
| 5 | +#include "exec-all.h" | ||
| 6 | +#include "helpers.h" | ||
| 7 | + | ||
| 8 | +#define SIGNBIT (uint32_t)0x80000000 | ||
| 9 | +#define SIGNBIT64 ((uint64_t)1 << 63) | ||
| 10 | + | ||
| 11 | +#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] = CPSR_Q | ||
| 12 | + | ||
| 13 | +static float_status neon_float_status; | ||
| 14 | +#define NFS &neon_float_status | ||
| 15 | + | ||
| 16 | +/* Helper routines to perform bitwise copies between float and int. */ | ||
| 17 | +static inline float32 vfp_itos(uint32_t i) | ||
| 18 | +{ | ||
| 19 | + union { | ||
| 20 | + uint32_t i; | ||
| 21 | + float32 s; | ||
| 22 | + } v; | ||
| 23 | + | ||
| 24 | + v.i = i; | ||
| 25 | + return v.s; | ||
| 26 | +} | ||
| 27 | + | ||
| 28 | +static inline uint32_t vfp_stoi(float32 s) | ||
| 29 | +{ | ||
| 30 | + union { | ||
| 31 | + uint32_t i; | ||
| 32 | + float32 s; | ||
| 33 | + } v; | ||
| 34 | + | ||
| 35 | + v.s = s; | ||
| 36 | + return v.i; | ||
| 37 | +} | ||
| 38 | + | ||
| 39 | +#define NEON_TYPE1(name, type) \ | ||
| 40 | +typedef struct \ | ||
| 41 | +{ \ | ||
| 42 | + type v1; \ | ||
| 43 | +} neon_##name; | ||
| 44 | +#ifdef WORDS_BIGENDIAN | ||
| 45 | +#define NEON_TYPE2(name, type) \ | ||
| 46 | +typedef struct \ | ||
| 47 | +{ \ | ||
| 48 | + type v2; \ | ||
| 49 | + type v1; \ | ||
| 50 | +} neon_##name; | ||
| 51 | +#define NEON_TYPE4(name, type) \ | ||
| 52 | +typedef struct \ | ||
| 53 | +{ \ | ||
| 54 | + type v4; \ | ||
| 55 | + type v3; \ | ||
| 56 | + type v2; \ | ||
| 57 | + type v1; \ | ||
| 58 | +} neon_##name; | ||
| 59 | +#else | ||
| 60 | +#define NEON_TYPE2(name, type) \ | ||
| 61 | +typedef struct \ | ||
| 62 | +{ \ | ||
| 63 | + type v1; \ | ||
| 64 | + type v2; \ | ||
| 65 | +} neon_##name; | ||
| 66 | +#define NEON_TYPE4(name, type) \ | ||
| 67 | +typedef struct \ | ||
| 68 | +{ \ | ||
| 69 | + type v1; \ | ||
| 70 | + type v2; \ | ||
| 71 | + type v3; \ | ||
| 72 | + type v4; \ | ||
| 73 | +} neon_##name; | ||
| 74 | +#endif | ||
| 75 | + | ||
| 76 | +NEON_TYPE4(s8, int8_t) | ||
| 77 | +NEON_TYPE4(u8, uint8_t) | ||
| 78 | +NEON_TYPE2(s16, int16_t) | ||
| 79 | +NEON_TYPE2(u16, uint16_t) | ||
| 80 | +NEON_TYPE1(s32, int32_t) | ||
| 81 | +NEON_TYPE1(u32, uint32_t) | ||
| 82 | +#undef NEON_TYPE4 | ||
| 83 | +#undef NEON_TYPE2 | ||
| 84 | +#undef NEON_TYPE1 | ||
| 85 | + | ||
| 86 | +/* Copy from a uint32_t to a vector structure type. */ | ||
| 87 | +#define NEON_UNPACK(vtype, dest, val) do { \ | ||
| 88 | + union { \ | ||
| 89 | + vtype v; \ | ||
| 90 | + uint32_t i; \ | ||
| 91 | + } conv_u; \ | ||
| 92 | + conv_u.i = (val); \ | ||
| 93 | + dest = conv_u.v; \ | ||
| 94 | + } while(0) | ||
| 95 | + | ||
| 96 | +/* Copy from a vector structure type to a uint32_t. */ | ||
| 97 | +#define NEON_PACK(vtype, dest, val) do { \ | ||
| 98 | + union { \ | ||
| 99 | + vtype v; \ | ||
| 100 | + uint32_t i; \ | ||
| 101 | + } conv_u; \ | ||
| 102 | + conv_u.v = (val); \ | ||
| 103 | + dest = conv_u.i; \ | ||
| 104 | + } while(0) | ||
| 105 | + | ||
| 106 | +#define NEON_DO1 \ | ||
| 107 | + NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); | ||
| 108 | +#define NEON_DO2 \ | ||
| 109 | + NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \ | ||
| 110 | + NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); | ||
| 111 | +#define NEON_DO4 \ | ||
| 112 | + NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \ | ||
| 113 | + NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); \ | ||
| 114 | + NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \ | ||
| 115 | + NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4); | ||
| 116 | + | ||
| 117 | +#define NEON_VOP_BODY(vtype, n) \ | ||
| 118 | +{ \ | ||
| 119 | + uint32_t res; \ | ||
| 120 | + vtype vsrc1; \ | ||
| 121 | + vtype vsrc2; \ | ||
| 122 | + vtype vdest; \ | ||
| 123 | + NEON_UNPACK(vtype, vsrc1, arg1); \ | ||
| 124 | + NEON_UNPACK(vtype, vsrc2, arg2); \ | ||
| 125 | + NEON_DO##n; \ | ||
| 126 | + NEON_PACK(vtype, res, vdest); \ | ||
| 127 | + return res; \ | ||
| 128 | +} | ||
| 129 | + | ||
| 130 | +#define NEON_VOP(name, vtype, n) \ | ||
| 131 | +uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \ | ||
| 132 | +NEON_VOP_BODY(vtype, n) | ||
| 133 | + | ||
| 134 | +#define NEON_VOP_ENV(name, vtype, n) \ | ||
| 135 | +uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \ | ||
| 136 | +NEON_VOP_BODY(vtype, n) | ||
| 137 | + | ||
| 138 | +/* Pairwise operations. */ | ||
| 139 | +/* For 32-bit elements each segment only contains a single element, so | ||
| 140 | + the elementwise and pairwise operations are the same. */ | ||
| 141 | +#define NEON_PDO2 \ | ||
| 142 | + NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \ | ||
| 143 | + NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2); | ||
| 144 | +#define NEON_PDO4 \ | ||
| 145 | + NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \ | ||
| 146 | + NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \ | ||
| 147 | + NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \ | ||
| 148 | + NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4); \ | ||
| 149 | + | ||
| 150 | +#define NEON_POP(name, vtype, n) \ | ||
| 151 | +uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \ | ||
| 152 | +{ \ | ||
| 153 | + uint32_t res; \ | ||
| 154 | + vtype vsrc1; \ | ||
| 155 | + vtype vsrc2; \ | ||
| 156 | + vtype vdest; \ | ||
| 157 | + NEON_UNPACK(vtype, vsrc1, arg1); \ | ||
| 158 | + NEON_UNPACK(vtype, vsrc2, arg2); \ | ||
| 159 | + NEON_PDO##n; \ | ||
| 160 | + NEON_PACK(vtype, res, vdest); \ | ||
| 161 | + return res; \ | ||
| 162 | +} | ||
| 163 | + | ||
| 164 | +/* Unary operators. */ | ||
| 165 | +#define NEON_VOP1(name, vtype, n) \ | ||
| 166 | +uint32_t HELPER(glue(neon_,name))(uint32_t arg) \ | ||
| 167 | +{ \ | ||
| 168 | + vtype vsrc1; \ | ||
| 169 | + vtype vdest; \ | ||
| 170 | + NEON_UNPACK(vtype, vsrc1, arg); \ | ||
| 171 | + NEON_DO##n; \ | ||
| 172 | + NEON_PACK(vtype, arg, vdest); \ | ||
| 173 | + return arg; \ | ||
| 174 | +} | ||
| 175 | + | ||
| 176 | + | ||
| 177 | +#define NEON_USAT(dest, src1, src2, type) do { \ | ||
| 178 | + uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ | ||
| 179 | + if (tmp != (type)tmp) { \ | ||
| 180 | + SET_QC(); \ | ||
| 181 | + dest = ~0; \ | ||
| 182 | + } else { \ | ||
| 183 | + dest = tmp; \ | ||
| 184 | + }} while(0) | ||
| 185 | +#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) | ||
| 186 | +NEON_VOP_ENV(qadd_u8, neon_u8, 4) | ||
| 187 | +#undef NEON_FN | ||
| 188 | +#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) | ||
| 189 | +NEON_VOP_ENV(qadd_u16, neon_u16, 2) | ||
| 190 | +#undef NEON_FN | ||
| 191 | +#undef NEON_USAT | ||
| 192 | + | ||
| 193 | +#define NEON_SSAT(dest, src1, src2, type) do { \ | ||
| 194 | + int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ | ||
| 195 | + if (tmp != (type)tmp) { \ | ||
| 196 | + SET_QC(); \ | ||
| 197 | + if (src2 > 0) { \ | ||
| 198 | + tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ | ||
| 199 | + } else { \ | ||
| 200 | + tmp = 1 << (sizeof(type) * 8 - 1); \ | ||
| 201 | + } \ | ||
| 202 | + } \ | ||
| 203 | + dest = tmp; \ | ||
| 204 | + } while(0) | ||
| 205 | +#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) | ||
| 206 | +NEON_VOP_ENV(qadd_s8, neon_s8, 4) | ||
| 207 | +#undef NEON_FN | ||
| 208 | +#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) | ||
| 209 | +NEON_VOP_ENV(qadd_s16, neon_s16, 2) | ||
| 210 | +#undef NEON_FN | ||
| 211 | +#undef NEON_SSAT | ||
| 212 | + | ||
| 213 | +#define NEON_USAT(dest, src1, src2, type) do { \ | ||
| 214 | + uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ | ||
| 215 | + if (tmp != (type)tmp) { \ | ||
| 216 | + SET_QC(); \ | ||
| 217 | + dest = 0; \ | ||
| 218 | + } else { \ | ||
| 219 | + dest = tmp; \ | ||
| 220 | + }} while(0) | ||
| 221 | +#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) | ||
| 222 | +NEON_VOP_ENV(qsub_u8, neon_u8, 4) | ||
| 223 | +#undef NEON_FN | ||
| 224 | +#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) | ||
| 225 | +NEON_VOP_ENV(qsub_u16, neon_u16, 2) | ||
| 226 | +#undef NEON_FN | ||
| 227 | +#undef NEON_USAT | ||
| 228 | + | ||
| 229 | +#define NEON_SSAT(dest, src1, src2, type) do { \ | ||
| 230 | + int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ | ||
| 231 | + if (tmp != (type)tmp) { \ | ||
| 232 | + SET_QC(); \ | ||
| 233 | + if (src2 < 0) { \ | ||
| 234 | + tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ | ||
| 235 | + } else { \ | ||
| 236 | + tmp = 1 << (sizeof(type) * 8 - 1); \ | ||
| 237 | + } \ | ||
| 238 | + } \ | ||
| 239 | + dest = tmp; \ | ||
| 240 | + } while(0) | ||
| 241 | +#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) | ||
| 242 | +NEON_VOP_ENV(qsub_s8, neon_s8, 4) | ||
| 243 | +#undef NEON_FN | ||
| 244 | +#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) | ||
| 245 | +NEON_VOP_ENV(qsub_s16, neon_s16, 2) | ||
| 246 | +#undef NEON_FN | ||
| 247 | +#undef NEON_SSAT | ||
| 248 | + | ||
| 249 | +#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 | ||
| 250 | +NEON_VOP(hadd_s8, neon_s8, 4) | ||
| 251 | +NEON_VOP(hadd_u8, neon_u8, 4) | ||
| 252 | +NEON_VOP(hadd_s16, neon_s16, 2) | ||
| 253 | +NEON_VOP(hadd_u16, neon_u16, 2) | ||
| 254 | +#undef NEON_FN | ||
| 255 | + | ||
| 256 | +int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2) | ||
| 257 | +{ | ||
| 258 | + int32_t dest; | ||
| 259 | + | ||
| 260 | + dest = (src1 >> 1) + (src2 >> 1); | ||
| 261 | + if (src1 & src2 & 1) | ||
| 262 | + dest++; | ||
| 263 | + return dest; | ||
| 264 | +} | ||
| 265 | + | ||
| 266 | +uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2) | ||
| 267 | +{ | ||
| 268 | + uint32_t dest; | ||
| 269 | + | ||
| 270 | + dest = (src1 >> 1) + (src2 >> 1); | ||
| 271 | + if (src1 & src2 & 1) | ||
| 272 | + dest++; | ||
| 273 | + return dest; | ||
| 274 | +} | ||
| 275 | + | ||
| 276 | +#define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1 | ||
| 277 | +NEON_VOP(rhadd_s8, neon_s8, 4) | ||
| 278 | +NEON_VOP(rhadd_u8, neon_u8, 4) | ||
| 279 | +NEON_VOP(rhadd_s16, neon_s16, 2) | ||
| 280 | +NEON_VOP(rhadd_u16, neon_u16, 2) | ||
| 281 | +#undef NEON_FN | ||
| 282 | + | ||
| 283 | +int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2) | ||
| 284 | +{ | ||
| 285 | + int32_t dest; | ||
| 286 | + | ||
| 287 | + dest = (src1 >> 1) + (src2 >> 1); | ||
| 288 | + if ((src1 | src2) & 1) | ||
| 289 | + dest++; | ||
| 290 | + return dest; | ||
| 291 | +} | ||
| 292 | + | ||
| 293 | +uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2) | ||
| 294 | +{ | ||
| 295 | + uint32_t dest; | ||
| 296 | + | ||
| 297 | + dest = (src1 >> 1) + (src2 >> 1); | ||
| 298 | + if ((src1 | src2) & 1) | ||
| 299 | + dest++; | ||
| 300 | + return dest; | ||
| 301 | +} | ||
| 302 | + | ||
| 303 | +#define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1 | ||
| 304 | +NEON_VOP(hsub_s8, neon_s8, 4) | ||
| 305 | +NEON_VOP(hsub_u8, neon_u8, 4) | ||
| 306 | +NEON_VOP(hsub_s16, neon_s16, 2) | ||
| 307 | +NEON_VOP(hsub_u16, neon_u16, 2) | ||
| 308 | +#undef NEON_FN | ||
| 309 | + | ||
| 310 | +int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2) | ||
| 311 | +{ | ||
| 312 | + int32_t dest; | ||
| 313 | + | ||
| 314 | + dest = (src1 >> 1) - (src2 >> 1); | ||
| 315 | + if ((~src1) & src2 & 1) | ||
| 316 | + dest--; | ||
| 317 | + return dest; | ||
| 318 | +} | ||
| 319 | + | ||
| 320 | +uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) | ||
| 321 | +{ | ||
| 322 | + uint32_t dest; | ||
| 323 | + | ||
| 324 | + dest = (src1 >> 1) - (src2 >> 1); | ||
| 325 | + if ((~src1) & src2 & 1) | ||
| 326 | + dest--; | ||
| 327 | + return dest; | ||
| 328 | +} | ||
| 329 | + | ||
| 330 | +#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0 | ||
| 331 | +NEON_VOP(cgt_s8, neon_s8, 4) | ||
| 332 | +NEON_VOP(cgt_u8, neon_u8, 4) | ||
| 333 | +NEON_VOP(cgt_s16, neon_s16, 2) | ||
| 334 | +NEON_VOP(cgt_u16, neon_u16, 2) | ||
| 335 | +NEON_VOP(cgt_s32, neon_s32, 1) | ||
| 336 | +NEON_VOP(cgt_u32, neon_u32, 1) | ||
| 337 | +#undef NEON_FN | ||
| 338 | + | ||
| 339 | +#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0 | ||
| 340 | +NEON_VOP(cge_s8, neon_s8, 4) | ||
| 341 | +NEON_VOP(cge_u8, neon_u8, 4) | ||
| 342 | +NEON_VOP(cge_s16, neon_s16, 2) | ||
| 343 | +NEON_VOP(cge_u16, neon_u16, 2) | ||
| 344 | +NEON_VOP(cge_s32, neon_s32, 1) | ||
| 345 | +NEON_VOP(cge_u32, neon_u32, 1) | ||
| 346 | +#undef NEON_FN | ||
| 347 | + | ||
| 348 | +#define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2 | ||
| 349 | +NEON_VOP(min_s8, neon_s8, 4) | ||
| 350 | +NEON_VOP(min_u8, neon_u8, 4) | ||
| 351 | +NEON_VOP(min_s16, neon_s16, 2) | ||
| 352 | +NEON_VOP(min_u16, neon_u16, 2) | ||
| 353 | +NEON_VOP(min_s32, neon_s32, 1) | ||
| 354 | +NEON_VOP(min_u32, neon_u32, 1) | ||
| 355 | +NEON_POP(pmin_s8, neon_s8, 4) | ||
| 356 | +NEON_POP(pmin_u8, neon_u8, 4) | ||
| 357 | +NEON_POP(pmin_s16, neon_s16, 2) | ||
| 358 | +NEON_POP(pmin_u16, neon_u16, 2) | ||
| 359 | +#undef NEON_FN | ||
| 360 | + | ||
| 361 | +#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2 | ||
| 362 | +NEON_VOP(max_s8, neon_s8, 4) | ||
| 363 | +NEON_VOP(max_u8, neon_u8, 4) | ||
| 364 | +NEON_VOP(max_s16, neon_s16, 2) | ||
| 365 | +NEON_VOP(max_u16, neon_u16, 2) | ||
| 366 | +NEON_VOP(max_s32, neon_s32, 1) | ||
| 367 | +NEON_VOP(max_u32, neon_u32, 1) | ||
| 368 | +NEON_POP(pmax_s8, neon_s8, 4) | ||
| 369 | +NEON_POP(pmax_u8, neon_u8, 4) | ||
| 370 | +NEON_POP(pmax_s16, neon_s16, 2) | ||
| 371 | +NEON_POP(pmax_u16, neon_u16, 2) | ||
| 372 | +#undef NEON_FN | ||
| 373 | + | ||
| 374 | +#define NEON_FN(dest, src1, src2) \ | ||
| 375 | + dest = (src1 > src2) ? (src1 - src2) : (src2 - src1) | ||
| 376 | +NEON_VOP(abd_s8, neon_s8, 4) | ||
| 377 | +NEON_VOP(abd_u8, neon_u8, 4) | ||
| 378 | +NEON_VOP(abd_s16, neon_s16, 2) | ||
| 379 | +NEON_VOP(abd_u16, neon_u16, 2) | ||
| 380 | +NEON_VOP(abd_s32, neon_s32, 1) | ||
| 381 | +NEON_VOP(abd_u32, neon_u32, 1) | ||
| 382 | +#undef NEON_FN | ||
| 383 | + | ||
| 384 | +#define NEON_FN(dest, src1, src2) do { \ | ||
| 385 | + int8_t tmp; \ | ||
| 386 | + tmp = (int8_t)src2; \ | ||
| 387 | + if (tmp >= sizeof(src1) * 8 || tmp <= -sizeof(src1) * 8) { \ | ||
| 388 | + dest = 0; \ | ||
| 389 | + } else if (tmp < 0) { \ | ||
| 390 | + dest = src1 >> -tmp; \ | ||
| 391 | + } else { \ | ||
| 392 | + dest = src1 << tmp; \ | ||
| 393 | + }} while (0) | ||
| 394 | +NEON_VOP(shl_u8, neon_u8, 4) | ||
| 395 | +NEON_VOP(shl_u16, neon_u16, 2) | ||
| 396 | +NEON_VOP(shl_u32, neon_u32, 1) | ||
| 397 | +#undef NEON_FN | ||
| 398 | + | ||
| 399 | +uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop) | ||
| 400 | +{ | ||
| 401 | + int8_t shift = (int8_t)shiftop; | ||
| 402 | + if (shift >= 64 || shift <= -64) { | ||
| 403 | + val = 0; | ||
| 404 | + } else if (shift < 0) { | ||
| 405 | + val >>= -shift; | ||
| 406 | + } else { | ||
| 407 | + val <<= shift; | ||
| 408 | + } | ||
| 409 | + return val; | ||
| 410 | +} | ||
| 411 | + | ||
| 412 | +#define NEON_FN(dest, src1, src2) do { \ | ||
| 413 | + int8_t tmp; \ | ||
| 414 | + tmp = (int8_t)src2; \ | ||
| 415 | + if (tmp >= sizeof(src1) * 8) { \ | ||
| 416 | + dest = 0; \ | ||
| 417 | + } else if (tmp <= -sizeof(src1) * 8) { \ | ||
| 418 | + dest = src1 >> (sizeof(src1) * 8 - 1); \ | ||
| 419 | + } else if (tmp < 0) { \ | ||
| 420 | + dest = src1 >> -tmp; \ | ||
| 421 | + } else { \ | ||
| 422 | + dest = src1 << tmp; \ | ||
| 423 | + }} while (0) | ||
| 424 | +NEON_VOP(shl_s8, neon_s8, 4) | ||
| 425 | +NEON_VOP(shl_s16, neon_s16, 2) | ||
| 426 | +NEON_VOP(shl_s32, neon_s32, 1) | ||
| 427 | +#undef NEON_FN | ||
| 428 | + | ||
| 429 | +uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) | ||
| 430 | +{ | ||
| 431 | + int8_t shift = (int8_t)shiftop; | ||
| 432 | + int64_t val = valop; | ||
| 433 | + if (shift >= 64) { | ||
| 434 | + val = 0; | ||
| 435 | + } else if (shift <= -64) { | ||
| 436 | + val >>= 63; | ||
| 437 | + } else if (shift < 0) { | ||
| 438 | + val >>= -shift; | ||
| 439 | + } else { | ||
| 440 | + val <<= shift; | ||
| 441 | + } | ||
| 442 | + return val; | ||
| 443 | +} | ||
| 444 | + | ||
| 445 | +#define NEON_FN(dest, src1, src2) do { \ | ||
| 446 | + int8_t tmp; \ | ||
| 447 | + tmp = (int8_t)src2; \ | ||
| 448 | + if (tmp >= sizeof(src1) * 8) { \ | ||
| 449 | + dest = 0; \ | ||
| 450 | + } else if (tmp < -sizeof(src1) * 8) { \ | ||
| 451 | + dest >>= sizeof(src1) * 8 - 1; \ | ||
| 452 | + } else if (tmp == -sizeof(src1) * 8) { \ | ||
| 453 | + dest = src1 >> (tmp - 1); \ | ||
| 454 | + dest++; \ | ||
| 455 | + src2 >>= 1; \ | ||
| 456 | + } else if (tmp < 0) { \ | ||
| 457 | + dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ | ||
| 458 | + } else { \ | ||
| 459 | + dest = src1 << tmp; \ | ||
| 460 | + }} while (0) | ||
| 461 | +NEON_VOP(rshl_s8, neon_s8, 4) | ||
| 462 | +NEON_VOP(rshl_s16, neon_s16, 2) | ||
| 463 | +NEON_VOP(rshl_s32, neon_s32, 1) | ||
| 464 | +#undef NEON_FN | ||
| 465 | + | ||
| 466 | +uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) | ||
| 467 | +{ | ||
| 468 | + int8_t shift = (int8_t)shiftop; | ||
| 469 | + int64_t val = valop; | ||
| 470 | + if (shift >= 64) { | ||
| 471 | + val = 0; | ||
| 472 | + } else if (shift < -64) { | ||
| 473 | + val >>= 63; | ||
| 474 | + } else if (shift == -63) { | ||
| 475 | + val >>= 63; | ||
| 476 | + val++; | ||
| 477 | + val >>= 1; | ||
| 478 | + } else if (shift < 0) { | ||
| 479 | + val = (val + ((int64_t)1 << (-1 - shift))) >> -shift; | ||
| 480 | + } else { | ||
| 481 | + val <<= shift; | ||
| 482 | + } | ||
| 483 | + return val; | ||
| 484 | +} | ||
| 485 | + | ||
| 486 | +#define NEON_FN(dest, src1, src2) do { \ | ||
| 487 | + int8_t tmp; \ | ||
| 488 | + tmp = (int8_t)src2; \ | ||
| 489 | + if (tmp >= sizeof(src1) * 8 || tmp < -sizeof(src1) * 8) { \ | ||
| 490 | + dest = 0; \ | ||
| 491 | + } else if (tmp == -sizeof(src1) * 8) { \ | ||
| 492 | + dest = src1 >> (tmp - 1); \ | ||
| 493 | + } else if (tmp < 0) { \ | ||
| 494 | + dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ | ||
| 495 | + } else { \ | ||
| 496 | + dest = src1 << tmp; \ | ||
| 497 | + }} while (0) | ||
| 498 | +NEON_VOP(rshl_u8, neon_u8, 4) | ||
| 499 | +NEON_VOP(rshl_u16, neon_u16, 2) | ||
| 500 | +NEON_VOP(rshl_u32, neon_u32, 1) | ||
| 501 | +#undef NEON_FN | ||
| 502 | + | ||
| 503 | +uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) | ||
| 504 | +{ | ||
| 505 | + int8_t shift = (uint8_t)shiftop; | ||
| 506 | + if (shift >= 64 || shift < 64) { | ||
| 507 | + val = 0; | ||
| 508 | + } else if (shift == -64) { | ||
| 509 | + /* Rounding a 1-bit result just preserves that bit. */ | ||
| 510 | + val >>= 63; | ||
| 511 | + } if (shift < 0) { | ||
| 512 | + val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift; | ||
| 513 | + val >>= -shift; | ||
| 514 | + } else { | ||
| 515 | + val <<= shift; | ||
| 516 | + } | ||
| 517 | + return val; | ||
| 518 | +} | ||
| 519 | + | ||
| 520 | +#define NEON_FN(dest, src1, src2) do { \ | ||
| 521 | + int8_t tmp; \ | ||
| 522 | + tmp = (int8_t)src2; \ | ||
| 523 | + if (tmp >= sizeof(src1) * 8) { \ | ||
| 524 | + if (src1) { \ | ||
| 525 | + SET_QC(); \ | ||
| 526 | + dest = ~0; \ | ||
| 527 | + } else { \ | ||
| 528 | + dest = 0; \ | ||
| 529 | + } \ | ||
| 530 | + } else if (tmp <= -sizeof(src1) * 8) { \ | ||
| 531 | + dest = 0; \ | ||
| 532 | + } else if (tmp < 0) { \ | ||
| 533 | + dest = src1 >> -tmp; \ | ||
| 534 | + } else { \ | ||
| 535 | + dest = src1 << tmp; \ | ||
| 536 | + if ((dest >> tmp) != src1) { \ | ||
| 537 | + SET_QC(); \ | ||
| 538 | + dest = ~0; \ | ||
| 539 | + } \ | ||
| 540 | + }} while (0) | ||
| 541 | +NEON_VOP_ENV(qshl_u8, neon_u8, 4) | ||
| 542 | +NEON_VOP_ENV(qshl_u16, neon_u16, 2) | ||
| 543 | +NEON_VOP_ENV(qshl_u32, neon_u32, 1) | ||
| 544 | +#undef NEON_FN | ||
| 545 | + | ||
| 546 | +uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) | ||
| 547 | +{ | ||
| 548 | + int8_t shift = (int8_t)shiftop; | ||
| 549 | + if (shift >= 64) { | ||
| 550 | + if (val) { | ||
| 551 | + val = ~(uint64_t)0; | ||
| 552 | + SET_QC(); | ||
| 553 | + } else { | ||
| 554 | + val = 0; | ||
| 555 | + } | ||
| 556 | + } else if (shift <= -64) { | ||
| 557 | + val = 0; | ||
| 558 | + } else if (shift < 0) { | ||
| 559 | + val >>= -shift; | ||
| 560 | + } else { | ||
| 561 | + uint64_t tmp = val; | ||
| 562 | + val <<= shift; | ||
| 563 | + if ((val >> shift) != tmp) { | ||
| 564 | + SET_QC(); | ||
| 565 | + val = ~(uint64_t)0; | ||
| 566 | + } | ||
| 567 | + } | ||
| 568 | + return val; | ||
| 569 | +} | ||
| 570 | + | ||
| 571 | +#define NEON_FN(dest, src1, src2) do { \ | ||
| 572 | + int8_t tmp; \ | ||
| 573 | + tmp = (int8_t)src2; \ | ||
| 574 | + if (tmp >= sizeof(src1) * 8) { \ | ||
| 575 | + if (src1) \ | ||
| 576 | + SET_QC(); \ | ||
| 577 | + dest = src1 >> 31; \ | ||
| 578 | + } else if (tmp <= -sizeof(src1) * 8) { \ | ||
| 579 | + dest = src1 >> 31; \ | ||
| 580 | + } else if (tmp < 0) { \ | ||
| 581 | + dest = src1 >> -tmp; \ | ||
| 582 | + } else { \ | ||
| 583 | + dest = src1 << tmp; \ | ||
| 584 | + if ((dest >> tmp) != src1) { \ | ||
| 585 | + SET_QC(); \ | ||
| 586 | + dest = src2 >> 31; \ | ||
| 587 | + } \ | ||
| 588 | + }} while (0) | ||
| 589 | +NEON_VOP_ENV(qshl_s8, neon_s8, 4) | ||
| 590 | +NEON_VOP_ENV(qshl_s16, neon_s16, 2) | ||
| 591 | +NEON_VOP_ENV(qshl_s32, neon_s32, 1) | ||
| 592 | +#undef NEON_FN | ||
| 593 | + | ||
| 594 | +uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) | ||
| 595 | +{ | ||
| 596 | + int8_t shift = (uint8_t)shiftop; | ||
| 597 | + int64_t val = valop; | ||
| 598 | + if (shift >= 64) { | ||
| 599 | + if (val) { | ||
| 600 | + SET_QC(); | ||
| 601 | + val = (val >> 63) & ~SIGNBIT64; | ||
| 602 | + } | ||
| 603 | + } else if (shift <= 64) { | ||
| 604 | + val >>= 63; | ||
| 605 | + } else if (shift < 0) { | ||
| 606 | + val >>= -shift; | ||
| 607 | + } else { | ||
| 608 | + int64_t tmp = val; | ||
| 609 | + val <<= shift; | ||
| 610 | + if ((val >> shift) != tmp) { | ||
| 611 | + SET_QC(); | ||
| 612 | + val = (tmp >> 63) ^ ~SIGNBIT64; | ||
| 613 | + } | ||
| 614 | + } | ||
| 615 | + return val; | ||
| 616 | +} | ||
| 617 | + | ||
| 618 | + | ||
| 619 | +/* FIXME: This is wrong. */ | ||
| 620 | +#define NEON_FN(dest, src1, src2) do { \ | ||
| 621 | + int8_t tmp; \ | ||
| 622 | + tmp = (int8_t)src2; \ | ||
| 623 | + if (tmp < 0) { \ | ||
| 624 | + dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ | ||
| 625 | + } else { \ | ||
| 626 | + dest = src1 << tmp; \ | ||
| 627 | + if ((dest >> tmp) != src1) { \ | ||
| 628 | + SET_QC(); \ | ||
| 629 | + dest = ~0; \ | ||
| 630 | + } \ | ||
| 631 | + }} while (0) | ||
| 632 | +NEON_VOP_ENV(qrshl_u8, neon_u8, 4) | ||
| 633 | +NEON_VOP_ENV(qrshl_u16, neon_u16, 2) | ||
| 634 | +NEON_VOP_ENV(qrshl_u32, neon_u32, 1) | ||
| 635 | +#undef NEON_FN | ||
| 636 | + | ||
| 637 | +uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) | ||
| 638 | +{ | ||
| 639 | + int8_t shift = (int8_t)shiftop; | ||
| 640 | + if (shift < 0) { | ||
| 641 | + val = (val + (1 << (-1 - shift))) >> -shift; | ||
| 642 | + } else { \ | ||
| 643 | + uint64_t tmp = val; | ||
| 644 | + val <<= shift; | ||
| 645 | + if ((val >> shift) != tmp) { | ||
| 646 | + SET_QC(); | ||
| 647 | + val = ~0; | ||
| 648 | + } | ||
| 649 | + } | ||
| 650 | + return val; | ||
| 651 | +} | ||
| 652 | + | ||
| 653 | +#define NEON_FN(dest, src1, src2) do { \ | ||
| 654 | + int8_t tmp; \ | ||
| 655 | + tmp = (int8_t)src2; \ | ||
| 656 | + if (tmp < 0) { \ | ||
| 657 | + dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ | ||
| 658 | + } else { \ | ||
| 659 | + dest = src1 << tmp; \ | ||
| 660 | + if ((dest >> tmp) != src1) { \ | ||
| 661 | + SET_QC(); \ | ||
| 662 | + dest = src1 >> 31; \ | ||
| 663 | + } \ | ||
| 664 | + }} while (0) | ||
| 665 | +NEON_VOP_ENV(qrshl_s8, neon_s8, 4) | ||
| 666 | +NEON_VOP_ENV(qrshl_s16, neon_s16, 2) | ||
| 667 | +NEON_VOP_ENV(qrshl_s32, neon_s32, 1) | ||
| 668 | +#undef NEON_FN | ||
| 669 | + | ||
| 670 | +uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) | ||
| 671 | +{ | ||
| 672 | + int8_t shift = (uint8_t)shiftop; | ||
| 673 | + int64_t val = valop; | ||
| 674 | + | ||
| 675 | + if (shift < 0) { | ||
| 676 | + val = (val + (1 << (-1 - shift))) >> -shift; | ||
| 677 | + } else { | ||
| 678 | + int64_t tmp = val;; | ||
| 679 | + val <<= shift; | ||
| 680 | + if ((val >> shift) != tmp) { | ||
| 681 | + SET_QC(); | ||
| 682 | + val = tmp >> 31; | ||
| 683 | + } | ||
| 684 | + } | ||
| 685 | + return val; | ||
| 686 | +} | ||
| 687 | + | ||
| 688 | +uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b) | ||
| 689 | +{ | ||
| 690 | + uint32_t mask; | ||
| 691 | + mask = (a ^ b) & 0x80808080u; | ||
| 692 | + a &= ~0x80808080u; | ||
| 693 | + b &= ~0x80808080u; | ||
| 694 | + return (a + b) ^ mask; | ||
| 695 | +} | ||
| 696 | + | ||
| 697 | +uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b) | ||
| 698 | +{ | ||
| 699 | + uint32_t mask; | ||
| 700 | + mask = (a ^ b) & 0x80008000u; | ||
| 701 | + a &= ~0x80008000u; | ||
| 702 | + b &= ~0x80008000u; | ||
| 703 | + return (a + b) ^ mask; | ||
| 704 | +} | ||
| 705 | + | ||
| 706 | +#define NEON_FN(dest, src1, src2) dest = src1 + src2 | ||
| 707 | +NEON_POP(padd_u8, neon_u8, 4) | ||
| 708 | +NEON_POP(padd_u16, neon_u16, 2) | ||
| 709 | +#undef NEON_FN | ||
| 710 | + | ||
| 711 | +#define NEON_FN(dest, src1, src2) dest = src1 - src2 | ||
| 712 | +NEON_VOP(sub_u8, neon_u8, 4) | ||
| 713 | +NEON_VOP(sub_u16, neon_u16, 2) | ||
| 714 | +#undef NEON_FN | ||
| 715 | + | ||
| 716 | +#define NEON_FN(dest, src1, src2) dest = src1 * src2 | ||
| 717 | +NEON_VOP(mul_u8, neon_u8, 4) | ||
| 718 | +NEON_VOP(mul_u16, neon_u16, 2) | ||
| 719 | +#undef NEON_FN | ||
| 720 | + | ||
| 721 | +/* Polynomial multiplication is like integer multiplcation except the | ||
| 722 | + partial products are XORed, not added. */ | ||
| 723 | +uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2) | ||
| 724 | +{ | ||
| 725 | + uint32_t mask; | ||
| 726 | + uint32_t result; | ||
| 727 | + result = 0; | ||
| 728 | + while (op1) { | ||
| 729 | + mask = 0; | ||
| 730 | + if (op1 & 1) | ||
| 731 | + mask |= 0xff; | ||
| 732 | + if (op1 & (1 << 8)) | ||
| 733 | + mask |= (0xff << 8); | ||
| 734 | + if (op1 & (1 << 16)) | ||
| 735 | + mask |= (0xff << 16); | ||
| 736 | + if (op1 & (1 << 24)) | ||
| 737 | + mask |= (0xff << 24); | ||
| 738 | + result ^= op2 & mask; | ||
| 739 | + op1 = (op1 >> 1) & 0x7f7f7f7f; | ||
| 740 | + op2 = (op2 << 1) & 0xfefefefe; | ||
| 741 | + } | ||
| 742 | + return result; | ||
| 743 | +} | ||
| 744 | + | ||
| 745 | +#define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0 | ||
| 746 | +NEON_VOP(tst_u8, neon_u8, 4) | ||
| 747 | +NEON_VOP(tst_u16, neon_u16, 2) | ||
| 748 | +NEON_VOP(tst_u32, neon_u32, 1) | ||
| 749 | +#undef NEON_FN | ||
| 750 | + | ||
| 751 | +#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0 | ||
| 752 | +NEON_VOP(ceq_u8, neon_u8, 4) | ||
| 753 | +NEON_VOP(ceq_u16, neon_u16, 2) | ||
| 754 | +NEON_VOP(ceq_u32, neon_u32, 1) | ||
| 755 | +#undef NEON_FN | ||
| 756 | + | ||
| 757 | +#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src | ||
| 758 | +NEON_VOP1(abs_s8, neon_s8, 4) | ||
| 759 | +NEON_VOP1(abs_s16, neon_s16, 2) | ||
| 760 | +#undef NEON_FN | ||
| 761 | + | ||
/* Count Leading Sign/Zero Bits. */
/* Number of leading zero bits in an 8-bit value (8 when x is zero).  */
static inline int do_clz8(uint8_t x)
{
    int zeros = 8;

    while (x != 0) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
| 770 | + | ||
/* Number of leading zero bits in a 16-bit value (16 when x is zero).  */
static inline int do_clz16(uint16_t x)
{
    int zeros = 16;

    while (x != 0) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
| 778 | + | ||
| 779 | +#define NEON_FN(dest, src, dummy) dest = do_clz8(src) | ||
| 780 | +NEON_VOP1(clz_u8, neon_u8, 4) | ||
| 781 | +#undef NEON_FN | ||
| 782 | + | ||
| 783 | +#define NEON_FN(dest, src, dummy) dest = do_clz16(src) | ||
| 784 | +NEON_VOP1(clz_u16, neon_u16, 2) | ||
| 785 | +#undef NEON_FN | ||
| 786 | + | ||
| 787 | +#define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1 | ||
| 788 | +NEON_VOP1(cls_s8, neon_s8, 4) | ||
| 789 | +#undef NEON_FN | ||
| 790 | + | ||
| 791 | +#define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1 | ||
| 792 | +NEON_VOP1(cls_s16, neon_s16, 2) | ||
| 793 | +#undef NEON_FN | ||
| 794 | + | ||
| 795 | +uint32_t HELPER(neon_cls_s32)(uint32_t x) | ||
| 796 | +{ | ||
| 797 | + int count; | ||
| 798 | + if ((int32_t)x < 0) | ||
| 799 | + x = ~x; | ||
| 800 | + for (count = 32; x; count--) | ||
| 801 | + x = x >> 1; | ||
| 802 | + return count - 1; | ||
| 803 | +} | ||
| 804 | + | ||
| 805 | +/* Bit count. */ | ||
| 806 | +uint32_t HELPER(neon_cnt_u8)(uint32_t x) | ||
| 807 | +{ | ||
| 808 | + x = (x & 0x55555555) + ((x >> 1) & 0x55555555); | ||
| 809 | + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); | ||
| 810 | + x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f); | ||
| 811 | + return x; | ||
| 812 | +} | ||
| 813 | + | ||
| 814 | +#define NEON_QDMULH16(dest, src1, src2, round) do { \ | ||
| 815 | + uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \ | ||
| 816 | + if ((tmp ^ (tmp << 1)) & SIGNBIT) { \ | ||
| 817 | + SET_QC(); \ | ||
| 818 | + tmp = (tmp >> 31) ^ ~SIGNBIT; \ | ||
| 819 | + } \ | ||
| 820 | + tmp <<= 1; \ | ||
| 821 | + if (round) { \ | ||
| 822 | + int32_t old = tmp; \ | ||
| 823 | + tmp += 1 << 15; \ | ||
| 824 | + if ((int32_t)tmp < old) { \ | ||
| 825 | + SET_QC(); \ | ||
| 826 | + tmp = SIGNBIT - 1; \ | ||
| 827 | + } \ | ||
| 828 | + } \ | ||
| 829 | + dest = tmp >> 16; \ | ||
| 830 | + } while(0) | ||
| 831 | +#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0) | ||
| 832 | +NEON_VOP_ENV(qdmulh_s16, neon_s16, 2) | ||
| 833 | +#undef NEON_FN | ||
| 834 | +#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1) | ||
| 835 | +NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2) | ||
| 836 | +#undef NEON_FN | ||
| 837 | +#undef NEON_QDMULH16 | ||
| 838 | + | ||
| 839 | +#define NEON_QDMULH32(dest, src1, src2, round) do { \ | ||
| 840 | + uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \ | ||
| 841 | + if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \ | ||
| 842 | + SET_QC(); \ | ||
| 843 | + tmp = (tmp >> 63) ^ ~SIGNBIT64; \ | ||
| 844 | + } else { \ | ||
| 845 | + tmp <<= 1; \ | ||
| 846 | + } \ | ||
| 847 | + if (round) { \ | ||
| 848 | + int64_t old = tmp; \ | ||
| 849 | + tmp += (int64_t)1 << 31; \ | ||
| 850 | + if ((int64_t)tmp < old) { \ | ||
| 851 | + SET_QC(); \ | ||
| 852 | + tmp = SIGNBIT64 - 1; \ | ||
| 853 | + } \ | ||
| 854 | + } \ | ||
| 855 | + dest = tmp >> 32; \ | ||
| 856 | + } while(0) | ||
| 857 | +#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0) | ||
| 858 | +NEON_VOP_ENV(qdmulh_s32, neon_s32, 1) | ||
| 859 | +#undef NEON_FN | ||
| 860 | +#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1) | ||
| 861 | +NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1) | ||
| 862 | +#undef NEON_FN | ||
| 863 | +#undef NEON_QDMULH32 | ||
| 864 | + | ||
| 865 | +uint32_t HELPER(neon_narrow_u8)(uint64_t x) | ||
| 866 | +{ | ||
| 867 | + return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u) | ||
| 868 | + | ((x >> 24) & 0xff000000u); | ||
| 869 | +} | ||
| 870 | + | ||
| 871 | +uint32_t HELPER(neon_narrow_u16)(uint64_t x) | ||
| 872 | +{ | ||
| 873 | + return (x & 0xffffu) | ((x >> 16) & 0xffff0000u); | ||
| 874 | +} | ||
| 875 | + | ||
| 876 | +uint32_t HELPER(neon_narrow_high_u8)(uint64_t x) | ||
| 877 | +{ | ||
| 878 | + return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) | ||
| 879 | + | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); | ||
| 880 | +} | ||
| 881 | + | ||
| 882 | +uint32_t HELPER(neon_narrow_high_u16)(uint64_t x) | ||
| 883 | +{ | ||
| 884 | + return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); | ||
| 885 | +} | ||
| 886 | + | ||
| 887 | +uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x) | ||
| 888 | +{ | ||
| 889 | + x &= 0xff80ff80ff80ff80ull; | ||
| 890 | + x += 0x0080008000800080ull; | ||
| 891 | + return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) | ||
| 892 | + | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); | ||
| 893 | +} | ||
| 894 | + | ||
| 895 | +uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x) | ||
| 896 | +{ | ||
| 897 | + x &= 0xffff8000ffff8000ull; | ||
| 898 | + x += 0x0000800000008000ull; | ||
| 899 | + return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); | ||
| 900 | +} | ||
| 901 | + | ||
| 902 | +uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x) | ||
| 903 | +{ | ||
| 904 | + uint16_t s; | ||
| 905 | + uint8_t d; | ||
| 906 | + uint32_t res = 0; | ||
| 907 | +#define SAT8(n) \ | ||
| 908 | + s = x >> n; \ | ||
| 909 | + if (s > 0xff) { \ | ||
| 910 | + d = 0xff; \ | ||
| 911 | + SET_QC(); \ | ||
| 912 | + } else { \ | ||
| 913 | + d = s; \ | ||
| 914 | + } \ | ||
| 915 | + res |= (uint32_t)d << (n / 2); | ||
| 916 | + | ||
| 917 | + SAT8(0); | ||
| 918 | + SAT8(16); | ||
| 919 | + SAT8(32); | ||
| 920 | + SAT8(48); | ||
| 921 | +#undef SAT8 | ||
| 922 | + return res; | ||
| 923 | +} | ||
| 924 | + | ||
| 925 | +uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x) | ||
| 926 | +{ | ||
| 927 | + int16_t s; | ||
| 928 | + uint8_t d; | ||
| 929 | + uint32_t res = 0; | ||
| 930 | +#define SAT8(n) \ | ||
| 931 | + s = x >> n; \ | ||
| 932 | + if (s != (int8_t)s) { \ | ||
| 933 | + d = (s >> 15) ^ 0x7f; \ | ||
| 934 | + SET_QC(); \ | ||
| 935 | + } else { \ | ||
| 936 | + d = s; \ | ||
| 937 | + } \ | ||
| 938 | + res |= (uint32_t)d << (n / 2); | ||
| 939 | + | ||
| 940 | + SAT8(0); | ||
| 941 | + SAT8(16); | ||
| 942 | + SAT8(32); | ||
| 943 | + SAT8(48); | ||
| 944 | +#undef SAT8 | ||
| 945 | + return res; | ||
| 946 | +} | ||
| 947 | + | ||
| 948 | +uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x) | ||
| 949 | +{ | ||
| 950 | + uint32_t high; | ||
| 951 | + uint32_t low; | ||
| 952 | + low = x; | ||
| 953 | + if (low > 0xffff) { | ||
| 954 | + low = 0xffff; | ||
| 955 | + SET_QC(); | ||
| 956 | + } | ||
| 957 | + high = x >> 32; | ||
| 958 | + if (high > 0xffff) { | ||
| 959 | + high = 0xffff; | ||
| 960 | + SET_QC(); | ||
| 961 | + } | ||
| 962 | + return low | (high << 16); | ||
| 963 | +} | ||
| 964 | + | ||
| 965 | +uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x) | ||
| 966 | +{ | ||
| 967 | + int32_t low; | ||
| 968 | + int32_t high; | ||
| 969 | + low = x; | ||
| 970 | + if (low != (int16_t)low) { | ||
| 971 | + low = (low >> 31) ^ 0x7fff; | ||
| 972 | + SET_QC(); | ||
| 973 | + } | ||
| 974 | + high = x >> 32; | ||
| 975 | + if (high != (int16_t)high) { | ||
| 976 | + high = (high >> 31) ^ 0x7fff; | ||
| 977 | + SET_QC(); | ||
| 978 | + } | ||
| 979 | + return (uint16_t)low | (high << 16); | ||
| 980 | +} | ||
| 981 | + | ||
| 982 | +uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x) | ||
| 983 | +{ | ||
| 984 | + if (x > 0xffffffffu) { | ||
| 985 | + SET_QC(); | ||
| 986 | + return 0xffffffffu; | ||
| 987 | + } | ||
| 988 | + return x; | ||
| 989 | +} | ||
| 990 | + | ||
| 991 | +uint32_t HELPER(neon_narrow_sat_s32)(CPUState *env, uint64_t x) | ||
| 992 | +{ | ||
| 993 | + if ((int64_t)x != (int32_t)x) { | ||
| 994 | + SET_QC(); | ||
| 995 | + return (x >> 63) ^ 0x7fffffff; | ||
| 996 | + } | ||
| 997 | + return x; | ||
| 998 | +} | ||
| 999 | + | ||
| 1000 | +uint64_t HELPER(neon_widen_u8)(uint32_t x) | ||
| 1001 | +{ | ||
| 1002 | + uint64_t tmp; | ||
| 1003 | + uint64_t ret; | ||
| 1004 | + ret = (uint8_t)x; | ||
| 1005 | + tmp = (uint8_t)(x >> 8); | ||
| 1006 | + ret |= tmp << 16; | ||
| 1007 | + tmp = (uint8_t)(x >> 16); | ||
| 1008 | + ret |= tmp << 32; | ||
| 1009 | + tmp = (uint8_t)(x >> 24); | ||
| 1010 | + ret |= tmp << 48; | ||
| 1011 | + return ret; | ||
| 1012 | +} | ||
| 1013 | + | ||
| 1014 | +uint64_t HELPER(neon_widen_s8)(uint32_t x) | ||
| 1015 | +{ | ||
| 1016 | + uint64_t tmp; | ||
| 1017 | + uint64_t ret; | ||
| 1018 | + ret = (uint16_t)(int8_t)x; | ||
| 1019 | + tmp = (uint16_t)(int8_t)(x >> 8); | ||
| 1020 | + ret |= tmp << 16; | ||
| 1021 | + tmp = (uint16_t)(int8_t)(x >> 16); | ||
| 1022 | + ret |= tmp << 32; | ||
| 1023 | + tmp = (uint16_t)(int8_t)(x >> 24); | ||
| 1024 | + ret |= tmp << 48; | ||
| 1025 | + return ret; | ||
| 1026 | +} | ||
| 1027 | + | ||
| 1028 | +uint64_t HELPER(neon_widen_u16)(uint32_t x) | ||
| 1029 | +{ | ||
| 1030 | + uint64_t high = (uint16_t)(x >> 16); | ||
| 1031 | + return ((uint16_t)x) | (high << 32); | ||
| 1032 | +} | ||
| 1033 | + | ||
| 1034 | +uint64_t HELPER(neon_widen_s16)(uint32_t x) | ||
| 1035 | +{ | ||
| 1036 | + uint64_t high = (int16_t)(x >> 16); | ||
| 1037 | + return ((uint32_t)(int16_t)x) | (high << 32); | ||
| 1038 | +} | ||
| 1039 | + | ||
| 1040 | +uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b) | ||
| 1041 | +{ | ||
| 1042 | + uint64_t mask; | ||
| 1043 | + mask = (a ^ b) & 0x8000800080008000ull; | ||
| 1044 | + a &= ~0x8000800080008000ull; | ||
| 1045 | + b &= ~0x8000800080008000ull; | ||
| 1046 | + return (a + b) ^ mask; | ||
| 1047 | +} | ||
| 1048 | + | ||
| 1049 | +uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b) | ||
| 1050 | +{ | ||
| 1051 | + uint64_t mask; | ||
| 1052 | + mask = (a ^ b) & 0x8000000080000000ull; | ||
| 1053 | + a &= ~0x8000000080000000ull; | ||
| 1054 | + b &= ~0x8000000080000000ull; | ||
| 1055 | + return (a + b) ^ mask; | ||
| 1056 | +} | ||
| 1057 | + | ||
| 1058 | +uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b) | ||
| 1059 | +{ | ||
| 1060 | + uint64_t tmp; | ||
| 1061 | + uint64_t tmp2; | ||
| 1062 | + | ||
| 1063 | + tmp = a & 0x0000ffff0000ffffull; | ||
| 1064 | + tmp += (a >> 16) & 0x0000ffff0000ffffull; | ||
| 1065 | + tmp2 = b & 0xffff0000ffff0000ull; | ||
| 1066 | + tmp2 += (b << 16) & 0xffff0000ffff0000ull; | ||
| 1067 | + return ( tmp & 0xffff) | ||
| 1068 | + | ((tmp >> 16) & 0xffff0000ull) | ||
| 1069 | + | ((tmp2 << 16) & 0xffff00000000ull) | ||
| 1070 | + | ( tmp2 & 0xffff000000000000ull); | ||
| 1071 | +} | ||
| 1072 | + | ||
| 1073 | +uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b) | ||
| 1074 | +{ | ||
| 1075 | + uint32_t low = a + (a >> 32); | ||
| 1076 | + uint32_t high = b + (b >> 32); | ||
| 1077 | + return low + ((uint64_t)high << 32); | ||
| 1078 | +} | ||
| 1079 | + | ||
| 1080 | +uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b) | ||
| 1081 | +{ | ||
| 1082 | + uint64_t mask; | ||
| 1083 | + mask = (a ^ ~b) & 0x8000800080008000ull; | ||
| 1084 | + a |= 0x8000800080008000ull; | ||
| 1085 | + b &= ~0x8000800080008000ull; | ||
| 1086 | + return (a - b) ^ mask; | ||
| 1087 | +} | ||
| 1088 | + | ||
| 1089 | +uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b) | ||
| 1090 | +{ | ||
| 1091 | + uint64_t mask; | ||
| 1092 | + mask = (a ^ ~b) & 0x8000000080000000ull; | ||
| 1093 | + a |= 0x8000000080000000ull; | ||
| 1094 | + b &= ~0x8000000080000000ull; | ||
| 1095 | + return (a - b) ^ mask; | ||
| 1096 | +} | ||
| 1097 | + | ||
| 1098 | +uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b) | ||
| 1099 | +{ | ||
| 1100 | + uint32_t x, y; | ||
| 1101 | + uint32_t low, high; | ||
| 1102 | + | ||
| 1103 | + x = a; | ||
| 1104 | + y = b; | ||
| 1105 | + low = x + y; | ||
| 1106 | + if (((low ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) { | ||
| 1107 | + SET_QC(); | ||
| 1108 | + low = ((int32_t)x >> 31) ^ ~SIGNBIT; | ||
| 1109 | + } | ||
| 1110 | + x = a >> 32; | ||
| 1111 | + y = b >> 32; | ||
| 1112 | + high = x + y; | ||
| 1113 | + if (((high ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) { | ||
| 1114 | + SET_QC(); | ||
| 1115 | + high = ((int32_t)x >> 31) ^ ~SIGNBIT; | ||
| 1116 | + } | ||
| 1117 | + return low | ((uint64_t)high << 32); | ||
| 1118 | +} | ||
| 1119 | + | ||
| 1120 | +uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b) | ||
| 1121 | +{ | ||
| 1122 | + uint64_t result; | ||
| 1123 | + | ||
| 1124 | + result = a + b; | ||
| 1125 | + if (((result ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) { | ||
| 1126 | + SET_QC(); | ||
| 1127 | + result = ((int64_t)a >> 63) ^ ~SIGNBIT64; | ||
| 1128 | + } | ||
| 1129 | + return result; | ||
| 1130 | +} | ||
| 1131 | + | ||
/* Absolute difference of x and y after converting both to "type"; the
   larger value minus the smaller is stored in dest.  */
#define DO_ABD(dest, x, y, type) do { \
    type tmp_x = (x); \
    type tmp_y = (y); \
    if (tmp_x > tmp_y) { \
        dest = tmp_x - tmp_y; \
    } else { \
        dest = tmp_y - tmp_x; \
    } \
    } while(0)
| 1137 | + | ||
| 1138 | +uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b) | ||
| 1139 | +{ | ||
| 1140 | + uint64_t tmp; | ||
| 1141 | + uint64_t result; | ||
| 1142 | + DO_ABD(result, a, b, uint8_t); | ||
| 1143 | + DO_ABD(tmp, a >> 8, b >> 8, uint8_t); | ||
| 1144 | + result |= tmp << 16; | ||
| 1145 | + DO_ABD(tmp, a >> 16, b >> 16, uint8_t); | ||
| 1146 | + result |= tmp << 32; | ||
| 1147 | + DO_ABD(tmp, a >> 24, b >> 24, uint8_t); | ||
| 1148 | + result |= tmp << 48; | ||
| 1149 | + return result; | ||
| 1150 | +} | ||
| 1151 | + | ||
| 1152 | +uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b) | ||
| 1153 | +{ | ||
| 1154 | + uint64_t tmp; | ||
| 1155 | + uint64_t result; | ||
| 1156 | + DO_ABD(result, a, b, int8_t); | ||
| 1157 | + DO_ABD(tmp, a >> 8, b >> 8, int8_t); | ||
| 1158 | + result |= tmp << 16; | ||
| 1159 | + DO_ABD(tmp, a >> 16, b >> 16, int8_t); | ||
| 1160 | + result |= tmp << 32; | ||
| 1161 | + DO_ABD(tmp, a >> 24, b >> 24, int8_t); | ||
| 1162 | + result |= tmp << 48; | ||
| 1163 | + return result; | ||
| 1164 | +} | ||
| 1165 | + | ||
| 1166 | +uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b) | ||
| 1167 | +{ | ||
| 1168 | + uint64_t tmp; | ||
| 1169 | + uint64_t result; | ||
| 1170 | + DO_ABD(result, a, b, uint16_t); | ||
| 1171 | + DO_ABD(tmp, a >> 16, b >> 16, uint16_t); | ||
| 1172 | + return result | (tmp << 32); | ||
| 1173 | +} | ||
| 1174 | + | ||
| 1175 | +uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b) | ||
| 1176 | +{ | ||
| 1177 | + uint64_t tmp; | ||
| 1178 | + uint64_t result; | ||
| 1179 | + DO_ABD(result, a, b, int16_t); | ||
| 1180 | + DO_ABD(tmp, a >> 16, b >> 16, int16_t); | ||
| 1181 | + return result | (tmp << 32); | ||
| 1182 | +} | ||
| 1183 | + | ||
| 1184 | +uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b) | ||
| 1185 | +{ | ||
| 1186 | + uint64_t result; | ||
| 1187 | + DO_ABD(result, a, b, uint32_t); | ||
| 1188 | + return result; | ||
| 1189 | +} | ||
| 1190 | + | ||
| 1191 | +uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b) | ||
| 1192 | +{ | ||
| 1193 | + uint64_t result; | ||
| 1194 | + DO_ABD(result, a, b, int32_t); | ||
| 1195 | + return result; | ||
| 1196 | +} | ||
| 1197 | +#undef DO_ABD | ||
| 1198 | + | ||
/* Widening multiply.  Named type is the source type.
   x and y are first converted to type1 (the source element type), then
   multiplied as type2 and the product truncated to type2.
   The leading "1u *" forces the multiplication to be unsigned: when type2
   is uint16_t both factors would otherwise promote to int, and a product
   such as 0xffff * 0xffff overflows a signed int.  */
#define DO_MULL(dest, x, y, type1, type2) do { \
    type1 tmp_x = (x); \
    type1 tmp_y = (y); \
    dest = (type2)(1u * (type2)tmp_x * (type2)tmp_y); \
    } while(0)
| 1205 | + | ||
| 1206 | +uint64_t HELPER(neon_mull_u8)(uint32_t a, uint32_t b) | ||
| 1207 | +{ | ||
| 1208 | + uint64_t tmp; | ||
| 1209 | + uint64_t result; | ||
| 1210 | + | ||
| 1211 | + DO_MULL(result, a, b, uint8_t, uint16_t); | ||
| 1212 | + DO_MULL(tmp, a >> 8, b >> 8, uint8_t, uint16_t); | ||
| 1213 | + result |= tmp << 16; | ||
| 1214 | + DO_MULL(tmp, a >> 16, b >> 16, uint8_t, uint16_t); | ||
| 1215 | + result |= tmp << 32; | ||
| 1216 | + DO_MULL(tmp, a >> 24, b >> 24, uint8_t, uint16_t); | ||
| 1217 | + result |= tmp << 48; | ||
| 1218 | + return result; | ||
| 1219 | +} | ||
| 1220 | + | ||
| 1221 | +uint64_t HELPER(neon_mull_s8)(uint32_t a, uint32_t b) | ||
| 1222 | +{ | ||
| 1223 | + uint64_t tmp; | ||
| 1224 | + uint64_t result; | ||
| 1225 | + | ||
| 1226 | + DO_MULL(result, a, b, int8_t, uint16_t); | ||
| 1227 | + DO_MULL(tmp, a >> 8, b >> 8, int8_t, uint16_t); | ||
| 1228 | + result |= tmp << 16; | ||
| 1229 | + DO_MULL(tmp, a >> 16, b >> 16, int8_t, uint16_t); | ||
| 1230 | + result |= tmp << 32; | ||
| 1231 | + DO_MULL(tmp, a >> 24, b >> 24, int8_t, uint16_t); | ||
| 1232 | + result |= tmp << 48; | ||
| 1233 | + return result; | ||
| 1234 | +} | ||
| 1235 | +/* Multiplies the two unsigned 16-bit lanes of a and b pairwise; the 32-bit products occupy the low and high halves of the result. */ | ||
| 1236 | +uint64_t HELPER(neon_mull_u16)(uint32_t a, uint32_t b) | ||
| 1237 | +{ | ||
| 1238 | + uint64_t tmp; | ||
| 1239 | + uint64_t result; | ||
| 1240 | + | ||
| 1241 | + DO_MULL(result, a, b, uint16_t, uint32_t); | ||
| 1242 | + DO_MULL(tmp, a >> 16, b >> 16, uint16_t, uint32_t); | ||
| 1243 | + return result | (tmp << 32); | ||
| 1244 | +} | ||
| 1245 | +/* Multiplies the two 16-bit lanes of a and b pairwise, reading each lane as int16_t; the products, truncated to 32 bits, occupy the low and high halves of the result. */ | ||
| 1246 | +uint64_t HELPER(neon_mull_s16)(uint32_t a, uint32_t b) | ||
| 1247 | +{ | ||
| 1248 | + uint64_t tmp; | ||
| 1249 | + uint64_t result; | ||
| 1250 | + | ||
| 1251 | + DO_MULL(result, a, b, int16_t, uint32_t); | ||
| 1252 | + DO_MULL(tmp, a >> 16, b >> 16, int16_t, uint32_t); | ||
| 1253 | + return result | (tmp << 32); | ||
| 1254 | +} | ||
| 1255 | +/* Negates each of the four 16-bit lanes of x independently; every lane is truncated to uint16_t before being reinserted, so nothing carries into the neighbouring lane. */ | ||
| 1256 | +uint64_t HELPER(neon_negl_u16)(uint64_t x) | ||
| 1257 | +{ | ||
| 1258 | + uint16_t tmp; | ||
| 1259 | + uint64_t result; | ||
| 1260 | + result = (uint16_t)-x; | ||
| 1261 | + tmp = -(x >> 16); | ||
| 1262 | + result |= (uint64_t)tmp << 16; | ||
| 1263 | + tmp = -(x >> 32); | ||
| 1264 | + result |= (uint64_t)tmp << 32; | ||
| 1265 | + tmp = -(x >> 48); | ||
| 1266 | + result |= (uint64_t)tmp << 48; | ||
| 1267 | + return result; | ||
| 1268 | +} | ||
| 1269 | + | ||
| 1270 | +#include <stdio.h> | ||
| 1271 | +uint64_t HELPER(neon_negl_u32)(uint64_t x) | ||
| 1272 | +{ /* Negates the low and the high 32-bit half of x independently (each truncated to uint32_t) and reassembles them into one 64-bit value. */ | ||
| 1273 | + uint32_t low = -x; | ||
| 1274 | + uint32_t high = -(x >> 32); | ||
| 1275 | + return low | ((uint64_t)high << 32); | ||
| 1276 | +} | ||
| 1277 | +/* Returns the unsigned negation of the whole 64-bit value x (i.e. 0 - x modulo 2^64). */ | ||
| 1278 | +/* FIXME: There should be a native op for this. */ | ||
| 1279 | +uint64_t HELPER(neon_negl_u64)(uint64_t x) | ||
| 1280 | +{ | ||
| 1281 | + return -x; | ||
| 1282 | +} | ||
| 1283 | + | ||
| 1284 | +/* Saturating sign manipulation. */ | ||
| 1285 | +/* ??? Make these use NEON_VOP1 */ | ||
| 1286 | +#define DO_QABS8(x) do { \ | ||
| 1287 | + if (x == (int8_t)0x80) { \ | ||
| 1288 | + x = 0x7f; \ | ||
| 1289 | + SET_QC(); \ | ||
| 1290 | + } else if (x < 0) { \ | ||
| 1291 | + x = -x; \ | ||
| 1292 | + }} while (0) | ||
| 1293 | +uint32_t HELPER(neon_qabs_s8)(CPUState *env, uint32_t x) | ||
| 1294 | +{ /* Saturating absolute value on the four int8_t lanes of x: a lane equal to (int8_t)0x80 becomes 0x7f and SET_QC() is invoked, any other negative lane is negated. NEON_UNPACK, NEON_PACK and SET_QC are defined outside this excerpt. */ | ||
| 1295 | + neon_s8 vec; | ||
| 1296 | + NEON_UNPACK(neon_s8, vec, x); | ||
| 1297 | + DO_QABS8(vec.v1); | ||
| 1298 | + DO_QABS8(vec.v2); | ||
| 1299 | + DO_QABS8(vec.v3); | ||
| 1300 | + DO_QABS8(vec.v4); | ||
| 1301 | + NEON_PACK(neon_s8, x, vec); | ||
| 1302 | + return x; | ||
| 1303 | +} | ||
| 1304 | +#undef DO_QABS8 | ||
| 1305 | +/* Saturating negation on the four int8_t lanes of x: a lane equal to (int8_t)0x80 becomes 0x7f and SET_QC() is invoked, every other lane is negated. NEON_UNPACK, NEON_PACK and SET_QC are defined outside this excerpt. */ | ||
| 1306 | +#define DO_QNEG8(x) do { \ | ||
| 1307 | + if (x == (int8_t)0x80) { \ | ||
| 1308 | + x = 0x7f; \ | ||
| 1309 | + SET_QC(); \ | ||
| 1310 | + } else { \ | ||
| 1311 | + x = -x; \ | ||
| 1312 | + }} while (0) | ||
| 1313 | +uint32_t HELPER(neon_qneg_s8)(CPUState *env, uint32_t x) | ||
| 1314 | +{ | ||
| 1315 | + neon_s8 vec; | ||
| 1316 | + NEON_UNPACK(neon_s8, vec, x); | ||
| 1317 | + DO_QNEG8(vec.v1); | ||
| 1318 | + DO_QNEG8(vec.v2); | ||
| 1319 | + DO_QNEG8(vec.v3); | ||
| 1320 | + DO_QNEG8(vec.v4); | ||
| 1321 | + NEON_PACK(neon_s8, x, vec); | ||
| 1322 | + return x; | ||
| 1323 | +} | ||
| 1324 | +#undef DO_QNEG8 | ||
| 1325 | +/* Saturating absolute value on the two int16_t lanes of x: a lane equal to (int16_t)0x8000 becomes 0x7fff and SET_QC() is invoked, any other negative lane is negated. NEON_UNPACK, NEON_PACK and SET_QC are defined outside this excerpt. */ | ||
| 1326 | +#define DO_QABS16(x) do { \ | ||
| 1327 | + if (x == (int16_t)0x8000) { \ | ||
| 1328 | + x = 0x7fff; \ | ||
| 1329 | + SET_QC(); \ | ||
| 1330 | + } else if (x < 0) { \ | ||
| 1331 | + x = -x; \ | ||
| 1332 | + }} while (0) | ||
| 1333 | +uint32_t HELPER(neon_qabs_s16)(CPUState *env, uint32_t x) | ||
| 1334 | +{ | ||
| 1335 | + neon_s16 vec; | ||
| 1336 | + NEON_UNPACK(neon_s16, vec, x); | ||
| 1337 | + DO_QABS16(vec.v1); | ||
| 1338 | + DO_QABS16(vec.v2); | ||
| 1339 | + NEON_PACK(neon_s16, x, vec); | ||
| 1340 | + return x; | ||
| 1341 | +} | ||
| 1342 | +#undef DO_QABS16 | ||
| 1343 | +/* Saturating negation on the two int16_t lanes of x: a lane equal to (int16_t)0x8000 becomes 0x7fff and SET_QC() is invoked, every other lane is negated. NEON_UNPACK, NEON_PACK and SET_QC are defined outside this excerpt. */ | ||
| 1344 | +#define DO_QNEG16(x) do { \ | ||
| 1345 | + if (x == (int16_t)0x8000) { \ | ||
| 1346 | + x = 0x7fff; \ | ||
| 1347 | + SET_QC(); \ | ||
| 1348 | + } else { \ | ||
| 1349 | + x = -x; \ | ||
| 1350 | + }} while (0) | ||
| 1351 | +uint32_t HELPER(neon_qneg_s16)(CPUState *env, uint32_t x) | ||
| 1352 | +{ | ||
| 1353 | + neon_s16 vec; | ||
| 1354 | + NEON_UNPACK(neon_s16, vec, x); | ||
| 1355 | + DO_QNEG16(vec.v1); | ||
| 1356 | + DO_QNEG16(vec.v2); | ||
| 1357 | + NEON_PACK(neon_s16, x, vec); | ||
| 1358 | + return x; | ||
| 1359 | +} | ||
| 1360 | +#undef DO_QNEG16 | ||
| 1361 | +/* Saturating absolute value of x read as a 32-bit signed value: SIGNBIT maps to ~SIGNBIT after SET_QC() is invoked, other negative values are negated, everything else is returned unchanged. */ | ||
| 1362 | +uint32_t HELPER(neon_qabs_s32)(CPUState *env, uint32_t x) | ||
| 1363 | +{ | ||
| 1364 | + if (x == SIGNBIT) { | ||
| 1365 | + SET_QC(); | ||
| 1366 | + x = ~SIGNBIT; | ||
| 1367 | + } else if ((int32_t)x < 0) { | ||
| 1368 | + x = -x; | ||
| 1369 | + } | ||
| 1370 | + return x; | ||
| 1371 | +} | ||
| 1372 | +/* Saturating negation of x: SIGNBIT maps to ~SIGNBIT after SET_QC() is invoked; every other value is negated. */ | ||
| 1373 | +uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x) | ||
| 1374 | +{ | ||
| 1375 | + if (x == SIGNBIT) { | ||
| 1376 | + SET_QC(); | ||
| 1377 | + x = ~SIGNBIT; | ||
| 1378 | + } else { | ||
| 1379 | + x = -x; | ||
| 1380 | + } | ||
| 1381 | + return x; | ||
| 1382 | +} | ||
| 1383 | + | ||
| 1384 | +/* NEON Float helpers. */ | ||
| 1385 | +uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b) | ||
| 1386 | +{ /* Returns the raw bits of a when float32_compare_quiet(a, b) yields -1; for every other comparison result b is returned unchanged. NOTE(review): presumably -1 means "less than"; vfp_itos and NFS are defined outside this excerpt. */ | ||
| 1387 | + float32 f0 = vfp_itos(a); | ||
| 1388 | + float32 f1 = vfp_itos(b); | ||
| 1389 | + return (float32_compare_quiet(f0, f1, NFS) == -1) ? a : b; | ||
| 1390 | +} | ||
| 1391 | +/* Returns the raw bits of a when float32_compare_quiet(a, b) yields 1; for every other comparison result b is returned unchanged. NOTE(review): presumably 1 means "greater than" - confirm against the softfloat headers. */ | ||
| 1392 | +uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b) | ||
| 1393 | +{ | ||
| 1394 | + float32 f0 = vfp_itos(a); | ||
| 1395 | + float32 f1 = vfp_itos(b); | ||
| 1396 | + return (float32_compare_quiet(f0, f1, NFS) == 1) ? a : b; | ||
| 1397 | +} | ||
| 1398 | +/* Difference of a and b as float32 values: computes a - b when float32_compare_quiet(a, b) yields 1, otherwise b - a, and returns the bit pattern of the result via vfp_stoi. */ | ||
| 1399 | +uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b) | ||
| 1400 | +{ | ||
| 1401 | + float32 f0 = vfp_itos(a); | ||
| 1402 | + float32 f1 = vfp_itos(b); | ||
| 1403 | + return vfp_stoi((float32_compare_quiet(f0, f1, NFS) == 1) | ||
| 1404 | + ? float32_sub(f0, f1, NFS) | ||
| 1405 | + : float32_sub(f1, f0, NFS)); | ||
| 1406 | +} | ||
| 1407 | +/* Converts a and b with vfp_itos, adds them with float32_add using NFS, and returns the sum converted back with vfp_stoi. */ | ||
| 1408 | +uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b) | ||
| 1409 | +{ | ||
| 1410 | + return vfp_stoi(float32_add(vfp_itos(a), vfp_itos(b), NFS)); | ||
| 1411 | +} | ||
| 1412 | +/* Converts a and b with vfp_itos, computes a - b with float32_sub using NFS, and returns the difference converted back with vfp_stoi. */ | ||
| 1413 | +uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b) | ||
| 1414 | +{ | ||
| 1415 | + return vfp_stoi(float32_sub(vfp_itos(a), vfp_itos(b), NFS)); | ||
| 1416 | +} | ||
| 1417 | + | ||
| 1418 | +uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b) | ||
| 1419 | +{ | ||
| 1420 | + return vfp_stoi(float32_mul(vfp_itos(a), vfp_itos(b), NFS)); | ||
| 1421 | +} | ||
| 1422 | +/* NEON_VOP_FCMP(name, cmp) defines helper neon_<name>, which returns ~0 when the result of float32_compare_quiet(a, b) satisfies "cmp 0" and 0 otherwise; it is instantiated below for ==, >= and >. */ | ||
| 1423 | +/* Floating point comparisons produce an integer result. */ | ||
| 1424 | +#define NEON_VOP_FCMP(name, cmp) \ | ||
| 1425 | +uint32_t HELPER(neon_##name)(uint32_t a, uint32_t b) \ | ||
| 1426 | +{ \ | ||
| 1427 | + if (float32_compare_quiet(vfp_itos(a), vfp_itos(b), NFS) cmp 0) \ | ||
| 1428 | + return ~0; \ | ||
| 1429 | + else \ | ||
| 1430 | + return 0; \ | ||
| 1431 | +} | ||
| 1432 | + | ||
| 1433 | +NEON_VOP_FCMP(ceq_f32, ==) | ||
| 1434 | +NEON_VOP_FCMP(cge_f32, >=) | ||
| 1435 | +NEON_VOP_FCMP(cgt_f32, >) | ||
| 1436 | +/* Compares float32_abs(a) with float32_abs(b): returns ~0 when float32_compare_quiet reports a result >= 0, and 0 otherwise. */ | ||
| 1437 | +uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b) | ||
| 1438 | +{ | ||
| 1439 | + float32 f0 = float32_abs(vfp_itos(a)); | ||
| 1440 | + float32 f1 = float32_abs(vfp_itos(b)); | ||
| 1441 | + return (float32_compare_quiet(f0, f1,NFS) >= 0) ? ~0 : 0; | ||
| 1442 | +} | ||
| 1443 | +/* Compares float32_abs(a) with float32_abs(b): returns ~0 when float32_compare_quiet reports a result > 0, and 0 otherwise. */ | ||
| 1444 | +uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b) | ||
| 1445 | +{ | ||
| 1446 | + float32 f0 = float32_abs(vfp_itos(a)); | ||
| 1447 | + float32 f1 = float32_abs(vfp_itos(b)); | ||
| 1448 | + return (float32_compare_quiet(f0, f1, NFS) > 0) ? ~0 : 0; | ||
| 1449 | +} |
target-arm/op.c
target-arm/op_helper.c
| @@ -20,6 +20,9 @@ | @@ -20,6 +20,9 @@ | ||
| 20 | #include "exec.h" | 20 | #include "exec.h" |
| 21 | #include "helpers.h" | 21 | #include "helpers.h" |
| 22 | 22 | ||
| 23 | +#define SIGNBIT (uint32_t)0x80000000 | ||
| 24 | +#define SIGNBIT64 ((uint64_t)1 << 63) | ||
| 25 | + | ||
| 23 | void raise_exception(int tt) | 26 | void raise_exception(int tt) |
| 24 | { | 27 | { |
| 25 | env->exception_index = tt; | 28 | env->exception_index = tt; |
| @@ -116,7 +119,8 @@ void tlb_fill (target_ulong addr, int is_write, int mmu_idx, void *retaddr) | @@ -116,7 +119,8 @@ void tlb_fill (target_ulong addr, int is_write, int mmu_idx, void *retaddr) | ||
| 116 | } | 119 | } |
| 117 | #endif | 120 | #endif |
| 118 | 121 | ||
| 119 | -#define SIGNBIT (uint32_t)0x80000000 | 122 | +/* FIXME: Pass an explicit pointer to QF to CPUState, and move saturating |
| 123 | instructions into helper.c */ | ||
| 120 | uint32_t HELPER(add_setq)(uint32_t a, uint32_t b) | 124 | uint32_t HELPER(add_setq)(uint32_t a, uint32_t b) |
| 121 | { | 125 | { |
| 122 | uint32_t res = a + b; | 126 | uint32_t res = a + b; |
| @@ -451,3 +455,114 @@ uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i) | @@ -451,3 +455,114 @@ uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i) | ||
| 451 | } | 455 | } |
| 452 | } | 456 | } |
| 453 | 457 | ||
| 458 | +uint64_t HELPER(neon_add_saturate_s64)(uint64_t src1, uint64_t src2) | ||
| 459 | +{ /* Signed saturating 64-bit add: when src1 and src2 share a sign bit but the sum's sign bit differs from src1's, sets env->QF and returns ~SIGNBIT64 XORed with (int64_t)src1 >> 63 (the sign of src1 selects the saturation bound); otherwise returns the wrapped sum. */ | ||
| 460 | + uint64_t res; | ||
| 461 | + | ||
| 462 | + res = src1 + src2; | ||
| 463 | + if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) { | ||
| 464 | + env->QF = 1; | ||
| 465 | + res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; | ||
| 466 | + } | ||
| 467 | + return res; | ||
| 468 | +} | ||
| 469 | +/* Unsigned saturating 64-bit add: if the sum wrapped around (sum < src1), sets env->QF and returns all ones; otherwise returns the sum. */ | ||
| 470 | +uint64_t HELPER(neon_add_saturate_u64)(uint64_t src1, uint64_t src2) | ||
| 471 | +{ | ||
| 472 | + uint64_t res; | ||
| 473 | + | ||
| 474 | + res = src1 + src2; | ||
| 475 | + if (res < src1) { | ||
| 476 | + env->QF = 1; | ||
| 477 | + res = ~(uint64_t)0; | ||
| 478 | + } | ||
| 479 | + return res; | ||
| 480 | +} | ||
| 481 | +/* Signed saturating 64-bit subtract: when src1 and src2 differ in sign bit and the difference's sign bit differs from src1's, sets env->QF and returns ~SIGNBIT64 XORed with (int64_t)src1 >> 63 (the sign of src1 selects the saturation bound); otherwise returns the wrapped difference. */ | ||
| 482 | +uint64_t HELPER(neon_sub_saturate_s64)(uint64_t src1, uint64_t src2) | ||
| 483 | +{ | ||
| 484 | + uint64_t res; | ||
| 485 | + | ||
| 486 | + res = src1 - src2; | ||
| 487 | + if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) { | ||
| 488 | + env->QF = 1; | ||
| 489 | + res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; | ||
| 490 | + } | ||
| 491 | + return res; | ||
| 492 | +} | ||
| 493 | +/* Unsigned saturating 64-bit subtract: if src1 < src2, sets env->QF and returns 0; otherwise returns src1 - src2. */ | ||
| 494 | +uint64_t HELPER(neon_sub_saturate_u64)(uint64_t src1, uint64_t src2) | ||
| 495 | +{ | ||
| 496 | + uint64_t res; | ||
| 497 | + | ||
| 498 | + if (src1 < src2) { | ||
| 499 | + env->QF = 1; | ||
| 500 | + res = 0; | ||
| 501 | + } else { | ||
| 502 | + res = src1 - src2; | ||
| 503 | + } | ||
| 504 | + return res; | ||
| 505 | +} | ||
| 506 | + | ||
| 507 | +/* These need to return a pair of values, so still use T0/T1. */ | ||
| 508 | +/* Transpose. Argument order is rather strange to avoid special casing | ||
| 509 | + the translation code. | ||
| 510 | + On input T0 = rm, T1 = rd. On output T0 = rd, T1 = rm */ | ||
| 511 | +void HELPER(neon_trn_u8)(void) | ||
| 512 | +{ /* Byte transpose on the globals T0/T1. New T0: even byte positions keep T1's even bytes, odd positions receive T0's even bytes. New T1: even positions receive T1's odd bytes, odd positions keep T0's odd bytes. */ | ||
| 513 | + uint32_t rd; | ||
| 514 | + uint32_t rm; | ||
| 515 | + rd = ((T0 & 0x00ff00ff) << 8) | (T1 & 0x00ff00ff); | ||
| 516 | + rm = ((T1 & 0xff00ff00) >> 8) | (T0 & 0xff00ff00); | ||
| 517 | + T0 = rd; | ||
| 518 | + T1 = rm; | ||
| 519 | + FORCE_RET(); | ||
| 520 | +} | ||
| 521 | +/* Halfword transpose on the globals T0/T1. New T0 = T1's low halfword in the low half and T0's low halfword in the high half; new T1 = T1's high halfword in the low half and T0's high halfword in the high half. */ | ||
| 522 | +void HELPER(neon_trn_u16)(void) | ||
| 523 | +{ | ||
| 524 | + uint32_t rd; | ||
| 525 | + uint32_t rm; | ||
| 526 | + rd = (T0 << 16) | (T1 & 0xffff); | ||
| 527 | + rm = (T1 >> 16) | (T0 & 0xffff0000); | ||
| 528 | + T0 = rd; | ||
| 529 | + T1 = rm; | ||
| 530 | + FORCE_RET(); | ||
| 531 | +} | ||
| 532 | + | ||
| 533 | +/* Worker routines for zip and unzip. */ | ||
| 534 | +void HELPER(neon_unzip_u8)(void) | ||
| 535 | +{ /* Byte de-interleave on the globals T0/T1. New T0 = bytes 0 and 2 of T0 followed by bytes 0 and 2 of T1; new T1 = bytes 1 and 3 of T0 followed by bytes 1 and 3 of T1 (lowest byte first). */ | ||
| 536 | + uint32_t rd; | ||
| 537 | + uint32_t rm; | ||
| 538 | + rd = (T0 & 0xff) | ((T0 >> 8) & 0xff00) | ||
| 539 | + | ((T1 << 16) & 0xff0000) | ((T1 << 8) & 0xff000000); | ||
| 540 | + rm = ((T0 >> 8) & 0xff) | ((T0 >> 16) & 0xff00) | ||
| 541 | + | ((T1 << 8) & 0xff0000) | (T1 & 0xff000000); | ||
| 542 | + T0 = rd; | ||
| 543 | + T1 = rm; | ||
| 544 | + FORCE_RET(); | ||
| 545 | +} | ||
| 546 | +/* Byte interleave on the globals T0/T1. New T0 = T0.b0, T1.b0, T0.b1, T1.b1 and new T1 = T0.b2, T1.b2, T0.b3, T1.b3 (lowest byte first). Byte 1 of each input must be shifted by 8 (T0) and 16 (T1) to reach result bytes 2 and 3; shifting by 16/24 would duplicate byte 0 and drop byte 1 altogether. */ | ||
| 547 | +void HELPER(neon_zip_u8)(void) | ||
| 548 | +{ | ||
| 549 | + uint32_t rd; | ||
| 550 | + uint32_t rm; | ||
| 551 | + rd = (T0 & 0xff) | ((T1 << 8) & 0xff00) | ||
| 552 | + | ((T0 << 8) & 0xff0000) | ((T1 << 16) & 0xff000000); | ||
| 553 | + rm = ((T0 >> 16) & 0xff) | ((T1 >> 8) & 0xff00) | ||
| 554 | + | ((T0 >> 8) & 0xff0000) | (T1 & 0xff000000); | ||
| 555 | + T0 = rd; | ||
| 556 | + T1 = rm; | ||
| 557 | + FORCE_RET(); | ||
| 558 | +} | ||
| 559 | +/* Halfword interleave on the globals T0/T1. New T0 = T0's low halfword in the low half and T1's low halfword in the high half; new T1 = T0's high halfword in the low half with T1's high halfword kept in the high half. */ | ||
| 560 | +void HELPER(neon_zip_u16)(void) | ||
| 561 | +{ | ||
| 562 | + uint32_t tmp; | ||
| 563 | + | ||
| 564 | + tmp = (T0 & 0xffff) | (T1 << 16); | ||
| 565 | + T1 = (T1 & 0xffff0000) | (T0 >> 16); | ||
| 566 | + T0 = tmp; | ||
| 567 | + FORCE_RET(); | ||
| 568 | +} |
target-arm/op_neon.h deleted
100644 → 0
| 1 | -/* | ||
| 2 | - * ARM NEON vector operations. | ||
| 3 | - * | ||
| 4 | - * Copyright (c) 2007 CodeSourcery. | ||
| 5 | - * Written by Paul Brook | ||
| 6 | - * | ||
| 7 | - * This code is licenced under the GPL. | ||
| 8 | - */ | ||
| 9 | -/* Note that for NEON an "l" prefix means it is a wide operation, unlike | ||
| 10 | - scalar arm ops where it means a word size operation. */ | ||
| 11 | - | ||
| 12 | -#define SIGNBIT (uint32_t)0x80000000 | ||
| 13 | -/* ??? NEON ops should probably have their own float status. */ | ||
| 14 | -#define NFS &env->vfp.fp_status | ||
| 15 | -#define NEON_OP(name) void OPPROTO op_neon_##name (void) | ||
| 16 | - | ||
| 17 | -/* Helper routines to perform bitwise copies between float and int. */ | ||
| 18 | -static inline float32 vfp_itos(uint32_t i) | ||
| 19 | -{ | ||
| 20 | - union { | ||
| 21 | - uint32_t i; | ||
| 22 | - float32 s; | ||
| 23 | - } v; | ||
| 24 | - | ||
| 25 | - v.i = i; | ||
| 26 | - return v.s; | ||
| 27 | -} | ||
| 28 | - | ||
| 29 | -static inline uint32_t vfp_stoi(float32 s) | ||
| 30 | -{ | ||
| 31 | - union { | ||
| 32 | - uint32_t i; | ||
| 33 | - float32 s; | ||
| 34 | - } v; | ||
| 35 | - | ||
| 36 | - v.s = s; | ||
| 37 | - return v.i; | ||
| 38 | -} | ||
| 39 | - | ||
| 40 | -NEON_OP(getreg_T0) | ||
| 41 | -{ | ||
| 42 | - T0 = *(uint32_t *)((char *) env + PARAM1); | ||
| 43 | -} | ||
| 44 | - | ||
| 45 | -NEON_OP(getreg_T1) | ||
| 46 | -{ | ||
| 47 | - T1 = *(uint32_t *)((char *) env + PARAM1); | ||
| 48 | -} | ||
| 49 | - | ||
| 50 | -NEON_OP(setreg_T0) | ||
| 51 | -{ | ||
| 52 | - *(uint32_t *)((char *) env + PARAM1) = T0; | ||
| 53 | -} | ||
| 54 | - | ||
| 55 | -NEON_OP(setreg_T1) | ||
| 56 | -{ | ||
| 57 | - *(uint32_t *)((char *) env + PARAM1) = T1; | ||
| 58 | -} | ||
| 59 | - | ||
| 60 | -#define NEON_TYPE1(name, type) \ | ||
| 61 | -typedef struct \ | ||
| 62 | -{ \ | ||
| 63 | - type v1; \ | ||
| 64 | -} neon_##name; | ||
| 65 | -#ifdef WORDS_BIGENDIAN | ||
| 66 | -#define NEON_TYPE2(name, type) \ | ||
| 67 | -typedef struct \ | ||
| 68 | -{ \ | ||
| 69 | - type v2; \ | ||
| 70 | - type v1; \ | ||
| 71 | -} neon_##name; | ||
| 72 | -#define NEON_TYPE4(name, type) \ | ||
| 73 | -typedef struct \ | ||
| 74 | -{ \ | ||
| 75 | - type v4; \ | ||
| 76 | - type v3; \ | ||
| 77 | - type v2; \ | ||
| 78 | - type v1; \ | ||
| 79 | -} neon_##name; | ||
| 80 | -#else | ||
| 81 | -#define NEON_TYPE2(name, type) \ | ||
| 82 | -typedef struct \ | ||
| 83 | -{ \ | ||
| 84 | - type v1; \ | ||
| 85 | - type v2; \ | ||
| 86 | -} neon_##name; | ||
| 87 | -#define NEON_TYPE4(name, type) \ | ||
| 88 | -typedef struct \ | ||
| 89 | -{ \ | ||
| 90 | - type v1; \ | ||
| 91 | - type v2; \ | ||
| 92 | - type v3; \ | ||
| 93 | - type v4; \ | ||
| 94 | -} neon_##name; | ||
| 95 | -#endif | ||
| 96 | - | ||
| 97 | -NEON_TYPE4(s8, int8_t) | ||
| 98 | -NEON_TYPE4(u8, uint8_t) | ||
| 99 | -NEON_TYPE2(s16, int16_t) | ||
| 100 | -NEON_TYPE2(u16, uint16_t) | ||
| 101 | -NEON_TYPE1(s32, int32_t) | ||
| 102 | -NEON_TYPE1(u32, uint32_t) | ||
| 103 | -#undef NEON_TYPE4 | ||
| 104 | -#undef NEON_TYPE2 | ||
| 105 | -#undef NEON_TYPE1 | ||
| 106 | - | ||
| 107 | -/* Copy from a uint32_t to a vector structure type. */ | ||
| 108 | -#define NEON_UNPACK(vtype, dest, val) do { \ | ||
| 109 | - union { \ | ||
| 110 | - vtype v; \ | ||
| 111 | - uint32_t i; \ | ||
| 112 | - } conv_u; \ | ||
| 113 | - conv_u.i = (val); \ | ||
| 114 | - dest = conv_u.v; \ | ||
| 115 | - } while(0) | ||
| 116 | - | ||
| 117 | -/* Copy from a vector structure type to a uint32_t. */ | ||
| 118 | -#define NEON_PACK(vtype, dest, val) do { \ | ||
| 119 | - union { \ | ||
| 120 | - vtype v; \ | ||
| 121 | - uint32_t i; \ | ||
| 122 | - } conv_u; \ | ||
| 123 | - conv_u.v = (val); \ | ||
| 124 | - dest = conv_u.i; \ | ||
| 125 | - } while(0) | ||
| 126 | - | ||
| 127 | -#define NEON_DO1 \ | ||
| 128 | - NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); | ||
| 129 | -#define NEON_DO2 \ | ||
| 130 | - NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \ | ||
| 131 | - NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); | ||
| 132 | -#define NEON_DO4 \ | ||
| 133 | - NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \ | ||
| 134 | - NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); \ | ||
| 135 | - NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \ | ||
| 136 | - NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4); | ||
| 137 | - | ||
| 138 | -#define NEON_VOP(name, vtype, n) \ | ||
| 139 | -NEON_OP(name) \ | ||
| 140 | -{ \ | ||
| 141 | - vtype vsrc1; \ | ||
| 142 | - vtype vsrc2; \ | ||
| 143 | - vtype vdest; \ | ||
| 144 | - NEON_UNPACK(vtype, vsrc1, T0); \ | ||
| 145 | - NEON_UNPACK(vtype, vsrc2, T1); \ | ||
| 146 | - NEON_DO##n; \ | ||
| 147 | - NEON_PACK(vtype, T0, vdest); \ | ||
| 148 | - FORCE_RET(); \ | ||
| 149 | -} | ||
| 150 | - | ||
| 151 | -#define NEON_VOP1(name, vtype, n) \ | ||
| 152 | -NEON_OP(name) \ | ||
| 153 | -{ \ | ||
| 154 | - vtype vsrc1; \ | ||
| 155 | - vtype vdest; \ | ||
| 156 | - NEON_UNPACK(vtype, vsrc1, T0); \ | ||
| 157 | - NEON_DO##n; \ | ||
| 158 | - NEON_PACK(vtype, T0, vdest); \ | ||
| 159 | - FORCE_RET(); \ | ||
| 160 | -} | ||
| 161 | - | ||
| 162 | -/* Pairwise operations. */ | ||
| 163 | -/* For 32-bit elements each segment only contains a single element, so | ||
| 164 | - the elementwise and pairwise operations are the same. */ | ||
| 165 | -#define NEON_PDO2 \ | ||
| 166 | - NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \ | ||
| 167 | - NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2); | ||
| 168 | -#define NEON_PDO4 \ | ||
| 169 | - NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \ | ||
| 170 | - NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \ | ||
| 171 | - NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \ | ||
| 172 | - NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4); \ | ||
| 173 | - | ||
| 174 | -#define NEON_POP(name, vtype, n) \ | ||
| 175 | -NEON_OP(name) \ | ||
| 176 | -{ \ | ||
| 177 | - vtype vsrc1; \ | ||
| 178 | - vtype vsrc2; \ | ||
| 179 | - vtype vdest; \ | ||
| 180 | - NEON_UNPACK(vtype, vsrc1, T0); \ | ||
| 181 | - NEON_UNPACK(vtype, vsrc2, T1); \ | ||
| 182 | - NEON_PDO##n; \ | ||
| 183 | - NEON_PACK(vtype, T0, vdest); \ | ||
| 184 | - FORCE_RET(); \ | ||
| 185 | -} | ||
| 186 | - | ||
| 187 | -#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 | ||
| 188 | -NEON_VOP(hadd_s8, neon_s8, 4) | ||
| 189 | -NEON_VOP(hadd_u8, neon_u8, 4) | ||
| 190 | -NEON_VOP(hadd_s16, neon_s16, 2) | ||
| 191 | -NEON_VOP(hadd_u16, neon_u16, 2) | ||
| 192 | -#undef NEON_FN | ||
| 193 | - | ||
| 194 | -NEON_OP(hadd_s32) | ||
| 195 | -{ | ||
| 196 | - int32_t src1 = T0; | ||
| 197 | - int32_t src2 = T1; | ||
| 198 | - int32_t dest; | ||
| 199 | - | ||
| 200 | - dest = (src1 >> 1) + (src2 >> 1); | ||
| 201 | - if (src1 & src2 & 1) | ||
| 202 | - dest++; | ||
| 203 | - T0 = dest; | ||
| 204 | - FORCE_RET(); | ||
| 205 | -} | ||
| 206 | - | ||
| 207 | -NEON_OP(hadd_u32) | ||
| 208 | -{ | ||
| 209 | - uint32_t src1 = T0; | ||
| 210 | - uint32_t src2 = T1; | ||
| 211 | - uint32_t dest; | ||
| 212 | - | ||
| 213 | - dest = (src1 >> 1) + (src2 >> 1); | ||
| 214 | - if (src1 & src2 & 1) | ||
| 215 | - dest++; | ||
| 216 | - T0 = dest; | ||
| 217 | - FORCE_RET(); | ||
| 218 | -} | ||
| 219 | - | ||
| 220 | -#define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1 | ||
| 221 | -NEON_VOP(rhadd_s8, neon_s8, 4) | ||
| 222 | -NEON_VOP(rhadd_u8, neon_u8, 4) | ||
| 223 | -NEON_VOP(rhadd_s16, neon_s16, 2) | ||
| 224 | -NEON_VOP(rhadd_u16, neon_u16, 2) | ||
| 225 | -#undef NEON_FN | ||
| 226 | - | ||
| 227 | -NEON_OP(rhadd_s32) | ||
| 228 | -{ | ||
| 229 | - int32_t src1 = T0; | ||
| 230 | - int32_t src2 = T1; | ||
| 231 | - int32_t dest; | ||
| 232 | - | ||
| 233 | - dest = (src1 >> 1) + (src2 >> 1); | ||
| 234 | - if ((src1 | src2) & 1) | ||
| 235 | - dest++; | ||
| 236 | - T0 = dest; | ||
| 237 | - FORCE_RET(); | ||
| 238 | -} | ||
| 239 | - | ||
| 240 | -NEON_OP(rhadd_u32) | ||
| 241 | -{ | ||
| 242 | - uint32_t src1 = T0; | ||
| 243 | - uint32_t src2 = T1; | ||
| 244 | - uint32_t dest; | ||
| 245 | - | ||
| 246 | - dest = (src1 >> 1) + (src2 >> 1); | ||
| 247 | - if ((src1 | src2) & 1) | ||
| 248 | - dest++; | ||
| 249 | - T0 = dest; | ||
| 250 | - FORCE_RET(); | ||
| 251 | -} | ||
| 252 | - | ||
| 253 | -#define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1 | ||
| 254 | -NEON_VOP(hsub_s8, neon_s8, 4) | ||
| 255 | -NEON_VOP(hsub_u8, neon_u8, 4) | ||
| 256 | -NEON_VOP(hsub_s16, neon_s16, 2) | ||
| 257 | -NEON_VOP(hsub_u16, neon_u16, 2) | ||
| 258 | -#undef NEON_FN | ||
| 259 | - | ||
| 260 | -NEON_OP(hsub_s32) | ||
| 261 | -{ | ||
| 262 | - int32_t src1 = T0; | ||
| 263 | - int32_t src2 = T1; | ||
| 264 | - int32_t dest; | ||
| 265 | - | ||
| 266 | - dest = (src1 >> 1) - (src2 >> 1); | ||
| 267 | - if ((~src1) & src2 & 1) | ||
| 268 | - dest--; | ||
| 269 | - T0 = dest; | ||
| 270 | - FORCE_RET(); | ||
| 271 | -} | ||
| 272 | - | ||
| 273 | -NEON_OP(hsub_u32) | ||
| 274 | -{ | ||
| 275 | - uint32_t src1 = T0; | ||
| 276 | - uint32_t src2 = T1; | ||
| 277 | - uint32_t dest; | ||
| 278 | - | ||
| 279 | - dest = (src1 >> 1) - (src2 >> 1); | ||
| 280 | - if ((~src1) & src2 & 1) | ||
| 281 | - dest--; | ||
| 282 | - T0 = dest; | ||
| 283 | - FORCE_RET(); | ||
| 284 | -} | ||
| 285 | - | ||
| 286 | -#define NEON_USAT(dest, src1, src2, type) do { \ | ||
| 287 | - uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ | ||
| 288 | - if (tmp != (type)tmp) { \ | ||
| 289 | - env->QF = 1; \ | ||
| 290 | - dest = ~0; \ | ||
| 291 | - } else { \ | ||
| 292 | - dest = tmp; \ | ||
| 293 | - }} while(0) | ||
| 294 | -#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) | ||
| 295 | -NEON_VOP(qadd_u8, neon_u8, 4) | ||
| 296 | -#undef NEON_FN | ||
| 297 | -#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) | ||
| 298 | -NEON_VOP(qadd_u16, neon_u16, 2) | ||
| 299 | -#undef NEON_FN | ||
| 300 | -#undef NEON_USAT | ||
| 301 | - | ||
| 302 | -#define NEON_SSAT(dest, src1, src2, type) do { \ | ||
| 303 | - int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ | ||
| 304 | - if (tmp != (type)tmp) { \ | ||
| 305 | - env->QF = 1; \ | ||
| 306 | - if (src2 > 0) { \ | ||
| 307 | - tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ | ||
| 308 | - } else { \ | ||
| 309 | - tmp = 1 << (sizeof(type) * 8 - 1); \ | ||
| 310 | - } \ | ||
| 311 | - } \ | ||
| 312 | - dest = tmp; \ | ||
| 313 | - } while(0) | ||
| 314 | -#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) | ||
| 315 | -NEON_VOP(qadd_s8, neon_s8, 4) | ||
| 316 | -#undef NEON_FN | ||
| 317 | -#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) | ||
| 318 | -NEON_VOP(qadd_s16, neon_s16, 2) | ||
| 319 | -#undef NEON_FN | ||
| 320 | -#undef NEON_SSAT | ||
| 321 | - | ||
| 322 | -#define NEON_USAT(dest, src1, src2, type) do { \ | ||
| 323 | - uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ | ||
| 324 | - if (tmp != (type)tmp) { \ | ||
| 325 | - env->QF = 1; \ | ||
| 326 | - dest = 0; \ | ||
| 327 | - } else { \ | ||
| 328 | - dest = tmp; \ | ||
| 329 | - }} while(0) | ||
| 330 | -#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) | ||
| 331 | -NEON_VOP(qsub_u8, neon_u8, 4) | ||
| 332 | -#undef NEON_FN | ||
| 333 | -#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) | ||
| 334 | -NEON_VOP(qsub_u16, neon_u16, 2) | ||
| 335 | -#undef NEON_FN | ||
| 336 | -#undef NEON_USAT | ||
| 337 | - | ||
| 338 | -#define NEON_SSAT(dest, src1, src2, type) do { \ | ||
| 339 | - int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ | ||
| 340 | - if (tmp != (type)tmp) { \ | ||
| 341 | - env->QF = 1; \ | ||
| 342 | - if (src2 < 0) { \ | ||
| 343 | - tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ | ||
| 344 | - } else { \ | ||
| 345 | - tmp = 1 << (sizeof(type) * 8 - 1); \ | ||
| 346 | - } \ | ||
| 347 | - } \ | ||
| 348 | - dest = tmp; \ | ||
| 349 | - } while(0) | ||
| 350 | -#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) | ||
| 351 | -NEON_VOP(qsub_s8, neon_s8, 4) | ||
| 352 | -#undef NEON_FN | ||
| 353 | -#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) | ||
| 354 | -NEON_VOP(qsub_s16, neon_s16, 2) | ||
| 355 | -#undef NEON_FN | ||
| 356 | -#undef NEON_SSAT | ||
| 357 | - | ||
| 358 | -#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0 | ||
| 359 | -NEON_VOP(cgt_s8, neon_s8, 4) | ||
| 360 | -NEON_VOP(cgt_u8, neon_u8, 4) | ||
| 361 | -NEON_VOP(cgt_s16, neon_s16, 2) | ||
| 362 | -NEON_VOP(cgt_u16, neon_u16, 2) | ||
| 363 | -NEON_VOP(cgt_s32, neon_s32, 1) | ||
| 364 | -NEON_VOP(cgt_u32, neon_u32, 1) | ||
| 365 | -#undef NEON_FN | ||
| 366 | - | ||
| 367 | -#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0 | ||
| 368 | -NEON_VOP(cge_s8, neon_s8, 4) | ||
| 369 | -NEON_VOP(cge_u8, neon_u8, 4) | ||
| 370 | -NEON_VOP(cge_s16, neon_s16, 2) | ||
| 371 | -NEON_VOP(cge_u16, neon_u16, 2) | ||
| 372 | -NEON_VOP(cge_s32, neon_s32, 1) | ||
| 373 | -NEON_VOP(cge_u32, neon_u32, 1) | ||
| 374 | -#undef NEON_FN | ||
| 375 | - | ||
| 376 | -#define NEON_FN(dest, src1, src2) do { \ | ||
| 377 | - int8_t tmp; \ | ||
| 378 | - tmp = (int8_t)src2; \ | ||
| 379 | - if (tmp < 0) { \ | ||
| 380 | - dest = src1 >> -tmp; \ | ||
| 381 | - } else { \ | ||
| 382 | - dest = src1 << tmp; \ | ||
| 383 | - }} while (0) | ||
| 384 | -NEON_VOP(shl_s8, neon_s8, 4) | ||
| 385 | -NEON_VOP(shl_u8, neon_u8, 4) | ||
| 386 | -NEON_VOP(shl_s16, neon_s16, 2) | ||
| 387 | -NEON_VOP(shl_u16, neon_u16, 2) | ||
| 388 | -NEON_VOP(shl_s32, neon_s32, 1) | ||
| 389 | -NEON_VOP(shl_u32, neon_u32, 1) | ||
| 390 | -#undef NEON_FN | ||
| 391 | - | ||
| 392 | -NEON_OP(shl_u64) | ||
| 393 | -{ | ||
| 394 | - int8_t shift = env->vfp.scratch[0]; | ||
| 395 | - uint64_t val = T0 | ((uint64_t)T1 << 32); | ||
| 396 | - if (shift < 0) { | ||
| 397 | - val >>= -shift; | ||
| 398 | - } else { | ||
| 399 | - val <<= shift; | ||
| 400 | - } | ||
| 401 | - T0 = val; | ||
| 402 | - T1 = val >> 32; | ||
| 403 | - FORCE_RET(); | ||
| 404 | -} | ||
| 405 | - | ||
| 406 | -NEON_OP(shl_s64) | ||
| 407 | -{ | ||
| 408 | - int8_t shift = env->vfp.scratch[0]; | ||
| 409 | - int64_t val = T0 | ((uint64_t)T1 << 32); | ||
| 410 | - if (shift < 0) { | ||
| 411 | - val >>= -shift; | ||
| 412 | - } else { | ||
| 413 | - val <<= shift; | ||
| 414 | - } | ||
| 415 | - T0 = val; | ||
| 416 | - T1 = val >> 32; | ||
| 417 | - FORCE_RET(); | ||
| 418 | -} | ||
| 419 | - | ||
| 420 | -#define NEON_FN(dest, src1, src2) do { \ | ||
| 421 | - int8_t tmp; \ | ||
| 422 | - tmp = (int8_t)src1; \ | ||
| 423 | - if (tmp < 0) { \ | ||
| 424 | - dest = (src2 + (1 << (-1 - tmp))) >> -tmp; \ | ||
| 425 | - } else { \ | ||
| 426 | - dest = src2 << tmp; \ | ||
| 427 | - }} while (0) | ||
| 428 | - | ||
| 429 | -NEON_VOP(rshl_s8, neon_s8, 4) | ||
| 430 | -NEON_VOP(rshl_u8, neon_u8, 4) | ||
| 431 | -NEON_VOP(rshl_s16, neon_s16, 2) | ||
| 432 | -NEON_VOP(rshl_u16, neon_u16, 2) | ||
| 433 | -NEON_VOP(rshl_s32, neon_s32, 1) | ||
| 434 | -NEON_VOP(rshl_u32, neon_u32, 1) | ||
| 435 | -#undef NEON_FN | ||
| 436 | - | ||
| 437 | -NEON_OP(rshl_u64) | ||
| 438 | -{ | ||
| 439 | - int8_t shift = env->vfp.scratch[0]; | ||
| 440 | - uint64_t val = T0 | ((uint64_t)T1 << 32); | ||
| 441 | - if (shift < 0) { | ||
| 442 | - val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift; | ||
| 443 | - val >>= -shift; | ||
| 444 | - } else { | ||
| 445 | - val <<= shift; | ||
| 446 | - } | ||
| 447 | - T0 = val; | ||
| 448 | - T1 = val >> 32; | ||
| 449 | - FORCE_RET(); | ||
| 450 | -} | ||
| 451 | - | ||
| 452 | -NEON_OP(rshl_s64) | ||
| 453 | -{ | ||
| 454 | - int8_t shift = env->vfp.scratch[0]; | ||
| 455 | - int64_t val = T0 | ((uint64_t)T1 << 32); | ||
| 456 | - if (shift < 0) { | ||
| 457 | - val = (val + ((int64_t)1 << (-1 - shift))) >> -shift; | ||
| 458 | - } else { | ||
| 459 | - val <<= shift; | ||
| 460 | - } | ||
| 461 | - T0 = val; | ||
| 462 | - T1 = val >> 32; | ||
| 463 | - FORCE_RET(); | ||
| 464 | -} | ||
| 465 | - | ||
| 466 | -#define NEON_FN(dest, src1, src2) do { \ | ||
| 467 | - int8_t tmp; \ | ||
| 468 | - tmp = (int8_t)src1; \ | ||
| 469 | - if (tmp < 0) { \ | ||
| 470 | - dest = src2 >> -tmp; \ | ||
| 471 | - } else { \ | ||
| 472 | - dest = src2 << tmp; \ | ||
| 473 | - if ((dest >> tmp) != src2) { \ | ||
| 474 | - env->QF = 1; \ | ||
| 475 | - dest = ~0; \ | ||
| 476 | - } \ | ||
| 477 | - }} while (0) | ||
| 478 | -NEON_VOP(qshl_s8, neon_s8, 4) | ||
| 479 | -NEON_VOP(qshl_s16, neon_s16, 2) | ||
| 480 | -NEON_VOP(qshl_s32, neon_s32, 1) | ||
| 481 | -#undef NEON_FN | ||
| 482 | - | ||
| 483 | -NEON_OP(qshl_s64) | ||
| 484 | -{ | ||
| 485 | - int8_t shift = env->vfp.scratch[0]; | ||
| 486 | - int64_t val = T0 | ((uint64_t)T1 << 32); | ||
| 487 | - if (shift < 0) { | ||
| 488 | - val >>= -shift; | ||
| 489 | - } else { | ||
| 490 | - int64_t tmp = val; | ||
| 491 | - val <<= shift; | ||
| 492 | - if ((val >> shift) != tmp) { | ||
| 493 | - env->QF = 1; | ||
| 494 | - val = (tmp >> 63) ^ 0x7fffffffffffffffULL; | ||
| 495 | - } | ||
| 496 | - } | ||
| 497 | - T0 = val; | ||
| 498 | - T1 = val >> 32; | ||
| 499 | - FORCE_RET(); | ||
| 500 | -} | ||
| 501 | - | ||
| 502 | -#define NEON_FN(dest, src1, src2) do { \ | ||
| 503 | - int8_t tmp; \ | ||
| 504 | - tmp = (int8_t)src1; \ | ||
| 505 | - if (tmp < 0) { \ | ||
| 506 | - dest = src2 >> -tmp; \ | ||
| 507 | - } else { \ | ||
| 508 | - dest = src2 << tmp; \ | ||
| 509 | - if ((dest >> tmp) != src2) { \ | ||
| 510 | - env->QF = 1; \ | ||
| 511 | - dest = src2 >> 31; \ | ||
| 512 | - } \ | ||
| 513 | - }} while (0) | ||
| 514 | -NEON_VOP(qshl_u8, neon_u8, 4) | ||
| 515 | -NEON_VOP(qshl_u16, neon_u16, 2) | ||
| 516 | -NEON_VOP(qshl_u32, neon_u32, 1) | ||
| 517 | -#undef NEON_FN | ||
| 518 | - | ||
| 519 | -NEON_OP(qshl_u64) | ||
| 520 | -{ | ||
| 521 | - int8_t shift = env->vfp.scratch[0]; | ||
| 522 | - uint64_t val = T0 | ((uint64_t)T1 << 32); | ||
| 523 | - if (shift < 0) { | ||
| 524 | - val >>= -shift; | ||
| 525 | - } else { | ||
| 526 | - uint64_t tmp = val; | ||
| 527 | - val <<= shift; | ||
| 528 | - if ((val >> shift) != tmp) { | ||
| 529 | - env->QF = 1; | ||
| 530 | - val = ~(uint64_t)0; | ||
| 531 | - } | ||
| 532 | - } | ||
| 533 | - T0 = val; | ||
| 534 | - T1 = val >> 32; | ||
| 535 | - FORCE_RET(); | ||
| 536 | -} | ||
| 537 | - | ||
| 538 | -#define NEON_FN(dest, src1, src2) do { \ | ||
| 539 | - int8_t tmp; \ | ||
| 540 | - tmp = (int8_t)src1; \ | ||
| 541 | - if (tmp < 0) { \ | ||
| 542 | - dest = (src2 + (1 << (-1 - tmp))) >> -tmp; \ | ||
| 543 | - } else { \ | ||
| 544 | - dest = src2 << tmp; \ | ||
| 545 | - if ((dest >> tmp) != src2) { \ | ||
| 546 | - dest = ~0; \ | ||
| 547 | - } \ | ||
| 548 | - }} while (0) | ||
| 549 | -NEON_VOP(qrshl_s8, neon_s8, 4) | ||
| 550 | -NEON_VOP(qrshl_s16, neon_s16, 2) | ||
| 551 | -NEON_VOP(qrshl_s32, neon_s32, 1) | ||
| 552 | -#undef NEON_FN | ||
| 553 | - | ||
| 554 | -#define NEON_FN(dest, src1, src2) do { \ | ||
| 555 | - int8_t tmp; \ | ||
| 556 | - tmp = (int8_t)src1; \ | ||
| 557 | - if (tmp < 0) { \ | ||
| 558 | - dest = (src2 + (1 << (-1 - tmp))) >> -tmp; \ | ||
| 559 | - } else { \ | ||
| 560 | - dest = src2 << tmp; \ | ||
| 561 | - if ((dest >> tmp) != src2) { \ | ||
| 562 | - env->QF = 1; \ | ||
| 563 | - dest = src2 >> 31; \ | ||
| 564 | - } \ | ||
| 565 | - }} while (0) | ||
| 566 | -NEON_VOP(qrshl_u8, neon_u8, 4) | ||
| 567 | -NEON_VOP(qrshl_u16, neon_u16, 2) | ||
| 568 | -NEON_VOP(qrshl_u32, neon_u32, 1) | ||
| 569 | -#undef NEON_FN | ||
| 570 | - | ||
| 571 | -#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2 | ||
| 572 | -NEON_VOP(max_s8, neon_s8, 4) | ||
| 573 | -NEON_VOP(max_u8, neon_u8, 4) | ||
| 574 | -NEON_VOP(max_s16, neon_s16, 2) | ||
| 575 | -NEON_VOP(max_u16, neon_u16, 2) | ||
| 576 | -NEON_VOP(max_s32, neon_s32, 1) | ||
| 577 | -NEON_VOP(max_u32, neon_u32, 1) | ||
| 578 | -NEON_POP(pmax_s8, neon_s8, 4) | ||
| 579 | -NEON_POP(pmax_u8, neon_u8, 4) | ||
| 580 | -NEON_POP(pmax_s16, neon_s16, 2) | ||
| 581 | -NEON_POP(pmax_u16, neon_u16, 2) | ||
| 582 | -#undef NEON_FN | ||
| 583 | - | ||
| 584 | -NEON_OP(max_f32) | ||
| 585 | -{ | ||
| 586 | - float32 f0 = vfp_itos(T0); | ||
| 587 | - float32 f1 = vfp_itos(T1); | ||
| 588 | - T0 = (float32_compare_quiet(f0, f1, NFS) == 1) ? T0 : T1; | ||
| 589 | - FORCE_RET(); | ||
| 590 | -} | ||
| 591 | - | ||
| 592 | -#define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2 | ||
| 593 | -NEON_VOP(min_s8, neon_s8, 4) | ||
| 594 | -NEON_VOP(min_u8, neon_u8, 4) | ||
| 595 | -NEON_VOP(min_s16, neon_s16, 2) | ||
| 596 | -NEON_VOP(min_u16, neon_u16, 2) | ||
| 597 | -NEON_VOP(min_s32, neon_s32, 1) | ||
| 598 | -NEON_VOP(min_u32, neon_u32, 1) | ||
| 599 | -NEON_POP(pmin_s8, neon_s8, 4) | ||
| 600 | -NEON_POP(pmin_u8, neon_u8, 4) | ||
| 601 | -NEON_POP(pmin_s16, neon_s16, 2) | ||
| 602 | -NEON_POP(pmin_u16, neon_u16, 2) | ||
| 603 | -#undef NEON_FN | ||
| 604 | - | ||
| 605 | -NEON_OP(min_f32) | ||
| 606 | -{ | ||
| 607 | - float32 f0 = vfp_itos(T0); | ||
| 608 | - float32 f1 = vfp_itos(T1); | ||
| 609 | - T0 = (float32_compare_quiet(f0, f1, NFS) == -1) ? T0 : T1; | ||
| 610 | - FORCE_RET(); | ||
| 611 | -} | ||
| 612 | - | ||
| 613 | -#define NEON_FN(dest, src1, src2) \ | ||
| 614 | - dest = (src1 > src2) ? (src1 - src2) : (src2 - src1) | ||
| 615 | -NEON_VOP(abd_s8, neon_s8, 4) | ||
| 616 | -NEON_VOP(abd_u8, neon_u8, 4) | ||
| 617 | -NEON_VOP(abd_s16, neon_s16, 2) | ||
| 618 | -NEON_VOP(abd_u16, neon_u16, 2) | ||
| 619 | -NEON_VOP(abd_s32, neon_s32, 1) | ||
| 620 | -NEON_VOP(abd_u32, neon_u32, 1) | ||
| 621 | -#undef NEON_FN | ||
| 622 | - | ||
| 623 | -NEON_OP(abd_f32) | ||
| 624 | -{ | ||
| 625 | - float32 f0 = vfp_itos(T0); | ||
| 626 | - float32 f1 = vfp_itos(T1); | ||
| 627 | - T0 = vfp_stoi((float32_compare_quiet(f0, f1, NFS) == 1) | ||
| 628 | - ? float32_sub(f0, f1, NFS) | ||
| 629 | - : float32_sub(f1, f0, NFS)); | ||
| 630 | - FORCE_RET(); | ||
| 631 | -} | ||
| 632 | - | ||
| 633 | -#define NEON_FN(dest, src1, src2) dest = src1 + src2 | ||
| 634 | -NEON_VOP(add_u8, neon_u8, 4) | ||
| 635 | -NEON_VOP(add_u16, neon_u16, 2) | ||
| 636 | -NEON_POP(padd_u8, neon_u8, 4) | ||
| 637 | -NEON_POP(padd_u16, neon_u16, 2) | ||
| 638 | -#undef NEON_FN | ||
| 639 | - | ||
| 640 | -NEON_OP(add_f32) | ||
| 641 | -{ | ||
| 642 | - T0 = vfp_stoi(float32_add(vfp_itos(T0), vfp_itos(T1), NFS)); | ||
| 643 | - FORCE_RET(); | ||
| 644 | -} | ||
| 645 | - | ||
| 646 | -#define NEON_FN(dest, src1, src2) dest = src1 - src2 | ||
| 647 | -NEON_VOP(sub_u8, neon_u8, 4) | ||
| 648 | -NEON_VOP(sub_u16, neon_u16, 2) | ||
| 649 | -#undef NEON_FN | ||
| 650 | - | ||
| 651 | -NEON_OP(sub_f32) | ||
| 652 | -{ | ||
| 653 | - T0 = vfp_stoi(float32_sub(vfp_itos(T0), vfp_itos(T1), NFS)); | ||
| 654 | - FORCE_RET(); | ||
| 655 | -} | ||
| 656 | - | ||
| 657 | -#define NEON_FN(dest, src1, src2) dest = src2 - src1 | ||
| 658 | -NEON_VOP(rsb_u8, neon_u8, 4) | ||
| 659 | -NEON_VOP(rsb_u16, neon_u16, 2) | ||
| 660 | -#undef NEON_FN | ||
| 661 | - | ||
| 662 | -NEON_OP(rsb_f32) | ||
| 663 | -{ | ||
| 664 | - T0 = vfp_stoi(float32_sub(vfp_itos(T1), vfp_itos(T0), NFS)); | ||
| 665 | - FORCE_RET(); | ||
| 666 | -} | ||
| 667 | - | ||
| 668 | -#define NEON_FN(dest, src1, src2) dest = src1 * src2 | ||
| 669 | -NEON_VOP(mul_u8, neon_u8, 4) | ||
| 670 | -NEON_VOP(mul_u16, neon_u16, 2) | ||
| 671 | -#undef NEON_FN | ||
| 672 | - | ||
| 673 | -NEON_OP(mul_f32) | ||
| 674 | -{ | ||
| 675 | - T0 = vfp_stoi(float32_mul(vfp_itos(T0), vfp_itos(T1), NFS)); | ||
| 676 | - FORCE_RET(); | ||
| 677 | -} | ||
| 678 | - | ||
| 679 | -NEON_OP(mul_p8) | ||
| 680 | -{ | ||
| 681 | - T0 = helper_neon_mul_p8(T0, T1); | ||
| 682 | -} | ||
| 683 | - | ||
| 684 | -#define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0 | ||
| 685 | -NEON_VOP(tst_u8, neon_u8, 4) | ||
| 686 | -NEON_VOP(tst_u16, neon_u16, 2) | ||
| 687 | -NEON_VOP(tst_u32, neon_u32, 1) | ||
| 688 | -#undef NEON_FN | ||
| 689 | - | ||
| 690 | -#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0 | ||
| 691 | -NEON_VOP(ceq_u8, neon_u8, 4) | ||
| 692 | -NEON_VOP(ceq_u16, neon_u16, 2) | ||
| 693 | -NEON_VOP(ceq_u32, neon_u32, 1) | ||
| 694 | -#undef NEON_FN | ||
| 695 | - | ||
| 696 | -#define NEON_QDMULH16(dest, src1, src2, round) do { \ | ||
| 697 | - uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \ | ||
| 698 | - if ((tmp ^ (tmp << 1)) & SIGNBIT) { \ | ||
| 699 | - env->QF = 1; \ | ||
| 700 | - tmp = (tmp >> 31) ^ ~SIGNBIT; \ | ||
| 701 | - } \ | ||
| 702 | - tmp <<= 1; \ | ||
| 703 | - if (round) { \ | ||
| 704 | - int32_t old = tmp; \ | ||
| 705 | - tmp += 1 << 15; \ | ||
| 706 | - if ((int32_t)tmp < old) { \ | ||
| 707 | - env->QF = 1; \ | ||
| 708 | - tmp = SIGNBIT - 1; \ | ||
| 709 | - } \ | ||
| 710 | - } \ | ||
| 711 | - dest = tmp >> 16; \ | ||
| 712 | - } while(0) | ||
| 713 | -#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0) | ||
| 714 | -NEON_VOP(qdmulh_s16, neon_s16, 2) | ||
| 715 | -#undef NEON_FN | ||
| 716 | -#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1) | ||
| 717 | -NEON_VOP(qrdmulh_s16, neon_s16, 2) | ||
| 718 | -#undef NEON_FN | ||
| 719 | -#undef NEON_QDMULH16 | ||
| 720 | - | ||
| 721 | -#define SIGNBIT64 ((uint64_t)1 << 63) | ||
| 722 | -#define NEON_QDMULH32(dest, src1, src2, round) do { \ | ||
| 723 | - uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \ | ||
| 724 | - if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \ | ||
| 725 | - env->QF = 1; \ | ||
| 726 | - tmp = (tmp >> 63) ^ ~SIGNBIT64; \ | ||
| 727 | - } else { \ | ||
| 728 | - tmp <<= 1; \ | ||
| 729 | - } \ | ||
| 730 | - if (round) { \ | ||
| 731 | - int64_t old = tmp; \ | ||
| 732 | - tmp += (int64_t)1 << 31; \ | ||
| 733 | - if ((int64_t)tmp < old) { \ | ||
| 734 | - env->QF = 1; \ | ||
| 735 | - tmp = SIGNBIT64 - 1; \ | ||
| 736 | - } \ | ||
| 737 | - } \ | ||
| 738 | - dest = tmp >> 32; \ | ||
| 739 | - } while(0) | ||
| 740 | -#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0) | ||
| 741 | -NEON_VOP(qdmulh_s32, neon_s32, 1) | ||
| 742 | -#undef NEON_FN | ||
| 743 | -#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1) | ||
| 744 | -NEON_VOP(qrdmulh_s32, neon_s32, 1) | ||
| 745 | -#undef NEON_FN | ||
| 746 | -#undef NEON_QDMULH32 | ||
| 747 | - | ||
| 748 | -/* Floating point comparisons produce an integer result. */ | ||
| 749 | -#define NEON_VOP_FCMP(name, cmp) \ | ||
| 750 | -NEON_OP(name) \ | ||
| 751 | -{ \ | ||
| 752 | - if (float32_compare_quiet(vfp_itos(T0), vfp_itos(T1), NFS) cmp 0) \ | ||
| 753 | - T0 = -1; \ | ||
| 754 | - else \ | ||
| 755 | - T0 = 0; \ | ||
| 756 | - FORCE_RET(); \ | ||
| 757 | -} | ||
| 758 | - | ||
| 759 | -NEON_VOP_FCMP(ceq_f32, ==) | ||
| 760 | -NEON_VOP_FCMP(cge_f32, >=) | ||
| 761 | -NEON_VOP_FCMP(cgt_f32, >) | ||
| 762 | - | ||
| 763 | -NEON_OP(acge_f32) | ||
| 764 | -{ | ||
| 765 | - float32 f0 = float32_abs(vfp_itos(T0)); | ||
| 766 | - float32 f1 = float32_abs(vfp_itos(T1)); | ||
| 767 | - T0 = (float32_compare_quiet(f0, f1,NFS) >= 0) ? -1 : 0; | ||
| 768 | - FORCE_RET(); | ||
| 769 | -} | ||
| 770 | - | ||
| 771 | -NEON_OP(acgt_f32) | ||
| 772 | -{ | ||
| 773 | - float32 f0 = float32_abs(vfp_itos(T0)); | ||
| 774 | - float32 f1 = float32_abs(vfp_itos(T1)); | ||
| 775 | - T0 = (float32_compare_quiet(f0, f1, NFS) > 0) ? -1 : 0; | ||
| 776 | - FORCE_RET(); | ||
| 777 | -} | ||
| 778 | - | ||
| 779 | -/* Narrowing instructions. The named type is the destination type. */ | ||
| 780 | -NEON_OP(narrow_u8) | ||
| 781 | -{ | ||
| 782 | - T0 = (T0 & 0xff) | ((T0 >> 8) & 0xff00) | ||
| 783 | - | ((T1 << 16) & 0xff0000) | (T1 << 24); | ||
| 784 | - FORCE_RET(); | ||
| 785 | -} | ||
| 786 | - | ||
| 787 | -NEON_OP(narrow_sat_u8) | ||
| 788 | -{ | ||
| 789 | - neon_u16 src; | ||
| 790 | - neon_u8 dest; | ||
| 791 | -#define SAT8(d, s) \ | ||
| 792 | - if (s > 0xff) { \ | ||
| 793 | - d = 0xff; \ | ||
| 794 | - env->QF = 1; \ | ||
| 795 | - } else { \ | ||
| 796 | - d = s; \ | ||
| 797 | - } | ||
| 798 | - | ||
| 799 | - NEON_UNPACK(neon_u16, src, T0); | ||
| 800 | - SAT8(dest.v1, src.v1); | ||
| 801 | - SAT8(dest.v2, src.v2); | ||
| 802 | - NEON_UNPACK(neon_u16, src, T1); | ||
| 803 | - SAT8(dest.v3, src.v1); | ||
| 804 | - SAT8(dest.v4, src.v2); | ||
| 805 | - NEON_PACK(neon_u8, T0, dest); | ||
| 806 | - FORCE_RET(); | ||
| 807 | -#undef SAT8 | ||
| 808 | -} | ||
| 809 | - | ||
| 810 | -NEON_OP(narrow_sat_s8) | ||
| 811 | -{ | ||
| 812 | - neon_s16 src; | ||
| 813 | - neon_s8 dest; | ||
| 814 | -#define SAT8(d, s) \ | ||
| 815 | - if (s != (uint8_t)s) { \ | ||
| 816 | - d = (s >> 15) ^ 0x7f; \ | ||
| 817 | - env->QF = 1; \ | ||
| 818 | - } else { \ | ||
| 819 | - d = s; \ | ||
| 820 | - } | ||
| 821 | - | ||
| 822 | - NEON_UNPACK(neon_s16, src, T0); | ||
| 823 | - SAT8(dest.v1, src.v1); | ||
| 824 | - SAT8(dest.v2, src.v2); | ||
| 825 | - NEON_UNPACK(neon_s16, src, T1); | ||
| 826 | - SAT8(dest.v3, src.v1); | ||
| 827 | - SAT8(dest.v4, src.v2); | ||
| 828 | - NEON_PACK(neon_s8, T0, dest); | ||
| 829 | - FORCE_RET(); | ||
| 830 | -#undef SAT8 | ||
| 831 | -} | ||
| 832 | - | ||
| 833 | -NEON_OP(narrow_u16) | ||
| 834 | -{ | ||
| 835 | - T0 = (T0 & 0xffff) | (T1 << 16); | ||
| 836 | -} | ||
| 837 | - | ||
| 838 | -NEON_OP(narrow_sat_u16) | ||
| 839 | -{ | ||
| 840 | - if (T0 > 0xffff) { | ||
| 841 | - T0 = 0xffff; | ||
| 842 | - env->QF = 1; | ||
| 843 | - } | ||
| 844 | - if (T1 > 0xffff) { | ||
| 845 | - T1 = 0xffff; | ||
| 846 | - env->QF = 1; | ||
| 847 | - } | ||
| 848 | - T0 |= T1 << 16; | ||
| 849 | - FORCE_RET(); | ||
| 850 | -} | ||
| 851 | - | ||
| 852 | -NEON_OP(narrow_sat_s16) | ||
| 853 | -{ | ||
| 854 | - if ((int32_t)T0 != (int16_t)T0) { | ||
| 855 | - T0 = ((int32_t)T0 >> 31) ^ 0x7fff; | ||
| 856 | - env->QF = 1; | ||
| 857 | - } | ||
| 858 | - if ((int32_t)T1 != (int16_t) T1) { | ||
| 859 | - T1 = ((int32_t)T1 >> 31) ^ 0x7fff; | ||
| 860 | - env->QF = 1; | ||
| 861 | - } | ||
| 862 | - T0 = (uint16_t)T0 | (T1 << 16); | ||
| 863 | - FORCE_RET(); | ||
| 864 | -} | ||
| 865 | - | ||
| 866 | -NEON_OP(narrow_sat_u32) | ||
| 867 | -{ | ||
| 868 | - if (T1) { | ||
| 869 | - T0 = 0xffffffffu; | ||
| 870 | - env->QF = 1; | ||
| 871 | - } | ||
| 872 | - FORCE_RET(); | ||
| 873 | -} | ||
| 874 | - | ||
| 875 | -NEON_OP(narrow_sat_s32) | ||
| 876 | -{ | ||
| 877 | - int32_t sign = (int32_t)T1 >> 31; | ||
| 878 | - | ||
| 879 | - if ((int32_t)T1 != sign) { | ||
| 880 | - T0 = sign ^ 0x7fffffff; | ||
| 881 | - env->QF = 1; | ||
| 882 | - } | ||
| 883 | - FORCE_RET(); | ||
| 884 | -} | ||
| 885 | - | ||
| 886 | -/* Narrowing instructions. Named type is the narrow type. */ | ||
| 887 | -NEON_OP(narrow_high_u8) | ||
| 888 | -{ | ||
| 889 | - T0 = ((T0 >> 8) & 0xff) | ((T0 >> 16) & 0xff00) | ||
| 890 | - | ((T1 << 8) & 0xff0000) | (T1 & 0xff000000); | ||
| 891 | - FORCE_RET(); | ||
| 892 | -} | ||
| 893 | - | ||
| 894 | -NEON_OP(narrow_high_u16) | ||
| 895 | -{ | ||
| 896 | - T0 = (T0 >> 16) | (T1 & 0xffff0000); | ||
| 897 | - FORCE_RET(); | ||
| 898 | -} | ||
| 899 | - | ||
| 900 | -NEON_OP(narrow_high_round_u8) | ||
| 901 | -{ | ||
| 902 | - T0 = (((T0 + 0x80) >> 8) & 0xff) | (((T0 + 0x800000) >> 16) & 0xff00) | ||
| 903 | - | (((T1 + 0x80) << 8) & 0xff0000) | ((T1 + 0x800000) & 0xff000000); | ||
| 904 | - FORCE_RET(); | ||
| 905 | -} | ||
| 906 | - | ||
| 907 | -NEON_OP(narrow_high_round_u16) | ||
| 908 | -{ | ||
| 909 | - T0 = ((T0 + 0x8000) >> 16) | ((T1 + 0x8000) & 0xffff0000); | ||
| 910 | - FORCE_RET(); | ||
| 911 | -} | ||
| 912 | - | ||
| 913 | -NEON_OP(narrow_high_round_u32) | ||
| 914 | -{ | ||
| 915 | - if (T0 >= 0x80000000u) | ||
| 916 | - T0 = T1 + 1; | ||
| 917 | - else | ||
| 918 | - T0 = T1; | ||
| 919 | - FORCE_RET(); | ||
| 920 | -} | ||
| 921 | - | ||
| 922 | -/* Widening instructions. Named type is source type. */ | ||
| 923 | -NEON_OP(widen_s8) | ||
| 924 | -{ | ||
| 925 | - uint32_t src; | ||
| 926 | - | ||
| 927 | - src = T0; | ||
| 928 | - T0 = (uint16_t)(int8_t)src | ((int8_t)(src >> 8) << 16); | ||
| 929 | - T1 = (uint16_t)(int8_t)(src >> 16) | ((int8_t)(src >> 24) << 16); | ||
| 930 | -} | ||
| 931 | - | ||
| 932 | -NEON_OP(widen_u8) | ||
| 933 | -{ | ||
| 934 | - T1 = ((T0 >> 8) & 0xff0000) | ((T0 >> 16) & 0xff); | ||
| 935 | - T0 = ((T0 << 8) & 0xff0000) | (T0 & 0xff); | ||
| 936 | -} | ||
| 937 | - | ||
| 938 | -NEON_OP(widen_s16) | ||
| 939 | -{ | ||
| 940 | - int32_t src; | ||
| 941 | - | ||
| 942 | - src = T0; | ||
| 943 | - T0 = (int16_t)src; | ||
| 944 | - T1 = src >> 16; | ||
| 945 | -} | ||
| 946 | - | ||
| 947 | -NEON_OP(widen_u16) | ||
| 948 | -{ | ||
| 949 | - T1 = T0 >> 16; | ||
| 950 | - T0 &= 0xffff; | ||
| 951 | -} | ||
| 952 | - | ||
| 953 | -NEON_OP(widen_s32) | ||
| 954 | -{ | ||
| 955 | - T1 = (int32_t)T0 >> 31; | ||
| 956 | - FORCE_RET(); | ||
| 957 | -} | ||
| 958 | - | ||
| 959 | -NEON_OP(widen_high_u8) | ||
| 960 | -{ | ||
| 961 | - T1 = (T0 & 0xff000000) | ((T0 >> 8) & 0xff00); | ||
| 962 | - T0 = ((T0 << 16) & 0xff000000) | ((T0 << 8) & 0xff00); | ||
| 963 | -} | ||
| 964 | - | ||
| 965 | -NEON_OP(widen_high_u16) | ||
| 966 | -{ | ||
| 967 | - T1 = T0 & 0xffff0000; | ||
| 968 | - T0 <<= 16; | ||
| 969 | -} | ||
| 970 | - | ||
| 971 | -/* Long operations. The type is the wide type. */ | ||
| 972 | -NEON_OP(shll_u16) | ||
| 973 | -{ | ||
| 974 | - int shift = PARAM1; | ||
| 975 | - uint32_t mask; | ||
| 976 | - | ||
| 977 | - mask = 0xffff >> (16 - shift); | ||
| 978 | - mask |= mask << 16; | ||
| 979 | - mask = ~mask; | ||
| 980 | - | ||
| 981 | - T0 = (T0 << shift) & mask; | ||
| 982 | - T1 = (T1 << shift) & mask; | ||
| 983 | - FORCE_RET(); | ||
| 984 | -} | ||
| 985 | - | ||
| 986 | -NEON_OP(shll_u64) | ||
| 987 | -{ | ||
| 988 | - int shift = PARAM1; | ||
| 989 | - | ||
| 990 | - T1 <<= shift; | ||
| 991 | - T1 |= T0 >> (32 - shift); | ||
| 992 | - T0 <<= shift; | ||
| 993 | - FORCE_RET(); | ||
| 994 | -} | ||
| 995 | - | ||
| 996 | -NEON_OP(addl_u16) | ||
| 997 | -{ | ||
| 998 | - uint32_t tmp; | ||
| 999 | - uint32_t high; | ||
| 1000 | - | ||
| 1001 | - tmp = env->vfp.scratch[0]; | ||
| 1002 | - high = (T0 >> 16) + (tmp >> 16); | ||
| 1003 | - T0 = (uint16_t)(T0 + tmp); | ||
| 1004 | - T0 |= (high << 16); | ||
| 1005 | - tmp = env->vfp.scratch[1]; | ||
| 1006 | - high = (T1 >> 16) + (tmp >> 16); | ||
| 1007 | - T1 = (uint16_t)(T1 + tmp); | ||
| 1008 | - T1 |= (high << 16); | ||
| 1009 | - FORCE_RET(); | ||
| 1010 | -} | ||
| 1011 | - | ||
| 1012 | -NEON_OP(addl_u32) | ||
| 1013 | -{ | ||
| 1014 | - T0 += env->vfp.scratch[0]; | ||
| 1015 | - T1 += env->vfp.scratch[1]; | ||
| 1016 | - FORCE_RET(); | ||
| 1017 | -} | ||
| 1018 | - | ||
| 1019 | -NEON_OP(addl_u64) | ||
| 1020 | -{ | ||
| 1021 | - uint64_t tmp; | ||
| 1022 | - tmp = T0 | ((uint64_t)T1 << 32); | ||
| 1023 | - tmp += env->vfp.scratch[0]; | ||
| 1024 | - tmp += (uint64_t)env->vfp.scratch[1] << 32; | ||
| 1025 | - T0 = tmp; | ||
| 1026 | - T1 = tmp >> 32; | ||
| 1027 | - FORCE_RET(); | ||
| 1028 | -} | ||
| 1029 | - | ||
| 1030 | -NEON_OP(subl_u16) | ||
| 1031 | -{ | ||
| 1032 | - uint32_t tmp; | ||
| 1033 | - uint32_t high; | ||
| 1034 | - | ||
| 1035 | - tmp = env->vfp.scratch[0]; | ||
| 1036 | - high = (T0 >> 16) - (tmp >> 16); | ||
| 1037 | - T0 = (uint16_t)(T0 - tmp); | ||
| 1038 | - T0 |= (high << 16); | ||
| 1039 | - tmp = env->vfp.scratch[1]; | ||
| 1040 | - high = (T1 >> 16) - (tmp >> 16); | ||
| 1041 | - T1 = (uint16_t)(T1 - tmp); | ||
| 1042 | - T1 |= (high << 16); | ||
| 1043 | - FORCE_RET(); | ||
| 1044 | -} | ||
| 1045 | - | ||
| 1046 | -NEON_OP(subl_u32) | ||
| 1047 | -{ | ||
| 1048 | - T0 -= env->vfp.scratch[0]; | ||
| 1049 | - T1 -= env->vfp.scratch[1]; | ||
| 1050 | - FORCE_RET(); | ||
| 1051 | -} | ||
| 1052 | - | ||
| 1053 | -NEON_OP(subl_u64) | ||
| 1054 | -{ | ||
| 1055 | - uint64_t tmp; | ||
| 1056 | - tmp = T0 | ((uint64_t)T1 << 32); | ||
| 1057 | - tmp -= env->vfp.scratch[0]; | ||
| 1058 | - tmp -= (uint64_t)env->vfp.scratch[1] << 32; | ||
| 1059 | - T0 = tmp; | ||
| 1060 | - T1 = tmp >> 32; | ||
| 1061 | - FORCE_RET(); | ||
| 1062 | -} | ||
| 1063 | - | ||
| 1064 | -#define DO_ABD(dest, x, y, type) do { \ | ||
| 1065 | - type tmp_x = x; \ | ||
| 1066 | - type tmp_y = y; \ | ||
| 1067 | - dest = ((tmp_x > tmp_y) ? tmp_x - tmp_y : tmp_y - tmp_x); \ | ||
| 1068 | - } while(0) | ||
| 1069 | - | ||
| 1070 | -NEON_OP(abdl_u16) | ||
| 1071 | -{ | ||
| 1072 | - uint32_t tmp; | ||
| 1073 | - uint32_t low; | ||
| 1074 | - uint32_t high; | ||
| 1075 | - | ||
| 1076 | - DO_ABD(low, T0, T1, uint8_t); | ||
| 1077 | - DO_ABD(tmp, T0 >> 8, T1 >> 8, uint8_t); | ||
| 1078 | - low |= tmp << 16; | ||
| 1079 | - DO_ABD(high, T0 >> 16, T1 >> 16, uint8_t); | ||
| 1080 | - DO_ABD(tmp, T0 >> 24, T1 >> 24, uint8_t); | ||
| 1081 | - high |= tmp << 16; | ||
| 1082 | - T0 = low; | ||
| 1083 | - T1 = high; | ||
| 1084 | - FORCE_RET(); | ||
| 1085 | -} | ||
| 1086 | - | ||
| 1087 | -NEON_OP(abdl_s16) | ||
| 1088 | -{ | ||
| 1089 | - uint32_t tmp; | ||
| 1090 | - uint32_t low; | ||
| 1091 | - uint32_t high; | ||
| 1092 | - | ||
| 1093 | - DO_ABD(low, T0, T1, int8_t); | ||
| 1094 | - DO_ABD(tmp, T0 >> 8, T1 >> 8, int8_t); | ||
| 1095 | - low |= tmp << 16; | ||
| 1096 | - DO_ABD(high, T0 >> 16, T1 >> 16, int8_t); | ||
| 1097 | - DO_ABD(tmp, T0 >> 24, T1 >> 24, int8_t); | ||
| 1098 | - high |= tmp << 16; | ||
| 1099 | - T0 = low; | ||
| 1100 | - T1 = high; | ||
| 1101 | - FORCE_RET(); | ||
| 1102 | -} | ||
| 1103 | - | ||
| 1104 | -NEON_OP(abdl_u32) | ||
| 1105 | -{ | ||
| 1106 | - uint32_t low; | ||
| 1107 | - uint32_t high; | ||
| 1108 | - | ||
| 1109 | - DO_ABD(low, T0, T1, uint16_t); | ||
| 1110 | - DO_ABD(high, T0 >> 16, T1 >> 16, uint16_t); | ||
| 1111 | - T0 = low; | ||
| 1112 | - T1 = high; | ||
| 1113 | - FORCE_RET(); | ||
| 1114 | -} | ||
| 1115 | - | ||
| 1116 | -NEON_OP(abdl_s32) | ||
| 1117 | -{ | ||
| 1118 | - uint32_t low; | ||
| 1119 | - uint32_t high; | ||
| 1120 | - | ||
| 1121 | - DO_ABD(low, T0, T1, int16_t); | ||
| 1122 | - DO_ABD(high, T0 >> 16, T1 >> 16, int16_t); | ||
| 1123 | - T0 = low; | ||
| 1124 | - T1 = high; | ||
| 1125 | - FORCE_RET(); | ||
| 1126 | -} | ||
| 1127 | - | ||
| 1128 | -NEON_OP(abdl_u64) | ||
| 1129 | -{ | ||
| 1130 | - DO_ABD(T0, T0, T1, uint32_t); | ||
| 1131 | - T1 = 0; | ||
| 1132 | -} | ||
| 1133 | - | ||
| 1134 | -NEON_OP(abdl_s64) | ||
| 1135 | -{ | ||
| 1136 | - DO_ABD(T0, T0, T1, int32_t); | ||
| 1137 | - T1 = 0; | ||
| 1138 | -} | ||
| 1139 | -#undef DO_ABD | ||
| 1140 | - | ||
| 1141 | -/* Widening multiply. Named type is the source type. */ | ||
| 1142 | -#define DO_MULL(dest, x, y, type1, type2) do { \ | ||
| 1143 | - type1 tmp_x = x; \ | ||
| 1144 | - type1 tmp_y = y; \ | ||
| 1145 | - dest = (type2)((type2)tmp_x * (type2)tmp_y); \ | ||
| 1146 | - } while(0) | ||
| 1147 | - | ||
| 1148 | -NEON_OP(mull_u8) | ||
| 1149 | -{ | ||
| 1150 | - uint32_t tmp; | ||
| 1151 | - uint32_t low; | ||
| 1152 | - uint32_t high; | ||
| 1153 | - | ||
| 1154 | - DO_MULL(low, T0, T1, uint8_t, uint16_t); | ||
| 1155 | - DO_MULL(tmp, T0 >> 8, T1 >> 8, uint8_t, uint16_t); | ||
| 1156 | - low |= tmp << 16; | ||
| 1157 | - DO_MULL(high, T0 >> 16, T1 >> 16, uint8_t, uint16_t); | ||
| 1158 | - DO_MULL(tmp, T0 >> 24, T1 >> 24, uint8_t, uint16_t); | ||
| 1159 | - high |= tmp << 16; | ||
| 1160 | - T0 = low; | ||
| 1161 | - T1 = high; | ||
| 1162 | - FORCE_RET(); | ||
| 1163 | -} | ||
| 1164 | - | ||
| 1165 | -NEON_OP(mull_s8) | ||
| 1166 | -{ | ||
| 1167 | - uint32_t tmp; | ||
| 1168 | - uint32_t low; | ||
| 1169 | - uint32_t high; | ||
| 1170 | - | ||
| 1171 | - DO_MULL(low, T0, T1, int8_t, uint16_t); | ||
| 1172 | - DO_MULL(tmp, T0 >> 8, T1 >> 8, int8_t, uint16_t); | ||
| 1173 | - low |= tmp << 16; | ||
| 1174 | - DO_MULL(high, T0 >> 16, T1 >> 16, int8_t, uint16_t); | ||
| 1175 | - DO_MULL(tmp, T0 >> 24, T1 >> 24, int8_t, uint16_t); | ||
| 1176 | - high |= tmp << 16; | ||
| 1177 | - T0 = low; | ||
| 1178 | - T1 = high; | ||
| 1179 | - FORCE_RET(); | ||
| 1180 | -} | ||
| 1181 | - | ||
| 1182 | -NEON_OP(mull_u16) | ||
| 1183 | -{ | ||
| 1184 | - uint32_t low; | ||
| 1185 | - uint32_t high; | ||
| 1186 | - | ||
| 1187 | - DO_MULL(low, T0, T1, uint16_t, uint32_t); | ||
| 1188 | - DO_MULL(high, T0 >> 16, T1 >> 16, uint16_t, uint32_t); | ||
| 1189 | - T0 = low; | ||
| 1190 | - T1 = high; | ||
| 1191 | - FORCE_RET(); | ||
| 1192 | -} | ||
| 1193 | - | ||
| 1194 | -NEON_OP(mull_s16) | ||
| 1195 | -{ | ||
| 1196 | - uint32_t low; | ||
| 1197 | - uint32_t high; | ||
| 1198 | - | ||
| 1199 | - DO_MULL(low, T0, T1, int16_t, uint32_t); | ||
| 1200 | - DO_MULL(high, T0 >> 16, T1 >> 16, int16_t, uint32_t); | ||
| 1201 | - T0 = low; | ||
| 1202 | - T1 = high; | ||
| 1203 | - FORCE_RET(); | ||
| 1204 | -} | ||
| 1205 | - | ||
| 1206 | -NEON_OP(addl_saturate_s32) | ||
| 1207 | -{ | ||
| 1208 | - uint32_t tmp; | ||
| 1209 | - uint32_t res; | ||
| 1210 | - | ||
| 1211 | - tmp = env->vfp.scratch[0]; | ||
| 1212 | - res = T0 + tmp; | ||
| 1213 | - if (((res ^ T0) & SIGNBIT) && !((T0 ^ tmp) & SIGNBIT)) { | ||
| 1214 | - env->QF = 1; | ||
| 1215 | - T0 = (T0 >> 31) ^ 0x7fffffff; | ||
| 1216 | - } else { | ||
| 1217 | - T0 = res; | ||
| 1218 | - } | ||
| 1219 | - tmp = env->vfp.scratch[1]; | ||
| 1220 | - res = T1 + tmp; | ||
| 1221 | - if (((res ^ T1) & SIGNBIT) && !((T1 ^ tmp) & SIGNBIT)) { | ||
| 1222 | - env->QF = 1; | ||
| 1223 | - T1 = (T1 >> 31) ^ 0x7fffffff; | ||
| 1224 | - } else { | ||
| 1225 | - T1 = res; | ||
| 1226 | - } | ||
| 1227 | - FORCE_RET(); | ||
| 1228 | -} | ||
| 1229 | - | ||
| 1230 | -NEON_OP(addl_saturate_s64) | ||
| 1231 | -{ | ||
| 1232 | - uint64_t src1; | ||
| 1233 | - uint64_t src2; | ||
| 1234 | - uint64_t res; | ||
| 1235 | - | ||
| 1236 | - src1 = T0 + ((uint64_t)T1 << 32); | ||
| 1237 | - src2 = env->vfp.scratch[0] + ((uint64_t)env->vfp.scratch[1] << 32); | ||
| 1238 | - res = src1 + src2; | ||
| 1239 | - if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) { | ||
| 1240 | - env->QF = 1; | ||
| 1241 | - T0 = ~(int64_t)src1 >> 63; | ||
| 1242 | - T1 = T0 ^ 0x80000000; | ||
| 1243 | - } else { | ||
| 1244 | - T0 = res; | ||
| 1245 | - T1 = res >> 32; | ||
| 1246 | - } | ||
| 1247 | - FORCE_RET(); | ||
| 1248 | -} | ||
| 1249 | - | ||
| 1250 | -NEON_OP(addl_saturate_u64) | ||
| 1251 | -{ | ||
| 1252 | - uint64_t src1; | ||
| 1253 | - uint64_t src2; | ||
| 1254 | - uint64_t res; | ||
| 1255 | - | ||
| 1256 | - src1 = T0 + ((uint64_t)T1 << 32); | ||
| 1257 | - src2 = env->vfp.scratch[0] + ((uint64_t)env->vfp.scratch[1] << 32); | ||
| 1258 | - res = src1 + src2; | ||
| 1259 | - if (res < src1) { | ||
| 1260 | - env->QF = 1; | ||
| 1261 | - T0 = 0xffffffff; | ||
| 1262 | - T1 = 0xffffffff; | ||
| 1263 | - } else { | ||
| 1264 | - T0 = res; | ||
| 1265 | - T1 = res >> 32; | ||
| 1266 | - } | ||
| 1267 | - FORCE_RET(); | ||
| 1268 | -} | ||
| 1269 | - | ||
| 1270 | -NEON_OP(subl_saturate_s64) | ||
| 1271 | -{ | ||
| 1272 | - uint64_t src1; | ||
| 1273 | - uint64_t src2; | ||
| 1274 | - uint64_t res; | ||
| 1275 | - | ||
| 1276 | - src1 = T0 + ((uint64_t)T1 << 32); | ||
| 1277 | - src2 = env->vfp.scratch[0] + ((uint64_t)env->vfp.scratch[1] << 32); | ||
| 1278 | - res = src1 - src2; | ||
| 1279 | - if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) { | ||
| 1280 | - env->QF = 1; | ||
| 1281 | - T0 = ~(int64_t)src1 >> 63; | ||
| 1282 | - T1 = T0 ^ 0x80000000; | ||
| 1283 | - } else { | ||
| 1284 | - T0 = res; | ||
| 1285 | - T1 = res >> 32; | ||
| 1286 | - } | ||
| 1287 | - FORCE_RET(); | ||
| 1288 | -} | ||
| 1289 | - | ||
| 1290 | -NEON_OP(subl_saturate_u64) | ||
| 1291 | -{ | ||
| 1292 | - uint64_t src1; | ||
| 1293 | - uint64_t src2; | ||
| 1294 | - uint64_t res; | ||
| 1295 | - | ||
| 1296 | - src1 = T0 + ((uint64_t)T1 << 32); | ||
| 1297 | - src2 = env->vfp.scratch[0] + ((uint64_t)env->vfp.scratch[1] << 32); | ||
| 1298 | - if (src1 < src2) { | ||
| 1299 | - env->QF = 1; | ||
| 1300 | - T0 = 0; | ||
| 1301 | - T1 = 0; | ||
| 1302 | - } else { | ||
| 1303 | - res = src1 - src2; | ||
| 1304 | - T0 = res; | ||
| 1305 | - T1 = res >> 32; | ||
| 1306 | - } | ||
| 1307 | - FORCE_RET(); | ||
| 1308 | -} | ||
| 1309 | - | ||
| 1310 | -NEON_OP(negl_u16) | ||
| 1311 | -{ | ||
| 1312 | - uint32_t tmp; | ||
| 1313 | - tmp = T0 >> 16; | ||
| 1314 | - tmp = -tmp; | ||
| 1315 | - T0 = (-T0 & 0xffff) | (tmp << 16); | ||
| 1316 | - tmp = T1 >> 16; | ||
| 1317 | - tmp = -tmp; | ||
| 1318 | - T1 = (-T1 & 0xffff) | (tmp << 16); | ||
| 1319 | - FORCE_RET(); | ||
| 1320 | -} | ||
| 1321 | - | ||
| 1322 | -NEON_OP(negl_u32) | ||
| 1323 | -{ | ||
| 1324 | - T0 = -T0; | ||
| 1325 | - T1 = -T1; | ||
| 1326 | - FORCE_RET(); | ||
| 1327 | -} | ||
| 1328 | - | ||
| 1329 | -NEON_OP(negl_u64) | ||
| 1330 | -{ | ||
| 1331 | - uint64_t val; | ||
| 1332 | - | ||
| 1333 | - val = T0 | ((uint64_t)T1 << 32); | ||
| 1334 | - val = -val; | ||
| 1335 | - T0 = val; | ||
| 1336 | - T1 = val >> 32; | ||
| 1337 | - FORCE_RET(); | ||
| 1338 | -} | ||
| 1339 | - | ||
| 1340 | -/* Scalar operations. */ | ||
| 1341 | -NEON_OP(dup_low16) | ||
| 1342 | -{ | ||
| 1343 | - T0 = (T0 & 0xffff) | (T0 << 16); | ||
| 1344 | - FORCE_RET(); | ||
| 1345 | -} | ||
| 1346 | - | ||
| 1347 | -NEON_OP(dup_high16) | ||
| 1348 | -{ | ||
| 1349 | - T0 = (T0 >> 16) | (T0 & 0xffff0000); | ||
| 1350 | - FORCE_RET(); | ||
| 1351 | -} | ||
| 1352 | - | ||
| 1353 | -/* Helper for VEXT */ | ||
| 1354 | -NEON_OP(extract) | ||
| 1355 | -{ | ||
| 1356 | - int shift = PARAM1; | ||
| 1357 | - T0 = (T0 >> shift) | (T1 << (32 - shift)); | ||
| 1358 | - FORCE_RET(); | ||
| 1359 | -} | ||
| 1360 | - | ||
| 1361 | -/* Pairwise add long. Named type is source type. */ | ||
| 1362 | -NEON_OP(paddl_s8) | ||
| 1363 | -{ | ||
| 1364 | - int8_t src1; | ||
| 1365 | - int8_t src2; | ||
| 1366 | - uint16_t result; | ||
| 1367 | - src1 = T0 >> 24; | ||
| 1368 | - src2 = T0 >> 16; | ||
| 1369 | - result = (uint16_t)src1 + src2; | ||
| 1370 | - src1 = T0 >> 8; | ||
| 1371 | - src2 = T0; | ||
| 1372 | - T0 = (uint16_t)((uint16_t)src1 + src2) | ((uint32_t)result << 16); | ||
| 1373 | - FORCE_RET(); | ||
| 1374 | -} | ||
| 1375 | - | ||
| 1376 | -NEON_OP(paddl_u8) | ||
| 1377 | -{ | ||
| 1378 | - uint8_t src1; | ||
| 1379 | - uint8_t src2; | ||
| 1380 | - uint16_t result; | ||
| 1381 | - src1 = T0 >> 24; | ||
| 1382 | - src2 = T0 >> 16; | ||
| 1383 | - result = (uint16_t)src1 + src2; | ||
| 1384 | - src1 = T0 >> 8; | ||
| 1385 | - src2 = T0; | ||
| 1386 | - T0 = (uint16_t)((uint16_t)src1 + src2) | ((uint32_t)result << 16); | ||
| 1387 | - FORCE_RET(); | ||
| 1388 | -} | ||
| 1389 | - | ||
| 1390 | -NEON_OP(paddl_s16) | ||
| 1391 | -{ | ||
| 1392 | - T0 = (uint32_t)(int16_t)T0 + (uint32_t)(int16_t)(T0 >> 16); | ||
| 1393 | - FORCE_RET(); | ||
| 1394 | -} | ||
| 1395 | - | ||
| 1396 | -NEON_OP(paddl_u16) | ||
| 1397 | -{ | ||
| 1398 | - T0 = (uint32_t)(uint16_t)T0 + (uint32_t)(uint16_t)(T0 >> 16); | ||
| 1399 | - FORCE_RET(); | ||
| 1400 | -} | ||
| 1401 | - | ||
| 1402 | -NEON_OP(paddl_s32) | ||
| 1403 | -{ | ||
| 1404 | - int64_t tmp; | ||
| 1405 | - tmp = (int64_t)(int32_t)T0 + (int64_t)(int32_t)T1; | ||
| 1406 | - T0 = tmp; | ||
| 1407 | - T1 = tmp >> 32; | ||
| 1408 | - FORCE_RET(); | ||
| 1409 | -} | ||
| 1410 | - | ||
| 1411 | -NEON_OP(paddl_u32) | ||
| 1412 | -{ | ||
| 1413 | - uint64_t tmp; | ||
| 1414 | - tmp = (uint64_t)T0 + (uint64_t)T1; | ||
| 1415 | - T0 = tmp; | ||
| 1416 | - T1 = tmp >> 32; | ||
| 1417 | - FORCE_RET(); | ||
| 1418 | -} | ||
| 1419 | - | ||
| 1420 | -/* Count Leading Sign/Zero Bits. */ | ||
| 1421 | -static inline int do_clz8(uint8_t x) | ||
| 1422 | -{ | ||
| 1423 | - int n; | ||
| 1424 | - for (n = 8; x; n--) | ||
| 1425 | - x >>= 1; | ||
| 1426 | - return n; | ||
| 1427 | -} | ||
| 1428 | - | ||
| 1429 | -static inline int do_clz16(uint16_t x) | ||
| 1430 | -{ | ||
| 1431 | - int n; | ||
| 1432 | - for (n = 16; x; n--) | ||
| 1433 | - x >>= 1; | ||
| 1434 | - return n; | ||
| 1435 | -} | ||
| 1436 | - | ||
| 1437 | -NEON_OP(clz_u8) | ||
| 1438 | -{ | ||
| 1439 | - uint32_t result; | ||
| 1440 | - uint32_t tmp; | ||
| 1441 | - | ||
| 1442 | - tmp = T0; | ||
| 1443 | - result = do_clz8(tmp); | ||
| 1444 | - result |= do_clz8(tmp >> 8) << 8; | ||
| 1445 | - result |= do_clz8(tmp >> 16) << 16; | ||
| 1446 | - result |= do_clz8(tmp >> 24) << 24; | ||
| 1447 | - T0 = result; | ||
| 1448 | - FORCE_RET(); | ||
| 1449 | -} | ||
| 1450 | - | ||
| 1451 | -NEON_OP(clz_u16) | ||
| 1452 | -{ | ||
| 1453 | - uint32_t result; | ||
| 1454 | - uint32_t tmp; | ||
| 1455 | - tmp = T0; | ||
| 1456 | - result = do_clz16(tmp); | ||
| 1457 | - result |= do_clz16(tmp >> 16) << 16; | ||
| 1458 | - T0 = result; | ||
| 1459 | - FORCE_RET(); | ||
| 1460 | -} | ||
| 1461 | - | ||
| 1462 | -NEON_OP(cls_s8) | ||
| 1463 | -{ | ||
| 1464 | - uint32_t result; | ||
| 1465 | - int8_t tmp; | ||
| 1466 | - tmp = T0; | ||
| 1467 | - result = do_clz8((tmp < 0) ? ~tmp : tmp) - 1; | ||
| 1468 | - tmp = T0 >> 8; | ||
| 1469 | - result |= (do_clz8((tmp < 0) ? ~tmp : tmp) - 1) << 8; | ||
| 1470 | - tmp = T0 >> 16; | ||
| 1471 | - result |= (do_clz8((tmp < 0) ? ~tmp : tmp) - 1) << 16; | ||
| 1472 | - tmp = T0 >> 24; | ||
| 1473 | - result |= (do_clz8((tmp < 0) ? ~tmp : tmp) - 1) << 24; | ||
| 1474 | - T0 = result; | ||
| 1475 | - FORCE_RET(); | ||
| 1476 | -} | ||
| 1477 | - | ||
| 1478 | -NEON_OP(cls_s16) | ||
| 1479 | -{ | ||
| 1480 | - uint32_t result; | ||
| 1481 | - int16_t tmp; | ||
| 1482 | - tmp = T0; | ||
| 1483 | - result = do_clz16((tmp < 0) ? ~tmp : tmp) - 1; | ||
| 1484 | - tmp = T0 >> 16; | ||
| 1485 | - result |= (do_clz16((tmp < 0) ? ~tmp : tmp) - 1) << 16; | ||
| 1486 | - T0 = result; | ||
| 1487 | - FORCE_RET(); | ||
| 1488 | -} | ||
| 1489 | - | ||
| 1490 | -NEON_OP(cls_s32) | ||
| 1491 | -{ | ||
| 1492 | - int count; | ||
| 1493 | - if ((int32_t)T0 < 0) | ||
| 1494 | - T0 = ~T0; | ||
| 1495 | - for (count = 32; T0 > 0; count--) | ||
| 1496 | - T0 = T0 >> 1; | ||
| 1497 | - T0 = count - 1; | ||
| 1498 | - FORCE_RET(); | ||
| 1499 | -} | ||
| 1500 | - | ||
| 1501 | -/* Bit count. */ | ||
| 1502 | -NEON_OP(cnt_u8) | ||
| 1503 | -{ | ||
| 1504 | - T0 = (T0 & 0x55555555) + ((T0 >> 1) & 0x55555555); | ||
| 1505 | - T0 = (T0 & 0x33333333) + ((T0 >> 2) & 0x33333333); | ||
| 1506 | - T0 = (T0 & 0x0f0f0f0f) + ((T0 >> 4) & 0x0f0f0f0f); | ||
| 1507 | - FORCE_RET(); | ||
| 1508 | -} | ||
| 1509 | - | ||
| 1510 | -/* Saturating negation. */ | ||
| 1511 | -/* ??? Make these use NEON_VOP1 */ | ||
| 1512 | -#define DO_QABS8(x) do { \ | ||
| 1513 | - if (x == (int8_t)0x80) { \ | ||
| 1514 | - x = 0x7f; \ | ||
| 1515 | - env->QF = 1; \ | ||
| 1516 | - } else if (x < 0) { \ | ||
| 1517 | - x = -x; \ | ||
| 1518 | - }} while (0) | ||
| 1519 | -NEON_OP(qabs_s8) | ||
| 1520 | -{ | ||
| 1521 | - neon_s8 vec; | ||
| 1522 | - NEON_UNPACK(neon_s8, vec, T0); | ||
| 1523 | - DO_QABS8(vec.v1); | ||
| 1524 | - DO_QABS8(vec.v2); | ||
| 1525 | - DO_QABS8(vec.v3); | ||
| 1526 | - DO_QABS8(vec.v4); | ||
| 1527 | - NEON_PACK(neon_s8, T0, vec); | ||
| 1528 | - FORCE_RET(); | ||
| 1529 | -} | ||
| 1530 | -#undef DO_QABS8 | ||
| 1531 | - | ||
| 1532 | -#define DO_QNEG8(x) do { \ | ||
| 1533 | - if (x == (int8_t)0x80) { \ | ||
| 1534 | - x = 0x7f; \ | ||
| 1535 | - env->QF = 1; \ | ||
| 1536 | - } else { \ | ||
| 1537 | - x = -x; \ | ||
| 1538 | - }} while (0) | ||
| 1539 | -NEON_OP(qneg_s8) | ||
| 1540 | -{ | ||
| 1541 | - neon_s8 vec; | ||
| 1542 | - NEON_UNPACK(neon_s8, vec, T0); | ||
| 1543 | - DO_QNEG8(vec.v1); | ||
| 1544 | - DO_QNEG8(vec.v2); | ||
| 1545 | - DO_QNEG8(vec.v3); | ||
| 1546 | - DO_QNEG8(vec.v4); | ||
| 1547 | - NEON_PACK(neon_s8, T0, vec); | ||
| 1548 | - FORCE_RET(); | ||
| 1549 | -} | ||
| 1550 | -#undef DO_QNEG8 | ||
| 1551 | - | ||
| 1552 | -#define DO_QABS16(x) do { \ | ||
| 1553 | - if (x == (int16_t)0x8000) { \ | ||
| 1554 | - x = 0x7fff; \ | ||
| 1555 | - env->QF = 1; \ | ||
| 1556 | - } else if (x < 0) { \ | ||
| 1557 | - x = -x; \ | ||
| 1558 | - }} while (0) | ||
| 1559 | -NEON_OP(qabs_s16) | ||
| 1560 | -{ | ||
| 1561 | - neon_s16 vec; | ||
| 1562 | - NEON_UNPACK(neon_s16, vec, T0); | ||
| 1563 | - DO_QABS16(vec.v1); | ||
| 1564 | - DO_QABS16(vec.v2); | ||
| 1565 | - NEON_PACK(neon_s16, T0, vec); | ||
| 1566 | - FORCE_RET(); | ||
| 1567 | -} | ||
| 1568 | -#undef DO_QABS16 | ||
| 1569 | - | ||
| 1570 | -#define DO_QNEG16(x) do { \ | ||
| 1571 | - if (x == (int16_t)0x8000) { \ | ||
| 1572 | - x = 0x7fff; \ | ||
| 1573 | - env->QF = 1; \ | ||
| 1574 | - } else { \ | ||
| 1575 | - x = -x; \ | ||
| 1576 | - }} while (0) | ||
| 1577 | -NEON_OP(qneg_s16) | ||
| 1578 | -{ | ||
| 1579 | - neon_s16 vec; | ||
| 1580 | - NEON_UNPACK(neon_s16, vec, T0); | ||
| 1581 | - DO_QNEG16(vec.v1); | ||
| 1582 | - DO_QNEG16(vec.v2); | ||
| 1583 | - NEON_PACK(neon_s16, T0, vec); | ||
| 1584 | - FORCE_RET(); | ||
| 1585 | -} | ||
| 1586 | -#undef DO_QNEG16 | ||
| 1587 | - | ||
| 1588 | -NEON_OP(qabs_s32) | ||
| 1589 | -{ | ||
| 1590 | - if (T0 == 0x80000000) { | ||
| 1591 | - T0 = 0x7fffffff; | ||
| 1592 | - env->QF = 1; | ||
| 1593 | - } else if ((int32_t)T0 < 0) { | ||
| 1594 | - T0 = -T0; | ||
| 1595 | - } | ||
| 1596 | - FORCE_RET(); | ||
| 1597 | -} | ||
| 1598 | - | ||
| 1599 | -NEON_OP(qneg_s32) | ||
| 1600 | -{ | ||
| 1601 | - if (T0 == 0x80000000) { | ||
| 1602 | - T0 = 0x7fffffff; | ||
| 1603 | - env->QF = 1; | ||
| 1604 | - } else { | ||
| 1605 | - T0 = -T0; | ||
| 1606 | - } | ||
| 1607 | - FORCE_RET(); | ||
| 1608 | -} | ||
| 1609 | - | ||
| 1610 | -/* Unary operations */ | ||
| 1611 | -#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src | ||
| 1612 | -NEON_VOP1(abs_s8, neon_s8, 4) | ||
| 1613 | -NEON_VOP1(abs_s16, neon_s16, 2) | ||
| 1614 | -NEON_OP(abs_s32) | ||
| 1615 | -{ | ||
| 1616 | - if ((int32_t)T0 < 0) | ||
| 1617 | - T0 = -T0; | ||
| 1618 | - FORCE_RET(); | ||
| 1619 | -} | ||
| 1620 | -#undef NEON_FN | ||
| 1621 | - | ||
| 1622 | -/* Transpose. Argument order is rather strange to avoid special casing | ||
| 1623 | - the translation code. | ||
| 1624 | - On input T0 = rm, T1 = rd. On output T0 = rd, T1 = rm */ | ||
| 1625 | -NEON_OP(trn_u8) | ||
| 1626 | -{ | ||
| 1627 | - uint32_t rd; | ||
| 1628 | - uint32_t rm; | ||
| 1629 | - rd = ((T0 & 0x00ff00ff) << 8) | (T1 & 0x00ff00ff); | ||
| 1630 | - rm = ((T1 & 0xff00ff00) >> 8) | (T0 & 0xff00ff00); | ||
| 1631 | - T0 = rd; | ||
| 1632 | - T1 = rm; | ||
| 1633 | - FORCE_RET(); | ||
| 1634 | -} | ||
| 1635 | - | ||
| 1636 | -NEON_OP(trn_u16) | ||
| 1637 | -{ | ||
| 1638 | - uint32_t rd; | ||
| 1639 | - uint32_t rm; | ||
| 1640 | - rd = (T0 << 16) | (T1 & 0xffff); | ||
| 1641 | - rm = (T1 >> 16) | (T0 & 0xffff0000); | ||
| 1642 | - T0 = rd; | ||
| 1643 | - T1 = rm; | ||
| 1644 | - FORCE_RET(); | ||
| 1645 | -} | ||
| 1646 | - | ||
| 1647 | -/* Worker routines for zip and unzip. */ | ||
| 1648 | -NEON_OP(unzip_u8) | ||
| 1649 | -{ | ||
| 1650 | - uint32_t rd; | ||
| 1651 | - uint32_t rm; | ||
| 1652 | - rd = (T0 & 0xff) | ((T0 >> 8) & 0xff00) | ||
| 1653 | - | ((T1 << 16) & 0xff0000) | ((T1 << 8) & 0xff000000); | ||
| 1654 | - rm = ((T0 >> 8) & 0xff) | ((T0 >> 16) & 0xff00) | ||
| 1655 | - | ((T1 << 8) & 0xff0000) | (T1 & 0xff000000); | ||
| 1656 | - T0 = rd; | ||
| 1657 | - T1 = rm; | ||
| 1658 | - FORCE_RET(); | ||
| 1659 | -} | ||
| 1660 | - | ||
| 1661 | -NEON_OP(zip_u8) | ||
| 1662 | -{ | ||
| 1663 | - uint32_t rd; | ||
| 1664 | - uint32_t rm; | ||
| 1665 | - rd = (T0 & 0xff) | ((T1 << 8) & 0xff00) | ||
| 1666 | - | ((T0 << 16) & 0xff0000) | ((T1 << 24) & 0xff000000); | ||
| 1667 | - rm = ((T0 >> 16) & 0xff) | ((T1 >> 8) & 0xff00) | ||
| 1668 | - | ((T0 >> 8) & 0xff0000) | (T1 & 0xff000000); | ||
| 1669 | - T0 = rd; | ||
| 1670 | - T1 = rm; | ||
| 1671 | - FORCE_RET(); | ||
| 1672 | -} | ||
| 1673 | - | ||
| 1674 | -NEON_OP(zip_u16) | ||
| 1675 | -{ | ||
| 1676 | - uint32_t tmp; | ||
| 1677 | - | ||
| 1678 | - tmp = (T0 & 0xffff) | (T1 << 16); | ||
| 1679 | - T1 = (T1 & 0xffff0000) | (T0 >> 16); | ||
| 1680 | - T0 = tmp; | ||
| 1681 | - FORCE_RET(); | ||
| 1682 | -} | ||
| 1683 | - | ||
| 1684 | -NEON_OP(dup_u8) | ||
| 1685 | -{ | ||
| 1686 | - T0 = (T0 >> PARAM1) & 0xff; | ||
| 1687 | - T0 |= T0 << 8; | ||
| 1688 | - T0 |= T0 << 16; | ||
| 1689 | - FORCE_RET(); | ||
| 1690 | -} |
target-arm/translate.c
| @@ -77,6 +77,9 @@ extern FILE *logfile; | @@ -77,6 +77,9 @@ extern FILE *logfile; | ||
| 77 | extern int loglevel; | 77 | extern int loglevel; |
| 78 | 78 | ||
| 79 | static TCGv cpu_env; | 79 | static TCGv cpu_env; |
| 80 | +/* We reuse the same 64-bit temporaries for efficiency. */ | ||
| 81 | +static TCGv cpu_V0, cpu_V1; | ||
| 82 | + | ||
| 80 | /* FIXME: These should be removed. */ | 83 | /* FIXME: These should be removed. */ |
| 81 | static TCGv cpu_T[2]; | 84 | static TCGv cpu_T[2]; |
| 82 | static TCGv cpu_F0s, cpu_F1s, cpu_F0d, cpu_F1d; | 85 | static TCGv cpu_F0s, cpu_F1s, cpu_F0d, cpu_F1d; |
| @@ -469,6 +472,9 @@ static inline void gen_op_bicl_T0_T1(void) | @@ -469,6 +472,9 @@ static inline void gen_op_bicl_T0_T1(void) | ||
| 469 | } | 472 | } |
| 470 | 473 | ||
| 471 | /* FIXME: Implement this natively. */ | 474 | /* FIXME: Implement this natively. */ |
| 475 | +#define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1) | ||
| 476 | + | ||
| 477 | +/* FIXME: Implement this natively. */ | ||
| 472 | static void tcg_gen_rori_i32(TCGv t0, TCGv t1, int i) | 478 | static void tcg_gen_rori_i32(TCGv t0, TCGv t1, int i) |
| 473 | { | 479 | { |
| 474 | TCGv tmp; | 480 | TCGv tmp; |
| @@ -1166,8 +1172,13 @@ neon_reg_offset (int reg, int n) | @@ -1166,8 +1172,13 @@ neon_reg_offset (int reg, int n) | ||
| 1166 | return vfp_reg_offset(0, sreg); | 1172 | return vfp_reg_offset(0, sreg); |
| 1167 | } | 1173 | } |
| 1168 | 1174 | ||
| 1169 | -#define NEON_GET_REG(T, reg, n) gen_op_neon_getreg_##T(neon_reg_offset(reg, n)) | ||
| 1170 | -#define NEON_SET_REG(T, reg, n) gen_op_neon_setreg_##T(neon_reg_offset(reg, n)) | 1175 | +/* FIXME: Remove these. */ |
| 1176 | +#define neon_T0 cpu_T[0] | ||
| 1177 | +#define neon_T1 cpu_T[1] | ||
| 1178 | +#define NEON_GET_REG(T, reg, n) \ | ||
| 1179 | + tcg_gen_ld_i32(neon_##T, cpu_env, neon_reg_offset(reg, n)) | ||
| 1180 | +#define NEON_SET_REG(T, reg, n) \ | ||
| 1181 | + tcg_gen_st_i32(neon_##T, cpu_env, neon_reg_offset(reg, n)) | ||
| 1171 | 1182 | ||
| 1172 | static TCGv neon_load_reg(int reg, int pass) | 1183 | static TCGv neon_load_reg(int reg, int pass) |
| 1173 | { | 1184 | { |
| @@ -1182,6 +1193,16 @@ static void neon_store_reg(int reg, int pass, TCGv var) | @@ -1182,6 +1193,16 @@ static void neon_store_reg(int reg, int pass, TCGv var) | ||
| 1182 | dead_tmp(var); | 1193 | dead_tmp(var); |
| 1183 | } | 1194 | } |
| 1184 | 1195 | ||
| 1196 | +static inline void neon_load_reg64(TCGv var, int reg) | ||
| 1197 | +{ | ||
| 1198 | + tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg)); | ||
| 1199 | +} | ||
| 1200 | + | ||
| 1201 | +static inline void neon_store_reg64(TCGv var, int reg) | ||
| 1202 | +{ | ||
| 1203 | + tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg)); | ||
| 1204 | +} | ||
| 1205 | + | ||
| 1185 | #define tcg_gen_ld_f32 tcg_gen_ld_i32 | 1206 | #define tcg_gen_ld_f32 tcg_gen_ld_i32 |
| 1186 | #define tcg_gen_ld_f64 tcg_gen_ld_i64 | 1207 | #define tcg_gen_ld_f64 tcg_gen_ld_i64 |
| 1187 | #define tcg_gen_st_f32 tcg_gen_st_i32 | 1208 | #define tcg_gen_st_f32 tcg_gen_st_i32 |
| @@ -2418,6 +2439,37 @@ vfp_enabled(CPUState * env) | @@ -2418,6 +2439,37 @@ vfp_enabled(CPUState * env) | ||
| 2418 | return ((env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) != 0); | 2439 | return ((env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) != 0); |
| 2419 | } | 2440 | } |
| 2420 | 2441 | ||
| 2442 | +static void gen_neon_dup_u8(TCGv var, int shift) | ||
| 2443 | +{ | ||
| 2444 | + TCGv tmp = new_tmp(); | ||
| 2445 | + if (shift) | ||
| 2446 | + tcg_gen_shri_i32(var, var, shift); | ||
| 2447 | + tcg_gen_andi_i32(var, var, 0xff); | ||
| 2448 | + tcg_gen_shli_i32(tmp, var, 8); | ||
| 2449 | + tcg_gen_or_i32(var, var, tmp); | ||
| 2450 | + tcg_gen_shli_i32(tmp, var, 16); | ||
| 2451 | + tcg_gen_or_i32(var, var, tmp); | ||
| 2452 | + dead_tmp(tmp); | ||
| 2453 | +} | ||
| 2454 | + | ||
| 2455 | +static void gen_neon_dup_low16(TCGv var) | ||
| 2456 | +{ | ||
| 2457 | + TCGv tmp = new_tmp(); | ||
| 2458 | + tcg_gen_andi_i32(var, var, 0xffff); | ||
| 2459 | + tcg_gen_shli_i32(tmp, var, 16); | ||
| 2460 | + tcg_gen_or_i32(var, var, tmp); | ||
| 2461 | + dead_tmp(tmp); | ||
| 2462 | +} | ||
| 2463 | + | ||
| 2464 | +static void gen_neon_dup_high16(TCGv var) | ||
| 2465 | +{ | ||
| 2466 | + TCGv tmp = new_tmp(); | ||
| 2467 | + tcg_gen_andi_i32(var, var, 0xffff0000); | ||
| 2468 | + tcg_gen_shri_i32(tmp, var, 16); | ||
| 2469 | + tcg_gen_or_i32(var, var, tmp); | ||
| 2470 | + dead_tmp(tmp); | ||
| 2471 | +} | ||
| 2472 | + | ||
| 2421 | /* Disassemble a VFP instruction. Returns nonzero if an error occurred | 2473 | /* Disassemble a VFP instruction. Returns nonzero if an error occurred |
| 2422 | (ie. an undefined instruction). */ | 2474 | (ie. an undefined instruction). */ |
| 2423 | static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) | 2475 | static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) |
| @@ -2425,6 +2477,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -2425,6 +2477,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 2425 | uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask; | 2477 | uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask; |
| 2426 | int dp, veclen; | 2478 | int dp, veclen; |
| 2427 | TCGv tmp; | 2479 | TCGv tmp; |
| 2480 | + TCGv tmp2; | ||
| 2428 | 2481 | ||
| 2429 | if (!arm_feature(env, ARM_FEATURE_VFP)) | 2482 | if (!arm_feature(env, ARM_FEATURE_VFP)) |
| 2430 | return 1; | 2483 | return 1; |
| @@ -2468,66 +2521,66 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -2468,66 +2521,66 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 2468 | } | 2521 | } |
| 2469 | if (insn & ARM_CP_RW_BIT) { | 2522 | if (insn & ARM_CP_RW_BIT) { |
| 2470 | /* vfp->arm */ | 2523 | /* vfp->arm */ |
| 2524 | + tmp = neon_load_reg(rn, pass); | ||
| 2471 | switch (size) { | 2525 | switch (size) { |
| 2472 | case 0: | 2526 | case 0: |
| 2473 | - NEON_GET_REG(T1, rn, pass); | ||
| 2474 | if (offset) | 2527 | if (offset) |
| 2475 | - gen_op_shrl_T1_im(offset); | 2528 | + tcg_gen_shri_i32(tmp, tmp, offset); |
| 2476 | if (insn & (1 << 23)) | 2529 | if (insn & (1 << 23)) |
| 2477 | - gen_uxtb(cpu_T[1]); | 2530 | + gen_uxtb(tmp); |
| 2478 | else | 2531 | else |
| 2479 | - gen_sxtb(cpu_T[1]); | 2532 | + gen_sxtb(tmp); |
| 2480 | break; | 2533 | break; |
| 2481 | case 1: | 2534 | case 1: |
| 2482 | - NEON_GET_REG(T1, rn, pass); | ||
| 2483 | if (insn & (1 << 23)) { | 2535 | if (insn & (1 << 23)) { |
| 2484 | if (offset) { | 2536 | if (offset) { |
| 2485 | - gen_op_shrl_T1_im(16); | 2537 | + tcg_gen_shri_i32(tmp, tmp, 16); |
| 2486 | } else { | 2538 | } else { |
| 2487 | - gen_uxth(cpu_T[1]); | 2539 | + gen_uxth(tmp); |
| 2488 | } | 2540 | } |
| 2489 | } else { | 2541 | } else { |
| 2490 | if (offset) { | 2542 | if (offset) { |
| 2491 | - gen_op_sarl_T1_im(16); | 2543 | + tcg_gen_sari_i32(tmp, tmp, 16); |
| 2492 | } else { | 2544 | } else { |
| 2493 | - gen_sxth(cpu_T[1]); | 2545 | + gen_sxth(tmp); |
| 2494 | } | 2546 | } |
| 2495 | } | 2547 | } |
| 2496 | break; | 2548 | break; |
| 2497 | case 2: | 2549 | case 2: |
| 2498 | - NEON_GET_REG(T1, rn, pass); | ||
| 2499 | break; | 2550 | break; |
| 2500 | } | 2551 | } |
| 2501 | - gen_movl_reg_T1(s, rd); | 2552 | + store_reg(s, rd, tmp); |
| 2502 | } else { | 2553 | } else { |
| 2503 | /* arm->vfp */ | 2554 | /* arm->vfp */ |
| 2504 | - gen_movl_T0_reg(s, rd); | 2555 | + tmp = load_reg(s, rd); |
| 2505 | if (insn & (1 << 23)) { | 2556 | if (insn & (1 << 23)) { |
| 2506 | /* VDUP */ | 2557 | /* VDUP */ |
| 2507 | if (size == 0) { | 2558 | if (size == 0) { |
| 2508 | - gen_op_neon_dup_u8(0); | 2559 | + gen_neon_dup_u8(tmp, 0); |
| 2509 | } else if (size == 1) { | 2560 | } else if (size == 1) { |
| 2510 | - gen_op_neon_dup_low16(); | 2561 | + gen_neon_dup_low16(tmp); |
| 2511 | } | 2562 | } |
| 2512 | - NEON_SET_REG(T0, rn, 0); | ||
| 2513 | - NEON_SET_REG(T0, rn, 1); | 2563 | + tmp2 = new_tmp(); |
| 2564 | + tcg_gen_mov_i32(tmp2, tmp); | ||
| 2565 | + neon_store_reg(rn, 0, tmp2); | ||
| 2566 | + neon_store_reg(rn, 0, tmp); | ||
| 2514 | } else { | 2567 | } else { |
| 2515 | /* VMOV */ | 2568 | /* VMOV */ |
| 2516 | switch (size) { | 2569 | switch (size) { |
| 2517 | case 0: | 2570 | case 0: |
| 2518 | - tmp = neon_load_reg(rn, pass); | ||
| 2519 | - gen_bfi(tmp, tmp, cpu_T[0], offset, 0xff); | ||
| 2520 | - neon_store_reg(rn, pass, tmp); | 2571 | + tmp2 = neon_load_reg(rn, pass); |
| 2572 | + gen_bfi(tmp, tmp2, tmp, offset, 0xff); | ||
| 2573 | + dead_tmp(tmp2); | ||
| 2521 | break; | 2574 | break; |
| 2522 | case 1: | 2575 | case 1: |
| 2523 | - tmp = neon_load_reg(rn, pass); | ||
| 2524 | - gen_bfi(tmp, tmp, cpu_T[0], offset, 0xffff); | ||
| 2525 | - neon_store_reg(rn, pass, tmp); | 2576 | + tmp2 = neon_load_reg(rn, pass); |
| 2577 | + gen_bfi(tmp, tmp2, tmp, offset, 0xffff); | ||
| 2578 | + dead_tmp(tmp2); | ||
| 2526 | break; | 2579 | break; |
| 2527 | case 2: | 2580 | case 2: |
| 2528 | - NEON_SET_REG(T0, rn, pass); | ||
| 2529 | break; | 2581 | break; |
| 2530 | } | 2582 | } |
| 2583 | + neon_store_reg(rn, pass, tmp); | ||
| 2531 | } | 2584 | } |
| 2532 | } | 2585 | } |
| 2533 | } else { /* !dp */ | 2586 | } else { /* !dp */ |
| @@ -3210,179 +3263,90 @@ static void gen_nop_hint(DisasContext *s, int val) | @@ -3210,179 +3263,90 @@ static void gen_nop_hint(DisasContext *s, int val) | ||
| 3210 | } | 3263 | } |
| 3211 | } | 3264 | } |
| 3212 | 3265 | ||
| 3213 | -/* Neon shift by constant. The actual ops are the same as used for variable | ||
| 3214 | - shifts. [OP][U][SIZE] */ | ||
| 3215 | -static GenOpFunc *gen_neon_shift_im[8][2][4] = { | ||
| 3216 | - { /* 0 */ /* VSHR */ | ||
| 3217 | - { | ||
| 3218 | - gen_op_neon_shl_u8, | ||
| 3219 | - gen_op_neon_shl_u16, | ||
| 3220 | - gen_op_neon_shl_u32, | ||
| 3221 | - gen_op_neon_shl_u64 | ||
| 3222 | - }, { | ||
| 3223 | - gen_op_neon_shl_s8, | ||
| 3224 | - gen_op_neon_shl_s16, | ||
| 3225 | - gen_op_neon_shl_s32, | ||
| 3226 | - gen_op_neon_shl_s64 | ||
| 3227 | - } | ||
| 3228 | - }, { /* 1 */ /* VSRA */ | ||
| 3229 | - { | ||
| 3230 | - gen_op_neon_shl_u8, | ||
| 3231 | - gen_op_neon_shl_u16, | ||
| 3232 | - gen_op_neon_shl_u32, | ||
| 3233 | - gen_op_neon_shl_u64 | ||
| 3234 | - }, { | ||
| 3235 | - gen_op_neon_shl_s8, | ||
| 3236 | - gen_op_neon_shl_s16, | ||
| 3237 | - gen_op_neon_shl_s32, | ||
| 3238 | - gen_op_neon_shl_s64 | ||
| 3239 | - } | ||
| 3240 | - }, { /* 2 */ /* VRSHR */ | ||
| 3241 | - { | ||
| 3242 | - gen_op_neon_rshl_u8, | ||
| 3243 | - gen_op_neon_rshl_u16, | ||
| 3244 | - gen_op_neon_rshl_u32, | ||
| 3245 | - gen_op_neon_rshl_u64 | ||
| 3246 | - }, { | ||
| 3247 | - gen_op_neon_rshl_s8, | ||
| 3248 | - gen_op_neon_rshl_s16, | ||
| 3249 | - gen_op_neon_rshl_s32, | ||
| 3250 | - gen_op_neon_rshl_s64 | ||
| 3251 | - } | ||
| 3252 | - }, { /* 3 */ /* VRSRA */ | ||
| 3253 | - { | ||
| 3254 | - gen_op_neon_rshl_u8, | ||
| 3255 | - gen_op_neon_rshl_u16, | ||
| 3256 | - gen_op_neon_rshl_u32, | ||
| 3257 | - gen_op_neon_rshl_u64 | ||
| 3258 | - }, { | ||
| 3259 | - gen_op_neon_rshl_s8, | ||
| 3260 | - gen_op_neon_rshl_s16, | ||
| 3261 | - gen_op_neon_rshl_s32, | ||
| 3262 | - gen_op_neon_rshl_s64 | ||
| 3263 | - } | ||
| 3264 | - }, { /* 4 */ | ||
| 3265 | - { | ||
| 3266 | - NULL, NULL, NULL, NULL | ||
| 3267 | - }, { /* VSRI */ | ||
| 3268 | - gen_op_neon_shl_u8, | ||
| 3269 | - gen_op_neon_shl_u16, | ||
| 3270 | - gen_op_neon_shl_u32, | ||
| 3271 | - gen_op_neon_shl_u64, | ||
| 3272 | - } | ||
| 3273 | - }, { /* 5 */ | ||
| 3274 | - { /* VSHL */ | ||
| 3275 | - gen_op_neon_shl_u8, | ||
| 3276 | - gen_op_neon_shl_u16, | ||
| 3277 | - gen_op_neon_shl_u32, | ||
| 3278 | - gen_op_neon_shl_u64, | ||
| 3279 | - }, { /* VSLI */ | ||
| 3280 | - gen_op_neon_shl_u8, | ||
| 3281 | - gen_op_neon_shl_u16, | ||
| 3282 | - gen_op_neon_shl_u32, | ||
| 3283 | - gen_op_neon_shl_u64, | ||
| 3284 | - } | ||
| 3285 | - }, { /* 6 */ /* VQSHL */ | ||
| 3286 | - { | ||
| 3287 | - gen_op_neon_qshl_u8, | ||
| 3288 | - gen_op_neon_qshl_u16, | ||
| 3289 | - gen_op_neon_qshl_u32, | ||
| 3290 | - gen_op_neon_qshl_u64 | ||
| 3291 | - }, { | ||
| 3292 | - gen_op_neon_qshl_s8, | ||
| 3293 | - gen_op_neon_qshl_s16, | ||
| 3294 | - gen_op_neon_qshl_s32, | ||
| 3295 | - gen_op_neon_qshl_s64 | ||
| 3296 | - } | ||
| 3297 | - }, { /* 7 */ /* VQSHLU */ | ||
| 3298 | - { | ||
| 3299 | - gen_op_neon_qshl_u8, | ||
| 3300 | - gen_op_neon_qshl_u16, | ||
| 3301 | - gen_op_neon_qshl_u32, | ||
| 3302 | - gen_op_neon_qshl_u64 | ||
| 3303 | - }, { | ||
| 3304 | - gen_op_neon_qshl_u8, | ||
| 3305 | - gen_op_neon_qshl_u16, | ||
| 3306 | - gen_op_neon_qshl_u32, | ||
| 3307 | - gen_op_neon_qshl_u64 | ||
| 3308 | - } | ||
| 3309 | - } | ||
| 3310 | -}; | 3266 | +/* These macros help make the code more readable when migrating from the |
| 3267 | + old dyngen helpers. They should probably be removed when | ||
| 3268 | + T0/T1 are removed. */ | ||
| 3269 | +#define CPU_T001 cpu_T[0], cpu_T[0], cpu_T[1] | ||
| 3270 | +#define CPU_T0E01 cpu_T[0], cpu_env, cpu_T[0], cpu_T[1] | ||
| 3311 | 3271 | ||
| 3312 | -/* [R][U][size - 1] */ | ||
| 3313 | -static GenOpFunc *gen_neon_shift_im_narrow[2][2][3] = { | ||
| 3314 | - { | ||
| 3315 | - { | ||
| 3316 | - gen_op_neon_shl_u16, | ||
| 3317 | - gen_op_neon_shl_u32, | ||
| 3318 | - gen_op_neon_shl_u64 | ||
| 3319 | - }, { | ||
| 3320 | - gen_op_neon_shl_s16, | ||
| 3321 | - gen_op_neon_shl_s32, | ||
| 3322 | - gen_op_neon_shl_s64 | ||
| 3323 | - } | ||
| 3324 | - }, { | ||
| 3325 | - { | ||
| 3326 | - gen_op_neon_rshl_u16, | ||
| 3327 | - gen_op_neon_rshl_u32, | ||
| 3328 | - gen_op_neon_rshl_u64 | ||
| 3329 | - }, { | ||
| 3330 | - gen_op_neon_rshl_s16, | ||
| 3331 | - gen_op_neon_rshl_s32, | ||
| 3332 | - gen_op_neon_rshl_s64 | ||
| 3333 | - } | ||
| 3334 | - } | ||
| 3335 | -}; | ||
| 3336 | - | ||
| 3337 | -static inline void | ||
| 3338 | -gen_op_neon_narrow_u32 () | ||
| 3339 | -{ | ||
| 3340 | - /* No-op. */ | ||
| 3341 | -} | ||
| 3342 | - | ||
| 3343 | -static GenOpFunc *gen_neon_narrow[3] = { | ||
| 3344 | - gen_op_neon_narrow_u8, | ||
| 3345 | - gen_op_neon_narrow_u16, | ||
| 3346 | - gen_op_neon_narrow_u32 | ||
| 3347 | -}; | ||
| 3348 | - | ||
| 3349 | -static GenOpFunc *gen_neon_narrow_satu[3] = { | ||
| 3350 | - gen_op_neon_narrow_sat_u8, | ||
| 3351 | - gen_op_neon_narrow_sat_u16, | ||
| 3352 | - gen_op_neon_narrow_sat_u32 | ||
| 3353 | -}; | ||
| 3354 | - | ||
| 3355 | -static GenOpFunc *gen_neon_narrow_sats[3] = { | ||
| 3356 | - gen_op_neon_narrow_sat_s8, | ||
| 3357 | - gen_op_neon_narrow_sat_s16, | ||
| 3358 | - gen_op_neon_narrow_sat_s32 | ||
| 3359 | -}; | 3272 | +#define CPU_V001 cpu_V0, cpu_V0, cpu_V1 |
| 3360 | 3273 | ||
| 3361 | static inline int gen_neon_add(int size) | 3274 | static inline int gen_neon_add(int size) |
| 3362 | { | 3275 | { |
| 3363 | switch (size) { | 3276 | switch (size) { |
| 3364 | - case 0: gen_op_neon_add_u8(); break; | ||
| 3365 | - case 1: gen_op_neon_add_u16(); break; | 3277 | + case 0: gen_helper_neon_add_u8(CPU_T001); break; |
| 3278 | + case 1: gen_helper_neon_add_u16(CPU_T001); break; | ||
| 3366 | case 2: gen_op_addl_T0_T1(); break; | 3279 | case 2: gen_op_addl_T0_T1(); break; |
| 3367 | default: return 1; | 3280 | default: return 1; |
| 3368 | } | 3281 | } |
| 3369 | return 0; | 3282 | return 0; |
| 3370 | } | 3283 | } |
| 3371 | 3284 | ||
| 3372 | -/* 32-bit pairwise ops end up the same as the elementsise versions. */ | ||
| 3373 | -#define gen_op_neon_pmax_s32 gen_op_neon_max_s32 | ||
| 3374 | -#define gen_op_neon_pmax_u32 gen_op_neon_max_u32 | ||
| 3375 | -#define gen_op_neon_pmin_s32 gen_op_neon_min_s32 | ||
| 3376 | -#define gen_op_neon_pmin_u32 gen_op_neon_min_u32 | 3285 | +static inline void gen_neon_rsb(int size) |
| 3286 | +{ | ||
| 3287 | + switch (size) { | ||
| 3288 | + case 0: gen_helper_neon_sub_u8(cpu_T[0], cpu_T[1], cpu_T[0]); break; | ||
| 3289 | + case 1: gen_helper_neon_sub_u16(cpu_T[0], cpu_T[1], cpu_T[0]); break; | ||
| 3290 | + case 2: gen_op_rsbl_T0_T1(); break; | ||
| 3291 | + default: return; | ||
| 3292 | + } | ||
| 3293 | +} | ||
| 3294 | + | ||
| 3295 | +/* 32-bit pairwise ops end up the same as the elementwise versions. */ | ||
| 3296 | +#define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32 | ||
| 3297 | +#define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32 | ||
| 3298 | +#define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32 | ||
| 3299 | +#define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32 | ||
| 3300 | + | ||
| 3301 | +/* FIXME: This is wrong. They set the wrong overflow bit. */ | ||
| 3302 | +#define gen_helper_neon_qadd_s32(a, e, b, c) gen_helper_add_saturate(a, b, c) | ||
| 3303 | +#define gen_helper_neon_qadd_u32(a, e, b, c) gen_helper_add_usaturate(a, b, c) | ||
| 3304 | +#define gen_helper_neon_qsub_s32(a, e, b, c) gen_helper_sub_saturate(a, b, c) | ||
| 3305 | +#define gen_helper_neon_qsub_u32(a, e, b, c) gen_helper_sub_usaturate(a, b, c) | ||
| 3306 | + | ||
| 3307 | +#define GEN_NEON_INTEGER_OP_ENV(name) do { \ | ||
| 3308 | + switch ((size << 1) | u) { \ | ||
| 3309 | + case 0: \ | ||
| 3310 | + gen_helper_neon_##name##_s8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ | ||
| 3311 | + break; \ | ||
| 3312 | + case 1: \ | ||
| 3313 | + gen_helper_neon_##name##_u8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ | ||
| 3314 | + break; \ | ||
| 3315 | + case 2: \ | ||
| 3316 | + gen_helper_neon_##name##_s16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ | ||
| 3317 | + break; \ | ||
| 3318 | + case 3: \ | ||
| 3319 | + gen_helper_neon_##name##_u16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ | ||
| 3320 | + break; \ | ||
| 3321 | + case 4: \ | ||
| 3322 | + gen_helper_neon_##name##_s32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ | ||
| 3323 | + break; \ | ||
| 3324 | + case 5: \ | ||
| 3325 | + gen_helper_neon_##name##_u32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ | ||
| 3326 | + break; \ | ||
| 3327 | + default: return 1; \ | ||
| 3328 | + }} while (0) | ||
| 3377 | 3329 | ||
| 3378 | #define GEN_NEON_INTEGER_OP(name) do { \ | 3330 | #define GEN_NEON_INTEGER_OP(name) do { \ |
| 3379 | switch ((size << 1) | u) { \ | 3331 | switch ((size << 1) | u) { \ |
| 3380 | - case 0: gen_op_neon_##name##_s8(); break; \ | ||
| 3381 | - case 1: gen_op_neon_##name##_u8(); break; \ | ||
| 3382 | - case 2: gen_op_neon_##name##_s16(); break; \ | ||
| 3383 | - case 3: gen_op_neon_##name##_u16(); break; \ | ||
| 3384 | - case 4: gen_op_neon_##name##_s32(); break; \ | ||
| 3385 | - case 5: gen_op_neon_##name##_u32(); break; \ | 3332 | + case 0: \ |
| 3333 | + gen_helper_neon_##name##_s8(cpu_T[0], cpu_T[0], cpu_T[1]); \ | ||
| 3334 | + break; \ | ||
| 3335 | + case 1: \ | ||
| 3336 | + gen_helper_neon_##name##_u8(cpu_T[0], cpu_T[0], cpu_T[1]); \ | ||
| 3337 | + break; \ | ||
| 3338 | + case 2: \ | ||
| 3339 | + gen_helper_neon_##name##_s16(cpu_T[0], cpu_T[0], cpu_T[1]); \ | ||
| 3340 | + break; \ | ||
| 3341 | + case 3: \ | ||
| 3342 | + gen_helper_neon_##name##_u16(cpu_T[0], cpu_T[0], cpu_T[1]); \ | ||
| 3343 | + break; \ | ||
| 3344 | + case 4: \ | ||
| 3345 | + gen_helper_neon_##name##_s32(cpu_T[0], cpu_T[0], cpu_T[1]); \ | ||
| 3346 | + break; \ | ||
| 3347 | + case 5: \ | ||
| 3348 | + gen_helper_neon_##name##_u32(cpu_T[0], cpu_T[0], cpu_T[1]); \ | ||
| 3349 | + break; \ | ||
| 3386 | default: return 1; \ | 3350 | default: return 1; \ |
| 3387 | }} while (0) | 3351 | }} while (0) |
| 3388 | 3352 | ||
| @@ -3392,7 +3356,7 @@ gen_neon_movl_scratch_T0(int scratch) | @@ -3392,7 +3356,7 @@ gen_neon_movl_scratch_T0(int scratch) | ||
| 3392 | uint32_t offset; | 3356 | uint32_t offset; |
| 3393 | 3357 | ||
| 3394 | offset = offsetof(CPUARMState, vfp.scratch[scratch]); | 3358 | offset = offsetof(CPUARMState, vfp.scratch[scratch]); |
| 3395 | - gen_op_neon_setreg_T0(offset); | 3359 | + tcg_gen_st_i32(cpu_T[0], cpu_env, offset); |
| 3396 | } | 3360 | } |
| 3397 | 3361 | ||
| 3398 | static inline void | 3362 | static inline void |
| @@ -3401,7 +3365,7 @@ gen_neon_movl_scratch_T1(int scratch) | @@ -3401,7 +3365,7 @@ gen_neon_movl_scratch_T1(int scratch) | ||
| 3401 | uint32_t offset; | 3365 | uint32_t offset; |
| 3402 | 3366 | ||
| 3403 | offset = offsetof(CPUARMState, vfp.scratch[scratch]); | 3367 | offset = offsetof(CPUARMState, vfp.scratch[scratch]); |
| 3404 | - gen_op_neon_setreg_T1(offset); | 3368 | + tcg_gen_st_i32(cpu_T[1], cpu_env, offset); |
| 3405 | } | 3369 | } |
| 3406 | 3370 | ||
| 3407 | static inline void | 3371 | static inline void |
| @@ -3410,7 +3374,7 @@ gen_neon_movl_T0_scratch(int scratch) | @@ -3410,7 +3374,7 @@ gen_neon_movl_T0_scratch(int scratch) | ||
| 3410 | uint32_t offset; | 3374 | uint32_t offset; |
| 3411 | 3375 | ||
| 3412 | offset = offsetof(CPUARMState, vfp.scratch[scratch]); | 3376 | offset = offsetof(CPUARMState, vfp.scratch[scratch]); |
| 3413 | - gen_op_neon_getreg_T0(offset); | 3377 | + tcg_gen_ld_i32(cpu_T[0], cpu_env, offset); |
| 3414 | } | 3378 | } |
| 3415 | 3379 | ||
| 3416 | static inline void | 3380 | static inline void |
| @@ -3419,12 +3383,7 @@ gen_neon_movl_T1_scratch(int scratch) | @@ -3419,12 +3383,7 @@ gen_neon_movl_T1_scratch(int scratch) | ||
| 3419 | uint32_t offset; | 3383 | uint32_t offset; |
| 3420 | 3384 | ||
| 3421 | offset = offsetof(CPUARMState, vfp.scratch[scratch]); | 3385 | offset = offsetof(CPUARMState, vfp.scratch[scratch]); |
| 3422 | - gen_op_neon_getreg_T1(offset); | ||
| 3423 | -} | ||
| 3424 | - | ||
| 3425 | -static inline void gen_op_neon_widen_u32(void) | ||
| 3426 | -{ | ||
| 3427 | - gen_op_movl_T1_im(0); | 3386 | + tcg_gen_ld_i32(cpu_T[1], cpu_env, offset); |
| 3428 | } | 3387 | } |
| 3429 | 3388 | ||
| 3430 | static inline void gen_neon_get_scalar(int size, int reg) | 3389 | static inline void gen_neon_get_scalar(int size, int reg) |
| @@ -3434,9 +3393,9 @@ static inline void gen_neon_get_scalar(int size, int reg) | @@ -3434,9 +3393,9 @@ static inline void gen_neon_get_scalar(int size, int reg) | ||
| 3434 | } else { | 3393 | } else { |
| 3435 | NEON_GET_REG(T0, reg >> 2, (reg >> 1) & 1); | 3394 | NEON_GET_REG(T0, reg >> 2, (reg >> 1) & 1); |
| 3436 | if (reg & 1) | 3395 | if (reg & 1) |
| 3437 | - gen_op_neon_dup_low16(); | 3396 | + gen_neon_dup_low16(cpu_T[0]); |
| 3438 | else | 3397 | else |
| 3439 | - gen_op_neon_dup_high16(); | 3398 | + gen_neon_dup_high16(cpu_T[0]); |
| 3440 | } | 3399 | } |
| 3441 | } | 3400 | } |
| 3442 | 3401 | ||
| @@ -3448,8 +3407,8 @@ static void gen_neon_unzip(int reg, int q, int tmp, int size) | @@ -3448,8 +3407,8 @@ static void gen_neon_unzip(int reg, int q, int tmp, int size) | ||
| 3448 | NEON_GET_REG(T0, reg, n); | 3407 | NEON_GET_REG(T0, reg, n); |
| 3449 | NEON_GET_REG(T0, reg, n + n); | 3408 | NEON_GET_REG(T0, reg, n + n); |
| 3450 | switch (size) { | 3409 | switch (size) { |
| 3451 | - case 0: gen_op_neon_unzip_u8(); break; | ||
| 3452 | - case 1: gen_op_neon_zip_u16(); break; /* zip and unzip are the same. */ | 3410 | + case 0: gen_helper_neon_unzip_u8(); break; |
| 3411 | + case 1: gen_helper_neon_zip_u16(); break; /* zip and unzip are the same. */ | ||
| 3453 | case 2: /* no-op */; break; | 3412 | case 2: /* no-op */; break; |
| 3454 | default: abort(); | 3413 | default: abort(); |
| 3455 | } | 3414 | } |
| @@ -3522,13 +3481,9 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -3522,13 +3481,9 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 3522 | if (size == 2) { | 3481 | if (size == 2) { |
| 3523 | if (load) { | 3482 | if (load) { |
| 3524 | tmp = gen_ld32(cpu_T[1], IS_USER(s)); | 3483 | tmp = gen_ld32(cpu_T[1], IS_USER(s)); |
| 3525 | - tcg_gen_mov_i32(cpu_T[0], tmp); | ||
| 3526 | - dead_tmp(tmp); | ||
| 3527 | - NEON_SET_REG(T0, rd, pass); | 3484 | + neon_store_reg(rd, pass, tmp); |
| 3528 | } else { | 3485 | } else { |
| 3529 | - NEON_GET_REG(T0, rd, pass); | ||
| 3530 | - tmp = new_tmp(); | ||
| 3531 | - tcg_gen_mov_i32(tmp, cpu_T[0]); | 3486 | + tmp = neon_load_reg(rd, pass); |
| 3532 | gen_st32(tmp, cpu_T[1], IS_USER(s)); | 3487 | gen_st32(tmp, cpu_T[1], IS_USER(s)); |
| 3533 | } | 3488 | } |
| 3534 | gen_op_addl_T1_im(stride); | 3489 | gen_op_addl_T1_im(stride); |
| @@ -3596,27 +3551,23 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -3596,27 +3551,23 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 3596 | switch (size) { | 3551 | switch (size) { |
| 3597 | case 0: | 3552 | case 0: |
| 3598 | tmp = gen_ld8u(cpu_T[1], IS_USER(s)); | 3553 | tmp = gen_ld8u(cpu_T[1], IS_USER(s)); |
| 3599 | - tcg_gen_mov_i32(cpu_T[0], tmp); | ||
| 3600 | - dead_tmp(tmp); | ||
| 3601 | - gen_op_neon_dup_u8(0); | 3554 | + gen_neon_dup_u8(tmp, 0); |
| 3602 | break; | 3555 | break; |
| 3603 | case 1: | 3556 | case 1: |
| 3604 | tmp = gen_ld16u(cpu_T[1], IS_USER(s)); | 3557 | tmp = gen_ld16u(cpu_T[1], IS_USER(s)); |
| 3605 | - tcg_gen_mov_i32(cpu_T[0], tmp); | ||
| 3606 | - dead_tmp(tmp); | ||
| 3607 | - gen_op_neon_dup_low16(); | 3558 | + gen_neon_dup_low16(tmp); |
| 3608 | break; | 3559 | break; |
| 3609 | case 2: | 3560 | case 2: |
| 3610 | tmp = gen_ld32(cpu_T[0], IS_USER(s)); | 3561 | tmp = gen_ld32(cpu_T[0], IS_USER(s)); |
| 3611 | - tcg_gen_mov_i32(cpu_T[0], tmp); | ||
| 3612 | - dead_tmp(tmp); | ||
| 3613 | break; | 3562 | break; |
| 3614 | case 3: | 3563 | case 3: |
| 3615 | return 1; | 3564 | return 1; |
| 3616 | } | 3565 | } |
| 3617 | gen_op_addl_T1_im(1 << size); | 3566 | gen_op_addl_T1_im(1 << size); |
| 3618 | - NEON_SET_REG(T0, rd, 0); | ||
| 3619 | - NEON_SET_REG(T0, rd, 1); | 3567 | + tmp2 = new_tmp(); |
| 3568 | + tcg_gen_mov_i32(tmp2, tmp); | ||
| 3569 | + neon_store_reg(rd, 0, tmp2); | ||
| 3570 | + neon_store_reg(rd, 0, tmp); | ||
| 3620 | rd += stride; | 3571 | rd += stride; |
| 3621 | } | 3572 | } |
| 3622 | stride = (1 << size) * nregs; | 3573 | stride = (1 << size) * nregs; |
| @@ -3707,12 +3658,158 @@ static void gen_neon_bsl(TCGv dest, TCGv t, TCGv f, TCGv c) | @@ -3707,12 +3658,158 @@ static void gen_neon_bsl(TCGv dest, TCGv t, TCGv f, TCGv c) | ||
| 3707 | tcg_gen_or_i32(dest, t, f); | 3658 | tcg_gen_or_i32(dest, t, f); |
| 3708 | } | 3659 | } |
| 3709 | 3660 | ||
| 3661 | +static inline void gen_neon_narrow(int size, TCGv dest, TCGv src) | ||
| 3662 | +{ | ||
| 3663 | + switch (size) { | ||
| 3664 | + case 0: gen_helper_neon_narrow_u8(dest, src); break; | ||
| 3665 | + case 1: gen_helper_neon_narrow_u16(dest, src); break; | ||
| 3666 | + case 2: tcg_gen_trunc_i64_i32(dest, src); break; | ||
| 3667 | + default: abort(); | ||
| 3668 | + } | ||
| 3669 | +} | ||
| 3670 | + | ||
| 3671 | +static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv src) | ||
| 3672 | +{ | ||
| 3673 | + switch (size) { | ||
| 3674 | + case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break; | ||
| 3675 | + case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break; | ||
| 3676 | + case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break; | ||
| 3677 | + default: abort(); | ||
| 3678 | + } | ||
| 3679 | +} | ||
| 3680 | + | ||
| 3681 | +static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv src) | ||
| 3682 | +{ | ||
| 3683 | + switch (size) { | ||
| 3684 | + case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break; | ||
| 3685 | + case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break; | ||
| 3686 | + case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break; | ||
| 3687 | + default: abort(); | ||
| 3688 | + } | ||
| 3689 | +} | ||
| 3690 | + | ||
| 3691 | +static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift, | ||
| 3692 | + int q, int u) | ||
| 3693 | +{ | ||
| 3694 | + if (q) { | ||
| 3695 | + if (u) { | ||
| 3696 | + switch (size) { | ||
| 3697 | + case 1: gen_helper_neon_rshl_u16(var, var, shift); break; | ||
| 3698 | + case 2: gen_helper_neon_rshl_u32(var, var, shift); break; | ||
| 3699 | + default: abort(); | ||
| 3700 | + } | ||
| 3701 | + } else { | ||
| 3702 | + switch (size) { | ||
| 3703 | + case 1: gen_helper_neon_rshl_s16(var, var, shift); break; | ||
| 3704 | + case 2: gen_helper_neon_rshl_s32(var, var, shift); break; | ||
| 3705 | + default: abort(); | ||
| 3706 | + } | ||
| 3707 | + } | ||
| 3708 | + } else { | ||
| 3709 | + if (u) { | ||
| 3710 | + switch (size) { | ||
| 3711 | + case 1: gen_helper_neon_rshl_u16(var, var, shift); break; | ||
| 3712 | + case 2: gen_helper_neon_rshl_u32(var, var, shift); break; | ||
| 3713 | + default: abort(); | ||
| 3714 | + } | ||
| 3715 | + } else { | ||
| 3716 | + switch (size) { | ||
| 3717 | + case 1: gen_helper_neon_shl_s16(var, var, shift); break; | ||
| 3718 | + case 2: gen_helper_neon_shl_s32(var, var, shift); break; | ||
| 3719 | + default: abort(); | ||
| 3720 | + } | ||
| 3721 | + } | ||
| 3722 | + } | ||
| 3723 | +} | ||
| 3724 | + | ||
| 3725 | +static inline void gen_neon_widen(TCGv dest, TCGv src, int size, int u) | ||
| 3726 | +{ | ||
| 3727 | + if (u) { | ||
| 3728 | + switch (size) { | ||
| 3729 | + case 0: gen_helper_neon_widen_u8(dest, src); break; | ||
| 3730 | + case 1: gen_helper_neon_widen_u16(dest, src); break; | ||
| 3731 | + case 2: tcg_gen_extu_i32_i64(dest, src); break; | ||
| 3732 | + default: abort(); | ||
| 3733 | + } | ||
| 3734 | + } else { | ||
| 3735 | + switch (size) { | ||
| 3736 | + case 0: gen_helper_neon_widen_s8(dest, src); break; | ||
| 3737 | + case 1: gen_helper_neon_widen_s16(dest, src); break; | ||
| 3738 | + case 2: tcg_gen_ext_i32_i64(dest, src); break; | ||
| 3739 | + default: abort(); | ||
| 3740 | + } | ||
| 3741 | + } | ||
| 3742 | + dead_tmp(src); | ||
| 3743 | +} | ||
| 3744 | + | ||
| 3745 | +static inline void gen_neon_addl(int size) | ||
| 3746 | +{ | ||
| 3747 | + switch (size) { | ||
| 3748 | + case 0: gen_helper_neon_addl_u16(CPU_V001); break; | ||
| 3749 | + case 1: gen_helper_neon_addl_u32(CPU_V001); break; | ||
| 3750 | + case 2: tcg_gen_add_i64(CPU_V001); break; | ||
| 3751 | + default: abort(); | ||
| 3752 | + } | ||
| 3753 | +} | ||
| 3754 | + | ||
| 3755 | +static inline void gen_neon_subl(int size) | ||
| 3756 | +{ | ||
| 3757 | + switch (size) { | ||
| 3758 | + case 0: gen_helper_neon_subl_u16(CPU_V001); break; | ||
| 3759 | + case 1: gen_helper_neon_subl_u32(CPU_V001); break; | ||
| 3760 | + case 2: tcg_gen_sub_i64(CPU_V001); break; | ||
| 3761 | + default: abort(); | ||
| 3762 | + } | ||
| 3763 | +} | ||
| 3764 | + | ||
| 3765 | +static inline void gen_neon_negl(TCGv var, int size) | ||
| 3766 | +{ | ||
| 3767 | + switch (size) { | ||
| 3768 | + case 0: gen_helper_neon_negl_u16(var, var); break; | ||
| 3769 | + case 1: gen_helper_neon_negl_u32(var, var); break; | ||
| 3770 | + case 2: gen_helper_neon_negl_u64(var, var); break; | ||
| 3771 | + default: abort(); | ||
| 3772 | + } | ||
| 3773 | +} | ||
| 3774 | + | ||
| 3775 | +static inline void gen_neon_addl_saturate(TCGv op0, TCGv op1, int size) | ||
| 3776 | +{ | ||
| 3777 | + switch (size) { | ||
| 3778 | + case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break; | ||
| 3779 | + case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break; | ||
| 3780 | + default: abort(); | ||
| 3781 | + } | ||
| 3782 | +} | ||
| 3783 | + | ||
| 3784 | +static inline void gen_neon_mull(TCGv dest, TCGv a, TCGv b, int size, int u) | ||
| 3785 | +{ | ||
| 3786 | + TCGv tmp; | ||
| 3787 | + | ||
| 3788 | + switch ((size << 1) | u) { | ||
| 3789 | + case 0: gen_helper_neon_mull_s8(dest, a, b); break; | ||
| 3790 | + case 1: gen_helper_neon_mull_u8(dest, a, b); break; | ||
| 3791 | + case 2: gen_helper_neon_mull_s16(dest, a, b); break; | ||
| 3792 | + case 3: gen_helper_neon_mull_u16(dest, a, b); break; | ||
| 3793 | + case 4: | ||
| 3794 | + tmp = gen_muls_i64_i32(a, b); | ||
| 3795 | + tcg_gen_mov_i64(dest, tmp); | ||
| 3796 | + break; | ||
| 3797 | + case 5: | ||
| 3798 | + tmp = gen_mulu_i64_i32(a, b); | ||
| 3799 | + tcg_gen_mov_i64(dest, tmp); | ||
| 3800 | + break; | ||
| 3801 | + default: abort(); | ||
| 3802 | + } | ||
| 3803 | + if (size < 2) { | ||
| 3804 | + dead_tmp(b); | ||
| 3805 | + dead_tmp(a); | ||
| 3806 | + } | ||
| 3807 | +} | ||
| 3808 | + | ||
| 3710 | /* Translate a NEON data processing instruction. Return nonzero if the | 3809 | /* Translate a NEON data processing instruction. Return nonzero if the |
| 3711 | instruction is invalid. | 3810 | instruction is invalid. |
| 3712 | - In general we process vectors in 32-bit chunks. This means we can reuse | ||
| 3713 | - some of the scalar ops, and hopefully the code generated for 32-bit | ||
| 3714 | - hosts won't be too awful. The downside is that the few 64-bit operations | ||
| 3715 | - (mainly shifts) get complicated. */ | 3811 | + We process data in a mixture of 32-bit and 64-bit chunks. |
| 3812 | + Mostly we use 32-bit chunks so we can use normal scalar instructions. */ | ||
| 3716 | 3813 | ||
| 3717 | static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | 3814 | static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
| 3718 | { | 3815 | { |
| @@ -3742,41 +3839,70 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -3742,41 +3839,70 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 3742 | if ((insn & (1 << 23)) == 0) { | 3839 | if ((insn & (1 << 23)) == 0) { |
| 3743 | /* Three register same length. */ | 3840 | /* Three register same length. */ |
| 3744 | op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); | 3841 | op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); |
| 3745 | - if (size == 3 && (op == 1 || op == 5 || op == 16)) { | 3842 | + if (size == 3 && (op == 1 || op == 5 || op == 8 || op == 9 |
| 3843 | + || op == 10 || op == 11 || op == 16)) { | ||
| 3844 | + /* 64-bit element instructions. */ | ||
| 3746 | for (pass = 0; pass < (q ? 2 : 1); pass++) { | 3845 | for (pass = 0; pass < (q ? 2 : 1); pass++) { |
| 3747 | - NEON_GET_REG(T0, rm, pass * 2); | ||
| 3748 | - NEON_GET_REG(T1, rm, pass * 2 + 1); | ||
| 3749 | - gen_neon_movl_scratch_T0(0); | ||
| 3750 | - gen_neon_movl_scratch_T1(1); | ||
| 3751 | - NEON_GET_REG(T0, rn, pass * 2); | ||
| 3752 | - NEON_GET_REG(T1, rn, pass * 2 + 1); | 3846 | + neon_load_reg64(cpu_V0, rn + pass); |
| 3847 | + neon_load_reg64(cpu_V1, rm + pass); | ||
| 3753 | switch (op) { | 3848 | switch (op) { |
| 3754 | case 1: /* VQADD */ | 3849 | case 1: /* VQADD */ |
| 3755 | if (u) { | 3850 | if (u) { |
| 3756 | - gen_op_neon_addl_saturate_u64(); | 3851 | + gen_helper_neon_add_saturate_u64(CPU_V001); |
| 3757 | } else { | 3852 | } else { |
| 3758 | - gen_op_neon_addl_saturate_s64(); | 3853 | + gen_helper_neon_add_saturate_s64(CPU_V001); |
| 3759 | } | 3854 | } |
| 3760 | break; | 3855 | break; |
| 3761 | case 5: /* VQSUB */ | 3856 | case 5: /* VQSUB */ |
| 3762 | if (u) { | 3857 | if (u) { |
| 3763 | - gen_op_neon_subl_saturate_u64(); | 3858 | + gen_helper_neon_sub_saturate_u64(CPU_V001); |
| 3764 | } else { | 3859 | } else { |
| 3765 | - gen_op_neon_subl_saturate_s64(); | 3860 | + gen_helper_neon_sub_saturate_s64(CPU_V001); |
| 3861 | + } | ||
| 3862 | + break; | ||
| 3863 | + case 8: /* VSHL */ | ||
| 3864 | + if (u) { | ||
| 3865 | + gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0); | ||
| 3866 | + } else { | ||
| 3867 | + gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0); | ||
| 3868 | + } | ||
| 3869 | + break; | ||
| 3870 | + case 9: /* VQSHL */ | ||
| 3871 | + if (u) { | ||
| 3872 | + gen_helper_neon_qshl_u64(cpu_V0, cpu_env, | ||
| 3873 | + cpu_V0, cpu_V0); | ||
| 3874 | + } else { | ||
| 3875 | + gen_helper_neon_qshl_s64(cpu_V1, cpu_env, | ||
| 3876 | + cpu_V1, cpu_V0); | ||
| 3877 | + } | ||
| 3878 | + break; | ||
| 3879 | + case 10: /* VRSHL */ | ||
| 3880 | + if (u) { | ||
| 3881 | + gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0); | ||
| 3882 | + } else { | ||
| 3883 | + gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0); | ||
| 3884 | + } | ||
| 3885 | + break; | ||
| 3886 | + case 11: /* VQRSHL */ | ||
| 3887 | + if (u) { | ||
| 3888 | + gen_helper_neon_qrshl_u64(cpu_V0, cpu_env, | ||
| 3889 | + cpu_V1, cpu_V0); | ||
| 3890 | + } else { | ||
| 3891 | + gen_helper_neon_qrshl_s64(cpu_V0, cpu_env, | ||
| 3892 | + cpu_V1, cpu_V0); | ||
| 3766 | } | 3893 | } |
| 3767 | break; | 3894 | break; |
| 3768 | case 16: | 3895 | case 16: |
| 3769 | if (u) { | 3896 | if (u) { |
| 3770 | - gen_op_neon_subl_u64(); | 3897 | + tcg_gen_sub_i64(CPU_V001); |
| 3771 | } else { | 3898 | } else { |
| 3772 | - gen_op_neon_addl_u64(); | 3899 | + tcg_gen_add_i64(CPU_V001); |
| 3773 | } | 3900 | } |
| 3774 | break; | 3901 | break; |
| 3775 | default: | 3902 | default: |
| 3776 | abort(); | 3903 | abort(); |
| 3777 | } | 3904 | } |
| 3778 | - NEON_SET_REG(T0, rd, pass * 2); | ||
| 3779 | - NEON_SET_REG(T1, rd, pass * 2 + 1); | 3905 | + neon_store_reg64(cpu_V0, rd + pass); |
| 3780 | } | 3906 | } |
| 3781 | return 0; | 3907 | return 0; |
| 3782 | } | 3908 | } |
| @@ -3784,13 +3910,13 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -3784,13 +3910,13 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 3784 | case 8: /* VSHL */ | 3910 | case 8: /* VSHL */ |
| 3785 | case 9: /* VQSHL */ | 3911 | case 9: /* VQSHL */ |
| 3786 | case 10: /* VRSHL */ | 3912 | case 10: /* VRSHL */ |
| 3787 | - case 11: /* VQSHL */ | ||
| 3788 | - /* Shift operations have Rn and Rm reversed. */ | 3913 | + case 11: /* VQRSHL */ |
| 3789 | { | 3914 | { |
| 3790 | - int tmp; | ||
| 3791 | - tmp = rn; | 3915 | + int rtmp; |
| 3916 | + /* Shift instruction operands are reversed. */ | ||
| 3917 | + rtmp = rn; | ||
| 3792 | rn = rm; | 3918 | rn = rm; |
| 3793 | - rm = tmp; | 3919 | + rm = rtmp; |
| 3794 | pairwise = 0; | 3920 | pairwise = 0; |
| 3795 | } | 3921 | } |
| 3796 | break; | 3922 | break; |
| @@ -3834,19 +3960,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -3834,19 +3960,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 3834 | GEN_NEON_INTEGER_OP(hadd); | 3960 | GEN_NEON_INTEGER_OP(hadd); |
| 3835 | break; | 3961 | break; |
| 3836 | case 1: /* VQADD */ | 3962 | case 1: /* VQADD */ |
| 3837 | - switch (size << 1| u) { | ||
| 3838 | - case 0: gen_op_neon_qadd_s8(); break; | ||
| 3839 | - case 1: gen_op_neon_qadd_u8(); break; | ||
| 3840 | - case 2: gen_op_neon_qadd_s16(); break; | ||
| 3841 | - case 3: gen_op_neon_qadd_u16(); break; | ||
| 3842 | - case 4: | ||
| 3843 | - gen_helper_add_saturate(cpu_T[0], cpu_T[0], cpu_T[1]); | ||
| 3844 | - break; | ||
| 3845 | - case 5: | ||
| 3846 | - gen_helper_add_usaturate(cpu_T[0], cpu_T[0], cpu_T[1]); | ||
| 3847 | - break; | ||
| 3848 | - default: abort(); | ||
| 3849 | - } | 3963 | + GEN_NEON_INTEGER_OP_ENV(qadd); |
| 3850 | break; | 3964 | break; |
| 3851 | case 2: /* VRHADD */ | 3965 | case 2: /* VRHADD */ |
| 3852 | GEN_NEON_INTEGER_OP(rhadd); | 3966 | GEN_NEON_INTEGER_OP(rhadd); |
| @@ -3890,19 +4004,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -3890,19 +4004,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 3890 | GEN_NEON_INTEGER_OP(hsub); | 4004 | GEN_NEON_INTEGER_OP(hsub); |
| 3891 | break; | 4005 | break; |
| 3892 | case 5: /* VQSUB */ | 4006 | case 5: /* VQSUB */ |
| 3893 | - switch ((size << 1) | u) { | ||
| 3894 | - case 0: gen_op_neon_qsub_s8(); break; | ||
| 3895 | - case 1: gen_op_neon_qsub_u8(); break; | ||
| 3896 | - case 2: gen_op_neon_qsub_s16(); break; | ||
| 3897 | - case 3: gen_op_neon_qsub_u16(); break; | ||
| 3898 | - case 4: | ||
| 3899 | - gen_helper_sub_saturate(cpu_T[0], cpu_T[0], cpu_T[1]); | ||
| 3900 | - break; | ||
| 3901 | - case 5: | ||
| 3902 | - gen_helper_sub_usaturate(cpu_T[0], cpu_T[0], cpu_T[1]); | ||
| 3903 | - break; | ||
| 3904 | - default: abort(); | ||
| 3905 | - } | 4007 | + GEN_NEON_INTEGER_OP_ENV(qsub); |
| 3906 | break; | 4008 | break; |
| 3907 | case 6: /* VCGT */ | 4009 | case 6: /* VCGT */ |
| 3908 | GEN_NEON_INTEGER_OP(cgt); | 4010 | GEN_NEON_INTEGER_OP(cgt); |
| @@ -3911,76 +4013,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -3911,76 +4013,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 3911 | GEN_NEON_INTEGER_OP(cge); | 4013 | GEN_NEON_INTEGER_OP(cge); |
| 3912 | break; | 4014 | break; |
| 3913 | case 8: /* VSHL */ | 4015 | case 8: /* VSHL */ |
| 3914 | - switch ((size << 1) | u) { | ||
| 3915 | - case 0: gen_op_neon_shl_s8(); break; | ||
| 3916 | - case 1: gen_op_neon_shl_u8(); break; | ||
| 3917 | - case 2: gen_op_neon_shl_s16(); break; | ||
| 3918 | - case 3: gen_op_neon_shl_u16(); break; | ||
| 3919 | - case 4: gen_op_neon_shl_s32(); break; | ||
| 3920 | - case 5: gen_op_neon_shl_u32(); break; | ||
| 3921 | -#if 0 | ||
| 3922 | - /* ??? Implementing these is tricky because the vector ops work | ||
| 3923 | - on 32-bit pieces. */ | ||
| 3924 | - case 6: gen_op_neon_shl_s64(); break; | ||
| 3925 | - case 7: gen_op_neon_shl_u64(); break; | ||
| 3926 | -#else | ||
| 3927 | - case 6: case 7: cpu_abort(env, "VSHL.64 not implemented"); | ||
| 3928 | -#endif | ||
| 3929 | - } | 4016 | + GEN_NEON_INTEGER_OP(shl); |
| 3930 | break; | 4017 | break; |
| 3931 | case 9: /* VQSHL */ | 4018 | case 9: /* VQSHL */ |
| 3932 | - switch ((size << 1) | u) { | ||
| 3933 | - case 0: gen_op_neon_qshl_s8(); break; | ||
| 3934 | - case 1: gen_op_neon_qshl_u8(); break; | ||
| 3935 | - case 2: gen_op_neon_qshl_s16(); break; | ||
| 3936 | - case 3: gen_op_neon_qshl_u16(); break; | ||
| 3937 | - case 4: gen_op_neon_qshl_s32(); break; | ||
| 3938 | - case 5: gen_op_neon_qshl_u32(); break; | ||
| 3939 | -#if 0 | ||
| 3940 | - /* ??? Implementing these is tricky because the vector ops work | ||
| 3941 | - on 32-bit pieces. */ | ||
| 3942 | - case 6: gen_op_neon_qshl_s64(); break; | ||
| 3943 | - case 7: gen_op_neon_qshl_u64(); break; | ||
| 3944 | -#else | ||
| 3945 | - case 6: case 7: cpu_abort(env, "VQSHL.64 not implemented"); | ||
| 3946 | -#endif | ||
| 3947 | - } | 4019 | + GEN_NEON_INTEGER_OP_ENV(qshl); |
| 3948 | break; | 4020 | break; |
| 3949 | case 10: /* VRSHL */ | 4021 | case 10: /* VRSHL */ |
| 3950 | - switch ((size << 1) | u) { | ||
| 3951 | - case 0: gen_op_neon_rshl_s8(); break; | ||
| 3952 | - case 1: gen_op_neon_rshl_u8(); break; | ||
| 3953 | - case 2: gen_op_neon_rshl_s16(); break; | ||
| 3954 | - case 3: gen_op_neon_rshl_u16(); break; | ||
| 3955 | - case 4: gen_op_neon_rshl_s32(); break; | ||
| 3956 | - case 5: gen_op_neon_rshl_u32(); break; | ||
| 3957 | -#if 0 | ||
| 3958 | - /* ??? Implementing these is tricky because the vector ops work | ||
| 3959 | - on 32-bit pieces. */ | ||
| 3960 | - case 6: gen_op_neon_rshl_s64(); break; | ||
| 3961 | - case 7: gen_op_neon_rshl_u64(); break; | ||
| 3962 | -#else | ||
| 3963 | - case 6: case 7: cpu_abort(env, "VRSHL.64 not implemented"); | ||
| 3964 | -#endif | ||
| 3965 | - } | 4022 | + GEN_NEON_INTEGER_OP(rshl); |
| 3966 | break; | 4023 | break; |
| 3967 | case 11: /* VQRSHL */ | 4024 | case 11: /* VQRSHL */ |
| 3968 | - switch ((size << 1) | u) { | ||
| 3969 | - case 0: gen_op_neon_qrshl_s8(); break; | ||
| 3970 | - case 1: gen_op_neon_qrshl_u8(); break; | ||
| 3971 | - case 2: gen_op_neon_qrshl_s16(); break; | ||
| 3972 | - case 3: gen_op_neon_qrshl_u16(); break; | ||
| 3973 | - case 4: gen_op_neon_qrshl_s32(); break; | ||
| 3974 | - case 5: gen_op_neon_qrshl_u32(); break; | ||
| 3975 | -#if 0 | ||
| 3976 | - /* ??? Implementing these is tricky because the vector ops work | ||
| 3977 | - on 32-bit pieces. */ | ||
| 3978 | - case 6: gen_op_neon_qrshl_s64(); break; | ||
| 3979 | - case 7: gen_op_neon_qrshl_u64(); break; | ||
| 3980 | -#else | ||
| 3981 | - case 6: case 7: cpu_abort(env, "VQRSHL.64 not implemented"); | ||
| 3982 | -#endif | ||
| 3983 | - } | 4025 | + GEN_NEON_INTEGER_OP_ENV(qrshl); |
| 3984 | break; | 4026 | break; |
| 3985 | case 12: /* VMAX */ | 4027 | case 12: /* VMAX */ |
| 3986 | GEN_NEON_INTEGER_OP(max); | 4028 | GEN_NEON_INTEGER_OP(max); |
| @@ -4002,8 +4044,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4002,8 +4044,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4002 | return 1; | 4044 | return 1; |
| 4003 | } else { /* VSUB */ | 4045 | } else { /* VSUB */ |
| 4004 | switch (size) { | 4046 | switch (size) { |
| 4005 | - case 0: gen_op_neon_sub_u8(); break; | ||
| 4006 | - case 1: gen_op_neon_sub_u16(); break; | 4047 | + case 0: gen_helper_neon_sub_u8(CPU_T001); break; |
| 4048 | + case 1: gen_helper_neon_sub_u16(CPU_T001); break; | ||
| 4007 | case 2: gen_op_subl_T0_T1(); break; | 4049 | case 2: gen_op_subl_T0_T1(); break; |
| 4008 | default: return 1; | 4050 | default: return 1; |
| 4009 | } | 4051 | } |
| @@ -4012,46 +4054,41 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4012,46 +4054,41 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4012 | case 17: | 4054 | case 17: |
| 4013 | if (!u) { /* VTST */ | 4055 | if (!u) { /* VTST */ |
| 4014 | switch (size) { | 4056 | switch (size) { |
| 4015 | - case 0: gen_op_neon_tst_u8(); break; | ||
| 4016 | - case 1: gen_op_neon_tst_u16(); break; | ||
| 4017 | - case 2: gen_op_neon_tst_u32(); break; | 4057 | + case 0: gen_helper_neon_tst_u8(CPU_T001); break; |
| 4058 | + case 1: gen_helper_neon_tst_u16(CPU_T001); break; | ||
| 4059 | + case 2: gen_helper_neon_tst_u32(CPU_T001); break; | ||
| 4018 | default: return 1; | 4060 | default: return 1; |
| 4019 | } | 4061 | } |
| 4020 | } else { /* VCEQ */ | 4062 | } else { /* VCEQ */ |
| 4021 | switch (size) { | 4063 | switch (size) { |
| 4022 | - case 0: gen_op_neon_ceq_u8(); break; | ||
| 4023 | - case 1: gen_op_neon_ceq_u16(); break; | ||
| 4024 | - case 2: gen_op_neon_ceq_u32(); break; | 4064 | + case 0: gen_helper_neon_ceq_u8(CPU_T001); break; |
| 4065 | + case 1: gen_helper_neon_ceq_u16(CPU_T001); break; | ||
| 4066 | + case 2: gen_helper_neon_ceq_u32(CPU_T001); break; | ||
| 4025 | default: return 1; | 4067 | default: return 1; |
| 4026 | } | 4068 | } |
| 4027 | } | 4069 | } |
| 4028 | break; | 4070 | break; |
| 4029 | case 18: /* Multiply. */ | 4071 | case 18: /* Multiply. */ |
| 4030 | switch (size) { | 4072 | switch (size) { |
| 4031 | - case 0: gen_op_neon_mul_u8(); break; | ||
| 4032 | - case 1: gen_op_neon_mul_u16(); break; | 4073 | + case 0: gen_helper_neon_mul_u8(CPU_T001); break; |
| 4074 | + case 1: gen_helper_neon_mul_u16(CPU_T001); break; | ||
| 4033 | case 2: gen_op_mul_T0_T1(); break; | 4075 | case 2: gen_op_mul_T0_T1(); break; |
| 4034 | default: return 1; | 4076 | default: return 1; |
| 4035 | } | 4077 | } |
| 4036 | NEON_GET_REG(T1, rd, pass); | 4078 | NEON_GET_REG(T1, rd, pass); |
| 4037 | if (u) { /* VMLS */ | 4079 | if (u) { /* VMLS */ |
| 4038 | - switch (size) { | ||
| 4039 | - case 0: gen_op_neon_rsb_u8(); break; | ||
| 4040 | - case 1: gen_op_neon_rsb_u16(); break; | ||
| 4041 | - case 2: gen_op_rsbl_T0_T1(); break; | ||
| 4042 | - default: return 1; | ||
| 4043 | - } | 4080 | + gen_neon_rsb(size); |
| 4044 | } else { /* VMLA */ | 4081 | } else { /* VMLA */ |
| 4045 | gen_neon_add(size); | 4082 | gen_neon_add(size); |
| 4046 | } | 4083 | } |
| 4047 | break; | 4084 | break; |
| 4048 | case 19: /* VMUL */ | 4085 | case 19: /* VMUL */ |
| 4049 | if (u) { /* polynomial */ | 4086 | if (u) { /* polynomial */ |
| 4050 | - gen_op_neon_mul_p8(); | 4087 | + gen_helper_neon_mul_p8(CPU_T001); |
| 4051 | } else { /* Integer */ | 4088 | } else { /* Integer */ |
| 4052 | switch (size) { | 4089 | switch (size) { |
| 4053 | - case 0: gen_op_neon_mul_u8(); break; | ||
| 4054 | - case 1: gen_op_neon_mul_u16(); break; | 4090 | + case 0: gen_helper_neon_mul_u8(CPU_T001); break; |
| 4091 | + case 1: gen_helper_neon_mul_u16(CPU_T001); break; | ||
| 4055 | case 2: gen_op_mul_T0_T1(); break; | 4092 | case 2: gen_op_mul_T0_T1(); break; |
| 4056 | default: return 1; | 4093 | default: return 1; |
| 4057 | } | 4094 | } |
| @@ -4066,14 +4103,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4066,14 +4103,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4066 | case 22: /* Multiply high. */ | 4103 | case 22: /* Multiply high. */ |
| 4067 | if (!u) { /* VQDMULH */ | 4104 | if (!u) { /* VQDMULH */ |
| 4068 | switch (size) { | 4105 | switch (size) { |
| 4069 | - case 1: gen_op_neon_qdmulh_s16(); break; | ||
| 4070 | - case 2: gen_op_neon_qdmulh_s32(); break; | 4106 | + case 1: gen_helper_neon_qdmulh_s16(CPU_T0E01); break; |
| 4107 | + case 2: gen_helper_neon_qdmulh_s32(CPU_T0E01); break; | ||
| 4071 | default: return 1; | 4108 | default: return 1; |
| 4072 | } | 4109 | } |
| 4073 | } else { /* VQRDMULH */ | 4111 | } else { /* VQRDMULH */ |
| 4074 | switch (size) { | 4111 | switch (size) { |
| 4075 | - case 1: gen_op_neon_qrdmulh_s16(); break; | ||
| 4076 | - case 2: gen_op_neon_qrdmulh_s32(); break; | 4112 | + case 1: gen_helper_neon_qrdmulh_s16(CPU_T0E01); break; |
| 4113 | + case 2: gen_helper_neon_qrdmulh_s32(CPU_T0E01); break; | ||
| 4077 | default: return 1; | 4114 | default: return 1; |
| 4078 | } | 4115 | } |
| 4079 | } | 4116 | } |
| @@ -4082,8 +4119,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4082,8 +4119,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4082 | if (u) | 4119 | if (u) |
| 4083 | return 1; | 4120 | return 1; |
| 4084 | switch (size) { | 4121 | switch (size) { |
| 4085 | - case 0: gen_op_neon_padd_u8(); break; | ||
| 4086 | - case 1: gen_op_neon_padd_u16(); break; | 4122 | + case 0: gen_helper_neon_padd_u8(CPU_T001); break; |
| 4123 | + case 1: gen_helper_neon_padd_u16(CPU_T001); break; | ||
| 4087 | case 2: gen_op_addl_T0_T1(); break; | 4124 | case 2: gen_op_addl_T0_T1(); break; |
| 4088 | default: return 1; | 4125 | default: return 1; |
| 4089 | } | 4126 | } |
| @@ -4091,55 +4128,55 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4091,55 +4128,55 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4091 | case 26: /* Floating point arithmetic. */ | 4128 | case 26: /* Floating point arithmetic. */ |
| 4092 | switch ((u << 2) | size) { | 4129 | switch ((u << 2) | size) { |
| 4093 | case 0: /* VADD */ | 4130 | case 0: /* VADD */ |
| 4094 | - gen_op_neon_add_f32(); | 4131 | + gen_helper_neon_add_f32(CPU_T001); |
| 4095 | break; | 4132 | break; |
| 4096 | case 2: /* VSUB */ | 4133 | case 2: /* VSUB */ |
| 4097 | - gen_op_neon_sub_f32(); | 4134 | + gen_helper_neon_sub_f32(CPU_T001); |
| 4098 | break; | 4135 | break; |
| 4099 | case 4: /* VPADD */ | 4136 | case 4: /* VPADD */ |
| 4100 | - gen_op_neon_add_f32(); | 4137 | + gen_helper_neon_add_f32(CPU_T001); |
| 4101 | break; | 4138 | break; |
| 4102 | case 6: /* VABD */ | 4139 | case 6: /* VABD */ |
| 4103 | - gen_op_neon_abd_f32(); | 4140 | + gen_helper_neon_abd_f32(CPU_T001); |
| 4104 | break; | 4141 | break; |
| 4105 | default: | 4142 | default: |
| 4106 | return 1; | 4143 | return 1; |
| 4107 | } | 4144 | } |
| 4108 | break; | 4145 | break; |
| 4109 | case 27: /* Float multiply. */ | 4146 | case 27: /* Float multiply. */ |
| 4110 | - gen_op_neon_mul_f32(); | 4147 | + gen_helper_neon_mul_f32(CPU_T001); |
| 4111 | if (!u) { | 4148 | if (!u) { |
| 4112 | NEON_GET_REG(T1, rd, pass); | 4149 | NEON_GET_REG(T1, rd, pass); |
| 4113 | if (size == 0) { | 4150 | if (size == 0) { |
| 4114 | - gen_op_neon_add_f32(); | 4151 | + gen_helper_neon_add_f32(CPU_T001); |
| 4115 | } else { | 4152 | } else { |
| 4116 | - gen_op_neon_rsb_f32(); | 4153 | + gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]); |
| 4117 | } | 4154 | } |
| 4118 | } | 4155 | } |
| 4119 | break; | 4156 | break; |
| 4120 | case 28: /* Float compare. */ | 4157 | case 28: /* Float compare. */ |
| 4121 | if (!u) { | 4158 | if (!u) { |
| 4122 | - gen_op_neon_ceq_f32(); | 4159 | + gen_helper_neon_ceq_f32(CPU_T001); |
| 4123 | } else { | 4160 | } else { |
| 4124 | if (size == 0) | 4161 | if (size == 0) |
| 4125 | - gen_op_neon_cge_f32(); | 4162 | + gen_helper_neon_cge_f32(CPU_T001); |
| 4126 | else | 4163 | else |
| 4127 | - gen_op_neon_cgt_f32(); | 4164 | + gen_helper_neon_cgt_f32(CPU_T001); |
| 4128 | } | 4165 | } |
| 4129 | break; | 4166 | break; |
| 4130 | case 29: /* Float compare absolute. */ | 4167 | case 29: /* Float compare absolute. */ |
| 4131 | if (!u) | 4168 | if (!u) |
| 4132 | return 1; | 4169 | return 1; |
| 4133 | if (size == 0) | 4170 | if (size == 0) |
| 4134 | - gen_op_neon_acge_f32(); | 4171 | + gen_helper_neon_acge_f32(CPU_T001); |
| 4135 | else | 4172 | else |
| 4136 | - gen_op_neon_acgt_f32(); | 4173 | + gen_helper_neon_acgt_f32(CPU_T001); |
| 4137 | break; | 4174 | break; |
| 4138 | case 30: /* Float min/max. */ | 4175 | case 30: /* Float min/max. */ |
| 4139 | if (size == 0) | 4176 | if (size == 0) |
| 4140 | - gen_op_neon_max_f32(); | 4177 | + gen_helper_neon_max_f32(CPU_T001); |
| 4141 | else | 4178 | else |
| 4142 | - gen_op_neon_min_f32(); | 4179 | + gen_helper_neon_min_f32(CPU_T001); |
| 4143 | break; | 4180 | break; |
| 4144 | case 31: | 4181 | case 31: |
| 4145 | if (size == 0) | 4182 | if (size == 0) |
| @@ -4166,6 +4203,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4166,6 +4203,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4166 | NEON_SET_REG(T0, rd, pass); | 4203 | NEON_SET_REG(T0, rd, pass); |
| 4167 | } | 4204 | } |
| 4168 | } | 4205 | } |
| 4206 | + /* End of 3 register same size operations. */ | ||
| 4169 | } else if (insn & (1 << 4)) { | 4207 | } else if (insn & (1 << 4)) { |
| 4170 | if ((insn & 0x00380080) != 0) { | 4208 | if ((insn & 0x00380080) != 0) { |
| 4171 | /* Two registers and shift. */ | 4209 | /* Two registers and shift. */ |
| @@ -4212,181 +4250,221 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4212,181 +4250,221 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4212 | } | 4250 | } |
| 4213 | 4251 | ||
| 4214 | for (pass = 0; pass < count; pass++) { | 4252 | for (pass = 0; pass < count; pass++) { |
| 4215 | - if (size < 3) { | ||
| 4216 | - /* Operands in T0 and T1. */ | ||
| 4217 | - gen_op_movl_T1_im(imm); | ||
| 4218 | - NEON_GET_REG(T0, rm, pass); | ||
| 4219 | - } else { | ||
| 4220 | - /* Operands in {T0, T1} and env->vfp.scratch. */ | ||
| 4221 | - gen_op_movl_T0_im(imm); | ||
| 4222 | - gen_neon_movl_scratch_T0(0); | ||
| 4223 | - gen_op_movl_T0_im((int32_t)imm >> 31); | ||
| 4224 | - gen_neon_movl_scratch_T0(1); | ||
| 4225 | - NEON_GET_REG(T0, rm, pass * 2); | ||
| 4226 | - NEON_GET_REG(T1, rm, pass * 2 + 1); | ||
| 4227 | - } | ||
| 4228 | - | ||
| 4229 | - if (gen_neon_shift_im[op][u][size] == NULL) | ||
| 4230 | - return 1; | ||
| 4231 | - gen_neon_shift_im[op][u][size](); | ||
| 4232 | - | ||
| 4233 | - if (op == 1 || op == 3) { | ||
| 4234 | - /* Accumulate. */ | ||
| 4235 | - if (size == 3) { | ||
| 4236 | - gen_neon_movl_scratch_T0(0); | ||
| 4237 | - gen_neon_movl_scratch_T1(1); | ||
| 4238 | - NEON_GET_REG(T0, rd, pass * 2); | ||
| 4239 | - NEON_GET_REG(T1, rd, pass * 2 + 1); | ||
| 4240 | - gen_op_neon_addl_u64(); | ||
| 4241 | - } else { | ||
| 4242 | - NEON_GET_REG(T1, rd, pass); | ||
| 4243 | - gen_neon_add(size); | ||
| 4244 | - } | ||
| 4245 | - } else if (op == 4 || (op == 5 && u)) { | ||
| 4246 | - /* Insert */ | ||
| 4247 | - if (size == 3) { | ||
| 4248 | - cpu_abort(env, "VS[LR]I.64 not implemented"); | ||
| 4249 | - } | ||
| 4250 | - switch (size) { | ||
| 4251 | - case 0: | ||
| 4252 | - if (op == 4) | ||
| 4253 | - imm = 0xff >> -shift; | 4253 | + if (size == 3) { |
| 4254 | + neon_load_reg64(cpu_V0, rm + pass); | ||
| 4255 | + tcg_gen_movi_i64(cpu_V1, imm); | ||
| 4256 | + switch (op) { | ||
| 4257 | + case 0: /* VSHR */ | ||
| 4258 | + case 1: /* VSRA */ | ||
| 4259 | + if (u) | ||
| 4260 | + gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); | ||
| 4254 | else | 4261 | else |
| 4255 | - imm = (uint8_t)(0xff << shift); | ||
| 4256 | - imm |= imm << 8; | ||
| 4257 | - imm |= imm << 16; | 4262 | + gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1); |
| 4258 | break; | 4263 | break; |
| 4259 | - case 1: | ||
| 4260 | - if (op == 4) | ||
| 4261 | - imm = 0xffff >> -shift; | 4264 | + case 2: /* VRSHR */ |
| 4265 | + case 3: /* VRSRA */ | ||
| 4266 | + if (u) | ||
| 4267 | + gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1); | ||
| 4262 | else | 4268 | else |
| 4263 | - imm = (uint16_t)(0xffff << shift); | ||
| 4264 | - imm |= imm << 16; | 4269 | + gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1); |
| 4265 | break; | 4270 | break; |
| 4266 | - case 2: | ||
| 4267 | - if (op == 4) | ||
| 4268 | - imm = 0xffffffffu >> -shift; | 4271 | + case 4: /* VSRI */ |
| 4272 | + if (!u) | ||
| 4273 | + return 1; | ||
| 4274 | + gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); | ||
| 4275 | + break; | ||
| 4276 | + case 5: /* VSHL, VSLI */ | ||
| 4277 | + gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); | ||
| 4278 | + break; | ||
| 4279 | + case 6: /* VQSHL */ | ||
| 4280 | + if (u) | ||
| 4281 | + gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1); | ||
| 4269 | else | 4282 | else |
| 4270 | - imm = 0xffffffffu << shift; | 4283 | + gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1); |
| 4284 | + break; | ||
| 4285 | + case 7: /* VQSHLU */ | ||
| 4286 | + gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1); | ||
| 4271 | break; | 4287 | break; |
| 4272 | - default: | ||
| 4273 | - abort(); | ||
| 4274 | } | 4288 | } |
| 4275 | - tmp = neon_load_reg(rd, pass); | ||
| 4276 | - tcg_gen_andi_i32(cpu_T[0], cpu_T[0], imm); | ||
| 4277 | - tcg_gen_andi_i32(tmp, tmp, ~imm); | ||
| 4278 | - tcg_gen_or_i32(cpu_T[0], cpu_T[0], tmp); | ||
| 4279 | - } | ||
| 4280 | - if (size == 3) { | ||
| 4281 | - NEON_SET_REG(T0, rd, pass * 2); | ||
| 4282 | - NEON_SET_REG(T1, rd, pass * 2 + 1); | ||
| 4283 | - } else { | 4289 | + if (op == 1 || op == 3) { |
| 4290 | + /* Accumulate. */ | ||
| 4291 | + neon_load_reg64(cpu_V0, rd + pass); | ||
| 4292 | + tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1); | ||
| 4293 | + } else if (op == 4 || (op == 5 && u)) { | ||
| 4294 | + /* Insert */ | ||
| 4295 | + cpu_abort(env, "VS[LR]I.64 not implemented"); | ||
| 4296 | + } | ||
| 4297 | + neon_store_reg64(cpu_V0, rd + pass); | ||
| 4298 | + } else { /* size < 3 */ | ||
| 4299 | + /* Operands in T0 and T1. */ | ||
| 4300 | + gen_op_movl_T1_im(imm); | ||
| 4301 | + NEON_GET_REG(T0, rm, pass); | ||
| 4302 | + switch (op) { | ||
| 4303 | + case 0: /* VSHR */ | ||
| 4304 | + case 1: /* VSRA */ | ||
| 4305 | + GEN_NEON_INTEGER_OP(shl); | ||
| 4306 | + break; | ||
| 4307 | + case 2: /* VRSHR */ | ||
| 4308 | + case 3: /* VRSRA */ | ||
| 4309 | + GEN_NEON_INTEGER_OP(rshl); | ||
| 4310 | + break; | ||
| 4311 | + case 4: /* VSRI */ | ||
| 4312 | + if (!u) | ||
| 4313 | + return 1; | ||
| 4314 | + GEN_NEON_INTEGER_OP(shl); | ||
| 4315 | + break; | ||
| 4316 | + case 5: /* VSHL, VSLI */ | ||
| 4317 | + switch (size) { | ||
| 4318 | + case 0: gen_helper_neon_shl_u8(CPU_T001); break; | ||
| 4319 | + case 1: gen_helper_neon_shl_u16(CPU_T001); break; | ||
| 4320 | + case 2: gen_helper_neon_shl_u32(CPU_T001); break; | ||
| 4321 | + default: return 1; | ||
| 4322 | + } | ||
| 4323 | + break; | ||
| 4324 | + case 6: /* VQSHL */ | ||
| 4325 | + GEN_NEON_INTEGER_OP_ENV(qshl); | ||
| 4326 | + break; | ||
| 4327 | + case 7: /* VQSHLU */ | ||
| 4328 | + switch (size) { | ||
| 4329 | + case 0: gen_helper_neon_qshl_u8(CPU_T0E01); break; | ||
| 4330 | + case 1: gen_helper_neon_qshl_u16(CPU_T0E01); break; | ||
| 4331 | + case 2: gen_helper_neon_qshl_u32(CPU_T0E01); break; | ||
| 4332 | + default: return 1; | ||
| 4333 | + } | ||
| 4334 | + break; | ||
| 4335 | + } | ||
| 4336 | + | ||
| 4337 | + if (op == 1 || op == 3) { | ||
| 4338 | + /* Accumulate. */ | ||
| 4339 | + NEON_GET_REG(T1, rd, pass); | ||
| 4340 | + gen_neon_add(size); | ||
| 4341 | + } else if (op == 4 || (op == 5 && u)) { | ||
| 4342 | + /* Insert */ | ||
| 4343 | + switch (size) { | ||
| 4344 | + case 0: | ||
| 4345 | + if (op == 4) | ||
| 4346 | + imm = 0xff >> -shift; | ||
| 4347 | + else | ||
| 4348 | + imm = (uint8_t)(0xff << shift); | ||
| 4349 | + imm |= imm << 8; | ||
| 4350 | + imm |= imm << 16; | ||
| 4351 | + break; | ||
| 4352 | + case 1: | ||
| 4353 | + if (op == 4) | ||
| 4354 | + imm = 0xffff >> -shift; | ||
| 4355 | + else | ||
| 4356 | + imm = (uint16_t)(0xffff << shift); | ||
| 4357 | + imm |= imm << 16; | ||
| 4358 | + break; | ||
| 4359 | + case 2: | ||
| 4360 | + if (op == 4) | ||
| 4361 | + imm = 0xffffffffu >> -shift; | ||
| 4362 | + else | ||
| 4363 | + imm = 0xffffffffu << shift; | ||
| 4364 | + break; | ||
| 4365 | + default: | ||
| 4366 | + abort(); | ||
| 4367 | + } | ||
| 4368 | + tmp = neon_load_reg(rd, pass); | ||
| 4369 | + tcg_gen_andi_i32(cpu_T[0], cpu_T[0], imm); | ||
| 4370 | + tcg_gen_andi_i32(tmp, tmp, ~imm); | ||
| 4371 | + tcg_gen_or_i32(cpu_T[0], cpu_T[0], tmp); | ||
| 4372 | + } | ||
| 4284 | NEON_SET_REG(T0, rd, pass); | 4373 | NEON_SET_REG(T0, rd, pass); |
| 4285 | } | 4374 | } |
| 4286 | } /* for pass */ | 4375 | } /* for pass */ |
| 4287 | } else if (op < 10) { | 4376 | } else if (op < 10) { |
| 4288 | - /* Shift by immedaiate and narrow: | 4377 | + /* Shift by immediate and narrow: |
| 4289 | VSHRN, VRSHRN, VQSHRN, VQRSHRN. */ | 4378 | VSHRN, VRSHRN, VQSHRN, VQRSHRN. */ |
| 4290 | shift = shift - (1 << (size + 3)); | 4379 | shift = shift - (1 << (size + 3)); |
| 4291 | size++; | 4380 | size++; |
| 4292 | - if (size == 3) { | ||
| 4293 | - count = q + 1; | ||
| 4294 | - } else { | ||
| 4295 | - count = q ? 4: 2; | ||
| 4296 | - } | ||
| 4297 | switch (size) { | 4381 | switch (size) { |
| 4298 | case 1: | 4382 | case 1: |
| 4299 | - imm = (uint16_t) shift; | 4383 | + imm = (uint16_t)shift; |
| 4300 | imm |= imm << 16; | 4384 | imm |= imm << 16; |
| 4385 | + tmp2 = tcg_const_i32(imm); | ||
| 4301 | break; | 4386 | break; |
| 4302 | case 2: | 4387 | case 2: |
| 4388 | + imm = (uint32_t)shift; | ||
| 4389 | + tmp2 = tcg_const_i32(imm); | ||
| 4303 | case 3: | 4390 | case 3: |
| 4304 | - imm = shift; | 4391 | + tmp2 = tcg_const_i64(shift); |
| 4305 | break; | 4392 | break; |
| 4306 | default: | 4393 | default: |
| 4307 | abort(); | 4394 | abort(); |
| 4308 | } | 4395 | } |
| 4309 | 4396 | ||
| 4310 | - /* Processing MSB first means we need to do less shuffling at | ||
| 4311 | - the end. */ | ||
| 4312 | - for (pass = count - 1; pass >= 0; pass--) { | ||
| 4313 | - /* Avoid clobbering the second operand before it has been | ||
| 4314 | - written. */ | ||
| 4315 | - n = pass; | ||
| 4316 | - if (rd == rm) | ||
| 4317 | - n ^= (count - 1); | ||
| 4318 | - else | ||
| 4319 | - n = pass; | ||
| 4320 | - | ||
| 4321 | - if (size < 3) { | ||
| 4322 | - /* Operands in T0 and T1. */ | ||
| 4323 | - gen_op_movl_T1_im(imm); | ||
| 4324 | - NEON_GET_REG(T0, rm, n); | 4397 | + for (pass = 0; pass < 2; pass++) { |
| 4398 | + if (size == 3) { | ||
| 4399 | + neon_load_reg64(cpu_V0, rm + pass); | ||
| 4400 | + if (q) { | ||
| 4401 | + if (u) | ||
| 4402 | + gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp2); | ||
| 4403 | + else | ||
| 4404 | + gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp2); | ||
| 4405 | + } else { | ||
| 4406 | + if (u) | ||
| 4407 | + gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp2); | ||
| 4408 | + else | ||
| 4409 | + gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp2); | ||
| 4410 | + } | ||
| 4325 | } else { | 4411 | } else { |
| 4326 | - /* Operands in {T0, T1} and env->vfp.scratch. */ | ||
| 4327 | - gen_op_movl_T0_im(imm); | ||
| 4328 | - gen_neon_movl_scratch_T0(0); | ||
| 4329 | - gen_op_movl_T0_im((int32_t)imm >> 31); | ||
| 4330 | - gen_neon_movl_scratch_T0(1); | ||
| 4331 | - NEON_GET_REG(T0, rm, n * 2); | ||
| 4332 | - NEON_GET_REG(T0, rm, n * 2 + 1); | 4412 | + tmp = neon_load_reg(rm + pass, 0); |
| 4413 | + gen_neon_shift_narrow(size, tmp, tmp2, q, u); | ||
| 4414 | + tcg_gen_extu_i32_i64(cpu_V0, tmp); | ||
| 4415 | + dead_tmp(tmp); | ||
| 4416 | + tmp = neon_load_reg(rm + pass, 1); | ||
| 4417 | + gen_neon_shift_narrow(size, tmp, tmp2, q, u); | ||
| 4418 | + tcg_gen_extu_i32_i64(cpu_V1, tmp); | ||
| 4419 | + dead_tmp(tmp); | ||
| 4420 | + tcg_gen_shli_i64(cpu_V1, cpu_V1, 32); | ||
| 4421 | + tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); | ||
| 4333 | } | 4422 | } |
| 4334 | - | ||
| 4335 | - gen_neon_shift_im_narrow[q][u][size - 1](); | ||
| 4336 | - | ||
| 4337 | - if (size < 3 && (pass & 1) == 0) { | ||
| 4338 | - gen_neon_movl_scratch_T0(0); | 4423 | + tmp = new_tmp(); |
| 4424 | + if (op == 8 && !u) { | ||
| 4425 | + gen_neon_narrow(size - 1, tmp, cpu_V0); | ||
| 4339 | } else { | 4426 | } else { |
| 4340 | - uint32_t offset; | ||
| 4341 | - | ||
| 4342 | - if (size < 3) | ||
| 4343 | - gen_neon_movl_T1_scratch(0); | ||
| 4344 | - | ||
| 4345 | - if (op == 8 && !u) { | ||
| 4346 | - gen_neon_narrow[size - 1](); | ||
| 4347 | - } else { | ||
| 4348 | - if (op == 8) | ||
| 4349 | - gen_neon_narrow_sats[size - 2](); | ||
| 4350 | - else | ||
| 4351 | - gen_neon_narrow_satu[size - 1](); | ||
| 4352 | - } | ||
| 4353 | - if (size == 3) | ||
| 4354 | - offset = neon_reg_offset(rd, n); | 4427 | + if (op == 8) |
| 4428 | + gen_neon_narrow_sats(size - 1, tmp, cpu_V0); | ||
| 4355 | else | 4429 | else |
| 4356 | - offset = neon_reg_offset(rd, n >> 1); | ||
| 4357 | - gen_op_neon_setreg_T0(offset); | 4430 | + gen_neon_narrow_satu(size - 1, tmp, cpu_V0); |
| 4431 | + } | ||
| 4432 | + if (pass == 0) { | ||
| 4433 | + tmp2 = tmp; | ||
| 4434 | + } else { | ||
| 4435 | + neon_store_reg(rd, 0, tmp2); | ||
| 4436 | + neon_store_reg(rd, 1, tmp); | ||
| 4358 | } | 4437 | } |
| 4359 | } /* for pass */ | 4438 | } /* for pass */ |
| 4360 | } else if (op == 10) { | 4439 | } else if (op == 10) { |
| 4361 | /* VSHLL */ | 4440 | /* VSHLL */ |
| 4362 | - if (q) | 4441 | + if (q || size == 3) |
| 4363 | return 1; | 4442 | return 1; |
| 4443 | + tmp = neon_load_reg(rm, 0); | ||
| 4444 | + tmp2 = neon_load_reg(rm, 1); | ||
| 4364 | for (pass = 0; pass < 2; pass++) { | 4445 | for (pass = 0; pass < 2; pass++) { |
| 4365 | - /* Avoid clobbering the input operand. */ | ||
| 4366 | - if (rd == rm) | ||
| 4367 | - n = 1 - pass; | ||
| 4368 | - else | ||
| 4369 | - n = pass; | 4446 | + if (pass == 1) |
| 4447 | + tmp = tmp2; | ||
| 4448 | + | ||
| 4449 | + gen_neon_widen(cpu_V0, tmp, size, u); | ||
| 4370 | 4450 | ||
| 4371 | - NEON_GET_REG(T0, rm, n); | ||
| 4372 | - GEN_NEON_INTEGER_OP(widen); | ||
| 4373 | if (shift != 0) { | 4451 | if (shift != 0) { |
| 4374 | /* The shift is less than the width of the source | 4452 | /* The shift is less than the width of the source |
| 4375 | - type, so in some cases we can just | ||
| 4376 | - shift the whole register. */ | ||
| 4377 | - if (size == 1 || (size == 0 && u)) { | ||
| 4378 | - gen_op_shll_T0_im(shift); | ||
| 4379 | - gen_op_shll_T1_im(shift); | ||
| 4380 | - } else { | ||
| 4381 | - switch (size) { | ||
| 4382 | - case 0: gen_op_neon_shll_u16(shift); break; | ||
| 4383 | - case 2: gen_op_neon_shll_u64(shift); break; | ||
| 4384 | - default: abort(); | 4453 | + type, so we can just shift the whole register. */ |
| 4454 | + tcg_gen_shli_i64(cpu_V0, cpu_V0, shift); | ||
| 4455 | + if (size < 2 || !u) { | ||
| 4456 | + uint64_t imm64; | ||
| 4457 | + if (size == 0) { | ||
| 4458 | + imm = (0xffu >> (8 - shift)); | ||
| 4459 | + imm |= imm << 16; | ||
| 4460 | + } else { | ||
| 4461 | + imm = 0xffff >> (16 - shift); | ||
| 4385 | } | 4462 | } |
| 4463 | + imm64 = imm | (((uint64_t)imm) << 32); | ||
| 4464 | + tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64); | ||
| 4386 | } | 4465 | } |
| 4387 | } | 4466 | } |
| 4388 | - NEON_SET_REG(T0, rd, n * 2); | ||
| 4389 | - NEON_SET_REG(T1, rd, n * 2 + 1); | 4467 | + neon_store_reg64(cpu_V0, rd + pass); |
| 4390 | } | 4468 | } |
| 4391 | } else if (op == 15 || op == 16) { | 4469 | } else if (op == 15 || op == 16) { |
| 4392 | /* VCVT fixed-point. */ | 4470 | /* VCVT fixed-point. */ |
| @@ -4458,28 +4536,30 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4458,28 +4536,30 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4458 | 4536 | ||
| 4459 | for (pass = 0; pass < (q ? 4 : 2); pass++) { | 4537 | for (pass = 0; pass < (q ? 4 : 2); pass++) { |
| 4460 | if (op & 1 && op < 12) { | 4538 | if (op & 1 && op < 12) { |
| 4461 | - NEON_GET_REG(T0, rd, pass); | 4539 | + tmp = neon_load_reg(rd, pass); |
| 4462 | if (invert) { | 4540 | if (invert) { |
| 4463 | /* The immediate value has already been inverted, so | 4541 | /* The immediate value has already been inverted, so |
| 4464 | BIC becomes AND. */ | 4542 | BIC becomes AND. */ |
| 4465 | - gen_op_andl_T0_T1(); | 4543 | + tcg_gen_andi_i32(tmp, tmp, imm); |
| 4466 | } else { | 4544 | } else { |
| 4467 | - gen_op_orl_T0_T1(); | 4545 | + tcg_gen_ori_i32(tmp, tmp, imm); |
| 4468 | } | 4546 | } |
| 4469 | - NEON_SET_REG(T0, rd, pass); | ||
| 4470 | } else { | 4547 | } else { |
| 4548 | + /* VMOV, VMVN. */ | ||
| 4549 | + tmp = new_tmp(); | ||
| 4471 | if (op == 14 && invert) { | 4550 | if (op == 14 && invert) { |
| 4472 | - uint32_t tmp; | ||
| 4473 | - tmp = 0; | 4551 | + uint32_t val; |
| 4552 | + val = 0; | ||
| 4474 | for (n = 0; n < 4; n++) { | 4553 | for (n = 0; n < 4; n++) { |
| 4475 | if (imm & (1 << (n + (pass & 1) * 4))) | 4554 | if (imm & (1 << (n + (pass & 1) * 4))) |
| 4476 | - tmp |= 0xff << (n * 8); | 4555 | + val |= 0xff << (n * 8); |
| 4477 | } | 4556 | } |
| 4478 | - gen_op_movl_T1_im(tmp); | 4557 | + tcg_gen_movi_i32(tmp, val); |
| 4558 | + } else { | ||
| 4559 | + tcg_gen_movi_i32(tmp, imm); | ||
| 4479 | } | 4560 | } |
| 4480 | - /* VMOV, VMVN. */ | ||
| 4481 | - NEON_SET_REG(T1, rd, pass); | ||
| 4482 | } | 4561 | } |
| 4562 | + neon_store_reg(rd, pass, tmp); | ||
| 4483 | } | 4563 | } |
| 4484 | } | 4564 | } |
| 4485 | } else { /* (insn & 0x00800010 == 0x00800010) */ | 4565 | } else { /* (insn & 0x00800010 == 0x00800010) */ |
| @@ -4513,6 +4593,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4513,6 +4593,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4513 | src1_wide = neon_3reg_wide[op][1]; | 4593 | src1_wide = neon_3reg_wide[op][1]; |
| 4514 | src2_wide = neon_3reg_wide[op][2]; | 4594 | src2_wide = neon_3reg_wide[op][2]; |
| 4515 | 4595 | ||
| 4596 | + if (size == 0 && (op == 9 || op == 11 || op == 13)) | ||
| 4597 | + return 1; | ||
| 4598 | + | ||
| 4516 | /* Avoid overlapping operands. Wide source operands are | 4599 | /* Avoid overlapping operands. Wide source operands are |
| 4517 | always aligned so will never overlap with wide | 4600 | always aligned so will never overlap with wide |
| 4518 | destinations in problematic ways. */ | 4601 | destinations in problematic ways. */ |
| @@ -4524,87 +4607,69 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4524,87 +4607,69 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4524 | gen_neon_movl_scratch_T0(2); | 4607 | gen_neon_movl_scratch_T0(2); |
| 4525 | } | 4608 | } |
| 4526 | for (pass = 0; pass < 2; pass++) { | 4609 | for (pass = 0; pass < 2; pass++) { |
| 4527 | - /* Load the second operand into env->vfp.scratch. | ||
| 4528 | - Also widen narrow operands. */ | ||
| 4529 | - if (src2_wide) { | ||
| 4530 | - NEON_GET_REG(T0, rm, pass * 2); | ||
| 4531 | - NEON_GET_REG(T1, rm, pass * 2 + 1); | 4610 | + if (src1_wide) { |
| 4611 | + neon_load_reg64(cpu_V0, rn + pass); | ||
| 4532 | } else { | 4612 | } else { |
| 4533 | - if (pass == 1 && rd == rm) { | ||
| 4534 | - if (prewiden) { | ||
| 4535 | - gen_neon_movl_T0_scratch(2); | ||
| 4536 | - } else { | ||
| 4537 | - gen_neon_movl_T1_scratch(2); | ||
| 4538 | - } | 4613 | + if (pass == 1 && rd == rn) { |
| 4614 | + gen_neon_movl_T0_scratch(2); | ||
| 4615 | + tmp = new_tmp(); | ||
| 4616 | + tcg_gen_mov_i32(tmp, cpu_T[0]); | ||
| 4539 | } else { | 4617 | } else { |
| 4540 | - if (prewiden) { | ||
| 4541 | - NEON_GET_REG(T0, rm, pass); | ||
| 4542 | - } else { | ||
| 4543 | - NEON_GET_REG(T1, rm, pass); | ||
| 4544 | - } | 4618 | + tmp = neon_load_reg(rn, pass); |
| 4619 | + } | ||
| 4620 | + if (prewiden) { | ||
| 4621 | + gen_neon_widen(cpu_V0, tmp, size, u); | ||
| 4545 | } | 4622 | } |
| 4546 | } | 4623 | } |
| 4547 | - if (prewiden && !src2_wide) { | ||
| 4548 | - GEN_NEON_INTEGER_OP(widen); | ||
| 4549 | - } | ||
| 4550 | - if (prewiden || src2_wide) { | ||
| 4551 | - gen_neon_movl_scratch_T0(0); | ||
| 4552 | - gen_neon_movl_scratch_T1(1); | ||
| 4553 | - } | ||
| 4554 | - | ||
| 4555 | - /* Load the first operand. */ | ||
| 4556 | - if (src1_wide) { | ||
| 4557 | - NEON_GET_REG(T0, rn, pass * 2); | ||
| 4558 | - NEON_GET_REG(T1, rn, pass * 2 + 1); | 4624 | + if (src2_wide) { |
| 4625 | + neon_load_reg64(cpu_V1, rm + pass); | ||
| 4559 | } else { | 4626 | } else { |
| 4560 | - if (pass == 1 && rd == rn) { | 4627 | + if (pass == 1 && rd == rm) { |
| 4561 | gen_neon_movl_T0_scratch(2); | 4628 | gen_neon_movl_T0_scratch(2); |
| 4629 | + tmp2 = new_tmp(); | ||
| 4630 | + tcg_gen_mov_i32(tmp2, cpu_T[0]); | ||
| 4562 | } else { | 4631 | } else { |
| 4563 | - NEON_GET_REG(T0, rn, pass); | 4632 | + tmp2 = neon_load_reg(rm, pass); |
| 4633 | + } | ||
| 4634 | + if (prewiden) { | ||
| 4635 | + gen_neon_widen(cpu_V1, tmp2, size, u); | ||
| 4564 | } | 4636 | } |
| 4565 | - } | ||
| 4566 | - if (prewiden && !src1_wide) { | ||
| 4567 | - GEN_NEON_INTEGER_OP(widen); | ||
| 4568 | } | 4637 | } |
| 4569 | switch (op) { | 4638 | switch (op) { |
| 4570 | case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */ | 4639 | case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */ |
| 4571 | - switch (size) { | ||
| 4572 | - case 0: gen_op_neon_addl_u16(); break; | ||
| 4573 | - case 1: gen_op_neon_addl_u32(); break; | ||
| 4574 | - case 2: gen_op_neon_addl_u64(); break; | ||
| 4575 | - default: abort(); | ||
| 4576 | - } | 4640 | + gen_neon_addl(size); |
| 4577 | break; | 4641 | break; |
| 4578 | case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHL, VRSUBHL */ | 4642 | case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHL, VRSUBHL */ |
| 4579 | - switch (size) { | ||
| 4580 | - case 0: gen_op_neon_subl_u16(); break; | ||
| 4581 | - case 1: gen_op_neon_subl_u32(); break; | ||
| 4582 | - case 2: gen_op_neon_subl_u64(); break; | ||
| 4583 | - default: abort(); | ||
| 4584 | - } | 4643 | + gen_neon_subl(size); |
| 4585 | break; | 4644 | break; |
| 4586 | case 5: case 7: /* VABAL, VABDL */ | 4645 | case 5: case 7: /* VABAL, VABDL */ |
| 4587 | switch ((size << 1) | u) { | 4646 | switch ((size << 1) | u) { |
| 4588 | - case 0: gen_op_neon_abdl_s16(); break; | ||
| 4589 | - case 1: gen_op_neon_abdl_u16(); break; | ||
| 4590 | - case 2: gen_op_neon_abdl_s32(); break; | ||
| 4591 | - case 3: gen_op_neon_abdl_u32(); break; | ||
| 4592 | - case 4: gen_op_neon_abdl_s64(); break; | ||
| 4593 | - case 5: gen_op_neon_abdl_u64(); break; | 4647 | + case 0: |
| 4648 | + gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2); | ||
| 4649 | + break; | ||
| 4650 | + case 1: | ||
| 4651 | + gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2); | ||
| 4652 | + break; | ||
| 4653 | + case 2: | ||
| 4654 | + gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2); | ||
| 4655 | + break; | ||
| 4656 | + case 3: | ||
| 4657 | + gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2); | ||
| 4658 | + break; | ||
| 4659 | + case 4: | ||
| 4660 | + gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2); | ||
| 4661 | + break; | ||
| 4662 | + case 5: | ||
| 4663 | + gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2); | ||
| 4664 | + break; | ||
| 4594 | default: abort(); | 4665 | default: abort(); |
| 4595 | } | 4666 | } |
| 4667 | + dead_tmp(tmp2); | ||
| 4668 | + dead_tmp(tmp); | ||
| 4596 | break; | 4669 | break; |
| 4597 | case 8: case 9: case 10: case 11: case 12: case 13: | 4670 | case 8: case 9: case 10: case 11: case 12: case 13: |
| 4598 | /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */ | 4671 | /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */ |
| 4599 | - switch ((size << 1) | u) { | ||
| 4600 | - case 0: gen_op_neon_mull_s8(); break; | ||
| 4601 | - case 1: gen_op_neon_mull_u8(); break; | ||
| 4602 | - case 2: gen_op_neon_mull_s16(); break; | ||
| 4603 | - case 3: gen_op_neon_mull_u16(); break; | ||
| 4604 | - case 4: gen_op_imull_T0_T1(); break; | ||
| 4605 | - case 5: gen_op_mull_T0_T1(); break; | ||
| 4606 | - default: abort(); | ||
| 4607 | - } | 4672 | + gen_neon_mull(cpu_V0, tmp, tmp2, size, u); |
| 4608 | break; | 4673 | break; |
| 4609 | case 14: /* Polynomial VMULL */ | 4674 | case 14: /* Polynomial VMULL */ |
| 4610 | cpu_abort(env, "Polynomial VMULL not implemented"); | 4675 | cpu_abort(env, "Polynomial VMULL not implemented"); |
| @@ -4615,72 +4680,71 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4615,72 +4680,71 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4615 | if (op == 5 || op == 13 || (op >= 8 && op <= 11)) { | 4680 | if (op == 5 || op == 13 || (op >= 8 && op <= 11)) { |
| 4616 | /* Accumulate. */ | 4681 | /* Accumulate. */ |
| 4617 | if (op == 10 || op == 11) { | 4682 | if (op == 10 || op == 11) { |
| 4618 | - switch (size) { | ||
| 4619 | - case 0: gen_op_neon_negl_u16(); break; | ||
| 4620 | - case 1: gen_op_neon_negl_u32(); break; | ||
| 4621 | - case 2: gen_op_neon_negl_u64(); break; | ||
| 4622 | - default: abort(); | ||
| 4623 | - } | 4683 | + gen_neon_negl(cpu_V0, size); |
| 4624 | } | 4684 | } |
| 4625 | 4685 | ||
| 4626 | - gen_neon_movl_scratch_T0(0); | ||
| 4627 | - gen_neon_movl_scratch_T1(1); | ||
| 4628 | - | ||
| 4629 | if (op != 13) { | 4686 | if (op != 13) { |
| 4630 | - NEON_GET_REG(T0, rd, pass * 2); | ||
| 4631 | - NEON_GET_REG(T1, rd, pass * 2 + 1); | 4687 | + neon_load_reg64(cpu_V1, rd + pass); |
| 4632 | } | 4688 | } |
| 4633 | 4689 | ||
| 4634 | switch (op) { | 4690 | switch (op) { |
| 4635 | case 5: case 8: case 10: /* VABAL, VMLAL, VMLSL */ | 4691 | case 5: case 8: case 10: /* VABAL, VMLAL, VMLSL */ |
| 4636 | - switch (size) { | ||
| 4637 | - case 0: gen_op_neon_addl_u16(); break; | ||
| 4638 | - case 1: gen_op_neon_addl_u32(); break; | ||
| 4639 | - case 2: gen_op_neon_addl_u64(); break; | ||
| 4640 | - default: abort(); | ||
| 4641 | - } | 4692 | + gen_neon_addl(size); |
| 4642 | break; | 4693 | break; |
| 4643 | case 9: case 11: /* VQDMLAL, VQDMLSL */ | 4694 | case 9: case 11: /* VQDMLAL, VQDMLSL */ |
| 4644 | - switch (size) { | ||
| 4645 | - case 1: gen_op_neon_addl_saturate_s32(); break; | ||
| 4646 | - case 2: gen_op_neon_addl_saturate_s64(); break; | ||
| 4647 | - default: abort(); | ||
| 4648 | - } | 4695 | + gen_neon_addl_saturate(cpu_V0, cpu_V0, size); |
| 4696 | + gen_neon_addl_saturate(cpu_V0, cpu_V1, size); | ||
| 4697 | + break; | ||
| 4649 | /* Fall through. */ | 4698 | /* Fall through. */ |
| 4650 | case 13: /* VQDMULL */ | 4699 | case 13: /* VQDMULL */ |
| 4651 | - switch (size) { | ||
| 4652 | - case 1: gen_op_neon_addl_saturate_s32(); break; | ||
| 4653 | - case 2: gen_op_neon_addl_saturate_s64(); break; | ||
| 4654 | - default: abort(); | ||
| 4655 | - } | 4700 | + gen_neon_addl_saturate(cpu_V0, cpu_V0, size); |
| 4656 | break; | 4701 | break; |
| 4657 | default: | 4702 | default: |
| 4658 | abort(); | 4703 | abort(); |
| 4659 | } | 4704 | } |
| 4660 | - NEON_SET_REG(T0, rd, pass * 2); | ||
| 4661 | - NEON_SET_REG(T1, rd, pass * 2 + 1); | 4705 | + neon_store_reg64(cpu_V0, rd + pass); |
| 4662 | } else if (op == 4 || op == 6) { | 4706 | } else if (op == 4 || op == 6) { |
| 4663 | /* Narrowing operation. */ | 4707 | /* Narrowing operation. */ |
| 4708 | + tmp = new_tmp(); | ||
| 4664 | if (u) { | 4709 | if (u) { |
| 4665 | switch (size) { | 4710 | switch (size) { |
| 4666 | - case 0: gen_op_neon_narrow_high_u8(); break; | ||
| 4667 | - case 1: gen_op_neon_narrow_high_u16(); break; | ||
| 4668 | - case 2: gen_op_movl_T0_T1(); break; | 4711 | + case 0: |
| 4712 | + gen_helper_neon_narrow_high_u8(tmp, cpu_V0); | ||
| 4713 | + break; | ||
| 4714 | + case 1: | ||
| 4715 | + gen_helper_neon_narrow_high_u16(tmp, cpu_V0); | ||
| 4716 | + break; | ||
| 4717 | + case 2: | ||
| 4718 | + tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); | ||
| 4719 | + tcg_gen_trunc_i64_i32(tmp, cpu_V0); | ||
| 4720 | + break; | ||
| 4669 | default: abort(); | 4721 | default: abort(); |
| 4670 | } | 4722 | } |
| 4671 | } else { | 4723 | } else { |
| 4672 | switch (size) { | 4724 | switch (size) { |
| 4673 | - case 0: gen_op_neon_narrow_high_round_u8(); break; | ||
| 4674 | - case 1: gen_op_neon_narrow_high_round_u16(); break; | ||
| 4675 | - case 2: gen_op_neon_narrow_high_round_u32(); break; | 4725 | + case 0: |
| 4726 | + gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0); | ||
| 4727 | + break; | ||
| 4728 | + case 1: | ||
| 4729 | + gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0); | ||
| 4730 | + break; | ||
| 4731 | + case 2: | ||
| 4732 | + tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31); | ||
| 4733 | + tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); | ||
| 4734 | + tcg_gen_trunc_i64_i32(tmp, cpu_V0); | ||
| 4735 | + break; | ||
| 4676 | default: abort(); | 4736 | default: abort(); |
| 4677 | } | 4737 | } |
| 4678 | } | 4738 | } |
| 4679 | - NEON_SET_REG(T0, rd, pass); | 4739 | + if (pass == 0) { |
| 4740 | + tmp3 = tmp; | ||
| 4741 | + } else { | ||
| 4742 | + neon_store_reg(rd, 0, tmp3); | ||
| 4743 | + neon_store_reg(rd, 1, tmp); | ||
| 4744 | + } | ||
| 4680 | } else { | 4745 | } else { |
| 4681 | /* Write back the result. */ | 4746 | /* Write back the result. */ |
| 4682 | - NEON_SET_REG(T0, rd, pass * 2); | ||
| 4683 | - NEON_SET_REG(T1, rd, pass * 2 + 1); | 4747 | + neon_store_reg64(cpu_V0, rd + pass); |
| 4684 | } | 4748 | } |
| 4685 | } | 4749 | } |
| 4686 | } else { | 4750 | } else { |
| @@ -4702,22 +4766,22 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4702,22 +4766,22 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4702 | NEON_GET_REG(T1, rn, pass); | 4766 | NEON_GET_REG(T1, rn, pass); |
| 4703 | if (op == 12) { | 4767 | if (op == 12) { |
| 4704 | if (size == 1) { | 4768 | if (size == 1) { |
| 4705 | - gen_op_neon_qdmulh_s16(); | 4769 | + gen_helper_neon_qdmulh_s16(CPU_T0E01); |
| 4706 | } else { | 4770 | } else { |
| 4707 | - gen_op_neon_qdmulh_s32(); | 4771 | + gen_helper_neon_qdmulh_s32(CPU_T0E01); |
| 4708 | } | 4772 | } |
| 4709 | } else if (op == 13) { | 4773 | } else if (op == 13) { |
| 4710 | if (size == 1) { | 4774 | if (size == 1) { |
| 4711 | - gen_op_neon_qrdmulh_s16(); | 4775 | + gen_helper_neon_qrdmulh_s16(CPU_T0E01); |
| 4712 | } else { | 4776 | } else { |
| 4713 | - gen_op_neon_qrdmulh_s32(); | 4777 | + gen_helper_neon_qrdmulh_s32(CPU_T0E01); |
| 4714 | } | 4778 | } |
| 4715 | } else if (op & 1) { | 4779 | } else if (op & 1) { |
| 4716 | - gen_op_neon_mul_f32(); | 4780 | + gen_helper_neon_mul_f32(CPU_T001); |
| 4717 | } else { | 4781 | } else { |
| 4718 | switch (size) { | 4782 | switch (size) { |
| 4719 | - case 0: gen_op_neon_mul_u8(); break; | ||
| 4720 | - case 1: gen_op_neon_mul_u16(); break; | 4783 | + case 0: gen_helper_neon_mul_u8(CPU_T001); break; |
| 4784 | + case 1: gen_helper_neon_mul_u16(CPU_T001); break; | ||
| 4721 | case 2: gen_op_mul_T0_T1(); break; | 4785 | case 2: gen_op_mul_T0_T1(); break; |
| 4722 | default: return 1; | 4786 | default: return 1; |
| 4723 | } | 4787 | } |
| @@ -4730,18 +4794,13 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4730,18 +4794,13 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4730 | gen_neon_add(size); | 4794 | gen_neon_add(size); |
| 4731 | break; | 4795 | break; |
| 4732 | case 1: | 4796 | case 1: |
| 4733 | - gen_op_neon_add_f32(); | 4797 | + gen_helper_neon_add_f32(CPU_T001); |
| 4734 | break; | 4798 | break; |
| 4735 | case 4: | 4799 | case 4: |
| 4736 | - switch (size) { | ||
| 4737 | - case 0: gen_op_neon_rsb_u8(); break; | ||
| 4738 | - case 1: gen_op_neon_rsb_u16(); break; | ||
| 4739 | - case 2: gen_op_rsbl_T0_T1(); break; | ||
| 4740 | - default: return 1; | ||
| 4741 | - } | 4800 | + gen_neon_rsb(size); |
| 4742 | break; | 4801 | break; |
| 4743 | case 5: | 4802 | case 5: |
| 4744 | - gen_op_neon_rsb_f32(); | 4803 | + gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]); |
| 4745 | break; | 4804 | break; |
| 4746 | default: | 4805 | default: |
| 4747 | abort(); | 4806 | abort(); |
| @@ -4756,81 +4815,46 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4756,81 +4815,46 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4756 | case 7: /* VQDMLSL scalar */ | 4815 | case 7: /* VQDMLSL scalar */ |
| 4757 | case 10: /* VMULL scalar */ | 4816 | case 10: /* VMULL scalar */ |
| 4758 | case 11: /* VQDMULL scalar */ | 4817 | case 11: /* VQDMULL scalar */ |
| 4759 | - if (rd == rn) { | ||
| 4760 | - /* Save overlapping operands before they are | ||
| 4761 | - clobbered. */ | ||
| 4762 | - NEON_GET_REG(T0, rn, 1); | ||
| 4763 | - gen_neon_movl_scratch_T0(2); | ||
| 4764 | - } | 4818 | + if (size == 0 && (op == 3 || op == 7 || op == 11)) |
| 4819 | + return 1; | ||
| 4820 | + | ||
| 4765 | gen_neon_get_scalar(size, rm); | 4821 | gen_neon_get_scalar(size, rm); |
| 4766 | - gen_neon_movl_scratch_T0(3); | 4822 | + NEON_GET_REG(T1, rn, 1); |
| 4823 | + | ||
| 4767 | for (pass = 0; pass < 2; pass++) { | 4824 | for (pass = 0; pass < 2; pass++) { |
| 4768 | - if (pass != 0) { | ||
| 4769 | - gen_neon_movl_T0_scratch(3); | ||
| 4770 | - } | ||
| 4771 | - if (pass != 0 && rd == rn) { | ||
| 4772 | - gen_neon_movl_T1_scratch(2); | 4825 | + if (pass == 0) { |
| 4826 | + tmp = neon_load_reg(rn, 0); | ||
| 4773 | } else { | 4827 | } else { |
| 4774 | - NEON_GET_REG(T1, rn, pass); | ||
| 4775 | - } | ||
| 4776 | - switch ((size << 1) | u) { | ||
| 4777 | - case 0: gen_op_neon_mull_s8(); break; | ||
| 4778 | - case 1: gen_op_neon_mull_u8(); break; | ||
| 4779 | - case 2: gen_op_neon_mull_s16(); break; | ||
| 4780 | - case 3: gen_op_neon_mull_u16(); break; | ||
| 4781 | - case 4: gen_op_imull_T0_T1(); break; | ||
| 4782 | - case 5: gen_op_mull_T0_T1(); break; | ||
| 4783 | - default: abort(); | 4828 | + tmp = new_tmp(); |
| 4829 | + tcg_gen_mov_i32(tmp, cpu_T[1]); | ||
| 4784 | } | 4830 | } |
| 4831 | + tmp2 = new_tmp(); | ||
| 4832 | + tcg_gen_mov_i32(tmp2, cpu_T[0]); | ||
| 4833 | + gen_neon_mull(cpu_V0, tmp, tmp2, size, u); | ||
| 4785 | if (op == 6 || op == 7) { | 4834 | if (op == 6 || op == 7) { |
| 4786 | - switch (size) { | ||
| 4787 | - case 0: gen_op_neon_negl_u16(); break; | ||
| 4788 | - case 1: gen_op_neon_negl_u32(); break; | ||
| 4789 | - case 2: gen_op_neon_negl_u64(); break; | ||
| 4790 | - default: abort(); | ||
| 4791 | - } | 4835 | + gen_neon_negl(cpu_V0, size); |
| 4836 | + } | ||
| 4837 | + if (op != 11) { | ||
| 4838 | + neon_load_reg64(cpu_V1, rd + pass); | ||
| 4792 | } | 4839 | } |
| 4793 | - gen_neon_movl_scratch_T0(0); | ||
| 4794 | - gen_neon_movl_scratch_T1(1); | ||
| 4795 | - NEON_GET_REG(T0, rd, pass * 2); | ||
| 4796 | - NEON_GET_REG(T1, rd, pass * 2 + 1); | ||
| 4797 | switch (op) { | 4840 | switch (op) { |
| 4798 | case 2: case 6: | 4841 | case 2: case 6: |
| 4799 | - switch (size) { | ||
| 4800 | - case 0: gen_op_neon_addl_u16(); break; | ||
| 4801 | - case 1: gen_op_neon_addl_u32(); break; | ||
| 4802 | - case 2: gen_op_neon_addl_u64(); break; | ||
| 4803 | - default: abort(); | ||
| 4804 | - } | 4842 | + gen_neon_addl(size); |
| 4805 | break; | 4843 | break; |
| 4806 | case 3: case 7: | 4844 | case 3: case 7: |
| 4807 | - switch (size) { | ||
| 4808 | - case 1: | ||
| 4809 | - gen_op_neon_addl_saturate_s32(); | ||
| 4810 | - gen_op_neon_addl_saturate_s32(); | ||
| 4811 | - break; | ||
| 4812 | - case 2: | ||
| 4813 | - gen_op_neon_addl_saturate_s64(); | ||
| 4814 | - gen_op_neon_addl_saturate_s64(); | ||
| 4815 | - break; | ||
| 4816 | - default: abort(); | ||
| 4817 | - } | 4845 | + gen_neon_addl_saturate(cpu_V0, cpu_V0, size); |
| 4846 | + gen_neon_addl_saturate(cpu_V0, cpu_V1, size); | ||
| 4818 | break; | 4847 | break; |
| 4819 | case 10: | 4848 | case 10: |
| 4820 | /* no-op */ | 4849 | /* no-op */ |
| 4821 | break; | 4850 | break; |
| 4822 | case 11: | 4851 | case 11: |
| 4823 | - switch (size) { | ||
| 4824 | - case 1: gen_op_neon_addl_saturate_s32(); break; | ||
| 4825 | - case 2: gen_op_neon_addl_saturate_s64(); break; | ||
| 4826 | - default: abort(); | ||
| 4827 | - } | 4852 | + gen_neon_addl_saturate(cpu_V0, cpu_V0, size); |
| 4828 | break; | 4853 | break; |
| 4829 | default: | 4854 | default: |
| 4830 | abort(); | 4855 | abort(); |
| 4831 | } | 4856 | } |
| 4832 | - NEON_SET_REG(T0, rd, pass * 2); | ||
| 4833 | - NEON_SET_REG(T1, rd, pass * 2 + 1); | 4857 | + neon_store_reg64(cpu_V0, rd + pass); |
| 4834 | } | 4858 | } |
| 4835 | break; | 4859 | break; |
| 4836 | default: /* 14 and 15 are RESERVED */ | 4860 | default: /* 14 and 15 are RESERVED */ |
| @@ -4840,29 +4864,53 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4840,29 +4864,53 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4840 | } else { /* size == 3 */ | 4864 | } else { /* size == 3 */ |
| 4841 | if (!u) { | 4865 | if (!u) { |
| 4842 | /* Extract. */ | 4866 | /* Extract. */ |
| 4843 | - int reg; | ||
| 4844 | imm = (insn >> 8) & 0xf; | 4867 | imm = (insn >> 8) & 0xf; |
| 4845 | - reg = rn; | ||
| 4846 | - count = q ? 4 : 2; | ||
| 4847 | - n = imm >> 2; | ||
| 4848 | - NEON_GET_REG(T0, reg, n); | ||
| 4849 | - for (pass = 0; pass < count; pass++) { | ||
| 4850 | - n++; | ||
| 4851 | - if (n > count) { | ||
| 4852 | - reg = rm; | ||
| 4853 | - n -= count; | 4868 | + count = q + 1; |
| 4869 | + | ||
| 4870 | + if (imm > 7 && !q) | ||
| 4871 | + return 1; | ||
| 4872 | + | ||
| 4873 | + if (imm == 0) { | ||
| 4874 | + neon_load_reg64(cpu_V0, rn); | ||
| 4875 | + if (q) { | ||
| 4876 | + neon_load_reg64(cpu_V1, rn + 1); | ||
| 4854 | } | 4877 | } |
| 4855 | - if (imm & 3) { | ||
| 4856 | - NEON_GET_REG(T1, reg, n); | ||
| 4857 | - gen_op_neon_extract((insn << 3) & 0x1f); | 4878 | + } else if (imm == 8) { |
| 4879 | + neon_load_reg64(cpu_V0, rn + 1); | ||
| 4880 | + if (q) { | ||
| 4881 | + neon_load_reg64(cpu_V1, rm); | ||
| 4858 | } | 4882 | } |
| 4859 | - /* ??? This is broken if rd and rm overlap */ | ||
| 4860 | - NEON_SET_REG(T0, rd, pass); | ||
| 4861 | - if (imm & 3) { | ||
| 4862 | - gen_op_movl_T0_T1(); | 4883 | + } else if (q) { |
| 4884 | + tmp = tcg_temp_new(TCG_TYPE_I64); | ||
| 4885 | + if (imm < 8) { | ||
| 4886 | + neon_load_reg64(cpu_V0, rn); | ||
| 4887 | + neon_load_reg64(tmp, rn + 1); | ||
| 4888 | + } else { | ||
| 4889 | + neon_load_reg64(cpu_V0, rn + 1); | ||
| 4890 | + neon_load_reg64(tmp, rm); | ||
| 4891 | + } | ||
| 4892 | + tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8); | ||
| 4893 | + tcg_gen_shli_i64(cpu_V1, tmp, 64 - ((imm & 7) * 8)); | ||
| 4894 | + tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); | ||
| 4895 | + if (imm < 8) { | ||
| 4896 | + neon_load_reg64(cpu_V1, rm); | ||
| 4863 | } else { | 4897 | } else { |
| 4864 | - NEON_GET_REG(T0, reg, n); | 4898 | + neon_load_reg64(cpu_V1, rm + 1); |
| 4899 | + imm -= 8; | ||
| 4865 | } | 4900 | } |
| 4901 | + tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8)); | ||
| 4902 | + tcg_gen_shri_i64(tmp, tmp, imm * 8); | ||
| 4903 | + tcg_gen_or_i64(cpu_V1, cpu_V1, tmp); | ||
| 4904 | + } else { | ||
| 4905 | + neon_load_reg64(cpu_V0, rn); | ||
| 4906 | + tcg_gen_shri_i32(cpu_V0, cpu_V0, imm * 8); | ||
| 4907 | + neon_load_reg64(cpu_V1, rm); | ||
| 4908 | + tcg_gen_shli_i32(cpu_V1, cpu_V1, 64 - (imm * 8)); | ||
| 4909 | + tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); | ||
| 4910 | + } | ||
| 4911 | + neon_store_reg64(cpu_V0, rd); | ||
| 4912 | + if (q) { | ||
| 4913 | + neon_store_reg64(cpu_V1, rd + 1); | ||
| 4866 | } | 4914 | } |
| 4867 | } else if ((insn & (1 << 11)) == 0) { | 4915 | } else if ((insn & (1 << 11)) == 0) { |
| 4868 | /* Two register misc. */ | 4916 | /* Two register misc. */ |
| @@ -4897,28 +4945,25 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4897,28 +4945,25 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4897 | break; | 4945 | break; |
| 4898 | case 4: case 5: /* VPADDL */ | 4946 | case 4: case 5: /* VPADDL */ |
| 4899 | case 12: case 13: /* VPADAL */ | 4947 | case 12: case 13: /* VPADAL */ |
| 4900 | - if (size < 2) | ||
| 4901 | - goto elementwise; | ||
| 4902 | if (size == 3) | 4948 | if (size == 3) |
| 4903 | return 1; | 4949 | return 1; |
| 4904 | - for (pass = 0; pass < (q ? 2 : 1); pass++) { | ||
| 4905 | - NEON_GET_REG(T0, rm, pass * 2); | ||
| 4906 | - NEON_GET_REG(T1, rm, pass * 2 + 1); | ||
| 4907 | - if (op & 1) | ||
| 4908 | - gen_op_neon_paddl_u32(); | ||
| 4909 | - else | ||
| 4910 | - gen_op_neon_paddl_s32(); | 4950 | + for (pass = 0; pass < q + 1; pass++) { |
| 4951 | + tmp = neon_load_reg(rm, pass * 2); | ||
| 4952 | + gen_neon_widen(cpu_V0, tmp, size, op & 1); | ||
| 4953 | + tmp = neon_load_reg(rm, pass * 2 + 1); | ||
| 4954 | + gen_neon_widen(cpu_V1, tmp, size, op & 1); | ||
| 4955 | + switch (size) { | ||
| 4956 | + case 0: gen_helper_neon_paddl_u16(CPU_V001); break; | ||
| 4957 | + case 1: gen_helper_neon_paddl_u32(CPU_V001); break; | ||
| 4958 | + case 2: tcg_gen_add_i64(CPU_V001); break; | ||
| 4959 | + default: abort(); | ||
| 4960 | + } | ||
| 4911 | if (op >= 12) { | 4961 | if (op >= 12) { |
| 4912 | /* Accumulate. */ | 4962 | /* Accumulate. */ |
| 4913 | - gen_neon_movl_scratch_T0(0); | ||
| 4914 | - gen_neon_movl_scratch_T1(1); | ||
| 4915 | - | ||
| 4916 | - NEON_GET_REG(T0, rd, pass * 2); | ||
| 4917 | - NEON_GET_REG(T1, rd, pass * 2 + 1); | ||
| 4918 | - gen_op_neon_addl_u64(); | 4963 | + neon_load_reg64(cpu_V1, rd + pass); |
| 4964 | + gen_neon_addl(size); | ||
| 4919 | } | 4965 | } |
| 4920 | - NEON_SET_REG(T0, rd, pass * 2); | ||
| 4921 | - NEON_SET_REG(T1, rd, pass * 2 + 1); | 4966 | + neon_store_reg64(cpu_V0, rd + pass); |
| 4922 | } | 4967 | } |
| 4923 | break; | 4968 | break; |
| 4924 | case 33: /* VTRN */ | 4969 | case 33: /* VTRN */ |
| @@ -4972,8 +5017,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4972,8 +5017,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4972 | NEON_GET_REG(T0, rd, n); | 5017 | NEON_GET_REG(T0, rd, n); |
| 4973 | NEON_GET_REG(T1, rd, n); | 5018 | NEON_GET_REG(T1, rd, n); |
| 4974 | switch (size) { | 5019 | switch (size) { |
| 4975 | - case 0: gen_op_neon_zip_u8(); break; | ||
| 4976 | - case 1: gen_op_neon_zip_u16(); break; | 5020 | + case 0: gen_helper_neon_zip_u8(); break; |
| 5021 | + case 1: gen_helper_neon_zip_u16(); break; | ||
| 4977 | case 2: /* no-op */; break; | 5022 | case 2: /* no-op */; break; |
| 4978 | default: abort(); | 5023 | default: abort(); |
| 4979 | } | 5024 | } |
| @@ -4987,63 +5032,36 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -4987,63 +5032,36 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 4987 | } | 5032 | } |
| 4988 | break; | 5033 | break; |
| 4989 | case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */ | 5034 | case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */ |
| 5035 | + if (size == 3) | ||
| 5036 | + return 1; | ||
| 4990 | for (pass = 0; pass < 2; pass++) { | 5037 | for (pass = 0; pass < 2; pass++) { |
| 4991 | - if (rd == rm + 1) { | ||
| 4992 | - n = 1 - pass; | ||
| 4993 | - } else { | ||
| 4994 | - n = pass; | ||
| 4995 | - } | ||
| 4996 | - NEON_GET_REG(T0, rm, n * 2); | ||
| 4997 | - NEON_GET_REG(T1, rm, n * 2 + 1); | 5038 | + neon_load_reg64(cpu_V0, rm + pass); |
| 5039 | + tmp = new_tmp(); | ||
| 4998 | if (op == 36 && q == 0) { | 5040 | if (op == 36 && q == 0) { |
| 4999 | - switch (size) { | ||
| 5000 | - case 0: gen_op_neon_narrow_u8(); break; | ||
| 5001 | - case 1: gen_op_neon_narrow_u16(); break; | ||
| 5002 | - case 2: /* no-op */ break; | ||
| 5003 | - default: return 1; | ||
| 5004 | - } | 5041 | + gen_neon_narrow(size, tmp, cpu_V0); |
| 5005 | } else if (q) { | 5042 | } else if (q) { |
| 5006 | - switch (size) { | ||
| 5007 | - case 0: gen_op_neon_narrow_sat_u8(); break; | ||
| 5008 | - case 1: gen_op_neon_narrow_sat_u16(); break; | ||
| 5009 | - case 2: gen_op_neon_narrow_sat_u32(); break; | ||
| 5010 | - default: return 1; | ||
| 5011 | - } | 5043 | + gen_neon_narrow_satu(size, tmp, cpu_V0); |
| 5012 | } else { | 5044 | } else { |
| 5013 | - switch (size) { | ||
| 5014 | - case 0: gen_op_neon_narrow_sat_s8(); break; | ||
| 5015 | - case 1: gen_op_neon_narrow_sat_s16(); break; | ||
| 5016 | - case 2: gen_op_neon_narrow_sat_s32(); break; | ||
| 5017 | - default: return 1; | ||
| 5018 | - } | 5045 | + gen_neon_narrow_sats(size, tmp, cpu_V0); |
| 5046 | + } | ||
| 5047 | + if (pass == 0) { | ||
| 5048 | + tmp2 = tmp; | ||
| 5049 | + } else { | ||
| 5050 | + neon_store_reg(rd, 0, tmp2); | ||
| 5051 | + neon_store_reg(rd, 1, tmp); | ||
| 5019 | } | 5052 | } |
| 5020 | - NEON_SET_REG(T0, rd, n); | ||
| 5021 | } | 5053 | } |
| 5022 | break; | 5054 | break; |
| 5023 | case 38: /* VSHLL */ | 5055 | case 38: /* VSHLL */ |
| 5024 | - if (q) | 5056 | + if (q || size == 3) |
| 5025 | return 1; | 5057 | return 1; |
| 5026 | - if (rm == rd) { | ||
| 5027 | - NEON_GET_REG(T0, rm, 1); | ||
| 5028 | - gen_neon_movl_scratch_T0(0); | ||
| 5029 | - } | 5058 | + tmp = neon_load_reg(rm, 0); |
| 5059 | + tmp2 = neon_load_reg(rm, 1); | ||
| 5030 | for (pass = 0; pass < 2; pass++) { | 5060 | for (pass = 0; pass < 2; pass++) { |
| 5031 | - if (pass == 1 && rm == rd) { | ||
| 5032 | - gen_neon_movl_T0_scratch(0); | ||
| 5033 | - } else { | ||
| 5034 | - NEON_GET_REG(T0, rm, pass); | ||
| 5035 | - } | ||
| 5036 | - switch (size) { | ||
| 5037 | - case 0: gen_op_neon_widen_high_u8(); break; | ||
| 5038 | - case 1: gen_op_neon_widen_high_u16(); break; | ||
| 5039 | - case 2: | ||
| 5040 | - gen_op_movl_T1_T0(); | ||
| 5041 | - gen_op_movl_T0_im(0); | ||
| 5042 | - break; | ||
| 5043 | - default: return 1; | ||
| 5044 | - } | ||
| 5045 | - NEON_SET_REG(T0, rd, pass * 2); | ||
| 5046 | - NEON_SET_REG(T1, rd, pass * 2 + 1); | 5061 | + if (pass == 1) |
| 5062 | + tmp = tmp2; | ||
| 5063 | + gen_neon_widen(cpu_V0, tmp, size, 1); | ||
| 5064 | + neon_store_reg64(cpu_V0, rd + pass); | ||
| 5047 | } | 5065 | } |
| 5048 | break; | 5066 | break; |
| 5049 | default: | 5067 | default: |
| @@ -5068,37 +5086,18 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -5068,37 +5086,18 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 5068 | return 1; | 5086 | return 1; |
| 5069 | gen_rev16(cpu_T[0]); | 5087 | gen_rev16(cpu_T[0]); |
| 5070 | break; | 5088 | break; |
| 5071 | - case 4: case 5: /* VPADDL */ | ||
| 5072 | - case 12: case 13: /* VPADAL */ | ||
| 5073 | - switch ((size << 1) | (op & 1)) { | ||
| 5074 | - case 0: gen_op_neon_paddl_s8(); break; | ||
| 5075 | - case 1: gen_op_neon_paddl_u8(); break; | ||
| 5076 | - case 2: gen_op_neon_paddl_s16(); break; | ||
| 5077 | - case 3: gen_op_neon_paddl_u16(); break; | ||
| 5078 | - default: abort(); | ||
| 5079 | - } | ||
| 5080 | - if (op >= 12) { | ||
| 5081 | - /* Accumulate */ | ||
| 5082 | - NEON_GET_REG(T1, rd, pass); | ||
| 5083 | - switch (size) { | ||
| 5084 | - case 0: gen_op_neon_add_u16(); break; | ||
| 5085 | - case 1: gen_op_addl_T0_T1(); break; | ||
| 5086 | - default: abort(); | ||
| 5087 | - } | ||
| 5088 | - } | ||
| 5089 | - break; | ||
| 5090 | case 8: /* CLS */ | 5089 | case 8: /* CLS */ |
| 5091 | switch (size) { | 5090 | switch (size) { |
| 5092 | - case 0: gen_op_neon_cls_s8(); break; | ||
| 5093 | - case 1: gen_op_neon_cls_s16(); break; | ||
| 5094 | - case 2: gen_op_neon_cls_s32(); break; | 5091 | + case 0: gen_helper_neon_cls_s8(cpu_T[0], cpu_T[0]); break; |
| 5092 | + case 1: gen_helper_neon_cls_s16(cpu_T[0], cpu_T[0]); break; | ||
| 5093 | + case 2: gen_helper_neon_cls_s32(cpu_T[0], cpu_T[0]); break; | ||
| 5095 | default: return 1; | 5094 | default: return 1; |
| 5096 | } | 5095 | } |
| 5097 | break; | 5096 | break; |
| 5098 | case 9: /* CLZ */ | 5097 | case 9: /* CLZ */ |
| 5099 | switch (size) { | 5098 | switch (size) { |
| 5100 | - case 0: gen_op_neon_clz_u8(); break; | ||
| 5101 | - case 1: gen_op_neon_clz_u16(); break; | 5099 | + case 0: gen_helper_neon_clz_u8(cpu_T[0], cpu_T[0]); break; |
| 5100 | + case 1: gen_helper_neon_clz_u16(cpu_T[0], cpu_T[0]); break; | ||
| 5102 | case 2: gen_helper_clz(cpu_T[0], cpu_T[0]); break; | 5101 | case 2: gen_helper_clz(cpu_T[0], cpu_T[0]); break; |
| 5103 | default: return 1; | 5102 | default: return 1; |
| 5104 | } | 5103 | } |
| @@ -5106,7 +5105,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -5106,7 +5105,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 5106 | case 10: /* CNT */ | 5105 | case 10: /* CNT */ |
| 5107 | if (size != 0) | 5106 | if (size != 0) |
| 5108 | return 1; | 5107 | return 1; |
| 5109 | - gen_op_neon_cnt_u8(); | 5108 | + gen_helper_neon_cnt_u8(cpu_T[0], cpu_T[0]); |
| 5110 | break; | 5109 | break; |
| 5111 | case 11: /* VNOT */ | 5110 | case 11: /* VNOT */ |
| 5112 | if (size != 0) | 5111 | if (size != 0) |
| @@ -5115,26 +5114,26 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -5115,26 +5114,26 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 5115 | break; | 5114 | break; |
| 5116 | case 14: /* VQABS */ | 5115 | case 14: /* VQABS */ |
| 5117 | switch (size) { | 5116 | switch (size) { |
| 5118 | - case 0: gen_op_neon_qabs_s8(); break; | ||
| 5119 | - case 1: gen_op_neon_qabs_s16(); break; | ||
| 5120 | - case 2: gen_op_neon_qabs_s32(); break; | 5117 | + case 0: gen_helper_neon_qabs_s8(cpu_T[0], cpu_env, cpu_T[0]); break; |
| 5118 | + case 1: gen_helper_neon_qabs_s16(cpu_T[0], cpu_env, cpu_T[0]); break; | ||
| 5119 | + case 2: gen_helper_neon_qabs_s32(cpu_T[0], cpu_env, cpu_T[0]); break; | ||
| 5121 | default: return 1; | 5120 | default: return 1; |
| 5122 | } | 5121 | } |
| 5123 | break; | 5122 | break; |
| 5124 | case 15: /* VQNEG */ | 5123 | case 15: /* VQNEG */ |
| 5125 | switch (size) { | 5124 | switch (size) { |
| 5126 | - case 0: gen_op_neon_qneg_s8(); break; | ||
| 5127 | - case 1: gen_op_neon_qneg_s16(); break; | ||
| 5128 | - case 2: gen_op_neon_qneg_s32(); break; | 5125 | + case 0: gen_helper_neon_qneg_s8(cpu_T[0], cpu_env, cpu_T[0]); break; |
| 5126 | + case 1: gen_helper_neon_qneg_s16(cpu_T[0], cpu_env, cpu_T[0]); break; | ||
| 5127 | + case 2: gen_helper_neon_qneg_s32(cpu_T[0], cpu_env, cpu_T[0]); break; | ||
| 5129 | default: return 1; | 5128 | default: return 1; |
| 5130 | } | 5129 | } |
| 5131 | break; | 5130 | break; |
| 5132 | case 16: case 19: /* VCGT #0, VCLE #0 */ | 5131 | case 16: case 19: /* VCGT #0, VCLE #0 */ |
| 5133 | gen_op_movl_T1_im(0); | 5132 | gen_op_movl_T1_im(0); |
| 5134 | switch(size) { | 5133 | switch(size) { |
| 5135 | - case 0: gen_op_neon_cgt_s8(); break; | ||
| 5136 | - case 1: gen_op_neon_cgt_s16(); break; | ||
| 5137 | - case 2: gen_op_neon_cgt_s32(); break; | 5134 | + case 0: gen_helper_neon_cgt_s8(CPU_T001); break; |
| 5135 | + case 1: gen_helper_neon_cgt_s16(CPU_T001); break; | ||
| 5136 | + case 2: gen_helper_neon_cgt_s32(CPU_T001); break; | ||
| 5138 | default: return 1; | 5137 | default: return 1; |
| 5139 | } | 5138 | } |
| 5140 | if (op == 19) | 5139 | if (op == 19) |
| @@ -5143,9 +5142,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -5143,9 +5142,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 5143 | case 17: case 20: /* VCGE #0, VCLT #0 */ | 5142 | case 17: case 20: /* VCGE #0, VCLT #0 */ |
| 5144 | gen_op_movl_T1_im(0); | 5143 | gen_op_movl_T1_im(0); |
| 5145 | switch(size) { | 5144 | switch(size) { |
| 5146 | - case 0: gen_op_neon_cge_s8(); break; | ||
| 5147 | - case 1: gen_op_neon_cge_s16(); break; | ||
| 5148 | - case 2: gen_op_neon_cge_s32(); break; | 5145 | + case 0: gen_helper_neon_cge_s8(CPU_T001); break; |
| 5146 | + case 1: gen_helper_neon_cge_s16(CPU_T001); break; | ||
| 5147 | + case 2: gen_helper_neon_cge_s32(CPU_T001); break; | ||
| 5149 | default: return 1; | 5148 | default: return 1; |
| 5150 | } | 5149 | } |
| 5151 | if (op == 20) | 5150 | if (op == 20) |
| @@ -5154,44 +5153,41 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -5154,44 +5153,41 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 5154 | case 18: /* VCEQ #0 */ | 5153 | case 18: /* VCEQ #0 */ |
| 5155 | gen_op_movl_T1_im(0); | 5154 | gen_op_movl_T1_im(0); |
| 5156 | switch(size) { | 5155 | switch(size) { |
| 5157 | - case 0: gen_op_neon_ceq_u8(); break; | ||
| 5158 | - case 1: gen_op_neon_ceq_u16(); break; | ||
| 5159 | - case 2: gen_op_neon_ceq_u32(); break; | 5156 | + case 0: gen_helper_neon_ceq_u8(CPU_T001); break; |
| 5157 | + case 1: gen_helper_neon_ceq_u16(CPU_T001); break; | ||
| 5158 | + case 2: gen_helper_neon_ceq_u32(CPU_T001); break; | ||
| 5160 | default: return 1; | 5159 | default: return 1; |
| 5161 | } | 5160 | } |
| 5162 | break; | 5161 | break; |
| 5163 | case 22: /* VABS */ | 5162 | case 22: /* VABS */ |
| 5164 | switch(size) { | 5163 | switch(size) { |
| 5165 | - case 0: gen_op_neon_abs_s8(); break; | ||
| 5166 | - case 1: gen_op_neon_abs_s16(); break; | ||
| 5167 | - case 2: gen_op_neon_abs_s32(); break; | 5164 | + case 0: gen_helper_neon_abs_s8(cpu_T[0], cpu_T[0]); break; |
| 5165 | + case 1: gen_helper_neon_abs_s16(cpu_T[0], cpu_T[0]); break; | ||
| 5166 | + case 2: tcg_gen_abs_i32(cpu_T[0], cpu_T[0]); break; | ||
| 5168 | default: return 1; | 5167 | default: return 1; |
| 5169 | } | 5168 | } |
| 5170 | break; | 5169 | break; |
| 5171 | case 23: /* VNEG */ | 5170 | case 23: /* VNEG */ |
| 5172 | gen_op_movl_T1_im(0); | 5171 | gen_op_movl_T1_im(0); |
| 5173 | - switch(size) { | ||
| 5174 | - case 0: gen_op_neon_rsb_u8(); break; | ||
| 5175 | - case 1: gen_op_neon_rsb_u16(); break; | ||
| 5176 | - case 2: gen_op_rsbl_T0_T1(); break; | ||
| 5177 | - default: return 1; | ||
| 5178 | - } | 5172 | + if (size == 3) |
| 5173 | + return 1; | ||
| 5174 | + gen_neon_rsb(size); | ||
| 5179 | break; | 5175 | break; |
| 5180 | case 24: case 27: /* Float VCGT #0, Float VCLE #0 */ | 5176 | case 24: case 27: /* Float VCGT #0, Float VCLE #0 */ |
| 5181 | gen_op_movl_T1_im(0); | 5177 | gen_op_movl_T1_im(0); |
| 5182 | - gen_op_neon_cgt_f32(); | 5178 | + gen_helper_neon_cgt_f32(CPU_T001); |
| 5183 | if (op == 27) | 5179 | if (op == 27) |
| 5184 | gen_op_notl_T0(); | 5180 | gen_op_notl_T0(); |
| 5185 | break; | 5181 | break; |
| 5186 | case 25: case 28: /* Float VCGE #0, Float VCLT #0 */ | 5182 | case 25: case 28: /* Float VCGE #0, Float VCLT #0 */ |
| 5187 | gen_op_movl_T1_im(0); | 5183 | gen_op_movl_T1_im(0); |
| 5188 | - gen_op_neon_cge_f32(); | 5184 | + gen_helper_neon_cge_f32(CPU_T001); |
| 5189 | if (op == 28) | 5185 | if (op == 28) |
| 5190 | gen_op_notl_T0(); | 5186 | gen_op_notl_T0(); |
| 5191 | break; | 5187 | break; |
| 5192 | case 26: /* Float VCEQ #0 */ | 5188 | case 26: /* Float VCEQ #0 */ |
| 5193 | gen_op_movl_T1_im(0); | 5189 | gen_op_movl_T1_im(0); |
| 5194 | - gen_op_neon_ceq_f32(); | 5190 | + gen_helper_neon_ceq_f32(CPU_T001); |
| 5195 | break; | 5191 | break; |
| 5196 | case 30: /* Float VABS */ | 5192 | case 30: /* Float VABS */ |
| 5197 | gen_vfp_abs(0); | 5193 | gen_vfp_abs(0); |
| @@ -5206,8 +5202,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -5206,8 +5202,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 5206 | case 33: /* VTRN */ | 5202 | case 33: /* VTRN */ |
| 5207 | NEON_GET_REG(T1, rd, pass); | 5203 | NEON_GET_REG(T1, rd, pass); |
| 5208 | switch (size) { | 5204 | switch (size) { |
| 5209 | - case 0: gen_op_neon_trn_u8(); break; | ||
| 5210 | - case 1: gen_op_neon_trn_u16(); break; | 5205 | + case 0: gen_helper_neon_trn_u8(); break; |
| 5206 | + case 1: gen_helper_neon_trn_u16(); break; | ||
| 5211 | case 2: abort(); | 5207 | case 2: abort(); |
| 5212 | default: return 1; | 5208 | default: return 1; |
| 5213 | } | 5209 | } |
| @@ -5281,12 +5277,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | @@ -5281,12 +5277,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) | ||
| 5281 | NEON_SET_REG(T0, rm, 0); | 5277 | NEON_SET_REG(T0, rm, 0); |
| 5282 | } | 5278 | } |
| 5283 | if (insn & (1 << 16)) { | 5279 | if (insn & (1 << 16)) { |
| 5284 | - gen_op_neon_dup_u8(((insn >> 17) & 3) * 8); | 5280 | + gen_neon_dup_u8(cpu_T[0], ((insn >> 17) & 3) * 8); |
| 5285 | } else if (insn & (1 << 17)) { | 5281 | } else if (insn & (1 << 17)) { |
| 5286 | if ((insn >> 18) & 1) | 5282 | if ((insn >> 18) & 1) |
| 5287 | - gen_op_neon_dup_high16(); | 5283 | + gen_neon_dup_high16(cpu_T[0]); |
| 5288 | else | 5284 | else |
| 5289 | - gen_op_neon_dup_low16(); | 5285 | + gen_neon_dup_low16(cpu_T[0]); |
| 5290 | } | 5286 | } |
| 5291 | for (pass = 0; pass < (q ? 4 : 2); pass++) { | 5287 | for (pass = 0; pass < (q ? 4 : 2); pass++) { |
| 5292 | NEON_SET_REG(T0, rd, pass); | 5288 | NEON_SET_REG(T0, rd, pass); |
| @@ -8324,6 +8320,8 @@ static inline int gen_intermediate_code_internal(CPUState *env, | @@ -8324,6 +8320,8 @@ static inline int gen_intermediate_code_internal(CPUState *env, | ||
| 8324 | cpu_F1s = tcg_temp_new(TCG_TYPE_I32); | 8320 | cpu_F1s = tcg_temp_new(TCG_TYPE_I32); |
| 8325 | cpu_F0d = tcg_temp_new(TCG_TYPE_I64); | 8321 | cpu_F0d = tcg_temp_new(TCG_TYPE_I64); |
| 8326 | cpu_F1d = tcg_temp_new(TCG_TYPE_I64); | 8322 | cpu_F1d = tcg_temp_new(TCG_TYPE_I64); |
| 8323 | + cpu_V0 = cpu_F0d; | ||
| 8324 | + cpu_V1 = cpu_F1d; | ||
| 8327 | next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; | 8325 | next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; |
| 8328 | lj = -1; | 8326 | lj = -1; |
| 8329 | /* Reset the conditional execution bits immediately. This avoids | 8327 | /* Reset the conditional execution bits immediately. This avoids |