Commit 8142cddda25b8010c0fe2d6bb3aa562a2048b347

Authored by aurel32
1 parent cbfb6ae9

Add vsumsws, vsum2sws, and vsum4{sbs,shs,ubs} instructions.

Signed-off-by: Nathan Froyd <froydnj@codesourcery.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@6189 c046a42c-6fe2-441c-8c8c-71466251a162
target-ppc/helper.h
... ... @@ -191,6 +191,11 @@ DEF_HELPER_2(lvewx, void, avr, tl)
191 191 DEF_HELPER_2(stvebx, void, avr, tl)
192 192 DEF_HELPER_2(stvehx, void, avr, tl)
193 193 DEF_HELPER_2(stvewx, void, avr, tl)
  /* Declarations for the five vsum* helpers implemented in
   * target-ppc/op_helper.c; each is declared with a void result and
   * three avr operands. */
  194 +DEF_HELPER_3(vsumsws, void, avr, avr, avr)
  195 +DEF_HELPER_3(vsum2sws, void, avr, avr, avr)
  196 +DEF_HELPER_3(vsum4sbs, void, avr, avr, avr)
  197 +DEF_HELPER_3(vsum4shs, void, avr, avr, avr)
  198 +DEF_HELPER_3(vsum4ubs, void, avr, avr, avr)
194 199  
195 200 DEF_HELPER_1(efscfsi, i32, i32)
196 201 DEF_HELPER_1(efscfui, i32, i32)
... ...
target-ppc/op_helper.c
... ... @@ -2534,6 +2534,109 @@ void helper_vsubcuw (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2534 2534 }
2535 2535 }
2536 2536  
  2537 +void helper_vsumsws (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
  2538 +{
  2539 + int64_t t;
  2540 + int i, upper;
  2541 + ppc_avr_t result;
  2542 + int sat = 0;
  2543 +
  2544 +#if defined(WORDS_BIGENDIAN)
  2545 + upper = ARRAY_SIZE(r->s32)-1;
  2546 +#else
  2547 + upper = 0;
  2548 +#endif
  2549 + t = (int64_t)b->s32[upper];
  2550 + for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
  2551 + t += a->s32[i];
  2552 + result.s32[i] = 0;
  2553 + }
  2554 + result.s32[upper] = cvtsdsw(t, &sat);
  2555 + *r = result;
  2556 +
  2557 + if (sat) {
  2558 + env->vscr |= (1 << VSCR_SAT);
  2559 + }
  2560 +}
  2561 +
  2562 +void helper_vsum2sws (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
  2563 +{
  2564 + int i, j, upper;
  2565 + ppc_avr_t result;
  2566 + int sat = 0;
  2567 +
  2568 +#if defined(WORDS_BIGENDIAN)
  2569 + upper = 1;
  2570 +#else
  2571 + upper = 0;
  2572 +#endif
  2573 + for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
  2574 + int64_t t = (int64_t)b->s32[upper+i*2];
  2575 + result.u64[i] = 0;
  2576 + for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
  2577 + t += a->s32[2*i+j];
  2578 + }
  2579 + result.s32[upper+i*2] = cvtsdsw(t, &sat);
  2580 + }
  2581 +
  2582 + *r = result;
  2583 + if (sat) {
  2584 + env->vscr |= (1 << VSCR_SAT);
  2585 + }
  2586 +}
  2587 +
  2588 +void helper_vsum4sbs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
  2589 +{
  2590 + int i, j;
  2591 + int sat = 0;
  2592 +
  2593 + for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
  2594 + int64_t t = (int64_t)b->s32[i];
  2595 + for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
  2596 + t += a->s8[4*i+j];
  2597 + }
  2598 + r->s32[i] = cvtsdsw(t, &sat);
  2599 + }
  2600 +
  2601 + if (sat) {
  2602 + env->vscr |= (1 << VSCR_SAT);
  2603 + }
  2604 +}
  2605 +
  2606 +void helper_vsum4shs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
  2607 +{
  2608 + int sat = 0;
  2609 + int i;
  2610 +
  2611 + for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
  2612 + int64_t t = (int64_t)b->s32[i];
  2613 + t += a->s16[2*i] + a->s16[2*i+1];
  2614 + r->s32[i] = cvtsdsw(t, &sat);
  2615 + }
  2616 +
  2617 + if (sat) {
  2618 + env->vscr |= (1 << VSCR_SAT);
  2619 + }
  2620 +}
  2621 +
  2622 +void helper_vsum4ubs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
  2623 +{
  2624 + int i, j;
  2625 + int sat = 0;
  2626 +
  2627 + for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
  2628 + uint64_t t = (uint64_t)b->u32[i];
  2629 + for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
  2630 + t += a->u8[4*i+j];
  2631 + }
  2632 + r->u32[i] = cvtuduw(t, &sat);
  2633 + }
  2634 +
  2635 + if (sat) {
  2636 + env->vscr |= (1 << VSCR_SAT);
  2637 + }
  2638 +}
  2639 +
2537 2640 #if defined(WORDS_BIGENDIAN)
2538 2641 #define UPKHI 1
2539 2642 #define UPKLO 0
... ...
target-ppc/translate.c
... ... @@ -6326,6 +6326,11 @@ GEN_VXFORM(vpkswus, 7, 5);
6326 6326 GEN_VXFORM(vpkshss, 7, 6);
6327 6327 GEN_VXFORM(vpkswss, 7, 7);
6328 6328 GEN_VXFORM(vpkpx, 7, 12);
  /* GEN_VXFORM entries for the five vsum* instructions.  The two numbers
   * are presumably the opc2/opc3 opcode fields, by analogy with
   * GEN_VXFORM_NOA(name, opc2, opc3) -- confirm against the GEN_VXFORM
   * definition. */
  6329 +GEN_VXFORM(vsum4ubs, 4, 24);
  6330 +GEN_VXFORM(vsum4sbs, 4, 28);
  6331 +GEN_VXFORM(vsum4shs, 4, 25);
  6332 +GEN_VXFORM(vsum2sws, 4, 26);
  6333 +GEN_VXFORM(vsumsws, 4, 30);
6329 6334  
6330 6335 #define GEN_VXFORM_NOA(name, opc2, opc3) \
6331 6336 GEN_HANDLER(name, 0x04, opc2, opc3, 0x001f0000, PPC_ALTIVEC) \
... ...