Commit 6ddbc6e4cfbfa0937fdebd8aa7b518d8b7fd118b

Authored by pbrook
1 parent 3670669c

ARM TCG conversion 7/16.

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4144 c046a42c-6fe2-441c-8c8c-71466251a162
target-arm/helper.c
@@ -1922,3 +1922,248 @@ void cpu_arm_set_cp_io(CPUARMState *env, int cpnum, @@ -1922,3 +1922,248 @@ void cpu_arm_set_cp_io(CPUARMState *env, int cpnum,
1922 } 1922 }
1923 1923
1924 #endif 1924 #endif
  1925 +
  1926 +/* Note that signed overflow is undefined in C. The following routines are
  1927 + careful to use unsigned types where modulo arithmetic is required.
  1928 + Failure to do so _will_ break on newer gcc. */
  1929 +
  1930 +/* Signed saturating arithmetic. */
  1931 +
/* Perform 16-bit signed saturating addition. */
static inline uint16_t add16_sat(uint16_t a, uint16_t b)
{
    int32_t sum;

    /* Widen to 32 bits so the sum cannot overflow, then clamp to the
       signed 16-bit range. */
    sum = (int32_t)(int16_t)a + (int32_t)(int16_t)b;
    if (sum > 0x7fff)
        sum = 0x7fff;
    else if (sum < -0x8000)
        sum = -0x8000;
    return (uint16_t)sum;
}
  1946 +
/* Perform 8-bit signed saturating addition. */
static inline uint8_t add8_sat(uint8_t a, uint8_t b)
{
    int32_t sum;

    /* Widen so the sum cannot overflow, then clamp to the signed
       8-bit range. */
    sum = (int32_t)(int8_t)a + (int32_t)(int8_t)b;
    if (sum > 0x7f)
        sum = 0x7f;
    else if (sum < -0x80)
        sum = -0x80;
    return (uint8_t)sum;
}
  1961 +
/* Perform 16-bit signed saturating subtraction. */
static inline uint16_t sub16_sat(uint16_t a, uint16_t b)
{
    int32_t diff;

    /* Widen to 32 bits so the difference cannot overflow, then clamp
       to the signed 16-bit range. */
    diff = (int32_t)(int16_t)a - (int32_t)(int16_t)b;
    if (diff > 0x7fff)
        diff = 0x7fff;
    else if (diff < -0x8000)
        diff = -0x8000;
    return (uint16_t)diff;
}
  1976 +
/* Perform 8-bit signed saturating subtraction. */
static inline uint8_t sub8_sat(uint8_t a, uint8_t b)
{
    int32_t diff;

    /* Widen so the difference cannot overflow, then clamp to the
       signed 8-bit range. */
    diff = (int32_t)(int8_t)a - (int32_t)(int8_t)b;
    if (diff > 0x7f)
        diff = 0x7f;
    else if (diff < -0x80)
        diff = -0x80;
    return (uint8_t)diff;
}
  1991 +
/* Signed saturating parallel ops (the "q"-prefixed helpers, QADD16 etc):
   instantiate the per-lane templates in op_addsub.h with the saturating
   helpers above. */
#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16);
#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16);
#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8);
#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8);
#define PFX q

#include "op_addsub.h"
  1999 +
/* Unsigned saturating arithmetic. */

/* Perform 16-bit unsigned saturating addition.
   The second operand must be a full 16 bits wide: declaring it uint8_t
   would silently truncate the b halfword of UQADD16. */
static inline uint16_t add16_usat(uint16_t a, uint16_t b)
{
    uint16_t res;
    res = a + b;
    /* Wrap-around means the true sum exceeded 0xffff. */
    if (res < a)
        res = 0xffff;
    return res;
}
  2009 +
/* Perform 16-bit unsigned saturating subtraction.
   The second operand must be a full 16 bits wide (uint8_t would
   truncate it), and the result clamps at zero when b >= a. */
static inline uint16_t sub16_usat(uint16_t a, uint16_t b)
{
    if (a > b)
        return a - b;
    else
        return 0;
}
  2017 +
/* Perform 8-bit unsigned saturating addition. */
static inline uint8_t add8_usat(uint8_t a, uint8_t b)
{
    unsigned int sum = (unsigned int)a + (unsigned int)b;

    /* A carry past 8 bits means the result saturates at the maximum. */
    return (sum > 0xff) ? 0xff : (uint8_t)sum;
}
  2026 +
/* Perform 8-bit unsigned saturating subtraction: clamps at zero when
   b >= a (the original comparison was inverted, returning the wrapped
   difference instead of saturating). */
static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
{
    if (a > b)
        return a - b;
    else
        return 0;
}
  2034 +
/* Unsigned saturating parallel ops (the "uq"-prefixed helpers,
   UQADD16 etc). */
#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16);
#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16);
#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8);
#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8);
#define PFX uq

#include "op_addsub.h"
  2042 +
/* Signed modulo arithmetic.  The "s"-prefixed parallel ops (SADD16 etc)
   use wrapping arithmetic but also record, per lane, whether the signed
   lane result was >= 0 in the ge accumulator (2 GE bits per halfword,
   1 per byte). */
#define SARITH16(a, b, n, op) do { \
    int32_t sum; \
    sum = (int16_t)((uint16_t)(a) op (uint16_t)(b)); \
    RESULT(sum, n, 16); \
    if (sum >= 0) \
        ge |= 3 << (n * 2); \
    } while(0)

#define SARITH8(a, b, n, op) do { \
    int32_t sum; \
    sum = (int8_t)((uint8_t)(a) op (uint8_t)(b)); \
    RESULT(sum, n, 8); \
    if (sum >= 0) \
        ge |= 1 << n; \
    } while(0)


#define ADD16(a, b, n) SARITH16(a, b, n, +)
#define SUB16(a, b, n) SARITH16(a, b, n, -)
#define ADD8(a, b, n) SARITH8(a, b, n, +)
#define SUB8(a, b, n) SARITH8(a, b, n, -)
#define PFX s
#define ARITH_GE

#include "op_addsub.h"
  2069 +
/* Unsigned modulo arithmetic.  The "u"-prefixed parallel ops (UADD16
   etc) wrap, and record carry/borrow information in the GE bits: for
   addition a lane's GE bits are set on carry out of that lane, for
   subtraction they are set when no borrow occurred (a >= b).  16-bit
   lanes own two GE bits, 8-bit lanes one.  (The previous version set
   GE on *no* carry for the additions, and set two GE bits per byte
   lane for the 8-bit ops.) */
#define ADD16(a, b, n) do { \
    uint32_t sum; \
    sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \
    RESULT(sum, n, 16); \
    if ((sum >> 16) == 1) \
        ge |= 3 << (n * 2); \
    } while(0)

#define ADD8(a, b, n) do { \
    uint32_t sum; \
    sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \
    RESULT(sum, n, 8); \
    if ((sum >> 8) == 1) \
        ge |= 1 << n; \
    } while(0)

#define SUB16(a, b, n) do { \
    uint32_t sum; \
    sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \
    RESULT(sum, n, 16); \
    if ((sum >> 16) == 0) \
        ge |= 3 << (n * 2); \
    } while(0)

#define SUB8(a, b, n) do { \
    uint32_t sum; \
    sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \
    RESULT(sum, n, 8); \
    if ((sum >> 8) == 0) \
        ge |= 1 << n; \
    } while(0)
  2102 +
  2103 +#define PFX u
  2104 +#define ARITH_GE
  2105 +
  2106 +#include "op_addsub.h"
  2107 +
/* Halved signed arithmetic (the "sh"-prefixed ops, SHADD16 etc):
   sign-extend to 32 bits, operate, then halve with an arithmetic right
   shift so the intermediate can never overflow. */
#define ADD16(a, b, n) \
  RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16)
#define SUB16(a, b, n) \
  RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16)
#define ADD8(a, b, n) \
  RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8)
#define SUB8(a, b, n) \
  RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8)
#define PFX sh

#include "op_addsub.h"
  2120 +
/* Halved unsigned arithmetic (the "uh"-prefixed ops, UHADD16 etc):
   zero-extend to 32 bits, operate, then halve with a logical right
   shift.  The carry/borrow bit lands in the intermediate's upper bits,
   so halving is exact. */
#define ADD16(a, b, n) \
  RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16)
#define SUB16(a, b, n) \
  RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16)
#define ADD8(a, b, n) \
  RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8)
#define SUB8(a, b, n) \
  RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8)
#define PFX uh

#include "op_addsub.h"
  2133 +
/* Absolute difference of two unsigned bytes. */
static inline uint8_t do_usad(uint8_t a, uint8_t b)
{
    return (a > b) ? (uint8_t)(a - b) : (uint8_t)(b - a);
}
  2141 +
  2142 +/* Unsigned sum of absolute byte differences. */
  2143 +uint32_t HELPER(usad8)(uint32_t a, uint32_t b)
  2144 +{
  2145 + uint32_t sum;
  2146 + sum = do_usad(a, b);
  2147 + sum += do_usad(a >> 8, b >> 8);
  2148 + sum += do_usad(a >> 16, b >>16);
  2149 + sum += do_usad(a >> 24, b >> 24);
  2150 + return sum;
  2151 +}
  2152 +
  2153 +/* For ARMv6 SEL instruction. */
  2154 +uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b)
  2155 +{
  2156 + uint32_t mask;
  2157 +
  2158 + mask = 0;
  2159 + if (flags & 1)
  2160 + mask |= 0xff;
  2161 + if (flags & 2)
  2162 + mask |= 0xff00;
  2163 + if (flags & 4)
  2164 + mask |= 0xff0000;
  2165 + if (flags & 8)
  2166 + mask |= 0xff000000;
  2167 + return (a & mask) | (b & ~mask);
  2168 +}
  2169 +
target-arm/helpers.h
1 -#define DEF_HELPER(name, ret, args) ret helper_##name args; 1 +#define DEF_HELPER(name, ret, args) ret glue(helper_,name) args;
2 2
3 #ifdef GEN_HELPER 3 #ifdef GEN_HELPER
4 #define DEF_HELPER_1_1(name, ret, args) \ 4 #define DEF_HELPER_1_1(name, ret, args) \
@@ -13,10 +13,18 @@ static inline void gen_helper_##name(TCGv ret, TCGv arg1, TCGv arg2) \ @@ -13,10 +13,18 @@ static inline void gen_helper_##name(TCGv ret, TCGv arg1, TCGv arg2) \
13 { \ 13 { \
14 tcg_gen_helper_1_2(helper_##name, ret, arg1, arg2); \ 14 tcg_gen_helper_1_2(helper_##name, ret, arg1, arg2); \
15 } 15 }
  16 +#define DEF_HELPER_1_3(name, ret, args) \
  17 +DEF_HELPER(name, ret, args) \
  18 +static inline void gen_helper_##name(TCGv ret, \
  19 + TCGv arg1, TCGv arg2, TCGv arg3) \
  20 +{ \
  21 + tcg_gen_helper_1_3(helper_##name, ret, arg1, arg2, arg3); \
  22 +}
16 #else /* !GEN_HELPER */ 23 #else /* !GEN_HELPER */
17 #define DEF_HELPER_1_1 DEF_HELPER 24 #define DEF_HELPER_1_1 DEF_HELPER
18 #define DEF_HELPER_1_2 DEF_HELPER 25 #define DEF_HELPER_1_2 DEF_HELPER
19 -#define HELPER(x) helper_##x 26 +#define DEF_HELPER_1_3 DEF_HELPER
  27 +#define HELPER(x) glue(helper_,x)
20 #endif 28 #endif
21 29
22 DEF_HELPER_1_1(clz, uint32_t, (uint32_t)) 30 DEF_HELPER_1_1(clz, uint32_t, (uint32_t))
@@ -33,6 +41,40 @@ DEF_HELPER_1_2(sdiv, int32_t, (int32_t, int32_t)) @@ -33,6 +41,40 @@ DEF_HELPER_1_2(sdiv, int32_t, (int32_t, int32_t))
33 DEF_HELPER_1_2(udiv, uint32_t, (uint32_t, uint32_t)) 41 DEF_HELPER_1_2(udiv, uint32_t, (uint32_t, uint32_t))
34 DEF_HELPER_1_1(rbit, uint32_t, (uint32_t)) 42 DEF_HELPER_1_1(rbit, uint32_t, (uint32_t))
35 43
  44 +#define PAS_OP(pfx) \
  45 + DEF_HELPER_1_3(pfx ## add8, uint32_t, (uint32_t, uint32_t, uint32_t *)) \
  46 + DEF_HELPER_1_3(pfx ## sub8, uint32_t, (uint32_t, uint32_t, uint32_t *)) \
  47 + DEF_HELPER_1_3(pfx ## sub16, uint32_t, (uint32_t, uint32_t, uint32_t *)) \
  48 + DEF_HELPER_1_3(pfx ## add16, uint32_t, (uint32_t, uint32_t, uint32_t *)) \
  49 + DEF_HELPER_1_3(pfx ## addsubx, uint32_t, (uint32_t, uint32_t, uint32_t *)) \
  50 + DEF_HELPER_1_3(pfx ## subaddx, uint32_t, (uint32_t, uint32_t, uint32_t *))
  51 +
  52 +PAS_OP(s)
  53 +PAS_OP(u)
  54 +#undef PAS_OP
  55 +
  56 +#define PAS_OP(pfx) \
  57 + DEF_HELPER_1_2(pfx ## add8, uint32_t, (uint32_t, uint32_t)) \
  58 + DEF_HELPER_1_2(pfx ## sub8, uint32_t, (uint32_t, uint32_t)) \
  59 + DEF_HELPER_1_2(pfx ## sub16, uint32_t, (uint32_t, uint32_t)) \
  60 + DEF_HELPER_1_2(pfx ## add16, uint32_t, (uint32_t, uint32_t)) \
  61 + DEF_HELPER_1_2(pfx ## addsubx, uint32_t, (uint32_t, uint32_t)) \
  62 + DEF_HELPER_1_2(pfx ## subaddx, uint32_t, (uint32_t, uint32_t))
  63 +PAS_OP(q)
  64 +PAS_OP(sh)
  65 +PAS_OP(uq)
  66 +PAS_OP(uh)
  67 +#undef PAS_OP
  68 +
  69 +DEF_HELPER_1_2(ssat, uint32_t, (uint32_t, uint32_t))
  70 +DEF_HELPER_1_2(usat, uint32_t, (uint32_t, uint32_t))
  71 +DEF_HELPER_1_2(ssat16, uint32_t, (uint32_t, uint32_t))
  72 +DEF_HELPER_1_2(usat16, uint32_t, (uint32_t, uint32_t))
  73 +
  74 +DEF_HELPER_1_2(usad8, uint32_t, (uint32_t, uint32_t))
  75 +
  76 +DEF_HELPER_1_3(sel_flags, uint32_t, (uint32_t, uint32_t, uint32_t))
  77 +
36 #undef DEF_HELPER 78 #undef DEF_HELPER
37 #undef DEF_HELPER_1_1 79 #undef DEF_HELPER_1_1
38 #undef DEF_HELPER_1_2 80 #undef DEF_HELPER_1_2
target-arm/op.c
@@ -805,327 +805,6 @@ void OPPROTO op_movl_user_T0(void) @@ -805,327 +805,6 @@ void OPPROTO op_movl_user_T0(void)
805 FORCE_RET(); 805 FORCE_RET();
806 } 806 }
807 807
808 -/* ARMv6 Media instructions. */  
809 -  
810 -/* Note that signed overflow is undefined in C. The following routines are  
811 - careful to use unsigned types where modulo arithmetic is required.  
812 - Failure to do so _will_ break on newer gcc. */  
813 -  
814 -/* Signed saturating arithmetic. */  
815 -  
816 -/* Perform 16-bit signed satruating addition. */  
817 -static inline uint16_t add16_sat(uint16_t a, uint16_t b)  
818 -{  
819 - uint16_t res;  
820 -  
821 - res = a + b;  
822 - if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) {  
823 - if (a & 0x8000)  
824 - res = 0x8000;  
825 - else  
826 - res = 0x7fff;  
827 - }  
828 - return res;  
829 -}  
830 -  
831 -/* Perform 8-bit signed satruating addition. */  
832 -static inline uint8_t add8_sat(uint8_t a, uint8_t b)  
833 -{  
834 - uint8_t res;  
835 -  
836 - res = a + b;  
837 - if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) {  
838 - if (a & 0x80)  
839 - res = 0x80;  
840 - else  
841 - res = 0x7f;  
842 - }  
843 - return res;  
844 -}  
845 -  
846 -/* Perform 16-bit signed satruating subtraction. */  
847 -static inline uint16_t sub16_sat(uint16_t a, uint16_t b)  
848 -{  
849 - uint16_t res;  
850 -  
851 - res = a - b;  
852 - if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) {  
853 - if (a & 0x8000)  
854 - res = 0x8000;  
855 - else  
856 - res = 0x7fff;  
857 - }  
858 - return res;  
859 -}  
860 -  
861 -/* Perform 8-bit signed satruating subtraction. */  
862 -static inline uint8_t sub8_sat(uint8_t a, uint8_t b)  
863 -{  
864 - uint8_t res;  
865 -  
866 - res = a - b;  
867 - if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) {  
868 - if (a & 0x80)  
869 - res = 0x80;  
870 - else  
871 - res = 0x7f;  
872 - }  
873 - return res;  
874 -}  
875 -  
876 -#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16);  
877 -#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16);  
878 -#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8);  
879 -#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8);  
880 -#define PFX q  
881 -  
882 -#include "op_addsub.h"  
883 -  
884 -/* Unsigned saturating arithmetic. */  
885 -static inline uint16_t add16_usat(uint16_t a, uint8_t b)  
886 -{  
887 - uint16_t res;  
888 - res = a + b;  
889 - if (res < a)  
890 - res = 0xffff;  
891 - return res;  
892 -}  
893 -  
894 -static inline uint16_t sub16_usat(uint16_t a, uint8_t b)  
895 -{  
896 - if (a < b)  
897 - return a - b;  
898 - else  
899 - return 0;  
900 -}  
901 -  
902 -static inline uint8_t add8_usat(uint8_t a, uint8_t b)  
903 -{  
904 - uint8_t res;  
905 - res = a + b;  
906 - if (res < a)  
907 - res = 0xff;  
908 - return res;  
909 -}  
910 -  
911 -static inline uint8_t sub8_usat(uint8_t a, uint8_t b)  
912 -{  
913 - if (a < b)  
914 - return a - b;  
915 - else  
916 - return 0;  
917 -}  
918 -  
919 -#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16);  
920 -#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16);  
921 -#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8);  
922 -#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8);  
923 -#define PFX uq  
924 -  
925 -#include "op_addsub.h"  
926 -  
927 -/* Signed modulo arithmetic. */  
928 -#define SARITH16(a, b, n, op) do { \  
929 - int32_t sum; \  
930 - sum = (int16_t)((uint16_t)(a) op (uint16_t)(b)); \  
931 - RESULT(sum, n, 16); \  
932 - if (sum >= 0) \  
933 - ge |= 3 << (n * 2); \  
934 - } while(0)  
935 -  
936 -#define SARITH8(a, b, n, op) do { \  
937 - int32_t sum; \  
938 - sum = (int8_t)((uint8_t)(a) op (uint8_t)(b)); \  
939 - RESULT(sum, n, 8); \  
940 - if (sum >= 0) \  
941 - ge |= 1 << n; \  
942 - } while(0)  
943 -  
944 -  
945 -#define ADD16(a, b, n) SARITH16(a, b, n, +)  
946 -#define SUB16(a, b, n) SARITH16(a, b, n, -)  
947 -#define ADD8(a, b, n) SARITH8(a, b, n, +)  
948 -#define SUB8(a, b, n) SARITH8(a, b, n, -)  
949 -#define PFX s  
950 -#define ARITH_GE  
951 -  
952 -#include "op_addsub.h"  
953 -  
954 -/* Unsigned modulo arithmetic. */  
955 -#define ADD16(a, b, n) do { \  
956 - uint32_t sum; \  
957 - sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \  
958 - RESULT(sum, n, 16); \  
959 - if ((sum >> 16) == 0) \  
960 - ge |= 3 << (n * 2); \  
961 - } while(0)  
962 -  
963 -#define ADD8(a, b, n) do { \  
964 - uint32_t sum; \  
965 - sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \  
966 - RESULT(sum, n, 8); \  
967 - if ((sum >> 8) == 0) \  
968 - ge |= 3 << (n * 2); \  
969 - } while(0)  
970 -  
971 -#define SUB16(a, b, n) do { \  
972 - uint32_t sum; \  
973 - sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \  
974 - RESULT(sum, n, 16); \  
975 - if ((sum >> 16) == 0) \  
976 - ge |= 3 << (n * 2); \  
977 - } while(0)  
978 -  
979 -#define SUB8(a, b, n) do { \  
980 - uint32_t sum; \  
981 - sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \  
982 - RESULT(sum, n, 8); \  
983 - if ((sum >> 8) == 0) \  
984 - ge |= 3 << (n * 2); \  
985 - } while(0)  
986 -  
987 -#define PFX u  
988 -#define ARITH_GE  
989 -  
990 -#include "op_addsub.h"  
991 -  
992 -/* Halved signed arithmetic. */  
993 -#define ADD16(a, b, n) \  
994 - RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16)  
995 -#define SUB16(a, b, n) \  
996 - RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16)  
997 -#define ADD8(a, b, n) \  
998 - RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8)  
999 -#define SUB8(a, b, n) \  
1000 - RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8)  
1001 -#define PFX sh  
1002 -  
1003 -#include "op_addsub.h"  
1004 -  
1005 -/* Halved unsigned arithmetic. */  
1006 -#define ADD16(a, b, n) \  
1007 - RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16)  
1008 -#define SUB16(a, b, n) \  
1009 - RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16)  
1010 -#define ADD8(a, b, n) \  
1011 - RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8)  
1012 -#define SUB8(a, b, n) \  
1013 - RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8)  
1014 -#define PFX uh  
1015 -  
1016 -#include "op_addsub.h"  
1017 -  
1018 -void OPPROTO op_sel_T0_T1(void)  
1019 -{  
1020 - uint32_t mask;  
1021 - uint32_t flags;  
1022 -  
1023 - flags = env->GE;  
1024 - mask = 0;  
1025 - if (flags & 1)  
1026 - mask |= 0xff;  
1027 - if (flags & 2)  
1028 - mask |= 0xff00;  
1029 - if (flags & 4)  
1030 - mask |= 0xff0000;  
1031 - if (flags & 8)  
1032 - mask |= 0xff000000;  
1033 - T0 = (T0 & mask) | (T1 & ~mask);  
1034 - FORCE_RET();  
1035 -}  
1036 -  
1037 -/* Signed saturation. */  
1038 -static inline uint32_t do_ssat(int32_t val, int shift)  
1039 -{  
1040 - int32_t top;  
1041 - uint32_t mask;  
1042 -  
1043 - shift = PARAM1;  
1044 - top = val >> shift;  
1045 - mask = (1u << shift) - 1;  
1046 - if (top > 0) {  
1047 - env->QF = 1;  
1048 - return mask;  
1049 - } else if (top < -1) {  
1050 - env->QF = 1;  
1051 - return ~mask;  
1052 - }  
1053 - return val;  
1054 -}  
1055 -  
1056 -/* Unsigned saturation. */  
1057 -static inline uint32_t do_usat(int32_t val, int shift)  
1058 -{  
1059 - uint32_t max;  
1060 -  
1061 - shift = PARAM1;  
1062 - max = (1u << shift) - 1;  
1063 - if (val < 0) {  
1064 - env->QF = 1;  
1065 - return 0;  
1066 - } else if (val > max) {  
1067 - env->QF = 1;  
1068 - return max;  
1069 - }  
1070 - return val;  
1071 -}  
1072 -  
1073 -/* Signed saturate. */  
1074 -void OPPROTO op_ssat_T1(void)  
1075 -{  
1076 - T0 = do_ssat(T0, PARAM1);  
1077 - FORCE_RET();  
1078 -}  
1079 -  
1080 -/* Dual halfword signed saturate. */  
1081 -void OPPROTO op_ssat16_T1(void)  
1082 -{  
1083 - uint32_t res;  
1084 -  
1085 - res = (uint16_t)do_ssat((int16_t)T0, PARAM1);  
1086 - res |= do_ssat(((int32_t)T0) >> 16, PARAM1) << 16;  
1087 - T0 = res;  
1088 - FORCE_RET();  
1089 -}  
1090 -  
1091 -/* Unsigned saturate. */  
1092 -void OPPROTO op_usat_T1(void)  
1093 -{  
1094 - T0 = do_usat(T0, PARAM1);  
1095 - FORCE_RET();  
1096 -}  
1097 -  
1098 -/* Dual halfword unsigned saturate. */  
1099 -void OPPROTO op_usat16_T1(void)  
1100 -{  
1101 - uint32_t res;  
1102 -  
1103 - res = (uint16_t)do_usat((int16_t)T0, PARAM1);  
1104 - res |= do_usat(((int32_t)T0) >> 16, PARAM1) << 16;  
1105 - T0 = res;  
1106 - FORCE_RET();  
1107 -}  
1108 -  
1109 -/* Dual 16-bit add. */  
1110 -static inline uint8_t do_usad(uint8_t a, uint8_t b)  
1111 -{  
1112 - if (a > b)  
1113 - return a - b;  
1114 - else  
1115 - return b - a;  
1116 -}  
1117 -  
1118 -/* Unsigned sum of absolute byte differences. */  
1119 -void OPPROTO op_usad8_T0_T1(void)  
1120 -{  
1121 - uint32_t sum;  
1122 - sum = do_usad(T0, T1);  
1123 - sum += do_usad(T0 >> 8, T1 >> 8);  
1124 - sum += do_usad(T0 >> 16, T1 >>16);  
1125 - sum += do_usad(T0 >> 24, T1 >> 24);  
1126 - T0 = sum;  
1127 -}  
1128 -  
1129 void OPPROTO op_movl_T1_r13_banked(void) 808 void OPPROTO op_movl_T1_r13_banked(void)
1130 { 809 {
1131 T1 = helper_get_r13_banked(env, PARAM1); 810 T1 = helper_get_r13_banked(env, PARAM1);
target-arm/op_addsub.h
@@ -8,9 +8,11 @@ @@ -8,9 +8,11 @@
8 */ 8 */
9 9
10 #ifdef ARITH_GE 10 #ifdef ARITH_GE
  11 +#define GE_ARG , uint32_t *gep
11 #define DECLARE_GE uint32_t ge = 0 12 #define DECLARE_GE uint32_t ge = 0
12 -#define SET_GE env->GE = ge 13 +#define SET_GE *gep = ge
13 #else 14 #else
  15 +#define GE_ARG
14 #define DECLARE_GE do{}while(0) 16 #define DECLARE_GE do{}while(0)
15 #define SET_GE do{}while(0) 17 #define SET_GE do{}while(0)
16 #endif 18 #endif
@@ -18,82 +20,77 @@ @@ -18,82 +20,77 @@
18 #define RESULT(val, n, width) \ 20 #define RESULT(val, n, width) \
19 res |= ((uint32_t)(glue(glue(uint,width),_t))(val)) << (n * width) 21 res |= ((uint32_t)(glue(glue(uint,width),_t))(val)) << (n * width)
20 22
21 -void OPPROTO glue(glue(op_,PFX),add16_T0_T1)(void) 23 +uint32_t HELPER(glue(PFX,add16))(uint32_t a, uint32_t b GE_ARG)
22 { 24 {
23 uint32_t res = 0; 25 uint32_t res = 0;
24 DECLARE_GE; 26 DECLARE_GE;
25 27
26 - ADD16(T0, T1, 0);  
27 - ADD16(T0 >> 16, T1 >> 16, 1); 28 + ADD16(a, b, 0);
  29 + ADD16(a >> 16, b >> 16, 1);
28 SET_GE; 30 SET_GE;
29 - T0 = res;  
30 - FORCE_RET(); 31 + return res;
31 } 32 }
32 33
33 -void OPPROTO glue(glue(op_,PFX),add8_T0_T1)(void) 34 +uint32_t HELPER(glue(PFX,add8))(uint32_t a, uint32_t b GE_ARG)
34 { 35 {
35 uint32_t res = 0; 36 uint32_t res = 0;
36 DECLARE_GE; 37 DECLARE_GE;
37 38
38 - ADD8(T0, T1, 0);  
39 - ADD8(T0 >> 8, T1 >> 8, 1);  
40 - ADD8(T0 >> 16, T1 >> 16, 2);  
41 - ADD8(T0 >> 24, T1 >> 24, 3); 39 + ADD8(a, b, 0);
  40 + ADD8(a >> 8, b >> 8, 1);
  41 + ADD8(a >> 16, b >> 16, 2);
  42 + ADD8(a >> 24, b >> 24, 3);
42 SET_GE; 43 SET_GE;
43 - T0 = res;  
44 - FORCE_RET(); 44 + return res;
45 } 45 }
46 46
47 -void OPPROTO glue(glue(op_,PFX),sub16_T0_T1)(void) 47 +uint32_t HELPER(glue(PFX,sub16))(uint32_t a, uint32_t b GE_ARG)
48 { 48 {
49 uint32_t res = 0; 49 uint32_t res = 0;
50 DECLARE_GE; 50 DECLARE_GE;
51 51
52 - SUB16(T0, T1, 0);  
53 - SUB16(T0 >> 16, T1 >> 16, 1); 52 + SUB16(a, b, 0);
  53 + SUB16(a >> 16, b >> 16, 1);
54 SET_GE; 54 SET_GE;
55 - T0 = res;  
56 - FORCE_RET(); 55 + return res;
57 } 56 }
58 57
59 -void OPPROTO glue(glue(op_,PFX),sub8_T0_T1)(void) 58 +uint32_t HELPER(glue(PFX,sub8))(uint32_t a, uint32_t b GE_ARG)
60 { 59 {
61 uint32_t res = 0; 60 uint32_t res = 0;
62 DECLARE_GE; 61 DECLARE_GE;
63 62
64 - SUB8(T0, T1, 0);  
65 - SUB8(T0 >> 8, T1 >> 8, 1);  
66 - SUB8(T0 >> 16, T1 >> 16, 2);  
67 - SUB8(T0 >> 24, T1 >> 24, 3); 63 + SUB8(a, b, 0);
  64 + SUB8(a >> 8, b >> 8, 1);
  65 + SUB8(a >> 16, b >> 16, 2);
  66 + SUB8(a >> 24, b >> 24, 3);
68 SET_GE; 67 SET_GE;
69 - T0 = res;  
70 - FORCE_RET(); 68 + return res;
71 } 69 }
72 70
73 -void OPPROTO glue(glue(op_,PFX),subaddx_T0_T1)(void) 71 +uint32_t HELPER(glue(PFX,subaddx))(uint32_t a, uint32_t b GE_ARG)
74 { 72 {
75 uint32_t res = 0; 73 uint32_t res = 0;
76 DECLARE_GE; 74 DECLARE_GE;
77 75
78 - ADD16(T0, T1, 0);  
79 - SUB16(T0 >> 16, T1 >> 16, 1); 76 + ADD16(a, b, 0);
  77 + SUB16(a >> 16, b >> 16, 1);
80 SET_GE; 78 SET_GE;
81 - T0 = res;  
82 - FORCE_RET(); 79 + return res;
83 } 80 }
84 81
85 -void OPPROTO glue(glue(op_,PFX),addsubx_T0_T1)(void) 82 +uint32_t HELPER(glue(PFX,addsubx))(uint32_t a, uint32_t b GE_ARG)
86 { 83 {
87 uint32_t res = 0; 84 uint32_t res = 0;
88 DECLARE_GE; 85 DECLARE_GE;
89 86
90 - SUB16(T0, T1, 0);  
91 - ADD16(T0 >> 16, T1 >> 16, 1); 87 + SUB16(a, b, 0);
  88 + ADD16(a >> 16, b >> 16, 1);
92 SET_GE; 89 SET_GE;
93 - T0 = res;  
94 - FORCE_RET(); 90 + return res;
95 } 91 }
96 92
  93 +#undef GE_ARG
97 #undef DECLARE_GE 94 #undef DECLARE_GE
98 #undef SET_GE 95 #undef SET_GE
99 #undef RESULT 96 #undef RESULT
target-arm/op_helper.c
@@ -369,3 +369,70 @@ uint32_t HELPER(sub_usaturate)(uint32_t a, uint32_t b) @@ -369,3 +369,70 @@ uint32_t HELPER(sub_usaturate)(uint32_t a, uint32_t b)
369 return res; 369 return res;
370 } 370 }
371 371
  372 +/* Signed saturation. */
  373 +static inline uint32_t do_ssat(int32_t val, int shift)
  374 +{
  375 + int32_t top;
  376 + uint32_t mask;
  377 +
  378 + shift = PARAM1;
  379 + top = val >> shift;
  380 + mask = (1u << shift) - 1;
  381 + if (top > 0) {
  382 + env->QF = 1;
  383 + return mask;
  384 + } else if (top < -1) {
  385 + env->QF = 1;
  386 + return ~mask;
  387 + }
  388 + return val;
  389 +}
  390 +
  391 +/* Unsigned saturation. */
  392 +static inline uint32_t do_usat(int32_t val, int shift)
  393 +{
  394 + uint32_t max;
  395 +
  396 + shift = PARAM1;
  397 + max = (1u << shift) - 1;
  398 + if (val < 0) {
  399 + env->QF = 1;
  400 + return 0;
  401 + } else if (val > max) {
  402 + env->QF = 1;
  403 + return max;
  404 + }
  405 + return val;
  406 +}
  407 +
/* Signed saturate (SSAT): thin helper wrapper around do_ssat. */
uint32_t HELPER(ssat)(uint32_t x, uint32_t shift)
{
    return do_ssat(x, shift);
}
  413 +
  414 +/* Dual halfword signed saturate. */
  415 +uint32_t HELPER(ssat16)(uint32_t x, uint32_t shift)
  416 +{
  417 + uint32_t res;
  418 +
  419 + res = (uint16_t)do_ssat((int16_t)x, shift);
  420 + res |= do_ssat(((int32_t)x) >> 16, shift) << 16;
  421 + return res;
  422 +}
  423 +
/* Unsigned saturate (USAT): thin helper wrapper around do_usat. */
uint32_t HELPER(usat)(uint32_t x, uint32_t shift)
{
    return do_usat(x, shift);
}
  429 +
  430 +/* Dual halfword unsigned saturate. */
  431 +uint32_t HELPER(usat16)(uint32_t x, uint32_t shift)
  432 +{
  433 + uint32_t res;
  434 +
  435 + res = (uint16_t)do_usat((int16_t)x, shift);
  436 + res |= do_usat(((int32_t)x) >> 16, shift) << 16;
  437 + return res;
  438 +}
target-arm/translate.c
@@ -226,7 +226,6 @@ static void gen_smul_dual(TCGv a, TCGv b) @@ -226,7 +226,6 @@ static void gen_smul_dual(TCGv a, TCGv b)
226 { 226 {
227 TCGv tmp1 = new_tmp(); 227 TCGv tmp1 = new_tmp();
228 TCGv tmp2 = new_tmp(); 228 TCGv tmp2 = new_tmp();
229 - TCGv res;  
230 tcg_gen_ext8s_i32(tmp1, a); 229 tcg_gen_ext8s_i32(tmp1, a);
231 tcg_gen_ext8s_i32(tmp2, b); 230 tcg_gen_ext8s_i32(tmp2, b);
232 tcg_gen_mul_i32(tmp1, tmp1, tmp2); 231 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
@@ -495,49 +494,93 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags) @@ -495,49 +494,93 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
495 } 494 }
496 }; 495 };
497 496
498 -#define PAS_OP(pfx) { \  
499 - gen_op_ ## pfx ## add16_T0_T1, \  
500 - gen_op_ ## pfx ## addsubx_T0_T1, \  
501 - gen_op_ ## pfx ## subaddx_T0_T1, \  
502 - gen_op_ ## pfx ## sub16_T0_T1, \  
503 - gen_op_ ## pfx ## add8_T0_T1, \  
504 - NULL, \  
505 - NULL, \  
506 - gen_op_ ## pfx ## sub8_T0_T1 }  
507 -  
508 -static GenOpFunc *gen_arm_parallel_addsub[8][8] = {  
509 - {},  
510 - PAS_OP(s),  
511 - PAS_OP(q),  
512 - PAS_OP(sh),  
513 - {},  
514 - PAS_OP(u),  
515 - PAS_OP(uq),  
516 - PAS_OP(uh),  
517 -}; 497 +#define PAS_OP(pfx) \
  498 + switch (op2) { \
  499 + case 0: gen_pas_helper(glue(pfx,add16)); break; \
  500 + case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
  501 + case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
  502 + case 3: gen_pas_helper(glue(pfx,sub16)); break; \
  503 + case 4: gen_pas_helper(glue(pfx,add8)); break; \
  504 + case 7: gen_pas_helper(glue(pfx,sub8)); break; \
  505 + }
/* Generate code for an ARM-encoding parallel add/subtract.  op1 selects
   the variant prefix (s/q/sh/u/uq/uh), op2 selects the lane operation
   (add16/addsubx/subaddx/sub16/add8/sub8) via the PAS_OP macro above.
   The result is written back into a; b is the second operand. */
void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
{
    TCGv tmp;

    switch (op1) {
/* The s/u variants also produce GE flags, so their helpers take a
   pointer to env->GE as an extra argument. */
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 1:
        tmp = tcg_temp_new(TCG_TYPE_PTR);
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(s)
        break;
    case 5:
        tmp = tcg_temp_new(TCG_TYPE_PTR);
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(u)
        break;
#undef gen_pas_helper
/* The saturating/halving variants do not touch GE. */
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 2:
        PAS_OP(q);
        break;
    case 3:
        PAS_OP(sh);
        break;
    case 6:
        PAS_OP(uq);
        break;
    case 7:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
    /* op1 values 0 and 4 generate nothing; the ARM decoder rejects
       them ((op1 & 3) == 0 goes to illegal_op). */
}
518 #undef PAS_OP 539 #undef PAS_OP
519 540
520 -/* For unknown reasons Arm and Thumb-2 use arbitrarily diffenet encodings. */  
521 -#define PAS_OP(pfx) { \  
522 - gen_op_ ## pfx ## add8_T0_T1, \  
523 - gen_op_ ## pfx ## add16_T0_T1, \  
524 - gen_op_ ## pfx ## addsubx_T0_T1, \  
525 - NULL, \  
526 - gen_op_ ## pfx ## sub8_T0_T1, \  
527 - gen_op_ ## pfx ## sub16_T0_T1, \  
528 - gen_op_ ## pfx ## subaddx_T0_T1, \  
529 - NULL }  
530 -  
531 -static GenOpFunc *gen_thumb2_parallel_addsub[8][8] = {  
532 - PAS_OP(s),  
533 - PAS_OP(q),  
534 - PAS_OP(sh),  
535 - {},  
536 - PAS_OP(u),  
537 - PAS_OP(uq),  
538 - PAS_OP(uh),  
539 - {}  
540 -}; 541 +/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings. */
  542 +#define PAS_OP(pfx) \
  543 + switch (op2) { \
  544 + case 0: gen_pas_helper(glue(pfx,add8)); break; \
  545 + case 1: gen_pas_helper(glue(pfx,add16)); break; \
  546 + case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
  547 + case 4: gen_pas_helper(glue(pfx,sub8)); break; \
  548 + case 5: gen_pas_helper(glue(pfx,sub16)); break; \
  549 + case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
  550 + }
/* Generate code for a Thumb-2-encoding parallel add/subtract.  Same
   operations as gen_arm_parallel_addsub but with the different op1/op2
   encoding Thumb-2 uses (see the PAS_OP table above). */
void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
{
    TCGv tmp;

    switch (op1) {
/* The s/u variants also produce GE flags, so their helpers take a
   pointer to env->GE as an extra argument. */
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 0:
        tmp = tcg_temp_new(TCG_TYPE_PTR);
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(s)
        break;
    case 4:
        tmp = tcg_temp_new(TCG_TYPE_PTR);
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(u)
        break;
#undef gen_pas_helper
/* The saturating/halving variants do not touch GE. */
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 1:
        PAS_OP(q);
        break;
    case 2:
        PAS_OP(sh);
        break;
    case 5:
        PAS_OP(uq);
        break;
    case 6:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
    /* NOTE(review): op1 values 3 and 7 generate nothing here —
       presumably rejected by the Thumb-2 decoder; verify at the call
       site. */
}
541 #undef PAS_OP 584 #undef PAS_OP
542 585
543 static GenOpFunc1 *gen_test_cc[14] = { 586 static GenOpFunc1 *gen_test_cc[14] = {
@@ -4906,6 +4949,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) @@ -4906,6 +4949,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
4906 unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh; 4949 unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
4907 TCGv tmp; 4950 TCGv tmp;
4908 TCGv tmp2; 4951 TCGv tmp2;
  4952 + TCGv tmp3;
4909 4953
4910 insn = ldl_code(s->pc); 4954 insn = ldl_code(s->pc);
4911 s->pc += 4; 4955 s->pc += 4;
@@ -5591,13 +5635,14 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) @@ -5591,13 +5635,14 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
5591 switch ((insn >> 23) & 3) { 5635 switch ((insn >> 23) & 3) {
5592 case 0: /* Parallel add/subtract. */ 5636 case 0: /* Parallel add/subtract. */
5593 op1 = (insn >> 20) & 7; 5637 op1 = (insn >> 20) & 7;
5594 - gen_movl_T0_reg(s, rn);  
5595 - gen_movl_T1_reg(s, rm); 5638 + tmp = load_reg(s, rn);
  5639 + tmp2 = load_reg(s, rm);
5596 sh = (insn >> 5) & 7; 5640 sh = (insn >> 5) & 7;
5597 if ((op1 & 3) == 0 || sh == 5 || sh == 6) 5641 if ((op1 & 3) == 0 || sh == 5 || sh == 6)
5598 goto illegal_op; 5642 goto illegal_op;
5599 - gen_arm_parallel_addsub[op1][sh]();  
5600 - gen_movl_reg_T0(s, rd); 5643 + gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
  5644 + dead_tmp(tmp2);
  5645 + store_reg(s, rd, tmp);
5601 break; 5646 break;
5602 case 1: 5647 case 1:
5603 if ((insn & 0x00700020) == 0) { 5648 if ((insn & 0x00700020) == 0) {
@@ -5620,40 +5665,44 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) @@ -5620,40 +5665,44 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
5620 store_reg(s, rd, tmp); 5665 store_reg(s, rd, tmp);
5621 } else if ((insn & 0x00200020) == 0x00200000) { 5666 } else if ((insn & 0x00200020) == 0x00200000) {
5622 /* [us]sat */ 5667 /* [us]sat */
5623 - gen_movl_T1_reg(s, rm); 5668 + tmp = load_reg(s, rm);
5624 shift = (insn >> 7) & 0x1f; 5669 shift = (insn >> 7) & 0x1f;
5625 if (insn & (1 << 6)) { 5670 if (insn & (1 << 6)) {
5626 if (shift == 0) 5671 if (shift == 0)
5627 shift = 31; 5672 shift = 31;
5628 - gen_op_sarl_T1_im(shift); 5673 + tcg_gen_sari_i32(tmp, tmp, shift);
5629 } else { 5674 } else {
5630 - gen_op_shll_T1_im(shift); 5675 + tcg_gen_shli_i32(tmp, tmp, shift);
5631 } 5676 }
5632 sh = (insn >> 16) & 0x1f; 5677 sh = (insn >> 16) & 0x1f;
5633 if (sh != 0) { 5678 if (sh != 0) {
5634 if (insn & (1 << 22)) 5679 if (insn & (1 << 22))
5635 - gen_op_usat_T1(sh); 5680 + gen_helper_usat(tmp, tmp, tcg_const_i32(sh));
5636 else 5681 else
5637 - gen_op_ssat_T1(sh); 5682 + gen_helper_ssat(tmp, tmp, tcg_const_i32(sh));
5638 } 5683 }
5639 - gen_movl_T1_reg(s, rd); 5684 + store_reg(s, rd, tmp);
5640 } else if ((insn & 0x00300fe0) == 0x00200f20) { 5685 } else if ((insn & 0x00300fe0) == 0x00200f20) {
5641 /* [us]sat16 */ 5686 /* [us]sat16 */
5642 - gen_movl_T1_reg(s, rm); 5687 + tmp = load_reg(s, rm);
5643 sh = (insn >> 16) & 0x1f; 5688 sh = (insn >> 16) & 0x1f;
5644 if (sh != 0) { 5689 if (sh != 0) {
5645 if (insn & (1 << 22)) 5690 if (insn & (1 << 22))
5646 - gen_op_usat16_T1(sh); 5691 + gen_helper_usat16(tmp, tmp, tcg_const_i32(sh));
5647 else 5692 else
5648 - gen_op_ssat16_T1(sh); 5693 + gen_helper_ssat16(tmp, tmp, tcg_const_i32(sh));
5649 } 5694 }
5650 - gen_movl_T1_reg(s, rd); 5695 + store_reg(s, rd, tmp);
5651 } else if ((insn & 0x00700fe0) == 0x00000fa0) { 5696 } else if ((insn & 0x00700fe0) == 0x00000fa0) {
5652 /* Select bytes. */ 5697 /* Select bytes. */
5653 - gen_movl_T0_reg(s, rn);  
5654 - gen_movl_T1_reg(s, rm);  
5655 - gen_op_sel_T0_T1();  
5656 - gen_movl_reg_T0(s, rd); 5698 + tmp = load_reg(s, rn);
  5699 + tmp2 = load_reg(s, rm);
  5700 + tmp3 = new_tmp();
  5701 + tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE));
  5702 + gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
  5703 + dead_tmp(tmp3);
  5704 + dead_tmp(tmp2);
  5705 + store_reg(s, rd, tmp);
5657 } else if ((insn & 0x000003e0) == 0x00000060) { 5706 } else if ((insn & 0x000003e0) == 0x00000060) {
5658 gen_movl_T1_reg(s, rm); 5707 gen_movl_T1_reg(s, rm);
5659 shift = (insn >> 10) & 3; 5708 shift = (insn >> 10) & 3;
@@ -5755,15 +5804,17 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) @@ -5755,15 +5804,17 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
5755 op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7); 5804 op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7);
5756 switch (op1) { 5805 switch (op1) {
5757 case 0: /* Unsigned sum of absolute differences. */ 5806 case 0: /* Unsigned sum of absolute differences. */
5758 - goto illegal_op;  
5759 - gen_movl_T0_reg(s, rm);  
5760 - gen_movl_T1_reg(s, rs);  
5761 - gen_op_usad8_T0_T1(); 5807 + ARCH(6);
  5808 + tmp = load_reg(s, rm);
  5809 + tmp2 = load_reg(s, rs);
  5810 + gen_helper_usad8(tmp, tmp, tmp2);
  5811 + dead_tmp(tmp2);
5762 if (rn != 15) { 5812 if (rn != 15) {
5763 - gen_movl_T1_reg(s, rn);  
5764 - gen_op_addl_T0_T1(); 5813 + tmp2 = load_reg(s, rn);
  5814 + tcg_gen_add_i32(tmp, tmp, tmp2);
  5815 + dead_tmp(tmp2);
5765 } 5816 }
5766 - gen_movl_reg_T0(s, rd); 5817 + store_reg(s, rd, tmp);
5767 break; 5818 break;
5768 case 0x20: case 0x24: case 0x28: case 0x2c: 5819 case 0x20: case 0x24: case 0x28: case 0x2c:
5769 /* Bitfield insert/clear. */ 5820 /* Bitfield insert/clear. */
@@ -6120,6 +6171,8 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) @@ -6120,6 +6171,8 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
6120 uint32_t insn, imm, shift, offset, addr; 6171 uint32_t insn, imm, shift, offset, addr;
6121 uint32_t rd, rn, rm, rs; 6172 uint32_t rd, rn, rm, rs;
6122 TCGv tmp; 6173 TCGv tmp;
  6174 + TCGv tmp2;
  6175 + TCGv tmp3;
6123 int op; 6176 int op;
6124 int shiftop; 6177 int shiftop;
6125 int conds; 6178 int conds;
@@ -6464,10 +6517,11 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) @@ -6464,10 +6517,11 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
6464 shift = (insn >> 4) & 7; 6517 shift = (insn >> 4) & 7;
6465 if ((op & 3) == 3 || (shift & 3) == 3) 6518 if ((op & 3) == 3 || (shift & 3) == 3)
6466 goto illegal_op; 6519 goto illegal_op;
6467 - gen_movl_T0_reg(s, rn);  
6468 - gen_movl_T1_reg(s, rm);  
6469 - gen_thumb2_parallel_addsub[op][shift]();  
6470 - gen_movl_reg_T0(s, rd); 6520 + tmp = load_reg(s, rn);
  6521 + tmp2 = load_reg(s, rm);
  6522 + gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
  6523 + dead_tmp(tmp2);
  6524 + store_reg(s, rd, tmp);
6471 break; 6525 break;
6472 case 3: /* Other data processing. */ 6526 case 3: /* Other data processing. */
6473 op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7); 6527 op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
@@ -6498,7 +6552,10 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) @@ -6498,7 +6552,10 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
6498 break; 6552 break;
6499 case 0x10: /* sel */ 6553 case 0x10: /* sel */
6500 gen_movl_T1_reg(s, rm); 6554 gen_movl_T1_reg(s, rm);
6501 - gen_op_sel_T0_T1(); 6555 + tmp3 = new_tmp();
  6556 + tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE));
  6557 + gen_helper_sel_flags(cpu_T[0], tmp3, cpu_T[0], cpu_T[1]);
  6558 + dead_tmp(tmp3);
6502 break; 6559 break;
6503 case 0x18: /* clz */ 6560 case 0x18: /* clz */
6504 gen_helper_clz(cpu_T[0], cpu_T[0]); 6561 gen_helper_clz(cpu_T[0], cpu_T[0]);
@@ -6581,7 +6638,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) @@ -6581,7 +6638,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
6581 gen_movl_reg_T0(s, rd); 6638 gen_movl_reg_T0(s, rd);
6582 break; 6639 break;
6583 case 7: /* Unsigned sum of absolute differences. */ 6640 case 7: /* Unsigned sum of absolute differences. */
6584 - gen_op_usad8_T0_T1(); 6641 + gen_helper_usad8(cpu_T[0], cpu_T[0], cpu_T[1]);
6585 if (rs != 15) { 6642 if (rs != 15) {
6586 gen_movl_T1_reg(s, rs); 6643 gen_movl_T1_reg(s, rs);
6587 gen_op_addl_T0_T1(); 6644 gen_op_addl_T0_T1();
@@ -6821,63 +6878,64 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) @@ -6821,63 +6878,64 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
6821 op = (insn >> 21) & 7; 6878 op = (insn >> 21) & 7;
6822 imm = insn & 0x1f; 6879 imm = insn & 0x1f;
6823 shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c); 6880 shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
6824 - if (rn == 15)  
6825 - gen_op_movl_T1_im(0);  
6826 - else  
6827 - gen_movl_T1_reg(s, rn); 6881 + if (rn == 15) {
  6882 + tmp = new_tmp();
  6883 + tcg_gen_movi_i32(tmp, 0);
  6884 + } else {
  6885 + tmp = load_reg(s, rn);
  6886 + }
6828 switch (op) { 6887 switch (op) {
6829 case 2: /* Signed bitfield extract. */ 6888 case 2: /* Signed bitfield extract. */
6830 imm++; 6889 imm++;
6831 if (shift + imm > 32) 6890 if (shift + imm > 32)
6832 goto illegal_op; 6891 goto illegal_op;
6833 if (imm < 32) 6892 if (imm < 32)
6834 - gen_sbfx(cpu_T[1], shift, imm); 6893 + gen_sbfx(tmp, shift, imm);
6835 break; 6894 break;
6836 case 6: /* Unsigned bitfield extract. */ 6895 case 6: /* Unsigned bitfield extract. */
6837 imm++; 6896 imm++;
6838 if (shift + imm > 32) 6897 if (shift + imm > 32)
6839 goto illegal_op; 6898 goto illegal_op;
6840 if (imm < 32) 6899 if (imm < 32)
6841 - gen_ubfx(cpu_T[1], shift, (1u << imm) - 1); 6900 + gen_ubfx(tmp, shift, (1u << imm) - 1);
6842 break; 6901 break;
6843 case 3: /* Bitfield insert/clear. */ 6902 case 3: /* Bitfield insert/clear. */
6844 if (imm < shift) 6903 if (imm < shift)
6845 goto illegal_op; 6904 goto illegal_op;
6846 imm = imm + 1 - shift; 6905 imm = imm + 1 - shift;
6847 if (imm != 32) { 6906 if (imm != 32) {
6848 - gen_movl_T0_reg(s, rd);  
6849 - gen_bfi(cpu_T[1], cpu_T[0], cpu_T[1], 6907 + tmp2 = load_reg(s, rd);
  6908 + gen_bfi(tmp, tmp2, tmp,
6850 shift, ((1u << imm) - 1) << shift); 6909 shift, ((1u << imm) - 1) << shift);
  6910 + dead_tmp(tmp2);
6851 } 6911 }
6852 break; 6912 break;
6853 case 7: 6913 case 7:
6854 goto illegal_op; 6914 goto illegal_op;
6855 default: /* Saturate. */ 6915 default: /* Saturate. */
6856 - gen_movl_T1_reg(s, rn);  
6857 if (shift) { 6916 if (shift) {
6858 if (op & 1) 6917 if (op & 1)
6859 - gen_op_sarl_T1_im(shift); 6918 + tcg_gen_sari_i32(tmp, tmp, shift);
6860 else 6919 else
6861 - gen_op_shll_T1_im(shift); 6920 + tcg_gen_shli_i32(tmp, tmp, shift);
6862 } 6921 }
  6922 + tmp2 = tcg_const_i32(imm);
6863 if (op & 4) { 6923 if (op & 4) {
6864 /* Unsigned. */ 6924 /* Unsigned. */
6865 - gen_op_ssat_T1(imm);  
6866 if ((op & 1) && shift == 0) 6925 if ((op & 1) && shift == 0)
6867 - gen_op_usat16_T1(imm); 6926 + gen_helper_usat16(tmp, tmp, tmp2);
6868 else 6927 else
6869 - gen_op_usat_T1(imm); 6928 + gen_helper_usat(tmp, tmp, tmp2);
6870 } else { 6929 } else {
6871 /* Signed. */ 6930 /* Signed. */
6872 - gen_op_ssat_T1(imm);  
6873 if ((op & 1) && shift == 0) 6931 if ((op & 1) && shift == 0)
6874 - gen_op_ssat16_T1(imm); 6932 + gen_helper_ssat16(tmp, tmp, tmp2);
6875 else 6933 else
6876 - gen_op_ssat_T1(imm); 6934 + gen_helper_ssat(tmp, tmp, tmp2);
6877 } 6935 }
6878 break; 6936 break;
6879 } 6937 }
6880 - gen_movl_reg_T1(s, rd); 6938 + store_reg(s, rd, tmp);
6881 } else { 6939 } else {
6882 imm = ((insn & 0x04000000) >> 15) 6940 imm = ((insn & 0x04000000) >> 15)
6883 | ((insn & 0x7000) >> 4) | (insn & 0xff); 6941 | ((insn & 0x7000) >> 4) | (insn & 0xff);
tcg/tcg-op.h
@@ -237,6 +237,18 @@ static inline void tcg_gen_helper_1_2(void *func, TCGv ret, @@ -237,6 +237,18 @@ static inline void tcg_gen_helper_1_2(void *func, TCGv ret,
237 1, &ret, 2, args); 237 1, &ret, 2, args);
238 } 238 }
239 239
  240 +static inline void tcg_gen_helper_1_3(void *func, TCGv ret,
  241 + TCGv arg1, TCGv arg2, TCGv arg3)
  242 +{
  243 + TCGv args[3];
  244 + args[0] = arg1;
  245 + args[1] = arg2;
  246 + args[2] = arg3;
  247 + tcg_gen_call(&tcg_ctx,
  248 + tcg_const_ptr((tcg_target_long)func), TCG_HELPER_CALL_FLAGS,
  249 + 1, &ret, 3, args);
  250 +}
  251 +
240 static inline void tcg_gen_helper_1_4(void *func, TCGv ret, 252 static inline void tcg_gen_helper_1_4(void *func, TCGv ret,
241 TCGv arg1, TCGv arg2, TCGv arg3, 253 TCGv arg1, TCGv arg2, TCGv arg3,
242 TCGv arg4) 254 TCGv arg4)
@@ -1416,3 +1428,10 @@ static inline void tcg_gen_qemu_st64(TCGv arg, TCGv addr, int mem_index) @@ -1416,3 +1428,10 @@ static inline void tcg_gen_qemu_st64(TCGv arg, TCGv addr, int mem_index)
1416 #define tcg_gen_ext_tl_i64 tcg_gen_ext_i32_i64 1428 #define tcg_gen_ext_tl_i64 tcg_gen_ext_i32_i64
1417 #define tcg_const_tl tcg_const_i32 1429 #define tcg_const_tl tcg_const_i32
1418 #endif 1430 #endif
  1431 +
  1432 +#if TCG_TARGET_REG_BITS == 32
  1433 +#define tcg_gen_addi_ptr tcg_gen_addi_i32
  1434 +#else /* TCG_TARGET_REG_BITS == 32 */
  1435 +#define tcg_gen_addi_ptr tcg_gen_addi_i64
  1436 +#endif /* TCG_TARGET_REG_BITS != 32 */
  1437 +