Commit b10fa3c906a3d48ee58d64663f9d020ef90d897d
1 parent
507563e8
target-mips: optimize gen_muldiv()
Optimize code generation in gen_muldiv(): - Don't do sign extension when the value is already guaranteed to be sign extended (otherwise, results are marked as UNPREDICTABLE). - Access the LO, HI registers directly instead of writing them through a temporary variable. Signed-off-by: Aurelien Jarno <aurelien@aurel32.net> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5681 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
1 changed file
with
47 additions
and
115 deletions
target-mips/translate.c
... | ... | @@ -604,27 +604,7 @@ static inline void gen_store_gpr (TCGv t, int reg) |
604 | 604 | tcg_gen_mov_tl(cpu_gpr[reg], t); |
605 | 605 | } |
606 | 606 | |
607 | -/* Moves to/from HI and LO registers. */ | |
608 | -static inline void gen_load_HI (TCGv t, int reg) | |
609 | -{ | |
610 | - tcg_gen_mov_tl(t, cpu_HI[reg]); | |
611 | -} | |
612 | - | |
613 | -static inline void gen_store_HI (TCGv t, int reg) | |
614 | -{ | |
615 | - tcg_gen_mov_tl(cpu_HI[reg], t); | |
616 | -} | |
617 | - | |
618 | -static inline void gen_load_LO (TCGv t, int reg) | |
619 | -{ | |
620 | - tcg_gen_mov_tl(t, cpu_LO[reg]); | |
621 | -} | |
622 | - | |
623 | -static inline void gen_store_LO (TCGv t, int reg) | |
624 | -{ | |
625 | - tcg_gen_mov_tl(cpu_LO[reg], t); | |
626 | -} | |
627 | - | |
607 | +/* Moves to/from ACX register. */ | |
628 | 608 | static inline void gen_load_ACX (TCGv t, int reg) |
629 | 609 | { |
630 | 610 | tcg_gen_mov_tl(t, cpu_ACX[reg]); |
... | ... | @@ -1856,23 +1836,23 @@ static void gen_HILO (DisasContext *ctx, uint32_t opc, int reg) |
1856 | 1836 | } |
1857 | 1837 | switch (opc) { |
1858 | 1838 | case OPC_MFHI: |
1859 | - gen_load_HI(t0, 0); | |
1839 | + tcg_gen_mov_tl(t0, cpu_HI[0]); | |
1860 | 1840 | gen_store_gpr(t0, reg); |
1861 | 1841 | opn = "mfhi"; |
1862 | 1842 | break; |
1863 | 1843 | case OPC_MFLO: |
1864 | - gen_load_LO(t0, 0); | |
1844 | + tcg_gen_mov_tl(t0, cpu_LO[0]); | |
1865 | 1845 | gen_store_gpr(t0, reg); |
1866 | 1846 | opn = "mflo"; |
1867 | 1847 | break; |
1868 | 1848 | case OPC_MTHI: |
1869 | 1849 | gen_load_gpr(t0, reg); |
1870 | - gen_store_HI(t0, 0); | |
1850 | + tcg_gen_mov_tl(cpu_HI[0], t0); | |
1871 | 1851 | opn = "mthi"; |
1872 | 1852 | break; |
1873 | 1853 | case OPC_MTLO: |
1874 | 1854 | gen_load_gpr(t0, reg); |
1875 | - gen_store_LO(t0, 0); | |
1855 | + tcg_gen_mov_tl(cpu_LO[0], t0); | |
1876 | 1856 | opn = "mtlo"; |
1877 | 1857 | break; |
1878 | 1858 | default: |
... | ... | @@ -1899,27 +1879,28 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
1899 | 1879 | { |
1900 | 1880 | int l1 = gen_new_label(); |
1901 | 1881 | |
1902 | - tcg_gen_ext32s_tl(t0, t0); | |
1903 | - tcg_gen_ext32s_tl(t1, t1); | |
1904 | 1882 | tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1); |
1905 | 1883 | { |
1906 | - TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64); | |
1907 | - TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64); | |
1908 | - TCGv r_tmp3 = tcg_temp_new(TCG_TYPE_I64); | |
1909 | - | |
1910 | - tcg_gen_ext_tl_i64(r_tmp1, t0); | |
1911 | - tcg_gen_ext_tl_i64(r_tmp2, t1); | |
1912 | - tcg_gen_div_i64(r_tmp3, r_tmp1, r_tmp2); | |
1913 | - tcg_gen_rem_i64(r_tmp2, r_tmp1, r_tmp2); | |
1914 | - tcg_gen_trunc_i64_tl(t0, r_tmp3); | |
1915 | - tcg_gen_trunc_i64_tl(t1, r_tmp2); | |
1884 | + int l2 = gen_new_label(); | |
1885 | + TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_I32); | |
1886 | + TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_I32); | |
1887 | + TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_I32); | |
1888 | + | |
1889 | + tcg_gen_trunc_tl_i32(r_tmp1, t0); | |
1890 | + tcg_gen_trunc_tl_i32(r_tmp2, t1); | |
1891 | + tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp1, -1 << 31, l2); | |
1892 | + tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp2, -1, l2); | |
1893 | + tcg_gen_ext32s_tl(cpu_LO[0], t0); | |
1894 | + tcg_gen_movi_tl(cpu_HI[0], 0); | |
1895 | + tcg_gen_br(l1); | |
1896 | + gen_set_label(l2); | |
1897 | + tcg_gen_div_i32(r_tmp3, r_tmp1, r_tmp2); | |
1898 | + tcg_gen_rem_i32(r_tmp2, r_tmp1, r_tmp2); | |
1899 | + tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3); | |
1900 | + tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp2); | |
1916 | 1901 | tcg_temp_free(r_tmp1); |
1917 | 1902 | tcg_temp_free(r_tmp2); |
1918 | 1903 | tcg_temp_free(r_tmp3); |
1919 | - tcg_gen_ext32s_tl(t0, t0); | |
1920 | - tcg_gen_ext32s_tl(t1, t1); | |
1921 | - gen_store_LO(t0, 0); | |
1922 | - gen_store_HI(t1, 0); | |
1923 | 1904 | } |
1924 | 1905 | gen_set_label(l1); |
1925 | 1906 | } |
... | ... | @@ -1940,13 +1921,11 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
1940 | 1921 | tcg_gen_trunc_tl_i32(r_tmp2, t1); |
1941 | 1922 | tcg_gen_divu_i32(r_tmp3, r_tmp1, r_tmp2); |
1942 | 1923 | tcg_gen_remu_i32(r_tmp1, r_tmp1, r_tmp2); |
1943 | - tcg_gen_ext_i32_tl(t0, r_tmp3); | |
1944 | - tcg_gen_ext_i32_tl(t1, r_tmp1); | |
1924 | + tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3); | |
1925 | + tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp1); | |
1945 | 1926 | tcg_temp_free(r_tmp1); |
1946 | 1927 | tcg_temp_free(r_tmp2); |
1947 | 1928 | tcg_temp_free(r_tmp3); |
1948 | - gen_store_LO(t0, 0); | |
1949 | - gen_store_HI(t1, 0); | |
1950 | 1929 | } |
1951 | 1930 | gen_set_label(l1); |
1952 | 1931 | } |
... | ... | @@ -1957,8 +1936,6 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
1957 | 1936 | TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64); |
1958 | 1937 | TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64); |
1959 | 1938 | |
1960 | - tcg_gen_ext32s_tl(t0, t0); | |
1961 | - tcg_gen_ext32s_tl(t1, t1); | |
1962 | 1939 | tcg_gen_ext_tl_i64(r_tmp1, t0); |
1963 | 1940 | tcg_gen_ext_tl_i64(r_tmp2, t1); |
1964 | 1941 | tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2); |
... | ... | @@ -1967,10 +1944,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
1967 | 1944 | tcg_gen_shri_i64(r_tmp1, r_tmp1, 32); |
1968 | 1945 | tcg_gen_trunc_i64_tl(t1, r_tmp1); |
1969 | 1946 | tcg_temp_free(r_tmp1); |
1970 | - tcg_gen_ext32s_tl(t0, t0); | |
1971 | - tcg_gen_ext32s_tl(t1, t1); | |
1972 | - gen_store_LO(t0, 0); | |
1973 | - gen_store_HI(t1, 0); | |
1947 | + tcg_gen_ext32s_tl(cpu_LO[0], t0); | |
1948 | + tcg_gen_ext32s_tl(cpu_HI[0], t1); | |
1974 | 1949 | } |
1975 | 1950 | opn = "mult"; |
1976 | 1951 | break; |
... | ... | @@ -1989,10 +1964,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
1989 | 1964 | tcg_gen_shri_i64(r_tmp1, r_tmp1, 32); |
1990 | 1965 | tcg_gen_trunc_i64_tl(t1, r_tmp1); |
1991 | 1966 | tcg_temp_free(r_tmp1); |
1992 | - tcg_gen_ext32s_tl(t0, t0); | |
1993 | - tcg_gen_ext32s_tl(t1, t1); | |
1994 | - gen_store_LO(t0, 0); | |
1995 | - gen_store_HI(t1, 0); | |
1967 | + tcg_gen_ext32s_tl(cpu_LO[0], t0); | |
1968 | + tcg_gen_ext32s_tl(cpu_HI[0], t1); | |
1996 | 1969 | } |
1997 | 1970 | opn = "multu"; |
1998 | 1971 | break; |
... | ... | @@ -2007,24 +1980,12 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
2007 | 1980 | |
2008 | 1981 | tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2); |
2009 | 1982 | tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2); |
2010 | - { | |
2011 | - tcg_gen_movi_tl(t1, 0); | |
2012 | - gen_store_LO(t0, 0); | |
2013 | - gen_store_HI(t1, 0); | |
2014 | - tcg_gen_br(l1); | |
2015 | - } | |
1983 | + tcg_gen_mov_tl(cpu_LO[0], t0); | |
1984 | + tcg_gen_movi_tl(cpu_HI[0], 0); | |
1985 | + tcg_gen_br(l1); | |
2016 | 1986 | gen_set_label(l2); |
2017 | - { | |
2018 | - TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64); | |
2019 | - TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64); | |
2020 | - | |
2021 | - tcg_gen_div_i64(r_tmp1, t0, t1); | |
2022 | - tcg_gen_rem_i64(r_tmp2, t0, t1); | |
2023 | - gen_store_LO(r_tmp1, 0); | |
2024 | - gen_store_HI(r_tmp2, 0); | |
2025 | - tcg_temp_free(r_tmp1); | |
2026 | - tcg_temp_free(r_tmp2); | |
2027 | - } | |
1987 | + tcg_gen_div_i64(cpu_LO[0], t0, t1); | |
1988 | + tcg_gen_rem_i64(cpu_HI[0], t0, t1); | |
2028 | 1989 | } |
2029 | 1990 | gen_set_label(l1); |
2030 | 1991 | } |
... | ... | @@ -2035,17 +1996,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
2035 | 1996 | int l1 = gen_new_label(); |
2036 | 1997 | |
2037 | 1998 | tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1); |
2038 | - { | |
2039 | - TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64); | |
2040 | - TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64); | |
2041 | - | |
2042 | - tcg_gen_divu_i64(r_tmp1, t0, t1); | |
2043 | - tcg_gen_remu_i64(r_tmp2, t0, t1); | |
2044 | - tcg_temp_free(r_tmp1); | |
2045 | - tcg_temp_free(r_tmp2); | |
2046 | - gen_store_LO(r_tmp1, 0); | |
2047 | - gen_store_HI(r_tmp2, 0); | |
2048 | - } | |
1999 | + tcg_gen_divu_i64(cpu_LO[0], t0, t1); | |
2000 | + tcg_gen_remu_i64(cpu_HI[0], t0, t1); | |
2049 | 2001 | gen_set_label(l1); |
2050 | 2002 | } |
2051 | 2003 | opn = "ddivu"; |
... | ... | @@ -2064,24 +2016,18 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
2064 | 2016 | TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64); |
2065 | 2017 | TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64); |
2066 | 2018 | |
2067 | - tcg_gen_ext32s_tl(t0, t0); | |
2068 | - tcg_gen_ext32s_tl(t1, t1); | |
2069 | 2019 | tcg_gen_ext_tl_i64(r_tmp1, t0); |
2070 | 2020 | tcg_gen_ext_tl_i64(r_tmp2, t1); |
2071 | 2021 | tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2); |
2072 | - gen_load_LO(t0, 0); | |
2073 | - gen_load_HI(t1, 0); | |
2074 | - tcg_gen_concat_tl_i64(r_tmp2, t0, t1); | |
2022 | + tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]); | |
2075 | 2023 | tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2); |
2076 | 2024 | tcg_temp_free(r_tmp2); |
2077 | 2025 | tcg_gen_trunc_i64_tl(t0, r_tmp1); |
2078 | 2026 | tcg_gen_shri_i64(r_tmp1, r_tmp1, 32); |
2079 | 2027 | tcg_gen_trunc_i64_tl(t1, r_tmp1); |
2080 | 2028 | tcg_temp_free(r_tmp1); |
2081 | - tcg_gen_ext32s_tl(t0, t0); | |
2082 | - tcg_gen_ext32s_tl(t1, t1); | |
2083 | - gen_store_LO(t0, 0); | |
2084 | - gen_store_HI(t1, 0); | |
2029 | + tcg_gen_ext32s_tl(cpu_LO[0], t0); | |
2030 | + tcg_gen_ext32s_tl(cpu_LO[1], t1); | |
2085 | 2031 | } |
2086 | 2032 | opn = "madd"; |
2087 | 2033 | break; |
... | ... | @@ -2095,19 +2041,15 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
2095 | 2041 | tcg_gen_extu_tl_i64(r_tmp1, t0); |
2096 | 2042 | tcg_gen_extu_tl_i64(r_tmp2, t1); |
2097 | 2043 | tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2); |
2098 | - gen_load_LO(t0, 0); | |
2099 | - gen_load_HI(t1, 0); | |
2100 | - tcg_gen_concat_tl_i64(r_tmp2, t0, t1); | |
2044 | + tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]); | |
2101 | 2045 | tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2); |
2102 | 2046 | tcg_temp_free(r_tmp2); |
2103 | 2047 | tcg_gen_trunc_i64_tl(t0, r_tmp1); |
2104 | 2048 | tcg_gen_shri_i64(r_tmp1, r_tmp1, 32); |
2105 | 2049 | tcg_gen_trunc_i64_tl(t1, r_tmp1); |
2106 | 2050 | tcg_temp_free(r_tmp1); |
2107 | - tcg_gen_ext32s_tl(t0, t0); | |
2108 | - tcg_gen_ext32s_tl(t1, t1); | |
2109 | - gen_store_LO(t0, 0); | |
2110 | - gen_store_HI(t1, 0); | |
2051 | + tcg_gen_ext32s_tl(cpu_LO[0], t0); | |
2052 | + tcg_gen_ext32s_tl(cpu_HI[0], t1); | |
2111 | 2053 | } |
2112 | 2054 | opn = "maddu"; |
2113 | 2055 | break; |
... | ... | @@ -2116,24 +2058,18 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
2116 | 2058 | TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64); |
2117 | 2059 | TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64); |
2118 | 2060 | |
2119 | - tcg_gen_ext32s_tl(t0, t0); | |
2120 | - tcg_gen_ext32s_tl(t1, t1); | |
2121 | 2061 | tcg_gen_ext_tl_i64(r_tmp1, t0); |
2122 | 2062 | tcg_gen_ext_tl_i64(r_tmp2, t1); |
2123 | 2063 | tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2); |
2124 | - gen_load_LO(t0, 0); | |
2125 | - gen_load_HI(t1, 0); | |
2126 | - tcg_gen_concat_tl_i64(r_tmp2, t0, t1); | |
2064 | + tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]); | |
2127 | 2065 | tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2); |
2128 | 2066 | tcg_temp_free(r_tmp2); |
2129 | 2067 | tcg_gen_trunc_i64_tl(t0, r_tmp1); |
2130 | 2068 | tcg_gen_shri_i64(r_tmp1, r_tmp1, 32); |
2131 | 2069 | tcg_gen_trunc_i64_tl(t1, r_tmp1); |
2132 | 2070 | tcg_temp_free(r_tmp1); |
2133 | - tcg_gen_ext32s_tl(t0, t0); | |
2134 | - tcg_gen_ext32s_tl(t1, t1); | |
2135 | - gen_store_LO(t0, 0); | |
2136 | - gen_store_HI(t1, 0); | |
2071 | + tcg_gen_ext32s_tl(cpu_LO[0], t0); | |
2072 | + tcg_gen_ext32s_tl(cpu_HI[0], t1); | |
2137 | 2073 | } |
2138 | 2074 | opn = "msub"; |
2139 | 2075 | break; |
... | ... | @@ -2147,19 +2083,15 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
2147 | 2083 | tcg_gen_extu_tl_i64(r_tmp1, t0); |
2148 | 2084 | tcg_gen_extu_tl_i64(r_tmp2, t1); |
2149 | 2085 | tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2); |
2150 | - gen_load_LO(t0, 0); | |
2151 | - gen_load_HI(t1, 0); | |
2152 | - tcg_gen_concat_tl_i64(r_tmp2, t0, t1); | |
2086 | + tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]); | |
2153 | 2087 | tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2); |
2154 | 2088 | tcg_temp_free(r_tmp2); |
2155 | 2089 | tcg_gen_trunc_i64_tl(t0, r_tmp1); |
2156 | 2090 | tcg_gen_shri_i64(r_tmp1, r_tmp1, 32); |
2157 | 2091 | tcg_gen_trunc_i64_tl(t1, r_tmp1); |
2158 | 2092 | tcg_temp_free(r_tmp1); |
2159 | - tcg_gen_ext32s_tl(t0, t0); | |
2160 | - tcg_gen_ext32s_tl(t1, t1); | |
2161 | - gen_store_LO(t0, 0); | |
2162 | - gen_store_HI(t1, 0); | |
2093 | + tcg_gen_ext32s_tl(cpu_LO[0], t0); | |
2094 | + tcg_gen_ext32s_tl(cpu_HI[0], t1); | |
2163 | 2095 | } |
2164 | 2096 | opn = "msubu"; |
2165 | 2097 | break; | ... | ... |