Commit b10fa3c906a3d48ee58d64663f9d020ef90d897d
1 parent
507563e8
target-mips: optimize gen_muldiv()
Optimize code generation in gen_muldiv(): - Don't do sign extension when the value is already guaranteed to be sign extended (otherwise, results are marked as UNPREDICTABLE). - Access the LO, HI registers directly instead of writing them through a temporary variable. Signed-off-by: Aurelien Jarno <aurelien@aurel32.net> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5681 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
1 changed file
with
47 additions
and
115 deletions
target-mips/translate.c
| ... | ... | @@ -604,27 +604,7 @@ static inline void gen_store_gpr (TCGv t, int reg) |
| 604 | 604 | tcg_gen_mov_tl(cpu_gpr[reg], t); |
| 605 | 605 | } |
| 606 | 606 | |
| 607 | -/* Moves to/from HI and LO registers. */ | |
| 608 | -static inline void gen_load_HI (TCGv t, int reg) | |
| 609 | -{ | |
| 610 | - tcg_gen_mov_tl(t, cpu_HI[reg]); | |
| 611 | -} | |
| 612 | - | |
| 613 | -static inline void gen_store_HI (TCGv t, int reg) | |
| 614 | -{ | |
| 615 | - tcg_gen_mov_tl(cpu_HI[reg], t); | |
| 616 | -} | |
| 617 | - | |
| 618 | -static inline void gen_load_LO (TCGv t, int reg) | |
| 619 | -{ | |
| 620 | - tcg_gen_mov_tl(t, cpu_LO[reg]); | |
| 621 | -} | |
| 622 | - | |
| 623 | -static inline void gen_store_LO (TCGv t, int reg) | |
| 624 | -{ | |
| 625 | - tcg_gen_mov_tl(cpu_LO[reg], t); | |
| 626 | -} | |
| 627 | - | |
| 607 | +/* Moves to/from ACX register. */ | |
| 628 | 608 | static inline void gen_load_ACX (TCGv t, int reg) |
| 629 | 609 | { |
| 630 | 610 | tcg_gen_mov_tl(t, cpu_ACX[reg]); |
| ... | ... | @@ -1856,23 +1836,23 @@ static void gen_HILO (DisasContext *ctx, uint32_t opc, int reg) |
| 1856 | 1836 | } |
| 1857 | 1837 | switch (opc) { |
| 1858 | 1838 | case OPC_MFHI: |
| 1859 | - gen_load_HI(t0, 0); | |
| 1839 | + tcg_gen_mov_tl(t0, cpu_HI[0]); | |
| 1860 | 1840 | gen_store_gpr(t0, reg); |
| 1861 | 1841 | opn = "mfhi"; |
| 1862 | 1842 | break; |
| 1863 | 1843 | case OPC_MFLO: |
| 1864 | - gen_load_LO(t0, 0); | |
| 1844 | + tcg_gen_mov_tl(t0, cpu_LO[0]); | |
| 1865 | 1845 | gen_store_gpr(t0, reg); |
| 1866 | 1846 | opn = "mflo"; |
| 1867 | 1847 | break; |
| 1868 | 1848 | case OPC_MTHI: |
| 1869 | 1849 | gen_load_gpr(t0, reg); |
| 1870 | - gen_store_HI(t0, 0); | |
| 1850 | + tcg_gen_mov_tl(cpu_HI[0], t0); | |
| 1871 | 1851 | opn = "mthi"; |
| 1872 | 1852 | break; |
| 1873 | 1853 | case OPC_MTLO: |
| 1874 | 1854 | gen_load_gpr(t0, reg); |
| 1875 | - gen_store_LO(t0, 0); | |
| 1855 | + tcg_gen_mov_tl(cpu_LO[0], t0); | |
| 1876 | 1856 | opn = "mtlo"; |
| 1877 | 1857 | break; |
| 1878 | 1858 | default: |
| ... | ... | @@ -1899,27 +1879,28 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
| 1899 | 1879 | { |
| 1900 | 1880 | int l1 = gen_new_label(); |
| 1901 | 1881 | |
| 1902 | - tcg_gen_ext32s_tl(t0, t0); | |
| 1903 | - tcg_gen_ext32s_tl(t1, t1); | |
| 1904 | 1882 | tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1); |
| 1905 | 1883 | { |
| 1906 | - TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64); | |
| 1907 | - TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64); | |
| 1908 | - TCGv r_tmp3 = tcg_temp_new(TCG_TYPE_I64); | |
| 1909 | - | |
| 1910 | - tcg_gen_ext_tl_i64(r_tmp1, t0); | |
| 1911 | - tcg_gen_ext_tl_i64(r_tmp2, t1); | |
| 1912 | - tcg_gen_div_i64(r_tmp3, r_tmp1, r_tmp2); | |
| 1913 | - tcg_gen_rem_i64(r_tmp2, r_tmp1, r_tmp2); | |
| 1914 | - tcg_gen_trunc_i64_tl(t0, r_tmp3); | |
| 1915 | - tcg_gen_trunc_i64_tl(t1, r_tmp2); | |
| 1884 | + int l2 = gen_new_label(); | |
| 1885 | + TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_I32); | |
| 1886 | + TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_I32); | |
| 1887 | + TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_I32); | |
| 1888 | + | |
| 1889 | + tcg_gen_trunc_tl_i32(r_tmp1, t0); | |
| 1890 | + tcg_gen_trunc_tl_i32(r_tmp2, t1); | |
| 1891 | + tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp1, -1 << 31, l2); | |
| 1892 | + tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp2, -1, l2); | |
| 1893 | + tcg_gen_ext32s_tl(cpu_LO[0], t0); | |
| 1894 | + tcg_gen_movi_tl(cpu_HI[0], 0); | |
| 1895 | + tcg_gen_br(l1); | |
| 1896 | + gen_set_label(l2); | |
| 1897 | + tcg_gen_div_i32(r_tmp3, r_tmp1, r_tmp2); | |
| 1898 | + tcg_gen_rem_i32(r_tmp2, r_tmp1, r_tmp2); | |
| 1899 | + tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3); | |
| 1900 | + tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp2); | |
| 1916 | 1901 | tcg_temp_free(r_tmp1); |
| 1917 | 1902 | tcg_temp_free(r_tmp2); |
| 1918 | 1903 | tcg_temp_free(r_tmp3); |
| 1919 | - tcg_gen_ext32s_tl(t0, t0); | |
| 1920 | - tcg_gen_ext32s_tl(t1, t1); | |
| 1921 | - gen_store_LO(t0, 0); | |
| 1922 | - gen_store_HI(t1, 0); | |
| 1923 | 1904 | } |
| 1924 | 1905 | gen_set_label(l1); |
| 1925 | 1906 | } |
| ... | ... | @@ -1940,13 +1921,11 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
| 1940 | 1921 | tcg_gen_trunc_tl_i32(r_tmp2, t1); |
| 1941 | 1922 | tcg_gen_divu_i32(r_tmp3, r_tmp1, r_tmp2); |
| 1942 | 1923 | tcg_gen_remu_i32(r_tmp1, r_tmp1, r_tmp2); |
| 1943 | - tcg_gen_ext_i32_tl(t0, r_tmp3); | |
| 1944 | - tcg_gen_ext_i32_tl(t1, r_tmp1); | |
| 1924 | + tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3); | |
| 1925 | + tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp1); | |
| 1945 | 1926 | tcg_temp_free(r_tmp1); |
| 1946 | 1927 | tcg_temp_free(r_tmp2); |
| 1947 | 1928 | tcg_temp_free(r_tmp3); |
| 1948 | - gen_store_LO(t0, 0); | |
| 1949 | - gen_store_HI(t1, 0); | |
| 1950 | 1929 | } |
| 1951 | 1930 | gen_set_label(l1); |
| 1952 | 1931 | } |
| ... | ... | @@ -1957,8 +1936,6 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
| 1957 | 1936 | TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64); |
| 1958 | 1937 | TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64); |
| 1959 | 1938 | |
| 1960 | - tcg_gen_ext32s_tl(t0, t0); | |
| 1961 | - tcg_gen_ext32s_tl(t1, t1); | |
| 1962 | 1939 | tcg_gen_ext_tl_i64(r_tmp1, t0); |
| 1963 | 1940 | tcg_gen_ext_tl_i64(r_tmp2, t1); |
| 1964 | 1941 | tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2); |
| ... | ... | @@ -1967,10 +1944,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
| 1967 | 1944 | tcg_gen_shri_i64(r_tmp1, r_tmp1, 32); |
| 1968 | 1945 | tcg_gen_trunc_i64_tl(t1, r_tmp1); |
| 1969 | 1946 | tcg_temp_free(r_tmp1); |
| 1970 | - tcg_gen_ext32s_tl(t0, t0); | |
| 1971 | - tcg_gen_ext32s_tl(t1, t1); | |
| 1972 | - gen_store_LO(t0, 0); | |
| 1973 | - gen_store_HI(t1, 0); | |
| 1947 | + tcg_gen_ext32s_tl(cpu_LO[0], t0); | |
| 1948 | + tcg_gen_ext32s_tl(cpu_HI[0], t1); | |
| 1974 | 1949 | } |
| 1975 | 1950 | opn = "mult"; |
| 1976 | 1951 | break; |
| ... | ... | @@ -1989,10 +1964,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
| 1989 | 1964 | tcg_gen_shri_i64(r_tmp1, r_tmp1, 32); |
| 1990 | 1965 | tcg_gen_trunc_i64_tl(t1, r_tmp1); |
| 1991 | 1966 | tcg_temp_free(r_tmp1); |
| 1992 | - tcg_gen_ext32s_tl(t0, t0); | |
| 1993 | - tcg_gen_ext32s_tl(t1, t1); | |
| 1994 | - gen_store_LO(t0, 0); | |
| 1995 | - gen_store_HI(t1, 0); | |
| 1967 | + tcg_gen_ext32s_tl(cpu_LO[0], t0); | |
| 1968 | + tcg_gen_ext32s_tl(cpu_HI[0], t1); | |
| 1996 | 1969 | } |
| 1997 | 1970 | opn = "multu"; |
| 1998 | 1971 | break; |
| ... | ... | @@ -2007,24 +1980,12 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
| 2007 | 1980 | |
| 2008 | 1981 | tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2); |
| 2009 | 1982 | tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2); |
| 2010 | - { | |
| 2011 | - tcg_gen_movi_tl(t1, 0); | |
| 2012 | - gen_store_LO(t0, 0); | |
| 2013 | - gen_store_HI(t1, 0); | |
| 2014 | - tcg_gen_br(l1); | |
| 2015 | - } | |
| 1983 | + tcg_gen_mov_tl(cpu_LO[0], t0); | |
| 1984 | + tcg_gen_movi_tl(cpu_HI[0], 0); | |
| 1985 | + tcg_gen_br(l1); | |
| 2016 | 1986 | gen_set_label(l2); |
| 2017 | - { | |
| 2018 | - TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64); | |
| 2019 | - TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64); | |
| 2020 | - | |
| 2021 | - tcg_gen_div_i64(r_tmp1, t0, t1); | |
| 2022 | - tcg_gen_rem_i64(r_tmp2, t0, t1); | |
| 2023 | - gen_store_LO(r_tmp1, 0); | |
| 2024 | - gen_store_HI(r_tmp2, 0); | |
| 2025 | - tcg_temp_free(r_tmp1); | |
| 2026 | - tcg_temp_free(r_tmp2); | |
| 2027 | - } | |
| 1987 | + tcg_gen_div_i64(cpu_LO[0], t0, t1); | |
| 1988 | + tcg_gen_rem_i64(cpu_HI[0], t0, t1); | |
| 2028 | 1989 | } |
| 2029 | 1990 | gen_set_label(l1); |
| 2030 | 1991 | } |
| ... | ... | @@ -2035,17 +1996,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
| 2035 | 1996 | int l1 = gen_new_label(); |
| 2036 | 1997 | |
| 2037 | 1998 | tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1); |
| 2038 | - { | |
| 2039 | - TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64); | |
| 2040 | - TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64); | |
| 2041 | - | |
| 2042 | - tcg_gen_divu_i64(r_tmp1, t0, t1); | |
| 2043 | - tcg_gen_remu_i64(r_tmp2, t0, t1); | |
| 2044 | - tcg_temp_free(r_tmp1); | |
| 2045 | - tcg_temp_free(r_tmp2); | |
| 2046 | - gen_store_LO(r_tmp1, 0); | |
| 2047 | - gen_store_HI(r_tmp2, 0); | |
| 2048 | - } | |
| 1999 | + tcg_gen_divu_i64(cpu_LO[0], t0, t1); | |
| 2000 | + tcg_gen_remu_i64(cpu_HI[0], t0, t1); | |
| 2049 | 2001 | gen_set_label(l1); |
| 2050 | 2002 | } |
| 2051 | 2003 | opn = "ddivu"; |
| ... | ... | @@ -2064,24 +2016,18 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
| 2064 | 2016 | TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64); |
| 2065 | 2017 | TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64); |
| 2066 | 2018 | |
| 2067 | - tcg_gen_ext32s_tl(t0, t0); | |
| 2068 | - tcg_gen_ext32s_tl(t1, t1); | |
| 2069 | 2019 | tcg_gen_ext_tl_i64(r_tmp1, t0); |
| 2070 | 2020 | tcg_gen_ext_tl_i64(r_tmp2, t1); |
| 2071 | 2021 | tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2); |
| 2072 | - gen_load_LO(t0, 0); | |
| 2073 | - gen_load_HI(t1, 0); | |
| 2074 | - tcg_gen_concat_tl_i64(r_tmp2, t0, t1); | |
| 2022 | + tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]); | |
| 2075 | 2023 | tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2); |
| 2076 | 2024 | tcg_temp_free(r_tmp2); |
| 2077 | 2025 | tcg_gen_trunc_i64_tl(t0, r_tmp1); |
| 2078 | 2026 | tcg_gen_shri_i64(r_tmp1, r_tmp1, 32); |
| 2079 | 2027 | tcg_gen_trunc_i64_tl(t1, r_tmp1); |
| 2080 | 2028 | tcg_temp_free(r_tmp1); |
| 2081 | - tcg_gen_ext32s_tl(t0, t0); | |
| 2082 | - tcg_gen_ext32s_tl(t1, t1); | |
| 2083 | - gen_store_LO(t0, 0); | |
| 2084 | - gen_store_HI(t1, 0); | |
| 2029 | + tcg_gen_ext32s_tl(cpu_LO[0], t0); | |
| 2030 | + tcg_gen_ext32s_tl(cpu_LO[1], t1); | |
| 2085 | 2031 | } |
| 2086 | 2032 | opn = "madd"; |
| 2087 | 2033 | break; |
| ... | ... | @@ -2095,19 +2041,15 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
| 2095 | 2041 | tcg_gen_extu_tl_i64(r_tmp1, t0); |
| 2096 | 2042 | tcg_gen_extu_tl_i64(r_tmp2, t1); |
| 2097 | 2043 | tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2); |
| 2098 | - gen_load_LO(t0, 0); | |
| 2099 | - gen_load_HI(t1, 0); | |
| 2100 | - tcg_gen_concat_tl_i64(r_tmp2, t0, t1); | |
| 2044 | + tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]); | |
| 2101 | 2045 | tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2); |
| 2102 | 2046 | tcg_temp_free(r_tmp2); |
| 2103 | 2047 | tcg_gen_trunc_i64_tl(t0, r_tmp1); |
| 2104 | 2048 | tcg_gen_shri_i64(r_tmp1, r_tmp1, 32); |
| 2105 | 2049 | tcg_gen_trunc_i64_tl(t1, r_tmp1); |
| 2106 | 2050 | tcg_temp_free(r_tmp1); |
| 2107 | - tcg_gen_ext32s_tl(t0, t0); | |
| 2108 | - tcg_gen_ext32s_tl(t1, t1); | |
| 2109 | - gen_store_LO(t0, 0); | |
| 2110 | - gen_store_HI(t1, 0); | |
| 2051 | + tcg_gen_ext32s_tl(cpu_LO[0], t0); | |
| 2052 | + tcg_gen_ext32s_tl(cpu_HI[0], t1); | |
| 2111 | 2053 | } |
| 2112 | 2054 | opn = "maddu"; |
| 2113 | 2055 | break; |
| ... | ... | @@ -2116,24 +2058,18 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
| 2116 | 2058 | TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64); |
| 2117 | 2059 | TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64); |
| 2118 | 2060 | |
| 2119 | - tcg_gen_ext32s_tl(t0, t0); | |
| 2120 | - tcg_gen_ext32s_tl(t1, t1); | |
| 2121 | 2061 | tcg_gen_ext_tl_i64(r_tmp1, t0); |
| 2122 | 2062 | tcg_gen_ext_tl_i64(r_tmp2, t1); |
| 2123 | 2063 | tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2); |
| 2124 | - gen_load_LO(t0, 0); | |
| 2125 | - gen_load_HI(t1, 0); | |
| 2126 | - tcg_gen_concat_tl_i64(r_tmp2, t0, t1); | |
| 2064 | + tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]); | |
| 2127 | 2065 | tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2); |
| 2128 | 2066 | tcg_temp_free(r_tmp2); |
| 2129 | 2067 | tcg_gen_trunc_i64_tl(t0, r_tmp1); |
| 2130 | 2068 | tcg_gen_shri_i64(r_tmp1, r_tmp1, 32); |
| 2131 | 2069 | tcg_gen_trunc_i64_tl(t1, r_tmp1); |
| 2132 | 2070 | tcg_temp_free(r_tmp1); |
| 2133 | - tcg_gen_ext32s_tl(t0, t0); | |
| 2134 | - tcg_gen_ext32s_tl(t1, t1); | |
| 2135 | - gen_store_LO(t0, 0); | |
| 2136 | - gen_store_HI(t1, 0); | |
| 2071 | + tcg_gen_ext32s_tl(cpu_LO[0], t0); | |
| 2072 | + tcg_gen_ext32s_tl(cpu_HI[0], t1); | |
| 2137 | 2073 | } |
| 2138 | 2074 | opn = "msub"; |
| 2139 | 2075 | break; |
| ... | ... | @@ -2147,19 +2083,15 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, |
| 2147 | 2083 | tcg_gen_extu_tl_i64(r_tmp1, t0); |
| 2148 | 2084 | tcg_gen_extu_tl_i64(r_tmp2, t1); |
| 2149 | 2085 | tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2); |
| 2150 | - gen_load_LO(t0, 0); | |
| 2151 | - gen_load_HI(t1, 0); | |
| 2152 | - tcg_gen_concat_tl_i64(r_tmp2, t0, t1); | |
| 2086 | + tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]); | |
| 2153 | 2087 | tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2); |
| 2154 | 2088 | tcg_temp_free(r_tmp2); |
| 2155 | 2089 | tcg_gen_trunc_i64_tl(t0, r_tmp1); |
| 2156 | 2090 | tcg_gen_shri_i64(r_tmp1, r_tmp1, 32); |
| 2157 | 2091 | tcg_gen_trunc_i64_tl(t1, r_tmp1); |
| 2158 | 2092 | tcg_temp_free(r_tmp1); |
| 2159 | - tcg_gen_ext32s_tl(t0, t0); | |
| 2160 | - tcg_gen_ext32s_tl(t1, t1); | |
| 2161 | - gen_store_LO(t0, 0); | |
| 2162 | - gen_store_HI(t1, 0); | |
| 2093 | + tcg_gen_ext32s_tl(cpu_LO[0], t0); | |
| 2094 | + tcg_gen_ext32s_tl(cpu_HI[0], t1); | |
| 2163 | 2095 | } |
| 2164 | 2096 | opn = "msubu"; |
| 2165 | 2097 | break; | ... | ... |