Commit b10fa3c906a3d48ee58d64663f9d020ef90d897d

Authored by aurel32
1 parent 507563e8

target-mips: optimize gen_muldiv()

Optimize code generation in gen_muldiv():
- Don't do sign extension when the value is already guaranteed to be
  sign extended (otherwise, results are marked as UNPREDICTABLE).
- Access the LO, HI registers directly instead of writing them through
  a temporary variable.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5681 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 1 changed file with 47 additions and 115 deletions
target-mips/translate.c
... ... @@ -604,27 +604,7 @@ static inline void gen_store_gpr (TCGv t, int reg)
604 604 tcg_gen_mov_tl(cpu_gpr[reg], t);
605 605 }
606 606  
607   -/* Moves to/from HI and LO registers. */
608   -static inline void gen_load_HI (TCGv t, int reg)
609   -{
610   - tcg_gen_mov_tl(t, cpu_HI[reg]);
611   -}
612   -
613   -static inline void gen_store_HI (TCGv t, int reg)
614   -{
615   - tcg_gen_mov_tl(cpu_HI[reg], t);
616   -}
617   -
618   -static inline void gen_load_LO (TCGv t, int reg)
619   -{
620   - tcg_gen_mov_tl(t, cpu_LO[reg]);
621   -}
622   -
623   -static inline void gen_store_LO (TCGv t, int reg)
624   -{
625   - tcg_gen_mov_tl(cpu_LO[reg], t);
626   -}
627   -
  607 +/* Moves to/from ACX register. */
628 608 static inline void gen_load_ACX (TCGv t, int reg)
629 609 {
630 610 tcg_gen_mov_tl(t, cpu_ACX[reg]);
... ... @@ -1856,23 +1836,23 @@ static void gen_HILO (DisasContext *ctx, uint32_t opc, int reg)
1856 1836 }
1857 1837 switch (opc) {
1858 1838 case OPC_MFHI:
1859   - gen_load_HI(t0, 0);
  1839 + tcg_gen_mov_tl(t0, cpu_HI[0]);
1860 1840 gen_store_gpr(t0, reg);
1861 1841 opn = "mfhi";
1862 1842 break;
1863 1843 case OPC_MFLO:
1864   - gen_load_LO(t0, 0);
  1844 + tcg_gen_mov_tl(t0, cpu_LO[0]);
1865 1845 gen_store_gpr(t0, reg);
1866 1846 opn = "mflo";
1867 1847 break;
1868 1848 case OPC_MTHI:
1869 1849 gen_load_gpr(t0, reg);
1870   - gen_store_HI(t0, 0);
  1850 + tcg_gen_mov_tl(cpu_HI[0], t0);
1871 1851 opn = "mthi";
1872 1852 break;
1873 1853 case OPC_MTLO:
1874 1854 gen_load_gpr(t0, reg);
1875   - gen_store_LO(t0, 0);
  1855 + tcg_gen_mov_tl(cpu_LO[0], t0);
1876 1856 opn = "mtlo";
1877 1857 break;
1878 1858 default:
... ... @@ -1899,27 +1879,28 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
1899 1879 {
1900 1880 int l1 = gen_new_label();
1901 1881  
1902   - tcg_gen_ext32s_tl(t0, t0);
1903   - tcg_gen_ext32s_tl(t1, t1);
1904 1882 tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
1905 1883 {
1906   - TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
1907   - TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
1908   - TCGv r_tmp3 = tcg_temp_new(TCG_TYPE_I64);
1909   -
1910   - tcg_gen_ext_tl_i64(r_tmp1, t0);
1911   - tcg_gen_ext_tl_i64(r_tmp2, t1);
1912   - tcg_gen_div_i64(r_tmp3, r_tmp1, r_tmp2);
1913   - tcg_gen_rem_i64(r_tmp2, r_tmp1, r_tmp2);
1914   - tcg_gen_trunc_i64_tl(t0, r_tmp3);
1915   - tcg_gen_trunc_i64_tl(t1, r_tmp2);
  1884 + int l2 = gen_new_label();
  1885 + TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_I32);
  1886 + TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_I32);
  1887 + TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_I32);
  1888 +
  1889 + tcg_gen_trunc_tl_i32(r_tmp1, t0);
  1890 + tcg_gen_trunc_tl_i32(r_tmp2, t1);
  1891 + tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp1, -1 << 31, l2);
  1892 + tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp2, -1, l2);
  1893 + tcg_gen_ext32s_tl(cpu_LO[0], t0);
  1894 + tcg_gen_movi_tl(cpu_HI[0], 0);
  1895 + tcg_gen_br(l1);
  1896 + gen_set_label(l2);
  1897 + tcg_gen_div_i32(r_tmp3, r_tmp1, r_tmp2);
  1898 + tcg_gen_rem_i32(r_tmp2, r_tmp1, r_tmp2);
  1899 + tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
  1900 + tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp2);
1916 1901 tcg_temp_free(r_tmp1);
1917 1902 tcg_temp_free(r_tmp2);
1918 1903 tcg_temp_free(r_tmp3);
1919   - tcg_gen_ext32s_tl(t0, t0);
1920   - tcg_gen_ext32s_tl(t1, t1);
1921   - gen_store_LO(t0, 0);
1922   - gen_store_HI(t1, 0);
1923 1904 }
1924 1905 gen_set_label(l1);
1925 1906 }
... ... @@ -1940,13 +1921,11 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
1940 1921 tcg_gen_trunc_tl_i32(r_tmp2, t1);
1941 1922 tcg_gen_divu_i32(r_tmp3, r_tmp1, r_tmp2);
1942 1923 tcg_gen_remu_i32(r_tmp1, r_tmp1, r_tmp2);
1943   - tcg_gen_ext_i32_tl(t0, r_tmp3);
1944   - tcg_gen_ext_i32_tl(t1, r_tmp1);
  1924 + tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
  1925 + tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp1);
1945 1926 tcg_temp_free(r_tmp1);
1946 1927 tcg_temp_free(r_tmp2);
1947 1928 tcg_temp_free(r_tmp3);
1948   - gen_store_LO(t0, 0);
1949   - gen_store_HI(t1, 0);
1950 1929 }
1951 1930 gen_set_label(l1);
1952 1931 }
... ... @@ -1957,8 +1936,6 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
1957 1936 TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
1958 1937 TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
1959 1938  
1960   - tcg_gen_ext32s_tl(t0, t0);
1961   - tcg_gen_ext32s_tl(t1, t1);
1962 1939 tcg_gen_ext_tl_i64(r_tmp1, t0);
1963 1940 tcg_gen_ext_tl_i64(r_tmp2, t1);
1964 1941 tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
... ... @@ -1967,10 +1944,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
1967 1944 tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
1968 1945 tcg_gen_trunc_i64_tl(t1, r_tmp1);
1969 1946 tcg_temp_free(r_tmp1);
1970   - tcg_gen_ext32s_tl(t0, t0);
1971   - tcg_gen_ext32s_tl(t1, t1);
1972   - gen_store_LO(t0, 0);
1973   - gen_store_HI(t1, 0);
  1947 + tcg_gen_ext32s_tl(cpu_LO[0], t0);
  1948 + tcg_gen_ext32s_tl(cpu_HI[0], t1);
1974 1949 }
1975 1950 opn = "mult";
1976 1951 break;
... ... @@ -1989,10 +1964,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
1989 1964 tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
1990 1965 tcg_gen_trunc_i64_tl(t1, r_tmp1);
1991 1966 tcg_temp_free(r_tmp1);
1992   - tcg_gen_ext32s_tl(t0, t0);
1993   - tcg_gen_ext32s_tl(t1, t1);
1994   - gen_store_LO(t0, 0);
1995   - gen_store_HI(t1, 0);
  1967 + tcg_gen_ext32s_tl(cpu_LO[0], t0);
  1968 + tcg_gen_ext32s_tl(cpu_HI[0], t1);
1996 1969 }
1997 1970 opn = "multu";
1998 1971 break;
... ... @@ -2007,24 +1980,12 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
2007 1980  
2008 1981 tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
2009 1982 tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
2010   - {
2011   - tcg_gen_movi_tl(t1, 0);
2012   - gen_store_LO(t0, 0);
2013   - gen_store_HI(t1, 0);
2014   - tcg_gen_br(l1);
2015   - }
  1983 + tcg_gen_mov_tl(cpu_LO[0], t0);
  1984 + tcg_gen_movi_tl(cpu_HI[0], 0);
  1985 + tcg_gen_br(l1);
2016 1986 gen_set_label(l2);
2017   - {
2018   - TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
2019   - TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
2020   -
2021   - tcg_gen_div_i64(r_tmp1, t0, t1);
2022   - tcg_gen_rem_i64(r_tmp2, t0, t1);
2023   - gen_store_LO(r_tmp1, 0);
2024   - gen_store_HI(r_tmp2, 0);
2025   - tcg_temp_free(r_tmp1);
2026   - tcg_temp_free(r_tmp2);
2027   - }
  1987 + tcg_gen_div_i64(cpu_LO[0], t0, t1);
  1988 + tcg_gen_rem_i64(cpu_HI[0], t0, t1);
2028 1989 }
2029 1990 gen_set_label(l1);
2030 1991 }
... ... @@ -2035,17 +1996,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
2035 1996 int l1 = gen_new_label();
2036 1997  
2037 1998 tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
2038   - {
2039   - TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
2040   - TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
2041   -
2042   - tcg_gen_divu_i64(r_tmp1, t0, t1);
2043   - tcg_gen_remu_i64(r_tmp2, t0, t1);
2044   - tcg_temp_free(r_tmp1);
2045   - tcg_temp_free(r_tmp2);
2046   - gen_store_LO(r_tmp1, 0);
2047   - gen_store_HI(r_tmp2, 0);
2048   - }
  1999 + tcg_gen_divu_i64(cpu_LO[0], t0, t1);
  2000 + tcg_gen_remu_i64(cpu_HI[0], t0, t1);
2049 2001 gen_set_label(l1);
2050 2002 }
2051 2003 opn = "ddivu";
... ... @@ -2064,24 +2016,18 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
2064 2016 TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
2065 2017 TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
2066 2018  
2067   - tcg_gen_ext32s_tl(t0, t0);
2068   - tcg_gen_ext32s_tl(t1, t1);
2069 2019 tcg_gen_ext_tl_i64(r_tmp1, t0);
2070 2020 tcg_gen_ext_tl_i64(r_tmp2, t1);
2071 2021 tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
2072   - gen_load_LO(t0, 0);
2073   - gen_load_HI(t1, 0);
2074   - tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
  2022 + tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
2075 2023 tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
2076 2024 tcg_temp_free(r_tmp2);
2077 2025 tcg_gen_trunc_i64_tl(t0, r_tmp1);
2078 2026 tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
2079 2027 tcg_gen_trunc_i64_tl(t1, r_tmp1);
2080 2028 tcg_temp_free(r_tmp1);
2081   - tcg_gen_ext32s_tl(t0, t0);
2082   - tcg_gen_ext32s_tl(t1, t1);
2083   - gen_store_LO(t0, 0);
2084   - gen_store_HI(t1, 0);
  2029 + tcg_gen_ext32s_tl(cpu_LO[0], t0);
  2030 + tcg_gen_ext32s_tl(cpu_LO[1], t1);
2085 2031 }
2086 2032 opn = "madd";
2087 2033 break;
... ... @@ -2095,19 +2041,15 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
2095 2041 tcg_gen_extu_tl_i64(r_tmp1, t0);
2096 2042 tcg_gen_extu_tl_i64(r_tmp2, t1);
2097 2043 tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
2098   - gen_load_LO(t0, 0);
2099   - gen_load_HI(t1, 0);
2100   - tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
  2044 + tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
2101 2045 tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
2102 2046 tcg_temp_free(r_tmp2);
2103 2047 tcg_gen_trunc_i64_tl(t0, r_tmp1);
2104 2048 tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
2105 2049 tcg_gen_trunc_i64_tl(t1, r_tmp1);
2106 2050 tcg_temp_free(r_tmp1);
2107   - tcg_gen_ext32s_tl(t0, t0);
2108   - tcg_gen_ext32s_tl(t1, t1);
2109   - gen_store_LO(t0, 0);
2110   - gen_store_HI(t1, 0);
  2051 + tcg_gen_ext32s_tl(cpu_LO[0], t0);
  2052 + tcg_gen_ext32s_tl(cpu_HI[0], t1);
2111 2053 }
2112 2054 opn = "maddu";
2113 2055 break;
... ... @@ -2116,24 +2058,18 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
2116 2058 TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
2117 2059 TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
2118 2060  
2119   - tcg_gen_ext32s_tl(t0, t0);
2120   - tcg_gen_ext32s_tl(t1, t1);
2121 2061 tcg_gen_ext_tl_i64(r_tmp1, t0);
2122 2062 tcg_gen_ext_tl_i64(r_tmp2, t1);
2123 2063 tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
2124   - gen_load_LO(t0, 0);
2125   - gen_load_HI(t1, 0);
2126   - tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
  2064 + tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
2127 2065 tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
2128 2066 tcg_temp_free(r_tmp2);
2129 2067 tcg_gen_trunc_i64_tl(t0, r_tmp1);
2130 2068 tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
2131 2069 tcg_gen_trunc_i64_tl(t1, r_tmp1);
2132 2070 tcg_temp_free(r_tmp1);
2133   - tcg_gen_ext32s_tl(t0, t0);
2134   - tcg_gen_ext32s_tl(t1, t1);
2135   - gen_store_LO(t0, 0);
2136   - gen_store_HI(t1, 0);
  2071 + tcg_gen_ext32s_tl(cpu_LO[0], t0);
  2072 + tcg_gen_ext32s_tl(cpu_HI[0], t1);
2137 2073 }
2138 2074 opn = "msub";
2139 2075 break;
... ... @@ -2147,19 +2083,15 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
2147 2083 tcg_gen_extu_tl_i64(r_tmp1, t0);
2148 2084 tcg_gen_extu_tl_i64(r_tmp2, t1);
2149 2085 tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
2150   - gen_load_LO(t0, 0);
2151   - gen_load_HI(t1, 0);
2152   - tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
  2086 + tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
2153 2087 tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
2154 2088 tcg_temp_free(r_tmp2);
2155 2089 tcg_gen_trunc_i64_tl(t0, r_tmp1);
2156 2090 tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
2157 2091 tcg_gen_trunc_i64_tl(t1, r_tmp1);
2158 2092 tcg_temp_free(r_tmp1);
2159   - tcg_gen_ext32s_tl(t0, t0);
2160   - tcg_gen_ext32s_tl(t1, t1);
2161   - gen_store_LO(t0, 0);
2162   - gen_store_HI(t1, 0);
  2093 + tcg_gen_ext32s_tl(cpu_LO[0], t0);
  2094 + tcg_gen_ext32s_tl(cpu_HI[0], t1);
2163 2095 }
2164 2096 opn = "msubu";
2165 2097 break;
... ...