Commit d45f89f4e44758d595e6d9bfb45d1a8884dc2b66

Authored by aurel32
1 parent ab36421e

target-mips: optimize gen_muldiv()

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@6940 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 1 changed file with 115 additions and 123 deletions
target-mips/translate.c
@@ -1803,8 +1803,23 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, @@ -1803,8 +1803,23 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
1803 int rs, int rt) 1803 int rs, int rt)
1804 { 1804 {
1805 const char *opn = "mul/div"; 1805 const char *opn = "mul/div";
1806 - TCGv t0 = tcg_temp_local_new();  
1807 - TCGv t1 = tcg_temp_local_new(); 1806 + TCGv t0, t1;
  1807 +
  1808 + switch (opc) {
  1809 + case OPC_DIV:
  1810 + case OPC_DIVU:
  1811 +#if defined(TARGET_MIPS64)
  1812 + case OPC_DDIV:
  1813 + case OPC_DDIVU:
  1814 +#endif
  1815 + t0 = tcg_temp_local_new();
  1816 + t1 = tcg_temp_local_new();
  1817 + break;
  1818 + default:
  1819 + t0 = tcg_temp_new();
  1820 + t1 = tcg_temp_new();
  1821 + break;
  1822 + }
1808 1823
1809 gen_load_gpr(t0, rs); 1824 gen_load_gpr(t0, rs);
1810 gen_load_gpr(t1, rt); 1825 gen_load_gpr(t1, rt);
@@ -1812,30 +1827,22 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, @@ -1812,30 +1827,22 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
1812 case OPC_DIV: 1827 case OPC_DIV:
1813 { 1828 {
1814 int l1 = gen_new_label(); 1829 int l1 = gen_new_label();
  1830 + int l2 = gen_new_label();
1815 1831
  1832 + tcg_gen_ext32s_tl(t0, t0);
  1833 + tcg_gen_ext32s_tl(t1, t1);
1816 tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1); 1834 tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
1817 - {  
1818 - int l2 = gen_new_label();  
1819 - TCGv_i32 r_tmp1 = tcg_temp_local_new_i32();  
1820 - TCGv_i32 r_tmp2 = tcg_temp_local_new_i32();  
1821 - TCGv_i32 r_tmp3 = tcg_temp_local_new_i32();  
1822 -  
1823 - tcg_gen_trunc_tl_i32(r_tmp1, t0);  
1824 - tcg_gen_trunc_tl_i32(r_tmp2, t1);  
1825 - tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp1, -1 << 31, l2);  
1826 - tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp2, -1, l2);  
1827 - tcg_gen_ext32s_tl(cpu_LO[0], t0);  
1828 - tcg_gen_movi_tl(cpu_HI[0], 0);  
1829 - tcg_gen_br(l1);  
1830 - gen_set_label(l2);  
1831 - tcg_gen_div_i32(r_tmp3, r_tmp1, r_tmp2);  
1832 - tcg_gen_rem_i32(r_tmp2, r_tmp1, r_tmp2);  
1833 - tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);  
1834 - tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp2);  
1835 - tcg_temp_free_i32(r_tmp1);  
1836 - tcg_temp_free_i32(r_tmp2);  
1837 - tcg_temp_free_i32(r_tmp3);  
1838 - } 1835 + tcg_gen_brcondi_tl(TCG_COND_NE, t0, INT_MIN, l2);
  1836 + tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1, l2);
  1837 +
  1838 + tcg_gen_mov_tl(cpu_LO[0], t0);
  1839 + tcg_gen_movi_tl(cpu_HI[0], 0);
  1840 + tcg_gen_br(l1);
  1841 + gen_set_label(l2);
  1842 + tcg_gen_div_tl(cpu_LO[0], t0, t1);
  1843 + tcg_gen_rem_tl(cpu_HI[0], t0, t1);
  1844 + tcg_gen_ext32s_tl(cpu_LO[0], cpu_LO[0]);
  1845 + tcg_gen_ext32s_tl(cpu_HI[0], cpu_HI[0]);
1839 gen_set_label(l1); 1846 gen_set_label(l1);
1840 } 1847 }
1841 opn = "div"; 1848 opn = "div";
@@ -1844,40 +1851,28 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, @@ -1844,40 +1851,28 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
1844 { 1851 {
1845 int l1 = gen_new_label(); 1852 int l1 = gen_new_label();
1846 1853
1847 - tcg_gen_ext32s_tl(t1, t1);  
1848 tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1); 1854 tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
1849 - {  
1850 - TCGv_i32 r_tmp1 = tcg_temp_new_i32();  
1851 - TCGv_i32 r_tmp2 = tcg_temp_new_i32();  
1852 - TCGv_i32 r_tmp3 = tcg_temp_new_i32();  
1853 -  
1854 - tcg_gen_trunc_tl_i32(r_tmp1, t0);  
1855 - tcg_gen_trunc_tl_i32(r_tmp2, t1);  
1856 - tcg_gen_divu_i32(r_tmp3, r_tmp1, r_tmp2);  
1857 - tcg_gen_remu_i32(r_tmp1, r_tmp1, r_tmp2);  
1858 - tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);  
1859 - tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp1);  
1860 - tcg_temp_free_i32(r_tmp1);  
1861 - tcg_temp_free_i32(r_tmp2);  
1862 - tcg_temp_free_i32(r_tmp3);  
1863 - } 1855 + tcg_gen_divu_tl(cpu_LO[0], t0, t1);
  1856 + tcg_gen_remu_tl(cpu_HI[0], t0, t1);
  1857 + tcg_gen_ext32s_tl(cpu_LO[0], cpu_LO[0]);
  1858 + tcg_gen_ext32s_tl(cpu_HI[0], cpu_HI[0]);
1864 gen_set_label(l1); 1859 gen_set_label(l1);
1865 } 1860 }
1866 opn = "divu"; 1861 opn = "divu";
1867 break; 1862 break;
1868 case OPC_MULT: 1863 case OPC_MULT:
1869 { 1864 {
1870 - TCGv_i64 r_tmp1 = tcg_temp_new_i64();  
1871 - TCGv_i64 r_tmp2 = tcg_temp_new_i64();  
1872 -  
1873 - tcg_gen_ext_tl_i64(r_tmp1, t0);  
1874 - tcg_gen_ext_tl_i64(r_tmp2, t1);  
1875 - tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);  
1876 - tcg_temp_free_i64(r_tmp2);  
1877 - tcg_gen_trunc_i64_tl(t0, r_tmp1);  
1878 - tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);  
1879 - tcg_gen_trunc_i64_tl(t1, r_tmp1);  
1880 - tcg_temp_free_i64(r_tmp1); 1865 + TCGv_i64 t2 = tcg_temp_new_i64();
  1866 + TCGv_i64 t3 = tcg_temp_new_i64();
  1867 +
  1868 + tcg_gen_ext_tl_i64(t2, t0);
  1869 + tcg_gen_ext_tl_i64(t3, t1);
  1870 + tcg_gen_mul_i64(t2, t2, t3);
  1871 + tcg_temp_free_i64(t3);
  1872 + tcg_gen_trunc_i64_tl(t0, t2);
  1873 + tcg_gen_shri_i64(t2, t2, 32);
  1874 + tcg_gen_trunc_i64_tl(t1, t2);
  1875 + tcg_temp_free_i64(t2);
1881 tcg_gen_ext32s_tl(cpu_LO[0], t0); 1876 tcg_gen_ext32s_tl(cpu_LO[0], t0);
1882 tcg_gen_ext32s_tl(cpu_HI[0], t1); 1877 tcg_gen_ext32s_tl(cpu_HI[0], t1);
1883 } 1878 }
@@ -1885,19 +1880,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, @@ -1885,19 +1880,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
1885 break; 1880 break;
1886 case OPC_MULTU: 1881 case OPC_MULTU:
1887 { 1882 {
1888 - TCGv_i64 r_tmp1 = tcg_temp_new_i64();  
1889 - TCGv_i64 r_tmp2 = tcg_temp_new_i64(); 1883 + TCGv_i64 t2 = tcg_temp_new_i64();
  1884 + TCGv_i64 t3 = tcg_temp_new_i64();
1890 1885
1891 tcg_gen_ext32u_tl(t0, t0); 1886 tcg_gen_ext32u_tl(t0, t0);
1892 tcg_gen_ext32u_tl(t1, t1); 1887 tcg_gen_ext32u_tl(t1, t1);
1893 - tcg_gen_extu_tl_i64(r_tmp1, t0);  
1894 - tcg_gen_extu_tl_i64(r_tmp2, t1);  
1895 - tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);  
1896 - tcg_temp_free_i64(r_tmp2);  
1897 - tcg_gen_trunc_i64_tl(t0, r_tmp1);  
1898 - tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);  
1899 - tcg_gen_trunc_i64_tl(t1, r_tmp1);  
1900 - tcg_temp_free_i64(r_tmp1); 1888 + tcg_gen_extu_tl_i64(t2, t0);
  1889 + tcg_gen_extu_tl_i64(t3, t1);
  1890 + tcg_gen_mul_i64(t2, t2, t3);
  1891 + tcg_temp_free_i64(t3);
  1892 + tcg_gen_trunc_i64_tl(t0, t2);
  1893 + tcg_gen_shri_i64(t2, t2, 32);
  1894 + tcg_gen_trunc_i64_tl(t1, t2);
  1895 + tcg_temp_free_i64(t2);
1901 tcg_gen_ext32s_tl(cpu_LO[0], t0); 1896 tcg_gen_ext32s_tl(cpu_LO[0], t0);
1902 tcg_gen_ext32s_tl(cpu_HI[0], t1); 1897 tcg_gen_ext32s_tl(cpu_HI[0], t1);
1903 } 1898 }
@@ -1907,20 +1902,17 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, @@ -1907,20 +1902,17 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
1907 case OPC_DDIV: 1902 case OPC_DDIV:
1908 { 1903 {
1909 int l1 = gen_new_label(); 1904 int l1 = gen_new_label();
  1905 + int l2 = gen_new_label();
1910 1906
1911 tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1); 1907 tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
1912 - {  
1913 - int l2 = gen_new_label();  
1914 -  
1915 - tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);  
1916 - tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);  
1917 - tcg_gen_mov_tl(cpu_LO[0], t0);  
1918 - tcg_gen_movi_tl(cpu_HI[0], 0);  
1919 - tcg_gen_br(l1);  
1920 - gen_set_label(l2);  
1921 - tcg_gen_div_i64(cpu_LO[0], t0, t1);  
1922 - tcg_gen_rem_i64(cpu_HI[0], t0, t1);  
1923 - } 1908 + tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
  1909 + tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
  1910 + tcg_gen_mov_tl(cpu_LO[0], t0);
  1911 + tcg_gen_movi_tl(cpu_HI[0], 0);
  1912 + tcg_gen_br(l1);
  1913 + gen_set_label(l2);
  1914 + tcg_gen_div_i64(cpu_LO[0], t0, t1);
  1915 + tcg_gen_rem_i64(cpu_HI[0], t0, t1);
1924 gen_set_label(l1); 1916 gen_set_label(l1);
1925 } 1917 }
1926 opn = "ddiv"; 1918 opn = "ddiv";
@@ -1947,19 +1939,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, @@ -1947,19 +1939,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
1947 #endif 1939 #endif
1948 case OPC_MADD: 1940 case OPC_MADD:
1949 { 1941 {
1950 - TCGv_i64 r_tmp1 = tcg_temp_new_i64();  
1951 - TCGv_i64 r_tmp2 = tcg_temp_new_i64();  
1952 -  
1953 - tcg_gen_ext_tl_i64(r_tmp1, t0);  
1954 - tcg_gen_ext_tl_i64(r_tmp2, t1);  
1955 - tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);  
1956 - tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);  
1957 - tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);  
1958 - tcg_temp_free_i64(r_tmp2);  
1959 - tcg_gen_trunc_i64_tl(t0, r_tmp1);  
1960 - tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);  
1961 - tcg_gen_trunc_i64_tl(t1, r_tmp1);  
1962 - tcg_temp_free_i64(r_tmp1); 1942 + TCGv_i64 t2 = tcg_temp_new_i64();
  1943 + TCGv_i64 t3 = tcg_temp_new_i64();
  1944 +
  1945 + tcg_gen_ext_tl_i64(t2, t0);
  1946 + tcg_gen_ext_tl_i64(t3, t1);
  1947 + tcg_gen_mul_i64(t2, t2, t3);
  1948 + tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
  1949 + tcg_gen_add_i64(t2, t2, t3);
  1950 + tcg_temp_free_i64(t3);
  1951 + tcg_gen_trunc_i64_tl(t0, t2);
  1952 + tcg_gen_shri_i64(t2, t2, 32);
  1953 + tcg_gen_trunc_i64_tl(t1, t2);
  1954 + tcg_temp_free_i64(t2);
1963 tcg_gen_ext32s_tl(cpu_LO[0], t0); 1955 tcg_gen_ext32s_tl(cpu_LO[0], t0);
1964 tcg_gen_ext32s_tl(cpu_LO[1], t1); 1956 tcg_gen_ext32s_tl(cpu_LO[1], t1);
1965 } 1957 }
@@ -1967,21 +1959,21 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, @@ -1967,21 +1959,21 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
1967 break; 1959 break;
1968 case OPC_MADDU: 1960 case OPC_MADDU:
1969 { 1961 {
1970 - TCGv_i64 r_tmp1 = tcg_temp_new_i64();  
1971 - TCGv_i64 r_tmp2 = tcg_temp_new_i64(); 1962 + TCGv_i64 t2 = tcg_temp_new_i64();
  1963 + TCGv_i64 t3 = tcg_temp_new_i64();
1972 1964
1973 tcg_gen_ext32u_tl(t0, t0); 1965 tcg_gen_ext32u_tl(t0, t0);
1974 tcg_gen_ext32u_tl(t1, t1); 1966 tcg_gen_ext32u_tl(t1, t1);
1975 - tcg_gen_extu_tl_i64(r_tmp1, t0);  
1976 - tcg_gen_extu_tl_i64(r_tmp2, t1);  
1977 - tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);  
1978 - tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);  
1979 - tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);  
1980 - tcg_temp_free_i64(r_tmp2);  
1981 - tcg_gen_trunc_i64_tl(t0, r_tmp1);  
1982 - tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);  
1983 - tcg_gen_trunc_i64_tl(t1, r_tmp1);  
1984 - tcg_temp_free_i64(r_tmp1); 1967 + tcg_gen_extu_tl_i64(t2, t0);
  1968 + tcg_gen_extu_tl_i64(t3, t1);
  1969 + tcg_gen_mul_i64(t2, t2, t3);
  1970 + tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
  1971 + tcg_gen_add_i64(t2, t2, t3);
  1972 + tcg_temp_free_i64(t3);
  1973 + tcg_gen_trunc_i64_tl(t0, t2);
  1974 + tcg_gen_shri_i64(t2, t2, 32);
  1975 + tcg_gen_trunc_i64_tl(t1, t2);
  1976 + tcg_temp_free_i64(t2);
1985 tcg_gen_ext32s_tl(cpu_LO[0], t0); 1977 tcg_gen_ext32s_tl(cpu_LO[0], t0);
1986 tcg_gen_ext32s_tl(cpu_HI[0], t1); 1978 tcg_gen_ext32s_tl(cpu_HI[0], t1);
1987 } 1979 }
@@ -1989,19 +1981,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, @@ -1989,19 +1981,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
1989 break; 1981 break;
1990 case OPC_MSUB: 1982 case OPC_MSUB:
1991 { 1983 {
1992 - TCGv_i64 r_tmp1 = tcg_temp_new_i64();  
1993 - TCGv_i64 r_tmp2 = tcg_temp_new_i64();  
1994 -  
1995 - tcg_gen_ext_tl_i64(r_tmp1, t0);  
1996 - tcg_gen_ext_tl_i64(r_tmp2, t1);  
1997 - tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);  
1998 - tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);  
1999 - tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);  
2000 - tcg_temp_free_i64(r_tmp2);  
2001 - tcg_gen_trunc_i64_tl(t0, r_tmp1);  
2002 - tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);  
2003 - tcg_gen_trunc_i64_tl(t1, r_tmp1);  
2004 - tcg_temp_free_i64(r_tmp1); 1984 + TCGv_i64 t2 = tcg_temp_new_i64();
  1985 + TCGv_i64 t3 = tcg_temp_new_i64();
  1986 +
  1987 + tcg_gen_ext_tl_i64(t2, t0);
  1988 + tcg_gen_ext_tl_i64(t3, t1);
  1989 + tcg_gen_mul_i64(t2, t2, t3);
  1990 + tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
  1991 + tcg_gen_sub_i64(t2, t2, t3);
  1992 + tcg_temp_free_i64(t3);
  1993 + tcg_gen_trunc_i64_tl(t0, t2);
  1994 + tcg_gen_shri_i64(t2, t2, 32);
  1995 + tcg_gen_trunc_i64_tl(t1, t2);
  1996 + tcg_temp_free_i64(t2);
2005 tcg_gen_ext32s_tl(cpu_LO[0], t0); 1997 tcg_gen_ext32s_tl(cpu_LO[0], t0);
2006 tcg_gen_ext32s_tl(cpu_HI[0], t1); 1998 tcg_gen_ext32s_tl(cpu_HI[0], t1);
2007 } 1999 }
@@ -2009,21 +2001,21 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc, @@ -2009,21 +2001,21 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
2009 break; 2001 break;
2010 case OPC_MSUBU: 2002 case OPC_MSUBU:
2011 { 2003 {
2012 - TCGv_i64 r_tmp1 = tcg_temp_new_i64();  
2013 - TCGv_i64 r_tmp2 = tcg_temp_new_i64(); 2004 + TCGv_i64 t2 = tcg_temp_new_i64();
  2005 + TCGv_i64 t3 = tcg_temp_new_i64();
2014 2006
2015 tcg_gen_ext32u_tl(t0, t0); 2007 tcg_gen_ext32u_tl(t0, t0);
2016 tcg_gen_ext32u_tl(t1, t1); 2008 tcg_gen_ext32u_tl(t1, t1);
2017 - tcg_gen_extu_tl_i64(r_tmp1, t0);  
2018 - tcg_gen_extu_tl_i64(r_tmp2, t1);  
2019 - tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);  
2020 - tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);  
2021 - tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);  
2022 - tcg_temp_free_i64(r_tmp2);  
2023 - tcg_gen_trunc_i64_tl(t0, r_tmp1);  
2024 - tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);  
2025 - tcg_gen_trunc_i64_tl(t1, r_tmp1);  
2026 - tcg_temp_free_i64(r_tmp1); 2009 + tcg_gen_extu_tl_i64(t2, t0);
  2010 + tcg_gen_extu_tl_i64(t3, t1);
  2011 + tcg_gen_mul_i64(t2, t2, t3);
  2012 + tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
  2013 + tcg_gen_sub_i64(t2, t2, t3);
  2014 + tcg_temp_free_i64(t3);
  2015 + tcg_gen_trunc_i64_tl(t0, t2);
  2016 + tcg_gen_shri_i64(t2, t2, 32);
  2017 + tcg_gen_trunc_i64_tl(t1, t2);
  2018 + tcg_temp_free_i64(t2);
2027 tcg_gen_ext32s_tl(cpu_LO[0], t0); 2019 tcg_gen_ext32s_tl(cpu_LO[0], t0);
2028 tcg_gen_ext32s_tl(cpu_HI[0], t1); 2020 tcg_gen_ext32s_tl(cpu_HI[0], t1);
2029 } 2021 }