Commit 085339a12b448a64527ce4fdb6e1d43addbb16dc
1 parent
abd2c7dc
MMX/SSE test
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@1204 c046a42c-6fe2-441c-8c8c-71466251a162
Showing
2 changed files
with
434 additions
and
3 deletions
tests/Makefile
tests/test-i386.c
... | ... | @@ -35,6 +35,7 @@ |
35 | 35 | #define TEST_VM86 |
36 | 36 | //#define LINUX_VM86_IOPL_FIX |
37 | 37 | //#define TEST_P4_FLAGS |
38 | +//#define TEST_SSE | |
38 | 39 | |
39 | 40 | #define xglue(x, y) x ## y |
40 | 41 | #define glue(x, y) xglue(x, y) |
... | ... | @@ -647,8 +648,8 @@ void test_fbcd(double a) |
647 | 648 | memset((env), 0xaa, sizeof(*(env)));\ |
648 | 649 | for(i=0;i<5;i++)\ |
649 | 650 | asm volatile ("fldl %0" : : "m" (dtab[i]));\ |
650 | - asm(save " %0\n" : : "m" (*(env)));\ | |
651 | - asm(restore " %0\n": : "m" (*(env)));\ | |
651 | + asm volatile (save " %0\n" : : "m" (*(env)));\ | |
652 | + asm volatile (restore " %0\n": : "m" (*(env)));\ | |
652 | 653 | for(i=0;i<5;i++)\ |
653 | 654 | asm volatile ("fstpl %0" : "=m" (rtab[i]));\ |
654 | 655 | for(i=0;i<5;i++)\ |
... | ... | @@ -1674,6 +1675,433 @@ static void test_enter(void) |
1674 | 1675 | TEST_ENTER("w", uint16_t, 31); |
1675 | 1676 | } |
1676 | 1677 | |
1678 | +#ifdef TEST_SSE | |
1679 | + | |
/*
 * Vector types handed to the "y" (MMX) and "x" (XMM) asm register
 * constraints below.  The element type has to match the lanes: newer GCC
 * releases reject a float vector mode applied to an 'int' base type, so
 * __m128 is declared as a vector of four floats via vector_size.
 */
typedef int __m64 __attribute__ ((__vector_size__ (8)));
typedef float __m128 __attribute__ ((__vector_size__ (16)));

/*
 * One 128 bit SSE register image.  All members alias the same 16 bytes:
 * d/s give the double/float lanes, l/q the 32/64 bit integer lanes and
 * dq is the whole value as a vector, used as the asm operand.
 */
typedef union {
    double d[2];
    float s[4];
    uint32_t l[4];
    uint64_t q[2];
    __m128 dq;
} XMMReg;
1690 | + | |
/*
 * Fixed 128 bit operand patterns, stored as { low quadword, high quadword }.
 * The test macros load rows 0/2 into 'a' and rows 1/3 into 'b'.
 */
static uint64_t test_values[4][2] __attribute__((aligned(16))) = {
    { 0x456723c698694873ULL, 0xdc515cff944a58ecULL },
    { 0x1f297ccd58bad7abULL, 0x41f21efba9e3e146ULL },
    { 0x007c62c2085427f8ULL, 0x231be9e8cde7438dULL },
    { 0x0f76255a085427f8ULL, 0xc233e9e8c4c9439aULL },
};
1697 | + | |
/*
 * SSE_OP(op): execute the two operand SSE instruction 'op' once and print
 * both inputs and the result as 128 bit hex values (high quadword first).
 *
 * Uses the XMMReg variables a, b and r of the enclosing scope.  The "0"
 * constraint places a in the destination register, so the instruction
 * computes r = a <op> b.  The quadwords are cast for printf because
 * uint64_t is not 'unsigned long long' on every target.
 */
#define SSE_OP(op)\
do {\
    asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
    printf("%-9s: a=%016llx%016llx b=%016llx%016llx r=%016llx%016llx\n",\
           #op,\
           (unsigned long long)a.q[1], (unsigned long long)a.q[0],\
           (unsigned long long)b.q[1], (unsigned long long)b.q[0],\
           (unsigned long long)r.q[1], (unsigned long long)r.q[0]);\
} while (0)
1707 | + | |
/*
 * SSE_OP2(op): run SSE_OP(op) on both operand pairs of test_values:
 * first a = row 0, b = row 1, then a = row 2, b = row 3.
 * Uses the XMMReg variables a, b and r of the enclosing scope.
 */
#define SSE_OP2(op)\
do {\
    int row;\
    for (row = 0; row < 4; row += 2) {\
        a.q[0] = test_values[row][0];\
        a.q[1] = test_values[row][1];\
        b.q[0] = test_values[row + 1][0];\
        b.q[1] = test_values[row + 1][1];\
        SSE_OP(op);\
    }\
} while (0)
1719 | + | |
/*
 * MMX_OP2(op): test an instruction that exists in both a 64 bit MMX and a
 * 128 bit SSE form.  The MMX form is run on the low quadwords of both
 * operand pairs of test_values (registers selected with the "y"
 * constraint), then SSE_OP2(op) runs the XMM form on the same data.
 *
 * Uses the XMMReg variables a, b and r of the enclosing scope.  No emms is
 * issued here; the caller does that once it is done with MMX registers.
 * The quadwords are cast for printf because uint64_t is not
 * 'unsigned long long' on every target.
 */
#define MMX_OP2(op)\
do {\
    int i;\
    for(i=0;i<2;i++) {\
        a.q[0] = test_values[2*i][0];\
        b.q[0] = test_values[2*i+1][0];\
        asm volatile (#op " %2, %0" : "=y" (r.q[0]) : "0" (a.q[0]), "y" (b.q[0]));\
        printf("%-9s: a=%016llx b=%016llx r=%016llx\n",\
               #op,\
               (unsigned long long)a.q[0],\
               (unsigned long long)b.q[0],\
               (unsigned long long)r.q[0]);\
    }\
    SSE_OP2(op);\
} while (0)
1735 | + | |
/*
 * PSHUF_OP(op, ib): run the shuffle instruction 'op' with immediate 'ib'
 * on rows 0 and 2 of test_values and print source, immediate and result.
 *
 * The destination register is preloaded with a (matching constraint "0").
 * shufps/shufpd take part of their result from the destination, so an
 * output-only destination would make the printed value depend on whatever
 * the register held before.  For pshufd/pshuflw/pshufhw, which overwrite
 * the whole destination, the preload does not change the result.
 *
 * Uses the XMMReg variables a and r of the enclosing scope.
 */
#define PSHUF_OP(op, ib)\
do {\
    int i;\
    for(i=0;i<2;i++) {\
        a.q[0] = test_values[2*i][0];\
        a.q[1] = test_values[2*i][1];\
        asm volatile (#op " $" #ib ", %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (a.dq));\
        printf("%-9s: a=%016llx%016llx ib=%02x r=%016llx%016llx\n",\
               #op,\
               (unsigned long long)a.q[1], (unsigned long long)a.q[0],\
               (unsigned int)(ib),\
               (unsigned long long)r.q[1], (unsigned long long)r.q[0]);\
    }\
} while (0)
1750 | + | |
/*
 * SHIFT_IM(op, ib): run the shift instruction 'op' with the immediate
 * count 'ib' on rows 0 and 2 of test_values and print source, count and
 * result.  The value is shifted in place: a is loaded into the destination
 * register through the matching constraint "0".
 *
 * Uses the XMMReg variables a and r of the enclosing scope.  The quadwords
 * are cast for printf because uint64_t is not 'unsigned long long' on
 * every target.
 */
#define SHIFT_IM(op, ib)\
do {\
    int i;\
    for(i=0;i<2;i++) {\
        a.q[0] = test_values[2*i][0];\
        a.q[1] = test_values[2*i][1];\
        asm volatile (#op " $" #ib ", %0" : "=x" (r.dq) : "0" (a.dq));\
        printf("%-9s: a=%016llx%016llx ib=%02x r=%016llx%016llx\n",\
               #op,\
               (unsigned long long)a.q[1], (unsigned long long)a.q[0],\
               (unsigned int)(ib),\
               (unsigned long long)r.q[1], (unsigned long long)r.q[0]);\
    }\
} while (0)
1765 | + | |
/*
 * SHIFT_OP(op, ib): test both encodings of the shift instruction 'op':
 * first the immediate form via SHIFT_IM(op, ib), then the form taking the
 * count from an XMM register (b = { ib, 0 }), each on rows 0 and 2 of
 * test_values.
 *
 * Uses the XMMReg variables a, b and r of the enclosing scope.  The
 * quadwords are cast for printf because uint64_t is not
 * 'unsigned long long' on every target.
 */
#define SHIFT_OP(op, ib)\
do {\
    int i;\
    SHIFT_IM(op, ib);\
    for(i=0;i<2;i++) {\
        a.q[0] = test_values[2*i][0];\
        a.q[1] = test_values[2*i][1];\
        b.q[0] = ib;\
        b.q[1] = 0;\
        asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
        printf("%-9s: a=%016llx%016llx b=%016llx%016llx r=%016llx%016llx\n",\
               #op,\
               (unsigned long long)a.q[1], (unsigned long long)a.q[0],\
               (unsigned long long)b.q[1], (unsigned long long)b.q[0],\
               (unsigned long long)r.q[1], (unsigned long long)r.q[0]);\
    }\
} while (0)
1783 | + | |
/*
 * MOVMSK(op): run the XMM -> general register instruction 'op' on rows 0
 * and 2 of test_values and print the source and the 32 bit result.
 *
 * Uses the XMMReg variable a of the enclosing scope.  The result is kept
 * in an unsigned variable so that it matches the %x conversion, and the
 * quadwords are cast because uint64_t is not 'unsigned long long' on
 * every target.
 */
#define MOVMSK(op)\
do {\
    int i;\
    unsigned int reg;\
    for(i=0;i<2;i++) {\
        a.q[0] = test_values[2*i][0];\
        a.q[1] = test_values[2*i][1];\
        asm volatile (#op " %1, %0" : "=r" (reg) : "x" (a.dq));\
        printf("%-9s: a=%016llx%016llx r=%08x\n",\
               #op,\
               (unsigned long long)a.q[1], (unsigned long long)a.q[0],\
               reg);\
    }\
} while (0)
1797 | + | |
/*
 * SSE_OPS(name): run SSE_OP on the "ps" and then the "ss" variant of the
 * instruction family 'name' (e.g. add -> addps, addss), using the current
 * contents of a and b.
 */
#define SSE_OPS(name) \
do {\
    SSE_OP(name ## ps);\
    SSE_OP(name ## ss);\
} while (0)
1801 | + | |
/*
 * SSE_OPD(name): run SSE_OP on the "pd" and then the "sd" variant of the
 * instruction family 'name' (e.g. add -> addpd, addsd), using the current
 * contents of a and b.
 */
#define SSE_OPD(name) \
do {\
    SSE_OP(name ## pd);\
    SSE_OP(name ## sd);\
} while (0)
1805 | + | |
1806 | +#define SSE_COMI(op, field)\ | |
1807 | +{\ | |
1808 | + unsigned int eflags;\ | |
1809 | + XMMReg a, b;\ | |
1810 | + a.field[0] = a1;\ | |
1811 | + b.field[0] = b1;\ | |
1812 | + asm volatile (#op " %2, %1\n"\ | |
1813 | + "pushf\n"\ | |
1814 | + "pop %0\n"\ | |
1815 | + : "=m" (eflags)\ | |
1816 | + : "x" (a.dq), "x" (b.dq));\ | |
1817 | + printf("%-9s: a=%f b=%f cc=%04x\n",\ | |
1818 | + #op, a1, b1,\ | |
1819 | + eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\ | |
1820 | +} | |
1821 | + | |
1822 | +void test_sse_comi(double a1, double b1) | |
1823 | +{ | |
1824 | + SSE_COMI(ucomiss, s); | |
1825 | + SSE_COMI(ucomisd, d); | |
1826 | + SSE_COMI(comiss, s); | |
1827 | + SSE_COMI(comisd, d); | |
1828 | +} | |
1829 | + | |
/*
 * CVT_OP_XMM(op): run the XMM -> XMM conversion 'op' on a and print the
 * source and the full 128 bit result.
 *
 * Some of the conversions used with this macro (e.g. cvtss2sd, cvtsd2ss)
 * write only the low part of the destination and keep the rest.  r is
 * therefore cleared and passed as a read/write operand, so that the
 * printed upper bits are defined instead of being leftover register
 * contents.
 *
 * Uses the XMMReg variables a and r of the enclosing scope.
 */
#define CVT_OP_XMM(op)\
do {\
    r.q[0] = 0;\
    r.q[1] = 0;\
    asm volatile (#op " %1, %0" : "+x" (r.dq) : "x" (a.dq));\
    printf("%-9s: a=%016llx%016llx r=%016llx%016llx\n",\
           #op,\
           (unsigned long long)a.q[1], (unsigned long long)a.q[0],\
           (unsigned long long)r.q[1], (unsigned long long)r.q[0]);\
} while (0)
1838 | + | |
/*
 * CVT_OP_XMM2MMX(op): run the XMM -> MMX conversion 'op' on a and print
 * the source and the 64 bit result (stored in r.q[0]).
 *
 * Uses the XMMReg variables a and r of the enclosing scope.  The
 * instruction targets an MMX register ("y" constraint); no emms is issued
 * here.  The quadwords are cast for printf because uint64_t is not
 * 'unsigned long long' on every target.
 */
#define CVT_OP_XMM2MMX(op)\
do {\
    asm volatile (#op " %1, %0" : "=y" (r.q[0]) : "x" (a.dq));\
    printf("%-9s: a=%016llx%016llx r=%016llx\n",\
           #op,\
           (unsigned long long)a.q[1], (unsigned long long)a.q[0],\
           (unsigned long long)r.q[0]);\
} while (0)
1847 | + | |
/*
 * CVT_OP_MMX2XMM(op): run the MMX -> XMM conversion 'op' on the low
 * quadword of a and print the source and the full 128 bit result.
 *
 * cvtpi2ps writes only the low half of the destination and keeps the
 * upper half, so r is cleared and passed as a read/write operand to make
 * the printed upper bits defined.
 *
 * Uses the XMMReg variables a and r of the enclosing scope.  The source is
 * placed in an MMX register ("y" constraint); no emms is issued here.
 */
#define CVT_OP_MMX2XMM(op)\
do {\
    r.q[0] = 0;\
    r.q[1] = 0;\
    asm volatile (#op " %1, %0" : "+x" (r.dq) : "y" (a.q[0]));\
    printf("%-9s: a=%016llx r=%016llx%016llx\n",\
           #op,\
           (unsigned long long)a.q[0],\
           (unsigned long long)r.q[1], (unsigned long long)r.q[0]);\
} while (0)
1856 | + | |
/*
 * CVT_OP_REG2XMM(op): run the general register -> XMM conversion 'op' on
 * the 32 bit value a.l[0] and print the source and the full 128 bit result.
 *
 * cvtsi2ss/cvtsi2sd write only the low element of the destination and
 * keep the other bits, so r is cleared and passed as a read/write operand
 * to make the printed upper bits defined.
 *
 * Uses the XMMReg variables a and r of the enclosing scope.
 */
#define CVT_OP_REG2XMM(op)\
do {\
    r.q[0] = 0;\
    r.q[1] = 0;\
    asm volatile (#op " %1, %0" : "+x" (r.dq) : "r" (a.l[0]));\
    printf("%-9s: a=%08x r=%016llx%016llx\n",\
           #op,\
           a.l[0],\
           (unsigned long long)r.q[1], (unsigned long long)r.q[0]);\
} while (0)
1865 | + | |
/*
 * CVT_OP_XMM2REG(op): run the XMM -> general register conversion 'op' on a
 * and print the source and the 32 bit result (stored in r.l[0]).
 *
 * Uses the XMMReg variables a and r of the enclosing scope.  The quadwords
 * are cast for printf because uint64_t is not 'unsigned long long' on
 * every target.
 */
#define CVT_OP_XMM2REG(op)\
do {\
    asm volatile (#op " %1, %0" : "=r" (r.l[0]) : "x" (a.dq));\
    printf("%-9s: a=%016llx%016llx r=%08x\n",\
           #op,\
           (unsigned long long)a.q[1], (unsigned long long)a.q[0],\
           r.l[0]);\
} while (0)
1874 | + | |
1875 | +void test_sse(void) | |
1876 | +{ | |
1877 | + XMMReg r, a, b; | |
1878 | + | |
1879 | + MMX_OP2(punpcklbw); | |
1880 | + MMX_OP2(punpcklwd); | |
1881 | + MMX_OP2(punpckldq); | |
1882 | + MMX_OP2(packsswb); | |
1883 | + MMX_OP2(pcmpgtb); | |
1884 | + MMX_OP2(pcmpgtw); | |
1885 | + MMX_OP2(pcmpgtd); | |
1886 | + MMX_OP2(packuswb); | |
1887 | + MMX_OP2(punpckhbw); | |
1888 | + MMX_OP2(punpckhwd); | |
1889 | + MMX_OP2(punpckhdq); | |
1890 | + MMX_OP2(packssdw); | |
1891 | + MMX_OP2(pcmpeqb); | |
1892 | + MMX_OP2(pcmpeqw); | |
1893 | + MMX_OP2(pcmpeqd); | |
1894 | + | |
1895 | + MMX_OP2(paddq); | |
1896 | + MMX_OP2(pmullw); | |
1897 | + MMX_OP2(psubusb); | |
1898 | + MMX_OP2(psubusw); | |
1899 | + MMX_OP2(pminub); | |
1900 | + MMX_OP2(pand); | |
1901 | + MMX_OP2(paddusb); | |
1902 | + MMX_OP2(paddusw); | |
1903 | + MMX_OP2(pmaxub); | |
1904 | + MMX_OP2(pandn); | |
1905 | + | |
1906 | + MMX_OP2(pmulhuw); | |
1907 | + MMX_OP2(pmulhw); | |
1908 | + | |
1909 | + MMX_OP2(psubsb); | |
1910 | + MMX_OP2(psubsw); | |
1911 | + MMX_OP2(pminsw); | |
1912 | + MMX_OP2(por); | |
1913 | + MMX_OP2(paddsb); | |
1914 | + MMX_OP2(paddsw); | |
1915 | + MMX_OP2(pmaxsw); | |
1916 | + MMX_OP2(pxor); | |
1917 | + MMX_OP2(pmuludq); | |
1918 | + MMX_OP2(pmaddwd); | |
1919 | + MMX_OP2(psadbw); | |
1920 | + MMX_OP2(psubb); | |
1921 | + MMX_OP2(psubw); | |
1922 | + MMX_OP2(psubd); | |
1923 | + MMX_OP2(psubq); | |
1924 | + MMX_OP2(paddb); | |
1925 | + MMX_OP2(paddw); | |
1926 | + MMX_OP2(paddd); | |
1927 | + | |
1928 | + MMX_OP2(pavgb); | |
1929 | + MMX_OP2(pavgw); | |
1930 | + | |
1931 | + asm volatile ("pinsrw $1, %1, %0" : "=y" (r.q[0]) : "r" (0x12345678)); | |
1932 | + printf("%-9s: r=%016llx\n", "pinsrw", r.q[0]); | |
1933 | + | |
1934 | + asm volatile ("pinsrw $5, %1, %0" : "=x" (r.dq) : "r" (0x12345678)); | |
1935 | + printf("%-9s: r=%016llx%016llx\n", "pinsrw", r.q[1], r.q[0]); | |
1936 | + | |
1937 | + a.q[0] = test_values[0][0]; | |
1938 | + a.q[1] = test_values[0][1]; | |
1939 | + asm volatile ("pextrw $1, %1, %0" : "=r" (r.l[0]) : "y" (a.q[0])); | |
1940 | + printf("%-9s: r=%08x\n", "pextrw", r.l[0]); | |
1941 | + | |
1942 | + asm volatile ("pextrw $5, %1, %0" : "=r" (r.l[0]) : "x" (a.dq)); | |
1943 | + printf("%-9s: r=%08x\n", "pextrw", r.l[0]); | |
1944 | + | |
1945 | + asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "y" (a.q[0])); | |
1946 | + printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]); | |
1947 | + | |
1948 | + asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "x" (a.dq)); | |
1949 | + printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]); | |
1950 | + | |
1951 | + asm volatile ("emms"); | |
1952 | + | |
1953 | + SSE_OP2(punpcklqdq); | |
1954 | + SSE_OP2(punpckhqdq); | |
1955 | + SSE_OP2(andps); | |
1956 | + SSE_OP2(andpd); | |
1957 | + SSE_OP2(andnps); | |
1958 | + SSE_OP2(andnpd); | |
1959 | + SSE_OP2(orps); | |
1960 | + SSE_OP2(orpd); | |
1961 | + SSE_OP2(xorps); | |
1962 | + SSE_OP2(xorpd); | |
1963 | + | |
1964 | + SSE_OP2(unpcklps); | |
1965 | + SSE_OP2(unpcklpd); | |
1966 | + SSE_OP2(unpckhps); | |
1967 | + SSE_OP2(unpckhpd); | |
1968 | + | |
1969 | + PSHUF_OP(shufps, 0x78); | |
1970 | + PSHUF_OP(shufpd, 0x02); | |
1971 | + | |
1972 | + PSHUF_OP(pshufd, 0x78); | |
1973 | + PSHUF_OP(pshuflw, 0x78); | |
1974 | + PSHUF_OP(pshufhw, 0x78); | |
1975 | + | |
1976 | + SHIFT_OP(psrlw, 7); | |
1977 | + SHIFT_OP(psrlw, 16); | |
1978 | + SHIFT_OP(psraw, 7); | |
1979 | + SHIFT_OP(psraw, 16); | |
1980 | + SHIFT_OP(psllw, 7); | |
1981 | + SHIFT_OP(psllw, 16); | |
1982 | + | |
1983 | + SHIFT_OP(psrld, 7); | |
1984 | + SHIFT_OP(psrld, 32); | |
1985 | + SHIFT_OP(psrad, 7); | |
1986 | + SHIFT_OP(psrad, 32); | |
1987 | + SHIFT_OP(pslld, 7); | |
1988 | + SHIFT_OP(pslld, 32); | |
1989 | + | |
1990 | + SHIFT_OP(psrlq, 7); | |
1991 | + SHIFT_OP(psrlq, 32); | |
1992 | + SHIFT_OP(psllq, 7); | |
1993 | + SHIFT_OP(psllq, 32); | |
1994 | + | |
1995 | + SHIFT_IM(psrldq, 16); | |
1996 | + SHIFT_IM(psrldq, 7); | |
1997 | + SHIFT_IM(pslldq, 16); | |
1998 | + SHIFT_IM(pslldq, 7); | |
1999 | + | |
2000 | + MOVMSK(movmskps); | |
2001 | + MOVMSK(movmskpd); | |
2002 | + | |
2003 | + /* FPU specific ops */ | |
2004 | + | |
2005 | + { | |
2006 | + uint32_t mxcsr; | |
2007 | + asm volatile("stmxcsr %0" : "=m" (mxcsr)); | |
2008 | + printf("mxcsr=%08x\n", mxcsr & 0x1f80); | |
2009 | + asm volatile("ldmxcsr %0" : : "m" (mxcsr)); | |
2010 | + } | |
2011 | + | |
2012 | + test_sse_comi(2, -1); | |
2013 | + test_sse_comi(2, 2); | |
2014 | + test_sse_comi(2, 3); | |
2015 | + | |
2016 | + a.s[0] = 2.7; | |
2017 | + a.s[1] = 3.4; | |
2018 | + a.s[2] = 4; | |
2019 | + a.s[3] = -6.3; | |
2020 | + b.s[0] = 45.7; | |
2021 | + b.s[1] = 353.4; | |
2022 | + b.s[2] = 4; | |
2023 | + b.s[3] = 56.3; | |
2024 | + SSE_OPS(add); | |
2025 | + SSE_OPS(mul); | |
2026 | + SSE_OPS(sub); | |
2027 | + SSE_OPS(min); | |
2028 | + SSE_OPS(div); | |
2029 | + SSE_OPS(max); | |
2030 | + SSE_OPS(sqrt); | |
2031 | + SSE_OPS(cmpeq); | |
2032 | + SSE_OPS(cmplt); | |
2033 | + SSE_OPS(cmple); | |
2034 | + SSE_OPS(cmpunord); | |
2035 | + SSE_OPS(cmpneq); | |
2036 | + SSE_OPS(cmpnlt); | |
2037 | + SSE_OPS(cmpnle); | |
2038 | + SSE_OPS(cmpord); | |
2039 | + | |
2040 | + a.d[0] = 2.7; | |
2041 | + a.d[1] = -3.4; | |
2042 | + b.d[0] = 45.7; | |
2043 | + b.d[1] = -53.4; | |
2044 | + SSE_OPD(add); | |
2045 | + SSE_OPD(mul); | |
2046 | + SSE_OPD(sub); | |
2047 | + SSE_OPD(min); | |
2048 | + SSE_OPD(div); | |
2049 | + SSE_OPD(max); | |
2050 | + SSE_OPD(sqrt); | |
2051 | + SSE_OPD(cmpeq); | |
2052 | + SSE_OPD(cmplt); | |
2053 | + SSE_OPD(cmple); | |
2054 | + SSE_OPD(cmpunord); | |
2055 | + SSE_OPD(cmpneq); | |
2056 | + SSE_OPD(cmpnlt); | |
2057 | + SSE_OPD(cmpnle); | |
2058 | + SSE_OPD(cmpord); | |
2059 | + | |
2060 | + /* float to float/int */ | |
2061 | + a.s[0] = 2.7; | |
2062 | + a.s[1] = 3.4; | |
2063 | + a.s[2] = 4; | |
2064 | + a.s[3] = -6.3; | |
2065 | + CVT_OP_XMM(cvtps2pd); | |
2066 | + CVT_OP_XMM(cvtss2sd); | |
2067 | + CVT_OP_XMM2MMX(cvtps2pi); | |
2068 | + CVT_OP_XMM2MMX(cvttps2pi); | |
2069 | + CVT_OP_XMM2REG(cvtss2si); | |
2070 | + CVT_OP_XMM2REG(cvttss2si); | |
2071 | + CVT_OP_XMM(cvtps2dq); | |
2072 | + CVT_OP_XMM(cvttps2dq); | |
2073 | + | |
2074 | + a.d[0] = 2.6; | |
2075 | + a.d[1] = -3.4; | |
2076 | + CVT_OP_XMM(cvtpd2ps); | |
2077 | + CVT_OP_XMM(cvtsd2ss); | |
2078 | + CVT_OP_XMM2MMX(cvtpd2pi); | |
2079 | + CVT_OP_XMM2MMX(cvttpd2pi); | |
2080 | + CVT_OP_XMM2REG(cvtsd2si); | |
2081 | + CVT_OP_XMM2REG(cvttsd2si); | |
2082 | + CVT_OP_XMM(cvtpd2dq); | |
2083 | + CVT_OP_XMM(cvttpd2dq); | |
2084 | + | |
2085 | + /* int to float */ | |
2086 | + a.l[0] = -6; | |
2087 | + a.l[1] = 2; | |
2088 | + a.l[2] = 100; | |
2089 | + a.l[3] = -60000; | |
2090 | + CVT_OP_MMX2XMM(cvtpi2ps); | |
2091 | + CVT_OP_MMX2XMM(cvtpi2pd); | |
2092 | + CVT_OP_REG2XMM(cvtsi2ss); | |
2093 | + CVT_OP_REG2XMM(cvtsi2sd); | |
2094 | + CVT_OP_XMM(cvtdq2ps); | |
2095 | + CVT_OP_XMM(cvtdq2pd); | |
2096 | + | |
2097 | + /* XXX: test PNI insns */ | |
2098 | +#if 0 | |
2099 | + SSE_OP2(movshdup); | |
2100 | +#endif | |
2101 | +} | |
2102 | + | |
2103 | +#endif | |
2104 | + | |
1677 | 2105 | static void *call_end __init_call = NULL; |
1678 | 2106 | |
1679 | 2107 | int main(int argc, char **argv) |
... | ... | @@ -1704,5 +2132,8 @@ int main(int argc, char **argv) |
1704 | 2132 | test_self_modifying_code(); |
1705 | 2133 | test_single_step(); |
1706 | 2134 | test_enter(); |
2135 | +#ifdef TEST_SSE | |
2136 | + test_sse(); | |
2137 | +#endif | |
1707 | 2138 | return 0; |
1708 | 2139 | } | ... | ... |