Commit 085339a12b448a64527ce4fdb6e1d43addbb16dc

Authored by bellard
1 parent abd2c7dc

MMX/SSE test


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@1204 c046a42c-6fe2-441c-8c8c-71466251a162
tests/Makefile
1 1 -include ../config-host.mak
2 2  
3   -CFLAGS=-Wall -O2 -g
  3 +CFLAGS=-Wall -O2 -g #-msse2
4 4 LDFLAGS=
5 5  
6 6 ifeq ($(ARCH),i386)
... ...
tests/test-i386.c
... ... @@ -35,6 +35,7 @@
35 35 #define TEST_VM86
36 36 //#define LINUX_VM86_IOPL_FIX
37 37 //#define TEST_P4_FLAGS
  38 +//#define TEST_SSE
38 39  
39 40 #define xglue(x, y) x ## y
40 41 #define glue(x, y) xglue(x, y)
... ... @@ -647,8 +648,8 @@ void test_fbcd(double a)
647 648 memset((env), 0xaa, sizeof(*(env)));\
648 649 for(i=0;i<5;i++)\
649 650 asm volatile ("fldl %0" : : "m" (dtab[i]));\
650   - asm(save " %0\n" : : "m" (*(env)));\
651   - asm(restore " %0\n": : "m" (*(env)));\
  651 + asm volatile (save " %0\n" : : "m" (*(env)));\
  652 + asm volatile (restore " %0\n": : "m" (*(env)));\
652 653 for(i=0;i<5;i++)\
653 654 asm volatile ("fstpl %0" : "=m" (rtab[i]));\
654 655 for(i=0;i<5;i++)\
... ... @@ -1674,6 +1675,433 @@ static void test_enter(void)
1674 1675 TEST_ENTER("w", uint16_t, 31);
1675 1676 }
1676 1677  
  1678 +#ifdef TEST_SSE
  1679 +/* Local vector typedefs built with GCC's __mode__ attribute: 2 x 32-bit int (V2SI) and 4 x float (V4SF). */
  1680 +typedef int __m64 __attribute__ ((__mode__ (__V2SI__)));
  1681 +typedef int __m128 __attribute__ ((__mode__(__V4SF__)));
  1682 +/* XMMReg: one __m128 vector value (dq) overlaid with double, float, 32-bit and 64-bit integer views of the same bytes. */
  1683 +typedef union {
  1684 + double d[2];
  1685 + float s[4];
  1686 + uint32_t l[4];
  1687 + uint64_t q[2];
  1688 + __m128 dq; /* the member handed to the "x" asm constraints */
  1689 +} XMMReg;
  1690 +/* Integer test patterns, one row per 128-bit value as { q[0], q[1] }; the *_OP2 macros use rows (0,1) and (2,3) as (a,b) pairs. */
  1691 +static uint64_t __attribute__((aligned(16))) test_values[4][2] = {
  1692 + { 0x456723c698694873, 0xdc515cff944a58ec },
  1693 + { 0x1f297ccd58bad7ab, 0x41f21efba9e3e146 },
  1694 + { 0x007c62c2085427f8, 0x231be9e8cde7438d },
  1695 + { 0x0f76255a085427f8, 0xc233e9e8c4c9439a },
  1696 +};
  1697 +/* SSE_OP(op): execute the two-operand instruction op with source b and destination r preloaded from a, then print a, b and r as 128-bit hex (q[1] first). Expects XMMReg a, b, r in the expansion scope. */
  1698 +#define SSE_OP(op)\
  1699 +{\
  1700 + asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq)); /* "0" puts a in the destination register */\
  1701 + printf("%-9s: a=%016llx%016llx b=%016llx%016llx r=%016llx%016llx\n",\
  1702 + #op,\
  1703 + a.q[1], a.q[0],\
  1704 + b.q[1], b.q[0],\
  1705 + r.q[1], r.q[0]);\
  1706 +}
  1707 +/* SSE_OP2(op): run SSE_OP(op) twice, loading a and b from test_values rows (0,1) and then rows (2,3). */
  1708 +#define SSE_OP2(op)\
  1709 +{\
  1710 + int i;\
  1711 + for(i=0;i<2;i++) {\
  1712 + a.q[0] = test_values[2*i][0];\
  1713 + a.q[1] = test_values[2*i][1];\
  1714 + b.q[0] = test_values[2*i+1][0];\
  1715 + b.q[1] = test_values[2*i+1][1];\
  1716 + SSE_OP(op);\
  1717 + }\
  1718 +}
  1719 +/* MMX_OP2(op): test op on 64-bit MMX registers ("y" constraints) using q[0] of each value pair, then on full XMM registers via SSE_OP2. This macro does not issue emms itself. */
  1720 +#define MMX_OP2(op)\
  1721 +{\
  1722 + int i;\
  1723 + for(i=0;i<2;i++) {\
  1724 + a.q[0] = test_values[2*i][0];\
  1725 + b.q[0] = test_values[2*i+1][0];\
  1726 + asm volatile (#op " %2, %0" : "=y" (r.q[0]) : "0" (a.q[0]), "y" (b.q[0]));\
  1727 + printf("%-9s: a=%016llx b=%016llx r=%016llx\n",\
  1728 + #op,\
  1729 + a.q[0],\
  1730 + b.q[0],\
  1731 + r.q[0]);\
  1732 + }\
  1733 + SSE_OP2(op); /* same instruction name, 128-bit operands */\
  1734 +}
  1735 +
  1736 +#define PSHUF_OP(op, ib)\
  1737 +{\
  1738 + int i;\
  1739 + for(i=0;i<2;i++) {\
  1740 + a.q[0] = test_values[2*i][0];\
  1741 + a.q[1] = test_values[2*i][1];\
  1742 + asm volatile (#op " $" #ib ", %1, %0" : "=x" (r.dq) : "x" (a.dq));\
  1743 + printf("%-9s: a=%016llx%016llx ib=%02x r=%016llx%016llx\n",\
  1744 + #op,\
  1745 + a.q[1], a.q[0],\
  1746 + ib,\
  1747 + r.q[1], r.q[0]);\
  1748 + }\
  1749 +}
  1750 +/* SHIFT_IM(op, ib): apply op with immediate count ib in place to a copy of a (test_values rows 0 and 2) and print a, ib and the result r. */
  1751 +#define SHIFT_IM(op, ib)\
  1752 +{\
  1753 + int i;\
  1754 + for(i=0;i<2;i++) {\
  1755 + a.q[0] = test_values[2*i][0];\
  1756 + a.q[1] = test_values[2*i][1];\
  1757 + asm volatile (#op " $" #ib ", %0" : "=x" (r.dq) : "0" (a.dq));\
  1758 + printf("%-9s: a=%016llx%016llx ib=%02x r=%016llx%016llx\n",\
  1759 + #op,\
  1760 + a.q[1], a.q[0],\
  1761 + ib,\
  1762 + r.q[1], r.q[0]);\
  1763 + }\
  1764 +}
  1765 +/* SHIFT_OP(op, ib): test shift op with count ib in both encodings: as an immediate (via SHIFT_IM) and as a register operand b holding ib in q[0] with q[1] zero. */
  1766 +#define SHIFT_OP(op, ib)\
  1767 +{\
  1768 + int i;\
  1769 + SHIFT_IM(op, ib); /* immediate form; uses its own block-local i */\
  1770 + for(i=0;i<2;i++) {\
  1771 + a.q[0] = test_values[2*i][0];\
  1772 + a.q[1] = test_values[2*i][1];\
  1773 + b.q[0] = ib;\
  1774 + b.q[1] = 0;\
  1775 + asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
  1776 + printf("%-9s: a=%016llx%016llx b=%016llx%016llx r=%016llx%016llx\n",\
  1777 + #op,\
  1778 + a.q[1], a.q[0],\
  1779 + b.q[1], b.q[0],\
  1780 + r.q[1], r.q[0]);\
  1781 + }\
  1782 +}
  1783 +/* MOVMSK(op): run op from XMM source a (test_values rows 0 and 2) into a general-purpose register and print a and the 32-bit result. */
  1784 +#define MOVMSK(op)\
  1785 +{\
  1786 + int i, reg;\
  1787 + for(i=0;i<2;i++) {\
  1788 + a.q[0] = test_values[2*i][0];\
  1789 + a.q[1] = test_values[2*i][1];\
  1790 + asm volatile (#op " %1, %0" : "=r" (reg) : "x" (a.dq));\
  1791 + printf("%-9s: a=%016llx%016llx r=%08x\n",\
  1792 + #op,\
  1793 + a.q[1], a.q[0],\
  1794 + reg);\
  1795 + }\
  1796 +}
  1797 +/* SSE_OPS(a): run SSE_OP on the "ps" and "ss" suffixed forms of mnemonic a, using the a, b, r currently in scope. Expands to two statements. */
  1798 +#define SSE_OPS(a) \
  1799 +SSE_OP(a ## ps);\
  1800 +SSE_OP(a ## ss);
  1801 +/* SSE_OPD(a): run SSE_OP on the "pd" and "sd" suffixed forms of mnemonic a, using the a, b, r currently in scope. Expands to two statements. */
  1802 +#define SSE_OPD(a) \
  1803 +SSE_OP(a ## pd);\
  1804 +SSE_OP(a ## sd);
  1805 +/* SSE_COMI(op, field): store a1 and b1 (taken from the enclosing function) into element 0 of XMMReg member field, execute compare op, capture EFLAGS with pushf/pop and print the C, P, Z, S, O, A bits (CC_* masks are defined outside this macro). */
  1806 +#define SSE_COMI(op, field)\
  1807 +{\
  1808 + unsigned int eflags;\
  1809 + XMMReg a, b; /* NOTE(review): only element 0 is assigned below; presumably fine for scalar compares - confirm */\
  1810 + a.field[0] = a1;\
  1811 + b.field[0] = b1;\
  1812 + asm volatile (#op " %2, %1\n"\
  1813 + "pushf\n"\
  1814 + "pop %0\n"\
  1815 + : "=m" (eflags)\
  1816 + : "x" (a.dq), "x" (b.dq));\
  1817 + printf("%-9s: a=%f b=%f cc=%04x\n",\
  1818 + #op, a1, b1,\
  1819 + eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\
  1820 +}
  1821 +/* Compare a1 with b1 using ucomiss, ucomisd, comiss and comisd (float member s, double member d) and print the resulting flags for each. */
  1822 +void test_sse_comi(double a1, double b1)
  1823 +{
  1824 + SSE_COMI(ucomiss, s);
  1825 + SSE_COMI(ucomisd, d);
  1826 + SSE_COMI(comiss, s);
  1827 + SSE_COMI(comisd, d);
  1828 +}
  1829 +
  1830 +#define CVT_OP_XMM(op)\
  1831 +{\
  1832 + asm volatile (#op " %1, %0" : "=x" (r.dq) : "x" (a.dq));\
  1833 + printf("%-9s: a=%016llx%016llx r=%016llx%016llx\n",\
  1834 + #op,\
  1835 + a.q[1], a.q[0],\
  1836 + r.q[1], r.q[0]);\
  1837 +}
  1838 +/* CVT_OP_XMM2MMX(op): convert XMM source a into an MMX register stored to r.q[0], then print a and the 64-bit result. This macro does not issue emms itself. */
  1839 +#define CVT_OP_XMM2MMX(op)\
  1840 +{\
  1841 + asm volatile (#op " %1, %0" : "=y" (r.q[0]) : "x" (a.dq));\
  1842 + printf("%-9s: a=%016llx%016llx r=%016llx\n",\
  1843 + #op,\
  1844 + a.q[1], a.q[0],\
  1845 + r.q[0]);\
  1846 +}
  1847 +
  1848 +#define CVT_OP_MMX2XMM(op)\
  1849 +{\
  1850 + asm volatile (#op " %1, %0" : "=x" (r.dq) : "y" (a.q[0]));\
  1851 + printf("%-9s: a=%016llx r=%016llx%016llx\n",\
  1852 + #op,\
  1853 + a.q[0],\
  1854 + r.q[1], r.q[0]);\
  1855 +}
  1856 +
  1857 +#define CVT_OP_REG2XMM(op)\
  1858 +{\
  1859 + asm volatile (#op " %1, %0" : "=x" (r.dq) : "r" (a.l[0]));\
  1860 + printf("%-9s: a=%08x r=%016llx%016llx\n",\
  1861 + #op,\
  1862 + a.l[0],\
  1863 + r.q[1], r.q[0]);\
  1864 +}
  1865 +/* CVT_OP_XMM2REG(op): convert XMM source a into a general register stored to r.l[0], then print a and the 32-bit result. */
  1866 +#define CVT_OP_XMM2REG(op)\
  1867 +{\
  1868 + asm volatile (#op " %1, %0" : "=r" (r.l[0]) : "x" (a.dq));\
  1869 + printf("%-9s: a=%016llx%016llx r=%08x\n",\
  1870 + #op,\
  1871 + a.q[1], a.q[0],\
  1872 + r.l[0]);\
  1873 +}
  1874 +
  1875 +void test_sse(void)
  1876 +{
  1877 + XMMReg r, a, b;
  1878 +
  1879 + MMX_OP2(punpcklbw);
  1880 + MMX_OP2(punpcklwd);
  1881 + MMX_OP2(punpckldq);
  1882 + MMX_OP2(packsswb);
  1883 + MMX_OP2(pcmpgtb);
  1884 + MMX_OP2(pcmpgtw);
  1885 + MMX_OP2(pcmpgtd);
  1886 + MMX_OP2(packuswb);
  1887 + MMX_OP2(punpckhbw);
  1888 + MMX_OP2(punpckhwd);
  1889 + MMX_OP2(punpckhdq);
  1890 + MMX_OP2(packssdw);
  1891 + MMX_OP2(pcmpeqb);
  1892 + MMX_OP2(pcmpeqw);
  1893 + MMX_OP2(pcmpeqd);
  1894 +
  1895 + MMX_OP2(paddq);
  1896 + MMX_OP2(pmullw);
  1897 + MMX_OP2(psubusb);
  1898 + MMX_OP2(psubusw);
  1899 + MMX_OP2(pminub);
  1900 + MMX_OP2(pand);
  1901 + MMX_OP2(paddusb);
  1902 + MMX_OP2(paddusw);
  1903 + MMX_OP2(pmaxub);
  1904 + MMX_OP2(pandn);
  1905 +
  1906 + MMX_OP2(pmulhuw);
  1907 + MMX_OP2(pmulhw);
  1908 +
  1909 + MMX_OP2(psubsb);
  1910 + MMX_OP2(psubsw);
  1911 + MMX_OP2(pminsw);
  1912 + MMX_OP2(por);
  1913 + MMX_OP2(paddsb);
  1914 + MMX_OP2(paddsw);
  1915 + MMX_OP2(pmaxsw);
  1916 + MMX_OP2(pxor);
  1917 + MMX_OP2(pmuludq);
  1918 + MMX_OP2(pmaddwd);
  1919 + MMX_OP2(psadbw);
  1920 + MMX_OP2(psubb);
  1921 + MMX_OP2(psubw);
  1922 + MMX_OP2(psubd);
  1923 + MMX_OP2(psubq);
  1924 + MMX_OP2(paddb);
  1925 + MMX_OP2(paddw);
  1926 + MMX_OP2(paddd);
  1927 +
  1928 + MMX_OP2(pavgb);
  1929 + MMX_OP2(pavgw);
  1930 +
  1931 + asm volatile ("pinsrw $1, %1, %0" : "=y" (r.q[0]) : "r" (0x12345678));
  1932 + printf("%-9s: r=%016llx\n", "pinsrw", r.q[0]);
  1933 +
  1934 + asm volatile ("pinsrw $5, %1, %0" : "=x" (r.dq) : "r" (0x12345678));
  1935 + printf("%-9s: r=%016llx%016llx\n", "pinsrw", r.q[1], r.q[0]);
  1936 +
  1937 + a.q[0] = test_values[0][0];
  1938 + a.q[1] = test_values[0][1];
  1939 + asm volatile ("pextrw $1, %1, %0" : "=r" (r.l[0]) : "y" (a.q[0]));
  1940 + printf("%-9s: r=%08x\n", "pextrw", r.l[0]);
  1941 +
  1942 + asm volatile ("pextrw $5, %1, %0" : "=r" (r.l[0]) : "x" (a.dq));
  1943 + printf("%-9s: r=%08x\n", "pextrw", r.l[0]);
  1944 +
  1945 + asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "y" (a.q[0]));
  1946 + printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]);
  1947 +
  1948 + asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "x" (a.dq));
  1949 + printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]);
  1950 +
  1951 + asm volatile ("emms");
  1952 +
  1953 + SSE_OP2(punpcklqdq);
  1954 + SSE_OP2(punpckhqdq);
  1955 + SSE_OP2(andps);
  1956 + SSE_OP2(andpd);
  1957 + SSE_OP2(andnps);
  1958 + SSE_OP2(andnpd);
  1959 + SSE_OP2(orps);
  1960 + SSE_OP2(orpd);
  1961 + SSE_OP2(xorps);
  1962 + SSE_OP2(xorpd);
  1963 +
  1964 + SSE_OP2(unpcklps);
  1965 + SSE_OP2(unpcklpd);
  1966 + SSE_OP2(unpckhps);
  1967 + SSE_OP2(unpckhpd);
  1968 +
  1969 + PSHUF_OP(shufps, 0x78);
  1970 + PSHUF_OP(shufpd, 0x02);
  1971 +
  1972 + PSHUF_OP(pshufd, 0x78);
  1973 + PSHUF_OP(pshuflw, 0x78);
  1974 + PSHUF_OP(pshufhw, 0x78);
  1975 +
  1976 + SHIFT_OP(psrlw, 7);
  1977 + SHIFT_OP(psrlw, 16);
  1978 + SHIFT_OP(psraw, 7);
  1979 + SHIFT_OP(psraw, 16);
  1980 + SHIFT_OP(psllw, 7);
  1981 + SHIFT_OP(psllw, 16);
  1982 +
  1983 + SHIFT_OP(psrld, 7);
  1984 + SHIFT_OP(psrld, 32);
  1985 + SHIFT_OP(psrad, 7);
  1986 + SHIFT_OP(psrad, 32);
  1987 + SHIFT_OP(pslld, 7);
  1988 + SHIFT_OP(pslld, 32);
  1989 +
  1990 + SHIFT_OP(psrlq, 7);
  1991 + SHIFT_OP(psrlq, 32);
  1992 + SHIFT_OP(psllq, 7);
  1993 + SHIFT_OP(psllq, 32);
  1994 +
  1995 + SHIFT_IM(psrldq, 16);
  1996 + SHIFT_IM(psrldq, 7);
  1997 + SHIFT_IM(pslldq, 16);
  1998 + SHIFT_IM(pslldq, 7);
  1999 +
  2000 + MOVMSK(movmskps);
  2001 + MOVMSK(movmskpd);
  2002 +
  2003 + /* FPU specific ops */
  2004 +
  2005 + {
  2006 + uint32_t mxcsr;
  2007 + asm volatile("stmxcsr %0" : "=m" (mxcsr));
  2008 + printf("mxcsr=%08x\n", mxcsr & 0x1f80);
  2009 + asm volatile("ldmxcsr %0" : : "m" (mxcsr));
  2010 + }
  2011 +
  2012 + test_sse_comi(2, -1);
  2013 + test_sse_comi(2, 2);
  2014 + test_sse_comi(2, 3);
  2015 +
  2016 + a.s[0] = 2.7;
  2017 + a.s[1] = 3.4;
  2018 + a.s[2] = 4;
  2019 + a.s[3] = -6.3;
  2020 + b.s[0] = 45.7;
  2021 + b.s[1] = 353.4;
  2022 + b.s[2] = 4;
  2023 + b.s[3] = 56.3;
  2024 + SSE_OPS(add);
  2025 + SSE_OPS(mul);
  2026 + SSE_OPS(sub);
  2027 + SSE_OPS(min);
  2028 + SSE_OPS(div);
  2029 + SSE_OPS(max);
  2030 + SSE_OPS(sqrt);
  2031 + SSE_OPS(cmpeq);
  2032 + SSE_OPS(cmplt);
  2033 + SSE_OPS(cmple);
  2034 + SSE_OPS(cmpunord);
  2035 + SSE_OPS(cmpneq);
  2036 + SSE_OPS(cmpnlt);
  2037 + SSE_OPS(cmpnle);
  2038 + SSE_OPS(cmpord);
  2039 +
  2040 + a.d[0] = 2.7;
  2041 + a.d[1] = -3.4;
  2042 + b.d[0] = 45.7;
  2043 + b.d[1] = -53.4;
  2044 + SSE_OPD(add);
  2045 + SSE_OPD(mul);
  2046 + SSE_OPD(sub);
  2047 + SSE_OPD(min);
  2048 + SSE_OPD(div);
  2049 + SSE_OPD(max);
  2050 + SSE_OPD(sqrt);
  2051 + SSE_OPD(cmpeq);
  2052 + SSE_OPD(cmplt);
  2053 + SSE_OPD(cmple);
  2054 + SSE_OPD(cmpunord);
  2055 + SSE_OPD(cmpneq);
  2056 + SSE_OPD(cmpnlt);
  2057 + SSE_OPD(cmpnle);
  2058 + SSE_OPD(cmpord);
  2059 +
  2060 + /* float to float/int */
  2061 + a.s[0] = 2.7;
  2062 + a.s[1] = 3.4;
  2063 + a.s[2] = 4;
  2064 + a.s[3] = -6.3;
  2065 + CVT_OP_XMM(cvtps2pd);
  2066 + CVT_OP_XMM(cvtss2sd);
  2067 + CVT_OP_XMM2MMX(cvtps2pi);
  2068 + CVT_OP_XMM2MMX(cvttps2pi);
  2069 + CVT_OP_XMM2REG(cvtss2si);
  2070 + CVT_OP_XMM2REG(cvttss2si);
  2071 + CVT_OP_XMM(cvtps2dq);
  2072 + CVT_OP_XMM(cvttps2dq);
  2073 +
  2074 + a.d[0] = 2.6;
  2075 + a.d[1] = -3.4;
  2076 + CVT_OP_XMM(cvtpd2ps);
  2077 + CVT_OP_XMM(cvtsd2ss);
  2078 + CVT_OP_XMM2MMX(cvtpd2pi);
  2079 + CVT_OP_XMM2MMX(cvttpd2pi);
  2080 + CVT_OP_XMM2REG(cvtsd2si);
  2081 + CVT_OP_XMM2REG(cvttsd2si);
  2082 + CVT_OP_XMM(cvtpd2dq);
  2083 + CVT_OP_XMM(cvttpd2dq);
  2084 +
  2085 + /* int to float */
  2086 + a.l[0] = -6;
  2087 + a.l[1] = 2;
  2088 + a.l[2] = 100;
  2089 + a.l[3] = -60000;
  2090 + CVT_OP_MMX2XMM(cvtpi2ps);
  2091 + CVT_OP_MMX2XMM(cvtpi2pd);
  2092 + CVT_OP_REG2XMM(cvtsi2ss);
  2093 + CVT_OP_REG2XMM(cvtsi2sd);
  2094 + CVT_OP_XMM(cvtdq2ps);
  2095 + CVT_OP_XMM(cvtdq2pd);
  2096 +
  2097 + /* XXX: test PNI insns */
  2098 +#if 0
  2099 + SSE_OP2(movshdup);
  2100 +#endif
  2101 +}
  2102 +
  2103 +#endif
  2104 +
1677 2105 static void *call_end __init_call = NULL;
1678 2106  
1679 2107 int main(int argc, char **argv)
... ... @@ -1704,5 +2132,8 @@ int main(int argc, char **argv)
1704 2132 test_self_modifying_code();
1705 2133 test_single_step();
1706 2134 test_enter();
  2135 +#ifdef TEST_SSE
  2136 + test_sse();
  2137 +#endif
1707 2138 return 0;
1708 2139 }
... ...