/* { dg-do run } */
|
/* { dg-do run } */
|
/* { dg-options "-O2 -msse2" } */
|
/* { dg-options "-O2 -msse2" } */
|
/* { dg-require-effective-target sse2 } */
|
/* { dg-require-effective-target sse2 } */
|
|
|
#include "sse2-check.h"
|
#include "sse2-check.h"
|
|
|
#include <emmintrin.h>
|
#include <emmintrin.h>
|
#include <string.h>
|
#include <string.h>
|
|
|
#define SHIFT (4)
|
#define SHIFT (4)
|
|
|
typedef union {
|
typedef union {
|
__m128i v;
|
__m128i v;
|
unsigned int s[4];
|
unsigned int s[4];
|
unsigned short int t[8];
|
unsigned short int t[8];
|
unsigned long long u[2];
|
unsigned long long u[2];
|
unsigned char c[16];
|
unsigned char c[16];
|
}vecInLong;
|
}vecInLong;
|
|
|
void sse2_tests (void) __attribute__((noinline));
|
void sse2_tests (void) __attribute__((noinline));
|
void dump128_16 (char *, char *, vecInLong);
|
void dump128_16 (char *, char *, vecInLong);
|
void dump128_32 (char *, char *, vecInLong);
|
void dump128_32 (char *, char *, vecInLong);
|
void dump128_64 (char *, char *, vecInLong);
|
void dump128_64 (char *, char *, vecInLong);
|
void dump128_128 (char *, char *, vecInLong);
|
void dump128_128 (char *, char *, vecInLong);
|
int check (const char *, const char *[]);
|
int check (const char *, const char *[]);
|
|
|
char buf[8000];
|
char buf[8000];
|
char comparison[8000];
|
char comparison[8000];
|
static int errors = 0;
|
static int errors = 0;
|
|
|
vecInLong a128, b128, c128, d128, e128, f128;
|
vecInLong a128, b128, c128, d128, e128, f128;
|
__m128i m128_16, m128_32, s128, m128_64, m128_128;
|
__m128i m128_16, m128_32, s128, m128_64, m128_128;
|
__m64 m64_16, s64, m64_32, m64_64;
|
__m64 m64_16, s64, m64_32, m64_64;
|
|
|
const char *reference_sse2[] = {
|
const char *reference_sse2[] = {
|
"_mm_srai_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
|
"_mm_srai_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
|
"_mm_sra_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
|
"_mm_sra_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
|
"_mm_srai_epi32 00123456 00123456 00123456 00123456 \n",
|
"_mm_srai_epi32 00123456 00123456 00123456 00123456 \n",
|
"_mm_sra_epi32 00123456 00123456 00123456 00123456 \n",
|
"_mm_sra_epi32 00123456 00123456 00123456 00123456 \n",
|
"_mm_srli_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
|
"_mm_srli_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
|
"_mm_srl_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
|
"_mm_srl_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
|
"_mm_srli_epi32 00123456 00123456 00123456 00123456 \n",
|
"_mm_srli_epi32 00123456 00123456 00123456 00123456 \n",
|
"_mm_srl_epi32 00123456 00123456 00123456 00123456 \n",
|
"_mm_srl_epi32 00123456 00123456 00123456 00123456 \n",
|
"_mm_srli_epi64 00123456789abcde 00123456789abcde \n",
|
"_mm_srli_epi64 00123456789abcde 00123456789abcde \n",
|
"_mm_srl_epi64 00123456789abcde 00123456789abcde \n",
|
"_mm_srl_epi64 00123456789abcde 00123456789abcde \n",
|
"_mm_srli_si128 (byte shift) 00000000ffeeddccbbaa998877665544\n",
|
"_mm_srli_si128 (byte shift) 00000000ffeeddccbbaa998877665544\n",
|
"_mm_slli_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n",
|
"_mm_slli_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n",
|
"_mm_sll_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n",
|
"_mm_sll_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n",
|
"_mm_slli_epi32 12345670 12345670 12345670 12345670 \n",
|
"_mm_slli_epi32 12345670 12345670 12345670 12345670 \n",
|
"_mm_sll_epi32 12345670 12345670 12345670 12345670 \n",
|
"_mm_sll_epi32 12345670 12345670 12345670 12345670 \n",
|
"_mm_slli_epi64 123456789abcdef0 123456789abcdef0 \n",
|
"_mm_slli_epi64 123456789abcdef0 123456789abcdef0 \n",
|
"_mm_sll_epi64 123456789abcdef0 123456789abcdef0 \n",
|
"_mm_sll_epi64 123456789abcdef0 123456789abcdef0 \n",
|
"_mm_sll_si128 (byte shift) bbaa9988776655443322110000000000\n",
|
"_mm_sll_si128 (byte shift) bbaa9988776655443322110000000000\n",
|
"_mm_shuffle_epi32 ffeeddcc bbaa9988 77665544 33221100 \n",
|
"_mm_shuffle_epi32 ffeeddcc bbaa9988 77665544 33221100 \n",
|
"_mm_shuffelo_epi16 7766 5544 3322 1100 9988 bbaa ddcc ffee \n",
|
"_mm_shuffelo_epi16 7766 5544 3322 1100 9988 bbaa ddcc ffee \n",
|
"_mm_shuffehi_epi16 1100 3322 5544 7766 ffee ddcc bbaa 9988 \n",
|
"_mm_shuffehi_epi16 1100 3322 5544 7766 ffee ddcc bbaa 9988 \n",
|
""
|
""
|
};
|
};
|
|
|
static void
|
static void
|
sse2_test (void)
|
sse2_test (void)
|
{
|
{
|
a128.s[0] = 0x01234567;
|
a128.s[0] = 0x01234567;
|
a128.s[1] = 0x01234567;
|
a128.s[1] = 0x01234567;
|
a128.s[2] = 0x01234567;
|
a128.s[2] = 0x01234567;
|
a128.s[3] = 0x01234567;
|
a128.s[3] = 0x01234567;
|
|
|
m128_32 = a128.v;
|
m128_32 = a128.v;
|
|
|
d128.u[0] = 0x0123456789abcdefULL;
|
d128.u[0] = 0x0123456789abcdefULL;
|
d128.u[1] = 0x0123456789abcdefULL;
|
d128.u[1] = 0x0123456789abcdefULL;
|
|
|
m128_64 = d128.v;
|
m128_64 = d128.v;
|
|
|
/* This is the 128-bit constant 0x00112233445566778899aabbccddeeff,
|
/* This is the 128-bit constant 0x00112233445566778899aabbccddeeff,
|
expressed as two little-endian 64-bit words. */
|
expressed as two little-endian 64-bit words. */
|
e128.u[0] = 0x7766554433221100ULL;
|
e128.u[0] = 0x7766554433221100ULL;
|
e128.u[1] = 0xffeeddccbbaa9988ULL;
|
e128.u[1] = 0xffeeddccbbaa9988ULL;
|
|
|
f128.t[0] = 0x0123;
|
f128.t[0] = 0x0123;
|
f128.t[1] = 0x0123;
|
f128.t[1] = 0x0123;
|
f128.t[2] = 0x0123;
|
f128.t[2] = 0x0123;
|
f128.t[3] = 0x0123;
|
f128.t[3] = 0x0123;
|
f128.t[4] = 0x0123;
|
f128.t[4] = 0x0123;
|
f128.t[5] = 0x0123;
|
f128.t[5] = 0x0123;
|
f128.t[6] = 0x0123;
|
f128.t[6] = 0x0123;
|
f128.t[7] = 0x0123;
|
f128.t[7] = 0x0123;
|
|
|
m128_16 = f128.v;
|
m128_16 = f128.v;
|
|
|
m128_128 = e128.v;
|
m128_128 = e128.v;
|
|
|
b128.s[0] = SHIFT;
|
b128.s[0] = SHIFT;
|
b128.s[1] = 0;
|
b128.s[1] = 0;
|
b128.s[2] = 0;
|
b128.s[2] = 0;
|
b128.s[3] = 0;
|
b128.s[3] = 0;
|
|
|
s128 = b128.v;
|
s128 = b128.v;
|
|
|
sse2_tests();
|
sse2_tests();
|
check (buf, reference_sse2);
|
check (buf, reference_sse2);
|
#ifdef DEBUG
|
#ifdef DEBUG
|
printf ("sse2 testing:\n");
|
printf ("sse2 testing:\n");
|
printf (buf);
|
printf (buf);
|
printf ("\ncomparison:\n");
|
printf ("\ncomparison:\n");
|
printf (comparison);
|
printf (comparison);
|
#endif
|
#endif
|
buf[0] = '\0';
|
buf[0] = '\0';
|
|
|
if (errors != 0)
|
if (errors != 0)
|
abort ();
|
abort ();
|
}
|
}
|
|
|
void __attribute__((noinline))
|
void __attribute__((noinline))
|
sse2_tests (void)
|
sse2_tests (void)
|
{
|
{
|
/* psraw */
|
/* psraw */
|
c128.v = _mm_srai_epi16 (m128_16, SHIFT);
|
c128.v = _mm_srai_epi16 (m128_16, SHIFT);
|
dump128_16 (buf, "_mm_srai_epi16", c128);
|
dump128_16 (buf, "_mm_srai_epi16", c128);
|
c128.v = _mm_sra_epi16 (m128_16, s128);
|
c128.v = _mm_sra_epi16 (m128_16, s128);
|
dump128_16 (buf, "_mm_sra_epi16", c128);
|
dump128_16 (buf, "_mm_sra_epi16", c128);
|
|
|
/* psrad */
|
/* psrad */
|
c128.v = _mm_srai_epi32 (m128_32, SHIFT);
|
c128.v = _mm_srai_epi32 (m128_32, SHIFT);
|
dump128_32 (buf, "_mm_srai_epi32", c128);
|
dump128_32 (buf, "_mm_srai_epi32", c128);
|
c128.v = _mm_sra_epi32 (m128_32, s128);
|
c128.v = _mm_sra_epi32 (m128_32, s128);
|
dump128_32 (buf, "_mm_sra_epi32", c128);
|
dump128_32 (buf, "_mm_sra_epi32", c128);
|
|
|
/* psrlw */
|
/* psrlw */
|
c128.v = _mm_srli_epi16 (m128_16, SHIFT);
|
c128.v = _mm_srli_epi16 (m128_16, SHIFT);
|
dump128_16 (buf, "_mm_srli_epi16", c128);
|
dump128_16 (buf, "_mm_srli_epi16", c128);
|
c128.v = _mm_srl_epi16 (m128_16, s128);
|
c128.v = _mm_srl_epi16 (m128_16, s128);
|
dump128_16 (buf, "_mm_srl_epi16", c128);
|
dump128_16 (buf, "_mm_srl_epi16", c128);
|
|
|
/* psrld */
|
/* psrld */
|
c128.v = _mm_srli_epi32 (m128_32, SHIFT);
|
c128.v = _mm_srli_epi32 (m128_32, SHIFT);
|
dump128_32 (buf, "_mm_srli_epi32", c128);
|
dump128_32 (buf, "_mm_srli_epi32", c128);
|
c128.v = _mm_srl_epi32 (m128_32, s128);
|
c128.v = _mm_srl_epi32 (m128_32, s128);
|
dump128_32 (buf, "_mm_srl_epi32", c128);
|
dump128_32 (buf, "_mm_srl_epi32", c128);
|
|
|
/* psrlq */
|
/* psrlq */
|
c128.v = _mm_srli_epi64 (m128_64, SHIFT);
|
c128.v = _mm_srli_epi64 (m128_64, SHIFT);
|
dump128_64 (buf, "_mm_srli_epi64", c128);
|
dump128_64 (buf, "_mm_srli_epi64", c128);
|
c128.v = _mm_srl_epi64 (m128_64, s128);
|
c128.v = _mm_srl_epi64 (m128_64, s128);
|
dump128_64 (buf, "_mm_srl_epi64", c128);
|
dump128_64 (buf, "_mm_srl_epi64", c128);
|
|
|
/* psrldq */
|
/* psrldq */
|
c128.v = _mm_srli_si128 (m128_128, SHIFT);
|
c128.v = _mm_srli_si128 (m128_128, SHIFT);
|
dump128_128 (buf, "_mm_srli_si128 (byte shift) ", c128);
|
dump128_128 (buf, "_mm_srli_si128 (byte shift) ", c128);
|
|
|
/* psllw */
|
/* psllw */
|
c128.v = _mm_slli_epi16 (m128_16, SHIFT);
|
c128.v = _mm_slli_epi16 (m128_16, SHIFT);
|
dump128_16 (buf, "_mm_slli_epi16", c128);
|
dump128_16 (buf, "_mm_slli_epi16", c128);
|
c128.v = _mm_sll_epi16 (m128_16, s128);
|
c128.v = _mm_sll_epi16 (m128_16, s128);
|
dump128_16 (buf, "_mm_sll_epi16", c128);
|
dump128_16 (buf, "_mm_sll_epi16", c128);
|
|
|
/* pslld */
|
/* pslld */
|
c128.v = _mm_slli_epi32 (m128_32, SHIFT);
|
c128.v = _mm_slli_epi32 (m128_32, SHIFT);
|
dump128_32 (buf, "_mm_slli_epi32", c128);
|
dump128_32 (buf, "_mm_slli_epi32", c128);
|
c128.v = _mm_sll_epi32 (m128_32, s128);
|
c128.v = _mm_sll_epi32 (m128_32, s128);
|
dump128_32 (buf, "_mm_sll_epi32", c128);
|
dump128_32 (buf, "_mm_sll_epi32", c128);
|
|
|
/* psllq */
|
/* psllq */
|
c128.v = _mm_slli_epi64 (m128_64, SHIFT);
|
c128.v = _mm_slli_epi64 (m128_64, SHIFT);
|
dump128_64 (buf, "_mm_slli_epi64", c128);
|
dump128_64 (buf, "_mm_slli_epi64", c128);
|
c128.v = _mm_sll_epi64 (m128_64, s128);
|
c128.v = _mm_sll_epi64 (m128_64, s128);
|
dump128_64 (buf, "_mm_sll_epi64", c128);
|
dump128_64 (buf, "_mm_sll_epi64", c128);
|
|
|
/* pslldq */
|
/* pslldq */
|
c128.v = _mm_slli_si128 (m128_128, SHIFT);
|
c128.v = _mm_slli_si128 (m128_128, SHIFT);
|
dump128_128 (buf, "_mm_sll_si128 (byte shift)", c128);
|
dump128_128 (buf, "_mm_sll_si128 (byte shift)", c128);
|
|
|
/* Shuffle constant 0x1b == 0b_00_01_10_11, e.g. swap words: ABCD => DCBA. */
|
/* Shuffle constant 0x1b == 0b_00_01_10_11, e.g. swap words: ABCD => DCBA. */
|
|
|
/* pshufd */
|
/* pshufd */
|
c128.v = _mm_shuffle_epi32 (m128_128, 0x1b);
|
c128.v = _mm_shuffle_epi32 (m128_128, 0x1b);
|
dump128_32 (buf, "_mm_shuffle_epi32", c128);
|
dump128_32 (buf, "_mm_shuffle_epi32", c128);
|
|
|
/* pshuflw */
|
/* pshuflw */
|
c128.v = _mm_shufflelo_epi16 (m128_128, 0x1b);
|
c128.v = _mm_shufflelo_epi16 (m128_128, 0x1b);
|
dump128_16 (buf, "_mm_shuffelo_epi16", c128);
|
dump128_16 (buf, "_mm_shuffelo_epi16", c128);
|
|
|
/* pshufhw */
|
/* pshufhw */
|
c128.v = _mm_shufflehi_epi16 (m128_128, 0x1b);
|
c128.v = _mm_shufflehi_epi16 (m128_128, 0x1b);
|
dump128_16 (buf, "_mm_shuffehi_epi16", c128);
|
dump128_16 (buf, "_mm_shuffehi_epi16", c128);
|
}
|
}
|
|
|
void
|
void
|
dump128_16 (char *buf, char *name, vecInLong x)
|
dump128_16 (char *buf, char *name, vecInLong x)
|
{
|
{
|
int i;
|
int i;
|
char *p = buf + strlen (buf);
|
char *p = buf + strlen (buf);
|
|
|
sprintf (p, "%s ", name);
|
sprintf (p, "%s ", name);
|
p += strlen (p);
|
p += strlen (p);
|
|
|
for (i=0; i<8; i++)
|
for (i=0; i<8; i++)
|
{
|
{
|
sprintf (p, "%4.4x ", x.t[i]);
|
sprintf (p, "%4.4x ", x.t[i]);
|
p += strlen (p);
|
p += strlen (p);
|
}
|
}
|
strcat (p, "\n");
|
strcat (p, "\n");
|
}
|
}
|
|
|
void
|
void
|
dump128_32 (char *buf, char *name, vecInLong x)
|
dump128_32 (char *buf, char *name, vecInLong x)
|
{
|
{
|
int i;
|
int i;
|
char *p = buf + strlen (buf);
|
char *p = buf + strlen (buf);
|
|
|
sprintf (p, "%s ", name);
|
sprintf (p, "%s ", name);
|
p += strlen (p);
|
p += strlen (p);
|
|
|
for (i=0; i<4; i++)
|
for (i=0; i<4; i++)
|
{
|
{
|
sprintf (p, "%8.8x ", x.s[i]);
|
sprintf (p, "%8.8x ", x.s[i]);
|
p += strlen (p);
|
p += strlen (p);
|
}
|
}
|
strcat (p, "\n");
|
strcat (p, "\n");
|
}
|
}
|
|
|
void
|
void
|
dump128_64 (char *buf, char *name, vecInLong x)
|
dump128_64 (char *buf, char *name, vecInLong x)
|
{
|
{
|
int i;
|
int i;
|
char *p = buf + strlen (buf);
|
char *p = buf + strlen (buf);
|
|
|
sprintf (p, "%s ", name);
|
sprintf (p, "%s ", name);
|
p += strlen (p);
|
p += strlen (p);
|
|
|
for (i=0; i<2; i++)
|
for (i=0; i<2; i++)
|
{
|
{
|
#if defined(_WIN32) && !defined(__CYGWIN__)
|
#if defined(_WIN32) && !defined(__CYGWIN__)
|
sprintf (p, "%16.16I64x ", x.u[i]);
|
sprintf (p, "%16.16I64x ", x.u[i]);
|
#else
|
#else
|
sprintf (p, "%16.16llx ", x.u[i]);
|
sprintf (p, "%16.16llx ", x.u[i]);
|
#endif
|
#endif
|
p += strlen (p);
|
p += strlen (p);
|
}
|
}
|
strcat (p, "\n");
|
strcat (p, "\n");
|
}
|
}
|
|
|
void
|
void
|
dump128_128 (char *buf, char *name, vecInLong x)
|
dump128_128 (char *buf, char *name, vecInLong x)
|
{
|
{
|
int i;
|
int i;
|
char *p = buf + strlen (buf);
|
char *p = buf + strlen (buf);
|
|
|
sprintf (p, "%s ", name);
|
sprintf (p, "%s ", name);
|
p += strlen (p);
|
p += strlen (p);
|
|
|
for (i=15; i>=0; i--)
|
for (i=15; i>=0; i--)
|
{
|
{
|
/* This is cheating; we don't have a 128-bit int format code.
|
/* This is cheating; we don't have a 128-bit int format code.
|
Running the loop backwards to compensate for the
|
Running the loop backwards to compensate for the
|
little-endian layout. */
|
little-endian layout. */
|
sprintf (p, "%2.2x", x.c[i]);
|
sprintf (p, "%2.2x", x.c[i]);
|
p += strlen (p);
|
p += strlen (p);
|
}
|
}
|
strcat (p, "\n");
|
strcat (p, "\n");
|
}
|
}
|
|
|
int
|
int
|
check (const char *input, const char *reference[])
|
check (const char *input, const char *reference[])
|
{
|
{
|
int broken, i, j, len;
|
int broken, i, j, len;
|
const char *p_input;
|
const char *p_input;
|
char *p_comparison;
|
char *p_comparison;
|
int new_errors = 0;
|
int new_errors = 0;
|
|
|
p_comparison = &comparison[0];
|
p_comparison = &comparison[0];
|
p_input = input;
|
p_input = input;
|
|
|
for (i = 0; *reference[i] != '\0'; i++)
|
for (i = 0; *reference[i] != '\0'; i++)
|
{
|
{
|
broken = 0;
|
broken = 0;
|
len = strlen (reference[i]);
|
len = strlen (reference[i]);
|
for (j = 0; j < len; j++)
|
for (j = 0; j < len; j++)
|
{
|
{
|
/* Ignore the terminating NUL characters at the end of every string in 'reference[]'. */
|
/* Ignore the terminating NUL characters at the end of every string in 'reference[]'. */
|
if (!broken && *p_input != reference[i][j])
|
if (!broken && *p_input != reference[i][j])
|
{
|
{
|
*p_comparison = '\0';
|
*p_comparison = '\0';
|
strcat (p_comparison, " >>> ");
|
strcat (p_comparison, " >>> ");
|
p_comparison += strlen (p_comparison);
|
p_comparison += strlen (p_comparison);
|
new_errors++;
|
new_errors++;
|
broken = 1;
|
broken = 1;
|
}
|
}
|
*p_comparison = *p_input;
|
*p_comparison = *p_input;
|
p_comparison++;
|
p_comparison++;
|
p_input++;
|
p_input++;
|
}
|
}
|
if (broken)
|
if (broken)
|
{
|
{
|
*p_comparison = '\0';
|
*p_comparison = '\0';
|
strcat (p_comparison, "expected:\n");
|
strcat (p_comparison, "expected:\n");
|
strcat (p_comparison, reference[i]);
|
strcat (p_comparison, reference[i]);
|
p_comparison += strlen (p_comparison);
|
p_comparison += strlen (p_comparison);
|
}
|
}
|
}
|
}
|
*p_comparison = '\0';
|
*p_comparison = '\0';
|
strcat (p_comparison, new_errors ? "failure\n\n" : "O.K.\n\n") ;
|
strcat (p_comparison, new_errors ? "failure\n\n" : "O.K.\n\n") ;
|
errors += new_errors;
|
errors += new_errors;
|
return 0;
|
return 0;
|
}
|
}
|
|
|