/* { dg-do run } */
/* { dg-require-effective-target xop } */
/* { dg-options "-O2 -mxop" } */

/* Runtime test for the XOP horizontal-subtract intrinsics
   _mm_hsubw_epi8, _mm_hsubd_epi16 and _mm_hsubq_epi32.  Each intrinsic
   result is compared against a scalar reference computation.  */

#include "xop-check.h"

#include <x86intrin.h>
#include <string.h>

/* Number of __m128i vectors in each test buffer.  */
#define NUM 10

union
|
union
|
{
|
{
|
__m128i x[NUM];
|
__m128i x[NUM];
|
signed char ssi[NUM * 16];
|
signed char ssi[NUM * 16];
|
short si[NUM * 8];
|
short si[NUM * 8];
|
int li[NUM * 4];
|
int li[NUM * 4];
|
long long lli[NUM * 2];
|
long long lli[NUM * 2];
|
} dst, res, src1;
|
} dst, res, src1;
|
|
|
static void
|
static void
|
init_sbyte ()
|
init_sbyte ()
|
{
|
{
|
int i;
|
int i;
|
for (i=0; i < NUM * 16; i++)
|
for (i=0; i < NUM * 16; i++)
|
src1.ssi[i] = i;
|
src1.ssi[i] = i;
|
}
|
}
|
|
|
static void
|
static void
|
init_sword ()
|
init_sword ()
|
{
|
{
|
int i;
|
int i;
|
for (i=0; i < NUM * 8; i++)
|
for (i=0; i < NUM * 8; i++)
|
src1.si[i] = i;
|
src1.si[i] = i;
|
}
|
}
|
|
|
|
|
static void
|
static void
|
init_sdword ()
|
init_sdword ()
|
{
|
{
|
int i;
|
int i;
|
for (i=0; i < NUM * 4; i++)
|
for (i=0; i < NUM * 4; i++)
|
src1.li[i] = i;
|
src1.li[i] = i;
|
}
|
}
|
|
|
static int
|
static int
|
check_sbyte2word ()
|
check_sbyte2word ()
|
{
|
{
|
int i, j, s, t, check_fails = 0;
|
int i, j, s, t, check_fails = 0;
|
for (i = 0; i < NUM * 16; i = i + 16)
|
for (i = 0; i < NUM * 16; i = i + 16)
|
{
|
{
|
for (j = 0; j < 8; j++)
|
for (j = 0; j < 8; j++)
|
{
|
{
|
t = i + (2 * j);
|
t = i + (2 * j);
|
s = (i / 2) + j;
|
s = (i / 2) + j;
|
res.si[s] = src1.ssi[t] - src1.ssi[t + 1] ;
|
res.si[s] = src1.ssi[t] - src1.ssi[t + 1] ;
|
if (res.si[s] != dst.si[s])
|
if (res.si[s] != dst.si[s])
|
check_fails++;
|
check_fails++;
|
}
|
}
|
}
|
}
|
}
|
}
|
|
|
static int
|
static int
|
check_sword2dword ()
|
check_sword2dword ()
|
{
|
{
|
int i, j, s, t, check_fails = 0;
|
int i, j, s, t, check_fails = 0;
|
for (i = 0; i < (NUM * 8); i = i + 8)
|
for (i = 0; i < (NUM * 8); i = i + 8)
|
{
|
{
|
for (j = 0; j < 4; j++)
|
for (j = 0; j < 4; j++)
|
{
|
{
|
t = i + (2 * j);
|
t = i + (2 * j);
|
s = (i / 2) + j;
|
s = (i / 2) + j;
|
res.li[s] = src1.si[t] - src1.si[t + 1] ;
|
res.li[s] = src1.si[t] - src1.si[t + 1] ;
|
if (res.li[s] != dst.li[s])
|
if (res.li[s] != dst.li[s])
|
check_fails++;
|
check_fails++;
|
}
|
}
|
}
|
}
|
}
|
}
|
|
|
static int
|
static int
|
check_dword2qword ()
|
check_dword2qword ()
|
{
|
{
|
int i, j, s, t, check_fails = 0;
|
int i, j, s, t, check_fails = 0;
|
for (i = 0; i < (NUM * 4); i = i + 4)
|
for (i = 0; i < (NUM * 4); i = i + 4)
|
{
|
{
|
for (j = 0; j < 2; j++)
|
for (j = 0; j < 2; j++)
|
{
|
{
|
t = i + (2 * j);
|
t = i + (2 * j);
|
s = (i / 2) + j;
|
s = (i / 2) + j;
|
res.lli[s] = src1.li[t] - src1.li[t + 1] ;
|
res.lli[s] = src1.li[t] - src1.li[t + 1] ;
|
if (res.lli[s] != dst.lli[s])
|
if (res.lli[s] != dst.lli[s])
|
check_fails++;
|
check_fails++;
|
}
|
}
|
}
|
}
|
}
|
}
|
|
|
static void
|
static void
|
xop_test (void)
|
xop_test (void)
|
{
|
{
|
int i;
|
int i;
|
|
|
/* Check hsubbw */
|
/* Check hsubbw */
|
init_sbyte ();
|
init_sbyte ();
|
|
|
for (i = 0; i < NUM; i++)
|
for (i = 0; i < NUM; i++)
|
dst.x[i] = _mm_hsubw_epi8 (src1.x[i]);
|
dst.x[i] = _mm_hsubw_epi8 (src1.x[i]);
|
|
|
if (check_sbyte2word())
|
if (check_sbyte2word())
|
abort ();
|
abort ();
|
|
|
|
|
/* Check hsubwd */
|
/* Check hsubwd */
|
init_sword ();
|
init_sword ();
|
|
|
for (i = 0; i < (NUM ); i++)
|
for (i = 0; i < (NUM ); i++)
|
dst.x[i] = _mm_hsubd_epi16 (src1.x[i]);
|
dst.x[i] = _mm_hsubd_epi16 (src1.x[i]);
|
|
|
if (check_sword2dword())
|
if (check_sword2dword())
|
abort ();
|
abort ();
|
|
|
/* Check hsubdq */
|
/* Check hsubdq */
|
init_sdword ();
|
init_sdword ();
|
for (i = 0; i < NUM; i++)
|
for (i = 0; i < NUM; i++)
|
dst.x[i] = _mm_hsubq_epi32 (src1.x[i]);
|
dst.x[i] = _mm_hsubq_epi32 (src1.x[i]);
|
|
|
if (check_dword2qword())
|
if (check_dword2qword())
|
abort ();
|
abort ();
|
}
|
}
|
|
|