OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [tags/] [gnu-src/] [gcc-4.5.1/] [gcc-4.5.1-or32-1.0rc1/] [gcc/] [testsuite/] [gcc.target/] [i386/] [reload-1.c] - Blame information for rev 338

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 318 jeremybenn
/* { dg-do compile } */
2
/* { dg-require-effective-target ilp32 } */
3
/* { dg-require-effective-target sse2 } */
4
/* { dg-options "-O3 -msse2 -fdump-rtl-csa" } */
5
/* { dg-skip-if "no stdint" { vxworks_kernel } } */
6
 
7
#include <emmintrin.h>
8
#include <stdint.h>
9
 
10
/* size_t taken from the compiler's predefined __SIZE_TYPE__ macro rather
   than from a library header.  */
typedef __SIZE_TYPE__ size_t;
11
/* 16-byte vector of float (GCC __vector_size__ attribute).  */
typedef float vFloat __attribute__ ((__vector_size__ (16)));
12
/* 16-byte vector of double (GCC __vector_size__ attribute).  */
typedef double vDouble __attribute__ ((__vector_size__ (16)));
13
/* Buffer descriptor passed to bar().  In the visible code bar() reads only
   `data' (cast to float *) and `w' (used as the element count); `h' and
   `bytes' are not read anywhere in this file.  */
typedef struct buf
14
{
15
  void *data;
16
  unsigned long h;
17
  unsigned long  w;
18
  size_t bytes;
19
} buf;
20
 
21
/* Generic job header; embedded as the first member of `fj'.
   NOTE(review): `next' and the `func' parameter use the tag `struct Job'
   (capital J), which is a different tag from `struct job' and is not
   defined in the visible source, so both refer to an incomplete type.
   None of these fields is accessed by the code in this file.  */
typedef struct job
22
{
23
  struct Job *next;
24
  void * info;
25
  long (*func)(struct Job *job);
26
  long error;
27
} job;
28
 
29
/* Job record holding the arguments for bar().  `hd' is the first member,
   which is what lets foo() cast a `job *' to an `fj *'.  */
typedef struct fj
30
{
31
    job hd;        /* common job header; must stay first for foo()'s cast */
32
    buf src;       /* passed to bar() as `src' */
33
    buf dest;      /* passed to bar() as `dest' */
34
    float g;       /* passed to bar() as `g' */
35
    unsigned int flags;  /* passed to bar() as `flags' */
36
} fj;
37
 
38
/* Two 256-entry lookup tables read by bar().  They have static storage and
   no initializer, so every element is zero.  */
static const double r[256], t[256];
39
 
40
/* Process dest->w floats in groups of eight: load from src->data, run an
   SSE2 double-precision computation using the `r' and `t' tables, and
   store the results to dest->data.  Always returns 0.

   Only whole groups of eight are handled; any trailing (dest->w % 8)
   elements are neither read nor written.  The parameters `g' and `flags'
   are not used in the body, and only the `data' field of `src' is read.

   NOTE(review): several locals (p0, vi0..vi3, b0..b3) are read before
   being assigned.  This file is a compile-only test (see the dg-do
   directive), so this is presumably an artifact of test-case reduction;
   do not "fix" it without confirming the test still exercises the same
   compiler path.  */
long bar (const buf *src, const buf *dest, float g, unsigned int flags)
41
{
42
  float *d0 = (float*) src->data;
43
  float *d1 = (float*) dest->data;
44
  uintptr_t w = dest->w;
45
  uintptr_t idx;
46
  /* p0 is never assigned; it is read below when building z0/z1.  */
  vFloat p0;
47
  /* Static with no initializer, hence all-zero; used as the andnot mask.  */
  static const vFloat m0;
48
  /* Polynomial coefficients p[0..2] and the constants m, b; static with
     no initializer, hence all-zero.  */
  static const vDouble p[3], m, b;
49
  float *sr = d0;
50
  float *dr = d1;
51
  for( idx = 0; idx + 8 <= w; idx += 8 )
52
  {
53
    /* Unaligned load of eight consecutive input floats.  */
    vFloat f0 = _mm_loadu_ps (sr);
54
    vFloat f1 = _mm_loadu_ps (sr + 4);
55
    sr += 8;
56
    /* (~m0) & f: clears the bits of the input that are set in m0.  */
    vFloat fa0 = _mm_andnot_ps (m0, f0);
57
    vFloat fa1 = _mm_andnot_ps (m0, f1);
58
    /* Widen to double: v0/v2 take the low two lanes, v1/v3 the high two
       (moved down with _mm_movehl_ps).  */
    vDouble v0 = _mm_cvtps_pd (fa0);
59
    vDouble v1 = _mm_cvtps_pd (_mm_movehl_ps (fa0, fa0));
60
    vDouble v2 = _mm_cvtps_pd (fa1);
61
    vDouble v3 = _mm_cvtps_pd (_mm_movehl_ps (fa1, fa1));
62
    vDouble  vi0, vi1, vi2, vi3;
63
    __m128i b0, b1, b2, b3;
64
    /* NOTE(review): b0..b3 are read here without having been assigned.  */
    b0 = _mm_packs_epi32 (_mm_packs_epi32 (b0, b1), _mm_packs_epi32 (b2, b3));
65
    b1 = _mm_srli_epi64 (b0, 32);
66
    /* Low 32 bits of b0 / b1; each supplies two table indices below.  */
    unsigned int i0 = _mm_cvtsi128_si32 (b0);
67
    unsigned int i2 = _mm_cvtsi128_si32 (b1);
68
    /* Subtract a pair of `r' entries: low lane indexed by (i & 0xff),
       high lane by (i >> 16).
       NOTE(review): the (i >> 16) index is not masked, so it is not
       bounded to the 256-entry table by anything visible here.  */
    v0 -= _mm_loadh_pd (_mm_load_sd (r + (i0 & 0xff)), r + (i0 >> 16));
69
    v1 -= _mm_loadh_pd (_mm_load_sd (r + (i2 & 0xff)), r + (i2 >> 16));
70
    /* Bring the upper 64 bits down to obtain the next two index words.  */
    b0 = _mm_unpackhi_epi64 (b0, b0);
71
    b1 = _mm_unpackhi_epi64 (b1, b1);
72
    unsigned int i4 = _mm_cvtsi128_si32 (b0);
73
    unsigned int i6 = _mm_cvtsi128_si32 (b1);
74
    v2 -= _mm_loadh_pd (_mm_load_sd (r + (i4 & 0xff)), r + (i4 >> 16));
75
    v3 -= _mm_loadh_pd (_mm_load_sd (r + (i6 & 0xff)), r + (i6 >> 16));
76
    /* Quadratic in v: p[0] + p[1]*v + p[2]*v*v, in nested form.  */
    v0 = p[0] + (p[1] + p[2] * v0) * v0;
77
    v1 = p[0] + (p[1] + p[2] * v1) * v1;
78
    v2 = p[0] + (p[1] + p[2] * v2) * v2;
79
    v3 = p[0] + (p[1] + p[2] * v3) * v3;
80
    /* Reinterpret (vi + b + m) as 64-bit integers and shift left by 52.
       NOTE(review): vi0..vi3 are read here without having been assigned.  */
    vi0 = (vDouble) _mm_slli_epi64 ((__m128i)((vi0 + b) + m), 52);
81
    vi1 = (vDouble) _mm_slli_epi64 ((__m128i)((vi1 + b) + m), 52);
82
    vi2 = (vDouble) _mm_slli_epi64 ((__m128i)((vi2 + b) + m), 52);
83
    vi3 = (vDouble) _mm_slli_epi64 ((__m128i)((vi3 + b) + m), 52);
84
    /* Scale by a pair of `t' entries, indexed the same way as `r'.  */
    vi0 *= _mm_loadh_pd (_mm_load_sd (t + (i0 & 0xff)), t + (i0 >> 16));
85
    vi1 *= _mm_loadh_pd (_mm_load_sd (t + (i2 & 0xff)), t + (i2 >> 16));
86
    vi2 *= _mm_loadh_pd (_mm_load_sd (t + (i4 & 0xff)), t + (i4 >> 16));
87
    vi3 *= _mm_loadh_pd (_mm_load_sd (t + (i6 & 0xff)), t + (i6 >> 16));
88
    v0 *= vi0;
89
    v1 *= vi1;
90
    v2 *= vi2;
91
    v3 *= vi3;
92
    /* Narrow back to float and merge two double pairs per result vector.  */
    vFloat r0 = _mm_movelh_ps (_mm_cvtpd_ps( v0 ), _mm_cvtpd_ps (v1));
93
    vFloat r1 = _mm_movelh_ps (_mm_cvtpd_ps( v2 ), _mm_cvtpd_ps (v3));
94
    /* Per-lane mask: all ones where the original input compared equal
       to zero.  */
    vFloat z0 = _mm_cmpeq_ps (f0, _mm_setzero_ps());
95
    vFloat z1 = _mm_cmpeq_ps (f1, _mm_setzero_ps());
96
    /* Select: keep the computed value where the input was non-zero, and
       take the corresponding lane of p0 where it was zero.  */
    r0 = _mm_andnot_ps (z0, r0);
97
    r1 = _mm_andnot_ps (z1, r1);
98
    z0 = _mm_and_ps (z0, p0);
99
    z1 = _mm_and_ps (z1, p0);
100
    r0 = _mm_or_ps (r0, z0);
101
    r1 = _mm_or_ps (r1, z1);
102
    /* Unaligned store of the eight results.  */
    _mm_storeu_ps (dr, r0);
103
    _mm_storeu_ps (dr + 4, r1);
104
    dr += 8;
105
  }
106
  return 0;
107
}
108
 
109
/* Entry point taking a generic job pointer.  Treats `j' as the `hd' header
   of an enclosing `fj' (hd is fj's first member), then forwards the
   record's src, dest, g and flags to bar() and returns bar()'s result.  */
long foo (job *j )
110
{
111
  fj *jd = (fj*) j;
112
  return bar (&jd->src, &jd->dest, jd->g, jd->flags);
113
}
114
 
115
/* { dg-final { scan-rtl-dump-not "deleted 1 dead insns" "csa" } } */
116
/* { dg-final { cleanup-rtl-dump "csa" } } */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.