OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [gcc/] [testsuite/] [gcc.target/] [i386/] [pr23570.c] - Blame information for rev 328

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 318 jeremybenn
/* { dg-do compile } */
/* { dg-options "-O2 -msse2" } */
/* { dg-require-effective-target sse2 } */

/* Vector types used by the helpers in this file.  Each one is declared
   with a 16-byte __vector_size__: two with float elements and one with
   long long elements.  */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
typedef float __m128 __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));

static __inline __m128
10
_mm_cmpeq_ps (__m128 __A, __m128 __B)
11
{
12
  return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
13
}
14
 
15
static __inline __m128
16
_mm_setr_ps (float __Z, float __Y, float __X, float __W)
17
{
18
  return __extension__ (__m128)(__v4sf){__Z, __Y, __X, __W };
19
}
20
 
21
static __inline __m128
22
_mm_and_si128 (__m128 __A, __m128 __B)
23
{
24
  return (__m128)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
25
}
26
 
27
static __inline __m128
28
_mm_or_si128 (__m128 __A, __m128 __B)
29
{
30
  return (__m128)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
31
}
32
 
33
typedef union
34
{
35
  __m128 xmmi;
36
  int si[4];
37
}
38
__attribute__ ((aligned (16))) um128;
39
 
40
um128 u;
41
 
42
static inline int
43
sse_max_abs_indexf (float *v, int step, int n)
44
{
45
  __m128 m1, mm;
46
  __m128 mim, mi, msk;
47
  um128 u, ui;
48
  int n4, step2, step3;
49
  mm = __builtin_ia32_andps ((__m128) (__v4sf)
50
                             { 0.0, v[step], v[step2], v[step3] }
51
                             , u.xmmi);
52
  if (n4)
53
    {
54
      int i;
55
      for (i = 0; i < n4; ++i);
56
      msk = (__m128) _mm_cmpeq_ps (m1, mm);
57
      mim = _mm_or_si128 (_mm_and_si128 (msk, mi), mim);
58
    }
59
  ui.xmmi = (__m128) mim;
60
  return ui.si[n];
61
}
62
 
63
/* Store the value *R2 into each of the first (N / 4) * 4 elements of R1.

   R1  destination array; elements at index (N / 4) * 4 and above are
       left untouched.
   R2  source; only its first element is read, because the pointer is
       never advanced.
   N   element count, rounded down to a multiple of four.

   NOTE(review): despite the name, nothing is written through R2, so no
   swap takes place.  This file is a compile-only test (see the dg-do
   directive), so this is presumably a reduced form of a real routine --
   confirm before changing it.  */
static void
sse_swap_rowf (float *r1, float *r2, int n)
{
  int n4 = (n / 4) * 4;
  float *r14end = r1 + n4;
  while (r1 < r14end)
    {
      *r1 = *r2;
      r1++;
    }
}

/* Walk the first N - 1 rows of M, advancing the row pointer by NW floats
   per iteration.  For row I it calls sse_max_abs_indexf on the element
   at column I; if the result is non-zero it calls sse_swap_rowf and
   swap_index; it then calls sse_add_rowf once for every later row.

   M     start of the float data; rows are NW floats apart.
   NW    row stride in floats; also passed as the length to
         sse_swap_rowf.
   PROW  passed through to swap_index together with the two row
         indices.
   N     number of rows iterated over.

   NOTE(review): swap_index and sse_add_rowf are neither declared nor
   defined in the visible source, so their semantics cannot be stated
   here.  The local `s' is initialized but never used.  This file is a
   compile-only test (see the dg-do directive), so both are presumably
   left over from test-case reduction -- confirm before changing.  */
void
ludcompf (float *m, int nw, int *prow, int n)
{
  int i, s = 0;
  float *pm;
  for (i = 0, pm = m; i < n - 1; ++i, pm += nw)
    {
      int vi = sse_max_abs_indexf (pm + i, nw, n - i);
      float *pt;
      int j;
      if (vi != 0)
        {
          sse_swap_rowf (pm, pm + vi * nw, nw);
          swap_index (prow, i, i + vi);
        }
      /* One call per row below row I; each call receives pointers just
         past column I in the target row and in the current row.  */
      for (j = i + 1, pt = pm + nw; j < n; ++j, pt += nw)
        sse_add_rowf (pt + i + 1, pm + i + 1, -1.0, n - i - 1);
    }
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.