OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [gcc/] [testsuite/] [gcc.target/] [mips/] [mips-3d-9.c] - Blame information for rev 321

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 321 jeremybenn
/* { dg-do run } */
2
/* { dg-options "-O2 -mips3d" } */
3
 
4
/* Matrix Multiplications */
5
#include <stdlib.h>
6
#include <stdio.h>
7
 
8
/* Vector of two floats (8 bytes total), declared with GCC's vector_size
   attribute.  This is the operand type passed to the paired-single
   builtins used by matrix_multiply2/3/4.  */
typedef float v2sf __attribute__((vector_size(8)));

/* Input vector: the left-hand operand of every multiplication below.  */
float a[4] = {1.1, 2.2, 3.3, 4.4};

/* Input 4x4 matrix, indexed b[row][column].  Each result element i is
   computed as the sum over j of a[j] * b[j][i].  */
float b[4][4] = {{1, 2, 3, 4},
                 {5, 6, 7, 8},
                 {9, 10, 11, 12},
                 {13, 14, 15, 16}};

float c[4]; /* Result for matrix_multiply1() */
float d[4]; /* Result for matrix_multiply2() */
float e[4]; /* Result for matrix_multiply3() */
float f[4]; /* Result for matrix_multiply4() */
20
 
21
void matrix_multiply1();
22
NOMIPS16 void matrix_multiply2();
23
NOMIPS16 void matrix_multiply3();
24
NOMIPS16 void matrix_multiply4();
25
 
26
int main ()
27
{
28
  int i;
29
 
30
  /* Version 1. Use float calculations */
31
  matrix_multiply1();
32
 
33
  /* Version 2. Use paired-single instructions inside the inner loop*/
34
  matrix_multiply2();
35
  for (i = 0; i < 4; i++)
36
    if (d[i] != c[i])
37
      abort();
38
 
39
  /* Version 3. Use paired-single instructions and unroll the inner loop */
40
  matrix_multiply3();
41
  for (i = 0; i < 4; i++)
42
    if (e[i] != c[i])
43
      abort();
44
 
45
  /* Version 4. Use paired-single instructions and unroll all loops */
46
  matrix_multiply4();
47
  for (i = 0; i < 4; i++)
48
    if (f[i] != c[i])
49
      abort();
50
 
51
  printf ("Test Passes\n");
52
  exit (0);
53
}
54
 
55
void matrix_multiply1()
56
{
57
  int i, j;
58
 
59
  for (i = 0; i < 4; i++)
60
   {
61
     c[i] = 0.0;
62
 
63
     for (j = 0; j < 4; j ++)
64
       c[i] += a[j] * b[j][i];
65
   }
66
}
67
 
68
NOMIPS16 void matrix_multiply2()
69
{
70
  int i, j;
71
  v2sf m1, m2;
72
  v2sf result, temp;
73
 
74
  for (i = 0; i < 4; i++)
75
   {
76
     result = (v2sf) {0.0, 0.0};
77
 
78
     for (j = 0; j < 4; j+=2)
79
     {
80
       /* Load two float values into m1 */
81
       m1 = (v2sf) {a[j], a[j+1]};
82
       m2 = (v2sf) {b[j][i], b[j+1][i]};
83
 
84
       /* Multiply and add */
85
       result += m1 * m2;
86
     }
87
 
88
     /* Reduction add at the end */
89
     temp = __builtin_mips_addr_ps (result, result);
90
     d[i] = __builtin_mips_cvt_s_pl (temp);
91
   }
92
}
93
 
94
NOMIPS16 void matrix_multiply3()
95
{
96
  int i;
97
  v2sf m1, m2, n1, n2;
98
  v2sf result, temp;
99
 
100
  m1 = (v2sf) {a[0], a[1]};
101
  m2 = (v2sf) {a[2], a[3]};
102
 
103
  for (i = 0; i < 4; i++)
104
   {
105
     n1 = (v2sf) {b[0][i], b[1][i]};
106
     n2 = (v2sf) {b[2][i], b[3][i]};
107
 
108
     /* Multiply and add */
109
     result = m1 * n1 + m2 * n2;
110
 
111
     /* Reduction add at the end */
112
     temp = __builtin_mips_addr_ps (result, result);
113
     e[i] = __builtin_mips_cvt_s_pl (temp);
114
   }
115
}
116
 
117
NOMIPS16 void matrix_multiply4()
118
{
119
  v2sf m1, m2;
120
  v2sf n1, n2, n3, n4, n5, n6, n7, n8;
121
  v2sf temp1, temp2, temp3, temp4;
122
  v2sf result1, result2;
123
 
124
  /* Load a[0] a[1] values into m1
125
     Load a[2] a[3] values into m2 */
126
  m1 = (v2sf) {a[0], a[1]};
127
  m2 = (v2sf) {a[2], a[3]};
128
 
129
  /* Load b[0][0] b[1][0] values into n1
130
     Load b[2][0] b[3][0] values into n2
131
     Load b[0][1] b[1][1] values into n3
132
     Load b[2][1] b[3][1] values into n4
133
     Load b[0][2] b[1][2] values into n5
134
     Load b[2][2] b[3][2] values into n6
135
     Load b[0][3] b[1][3] values into n7
136
     Load b[2][3] b[3][3] values into n8 */
137
  n1 = (v2sf) {b[0][0], b[1][0]};
138
  n2 = (v2sf) {b[2][0], b[3][0]};
139
  n3 = (v2sf) {b[0][1], b[1][1]};
140
  n4 = (v2sf) {b[2][1], b[3][1]};
141
  n5 = (v2sf) {b[0][2], b[1][2]};
142
  n6 = (v2sf) {b[2][2], b[3][2]};
143
  n7 = (v2sf) {b[0][3], b[1][3]};
144
  n8 = (v2sf) {b[2][3], b[3][3]};
145
 
146
  temp1 = m1 * n1 + m2 * n2;
147
  temp2 = m1 * n3 + m2 * n4;
148
  temp3 = m1 * n5 + m2 * n6;
149
  temp4 = m1 * n7 + m2 * n8;
150
 
151
  result1 = __builtin_mips_addr_ps (temp1, temp2);
152
  result2 = __builtin_mips_addr_ps (temp3, temp4);
153
 
154
  f[0] = __builtin_mips_cvt_s_pu (result1);
155
  f[1] = __builtin_mips_cvt_s_pl (result1);
156
  f[2] = __builtin_mips_cvt_s_pu (result2);
157
  f[3] = __builtin_mips_cvt_s_pl (result2);
158
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.