OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [testsuite/] [gcc.dg/] [vmx/] [dct.c] - Blame information for rev 801

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 689 jeremybenn
/* { dg-do compile } */
2
#include <altivec.h>
3
 
4
/* Transpose an 8x8 matrix of signed 16-bit elements.

   INPUT and OUTPUT each point to eight vectors, one matrix row per
   vector.  The transpose is built from three rounds of merge-high /
   merge-low: in every round, vector K is interleaved with vector K+4
   and the two results land in slots 2K and 2K+1 of the next stage.

   Every element of INPUT is read in the first round, before anything
   is stored to OUTPUT.  */
inline void
transpose_vmx (vector signed short *input, vector signed short *output)
{
  vector signed short first[8];   /* result of round 1 */
  vector signed short second[8];  /* result of round 2 */
  int k;

  /* Round 1: interleave row K with row K+4 of the input.  */
  for (k = 0; k < 4; k++)
    {
      first[2 * k]     = vec_mergeh (input[k], input[k + 4]);
      first[2 * k + 1] = vec_mergel (input[k], input[k + 4]);
    }

  /* Round 2: same pattern applied to the round-1 vectors.  */
  for (k = 0; k < 4; k++)
    {
      second[2 * k]     = vec_mergeh (first[k], first[k + 4]);
      second[2 * k + 1] = vec_mergel (first[k], first[k + 4]);
    }

  /* Round 3: the final interleave is written straight to the output.  */
  for (k = 0; k < 4; k++)
    {
      output[2 * k]     = vec_mergeh (second[k], second[k + 4]);
      output[2 * k + 1] = vec_mergel (second[k], second[k + 4]);
    }
}
38
 
39
/* Two-pass 8x8 DCT using AltiVec saturating arithmetic.

   INPUT     - read at indices 0..8: input[0..7] are the eight data
               rows, input[8] holds the multiplication constants (its
               elements 0..6 are splatted into mul0..mul6).
   OUTPUT    - written at indices 0..7 with the post-scaled result.
   POSTSCALE - read at indices 0..7; each vector is multiplied with the
               matching result row via vec_mradds before the store.

   The function runs the butterfly network once over the columns,
   transposes the intermediate block with transpose_vmx, runs the same
   network over the rows, then post-scales.

   The coefficient names used in the comments below (c4, a0, a1, a2)
   are carried over from the original code; the actual values come from
   input[8] and are not visible here.  */
void
dct_vmx (vector signed short *input, vector signed short *output,
         vector signed short *postscale)
{
  vector signed short mul0, mul1, mul2, mul3, mul4, mul5, mul6;
  vector signed short v0, v1, v2, v3, v4, v5, v6, v7, v8, v9;
  vector signed short v20, v21, v22, v23, v24, v25, v26, v27, v31;
  vector signed short in[8], out[8];

  /* Splat multiplication constants: element N of input[8] is
     replicated across all lanes of mulN.  */
  mul0 = vec_splat (input[8], 0);
  mul1 = vec_splat (input[8], 1);
  mul2 = vec_splat (input[8], 2);
  mul3 = vec_splat (input[8], 3);
  mul4 = vec_splat (input[8], 4);
  mul5 = vec_splat (input[8], 5);
  mul6 = vec_splat (input[8], 6);

  /* Perform DCT on the eight columns */

  /*********** Stage 1 ***********/

  v8 = vec_adds (input[0], input[7]);
  v9 = vec_subs (input[0], input[7]);
  v0 = vec_adds (input[1], input[6]);
  v7 = vec_subs (input[1], input[6]);
  v1 = vec_adds (input[2], input[5]);
  v6 = vec_subs (input[2], input[5]);
  v2 = vec_adds (input[3], input[4]);
  v5 = vec_subs (input[3], input[4]);

  /*********** Stage 2 ***********/

  /* Top */
  v3 = vec_adds (v8, v2);               /* (V0+V7) + (V3+V4) */
  v4 = vec_subs (v8, v2);               /* (V0+V7) - (V3+V4) */
  v2 = vec_adds (v0, v1);               /* (V1+V6) + (V2+V5) */
  v8 = vec_subs (v0, v1);               /* (V1+V6) - (V2+V5) */

  /* Bottom */
  v0 = vec_subs (v7, v6);               /* (V1-V6) - (V2-V5) */
  v1 = vec_adds (v7, v6);               /* (V1-V6) + (V2-V5) */

  /*********** Stage 3 ***********/

  /* Top */
  in[0] = vec_adds (v3, v2);            /* y0 = v3 + v2 */
  in[4] = vec_subs (v3, v2);            /* y4 = v3 - v2 */
  in[2] = vec_mradds (v8, mul2, v4);    /* y2 = v8 * a0 + v4 */
  /* NOTE(review): this pass uses mul6 as the addend where the row pass
     uses a zero vector; presumably element 6 of input[8] is 0 --
     confirm against the caller's constant table.  */
  v6 = vec_mradds (v4, mul2, mul6);
  in[6] = vec_subs (v6, v8);            /* y6 = v4 * a0 - v8 */

  /* Bottom */
  v6 = vec_mradds (v0, mul0, v5);       /* v6 = v0 * (c4) + v5 */
  v7 = vec_mradds (v0, mul4, v5);       /* v7 = v0 * (-c4) + v5 */
  v2 = vec_mradds (v1, mul4, v9);       /* v2 = v1 * (-c4) + v9 */
  v3 = vec_mradds (v1, mul0, v9);       /* v3 = v1 * (c4) + v9 */

  /*********** Stage 4 ***********/

  /* Bottom */
  in[1] = vec_mradds (v6, mul3, v3);    /* y1 = v6 * (a1) + v3 */
  v23 = vec_mradds (v3, mul3, mul6);    /* NOTE(review): mul6 addend, see above */
  in[7] = vec_subs (v23, v6);           /* y7 = v3 * (a1) - v6 */
  in[5] = vec_mradds (v2, mul1, v7);    /* y5 = v2 * (a2) + v7 */
  in[3] = vec_mradds (v7, mul5, v2);    /* y3 = v7 * (-a2) + v2 */

  transpose_vmx (in, out);

  /* Perform DCT on the eight rows */

  /*********** Stage 1 ***********/

  v8 = vec_adds (out[0], out[7]);
  v9 = vec_subs (out[0], out[7]);
  v0 = vec_adds (out[1], out[6]);
  v7 = vec_subs (out[1], out[6]);
  v1 = vec_adds (out[2], out[5]);
  v6 = vec_subs (out[2], out[5]);
  v2 = vec_adds (out[3], out[4]);
  v5 = vec_subs (out[3], out[4]);

  /*********** Stage 2 ***********/

  /* Top */
  v3 = vec_adds (v8, v2);               /* (V0+V7) + (V3+V4) */
  v4 = vec_subs (v8, v2);               /* (V0+V7) - (V3+V4) */
  v2 = vec_adds (v0, v1);               /* (V1+V6) + (V2+V5) */
  v8 = vec_subs (v0, v1);               /* (V1+V6) - (V2+V5) */

  /* Bottom */
  v0 = vec_subs (v7, v6);               /* (V1-V6) - (V2-V5) */
  v1 = vec_adds (v7, v6);               /* (V1-V6) + (V2-V5) */

  /*********** Stage 3 ***********/

  /* Top */
  /* v25 serves as a zero addend until it is overwritten with y5 in
     stage 4.  Build the zero explicitly instead of subtracting an
     uninitialized variable from itself.  */
  v25 = vec_splat_s16 (0);

  v20 = vec_adds (v3, v2);              /* y0 = v3 + v2 */
  v24 = vec_subs (v3, v2);              /* y4 = v3 - v2 */
  v22 = vec_mradds (v8, mul2, v4);      /* y2 = v8 * a0 + v4 */
  v6 = vec_mradds (v4, mul2, v25);
  v26 = vec_subs (v6, v8);              /* y6 = v4 * a0 - v8 */

  /* Bottom */
  v6 = vec_mradds (v0, mul0, v5);       /* v6 = v0 * (c4) + v5 */
  v7 = vec_mradds (v0, mul4, v5);       /* v7 = v0 * (-c4) + v5 */
  v2 = vec_mradds (v1, mul4, v9);       /* v2 = v1 * (-c4) + v9 */
  v3 = vec_mradds (v1, mul0, v9);       /* v3 = v1 * (c4) + v9 */

  /*********** Stage 4 ***********/

  /* Bottom */
  v21 = vec_mradds (v6, mul3, v3);      /* y1 = v6 * (a1) + v3 */
  v23 = vec_mradds (v3, mul3, v25);
  v27 = vec_subs (v23, v6);             /* y7 = v3 * (a1) - v6 */
  v25 = vec_mradds (v2, mul1, v7);      /* y5 = v2 * (a2) + v7 */
  v23 = vec_mradds (v7, mul5, v2);      /* y3 = v7 * (-a2) + v2 */

  /* Post-scale and store results */

  v31 = vec_splat_s16 (0);              /* zero addend for the scaling */

  output[0] = vec_mradds (postscale[0], v20, v31);
  output[2] = vec_mradds (postscale[2], v22, v31);
  output[4] = vec_mradds (postscale[4], v24, v31);
  output[6] = vec_mradds (postscale[6], v26, v31);
  output[1] = vec_mradds (postscale[1], v21, v31);
  output[3] = vec_mradds (postscale[3], v23, v31);
  output[5] = vec_mradds (postscale[5], v25, v31);
  output[7] = vec_mradds (postscale[7], v27, v31);
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.