OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [newlib-1.17.0/] [newlib/] [libm/] [machine/] [spu/] [headers/] [erfcf4.h] - Blame information for rev 158

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 148 jeremybenn
/* --------------------------------------------------------------  */
2
/* (C)Copyright 2007,2008,                                         */
3
/* International Business Machines Corporation                     */
4
/* All Rights Reserved.                                            */
5
/*                                                                 */
6
/* Redistribution and use in source and binary forms, with or      */
7
/* without modification, are permitted provided that the           */
8
/* following conditions are met:                                   */
9
/*                                                                 */
10
/* - Redistributions of source code must retain the above copyright*/
11
/*   notice, this list of conditions and the following disclaimer. */
12
/*                                                                 */
13
/* - Redistributions in binary form must reproduce the above       */
14
/*   copyright notice, this list of conditions and the following   */
15
/*   disclaimer in the documentation and/or other materials        */
16
/*   provided with the distribution.                               */
17
/*                                                                 */
18
/* - Neither the name of IBM Corporation nor the names of its      */
19
/*   contributors may be used to endorse or promote products       */
20
/*   derived from this software without specific prior written     */
21
/*   permission.                                                   */
22
/*                                                                 */
23
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND          */
24
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,     */
25
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF        */
26
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE        */
27
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR            */
28
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,    */
29
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT    */
30
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;    */
31
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)        */
32
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN       */
33
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR    */
34
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  */
35
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              */
36
/* --------------------------------------------------------------  */
37
/* PROLOG END TAG zYx                                              */
38
#ifdef __SPU__
39
#ifndef _ERFCF4_H_
40
#define _ERFCF4_H_      1
41
 
42
#include <spu_intrinsics.h>
43
#include "erff4.h"
44
#include "erf_utils.h"
45
#include "recipf4.h"
46
#include "expf4.h"
47
#include "divf4.h"
48
 
49
/*
50
 * FUNCTION
51
 *  vector float _erfcf4(vector float x)
52
 *
53
 * DESCRIPTION
54
 *  The erfcf4 function computes the complementary error function of each element of x.
55
 *
56
 *  C99 Special Cases:
57
 *  - erfc(+0) returns +1
58
 *  - erfc(-0) returns +1
59
 *  - erfc(+infinity) returns +0
60
 *  - erfc(-infinity) returns +2
61
 *
 * IMPLEMENTATION NOTES
 *  - |x| is split into eight partitions (REGION 0..7 below). Each lane
 *    evaluates a degree-14 polynomial in (|x| - offset) using the
 *    coefficients, offset and truncation correction of its partition.
 *  - Lanes where 4.3 > |x| is false take the CONTFRAC_ERFCF4 result
 *    instead of the polynomial.
 *  - Lanes with |x| > 10.0542 are forced to zero.
 *  - Lanes where x > 0 is false return 2 - result.
 *    NOTE(review): the test is a strict x > 0, so x == 0 also goes
 *    through the 2 - result path; check this against the +/-0 special
 *    cases listed above.
 *
62
 */
63
 
64
static __inline vector float _erfcf4(vector float x)
65
{
66
  /* Only the sign bit is set in each lane; used below to strip the sign of x. */
  vec_float4 sign_maskf = spu_splats(-0.0f);
67
  vec_float4 zerof      = spu_splats(0.0f);
68
  /*
   * NOTE(review): onehalff and onef are not referenced directly anywhere
   * in this function. Presumably the CONTFRAC_ERFCF4 macro (defined
   * outside this file) picks them up by name -- confirm before renaming
   * or removing them.
   */
  vec_float4 onehalff   = spu_splats(0.5f);
69
  vec_float4 onef       = spu_splats(1.0f);
70
  vec_float4 twof       = spu_splats(2.0f);
71
  vec_float4 clamp      = spu_splats(10.0542f);   // Erfc = 0 above this (in single precision)
72
  /* xabs = |x|: x with the sign bit cleared. */
  vec_float4 xabs       = spu_andc(x, sign_maskf);
73
  vec_float4 result;
74
 
75
  /*
76
   * First thing we do is setup the description of each partition.
77
   * This consists of:
78
   * - Start x of partition
79
   * - Offset (used for evaluating power series expanded around a point)
80
   * - Truncation adjustment.
81
   */
82
 
83
 
  /*
   * Naming scheme for the tables below: SDM_ERFCF4_<region>_<field>, where
   * <field> is START, OFF, TRUNC or a two-digit polynomial coefficient index
   * (00 is the constant term).
   *
   * These #defines are written inside the function body, but as
   * preprocessor macros they are not scoped to it: they stay defined for
   * whatever follows the inclusion of this header.
   */
84
  /***************************************************************
85
   * REGION 0: Approximation Near 0 from Above
86
   *
87
   */
88
#define SDM_ERFCF4_0_START     0.0f
89
#define SDM_ERFCF4_0_OFF       0.0f
90
#define SDM_ERFCF4_0_TRUNC     1u
91
 
92
#define SDM_ERFCF4_0_00      0.9999999999999949135f
93
#define SDM_ERFCF4_0_01      -1.1283791670931702608f
94
#define SDM_ERFCF4_0_02      -1.8051894620430502228e-10f
95
#define SDM_ERFCF4_0_03      0.37612639455729408814f
96
#define SDM_ERFCF4_0_04      -8.8929793006257568262e-8f
97
#define SDM_ERFCF4_0_05      -0.11283705324835578294f
98
#define SDM_ERFCF4_0_06      -5.4670494993502827210e-6f
99
#define SDM_ERFCF4_0_07      0.026889802515535093351f
100
#define SDM_ERFCF4_0_08      -0.000071498114084857387620f
101
#define SDM_ERFCF4_0_09      -0.0050714210985129775210f
102
#define SDM_ERFCF4_0_10      -0.00022683372291701701701f
103
#define SDM_ERFCF4_0_11      0.0010796064437231401311f
104
#define SDM_ERFCF4_0_12      -0.00012982218714593684809f
105
#define SDM_ERFCF4_0_13      -0.00010102962499433144847f
106
#define SDM_ERFCF4_0_14       0.000025784829228223517886f
107
 
108
 
109
  /***************************************************************
110
   * REGION 1: Near 1
111
   */
112
#define SDM_ERFCF4_1_START     0.88f
113
#define SDM_ERFCF4_1_OFF       1.125f
114
#define SDM_ERFCF4_1_TRUNC     1u
115
 
116
#define SDM_ERFCF4_1_00     0.111611768298292224f
117
#define SDM_ERFCF4_1_01    -0.318273958500769283f
118
#define SDM_ERFCF4_1_02     0.358058203313365464f
119
#define SDM_ERFCF4_1_03    -0.162452332984767661f
120
#define SDM_ERFCF4_1_04    -0.0279732971338566734f
121
#define SDM_ERFCF4_1_05     0.0613236836056658061f
122
#define SDM_ERFCF4_1_06    -0.0155368354497628942f
123
#define SDM_ERFCF4_1_07    -0.00960689422582997228f
124
#define SDM_ERFCF4_1_08     0.00603126088310672760f
125
#define SDM_ERFCF4_1_09     0.000360191989801368303f
126
#define SDM_ERFCF4_1_10    -0.00115326735470205975f
127
#define SDM_ERFCF4_1_11     0.000176955087857924673f
128
#define SDM_ERFCF4_1_12     0.000141558399011799664f
129
#define SDM_ERFCF4_1_13    -0.0000494556968345700811f
130
#define SDM_ERFCF4_1_14    0.0f
131
 
132
 
133
  /***************************************************************
134
   * REGION 2:
135
   */
136
#define SDM_ERFCF4_2_START     1.50f
137
#define SDM_ERFCF4_2_OFF       1.75f
138
#define SDM_ERFCF4_2_TRUNC     0u
139
 
140
#define SDM_ERFCF4_2_00     0.0133283287808175777f
141
#define SDM_ERFCF4_2_01    -0.0527749959301503715f
142
#define SDM_ERFCF4_2_02     0.0923562428777631589f
143
#define SDM_ERFCF4_2_03    -0.0901572847140068856f
144
#define SDM_ERFCF4_2_04     0.0481022098321682995f
145
#define SDM_ERFCF4_2_05    -0.00662436146831574865f
146
#define SDM_ERFCF4_2_06    -0.00896304509872736070f
147
#define SDM_ERFCF4_2_07     0.00605875147039124009f
148
#define SDM_ERFCF4_2_08    -0.000730051247140304322f
149
#define SDM_ERFCF4_2_09    -0.000894181745354844871f
150
#define SDM_ERFCF4_2_10     0.000442750499254694174f
151
#define SDM_ERFCF4_2_11     5.44549038611738718e-6f
152
#define SDM_ERFCF4_2_12    -0.0000686716770072681921f
153
#define SDM_ERFCF4_2_13     0.0000177205746526325771f
154
#define SDM_ERFCF4_2_14    0.0f
155
 
156
 
157
  /***************************************************************
158
   * REGION 3:
159
   */
160
#define SDM_ERFCF4_3_START     2.0f
161
#define SDM_ERFCF4_3_OFF       2.25f
162
#define SDM_ERFCF4_3_TRUNC     1u
163
 
164
#define SDM_ERFCF4_3_00      0.00146271658668117865f
165
#define SDM_ERFCF4_3_01     -0.00714231902201798319f
166
#define SDM_ERFCF4_3_02      0.0160702177995404628f
167
#define SDM_ERFCF4_3_03     -0.0217245536919713662f
168
#define SDM_ERFCF4_3_04      0.0190833836369542972f
169
#define SDM_ERFCF4_3_05     -0.0106576791656674587f
170
#define SDM_ERFCF4_3_06      0.00290435707106278173f
171
#define SDM_ERFCF4_3_07      0.000670455969951892490f
172
#define SDM_ERFCF4_3_08     -0.000999493712611392590f
173
#define SDM_ERFCF4_3_09      0.000369380417703939461f
174
#define SDM_ERFCF4_3_10      0.0000114665831641414663f
175
#define SDM_ERFCF4_3_11     -0.0000651349432823388933f
176
#define SDM_ERFCF4_3_12      0.0000226882426454011034f
177
#define SDM_ERFCF4_3_13      1.33207467538330703e-6f
178
#define SDM_ERFCF4_3_14    0.0f
179
 
180
 
181
  /***************************************************************
182
   * REGION 4:
183
   */
184
#define SDM_ERFCF4_4_START     2.46f
185
#define SDM_ERFCF4_4_OFF       2.75f
186
#define SDM_ERFCF4_4_TRUNC     1u
187
 
188
#define SDM_ERFCF4_4_00      0.000100621922119681351f
189
#define SDM_ERFCF4_4_01     -0.000586277247093792324f
190
#define SDM_ERFCF4_4_02      0.00161226242950792873f
191
#define SDM_ERFCF4_4_03     -0.00276038870506660526f
192
#define SDM_ERFCF4_4_04      0.00325811365963060576f
193
#define SDM_ERFCF4_4_05     -0.00275580841407368484f
194
#define SDM_ERFCF4_4_06      0.00165732740366604948f
195
#define SDM_ERFCF4_4_07     -0.000646040956672447276f
196
#define SDM_ERFCF4_4_08      0.0000890115712124397128f
197
#define SDM_ERFCF4_4_09      0.0000712231147231515843f
198
#define SDM_ERFCF4_4_10     -0.0000549969924243893176f
199
#define SDM_ERFCF4_4_11      0.0000158438047120425837f
200
#define SDM_ERFCF4_4_12      1.07113381370613701e-6f
201
#define SDM_ERFCF4_4_13     0.0f
202
#define SDM_ERFCF4_4_14     0.0f
203
 
204
 
205
  /***************************************************************
206
   * REGION 5:
207
   */
208
#define SDM_ERFCF4_5_START     2.95f
209
#define SDM_ERFCF4_5_OFF       3.25f
210
#define SDM_ERFCF4_5_TRUNC     1u
211
 
212
#define SDM_ERFCF4_5_00      4.30277946372736864e-6f
213
#define SDM_ERFCF4_5_01     -0.0000291890253835816989f
214
#define SDM_ERFCF4_5_02      0.0000948643324966405230f
215
#define SDM_ERFCF4_5_03     -0.000195809711948193862f
216
#define SDM_ERFCF4_5_04      0.000286569337750268210f
217
#define SDM_ERFCF4_5_05     -0.000313797225490890491f
218
#define SDM_ERFCF4_5_06      0.000263528504215059911f
219
#define SDM_ERFCF4_5_07     -0.000169991414511391200f
220
#define SDM_ERFCF4_5_08      0.0000816476305301353867f
221
#define SDM_ERFCF4_5_09     -0.0000259138470056606003f
222
#define SDM_ERFCF4_5_10      2.32886623721087698e-6f
223
#define SDM_ERFCF4_5_11      2.86429946075621661e-6f
224
#define SDM_ERFCF4_5_12      0.0f
225
#define SDM_ERFCF4_5_13      0.0f
226
#define SDM_ERFCF4_5_14      0.0f
227
 
228
 
229
  /***************************************************************
230
   * REGION 6:
231
   */
  /*
   * NOTE(review): region 6 starts at 3.45 and region 7 at 3.55, so with
   * the strict "greater than start" selection used below region 6 only
   * serves 3.45 < |x| <= 3.55.
   * NOTE(review): SDM_ERFCF4_6_15 .. SDM_ERFCF4_6_17 are defined here but
   * never referenced in this file; only indices 00..14 are loaded.
   */
232
#define SDM_ERFCF4_6_START     3.45f
233
#define SDM_ERFCF4_6_OFF       3.625f
234
#define SDM_ERFCF4_6_TRUNC     1u
235
 
236
#define SDM_ERFCF4_6_00     2.95140192507759025e-7f
237
#define SDM_ERFCF4_6_01    -2.21592028463311237e-6f
238
#define SDM_ERFCF4_6_02     8.03271103179503198e-6f
239
#define SDM_ERFCF4_6_03    -0.0000186737448986269582f
240
#define SDM_ERFCF4_6_04     0.0000311685922848296785f
241
#define SDM_ERFCF4_6_05    -0.0000395923353434149457f
242
#define SDM_ERFCF4_6_06     0.0000395291139306718091f
243
#define SDM_ERFCF4_6_07    -0.0000315141214892874786f
244
#define SDM_ERFCF4_6_08     0.0000200891481859513911f
245
#define SDM_ERFCF4_6_09    -0.0000100551790824327187f
246
#define SDM_ERFCF4_6_10     3.71860071281680690e-6f
247
#define SDM_ERFCF4_6_11    -8.05502983594814356e-7f
248
#define SDM_ERFCF4_6_12    -7.67662978382552699e-8f
249
#define SDM_ERFCF4_6_13     1.56408548403936681e-7f
250
#define SDM_ERFCF4_6_14     0.0f
251
#define SDM_ERFCF4_6_15     0.0f
252
#define SDM_ERFCF4_6_16     0.0f
253
#define SDM_ERFCF4_6_17     0.0f
254
 
255
 
256
  /***************************************************************
257
   * REGION 7:
258
   */
259
#define SDM_ERFCF4_7_START     3.55f
260
#define SDM_ERFCF4_7_OFF       4.0f
261
#define SDM_ERFCF4_7_TRUNC     2u
262
 
263
#define SDM_ERFCF4_7_00     1.54172579002800189e-8f
264
#define SDM_ERFCF4_7_01    -1.2698234671866558e-7f
265
#define SDM_ERFCF4_7_02     5.0792938687466233e-7f
266
#define SDM_ERFCF4_7_03    -1.3121509160928777e-6f
267
#define SDM_ERFCF4_7_04     2.4549920365608679e-6f
268
#define SDM_ERFCF4_7_05    -3.5343419836695254e-6f
269
#define SDM_ERFCF4_7_06     4.0577914351431357e-6f
270
#define SDM_ERFCF4_7_07    -3.7959659297660776e-6f
271
#define SDM_ERFCF4_7_08     2.9264391936639771e-6f
272
#define SDM_ERFCF4_7_09    -1.8631747969134646e-6f
273
#define SDM_ERFCF4_7_10     9.702839808793979e-7f
274
#define SDM_ERFCF4_7_11    -4.0077792841735885e-7f
275
#define SDM_ERFCF4_7_12     1.2017256123590621e-7f
276
#define SDM_ERFCF4_7_13    -1.7432381111955779e-8f
277
#define SDM_ERFCF4_7_14     0.0f
278
 
279
 
280
  /***************************************************************
281
   * Now we load the description of each partition.
282
   */
283
 
284
  /* Start point for each partition */
  /* Region 0 needs no start vector: it is the default when no threshold is exceeded. */
285
  vec_float4 r1start = spu_splats(SDM_ERFCF4_1_START);
286
  vec_float4 r2start = spu_splats(SDM_ERFCF4_2_START);
287
  vec_float4 r3start = spu_splats(SDM_ERFCF4_3_START);
288
  vec_float4 r4start = spu_splats(SDM_ERFCF4_4_START);
289
  vec_float4 r5start = spu_splats(SDM_ERFCF4_5_START);
290
  vec_float4 r6start = spu_splats(SDM_ERFCF4_6_START);
291
  vec_float4 r7start = spu_splats(SDM_ERFCF4_7_START);
292
 
293
  /* X Offset for each partition */
  /*
   * Layout used for every per-partition table from here on: the "a" vector
   * holds regions 0..3 in words 0..3 and the "b" vector holds regions 4..7,
   * so a single two-input shuffle can fetch the value for any of the eight
   * regions.
   */
294
  vec_float4 xoffseta = (vec_float4) {SDM_ERFCF4_0_OFF, SDM_ERFCF4_1_OFF, SDM_ERFCF4_2_OFF, SDM_ERFCF4_3_OFF};
295
  vec_float4 xoffsetb = (vec_float4) {SDM_ERFCF4_4_OFF, SDM_ERFCF4_5_OFF, SDM_ERFCF4_6_OFF, SDM_ERFCF4_7_OFF};
296
 
297
  /* Truncation Correction for each partition */
298
  vec_uint4 tcorra = (vec_uint4) {SDM_ERFCF4_0_TRUNC, SDM_ERFCF4_1_TRUNC, SDM_ERFCF4_2_TRUNC, SDM_ERFCF4_3_TRUNC};
299
  vec_uint4 tcorrb = (vec_uint4) {SDM_ERFCF4_4_TRUNC, SDM_ERFCF4_5_TRUNC, SDM_ERFCF4_6_TRUNC, SDM_ERFCF4_7_TRUNC};
300
 
301
  /* The coefficients for each partition */
  /* cNNa / cNNb: coefficient index NN for regions 0..3 / 4..7 respectively. */
302
  vec_float4 c00a = (vec_float4) {SDM_ERFCF4_0_00, SDM_ERFCF4_1_00, SDM_ERFCF4_2_00, SDM_ERFCF4_3_00};
303
  vec_float4 c01a = (vec_float4) {SDM_ERFCF4_0_01, SDM_ERFCF4_1_01, SDM_ERFCF4_2_01, SDM_ERFCF4_3_01};
304
  vec_float4 c02a = (vec_float4) {SDM_ERFCF4_0_02, SDM_ERFCF4_1_02, SDM_ERFCF4_2_02, SDM_ERFCF4_3_02};
305
  vec_float4 c03a = (vec_float4) {SDM_ERFCF4_0_03, SDM_ERFCF4_1_03, SDM_ERFCF4_2_03, SDM_ERFCF4_3_03};
306
  vec_float4 c04a = (vec_float4) {SDM_ERFCF4_0_04, SDM_ERFCF4_1_04, SDM_ERFCF4_2_04, SDM_ERFCF4_3_04};
307
  vec_float4 c05a = (vec_float4) {SDM_ERFCF4_0_05, SDM_ERFCF4_1_05, SDM_ERFCF4_2_05, SDM_ERFCF4_3_05};
308
  vec_float4 c06a = (vec_float4) {SDM_ERFCF4_0_06, SDM_ERFCF4_1_06, SDM_ERFCF4_2_06, SDM_ERFCF4_3_06};
309
  vec_float4 c07a = (vec_float4) {SDM_ERFCF4_0_07, SDM_ERFCF4_1_07, SDM_ERFCF4_2_07, SDM_ERFCF4_3_07};
310
  vec_float4 c08a = (vec_float4) {SDM_ERFCF4_0_08, SDM_ERFCF4_1_08, SDM_ERFCF4_2_08, SDM_ERFCF4_3_08};
311
  vec_float4 c09a = (vec_float4) {SDM_ERFCF4_0_09, SDM_ERFCF4_1_09, SDM_ERFCF4_2_09, SDM_ERFCF4_3_09};
312
  vec_float4 c10a = (vec_float4) {SDM_ERFCF4_0_10, SDM_ERFCF4_1_10, SDM_ERFCF4_2_10, SDM_ERFCF4_3_10};
313
  vec_float4 c11a = (vec_float4) {SDM_ERFCF4_0_11, SDM_ERFCF4_1_11, SDM_ERFCF4_2_11, SDM_ERFCF4_3_11};
314
  vec_float4 c12a = (vec_float4) {SDM_ERFCF4_0_12, SDM_ERFCF4_1_12, SDM_ERFCF4_2_12, SDM_ERFCF4_3_12};
315
  vec_float4 c13a = (vec_float4) {SDM_ERFCF4_0_13, SDM_ERFCF4_1_13, SDM_ERFCF4_2_13, SDM_ERFCF4_3_13};
316
  vec_float4 c14a = (vec_float4) {SDM_ERFCF4_0_14, SDM_ERFCF4_1_14, SDM_ERFCF4_2_14, SDM_ERFCF4_3_14};
317
 
318
  vec_float4 c00b = (vec_float4) {SDM_ERFCF4_4_00, SDM_ERFCF4_5_00, SDM_ERFCF4_6_00, SDM_ERFCF4_7_00};
319
  vec_float4 c01b = (vec_float4) {SDM_ERFCF4_4_01, SDM_ERFCF4_5_01, SDM_ERFCF4_6_01, SDM_ERFCF4_7_01};
320
  vec_float4 c02b = (vec_float4) {SDM_ERFCF4_4_02, SDM_ERFCF4_5_02, SDM_ERFCF4_6_02, SDM_ERFCF4_7_02};
321
  vec_float4 c03b = (vec_float4) {SDM_ERFCF4_4_03, SDM_ERFCF4_5_03, SDM_ERFCF4_6_03, SDM_ERFCF4_7_03};
322
  vec_float4 c04b = (vec_float4) {SDM_ERFCF4_4_04, SDM_ERFCF4_5_04, SDM_ERFCF4_6_04, SDM_ERFCF4_7_04};
323
  vec_float4 c05b = (vec_float4) {SDM_ERFCF4_4_05, SDM_ERFCF4_5_05, SDM_ERFCF4_6_05, SDM_ERFCF4_7_05};
324
  vec_float4 c06b = (vec_float4) {SDM_ERFCF4_4_06, SDM_ERFCF4_5_06, SDM_ERFCF4_6_06, SDM_ERFCF4_7_06};
325
  vec_float4 c07b = (vec_float4) {SDM_ERFCF4_4_07, SDM_ERFCF4_5_07, SDM_ERFCF4_6_07, SDM_ERFCF4_7_07};
326
  vec_float4 c08b = (vec_float4) {SDM_ERFCF4_4_08, SDM_ERFCF4_5_08, SDM_ERFCF4_6_08, SDM_ERFCF4_7_08};
327
  vec_float4 c09b = (vec_float4) {SDM_ERFCF4_4_09, SDM_ERFCF4_5_09, SDM_ERFCF4_6_09, SDM_ERFCF4_7_09};
328
  vec_float4 c10b = (vec_float4) {SDM_ERFCF4_4_10, SDM_ERFCF4_5_10, SDM_ERFCF4_6_10, SDM_ERFCF4_7_10};
329
  vec_float4 c11b = (vec_float4) {SDM_ERFCF4_4_11, SDM_ERFCF4_5_11, SDM_ERFCF4_6_11, SDM_ERFCF4_7_11};
330
  vec_float4 c12b = (vec_float4) {SDM_ERFCF4_4_12, SDM_ERFCF4_5_12, SDM_ERFCF4_6_12, SDM_ERFCF4_7_12};
331
  vec_float4 c13b = (vec_float4) {SDM_ERFCF4_4_13, SDM_ERFCF4_5_13, SDM_ERFCF4_6_13, SDM_ERFCF4_7_13};
332
  vec_float4 c14b = (vec_float4) {SDM_ERFCF4_4_14, SDM_ERFCF4_5_14, SDM_ERFCF4_6_14, SDM_ERFCF4_7_14};
333
 
  /*
   * One shuffle pattern per partition. Each pattern repeats the same four
   * byte selectors in every lane. In spu_shuffle(a, b, pattern), selector
   * bytes 0x00..0x0F address bytes of a and 0x10..0x1F address bytes of b,
   * so shuffleK copies word K of the a:b pair (i.e. the entry for region K)
   * into the lane.
   */
334
  vec_uchar16 shuffle0 = (vec_uchar16) spu_splats(0x00010203);
335
  vec_uchar16 shuffle1 = (vec_uchar16) spu_splats(0x04050607);
336
  vec_uchar16 shuffle2 = (vec_uchar16) spu_splats(0x08090A0B);
337
  vec_uchar16 shuffle3 = (vec_uchar16) spu_splats(0x0C0D0E0F);
338
  vec_uchar16 shuffle4 = (vec_uchar16) spu_splats(0x10111213);
339
  vec_uchar16 shuffle5 = (vec_uchar16) spu_splats(0x14151617);
340
  vec_uchar16 shuffle6 = (vec_uchar16) spu_splats(0x18191A1B);
341
  vec_uchar16 shuffle7 = (vec_uchar16) spu_splats(0x1C1D1E1F);
342
 
343
 
344
  /*
345
   * Determine the shuffle pattern based on which partition
346
   * each element of x is in.
347
   */
  /*
   * spu_cmpabsgt compares magnitudes, so x (not xabs) can be passed and the
   * sign of x does not influence the partition choice. Each mask lane is
   * all ones where |x| is strictly greater than that partition's start.
   */
348
  vec_uchar16 gt_r1start = (vec_uchar16)spu_cmpabsgt(x, r1start);
349
  vec_uchar16 gt_r2start = (vec_uchar16)spu_cmpabsgt(x, r2start);
350
  vec_uchar16 gt_r3start = (vec_uchar16)spu_cmpabsgt(x, r3start);
351
  vec_uchar16 gt_r4start = (vec_uchar16)spu_cmpabsgt(x, r4start);
352
  vec_uchar16 gt_r5start = (vec_uchar16)spu_cmpabsgt(x, r5start);
353
  vec_uchar16 gt_r6start = (vec_uchar16)spu_cmpabsgt(x, r6start);
354
  vec_uchar16 gt_r7start = (vec_uchar16)spu_cmpabsgt(x, r7start);
355
 
  /*
   * Start from region 0 and overwrite in ascending order of start value:
   * the last select whose mask is set wins, so each lane ends up with the
   * pattern of the highest partition whose start it exceeds.
   */
356
  vec_uchar16 shufflepattern;
357
  shufflepattern = spu_sel(shuffle0, shuffle1, gt_r1start);
358
  shufflepattern = spu_sel(shufflepattern, shuffle2, gt_r2start);
359
  shufflepattern = spu_sel(shufflepattern, shuffle3, gt_r3start);
360
  shufflepattern = spu_sel(shufflepattern, shuffle4, gt_r4start);
361
  shufflepattern = spu_sel(shufflepattern, shuffle5, gt_r5start);
362
  shufflepattern = spu_sel(shufflepattern, shuffle6, gt_r6start);
363
  shufflepattern = spu_sel(shufflepattern, shuffle7, gt_r7start);
364
 
365
 
366
  /* Use the shuffle pattern to select the coefficients */
  /* After this, lane i of coeff_NN holds coefficient NN of the partition chosen for x[i]. */
367
  vec_float4 coeff_14 = spu_shuffle(c14a, c14b, shufflepattern);
368
  vec_float4 coeff_13 = spu_shuffle(c13a, c13b, shufflepattern);
369
  vec_float4 coeff_12 = spu_shuffle(c12a, c12b, shufflepattern);
370
  vec_float4 coeff_11 = spu_shuffle(c11a, c11b, shufflepattern);
371
  vec_float4 coeff_10 = spu_shuffle(c10a, c10b, shufflepattern);
372
  vec_float4 coeff_09 = spu_shuffle(c09a, c09b, shufflepattern);
373
  vec_float4 coeff_08 = spu_shuffle(c08a, c08b, shufflepattern);
374
  vec_float4 coeff_07 = spu_shuffle(c07a, c07b, shufflepattern);
375
  vec_float4 coeff_06 = spu_shuffle(c06a, c06b, shufflepattern);
376
  vec_float4 coeff_05 = spu_shuffle(c05a, c05b, shufflepattern);
377
  vec_float4 coeff_04 = spu_shuffle(c04a, c04b, shufflepattern);
378
  vec_float4 coeff_03 = spu_shuffle(c03a, c03b, shufflepattern);
379
  vec_float4 coeff_02 = spu_shuffle(c02a, c02b, shufflepattern);
380
  vec_float4 coeff_01 = spu_shuffle(c01a, c01b, shufflepattern);
381
  vec_float4 coeff_00 = spu_shuffle(c00a, c00b, shufflepattern);
382
 
  /* Same per-lane selection for the expansion offset and the truncation correction. */
383
  vec_float4 xoffset     = spu_shuffle(xoffseta, xoffsetb, shufflepattern);
384
  vec_uint4  tcorrection = spu_shuffle(tcorra,   tcorrb,   shufflepattern);
385
 
386
 
387
  /*
388
   * We've completed the coeff. setup. Now we actually do the
389
   * approximation below.
390
   */
391
 
392
  /* Adjust x value here (for approximations about a point) */
393
  vec_float4 xappr = spu_sub(xabs, xoffset);
394
 
395
 
396
  /* Now we do the multiplies.
397
   * Use Horner's method.
398
   */
  /*
   * Evaluates coeff_00 + coeff_01*t + ... + coeff_14*t^14 with t = xappr,
   * starting from the highest-order coefficient.
   */
399
  result = coeff_14;
400
  result = spu_madd(xappr, result, coeff_13);
401
  result = spu_madd(xappr, result, coeff_12);
402
  result = spu_madd(xappr, result, coeff_11);
403
  result = spu_madd(xappr, result, coeff_10);
404
  result = spu_madd(xappr, result, coeff_09);
405
  result = spu_madd(xappr, result, coeff_08);
406
  result = spu_madd(xappr, result, coeff_07);
407
  result = spu_madd(xappr, result, coeff_06);
408
  result = spu_madd(xappr, result, coeff_05);
409
  result = spu_madd(xappr, result, coeff_04);
410
  result = spu_madd(xappr, result, coeff_03);
411
  result = spu_madd(xappr, result, coeff_02);
412
  result = spu_madd(xappr, result, coeff_01);
413
  result = spu_madd(xappr, result, coeff_00);
414
 
415
  /* Adjust due to systematic truncation. */
  /*
   * This is an integer add on the raw 32-bit pattern of each lane, not a
   * floating-point add: the lane's bits are bumped by the partition's
   * TRUNC count (0, 1 or 2).
   */
416
  result = (vec_float4)spu_add((vec_uint4)result, tcorrection);
417
 
418
  /* Use the continued fraction approximation for x above approx. 4
419
   * and below approx. 10
420
   */
  /*
   * CONTFRAC_ERFCF4 is a macro that is not defined in this file; it is
   * expected to store its result in presult -- TODO confirm against its
   * definition. It is evaluated for every lane; the select below decides
   * which lanes actually use it.
   */
421
  vec_float4 presult, xsqu;
422
  xsqu = spu_mul(x, x);
423
  CONTFRAC_ERFCF4(xabs, xsqu, presult);
424
 
425
  /* Select between polynomial and continued fraction */
  /* Polynomial result where 4.3 > |x|, continued-fraction result otherwise. */
426
  result = spu_sel(presult, result, spu_cmpgt(spu_splats(4.3f), xabs));
427
 
428
  /* Above clamp value, set erfc = 0 */
429
  result = spu_sel(result, zerof, spu_cmpgt(xabs, clamp));
430
 
431
  /* Negative x values */
  /*
   * Everything above worked on |x|. Lanes where x > 0 keep result; all
   * other lanes (x < 0, and also x == 0 because the compare is strict)
   * get 2 - result.
   */
432
  vec_uint4 gt0 = spu_cmpgt(x, zerof);
433
  result = spu_sel(spu_sub(twof, result), result, gt0);
434
 
435
  return result;
436
}
437
 
438
#endif /* _ERFCF4_H_ */
439
#endif /* __SPU__ */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.