OpenCores
URL https://opencores.org/ocsvn/openrisc_2011-10-31/openrisc_2011-10-31/trunk

Subversion Repositories openrisc_2011-10-31

[/] [openrisc/] [tags/] [gnu-src/] [newlib-1.18.0/] [newlib-1.18.0-or32-1.0rc1/] [newlib/] [libm/] [machine/] [spu/] [headers/] [erff4.h] - Blame information for rev 345

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 207 jeremybenn
/* --------------------------------------------------------------  */
2
/* (C)Copyright 2007,2008,                                         */
3
/* International Business Machines Corporation                     */
4
/* All Rights Reserved.                                            */
5
/*                                                                 */
6
/* Redistribution and use in source and binary forms, with or      */
7
/* without modification, are permitted provided that the           */
8
/* following conditions are met:                                   */
9
/*                                                                 */
10
/* - Redistributions of source code must retain the above copyright*/
11
/*   notice, this list of conditions and the following disclaimer. */
12
/*                                                                 */
13
/* - Redistributions in binary form must reproduce the above       */
14
/*   copyright notice, this list of conditions and the following   */
15
/*   disclaimer in the documentation and/or other materials        */
16
/*   provided with the distribution.                               */
17
/*                                                                 */
18
/* - Neither the name of IBM Corporation nor the names of its      */
19
/*   contributors may be used to endorse or promote products       */
20
/*   derived from this software without specific prior written     */
21
/*   permission.                                                   */
22
/*                                                                 */
23
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND          */
24
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,     */
25
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF        */
26
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE        */
27
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR            */
28
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,    */
29
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT    */
30
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;    */
31
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)        */
32
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN       */
33
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR    */
34
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  */
35
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              */
36
/* --------------------------------------------------------------  */
37
/* PROLOG END TAG zYx                                              */
38
#ifdef __SPU__
39
#ifndef _ERFF4_H_
40
#define _ERFF4_H_       1
41
 
42
#include <spu_intrinsics.h>
43
 
44
/*
45
 * FUNCTION
46
 *  vector float _erff4(vector float x)
47
 *
48
 * DESCRIPTION
49
 *  The erff4 function computes the error function of each element of x.
50
 *
51
 *  C99 Special Cases:
52
 *  - erf(+0) returns +0
53
 *  - erf(-0) returns -0
54
 *  - erf(+infinite) returns +1
55
 *  - erf(-infinite) returns -1
56
 *
57
 */
58
 
59
static __inline vector float _erff4(vector float x)
60
{
61
  vec_float4 sign_maskf = spu_splats(-0.0f);
62
  vec_float4 zerof      = spu_splats(0.0f);
63
  vec_float4 onef       = spu_splats(1.0f);
64
  vec_float4 clamp      = spu_splats(3.9199876f);
65
  vec_float4 xabs       = spu_andc(x, sign_maskf);
66
  vec_float4 xsign      = spu_and(x, sign_maskf);
67
  vec_float4 result;
68
 
69
 
70
  /*
71
   * First thing we do is setup the description of each partition.
72
   * This consists of:
73
   * - Start x of partition
74
   * - Offset (used for evaluating power series expanded around a point)
75
   * - Truncation adjustment.
76
   */
77
 
78
 
79
  /***************************************************************
80
   * REGION 0: Approximation Near 0 from Above
81
   *
82
   */
83
#define SDM_ERFF4_0_START     0.0f
84
#define SDM_ERFF4_0_OFF       0.0f
85
#define SDM_ERFF4_0_TRUNC     2u
86
 
87
#define SDM_ERFF4_0_00   0.0f
88
#define SDM_ERFF4_0_01   1.12837916709551257389615890312154f
89
#define SDM_ERFF4_0_02   0.0f
90
#define SDM_ERFF4_0_03  -0.37612638903183752463205296770955f
91
#define SDM_ERFF4_0_04   0.0f
92
#define SDM_ERFF4_0_05   0.11283791670955125738961589031073f
93
#define SDM_ERFF4_0_06   0.0f
94
#define SDM_ERFF4_0_07  -0.02686617064513125175943235483588f
95
#define SDM_ERFF4_0_08   0.0f
96
#define SDM_ERFF4_0_09   0.00522397762544218784211184677371f
97
#define SDM_ERFF4_0_10   0.0f
98
//#define SDM_ERFF4_0_11  -0.00085483270234508528325466583569f
99
 
100
 
101
 
102
  /***************************************************************
103
   * REGION 1: Above 0 and Below 1
104
   */
105
#define SDM_ERFF4_1_START     0.07f
106
#define SDM_ERFF4_1_OFF       0.0625f
107
#define SDM_ERFF4_1_TRUNC     1u
108
 
109
#define SDM_ERFF4_1_00     0.0704319777223870780505900559232967439190042883f
110
#define SDM_ERFF4_1_01     1.1239800336253906104888456836298420746260842545f
111
#define SDM_ERFF4_1_02    -0.0702487521015869131555528552268651296641302713f
112
#define SDM_ERFF4_1_03    -0.3717329798708974154481338589088279778060226856f
113
#define SDM_ERFF4_1_04     0.0350329063214945152846051348331892508611482993f
114
#define SDM_ERFF4_1_05     0.1106440713032318617523250293018186620702780982f
115
#define SDM_ERFF4_1_06    -0.0116471931712158678624014740659716890227703402f
116
#define SDM_ERFF4_1_07    -0.0261358409084263503958678377968739965222786482f
117
#define SDM_ERFF4_1_08     0.0029041996223118476954500365511415181291113910f
118
#define SDM_ERFF4_1_09     0.0050416329596619035812041623972929782386498567f
119
#define SDM_ERFF4_1_10    -0.0005793225670734356072895029723913210064918149f
120
//#define SDM_ERFF4_1_11    -0.0008184112733188406359323913130525859730689332f
121
 
122
 
123
 
124
  /***************************************************************
125
   * REGION 2:
126
   */
127
#define SDM_ERFF4_2_START     0.13f
128
#define SDM_ERFF4_2_OFF       0.1875f
129
#define SDM_ERFF4_2_TRUNC     1u
130
 
131
#define SDM_ERFF4_2_00    0.2091176770593758483008706390019410965937912290f
132
#define SDM_ERFF4_2_01    1.0893988034775673230502318110338693557898033315f
133
#define SDM_ERFF4_2_02   -0.2042622756520438730719184645688505042105881396f
134
#define SDM_ERFF4_2_03   -0.3376001500360169568827541289401834722369442864f
135
#define SDM_ERFF4_2_04    0.0997374392832245473983976877777590352590762400f
136
#define SDM_ERFF4_2_05    0.0937997370645632460099464120987231140266525679f
137
#define SDM_ERFF4_2_06   -0.0324591340420617488485277008302392706957527828f
138
#define SDM_ERFF4_2_07   -0.0205943885488331791711970665266474471714543313f
139
#define SDM_ERFF4_2_08    0.0079208906865255014554772269570592999495375181f
140
#define SDM_ERFF4_2_09    0.0036744273281123333893101007014150883409965011f
141
#define SDM_ERFF4_2_10   -0.0015459493690754127608506357908913858038162608f
142
//#define SDM_ERFF4_2_11   -0.0005485671070180836650399266219057172124875094f
143
 
144
 
145
 
146
  /***************************************************************
147
   * REGION 3:
148
   */
149
#define SDM_ERFF4_3_START     0.25f
150
#define SDM_ERFF4_3_OFF       0.5f
151
#define SDM_ERFF4_3_TRUNC     2u
152
 
153
#define SDM_ERFF4_3_00    0.5204998778130465376827466538919645287364515699f
154
#define SDM_ERFF4_3_01    0.8787825789354447940937239548244578983625218956f
155
#define SDM_ERFF4_3_02   -0.4393912894677223970468619774122289491812609947f
156
#define SDM_ERFF4_3_03   -0.1464637631559074656822873258040763163937536583f
157
#define SDM_ERFF4_3_04    0.1830797039448843321028591572550953954921920811f
158
#define SDM_ERFF4_3_05    0.0073231881577953732841143662902038158196876832f
159
#define SDM_ERFF4_3_06   -0.0500417857449350507747815029830594081011991688f
160
#define SDM_ERFF4_3_07    0.0054052103069442040906558417856266259621504328f
161
#define SDM_ERFF4_3_08    0.0100475885141180567975497704160236877764167320f
162
#define SDM_ERFF4_3_09   -0.0021674118390300459951330548378744759122422210f
163
#define SDM_ERFF4_3_10   -0.0015694967741624277200510981457278746801387524f
164
//#define SDM_ERFF4_3_11    0.0004973489167651373192082360776274483020158863f
165
 
166
 
167
 
168
  /***************************************************************
169
   * REGION 4:
170
   */
171
#define SDM_ERFF4_4_START     0.77f
172
#define SDM_ERFF4_4_OFF       1.0f
173
#define SDM_ERFF4_4_TRUNC     1u
174
 
175
#define SDM_ERFF4_4_00     0.8427007929497148693412206350826092590442f
176
#define SDM_ERFF4_4_01     0.4151074974205947033402682494413373653605f
177
#define SDM_ERFF4_4_02    -0.4151074974205947033402682494413373653605f
178
#define SDM_ERFF4_4_03     0.1383691658068649011134227498137791217898f
179
#define SDM_ERFF4_4_04     0.0691845829034324505567113749068895608946f
180
#define SDM_ERFF4_4_05    -0.0691845829034324505567113749068895608946f
181
#define SDM_ERFF4_4_06     0.0046123055268954967037807583271259707263f
182
#define SDM_ERFF4_4_07     0.0151547181597994891695653487891281895293f
183
#define SDM_ERFF4_4_08    -0.0047770307242846215860586425530947553951f
184
#define SDM_ERFF4_4_09    -0.0018851883701199847638468972527538689873f
185
#define SDM_ERFF4_4_10     0.0012262875805634852347353603488787303121f
186
//#define SDM_ERFF4_4_11     0.0000855239913717274641321540324726821411f
187
 
188
 
189
 
190
  /***************************************************************
191
   * REGION 5:
192
   */
193
#define SDM_ERFF4_5_START     1.36f
194
#define SDM_ERFF4_5_OFF       1.875f
195
#define SDM_ERFF4_5_TRUNC     1u
196
 
197
#define SDM_ERFF4_5_00     0.99199005767011997029646305969122440092668f
198
#define SDM_ERFF4_5_01     0.03354582842421607459425032786195496507386f
199
#define SDM_ERFF4_5_02    -0.06289842829540513986421936474116555951979f
200
#define SDM_ERFF4_5_03     0.06744109256118439996552409663913862770819f
201
#define SDM_ERFF4_5_04    -0.04225988151097532834627238568547061029869f
202
#define SDM_ERFF4_5_05     0.01146258336487617627004706027236136941544f
203
#define SDM_ERFF4_5_06     0.00410518713321247739022655684589964019683f
204
#define SDM_ERFF4_5_07    -0.00492839390823910723763257456562751425198f
205
#define SDM_ERFF4_5_08     0.00143050168737012207687743571780226012058f
206
#define SDM_ERFF4_5_09     0.00036225644575338665306295794978774160986f
207
#define SDM_ERFF4_5_10    -0.00039015757824554169745459780322413823624f
208
//#define SDM_ERFF4_5_11     0.00007372993782406230817649249567932577159f
209
 
210
 
211
 
212
  /***************************************************************
213
   * REGION 6:
214
   */
215
#define SDM_ERFF4_6_START     2.0f
216
#define SDM_ERFF4_6_OFF       2.5f
217
#define SDM_ERFF4_6_TRUNC     1u
218
 
219
#define SDM_ERFF4_6_00    0.999593047982555041060435784260025087279f
220
#define SDM_ERFF4_6_01    0.002178284230352709720386678564097264007f
221
#define SDM_ERFF4_6_02   -0.005445710575881774300966696410243160031f
222
#define SDM_ERFF4_6_03    0.008350089549685387261482267829039512051f
223
#define SDM_ERFF4_6_04   -0.008622375078479475976530602649551670054f
224
#define SDM_ERFF4_6_05    0.006117348213573859798085922300839816434f
225
#define SDM_ERFF4_6_06   -0.002798490157050356237996774544152735014f
226
#define SDM_ERFF4_6_07    0.000542410061327906884739143174194854432f
227
#define SDM_ERFF4_6_08    0.000260670173895134533751630061303802055f
228
#define SDM_ERFF4_6_09   -0.000250285386311056635227961206817778392f
229
#define SDM_ERFF4_6_10    0.000078801328907504400502579703621546608f
230
//#define SDM_ERFF4_6_11    5.137004620216358263402877651297096663210e-6f
231
 
232
 
233
 
234
  /***************************************************************
235
   * REGION 7:
236
   */
237
#define SDM_ERFF4_7_START     2.75f
238
#define SDM_ERFF4_7_OFF       3.5f
239
#define SDM_ERFF4_7_TRUNC     1u
240
 
241
#define SDM_ERFF4_7_00    0.999999256901627658587254476316243904363263f
242
#define SDM_ERFF4_7_01    5.399426777384782511586818937495781413007869e-6f
243
#define SDM_ERFF4_7_02   -0.000018897993720846738790553866281235234945f
244
#define SDM_ERFF4_7_03    0.000042295509756180796340763415010383621069f
245
#define SDM_ERFF4_7_04   -0.000067717810833034147332818020841092925222f
246
#define SDM_ERFF4_7_05    0.000082116282239393567363716204674415008991f
247
#define SDM_ERFF4_7_06   -0.000077744246390483389302250766562526063763f
248
#define SDM_ERFF4_7_07    0.000058192750619199206596604051163855823527f
249
#define SDM_ERFF4_7_08   -0.000034259175422410008064403380504975403351f
250
#define SDM_ERFF4_7_09    0.000015330768263696827211862952666453348031f
251
#define SDM_ERFF4_7_10   -4.641017709492666503521243665632827470977627e-6f
252
//#define SDM_ERFF4_7_11    4.447037356176705948450355327103423490366212e-7f
253
 
254
 
255
 
256
 
257
 
258
  /***************************************************************
259
   * Now we load the description of each partition.
260
   */
261
 
262
  /* Start point for each partition */
263
  vec_float4 r1start = spu_splats(SDM_ERFF4_1_START);
264
  vec_float4 r2start = spu_splats(SDM_ERFF4_2_START);
265
  vec_float4 r3start = spu_splats(SDM_ERFF4_3_START);
266
  vec_float4 r4start = spu_splats(SDM_ERFF4_4_START);
267
  vec_float4 r5start = spu_splats(SDM_ERFF4_5_START);
268
  vec_float4 r6start = spu_splats(SDM_ERFF4_6_START);
269
  vec_float4 r7start = spu_splats(SDM_ERFF4_7_START);
270
 
271
  /* X Offset for each partition */
272
  vec_float4 xoffseta = (vec_float4) {SDM_ERFF4_0_OFF, SDM_ERFF4_1_OFF, SDM_ERFF4_2_OFF, SDM_ERFF4_3_OFF};
273
  vec_float4 xoffsetb = (vec_float4) {SDM_ERFF4_4_OFF, SDM_ERFF4_5_OFF, SDM_ERFF4_6_OFF, SDM_ERFF4_7_OFF};
274
 
275
  /* Truncation Correction for each partition */
276
  vec_uint4 tcorra = (vec_uint4) {SDM_ERFF4_0_TRUNC, SDM_ERFF4_1_TRUNC, SDM_ERFF4_2_TRUNC, SDM_ERFF4_3_TRUNC};
277
  vec_uint4 tcorrb = (vec_uint4) {SDM_ERFF4_4_TRUNC, SDM_ERFF4_5_TRUNC, SDM_ERFF4_6_TRUNC, SDM_ERFF4_7_TRUNC};
278
 
279
  /* The coefficients for each partition */
280
  vec_float4 c00a = (vec_float4) {SDM_ERFF4_0_00, SDM_ERFF4_1_00, SDM_ERFF4_2_00, SDM_ERFF4_3_00};
281
  vec_float4 c01a = (vec_float4) {SDM_ERFF4_0_01, SDM_ERFF4_1_01, SDM_ERFF4_2_01, SDM_ERFF4_3_01};
282
  vec_float4 c02a = (vec_float4) {SDM_ERFF4_0_02, SDM_ERFF4_1_02, SDM_ERFF4_2_02, SDM_ERFF4_3_02};
283
  vec_float4 c03a = (vec_float4) {SDM_ERFF4_0_03, SDM_ERFF4_1_03, SDM_ERFF4_2_03, SDM_ERFF4_3_03};
284
  vec_float4 c04a = (vec_float4) {SDM_ERFF4_0_04, SDM_ERFF4_1_04, SDM_ERFF4_2_04, SDM_ERFF4_3_04};
285
  vec_float4 c05a = (vec_float4) {SDM_ERFF4_0_05, SDM_ERFF4_1_05, SDM_ERFF4_2_05, SDM_ERFF4_3_05};
286
  vec_float4 c06a = (vec_float4) {SDM_ERFF4_0_06, SDM_ERFF4_1_06, SDM_ERFF4_2_06, SDM_ERFF4_3_06};
287
  vec_float4 c07a = (vec_float4) {SDM_ERFF4_0_07, SDM_ERFF4_1_07, SDM_ERFF4_2_07, SDM_ERFF4_3_07};
288
  vec_float4 c08a = (vec_float4) {SDM_ERFF4_0_08, SDM_ERFF4_1_08, SDM_ERFF4_2_08, SDM_ERFF4_3_08};
289
  vec_float4 c09a = (vec_float4) {SDM_ERFF4_0_09, SDM_ERFF4_1_09, SDM_ERFF4_2_09, SDM_ERFF4_3_09};
290
  vec_float4 c10a = (vec_float4) {SDM_ERFF4_0_10, SDM_ERFF4_1_10, SDM_ERFF4_2_10, SDM_ERFF4_3_10};
291
 
292
  vec_float4 c00b = (vec_float4) {SDM_ERFF4_4_00, SDM_ERFF4_5_00, SDM_ERFF4_6_00, SDM_ERFF4_7_00};
293
  vec_float4 c01b = (vec_float4) {SDM_ERFF4_4_01, SDM_ERFF4_5_01, SDM_ERFF4_6_01, SDM_ERFF4_7_01};
294
  vec_float4 c02b = (vec_float4) {SDM_ERFF4_4_02, SDM_ERFF4_5_02, SDM_ERFF4_6_02, SDM_ERFF4_7_02};
295
  vec_float4 c03b = (vec_float4) {SDM_ERFF4_4_03, SDM_ERFF4_5_03, SDM_ERFF4_6_03, SDM_ERFF4_7_03};
296
  vec_float4 c04b = (vec_float4) {SDM_ERFF4_4_04, SDM_ERFF4_5_04, SDM_ERFF4_6_04, SDM_ERFF4_7_04};
297
  vec_float4 c05b = (vec_float4) {SDM_ERFF4_4_05, SDM_ERFF4_5_05, SDM_ERFF4_6_05, SDM_ERFF4_7_05};
298
  vec_float4 c06b = (vec_float4) {SDM_ERFF4_4_06, SDM_ERFF4_5_06, SDM_ERFF4_6_06, SDM_ERFF4_7_06};
299
  vec_float4 c07b = (vec_float4) {SDM_ERFF4_4_07, SDM_ERFF4_5_07, SDM_ERFF4_6_07, SDM_ERFF4_7_07};
300
  vec_float4 c08b = (vec_float4) {SDM_ERFF4_4_08, SDM_ERFF4_5_08, SDM_ERFF4_6_08, SDM_ERFF4_7_08};
301
  vec_float4 c09b = (vec_float4) {SDM_ERFF4_4_09, SDM_ERFF4_5_09, SDM_ERFF4_6_09, SDM_ERFF4_7_09};
302
  vec_float4 c10b = (vec_float4) {SDM_ERFF4_4_10, SDM_ERFF4_5_10, SDM_ERFF4_6_10, SDM_ERFF4_7_10};
303
 
304
 
305
  vec_uchar16 shuffle0 = (vec_uchar16) spu_splats(0x00010203);
306
  vec_uchar16 shuffle1 = (vec_uchar16) spu_splats(0x04050607);
307
  vec_uchar16 shuffle2 = (vec_uchar16) spu_splats(0x08090A0B);
308
  vec_uchar16 shuffle3 = (vec_uchar16) spu_splats(0x0C0D0E0F);
309
  vec_uchar16 shuffle4 = (vec_uchar16) spu_splats(0x10111213);
310
  vec_uchar16 shuffle5 = (vec_uchar16) spu_splats(0x14151617);
311
  vec_uchar16 shuffle6 = (vec_uchar16) spu_splats(0x18191A1B);
312
  vec_uchar16 shuffle7 = (vec_uchar16) spu_splats(0x1C1D1E1F);
313
 
314
 
315
  /*
316
   * Determine the shuffle pattern based on which partition
317
   * each element of x is in.
318
   */
319
 
320
  vec_uchar16 gt_r1start = (vec_uchar16)spu_cmpabsgt(x, r1start);
321
  vec_uchar16 gt_r2start = (vec_uchar16)spu_cmpabsgt(x, r2start);
322
  vec_uchar16 gt_r3start = (vec_uchar16)spu_cmpabsgt(x, r3start);
323
  vec_uchar16 gt_r4start = (vec_uchar16)spu_cmpabsgt(x, r4start);
324
  vec_uchar16 gt_r5start = (vec_uchar16)spu_cmpabsgt(x, r5start);
325
  vec_uchar16 gt_r6start = (vec_uchar16)spu_cmpabsgt(x, r6start);
326
  vec_uchar16 gt_r7start = (vec_uchar16)spu_cmpabsgt(x, r7start);
327
 
328
  vec_uchar16 shufflepattern;
329
  shufflepattern = spu_sel(shuffle0, shuffle1, gt_r1start);
330
  shufflepattern = spu_sel(shufflepattern, shuffle2, gt_r2start);
331
  shufflepattern = spu_sel(shufflepattern, shuffle3, gt_r3start);
332
  shufflepattern = spu_sel(shufflepattern, shuffle4, gt_r4start);
333
  shufflepattern = spu_sel(shufflepattern, shuffle5, gt_r5start);
334
  shufflepattern = spu_sel(shufflepattern, shuffle6, gt_r6start);
335
  shufflepattern = spu_sel(shufflepattern, shuffle7, gt_r7start);
336
 
337
 
338
 
339
  /* Use the shuffle pattern to select the coefficients */
340
 
341
  vec_float4 coeff_10 = spu_shuffle(c10a, c10b, shufflepattern);
342
  vec_float4 coeff_09 = spu_shuffle(c09a, c09b, shufflepattern);
343
  vec_float4 coeff_08 = spu_shuffle(c08a, c08b, shufflepattern);
344
  vec_float4 coeff_07 = spu_shuffle(c07a, c07b, shufflepattern);
345
  vec_float4 coeff_06 = spu_shuffle(c06a, c06b, shufflepattern);
346
  vec_float4 coeff_05 = spu_shuffle(c05a, c05b, shufflepattern);
347
  vec_float4 coeff_04 = spu_shuffle(c04a, c04b, shufflepattern);
348
  vec_float4 coeff_03 = spu_shuffle(c03a, c03b, shufflepattern);
349
  vec_float4 coeff_02 = spu_shuffle(c02a, c02b, shufflepattern);
350
  vec_float4 coeff_01 = spu_shuffle(c01a, c01b, shufflepattern);
351
  vec_float4 coeff_00 = spu_shuffle(c00a, c00b, shufflepattern);
352
 
353
  vec_float4 xoffset     = spu_shuffle(xoffseta, xoffsetb, shufflepattern);
354
  vec_uint4  tcorrection = spu_shuffle(tcorra,   tcorrb,   shufflepattern);
355
 
356
 
357
  /*
358
   * We've completed the coeff. setup. Now we actually do the
359
   * approximation below.
360
   */
361
 
362
  /* Adjust x value here (for approximations about a point) */
363
  vec_float4 xappr = spu_sub(xabs, xoffset);
364
 
365
 
366
  /* Now we do the multiplies.
367
   * Use Horner's method.
368
   */
369
  result = coeff_10;
370
  result = spu_madd(xappr, result, coeff_09);
371
  result = spu_madd(xappr, result, coeff_08);
372
  result = spu_madd(xappr, result, coeff_07);
373
  result = spu_madd(xappr, result, coeff_06);
374
  result = spu_madd(xappr, result, coeff_05);
375
  result = spu_madd(xappr, result, coeff_04);
376
  result = spu_madd(xappr, result, coeff_03);
377
  result = spu_madd(xappr, result, coeff_02);
378
  result = spu_madd(xappr, result, coeff_01);
379
  result = spu_madd(xappr, result, coeff_00);
380
 
381
 
382
  /* Adjust due to systematic truncation. Note that the correction
383
   * value is always non-negative, so the result is cast as uint
384
   * to do the adjustment.
385
   */
386
  result = (vec_float4)spu_add((vec_uint4)result, tcorrection);
387
 
388
 
389
  /*
390
   * Special Cases
391
   */
392
 
393
  /* Erf(0) = 0 */
394
  result = spu_sel(result, zerof, spu_cmpeq(xabs, zerof));
395
 
396
  /* Erf(infinity) = 1 */
397
  result = spu_sel(result, onef, spu_cmpgt(xabs, clamp));
398
 
399
 
400
  /* Preserve sign in result, since erf(-x) = -erf(x) */
401
  result = spu_or(result, xsign);
402
 
403
  return result;
404
}
405
 
406
#endif /* _ERFF4_H_ */
407
#endif /* __SPU__ */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.