OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-old/] [newlib-1.17.0/] [newlib/] [libm/] [machine/] [spu/] [headers/] [erf_utils.h] - Blame information for rev 825

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 148 jeremybenn
/* --------------------------------------------------------------  */
2
/* (C)Copyright 2007,2008,                                         */
3
/* International Business Machines Corporation                     */
4
/* All Rights Reserved.                                            */
5
/*                                                                 */
6
/* Redistribution and use in source and binary forms, with or      */
7
/* without modification, are permitted provided that the           */
8
/* following conditions are met:                                   */
9
/*                                                                 */
10
/* - Redistributions of source code must retain the above copyright*/
11
/*   notice, this list of conditions and the following disclaimer. */
12
/*                                                                 */
13
/* - Redistributions in binary form must reproduce the above       */
14
/*   copyright notice, this list of conditions and the following   */
15
/*   disclaimer in the documentation and/or other materials        */
16
/*   provided with the distribution.                               */
17
/*                                                                 */
18
/* - Neither the name of IBM Corporation nor the names of its      */
19
/*   contributors may be used to endorse or promote products       */
20
/*   derived from this software without specific prior written     */
21
/*   permission.                                                   */
22
/*                                                                 */
23
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND          */
24
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,     */
25
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF        */
26
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE        */
27
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR            */
28
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,    */
29
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT    */
30
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;    */
31
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)        */
32
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN       */
33
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR    */
34
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  */
35
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              */
36
/* --------------------------------------------------------------  */
37
/* PROLOG END TAG zYx                                              */
38
#ifdef __SPU__
39
#ifndef _ERF_UTILS_H_
40
#define _ERF_UTILS_H_   1
41
 
42
#include <spu_intrinsics.h>
43
 
44
 
45
/*
46
 * This file contains approximation methods for the erf and erfc functions.
47
 */
48
 
49
 
50
#define SQRT_PI          1.7724538509055160272981674833411451827975494561223871282138077898529113E0
51
#define INV_SQRT_PI      5.6418958354775628694807945156077258584405062932899885684408572171064247E-1
52
#define TWO_OVER_SQRT_PI 1.1283791670955125738961589031215451716881012586579977136881714434212849E0
53
 
54
/*
55
 * Coefficients of Taylor Series Expansion of Error Function
56
 */
57
#define TAYLOR_ERF_00  1.0000000000000000000000000000000000000000000000000000000000000000000000E0
58
#define TAYLOR_ERF_01 -3.3333333333333333333333333333333333333333333333333333333333333333333333E-1
59
#define TAYLOR_ERF_02  1.0000000000000000000000000000000000000000000000000000000000000000000000E-1
60
#define TAYLOR_ERF_03 -2.3809523809523809523809523809523809523809523809523809523809523809523810E-2
61
#define TAYLOR_ERF_04  4.6296296296296296296296296296296296296296296296296296296296296296296296E-3
62
#define TAYLOR_ERF_05 -7.5757575757575757575757575757575757575757575757575757575757575757575758E-4
63
#define TAYLOR_ERF_06  1.0683760683760683760683760683760683760683760683760683760683760683760684E-4
64
#define TAYLOR_ERF_07 -1.3227513227513227513227513227513227513227513227513227513227513227513228E-5
65
#define TAYLOR_ERF_08  1.4589169000933706816059757236227824463118580765639589169000933706816060E-6
66
#define TAYLOR_ERF_09 -1.4503852223150468764503852223150468764503852223150468764503852223150469E-7
67
#define TAYLOR_ERF_10  1.3122532963802805072646342487612328882170152011421852691693961535231377E-8
68
#define TAYLOR_ERF_11 -1.0892221037148573380457438428452921206544394950192051641327003645844226E-9
69
#define TAYLOR_ERF_12  8.3507027951472395916840361284805729250173694618139062583507027951472396E-11
70
#define TAYLOR_ERF_13 -5.9477940136376350368119915445018325676761890753660300985403866062302276E-12
71
#define TAYLOR_ERF_14  3.9554295164585257633971372340283122987009139171153402133150354277885750E-13
72
#define TAYLOR_ERF_15 -2.4668270102644569277100425760606678852113226579859111007771188689434124E-14
73
#define TAYLOR_ERF_16  1.4483264643598137264964265124598618265445265605599099265926266086599580E-15
74
#define TAYLOR_ERF_17 -8.0327350124157736091398445228866286178099792434415172399254921152569101E-17
75
#define TAYLOR_ERF_18  4.2214072888070882330314498243398198441944335363431396906515348954052831E-18
76
#define TAYLOR_ERF_19 -2.1078551914421358248605080094544309613386510235451574703658136454790212E-19
77
#define TAYLOR_ERF_20  1.0025164934907719167019489313258878962464315843690383090764235630936808E-20
78
#define TAYLOR_ERF_21 -4.5518467589282002862436219473268442686715055325725991884976042178118399E-22
79
#define TAYLOR_ERF_22  1.9770647538779051748330883205561040762916640191981996475292624380394860E-23
80
#define TAYLOR_ERF_23 -8.2301492992142213568444934713251326025092396728879726307878639881384709E-25
81
#define TAYLOR_ERF_24  3.2892603491757517327524761322472893904586246991984244357740612877764297E-26
82
#define TAYLOR_ERF_25 -1.2641078988989163521950692586675857265291969432213552733563059066748632E-27
83
#define TAYLOR_ERF_26  4.6784835155184857737263085770716162592880293254201102279514950101899871E-29
84
#define TAYLOR_ERF_27 -1.6697617934173720269864939702679842541566703989714871520634965356233624E-30
85
#define TAYLOR_ERF_28  5.7541916439821717721965644338808981189609568886862025916975131240153466E-32
86
#define TAYLOR_ERF_29 -1.9169428621097825307726719621929350834644917747230482041306735714136456E-33
87
#define TAYLOR_ERF_30  6.1803075882227961374638057797477142035193997108557291827163792739565622E-35
88
#define TAYLOR_ERF_31 -1.9303572088151078565555153741147494440075954038003045578376811864380455E-36
89
#define TAYLOR_ERF_32  5.8467550074688362962979552196744814890614668480489993819122074396921572E-38
90
#define TAYLOR_ERF_33 -1.7188560628017836239681912676564509126594090688520350964463748691994130E-39
91
#define TAYLOR_ERF_34  4.9089239645234229670020807729318930583197104694410209489303971115243253E-41
92
#define TAYLOR_ERF_35 -1.3630412617791395763506783635102640685072837923196396196225247512884444E-42
93
#define TAYLOR_ERF_36  3.6824935154611457351939940566677606112639706717920248475342183158858278E-44
94
#define TAYLOR_ERF_37 -9.6872802388707617538436600409638387251268417672366779772972229571050606E-46
95
#define TAYLOR_ERF_38  2.4830690974549115910398991902675594818336060579041382375163763560590552E-47
96
#define TAYLOR_ERF_39 -6.2056579196373967059419746072899084745598074150801247740591035188752759E-49
97
#define TAYLOR_ERF_40  1.5131079495412170980537530678268603996611876104670674603415715370097123E-50
98
#define TAYLOR_ERF_41 -3.6015793098101259166133998969725445892611283117200253978156713046660799E-52
99
#define TAYLOR_ERF_42  8.3734196838722815428266720293759440030440798283686864991232694198118944E-54
100
#define TAYLOR_ERF_43 -1.9025412272898795272394202686366085010926137006451172211319911806576077E-55
101
#define TAYLOR_ERF_44  4.2267897541935525758383443148974703675959497435169866761614717241371774E-57
102
#define TAYLOR_ERF_45 -9.1864295023986856959612367283485924961181813717463202485560679718732304E-59
103
 
104
  /*
105
   * Taylor Series Expansion of Erf
106
   *
107
   *                       infinite
108
   *                      ---------
109
   *                       -            n     2n
110
   *            2 * x        -        -1  *  x
111
   * erf(x) =    ----    *    -      ------------
112
   *            sqrt(pi)     -        (2n + 1) * n!
113
   *                       -
114
   *                      ---------
115
   *                       n = 0
116
   *
117
   * 45 terms give us accurate results for 0 <= x < 2.5
118
   */
119
#define TAYLOR_ERF(_xabs, _xsqu, _tresult)  {                                          \
120
  _tresult = spu_madd(_xsqu, spu_splats(TAYLOR_ERF_45), spu_splats(TAYLOR_ERF_44));    \
121
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_43));                     \
122
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_42));                     \
123
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_41));                     \
124
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_40));                     \
125
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_39));                     \
126
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_38));                     \
127
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_37));                     \
128
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_36));                     \
129
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_35));                     \
130
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_34));                     \
131
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_33));                     \
132
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_32));                     \
133
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_31));                     \
134
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_30));                     \
135
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_29));                     \
136
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_28));                     \
137
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_27));                     \
138
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_26));                     \
139
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_25));                     \
140
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_24));                     \
141
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_23));                     \
142
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_22));                     \
143
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_21));                     \
144
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_20));                     \
145
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_19));                     \
146
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_18));                     \
147
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_17));                     \
148
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_16));                     \
149
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_15));                     \
150
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_14));                     \
151
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_13));                     \
152
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_12));                     \
153
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_11));                     \
154
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_10));                     \
155
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_09));                     \
156
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_08));                     \
157
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_07));                     \
158
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_06));                     \
159
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_05));                     \
160
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_04));                     \
161
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_03));                     \
162
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_02));                     \
163
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_01));                     \
164
  _tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_00));                     \
165
  _tresult = spu_mul(_tresult, _xabs);                                                 \
166
  _tresult = spu_mul(_tresult, spu_splats(TWO_OVER_SQRT_PI));                          \
167
}
168
 
169
 
170
  /*
171
   * Continued Fractions Approximation of Erfc()
172
   *                                             (                      )
173
   *                          1                 (  1    v   2v   3v      )
174
   *   erfc(x)  =  -------------------------  * ( ---  ---  ---  --- ... )
175
   *               sqrt(pi) * x * exp(x^2)      (  1+   1+  1+   1+      )
176
   *                                             (                      )
177
   *                                                Continued Fractions
178
   *          1
179
   *   v =  -----
180
   *        2*x^2
181
   *
182
   *   We are using a backward recurrence calculation to estimate the continued fraction.
183
   *
184
   *   p    =   a   p        +  b   q
185
   *     m,n     m   m+1,n       m    m+1,n
186
   *
187
   *   q    =   p
188
   *     m,n     m+1,n
189
   *
190
   *   With,
191
   *
192
   *   p    =   a   ;   q    =  1
193
   *    n,n      n        n,n
194
   *
195
   *
196
   *   a  =  0,    b   =  1,
197
   *    0           0
198
   *
199
   *   a  =  1,    b   =  n/2x^2
200
   *    n           n
201
   *
202
   *
203
   *    F     =   p    /    q
204
   *     0,n       0,n       0,n
205
   *
206
   * Ref: "Computing the Incomplete Gamma Function to Arbitrary Precision",
207
   *       by Serge Winitzki, Department of Physics, Ludwig-Maximilians University, Munich, Germany.
208
   *
209
   */
210
 
211
#define CONTFRAC_ERFCF4(_xabs, _xsqu, _presult) {   \
212
  vec_float4 v;                         \
213
  vec_float4 p, q, plast, qlast;        \
214
  vec_float4 factor;                    \
215
  vec_float4 inv_xsqu;                  \
216
  inv_xsqu = _recipf4(_xsqu);            \
217
  v = spu_mul(inv_xsqu, onehalff);       \
218
  p = spu_splats(1.945f); q = onef; plast = p; qlast = q;                                        \
219
  p = spu_madd(qlast, spu_mul(v, spu_splats( 4.0f)), plast); q = plast; plast = p; qlast = q;    \
220
  p = spu_madd(qlast, spu_mul(v, spu_splats( 3.0f)), plast); q = plast; plast = p; qlast = q;    \
221
  p = spu_madd(qlast, spu_mul(v, spu_splats( 2.0f)), plast); q = plast; plast = p; qlast = q;    \
222
  p = spu_madd(qlast, spu_mul(v, spu_splats( 1.0f)), plast); q = plast; plast = p; qlast = q;    \
223
  p = qlast; q = plast;                                                                         \
224
  factor = spu_mul(spu_splats((float)SQRT_PI), spu_mul(_xabs, _expf4(_xsqu)));                         \
225
  _presult = _divf4(p, spu_mul(factor, q));                                                     \
226
}
227
 
228
#define CONTFRAC_ERFC(_xabs, _xsqu, _presult) {   \
229
  vec_double2 v;                         \
230
  vec_double2 p, q, plast, qlast;        \
231
  vec_double2 factor;                    \
232
  vec_double2 inv_xsqu;                  \
233
  inv_xsqu = _recipd2(_xsqu);            \
234
  v = spu_mul(inv_xsqu, onehalfd);       \
235
  p = spu_splats(3.025); q = oned; plast = p; qlast = q;                                        \
236
  p = spu_madd(qlast, spu_mul(v, spu_splats(40.0)), plast); q = plast; plast = p; qlast = q;    \
237
  p = spu_madd(qlast, spu_mul(v, spu_splats(39.0)), plast); q = plast; plast = p; qlast = q;    \
238
  p = spu_madd(qlast, spu_mul(v, spu_splats(38.0)), plast); q = plast; plast = p; qlast = q;    \
239
  p = spu_madd(qlast, spu_mul(v, spu_splats(37.0)), plast); q = plast; plast = p; qlast = q;    \
240
  p = spu_madd(qlast, spu_mul(v, spu_splats(36.0)), plast); q = plast; plast = p; qlast = q;    \
241
  p = spu_madd(qlast, spu_mul(v, spu_splats(35.0)), plast); q = plast; plast = p; qlast = q;    \
242
  p = spu_madd(qlast, spu_mul(v, spu_splats(34.0)), plast); q = plast; plast = p; qlast = q;    \
243
  p = spu_madd(qlast, spu_mul(v, spu_splats(33.0)), plast); q = plast; plast = p; qlast = q;    \
244
  p = spu_madd(qlast, spu_mul(v, spu_splats(32.0)), plast); q = plast; plast = p; qlast = q;    \
245
  p = spu_madd(qlast, spu_mul(v, spu_splats(31.0)), plast); q = plast; plast = p; qlast = q;    \
246
  p = spu_madd(qlast, spu_mul(v, spu_splats(30.0)), plast); q = plast; plast = p; qlast = q;    \
247
  p = spu_madd(qlast, spu_mul(v, spu_splats(29.0)), plast); q = plast; plast = p; qlast = q;    \
248
  p = spu_madd(qlast, spu_mul(v, spu_splats(28.0)), plast); q = plast; plast = p; qlast = q;    \
249
  p = spu_madd(qlast, spu_mul(v, spu_splats(27.0)), plast); q = plast; plast = p; qlast = q;    \
250
  p = spu_madd(qlast, spu_mul(v, spu_splats(26.0)), plast); q = plast; plast = p; qlast = q;    \
251
  p = spu_madd(qlast, spu_mul(v, spu_splats(25.0)), plast); q = plast; plast = p; qlast = q;    \
252
  p = spu_madd(qlast, spu_mul(v, spu_splats(24.0)), plast); q = plast; plast = p; qlast = q;    \
253
  p = spu_madd(qlast, spu_mul(v, spu_splats(23.0)), plast); q = plast; plast = p; qlast = q;    \
254
  p = spu_madd(qlast, spu_mul(v, spu_splats(22.0)), plast); q = plast; plast = p; qlast = q;    \
255
  p = spu_madd(qlast, spu_mul(v, spu_splats(21.0)), plast); q = plast; plast = p; qlast = q;    \
256
  p = spu_madd(qlast, spu_mul(v, spu_splats(20.0)), plast); q = plast; plast = p; qlast = q;    \
257
  p = spu_madd(qlast, spu_mul(v, spu_splats(19.0)), plast); q = plast; plast = p; qlast = q;    \
258
  p = spu_madd(qlast, spu_mul(v, spu_splats(18.0)), plast); q = plast; plast = p; qlast = q;    \
259
  p = spu_madd(qlast, spu_mul(v, spu_splats(17.0)), plast); q = plast; plast = p; qlast = q;    \
260
  p = spu_madd(qlast, spu_mul(v, spu_splats(16.0)), plast); q = plast; plast = p; qlast = q;    \
261
  p = spu_madd(qlast, spu_mul(v, spu_splats(15.0)), plast); q = plast; plast = p; qlast = q;    \
262
  p = spu_madd(qlast, spu_mul(v, spu_splats(14.0)), plast); q = plast; plast = p; qlast = q;    \
263
  p = spu_madd(qlast, spu_mul(v, spu_splats(13.0)), plast); q = plast; plast = p; qlast = q;    \
264
  p = spu_madd(qlast, spu_mul(v, spu_splats(12.0)), plast); q = plast; plast = p; qlast = q;    \
265
  p = spu_madd(qlast, spu_mul(v, spu_splats(11.0)), plast); q = plast; plast = p; qlast = q;    \
266
  p = spu_madd(qlast, spu_mul(v, spu_splats(10.0)), plast); q = plast; plast = p; qlast = q;    \
267
  p = spu_madd(qlast, spu_mul(v, spu_splats( 9.0)), plast); q = plast; plast = p; qlast = q;    \
268
  p = spu_madd(qlast, spu_mul(v, spu_splats( 8.0)), plast); q = plast; plast = p; qlast = q;    \
269
  p = spu_madd(qlast, spu_mul(v, spu_splats( 7.0)), plast); q = plast; plast = p; qlast = q;    \
270
  p = spu_madd(qlast, spu_mul(v, spu_splats( 6.0)), plast); q = plast; plast = p; qlast = q;    \
271
  p = spu_madd(qlast, spu_mul(v, spu_splats( 5.0)), plast); q = plast; plast = p; qlast = q;    \
272
  p = spu_madd(qlast, spu_mul(v, spu_splats( 4.0)), plast); q = plast; plast = p; qlast = q;    \
273
  p = spu_madd(qlast, spu_mul(v, spu_splats( 3.0)), plast); q = plast; plast = p; qlast = q;    \
274
  p = spu_madd(qlast, spu_mul(v, spu_splats( 2.0)), plast); q = plast; plast = p; qlast = q;    \
275
  p = spu_madd(qlast, spu_mul(v, spu_splats( 1.0)), plast); q = plast; plast = p; qlast = q;    \
276
  p = qlast; q = plast;                                                                         \
277
  factor = spu_mul(spu_splats(SQRT_PI), spu_mul(_xabs, _expd2(_xsqu)));                         \
278
  _presult = _divd2(p, spu_mul(factor, q));                                                     \
279
}
280
 
281
#endif /* _ERF_UTILS_H_ */
282
#endif /* __SPU__ */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.