1 |
148 |
jeremybenn |
/* -------------------------------------------------------------- */
|
2 |
|
|
/* (C)Copyright 2007,2008, */
|
3 |
|
|
/* International Business Machines Corporation */
|
4 |
|
|
/* All Rights Reserved. */
|
5 |
|
|
/* */
|
6 |
|
|
/* Redistribution and use in source and binary forms, with or */
|
7 |
|
|
/* without modification, are permitted provided that the */
|
8 |
|
|
/* following conditions are met: */
|
9 |
|
|
/* */
|
10 |
|
|
/* - Redistributions of source code must retain the above copyright*/
|
11 |
|
|
/* notice, this list of conditions and the following disclaimer. */
|
12 |
|
|
/* */
|
13 |
|
|
/* - Redistributions in binary form must reproduce the above */
|
14 |
|
|
/* copyright notice, this list of conditions and the following */
|
15 |
|
|
/* disclaimer in the documentation and/or other materials */
|
16 |
|
|
/* provided with the distribution. */
|
17 |
|
|
/* */
|
18 |
|
|
/* - Neither the name of IBM Corporation nor the names of its */
|
19 |
|
|
/* contributors may be used to endorse or promote products */
|
20 |
|
|
/* derived from this software without specific prior written */
|
21 |
|
|
/* permission. */
|
22 |
|
|
/* */
|
23 |
|
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
|
24 |
|
|
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
25 |
|
|
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
26 |
|
|
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
27 |
|
|
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
|
28 |
|
|
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
|
29 |
|
|
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
|
30 |
|
|
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
|
31 |
|
|
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
|
32 |
|
|
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
33 |
|
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
|
34 |
|
|
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
|
35 |
|
|
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
|
36 |
|
|
/* -------------------------------------------------------------- */
|
37 |
|
|
/* PROLOG END TAG zYx */
|
38 |
|
|
#ifdef __SPU__
|
39 |
|
|
#ifndef _ERF_UTILS_H_
|
40 |
|
|
#define _ERF_UTILS_H_ 1
|
41 |
|
|
|
42 |
|
|
#include <spu_intrinsics.h>
|
43 |
|
|
|
44 |
|
|
|
45 |
|
|
/*
|
46 |
|
|
* This file contains approximation methods for the erf and erfc functions.
|
47 |
|
|
*/
|
48 |
|
|
|
49 |
|
|
|
50 |
|
|
#define SQRT_PI 1.7724538509055160272981674833411451827975494561223871282138077898529113E0
|
51 |
|
|
#define INV_SQRT_PI 5.6418958354775628694807945156077258584405062932899885684408572171064247E-1
|
52 |
|
|
#define TWO_OVER_SQRT_PI 1.1283791670955125738961589031215451716881012586579977136881714434212849E0
|
53 |
|
|
|
54 |
|
|
/*
|
55 |
|
|
* Coefficients of Taylor Series Expansion of Error Function
|
56 |
|
|
*/
|
57 |
|
|
#define TAYLOR_ERF_00 1.0000000000000000000000000000000000000000000000000000000000000000000000E0
|
58 |
|
|
#define TAYLOR_ERF_01 -3.3333333333333333333333333333333333333333333333333333333333333333333333E-1
|
59 |
|
|
#define TAYLOR_ERF_02 1.0000000000000000000000000000000000000000000000000000000000000000000000E-1
|
60 |
|
|
#define TAYLOR_ERF_03 -2.3809523809523809523809523809523809523809523809523809523809523809523810E-2
|
61 |
|
|
#define TAYLOR_ERF_04 4.6296296296296296296296296296296296296296296296296296296296296296296296E-3
|
62 |
|
|
#define TAYLOR_ERF_05 -7.5757575757575757575757575757575757575757575757575757575757575757575758E-4
|
63 |
|
|
#define TAYLOR_ERF_06 1.0683760683760683760683760683760683760683760683760683760683760683760684E-4
|
64 |
|
|
#define TAYLOR_ERF_07 -1.3227513227513227513227513227513227513227513227513227513227513227513228E-5
|
65 |
|
|
#define TAYLOR_ERF_08 1.4589169000933706816059757236227824463118580765639589169000933706816060E-6
|
66 |
|
|
#define TAYLOR_ERF_09 -1.4503852223150468764503852223150468764503852223150468764503852223150469E-7
|
67 |
|
|
#define TAYLOR_ERF_10 1.3122532963802805072646342487612328882170152011421852691693961535231377E-8
|
68 |
|
|
#define TAYLOR_ERF_11 -1.0892221037148573380457438428452921206544394950192051641327003645844226E-9
|
69 |
|
|
#define TAYLOR_ERF_12 8.3507027951472395916840361284805729250173694618139062583507027951472396E-11
|
70 |
|
|
#define TAYLOR_ERF_13 -5.9477940136376350368119915445018325676761890753660300985403866062302276E-12
|
71 |
|
|
#define TAYLOR_ERF_14 3.9554295164585257633971372340283122987009139171153402133150354277885750E-13
|
72 |
|
|
#define TAYLOR_ERF_15 -2.4668270102644569277100425760606678852113226579859111007771188689434124E-14
|
73 |
|
|
#define TAYLOR_ERF_16 1.4483264643598137264964265124598618265445265605599099265926266086599580E-15
|
74 |
|
|
#define TAYLOR_ERF_17 -8.0327350124157736091398445228866286178099792434415172399254921152569101E-17
|
75 |
|
|
#define TAYLOR_ERF_18 4.2214072888070882330314498243398198441944335363431396906515348954052831E-18
|
76 |
|
|
#define TAYLOR_ERF_19 -2.1078551914421358248605080094544309613386510235451574703658136454790212E-19
|
77 |
|
|
#define TAYLOR_ERF_20 1.0025164934907719167019489313258878962464315843690383090764235630936808E-20
|
78 |
|
|
#define TAYLOR_ERF_21 -4.5518467589282002862436219473268442686715055325725991884976042178118399E-22
|
79 |
|
|
#define TAYLOR_ERF_22 1.9770647538779051748330883205561040762916640191981996475292624380394860E-23
|
80 |
|
|
#define TAYLOR_ERF_23 -8.2301492992142213568444934713251326025092396728879726307878639881384709E-25
|
81 |
|
|
#define TAYLOR_ERF_24 3.2892603491757517327524761322472893904586246991984244357740612877764297E-26
|
82 |
|
|
#define TAYLOR_ERF_25 -1.2641078988989163521950692586675857265291969432213552733563059066748632E-27
|
83 |
|
|
#define TAYLOR_ERF_26 4.6784835155184857737263085770716162592880293254201102279514950101899871E-29
|
84 |
|
|
#define TAYLOR_ERF_27 -1.6697617934173720269864939702679842541566703989714871520634965356233624E-30
|
85 |
|
|
#define TAYLOR_ERF_28 5.7541916439821717721965644338808981189609568886862025916975131240153466E-32
|
86 |
|
|
#define TAYLOR_ERF_29 -1.9169428621097825307726719621929350834644917747230482041306735714136456E-33
|
87 |
|
|
#define TAYLOR_ERF_30 6.1803075882227961374638057797477142035193997108557291827163792739565622E-35
|
88 |
|
|
#define TAYLOR_ERF_31 -1.9303572088151078565555153741147494440075954038003045578376811864380455E-36
|
89 |
|
|
#define TAYLOR_ERF_32 5.8467550074688362962979552196744814890614668480489993819122074396921572E-38
|
90 |
|
|
#define TAYLOR_ERF_33 -1.7188560628017836239681912676564509126594090688520350964463748691994130E-39
|
91 |
|
|
#define TAYLOR_ERF_34 4.9089239645234229670020807729318930583197104694410209489303971115243253E-41
|
92 |
|
|
#define TAYLOR_ERF_35 -1.3630412617791395763506783635102640685072837923196396196225247512884444E-42
|
93 |
|
|
#define TAYLOR_ERF_36 3.6824935154611457351939940566677606112639706717920248475342183158858278E-44
|
94 |
|
|
#define TAYLOR_ERF_37 -9.6872802388707617538436600409638387251268417672366779772972229571050606E-46
|
95 |
|
|
#define TAYLOR_ERF_38 2.4830690974549115910398991902675594818336060579041382375163763560590552E-47
|
96 |
|
|
#define TAYLOR_ERF_39 -6.2056579196373967059419746072899084745598074150801247740591035188752759E-49
|
97 |
|
|
#define TAYLOR_ERF_40 1.5131079495412170980537530678268603996611876104670674603415715370097123E-50
|
98 |
|
|
#define TAYLOR_ERF_41 -3.6015793098101259166133998969725445892611283117200253978156713046660799E-52
|
99 |
|
|
#define TAYLOR_ERF_42 8.3734196838722815428266720293759440030440798283686864991232694198118944E-54
|
100 |
|
|
#define TAYLOR_ERF_43 -1.9025412272898795272394202686366085010926137006451172211319911806576077E-55
|
101 |
|
|
#define TAYLOR_ERF_44 4.2267897541935525758383443148974703675959497435169866761614717241371774E-57
|
102 |
|
|
#define TAYLOR_ERF_45 -9.1864295023986856959612367283485924961181813717463202485560679718732304E-59
|
103 |
|
|
|
104 |
|
|
/*
|
105 |
|
|
* Taylor Series Expansion of Erf
|
106 |
|
|
*
|
107 |
|
|
* infinite
|
108 |
|
|
* ---------
|
109 |
|
|
* - n 2n
|
110 |
|
|
* 2 * x - -1 * x
|
111 |
|
|
* erf(x) = ---- * - ------------
|
112 |
|
|
* sqrt(pi) - (2n + 1) * n!
|
113 |
|
|
* -
|
114 |
|
|
* ---------
|
115 |
|
|
* n = 0
|
116 |
|
|
*
|
117 |
|
|
* 45 terms give us accurate results for 0 <= x < 2.5
|
118 |
|
|
*/
|
119 |
|
|
#define TAYLOR_ERF(_xabs, _xsqu, _tresult) { \
|
120 |
|
|
_tresult = spu_madd(_xsqu, spu_splats(TAYLOR_ERF_45), spu_splats(TAYLOR_ERF_44)); \
|
121 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_43)); \
|
122 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_42)); \
|
123 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_41)); \
|
124 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_40)); \
|
125 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_39)); \
|
126 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_38)); \
|
127 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_37)); \
|
128 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_36)); \
|
129 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_35)); \
|
130 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_34)); \
|
131 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_33)); \
|
132 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_32)); \
|
133 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_31)); \
|
134 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_30)); \
|
135 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_29)); \
|
136 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_28)); \
|
137 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_27)); \
|
138 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_26)); \
|
139 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_25)); \
|
140 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_24)); \
|
141 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_23)); \
|
142 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_22)); \
|
143 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_21)); \
|
144 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_20)); \
|
145 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_19)); \
|
146 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_18)); \
|
147 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_17)); \
|
148 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_16)); \
|
149 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_15)); \
|
150 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_14)); \
|
151 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_13)); \
|
152 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_12)); \
|
153 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_11)); \
|
154 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_10)); \
|
155 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_09)); \
|
156 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_08)); \
|
157 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_07)); \
|
158 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_06)); \
|
159 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_05)); \
|
160 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_04)); \
|
161 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_03)); \
|
162 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_02)); \
|
163 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_01)); \
|
164 |
|
|
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_00)); \
|
165 |
|
|
_tresult = spu_mul(_tresult, _xabs); \
|
166 |
|
|
_tresult = spu_mul(_tresult, spu_splats(TWO_OVER_SQRT_PI)); \
|
167 |
|
|
}
|
168 |
|
|
|
169 |
|
|
|
170 |
|
|
/*
|
171 |
|
|
* Continued Fractions Approximation of Erfc()
|
172 |
|
|
* ( )
|
173 |
|
|
* 1 ( 1 v 2v 3v )
|
174 |
|
|
* erfc(x) = ------------------------- * ( --- --- --- --- ... )
|
175 |
|
|
* sqrt(pi) * x * exp(x^2) ( 1+ 1+ 1+ 1+ )
|
176 |
|
|
* ( )
|
177 |
|
|
* Continued Fractions
|
178 |
|
|
* 1
|
179 |
|
|
* v = -----
|
180 |
|
|
* 2*x^2
|
181 |
|
|
*
|
182 |
|
|
* We are using a backward recurrence calculation to estimate the continued fraction.
|
183 |
|
|
*
|
184 |
|
|
* p = a p + b q
|
185 |
|
|
* m,n m m+1,n m m+1,n
|
186 |
|
|
*
|
187 |
|
|
* q = p
|
188 |
|
|
* m,n m+1,n
|
189 |
|
|
*
|
190 |
|
|
* With,
|
191 |
|
|
*
|
192 |
|
|
* p = a ; q = 1
|
193 |
|
|
* n,n n n,n
|
194 |
|
|
*
|
195 |
|
|
*
|
196 |
|
|
* a = 0, b = 1,
|
197 |
|
|
* 0 0
|
198 |
|
|
*
|
199 |
|
|
* a = 1, b = n/2x^2
|
200 |
|
|
* n n
|
201 |
|
|
*
|
202 |
|
|
*
|
203 |
|
|
* F = p / q
|
204 |
|
|
* 0,n 0,n 0,n
|
205 |
|
|
*
|
206 |
|
|
* Ref: "Computing the Incomplete Gamma Function to Arbitrary Precision",
|
207 |
|
|
* by Serge Winitzki, Department of Physics, Ludwig-Maximilians University, Munich, Germany.
|
208 |
|
|
*
|
209 |
|
|
*/
|
210 |
|
|
|
211 |
|
|
#define CONTFRAC_ERFCF4(_xabs, _xsqu, _presult) { \
|
212 |
|
|
vec_float4 v; \
|
213 |
|
|
vec_float4 p, q, plast, qlast; \
|
214 |
|
|
vec_float4 factor; \
|
215 |
|
|
vec_float4 inv_xsqu; \
|
216 |
|
|
inv_xsqu = _recipf4(_xsqu); \
|
217 |
|
|
v = spu_mul(inv_xsqu, onehalff); \
|
218 |
|
|
p = spu_splats(1.945f); q = onef; plast = p; qlast = q; \
|
219 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats( 4.0f)), plast); q = plast; plast = p; qlast = q; \
|
220 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats( 3.0f)), plast); q = plast; plast = p; qlast = q; \
|
221 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats( 2.0f)), plast); q = plast; plast = p; qlast = q; \
|
222 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats( 1.0f)), plast); q = plast; plast = p; qlast = q; \
|
223 |
|
|
p = qlast; q = plast; \
|
224 |
|
|
factor = spu_mul(spu_splats((float)SQRT_PI), spu_mul(_xabs, _expf4(_xsqu))); \
|
225 |
|
|
_presult = _divf4(p, spu_mul(factor, q)); \
|
226 |
|
|
}
|
227 |
|
|
|
228 |
|
|
#define CONTFRAC_ERFC(_xabs, _xsqu, _presult) { \
|
229 |
|
|
vec_double2 v; \
|
230 |
|
|
vec_double2 p, q, plast, qlast; \
|
231 |
|
|
vec_double2 factor; \
|
232 |
|
|
vec_double2 inv_xsqu; \
|
233 |
|
|
inv_xsqu = _recipd2(_xsqu); \
|
234 |
|
|
v = spu_mul(inv_xsqu, onehalfd); \
|
235 |
|
|
p = spu_splats(3.025); q = oned; plast = p; qlast = q; \
|
236 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(40.0)), plast); q = plast; plast = p; qlast = q; \
|
237 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(39.0)), plast); q = plast; plast = p; qlast = q; \
|
238 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(38.0)), plast); q = plast; plast = p; qlast = q; \
|
239 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(37.0)), plast); q = plast; plast = p; qlast = q; \
|
240 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(36.0)), plast); q = plast; plast = p; qlast = q; \
|
241 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(35.0)), plast); q = plast; plast = p; qlast = q; \
|
242 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(34.0)), plast); q = plast; plast = p; qlast = q; \
|
243 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(33.0)), plast); q = plast; plast = p; qlast = q; \
|
244 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(32.0)), plast); q = plast; plast = p; qlast = q; \
|
245 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(31.0)), plast); q = plast; plast = p; qlast = q; \
|
246 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(30.0)), plast); q = plast; plast = p; qlast = q; \
|
247 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(29.0)), plast); q = plast; plast = p; qlast = q; \
|
248 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(28.0)), plast); q = plast; plast = p; qlast = q; \
|
249 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(27.0)), plast); q = plast; plast = p; qlast = q; \
|
250 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(26.0)), plast); q = plast; plast = p; qlast = q; \
|
251 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(25.0)), plast); q = plast; plast = p; qlast = q; \
|
252 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(24.0)), plast); q = plast; plast = p; qlast = q; \
|
253 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(23.0)), plast); q = plast; plast = p; qlast = q; \
|
254 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(22.0)), plast); q = plast; plast = p; qlast = q; \
|
255 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(21.0)), plast); q = plast; plast = p; qlast = q; \
|
256 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(20.0)), plast); q = plast; plast = p; qlast = q; \
|
257 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(19.0)), plast); q = plast; plast = p; qlast = q; \
|
258 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(18.0)), plast); q = plast; plast = p; qlast = q; \
|
259 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(17.0)), plast); q = plast; plast = p; qlast = q; \
|
260 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(16.0)), plast); q = plast; plast = p; qlast = q; \
|
261 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(15.0)), plast); q = plast; plast = p; qlast = q; \
|
262 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(14.0)), plast); q = plast; plast = p; qlast = q; \
|
263 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(13.0)), plast); q = plast; plast = p; qlast = q; \
|
264 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(12.0)), plast); q = plast; plast = p; qlast = q; \
|
265 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(11.0)), plast); q = plast; plast = p; qlast = q; \
|
266 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats(10.0)), plast); q = plast; plast = p; qlast = q; \
|
267 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats( 9.0)), plast); q = plast; plast = p; qlast = q; \
|
268 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats( 8.0)), plast); q = plast; plast = p; qlast = q; \
|
269 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats( 7.0)), plast); q = plast; plast = p; qlast = q; \
|
270 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats( 6.0)), plast); q = plast; plast = p; qlast = q; \
|
271 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats( 5.0)), plast); q = plast; plast = p; qlast = q; \
|
272 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats( 4.0)), plast); q = plast; plast = p; qlast = q; \
|
273 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats( 3.0)), plast); q = plast; plast = p; qlast = q; \
|
274 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats( 2.0)), plast); q = plast; plast = p; qlast = q; \
|
275 |
|
|
p = spu_madd(qlast, spu_mul(v, spu_splats( 1.0)), plast); q = plast; plast = p; qlast = q; \
|
276 |
|
|
p = qlast; q = plast; \
|
277 |
|
|
factor = spu_mul(spu_splats(SQRT_PI), spu_mul(_xabs, _expd2(_xsqu))); \
|
278 |
|
|
_presult = _divd2(p, spu_mul(factor, q)); \
|
279 |
|
|
}
|
280 |
|
|
|
281 |
|
|
#endif /* _ERF_UTILS_H_ */
|
282 |
|
|
#endif /* __SPU__ */
|