OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libquadmath/] [math/] [powq.c] - Blame information for rev 740

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 740 jeremybenn
/*
2
 * ====================================================
3
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4
 *
5
 * Developed at SunPro, a Sun Microsystems, Inc. business.
6
 * Permission to use, copy, modify, and distribute this
7
 * software is freely granted, provided that this notice
8
 * is preserved.
9
 * ====================================================
10
 */
11
 
12
/* Expansions and modifications for 128-bit long double are
13
   Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14
   and are incorporated herein by permission of the author.  The author
15
   reserves the right to distribute this material elsewhere under different
16
   copying permissions.  These modifications are distributed here under
17
   the following terms:
18
 
19
    This library is free software; you can redistribute it and/or
20
    modify it under the terms of the GNU Lesser General Public
21
    License as published by the Free Software Foundation; either
22
    version 2.1 of the License, or (at your option) any later version.
23
 
24
    This library is distributed in the hope that it will be useful,
25
    but WITHOUT ANY WARRANTY; without even the implied warranty of
26
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27
    Lesser General Public License for more details.
28
 
29
    You should have received a copy of the GNU Lesser General Public
30
    License along with this library; if not, write to the Free Software
31
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA */
32
 
33
/* __ieee754_powl(x,y) return x**y
34
 *
35
 *                    n
36
 * Method:  Let x =  2   * (1+f)
37
 *      1. Compute and return log2(x) in two pieces:
38
 *              log2(x) = w1 + w2,
39
 *         where w1 has 113-53 = 60 bit trailing zeros.
40
 *      2. Perform y*log2(x) = n+y' by simulating muti-precision
41
 *         arithmetic, where |y'|<=0.5.
42
 *      3. Return x**y = 2**n*exp(y'*log2)
43
 *
44
 * Special cases:
45
 *      1.  (anything) ** 0  is 1
46
 *      2.  (anything) ** 1  is itself
47
 *      3.  (anything) ** NAN is NAN
48
 *      4.  NAN ** (anything except 0) is NAN
49
 *      5.  +-(|x| > 1) **  +INF is +INF
50
 *      6.  +-(|x| > 1) **  -INF is +0
51
 *      7.  +-(|x| < 1) **  +INF is +0
52
 *      8.  +-(|x| < 1) **  -INF is +INF
53
 *      9.  +-1         ** +-INF is NAN
54
 *      10. +0 ** (+anything except 0, NAN)               is +0
55
 *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
56
 *      12. +0 ** (-anything except 0, NAN)               is +INF
57
 *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
58
 *      14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59
 *      15. +INF ** (+anything except 0,NAN) is +INF
60
 *      16. +INF ** (-anything except 0,NAN) is +0
61
 *      17. -INF ** (anything)  = -0 ** (-anything)
62
 *      18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63
 *      19. (-anything except 0 and inf) ** (non-integer) is NAN
64
 *
65
 */
66
 
67
#include "quadmath-imp.h"
68
 
69
static const __float128 bp[] = {
70
  1.0Q,
71
  1.5Q,
72
};
73
 
74
/* log_2(1.5) */
75
static const __float128 dp_h[] = {
76
  0.0,
77
  5.8496250072115607565592654282227158546448E-1Q
78
};
79
 
80
/* Low part of log_2(1.5) */
81
static const __float128 dp_l[] = {
82
  0.0,
83
  1.0579781240112554492329533686862998106046E-16Q
84
};
85
 
86
static const __float128 zero = 0.0Q,
87
  one = 1.0Q,
88
  two = 2.0Q,
89
  two113 = 1.0384593717069655257060992658440192E34Q,
90
  huge = 1.0e3000Q,
91
  tiny = 1.0e-3000Q;
92
 
93
/* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
94
   z = (x-1)/(x+1)
95
   1 <= x <= 1.25
96
   Peak relative error 2.3e-37 */
97
static const __float128 LN[] =
98
{
99
 -3.0779177200290054398792536829702930623200E1Q,
100
  6.5135778082209159921251824580292116201640E1Q,
101
 -4.6312921812152436921591152809994014413540E1Q,
102
  1.2510208195629420304615674658258363295208E1Q,
103
 -9.9266909031921425609179910128531667336670E-1Q
104
};
105
static const __float128 LD[] =
106
{
107
 -5.129862866715009066465422805058933131960E1Q,
108
  1.452015077564081884387441590064272782044E2Q,
109
 -1.524043275549860505277434040464085593165E2Q,
110
  7.236063513651544224319663428634139768808E1Q,
111
 -1.494198912340228235853027849917095580053E1Q
112
  /* 1.0E0 */
113
};
114
 
115
/* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
116
 
117
   Peak relative error 5.7e-38  */
118
static const __float128 PN[] =
119
{
120
  5.081801691915377692446852383385968225675E8Q,
121
  9.360895299872484512023336636427675327355E6Q,
122
  4.213701282274196030811629773097579432957E4Q,
123
  5.201006511142748908655720086041570288182E1Q,
124
  9.088368420359444263703202925095675982530E-3Q,
125
};
126
static const __float128 PD[] =
127
{
128
  3.049081015149226615468111430031590411682E9Q,
129
  1.069833887183886839966085436512368982758E8Q,
130
  8.259257717868875207333991924545445705394E5Q,
131
  1.872583833284143212651746812884298360922E3Q,
132
  /* 1.0E0 */
133
};
134
 
135
static const __float128
136
  /* ln 2 */
137
  lg2 = 6.9314718055994530941723212145817656807550E-1Q,
138
  lg2_h = 6.9314718055994528622676398299518041312695E-1Q,
139
  lg2_l = 2.3190468138462996154948554638754786504121E-17Q,
140
  ovt = 8.0085662595372944372e-0017Q,
141
  /* 2/(3*log(2)) */
142
  cp = 9.6179669392597560490661645400126142495110E-1Q,
143
  cp_h = 9.6179669392597555432899980587535537779331E-1Q,
144
  cp_l = 5.0577616648125906047157785230014751039424E-17Q;
145
 
146
__float128
147
powq (__float128 x, __float128 y)
148
{
149
  __float128 z, ax, z_h, z_l, p_h, p_l;
150
  __float128 y1, t1, t2, r, s, t, u, v, w;
151
  __float128 s2, s_h, s_l, t_h, t_l;
152
  int32_t i, j, k, yisint, n;
153
  uint32_t ix, iy;
154
  int32_t hx, hy;
155
  ieee854_float128 o, p, q;
156
 
157
  p.value = x;
158
  hx = p.words32.w0;
159
  ix = hx & 0x7fffffff;
160
 
161
  q.value = y;
162
  hy = q.words32.w0;
163
  iy = hy & 0x7fffffff;
164
 
165
 
166
  /* y==zero: x**0 = 1 */
167
  if ((iy | q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
168
    return one;
169
 
170
  /* 1.0**y = 1; -1.0**+-Inf = 1 */
171
  if (x == one)
172
    return one;
173
  if (x == -1.0Q && iy == 0x7fff0000
174
      && (q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
175
    return one;
176
 
177
  /* +-NaN return x+y */
178
  if ((ix > 0x7fff0000)
179
      || ((ix == 0x7fff0000)
180
          && ((p.words32.w1 | p.words32.w2 | p.words32.w3) != 0))
181
      || (iy > 0x7fff0000)
182
      || ((iy == 0x7fff0000)
183
          && ((q.words32.w1 | q.words32.w2 | q.words32.w3) != 0)))
184
    return x + y;
185
 
186
  /* determine if y is an odd int when x < 0
187
   * yisint = 0       ... y is not an integer
188
   * yisint = 1       ... y is an odd int
189
   * yisint = 2       ... y is an even int
190
   */
191
  yisint = 0;
192
  if (hx < 0)
193
    {
194
      if (iy >= 0x40700000)     /* 2^113 */
195
        yisint = 2;             /* even integer y */
196
      else if (iy >= 0x3fff0000)        /* 1.0 */
197
        {
198
          if (floorq (y) == y)
199
            {
200
              z = 0.5 * y;
201
              if (floorq (z) == z)
202
                yisint = 2;
203
              else
204
                yisint = 1;
205
            }
206
        }
207
    }
208
 
209
  /* special value of y */
210
  if ((q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
211
    {
212
      if (iy == 0x7fff0000)     /* y is +-inf */
213
        {
214
          if (((ix - 0x3fff0000) | p.words32.w1 | p.words32.w2 | p.words32.w3)
215
              == 0)
216
            return y - y;       /* +-1**inf is NaN */
217
          else if (ix >= 0x3fff0000)    /* (|x|>1)**+-inf = inf,0 */
218
            return (hy >= 0) ? y : zero;
219
          else                  /* (|x|<1)**-,+inf = inf,0 */
220
            return (hy < 0) ? -y : zero;
221
        }
222
      if (iy == 0x3fff0000)
223
        {                       /* y is  +-1 */
224
          if (hy < 0)
225
            return one / x;
226
          else
227
            return x;
228
        }
229
      if (hy == 0x40000000)
230
        return x * x;           /* y is  2 */
231
      if (hy == 0x3ffe0000)
232
        {                       /* y is  0.5 */
233
          if (hx >= 0)           /* x >= +0 */
234
            return sqrtq (x);
235
        }
236
    }
237
 
238
  ax = fabsq (x);
239
  /* special value of x */
240
  if ((p.words32.w1 | p.words32.w2 | p.words32.w3) == 0)
241
    {
242
      if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
243
        {
244
          z = ax;               /*x is +-0,+-inf,+-1 */
245
          if (hy < 0)
246
            z = one / z;        /* z = (1/|x|) */
247
          if (hx < 0)
248
            {
249
              if (((ix - 0x3fff0000) | yisint) == 0)
250
                {
251
                  z = (z - z) / (z - z);        /* (-1)**non-int is NaN */
252
                }
253
              else if (yisint == 1)
254
                z = -z;         /* (x<0)**odd = -(|x|**odd) */
255
            }
256
          return z;
257
        }
258
    }
259
 
260
  /* (x<0)**(non-int) is NaN */
261
  if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
262
    return (x - x) / (x - x);
263
 
264
  /* |y| is huge.
265
     2^-16495 = 1/2 of smallest representable value.
266
     If (1 - 1/131072)^y underflows, y > 1.4986e9 */
267
  if (iy > 0x401d654b)
268
    {
269
      /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
270
      if (iy > 0x407d654b)
271
        {
272
          if (ix <= 0x3ffeffff)
273
            return (hy < 0) ? huge * huge : tiny * tiny;
274
          if (ix >= 0x3fff0000)
275
            return (hy > 0) ? huge * huge : tiny * tiny;
276
        }
277
      /* over/underflow if x is not close to one */
278
      if (ix < 0x3ffeffff)
279
        return (hy < 0) ? huge * huge : tiny * tiny;
280
      if (ix > 0x3fff0000)
281
        return (hy > 0) ? huge * huge : tiny * tiny;
282
    }
283
 
284
  n = 0;
285
  /* take care subnormal number */
286
  if (ix < 0x00010000)
287
    {
288
      ax *= two113;
289
      n -= 113;
290
      o.value = ax;
291
      ix = o.words32.w0;
292
    }
293
  n += ((ix) >> 16) - 0x3fff;
294
  j = ix & 0x0000ffff;
295
  /* determine interval */
296
  ix = j | 0x3fff0000;          /* normalize ix */
297
  if (j <= 0x3988)
298
    k = 0;                       /* |x|<sqrt(3/2) */
299
  else if (j < 0xbb67)
300
    k = 1;                      /* |x|<sqrt(3)   */
301
  else
302
    {
303
      k = 0;
304
      n += 1;
305
      ix -= 0x00010000;
306
    }
307
 
308
  o.value = ax;
309
  o.words32.w0 = ix;
310
  ax = o.value;
311
 
312
  /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
313
  u = ax - bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
314
  v = one / (ax + bp[k]);
315
  s = u * v;
316
  s_h = s;
317
 
318
  o.value = s_h;
319
  o.words32.w3 = 0;
320
  o.words32.w2 &= 0xf8000000;
321
  s_h = o.value;
322
  /* t_h=ax+bp[k] High */
323
  t_h = ax + bp[k];
324
  o.value = t_h;
325
  o.words32.w3 = 0;
326
  o.words32.w2 &= 0xf8000000;
327
  t_h = o.value;
328
  t_l = ax - (t_h - bp[k]);
329
  s_l = v * ((u - s_h * t_h) - s_h * t_l);
330
  /* compute log(ax) */
331
  s2 = s * s;
332
  u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
333
  v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
334
  r = s2 * s2 * u / v;
335
  r += s_l * (s_h + s);
336
  s2 = s_h * s_h;
337
  t_h = 3.0 + s2 + r;
338
  o.value = t_h;
339
  o.words32.w3 = 0;
340
  o.words32.w2 &= 0xf8000000;
341
  t_h = o.value;
342
  t_l = r - ((t_h - 3.0) - s2);
343
  /* u+v = s*(1+...) */
344
  u = s_h * t_h;
345
  v = s_l * t_h + t_l * s;
346
  /* 2/(3log2)*(s+...) */
347
  p_h = u + v;
348
  o.value = p_h;
349
  o.words32.w3 = 0;
350
  o.words32.w2 &= 0xf8000000;
351
  p_h = o.value;
352
  p_l = v - (p_h - u);
353
  z_h = cp_h * p_h;             /* cp_h+cp_l = 2/(3*log2) */
354
  z_l = cp_l * p_h + p_l * cp + dp_l[k];
355
  /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
356
  t = (__float128) n;
357
  t1 = (((z_h + z_l) + dp_h[k]) + t);
358
  o.value = t1;
359
  o.words32.w3 = 0;
360
  o.words32.w2 &= 0xf8000000;
361
  t1 = o.value;
362
  t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
363
 
364
  /* s (sign of result -ve**odd) = -1 else = 1 */
365
  s = one;
366
  if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
367
    s = -one;                   /* (-ve)**(odd int) */
368
 
369
  /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
370
  y1 = y;
371
  o.value = y1;
372
  o.words32.w3 = 0;
373
  o.words32.w2 &= 0xf8000000;
374
  y1 = o.value;
375
  p_l = (y - y1) * t1 + y * t2;
376
  p_h = y1 * t1;
377
  z = p_l + p_h;
378
  o.value = z;
379
  j = o.words32.w0;
380
  if (j >= 0x400d0000) /* z >= 16384 */
381
    {
382
      /* if z > 16384 */
383
      if (((j - 0x400d0000) | o.words32.w1 | o.words32.w2 | o.words32.w3) != 0)
384
        return s * huge * huge; /* overflow */
385
      else
386
        {
387
          if (p_l + ovt > z - p_h)
388
            return s * huge * huge;     /* overflow */
389
        }
390
    }
391
  else if ((j & 0x7fffffff) >= 0x400d01b9)      /* z <= -16495 */
392
    {
393
      /* z < -16495 */
394
      if (((j - 0xc00d01bc) | o.words32.w1 | o.words32.w2 | o.words32.w3)
395
          != 0)
396
        return s * tiny * tiny; /* underflow */
397
      else
398
        {
399
          if (p_l <= z - p_h)
400
            return s * tiny * tiny;     /* underflow */
401
        }
402
    }
403
  /* compute 2**(p_h+p_l) */
404
  i = j & 0x7fffffff;
405
  k = (i >> 16) - 0x3fff;
406
  n = 0;
407
  if (i > 0x3ffe0000)
408
    {                           /* if |z| > 0.5, set n = [z+0.5] */
409
      n = floorq (z + 0.5Q);
410
      t = n;
411
      p_h -= t;
412
    }
413
  t = p_l + p_h;
414
  o.value = t;
415
  o.words32.w3 = 0;
416
  o.words32.w2 &= 0xf8000000;
417
  t = o.value;
418
  u = t * lg2_h;
419
  v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
420
  z = u + v;
421
  w = v - (z - u);
422
  /*  exp(z) */
423
  t = z * z;
424
  u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
425
  v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
426
  t1 = z - t * u / v;
427
  r = (z * t1) / (t1 - two) - (w + z * w);
428
  z = one - (r - z);
429
  o.value = z;
430
  j = o.words32.w0;
431
  j += (n << 16);
432
  if ((j >> 16) <= 0)
433
    z = scalbnq (z, n); /* subnormal output */
434
  else
435
    {
436
      o.words32.w0 = j;
437
      z = o.value;
438
    }
439
  return s * z;
440
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.