URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libquadmath/] [math/] [fmaq.c] - Blame information for rev 792

Go to most recent revision | Details | Compare with Previous | View Log


/* Compute x * y + z as ternary operation.
   Copyright (C) 2010 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
 
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
 
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
 
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
 
#include "quadmath-imp.h"
#include <math.h>
#include <float.h>
#ifdef HAVE_FENV_H
# include <fenv.h>
# if defined HAVE_FEHOLDEXCEPT && defined HAVE_FESETROUND \
     && defined HAVE_FEUPDATEENV && defined HAVE_FETESTEXCEPT \
     && defined FE_TOWARDZERO && defined FE_INEXACT
#  define USE_FENV_H
# endif
#endif
 
/* This implementation uses rounding to odd to avoid problems with
   double rounding.  See a paper by Boldo and Melquiond:
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */
 
__float128
fmaq (__float128 x, __float128 y, __float128 z)
{
  ieee854_float128 u, v, w;
  int adjust = 0;
  u.value = x;
  v.value = y;
  w.value = z;
  if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
                        >= 0x7fff + IEEE854_FLOAT128_BIAS
                           - FLT128_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
      || __builtin_expect (v.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
      || __builtin_expect (w.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent + v.ieee.exponent
                           <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG, 0))
    {
      /* If z is Inf, but x and y are finite, the result should be
         z rather than NaN.  */
      if (w.ieee.exponent == 0x7fff
          && u.ieee.exponent != 0x7fff
          && v.ieee.exponent != 0x7fff)
        return (z + x) + y;
      /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
         or if x * y is less than half of FLT128_DENORM_MIN,
         compute as x * y + z.  */
      if (u.ieee.exponent == 0x7fff
          || v.ieee.exponent == 0x7fff
          || w.ieee.exponent == 0x7fff
          || u.ieee.exponent + v.ieee.exponent
             > 0x7fff + IEEE854_FLOAT128_BIAS
          || u.ieee.exponent + v.ieee.exponent
             < IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG - 2)
        return x * y + z;
      if (u.ieee.exponent + v.ieee.exponent
          >= 0x7fff + IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG)
        {
          /* Compute 1p-113 times smaller result and multiply
             at the end.  */
          if (u.ieee.exponent > v.ieee.exponent)
            u.ieee.exponent -= FLT128_MANT_DIG;
          else
            v.ieee.exponent -= FLT128_MANT_DIG;
          /* If x + y exponent is very large and z exponent is very small,
             it doesn't matter if we don't adjust it.  */
          if (w.ieee.exponent > FLT128_MANT_DIG)
            w.ieee.exponent -= FLT128_MANT_DIG;
          adjust = 1;
        }
      else if (w.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
        {
          /* Similarly.
             If z exponent is very large and x and y exponents are
             very small, it doesn't matter if we don't adjust it.  */
          if (u.ieee.exponent > v.ieee.exponent)
            {
              if (u.ieee.exponent > FLT128_MANT_DIG)
                u.ieee.exponent -= FLT128_MANT_DIG;
            }
          else if (v.ieee.exponent > FLT128_MANT_DIG)
            v.ieee.exponent -= FLT128_MANT_DIG;
          w.ieee.exponent -= FLT128_MANT_DIG;
          adjust = 1;
        }
      else if (u.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
        {
          u.ieee.exponent -= FLT128_MANT_DIG;
          if (v.ieee.exponent)
            v.ieee.exponent += FLT128_MANT_DIG;
          else
            v.value *= 0x1p113Q;
        }
      else if (v.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
        {
          v.ieee.exponent -= FLT128_MANT_DIG;
          if (u.ieee.exponent)
            u.ieee.exponent += FLT128_MANT_DIG;
          else
            u.value *= 0x1p113Q;
        }
      else /* if (u.ieee.exponent + v.ieee.exponent
                  <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG) */
        {
          if (u.ieee.exponent > v.ieee.exponent)
            u.ieee.exponent += 2 * FLT128_MANT_DIG;
          else
            v.ieee.exponent += 2 * FLT128_MANT_DIG;
          if (w.ieee.exponent <= 4 * FLT128_MANT_DIG + 4)
            {
              if (w.ieee.exponent)
                w.ieee.exponent += 2 * FLT128_MANT_DIG;
              else
                w.value *= 0x1p226Q;
              adjust = -1;
            }
          /* Otherwise x * y should just affect inexact
             and nothing else.  */
        }
      x = u.value;
      y = v.value;
      z = w.value;
    }
  /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
#define C ((1LL << (FLT128_MANT_DIG + 1) / 2) + 1)
  __float128 x1 = x * C;
  __float128 y1 = y * C;
  __float128 m1 = x * y;
  x1 = (x - x1) + x1;
  y1 = (y - y1) + y1;
  __float128 x2 = x - x1;
  __float128 y2 = y - y1;
  __float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
 
  /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
  __float128 a1 = z + m1;
  __float128 t1 = a1 - z;
  __float128 t2 = a1 - t1;
  t1 = m1 - t1;
  t2 = z - t2;
  __float128 a2 = t1 + t2;
 
#ifdef USE_FENV_H
  fenv_t env;
  feholdexcept (&env);
  fesetround (FE_TOWARDZERO);
#endif
  /* Perform m2 + a2 addition with round to odd.  */
  u.value = a2 + m2;
 
  if (__builtin_expect (adjust == 0, 1))
    {
#ifdef USE_FENV_H
      if ((u.ieee.mant_low & 1) == 0 && u.ieee.exponent != 0x7fff)
        u.ieee.mant_low |= fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
#endif
      /* Result is a1 + u.value.  */
      return a1 + u.value;
    }
  else if (__builtin_expect (adjust > 0, 1))
    {
#ifdef USE_FENV_H
      if ((u.ieee.mant_low & 1) == 0 && u.ieee.exponent != 0x7fff)
        u.ieee.mant_low |= fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
#endif
      /* Result is a1 + u.value, scaled up.  */
      return (a1 + u.value) * 0x1p113Q;
    }
  else
    {
#ifdef USE_FENV_H
      if ((u.ieee.mant_low & 1) == 0)
        u.ieee.mant_low |= fetestexcept (FE_INEXACT) != 0;
#endif
      v.value = a1 + u.value;
      /* Ensure the addition is not scheduled after fetestexcept call.  */
      asm volatile ("" : : "m" (v));
#ifdef USE_FENV_H
      int j = fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
#else
      int j = 0;
#endif
      /* Ensure the following computations are performed in default rounding
         mode instead of just reusing the round to zero computation.  */
      asm volatile ("" : "=m" (u) : "m" (u));
      /* If a1 + u.value is exact, the only rounding happens during
         scaling down.  */
      if (j == 0)
        return v.value * 0x1p-226Q;
      /* If result rounded to zero is not subnormal, no double
         rounding will occur.  */
      if (v.ieee.exponent > 226)
        return (a1 + u.value) * 0x1p-226Q;
      /* If v.value * 0x1p-226Q with round to zero is a subnormal above
         or equal to FLT128_MIN / 2, then v.value * 0x1p-226Q shifts mantissa
         down just by 1 bit, which means v.ieee.mant_low |= j would
         change the round bit, not sticky or guard bit.
         v.value * 0x1p-226Q never normalizes by shifting up,
         so round bit plus sticky bit should be already enough
         for proper rounding.  */
      if (v.ieee.exponent == 226)
        {
          /* v.ieee.mant_low & 2 is LSB bit of the result before rounding,
             v.ieee.mant_low & 1 is the round bit and j is our sticky
             bit.  In round-to-nearest 001 rounds down like 00,
             011 rounds up, even though 01 rounds down (thus we need
             to adjust), 101 rounds down like 10 and 111 rounds up
             like 11.  */
          if ((v.ieee.mant_low & 3) == 1)
            {
              v.value *= 0x1p-226Q;
              if (v.ieee.negative)
                return v.value - 0x1p-16494Q /* __FLT128_DENORM_MIN__ */;
              else
                return v.value + 0x1p-16494Q /* __FLT128_DENORM_MIN__ */;
            }
          else
            return v.value * 0x1p-226Q;
        }
      v.ieee.mant_low |= j;
      return v.value * 0x1p-226Q;
    }
}

Browse

Tools

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libquadmath/] [math/] [fmaq.c] - Blame information for rev 792

Line No.	Rev	Author	Line
1	740	jeremybenn	`/* Compute x * y + z as ternary operation.`
2			`Copyright (C) 2010 Free Software Foundation, Inc.`
3			`This file is part of the GNU C Library.`
4			`Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.`
5
6			`The GNU C Library is free software; you can redistribute it and/or`
7			`modify it under the terms of the GNU Lesser General Public`
8			`License as published by the Free Software Foundation; either`
9			`version 2.1 of the License, or (at your option) any later version.`
10
11			`The GNU C Library is distributed in the hope that it will be useful,`
12			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
13			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
14			`Lesser General Public License for more details.`
15
16			`You should have received a copy of the GNU Lesser General Public`
17			`License along with the GNU C Library; if not, write to the Free`
18			`Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA`
19			`02111-1307 USA. */`
20
21			`#include "quadmath-imp.h"`
22			`#include <math.h>`
23			`#include <float.h>`
24			`#ifdef HAVE_FENV_H`
25			`# include <fenv.h>`
26			`# if defined HAVE_FEHOLDEXCEPT && defined HAVE_FESETROUND \`
27			`&& defined HAVE_FEUPDATEENV && defined HAVE_FETESTEXCEPT \`
28			`&& defined FE_TOWARDZERO && defined FE_INEXACT`
29			`# define USE_FENV_H`
30			`# endif`
31			`#endif`
32
33			`/* This implementation uses rounding to odd to avoid problems with`
34			`double rounding. See a paper by Boldo and Melquiond:`
35			`http://www.lri.fr/~melquion/doc/08-tc.pdf */`
36
37			`__float128`
38			`fmaq (__float128 x, __float128 y, __float128 z)`
39			`{`
40			`ieee854_float128 u, v, w;`
41			`int adjust = 0;`
42			`u.value = x;`
43			`v.value = y;`
44			`w.value = z;`
45			`if (__builtin_expect (u.ieee.exponent + v.ieee.exponent`
46			`>= 0x7fff + IEEE854_FLOAT128_BIAS`
47			`- FLT128_MANT_DIG, 0)`
48			`\|\| __builtin_expect (u.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)`
49			`\|\| __builtin_expect (v.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)`
50			`\|\| __builtin_expect (w.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)`
51			`\|\| __builtin_expect (u.ieee.exponent + v.ieee.exponent`
52			`<= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG, 0))`
53			`{`
54			`/* If z is Inf, but x and y are finite, the result should be`
55			`z rather than NaN. */`
56			`if (w.ieee.exponent == 0x7fff`
57			`&& u.ieee.exponent != 0x7fff`
58			`&& v.ieee.exponent != 0x7fff)`
59			`return (z + x) + y;`
60			`/* If x or y or z is Inf/NaN, or if fma will certainly overflow,`
61			`or if x * y is less than half of FLT128_DENORM_MIN,`
62			`compute as x * y + z. */`
63			`if (u.ieee.exponent == 0x7fff`
64			`\|\| v.ieee.exponent == 0x7fff`
65			`\|\| w.ieee.exponent == 0x7fff`
66			`\|\| u.ieee.exponent + v.ieee.exponent`
67			`> 0x7fff + IEEE854_FLOAT128_BIAS`
68			`\|\| u.ieee.exponent + v.ieee.exponent`
69			`< IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG - 2)`
70			`return x * y + z;`
71			`if (u.ieee.exponent + v.ieee.exponent`
72			`>= 0x7fff + IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG)`
73			`{`
74			`/* Compute 1p-113 times smaller result and multiply`
75			`at the end. */`
76			`if (u.ieee.exponent > v.ieee.exponent)`
77			`u.ieee.exponent -= FLT128_MANT_DIG;`
78			`else`
79			`v.ieee.exponent -= FLT128_MANT_DIG;`
80			`/* If x + y exponent is very large and z exponent is very small,`
81			`it doesn't matter if we don't adjust it. */`
82			`if (w.ieee.exponent > FLT128_MANT_DIG)`
83			`w.ieee.exponent -= FLT128_MANT_DIG;`
84			`adjust = 1;`
85			`}`
86			`else if (w.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)`
87			`{`
88			`/* Similarly.`
89			`If z exponent is very large and x and y exponents are`
90			`very small, it doesn't matter if we don't adjust it. */`
91			`if (u.ieee.exponent > v.ieee.exponent)`
92			`{`
93			`if (u.ieee.exponent > FLT128_MANT_DIG)`
94			`u.ieee.exponent -= FLT128_MANT_DIG;`
95			`}`
96			`else if (v.ieee.exponent > FLT128_MANT_DIG)`
97			`v.ieee.exponent -= FLT128_MANT_DIG;`
98			`w.ieee.exponent -= FLT128_MANT_DIG;`
99			`adjust = 1;`
100			`}`
101			`else if (u.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)`
102			`{`
103			`u.ieee.exponent -= FLT128_MANT_DIG;`
104			`if (v.ieee.exponent)`
105			`v.ieee.exponent += FLT128_MANT_DIG;`
106			`else`
107			`v.value *= 0x1p113Q;`
108			`}`
109			`else if (v.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)`
110			`{`
111			`v.ieee.exponent -= FLT128_MANT_DIG;`
112			`if (u.ieee.exponent)`
113			`u.ieee.exponent += FLT128_MANT_DIG;`
114			`else`
115			`u.value *= 0x1p113Q;`
116			`}`
117			`else /* if (u.ieee.exponent + v.ieee.exponent`
118			`<= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG) */`
119			`{`
120			`if (u.ieee.exponent > v.ieee.exponent)`
121			`u.ieee.exponent += 2 * FLT128_MANT_DIG;`
122			`else`
123			`v.ieee.exponent += 2 * FLT128_MANT_DIG;`
124			`if (w.ieee.exponent <= 4 * FLT128_MANT_DIG + 4)`
125			`{`
126			`if (w.ieee.exponent)`
127			`w.ieee.exponent += 2 * FLT128_MANT_DIG;`
128			`else`
129			`w.value *= 0x1p226Q;`
130			`adjust = -1;`
131			`}`
132			`/* Otherwise x * y should just affect inexact`
133			`and nothing else. */`
134			`}`
135			`x = u.value;`
136			`y = v.value;`
137			`z = w.value;`
138			`}`
139			`/* Multiplication m1 + m2 = x * y using Dekker's algorithm. */`
140			`#define C ((1LL << (FLT128_MANT_DIG + 1) / 2) + 1)`
141			`__float128 x1 = x * C;`
142			`__float128 y1 = y * C;`
143			`__float128 m1 = x * y;`
144			`x1 = (x - x1) + x1;`
145			`y1 = (y - y1) + y1;`
146			`__float128 x2 = x - x1;`
147			`__float128 y2 = y - y1;`
148			`__float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;`
149
150			`/* Addition a1 + a2 = z + m1 using Knuth's algorithm. */`
151			`__float128 a1 = z + m1;`
152			`__float128 t1 = a1 - z;`
153			`__float128 t2 = a1 - t1;`
154			`t1 = m1 - t1;`
155			`t2 = z - t2;`
156			`__float128 a2 = t1 + t2;`
157
158			`#ifdef USE_FENV_H`
159			`fenv_t env;`
160			`feholdexcept (&env);`
161			`fesetround (FE_TOWARDZERO);`
162			`#endif`
163			`/* Perform m2 + a2 addition with round to odd. */`
164			`u.value = a2 + m2;`
165
166			`if (__builtin_expect (adjust == 0, 1))`
167			`{`
168			`#ifdef USE_FENV_H`
169			`if ((u.ieee.mant_low & 1) == 0 && u.ieee.exponent != 0x7fff)`
170			`u.ieee.mant_low \|= fetestexcept (FE_INEXACT) != 0;`
171			`feupdateenv (&env);`
172			`#endif`
173			`/* Result is a1 + u.value. */`
174			`return a1 + u.value;`
175			`}`
176			`else if (__builtin_expect (adjust > 0, 1))`
177			`{`
178			`#ifdef USE_FENV_H`
179			`if ((u.ieee.mant_low & 1) == 0 && u.ieee.exponent != 0x7fff)`
180			`u.ieee.mant_low \|= fetestexcept (FE_INEXACT) != 0;`
181			`feupdateenv (&env);`
182			`#endif`
183			`/* Result is a1 + u.value, scaled up. */`
184			`return (a1 + u.value) * 0x1p113Q;`
185			`}`
186			`else`
187			`{`
188			`#ifdef USE_FENV_H`
189			`if ((u.ieee.mant_low & 1) == 0)`
190			`u.ieee.mant_low \|= fetestexcept (FE_INEXACT) != 0;`
191			`#endif`
192			`v.value = a1 + u.value;`
193			`/* Ensure the addition is not scheduled after fetestexcept call. */`
194			`asm volatile ("" : : "m" (v));`
195			`#ifdef USE_FENV_H`
196			`int j = fetestexcept (FE_INEXACT) != 0;`
197			`feupdateenv (&env);`
198			`#else`
199			`int j = 0;`
200			`#endif`
201			`/* Ensure the following computations are performed in default rounding`
202			`mode instead of just reusing the round to zero computation. */`
203			`asm volatile ("" : "=m" (u) : "m" (u));`
204			`/* If a1 + u.value is exact, the only rounding happens during`
205			`scaling down. */`
206			`if (j == 0)`
207			`return v.value * 0x1p-226Q;`
208			`/* If result rounded to zero is not subnormal, no double`
209			`rounding will occur. */`
210			`if (v.ieee.exponent > 226)`
211			`return (a1 + u.value) * 0x1p-226Q;`
212			`/* If v.value * 0x1p-226Q with round to zero is a subnormal above`
213			`or equal to FLT128_MIN / 2, then v.value * 0x1p-226Q shifts mantissa`
214			`down just by 1 bit, which means v.ieee.mant_low \|= j would`
215			`change the round bit, not sticky or guard bit.`
216			`v.value * 0x1p-226Q never normalizes by shifting up,`
217			`so round bit plus sticky bit should be already enough`
218			`for proper rounding. */`
219			`if (v.ieee.exponent == 226)`
220			`{`
221			`/* v.ieee.mant_low & 2 is LSB bit of the result before rounding,`
222			`v.ieee.mant_low & 1 is the round bit and j is our sticky`
223			`bit. In round-to-nearest 001 rounds down like 00,`
224			`011 rounds up, even though 01 rounds down (thus we need`
225			`to adjust), 101 rounds down like 10 and 111 rounds up`
226			`like 11. */`
227			`if ((v.ieee.mant_low & 3) == 1)`
228			`{`
229			`v.value *= 0x1p-226Q;`
230			`if (v.ieee.negative)`
231			`return v.value - 0x1p-16494Q /* __FLT128_DENORM_MIN__ */;`
232			`else`
233			`return v.value + 0x1p-16494Q /* __FLT128_DENORM_MIN__ */;`
234			`}`
235			`else`
236			`return v.value * 0x1p-226Q;`
237			`}`
238			`v.ieee.mant_low \|= j;`
239			`return v.value * 0x1p-226Q;`
240			`}`
241			`}`