OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [rtos/] [ecos-2.0/] [packages/] [language/] [c/] [libm/] [v2_0/] [src/] [double/] [ieee754-core/] [e_sqrt.c] - Blame information for rev 174

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 27 unneback
//===========================================================================
2
//
3
//      e_sqrt.c
4
//
5
//      Part of the standard mathematical function library
6
//
7
//===========================================================================
8
//####ECOSGPLCOPYRIGHTBEGIN####
9
// -------------------------------------------
10
// This file is part of eCos, the Embedded Configurable Operating System.
11
// Copyright (C) 1998, 1999, 2000, 2001, 2002 Red Hat, Inc.
12
//
13
// eCos is free software; you can redistribute it and/or modify it under
14
// the terms of the GNU General Public License as published by the Free
15
// Software Foundation; either version 2 or (at your option) any later version.
16
//
17
// eCos is distributed in the hope that it will be useful, but WITHOUT ANY
18
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
20
// for more details.
21
//
22
// You should have received a copy of the GNU General Public License along
23
// with eCos; if not, write to the Free Software Foundation, Inc.,
24
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
25
//
26
// As a special exception, if other files instantiate templates or use macros
27
// or inline functions from this file, or you compile this file and link it
28
// with other works to produce a work based on this file, this file does not
29
// by itself cause the resulting work to be covered by the GNU General Public
30
// License. However the source code for this file must still be made available
31
// in accordance with section (3) of the GNU General Public License.
32
//
33
// This exception does not invalidate any other reasons why a work based on
34
// this file might be covered by the GNU General Public License.
35
//
36
// Alternative licenses for eCos may be arranged by contacting Red Hat, Inc.
37
// at http://sources.redhat.com/ecos/ecos-license/
38
// -------------------------------------------
39
//####ECOSGPLCOPYRIGHTEND####
40
//===========================================================================
41
//#####DESCRIPTIONBEGIN####
42
//
43
// Author(s):   jlarmour
44
// Contributors:  jlarmour
45
// Date:        1998-02-13
46
// Purpose:     
47
// Description: 
48
// Usage:       
49
//
50
//####DESCRIPTIONEND####
51
//
52
//===========================================================================
53
 
54
// CONFIGURATION
55
 
56
#include <pkgconf/libm.h>   // Configuration header
57
 
58
// Include the Math library?
59
#ifdef CYGPKG_LIBM     
60
 
61
// Derived from code with the following copyright
62
 
63
/* @(#)e_sqrt.c 1.3 95/01/18 */
64
/*
65
 * ====================================================
66
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
67
 *
68
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
69
 * Permission to use, copy, modify, and distribute this
70
 * software is freely granted, provided that this notice
71
 * is preserved.
72
 * ====================================================
73
 */
74
 
75
/* __ieee754_sqrt(x)
76
 * Return correctly rounded sqrt.
77
 *           ------------------------------------------
78
 *           |  Use the hardware sqrt if you have one |
79
 *           ------------------------------------------
80
 * Method:
81
 *   Bit by bit method using integer arithmetic. (Slow, but portable)
82
 *   1. Normalization
83
 *      Scale x to y in [1,4) with even powers of 2:
84
 *      find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
85
 *              sqrt(x) = 2^k * sqrt(y)
86
 *   2. Bit by bit computation
87
 *      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
88
 *           i                                                   0
89
 *                                     i+1         2
90
 *          s  = 2*q , and      y  =  2   * ( y - q  ).         (1)
91
 *           i      i            i                 i
92
 *
93
 *      To compute q    from q , one checks whether
94
 *                  i+1       i
95
 *
96
 *                            -(i+1) 2
97
 *                      (q + 2      ) <= y.                     (2)
98
 *                        i
99
 *                                                            -(i+1)
100
 *      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
101
 *                             i+1   i             i+1   i
102
 *
103
 *      With some algebric manipulation, it is not difficult to see
104
 *      that (2) is equivalent to
105
 *                             -(i+1)
106
 *                      s  +  2       <= y                      (3)
107
 *                       i                i
108
 *
109
 *      The advantage of (3) is that s  and y  can be computed by
110
 *                                    i      i
111
 *      the following recurrence formula:
112
 *          if (3) is false
113
 *
114
 *          s     =  s  ,       y    = y   ;                    (4)
115
 *           i+1      i          i+1    i
116
 *
117
 *          otherwise,
118
 *                         -i                     -(i+1)
119
 *          s     =  s  + 2  ,  y    = y  -  s  - 2             (5)
120
 *           i+1      i          i+1    i     i
121
 *
122
 *      One may easily use induction to prove (4) and (5).
123
 *      Note. Since the left hand side of (3) contain only i+2 bits,
124
 *            it does not necessary to do a full (53-bit) comparison
125
 *            in (3).
126
 *   3. Final rounding
127
 *      After generating the 53 bits result, we compute one more bit.
128
 *      Together with the remainder, we can decide whether the
129
 *      result is exact, bigger than 1/2ulp, or less than 1/2ulp
130
 *      (it will never equal to 1/2ulp).
131
 *      The rounding mode can be detected by checking whether
132
 *      huge + tiny is equal to huge, and whether huge - tiny is
133
 *      equal to huge for some floating point number "huge" and "tiny".
134
 *
135
 * Special cases:
136
 *      sqrt(+-0) = +-0         ... exact
137
 *      sqrt(inf) = inf
138
 *      sqrt(-ve) = NaN         ... with invalid signal
139
 *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN
140
 *
141
 * Other methods : see the appended file at the end of the program below.
142
 *---------------
143
 */
144
 
145
#include "mathincl/fdlibm.h"
146
 
147
static  const double    one     = 1.0, tiny=1.0e-300;
148
 
149
        double __ieee754_sqrt(double x)
150
{
151
        double z;
152
        int     sign = (int)0x80000000;
153
        unsigned r,t1,s1,ix1,q1;
154
        int ix0,s0,q,m,t,i;
155
 
156
        ix0 = CYG_LIBM_HI(x);                   /* high word of x */
157
        ix1 = CYG_LIBM_LO(x);           /* low word of x */
158
 
159
    /* take care of Inf and NaN */
160
        if((ix0&0x7ff00000)==0x7ff00000) {
161
            return x*x+x;               /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
162
                                           sqrt(-inf)=sNaN */
163
        }
164
    /* take care of zero */
165
        if(ix0<=0) {
166
            if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
167
            else if(ix0<0)
168
                return (x-x)/(x-x);             /* sqrt(-ve) = sNaN */
169
        }
170
    /* normalize x */
171
        m = (ix0>>20);
172
        if(m==0) {                              /* subnormal x */
173
            while(ix0==0) {
174
                m -= 21;
175
                ix0 |= (ix1>>11); ix1 <<= 21;
176
            }
177
            for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
178
            m -= i-1;
179
            ix0 |= (ix1>>(32-i));
180
            ix1 <<= i;
181
        }
182
        m -= 1023;      /* unbias exponent */
183
        ix0 = (ix0&0x000fffff)|0x00100000;
184
        if(m&1){        /* odd m, double x to make it even */
185
            ix0 += ix0 + ((ix1&sign)>>31);
186
            ix1 += ix1;
187
        }
188
        m >>= 1;        /* m = [m/2] */
189
 
190
    /* generate sqrt(x) bit by bit */
191
        ix0 += ix0 + ((ix1&sign)>>31);
192
        ix1 += ix1;
193
        q = q1 = s0 = s1 = 0;   /* [q,q1] = sqrt(x) */
194
        r = 0x00200000;         /* r = moving bit from right to left */
195
 
196
        while(r!=0) {
197
            t = s0+r;
198
            if(t<=ix0) {
199
                s0   = t+r;
200
                ix0 -= t;
201
                q   += r;
202
            }
203
            ix0 += ix0 + ((ix1&sign)>>31);
204
            ix1 += ix1;
205
            r>>=1;
206
        }
207
 
208
        r = sign;
209
        while(r!=0) {
210
            t1 = s1+r;
211
            t  = s0;
212
            if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
213
                s1  = t1+r;
214
                if(((t1&sign)==(unsigned)sign)&&(s1&sign)==0) s0 += 1;
215
                ix0 -= t;
216
                if (ix1 < t1) ix0 -= 1;
217
                ix1 -= t1;
218
                q1  += r;
219
            }
220
            ix0 += ix0 + ((ix1&sign)>>31);
221
            ix1 += ix1;
222
            r>>=1;
223
        }
224
 
225
    /* use floating add to find out rounding direction */
226
        if((ix0|ix1)!=0) {
227
            z = one-tiny; /* trigger inexact flag */
228
            if (z>=one) {
229
                z = one+tiny;
230
                if (q1==(unsigned)0xffffffff) { q1=0; q += 1;}
231
                else if (z>one) {
232
                    if (q1==(unsigned)0xfffffffe) q+=1;
233
                    q1+=2;
234
                } else
235
                    q1 += (q1&1);
236
            }
237
        }
238
        ix0 = (q>>1)+0x3fe00000;
239
        ix1 =  q1>>1;
240
        if ((q&1)==1) ix1 |= sign;
241
        ix0 += (m <<20);
242
        CYG_LIBM_HI(z) = ix0;
243
        CYG_LIBM_LO(z) = ix1;
244
        return z;
245
}
246
 
247
/*
248
Other methods  (use floating-point arithmetic)
249
-------------
250
(This is a copy of a drafted paper by Prof W. Kahan
251
and K.C. Ng, written in May, 1986)
252
 
253
        Two algorithms are given here to implement sqrt(x)
254
        (IEEE double precision arithmetic) in software.
255
        Both supply sqrt(x) correctly rounded. The first algorithm (in
256
        Section A) uses newton iterations and involves four divisions.
257
        The second one uses reciproot iterations to avoid division, but
258
        requires more multiplications. Both algorithms need the ability
259
        to chop results of arithmetic operations instead of round them,
260
        and the INEXACT flag to indicate when an arithmetic operation
261
        is executed exactly with no roundoff error, all part of the
262
        standard (IEEE 754-1985). The ability to perform shift, add,
263
        subtract and logical AND operations upon 32-bit words is needed
264
        too, though not part of the standard.
265
 
266
A.  sqrt(x) by Newton Iteration
267
 
268
   (1)  Initial approximation
269
 
270
        Let x0 and x1 be the leading and the trailing 32-bit words of
271
        a floating point number x (in IEEE double format) respectively
272
 
273
            1    11                  52                           ...widths
274
           ------------------------------------------------------
275
        x: |s|    e     |             f                         |
276
           ------------------------------------------------------
277
              msb    lsb  msb                                 lsb ...order
278
 
279
 
280
             ------------------------        ------------------------
281
        x0:  |s|   e    |    f1     |    x1: |          f2           |
282
             ------------------------        ------------------------
283
 
284
        By performing shifts and subtracts on x0 and x1 (both regarded
285
        as integers), we obtain an 8-bit approximation of sqrt(x) as
286
        follows.
287
 
288
                k  := (x0>>1) + 0x1ff80000;
289
                y0 := k - T1[31&(k>>15)].       ... y ~ sqrt(x) to 8 bits
290
        Here k is a 32-bit integer and T1[] is an integer array containing
291
        correction terms. Now magically the floating value of y (y's
292
        leading 32-bit word is y0, the value of its trailing word is 0)
293
        approximates sqrt(x) to almost 8-bit.
294
 
295
        Value of T1:
296
        static int T1[32]= {
297
        0,      1024,   3062,   5746,   9193,   13348,  18162,  23592,
298
        29598,  36145,  43202,  50740,  58733,  67158,  75992,  85215,
299
        83599,  71378,  60428,  50647,  41945,  34246,  27478,  21581,
300
        16499,  12183,  8588,   5674,   3403,   1742,   661,    130,};
301
 
302
    (2) Iterative refinement
303
 
304
        Apply Heron's rule three times to y, we have y approximates
305
        sqrt(x) to within 1 ulp (Unit in the Last Place):
306
 
307
                y := (y+x/y)/2          ... almost 17 sig. bits
308
                y := (y+x/y)/2          ... almost 35 sig. bits
309
                y := y-(y-x/y)/2        ... within 1 ulp
310
 
311
 
312
        Remark 1.
313
            Another way to improve y to within 1 ulp is:
314
 
315
                y := (y+x/y)            ... almost 17 sig. bits to 2*sqrt(x)
316
                y := y - 0x00100006     ... almost 18 sig. bits to sqrt(x)
317
 
318
                                2
319
                            (x-y )*y
320
                y := y + 2* ----------  ...within 1 ulp
321
                               2
322
                             3y  + x
323
 
324
 
325
        This formula has one division fewer than the one above; however,
326
        it requires more multiplications and additions. Also x must be
327
        scaled in advance to avoid spurious overflow in evaluating the
328
        expression 3y*y+x. Hence it is not recommended uless division
329
        is slow. If division is very slow, then one should use the
330
        reciproot algorithm given in section B.
331
 
332
    (3) Final adjustment
333
 
334
        By twiddling y's last bit it is possible to force y to be
335
        correctly rounded according to the prevailing rounding mode
336
        as follows. Let r and i be copies of the rounding mode and
337
        inexact flag before entering the square root program. Also we
338
        use the expression y+-ulp for the next representable floating
339
        numbers (up and down) of y. Note that y+-ulp = either fixed
340
        point y+-1, or multiply y by nextafter(1,+-inf) in chopped
341
        mode.
342
 
343
                I := FALSE;     ... reset INEXACT flag I
344
                R := RZ;        ... set rounding mode to round-toward-zero
345
                z := x/y;       ... chopped quotient, possibly inexact
346
                If(not I) then {        ... if the quotient is exact
347
                    if(z=y) {
348
                        I := i;  ... restore inexact flag
349
                        R := r;  ... restore rounded mode
350
                        return sqrt(x):=y.
351
                    } else {
352
                        z := z - ulp;   ... special rounding
353
                    }
354
                }
355
                i := TRUE;              ... sqrt(x) is inexact
356
                If (r=RN) then z=z+ulp  ... rounded-to-nearest
357
                If (r=RP) then {        ... round-toward-+inf
358
                    y = y+ulp; z=z+ulp;
359
                }
360
                y := y+z;               ... chopped sum
361
                y0:=y0-0x00100000;      ... y := y/2 is correctly rounded.
362
                I := i;                 ... restore inexact flag
363
                R := r;                 ... restore rounded mode
364
                return sqrt(x):=y.
365
 
366
    (4) Special cases
367
 
368
        Square root of +inf, +-0, or NaN is itself;
369
        Square root of a negative number is NaN with invalid signal.
370
 
371
 
372
B.  sqrt(x) by Reciproot Iteration
373
 
374
   (1)  Initial approximation
375
 
376
        Let x0 and x1 be the leading and the trailing 32-bit words of
377
        a floating point number x (in IEEE double format) respectively
378
        (see section A). By performing shifs and subtracts on x0 and y0,
379
        we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
380
 
381
            k := 0x5fe80000 - (x0>>1);
382
            y0:= k - T2[63&(k>>14)].    ... y ~ 1/sqrt(x) to 7.8 bits
383
 
384
        Here k is a 32-bit integer and T2[] is an integer array
385
        containing correction terms. Now magically the floating
386
        value of y (y's leading 32-bit word is y0, the value of
387
        its trailing word y1 is set to zero) approximates 1/sqrt(x)
388
        to almost 7.8-bit.
389
 
390
        Value of T2:
391
        static int T2[64]= {
392
        0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
393
        0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
394
        0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
395
        0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
396
        0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
397
        0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
398
        0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
399
        0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,};
400
 
401
    (2) Iterative refinement
402
 
403
        Apply Reciproot iteration three times to y and multiply the
404
        result by x to get an approximation z that matches sqrt(x)
405
        to about 1 ulp. To be exact, we will have
406
                -1ulp < sqrt(x)-z<1.0625ulp.
407
 
408
        ... set rounding mode to Round-to-nearest
409
           y := y*(1.5-0.5*x*y*y)       ... almost 15 sig. bits to 1/sqrt(x)
410
           y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
411
        ... special arrangement for better accuracy
412
           z := x*y                     ... 29 bits to sqrt(x), with z*y<1
413
           z := z + 0.5*z*(1-z*y)       ... about 1 ulp to sqrt(x)
414
 
415
        Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
416
        (a) the term z*y in the final iteration is always less than 1;
417
        (b) the error in the final result is biased upward so that
418
                -1 ulp < sqrt(x) - z < 1.0625 ulp
419
            instead of |sqrt(x)-z|<1.03125ulp.
420
 
421
    (3) Final adjustment
422
 
423
        By twiddling y's last bit it is possible to force y to be
424
        correctly rounded according to the prevailing rounding mode
425
        as follows. Let r and i be copies of the rounding mode and
426
        inexact flag before entering the square root program. Also we
427
        use the expression y+-ulp for the next representable floating
428
        numbers (up and down) of y. Note that y+-ulp = either fixed
429
        point y+-1, or multiply y by nextafter(1,+-inf) in chopped
430
        mode.
431
 
432
        R := RZ;                ... set rounding mode to round-toward-zero
433
        switch(r) {
434
            case RN:            ... round-to-nearest
435
               if(x<= z*(z-ulp)...chopped) z = z - ulp; else
436
               if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
437
               break;
438
            case RZ:case RM:    ... round-to-zero or round-to--inf
439
               R:=RP;           ... reset rounding mod to round-to-+inf
440
               if(x<z*z ... rounded up) z = z - ulp; else
441
               if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
442
               break;
443
            case RP:            ... round-to-+inf
444
               if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
445
               if(x>z*z ...chopped) z = z+ulp;
446
               break;
447
        }
448
 
449
        Remark 3. The above comparisons can be done in fixed point. For
450
        example, to compare x and w=z*z chopped, it suffices to compare
451
        x1 and w1 (the trailing parts of x and w), regarding them as
452
        two's complement integers.
453
 
454
        ...Is z an exact square root?
455
        To determine whether z is an exact square root of x, let z1 be the
456
        trailing part of z, and also let x0 and x1 be the leading and
457
        trailing parts of x.
458
 
459
        If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0
460
            I := 1;             ... Raise Inexact flag: z is not exact
461
        else {
462
            j := 1 - [(x0>>20)&1]       ... j = logb(x) mod 2
463
            k := z1 >> 26;              ... get z's 25-th and 26-th
464
                                            fraction bits
465
            I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
466
        }
467
        R:= r           ... restore rounded mode
468
        return sqrt(x):=z.
469
 
470
        If multiplication is cheaper then the foregoing red tape, the
471
        Inexact flag can be evaluated by
472
 
473
            I := i;
474
            I := (z*z!=x) or I.
475
 
476
        Note that z*z can overwrite I; this value must be sensed if it is
477
        True.
478
 
479
        Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
480
        zero.
481
 
482
                    --------------------
483
                z1: |        f2        |
484
                    --------------------
485
                bit 31             bit 0
486
 
487
        Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
488
        or even of logb(x) have the following relations:
489
 
490
        -------------------------------------------------
491
        bit 27,26 of z1         bit 1,0 of x1   logb(x)
492
        -------------------------------------------------
493
        00                      00              odd and even
494
        01                      01              even
495
        10                      10              odd
496
        10                      00              even
497
        11                      01              even
498
        -------------------------------------------------
499
 
500
    (4) Special cases (see (4) of Section A).
501
 
502
 */
503
 
504
 
505
#endif // ifdef CYGPKG_LIBM     
506
 
507
// EOF e_sqrt.c

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.