OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [rtos/] [ecos-3.0/] [packages/] [language/] [c/] [libm/] [current/] [src/] [double/] [ieee754-core/] [e_sqrt.c] - Blame information for rev 786

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 786 skrzyp
//===========================================================================
2
//
3
//      e_sqrt.c
4
//
5
//      Part of the standard mathematical function library
6
//
7
//===========================================================================
8
// ####ECOSGPLCOPYRIGHTBEGIN####                                            
9
// -------------------------------------------                              
10
// This file is part of eCos, the Embedded Configurable Operating System.   
11
// Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
12
//
13
// eCos is free software; you can redistribute it and/or modify it under    
14
// the terms of the GNU General Public License as published by the Free     
15
// Software Foundation; either version 2 or (at your option) any later      
16
// version.                                                                 
17
//
18
// eCos is distributed in the hope that it will be useful, but WITHOUT      
19
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or    
20
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License    
21
// for more details.                                                        
22
//
23
// You should have received a copy of the GNU General Public License        
24
// along with eCos; if not, write to the Free Software Foundation, Inc.,    
25
// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.            
26
//
27
// As a special exception, if other files instantiate templates or use      
28
// macros or inline functions from this file, or you compile this file      
29
// and link it with other works to produce a work based on this file,       
30
// this file does not by itself cause the resulting work to be covered by   
31
// the GNU General Public License. However the source code for this file    
32
// must still be made available in accordance with section (3) of the GNU   
33
// General Public License v2.                                               
34
//
35
// This exception does not invalidate any other reasons why a work based    
36
// on this file might be covered by the GNU General Public License.         
37
// -------------------------------------------                              
38
// ####ECOSGPLCOPYRIGHTEND####                                              
39
//===========================================================================
40
//#####DESCRIPTIONBEGIN####
41
//
42
// Author(s):   jlarmour
43
// Contributors:  jlarmour
44
// Date:        1998-02-13
45
// Purpose:     
46
// Description: 
47
// Usage:       
48
//
49
//####DESCRIPTIONEND####
50
//
51
//===========================================================================
52
 
53
// CONFIGURATION
54
 
55
#include <pkgconf/libm.h>   // Configuration header
56
 
57
// Include the Math library?
58
#ifdef CYGPKG_LIBM     
59
 
60
// Derived from code with the following copyright
61
 
62
/* @(#)e_sqrt.c 1.3 95/01/18 */
63
/*
64
 * ====================================================
65
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
66
 *
67
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
68
 * Permission to use, copy, modify, and distribute this
69
 * software is freely granted, provided that this notice
70
 * is preserved.
71
 * ====================================================
72
 */
73
 
74
/* __ieee754_sqrt(x)
75
 * Return correctly rounded sqrt.
76
 *           ------------------------------------------
77
 *           |  Use the hardware sqrt if you have one |
78
 *           ------------------------------------------
79
 * Method:
80
 *   Bit by bit method using integer arithmetic. (Slow, but portable)
81
 *   1. Normalization
82
 *      Scale x to y in [1,4) with even powers of 2:
83
 *      find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
84
 *              sqrt(x) = 2^k * sqrt(y)
85
 *   2. Bit by bit computation
86
 *      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
87
 *           i                                                   0
88
 *                                     i+1         2
89
 *          s  = 2*q , and      y  =  2   * ( y - q  ).         (1)
90
 *           i      i            i                 i
91
 *
92
 *      To compute q    from q , one checks whether
93
 *                  i+1       i
94
 *
95
 *                            -(i+1) 2
96
 *                      (q + 2      ) <= y.                     (2)
97
 *                        i
98
 *                                                            -(i+1)
99
 *      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
100
 *                             i+1   i             i+1   i
101
 *
102
 *      With some algebric manipulation, it is not difficult to see
103
 *      that (2) is equivalent to
104
 *                             -(i+1)
105
 *                      s  +  2       <= y                      (3)
106
 *                       i                i
107
 *
108
 *      The advantage of (3) is that s  and y  can be computed by
109
 *                                    i      i
110
 *      the following recurrence formula:
111
 *          if (3) is false
112
 *
113
 *          s     =  s  ,       y    = y   ;                    (4)
114
 *           i+1      i          i+1    i
115
 *
116
 *          otherwise,
117
 *                         -i                     -(i+1)
118
 *          s     =  s  + 2  ,  y    = y  -  s  - 2             (5)
119
 *           i+1      i          i+1    i     i
120
 *
121
 *      One may easily use induction to prove (4) and (5).
122
 *      Note. Since the left hand side of (3) contain only i+2 bits,
123
 *            it does not necessary to do a full (53-bit) comparison
124
 *            in (3).
125
 *   3. Final rounding
126
 *      After generating the 53 bits result, we compute one more bit.
127
 *      Together with the remainder, we can decide whether the
128
 *      result is exact, bigger than 1/2ulp, or less than 1/2ulp
129
 *      (it will never equal to 1/2ulp).
130
 *      The rounding mode can be detected by checking whether
131
 *      huge + tiny is equal to huge, and whether huge - tiny is
132
 *      equal to huge for some floating point number "huge" and "tiny".
133
 *
134
 * Special cases:
135
 *      sqrt(+-0) = +-0         ... exact
136
 *      sqrt(inf) = inf
137
 *      sqrt(-ve) = NaN         ... with invalid signal
138
 *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN
139
 *
140
 * Other methods : see the appended file at the end of the program below.
141
 *---------------
142
 */
143
 
144
#include "mathincl/fdlibm.h"
145
 
146
static  const double    one     = 1.0, tiny=1.0e-300;
147
 
148
        double __ieee754_sqrt(double x)
149
{
150
        double z;
151
        int     sign = (int)0x80000000;
152
        unsigned r,t1,s1,ix1,q1;
153
        int ix0,s0,q,m,t,i;
154
 
155
        ix0 = CYG_LIBM_HI(x);                   /* high word of x */
156
        ix1 = CYG_LIBM_LO(x);           /* low word of x */
157
 
158
    /* take care of Inf and NaN */
159
        if((ix0&0x7ff00000)==0x7ff00000) {
160
            return x*x+x;               /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
161
                                           sqrt(-inf)=sNaN */
162
        }
163
    /* take care of zero */
164
        if(ix0<=0) {
165
            if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
166
            else if(ix0<0)
167
                return (x-x)/(x-x);             /* sqrt(-ve) = sNaN */
168
        }
169
    /* normalize x */
170
        m = (ix0>>20);
171
        if(m==0) {                              /* subnormal x */
172
            while(ix0==0) {
173
                m -= 21;
174
                ix0 |= (ix1>>11); ix1 <<= 21;
175
            }
176
            for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
177
            m -= i-1;
178
            ix0 |= (ix1>>(32-i));
179
            ix1 <<= i;
180
        }
181
        m -= 1023;      /* unbias exponent */
182
        ix0 = (ix0&0x000fffff)|0x00100000;
183
        if(m&1){        /* odd m, double x to make it even */
184
            ix0 += ix0 + ((ix1&sign)>>31);
185
            ix1 += ix1;
186
        }
187
        m >>= 1;        /* m = [m/2] */
188
 
189
    /* generate sqrt(x) bit by bit */
190
        ix0 += ix0 + ((ix1&sign)>>31);
191
        ix1 += ix1;
192
        q = q1 = s0 = s1 = 0;   /* [q,q1] = sqrt(x) */
193
        r = 0x00200000;         /* r = moving bit from right to left */
194
 
195
        while(r!=0) {
196
            t = s0+r;
197
            if(t<=ix0) {
198
                s0   = t+r;
199
                ix0 -= t;
200
                q   += r;
201
            }
202
            ix0 += ix0 + ((ix1&sign)>>31);
203
            ix1 += ix1;
204
            r>>=1;
205
        }
206
 
207
        r = sign;
208
        while(r!=0) {
209
            t1 = s1+r;
210
            t  = s0;
211
            if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
212
                s1  = t1+r;
213
                if(((t1&sign)==(unsigned)sign)&&(s1&sign)==0) s0 += 1;
214
                ix0 -= t;
215
                if (ix1 < t1) ix0 -= 1;
216
                ix1 -= t1;
217
                q1  += r;
218
            }
219
            ix0 += ix0 + ((ix1&sign)>>31);
220
            ix1 += ix1;
221
            r>>=1;
222
        }
223
 
224
    /* use floating add to find out rounding direction */
225
        if((ix0|ix1)!=0) {
226
            z = one-tiny; /* trigger inexact flag */
227
            if (z>=one) {
228
                z = one+tiny;
229
                if (q1==(unsigned)0xffffffff) { q1=0; q += 1;}
230
                else if (z>one) {
231
                    if (q1==(unsigned)0xfffffffe) q+=1;
232
                    q1+=2;
233
                } else
234
                    q1 += (q1&1);
235
            }
236
        }
237
        ix0 = (q>>1)+0x3fe00000;
238
        ix1 =  q1>>1;
239
        if ((q&1)==1) ix1 |= sign;
240
        ix0 += (m <<20);
241
        CYG_LIBM_HI(z) = ix0;
242
        CYG_LIBM_LO(z) = ix1;
243
        return z;
244
}
245
 
246
/*
247
Other methods  (use floating-point arithmetic)
248
-------------
249
(This is a copy of a drafted paper by Prof W. Kahan
250
and K.C. Ng, written in May, 1986)
251
 
252
        Two algorithms are given here to implement sqrt(x)
253
        (IEEE double precision arithmetic) in software.
254
        Both supply sqrt(x) correctly rounded. The first algorithm (in
255
        Section A) uses newton iterations and involves four divisions.
256
        The second one uses reciproot iterations to avoid division, but
257
        requires more multiplications. Both algorithms need the ability
258
        to chop results of arithmetic operations instead of round them,
259
        and the INEXACT flag to indicate when an arithmetic operation
260
        is executed exactly with no roundoff error, all part of the
261
        standard (IEEE 754-1985). The ability to perform shift, add,
262
        subtract and logical AND operations upon 32-bit words is needed
263
        too, though not part of the standard.
264
 
265
A.  sqrt(x) by Newton Iteration
266
 
267
   (1)  Initial approximation
268
 
269
        Let x0 and x1 be the leading and the trailing 32-bit words of
270
        a floating point number x (in IEEE double format) respectively
271
 
272
            1    11                  52                           ...widths
273
           ------------------------------------------------------
274
        x: |s|    e     |             f                         |
275
           ------------------------------------------------------
276
              msb    lsb  msb                                 lsb ...order
277
 
278
 
279
             ------------------------        ------------------------
280
        x0:  |s|   e    |    f1     |    x1: |          f2           |
281
             ------------------------        ------------------------
282
 
283
        By performing shifts and subtracts on x0 and x1 (both regarded
284
        as integers), we obtain an 8-bit approximation of sqrt(x) as
285
        follows.
286
 
287
                k  := (x0>>1) + 0x1ff80000;
288
                y0 := k - T1[31&(k>>15)].       ... y ~ sqrt(x) to 8 bits
289
        Here k is a 32-bit integer and T1[] is an integer array containing
290
        correction terms. Now magically the floating value of y (y's
291
        leading 32-bit word is y0, the value of its trailing word is 0)
292
        approximates sqrt(x) to almost 8-bit.
293
 
294
        Value of T1:
295
        static int T1[32]= {
296
        0,      1024,   3062,   5746,   9193,   13348,  18162,  23592,
297
        29598,  36145,  43202,  50740,  58733,  67158,  75992,  85215,
298
        83599,  71378,  60428,  50647,  41945,  34246,  27478,  21581,
299
        16499,  12183,  8588,   5674,   3403,   1742,   661,    130,};
300
 
301
    (2) Iterative refinement
302
 
303
        Apply Heron's rule three times to y, we have y approximates
304
        sqrt(x) to within 1 ulp (Unit in the Last Place):
305
 
306
                y := (y+x/y)/2          ... almost 17 sig. bits
307
                y := (y+x/y)/2          ... almost 35 sig. bits
308
                y := y-(y-x/y)/2        ... within 1 ulp
309
 
310
 
311
        Remark 1.
312
            Another way to improve y to within 1 ulp is:
313
 
314
                y := (y+x/y)            ... almost 17 sig. bits to 2*sqrt(x)
315
                y := y - 0x00100006     ... almost 18 sig. bits to sqrt(x)
316
 
317
                                2
318
                            (x-y )*y
319
                y := y + 2* ----------  ...within 1 ulp
320
                               2
321
                             3y  + x
322
 
323
 
324
        This formula has one division fewer than the one above; however,
325
        it requires more multiplications and additions. Also x must be
326
        scaled in advance to avoid spurious overflow in evaluating the
327
        expression 3y*y+x. Hence it is not recommended uless division
328
        is slow. If division is very slow, then one should use the
329
        reciproot algorithm given in section B.
330
 
331
    (3) Final adjustment
332
 
333
        By twiddling y's last bit it is possible to force y to be
334
        correctly rounded according to the prevailing rounding mode
335
        as follows. Let r and i be copies of the rounding mode and
336
        inexact flag before entering the square root program. Also we
337
        use the expression y+-ulp for the next representable floating
338
        numbers (up and down) of y. Note that y+-ulp = either fixed
339
        point y+-1, or multiply y by nextafter(1,+-inf) in chopped
340
        mode.
341
 
342
                I := FALSE;     ... reset INEXACT flag I
343
                R := RZ;        ... set rounding mode to round-toward-zero
344
                z := x/y;       ... chopped quotient, possibly inexact
345
                If(not I) then {        ... if the quotient is exact
346
                    if(z=y) {
347
                        I := i;  ... restore inexact flag
348
                        R := r;  ... restore rounded mode
349
                        return sqrt(x):=y.
350
                    } else {
351
                        z := z - ulp;   ... special rounding
352
                    }
353
                }
354
                i := TRUE;              ... sqrt(x) is inexact
355
                If (r=RN) then z=z+ulp  ... rounded-to-nearest
356
                If (r=RP) then {        ... round-toward-+inf
357
                    y = y+ulp; z=z+ulp;
358
                }
359
                y := y+z;               ... chopped sum
360
                y0:=y0-0x00100000;      ... y := y/2 is correctly rounded.
361
                I := i;                 ... restore inexact flag
362
                R := r;                 ... restore rounded mode
363
                return sqrt(x):=y.
364
 
365
    (4) Special cases
366
 
367
        Square root of +inf, +-0, or NaN is itself;
368
        Square root of a negative number is NaN with invalid signal.
369
 
370
 
371
B.  sqrt(x) by Reciproot Iteration
372
 
373
   (1)  Initial approximation
374
 
375
        Let x0 and x1 be the leading and the trailing 32-bit words of
376
        a floating point number x (in IEEE double format) respectively
377
        (see section A). By performing shifs and subtracts on x0 and y0,
378
        we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
379
 
380
            k := 0x5fe80000 - (x0>>1);
381
            y0:= k - T2[63&(k>>14)].    ... y ~ 1/sqrt(x) to 7.8 bits
382
 
383
        Here k is a 32-bit integer and T2[] is an integer array
384
        containing correction terms. Now magically the floating
385
        value of y (y's leading 32-bit word is y0, the value of
386
        its trailing word y1 is set to zero) approximates 1/sqrt(x)
387
        to almost 7.8-bit.
388
 
389
        Value of T2:
390
        static int T2[64]= {
391
        0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
392
        0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
393
        0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
394
        0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
395
        0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
396
        0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
397
        0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
398
        0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,};
399
 
400
    (2) Iterative refinement
401
 
402
        Apply Reciproot iteration three times to y and multiply the
403
        result by x to get an approximation z that matches sqrt(x)
404
        to about 1 ulp. To be exact, we will have
405
                -1ulp < sqrt(x)-z<1.0625ulp.
406
 
407
        ... set rounding mode to Round-to-nearest
408
           y := y*(1.5-0.5*x*y*y)       ... almost 15 sig. bits to 1/sqrt(x)
409
           y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
410
        ... special arrangement for better accuracy
411
           z := x*y                     ... 29 bits to sqrt(x), with z*y<1
412
           z := z + 0.5*z*(1-z*y)       ... about 1 ulp to sqrt(x)
413
 
414
        Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
415
        (a) the term z*y in the final iteration is always less than 1;
416
        (b) the error in the final result is biased upward so that
417
                -1 ulp < sqrt(x) - z < 1.0625 ulp
418
            instead of |sqrt(x)-z|<1.03125ulp.
419
 
420
    (3) Final adjustment
421
 
422
        By twiddling y's last bit it is possible to force y to be
423
        correctly rounded according to the prevailing rounding mode
424
        as follows. Let r and i be copies of the rounding mode and
425
        inexact flag before entering the square root program. Also we
426
        use the expression y+-ulp for the next representable floating
427
        numbers (up and down) of y. Note that y+-ulp = either fixed
428
        point y+-1, or multiply y by nextafter(1,+-inf) in chopped
429
        mode.
430
 
431
        R := RZ;                ... set rounding mode to round-toward-zero
432
        switch(r) {
433
            case RN:            ... round-to-nearest
434
               if(x<= z*(z-ulp)...chopped) z = z - ulp; else
435
               if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
436
               break;
437
            case RZ:case RM:    ... round-to-zero or round-to--inf
438
               R:=RP;           ... reset rounding mod to round-to-+inf
439
               if(x<z*z ... rounded up) z = z - ulp; else
440
               if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
441
               break;
442
            case RP:            ... round-to-+inf
443
               if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
444
               if(x>z*z ...chopped) z = z+ulp;
445
               break;
446
        }
447
 
448
        Remark 3. The above comparisons can be done in fixed point. For
449
        example, to compare x and w=z*z chopped, it suffices to compare
450
        x1 and w1 (the trailing parts of x and w), regarding them as
451
        two's complement integers.
452
 
453
        ...Is z an exact square root?
454
        To determine whether z is an exact square root of x, let z1 be the
455
        trailing part of z, and also let x0 and x1 be the leading and
456
        trailing parts of x.
457
 
458
        If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0
459
            I := 1;             ... Raise Inexact flag: z is not exact
460
        else {
461
            j := 1 - [(x0>>20)&1]       ... j = logb(x) mod 2
462
            k := z1 >> 26;              ... get z's 25-th and 26-th
463
                                            fraction bits
464
            I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
465
        }
466
        R:= r           ... restore rounded mode
467
        return sqrt(x):=z.
468
 
469
        If multiplication is cheaper then the foregoing red tape, the
470
        Inexact flag can be evaluated by
471
 
472
            I := i;
473
            I := (z*z!=x) or I.
474
 
475
        Note that z*z can overwrite I; this value must be sensed if it is
476
        True.
477
 
478
        Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
479
        zero.
480
 
481
                    --------------------
482
                z1: |        f2        |
483
                    --------------------
484
                bit 31             bit 0
485
 
486
        Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
487
        or even of logb(x) have the following relations:
488
 
489
        -------------------------------------------------
490
        bit 27,26 of z1         bit 1,0 of x1   logb(x)
491
        -------------------------------------------------
492
        00                      00              odd and even
493
        01                      01              even
494
        10                      10              odd
495
        10                      00              even
496
        11                      01              even
497
        -------------------------------------------------
498
 
499
    (4) Special cases (see (4) of Section A).
500
 
501
 */
502
 
503
 
504
#endif // ifdef CYGPKG_LIBM     
505
 
506
// EOF e_sqrt.c

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.