OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [native/] [fdlibm/] [e_sqrt.c] - Blame information for rev 774

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 774 jeremybenn
/* @(#)e_sqrt.c 1.3 95/01/18 */
2
/*
3
 * ====================================================
4
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5
 *
6
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
7
 * Permission to use, copy, modify, and distribute this
8
 * software is freely granted, provided that this notice
9
 * is preserved.
10
 * ====================================================
11
 */
12
 
13
/* __ieee754_sqrt(x)
14
 * Return correctly rounded sqrt.
15
 *           ------------------------------------------
16
 *           |  Use the hardware sqrt if you have one |
17
 *           ------------------------------------------
18
 * Method:
19
 *   Bit by bit method using integer arithmetic. (Slow, but portable)
20
 *   1. Normalization
21
 *      Scale x to y in [1,4) with even powers of 2:
22
 *      find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
23
 *              sqrt(x) = 2^k * sqrt(y)
24
 *   2. Bit by bit computation
25
 *      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
26
 *           i                                                   0
27
 *                                     i+1         2
28
 *          s  = 2*q , and      y  =  2   * ( y - q  ).         (1)
29
 *           i      i            i                 i
30
 *
31
 *      To compute q    from q , one checks whether
32
 *                  i+1       i
33
 *
34
 *                            -(i+1) 2
35
 *                      (q + 2      ) <= y.                     (2)
36
 *                        i
37
 *                                                            -(i+1)
38
 *      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
39
 *                             i+1   i             i+1   i
40
 *
41
 *      With some algebric manipulation, it is not difficult to see
42
 *      that (2) is equivalent to
43
 *                             -(i+1)
44
 *                      s  +  2       <= y                      (3)
45
 *                       i                i
46
 *
47
 *      The advantage of (3) is that s  and y  can be computed by
48
 *                                    i      i
49
 *      the following recurrence formula:
50
 *          if (3) is false
51
 *
52
 *          s     =  s  ,       y    = y   ;                    (4)
53
 *           i+1      i          i+1    i
54
 *
55
 *          otherwise,
56
 *                         -i                     -(i+1)
57
 *          s     =  s  + 2  ,  y    = y  -  s  - 2             (5)
58
 *           i+1      i          i+1    i     i
59
 *
60
 *      One may easily use induction to prove (4) and (5).
61
 *      Note. Since the left hand side of (3) contain only i+2 bits,
62
 *            it does not necessary to do a full (53-bit) comparison
63
 *            in (3).
64
 *   3. Final rounding
65
 *      After generating the 53 bits result, we compute one more bit.
66
 *      Together with the remainder, we can decide whether the
67
 *      result is exact, bigger than 1/2ulp, or less than 1/2ulp
68
 *      (it will never equal to 1/2ulp).
69
 *      The rounding mode can be detected by checking whether
70
 *      huge + tiny is equal to huge, and whether huge - tiny is
71
 *      equal to huge for some floating point number "huge" and "tiny".
72
 *
73
 * Special cases:
74
 *      sqrt(+-0) = +-0         ... exact
75
 *      sqrt(inf) = inf
76
 *      sqrt(-ve) = NaN         ... with invalid signal
77
 *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN
78
 *
79
 * Other methods : see the appended file at the end of the program below.
80
 *---------------
81
 */
82
 
83
#include "fdlibm.h"
84
 
85
#ifndef _DOUBLE_IS_32BITS
86
 
87
#ifdef __STDC__
88
static  const double    one     = 1.0, tiny=1.0e-300;
89
#else
90
static  double  one     = 1.0, tiny=1.0e-300;
91
#endif
92
 
93
#ifdef __STDC__
94
        double __ieee754_sqrt(double x)
95
#else
96
        double __ieee754_sqrt(x)
97
        double x;
98
#endif
99
{
100
        double z;
101
        int32_t         sign = (int)0x80000000;
102
        uint32_t r,t1,s1,ix1,q1;
103
        int32_t ix0,s0,q,m,t,i;
104
 
105
        EXTRACT_WORDS(ix0,ix1,x);
106
 
107
    /* take care of Inf and NaN */
108
        if((ix0&0x7ff00000)==0x7ff00000) {
109
            return x*x+x;               /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
110
                                           sqrt(-inf)=sNaN */
111
        }
112
    /* take care of zero */
113
        if(ix0<=0) {
114
            if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
115
            else if(ix0<0)
116
                return (x-x)/(x-x);             /* sqrt(-ve) = sNaN */
117
        }
118
    /* normalize x */
119
        m = (ix0>>20);
120
        if(m==0) {                               /* subnormal x */
121
            while(ix0==0) {
122
                m -= 21;
123
                ix0 |= (ix1>>11); ix1 <<= 21;
124
            }
125
            for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
126
            m -= i-1;
127
            ix0 |= (ix1>>(32-i));
128
            ix1 <<= i;
129
        }
130
        m -= 1023;      /* unbias exponent */
131
        ix0 = (ix0&0x000fffff)|0x00100000;
132
        if(m&1){        /* odd m, double x to make it even */
133
            ix0 += ix0 + ((ix1&sign)>>31);
134
            ix1 += ix1;
135
        }
136
        m >>= 1;        /* m = [m/2] */
137
 
138
    /* generate sqrt(x) bit by bit */
139
        ix0 += ix0 + ((ix1&sign)>>31);
140
        ix1 += ix1;
141
        q = q1 = s0 = s1 = 0;    /* [q,q1] = sqrt(x) */
142
        r = 0x00200000;         /* r = moving bit from right to left */
143
 
144
        while(r!=0) {
145
            t = s0+r;
146
            if(t<=ix0) {
147
                s0   = t+r;
148
                ix0 -= t;
149
                q   += r;
150
            }
151
            ix0 += ix0 + ((ix1&sign)>>31);
152
            ix1 += ix1;
153
            r>>=1;
154
        }
155
 
156
        r = sign;
157
        while(r!=0) {
158
            t1 = s1+r;
159
            t  = s0;
160
            if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
161
                s1  = t1+r;
162
                if(((t1&sign)==(uint32_t)sign)&&(s1&sign)==0) s0 += 1;
163
                ix0 -= t;
164
                if (ix1 < t1) ix0 -= 1;
165
                ix1 -= t1;
166
                q1  += r;
167
            }
168
            ix0 += ix0 + ((ix1&sign)>>31);
169
            ix1 += ix1;
170
            r>>=1;
171
        }
172
 
173
    /* use floating add to find out rounding direction */
174
        if((ix0|ix1)!=0) {
175
            z = one-tiny; /* trigger inexact flag */
176
            if (z>=one) {
177
                z = one+tiny;
178
                if (q1==(uint32_t)0xffffffff) { q1=0; q += 1;}
179
                else if (z>one) {
180
                    if (q1==(uint32_t)0xfffffffe) q+=1;
181
                    q1+=2;
182
                } else
183
                    q1 += (q1&1);
184
            }
185
        }
186
        ix0 = (q>>1)+0x3fe00000;
187
        ix1 =  q1>>1;
188
        if ((q&1)==1) ix1 |= sign;
189
        ix0 += (m <<20);
190
        INSERT_WORDS(z,ix0,ix1);
191
        return z;
192
}
193
#endif /* defined(_DOUBLE_IS_32BITS) */
194
 
195
/*
196
Other methods  (use floating-point arithmetic)
197
-------------
198
(This is a copy of a drafted paper by Prof W. Kahan
199
and K.C. Ng, written in May, 1986)
200
 
201
        Two algorithms are given here to implement sqrt(x)
202
        (IEEE double precision arithmetic) in software.
203
        Both supply sqrt(x) correctly rounded. The first algorithm (in
204
        Section A) uses newton iterations and involves four divisions.
205
        The second one uses reciproot iterations to avoid division, but
206
        requires more multiplications. Both algorithms need the ability
207
        to chop results of arithmetic operations instead of round them,
208
        and the INEXACT flag to indicate when an arithmetic operation
209
        is executed exactly with no roundoff error, all part of the
210
        standard (IEEE 754-1985). The ability to perform shift, add,
211
        subtract and logical AND operations upon 32-bit words is needed
212
        too, though not part of the standard.
213
 
214
A.  sqrt(x) by Newton Iteration
215
 
216
   (1)  Initial approximation
217
 
218
        Let x0 and x1 be the leading and the trailing 32-bit words of
219
        a floating point number x (in IEEE double format) respectively
220
 
221
            1    11                  52                           ...widths
222
           ------------------------------------------------------
223
        x: |s|    e     |             f                         |
224
           ------------------------------------------------------
225
              msb    lsb  msb                                 lsb ...order
226
 
227
 
228
             ------------------------        ------------------------
229
        x0:  |s|   e    |    f1     |    x1: |          f2           |
230
             ------------------------        ------------------------
231
 
232
        By performing shifts and subtracts on x0 and x1 (both regarded
233
        as integers), we obtain an 8-bit approximation of sqrt(x) as
234
        follows.
235
 
236
                k  := (x0>>1) + 0x1ff80000;
237
                y0 := k - T1[31&(k>>15)].       ... y ~ sqrt(x) to 8 bits
238
        Here k is a 32-bit integer and T1[] is an integer array containing
239
        correction terms. Now magically the floating value of y (y's
240
        leading 32-bit word is y0, the value of its trailing word is 0)
241
        approximates sqrt(x) to almost 8-bit.
242
 
243
        Value of T1:
244
        static int T1[32]= {
245
        0,      1024,   3062,   5746,   9193,   13348,  18162,  23592,
246
        29598,  36145,  43202,  50740,  58733,  67158,  75992,  85215,
247
        83599,  71378,  60428,  50647,  41945,  34246,  27478,  21581,
248
        16499,  12183,  8588,   5674,   3403,   1742,   661,    130,};
249
 
250
    (2) Iterative refinement
251
 
252
        Apply Heron's rule three times to y, we have y approximates
253
        sqrt(x) to within 1 ulp (Unit in the Last Place):
254
 
255
                y := (y+x/y)/2          ... almost 17 sig. bits
256
                y := (y+x/y)/2          ... almost 35 sig. bits
257
                y := y-(y-x/y)/2        ... within 1 ulp
258
 
259
 
260
        Remark 1.
261
            Another way to improve y to within 1 ulp is:
262
 
263
                y := (y+x/y)            ... almost 17 sig. bits to 2*sqrt(x)
264
                y := y - 0x00100006     ... almost 18 sig. bits to sqrt(x)
265
 
266
                                2
267
                            (x-y )*y
268
                y := y + 2* ----------  ...within 1 ulp
269
                               2
270
                             3y  + x
271
 
272
 
273
        This formula has one division fewer than the one above; however,
274
        it requires more multiplications and additions. Also x must be
275
        scaled in advance to avoid spurious overflow in evaluating the
276
        expression 3y*y+x. Hence it is not recommended uless division
277
        is slow. If division is very slow, then one should use the
278
        reciproot algorithm given in section B.
279
 
280
    (3) Final adjustment
281
 
282
        By twiddling y's last bit it is possible to force y to be
283
        correctly rounded according to the prevailing rounding mode
284
        as follows. Let r and i be copies of the rounding mode and
285
        inexact flag before entering the square root program. Also we
286
        use the expression y+-ulp for the next representable floating
287
        numbers (up and down) of y. Note that y+-ulp = either fixed
288
        point y+-1, or multiply y by nextafter(1,+-inf) in chopped
289
        mode.
290
 
291
                I := FALSE;     ... reset INEXACT flag I
292
                R := RZ;        ... set rounding mode to round-toward-zero
293
                z := x/y;       ... chopped quotient, possibly inexact
294
                If(not I) then {        ... if the quotient is exact
295
                    if(z=y) {
296
                        I := i;  ... restore inexact flag
297
                        R := r;  ... restore rounded mode
298
                        return sqrt(x):=y.
299
                    } else {
300
                        z := z - ulp;   ... special rounding
301
                    }
302
                }
303
                i := TRUE;              ... sqrt(x) is inexact
304
                If (r=RN) then z=z+ulp  ... rounded-to-nearest
305
                If (r=RP) then {        ... round-toward-+inf
306
                    y = y+ulp; z=z+ulp;
307
                }
308
                y := y+z;               ... chopped sum
309
                y0:=y0-0x00100000;      ... y := y/2 is correctly rounded.
310
                I := i;                 ... restore inexact flag
311
                R := r;                 ... restore rounded mode
312
                return sqrt(x):=y.
313
 
314
    (4) Special cases
315
 
316
        Square root of +inf, +-0, or NaN is itself;
317
        Square root of a negative number is NaN with invalid signal.
318
 
319
 
320
B.  sqrt(x) by Reciproot Iteration
321
 
322
   (1)  Initial approximation
323
 
324
        Let x0 and x1 be the leading and the trailing 32-bit words of
325
        a floating point number x (in IEEE double format) respectively
326
        (see section A). By performing shifs and subtracts on x0 and y0,
327
        we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
328
 
329
            k := 0x5fe80000 - (x0>>1);
330
            y0:= k - T2[63&(k>>14)].    ... y ~ 1/sqrt(x) to 7.8 bits
331
 
332
        Here k is a 32-bit integer and T2[] is an integer array
333
        containing correction terms. Now magically the floating
334
        value of y (y's leading 32-bit word is y0, the value of
335
        its trailing word y1 is set to zero) approximates 1/sqrt(x)
336
        to almost 7.8-bit.
337
 
338
        Value of T2:
339
        static int T2[64]= {
340
        0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
341
        0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
342
        0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
343
        0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
344
        0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
345
        0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
346
        0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
347
        0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,};
348
 
349
    (2) Iterative refinement
350
 
351
        Apply Reciproot iteration three times to y and multiply the
352
        result by x to get an approximation z that matches sqrt(x)
353
        to about 1 ulp. To be exact, we will have
354
                -1ulp < sqrt(x)-z<1.0625ulp.
355
 
356
        ... set rounding mode to Round-to-nearest
357
           y := y*(1.5-0.5*x*y*y)       ... almost 15 sig. bits to 1/sqrt(x)
358
           y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
359
        ... special arrangement for better accuracy
360
           z := x*y                     ... 29 bits to sqrt(x), with z*y<1
361
           z := z + 0.5*z*(1-z*y)       ... about 1 ulp to sqrt(x)
362
 
363
        Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
364
        (a) the term z*y in the final iteration is always less than 1;
365
        (b) the error in the final result is biased upward so that
366
                -1 ulp < sqrt(x) - z < 1.0625 ulp
367
            instead of |sqrt(x)-z|<1.03125ulp.
368
 
369
    (3) Final adjustment
370
 
371
        By twiddling y's last bit it is possible to force y to be
372
        correctly rounded according to the prevailing rounding mode
373
        as follows. Let r and i be copies of the rounding mode and
374
        inexact flag before entering the square root program. Also we
375
        use the expression y+-ulp for the next representable floating
376
        numbers (up and down) of y. Note that y+-ulp = either fixed
377
        point y+-1, or multiply y by nextafter(1,+-inf) in chopped
378
        mode.
379
 
380
        R := RZ;                ... set rounding mode to round-toward-zero
381
        switch(r) {
382
            case RN:            ... round-to-nearest
383
               if(x<= z*(z-ulp)...chopped) z = z - ulp; else
384
               if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
385
               break;
386
            case RZ:case RM:    ... round-to-zero or round-to--inf
387
               R:=RP;           ... reset rounding mod to round-to-+inf
388
               if(x<z*z ... rounded up) z = z - ulp; else
389
               if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
390
               break;
391
            case RP:            ... round-to-+inf
392
               if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
393
               if(x>z*z ...chopped) z = z+ulp;
394
               break;
395
        }
396
 
397
        Remark 3. The above comparisons can be done in fixed point. For
398
        example, to compare x and w=z*z chopped, it suffices to compare
399
        x1 and w1 (the trailing parts of x and w), regarding them as
400
        two's complement integers.
401
 
402
        ...Is z an exact square root?
403
        To determine whether z is an exact square root of x, let z1 be the
404
        trailing part of z, and also let x0 and x1 be the leading and
405
        trailing parts of x.
406
 
407
        If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0
408
            I := 1;             ... Raise Inexact flag: z is not exact
409
        else {
410
            j := 1 - [(x0>>20)&1]       ... j = logb(x) mod 2
411
            k := z1 >> 26;              ... get z's 25-th and 26-th
412
                                            fraction bits
413
            I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
414
        }
415
        R:= r           ... restore rounded mode
416
        return sqrt(x):=z.
417
 
418
        If multiplication is cheaper then the foregoing red tape, the
419
        Inexact flag can be evaluated by
420
 
421
            I := i;
422
            I := (z*z!=x) or I.
423
 
424
        Note that z*z can overwrite I; this value must be sensed if it is
425
        True.
426
 
427
        Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
428
        zero.
429
 
430
                    --------------------
431
                z1: |        f2        |
432
                    --------------------
433
                bit 31             bit 0
434
 
435
        Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
436
        or even of logb(x) have the following relations:
437
 
438
        -------------------------------------------------
439
        bit 27,26 of z1         bit 1,0 of x1   logb(x)
440
        -------------------------------------------------
441
        00                      00              odd and even
442
        01                      01              even
443
        10                      10              odd
444
        10                      00              even
445
        11                      01              even
446
        -------------------------------------------------
447
 
448
    (4) Special cases (see (4) of Section A).
449
 
450
 */
451
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.