OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgcc/] [config/] [libbid/] [bid64_sqrt.c] - Blame information for rev 849

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 734 jeremybenn
/* Copyright (C) 2007, 2009  Free Software Foundation, Inc.
2
 
3
This file is part of GCC.
4
 
5
GCC is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free
7
Software Foundation; either version 3, or (at your option) any later
8
version.
9
 
10
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11
WARRANTY; without even the implied warranty of MERCHANTABILITY or
12
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13
for more details.
14
 
15
Under Section 7 of GPL version 3, you are granted additional
16
permissions described in the GCC Runtime Library Exception, version
17
3.1, as published by the Free Software Foundation.
18
 
19
You should have received a copy of the GNU General Public License and
20
a copy of the GCC Runtime Library Exception along with this program;
21
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22
<http://www.gnu.org/licenses/>.  */
23
 
24
/*****************************************************************************
25
 *    BID64 square root
26
 *****************************************************************************
27
 *
28
 *  Algorithm description:
29
 *
30
 *  if(exponent_x is odd)
31
 *     scale coefficient_x by 10, adjust exponent
32
 *  - get lower estimate for number of digits in coefficient_x
33
 *  - scale coefficient x to between 31 and 33 decimal digits
34
 *  - in parallel, check for exact case and return if true
35
 *  - get high part of result coefficient using double precision sqrt
36
 *  - compute remainder and refine coefficient in one iteration (which
37
 *                                 modifies it by at most 1)
38
 *  - result exponent is easy to compute from the adjusted arg. exponent
39
 *
40
 ****************************************************************************/
41
 
42
#include "bid_internal.h"
43
#include "bid_sqrt_macros.h"
44
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
45
#include <fenv.h>
46
 
47
/* Mask of the binary floating-point exception flags that are saved before
   and restored after the sqrt() calls below.  The expansion is fully
   parenthesized so the macro stays a single operand when combined with
   other operators (e.g. `x & FE_ALL_FLAGS`, `~FE_ALL_FLAGS`).  */
#define FE_ALL_FLAGS (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT)
48
#endif
49
 
50
extern double sqrt (double);
51
 
52
#if DECIMAL_CALL_BY_REFERENCE
53
 
54
void
55
bid64_sqrt (UINT64 * pres,
56
            UINT64 *
57
            px _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
58
            _EXC_INFO_PARAM) {
59
  UINT64 x;
60
#else
61
 
62
UINT64
63
bid64_sqrt (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM
64
            _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
65
#endif
66
  UINT128 CA, CT;
67
  UINT64 sign_x, coefficient_x;
68
  UINT64 Q, Q2, A10, C4, R, R2, QE, res;
69
  SINT64 D;
70
  int_double t_scale;
71
  int_float tempx;
72
  double da, dq, da_h, da_l, dqe;
73
  int exponent_x, exponent_q, bin_expon_cx;
74
  int digits_x;
75
  int scale;
76
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
77
  fexcept_t binaryflags = 0;
78
#endif
79
 
80
#if DECIMAL_CALL_BY_REFERENCE
81
#if !DECIMAL_GLOBAL_ROUNDING
82
  _IDEC_round rnd_mode = *prnd_mode;
83
#endif
84
  x = *px;
85
#endif
86
 
87
  // unpack arguments, check for NaN or Infinity
88
  if (!unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x)) {
89
    // x is Inf. or NaN or 0
90
    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
91
      res = coefficient_x;
92
      if ((coefficient_x & SSNAN_MASK64) == SINFINITY_MASK64)   // -Infinity
93
      {
94
        res = NAN_MASK64;
95
#ifdef SET_STATUS_FLAGS
96
        __set_status_flags (pfpsf, INVALID_EXCEPTION);
97
#endif
98
      }
99
#ifdef SET_STATUS_FLAGS
100
      if ((x & SNAN_MASK64) == SNAN_MASK64)     // sNaN
101
        __set_status_flags (pfpsf, INVALID_EXCEPTION);
102
#endif
103
      BID_RETURN (res & QUIET_MASK64);
104
    }
105
    // x is 0
106
    exponent_x = (exponent_x + DECIMAL_EXPONENT_BIAS) >> 1;
107
    res = sign_x | (((UINT64) exponent_x) << 53);
108
    BID_RETURN (res);
109
  }
110
  // x<0?
111
  if (sign_x && coefficient_x) {
112
    res = NAN_MASK64;
113
#ifdef SET_STATUS_FLAGS
114
    __set_status_flags (pfpsf, INVALID_EXCEPTION);
115
#endif
116
    BID_RETURN (res);
117
  }
118
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
119
  (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
120
#endif
121
  //--- get number of bits in the coefficient of x ---
122
  tempx.d = (float) coefficient_x;
123
  bin_expon_cx = ((tempx.i >> 23) & 0xff) - 0x7f;
124
  digits_x = estimate_decimal_digits[bin_expon_cx];
125
  // add test for range
126
  if (coefficient_x >= power10_index_binexp[bin_expon_cx])
127
    digits_x++;
128
 
129
  A10 = coefficient_x;
130
  if (exponent_x & 1) {
131
    A10 = (A10 << 2) + A10;
132
    A10 += A10;
133
  }
134
 
135
  dqe = sqrt ((double) A10);
136
  QE = (UINT32) dqe;
137
  if (QE * QE == A10) {
138
    res =
139
      very_fast_get_BID64 (0, (exponent_x + DECIMAL_EXPONENT_BIAS) >> 1,
140
                           QE);
141
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
142
    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
143
#endif
144
    BID_RETURN (res);
145
  }
146
  // if exponent is odd, scale coefficient by 10
147
  scale = 31 - digits_x;
148
  exponent_q = exponent_x - scale;
149
  scale += (exponent_q & 1);    // exp. bias is even
150
 
151
  CT = power10_table_128[scale];
152
  __mul_64x128_short (CA, coefficient_x, CT);
153
 
154
  // 2^64
155
  t_scale.i = 0x43f0000000000000ull;
156
  // convert CA to DP
157
  da_h = CA.w[1];
158
  da_l = CA.w[0];
159
  da = da_h * t_scale.d + da_l;
160
 
161
  dq = sqrt (da);
162
 
163
  Q = (UINT64) dq;
164
 
165
  // get sign(sqrt(CA)-Q)
166
  R = CA.w[0] - Q * Q;
167
  R = ((SINT64) R) >> 63;
168
  D = R + R + 1;
169
 
170
  exponent_q = (exponent_q + DECIMAL_EXPONENT_BIAS) >> 1;
171
 
172
#ifdef SET_STATUS_FLAGS
173
  __set_status_flags (pfpsf, INEXACT_EXCEPTION);
174
#endif
175
 
176
#ifndef IEEE_ROUND_NEAREST
177
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
178
  if (!((rnd_mode) & 3)) {
179
#endif
180
#endif
181
 
182
    // midpoint to check
183
    Q2 = Q + Q + D;
184
    C4 = CA.w[0] << 2;
185
 
186
    // get sign(-sqrt(CA)+Midpoint)
187
    R2 = Q2 * Q2 - C4;
188
    R2 = ((SINT64) R2) >> 63;
189
 
190
    // adjust Q if R!=R2
191
    Q += (D & (R ^ R2));
192
#ifndef IEEE_ROUND_NEAREST
193
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
194
  } else {
195
    C4 = CA.w[0];
196
    Q += D;
197
    if ((SINT64) (Q * Q - C4) > 0)
198
      Q--;
199
    if (rnd_mode == ROUNDING_UP)
200
      Q++;
201
  }
202
#endif
203
#endif
204
 
205
  res = fast_get_BID64 (0, exponent_q, Q);
206
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
207
  (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
208
#endif
209
  BID_RETURN (res);
210
}
211
 
212
 
213
TYPE0_FUNCTION_ARG1 (UINT64, bid64q_sqrt, x)
214
 
215
     UINT256 M256, C4, C8;
216
     UINT128 CX, CX2, A10, S2, T128, CS, CSM, CS2, C256, CS1,
217
       mul_factor2_long = { {0x0ull, 0x0ull} }, QH, Tmp, TP128, Qh, Ql;
218
UINT64 sign_x, Carry, B10, res, mul_factor, mul_factor2 = 0x0ull, CS0;
219
SINT64 D;
220
int_float fx, f64;
221
int exponent_x, bin_expon_cx, done = 0;
222
int digits, scale, exponent_q = 0, exact = 1, amount, extra_digits;
223
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
224
fexcept_t binaryflags = 0;
225
#endif
226
 
227
        // unpack arguments, check for NaN or Infinity
228
if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
229
  res = CX.w[1];
230
  // NaN ?
231
  if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
232
#ifdef SET_STATUS_FLAGS
233
    if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull)      // sNaN
234
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
235
#endif
236
    Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull);
237
    Tmp.w[0] = CX.w[0];
238
    TP128 = reciprocals10_128[18];
239
    __mul_128x128_full (Qh, Ql, Tmp, TP128);
240
    amount = recip_scale[18];
241
    __shr_128 (Tmp, Qh, amount);
242
    res = (CX.w[1] & 0xfc00000000000000ull) | Tmp.w[0];
243
    BID_RETURN (res);
244
  }
245
  // x is Infinity?
246
  if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
247
    if (sign_x) {
248
      // -Inf, return NaN
249
      res = 0x7c00000000000000ull;
250
#ifdef SET_STATUS_FLAGS
251
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
252
#endif
253
    }
254
    BID_RETURN (res);
255
  }
256
  // x is 0 otherwise
257
 
258
  exponent_x =
259
    ((exponent_x - DECIMAL_EXPONENT_BIAS_128) >> 1) +
260
    DECIMAL_EXPONENT_BIAS;
261
  if (exponent_x < 0)
262
    exponent_x = 0;
263
  if (exponent_x > DECIMAL_MAX_EXPON_64)
264
    exponent_x = DECIMAL_MAX_EXPON_64;
265
  //res= sign_x | (((UINT64)exponent_x)<<53);
266
  res = get_BID64 (sign_x, exponent_x, 0, rnd_mode, pfpsf);
267
  BID_RETURN (res);
268
}
269
if (sign_x) {
270
  res = 0x7c00000000000000ull;
271
#ifdef SET_STATUS_FLAGS
272
  __set_status_flags (pfpsf, INVALID_EXCEPTION);
273
#endif
274
  BID_RETURN (res);
275
}
276
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
277
(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
278
#endif
279
 
280
           // 2^64
281
f64.i = 0x5f800000;
282
 
283
           // fx ~ CX
284
fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
285
bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
286
digits = estimate_decimal_digits[bin_expon_cx];
287
 
288
A10 = CX;
289
if (exponent_x & 1) {
290
  A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
291
  A10.w[0] = CX.w[0] << 3;
292
  CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
293
  CX2.w[0] = CX.w[0] << 1;
294
  __add_128_128 (A10, A10, CX2);
295
}
296
 
297
C256.w[1] = A10.w[1];
298
C256.w[0] = A10.w[0];
299
CS.w[0] = short_sqrt128 (A10);
300
CS.w[1] = 0;
301
mul_factor = 0;
302
           // check for exact result  
303
if (CS.w[0] < 10000000000000000ull) {
304
  if (CS.w[0] * CS.w[0] == A10.w[0]) {
305
    __sqr64_fast (S2, CS.w[0]);
306
    if (S2.w[1] == A10.w[1])    // && S2.w[0]==A10.w[0])
307
    {
308
      res =
309
        get_BID64 (0,
310
                   ((exponent_x - DECIMAL_EXPONENT_BIAS_128) >> 1) +
311
                   DECIMAL_EXPONENT_BIAS, CS.w[0], rnd_mode, pfpsf);
312
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
313
      (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
314
#endif
315
      BID_RETURN (res);
316
    }
317
  }
318
  if (CS.w[0] >= 1000000000000000ull) {
319
    done = 1;
320
    exponent_q = exponent_x;
321
    C256.w[1] = A10.w[1];
322
    C256.w[0] = A10.w[0];
323
  }
324
#ifdef SET_STATUS_FLAGS
325
  __set_status_flags (pfpsf, INEXACT_EXCEPTION);
326
#endif
327
  exact = 0;
328
} else {
329
  B10 = 0x3333333333333334ull;
330
  __mul_64x64_to_128_full (CS2, CS.w[0], B10);
331
  CS0 = CS2.w[1] >> 1;
332
  if (CS.w[0] != ((CS0 << 3) + (CS0 << 1))) {
333
#ifdef SET_STATUS_FLAGS
334
    __set_status_flags (pfpsf, INEXACT_EXCEPTION);
335
#endif
336
    exact = 0;
337
  }
338
  done = 1;
339
  CS.w[0] = CS0;
340
  exponent_q = exponent_x + 2;
341
  mul_factor = 10;
342
  mul_factor2 = 100;
343
  if (CS.w[0] >= 10000000000000000ull) {
344
    __mul_64x64_to_128_full (CS2, CS.w[0], B10);
345
    CS0 = CS2.w[1] >> 1;
346
    if (CS.w[0] != ((CS0 << 3) + (CS0 << 1))) {
347
#ifdef SET_STATUS_FLAGS
348
      __set_status_flags (pfpsf, INEXACT_EXCEPTION);
349
#endif
350
      exact = 0;
351
    }
352
    exponent_q += 2;
353
    CS.w[0] = CS0;
354
    mul_factor = 100;
355
    mul_factor2 = 10000;
356
  }
357
  if (exact) {
358
    CS0 = CS.w[0] * mul_factor;
359
    __sqr64_fast (CS1, CS0)
360
      if ((CS1.w[0] != A10.w[0]) || (CS1.w[1] != A10.w[1])) {
361
#ifdef SET_STATUS_FLAGS
362
      __set_status_flags (pfpsf, INEXACT_EXCEPTION);
363
#endif
364
      exact = 0;
365
    }
366
  }
367
}
368
 
369
if (!done) {
370
  // get number of digits in CX
371
  D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1];
372
  if (D > 0
373
      || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0]))
374
    digits++;
375
 
376
  // if exponent is odd, scale coefficient by 10
377
  scale = 31 - digits;
378
  exponent_q = exponent_x - scale;
379
  scale += (exponent_q & 1);    // exp. bias is even
380
 
381
  T128 = power10_table_128[scale];
382
  __mul_128x128_low (C256, CX, T128);
383
 
384
 
385
  CS.w[0] = short_sqrt128 (C256);
386
}
387
   //printf("CS=%016I64x\n",CS.w[0]);
388
 
389
exponent_q =
390
  ((exponent_q - DECIMAL_EXPONENT_BIAS_128) >> 1) +
391
  DECIMAL_EXPONENT_BIAS;
392
if ((exponent_q < 0) && (exponent_q + MAX_FORMAT_DIGITS >= 0)) {
393
  extra_digits = -exponent_q;
394
  exponent_q = 0;
395
 
396
  // get coeff*(2^M[extra_digits])/10^extra_digits
397
  __mul_64x64_to_128 (QH, CS.w[0], reciprocals10_64[extra_digits]);
398
 
399
  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
400
  amount = short_recip_scale[extra_digits];
401
 
402
  CS0 = QH.w[1] >> amount;
403
 
404
#ifdef SET_STATUS_FLAGS
405
  if (exact) {
406
    if (CS.w[0] != CS0 * power10_table_128[extra_digits].w[0])
407
      exact = 0;
408
  }
409
  if (!exact)
410
    __set_status_flags (pfpsf, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
411
#endif
412
 
413
  CS.w[0] = CS0;
414
  if (!mul_factor)
415
    mul_factor = 1;
416
  mul_factor *= power10_table_128[extra_digits].w[0];
417
  __mul_64x64_to_128 (mul_factor2_long, mul_factor, mul_factor);
418
  if (mul_factor2_long.w[1])
419
    mul_factor2 = 0;
420
  else
421
    mul_factor2 = mul_factor2_long.w[1];
422
}
423
           // 4*C256
424
C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
425
C4.w[0] = C256.w[0] << 2;
426
 
427
#ifndef IEEE_ROUND_NEAREST
428
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
429
if (!((rnd_mode) & 3)) {
430
#endif
431
#endif
432
  // compare to midpoints
433
  CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
434
  //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x\n",C4.w[1],C4.w[0],CSM.w[1],CSM.w[0], CS.w[0]);
435
  if (mul_factor)
436
    CSM.w[0] *= mul_factor;
437
  // CSM^2
438
  __mul_64x64_to_128 (M256, CSM.w[0], CSM.w[0]);
439
  //__mul_128x128_to_256(M256, CSM, CSM);
440
 
441
  if (C4.w[1] > M256.w[1] ||
442
      (C4.w[1] == M256.w[1] && C4.w[0] > M256.w[0])) {
443
    // round up
444
    CS.w[0]++;
445
  } else {
446
    C8.w[0] = CS.w[0] << 3;
447
    C8.w[1] = 0;
448
    if (mul_factor) {
449
      if (mul_factor2) {
450
        __mul_64x64_to_128 (C8, C8.w[0], mul_factor2);
451
      } else {
452
        __mul_64x128_low (C8, C8.w[0], mul_factor2_long);
453
      }
454
    }
455
    // M256 - 8*CSM
456
    __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
457
    M256.w[1] = M256.w[1] - C8.w[1] - Carry;
458
 
459
    // if CSM' > C256, round up
460
    if (M256.w[1] > C4.w[1] ||
461
        (M256.w[1] == C4.w[1] && M256.w[0] > C4.w[0])) {
462
      // round down
463
      if (CS.w[0])
464
        CS.w[0]--;
465
    }
466
  }
467
#ifndef IEEE_ROUND_NEAREST
468
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
469
} else {
470
  CS.w[0]++;
471
  CSM.w[0] = CS.w[0];
472
  C8.w[0] = CSM.w[0] << 1;
473
  if (mul_factor)
474
    CSM.w[0] *= mul_factor;
475
  __mul_64x64_to_128 (M256, CSM.w[0], CSM.w[0]);
476
  C8.w[1] = 0;
477
  if (mul_factor) {
478
    if (mul_factor2) {
479
      __mul_64x64_to_128 (C8, C8.w[0], mul_factor2);
480
    } else {
481
      __mul_64x128_low (C8, C8.w[0], mul_factor2_long);
482
    }
483
  }
484
  //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x\n",C256.w[1],C256.w[0],M256.w[1],M256.w[0], CS.w[0]);
485
 
486
  if (M256.w[1] > C256.w[1] ||
487
      (M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0])) {
488
    __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
489
    M256.w[1] = M256.w[1] - Carry - C8.w[1];
490
    M256.w[0]++;
491
    if (!M256.w[0]) {
492
      M256.w[1]++;
493
 
494
    }
495
 
496
    if ((M256.w[1] > C256.w[1] ||
497
         (M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0]))
498
        && (CS.w[0] > 1)) {
499
 
500
      CS.w[0]--;
501
 
502
      if (CS.w[0] > 1) {
503
        __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
504
        M256.w[1] = M256.w[1] - Carry - C8.w[1];
505
        M256.w[0]++;
506
        if (!M256.w[0]) {
507
          M256.w[1]++;
508
        }
509
 
510
        if (M256.w[1] > C256.w[1] ||
511
            (M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0]))
512
          CS.w[0]--;
513
      }
514
    }
515
  }
516
 
517
  else {
518
                                /*__add_carry_out(M256.w[0], Carry, M256.w[0], C8.w[0]);
519
                                M256.w[1] = M256.w[1] + Carry + C8.w[1];
520
                                M256.w[0]++;
521
                                if(!M256.w[0])
522
                                {
523
                                        M256.w[1]++;
524
                                }
525
                                CS.w[0]++;
526
                        if(M256.w[1]<C256.w[1] ||
527
                                (M256.w[1]==C256.w[1] && M256.w[0]<=C256.w[0]))
528
                        {
529
                                CS.w[0]++;
530
                        }*/
531
    CS.w[0]++;
532
  }
533
  //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x %d\n",C4.w[1],C4.w[0],M256.w[1],M256.w[0], CS.w[0], exact);
534
  // RU?
535
  if (((rnd_mode) != ROUNDING_UP) || exact) {
536
    if (CS.w[0])
537
      CS.w[0]--;
538
  }
539
 
540
}
541
#endif
542
#endif
543
 //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x %d\n",C4.w[1],C4.w[0],M256.w[1],M256.w[0], CS.w[0], exact);
544
 
545
res = get_BID64 (0, exponent_q, CS.w[0], rnd_mode, pfpsf);
546
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
547
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
548
#endif
549
BID_RETURN (res);
550
 
551
 
552
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.