URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [libgcc/] [config/] [libbid/] [bid64_sqrt.c] - Blame information for rev 407

Go to most recent revision | Details | Compare with Previous | View Log


/* Copyright (C) 2007, 2009  Free Software Foundation, Inc.
 
This file is part of GCC.
 
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
 
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.
 
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
 
/*****************************************************************************
 *    BID64 square root
 *****************************************************************************
 *
 *  Algorithm description:
 *
 *  if(exponent_x is odd)
 *     scale coefficient_x by 10, adjust exponent
 *  - get lower estimate for number of digits in coefficient_x
 *  - scale coefficient x to between 31 and 33 decimal digits
 *  - in parallel, check for exact case and return if true
 *  - get high part of result coefficient using double precision sqrt
 *  - compute remainder and refine coefficient in one iteration (which
 *                                 modifies it by at most 1)
 *  - result exponent is easy to compute from the adjusted arg. exponent
 *
 ****************************************************************************/
 
#include "bid_internal.h"
#include "bid_sqrt_macros.h"
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
#include <fenv.h>
 
/* Mask of the binary floating-point exception flags that are saved with
   fegetexceptflag and restored with fesetexceptflag around the binary
   sqrt calls below.  The replacement list is parenthesized so the mask
   stays a single operand wherever the macro is expanded (for example
   next to `&' or `~').  */
#define FE_ALL_FLAGS (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT)
#endif
 
extern double sqrt (double);
 
/* bid64_sqrt: square root of a BID64 decimal value.

   Two entry-point shapes are selected by DECIMAL_CALL_BY_REFERENCE:
     - by reference: the operand is read from *px and `pres' names the
       result slot (NOTE(review): the store through pres is presumably
       performed by BID_RETURN -- confirm in bid_internal.h);
     - by value: the operand is passed as x and the result is returned.
   Rounding-mode and status-flag parameters are supplied by the
   _RND_MODE_PARAM / _EXC_FLAGS_PARAM / _EXC_MASKS_PARAM / _EXC_INFO_PARAM
   macros.

   Outcomes visible in this function:
     - NaN or +Infinity: the value left in coefficient_x by unpack_BID64,
       masked with QUIET_MASK64; an sNaN input raises INVALID.
     - -Infinity, or negative with non-zero coefficient: NAN_MASK64,
       INVALID raised.
     - zero: sign kept, exponent field (exponent_x + DECIMAL_EXPONENT_BIAS) >> 1.
     - coefficient (times 10 for an odd exponent) is a perfect square:
       exact result, no INEXACT.
     - otherwise: rounded result, INEXACT raised.
   Status flags are only touched when SET_STATUS_FLAGS is defined.  When
   UNCHANGED_BINARY_STATUS_FLAGS is defined, the binary FP exception flags
   are saved before the float/double work and restored before returning.  */
#if DECIMAL_CALL_BY_REFERENCE
 
void
bid64_sqrt (UINT64 * pres,
            UINT64 *
            px _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
            _EXC_INFO_PARAM) {
  UINT64 x;
#else
 
UINT64
bid64_sqrt (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM
            _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
#endif
  UINT128 CA, CT;
  UINT64 sign_x, coefficient_x;
  UINT64 Q, Q2, A10, C4, R, R2, QE, res;
  SINT64 D;
  int_double t_scale;
  int_float tempx;
  double da, dq, da_h, da_l, dqe;
  int exponent_x, exponent_q, bin_expon_cx;
  int digits_x;
  int scale;
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
  fexcept_t binaryflags = 0;
#endif
 
#if DECIMAL_CALL_BY_REFERENCE
#if !DECIMAL_GLOBAL_ROUNDING
  // take a local copy of the caller-supplied rounding mode
  _IDEC_round rnd_mode = *prnd_mode;
#endif
  x = *px;
#endif
 
  // unpack arguments, check for NaN or Infinity
  if (!unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x)) {
    // x is Inf. or NaN or 0
    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
      // default: hand back what unpack_BID64 stored in coefficient_x
      res = coefficient_x;
      if ((coefficient_x & SSNAN_MASK64) == SINFINITY_MASK64)   // -Infinity
      {
        // sqrt(-Inf) is invalid: result is NaN
        res = NAN_MASK64;
#ifdef SET_STATUS_FLAGS
        __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      }
#ifdef SET_STATUS_FLAGS
      if ((x & SNAN_MASK64) == SNAN_MASK64)     // sNaN
        __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      // QUIET_MASK64 is applied to whatever is returned from this branch
      BID_RETURN (res & QUIET_MASK64);
    }
    // x is 0: keep the sign; the new exponent field is the average of
    // exponent_x and the bias (i.e. the unbiased exponent is halved)
    exponent_x = (exponent_x + DECIMAL_EXPONENT_BIAS) >> 1;
    res = sign_x | (((UINT64) exponent_x) << 53);
    BID_RETURN (res);
  }
  // x<0?  (negative with a non-zero coefficient): invalid, return NaN
  if (sign_x && coefficient_x) {
    res = NAN_MASK64;
#ifdef SET_STATUS_FLAGS
    __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
    BID_RETURN (res);
  }
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
  // save the binary FP flags; they are restored before each return below
  (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
  //--- get number of bits in the coefficient of x ---
  // convert to float and pull out bits 23..30 of its bit pattern minus 127,
  // i.e. the unbiased binary exponent (assumes IEEE single layout for
  // int_float -- TODO confirm against bid_internal.h)
  tempx.d = (float) coefficient_x;
  bin_expon_cx = ((tempx.i >> 23) & 0xff) - 0x7f;
  // table lookup gives a lower estimate of the decimal digit count
  digits_x = estimate_decimal_digits[bin_expon_cx];
  // add test for range: bump the estimate when the coefficient reaches the
  // threshold stored for this binary exponent
  if (coefficient_x >= power10_index_binexp[bin_expon_cx])
    digits_x++;
 
  // A10 = coefficient, multiplied by 10 when the exponent is odd
  // ((A << 2) + A = 5*A, then doubled)
  A10 = coefficient_x;
  if (exponent_x & 1) {
    A10 = (A10 << 2) + A10;
    A10 += A10;
  }
 
  // exact case: truncate the double-precision root and square it back
  dqe = sqrt ((double) A10);
  QE = (UINT32) dqe;
  if (QE * QE == A10) {
    res =
      very_fast_get_BID64 (0, (exponent_x + DECIMAL_EXPONENT_BIAS) >> 1,
                           QE);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
    BID_RETURN (res);
  }
  // inexact case: scale the coefficient up by 10^scale, where scale is
  // 31 - digits_x, plus one more when the scaled exponent would be odd
  scale = 31 - digits_x;
  exponent_q = exponent_x - scale;
  scale += (exponent_q & 1);    // exp. bias is even
 
  // CA = coefficient_x * 10^scale (128-bit product)
  CT = power10_table_128[scale];
  __mul_64x128_short (CA, coefficient_x, CT);
 
  // 2^64 (bit pattern of the double 2^64)
  t_scale.i = 0x43f0000000000000ull;
  // convert CA to DP: high word * 2^64 + low word
  da_h = CA.w[1];
  da_l = CA.w[0];
  da = da_h * t_scale.d + da_l;
 
  dq = sqrt (da);
 
  // truncated double-precision root is the first approximation
  Q = (UINT64) dq;
 
  // get sign(sqrt(CA)-Q): only the low 64 bits of CA - Q*Q are formed;
  // the arithmetic shift turns their sign bit into a mask (0 or all ones)
  R = CA.w[0] - Q * Q;
  R = ((SINT64) R) >> 63;
  // D = +1 when R is 0, -1 when R is all ones
  D = R + R + 1;
 
  // result exponent field: average of exponent_q and the bias
  exponent_q = (exponent_q + DECIMAL_EXPONENT_BIAS) >> 1;
 
#ifdef SET_STATUS_FLAGS
  // the exact case returned earlier, so this path is always inexact
  __set_status_flags (pfpsf, INEXACT_EXCEPTION);
#endif
 
  // The mode test below is compiled out when either IEEE_ROUND_NEAREST or
  // IEEE_ROUND_NEAREST_TIES_AWAY is defined, leaving only the midpoint
  // path.  NOTE(review): (rnd_mode & 3) == 0 presumably selects the
  // round-to-nearest modes -- confirm against the rounding-mode encoding.
#ifndef IEEE_ROUND_NEAREST
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  if (!((rnd_mode) & 3)) {
#endif
#endif
 
    // midpoint to check: Q2 = 2*Q + D, compared against 4*CA (low words)
    Q2 = Q + Q + D;
    C4 = CA.w[0] << 2;
 
    // get sign(-sqrt(CA)+Midpoint)
    R2 = Q2 * Q2 - C4;
    R2 = ((SINT64) R2) >> 63;
 
    // adjust Q if R!=R2: add D when the two sign masks differ
    Q += (D & (R ^ R2));
#ifndef IEEE_ROUND_NEAREST
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  } else {
    // remaining modes: step Q by D, undo one step if Q*Q now exceeds CA
    // (signed comparison on the low 64 bits), then add one for ROUNDING_UP
    C4 = CA.w[0];
    Q += D;
    if ((SINT64) (Q * Q - C4) > 0)
      Q--;
    if (rnd_mode == ROUNDING_UP)
      Q++;
  }
#endif
#endif
 
  // pack sign 0, the halved exponent and the rounded coefficient
  res = fast_get_BID64 (0, exponent_q, Q);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
  (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
  BID_RETURN (res);
}
 
 
TYPE0_FUNCTION_ARG1 (UINT64, bid64q_sqrt, x)
 
     UINT256 M256, C4, C8;
     UINT128 CX, CX2, A10, S2, T128, CS, CSM, CS2, C256, CS1,
       mul_factor2_long = { {0x0ull, 0x0ull} }, QH, Tmp, TP128, Qh, Ql;
UINT64 sign_x, Carry, B10, res, mul_factor, mul_factor2 = 0x0ull, CS0;
SINT64 D;
int_float fx, f64;
int exponent_x, bin_expon_cx, done = 0;
int digits, scale, exponent_q = 0, exact = 1, amount, extra_digits;
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
fexcept_t binaryflags = 0;
#endif
 
        // unpack arguments, check for NaN or Infinity
if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
  res = CX.w[1];
  // NaN ?
  if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
#ifdef SET_STATUS_FLAGS
    if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull)      // sNaN
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
    Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull);
    Tmp.w[0] = CX.w[0];
    TP128 = reciprocals10_128[18];
    __mul_128x128_full (Qh, Ql, Tmp, TP128);
    amount = recip_scale[18];
    __shr_128 (Tmp, Qh, amount);
    res = (CX.w[1] & 0xfc00000000000000ull) | Tmp.w[0];
    BID_RETURN (res);
  }
  // x is Infinity?
  if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
    if (sign_x) {
      // -Inf, return NaN
      res = 0x7c00000000000000ull;
#ifdef SET_STATUS_FLAGS
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
    }
    BID_RETURN (res);
  }
  // x is 0 otherwise
 
  exponent_x =
    ((exponent_x - DECIMAL_EXPONENT_BIAS_128) >> 1) +
    DECIMAL_EXPONENT_BIAS;
  if (exponent_x < 0)
    exponent_x = 0;
  if (exponent_x > DECIMAL_MAX_EXPON_64)
    exponent_x = DECIMAL_MAX_EXPON_64;
  //res= sign_x | (((UINT64)exponent_x)<<53);
  res = get_BID64 (sign_x, exponent_x, 0, rnd_mode, pfpsf);
  BID_RETURN (res);
}
if (sign_x) {
  res = 0x7c00000000000000ull;
#ifdef SET_STATUS_FLAGS
  __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
  BID_RETURN (res);
}
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
 
           // 2^64
f64.i = 0x5f800000;
 
           // fx ~ CX
fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
digits = estimate_decimal_digits[bin_expon_cx];
 
A10 = CX;
if (exponent_x & 1) {
  A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
  A10.w[0] = CX.w[0] << 3;
  CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
  CX2.w[0] = CX.w[0] << 1;
  __add_128_128 (A10, A10, CX2);
}
 
C256.w[1] = A10.w[1];
C256.w[0] = A10.w[0];
CS.w[0] = short_sqrt128 (A10);
CS.w[1] = 0;
mul_factor = 0;
           // check for exact result  
if (CS.w[0] < 10000000000000000ull) {
  if (CS.w[0] * CS.w[0] == A10.w[0]) {
    __sqr64_fast (S2, CS.w[0]);
    if (S2.w[1] == A10.w[1])    // && S2.w[0]==A10.w[0])
    {
      res =
        get_BID64 (0,
                   ((exponent_x - DECIMAL_EXPONENT_BIAS_128) >> 1) +
                   DECIMAL_EXPONENT_BIAS, CS.w[0], rnd_mode, pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
      (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
      BID_RETURN (res);
    }
  }
  if (CS.w[0] >= 1000000000000000ull) {
    done = 1;
    exponent_q = exponent_x;
    C256.w[1] = A10.w[1];
    C256.w[0] = A10.w[0];
  }
#ifdef SET_STATUS_FLAGS
  __set_status_flags (pfpsf, INEXACT_EXCEPTION);
#endif
  exact = 0;
} else {
  B10 = 0x3333333333333334ull;
  __mul_64x64_to_128_full (CS2, CS.w[0], B10);
  CS0 = CS2.w[1] >> 1;
  if (CS.w[0] != ((CS0 << 3) + (CS0 << 1))) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (pfpsf, INEXACT_EXCEPTION);
#endif
    exact = 0;
  }
  done = 1;
  CS.w[0] = CS0;
  exponent_q = exponent_x + 2;
  mul_factor = 10;
  mul_factor2 = 100;
  if (CS.w[0] >= 10000000000000000ull) {
    __mul_64x64_to_128_full (CS2, CS.w[0], B10);
    CS0 = CS2.w[1] >> 1;
    if (CS.w[0] != ((CS0 << 3) + (CS0 << 1))) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (pfpsf, INEXACT_EXCEPTION);
#endif
      exact = 0;
    }
    exponent_q += 2;
    CS.w[0] = CS0;
    mul_factor = 100;
    mul_factor2 = 10000;
  }
  if (exact) {
    CS0 = CS.w[0] * mul_factor;
    __sqr64_fast (CS1, CS0)
      if ((CS1.w[0] != A10.w[0]) || (CS1.w[1] != A10.w[1])) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (pfpsf, INEXACT_EXCEPTION);
#endif
      exact = 0;
    }
  }
}
 
if (!done) {
  // get number of digits in CX
  D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1];
  if (D > 0
      || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0]))
    digits++;
 
  // if exponent is odd, scale coefficient by 10
  scale = 31 - digits;
  exponent_q = exponent_x - scale;
  scale += (exponent_q & 1);    // exp. bias is even
 
  T128 = power10_table_128[scale];
  __mul_128x128_low (C256, CX, T128);
 
 
  CS.w[0] = short_sqrt128 (C256);
}
   //printf("CS=%016I64x\n",CS.w[0]);
 
exponent_q =
  ((exponent_q - DECIMAL_EXPONENT_BIAS_128) >> 1) +
  DECIMAL_EXPONENT_BIAS;
if ((exponent_q < 0) && (exponent_q + MAX_FORMAT_DIGITS >= 0)) {
  extra_digits = -exponent_q;
  exponent_q = 0;
 
  // get coeff*(2^M[extra_digits])/10^extra_digits
  __mul_64x64_to_128 (QH, CS.w[0], reciprocals10_64[extra_digits]);
 
  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  amount = short_recip_scale[extra_digits];
 
  CS0 = QH.w[1] >> amount;
 
#ifdef SET_STATUS_FLAGS
  if (exact) {
    if (CS.w[0] != CS0 * power10_table_128[extra_digits].w[0])
      exact = 0;
  }
  if (!exact)
    __set_status_flags (pfpsf, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
 
  CS.w[0] = CS0;
  if (!mul_factor)
    mul_factor = 1;
  mul_factor *= power10_table_128[extra_digits].w[0];
  __mul_64x64_to_128 (mul_factor2_long, mul_factor, mul_factor);
  if (mul_factor2_long.w[1])
    mul_factor2 = 0;
  else
    mul_factor2 = mul_factor2_long.w[1];
}
           // 4*C256
C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
C4.w[0] = C256.w[0] << 2;
 
#ifndef IEEE_ROUND_NEAREST
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
if (!((rnd_mode) & 3)) {
#endif
#endif
  // compare to midpoints
  CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
  //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x\n",C4.w[1],C4.w[0],CSM.w[1],CSM.w[0], CS.w[0]);
  if (mul_factor)
    CSM.w[0] *= mul_factor;
  // CSM^2
  __mul_64x64_to_128 (M256, CSM.w[0], CSM.w[0]);
  //__mul_128x128_to_256(M256, CSM, CSM);
 
  if (C4.w[1] > M256.w[1] ||
      (C4.w[1] == M256.w[1] && C4.w[0] > M256.w[0])) {
    // round up
    CS.w[0]++;
  } else {
    C8.w[0] = CS.w[0] << 3;
    C8.w[1] = 0;
    if (mul_factor) {
      if (mul_factor2) {
        __mul_64x64_to_128 (C8, C8.w[0], mul_factor2);
      } else {
        __mul_64x128_low (C8, C8.w[0], mul_factor2_long);
      }
    }
    // M256 - 8*CSM
    __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
    M256.w[1] = M256.w[1] - C8.w[1] - Carry;
 
    // if CSM' > C256, round up
    if (M256.w[1] > C4.w[1] ||
        (M256.w[1] == C4.w[1] && M256.w[0] > C4.w[0])) {
      // round down
      if (CS.w[0])
        CS.w[0]--;
    }
  }
#ifndef IEEE_ROUND_NEAREST
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
} else {
  CS.w[0]++;
  CSM.w[0] = CS.w[0];
  C8.w[0] = CSM.w[0] << 1;
  if (mul_factor)
    CSM.w[0] *= mul_factor;
  __mul_64x64_to_128 (M256, CSM.w[0], CSM.w[0]);
  C8.w[1] = 0;
  if (mul_factor) {
    if (mul_factor2) {
      __mul_64x64_to_128 (C8, C8.w[0], mul_factor2);
    } else {
      __mul_64x128_low (C8, C8.w[0], mul_factor2_long);
    }
  }
  //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x\n",C256.w[1],C256.w[0],M256.w[1],M256.w[0], CS.w[0]);
 
  if (M256.w[1] > C256.w[1] ||
      (M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0])) {
    __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
    M256.w[1] = M256.w[1] - Carry - C8.w[1];
    M256.w[0]++;
    if (!M256.w[0]) {
      M256.w[1]++;
 
    }
 
    if ((M256.w[1] > C256.w[1] ||
         (M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0]))
        && (CS.w[0] > 1)) {
 
      CS.w[0]--;
 
      if (CS.w[0] > 1) {
        __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
        M256.w[1] = M256.w[1] - Carry - C8.w[1];
        M256.w[0]++;
        if (!M256.w[0]) {
          M256.w[1]++;
        }
 
        if (M256.w[1] > C256.w[1] ||
            (M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0]))
          CS.w[0]--;
      }
    }
  }
 
  else {
                                /*__add_carry_out(M256.w[0], Carry, M256.w[0], C8.w[0]);
                                M256.w[1] = M256.w[1] + Carry + C8.w[1];
                                M256.w[0]++;
                                if(!M256.w[0])
                                {
                                        M256.w[1]++;
                                }
                                CS.w[0]++;
                        if(M256.w[1]<C256.w[1] ||
                                (M256.w[1]==C256.w[1] && M256.w[0]<=C256.w[0]))
                        {
                                CS.w[0]++;
                        }*/
    CS.w[0]++;
  }
  //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x %d\n",C4.w[1],C4.w[0],M256.w[1],M256.w[0], CS.w[0], exact);
  // RU?
  if (((rnd_mode) != ROUNDING_UP) || exact) {
    if (CS.w[0])
      CS.w[0]--;
  }
 
}
#endif
#endif
 //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x %d\n",C4.w[1],C4.w[0],M256.w[1],M256.w[0], CS.w[0], exact);
 
res = get_BID64 (0, exponent_q, CS.w[0], rnd_mode, pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
BID_RETURN (res);
 
 
}

Browse

Tools

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [libgcc/] [config/] [libbid/] [bid64_sqrt.c] - Blame information for rev 407

Line No.	Rev	Author	Line
1	272	jeremybenn	`/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.`
2
3			`This file is part of GCC.`
4
5			`GCC is free software; you can redistribute it and/or modify it under`
6			`the terms of the GNU General Public License as published by the Free`
7			`Software Foundation; either version 3, or (at your option) any later`
8			`version.`
9
10			`GCC is distributed in the hope that it will be useful, but WITHOUT ANY`
11			`WARRANTY; without even the implied warranty of MERCHANTABILITY or`
12			`FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`
13			`for more details.`
14
15			`Under Section 7 of GPL version 3, you are granted additional`
16			`permissions described in the GCC Runtime Library Exception, version`
17			`3.1, as published by the Free Software Foundation.`
18
19			`You should have received a copy of the GNU General Public License and`
20			`a copy of the GCC Runtime Library Exception along with this program;`
21			`see the files COPYING3 and COPYING.RUNTIME respectively. If not, see`
22			`<http://www.gnu.org/licenses/>. */`
23
24			`/*****************************************************************************`
25			`* BID64 square root`
26			`*****************************************************************************`
27			`*`
28			`* Algorithm description:`
29			`*`
30			`* if(exponent_x is odd)`
31			`* scale coefficient_x by 10, adjust exponent`
32			`* - get lower estimate for number of digits in coefficient_x`
33			`* - scale coefficient x to between 31 and 33 decimal digits`
34			`* - in parallel, check for exact case and return if true`
35			`* - get high part of result coefficient using double precision sqrt`
36			`* - compute remainder and refine coefficient in one iteration (which`
37			`* modifies it by at most 1)`
38			`* - result exponent is easy to compute from the adjusted arg. exponent`
39			`*`
40			`****************************************************************************/`
41
42			`#include "bid_internal.h"`
43			`#include "bid_sqrt_macros.h"`
44			`#ifdef UNCHANGED_BINARY_STATUS_FLAGS`
45			`#include <fenv.h>`
46
47			`#define FE_ALL_FLAGS FE_INVALID\|FE_DIVBYZERO\|FE_OVERFLOW\|FE_UNDERFLOW\|FE_INEXACT`
48			`#endif`
49
50			`extern double sqrt (double);`
51
52			`#if DECIMAL_CALL_BY_REFERENCE`
53
54			`void`
55			`bid64_sqrt (UINT64 * pres,`
56			`UINT64 *`
57			`px _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM`
58			`_EXC_INFO_PARAM) {`
59			`UINT64 x;`
60			`#else`
61
62			`UINT64`
63			`bid64_sqrt (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM`
64			`_EXC_MASKS_PARAM _EXC_INFO_PARAM) {`
65			`#endif`
66			`UINT128 CA, CT;`
67			`UINT64 sign_x, coefficient_x;`
68			`UINT64 Q, Q2, A10, C4, R, R2, QE, res;`
69			`SINT64 D;`
70			`int_double t_scale;`
71			`int_float tempx;`
72			`double da, dq, da_h, da_l, dqe;`
73			`int exponent_x, exponent_q, bin_expon_cx;`
74			`int digits_x;`
75			`int scale;`
76			`#ifdef UNCHANGED_BINARY_STATUS_FLAGS`
77			`fexcept_t binaryflags = 0;`
78			`#endif`
79
80			`#if DECIMAL_CALL_BY_REFERENCE`
81			`#if !DECIMAL_GLOBAL_ROUNDING`
82			`_IDEC_round rnd_mode = *prnd_mode;`
83			`#endif`
84			`x = *px;`
85			`#endif`
86
87			`// unpack arguments, check for NaN or Infinity`
88			`if (!unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x)) {`
89			`// x is Inf. or NaN or 0`
90			`if ((x & INFINITY_MASK64) == INFINITY_MASK64) {`
91			`res = coefficient_x;`
92			`if ((coefficient_x & SSNAN_MASK64) == SINFINITY_MASK64) // -Infinity`
93			`{`
94			`res = NAN_MASK64;`
95			`#ifdef SET_STATUS_FLAGS`
96			`__set_status_flags (pfpsf, INVALID_EXCEPTION);`
97			`#endif`
98			`}`
99			`#ifdef SET_STATUS_FLAGS`
100			`if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN`
101			`__set_status_flags (pfpsf, INVALID_EXCEPTION);`
102			`#endif`
103			`BID_RETURN (res & QUIET_MASK64);`
104			`}`
105			`// x is 0`
106			`exponent_x = (exponent_x + DECIMAL_EXPONENT_BIAS) >> 1;`
107			`res = sign_x \| (((UINT64) exponent_x) << 53);`
108			`BID_RETURN (res);`
109			`}`
110			`// x<0?`
111			`if (sign_x && coefficient_x) {`
112			`res = NAN_MASK64;`
113			`#ifdef SET_STATUS_FLAGS`
114			`__set_status_flags (pfpsf, INVALID_EXCEPTION);`
115			`#endif`
116			`BID_RETURN (res);`
117			`}`
118			`#ifdef UNCHANGED_BINARY_STATUS_FLAGS`
119			`(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);`
120			`#endif`
121			`//--- get number of bits in the coefficient of x ---`
122			`tempx.d = (float) coefficient_x;`
123			`bin_expon_cx = ((tempx.i >> 23) & 0xff) - 0x7f;`
124			`digits_x = estimate_decimal_digits[bin_expon_cx];`
125			`// add test for range`
126			`if (coefficient_x >= power10_index_binexp[bin_expon_cx])`
127			`digits_x++;`
128
129			`A10 = coefficient_x;`
130			`if (exponent_x & 1) {`
131			`A10 = (A10 << 2) + A10;`
132			`A10 += A10;`
133			`}`
134
135			`dqe = sqrt ((double) A10);`
136			`QE = (UINT32) dqe;`
137			`if (QE * QE == A10) {`
138			`res =`
139			`very_fast_get_BID64 (0, (exponent_x + DECIMAL_EXPONENT_BIAS) >> 1,`
140			`QE);`
141			`#ifdef UNCHANGED_BINARY_STATUS_FLAGS`
142			`(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);`
143			`#endif`
144			`BID_RETURN (res);`
145			`}`
146			`// if exponent is odd, scale coefficient by 10`
147			`scale = 31 - digits_x;`
148			`exponent_q = exponent_x - scale;`
149			`scale += (exponent_q & 1); // exp. bias is even`
150
151			`CT = power10_table_128[scale];`
152			`__mul_64x128_short (CA, coefficient_x, CT);`
153
154			`// 2^64`
155			`t_scale.i = 0x43f0000000000000ull;`
156			`// convert CA to DP`
157			`da_h = CA.w[1];`
158			`da_l = CA.w[0];`
159			`da = da_h * t_scale.d + da_l;`
160
161			`dq = sqrt (da);`
162
163			`Q = (UINT64) dq;`
164
165			`// get sign(sqrt(CA)-Q)`
166			`R = CA.w[0] - Q * Q;`
167			`R = ((SINT64) R) >> 63;`
168			`D = R + R + 1;`
169
170			`exponent_q = (exponent_q + DECIMAL_EXPONENT_BIAS) >> 1;`
171
172			`#ifdef SET_STATUS_FLAGS`
173			`__set_status_flags (pfpsf, INEXACT_EXCEPTION);`
174			`#endif`
175
176			`#ifndef IEEE_ROUND_NEAREST`
177			`#ifndef IEEE_ROUND_NEAREST_TIES_AWAY`
178			`if (!((rnd_mode) & 3)) {`
179			`#endif`
180			`#endif`
181
182			`// midpoint to check`
183			`Q2 = Q + Q + D;`
184			`C4 = CA.w[0] << 2;`
185
186			`// get sign(-sqrt(CA)+Midpoint)`
187			`R2 = Q2 * Q2 - C4;`
188			`R2 = ((SINT64) R2) >> 63;`
189
190			`// adjust Q if R!=R2`
191			`Q += (D & (R ^ R2));`
192			`#ifndef IEEE_ROUND_NEAREST`
193			`#ifndef IEEE_ROUND_NEAREST_TIES_AWAY`
194			`} else {`
195			`C4 = CA.w[0];`
196			`Q += D;`
197			`if ((SINT64) (Q * Q - C4) > 0)`
198			`Q--;`
199			`if (rnd_mode == ROUNDING_UP)`
200			`Q++;`
201			`}`
202			`#endif`
203			`#endif`
204
205			`res = fast_get_BID64 (0, exponent_q, Q);`
206			`#ifdef UNCHANGED_BINARY_STATUS_FLAGS`
207			`(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);`
208			`#endif`
209			`BID_RETURN (res);`
210			`}`
211
212
213			`TYPE0_FUNCTION_ARG1 (UINT64, bid64q_sqrt, x)`
214
215			`UINT256 M256, C4, C8;`
216			`UINT128 CX, CX2, A10, S2, T128, CS, CSM, CS2, C256, CS1,`
217			`mul_factor2_long = { {0x0ull, 0x0ull} }, QH, Tmp, TP128, Qh, Ql;`
218			`UINT64 sign_x, Carry, B10, res, mul_factor, mul_factor2 = 0x0ull, CS0;`
219			`SINT64 D;`
220			`int_float fx, f64;`
221			`int exponent_x, bin_expon_cx, done = 0;`
222			`int digits, scale, exponent_q = 0, exact = 1, amount, extra_digits;`
223			`#ifdef UNCHANGED_BINARY_STATUS_FLAGS`
224			`fexcept_t binaryflags = 0;`
225			`#endif`
226
227			`// unpack arguments, check for NaN or Infinity`
228			`if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {`
229			`res = CX.w[1];`
230			`// NaN ?`
231			`if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {`
232			`#ifdef SET_STATUS_FLAGS`
233			`if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN`
234			`__set_status_flags (pfpsf, INVALID_EXCEPTION);`
235			`#endif`
236			`Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull);`
237			`Tmp.w[0] = CX.w[0];`
238			`TP128 = reciprocals10_128[18];`
239			`__mul_128x128_full (Qh, Ql, Tmp, TP128);`
240			`amount = recip_scale[18];`
241			`__shr_128 (Tmp, Qh, amount);`
242			`res = (CX.w[1] & 0xfc00000000000000ull) \| Tmp.w[0];`
243			`BID_RETURN (res);`
244			`}`
245			`// x is Infinity?`
246			`if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {`
247			`if (sign_x) {`
248			`// -Inf, return NaN`
249			`res = 0x7c00000000000000ull;`
250			`#ifdef SET_STATUS_FLAGS`
251			`__set_status_flags (pfpsf, INVALID_EXCEPTION);`
252			`#endif`
253			`}`
254			`BID_RETURN (res);`
255			`}`
256			`// x is 0 otherwise`
257
258			`exponent_x =`
259			`((exponent_x - DECIMAL_EXPONENT_BIAS_128) >> 1) +`
260			`DECIMAL_EXPONENT_BIAS;`
261			`if (exponent_x < 0)`
262			`exponent_x = 0;`
263			`if (exponent_x > DECIMAL_MAX_EXPON_64)`
264			`exponent_x = DECIMAL_MAX_EXPON_64;`
265			`//res= sign_x \| (((UINT64)exponent_x)<<53);`
266			`res = get_BID64 (sign_x, exponent_x, 0, rnd_mode, pfpsf);`
267			`BID_RETURN (res);`
268			`}`
269			`if (sign_x) {`
270			`res = 0x7c00000000000000ull;`
271			`#ifdef SET_STATUS_FLAGS`
272			`__set_status_flags (pfpsf, INVALID_EXCEPTION);`
273			`#endif`
274			`BID_RETURN (res);`
275			`}`
276			`#ifdef UNCHANGED_BINARY_STATUS_FLAGS`
277			`(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);`
278			`#endif`
279
280			`// 2^64`
281			`f64.i = 0x5f800000;`
282
283			`// fx ~ CX`
284			`fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];`
285			`bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;`
286			`digits = estimate_decimal_digits[bin_expon_cx];`
287
288			`A10 = CX;`
289			`if (exponent_x & 1) {`
290			`A10.w[1] = (CX.w[1] << 3) \| (CX.w[0] >> 61);`
291			`A10.w[0] = CX.w[0] << 3;`
292			`CX2.w[1] = (CX.w[1] << 1) \| (CX.w[0] >> 63);`
293			`CX2.w[0] = CX.w[0] << 1;`
294			`__add_128_128 (A10, A10, CX2);`
295			`}`
296
297			`C256.w[1] = A10.w[1];`
298			`C256.w[0] = A10.w[0];`
299			`CS.w[0] = short_sqrt128 (A10);`
300			`CS.w[1] = 0;`
301			`mul_factor = 0;`
302			`// check for exact result`
303			`if (CS.w[0] < 10000000000000000ull) {`
304			`if (CS.w[0] * CS.w[0] == A10.w[0]) {`
305			`__sqr64_fast (S2, CS.w[0]);`
306			`if (S2.w[1] == A10.w[1]) // && S2.w[0]==A10.w[0])`
307			`{`
308			`res =`
309			`get_BID64 (0,`
310			`((exponent_x - DECIMAL_EXPONENT_BIAS_128) >> 1) +`
311			`DECIMAL_EXPONENT_BIAS, CS.w[0], rnd_mode, pfpsf);`
312			`#ifdef UNCHANGED_BINARY_STATUS_FLAGS`
313			`(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);`
314			`#endif`
315			`BID_RETURN (res);`
316			`}`
317			`}`
318			`if (CS.w[0] >= 1000000000000000ull) {`
319			`done = 1;`
320			`exponent_q = exponent_x;`
321			`C256.w[1] = A10.w[1];`
322			`C256.w[0] = A10.w[0];`
323			`}`
324			`#ifdef SET_STATUS_FLAGS`
325			`__set_status_flags (pfpsf, INEXACT_EXCEPTION);`
326			`#endif`
327			`exact = 0;`
328			`} else {`
329			`B10 = 0x3333333333333334ull;`
330			`__mul_64x64_to_128_full (CS2, CS.w[0], B10);`
331			`CS0 = CS2.w[1] >> 1;`
332			`if (CS.w[0] != ((CS0 << 3) + (CS0 << 1))) {`
333			`#ifdef SET_STATUS_FLAGS`
334			`__set_status_flags (pfpsf, INEXACT_EXCEPTION);`
335			`#endif`
336			`exact = 0;`
337			`}`
338			`done = 1;`
339			`CS.w[0] = CS0;`
340			`exponent_q = exponent_x + 2;`
341			`mul_factor = 10;`
342			`mul_factor2 = 100;`
343			`if (CS.w[0] >= 10000000000000000ull) {`
344			`__mul_64x64_to_128_full (CS2, CS.w[0], B10);`
345			`CS0 = CS2.w[1] >> 1;`
346			`if (CS.w[0] != ((CS0 << 3) + (CS0 << 1))) {`
347			`#ifdef SET_STATUS_FLAGS`
348			`__set_status_flags (pfpsf, INEXACT_EXCEPTION);`
349			`#endif`
350			`exact = 0;`
351			`}`
352			`exponent_q += 2;`
353			`CS.w[0] = CS0;`
354			`mul_factor = 100;`
355			`mul_factor2 = 10000;`
356			`}`
357			`if (exact) {`
358			`CS0 = CS.w[0] * mul_factor;`
359			`__sqr64_fast (CS1, CS0)`
360			`if ((CS1.w[0] != A10.w[0]) \|\| (CS1.w[1] != A10.w[1])) {`
361			`#ifdef SET_STATUS_FLAGS`
362			`__set_status_flags (pfpsf, INEXACT_EXCEPTION);`
363			`#endif`
364			`exact = 0;`
365			`}`
366			`}`
367			`}`
368
369			`if (!done) {`
370			`// get number of digits in CX`
371			`D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1];`
372			`if (D > 0`
373			`\|\| (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0]))`
374			`digits++;`
375
376			`// if exponent is odd, scale coefficient by 10`
377			`scale = 31 - digits;`
378			`exponent_q = exponent_x - scale;`
379			`scale += (exponent_q & 1); // exp. bias is even`
380
381			`T128 = power10_table_128[scale];`
382			`__mul_128x128_low (C256, CX, T128);`
383
384
385			`CS.w[0] = short_sqrt128 (C256);`
386			`}`
387			`//printf("CS=%016I64x\n",CS.w[0]);`
388
389			`exponent_q =`
390			`((exponent_q - DECIMAL_EXPONENT_BIAS_128) >> 1) +`
391			`DECIMAL_EXPONENT_BIAS;`
392			`if ((exponent_q < 0) && (exponent_q + MAX_FORMAT_DIGITS >= 0)) {`
393			`extra_digits = -exponent_q;`
394			`exponent_q = 0;`
395
396			`// get coeff*(2^M[extra_digits])/10^extra_digits`
397			`__mul_64x64_to_128 (QH, CS.w[0], reciprocals10_64[extra_digits]);`
398
399			`// now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128`
400			`amount = short_recip_scale[extra_digits];`
401
402			`CS0 = QH.w[1] >> amount;`
403
404			`#ifdef SET_STATUS_FLAGS`
405			`if (exact) {`
406			`if (CS.w[0] != CS0 * power10_table_128[extra_digits].w[0])`
407			`exact = 0;`
408			`}`
409			`if (!exact)`
410			`__set_status_flags (pfpsf, UNDERFLOW_EXCEPTION \| INEXACT_EXCEPTION);`
411			`#endif`
412
413			`CS.w[0] = CS0;`
414			`if (!mul_factor)`
415			`mul_factor = 1;`
416			`mul_factor *= power10_table_128[extra_digits].w[0];`
417			`__mul_64x64_to_128 (mul_factor2_long, mul_factor, mul_factor);`
418			`if (mul_factor2_long.w[1])`
419			`mul_factor2 = 0;`
420			`else`
421			`mul_factor2 = mul_factor2_long.w[1];`
422			`}`
423			`// 4*C256`
424			`C4.w[1] = (C256.w[1] << 2) \| (C256.w[0] >> 62);`
425			`C4.w[0] = C256.w[0] << 2;`
426
427			`#ifndef IEEE_ROUND_NEAREST`
428			`#ifndef IEEE_ROUND_NEAREST_TIES_AWAY`
429			`if (!((rnd_mode) & 3)) {`
430			`#endif`
431			`#endif`
432			`// compare to midpoints`
433			`CSM.w[0] = (CS.w[0] + CS.w[0]) \| 1;`
434			`//printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x\n",C4.w[1],C4.w[0],CSM.w[1],CSM.w[0], CS.w[0]);`
435			`if (mul_factor)`
436			`CSM.w[0] *= mul_factor;`
437			`// CSM^2`
438			`__mul_64x64_to_128 (M256, CSM.w[0], CSM.w[0]);`
439			`//__mul_128x128_to_256(M256, CSM, CSM);`
440
441			`if (C4.w[1] > M256.w[1] \|\|`
442			`(C4.w[1] == M256.w[1] && C4.w[0] > M256.w[0])) {`
443			`// round up`
444			`CS.w[0]++;`
445			`} else {`
446			`C8.w[0] = CS.w[0] << 3;`
447			`C8.w[1] = 0;`
448			`if (mul_factor) {`
449			`if (mul_factor2) {`
450			`__mul_64x64_to_128 (C8, C8.w[0], mul_factor2);`
451			`} else {`
452			`__mul_64x128_low (C8, C8.w[0], mul_factor2_long);`
453			`}`
454			`}`
455			`// M256 - 8*CSM`
456			`__sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);`
457			`M256.w[1] = M256.w[1] - C8.w[1] - Carry;`
458
459			`// if CSM' > C256, round up`
460			`if (M256.w[1] > C4.w[1] \|\|`
461			`(M256.w[1] == C4.w[1] && M256.w[0] > C4.w[0])) {`
462			`// round down`
463			`if (CS.w[0])`
464			`CS.w[0]--;`
465			`}`
466			`}`
467			`#ifndef IEEE_ROUND_NEAREST`
468			`#ifndef IEEE_ROUND_NEAREST_TIES_AWAY`
469			`} else {`
470			`CS.w[0]++;`
471			`CSM.w[0] = CS.w[0];`
472			`C8.w[0] = CSM.w[0] << 1;`
473			`if (mul_factor)`
474			`CSM.w[0] *= mul_factor;`
475			`__mul_64x64_to_128 (M256, CSM.w[0], CSM.w[0]);`
476			`C8.w[1] = 0;`
477			`if (mul_factor) {`
478			`if (mul_factor2) {`
479			`__mul_64x64_to_128 (C8, C8.w[0], mul_factor2);`
480			`} else {`
481			`__mul_64x128_low (C8, C8.w[0], mul_factor2_long);`
482			`}`
483			`}`
484			`//printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x\n",C256.w[1],C256.w[0],M256.w[1],M256.w[0], CS.w[0]);`
485
486			`if (M256.w[1] > C256.w[1] \|\|`
487			`(M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0])) {`
488			`__sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);`
489			`M256.w[1] = M256.w[1] - Carry - C8.w[1];`
490			`M256.w[0]++;`
491			`if (!M256.w[0]) {`
492			`M256.w[1]++;`
493
494			`}`
495
496			`if ((M256.w[1] > C256.w[1] \|\|`
497			`(M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0]))`
498			`&& (CS.w[0] > 1)) {`
499
500			`CS.w[0]--;`
501
502			`if (CS.w[0] > 1) {`
503			`__sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);`
504			`M256.w[1] = M256.w[1] - Carry - C8.w[1];`
505			`M256.w[0]++;`
506			`if (!M256.w[0]) {`
507			`M256.w[1]++;`
508			`}`
509
510			`if (M256.w[1] > C256.w[1] \|\|`
511			`(M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0]))`
512			`CS.w[0]--;`
513			`}`
514			`}`
515			`}`
516
517			`else {`
518			`/*__add_carry_out(M256.w[0], Carry, M256.w[0], C8.w[0]);`
519			`M256.w[1] = M256.w[1] + Carry + C8.w[1];`
520			`M256.w[0]++;`
521			`if(!M256.w[0])`
522			`{`
523			`M256.w[1]++;`
524			`}`
525			`CS.w[0]++;`
526			`if(M256.w[1]<C256.w[1] \|\|`
527			`(M256.w[1]==C256.w[1] && M256.w[0]<=C256.w[0]))`
528			`{`
529			`CS.w[0]++;`
530			`}*/`
531			`CS.w[0]++;`
532			`}`
533			`//printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x %d\n",C4.w[1],C4.w[0],M256.w[1],M256.w[0], CS.w[0], exact);`
534			`// RU?`
535			`if (((rnd_mode) != ROUNDING_UP) \|\| exact) {`
536			`if (CS.w[0])`
537			`CS.w[0]--;`
538			`}`
539
540			`}`
541			`#endif`
542			`#endif`
543			`//printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x %d\n",C4.w[1],C4.w[0],M256.w[1],M256.w[0], CS.w[0], exact);`
544
545			`res = get_BID64 (0, exponent_q, CS.w[0], rnd_mode, pfpsf);`
546			`#ifdef UNCHANGED_BINARY_STATUS_FLAGS`
547			`(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);`
548			`#endif`
549			`BID_RETURN (res);`
550
551
552			`}`