URL https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

[/] [or1k_soc_on_altera_embedded_dev_kit/] [trunk/] [linux-2.6/] [linux-2.6.24/] [arch/] [powerpc/] [math-emu/] [op-2.h] - Blame information for rev 3

Details | Compare with Previous | View Log


/*
 * Basic two-word fraction declaration and manipulation.
 */
 
#define _FP_FRAC_DECL_2(X)      _FP_W_TYPE X##_f0, X##_f1
#define _FP_FRAC_COPY_2(D,S)    (D##_f0 = S##_f0, D##_f1 = S##_f1)
#define _FP_FRAC_SET_2(X,I)     __FP_FRAC_SET_2(X, I)
#define _FP_FRAC_HIGH_2(X)      (X##_f1)
#define _FP_FRAC_LOW_2(X)       (X##_f0)
#define _FP_FRAC_WORD_2(X,w)    (X##_f##w)
 
#define _FP_FRAC_SLL_2(X,N)                                             \
  do {                                                                  \
    if ((N) < _FP_W_TYPE_SIZE)                                          \
      {                                                                 \
        if (__builtin_constant_p(N) && (N) == 1)                        \
          {                                                             \
            X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0);   \
            X##_f0 += X##_f0;                                           \
          }                                                             \
        else                                                            \
          {                                                             \
            X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
            X##_f0 <<= (N);                                             \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);                     \
        X##_f0 = 0;                                                     \
      }                                                                 \
  } while (0)
 
#define _FP_FRAC_SRL_2(X,N)                                             \
  do {                                                                  \
    if ((N) < _FP_W_TYPE_SIZE)                                          \
      {                                                                 \
        X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N));     \
        X##_f1 >>= (N);                                                 \
      }                                                                 \
    else                                                                \
      {                                                                 \
        X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);                     \
        X##_f1 = 0;                                                     \
      }                                                                 \
  } while (0)
 
/* Right shift with sticky-lsb.  */
#define _FP_FRAC_SRS_2(X,N,sz)                                          \
  do {                                                                  \
    if ((N) < _FP_W_TYPE_SIZE)                                          \
      {                                                                 \
        X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) |   \
                  (__builtin_constant_p(N) && (N) == 1                  \
                   ? X##_f0 & 1                                         \
                   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));        \
        X##_f1 >>= (N);                                                 \
      }                                                                 \
    else                                                                \
      {                                                                 \
        X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) |                   \
                  (((X##_f1 << (2 * _FP_W_TYPE_SIZE - (N))) |           \
                   X##_f0) != 0));                                      \
        X##_f1 = 0;                                                     \
      }                                                                 \
  } while (0)
 
#define _FP_FRAC_ADDI_2(X,I) \
  __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
 
#define _FP_FRAC_ADD_2(R,X,Y) \
  __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
 
#define _FP_FRAC_SUB_2(R,X,Y) \
  __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
 
#define _FP_FRAC_CLZ_2(R,X)     \
  do {                          \
    if (X##_f1)                 \
      __FP_CLZ(R,X##_f1);       \
    else                        \
    {                           \
      __FP_CLZ(R,X##_f0);       \
      R += _FP_W_TYPE_SIZE;     \
    }                           \
  } while(0)
 
/* Predicates */
#define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE)X##_f1 < 0)
#define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
#define _FP_FRAC_OVERP_2(fs,X)  (X##_f1 & _FP_OVERFLOW_##fs)
#define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
#define _FP_FRAC_GT_2(X, Y)     \
  ((X##_f1 > Y##_f1) || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
#define _FP_FRAC_GE_2(X, Y)     \
  ((X##_f1 > Y##_f1) || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
 
#define _FP_ZEROFRAC_2          0, 0
#define _FP_MINFRAC_2           0, 1
 
/*
 * Internals
 */
 
#define __FP_FRAC_SET_2(X,I1,I0)        (X##_f0 = I0, X##_f1 = I1)
 
#define __FP_CLZ_2(R, xh, xl)   \
  do {                          \
    if (xh)                     \
      __FP_CLZ(R,xl);           \
    else                        \
    {                           \
      __FP_CLZ(R,xl);           \
      R += _FP_W_TYPE_SIZE;     \
    }                           \
  } while(0)
 
#if 0
 
#ifndef __FP_FRAC_ADDI_2
#define __FP_FRAC_ADDI_2(xh, xl, i) \
  (xh += ((xl += i) < i))
#endif
#ifndef __FP_FRAC_ADD_2
#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
  (rh = xh + yh + ((rl = xl + yl) < xl))
#endif
#ifndef __FP_FRAC_SUB_2
#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
  (rh = xh - yh - ((rl = xl - yl) > xl))
#endif
 
#else
 
#undef __FP_FRAC_ADDI_2
#define __FP_FRAC_ADDI_2(xh, xl, i)     add_ssaaaa(xh, xl, xh, xl, 0, i)
#undef __FP_FRAC_ADD_2
#define __FP_FRAC_ADD_2                 add_ssaaaa
#undef __FP_FRAC_SUB_2
#define __FP_FRAC_SUB_2                 sub_ddmmss
 
#endif
 
/*
 * Unpack the raw bits of a native fp value.  Do not classify or
 * normalize the data.
 */
 
#define _FP_UNPACK_RAW_2(fs, X, val)                    \
  do {                                                  \
    union _FP_UNION_##fs _flo; _flo.flt = (val);        \
                                                        \
    X##_f0 = _flo.bits.frac0;                           \
    X##_f1 = _flo.bits.frac1;                           \
    X##_e  = _flo.bits.exp;                             \
    X##_s  = _flo.bits.sign;                            \
  } while (0)
 
 
/*
 * Repack the raw bits of a native fp value.
 */
 
#define _FP_PACK_RAW_2(fs, val, X)                      \
  do {                                                  \
    union _FP_UNION_##fs _flo;                          \
                                                        \
    _flo.bits.frac0 = X##_f0;                           \
    _flo.bits.frac1 = X##_f1;                           \
    _flo.bits.exp   = X##_e;                            \
    _flo.bits.sign  = X##_s;                            \
                                                        \
    (val) = _flo.flt;                                   \
  } while (0)
 
 
/*
 * Multiplication algorithms:
 */
 
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
 
#define _FP_MUL_MEAT_2_wide(fs, R, X, Y, doit)                          \
  do {                                                                  \
    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);      \
                                                                        \
    doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
    doit(_b_f1, _b_f0, X##_f0, Y##_f1);                                 \
    doit(_c_f1, _c_f0, X##_f1, Y##_f0);                                 \
    doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
                                                                        \
    __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
                    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0), \
                    0, _b_f1, _b_f0, 0,                                   \
                    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
                    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0));        \
    __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
                    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0), \
                    0, _c_f1, _c_f0, 0,                                   \
                    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
                    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0));        \
                                                                        \
    /* Normalize since we know where the msb of the multiplicands       \
       were (bit B), we know that the msb of the of the product is      \
       at either 2B or 2B-1.  */                                        \
    _FP_FRAC_SRS_4(_z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs);     \
    R##_f0 = _FP_FRAC_WORD_4(_z,0);                                     \
    R##_f1 = _FP_FRAC_WORD_4(_z,1);                                     \
  } while (0)
 
/* This next macro appears to be totally broken. Fortunately nowhere
 * seems to use it :-> The problem is that we define _z[4] but
 * then use it in _FP_FRAC_SRS_4, which will attempt to access
 * _z_f[n] which will cause an error. The fix probably involves
 * declaring it with _FP_FRAC_DECL_4, see previous macro. -- PMM 02/1998
 */
#define _FP_MUL_MEAT_2_gmp(fs, R, X, Y)                                 \
  do {                                                                  \
    _FP_W_TYPE _x[2], _y[2], _z[4];                                     \
    _x[0] = X##_f0; _x[1] = X##_f1;                                      \
    _y[0] = Y##_f0; _y[1] = Y##_f1;                                      \
                                                                        \
    mpn_mul_n(_z, _x, _y, 2);                                           \
                                                                        \
    /* Normalize since we know where the msb of the multiplicands       \
       were (bit B), we know that the msb of the of the product is      \
       at either 2B or 2B-1.  */                                        \
    _FP_FRAC_SRS_4(_z, _FP_WFRACBITS##_fs-1, 2*_FP_WFRACBITS_##fs);     \
    R##_f0 = _z[0];                                                     \
    R##_f1 = _z[1];                                                     \
  } while (0)
 
 
/*
 * Division algorithms:
 * This seems to be giving me difficulties -- PMM
 * Look, NetBSD seems to be able to comment algorithms. Can't you?
 * I've thrown printks at the problem.
 * This now appears to work, but I still don't really know why.
 * Also, I don't think the result is properly normalised...
 */
 
#define _FP_DIV_MEAT_2_udiv_64(fs, R, X, Y)                             \
  do {                                                                  \
    extern void _fp_udivmodti4(_FP_W_TYPE q[2], _FP_W_TYPE r[2],        \
                               _FP_W_TYPE n1, _FP_W_TYPE n0,            \
                               _FP_W_TYPE d1, _FP_W_TYPE d0);           \
    _FP_W_TYPE _n_f3, _n_f2, _n_f1, _n_f0, _r_f1, _r_f0;                \
    _FP_W_TYPE _q_f1, _q_f0, _m_f1, _m_f0;                              \
    _FP_W_TYPE _rmem[2], _qmem[2];                                      \
    /* I think this check is to ensure that the result is normalised.   \
     * Assuming X,Y normalised (ie in [1.0,2.0)) X/Y will be in         \
     * [0.5,2.0). Furthermore, it will be less than 1.0 iff X < Y.      \
     * In this case we tweak things. (this is based on comments in      \
     * the NetBSD FPU emulation code. )                                 \
     * We know X,Y are normalised because we ensure this as part of     \
     * the unpacking process. -- PMM                                    \
     */                                                                 \
    if (_FP_FRAC_GT_2(X, Y))                                            \
      {                                                                 \
/*      R##_e++; */                                                     \
        _n_f3 = X##_f1 >> 1;                                            \
        _n_f2 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;          \
        _n_f1 = X##_f0 << (_FP_W_TYPE_SIZE - 1);                        \
        _n_f0 = 0;                                                       \
      }                                                                 \
    else                                                                \
      {                                                                 \
        R##_e--;                                                        \
        _n_f3 = X##_f1;                                                 \
        _n_f2 = X##_f0;                                                 \
        _n_f1 = _n_f0 = 0;                                               \
      }                                                                 \
                                                                        \
    /* Normalize, i.e. make the most significant bit of the             \
       denominator set.  CHANGED: - 1 to nothing -- PMM */              \
    _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs /* -1 */);                    \
                                                                        \
    /* Do the 256/128 bit division given the 128-bit _fp_udivmodtf4     \
       primitive snagged from libgcc2.c.  */                            \
                                                                        \
    _fp_udivmodti4(_qmem, _rmem, _n_f3, _n_f2, 0, Y##_f1);               \
    _q_f1 = _qmem[0];                                                    \
    umul_ppmm(_m_f1, _m_f0, _q_f1, Y##_f0);                             \
    _r_f1 = _rmem[0];                                                    \
    _r_f0 = _n_f1;                                                      \
    if (_FP_FRAC_GT_2(_m, _r))                                          \
      {                                                                 \
        _q_f1--;                                                        \
        _FP_FRAC_ADD_2(_r, _r, Y);                                      \
        if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))              \
          {                                                             \
            _q_f1--;                                                    \
            _FP_FRAC_ADD_2(_r, _r, Y);                                  \
          }                                                             \
      }                                                                 \
    _FP_FRAC_SUB_2(_r, _r, _m);                                         \
                                                                        \
    _fp_udivmodti4(_qmem, _rmem, _r_f1, _r_f0, 0, Y##_f1);               \
    _q_f0 = _qmem[0];                                                    \
    umul_ppmm(_m_f1, _m_f0, _q_f0, Y##_f0);                             \
    _r_f1 = _rmem[0];                                                    \
    _r_f0 = _n_f0;                                                      \
    if (_FP_FRAC_GT_2(_m, _r))                                          \
      {                                                                 \
        _q_f0--;                                                        \
        _FP_FRAC_ADD_2(_r, _r, Y);                                      \
        if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))              \
          {                                                             \
            _q_f0--;                                                    \
            _FP_FRAC_ADD_2(_r, _r, Y);                                  \
          }                                                             \
      }                                                                 \
    _FP_FRAC_SUB_2(_r, _r, _m);                                         \
                                                                        \
    R##_f1 = _q_f1;                                                     \
    R##_f0 = _q_f0 | ((_r_f1 | _r_f0) != 0);                            \
    /* adjust so answer is normalized again. I'm not sure what the      \
     * final sz param should be. In practice it's never used since      \
     * N is 1 which is always going to be < _FP_W_TYPE_SIZE...          \
     */                                                                 \
    /* _FP_FRAC_SRS_2(R,1,_FP_WFRACBITS_##fs);  */                      \
  } while (0)
 
 
#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)                                 \
  do {                                                                  \
    _FP_W_TYPE _x[4], _y[2], _z[4];                                     \
    _y[0] = Y##_f0; _y[1] = Y##_f1;                                      \
    _x[0] = _x[3] = 0;                                                    \
    if (_FP_FRAC_GT_2(X, Y))                                            \
      {                                                                 \
        R##_e++;                                                        \
        _x[1] = (X##_f0 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE) |        \
                 X##_f1 >> (_FP_W_TYPE_SIZE -                           \
                            (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE)));      \
        _x[2] = X##_f1 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE);          \
      }                                                                 \
    else                                                                \
      {                                                                 \
        _x[1] = (X##_f0 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE) |          \
                 X##_f1 >> (_FP_W_TYPE_SIZE -                           \
                            (_FP_WFRACBITS - _FP_W_TYPE_SIZE)));        \
        _x[2] = X##_f1 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE);            \
      }                                                                 \
                                                                        \
    (void) mpn_divrem (_z, 0, _x, 4, _y, 2);                             \
    R##_f1 = _z[1];                                                     \
    R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);                            \
  } while (0)
 
 
/*
 * Square root algorithms:
 * We have just one right now, maybe Newton approximation
 * should be added for those machines where division is fast.
 */
 
#define _FP_SQRT_MEAT_2(R, S, T, X, q)                  \
  do {                                                  \
    while (q)                                           \
      {                                                 \
        T##_f1 = S##_f1 + q;                            \
        if (T##_f1 <= X##_f1)                           \
          {                                             \
            S##_f1 = T##_f1 + q;                        \
            X##_f1 -= T##_f1;                           \
            R##_f1 += q;                                \
          }                                             \
        _FP_FRAC_SLL_2(X, 1);                           \
        q >>= 1;                                        \
      }                                                 \
    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);         \
    while (q)                                           \
      {                                                 \
        T##_f0 = S##_f0 + q;                            \
        T##_f1 = S##_f1;                                \
        if (T##_f1 < X##_f1 ||                          \
            (T##_f1 == X##_f1 && T##_f0 < X##_f0))      \
          {                                             \
            S##_f0 = T##_f0 + q;                        \
            if (((_FP_WS_TYPE)T##_f0) < 0 &&            \
                ((_FP_WS_TYPE)S##_f0) >= 0)             \
              S##_f1++;                                 \
            _FP_FRAC_SUB_2(X, X, T);                    \
            R##_f0 += q;                                \
          }                                             \
        _FP_FRAC_SLL_2(X, 1);                           \
        q >>= 1;                                        \
      }                                                 \
  } while (0)
 
 
/*
 * Assembly/disassembly for converting to/from integral types.
 * No shifting or overflow handled here.
 */
 
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)        \
  do {                                          \
    if (rsize <= _FP_W_TYPE_SIZE)               \
      r = X##_f0;                               \
    else                                        \
      {                                         \
        r = X##_f1;                             \
        r <<= _FP_W_TYPE_SIZE;                  \
        r += X##_f0;                            \
      }                                         \
  } while (0)
 
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)                             \
  do {                                                                  \
    X##_f0 = r;                                                         \
    X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);     \
  } while (0)
 
/*
 * Convert FP values between word sizes
 */
 
#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S)                               \
  do {                                                                  \
    _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),      \
                   _FP_WFRACBITS_##sfs);                                \
    D##_f = S##_f0;                                                     \
  } while (0)
 
#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S)                               \
  do {                                                                  \
    D##_f0 = S##_f;                                                     \
    D##_f1 = 0;                                                         \
    _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));     \
  } while (0)
 

Browse

Tools

Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

[/] [or1k_soc_on_altera_embedded_dev_kit/] [trunk/] [linux-2.6/] [linux-2.6.24/] [arch/] [powerpc/] [math-emu/] [op-2.h] - Blame information for rev 3

Line No.	Rev	Author	Line
1	3	xianfeng	`/*`
2			`* Basic two-word fraction declaration and manipulation.`
3			`*/`
4
5			`#define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1`
6			`#define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1)`
7			`#define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)`
8			`#define _FP_FRAC_HIGH_2(X) (X##_f1)`
9			`#define _FP_FRAC_LOW_2(X) (X##_f0)`
10			`#define _FP_FRAC_WORD_2(X,w) (X##_f##w)`
11
12			`#define _FP_FRAC_SLL_2(X,N) \`
13			`do { \`
14			`if ((N) < _FP_W_TYPE_SIZE) \`
15			`{ \`
16			`if (__builtin_constant_p(N) && (N) == 1) \`
17			`{ \`
18			`X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \`
19			`X##_f0 += X##_f0; \`
20			`} \`
21			`else \`
22			`{ \`
23			`X##_f1 = X##_f1 << (N) \| X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \`
24			`X##_f0 <<= (N); \`
25			`} \`
26			`} \`
27			`else \`
28			`{ \`
29			`X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \`
30			`X##_f0 = 0; \`
31			`} \`
32			`} while (0)`
33
34			`#define _FP_FRAC_SRL_2(X,N) \`
35			`do { \`
36			`if ((N) < _FP_W_TYPE_SIZE) \`
37			`{ \`
38			`X##_f0 = X##_f0 >> (N) \| X##_f1 << (_FP_W_TYPE_SIZE - (N)); \`
39			`X##_f1 >>= (N); \`
40			`} \`
41			`else \`
42			`{ \`
43			`X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \`
44			`X##_f1 = 0; \`
45			`} \`
46			`} while (0)`
47
48			`/* Right shift with sticky-lsb. */`
49			`#define _FP_FRAC_SRS_2(X,N,sz) \`
50			`do { \`
51			`if ((N) < _FP_W_TYPE_SIZE) \`
52			`{ \`
53			`X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) \| X##_f0 >> (N) \| \`
54			`(__builtin_constant_p(N) && (N) == 1 \`
55			`? X##_f0 & 1 \`
56			`: (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \`
57			`X##_f1 >>= (N); \`
58			`} \`
59			`else \`
60			`{ \`
61			`X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) \| \`
62			`(((X##_f1 << (2 * _FP_W_TYPE_SIZE - (N))) \| \`
63			`X##_f0) != 0)); \`
64			`X##_f1 = 0; \`
65			`} \`
66			`} while (0)`
67
68			`#define _FP_FRAC_ADDI_2(X,I) \`
69			`__FP_FRAC_ADDI_2(X##_f1, X##_f0, I)`
70
71			`#define _FP_FRAC_ADD_2(R,X,Y) \`
72			`__FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)`
73
74			`#define _FP_FRAC_SUB_2(R,X,Y) \`
75			`__FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)`
76
77			`#define _FP_FRAC_CLZ_2(R,X) \`
78			`do { \`
79			`if (X##_f1) \`
80			`__FP_CLZ(R,X##_f1); \`
81			`else \`
82			`{ \`
83			`__FP_CLZ(R,X##_f0); \`
84			`R += _FP_W_TYPE_SIZE; \`
85			`} \`
86			`} while(0)`
87
88			`/* Predicates */`
89			`#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0)`
90			`#define _FP_FRAC_ZEROP_2(X) ((X##_f1 \| X##_f0) == 0)`
91			`#define _FP_FRAC_OVERP_2(fs,X) (X##_f1 & _FP_OVERFLOW_##fs)`
92			`#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)`
93			`#define _FP_FRAC_GT_2(X, Y) \`
94			`((X##_f1 > Y##_f1) \|\| (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))`
95			`#define _FP_FRAC_GE_2(X, Y) \`
96			`((X##_f1 > Y##_f1) \|\| (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))`
97
98			`#define _FP_ZEROFRAC_2 0, 0`
99			`#define _FP_MINFRAC_2 0, 1`
100
101			`/*`
102			`* Internals`
103			`*/`
104
105			`#define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1)`
106
107			`#define __FP_CLZ_2(R, xh, xl) \`
108			`do { \`
109			`if (xh) \`
110			`__FP_CLZ(R,xl); \`
111			`else \`
112			`{ \`
113			`__FP_CLZ(R,xl); \`
114			`R += _FP_W_TYPE_SIZE; \`
115			`} \`
116			`} while(0)`
117
118			`#if 0`
119
120			`#ifndef __FP_FRAC_ADDI_2`
121			`#define __FP_FRAC_ADDI_2(xh, xl, i) \`
122			`(xh += ((xl += i) < i))`
123			`#endif`
124			`#ifndef __FP_FRAC_ADD_2`
125			`#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \`
126			`(rh = xh + yh + ((rl = xl + yl) < xl))`
127			`#endif`
128			`#ifndef __FP_FRAC_SUB_2`
129			`#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \`
130			`(rh = xh - yh - ((rl = xl - yl) > xl))`
131			`#endif`
132
133			`#else`
134
135			`#undef __FP_FRAC_ADDI_2`
136			`#define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)`
137			`#undef __FP_FRAC_ADD_2`
138			`#define __FP_FRAC_ADD_2 add_ssaaaa`
139			`#undef __FP_FRAC_SUB_2`
140			`#define __FP_FRAC_SUB_2 sub_ddmmss`
141
142			`#endif`
143
144			`/*`
145			`* Unpack the raw bits of a native fp value. Do not classify or`
146			`* normalize the data.`
147			`*/`
148
149			`#define _FP_UNPACK_RAW_2(fs, X, val) \`
150			`do { \`
151			`union _FP_UNION_##fs _flo; _flo.flt = (val); \`
152			`\`
153			`X##_f0 = _flo.bits.frac0; \`
154			`X##_f1 = _flo.bits.frac1; \`
155			`X##_e = _flo.bits.exp; \`
156			`X##_s = _flo.bits.sign; \`
157			`} while (0)`
158
159
160			`/*`
161			`* Repack the raw bits of a native fp value.`
162			`*/`
163
164			`#define _FP_PACK_RAW_2(fs, val, X) \`
165			`do { \`
166			`union _FP_UNION_##fs _flo; \`
167			`\`
168			`_flo.bits.frac0 = X##_f0; \`
169			`_flo.bits.frac1 = X##_f1; \`
170			`_flo.bits.exp = X##_e; \`
171			`_flo.bits.sign = X##_s; \`
172			`\`
173			`(val) = _flo.flt; \`
174			`} while (0)`
175
176
177			`/*`
178			`* Multiplication algorithms:`
179			`*/`
180
181			`/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */`
182
183			`#define _FP_MUL_MEAT_2_wide(fs, R, X, Y, doit) \`
184			`do { \`
185			`_FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \`
186			`\`
187			`doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \`
188			`doit(_b_f1, _b_f0, X##_f0, Y##_f1); \`
189			`doit(_c_f1, _c_f0, X##_f1, Y##_f0); \`
190			`doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \`
191			`\`
192			`__FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \`
193			`_FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0), \`
194			`0, _b_f1, _b_f0, 0, \`
195			`_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \`
196			`_FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0)); \`
197			`__FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \`
198			`_FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0), \`
199			`0, _c_f1, _c_f0, 0, \`
200			`_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \`
201			`_FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0)); \`
202			`\`
203			`/* Normalize since we know where the msb of the multiplicands \`
204			`were (bit B), we know that the msb of the of the product is \`
205			`at either 2B or 2B-1. */ \`
206			`_FP_FRAC_SRS_4(_z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \`
207			`R##_f0 = _FP_FRAC_WORD_4(_z,0); \`
208			`R##_f1 = _FP_FRAC_WORD_4(_z,1); \`
209			`} while (0)`
210
211			`/* This next macro appears to be totally broken. Fortunately nowhere`
212			`* seems to use it :-> The problem is that we define _z[4] but`
213			`* then use it in _FP_FRAC_SRS_4, which will attempt to access`
214			`* _z_f[n] which will cause an error. The fix probably involves`
215			`* declaring it with _FP_FRAC_DECL_4, see previous macro. -- PMM 02/1998`
216			`*/`
217			`#define _FP_MUL_MEAT_2_gmp(fs, R, X, Y) \`
218			`do { \`
219			`_FP_W_TYPE _x[2], _y[2], _z[4]; \`
220			`_x[0] = X##_f0; _x[1] = X##_f1; \`
221			`_y[0] = Y##_f0; _y[1] = Y##_f1; \`
222			`\`
223			`mpn_mul_n(_z, _x, _y, 2); \`
224			`\`
225			`/* Normalize since we know where the msb of the multiplicands \`
226			`were (bit B), we know that the msb of the of the product is \`
227			`at either 2B or 2B-1. */ \`
228			`_FP_FRAC_SRS_4(_z, _FP_WFRACBITS##_fs-1, 2*_FP_WFRACBITS_##fs); \`
229			`R##_f0 = _z[0]; \`
230			`R##_f1 = _z[1]; \`
231			`} while (0)`
232
233
234			`/*`
235			`* Division algorithms:`
236			`* This seems to be giving me difficulties -- PMM`
237			`* Look, NetBSD seems to be able to comment algorithms. Can't you?`
238			`* I've thrown printks at the problem.`
239			`* This now appears to work, but I still don't really know why.`
240			`* Also, I don't think the result is properly normalised...`
241			`*/`
242
243			`#define _FP_DIV_MEAT_2_udiv_64(fs, R, X, Y) \`
244			`do { \`
245			`extern void _fp_udivmodti4(_FP_W_TYPE q[2], _FP_W_TYPE r[2], \`
246			`_FP_W_TYPE n1, _FP_W_TYPE n0, \`
247			`_FP_W_TYPE d1, _FP_W_TYPE d0); \`
248			`_FP_W_TYPE _n_f3, _n_f2, _n_f1, _n_f0, _r_f1, _r_f0; \`
249			`_FP_W_TYPE _q_f1, _q_f0, _m_f1, _m_f0; \`
250			`_FP_W_TYPE _rmem[2], _qmem[2]; \`
251			`/* I think this check is to ensure that the result is normalised. \`
252			`* Assuming X,Y normalised (ie in [1.0,2.0)) X/Y will be in \`
253			`* [0.5,2.0). Furthermore, it will be less than 1.0 iff X < Y. \`
254			`* In this case we tweak things. (this is based on comments in \`
255			`* the NetBSD FPU emulation code. ) \`
256			`* We know X,Y are normalised because we ensure this as part of \`
257			`* the unpacking process. -- PMM \`
258			`*/ \`
259			`if (_FP_FRAC_GT_2(X, Y)) \`
260			`{ \`
261			`/* R##_e++; */ \`
262			`_n_f3 = X##_f1 >> 1; \`
263			`_n_f2 = X##_f1 << (_FP_W_TYPE_SIZE - 1) \| X##_f0 >> 1; \`
264			`_n_f1 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \`
265			`_n_f0 = 0; \`
266			`} \`
267			`else \`
268			`{ \`
269			`R##_e--; \`
270			`_n_f3 = X##_f1; \`
271			`_n_f2 = X##_f0; \`
272			`_n_f1 = _n_f0 = 0; \`
273			`} \`
274			`\`
275			`/* Normalize, i.e. make the most significant bit of the \`
276			`denominator set. CHANGED: - 1 to nothing -- PMM */ \`
277			`_FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs /* -1 */); \`
278			`\`
279			`/* Do the 256/128 bit division given the 128-bit _fp_udivmodtf4 \`
280			`primitive snagged from libgcc2.c. */ \`
281			`\`
282			`_fp_udivmodti4(_qmem, _rmem, _n_f3, _n_f2, 0, Y##_f1); \`
283			`_q_f1 = _qmem[0]; \`
284			`umul_ppmm(_m_f1, _m_f0, _q_f1, Y##_f0); \`
285			`_r_f1 = _rmem[0]; \`
286			`_r_f0 = _n_f1; \`
287			`if (_FP_FRAC_GT_2(_m, _r)) \`
288			`{ \`
289			`_q_f1--; \`
290			`_FP_FRAC_ADD_2(_r, _r, Y); \`
291			`if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \`
292			`{ \`
293			`_q_f1--; \`
294			`_FP_FRAC_ADD_2(_r, _r, Y); \`
295			`} \`
296			`} \`
297			`_FP_FRAC_SUB_2(_r, _r, _m); \`
298			`\`
299			`_fp_udivmodti4(_qmem, _rmem, _r_f1, _r_f0, 0, Y##_f1); \`
300			`_q_f0 = _qmem[0]; \`
301			`umul_ppmm(_m_f1, _m_f0, _q_f0, Y##_f0); \`
302			`_r_f1 = _rmem[0]; \`
303			`_r_f0 = _n_f0; \`
304			`if (_FP_FRAC_GT_2(_m, _r)) \`
305			`{ \`
306			`_q_f0--; \`
307			`_FP_FRAC_ADD_2(_r, _r, Y); \`
308			`if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \`
309			`{ \`
310			`_q_f0--; \`
311			`_FP_FRAC_ADD_2(_r, _r, Y); \`
312			`} \`
313			`} \`
314			`_FP_FRAC_SUB_2(_r, _r, _m); \`
315			`\`
316			`R##_f1 = _q_f1; \`
317			`R##_f0 = _q_f0 \| ((_r_f1 \| _r_f0) != 0); \`
318			`/* adjust so answer is normalized again. I'm not sure what the \`
319			`* final sz param should be. In practice it's never used since \`
320			`* N is 1 which is always going to be < _FP_W_TYPE_SIZE... \`
321			`*/ \`
322			`/* _FP_FRAC_SRS_2(R,1,_FP_WFRACBITS_##fs); */ \`
323			`} while (0)`
324
325
326			`#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \`
327			`do { \`
328			`_FP_W_TYPE _x[4], _y[2], _z[4]; \`
329			`_y[0] = Y##_f0; _y[1] = Y##_f1; \`
330			`_x[0] = _x[3] = 0; \`
331			`if (_FP_FRAC_GT_2(X, Y)) \`
332			`{ \`
333			`R##_e++; \`
334			`_x[1] = (X##_f0 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE) \| \`
335			`X##_f1 >> (_FP_W_TYPE_SIZE - \`
336			`(_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE))); \`
337			`_x[2] = X##_f1 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE); \`
338			`} \`
339			`else \`
340			`{ \`
341			`_x[1] = (X##_f0 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE) \| \`
342			`X##_f1 >> (_FP_W_TYPE_SIZE - \`
343			`(_FP_WFRACBITS - _FP_W_TYPE_SIZE))); \`
344			`_x[2] = X##_f1 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE); \`
345			`} \`
346			`\`
347			`(void) mpn_divrem (_z, 0, _x, 4, _y, 2); \`
348			`R##_f1 = _z[1]; \`
349			`R##_f0 = _z[0] \| ((_x[0] \| _x[1]) != 0); \`
350			`} while (0)`
351
352
353			`/*`
354			`* Square root algorithms:`
355			`* We have just one right now, maybe Newton approximation`
356			`* should be added for those machines where division is fast.`
357			`*/`
358
359			`#define _FP_SQRT_MEAT_2(R, S, T, X, q) \`
360			`do { \`
361			`while (q) \`
362			`{ \`
363			`T##_f1 = S##_f1 + q; \`
364			`if (T##_f1 <= X##_f1) \`
365			`{ \`
366			`S##_f1 = T##_f1 + q; \`
367			`X##_f1 -= T##_f1; \`
368			`R##_f1 += q; \`
369			`} \`
370			`_FP_FRAC_SLL_2(X, 1); \`
371			`q >>= 1; \`
372			`} \`
373			`q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \`
374			`while (q) \`
375			`{ \`
376			`T##_f0 = S##_f0 + q; \`
377			`T##_f1 = S##_f1; \`
378			`if (T##_f1 < X##_f1 \|\| \`
379			`(T##_f1 == X##_f1 && T##_f0 < X##_f0)) \`
380			`{ \`
381			`S##_f0 = T##_f0 + q; \`
382			`if (((_FP_WS_TYPE)T##_f0) < 0 && \`
383			`((_FP_WS_TYPE)S##_f0) >= 0) \`
384			`S##_f1++; \`
385			`_FP_FRAC_SUB_2(X, X, T); \`
386			`R##_f0 += q; \`
387			`} \`
388			`_FP_FRAC_SLL_2(X, 1); \`
389			`q >>= 1; \`
390			`} \`
391			`} while (0)`
392
393
394			`/*`
395			`* Assembly/disassembly for converting to/from integral types.`
396			`* No shifting or overflow handled here.`
397			`*/`
398
399			`#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \`
400			`do { \`
401			`if (rsize <= _FP_W_TYPE_SIZE) \`
402			`r = X##_f0; \`
403			`else \`
404			`{ \`
405			`r = X##_f1; \`
406			`r <<= _FP_W_TYPE_SIZE; \`
407			`r += X##_f0; \`
408			`} \`
409			`} while (0)`
410
411			`#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \`
412			`do { \`
413			`X##_f0 = r; \`
414			`X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \`
415			`} while (0)`
416
417			`/*`
418			`* Convert FP values between word sizes`
419			`*/`
420
421			`#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \`
422			`do { \`
423			`_FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \`
424			`_FP_WFRACBITS_##sfs); \`
425			`D##_f = S##_f0; \`
426			`} while (0)`
427
428			`#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \`
429			`do { \`
430			`D##_f0 = S##_f; \`
431			`D##_f1 = 0; \`
432			`_FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \`
433			`} while (0)`
434