OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [include/] [math-emu/] [op-2.h] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/* Software floating-point emulation.
2
   Basic two-word fraction declaration and manipulation.
3
   Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
4
   This file is part of the GNU C Library.
5
   Contributed by Richard Henderson (rth@cygnus.com),
6
                  Jakub Jelinek (jj@ultra.linux.cz),
7
                  David S. Miller (davem@redhat.com) and
8
                  Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
 
10
   The GNU C Library is free software; you can redistribute it and/or
11
   modify it under the terms of the GNU Library General Public License as
12
   published by the Free Software Foundation; either version 2 of the
13
   License, or (at your option) any later version.
14
 
15
   The GNU C Library is distributed in the hope that it will be useful,
16
   but WITHOUT ANY WARRANTY; without even the implied warranty of
17
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
   Library General Public License for more details.
19
 
20
   You should have received a copy of the GNU Library General Public
21
   License along with the GNU C Library; see the file COPYING.LIB.  If
22
   not, write to the Free Software Foundation, Inc.,
23
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
24
 
25
#ifndef __MATH_EMU_OP_2_H__
26
#define __MATH_EMU_OP_2_H__
27
 
28
#define _FP_FRAC_DECL_2(X)      _FP_W_TYPE X##_f0, X##_f1
29
#define _FP_FRAC_COPY_2(D,S)    (D##_f0 = S##_f0, D##_f1 = S##_f1)
30
#define _FP_FRAC_SET_2(X,I)     __FP_FRAC_SET_2(X, I)
31
#define _FP_FRAC_HIGH_2(X)      (X##_f1)
32
#define _FP_FRAC_LOW_2(X)       (X##_f0)
33
#define _FP_FRAC_WORD_2(X,w)    (X##_f##w)
34
 
35
#define _FP_FRAC_SLL_2(X,N)                                             \
36
  do {                                                                  \
37
    if ((N) < _FP_W_TYPE_SIZE)                                          \
38
      {                                                                 \
39
        if (__builtin_constant_p(N) && (N) == 1)                        \
40
          {                                                             \
41
            X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0);   \
42
            X##_f0 += X##_f0;                                           \
43
          }                                                             \
44
        else                                                            \
45
          {                                                             \
46
            X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
47
            X##_f0 <<= (N);                                             \
48
          }                                                             \
49
      }                                                                 \
50
    else                                                                \
51
      {                                                                 \
52
        X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);                     \
53
        X##_f0 = 0;                                                     \
54
      }                                                                 \
55
  } while (0)
56
 
57
#define _FP_FRAC_SRL_2(X,N)                                             \
58
  do {                                                                  \
59
    if ((N) < _FP_W_TYPE_SIZE)                                          \
60
      {                                                                 \
61
        X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N));     \
62
        X##_f1 >>= (N);                                                 \
63
      }                                                                 \
64
    else                                                                \
65
      {                                                                 \
66
        X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);                     \
67
        X##_f1 = 0;                                                     \
68
      }                                                                 \
69
  } while (0)
70
 
71
/* Right shift with sticky-lsb.  */
72
#define _FP_FRAC_SRS_2(X,N,sz)                                          \
73
  do {                                                                  \
74
    if ((N) < _FP_W_TYPE_SIZE)                                          \
75
      {                                                                 \
76
        X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) |   \
77
                  (__builtin_constant_p(N) && (N) == 1                  \
78
                   ? X##_f0 & 1                                         \
79
                   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));        \
80
        X##_f1 >>= (N);                                                 \
81
      }                                                                 \
82
    else                                                                \
83
      {                                                                 \
84
        X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) |                   \
85
                (((X##_f1 << (2*_FP_W_TYPE_SIZE - (N))) | X##_f0) != 0)); \
86
        X##_f1 = 0;                                                     \
87
      }                                                                 \
88
  } while (0)
89
 
90
#define _FP_FRAC_ADDI_2(X,I)    \
91
  __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
92
 
93
#define _FP_FRAC_ADD_2(R,X,Y)   \
94
  __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
95
 
96
#define _FP_FRAC_SUB_2(R,X,Y)   \
97
  __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
98
 
99
#define _FP_FRAC_DEC_2(X,Y)     \
100
  __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
101
 
102
#define _FP_FRAC_CLZ_2(R,X)     \
103
  do {                          \
104
    if (X##_f1)                 \
105
      __FP_CLZ(R,X##_f1);       \
106
    else                        \
107
    {                           \
108
      __FP_CLZ(R,X##_f0);       \
109
      R += _FP_W_TYPE_SIZE;     \
110
    }                           \
111
  } while(0)
112
 
113
/* Predicates */
114
#define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE)X##_f1 < 0)
115
#define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
116
#define _FP_FRAC_OVERP_2(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
117
#define _FP_FRAC_CLEAR_OVERP_2(fs,X)    (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
118
#define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
119
#define _FP_FRAC_GT_2(X, Y)     \
120
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
121
#define _FP_FRAC_GE_2(X, Y)     \
122
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
123
 
124
#define _FP_ZEROFRAC_2          0, 0
125
#define _FP_MINFRAC_2           0, 1
126
#define _FP_MAXFRAC_2           (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
127
 
128
/*
129
 * Internals
130
 */
131
 
132
#define __FP_FRAC_SET_2(X,I1,I0)        (X##_f0 = I0, X##_f1 = I1)
133
 
134
#define __FP_CLZ_2(R, xh, xl)   \
135
  do {                          \
136
    if (xh)                     \
137
      __FP_CLZ(R,xh);           \
138
    else                        \
139
    {                           \
140
      __FP_CLZ(R,xl);           \
141
      R += _FP_W_TYPE_SIZE;     \
142
    }                           \
143
  } while(0)
144
 
145
#if 0
146
 
147
#ifndef __FP_FRAC_ADDI_2
148
#define __FP_FRAC_ADDI_2(xh, xl, i)     \
149
  (xh += ((xl += i) < i))
150
#endif
151
#ifndef __FP_FRAC_ADD_2
152
#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
153
  (rh = xh + yh + ((rl = xl + yl) < xl))
154
#endif
155
#ifndef __FP_FRAC_SUB_2
156
#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
157
  (rh = xh - yh - ((rl = xl - yl) > xl))
158
#endif
159
#ifndef __FP_FRAC_DEC_2
160
#define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
161
  do {                                  \
162
    UWtype _t = xl;                     \
163
    xh -= yh + ((xl -= yl) > _t);       \
164
  } while (0)
165
#endif
166
 
167
#else
168
 
169
#undef __FP_FRAC_ADDI_2
170
#define __FP_FRAC_ADDI_2(xh, xl, i)     add_ssaaaa(xh, xl, xh, xl, 0, i)
171
#undef __FP_FRAC_ADD_2
172
#define __FP_FRAC_ADD_2                 add_ssaaaa
173
#undef __FP_FRAC_SUB_2
174
#define __FP_FRAC_SUB_2                 sub_ddmmss
175
#undef __FP_FRAC_DEC_2
176
#define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
177
 
178
#endif
179
 
180
/*
181
 * Unpack the raw bits of a native fp value.  Do not classify or
182
 * normalize the data.
183
 */
184
 
185
#define _FP_UNPACK_RAW_2(fs, X, val)                    \
186
  do {                                                  \
187
    union _FP_UNION_##fs _flo; _flo.flt = (val);        \
188
                                                        \
189
    X##_f0 = _flo.bits.frac0;                           \
190
    X##_f1 = _flo.bits.frac1;                           \
191
    X##_e  = _flo.bits.exp;                             \
192
    X##_s  = _flo.bits.sign;                            \
193
  } while (0)
194
 
195
#define _FP_UNPACK_RAW_2_P(fs, X, val)                  \
196
  do {                                                  \
197
    union _FP_UNION_##fs *_flo =                        \
198
      (union _FP_UNION_##fs *)(val);                    \
199
                                                        \
200
    X##_f0 = _flo->bits.frac0;                          \
201
    X##_f1 = _flo->bits.frac1;                          \
202
    X##_e  = _flo->bits.exp;                            \
203
    X##_s  = _flo->bits.sign;                           \
204
  } while (0)
205
 
206
 
207
/*
208
 * Repack the raw bits of a native fp value.
209
 */
210
 
211
#define _FP_PACK_RAW_2(fs, val, X)                      \
212
  do {                                                  \
213
    union _FP_UNION_##fs _flo;                          \
214
                                                        \
215
    _flo.bits.frac0 = X##_f0;                           \
216
    _flo.bits.frac1 = X##_f1;                           \
217
    _flo.bits.exp   = X##_e;                            \
218
    _flo.bits.sign  = X##_s;                            \
219
                                                        \
220
    (val) = _flo.flt;                                   \
221
  } while (0)
222
 
223
#define _FP_PACK_RAW_2_P(fs, val, X)                    \
224
  do {                                                  \
225
    union _FP_UNION_##fs *_flo =                        \
226
      (union _FP_UNION_##fs *)(val);                    \
227
                                                        \
228
    _flo->bits.frac0 = X##_f0;                          \
229
    _flo->bits.frac1 = X##_f1;                          \
230
    _flo->bits.exp   = X##_e;                           \
231
    _flo->bits.sign  = X##_s;                           \
232
  } while (0)
233
 
234
 
235
/*
236
 * Multiplication algorithms:
237
 */
238
 
239
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
240
 
241
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                   \
242
  do {                                                                  \
243
    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);      \
244
                                                                        \
245
    doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);  \
246
    doit(_b_f1, _b_f0, X##_f0, Y##_f1);                                 \
247
    doit(_c_f1, _c_f0, X##_f1, Y##_f0);                                 \
248
    doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
249
                                                                        \
250
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
251
                    _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0,              \
252
                    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
253
                    _FP_FRAC_WORD_4(_z,1));                             \
254
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
255
                    _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0,              \
256
                    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
257
                    _FP_FRAC_WORD_4(_z,1));                             \
258
                                                                        \
259
    /* Normalize since we know where the msb of the multiplicands       \
260
       were (bit B), we know that the msb of the of the product is      \
261
       at either 2B or 2B-1.  */                                        \
262
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
263
    R##_f0 = _FP_FRAC_WORD_4(_z,0);                                     \
264
    R##_f1 = _FP_FRAC_WORD_4(_z,1);                                     \
265
  } while (0)
266
 
267
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
268
   Do only 3 multiplications instead of four. This one is for machines
269
   where multiplication is much more expensive than subtraction.  */
270
 
271
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)              \
272
  do {                                                                  \
273
    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);      \
274
    _FP_W_TYPE _d;                                                      \
275
    int _c1, _c2;                                                       \
276
                                                                        \
277
    _b_f0 = X##_f0 + X##_f1;                                            \
278
    _c1 = _b_f0 < X##_f0;                                               \
279
    _b_f1 = Y##_f0 + Y##_f1;                                            \
280
    _c2 = _b_f1 < Y##_f0;                                               \
281
    doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);                     \
282
    doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1);   \
283
    doit(_c_f1, _c_f0, X##_f1, Y##_f1);                                 \
284
                                                                        \
285
    _b_f0 &= -_c2;                                                      \
286
    _b_f1 &= -_c1;                                                      \
287
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
288
                    _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d,           \
289
                    0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1));    \
290
    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
291
                     _b_f0);                                            \
292
    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
293
                     _b_f1);                                            \
294
    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
295
                    _FP_FRAC_WORD_4(_z,1),                              \
296
                    0, _d, _FP_FRAC_WORD_4(_z,0));                        \
297
    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
298
                    _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0);             \
299
    __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2),       \
300
                    _c_f1, _c_f0,                                       \
301
                    _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2));      \
302
                                                                        \
303
    /* Normalize since we know where the msb of the multiplicands       \
304
       were (bit B), we know that the msb of the of the product is      \
305
       at either 2B or 2B-1.  */                                        \
306
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
307
    R##_f0 = _FP_FRAC_WORD_4(_z,0);                                     \
308
    R##_f1 = _FP_FRAC_WORD_4(_z,1);                                     \
309
  } while (0)
310
 
311
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                          \
312
  do {                                                                  \
313
    _FP_FRAC_DECL_4(_z);                                                \
314
    _FP_W_TYPE _x[2], _y[2];                                            \
315
    _x[0] = X##_f0; _x[1] = X##_f1;                                      \
316
    _y[0] = Y##_f0; _y[1] = Y##_f1;                                      \
317
                                                                        \
318
    mpn_mul_n(_z_f, _x, _y, 2);                                         \
319
                                                                        \
320
    /* Normalize since we know where the msb of the multiplicands       \
321
       were (bit B), we know that the msb of the of the product is      \
322
       at either 2B or 2B-1.  */                                        \
323
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
324
    R##_f0 = _z_f[0];                                                   \
325
    R##_f1 = _z_f[1];                                                   \
326
  } while (0)
327
 
328
/* Do at most 120x120=240 bits multiplication using double floating
329
   point multiplication.  This is useful if floating point
330
   multiplication has much bigger throughput than integer multiply.
331
   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
332
   between 106 and 120 only.
333
   Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
334
   SETFETZ is a macro which will disable all FPU exceptions and set rounding
335
   towards zero,  RESETFE should optionally reset it back.  */
336
 
337
#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe)     \
338
  do {                                                                          \
339
    static const double _const[] = {                                            \
340
      /* 2^-24 */ 5.9604644775390625e-08,                                       \
341
      /* 2^-48 */ 3.5527136788005009e-15,                                       \
342
      /* 2^-72 */ 2.1175823681357508e-22,                                       \
343
      /* 2^-96 */ 1.2621774483536189e-29,                                       \
344
      /* 2^28 */ 2.68435456e+08,                                                \
345
      /* 2^4 */ 1.600000e+01,                                                   \
346
      /* 2^-20 */ 9.5367431640625e-07,                                          \
347
      /* 2^-44 */ 5.6843418860808015e-14,                                       \
348
      /* 2^-68 */ 3.3881317890172014e-21,                                       \
349
      /* 2^-92 */ 2.0194839173657902e-28,                                       \
350
      /* 2^-116 */ 1.2037062152420224e-35};                                     \
351
    double _a240, _b240, _c240, _d240, _e240, _f240,                            \
352
           _g240, _h240, _i240, _j240, _k240;                                   \
353
    union { double d; UDItype i; } _l240, _m240, _n240, _o240,                  \
354
                                   _p240, _q240, _r240, _s240;                  \
355
    UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;                        \
356
                                                                                \
357
    if (wfracbits < 106 || wfracbits > 120)                                     \
358
      abort();                                                                  \
359
                                                                                \
360
    setfetz;                                                                    \
361
                                                                                \
362
    _e240 = (double)(long)(X##_f0 & 0xffffff);                                  \
363
    _j240 = (double)(long)(Y##_f0 & 0xffffff);                                  \
364
    _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff);                          \
365
    _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff);                          \
366
    _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48));       \
367
    _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48));       \
368
    _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff);                           \
369
    _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff);                           \
370
    _a240 = (double)(long)(X##_f1 >> 32);                                       \
371
    _f240 = (double)(long)(Y##_f1 >> 32);                                       \
372
    _e240 *= _const[3];                                                         \
373
    _j240 *= _const[3];                                                         \
374
    _d240 *= _const[2];                                                         \
375
    _i240 *= _const[2];                                                         \
376
    _c240 *= _const[1];                                                         \
377
    _h240 *= _const[1];                                                         \
378
    _b240 *= _const[0];                                                          \
379
    _g240 *= _const[0];                                                          \
380
    _s240.d =                                                         _e240*_j240;\
381
    _r240.d =                                           _d240*_j240 + _e240*_i240;\
382
    _q240.d =                             _c240*_j240 + _d240*_i240 + _e240*_h240;\
383
    _p240.d =               _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
384
    _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
385
    _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;            \
386
    _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;                          \
387
    _l240.d = _a240*_g240 + _b240*_f240;                                        \
388
    _k240 =   _a240*_f240;                                                      \
389
    _r240.d += _s240.d;                                                         \
390
    _q240.d += _r240.d;                                                         \
391
    _p240.d += _q240.d;                                                         \
392
    _o240.d += _p240.d;                                                         \
393
    _n240.d += _o240.d;                                                         \
394
    _m240.d += _n240.d;                                                         \
395
    _l240.d += _m240.d;                                                         \
396
    _k240 += _l240.d;                                                           \
397
    _s240.d -= ((_const[10]+_s240.d)-_const[10]);                               \
398
    _r240.d -= ((_const[9]+_r240.d)-_const[9]);                                 \
399
    _q240.d -= ((_const[8]+_q240.d)-_const[8]);                                 \
400
    _p240.d -= ((_const[7]+_p240.d)-_const[7]);                                 \
401
    _o240.d += _const[7];                                                       \
402
    _n240.d += _const[6];                                                       \
403
    _m240.d += _const[5];                                                       \
404
    _l240.d += _const[4];                                                       \
405
    if (_s240.d != 0.0) _y240 = 1;                                              \
406
    if (_r240.d != 0.0) _y240 = 1;                                              \
407
    if (_q240.d != 0.0) _y240 = 1;                                              \
408
    if (_p240.d != 0.0) _y240 = 1;                                              \
409
    _t240 = (DItype)_k240;                                                      \
410
    _u240 = _l240.i;                                                            \
411
    _v240 = _m240.i;                                                            \
412
    _w240 = _n240.i;                                                            \
413
    _x240 = _o240.i;                                                            \
414
    R##_f1 = (_t240 << (128 - (wfracbits - 1)))                                 \
415
             | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104));                 \
416
    R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1)))                    \
417
             | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))                  \
418
             | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))                  \
419
             | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))                   \
420
             | _y240;                                                           \
421
    resetfe;                                                                    \
422
  } while (0)
423
 
424
/*
425
 * Division algorithms:
426
 */
427
 
428
#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)                                \
429
  do {                                                                  \
430
    _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;         \
431
    if (_FP_FRAC_GT_2(X, Y))                                            \
432
      {                                                                 \
433
        _n_f2 = X##_f1 >> 1;                                            \
434
        _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;          \
435
        _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);                        \
436
      }                                                                 \
437
    else                                                                \
438
      {                                                                 \
439
        R##_e--;                                                        \
440
        _n_f2 = X##_f1;                                                 \
441
        _n_f1 = X##_f0;                                                 \
442
        _n_f0 = 0;                                                       \
443
      }                                                                 \
444
                                                                        \
445
    /* Normalize, i.e. make the most significant bit of the             \
446
       denominator set. */                                              \
447
    _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs);                             \
448
                                                                        \
449
    udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);                    \
450
    umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0);                            \
451
    _r_f0 = _n_f0;                                                      \
452
    if (_FP_FRAC_GT_2(_m, _r))                                          \
453
      {                                                                 \
454
        R##_f1--;                                                       \
455
        _FP_FRAC_ADD_2(_r, Y, _r);                                      \
456
        if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))              \
457
          {                                                             \
458
            R##_f1--;                                                   \
459
            _FP_FRAC_ADD_2(_r, Y, _r);                                  \
460
          }                                                             \
461
      }                                                                 \
462
    _FP_FRAC_DEC_2(_r, _m);                                             \
463
                                                                        \
464
    if (_r_f1 == Y##_f1)                                                \
465
      {                                                                 \
466
        /* This is a special case, not an optimization                  \
467
           (_r/Y##_f1 would not fit into UWtype).                       \
468
           As _r is guaranteed to be < Y,  R##_f0 can be either         \
469
           (UWtype)-1 or (UWtype)-2.  But as we know what kind          \
470
           of bits it is (sticky, guard, round),  we don't care.        \
471
           We also don't care what the reminder is,  because the        \
472
           guard bit will be set anyway.  -jj */                        \
473
        R##_f0 = -1;                                                    \
474
      }                                                                 \
475
    else                                                                \
476
      {                                                                 \
477
        udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);                \
478
        umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0);                        \
479
        _r_f0 = 0;                                                       \
480
        if (_FP_FRAC_GT_2(_m, _r))                                      \
481
          {                                                             \
482
            R##_f0--;                                                   \
483
            _FP_FRAC_ADD_2(_r, Y, _r);                                  \
484
            if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))          \
485
              {                                                         \
486
                R##_f0--;                                               \
487
                _FP_FRAC_ADD_2(_r, Y, _r);                              \
488
              }                                                         \
489
          }                                                             \
490
        if (!_FP_FRAC_EQ_2(_r, _m))                                     \
491
          R##_f0 |= _FP_WORK_STICKY;                                    \
492
      }                                                                 \
493
  } while (0)
494
 
495
 
496
#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)                                 \
497
  do {                                                                  \
498
    _FP_W_TYPE _x[4], _y[2], _z[4];                                     \
499
    _y[0] = Y##_f0; _y[1] = Y##_f1;                                      \
500
    _x[0] = _x[3] = 0;                                                    \
501
    if (_FP_FRAC_GT_2(X, Y))                                            \
502
      {                                                                 \
503
        R##_e++;                                                        \
504
        _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) |   \
505
                 X##_f1 >> (_FP_W_TYPE_SIZE -                           \
506
                            (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
507
        _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE);     \
508
      }                                                                 \
509
    else                                                                \
510
      {                                                                 \
511
        _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) |     \
512
                 X##_f1 >> (_FP_W_TYPE_SIZE -                           \
513
                            (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE)));   \
514
        _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE);       \
515
      }                                                                 \
516
                                                                        \
517
    (void) mpn_divrem (_z, 0, _x, 4, _y, 2);                             \
518
    R##_f1 = _z[1];                                                     \
519
    R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);                            \
520
  } while (0)
521
 
522
 
523
/*
524
 * Square root algorithms:
525
 * We have just one right now, maybe Newton approximation
526
 * should be added for those machines where division is fast.
527
 */
528
 
529
#define _FP_SQRT_MEAT_2(R, S, T, X, q)                  \
530
  do {                                                  \
531
    while (q)                                           \
532
      {                                                 \
533
        T##_f1 = S##_f1 + q;                            \
534
        if (T##_f1 <= X##_f1)                           \
535
          {                                             \
536
            S##_f1 = T##_f1 + q;                        \
537
            X##_f1 -= T##_f1;                           \
538
            R##_f1 += q;                                \
539
          }                                             \
540
        _FP_FRAC_SLL_2(X, 1);                           \
541
        q >>= 1;                                        \
542
      }                                                 \
543
    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);         \
544
    while (q != _FP_WORK_ROUND)                         \
545
      {                                                 \
546
        T##_f0 = S##_f0 + q;                            \
547
        T##_f1 = S##_f1;                                \
548
        if (T##_f1 < X##_f1 ||                          \
549
            (T##_f1 == X##_f1 && T##_f0 <= X##_f0))     \
550
          {                                             \
551
            S##_f0 = T##_f0 + q;                        \
552
            S##_f1 += (T##_f0 > S##_f0);                \
553
            _FP_FRAC_DEC_2(X, T);                       \
554
            R##_f0 += q;                                \
555
          }                                             \
556
        _FP_FRAC_SLL_2(X, 1);                           \
557
        q >>= 1;                                        \
558
      }                                                 \
559
    if (X##_f0 | X##_f1)                                \
560
      {                                                 \
561
        if (S##_f1 < X##_f1 ||                          \
562
            (S##_f1 == X##_f1 && S##_f0 < X##_f0))      \
563
          R##_f0 |= _FP_WORK_ROUND;                     \
564
        R##_f0 |= _FP_WORK_STICKY;                      \
565
      }                                                 \
566
  } while (0)
567
 
568
 
569
/*
570
 * Assembly/disassembly for converting to/from integral types.
571
 * No shifting or overflow handled here.
572
 */
573
 
574
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)        \
575
  do {                                          \
576
    if (rsize <= _FP_W_TYPE_SIZE)               \
577
      r = X##_f0;                               \
578
    else                                        \
579
      {                                         \
580
        r = X##_f1;                             \
581
        r <<= _FP_W_TYPE_SIZE;                  \
582
        r += X##_f0;                            \
583
      }                                         \
584
  } while (0)
585
 
586
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)                             \
587
  do {                                                                  \
588
    X##_f0 = r;                                                         \
589
    X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);     \
590
  } while (0)
591
 
592
/*
593
 * Convert FP values between word sizes
594
 */
595
 
596
#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S)                               \
597
  do {                                                                  \
598
    if (S##_c != FP_CLS_NAN)                                            \
599
      _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),    \
600
                     _FP_WFRACBITS_##sfs);                              \
601
    else                                                                \
602
      _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs));   \
603
    D##_f = S##_f0;                                                     \
604
  } while (0)
605
 
606
#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S)                               \
607
  do {                                                                  \
608
    D##_f0 = S##_f;                                                     \
609
    D##_f1 = 0;                                                         \
610
    _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));     \
611
  } while (0)
612
 
613
#endif

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.