OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgcc/] [soft-fp/] [op-2.h] - Blame information for rev 734

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 734 jeremybenn
/* Software floating-point emulation.
2
   Basic two-word fraction declaration and manipulation.
3
   Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
4
   This file is part of the GNU C Library.
5
   Contributed by Richard Henderson (rth@cygnus.com),
6
                  Jakub Jelinek (jj@ultra.linux.cz),
7
                  David S. Miller (davem@redhat.com) and
8
                  Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
 
10
   The GNU C Library is free software; you can redistribute it and/or
11
   modify it under the terms of the GNU Lesser General Public
12
   License as published by the Free Software Foundation; either
13
   version 2.1 of the License, or (at your option) any later version.
14
 
15
   In addition to the permissions in the GNU Lesser General Public
16
   License, the Free Software Foundation gives you unlimited
17
   permission to link the compiled version of this file into
18
   combinations with other programs, and to distribute those
19
   combinations without any restriction coming from the use of this
20
   file.  (The Lesser General Public License restrictions do apply in
21
   other respects; for example, they cover modification of the file,
22
   and distribution when not linked into a combine executable.)
23
 
24
   The GNU C Library is distributed in the hope that it will be useful,
25
   but WITHOUT ANY WARRANTY; without even the implied warranty of
26
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27
   Lesser General Public License for more details.
28
 
29
   You should have received a copy of the GNU Lesser General Public
30
   License along with the GNU C Library; if not, write to the Free
31
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
32
   MA 02110-1301, USA.  */
33
 
34
#define _FP_FRAC_DECL_2(X)      _FP_W_TYPE X##_f0, X##_f1
35
#define _FP_FRAC_COPY_2(D,S)    (D##_f0 = S##_f0, D##_f1 = S##_f1)
36
#define _FP_FRAC_SET_2(X,I)     __FP_FRAC_SET_2(X, I)
37
#define _FP_FRAC_HIGH_2(X)      (X##_f1)
38
#define _FP_FRAC_LOW_2(X)       (X##_f0)
39
#define _FP_FRAC_WORD_2(X,w)    (X##_f##w)
40
 
41
#define _FP_FRAC_SLL_2(X,N)                                                 \
42
(void)(((N) < _FP_W_TYPE_SIZE)                                              \
43
       ? ({                                                                 \
44
            if (__builtin_constant_p(N) && (N) == 1)                        \
45
              {                                                             \
46
                X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0);   \
47
                X##_f0 += X##_f0;                                           \
48
              }                                                             \
49
            else                                                            \
50
              {                                                             \
51
                X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
52
                X##_f0 <<= (N);                                             \
53
              }                                                             \
54
            0;                                                               \
55
          })                                                                \
56
       : ({                                                                 \
57
            X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);                     \
58
            X##_f0 = 0;                                                     \
59
          }))
60
 
61
 
62
#define _FP_FRAC_SRL_2(X,N)                                             \
63
(void)(((N) < _FP_W_TYPE_SIZE)                                          \
64
       ? ({                                                             \
65
            X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
66
            X##_f1 >>= (N);                                             \
67
          })                                                            \
68
       : ({                                                             \
69
            X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);                 \
70
            X##_f1 = 0;                                                 \
71
          }))
72
 
73
/* Right shift with sticky-lsb.  */
74
#define _FP_FRAC_SRST_2(X,S, N,sz)                                        \
75
(void)(((N) < _FP_W_TYPE_SIZE)                                            \
76
       ? ({                                                               \
77
            S = (__builtin_constant_p(N) && (N) == 1                      \
78
                 ? X##_f0 & 1                                             \
79
                 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);             \
80
            X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
81
            X##_f1 >>= (N);                                               \
82
          })                                                              \
83
       : ({                                                               \
84
            S = ((((N) == _FP_W_TYPE_SIZE                                 \
85
                   ? 0                                                     \
86
                   : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))               \
87
                  | X##_f0) != 0);                                        \
88
            X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));                 \
89
            X##_f1 = 0;                                                   \
90
          }))
91
 
92
#define _FP_FRAC_SRS_2(X,N,sz)                                            \
93
(void)(((N) < _FP_W_TYPE_SIZE)                                            \
94
       ? ({                                                               \
95
            X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
96
                      (__builtin_constant_p(N) && (N) == 1                \
97
                       ? X##_f0 & 1                                       \
98
                       : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));      \
99
            X##_f1 >>= (N);                                               \
100
          })                                                              \
101
       : ({                                                               \
102
            X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) |                 \
103
                      ((((N) == _FP_W_TYPE_SIZE                           \
104
                         ? 0                                               \
105
                         : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))         \
106
                        | X##_f0) != 0));                                 \
107
            X##_f1 = 0;                                                   \
108
          }))
109
 
110
#define _FP_FRAC_ADDI_2(X,I)    \
111
  __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
112
 
113
#define _FP_FRAC_ADD_2(R,X,Y)   \
114
  __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
115
 
116
#define _FP_FRAC_SUB_2(R,X,Y)   \
117
  __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
118
 
119
#define _FP_FRAC_DEC_2(X,Y)     \
120
  __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
121
 
122
#define _FP_FRAC_CLZ_2(R,X)     \
123
  do {                          \
124
    if (X##_f1)                 \
125
      __FP_CLZ(R,X##_f1);       \
126
    else                        \
127
    {                           \
128
      __FP_CLZ(R,X##_f0);       \
129
      R += _FP_W_TYPE_SIZE;     \
130
    }                           \
131
  } while(0)
132
 
133
/* Predicates */
134
#define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE)X##_f1 < 0)
135
#define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
136
#define _FP_FRAC_OVERP_2(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
137
#define _FP_FRAC_CLEAR_OVERP_2(fs,X)    (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
138
#define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
139
#define _FP_FRAC_GT_2(X, Y)     \
140
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
141
#define _FP_FRAC_GE_2(X, Y)     \
142
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
143
 
144
#define _FP_ZEROFRAC_2          0, 0
145
#define _FP_MINFRAC_2           0, 1
146
#define _FP_MAXFRAC_2           (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
147
 
148
/*
149
 * Internals
150
 */
151
 
152
#define __FP_FRAC_SET_2(X,I1,I0)        (X##_f0 = I0, X##_f1 = I1)
153
 
154
#define __FP_CLZ_2(R, xh, xl)   \
155
  do {                          \
156
    if (xh)                     \
157
      __FP_CLZ(R,xh);           \
158
    else                        \
159
    {                           \
160
      __FP_CLZ(R,xl);           \
161
      R += _FP_W_TYPE_SIZE;     \
162
    }                           \
163
  } while(0)
164
 
165
#if 0
166
 
167
#ifndef __FP_FRAC_ADDI_2
168
#define __FP_FRAC_ADDI_2(xh, xl, i)     \
169
  (xh += ((xl += i) < i))
170
#endif
171
#ifndef __FP_FRAC_ADD_2
172
#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
173
  (rh = xh + yh + ((rl = xl + yl) < xl))
174
#endif
175
#ifndef __FP_FRAC_SUB_2
176
#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
177
  (rh = xh - yh - ((rl = xl - yl) > xl))
178
#endif
179
#ifndef __FP_FRAC_DEC_2
180
#define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
181
  do {                                  \
182
    UWtype _t = xl;                     \
183
    xh -= yh + ((xl -= yl) > _t);       \
184
  } while (0)
185
#endif
186
 
187
#else
188
 
189
#undef __FP_FRAC_ADDI_2
190
#define __FP_FRAC_ADDI_2(xh, xl, i)     add_ssaaaa(xh, xl, xh, xl, 0, i)
191
#undef __FP_FRAC_ADD_2
192
#define __FP_FRAC_ADD_2                 add_ssaaaa
193
#undef __FP_FRAC_SUB_2
194
#define __FP_FRAC_SUB_2                 sub_ddmmss
195
#undef __FP_FRAC_DEC_2
196
#define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
197
 
198
#endif
199
 
200
/*
201
 * Unpack the raw bits of a native fp value.  Do not classify or
202
 * normalize the data.
203
 */
204
 
205
#define _FP_UNPACK_RAW_2(fs, X, val)                    \
206
  do {                                                  \
207
    union _FP_UNION_##fs _flo; _flo.flt = (val);        \
208
                                                        \
209
    X##_f0 = _flo.bits.frac0;                           \
210
    X##_f1 = _flo.bits.frac1;                           \
211
    X##_e  = _flo.bits.exp;                             \
212
    X##_s  = _flo.bits.sign;                            \
213
  } while (0)
214
 
215
#define _FP_UNPACK_RAW_2_P(fs, X, val)                  \
216
  do {                                                  \
217
    union _FP_UNION_##fs *_flo =                        \
218
      (union _FP_UNION_##fs *)(val);                    \
219
                                                        \
220
    X##_f0 = _flo->bits.frac0;                          \
221
    X##_f1 = _flo->bits.frac1;                          \
222
    X##_e  = _flo->bits.exp;                            \
223
    X##_s  = _flo->bits.sign;                           \
224
  } while (0)
225
 
226
 
227
/*
228
 * Repack the raw bits of a native fp value.
229
 */
230
 
231
#define _FP_PACK_RAW_2(fs, val, X)                      \
232
  do {                                                  \
233
    union _FP_UNION_##fs _flo;                          \
234
                                                        \
235
    _flo.bits.frac0 = X##_f0;                           \
236
    _flo.bits.frac1 = X##_f1;                           \
237
    _flo.bits.exp   = X##_e;                            \
238
    _flo.bits.sign  = X##_s;                            \
239
                                                        \
240
    (val) = _flo.flt;                                   \
241
  } while (0)
242
 
243
#define _FP_PACK_RAW_2_P(fs, val, X)                    \
244
  do {                                                  \
245
    union _FP_UNION_##fs *_flo =                        \
246
      (union _FP_UNION_##fs *)(val);                    \
247
                                                        \
248
    _flo->bits.frac0 = X##_f0;                          \
249
    _flo->bits.frac1 = X##_f1;                          \
250
    _flo->bits.exp   = X##_e;                           \
251
    _flo->bits.sign  = X##_s;                           \
252
  } while (0)
253
 
254
 
255
/*
256
 * Multiplication algorithms:
257
 */
258
 
259
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
260
 
261
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                   \
262
  do {                                                                  \
263
    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);      \
264
                                                                        \
265
    doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);  \
266
    doit(_b_f1, _b_f0, X##_f0, Y##_f1);                                 \
267
    doit(_c_f1, _c_f0, X##_f1, Y##_f0);                                 \
268
    doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
269
                                                                        \
270
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
271
                    _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0,              \
272
                    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
273
                    _FP_FRAC_WORD_4(_z,1));                             \
274
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
275
                    _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0,              \
276
                    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
277
                    _FP_FRAC_WORD_4(_z,1));                             \
278
                                                                        \
279
    /* Normalize since we know where the msb of the multiplicands       \
280
       were (bit B), we know that the msb of the of the product is      \
281
       at either 2B or 2B-1.  */                                        \
282
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
283
    R##_f0 = _FP_FRAC_WORD_4(_z,0);                                     \
284
    R##_f1 = _FP_FRAC_WORD_4(_z,1);                                     \
285
  } while (0)
286
 
287
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
288
   Do only 3 multiplications instead of four. This one is for machines
289
   where multiplication is much more expensive than subtraction.  */
290
 
291
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)              \
292
  do {                                                                  \
293
    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);      \
294
    _FP_W_TYPE _d;                                                      \
295
    int _c1, _c2;                                                       \
296
                                                                        \
297
    _b_f0 = X##_f0 + X##_f1;                                            \
298
    _c1 = _b_f0 < X##_f0;                                               \
299
    _b_f1 = Y##_f0 + Y##_f1;                                            \
300
    _c2 = _b_f1 < Y##_f0;                                               \
301
    doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);                     \
302
    doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1);   \
303
    doit(_c_f1, _c_f0, X##_f1, Y##_f1);                                 \
304
                                                                        \
305
    _b_f0 &= -_c2;                                                      \
306
    _b_f1 &= -_c1;                                                      \
307
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
308
                    _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d,           \
309
                    0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1));    \
310
    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
311
                     _b_f0);                                            \
312
    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
313
                     _b_f1);                                            \
314
    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
315
                    _FP_FRAC_WORD_4(_z,1),                              \
316
                    0, _d, _FP_FRAC_WORD_4(_z,0));                        \
317
    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
318
                    _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0);             \
319
    __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2),       \
320
                    _c_f1, _c_f0,                                       \
321
                    _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2));      \
322
                                                                        \
323
    /* Normalize since we know where the msb of the multiplicands       \
324
       were (bit B), we know that the msb of the of the product is      \
325
       at either 2B or 2B-1.  */                                        \
326
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
327
    R##_f0 = _FP_FRAC_WORD_4(_z,0);                                     \
328
    R##_f1 = _FP_FRAC_WORD_4(_z,1);                                     \
329
  } while (0)
330
 
331
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                          \
332
  do {                                                                  \
333
    _FP_FRAC_DECL_4(_z);                                                \
334
    _FP_W_TYPE _x[2], _y[2];                                            \
335
    _x[0] = X##_f0; _x[1] = X##_f1;                                      \
336
    _y[0] = Y##_f0; _y[1] = Y##_f1;                                      \
337
                                                                        \
338
    mpn_mul_n(_z_f, _x, _y, 2);                                         \
339
                                                                        \
340
    /* Normalize since we know where the msb of the multiplicands       \
341
       were (bit B), we know that the msb of the of the product is      \
342
       at either 2B or 2B-1.  */                                        \
343
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
344
    R##_f0 = _z_f[0];                                                   \
345
    R##_f1 = _z_f[1];                                                   \
346
  } while (0)
347
 
348
/* Do at most 120x120=240 bits multiplication using double floating
349
   point multiplication.  This is useful if floating point
350
   multiplication has much bigger throughput than integer multiply.
351
   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
352
   between 106 and 120 only.
353
   Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
354
   SETFETZ is a macro which will disable all FPU exceptions and set rounding
355
   towards zero,  RESETFE should optionally reset it back.  */
356
 
357
#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe)     \
358
  do {                                                                          \
359
    static const double _const[] = {                                            \
360
      /* 2^-24 */ 5.9604644775390625e-08,                                       \
361
      /* 2^-48 */ 3.5527136788005009e-15,                                       \
362
      /* 2^-72 */ 2.1175823681357508e-22,                                       \
363
      /* 2^-96 */ 1.2621774483536189e-29,                                       \
364
      /* 2^28 */ 2.68435456e+08,                                                \
365
      /* 2^4 */ 1.600000e+01,                                                   \
366
      /* 2^-20 */ 9.5367431640625e-07,                                          \
367
      /* 2^-44 */ 5.6843418860808015e-14,                                       \
368
      /* 2^-68 */ 3.3881317890172014e-21,                                       \
369
      /* 2^-92 */ 2.0194839173657902e-28,                                       \
370
      /* 2^-116 */ 1.2037062152420224e-35};                                     \
371
    double _a240, _b240, _c240, _d240, _e240, _f240,                            \
372
           _g240, _h240, _i240, _j240, _k240;                                   \
373
    union { double d; UDItype i; } _l240, _m240, _n240, _o240,                  \
374
                                   _p240, _q240, _r240, _s240;                  \
375
    UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;                        \
376
                                                                                \
377
    if (wfracbits < 106 || wfracbits > 120)                                     \
378
      abort();                                                                  \
379
                                                                                \
380
    setfetz;                                                                    \
381
                                                                                \
382
    _e240 = (double)(long)(X##_f0 & 0xffffff);                                  \
383
    _j240 = (double)(long)(Y##_f0 & 0xffffff);                                  \
384
    _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff);                          \
385
    _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff);                          \
386
    _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48));       \
387
    _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48));       \
388
    _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff);                           \
389
    _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff);                           \
390
    _a240 = (double)(long)(X##_f1 >> 32);                                       \
391
    _f240 = (double)(long)(Y##_f1 >> 32);                                       \
392
    _e240 *= _const[3];                                                         \
393
    _j240 *= _const[3];                                                         \
394
    _d240 *= _const[2];                                                         \
395
    _i240 *= _const[2];                                                         \
396
    _c240 *= _const[1];                                                         \
397
    _h240 *= _const[1];                                                         \
398
    _b240 *= _const[0];                                                          \
399
    _g240 *= _const[0];                                                          \
400
    _s240.d =                                                         _e240*_j240;\
401
    _r240.d =                                           _d240*_j240 + _e240*_i240;\
402
    _q240.d =                             _c240*_j240 + _d240*_i240 + _e240*_h240;\
403
    _p240.d =               _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
404
    _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
405
    _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;            \
406
    _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;                          \
407
    _l240.d = _a240*_g240 + _b240*_f240;                                        \
408
    _k240 =   _a240*_f240;                                                      \
409
    _r240.d += _s240.d;                                                         \
410
    _q240.d += _r240.d;                                                         \
411
    _p240.d += _q240.d;                                                         \
412
    _o240.d += _p240.d;                                                         \
413
    _n240.d += _o240.d;                                                         \
414
    _m240.d += _n240.d;                                                         \
415
    _l240.d += _m240.d;                                                         \
416
    _k240 += _l240.d;                                                           \
417
    _s240.d -= ((_const[10]+_s240.d)-_const[10]);                               \
418
    _r240.d -= ((_const[9]+_r240.d)-_const[9]);                                 \
419
    _q240.d -= ((_const[8]+_q240.d)-_const[8]);                                 \
420
    _p240.d -= ((_const[7]+_p240.d)-_const[7]);                                 \
421
    _o240.d += _const[7];                                                       \
422
    _n240.d += _const[6];                                                       \
423
    _m240.d += _const[5];                                                       \
424
    _l240.d += _const[4];                                                       \
425
    if (_s240.d != 0.0) _y240 = 1;                                              \
426
    if (_r240.d != 0.0) _y240 = 1;                                              \
427
    if (_q240.d != 0.0) _y240 = 1;                                              \
428
    if (_p240.d != 0.0) _y240 = 1;                                              \
429
    _t240 = (DItype)_k240;                                                      \
430
    _u240 = _l240.i;                                                            \
431
    _v240 = _m240.i;                                                            \
432
    _w240 = _n240.i;                                                            \
433
    _x240 = _o240.i;                                                            \
434
    R##_f1 = (_t240 << (128 - (wfracbits - 1)))                                 \
435
             | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104));                 \
436
    R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1)))                    \
437
             | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))                  \
438
             | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))                  \
439
             | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))                   \
440
             | _y240;                                                           \
441
    resetfe;                                                                    \
442
  } while (0)
443
 
444
/*
445
 * Division algorithms:
446
 */
447
 
448
#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)                                \
449
  do {                                                                  \
450
    _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;         \
451
    if (_FP_FRAC_GT_2(X, Y))                                            \
452
      {                                                                 \
453
        _n_f2 = X##_f1 >> 1;                                            \
454
        _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;          \
455
        _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);                        \
456
      }                                                                 \
457
    else                                                                \
458
      {                                                                 \
459
        R##_e--;                                                        \
460
        _n_f2 = X##_f1;                                                 \
461
        _n_f1 = X##_f0;                                                 \
462
        _n_f0 = 0;                                                       \
463
      }                                                                 \
464
                                                                        \
465
    /* Normalize, i.e. make the most significant bit of the             \
466
       denominator set. */                                              \
467
    _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs);                             \
468
                                                                        \
469
    udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);                    \
470
    umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0);                            \
471
    _r_f0 = _n_f0;                                                      \
472
    if (_FP_FRAC_GT_2(_m, _r))                                          \
473
      {                                                                 \
474
        R##_f1--;                                                       \
475
        _FP_FRAC_ADD_2(_r, Y, _r);                                      \
476
        if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))              \
477
          {                                                             \
478
            R##_f1--;                                                   \
479
            _FP_FRAC_ADD_2(_r, Y, _r);                                  \
480
          }                                                             \
481
      }                                                                 \
482
    _FP_FRAC_DEC_2(_r, _m);                                             \
483
                                                                        \
484
    if (_r_f1 == Y##_f1)                                                \
485
      {                                                                 \
486
        /* This is a special case, not an optimization                  \
487
           (_r/Y##_f1 would not fit into UWtype).                       \
488
           As _r is guaranteed to be < Y,  R##_f0 can be either         \
489
           (UWtype)-1 or (UWtype)-2.  But as we know what kind          \
490
           of bits it is (sticky, guard, round),  we don't care.        \
491
           We also don't care what the reminder is,  because the        \
492
           guard bit will be set anyway.  -jj */                        \
493
        R##_f0 = -1;                                                    \
494
      }                                                                 \
495
    else                                                                \
496
      {                                                                 \
497
        udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);                \
498
        umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0);                        \
499
        _r_f0 = 0;                                                       \
500
        if (_FP_FRAC_GT_2(_m, _r))                                      \
501
          {                                                             \
502
            R##_f0--;                                                   \
503
            _FP_FRAC_ADD_2(_r, Y, _r);                                  \
504
            if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))          \
505
              {                                                         \
506
                R##_f0--;                                               \
507
                _FP_FRAC_ADD_2(_r, Y, _r);                              \
508
              }                                                         \
509
          }                                                             \
510
        if (!_FP_FRAC_EQ_2(_r, _m))                                     \
511
          R##_f0 |= _FP_WORK_STICKY;                                    \
512
      }                                                                 \
513
  } while (0)
514
 
515
 
516
#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)                                 \
517
  do {                                                                  \
518
    _FP_W_TYPE _x[4], _y[2], _z[4];                                     \
519
    _y[0] = Y##_f0; _y[1] = Y##_f1;                                      \
520
    _x[0] = _x[3] = 0;                                                    \
521
    if (_FP_FRAC_GT_2(X, Y))                                            \
522
      {                                                                 \
523
        R##_e++;                                                        \
524
        _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) |   \
525
                 X##_f1 >> (_FP_W_TYPE_SIZE -                           \
526
                            (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
527
        _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE);     \
528
      }                                                                 \
529
    else                                                                \
530
      {                                                                 \
531
        _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) |     \
532
                 X##_f1 >> (_FP_W_TYPE_SIZE -                           \
533
                            (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE)));   \
534
        _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE);       \
535
      }                                                                 \
536
                                                                        \
537
    (void) mpn_divrem (_z, 0, _x, 4, _y, 2);                             \
538
    R##_f1 = _z[1];                                                     \
539
    R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);                            \
540
  } while (0)
541
 
542
 
543
/*
544
 * Square root algorithms:
545
 * We have just one right now, maybe Newton approximation
546
 * should be added for those machines where division is fast.
547
 */
548
 
549
#define _FP_SQRT_MEAT_2(R, S, T, X, q)                  \
550
  do {                                                  \
551
    while (q)                                           \
552
      {                                                 \
553
        T##_f1 = S##_f1 + q;                            \
554
        if (T##_f1 <= X##_f1)                           \
555
          {                                             \
556
            S##_f1 = T##_f1 + q;                        \
557
            X##_f1 -= T##_f1;                           \
558
            R##_f1 += q;                                \
559
          }                                             \
560
        _FP_FRAC_SLL_2(X, 1);                           \
561
        q >>= 1;                                        \
562
      }                                                 \
563
    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);         \
564
    while (q != _FP_WORK_ROUND)                         \
565
      {                                                 \
566
        T##_f0 = S##_f0 + q;                            \
567
        T##_f1 = S##_f1;                                \
568
        if (T##_f1 < X##_f1 ||                          \
569
            (T##_f1 == X##_f1 && T##_f0 <= X##_f0))     \
570
          {                                             \
571
            S##_f0 = T##_f0 + q;                        \
572
            S##_f1 += (T##_f0 > S##_f0);                \
573
            _FP_FRAC_DEC_2(X, T);                       \
574
            R##_f0 += q;                                \
575
          }                                             \
576
        _FP_FRAC_SLL_2(X, 1);                           \
577
        q >>= 1;                                        \
578
      }                                                 \
579
    if (X##_f0 | X##_f1)                                \
580
      {                                                 \
581
        if (S##_f1 < X##_f1 ||                          \
582
            (S##_f1 == X##_f1 && S##_f0 < X##_f0))      \
583
          R##_f0 |= _FP_WORK_ROUND;                     \
584
        R##_f0 |= _FP_WORK_STICKY;                      \
585
      }                                                 \
586
  } while (0)
587
 
588
 
589
/*
590
 * Assembly/disassembly for converting to/from integral types.
591
 * No shifting or overflow handled here.
592
 */
593
 
594
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)        \
595
(void)((rsize <= _FP_W_TYPE_SIZE)               \
596
       ? ({ r = X##_f0; })                      \
597
       : ({                                     \
598
            r = X##_f1;                         \
599
            r <<= _FP_W_TYPE_SIZE;              \
600
            r += X##_f0;                        \
601
          }))
602
 
603
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)                             \
604
  do {                                                                  \
605
    X##_f0 = r;                                                         \
606
    X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);     \
607
  } while (0)
608
 
609
/*
610
 * Convert FP values between word sizes
611
 */
612
 
613
#define _FP_FRAC_COPY_1_2(D, S)         (D##_f = S##_f0)
614
 
615
#define _FP_FRAC_COPY_2_1(D, S)         ((D##_f0 = S##_f), (D##_f1 = 0))
616
 
617
#define _FP_FRAC_COPY_2_2(D,S)          _FP_FRAC_COPY_2(D,S)

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.