OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [include/] [math-emu/] [op-4.h] - Blame information for rev 1774

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/* Software floating-point emulation.
2
   Basic four-word fraction declaration and manipulation.
3
   Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
4
   This file is part of the GNU C Library.
5
   Contributed by Richard Henderson (rth@cygnus.com),
6
                  Jakub Jelinek (jj@ultra.linux.cz),
7
                  David S. Miller (davem@redhat.com) and
8
                  Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
 
10
   The GNU C Library is free software; you can redistribute it and/or
11
   modify it under the terms of the GNU Library General Public License as
12
   published by the Free Software Foundation; either version 2 of the
13
   License, or (at your option) any later version.
14
 
15
   The GNU C Library is distributed in the hope that it will be useful,
16
   but WITHOUT ANY WARRANTY; without even the implied warranty of
17
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
   Library General Public License for more details.
19
 
20
   You should have received a copy of the GNU Library General Public
21
   License along with the GNU C Library; see the file COPYING.LIB.  If
22
   not, write to the Free Software Foundation, Inc.,
23
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
24
 
25
#ifndef __MATH_EMU_OP_4_H__
26
#define __MATH_EMU_OP_4_H__
27
 
28
#define _FP_FRAC_DECL_4(X)      _FP_W_TYPE X##_f[4]
29
#define _FP_FRAC_COPY_4(D,S)                    \
30
  (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],    \
31
   D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
32
#define _FP_FRAC_SET_4(X,I)     __FP_FRAC_SET_4(X, I)
33
#define _FP_FRAC_HIGH_4(X)      (X##_f[3])
34
#define _FP_FRAC_LOW_4(X)       (X##_f[0])
35
#define _FP_FRAC_WORD_4(X,w)    (X##_f[w])
36
 
37
#define _FP_FRAC_SLL_4(X,N)                                             \
38
  do {                                                                  \
39
    _FP_I_TYPE _up, _down, _skip, _i;                                   \
40
    _skip = (N) / _FP_W_TYPE_SIZE;                                      \
41
    _up = (N) % _FP_W_TYPE_SIZE;                                        \
42
    _down = _FP_W_TYPE_SIZE - _up;                                      \
43
    if (!_up)                                                           \
44
      for (_i = 3; _i >= _skip; --_i)                                   \
45
        X##_f[_i] = X##_f[_i-_skip];                                    \
46
    else                                                                \
47
      {                                                                 \
48
        for (_i = 3; _i > _skip; --_i)                                  \
49
          X##_f[_i] = X##_f[_i-_skip] << _up                            \
50
                      | X##_f[_i-_skip-1] >> _down;                     \
51
        X##_f[_i--] = X##_f[0] << _up;                                  \
52
      }                                                                 \
53
    for (; _i >= 0; --_i)                                                \
54
      X##_f[_i] = 0;                                                    \
55
  } while (0)
56
 
57
/* This one was broken too */
58
#define _FP_FRAC_SRL_4(X,N)                                             \
59
  do {                                                                  \
60
    _FP_I_TYPE _up, _down, _skip, _i;                                   \
61
    _skip = (N) / _FP_W_TYPE_SIZE;                                      \
62
    _down = (N) % _FP_W_TYPE_SIZE;                                      \
63
    _up = _FP_W_TYPE_SIZE - _down;                                      \
64
    if (!_down)                                                         \
65
      for (_i = 0; _i <= 3-_skip; ++_i)                                  \
66
        X##_f[_i] = X##_f[_i+_skip];                                    \
67
    else                                                                \
68
      {                                                                 \
69
        for (_i = 0; _i < 3-_skip; ++_i)                         \
70
          X##_f[_i] = X##_f[_i+_skip] >> _down                          \
71
                      | X##_f[_i+_skip+1] << _up;                       \
72
        X##_f[_i++] = X##_f[3] >> _down;                                \
73
      }                                                                 \
74
    for (; _i < 4; ++_i)                                                \
75
      X##_f[_i] = 0;                                                    \
76
  } while (0)
77
 
78
 
79
/* Right shift with sticky-lsb.
80
 * What this actually means is that we do a standard right-shift,
81
 * but that if any of the bits that fall off the right hand side
82
 * were one then we always set the LSbit.
83
 */
84
#define _FP_FRAC_SRS_4(X,N,size)                                        \
85
  do {                                                                  \
86
    _FP_I_TYPE _up, _down, _skip, _i;                                   \
87
    _FP_W_TYPE _s;                                                      \
88
    _skip = (N) / _FP_W_TYPE_SIZE;                                      \
89
    _down = (N) % _FP_W_TYPE_SIZE;                                      \
90
    _up = _FP_W_TYPE_SIZE - _down;                                      \
91
    for (_s = _i = 0; _i < _skip; ++_i)                                  \
92
      _s |= X##_f[_i];                                                  \
93
    _s |= X##_f[_i] << _up;                                             \
94
/* s is now != 0 if we want to set the LSbit */                         \
95
    if (!_down)                                                         \
96
      for (_i = 0; _i <= 3-_skip; ++_i)                                  \
97
        X##_f[_i] = X##_f[_i+_skip];                                    \
98
    else                                                                \
99
      {                                                                 \
100
        for (_i = 0; _i < 3-_skip; ++_i)                         \
101
          X##_f[_i] = X##_f[_i+_skip] >> _down                          \
102
                      | X##_f[_i+_skip+1] << _up;                       \
103
        X##_f[_i++] = X##_f[3] >> _down;                                \
104
      }                                                                 \
105
    for (; _i < 4; ++_i)                                                \
106
      X##_f[_i] = 0;                                                    \
107
    /* don't fix the LSB until the very end when we're sure f[0] is stable */   \
108
    X##_f[0] |= (_s != 0);                                              \
109
  } while (0)
110
 
111
#define _FP_FRAC_ADD_4(R,X,Y)                                           \
112
  __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],               \
113
                  X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
114
                  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
115
 
116
#define _FP_FRAC_SUB_4(R,X,Y)                                           \
117
  __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],               \
118
                  X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
119
                  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
120
 
121
#define _FP_FRAC_DEC_4(X,Y)                                             \
122
  __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
123
                  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
124
 
125
#define _FP_FRAC_ADDI_4(X,I)                                            \
126
  __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
127
 
128
#define _FP_ZEROFRAC_4  0,0,0,0
129
#define _FP_MINFRAC_4   0,0,0,1
130
#define _FP_MAXFRAC_4   (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
131
 
132
#define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
133
#define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
134
#define _FP_FRAC_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
135
#define _FP_FRAC_CLEAR_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
136
 
137
#define _FP_FRAC_EQ_4(X,Y)                              \
138
 (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1]          \
139
  && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
140
 
141
#define _FP_FRAC_GT_4(X,Y)                              \
142
 (X##_f[3] > Y##_f[3] ||                                \
143
  (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
144
   (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
145
    (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0])       \
146
   ))                                                   \
147
  ))                                                    \
148
 )
149
 
150
#define _FP_FRAC_GE_4(X,Y)                              \
151
 (X##_f[3] > Y##_f[3] ||                                \
152
  (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
153
   (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
154
    (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0])      \
155
   ))                                                   \
156
  ))                                                    \
157
 )
158
 
159
 
160
#define _FP_FRAC_CLZ_4(R,X)             \
161
  do {                                  \
162
    if (X##_f[3])                       \
163
    {                                   \
164
        __FP_CLZ(R,X##_f[3]);           \
165
    }                                   \
166
    else if (X##_f[2])                  \
167
    {                                   \
168
        __FP_CLZ(R,X##_f[2]);           \
169
        R += _FP_W_TYPE_SIZE;           \
170
    }                                   \
171
    else if (X##_f[1])                  \
172
    {                                   \
173
        __FP_CLZ(R,X##_f[2]);           \
174
        R += _FP_W_TYPE_SIZE*2;         \
175
    }                                   \
176
    else                                \
177
    {                                   \
178
        __FP_CLZ(R,X##_f[0]);           \
179
        R += _FP_W_TYPE_SIZE*3;         \
180
    }                                   \
181
  } while(0)
182
 
183
 
184
#define _FP_UNPACK_RAW_4(fs, X, val)                            \
185
  do {                                                          \
186
    union _FP_UNION_##fs _flo; _flo.flt = (val);                \
187
    X##_f[0] = _flo.bits.frac0;                                 \
188
    X##_f[1] = _flo.bits.frac1;                                 \
189
    X##_f[2] = _flo.bits.frac2;                                 \
190
    X##_f[3] = _flo.bits.frac3;                                 \
191
    X##_e  = _flo.bits.exp;                                     \
192
    X##_s  = _flo.bits.sign;                                    \
193
  } while (0)
194
 
195
#define _FP_UNPACK_RAW_4_P(fs, X, val)                          \
196
  do {                                                          \
197
    union _FP_UNION_##fs *_flo =                                \
198
      (union _FP_UNION_##fs *)(val);                            \
199
                                                                \
200
    X##_f[0] = _flo->bits.frac0;                                \
201
    X##_f[1] = _flo->bits.frac1;                                \
202
    X##_f[2] = _flo->bits.frac2;                                \
203
    X##_f[3] = _flo->bits.frac3;                                \
204
    X##_e  = _flo->bits.exp;                                    \
205
    X##_s  = _flo->bits.sign;                                   \
206
  } while (0)
207
 
208
#define _FP_PACK_RAW_4(fs, val, X)                              \
209
  do {                                                          \
210
    union _FP_UNION_##fs _flo;                                  \
211
    _flo.bits.frac0 = X##_f[0];                                 \
212
    _flo.bits.frac1 = X##_f[1];                                 \
213
    _flo.bits.frac2 = X##_f[2];                                 \
214
    _flo.bits.frac3 = X##_f[3];                                 \
215
    _flo.bits.exp   = X##_e;                                    \
216
    _flo.bits.sign  = X##_s;                                    \
217
    (val) = _flo.flt;                                           \
218
  } while (0)
219
 
220
#define _FP_PACK_RAW_4_P(fs, val, X)                            \
221
  do {                                                          \
222
    union _FP_UNION_##fs *_flo =                                \
223
      (union _FP_UNION_##fs *)(val);                            \
224
                                                                \
225
    _flo->bits.frac0 = X##_f[0];                                \
226
    _flo->bits.frac1 = X##_f[1];                                \
227
    _flo->bits.frac2 = X##_f[2];                                \
228
    _flo->bits.frac3 = X##_f[3];                                \
229
    _flo->bits.exp   = X##_e;                                   \
230
    _flo->bits.sign  = X##_s;                                   \
231
  } while (0)
232
 
233
/*
234
 * Multiplication algorithms:
235
 */
236
 
237
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
238
 
239
#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit)                       \
240
  do {                                                                      \
241
    _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);          \
242
    _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f);          \
243
                                                                            \
244
    doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \
245
    doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]);                                 \
246
    doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]);                                 \
247
    doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]);                                 \
248
    doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]);                                 \
249
    doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]);                                 \
250
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),            \
251
                    _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0,                    \
252
                    0,0,_FP_FRAC_WORD_8(_z,1));                               \
253
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),            \
254
                    _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0,                    \
255
                    _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),            \
256
                    _FP_FRAC_WORD_8(_z,1));                                 \
257
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
258
                    _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0,                    \
259
                    0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2));          \
260
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
261
                    _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0,                    \
262
                    _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
263
                    _FP_FRAC_WORD_8(_z,2));                                 \
264
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
265
                    _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0,                    \
266
                    _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
267
                    _FP_FRAC_WORD_8(_z,2));                                 \
268
    doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]);                                 \
269
    doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]);                                 \
270
    doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]);                                 \
271
    doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]);                                 \
272
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
273
                    _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0,                    \
274
                    0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3));          \
275
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
276
                    _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0,                    \
277
                    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
278
                    _FP_FRAC_WORD_8(_z,3));                                 \
279
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
280
                    _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0,                    \
281
                    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
282
                    _FP_FRAC_WORD_8(_z,3));                                 \
283
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
284
                    _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0,                    \
285
                    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
286
                    _FP_FRAC_WORD_8(_z,3));                                 \
287
    doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]);                                 \
288
    doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]);                                 \
289
    doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]);                                 \
290
    doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]);                                 \
291
    doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]);                                 \
292
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
293
                    _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0,                    \
294
                    0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4));          \
295
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
296
                    _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0,                    \
297
                    _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
298
                    _FP_FRAC_WORD_8(_z,4));                                 \
299
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
300
                    _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0,                    \
301
                    _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
302
                    _FP_FRAC_WORD_8(_z,4));                                 \
303
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
304
                    _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0,                    \
305
                    0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5));          \
306
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
307
                    _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0,                    \
308
                    _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
309
                    _FP_FRAC_WORD_8(_z,5));                                 \
310
    doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]);                                 \
311
    __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
312
                    _b_f1,_b_f0,                                            \
313
                    _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6));           \
314
                                                                            \
315
    /* Normalize since we know where the msb of the multiplicands           \
316
       were (bit B), we know that the msb of the of the product is          \
317
       at either 2B or 2B-1.  */                                            \
318
    _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);                           \
319
    __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),        \
320
                    _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));           \
321
  } while (0)
322
 
323
#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y)                              \
324
  do {                                                                      \
325
    _FP_FRAC_DECL_8(_z);                                                    \
326
                                                                            \
327
    mpn_mul_n(_z_f, _x_f, _y_f, 4);                                         \
328
                                                                            \
329
    /* Normalize since we know where the msb of the multiplicands           \
330
       were (bit B), we know that the msb of the of the product is          \
331
       at either 2B or 2B-1.  */                                            \
332
    _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);                           \
333
    __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),        \
334
                    _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));           \
335
  } while (0)
336
 
337
/*
338
 * Helper utility for _FP_DIV_MEAT_4_udiv:
339
 * pppp = m * nnn
340
 */
341
#define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0)                               \
342
  do {                                                                      \
343
    UWtype _t;                                                              \
344
    umul_ppmm(p1,p0,m,n0);                                                  \
345
    umul_ppmm(p2,_t,m,n1);                                                  \
346
    __FP_FRAC_ADDI_2(p2,p1,_t);                                             \
347
    umul_ppmm(p3,_t,m,n2);                                                  \
348
    __FP_FRAC_ADDI_2(p3,p2,_t);                                             \
349
  } while (0)
350
 
351
/*
352
 * Division algorithms:
353
 */
354
 
355
#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y)                                    \
356
  do {                                                                      \
357
    int _i;                                                                 \
358
    _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m);                               \
359
    _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4);                                     \
360
    if (_FP_FRAC_GT_4(X, Y))                                                \
361
      {                                                                     \
362
        _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1);                        \
363
        _FP_FRAC_SRL_4(X, 1);                                               \
364
      }                                                                     \
365
    else                                                                    \
366
      R##_e--;                                                              \
367
                                                                            \
368
    /* Normalize, i.e. make the most significant bit of the                 \
369
       denominator set. */                                                  \
370
    _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs);                                 \
371
                                                                            \
372
    for (_i = 3; ; _i--)                                                    \
373
      {                                                                     \
374
        if (X##_f[3] == Y##_f[3])                                           \
375
          {                                                                 \
376
            /* This is a special case, not an optimization                  \
377
               (X##_f[3]/Y##_f[3] would not fit into UWtype).               \
378
               As X## is guaranteed to be < Y,  R##_f[_i] can be either     \
379
               (UWtype)-1 or (UWtype)-2.  */                                \
380
            R##_f[_i] = -1;                                                 \
381
            if (!_i)                                                        \
382
              break;                                                        \
383
            __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],         \
384
                            Y##_f[2], Y##_f[1], Y##_f[0], 0,                \
385
                            X##_f[2], X##_f[1], X##_f[0], _n_f[_i]);        \
386
            _FP_FRAC_SUB_4(X, Y, X);                                        \
387
            if (X##_f[3] > Y##_f[3])                                        \
388
              {                                                             \
389
                R##_f[_i] = -2;                                             \
390
                _FP_FRAC_ADD_4(X, Y, X);                                    \
391
              }                                                             \
392
          }                                                                 \
393
        else                                                                \
394
          {                                                                 \
395
            udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]);  \
396
            umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0],                \
397
                          R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]);         \
398
            X##_f[2] = X##_f[1];                                            \
399
            X##_f[1] = X##_f[0];                                            \
400
            X##_f[0] = _n_f[_i];                                            \
401
            if (_FP_FRAC_GT_4(_m, X))                                       \
402
              {                                                             \
403
                R##_f[_i]--;                                                \
404
                _FP_FRAC_ADD_4(X, Y, X);                                    \
405
                if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X))            \
406
                  {                                                         \
407
                    R##_f[_i]--;                                            \
408
                    _FP_FRAC_ADD_4(X, Y, X);                                \
409
                  }                                                         \
410
              }                                                             \
411
            _FP_FRAC_DEC_4(X, _m);                                          \
412
            if (!_i)                                                        \
413
              {                                                             \
414
                if (!_FP_FRAC_EQ_4(X, _m))                                  \
415
                  R##_f[0] |= _FP_WORK_STICKY;                              \
416
                break;                                                      \
417
              }                                                             \
418
          }                                                                 \
419
      }                                                                     \
420
  } while (0)
421
 
422
 
423
/*
424
 * Square root algorithms:
425
 * We have just one right now, maybe Newton approximation
426
 * should be added for those machines where division is fast.
427
 */
428
 
429
#define _FP_SQRT_MEAT_4(R, S, T, X, q)                          \
430
  do {                                                          \
431
    while (q)                                                   \
432
      {                                                         \
433
        T##_f[3] = S##_f[3] + q;                                \
434
        if (T##_f[3] <= X##_f[3])                               \
435
          {                                                     \
436
            S##_f[3] = T##_f[3] + q;                            \
437
            X##_f[3] -= T##_f[3];                               \
438
            R##_f[3] += q;                                      \
439
          }                                                     \
440
        _FP_FRAC_SLL_4(X, 1);                                   \
441
        q >>= 1;                                                \
442
      }                                                         \
443
    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
444
    while (q)                                                   \
445
      {                                                         \
446
        T##_f[2] = S##_f[2] + q;                                \
447
        T##_f[3] = S##_f[3];                                    \
448
        if (T##_f[3] < X##_f[3] ||                              \
449
            (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2]))     \
450
          {                                                     \
451
            S##_f[2] = T##_f[2] + q;                            \
452
            S##_f[3] += (T##_f[2] > S##_f[2]);                  \
453
            __FP_FRAC_DEC_2(X##_f[3], X##_f[2],                 \
454
                            T##_f[3], T##_f[2]);                \
455
            R##_f[2] += q;                                      \
456
          }                                                     \
457
        _FP_FRAC_SLL_4(X, 1);                                   \
458
        q >>= 1;                                                \
459
      }                                                         \
460
    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
461
    while (q)                                                   \
462
      {                                                         \
463
        T##_f[1] = S##_f[1] + q;                                \
464
        T##_f[2] = S##_f[2];                                    \
465
        T##_f[3] = S##_f[3];                                    \
466
        if (T##_f[3] < X##_f[3] ||                              \
467
            (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] ||    \
468
             (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1]))))  \
469
          {                                                     \
470
            S##_f[1] = T##_f[1] + q;                            \
471
            S##_f[2] += (T##_f[1] > S##_f[1]);                  \
472
            S##_f[3] += (T##_f[2] > S##_f[2]);                  \
473
            __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1],       \
474
                            T##_f[3], T##_f[2], T##_f[1]);      \
475
            R##_f[1] += q;                                      \
476
          }                                                     \
477
        _FP_FRAC_SLL_4(X, 1);                                   \
478
        q >>= 1;                                                \
479
      }                                                         \
480
    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
481
    while (q != _FP_WORK_ROUND)                                 \
482
      {                                                         \
483
        T##_f[0] = S##_f[0] + q;                                \
484
        T##_f[1] = S##_f[1];                                    \
485
        T##_f[2] = S##_f[2];                                    \
486
        T##_f[3] = S##_f[3];                                    \
487
        if (_FP_FRAC_GE_4(X,T))                                 \
488
          {                                                     \
489
            S##_f[0] = T##_f[0] + q;                            \
490
            S##_f[1] += (T##_f[0] > S##_f[0]);                  \
491
            S##_f[2] += (T##_f[1] > S##_f[1]);                  \
492
            S##_f[3] += (T##_f[2] > S##_f[2]);                  \
493
            _FP_FRAC_DEC_4(X, T);                               \
494
            R##_f[0] += q;                                      \
495
          }                                                     \
496
        _FP_FRAC_SLL_4(X, 1);                                   \
497
        q >>= 1;                                                \
498
      }                                                         \
499
    if (!_FP_FRAC_ZEROP_4(X))                                   \
500
      {                                                         \
501
        if (_FP_FRAC_GT_4(X,S))                                 \
502
          R##_f[0] |= _FP_WORK_ROUND;                           \
503
        R##_f[0] |= _FP_WORK_STICKY;                            \
504
      }                                                         \
505
  } while (0)
506
 
507
 
508
/*
509
 * Internals
510
 */
511
 
512
#define __FP_FRAC_SET_4(X,I3,I2,I1,I0)                                  \
513
  (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
514
 
515
#ifndef __FP_FRAC_ADD_3
516
#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)             \
517
  do {                                                          \
518
    int _c1, _c2;                                                       \
519
    r0 = x0 + y0;                                               \
520
    _c1 = r0 < x0;                                              \
521
    r1 = x1 + y1;                                               \
522
    _c2 = r1 < x1;                                              \
523
    r1 += _c1;                                                  \
524
    _c2 |= r1 < _c1;                                            \
525
    r2 = x2 + y2 + _c2;                                         \
526
  } while (0)
527
#endif
528
 
529
#ifndef __FP_FRAC_ADD_4
530
#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)    \
531
  do {                                                          \
532
    int _c1, _c2, _c3;                                          \
533
    r0 = x0 + y0;                                               \
534
    _c1 = r0 < x0;                                              \
535
    r1 = x1 + y1;                                               \
536
    _c2 = r1 < x1;                                              \
537
    r1 += _c1;                                                  \
538
    _c2 |= r1 < _c1;                                            \
539
    r2 = x2 + y2;                                               \
540
    _c3 = r2 < x2;                                              \
541
    r2 += _c2;                                                  \
542
    _c3 |= r2 < _c2;                                            \
543
    r3 = x3 + y3 + _c3;                                         \
544
  } while (0)
545
#endif
546
 
547
#ifndef __FP_FRAC_SUB_3
548
#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)             \
549
  do {                                                          \
550
    int _c1, _c2;                                                       \
551
    r0 = x0 - y0;                                               \
552
    _c1 = r0 > x0;                                              \
553
    r1 = x1 - y1;                                               \
554
    _c2 = r1 > x1;                                              \
555
    r1 -= _c1;                                                  \
556
    _c2 |= r1 > _c1;                                            \
557
    r2 = x2 - y2 - _c2;                                         \
558
  } while (0)
559
#endif
560
 
561
#ifndef __FP_FRAC_SUB_4
562
#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)    \
563
  do {                                                          \
564
    int _c1, _c2, _c3;                                          \
565
    r0 = x0 - y0;                                               \
566
    _c1 = r0 > x0;                                              \
567
    r1 = x1 - y1;                                               \
568
    _c2 = r1 > x1;                                              \
569
    r1 -= _c1;                                                  \
570
    _c2 |= r1 > _c1;                                            \
571
    r2 = x2 - y2;                                               \
572
    _c3 = r2 > x2;                                              \
573
    r2 -= _c2;                                                  \
574
    _c3 |= r2 > _c2;                                            \
575
    r3 = x3 - y3 - _c3;                                         \
576
  } while (0)
577
#endif
578
 
579
#ifndef __FP_FRAC_DEC_3
580
#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0)                              \
581
  do {                                                                  \
582
    UWtype _t0, _t1, _t2;                                               \
583
    _t0 = x0, _t1 = x1, _t2 = x2;                                       \
584
    __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0);            \
585
  } while (0)
586
#endif
587
 
588
#ifndef __FP_FRAC_DEC_4
589
#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0)                        \
590
  do {                                                                  \
591
    UWtype _t0, _t1, _t2, _t3;                                          \
592
    _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3;                             \
593
    __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0);         \
594
  } while (0)
595
#endif
596
 
597
#ifndef __FP_FRAC_ADDI_4
598
#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)                                 \
599
  do {                                                                  \
600
    UWtype _t;                                                          \
601
    _t = ((x0 += i) < i);                                               \
602
    x1 += _t; _t = (x1 < _t);                                           \
603
    x2 += _t; _t = (x2 < _t);                                           \
604
    x3 += _t;                                                           \
605
  } while (0)
606
#endif
607
 
608
/* Convert FP values between word sizes. This appears to be more
609
 * complicated than I'd have expected it to be, so these might be
610
 * wrong... These macros are in any case somewhat bogus because they
611
 * use information about what various FRAC_n variables look like
612
 * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
613
 * the ones in op-2.h and op-1.h.
614
 */
615
#define _FP_FRAC_CONV_1_4(dfs, sfs, D, S)                               \
616
   do {                                                                 \
617
     if (S##_c != FP_CLS_NAN)                                           \
618
       _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),   \
619
                          _FP_WFRACBITS_##sfs);                         \
620
     else                                                               \
621
       _FP_FRAC_SRL_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs));  \
622
     D##_f = S##_f[0];                                                  \
623
  } while (0)
624
 
625
#define _FP_FRAC_CONV_2_4(dfs, sfs, D, S)                               \
626
   do {                                                                 \
627
     if (S##_c != FP_CLS_NAN)                                           \
628
       _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),   \
629
                      _FP_WFRACBITS_##sfs);                             \
630
     else                                                               \
631
       _FP_FRAC_SRL_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs));  \
632
     D##_f0 = S##_f[0];                                                 \
633
     D##_f1 = S##_f[1];                                                 \
634
  } while (0)
635
 
636
/* Assembly/disassembly for converting to/from integral types.
637
 * No shifting or overflow handled here.
638
 */
639
/* Put the FP value X into r, which is an integer of size rsize. */
640
#define _FP_FRAC_ASSEMBLE_4(r, X, rsize)                                \
641
  do {                                                                  \
642
    if (rsize <= _FP_W_TYPE_SIZE)                                       \
643
      r = X##_f[0];                                                     \
644
    else if (rsize <= 2*_FP_W_TYPE_SIZE)                                \
645
    {                                                                   \
646
      r = X##_f[1];                                                     \
647
      r <<= _FP_W_TYPE_SIZE;                                            \
648
      r += X##_f[0];                                                    \
649
    }                                                                   \
650
    else                                                                \
651
    {                                                                   \
652
      /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
653
      /* and int == 4words as a single case.                     */     \
654
      r = X##_f[3];                                                     \
655
      r <<= _FP_W_TYPE_SIZE;                                            \
656
      r += X##_f[2];                                                    \
657
      r <<= _FP_W_TYPE_SIZE;                                            \
658
      r += X##_f[1];                                                    \
659
      r <<= _FP_W_TYPE_SIZE;                                            \
660
      r += X##_f[0];                                                    \
661
    }                                                                   \
662
  } while (0)
663
 
664
/* "No disassemble Number Five!" */
665
/* move an integer of size rsize into X's fractional part. We rely on
666
 * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
667
 * having to mask the values we store into it.
668
 */
669
#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)                             \
670
  do {                                                                  \
671
    X##_f[0] = r;                                                       \
672
    X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);   \
673
    X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
674
    X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
675
  } while (0)
676
 
677
#define _FP_FRAC_CONV_4_1(dfs, sfs, D, S)                               \
678
   do {                                                                 \
679
     D##_f[0] = S##_f;                                                  \
680
     D##_f[1] = D##_f[2] = D##_f[3] = 0;                                \
681
     _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));    \
682
   } while (0)
683
 
684
#define _FP_FRAC_CONV_4_2(dfs, sfs, D, S)                               \
685
   do {                                                                 \
686
     D##_f[0] = S##_f0;                                                 \
687
     D##_f[1] = S##_f1;                                                 \
688
     D##_f[2] = D##_f[3] = 0;                                           \
689
     _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));    \
690
   } while (0)
691
 
692
#endif

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.