/* Software floating-point emulation.
   Basic one-word fraction declaration and manipulation.
   Copyright (C) 1997,1998,1999,2006 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Richard Henderson (rth@cygnus.com),
                  Jakub Jelinek (jj@ultra.linux.cz),
                  David S. Miller (davem@redhat.com) and
                  Peter Maydell (pmaydell@chiark.greenend.org.uk).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   In addition to the permissions in the GNU Lesser General Public
   License, the Free Software Foundation gives you unlimited
   permission to link the compiled version of this file into
   combinations with other programs, and to distribute those
   combinations without any restriction coming from the use of this
   file.  (The Lesser General Public License restrictions do apply in
   other respects; for example, they cover modification of the file,
   and distribution when not linked into a combine executable.)

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA.  */

/* Declare the one-word fraction of X: a variable X_f of type
   _FP_W_TYPE.  No terminating semicolon is supplied.  */
#define _FP_FRAC_DECL_1(X)      _FP_W_TYPE X##_f
/* Copy the fraction word of S into D.  */
#define _FP_FRAC_COPY_1(D, S)   (D##_f = S##_f)
/* Store the value I in the fraction word of X.  */
#define _FP_FRAC_SET_1(X, I)    (X##_f = (I))
/* With a single word, the high word, the low word and word number w
   all name the same object, X_f.  The w argument is ignored.  */
#define _FP_FRAC_HIGH_1(X)      (X##_f)
#define _FP_FRAC_LOW_1(X)       (X##_f)
#define _FP_FRAC_WORD_1(X, w)   (X##_f)

/* Add the value I to the fraction word of X.  */
#define _FP_FRAC_ADDI_1(X, I)   (X##_f += (I))

/* Shift the fraction word of X left by N bits.  When N is the
   compile-time constant 1 the word is added to itself instead of
   being shifted.  */
#define _FP_FRAC_SLL_1(X, N)                            \
  do                                                    \
    {                                                   \
      if (__builtin_constant_p (N) && (N) == 1)         \
        X##_f += X##_f;                                 \
      else                                              \
        X##_f <<= (N);                                  \
    }                                                   \
  while (0)
/* Shift the fraction word of X right by N bits.  The bits shifted out
   are simply dropped.  */
#define _FP_FRAC_SRL_1(X, N)    (X##_f >>= (N))

/* Right shift with sticky-lsb.

   Both macros shift the fraction word of X right by N bits and note
   whether any nonzero bit was shifted out.  _FP_FRAC_SRST_1 stores
   that indication (0 or 1) in S; _FP_FRAC_SRS_1 ORs it into the least
   significant bit of the shifted fraction.  The sz argument is
   accepted but not used by these one-word versions.

   Unless N is the compile-time constant 1, the lost bits are isolated
   with a left shift by _FP_W_TYPE_SIZE - N, so N is expected to be
   greater than zero.  NOTE(review): this presumes the fraction word
   is _FP_W_TYPE_SIZE bits wide -- confirm against the definition of
   _FP_W_TYPE.  */
#define _FP_FRAC_SRST_1(X, S, N, sz)  __FP_FRAC_SRST_1 (X##_f, S, (N), (sz))
#define _FP_FRAC_SRS_1(X, N, sz)      __FP_FRAC_SRS_1 (X##_f, (N), (sz))

/* Worker for _FP_FRAC_SRST_1; here X is the fraction word itself.  */
#define __FP_FRAC_SRST_1(X, S, N, sz)                   \
  do                                                    \
    {                                                   \
      S = (__builtin_constant_p (N) && (N) == 1         \
           ? X & 1                                      \
           : (X << (_FP_W_TYPE_SIZE - (N))) != 0);      \
      X = X >> (N);                                     \
    }                                                   \
  while (0)

/* Worker for _FP_FRAC_SRS_1; here X is the fraction word itself.
   This is an expression whose value is the new fraction word.  */
#define __FP_FRAC_SRS_1(X, N, sz)                               \
  (X = (X >> (N) | (__builtin_constant_p (N) && (N) == 1        \
                    ? X & 1                                     \
                    : (X << (_FP_W_TYPE_SIZE - (N))) != 0)))

/* One-word fraction arithmetic: R = X + Y, R = X - Y and X -= Y on the
   fraction words.  No carry or borrow is reported.  */
#define _FP_FRAC_ADD_1(R, X, Y) (R##_f = X##_f + Y##_f)
#define _FP_FRAC_SUB_1(R, X, Y) (R##_f = X##_f - Y##_f)
#define _FP_FRAC_DEC_1(X, Y)    (X##_f -= Y##_f)
/* Hand z and the fraction word of X to __FP_CLZ (defined elsewhere;
   presumably it stores the number of leading zero bits in z).  */
#define _FP_FRAC_CLZ_1(z, X)    __FP_CLZ(z, X##_f)

/* Predicates */
/* True when the fraction word, converted to the signed type
   _FP_WS_TYPE, is negative.  */
#define _FP_FRAC_NEGP_1(X)              ((_FP_WS_TYPE)X##_f < 0)
/* True when the fraction word is zero.  */
#define _FP_FRAC_ZEROP_1(X)             (X##_f == 0)
/* Nonzero when any bit of _FP_OVERFLOW_<fs> is set in the fraction
   word; _FP_FRAC_CLEAR_OVERP_1 clears those bits.  */
#define _FP_FRAC_OVERP_1(fs, X)         (X##_f & _FP_OVERFLOW_##fs)
#define _FP_FRAC_CLEAR_OVERP_1(fs, X)   (X##_f &= ~_FP_OVERFLOW_##fs)
/* Comparisons of the fraction words of X and Y.  */
#define _FP_FRAC_EQ_1(X, Y)             (X##_f == Y##_f)
#define _FP_FRAC_GE_1(X, Y)             (X##_f >= Y##_f)
#define _FP_FRAC_GT_1(X, Y)             (X##_f > Y##_f)

/* One-word fraction constants: zero, one, and the complement of a zero
   of type _FP_WS_TYPE (every bit set).  */
#define _FP_ZEROFRAC_1  0
#define _FP_MINFRAC_1   1
#define _FP_MAXFRAC_1   (~(_FP_WS_TYPE)0)

/*
 * Unpack the raw bits of a native fp value.  Do not classify or
 * normalize the data.
 *
 * val is stored in the flt member of a temporary union _FP_UNION_<fs>;
 * the frac, exp and sign members of its bits member are then copied to
 * X_f, X_e and X_s.  The temporary has a macro-specific name so that
 * it cannot hide an identifier used in val.
 */

#define _FP_UNPACK_RAW_1(fs, X, val)                    \
  do                                                    \
    {                                                   \
      union _FP_UNION_##fs _FP_UNPACK_RAW_1_flo;        \
      _FP_UNPACK_RAW_1_flo.flt = (val);                 \
                                                        \
      X##_f = _FP_UNPACK_RAW_1_flo.bits.frac;           \
      X##_e = _FP_UNPACK_RAW_1_flo.bits.exp;            \
      X##_s = _FP_UNPACK_RAW_1_flo.bits.sign;           \
    }                                                   \
  while (0)

/* Unpack the raw bits of the native fp value that val points to.  val
   is cast to a pointer to union _FP_UNION_<fs> and the frac, exp and
   sign members of its bits member are copied to X_f, X_e and X_s.
   The temporary pointer has a macro-specific name so that it cannot
   hide an identifier used in val.  */
#define _FP_UNPACK_RAW_1_P(fs, X, val)                  \
  do                                                    \
    {                                                   \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_1_P_flo      \
        = (union _FP_UNION_##fs *) (val);               \
                                                        \
      X##_f = _FP_UNPACK_RAW_1_P_flo->bits.frac;        \
      X##_e = _FP_UNPACK_RAW_1_P_flo->bits.exp;         \
      X##_s = _FP_UNPACK_RAW_1_P_flo->bits.sign;        \
    }                                                   \
  while (0)

/*
 * Repack the raw bits of a native fp value.
 *
 * X_f, X_e and X_s are stored in the frac, exp and sign members of the
 * bits member of a temporary union _FP_UNION_<fs>, whose flt member is
 * then assigned to val.  The temporary has a macro-specific name so
 * that it cannot hide an identifier used in val.
 */

#define _FP_PACK_RAW_1(fs, val, X)                      \
  do                                                    \
    {                                                   \
      union _FP_UNION_##fs _FP_PACK_RAW_1_flo;          \
                                                        \
      _FP_PACK_RAW_1_flo.bits.frac = X##_f;             \
      _FP_PACK_RAW_1_flo.bits.exp = X##_e;              \
      _FP_PACK_RAW_1_flo.bits.sign = X##_s;             \
                                                        \
      (val) = _FP_PACK_RAW_1_flo.flt;                   \
    }                                                   \
  while (0)

/* Repack the raw bits into the native fp value that val points to.
   val is cast to a pointer to union _FP_UNION_<fs> and X_f, X_e and
   X_s are stored in the frac, exp and sign members of its bits member.
   The temporary pointer has a macro-specific name so that it cannot
   hide an identifier used in val.  */
#define _FP_PACK_RAW_1_P(fs, val, X)                    \
  do                                                    \
    {                                                   \
      union _FP_UNION_##fs *_FP_PACK_RAW_1_P_flo        \
        = (union _FP_UNION_##fs *) (val);               \
                                                        \
      _FP_PACK_RAW_1_P_flo->bits.frac = X##_f;          \
      _FP_PACK_RAW_1_P_flo->bits.exp = X##_e;           \
      _FP_PACK_RAW_1_P_flo->bits.sign = X##_s;          \
    }                                                   \
  while (0)

/*
 * Multiplication algorithms:
 */

/* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
   multiplication immediately.

   R_f receives X_f * Y_f shifted right by wfracbits - 1 bits with a
   sticky least significant bit (see _FP_FRAC_SRS_1).  */

#define _FP_MUL_MEAT_1_imm(wfracbits, R, X, Y)                          \
  do                                                                    \
    {                                                                   \
      R##_f = X##_f * Y##_f;                                            \
      /* Normalize.  Since we know where the msb of the multiplicands   \
         were (bit B), we know that the msb of the product is at        \
         either 2B or 2B-1.  */                                         \
      _FP_FRAC_SRS_1 (R, (wfracbits) - 1, 2 * (wfracbits));             \
    }                                                                   \
  while (0)

/* Given a 1W * 1W => 2W primitive, do the extended multiplication.

   doit is invoked as doit (_Z_f1, _Z_f0, X_f, Y_f) and must fill in
   the two-word product _Z (presumably high word first, as umul_ppmm
   from longlong.h does -- confirm against the caller).  The product is
   normalized with _FP_FRAC_SRS_2 and its _f0 word is stored in R_f.  */

#define _FP_MUL_MEAT_1_wide(wfracbits, R, X, Y, doit)                   \
  do                                                                    \
    {                                                                   \
      _FP_W_TYPE _Z_f0, _Z_f1;                                          \
      doit (_Z_f1, _Z_f0, X##_f, Y##_f);                                \
      /* Normalize.  Since we know where the msb of the multiplicands   \
         were (bit B), we know that the msb of the product is at        \
         either 2B or 2B-1.  */                                         \
      _FP_FRAC_SRS_2 (_Z, (wfracbits) - 1, 2 * (wfracbits));            \
      R##_f = _Z_f0;                                                    \
    }                                                                   \
  while (0)

/* Finally, a simple widening multiply algorithm.  What fun!

   The two-word product of X_f and Y_f is built from the four products
   of their half words, normalized with _FP_FRAC_SRS_2, and its low
   word (_z_f0) is stored in R_f.  */

#define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y)                         \
  do                                                                    \
    {                                                                   \
      _FP_W_TYPE _xh, _xl, _yh, _yl, _z_f0, _z_f1, _a_f0, _a_f1;        \
                                                                        \
      /* Split the words in half.  */                                   \
      _xh = X##_f >> (_FP_W_TYPE_SIZE / 2);                             \
      _xl = X##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE / 2)) - 1);    \
      _yh = Y##_f >> (_FP_W_TYPE_SIZE / 2);                             \
      _yl = Y##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE / 2)) - 1);    \
                                                                        \
      /* Multiply the pieces.  */                                       \
      _z_f0 = _xl * _yl;                                                \
      _a_f0 = _xh * _yl;                                                \
      _a_f1 = _xl * _yh;                                                \
      _z_f1 = _xh * _yh;                                                \
                                                                        \
      /* Reassemble into two full words.  A wrap-around while summing   \
         the two cross products is a carry into the upper half of the   \
         high word.  */                                                 \
      if ((_a_f0 += _a_f1) < _a_f1)                                     \
        _z_f1 += (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE / 2);               \
      _a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE / 2);                           \
      _a_f0 = _a_f0 << (_FP_W_TYPE_SIZE / 2);                           \
      _FP_FRAC_ADD_2 (_z, _z, _a);                                      \
                                                                        \
      /* Normalize.  */                                                 \
      _FP_FRAC_SRS_2 (_z, (wfracbits) - 1, 2 * (wfracbits));            \
      R##_f = _z_f0;                                                    \
    }                                                                   \
  while (0)

/*
 * Division algorithms:
 */

/* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
   division immediately.  Give this macro either _FP_DIV_HELP_imm for
   C primitives or _FP_DIV_HELP_ldiv for the ISO function.  Which you
   choose will depend on what the compiler does with divrem4.

   X_f is shifted left in place before dividing: by _FP_WFRACBITS_<fs>
   bits when X_f < Y_f (R_e is then decremented), by one bit less
   otherwise.  doit (q, r, n, d) must store the quotient and remainder
   of n / d; R_f receives the quotient with its least significant bit
   set when the remainder is nonzero.  */

#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit)           \
  do                                                    \
    {                                                   \
      _FP_W_TYPE _q, _r;                                \
      if (X##_f < Y##_f)                                \
        {                                               \
          R##_e--;                                      \
          X##_f <<= _FP_WFRACBITS_##fs;                 \
        }                                               \
      else                                              \
        X##_f <<= _FP_WFRACBITS_##fs - 1;               \
      doit (_q, _r, X##_f, Y##_f);                      \
      R##_f = _q | (_r != 0);                           \
    }                                                   \
  while (0)

/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
   that may be useful in this situation.  This first is for a primitive
   that requires normalization, the second for one that does not.  Look
   for UDIV_NEEDS_NORMALIZATION to tell which your machine needs.

   In both, R_e is decremented when X_f < Y_f, and R_f receives the
   quotient with its least significant bit set when the remainder is
   nonzero.  */

#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y)                           \
  do                                                                    \
    {                                                                   \
      _FP_W_TYPE _nh, _nl, _q, _r, _y;                                  \
                                                                        \
      /* Normalize Y -- i.e. make the most significant bit set.  */     \
      _y = Y##_f << _FP_WFRACXBITS_##fs;                                \
                                                                        \
      /* Shift X op correspondingly high, that is, up one full word.  */ \
      if (X##_f < Y##_f)                                                \
        {                                                               \
          R##_e--;                                                      \
          _nl = 0;                                                      \
          _nh = X##_f;                                                  \
        }                                                               \
      else                                                              \
        {                                                               \
          _nl = X##_f << (_FP_W_TYPE_SIZE - 1);                         \
          _nh = X##_f >> 1;                                             \
        }                                                               \
                                                                        \
      udiv_qrnnd (_q, _r, _nh, _nl, _y);                                \
      R##_f = _q | (_r != 0);                                           \
    }                                                                   \
  while (0)

/* Division with a udiv_qrnnd primitive that does not need a normalized
   divisor.  The two-word dividend (_nh, _nl) is X_f shifted left by
   _FP_WFRACBITS_<fs> bits when X_f < Y_f (R_e is then decremented) and
   by one bit less otherwise; the high word is obtained with the
   matching right shift by _FP_WFRACXBITS_<fs> (plus one).
   NOTE(review): this presumes _FP_WFRACBITS_<fs> + _FP_WFRACXBITS_<fs>
   equals the width of _FP_W_TYPE -- confirm.  R_f receives the
   quotient with its least significant bit set when the remainder is
   nonzero.  */
#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y)                        \
  do                                                            \
    {                                                           \
      _FP_W_TYPE _nh, _nl, _q, _r;                              \
      if (X##_f < Y##_f)                                        \
        {                                                       \
          R##_e--;                                              \
          _nl = X##_f << _FP_WFRACBITS_##fs;                    \
          _nh = X##_f >> _FP_WFRACXBITS_##fs;                   \
        }                                                       \
      else                                                      \
        {                                                       \
          _nl = X##_f << (_FP_WFRACBITS_##fs - 1);              \
          _nh = X##_f >> (_FP_WFRACXBITS_##fs + 1);             \
        }                                                       \
      udiv_qrnnd (_q, _r, _nh, _nl, Y##_f);                     \
      R##_f = _q | (_r != 0);                                   \
    }                                                           \
  while (0)

/*
 * Square root algorithms:
 * We have just one right now, maybe Newton approximation
 * should be added for those machines where division is fast.
 *
 * The loop runs while q differs from _FP_WORK_ROUND, halving q on each
 * pass, so q is expected to start as a single bit above
 * _FP_WORK_ROUND.  On each pass the trial value T = S + q is compared
 * with the remainder X; when it fits, it is subtracted from X, q is
 * added to the result R, and S becomes T + q.  X is then doubled.
 * Afterwards a nonzero remainder sets _FP_WORK_STICKY in R, and also
 * _FP_WORK_ROUND when S < X.  R, S, X and q are all modified; T is
 * scratch.
 */

#define _FP_SQRT_MEAT_1(R, S, T, X, q)                  \
  do                                                    \
    {                                                   \
      while ((q) != _FP_WORK_ROUND)                     \
        {                                               \
          T##_f = S##_f + (q);                          \
          if (T##_f <= X##_f)                           \
            {                                           \
              S##_f = T##_f + (q);                      \
              X##_f -= T##_f;                           \
              R##_f += (q);                             \
            }                                           \
          _FP_FRAC_SLL_1 (X, 1);                        \
          (q) >>= 1;                                    \
        }                                               \
      if (X##_f)                                        \
        {                                               \
          if (S##_f < X##_f)                            \
            R##_f |= _FP_WORK_ROUND;                    \
          R##_f |= _FP_WORK_STICKY;                     \
        }                                               \
    }                                                   \
  while (0)

/*
 * Assembly/disassembly for converting to/from integral types.
 * No shifting or overflow handled here.
 *
 * _FP_FRAC_ASSEMBLE_1 assigns the fraction word of X to r;
 * _FP_FRAC_DISASSEMBLE_1 assigns r to the fraction word of X.
 * The rsize argument is not used by these one-word versions.
 */

#define _FP_FRAC_ASSEMBLE_1(r, X, rsize)        ((r) = X##_f)
#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize)     (X##_f = (r))

/*
 * Convert FP values between word sizes
 *
 * One word to one word: the fraction word of S is copied to D.
 */

#define _FP_FRAC_COPY_1_1(D, S)         (D##_f = S##_f)