1 |
272 |
jeremybenn |
/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
|
2 |
|
|
|
3 |
|
|
This file is part of GCC.
|
4 |
|
|
|
5 |
|
|
GCC is free software; you can redistribute it and/or modify it under
|
6 |
|
|
the terms of the GNU General Public License as published by the Free
|
7 |
|
|
Software Foundation; either version 3, or (at your option) any later
|
8 |
|
|
version.
|
9 |
|
|
|
10 |
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
11 |
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
12 |
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
13 |
|
|
for more details.
|
14 |
|
|
|
15 |
|
|
Under Section 7 of GPL version 3, you are granted additional
|
16 |
|
|
permissions described in the GCC Runtime Library Exception, version
|
17 |
|
|
3.1, as published by the Free Software Foundation.
|
18 |
|
|
|
19 |
|
|
You should have received a copy of the GNU General Public License and
|
20 |
|
|
a copy of the GCC Runtime Library Exception along with this program;
|
21 |
|
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
22 |
|
|
<http://www.gnu.org/licenses/>. */
|
23 |
|
|
|
24 |
|
|
/*****************************************************************************
|
25 |
|
|
*
|
26 |
|
|
* BID128 fma x * y + z
|
27 |
|
|
*
|
28 |
|
|
****************************************************************************/
|
29 |
|
|
|
30 |
|
|
#include "bid_internal.h"
|
31 |
|
|
|
32 |
|
|
static void
|
33 |
|
|
rounding_correction (unsigned int rnd_mode,
|
34 |
|
|
unsigned int is_inexact_lt_midpoint,
|
35 |
|
|
unsigned int is_inexact_gt_midpoint,
|
36 |
|
|
unsigned int is_midpoint_lt_even,
|
37 |
|
|
unsigned int is_midpoint_gt_even,
|
38 |
|
|
int unbexp,
|
39 |
|
|
UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
|
40 |
|
|
// unbiased true exponent unbexp may be larger than emax
|
41 |
|
|
|
42 |
|
|
UINT128 res = *ptrres; // expected to have the correct sign and coefficient
|
43 |
|
|
// (the exponent field is ignored, as unbexp is used instead)
|
44 |
|
|
UINT64 sign, exp;
|
45 |
|
|
UINT64 C_hi, C_lo;
|
46 |
|
|
|
47 |
|
|
// general correction from RN to RA, RM, RP, RZ
|
48 |
|
|
// Note: if the result is negative, then is_inexact_lt_midpoint,
|
49 |
|
|
// is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
|
50 |
|
|
// have to be considered as if determined for the absolute value of the
|
51 |
|
|
// result (so they seem to be reversed)
|
52 |
|
|
|
53 |
|
|
if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
|
54 |
|
|
is_midpoint_lt_even || is_midpoint_gt_even) {
|
55 |
|
|
*ptrfpsf |= INEXACT_EXCEPTION;
|
56 |
|
|
}
|
57 |
|
|
// apply correction to result calculated with unbounded exponent
|
58 |
|
|
sign = res.w[1] & MASK_SIGN;
|
59 |
|
|
exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
|
60 |
|
|
C_hi = res.w[1] & MASK_COEFF;
|
61 |
|
|
C_lo = res.w[0];
|
62 |
|
|
if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
|
63 |
|
|
((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
|
64 |
|
|
is_midpoint_gt_even))) ||
|
65 |
|
|
(sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
|
66 |
|
|
((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
|
67 |
|
|
is_midpoint_gt_even)))) {
|
68 |
|
|
// C = C + 1
|
69 |
|
|
C_lo = C_lo + 1;
|
70 |
|
|
if (C_lo == 0)
|
71 |
|
|
C_hi = C_hi + 1;
|
72 |
|
|
if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
|
73 |
|
|
// C = 10^34 => rounding overflow
|
74 |
|
|
C_hi = 0x0000314dc6448d93ull;
|
75 |
|
|
C_lo = 0x38c15b0a00000000ull; // 10^33
|
76 |
|
|
// exp = exp + EXP_P1;
|
77 |
|
|
unbexp = unbexp + 1;
|
78 |
|
|
exp = (UINT64) (unbexp + 6176) << 49;
|
79 |
|
|
}
|
80 |
|
|
} else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
|
81 |
|
|
((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
|
82 |
|
|
(!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
|
83 |
|
|
// C = C - 1
|
84 |
|
|
C_lo = C_lo - 1;
|
85 |
|
|
if (C_lo == 0xffffffffffffffffull)
|
86 |
|
|
C_hi--;
|
87 |
|
|
// check if we crossed into the lower decade
|
88 |
|
|
if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
|
89 |
|
|
// C = 10^33 - 1
|
90 |
|
|
if (exp > 0) {
|
91 |
|
|
C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
|
92 |
|
|
C_lo = 0x378d8e63ffffffffull;
|
93 |
|
|
// exp = exp - EXP_P1;
|
94 |
|
|
unbexp = unbexp - 1;
|
95 |
|
|
exp = (UINT64) (unbexp + 6176) << 49;
|
96 |
|
|
} else { // if exp = 0
|
97 |
|
|
if (is_midpoint_lt_even || is_midpoint_lt_even ||
|
98 |
|
|
is_inexact_gt_midpoint || is_inexact_gt_midpoint) // tiny & inexact
|
99 |
|
|
*ptrfpsf |= UNDERFLOW_EXCEPTION;
|
100 |
|
|
}
|
101 |
|
|
}
|
102 |
|
|
} else {
|
103 |
|
|
; // the result is already correct
|
104 |
|
|
}
|
105 |
|
|
if (unbexp > expmax) { // 6111
|
106 |
|
|
*ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
|
107 |
|
|
exp = 0;
|
108 |
|
|
if (!sign) { // result is positive
|
109 |
|
|
if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
|
110 |
|
|
C_hi = 0x7800000000000000ull;
|
111 |
|
|
C_lo = 0x0000000000000000ull;
|
112 |
|
|
} else { // res = +MAXFP = (10^34-1) * 10^emax
|
113 |
|
|
C_hi = 0x5fffed09bead87c0ull;
|
114 |
|
|
C_lo = 0x378d8e63ffffffffull;
|
115 |
|
|
}
|
116 |
|
|
} else { // result is negative
|
117 |
|
|
if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
|
118 |
|
|
C_hi = 0xf800000000000000ull;
|
119 |
|
|
C_lo = 0x0000000000000000ull;
|
120 |
|
|
} else { // res = -MAXFP = -(10^34-1) * 10^emax
|
121 |
|
|
C_hi = 0xdfffed09bead87c0ull;
|
122 |
|
|
C_lo = 0x378d8e63ffffffffull;
|
123 |
|
|
}
|
124 |
|
|
}
|
125 |
|
|
}
|
126 |
|
|
// assemble the result
|
127 |
|
|
res.w[1] = sign | exp | C_hi;
|
128 |
|
|
res.w[0] = C_lo;
|
129 |
|
|
*ptrres = res;
|
130 |
|
|
}
|
131 |
|
|
|
132 |
|
|
static void
|
133 |
|
|
add256 (UINT256 x, UINT256 y, UINT256 * pz) {
|
134 |
|
|
// *z = x + yl assume the sum fits in 256 bits
|
135 |
|
|
UINT256 z;
|
136 |
|
|
z.w[0] = x.w[0] + y.w[0];
|
137 |
|
|
if (z.w[0] < x.w[0]) {
|
138 |
|
|
x.w[1]++;
|
139 |
|
|
if (x.w[1] == 0x0000000000000000ull) {
|
140 |
|
|
x.w[2]++;
|
141 |
|
|
if (x.w[2] == 0x0000000000000000ull) {
|
142 |
|
|
x.w[3]++;
|
143 |
|
|
}
|
144 |
|
|
}
|
145 |
|
|
}
|
146 |
|
|
z.w[1] = x.w[1] + y.w[1];
|
147 |
|
|
if (z.w[1] < x.w[1]) {
|
148 |
|
|
x.w[2]++;
|
149 |
|
|
if (x.w[2] == 0x0000000000000000ull) {
|
150 |
|
|
x.w[3]++;
|
151 |
|
|
}
|
152 |
|
|
}
|
153 |
|
|
z.w[2] = x.w[2] + y.w[2];
|
154 |
|
|
if (z.w[2] < x.w[2]) {
|
155 |
|
|
x.w[3]++;
|
156 |
|
|
}
|
157 |
|
|
z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
|
158 |
|
|
*pz = z;
|
159 |
|
|
}
|
160 |
|
|
|
161 |
|
|
static void
|
162 |
|
|
sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
|
163 |
|
|
// *z = x - y; assume x >= y
|
164 |
|
|
UINT256 z;
|
165 |
|
|
z.w[0] = x.w[0] - y.w[0];
|
166 |
|
|
if (z.w[0] > x.w[0]) {
|
167 |
|
|
x.w[1]--;
|
168 |
|
|
if (x.w[1] == 0xffffffffffffffffull) {
|
169 |
|
|
x.w[2]--;
|
170 |
|
|
if (x.w[2] == 0xffffffffffffffffull) {
|
171 |
|
|
x.w[3]--;
|
172 |
|
|
}
|
173 |
|
|
}
|
174 |
|
|
}
|
175 |
|
|
z.w[1] = x.w[1] - y.w[1];
|
176 |
|
|
if (z.w[1] > x.w[1]) {
|
177 |
|
|
x.w[2]--;
|
178 |
|
|
if (x.w[2] == 0xffffffffffffffffull) {
|
179 |
|
|
x.w[3]--;
|
180 |
|
|
}
|
181 |
|
|
}
|
182 |
|
|
z.w[2] = x.w[2] - y.w[2];
|
183 |
|
|
if (z.w[2] > x.w[2]) {
|
184 |
|
|
x.w[3]--;
|
185 |
|
|
}
|
186 |
|
|
z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
|
187 |
|
|
*pz = z;
|
188 |
|
|
}
|
189 |
|
|
|
190 |
|
|
|
191 |
|
|
static int
|
192 |
|
|
nr_digits256 (UINT256 R256) {
|
193 |
|
|
int ind;
|
194 |
|
|
// determine the number of decimal digits in R256
|
195 |
|
|
if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
|
196 |
|
|
// between 1 and 19 digits
|
197 |
|
|
for (ind = 1; ind <= 19; ind++) {
|
198 |
|
|
if (R256.w[0] < ten2k64[ind]) {
|
199 |
|
|
break;
|
200 |
|
|
}
|
201 |
|
|
}
|
202 |
|
|
// ind digits
|
203 |
|
|
} else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
|
204 |
|
|
(R256.w[1] < ten2k128[0].w[1] ||
|
205 |
|
|
(R256.w[1] == ten2k128[0].w[1]
|
206 |
|
|
&& R256.w[0] < ten2k128[0].w[0]))) {
|
207 |
|
|
// 20 digits
|
208 |
|
|
ind = 20;
|
209 |
|
|
} else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
|
210 |
|
|
// between 21 and 38 digits
|
211 |
|
|
for (ind = 1; ind <= 18; ind++) {
|
212 |
|
|
if (R256.w[1] < ten2k128[ind].w[1] ||
|
213 |
|
|
(R256.w[1] == ten2k128[ind].w[1] &&
|
214 |
|
|
R256.w[0] < ten2k128[ind].w[0])) {
|
215 |
|
|
break;
|
216 |
|
|
}
|
217 |
|
|
}
|
218 |
|
|
// ind + 20 digits
|
219 |
|
|
ind = ind + 20;
|
220 |
|
|
} else if (R256.w[3] == 0x0 &&
|
221 |
|
|
(R256.w[2] < ten2k256[0].w[2] ||
|
222 |
|
|
(R256.w[2] == ten2k256[0].w[2] &&
|
223 |
|
|
R256.w[1] < ten2k256[0].w[1]) ||
|
224 |
|
|
(R256.w[2] == ten2k256[0].w[2] &&
|
225 |
|
|
R256.w[1] == ten2k256[0].w[1] &&
|
226 |
|
|
R256.w[0] < ten2k256[0].w[0]))) {
|
227 |
|
|
// 39 digits
|
228 |
|
|
ind = 39;
|
229 |
|
|
} else {
|
230 |
|
|
// between 40 and 68 digits
|
231 |
|
|
for (ind = 1; ind <= 29; ind++) {
|
232 |
|
|
if (R256.w[3] < ten2k256[ind].w[3] ||
|
233 |
|
|
(R256.w[3] == ten2k256[ind].w[3] &&
|
234 |
|
|
R256.w[2] < ten2k256[ind].w[2]) ||
|
235 |
|
|
(R256.w[3] == ten2k256[ind].w[3] &&
|
236 |
|
|
R256.w[2] == ten2k256[ind].w[2] &&
|
237 |
|
|
R256.w[1] < ten2k256[ind].w[1]) ||
|
238 |
|
|
(R256.w[3] == ten2k256[ind].w[3] &&
|
239 |
|
|
R256.w[2] == ten2k256[ind].w[2] &&
|
240 |
|
|
R256.w[1] == ten2k256[ind].w[1] &&
|
241 |
|
|
R256.w[0] < ten2k256[ind].w[0])) {
|
242 |
|
|
break;
|
243 |
|
|
}
|
244 |
|
|
}
|
245 |
|
|
// ind + 39 digits
|
246 |
|
|
ind = ind + 39;
|
247 |
|
|
}
|
248 |
|
|
return (ind);
|
249 |
|
|
}
|
250 |
|
|
|
251 |
|
|
// add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
|
252 |
|
|
// use the rounding information from ptr_is_* to avoid a double rounding error
|
253 |
|
|
static void
|
254 |
|
|
add_and_round (int q3,
|
255 |
|
|
int q4,
|
256 |
|
|
int e4,
|
257 |
|
|
int delta,
|
258 |
|
|
int p34,
|
259 |
|
|
UINT64 z_sign,
|
260 |
|
|
UINT64 p_sign,
|
261 |
|
|
UINT128 C3,
|
262 |
|
|
UINT256 C4,
|
263 |
|
|
int rnd_mode,
|
264 |
|
|
int *ptr_is_midpoint_lt_even,
|
265 |
|
|
int *ptr_is_midpoint_gt_even,
|
266 |
|
|
int *ptr_is_inexact_lt_midpoint,
|
267 |
|
|
int *ptr_is_inexact_gt_midpoint,
|
268 |
|
|
_IDEC_flags * ptrfpsf, UINT128 * ptrres) {
|
269 |
|
|
|
270 |
|
|
int scale;
|
271 |
|
|
int x0;
|
272 |
|
|
int ind;
|
273 |
|
|
UINT64 R64;
|
274 |
|
|
UINT128 P128, R128;
|
275 |
|
|
UINT192 P192, R192;
|
276 |
|
|
UINT256 R256;
|
277 |
|
|
int is_midpoint_lt_even = 0;
|
278 |
|
|
int is_midpoint_gt_even = 0;
|
279 |
|
|
int is_inexact_lt_midpoint = 0;
|
280 |
|
|
int is_inexact_gt_midpoint = 0;
|
281 |
|
|
int is_midpoint_lt_even0 = 0;
|
282 |
|
|
int is_midpoint_gt_even0 = 0;
|
283 |
|
|
int is_inexact_lt_midpoint0 = 0;
|
284 |
|
|
int is_inexact_gt_midpoint0 = 0;
|
285 |
|
|
int incr_exp = 0;
|
286 |
|
|
int is_tiny = 0;
|
287 |
|
|
int lt_half_ulp = 0;
|
288 |
|
|
int eq_half_ulp = 0;
|
289 |
|
|
// int gt_half_ulp = 0;
|
290 |
|
|
UINT128 res = *ptrres;
|
291 |
|
|
|
292 |
|
|
// scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
|
293 |
|
|
scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
|
294 |
|
|
// comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1
|
295 |
|
|
|
296 |
|
|
// calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
|
297 |
|
|
// Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
|
298 |
|
|
if (scale == 0) {
|
299 |
|
|
R256.w[3] = 0x0ull;
|
300 |
|
|
R256.w[2] = 0x0ull;
|
301 |
|
|
R256.w[1] = C3.w[1];
|
302 |
|
|
R256.w[0] = C3.w[0];
|
303 |
|
|
} else if (scale <= 19) { // 10^scale fits in 64 bits
|
304 |
|
|
P128.w[1] = 0;
|
305 |
|
|
P128.w[0] = ten2k64[scale];
|
306 |
|
|
__mul_128x128_to_256 (R256, P128, C3);
|
307 |
|
|
} else if (scale <= 38) { // 10^scale fits in 128 bits
|
308 |
|
|
__mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
|
309 |
|
|
} else if (scale <= 57) { // 39 <= scale <= 57
|
310 |
|
|
// 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
|
311 |
|
|
// (10^67 has 223 bits; 10^69 has 230 bits);
|
312 |
|
|
// must split the computation:
|
313 |
|
|
// 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
|
314 |
|
|
// bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
|
315 |
|
|
// Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
|
316 |
|
|
__mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
|
317 |
|
|
// now multiply R128 by 10^38
|
318 |
|
|
__mul_128x128_to_256 (R256, R128, ten2k128[18]);
|
319 |
|
|
} else { // 58 <= scale <= 66
|
320 |
|
|
// 10^scale takes between 193 and 220 bits,
|
321 |
|
|
// and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
|
322 |
|
|
// must split the computation:
|
323 |
|
|
// 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
|
324 |
|
|
// bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
|
325 |
|
|
// Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
|
326 |
|
|
// Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
|
327 |
|
|
// 10^(scale-38) takes more than 64 bits, C3 will take less than 64
|
328 |
|
|
__mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
|
329 |
|
|
// now calculate 10*38 * 10^(scale-38) * C3
|
330 |
|
|
__mul_128x128_to_256 (R256, R128, ten2k128[18]);
|
331 |
|
|
}
|
332 |
|
|
// C3 * 10^scale is now in R256
|
333 |
|
|
|
334 |
|
|
// for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
|
335 |
|
|
// one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
|
336 |
|
|
// possible
|
337 |
|
|
// add/subtract C4 and C3 * 10^scale; the exponent is e4
|
338 |
|
|
if (p_sign == z_sign) { // R256 = C4 + R256
|
339 |
|
|
// calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
|
340 |
|
|
// but may require rounding
|
341 |
|
|
add256 (C4, R256, &R256);
|
342 |
|
|
} else { // if (p_sign != z_sign) { // R256 = C4 - R256
|
343 |
|
|
// calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
|
344 |
|
|
// R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
|
345 |
|
|
// but may require rounding
|
346 |
|
|
|
347 |
|
|
// compare first R256 = C3 * 10^scale and C4
|
348 |
|
|
if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
|
349 |
|
|
(R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
|
350 |
|
|
(R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
|
351 |
|
|
R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
|
352 |
|
|
// calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
|
353 |
|
|
// but may require rounding
|
354 |
|
|
sub256 (R256, C4, &R256);
|
355 |
|
|
// flip p_sign too, because the result has the sign of z
|
356 |
|
|
p_sign = z_sign;
|
357 |
|
|
} else { // if C4 > C3 * 10^scale
|
358 |
|
|
// calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
|
359 |
|
|
// but may require rounding
|
360 |
|
|
sub256 (C4, R256, &R256);
|
361 |
|
|
}
|
362 |
|
|
// if the result is pure zero, the sign depends on the rounding mode
|
363 |
|
|
// (x*y and z had opposite signs)
|
364 |
|
|
if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
|
365 |
|
|
R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
|
366 |
|
|
if (rnd_mode != ROUNDING_DOWN)
|
367 |
|
|
p_sign = 0x0000000000000000ull;
|
368 |
|
|
else
|
369 |
|
|
p_sign = 0x8000000000000000ull;
|
370 |
|
|
// the exponent is max (e4, expmin)
|
371 |
|
|
if (e4 < -6176)
|
372 |
|
|
e4 = expmin;
|
373 |
|
|
// assemble result
|
374 |
|
|
res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
|
375 |
|
|
res.w[0] = 0x0;
|
376 |
|
|
*ptrres = res;
|
377 |
|
|
return;
|
378 |
|
|
}
|
379 |
|
|
}
|
380 |
|
|
|
381 |
|
|
// determine the number of decimal digits in R256
|
382 |
|
|
ind = nr_digits256 (R256);
|
383 |
|
|
|
384 |
|
|
// the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
|
385 |
|
|
// round to the destination precision, with unbounded exponent
|
386 |
|
|
|
387 |
|
|
if (ind <= p34) {
|
388 |
|
|
// result rounded to the destination precision with unbounded exponent
|
389 |
|
|
// is exact
|
390 |
|
|
if (ind + e4 < p34 + expmin) {
|
391 |
|
|
is_tiny = 1; // applies to all rounding modes
|
392 |
|
|
}
|
393 |
|
|
res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
|
394 |
|
|
res.w[0] = R256.w[0];
|
395 |
|
|
// Note: res is correct only if expmin <= e4 <= expmax
|
396 |
|
|
} else { // if (ind > p34)
|
397 |
|
|
// if more than P digits, round to nearest to P digits
|
398 |
|
|
// round R256 to p34 digits
|
399 |
|
|
x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
|
400 |
|
|
if (ind <= 38) {
|
401 |
|
|
P128.w[1] = R256.w[1];
|
402 |
|
|
P128.w[0] = R256.w[0];
|
403 |
|
|
round128_19_38 (ind, x0, P128, &R128, &incr_exp,
|
404 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
405 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
|
406 |
|
|
} else if (ind <= 57) {
|
407 |
|
|
P192.w[2] = R256.w[2];
|
408 |
|
|
P192.w[1] = R256.w[1];
|
409 |
|
|
P192.w[0] = R256.w[0];
|
410 |
|
|
round192_39_57 (ind, x0, P192, &R192, &incr_exp,
|
411 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
412 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
|
413 |
|
|
R128.w[1] = R192.w[1];
|
414 |
|
|
R128.w[0] = R192.w[0];
|
415 |
|
|
} else { // if (ind <= 68)
|
416 |
|
|
round256_58_76 (ind, x0, R256, &R256, &incr_exp,
|
417 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
418 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
|
419 |
|
|
R128.w[1] = R256.w[1];
|
420 |
|
|
R128.w[0] = R256.w[0];
|
421 |
|
|
}
|
422 |
|
|
// the rounded result has p34 = 34 digits
|
423 |
|
|
e4 = e4 + x0 + incr_exp;
|
424 |
|
|
if (rnd_mode == ROUNDING_TO_NEAREST) {
|
425 |
|
|
if (e4 < expmin) {
|
426 |
|
|
is_tiny = 1; // for other rounding modes apply correction
|
427 |
|
|
}
|
428 |
|
|
} else {
|
429 |
|
|
// for RM, RP, RZ, RA apply correction in order to determine tininess
|
430 |
|
|
// but do not save the result; apply the correction to
|
431 |
|
|
// (-1)^p_sign * significand * 10^0
|
432 |
|
|
P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
|
433 |
|
|
P128.w[0] = R128.w[0];
|
434 |
|
|
rounding_correction (rnd_mode,
|
435 |
|
|
is_inexact_lt_midpoint,
|
436 |
|
|
is_inexact_gt_midpoint, is_midpoint_lt_even,
|
437 |
|
|
is_midpoint_gt_even, 0, &P128, ptrfpsf);
|
438 |
|
|
scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
|
439 |
|
|
// the number of digits in the significand is p34 = 34
|
440 |
|
|
if (e4 + scale < expmin) {
|
441 |
|
|
is_tiny = 1;
|
442 |
|
|
}
|
443 |
|
|
}
|
444 |
|
|
ind = p34; // the number of decimal digits in the signifcand of res
|
445 |
|
|
res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
|
446 |
|
|
res.w[0] = R128.w[0];
|
447 |
|
|
// Note: res is correct only if expmin <= e4 <= expmax
|
448 |
|
|
// set the inexact flag after rounding with bounded exponent, if any
|
449 |
|
|
}
|
450 |
|
|
// at this point we have the result rounded with unbounded exponent in
|
451 |
|
|
// res and we know its tininess:
|
452 |
|
|
// res = (-1)^p_sign * significand * 10^e4,
|
453 |
|
|
// where q (significand) = ind <= p34
|
454 |
|
|
// Note: res is correct only if expmin <= e4 <= expmax
|
455 |
|
|
|
456 |
|
|
// check for overflow if RN
|
457 |
|
|
if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
|
458 |
|
|
res.w[1] = p_sign | 0x7800000000000000ull;
|
459 |
|
|
res.w[0] = 0x0000000000000000ull;
|
460 |
|
|
*ptrres = res;
|
461 |
|
|
*ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
|
462 |
|
|
return; // BID_RETURN (res)
|
463 |
|
|
} // else not overflow or not RN, so continue
|
464 |
|
|
|
465 |
|
|
// if (e4 >= expmin) we have the result rounded with bounded exponent
|
466 |
|
|
if (e4 < expmin) {
|
467 |
|
|
x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
|
468 |
|
|
// where the result rounded [at most] once is
|
469 |
|
|
// (-1)^p_sign * significand_res * 10^e4
|
470 |
|
|
|
471 |
|
|
// avoid double rounding error
|
472 |
|
|
is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
|
473 |
|
|
is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
|
474 |
|
|
is_midpoint_lt_even0 = is_midpoint_lt_even;
|
475 |
|
|
is_midpoint_gt_even0 = is_midpoint_gt_even;
|
476 |
|
|
is_inexact_lt_midpoint = 0;
|
477 |
|
|
is_inexact_gt_midpoint = 0;
|
478 |
|
|
is_midpoint_lt_even = 0;
|
479 |
|
|
is_midpoint_gt_even = 0;
|
480 |
|
|
|
481 |
|
|
if (x0 > ind) {
|
482 |
|
|
// nothing is left of res when moving the decimal point left x0 digits
|
483 |
|
|
is_inexact_lt_midpoint = 1;
|
484 |
|
|
res.w[1] = p_sign | 0x0000000000000000ull;
|
485 |
|
|
res.w[0] = 0x0000000000000000ull;
|
486 |
|
|
e4 = expmin;
|
487 |
|
|
} else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
|
488 |
|
|
// this is <, =, or > 1/2 ulp
|
489 |
|
|
// compare the ind-digit value in the significand of res with
|
490 |
|
|
// 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
|
491 |
|
|
// less than, equal to, or greater than 1/2 ulp (significand of res)
|
492 |
|
|
R128.w[1] = res.w[1] & MASK_COEFF;
|
493 |
|
|
R128.w[0] = res.w[0];
|
494 |
|
|
if (ind <= 19) {
|
495 |
|
|
if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
|
496 |
|
|
lt_half_ulp = 1;
|
497 |
|
|
is_inexact_lt_midpoint = 1;
|
498 |
|
|
} else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
|
499 |
|
|
eq_half_ulp = 1;
|
500 |
|
|
is_midpoint_gt_even = 1;
|
501 |
|
|
} else { // > 1/2 ulp
|
502 |
|
|
// gt_half_ulp = 1;
|
503 |
|
|
is_inexact_gt_midpoint = 1;
|
504 |
|
|
}
|
505 |
|
|
} else { // if (ind <= 38) {
|
506 |
|
|
if (R128.w[1] < midpoint128[ind - 20].w[1] ||
|
507 |
|
|
(R128.w[1] == midpoint128[ind - 20].w[1] &&
|
508 |
|
|
R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
|
509 |
|
|
lt_half_ulp = 1;
|
510 |
|
|
is_inexact_lt_midpoint = 1;
|
511 |
|
|
} else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
|
512 |
|
|
R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
|
513 |
|
|
eq_half_ulp = 1;
|
514 |
|
|
is_midpoint_gt_even = 1;
|
515 |
|
|
} else { // > 1/2 ulp
|
516 |
|
|
// gt_half_ulp = 1;
|
517 |
|
|
is_inexact_gt_midpoint = 1;
|
518 |
|
|
}
|
519 |
|
|
}
|
520 |
|
|
if (lt_half_ulp || eq_half_ulp) {
|
521 |
|
|
// res = +0.0 * 10^expmin
|
522 |
|
|
res.w[1] = 0x0000000000000000ull;
|
523 |
|
|
res.w[0] = 0x0000000000000000ull;
|
524 |
|
|
} else { // if (gt_half_ulp)
|
525 |
|
|
// res = +1 * 10^expmin
|
526 |
|
|
res.w[1] = 0x0000000000000000ull;
|
527 |
|
|
res.w[0] = 0x0000000000000001ull;
|
528 |
|
|
}
|
529 |
|
|
res.w[1] = p_sign | res.w[1];
|
530 |
|
|
e4 = expmin;
|
531 |
|
|
} else { // if (1 <= x0 <= ind - 1 <= 33)
|
532 |
|
|
// round the ind-digit result to ind - x0 digits
|
533 |
|
|
|
534 |
|
|
if (ind <= 18) { // 2 <= ind <= 18
|
535 |
|
|
round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
|
536 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
537 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
|
538 |
|
|
res.w[1] = 0x0;
|
539 |
|
|
res.w[0] = R64;
|
540 |
|
|
} else if (ind <= 38) {
|
541 |
|
|
P128.w[1] = res.w[1] & MASK_COEFF;
|
542 |
|
|
P128.w[0] = res.w[0];
|
543 |
|
|
round128_19_38 (ind, x0, P128, &res, &incr_exp,
|
544 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
545 |
|
|
&is_inexact_lt_midpoint,
|
546 |
|
|
&is_inexact_gt_midpoint);
|
547 |
|
|
}
|
548 |
|
|
e4 = e4 + x0; // expmin
|
549 |
|
|
// we want the exponent to be expmin, so if incr_exp = 1 then
|
550 |
|
|
// multiply the rounded result by 10 - it will still fit in 113 bits
|
551 |
|
|
if (incr_exp) {
|
552 |
|
|
// 64 x 128 -> 128
|
553 |
|
|
P128.w[1] = res.w[1] & MASK_COEFF;
|
554 |
|
|
P128.w[0] = res.w[0];
|
555 |
|
|
__mul_64x128_to_128 (res, ten2k64[1], P128);
|
556 |
|
|
}
|
557 |
|
|
res.w[1] =
|
558 |
|
|
p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
|
559 |
|
|
// avoid a double rounding error
|
560 |
|
|
if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
|
561 |
|
|
is_midpoint_lt_even) { // double rounding error upward
|
562 |
|
|
// res = res - 1
|
563 |
|
|
res.w[0]--;
|
564 |
|
|
if (res.w[0] == 0xffffffffffffffffull)
|
565 |
|
|
res.w[1]--;
|
566 |
|
|
// Note: a double rounding error upward is not possible; for this
|
567 |
|
|
// the result after the first rounding would have to be 99...95
|
568 |
|
|
// (35 digits in all), possibly followed by a number of zeros; this
|
569 |
|
|
// is not possible in Cases (2)-(6) or (15)-(17) which may get here
|
570 |
|
|
is_midpoint_lt_even = 0;
|
571 |
|
|
is_inexact_lt_midpoint = 1;
|
572 |
|
|
} else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
|
573 |
|
|
is_midpoint_gt_even) { // double rounding error downward
|
574 |
|
|
// res = res + 1
|
575 |
|
|
res.w[0]++;
|
576 |
|
|
if (res.w[0] == 0)
|
577 |
|
|
res.w[1]++;
|
578 |
|
|
is_midpoint_gt_even = 0;
|
579 |
|
|
is_inexact_gt_midpoint = 1;
|
580 |
|
|
} else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
|
581 |
|
|
!is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
|
582 |
|
|
// if this second rounding was exact the result may still be
|
583 |
|
|
// inexact because of the first rounding
|
584 |
|
|
if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
|
585 |
|
|
is_inexact_gt_midpoint = 1;
|
586 |
|
|
}
|
587 |
|
|
if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
|
588 |
|
|
is_inexact_lt_midpoint = 1;
|
589 |
|
|
}
|
590 |
|
|
} else if (is_midpoint_gt_even &&
|
591 |
|
|
(is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
|
592 |
|
|
// pulled up to a midpoint
|
593 |
|
|
is_inexact_lt_midpoint = 1;
|
594 |
|
|
is_inexact_gt_midpoint = 0;
|
595 |
|
|
is_midpoint_lt_even = 0;
|
596 |
|
|
is_midpoint_gt_even = 0;
|
597 |
|
|
} else if (is_midpoint_lt_even &&
|
598 |
|
|
(is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
|
599 |
|
|
// pulled down to a midpoint
|
600 |
|
|
is_inexact_lt_midpoint = 0;
|
601 |
|
|
is_inexact_gt_midpoint = 1;
|
602 |
|
|
is_midpoint_lt_even = 0;
|
603 |
|
|
is_midpoint_gt_even = 0;
|
604 |
|
|
} else {
|
605 |
|
|
;
|
606 |
|
|
}
|
607 |
|
|
}
|
608 |
|
|
}
|
609 |
|
|
// res contains the correct result
|
610 |
|
|
// apply correction if not rounding to nearest
|
611 |
|
|
if (rnd_mode != ROUNDING_TO_NEAREST) {
|
612 |
|
|
rounding_correction (rnd_mode,
|
613 |
|
|
is_inexact_lt_midpoint, is_inexact_gt_midpoint,
|
614 |
|
|
is_midpoint_lt_even, is_midpoint_gt_even,
|
615 |
|
|
e4, &res, ptrfpsf);
|
616 |
|
|
}
|
617 |
|
|
if (is_midpoint_lt_even || is_midpoint_gt_even ||
|
618 |
|
|
is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
|
619 |
|
|
// set the inexact flag
|
620 |
|
|
*ptrfpsf |= INEXACT_EXCEPTION;
|
621 |
|
|
if (is_tiny)
|
622 |
|
|
*ptrfpsf |= UNDERFLOW_EXCEPTION;
|
623 |
|
|
}
|
624 |
|
|
|
625 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
626 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
627 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
628 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
629 |
|
|
*ptrres = res;
|
630 |
|
|
return;
|
631 |
|
|
}
|
632 |
|
|
|
633 |
|
|
|
634 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
635 |
|
|
static void
|
636 |
|
|
bid128_ext_fma (int *ptr_is_midpoint_lt_even,
|
637 |
|
|
int *ptr_is_midpoint_gt_even,
|
638 |
|
|
int *ptr_is_inexact_lt_midpoint,
|
639 |
|
|
int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
|
640 |
|
|
UINT128 * px, UINT128 * py,
|
641 |
|
|
UINT128 *
|
642 |
|
|
pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
643 |
|
|
_EXC_INFO_PARAM) {
|
644 |
|
|
UINT128 x = *px, y = *py, z = *pz;
|
645 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
646 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
647 |
|
|
#endif
|
648 |
|
|
#else
|
649 |
|
|
static UINT128
|
650 |
|
|
bid128_ext_fma (int *ptr_is_midpoint_lt_even,
|
651 |
|
|
int *ptr_is_midpoint_gt_even,
|
652 |
|
|
int *ptr_is_inexact_lt_midpoint,
|
653 |
|
|
int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
|
654 |
|
|
UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
|
655 |
|
|
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
|
656 |
|
|
#endif
|
657 |
|
|
|
658 |
|
|
UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
|
659 |
|
|
UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
|
660 |
|
|
UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
|
661 |
|
|
int true_p_exp;
|
662 |
|
|
UINT128 C1, C2, C3;
|
663 |
|
|
UINT256 C4;
|
664 |
|
|
int q1 = 0, q2 = 0, q3 = 0, q4;
|
665 |
|
|
int e1, e2, e3, e4;
|
666 |
|
|
int scale, ind, delta, x0;
|
667 |
|
|
int p34 = P34; // used to modify the limit on the number of digits
|
668 |
|
|
BID_UI64DOUBLE tmp;
|
669 |
|
|
int x_nr_bits, y_nr_bits, z_nr_bits;
|
670 |
|
|
unsigned int save_fpsf;
|
671 |
|
|
int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
|
672 |
|
|
int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
|
673 |
|
|
int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0;
|
674 |
|
|
int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
|
675 |
|
|
int incr_exp = 0;
|
676 |
|
|
int lsb;
|
677 |
|
|
int lt_half_ulp = 0;
|
678 |
|
|
int eq_half_ulp = 0;
|
679 |
|
|
int gt_half_ulp = 0;
|
680 |
|
|
int is_tiny = 0;
|
681 |
|
|
UINT64 R64, tmp64;
|
682 |
|
|
UINT128 P128, R128;
|
683 |
|
|
UINT192 P192, R192;
|
684 |
|
|
UINT256 R256;
|
685 |
|
|
|
686 |
|
|
// the following are based on the table of special cases for fma; the NaN
|
687 |
|
|
// behavior is similar to that of the IA-64 Architecture fma
|
688 |
|
|
|
689 |
|
|
// identify cases where at least one operand is NaN
|
690 |
|
|
|
691 |
|
|
BID_SWAP128 (x);
|
692 |
|
|
BID_SWAP128 (y);
|
693 |
|
|
BID_SWAP128 (z);
|
694 |
|
|
if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
|
695 |
|
|
// if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
|
696 |
|
|
// check first for non-canonical NaN payload
|
697 |
|
|
if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
|
698 |
|
|
(((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
|
699 |
|
|
(y.w[0] > 0x38c15b09ffffffffull))) {
|
700 |
|
|
y.w[1] = y.w[1] & 0xffffc00000000000ull;
|
701 |
|
|
y.w[0] = 0x0ull;
|
702 |
|
|
}
|
703 |
|
|
if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
|
704 |
|
|
// set invalid flag
|
705 |
|
|
*pfpsf |= INVALID_EXCEPTION;
|
706 |
|
|
// return quiet (y)
|
707 |
|
|
res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
|
708 |
|
|
res.w[0] = y.w[0];
|
709 |
|
|
} else { // y is QNaN
|
710 |
|
|
// return y
|
711 |
|
|
res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
|
712 |
|
|
res.w[0] = y.w[0];
|
713 |
|
|
// if z = SNaN or x = SNaN signal invalid exception
|
714 |
|
|
if ((z.w[1] & MASK_SNAN) == MASK_SNAN ||
|
715 |
|
|
(x.w[1] & MASK_SNAN) == MASK_SNAN) {
|
716 |
|
|
// set invalid flag
|
717 |
|
|
*pfpsf |= INVALID_EXCEPTION;
|
718 |
|
|
}
|
719 |
|
|
}
|
720 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
721 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
722 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
723 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
724 |
|
|
BID_SWAP128 (res);
|
725 |
|
|
BID_RETURN (res)
|
726 |
|
|
} else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
|
727 |
|
|
// if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
|
728 |
|
|
// check first for non-canonical NaN payload
|
729 |
|
|
if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
|
730 |
|
|
(((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
|
731 |
|
|
(z.w[0] > 0x38c15b09ffffffffull))) {
|
732 |
|
|
z.w[1] = z.w[1] & 0xffffc00000000000ull;
|
733 |
|
|
z.w[0] = 0x0ull;
|
734 |
|
|
}
|
735 |
|
|
if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN
|
736 |
|
|
// set invalid flag
|
737 |
|
|
*pfpsf |= INVALID_EXCEPTION;
|
738 |
|
|
// return quiet (z)
|
739 |
|
|
res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
|
740 |
|
|
res.w[0] = z.w[0];
|
741 |
|
|
} else { // z is QNaN
|
742 |
|
|
// return z
|
743 |
|
|
res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
|
744 |
|
|
res.w[0] = z.w[0];
|
745 |
|
|
// if x = SNaN signal invalid exception
|
746 |
|
|
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {
|
747 |
|
|
// set invalid flag
|
748 |
|
|
*pfpsf |= INVALID_EXCEPTION;
|
749 |
|
|
}
|
750 |
|
|
}
|
751 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
752 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
753 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
754 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
755 |
|
|
BID_SWAP128 (res);
|
756 |
|
|
BID_RETURN (res)
|
757 |
|
|
} else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
|
758 |
|
|
// if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
|
759 |
|
|
// check first for non-canonical NaN payload
|
760 |
|
|
if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
|
761 |
|
|
(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
|
762 |
|
|
(x.w[0] > 0x38c15b09ffffffffull))) {
|
763 |
|
|
x.w[1] = x.w[1] & 0xffffc00000000000ull;
|
764 |
|
|
x.w[0] = 0x0ull;
|
765 |
|
|
}
|
766 |
|
|
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
|
767 |
|
|
// set invalid flag
|
768 |
|
|
*pfpsf |= INVALID_EXCEPTION;
|
769 |
|
|
// return quiet (x)
|
770 |
|
|
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
|
771 |
|
|
res.w[0] = x.w[0];
|
772 |
|
|
} else { // x is QNaN
|
773 |
|
|
// return x
|
774 |
|
|
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
|
775 |
|
|
res.w[0] = x.w[0];
|
776 |
|
|
}
|
777 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
778 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
779 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
780 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
781 |
|
|
BID_SWAP128 (res);
|
782 |
|
|
BID_RETURN (res)
|
783 |
|
|
}
|
784 |
|
|
// x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
|
785 |
|
|
// for non-canonical values
|
786 |
|
|
|
787 |
|
|
x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
788 |
|
|
C1.w[1] = x.w[1] & MASK_COEFF;
|
789 |
|
|
C1.w[0] = x.w[0];
|
790 |
|
|
if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf
|
791 |
|
|
// if x is not infinity check for non-canonical values - treated as zero
|
792 |
|
|
if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
|
793 |
|
|
// non-canonical
|
794 |
|
|
x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
|
795 |
|
|
C1.w[1] = 0; // significand high
|
796 |
|
|
C1.w[0] = 0; // significand low
|
797 |
|
|
} else { // G0_G1 != 11
|
798 |
|
|
x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
|
799 |
|
|
if (C1.w[1] > 0x0001ed09bead87c0ull ||
|
800 |
|
|
(C1.w[1] == 0x0001ed09bead87c0ull &&
|
801 |
|
|
C1.w[0] > 0x378d8e63ffffffffull)) {
|
802 |
|
|
// x is non-canonical if coefficient is larger than 10^34 -1
|
803 |
|
|
C1.w[1] = 0;
|
804 |
|
|
C1.w[0] = 0;
|
805 |
|
|
} else { // canonical
|
806 |
|
|
;
|
807 |
|
|
}
|
808 |
|
|
}
|
809 |
|
|
}
|
810 |
|
|
y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
811 |
|
|
C2.w[1] = y.w[1] & MASK_COEFF;
|
812 |
|
|
C2.w[0] = y.w[0];
|
813 |
|
|
if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf
|
814 |
|
|
// if y is not infinity check for non-canonical values - treated as zero
|
815 |
|
|
if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
|
816 |
|
|
// non-canonical
|
817 |
|
|
y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
|
818 |
|
|
C2.w[1] = 0; // significand high
|
819 |
|
|
C2.w[0] = 0; // significand low
|
820 |
|
|
} else { // G0_G1 != 11
|
821 |
|
|
y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
|
822 |
|
|
if (C2.w[1] > 0x0001ed09bead87c0ull ||
|
823 |
|
|
(C2.w[1] == 0x0001ed09bead87c0ull &&
|
824 |
|
|
C2.w[0] > 0x378d8e63ffffffffull)) {
|
825 |
|
|
// y is non-canonical if coefficient is larger than 10^34 -1
|
826 |
|
|
C2.w[1] = 0;
|
827 |
|
|
C2.w[0] = 0;
|
828 |
|
|
} else { // canonical
|
829 |
|
|
;
|
830 |
|
|
}
|
831 |
|
|
}
|
832 |
|
|
}
|
833 |
|
|
z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
834 |
|
|
C3.w[1] = z.w[1] & MASK_COEFF;
|
835 |
|
|
C3.w[0] = z.w[0];
|
836 |
|
|
if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf
|
837 |
|
|
// if z is not infinity check for non-canonical values - treated as zero
|
838 |
|
|
if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
|
839 |
|
|
// non-canonical
|
840 |
|
|
z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
|
841 |
|
|
C3.w[1] = 0; // significand high
|
842 |
|
|
C3.w[0] = 0; // significand low
|
843 |
|
|
} else { // G0_G1 != 11
|
844 |
|
|
z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
|
845 |
|
|
if (C3.w[1] > 0x0001ed09bead87c0ull ||
|
846 |
|
|
(C3.w[1] == 0x0001ed09bead87c0ull &&
|
847 |
|
|
C3.w[0] > 0x378d8e63ffffffffull)) {
|
848 |
|
|
// z is non-canonical if coefficient is larger than 10^34 -1
|
849 |
|
|
C3.w[1] = 0;
|
850 |
|
|
C3.w[0] = 0;
|
851 |
|
|
} else { // canonical
|
852 |
|
|
;
|
853 |
|
|
}
|
854 |
|
|
}
|
855 |
|
|
}
|
856 |
|
|
|
857 |
|
|
p_sign = x_sign ^ y_sign; // sign of the product
|
858 |
|
|
|
859 |
|
|
// identify cases where at least one operand is infinity
|
860 |
|
|
|
861 |
|
|
if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf
|
862 |
|
|
if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
|
863 |
|
|
if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
|
864 |
|
|
if (p_sign == z_sign) {
|
865 |
|
|
res.w[1] = z_sign | MASK_INF;
|
866 |
|
|
res.w[0] = 0x0;
|
867 |
|
|
} else {
|
868 |
|
|
// return QNaN Indefinite
|
869 |
|
|
res.w[1] = 0x7c00000000000000ull;
|
870 |
|
|
res.w[0] = 0x0000000000000000ull;
|
871 |
|
|
// set invalid flag
|
872 |
|
|
*pfpsf |= INVALID_EXCEPTION;
|
873 |
|
|
}
|
874 |
|
|
} else { // z = 0 or z = f
|
875 |
|
|
res.w[1] = p_sign | MASK_INF;
|
876 |
|
|
res.w[0] = 0x0;
|
877 |
|
|
}
|
878 |
|
|
} else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f
|
879 |
|
|
if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
|
880 |
|
|
if (p_sign == z_sign) {
|
881 |
|
|
res.w[1] = z_sign | MASK_INF;
|
882 |
|
|
res.w[0] = 0x0;
|
883 |
|
|
} else {
|
884 |
|
|
// return QNaN Indefinite
|
885 |
|
|
res.w[1] = 0x7c00000000000000ull;
|
886 |
|
|
res.w[0] = 0x0000000000000000ull;
|
887 |
|
|
// set invalid flag
|
888 |
|
|
*pfpsf |= INVALID_EXCEPTION;
|
889 |
|
|
}
|
890 |
|
|
} else { // z = 0 or z = f
|
891 |
|
|
res.w[1] = p_sign | MASK_INF;
|
892 |
|
|
res.w[0] = 0x0;
|
893 |
|
|
}
|
894 |
|
|
} else { // y = 0
|
895 |
|
|
// return QNaN Indefinite
|
896 |
|
|
res.w[1] = 0x7c00000000000000ull;
|
897 |
|
|
res.w[0] = 0x0000000000000000ull;
|
898 |
|
|
// set invalid flag
|
899 |
|
|
*pfpsf |= INVALID_EXCEPTION;
|
900 |
|
|
}
|
901 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
902 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
903 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
904 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
905 |
|
|
BID_SWAP128 (res);
|
906 |
|
|
BID_RETURN (res)
|
907 |
|
|
} else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
|
908 |
|
|
if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
|
909 |
|
|
// x = f, necessarily
|
910 |
|
|
if ((p_sign != z_sign)
|
911 |
|
|
|| (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) {
|
912 |
|
|
// return QNaN Indefinite
|
913 |
|
|
res.w[1] = 0x7c00000000000000ull;
|
914 |
|
|
res.w[0] = 0x0000000000000000ull;
|
915 |
|
|
// set invalid flag
|
916 |
|
|
*pfpsf |= INVALID_EXCEPTION;
|
917 |
|
|
} else {
|
918 |
|
|
res.w[1] = z_sign | MASK_INF;
|
919 |
|
|
res.w[0] = 0x0;
|
920 |
|
|
}
|
921 |
|
|
} else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
|
922 |
|
|
// z = 0, f, inf
|
923 |
|
|
// return QNaN Indefinite
|
924 |
|
|
res.w[1] = 0x7c00000000000000ull;
|
925 |
|
|
res.w[0] = 0x0000000000000000ull;
|
926 |
|
|
// set invalid flag
|
927 |
|
|
*pfpsf |= INVALID_EXCEPTION;
|
928 |
|
|
} else {
|
929 |
|
|
// x = f and z = 0, f, necessarily
|
930 |
|
|
res.w[1] = p_sign | MASK_INF;
|
931 |
|
|
res.w[0] = 0x0;
|
932 |
|
|
}
|
933 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
934 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
935 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
936 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
937 |
|
|
BID_SWAP128 (res);
|
938 |
|
|
BID_RETURN (res)
|
939 |
|
|
} else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
|
940 |
|
|
// x = 0, f and y = 0, f, necessarily
|
941 |
|
|
res.w[1] = z_sign | MASK_INF;
|
942 |
|
|
res.w[0] = 0x0;
|
943 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
944 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
945 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
946 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
947 |
|
|
BID_SWAP128 (res);
|
948 |
|
|
BID_RETURN (res)
|
949 |
|
|
}
|
950 |
|
|
|
951 |
|
|
true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
|
952 |
|
|
if (true_p_exp < -6176)
|
953 |
|
|
p_exp = 0; // cannot be less than EXP_MIN
|
954 |
|
|
else
|
955 |
|
|
p_exp = (UINT64) (true_p_exp + 6176) << 49;
|
956 |
|
|
|
957 |
|
|
if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
|
958 |
|
|
// the result is 0
|
959 |
|
|
if (p_exp < z_exp)
|
960 |
|
|
res.w[1] = p_exp; // preferred exponent
|
961 |
|
|
else
|
962 |
|
|
res.w[1] = z_exp; // preferred exponent
|
963 |
|
|
if (p_sign == z_sign) {
|
964 |
|
|
res.w[1] |= z_sign;
|
965 |
|
|
res.w[0] = 0x0;
|
966 |
|
|
} else { // x * y and z have opposite signs
|
967 |
|
|
if (rnd_mode == ROUNDING_DOWN) {
|
968 |
|
|
// res = -0.0
|
969 |
|
|
res.w[1] |= MASK_SIGN;
|
970 |
|
|
res.w[0] = 0x0;
|
971 |
|
|
} else {
|
972 |
|
|
// res = +0.0
|
973 |
|
|
// res.w[1] |= 0x0;
|
974 |
|
|
res.w[0] = 0x0;
|
975 |
|
|
}
|
976 |
|
|
}
|
977 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
978 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
979 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
980 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
981 |
|
|
BID_SWAP128 (res);
|
982 |
|
|
BID_RETURN (res)
|
983 |
|
|
}
|
984 |
|
|
// from this point on, we may need to know the number of decimal digits
|
985 |
|
|
// in the significands of x, y, z when x, y, z != 0
|
986 |
|
|
|
987 |
|
|
if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite)
|
988 |
|
|
// q1 = nr. of decimal digits in x
|
989 |
|
|
// determine first the nr. of bits in x
|
990 |
|
|
if (C1.w[1] == 0) {
|
991 |
|
|
if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
|
992 |
|
|
// split the 64-bit value in two 32-bit halves to avoid rounding errors
|
993 |
|
|
if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
|
994 |
|
|
tmp.d = (double) (C1.w[0] >> 32); // exact conversion
|
995 |
|
|
x_nr_bits =
|
996 |
|
|
33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
997 |
|
|
} else { // x < 2^32
|
998 |
|
|
tmp.d = (double) (C1.w[0]); // exact conversion
|
999 |
|
|
x_nr_bits =
|
1000 |
|
|
1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
1001 |
|
|
}
|
1002 |
|
|
} else { // if x < 2^53
|
1003 |
|
|
tmp.d = (double) C1.w[0]; // exact conversion
|
1004 |
|
|
x_nr_bits =
|
1005 |
|
|
1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
1006 |
|
|
}
|
1007 |
|
|
} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
|
1008 |
|
|
tmp.d = (double) C1.w[1]; // exact conversion
|
1009 |
|
|
x_nr_bits =
|
1010 |
|
|
65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
1011 |
|
|
}
|
1012 |
|
|
q1 = nr_digits[x_nr_bits - 1].digits;
|
1013 |
|
|
if (q1 == 0) {
|
1014 |
|
|
q1 = nr_digits[x_nr_bits - 1].digits1;
|
1015 |
|
|
if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
|
1016 |
|
|
(C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
|
1017 |
|
|
C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
|
1018 |
|
|
q1++;
|
1019 |
|
|
}
|
1020 |
|
|
}
|
1021 |
|
|
|
1022 |
|
|
if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite)
|
1023 |
|
|
if (C2.w[1] == 0) {
|
1024 |
|
|
if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53
|
1025 |
|
|
// split the 64-bit value in two 32-bit halves to avoid rounding errors
|
1026 |
|
|
if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32
|
1027 |
|
|
tmp.d = (double) (C2.w[0] >> 32); // exact conversion
|
1028 |
|
|
y_nr_bits =
|
1029 |
|
|
32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
1030 |
|
|
} else { // y < 2^32
|
1031 |
|
|
tmp.d = (double) C2.w[0]; // exact conversion
|
1032 |
|
|
y_nr_bits =
|
1033 |
|
|
((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
1034 |
|
|
}
|
1035 |
|
|
} else { // if y < 2^53
|
1036 |
|
|
tmp.d = (double) C2.w[0]; // exact conversion
|
1037 |
|
|
y_nr_bits =
|
1038 |
|
|
((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
1039 |
|
|
}
|
1040 |
|
|
} else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
|
1041 |
|
|
tmp.d = (double) C2.w[1]; // exact conversion
|
1042 |
|
|
y_nr_bits =
|
1043 |
|
|
64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
1044 |
|
|
}
|
1045 |
|
|
|
1046 |
|
|
q2 = nr_digits[y_nr_bits].digits;
|
1047 |
|
|
if (q2 == 0) {
|
1048 |
|
|
q2 = nr_digits[y_nr_bits].digits1;
|
1049 |
|
|
if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi ||
|
1050 |
|
|
(C2.w[1] == nr_digits[y_nr_bits].threshold_hi &&
|
1051 |
|
|
C2.w[0] >= nr_digits[y_nr_bits].threshold_lo))
|
1052 |
|
|
q2++;
|
1053 |
|
|
}
|
1054 |
|
|
}
|
1055 |
|
|
|
1056 |
|
|
if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite)
|
1057 |
|
|
if (C3.w[1] == 0) {
|
1058 |
|
|
if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53
|
1059 |
|
|
// split the 64-bit value in two 32-bit halves to avoid rounding errors
|
1060 |
|
|
if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32
|
1061 |
|
|
tmp.d = (double) (C3.w[0] >> 32); // exact conversion
|
1062 |
|
|
z_nr_bits =
|
1063 |
|
|
32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
1064 |
|
|
} else { // z < 2^32
|
1065 |
|
|
tmp.d = (double) C3.w[0]; // exact conversion
|
1066 |
|
|
z_nr_bits =
|
1067 |
|
|
((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
1068 |
|
|
}
|
1069 |
|
|
} else { // if z < 2^53
|
1070 |
|
|
tmp.d = (double) C3.w[0]; // exact conversion
|
1071 |
|
|
z_nr_bits =
|
1072 |
|
|
((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
1073 |
|
|
}
|
1074 |
|
|
} else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
|
1075 |
|
|
tmp.d = (double) C3.w[1]; // exact conversion
|
1076 |
|
|
z_nr_bits =
|
1077 |
|
|
64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
1078 |
|
|
}
|
1079 |
|
|
|
1080 |
|
|
q3 = nr_digits[z_nr_bits].digits;
|
1081 |
|
|
if (q3 == 0) {
|
1082 |
|
|
q3 = nr_digits[z_nr_bits].digits1;
|
1083 |
|
|
if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi ||
|
1084 |
|
|
(C3.w[1] == nr_digits[z_nr_bits].threshold_hi &&
|
1085 |
|
|
C3.w[0] >= nr_digits[z_nr_bits].threshold_lo))
|
1086 |
|
|
q3++;
|
1087 |
|
|
}
|
1088 |
|
|
}
|
1089 |
|
|
|
1090 |
|
|
if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
|
1091 |
|
|
(C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
|
1092 |
|
|
// x = 0 or y = 0
|
1093 |
|
|
// z = f, necessarily; for 0 + z return z, with the preferred exponent
|
1094 |
|
|
// the result is z, but need to get the preferred exponent
|
1095 |
|
|
if (z_exp <= p_exp) { // the preferred exponent is z_exp
|
1096 |
|
|
res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1];
|
1097 |
|
|
res.w[0] = C3.w[0];
|
1098 |
|
|
} else { // if (p_exp < z_exp) the preferred exponent is p_exp
|
1099 |
|
|
// return (C3 * 10^scale) * 10^(z_exp - scale)
|
1100 |
|
|
// where scale = min (p34-q3, (z_exp-p_exp) >> 49)
|
1101 |
|
|
scale = p34 - q3;
|
1102 |
|
|
ind = (z_exp - p_exp) >> 49;
|
1103 |
|
|
if (ind < scale)
|
1104 |
|
|
scale = ind;
|
1105 |
|
|
if (scale == 0) {
|
1106 |
|
|
res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant
|
1107 |
|
|
res.w[0] = z.w[0];
|
1108 |
|
|
} else if (q3 <= 19) { // z fits in 64 bits
|
1109 |
|
|
if (scale <= 19) { // 10^scale fits in 64 bits
|
1110 |
|
|
// 64 x 64 C3.w[0] * ten2k64[scale]
|
1111 |
|
|
__mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
|
1112 |
|
|
} else { // 10^scale fits in 128 bits
|
1113 |
|
|
// 64 x 128 C3.w[0] * ten2k128[scale - 20]
|
1114 |
|
|
__mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
|
1115 |
|
|
}
|
1116 |
|
|
} else { // z fits in 128 bits, but 10^scale must fit in 64 bits
|
1117 |
|
|
// 64 x 128 ten2k64[scale] * C3
|
1118 |
|
|
__mul_128x64_to_128 (res, ten2k64[scale], C3);
|
1119 |
|
|
}
|
1120 |
|
|
// subtract scale from the exponent
|
1121 |
|
|
z_exp = z_exp - ((UINT64) scale << 49);
|
1122 |
|
|
res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
|
1123 |
|
|
}
|
1124 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
1125 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
1126 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
1127 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
1128 |
|
|
BID_SWAP128 (res);
|
1129 |
|
|
BID_RETURN (res)
|
1130 |
|
|
} else {
|
1131 |
|
|
; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
|
1132 |
|
|
}
|
1133 |
|
|
|
1134 |
|
|
e1 = (x_exp >> 49) - 6176; // unbiased exponent of x
|
1135 |
|
|
e2 = (y_exp >> 49) - 6176; // unbiased exponent of y
|
1136 |
|
|
e3 = (z_exp >> 49) - 6176; // unbiased exponent of z
|
1137 |
|
|
e4 = e1 + e2; // unbiased exponent of the exact x * y
|
1138 |
|
|
|
1139 |
|
|
// calculate C1 * C2 and its number of decimal digits, q4
|
1140 |
|
|
|
1141 |
|
|
// the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
|
1142 |
|
|
// where 2 <= q1 + q2 <= 68
|
1143 |
|
|
// calculate C4 = C1 * C2 and determine q
|
1144 |
|
|
C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0;
|
1145 |
|
|
if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
|
1146 |
|
|
C4.w[0] = C1.w[0] * C2.w[0];
|
1147 |
|
|
// if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
|
1148 |
|
|
if (C4.w[0] < ten2k64[q1 + q2 - 1])
|
1149 |
|
|
q4 = q1 + q2 - 1; // q4 in [1, 18]
|
1150 |
|
|
else
|
1151 |
|
|
q4 = q1 + q2; // q4 in [2, 19]
|
1152 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
|
1153 |
|
|
} else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits
|
1154 |
|
|
// q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
|
1155 |
|
|
__mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
|
1156 |
|
|
// if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
|
1157 |
|
|
if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1
|
1158 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
|
1159 |
|
|
q4 = 19; // 19 = q1 + q2 - 1
|
1160 |
|
|
} else {
|
1161 |
|
|
// if (C4.w[1] == 0)
|
1162 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
|
1163 |
|
|
// else
|
1164 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
|
1165 |
|
|
q4 = 20; // 20 = q1 + q2
|
1166 |
|
|
}
|
1167 |
|
|
} else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38
|
1168 |
|
|
// C4 = C1 * C2 fits in 64 or 128 bits
|
1169 |
|
|
// (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
|
1170 |
|
|
// at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
|
1171 |
|
|
if (q1 <= 19) {
|
1172 |
|
|
__mul_128x64_to_128 (C4, C1.w[0], C2);
|
1173 |
|
|
} else { // q2 <= 19
|
1174 |
|
|
__mul_128x64_to_128 (C4, C2.w[0], C1);
|
1175 |
|
|
}
|
1176 |
|
|
// if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
|
1177 |
|
|
if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] ||
|
1178 |
|
|
(C4.w[1] == ten2k128[q1 + q2 - 21].w[1] &&
|
1179 |
|
|
C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) {
|
1180 |
|
|
// if (C4.w[1] == 0) // q4 = 20, necessarily
|
1181 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
|
1182 |
|
|
// else
|
1183 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
|
1184 |
|
|
q4 = q1 + q2 - 1; // q4 in [20, 37]
|
1185 |
|
|
} else {
|
1186 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
|
1187 |
|
|
q4 = q1 + q2; // q4 in [21, 38]
|
1188 |
|
|
}
|
1189 |
|
|
} else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits
|
1190 |
|
|
// both C1 and C2 fit in 128 bits (actually in 113 bits)
|
1191 |
|
|
// may replace this by 128x128_to192
|
1192 |
|
|
__mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
|
1193 |
|
|
// if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
|
1194 |
|
|
if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] ||
|
1195 |
|
|
(C4.w[1] == ten2k128[18].w[1]
|
1196 |
|
|
&& C4.w[0] < ten2k128[18].w[0]))) {
|
1197 |
|
|
// 18 = 38 - 20 = q1+q2-1 - 20
|
1198 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
|
1199 |
|
|
q4 = 38; // 38 = q1 + q2 - 1
|
1200 |
|
|
} else {
|
1201 |
|
|
// if (C4.w[2] == 0)
|
1202 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
|
1203 |
|
|
// else
|
1204 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
|
1205 |
|
|
q4 = 39; // 39 = q1 + q2
|
1206 |
|
|
}
|
1207 |
|
|
} else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57
|
1208 |
|
|
// C4 = C1 * C2 fits in 128 or 192 bits
|
1209 |
|
|
// (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
|
1210 |
|
|
// both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
|
1211 |
|
|
// may fit in 64 bits
|
1212 |
|
|
if (C1.w[1] == 0) { // C1 fits in 64 bits
|
1213 |
|
|
// __mul_64x128_full (REShi64, RESlo128, A64, B128)
|
1214 |
|
|
__mul_64x128_full (C4.w[2], C4, C1.w[0], C2);
|
1215 |
|
|
} else if (C2.w[1] == 0) { // C2 fits in 64 bits
|
1216 |
|
|
// __mul_64x128_full (REShi64, RESlo128, A64, B128)
|
1217 |
|
|
__mul_64x128_full (C4.w[2], C4, C2.w[0], C1);
|
1218 |
|
|
} else { // both C1 and C2 require 128 bits
|
1219 |
|
|
// may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
|
1220 |
|
|
__mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
|
1221 |
|
|
}
|
1222 |
|
|
// if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
|
1223 |
|
|
if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
|
1224 |
|
|
(C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
|
1225 |
|
|
(C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
|
1226 |
|
|
(C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
|
1227 |
|
|
C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) {
|
1228 |
|
|
// if (C4.w[2] == 0) // q4 = 39, necessarily
|
1229 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
|
1230 |
|
|
// else
|
1231 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
|
1232 |
|
|
q4 = q1 + q2 - 1; // q4 in [39, 56]
|
1233 |
|
|
} else {
|
1234 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
|
1235 |
|
|
q4 = q1 + q2; // q4 in [40, 57]
|
1236 |
|
|
}
|
1237 |
|
|
} else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits
|
1238 |
|
|
// both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
|
1239 |
|
|
// may fit in 64 bits
|
1240 |
|
|
if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits
|
1241 |
|
|
__mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192
|
1242 |
|
|
} else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits
|
1243 |
|
|
__mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192
|
1244 |
|
|
} else { // C1 * C2 will fit in 192 bits or in 256 bits
|
1245 |
|
|
__mul_128x128_to_256 (C4, C1, C2);
|
1246 |
|
|
}
|
1247 |
|
|
// if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
|
1248 |
|
|
if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] ||
|
1249 |
|
|
(C4.w[2] == ten2k256[18].w[2]
|
1250 |
|
|
&& (C4.w[1] < ten2k256[18].w[1]
|
1251 |
|
|
|| (C4.w[1] == ten2k256[18].w[1]
|
1252 |
|
|
&& C4.w[0] < ten2k256[18].w[0]))))) {
|
1253 |
|
|
// 18 = 57 - 39 = q1+q2-1 - 39
|
1254 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
|
1255 |
|
|
q4 = 57; // 57 = q1 + q2 - 1
|
1256 |
|
|
} else {
|
1257 |
|
|
// if (C4.w[3] == 0)
|
1258 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
|
1259 |
|
|
// else
|
1260 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
|
1261 |
|
|
q4 = 58; // 58 = q1 + q2
|
1262 |
|
|
}
|
1263 |
|
|
} else { // if 59 <= q1 + q2 <= 68
|
1264 |
|
|
// C4 = C1 * C2 fits in 192 or 256 bits
|
1265 |
|
|
// (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
|
1266 |
|
|
// both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
|
1267 |
|
|
// 64 bits
|
1268 |
|
|
// may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
|
1269 |
|
|
__mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
|
1270 |
|
|
// if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
|
1271 |
|
|
if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] ||
|
1272 |
|
|
(C4.w[3] == ten2k256[q1 + q2 - 40].w[3] &&
|
1273 |
|
|
(C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
|
1274 |
|
|
(C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
|
1275 |
|
|
(C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
|
1276 |
|
|
(C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
|
1277 |
|
|
C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) {
|
1278 |
|
|
// if (C4.w[3] == 0) // q4 = 58, necessarily
|
1279 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
|
1280 |
|
|
// else
|
1281 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
|
1282 |
|
|
q4 = q1 + q2 - 1; // q4 in [58, 67]
|
1283 |
|
|
} else {
|
1284 |
|
|
// length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
|
1285 |
|
|
q4 = q1 + q2; // q4 in [59, 68]
|
1286 |
|
|
}
|
1287 |
|
|
}
|
1288 |
|
|
|
1289 |
|
|
if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0
|
1290 |
|
|
save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
|
1291 |
|
|
*pfpsf = 0;
|
1292 |
|
|
|
1293 |
|
|
if (q4 > p34) {
|
1294 |
|
|
|
1295 |
|
|
// truncate C4 to p34 digits into res
|
1296 |
|
|
// x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
|
1297 |
|
|
x0 = q4 - p34;
|
1298 |
|
|
if (q4 <= 38) {
|
1299 |
|
|
P128.w[1] = C4.w[1];
|
1300 |
|
|
P128.w[0] = C4.w[0];
|
1301 |
|
|
round128_19_38 (q4, x0, P128, &res, &incr_exp,
|
1302 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
1303 |
|
|
&is_inexact_lt_midpoint,
|
1304 |
|
|
&is_inexact_gt_midpoint);
|
1305 |
|
|
} else if (q4 <= 57) { // 35 <= q4 <= 57
|
1306 |
|
|
P192.w[2] = C4.w[2];
|
1307 |
|
|
P192.w[1] = C4.w[1];
|
1308 |
|
|
P192.w[0] = C4.w[0];
|
1309 |
|
|
round192_39_57 (q4, x0, P192, &R192, &incr_exp,
|
1310 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
1311 |
|
|
&is_inexact_lt_midpoint,
|
1312 |
|
|
&is_inexact_gt_midpoint);
|
1313 |
|
|
res.w[0] = R192.w[0];
|
1314 |
|
|
res.w[1] = R192.w[1];
|
1315 |
|
|
} else { // if (q4 <= 68)
|
1316 |
|
|
round256_58_76 (q4, x0, C4, &R256, &incr_exp,
|
1317 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
1318 |
|
|
&is_inexact_lt_midpoint,
|
1319 |
|
|
&is_inexact_gt_midpoint);
|
1320 |
|
|
res.w[0] = R256.w[0];
|
1321 |
|
|
res.w[1] = R256.w[1];
|
1322 |
|
|
}
|
1323 |
|
|
e4 = e4 + x0;
|
1324 |
|
|
if (incr_exp) {
|
1325 |
|
|
e4 = e4 + 1;
|
1326 |
|
|
}
|
1327 |
|
|
q4 = p34;
|
1328 |
|
|
// res is now the coefficient of the result rounded to the destination
|
1329 |
|
|
// precision, with unbounded exponent; the exponent is e4; q4=digits(res)
|
1330 |
|
|
} else { // if (q4 <= p34)
|
1331 |
|
|
// C4 * 10^e4 is the result rounded to the destination precision, with
|
1332 |
|
|
// unbounded exponent (which is exact)
|
1333 |
|
|
|
1334 |
|
|
if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) {
|
1335 |
|
|
// e4 is too large, but can be brought within range by scaling up C4
|
1336 |
|
|
scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
|
1337 |
|
|
// res = (C4 * 10^scale) * 10^expmax
|
1338 |
|
|
if (q4 <= 19) { // C4 fits in 64 bits
|
1339 |
|
|
if (scale <= 19) { // 10^scale fits in 64 bits
|
1340 |
|
|
// 64 x 64 C4.w[0] * ten2k64[scale]
|
1341 |
|
|
__mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
|
1342 |
|
|
} else { // 10^scale fits in 128 bits
|
1343 |
|
|
// 64 x 128 C4.w[0] * ten2k128[scale - 20]
|
1344 |
|
|
__mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
|
1345 |
|
|
}
|
1346 |
|
|
} else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
|
1347 |
|
|
// 64 x 128 ten2k64[scale] * CC43
|
1348 |
|
|
__mul_128x64_to_128 (res, ten2k64[scale], C4);
|
1349 |
|
|
}
|
1350 |
|
|
e4 = e4 - scale; // expmax
|
1351 |
|
|
q4 = q4 + scale;
|
1352 |
|
|
} else {
|
1353 |
|
|
res.w[1] = C4.w[1];
|
1354 |
|
|
res.w[0] = C4.w[0];
|
1355 |
|
|
}
|
1356 |
|
|
// res is the coefficient of the result rounded to the destination
|
1357 |
|
|
// precision, with unbounded exponent (it has q4 digits); the exponent
|
1358 |
|
|
// is e4 (exact result)
|
1359 |
|
|
}
|
1360 |
|
|
|
1361 |
|
|
// check for overflow
|
1362 |
|
|
if (q4 + e4 > p34 + expmax) {
|
1363 |
|
|
if (rnd_mode == ROUNDING_TO_NEAREST) {
|
1364 |
|
|
res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
|
1365 |
|
|
res.w[0] = 0x0000000000000000ull;
|
1366 |
|
|
*pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
|
1367 |
|
|
} else {
|
1368 |
|
|
res.w[1] = p_sign | res.w[1];
|
1369 |
|
|
rounding_correction (rnd_mode,
|
1370 |
|
|
is_inexact_lt_midpoint,
|
1371 |
|
|
is_inexact_gt_midpoint,
|
1372 |
|
|
is_midpoint_lt_even, is_midpoint_gt_even,
|
1373 |
|
|
e4, &res, pfpsf);
|
1374 |
|
|
}
|
1375 |
|
|
*pfpsf |= save_fpsf;
|
1376 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
1377 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
1378 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
1379 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
1380 |
|
|
BID_SWAP128 (res);
|
1381 |
|
|
BID_RETURN (res)
|
1382 |
|
|
}
|
1383 |
|
|
// check for underflow
|
1384 |
|
|
if (q4 + e4 < expmin + P34) {
|
1385 |
|
|
is_tiny = 1; // the result is tiny
|
1386 |
|
|
if (e4 < expmin) {
|
1387 |
|
|
// if e4 < expmin, we must truncate more of res
|
1388 |
|
|
x0 = expmin - e4; // x0 >= 1
|
1389 |
|
|
is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
|
1390 |
|
|
is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
|
1391 |
|
|
is_midpoint_lt_even0 = is_midpoint_lt_even;
|
1392 |
|
|
is_midpoint_gt_even0 = is_midpoint_gt_even;
|
1393 |
|
|
is_inexact_lt_midpoint = 0;
|
1394 |
|
|
is_inexact_gt_midpoint = 0;
|
1395 |
|
|
is_midpoint_lt_even = 0;
|
1396 |
|
|
is_midpoint_gt_even = 0;
|
1397 |
|
|
// the number of decimal digits in res is q4
|
1398 |
|
|
if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
|
1399 |
|
|
if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
|
1400 |
|
|
round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
|
1401 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
1402 |
|
|
&is_inexact_lt_midpoint,
|
1403 |
|
|
&is_inexact_gt_midpoint);
|
1404 |
|
|
if (incr_exp) {
|
1405 |
|
|
// R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
|
1406 |
|
|
R64 = ten2k64[q4 - x0];
|
1407 |
|
|
}
|
1408 |
|
|
// res.w[1] = 0; (from above)
|
1409 |
|
|
res.w[0] = R64;
|
1410 |
|
|
} else { // if (q4 <= 34)
|
1411 |
|
|
// 19 <= q4 <= 38
|
1412 |
|
|
P128.w[1] = res.w[1];
|
1413 |
|
|
P128.w[0] = res.w[0];
|
1414 |
|
|
round128_19_38 (q4, x0, P128, &res, &incr_exp,
|
1415 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
1416 |
|
|
&is_inexact_lt_midpoint,
|
1417 |
|
|
&is_inexact_gt_midpoint);
|
1418 |
|
|
if (incr_exp) {
|
1419 |
|
|
// increase coefficient by a factor of 10; this will be <= 10^33
|
1420 |
|
|
// R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
|
1421 |
|
|
if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
|
1422 |
|
|
// res.w[1] = 0;
|
1423 |
|
|
res.w[0] = ten2k64[q4 - x0];
|
1424 |
|
|
} else { // 20 <= q4 - x0 <= 37
|
1425 |
|
|
res.w[0] = ten2k128[q4 - x0 - 20].w[0];
|
1426 |
|
|
res.w[1] = ten2k128[q4 - x0 - 20].w[1];
|
1427 |
|
|
}
|
1428 |
|
|
}
|
1429 |
|
|
}
|
1430 |
|
|
e4 = e4 + x0; // expmin
|
1431 |
|
|
} else if (x0 == q4) {
|
1432 |
|
|
// the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
|
1433 |
|
|
// determine relationship with 1/2 ulp
|
1434 |
|
|
if (q4 <= 19) {
|
1435 |
|
|
if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
|
1436 |
|
|
lt_half_ulp = 1;
|
1437 |
|
|
is_inexact_lt_midpoint = 1;
|
1438 |
|
|
} else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
|
1439 |
|
|
eq_half_ulp = 1;
|
1440 |
|
|
is_midpoint_gt_even = 1;
|
1441 |
|
|
} else { // > 1/2 ulp
|
1442 |
|
|
// gt_half_ulp = 1;
|
1443 |
|
|
is_inexact_gt_midpoint = 1;
|
1444 |
|
|
}
|
1445 |
|
|
} else { // if (q4 <= 34)
|
1446 |
|
|
if (res.w[1] < midpoint128[q4 - 20].w[1] ||
|
1447 |
|
|
(res.w[1] == midpoint128[q4 - 20].w[1] &&
|
1448 |
|
|
res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
|
1449 |
|
|
lt_half_ulp = 1;
|
1450 |
|
|
is_inexact_lt_midpoint = 1;
|
1451 |
|
|
} else if (res.w[1] == midpoint128[q4 - 20].w[1] &&
|
1452 |
|
|
res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
|
1453 |
|
|
eq_half_ulp = 1;
|
1454 |
|
|
is_midpoint_gt_even = 1;
|
1455 |
|
|
} else { // > 1/2 ulp
|
1456 |
|
|
// gt_half_ulp = 1;
|
1457 |
|
|
is_inexact_gt_midpoint = 1;
|
1458 |
|
|
}
|
1459 |
|
|
}
|
1460 |
|
|
if (lt_half_ulp || eq_half_ulp) {
|
1461 |
|
|
// res = +0.0 * 10^expmin
|
1462 |
|
|
res.w[1] = 0x0000000000000000ull;
|
1463 |
|
|
res.w[0] = 0x0000000000000000ull;
|
1464 |
|
|
} else { // if (gt_half_ulp)
|
1465 |
|
|
// res = +1 * 10^expmin
|
1466 |
|
|
res.w[1] = 0x0000000000000000ull;
|
1467 |
|
|
res.w[0] = 0x0000000000000001ull;
|
1468 |
|
|
}
|
1469 |
|
|
e4 = expmin;
|
1470 |
|
|
} else { // if (x0 > q4)
|
1471 |
|
|
// the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
|
1472 |
|
|
res.w[1] = 0;
|
1473 |
|
|
res.w[0] = 0;
|
1474 |
|
|
e4 = expmin;
|
1475 |
|
|
is_inexact_lt_midpoint = 1;
|
1476 |
|
|
}
|
1477 |
|
|
// avoid a double rounding error
|
1478 |
|
|
if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
|
1479 |
|
|
is_midpoint_lt_even) { // double rounding error upward
|
1480 |
|
|
// res = res - 1
|
1481 |
|
|
res.w[0]--;
|
1482 |
|
|
if (res.w[0] == 0xffffffffffffffffull)
|
1483 |
|
|
res.w[1]--;
|
1484 |
|
|
// Note: a double rounding error upward is not possible; for this
|
1485 |
|
|
// the result after the first rounding would have to be 99...95
|
1486 |
|
|
// (35 digits in all), possibly followed by a number of zeros; this
|
1487 |
|
|
// not possible for f * f + 0
|
1488 |
|
|
is_midpoint_lt_even = 0;
|
1489 |
|
|
is_inexact_lt_midpoint = 1;
|
1490 |
|
|
} else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
|
1491 |
|
|
is_midpoint_gt_even) { // double rounding error downward
|
1492 |
|
|
// res = res + 1
|
1493 |
|
|
res.w[0]++;
|
1494 |
|
|
if (res.w[0] == 0)
|
1495 |
|
|
res.w[1]++;
|
1496 |
|
|
is_midpoint_gt_even = 0;
|
1497 |
|
|
is_inexact_gt_midpoint = 1;
|
1498 |
|
|
} else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
|
1499 |
|
|
!is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
|
1500 |
|
|
// if this second rounding was exact the result may still be
|
1501 |
|
|
// inexact because of the first rounding
|
1502 |
|
|
if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
|
1503 |
|
|
is_inexact_gt_midpoint = 1;
|
1504 |
|
|
}
|
1505 |
|
|
if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
|
1506 |
|
|
is_inexact_lt_midpoint = 1;
|
1507 |
|
|
}
|
1508 |
|
|
} else if (is_midpoint_gt_even &&
|
1509 |
|
|
(is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
|
1510 |
|
|
// pulled up to a midpoint
|
1511 |
|
|
is_inexact_lt_midpoint = 1;
|
1512 |
|
|
is_inexact_gt_midpoint = 0;
|
1513 |
|
|
is_midpoint_lt_even = 0;
|
1514 |
|
|
is_midpoint_gt_even = 0;
|
1515 |
|
|
} else if (is_midpoint_lt_even &&
|
1516 |
|
|
(is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
|
1517 |
|
|
// pulled down to a midpoint
|
1518 |
|
|
is_inexact_lt_midpoint = 0;
|
1519 |
|
|
is_inexact_gt_midpoint = 1;
|
1520 |
|
|
is_midpoint_lt_even = 0;
|
1521 |
|
|
is_midpoint_gt_even = 0;
|
1522 |
|
|
} else {
|
1523 |
|
|
;
|
1524 |
|
|
}
|
1525 |
|
|
} else { // if e4 >= emin then q4 < P and the result is tiny and exact
|
1526 |
|
|
if (e3 < e4) {
|
1527 |
|
|
// if (e3 < e4) the preferred exponent is e3
|
1528 |
|
|
// return (C4 * 10^scale) * 10^(e4 - scale)
|
1529 |
|
|
// where scale = min (p34-q4, (e4 - e3))
|
1530 |
|
|
scale = p34 - q4;
|
1531 |
|
|
ind = e4 - e3;
|
1532 |
|
|
if (ind < scale)
|
1533 |
|
|
scale = ind;
|
1534 |
|
|
if (scale == 0) {
|
1535 |
|
|
; // res and e4 are unchanged
|
1536 |
|
|
} else if (q4 <= 19) { // C4 fits in 64 bits
|
1537 |
|
|
if (scale <= 19) { // 10^scale fits in 64 bits
|
1538 |
|
|
// 64 x 64 res.w[0] * ten2k64[scale]
|
1539 |
|
|
__mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
|
1540 |
|
|
} else { // 10^scale fits in 128 bits
|
1541 |
|
|
// 64 x 128 res.w[0] * ten2k128[scale - 20]
|
1542 |
|
|
__mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
|
1543 |
|
|
}
|
1544 |
|
|
} else { // res fits in 128 bits, but 10^scale must fit in 64 bits
|
1545 |
|
|
// 64 x 128 ten2k64[scale] * C3
|
1546 |
|
|
__mul_128x64_to_128 (res, ten2k64[scale], res);
|
1547 |
|
|
}
|
1548 |
|
|
// subtract scale from the exponent
|
1549 |
|
|
e4 = e4 - scale;
|
1550 |
|
|
}
|
1551 |
|
|
}
|
1552 |
|
|
|
1553 |
|
|
// check for inexact result
|
1554 |
|
|
if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
|
1555 |
|
|
is_midpoint_lt_even || is_midpoint_gt_even) {
|
1556 |
|
|
// set the inexact flag and the underflow flag
|
1557 |
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
1558 |
|
|
*pfpsf |= UNDERFLOW_EXCEPTION;
|
1559 |
|
|
}
|
1560 |
|
|
res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
|
1561 |
|
|
if (rnd_mode != ROUNDING_TO_NEAREST) {
|
1562 |
|
|
rounding_correction (rnd_mode,
|
1563 |
|
|
is_inexact_lt_midpoint,
|
1564 |
|
|
is_inexact_gt_midpoint,
|
1565 |
|
|
is_midpoint_lt_even, is_midpoint_gt_even,
|
1566 |
|
|
e4, &res, pfpsf);
|
1567 |
|
|
}
|
1568 |
|
|
*pfpsf |= save_fpsf;
|
1569 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
1570 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
1571 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
1572 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
1573 |
|
|
BID_SWAP128 (res);
|
1574 |
|
|
BID_RETURN (res)
|
1575 |
|
|
}
|
1576 |
|
|
// no overflow, and no underflow for rounding to nearest
|
1577 |
|
|
res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
|
1578 |
|
|
|
1579 |
|
|
if (rnd_mode != ROUNDING_TO_NEAREST) {
|
1580 |
|
|
rounding_correction (rnd_mode,
|
1581 |
|
|
is_inexact_lt_midpoint,
|
1582 |
|
|
is_inexact_gt_midpoint,
|
1583 |
|
|
is_midpoint_lt_even, is_midpoint_gt_even,
|
1584 |
|
|
e4, &res, pfpsf);
|
1585 |
|
|
// if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
|
1586 |
|
|
if (e4 == expmin) {
|
1587 |
|
|
if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
|
1588 |
|
|
((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
|
1589 |
|
|
res.w[0] < 0x38c15b0a00000000ull)) {
|
1590 |
|
|
is_tiny = 1;
|
1591 |
|
|
}
|
1592 |
|
|
}
|
1593 |
|
|
}
|
1594 |
|
|
|
1595 |
|
|
if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
|
1596 |
|
|
is_midpoint_lt_even || is_midpoint_gt_even) {
|
1597 |
|
|
// set the inexact flag
|
1598 |
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
1599 |
|
|
if (is_tiny)
|
1600 |
|
|
*pfpsf |= UNDERFLOW_EXCEPTION;
|
1601 |
|
|
}
|
1602 |
|
|
|
1603 |
|
|
if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
|
1604 |
|
|
// need to ensure that the result has the preferred exponent
|
1605 |
|
|
p_exp = res.w[1] & MASK_EXP;
|
1606 |
|
|
if (z_exp < p_exp) { // the preferred exponent is z_exp
|
1607 |
|
|
// signficand of res in C3
|
1608 |
|
|
C3.w[1] = res.w[1] & MASK_COEFF;
|
1609 |
|
|
C3.w[0] = res.w[0];
|
1610 |
|
|
// the number of decimal digits of x * y is q4 <= 34
|
1611 |
|
|
// Note: the coefficient fits in 128 bits
|
1612 |
|
|
|
1613 |
|
|
// return (C3 * 10^scale) * 10^(p_exp - scale)
|
1614 |
|
|
// where scale = min (p34-q4, (p_exp-z_exp) >> 49)
|
1615 |
|
|
scale = p34 - q4;
|
1616 |
|
|
ind = (p_exp - z_exp) >> 49;
|
1617 |
|
|
if (ind < scale)
|
1618 |
|
|
scale = ind;
|
1619 |
|
|
// subtract scale from the exponent
|
1620 |
|
|
p_exp = p_exp - ((UINT64) scale << 49);
|
1621 |
|
|
if (scale == 0) {
|
1622 |
|
|
; // leave res unchanged
|
1623 |
|
|
} else if (q4 <= 19) { // x * y fits in 64 bits
|
1624 |
|
|
if (scale <= 19) { // 10^scale fits in 64 bits
|
1625 |
|
|
// 64 x 64 C3.w[0] * ten2k64[scale]
|
1626 |
|
|
__mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
|
1627 |
|
|
} else { // 10^scale fits in 128 bits
|
1628 |
|
|
// 64 x 128 C3.w[0] * ten2k128[scale - 20]
|
1629 |
|
|
__mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
|
1630 |
|
|
}
|
1631 |
|
|
res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
|
1632 |
|
|
} else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
|
1633 |
|
|
// 64 x 128 ten2k64[scale] * C3
|
1634 |
|
|
__mul_128x64_to_128 (res, ten2k64[scale], C3);
|
1635 |
|
|
res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
|
1636 |
|
|
}
|
1637 |
|
|
} // else leave the result as it is, because p_exp <= z_exp
|
1638 |
|
|
}
|
1639 |
|
|
*pfpsf |= save_fpsf;
|
1640 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
1641 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
1642 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
1643 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
1644 |
|
|
BID_SWAP128 (res);
|
1645 |
|
|
BID_RETURN (res)
|
1646 |
|
|
} // else we have f * f + f
|
1647 |
|
|
|
1648 |
|
|
// continue with x = f, y = f, z = f
|
1649 |
|
|
|
1650 |
|
|
delta = q3 + e3 - q4 - e4;
|
1651 |
|
|
delta_ge_zero:
|
1652 |
|
|
if (delta >= 0) {
|
1653 |
|
|
|
1654 |
|
|
if (p34 <= delta - 1 || // Case (1')
|
1655 |
|
|
(p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
|
1656 |
|
|
// check for overflow, which can occur only in Case (1')
|
1657 |
|
|
if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
|
1658 |
|
|
// e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
|
1659 |
|
|
// condition for (q3 + e3) > (p34 + expmax)
|
1660 |
|
|
if (rnd_mode == ROUNDING_TO_NEAREST) {
|
1661 |
|
|
res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
|
1662 |
|
|
res.w[0] = 0x0000000000000000ull;
|
1663 |
|
|
*pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
|
1664 |
|
|
} else {
|
1665 |
|
|
if (p_sign == z_sign) {
|
1666 |
|
|
is_inexact_lt_midpoint = 1;
|
1667 |
|
|
} else {
|
1668 |
|
|
is_inexact_gt_midpoint = 1;
|
1669 |
|
|
}
|
1670 |
|
|
// q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
|
1671 |
|
|
scale = p34 - q3;
|
1672 |
|
|
if (scale == 0) {
|
1673 |
|
|
res.w[1] = z_sign | C3.w[1];
|
1674 |
|
|
res.w[0] = C3.w[0];
|
1675 |
|
|
} else {
|
1676 |
|
|
if (q3 <= 19) { // C3 fits in 64 bits
|
1677 |
|
|
if (scale <= 19) { // 10^scale fits in 64 bits
|
1678 |
|
|
// 64 x 64 C3.w[0] * ten2k64[scale]
|
1679 |
|
|
__mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
|
1680 |
|
|
} else { // 10^scale fits in 128 bits
|
1681 |
|
|
// 64 x 128 C3.w[0] * ten2k128[scale - 20]
|
1682 |
|
|
__mul_128x64_to_128 (res, C3.w[0],
|
1683 |
|
|
ten2k128[scale - 20]);
|
1684 |
|
|
}
|
1685 |
|
|
} else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
|
1686 |
|
|
// 64 x 128 ten2k64[scale] * C3
|
1687 |
|
|
__mul_128x64_to_128 (res, ten2k64[scale], C3);
|
1688 |
|
|
}
|
1689 |
|
|
// the coefficient in res has q3 + scale = p34 digits
|
1690 |
|
|
}
|
1691 |
|
|
e3 = e3 - scale;
|
1692 |
|
|
res.w[1] = z_sign | res.w[1];
|
1693 |
|
|
rounding_correction (rnd_mode,
|
1694 |
|
|
is_inexact_lt_midpoint,
|
1695 |
|
|
is_inexact_gt_midpoint,
|
1696 |
|
|
is_midpoint_lt_even, is_midpoint_gt_even,
|
1697 |
|
|
e3, &res, pfpsf);
|
1698 |
|
|
}
|
1699 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
1700 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
1701 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
1702 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
1703 |
|
|
BID_SWAP128 (res);
|
1704 |
|
|
BID_RETURN (res)
|
1705 |
|
|
}
|
1706 |
|
|
// res = z
|
1707 |
|
|
if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3)
|
1708 |
|
|
// return (C3 * 10^scale) * 10^(z_exp - scale)
|
1709 |
|
|
// where scale = min (p34-q3, z_exp-EMIN)
|
1710 |
|
|
scale = p34 - q3;
|
1711 |
|
|
ind = e3 + 6176;
|
1712 |
|
|
if (ind < scale)
|
1713 |
|
|
scale = ind;
|
1714 |
|
|
if (scale == 0) {
|
1715 |
|
|
res.w[1] = C3.w[1];
|
1716 |
|
|
res.w[0] = C3.w[0];
|
1717 |
|
|
} else if (q3 <= 19) { // z fits in 64 bits
|
1718 |
|
|
if (scale <= 19) { // 10^scale fits in 64 bits
|
1719 |
|
|
// 64 x 64 C3.w[0] * ten2k64[scale]
|
1720 |
|
|
__mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
|
1721 |
|
|
} else { // 10^scale fits in 128 bits
|
1722 |
|
|
// 64 x 128 C3.w[0] * ten2k128[scale - 20]
|
1723 |
|
|
__mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
|
1724 |
|
|
}
|
1725 |
|
|
} else { // z fits in 128 bits, but 10^scale must fit in 64 bits
|
1726 |
|
|
// 64 x 128 ten2k64[scale] * C3
|
1727 |
|
|
__mul_128x64_to_128 (res, ten2k64[scale], C3);
|
1728 |
|
|
}
|
1729 |
|
|
// the coefficient in res has q3 + scale digits
|
1730 |
|
|
// subtract scale from the exponent
|
1731 |
|
|
z_exp = z_exp - ((UINT64) scale << 49);
|
1732 |
|
|
e3 = e3 - scale;
|
1733 |
|
|
res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
|
1734 |
|
|
if (scale + q3 < p34)
|
1735 |
|
|
*pfpsf |= UNDERFLOW_EXCEPTION;
|
1736 |
|
|
} else {
|
1737 |
|
|
scale = 0;
|
1738 |
|
|
res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1];
|
1739 |
|
|
res.w[0] = C3.w[0];
|
1740 |
|
|
}
|
1741 |
|
|
|
1742 |
|
|
// use the following to avoid double rounding errors when operating on
|
1743 |
|
|
// mixed formats in rounding to nearest, and for correcting the result
|
1744 |
|
|
// if not rounding to nearest
|
1745 |
|
|
if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
|
1746 |
|
|
// there is a gap of exactly one digit between the scaled C3 and C4
|
1747 |
|
|
// C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
|
1748 |
|
|
if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) ||
|
1749 |
|
|
(q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) ||
|
1750 |
|
|
(q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] ||
|
1751 |
|
|
C3.w[0] != ten2k128[q3 - 21].w[0]))) {
|
1752 |
|
|
// C3 * 10^ scale != 10^(q3-1)
|
1753 |
|
|
// if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
|
1754 |
|
|
// res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
|
1755 |
|
|
is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
|
1756 |
|
|
} else { // if C3 * 10^scale = 10^(q3+scale-1)
|
1757 |
|
|
// ok from above e3 = (z_exp >> 49) - 6176;
|
1758 |
|
|
// the result is always inexact
|
1759 |
|
|
if (q4 == 1) {
|
1760 |
|
|
R64 = C4.w[0];
|
1761 |
|
|
} else {
|
1762 |
|
|
// if q4 > 1 then truncate C4 from q4 digits to 1 digit;
|
1763 |
|
|
// x = q4-1, 1 <= x <= 67 and check if this operation is exact
|
1764 |
|
|
if (q4 <= 18) { // 2 <= q4 <= 18
|
1765 |
|
|
round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
|
1766 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
1767 |
|
|
&is_inexact_lt_midpoint,
|
1768 |
|
|
&is_inexact_gt_midpoint);
|
1769 |
|
|
} else if (q4 <= 38) {
|
1770 |
|
|
P128.w[1] = C4.w[1];
|
1771 |
|
|
P128.w[0] = C4.w[0];
|
1772 |
|
|
round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
|
1773 |
|
|
&is_midpoint_lt_even,
|
1774 |
|
|
&is_midpoint_gt_even,
|
1775 |
|
|
&is_inexact_lt_midpoint,
|
1776 |
|
|
&is_inexact_gt_midpoint);
|
1777 |
|
|
R64 = R128.w[0]; // one decimal digit
|
1778 |
|
|
} else if (q4 <= 57) {
|
1779 |
|
|
P192.w[2] = C4.w[2];
|
1780 |
|
|
P192.w[1] = C4.w[1];
|
1781 |
|
|
P192.w[0] = C4.w[0];
|
1782 |
|
|
round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
|
1783 |
|
|
&is_midpoint_lt_even,
|
1784 |
|
|
&is_midpoint_gt_even,
|
1785 |
|
|
&is_inexact_lt_midpoint,
|
1786 |
|
|
&is_inexact_gt_midpoint);
|
1787 |
|
|
R64 = R192.w[0]; // one decimal digit
|
1788 |
|
|
} else { // if (q4 <= 68)
|
1789 |
|
|
round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
|
1790 |
|
|
&is_midpoint_lt_even,
|
1791 |
|
|
&is_midpoint_gt_even,
|
1792 |
|
|
&is_inexact_lt_midpoint,
|
1793 |
|
|
&is_inexact_gt_midpoint);
|
1794 |
|
|
R64 = R256.w[0]; // one decimal digit
|
1795 |
|
|
}
|
1796 |
|
|
if (incr_exp) {
|
1797 |
|
|
R64 = 10;
|
1798 |
|
|
}
|
1799 |
|
|
}
|
1800 |
|
|
if (q4 == 1 && C4.w[0] == 5) {
|
1801 |
|
|
is_inexact_lt_midpoint = 0;
|
1802 |
|
|
is_inexact_gt_midpoint = 0;
|
1803 |
|
|
is_midpoint_lt_even = 1;
|
1804 |
|
|
is_midpoint_gt_even = 0;
|
1805 |
|
|
} else if ((e3 == expmin) ||
|
1806 |
|
|
R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
|
1807 |
|
|
// result does not change
|
1808 |
|
|
is_inexact_lt_midpoint = 0;
|
1809 |
|
|
is_inexact_gt_midpoint = 1;
|
1810 |
|
|
is_midpoint_lt_even = 0;
|
1811 |
|
|
is_midpoint_gt_even = 0;
|
1812 |
|
|
} else {
|
1813 |
|
|
is_inexact_lt_midpoint = 1;
|
1814 |
|
|
is_inexact_gt_midpoint = 0;
|
1815 |
|
|
is_midpoint_lt_even = 0;
|
1816 |
|
|
is_midpoint_gt_even = 0;
|
1817 |
|
|
// result decremented is 10^(q3+scale) - 1
|
1818 |
|
|
if ((q3 + scale) <= 19) {
|
1819 |
|
|
res.w[1] = 0;
|
1820 |
|
|
res.w[0] = ten2k64[q3 + scale];
|
1821 |
|
|
} else { // if ((q3 + scale + 1) <= 35)
|
1822 |
|
|
res.w[1] = ten2k128[q3 + scale - 20].w[1];
|
1823 |
|
|
res.w[0] = ten2k128[q3 + scale - 20].w[0];
|
1824 |
|
|
}
|
1825 |
|
|
res.w[0] = res.w[0] - 1; // borrow never occurs
|
1826 |
|
|
z_exp = z_exp - EXP_P1;
|
1827 |
|
|
e3 = e3 - 1;
|
1828 |
|
|
res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
|
1829 |
|
|
}
|
1830 |
|
|
if (e3 == expmin) {
|
1831 |
|
|
if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) {
|
1832 |
|
|
; // result not tiny (in round-to-nearest mode)
|
1833 |
|
|
} else {
|
1834 |
|
|
*pfpsf |= UNDERFLOW_EXCEPTION;
|
1835 |
|
|
}
|
1836 |
|
|
}
|
1837 |
|
|
} // end 10^(q3+scale-1)
|
1838 |
|
|
// set the inexact flag
|
1839 |
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
1840 |
|
|
} else {
|
1841 |
|
|
if (p_sign == z_sign) {
|
1842 |
|
|
// if (z_sign), set as if for absolute value
|
1843 |
|
|
is_inexact_lt_midpoint = 1;
|
1844 |
|
|
} else { // if (p_sign != z_sign)
|
1845 |
|
|
// if (z_sign), set as if for absolute value
|
1846 |
|
|
is_inexact_gt_midpoint = 1;
|
1847 |
|
|
}
|
1848 |
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
1849 |
|
|
}
|
1850 |
|
|
// the result is always inexact => set the inexact flag
|
1851 |
|
|
// Determine tininess:
|
1852 |
|
|
// if (exp > expmin)
|
1853 |
|
|
// the result is not tiny
|
1854 |
|
|
// else // if exp = emin
|
1855 |
|
|
// if (q3 + scale < p34)
|
1856 |
|
|
// the result is tiny
|
1857 |
|
|
// else // if (q3 + scale = p34)
|
1858 |
|
|
// if (C3 * 10^scale > 10^33)
|
1859 |
|
|
// the result is not tiny
|
1860 |
|
|
// else // if C3 * 10^scale = 10^33
|
1861 |
|
|
// if (xy * z > 0)
|
1862 |
|
|
// the result is not tiny
|
1863 |
|
|
// else // if (xy * z < 0)
|
1864 |
|
|
// if (z > 0)
|
1865 |
|
|
// if rnd_mode != RP
|
1866 |
|
|
// the result is tiny
|
1867 |
|
|
// else // if RP
|
1868 |
|
|
// the result is not tiny
|
1869 |
|
|
// else // if (z < 0)
|
1870 |
|
|
// if rnd_mode != RM
|
1871 |
|
|
// the result is tiny
|
1872 |
|
|
// else // if RM
|
1873 |
|
|
// the result is not tiny
|
1874 |
|
|
// endif
|
1875 |
|
|
// endif
|
1876 |
|
|
// endif
|
1877 |
|
|
// endif
|
1878 |
|
|
// endif
|
1879 |
|
|
// endif
|
1880 |
|
|
if ((e3 == expmin && (q3 + scale) < p34) ||
|
1881 |
|
|
(e3 == expmin && (q3 + scale) == p34 &&
|
1882 |
|
|
(res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high
|
1883 |
|
|
res.w[0] == 0x38c15b0a00000000ull && // 10^33_low
|
1884 |
|
|
z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) ||
|
1885 |
|
|
(z_sign && rnd_mode != ROUNDING_DOWN)))) {
|
1886 |
|
|
*pfpsf |= UNDERFLOW_EXCEPTION;
|
1887 |
|
|
}
|
1888 |
|
|
if (rnd_mode != ROUNDING_TO_NEAREST) {
|
1889 |
|
|
rounding_correction (rnd_mode,
|
1890 |
|
|
is_inexact_lt_midpoint,
|
1891 |
|
|
is_inexact_gt_midpoint,
|
1892 |
|
|
is_midpoint_lt_even, is_midpoint_gt_even,
|
1893 |
|
|
e3, &res, pfpsf);
|
1894 |
|
|
}
|
1895 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
1896 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
1897 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
1898 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
1899 |
|
|
BID_SWAP128 (res);
|
1900 |
|
|
BID_RETURN (res)
|
1901 |
|
|
|
1902 |
|
|
} else if (p34 == delta) { // Case (1''B)
|
1903 |
|
|
|
1904 |
|
|
// because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
|
1905 |
|
|
// and C3 can be scaled up to p34 digits if needed
|
1906 |
|
|
|
1907 |
|
|
// scale C3 to p34 digits if needed
|
1908 |
|
|
scale = p34 - q3; // 0 <= scale <= p34 - 1
|
1909 |
|
|
if (scale == 0) {
|
1910 |
|
|
res.w[1] = C3.w[1];
|
1911 |
|
|
res.w[0] = C3.w[0];
|
1912 |
|
|
} else if (q3 <= 19) { // z fits in 64 bits
|
1913 |
|
|
if (scale <= 19) { // 10^scale fits in 64 bits
|
1914 |
|
|
// 64 x 64 C3.w[0] * ten2k64[scale]
|
1915 |
|
|
__mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
|
1916 |
|
|
} else { // 10^scale fits in 128 bits
|
1917 |
|
|
// 64 x 128 C3.w[0] * ten2k128[scale - 20]
|
1918 |
|
|
__mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
|
1919 |
|
|
}
|
1920 |
|
|
} else { // z fits in 128 bits, but 10^scale must fit in 64 bits
|
1921 |
|
|
// 64 x 128 ten2k64[scale] * C3
|
1922 |
|
|
__mul_128x64_to_128 (res, ten2k64[scale], C3);
|
1923 |
|
|
}
|
1924 |
|
|
// subtract scale from the exponent
|
1925 |
|
|
z_exp = z_exp - ((UINT64) scale << 49);
|
1926 |
|
|
e3 = e3 - scale;
|
1927 |
|
|
// now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits
|
1928 |
|
|
|
1929 |
|
|
// determine whether x * y is less than, equal to, or greater than
|
1930 |
|
|
// 1/2 ulp (z)
|
1931 |
|
|
if (q4 <= 19) {
|
1932 |
|
|
if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
|
1933 |
|
|
lt_half_ulp = 1;
|
1934 |
|
|
} else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
|
1935 |
|
|
eq_half_ulp = 1;
|
1936 |
|
|
} else { // > 1/2 ulp
|
1937 |
|
|
gt_half_ulp = 1;
|
1938 |
|
|
}
|
1939 |
|
|
} else if (q4 <= 38) {
|
1940 |
|
|
if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] ||
|
1941 |
|
|
(C4.w[1] == midpoint128[q4 - 20].w[1] &&
|
1942 |
|
|
C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
|
1943 |
|
|
lt_half_ulp = 1;
|
1944 |
|
|
} else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] &&
|
1945 |
|
|
C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
|
1946 |
|
|
eq_half_ulp = 1;
|
1947 |
|
|
} else { // > 1/2 ulp
|
1948 |
|
|
gt_half_ulp = 1;
|
1949 |
|
|
}
|
1950 |
|
|
} else if (q4 <= 58) {
|
1951 |
|
|
if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] ||
|
1952 |
|
|
(C4.w[2] == midpoint192[q4 - 39].w[2] &&
|
1953 |
|
|
C4.w[1] < midpoint192[q4 - 39].w[1]) ||
|
1954 |
|
|
(C4.w[2] == midpoint192[q4 - 39].w[2] &&
|
1955 |
|
|
C4.w[1] == midpoint192[q4 - 39].w[1] &&
|
1956 |
|
|
C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
|
1957 |
|
|
lt_half_ulp = 1;
|
1958 |
|
|
} else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] &&
|
1959 |
|
|
C4.w[1] == midpoint192[q4 - 39].w[1] &&
|
1960 |
|
|
C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
|
1961 |
|
|
eq_half_ulp = 1;
|
1962 |
|
|
} else { // > 1/2 ulp
|
1963 |
|
|
gt_half_ulp = 1;
|
1964 |
|
|
}
|
1965 |
|
|
} else {
|
1966 |
|
|
if (C4.w[3] < midpoint256[q4 - 59].w[3] ||
|
1967 |
|
|
(C4.w[3] == midpoint256[q4 - 59].w[3] &&
|
1968 |
|
|
C4.w[2] < midpoint256[q4 - 59].w[2]) ||
|
1969 |
|
|
(C4.w[3] == midpoint256[q4 - 59].w[3] &&
|
1970 |
|
|
C4.w[2] == midpoint256[q4 - 59].w[2] &&
|
1971 |
|
|
C4.w[1] < midpoint256[q4 - 59].w[1]) ||
|
1972 |
|
|
(C4.w[3] == midpoint256[q4 - 59].w[3] &&
|
1973 |
|
|
C4.w[2] == midpoint256[q4 - 59].w[2] &&
|
1974 |
|
|
C4.w[1] == midpoint256[q4 - 59].w[1] &&
|
1975 |
|
|
C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
|
1976 |
|
|
lt_half_ulp = 1;
|
1977 |
|
|
} else if (C4.w[3] == midpoint256[q4 - 59].w[3] &&
|
1978 |
|
|
C4.w[2] == midpoint256[q4 - 59].w[2] &&
|
1979 |
|
|
C4.w[1] == midpoint256[q4 - 59].w[1] &&
|
1980 |
|
|
C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
|
1981 |
|
|
eq_half_ulp = 1;
|
1982 |
|
|
} else { // > 1/2 ulp
|
1983 |
|
|
gt_half_ulp = 1;
|
1984 |
|
|
}
|
1985 |
|
|
}
|
1986 |
|
|
|
1987 |
|
|
if (p_sign == z_sign) {
|
1988 |
|
|
if (lt_half_ulp) {
|
1989 |
|
|
res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
|
1990 |
|
|
// use the following to avoid double rounding errors when operating on
|
1991 |
|
|
// mixed formats in rounding to nearest
|
1992 |
|
|
is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value
|
1993 |
|
|
} else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
|
1994 |
|
|
// add 1 ulp to the significand
|
1995 |
|
|
res.w[0]++;
|
1996 |
|
|
if (res.w[0] == 0x0ull)
|
1997 |
|
|
res.w[1]++;
|
1998 |
|
|
// check for rounding overflow, when coeff == 10^34
|
1999 |
|
|
if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull &&
|
2000 |
|
|
res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34
|
2001 |
|
|
e3 = e3 + 1;
|
2002 |
|
|
// coeff = 10^33
|
2003 |
|
|
z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP;
|
2004 |
|
|
res.w[1] = 0x0000314dc6448d93ull;
|
2005 |
|
|
res.w[0] = 0x38c15b0a00000000ull;
|
2006 |
|
|
}
|
2007 |
|
|
// end add 1 ulp
|
2008 |
|
|
res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
|
2009 |
|
|
if (eq_half_ulp) {
|
2010 |
|
|
is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
|
2011 |
|
|
} else {
|
2012 |
|
|
is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
|
2013 |
|
|
}
|
2014 |
|
|
} else { // if (eq_half_ulp && !(res.w[0] & 0x01))
|
2015 |
|
|
// leave unchanged
|
2016 |
|
|
res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
|
2017 |
|
|
is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
|
2018 |
|
|
}
|
2019 |
|
|
// the result is always inexact, and never tiny
|
2020 |
|
|
// set the inexact flag
|
2021 |
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
2022 |
|
|
// check for overflow
|
2023 |
|
|
if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) {
|
2024 |
|
|
res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
|
2025 |
|
|
res.w[0] = 0x0000000000000000ull;
|
2026 |
|
|
*pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
|
2027 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
2028 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
2029 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
2030 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
2031 |
|
|
BID_SWAP128 (res);
|
2032 |
|
|
BID_RETURN (res)
|
2033 |
|
|
}
|
2034 |
|
|
if (rnd_mode != ROUNDING_TO_NEAREST) {
|
2035 |
|
|
rounding_correction (rnd_mode,
|
2036 |
|
|
is_inexact_lt_midpoint,
|
2037 |
|
|
is_inexact_gt_midpoint,
|
2038 |
|
|
is_midpoint_lt_even, is_midpoint_gt_even,
|
2039 |
|
|
e3, &res, pfpsf);
|
2040 |
|
|
z_exp = res.w[1] & MASK_EXP;
|
2041 |
|
|
}
|
2042 |
|
|
} else { // if (p_sign != z_sign)
|
2043 |
|
|
// consider two cases, because C3 * 10^scale = 10^33 is a special case
|
2044 |
|
|
if (res.w[1] != 0x0000314dc6448d93ull ||
|
2045 |
|
|
res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
|
2046 |
|
|
if (lt_half_ulp) {
|
2047 |
|
|
res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
|
2048 |
|
|
// use the following to avoid double rounding errors when operating
|
2049 |
|
|
// on mixed formats in rounding to nearest
|
2050 |
|
|
is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
|
2051 |
|
|
} else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
|
2052 |
|
|
// subtract 1 ulp from the significand
|
2053 |
|
|
res.w[0]--;
|
2054 |
|
|
if (res.w[0] == 0xffffffffffffffffull)
|
2055 |
|
|
res.w[1]--;
|
2056 |
|
|
res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
|
2057 |
|
|
if (eq_half_ulp) {
|
2058 |
|
|
is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
|
2059 |
|
|
} else {
|
2060 |
|
|
is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
|
2061 |
|
|
}
|
2062 |
|
|
} else { // if (eq_half_ulp && !(res.w[0] & 0x01))
|
2063 |
|
|
// leave unchanged
|
2064 |
|
|
res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
|
2065 |
|
|
is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
|
2066 |
|
|
}
|
2067 |
|
|
// the result is always inexact, and never tiny
|
2068 |
|
|
// check for overflow for RN
|
2069 |
|
|
if (e3 > expmax) {
|
2070 |
|
|
if (rnd_mode == ROUNDING_TO_NEAREST) {
|
2071 |
|
|
res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
|
2072 |
|
|
res.w[0] = 0x0000000000000000ull;
|
2073 |
|
|
*pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
|
2074 |
|
|
} else {
|
2075 |
|
|
rounding_correction (rnd_mode,
|
2076 |
|
|
is_inexact_lt_midpoint,
|
2077 |
|
|
is_inexact_gt_midpoint,
|
2078 |
|
|
is_midpoint_lt_even,
|
2079 |
|
|
is_midpoint_gt_even, e3, &res,
|
2080 |
|
|
pfpsf);
|
2081 |
|
|
}
|
2082 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
2083 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
2084 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
2085 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
2086 |
|
|
BID_SWAP128 (res);
|
2087 |
|
|
BID_RETURN (res)
|
2088 |
|
|
}
|
2089 |
|
|
// set the inexact flag
|
2090 |
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
2091 |
|
|
if (rnd_mode != ROUNDING_TO_NEAREST) {
|
2092 |
|
|
rounding_correction (rnd_mode,
|
2093 |
|
|
is_inexact_lt_midpoint,
|
2094 |
|
|
is_inexact_gt_midpoint,
|
2095 |
|
|
is_midpoint_lt_even,
|
2096 |
|
|
is_midpoint_gt_even, e3, &res, pfpsf);
|
2097 |
|
|
}
|
2098 |
|
|
z_exp = res.w[1] & MASK_EXP;
|
2099 |
|
|
} else { // if C3 * 10^scale = 10^33
|
2100 |
|
|
e3 = (z_exp >> 49) - 6176;
|
2101 |
|
|
if (e3 > expmin) {
|
2102 |
|
|
// the result is exact if exp > expmin and C4 = d*10^(q4-1),
|
2103 |
|
|
// where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
|
2104 |
|
|
if (q4 == 1) {
|
2105 |
|
|
// if q4 = 1 the result is exact
|
2106 |
|
|
// result coefficient = 10^34 - C4
|
2107 |
|
|
res.w[1] = 0x0001ed09bead87c0ull;
|
2108 |
|
|
res.w[0] = 0x378d8e6400000000ull - C4.w[0];
|
2109 |
|
|
z_exp = z_exp - EXP_P1;
|
2110 |
|
|
e3 = e3 - 1;
|
2111 |
|
|
res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
|
2112 |
|
|
} else {
|
2113 |
|
|
// if q4 > 1 then truncate C4 from q4 digits to 1 digit;
|
2114 |
|
|
// x = q4-1, 1 <= x <= 67 and check if this operation is exact
|
2115 |
|
|
if (q4 <= 18) { // 2 <= q4 <= 18
|
2116 |
|
|
round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
|
2117 |
|
|
&is_midpoint_lt_even,
|
2118 |
|
|
&is_midpoint_gt_even,
|
2119 |
|
|
&is_inexact_lt_midpoint,
|
2120 |
|
|
&is_inexact_gt_midpoint);
|
2121 |
|
|
} else if (q4 <= 38) {
|
2122 |
|
|
P128.w[1] = C4.w[1];
|
2123 |
|
|
P128.w[0] = C4.w[0];
|
2124 |
|
|
round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
|
2125 |
|
|
&is_midpoint_lt_even,
|
2126 |
|
|
&is_midpoint_gt_even,
|
2127 |
|
|
&is_inexact_lt_midpoint,
|
2128 |
|
|
&is_inexact_gt_midpoint);
|
2129 |
|
|
R64 = R128.w[0]; // one decimal digit
|
2130 |
|
|
} else if (q4 <= 57) {
|
2131 |
|
|
P192.w[2] = C4.w[2];
|
2132 |
|
|
P192.w[1] = C4.w[1];
|
2133 |
|
|
P192.w[0] = C4.w[0];
|
2134 |
|
|
round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
|
2135 |
|
|
&is_midpoint_lt_even,
|
2136 |
|
|
&is_midpoint_gt_even,
|
2137 |
|
|
&is_inexact_lt_midpoint,
|
2138 |
|
|
&is_inexact_gt_midpoint);
|
2139 |
|
|
R64 = R192.w[0]; // one decimal digit
|
2140 |
|
|
} else { // if (q4 <= 68)
|
2141 |
|
|
round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
|
2142 |
|
|
&is_midpoint_lt_even,
|
2143 |
|
|
&is_midpoint_gt_even,
|
2144 |
|
|
&is_inexact_lt_midpoint,
|
2145 |
|
|
&is_inexact_gt_midpoint);
|
2146 |
|
|
R64 = R256.w[0]; // one decimal digit
|
2147 |
|
|
}
|
2148 |
|
|
if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
|
2149 |
|
|
!is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
|
2150 |
|
|
// the result is exact: 10^34 - R64
|
2151 |
|
|
// incr_exp = 0 with certainty
|
2152 |
|
|
z_exp = z_exp - EXP_P1;
|
2153 |
|
|
e3 = e3 - 1;
|
2154 |
|
|
res.w[1] =
|
2155 |
|
|
z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
|
2156 |
|
|
res.w[0] = 0x378d8e6400000000ull - R64;
|
2157 |
|
|
} else {
|
2158 |
|
|
// We want R64 to be the top digit of C4, but we actually
|
2159 |
|
|
// obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
|
2160 |
|
|
// because the top digit is (C4 * 10^(-q4+1))RZ
|
2161 |
|
|
// however, if incr_exp = 1 then R64 = 10 with certainty
|
2162 |
|
|
if (incr_exp) {
|
2163 |
|
|
R64 = 10;
|
2164 |
|
|
}
|
2165 |
|
|
// the result is inexact as C4 has more than 1 significant digit
|
2166 |
|
|
// and C3 * 10^scale = 10^33
|
2167 |
|
|
// example of case that is treated here:
|
2168 |
|
|
// 100...0 * 10^e3 - 0.41 * 10^e3 =
|
2169 |
|
|
// 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
|
2170 |
|
|
// note that (e3 > expmin}
|
2171 |
|
|
// in order to round, subtract R64 from 10^34 and then compare
|
2172 |
|
|
// C4 - R64 * 10^(q4-1) with 1/2 ulp
|
2173 |
|
|
// calculate 10^34 - R64
|
2174 |
|
|
res.w[1] = 0x0001ed09bead87c0ull;
|
2175 |
|
|
res.w[0] = 0x378d8e6400000000ull - R64;
|
2176 |
|
|
z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
|
2177 |
|
|
// calculate C4 - R64 * 10^(q4-1); this is a rare case and
|
2178 |
|
|
// R64 is small, 1 <= R64 <= 9
|
2179 |
|
|
e3 = e3 - 1;
|
2180 |
|
|
if (is_inexact_lt_midpoint) {
|
2181 |
|
|
is_inexact_lt_midpoint = 0;
|
2182 |
|
|
is_inexact_gt_midpoint = 1;
|
2183 |
|
|
} else if (is_inexact_gt_midpoint) {
|
2184 |
|
|
is_inexact_gt_midpoint = 0;
|
2185 |
|
|
is_inexact_lt_midpoint = 1;
|
2186 |
|
|
} else if (is_midpoint_lt_even) {
|
2187 |
|
|
is_midpoint_lt_even = 0;
|
2188 |
|
|
is_midpoint_gt_even = 1;
|
2189 |
|
|
} else if (is_midpoint_gt_even) {
|
2190 |
|
|
is_midpoint_gt_even = 0;
|
2191 |
|
|
is_midpoint_lt_even = 1;
|
2192 |
|
|
} else {
|
2193 |
|
|
;
|
2194 |
|
|
}
|
2195 |
|
|
// the result is always inexact, and never tiny
|
2196 |
|
|
// check for overflow for RN
|
2197 |
|
|
if (e3 > expmax) {
|
2198 |
|
|
if (rnd_mode == ROUNDING_TO_NEAREST) {
|
2199 |
|
|
res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
|
2200 |
|
|
res.w[0] = 0x0000000000000000ull;
|
2201 |
|
|
*pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
|
2202 |
|
|
} else {
|
2203 |
|
|
rounding_correction (rnd_mode,
|
2204 |
|
|
is_inexact_lt_midpoint,
|
2205 |
|
|
is_inexact_gt_midpoint,
|
2206 |
|
|
is_midpoint_lt_even,
|
2207 |
|
|
is_midpoint_gt_even, e3, &res,
|
2208 |
|
|
pfpsf);
|
2209 |
|
|
}
|
2210 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
2211 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
2212 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
2213 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
2214 |
|
|
BID_SWAP128 (res);
|
2215 |
|
|
BID_RETURN (res)
|
2216 |
|
|
}
|
2217 |
|
|
// set the inexact flag
|
2218 |
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
2219 |
|
|
res.w[1] =
|
2220 |
|
|
z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
|
2221 |
|
|
if (rnd_mode != ROUNDING_TO_NEAREST) {
|
2222 |
|
|
rounding_correction (rnd_mode,
|
2223 |
|
|
is_inexact_lt_midpoint,
|
2224 |
|
|
is_inexact_gt_midpoint,
|
2225 |
|
|
is_midpoint_lt_even,
|
2226 |
|
|
is_midpoint_gt_even, e3, &res,
|
2227 |
|
|
pfpsf);
|
2228 |
|
|
}
|
2229 |
|
|
z_exp = res.w[1] & MASK_EXP;
|
2230 |
|
|
} // end result is inexact
|
2231 |
|
|
} // end q4 > 1
|
2232 |
|
|
} else { // if (e3 = emin)
|
2233 |
|
|
// if e3 = expmin the result is also tiny (the condition for
|
2234 |
|
|
// tininess is C4 > 050...0 [q4 digits] which is met because
|
2235 |
|
|
// the msd of C4 is not zero)
|
2236 |
|
|
// the result is tiny and inexact in all rounding modes;
|
2237 |
|
|
// it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
|
2238 |
|
|
// gt_half_ulp to calculate)
|
2239 |
|
|
// if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged
|
2240 |
|
|
|
2241 |
|
|
// p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
|
2242 |
|
|
if (gt_half_ulp) { // res = 10^33 - 1
|
2243 |
|
|
res.w[1] = 0x0000314dc6448d93ull;
|
2244 |
|
|
res.w[0] = 0x38c15b09ffffffffull;
|
2245 |
|
|
} else {
|
2246 |
|
|
res.w[1] = 0x0000314dc6448d93ull;
|
2247 |
|
|
res.w[0] = 0x38c15b0a00000000ull;
|
2248 |
|
|
}
|
2249 |
|
|
res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
|
2250 |
|
|
*pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later
|
2251 |
|
|
|
2252 |
|
|
if (eq_half_ulp) {
|
2253 |
|
|
is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
|
2254 |
|
|
} else if (lt_half_ulp) {
|
2255 |
|
|
is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value
|
2256 |
|
|
} else { // if (gt_half_ulp)
|
2257 |
|
|
is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
|
2258 |
|
|
}
|
2259 |
|
|
|
2260 |
|
|
if (rnd_mode != ROUNDING_TO_NEAREST) {
|
2261 |
|
|
rounding_correction (rnd_mode,
|
2262 |
|
|
is_inexact_lt_midpoint,
|
2263 |
|
|
is_inexact_gt_midpoint,
|
2264 |
|
|
is_midpoint_lt_even,
|
2265 |
|
|
is_midpoint_gt_even, e3, &res,
|
2266 |
|
|
pfpsf);
|
2267 |
|
|
z_exp = res.w[1] & MASK_EXP;
|
2268 |
|
|
}
|
2269 |
|
|
} // end e3 = emin
|
2270 |
|
|
// set the inexact flag (if the result was not exact)
|
2271 |
|
|
if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
|
2272 |
|
|
is_midpoint_lt_even || is_midpoint_gt_even)
|
2273 |
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
2274 |
|
|
} // end 10^33
|
2275 |
|
|
} // end if (p_sign != z_sign)
|
2276 |
|
|
res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
|
2277 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
2278 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
2279 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
2280 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
2281 |
|
|
BID_SWAP128 (res);
|
2282 |
|
|
BID_RETURN (res)
|
2283 |
|
|
|
2284 |
|
|
} else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
|
2285 |
|
|
(q3 <= delta && delta + q4 <= p34) || // Case (3)
|
2286 |
|
|
(delta < q3 && p34 < delta + q4) || // Case (4)
|
2287 |
|
|
(delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
|
2288 |
|
|
(delta + q4 < q3)) && // Case (6)
|
2289 |
|
|
!(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)
|
2290 |
|
|
|
2291 |
|
|
// the result has the sign of z
|
2292 |
|
|
|
2293 |
|
|
if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
|
2294 |
|
|
(delta < q3 && p34 < delta + q4)) { // Case (4)
|
2295 |
|
|
// round first the sum x * y + z with unbounded exponent
|
2296 |
|
|
// scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
|
2297 |
|
|
// 1 <= scale <= 33
|
2298 |
|
|
// calculate res = C3 * 10^scale
|
2299 |
|
|
scale = p34 - q3;
|
2300 |
|
|
x0 = delta + q4 - p34;
|
2301 |
|
|
} else if (delta + q4 < q3) { // Case (6)
|
2302 |
|
|
// make Case (6) look like Case (3) or Case (5) with scale = 0
|
2303 |
|
|
// by scaling up C4 by 10^(q3 - delta - q4)
|
2304 |
|
|
scale = q3 - delta - q4; // 1 <= scale <= 33
|
2305 |
|
|
if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
|
2306 |
|
|
if (scale <= 19) { // 10^scale fits in 64 bits
|
2307 |
|
|
// 64 x 64 C4.w[0] * ten2k64[scale]
|
2308 |
|
|
__mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]);
|
2309 |
|
|
} else { // 10^scale fits in 128 bits
|
2310 |
|
|
// 64 x 128 C4.w[0] * ten2k128[scale - 20]
|
2311 |
|
|
__mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]);
|
2312 |
|
|
}
|
2313 |
|
|
} else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
|
2314 |
|
|
// 64 x 128 ten2k64[scale] * C4
|
2315 |
|
|
__mul_128x64_to_128 (P128, ten2k64[scale], C4);
|
2316 |
|
|
}
|
2317 |
|
|
C4.w[0] = P128.w[0];
|
2318 |
|
|
C4.w[1] = P128.w[1];
|
2319 |
|
|
// e4 does not need adjustment, as it is not used from this point on
|
2320 |
|
|
scale = 0;
|
2321 |
|
|
x0 = 0;
|
2322 |
|
|
// now Case (6) looks like Case (3) or Case (5) with scale = 0
|
2323 |
|
|
} else { // if Case (3) or Case (5)
|
2324 |
|
|
// Note: Case (3) is similar to Case (2), but scale differs and the
|
2325 |
|
|
// result is exact, unless it is tiny (so x0 = 0 when calculating the
|
2326 |
|
|
// result with unbounded exponent)
|
2327 |
|
|
|
2328 |
|
|
// calculate first the sum x * y + z with unbounded exponent (exact)
|
2329 |
|
|
// scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
|
2330 |
|
|
// 1 <= scale <= 33
|
2331 |
|
|
// calculate res = C3 * 10^scale
|
2332 |
|
|
scale = delta + q4 - q3;
|
2333 |
|
|
x0 = 0;
|
2334 |
|
|
// Note: the comments which follow refer [mainly] to Case (2)]
|
2335 |
|
|
}
|
2336 |
|
|
|
2337 |
|
|
case2_repeat:
|
2338 |
|
|
if (scale == 0) { // this could happen e.g. if we return to case2_repeat
|
2339 |
|
|
// or in Case (4)
|
2340 |
|
|
res.w[1] = C3.w[1];
|
2341 |
|
|
res.w[0] = C3.w[0];
|
2342 |
|
|
} else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
|
2343 |
|
|
if (scale <= 19) { // 10^scale fits in 64 bits
|
2344 |
|
|
// 64 x 64 C3.w[0] * ten2k64[scale]
|
2345 |
|
|
__mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
|
2346 |
|
|
} else { // 10^scale fits in 128 bits
|
2347 |
|
|
// 64 x 128 C3.w[0] * ten2k128[scale - 20]
|
2348 |
|
|
__mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
|
2349 |
|
|
}
|
2350 |
|
|
} else { // z fits in 128 bits, but 10^scale must fit in 64 bits
|
2351 |
|
|
// 64 x 128 ten2k64[scale] * C3
|
2352 |
|
|
__mul_128x64_to_128 (res, ten2k64[scale], C3);
|
2353 |
|
|
}
|
2354 |
|
|
// e3 is already calculated
|
2355 |
|
|
e3 = e3 - scale;
|
2356 |
|
|
// now res = C3 * 10^scale and e3 = e3 - scale
|
2357 |
|
|
// Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
|
2358 |
|
|
// because the result was too small
|
2359 |
|
|
|
2360 |
|
|
// round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
|
2361 |
|
|
// 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
|
2362 |
|
|
// Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
|
2363 |
|
|
// the rounding fits in 128 bits!)
|
2364 |
|
|
// x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
|
2365 |
|
|
// because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
|
2366 |
|
|
if (x0 == 0) { // this could happen only if we return to case2_repeat, or
|
2367 |
|
|
// for Case (3) or Case (6)
|
2368 |
|
|
R128.w[1] = C4.w[1];
|
2369 |
|
|
R128.w[0] = C4.w[0];
|
2370 |
|
|
} else if (q4 <= 18) {
|
2371 |
|
|
// 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
|
2372 |
|
|
round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
|
2373 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
2374 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
|
2375 |
|
|
if (incr_exp) {
|
2376 |
|
|
// R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
|
2377 |
|
|
R64 = ten2k64[q4 - x0];
|
2378 |
|
|
}
|
2379 |
|
|
R128.w[1] = 0;
|
2380 |
|
|
R128.w[0] = R64;
|
2381 |
|
|
} else if (q4 <= 38) {
|
2382 |
|
|
// 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
|
2383 |
|
|
P128.w[1] = C4.w[1];
|
2384 |
|
|
P128.w[0] = C4.w[0];
|
2385 |
|
|
round128_19_38 (q4, x0, P128, &R128, &incr_exp,
|
2386 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
2387 |
|
|
&is_inexact_lt_midpoint,
|
2388 |
|
|
&is_inexact_gt_midpoint);
|
2389 |
|
|
if (incr_exp) {
|
2390 |
|
|
// R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
|
2391 |
|
|
if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
|
2392 |
|
|
R128.w[0] = ten2k64[q4 - x0];
|
2393 |
|
|
// R128.w[1] stays 0
|
2394 |
|
|
} else { // 20 <= q4 - x0 <= 37
|
2395 |
|
|
R128.w[0] = ten2k128[q4 - x0 - 20].w[0];
|
2396 |
|
|
R128.w[1] = ten2k128[q4 - x0 - 20].w[1];
|
2397 |
|
|
}
|
2398 |
|
|
}
|
2399 |
|
|
} else if (q4 <= 57) {
|
2400 |
|
|
// 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
|
2401 |
|
|
P192.w[2] = C4.w[2];
|
2402 |
|
|
P192.w[1] = C4.w[1];
|
2403 |
|
|
P192.w[0] = C4.w[0];
|
2404 |
|
|
round192_39_57 (q4, x0, P192, &R192, &incr_exp,
|
2405 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
2406 |
|
|
&is_inexact_lt_midpoint,
|
2407 |
|
|
&is_inexact_gt_midpoint);
|
2408 |
|
|
// R192.w[2] is always 0
|
2409 |
|
|
if (incr_exp) {
|
2410 |
|
|
// R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
|
2411 |
|
|
if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
|
2412 |
|
|
R192.w[0] = ten2k64[q4 - x0];
|
2413 |
|
|
// R192.w[1] stays 0
|
2414 |
|
|
// R192.w[2] stays 0
|
2415 |
|
|
} else { // 20 <= q4 - x0 <= 33
|
2416 |
|
|
R192.w[0] = ten2k128[q4 - x0 - 20].w[0];
|
2417 |
|
|
R192.w[1] = ten2k128[q4 - x0 - 20].w[1];
|
2418 |
|
|
// R192.w[2] stays 0
|
2419 |
|
|
}
|
2420 |
|
|
}
|
2421 |
|
|
R128.w[1] = R192.w[1];
|
2422 |
|
|
R128.w[0] = R192.w[0];
|
2423 |
|
|
} else {
|
2424 |
|
|
// 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
|
2425 |
|
|
round256_58_76 (q4, x0, C4, &R256, &incr_exp,
|
2426 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
2427 |
|
|
&is_inexact_lt_midpoint,
|
2428 |
|
|
&is_inexact_gt_midpoint);
|
2429 |
|
|
// R256.w[3] and R256.w[2] are always 0
|
2430 |
|
|
if (incr_exp) {
|
2431 |
|
|
// R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
|
2432 |
|
|
if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
|
2433 |
|
|
R256.w[0] = ten2k64[q4 - x0];
|
2434 |
|
|
// R256.w[1] stays 0
|
2435 |
|
|
// R256.w[2] stays 0
|
2436 |
|
|
// R256.w[3] stays 0
|
2437 |
|
|
} else { // 20 <= q4 - x0 <= 33
|
2438 |
|
|
R256.w[0] = ten2k128[q4 - x0 - 20].w[0];
|
2439 |
|
|
R256.w[1] = ten2k128[q4 - x0 - 20].w[1];
|
2440 |
|
|
// R256.w[2] stays 0
|
2441 |
|
|
// R256.w[3] stays 0
|
2442 |
|
|
}
|
2443 |
|
|
}
|
2444 |
|
|
R128.w[1] = R256.w[1];
|
2445 |
|
|
R128.w[0] = R256.w[0];
|
2446 |
|
|
}
|
2447 |
|
|
// now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
|
2448 |
|
|
// rounded to nearest, which were copied into R128
|
2449 |
|
|
if (z_sign == p_sign) {
|
2450 |
|
|
lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale
|
2451 |
|
|
// the sum can result in [up to] p34 or p34 + 1 digits
|
2452 |
|
|
res.w[0] = res.w[0] + R128.w[0];
|
2453 |
|
|
res.w[1] = res.w[1] + R128.w[1];
|
2454 |
|
|
if (res.w[0] < R128.w[0])
|
2455 |
|
|
res.w[1]++; // carry
|
2456 |
|
|
// if res > 10^34 - 1 need to increase x0 and decrease scale by 1
|
2457 |
|
|
if (res.w[1] > 0x0001ed09bead87c0ull ||
|
2458 |
|
|
(res.w[1] == 0x0001ed09bead87c0ull &&
|
2459 |
|
|
res.w[0] > 0x378d8e63ffffffffull)) {
|
2460 |
|
|
// avoid double rounding error
|
2461 |
|
|
is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
|
2462 |
|
|
is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
|
2463 |
|
|
is_midpoint_lt_even0 = is_midpoint_lt_even;
|
2464 |
|
|
is_midpoint_gt_even0 = is_midpoint_gt_even;
|
2465 |
|
|
is_inexact_lt_midpoint = 0;
|
2466 |
|
|
is_inexact_gt_midpoint = 0;
|
2467 |
|
|
is_midpoint_lt_even = 0;
|
2468 |
|
|
is_midpoint_gt_even = 0;
|
2469 |
|
|
P128.w[1] = res.w[1];
|
2470 |
|
|
P128.w[0] = res.w[0];
|
2471 |
|
|
round128_19_38 (35, 1, P128, &res, &incr_exp,
|
2472 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
2473 |
|
|
&is_inexact_lt_midpoint,
|
2474 |
|
|
&is_inexact_gt_midpoint);
|
2475 |
|
|
// incr_exp is 0 with certainty in this case
|
2476 |
|
|
// avoid a double rounding error
|
2477 |
|
|
if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
|
2478 |
|
|
is_midpoint_lt_even) { // double rounding error upward
|
2479 |
|
|
// res = res - 1
|
2480 |
|
|
res.w[0]--;
|
2481 |
|
|
if (res.w[0] == 0xffffffffffffffffull)
|
2482 |
|
|
res.w[1]--;
|
2483 |
|
|
// Note: a double rounding error upward is not possible; for this
|
2484 |
|
|
// the result after the first rounding would have to be 99...95
|
2485 |
|
|
// (35 digits in all), possibly followed by a number of zeros; this
|
2486 |
|
|
// not possible in Cases (2)-(6) or (15)-(17) which may get here
|
2487 |
|
|
is_midpoint_lt_even = 0;
|
2488 |
|
|
is_inexact_lt_midpoint = 1;
|
2489 |
|
|
} else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
|
2490 |
|
|
is_midpoint_gt_even) { // double rounding error downward
|
2491 |
|
|
// res = res + 1
|
2492 |
|
|
res.w[0]++;
|
2493 |
|
|
if (res.w[0] == 0)
|
2494 |
|
|
res.w[1]++;
|
2495 |
|
|
is_midpoint_gt_even = 0;
|
2496 |
|
|
is_inexact_gt_midpoint = 1;
|
2497 |
|
|
} else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
|
2498 |
|
|
!is_inexact_lt_midpoint
|
2499 |
|
|
&& !is_inexact_gt_midpoint) {
|
2500 |
|
|
// if this second rounding was exact the result may still be
|
2501 |
|
|
// inexact because of the first rounding
|
2502 |
|
|
if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
|
2503 |
|
|
is_inexact_gt_midpoint = 1;
|
2504 |
|
|
}
|
2505 |
|
|
if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
|
2506 |
|
|
is_inexact_lt_midpoint = 1;
|
2507 |
|
|
}
|
2508 |
|
|
} else if (is_midpoint_gt_even &&
|
2509 |
|
|
(is_inexact_gt_midpoint0
|
2510 |
|
|
|| is_midpoint_lt_even0)) {
|
2511 |
|
|
// pulled up to a midpoint
|
2512 |
|
|
is_inexact_lt_midpoint = 1;
|
2513 |
|
|
is_inexact_gt_midpoint = 0;
|
2514 |
|
|
is_midpoint_lt_even = 0;
|
2515 |
|
|
is_midpoint_gt_even = 0;
|
2516 |
|
|
} else if (is_midpoint_lt_even &&
|
2517 |
|
|
(is_inexact_lt_midpoint0
|
2518 |
|
|
|| is_midpoint_gt_even0)) {
|
2519 |
|
|
// pulled down to a midpoint
|
2520 |
|
|
is_inexact_lt_midpoint = 0;
|
2521 |
|
|
is_inexact_gt_midpoint = 1;
|
2522 |
|
|
is_midpoint_lt_even = 0;
|
2523 |
|
|
is_midpoint_gt_even = 0;
|
2524 |
|
|
} else {
|
2525 |
|
|
;
|
2526 |
|
|
}
|
2527 |
|
|
// adjust exponent
|
2528 |
|
|
e3 = e3 + 1;
|
2529 |
|
|
if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
|
2530 |
|
|
!is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
|
2531 |
|
|
if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
|
2532 |
|
|
is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
|
2533 |
|
|
is_inexact_lt_midpoint = 1;
|
2534 |
|
|
}
|
2535 |
|
|
}
|
2536 |
|
|
} else {
|
2537 |
|
|
// this is the result rounded with unbounded exponent, unless a
|
2538 |
|
|
// correction is needed
|
2539 |
|
|
res.w[1] = res.w[1] & MASK_COEFF;
|
2540 |
|
|
if (lsb == 1) {
|
2541 |
|
|
if (is_midpoint_gt_even) {
|
2542 |
|
|
// res = res + 1
|
2543 |
|
|
is_midpoint_gt_even = 0;
|
2544 |
|
|
is_midpoint_lt_even = 1;
|
2545 |
|
|
res.w[0]++;
|
2546 |
|
|
if (res.w[0] == 0x0)
|
2547 |
|
|
res.w[1]++;
|
2548 |
|
|
// check for rounding overflow
|
2549 |
|
|
if (res.w[1] == 0x0001ed09bead87c0ull &&
|
2550 |
|
|
res.w[0] == 0x378d8e6400000000ull) {
|
2551 |
|
|
// res = 10^34 => rounding overflow
|
2552 |
|
|
res.w[1] = 0x0000314dc6448d93ull;
|
2553 |
|
|
res.w[0] = 0x38c15b0a00000000ull; // 10^33
|
2554 |
|
|
e3++;
|
2555 |
|
|
}
|
2556 |
|
|
} else if (is_midpoint_lt_even) {
|
2557 |
|
|
// res = res - 1
|
2558 |
|
|
is_midpoint_lt_even = 0;
|
2559 |
|
|
is_midpoint_gt_even = 1;
|
2560 |
|
|
res.w[0]--;
|
2561 |
|
|
if (res.w[0] == 0xffffffffffffffffull)
|
2562 |
|
|
res.w[1]--;
|
2563 |
|
|
// if the result is pure zero, the sign depends on the rounding
|
2564 |
|
|
// mode (x*y and z had opposite signs)
|
2565 |
|
|
if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
|
2566 |
|
|
if (rnd_mode != ROUNDING_DOWN)
|
2567 |
|
|
z_sign = 0x0000000000000000ull;
|
2568 |
|
|
else
|
2569 |
|
|
z_sign = 0x8000000000000000ull;
|
2570 |
|
|
// the exponent is max (e3, expmin)
|
2571 |
|
|
res.w[1] = 0x0;
|
2572 |
|
|
res.w[0] = 0x0;
|
2573 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
2574 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
2575 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
2576 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
2577 |
|
|
BID_SWAP128 (res);
|
2578 |
|
|
BID_RETURN (res)
|
2579 |
|
|
}
|
2580 |
|
|
} else {
|
2581 |
|
|
;
|
2582 |
|
|
}
|
2583 |
|
|
}
|
2584 |
|
|
}
|
2585 |
|
|
} else { // if (z_sign != p_sign)
|
2586 |
|
|
lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
|
2587 |
|
|
// used to swap rounding indicators if p_sign != z_sign
|
2588 |
|
|
// the sum can result in [up to] p34 or p34 - 1 digits
|
2589 |
|
|
tmp64 = res.w[0];
|
2590 |
|
|
res.w[0] = res.w[0] - R128.w[0];
|
2591 |
|
|
res.w[1] = res.w[1] - R128.w[1];
|
2592 |
|
|
if (res.w[0] > tmp64)
|
2593 |
|
|
res.w[1]--; // borrow
|
2594 |
|
|
// if res < 10^33 and exp > expmin need to decrease x0 and
|
2595 |
|
|
// increase scale by 1
|
2596 |
|
|
if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull ||
|
2597 |
|
|
(res.w[1] == 0x0000314dc6448d93ull &&
|
2598 |
|
|
res.w[0] < 0x38c15b0a00000000ull)) ||
|
2599 |
|
|
(is_inexact_lt_midpoint
|
2600 |
|
|
&& res.w[1] == 0x0000314dc6448d93ull
|
2601 |
|
|
&& res.w[0] == 0x38c15b0a00000000ull))
|
2602 |
|
|
&& x0 >= 1) {
|
2603 |
|
|
x0 = x0 - 1;
|
2604 |
|
|
// first restore e3, otherwise it will be too small
|
2605 |
|
|
e3 = e3 + scale;
|
2606 |
|
|
scale = scale + 1;
|
2607 |
|
|
is_inexact_lt_midpoint = 0;
|
2608 |
|
|
is_inexact_gt_midpoint = 0;
|
2609 |
|
|
is_midpoint_lt_even = 0;
|
2610 |
|
|
is_midpoint_gt_even = 0;
|
2611 |
|
|
incr_exp = 0;
|
2612 |
|
|
goto case2_repeat;
|
2613 |
|
|
}
|
2614 |
|
|
// else this is the result rounded with unbounded exponent;
|
2615 |
|
|
// because the result has opposite sign to that of C4 which was
|
2616 |
|
|
// rounded, need to change the rounding indicators
|
2617 |
|
|
if (is_inexact_lt_midpoint) {
|
2618 |
|
|
is_inexact_lt_midpoint = 0;
|
2619 |
|
|
is_inexact_gt_midpoint = 1;
|
2620 |
|
|
} else if (is_inexact_gt_midpoint) {
|
2621 |
|
|
is_inexact_gt_midpoint = 0;
|
2622 |
|
|
is_inexact_lt_midpoint = 1;
|
2623 |
|
|
} else if (lsb == 0) {
|
2624 |
|
|
if (is_midpoint_lt_even) {
|
2625 |
|
|
is_midpoint_lt_even = 0;
|
2626 |
|
|
is_midpoint_gt_even = 1;
|
2627 |
|
|
} else if (is_midpoint_gt_even) {
|
2628 |
|
|
is_midpoint_gt_even = 0;
|
2629 |
|
|
is_midpoint_lt_even = 1;
|
2630 |
|
|
} else {
|
2631 |
|
|
;
|
2632 |
|
|
}
|
2633 |
|
|
} else if (lsb == 1) {
|
2634 |
|
|
if (is_midpoint_lt_even) {
|
2635 |
|
|
// res = res + 1
|
2636 |
|
|
res.w[0]++;
|
2637 |
|
|
if (res.w[0] == 0x0)
|
2638 |
|
|
res.w[1]++;
|
2639 |
|
|
// check for rounding overflow
|
2640 |
|
|
if (res.w[1] == 0x0001ed09bead87c0ull &&
|
2641 |
|
|
res.w[0] == 0x378d8e6400000000ull) {
|
2642 |
|
|
// res = 10^34 => rounding overflow
|
2643 |
|
|
res.w[1] = 0x0000314dc6448d93ull;
|
2644 |
|
|
res.w[0] = 0x38c15b0a00000000ull; // 10^33
|
2645 |
|
|
e3++;
|
2646 |
|
|
}
|
2647 |
|
|
} else if (is_midpoint_gt_even) {
|
2648 |
|
|
// res = res - 1
|
2649 |
|
|
res.w[0]--;
|
2650 |
|
|
if (res.w[0] == 0xffffffffffffffffull)
|
2651 |
|
|
res.w[1]--;
|
2652 |
|
|
// if the result is pure zero, the sign depends on the rounding
|
2653 |
|
|
// mode (x*y and z had opposite signs)
|
2654 |
|
|
if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
|
2655 |
|
|
if (rnd_mode != ROUNDING_DOWN)
|
2656 |
|
|
z_sign = 0x0000000000000000ull;
|
2657 |
|
|
else
|
2658 |
|
|
z_sign = 0x8000000000000000ull;
|
2659 |
|
|
// the exponent is max (e3, expmin)
|
2660 |
|
|
res.w[1] = 0x0;
|
2661 |
|
|
res.w[0] = 0x0;
|
2662 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
2663 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
2664 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
2665 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
2666 |
|
|
BID_SWAP128 (res);
|
2667 |
|
|
BID_RETURN (res)
|
2668 |
|
|
}
|
2669 |
|
|
} else {
|
2670 |
|
|
;
|
2671 |
|
|
}
|
2672 |
|
|
} else {
|
2673 |
|
|
;
|
2674 |
|
|
}
|
2675 |
|
|
}
|
2676 |
|
|
// check for underflow
|
2677 |
|
|
if (e3 == expmin) { // and if significand < 10^33 => result is tiny
|
2678 |
|
|
if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
|
2679 |
|
|
((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
|
2680 |
|
|
res.w[0] < 0x38c15b0a00000000ull)) {
|
2681 |
|
|
is_tiny = 1;
|
2682 |
|
|
}
|
2683 |
|
|
} else if (e3 < expmin) {
|
2684 |
|
|
// the result is tiny, so we must truncate more of res
|
2685 |
|
|
is_tiny = 1;
|
2686 |
|
|
x0 = expmin - e3;
|
2687 |
|
|
is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
|
2688 |
|
|
is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
|
2689 |
|
|
is_midpoint_lt_even0 = is_midpoint_lt_even;
|
2690 |
|
|
is_midpoint_gt_even0 = is_midpoint_gt_even;
|
2691 |
|
|
is_inexact_lt_midpoint = 0;
|
2692 |
|
|
is_inexact_gt_midpoint = 0;
|
2693 |
|
|
is_midpoint_lt_even = 0;
|
2694 |
|
|
is_midpoint_gt_even = 0;
|
2695 |
|
|
// determine the number of decimal digits in res
|
2696 |
|
|
if (res.w[1] == 0x0) {
|
2697 |
|
|
// between 1 and 19 digits
|
2698 |
|
|
for (ind = 1; ind <= 19; ind++) {
|
2699 |
|
|
if (res.w[0] < ten2k64[ind]) {
|
2700 |
|
|
break;
|
2701 |
|
|
}
|
2702 |
|
|
}
|
2703 |
|
|
// ind digits
|
2704 |
|
|
} else if (res.w[1] < ten2k128[0].w[1] ||
|
2705 |
|
|
(res.w[1] == ten2k128[0].w[1]
|
2706 |
|
|
&& res.w[0] < ten2k128[0].w[0])) {
|
2707 |
|
|
// 20 digits
|
2708 |
|
|
ind = 20;
|
2709 |
|
|
} else { // between 21 and 38 digits
|
2710 |
|
|
for (ind = 1; ind <= 18; ind++) {
|
2711 |
|
|
if (res.w[1] < ten2k128[ind].w[1] ||
|
2712 |
|
|
(res.w[1] == ten2k128[ind].w[1] &&
|
2713 |
|
|
res.w[0] < ten2k128[ind].w[0])) {
|
2714 |
|
|
break;
|
2715 |
|
|
}
|
2716 |
|
|
}
|
2717 |
|
|
// ind + 20 digits
|
2718 |
|
|
ind = ind + 20;
|
2719 |
|
|
}
|
2720 |
|
|
|
2721 |
|
|
// at this point ind >= x0; because delta >= 2 on this path, the case
|
2722 |
|
|
// ind = x0 can occur only in Case (2) or case (3), when C3 has one
|
2723 |
|
|
// digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
|
2724 |
|
|
// the signs of x * y and z are opposite, and through cancellation
|
2725 |
|
|
// the most significant decimal digit in res has the weight
|
2726 |
|
|
// 10^(emin-1); however, it is clear that in this case the most
|
2727 |
|
|
// significant digit is 9, so the result before rounding is
|
2728 |
|
|
// 0.9... * 10^emin
|
2729 |
|
|
// Otherwise, ind > x0 because there are non-zero decimal digits in the
|
2730 |
|
|
// result with weight of at least 10^emin, and correction for underflow
|
2731 |
|
|
// can be carried out using the round*_*_2_* () routines
|
2732 |
|
|
if (x0 == ind) { // the result before rounding is 0.9... * 10^emin
|
2733 |
|
|
res.w[1] = 0x0;
|
2734 |
|
|
res.w[0] = 0x1;
|
2735 |
|
|
is_inexact_gt_midpoint = 1;
|
2736 |
|
|
} else if (ind <= 18) { // check that 2 <= ind
|
2737 |
|
|
// 2 <= ind <= 18, 1 <= x0 <= 17
|
2738 |
|
|
round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
|
2739 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
2740 |
|
|
&is_inexact_lt_midpoint,
|
2741 |
|
|
&is_inexact_gt_midpoint);
|
2742 |
|
|
if (incr_exp) {
|
2743 |
|
|
// R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
|
2744 |
|
|
R64 = ten2k64[ind - x0];
|
2745 |
|
|
}
|
2746 |
|
|
res.w[1] = 0;
|
2747 |
|
|
res.w[0] = R64;
|
2748 |
|
|
} else if (ind <= 38) {
|
2749 |
|
|
// 19 <= ind <= 38
|
2750 |
|
|
P128.w[1] = res.w[1];
|
2751 |
|
|
P128.w[0] = res.w[0];
|
2752 |
|
|
round128_19_38 (ind, x0, P128, &res, &incr_exp,
|
2753 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
2754 |
|
|
&is_inexact_lt_midpoint,
|
2755 |
|
|
&is_inexact_gt_midpoint);
|
2756 |
|
|
if (incr_exp) {
|
2757 |
|
|
// R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
|
2758 |
|
|
if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
|
2759 |
|
|
res.w[0] = ten2k64[ind - x0];
|
2760 |
|
|
// res.w[1] stays 0
|
2761 |
|
|
} else { // 20 <= ind - x0 <= 37
|
2762 |
|
|
res.w[0] = ten2k128[ind - x0 - 20].w[0];
|
2763 |
|
|
res.w[1] = ten2k128[ind - x0 - 20].w[1];
|
2764 |
|
|
}
|
2765 |
|
|
}
|
2766 |
|
|
}
|
2767 |
|
|
// avoid a double rounding error
|
2768 |
|
|
if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
|
2769 |
|
|
is_midpoint_lt_even) { // double rounding error upward
|
2770 |
|
|
// res = res - 1
|
2771 |
|
|
res.w[0]--;
|
2772 |
|
|
if (res.w[0] == 0xffffffffffffffffull)
|
2773 |
|
|
res.w[1]--;
|
2774 |
|
|
// Note: a double rounding error upward is not possible; for this
|
2775 |
|
|
// the result after the first rounding would have to be 99...95
|
2776 |
|
|
// (35 digits in all), possibly followed by a number of zeros; this
|
2777 |
|
|
// not possible in Cases (2)-(6) which may get here
|
2778 |
|
|
is_midpoint_lt_even = 0;
|
2779 |
|
|
is_inexact_lt_midpoint = 1;
|
2780 |
|
|
} else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
|
2781 |
|
|
is_midpoint_gt_even) { // double rounding error downward
|
2782 |
|
|
// res = res + 1
|
2783 |
|
|
res.w[0]++;
|
2784 |
|
|
if (res.w[0] == 0)
|
2785 |
|
|
res.w[1]++;
|
2786 |
|
|
is_midpoint_gt_even = 0;
|
2787 |
|
|
is_inexact_gt_midpoint = 1;
|
2788 |
|
|
} else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
|
2789 |
|
|
!is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
|
2790 |
|
|
// if this second rounding was exact the result may still be
|
2791 |
|
|
// inexact because of the first rounding
|
2792 |
|
|
if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
|
2793 |
|
|
is_inexact_gt_midpoint = 1;
|
2794 |
|
|
}
|
2795 |
|
|
if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
|
2796 |
|
|
is_inexact_lt_midpoint = 1;
|
2797 |
|
|
}
|
2798 |
|
|
} else if (is_midpoint_gt_even &&
|
2799 |
|
|
(is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
|
2800 |
|
|
// pulled up to a midpoint
|
2801 |
|
|
is_inexact_lt_midpoint = 1;
|
2802 |
|
|
is_inexact_gt_midpoint = 0;
|
2803 |
|
|
is_midpoint_lt_even = 0;
|
2804 |
|
|
is_midpoint_gt_even = 0;
|
2805 |
|
|
} else if (is_midpoint_lt_even &&
|
2806 |
|
|
(is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
|
2807 |
|
|
// pulled down to a midpoint
|
2808 |
|
|
is_inexact_lt_midpoint = 0;
|
2809 |
|
|
is_inexact_gt_midpoint = 1;
|
2810 |
|
|
is_midpoint_lt_even = 0;
|
2811 |
|
|
is_midpoint_gt_even = 0;
|
2812 |
|
|
} else {
|
2813 |
|
|
;
|
2814 |
|
|
}
|
2815 |
|
|
// adjust exponent
|
2816 |
|
|
e3 = e3 + x0;
|
2817 |
|
|
if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
|
2818 |
|
|
!is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
|
2819 |
|
|
if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
|
2820 |
|
|
is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
|
2821 |
|
|
is_inexact_lt_midpoint = 1;
|
2822 |
|
|
}
|
2823 |
|
|
}
|
2824 |
|
|
} else {
|
2825 |
|
|
; // not underflow
|
2826 |
|
|
}
|
2827 |
|
|
// check for inexact result
|
2828 |
|
|
if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
|
2829 |
|
|
is_midpoint_lt_even || is_midpoint_gt_even) {
|
2830 |
|
|
// set the inexact flag
|
2831 |
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
2832 |
|
|
if (is_tiny)
|
2833 |
|
|
*pfpsf |= UNDERFLOW_EXCEPTION;
|
2834 |
|
|
}
|
2835 |
|
|
// now check for significand = 10^34 (may have resulted from going
|
2836 |
|
|
// back to case2_repeat)
|
2837 |
|
|
if (res.w[1] == 0x0001ed09bead87c0ull &&
|
2838 |
|
|
res.w[0] == 0x378d8e6400000000ull) { // if res = 10^34
|
2839 |
|
|
res.w[1] = 0x0000314dc6448d93ull; // res = 10^33
|
2840 |
|
|
res.w[0] = 0x38c15b0a00000000ull;
|
2841 |
|
|
e3 = e3 + 1;
|
2842 |
|
|
}
|
2843 |
|
|
res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
|
2844 |
|
|
// check for overflow
|
2845 |
|
|
if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) {
|
2846 |
|
|
res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
|
2847 |
|
|
res.w[0] = 0x0000000000000000ull;
|
2848 |
|
|
*pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
|
2849 |
|
|
}
|
2850 |
|
|
if (rnd_mode != ROUNDING_TO_NEAREST) {
|
2851 |
|
|
rounding_correction (rnd_mode,
|
2852 |
|
|
is_inexact_lt_midpoint,
|
2853 |
|
|
is_inexact_gt_midpoint,
|
2854 |
|
|
is_midpoint_lt_even, is_midpoint_gt_even,
|
2855 |
|
|
e3, &res, pfpsf);
|
2856 |
|
|
}
|
2857 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
2858 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
2859 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
2860 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
2861 |
|
|
BID_SWAP128 (res);
|
2862 |
|
|
BID_RETURN (res)
|
2863 |
|
|
|
2864 |
|
|
} else {
|
2865 |
|
|
|
2866 |
|
|
// we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
|
2867 |
|
|
// the signs of x*y and z are opposite; in these cases massive
|
2868 |
|
|
// cancellation can occur, so it is better to scale either C3 or C4 and
|
2869 |
|
|
// to perform the subtraction before rounding; rounding is performed
|
2870 |
|
|
// next, depending on the number of decimal digits in the result and on
|
2871 |
|
|
// the exponent value
|
2872 |
|
|
// Note: overlow is not possible in this case
|
2873 |
|
|
// this is similar to Cases (15), (16), and (17)
|
2874 |
|
|
|
2875 |
|
|
if (delta + q4 < q3) { // from Case (6)
|
2876 |
|
|
// Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and
|
2877 |
|
|
// (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
|
2878 |
|
|
// and call add_and_round; delta stays positive
|
2879 |
|
|
// C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
|
2880 |
|
|
P128.w[1] = C3.w[1];
|
2881 |
|
|
P128.w[0] = C3.w[0];
|
2882 |
|
|
C3.w[1] = C4.w[1];
|
2883 |
|
|
C3.w[0] = C4.w[0];
|
2884 |
|
|
C4.w[1] = P128.w[1];
|
2885 |
|
|
C4.w[0] = P128.w[0];
|
2886 |
|
|
ind = q3;
|
2887 |
|
|
q3 = q4;
|
2888 |
|
|
q4 = ind;
|
2889 |
|
|
ind = e3;
|
2890 |
|
|
e3 = e4;
|
2891 |
|
|
e4 = ind;
|
2892 |
|
|
tmp_sign = z_sign;
|
2893 |
|
|
z_sign = p_sign;
|
2894 |
|
|
p_sign = tmp_sign;
|
2895 |
|
|
} else { // from Cases (2), (3), (4), (5)
|
2896 |
|
|
// In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
|
2897 |
|
|
// scaled up by q4 + delta - q3; this is the same as in Cases (15),
|
2898 |
|
|
// (16), and (17) if we just change the sign of delta
|
2899 |
|
|
delta = -delta;
|
2900 |
|
|
}
|
2901 |
|
|
add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
|
2902 |
|
|
rnd_mode, &is_midpoint_lt_even,
|
2903 |
|
|
&is_midpoint_gt_even, &is_inexact_lt_midpoint,
|
2904 |
|
|
&is_inexact_gt_midpoint, pfpsf, &res);
|
2905 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
2906 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
2907 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
2908 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
2909 |
|
|
BID_SWAP128 (res);
|
2910 |
|
|
BID_RETURN (res)
|
2911 |
|
|
|
2912 |
|
|
}
|
2913 |
|
|
|
2914 |
|
|
} else { // if delta < 0
|
2915 |
|
|
|
2916 |
|
|
delta = -delta;
|
2917 |
|
|
|
2918 |
|
|
if (p34 < q4 && q4 <= delta) { // Case (7)
|
2919 |
|
|
|
2920 |
|
|
// truncate C4 to p34 digits into res
|
2921 |
|
|
// x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
|
2922 |
|
|
x0 = q4 - p34;
|
2923 |
|
|
if (q4 <= 38) {
|
2924 |
|
|
P128.w[1] = C4.w[1];
|
2925 |
|
|
P128.w[0] = C4.w[0];
|
2926 |
|
|
round128_19_38 (q4, x0, P128, &res, &incr_exp,
|
2927 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
2928 |
|
|
&is_inexact_lt_midpoint,
|
2929 |
|
|
&is_inexact_gt_midpoint);
|
2930 |
|
|
} else if (q4 <= 57) { // 35 <= q4 <= 57
|
2931 |
|
|
P192.w[2] = C4.w[2];
|
2932 |
|
|
P192.w[1] = C4.w[1];
|
2933 |
|
|
P192.w[0] = C4.w[0];
|
2934 |
|
|
round192_39_57 (q4, x0, P192, &R192, &incr_exp,
|
2935 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
2936 |
|
|
&is_inexact_lt_midpoint,
|
2937 |
|
|
&is_inexact_gt_midpoint);
|
2938 |
|
|
res.w[0] = R192.w[0];
|
2939 |
|
|
res.w[1] = R192.w[1];
|
2940 |
|
|
} else { // if (q4 <= 68)
|
2941 |
|
|
round256_58_76 (q4, x0, C4, &R256, &incr_exp,
|
2942 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
2943 |
|
|
&is_inexact_lt_midpoint,
|
2944 |
|
|
&is_inexact_gt_midpoint);
|
2945 |
|
|
res.w[0] = R256.w[0];
|
2946 |
|
|
res.w[1] = R256.w[1];
|
2947 |
|
|
}
|
2948 |
|
|
e4 = e4 + x0;
|
2949 |
|
|
if (incr_exp) {
|
2950 |
|
|
e4 = e4 + 1;
|
2951 |
|
|
}
|
2952 |
|
|
if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
|
2953 |
|
|
!is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
|
2954 |
|
|
// if C4 rounded to p34 digits is exact then the result is inexact,
|
2955 |
|
|
// in a way that depends on the signs of x * y and z
|
2956 |
|
|
if (p_sign == z_sign) {
|
2957 |
|
|
is_inexact_lt_midpoint = 1;
|
2958 |
|
|
} else { // if (p_sign != z_sign)
|
2959 |
|
|
if (res.w[1] != 0x0000314dc6448d93ull ||
|
2960 |
|
|
res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33
|
2961 |
|
|
is_inexact_gt_midpoint = 1;
|
2962 |
|
|
} else { // res = 10^33 and exact is a special case
|
2963 |
|
|
// if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
|
2964 |
|
|
// if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
|
2965 |
|
|
// if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
|
2966 |
|
|
// Note: ulp is really ulp/10 (after borrow which propagates to msd)
|
2967 |
|
|
if (delta > p34 + 1) { // C3 < 1/2
|
2968 |
|
|
// res = 10^33, unchanged
|
2969 |
|
|
is_inexact_gt_midpoint = 1;
|
2970 |
|
|
} else { // if (delta == p34 + 1)
|
2971 |
|
|
if (q3 <= 19) {
|
2972 |
|
|
if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp
|
2973 |
|
|
// res = 10^33, unchanged
|
2974 |
|
|
is_inexact_gt_midpoint = 1;
|
2975 |
|
|
} else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp
|
2976 |
|
|
// res = 10^33, unchanged
|
2977 |
|
|
is_midpoint_lt_even = 1;
|
2978 |
|
|
} else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
|
2979 |
|
|
res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
|
2980 |
|
|
res.w[0] = 0x378d8e63ffffffffull;
|
2981 |
|
|
e4 = e4 - 1;
|
2982 |
|
|
is_inexact_lt_midpoint = 1;
|
2983 |
|
|
}
|
2984 |
|
|
} else { // if (20 <= q3 <=34)
|
2985 |
|
|
if (C3.w[1] < midpoint128[q3 - 20].w[1] ||
|
2986 |
|
|
(C3.w[1] == midpoint128[q3 - 20].w[1] &&
|
2987 |
|
|
C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
|
2988 |
|
|
// res = 10^33, unchanged
|
2989 |
|
|
is_inexact_gt_midpoint = 1;
|
2990 |
|
|
} else if (C3.w[1] == midpoint128[q3 - 20].w[1] &&
|
2991 |
|
|
C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
|
2992 |
|
|
// res = 10^33, unchanged
|
2993 |
|
|
is_midpoint_lt_even = 1;
|
2994 |
|
|
} else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
|
2995 |
|
|
res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
|
2996 |
|
|
res.w[0] = 0x378d8e63ffffffffull;
|
2997 |
|
|
e4 = e4 - 1;
|
2998 |
|
|
is_inexact_lt_midpoint = 1;
|
2999 |
|
|
}
|
3000 |
|
|
}
|
3001 |
|
|
}
|
3002 |
|
|
}
|
3003 |
|
|
}
|
3004 |
|
|
} else if (is_midpoint_lt_even) {
|
3005 |
|
|
if (z_sign != p_sign) {
|
3006 |
|
|
// needs correction: res = res - 1
|
3007 |
|
|
res.w[0] = res.w[0] - 1;
|
3008 |
|
|
if (res.w[0] == 0xffffffffffffffffull)
|
3009 |
|
|
res.w[1]--;
|
3010 |
|
|
// if it is (10^33-1)*10^e4 then the corect result is
|
3011 |
|
|
// (10^34-1)*10(e4-1)
|
3012 |
|
|
if (res.w[1] == 0x0000314dc6448d93ull &&
|
3013 |
|
|
res.w[0] == 0x38c15b09ffffffffull) {
|
3014 |
|
|
res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
|
3015 |
|
|
res.w[0] = 0x378d8e63ffffffffull;
|
3016 |
|
|
e4 = e4 - 1;
|
3017 |
|
|
}
|
3018 |
|
|
is_midpoint_lt_even = 0;
|
3019 |
|
|
is_inexact_lt_midpoint = 1;
|
3020 |
|
|
} else { // if (z_sign == p_sign)
|
3021 |
|
|
is_midpoint_lt_even = 0;
|
3022 |
|
|
is_inexact_gt_midpoint = 1;
|
3023 |
|
|
}
|
3024 |
|
|
} else if (is_midpoint_gt_even) {
|
3025 |
|
|
if (z_sign == p_sign) {
|
3026 |
|
|
// needs correction: res = res + 1 (cannot cross in the next binade)
|
3027 |
|
|
res.w[0] = res.w[0] + 1;
|
3028 |
|
|
if (res.w[0] == 0x0000000000000000ull)
|
3029 |
|
|
res.w[1]++;
|
3030 |
|
|
is_midpoint_gt_even = 0;
|
3031 |
|
|
is_inexact_gt_midpoint = 1;
|
3032 |
|
|
} else { // if (z_sign != p_sign)
|
3033 |
|
|
is_midpoint_gt_even = 0;
|
3034 |
|
|
is_inexact_lt_midpoint = 1;
|
3035 |
|
|
}
|
3036 |
|
|
} else {
|
3037 |
|
|
; // the rounded result is already correct
|
3038 |
|
|
}
|
3039 |
|
|
// check for overflow
|
3040 |
|
|
if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) {
|
3041 |
|
|
res.w[1] = p_sign | 0x7800000000000000ull;
|
3042 |
|
|
res.w[0] = 0x0000000000000000ull;
|
3043 |
|
|
*pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
|
3044 |
|
|
} else { // no overflow or not RN
|
3045 |
|
|
p_exp = ((UINT64) (e4 + 6176) << 49);
|
3046 |
|
|
res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
|
3047 |
|
|
}
|
3048 |
|
|
if (rnd_mode != ROUNDING_TO_NEAREST) {
|
3049 |
|
|
rounding_correction (rnd_mode,
|
3050 |
|
|
is_inexact_lt_midpoint,
|
3051 |
|
|
is_inexact_gt_midpoint,
|
3052 |
|
|
is_midpoint_lt_even, is_midpoint_gt_even,
|
3053 |
|
|
e4, &res, pfpsf);
|
3054 |
|
|
}
|
3055 |
|
|
if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
|
3056 |
|
|
is_midpoint_lt_even || is_midpoint_gt_even) {
|
3057 |
|
|
// set the inexact flag
|
3058 |
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
3059 |
|
|
}
|
3060 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
3061 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
3062 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
3063 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
3064 |
|
|
BID_SWAP128 (res);
|
3065 |
|
|
BID_RETURN (res)
|
3066 |
|
|
|
3067 |
|
|
} else if ((q4 <= p34 && p34 <= delta) || // Case (8)
|
3068 |
|
|
(q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9)
|
3069 |
|
|
(q4 <= delta && delta + q3 <= p34) || // Case (10)
|
3070 |
|
|
(delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13)
|
3071 |
|
|
(delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14)
|
3072 |
|
|
(delta + q3 < q4 && q4 <= p34)) { // Case (18)
|
3073 |
|
|
|
3074 |
|
|
// Case (8) is similar to Case (1), with C3 and C4 swapped
|
3075 |
|
|
// Case (9) is similar to Case (2), with C3 and C4 swapped
|
3076 |
|
|
// Case (10) is similar to Case (3), with C3 and C4 swapped
|
3077 |
|
|
// Case (13) is similar to Case (4), with C3 and C4 swapped
|
3078 |
|
|
// Case (14) is similar to Case (5), with C3 and C4 swapped
|
3079 |
|
|
// Case (18) is similar to Case (6), with C3 and C4 swapped
|
3080 |
|
|
|
3081 |
|
|
// swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
|
3082 |
|
|
// and go back to delta_ge_zero
|
3083 |
|
|
// C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
|
3084 |
|
|
P128.w[1] = C3.w[1];
|
3085 |
|
|
P128.w[0] = C3.w[0];
|
3086 |
|
|
C3.w[1] = C4.w[1];
|
3087 |
|
|
C3.w[0] = C4.w[0];
|
3088 |
|
|
C4.w[1] = P128.w[1];
|
3089 |
|
|
C4.w[0] = P128.w[0];
|
3090 |
|
|
ind = q3;
|
3091 |
|
|
q3 = q4;
|
3092 |
|
|
q4 = ind;
|
3093 |
|
|
ind = e3;
|
3094 |
|
|
e3 = e4;
|
3095 |
|
|
e4 = ind;
|
3096 |
|
|
tmp_sign = z_sign;
|
3097 |
|
|
z_sign = p_sign;
|
3098 |
|
|
p_sign = tmp_sign;
|
3099 |
|
|
tmp.ui64 = z_exp;
|
3100 |
|
|
z_exp = p_exp;
|
3101 |
|
|
p_exp = tmp.ui64;
|
3102 |
|
|
goto delta_ge_zero;
|
3103 |
|
|
|
3104 |
|
|
} else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
|
3105 |
|
|
(delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12)
|
3106 |
|
|
|
3107 |
|
|
// round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
|
3108 |
|
|
// 1 <= x0 <= q3 - 1 <= p34 - 1
|
3109 |
|
|
x0 = e4 - e3; // or x0 = delta + q3 - q4
|
3110 |
|
|
if (q3 <= 18) { // 2 <= q3 <= 18
|
3111 |
|
|
round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
|
3112 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3113 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
|
3114 |
|
|
// C3.w[1] = 0;
|
3115 |
|
|
C3.w[0] = R64;
|
3116 |
|
|
} else if (q3 <= 38) {
|
3117 |
|
|
round128_19_38 (q3, x0, C3, &R128, &incr_exp,
|
3118 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3119 |
|
|
&is_inexact_lt_midpoint,
|
3120 |
|
|
&is_inexact_gt_midpoint);
|
3121 |
|
|
C3.w[1] = R128.w[1];
|
3122 |
|
|
C3.w[0] = R128.w[0];
|
3123 |
|
|
}
|
3124 |
|
|
// the rounded result has q3 - x0 digits
|
3125 |
|
|
// we want the exponent to be e4, so if incr_exp = 1 then
|
3126 |
|
|
// multiply the rounded result by 10 - it will still fit in 113 bits
|
3127 |
|
|
if (incr_exp) {
|
3128 |
|
|
// 64 x 128 -> 128
|
3129 |
|
|
P128.w[1] = C3.w[1];
|
3130 |
|
|
P128.w[0] = C3.w[0];
|
3131 |
|
|
__mul_64x128_to_128 (C3, ten2k64[1], P128);
|
3132 |
|
|
}
|
3133 |
|
|
e3 = e3 + x0; // this is e4
|
3134 |
|
|
// now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
|
3135 |
|
|
// the result will have the sign of x * y; the exponent is e4
|
3136 |
|
|
R256.w[3] = 0;
|
3137 |
|
|
R256.w[2] = 0;
|
3138 |
|
|
R256.w[1] = C3.w[1];
|
3139 |
|
|
R256.w[0] = C3.w[0];
|
3140 |
|
|
if (p_sign == z_sign) { // R256 = C4 + R256
|
3141 |
|
|
add256 (C4, R256, &R256);
|
3142 |
|
|
} else { // if (p_sign != z_sign) { // R256 = C4 - R256
|
3143 |
|
|
sub256 (C4, R256, &R256); // the result cannot be pure zero
|
3144 |
|
|
// because the result has opposite sign to that of R256 which was
|
3145 |
|
|
// rounded, need to change the rounding indicators
|
3146 |
|
|
lsb = C4.w[0] & 0x01;
|
3147 |
|
|
if (is_inexact_lt_midpoint) {
|
3148 |
|
|
is_inexact_lt_midpoint = 0;
|
3149 |
|
|
is_inexact_gt_midpoint = 1;
|
3150 |
|
|
} else if (is_inexact_gt_midpoint) {
|
3151 |
|
|
is_inexact_gt_midpoint = 0;
|
3152 |
|
|
is_inexact_lt_midpoint = 1;
|
3153 |
|
|
} else if (lsb == 0) {
|
3154 |
|
|
if (is_midpoint_lt_even) {
|
3155 |
|
|
is_midpoint_lt_even = 0;
|
3156 |
|
|
is_midpoint_gt_even = 1;
|
3157 |
|
|
} else if (is_midpoint_gt_even) {
|
3158 |
|
|
is_midpoint_gt_even = 0;
|
3159 |
|
|
is_midpoint_lt_even = 1;
|
3160 |
|
|
} else {
|
3161 |
|
|
;
|
3162 |
|
|
}
|
3163 |
|
|
} else if (lsb == 1) {
|
3164 |
|
|
if (is_midpoint_lt_even) {
|
3165 |
|
|
// res = res + 1
|
3166 |
|
|
R256.w[0]++;
|
3167 |
|
|
if (R256.w[0] == 0x0) {
|
3168 |
|
|
R256.w[1]++;
|
3169 |
|
|
if (R256.w[1] == 0x0) {
|
3170 |
|
|
R256.w[2]++;
|
3171 |
|
|
if (R256.w[2] == 0x0) {
|
3172 |
|
|
R256.w[3]++;
|
3173 |
|
|
}
|
3174 |
|
|
}
|
3175 |
|
|
}
|
3176 |
|
|
// no check for rounding overflow - R256 was a difference
|
3177 |
|
|
} else if (is_midpoint_gt_even) {
|
3178 |
|
|
// res = res - 1
|
3179 |
|
|
R256.w[0]--;
|
3180 |
|
|
if (R256.w[0] == 0xffffffffffffffffull) {
|
3181 |
|
|
R256.w[1]--;
|
3182 |
|
|
if (R256.w[1] == 0xffffffffffffffffull) {
|
3183 |
|
|
R256.w[2]--;
|
3184 |
|
|
if (R256.w[2] == 0xffffffffffffffffull) {
|
3185 |
|
|
R256.w[3]--;
|
3186 |
|
|
}
|
3187 |
|
|
}
|
3188 |
|
|
}
|
3189 |
|
|
} else {
|
3190 |
|
|
;
|
3191 |
|
|
}
|
3192 |
|
|
} else {
|
3193 |
|
|
;
|
3194 |
|
|
}
|
3195 |
|
|
}
|
3196 |
|
|
// determine the number of decimal digits in R256
|
3197 |
|
|
ind = nr_digits256 (R256); // ind >= p34
|
3198 |
|
|
// if R256 is sum, then ind > p34; if R256 is a difference, then
|
3199 |
|
|
// ind >= p34; this means that we can calculate the result rounded to
|
3200 |
|
|
// the destination precision, with unbounded exponent, starting from R256
|
3201 |
|
|
// and using the indicators from the rounding of C3 to avoid a double
|
3202 |
|
|
// rounding error
|
3203 |
|
|
|
3204 |
|
|
if (ind < p34) {
|
3205 |
|
|
;
|
3206 |
|
|
} else if (ind == p34) {
|
3207 |
|
|
// the result rounded to the destination precision with
|
3208 |
|
|
// unbounded exponent
|
3209 |
|
|
// is (-1)^p_sign * R256 * 10^e4
|
3210 |
|
|
res.w[1] = R256.w[1];
|
3211 |
|
|
res.w[0] = R256.w[0];
|
3212 |
|
|
} else { // if (ind > p34)
|
3213 |
|
|
// if more than P digits, round to nearest to P digits
|
3214 |
|
|
// round R256 to p34 digits
|
3215 |
|
|
x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
|
3216 |
|
|
// save C3 rounding indicators to help avoid double rounding error
|
3217 |
|
|
is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
|
3218 |
|
|
is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
|
3219 |
|
|
is_midpoint_lt_even0 = is_midpoint_lt_even;
|
3220 |
|
|
is_midpoint_gt_even0 = is_midpoint_gt_even;
|
3221 |
|
|
// initialize rounding indicators
|
3222 |
|
|
is_inexact_lt_midpoint = 0;
|
3223 |
|
|
is_inexact_gt_midpoint = 0;
|
3224 |
|
|
is_midpoint_lt_even = 0;
|
3225 |
|
|
is_midpoint_gt_even = 0;
|
3226 |
|
|
// round to p34 digits; the result fits in 113 bits
|
3227 |
|
|
if (ind <= 38) {
|
3228 |
|
|
P128.w[1] = R256.w[1];
|
3229 |
|
|
P128.w[0] = R256.w[0];
|
3230 |
|
|
round128_19_38 (ind, x0, P128, &R128, &incr_exp,
|
3231 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3232 |
|
|
&is_inexact_lt_midpoint,
|
3233 |
|
|
&is_inexact_gt_midpoint);
|
3234 |
|
|
} else if (ind <= 57) {
|
3235 |
|
|
P192.w[2] = R256.w[2];
|
3236 |
|
|
P192.w[1] = R256.w[1];
|
3237 |
|
|
P192.w[0] = R256.w[0];
|
3238 |
|
|
round192_39_57 (ind, x0, P192, &R192, &incr_exp,
|
3239 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3240 |
|
|
&is_inexact_lt_midpoint,
|
3241 |
|
|
&is_inexact_gt_midpoint);
|
3242 |
|
|
R128.w[1] = R192.w[1];
|
3243 |
|
|
R128.w[0] = R192.w[0];
|
3244 |
|
|
} else { // if (ind <= 68)
|
3245 |
|
|
round256_58_76 (ind, x0, R256, &R256, &incr_exp,
|
3246 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3247 |
|
|
&is_inexact_lt_midpoint,
|
3248 |
|
|
&is_inexact_gt_midpoint);
|
3249 |
|
|
R128.w[1] = R256.w[1];
|
3250 |
|
|
R128.w[0] = R256.w[0];
|
3251 |
|
|
}
|
3252 |
|
|
// the rounded result has p34 = 34 digits
|
3253 |
|
|
e4 = e4 + x0 + incr_exp;
|
3254 |
|
|
|
3255 |
|
|
res.w[1] = R128.w[1];
|
3256 |
|
|
res.w[0] = R128.w[0];
|
3257 |
|
|
|
3258 |
|
|
// avoid a double rounding error
|
3259 |
|
|
if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
|
3260 |
|
|
is_midpoint_lt_even) { // double rounding error upward
|
3261 |
|
|
// res = res - 1
|
3262 |
|
|
res.w[0]--;
|
3263 |
|
|
if (res.w[0] == 0xffffffffffffffffull)
|
3264 |
|
|
res.w[1]--;
|
3265 |
|
|
is_midpoint_lt_even = 0;
|
3266 |
|
|
is_inexact_lt_midpoint = 1;
|
3267 |
|
|
// Note: a double rounding error upward is not possible; for this
|
3268 |
|
|
// the result after the first rounding would have to be 99...95
|
3269 |
|
|
// (35 digits in all), possibly followed by a number of zeros; this
|
3270 |
|
|
// not possible in Cases (2)-(6) or (15)-(17) which may get here
|
3271 |
|
|
// if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
|
3272 |
|
|
if (res.w[1] == 0x0000314dc6448d93ull &&
|
3273 |
|
|
res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
|
3274 |
|
|
res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
|
3275 |
|
|
res.w[0] = 0x378d8e63ffffffffull;
|
3276 |
|
|
e4--;
|
3277 |
|
|
}
|
3278 |
|
|
} else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
|
3279 |
|
|
is_midpoint_gt_even) { // double rounding error downward
|
3280 |
|
|
// res = res + 1
|
3281 |
|
|
res.w[0]++;
|
3282 |
|
|
if (res.w[0] == 0)
|
3283 |
|
|
res.w[1]++;
|
3284 |
|
|
is_midpoint_gt_even = 0;
|
3285 |
|
|
is_inexact_gt_midpoint = 1;
|
3286 |
|
|
} else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
|
3287 |
|
|
!is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
|
3288 |
|
|
// if this second rounding was exact the result may still be
|
3289 |
|
|
// inexact because of the first rounding
|
3290 |
|
|
if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
|
3291 |
|
|
is_inexact_gt_midpoint = 1;
|
3292 |
|
|
}
|
3293 |
|
|
if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
|
3294 |
|
|
is_inexact_lt_midpoint = 1;
|
3295 |
|
|
}
|
3296 |
|
|
} else if (is_midpoint_gt_even &&
|
3297 |
|
|
(is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
|
3298 |
|
|
// pulled up to a midpoint
|
3299 |
|
|
is_inexact_lt_midpoint = 1;
|
3300 |
|
|
is_inexact_gt_midpoint = 0;
|
3301 |
|
|
is_midpoint_lt_even = 0;
|
3302 |
|
|
is_midpoint_gt_even = 0;
|
3303 |
|
|
} else if (is_midpoint_lt_even &&
|
3304 |
|
|
(is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
|
3305 |
|
|
// pulled down to a midpoint
|
3306 |
|
|
is_inexact_lt_midpoint = 0;
|
3307 |
|
|
is_inexact_gt_midpoint = 1;
|
3308 |
|
|
is_midpoint_lt_even = 0;
|
3309 |
|
|
is_midpoint_gt_even = 0;
|
3310 |
|
|
} else {
|
3311 |
|
|
;
|
3312 |
|
|
}
|
3313 |
|
|
}
|
3314 |
|
|
|
3315 |
|
|
// determine tininess
|
3316 |
|
|
if (rnd_mode == ROUNDING_TO_NEAREST) {
|
3317 |
|
|
if (e4 < expmin) {
|
3318 |
|
|
is_tiny = 1; // for other rounding modes apply correction
|
3319 |
|
|
}
|
3320 |
|
|
} else {
|
3321 |
|
|
// for RM, RP, RZ, RA apply correction in order to determine tininess
|
3322 |
|
|
// but do not save the result; apply the correction to
|
3323 |
|
|
// (-1)^p_sign * res * 10^0
|
3324 |
|
|
P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
|
3325 |
|
|
P128.w[0] = res.w[0];
|
3326 |
|
|
rounding_correction (rnd_mode,
|
3327 |
|
|
is_inexact_lt_midpoint,
|
3328 |
|
|
is_inexact_gt_midpoint,
|
3329 |
|
|
is_midpoint_lt_even, is_midpoint_gt_even,
|
3330 |
|
|
0, &P128, pfpsf);
|
3331 |
|
|
scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
|
3332 |
|
|
// the number of digits in the significand is p34 = 34
|
3333 |
|
|
if (e4 + scale < expmin) {
|
3334 |
|
|
is_tiny = 1;
|
3335 |
|
|
}
|
3336 |
|
|
}
|
3337 |
|
|
|
3338 |
|
|
// the result rounded to the destination precision with unbounded exponent
|
3339 |
|
|
// is (-1)^p_sign * res * 10^e4
|
3340 |
|
|
res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN
|
3341 |
|
|
// res.w[0] unchanged;
|
3342 |
|
|
// Note: res is correct only if expmin <= e4 <= expmax
|
3343 |
|
|
ind = p34; // the number of decimal digits in the signifcand of res
|
3344 |
|
|
|
3345 |
|
|
// at this point we have the result rounded with unbounded exponent in
|
3346 |
|
|
// res and we know its tininess:
|
3347 |
|
|
// res = (-1)^p_sign * significand * 10^e4,
|
3348 |
|
|
// where q (significand) = ind = p34
|
3349 |
|
|
// Note: res is correct only if expmin <= e4 <= expmax
|
3350 |
|
|
|
3351 |
|
|
// check for overflow if RN
|
3352 |
|
|
if (rnd_mode == ROUNDING_TO_NEAREST
|
3353 |
|
|
&& (ind + e4) > (p34 + expmax)) {
|
3354 |
|
|
res.w[1] = p_sign | 0x7800000000000000ull;
|
3355 |
|
|
res.w[0] = 0x0000000000000000ull;
|
3356 |
|
|
*pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
|
3357 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
3358 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
3359 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
3360 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
3361 |
|
|
BID_SWAP128 (res);
|
3362 |
|
|
BID_RETURN (res)
|
3363 |
|
|
} // else not overflow or not RN, so continue
|
3364 |
|
|
|
3365 |
|
|
// from this point on this is similar to the last part of the computation
|
3366 |
|
|
// for Cases (15), (16), (17)
|
3367 |
|
|
|
3368 |
|
|
// if (e4 >= expmin) we have the result rounded with bounded exponent
|
3369 |
|
|
if (e4 < expmin) {
|
3370 |
|
|
x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
|
3371 |
|
|
// where the result rounded [at most] once is
|
3372 |
|
|
// (-1)^p_sign * significand_res * 10^e4
|
3373 |
|
|
|
3374 |
|
|
// avoid double rounding error
|
3375 |
|
|
is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
|
3376 |
|
|
is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
|
3377 |
|
|
is_midpoint_lt_even0 = is_midpoint_lt_even;
|
3378 |
|
|
is_midpoint_gt_even0 = is_midpoint_gt_even;
|
3379 |
|
|
is_inexact_lt_midpoint = 0;
|
3380 |
|
|
is_inexact_gt_midpoint = 0;
|
3381 |
|
|
is_midpoint_lt_even = 0;
|
3382 |
|
|
is_midpoint_gt_even = 0;
|
3383 |
|
|
|
3384 |
|
|
if (x0 > ind) {
|
3385 |
|
|
// nothing is left of res when moving the decimal point left x0 digits
|
3386 |
|
|
is_inexact_lt_midpoint = 1;
|
3387 |
|
|
res.w[1] = p_sign | 0x0000000000000000ull;
|
3388 |
|
|
res.w[0] = 0x0000000000000000ull;
|
3389 |
|
|
e4 = expmin;
|
3390 |
|
|
} else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
|
3391 |
|
|
// this is <, =, or > 1/2 ulp
|
3392 |
|
|
// compare the ind-digit value in the significand of res with
|
3393 |
|
|
// 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
|
3394 |
|
|
// less than, equal to, or greater than 1/2 ulp (significand of res)
|
3395 |
|
|
R128.w[1] = res.w[1] & MASK_COEFF;
|
3396 |
|
|
R128.w[0] = res.w[0];
|
3397 |
|
|
if (ind <= 19) {
|
3398 |
|
|
if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
|
3399 |
|
|
lt_half_ulp = 1;
|
3400 |
|
|
is_inexact_lt_midpoint = 1;
|
3401 |
|
|
} else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
|
3402 |
|
|
eq_half_ulp = 1;
|
3403 |
|
|
is_midpoint_gt_even = 1;
|
3404 |
|
|
} else { // > 1/2 ulp
|
3405 |
|
|
gt_half_ulp = 1;
|
3406 |
|
|
is_inexact_gt_midpoint = 1;
|
3407 |
|
|
}
|
3408 |
|
|
} else { // if (ind <= 38)
|
3409 |
|
|
if (R128.w[1] < midpoint128[ind - 20].w[1] ||
|
3410 |
|
|
(R128.w[1] == midpoint128[ind - 20].w[1] &&
|
3411 |
|
|
R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
|
3412 |
|
|
lt_half_ulp = 1;
|
3413 |
|
|
is_inexact_lt_midpoint = 1;
|
3414 |
|
|
} else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
|
3415 |
|
|
R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
|
3416 |
|
|
eq_half_ulp = 1;
|
3417 |
|
|
is_midpoint_gt_even = 1;
|
3418 |
|
|
} else { // > 1/2 ulp
|
3419 |
|
|
gt_half_ulp = 1;
|
3420 |
|
|
is_inexact_gt_midpoint = 1;
|
3421 |
|
|
}
|
3422 |
|
|
}
|
3423 |
|
|
if (lt_half_ulp || eq_half_ulp) {
|
3424 |
|
|
// res = +0.0 * 10^expmin
|
3425 |
|
|
res.w[1] = 0x0000000000000000ull;
|
3426 |
|
|
res.w[0] = 0x0000000000000000ull;
|
3427 |
|
|
} else { // if (gt_half_ulp)
|
3428 |
|
|
// res = +1 * 10^expmin
|
3429 |
|
|
res.w[1] = 0x0000000000000000ull;
|
3430 |
|
|
res.w[0] = 0x0000000000000001ull;
|
3431 |
|
|
}
|
3432 |
|
|
res.w[1] = p_sign | res.w[1];
|
3433 |
|
|
e4 = expmin;
|
3434 |
|
|
} else { // if (1 <= x0 <= ind - 1 <= 33)
|
3435 |
|
|
// round the ind-digit result to ind - x0 digits
|
3436 |
|
|
|
3437 |
|
|
if (ind <= 18) { // 2 <= ind <= 18
|
3438 |
|
|
round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
|
3439 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3440 |
|
|
&is_inexact_lt_midpoint,
|
3441 |
|
|
&is_inexact_gt_midpoint);
|
3442 |
|
|
res.w[1] = 0x0;
|
3443 |
|
|
res.w[0] = R64;
|
3444 |
|
|
} else if (ind <= 38) {
|
3445 |
|
|
P128.w[1] = res.w[1] & MASK_COEFF;
|
3446 |
|
|
P128.w[0] = res.w[0];
|
3447 |
|
|
round128_19_38 (ind, x0, P128, &res, &incr_exp,
|
3448 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3449 |
|
|
&is_inexact_lt_midpoint,
|
3450 |
|
|
&is_inexact_gt_midpoint);
|
3451 |
|
|
}
|
3452 |
|
|
e4 = e4 + x0; // expmin
|
3453 |
|
|
// we want the exponent to be expmin, so if incr_exp = 1 then
|
3454 |
|
|
// multiply the rounded result by 10 - it will still fit in 113 bits
|
3455 |
|
|
if (incr_exp) {
|
3456 |
|
|
// 64 x 128 -> 128
|
3457 |
|
|
P128.w[1] = res.w[1] & MASK_COEFF;
|
3458 |
|
|
P128.w[0] = res.w[0];
|
3459 |
|
|
__mul_64x128_to_128 (res, ten2k64[1], P128);
|
3460 |
|
|
}
|
3461 |
|
|
res.w[1] =
|
3462 |
|
|
p_sign | ((UINT64) (e4 + 6176) << 49) | (res.
|
3463 |
|
|
w[1] & MASK_COEFF);
|
3464 |
|
|
// avoid a double rounding error
|
3465 |
|
|
if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
|
3466 |
|
|
is_midpoint_lt_even) { // double rounding error upward
|
3467 |
|
|
// res = res - 1
|
3468 |
|
|
res.w[0]--;
|
3469 |
|
|
if (res.w[0] == 0xffffffffffffffffull)
|
3470 |
|
|
res.w[1]--;
|
3471 |
|
|
// Note: a double rounding error upward is not possible; for this
|
3472 |
|
|
// the result after the first rounding would have to be 99...95
|
3473 |
|
|
// (35 digits in all), possibly followed by a number of zeros; this
|
3474 |
|
|
// not possible in this underflow case
|
3475 |
|
|
is_midpoint_lt_even = 0;
|
3476 |
|
|
is_inexact_lt_midpoint = 1;
|
3477 |
|
|
} else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
|
3478 |
|
|
is_midpoint_gt_even) { // double rounding error downward
|
3479 |
|
|
// res = res + 1
|
3480 |
|
|
res.w[0]++;
|
3481 |
|
|
if (res.w[0] == 0)
|
3482 |
|
|
res.w[1]++;
|
3483 |
|
|
is_midpoint_gt_even = 0;
|
3484 |
|
|
is_inexact_gt_midpoint = 1;
|
3485 |
|
|
} else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
|
3486 |
|
|
!is_inexact_lt_midpoint
|
3487 |
|
|
&& !is_inexact_gt_midpoint) {
|
3488 |
|
|
// if this second rounding was exact the result may still be
|
3489 |
|
|
// inexact because of the first rounding
|
3490 |
|
|
if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
|
3491 |
|
|
is_inexact_gt_midpoint = 1;
|
3492 |
|
|
}
|
3493 |
|
|
if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
|
3494 |
|
|
is_inexact_lt_midpoint = 1;
|
3495 |
|
|
}
|
3496 |
|
|
} else if (is_midpoint_gt_even &&
|
3497 |
|
|
(is_inexact_gt_midpoint0
|
3498 |
|
|
|| is_midpoint_lt_even0)) {
|
3499 |
|
|
// pulled up to a midpoint
|
3500 |
|
|
is_inexact_lt_midpoint = 1;
|
3501 |
|
|
is_inexact_gt_midpoint = 0;
|
3502 |
|
|
is_midpoint_lt_even = 0;
|
3503 |
|
|
is_midpoint_gt_even = 0;
|
3504 |
|
|
} else if (is_midpoint_lt_even &&
|
3505 |
|
|
(is_inexact_lt_midpoint0
|
3506 |
|
|
|| is_midpoint_gt_even0)) {
|
3507 |
|
|
// pulled down to a midpoint
|
3508 |
|
|
is_inexact_lt_midpoint = 0;
|
3509 |
|
|
is_inexact_gt_midpoint = 1;
|
3510 |
|
|
is_midpoint_lt_even = 0;
|
3511 |
|
|
is_midpoint_gt_even = 0;
|
3512 |
|
|
} else {
|
3513 |
|
|
;
|
3514 |
|
|
}
|
3515 |
|
|
}
|
3516 |
|
|
}
|
3517 |
|
|
// res contains the correct result
|
3518 |
|
|
// apply correction if not rounding to nearest
|
3519 |
|
|
if (rnd_mode != ROUNDING_TO_NEAREST) {
|
3520 |
|
|
rounding_correction (rnd_mode,
|
3521 |
|
|
is_inexact_lt_midpoint,
|
3522 |
|
|
is_inexact_gt_midpoint,
|
3523 |
|
|
is_midpoint_lt_even, is_midpoint_gt_even,
|
3524 |
|
|
e4, &res, pfpsf);
|
3525 |
|
|
}
|
3526 |
|
|
if (is_midpoint_lt_even || is_midpoint_gt_even ||
|
3527 |
|
|
is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
|
3528 |
|
|
// set the inexact flag
|
3529 |
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
3530 |
|
|
if (is_tiny)
|
3531 |
|
|
*pfpsf |= UNDERFLOW_EXCEPTION;
|
3532 |
|
|
}
|
3533 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
3534 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
3535 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
3536 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
3537 |
|
|
BID_SWAP128 (res);
|
3538 |
|
|
BID_RETURN (res)
|
3539 |
|
|
|
3540 |
|
|
} else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
|
3541 |
|
|
(delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16)
|
3542 |
|
|
(delta + q3 <= p34 && p34 < q4)) { // Case (17)
|
3543 |
|
|
|
3544 |
|
|
// calculate first the result rounded to the destination precision, with
|
3545 |
|
|
// unbounded exponent
|
3546 |
|
|
|
3547 |
|
|
add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
|
3548 |
|
|
rnd_mode, &is_midpoint_lt_even,
|
3549 |
|
|
&is_midpoint_gt_even, &is_inexact_lt_midpoint,
|
3550 |
|
|
&is_inexact_gt_midpoint, pfpsf, &res);
|
3551 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
3552 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
3553 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
3554 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
3555 |
|
|
BID_SWAP128 (res);
|
3556 |
|
|
BID_RETURN (res)
|
3557 |
|
|
|
3558 |
|
|
} else {
|
3559 |
|
|
;
|
3560 |
|
|
}
|
3561 |
|
|
|
3562 |
|
|
} // end if delta < 0
|
3563 |
|
|
|
3564 |
|
|
*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
|
3565 |
|
|
*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
|
3566 |
|
|
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
|
3567 |
|
|
*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
|
3568 |
|
|
BID_SWAP128 (res);
|
3569 |
|
|
BID_RETURN (res)
|
3570 |
|
|
|
3571 |
|
|
}
|
3572 |
|
|
|
3573 |
|
|
|
3574 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3575 |
|
|
void
|
3576 |
|
|
bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
|
3577 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3578 |
|
|
_EXC_INFO_PARAM) {
|
3579 |
|
|
UINT128 x = *px, y = *py, z = *pz;
|
3580 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
3581 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
3582 |
|
|
#endif
|
3583 |
|
|
#else
|
3584 |
|
|
UINT128
|
3585 |
|
|
bid128_fma (UINT128 x, UINT128 y, UINT128 z
|
3586 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3587 |
|
|
_EXC_INFO_PARAM) {
|
3588 |
|
|
#endif
|
3589 |
|
|
int is_midpoint_lt_even, is_midpoint_gt_even,
|
3590 |
|
|
is_inexact_lt_midpoint, is_inexact_gt_midpoint;
|
3591 |
|
|
UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
|
3592 |
|
|
|
3593 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3594 |
|
|
bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3595 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
|
3596 |
|
|
&res, &x, &y, &z
|
3597 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3598 |
|
|
_EXC_INFO_ARG);
|
3599 |
|
|
#else
|
3600 |
|
|
res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3601 |
|
|
&is_inexact_lt_midpoint,
|
3602 |
|
|
&is_inexact_gt_midpoint, x, y,
|
3603 |
|
|
z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3604 |
|
|
_EXC_INFO_ARG);
|
3605 |
|
|
#endif
|
3606 |
|
|
BID_RETURN (res);
|
3607 |
|
|
}
|
3608 |
|
|
|
3609 |
|
|
|
3610 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3611 |
|
|
void
|
3612 |
|
|
bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
|
3613 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3614 |
|
|
_EXC_INFO_PARAM) {
|
3615 |
|
|
UINT64 x = *px, y = *py, z = *pz;
|
3616 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
3617 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
3618 |
|
|
#endif
|
3619 |
|
|
#else
|
3620 |
|
|
UINT128
|
3621 |
|
|
bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
|
3622 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3623 |
|
|
_EXC_INFO_PARAM) {
|
3624 |
|
|
#endif
|
3625 |
|
|
int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
|
3626 |
|
|
is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
|
3627 |
|
|
UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
|
3628 |
|
|
UINT128 x1, y1, z1;
|
3629 |
|
|
|
3630 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3631 |
|
|
bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3632 |
|
|
bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3633 |
|
|
bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3634 |
|
|
bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3635 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
|
3636 |
|
|
&res, &x1, &y1, &z1
|
3637 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3638 |
|
|
_EXC_INFO_ARG);
|
3639 |
|
|
#else
|
3640 |
|
|
x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3641 |
|
|
y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3642 |
|
|
z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3643 |
|
|
res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3644 |
|
|
&is_inexact_lt_midpoint,
|
3645 |
|
|
&is_inexact_gt_midpoint, x1, y1,
|
3646 |
|
|
z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3647 |
|
|
_EXC_INFO_ARG);
|
3648 |
|
|
#endif
|
3649 |
|
|
BID_RETURN (res);
|
3650 |
|
|
}
|
3651 |
|
|
|
3652 |
|
|
|
3653 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3654 |
|
|
void
|
3655 |
|
|
bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
|
3656 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3657 |
|
|
_EXC_INFO_PARAM) {
|
3658 |
|
|
UINT64 x = *px, y = *py;
|
3659 |
|
|
UINT128 z = *pz;
|
3660 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
3661 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
3662 |
|
|
#endif
|
3663 |
|
|
#else
|
3664 |
|
|
UINT128
|
3665 |
|
|
bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
|
3666 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3667 |
|
|
_EXC_INFO_PARAM) {
|
3668 |
|
|
#endif
|
3669 |
|
|
int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
|
3670 |
|
|
is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
|
3671 |
|
|
UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
|
3672 |
|
|
UINT128 x1, y1;
|
3673 |
|
|
|
3674 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3675 |
|
|
bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3676 |
|
|
bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3677 |
|
|
bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3678 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
|
3679 |
|
|
&res, &x1, &y1, &z
|
3680 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3681 |
|
|
_EXC_INFO_ARG);
|
3682 |
|
|
#else
|
3683 |
|
|
x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3684 |
|
|
y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3685 |
|
|
res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3686 |
|
|
&is_inexact_lt_midpoint,
|
3687 |
|
|
&is_inexact_gt_midpoint, x1, y1,
|
3688 |
|
|
z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3689 |
|
|
_EXC_INFO_ARG);
|
3690 |
|
|
#endif
|
3691 |
|
|
BID_RETURN (res);
|
3692 |
|
|
}
|
3693 |
|
|
|
3694 |
|
|
|
3695 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3696 |
|
|
void
|
3697 |
|
|
bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
|
3698 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3699 |
|
|
_EXC_INFO_PARAM) {
|
3700 |
|
|
UINT64 x = *px, z = *pz;
|
3701 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
3702 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
3703 |
|
|
#endif
|
3704 |
|
|
#else
|
3705 |
|
|
UINT128
|
3706 |
|
|
bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
|
3707 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3708 |
|
|
_EXC_INFO_PARAM) {
|
3709 |
|
|
#endif
|
3710 |
|
|
int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
|
3711 |
|
|
is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
|
3712 |
|
|
UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
|
3713 |
|
|
UINT128 x1, z1;
|
3714 |
|
|
|
3715 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3716 |
|
|
bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3717 |
|
|
bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3718 |
|
|
bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3719 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
|
3720 |
|
|
&res, &x1, py, &z1
|
3721 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3722 |
|
|
_EXC_INFO_ARG);
|
3723 |
|
|
#else
|
3724 |
|
|
x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3725 |
|
|
z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3726 |
|
|
res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3727 |
|
|
&is_inexact_lt_midpoint,
|
3728 |
|
|
&is_inexact_gt_midpoint, x1, y,
|
3729 |
|
|
z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3730 |
|
|
_EXC_INFO_ARG);
|
3731 |
|
|
#endif
|
3732 |
|
|
BID_RETURN (res);
|
3733 |
|
|
}
|
3734 |
|
|
|
3735 |
|
|
|
3736 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3737 |
|
|
void
|
3738 |
|
|
bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
|
3739 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3740 |
|
|
_EXC_INFO_PARAM) {
|
3741 |
|
|
UINT64 x = *px;
|
3742 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
3743 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
3744 |
|
|
#endif
|
3745 |
|
|
#else
|
3746 |
|
|
UINT128
|
3747 |
|
|
bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
|
3748 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3749 |
|
|
_EXC_INFO_PARAM) {
|
3750 |
|
|
#endif
|
3751 |
|
|
int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
|
3752 |
|
|
is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
|
3753 |
|
|
UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
|
3754 |
|
|
UINT128 x1;
|
3755 |
|
|
|
3756 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3757 |
|
|
bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3758 |
|
|
bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3759 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
|
3760 |
|
|
&res, &x1, py, pz
|
3761 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3762 |
|
|
_EXC_INFO_ARG);
|
3763 |
|
|
#else
|
3764 |
|
|
x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3765 |
|
|
res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3766 |
|
|
&is_inexact_lt_midpoint,
|
3767 |
|
|
&is_inexact_gt_midpoint, x1, y,
|
3768 |
|
|
z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3769 |
|
|
_EXC_INFO_ARG);
|
3770 |
|
|
#endif
|
3771 |
|
|
BID_RETURN (res);
|
3772 |
|
|
}
|
3773 |
|
|
|
3774 |
|
|
|
3775 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3776 |
|
|
void
|
3777 |
|
|
bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
|
3778 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3779 |
|
|
_EXC_INFO_PARAM) {
|
3780 |
|
|
UINT64 y = *py, z = *pz;
|
3781 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
3782 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
3783 |
|
|
#endif
|
3784 |
|
|
#else
|
3785 |
|
|
UINT128
|
3786 |
|
|
bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
|
3787 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3788 |
|
|
_EXC_INFO_PARAM) {
|
3789 |
|
|
#endif
|
3790 |
|
|
int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
|
3791 |
|
|
is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
|
3792 |
|
|
UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
|
3793 |
|
|
UINT128 y1, z1;
|
3794 |
|
|
|
3795 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3796 |
|
|
bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3797 |
|
|
bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3798 |
|
|
bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3799 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
|
3800 |
|
|
&res, px, &y1, &z1
|
3801 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3802 |
|
|
_EXC_INFO_ARG);
|
3803 |
|
|
#else
|
3804 |
|
|
y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3805 |
|
|
z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3806 |
|
|
res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3807 |
|
|
&is_inexact_lt_midpoint,
|
3808 |
|
|
&is_inexact_gt_midpoint, x, y1,
|
3809 |
|
|
z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3810 |
|
|
_EXC_INFO_ARG);
|
3811 |
|
|
#endif
|
3812 |
|
|
BID_RETURN (res);
|
3813 |
|
|
}
|
3814 |
|
|
|
3815 |
|
|
|
3816 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3817 |
|
|
void
|
3818 |
|
|
bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
|
3819 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3820 |
|
|
_EXC_INFO_PARAM) {
|
3821 |
|
|
UINT64 y = *py;
|
3822 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
3823 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
3824 |
|
|
#endif
|
3825 |
|
|
#else
|
3826 |
|
|
UINT128
|
3827 |
|
|
bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
|
3828 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3829 |
|
|
_EXC_INFO_PARAM) {
|
3830 |
|
|
#endif
|
3831 |
|
|
int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
|
3832 |
|
|
is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
|
3833 |
|
|
UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
|
3834 |
|
|
UINT128 y1;
|
3835 |
|
|
|
3836 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3837 |
|
|
bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3838 |
|
|
bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3839 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
|
3840 |
|
|
&res, px, &y1, pz
|
3841 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3842 |
|
|
_EXC_INFO_ARG);
|
3843 |
|
|
#else
|
3844 |
|
|
y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3845 |
|
|
res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3846 |
|
|
&is_inexact_lt_midpoint,
|
3847 |
|
|
&is_inexact_gt_midpoint, x, y1,
|
3848 |
|
|
z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3849 |
|
|
_EXC_INFO_ARG);
|
3850 |
|
|
#endif
|
3851 |
|
|
BID_RETURN (res);
|
3852 |
|
|
}
|
3853 |
|
|
|
3854 |
|
|
|
3855 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3856 |
|
|
void
|
3857 |
|
|
bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
|
3858 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3859 |
|
|
_EXC_INFO_PARAM) {
|
3860 |
|
|
UINT64 z = *pz;
|
3861 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
3862 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
3863 |
|
|
#endif
|
3864 |
|
|
#else
|
3865 |
|
|
UINT128
|
3866 |
|
|
bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
|
3867 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3868 |
|
|
_EXC_INFO_PARAM) {
|
3869 |
|
|
#endif
|
3870 |
|
|
int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
|
3871 |
|
|
is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
|
3872 |
|
|
UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
|
3873 |
|
|
UINT128 z1;
|
3874 |
|
|
|
3875 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3876 |
|
|
bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3877 |
|
|
bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3878 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
|
3879 |
|
|
&res, px, py, &z1
|
3880 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3881 |
|
|
_EXC_INFO_ARG);
|
3882 |
|
|
#else
|
3883 |
|
|
z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3884 |
|
|
res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
|
3885 |
|
|
&is_inexact_lt_midpoint,
|
3886 |
|
|
&is_inexact_gt_midpoint, x, y,
|
3887 |
|
|
z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3888 |
|
|
_EXC_INFO_ARG);
|
3889 |
|
|
#endif
|
3890 |
|
|
BID_RETURN (res);
|
3891 |
|
|
}
|
3892 |
|
|
|
3893 |
|
|
// Note: bid128qqq_fma is represented by bid128_fma
|
3894 |
|
|
|
3895 |
|
|
// Note: bid64ddd_fma is represented by bid64_fma
|
3896 |
|
|
|
3897 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3898 |
|
|
void
|
3899 |
|
|
bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
|
3900 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3901 |
|
|
_EXC_INFO_PARAM) {
|
3902 |
|
|
UINT64 x = *px, y = *py;
|
3903 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
3904 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
3905 |
|
|
#endif
|
3906 |
|
|
#else
|
3907 |
|
|
UINT64
|
3908 |
|
|
bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
|
3909 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3910 |
|
|
_EXC_INFO_PARAM) {
|
3911 |
|
|
#endif
|
3912 |
|
|
UINT64 res1 = 0xbaddbaddbaddbaddull;
|
3913 |
|
|
UINT128 x1, y1;
|
3914 |
|
|
|
3915 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3916 |
|
|
bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3917 |
|
|
bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3918 |
|
|
bid64qqq_fma (&res1, &x1, &y1, pz
|
3919 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3920 |
|
|
_EXC_INFO_ARG);
|
3921 |
|
|
#else
|
3922 |
|
|
x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3923 |
|
|
y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3924 |
|
|
res1 = bid64qqq_fma (x1, y1, z
|
3925 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3926 |
|
|
_EXC_INFO_ARG);
|
3927 |
|
|
#endif
|
3928 |
|
|
BID_RETURN (res1);
|
3929 |
|
|
}
|
3930 |
|
|
|
3931 |
|
|
|
3932 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3933 |
|
|
void
|
3934 |
|
|
bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
|
3935 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3936 |
|
|
_EXC_INFO_PARAM) {
|
3937 |
|
|
UINT64 x = *px, z = *pz;
|
3938 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
3939 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
3940 |
|
|
#endif
|
3941 |
|
|
#else
|
3942 |
|
|
UINT64
|
3943 |
|
|
bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
|
3944 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3945 |
|
|
_EXC_INFO_PARAM) {
|
3946 |
|
|
#endif
|
3947 |
|
|
UINT64 res1 = 0xbaddbaddbaddbaddull;
|
3948 |
|
|
UINT128 x1, z1;
|
3949 |
|
|
|
3950 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3951 |
|
|
bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3952 |
|
|
bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3953 |
|
|
bid64qqq_fma (&res1, &x1, py, &z1
|
3954 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3955 |
|
|
_EXC_INFO_ARG);
|
3956 |
|
|
#else
|
3957 |
|
|
x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3958 |
|
|
z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3959 |
|
|
res1 = bid64qqq_fma (x1, y, z1
|
3960 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3961 |
|
|
_EXC_INFO_ARG);
|
3962 |
|
|
#endif
|
3963 |
|
|
BID_RETURN (res1);
|
3964 |
|
|
}
|
3965 |
|
|
|
3966 |
|
|
|
3967 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3968 |
|
|
void
|
3969 |
|
|
bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
|
3970 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3971 |
|
|
_EXC_INFO_PARAM) {
|
3972 |
|
|
UINT64 x = *px;
|
3973 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
3974 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
3975 |
|
|
#endif
|
3976 |
|
|
#else
|
3977 |
|
|
UINT64
|
3978 |
|
|
bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
|
3979 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
3980 |
|
|
_EXC_INFO_PARAM) {
|
3981 |
|
|
#endif
|
3982 |
|
|
UINT64 res1 = 0xbaddbaddbaddbaddull;
|
3983 |
|
|
UINT128 x1;
|
3984 |
|
|
|
3985 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
3986 |
|
|
bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3987 |
|
|
bid64qqq_fma (&res1, &x1, py, pz
|
3988 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3989 |
|
|
_EXC_INFO_ARG);
|
3990 |
|
|
#else
|
3991 |
|
|
x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
3992 |
|
|
res1 = bid64qqq_fma (x1, y, z
|
3993 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
3994 |
|
|
_EXC_INFO_ARG);
|
3995 |
|
|
#endif
|
3996 |
|
|
BID_RETURN (res1);
|
3997 |
|
|
}
|
3998 |
|
|
|
3999 |
|
|
|
4000 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
4001 |
|
|
void
|
4002 |
|
|
bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
|
4003 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
4004 |
|
|
_EXC_INFO_PARAM) {
|
4005 |
|
|
UINT64 y = *py, z = *pz;
|
4006 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
4007 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
4008 |
|
|
#endif
|
4009 |
|
|
#else
|
4010 |
|
|
UINT64
|
4011 |
|
|
bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
|
4012 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
4013 |
|
|
_EXC_INFO_PARAM) {
|
4014 |
|
|
#endif
|
4015 |
|
|
UINT64 res1 = 0xbaddbaddbaddbaddull;
|
4016 |
|
|
UINT128 y1, z1;
|
4017 |
|
|
|
4018 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
4019 |
|
|
bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
4020 |
|
|
bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
4021 |
|
|
bid64qqq_fma (&res1, px, &y1, &z1
|
4022 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
4023 |
|
|
_EXC_INFO_ARG);
|
4024 |
|
|
#else
|
4025 |
|
|
y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
4026 |
|
|
z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
4027 |
|
|
res1 = bid64qqq_fma (x, y1, z1
|
4028 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
4029 |
|
|
_EXC_INFO_ARG);
|
4030 |
|
|
#endif
|
4031 |
|
|
BID_RETURN (res1);
|
4032 |
|
|
}
|
4033 |
|
|
|
4034 |
|
|
|
4035 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
4036 |
|
|
void
|
4037 |
|
|
bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
|
4038 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
4039 |
|
|
_EXC_INFO_PARAM) {
|
4040 |
|
|
UINT64 y = *py;
|
4041 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
4042 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
4043 |
|
|
#endif
|
4044 |
|
|
#else
|
4045 |
|
|
UINT64
|
4046 |
|
|
bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
|
4047 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
4048 |
|
|
_EXC_INFO_PARAM) {
|
4049 |
|
|
#endif
|
4050 |
|
|
UINT64 res1 = 0xbaddbaddbaddbaddull;
|
4051 |
|
|
UINT128 y1;
|
4052 |
|
|
|
4053 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
4054 |
|
|
bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
4055 |
|
|
bid64qqq_fma (&res1, px, &y1, pz
|
4056 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
4057 |
|
|
_EXC_INFO_ARG);
|
4058 |
|
|
#else
|
4059 |
|
|
y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
4060 |
|
|
res1 = bid64qqq_fma (x, y1, z
|
4061 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
4062 |
|
|
_EXC_INFO_ARG);
|
4063 |
|
|
#endif
|
4064 |
|
|
BID_RETURN (res1);
|
4065 |
|
|
}
|
4066 |
|
|
|
4067 |
|
|
|
4068 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
4069 |
|
|
void
|
4070 |
|
|
bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
|
4071 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
4072 |
|
|
_EXC_INFO_PARAM) {
|
4073 |
|
|
UINT64 z = *pz;
|
4074 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
4075 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
4076 |
|
|
#endif
|
4077 |
|
|
#else
|
4078 |
|
|
UINT64
|
4079 |
|
|
bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
|
4080 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
4081 |
|
|
_EXC_INFO_PARAM) {
|
4082 |
|
|
#endif
|
4083 |
|
|
UINT64 res1 = 0xbaddbaddbaddbaddull;
|
4084 |
|
|
UINT128 z1;
|
4085 |
|
|
|
4086 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
4087 |
|
|
bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
4088 |
|
|
bid64qqq_fma (&res1, px, py, &z1
|
4089 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
4090 |
|
|
_EXC_INFO_ARG);
|
4091 |
|
|
#else
|
4092 |
|
|
z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
|
4093 |
|
|
res1 = bid64qqq_fma (x, y, z1
|
4094 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
4095 |
|
|
_EXC_INFO_ARG);
|
4096 |
|
|
#endif
|
4097 |
|
|
BID_RETURN (res1);
|
4098 |
|
|
}
|
4099 |
|
|
|
4100 |
|
|
|
4101 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
4102 |
|
|
void
|
4103 |
|
|
bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
|
4104 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
4105 |
|
|
_EXC_INFO_PARAM) {
|
4106 |
|
|
UINT128 x = *px, y = *py, z = *pz;
|
4107 |
|
|
#if !DECIMAL_GLOBAL_ROUNDING
|
4108 |
|
|
unsigned int rnd_mode = *prnd_mode;
|
4109 |
|
|
#endif
|
4110 |
|
|
#else
|
4111 |
|
|
UINT64
|
4112 |
|
|
bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
|
4113 |
|
|
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
4114 |
|
|
_EXC_INFO_PARAM) {
|
4115 |
|
|
#endif
|
4116 |
|
|
int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
|
4117 |
|
|
is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
|
4118 |
|
|
int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
|
4119 |
|
|
is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
|
4120 |
|
|
int incr_exp;
|
4121 |
|
|
UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
|
4122 |
|
|
UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
|
4123 |
|
|
UINT64 res1 = 0xbaddbaddbaddbaddull;
|
4124 |
|
|
unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
|
4125 |
|
|
UINT64 sign;
|
4126 |
|
|
UINT64 exp;
|
4127 |
|
|
int unbexp;
|
4128 |
|
|
UINT128 C;
|
4129 |
|
|
BID_UI64DOUBLE tmp;
|
4130 |
|
|
int nr_bits;
|
4131 |
|
|
int q, x0;
|
4132 |
|
|
int scale;
|
4133 |
|
|
int lt_half_ulp = 0, eq_half_ulp = 0;
|
4134 |
|
|
|
4135 |
|
|
// Note: for rounding modes other than RN or RA, the result can be obtained
|
4136 |
|
|
// by rounding first to BID128 and then to BID64
|
4137 |
|
|
|
4138 |
|
|
save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
|
4139 |
|
|
*pfpsf = 0;
|
4140 |
|
|
|
4141 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
4142 |
|
|
bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
|
4143 |
|
|
&is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
|
4144 |
|
|
&res, &x, &y, &z
|
4145 |
|
|
_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
4146 |
|
|
_EXC_INFO_ARG);
|
4147 |
|
|
#else
|
4148 |
|
|
res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
|
4149 |
|
|
&is_inexact_lt_midpoint0,
|
4150 |
|
|
&is_inexact_gt_midpoint0, x, y,
|
4151 |
|
|
z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
|
4152 |
|
|
_EXC_INFO_ARG);
|
4153 |
|
|
#endif
|
4154 |
|
|
|
4155 |
|
|
if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
|
4156 |
|
|
(rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
|
4157 |
|
|
((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
|
4158 |
|
|
((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
|
4159 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
4160 |
|
|
bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
|
4161 |
|
|
#else
|
4162 |
|
|
res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
|
4163 |
|
|
#endif
|
4164 |
|
|
// determine the unbiased exponent of the result
|
4165 |
|
|
unbexp = ((res1 >> 53) & 0x3ff) - 398;
|
4166 |
|
|
|
4167 |
|
|
// if subnormal, res1 must have exp = -398
|
4168 |
|
|
// if tiny and inexact set underflow and inexact status flags
|
4169 |
|
|
if (!((res1 & MASK_NAN) == MASK_NAN) && // res1 not NaN
|
4170 |
|
|
(unbexp == -398)
|
4171 |
|
|
&& ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
|
4172 |
|
|
&& (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
|
4173 |
|
|
|| is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
|
4174 |
|
|
// set the inexact flag and the underflow flag
|
4175 |
|
|
*pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
|
4176 |
|
|
} else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
|
4177 |
|
|
is_midpoint_lt_even0 || is_midpoint_gt_even0) {
|
4178 |
|
|
// set the inexact flag and the underflow flag
|
4179 |
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
4180 |
|
|
}
|
4181 |
|
|
|
4182 |
|
|
*pfpsf |= save_fpsf;
|
4183 |
|
|
BID_RETURN (res1);
|
4184 |
|
|
} // else continue, and use rounding to nearest to round to 16 digits
|
4185 |
|
|
|
4186 |
|
|
// at this point the result is rounded to nearest (even or away) to 34 digits
|
4187 |
|
|
// (or less if exact), and it is zero or finite non-zero canonical [sub]normal
|
4188 |
|
|
sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
4189 |
|
|
exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
|
4190 |
|
|
unbexp = (exp >> 49) - 6176;
|
4191 |
|
|
C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
|
4192 |
|
|
C.w[0] = res.w[LOW_128W];
|
4193 |
|
|
|
4194 |
|
|
if ((C.w[1] == 0x0 && C.w[0] == 0x0) || // result is zero
|
4195 |
|
|
(unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
|
4196 |
|
|
// clear under/overflow
|
4197 |
|
|
#if DECIMAL_CALL_BY_REFERENCE
|
4198 |
|
|
bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
|
4199 |
|
|
#else
|
4200 |
|
|
res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
|
4201 |
|
|
#endif
|
4202 |
|
|
*pfpsf |= save_fpsf;
|
4203 |
|
|
BID_RETURN (res1);
|
4204 |
|
|
} // else continue
|
4205 |
|
|
|
4206 |
|
|
// -398 - 34 <= unbexp <= 369 + 15
|
4207 |
|
|
if (rnd_mode == ROUNDING_TIES_AWAY) {
|
4208 |
|
|
// apply correction, if needed, to make the result rounded to nearest-even
|
4209 |
|
|
if (is_midpoint_gt_even) {
|
4210 |
|
|
// res = res - 1
|
4211 |
|
|
res1--; // res1 is now even
|
4212 |
|
|
} // else the result is already correctly rounded to nearest-even
|
4213 |
|
|
}
|
4214 |
|
|
// at this point the result is finite, non-zero canonical normal or subnormal,
|
4215 |
|
|
// and in most cases overflow or underflow will not occur
|
4216 |
|
|
|
4217 |
|
|
// determine the number of digits q in the result
|
4218 |
|
|
// q = nr. of decimal digits in x
|
4219 |
|
|
// determine first the nr. of bits in x
|
4220 |
|
|
if (C.w[1] == 0) {
|
4221 |
|
|
if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
|
4222 |
|
|
// split the 64-bit value in two 32-bit halves to avoid rounding errors
|
4223 |
|
|
if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
|
4224 |
|
|
tmp.d = (double) (C.w[0] >> 32); // exact conversion
|
4225 |
|
|
nr_bits =
|
4226 |
|
|
33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
4227 |
|
|
} else { // x < 2^32
|
4228 |
|
|
tmp.d = (double) (C.w[0]); // exact conversion
|
4229 |
|
|
nr_bits =
|
4230 |
|
|
1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
4231 |
|
|
}
|
4232 |
|
|
} else { // if x < 2^53
|
4233 |
|
|
tmp.d = (double) C.w[0]; // exact conversion
|
4234 |
|
|
nr_bits =
|
4235 |
|
|
1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
4236 |
|
|
}
|
4237 |
|
|
} else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
|
4238 |
|
|
tmp.d = (double) C.w[1]; // exact conversion
|
4239 |
|
|
nr_bits =
|
4240 |
|
|
65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
4241 |
|
|
}
|
4242 |
|
|
q = nr_digits[nr_bits - 1].digits;
|
4243 |
|
|
if (q == 0) {
|
4244 |
|
|
q = nr_digits[nr_bits - 1].digits1;
|
4245 |
|
|
if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
|
4246 |
|
|
(C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
|
4247 |
|
|
C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
|
4248 |
|
|
q++;
|
4249 |
|
|
}
|
4250 |
|
|
// if q > 16, round to nearest even to 16 digits (but for underflow it may
|
4251 |
|
|
// have to be truncated even more)
|
4252 |
|
|
if (q > 16) {
|
4253 |
|
|
x0 = q - 16;
|
4254 |
|
|
if (q <= 18) {
|
4255 |
|
|
round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
|
4256 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
4257 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
|
4258 |
|
|
} else { // 19 <= q <= 34
|
4259 |
|
|
round128_19_38 (q, x0, C, &res128, &incr_exp,
|
4260 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
4261 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
|
4262 |
|
|
res1 = res128.w[0]; // the result fits in 64 bits
|
4263 |
|
|
}
|
4264 |
|
|
unbexp = unbexp + x0;
|
4265 |
|
|
if (incr_exp)
|
4266 |
|
|
unbexp++;
|
4267 |
|
|
q = 16; // need to set in case denormalization is necessary
|
4268 |
|
|
} else {
|
4269 |
|
|
// the result does not require a second rounding (and it must have
|
4270 |
|
|
// been exact in the first rounding, since q <= 16)
|
4271 |
|
|
res1 = C.w[0];
|
4272 |
|
|
}
|
4273 |
|
|
|
4274 |
|
|
// avoid a double rounding error
|
4275 |
|
|
if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
|
4276 |
|
|
is_midpoint_lt_even) { // double rounding error upward
|
4277 |
|
|
// res = res - 1
|
4278 |
|
|
res1--; // res1 becomes odd
|
4279 |
|
|
is_midpoint_lt_even = 0;
|
4280 |
|
|
is_inexact_lt_midpoint = 1;
|
4281 |
|
|
if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
|
4282 |
|
|
res1 = 0x002386f26fc0ffffull; // 10^16 - 1
|
4283 |
|
|
unbexp--;
|
4284 |
|
|
}
|
4285 |
|
|
} else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
|
4286 |
|
|
is_midpoint_gt_even) { // double rounding error downward
|
4287 |
|
|
// res = res + 1
|
4288 |
|
|
res1++; // res1 becomes odd (so it cannot be 10^16)
|
4289 |
|
|
is_midpoint_gt_even = 0;
|
4290 |
|
|
is_inexact_gt_midpoint = 1;
|
4291 |
|
|
} else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
|
4292 |
|
|
!is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
|
4293 |
|
|
// if this second rounding was exact the result may still be
|
4294 |
|
|
// inexact because of the first rounding
|
4295 |
|
|
if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
|
4296 |
|
|
is_inexact_gt_midpoint = 1;
|
4297 |
|
|
}
|
4298 |
|
|
if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
|
4299 |
|
|
is_inexact_lt_midpoint = 1;
|
4300 |
|
|
}
|
4301 |
|
|
} else if (is_midpoint_gt_even &&
|
4302 |
|
|
(is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
|
4303 |
|
|
// pulled up to a midpoint
|
4304 |
|
|
is_inexact_lt_midpoint = 1;
|
4305 |
|
|
is_inexact_gt_midpoint = 0;
|
4306 |
|
|
is_midpoint_lt_even = 0;
|
4307 |
|
|
is_midpoint_gt_even = 0;
|
4308 |
|
|
} else if (is_midpoint_lt_even &&
|
4309 |
|
|
(is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
|
4310 |
|
|
// pulled down to a midpoint
|
4311 |
|
|
is_inexact_lt_midpoint = 0;
|
4312 |
|
|
is_inexact_gt_midpoint = 1;
|
4313 |
|
|
is_midpoint_lt_even = 0;
|
4314 |
|
|
is_midpoint_gt_even = 0;
|
4315 |
|
|
} else {
|
4316 |
|
|
;
|
4317 |
|
|
}
|
4318 |
|
|
// this is the result rounded correctly to nearest even, with unbounded exp.
|
4319 |
|
|
|
4320 |
|
|
// check for overflow
|
4321 |
|
|
if (q + unbexp > P16 + expmax16) {
|
4322 |
|
|
res1 = sign | 0x7800000000000000ull;
|
4323 |
|
|
*pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
|
4324 |
|
|
*pfpsf |= save_fpsf;
|
4325 |
|
|
BID_RETURN (res1)
|
4326 |
|
|
} else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
|
4327 |
|
|
// not overflow; the result must be exact, and we can multiply res1 by
|
4328 |
|
|
// 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
|
4329 |
|
|
scale = unbexp - expmax16;
|
4330 |
|
|
res1 = res1 * ten2k64[scale]; // res1 * 10^scale
|
4331 |
|
|
unbexp = expmax16; // unbexp - scale
|
4332 |
|
|
} else {
|
4333 |
|
|
; // continue
|
4334 |
|
|
}
|
4335 |
|
|
|
4336 |
|
|
// check for underflow
|
4337 |
|
|
if (q + unbexp < P16 + expmin16) {
|
4338 |
|
|
if (unbexp < expmin16) {
|
4339 |
|
|
// we must truncate more of res
|
4340 |
|
|
x0 = expmin16 - unbexp; // x0 >= 1
|
4341 |
|
|
is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
|
4342 |
|
|
is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
|
4343 |
|
|
is_midpoint_lt_even0 = is_midpoint_lt_even;
|
4344 |
|
|
is_midpoint_gt_even0 = is_midpoint_gt_even;
|
4345 |
|
|
is_inexact_lt_midpoint = 0;
|
4346 |
|
|
is_inexact_gt_midpoint = 0;
|
4347 |
|
|
is_midpoint_lt_even = 0;
|
4348 |
|
|
is_midpoint_gt_even = 0;
|
4349 |
|
|
// the number of decimal digits in res1 is q
|
4350 |
|
|
if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
|
4351 |
|
|
// 2 <= q <= 16, 1 <= x0 <= 15
|
4352 |
|
|
round64_2_18 (q, x0, res1, &res1, &incr_exp,
|
4353 |
|
|
&is_midpoint_lt_even, &is_midpoint_gt_even,
|
4354 |
|
|
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
|
4355 |
|
|
if (incr_exp) {
|
4356 |
|
|
// res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
|
4357 |
|
|
res1 = ten2k64[q - x0];
|
4358 |
|
|
}
|
4359 |
|
|
unbexp = unbexp + x0; // expmin16
|
4360 |
|
|
} else if (x0 == q) {
|
4361 |
|
|
// the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
|
4362 |
|
|
// determine relationship with 1/2 ulp
|
4363 |
|
|
// q <= 16
|
4364 |
|
|
if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
|
4365 |
|
|
lt_half_ulp = 1;
|
4366 |
|
|
is_inexact_lt_midpoint = 1;
|
4367 |
|
|
} else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
|
4368 |
|
|
eq_half_ulp = 1;
|
4369 |
|
|
is_midpoint_gt_even = 1;
|
4370 |
|
|
} else { // > 1/2 ulp
|
4371 |
|
|
// gt_half_ulp = 1;
|
4372 |
|
|
is_inexact_gt_midpoint = 1;
|
4373 |
|
|
}
|
4374 |
|
|
if (lt_half_ulp || eq_half_ulp) {
|
4375 |
|
|
// res = +0.0 * 10^expmin16
|
4376 |
|
|
res1 = 0x0000000000000000ull;
|
4377 |
|
|
} else { // if (gt_half_ulp)
|
4378 |
|
|
// res = +1 * 10^expmin16
|
4379 |
|
|
res1 = 0x0000000000000001ull;
|
4380 |
|
|
}
|
4381 |
|
|
unbexp = expmin16;
|
4382 |
|
|
} else { // if (x0 > q)
|
4383 |
|
|
// the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
|
4384 |
|
|
res1 = 0x0000000000000000ull;
|
4385 |
|
|
unbexp = expmin16;
|
4386 |
|
|
is_inexact_lt_midpoint = 1;
|
4387 |
|
|
}
|
4388 |
|
|
// avoid a double rounding error
|
4389 |
|
|
if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
|
4390 |
|
|
is_midpoint_lt_even) { // double rounding error upward
|
4391 |
|
|
// res = res - 1
|
4392 |
|
|
res1--; // res1 becomes odd
|
4393 |
|
|
is_midpoint_lt_even = 0;
|
4394 |
|
|
is_inexact_lt_midpoint = 1;
|
4395 |
|
|
} else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
|
4396 |
|
|
is_midpoint_gt_even) { // double rounding error downward
|
4397 |
|
|
// res = res + 1
|
4398 |
|
|
res1++; // res1 becomes odd
|
4399 |
|
|
is_midpoint_gt_even = 0;
|
4400 |
|
|
is_inexact_gt_midpoint = 1;
|
4401 |
|
|
} else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
|
4402 |
|
|
!is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
|
4403 |
|
|
// if this rounding was exact the result may still be
|
4404 |
|
|
// inexact because of the previous roundings
|
4405 |
|
|
if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
|
4406 |
|
|
is_inexact_gt_midpoint = 1;
|
4407 |
|
|
}
|
4408 |
|
|
if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
|
4409 |
|
|
is_inexact_lt_midpoint = 1;
|
4410 |
|
|
}
|
4411 |
|
|
} else if (is_midpoint_gt_even &&
|
4412 |
|
|
(is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
|
4413 |
|
|
// pulled up to a midpoint
|
4414 |
|
|
is_inexact_lt_midpoint = 1;
|
4415 |
|
|
is_inexact_gt_midpoint = 0;
|
4416 |
|
|
is_midpoint_lt_even = 0;
|
4417 |
|
|
is_midpoint_gt_even = 0;
|
4418 |
|
|
} else if (is_midpoint_lt_even &&
|
4419 |
|
|
(is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
|
4420 |
|
|
// pulled down to a midpoint
|
4421 |
|
|
is_inexact_lt_midpoint = 0;
|
4422 |
|
|
is_inexact_gt_midpoint = 1;
|
4423 |
|
|
is_midpoint_lt_even = 0;
|
4424 |
|
|
is_midpoint_gt_even = 0;
|
4425 |
|
|
} else {
|
4426 |
|
|
;
|
4427 |
|
|
}
|
4428 |
|
|
}
|
4429 |
|
|
// else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
|
4430 |
|
|
// and the result is tiny and exact
|
4431 |
|
|
|
4432 |
|
|
// check for inexact result
|
4433 |
|
|
if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
|
4434 |
|
|
is_midpoint_lt_even || is_midpoint_gt_even ||
|
4435 |
|
|
is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
|
4436 |
|
|
is_midpoint_lt_even0 || is_midpoint_gt_even0) {
|
4437 |
|
|
// set the inexact flag and the underflow flag
|
4438 |
|
|
*pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
|
4439 |
|
|
}
|
4440 |
|
|
} else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
|
4441 |
|
|
is_midpoint_lt_even || is_midpoint_gt_even) {
|
4442 |
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
4443 |
|
|
}
|
4444 |
|
|
// this is the result rounded correctly to nearest, with bounded exponent
|
4445 |
|
|
|
4446 |
|
|
if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
|
4447 |
|
|
// res = res + 1
|
4448 |
|
|
res1++; // res1 is now odd
|
4449 |
|
|
} // else the result is already correct
|
4450 |
|
|
|
4451 |
|
|
// assemble the result
|
4452 |
|
|
if (res1 < 0x0020000000000000ull) { // res < 2^53
|
4453 |
|
|
res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
|
4454 |
|
|
} else { // res1 >= 2^53
|
4455 |
|
|
res1 = sign | MASK_STEERING_BITS |
|
4456 |
|
|
((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
|
4457 |
|
|
}
|
4458 |
|
|
*pfpsf |= save_fpsf;
|
4459 |
|
|
BID_RETURN (res1);
|
4460 |
|
|
}
|