OpenCores
URL https://opencores.org/ocsvn/openrisc_2011-10-31/openrisc_2011-10-31/trunk

Subversion Repositories openrisc_2011-10-31

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [libgcc/] [config/] [libbid/] [bid128_fma.c] - Blame information for rev 300

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 272 jeremybenn
/* Copyright (C) 2007, 2009  Free Software Foundation, Inc.
2
 
3
This file is part of GCC.
4
 
5
GCC is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free
7
Software Foundation; either version 3, or (at your option) any later
8
version.
9
 
10
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11
WARRANTY; without even the implied warranty of MERCHANTABILITY or
12
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13
for more details.
14
 
15
Under Section 7 of GPL version 3, you are granted additional
16
permissions described in the GCC Runtime Library Exception, version
17
3.1, as published by the Free Software Foundation.
18
 
19
You should have received a copy of the GNU General Public License and
20
a copy of the GCC Runtime Library Exception along with this program;
21
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22
<http://www.gnu.org/licenses/>.  */
23
 
24
/*****************************************************************************
25
 *
26
 *  BID128 fma   x * y + z
27
 *
28
 ****************************************************************************/
29
 
30
#include "bid_internal.h"
31
 
32
static void
33
rounding_correction (unsigned int rnd_mode,
34
                     unsigned int is_inexact_lt_midpoint,
35
                     unsigned int is_inexact_gt_midpoint,
36
                     unsigned int is_midpoint_lt_even,
37
                     unsigned int is_midpoint_gt_even,
38
                     int unbexp,
39
                     UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
40
  // unbiased true exponent unbexp may be larger than emax
41
 
42
  UINT128 res = *ptrres; // expected to have the correct sign and coefficient
43
  // (the exponent field is ignored, as unbexp is used instead)
44
  UINT64 sign, exp;
45
  UINT64 C_hi, C_lo;
46
 
47
  // general correction from RN to RA, RM, RP, RZ
48
  // Note: if the result is negative, then is_inexact_lt_midpoint, 
49
  // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even 
50
  // have to be considered as if determined for the absolute value of the 
51
  // result (so they seem to be reversed)
52
 
53
  if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
54
      is_midpoint_lt_even || is_midpoint_gt_even) {
55
    *ptrfpsf |= INEXACT_EXCEPTION;
56
  }
57
  // apply correction to result calculated with unbounded exponent
58
  sign = res.w[1] & MASK_SIGN;
59
  exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
60
  C_hi = res.w[1] & MASK_COEFF;
61
  C_lo = res.w[0];
62
  if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
63
      ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
64
      is_midpoint_gt_even))) ||
65
      (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
66
      ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
67
      is_midpoint_gt_even)))) {
68
    // C = C + 1
69
    C_lo = C_lo + 1;
70
    if (C_lo == 0)
71
      C_hi = C_hi + 1;
72
    if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
73
      // C = 10^34 => rounding overflow
74
      C_hi = 0x0000314dc6448d93ull;
75
      C_lo = 0x38c15b0a00000000ull; // 10^33
76
      // exp = exp + EXP_P1;
77
      unbexp = unbexp + 1;
78
      exp = (UINT64) (unbexp + 6176) << 49;
79
    }
80
  } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
81
      ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
82
      (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
83
    // C = C - 1
84
    C_lo = C_lo - 1;
85
    if (C_lo == 0xffffffffffffffffull)
86
      C_hi--;
87
    // check if we crossed into the lower decade
88
    if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
89
      // C = 10^33 - 1
90
      if (exp > 0) {
91
        C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
92
        C_lo = 0x378d8e63ffffffffull;
93
        // exp = exp - EXP_P1;
94
        unbexp = unbexp - 1;
95
        exp = (UINT64) (unbexp + 6176) << 49;
96
      } else { // if exp = 0
97
        if (is_midpoint_lt_even || is_midpoint_lt_even ||
98
            is_inexact_gt_midpoint || is_inexact_gt_midpoint) // tiny & inexact
99
          *ptrfpsf |= UNDERFLOW_EXCEPTION;
100
      }
101
    }
102
  } else {
103
    ; // the result is already correct
104
  }
105
  if (unbexp > expmax) { // 6111
106
    *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
107
    exp = 0;
108
    if (!sign) { // result is positive
109
      if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
110
        C_hi = 0x7800000000000000ull;
111
        C_lo = 0x0000000000000000ull;
112
      } else { // res = +MAXFP = (10^34-1) * 10^emax
113
        C_hi = 0x5fffed09bead87c0ull;
114
        C_lo = 0x378d8e63ffffffffull;
115
      }
116
    } else { // result is negative
117
      if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
118
        C_hi = 0xf800000000000000ull;
119
        C_lo = 0x0000000000000000ull;
120
      } else { // res = -MAXFP = -(10^34-1) * 10^emax
121
        C_hi = 0xdfffed09bead87c0ull;
122
        C_lo = 0x378d8e63ffffffffull;
123
      }
124
    }
125
  }
126
  // assemble the result
127
  res.w[1] = sign | exp | C_hi;
128
  res.w[0] = C_lo;
129
  *ptrres = res;
130
}
131
 
132
static void
133
add256 (UINT256 x, UINT256 y, UINT256 * pz) {
134
  // *z = x + yl assume the sum fits in 256 bits
135
  UINT256 z;
136
  z.w[0] = x.w[0] + y.w[0];
137
  if (z.w[0] < x.w[0]) {
138
    x.w[1]++;
139
    if (x.w[1] == 0x0000000000000000ull) {
140
      x.w[2]++;
141
      if (x.w[2] == 0x0000000000000000ull) {
142
        x.w[3]++;
143
      }
144
    }
145
  }
146
  z.w[1] = x.w[1] + y.w[1];
147
  if (z.w[1] < x.w[1]) {
148
    x.w[2]++;
149
    if (x.w[2] == 0x0000000000000000ull) {
150
      x.w[3]++;
151
    }
152
  }
153
  z.w[2] = x.w[2] + y.w[2];
154
  if (z.w[2] < x.w[2]) {
155
    x.w[3]++;
156
  }
157
  z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
158
  *pz = z;
159
}
160
 
161
static void
162
sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
163
  // *z = x - y; assume x >= y
164
  UINT256 z;
165
  z.w[0] = x.w[0] - y.w[0];
166
  if (z.w[0] > x.w[0]) {
167
    x.w[1]--;
168
    if (x.w[1] == 0xffffffffffffffffull) {
169
      x.w[2]--;
170
      if (x.w[2] == 0xffffffffffffffffull) {
171
        x.w[3]--;
172
      }
173
    }
174
  }
175
  z.w[1] = x.w[1] - y.w[1];
176
  if (z.w[1] > x.w[1]) {
177
    x.w[2]--;
178
    if (x.w[2] == 0xffffffffffffffffull) {
179
      x.w[3]--;
180
    }
181
  }
182
  z.w[2] = x.w[2] - y.w[2];
183
  if (z.w[2] > x.w[2]) {
184
    x.w[3]--;
185
  }
186
  z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
187
  *pz = z;
188
}
189
 
190
 
191
static int
192
nr_digits256 (UINT256 R256) {
193
  int ind;
194
  // determine the number of decimal digits in R256
195
  if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
196
    // between 1 and 19 digits
197
    for (ind = 1; ind <= 19; ind++) {
198
      if (R256.w[0] < ten2k64[ind]) {
199
        break;
200
      }
201
    }
202
    // ind digits
203
  } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
204
             (R256.w[1] < ten2k128[0].w[1] ||
205
              (R256.w[1] == ten2k128[0].w[1]
206
               && R256.w[0] < ten2k128[0].w[0]))) {
207
    // 20 digits
208
    ind = 20;
209
  } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
210
    // between 21 and 38 digits
211
    for (ind = 1; ind <= 18; ind++) {
212
      if (R256.w[1] < ten2k128[ind].w[1] ||
213
          (R256.w[1] == ten2k128[ind].w[1] &&
214
           R256.w[0] < ten2k128[ind].w[0])) {
215
        break;
216
      }
217
    }
218
    // ind + 20 digits
219
    ind = ind + 20;
220
  } else if (R256.w[3] == 0x0 &&
221
             (R256.w[2] < ten2k256[0].w[2] ||
222
              (R256.w[2] == ten2k256[0].w[2] &&
223
               R256.w[1] < ten2k256[0].w[1]) ||
224
              (R256.w[2] == ten2k256[0].w[2] &&
225
               R256.w[1] == ten2k256[0].w[1] &&
226
               R256.w[0] < ten2k256[0].w[0]))) {
227
    // 39 digits
228
    ind = 39;
229
  } else {
230
    // between 40 and 68 digits
231
    for (ind = 1; ind <= 29; ind++) {
232
      if (R256.w[3] < ten2k256[ind].w[3] ||
233
          (R256.w[3] == ten2k256[ind].w[3] &&
234
           R256.w[2] < ten2k256[ind].w[2]) ||
235
          (R256.w[3] == ten2k256[ind].w[3] &&
236
           R256.w[2] == ten2k256[ind].w[2] &&
237
           R256.w[1] < ten2k256[ind].w[1]) ||
238
          (R256.w[3] == ten2k256[ind].w[3] &&
239
           R256.w[2] == ten2k256[ind].w[2] &&
240
           R256.w[1] == ten2k256[ind].w[1] &&
241
           R256.w[0] < ten2k256[ind].w[0])) {
242
        break;
243
      }
244
    }
245
    // ind + 39 digits
246
    ind = ind + 39;
247
  }
248
  return (ind);
249
}
250
 
251
// add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
252
// use the rounding information from ptr_is_* to avoid a double rounding error
253
static void
254
add_and_round (int q3,
255
               int q4,
256
               int e4,
257
               int delta,
258
               int p34,
259
               UINT64 z_sign,
260
               UINT64 p_sign,
261
               UINT128 C3,
262
               UINT256 C4,
263
               int rnd_mode,
264
               int *ptr_is_midpoint_lt_even,
265
               int *ptr_is_midpoint_gt_even,
266
               int *ptr_is_inexact_lt_midpoint,
267
               int *ptr_is_inexact_gt_midpoint,
268
               _IDEC_flags * ptrfpsf, UINT128 * ptrres) {
269
 
270
  int scale;
271
  int x0;
272
  int ind;
273
  UINT64 R64;
274
  UINT128 P128, R128;
275
  UINT192 P192, R192;
276
  UINT256 R256;
277
  int is_midpoint_lt_even = 0;
278
  int is_midpoint_gt_even = 0;
279
  int is_inexact_lt_midpoint = 0;
280
  int is_inexact_gt_midpoint = 0;
281
  int is_midpoint_lt_even0 = 0;
282
  int is_midpoint_gt_even0 = 0;
283
  int is_inexact_lt_midpoint0 = 0;
284
  int is_inexact_gt_midpoint0 = 0;
285
  int incr_exp = 0;
286
  int is_tiny = 0;
287
  int lt_half_ulp = 0;
288
  int eq_half_ulp = 0;
289
  // int gt_half_ulp = 0;
290
  UINT128 res = *ptrres;
291
 
292
  // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
293
  scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
294
  // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1
295
 
296
  // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
297
  // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
298
  if (scale == 0) {
299
    R256.w[3] = 0x0ull;
300
    R256.w[2] = 0x0ull;
301
    R256.w[1] = C3.w[1];
302
    R256.w[0] = C3.w[0];
303
  } else if (scale <= 19) { // 10^scale fits in 64 bits
304
    P128.w[1] = 0;
305
    P128.w[0] = ten2k64[scale];
306
    __mul_128x128_to_256 (R256, P128, C3);
307
  } else if (scale <= 38) { // 10^scale fits in 128 bits
308
    __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
309
  } else if (scale <= 57) { // 39 <= scale <= 57 
310
    // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
311
    // (10^67 has 223 bits; 10^69 has 230 bits);  
312
    // must split the computation:  
313
    // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
314
    // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
315
    // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
316
    __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
317
    // now multiply R128 by 10^38
318
    __mul_128x128_to_256 (R256, R128, ten2k128[18]);
319
  } else { // 58 <= scale <= 66
320
    // 10^scale takes between 193 and 220 bits,
321
    // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
322
    // must split the computation: 
323
    // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
324
    // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty 
325
    // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
326
    // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
327
    // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
328
    __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
329
    // now calculate 10*38 * 10^(scale-38) * C3 
330
    __mul_128x128_to_256 (R256, R128, ten2k128[18]);
331
  }
332
  // C3 * 10^scale is now in R256 
333
 
334
  // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least 
335
  // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is 
336
  // possible 
337
  // add/subtract C4 and C3 * 10^scale; the exponent is e4
338
  if (p_sign == z_sign) { // R256 = C4 + R256
339
    // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
340
    // but may require rounding
341
    add256 (C4, R256, &R256);
342
  } else { // if (p_sign != z_sign) { // R256 = C4 - R256
343
    // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
344
    // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
345
    // but may require rounding
346
 
347
    // compare first R256 = C3 * 10^scale and C4 
348
    if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
349
        (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
350
        (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
351
        R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
352
      // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
353
      // but may require rounding 
354
      sub256 (R256, C4, &R256);
355
      // flip p_sign too, because the result has the sign of z 
356
      p_sign = z_sign;
357
    } else { // if C4 > C3 * 10^scale
358
      // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
359
      // but may require rounding  
360
      sub256 (C4, R256, &R256);
361
    }
362
    // if the result is pure zero, the sign depends on the rounding mode
363
    // (x*y and z had opposite signs) 
364
    if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
365
        R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
366
      if (rnd_mode != ROUNDING_DOWN)
367
        p_sign = 0x0000000000000000ull;
368
      else
369
        p_sign = 0x8000000000000000ull;
370
      // the exponent is max (e4, expmin)
371
      if (e4 < -6176)
372
        e4 = expmin;
373
      // assemble result 
374
      res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
375
      res.w[0] = 0x0;
376
      *ptrres = res;
377
      return;
378
    }
379
  }
380
 
381
  // determine the number of decimal digits in R256
382
  ind = nr_digits256 (R256);
383
 
384
  // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
385
  // round to the destination precision, with unbounded exponent
386
 
387
  if (ind <= p34) {
388
    // result rounded to the destination precision with unbounded exponent
389
    // is exact
390
    if (ind + e4 < p34 + expmin) {
391
      is_tiny = 1; // applies to all rounding modes
392
    }
393
    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
394
    res.w[0] = R256.w[0];
395
    // Note: res is correct only if expmin <= e4 <= expmax
396
  } else { // if (ind > p34)
397
    // if more than P digits, round to nearest to P digits
398
    // round R256 to p34 digits
399
    x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
400
    if (ind <= 38) {
401
      P128.w[1] = R256.w[1];
402
      P128.w[0] = R256.w[0];
403
      round128_19_38 (ind, x0, P128, &R128, &incr_exp,
404
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
405
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
406
    } else if (ind <= 57) {
407
      P192.w[2] = R256.w[2];
408
      P192.w[1] = R256.w[1];
409
      P192.w[0] = R256.w[0];
410
      round192_39_57 (ind, x0, P192, &R192, &incr_exp,
411
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
412
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
413
      R128.w[1] = R192.w[1];
414
      R128.w[0] = R192.w[0];
415
    } else { // if (ind <= 68)
416
      round256_58_76 (ind, x0, R256, &R256, &incr_exp,
417
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
418
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
419
      R128.w[1] = R256.w[1];
420
      R128.w[0] = R256.w[0];
421
    }
422
    // the rounded result has p34 = 34 digits
423
    e4 = e4 + x0 + incr_exp;
424
    if (rnd_mode == ROUNDING_TO_NEAREST) {
425
      if (e4 < expmin) {
426
        is_tiny = 1; // for other rounding modes apply correction
427
      }
428
    } else {
429
      // for RM, RP, RZ, RA apply correction in order to determine tininess
430
      // but do not save the result; apply the correction to 
431
      // (-1)^p_sign * significand * 10^0
432
      P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
433
      P128.w[0] = R128.w[0];
434
      rounding_correction (rnd_mode,
435
                           is_inexact_lt_midpoint,
436
                           is_inexact_gt_midpoint, is_midpoint_lt_even,
437
                           is_midpoint_gt_even, 0, &P128, ptrfpsf);
438
      scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
439
      // the number of digits in the significand is p34 = 34
440
      if (e4 + scale < expmin) {
441
        is_tiny = 1;
442
      }
443
    }
444
    ind = p34; // the number of decimal digits in the signifcand of res
445
    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
446
    res.w[0] = R128.w[0];
447
    // Note: res is correct only if expmin <= e4 <= expmax
448
    // set the inexact flag after rounding with bounded exponent, if any
449
  }
450
  // at this point we have the result rounded with unbounded exponent in
451
  // res and we know its tininess:
452
  // res = (-1)^p_sign * significand * 10^e4, 
453
  // where q (significand) = ind <= p34
454
  // Note: res is correct only if expmin <= e4 <= expmax
455
 
456
  // check for overflow if RN
457
  if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
458
    res.w[1] = p_sign | 0x7800000000000000ull;
459
    res.w[0] = 0x0000000000000000ull;
460
    *ptrres = res;
461
    *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
462
    return; // BID_RETURN (res)
463
  } // else not overflow or not RN, so continue
464
 
465
  // if (e4 >= expmin) we have the result rounded with bounded exponent
466
  if (e4 < expmin) {
467
    x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
468
    // where the result rounded [at most] once is
469
    //   (-1)^p_sign * significand_res * 10^e4
470
 
471
    // avoid double rounding error
472
    is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
473
    is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
474
    is_midpoint_lt_even0 = is_midpoint_lt_even;
475
    is_midpoint_gt_even0 = is_midpoint_gt_even;
476
    is_inexact_lt_midpoint = 0;
477
    is_inexact_gt_midpoint = 0;
478
    is_midpoint_lt_even = 0;
479
    is_midpoint_gt_even = 0;
480
 
481
    if (x0 > ind) {
482
      // nothing is left of res when moving the decimal point left x0 digits
483
      is_inexact_lt_midpoint = 1;
484
      res.w[1] = p_sign | 0x0000000000000000ull;
485
      res.w[0] = 0x0000000000000000ull;
486
      e4 = expmin;
487
    } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
488
      // this is <, =, or > 1/2 ulp
489
      // compare the ind-digit value in the significand of res with
490
      // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is 
491
      // less than, equal to, or greater than 1/2 ulp (significand of res)
492
      R128.w[1] = res.w[1] & MASK_COEFF;
493
      R128.w[0] = res.w[0];
494
      if (ind <= 19) {
495
        if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
496
          lt_half_ulp = 1;
497
          is_inexact_lt_midpoint = 1;
498
        } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
499
          eq_half_ulp = 1;
500
          is_midpoint_gt_even = 1;
501
        } else { // > 1/2 ulp
502
          // gt_half_ulp = 1;
503
          is_inexact_gt_midpoint = 1;
504
        }
505
      } else { // if (ind <= 38) {
506
        if (R128.w[1] < midpoint128[ind - 20].w[1] ||
507
            (R128.w[1] == midpoint128[ind - 20].w[1] &&
508
            R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
509
          lt_half_ulp = 1;
510
          is_inexact_lt_midpoint = 1;
511
        } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
512
            R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
513
          eq_half_ulp = 1;
514
          is_midpoint_gt_even = 1;
515
        } else { // > 1/2 ulp
516
          // gt_half_ulp = 1;
517
          is_inexact_gt_midpoint = 1;
518
        }
519
      }
520
      if (lt_half_ulp || eq_half_ulp) {
521
        // res = +0.0 * 10^expmin
522
        res.w[1] = 0x0000000000000000ull;
523
        res.w[0] = 0x0000000000000000ull;
524
      } else { // if (gt_half_ulp)
525
        // res = +1 * 10^expmin
526
        res.w[1] = 0x0000000000000000ull;
527
        res.w[0] = 0x0000000000000001ull;
528
      }
529
      res.w[1] = p_sign | res.w[1];
530
      e4 = expmin;
531
    } else { // if (1 <= x0 <= ind - 1 <= 33)
532
      // round the ind-digit result to ind - x0 digits
533
 
534
      if (ind <= 18) { // 2 <= ind <= 18
535
        round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
536
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
537
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
538
        res.w[1] = 0x0;
539
        res.w[0] = R64;
540
      } else if (ind <= 38) {
541
        P128.w[1] = res.w[1] & MASK_COEFF;
542
        P128.w[0] = res.w[0];
543
        round128_19_38 (ind, x0, P128, &res, &incr_exp,
544
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
545
                        &is_inexact_lt_midpoint,
546
                        &is_inexact_gt_midpoint);
547
      }
548
      e4 = e4 + x0; // expmin
549
      // we want the exponent to be expmin, so if incr_exp = 1 then
550
      // multiply the rounded result by 10 - it will still fit in 113 bits
551
      if (incr_exp) {
552
        // 64 x 128 -> 128
553
        P128.w[1] = res.w[1] & MASK_COEFF;
554
        P128.w[0] = res.w[0];
555
        __mul_64x128_to_128 (res, ten2k64[1], P128);
556
      }
557
      res.w[1] =
558
        p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
559
      // avoid a double rounding error
560
      if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
561
          is_midpoint_lt_even) { // double rounding error upward
562
        // res = res - 1
563
        res.w[0]--;
564
        if (res.w[0] == 0xffffffffffffffffull)
565
          res.w[1]--;
566
        // Note: a double rounding error upward is not possible; for this
567
        // the result after the first rounding would have to be 99...95
568
        // (35 digits in all), possibly followed by a number of zeros; this
569
        // is not possible in Cases (2)-(6) or (15)-(17) which may get here
570
        is_midpoint_lt_even = 0;
571
        is_inexact_lt_midpoint = 1;
572
      } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
573
          is_midpoint_gt_even) { // double rounding error downward
574
        // res = res + 1
575
        res.w[0]++;
576
        if (res.w[0] == 0)
577
          res.w[1]++;
578
        is_midpoint_gt_even = 0;
579
        is_inexact_gt_midpoint = 1;
580
      } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
581
                 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
582
        // if this second rounding was exact the result may still be 
583
        // inexact because of the first rounding
584
        if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
585
          is_inexact_gt_midpoint = 1;
586
        }
587
        if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
588
          is_inexact_lt_midpoint = 1;
589
        }
590
      } else if (is_midpoint_gt_even &&
591
                 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
592
        // pulled up to a midpoint
593
        is_inexact_lt_midpoint = 1;
594
        is_inexact_gt_midpoint = 0;
595
        is_midpoint_lt_even = 0;
596
        is_midpoint_gt_even = 0;
597
      } else if (is_midpoint_lt_even &&
598
                 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
599
        // pulled down to a midpoint
600
        is_inexact_lt_midpoint = 0;
601
        is_inexact_gt_midpoint = 1;
602
        is_midpoint_lt_even = 0;
603
        is_midpoint_gt_even = 0;
604
      } else {
605
        ;
606
      }
607
    }
608
  }
609
  // res contains the correct result
610
  // apply correction if not rounding to nearest
611
  if (rnd_mode != ROUNDING_TO_NEAREST) {
612
    rounding_correction (rnd_mode,
613
                         is_inexact_lt_midpoint, is_inexact_gt_midpoint,
614
                         is_midpoint_lt_even, is_midpoint_gt_even,
615
                         e4, &res, ptrfpsf);
616
  }
617
  if (is_midpoint_lt_even || is_midpoint_gt_even ||
618
      is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
619
    // set the inexact flag
620
    *ptrfpsf |= INEXACT_EXCEPTION;
621
    if (is_tiny)
622
      *ptrfpsf |= UNDERFLOW_EXCEPTION;
623
  }
624
 
625
  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
626
  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
627
  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
628
  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
629
  *ptrres = res;
630
  return;
631
}
632
 
633
 
634
#if DECIMAL_CALL_BY_REFERENCE
635
static void
636
bid128_ext_fma (int *ptr_is_midpoint_lt_even,
637
                int *ptr_is_midpoint_gt_even,
638
                int *ptr_is_inexact_lt_midpoint,
639
                int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
640
                UINT128 * px, UINT128 * py,
641
                UINT128 *
642
                pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
643
                _EXC_INFO_PARAM) {
644
  UINT128 x = *px, y = *py, z = *pz;
645
#if !DECIMAL_GLOBAL_ROUNDING
646
  unsigned int rnd_mode = *prnd_mode;
647
#endif
648
#else
649
static UINT128
650
bid128_ext_fma (int *ptr_is_midpoint_lt_even,
651
                int *ptr_is_midpoint_gt_even,
652
                int *ptr_is_inexact_lt_midpoint,
653
                int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
654
                UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
655
                _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
656
#endif
657
 
658
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
659
  UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
660
  UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
661
  int true_p_exp;
662
  UINT128 C1, C2, C3;
663
  UINT256 C4;
664
  int q1 = 0, q2 = 0, q3 = 0, q4;
665
  int e1, e2, e3, e4;
666
  int scale, ind, delta, x0;
667
  int p34 = P34; // used to modify the limit on the number of digits
668
  BID_UI64DOUBLE tmp;
669
  int x_nr_bits, y_nr_bits, z_nr_bits;
670
  unsigned int save_fpsf;
671
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
672
  int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
673
  int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0;
674
  int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
675
  int incr_exp = 0;
676
  int lsb;
677
  int lt_half_ulp = 0;
678
  int eq_half_ulp = 0;
679
  int gt_half_ulp = 0;
680
  int is_tiny = 0;
681
  UINT64 R64, tmp64;
682
  UINT128 P128, R128;
683
  UINT192 P192, R192;
684
  UINT256 R256;
685
 
686
  // the following are based on the table of special cases for fma; the NaN
687
  // behavior is similar to that of the IA-64 Architecture fma 
688
 
689
  // identify cases where at least one operand is NaN
690
 
691
  BID_SWAP128 (x);
692
  BID_SWAP128 (y);
693
  BID_SWAP128 (z);
694
  if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
695
    // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
696
    // check first for non-canonical NaN payload
697
    if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
698
        (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
699
         (y.w[0] > 0x38c15b09ffffffffull))) {
700
      y.w[1] = y.w[1] & 0xffffc00000000000ull;
701
      y.w[0] = 0x0ull;
702
    }
703
    if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
704
      // set invalid flag
705
      *pfpsf |= INVALID_EXCEPTION;
706
      // return quiet (y)
707
      res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
708
      res.w[0] = y.w[0];
709
    } else { // y is QNaN
710
      // return y
711
      res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
712
      res.w[0] = y.w[0];
713
      // if z = SNaN or x = SNaN signal invalid exception
714
      if ((z.w[1] & MASK_SNAN) == MASK_SNAN ||
715
          (x.w[1] & MASK_SNAN) == MASK_SNAN) {
716
        // set invalid flag
717
        *pfpsf |= INVALID_EXCEPTION;
718
      }
719
    }
720
    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
721
    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
722
    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
723
    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
724
    BID_SWAP128 (res);
725
    BID_RETURN (res)
726
  } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
727
    // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
728
    // check first for non-canonical NaN payload
729
    if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
730
        (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
731
         (z.w[0] > 0x38c15b09ffffffffull))) {
732
      z.w[1] = z.w[1] & 0xffffc00000000000ull;
733
      z.w[0] = 0x0ull;
734
    }
735
    if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN 
736
      // set invalid flag 
737
      *pfpsf |= INVALID_EXCEPTION;
738
      // return quiet (z) 
739
      res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
740
      res.w[0] = z.w[0];
741
    } else { // z is QNaN 
742
      // return z  
743
      res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
744
      res.w[0] = z.w[0];
745
      // if x = SNaN signal invalid exception
746
      if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {
747
        // set invalid flag
748
        *pfpsf |= INVALID_EXCEPTION;
749
      }
750
    }
751
    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
752
    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
753
    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
754
    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
755
    BID_SWAP128 (res);
756
    BID_RETURN (res)
757
  } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
758
    // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
759
    // check first for non-canonical NaN payload
760
    if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
761
        (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
762
         (x.w[0] > 0x38c15b09ffffffffull))) {
763
      x.w[1] = x.w[1] & 0xffffc00000000000ull;
764
      x.w[0] = 0x0ull;
765
    }
766
    if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN 
767
      // set invalid flag 
768
      *pfpsf |= INVALID_EXCEPTION;
769
      // return quiet (x) 
770
      res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
771
      res.w[0] = x.w[0];
772
    } else { // x is QNaN 
773
      // return x  
774
      res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
775
      res.w[0] = x.w[0];
776
    }
777
    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
778
    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
779
    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
780
    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
781
    BID_SWAP128 (res);
782
    BID_RETURN (res)
783
  }
784
  // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
785
  // for non-canonical values
786
 
787
  x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
788
  C1.w[1] = x.w[1] & MASK_COEFF;
789
  C1.w[0] = x.w[0];
790
  if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf
791
    // if x is not infinity check for non-canonical values - treated as zero
792
    if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
793
      // non-canonical
794
      x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
795
      C1.w[1] = 0; // significand high
796
      C1.w[0] = 0; // significand low
797
    } else { // G0_G1 != 11
798
      x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
799
      if (C1.w[1] > 0x0001ed09bead87c0ull ||
800
          (C1.w[1] == 0x0001ed09bead87c0ull &&
801
           C1.w[0] > 0x378d8e63ffffffffull)) {
802
        // x is non-canonical if coefficient is larger than 10^34 -1
803
        C1.w[1] = 0;
804
        C1.w[0] = 0;
805
      } else { // canonical          
806
        ;
807
      }
808
    }
809
  }
810
  y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
811
  C2.w[1] = y.w[1] & MASK_COEFF;
812
  C2.w[0] = y.w[0];
813
  if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf
814
    // if y is not infinity check for non-canonical values - treated as zero
815
    if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
816
      // non-canonical
817
      y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
818
      C2.w[1] = 0; // significand high
819
      C2.w[0] = 0; // significand low 
820
    } else { // G0_G1 != 11
821
      y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
822
      if (C2.w[1] > 0x0001ed09bead87c0ull ||
823
          (C2.w[1] == 0x0001ed09bead87c0ull &&
824
           C2.w[0] > 0x378d8e63ffffffffull)) {
825
        // y is non-canonical if coefficient is larger than 10^34 -1
826
        C2.w[1] = 0;
827
        C2.w[0] = 0;
828
      } else { // canonical
829
        ;
830
      }
831
    }
832
  }
833
  z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
834
  C3.w[1] = z.w[1] & MASK_COEFF;
835
  C3.w[0] = z.w[0];
836
  if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf
837
    // if z is not infinity check for non-canonical values - treated as zero
838
    if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
839
      // non-canonical
840
      z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
841
      C3.w[1] = 0; // significand high
842
      C3.w[0] = 0; // significand low 
843
    } else { // G0_G1 != 11
844
      z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
845
      if (C3.w[1] > 0x0001ed09bead87c0ull ||
846
          (C3.w[1] == 0x0001ed09bead87c0ull &&
847
           C3.w[0] > 0x378d8e63ffffffffull)) {
848
        // z is non-canonical if coefficient is larger than 10^34 -1
849
        C3.w[1] = 0;
850
        C3.w[0] = 0;
851
      } else { // canonical
852
        ;
853
      }
854
    }
855
  }
856
 
857
  p_sign = x_sign ^ y_sign; // sign of the product
858
 
859
  // identify cases where at least one operand is infinity
860
 
861
  if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf
862
    if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
863
      if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
864
        if (p_sign == z_sign) {
865
          res.w[1] = z_sign | MASK_INF;
866
          res.w[0] = 0x0;
867
        } else {
868
          // return QNaN Indefinite
869
          res.w[1] = 0x7c00000000000000ull;
870
          res.w[0] = 0x0000000000000000ull;
871
          // set invalid flag
872
          *pfpsf |= INVALID_EXCEPTION;
873
        }
874
      } else { // z = 0 or z = f
875
        res.w[1] = p_sign | MASK_INF;
876
        res.w[0] = 0x0;
877
      }
878
    } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f
879
      if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
880
        if (p_sign == z_sign) {
881
          res.w[1] = z_sign | MASK_INF;
882
          res.w[0] = 0x0;
883
        } else {
884
          // return QNaN Indefinite 
885
          res.w[1] = 0x7c00000000000000ull;
886
          res.w[0] = 0x0000000000000000ull;
887
          // set invalid flag
888
          *pfpsf |= INVALID_EXCEPTION;
889
        }
890
      } else { // z = 0 or z = f
891
        res.w[1] = p_sign | MASK_INF;
892
        res.w[0] = 0x0;
893
      }
894
    } else { // y = 0
895
      // return QNaN Indefinite
896
      res.w[1] = 0x7c00000000000000ull;
897
      res.w[0] = 0x0000000000000000ull;
898
      // set invalid flag
899
      *pfpsf |= INVALID_EXCEPTION;
900
    }
901
    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
902
    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
903
    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
904
    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
905
    BID_SWAP128 (res);
906
    BID_RETURN (res)
907
  } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
908
    if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
909
      // x = f, necessarily
910
      if ((p_sign != z_sign)
911
          || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) {
912
        // return QNaN Indefinite
913
        res.w[1] = 0x7c00000000000000ull;
914
        res.w[0] = 0x0000000000000000ull;
915
        // set invalid flag
916
        *pfpsf |= INVALID_EXCEPTION;
917
      } else {
918
        res.w[1] = z_sign | MASK_INF;
919
        res.w[0] = 0x0;
920
      }
921
    } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
922
      // z = 0, f, inf
923
      // return QNaN Indefinite
924
      res.w[1] = 0x7c00000000000000ull;
925
      res.w[0] = 0x0000000000000000ull;
926
      // set invalid flag
927
      *pfpsf |= INVALID_EXCEPTION;
928
    } else {
929
      // x = f and z = 0, f, necessarily
930
      res.w[1] = p_sign | MASK_INF;
931
      res.w[0] = 0x0;
932
    }
933
    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
934
    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
935
    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
936
    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
937
    BID_SWAP128 (res);
938
    BID_RETURN (res)
939
  } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
940
    // x = 0, f and y = 0, f, necessarily
941
    res.w[1] = z_sign | MASK_INF;
942
    res.w[0] = 0x0;
943
    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
944
    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
945
    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
946
    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
947
    BID_SWAP128 (res);
948
    BID_RETURN (res)
949
  }
950
 
951
  true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
952
  if (true_p_exp < -6176)
953
    p_exp = 0; // cannot be less than EXP_MIN
954
  else
955
    p_exp = (UINT64) (true_p_exp + 6176) << 49;
956
 
957
  if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
958
    // the result is 0
959
    if (p_exp < z_exp)
960
      res.w[1] = p_exp; // preferred exponent
961
    else
962
      res.w[1] = z_exp; // preferred exponent
963
    if (p_sign == z_sign) {
964
      res.w[1] |= z_sign;
965
      res.w[0] = 0x0;
966
    } else { // x * y and z have opposite signs
967
      if (rnd_mode == ROUNDING_DOWN) {
968
        // res = -0.0
969
        res.w[1] |= MASK_SIGN;
970
        res.w[0] = 0x0;
971
      } else {
972
        // res = +0.0
973
        // res.w[1] |= 0x0;
974
        res.w[0] = 0x0;
975
      }
976
    }
977
    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
978
    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
979
    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
980
    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
981
    BID_SWAP128 (res);
982
    BID_RETURN (res)
983
  }
984
  // from this point on, we may need to know the number of decimal digits
985
  // in the significands of x, y, z when x, y, z != 0
986
 
987
  if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite)
988
    // q1 = nr. of decimal digits in x
989
    // determine first the nr. of bits in x
990
    if (C1.w[1] == 0) {
991
      if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
992
        // split the 64-bit value in two 32-bit halves to avoid rounding errors
993
        if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
994
          tmp.d = (double) (C1.w[0] >> 32); // exact conversion
995
          x_nr_bits =
996
            33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
997
        } else { // x < 2^32
998
          tmp.d = (double) (C1.w[0]); // exact conversion
999
          x_nr_bits =
1000
            1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1001
        }
1002
      } else { // if x < 2^53
1003
        tmp.d = (double) C1.w[0]; // exact conversion
1004
        x_nr_bits =
1005
          1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1006
      }
1007
    } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1008
      tmp.d = (double) C1.w[1]; // exact conversion
1009
      x_nr_bits =
1010
        65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1011
    }
1012
    q1 = nr_digits[x_nr_bits - 1].digits;
1013
    if (q1 == 0) {
1014
      q1 = nr_digits[x_nr_bits - 1].digits1;
1015
      if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1016
          (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1017
           C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1018
        q1++;
1019
    }
1020
  }
1021
 
1022
  if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite)
1023
    if (C2.w[1] == 0) {
1024
      if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53
1025
        // split the 64-bit value in two 32-bit halves to avoid rounding errors
1026
        if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32
1027
          tmp.d = (double) (C2.w[0] >> 32); // exact conversion
1028
          y_nr_bits =
1029
            32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1030
        } else { // y < 2^32
1031
          tmp.d = (double) C2.w[0]; // exact conversion
1032
          y_nr_bits =
1033
            ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1034
        }
1035
      } else { // if y < 2^53
1036
        tmp.d = (double) C2.w[0]; // exact conversion
1037
        y_nr_bits =
1038
          ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1039
      }
1040
    } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
1041
      tmp.d = (double) C2.w[1]; // exact conversion
1042
      y_nr_bits =
1043
        64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1044
    }
1045
 
1046
    q2 = nr_digits[y_nr_bits].digits;
1047
    if (q2 == 0) {
1048
      q2 = nr_digits[y_nr_bits].digits1;
1049
      if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi ||
1050
          (C2.w[1] == nr_digits[y_nr_bits].threshold_hi &&
1051
           C2.w[0] >= nr_digits[y_nr_bits].threshold_lo))
1052
        q2++;
1053
    }
1054
  }
1055
 
1056
  if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite)
1057
    if (C3.w[1] == 0) {
1058
      if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53
1059
        // split the 64-bit value in two 32-bit halves to avoid rounding errors
1060
        if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32
1061
          tmp.d = (double) (C3.w[0] >> 32); // exact conversion
1062
          z_nr_bits =
1063
            32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1064
        } else { // z < 2^32
1065
          tmp.d = (double) C3.w[0]; // exact conversion
1066
          z_nr_bits =
1067
            ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1068
        }
1069
      } else { // if z < 2^53
1070
        tmp.d = (double) C3.w[0]; // exact conversion
1071
        z_nr_bits =
1072
          ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1073
      }
1074
    } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
1075
      tmp.d = (double) C3.w[1]; // exact conversion
1076
      z_nr_bits =
1077
        64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1078
    }
1079
 
1080
    q3 = nr_digits[z_nr_bits].digits;
1081
    if (q3 == 0) {
1082
      q3 = nr_digits[z_nr_bits].digits1;
1083
      if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi ||
1084
          (C3.w[1] == nr_digits[z_nr_bits].threshold_hi &&
1085
           C3.w[0] >= nr_digits[z_nr_bits].threshold_lo))
1086
        q3++;
1087
    }
1088
  }
1089
 
1090
  if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
1091
      (C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
1092
    // x = 0 or y = 0
1093
    // z = f, necessarily; for 0 + z return z, with the preferred exponent
1094
    // the result is z, but need to get the preferred exponent
1095
    if (z_exp <= p_exp) { // the preferred exponent is z_exp
1096
      res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1];
1097
      res.w[0] = C3.w[0];
1098
    } else { // if (p_exp < z_exp) the preferred exponent is p_exp
1099
      // return (C3 * 10^scale) * 10^(z_exp - scale)
1100
      // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
1101
      scale = p34 - q3;
1102
      ind = (z_exp - p_exp) >> 49;
1103
      if (ind < scale)
1104
        scale = ind;
1105
      if (scale == 0) {
1106
        res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant
1107
        res.w[0] = z.w[0];
1108
      } else if (q3 <= 19) { // z fits in 64 bits 
1109
        if (scale <= 19) { // 10^scale fits in 64 bits
1110
          // 64 x 64 C3.w[0] * ten2k64[scale]
1111
          __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1112
        } else { // 10^scale fits in 128 bits
1113
          // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1114
          __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1115
        }
1116
      } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 
1117
        // 64 x 128 ten2k64[scale] * C3
1118
        __mul_128x64_to_128 (res, ten2k64[scale], C3);
1119
      }
1120
      // subtract scale from the exponent
1121
      z_exp = z_exp - ((UINT64) scale << 49);
1122
      res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1123
    }
1124
    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1125
    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1126
    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1127
    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1128
    BID_SWAP128 (res);
1129
    BID_RETURN (res)
1130
  } else {
1131
    ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
1132
  }
1133
 
1134
  e1 = (x_exp >> 49) - 6176; // unbiased exponent of x 
1135
  e2 = (y_exp >> 49) - 6176; // unbiased exponent of y 
1136
  e3 = (z_exp >> 49) - 6176; // unbiased exponent of z
1137
  e4 = e1 + e2; // unbiased exponent of the exact x * y
1138
 
1139
  // calculate C1 * C2 and its number of decimal digits, q4
1140
 
1141
  // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
1142
  // where 2 <= q1 + q2 <= 68
1143
  // calculate C4 = C1 * C2 and determine q
1144
  C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0;
1145
  if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
1146
    C4.w[0] = C1.w[0] * C2.w[0];
1147
    // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1148
    if (C4.w[0] < ten2k64[q1 + q2 - 1])
1149
      q4 = q1 + q2 - 1; // q4 in [1, 18]
1150
    else
1151
      q4 = q1 + q2; // q4 in [2, 19]
1152
    // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1153
  } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits
1154
    // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
1155
    __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
1156
    // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
1157
    if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1
1158
      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1159
      q4 = 19; // 19 = q1 + q2 - 1
1160
    } else {
1161
      // if (C4.w[1] == 0)
1162
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1163
      // else
1164
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1165
      q4 = 20; // 20 = q1 + q2
1166
    }
1167
  } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38
1168
    // C4 = C1 * C2 fits in 64 or 128 bits
1169
    // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
1170
    // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
1171
    if (q1 <= 19) {
1172
      __mul_128x64_to_128 (C4, C1.w[0], C2);
1173
    } else { // q2 <= 19
1174
      __mul_128x64_to_128 (C4, C2.w[0], C1);
1175
    }
1176
    // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1177
    if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] ||
1178
        (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] &&
1179
         C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) {
1180
      // if (C4.w[1] == 0) // q4 = 20, necessarily
1181
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1182
      // else
1183
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1184
      q4 = q1 + q2 - 1; // q4 in [20, 37]
1185
    } else {
1186
      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1187
      q4 = q1 + q2; // q4 in [21, 38]
1188
    }
1189
  } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits
1190
    // both C1 and C2 fit in 128 bits (actually in 113 bits)
1191
    // may replace this by 128x128_to192
1192
    __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
1193
    // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
1194
    if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] ||
1195
                         (C4.w[1] == ten2k128[18].w[1]
1196
                          && C4.w[0] < ten2k128[18].w[0]))) {
1197
      // 18 = 38 - 20 = q1+q2-1 - 20
1198
      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1199
      q4 = 38; // 38 = q1 + q2 - 1
1200
    } else {
1201
      // if (C4.w[2] == 0)
1202
      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1203
      // else
1204
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1205
      q4 = 39; // 39 = q1 + q2
1206
    }
1207
  } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57
1208
    // C4 = C1 * C2 fits in 128 or 192 bits
1209
    // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
1210
    // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1211
    // may fit in 64 bits
1212
    if (C1.w[1] == 0) { // C1 fits in 64 bits
1213
      // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1214
      __mul_64x128_full (C4.w[2], C4, C1.w[0], C2);
1215
    } else if (C2.w[1] == 0) { // C2 fits in 64 bits
1216
      // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1217
      __mul_64x128_full (C4.w[2], C4, C2.w[0], C1);
1218
    } else { // both C1 and C2 require 128 bits
1219
      // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1220
      __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1221
    }
1222
    // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1223
    if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1224
        (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1225
         (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1226
          (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1227
           C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) {
1228
      // if (C4.w[2] == 0) // q4 = 39, necessarily
1229
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1230
      // else
1231
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1232
      q4 = q1 + q2 - 1; // q4 in [39, 56]
1233
    } else {
1234
      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1235
      q4 = q1 + q2; // q4 in [40, 57]
1236
    }
1237
  } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits
1238
    // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1239
    // may fit in 64 bits
1240
    if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits
1241
      __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192
1242
    } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits
1243
      __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192
1244
    } else { // C1 * C2 will fit in 192 bits or in 256 bits
1245
      __mul_128x128_to_256 (C4, C1, C2);
1246
    }
1247
    // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
1248
    if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] ||
1249
                         (C4.w[2] == ten2k256[18].w[2]
1250
                          && (C4.w[1] < ten2k256[18].w[1]
1251
                              || (C4.w[1] == ten2k256[18].w[1]
1252
                                  && C4.w[0] < ten2k256[18].w[0]))))) {
1253
      // 18 = 57 - 39 = q1+q2-1 - 39
1254
      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1255
      q4 = 57; // 57 = q1 + q2 - 1
1256
    } else {
1257
      // if (C4.w[3] == 0)
1258
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1259
      // else
1260
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1261
      q4 = 58; // 58 = q1 + q2
1262
    }
1263
  } else { // if 59 <= q1 + q2 <= 68
1264
    // C4 = C1 * C2 fits in 192 or 256 bits
1265
    // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
1266
    // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
1267
    // 64 bits
1268
    // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1269
    __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1270
    // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1271
    if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] ||
1272
        (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] &&
1273
         (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1274
          (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1275
           (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1276
            (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1277
             C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) {
1278
      // if (C4.w[3] == 0) // q4 = 58, necessarily
1279
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1280
      // else
1281
      //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1282
      q4 = q1 + q2 - 1; // q4 in [58, 67]
1283
    } else {
1284
      // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1285
      q4 = q1 + q2; // q4 in [59, 68]
1286
    }
1287
  }
1288
 
1289
  if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0
1290
    save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
1291
    *pfpsf = 0;
1292
 
1293
    if (q4 > p34) {
1294
 
1295
      // truncate C4 to p34 digits into res
1296
      // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
1297
      x0 = q4 - p34;
1298
      if (q4 <= 38) {
1299
        P128.w[1] = C4.w[1];
1300
        P128.w[0] = C4.w[0];
1301
        round128_19_38 (q4, x0, P128, &res, &incr_exp,
1302
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
1303
                        &is_inexact_lt_midpoint,
1304
                        &is_inexact_gt_midpoint);
1305
      } else if (q4 <= 57) { // 35 <= q4 <= 57
1306
        P192.w[2] = C4.w[2];
1307
        P192.w[1] = C4.w[1];
1308
        P192.w[0] = C4.w[0];
1309
        round192_39_57 (q4, x0, P192, &R192, &incr_exp,
1310
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
1311
                        &is_inexact_lt_midpoint,
1312
                        &is_inexact_gt_midpoint);
1313
        res.w[0] = R192.w[0];
1314
        res.w[1] = R192.w[1];
1315
      } else { // if (q4 <= 68)
1316
        round256_58_76 (q4, x0, C4, &R256, &incr_exp,
1317
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
1318
                        &is_inexact_lt_midpoint,
1319
                        &is_inexact_gt_midpoint);
1320
        res.w[0] = R256.w[0];
1321
        res.w[1] = R256.w[1];
1322
      }
1323
      e4 = e4 + x0;
1324
      if (incr_exp) {
1325
        e4 = e4 + 1;
1326
      }
1327
      q4 = p34;
1328
      // res is now the coefficient of the result rounded to the destination 
1329
      // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
1330
    } else { // if (q4 <= p34)
1331
      // C4 * 10^e4 is the result rounded to the destination precision, with  
1332
      // unbounded exponent (which is exact)
1333
 
1334
      if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) {
1335
        // e4 is too large, but can be brought within range by scaling up C4
1336
        scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
1337
        // res = (C4 * 10^scale) * 10^expmax
1338
        if (q4 <= 19) { // C4 fits in 64 bits
1339
          if (scale <= 19) { // 10^scale fits in 64 bits
1340
            // 64 x 64 C4.w[0] * ten2k64[scale]
1341
            __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
1342
          } else { // 10^scale fits in 128 bits
1343
            // 64 x 128 C4.w[0] * ten2k128[scale - 20]
1344
            __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
1345
          }
1346
        } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
1347
          // 64 x 128 ten2k64[scale] * CC43
1348
          __mul_128x64_to_128 (res, ten2k64[scale], C4);
1349
        }
1350
        e4 = e4 - scale; // expmax
1351
        q4 = q4 + scale;
1352
      } else {
1353
        res.w[1] = C4.w[1];
1354
        res.w[0] = C4.w[0];
1355
      }
1356
      // res is the coefficient of the result rounded to the destination 
1357
      // precision, with unbounded exponent (it has q4 digits); the exponent 
1358
      // is e4 (exact result)
1359
    }
1360
 
1361
    // check for overflow
1362
    if (q4 + e4 > p34 + expmax) {
1363
      if (rnd_mode == ROUNDING_TO_NEAREST) {
1364
        res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
1365
        res.w[0] = 0x0000000000000000ull;
1366
        *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1367
      } else {
1368
        res.w[1] = p_sign | res.w[1];
1369
        rounding_correction (rnd_mode,
1370
                             is_inexact_lt_midpoint,
1371
                             is_inexact_gt_midpoint,
1372
                             is_midpoint_lt_even, is_midpoint_gt_even,
1373
                             e4, &res, pfpsf);
1374
      }
1375
      *pfpsf |= save_fpsf;
1376
      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1377
      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1378
      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1379
      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1380
      BID_SWAP128 (res);
1381
      BID_RETURN (res)
1382
    }
1383
    // check for underflow
1384
    if (q4 + e4 < expmin + P34) {
1385
      is_tiny = 1; // the result is tiny
1386
      if (e4 < expmin) {
1387
        // if e4 < expmin, we must truncate more of res
1388
        x0 = expmin - e4; // x0 >= 1
1389
        is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
1390
        is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
1391
        is_midpoint_lt_even0 = is_midpoint_lt_even;
1392
        is_midpoint_gt_even0 = is_midpoint_gt_even;
1393
        is_inexact_lt_midpoint = 0;
1394
        is_inexact_gt_midpoint = 0;
1395
        is_midpoint_lt_even = 0;
1396
        is_midpoint_gt_even = 0;
1397
        // the number of decimal digits in res is q4
1398
        if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
1399
          if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
1400
            round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
1401
                          &is_midpoint_lt_even, &is_midpoint_gt_even,
1402
                          &is_inexact_lt_midpoint,
1403
                          &is_inexact_gt_midpoint);
1404
            if (incr_exp) {
1405
              // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
1406
              R64 = ten2k64[q4 - x0];
1407
            }
1408
            // res.w[1] = 0; (from above)
1409
            res.w[0] = R64;
1410
          } else { // if (q4 <= 34)
1411
            // 19 <= q4 <= 38
1412
            P128.w[1] = res.w[1];
1413
            P128.w[0] = res.w[0];
1414
            round128_19_38 (q4, x0, P128, &res, &incr_exp,
1415
                            &is_midpoint_lt_even, &is_midpoint_gt_even,
1416
                            &is_inexact_lt_midpoint,
1417
                            &is_inexact_gt_midpoint);
1418
            if (incr_exp) {
1419
              // increase coefficient by a factor of 10; this will be <= 10^33
1420
              // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
1421
              if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
1422
                // res.w[1] = 0;
1423
                res.w[0] = ten2k64[q4 - x0];
1424
              } else { // 20 <= q4 - x0 <= 37
1425
                res.w[0] = ten2k128[q4 - x0 - 20].w[0];
1426
                res.w[1] = ten2k128[q4 - x0 - 20].w[1];
1427
              }
1428
            }
1429
          }
1430
          e4 = e4 + x0; // expmin 
1431
        } else if (x0 == q4) {
1432
          // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
1433
          // determine relationship with 1/2 ulp
1434
          if (q4 <= 19) {
1435
            if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1436
              lt_half_ulp = 1;
1437
              is_inexact_lt_midpoint = 1;
1438
            } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1439
              eq_half_ulp = 1;
1440
              is_midpoint_gt_even = 1;
1441
            } else { // > 1/2 ulp
1442
              // gt_half_ulp = 1;
1443
              is_inexact_gt_midpoint = 1;
1444
            }
1445
          } else { // if (q4 <= 34)
1446
            if (res.w[1] < midpoint128[q4 - 20].w[1] ||
1447
                (res.w[1] == midpoint128[q4 - 20].w[1] &&
1448
                res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
1449
              lt_half_ulp = 1;
1450
              is_inexact_lt_midpoint = 1;
1451
            } else if (res.w[1] == midpoint128[q4 - 20].w[1] &&
1452
                res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1453
              eq_half_ulp = 1;
1454
              is_midpoint_gt_even = 1;
1455
            } else { // > 1/2 ulp
1456
              // gt_half_ulp = 1;
1457
              is_inexact_gt_midpoint = 1;
1458
            }
1459
          }
1460
          if (lt_half_ulp || eq_half_ulp) {
1461
            // res = +0.0 * 10^expmin
1462
            res.w[1] = 0x0000000000000000ull;
1463
            res.w[0] = 0x0000000000000000ull;
1464
          } else { // if (gt_half_ulp)
1465
            // res = +1 * 10^expmin
1466
            res.w[1] = 0x0000000000000000ull;
1467
            res.w[0] = 0x0000000000000001ull;
1468
          }
1469
          e4 = expmin;
1470
        } else { // if (x0 > q4)
1471
          // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
1472
          res.w[1] = 0;
1473
          res.w[0] = 0;
1474
          e4 = expmin;
1475
          is_inexact_lt_midpoint = 1;
1476
        }
1477
        // avoid a double rounding error
1478
        if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
1479
            is_midpoint_lt_even) { // double rounding error upward
1480
          // res = res - 1
1481
          res.w[0]--;
1482
          if (res.w[0] == 0xffffffffffffffffull)
1483
            res.w[1]--;
1484
          // Note: a double rounding error upward is not possible; for this
1485
          // the result after the first rounding would have to be 99...95
1486
          // (35 digits in all), possibly followed by a number of zeros; this
1487
          // not possible for f * f + 0
1488
          is_midpoint_lt_even = 0;
1489
          is_inexact_lt_midpoint = 1;
1490
        } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
1491
            is_midpoint_gt_even) { // double rounding error downward
1492
          // res = res + 1
1493
          res.w[0]++;
1494
          if (res.w[0] == 0)
1495
            res.w[1]++;
1496
          is_midpoint_gt_even = 0;
1497
          is_inexact_gt_midpoint = 1;
1498
        } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
1499
                   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
1500
          // if this second rounding was exact the result may still be 
1501
          // inexact because of the first rounding
1502
          if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
1503
            is_inexact_gt_midpoint = 1;
1504
          }
1505
          if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
1506
            is_inexact_lt_midpoint = 1;
1507
          }
1508
        } else if (is_midpoint_gt_even &&
1509
                   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
1510
          // pulled up to a midpoint
1511
          is_inexact_lt_midpoint = 1;
1512
          is_inexact_gt_midpoint = 0;
1513
          is_midpoint_lt_even = 0;
1514
          is_midpoint_gt_even = 0;
1515
        } else if (is_midpoint_lt_even &&
1516
                   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
1517
          // pulled down to a midpoint
1518
          is_inexact_lt_midpoint = 0;
1519
          is_inexact_gt_midpoint = 1;
1520
          is_midpoint_lt_even = 0;
1521
          is_midpoint_gt_even = 0;
1522
        } else {
1523
          ;
1524
        }
1525
      } else { // if e4 >= emin then q4 < P and the result is tiny and exact
1526
        if (e3 < e4) {
1527
          // if (e3 < e4) the preferred exponent is e3
1528
          // return (C4 * 10^scale) * 10^(e4 - scale)
1529
          // where scale = min (p34-q4, (e4 - e3))
1530
          scale = p34 - q4;
1531
          ind = e4 - e3;
1532
          if (ind < scale)
1533
            scale = ind;
1534
          if (scale == 0) {
1535
            ; // res and e4 are unchanged
1536
          } else if (q4 <= 19) { // C4 fits in 64 bits
1537
            if (scale <= 19) { // 10^scale fits in 64 bits
1538
              // 64 x 64 res.w[0] * ten2k64[scale]
1539
              __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
1540
            } else { // 10^scale fits in 128 bits
1541
              // 64 x 128 res.w[0] * ten2k128[scale - 20]
1542
              __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
1543
            }
1544
          } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
1545
            // 64 x 128 ten2k64[scale] * C3
1546
            __mul_128x64_to_128 (res, ten2k64[scale], res);
1547
          }
1548
          // subtract scale from the exponent
1549
          e4 = e4 - scale;
1550
        }
1551
      }
1552
 
1553
      // check for inexact result
1554
      if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1555
          is_midpoint_lt_even || is_midpoint_gt_even) {
1556
        // set the inexact flag and the underflow flag
1557
        *pfpsf |= INEXACT_EXCEPTION;
1558
        *pfpsf |= UNDERFLOW_EXCEPTION;
1559
      }
1560
      res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1561
      if (rnd_mode != ROUNDING_TO_NEAREST) {
1562
        rounding_correction (rnd_mode,
1563
                             is_inexact_lt_midpoint,
1564
                             is_inexact_gt_midpoint,
1565
                             is_midpoint_lt_even, is_midpoint_gt_even,
1566
                             e4, &res, pfpsf);
1567
      }
1568
      *pfpsf |= save_fpsf;
1569
      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1570
      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1571
      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1572
      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1573
      BID_SWAP128 (res);
1574
      BID_RETURN (res)
1575
    }
1576
    // no overflow, and no underflow for rounding to nearest
1577
    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1578
 
1579
    if (rnd_mode != ROUNDING_TO_NEAREST) {
1580
      rounding_correction (rnd_mode,
1581
                           is_inexact_lt_midpoint,
1582
                           is_inexact_gt_midpoint,
1583
                           is_midpoint_lt_even, is_midpoint_gt_even,
1584
                           e4, &res, pfpsf);
1585
      // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
1586
      if (e4 == expmin) {
1587
        if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
1588
            ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
1589
             res.w[0] < 0x38c15b0a00000000ull)) {
1590
          is_tiny = 1;
1591
        }
1592
      }
1593
    }
1594
 
1595
    if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1596
        is_midpoint_lt_even || is_midpoint_gt_even) {
1597
      // set the inexact flag
1598
      *pfpsf |= INEXACT_EXCEPTION;
1599
      if (is_tiny)
1600
        *pfpsf |= UNDERFLOW_EXCEPTION;
1601
    }
1602
 
1603
    if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
1604
      // need to ensure that the result has the preferred exponent
1605
      p_exp = res.w[1] & MASK_EXP;
1606
      if (z_exp < p_exp) { // the preferred exponent is z_exp
1607
        // signficand of res in C3
1608
        C3.w[1] = res.w[1] & MASK_COEFF;
1609
        C3.w[0] = res.w[0];
1610
        // the number of decimal digits of x * y is q4 <= 34
1611
        // Note: the coefficient fits in 128 bits
1612
 
1613
        // return (C3 * 10^scale) * 10^(p_exp - scale)
1614
        // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
1615
        scale = p34 - q4;
1616
        ind = (p_exp - z_exp) >> 49;
1617
        if (ind < scale)
1618
          scale = ind;
1619
        // subtract scale from the exponent
1620
        p_exp = p_exp - ((UINT64) scale << 49);
1621
        if (scale == 0) {
1622
          ; // leave res unchanged
1623
        } else if (q4 <= 19) { // x * y fits in 64 bits
1624
          if (scale <= 19) { // 10^scale fits in 64 bits
1625
            // 64 x 64 C3.w[0] * ten2k64[scale] 
1626
            __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1627
          } else { // 10^scale fits in 128 bits 
1628
            // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1629
            __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1630
          }
1631
          res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1632
        } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
1633
          // 64 x 128 ten2k64[scale] * C3 
1634
          __mul_128x64_to_128 (res, ten2k64[scale], C3);
1635
          res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1636
        }
1637
      } // else leave the result as it is, because p_exp <= z_exp
1638
    }
1639
    *pfpsf |= save_fpsf;
1640
    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1641
    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1642
    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1643
    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1644
    BID_SWAP128 (res);
1645
    BID_RETURN (res)
1646
  } // else we have f * f + f
1647
 
1648
  // continue with x = f, y = f, z = f
1649
 
1650
  delta = q3 + e3 - q4 - e4;
1651
delta_ge_zero:
1652
  if (delta >= 0) {
1653
 
1654
    if (p34 <= delta - 1 ||     // Case (1')
1655
        (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
1656
      // check for overflow, which can occur only in Case (1')
1657
      if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
1658
        // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
1659
        // condition for (q3 + e3) > (p34 + expmax)
1660
        if (rnd_mode == ROUNDING_TO_NEAREST) {
1661
          res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
1662
          res.w[0] = 0x0000000000000000ull;
1663
          *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1664
        } else {
1665
          if (p_sign == z_sign) {
1666
            is_inexact_lt_midpoint = 1;
1667
          } else {
1668
            is_inexact_gt_midpoint = 1;
1669
          }
1670
          // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
1671
          scale = p34 - q3;
1672
          if (scale == 0) {
1673
            res.w[1] = z_sign | C3.w[1];
1674
            res.w[0] = C3.w[0];
1675
          } else {
1676
            if (q3 <= 19) { // C3 fits in 64 bits
1677
              if (scale <= 19) { // 10^scale fits in 64 bits
1678
                // 64 x 64 C3.w[0] * ten2k64[scale]
1679
                __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1680
              } else { // 10^scale fits in 128 bits
1681
                // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1682
                __mul_128x64_to_128 (res, C3.w[0],
1683
                                     ten2k128[scale - 20]);
1684
              }
1685
            } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
1686
              // 64 x 128 ten2k64[scale] * C3
1687
              __mul_128x64_to_128 (res, ten2k64[scale], C3);
1688
            }
1689
            // the coefficient in res has q3 + scale = p34 digits
1690
          }
1691
          e3 = e3 - scale;
1692
          res.w[1] = z_sign | res.w[1];
1693
          rounding_correction (rnd_mode,
1694
                               is_inexact_lt_midpoint,
1695
                               is_inexact_gt_midpoint,
1696
                               is_midpoint_lt_even, is_midpoint_gt_even,
1697
                               e3, &res, pfpsf);
1698
        }
1699
        *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1700
        *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1701
        *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1702
        *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1703
        BID_SWAP128 (res);
1704
        BID_RETURN (res)
1705
      }
1706
      // res = z
1707
      if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3)
1708
        // return (C3 * 10^scale) * 10^(z_exp - scale)
1709
        // where scale = min (p34-q3, z_exp-EMIN)
1710
        scale = p34 - q3;
1711
        ind = e3 + 6176;
1712
        if (ind < scale)
1713
          scale = ind;
1714
        if (scale == 0) {
1715
          res.w[1] = C3.w[1];
1716
          res.w[0] = C3.w[0];
1717
        } else if (q3 <= 19) { // z fits in 64 bits
1718
          if (scale <= 19) { // 10^scale fits in 64 bits
1719
            // 64 x 64 C3.w[0] * ten2k64[scale]
1720
            __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1721
          } else { // 10^scale fits in 128 bits
1722
            // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1723
            __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1724
          }
1725
        } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1726
          // 64 x 128 ten2k64[scale] * C3
1727
          __mul_128x64_to_128 (res, ten2k64[scale], C3);
1728
        }
1729
        // the coefficient in res has q3 + scale digits
1730
        // subtract scale from the exponent
1731
        z_exp = z_exp - ((UINT64) scale << 49);
1732
        e3 = e3 - scale;
1733
        res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1734
        if (scale + q3 < p34)
1735
          *pfpsf |= UNDERFLOW_EXCEPTION;
1736
      } else {
1737
        scale = 0;
1738
        res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1];
1739
        res.w[0] = C3.w[0];
1740
      }
1741
 
1742
      // use the following to avoid double rounding errors when operating on
1743
      // mixed formats in rounding to nearest, and for correcting the result
1744
      // if not rounding to nearest
1745
      if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
1746
        // there is a gap of exactly one digit between the scaled C3 and C4
1747
        // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
1748
        if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) ||
1749
            (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) ||
1750
            (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] ||
1751
                          C3.w[0] != ten2k128[q3 - 21].w[0]))) {
1752
          // C3 * 10^ scale != 10^(q3-1)
1753
          // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
1754
          // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
1755
          is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
1756
        } else { // if C3 * 10^scale = 10^(q3+scale-1)
1757
          // ok from above e3 = (z_exp >> 49) - 6176;
1758
          // the result is always inexact
1759
          if (q4 == 1) {
1760
            R64 = C4.w[0];
1761
          } else {
1762
            // if q4 > 1 then truncate C4 from q4 digits to 1 digit; 
1763
            // x = q4-1, 1 <= x <= 67 and check if this operation is exact
1764
            if (q4 <= 18) { // 2 <= q4 <= 18
1765
              round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
1766
                            &is_midpoint_lt_even, &is_midpoint_gt_even,
1767
                            &is_inexact_lt_midpoint,
1768
                            &is_inexact_gt_midpoint);
1769
            } else if (q4 <= 38) {
1770
              P128.w[1] = C4.w[1];
1771
              P128.w[0] = C4.w[0];
1772
              round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
1773
                              &is_midpoint_lt_even,
1774
                              &is_midpoint_gt_even,
1775
                              &is_inexact_lt_midpoint,
1776
                              &is_inexact_gt_midpoint);
1777
              R64 = R128.w[0]; // one decimal digit
1778
            } else if (q4 <= 57) {
1779
              P192.w[2] = C4.w[2];
1780
              P192.w[1] = C4.w[1];
1781
              P192.w[0] = C4.w[0];
1782
              round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
1783
                              &is_midpoint_lt_even,
1784
                              &is_midpoint_gt_even,
1785
                              &is_inexact_lt_midpoint,
1786
                              &is_inexact_gt_midpoint);
1787
              R64 = R192.w[0]; // one decimal digit
1788
            } else { // if (q4 <= 68)
1789
              round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
1790
                              &is_midpoint_lt_even,
1791
                              &is_midpoint_gt_even,
1792
                              &is_inexact_lt_midpoint,
1793
                              &is_inexact_gt_midpoint);
1794
              R64 = R256.w[0]; // one decimal digit
1795
            }
1796
            if (incr_exp) {
1797
              R64 = 10;
1798
            }
1799
          }
1800
          if (q4 == 1 && C4.w[0] == 5) {
1801
            is_inexact_lt_midpoint = 0;
1802
            is_inexact_gt_midpoint = 0;
1803
            is_midpoint_lt_even = 1;
1804
            is_midpoint_gt_even = 0;
1805
          } else if ((e3 == expmin) ||
1806
                     R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
1807
            // result does not change
1808
            is_inexact_lt_midpoint = 0;
1809
            is_inexact_gt_midpoint = 1;
1810
            is_midpoint_lt_even = 0;
1811
            is_midpoint_gt_even = 0;
1812
          } else {
1813
            is_inexact_lt_midpoint = 1;
1814
            is_inexact_gt_midpoint = 0;
1815
            is_midpoint_lt_even = 0;
1816
            is_midpoint_gt_even = 0;
1817
            // result decremented is 10^(q3+scale) - 1
1818
            if ((q3 + scale) <= 19) {
1819
              res.w[1] = 0;
1820
              res.w[0] = ten2k64[q3 + scale];
1821
            } else { // if ((q3 + scale + 1) <= 35)
1822
              res.w[1] = ten2k128[q3 + scale - 20].w[1];
1823
              res.w[0] = ten2k128[q3 + scale - 20].w[0];
1824
            }
1825
            res.w[0] = res.w[0] - 1; // borrow never occurs
1826
            z_exp = z_exp - EXP_P1;
1827
            e3 = e3 - 1;
1828
            res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
1829
          }
1830
          if (e3 == expmin) {
1831
            if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) {
1832
              ; // result not tiny (in round-to-nearest mode)
1833
            } else {
1834
              *pfpsf |= UNDERFLOW_EXCEPTION;
1835
            }
1836
          }
1837
        } // end 10^(q3+scale-1)
1838
        // set the inexact flag
1839
        *pfpsf |= INEXACT_EXCEPTION;
1840
      } else {
1841
        if (p_sign == z_sign) {
1842
          // if (z_sign), set as if for absolute value
1843
          is_inexact_lt_midpoint = 1;
1844
        } else { // if (p_sign != z_sign)
1845
          // if (z_sign), set as if for absolute value
1846
          is_inexact_gt_midpoint = 1;
1847
        }
1848
        *pfpsf |= INEXACT_EXCEPTION;
1849
      }
1850
      // the result is always inexact => set the inexact flag
1851
      // Determine tininess:
1852
      //    if (exp > expmin)
1853
      //      the result is not tiny
1854
      //    else // if exp = emin
1855
      //      if (q3 + scale < p34)
1856
      //        the result is tiny
1857
      //      else // if (q3 + scale = p34)
1858
      //        if (C3 * 10^scale > 10^33)
1859
      //          the result is not tiny
1860
      //        else // if C3 * 10^scale = 10^33
1861
      //          if (xy * z > 0)
1862
      //            the result is not tiny
1863
      //          else // if (xy * z < 0)
1864
      //            if (z > 0)
1865
      //              if rnd_mode != RP
1866
      //                the result is tiny
1867
      //              else // if RP
1868
      //                the result is not tiny
1869
      //            else // if (z < 0)
1870
      //              if rnd_mode != RM
1871
      //                the result is tiny
1872
      //              else // if RM
1873
      //                the result is not tiny
1874
      //              endif
1875
      //            endif
1876
      //          endif
1877
      //        endif
1878
      //      endif
1879
      //    endif 
1880
      if ((e3 == expmin && (q3 + scale) < p34) ||
1881
          (e3 == expmin && (q3 + scale) == p34 &&
1882
          (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&   // 10^33_high
1883
          res.w[0] == 0x38c15b0a00000000ull &&   // 10^33_low
1884
          z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) ||
1885
          (z_sign && rnd_mode != ROUNDING_DOWN)))) {
1886
        *pfpsf |= UNDERFLOW_EXCEPTION;
1887
      }
1888
      if (rnd_mode != ROUNDING_TO_NEAREST) {
1889
        rounding_correction (rnd_mode,
1890
                             is_inexact_lt_midpoint,
1891
                             is_inexact_gt_midpoint,
1892
                             is_midpoint_lt_even, is_midpoint_gt_even,
1893
                             e3, &res, pfpsf);
1894
      }
1895
      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1896
      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1897
      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1898
      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1899
      BID_SWAP128 (res);
1900
      BID_RETURN (res)
1901
 
1902
    } else if (p34 == delta) { // Case (1''B)
1903
 
1904
      // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
1905
      // and C3 can be scaled up to p34 digits if needed
1906
 
1907
      // scale C3 to p34 digits if needed
1908
      scale = p34 - q3; // 0 <= scale <= p34 - 1
1909
      if (scale == 0) {
1910
        res.w[1] = C3.w[1];
1911
        res.w[0] = C3.w[0];
1912
      } else if (q3 <= 19) { // z fits in 64 bits
1913
        if (scale <= 19) { // 10^scale fits in 64 bits
1914
          // 64 x 64 C3.w[0] * ten2k64[scale]
1915
          __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1916
        } else { // 10^scale fits in 128 bits
1917
          // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1918
          __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1919
        }
1920
      } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1921
        // 64 x 128 ten2k64[scale] * C3
1922
        __mul_128x64_to_128 (res, ten2k64[scale], C3);
1923
      }
1924
      // subtract scale from the exponent
1925
      z_exp = z_exp - ((UINT64) scale << 49);
1926
      e3 = e3 - scale;
1927
      // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits
1928
 
1929
      // determine whether x * y is less than, equal to, or greater than 
1930
      // 1/2 ulp (z)
1931
      if (q4 <= 19) {
1932
        if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1933
          lt_half_ulp = 1;
1934
        } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1935
          eq_half_ulp = 1;
1936
        } else { // > 1/2 ulp
1937
          gt_half_ulp = 1;
1938
        }
1939
      } else if (q4 <= 38) {
1940
        if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] ||
1941
            (C4.w[1] == midpoint128[q4 - 20].w[1] &&
1942
            C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
1943
          lt_half_ulp = 1;
1944
        } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] &&
1945
            C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1946
          eq_half_ulp = 1;
1947
        } else { // > 1/2 ulp
1948
          gt_half_ulp = 1;
1949
        }
1950
      } else if (q4 <= 58) {
1951
        if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] ||
1952
            (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1953
            C4.w[1] < midpoint192[q4 - 39].w[1]) ||
1954
            (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1955
            C4.w[1] == midpoint192[q4 - 39].w[1] &&
1956
            C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
1957
          lt_half_ulp = 1;
1958
        } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] &&
1959
            C4.w[1] == midpoint192[q4 - 39].w[1] &&
1960
            C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
1961
          eq_half_ulp = 1;
1962
        } else { // > 1/2 ulp
1963
          gt_half_ulp = 1;
1964
        }
1965
      } else {
1966
        if (C4.w[3] < midpoint256[q4 - 59].w[3] ||
1967
            (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1968
            C4.w[2] < midpoint256[q4 - 59].w[2]) ||
1969
            (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1970
            C4.w[2] == midpoint256[q4 - 59].w[2] &&
1971
            C4.w[1] < midpoint256[q4 - 59].w[1]) ||
1972
            (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1973
            C4.w[2] == midpoint256[q4 - 59].w[2] &&
1974
            C4.w[1] == midpoint256[q4 - 59].w[1] &&
1975
            C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
1976
          lt_half_ulp = 1;
1977
        } else if (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1978
            C4.w[2] == midpoint256[q4 - 59].w[2] &&
1979
            C4.w[1] == midpoint256[q4 - 59].w[1] &&
1980
            C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
1981
          eq_half_ulp = 1;
1982
        } else { // > 1/2 ulp
1983
          gt_half_ulp = 1;
1984
        }
1985
      }
1986
 
1987
      if (p_sign == z_sign) {
1988
        if (lt_half_ulp) {
1989
          res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1990
          // use the following to avoid double rounding errors when operating on
1991
          // mixed formats in rounding to nearest
1992
          is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value
1993
        } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
1994
          // add 1 ulp to the significand
1995
          res.w[0]++;
1996
          if (res.w[0] == 0x0ull)
1997
            res.w[1]++;
1998
          // check for rounding overflow, when coeff == 10^34
1999
          if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull &&
2000
              res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34
2001
            e3 = e3 + 1;
2002
            // coeff = 10^33
2003
            z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP;
2004
            res.w[1] = 0x0000314dc6448d93ull;
2005
            res.w[0] = 0x38c15b0a00000000ull;
2006
          }
2007
          // end add 1 ulp
2008
          res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2009
          if (eq_half_ulp) {
2010
            is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2011
          } else {
2012
            is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2013
          }
2014
        } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2015
          // leave unchanged 
2016
          res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2017
          is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2018
        }
2019
        // the result is always inexact, and never tiny
2020
        // set the inexact flag
2021
        *pfpsf |= INEXACT_EXCEPTION;
2022
        // check for overflow
2023
        if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) {
2024
          res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2025
          res.w[0] = 0x0000000000000000ull;
2026
          *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2027
          *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2028
          *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2029
          *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2030
          *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2031
          BID_SWAP128 (res);
2032
          BID_RETURN (res)
2033
        }
2034
        if (rnd_mode != ROUNDING_TO_NEAREST) {
2035
          rounding_correction (rnd_mode,
2036
                               is_inexact_lt_midpoint,
2037
                               is_inexact_gt_midpoint,
2038
                               is_midpoint_lt_even, is_midpoint_gt_even,
2039
                               e3, &res, pfpsf);
2040
          z_exp = res.w[1] & MASK_EXP;
2041
        }
2042
      } else { // if (p_sign != z_sign)
2043
        // consider two cases, because C3 * 10^scale = 10^33 is a special case
2044
        if (res.w[1] != 0x0000314dc6448d93ull ||
2045
            res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
2046
          if (lt_half_ulp) {
2047
            res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2048
            // use the following to avoid double rounding errors when operating
2049
            // on mixed formats in rounding to nearest
2050
            is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2051
          } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
2052
            // subtract 1 ulp from the significand
2053
            res.w[0]--;
2054
            if (res.w[0] == 0xffffffffffffffffull)
2055
              res.w[1]--;
2056
            res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2057
            if (eq_half_ulp) {
2058
              is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2059
            } else {
2060
              is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2061
            }
2062
          } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2063
            // leave unchanged
2064
            res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2065
            is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2066
          }
2067
          // the result is always inexact, and never tiny
2068
          // check for overflow for RN
2069
          if (e3 > expmax) {
2070
            if (rnd_mode == ROUNDING_TO_NEAREST) {
2071
              res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2072
              res.w[0] = 0x0000000000000000ull;
2073
              *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2074
            } else {
2075
              rounding_correction (rnd_mode,
2076
                                   is_inexact_lt_midpoint,
2077
                                   is_inexact_gt_midpoint,
2078
                                   is_midpoint_lt_even,
2079
                                   is_midpoint_gt_even, e3, &res,
2080
                                   pfpsf);
2081
            }
2082
            *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2083
            *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2084
            *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2085
            *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2086
            BID_SWAP128 (res);
2087
            BID_RETURN (res)
2088
          }
2089
          // set the inexact flag
2090
          *pfpsf |= INEXACT_EXCEPTION;
2091
          if (rnd_mode != ROUNDING_TO_NEAREST) {
2092
            rounding_correction (rnd_mode,
2093
                                 is_inexact_lt_midpoint,
2094
                                 is_inexact_gt_midpoint,
2095
                                 is_midpoint_lt_even,
2096
                                 is_midpoint_gt_even, e3, &res, pfpsf);
2097
          }
2098
          z_exp = res.w[1] & MASK_EXP;
2099
        } else { // if C3 * 10^scale = 10^33
2100
          e3 = (z_exp >> 49) - 6176;
2101
          if (e3 > expmin) {
2102
            // the result is exact if exp > expmin and C4 = d*10^(q4-1), 
2103
            // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
2104
            if (q4 == 1) {
2105
              // if q4 = 1 the result is exact
2106
              // result coefficient = 10^34 - C4
2107
              res.w[1] = 0x0001ed09bead87c0ull;
2108
              res.w[0] = 0x378d8e6400000000ull - C4.w[0];
2109
              z_exp = z_exp - EXP_P1;
2110
              e3 = e3 - 1;
2111
              res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2112
            } else {
2113
              // if q4 > 1 then truncate C4 from q4 digits to 1 digit; 
2114
              // x = q4-1, 1 <= x <= 67 and check if this operation is exact
2115
              if (q4 <= 18) { // 2 <= q4 <= 18
2116
                round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
2117
                              &is_midpoint_lt_even,
2118
                              &is_midpoint_gt_even,
2119
                              &is_inexact_lt_midpoint,
2120
                              &is_inexact_gt_midpoint);
2121
              } else if (q4 <= 38) {
2122
                P128.w[1] = C4.w[1];
2123
                P128.w[0] = C4.w[0];
2124
                round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
2125
                                &is_midpoint_lt_even,
2126
                                &is_midpoint_gt_even,
2127
                                &is_inexact_lt_midpoint,
2128
                                &is_inexact_gt_midpoint);
2129
                R64 = R128.w[0]; // one decimal digit
2130
              } else if (q4 <= 57) {
2131
                P192.w[2] = C4.w[2];
2132
                P192.w[1] = C4.w[1];
2133
                P192.w[0] = C4.w[0];
2134
                round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
2135
                                &is_midpoint_lt_even,
2136
                                &is_midpoint_gt_even,
2137
                                &is_inexact_lt_midpoint,
2138
                                &is_inexact_gt_midpoint);
2139
                R64 = R192.w[0]; // one decimal digit
2140
              } else { // if (q4 <= 68)
2141
                round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
2142
                                &is_midpoint_lt_even,
2143
                                &is_midpoint_gt_even,
2144
                                &is_inexact_lt_midpoint,
2145
                                &is_inexact_gt_midpoint);
2146
                R64 = R256.w[0]; // one decimal digit
2147
              }
2148
              if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2149
                  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2150
                // the result is exact: 10^34 - R64
2151
                // incr_exp = 0 with certainty
2152
                z_exp = z_exp - EXP_P1;
2153
                e3 = e3 - 1;
2154
                res.w[1] =
2155
                  z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
2156
                res.w[0] = 0x378d8e6400000000ull - R64;
2157
              } else {
2158
                // We want R64 to be the top digit of C4, but we actually 
2159
                // obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
2160
                // because the top digit is (C4 * 10^(-q4+1))RZ
2161
                // however, if incr_exp = 1 then R64 = 10 with certainty
2162
                if (incr_exp) {
2163
                  R64 = 10;
2164
                }
2165
                // the result is inexact as C4 has more than 1 significant digit
2166
                // and C3 * 10^scale = 10^33
2167
                // example of case that is treated here:
2168
                // 100...0 * 10^e3 - 0.41 * 10^e3 =
2169
                // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
2170
                // note that (e3 > expmin}
2171
                // in order to round, subtract R64 from 10^34 and then compare
2172
                // C4 - R64 * 10^(q4-1) with 1/2 ulp
2173
                // calculate 10^34 - R64
2174
                res.w[1] = 0x0001ed09bead87c0ull;
2175
                res.w[0] = 0x378d8e6400000000ull - R64;
2176
                z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
2177
                // calculate C4 - R64 * 10^(q4-1); this is a rare case and
2178
                // R64 is small, 1 <= R64 <= 9
2179
                e3 = e3 - 1;
2180
                if (is_inexact_lt_midpoint) {
2181
                  is_inexact_lt_midpoint = 0;
2182
                  is_inexact_gt_midpoint = 1;
2183
                } else if (is_inexact_gt_midpoint) {
2184
                  is_inexact_gt_midpoint = 0;
2185
                  is_inexact_lt_midpoint = 1;
2186
                } else if (is_midpoint_lt_even) {
2187
                  is_midpoint_lt_even = 0;
2188
                  is_midpoint_gt_even = 1;
2189
                } else if (is_midpoint_gt_even) {
2190
                  is_midpoint_gt_even = 0;
2191
                  is_midpoint_lt_even = 1;
2192
                } else {
2193
                  ;
2194
                }
2195
                // the result is always inexact, and never tiny
2196
                // check for overflow for RN
2197
                if (e3 > expmax) {
2198
                  if (rnd_mode == ROUNDING_TO_NEAREST) {
2199
                    res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2200
                    res.w[0] = 0x0000000000000000ull;
2201
                    *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2202
                  } else {
2203
                    rounding_correction (rnd_mode,
2204
                                         is_inexact_lt_midpoint,
2205
                                         is_inexact_gt_midpoint,
2206
                                         is_midpoint_lt_even,
2207
                                         is_midpoint_gt_even, e3, &res,
2208
                                         pfpsf);
2209
                  }
2210
                  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2211
                  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2212
                  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2213
                  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2214
                  BID_SWAP128 (res);
2215
                  BID_RETURN (res)
2216
                }
2217
                // set the inexact flag
2218
                *pfpsf |= INEXACT_EXCEPTION;
2219
                res.w[1] =
2220
                  z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2221
                if (rnd_mode != ROUNDING_TO_NEAREST) {
2222
                  rounding_correction (rnd_mode,
2223
                                       is_inexact_lt_midpoint,
2224
                                       is_inexact_gt_midpoint,
2225
                                       is_midpoint_lt_even,
2226
                                       is_midpoint_gt_even, e3, &res,
2227
                                       pfpsf);
2228
                }
2229
                z_exp = res.w[1] & MASK_EXP;
2230
              } // end result is inexact
2231
            } // end q4 > 1
2232
          } else { // if (e3 = emin)
2233
            // if e3 = expmin the result is also tiny (the condition for
2234
            // tininess is C4 > 050...0 [q4 digits] which is met because
2235
            // the msd of C4 is not zero)
2236
            // the result is tiny and inexact in all rounding modes;
2237
            // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp, 
2238
            // gt_half_ulp to calculate)
2239
            // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged
2240
 
2241
            // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
2242
            if (gt_half_ulp) { // res = 10^33 - 1
2243
              res.w[1] = 0x0000314dc6448d93ull;
2244
              res.w[0] = 0x38c15b09ffffffffull;
2245
            } else {
2246
              res.w[1] = 0x0000314dc6448d93ull;
2247
              res.w[0] = 0x38c15b0a00000000ull;
2248
            }
2249
            res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2250
            *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later
2251
 
2252
            if (eq_half_ulp) {
2253
              is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2254
            } else if (lt_half_ulp) {
2255
              is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value
2256
            } else { // if (gt_half_ulp)
2257
              is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2258
            }
2259
 
2260
            if (rnd_mode != ROUNDING_TO_NEAREST) {
2261
              rounding_correction (rnd_mode,
2262
                  is_inexact_lt_midpoint,
2263
                  is_inexact_gt_midpoint,
2264
                  is_midpoint_lt_even,
2265
                  is_midpoint_gt_even, e3, &res,
2266
                  pfpsf);
2267
              z_exp = res.w[1] & MASK_EXP;
2268
            }
2269
          } // end e3 = emin
2270
          // set the inexact flag (if the result was not exact)
2271
          if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2272
              is_midpoint_lt_even || is_midpoint_gt_even)
2273
            *pfpsf |= INEXACT_EXCEPTION;
2274
        } // end 10^33
2275
      } // end if (p_sign != z_sign)
2276
      res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2277
      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2278
      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2279
      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2280
      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2281
      BID_SWAP128 (res);
2282
      BID_RETURN (res)
2283
 
2284
    } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2285
        (q3 <= delta && delta + q4 <= p34) || // Case (3)
2286
        (delta < q3 && p34 < delta + q4) || // Case (4)
2287
        (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
2288
        (delta + q4 < q3)) && // Case (6)
2289
        !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)
2290
 
2291
      // the result has the sign of z
2292
 
2293
      if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2294
          (delta < q3 && p34 < delta + q4)) { // Case (4)
2295
        // round first the sum x * y + z with unbounded exponent
2296
        // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1, 
2297
        // 1 <= scale <= 33
2298
        // calculate res = C3 * 10^scale
2299
        scale = p34 - q3;
2300
        x0 = delta + q4 - p34;
2301
      } else if (delta + q4 < q3) { // Case (6)
2302
        // make Case (6) look like Case (3) or Case (5) with scale = 0
2303
        // by scaling up C4 by 10^(q3 - delta - q4) 
2304
        scale = q3 - delta - q4; // 1 <= scale <= 33
2305
        if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
2306
          if (scale <= 19) { // 10^scale fits in 64 bits
2307
            // 64 x 64 C4.w[0] * ten2k64[scale]
2308
            __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]);
2309
          } else { // 10^scale fits in 128 bits
2310
            // 64 x 128 C4.w[0] * ten2k128[scale - 20]
2311
            __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]);
2312
          }
2313
        } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
2314
          // 64 x 128 ten2k64[scale] * C4
2315
          __mul_128x64_to_128 (P128, ten2k64[scale], C4);
2316
        }
2317
        C4.w[0] = P128.w[0];
2318
        C4.w[1] = P128.w[1];
2319
        // e4 does not need adjustment, as it is not used from this point on
2320
        scale = 0;
2321
        x0 = 0;
2322
        // now Case (6) looks like Case (3) or Case (5) with scale = 0 
2323
      } else { // if Case (3) or Case (5)
2324
        // Note: Case (3) is similar to Case (2), but scale differs and the
2325
        // result is exact, unless it is tiny (so x0 = 0 when calculating the
2326
        // result with unbounded exponent)
2327
 
2328
        // calculate first the sum x * y + z with unbounded exponent (exact)
2329
        // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
2330
        // 1 <= scale <= 33
2331
        // calculate res = C3 * 10^scale
2332
        scale = delta + q4 - q3;
2333
        x0 = 0;
2334
        // Note: the comments which follow refer [mainly] to Case (2)]
2335
      }
2336
 
2337
    case2_repeat:
2338
      if (scale == 0) { // this could happen e.g. if we return to case2_repeat
2339
        // or in Case (4)
2340
        res.w[1] = C3.w[1];
2341
        res.w[0] = C3.w[0];
2342
      } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
2343
        if (scale <= 19) { // 10^scale fits in 64 bits
2344
          // 64 x 64 C3.w[0] * ten2k64[scale]
2345
          __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
2346
        } else { // 10^scale fits in 128 bits
2347
          // 64 x 128 C3.w[0] * ten2k128[scale - 20]
2348
          __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
2349
        }
2350
      } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
2351
        // 64 x 128 ten2k64[scale] * C3
2352
        __mul_128x64_to_128 (res, ten2k64[scale], C3);
2353
      }
2354
      // e3 is already calculated
2355
      e3 = e3 - scale;
2356
      // now res = C3 * 10^scale and e3 = e3 - scale
2357
      // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
2358
      // because the result was too small
2359
 
2360
      // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
2361
      // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
2362
      // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
2363
      // the rounding fits in 128 bits!)
2364
      // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
2365
      // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
2366
      if (x0 == 0) { // this could happen only if we return to case2_repeat, or
2367
        // for Case (3) or Case (6)
2368
        R128.w[1] = C4.w[1];
2369
        R128.w[0] = C4.w[0];
2370
      } else if (q4 <= 18) {
2371
        // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
2372
        round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
2373
            &is_midpoint_lt_even, &is_midpoint_gt_even,
2374
            &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
2375
        if (incr_exp) {
2376
          // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
2377
          R64 = ten2k64[q4 - x0];
2378
        }
2379
        R128.w[1] = 0;
2380
        R128.w[0] = R64;
2381
      } else if (q4 <= 38) {
2382
        // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
2383
        P128.w[1] = C4.w[1];
2384
        P128.w[0] = C4.w[0];
2385
        round128_19_38 (q4, x0, P128, &R128, &incr_exp,
2386
            &is_midpoint_lt_even, &is_midpoint_gt_even,
2387
            &is_inexact_lt_midpoint,
2388
            &is_inexact_gt_midpoint);
2389
        if (incr_exp) {
2390
          // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
2391
          if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2392
            R128.w[0] = ten2k64[q4 - x0];
2393
            // R128.w[1] stays 0
2394
          } else { // 20 <= q4 - x0 <= 37
2395
            R128.w[0] = ten2k128[q4 - x0 - 20].w[0];
2396
            R128.w[1] = ten2k128[q4 - x0 - 20].w[1];
2397
          }
2398
        }
2399
      } else if (q4 <= 57) {
2400
        // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
2401
        P192.w[2] = C4.w[2];
2402
        P192.w[1] = C4.w[1];
2403
        P192.w[0] = C4.w[0];
2404
        round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2405
            &is_midpoint_lt_even, &is_midpoint_gt_even,
2406
            &is_inexact_lt_midpoint,
2407
            &is_inexact_gt_midpoint);
2408
        // R192.w[2] is always 0
2409
        if (incr_exp) {
2410
          // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
2411
          if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2412
            R192.w[0] = ten2k64[q4 - x0];
2413
            // R192.w[1] stays 0
2414
            // R192.w[2] stays 0
2415
          } else { // 20 <= q4 - x0 <= 33
2416
            R192.w[0] = ten2k128[q4 - x0 - 20].w[0];
2417
            R192.w[1] = ten2k128[q4 - x0 - 20].w[1];
2418
            // R192.w[2] stays 0
2419
          }
2420
        }
2421
        R128.w[1] = R192.w[1];
2422
        R128.w[0] = R192.w[0];
2423
      } else {
2424
        // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
2425
        round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2426
            &is_midpoint_lt_even, &is_midpoint_gt_even,
2427
            &is_inexact_lt_midpoint,
2428
            &is_inexact_gt_midpoint);
2429
        // R256.w[3] and R256.w[2] are always 0
2430
        if (incr_exp) {
2431
          // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
2432
          if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19  
2433
            R256.w[0] = ten2k64[q4 - x0];
2434
            // R256.w[1] stays 0
2435
            // R256.w[2] stays 0
2436
            // R256.w[3] stays 0
2437
          } else { // 20 <= q4 - x0 <= 33 
2438
            R256.w[0] = ten2k128[q4 - x0 - 20].w[0];
2439
            R256.w[1] = ten2k128[q4 - x0 - 20].w[1];
2440
            // R256.w[2] stays 0
2441
            // R256.w[3] stays 0
2442
          }
2443
        }
2444
        R128.w[1] = R256.w[1];
2445
        R128.w[0] = R256.w[0];
2446
      }
2447
      // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
2448
      // rounded to nearest, which were copied into R128
2449
      if (z_sign == p_sign) {
2450
        lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale
2451
        // the sum can result in [up to] p34 or p34 + 1 digits
2452
        res.w[0] = res.w[0] + R128.w[0];
2453
        res.w[1] = res.w[1] + R128.w[1];
2454
        if (res.w[0] < R128.w[0])
2455
          res.w[1]++; // carry
2456
        // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
2457
        if (res.w[1] > 0x0001ed09bead87c0ull ||
2458
            (res.w[1] == 0x0001ed09bead87c0ull &&
2459
             res.w[0] > 0x378d8e63ffffffffull)) {
2460
          // avoid double rounding error
2461
          is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2462
          is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2463
          is_midpoint_lt_even0 = is_midpoint_lt_even;
2464
          is_midpoint_gt_even0 = is_midpoint_gt_even;
2465
          is_inexact_lt_midpoint = 0;
2466
          is_inexact_gt_midpoint = 0;
2467
          is_midpoint_lt_even = 0;
2468
          is_midpoint_gt_even = 0;
2469
          P128.w[1] = res.w[1];
2470
          P128.w[0] = res.w[0];
2471
          round128_19_38 (35, 1, P128, &res, &incr_exp,
2472
              &is_midpoint_lt_even, &is_midpoint_gt_even,
2473
              &is_inexact_lt_midpoint,
2474
              &is_inexact_gt_midpoint);
2475
          // incr_exp is 0 with certainty in this case
2476
          // avoid a double rounding error
2477
          if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2478
              is_midpoint_lt_even) { // double rounding error upward
2479
            // res = res - 1
2480
            res.w[0]--;
2481
            if (res.w[0] == 0xffffffffffffffffull)
2482
              res.w[1]--;
2483
            // Note: a double rounding error upward is not possible; for this
2484
            // the result after the first rounding would have to be 99...95
2485
            // (35 digits in all), possibly followed by a number of zeros; this
2486
            // not possible in Cases (2)-(6) or (15)-(17) which may get here
2487
            is_midpoint_lt_even = 0;
2488
            is_inexact_lt_midpoint = 1;
2489
          } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2490
              is_midpoint_gt_even) { // double rounding error downward
2491
            // res = res + 1
2492
            res.w[0]++;
2493
            if (res.w[0] == 0)
2494
              res.w[1]++;
2495
            is_midpoint_gt_even = 0;
2496
            is_inexact_gt_midpoint = 1;
2497
          } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2498
                     !is_inexact_lt_midpoint
2499
                     && !is_inexact_gt_midpoint) {
2500
            // if this second rounding was exact the result may still be 
2501
            // inexact because of the first rounding
2502
            if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2503
              is_inexact_gt_midpoint = 1;
2504
            }
2505
            if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2506
              is_inexact_lt_midpoint = 1;
2507
            }
2508
          } else if (is_midpoint_gt_even &&
2509
                     (is_inexact_gt_midpoint0
2510
                      || is_midpoint_lt_even0)) {
2511
            // pulled up to a midpoint
2512
            is_inexact_lt_midpoint = 1;
2513
            is_inexact_gt_midpoint = 0;
2514
            is_midpoint_lt_even = 0;
2515
            is_midpoint_gt_even = 0;
2516
          } else if (is_midpoint_lt_even &&
2517
                     (is_inexact_lt_midpoint0
2518
                      || is_midpoint_gt_even0)) {
2519
            // pulled down to a midpoint
2520
            is_inexact_lt_midpoint = 0;
2521
            is_inexact_gt_midpoint = 1;
2522
            is_midpoint_lt_even = 0;
2523
            is_midpoint_gt_even = 0;
2524
          } else {
2525
            ;
2526
          }
2527
          // adjust exponent
2528
          e3 = e3 + 1;
2529
          if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2530
              !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2531
            if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2532
                is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2533
              is_inexact_lt_midpoint = 1;
2534
            }
2535
          }
2536
        } else {
2537
          // this is the result rounded with unbounded exponent, unless a
2538
          // correction is needed
2539
          res.w[1] = res.w[1] & MASK_COEFF;
2540
          if (lsb == 1) {
2541
            if (is_midpoint_gt_even) {
2542
              // res = res + 1
2543
              is_midpoint_gt_even = 0;
2544
              is_midpoint_lt_even = 1;
2545
              res.w[0]++;
2546
              if (res.w[0] == 0x0)
2547
                res.w[1]++;
2548
              // check for rounding overflow
2549
              if (res.w[1] == 0x0001ed09bead87c0ull &&
2550
                  res.w[0] == 0x378d8e6400000000ull) {
2551
                // res = 10^34 => rounding overflow
2552
                res.w[1] = 0x0000314dc6448d93ull;
2553
                res.w[0] = 0x38c15b0a00000000ull; // 10^33
2554
                e3++;
2555
              }
2556
            } else if (is_midpoint_lt_even) {
2557
              // res = res - 1
2558
              is_midpoint_lt_even = 0;
2559
              is_midpoint_gt_even = 1;
2560
              res.w[0]--;
2561
              if (res.w[0] == 0xffffffffffffffffull)
2562
                res.w[1]--;
2563
              // if the result is pure zero, the sign depends on the rounding 
2564
              // mode (x*y and z had opposite signs)
2565
              if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2566
                if (rnd_mode != ROUNDING_DOWN)
2567
                  z_sign = 0x0000000000000000ull;
2568
                else
2569
                  z_sign = 0x8000000000000000ull;
2570
                // the exponent is max (e3, expmin)
2571
                res.w[1] = 0x0;
2572
                res.w[0] = 0x0;
2573
                *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2574
                *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2575
                *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2576
                *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2577
                BID_SWAP128 (res);
2578
                BID_RETURN (res)
2579
              }
2580
            } else {
2581
              ;
2582
            }
2583
          }
2584
        }
2585
      } else { // if (z_sign != p_sign)
2586
        lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
2587
        // used to swap rounding indicators if p_sign != z_sign
2588
        // the sum can result in [up to] p34 or p34 - 1 digits
2589
        tmp64 = res.w[0];
2590
        res.w[0] = res.w[0] - R128.w[0];
2591
        res.w[1] = res.w[1] - R128.w[1];
2592
        if (res.w[0] > tmp64)
2593
          res.w[1]--; // borrow
2594
        // if res < 10^33 and exp > expmin need to decrease x0 and 
2595
        // increase scale by 1
2596
        if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull ||
2597
                             (res.w[1] == 0x0000314dc6448d93ull &&
2598
                              res.w[0] < 0x38c15b0a00000000ull)) ||
2599
                            (is_inexact_lt_midpoint
2600
                             && res.w[1] == 0x0000314dc6448d93ull
2601
                             && res.w[0] == 0x38c15b0a00000000ull))
2602
            && x0 >= 1) {
2603
          x0 = x0 - 1;
2604
          // first restore e3, otherwise it will be too small
2605
          e3 = e3 + scale;
2606
          scale = scale + 1;
2607
          is_inexact_lt_midpoint = 0;
2608
          is_inexact_gt_midpoint = 0;
2609
          is_midpoint_lt_even = 0;
2610
          is_midpoint_gt_even = 0;
2611
          incr_exp = 0;
2612
          goto case2_repeat;
2613
        }
2614
        // else this is the result rounded with unbounded exponent;
2615
        // because the result has opposite sign to that of C4 which was 
2616
        // rounded, need to change the rounding indicators
2617
        if (is_inexact_lt_midpoint) {
2618
          is_inexact_lt_midpoint = 0;
2619
          is_inexact_gt_midpoint = 1;
2620
        } else if (is_inexact_gt_midpoint) {
2621
          is_inexact_gt_midpoint = 0;
2622
          is_inexact_lt_midpoint = 1;
2623
        } else if (lsb == 0) {
2624
          if (is_midpoint_lt_even) {
2625
            is_midpoint_lt_even = 0;
2626
            is_midpoint_gt_even = 1;
2627
          } else if (is_midpoint_gt_even) {
2628
            is_midpoint_gt_even = 0;
2629
            is_midpoint_lt_even = 1;
2630
          } else {
2631
            ;
2632
          }
2633
        } else if (lsb == 1) {
2634
          if (is_midpoint_lt_even) {
2635
            // res = res + 1
2636
            res.w[0]++;
2637
            if (res.w[0] == 0x0)
2638
              res.w[1]++;
2639
            // check for rounding overflow
2640
            if (res.w[1] == 0x0001ed09bead87c0ull &&
2641
                res.w[0] == 0x378d8e6400000000ull) {
2642
              // res = 10^34 => rounding overflow
2643
              res.w[1] = 0x0000314dc6448d93ull;
2644
              res.w[0] = 0x38c15b0a00000000ull; // 10^33
2645
              e3++;
2646
            }
2647
          } else if (is_midpoint_gt_even) {
2648
            // res = res - 1
2649
            res.w[0]--;
2650
            if (res.w[0] == 0xffffffffffffffffull)
2651
              res.w[1]--;
2652
            // if the result is pure zero, the sign depends on the rounding 
2653
            // mode (x*y and z had opposite signs)
2654
            if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2655
              if (rnd_mode != ROUNDING_DOWN)
2656
                z_sign = 0x0000000000000000ull;
2657
              else
2658
                z_sign = 0x8000000000000000ull;
2659
              // the exponent is max (e3, expmin)
2660
              res.w[1] = 0x0;
2661
              res.w[0] = 0x0;
2662
              *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2663
              *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2664
              *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2665
              *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2666
              BID_SWAP128 (res);
2667
              BID_RETURN (res)
2668
            }
2669
          } else {
2670
            ;
2671
          }
2672
        } else {
2673
          ;
2674
        }
2675
      }
2676
      // check for underflow
2677
      if (e3 == expmin) { // and if significand < 10^33 => result is tiny
2678
        if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
2679
            ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
2680
             res.w[0] < 0x38c15b0a00000000ull)) {
2681
          is_tiny = 1;
2682
        }
2683
      } else if (e3 < expmin) {
2684
        // the result is tiny, so we must truncate more of res
2685
        is_tiny = 1;
2686
        x0 = expmin - e3;
2687
        is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2688
        is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2689
        is_midpoint_lt_even0 = is_midpoint_lt_even;
2690
        is_midpoint_gt_even0 = is_midpoint_gt_even;
2691
        is_inexact_lt_midpoint = 0;
2692
        is_inexact_gt_midpoint = 0;
2693
        is_midpoint_lt_even = 0;
2694
        is_midpoint_gt_even = 0;
2695
        // determine the number of decimal digits in res
2696
        if (res.w[1] == 0x0) {
2697
          // between 1 and 19 digits
2698
          for (ind = 1; ind <= 19; ind++) {
2699
            if (res.w[0] < ten2k64[ind]) {
2700
              break;
2701
            }
2702
          }
2703
          // ind digits
2704
        } else if (res.w[1] < ten2k128[0].w[1] ||
2705
                   (res.w[1] == ten2k128[0].w[1]
2706
                    && res.w[0] < ten2k128[0].w[0])) {
2707
          // 20 digits
2708
          ind = 20;
2709
        } else { // between 21 and 38 digits
2710
          for (ind = 1; ind <= 18; ind++) {
2711
            if (res.w[1] < ten2k128[ind].w[1] ||
2712
                (res.w[1] == ten2k128[ind].w[1] &&
2713
                 res.w[0] < ten2k128[ind].w[0])) {
2714
              break;
2715
            }
2716
          }
2717
          // ind + 20 digits
2718
          ind = ind + 20;
2719
        }
2720
 
2721
        // at this point ind >= x0; because delta >= 2 on this path, the case
2722
        // ind = x0 can occur only in Case (2) or case (3), when C3 has one
2723
        // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin), 
2724
        // the signs of x * y and z are opposite, and through cancellation 
2725
        // the most significant decimal digit in res has the weight
2726
        // 10^(emin-1); however, it is clear that in this case the most
2727
        // significant digit is 9, so the result before rounding is
2728
        // 0.9... * 10^emin
2729
        // Otherwise, ind > x0 because there are non-zero decimal digits in the
2730
        // result with weight of at least 10^emin, and correction for underflow
2731
        //  can be carried out using the round*_*_2_* () routines
2732
        if (x0 == ind) { // the result before rounding is 0.9... * 10^emin
2733
          res.w[1] = 0x0;
2734
          res.w[0] = 0x1;
2735
          is_inexact_gt_midpoint = 1;
2736
        } else if (ind <= 18) { // check that 2 <= ind
2737
          // 2 <= ind <= 18, 1 <= x0 <= 17
2738
          round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
2739
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
2740
                        &is_inexact_lt_midpoint,
2741
                        &is_inexact_gt_midpoint);
2742
          if (incr_exp) {
2743
            // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
2744
            R64 = ten2k64[ind - x0];
2745
          }
2746
          res.w[1] = 0;
2747
          res.w[0] = R64;
2748
        } else if (ind <= 38) {
2749
          // 19 <= ind <= 38
2750
          P128.w[1] = res.w[1];
2751
          P128.w[0] = res.w[0];
2752
          round128_19_38 (ind, x0, P128, &res, &incr_exp,
2753
                          &is_midpoint_lt_even, &is_midpoint_gt_even,
2754
                          &is_inexact_lt_midpoint,
2755
                          &is_inexact_gt_midpoint);
2756
          if (incr_exp) {
2757
            // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
2758
            if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
2759
              res.w[0] = ten2k64[ind - x0];
2760
              // res.w[1] stays 0
2761
            } else { // 20 <= ind - x0 <= 37
2762
              res.w[0] = ten2k128[ind - x0 - 20].w[0];
2763
              res.w[1] = ten2k128[ind - x0 - 20].w[1];
2764
            }
2765
          }
2766
        }
2767
        // avoid a double rounding error
2768
        if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2769
            is_midpoint_lt_even) { // double rounding error upward
2770
          // res = res - 1
2771
          res.w[0]--;
2772
          if (res.w[0] == 0xffffffffffffffffull)
2773
            res.w[1]--;
2774
          // Note: a double rounding error upward is not possible; for this
2775
          // the result after the first rounding would have to be 99...95
2776
          // (35 digits in all), possibly followed by a number of zeros; this
2777
          // not possible in Cases (2)-(6) which may get here
2778
          is_midpoint_lt_even = 0;
2779
          is_inexact_lt_midpoint = 1;
2780
        } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2781
            is_midpoint_gt_even) { // double rounding error downward
2782
          // res = res + 1
2783
          res.w[0]++;
2784
          if (res.w[0] == 0)
2785
            res.w[1]++;
2786
          is_midpoint_gt_even = 0;
2787
          is_inexact_gt_midpoint = 1;
2788
        } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2789
                   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2790
          // if this second rounding was exact the result may still be 
2791
          // inexact because of the first rounding
2792
          if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2793
            is_inexact_gt_midpoint = 1;
2794
          }
2795
          if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2796
            is_inexact_lt_midpoint = 1;
2797
          }
2798
        } else if (is_midpoint_gt_even &&
2799
                   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
2800
          // pulled up to a midpoint
2801
          is_inexact_lt_midpoint = 1;
2802
          is_inexact_gt_midpoint = 0;
2803
          is_midpoint_lt_even = 0;
2804
          is_midpoint_gt_even = 0;
2805
        } else if (is_midpoint_lt_even &&
2806
                   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
2807
          // pulled down to a midpoint
2808
          is_inexact_lt_midpoint = 0;
2809
          is_inexact_gt_midpoint = 1;
2810
          is_midpoint_lt_even = 0;
2811
          is_midpoint_gt_even = 0;
2812
        } else {
2813
          ;
2814
        }
2815
        // adjust exponent
2816
        e3 = e3 + x0;
2817
        if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2818
            !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2819
          if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2820
              is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2821
            is_inexact_lt_midpoint = 1;
2822
          }
2823
        }
2824
      } else {
2825
        ; // not underflow
2826
      }
2827
      // check for inexact result
2828
      if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2829
          is_midpoint_lt_even || is_midpoint_gt_even) {
2830
        // set the inexact flag
2831
        *pfpsf |= INEXACT_EXCEPTION;
2832
        if (is_tiny)
2833
          *pfpsf |= UNDERFLOW_EXCEPTION;
2834
      }
2835
      // now check for significand = 10^34 (may have resulted from going
2836
      // back to case2_repeat)
2837
      if (res.w[1] == 0x0001ed09bead87c0ull &&
2838
          res.w[0] == 0x378d8e6400000000ull) { // if  res = 10^34
2839
        res.w[1] = 0x0000314dc6448d93ull; // res = 10^33
2840
        res.w[0] = 0x38c15b0a00000000ull;
2841
        e3 = e3 + 1;
2842
      }
2843
      res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2844
      // check for overflow
2845
      if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) {
2846
        res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2847
        res.w[0] = 0x0000000000000000ull;
2848
        *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2849
      }
2850
      if (rnd_mode != ROUNDING_TO_NEAREST) {
2851
        rounding_correction (rnd_mode,
2852
                             is_inexact_lt_midpoint,
2853
                             is_inexact_gt_midpoint,
2854
                             is_midpoint_lt_even, is_midpoint_gt_even,
2855
                             e3, &res, pfpsf);
2856
      }
2857
      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2858
      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2859
      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2860
      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2861
      BID_SWAP128 (res);
2862
      BID_RETURN (res)
2863
 
2864
    } else {
2865
 
2866
      // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
2867
      // the signs of x*y and z are opposite; in these cases massive
2868
      // cancellation can occur, so it is better to scale either C3 or C4 and 
2869
      // to perform the subtraction before rounding; rounding is performed 
2870
      // next, depending on the number of decimal digits in the result and on 
2871
      // the exponent value
2872
      // Note: overlow is not possible in this case
2873
      // this is similar to Cases (15), (16), and (17)
2874
 
2875
      if (delta + q4 < q3) { // from Case (6) 
2876
        // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and 
2877
        // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
2878
        // and call add_and_round; delta stays positive
2879
        // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
2880
        P128.w[1] = C3.w[1];
2881
        P128.w[0] = C3.w[0];
2882
        C3.w[1] = C4.w[1];
2883
        C3.w[0] = C4.w[0];
2884
        C4.w[1] = P128.w[1];
2885
        C4.w[0] = P128.w[0];
2886
        ind = q3;
2887
        q3 = q4;
2888
        q4 = ind;
2889
        ind = e3;
2890
        e3 = e4;
2891
        e4 = ind;
2892
        tmp_sign = z_sign;
2893
        z_sign = p_sign;
2894
        p_sign = tmp_sign;
2895
      } else { // from Cases (2), (3), (4), (5)
2896
        // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be 
2897
        // scaled up by q4 + delta - q3; this is the same as in Cases (15), 
2898
        // (16), and (17) if we just change the sign of delta
2899
        delta = -delta;
2900
      }
2901
      add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
2902
                     rnd_mode, &is_midpoint_lt_even,
2903
                     &is_midpoint_gt_even, &is_inexact_lt_midpoint,
2904
                     &is_inexact_gt_midpoint, pfpsf, &res);
2905
      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2906
      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2907
      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2908
      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2909
      BID_SWAP128 (res);
2910
      BID_RETURN (res)
2911
 
2912
    }
2913
 
2914
  } else { // if delta < 0
2915
 
2916
    delta = -delta;
2917
 
2918
    if (p34 < q4 && q4 <= delta) { // Case (7)
2919
 
2920
      // truncate C4 to p34 digits into res
2921
      // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
2922
      x0 = q4 - p34;
2923
      if (q4 <= 38) {
2924
        P128.w[1] = C4.w[1];
2925
        P128.w[0] = C4.w[0];
2926
        round128_19_38 (q4, x0, P128, &res, &incr_exp,
2927
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
2928
                        &is_inexact_lt_midpoint,
2929
                        &is_inexact_gt_midpoint);
2930
      } else if (q4 <= 57) { // 35 <= q4 <= 57
2931
        P192.w[2] = C4.w[2];
2932
        P192.w[1] = C4.w[1];
2933
        P192.w[0] = C4.w[0];
2934
        round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2935
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
2936
                        &is_inexact_lt_midpoint,
2937
                        &is_inexact_gt_midpoint);
2938
        res.w[0] = R192.w[0];
2939
        res.w[1] = R192.w[1];
2940
      } else { // if (q4 <= 68)
2941
        round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2942
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
2943
                        &is_inexact_lt_midpoint,
2944
                        &is_inexact_gt_midpoint);
2945
        res.w[0] = R256.w[0];
2946
        res.w[1] = R256.w[1];
2947
      }
2948
      e4 = e4 + x0;
2949
      if (incr_exp) {
2950
        e4 = e4 + 1;
2951
      }
2952
      if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2953
          !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2954
        // if C4 rounded to p34 digits is exact then the result is inexact,
2955
        // in a way that depends on the signs of x * y and z
2956
        if (p_sign == z_sign) {
2957
          is_inexact_lt_midpoint = 1;
2958
        } else { // if (p_sign != z_sign)
2959
          if (res.w[1] != 0x0000314dc6448d93ull ||
2960
              res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33
2961
            is_inexact_gt_midpoint = 1;
2962
          } else { // res = 10^33 and exact is a special case
2963
            // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
2964
            // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
2965
            // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
2966
            // Note: ulp is really ulp/10 (after borrow which propagates to msd)
2967
            if (delta > p34 + 1) { // C3 < 1/2
2968
              // res = 10^33, unchanged
2969
              is_inexact_gt_midpoint = 1;
2970
            } else { // if (delta == p34 + 1)
2971
              if (q3 <= 19) {
2972
                if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp
2973
                  // res = 10^33, unchanged
2974
                  is_inexact_gt_midpoint = 1;
2975
                } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp
2976
                  // res = 10^33, unchanged
2977
                  is_midpoint_lt_even = 1;
2978
                } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
2979
                  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2980
                  res.w[0] = 0x378d8e63ffffffffull;
2981
                  e4 = e4 - 1;
2982
                  is_inexact_lt_midpoint = 1;
2983
                }
2984
              } else { // if (20 <= q3 <=34)
2985
                if (C3.w[1] < midpoint128[q3 - 20].w[1] ||
2986
                    (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2987
                    C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
2988
                  // res = 10^33, unchanged
2989
                  is_inexact_gt_midpoint = 1;
2990
                } else if (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2991
                    C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
2992
                  // res = 10^33, unchanged
2993
                  is_midpoint_lt_even = 1;
2994
                } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
2995
                  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2996
                  res.w[0] = 0x378d8e63ffffffffull;
2997
                  e4 = e4 - 1;
2998
                  is_inexact_lt_midpoint = 1;
2999
                }
3000
              }
3001
            }
3002
          }
3003
        }
3004
      } else if (is_midpoint_lt_even) {
3005
        if (z_sign != p_sign) {
3006
          // needs correction: res = res - 1
3007
          res.w[0] = res.w[0] - 1;
3008
          if (res.w[0] == 0xffffffffffffffffull)
3009
            res.w[1]--;
3010
          // if it is (10^33-1)*10^e4 then the corect result is 
3011
          // (10^34-1)*10(e4-1)
3012
          if (res.w[1] == 0x0000314dc6448d93ull &&
3013
              res.w[0] == 0x38c15b09ffffffffull) {
3014
            res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3015
            res.w[0] = 0x378d8e63ffffffffull;
3016
            e4 = e4 - 1;
3017
          }
3018
          is_midpoint_lt_even = 0;
3019
          is_inexact_lt_midpoint = 1;
3020
        } else { // if (z_sign == p_sign)
3021
          is_midpoint_lt_even = 0;
3022
          is_inexact_gt_midpoint = 1;
3023
        }
3024
      } else if (is_midpoint_gt_even) {
3025
        if (z_sign == p_sign) {
3026
          // needs correction: res = res + 1 (cannot cross in the next binade)
3027
          res.w[0] = res.w[0] + 1;
3028
          if (res.w[0] == 0x0000000000000000ull)
3029
            res.w[1]++;
3030
          is_midpoint_gt_even = 0;
3031
          is_inexact_gt_midpoint = 1;
3032
        } else { // if (z_sign != p_sign)
3033
          is_midpoint_gt_even = 0;
3034
          is_inexact_lt_midpoint = 1;
3035
        }
3036
      } else {
3037
        ; // the rounded result is already correct
3038
      }
3039
      // check for overflow
3040
      if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) {
3041
        res.w[1] = p_sign | 0x7800000000000000ull;
3042
        res.w[0] = 0x0000000000000000ull;
3043
        *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
3044
      } else { // no overflow or not RN
3045
        p_exp = ((UINT64) (e4 + 6176) << 49);
3046
        res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
3047
      }
3048
      if (rnd_mode != ROUNDING_TO_NEAREST) {
3049
        rounding_correction (rnd_mode,
3050
                             is_inexact_lt_midpoint,
3051
                             is_inexact_gt_midpoint,
3052
                             is_midpoint_lt_even, is_midpoint_gt_even,
3053
                             e4, &res, pfpsf);
3054
      }
3055
      if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
3056
          is_midpoint_lt_even || is_midpoint_gt_even) {
3057
        // set the inexact flag
3058
        *pfpsf |= INEXACT_EXCEPTION;
3059
      }
3060
      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3061
      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3062
      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3063
      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3064
      BID_SWAP128 (res);
3065
      BID_RETURN (res)
3066
 
3067
    } else if ((q4 <= p34 && p34 <= delta) || // Case (8)
3068
        (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9)
3069
        (q4 <= delta && delta + q3 <= p34) || // Case (10)
3070
        (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13)
3071
        (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14)
3072
        (delta + q3 < q4 && q4 <= p34)) { // Case (18)
3073
 
3074
      // Case (8) is similar to Case (1), with C3 and C4 swapped
3075
      // Case (9) is similar to Case (2), with C3 and C4 swapped
3076
      // Case (10) is similar to Case (3), with C3 and C4 swapped
3077
      // Case (13) is similar to Case (4), with C3 and C4 swapped
3078
      // Case (14) is similar to Case (5), with C3 and C4 swapped
3079
      // Case (18) is similar to Case (6), with C3 and C4 swapped
3080
 
3081
      // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
3082
      // and go back to delta_ge_zero
3083
      // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
3084
      P128.w[1] = C3.w[1];
3085
      P128.w[0] = C3.w[0];
3086
      C3.w[1] = C4.w[1];
3087
      C3.w[0] = C4.w[0];
3088
      C4.w[1] = P128.w[1];
3089
      C4.w[0] = P128.w[0];
3090
      ind = q3;
3091
      q3 = q4;
3092
      q4 = ind;
3093
      ind = e3;
3094
      e3 = e4;
3095
      e4 = ind;
3096
      tmp_sign = z_sign;
3097
      z_sign = p_sign;
3098
      p_sign = tmp_sign;
3099
      tmp.ui64 = z_exp;
3100
      z_exp = p_exp;
3101
      p_exp = tmp.ui64;
3102
      goto delta_ge_zero;
3103
 
3104
    } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
3105
               (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12)
3106
 
3107
      // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
3108
      // 1 <= x0 <= q3 - 1 <= p34 - 1 
3109
      x0 = e4 - e3; // or x0 = delta + q3 - q4
3110
      if (q3 <= 18) { // 2 <= q3 <= 18
3111
        round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
3112
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
3113
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
3114
        // C3.w[1] = 0;
3115
        C3.w[0] = R64;
3116
      } else if (q3 <= 38) {
3117
        round128_19_38 (q3, x0, C3, &R128, &incr_exp,
3118
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
3119
                        &is_inexact_lt_midpoint,
3120
                        &is_inexact_gt_midpoint);
3121
        C3.w[1] = R128.w[1];
3122
        C3.w[0] = R128.w[0];
3123
      }
3124
      // the rounded result has q3 - x0 digits
3125
      // we want the exponent to be e4, so if incr_exp = 1 then
3126
      // multiply the rounded result by 10 - it will still fit in 113 bits
3127
      if (incr_exp) {
3128
        // 64 x 128 -> 128
3129
        P128.w[1] = C3.w[1];
3130
        P128.w[0] = C3.w[0];
3131
        __mul_64x128_to_128 (C3, ten2k64[1], P128);
3132
      }
3133
      e3 = e3 + x0; // this is e4
3134
      // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3; 
3135
      // the result will have the sign of x * y; the exponent is e4
3136
      R256.w[3] = 0;
3137
      R256.w[2] = 0;
3138
      R256.w[1] = C3.w[1];
3139
      R256.w[0] = C3.w[0];
3140
      if (p_sign == z_sign) { // R256 = C4 + R256
3141
        add256 (C4, R256, &R256);
3142
      } else { // if (p_sign != z_sign) { // R256 = C4 - R256
3143
        sub256 (C4, R256, &R256); // the result cannot be pure zero
3144
        // because the result has opposite sign to that of R256 which was 
3145
        // rounded, need to change the rounding indicators
3146
        lsb = C4.w[0] & 0x01;
3147
        if (is_inexact_lt_midpoint) {
3148
          is_inexact_lt_midpoint = 0;
3149
          is_inexact_gt_midpoint = 1;
3150
        } else if (is_inexact_gt_midpoint) {
3151
          is_inexact_gt_midpoint = 0;
3152
          is_inexact_lt_midpoint = 1;
3153
        } else if (lsb == 0) {
3154
          if (is_midpoint_lt_even) {
3155
            is_midpoint_lt_even = 0;
3156
            is_midpoint_gt_even = 1;
3157
          } else if (is_midpoint_gt_even) {
3158
            is_midpoint_gt_even = 0;
3159
            is_midpoint_lt_even = 1;
3160
          } else {
3161
            ;
3162
          }
3163
        } else if (lsb == 1) {
3164
          if (is_midpoint_lt_even) {
3165
            // res = res + 1
3166
            R256.w[0]++;
3167
            if (R256.w[0] == 0x0) {
3168
              R256.w[1]++;
3169
              if (R256.w[1] == 0x0) {
3170
                R256.w[2]++;
3171
                if (R256.w[2] == 0x0) {
3172
                  R256.w[3]++;
3173
                }
3174
              }
3175
            }
3176
            // no check for rounding overflow - R256 was a difference
3177
          } else if (is_midpoint_gt_even) {
3178
            // res = res - 1
3179
            R256.w[0]--;
3180
            if (R256.w[0] == 0xffffffffffffffffull) {
3181
              R256.w[1]--;
3182
              if (R256.w[1] == 0xffffffffffffffffull) {
3183
                R256.w[2]--;
3184
                if (R256.w[2] == 0xffffffffffffffffull) {
3185
                  R256.w[3]--;
3186
                }
3187
              }
3188
            }
3189
          } else {
3190
            ;
3191
          }
3192
        } else {
3193
          ;
3194
        }
3195
      }
3196
      // determine the number of decimal digits in R256
3197
      ind = nr_digits256 (R256); // ind >= p34
3198
      // if R256 is sum, then ind > p34; if R256 is a difference, then 
3199
      // ind >= p34; this means that we can calculate the result rounded to
3200
      // the destination precision, with unbounded exponent, starting from R256
3201
      // and using the indicators from the rounding of C3 to avoid a double
3202
      // rounding error 
3203
 
3204
      if (ind < p34) {
3205
        ;
3206
      } else if (ind == p34) {
3207
        // the result rounded to the destination precision with 
3208
        // unbounded exponent
3209
        // is (-1)^p_sign * R256 * 10^e4
3210
        res.w[1] = R256.w[1];
3211
        res.w[0] = R256.w[0];
3212
      } else { // if (ind > p34)
3213
        // if more than P digits, round to nearest to P digits
3214
        // round R256 to p34 digits
3215
        x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
3216
        // save C3 rounding indicators to help avoid double rounding error
3217
        is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3218
        is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3219
        is_midpoint_lt_even0 = is_midpoint_lt_even;
3220
        is_midpoint_gt_even0 = is_midpoint_gt_even;
3221
        // initialize rounding indicators
3222
        is_inexact_lt_midpoint = 0;
3223
        is_inexact_gt_midpoint = 0;
3224
        is_midpoint_lt_even = 0;
3225
        is_midpoint_gt_even = 0;
3226
        // round to p34 digits; the result fits in 113 bits
3227
        if (ind <= 38) {
3228
          P128.w[1] = R256.w[1];
3229
          P128.w[0] = R256.w[0];
3230
          round128_19_38 (ind, x0, P128, &R128, &incr_exp,
3231
                          &is_midpoint_lt_even, &is_midpoint_gt_even,
3232
                          &is_inexact_lt_midpoint,
3233
                          &is_inexact_gt_midpoint);
3234
        } else if (ind <= 57) {
3235
          P192.w[2] = R256.w[2];
3236
          P192.w[1] = R256.w[1];
3237
          P192.w[0] = R256.w[0];
3238
          round192_39_57 (ind, x0, P192, &R192, &incr_exp,
3239
                          &is_midpoint_lt_even, &is_midpoint_gt_even,
3240
                          &is_inexact_lt_midpoint,
3241
                          &is_inexact_gt_midpoint);
3242
          R128.w[1] = R192.w[1];
3243
          R128.w[0] = R192.w[0];
3244
        } else { // if (ind <= 68)
3245
          round256_58_76 (ind, x0, R256, &R256, &incr_exp,
3246
                          &is_midpoint_lt_even, &is_midpoint_gt_even,
3247
                          &is_inexact_lt_midpoint,
3248
                          &is_inexact_gt_midpoint);
3249
          R128.w[1] = R256.w[1];
3250
          R128.w[0] = R256.w[0];
3251
        }
3252
        // the rounded result has p34 = 34 digits
3253
        e4 = e4 + x0 + incr_exp;
3254
 
3255
        res.w[1] = R128.w[1];
3256
        res.w[0] = R128.w[0];
3257
 
3258
        // avoid a double rounding error
3259
        if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3260
            is_midpoint_lt_even) { // double rounding error upward
3261
          // res = res - 1
3262
          res.w[0]--;
3263
          if (res.w[0] == 0xffffffffffffffffull)
3264
            res.w[1]--;
3265
          is_midpoint_lt_even = 0;
3266
          is_inexact_lt_midpoint = 1;
3267
          // Note: a double rounding error upward is not possible; for this
3268
          // the result after the first rounding would have to be 99...95
3269
          // (35 digits in all), possibly followed by a number of zeros; this
3270
          // not possible in Cases (2)-(6) or (15)-(17) which may get here
3271
          // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
3272
          if (res.w[1] == 0x0000314dc6448d93ull &&
3273
            res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
3274
            res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3275
            res.w[0] = 0x378d8e63ffffffffull;
3276
            e4--;
3277
          }
3278
        } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3279
            is_midpoint_gt_even) { // double rounding error downward
3280
          // res = res + 1 
3281
          res.w[0]++;
3282
          if (res.w[0] == 0)
3283
            res.w[1]++;
3284
          is_midpoint_gt_even = 0;
3285
          is_inexact_gt_midpoint = 1;
3286
        } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3287
                   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
3288
          // if this second rounding was exact the result may still be
3289
          // inexact because of the first rounding
3290
          if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3291
            is_inexact_gt_midpoint = 1;
3292
          }
3293
          if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3294
            is_inexact_lt_midpoint = 1;
3295
          }
3296
        } else if (is_midpoint_gt_even &&
3297
                   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
3298
          // pulled up to a midpoint
3299
          is_inexact_lt_midpoint = 1;
3300
          is_inexact_gt_midpoint = 0;
3301
          is_midpoint_lt_even = 0;
3302
          is_midpoint_gt_even = 0;
3303
        } else if (is_midpoint_lt_even &&
3304
                   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
3305
          // pulled down to a midpoint
3306
          is_inexact_lt_midpoint = 0;
3307
          is_inexact_gt_midpoint = 1;
3308
          is_midpoint_lt_even = 0;
3309
          is_midpoint_gt_even = 0;
3310
        } else {
3311
          ;
3312
        }
3313
      }
3314
 
3315
      // determine tininess
3316
      if (rnd_mode == ROUNDING_TO_NEAREST) {
3317
        if (e4 < expmin) {
3318
          is_tiny = 1; // for other rounding modes apply correction
3319
        }
3320
      } else {
3321
        // for RM, RP, RZ, RA apply correction in order to determine tininess
3322
        // but do not save the result; apply the correction to 
3323
        // (-1)^p_sign * res * 10^0
3324
        P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
3325
        P128.w[0] = res.w[0];
3326
        rounding_correction (rnd_mode,
3327
                             is_inexact_lt_midpoint,
3328
                             is_inexact_gt_midpoint,
3329
                             is_midpoint_lt_even, is_midpoint_gt_even,
3330
                             0, &P128, pfpsf);
3331
        scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
3332
        // the number of digits in the significand is p34 = 34
3333
        if (e4 + scale < expmin) {
3334
          is_tiny = 1;
3335
        }
3336
      }
3337
 
3338
      // the result rounded to the destination precision with unbounded exponent
3339
      // is (-1)^p_sign * res * 10^e4
3340
      res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN
3341
      // res.w[0] unchanged;
3342
      // Note: res is correct only if expmin <= e4 <= expmax
3343
      ind = p34; // the number of decimal digits in the signifcand of res
3344
 
3345
      // at this point we have the result rounded with unbounded exponent in
3346
      // res and we know its tininess:
3347
      // res = (-1)^p_sign * significand * 10^e4, 
3348
      // where q (significand) = ind = p34
3349
      // Note: res is correct only if expmin <= e4 <= expmax
3350
 
3351
      // check for overflow if RN
3352
      if (rnd_mode == ROUNDING_TO_NEAREST
3353
          && (ind + e4) > (p34 + expmax)) {
3354
        res.w[1] = p_sign | 0x7800000000000000ull;
3355
        res.w[0] = 0x0000000000000000ull;
3356
        *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
3357
        *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3358
        *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3359
        *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3360
        *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3361
        BID_SWAP128 (res);
3362
        BID_RETURN (res)
3363
      } // else not overflow or not RN, so continue
3364
 
3365
      // from this point on this is similar to the last part of the computation
3366
      // for Cases (15), (16), (17)
3367
 
3368
      // if (e4 >= expmin) we have the result rounded with bounded exponent
3369
      if (e4 < expmin) {
3370
        x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
3371
        // where the result rounded [at most] once is
3372
        //   (-1)^p_sign * significand_res * 10^e4
3373
 
3374
        // avoid double rounding error
3375
        is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3376
        is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3377
        is_midpoint_lt_even0 = is_midpoint_lt_even;
3378
        is_midpoint_gt_even0 = is_midpoint_gt_even;
3379
        is_inexact_lt_midpoint = 0;
3380
        is_inexact_gt_midpoint = 0;
3381
        is_midpoint_lt_even = 0;
3382
        is_midpoint_gt_even = 0;
3383
 
3384
        if (x0 > ind) {
3385
          // nothing is left of res when moving the decimal point left x0 digits
3386
          is_inexact_lt_midpoint = 1;
3387
          res.w[1] = p_sign | 0x0000000000000000ull;
3388
          res.w[0] = 0x0000000000000000ull;
3389
          e4 = expmin;
3390
        } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
3391
          // this is <, =, or > 1/2 ulp
3392
          // compare the ind-digit value in the significand of res with
3393
          // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is 
3394
          // less than, equal to, or greater than 1/2 ulp (significand of res)
3395
          R128.w[1] = res.w[1] & MASK_COEFF;
3396
          R128.w[0] = res.w[0];
3397
          if (ind <= 19) {
3398
            if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
3399
              lt_half_ulp = 1;
3400
              is_inexact_lt_midpoint = 1;
3401
            } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
3402
              eq_half_ulp = 1;
3403
              is_midpoint_gt_even = 1;
3404
            } else { // > 1/2 ulp
3405
              gt_half_ulp = 1;
3406
              is_inexact_gt_midpoint = 1;
3407
            }
3408
          } else { // if (ind <= 38)
3409
            if (R128.w[1] < midpoint128[ind - 20].w[1] ||
3410
                (R128.w[1] == midpoint128[ind - 20].w[1] &&
3411
                R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
3412
              lt_half_ulp = 1;
3413
              is_inexact_lt_midpoint = 1;
3414
            } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
3415
                R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
3416
              eq_half_ulp = 1;
3417
              is_midpoint_gt_even = 1;
3418
            } else { // > 1/2 ulp
3419
              gt_half_ulp = 1;
3420
              is_inexact_gt_midpoint = 1;
3421
            }
3422
          }
3423
          if (lt_half_ulp || eq_half_ulp) {
3424
            // res = +0.0 * 10^expmin
3425
            res.w[1] = 0x0000000000000000ull;
3426
            res.w[0] = 0x0000000000000000ull;
3427
          } else { // if (gt_half_ulp)
3428
            // res = +1 * 10^expmin
3429
            res.w[1] = 0x0000000000000000ull;
3430
            res.w[0] = 0x0000000000000001ull;
3431
          }
3432
          res.w[1] = p_sign | res.w[1];
3433
          e4 = expmin;
3434
        } else { // if (1 <= x0 <= ind - 1 <= 33)
3435
          // round the ind-digit result to ind - x0 digits
3436
 
3437
          if (ind <= 18) { // 2 <= ind <= 18
3438
            round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
3439
                          &is_midpoint_lt_even, &is_midpoint_gt_even,
3440
                          &is_inexact_lt_midpoint,
3441
                          &is_inexact_gt_midpoint);
3442
            res.w[1] = 0x0;
3443
            res.w[0] = R64;
3444
          } else if (ind <= 38) {
3445
            P128.w[1] = res.w[1] & MASK_COEFF;
3446
            P128.w[0] = res.w[0];
3447
            round128_19_38 (ind, x0, P128, &res, &incr_exp,
3448
                            &is_midpoint_lt_even, &is_midpoint_gt_even,
3449
                            &is_inexact_lt_midpoint,
3450
                            &is_inexact_gt_midpoint);
3451
          }
3452
          e4 = e4 + x0; // expmin
3453
          // we want the exponent to be expmin, so if incr_exp = 1 then
3454
          // multiply the rounded result by 10 - it will still fit in 113 bits
3455
          if (incr_exp) {
3456
            // 64 x 128 -> 128
3457
            P128.w[1] = res.w[1] & MASK_COEFF;
3458
            P128.w[0] = res.w[0];
3459
            __mul_64x128_to_128 (res, ten2k64[1], P128);
3460
          }
3461
          res.w[1] =
3462
            p_sign | ((UINT64) (e4 + 6176) << 49) | (res.
3463
                                                     w[1] & MASK_COEFF);
3464
          // avoid a double rounding error
3465
          if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3466
                is_midpoint_lt_even) { // double rounding error upward
3467
            // res = res - 1
3468
            res.w[0]--;
3469
            if (res.w[0] == 0xffffffffffffffffull)
3470
              res.w[1]--;
3471
            // Note: a double rounding error upward is not possible; for this
3472
            // the result after the first rounding would have to be 99...95
3473
            // (35 digits in all), possibly followed by a number of zeros; this
3474
            // not possible in this underflow case
3475
            is_midpoint_lt_even = 0;
3476
            is_inexact_lt_midpoint = 1;
3477
          } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3478
                is_midpoint_gt_even) { // double rounding error downward
3479
            // res = res + 1
3480
            res.w[0]++;
3481
            if (res.w[0] == 0)
3482
              res.w[1]++;
3483
            is_midpoint_gt_even = 0;
3484
            is_inexact_gt_midpoint = 1;
3485
          } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3486
                     !is_inexact_lt_midpoint
3487
                     && !is_inexact_gt_midpoint) {
3488
            // if this second rounding was exact the result may still be 
3489
            // inexact because of the first rounding
3490
            if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3491
              is_inexact_gt_midpoint = 1;
3492
            }
3493
            if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3494
              is_inexact_lt_midpoint = 1;
3495
            }
3496
          } else if (is_midpoint_gt_even &&
3497
                     (is_inexact_gt_midpoint0
3498
                      || is_midpoint_lt_even0)) {
3499
            // pulled up to a midpoint
3500
            is_inexact_lt_midpoint = 1;
3501
            is_inexact_gt_midpoint = 0;
3502
            is_midpoint_lt_even = 0;
3503
            is_midpoint_gt_even = 0;
3504
          } else if (is_midpoint_lt_even &&
3505
                     (is_inexact_lt_midpoint0
3506
                      || is_midpoint_gt_even0)) {
3507
            // pulled down to a midpoint
3508
            is_inexact_lt_midpoint = 0;
3509
            is_inexact_gt_midpoint = 1;
3510
            is_midpoint_lt_even = 0;
3511
            is_midpoint_gt_even = 0;
3512
          } else {
3513
            ;
3514
          }
3515
        }
3516
      }
3517
      // res contains the correct result
3518
      // apply correction if not rounding to nearest
3519
      if (rnd_mode != ROUNDING_TO_NEAREST) {
3520
        rounding_correction (rnd_mode,
3521
                             is_inexact_lt_midpoint,
3522
                             is_inexact_gt_midpoint,
3523
                             is_midpoint_lt_even, is_midpoint_gt_even,
3524
                             e4, &res, pfpsf);
3525
      }
3526
      if (is_midpoint_lt_even || is_midpoint_gt_even ||
3527
          is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
3528
        // set the inexact flag
3529
        *pfpsf |= INEXACT_EXCEPTION;
3530
        if (is_tiny)
3531
          *pfpsf |= UNDERFLOW_EXCEPTION;
3532
      }
3533
      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3534
      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3535
      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3536
      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3537
      BID_SWAP128 (res);
3538
      BID_RETURN (res)
3539
 
3540
    } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
3541
        (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16)
3542
        (delta + q3 <= p34 && p34 < q4)) { // Case (17)
3543
 
3544
      // calculate first the result rounded to the destination precision, with
3545
      // unbounded exponent
3546
 
3547
      add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
3548
              rnd_mode, &is_midpoint_lt_even,
3549
              &is_midpoint_gt_even, &is_inexact_lt_midpoint,
3550
              &is_inexact_gt_midpoint, pfpsf, &res);
3551
      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3552
      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3553
      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3554
      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3555
      BID_SWAP128 (res);
3556
      BID_RETURN (res)
3557
 
3558
    } else {
3559
      ;
3560
    }
3561
 
3562
  } // end if delta < 0
3563
 
3564
  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3565
  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3566
  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3567
  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3568
  BID_SWAP128 (res);
3569
  BID_RETURN (res)
3570
 
3571
}
3572
 
3573
 
3574
#if DECIMAL_CALL_BY_REFERENCE
3575
void
3576
bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
3577
            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3578
            _EXC_INFO_PARAM) {
3579
  UINT128 x = *px, y = *py, z = *pz;
3580
#if !DECIMAL_GLOBAL_ROUNDING
3581
  unsigned int rnd_mode = *prnd_mode;
3582
#endif
3583
#else
3584
UINT128
3585
bid128_fma (UINT128 x, UINT128 y, UINT128 z
3586
            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3587
            _EXC_INFO_PARAM) {
3588
#endif
3589
  int is_midpoint_lt_even, is_midpoint_gt_even,
3590
    is_inexact_lt_midpoint, is_inexact_gt_midpoint;
3591
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3592
 
3593
#if DECIMAL_CALL_BY_REFERENCE
3594
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3595
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3596
                  &res, &x, &y, &z
3597
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3598
                  _EXC_INFO_ARG);
3599
#else
3600
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3601
                        &is_inexact_lt_midpoint,
3602
                        &is_inexact_gt_midpoint, x, y,
3603
                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3604
                        _EXC_INFO_ARG);
3605
#endif
3606
  BID_RETURN (res);
3607
}
3608
 
3609
 
3610
#if DECIMAL_CALL_BY_REFERENCE
3611
void
3612
bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
3613
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3614
               _EXC_INFO_PARAM) {
3615
  UINT64 x = *px, y = *py, z = *pz;
3616
#if !DECIMAL_GLOBAL_ROUNDING
3617
  unsigned int rnd_mode = *prnd_mode;
3618
#endif
3619
#else
3620
UINT128
3621
bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
3622
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3623
               _EXC_INFO_PARAM) {
3624
#endif
3625
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3626
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3627
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3628
  UINT128 x1, y1, z1;
3629
 
3630
#if DECIMAL_CALL_BY_REFERENCE
3631
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3632
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3633
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3634
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3635
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3636
                  &res, &x1, &y1, &z1
3637
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3638
                  _EXC_INFO_ARG);
3639
#else
3640
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3641
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3642
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3643
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3644
                        &is_inexact_lt_midpoint,
3645
                        &is_inexact_gt_midpoint, x1, y1,
3646
                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3647
                        _EXC_INFO_ARG);
3648
#endif
3649
  BID_RETURN (res);
3650
}
3651
 
3652
 
3653
#if DECIMAL_CALL_BY_REFERENCE
3654
void
3655
bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3656
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3657
               _EXC_INFO_PARAM) {
3658
  UINT64 x = *px, y = *py;
3659
  UINT128 z = *pz;
3660
#if !DECIMAL_GLOBAL_ROUNDING
3661
  unsigned int rnd_mode = *prnd_mode;
3662
#endif
3663
#else
3664
UINT128
3665
bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
3666
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3667
               _EXC_INFO_PARAM) {
3668
#endif
3669
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3670
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3671
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3672
  UINT128 x1, y1;
3673
 
3674
#if DECIMAL_CALL_BY_REFERENCE
3675
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3676
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3677
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3678
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3679
                  &res, &x1, &y1, &z
3680
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3681
                  _EXC_INFO_ARG);
3682
#else
3683
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3684
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3685
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3686
                        &is_inexact_lt_midpoint,
3687
                        &is_inexact_gt_midpoint, x1, y1,
3688
                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3689
                        _EXC_INFO_ARG);
3690
#endif
3691
  BID_RETURN (res);
3692
}
3693
 
3694
 
3695
#if DECIMAL_CALL_BY_REFERENCE
3696
void
3697
bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3698
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3699
               _EXC_INFO_PARAM) {
3700
  UINT64 x = *px, z = *pz;
3701
#if !DECIMAL_GLOBAL_ROUNDING
3702
  unsigned int rnd_mode = *prnd_mode;
3703
#endif
3704
#else
3705
UINT128
3706
bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
3707
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3708
               _EXC_INFO_PARAM) {
3709
#endif
3710
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3711
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3712
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3713
  UINT128 x1, z1;
3714
 
3715
#if DECIMAL_CALL_BY_REFERENCE
3716
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3717
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3718
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3719
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3720
                  &res, &x1, py, &z1
3721
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3722
                  _EXC_INFO_ARG);
3723
#else
3724
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3725
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3726
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3727
                        &is_inexact_lt_midpoint,
3728
                        &is_inexact_gt_midpoint, x1, y,
3729
                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3730
                        _EXC_INFO_ARG);
3731
#endif
3732
  BID_RETURN (res);
3733
}
3734
 
3735
 
3736
#if DECIMAL_CALL_BY_REFERENCE
3737
void
3738
bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3739
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3740
               _EXC_INFO_PARAM) {
3741
  UINT64 x = *px;
3742
#if !DECIMAL_GLOBAL_ROUNDING
3743
  unsigned int rnd_mode = *prnd_mode;
3744
#endif
3745
#else
3746
UINT128
3747
bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
3748
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3749
               _EXC_INFO_PARAM) {
3750
#endif
3751
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3752
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3753
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3754
  UINT128 x1;
3755
 
3756
#if DECIMAL_CALL_BY_REFERENCE
3757
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3758
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3759
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3760
                  &res, &x1, py, pz
3761
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3762
                  _EXC_INFO_ARG);
3763
#else
3764
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3765
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3766
                        &is_inexact_lt_midpoint,
3767
                        &is_inexact_gt_midpoint, x1, y,
3768
                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3769
                        _EXC_INFO_ARG);
3770
#endif
3771
  BID_RETURN (res);
3772
}
3773
 
3774
 
3775
#if DECIMAL_CALL_BY_REFERENCE
3776
void
3777
bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
3778
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3779
               _EXC_INFO_PARAM) {
3780
  UINT64 y = *py, z = *pz;
3781
#if !DECIMAL_GLOBAL_ROUNDING
3782
  unsigned int rnd_mode = *prnd_mode;
3783
#endif
3784
#else
3785
UINT128
3786
bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
3787
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3788
               _EXC_INFO_PARAM) {
3789
#endif
3790
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3791
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3792
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3793
  UINT128 y1, z1;
3794
 
3795
#if DECIMAL_CALL_BY_REFERENCE
3796
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3797
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3798
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3799
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3800
                  &res, px, &y1, &z1
3801
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3802
                  _EXC_INFO_ARG);
3803
#else
3804
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3805
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3806
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3807
                        &is_inexact_lt_midpoint,
3808
                        &is_inexact_gt_midpoint, x, y1,
3809
                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3810
                        _EXC_INFO_ARG);
3811
#endif
3812
  BID_RETURN (res);
3813
}
3814
 
3815
 
3816
#if DECIMAL_CALL_BY_REFERENCE
3817
void
3818
bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
3819
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3820
               _EXC_INFO_PARAM) {
3821
  UINT64 y = *py;
3822
#if !DECIMAL_GLOBAL_ROUNDING
3823
  unsigned int rnd_mode = *prnd_mode;
3824
#endif
3825
#else
3826
UINT128
3827
bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
3828
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3829
               _EXC_INFO_PARAM) {
3830
#endif
3831
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3832
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3833
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3834
  UINT128 y1;
3835
 
3836
#if DECIMAL_CALL_BY_REFERENCE
3837
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3838
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3839
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3840
                  &res, px, &y1, pz
3841
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3842
                  _EXC_INFO_ARG);
3843
#else
3844
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3845
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3846
                        &is_inexact_lt_midpoint,
3847
                        &is_inexact_gt_midpoint, x, y1,
3848
                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3849
                        _EXC_INFO_ARG);
3850
#endif
3851
  BID_RETURN (res);
3852
}
3853
 
3854
 
3855
#if DECIMAL_CALL_BY_REFERENCE
3856
void
3857
bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
3858
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3859
               _EXC_INFO_PARAM) {
3860
  UINT64 z = *pz;
3861
#if !DECIMAL_GLOBAL_ROUNDING
3862
  unsigned int rnd_mode = *prnd_mode;
3863
#endif
3864
#else
3865
UINT128
3866
bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
3867
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3868
               _EXC_INFO_PARAM) {
3869
#endif
3870
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3871
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3872
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3873
  UINT128 z1;
3874
 
3875
#if DECIMAL_CALL_BY_REFERENCE
3876
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3877
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3878
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3879
                  &res, px, py, &z1
3880
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3881
                  _EXC_INFO_ARG);
3882
#else
3883
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3884
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3885
                        &is_inexact_lt_midpoint,
3886
                        &is_inexact_gt_midpoint, x, y,
3887
                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3888
                        _EXC_INFO_ARG);
3889
#endif
3890
  BID_RETURN (res);
3891
}
3892
 
3893
// Note: bid128qqq_fma is represented by bid128_fma
3894
 
3895
// Note: bid64ddd_fma is represented by bid64_fma
3896
 
3897
#if DECIMAL_CALL_BY_REFERENCE
3898
void
3899
bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3900
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3901
              _EXC_INFO_PARAM) {
3902
  UINT64 x = *px, y = *py;
3903
#if !DECIMAL_GLOBAL_ROUNDING
3904
  unsigned int rnd_mode = *prnd_mode;
3905
#endif
3906
#else
3907
UINT64
3908
bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
3909
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3910
              _EXC_INFO_PARAM) {
3911
#endif
3912
  UINT64 res1 = 0xbaddbaddbaddbaddull;
3913
  UINT128 x1, y1;
3914
 
3915
#if DECIMAL_CALL_BY_REFERENCE
3916
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3917
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3918
  bid64qqq_fma (&res1, &x1, &y1, pz
3919
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3920
                _EXC_INFO_ARG);
3921
#else
3922
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3923
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3924
  res1 = bid64qqq_fma (x1, y1, z
3925
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3926
                       _EXC_INFO_ARG);
3927
#endif
3928
  BID_RETURN (res1);
3929
}
3930
 
3931
 
3932
#if DECIMAL_CALL_BY_REFERENCE
3933
void
3934
bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3935
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3936
              _EXC_INFO_PARAM) {
3937
  UINT64 x = *px, z = *pz;
3938
#if !DECIMAL_GLOBAL_ROUNDING
3939
  unsigned int rnd_mode = *prnd_mode;
3940
#endif
3941
#else
3942
UINT64
3943
bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
3944
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3945
              _EXC_INFO_PARAM) {
3946
#endif
3947
  UINT64 res1 = 0xbaddbaddbaddbaddull;
3948
  UINT128 x1, z1;
3949
 
3950
#if DECIMAL_CALL_BY_REFERENCE
3951
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3952
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3953
  bid64qqq_fma (&res1, &x1, py, &z1
3954
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3955
                _EXC_INFO_ARG);
3956
#else
3957
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3958
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3959
  res1 = bid64qqq_fma (x1, y, z1
3960
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3961
                       _EXC_INFO_ARG);
3962
#endif
3963
  BID_RETURN (res1);
3964
}
3965
 
3966
 
3967
#if DECIMAL_CALL_BY_REFERENCE
3968
void
3969
bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3970
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3971
              _EXC_INFO_PARAM) {
3972
  UINT64 x = *px;
3973
#if !DECIMAL_GLOBAL_ROUNDING
3974
  unsigned int rnd_mode = *prnd_mode;
3975
#endif
3976
#else
3977
UINT64
3978
bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
3979
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3980
              _EXC_INFO_PARAM) {
3981
#endif
3982
  UINT64 res1 = 0xbaddbaddbaddbaddull;
3983
  UINT128 x1;
3984
 
3985
#if DECIMAL_CALL_BY_REFERENCE
3986
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3987
  bid64qqq_fma (&res1, &x1, py, pz
3988
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3989
                _EXC_INFO_ARG);
3990
#else
3991
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3992
  res1 = bid64qqq_fma (x1, y, z
3993
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3994
                       _EXC_INFO_ARG);
3995
#endif
3996
  BID_RETURN (res1);
3997
}
3998
 
3999
 
4000
#if DECIMAL_CALL_BY_REFERENCE
4001
void
4002
bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
4003
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4004
              _EXC_INFO_PARAM) {
4005
  UINT64 y = *py, z = *pz;
4006
#if !DECIMAL_GLOBAL_ROUNDING
4007
  unsigned int rnd_mode = *prnd_mode;
4008
#endif
4009
#else
4010
UINT64
4011
bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
4012
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4013
              _EXC_INFO_PARAM) {
4014
#endif
4015
  UINT64 res1 = 0xbaddbaddbaddbaddull;
4016
  UINT128 y1, z1;
4017
 
4018
#if DECIMAL_CALL_BY_REFERENCE
4019
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4020
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4021
  bid64qqq_fma (&res1, px, &y1, &z1
4022
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4023
                _EXC_INFO_ARG);
4024
#else
4025
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4026
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4027
  res1 = bid64qqq_fma (x, y1, z1
4028
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4029
                       _EXC_INFO_ARG);
4030
#endif
4031
  BID_RETURN (res1);
4032
}
4033
 
4034
 
4035
#if DECIMAL_CALL_BY_REFERENCE
4036
void
4037
bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
4038
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4039
              _EXC_INFO_PARAM) {
4040
  UINT64 y = *py;
4041
#if !DECIMAL_GLOBAL_ROUNDING
4042
  unsigned int rnd_mode = *prnd_mode;
4043
#endif
4044
#else
4045
UINT64
4046
bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
4047
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4048
              _EXC_INFO_PARAM) {
4049
#endif
4050
  UINT64 res1 = 0xbaddbaddbaddbaddull;
4051
  UINT128 y1;
4052
 
4053
#if DECIMAL_CALL_BY_REFERENCE
4054
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4055
  bid64qqq_fma (&res1, px, &y1, pz
4056
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4057
                _EXC_INFO_ARG);
4058
#else
4059
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4060
  res1 = bid64qqq_fma (x, y1, z
4061
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4062
                       _EXC_INFO_ARG);
4063
#endif
4064
  BID_RETURN (res1);
4065
}
4066
 
4067
 
4068
#if DECIMAL_CALL_BY_REFERENCE
4069
void
4070
bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
4071
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4072
              _EXC_INFO_PARAM) {
4073
  UINT64 z = *pz;
4074
#if !DECIMAL_GLOBAL_ROUNDING
4075
  unsigned int rnd_mode = *prnd_mode;
4076
#endif
4077
#else
4078
UINT64
4079
bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
4080
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4081
              _EXC_INFO_PARAM) {
4082
#endif
4083
  UINT64 res1 = 0xbaddbaddbaddbaddull;
4084
  UINT128 z1;
4085
 
4086
#if DECIMAL_CALL_BY_REFERENCE
4087
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4088
  bid64qqq_fma (&res1, px, py, &z1
4089
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4090
                _EXC_INFO_ARG);
4091
#else
4092
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4093
  res1 = bid64qqq_fma (x, y, z1
4094
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4095
                       _EXC_INFO_ARG);
4096
#endif
4097
  BID_RETURN (res1);
4098
}
4099
 
4100
 
4101
#if DECIMAL_CALL_BY_REFERENCE
4102
void
4103
bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
4104
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4105
              _EXC_INFO_PARAM) {
4106
  UINT128 x = *px, y = *py, z = *pz;
4107
#if !DECIMAL_GLOBAL_ROUNDING
4108
  unsigned int rnd_mode = *prnd_mode;
4109
#endif
4110
#else
4111
UINT64
4112
bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
4113
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4114
              _EXC_INFO_PARAM) {
4115
#endif
4116
  int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
4117
    is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
4118
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
4119
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
4120
  int incr_exp;
4121
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4122
  UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4123
  UINT64 res1 = 0xbaddbaddbaddbaddull;
4124
  unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
4125
  UINT64 sign;
4126
  UINT64 exp;
4127
  int unbexp;
4128
  UINT128 C;
4129
  BID_UI64DOUBLE tmp;
4130
  int nr_bits;
4131
  int q, x0;
4132
  int scale;
4133
  int lt_half_ulp = 0, eq_half_ulp = 0;
4134
 
4135
  // Note: for rounding modes other than RN or RA, the result can be obtained
4136
  // by rounding first to BID128 and then to BID64
4137
 
4138
  save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
4139
  *pfpsf = 0;
4140
 
4141
#if DECIMAL_CALL_BY_REFERENCE
4142
  bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4143
                  &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
4144
                  &res, &x, &y, &z
4145
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4146
                  _EXC_INFO_ARG);
4147
#else
4148
  res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4149
                        &is_inexact_lt_midpoint0,
4150
                        &is_inexact_gt_midpoint0, x, y,
4151
                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4152
                        _EXC_INFO_ARG);
4153
#endif
4154
 
4155
  if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
4156
      (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
4157
      ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
4158
      ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity  
4159
#if DECIMAL_CALL_BY_REFERENCE
4160
    bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4161
#else
4162
    res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4163
#endif
4164
    // determine the unbiased exponent of the result
4165
    unbexp = ((res1 >> 53) & 0x3ff) - 398;
4166
 
4167
    // if subnormal, res1  must have exp = -398
4168
    // if tiny and inexact set underflow and inexact status flags
4169
    if (!((res1 & MASK_NAN) == MASK_NAN) &&     // res1 not NaN
4170
        (unbexp == -398)
4171
        && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
4172
        && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
4173
            || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
4174
      // set the inexact flag and the underflow flag
4175
      *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4176
    } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4177
               is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4178
      // set the inexact flag and the underflow flag
4179
      *pfpsf |= INEXACT_EXCEPTION;
4180
    }
4181
 
4182
    *pfpsf |= save_fpsf;
4183
    BID_RETURN (res1);
4184
  } // else continue, and use rounding to nearest to round to 16 digits
4185
 
4186
  // at this point the result is rounded to nearest (even or away) to 34 digits
4187
  // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
4188
  sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
4189
  exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
4190
  unbexp = (exp >> 49) - 6176;
4191
  C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
4192
  C.w[0] = res.w[LOW_128W];
4193
 
4194
  if ((C.w[1] == 0x0 && C.w[0] == 0x0) ||        // result is zero
4195
      (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
4196
      // clear under/overflow
4197
#if DECIMAL_CALL_BY_REFERENCE
4198
    bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4199
#else
4200
    res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4201
#endif
4202
    *pfpsf |= save_fpsf;
4203
    BID_RETURN (res1);
4204
  } // else continue
4205
 
4206
  // -398 - 34 <= unbexp <= 369 + 15
4207
  if (rnd_mode == ROUNDING_TIES_AWAY) {
4208
    // apply correction, if needed, to make the result rounded to nearest-even
4209
    if (is_midpoint_gt_even) {
4210
      // res = res - 1
4211
      res1--; // res1 is now even
4212
    } // else the result is already correctly rounded to nearest-even
4213
  }
4214
  // at this point the result is finite, non-zero canonical normal or subnormal,
4215
  // and in most cases overflow or underflow will not occur
4216
 
4217
  // determine the number of digits q in the result
4218
  // q = nr. of decimal digits in x
4219
  // determine first the nr. of bits in x
4220
  if (C.w[1] == 0) {
4221
    if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
4222
      // split the 64-bit value in two 32-bit halves to avoid rounding errors
4223
      if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
4224
        tmp.d = (double) (C.w[0] >> 32); // exact conversion
4225
        nr_bits =
4226
          33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4227
      } else { // x < 2^32
4228
        tmp.d = (double) (C.w[0]); // exact conversion
4229
        nr_bits =
4230
          1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4231
      }
4232
    } else { // if x < 2^53
4233
      tmp.d = (double) C.w[0]; // exact conversion
4234
      nr_bits =
4235
        1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4236
    }
4237
  } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
4238
    tmp.d = (double) C.w[1]; // exact conversion
4239
    nr_bits =
4240
      65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4241
  }
4242
  q = nr_digits[nr_bits - 1].digits;
4243
  if (q == 0) {
4244
    q = nr_digits[nr_bits - 1].digits1;
4245
    if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
4246
        (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
4247
         C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
4248
      q++;
4249
  }
4250
  // if q > 16, round to nearest even to 16 digits (but for underflow it may 
4251
  // have to be truncated even more)
4252
  if (q > 16) {
4253
    x0 = q - 16;
4254
    if (q <= 18) {
4255
      round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
4256
                    &is_midpoint_lt_even, &is_midpoint_gt_even,
4257
                    &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4258
    } else { // 19 <= q <= 34
4259
      round128_19_38 (q, x0, C, &res128, &incr_exp,
4260
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
4261
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4262
      res1 = res128.w[0]; // the result fits in 64 bits
4263
    }
4264
    unbexp = unbexp + x0;
4265
    if (incr_exp)
4266
      unbexp++;
4267
    q = 16; // need to set in case denormalization is necessary
4268
  } else {
4269
    // the result does not require a second rounding (and it must have 
4270
    // been exact in the first rounding, since q <= 16)
4271
    res1 = C.w[0];
4272
  }
4273
 
4274
  // avoid a double rounding error
4275
  if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4276
      is_midpoint_lt_even) { // double rounding error upward
4277
    // res = res - 1 
4278
    res1--; // res1 becomes odd 
4279
    is_midpoint_lt_even = 0;
4280
    is_inexact_lt_midpoint = 1;
4281
    if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
4282
      res1 = 0x002386f26fc0ffffull; // 10^16 - 1 
4283
      unbexp--;
4284
    }
4285
  } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4286
      is_midpoint_gt_even) { // double rounding error downward
4287
    // res = res + 1
4288
    res1++; // res1 becomes odd (so it cannot be 10^16)
4289
    is_midpoint_gt_even = 0;
4290
    is_inexact_gt_midpoint = 1;
4291
  } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4292
             !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4293
    // if this second rounding was exact the result may still be 
4294
    // inexact because of the first rounding
4295
    if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4296
      is_inexact_gt_midpoint = 1;
4297
    }
4298
    if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4299
      is_inexact_lt_midpoint = 1;
4300
    }
4301
  } else if (is_midpoint_gt_even &&
4302
             (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4303
    // pulled up to a midpoint 
4304
    is_inexact_lt_midpoint = 1;
4305
    is_inexact_gt_midpoint = 0;
4306
    is_midpoint_lt_even = 0;
4307
    is_midpoint_gt_even = 0;
4308
  } else if (is_midpoint_lt_even &&
4309
             (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4310
    // pulled down to a midpoint 
4311
    is_inexact_lt_midpoint = 0;
4312
    is_inexact_gt_midpoint = 1;
4313
    is_midpoint_lt_even = 0;
4314
    is_midpoint_gt_even = 0;
4315
  } else {
4316
    ;
4317
  }
4318
  // this is the result rounded correctly to nearest even, with unbounded exp. 
4319
 
4320
  // check for overflow
4321
  if (q + unbexp > P16 + expmax16) {
4322
    res1 = sign | 0x7800000000000000ull;
4323
    *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
4324
    *pfpsf |= save_fpsf;
4325
    BID_RETURN (res1)
4326
  } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
4327
    // not overflow; the result must be exact, and we can multiply res1 by
4328
    // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
4329
    scale = unbexp - expmax16;
4330
    res1 = res1 * ten2k64[scale]; // res1 * 10^scale
4331
    unbexp = expmax16; // unbexp - scale 
4332
  } else {
4333
    ; // continue
4334
  }
4335
 
4336
  // check for underflow
4337
  if (q + unbexp < P16 + expmin16) {
4338
    if (unbexp < expmin16) {
4339
      // we must truncate more of res
4340
      x0 = expmin16 - unbexp; // x0 >= 1
4341
      is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
4342
      is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
4343
      is_midpoint_lt_even0 = is_midpoint_lt_even;
4344
      is_midpoint_gt_even0 = is_midpoint_gt_even;
4345
      is_inexact_lt_midpoint = 0;
4346
      is_inexact_gt_midpoint = 0;
4347
      is_midpoint_lt_even = 0;
4348
      is_midpoint_gt_even = 0;
4349
      // the number of decimal digits in res1 is q
4350
      if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
4351
        // 2 <= q <= 16, 1 <= x0 <= 15
4352
        round64_2_18 (q, x0, res1, &res1, &incr_exp,
4353
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
4354
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4355
        if (incr_exp) {
4356
          // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
4357
          res1 = ten2k64[q - x0];
4358
        }
4359
        unbexp = unbexp + x0; // expmin16
4360
      } else if (x0 == q) {
4361
        // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
4362
        // determine relationship with 1/2 ulp
4363
        // q <= 16
4364
        if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
4365
          lt_half_ulp = 1;
4366
          is_inexact_lt_midpoint = 1;
4367
        } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
4368
          eq_half_ulp = 1;
4369
          is_midpoint_gt_even = 1;
4370
        } else { // > 1/2 ulp
4371
          // gt_half_ulp = 1;
4372
          is_inexact_gt_midpoint = 1;
4373
        }
4374
        if (lt_half_ulp || eq_half_ulp) {
4375
          // res = +0.0 * 10^expmin16
4376
          res1 = 0x0000000000000000ull;
4377
        } else { // if (gt_half_ulp)
4378
          // res = +1 * 10^expmin16
4379
          res1 = 0x0000000000000001ull;
4380
        }
4381
        unbexp = expmin16;
4382
      } else { // if (x0 > q)
4383
        // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
4384
        res1 = 0x0000000000000000ull;
4385
        unbexp = expmin16;
4386
        is_inexact_lt_midpoint = 1;
4387
      }
4388
      // avoid a double rounding error
4389
      if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4390
          is_midpoint_lt_even) { // double rounding error upward
4391
        // res = res - 1
4392
        res1--; // res1 becomes odd
4393
        is_midpoint_lt_even = 0;
4394
        is_inexact_lt_midpoint = 1;
4395
      } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4396
          is_midpoint_gt_even) { // double rounding error downward
4397
        // res = res + 1
4398
        res1++; // res1 becomes odd
4399
        is_midpoint_gt_even = 0;
4400
        is_inexact_gt_midpoint = 1;
4401
      } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4402
                 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4403
        // if this rounding was exact the result may still be 
4404
        // inexact because of the previous roundings
4405
        if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4406
          is_inexact_gt_midpoint = 1;
4407
        }
4408
        if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4409
          is_inexact_lt_midpoint = 1;
4410
        }
4411
      } else if (is_midpoint_gt_even &&
4412
                 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4413
        // pulled up to a midpoint
4414
        is_inexact_lt_midpoint = 1;
4415
        is_inexact_gt_midpoint = 0;
4416
        is_midpoint_lt_even = 0;
4417
        is_midpoint_gt_even = 0;
4418
      } else if (is_midpoint_lt_even &&
4419
                 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4420
        // pulled down to a midpoint
4421
        is_inexact_lt_midpoint = 0;
4422
        is_inexact_gt_midpoint = 1;
4423
        is_midpoint_lt_even = 0;
4424
        is_midpoint_gt_even = 0;
4425
      } else {
4426
        ;
4427
      }
4428
    }
4429
    // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
4430
    // and the result is tiny and exact
4431
 
4432
    // check for inexact result
4433
    if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4434
        is_midpoint_lt_even || is_midpoint_gt_even ||
4435
        is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4436
        is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4437
      // set the inexact flag and the underflow flag
4438
      *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4439
    }
4440
  } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4441
             is_midpoint_lt_even || is_midpoint_gt_even) {
4442
    *pfpsf |= INEXACT_EXCEPTION;
4443
  }
4444
  // this is the result rounded correctly to nearest, with bounded exponent
4445
 
4446
  if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
4447
    // res = res + 1
4448
    res1++; // res1 is now odd
4449
  } // else the result is already correct
4450
 
4451
  // assemble the result
4452
  if (res1 < 0x0020000000000000ull) { // res < 2^53
4453
    res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
4454
  } else { // res1 >= 2^53
4455
    res1 = sign | MASK_STEERING_BITS |
4456
      ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
4457
  }
4458
  *pfpsf |= save_fpsf;
4459
  BID_RETURN (res1);
4460
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.