OpenCores
URL https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

[/] [or1k_soc_on_altera_embedded_dev_kit/] [trunk/] [linux-2.6/] [linux-2.6.24/] [arch/] [arm/] [nwfpe/] [softfloat.c] - Blame information for rev 3

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 3 xianfeng
/*
2
===============================================================================
3
 
4
This C source file is part of the SoftFloat IEC/IEEE Floating-point
5
Arithmetic Package, Release 2.
6
 
7
Written by John R. Hauser.  This work was made possible in part by the
8
International Computer Science Institute, located at Suite 600, 1947 Center
9
Street, Berkeley, California 94704.  Funding was partially provided by the
10
National Science Foundation under grant MIP-9311980.  The original version
11
of this code was written as part of a project to build a fixed-point vector
12
processor in collaboration with the University of California at Berkeley,
13
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
14
is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
15
arithmetic/softfloat.html'.
16
 
17
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
18
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
19
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
20
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
21
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
22
 
23
Derivative works are acceptable, even for commercial purposes, so long as
24
(1) they include prominent notice that the work is derivative, and (2) they
25
include prominent notice akin to these three paragraphs for those parts of
26
this code that are retained.
27
 
28
===============================================================================
29
*/
30
 
31
#include <asm/div64.h>
32
 
33
#include "fpa11.h"
34
//#include "milieu.h"
35
//#include "softfloat.h"
36
 
37
/*
38
-------------------------------------------------------------------------------
39
Primitive arithmetic functions, including multi-word arithmetic, and
40
division and square root approximations.  (Can be specialized to target if
41
desired.)
42
-------------------------------------------------------------------------------
43
*/
44
#include "softfloat-macros"
45
 
46
/*
47
-------------------------------------------------------------------------------
48
Functions and definitions to determine:  (1) whether tininess for underflow
49
is detected before or after rounding by default, (2) what (if anything)
50
happens when exceptions are raised, (3) how signaling NaNs are distinguished
51
from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
52
are propagated from function inputs to output.  These details are target-
53
specific.
54
-------------------------------------------------------------------------------
55
*/
56
#include "softfloat-specialize"
57
 
58
/*
59
-------------------------------------------------------------------------------
60
Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
61
and 7, and returns the properly rounded 32-bit integer corresponding to the
62
input.  If `zSign' is nonzero, the input is negated before being converted
63
to an integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point
64
input is simply rounded to an integer, with the inexact exception raised if
65
the input cannot be represented exactly as an integer.  If the fixed-point
66
input is too large, however, the invalid exception is raised and the largest
67
positive or negative integer is returned.
68
-------------------------------------------------------------------------------
69
*/
70
static int32 roundAndPackInt32( struct roundingData *roundData, flag zSign, bits64 absZ )
71
{
72
    int8 roundingMode;
73
    flag roundNearestEven;
74
    int8 roundIncrement, roundBits;
75
    int32 z;
76
 
77
    roundingMode = roundData->mode;
78
    roundNearestEven = ( roundingMode == float_round_nearest_even );
79
    roundIncrement = 0x40;
80
    if ( ! roundNearestEven ) {
81
        if ( roundingMode == float_round_to_zero ) {
82
            roundIncrement = 0;
83
        }
84
        else {
85
            roundIncrement = 0x7F;
86
            if ( zSign ) {
87
                if ( roundingMode == float_round_up ) roundIncrement = 0;
88
            }
89
            else {
90
                if ( roundingMode == float_round_down ) roundIncrement = 0;
91
            }
92
        }
93
    }
94
    roundBits = absZ & 0x7F;
95
    absZ = ( absZ + roundIncrement )>>7;
96
    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
97
    z = absZ;
98
    if ( zSign ) z = - z;
99
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
100
        roundData->exception |= float_flag_invalid;
101
        return zSign ? 0x80000000 : 0x7FFFFFFF;
102
    }
103
    if ( roundBits ) roundData->exception |= float_flag_inexact;
104
    return z;
105
 
106
}
107
 
108
/*
109
-------------------------------------------------------------------------------
110
Returns the fraction bits of the single-precision floating-point value `a'.
111
-------------------------------------------------------------------------------
112
*/
113
INLINE bits32 extractFloat32Frac( float32 a )
114
{
115
 
116
    return a & 0x007FFFFF;
117
 
118
}
119
 
120
/*
121
-------------------------------------------------------------------------------
122
Returns the exponent bits of the single-precision floating-point value `a'.
123
-------------------------------------------------------------------------------
124
*/
125
INLINE int16 extractFloat32Exp( float32 a )
126
{
127
 
128
    return ( a>>23 ) & 0xFF;
129
 
130
}
131
 
132
/*
133
-------------------------------------------------------------------------------
134
Returns the sign bit of the single-precision floating-point value `a'.
135
-------------------------------------------------------------------------------
136
*/
137
#if 0   /* in softfloat.h */
138
INLINE flag extractFloat32Sign( float32 a )
139
{
140
 
141
    return a>>31;
142
 
143
}
144
#endif
145
 
146
/*
147
-------------------------------------------------------------------------------
148
Normalizes the subnormal single-precision floating-point value represented
149
by the denormalized significand `aSig'.  The normalized exponent and
150
significand are stored at the locations pointed to by `zExpPtr' and
151
`zSigPtr', respectively.
152
-------------------------------------------------------------------------------
153
*/
154
static void
155
 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
156
{
157
    int8 shiftCount;
158
 
159
    shiftCount = countLeadingZeros32( aSig ) - 8;
160
    *zSigPtr = aSig<<shiftCount;
161
    *zExpPtr = 1 - shiftCount;
162
 
163
}
164
 
165
/*
166
-------------------------------------------------------------------------------
167
Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
168
single-precision floating-point value, returning the result.  After being
169
shifted into the proper positions, the three fields are simply added
170
together to form the result.  This means that any integer portion of `zSig'
171
will be added into the exponent.  Since a properly normalized significand
172
will have an integer portion equal to 1, the `zExp' input should be 1 less
173
than the desired result exponent whenever `zSig' is a complete, normalized
174
significand.
175
-------------------------------------------------------------------------------
176
*/
177
INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
178
{
179
#if 0
180
   float32 f;
181
   __asm__("@ packFloat32                               \n\
182
            mov %0, %1, asl #31                         \n\
183
            orr %0, %2, asl #23                         \n\
184
            orr %0, %3"
185
            : /* no outputs */
186
            : "g" (f), "g" (zSign), "g" (zExp), "g" (zSig)
187
            : "cc");
188
   return f;
189
#else
190
    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
191
#endif 
192
}
193
 
194
/*
195
-------------------------------------------------------------------------------
196
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
197
and significand `zSig', and returns the proper single-precision floating-
198
point value corresponding to the abstract input.  Ordinarily, the abstract
199
value is simply rounded and packed into the single-precision format, with
200
the inexact exception raised if the abstract input cannot be represented
201
exactly.  If the abstract value is too large, however, the overflow and
202
inexact exceptions are raised and an infinity or maximal finite value is
203
returned.  If the abstract value is too small, the input value is rounded to
204
a subnormal number, and the underflow and inexact exceptions are raised if
205
the abstract input cannot be represented exactly as a subnormal single-
206
precision floating-point number.
207
    The input significand `zSig' has its binary point between bits 30
208
and 29, which is 7 bits to the left of the usual location.  This shifted
209
significand must be normalized or smaller.  If `zSig' is not normalized,
210
`zExp' must be 0; in that case, the result returned is a subnormal number,
211
and it must not require rounding.  In the usual case that `zSig' is
212
normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
213
The handling of underflow and overflow follows the IEC/IEEE Standard for
214
Binary Floating-point Arithmetic.
215
-------------------------------------------------------------------------------
216
*/
217
static float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
218
{
219
    int8 roundingMode;
220
    flag roundNearestEven;
221
    int8 roundIncrement, roundBits;
222
    flag isTiny;
223
 
224
    roundingMode = roundData->mode;
225
    roundNearestEven = ( roundingMode == float_round_nearest_even );
226
    roundIncrement = 0x40;
227
    if ( ! roundNearestEven ) {
228
        if ( roundingMode == float_round_to_zero ) {
229
            roundIncrement = 0;
230
        }
231
        else {
232
            roundIncrement = 0x7F;
233
            if ( zSign ) {
234
                if ( roundingMode == float_round_up ) roundIncrement = 0;
235
            }
236
            else {
237
                if ( roundingMode == float_round_down ) roundIncrement = 0;
238
            }
239
        }
240
    }
241
    roundBits = zSig & 0x7F;
242
    if ( 0xFD <= (bits16) zExp ) {
243
        if (    ( 0xFD < zExp )
244
             || (    ( zExp == 0xFD )
245
                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
246
           ) {
247
            roundData->exception |= float_flag_overflow | float_flag_inexact;
248
            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
249
        }
250
        if ( zExp < 0 ) {
251
            isTiny =
252
                   ( float_detect_tininess == float_tininess_before_rounding )
253
                || ( zExp < -1 )
254
                || ( zSig + roundIncrement < 0x80000000 );
255
            shift32RightJamming( zSig, - zExp, &zSig );
256
            zExp = 0;
257
            roundBits = zSig & 0x7F;
258
            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
259
        }
260
    }
261
    if ( roundBits ) roundData->exception |= float_flag_inexact;
262
    zSig = ( zSig + roundIncrement )>>7;
263
    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
264
    if ( zSig == 0 ) zExp = 0;
265
    return packFloat32( zSign, zExp, zSig );
266
 
267
}
268
 
269
/*
270
-------------------------------------------------------------------------------
271
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
272
and significand `zSig', and returns the proper single-precision floating-
273
point value corresponding to the abstract input.  This routine is just like
274
`roundAndPackFloat32' except that `zSig' does not have to be normalized in
275
any way.  In all cases, `zExp' must be 1 less than the ``true'' floating-
276
point exponent.
277
-------------------------------------------------------------------------------
278
*/
279
static float32
280
 normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
281
{
282
    int8 shiftCount;
283
 
284
    shiftCount = countLeadingZeros32( zSig ) - 1;
285
    return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
286
 
287
}
288
 
289
/*
290
-------------------------------------------------------------------------------
291
Returns the fraction bits of the double-precision floating-point value `a'.
292
-------------------------------------------------------------------------------
293
*/
294
INLINE bits64 extractFloat64Frac( float64 a )
295
{
296
 
297
    return a & LIT64( 0x000FFFFFFFFFFFFF );
298
 
299
}
300
 
301
/*
302
-------------------------------------------------------------------------------
303
Returns the exponent bits of the double-precision floating-point value `a'.
304
-------------------------------------------------------------------------------
305
*/
306
INLINE int16 extractFloat64Exp( float64 a )
307
{
308
 
309
    return ( a>>52 ) & 0x7FF;
310
 
311
}
312
 
313
/*
314
-------------------------------------------------------------------------------
315
Returns the sign bit of the double-precision floating-point value `a'.
316
-------------------------------------------------------------------------------
317
*/
318
#if 0   /* in softfloat.h */
319
INLINE flag extractFloat64Sign( float64 a )
320
{
321
 
322
    return a>>63;
323
 
324
}
325
#endif
326
 
327
/*
328
-------------------------------------------------------------------------------
329
Normalizes the subnormal double-precision floating-point value represented
330
by the denormalized significand `aSig'.  The normalized exponent and
331
significand are stored at the locations pointed to by `zExpPtr' and
332
`zSigPtr', respectively.
333
-------------------------------------------------------------------------------
334
*/
335
static void
336
 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
337
{
338
    int8 shiftCount;
339
 
340
    shiftCount = countLeadingZeros64( aSig ) - 11;
341
    *zSigPtr = aSig<<shiftCount;
342
    *zExpPtr = 1 - shiftCount;
343
 
344
}
345
 
346
/*
347
-------------------------------------------------------------------------------
348
Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
349
double-precision floating-point value, returning the result.  After being
350
shifted into the proper positions, the three fields are simply added
351
together to form the result.  This means that any integer portion of `zSig'
352
will be added into the exponent.  Since a properly normalized significand
353
will have an integer portion equal to 1, the `zExp' input should be 1 less
354
than the desired result exponent whenever `zSig' is a complete, normalized
355
significand.
356
-------------------------------------------------------------------------------
357
*/
358
INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
359
{
360
 
361
    return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
362
 
363
}
364
 
365
/*
366
-------------------------------------------------------------------------------
367
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
368
and significand `zSig', and returns the proper double-precision floating-
369
point value corresponding to the abstract input.  Ordinarily, the abstract
370
value is simply rounded and packed into the double-precision format, with
371
the inexact exception raised if the abstract input cannot be represented
372
exactly.  If the abstract value is too large, however, the overflow and
373
inexact exceptions are raised and an infinity or maximal finite value is
374
returned.  If the abstract value is too small, the input value is rounded to
375
a subnormal number, and the underflow and inexact exceptions are raised if
376
the abstract input cannot be represented exactly as a subnormal double-
377
precision floating-point number.
378
    The input significand `zSig' has its binary point between bits 62
379
and 61, which is 10 bits to the left of the usual location.  This shifted
380
significand must be normalized or smaller.  If `zSig' is not normalized,
381
`zExp' must be 0; in that case, the result returned is a subnormal number,
382
and it must not require rounding.  In the usual case that `zSig' is
383
normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
384
The handling of underflow and overflow follows the IEC/IEEE Standard for
385
Binary Floating-point Arithmetic.
386
-------------------------------------------------------------------------------
387
*/
388
static float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
389
{
390
    int8 roundingMode;
391
    flag roundNearestEven;
392
    int16 roundIncrement, roundBits;
393
    flag isTiny;
394
 
395
    roundingMode = roundData->mode;
396
    roundNearestEven = ( roundingMode == float_round_nearest_even );
397
    roundIncrement = 0x200;
398
    if ( ! roundNearestEven ) {
399
        if ( roundingMode == float_round_to_zero ) {
400
            roundIncrement = 0;
401
        }
402
        else {
403
            roundIncrement = 0x3FF;
404
            if ( zSign ) {
405
                if ( roundingMode == float_round_up ) roundIncrement = 0;
406
            }
407
            else {
408
                if ( roundingMode == float_round_down ) roundIncrement = 0;
409
            }
410
        }
411
    }
412
    roundBits = zSig & 0x3FF;
413
    if ( 0x7FD <= (bits16) zExp ) {
414
        if (    ( 0x7FD < zExp )
415
             || (    ( zExp == 0x7FD )
416
                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
417
           ) {
418
            //register int lr = __builtin_return_address(0);
419
            //printk("roundAndPackFloat64 called from 0x%08x\n",lr);
420
            roundData->exception |= float_flag_overflow | float_flag_inexact;
421
            return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
422
        }
423
        if ( zExp < 0 ) {
424
            isTiny =
425
                   ( float_detect_tininess == float_tininess_before_rounding )
426
                || ( zExp < -1 )
427
                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
428
            shift64RightJamming( zSig, - zExp, &zSig );
429
            zExp = 0;
430
            roundBits = zSig & 0x3FF;
431
            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
432
        }
433
    }
434
    if ( roundBits ) roundData->exception |= float_flag_inexact;
435
    zSig = ( zSig + roundIncrement )>>10;
436
    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
437
    if ( zSig == 0 ) zExp = 0;
438
    return packFloat64( zSign, zExp, zSig );
439
 
440
}
441
 
442
/*
443
-------------------------------------------------------------------------------
444
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
445
and significand `zSig', and returns the proper double-precision floating-
446
point value corresponding to the abstract input.  This routine is just like
447
`roundAndPackFloat64' except that `zSig' does not have to be normalized in
448
any way.  In all cases, `zExp' must be 1 less than the ``true'' floating-
449
point exponent.
450
-------------------------------------------------------------------------------
451
*/
452
static float64
453
 normalizeRoundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
454
{
455
    int8 shiftCount;
456
 
457
    shiftCount = countLeadingZeros64( zSig ) - 1;
458
    return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
459
 
460
}
461
 
462
#ifdef FLOATX80
463
 
464
/*
465
-------------------------------------------------------------------------------
466
Returns the fraction bits of the extended double-precision floating-point
467
value `a'.
468
-------------------------------------------------------------------------------
469
*/
470
INLINE bits64 extractFloatx80Frac( floatx80 a )
471
{
472
 
473
    return a.low;
474
 
475
}
476
 
477
/*
478
-------------------------------------------------------------------------------
479
Returns the exponent bits of the extended double-precision floating-point
480
value `a'.
481
-------------------------------------------------------------------------------
482
*/
483
INLINE int32 extractFloatx80Exp( floatx80 a )
484
{
485
 
486
    return a.high & 0x7FFF;
487
 
488
}
489
 
490
/*
491
-------------------------------------------------------------------------------
492
Returns the sign bit of the extended double-precision floating-point value
493
`a'.
494
-------------------------------------------------------------------------------
495
*/
496
INLINE flag extractFloatx80Sign( floatx80 a )
497
{
498
 
499
    return a.high>>15;
500
 
501
}
502
 
503
/*
504
-------------------------------------------------------------------------------
505
Normalizes the subnormal extended double-precision floating-point value
506
represented by the denormalized significand `aSig'.  The normalized exponent
507
and significand are stored at the locations pointed to by `zExpPtr' and
508
`zSigPtr', respectively.
509
-------------------------------------------------------------------------------
510
*/
511
static void
512
 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
513
{
514
    int8 shiftCount;
515
 
516
    shiftCount = countLeadingZeros64( aSig );
517
    *zSigPtr = aSig<<shiftCount;
518
    *zExpPtr = 1 - shiftCount;
519
 
520
}
521
 
522
/*
523
-------------------------------------------------------------------------------
524
Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
525
extended double-precision floating-point value, returning the result.
526
-------------------------------------------------------------------------------
527
*/
528
INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
529
{
530
    floatx80 z;
531
 
532
    z.low = zSig;
533
    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
534
    z.__padding = 0;
535
    return z;
536
 
537
}
538
 
539
/*
540
-------------------------------------------------------------------------------
541
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
542
and extended significand formed by the concatenation of `zSig0' and `zSig1',
543
and returns the proper extended double-precision floating-point value
544
corresponding to the abstract input.  Ordinarily, the abstract value is
545
rounded and packed into the extended double-precision format, with the
546
inexact exception raised if the abstract input cannot be represented
547
exactly.  If the abstract value is too large, however, the overflow and
548
inexact exceptions are raised and an infinity or maximal finite value is
549
returned.  If the abstract value is too small, the input value is rounded to
550
a subnormal number, and the underflow and inexact exceptions are raised if
551
the abstract input cannot be represented exactly as a subnormal extended
552
double-precision floating-point number.
553
    If `roundingPrecision' is 32 or 64, the result is rounded to the same
554
number of bits as single or double precision, respectively.  Otherwise, the
555
result is rounded to the full precision of the extended double-precision
556
format.
557
    The input significand must be normalized or smaller.  If the input
558
significand is not normalized, `zExp' must be 0; in that case, the result
559
returned is a subnormal number, and it must not require rounding.  The
560
handling of underflow and overflow follows the IEC/IEEE Standard for Binary
561
Floating-point Arithmetic.
562
-------------------------------------------------------------------------------
563
*/
564
static floatx80
565
 roundAndPackFloatx80(
566
     struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
567
 )
568
{
569
    int8 roundingMode, roundingPrecision;
570
    flag roundNearestEven, increment, isTiny;
571
    int64 roundIncrement, roundMask, roundBits;
572
 
573
    roundingMode = roundData->mode;
574
    roundingPrecision = roundData->precision;
575
    roundNearestEven = ( roundingMode == float_round_nearest_even );
576
    if ( roundingPrecision == 80 ) goto precision80;
577
    if ( roundingPrecision == 64 ) {
578
        roundIncrement = LIT64( 0x0000000000000400 );
579
        roundMask = LIT64( 0x00000000000007FF );
580
    }
581
    else if ( roundingPrecision == 32 ) {
582
        roundIncrement = LIT64( 0x0000008000000000 );
583
        roundMask = LIT64( 0x000000FFFFFFFFFF );
584
    }
585
    else {
586
        goto precision80;
587
    }
588
    zSig0 |= ( zSig1 != 0 );
589
    if ( ! roundNearestEven ) {
590
        if ( roundingMode == float_round_to_zero ) {
591
            roundIncrement = 0;
592
        }
593
        else {
594
            roundIncrement = roundMask;
595
            if ( zSign ) {
596
                if ( roundingMode == float_round_up ) roundIncrement = 0;
597
            }
598
            else {
599
                if ( roundingMode == float_round_down ) roundIncrement = 0;
600
            }
601
        }
602
    }
603
    roundBits = zSig0 & roundMask;
604
    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
605
        if (    ( 0x7FFE < zExp )
606
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
607
           ) {
608
            goto overflow;
609
        }
610
        if ( zExp <= 0 ) {
611
            isTiny =
612
                   ( float_detect_tininess == float_tininess_before_rounding )
613
                || ( zExp < 0 )
614
                || ( zSig0 <= zSig0 + roundIncrement );
615
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
616
            zExp = 0;
617
            roundBits = zSig0 & roundMask;
618
            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
619
            if ( roundBits ) roundData->exception |= float_flag_inexact;
620
            zSig0 += roundIncrement;
621
            if ( (sbits64) zSig0 < 0 ) zExp = 1;
622
            roundIncrement = roundMask + 1;
623
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
624
                roundMask |= roundIncrement;
625
            }
626
            zSig0 &= ~ roundMask;
627
            return packFloatx80( zSign, zExp, zSig0 );
628
        }
629
    }
630
    if ( roundBits ) roundData->exception |= float_flag_inexact;
631
    zSig0 += roundIncrement;
632
    if ( zSig0 < roundIncrement ) {
633
        ++zExp;
634
        zSig0 = LIT64( 0x8000000000000000 );
635
    }
636
    roundIncrement = roundMask + 1;
637
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
638
        roundMask |= roundIncrement;
639
    }
640
    zSig0 &= ~ roundMask;
641
    if ( zSig0 == 0 ) zExp = 0;
642
    return packFloatx80( zSign, zExp, zSig0 );
643
 precision80:
644
    increment = ( (sbits64) zSig1 < 0 );
645
    if ( ! roundNearestEven ) {
646
        if ( roundingMode == float_round_to_zero ) {
647
            increment = 0;
648
        }
649
        else {
650
            if ( zSign ) {
651
                increment = ( roundingMode == float_round_down ) && zSig1;
652
            }
653
            else {
654
                increment = ( roundingMode == float_round_up ) && zSig1;
655
            }
656
        }
657
    }
658
    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
659
        if (    ( 0x7FFE < zExp )
660
             || (    ( zExp == 0x7FFE )
661
                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
662
                  && increment
663
                )
664
           ) {
665
            roundMask = 0;
666
 overflow:
667
            roundData->exception |= float_flag_overflow | float_flag_inexact;
668
            if (    ( roundingMode == float_round_to_zero )
669
                 || ( zSign && ( roundingMode == float_round_up ) )
670
                 || ( ! zSign && ( roundingMode == float_round_down ) )
671
               ) {
672
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
673
            }
674
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
675
        }
676
        if ( zExp <= 0 ) {
677
            isTiny =
678
                   ( float_detect_tininess == float_tininess_before_rounding )
679
                || ( zExp < 0 )
680
                || ! increment
681
                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
682
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
683
            zExp = 0;
684
            if ( isTiny && zSig1 ) roundData->exception |= float_flag_underflow;
685
            if ( zSig1 ) roundData->exception |= float_flag_inexact;
686
            if ( roundNearestEven ) {
687
                increment = ( (sbits64) zSig1 < 0 );
688
            }
689
            else {
690
                if ( zSign ) {
691
                    increment = ( roundingMode == float_round_down ) && zSig1;
692
                }
693
                else {
694
                    increment = ( roundingMode == float_round_up ) && zSig1;
695
                }
696
            }
697
            if ( increment ) {
698
                ++zSig0;
699
                zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
700
                if ( (sbits64) zSig0 < 0 ) zExp = 1;
701
            }
702
            return packFloatx80( zSign, zExp, zSig0 );
703
        }
704
    }
705
    if ( zSig1 ) roundData->exception |= float_flag_inexact;
706
    if ( increment ) {
707
        ++zSig0;
708
        if ( zSig0 == 0 ) {
709
            ++zExp;
710
            zSig0 = LIT64( 0x8000000000000000 );
711
        }
712
        else {
713
            zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
714
        }
715
    }
716
    else {
717
        if ( zSig0 == 0 ) zExp = 0;
718
    }
719
 
720
    return packFloatx80( zSign, zExp, zSig0 );
721
}
722
 
723
/*
724
-------------------------------------------------------------------------------
725
Takes an abstract floating-point value having sign `zSign', exponent
726
`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
727
and returns the proper extended double-precision floating-point value
728
corresponding to the abstract input.  This routine is just like
729
`roundAndPackFloatx80' except that the input significand does not have to be
730
normalized.
731
-------------------------------------------------------------------------------
732
*/
733
static floatx80
734
 normalizeRoundAndPackFloatx80(
735
     struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
736
 )
737
{
738
    int8 shiftCount;
739
 
740
    if ( zSig0 == 0 ) {
741
        zSig0 = zSig1;
742
        zSig1 = 0;
743
        zExp -= 64;
744
    }
745
    shiftCount = countLeadingZeros64( zSig0 );
746
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
747
    zExp -= shiftCount;
748
    return
749
        roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 );
750
 
751
}
752
 
753
#endif
754
 
755
/*
756
-------------------------------------------------------------------------------
757
Returns the result of converting the 32-bit two's complement integer `a' to
758
the single-precision floating-point format.  The conversion is performed
759
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
760
-------------------------------------------------------------------------------
761
*/
762
float32 int32_to_float32(struct roundingData *roundData, int32 a)
763
{
764
    flag zSign;
765
 
766
    if ( a == 0 ) return 0;
767
    if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
768
    zSign = ( a < 0 );
769
    return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a );
770
 
771
}
772
 
773
/*
774
-------------------------------------------------------------------------------
775
Returns the result of converting the 32-bit two's complement integer `a' to
776
the double-precision floating-point format.  The conversion is performed
777
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
778
-------------------------------------------------------------------------------
779
*/
780
float64 int32_to_float64( int32 a )
781
{
782
    flag aSign;
783
    uint32 absA;
784
    int8 shiftCount;
785
    bits64 zSig;
786
 
787
    if ( a == 0 ) return 0;
788
    aSign = ( a < 0 );
789
    absA = aSign ? - a : a;
790
    shiftCount = countLeadingZeros32( absA ) + 21;
791
    zSig = absA;
792
    return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
793
 
794
}
795
 
796
#ifdef FLOATX80
797
 
798
/*
799
-------------------------------------------------------------------------------
800
Returns the result of converting the 32-bit two's complement integer `a'
801
to the extended double-precision floating-point format.  The conversion
802
is performed according to the IEC/IEEE Standard for Binary Floating-point
803
Arithmetic.
804
-------------------------------------------------------------------------------
805
*/
806
floatx80 int32_to_floatx80( int32 a )
807
{
808
    flag zSign;
809
    uint32 absA;
810
    int8 shiftCount;
811
    bits64 zSig;
812
 
813
    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
814
    zSign = ( a < 0 );
815
    absA = zSign ? - a : a;
816
    shiftCount = countLeadingZeros32( absA ) + 32;
817
    zSig = absA;
818
    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
819
 
820
}
821
 
822
#endif
823
 
824
/*
825
-------------------------------------------------------------------------------
826
Returns the result of converting the single-precision floating-point value
827
`a' to the 32-bit two's complement integer format.  The conversion is
828
performed according to the IEC/IEEE Standard for Binary Floating-point
829
Arithmetic---which means in particular that the conversion is rounded
830
according to the current rounding mode.  If `a' is a NaN, the largest
831
positive integer is returned.  Otherwise, if the conversion overflows, the
832
largest integer with the same sign as `a' is returned.
833
-------------------------------------------------------------------------------
834
*/
835
int32 float32_to_int32( struct roundingData *roundData, float32 a )
836
{
837
    flag aSign;
838
    int16 aExp, shiftCount;
839
    bits32 aSig;
840
    bits64 zSig;
841
 
842
    aSig = extractFloat32Frac( a );
843
    aExp = extractFloat32Exp( a );
844
    aSign = extractFloat32Sign( a );
845
    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
846
    if ( aExp ) aSig |= 0x00800000;
847
    shiftCount = 0xAF - aExp;
848
    zSig = aSig;
849
    zSig <<= 32;
850
    if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
851
    return roundAndPackInt32( roundData, aSign, zSig );
852
 
853
}
854
 
855
/*
856
-------------------------------------------------------------------------------
857
Returns the result of converting the single-precision floating-point value
858
`a' to the 32-bit two's complement integer format.  The conversion is
859
performed according to the IEC/IEEE Standard for Binary Floating-point
860
Arithmetic, except that the conversion is always rounded toward zero.  If
861
`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
862
conversion overflows, the largest integer with the same sign as `a' is
863
returned.
864
-------------------------------------------------------------------------------
865
*/
866
int32 float32_to_int32_round_to_zero( float32 a )
867
{
868
    flag aSign;
869
    int16 aExp, shiftCount;
870
    bits32 aSig;
871
    int32 z;
872
 
873
    aSig = extractFloat32Frac( a );
874
    aExp = extractFloat32Exp( a );
875
    aSign = extractFloat32Sign( a );
876
    shiftCount = aExp - 0x9E;
877
    if ( 0 <= shiftCount ) {
878
        if ( a == 0xCF000000 ) return 0x80000000;
879
        float_raise( float_flag_invalid );
880
        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
881
        return 0x80000000;
882
    }
883
    else if ( aExp <= 0x7E ) {
884
        if ( aExp | aSig ) float_raise( float_flag_inexact );
885
        return 0;
886
    }
887
    aSig = ( aSig | 0x00800000 )<<8;
888
    z = aSig>>( - shiftCount );
889
    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
890
        float_raise( float_flag_inexact );
891
    }
892
    return aSign ? - z : z;
893
 
894
}
895
 
896
/*
897
-------------------------------------------------------------------------------
898
Returns the result of converting the single-precision floating-point value
899
`a' to the double-precision floating-point format.  The conversion is
900
performed according to the IEC/IEEE Standard for Binary Floating-point
901
Arithmetic.
902
-------------------------------------------------------------------------------
903
*/
904
float64 float32_to_float64( float32 a )
905
{
906
    flag aSign;
907
    int16 aExp;
908
    bits32 aSig;
909
 
910
    aSig = extractFloat32Frac( a );
911
    aExp = extractFloat32Exp( a );
912
    aSign = extractFloat32Sign( a );
913
    if ( aExp == 0xFF ) {
914
        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
915
        return packFloat64( aSign, 0x7FF, 0 );
916
    }
917
    if ( aExp == 0 ) {
918
        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
919
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
920
        --aExp;
921
    }
922
    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
923
 
924
}
925
 
926
#ifdef FLOATX80
927
 
928
/*
929
-------------------------------------------------------------------------------
930
Returns the result of converting the single-precision floating-point value
931
`a' to the extended double-precision floating-point format.  The conversion
932
is performed according to the IEC/IEEE Standard for Binary Floating-point
933
Arithmetic.
934
-------------------------------------------------------------------------------
935
*/
936
floatx80 float32_to_floatx80( float32 a )
937
{
938
    flag aSign;
939
    int16 aExp;
940
    bits32 aSig;
941
 
942
    aSig = extractFloat32Frac( a );
943
    aExp = extractFloat32Exp( a );
944
    aSign = extractFloat32Sign( a );
945
    if ( aExp == 0xFF ) {
946
        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
947
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
948
    }
949
    if ( aExp == 0 ) {
950
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
951
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
952
    }
953
    aSig |= 0x00800000;
954
    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
955
 
956
}
957
 
958
#endif
959
 
960
/*
961
-------------------------------------------------------------------------------
962
Rounds the single-precision floating-point value `a' to an integer, and
963
returns the result as a single-precision floating-point value.  The
964
operation is performed according to the IEC/IEEE Standard for Binary
965
Floating-point Arithmetic.
966
-------------------------------------------------------------------------------
967
*/
968
float32 float32_round_to_int( struct roundingData *roundData, float32 a )
969
{
970
    flag aSign;
971
    int16 aExp;
972
    bits32 lastBitMask, roundBitsMask;
973
    int8 roundingMode;
974
    float32 z;
975
 
976
    aExp = extractFloat32Exp( a );
977
    if ( 0x96 <= aExp ) {
978
        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
979
            return propagateFloat32NaN( a, a );
980
        }
981
        return a;
982
    }
983
    roundingMode = roundData->mode;
984
    if ( aExp <= 0x7E ) {
985
        if ( (bits32) ( a<<1 ) == 0 ) return a;
986
        roundData->exception |= float_flag_inexact;
987
        aSign = extractFloat32Sign( a );
988
        switch ( roundingMode ) {
989
         case float_round_nearest_even:
990
            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
991
                return packFloat32( aSign, 0x7F, 0 );
992
            }
993
            break;
994
         case float_round_down:
995
            return aSign ? 0xBF800000 : 0;
996
         case float_round_up:
997
            return aSign ? 0x80000000 : 0x3F800000;
998
        }
999
        return packFloat32( aSign, 0, 0 );
1000
    }
1001
    lastBitMask = 1;
1002
    lastBitMask <<= 0x96 - aExp;
1003
    roundBitsMask = lastBitMask - 1;
1004
    z = a;
1005
    if ( roundingMode == float_round_nearest_even ) {
1006
        z += lastBitMask>>1;
1007
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1008
    }
1009
    else if ( roundingMode != float_round_to_zero ) {
1010
        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1011
            z += roundBitsMask;
1012
        }
1013
    }
1014
    z &= ~ roundBitsMask;
1015
    if ( z != a ) roundData->exception |= float_flag_inexact;
1016
    return z;
1017
 
1018
}
1019
 
1020
/*
1021
-------------------------------------------------------------------------------
1022
Returns the result of adding the absolute values of the single-precision
1023
floating-point values `a' and `b'.  If `zSign' is true, the sum is negated
1024
before being returned.  `zSign' is ignored if the result is a NaN.  The
1025
addition is performed according to the IEC/IEEE Standard for Binary
1026
Floating-point Arithmetic.
1027
-------------------------------------------------------------------------------
1028
*/
1029
static float32 addFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
1030
{
1031
    int16 aExp, bExp, zExp;
1032
    bits32 aSig, bSig, zSig;
1033
    int16 expDiff;
1034
 
1035
    aSig = extractFloat32Frac( a );
1036
    aExp = extractFloat32Exp( a );
1037
    bSig = extractFloat32Frac( b );
1038
    bExp = extractFloat32Exp( b );
1039
    expDiff = aExp - bExp;
1040
    aSig <<= 6;
1041
    bSig <<= 6;
1042
    if ( 0 < expDiff ) {
1043
        if ( aExp == 0xFF ) {
1044
            if ( aSig ) return propagateFloat32NaN( a, b );
1045
            return a;
1046
        }
1047
        if ( bExp == 0 ) {
1048
            --expDiff;
1049
        }
1050
        else {
1051
            bSig |= 0x20000000;
1052
        }
1053
        shift32RightJamming( bSig, expDiff, &bSig );
1054
        zExp = aExp;
1055
    }
1056
    else if ( expDiff < 0 ) {
1057
        if ( bExp == 0xFF ) {
1058
            if ( bSig ) return propagateFloat32NaN( a, b );
1059
            return packFloat32( zSign, 0xFF, 0 );
1060
        }
1061
        if ( aExp == 0 ) {
1062
            ++expDiff;
1063
        }
1064
        else {
1065
            aSig |= 0x20000000;
1066
        }
1067
        shift32RightJamming( aSig, - expDiff, &aSig );
1068
        zExp = bExp;
1069
    }
1070
    else {
1071
        if ( aExp == 0xFF ) {
1072
            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1073
            return a;
1074
        }
1075
        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1076
        zSig = 0x40000000 + aSig + bSig;
1077
        zExp = aExp;
1078
        goto roundAndPack;
1079
    }
1080
    aSig |= 0x20000000;
1081
    zSig = ( aSig + bSig )<<1;
1082
    --zExp;
1083
    if ( (sbits32) zSig < 0 ) {
1084
        zSig = aSig + bSig;
1085
        ++zExp;
1086
    }
1087
 roundAndPack:
1088
    return roundAndPackFloat32( roundData, zSign, zExp, zSig );
1089
 
1090
}
1091
 
1092
/*
1093
-------------------------------------------------------------------------------
1094
Returns the result of subtracting the absolute values of the single-
1095
precision floating-point values `a' and `b'.  If `zSign' is true, the
1096
difference is negated before being returned.  `zSign' is ignored if the
1097
result is a NaN.  The subtraction is performed according to the IEC/IEEE
1098
Standard for Binary Floating-point Arithmetic.
1099
-------------------------------------------------------------------------------
1100
*/
1101
static float32 subFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
1102
{
1103
    int16 aExp, bExp, zExp;
1104
    bits32 aSig, bSig, zSig;
1105
    int16 expDiff;
1106
 
1107
    aSig = extractFloat32Frac( a );
1108
    aExp = extractFloat32Exp( a );
1109
    bSig = extractFloat32Frac( b );
1110
    bExp = extractFloat32Exp( b );
1111
    expDiff = aExp - bExp;
1112
    aSig <<= 7;
1113
    bSig <<= 7;
1114
    if ( 0 < expDiff ) goto aExpBigger;
1115
    if ( expDiff < 0 ) goto bExpBigger;
1116
    if ( aExp == 0xFF ) {
1117
        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1118
        roundData->exception |= float_flag_invalid;
1119
        return float32_default_nan;
1120
    }
1121
    if ( aExp == 0 ) {
1122
        aExp = 1;
1123
        bExp = 1;
1124
    }
1125
    if ( bSig < aSig ) goto aBigger;
1126
    if ( aSig < bSig ) goto bBigger;
1127
    return packFloat32( roundData->mode == float_round_down, 0, 0 );
1128
 bExpBigger:
1129
    if ( bExp == 0xFF ) {
1130
        if ( bSig ) return propagateFloat32NaN( a, b );
1131
        return packFloat32( zSign ^ 1, 0xFF, 0 );
1132
    }
1133
    if ( aExp == 0 ) {
1134
        ++expDiff;
1135
    }
1136
    else {
1137
        aSig |= 0x40000000;
1138
    }
1139
    shift32RightJamming( aSig, - expDiff, &aSig );
1140
    bSig |= 0x40000000;
1141
 bBigger:
1142
    zSig = bSig - aSig;
1143
    zExp = bExp;
1144
    zSign ^= 1;
1145
    goto normalizeRoundAndPack;
1146
 aExpBigger:
1147
    if ( aExp == 0xFF ) {
1148
        if ( aSig ) return propagateFloat32NaN( a, b );
1149
        return a;
1150
    }
1151
    if ( bExp == 0 ) {
1152
        --expDiff;
1153
    }
1154
    else {
1155
        bSig |= 0x40000000;
1156
    }
1157
    shift32RightJamming( bSig, expDiff, &bSig );
1158
    aSig |= 0x40000000;
1159
 aBigger:
1160
    zSig = aSig - bSig;
1161
    zExp = aExp;
1162
 normalizeRoundAndPack:
1163
    --zExp;
1164
    return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig );
1165
 
1166
}
1167
 
1168
/*
1169
-------------------------------------------------------------------------------
1170
Returns the result of adding the single-precision floating-point values `a'
1171
and `b'.  The operation is performed according to the IEC/IEEE Standard for
1172
Binary Floating-point Arithmetic.
1173
-------------------------------------------------------------------------------
1174
*/
1175
float32 float32_add( struct roundingData *roundData, float32 a, float32 b )
1176
{
1177
    flag aSign, bSign;
1178
 
1179
    aSign = extractFloat32Sign( a );
1180
    bSign = extractFloat32Sign( b );
1181
    if ( aSign == bSign ) {
1182
        return addFloat32Sigs( roundData, a, b, aSign );
1183
    }
1184
    else {
1185
        return subFloat32Sigs( roundData, a, b, aSign );
1186
    }
1187
 
1188
}
1189
 
1190
/*
1191
-------------------------------------------------------------------------------
1192
Returns the result of subtracting the single-precision floating-point values
1193
`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1194
for Binary Floating-point Arithmetic.
1195
-------------------------------------------------------------------------------
1196
*/
1197
float32 float32_sub( struct roundingData *roundData, float32 a, float32 b )
1198
{
1199
    flag aSign, bSign;
1200
 
1201
    aSign = extractFloat32Sign( a );
1202
    bSign = extractFloat32Sign( b );
1203
    if ( aSign == bSign ) {
1204
        return subFloat32Sigs( roundData, a, b, aSign );
1205
    }
1206
    else {
1207
        return addFloat32Sigs( roundData, a, b, aSign );
1208
    }
1209
 
1210
}
1211
 
1212
/*
1213
-------------------------------------------------------------------------------
1214
Returns the result of multiplying the single-precision floating-point values
1215
`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1216
for Binary Floating-point Arithmetic.
1217
-------------------------------------------------------------------------------
1218
*/
1219
float32 float32_mul( struct roundingData *roundData, float32 a, float32 b )
1220
{
1221
    flag aSign, bSign, zSign;
1222
    int16 aExp, bExp, zExp;
1223
    bits32 aSig, bSig;
1224
    bits64 zSig64;
1225
    bits32 zSig;
1226
 
1227
    aSig = extractFloat32Frac( a );
1228
    aExp = extractFloat32Exp( a );
1229
    aSign = extractFloat32Sign( a );
1230
    bSig = extractFloat32Frac( b );
1231
    bExp = extractFloat32Exp( b );
1232
    bSign = extractFloat32Sign( b );
1233
    zSign = aSign ^ bSign;
1234
    if ( aExp == 0xFF ) {
1235
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1236
            return propagateFloat32NaN( a, b );
1237
        }
1238
        if ( ( bExp | bSig ) == 0 ) {
1239
            roundData->exception |= float_flag_invalid;
1240
            return float32_default_nan;
1241
        }
1242
        return packFloat32( zSign, 0xFF, 0 );
1243
    }
1244
    if ( bExp == 0xFF ) {
1245
        if ( bSig ) return propagateFloat32NaN( a, b );
1246
        if ( ( aExp | aSig ) == 0 ) {
1247
            roundData->exception |= float_flag_invalid;
1248
            return float32_default_nan;
1249
        }
1250
        return packFloat32( zSign, 0xFF, 0 );
1251
    }
1252
    if ( aExp == 0 ) {
1253
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1254
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1255
    }
1256
    if ( bExp == 0 ) {
1257
        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1258
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1259
    }
1260
    zExp = aExp + bExp - 0x7F;
1261
    aSig = ( aSig | 0x00800000 )<<7;
1262
    bSig = ( bSig | 0x00800000 )<<8;
1263
    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1264
    zSig = zSig64;
1265
    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1266
        zSig <<= 1;
1267
        --zExp;
1268
    }
1269
    return roundAndPackFloat32( roundData, zSign, zExp, zSig );
1270
 
1271
}
1272
 
1273
/*
1274
-------------------------------------------------------------------------------
1275
Returns the result of dividing the single-precision floating-point value `a'
1276
by the corresponding value `b'.  The operation is performed according to the
1277
IEC/IEEE Standard for Binary Floating-point Arithmetic.
1278
-------------------------------------------------------------------------------
1279
*/
1280
float32 float32_div( struct roundingData *roundData, float32 a, float32 b )
1281
{
1282
    flag aSign, bSign, zSign;
1283
    int16 aExp, bExp, zExp;
1284
    bits32 aSig, bSig, zSig;
1285
 
1286
    aSig = extractFloat32Frac( a );
1287
    aExp = extractFloat32Exp( a );
1288
    aSign = extractFloat32Sign( a );
1289
    bSig = extractFloat32Frac( b );
1290
    bExp = extractFloat32Exp( b );
1291
    bSign = extractFloat32Sign( b );
1292
    zSign = aSign ^ bSign;
1293
    if ( aExp == 0xFF ) {
1294
        if ( aSig ) return propagateFloat32NaN( a, b );
1295
        if ( bExp == 0xFF ) {
1296
            if ( bSig ) return propagateFloat32NaN( a, b );
1297
            roundData->exception |= float_flag_invalid;
1298
            return float32_default_nan;
1299
        }
1300
        return packFloat32( zSign, 0xFF, 0 );
1301
    }
1302
    if ( bExp == 0xFF ) {
1303
        if ( bSig ) return propagateFloat32NaN( a, b );
1304
        return packFloat32( zSign, 0, 0 );
1305
    }
1306
    if ( bExp == 0 ) {
1307
        if ( bSig == 0 ) {
1308
            if ( ( aExp | aSig ) == 0 ) {
1309
                roundData->exception |= float_flag_invalid;
1310
                return float32_default_nan;
1311
            }
1312
            roundData->exception |= float_flag_divbyzero;
1313
            return packFloat32( zSign, 0xFF, 0 );
1314
        }
1315
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1316
    }
1317
    if ( aExp == 0 ) {
1318
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1319
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1320
    }
1321
    zExp = aExp - bExp + 0x7D;
1322
    aSig = ( aSig | 0x00800000 )<<7;
1323
    bSig = ( bSig | 0x00800000 )<<8;
1324
    if ( bSig <= ( aSig + aSig ) ) {
1325
        aSig >>= 1;
1326
        ++zExp;
1327
    }
1328
    {
1329
        bits64 tmp = ( (bits64) aSig )<<32;
1330
        do_div( tmp, bSig );
1331
        zSig = tmp;
1332
    }
1333
    if ( ( zSig & 0x3F ) == 0 ) {
1334
        zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
1335
    }
1336
    return roundAndPackFloat32( roundData, zSign, zExp, zSig );
1337
 
1338
}
1339
 
1340
/*
1341
-------------------------------------------------------------------------------
1342
Returns the remainder of the single-precision floating-point value `a'
1343
with respect to the corresponding value `b'.  The operation is performed
1344
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1345
-------------------------------------------------------------------------------
1346
*/
1347
float32 float32_rem( struct roundingData *roundData, float32 a, float32 b )
1348
{
1349
    flag aSign, bSign, zSign;
1350
    int16 aExp, bExp, expDiff;
1351
    bits32 aSig, bSig;
1352
    bits32 q;
1353
    bits64 aSig64, bSig64, q64;
1354
    bits32 alternateASig;
1355
    sbits32 sigMean;
1356
 
1357
    aSig = extractFloat32Frac( a );
1358
    aExp = extractFloat32Exp( a );
1359
    aSign = extractFloat32Sign( a );
1360
    bSig = extractFloat32Frac( b );
1361
    bExp = extractFloat32Exp( b );
1362
    bSign = extractFloat32Sign( b );
1363
    if ( aExp == 0xFF ) {
1364
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1365
            return propagateFloat32NaN( a, b );
1366
        }
1367
        roundData->exception |= float_flag_invalid;
1368
        return float32_default_nan;
1369
    }
1370
    if ( bExp == 0xFF ) {
1371
        if ( bSig ) return propagateFloat32NaN( a, b );
1372
        return a;
1373
    }
1374
    if ( bExp == 0 ) {
1375
        if ( bSig == 0 ) {
1376
            roundData->exception |= float_flag_invalid;
1377
            return float32_default_nan;
1378
        }
1379
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1380
    }
1381
    if ( aExp == 0 ) {
1382
        if ( aSig == 0 ) return a;
1383
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1384
    }
1385
    expDiff = aExp - bExp;
1386
    aSig |= 0x00800000;
1387
    bSig |= 0x00800000;
1388
    if ( expDiff < 32 ) {
1389
        aSig <<= 8;
1390
        bSig <<= 8;
1391
        if ( expDiff < 0 ) {
1392
            if ( expDiff < -1 ) return a;
1393
            aSig >>= 1;
1394
        }
1395
        q = ( bSig <= aSig );
1396
        if ( q ) aSig -= bSig;
1397
        if ( 0 < expDiff ) {
1398
            bits64 tmp = ( (bits64) aSig )<<32;
1399
            do_div( tmp, bSig );
1400
            q = tmp;
1401
            q >>= 32 - expDiff;
1402
            bSig >>= 2;
1403
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1404
        }
1405
        else {
1406
            aSig >>= 2;
1407
            bSig >>= 2;
1408
        }
1409
    }
1410
    else {
1411
        if ( bSig <= aSig ) aSig -= bSig;
1412
        aSig64 = ( (bits64) aSig )<<40;
1413
        bSig64 = ( (bits64) bSig )<<40;
1414
        expDiff -= 64;
1415
        while ( 0 < expDiff ) {
1416
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1417
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1418
            aSig64 = - ( ( bSig * q64 )<<38 );
1419
            expDiff -= 62;
1420
        }
1421
        expDiff += 64;
1422
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1423
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1424
        q = q64>>( 64 - expDiff );
1425
        bSig <<= 6;
1426
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
1427
    }
1428
    do {
1429
        alternateASig = aSig;
1430
        ++q;
1431
        aSig -= bSig;
1432
    } while ( 0 <= (sbits32) aSig );
1433
    sigMean = aSig + alternateASig;
1434
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1435
        aSig = alternateASig;
1436
    }
1437
    zSign = ( (sbits32) aSig < 0 );
1438
    if ( zSign ) aSig = - aSig;
1439
    return normalizeRoundAndPackFloat32( roundData, aSign ^ zSign, bExp, aSig );
1440
 
1441
}
1442
 
1443
/*
1444
-------------------------------------------------------------------------------
1445
Returns the square root of the single-precision floating-point value `a'.
1446
The operation is performed according to the IEC/IEEE Standard for Binary
1447
Floating-point Arithmetic.
1448
-------------------------------------------------------------------------------
1449
*/
1450
float32 float32_sqrt( struct roundingData *roundData, float32 a )
1451
{
1452
    flag aSign;
1453
    int16 aExp, zExp;
1454
    bits32 aSig, zSig;
1455
    bits64 rem, term;
1456
 
1457
    aSig = extractFloat32Frac( a );
1458
    aExp = extractFloat32Exp( a );
1459
    aSign = extractFloat32Sign( a );
1460
    if ( aExp == 0xFF ) {
1461
        if ( aSig ) return propagateFloat32NaN( a, 0 );
1462
        if ( ! aSign ) return a;
1463
        roundData->exception |= float_flag_invalid;
1464
        return float32_default_nan;
1465
    }
1466
    if ( aSign ) {
1467
        if ( ( aExp | aSig ) == 0 ) return a;
1468
        roundData->exception |= float_flag_invalid;
1469
        return float32_default_nan;
1470
    }
1471
    if ( aExp == 0 ) {
1472
        if ( aSig == 0 ) return 0;
1473
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1474
    }
1475
    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1476
    aSig = ( aSig | 0x00800000 )<<8;
1477
    zSig = estimateSqrt32( aExp, aSig ) + 2;
1478
    if ( ( zSig & 0x7F ) <= 5 ) {
1479
        if ( zSig < 2 ) {
1480
            zSig = 0xFFFFFFFF;
1481
        }
1482
        else {
1483
            aSig >>= aExp & 1;
1484
            term = ( (bits64) zSig ) * zSig;
1485
            rem = ( ( (bits64) aSig )<<32 ) - term;
1486
            while ( (sbits64) rem < 0 ) {
1487
                --zSig;
1488
                rem += ( ( (bits64) zSig )<<1 ) | 1;
1489
            }
1490
            zSig |= ( rem != 0 );
1491
        }
1492
    }
1493
    shift32RightJamming( zSig, 1, &zSig );
1494
    return roundAndPackFloat32( roundData, 0, zExp, zSig );
1495
 
1496
}
1497
 
1498
/*
1499
-------------------------------------------------------------------------------
1500
Returns 1 if the single-precision floating-point value `a' is equal to the
1501
corresponding value `b', and 0 otherwise.  The comparison is performed
1502
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1503
-------------------------------------------------------------------------------
1504
*/
1505
flag float32_eq( float32 a, float32 b )
1506
{
1507
 
1508
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1509
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1510
       ) {
1511
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1512
            float_raise( float_flag_invalid );
1513
        }
1514
        return 0;
1515
    }
1516
    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1517
 
1518
}
1519
 
1520
/*
1521
-------------------------------------------------------------------------------
1522
Returns 1 if the single-precision floating-point value `a' is less than or
1523
equal to the corresponding value `b', and 0 otherwise.  The comparison is
1524
performed according to the IEC/IEEE Standard for Binary Floating-point
1525
Arithmetic.
1526
-------------------------------------------------------------------------------
1527
*/
1528
flag float32_le( float32 a, float32 b )
1529
{
1530
    flag aSign, bSign;
1531
 
1532
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1533
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1534
       ) {
1535
        float_raise( float_flag_invalid );
1536
        return 0;
1537
    }
1538
    aSign = extractFloat32Sign( a );
1539
    bSign = extractFloat32Sign( b );
1540
    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1541
    return ( a == b ) || ( aSign ^ ( a < b ) );
1542
 
1543
}
1544
 
1545
/*
1546
-------------------------------------------------------------------------------
1547
Returns 1 if the single-precision floating-point value `a' is less than
1548
the corresponding value `b', and 0 otherwise.  The comparison is performed
1549
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1550
-------------------------------------------------------------------------------
1551
*/
1552
flag float32_lt( float32 a, float32 b )
1553
{
1554
    flag aSign, bSign;
1555
 
1556
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1557
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1558
       ) {
1559
        float_raise( float_flag_invalid );
1560
        return 0;
1561
    }
1562
    aSign = extractFloat32Sign( a );
1563
    bSign = extractFloat32Sign( b );
1564
    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1565
    return ( a != b ) && ( aSign ^ ( a < b ) );
1566
 
1567
}
1568
 
1569
/*
1570
-------------------------------------------------------------------------------
1571
Returns 1 if the single-precision floating-point value `a' is equal to the
1572
corresponding value `b', and 0 otherwise.  The invalid exception is raised
1573
if either operand is a NaN.  Otherwise, the comparison is performed
1574
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1575
-------------------------------------------------------------------------------
1576
*/
1577
flag float32_eq_signaling( float32 a, float32 b )
1578
{
1579
 
1580
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1581
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1582
       ) {
1583
        float_raise( float_flag_invalid );
1584
        return 0;
1585
    }
1586
    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1587
 
1588
}
1589
 
1590
/*
1591
-------------------------------------------------------------------------------
1592
Returns 1 if the single-precision floating-point value `a' is less than or
1593
equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
1594
cause an exception.  Otherwise, the comparison is performed according to the
1595
IEC/IEEE Standard for Binary Floating-point Arithmetic.
1596
-------------------------------------------------------------------------------
1597
*/
1598
flag float32_le_quiet( float32 a, float32 b )
1599
{
1600
    flag aSign, bSign;
1601
    //int16 aExp, bExp;
1602
 
1603
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1604
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1605
       ) {
1606
        /* Do nothing, even if NaN as we're quiet */
1607
        return 0;
1608
    }
1609
    aSign = extractFloat32Sign( a );
1610
    bSign = extractFloat32Sign( b );
1611
    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1612
    return ( a == b ) || ( aSign ^ ( a < b ) );
1613
 
1614
}
1615
 
1616
/*
1617
-------------------------------------------------------------------------------
1618
Returns 1 if the single-precision floating-point value `a' is less than
1619
the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
1620
exception.  Otherwise, the comparison is performed according to the IEC/IEEE
1621
Standard for Binary Floating-point Arithmetic.
1622
-------------------------------------------------------------------------------
1623
*/
1624
flag float32_lt_quiet( float32 a, float32 b )
1625
{
1626
    flag aSign, bSign;
1627
 
1628
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1629
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1630
       ) {
1631
        /* Do nothing, even if NaN as we're quiet */
1632
        return 0;
1633
    }
1634
    aSign = extractFloat32Sign( a );
1635
    bSign = extractFloat32Sign( b );
1636
    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1637
    return ( a != b ) && ( aSign ^ ( a < b ) );
1638
 
1639
}
1640
 
1641
/*
1642
-------------------------------------------------------------------------------
1643
Returns the result of converting the double-precision floating-point value
1644
`a' to the 32-bit two's complement integer format.  The conversion is
1645
performed according to the IEC/IEEE Standard for Binary Floating-point
1646
Arithmetic---which means in particular that the conversion is rounded
1647
according to the current rounding mode.  If `a' is a NaN, the largest
1648
positive integer is returned.  Otherwise, if the conversion overflows, the
1649
largest integer with the same sign as `a' is returned.
1650
-------------------------------------------------------------------------------
1651
*/
1652
int32 float64_to_int32( struct roundingData *roundData, float64 a )
1653
{
1654
    flag aSign;
1655
    int16 aExp, shiftCount;
1656
    bits64 aSig;
1657
 
1658
    aSig = extractFloat64Frac( a );
1659
    aExp = extractFloat64Exp( a );
1660
    aSign = extractFloat64Sign( a );
1661
    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1662
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
1663
    shiftCount = 0x42C - aExp;
1664
    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
1665
    return roundAndPackInt32( roundData, aSign, aSig );
1666
 
1667
}
1668
 
1669
/*
1670
-------------------------------------------------------------------------------
1671
Returns the result of converting the double-precision floating-point value
1672
`a' to the 32-bit two's complement integer format.  The conversion is
1673
performed according to the IEC/IEEE Standard for Binary Floating-point
1674
Arithmetic, except that the conversion is always rounded toward zero.  If
1675
`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1676
conversion overflows, the largest integer with the same sign as `a' is
1677
returned.
1678
-------------------------------------------------------------------------------
1679
*/
1680
int32 float64_to_int32_round_to_zero( float64 a )
1681
{
1682
    flag aSign;
1683
    int16 aExp, shiftCount;
1684
    bits64 aSig, savedASig;
1685
    int32 z;
1686
 
1687
    aSig = extractFloat64Frac( a );
1688
    aExp = extractFloat64Exp( a );
1689
    aSign = extractFloat64Sign( a );
1690
    shiftCount = 0x433 - aExp;
1691
    if ( shiftCount < 21 ) {
1692
        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1693
        goto invalid;
1694
    }
1695
    else if ( 52 < shiftCount ) {
1696
        if ( aExp || aSig ) float_raise( float_flag_inexact );
1697
        return 0;
1698
    }
1699
    aSig |= LIT64( 0x0010000000000000 );
1700
    savedASig = aSig;
1701
    aSig >>= shiftCount;
1702
    z = aSig;
1703
    if ( aSign ) z = - z;
1704
    if ( ( z < 0 ) ^ aSign ) {
1705
 invalid:
1706
        float_raise( float_flag_invalid );
1707
        return aSign ? 0x80000000 : 0x7FFFFFFF;
1708
    }
1709
    if ( ( aSig<<shiftCount ) != savedASig ) {
1710
        float_raise( float_flag_inexact );
1711
    }
1712
    return z;
1713
 
1714
}
1715
 
1716
/*
1717
-------------------------------------------------------------------------------
1718
Returns the result of converting the double-precision floating-point value
1719
`a' to the 32-bit two's complement unsigned integer format.  The conversion
1720
is performed according to the IEC/IEEE Standard for Binary Floating-point
1721
Arithmetic---which means in particular that the conversion is rounded
1722
according to the current rounding mode.  If `a' is a NaN, the largest
1723
positive integer is returned.  Otherwise, if the conversion overflows, the
1724
largest positive integer is returned.
1725
-------------------------------------------------------------------------------
1726
*/
1727
int32 float64_to_uint32( struct roundingData *roundData, float64 a )
1728
{
1729
    flag aSign;
1730
    int16 aExp, shiftCount;
1731
    bits64 aSig;
1732
 
1733
    aSig = extractFloat64Frac( a );
1734
    aExp = extractFloat64Exp( a );
1735
    aSign = 0; //extractFloat64Sign( a );
1736
    //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1737
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
1738
    shiftCount = 0x42C - aExp;
1739
    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
1740
    return roundAndPackInt32( roundData, aSign, aSig );
1741
}
1742
 
1743
/*
1744
-------------------------------------------------------------------------------
1745
Returns the result of converting the double-precision floating-point value
1746
`a' to the 32-bit two's complement integer format.  The conversion is
1747
performed according to the IEC/IEEE Standard for Binary Floating-point
1748
Arithmetic, except that the conversion is always rounded toward zero.  If
1749
`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1750
conversion overflows, the largest positive integer is returned.
1751
-------------------------------------------------------------------------------
1752
*/
1753
int32 float64_to_uint32_round_to_zero( float64 a )
1754
{
1755
    flag aSign;
1756
    int16 aExp, shiftCount;
1757
    bits64 aSig, savedASig;
1758
    int32 z;
1759
 
1760
    aSig = extractFloat64Frac( a );
1761
    aExp = extractFloat64Exp( a );
1762
    aSign = extractFloat64Sign( a );
1763
    shiftCount = 0x433 - aExp;
1764
    if ( shiftCount < 21 ) {
1765
        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1766
        goto invalid;
1767
    }
1768
    else if ( 52 < shiftCount ) {
1769
        if ( aExp || aSig ) float_raise( float_flag_inexact );
1770
        return 0;
1771
    }
1772
    aSig |= LIT64( 0x0010000000000000 );
1773
    savedASig = aSig;
1774
    aSig >>= shiftCount;
1775
    z = aSig;
1776
    if ( aSign ) z = - z;
1777
    if ( ( z < 0 ) ^ aSign ) {
1778
 invalid:
1779
        float_raise( float_flag_invalid );
1780
        return aSign ? 0x80000000 : 0x7FFFFFFF;
1781
    }
1782
    if ( ( aSig<<shiftCount ) != savedASig ) {
1783
        float_raise( float_flag_inexact );
1784
    }
1785
    return z;
1786
}
1787
 
1788
/*
1789
-------------------------------------------------------------------------------
1790
Returns the result of converting the double-precision floating-point value
1791
`a' to the single-precision floating-point format.  The conversion is
1792
performed according to the IEC/IEEE Standard for Binary Floating-point
1793
Arithmetic.
1794
-------------------------------------------------------------------------------
1795
*/
1796
float32 float64_to_float32( struct roundingData *roundData, float64 a )
1797
{
1798
    flag aSign;
1799
    int16 aExp;
1800
    bits64 aSig;
1801
    bits32 zSig;
1802
 
1803
    aSig = extractFloat64Frac( a );
1804
    aExp = extractFloat64Exp( a );
1805
    aSign = extractFloat64Sign( a );
1806
    if ( aExp == 0x7FF ) {
1807
        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
1808
        return packFloat32( aSign, 0xFF, 0 );
1809
    }
1810
    shift64RightJamming( aSig, 22, &aSig );
1811
    zSig = aSig;
1812
    if ( aExp || zSig ) {
1813
        zSig |= 0x40000000;
1814
        aExp -= 0x381;
1815
    }
1816
    return roundAndPackFloat32( roundData, aSign, aExp, zSig );
1817
 
1818
}
1819
 
1820
#ifdef FLOATX80
1821
 
1822
/*
1823
-------------------------------------------------------------------------------
1824
Returns the result of converting the double-precision floating-point value
1825
`a' to the extended double-precision floating-point format.  The conversion
1826
is performed according to the IEC/IEEE Standard for Binary Floating-point
1827
Arithmetic.
1828
-------------------------------------------------------------------------------
1829
*/
1830
floatx80 float64_to_floatx80( float64 a )
1831
{
1832
    flag aSign;
1833
    int16 aExp;
1834
    bits64 aSig;
1835
 
1836
    aSig = extractFloat64Frac( a );
1837
    aExp = extractFloat64Exp( a );
1838
    aSign = extractFloat64Sign( a );
1839
    if ( aExp == 0x7FF ) {
1840
        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
1841
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1842
    }
1843
    if ( aExp == 0 ) {
1844
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1845
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
1846
    }
1847
    return
1848
        packFloatx80(
1849
            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
1850
 
1851
}
1852
 
1853
#endif
1854
 
1855
/*
1856
-------------------------------------------------------------------------------
1857
Rounds the double-precision floating-point value `a' to an integer, and
1858
returns the result as a double-precision floating-point value.  The
1859
operation is performed according to the IEC/IEEE Standard for Binary
1860
Floating-point Arithmetic.
1861
-------------------------------------------------------------------------------
1862
*/
1863
float64 float64_round_to_int( struct roundingData *roundData, float64 a )
1864
{
1865
    flag aSign;
1866
    int16 aExp;
1867
    bits64 lastBitMask, roundBitsMask;
1868
    int8 roundingMode;
1869
    float64 z;
1870
 
1871
    aExp = extractFloat64Exp( a );
1872
    if ( 0x433 <= aExp ) {
1873
        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
1874
            return propagateFloat64NaN( a, a );
1875
        }
1876
        return a;
1877
    }
1878
    if ( aExp <= 0x3FE ) {
1879
        if ( (bits64) ( a<<1 ) == 0 ) return a;
1880
        roundData->exception |= float_flag_inexact;
1881
        aSign = extractFloat64Sign( a );
1882
        switch ( roundData->mode ) {
1883
         case float_round_nearest_even:
1884
            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
1885
                return packFloat64( aSign, 0x3FF, 0 );
1886
            }
1887
            break;
1888
         case float_round_down:
1889
            return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
1890
         case float_round_up:
1891
            return
1892
            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
1893
        }
1894
        return packFloat64( aSign, 0, 0 );
1895
    }
1896
    lastBitMask = 1;
1897
    lastBitMask <<= 0x433 - aExp;
1898
    roundBitsMask = lastBitMask - 1;
1899
    z = a;
1900
    roundingMode = roundData->mode;
1901
    if ( roundingMode == float_round_nearest_even ) {
1902
        z += lastBitMask>>1;
1903
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1904
    }
1905
    else if ( roundingMode != float_round_to_zero ) {
1906
        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1907
            z += roundBitsMask;
1908
        }
1909
    }
1910
    z &= ~ roundBitsMask;
1911
    if ( z != a ) roundData->exception |= float_flag_inexact;
1912
    return z;
1913
 
1914
}
1915
 
1916
/*
1917
-------------------------------------------------------------------------------
1918
Returns the result of adding the absolute values of the double-precision
1919
floating-point values `a' and `b'.  If `zSign' is true, the sum is negated
1920
before being returned.  `zSign' is ignored if the result is a NaN.  The
1921
addition is performed according to the IEC/IEEE Standard for Binary
1922
Floating-point Arithmetic.
1923
-------------------------------------------------------------------------------
1924
*/
1925
static float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
1926
{
1927
    int16 aExp, bExp, zExp;
1928
    bits64 aSig, bSig, zSig;
1929
    int16 expDiff;
1930
 
1931
    aSig = extractFloat64Frac( a );
1932
    aExp = extractFloat64Exp( a );
1933
    bSig = extractFloat64Frac( b );
1934
    bExp = extractFloat64Exp( b );
1935
    expDiff = aExp - bExp;
1936
    aSig <<= 9;
1937
    bSig <<= 9;
1938
    if ( 0 < expDiff ) {
1939
        if ( aExp == 0x7FF ) {
1940
            if ( aSig ) return propagateFloat64NaN( a, b );
1941
            return a;
1942
        }
1943
        if ( bExp == 0 ) {
1944
            --expDiff;
1945
        }
1946
        else {
1947
            bSig |= LIT64( 0x2000000000000000 );
1948
        }
1949
        shift64RightJamming( bSig, expDiff, &bSig );
1950
        zExp = aExp;
1951
    }
1952
    else if ( expDiff < 0 ) {
1953
        if ( bExp == 0x7FF ) {
1954
            if ( bSig ) return propagateFloat64NaN( a, b );
1955
            return packFloat64( zSign, 0x7FF, 0 );
1956
        }
1957
        if ( aExp == 0 ) {
1958
            ++expDiff;
1959
        }
1960
        else {
1961
            aSig |= LIT64( 0x2000000000000000 );
1962
        }
1963
        shift64RightJamming( aSig, - expDiff, &aSig );
1964
        zExp = bExp;
1965
    }
1966
    else {
1967
        if ( aExp == 0x7FF ) {
1968
            if ( aSig | bSig ) return propagateFloat64NaN( a, b );
1969
            return a;
1970
        }
1971
        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
1972
        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
1973
        zExp = aExp;
1974
        goto roundAndPack;
1975
    }
1976
    aSig |= LIT64( 0x2000000000000000 );
1977
    zSig = ( aSig + bSig )<<1;
1978
    --zExp;
1979
    if ( (sbits64) zSig < 0 ) {
1980
        zSig = aSig + bSig;
1981
        ++zExp;
1982
    }
1983
 roundAndPack:
1984
    return roundAndPackFloat64( roundData, zSign, zExp, zSig );
1985
 
1986
}
1987
 
1988
/*
1989
-------------------------------------------------------------------------------
1990
Returns the result of subtracting the absolute values of the double-
1991
precision floating-point values `a' and `b'.  If `zSign' is true, the
1992
difference is negated before being returned.  `zSign' is ignored if the
1993
result is a NaN.  The subtraction is performed according to the IEC/IEEE
1994
Standard for Binary Floating-point Arithmetic.
1995
-------------------------------------------------------------------------------
1996
*/
1997
static float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
1998
{
1999
    int16 aExp, bExp, zExp;
2000
    bits64 aSig, bSig, zSig;
2001
    int16 expDiff;
2002
 
2003
    aSig = extractFloat64Frac( a );
2004
    aExp = extractFloat64Exp( a );
2005
    bSig = extractFloat64Frac( b );
2006
    bExp = extractFloat64Exp( b );
2007
    expDiff = aExp - bExp;
2008
    aSig <<= 10;
2009
    bSig <<= 10;
2010
    if ( 0 < expDiff ) goto aExpBigger;
2011
    if ( expDiff < 0 ) goto bExpBigger;
2012
    if ( aExp == 0x7FF ) {
2013
        if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2014
        roundData->exception |= float_flag_invalid;
2015
        return float64_default_nan;
2016
    }
2017
    if ( aExp == 0 ) {
2018
        aExp = 1;
2019
        bExp = 1;
2020
    }
2021
    if ( bSig < aSig ) goto aBigger;
2022
    if ( aSig < bSig ) goto bBigger;
2023
    return packFloat64( roundData->mode == float_round_down, 0, 0 );
2024
 bExpBigger:
2025
    if ( bExp == 0x7FF ) {
2026
        if ( bSig ) return propagateFloat64NaN( a, b );
2027
        return packFloat64( zSign ^ 1, 0x7FF, 0 );
2028
    }
2029
    if ( aExp == 0 ) {
2030
        ++expDiff;
2031
    }
2032
    else {
2033
        aSig |= LIT64( 0x4000000000000000 );
2034
    }
2035
    shift64RightJamming( aSig, - expDiff, &aSig );
2036
    bSig |= LIT64( 0x4000000000000000 );
2037
 bBigger:
2038
    zSig = bSig - aSig;
2039
    zExp = bExp;
2040
    zSign ^= 1;
2041
    goto normalizeRoundAndPack;
2042
 aExpBigger:
2043
    if ( aExp == 0x7FF ) {
2044
        if ( aSig ) return propagateFloat64NaN( a, b );
2045
        return a;
2046
    }
2047
    if ( bExp == 0 ) {
2048
        --expDiff;
2049
    }
2050
    else {
2051
        bSig |= LIT64( 0x4000000000000000 );
2052
    }
2053
    shift64RightJamming( bSig, expDiff, &bSig );
2054
    aSig |= LIT64( 0x4000000000000000 );
2055
 aBigger:
2056
    zSig = aSig - bSig;
2057
    zExp = aExp;
2058
 normalizeRoundAndPack:
2059
    --zExp;
2060
    return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig );
2061
 
2062
}
2063
 
2064
/*
2065
-------------------------------------------------------------------------------
2066
Returns the result of adding the double-precision floating-point values `a'
2067
and `b'.  The operation is performed according to the IEC/IEEE Standard for
2068
Binary Floating-point Arithmetic.
2069
-------------------------------------------------------------------------------
2070
*/
2071
float64 float64_add( struct roundingData *roundData, float64 a, float64 b )
2072
{
2073
    flag aSign, bSign;
2074
 
2075
    aSign = extractFloat64Sign( a );
2076
    bSign = extractFloat64Sign( b );
2077
    if ( aSign == bSign ) {
2078
        return addFloat64Sigs( roundData, a, b, aSign );
2079
    }
2080
    else {
2081
        return subFloat64Sigs( roundData, a, b, aSign );
2082
    }
2083
 
2084
}
2085
 
2086
/*
2087
-------------------------------------------------------------------------------
2088
Returns the result of subtracting the double-precision floating-point values
2089
`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2090
for Binary Floating-point Arithmetic.
2091
-------------------------------------------------------------------------------
2092
*/
2093
float64 float64_sub( struct roundingData *roundData, float64 a, float64 b )
2094
{
2095
    flag aSign, bSign;
2096
 
2097
    aSign = extractFloat64Sign( a );
2098
    bSign = extractFloat64Sign( b );
2099
    if ( aSign == bSign ) {
2100
        return subFloat64Sigs( roundData, a, b, aSign );
2101
    }
2102
    else {
2103
        return addFloat64Sigs( roundData, a, b, aSign );
2104
    }
2105
 
2106
}
2107
 
2108
/*
2109
-------------------------------------------------------------------------------
2110
Returns the result of multiplying the double-precision floating-point values
2111
`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2112
for Binary Floating-point Arithmetic.
2113
-------------------------------------------------------------------------------
2114
*/
2115
float64 float64_mul( struct roundingData *roundData, float64 a, float64 b )
2116
{
2117
    flag aSign, bSign, zSign;
2118
    int16 aExp, bExp, zExp;
2119
    bits64 aSig, bSig, zSig0, zSig1;
2120
 
2121
    aSig = extractFloat64Frac( a );
2122
    aExp = extractFloat64Exp( a );
2123
    aSign = extractFloat64Sign( a );
2124
    bSig = extractFloat64Frac( b );
2125
    bExp = extractFloat64Exp( b );
2126
    bSign = extractFloat64Sign( b );
2127
    zSign = aSign ^ bSign;
2128
    if ( aExp == 0x7FF ) {
2129
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2130
            return propagateFloat64NaN( a, b );
2131
        }
2132
        if ( ( bExp | bSig ) == 0 ) {
2133
            roundData->exception |= float_flag_invalid;
2134
            return float64_default_nan;
2135
        }
2136
        return packFloat64( zSign, 0x7FF, 0 );
2137
    }
2138
    if ( bExp == 0x7FF ) {
2139
        if ( bSig ) return propagateFloat64NaN( a, b );
2140
        if ( ( aExp | aSig ) == 0 ) {
2141
            roundData->exception |= float_flag_invalid;
2142
            return float64_default_nan;
2143
        }
2144
        return packFloat64( zSign, 0x7FF, 0 );
2145
    }
2146
    if ( aExp == 0 ) {
2147
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2148
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2149
    }
2150
    if ( bExp == 0 ) {
2151
        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2152
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2153
    }
2154
    zExp = aExp + bExp - 0x3FF;
2155
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2156
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2157
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
2158
    zSig0 |= ( zSig1 != 0 );
2159
    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2160
        zSig0 <<= 1;
2161
        --zExp;
2162
    }
2163
    return roundAndPackFloat64( roundData, zSign, zExp, zSig0 );
2164
 
2165
}
2166
 
2167
/*
2168
-------------------------------------------------------------------------------
2169
Returns the result of dividing the double-precision floating-point value `a'
2170
by the corresponding value `b'.  The operation is performed according to
2171
the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2172
-------------------------------------------------------------------------------
2173
*/
2174
float64 float64_div( struct roundingData *roundData, float64 a, float64 b )
2175
{
2176
    flag aSign, bSign, zSign;
2177
    int16 aExp, bExp, zExp;
2178
    bits64 aSig, bSig, zSig;
2179
    bits64 rem0, rem1;
2180
    bits64 term0, term1;
2181
 
2182
    aSig = extractFloat64Frac( a );
2183
    aExp = extractFloat64Exp( a );
2184
    aSign = extractFloat64Sign( a );
2185
    bSig = extractFloat64Frac( b );
2186
    bExp = extractFloat64Exp( b );
2187
    bSign = extractFloat64Sign( b );
2188
    zSign = aSign ^ bSign;
2189
    if ( aExp == 0x7FF ) {
2190
        if ( aSig ) return propagateFloat64NaN( a, b );
2191
        if ( bExp == 0x7FF ) {
2192
            if ( bSig ) return propagateFloat64NaN( a, b );
2193
            roundData->exception |= float_flag_invalid;
2194
            return float64_default_nan;
2195
        }
2196
        return packFloat64( zSign, 0x7FF, 0 );
2197
    }
2198
    if ( bExp == 0x7FF ) {
2199
        if ( bSig ) return propagateFloat64NaN( a, b );
2200
        return packFloat64( zSign, 0, 0 );
2201
    }
2202
    if ( bExp == 0 ) {
2203
        if ( bSig == 0 ) {
2204
            if ( ( aExp | aSig ) == 0 ) {
2205
                roundData->exception |= float_flag_invalid;
2206
                return float64_default_nan;
2207
            }
2208
            roundData->exception |= float_flag_divbyzero;
2209
            return packFloat64( zSign, 0x7FF, 0 );
2210
        }
2211
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2212
    }
2213
    if ( aExp == 0 ) {
2214
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2215
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2216
    }
2217
    zExp = aExp - bExp + 0x3FD;
2218
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2219
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2220
    if ( bSig <= ( aSig + aSig ) ) {
2221
        aSig >>= 1;
2222
        ++zExp;
2223
    }
2224
    zSig = estimateDiv128To64( aSig, 0, bSig );
2225
    if ( ( zSig & 0x1FF ) <= 2 ) {
2226
        mul64To128( bSig, zSig, &term0, &term1 );
2227
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2228
        while ( (sbits64) rem0 < 0 ) {
2229
            --zSig;
2230
            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2231
        }
2232
        zSig |= ( rem1 != 0 );
2233
    }
2234
    return roundAndPackFloat64( roundData, zSign, zExp, zSig );
2235
 
2236
}
2237
 
2238
/*
2239
-------------------------------------------------------------------------------
2240
Returns the remainder of the double-precision floating-point value `a'
2241
with respect to the corresponding value `b'.  The operation is performed
2242
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2243
-------------------------------------------------------------------------------
2244
*/
2245
float64 float64_rem( struct roundingData *roundData, float64 a, float64 b )
2246
{
2247
    flag aSign, bSign, zSign;
2248
    int16 aExp, bExp, expDiff;
2249
    bits64 aSig, bSig;
2250
    bits64 q, alternateASig;
2251
    sbits64 sigMean;
2252
 
2253
    aSig = extractFloat64Frac( a );
2254
    aExp = extractFloat64Exp( a );
2255
    aSign = extractFloat64Sign( a );
2256
    bSig = extractFloat64Frac( b );
2257
    bExp = extractFloat64Exp( b );
2258
    bSign = extractFloat64Sign( b );
2259
    if ( aExp == 0x7FF ) {
2260
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2261
            return propagateFloat64NaN( a, b );
2262
        }
2263
        roundData->exception |= float_flag_invalid;
2264
        return float64_default_nan;
2265
    }
2266
    if ( bExp == 0x7FF ) {
2267
        if ( bSig ) return propagateFloat64NaN( a, b );
2268
        return a;
2269
    }
2270
    if ( bExp == 0 ) {
2271
        if ( bSig == 0 ) {
2272
            roundData->exception |= float_flag_invalid;
2273
            return float64_default_nan;
2274
        }
2275
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2276
    }
2277
    if ( aExp == 0 ) {
2278
        if ( aSig == 0 ) return a;
2279
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2280
    }
2281
    expDiff = aExp - bExp;
2282
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
2283
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2284
    if ( expDiff < 0 ) {
2285
        if ( expDiff < -1 ) return a;
2286
        aSig >>= 1;
2287
    }
2288
    q = ( bSig <= aSig );
2289
    if ( q ) aSig -= bSig;
2290
    expDiff -= 64;
2291
    while ( 0 < expDiff ) {
2292
        q = estimateDiv128To64( aSig, 0, bSig );
2293
        q = ( 2 < q ) ? q - 2 : 0;
2294
        aSig = - ( ( bSig>>2 ) * q );
2295
        expDiff -= 62;
2296
    }
2297
    expDiff += 64;
2298
    if ( 0 < expDiff ) {
2299
        q = estimateDiv128To64( aSig, 0, bSig );
2300
        q = ( 2 < q ) ? q - 2 : 0;
2301
        q >>= 64 - expDiff;
2302
        bSig >>= 2;
2303
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2304
    }
2305
    else {
2306
        aSig >>= 2;
2307
        bSig >>= 2;
2308
    }
2309
    do {
2310
        alternateASig = aSig;
2311
        ++q;
2312
        aSig -= bSig;
2313
    } while ( 0 <= (sbits64) aSig );
2314
    sigMean = aSig + alternateASig;
2315
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2316
        aSig = alternateASig;
2317
    }
2318
    zSign = ( (sbits64) aSig < 0 );
2319
    if ( zSign ) aSig = - aSig;
2320
    return normalizeRoundAndPackFloat64( roundData, aSign ^ zSign, bExp, aSig );
2321
 
2322
}
2323
 
2324
/*
2325
-------------------------------------------------------------------------------
2326
Returns the square root of the double-precision floating-point value `a'.
2327
The operation is performed according to the IEC/IEEE Standard for Binary
2328
Floating-point Arithmetic.
2329
-------------------------------------------------------------------------------
2330
*/
2331
float64 float64_sqrt( struct roundingData *roundData, float64 a )
2332
{
2333
    flag aSign;
2334
    int16 aExp, zExp;
2335
    bits64 aSig, zSig;
2336
    bits64 rem0, rem1, term0, term1; //, shiftedRem;
2337
    //float64 z;
2338
 
2339
    aSig = extractFloat64Frac( a );
2340
    aExp = extractFloat64Exp( a );
2341
    aSign = extractFloat64Sign( a );
2342
    if ( aExp == 0x7FF ) {
2343
        if ( aSig ) return propagateFloat64NaN( a, a );
2344
        if ( ! aSign ) return a;
2345
        roundData->exception |= float_flag_invalid;
2346
        return float64_default_nan;
2347
    }
2348
    if ( aSign ) {
2349
        if ( ( aExp | aSig ) == 0 ) return a;
2350
        roundData->exception |= float_flag_invalid;
2351
        return float64_default_nan;
2352
    }
2353
    if ( aExp == 0 ) {
2354
        if ( aSig == 0 ) return 0;
2355
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2356
    }
2357
    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2358
    aSig |= LIT64( 0x0010000000000000 );
2359
    zSig = estimateSqrt32( aExp, aSig>>21 );
2360
    zSig <<= 31;
2361
    aSig <<= 9 - ( aExp & 1 );
2362
    zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;
2363
    if ( ( zSig & 0x3FF ) <= 5 ) {
2364
        if ( zSig < 2 ) {
2365
            zSig = LIT64( 0xFFFFFFFFFFFFFFFF );
2366
        }
2367
        else {
2368
            aSig <<= 2;
2369
            mul64To128( zSig, zSig, &term0, &term1 );
2370
            sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2371
            while ( (sbits64) rem0 < 0 ) {
2372
                --zSig;
2373
                shortShift128Left( 0, zSig, 1, &term0, &term1 );
2374
                term1 |= 1;
2375
                add128( rem0, rem1, term0, term1, &rem0, &rem1 );
2376
            }
2377
            zSig |= ( ( rem0 | rem1 ) != 0 );
2378
        }
2379
    }
2380
    shift64RightJamming( zSig, 1, &zSig );
2381
    return roundAndPackFloat64( roundData, 0, zExp, zSig );
2382
 
2383
}
2384
 
2385
/*
2386
-------------------------------------------------------------------------------
2387
Returns 1 if the double-precision floating-point value `a' is equal to the
2388
corresponding value `b', and 0 otherwise.  The comparison is performed
2389
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2390
-------------------------------------------------------------------------------
2391
*/
2392
flag float64_eq( float64 a, float64 b )
2393
{
2394
 
2395
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2396
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2397
       ) {
2398
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2399
            float_raise( float_flag_invalid );
2400
        }
2401
        return 0;
2402
    }
2403
    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2404
 
2405
}
2406
 
2407
/*
2408
-------------------------------------------------------------------------------
2409
Returns 1 if the double-precision floating-point value `a' is less than or
2410
equal to the corresponding value `b', and 0 otherwise.  The comparison is
2411
performed according to the IEC/IEEE Standard for Binary Floating-point
2412
Arithmetic.
2413
-------------------------------------------------------------------------------
2414
*/
2415
flag float64_le( float64 a, float64 b )
2416
{
2417
    flag aSign, bSign;
2418
 
2419
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2420
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2421
       ) {
2422
        float_raise( float_flag_invalid );
2423
        return 0;
2424
    }
2425
    aSign = extractFloat64Sign( a );
2426
    bSign = extractFloat64Sign( b );
2427
    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2428
    return ( a == b ) || ( aSign ^ ( a < b ) );
2429
 
2430
}
2431
 
2432
/*
2433
-------------------------------------------------------------------------------
2434
Returns 1 if the double-precision floating-point value `a' is less than
2435
the corresponding value `b', and 0 otherwise.  The comparison is performed
2436
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2437
-------------------------------------------------------------------------------
2438
*/
2439
flag float64_lt( float64 a, float64 b )
2440
{
2441
    flag aSign, bSign;
2442
 
2443
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2444
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2445
       ) {
2446
        float_raise( float_flag_invalid );
2447
        return 0;
2448
    }
2449
    aSign = extractFloat64Sign( a );
2450
    bSign = extractFloat64Sign( b );
2451
    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2452
    return ( a != b ) && ( aSign ^ ( a < b ) );
2453
 
2454
}
2455
 
2456
/*
2457
-------------------------------------------------------------------------------
2458
Returns 1 if the double-precision floating-point value `a' is equal to the
2459
corresponding value `b', and 0 otherwise.  The invalid exception is raised
2460
if either operand is a NaN.  Otherwise, the comparison is performed
2461
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2462
-------------------------------------------------------------------------------
2463
*/
2464
flag float64_eq_signaling( float64 a, float64 b )
2465
{
2466
 
2467
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2468
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2469
       ) {
2470
        float_raise( float_flag_invalid );
2471
        return 0;
2472
    }
2473
    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2474
 
2475
}
2476
 
2477
/*
2478
-------------------------------------------------------------------------------
2479
Returns 1 if the double-precision floating-point value `a' is less than or
2480
equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2481
cause an exception.  Otherwise, the comparison is performed according to the
2482
IEC/IEEE Standard for Binary Floating-point Arithmetic.
2483
-------------------------------------------------------------------------------
2484
*/
2485
flag float64_le_quiet( float64 a, float64 b )
2486
{
2487
    flag aSign, bSign;
2488
    //int16 aExp, bExp;
2489
 
2490
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2491
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2492
       ) {
2493
        /* Do nothing, even if NaN as we're quiet */
2494
        return 0;
2495
    }
2496
    aSign = extractFloat64Sign( a );
2497
    bSign = extractFloat64Sign( b );
2498
    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2499
    return ( a == b ) || ( aSign ^ ( a < b ) );
2500
 
2501
}
2502
 
2503
/*
2504
-------------------------------------------------------------------------------
2505
Returns 1 if the double-precision floating-point value `a' is less than
2506
the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2507
exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2508
Standard for Binary Floating-point Arithmetic.
2509
-------------------------------------------------------------------------------
2510
*/
2511
flag float64_lt_quiet( float64 a, float64 b )
2512
{
2513
    flag aSign, bSign;
2514
 
2515
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2516
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2517
       ) {
2518
        /* Do nothing, even if NaN as we're quiet */
2519
        return 0;
2520
    }
2521
    aSign = extractFloat64Sign( a );
2522
    bSign = extractFloat64Sign( b );
2523
    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2524
    return ( a != b ) && ( aSign ^ ( a < b ) );
2525
 
2526
}
2527
 
2528
#ifdef FLOATX80
2529
 
2530
/*
2531
-------------------------------------------------------------------------------
2532
Returns the result of converting the extended double-precision floating-
2533
point value `a' to the 32-bit two's complement integer format.  The
2534
conversion is performed according to the IEC/IEEE Standard for Binary
2535
Floating-point Arithmetic---which means in particular that the conversion
2536
is rounded according to the current rounding mode.  If `a' is a NaN, the
2537
largest positive integer is returned.  Otherwise, if the conversion
2538
overflows, the largest integer with the same sign as `a' is returned.
2539
-------------------------------------------------------------------------------
2540
*/
2541
int32 floatx80_to_int32( struct roundingData *roundData, floatx80 a )
2542
{
2543
    flag aSign;
2544
    int32 aExp, shiftCount;
2545
    bits64 aSig;
2546
 
2547
    aSig = extractFloatx80Frac( a );
2548
    aExp = extractFloatx80Exp( a );
2549
    aSign = extractFloatx80Sign( a );
2550
    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
2551
    shiftCount = 0x4037 - aExp;
2552
    if ( shiftCount <= 0 ) shiftCount = 1;
2553
    shift64RightJamming( aSig, shiftCount, &aSig );
2554
    return roundAndPackInt32( roundData, aSign, aSig );
2555
 
2556
}
2557
 
2558
/*
2559
-------------------------------------------------------------------------------
2560
Returns the result of converting the extended double-precision floating-
2561
point value `a' to the 32-bit two's complement integer format.  The
2562
conversion is performed according to the IEC/IEEE Standard for Binary
2563
Floating-point Arithmetic, except that the conversion is always rounded
2564
toward zero.  If `a' is a NaN, the largest positive integer is returned.
2565
Otherwise, if the conversion overflows, the largest integer with the same
2566
sign as `a' is returned.
2567
-------------------------------------------------------------------------------
2568
*/
2569
int32 floatx80_to_int32_round_to_zero( floatx80 a )
2570
{
2571
    flag aSign;
2572
    int32 aExp, shiftCount;
2573
    bits64 aSig, savedASig;
2574
    int32 z;
2575
 
2576
    aSig = extractFloatx80Frac( a );
2577
    aExp = extractFloatx80Exp( a );
2578
    aSign = extractFloatx80Sign( a );
2579
    shiftCount = 0x403E - aExp;
2580
    if ( shiftCount < 32 ) {
2581
        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
2582
        goto invalid;
2583
    }
2584
    else if ( 63 < shiftCount ) {
2585
        if ( aExp || aSig ) float_raise( float_flag_inexact );
2586
        return 0;
2587
    }
2588
    savedASig = aSig;
2589
    aSig >>= shiftCount;
2590
    z = aSig;
2591
    if ( aSign ) z = - z;
2592
    if ( ( z < 0 ) ^ aSign ) {
2593
 invalid:
2594
        float_raise( float_flag_invalid );
2595
        return aSign ? 0x80000000 : 0x7FFFFFFF;
2596
    }
2597
    if ( ( aSig<<shiftCount ) != savedASig ) {
2598
        float_raise( float_flag_inexact );
2599
    }
2600
    return z;
2601
 
2602
}
2603
 
2604
/*
2605
-------------------------------------------------------------------------------
2606
Returns the result of converting the extended double-precision floating-
2607
point value `a' to the single-precision floating-point format.  The
2608
conversion is performed according to the IEC/IEEE Standard for Binary
2609
Floating-point Arithmetic.
2610
-------------------------------------------------------------------------------
2611
*/
2612
float32 floatx80_to_float32( struct roundingData *roundData, floatx80 a )
2613
{
2614
    flag aSign;
2615
    int32 aExp;
2616
    bits64 aSig;
2617
 
2618
    aSig = extractFloatx80Frac( a );
2619
    aExp = extractFloatx80Exp( a );
2620
    aSign = extractFloatx80Sign( a );
2621
    if ( aExp == 0x7FFF ) {
2622
        if ( (bits64) ( aSig<<1 ) ) {
2623
            return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
2624
        }
2625
        return packFloat32( aSign, 0xFF, 0 );
2626
    }
2627
    shift64RightJamming( aSig, 33, &aSig );
2628
    if ( aExp || aSig ) aExp -= 0x3F81;
2629
    return roundAndPackFloat32( roundData, aSign, aExp, aSig );
2630
 
2631
}
2632
 
2633
/*
2634
-------------------------------------------------------------------------------
2635
Returns the result of converting the extended double-precision floating-
2636
point value `a' to the double-precision floating-point format.  The
2637
conversion is performed according to the IEC/IEEE Standard for Binary
2638
Floating-point Arithmetic.
2639
-------------------------------------------------------------------------------
2640
*/
2641
float64 floatx80_to_float64( struct roundingData *roundData, floatx80 a )
2642
{
2643
    flag aSign;
2644
    int32 aExp;
2645
    bits64 aSig, zSig;
2646
 
2647
    aSig = extractFloatx80Frac( a );
2648
    aExp = extractFloatx80Exp( a );
2649
    aSign = extractFloatx80Sign( a );
2650
    if ( aExp == 0x7FFF ) {
2651
        if ( (bits64) ( aSig<<1 ) ) {
2652
            return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
2653
        }
2654
        return packFloat64( aSign, 0x7FF, 0 );
2655
    }
2656
    shift64RightJamming( aSig, 1, &zSig );
2657
    if ( aExp || aSig ) aExp -= 0x3C01;
2658
    return roundAndPackFloat64( roundData, aSign, aExp, zSig );
2659
 
2660
}
2661
 
2662
/*
2663
-------------------------------------------------------------------------------
2664
Rounds the extended double-precision floating-point value `a' to an integer,
2665
and returns the result as an extended quadruple-precision floating-point
2666
value.  The operation is performed according to the IEC/IEEE Standard for
2667
Binary Floating-point Arithmetic.
2668
-------------------------------------------------------------------------------
2669
*/
2670
floatx80 floatx80_round_to_int( struct roundingData *roundData, floatx80 a )
2671
{
2672
    flag aSign;
2673
    int32 aExp;
2674
    bits64 lastBitMask, roundBitsMask;
2675
    int8 roundingMode;
2676
    floatx80 z;
2677
 
2678
    aExp = extractFloatx80Exp( a );
2679
    if ( 0x403E <= aExp ) {
2680
        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
2681
            return propagateFloatx80NaN( a, a );
2682
        }
2683
        return a;
2684
    }
2685
    if ( aExp <= 0x3FFE ) {
2686
        if (    ( aExp == 0 )
2687
             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
2688
            return a;
2689
        }
2690
        roundData->exception |= float_flag_inexact;
2691
        aSign = extractFloatx80Sign( a );
2692
        switch ( roundData->mode ) {
2693
         case float_round_nearest_even:
2694
            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
2695
               ) {
2696
                return
2697
                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
2698
            }
2699
            break;
2700
         case float_round_down:
2701
            return
2702
                  aSign ?
2703
                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
2704
                : packFloatx80( 0, 0, 0 );
2705
         case float_round_up:
2706
            return
2707
                  aSign ? packFloatx80( 1, 0, 0 )
2708
                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
2709
        }
2710
        return packFloatx80( aSign, 0, 0 );
2711
    }
2712
    lastBitMask = 1;
2713
    lastBitMask <<= 0x403E - aExp;
2714
    roundBitsMask = lastBitMask - 1;
2715
    z = a;
2716
    roundingMode = roundData->mode;
2717
    if ( roundingMode == float_round_nearest_even ) {
2718
        z.low += lastBitMask>>1;
2719
        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
2720
    }
2721
    else if ( roundingMode != float_round_to_zero ) {
2722
        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
2723
            z.low += roundBitsMask;
2724
        }
2725
    }
2726
    z.low &= ~ roundBitsMask;
2727
    if ( z.low == 0 ) {
2728
        ++z.high;
2729
        z.low = LIT64( 0x8000000000000000 );
2730
    }
2731
    if ( z.low != a.low ) roundData->exception |= float_flag_inexact;
2732
    return z;
2733
 
2734
}
2735
 
2736
/*
2737
-------------------------------------------------------------------------------
2738
Returns the result of adding the absolute values of the extended double-
2739
precision floating-point values `a' and `b'.  If `zSign' is true, the sum is
2740
negated before being returned.  `zSign' is ignored if the result is a NaN.
2741
The addition is performed according to the IEC/IEEE Standard for Binary
2742
Floating-point Arithmetic.
2743
-------------------------------------------------------------------------------
2744
*/
2745
static floatx80 addFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
2746
{
2747
    int32 aExp, bExp, zExp;
2748
    bits64 aSig, bSig, zSig0, zSig1;
2749
    int32 expDiff;
2750
 
2751
    aSig = extractFloatx80Frac( a );
2752
    aExp = extractFloatx80Exp( a );
2753
    bSig = extractFloatx80Frac( b );
2754
    bExp = extractFloatx80Exp( b );
2755
    expDiff = aExp - bExp;
2756
    if ( 0 < expDiff ) {
2757
        if ( aExp == 0x7FFF ) {
2758
            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2759
            return a;
2760
        }
2761
        if ( bExp == 0 ) --expDiff;
2762
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2763
        zExp = aExp;
2764
    }
2765
    else if ( expDiff < 0 ) {
2766
        if ( bExp == 0x7FFF ) {
2767
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2768
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2769
        }
2770
        if ( aExp == 0 ) ++expDiff;
2771
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2772
        zExp = bExp;
2773
    }
2774
    else {
2775
        if ( aExp == 0x7FFF ) {
2776
            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
2777
                return propagateFloatx80NaN( a, b );
2778
            }
2779
            return a;
2780
        }
2781
        zSig1 = 0;
2782
        zSig0 = aSig + bSig;
2783
        if ( aExp == 0 ) {
2784
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
2785
            goto roundAndPack;
2786
        }
2787
        zExp = aExp;
2788
        goto shiftRight1;
2789
    }
2790
 
2791
    zSig0 = aSig + bSig;
2792
 
2793
    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
2794
 shiftRight1:
2795
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
2796
    zSig0 |= LIT64( 0x8000000000000000 );
2797
    ++zExp;
2798
 roundAndPack:
2799
    return
2800
        roundAndPackFloatx80(
2801
            roundData, zSign, zExp, zSig0, zSig1 );
2802
 
2803
}
2804
 
2805
/*
2806
-------------------------------------------------------------------------------
2807
Returns the result of subtracting the absolute values of the extended
2808
double-precision floating-point values `a' and `b'.  If `zSign' is true,
2809
the difference is negated before being returned.  `zSign' is ignored if the
2810
result is a NaN.  The subtraction is performed according to the IEC/IEEE
2811
Standard for Binary Floating-point Arithmetic.
2812
-------------------------------------------------------------------------------
2813
*/
2814
static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
2815
{
2816
    int32 aExp, bExp, zExp;
2817
    bits64 aSig, bSig, zSig0, zSig1;
2818
    int32 expDiff;
2819
    floatx80 z;
2820
 
2821
    aSig = extractFloatx80Frac( a );
2822
    aExp = extractFloatx80Exp( a );
2823
    bSig = extractFloatx80Frac( b );
2824
    bExp = extractFloatx80Exp( b );
2825
    expDiff = aExp - bExp;
2826
    if ( 0 < expDiff ) goto aExpBigger;
2827
    if ( expDiff < 0 ) goto bExpBigger;
2828
    if ( aExp == 0x7FFF ) {
2829
        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
2830
            return propagateFloatx80NaN( a, b );
2831
        }
2832
        roundData->exception |= float_flag_invalid;
2833
        z.low = floatx80_default_nan_low;
2834
        z.high = floatx80_default_nan_high;
2835
        z.__padding = 0;
2836
        return z;
2837
    }
2838
    if ( aExp == 0 ) {
2839
        aExp = 1;
2840
        bExp = 1;
2841
    }
2842
    zSig1 = 0;
2843
    if ( bSig < aSig ) goto aBigger;
2844
    if ( aSig < bSig ) goto bBigger;
2845
    return packFloatx80( roundData->mode == float_round_down, 0, 0 );
2846
 bExpBigger:
2847
    if ( bExp == 0x7FFF ) {
2848
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2849
        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
2850
    }
2851
    if ( aExp == 0 ) ++expDiff;
2852
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2853
 bBigger:
2854
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
2855
    zExp = bExp;
2856
    zSign ^= 1;
2857
    goto normalizeRoundAndPack;
2858
 aExpBigger:
2859
    if ( aExp == 0x7FFF ) {
2860
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2861
        return a;
2862
    }
2863
    if ( bExp == 0 ) --expDiff;
2864
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2865
 aBigger:
2866
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
2867
    zExp = aExp;
2868
 normalizeRoundAndPack:
2869
    return
2870
        normalizeRoundAndPackFloatx80(
2871
            roundData, zSign, zExp, zSig0, zSig1 );
2872
 
2873
}
2874
 
2875
/*
2876
-------------------------------------------------------------------------------
2877
Returns the result of adding the extended double-precision floating-point
2878
values `a' and `b'.  The operation is performed according to the IEC/IEEE
2879
Standard for Binary Floating-point Arithmetic.
2880
-------------------------------------------------------------------------------
2881
*/
2882
floatx80 floatx80_add( struct roundingData *roundData, floatx80 a, floatx80 b )
2883
{
2884
    flag aSign, bSign;
2885
 
2886
    aSign = extractFloatx80Sign( a );
2887
    bSign = extractFloatx80Sign( b );
2888
    if ( aSign == bSign ) {
2889
        return addFloatx80Sigs( roundData, a, b, aSign );
2890
    }
2891
    else {
2892
        return subFloatx80Sigs( roundData, a, b, aSign );
2893
    }
2894
 
2895
}
2896
 
2897
/*
2898
-------------------------------------------------------------------------------
2899
Returns the result of subtracting the extended double-precision floating-
2900
point values `a' and `b'.  The operation is performed according to the
2901
IEC/IEEE Standard for Binary Floating-point Arithmetic.
2902
-------------------------------------------------------------------------------
2903
*/
2904
floatx80 floatx80_sub( struct roundingData *roundData, floatx80 a, floatx80 b )
2905
{
2906
    flag aSign, bSign;
2907
 
2908
    aSign = extractFloatx80Sign( a );
2909
    bSign = extractFloatx80Sign( b );
2910
    if ( aSign == bSign ) {
2911
        return subFloatx80Sigs( roundData, a, b, aSign );
2912
    }
2913
    else {
2914
        return addFloatx80Sigs( roundData, a, b, aSign );
2915
    }
2916
 
2917
}
2918
 
2919
/*
2920
-------------------------------------------------------------------------------
2921
Returns the result of multiplying the extended double-precision floating-
2922
point values `a' and `b'.  The operation is performed according to the
2923
IEC/IEEE Standard for Binary Floating-point Arithmetic.
2924
-------------------------------------------------------------------------------
2925
*/
2926
floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b )
2927
{
2928
    flag aSign, bSign, zSign;
2929
    int32 aExp, bExp, zExp;
2930
    bits64 aSig, bSig, zSig0, zSig1;
2931
    floatx80 z;
2932
 
2933
    aSig = extractFloatx80Frac( a );
2934
    aExp = extractFloatx80Exp( a );
2935
    aSign = extractFloatx80Sign( a );
2936
    bSig = extractFloatx80Frac( b );
2937
    bExp = extractFloatx80Exp( b );
2938
    bSign = extractFloatx80Sign( b );
2939
    zSign = aSign ^ bSign;
2940
    if ( aExp == 0x7FFF ) {
2941
        if (    (bits64) ( aSig<<1 )
2942
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
2943
            return propagateFloatx80NaN( a, b );
2944
        }
2945
        if ( ( bExp | bSig ) == 0 ) goto invalid;
2946
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2947
    }
2948
    if ( bExp == 0x7FFF ) {
2949
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2950
        if ( ( aExp | aSig ) == 0 ) {
2951
 invalid:
2952
            roundData->exception |= float_flag_invalid;
2953
            z.low = floatx80_default_nan_low;
2954
            z.high = floatx80_default_nan_high;
2955
            z.__padding = 0;
2956
            return z;
2957
        }
2958
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2959
    }
2960
    if ( aExp == 0 ) {
2961
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2962
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2963
    }
2964
    if ( bExp == 0 ) {
2965
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
2966
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2967
    }
2968
    zExp = aExp + bExp - 0x3FFE;
2969
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
2970
    if ( 0 < (sbits64) zSig0 ) {
2971
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
2972
        --zExp;
2973
    }
2974
    return
2975
        roundAndPackFloatx80(
2976
            roundData, zSign, zExp, zSig0, zSig1 );
2977
 
2978
}
2979
 
2980
/*
2981
-------------------------------------------------------------------------------
2982
Returns the result of dividing the extended double-precision floating-point
2983
value `a' by the corresponding value `b'.  The operation is performed
2984
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2985
-------------------------------------------------------------------------------
2986
*/
2987
floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b )
2988
{
2989
    flag aSign, bSign, zSign;
2990
    int32 aExp, bExp, zExp;
2991
    bits64 aSig, bSig, zSig0, zSig1;
2992
    bits64 rem0, rem1, rem2, term0, term1, term2;
2993
    floatx80 z;
2994
 
2995
    aSig = extractFloatx80Frac( a );
2996
    aExp = extractFloatx80Exp( a );
2997
    aSign = extractFloatx80Sign( a );
2998
    bSig = extractFloatx80Frac( b );
2999
    bExp = extractFloatx80Exp( b );
3000
    bSign = extractFloatx80Sign( b );
3001
    zSign = aSign ^ bSign;
3002
    if ( aExp == 0x7FFF ) {
3003
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3004
        if ( bExp == 0x7FFF ) {
3005
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3006
            goto invalid;
3007
        }
3008
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3009
    }
3010
    if ( bExp == 0x7FFF ) {
3011
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3012
        return packFloatx80( zSign, 0, 0 );
3013
    }
3014
    if ( bExp == 0 ) {
3015
        if ( bSig == 0 ) {
3016
            if ( ( aExp | aSig ) == 0 ) {
3017
 invalid:
3018
                roundData->exception |= float_flag_invalid;
3019
                z.low = floatx80_default_nan_low;
3020
                z.high = floatx80_default_nan_high;
3021
                z.__padding = 0;
3022
                return z;
3023
            }
3024
            roundData->exception |= float_flag_divbyzero;
3025
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3026
        }
3027
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3028
    }
3029
    if ( aExp == 0 ) {
3030
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3031
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3032
    }
3033
    zExp = aExp - bExp + 0x3FFE;
3034
    rem1 = 0;
3035
    if ( bSig <= aSig ) {
3036
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
3037
        ++zExp;
3038
    }
3039
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3040
    mul64To128( bSig, zSig0, &term0, &term1 );
3041
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3042
    while ( (sbits64) rem0 < 0 ) {
3043
        --zSig0;
3044
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3045
    }
3046
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
3047
    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3048
        mul64To128( bSig, zSig1, &term1, &term2 );
3049
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3050
        while ( (sbits64) rem1 < 0 ) {
3051
            --zSig1;
3052
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3053
        }
3054
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
3055
    }
3056
    return
3057
        roundAndPackFloatx80(
3058
            roundData, zSign, zExp, zSig0, zSig1 );
3059
 
3060
}
3061
 
3062
/*
3063
-------------------------------------------------------------------------------
3064
Returns the remainder of the extended double-precision floating-point value
3065
`a' with respect to the corresponding value `b'.  The operation is performed
3066
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3067
-------------------------------------------------------------------------------
3068
*/
3069
floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b )
3070
{
3071
    flag aSign, bSign, zSign;
3072
    int32 aExp, bExp, expDiff;
3073
    bits64 aSig0, aSig1, bSig;
3074
    bits64 q, term0, term1, alternateASig0, alternateASig1;
3075
    floatx80 z;
3076
 
3077
    aSig0 = extractFloatx80Frac( a );
3078
    aExp = extractFloatx80Exp( a );
3079
    aSign = extractFloatx80Sign( a );
3080
    bSig = extractFloatx80Frac( b );
3081
    bExp = extractFloatx80Exp( b );
3082
    bSign = extractFloatx80Sign( b );
3083
    if ( aExp == 0x7FFF ) {
3084
        if (    (bits64) ( aSig0<<1 )
3085
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3086
            return propagateFloatx80NaN( a, b );
3087
        }
3088
        goto invalid;
3089
    }
3090
    if ( bExp == 0x7FFF ) {
3091
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3092
        return a;
3093
    }
3094
    if ( bExp == 0 ) {
3095
        if ( bSig == 0 ) {
3096
 invalid:
3097
            roundData->exception |= float_flag_invalid;
3098
            z.low = floatx80_default_nan_low;
3099
            z.high = floatx80_default_nan_high;
3100
            z.__padding = 0;
3101
            return z;
3102
        }
3103
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3104
    }
3105
    if ( aExp == 0 ) {
3106
        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
3107
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3108
    }
3109
    bSig |= LIT64( 0x8000000000000000 );
3110
    zSign = aSign;
3111
    expDiff = aExp - bExp;
3112
    aSig1 = 0;
3113
    if ( expDiff < 0 ) {
3114
        if ( expDiff < -1 ) return a;
3115
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
3116
        expDiff = 0;
3117
    }
3118
    q = ( bSig <= aSig0 );
3119
    if ( q ) aSig0 -= bSig;
3120
    expDiff -= 64;
3121
    while ( 0 < expDiff ) {
3122
        q = estimateDiv128To64( aSig0, aSig1, bSig );
3123
        q = ( 2 < q ) ? q - 2 : 0;
3124
        mul64To128( bSig, q, &term0, &term1 );
3125
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3126
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3127
        expDiff -= 62;
3128
    }
3129
    expDiff += 64;
3130
    if ( 0 < expDiff ) {
3131
        q = estimateDiv128To64( aSig0, aSig1, bSig );
3132
        q = ( 2 < q ) ? q - 2 : 0;
3133
        q >>= 64 - expDiff;
3134
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
3135
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3136
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
3137
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
3138
            ++q;
3139
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3140
        }
3141
    }
3142
    else {
3143
        term1 = 0;
3144
        term0 = bSig;
3145
    }
3146
    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
3147
    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
3148
         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
3149
              && ( q & 1 ) )
3150
       ) {
3151
        aSig0 = alternateASig0;
3152
        aSig1 = alternateASig1;
3153
        zSign = ! zSign;
3154
    }
3155
 
3156
    return
3157
        normalizeRoundAndPackFloatx80(
3158
            roundData, zSign, bExp + expDiff, aSig0, aSig1 );
3159
 
3160
}
3161
 
3162
/*
3163
-------------------------------------------------------------------------------
3164
Returns the square root of the extended double-precision floating-point
3165
value `a'.  The operation is performed according to the IEC/IEEE Standard
3166
for Binary Floating-point Arithmetic.
3167
-------------------------------------------------------------------------------
3168
*/
3169
floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a )
3170
{
3171
    flag aSign;
3172
    int32 aExp, zExp;
3173
    bits64 aSig0, aSig1, zSig0, zSig1;
3174
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3175
    bits64 shiftedRem0, shiftedRem1;
3176
    floatx80 z;
3177
 
3178
    aSig0 = extractFloatx80Frac( a );
3179
    aExp = extractFloatx80Exp( a );
3180
    aSign = extractFloatx80Sign( a );
3181
    if ( aExp == 0x7FFF ) {
3182
        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
3183
        if ( ! aSign ) return a;
3184
        goto invalid;
3185
    }
3186
    if ( aSign ) {
3187
        if ( ( aExp | aSig0 ) == 0 ) return a;
3188
 invalid:
3189
        roundData->exception |= float_flag_invalid;
3190
        z.low = floatx80_default_nan_low;
3191
        z.high = floatx80_default_nan_high;
3192
        z.__padding = 0;
3193
        return z;
3194
    }
3195
    if ( aExp == 0 ) {
3196
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
3197
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3198
    }
3199
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
3200
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
3201
    zSig0 <<= 31;
3202
    aSig1 = 0;
3203
    shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );
3204
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
3205
    if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );
3206
    shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
3207
    mul64To128( zSig0, zSig0, &term0, &term1 );
3208
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
3209
    while ( (sbits64) rem0 < 0 ) {
3210
        --zSig0;
3211
        shortShift128Left( 0, zSig0, 1, &term0, &term1 );
3212
        term1 |= 1;
3213
        add128( rem0, rem1, term0, term1, &rem0, &rem1 );
3214
    }
3215
    shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
3216
    zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
3217
    if ( (bits64) ( zSig1<<1 ) <= 10 ) {
3218
        if ( zSig1 == 0 ) zSig1 = 1;
3219
        mul64To128( zSig0, zSig1, &term1, &term2 );
3220
        shortShift128Left( term1, term2, 1, &term1, &term2 );
3221
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3222
        mul64To128( zSig1, zSig1, &term2, &term3 );
3223
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
3224
        while ( (sbits64) rem1 < 0 ) {
3225
            --zSig1;
3226
            shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
3227
            term3 |= 1;
3228
            add192(
3229
                rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
3230
        }
3231
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
3232
    }
3233
    return
3234
        roundAndPackFloatx80(
3235
            roundData, 0, zExp, zSig0, zSig1 );
3236
 
3237
}
3238
 
3239
/*
3240
-------------------------------------------------------------------------------
3241
Returns 1 if the extended double-precision floating-point value `a' is
3242
equal to the corresponding value `b', and 0 otherwise.  The comparison is
3243
performed according to the IEC/IEEE Standard for Binary Floating-point
3244
Arithmetic.
3245
-------------------------------------------------------------------------------
3246
*/
3247
flag floatx80_eq( floatx80 a, floatx80 b )
3248
{
3249
 
3250
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
3251
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3252
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
3253
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3254
       ) {
3255
        if (    floatx80_is_signaling_nan( a )
3256
             || floatx80_is_signaling_nan( b ) ) {
3257
            float_raise( float_flag_invalid );
3258
        }
3259
        return 0;
3260
    }
3261
    return
3262
           ( a.low == b.low )
3263
        && (    ( a.high == b.high )
3264
             || (    ( a.low == 0 )
3265
                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
3266
           );
3267
 
3268
}
3269
 
3270
/*
3271
-------------------------------------------------------------------------------
3272
Returns 1 if the extended double-precision floating-point value `a' is
3273
less than or equal to the corresponding value `b', and 0 otherwise.  The
3274
comparison is performed according to the IEC/IEEE Standard for Binary
3275
Floating-point Arithmetic.
3276
-------------------------------------------------------------------------------
3277
*/
3278
flag floatx80_le( floatx80 a, floatx80 b )
3279
{
3280
    flag aSign, bSign;
3281
 
3282
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
3283
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3284
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
3285
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3286
       ) {
3287
        float_raise( float_flag_invalid );
3288
        return 0;
3289
    }
3290
    aSign = extractFloatx80Sign( a );
3291
    bSign = extractFloatx80Sign( b );
3292
    if ( aSign != bSign ) {
3293
        return
3294
               aSign
3295
            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3296
                 == 0 );
3297
    }
3298
    return
3299
          aSign ? le128( b.high, b.low, a.high, a.low )
3300
        : le128( a.high, a.low, b.high, b.low );
3301
 
3302
}
3303
 
3304
/*
3305
-------------------------------------------------------------------------------
3306
Returns 1 if the extended double-precision floating-point value `a' is
3307
less than the corresponding value `b', and 0 otherwise.  The comparison
3308
is performed according to the IEC/IEEE Standard for Binary Floating-point
3309
Arithmetic.
3310
-------------------------------------------------------------------------------
3311
*/
3312
flag floatx80_lt( floatx80 a, floatx80 b )
3313
{
3314
    flag aSign, bSign;
3315
 
3316
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
3317
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3318
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
3319
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3320
       ) {
3321
        float_raise( float_flag_invalid );
3322
        return 0;
3323
    }
3324
    aSign = extractFloatx80Sign( a );
3325
    bSign = extractFloatx80Sign( b );
3326
    if ( aSign != bSign ) {
3327
        return
3328
               aSign
3329
            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3330
                 != 0 );
3331
    }
3332
    return
3333
          aSign ? lt128( b.high, b.low, a.high, a.low )
3334
        : lt128( a.high, a.low, b.high, b.low );
3335
 
3336
}
3337
 
3338
/*
3339
-------------------------------------------------------------------------------
3340
Returns 1 if the extended double-precision floating-point value `a' is equal
3341
to the corresponding value `b', and 0 otherwise.  The invalid exception is
3342
raised if either operand is a NaN.  Otherwise, the comparison is performed
3343
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3344
-------------------------------------------------------------------------------
3345
*/
3346
flag floatx80_eq_signaling( floatx80 a, floatx80 b )
3347
{
3348
 
3349
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
3350
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3351
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
3352
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3353
       ) {
3354
        float_raise( float_flag_invalid );
3355
        return 0;
3356
    }
3357
    return
3358
           ( a.low == b.low )
3359
        && (    ( a.high == b.high )
3360
             || (    ( a.low == 0 )
3361
                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
3362
           );
3363
 
3364
}
3365
 
3366
/*
3367
-------------------------------------------------------------------------------
3368
Returns 1 if the extended double-precision floating-point value `a' is less
3369
than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
3370
do not cause an exception.  Otherwise, the comparison is performed according
3371
to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3372
-------------------------------------------------------------------------------
3373
*/
3374
flag floatx80_le_quiet( floatx80 a, floatx80 b )
3375
{
3376
    flag aSign, bSign;
3377
 
3378
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
3379
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3380
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
3381
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3382
       ) {
3383
        /* Do nothing, even if NaN as we're quiet */
3384
        return 0;
3385
    }
3386
    aSign = extractFloatx80Sign( a );
3387
    bSign = extractFloatx80Sign( b );
3388
    if ( aSign != bSign ) {
3389
        return
3390
               aSign
3391
            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3392
                 == 0 );
3393
    }
3394
    return
3395
          aSign ? le128( b.high, b.low, a.high, a.low )
3396
        : le128( a.high, a.low, b.high, b.low );
3397
 
3398
}
3399
 
3400
/*
3401
-------------------------------------------------------------------------------
3402
Returns 1 if the extended double-precision floating-point value `a' is less
3403
than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
3404
an exception.  Otherwise, the comparison is performed according to the
3405
IEC/IEEE Standard for Binary Floating-point Arithmetic.
3406
-------------------------------------------------------------------------------
3407
*/
3408
flag floatx80_lt_quiet( floatx80 a, floatx80 b )
3409
{
3410
    flag aSign, bSign;
3411
 
3412
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
3413
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3414
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
3415
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3416
       ) {
3417
        /* Do nothing, even if NaN as we're quiet */
3418
        return 0;
3419
    }
3420
    aSign = extractFloatx80Sign( a );
3421
    bSign = extractFloatx80Sign( b );
3422
    if ( aSign != bSign ) {
3423
        return
3424
               aSign
3425
            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3426
                 != 0 );
3427
    }
3428
    return
3429
          aSign ? lt128( b.high, b.low, a.high, a.low )
3430
        : lt128( a.high, a.low, b.high, b.low );
3431
 
3432
}
3433
 
3434
#endif
3435
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.