| 1 | 
         234 | 
         jeremybenn | 
          
  | 
      
      
         | 2 | 
          | 
          | 
         /*============================================================================
  | 
      
      
         | 3 | 
          | 
          | 
          
  | 
      
      
         | 4 | 
          | 
          | 
         This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
  | 
      
      
         | 5 | 
          | 
          | 
         Package, Release 2b.
  | 
      
      
         | 6 | 
          | 
          | 
          
  | 
      
      
         | 7 | 
          | 
          | 
         Written by John R. Hauser.  This work was made possible in part by the
  | 
      
      
         | 8 | 
          | 
          | 
         International Computer Science Institute, located at Suite 600, 1947 Center
  | 
      
      
         | 9 | 
          | 
          | 
         Street, Berkeley, California 94704.  Funding was partially provided by the
  | 
      
      
         | 10 | 
          | 
          | 
         National Science Foundation under grant MIP-9311980.  The original version
  | 
      
      
         | 11 | 
          | 
          | 
         of this code was written as part of a project to build a fixed-point vector
  | 
      
      
         | 12 | 
          | 
          | 
         processor in collaboration with the University of California at Berkeley,
  | 
      
      
         | 13 | 
          | 
          | 
         overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  | 
      
      
         | 14 | 
          | 
          | 
         is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
  | 
      
      
         | 15 | 
          | 
          | 
         arithmetic/SoftFloat.html'.
  | 
      
      
         | 16 | 
          | 
          | 
          
  | 
      
      
         | 17 | 
          | 
          | 
         THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
  | 
      
      
         | 18 | 
          | 
          | 
         been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
  | 
      
      
         | 19 | 
          | 
          | 
         RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
  | 
      
      
         | 20 | 
          | 
          | 
         AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
  | 
      
      
         | 21 | 
          | 
          | 
         COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
  | 
      
      
         | 22 | 
          | 
          | 
         EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
  | 
      
      
         | 23 | 
          | 
          | 
         INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
  | 
      
      
         | 24 | 
          | 
          | 
         OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
  | 
      
      
         | 25 | 
          | 
          | 
          
  | 
      
      
         | 26 | 
          | 
          | 
         Derivative works are acceptable, even for commercial purposes, so long as
  | 
      
      
         | 27 | 
          | 
          | 
         (1) the source code for the derivative work includes prominent notice that
  | 
      
      
         | 28 | 
          | 
          | 
         the work is derivative, and (2) the source code includes prominent notice with
  | 
      
      
         | 29 | 
          | 
          | 
         these four paragraphs for those parts of this code that are retained.
  | 
      
      
         | 30 | 
          | 
          | 
          
  | 
      
      
         | 31 | 
          | 
          | 
         Modified for use with or1ksim's testsuite.
  | 
      
      
         | 32 | 
          | 
          | 
          
  | 
      
      
         | 33 | 
          | 
          | 
         Contributor Julius Baxter <julius.baxter@orsoc.se>
  | 
      
      
         | 34 | 
          | 
          | 
          
  | 
      
      
         | 35 | 
          | 
          | 
         =============================================================================*/
  | 
      
      
         | 36 | 
          | 
          | 
          
  | 
      
      
         | 37 | 
          | 
          | 
         #include "milieu.h"
  | 
      
      
         | 38 | 
          | 
          | 
         #include "softfloat.h"
  | 
      
      
         | 39 | 
          | 
          | 
          
  | 
      
      
         | 40 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 41 | 
          | 
          | 
         | Floating-point rounding mode, extended double-precision rounding precision,
  | 
      
      
         | 42 | 
          | 
          | 
         | and exception flags.
  | 
      
      
         | 43 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 44 | 
          | 
          | 
         int8 float_rounding_mode = float_round_nearest_even;
  | 
      
      
         | 45 | 
          | 
          | 
         int8 float_exception_flags = 0;
  | 
      
      
         | 46 | 
          | 
          | 
         #ifdef FLOATX80
  | 
      
      
         | 47 | 
          | 
          | 
         int8 floatx80_rounding_precision = 80;
  | 
      
      
         | 48 | 
          | 
          | 
         #endif
  | 
      
      
         | 49 | 
          | 
          | 
          
  | 
      
      
         | 50 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 51 | 
          | 
          | 
         | Primitive arithmetic functions, including multi-word arithmetic, and
  | 
      
      
         | 52 | 
          | 
          | 
         | division and square root approximations.  (Can be specialized to target if
  | 
      
      
         | 53 | 
          | 
          | 
         | desired.)
  | 
      
      
         | 54 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 55 | 
          | 
          | 
         //#include "softfloat-macros"
  | 
      
      
         | 56 | 
          | 
          | 
          
  | 
      
      
         | 57 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 58 | 
          | 
          | 
         | Shifts `a' right by the number of bits given in `count'.  If any nonzero
  | 
      
      
         | 59 | 
          | 
          | 
         | bits are shifted off, they are ``jammed'' into the least significant bit of
  | 
      
      
         | 60 | 
          | 
          | 
         | the result by setting the least significant bit to 1.  The value of `count'
  | 
      
      
         | 61 | 
          | 
          | 
         | can be arbitrarily large; in particular, if `count' is greater than 32, the
  | 
      
      
         | 62 | 
          | 
          | 
         | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  | 
      
      
         | 63 | 
          | 
          | 
         | The result is stored in the location pointed to by `zPtr'.
  | 
      
      
         | 64 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 65 | 
          | 
          | 
          
  | 
      
      
         | 66 | 
          | 
          | 
         INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
  | 
      
      
         | 67 | 
          | 
          | 
         {
  | 
      
      
         | 68 | 
          | 
          | 
             bits32 z;
  | 
      
      
         | 69 | 
          | 
          | 
          
  | 
      
      
         | 70 | 
          | 
          | 
             if ( count == 0 ) {
  | 
      
      
         | 71 | 
          | 
          | 
                 z = a;
  | 
      
      
         | 72 | 
          | 
          | 
             }
  | 
      
      
         | 73 | 
          | 
          | 
             else if ( count < 32 ) {
  | 
      
      
         | 74 | 
          | 
          | 
                 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
  | 
      
      
         | 75 | 
          | 
          | 
             }
  | 
      
      
         | 76 | 
          | 
          | 
             else {
  | 
      
      
         | 77 | 
          | 
          | 
                 z = ( a != 0 );
  | 
      
      
         | 78 | 
          | 
          | 
             }
  | 
      
      
         | 79 | 
          | 
          | 
             *zPtr = z;
  | 
      
      
         | 80 | 
          | 
          | 
          
  | 
      
      
         | 81 | 
          | 
          | 
         }
  | 
      
      
         | 82 | 
          | 
          | 
          
  | 
      
      
         | 83 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 84 | 
          | 
          | 
         | Shifts `a' right by the number of bits given in `count'.  If any nonzero
  | 
      
      
         | 85 | 
          | 
          | 
         | bits are shifted off, they are ``jammed'' into the least significant bit of
  | 
      
      
         | 86 | 
          | 
          | 
         | the result by setting the least significant bit to 1.  The value of `count'
  | 
      
      
         | 87 | 
          | 
          | 
         | can be arbitrarily large; in particular, if `count' is greater than 64, the
  | 
      
      
         | 88 | 
          | 
          | 
         | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  | 
      
      
         | 89 | 
          | 
          | 
         | The result is stored in the location pointed to by `zPtr'.
  | 
      
      
         | 90 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 91 | 
          | 
          | 
          
  | 
      
      
         | 92 | 
          | 
          | 
         INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
  | 
      
      
         | 93 | 
          | 
          | 
         {
  | 
      
      
         | 94 | 
          | 
          | 
             bits64 z;
  | 
      
      
         | 95 | 
          | 
          | 
          
  | 
      
      
         | 96 | 
          | 
          | 
             if ( count == 0 ) {
  | 
      
      
         | 97 | 
          | 
          | 
                 z = a;
  | 
      
      
         | 98 | 
          | 
          | 
             }
  | 
      
      
         | 99 | 
          | 
          | 
             else if ( count < 64 ) {
  | 
      
      
         | 100 | 
          | 
          | 
                 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
  | 
      
      
         | 101 | 
          | 
          | 
             }
  | 
      
      
         | 102 | 
          | 
          | 
             else {
  | 
      
      
         | 103 | 
          | 
          | 
                 z = ( a != 0 );
  | 
      
      
         | 104 | 
          | 
          | 
             }
  | 
      
      
         | 105 | 
          | 
          | 
             *zPtr = z;
  | 
      
      
         | 106 | 
          | 
          | 
          
  | 
      
      
         | 107 | 
          | 
          | 
         }
  | 
      
      
         | 108 | 
          | 
          | 
          
  | 
      
      
         | 109 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 110 | 
          | 
          | 
         | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
  | 
      
      
         | 111 | 
          | 
          | 
         | _plus_ the number of bits given in `count'.  The shifted result is at most
  | 
      
      
         | 112 | 
          | 
          | 
         | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
  | 
      
      
         | 113 | 
          | 
          | 
         | bits shifted off form a second 64-bit result as follows:  The _last_ bit
  | 
      
      
         | 114 | 
          | 
          | 
         | shifted off is the most-significant bit of the extra result, and the other
  | 
      
      
         | 115 | 
          | 
          | 
         | 63 bits of the extra result are all zero if and only if _all_but_the_last_
  | 
      
      
         | 116 | 
          | 
          | 
         | bits shifted off were all zero.  This extra result is stored in the location
  | 
      
      
         | 117 | 
          | 
          | 
         | pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
  | 
      
      
         | 118 | 
          | 
          | 
         |     (This routine makes more sense if `a0' and `a1' are considered to form
  | 
      
      
         | 119 | 
          | 
          | 
         | a fixed-point value with binary point between `a0' and `a1'.  This fixed-
  | 
      
      
         | 120 | 
          | 
          | 
         | point value is shifted right by the number of bits given in `count', and
  | 
      
      
         | 121 | 
          | 
          | 
         | the integer part of the result is returned at the location pointed to by
  | 
      
      
         | 122 | 
          | 
          | 
         | `z0Ptr'.  The fractional part of the result may be slightly corrupted as
  | 
      
      
         | 123 | 
          | 
          | 
         | described above, and is returned at the location pointed to by `z1Ptr'.)
  | 
      
      
         | 124 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 125 | 
          | 
          | 
          
  | 
      
      
         | 126 | 
          | 
          | 
         INLINE void
  | 
      
      
         | 127 | 
          | 
          | 
          shift64ExtraRightJamming(
  | 
      
      
         | 128 | 
          | 
          | 
              bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
  | 
      
      
         | 129 | 
          | 
          | 
         {
  | 
      
      
         | 130 | 
          | 
          | 
             bits64 z0, z1;
  | 
      
      
         | 131 | 
          | 
          | 
             int8 negCount = ( - count ) & 63;
  | 
      
      
         | 132 | 
          | 
          | 
          
  | 
      
      
         | 133 | 
          | 
          | 
             if ( count == 0 ) {
  | 
      
      
         | 134 | 
          | 
          | 
                 z1 = a1;
  | 
      
      
         | 135 | 
          | 
          | 
                 z0 = a0;
  | 
      
      
         | 136 | 
          | 
          | 
             }
  | 
      
      
         | 137 | 
          | 
          | 
             else if ( count < 64 ) {
  | 
      
      
         | 138 | 
          | 
          | 
                 z1 = ( a0<<negCount ) | ( a1 != 0 );
  | 
      
      
         | 139 | 
          | 
          | 
                 z0 = a0>>count;
  | 
      
      
         | 140 | 
          | 
          | 
             }
  | 
      
      
         | 141 | 
          | 
          | 
             else {
  | 
      
      
         | 142 | 
          | 
          | 
                 if ( count == 64 ) {
  | 
      
      
         | 143 | 
          | 
          | 
                     z1 = a0 | ( a1 != 0 );
  | 
      
      
         | 144 | 
          | 
          | 
                 }
  | 
      
      
         | 145 | 
          | 
          | 
                 else {
  | 
      
      
         | 146 | 
          | 
          | 
                     z1 = ( ( a0 | a1 ) != 0 );
  | 
      
      
         | 147 | 
          | 
          | 
                 }
  | 
      
      
         | 148 | 
          | 
          | 
                 z0 = 0;
  | 
      
      
         | 149 | 
          | 
          | 
             }
  | 
      
      
         | 150 | 
          | 
          | 
             *z1Ptr = z1;
  | 
      
      
         | 151 | 
          | 
          | 
             *z0Ptr = z0;
  | 
      
      
         | 152 | 
          | 
          | 
          
  | 
      
      
         | 153 | 
          | 
          | 
         }
  | 
      
      
         | 154 | 
          | 
          | 
          
  | 
      
      
         | 155 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 156 | 
          | 
          | 
         | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
  | 
      
      
         | 157 | 
          | 
          | 
         | number of bits given in `count'.  Any bits shifted off are lost.  The value
  | 
      
      
         | 158 | 
          | 
          | 
         | of `count' can be arbitrarily large; in particular, if `count' is greater
  | 
      
      
         | 159 | 
          | 
          | 
         | than 128, the result will be 0.  The result is broken into two 64-bit pieces
  | 
      
      
         | 160 | 
          | 
          | 
         | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  | 
      
      
         | 161 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 162 | 
          | 
          | 
         // Not used - commenting out to stop werrors during compile
  | 
      
      
         | 163 | 
          | 
          | 
         /*
  | 
      
      
         | 164 | 
          | 
          | 
         INLINE void
  | 
      
      
         | 165 | 
          | 
          | 
          shift128Right(
  | 
      
      
         | 166 | 
          | 
          | 
              bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
  | 
      
      
         | 167 | 
          | 
          | 
         {
  | 
      
      
         | 168 | 
          | 
          | 
             bits64 z0, z1;
  | 
      
      
         | 169 | 
          | 
          | 
             int8 negCount = ( - count ) & 63;
  | 
      
      
         | 170 | 
          | 
          | 
          
  | 
      
      
         | 171 | 
          | 
          | 
             if ( count == 0 ) {
  | 
      
      
         | 172 | 
          | 
          | 
                 z1 = a1;
  | 
      
      
         | 173 | 
          | 
          | 
                 z0 = a0;
  | 
      
      
         | 174 | 
          | 
          | 
             }
  | 
      
      
         | 175 | 
          | 
          | 
             else if ( count < 64 ) {
  | 
      
      
         | 176 | 
          | 
          | 
                 z1 = ( a0<<negCount ) | ( a1>>count );
  | 
      
      
         | 177 | 
          | 
          | 
                 z0 = a0>>count;
  | 
      
      
         | 178 | 
          | 
          | 
             }
  | 
      
      
         | 179 | 
          | 
          | 
             else {
  | 
      
      
         | 180 | 
          | 
          | 
                 z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
  | 
      
      
         | 181 | 
          | 
          | 
                 z0 = 0;
  | 
      
      
         | 182 | 
          | 
          | 
             }
  | 
      
      
         | 183 | 
          | 
          | 
             *z1Ptr = z1;
  | 
      
      
         | 184 | 
          | 
          | 
             *z0Ptr = z0;
  | 
      
      
         | 185 | 
          | 
          | 
          
  | 
      
      
         | 186 | 
          | 
          | 
         }
  | 
      
      
         | 187 | 
          | 
          | 
         */
  | 
      
      
         | 188 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 189 | 
          | 
          | 
         | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
  | 
      
      
         | 190 | 
          | 
          | 
         | number of bits given in `count'.  If any nonzero bits are shifted off, they
  | 
      
      
         | 191 | 
          | 
          | 
         | are ``jammed'' into the least significant bit of the result by setting the
  | 
      
      
         | 192 | 
          | 
          | 
         | least significant bit to 1.  The value of `count' can be arbitrarily large;
  | 
      
      
         | 193 | 
          | 
          | 
         | in particular, if `count' is greater than 128, the result will be either
  | 
      
      
         | 194 | 
          | 
          | 
         | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
  | 
      
      
         | 195 | 
          | 
          | 
         | nonzero.  The result is broken into two 64-bit pieces which are stored at
  | 
      
      
         | 196 | 
          | 
          | 
         | the locations pointed to by `z0Ptr' and `z1Ptr'.
  | 
      
      
         | 197 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 198 | 
          | 
          | 
          // Not used - commenting out to stop werrors during compile
  | 
      
      
         | 199 | 
          | 
          | 
          /*
  | 
      
      
         | 200 | 
          | 
          | 
         INLINE void
  | 
      
      
         | 201 | 
          | 
          | 
          shift128RightJamming(
  | 
      
      
         | 202 | 
          | 
          | 
              bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
  | 
      
      
         | 203 | 
          | 
          | 
         {
  | 
      
      
         | 204 | 
          | 
          | 
             bits64 z0, z1;
  | 
      
      
         | 205 | 
          | 
          | 
             int8 negCount = ( - count ) & 63;
  | 
      
      
         | 206 | 
          | 
          | 
          
  | 
      
      
         | 207 | 
          | 
          | 
             if ( count == 0 ) {
  | 
      
      
         | 208 | 
          | 
          | 
                 z1 = a1;
  | 
      
      
         | 209 | 
          | 
          | 
                 z0 = a0;
  | 
      
      
         | 210 | 
          | 
          | 
             }
  | 
      
      
         | 211 | 
          | 
          | 
             else if ( count < 64 ) {
  | 
      
      
         | 212 | 
          | 
          | 
                 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
  | 
      
      
         | 213 | 
          | 
          | 
                 z0 = a0>>count;
  | 
      
      
         | 214 | 
          | 
          | 
             }
  | 
      
      
         | 215 | 
          | 
          | 
             else {
  | 
      
      
         | 216 | 
          | 
          | 
                 if ( count == 64 ) {
  | 
      
      
         | 217 | 
          | 
          | 
                     z1 = a0 | ( a1 != 0 );
  | 
      
      
         | 218 | 
          | 
          | 
                 }
  | 
      
      
         | 219 | 
          | 
          | 
                 else if ( count < 128 ) {
  | 
      
      
         | 220 | 
          | 
          | 
                     z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
  | 
      
      
         | 221 | 
          | 
          | 
                 }
  | 
      
      
         | 222 | 
          | 
          | 
                 else {
  | 
      
      
         | 223 | 
          | 
          | 
                     z1 = ( ( a0 | a1 ) != 0 );
  | 
      
      
         | 224 | 
          | 
          | 
                 }
  | 
      
      
         | 225 | 
          | 
          | 
                 z0 = 0;
  | 
      
      
         | 226 | 
          | 
          | 
             }
  | 
      
      
         | 227 | 
          | 
          | 
             *z1Ptr = z1;
  | 
      
      
         | 228 | 
          | 
          | 
             *z0Ptr = z0;
  | 
      
      
         | 229 | 
          | 
          | 
          
  | 
      
      
         | 230 | 
          | 
          | 
         }
  | 
      
      
         | 231 | 
          | 
          | 
          */
  | 
      
      
         | 232 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 233 | 
          | 
          | 
         | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
  | 
      
      
         | 234 | 
          | 
          | 
         | by 64 _plus_ the number of bits given in `count'.  The shifted result is
  | 
      
      
         | 235 | 
          | 
          | 
         | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
  | 
      
      
         | 236 | 
          | 
          | 
         | stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
  | 
      
      
         | 237 | 
          | 
          | 
         | off form a third 64-bit result as follows:  The _last_ bit shifted off is
  | 
      
      
         | 238 | 
          | 
          | 
         | the most-significant bit of the extra result, and the other 63 bits of the
  | 
      
      
         | 239 | 
          | 
          | 
         | extra result are all zero if and only if _all_but_the_last_ bits shifted off
  | 
      
      
         | 240 | 
          | 
          | 
         | were all zero.  This extra result is stored in the location pointed to by
  | 
      
      
         | 241 | 
          | 
          | 
         | `z2Ptr'.  The value of `count' can be arbitrarily large.
  | 
      
      
         | 242 | 
          | 
          | 
         |     (This routine makes more sense if `a0', `a1', and `a2' are considered
  | 
      
      
         | 243 | 
          | 
          | 
         | to form a fixed-point value with binary point between `a1' and `a2'.  This
  | 
      
      
         | 244 | 
          | 
          | 
         | fixed-point value is shifted right by the number of bits given in `count',
  | 
      
      
         | 245 | 
          | 
          | 
         | and the integer part of the result is returned at the locations pointed to
  | 
      
      
         | 246 | 
          | 
          | 
         | by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
  | 
      
      
         | 247 | 
          | 
          | 
         | corrupted as described above, and is returned at the location pointed to by
  | 
      
      
         | 248 | 
          | 
          | 
         | `z2Ptr'.)
  | 
      
      
         | 249 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 250 | 
          | 
          | 
           // Not used - commenting out to stop werrors during compile
  | 
      
      
         | 251 | 
          | 
          | 
           /*
  | 
      
      
         | 252 | 
          | 
          | 
         INLINE void
  | 
      
      
         | 253 | 
          | 
          | 
          shift128ExtraRightJamming(
  | 
      
      
         | 254 | 
          | 
          | 
              bits64 a0,
  | 
      
      
         | 255 | 
          | 
          | 
              bits64 a1,
  | 
      
      
         | 256 | 
          | 
          | 
              bits64 a2,
  | 
      
      
         | 257 | 
          | 
          | 
              int16 count,
  | 
      
      
         | 258 | 
          | 
          | 
              bits64 *z0Ptr,
  | 
      
      
         | 259 | 
          | 
          | 
              bits64 *z1Ptr,
  | 
      
      
         | 260 | 
          | 
          | 
              bits64 *z2Ptr
  | 
      
      
         | 261 | 
          | 
          | 
          )
  | 
      
      
         | 262 | 
          | 
          | 
         {
  | 
      
      
         | 263 | 
          | 
          | 
             bits64 z0, z1, z2;
  | 
      
      
         | 264 | 
          | 
          | 
             int8 negCount = ( - count ) & 63;
  | 
      
      
         | 265 | 
          | 
          | 
          
  | 
      
      
         | 266 | 
          | 
          | 
             if ( count == 0 ) {
  | 
      
      
         | 267 | 
          | 
          | 
                 z2 = a2;
  | 
      
      
         | 268 | 
          | 
          | 
                 z1 = a1;
  | 
      
      
         | 269 | 
          | 
          | 
                 z0 = a0;
  | 
      
      
         | 270 | 
          | 
          | 
             }
  | 
      
      
         | 271 | 
          | 
          | 
             else {
  | 
      
      
         | 272 | 
          | 
          | 
                 if ( count < 64 ) {
  | 
      
      
         | 273 | 
          | 
          | 
                     z2 = a1<<negCount;
  | 
      
      
         | 274 | 
          | 
          | 
                     z1 = ( a0<<negCount ) | ( a1>>count );
  | 
      
      
         | 275 | 
          | 
          | 
                     z0 = a0>>count;
  | 
      
      
         | 276 | 
          | 
          | 
                 }
  | 
      
      
         | 277 | 
          | 
          | 
                 else {
  | 
      
      
         | 278 | 
          | 
          | 
                     if ( count == 64 ) {
  | 
      
      
         | 279 | 
          | 
          | 
                         z2 = a1;
  | 
      
      
         | 280 | 
          | 
          | 
                         z1 = a0;
  | 
      
      
         | 281 | 
          | 
          | 
                     }
  | 
      
      
         | 282 | 
          | 
          | 
                     else {
  | 
      
      
         | 283 | 
          | 
          | 
                         a2 |= a1;
  | 
      
      
         | 284 | 
          | 
          | 
                         if ( count < 128 ) {
  | 
      
      
         | 285 | 
          | 
          | 
                             z2 = a0<<negCount;
  | 
      
      
         | 286 | 
          | 
          | 
                             z1 = a0>>( count & 63 );
  | 
      
      
         | 287 | 
          | 
          | 
                         }
  | 
      
      
         | 288 | 
          | 
          | 
                         else {
  | 
      
      
         | 289 | 
          | 
          | 
                             z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
  | 
      
      
         | 290 | 
          | 
          | 
                             z1 = 0;
  | 
      
      
         | 291 | 
          | 
          | 
                         }
  | 
      
      
         | 292 | 
          | 
          | 
                     }
  | 
      
      
         | 293 | 
          | 
          | 
                     z0 = 0;
  | 
      
      
         | 294 | 
          | 
          | 
                 }
  | 
      
      
         | 295 | 
          | 
          | 
                 z2 |= ( a2 != 0 );
  | 
      
      
         | 296 | 
          | 
          | 
             }
  | 
      
      
         | 297 | 
          | 
          | 
             *z2Ptr = z2;
  | 
      
      
         | 298 | 
          | 
          | 
             *z1Ptr = z1;
  | 
      
      
         | 299 | 
          | 
          | 
             *z0Ptr = z0;
  | 
      
      
         | 300 | 
          | 
          | 
          
  | 
      
      
         | 301 | 
          | 
          | 
         }
  | 
      
      
         | 302 | 
          | 
          | 
         */
  | 
      
      
         | 303 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 304 | 
          | 
          | 
         | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
  | 
      
      
         | 305 | 
          | 
          | 
         | number of bits given in `count'.  Any bits shifted off are lost.  The value
  | 
      
      
         | 306 | 
          | 
          | 
         | of `count' must be less than 64.  The result is broken into two 64-bit
  | 
      
      
         | 307 | 
          | 
          | 
         | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  | 
      
      
         | 308 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 309 | 
          | 
          | 
            // Not used - commenting out to stop werrors during compile
  | 
      
      
         | 310 | 
          | 
          | 
            /*
  | 
      
      
         | 311 | 
          | 
          | 
         INLINE void
  | 
      
      
         | 312 | 
          | 
          | 
          shortShift128Left(
  | 
      
      
         | 313 | 
          | 
          | 
              bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
  | 
      
      
         | 314 | 
          | 
          | 
         {
  | 
      
      
         | 315 | 
          | 
          | 
          
  | 
      
      
         | 316 | 
          | 
          | 
             *z1Ptr = a1<<count;
  | 
      
      
         | 317 | 
          | 
          | 
             *z0Ptr =
  | 
      
      
         | 318 | 
          | 
          | 
                 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
  | 
      
      
         | 319 | 
          | 
          | 
          
  | 
      
      
         | 320 | 
          | 
          | 
         }
  | 
      
      
         | 321 | 
          | 
          | 
         */
  | 
      
      
         | 322 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 323 | 
          | 
          | 
         | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
  | 
      
      
         | 324 | 
          | 
          | 
         | by the number of bits given in `count'.  Any bits shifted off are lost.
  | 
      
      
         | 325 | 
          | 
          | 
         | The value of `count' must be less than 64.  The result is broken into three
  | 
      
      
         | 326 | 
          | 
          | 
         | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
  | 
      
      
         | 327 | 
          | 
          | 
         | `z1Ptr', and `z2Ptr'.
  | 
      
      
         | 328 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 329 | 
          | 
          | 
             // Not used - commenting out to stop werrors during compile
  | 
      
      
         | 330 | 
          | 
          | 
             /*
  | 
      
      
         | 331 | 
          | 
          | 
         INLINE void
  | 
      
      
         | 332 | 
          | 
          | 
          shortShift192Left(
  | 
      
      
         | 333 | 
          | 
          | 
              bits64 a0,
  | 
      
      
         | 334 | 
          | 
          | 
              bits64 a1,
  | 
      
      
         | 335 | 
          | 
          | 
              bits64 a2,
  | 
      
      
         | 336 | 
          | 
          | 
              int16 count,
  | 
      
      
         | 337 | 
          | 
          | 
              bits64 *z0Ptr,
  | 
      
      
         | 338 | 
          | 
          | 
              bits64 *z1Ptr,
  | 
      
      
         | 339 | 
          | 
          | 
              bits64 *z2Ptr
  | 
      
      
         | 340 | 
          | 
          | 
          )
  | 
      
      
         | 341 | 
          | 
          | 
         {
  | 
      
      
         | 342 | 
          | 
          | 
             bits64 z0, z1, z2;
  | 
      
      
         | 343 | 
          | 
          | 
             int8 negCount;
  | 
      
      
         | 344 | 
          | 
          | 
          
  | 
      
      
         | 345 | 
          | 
          | 
             z2 = a2<<count;
  | 
      
      
         | 346 | 
          | 
          | 
             z1 = a1<<count;
  | 
      
      
         | 347 | 
          | 
          | 
             z0 = a0<<count;
  | 
      
      
         | 348 | 
          | 
          | 
             if ( 0 < count ) {
  | 
      
      
         | 349 | 
          | 
          | 
                 negCount = ( ( - count ) & 63 );
  | 
      
      
         | 350 | 
          | 
          | 
                 z1 |= a2>>negCount;
  | 
      
      
         | 351 | 
          | 
          | 
                 z0 |= a1>>negCount;
  | 
      
      
         | 352 | 
          | 
          | 
             }
  | 
      
      
         | 353 | 
          | 
          | 
             *z2Ptr = z2;
  | 
      
      
         | 354 | 
          | 
          | 
             *z1Ptr = z1;
  | 
      
      
         | 355 | 
          | 
          | 
             *z0Ptr = z0;
  | 
      
      
         | 356 | 
          | 
          | 
          
  | 
      
      
         | 357 | 
          | 
          | 
         }
  | 
      
      
         | 358 | 
          | 
          | 
         */
  | 
      
      
         | 359 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 360 | 
          | 
          | 
         | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
  | 
      
      
         | 361 | 
          | 
          | 
         | value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
  | 
      
      
         | 362 | 
          | 
          | 
         | any carry out is lost.  The result is broken into two 64-bit pieces which
  | 
      
      
         | 363 | 
          | 
          | 
         | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  | 
      
      
         | 364 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 365 | 
          | 
          | 
          
  | 
      
      
         | 366 | 
          | 
          | 
         INLINE void
  | 
      
      
         | 367 | 
          | 
          | 
          add128(
  | 
      
      
         | 368 | 
          | 
          | 
              bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
  | 
      
      
         | 369 | 
          | 
          | 
         {
  | 
      
      
         | 370 | 
          | 
          | 
             bits64 z1;
  | 
      
      
         | 371 | 
          | 
          | 
          
  | 
      
      
         | 372 | 
          | 
          | 
             z1 = a1 + b1;
  | 
      
      
         | 373 | 
          | 
          | 
             *z1Ptr = z1;
  | 
      
      
         | 374 | 
          | 
          | 
             *z0Ptr = a0 + b0 + ( z1 < a1 );
  | 
      
      
         | 375 | 
          | 
          | 
          
  | 
      
      
         | 376 | 
          | 
          | 
         }
  | 
      
      
         | 377 | 
          | 
          | 
          
  | 
      
      
         | 378 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 379 | 
          | 
          | 
         | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
  | 
      
      
         | 380 | 
          | 
          | 
         | 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
  | 
      
      
         | 381 | 
          | 
          | 
         | modulo 2^192, so any carry out is lost.  The result is broken into three
  | 
      
      
         | 382 | 
          | 
          | 
         | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
  | 
      
      
         | 383 | 
          | 
          | 
         | `z1Ptr', and `z2Ptr'.
  | 
      
      
         | 384 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 385 | 
          | 
          | 
         // Not used - commenting out to stop werrors during compile
  | 
      
      
         | 386 | 
          | 
          | 
         /*
  | 
      
      
         | 387 | 
          | 
          | 
         INLINE void
  | 
      
      
         | 388 | 
          | 
          | 
          add192(
  | 
      
      
         | 389 | 
          | 
          | 
              bits64 a0,
  | 
      
      
         | 390 | 
          | 
          | 
              bits64 a1,
  | 
      
      
         | 391 | 
          | 
          | 
              bits64 a2,
  | 
      
      
         | 392 | 
          | 
          | 
              bits64 b0,
  | 
      
      
         | 393 | 
          | 
          | 
              bits64 b1,
  | 
      
      
         | 394 | 
          | 
          | 
              bits64 b2,
  | 
      
      
         | 395 | 
          | 
          | 
              bits64 *z0Ptr,
  | 
      
      
         | 396 | 
          | 
          | 
              bits64 *z1Ptr,
  | 
      
      
         | 397 | 
          | 
          | 
              bits64 *z2Ptr
  | 
      
      
         | 398 | 
          | 
          | 
          )
  | 
      
      
         | 399 | 
          | 
          | 
         {
  | 
      
      
         | 400 | 
          | 
          | 
             bits64 z0, z1, z2;
  | 
      
      
         | 401 | 
          | 
          | 
             int8 carry0, carry1;
  | 
      
      
         | 402 | 
          | 
          | 
          
  | 
      
      
         | 403 | 
          | 
          | 
             z2 = a2 + b2;
  | 
      
      
         | 404 | 
          | 
          | 
             carry1 = ( z2 < a2 );
  | 
      
      
         | 405 | 
          | 
          | 
             z1 = a1 + b1;
  | 
      
      
         | 406 | 
          | 
          | 
             carry0 = ( z1 < a1 );
  | 
      
      
         | 407 | 
          | 
          | 
             z0 = a0 + b0;
  | 
      
      
         | 408 | 
          | 
          | 
             z1 += carry1;
  | 
      
      
         | 409 | 
          | 
          | 
             z0 += ( z1 < carry1 );
  | 
      
      
         | 410 | 
          | 
          | 
             z0 += carry0;
  | 
      
      
         | 411 | 
          | 
          | 
             *z2Ptr = z2;
  | 
      
      
         | 412 | 
          | 
          | 
             *z1Ptr = z1;
  | 
      
      
         | 413 | 
          | 
          | 
             *z0Ptr = z0;
  | 
      
      
         | 414 | 
          | 
          | 
          
  | 
      
      
         | 415 | 
          | 
          | 
         }
  | 
      
      
         | 416 | 
          | 
          | 
         */
  | 
      
      
         | 417 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 418 | 
          | 
          | 
         | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
  | 
      
      
         | 419 | 
          | 
          | 
         | 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
  | 
      
      
         | 420 | 
          | 
          | 
         | 2^128, so any borrow out (carry out) is lost.  The result is broken into two
  | 
      
      
         | 421 | 
          | 
          | 
         | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
  | 
      
      
         | 422 | 
          | 
          | 
         | `z1Ptr'.
  | 
      
      
         | 423 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 424 | 
          | 
          | 
          
  | 
      
      
         | 425 | 
          | 
          | 
         INLINE void
  | 
      
      
         | 426 | 
          | 
          | 
          sub128(
  | 
      
      
         | 427 | 
          | 
          | 
              bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
  | 
      
      
         | 428 | 
          | 
          | 
         {
  | 
      
      
         | 429 | 
          | 
          | 
          
  | 
      
      
         | 430 | 
          | 
          | 
             *z1Ptr = a1 - b1;
  | 
      
      
         | 431 | 
          | 
          | 
             *z0Ptr = a0 - b0 - ( a1 < b1 );
  | 
      
      
         | 432 | 
          | 
          | 
          
  | 
      
      
         | 433 | 
          | 
          | 
         }
  | 
      
      
         | 434 | 
          | 
          | 
          
  | 
      
      
         | 435 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 436 | 
          | 
          | 
         | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
  | 
      
      
         | 437 | 
          | 
          | 
         | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
  | 
      
      
         | 438 | 
          | 
          | 
         | Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
  | 
      
      
         | 439 | 
          | 
          | 
         | result is broken into three 64-bit pieces which are stored at the locations
  | 
      
      
         | 440 | 
          | 
          | 
         | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
  | 
      
      
         | 441 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 442 | 
          | 
          | 
         // Not used - commenting out to stop werrors during compile
  | 
      
      
         | 443 | 
          | 
          | 
         /*
  | 
      
      
         | 444 | 
          | 
          | 
         INLINE void
  | 
      
      
         | 445 | 
          | 
          | 
          sub192(
  | 
      
      
         | 446 | 
          | 
          | 
              bits64 a0,
  | 
      
      
         | 447 | 
          | 
          | 
              bits64 a1,
  | 
      
      
         | 448 | 
          | 
          | 
              bits64 a2,
  | 
      
      
         | 449 | 
          | 
          | 
              bits64 b0,
  | 
      
      
         | 450 | 
          | 
          | 
              bits64 b1,
  | 
      
      
         | 451 | 
          | 
          | 
              bits64 b2,
  | 
      
      
         | 452 | 
          | 
          | 
              bits64 *z0Ptr,
  | 
      
      
         | 453 | 
          | 
          | 
              bits64 *z1Ptr,
  | 
      
      
         | 454 | 
          | 
          | 
              bits64 *z2Ptr
  | 
      
      
         | 455 | 
          | 
          | 
          )
  | 
      
      
         | 456 | 
          | 
          | 
         {
  | 
      
      
         | 457 | 
          | 
          | 
             bits64 z0, z1, z2;
  | 
      
      
         | 458 | 
          | 
          | 
             int8 borrow0, borrow1;
  | 
      
      
         | 459 | 
          | 
          | 
          
  | 
      
      
         | 460 | 
          | 
          | 
             z2 = a2 - b2;
  | 
      
      
         | 461 | 
          | 
          | 
             borrow1 = ( a2 < b2 );
  | 
      
      
         | 462 | 
          | 
          | 
             z1 = a1 - b1;
  | 
      
      
         | 463 | 
          | 
          | 
             borrow0 = ( a1 < b1 );
  | 
      
      
         | 464 | 
          | 
          | 
             z0 = a0 - b0;
  | 
      
      
         | 465 | 
          | 
          | 
             z0 -= ( z1 < borrow1 );
  | 
      
      
         | 466 | 
          | 
          | 
             z1 -= borrow1;
  | 
      
      
         | 467 | 
          | 
          | 
             z0 -= borrow0;
  | 
      
      
         | 468 | 
          | 
          | 
             *z2Ptr = z2;
  | 
      
      
         | 469 | 
          | 
          | 
             *z1Ptr = z1;
  | 
      
      
         | 470 | 
          | 
          | 
             *z0Ptr = z0;
  | 
      
      
         | 471 | 
          | 
          | 
          
  | 
      
      
         | 472 | 
          | 
          | 
         }
  | 
      
      
         | 473 | 
          | 
          | 
         */
  | 
      
      
         | 474 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 475 | 
          | 
          | 
         | Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
  | 
      
      
         | 476 | 
          | 
          | 
         | into two 64-bit pieces which are stored at the locations pointed to by
  | 
      
      
         | 477 | 
          | 
          | 
         | `z0Ptr' and `z1Ptr'.
  | 
      
      
         | 478 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 479 | 
          | 
          | 
          
  | 
      
      
         | 480 | 
          | 
          | 
         INLINE void
  | 
      
      
         | 481 | 
          | 
          | 
         mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
  | 
      
      
         | 482 | 
          | 
          | 
         {
  | 
      
      
         | 483 | 
          | 
          | 
             bits32 aHigh, aLow, bHigh, bLow;
  | 
      
      
         | 484 | 
          | 
          | 
             bits64 z0, zMiddleA, zMiddleB, z1;
  | 
      
      
         | 485 | 
          | 
          | 
          
  | 
      
      
         | 486 | 
          | 
          | 
             aLow = a;
  | 
      
      
         | 487 | 
          | 
          | 
             aHigh = a>>32;
  | 
      
      
         | 488 | 
          | 
          | 
             bLow = b;
  | 
      
      
         | 489 | 
          | 
          | 
             bHigh = b>>32;
  | 
      
      
         | 490 | 
          | 
          | 
             z1 = ( (bits64) aLow ) * bLow;
  | 
      
      
         | 491 | 
          | 
          | 
             zMiddleA = ( (bits64) aLow ) * bHigh;
  | 
      
      
         | 492 | 
          | 
          | 
             zMiddleB = ( (bits64) aHigh ) * bLow;
  | 
      
      
         | 493 | 
          | 
          | 
             z0 = ( (bits64) aHigh ) * bHigh;
  | 
      
      
         | 494 | 
          | 
          | 
             zMiddleA += zMiddleB;
  | 
      
      
         | 495 | 
          | 
          | 
             z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
  | 
      
      
         | 496 | 
          | 
          | 
             zMiddleA <<= 32;
  | 
      
      
         | 497 | 
          | 
          | 
             z1 += zMiddleA;
  | 
      
      
         | 498 | 
          | 
          | 
             z0 += ( z1 < zMiddleA );
  | 
      
      
         | 499 | 
          | 
          | 
             *z1Ptr = z1;
  | 
      
      
         | 500 | 
          | 
          | 
             *z0Ptr = z0;
  | 
      
      
         | 501 | 
          | 
          | 
          
  | 
      
      
         | 502 | 
          | 
          | 
         }
  | 
      
      
         | 503 | 
          | 
          | 
          
  | 
      
      
         | 504 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 505 | 
          | 
          | 
         | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
  | 
      
      
         | 506 | 
          | 
          | 
         | `b' to obtain a 192-bit product.  The product is broken into three 64-bit
  | 
      
      
         | 507 | 
          | 
          | 
         | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
  | 
      
      
         | 508 | 
          | 
          | 
         | `z2Ptr'.
  | 
      
      
         | 509 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 510 | 
          | 
          | 
         // Not used - commenting out to stop werrors during compile
  | 
      
      
         | 511 | 
          | 
          | 
         /*
  | 
      
      
         | 512 | 
          | 
          | 
         INLINE void
  | 
      
      
         | 513 | 
          | 
          | 
          mul128By64To192(
  | 
      
      
         | 514 | 
          | 
          | 
              bits64 a0,
  | 
      
      
         | 515 | 
          | 
          | 
              bits64 a1,
  | 
      
      
         | 516 | 
          | 
          | 
              bits64 b,
  | 
      
      
         | 517 | 
          | 
          | 
              bits64 *z0Ptr,
  | 
      
      
         | 518 | 
          | 
          | 
              bits64 *z1Ptr,
  | 
      
      
         | 519 | 
          | 
          | 
              bits64 *z2Ptr
  | 
      
      
         | 520 | 
          | 
          | 
          )
  | 
      
      
         | 521 | 
          | 
          | 
         {
  | 
      
      
         | 522 | 
          | 
          | 
             bits64 z0, z1, z2, more1;
  | 
      
      
         | 523 | 
          | 
          | 
          
  | 
      
      
         | 524 | 
          | 
          | 
             mul64To128( a1, b, &z1, &z2 );
  | 
      
      
         | 525 | 
          | 
          | 
             mul64To128( a0, b, &z0, &more1 );
  | 
      
      
         | 526 | 
          | 
          | 
             add128( z0, more1, 0, z1, &z0, &z1 );
  | 
      
      
         | 527 | 
          | 
          | 
             *z2Ptr = z2;
  | 
      
      
         | 528 | 
          | 
          | 
             *z1Ptr = z1;
  | 
      
      
         | 529 | 
          | 
          | 
             *z0Ptr = z0;
  | 
      
      
         | 530 | 
          | 
          | 
          
  | 
      
      
         | 531 | 
          | 
          | 
         }
  | 
      
      
         | 532 | 
          | 
          | 
         */
  | 
      
      
         | 533 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 534 | 
          | 
          | 
         | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
  | 
      
      
         | 535 | 
          | 
          | 
         | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
  | 
      
      
         | 536 | 
          | 
          | 
         | product.  The product is broken into four 64-bit pieces which are stored at
  | 
      
      
         | 537 | 
          | 
          | 
         | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
  | 
      
      
         | 538 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 539 | 
          | 
          | 
          // Not used - commenting out to stop werrors during compile
  | 
      
      
         | 540 | 
          | 
          | 
          /*
  | 
      
      
         | 541 | 
          | 
          | 
         INLINE void
  | 
      
      
         | 542 | 
          | 
          | 
          mul128To256(
  | 
      
      
         | 543 | 
          | 
          | 
              bits64 a0,
  | 
      
      
         | 544 | 
          | 
          | 
              bits64 a1,
  | 
      
      
         | 545 | 
          | 
          | 
              bits64 b0,
  | 
      
      
         | 546 | 
          | 
          | 
              bits64 b1,
  | 
      
      
         | 547 | 
          | 
          | 
              bits64 *z0Ptr,
  | 
      
      
         | 548 | 
          | 
          | 
              bits64 *z1Ptr,
  | 
      
      
         | 549 | 
          | 
          | 
              bits64 *z2Ptr,
  | 
      
      
         | 550 | 
          | 
          | 
              bits64 *z3Ptr
  | 
      
      
         | 551 | 
          | 
          | 
          )
  | 
      
      
         | 552 | 
          | 
          | 
         {
  | 
      
      
         | 553 | 
          | 
          | 
             bits64 z0, z1, z2, z3;
  | 
      
      
         | 554 | 
          | 
          | 
             bits64 more1, more2;
  | 
      
      
         | 555 | 
          | 
          | 
          
  | 
      
      
         | 556 | 
          | 
          | 
             mul64To128( a1, b1, &z2, &z3 );
  | 
      
      
         | 557 | 
          | 
          | 
             mul64To128( a1, b0, &z1, &more2 );
  | 
      
      
         | 558 | 
          | 
          | 
             add128( z1, more2, 0, z2, &z1, &z2 );
  | 
      
      
         | 559 | 
          | 
          | 
             mul64To128( a0, b0, &z0, &more1 );
  | 
      
      
         | 560 | 
          | 
          | 
             add128( z0, more1, 0, z1, &z0, &z1 );
  | 
      
      
         | 561 | 
          | 
          | 
             mul64To128( a0, b1, &more1, &more2 );
  | 
      
      
         | 562 | 
          | 
          | 
             add128( more1, more2, 0, z2, &more1, &z2 );
  | 
      
      
         | 563 | 
          | 
          | 
             add128( z0, z1, 0, more1, &z0, &z1 );
  | 
      
      
         | 564 | 
          | 
          | 
             *z3Ptr = z3;
  | 
      
      
         | 565 | 
          | 
          | 
             *z2Ptr = z2;
  | 
      
      
         | 566 | 
          | 
          | 
             *z1Ptr = z1;
  | 
      
      
         | 567 | 
          | 
          | 
             *z0Ptr = z0;
  | 
      
      
         | 568 | 
          | 
          | 
          
  | 
      
      
         | 569 | 
          | 
          | 
         }
  | 
      
      
         | 570 | 
          | 
          | 
          */
  | 
      
      
         | 571 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 572 | 
          | 
          | 
         | Returns an approximation to the 64-bit integer quotient obtained by dividing
  | 
      
      
         | 573 | 
          | 
          | 
         | `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
  | 
      
      
         | 574 | 
          | 
          | 
         | divisor `b' must be at least 2^63.  If q is the exact quotient truncated
  | 
      
      
         | 575 | 
          | 
          | 
         | toward zero, the approximation returned lies between q and q + 2 inclusive.
  | 
      
      
         | 576 | 
          | 
          | 
         | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
  | 
      
      
         | 577 | 
          | 
          | 
         | unsigned integer is returned.
  | 
      
      
         | 578 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 579 | 
          | 
          | 
          
  | 
      
      
         | 580 | 
          | 
          | 
         static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
  | 
      
      
         | 581 | 
          | 
          | 
         {
  | 
      
      
         | 582 | 
          | 
          | 
             bits64 b0, b1;
  | 
      
      
         | 583 | 
          | 
          | 
             bits64 rem0, rem1, term0, term1;
  | 
      
      
         | 584 | 
          | 
          | 
             bits64 z;
  | 
      
      
         | 585 | 
          | 
          | 
          
  | 
      
      
         | 586 | 
          | 
          | 
             if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
  | 
      
      
         | 587 | 
          | 
          | 
             b0 = b>>32;
  | 
      
      
         | 588 | 
          | 
          | 
             z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
  | 
      
      
         | 589 | 
          | 
          | 
             mul64To128( b, z, &term0, &term1 );
  | 
      
      
         | 590 | 
          | 
          | 
             sub128( a0, a1, term0, term1, &rem0, &rem1 );
  | 
      
      
         | 591 | 
          | 
          | 
             while ( ( (sbits64) rem0 ) < 0 ) {
  | 
      
      
         | 592 | 
          | 
          | 
                 z -= LIT64( 0x100000000 );
  | 
      
      
         | 593 | 
          | 
          | 
                 b1 = b<<32;
  | 
      
      
         | 594 | 
          | 
          | 
                 add128( rem0, rem1, b0, b1, &rem0, &rem1 );
  | 
      
      
         | 595 | 
          | 
          | 
             }
  | 
      
      
         | 596 | 
          | 
          | 
             rem0 = ( rem0<<32 ) | ( rem1>>32 );
  | 
      
      
         | 597 | 
          | 
          | 
             z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
  | 
      
      
         | 598 | 
          | 
          | 
             return z;
  | 
      
      
         | 599 | 
          | 
          | 
          
  | 
      
      
         | 600 | 
          | 
          | 
         }
  | 
      
      
         | 601 | 
          | 
          | 
          
  | 
      
      
         | 602 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 603 | 
          | 
          | 
         | Returns an approximation to the square root of the 32-bit significand given
  | 
      
      
         | 604 | 
          | 
          | 
         | by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
  | 
      
      
         | 605 | 
          | 
          | 
         | `aExp' (the least significant bit) is 1, the integer returned approximates
  | 
      
      
         | 606 | 
          | 
          | 
         | 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
  | 
      
      
         | 607 | 
          | 
          | 
         | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
  | 
      
      
         | 608 | 
          | 
          | 
         | case, the approximation returned lies strictly within +/-2 of the exact
  | 
      
      
         | 609 | 
          | 
          | 
         | value.
  | 
      
      
         | 610 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 611 | 
          | 
          | 
          
  | 
      
      
         | 612 | 
          | 
          | 
         static bits32 estimateSqrt32( int16 aExp, bits32 a )
  | 
      
      
         | 613 | 
          | 
          | 
         {
  | 
      
      
         | 614 | 
          | 
          | 
             static const bits16 sqrtOddAdjustments[] = {
  | 
      
      
         | 615 | 
          | 
          | 
                 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
  | 
      
      
         | 616 | 
          | 
          | 
                 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
  | 
      
      
         | 617 | 
          | 
          | 
             };
  | 
      
      
         | 618 | 
          | 
          | 
             static const bits16 sqrtEvenAdjustments[] = {
  | 
      
      
         | 619 | 
          | 
          | 
                 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
  | 
      
      
         | 620 | 
          | 
          | 
                 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
  | 
      
      
         | 621 | 
          | 
          | 
             };
  | 
      
      
         | 622 | 
          | 
          | 
             int8 index;
  | 
      
      
         | 623 | 
          | 
          | 
             bits32 z;
  | 
      
      
         | 624 | 
          | 
          | 
          
  | 
      
      
         | 625 | 
          | 
          | 
             index = ( a>>27 ) & 15;
  | 
      
      
         | 626 | 
          | 
          | 
             if ( aExp & 1 ) {
  | 
      
      
         | 627 | 
          | 
          | 
                 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
  | 
      
      
         | 628 | 
          | 
          | 
                 z = ( ( a / z )<<14 ) + ( z<<15 );
  | 
      
      
         | 629 | 
          | 
          | 
                 a >>= 1;
  | 
      
      
         | 630 | 
          | 
          | 
             }
  | 
      
      
         | 631 | 
          | 
          | 
             else {
  | 
      
      
         | 632 | 
          | 
          | 
                 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
  | 
      
      
         | 633 | 
          | 
          | 
                 z = a / z + z;
  | 
      
      
         | 634 | 
          | 
          | 
                 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
  | 
      
      
         | 635 | 
          | 
          | 
                 if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
  | 
      
      
         | 636 | 
          | 
          | 
             }
  | 
      
      
         | 637 | 
          | 
          | 
             return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
  | 
      
      
         | 638 | 
          | 
          | 
          
  | 
      
      
         | 639 | 
          | 
          | 
         }
  | 
      
      
         | 640 | 
          | 
          | 
          
  | 
      
      
         | 641 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 642 | 
          | 
          | 
         | Returns the number of leading 0 bits before the most-significant 1 bit of
  | 
      
      
         | 643 | 
          | 
          | 
         | `a'.  If `a' is zero, 32 is returned.
  | 
      
      
         | 644 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 645 | 
          | 
          | 
          
  | 
      
      
         | 646 | 
          | 
          | 
         static int8 countLeadingZeros32( bits32 a )
  | 
      
      
         | 647 | 
          | 
          | 
         {
  | 
      
      
         | 648 | 
          | 
          | 
             static const int8 countLeadingZerosHigh[] = {
  | 
      
      
         | 649 | 
          | 
          | 
                 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
  | 
      
      
         | 650 | 
          | 
          | 
                 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  | 
      
      
         | 651 | 
          | 
          | 
                 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  | 
      
      
         | 652 | 
          | 
          | 
                 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  | 
      
      
         | 653 | 
          | 
          | 
                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  | 
      
      
         | 654 | 
          | 
          | 
                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  | 
      
      
         | 655 | 
          | 
          | 
                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  | 
      
      
         | 656 | 
          | 
          | 
                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  | 
      
      
         | 657 | 
          | 
          | 
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  | 
      
      
         | 658 | 
          | 
          | 
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  | 
      
      
         | 659 | 
          | 
          | 
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  | 
      
      
         | 660 | 
          | 
          | 
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  | 
      
      
         | 661 | 
          | 
          | 
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  | 
      
      
         | 662 | 
          | 
          | 
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  | 
      
      
         | 663 | 
          | 
          | 
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  | 
      
      
         | 664 | 
          | 
          | 
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  | 
      
      
         | 665 | 
          | 
          | 
             };
  | 
      
      
         | 666 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 667 | 
          | 
          | 
          
  | 
      
      
         | 668 | 
          | 
          | 
             shiftCount = 0;
  | 
      
      
         | 669 | 
          | 
          | 
             if ( a < 0x10000 ) {
  | 
      
      
         | 670 | 
          | 
          | 
                 shiftCount += 16;
  | 
      
      
         | 671 | 
          | 
          | 
                 a <<= 16;
  | 
      
      
         | 672 | 
          | 
          | 
             }
  | 
      
      
         | 673 | 
          | 
          | 
             if ( a < 0x1000000 ) {
  | 
      
      
         | 674 | 
          | 
          | 
                 shiftCount += 8;
  | 
      
      
         | 675 | 
          | 
          | 
                 a <<= 8;
  | 
      
      
         | 676 | 
          | 
          | 
             }
  | 
      
      
         | 677 | 
          | 
          | 
             shiftCount += countLeadingZerosHigh[ a>>24 ];
  | 
      
      
         | 678 | 
          | 
          | 
             return shiftCount;
  | 
      
      
         | 679 | 
          | 
          | 
          
  | 
      
      
         | 680 | 
          | 
          | 
         }
  | 
      
      
         | 681 | 
          | 
          | 
          
  | 
      
      
         | 682 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 683 | 
          | 
          | 
         | Returns the number of leading 0 bits before the most-significant 1 bit of
  | 
      
      
         | 684 | 
          | 
          | 
         | `a'.  If `a' is zero, 64 is returned.
  | 
      
      
         | 685 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 686 | 
          | 
          | 
          
  | 
      
      
         | 687 | 
          | 
          | 
         static int8 countLeadingZeros64( bits64 a )
  | 
      
      
         | 688 | 
          | 
          | 
         {
  | 
      
      
         | 689 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 690 | 
          | 
          | 
          
  | 
      
      
         | 691 | 
          | 
          | 
             shiftCount = 0;
  | 
      
      
         | 692 | 
          | 
          | 
             if ( a < ( (bits64) 1 )<<32 ) {
  | 
      
      
         | 693 | 
          | 
          | 
                 shiftCount += 32;
  | 
      
      
         | 694 | 
          | 
          | 
             }
  | 
      
      
         | 695 | 
          | 
          | 
             else {
  | 
      
      
         | 696 | 
          | 
          | 
                 a >>= 32;
  | 
      
      
         | 697 | 
          | 
          | 
             }
  | 
      
      
         | 698 | 
          | 
          | 
             shiftCount += countLeadingZeros32( a );
  | 
      
      
         | 699 | 
          | 
          | 
             return shiftCount;
  | 
      
      
         | 700 | 
          | 
          | 
          
  | 
      
      
         | 701 | 
          | 
          | 
         }
  | 
      
      
         | 702 | 
          | 
          | 
          
  | 
      
      
         | 703 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 704 | 
          | 
          | 
         | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
  | 
      
      
         | 705 | 
          | 
          | 
         | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
  | 
      
      
         | 706 | 
          | 
          | 
         | Otherwise, returns 0.
  | 
      
      
         | 707 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 708 | 
          | 
          | 
         // Not used - commenting out to stop werrors during compile
  | 
      
      
         | 709 | 
          | 
          | 
         /*
  | 
      
      
         | 710 | 
          | 
          | 
         INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
  | 
      
      
         | 711 | 
          | 
          | 
         {
  | 
      
      
         | 712 | 
          | 
          | 
          
  | 
      
      
         | 713 | 
          | 
          | 
             return ( a0 == b0 ) && ( a1 == b1 );
  | 
      
      
         | 714 | 
          | 
          | 
          
  | 
      
      
         | 715 | 
          | 
          | 
         }
  | 
      
      
         | 716 | 
          | 
          | 
         */
  | 
      
      
         | 717 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 718 | 
          | 
          | 
         | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
  | 
      
      
         | 719 | 
          | 
          | 
         | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
  | 
      
      
         | 720 | 
          | 
          | 
         | Otherwise, returns 0.
  | 
      
      
         | 721 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 722 | 
          | 
          | 
          // Not used - commenting out to stop werrors during compile
  | 
      
      
         | 723 | 
          | 
          | 
          /*
  | 
      
      
         | 724 | 
          | 
          | 
         INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
  | 
      
      
         | 725 | 
          | 
          | 
         {
  | 
      
      
         | 726 | 
          | 
          | 
          
  | 
      
      
         | 727 | 
          | 
          | 
             return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
  | 
      
      
         | 728 | 
          | 
          | 
          
  | 
      
      
         | 729 | 
          | 
          | 
         }
  | 
      
      
         | 730 | 
          | 
          | 
          */
  | 
      
      
         | 731 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 732 | 
          | 
          | 
         | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
  | 
      
      
         | 733 | 
          | 
          | 
         | than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
  | 
      
      
         | 734 | 
          | 
          | 
         | returns 0.
  | 
      
      
         | 735 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 736 | 
          | 
          | 
           // Not used - commenting out to stop werrors during compile
  | 
      
      
         | 737 | 
          | 
          | 
           /*
  | 
      
      
         | 738 | 
          | 
          | 
         INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
  | 
      
      
         | 739 | 
          | 
          | 
         {
  | 
      
      
         | 740 | 
          | 
          | 
          
  | 
      
      
         | 741 | 
          | 
          | 
             return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
  | 
      
      
         | 742 | 
          | 
          | 
          
  | 
      
      
         | 743 | 
          | 
          | 
         }
  | 
      
      
         | 744 | 
          | 
          | 
           */
  | 
      
      
         | 745 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 746 | 
          | 
          | 
         | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
  | 
      
      
         | 747 | 
          | 
          | 
         | not equal to the 128-bit value formed by concatenating `b0' and `b1'.
  | 
      
      
         | 748 | 
          | 
          | 
         | Otherwise, returns 0.
  | 
      
      
         | 749 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 750 | 
          | 
          | 
            // Not used - commenting out to stop werrors during compile
  | 
      
      
         | 751 | 
          | 
          | 
            /*
  | 
      
      
         | 752 | 
          | 
          | 
         INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
  | 
      
      
         | 753 | 
          | 
          | 
         {
  | 
      
      
         | 754 | 
          | 
          | 
          
  | 
      
      
         | 755 | 
          | 
          | 
             return ( a0 != b0 ) || ( a1 != b1 );
  | 
      
      
         | 756 | 
          | 
          | 
          
  | 
      
      
         | 757 | 
          | 
          | 
         }
  | 
      
      
         | 758 | 
          | 
          | 
            */
  | 
      
      
         | 759 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 760 | 
          | 
          | 
         | Functions and definitions to determine:  (1) whether tininess for underflow
  | 
      
      
         | 761 | 
          | 
          | 
         | is detected before or after rounding by default, (2) what (if anything)
  | 
      
      
         | 762 | 
          | 
          | 
         | happens when exceptions are raised, (3) how signaling NaNs are distinguished
  | 
      
      
         | 763 | 
          | 
          | 
         | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
  | 
      
      
         | 764 | 
          | 
          | 
         | are propagated from function inputs to output.  These details are target-
  | 
      
      
         | 765 | 
          | 
          | 
         | specific.
  | 
      
      
         | 766 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 767 | 
          | 
          | 
         //#include "softfloat-specialize"
  | 
      
      
         | 768 | 
          | 
          | 
          
  | 
      
      
         | 769 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 770 | 
          | 
          | 
         | Underflow tininess-detection mode, statically initialized to default value.
  | 
      
      
         | 771 | 
          | 
          | 
         | (The declaration in `softfloat.h' must match the `int8' type here.)
  | 
      
      
         | 772 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 773 | 
          | 
          | 
         int8 float_detect_tininess = float_tininess_after_rounding;
  | 
      
      
         | 774 | 
          | 
          | 
          
  | 
      
      
         | 775 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 776 | 
          | 
          | 
         | Raises the exceptions specified by `flags'.  Floating-point traps can be
  | 
      
      
         | 777 | 
          | 
          | 
         | defined here if desired.  It is currently not possible for such a trap
  | 
      
      
         | 778 | 
          | 
          | 
         | to substitute a result value.  If traps are not implemented, this routine
  | 
      
      
         | 779 | 
          | 
          | 
         | should be simply `float_exception_flags |= flags;'.
  | 
      
      
         | 780 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 781 | 
          | 
          | 
          
  | 
      
      
         | 782 | 
          | 
          | 
         void float_raise( int8 flags )
  | 
      
      
         | 783 | 
          | 
          | 
         {
  | 
      
      
         | 784 | 
          | 
          | 
          
  | 
      
      
         | 785 | 
          | 
          | 
             float_exception_flags |= flags;
  | 
      
      
         | 786 | 
          | 
          | 
          
  | 
      
      
         | 787 | 
          | 
          | 
         }
  | 
      
      
         | 788 | 
          | 
          | 
          
  | 
      
      
         | 789 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 790 | 
          | 
          | 
         | Internal canonical NaN format.
  | 
      
      
         | 791 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 792 | 
          | 
          | 
         typedef struct {
  | 
      
      
         | 793 | 
          | 
          | 
             flag sign;
  | 
      
      
         | 794 | 
          | 
          | 
             bits64 high, low;
  | 
      
      
         | 795 | 
          | 
          | 
         } commonNaNT;
  | 
      
      
         | 796 | 
          | 
          | 
          
  | 
      
      
         | 797 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 798 | 
          | 
          | 
         | The pattern for a default generated single-precision NaN.
  | 
      
      
         | 799 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 800 | 
          | 
          | 
         #define float32_default_nan 0xFFC00000
  | 
      
      
         | 801 | 
          | 
          | 
          
  | 
      
      
         | 802 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 803 | 
          | 
          | 
         | Returns 1 if the single-precision floating-point value `a' is a NaN;
  | 
      
      
         | 804 | 
          | 
          | 
         | otherwise returns 0.
  | 
      
      
         | 805 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 806 | 
          | 
          | 
          
  | 
      
      
         | 807 | 
          | 
          | 
         flag float32_is_nan( float32 a )
  | 
      
      
         | 808 | 
          | 
          | 
         {
  | 
      
      
         | 809 | 
          | 
          | 
          
  | 
      
      
         | 810 | 
          | 
          | 
             return ( 0xFF000000 < (bits32) ( a<<1 ) );
  | 
      
      
         | 811 | 
          | 
          | 
          
  | 
      
      
         | 812 | 
          | 
          | 
         }
  | 
      
      
         | 813 | 
          | 
          | 
          
  | 
      
      
         | 814 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 815 | 
          | 
          | 
         | Returns 1 if the single-precision floating-point value `a' is a signaling
  | 
      
      
         | 816 | 
          | 
          | 
         | NaN; otherwise returns 0.
  | 
      
      
         | 817 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 818 | 
          | 
          | 
          
  | 
      
      
         | 819 | 
          | 
          | 
         flag float32_is_signaling_nan( float32 a )
  | 
      
      
         | 820 | 
          | 
          | 
         {
  | 
      
      
         | 821 | 
          | 
          | 
          
  | 
      
      
         | 822 | 
          | 
          | 
             return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
  | 
      
      
         | 823 | 
          | 
          | 
          
  | 
      
      
         | 824 | 
          | 
          | 
         }
  | 
      
      
         | 825 | 
          | 
          | 
          
  | 
      
      
         | 826 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 827 | 
          | 
          | 
         | Returns the result of converting the single-precision floating-point NaN
  | 
      
      
         | 828 | 
          | 
          | 
         | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
  | 
      
      
         | 829 | 
          | 
          | 
         | exception is raised.
  | 
      
      
         | 830 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 831 | 
          | 
          | 
          
  | 
      
      
         | 832 | 
          | 
          | 
         static commonNaNT float32ToCommonNaN( float32 a )
  | 
      
      
         | 833 | 
          | 
          | 
         {
  | 
      
      
         | 834 | 
          | 
          | 
             commonNaNT z;
  | 
      
      
         | 835 | 
          | 
          | 
          
  | 
      
      
         | 836 | 
          | 
          | 
             if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
  | 
      
      
         | 837 | 
          | 
          | 
             z.sign = a>>31;
  | 
      
      
         | 838 | 
          | 
          | 
             z.low = 0;
  | 
      
      
         | 839 | 
          | 
          | 
             z.high = ( (bits64) a )<<41;
  | 
      
      
         | 840 | 
          | 
          | 
             return z;
  | 
      
      
         | 841 | 
          | 
          | 
          
  | 
      
      
         | 842 | 
          | 
          | 
         }
  | 
      
      
         | 843 | 
          | 
          | 
          
  | 
      
      
         | 844 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 845 | 
          | 
          | 
         | Returns the result of converting the canonical NaN `a' to the single-
  | 
      
      
         | 846 | 
          | 
          | 
         | precision floating-point format.
  | 
      
      
         | 847 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 848 | 
          | 
          | 
          
  | 
      
      
         | 849 | 
          | 
          | 
         static float32 commonNaNToFloat32( commonNaNT a )
  | 
      
      
         | 850 | 
          | 
          | 
         {
  | 
      
      
         | 851 | 
          | 
          | 
          
  | 
      
      
         | 852 | 
          | 
          | 
             return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
  | 
      
      
         | 853 | 
          | 
          | 
          
  | 
      
      
         | 854 | 
          | 
          | 
         }
  | 
      
      
         | 855 | 
          | 
          | 
          
  | 
      
      
         | 856 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 857 | 
          | 
          | 
         | Takes two single-precision floating-point values `a' and `b', one of which
  | 
      
      
         | 858 | 
          | 
          | 
         | is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
  | 
      
      
         | 859 | 
          | 
          | 
         | signaling NaN, the invalid exception is raised.
  | 
      
      
         | 860 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 861 | 
          | 
          | 
          
  | 
      
      
         | 862 | 
          | 
          | 
         static float32 propagateFloat32NaN( float32 a, float32 b )
  | 
      
      
         | 863 | 
          | 
          | 
         {
  | 
      
      
         | 864 | 
          | 
          | 
             flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
  | 
      
      
         | 865 | 
          | 
          | 
          
  | 
      
      
         | 866 | 
          | 
          | 
             aIsNaN = float32_is_nan( a );
  | 
      
      
         | 867 | 
          | 
          | 
             aIsSignalingNaN = float32_is_signaling_nan( a );
  | 
      
      
         | 868 | 
          | 
          | 
             bIsNaN = float32_is_nan( b );
  | 
      
      
         | 869 | 
          | 
          | 
             bIsSignalingNaN = float32_is_signaling_nan( b );
  | 
      
      
         | 870 | 
          | 
          | 
             a |= 0x00400000;
  | 
      
      
         | 871 | 
          | 
          | 
             b |= 0x00400000;
  | 
      
      
         | 872 | 
          | 
          | 
             if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
  | 
      
      
         | 873 | 
          | 
          | 
             if ( aIsSignalingNaN ) {
  | 
      
      
         | 874 | 
          | 
          | 
                 if ( bIsSignalingNaN ) goto returnLargerSignificand;
  | 
      
      
         | 875 | 
          | 
          | 
                 return bIsNaN ? b : a;
  | 
      
      
         | 876 | 
          | 
          | 
             }
  | 
      
      
         | 877 | 
          | 
          | 
             else if ( aIsNaN ) {
  | 
      
      
         | 878 | 
          | 
          | 
                 if ( bIsSignalingNaN | ! bIsNaN ) return a;
  | 
      
      
         | 879 | 
          | 
          | 
          returnLargerSignificand:
  | 
      
      
         | 880 | 
          | 
          | 
                 if ( (bits32) ( a<<1 ) < (bits32) ( b<<1 ) ) return b;
  | 
      
      
         | 881 | 
          | 
          | 
                 if ( (bits32) ( b<<1 ) < (bits32) ( a<<1 ) ) return a;
  | 
      
      
         | 882 | 
          | 
          | 
                 return ( a < b ) ? a : b;
  | 
      
      
         | 883 | 
          | 
          | 
             }
  | 
      
      
         | 884 | 
          | 
          | 
             else {
  | 
      
      
         | 885 | 
          | 
          | 
                 return b;
  | 
      
      
         | 886 | 
          | 
          | 
             }
  | 
      
      
         | 887 | 
          | 
          | 
          
  | 
      
      
         | 888 | 
          | 
          | 
         }
  | 
      
      
         | 889 | 
          | 
          | 
          
  | 
      
      
         | 890 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 891 | 
          | 
          | 
         | The pattern for a default generated double-precision NaN.
  | 
      
      
         | 892 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 893 | 
          | 
          | 
         #define float64_default_nan LIT64( 0xFFF8000000000000 )
  | 
      
      
         | 894 | 
          | 
          | 
          
  | 
      
      
         | 895 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 896 | 
          | 
          | 
         | Returns 1 if the double-precision floating-point value `a' is a NaN;
  | 
      
      
         | 897 | 
          | 
          | 
         | otherwise returns 0.
  | 
      
      
         | 898 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 899 | 
          | 
          | 
          
  | 
      
      
         | 900 | 
          | 
          | 
         flag float64_is_nan( float64 a )
  | 
      
      
         | 901 | 
          | 
          | 
         {
  | 
      
      
         | 902 | 
          | 
          | 
          
  | 
      
      
         | 903 | 
          | 
          | 
             return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) );
  | 
      
      
         | 904 | 
          | 
          | 
          
  | 
      
      
         | 905 | 
          | 
          | 
         }
  | 
      
      
         | 906 | 
          | 
          | 
          
  | 
      
      
         | 907 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 908 | 
          | 
          | 
         | Returns 1 if the double-precision floating-point value `a' is a signaling
  | 
      
      
         | 909 | 
          | 
          | 
         | NaN; otherwise returns 0.
  | 
      
      
         | 910 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 911 | 
          | 
          | 
          
  | 
      
      
         | 912 | 
          | 
          | 
         flag float64_is_signaling_nan( float64 a )
  | 
      
      
         | 913 | 
          | 
          | 
         {
  | 
      
      
         | 914 | 
          | 
          | 
          
  | 
      
      
         | 915 | 
          | 
          | 
             return
  | 
      
      
         | 916 | 
          | 
          | 
                    ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
  | 
      
      
         | 917 | 
          | 
          | 
                 && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
  | 
      
      
         | 918 | 
          | 
          | 
          
  | 
      
      
         | 919 | 
          | 
          | 
         }
  | 
      
      
         | 920 | 
          | 
          | 
          
  | 
      
      
         | 921 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 922 | 
          | 
          | 
         | Returns the result of converting the double-precision floating-point NaN
  | 
      
      
         | 923 | 
          | 
          | 
         | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
  | 
      
      
         | 924 | 
          | 
          | 
         | exception is raised.
  | 
      
      
         | 925 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 926 | 
          | 
          | 
          
  | 
      
      
         | 927 | 
          | 
          | 
         static commonNaNT float64ToCommonNaN( float64 a )
  | 
      
      
         | 928 | 
          | 
          | 
         {
  | 
      
      
         | 929 | 
          | 
          | 
             commonNaNT z;
  | 
      
      
         | 930 | 
          | 
          | 
          
  | 
      
      
         | 931 | 
          | 
          | 
             if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
  | 
      
      
         | 932 | 
          | 
          | 
             z.sign = a>>63;
  | 
      
      
         | 933 | 
          | 
          | 
             z.low = 0;
  | 
      
      
         | 934 | 
          | 
          | 
             z.high = a<<12;
  | 
      
      
         | 935 | 
          | 
          | 
             return z;
  | 
      
      
         | 936 | 
          | 
          | 
          
  | 
      
      
         | 937 | 
          | 
          | 
         }
  | 
      
      
         | 938 | 
          | 
          | 
          
  | 
      
      
         | 939 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 940 | 
          | 
          | 
         | Returns the result of converting the canonical NaN `a' to the double-
  | 
      
      
         | 941 | 
          | 
          | 
         | precision floating-point format.
  | 
      
      
         | 942 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 943 | 
          | 
          | 
          
  | 
      
      
         | 944 | 
          | 
          | 
         static float64 commonNaNToFloat64( commonNaNT a )
  | 
      
      
         | 945 | 
          | 
          | 
         {
  | 
      
      
         | 946 | 
          | 
          | 
          
  | 
      
      
         | 947 | 
          | 
          | 
             return
  | 
      
      
         | 948 | 
          | 
          | 
                   ( ( (bits64) a.sign )<<63 )
  | 
      
      
         | 949 | 
          | 
          | 
                 | LIT64( 0x7FF8000000000000 )
  | 
      
      
         | 950 | 
          | 
          | 
                 | ( a.high>>12 );
  | 
      
      
         | 951 | 
          | 
          | 
          
  | 
      
      
         | 952 | 
          | 
          | 
         }
  | 
      
      
         | 953 | 
          | 
          | 
          
  | 
      
      
         | 954 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 955 | 
          | 
          | 
         | Takes two double-precision floating-point values `a' and `b', one of which
  | 
      
      
         | 956 | 
          | 
          | 
         | is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
  | 
      
      
         | 957 | 
          | 
          | 
         | signaling NaN, the invalid exception is raised.
  | 
      
      
         | 958 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 959 | 
          | 
          | 
          
  | 
      
      
         | 960 | 
          | 
          | 
         static float64 propagateFloat64NaN( float64 a, float64 b )
  | 
      
      
         | 961 | 
          | 
          | 
         {
  | 
      
      
         | 962 | 
          | 
          | 
             flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
  | 
      
      
         | 963 | 
          | 
          | 
          
  | 
      
      
         | 964 | 
          | 
          | 
             aIsNaN = float64_is_nan( a );
  | 
      
      
         | 965 | 
          | 
          | 
             aIsSignalingNaN = float64_is_signaling_nan( a );
  | 
      
      
         | 966 | 
          | 
          | 
             bIsNaN = float64_is_nan( b );
  | 
      
      
         | 967 | 
          | 
          | 
             bIsSignalingNaN = float64_is_signaling_nan( b );
  | 
      
      
         | 968 | 
          | 
          | 
             a |= LIT64( 0x0008000000000000 );
  | 
      
      
         | 969 | 
          | 
          | 
             b |= LIT64( 0x0008000000000000 );
  | 
      
      
         | 970 | 
          | 
          | 
             if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
  | 
      
      
         | 971 | 
          | 
          | 
             if ( aIsSignalingNaN ) {
  | 
      
      
         | 972 | 
          | 
          | 
                 if ( bIsSignalingNaN ) goto returnLargerSignificand;
  | 
      
      
         | 973 | 
          | 
          | 
                 return bIsNaN ? b : a;
  | 
      
      
         | 974 | 
          | 
          | 
             }
  | 
      
      
         | 975 | 
          | 
          | 
             else if ( aIsNaN ) {
  | 
      
      
         | 976 | 
          | 
          | 
                 if ( bIsSignalingNaN | ! bIsNaN ) return a;
  | 
      
      
         | 977 | 
          | 
          | 
          returnLargerSignificand:
  | 
      
      
         | 978 | 
          | 
          | 
                 if ( (bits64) ( a<<1 ) < (bits64) ( b<<1 ) ) return b;
  | 
      
      
         | 979 | 
          | 
          | 
                 if ( (bits64) ( b<<1 ) < (bits64) ( a<<1 ) ) return a;
  | 
      
      
         | 980 | 
          | 
          | 
                 return ( a < b ) ? a : b;
  | 
      
      
         | 981 | 
          | 
          | 
             }
  | 
      
      
         | 982 | 
          | 
          | 
             else {
  | 
      
      
         | 983 | 
          | 
          | 
                 return b;
  | 
      
      
         | 984 | 
          | 
          | 
             }
  | 
      
      
         | 985 | 
          | 
          | 
          
  | 
      
      
         | 986 | 
          | 
          | 
         }
  | 
      
      
         | 987 | 
          | 
          | 
          
  | 
      
      
         | 988 | 
          | 
          | 
         #ifdef FLOATX80
  | 
      
      
         | 989 | 
          | 
          | 
          
  | 
      
      
         | 990 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 991 | 
          | 
          | 
         | The pattern for a default generated extended double-precision NaN.  The
  | 
      
      
         | 992 | 
          | 
          | 
         | `high' and `low' values hold the most- and least-significant bits,
  | 
      
      
         | 993 | 
          | 
          | 
         | respectively.
  | 
      
      
         | 994 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 995 | 
          | 
          | 
         #define floatx80_default_nan_high 0xFFFF
  | 
      
      
         | 996 | 
          | 
          | 
         #define floatx80_default_nan_low  LIT64( 0xC000000000000000 )
  | 
      
      
         | 997 | 
          | 
          | 
          
  | 
      
      
         | 998 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 999 | 
          | 
          | 
         | Returns 1 if the extended double-precision floating-point value `a' is a
  | 
      
      
         | 1000 | 
          | 
          | 
         | NaN; otherwise returns 0.
  | 
      
      
         | 1001 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1002 | 
          | 
          | 
          
  | 
      
      
         | 1003 | 
          | 
          | 
         flag floatx80_is_nan( floatx80 a )
  | 
      
      
         | 1004 | 
          | 
          | 
         {
  | 
      
      
         | 1005 | 
          | 
          | 
          
  | 
      
      
         | 1006 | 
          | 
          | 
             return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
  | 
      
      
         | 1007 | 
          | 
          | 
          
  | 
      
      
         | 1008 | 
          | 
          | 
         }
  | 
      
      
         | 1009 | 
          | 
          | 
          
  | 
      
      
         | 1010 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1011 | 
          | 
          | 
         | Returns 1 if the extended double-precision floating-point value `a' is a
  | 
      
      
         | 1012 | 
          | 
          | 
         | signaling NaN; otherwise returns 0.
  | 
      
      
         | 1013 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1014 | 
          | 
          | 
          
  | 
      
      
         | 1015 | 
          | 
          | 
         flag floatx80_is_signaling_nan( floatx80 a )
  | 
      
      
         | 1016 | 
          | 
          | 
         {
  | 
      
      
         | 1017 | 
          | 
          | 
             bits64 aLow;
  | 
      
      
         | 1018 | 
          | 
          | 
          
  | 
      
      
         | 1019 | 
          | 
          | 
             aLow = a.low & ~ LIT64( 0x4000000000000000 );
  | 
      
      
         | 1020 | 
          | 
          | 
             return
  | 
      
      
         | 1021 | 
          | 
          | 
                    ( ( a.high & 0x7FFF ) == 0x7FFF )
  | 
      
      
         | 1022 | 
          | 
          | 
                 && (bits64) ( aLow<<1 )
  | 
      
      
         | 1023 | 
          | 
          | 
                 && ( a.low == aLow );
  | 
      
      
         | 1024 | 
          | 
          | 
          
  | 
      
      
         | 1025 | 
          | 
          | 
         }
  | 
      
      
         | 1026 | 
          | 
          | 
          
  | 
      
      
         | 1027 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1028 | 
          | 
          | 
         | Returns the result of converting the extended double-precision floating-
  | 
      
      
         | 1029 | 
          | 
          | 
         | point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
  | 
      
      
         | 1030 | 
          | 
          | 
         | invalid exception is raised.
  | 
      
      
         | 1031 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1032 | 
          | 
          | 
          
  | 
      
      
         | 1033 | 
          | 
          | 
         static commonNaNT floatx80ToCommonNaN( floatx80 a )
  | 
      
      
         | 1034 | 
          | 
          | 
         {
  | 
      
      
         | 1035 | 
          | 
          | 
             commonNaNT z;
  | 
      
      
         | 1036 | 
          | 
          | 
          
  | 
      
      
         | 1037 | 
          | 
          | 
             if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
  | 
      
      
         | 1038 | 
          | 
          | 
             z.sign = a.high>>15;
  | 
      
      
         | 1039 | 
          | 
          | 
             z.low = 0;
  | 
      
      
         | 1040 | 
          | 
          | 
             z.high = a.low<<1;
  | 
      
      
         | 1041 | 
          | 
          | 
             return z;
  | 
      
      
         | 1042 | 
          | 
          | 
          
  | 
      
      
         | 1043 | 
          | 
          | 
         }
  | 
      
      
         | 1044 | 
          | 
          | 
          
  | 
      
      
         | 1045 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1046 | 
          | 
          | 
         | Returns the result of converting the canonical NaN `a' to the extended
  | 
      
      
         | 1047 | 
          | 
          | 
         | double-precision floating-point format.
  | 
      
      
         | 1048 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1049 | 
          | 
          | 
          
  | 
      
      
         | 1050 | 
          | 
          | 
         static floatx80 commonNaNToFloatx80( commonNaNT a )
  | 
      
      
         | 1051 | 
          | 
          | 
         {
  | 
      
      
         | 1052 | 
          | 
          | 
             floatx80 z;
  | 
      
      
         | 1053 | 
          | 
          | 
          
  | 
      
      
         | 1054 | 
          | 
          | 
             z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
  | 
      
      
         | 1055 | 
          | 
          | 
             z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
  | 
      
      
         | 1056 | 
          | 
          | 
             return z;
  | 
      
      
         | 1057 | 
          | 
          | 
          
  | 
      
      
         | 1058 | 
          | 
          | 
         }
  | 
      
      
         | 1059 | 
          | 
          | 
          
  | 
      
      
         | 1060 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1061 | 
          | 
          | 
         | Takes two extended double-precision floating-point values `a' and `b', one
  | 
      
      
         | 1062 | 
          | 
          | 
         | of which is a NaN, and returns the appropriate NaN result.  If either `a' or
  | 
      
      
         | 1063 | 
          | 
          | 
         | `b' is a signaling NaN, the invalid exception is raised.
  | 
      
      
         | 1064 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1065 | 
          | 
          | 
          
  | 
      
      
         | 1066 | 
          | 
          | 
         static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
  | 
      
      
         | 1067 | 
          | 
          | 
         {
  | 
      
      
         | 1068 | 
          | 
          | 
             flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
  | 
      
      
         | 1069 | 
          | 
          | 
          
  | 
      
      
         | 1070 | 
          | 
          | 
             aIsNaN = floatx80_is_nan( a );
  | 
      
      
         | 1071 | 
          | 
          | 
             aIsSignalingNaN = floatx80_is_signaling_nan( a );
  | 
      
      
         | 1072 | 
          | 
          | 
             bIsNaN = floatx80_is_nan( b );
  | 
      
      
         | 1073 | 
          | 
          | 
             bIsSignalingNaN = floatx80_is_signaling_nan( b );
  | 
      
      
         | 1074 | 
          | 
          | 
             a.low |= LIT64( 0xC000000000000000 );
  | 
      
      
         | 1075 | 
          | 
          | 
             b.low |= LIT64( 0xC000000000000000 );
  | 
      
      
         | 1076 | 
          | 
          | 
             if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
  | 
      
      
         | 1077 | 
          | 
          | 
             if ( aIsSignalingNaN ) {
  | 
      
      
         | 1078 | 
          | 
          | 
                 if ( bIsSignalingNaN ) goto returnLargerSignificand;
  | 
      
      
         | 1079 | 
          | 
          | 
                 return bIsNaN ? b : a;
  | 
      
      
         | 1080 | 
          | 
          | 
             }
  | 
      
      
         | 1081 | 
          | 
          | 
             else if ( aIsNaN ) {
  | 
      
      
         | 1082 | 
          | 
          | 
                 if ( bIsSignalingNaN | ! bIsNaN ) return a;
  | 
      
      
         | 1083 | 
          | 
          | 
          returnLargerSignificand:
  | 
      
      
         | 1084 | 
          | 
          | 
                 if ( a.low < b.low ) return b;
  | 
      
      
         | 1085 | 
          | 
          | 
                 if ( b.low < a.low ) return a;
  | 
      
      
         | 1086 | 
          | 
          | 
                 return ( a.high < b.high ) ? a : b;
  | 
      
      
         | 1087 | 
          | 
          | 
             }
  | 
      
      
         | 1088 | 
          | 
          | 
             else {
  | 
      
      
         | 1089 | 
          | 
          | 
                 return b;
  | 
      
      
         | 1090 | 
          | 
          | 
             }
  | 
      
      
         | 1091 | 
          | 
          | 
          
  | 
      
      
         | 1092 | 
          | 
          | 
         }
  | 
      
      
         | 1093 | 
          | 
          | 
          
  | 
      
      
         | 1094 | 
          | 
          | 
         #endif
  | 
      
      
         | 1095 | 
          | 
          | 
          
  | 
      
      
         | 1096 | 
          | 
          | 
         #ifdef FLOAT128
  | 
      
      
         | 1097 | 
          | 
          | 
          
  | 
      
      
         | 1098 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1099 | 
          | 
          | 
         | The pattern for a default generated quadruple-precision NaN.  The `high' and
  | 
      
      
         | 1100 | 
          | 
          | 
         | `low' values hold the most- and least-significant bits, respectively.
  | 
      
      
         | 1101 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1102 | 
          | 
          | 
         #define float128_default_nan_high LIT64( 0xFFFF800000000000 )
  | 
      
      
         | 1103 | 
          | 
          | 
         #define float128_default_nan_low  LIT64( 0x0000000000000000 )
  | 
      
      
         | 1104 | 
          | 
          | 
          
  | 
      
      
         | 1105 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1106 | 
          | 
          | 
         | Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
  | 
      
      
         | 1107 | 
          | 
          | 
         | otherwise returns 0.
  | 
      
      
         | 1108 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1109 | 
          | 
          | 
          
  | 
      
      
         | 1110 | 
          | 
          | 
         flag float128_is_nan( float128 a )
  | 
      
      
         | 1111 | 
          | 
          | 
         {
  | 
      
      
         | 1112 | 
          | 
          | 
          
  | 
      
      
         | 1113 | 
          | 
          | 
             return
  | 
      
      
         | 1114 | 
          | 
          | 
                    ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
  | 
      
      
         | 1115 | 
          | 
          | 
                 && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
  | 
      
      
         | 1116 | 
          | 
          | 
          
  | 
      
      
         | 1117 | 
          | 
          | 
         }
  | 
      
      
         | 1118 | 
          | 
          | 
          
  | 
      
      
         | 1119 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1120 | 
          | 
          | 
         | Returns 1 if the quadruple-precision floating-point value `a' is a
  | 
      
      
         | 1121 | 
          | 
          | 
         | signaling NaN; otherwise returns 0.
  | 
      
      
         | 1122 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1123 | 
          | 
          | 
          
  | 
      
      
         | 1124 | 
          | 
          | 
         flag float128_is_signaling_nan( float128 a )
  | 
      
      
         | 1125 | 
          | 
          | 
         {
  | 
      
      
         | 1126 | 
          | 
          | 
          
  | 
      
      
         | 1127 | 
          | 
          | 
             return
  | 
      
      
         | 1128 | 
          | 
          | 
                    ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
  | 
      
      
         | 1129 | 
          | 
          | 
                 && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
  | 
      
      
         | 1130 | 
          | 
          | 
          
  | 
      
      
         | 1131 | 
          | 
          | 
         }
  | 
      
      
         | 1132 | 
          | 
          | 
          
  | 
      
      
         | 1133 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1134 | 
          | 
          | 
         | Returns the result of converting the quadruple-precision floating-point NaN
  | 
      
      
         | 1135 | 
          | 
          | 
         | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
  | 
      
      
         | 1136 | 
          | 
          | 
         | exception is raised.
  | 
      
      
         | 1137 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1138 | 
          | 
          | 
          
  | 
      
      
         | 1139 | 
          | 
          | 
         static commonNaNT float128ToCommonNaN( float128 a )
  | 
      
      
         | 1140 | 
          | 
          | 
         {
  | 
      
      
         | 1141 | 
          | 
          | 
             commonNaNT z;
  | 
      
      
         | 1142 | 
          | 
          | 
          
  | 
      
      
         | 1143 | 
          | 
          | 
             if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
  | 
      
      
         | 1144 | 
          | 
          | 
             z.sign = a.high>>63;
  | 
      
      
         | 1145 | 
          | 
          | 
             shortShift128Left( a.high, a.low, 16, &z.high, &z.low );
  | 
      
      
         | 1146 | 
          | 
          | 
             return z;
  | 
      
      
         | 1147 | 
          | 
          | 
          
  | 
      
      
         | 1148 | 
          | 
          | 
         }
  | 
      
      
         | 1149 | 
          | 
          | 
          
  | 
      
      
         | 1150 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1151 | 
          | 
          | 
         | Returns the result of converting the canonical NaN `a' to the quadruple-
  | 
      
      
         | 1152 | 
          | 
          | 
         | precision floating-point format.
  | 
      
      
         | 1153 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1154 | 
          | 
          | 
          
  | 
      
      
         | 1155 | 
          | 
          | 
         static float128 commonNaNToFloat128( commonNaNT a )
  | 
      
      
         | 1156 | 
          | 
          | 
         {
  | 
      
      
         | 1157 | 
          | 
          | 
             float128 z;
  | 
      
      
         | 1158 | 
          | 
          | 
          
  | 
      
      
         | 1159 | 
          | 
          | 
             shift128Right( a.high, a.low, 16, &z.high, &z.low );
  | 
      
      
         | 1160 | 
          | 
          | 
             z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 );
  | 
      
      
         | 1161 | 
          | 
          | 
             return z;
  | 
      
      
         | 1162 | 
          | 
          | 
          
  | 
      
      
         | 1163 | 
          | 
          | 
         }
  | 
      
      
         | 1164 | 
          | 
          | 
          
  | 
      
      
         | 1165 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1166 | 
          | 
          | 
         | Takes two quadruple-precision floating-point values `a' and `b', one of
  | 
      
      
         | 1167 | 
          | 
          | 
         | which is a NaN, and returns the appropriate NaN result.  If either `a' or
  | 
      
      
         | 1168 | 
          | 
          | 
         | `b' is a signaling NaN, the invalid exception is raised.
  | 
      
      
         | 1169 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1170 | 
          | 
          | 
          
  | 
      
      
         | 1171 | 
          | 
          | 
         static float128 propagateFloat128NaN( float128 a, float128 b )
  | 
      
      
         | 1172 | 
          | 
          | 
         {
  | 
      
      
         | 1173 | 
          | 
          | 
             flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
  | 
      
      
         | 1174 | 
          | 
          | 
          
  | 
      
      
         | 1175 | 
          | 
          | 
             aIsNaN = float128_is_nan( a );
  | 
      
      
         | 1176 | 
          | 
          | 
             aIsSignalingNaN = float128_is_signaling_nan( a );
  | 
      
      
         | 1177 | 
          | 
          | 
             bIsNaN = float128_is_nan( b );
  | 
      
      
         | 1178 | 
          | 
          | 
             bIsSignalingNaN = float128_is_signaling_nan( b );
  | 
      
      
         | 1179 | 
          | 
          | 
             a.high |= LIT64( 0x0000800000000000 );
  | 
      
      
         | 1180 | 
          | 
          | 
             b.high |= LIT64( 0x0000800000000000 );
  | 
      
      
         | 1181 | 
          | 
          | 
             if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
  | 
      
      
         | 1182 | 
          | 
          | 
             if ( aIsSignalingNaN ) {
  | 
      
      
         | 1183 | 
          | 
          | 
                 if ( bIsSignalingNaN ) goto returnLargerSignificand;
  | 
      
      
         | 1184 | 
          | 
          | 
                 return bIsNaN ? b : a;
  | 
      
      
         | 1185 | 
          | 
          | 
             }
  | 
      
      
         | 1186 | 
          | 
          | 
             else if ( aIsNaN ) {
  | 
      
      
         | 1187 | 
          | 
          | 
                 if ( bIsSignalingNaN | ! bIsNaN ) return a;
  | 
      
      
         | 1188 | 
          | 
          | 
          returnLargerSignificand:
  | 
      
      
         | 1189 | 
          | 
          | 
                 if ( lt128( a.high<<1, a.low, b.high<<1, b.low ) ) return b;
  | 
      
      
         | 1190 | 
          | 
          | 
                 if ( lt128( b.high<<1, b.low, a.high<<1, a.low ) ) return a;
  | 
      
      
         | 1191 | 
          | 
          | 
                 return ( a.high < b.high ) ? a : b;
  | 
      
      
         | 1192 | 
          | 
          | 
             }
  | 
      
      
         | 1193 | 
          | 
          | 
             else {
  | 
      
      
         | 1194 | 
          | 
          | 
                 return b;
  | 
      
      
         | 1195 | 
          | 
          | 
             }
  | 
      
      
         | 1196 | 
          | 
          | 
          
  | 
      
      
         | 1197 | 
          | 
          | 
         }
  | 
      
      
         | 1198 | 
          | 
          | 
          
  | 
      
      
         | 1199 | 
          | 
          | 
         #endif
  | 
      
      
         | 1200 | 
          | 
          | 
          
  | 
      
      
         | 1201 | 
          | 
          | 
          
  | 
      
      
         | 1202 | 
          | 
          | 
          
  | 
      
      
         | 1203 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1204 | 
          | 
          | 
         | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
  | 
      
      
         | 1205 | 
          | 
          | 
         | and 7, and returns the properly rounded 32-bit integer corresponding to the
  | 
      
      
         | 1206 | 
          | 
          | 
         | input.  If `zSign' is 1, the input is negated before being converted to an
  | 
      
      
         | 1207 | 
          | 
          | 
         | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
  | 
      
      
         | 1208 | 
          | 
          | 
         | is simply rounded to an integer, with the inexact exception raised if the
  | 
      
      
         | 1209 | 
          | 
          | 
         | input cannot be represented exactly as an integer.  However, if the fixed-
  | 
      
      
         | 1210 | 
          | 
          | 
         | point input is too large, the invalid exception is raised and the largest
  | 
      
      
         | 1211 | 
          | 
          | 
         | positive or negative integer is returned.
  | 
      
      
         | 1212 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1213 | 
          | 
          | 
          
  | 
      
      
         | 1214 | 
          | 
          | 
         static int32 roundAndPackInt32( flag zSign, bits64 absZ )
  | 
      
      
         | 1215 | 
          | 
          | 
         {
  | 
      
      
         | 1216 | 
          | 
          | 
             int8 roundingMode;
  | 
      
      
         | 1217 | 
          | 
          | 
             flag roundNearestEven;
  | 
      
      
         | 1218 | 
          | 
          | 
             int8 roundIncrement, roundBits;
  | 
      
      
         | 1219 | 
          | 
          | 
             int32 z;
  | 
      
      
         | 1220 | 
          | 
          | 
          
  | 
      
      
         | 1221 | 
          | 
          | 
             roundingMode = float_rounding_mode;
  | 
      
      
         | 1222 | 
          | 
          | 
             roundNearestEven = ( roundingMode == float_round_nearest_even );
  | 
      
      
         | 1223 | 
          | 
          | 
             roundIncrement = 0x40;
  | 
      
      
         | 1224 | 
          | 
          | 
             if ( ! roundNearestEven ) {
  | 
      
      
         | 1225 | 
          | 
          | 
                 if ( roundingMode == float_round_to_zero ) {
  | 
      
      
         | 1226 | 
          | 
          | 
                     roundIncrement = 0;
  | 
      
      
         | 1227 | 
          | 
          | 
                 }
  | 
      
      
         | 1228 | 
          | 
          | 
                 else {
  | 
      
      
         | 1229 | 
          | 
          | 
                     roundIncrement = 0x7F;
  | 
      
      
         | 1230 | 
          | 
          | 
                     if ( zSign ) {
  | 
      
      
         | 1231 | 
          | 
          | 
                         if ( roundingMode == float_round_up ) roundIncrement = 0;
  | 
      
      
         | 1232 | 
          | 
          | 
                     }
  | 
      
      
         | 1233 | 
          | 
          | 
                     else {
  | 
      
      
         | 1234 | 
          | 
          | 
                         if ( roundingMode == float_round_down ) roundIncrement = 0;
  | 
      
      
         | 1235 | 
          | 
          | 
                     }
  | 
      
      
         | 1236 | 
          | 
          | 
                 }
  | 
      
      
         | 1237 | 
          | 
          | 
             }
  | 
      
      
         | 1238 | 
          | 
          | 
             roundBits = absZ & 0x7F;
  | 
      
      
         | 1239 | 
          | 
          | 
             absZ = ( absZ + roundIncrement )>>7;
  | 
      
      
         | 1240 | 
          | 
          | 
             absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
  | 
      
      
         | 1241 | 
          | 
          | 
             z = absZ;
  | 
      
      
         | 1242 | 
          | 
          | 
             if ( zSign ) z = - z;
  | 
      
      
         | 1243 | 
          | 
          | 
             if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
  | 
      
      
         | 1244 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 1245 | 
          | 
          | 
                 return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
  | 
      
      
         | 1246 | 
          | 
          | 
             }
  | 
      
      
         | 1247 | 
          | 
          | 
             if ( roundBits ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 1248 | 
          | 
          | 
             return z;
  | 
      
      
         | 1249 | 
          | 
          | 
          
  | 
      
      
         | 1250 | 
          | 
          | 
         }
  | 
      
      
         | 1251 | 
          | 
          | 
          
  | 
      
      
         | 1252 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1253 | 
          | 
          | 
         | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
  | 
      
      
         | 1254 | 
          | 
          | 
         | `absZ1', with binary point between bits 63 and 64 (between the input words),
  | 
      
      
         | 1255 | 
          | 
          | 
         | and returns the properly rounded 64-bit integer corresponding to the input.
  | 
      
      
         | 1256 | 
          | 
          | 
         | If `zSign' is 1, the input is negated before being converted to an integer.
  | 
      
      
         | 1257 | 
          | 
          | 
         | Ordinarily, the fixed-point input is simply rounded to an integer, with
  | 
      
      
         | 1258 | 
          | 
          | 
         | the inexact exception raised if the input cannot be represented exactly as
  | 
      
      
         | 1259 | 
          | 
          | 
         | an integer.  However, if the fixed-point input is too large, the invalid
  | 
      
      
         | 1260 | 
          | 
          | 
         | exception is raised and the largest positive or negative integer is
  | 
      
      
         | 1261 | 
          | 
          | 
         | returned.
  | 
      
      
         | 1262 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1263 | 
          | 
          | 
          
  | 
      
      
         | 1264 | 
          | 
          | 
         static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
  | 
      
      
         | 1265 | 
          | 
          | 
         {
  | 
      
      
         | 1266 | 
          | 
          | 
             int8 roundingMode;
  | 
      
      
         | 1267 | 
          | 
          | 
             flag roundNearestEven, increment;
  | 
      
      
         | 1268 | 
          | 
          | 
             int64 z;
  | 
      
      
         | 1269 | 
          | 
          | 
          
  | 
      
      
         | 1270 | 
          | 
          | 
             roundingMode = float_rounding_mode;
  | 
      
      
         | 1271 | 
          | 
          | 
             roundNearestEven = ( roundingMode == float_round_nearest_even );
  | 
      
      
         | 1272 | 
          | 
          | 
             increment = ( (sbits64) absZ1 < 0 );
  | 
      
      
         | 1273 | 
          | 
          | 
             if ( ! roundNearestEven ) {
  | 
      
      
         | 1274 | 
          | 
          | 
                 if ( roundingMode == float_round_to_zero ) {
  | 
      
      
         | 1275 | 
          | 
          | 
                     increment = 0;
  | 
      
      
         | 1276 | 
          | 
          | 
                 }
  | 
      
      
         | 1277 | 
          | 
          | 
                 else {
  | 
      
      
         | 1278 | 
          | 
          | 
                     if ( zSign ) {
  | 
      
      
         | 1279 | 
          | 
          | 
                         increment = ( roundingMode == float_round_down ) && absZ1;
  | 
      
      
         | 1280 | 
          | 
          | 
                     }
  | 
      
      
         | 1281 | 
          | 
          | 
                     else {
  | 
      
      
         | 1282 | 
          | 
          | 
                         increment = ( roundingMode == float_round_up ) && absZ1;
  | 
      
      
         | 1283 | 
          | 
          | 
                     }
  | 
      
      
         | 1284 | 
          | 
          | 
                 }
  | 
      
      
         | 1285 | 
          | 
          | 
             }
  | 
      
      
         | 1286 | 
          | 
          | 
             if ( increment ) {
  | 
      
      
         | 1287 | 
          | 
          | 
                 ++absZ0;
  | 
      
      
         | 1288 | 
          | 
          | 
                 if ( absZ0 == 0 ) goto overflow;
  | 
      
      
         | 1289 | 
          | 
          | 
                 absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
  | 
      
      
         | 1290 | 
          | 
          | 
             }
  | 
      
      
         | 1291 | 
          | 
          | 
             z = absZ0;
  | 
      
      
         | 1292 | 
          | 
          | 
             if ( zSign ) z = - z;
  | 
      
      
         | 1293 | 
          | 
          | 
             if ( z && ( ( z < 0 ) ^ zSign ) ) {
  | 
      
      
         | 1294 | 
          | 
          | 
          overflow:
  | 
      
      
         | 1295 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 1296 | 
          | 
          | 
                 return
  | 
      
      
         | 1297 | 
          | 
          | 
                       zSign ? (sbits64) LIT64( 0x8000000000000000 )
  | 
      
      
         | 1298 | 
          | 
          | 
                     : LIT64( 0x7FFFFFFFFFFFFFFF );
  | 
      
      
         | 1299 | 
          | 
          | 
             }
  | 
      
      
         | 1300 | 
          | 
          | 
             if ( absZ1 ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 1301 | 
          | 
          | 
             return z;
  | 
      
      
         | 1302 | 
          | 
          | 
          
  | 
      
      
         | 1303 | 
          | 
          | 
         }
  | 
      
      
         | 1304 | 
          | 
          | 
          
  | 
      
      
         | 1305 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1306 | 
          | 
          | 
         | Returns the fraction bits of the single-precision floating-point value `a'.
  | 
      
      
         | 1307 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1308 | 
          | 
          | 
          
  | 
      
      
         | 1309 | 
          | 
          | 
         INLINE bits32 extractFloat32Frac( float32 a )
  | 
      
      
         | 1310 | 
          | 
          | 
         {
  | 
      
      
         | 1311 | 
          | 
          | 
          
  | 
      
      
         | 1312 | 
          | 
          | 
             return a & 0x007FFFFF;
  | 
      
      
         | 1313 | 
          | 
          | 
          
  | 
      
      
         | 1314 | 
          | 
          | 
         }
  | 
      
      
         | 1315 | 
          | 
          | 
          
  | 
      
      
         | 1316 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1317 | 
          | 
          | 
         | Returns the exponent bits of the single-precision floating-point value `a'.
  | 
      
      
         | 1318 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1319 | 
          | 
          | 
          
  | 
      
      
         | 1320 | 
          | 
          | 
         INLINE int16 extractFloat32Exp( float32 a )
  | 
      
      
         | 1321 | 
          | 
          | 
         {
  | 
      
      
         | 1322 | 
          | 
          | 
          
  | 
      
      
         | 1323 | 
          | 
          | 
             return ( a>>23 ) & 0xFF;
  | 
      
      
         | 1324 | 
          | 
          | 
          
  | 
      
      
         | 1325 | 
          | 
          | 
         }
  | 
      
      
         | 1326 | 
          | 
          | 
          
  | 
      
      
         | 1327 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1328 | 
          | 
          | 
         | Returns the sign bit of the single-precision floating-point value `a'.
  | 
      
      
         | 1329 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1330 | 
          | 
          | 
          
  | 
      
      
         | 1331 | 
          | 
          | 
         INLINE flag extractFloat32Sign( float32 a )
  | 
      
      
         | 1332 | 
          | 
          | 
         {
  | 
      
      
         | 1333 | 
          | 
          | 
          
  | 
      
      
         | 1334 | 
          | 
          | 
             return a>>31;
  | 
      
      
         | 1335 | 
          | 
          | 
          
  | 
      
      
         | 1336 | 
          | 
          | 
         }
  | 
      
      
         | 1337 | 
          | 
          | 
          
  | 
      
      
         | 1338 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1339 | 
          | 
          | 
         | Normalizes the subnormal single-precision floating-point value represented
  | 
      
      
         | 1340 | 
          | 
          | 
         | by the denormalized significand `aSig'.  The normalized exponent and
  | 
      
      
         | 1341 | 
          | 
          | 
         | significand are stored at the locations pointed to by `zExpPtr' and
  | 
      
      
         | 1342 | 
          | 
          | 
         | `zSigPtr', respectively.
  | 
      
      
         | 1343 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1344 | 
          | 
          | 
          
  | 
      
      
         | 1345 | 
          | 
          | 
         static void
  | 
      
      
         | 1346 | 
          | 
          | 
          normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
  | 
      
      
         | 1347 | 
          | 
          | 
         {
  | 
      
      
         | 1348 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 1349 | 
          | 
          | 
          
  | 
      
      
         | 1350 | 
          | 
          | 
             shiftCount = countLeadingZeros32( aSig ) - 8;
  | 
      
      
         | 1351 | 
          | 
          | 
             *zSigPtr = aSig<<shiftCount;
  | 
      
      
         | 1352 | 
          | 
          | 
             *zExpPtr = 1 - shiftCount;
  | 
      
      
         | 1353 | 
          | 
          | 
          
  | 
      
      
         | 1354 | 
          | 
          | 
         }
  | 
      
      
         | 1355 | 
          | 
          | 
          
  | 
      
      
         | 1356 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1357 | 
          | 
          | 
         | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
  | 
      
      
         | 1358 | 
          | 
          | 
         | single-precision floating-point value, returning the result.  After being
  | 
      
      
         | 1359 | 
          | 
          | 
         | shifted into the proper positions, the three fields are simply added
  | 
      
      
         | 1360 | 
          | 
          | 
         | together to form the result.  This means that any integer portion of `zSig'
  | 
      
      
         | 1361 | 
          | 
          | 
         | will be added into the exponent.  Since a properly normalized significand
  | 
      
      
         | 1362 | 
          | 
          | 
         | will have an integer portion equal to 1, the `zExp' input should be 1 less
  | 
      
      
         | 1363 | 
          | 
          | 
         | than the desired result exponent whenever `zSig' is a complete, normalized
  | 
      
      
         | 1364 | 
          | 
          | 
         | significand.
  | 
      
      
         | 1365 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1366 | 
          | 
          | 
          
  | 
      
      
         | 1367 | 
          | 
          | 
         INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
  | 
      
      
         | 1368 | 
          | 
          | 
         {
  | 
      
      
         | 1369 | 
          | 
          | 
          
  | 
      
      
         | 1370 | 
          | 
          | 
             return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
  | 
      
      
         | 1371 | 
          | 
          | 
          
  | 
      
      
         | 1372 | 
          | 
          | 
         }
  | 
      
      
         | 1373 | 
          | 
          | 
          
  | 
      
      
         | 1374 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1375 | 
          | 
          | 
         | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  | 
      
      
         | 1376 | 
          | 
          | 
         | and significand `zSig', and returns the proper single-precision floating-
  | 
      
      
         | 1377 | 
          | 
          | 
         | point value corresponding to the abstract input.  Ordinarily, the abstract
  | 
      
      
         | 1378 | 
          | 
          | 
         | value is simply rounded and packed into the single-precision format, with
  | 
      
      
         | 1379 | 
          | 
          | 
         | the inexact exception raised if the abstract input cannot be represented
  | 
      
      
         | 1380 | 
          | 
          | 
         | exactly.  However, if the abstract value is too large, the overflow and
  | 
      
      
         | 1381 | 
          | 
          | 
         | inexact exceptions are raised and an infinity or maximal finite value is
  | 
      
      
         | 1382 | 
          | 
          | 
         | returned.  If the abstract value is too small, the input value is rounded to
  | 
      
      
         | 1383 | 
          | 
          | 
         | a subnormal number, and the underflow and inexact exceptions are raised if
  | 
      
      
         | 1384 | 
          | 
          | 
         | the abstract input cannot be represented exactly as a subnormal single-
  | 
      
      
         | 1385 | 
          | 
          | 
         | precision floating-point number.
  | 
      
      
         | 1386 | 
          | 
          | 
         |     The input significand `zSig' has its binary point between bits 30
  | 
      
      
         | 1387 | 
          | 
          | 
         | and 29, which is 7 bits to the left of the usual location.  This shifted
  | 
      
      
         | 1388 | 
          | 
          | 
         | significand must be normalized or smaller.  If `zSig' is not normalized,
  | 
      
      
         | 1389 | 
          | 
          | 
         | `zExp' must be 0; in that case, the result returned is a subnormal number,
  | 
      
      
         | 1390 | 
          | 
          | 
         | and it must not require rounding.  In the usual case that `zSig' is
  | 
      
      
         | 1391 | 
          | 
          | 
         | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
  | 
      
      
         | 1392 | 
          | 
          | 
         | The handling of underflow and overflow follows the IEC/IEEE Standard for
  | 
      
      
         | 1393 | 
          | 
          | 
         | Binary Floating-Point Arithmetic.
  | 
      
      
         | 1394 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1395 | 
          | 
          | 
          
  | 
      
      
         | 1396 | 
          | 
          | 
         static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
  | 
      
      
         | 1397 | 
          | 
          | 
         {
  | 
      
      
         | 1398 | 
          | 
          | 
             int8 roundingMode;
  | 
      
      
         | 1399 | 
          | 
          | 
             flag roundNearestEven;
  | 
      
      
         | 1400 | 
          | 
          | 
             int8 roundIncrement, roundBits;
  | 
      
      
         | 1401 | 
          | 
          | 
             flag isTiny;
  | 
      
      
         | 1402 | 
          | 
          | 
          
  | 
      
      
         | 1403 | 
          | 
          | 
             roundingMode = float_rounding_mode;
  | 
      
      
         | 1404 | 
          | 
          | 
             roundNearestEven = ( roundingMode == float_round_nearest_even );
  | 
      
      
         | 1405 | 
          | 
          | 
             roundIncrement = 0x40;
  | 
      
      
         | 1406 | 
          | 
          | 
             if ( ! roundNearestEven ) {
  | 
      
      
         | 1407 | 
          | 
          | 
                 if ( roundingMode == float_round_to_zero ) {
  | 
      
      
         | 1408 | 
          | 
          | 
                     roundIncrement = 0;
  | 
      
      
         | 1409 | 
          | 
          | 
                 }
  | 
      
      
         | 1410 | 
          | 
          | 
                 else {
  | 
      
      
         | 1411 | 
          | 
          | 
                     roundIncrement = 0x7F;
  | 
      
      
         | 1412 | 
          | 
          | 
                     if ( zSign ) {
  | 
      
      
         | 1413 | 
          | 
          | 
                         if ( roundingMode == float_round_up ) roundIncrement = 0;
  | 
      
      
         | 1414 | 
          | 
          | 
                     }
  | 
      
      
         | 1415 | 
          | 
          | 
                     else {
  | 
      
      
         | 1416 | 
          | 
          | 
                         if ( roundingMode == float_round_down ) roundIncrement = 0;
  | 
      
      
         | 1417 | 
          | 
          | 
                     }
  | 
      
      
         | 1418 | 
          | 
          | 
                 }
  | 
      
      
         | 1419 | 
          | 
          | 
             }
  | 
      
      
         | 1420 | 
          | 
          | 
             roundBits = zSig & 0x7F;
  | 
      
      
         | 1421 | 
          | 
          | 
             if ( 0xFD <= (bits16) zExp ) {
  | 
      
      
         | 1422 | 
          | 
          | 
                 if (    ( 0xFD < zExp )
  | 
      
      
         | 1423 | 
          | 
          | 
                      || (    ( zExp == 0xFD )
  | 
      
      
         | 1424 | 
          | 
          | 
                           && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
  | 
      
      
         | 1425 | 
          | 
          | 
                    ) {
  | 
      
      
         | 1426 | 
          | 
          | 
                     float_raise( float_flag_overflow | float_flag_inexact );
  | 
      
      
         | 1427 | 
          | 
          | 
                     return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
  | 
      
      
         | 1428 | 
          | 
          | 
                 }
  | 
      
      
         | 1429 | 
          | 
          | 
                 if ( zExp < 0 ) {
  | 
      
      
         | 1430 | 
          | 
          | 
                     isTiny =
  | 
      
      
         | 1431 | 
          | 
          | 
                            ( float_detect_tininess == float_tininess_before_rounding )
  | 
      
      
         | 1432 | 
          | 
          | 
                         || ( zExp < -1 )
  | 
      
      
         | 1433 | 
          | 
          | 
                         || ( zSig + roundIncrement < 0x80000000 );
  | 
      
      
         | 1434 | 
          | 
          | 
                     shift32RightJamming( zSig, - zExp, &zSig );
  | 
      
      
         | 1435 | 
          | 
          | 
                     zExp = 0;
  | 
      
      
         | 1436 | 
          | 
          | 
                     roundBits = zSig & 0x7F;
  | 
      
      
         | 1437 | 
          | 
          | 
                     if ( isTiny && roundBits ) float_raise( float_flag_underflow );
  | 
      
      
         | 1438 | 
          | 
          | 
                 }
  | 
      
      
         | 1439 | 
          | 
          | 
             }
  | 
      
      
         | 1440 | 
          | 
          | 
             if ( roundBits ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 1441 | 
          | 
          | 
             zSig = ( zSig + roundIncrement )>>7;
  | 
      
      
         | 1442 | 
          | 
          | 
             zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
  | 
      
      
         | 1443 | 
          | 
          | 
             if ( zSig == 0 ) zExp = 0;
  | 
      
      
         | 1444 | 
          | 
          | 
             return packFloat32( zSign, zExp, zSig );
  | 
      
      
         | 1445 | 
          | 
          | 
          
  | 
      
      
         | 1446 | 
          | 
          | 
         }
  | 
      
      
         | 1447 | 
          | 
          | 
          
  | 
      
      
         | 1448 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1449 | 
          | 
          | 
         | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  | 
      
      
         | 1450 | 
          | 
          | 
         | and significand `zSig', and returns the proper single-precision floating-
  | 
      
      
         | 1451 | 
          | 
          | 
         | point value corresponding to the abstract input.  This routine is just like
  | 
      
      
         | 1452 | 
          | 
          | 
         | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
  | 
      
      
         | 1453 | 
          | 
          | 
         | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
  | 
      
      
         | 1454 | 
          | 
          | 
         | floating-point exponent.
  | 
      
      
         | 1455 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1456 | 
          | 
          | 
          
  | 
      
      
         | 1457 | 
          | 
          | 
         static float32
  | 
      
      
         | 1458 | 
          | 
          | 
          normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
  | 
      
      
         | 1459 | 
          | 
          | 
         {
  | 
      
      
         | 1460 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 1461 | 
          | 
          | 
          
  | 
      
      
         | 1462 | 
          | 
          | 
             shiftCount = countLeadingZeros32( zSig ) - 1;
  | 
      
      
         | 1463 | 
          | 
          | 
             return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
  | 
      
      
         | 1464 | 
          | 
          | 
          
  | 
      
      
         | 1465 | 
          | 
          | 
         }
  | 
      
      
         | 1466 | 
          | 
          | 
          
  | 
      
      
         | 1467 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1468 | 
          | 
          | 
         | Returns the fraction bits of the double-precision floating-point value `a'.
  | 
      
      
         | 1469 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1470 | 
          | 
          | 
          
  | 
      
      
         | 1471 | 
          | 
          | 
         INLINE bits64 extractFloat64Frac( float64 a )
  | 
      
      
         | 1472 | 
          | 
          | 
         {
  | 
      
      
         | 1473 | 
          | 
          | 
          
  | 
      
      
         | 1474 | 
          | 
          | 
             return a & LIT64( 0x000FFFFFFFFFFFFF );
  | 
      
      
         | 1475 | 
          | 
          | 
          
  | 
      
      
         | 1476 | 
          | 
          | 
         }
  | 
      
      
         | 1477 | 
          | 
          | 
          
  | 
      
      
         | 1478 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1479 | 
          | 
          | 
         | Returns the exponent bits of the double-precision floating-point value `a'.
  | 
      
      
         | 1480 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1481 | 
          | 
          | 
          
  | 
      
      
         | 1482 | 
          | 
          | 
         INLINE int16 extractFloat64Exp( float64 a )
  | 
      
      
         | 1483 | 
          | 
          | 
         {
  | 
      
      
         | 1484 | 
          | 
          | 
          
  | 
      
      
         | 1485 | 
          | 
          | 
             return ( a>>52 ) & 0x7FF;
  | 
      
      
         | 1486 | 
          | 
          | 
          
  | 
      
      
         | 1487 | 
          | 
          | 
         }
  | 
      
      
         | 1488 | 
          | 
          | 
          
  | 
      
      
         | 1489 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1490 | 
          | 
          | 
         | Returns the sign bit of the double-precision floating-point value `a'.
  | 
      
      
         | 1491 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1492 | 
          | 
          | 
          
  | 
      
      
         | 1493 | 
          | 
          | 
         INLINE flag extractFloat64Sign( float64 a )
  | 
      
      
         | 1494 | 
          | 
          | 
         {
  | 
      
      
         | 1495 | 
          | 
          | 
          
  | 
      
      
         | 1496 | 
          | 
          | 
             return a>>63;
  | 
      
      
         | 1497 | 
          | 
          | 
          
  | 
      
      
         | 1498 | 
          | 
          | 
         }
  | 
      
      
         | 1499 | 
          | 
          | 
          
  | 
      
      
         | 1500 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1501 | 
          | 
          | 
         | Normalizes the subnormal double-precision floating-point value represented
  | 
      
      
         | 1502 | 
          | 
          | 
         | by the denormalized significand `aSig'.  The normalized exponent and
  | 
      
      
         | 1503 | 
          | 
          | 
         | significand are stored at the locations pointed to by `zExpPtr' and
  | 
      
      
         | 1504 | 
          | 
          | 
         | `zSigPtr', respectively.
  | 
      
      
         | 1505 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1506 | 
          | 
          | 
          
  | 
      
      
         | 1507 | 
          | 
          | 
         static void
  | 
      
      
         | 1508 | 
          | 
          | 
          normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
  | 
      
      
         | 1509 | 
          | 
          | 
         {
  | 
      
      
         | 1510 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 1511 | 
          | 
          | 
          
  | 
      
      
         | 1512 | 
          | 
          | 
             shiftCount = countLeadingZeros64( aSig ) - 11;
  | 
      
      
         | 1513 | 
          | 
          | 
             *zSigPtr = aSig<<shiftCount;
  | 
      
      
         | 1514 | 
          | 
          | 
             *zExpPtr = 1 - shiftCount;
  | 
      
      
         | 1515 | 
          | 
          | 
          
  | 
      
      
         | 1516 | 
          | 
          | 
         }
  | 
      
      
         | 1517 | 
          | 
          | 
          
  | 
      
      
         | 1518 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1519 | 
          | 
          | 
         | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
  | 
      
      
         | 1520 | 
          | 
          | 
         | double-precision floating-point value, returning the result.  After being
  | 
      
      
         | 1521 | 
          | 
          | 
         | shifted into the proper positions, the three fields are simply added
  | 
      
      
         | 1522 | 
          | 
          | 
         | together to form the result.  This means that any integer portion of `zSig'
  | 
      
      
         | 1523 | 
          | 
          | 
         | will be added into the exponent.  Since a properly normalized significand
  | 
      
      
         | 1524 | 
          | 
          | 
         | will have an integer portion equal to 1, the `zExp' input should be 1 less
  | 
      
      
         | 1525 | 
          | 
          | 
         | than the desired result exponent whenever `zSig' is a complete, normalized
  | 
      
      
         | 1526 | 
          | 
          | 
         | significand.
  | 
      
      
         | 1527 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1528 | 
          | 
          | 
          
  | 
      
      
         | 1529 | 
          | 
          | 
         INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
  | 
      
      
         | 1530 | 
          | 
          | 
         {
  | 
      
      
         | 1531 | 
          | 
          | 
          
  | 
      
      
         | 1532 | 
          | 
          | 
             return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
  | 
      
      
         | 1533 | 
          | 
          | 
          
  | 
      
      
         | 1534 | 
          | 
          | 
         }
  | 
      
      
         | 1535 | 
          | 
          | 
          
  | 
      
      
         | 1536 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1537 | 
          | 
          | 
         | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  | 
      
      
         | 1538 | 
          | 
          | 
         | and significand `zSig', and returns the proper double-precision floating-
  | 
      
      
         | 1539 | 
          | 
          | 
         | point value corresponding to the abstract input.  Ordinarily, the abstract
  | 
      
      
         | 1540 | 
          | 
          | 
         | value is simply rounded and packed into the double-precision format, with
  | 
      
      
         | 1541 | 
          | 
          | 
         | the inexact exception raised if the abstract input cannot be represented
  | 
      
      
         | 1542 | 
          | 
          | 
         | exactly.  However, if the abstract value is too large, the overflow and
  | 
      
      
         | 1543 | 
          | 
          | 
         | inexact exceptions are raised and an infinity or maximal finite value is
  | 
      
      
         | 1544 | 
          | 
          | 
         | returned.  If the abstract value is too small, the input value is rounded
  | 
      
      
         | 1545 | 
          | 
          | 
         | to a subnormal number, and the underflow and inexact exceptions are raised
  | 
      
      
         | 1546 | 
          | 
          | 
         | if the abstract input cannot be represented exactly as a subnormal double-
  | 
      
      
         | 1547 | 
          | 
          | 
         | precision floating-point number.
  | 
      
      
         | 1548 | 
          | 
          | 
         |     The input significand `zSig' has its binary point between bits 62
  | 
      
      
         | 1549 | 
          | 
          | 
         | and 61, which is 10 bits to the left of the usual location.  This shifted
  | 
      
      
         | 1550 | 
          | 
          | 
         | significand must be normalized or smaller.  If `zSig' is not normalized,
  | 
      
      
         | 1551 | 
          | 
          | 
         | `zExp' must be 0; in that case, the result returned is a subnormal number,
  | 
      
      
         | 1552 | 
          | 
          | 
         | and it must not require rounding.  In the usual case that `zSig' is
  | 
      
      
         | 1553 | 
          | 
          | 
         | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
  | 
      
      
         | 1554 | 
          | 
          | 
         | The handling of underflow and overflow follows the IEC/IEEE Standard for
  | 
      
      
         | 1555 | 
          | 
          | 
         | Binary Floating-Point Arithmetic.
  | 
      
      
         | 1556 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1557 | 
          | 
          | 
          
  | 
      
      
         | 1558 | 
          | 
          | 
         static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
  | 
      
      
         | 1559 | 
          | 
          | 
         {
  | 
      
      
         | 1560 | 
          | 
          | 
             int8 roundingMode;
  | 
      
      
         | 1561 | 
          | 
          | 
             flag roundNearestEven;
  | 
      
      
         | 1562 | 
          | 
          | 
             int16 roundIncrement, roundBits;
  | 
      
      
         | 1563 | 
          | 
          | 
             flag isTiny;
  | 
      
      
         | 1564 | 
          | 
          | 
          
  | 
      
      
         | 1565 | 
          | 
          | 
             roundingMode = float_rounding_mode;
  | 
      
      
         | 1566 | 
          | 
          | 
             roundNearestEven = ( roundingMode == float_round_nearest_even );
  | 
      
      
         | 1567 | 
          | 
          | 
             roundIncrement = 0x200;
  | 
      
      
         | 1568 | 
          | 
          | 
             if ( ! roundNearestEven ) {
  | 
      
      
         | 1569 | 
          | 
          | 
                 if ( roundingMode == float_round_to_zero ) {
  | 
      
      
         | 1570 | 
          | 
          | 
                     roundIncrement = 0;
  | 
      
      
         | 1571 | 
          | 
          | 
                 }
  | 
      
      
         | 1572 | 
          | 
          | 
                 else {
  | 
      
      
         | 1573 | 
          | 
          | 
                     roundIncrement = 0x3FF;
  | 
      
      
         | 1574 | 
          | 
          | 
                     if ( zSign ) {
  | 
      
      
         | 1575 | 
          | 
          | 
                         if ( roundingMode == float_round_up ) roundIncrement = 0;
  | 
      
      
         | 1576 | 
          | 
          | 
                     }
  | 
      
      
         | 1577 | 
          | 
          | 
                     else {
  | 
      
      
         | 1578 | 
          | 
          | 
                         if ( roundingMode == float_round_down ) roundIncrement = 0;
  | 
      
      
         | 1579 | 
          | 
          | 
                     }
  | 
      
      
         | 1580 | 
          | 
          | 
                 }
  | 
      
      
         | 1581 | 
          | 
          | 
             }
  | 
      
      
         | 1582 | 
          | 
          | 
             roundBits = zSig & 0x3FF;
  | 
      
      
         | 1583 | 
          | 
          | 
             if ( 0x7FD <= (bits16) zExp ) {
  | 
      
      
         | 1584 | 
          | 
          | 
                 if (    ( 0x7FD < zExp )
  | 
      
      
         | 1585 | 
          | 
          | 
                      || (    ( zExp == 0x7FD )
  | 
      
      
         | 1586 | 
          | 
          | 
                           && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
  | 
      
      
         | 1587 | 
          | 
          | 
                    ) {
  | 
      
      
         | 1588 | 
          | 
          | 
                     float_raise( float_flag_overflow | float_flag_inexact );
  | 
      
      
         | 1589 | 
          | 
          | 
                     return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
  | 
      
      
         | 1590 | 
          | 
          | 
                 }
  | 
      
      
         | 1591 | 
          | 
          | 
                 if ( zExp < 0 ) {
  | 
      
      
         | 1592 | 
          | 
          | 
                     isTiny =
  | 
      
      
         | 1593 | 
          | 
          | 
                            ( float_detect_tininess == float_tininess_before_rounding )
  | 
      
      
         | 1594 | 
          | 
          | 
                         || ( zExp < -1 )
  | 
      
      
         | 1595 | 
          | 
          | 
                         || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
  | 
      
      
         | 1596 | 
          | 
          | 
                     shift64RightJamming( zSig, - zExp, &zSig );
  | 
      
      
         | 1597 | 
          | 
          | 
                     zExp = 0;
  | 
      
      
         | 1598 | 
          | 
          | 
                     roundBits = zSig & 0x3FF;
  | 
      
      
         | 1599 | 
          | 
          | 
                     if ( isTiny && roundBits ) float_raise( float_flag_underflow );
  | 
      
      
         | 1600 | 
          | 
          | 
                 }
  | 
      
      
         | 1601 | 
          | 
          | 
             }
  | 
      
      
         | 1602 | 
          | 
          | 
             if ( roundBits ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 1603 | 
          | 
          | 
             zSig = ( zSig + roundIncrement )>>10;
  | 
      
      
         | 1604 | 
          | 
          | 
             zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
  | 
      
      
         | 1605 | 
          | 
          | 
             if ( zSig == 0 ) zExp = 0;
  | 
      
      
         | 1606 | 
          | 
          | 
             return packFloat64( zSign, zExp, zSig );
  | 
      
      
         | 1607 | 
          | 
          | 
          
  | 
      
      
         | 1608 | 
          | 
          | 
         }
  | 
      
      
         | 1609 | 
          | 
          | 
          
  | 
      
      
         | 1610 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1611 | 
          | 
          | 
         | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  | 
      
      
         | 1612 | 
          | 
          | 
         | and significand `zSig', and returns the proper double-precision floating-
  | 
      
      
         | 1613 | 
          | 
          | 
         | point value corresponding to the abstract input.  This routine is just like
  | 
      
      
         | 1614 | 
          | 
          | 
         | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
  | 
      
      
         | 1615 | 
          | 
          | 
         | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
  | 
      
      
         | 1616 | 
          | 
          | 
         | floating-point exponent.
  | 
      
      
         | 1617 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1618 | 
          | 
          | 
          
  | 
      
      
         | 1619 | 
          | 
          | 
         static float64
  | 
      
      
         | 1620 | 
          | 
          | 
          normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
  | 
      
      
         | 1621 | 
          | 
          | 
         {
  | 
      
      
         | 1622 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 1623 | 
          | 
          | 
          
  | 
      
      
         | 1624 | 
          | 
          | 
             shiftCount = countLeadingZeros64( zSig ) - 1;
  | 
      
      
         | 1625 | 
          | 
          | 
             return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
  | 
      
      
         | 1626 | 
          | 
          | 
          
  | 
      
      
         | 1627 | 
          | 
          | 
         }
  | 
      
      
         | 1628 | 
          | 
          | 
          
  | 
      
      
         | 1629 | 
          | 
          | 
         #ifdef FLOATX80
  | 
      
      
         | 1630 | 
          | 
          | 
          
  | 
      
      
         | 1631 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1632 | 
          | 
          | 
         | Returns the fraction bits of the extended double-precision floating-point
  | 
      
      
         | 1633 | 
          | 
          | 
         | value `a'.
  | 
      
      
         | 1634 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1635 | 
          | 
          | 
          
  | 
      
      
         | 1636 | 
          | 
          | 
         INLINE bits64 extractFloatx80Frac( floatx80 a )
  | 
      
      
         | 1637 | 
          | 
          | 
         {
  | 
      
      
         | 1638 | 
          | 
          | 
          
  | 
      
      
         | 1639 | 
          | 
          | 
             return a.low;
  | 
      
      
         | 1640 | 
          | 
          | 
          
  | 
      
      
         | 1641 | 
          | 
          | 
         }
  | 
      
      
         | 1642 | 
          | 
          | 
          
  | 
      
      
         | 1643 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1644 | 
          | 
          | 
         | Returns the exponent bits of the extended double-precision floating-point
  | 
      
      
         | 1645 | 
          | 
          | 
         | value `a'.
  | 
      
      
         | 1646 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1647 | 
          | 
          | 
          
  | 
      
      
         | 1648 | 
          | 
          | 
         INLINE int32 extractFloatx80Exp( floatx80 a )
  | 
      
      
         | 1649 | 
          | 
          | 
         {
  | 
      
      
         | 1650 | 
          | 
          | 
          
  | 
      
      
         | 1651 | 
          | 
          | 
             return a.high & 0x7FFF;
  | 
      
      
         | 1652 | 
          | 
          | 
          
  | 
      
      
         | 1653 | 
          | 
          | 
         }
  | 
      
      
         | 1654 | 
          | 
          | 
          
  | 
      
      
         | 1655 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1656 | 
          | 
          | 
         | Returns the sign bit of the extended double-precision floating-point value
  | 
      
      
         | 1657 | 
          | 
          | 
         | `a'.
  | 
      
      
         | 1658 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1659 | 
          | 
          | 
          
  | 
      
      
         | 1660 | 
          | 
          | 
         INLINE flag extractFloatx80Sign( floatx80 a )
  | 
      
      
         | 1661 | 
          | 
          | 
         {
  | 
      
      
         | 1662 | 
          | 
          | 
          
  | 
      
      
         | 1663 | 
          | 
          | 
             return a.high>>15;
  | 
      
      
         | 1664 | 
          | 
          | 
          
  | 
      
      
         | 1665 | 
          | 
          | 
         }
  | 
      
      
         | 1666 | 
          | 
          | 
          
  | 
      
      
         | 1667 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1668 | 
          | 
          | 
         | Normalizes the subnormal extended double-precision floating-point value
  | 
      
      
         | 1669 | 
          | 
          | 
         | represented by the denormalized significand `aSig'.  The normalized exponent
  | 
      
      
         | 1670 | 
          | 
          | 
         | and significand are stored at the locations pointed to by `zExpPtr' and
  | 
      
      
         | 1671 | 
          | 
          | 
         | `zSigPtr', respectively.
  | 
      
      
         | 1672 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1673 | 
          | 
          | 
          
  | 
      
      
         | 1674 | 
          | 
          | 
         static void
  | 
      
      
         | 1675 | 
          | 
          | 
          normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
  | 
      
      
         | 1676 | 
          | 
          | 
         {
  | 
      
      
         | 1677 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 1678 | 
          | 
          | 
          
  | 
      
      
         | 1679 | 
          | 
          | 
             shiftCount = countLeadingZeros64( aSig );
  | 
      
      
         | 1680 | 
          | 
          | 
             *zSigPtr = aSig<<shiftCount;
  | 
      
      
         | 1681 | 
          | 
          | 
             *zExpPtr = 1 - shiftCount;
  | 
      
      
         | 1682 | 
          | 
          | 
          
  | 
      
      
         | 1683 | 
          | 
          | 
         }
  | 
      
      
         | 1684 | 
          | 
          | 
          
  | 
      
      
         | 1685 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1686 | 
          | 
          | 
         | Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
  | 
      
      
         | 1687 | 
          | 
          | 
         | extended double-precision floating-point value, returning the result.
  | 
      
      
         | 1688 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1689 | 
          | 
          | 
          
  | 
      
      
         | 1690 | 
          | 
          | 
         INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
  | 
      
      
         | 1691 | 
          | 
          | 
         {
  | 
      
      
         | 1692 | 
          | 
          | 
             floatx80 z;
  | 
      
      
         | 1693 | 
          | 
          | 
          
  | 
      
      
         | 1694 | 
          | 
          | 
             z.low = zSig;
  | 
      
      
         | 1695 | 
          | 
          | 
             z.high = ( ( (bits16) zSign )<<15 ) + zExp;
  | 
      
      
         | 1696 | 
          | 
          | 
             return z;
  | 
      
      
         | 1697 | 
          | 
          | 
          
  | 
      
      
         | 1698 | 
          | 
          | 
         }
  | 
      
      
         | 1699 | 
          | 
          | 
          
  | 
      
      
         | 1700 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1701 | 
          | 
          | 
         | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  | 
      
      
         | 1702 | 
          | 
          | 
         | and extended significand formed by the concatenation of `zSig0' and `zSig1',
  | 
      
      
         | 1703 | 
          | 
          | 
         | and returns the proper extended double-precision floating-point value
  | 
      
      
         | 1704 | 
          | 
          | 
         | corresponding to the abstract input.  Ordinarily, the abstract value is
  | 
      
      
         | 1705 | 
          | 
          | 
         | rounded and packed into the extended double-precision format, with the
  | 
      
      
         | 1706 | 
          | 
          | 
         | inexact exception raised if the abstract input cannot be represented
  | 
      
      
         | 1707 | 
          | 
          | 
         | exactly.  However, if the abstract value is too large, the overflow and
  | 
      
      
         | 1708 | 
          | 
          | 
         | inexact exceptions are raised and an infinity or maximal finite value is
  | 
      
      
         | 1709 | 
          | 
          | 
         | returned.  If the abstract value is too small, the input value is rounded to
  | 
      
      
         | 1710 | 
          | 
          | 
         | a subnormal number, and the underflow and inexact exceptions are raised if
  | 
      
      
         | 1711 | 
          | 
          | 
         | the abstract input cannot be represented exactly as a subnormal extended
  | 
      
      
         | 1712 | 
          | 
          | 
         | double-precision floating-point number.
  | 
      
      
         | 1713 | 
          | 
          | 
         |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
  | 
      
      
         | 1714 | 
          | 
          | 
         | number of bits as single or double precision, respectively.  Otherwise, the
  | 
      
      
         | 1715 | 
          | 
          | 
         | result is rounded to the full precision of the extended double-precision
  | 
      
      
         | 1716 | 
          | 
          | 
         | format.
  | 
      
      
         | 1717 | 
          | 
          | 
         |     The input significand must be normalized or smaller.  If the input
  | 
      
      
         | 1718 | 
          | 
          | 
         | significand is not normalized, `zExp' must be 0; in that case, the result
  | 
      
      
         | 1719 | 
          | 
          | 
         | returned is a subnormal number, and it must not require rounding.  The
  | 
      
      
         | 1720 | 
          | 
          | 
         | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
  | 
      
      
         | 1721 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 1722 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1723 | 
          | 
          | 
          
  | 
      
      
         | 1724 | 
          | 
          | 
         static floatx80
  | 
      
      
         | 1725 | 
          | 
          | 
          roundAndPackFloatx80(
  | 
      
      
         | 1726 | 
          | 
          | 
              int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
  | 
      
      
         | 1727 | 
          | 
          | 
          )
  | 
      
      
         | 1728 | 
          | 
          | 
         {
  | 
      
      
         | 1729 | 
          | 
          | 
             int8 roundingMode;
  | 
      
      
         | 1730 | 
          | 
          | 
             flag roundNearestEven, increment, isTiny;
  | 
      
      
         | 1731 | 
          | 
          | 
             int64 roundIncrement, roundMask, roundBits;
  | 
      
      
         | 1732 | 
          | 
          | 
          
  | 
      
      
         | 1733 | 
          | 
          | 
             roundingMode = float_rounding_mode;
  | 
      
      
         | 1734 | 
          | 
          | 
             roundNearestEven = ( roundingMode == float_round_nearest_even );
  | 
      
      
         | 1735 | 
          | 
          | 
             if ( roundingPrecision == 80 ) goto precision80;
  | 
      
      
         | 1736 | 
          | 
          | 
             if ( roundingPrecision == 64 ) {
  | 
      
      
         | 1737 | 
          | 
          | 
                 roundIncrement = LIT64( 0x0000000000000400 );
  | 
      
      
         | 1738 | 
          | 
          | 
                 roundMask = LIT64( 0x00000000000007FF );
  | 
      
      
         | 1739 | 
          | 
          | 
             }
  | 
      
      
         | 1740 | 
          | 
          | 
             else if ( roundingPrecision == 32 ) {
  | 
      
      
         | 1741 | 
          | 
          | 
                 roundIncrement = LIT64( 0x0000008000000000 );
  | 
      
      
         | 1742 | 
          | 
          | 
                 roundMask = LIT64( 0x000000FFFFFFFFFF );
  | 
      
      
         | 1743 | 
          | 
          | 
             }
  | 
      
      
         | 1744 | 
          | 
          | 
             else {
  | 
      
      
         | 1745 | 
          | 
          | 
                 goto precision80;
  | 
      
      
         | 1746 | 
          | 
          | 
             }
  | 
      
      
         | 1747 | 
          | 
          | 
             zSig0 |= ( zSig1 != 0 );
  | 
      
      
         | 1748 | 
          | 
          | 
             if ( ! roundNearestEven ) {
  | 
      
      
         | 1749 | 
          | 
          | 
                 if ( roundingMode == float_round_to_zero ) {
  | 
      
      
         | 1750 | 
          | 
          | 
                     roundIncrement = 0;
  | 
      
      
         | 1751 | 
          | 
          | 
                 }
  | 
      
      
         | 1752 | 
          | 
          | 
                 else {
  | 
      
      
         | 1753 | 
          | 
          | 
                     roundIncrement = roundMask;
  | 
      
      
         | 1754 | 
          | 
          | 
                     if ( zSign ) {
  | 
      
      
         | 1755 | 
          | 
          | 
                         if ( roundingMode == float_round_up ) roundIncrement = 0;
  | 
      
      
         | 1756 | 
          | 
          | 
                     }
  | 
      
      
         | 1757 | 
          | 
          | 
                     else {
  | 
      
      
         | 1758 | 
          | 
          | 
                         if ( roundingMode == float_round_down ) roundIncrement = 0;
  | 
      
      
         | 1759 | 
          | 
          | 
                     }
  | 
      
      
         | 1760 | 
          | 
          | 
                 }
  | 
      
      
         | 1761 | 
          | 
          | 
             }
  | 
      
      
         | 1762 | 
          | 
          | 
             roundBits = zSig0 & roundMask;
  | 
      
      
         | 1763 | 
          | 
          | 
             if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
  | 
      
      
         | 1764 | 
          | 
          | 
                 if (    ( 0x7FFE < zExp )
  | 
      
      
         | 1765 | 
          | 
          | 
                      || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
  | 
      
      
         | 1766 | 
          | 
          | 
                    ) {
  | 
      
      
         | 1767 | 
          | 
          | 
                     goto overflow;
  | 
      
      
         | 1768 | 
          | 
          | 
                 }
  | 
      
      
         | 1769 | 
          | 
          | 
                 if ( zExp <= 0 ) {
  | 
      
      
         | 1770 | 
          | 
          | 
                     isTiny =
  | 
      
      
         | 1771 | 
          | 
          | 
                            ( float_detect_tininess == float_tininess_before_rounding )
  | 
      
      
         | 1772 | 
          | 
          | 
                         || ( zExp < 0 )
  | 
      
      
         | 1773 | 
          | 
          | 
                         || ( zSig0 <= zSig0 + roundIncrement );
  | 
      
      
         | 1774 | 
          | 
          | 
                     shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
  | 
      
      
         | 1775 | 
          | 
          | 
                     zExp = 0;
  | 
      
      
         | 1776 | 
          | 
          | 
                     roundBits = zSig0 & roundMask;
  | 
      
      
         | 1777 | 
          | 
          | 
                     if ( isTiny && roundBits ) float_raise( float_flag_underflow );
  | 
      
      
         | 1778 | 
          | 
          | 
                     if ( roundBits ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 1779 | 
          | 
          | 
                     zSig0 += roundIncrement;
  | 
      
      
         | 1780 | 
          | 
          | 
                     if ( (sbits64) zSig0 < 0 ) zExp = 1;
  | 
      
      
         | 1781 | 
          | 
          | 
                     roundIncrement = roundMask + 1;
  | 
      
      
         | 1782 | 
          | 
          | 
                     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
  | 
      
      
         | 1783 | 
          | 
          | 
                         roundMask |= roundIncrement;
  | 
      
      
         | 1784 | 
          | 
          | 
                     }
  | 
      
      
         | 1785 | 
          | 
          | 
                     zSig0 &= ~ roundMask;
  | 
      
      
         | 1786 | 
          | 
          | 
                     return packFloatx80( zSign, zExp, zSig0 );
  | 
      
      
         | 1787 | 
          | 
          | 
                 }
  | 
      
      
         | 1788 | 
          | 
          | 
             }
  | 
      
      
         | 1789 | 
          | 
          | 
             if ( roundBits ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 1790 | 
          | 
          | 
             zSig0 += roundIncrement;
  | 
      
      
         | 1791 | 
          | 
          | 
             if ( zSig0 < roundIncrement ) {
  | 
      
      
         | 1792 | 
          | 
          | 
                 ++zExp;
  | 
      
      
         | 1793 | 
          | 
          | 
                 zSig0 = LIT64( 0x8000000000000000 );
  | 
      
      
         | 1794 | 
          | 
          | 
             }
  | 
      
      
         | 1795 | 
          | 
          | 
             roundIncrement = roundMask + 1;
  | 
      
      
         | 1796 | 
          | 
          | 
             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
  | 
      
      
         | 1797 | 
          | 
          | 
                 roundMask |= roundIncrement;
  | 
      
      
         | 1798 | 
          | 
          | 
             }
  | 
      
      
         | 1799 | 
          | 
          | 
             zSig0 &= ~ roundMask;
  | 
      
      
         | 1800 | 
          | 
          | 
             if ( zSig0 == 0 ) zExp = 0;
  | 
      
      
         | 1801 | 
          | 
          | 
             return packFloatx80( zSign, zExp, zSig0 );
  | 
      
      
         | 1802 | 
          | 
          | 
          precision80:
  | 
      
      
         | 1803 | 
          | 
          | 
             increment = ( (sbits64) zSig1 < 0 );
  | 
      
      
         | 1804 | 
          | 
          | 
             if ( ! roundNearestEven ) {
  | 
      
      
         | 1805 | 
          | 
          | 
                 if ( roundingMode == float_round_to_zero ) {
  | 
      
      
         | 1806 | 
          | 
          | 
                     increment = 0;
  | 
      
      
         | 1807 | 
          | 
          | 
                 }
  | 
      
      
         | 1808 | 
          | 
          | 
                 else {
  | 
      
      
         | 1809 | 
          | 
          | 
                     if ( zSign ) {
  | 
      
      
         | 1810 | 
          | 
          | 
                         increment = ( roundingMode == float_round_down ) && zSig1;
  | 
      
      
         | 1811 | 
          | 
          | 
                     }
  | 
      
      
         | 1812 | 
          | 
          | 
                     else {
  | 
      
      
         | 1813 | 
          | 
          | 
                         increment = ( roundingMode == float_round_up ) && zSig1;
  | 
      
      
         | 1814 | 
          | 
          | 
                     }
  | 
      
      
         | 1815 | 
          | 
          | 
                 }
  | 
      
      
         | 1816 | 
          | 
          | 
             }
  | 
      
      
         | 1817 | 
          | 
          | 
             if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
  | 
      
      
         | 1818 | 
          | 
          | 
                 if (    ( 0x7FFE < zExp )
  | 
      
      
         | 1819 | 
          | 
          | 
                      || (    ( zExp == 0x7FFE )
  | 
      
      
         | 1820 | 
          | 
          | 
                           && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
  | 
      
      
         | 1821 | 
          | 
          | 
                           && increment
  | 
      
      
         | 1822 | 
          | 
          | 
                         )
  | 
      
      
         | 1823 | 
          | 
          | 
                    ) {
  | 
      
      
         | 1824 | 
          | 
          | 
                     roundMask = 0;
  | 
      
      
         | 1825 | 
          | 
          | 
          overflow:
  | 
      
      
         | 1826 | 
          | 
          | 
                     float_raise( float_flag_overflow | float_flag_inexact );
  | 
      
      
         | 1827 | 
          | 
          | 
                     if (    ( roundingMode == float_round_to_zero )
  | 
      
      
         | 1828 | 
          | 
          | 
                          || ( zSign && ( roundingMode == float_round_up ) )
  | 
      
      
         | 1829 | 
          | 
          | 
                          || ( ! zSign && ( roundingMode == float_round_down ) )
  | 
      
      
         | 1830 | 
          | 
          | 
                        ) {
  | 
      
      
         | 1831 | 
          | 
          | 
                         return packFloatx80( zSign, 0x7FFE, ~ roundMask );
  | 
      
      
         | 1832 | 
          | 
          | 
                     }
  | 
      
      
         | 1833 | 
          | 
          | 
                     return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
  | 
      
      
         | 1834 | 
          | 
          | 
                 }
  | 
      
      
         | 1835 | 
          | 
          | 
                 if ( zExp <= 0 ) {
  | 
      
      
         | 1836 | 
          | 
          | 
                     isTiny =
  | 
      
      
         | 1837 | 
          | 
          | 
                            ( float_detect_tininess == float_tininess_before_rounding )
  | 
      
      
         | 1838 | 
          | 
          | 
                         || ( zExp < 0 )
  | 
      
      
         | 1839 | 
          | 
          | 
                         || ! increment
  | 
      
      
         | 1840 | 
          | 
          | 
                         || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
  | 
      
      
         | 1841 | 
          | 
          | 
                     shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
  | 
      
      
         | 1842 | 
          | 
          | 
                     zExp = 0;
  | 
      
      
         | 1843 | 
          | 
          | 
                     if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
  | 
      
      
         | 1844 | 
          | 
          | 
                     if ( zSig1 ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 1845 | 
          | 
          | 
                     if ( roundNearestEven ) {
  | 
      
      
         | 1846 | 
          | 
          | 
                         increment = ( (sbits64) zSig1 < 0 );
  | 
      
      
         | 1847 | 
          | 
          | 
                     }
  | 
      
      
         | 1848 | 
          | 
          | 
                     else {
  | 
      
      
         | 1849 | 
          | 
          | 
                         if ( zSign ) {
  | 
      
      
         | 1850 | 
          | 
          | 
                             increment = ( roundingMode == float_round_down ) && zSig1;
  | 
      
      
         | 1851 | 
          | 
          | 
                         }
  | 
      
      
         | 1852 | 
          | 
          | 
                         else {
  | 
      
      
         | 1853 | 
          | 
          | 
                             increment = ( roundingMode == float_round_up ) && zSig1;
  | 
      
      
         | 1854 | 
          | 
          | 
                         }
  | 
      
      
         | 1855 | 
          | 
          | 
                     }
  | 
      
      
         | 1856 | 
          | 
          | 
                     if ( increment ) {
  | 
      
      
         | 1857 | 
          | 
          | 
                         ++zSig0;
  | 
      
      
         | 1858 | 
          | 
          | 
                         zSig0 &=
  | 
      
      
         | 1859 | 
          | 
          | 
                             ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
  | 
      
      
         | 1860 | 
          | 
          | 
                         if ( (sbits64) zSig0 < 0 ) zExp = 1;
  | 
      
      
         | 1861 | 
          | 
          | 
                     }
  | 
      
      
         | 1862 | 
          | 
          | 
                     return packFloatx80( zSign, zExp, zSig0 );
  | 
      
      
         | 1863 | 
          | 
          | 
                 }
  | 
      
      
         | 1864 | 
          | 
          | 
             }
  | 
      
      
         | 1865 | 
          | 
          | 
             if ( zSig1 ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 1866 | 
          | 
          | 
             if ( increment ) {
  | 
      
      
         | 1867 | 
          | 
          | 
                 ++zSig0;
  | 
      
      
         | 1868 | 
          | 
          | 
                 if ( zSig0 == 0 ) {
  | 
      
      
         | 1869 | 
          | 
          | 
                     ++zExp;
  | 
      
      
         | 1870 | 
          | 
          | 
                     zSig0 = LIT64( 0x8000000000000000 );
  | 
      
      
         | 1871 | 
          | 
          | 
                 }
  | 
      
      
         | 1872 | 
          | 
          | 
                 else {
  | 
      
      
         | 1873 | 
          | 
          | 
                     zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
  | 
      
      
         | 1874 | 
          | 
          | 
                 }
  | 
      
      
         | 1875 | 
          | 
          | 
             }
  | 
      
      
         | 1876 | 
          | 
          | 
             else {
  | 
      
      
         | 1877 | 
          | 
          | 
                 if ( zSig0 == 0 ) zExp = 0;
  | 
      
      
         | 1878 | 
          | 
          | 
             }
  | 
      
      
         | 1879 | 
          | 
          | 
             return packFloatx80( zSign, zExp, zSig0 );
  | 
      
      
         | 1880 | 
          | 
          | 
          
  | 
      
      
         | 1881 | 
          | 
          | 
         }
  | 
      
      
         | 1882 | 
          | 
          | 
          
  | 
      
      
         | 1883 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1884 | 
          | 
          | 
         | Takes an abstract floating-point value having sign `zSign', exponent
  | 
      
      
         | 1885 | 
          | 
          | 
         | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
  | 
      
      
         | 1886 | 
          | 
          | 
         | and returns the proper extended double-precision floating-point value
  | 
      
      
         | 1887 | 
          | 
          | 
         | corresponding to the abstract input.  This routine is just like
  | 
      
      
         | 1888 | 
          | 
          | 
         | `roundAndPackFloatx80' except that the input significand does not have to be
  | 
      
      
         | 1889 | 
          | 
          | 
         | normalized.
  | 
      
      
         | 1890 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1891 | 
          | 
          | 
          
  | 
      
      
         | 1892 | 
          | 
          | 
         static floatx80
  | 
      
      
         | 1893 | 
          | 
          | 
          normalizeRoundAndPackFloatx80(
  | 
      
      
         | 1894 | 
          | 
          | 
              int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
  | 
      
      
         | 1895 | 
          | 
          | 
          )
  | 
      
      
         | 1896 | 
          | 
          | 
         {
  | 
      
      
         | 1897 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 1898 | 
          | 
          | 
          
  | 
      
      
         | 1899 | 
          | 
          | 
             if ( zSig0 == 0 ) {
  | 
      
      
         | 1900 | 
          | 
          | 
                 zSig0 = zSig1;
  | 
      
      
         | 1901 | 
          | 
          | 
                 zSig1 = 0;
  | 
      
      
         | 1902 | 
          | 
          | 
                 zExp -= 64;
  | 
      
      
         | 1903 | 
          | 
          | 
             }
  | 
      
      
         | 1904 | 
          | 
          | 
             shiftCount = countLeadingZeros64( zSig0 );
  | 
      
      
         | 1905 | 
          | 
          | 
             shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
  | 
      
      
         | 1906 | 
          | 
          | 
             zExp -= shiftCount;
  | 
      
      
         | 1907 | 
          | 
          | 
             return
  | 
      
      
         | 1908 | 
          | 
          | 
                 roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
  | 
      
      
         | 1909 | 
          | 
          | 
          
  | 
      
      
         | 1910 | 
          | 
          | 
         }
  | 
      
      
         | 1911 | 
          | 
          | 
          
  | 
      
      
         | 1912 | 
          | 
          | 
         #endif
  | 
      
      
         | 1913 | 
          | 
          | 
          
  | 
      
      
         | 1914 | 
          | 
          | 
         #ifdef FLOAT128
  | 
      
      
         | 1915 | 
          | 
          | 
          
  | 
      
      
         | 1916 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1917 | 
          | 
          | 
         | Returns the least-significant 64 fraction bits of the quadruple-precision
  | 
      
      
         | 1918 | 
          | 
          | 
         | floating-point value `a'.
  | 
      
      
         | 1919 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1920 | 
          | 
          | 
          
  | 
      
      
         | 1921 | 
          | 
          | 
         INLINE bits64 extractFloat128Frac1( float128 a )
  | 
      
      
         | 1922 | 
          | 
          | 
         {
  | 
      
      
         | 1923 | 
          | 
          | 
          
  | 
      
      
         | 1924 | 
          | 
          | 
             return a.low;
  | 
      
      
         | 1925 | 
          | 
          | 
          
  | 
      
      
         | 1926 | 
          | 
          | 
         }
  | 
      
      
         | 1927 | 
          | 
          | 
          
  | 
      
      
         | 1928 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1929 | 
          | 
          | 
         | Returns the most-significant 48 fraction bits of the quadruple-precision
  | 
      
      
         | 1930 | 
          | 
          | 
         | floating-point value `a'.
  | 
      
      
         | 1931 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1932 | 
          | 
          | 
          
  | 
      
      
         | 1933 | 
          | 
          | 
         INLINE bits64 extractFloat128Frac0( float128 a )
  | 
      
      
         | 1934 | 
          | 
          | 
         {
  | 
      
      
         | 1935 | 
          | 
          | 
          
  | 
      
      
         | 1936 | 
          | 
          | 
             return a.high & LIT64( 0x0000FFFFFFFFFFFF );
  | 
      
      
         | 1937 | 
          | 
          | 
          
  | 
      
      
         | 1938 | 
          | 
          | 
         }
  | 
      
      
         | 1939 | 
          | 
          | 
          
  | 
      
      
         | 1940 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1941 | 
          | 
          | 
         | Returns the exponent bits of the quadruple-precision floating-point value
  | 
      
      
         | 1942 | 
          | 
          | 
         | `a'.
  | 
      
      
         | 1943 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1944 | 
          | 
          | 
          
  | 
      
      
         | 1945 | 
          | 
          | 
         INLINE int32 extractFloat128Exp( float128 a )
  | 
      
      
         | 1946 | 
          | 
          | 
         {
  | 
      
      
         | 1947 | 
          | 
          | 
          
  | 
      
      
         | 1948 | 
          | 
          | 
             return ( a.high>>48 ) & 0x7FFF;
  | 
      
      
         | 1949 | 
          | 
          | 
          
  | 
      
      
         | 1950 | 
          | 
          | 
         }
  | 
      
      
         | 1951 | 
          | 
          | 
          
  | 
      
      
         | 1952 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1953 | 
          | 
          | 
         | Returns the sign bit of the quadruple-precision floating-point value `a'.
  | 
      
      
         | 1954 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1955 | 
          | 
          | 
          
  | 
      
      
         | 1956 | 
          | 
          | 
         INLINE flag extractFloat128Sign( float128 a )
  | 
      
      
         | 1957 | 
          | 
          | 
         {
  | 
      
      
         | 1958 | 
          | 
          | 
          
  | 
      
      
         | 1959 | 
          | 
          | 
             return a.high>>63;
  | 
      
      
         | 1960 | 
          | 
          | 
          
  | 
      
      
         | 1961 | 
          | 
          | 
         }
  | 
      
      
         | 1962 | 
          | 
          | 
          
  | 
      
      
         | 1963 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 1964 | 
          | 
          | 
         | Normalizes the subnormal quadruple-precision floating-point value
  | 
      
      
         | 1965 | 
          | 
          | 
         | represented by the denormalized significand formed by the concatenation of
  | 
      
      
         | 1966 | 
          | 
          | 
         | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
  | 
      
      
         | 1967 | 
          | 
          | 
         | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
  | 
      
      
         | 1968 | 
          | 
          | 
         | significand are stored at the location pointed to by `zSig0Ptr', and the
  | 
      
      
         | 1969 | 
          | 
          | 
         | least significant 64 bits of the normalized significand are stored at the
  | 
      
      
         | 1970 | 
          | 
          | 
         | location pointed to by `zSig1Ptr'.
  | 
      
      
         | 1971 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 1972 | 
          | 
          | 
          
  | 
      
      
         | 1973 | 
          | 
          | 
         static void
  | 
      
      
         | 1974 | 
          | 
          | 
          normalizeFloat128Subnormal(
  | 
      
      
         | 1975 | 
          | 
          | 
              bits64 aSig0,
  | 
      
      
         | 1976 | 
          | 
          | 
              bits64 aSig1,
  | 
      
      
         | 1977 | 
          | 
          | 
              int32 *zExpPtr,
  | 
      
      
         | 1978 | 
          | 
          | 
              bits64 *zSig0Ptr,
  | 
      
      
         | 1979 | 
          | 
          | 
              bits64 *zSig1Ptr
  | 
      
      
         | 1980 | 
          | 
          | 
          )
  | 
      
      
         | 1981 | 
          | 
          | 
         {
  | 
      
      
         | 1982 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 1983 | 
          | 
          | 
          
  | 
      
      
         | 1984 | 
          | 
          | 
             if ( aSig0 == 0 ) {
  | 
      
      
         | 1985 | 
          | 
          | 
                 shiftCount = countLeadingZeros64( aSig1 ) - 15;
  | 
      
      
         | 1986 | 
          | 
          | 
                 if ( shiftCount < 0 ) {
  | 
      
      
         | 1987 | 
          | 
          | 
                     *zSig0Ptr = aSig1>>( - shiftCount );
  | 
      
      
         | 1988 | 
          | 
          | 
                     *zSig1Ptr = aSig1<<( shiftCount & 63 );
  | 
      
      
         | 1989 | 
          | 
          | 
                 }
  | 
      
      
         | 1990 | 
          | 
          | 
                 else {
  | 
      
      
         | 1991 | 
          | 
          | 
                     *zSig0Ptr = aSig1<<shiftCount;
  | 
      
      
         | 1992 | 
          | 
          | 
                     *zSig1Ptr = 0;
  | 
      
      
         | 1993 | 
          | 
          | 
                 }
  | 
      
      
         | 1994 | 
          | 
          | 
                 *zExpPtr = - shiftCount - 63;
  | 
      
      
         | 1995 | 
          | 
          | 
             }
  | 
      
      
         | 1996 | 
          | 
          | 
             else {
  | 
      
      
         | 1997 | 
          | 
          | 
                 shiftCount = countLeadingZeros64( aSig0 ) - 15;
  | 
      
      
         | 1998 | 
          | 
          | 
                 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
  | 
      
      
         | 1999 | 
          | 
          | 
                 *zExpPtr = 1 - shiftCount;
  | 
      
      
         | 2000 | 
          | 
          | 
             }
  | 
      
      
         | 2001 | 
          | 
          | 
          
  | 
      
      
         | 2002 | 
          | 
          | 
         }
  | 
      
      
         | 2003 | 
          | 
          | 
          
  | 
      
      
         | 2004 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2005 | 
          | 
          | 
         | Packs the sign `zSign', the exponent `zExp', and the significand formed
  | 
      
      
         | 2006 | 
          | 
          | 
         | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
  | 
      
      
         | 2007 | 
          | 
          | 
         | floating-point value, returning the result.  After being shifted into the
  | 
      
      
         | 2008 | 
          | 
          | 
         | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
  | 
      
      
         | 2009 | 
          | 
          | 
         | added together to form the most significant 32 bits of the result.  This
  | 
      
      
         | 2010 | 
          | 
          | 
         | means that any integer portion of `zSig0' will be added into the exponent.
  | 
      
      
         | 2011 | 
          | 
          | 
         | Since a properly normalized significand will have an integer portion equal
  | 
      
      
         | 2012 | 
          | 
          | 
         | to 1, the `zExp' input should be 1 less than the desired result exponent
  | 
      
      
         | 2013 | 
          | 
          | 
         | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
  | 
      
      
         | 2014 | 
          | 
          | 
         | significand.
  | 
      
      
         | 2015 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2016 | 
          | 
          | 
          
  | 
      
      
         | 2017 | 
          | 
          | 
         INLINE float128
  | 
      
      
         | 2018 | 
          | 
          | 
          packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
  | 
      
      
         | 2019 | 
          | 
          | 
         {
  | 
      
      
         | 2020 | 
          | 
          | 
             float128 z;
  | 
      
      
         | 2021 | 
          | 
          | 
          
  | 
      
      
         | 2022 | 
          | 
          | 
             z.low = zSig1;
  | 
      
      
         | 2023 | 
          | 
          | 
             z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
  | 
      
      
         | 2024 | 
          | 
          | 
             return z;
  | 
      
      
         | 2025 | 
          | 
          | 
          
  | 
      
      
         | 2026 | 
          | 
          | 
         }
  | 
      
      
         | 2027 | 
          | 
          | 
          
  | 
      
      
         | 2028 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2029 | 
          | 
          | 
         | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  | 
      
      
         | 2030 | 
          | 
          | 
         | and extended significand formed by the concatenation of `zSig0', `zSig1',
  | 
      
      
         | 2031 | 
          | 
          | 
         | and `zSig2', and returns the proper quadruple-precision floating-point value
  | 
      
      
         | 2032 | 
          | 
          | 
         | corresponding to the abstract input.  Ordinarily, the abstract value is
  | 
      
      
         | 2033 | 
          | 
          | 
         | simply rounded and packed into the quadruple-precision format, with the
  | 
      
      
         | 2034 | 
          | 
          | 
         | inexact exception raised if the abstract input cannot be represented
  | 
      
      
         | 2035 | 
          | 
          | 
         | exactly.  However, if the abstract value is too large, the overflow and
  | 
      
      
         | 2036 | 
          | 
          | 
         | inexact exceptions are raised and an infinity or maximal finite value is
  | 
      
      
         | 2037 | 
          | 
          | 
         | returned.  If the abstract value is too small, the input value is rounded to
  | 
      
      
         | 2038 | 
          | 
          | 
         | a subnormal number, and the underflow and inexact exceptions are raised if
  | 
      
      
         | 2039 | 
          | 
          | 
         | the abstract input cannot be represented exactly as a subnormal quadruple-
  | 
      
      
         | 2040 | 
          | 
          | 
         | precision floating-point number.
  | 
      
      
         | 2041 | 
          | 
          | 
         |     The input significand must be normalized or smaller.  If the input
  | 
      
      
         | 2042 | 
          | 
          | 
         | significand is not normalized, `zExp' must be 0; in that case, the result
  | 
      
      
         | 2043 | 
          | 
          | 
         | returned is a subnormal number, and it must not require rounding.  In the
  | 
      
      
         | 2044 | 
          | 
          | 
         | usual case that the input significand is normalized, `zExp' must be 1 less
  | 
      
      
         | 2045 | 
          | 
          | 
         | than the ``true'' floating-point exponent.  The handling of underflow and
  | 
      
      
         | 2046 | 
          | 
          | 
         | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 2047 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2048 | 
          | 
          | 
          
  | 
      
      
         | 2049 | 
          | 
          | 
         static float128
  | 
      
      
         | 2050 | 
          | 
          | 
          roundAndPackFloat128(
  | 
      
      
         | 2051 | 
          | 
          | 
              flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
  | 
      
      
         | 2052 | 
          | 
          | 
         {
  | 
      
      
         | 2053 | 
          | 
          | 
             int8 roundingMode;
  | 
      
      
         | 2054 | 
          | 
          | 
             flag roundNearestEven, increment, isTiny;
  | 
      
      
         | 2055 | 
          | 
          | 
          
  | 
      
      
         | 2056 | 
          | 
          | 
             roundingMode = float_rounding_mode;
  | 
      
      
         | 2057 | 
          | 
          | 
             roundNearestEven = ( roundingMode == float_round_nearest_even );
  | 
      
      
         | 2058 | 
          | 
          | 
             increment = ( (sbits64) zSig2 < 0 );
  | 
      
      
         | 2059 | 
          | 
          | 
             if ( ! roundNearestEven ) {
  | 
      
      
         | 2060 | 
          | 
          | 
                 if ( roundingMode == float_round_to_zero ) {
  | 
      
      
         | 2061 | 
          | 
          | 
                     increment = 0;
  | 
      
      
         | 2062 | 
          | 
          | 
                 }
  | 
      
      
         | 2063 | 
          | 
          | 
                 else {
  | 
      
      
         | 2064 | 
          | 
          | 
                     if ( zSign ) {
  | 
      
      
         | 2065 | 
          | 
          | 
                         increment = ( roundingMode == float_round_down ) && zSig2;
  | 
      
      
         | 2066 | 
          | 
          | 
                     }
  | 
      
      
         | 2067 | 
          | 
          | 
                     else {
  | 
      
      
         | 2068 | 
          | 
          | 
                         increment = ( roundingMode == float_round_up ) && zSig2;
  | 
      
      
         | 2069 | 
          | 
          | 
                     }
  | 
      
      
         | 2070 | 
          | 
          | 
                 }
  | 
      
      
         | 2071 | 
          | 
          | 
             }
  | 
      
      
         | 2072 | 
          | 
          | 
             if ( 0x7FFD <= (bits32) zExp ) {
  | 
      
      
         | 2073 | 
          | 
          | 
                 if (    ( 0x7FFD < zExp )
  | 
      
      
         | 2074 | 
          | 
          | 
                      || (    ( zExp == 0x7FFD )
  | 
      
      
         | 2075 | 
          | 
          | 
                           && eq128(
  | 
      
      
         | 2076 | 
          | 
          | 
                                  LIT64( 0x0001FFFFFFFFFFFF ),
  | 
      
      
         | 2077 | 
          | 
          | 
                                  LIT64( 0xFFFFFFFFFFFFFFFF ),
  | 
      
      
         | 2078 | 
          | 
          | 
                                  zSig0,
  | 
      
      
         | 2079 | 
          | 
          | 
                                  zSig1
  | 
      
      
         | 2080 | 
          | 
          | 
                              )
  | 
      
      
         | 2081 | 
          | 
          | 
                           && increment
  | 
      
      
         | 2082 | 
          | 
          | 
                         )
  | 
      
      
         | 2083 | 
          | 
          | 
                    ) {
  | 
      
      
         | 2084 | 
          | 
          | 
                     float_raise( float_flag_overflow | float_flag_inexact );
  | 
      
      
         | 2085 | 
          | 
          | 
                     if (    ( roundingMode == float_round_to_zero )
  | 
      
      
         | 2086 | 
          | 
          | 
                          || ( zSign && ( roundingMode == float_round_up ) )
  | 
      
      
         | 2087 | 
          | 
          | 
                          || ( ! zSign && ( roundingMode == float_round_down ) )
  | 
      
      
         | 2088 | 
          | 
          | 
                        ) {
  | 
      
      
         | 2089 | 
          | 
          | 
                         return
  | 
      
      
         | 2090 | 
          | 
          | 
                             packFloat128(
  | 
      
      
         | 2091 | 
          | 
          | 
                                 zSign,
  | 
      
      
         | 2092 | 
          | 
          | 
                                 0x7FFE,
  | 
      
      
         | 2093 | 
          | 
          | 
                                 LIT64( 0x0000FFFFFFFFFFFF ),
  | 
      
      
         | 2094 | 
          | 
          | 
                                 LIT64( 0xFFFFFFFFFFFFFFFF )
  | 
      
      
         | 2095 | 
          | 
          | 
                             );
  | 
      
      
         | 2096 | 
          | 
          | 
                     }
  | 
      
      
         | 2097 | 
          | 
          | 
                     return packFloat128( zSign, 0x7FFF, 0, 0 );
  | 
      
      
         | 2098 | 
          | 
          | 
                 }
  | 
      
      
         | 2099 | 
          | 
          | 
                 if ( zExp < 0 ) {
  | 
      
      
         | 2100 | 
          | 
          | 
                     isTiny =
  | 
      
      
         | 2101 | 
          | 
          | 
                            ( float_detect_tininess == float_tininess_before_rounding )
  | 
      
      
         | 2102 | 
          | 
          | 
                         || ( zExp < -1 )
  | 
      
      
         | 2103 | 
          | 
          | 
                         || ! increment
  | 
      
      
         | 2104 | 
          | 
          | 
                         || lt128(
  | 
      
      
         | 2105 | 
          | 
          | 
                                zSig0,
  | 
      
      
         | 2106 | 
          | 
          | 
                                zSig1,
  | 
      
      
         | 2107 | 
          | 
          | 
                                LIT64( 0x0001FFFFFFFFFFFF ),
  | 
      
      
         | 2108 | 
          | 
          | 
                                LIT64( 0xFFFFFFFFFFFFFFFF )
  | 
      
      
         | 2109 | 
          | 
          | 
                            );
  | 
      
      
         | 2110 | 
          | 
          | 
                     shift128ExtraRightJamming(
  | 
      
      
         | 2111 | 
          | 
          | 
                         zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
  | 
      
      
         | 2112 | 
          | 
          | 
                     zExp = 0;
  | 
      
      
         | 2113 | 
          | 
          | 
                     if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
  | 
      
      
         | 2114 | 
          | 
          | 
                     if ( roundNearestEven ) {
  | 
      
      
         | 2115 | 
          | 
          | 
                         increment = ( (sbits64) zSig2 < 0 );
  | 
      
      
         | 2116 | 
          | 
          | 
                     }
  | 
      
      
         | 2117 | 
          | 
          | 
                     else {
  | 
      
      
         | 2118 | 
          | 
          | 
                         if ( zSign ) {
  | 
      
      
         | 2119 | 
          | 
          | 
                             increment = ( roundingMode == float_round_down ) && zSig2;
  | 
      
      
         | 2120 | 
          | 
          | 
                         }
  | 
      
      
         | 2121 | 
          | 
          | 
                         else {
  | 
      
      
         | 2122 | 
          | 
          | 
                             increment = ( roundingMode == float_round_up ) && zSig2;
  | 
      
      
         | 2123 | 
          | 
          | 
                         }
  | 
      
      
         | 2124 | 
          | 
          | 
                     }
  | 
      
      
         | 2125 | 
          | 
          | 
                 }
  | 
      
      
         | 2126 | 
          | 
          | 
             }
  | 
      
      
         | 2127 | 
          | 
          | 
             if ( zSig2 ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 2128 | 
          | 
          | 
             if ( increment ) {
  | 
      
      
         | 2129 | 
          | 
          | 
                 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
  | 
      
      
         | 2130 | 
          | 
          | 
                 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
  | 
      
      
         | 2131 | 
          | 
          | 
             }
  | 
      
      
         | 2132 | 
          | 
          | 
             else {
  | 
      
      
         | 2133 | 
          | 
          | 
                 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
  | 
      
      
         | 2134 | 
          | 
          | 
             }
  | 
      
      
         | 2135 | 
          | 
          | 
             return packFloat128( zSign, zExp, zSig0, zSig1 );
  | 
      
      
         | 2136 | 
          | 
          | 
          
  | 
      
      
         | 2137 | 
          | 
          | 
         }
  | 
      
      
         | 2138 | 
          | 
          | 
          
  | 
      
      
         | 2139 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2140 | 
          | 
          | 
         | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  | 
      
      
         | 2141 | 
          | 
          | 
         | and significand formed by the concatenation of `zSig0' and `zSig1', and
  | 
      
      
         | 2142 | 
          | 
          | 
         | returns the proper quadruple-precision floating-point value corresponding
  | 
      
      
         | 2143 | 
          | 
          | 
         | to the abstract input.  This routine is just like `roundAndPackFloat128'
  | 
      
      
         | 2144 | 
          | 
          | 
         | except that the input significand has fewer bits and does not have to be
  | 
      
      
         | 2145 | 
          | 
          | 
         | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
  | 
      
      
         | 2146 | 
          | 
          | 
         | point exponent.
  | 
      
      
         | 2147 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2148 | 
          | 
          | 
          
  | 
      
      
         | 2149 | 
          | 
          | 
         static float128
  | 
      
      
         | 2150 | 
          | 
          | 
          normalizeRoundAndPackFloat128(
  | 
      
      
         | 2151 | 
          | 
          | 
              flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
  | 
      
      
         | 2152 | 
          | 
          | 
         {
  | 
      
      
         | 2153 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 2154 | 
          | 
          | 
             bits64 zSig2;
  | 
      
      
         | 2155 | 
          | 
          | 
          
  | 
      
      
         | 2156 | 
          | 
          | 
             if ( zSig0 == 0 ) {
  | 
      
      
         | 2157 | 
          | 
          | 
                 zSig0 = zSig1;
  | 
      
      
         | 2158 | 
          | 
          | 
                 zSig1 = 0;
  | 
      
      
         | 2159 | 
          | 
          | 
                 zExp -= 64;
  | 
      
      
         | 2160 | 
          | 
          | 
             }
  | 
      
      
         | 2161 | 
          | 
          | 
             shiftCount = countLeadingZeros64( zSig0 ) - 15;
  | 
      
      
         | 2162 | 
          | 
          | 
             if ( 0 <= shiftCount ) {
  | 
      
      
         | 2163 | 
          | 
          | 
                 zSig2 = 0;
  | 
      
      
         | 2164 | 
          | 
          | 
                 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
  | 
      
      
         | 2165 | 
          | 
          | 
             }
  | 
      
      
         | 2166 | 
          | 
          | 
             else {
  | 
      
      
         | 2167 | 
          | 
          | 
                 shift128ExtraRightJamming(
  | 
      
      
         | 2168 | 
          | 
          | 
                     zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
  | 
      
      
         | 2169 | 
          | 
          | 
             }
  | 
      
      
         | 2170 | 
          | 
          | 
             zExp -= shiftCount;
  | 
      
      
         | 2171 | 
          | 
          | 
             return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
  | 
      
      
         | 2172 | 
          | 
          | 
          
  | 
      
      
         | 2173 | 
          | 
          | 
         }
  | 
      
      
         | 2174 | 
          | 
          | 
          
  | 
      
      
         | 2175 | 
          | 
          | 
         #endif
  | 
      
      
         | 2176 | 
          | 
          | 
          
  | 
      
      
         | 2177 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2178 | 
          | 
          | 
         | Returns the result of converting the 32-bit two's complement integer `a'
  | 
      
      
         | 2179 | 
          | 
          | 
         | to the single-precision floating-point format.  The conversion is performed
  | 
      
      
         | 2180 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 2181 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2182 | 
          | 
          | 
          
  | 
      
      
         | 2183 | 
          | 
          | 
         float32 int32_to_float32( int32 a )
  | 
      
      
         | 2184 | 
          | 
          | 
         {
  | 
      
      
         | 2185 | 
          | 
          | 
             flag zSign;
  | 
      
      
         | 2186 | 
          | 
          | 
          
  | 
      
      
         | 2187 | 
          | 
          | 
             if ( a == 0 ) return 0;
  | 
      
      
         | 2188 | 
          | 
          | 
             if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
  | 
      
      
         | 2189 | 
          | 
          | 
             zSign = ( a < 0 );
  | 
      
      
         | 2190 | 
          | 
          | 
             return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
  | 
      
      
         | 2191 | 
          | 
          | 
          
  | 
      
      
         | 2192 | 
          | 
          | 
         }
  | 
      
      
         | 2193 | 
          | 
          | 
          
  | 
      
      
         | 2194 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2195 | 
          | 
          | 
         | Returns the result of converting the 32-bit two's complement integer `a'
  | 
      
      
         | 2196 | 
          | 
          | 
         | to the double-precision floating-point format.  The conversion is performed
  | 
      
      
         | 2197 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 2198 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2199 | 
          | 
          | 
          
  | 
      
      
         | 2200 | 
          | 
          | 
         float64 int32_to_float64( int32 a )
  | 
      
      
         | 2201 | 
          | 
          | 
         {
  | 
      
      
         | 2202 | 
          | 
          | 
             flag zSign;
  | 
      
      
         | 2203 | 
          | 
          | 
             uint32 absA;
  | 
      
      
         | 2204 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 2205 | 
          | 
          | 
             bits64 zSig;
  | 
      
      
         | 2206 | 
          | 
          | 
          
  | 
      
      
         | 2207 | 
          | 
          | 
             if ( a == 0 ) return 0;
  | 
      
      
         | 2208 | 
          | 
          | 
             zSign = ( a < 0 );
  | 
      
      
         | 2209 | 
          | 
          | 
             absA = zSign ? - a : a;
  | 
      
      
         | 2210 | 
          | 
          | 
             shiftCount = countLeadingZeros32( absA ) + 21;
  | 
      
      
         | 2211 | 
          | 
          | 
             zSig = absA;
  | 
      
      
         | 2212 | 
          | 
          | 
             return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
  | 
      
      
         | 2213 | 
          | 
          | 
          
  | 
      
      
         | 2214 | 
          | 
          | 
         }
  | 
      
      
         | 2215 | 
          | 
          | 
          
  | 
      
      
         | 2216 | 
          | 
          | 
         #ifdef FLOATX80
  | 
      
      
         | 2217 | 
          | 
          | 
          
  | 
      
      
         | 2218 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2219 | 
          | 
          | 
         | Returns the result of converting the 32-bit two's complement integer `a'
  | 
      
      
         | 2220 | 
          | 
          | 
         | to the extended double-precision floating-point format.  The conversion
  | 
      
      
         | 2221 | 
          | 
          | 
         | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 2222 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 2223 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2224 | 
          | 
          | 
          
  | 
      
      
         | 2225 | 
          | 
          | 
         floatx80 int32_to_floatx80( int32 a )
  | 
      
      
         | 2226 | 
          | 
          | 
         {
  | 
      
      
         | 2227 | 
          | 
          | 
             flag zSign;
  | 
      
      
         | 2228 | 
          | 
          | 
             uint32 absA;
  | 
      
      
         | 2229 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 2230 | 
          | 
          | 
             bits64 zSig;
  | 
      
      
         | 2231 | 
          | 
          | 
          
  | 
      
      
         | 2232 | 
          | 
          | 
             if ( a == 0 ) return packFloatx80( 0, 0, 0 );
  | 
      
      
         | 2233 | 
          | 
          | 
             zSign = ( a < 0 );
  | 
      
      
         | 2234 | 
          | 
          | 
             absA = zSign ? - a : a;
  | 
      
      
         | 2235 | 
          | 
          | 
             shiftCount = countLeadingZeros32( absA ) + 32;
  | 
      
      
         | 2236 | 
          | 
          | 
             zSig = absA;
  | 
      
      
         | 2237 | 
          | 
          | 
             return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
  | 
      
      
         | 2238 | 
          | 
          | 
          
  | 
      
      
         | 2239 | 
          | 
          | 
         }
  | 
      
      
         | 2240 | 
          | 
          | 
          
  | 
      
      
         | 2241 | 
          | 
          | 
         #endif
  | 
      
      
         | 2242 | 
          | 
          | 
          
  | 
      
      
         | 2243 | 
          | 
          | 
         #ifdef FLOAT128
  | 
      
      
         | 2244 | 
          | 
          | 
          
  | 
      
      
         | 2245 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2246 | 
          | 
          | 
         | Returns the result of converting the 32-bit two's complement integer `a' to
  | 
      
      
         | 2247 | 
          | 
          | 
         | the quadruple-precision floating-point format.  The conversion is performed
  | 
      
      
         | 2248 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 2249 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2250 | 
          | 
          | 
          
  | 
      
      
         | 2251 | 
          | 
          | 
         float128 int32_to_float128( int32 a )
  | 
      
      
         | 2252 | 
          | 
          | 
         {
  | 
      
      
         | 2253 | 
          | 
          | 
             flag zSign;
  | 
      
      
         | 2254 | 
          | 
          | 
             uint32 absA;
  | 
      
      
         | 2255 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 2256 | 
          | 
          | 
             bits64 zSig0;
  | 
      
      
         | 2257 | 
          | 
          | 
          
  | 
      
      
         | 2258 | 
          | 
          | 
             if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
  | 
      
      
         | 2259 | 
          | 
          | 
             zSign = ( a < 0 );
  | 
      
      
         | 2260 | 
          | 
          | 
             absA = zSign ? - a : a;
  | 
      
      
         | 2261 | 
          | 
          | 
             shiftCount = countLeadingZeros32( absA ) + 17;
  | 
      
      
         | 2262 | 
          | 
          | 
             zSig0 = absA;
  | 
      
      
         | 2263 | 
          | 
          | 
             return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
  | 
      
      
         | 2264 | 
          | 
          | 
          
  | 
      
      
         | 2265 | 
          | 
          | 
         }
  | 
      
      
         | 2266 | 
          | 
          | 
          
  | 
      
      
         | 2267 | 
          | 
          | 
         #endif
  | 
      
      
         | 2268 | 
          | 
          | 
          
  | 
      
      
         | 2269 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2270 | 
          | 
          | 
         | Returns the result of converting the 64-bit two's complement integer `a'
  | 
      
      
         | 2271 | 
          | 
          | 
         | to the single-precision floating-point format.  The conversion is performed
  | 
      
      
         | 2272 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 2273 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2274 | 
          | 
          | 
          
  | 
      
      
         | 2275 | 
          | 
          | 
         float32 int64_to_float32( int64 a )
  | 
      
      
         | 2276 | 
          | 
          | 
         {
  | 
      
      
         | 2277 | 
          | 
          | 
             flag zSign;
  | 
      
      
         | 2278 | 
          | 
          | 
             uint64 absA;
  | 
      
      
         | 2279 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 2280 | 
          | 
          | 
             //bits32 zSig; // Unused variable
  | 
      
      
         | 2281 | 
          | 
          | 
          
  | 
      
      
         | 2282 | 
          | 
          | 
             if ( a == 0 ) return 0;
  | 
      
      
         | 2283 | 
          | 
          | 
             zSign = ( a < 0 );
  | 
      
      
         | 2284 | 
          | 
          | 
             absA = zSign ? - a : a;
  | 
      
      
         | 2285 | 
          | 
          | 
             shiftCount = countLeadingZeros64( absA ) - 40;
  | 
      
      
         | 2286 | 
          | 
          | 
             if ( 0 <= shiftCount ) {
  | 
      
      
         | 2287 | 
          | 
          | 
                 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
  | 
      
      
         | 2288 | 
          | 
          | 
             }
  | 
      
      
         | 2289 | 
          | 
          | 
             else {
  | 
      
      
         | 2290 | 
          | 
          | 
                 shiftCount += 7;
  | 
      
      
         | 2291 | 
          | 
          | 
                 if ( shiftCount < 0 ) {
  | 
      
      
         | 2292 | 
          | 
          | 
                     shift64RightJamming( absA, - shiftCount, &absA );
  | 
      
      
         | 2293 | 
          | 
          | 
                 }
  | 
      
      
         | 2294 | 
          | 
          | 
                 else {
  | 
      
      
         | 2295 | 
          | 
          | 
                     absA <<= shiftCount;
  | 
      
      
         | 2296 | 
          | 
          | 
                 }
  | 
      
      
         | 2297 | 
          | 
          | 
                 return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
  | 
      
      
         | 2298 | 
          | 
          | 
             }
  | 
      
      
         | 2299 | 
          | 
          | 
          
  | 
      
      
         | 2300 | 
          | 
          | 
         }
  | 
      
      
         | 2301 | 
          | 
          | 
          
  | 
      
      
         | 2302 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2303 | 
          | 
          | 
         | Returns the result of converting the 64-bit two's complement integer `a'
  | 
      
      
         | 2304 | 
          | 
          | 
         | to the double-precision floating-point format.  The conversion is performed
  | 
      
      
         | 2305 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 2306 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2307 | 
          | 
          | 
          
  | 
      
      
         | 2308 | 
          | 
          | 
         float64 int64_to_float64( int64 a )
  | 
      
      
         | 2309 | 
          | 
          | 
         {
  | 
      
      
         | 2310 | 
          | 
          | 
             flag zSign;
  | 
      
      
         | 2311 | 
          | 
          | 
          
  | 
      
      
         | 2312 | 
          | 
          | 
             if ( a == 0 ) return 0;
  | 
      
      
         | 2313 | 
          | 
          | 
             if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
  | 
      
      
         | 2314 | 
          | 
          | 
                 return packFloat64( 1, 0x43E, 0 );
  | 
      
      
         | 2315 | 
          | 
          | 
             }
  | 
      
      
         | 2316 | 
          | 
          | 
             zSign = ( a < 0 );
  | 
      
      
         | 2317 | 
          | 
          | 
             return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
  | 
      
      
         | 2318 | 
          | 
          | 
          
  | 
      
      
         | 2319 | 
          | 
          | 
         }
  | 
      
      
         | 2320 | 
          | 
          | 
          
  | 
      
      
         | 2321 | 
          | 
          | 
         #ifdef FLOATX80
  | 
      
      
         | 2322 | 
          | 
          | 
          
  | 
      
      
         | 2323 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2324 | 
          | 
          | 
         | Returns the result of converting the 64-bit two's complement integer `a'
  | 
      
      
         | 2325 | 
          | 
          | 
         | to the extended double-precision floating-point format.  The conversion
  | 
      
      
         | 2326 | 
          | 
          | 
         | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 2327 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 2328 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2329 | 
          | 
          | 
          
  | 
      
      
         | 2330 | 
          | 
          | 
         floatx80 int64_to_floatx80( int64 a )
  | 
      
      
         | 2331 | 
          | 
          | 
         {
  | 
      
      
         | 2332 | 
          | 
          | 
             flag zSign;
  | 
      
      
         | 2333 | 
          | 
          | 
             uint64 absA;
  | 
      
      
         | 2334 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 2335 | 
          | 
          | 
          
  | 
      
      
         | 2336 | 
          | 
          | 
             if ( a == 0 ) return packFloatx80( 0, 0, 0 );
  | 
      
      
         | 2337 | 
          | 
          | 
             zSign = ( a < 0 );
  | 
      
      
         | 2338 | 
          | 
          | 
             absA = zSign ? - a : a;
  | 
      
      
         | 2339 | 
          | 
          | 
             shiftCount = countLeadingZeros64( absA );
  | 
      
      
         | 2340 | 
          | 
          | 
             return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
  | 
      
      
         | 2341 | 
          | 
          | 
          
  | 
      
      
         | 2342 | 
          | 
          | 
         }
  | 
      
      
         | 2343 | 
          | 
          | 
          
  | 
      
      
         | 2344 | 
          | 
          | 
         #endif
  | 
      
      
         | 2345 | 
          | 
          | 
          
  | 
      
      
         | 2346 | 
          | 
          | 
         #ifdef FLOAT128
  | 
      
      
         | 2347 | 
          | 
          | 
          
  | 
      
      
         | 2348 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2349 | 
          | 
          | 
         | Returns the result of converting the 64-bit two's complement integer `a' to
  | 
      
      
         | 2350 | 
          | 
          | 
         | the quadruple-precision floating-point format.  The conversion is performed
  | 
      
      
         | 2351 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 2352 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2353 | 
          | 
          | 
          
  | 
      
      
         | 2354 | 
          | 
          | 
         float128 int64_to_float128( int64 a )
  | 
      
      
         | 2355 | 
          | 
          | 
         {
  | 
      
      
         | 2356 | 
          | 
          | 
             flag zSign;
  | 
      
      
         | 2357 | 
          | 
          | 
             uint64 absA;
  | 
      
      
         | 2358 | 
          | 
          | 
             int8 shiftCount;
  | 
      
      
         | 2359 | 
          | 
          | 
             int32 zExp;
  | 
      
      
         | 2360 | 
          | 
          | 
             bits64 zSig0, zSig1;
  | 
      
      
         | 2361 | 
          | 
          | 
          
  | 
      
      
         | 2362 | 
          | 
          | 
             if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
  | 
      
      
         | 2363 | 
          | 
          | 
             zSign = ( a < 0 );
  | 
      
      
         | 2364 | 
          | 
          | 
             absA = zSign ? - a : a;
  | 
      
      
         | 2365 | 
          | 
          | 
             shiftCount = countLeadingZeros64( absA ) + 49;
  | 
      
      
         | 2366 | 
          | 
          | 
             zExp = 0x406E - shiftCount;
  | 
      
      
         | 2367 | 
          | 
          | 
             if ( 64 <= shiftCount ) {
  | 
      
      
         | 2368 | 
          | 
          | 
                 zSig1 = 0;
  | 
      
      
         | 2369 | 
          | 
          | 
                 zSig0 = absA;
  | 
      
      
         | 2370 | 
          | 
          | 
                 shiftCount -= 64;
  | 
      
      
         | 2371 | 
          | 
          | 
             }
  | 
      
      
         | 2372 | 
          | 
          | 
             else {
  | 
      
      
         | 2373 | 
          | 
          | 
                 zSig1 = absA;
  | 
      
      
         | 2374 | 
          | 
          | 
                 zSig0 = 0;
  | 
      
      
         | 2375 | 
          | 
          | 
             }
  | 
      
      
         | 2376 | 
          | 
          | 
             shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
  | 
      
      
         | 2377 | 
          | 
          | 
             return packFloat128( zSign, zExp, zSig0, zSig1 );
  | 
      
      
         | 2378 | 
          | 
          | 
          
  | 
      
      
         | 2379 | 
          | 
          | 
         }
  | 
      
      
         | 2380 | 
          | 
          | 
          
  | 
      
      
         | 2381 | 
          | 
          | 
         #endif
  | 
      
      
         | 2382 | 
          | 
          | 
          
  | 
      
      
         | 2383 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2384 | 
          | 
          | 
         | Returns the result of converting the single-precision floating-point value
  | 
      
      
         | 2385 | 
          | 
          | 
         | `a' to the 32-bit two's complement integer format.  The conversion is
  | 
      
      
         | 2386 | 
          | 
          | 
         | performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 2387 | 
          | 
          | 
         | Arithmetic---which means in particular that the conversion is rounded
  | 
      
      
         | 2388 | 
          | 
          | 
         | according to the current rounding mode.  If `a' is a NaN, the largest
  | 
      
      
         | 2389 | 
          | 
          | 
         | positive integer is returned.  Otherwise, if the conversion overflows, the
  | 
      
      
         | 2390 | 
          | 
          | 
         | largest integer with the same sign as `a' is returned.
  | 
      
      
         | 2391 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2392 | 
          | 
          | 
          
  | 
      
      
         | 2393 | 
          | 
          | 
         int32 float32_to_int32( float32 a )
  | 
      
      
         | 2394 | 
          | 
          | 
         {
  | 
      
      
         | 2395 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 2396 | 
          | 
          | 
             int16 aExp, shiftCount;
  | 
      
      
         | 2397 | 
          | 
          | 
             bits32 aSig;
  | 
      
      
         | 2398 | 
          | 
          | 
             bits64 aSig64;
  | 
      
      
         | 2399 | 
          | 
          | 
          
  | 
      
      
         | 2400 | 
          | 
          | 
             aSig = extractFloat32Frac( a );
  | 
      
      
         | 2401 | 
          | 
          | 
             aExp = extractFloat32Exp( a );
  | 
      
      
         | 2402 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 2403 | 
          | 
          | 
             if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
  | 
      
      
         | 2404 | 
          | 
          | 
             if ( aExp ) aSig |= 0x00800000;
  | 
      
      
         | 2405 | 
          | 
          | 
             shiftCount = 0xAF - aExp;
  | 
      
      
         | 2406 | 
          | 
          | 
             aSig64 = aSig;
  | 
      
      
         | 2407 | 
          | 
          | 
             aSig64 <<= 32;
  | 
      
      
         | 2408 | 
          | 
          | 
             if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
  | 
      
      
         | 2409 | 
          | 
          | 
             return roundAndPackInt32( aSign, aSig64 );
  | 
      
      
         | 2410 | 
          | 
          | 
          
  | 
      
      
         | 2411 | 
          | 
          | 
         }
  | 
      
      
         | 2412 | 
          | 
          | 
          
  | 
      
      
         | 2413 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2414 | 
          | 
          | 
         | Returns the result of converting the single-precision floating-point value
  | 
      
      
         | 2415 | 
          | 
          | 
         | `a' to the 32-bit two's complement integer format.  The conversion is
  | 
      
      
         | 2416 | 
          | 
          | 
         | performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 2417 | 
          | 
          | 
         | Arithmetic, except that the conversion is always rounded toward zero.
  | 
      
      
         | 2418 | 
          | 
          | 
         | If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
  | 
      
      
         | 2419 | 
          | 
          | 
         | the conversion overflows, the largest integer with the same sign as `a' is
  | 
      
      
         | 2420 | 
          | 
          | 
         | returned.
  | 
      
      
         | 2421 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2422 | 
          | 
          | 
          
  | 
      
      
         | 2423 | 
          | 
          | 
         int32 float32_to_int32_round_to_zero( float32 a )
  | 
      
      
         | 2424 | 
          | 
          | 
         {
  | 
      
      
         | 2425 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 2426 | 
          | 
          | 
             int16 aExp, shiftCount;
  | 
      
      
         | 2427 | 
          | 
          | 
             bits32 aSig;
  | 
      
      
         | 2428 | 
          | 
          | 
             int32 z;
  | 
      
      
         | 2429 | 
          | 
          | 
          
  | 
      
      
         | 2430 | 
          | 
          | 
             aSig = extractFloat32Frac( a );
  | 
      
      
         | 2431 | 
          | 
          | 
             aExp = extractFloat32Exp( a );
  | 
      
      
         | 2432 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 2433 | 
          | 
          | 
             shiftCount = aExp - 0x9E;
  | 
      
      
         | 2434 | 
          | 
          | 
             if ( 0 <= shiftCount ) {
  | 
      
      
         | 2435 | 
          | 
          | 
                 if ( a != 0xCF000000 ) {
  | 
      
      
         | 2436 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 2437 | 
          | 
          | 
                     if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
  | 
      
      
         | 2438 | 
          | 
          | 
                 }
  | 
      
      
         | 2439 | 
          | 
          | 
                 return (sbits32) 0x80000000;
  | 
      
      
         | 2440 | 
          | 
          | 
             }
  | 
      
      
         | 2441 | 
          | 
          | 
             else if ( aExp <= 0x7E ) {
  | 
      
      
         | 2442 | 
          | 
          | 
                 if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 2443 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 2444 | 
          | 
          | 
             }
  | 
      
      
         | 2445 | 
          | 
          | 
             aSig = ( aSig | 0x00800000 )<<8;
  | 
      
      
         | 2446 | 
          | 
          | 
             z = aSig>>( - shiftCount );
  | 
      
      
         | 2447 | 
          | 
          | 
             if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
  | 
      
      
         | 2448 | 
          | 
          | 
                 float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 2449 | 
          | 
          | 
             }
  | 
      
      
         | 2450 | 
          | 
          | 
             if ( aSign ) z = - z;
  | 
      
      
         | 2451 | 
          | 
          | 
             return z;
  | 
      
      
         | 2452 | 
          | 
          | 
          
  | 
      
      
         | 2453 | 
          | 
          | 
         }
  | 
      
      
         | 2454 | 
          | 
          | 
          
  | 
      
      
         | 2455 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2456 | 
          | 
          | 
         | Returns the result of converting the single-precision floating-point value
  | 
      
      
         | 2457 | 
          | 
          | 
         | `a' to the 64-bit two's complement integer format.  The conversion is
  | 
      
      
         | 2458 | 
          | 
          | 
         | performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 2459 | 
          | 
          | 
         | Arithmetic---which means in particular that the conversion is rounded
  | 
      
      
         | 2460 | 
          | 
          | 
         | according to the current rounding mode.  If `a' is a NaN, the largest
  | 
      
      
         | 2461 | 
          | 
          | 
         | positive integer is returned.  Otherwise, if the conversion overflows, the
  | 
      
      
         | 2462 | 
          | 
          | 
         | largest integer with the same sign as `a' is returned.
  | 
      
      
         | 2463 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2464 | 
          | 
          | 
          
  | 
      
      
         | 2465 | 
          | 
          | 
         int64 float32_to_int64( float32 a )
  | 
      
      
         | 2466 | 
          | 
          | 
         {
  | 
      
      
         | 2467 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 2468 | 
          | 
          | 
             int16 aExp, shiftCount;
  | 
      
      
         | 2469 | 
          | 
          | 
             bits32 aSig;
  | 
      
      
         | 2470 | 
          | 
          | 
             bits64 aSig64, aSigExtra;
  | 
      
      
         | 2471 | 
          | 
          | 
          
  | 
      
      
         | 2472 | 
          | 
          | 
             aSig = extractFloat32Frac( a );
  | 
      
      
         | 2473 | 
          | 
          | 
             aExp = extractFloat32Exp( a );
  | 
      
      
         | 2474 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 2475 | 
          | 
          | 
             shiftCount = 0xBE - aExp;
  | 
      
      
         | 2476 | 
          | 
          | 
             if ( shiftCount < 0 ) {
  | 
      
      
         | 2477 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 2478 | 
          | 
          | 
                 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
  | 
      
      
         | 2479 | 
          | 
          | 
                     return LIT64( 0x7FFFFFFFFFFFFFFF );
  | 
      
      
         | 2480 | 
          | 
          | 
                 }
  | 
      
      
         | 2481 | 
          | 
          | 
                 return (sbits64) LIT64( 0x8000000000000000 );
  | 
      
      
         | 2482 | 
          | 
          | 
             }
  | 
      
      
         | 2483 | 
          | 
          | 
             if ( aExp ) aSig |= 0x00800000;
  | 
      
      
         | 2484 | 
          | 
          | 
             aSig64 = aSig;
  | 
      
      
         | 2485 | 
          | 
          | 
             aSig64 <<= 40;
  | 
      
      
         | 2486 | 
          | 
          | 
             shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
  | 
      
      
         | 2487 | 
          | 
          | 
             return roundAndPackInt64( aSign, aSig64, aSigExtra );
  | 
      
      
         | 2488 | 
          | 
          | 
          
  | 
      
      
         | 2489 | 
          | 
          | 
         }
  | 
      
      
         | 2490 | 
          | 
          | 
          
  | 
      
      
         | 2491 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2492 | 
          | 
          | 
         | Returns the result of converting the single-precision floating-point value
  | 
      
      
         | 2493 | 
          | 
          | 
         | `a' to the 64-bit two's complement integer format.  The conversion is
  | 
      
      
         | 2494 | 
          | 
          | 
         | performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 2495 | 
          | 
          | 
         | Arithmetic, except that the conversion is always rounded toward zero.  If
  | 
      
      
         | 2496 | 
          | 
          | 
         | `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
  | 
      
      
         | 2497 | 
          | 
          | 
         | conversion overflows, the largest integer with the same sign as `a' is
  | 
      
      
         | 2498 | 
          | 
          | 
         | returned.
  | 
      
      
         | 2499 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2500 | 
          | 
          | 
          
  | 
      
      
         | 2501 | 
          | 
          | 
         int64 float32_to_int64_round_to_zero( float32 a )
  | 
      
      
         | 2502 | 
          | 
          | 
         {
  | 
      
      
         | 2503 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 2504 | 
          | 
          | 
             int16 aExp, shiftCount;
  | 
      
      
         | 2505 | 
          | 
          | 
             bits32 aSig;
  | 
      
      
         | 2506 | 
          | 
          | 
             bits64 aSig64;
  | 
      
      
         | 2507 | 
          | 
          | 
             int64 z;
  | 
      
      
         | 2508 | 
          | 
          | 
          
  | 
      
      
         | 2509 | 
          | 
          | 
             aSig = extractFloat32Frac( a );
  | 
      
      
         | 2510 | 
          | 
          | 
             aExp = extractFloat32Exp( a );
  | 
      
      
         | 2511 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 2512 | 
          | 
          | 
             shiftCount = aExp - 0xBE;
  | 
      
      
         | 2513 | 
          | 
          | 
             if ( 0 <= shiftCount ) {
  | 
      
      
         | 2514 | 
          | 
          | 
                 if ( a != 0xDF000000 ) {
  | 
      
      
         | 2515 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 2516 | 
          | 
          | 
                     if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
  | 
      
      
         | 2517 | 
          | 
          | 
                         return LIT64( 0x7FFFFFFFFFFFFFFF );
  | 
      
      
         | 2518 | 
          | 
          | 
                     }
  | 
      
      
         | 2519 | 
          | 
          | 
                 }
  | 
      
      
         | 2520 | 
          | 
          | 
                 return (sbits64) LIT64( 0x8000000000000000 );
  | 
      
      
         | 2521 | 
          | 
          | 
             }
  | 
      
      
         | 2522 | 
          | 
          | 
             else if ( aExp <= 0x7E ) {
  | 
      
      
         | 2523 | 
          | 
          | 
                 if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 2524 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 2525 | 
          | 
          | 
             }
  | 
      
      
         | 2526 | 
          | 
          | 
             aSig64 = aSig | 0x00800000;
  | 
      
      
         | 2527 | 
          | 
          | 
             aSig64 <<= 40;
  | 
      
      
         | 2528 | 
          | 
          | 
             z = aSig64>>( - shiftCount );
  | 
      
      
         | 2529 | 
          | 
          | 
             if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
  | 
      
      
         | 2530 | 
          | 
          | 
                 float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 2531 | 
          | 
          | 
             }
  | 
      
      
         | 2532 | 
          | 
          | 
             if ( aSign ) z = - z;
  | 
      
      
         | 2533 | 
          | 
          | 
             return z;
  | 
      
      
         | 2534 | 
          | 
          | 
          
  | 
      
      
         | 2535 | 
          | 
          | 
         }
  | 
      
      
         | 2536 | 
          | 
          | 
          
  | 
      
      
         | 2537 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2538 | 
          | 
          | 
         | Returns the result of converting the single-precision floating-point value
  | 
      
      
         | 2539 | 
          | 
          | 
         | `a' to the double-precision floating-point format.  The conversion is
  | 
      
      
         | 2540 | 
          | 
          | 
         | performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 2541 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 2542 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2543 | 
          | 
          | 
          
  | 
      
      
         | 2544 | 
          | 
          | 
         float64 float32_to_float64( float32 a )
  | 
      
      
         | 2545 | 
          | 
          | 
         {
  | 
      
      
         | 2546 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 2547 | 
          | 
          | 
             int16 aExp;
  | 
      
      
         | 2548 | 
          | 
          | 
             bits32 aSig;
  | 
      
      
         | 2549 | 
          | 
          | 
          
  | 
      
      
         | 2550 | 
          | 
          | 
             aSig = extractFloat32Frac( a );
  | 
      
      
         | 2551 | 
          | 
          | 
             aExp = extractFloat32Exp( a );
  | 
      
      
         | 2552 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 2553 | 
          | 
          | 
             if ( aExp == 0xFF ) {
  | 
      
      
         | 2554 | 
          | 
          | 
                 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
  | 
      
      
         | 2555 | 
          | 
          | 
                 return packFloat64( aSign, 0x7FF, 0 );
  | 
      
      
         | 2556 | 
          | 
          | 
             }
  | 
      
      
         | 2557 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 2558 | 
          | 
          | 
                 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
  | 
      
      
         | 2559 | 
          | 
          | 
                 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 2560 | 
          | 
          | 
                 --aExp;
  | 
      
      
         | 2561 | 
          | 
          | 
             }
  | 
      
      
         | 2562 | 
          | 
          | 
             return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
  | 
      
      
         | 2563 | 
          | 
          | 
          
  | 
      
      
         | 2564 | 
          | 
          | 
         }
  | 
      
      
         | 2565 | 
          | 
          | 
          
  | 
      
      
         | 2566 | 
          | 
          | 
         #ifdef FLOATX80
  | 
      
      
         | 2567 | 
          | 
          | 
          
  | 
      
      
         | 2568 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2569 | 
          | 
          | 
         | Returns the result of converting the single-precision floating-point value
  | 
      
      
         | 2570 | 
          | 
          | 
         | `a' to the extended double-precision floating-point format.  The conversion
  | 
      
      
         | 2571 | 
          | 
          | 
         | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 2572 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 2573 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2574 | 
          | 
          | 
          
  | 
      
      
         | 2575 | 
          | 
          | 
         floatx80 float32_to_floatx80( float32 a )
  | 
      
      
         | 2576 | 
          | 
          | 
         {
  | 
      
      
         | 2577 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 2578 | 
          | 
          | 
             int16 aExp;
  | 
      
      
         | 2579 | 
          | 
          | 
             bits32 aSig;
  | 
      
      
         | 2580 | 
          | 
          | 
          
  | 
      
      
         | 2581 | 
          | 
          | 
             aSig = extractFloat32Frac( a );
  | 
      
      
         | 2582 | 
          | 
          | 
             aExp = extractFloat32Exp( a );
  | 
      
      
         | 2583 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 2584 | 
          | 
          | 
             if ( aExp == 0xFF ) {
  | 
      
      
         | 2585 | 
          | 
          | 
                 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
  | 
      
      
         | 2586 | 
          | 
          | 
                 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
  | 
      
      
         | 2587 | 
          | 
          | 
             }
  | 
      
      
         | 2588 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 2589 | 
          | 
          | 
                 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
  | 
      
      
         | 2590 | 
          | 
          | 
                 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 2591 | 
          | 
          | 
             }
  | 
      
      
         | 2592 | 
          | 
          | 
             aSig |= 0x00800000;
  | 
      
      
         | 2593 | 
          | 
          | 
             return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
  | 
      
      
         | 2594 | 
          | 
          | 
          
  | 
      
      
         | 2595 | 
          | 
          | 
         }
  | 
      
      
         | 2596 | 
          | 
          | 
          
  | 
      
      
         | 2597 | 
          | 
          | 
         #endif
  | 
      
      
         | 2598 | 
          | 
          | 
          
  | 
      
      
         | 2599 | 
          | 
          | 
         #ifdef FLOAT128
  | 
      
      
         | 2600 | 
          | 
          | 
          
  | 
      
      
         | 2601 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2602 | 
          | 
          | 
         | Returns the result of converting the single-precision floating-point value
  | 
      
      
         | 2603 | 
          | 
          | 
         | `a' to the double-precision floating-point format.  The conversion is
  | 
      
      
         | 2604 | 
          | 
          | 
         | performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 2605 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 2606 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2607 | 
          | 
          | 
          
  | 
      
      
         | 2608 | 
          | 
          | 
         float128 float32_to_float128( float32 a )
  | 
      
      
         | 2609 | 
          | 
          | 
         {
  | 
      
      
         | 2610 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 2611 | 
          | 
          | 
             int16 aExp;
  | 
      
      
         | 2612 | 
          | 
          | 
             bits32 aSig;
  | 
      
      
         | 2613 | 
          | 
          | 
          
  | 
      
      
         | 2614 | 
          | 
          | 
             aSig = extractFloat32Frac( a );
  | 
      
      
         | 2615 | 
          | 
          | 
             aExp = extractFloat32Exp( a );
  | 
      
      
         | 2616 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 2617 | 
          | 
          | 
             if ( aExp == 0xFF ) {
  | 
      
      
         | 2618 | 
          | 
          | 
                 if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
  | 
      
      
         | 2619 | 
          | 
          | 
                 return packFloat128( aSign, 0x7FFF, 0, 0 );
  | 
      
      
         | 2620 | 
          | 
          | 
             }
  | 
      
      
         | 2621 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 2622 | 
          | 
          | 
                 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
  | 
      
      
         | 2623 | 
          | 
          | 
                 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 2624 | 
          | 
          | 
                 --aExp;
  | 
      
      
         | 2625 | 
          | 
          | 
             }
  | 
      
      
         | 2626 | 
          | 
          | 
             return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
  | 
      
      
         | 2627 | 
          | 
          | 
          
  | 
      
      
         | 2628 | 
          | 
          | 
         }
  | 
      
      
         | 2629 | 
          | 
          | 
          
  | 
      
      
         | 2630 | 
          | 
          | 
         #endif
  | 
      
      
         | 2631 | 
          | 
          | 
          
  | 
      
      
         | 2632 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2633 | 
          | 
          | 
         | Rounds the single-precision floating-point value `a' to an integer, and
  | 
      
      
         | 2634 | 
          | 
          | 
         | returns the result as a single-precision floating-point value.  The
  | 
      
      
         | 2635 | 
          | 
          | 
         | operation is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 2636 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 2637 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2638 | 
          | 
          | 
          
  | 
      
      
         | 2639 | 
          | 
          | 
         float32 float32_round_to_int( float32 a )
  | 
      
      
         | 2640 | 
          | 
          | 
         {
  | 
      
      
         | 2641 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 2642 | 
          | 
          | 
             int16 aExp;
  | 
      
      
         | 2643 | 
          | 
          | 
             bits32 lastBitMask, roundBitsMask;
  | 
      
      
         | 2644 | 
          | 
          | 
             int8 roundingMode;
  | 
      
      
         | 2645 | 
          | 
          | 
             float32 z;
  | 
      
      
         | 2646 | 
          | 
          | 
          
  | 
      
      
         | 2647 | 
          | 
          | 
             aExp = extractFloat32Exp( a );
  | 
      
      
         | 2648 | 
          | 
          | 
             if ( 0x96 <= aExp ) {
  | 
      
      
         | 2649 | 
          | 
          | 
                 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
  | 
      
      
         | 2650 | 
          | 
          | 
                     return propagateFloat32NaN( a, a );
  | 
      
      
         | 2651 | 
          | 
          | 
                 }
  | 
      
      
         | 2652 | 
          | 
          | 
                 return a;
  | 
      
      
         | 2653 | 
          | 
          | 
             }
  | 
      
      
         | 2654 | 
          | 
          | 
             if ( aExp <= 0x7E ) {
  | 
      
      
         | 2655 | 
          | 
          | 
                 if ( (bits32) ( a<<1 ) == 0 ) return a;
  | 
      
      
         | 2656 | 
          | 
          | 
                 float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 2657 | 
          | 
          | 
                 aSign = extractFloat32Sign( a );
  | 
      
      
         | 2658 | 
          | 
          | 
                 switch ( float_rounding_mode ) {
  | 
      
      
         | 2659 | 
          | 
          | 
                  case float_round_nearest_even:
  | 
      
      
         | 2660 | 
          | 
          | 
                     if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
  | 
      
      
         | 2661 | 
          | 
          | 
                         return packFloat32( aSign, 0x7F, 0 );
  | 
      
      
         | 2662 | 
          | 
          | 
                     }
  | 
      
      
         | 2663 | 
          | 
          | 
                     break;
  | 
      
      
         | 2664 | 
          | 
          | 
                  case float_round_down:
  | 
      
      
         | 2665 | 
          | 
          | 
                     return aSign ? 0xBF800000 : 0;
  | 
      
      
         | 2666 | 
          | 
          | 
                  case float_round_up:
  | 
      
      
         | 2667 | 
          | 
          | 
                     return aSign ? 0x80000000 : 0x3F800000;
  | 
      
      
         | 2668 | 
          | 
          | 
                 }
  | 
      
      
         | 2669 | 
          | 
          | 
                 return packFloat32( aSign, 0, 0 );
  | 
      
      
         | 2670 | 
          | 
          | 
             }
  | 
      
      
         | 2671 | 
          | 
          | 
             lastBitMask = 1;
  | 
      
      
         | 2672 | 
          | 
          | 
             lastBitMask <<= 0x96 - aExp;
  | 
      
      
         | 2673 | 
          | 
          | 
             roundBitsMask = lastBitMask - 1;
  | 
      
      
         | 2674 | 
          | 
          | 
             z = a;
  | 
      
      
         | 2675 | 
          | 
          | 
             roundingMode = float_rounding_mode;
  | 
      
      
         | 2676 | 
          | 
          | 
             if ( roundingMode == float_round_nearest_even ) {
  | 
      
      
         | 2677 | 
          | 
          | 
                 z += lastBitMask>>1;
  | 
      
      
         | 2678 | 
          | 
          | 
                 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
  | 
      
      
         | 2679 | 
          | 
          | 
             }
  | 
      
      
         | 2680 | 
          | 
          | 
             else if ( roundingMode != float_round_to_zero ) {
  | 
      
      
         | 2681 | 
          | 
          | 
                 if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
  | 
      
      
         | 2682 | 
          | 
          | 
                     z += roundBitsMask;
  | 
      
      
         | 2683 | 
          | 
          | 
                 }
  | 
      
      
         | 2684 | 
          | 
          | 
             }
  | 
      
      
         | 2685 | 
          | 
          | 
             z &= ~ roundBitsMask;
  | 
      
      
         | 2686 | 
          | 
          | 
             if ( z != a ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 2687 | 
          | 
          | 
             return z;
  | 
      
      
         | 2688 | 
          | 
          | 
          
  | 
      
      
         | 2689 | 
          | 
          | 
         }
  | 
      
      
         | 2690 | 
          | 
          | 
          
  | 
      
      
         | 2691 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2692 | 
          | 
          | 
         | Returns the result of adding the absolute values of the single-precision
  | 
      
      
         | 2693 | 
          | 
          | 
         | floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
  | 
      
      
         | 2694 | 
          | 
          | 
         | before being returned.  `zSign' is ignored if the result is a NaN.
  | 
      
      
         | 2695 | 
          | 
          | 
         | The addition is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 2696 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 2697 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2698 | 
          | 
          | 
          
  | 
      
      
         | 2699 | 
          | 
          | 
         static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
  | 
      
      
         | 2700 | 
          | 
          | 
         {
  | 
      
      
         | 2701 | 
          | 
          | 
             int16 aExp, bExp, zExp;
  | 
      
      
         | 2702 | 
          | 
          | 
             bits32 aSig, bSig, zSig;
  | 
      
      
         | 2703 | 
          | 
          | 
             int16 expDiff;
  | 
      
      
         | 2704 | 
          | 
          | 
          
  | 
      
      
         | 2705 | 
          | 
          | 
             aSig = extractFloat32Frac( a );
  | 
      
      
         | 2706 | 
          | 
          | 
             aExp = extractFloat32Exp( a );
  | 
      
      
         | 2707 | 
          | 
          | 
             bSig = extractFloat32Frac( b );
  | 
      
      
         | 2708 | 
          | 
          | 
             bExp = extractFloat32Exp( b );
  | 
      
      
         | 2709 | 
          | 
          | 
             expDiff = aExp - bExp;
  | 
      
      
         | 2710 | 
          | 
          | 
             aSig <<= 6;
  | 
      
      
         | 2711 | 
          | 
          | 
             bSig <<= 6;
  | 
      
      
         | 2712 | 
          | 
          | 
             if ( 0 < expDiff ) {
  | 
      
      
         | 2713 | 
          | 
          | 
                 if ( aExp == 0xFF ) {
  | 
      
      
         | 2714 | 
          | 
          | 
                     if ( aSig ) return propagateFloat32NaN( a, b );
  | 
      
      
         | 2715 | 
          | 
          | 
                     return a;
  | 
      
      
         | 2716 | 
          | 
          | 
                 }
  | 
      
      
         | 2717 | 
          | 
          | 
                 if ( bExp == 0 ) {
  | 
      
      
         | 2718 | 
          | 
          | 
                     --expDiff;
  | 
      
      
         | 2719 | 
          | 
          | 
                 }
  | 
      
      
         | 2720 | 
          | 
          | 
                 else {
  | 
      
      
         | 2721 | 
          | 
          | 
                     bSig |= 0x20000000;
  | 
      
      
         | 2722 | 
          | 
          | 
                 }
  | 
      
      
         | 2723 | 
          | 
          | 
                 shift32RightJamming( bSig, expDiff, &bSig );
  | 
      
      
         | 2724 | 
          | 
          | 
                 zExp = aExp;
  | 
      
      
         | 2725 | 
          | 
          | 
             }
  | 
      
      
         | 2726 | 
          | 
          | 
             else if ( expDiff < 0 ) {
  | 
      
      
         | 2727 | 
          | 
          | 
                 if ( bExp == 0xFF ) {
  | 
      
      
         | 2728 | 
          | 
          | 
                     if ( bSig ) return propagateFloat32NaN( a, b );
  | 
      
      
         | 2729 | 
          | 
          | 
                     return packFloat32( zSign, 0xFF, 0 );
  | 
      
      
         | 2730 | 
          | 
          | 
                 }
  | 
      
      
         | 2731 | 
          | 
          | 
                 if ( aExp == 0 ) {
  | 
      
      
         | 2732 | 
          | 
          | 
                     ++expDiff;
  | 
      
      
         | 2733 | 
          | 
          | 
                 }
  | 
      
      
         | 2734 | 
          | 
          | 
                 else {
  | 
      
      
         | 2735 | 
          | 
          | 
                     aSig |= 0x20000000;
  | 
      
      
         | 2736 | 
          | 
          | 
                 }
  | 
      
      
         | 2737 | 
          | 
          | 
                 shift32RightJamming( aSig, - expDiff, &aSig );
  | 
      
      
         | 2738 | 
          | 
          | 
                 zExp = bExp;
  | 
      
      
         | 2739 | 
          | 
          | 
             }
  | 
      
      
         | 2740 | 
          | 
          | 
             else {
  | 
      
      
         | 2741 | 
          | 
          | 
                 if ( aExp == 0xFF ) {
  | 
      
      
         | 2742 | 
          | 
          | 
                     if ( aSig | bSig ) return propagateFloat32NaN( a, b );
  | 
      
      
         | 2743 | 
          | 
          | 
                     return a;
  | 
      
      
         | 2744 | 
          | 
          | 
                 }
  | 
      
      
         | 2745 | 
          | 
          | 
                 if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
  | 
      
      
         | 2746 | 
          | 
          | 
                 zSig = 0x40000000 + aSig + bSig;
  | 
      
      
         | 2747 | 
          | 
          | 
                 zExp = aExp;
  | 
      
      
         | 2748 | 
          | 
          | 
                 goto roundAndPack;
  | 
      
      
         | 2749 | 
          | 
          | 
             }
  | 
      
      
         | 2750 | 
          | 
          | 
             aSig |= 0x20000000;
  | 
      
      
         | 2751 | 
          | 
          | 
             zSig = ( aSig + bSig )<<1;
  | 
      
      
         | 2752 | 
          | 
          | 
             --zExp;
  | 
      
      
         | 2753 | 
          | 
          | 
             if ( (sbits32) zSig < 0 ) {
  | 
      
      
         | 2754 | 
          | 
          | 
                 zSig = aSig + bSig;
  | 
      
      
         | 2755 | 
          | 
          | 
                 ++zExp;
  | 
      
      
         | 2756 | 
          | 
          | 
             }
  | 
      
      
         | 2757 | 
          | 
          | 
          roundAndPack:
  | 
      
      
         | 2758 | 
          | 
          | 
             return roundAndPackFloat32( zSign, zExp, zSig );
  | 
      
      
         | 2759 | 
          | 
          | 
          
  | 
      
      
         | 2760 | 
          | 
          | 
         }
  | 
      
      
         | 2761 | 
          | 
          | 
          
  | 
      
      
         | 2762 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2763 | 
          | 
          | 
         | Returns the result of subtracting the absolute values of the single-
  | 
      
      
         | 2764 | 
          | 
          | 
         | precision floating-point values `a' and `b'.  If `zSign' is 1, the
  | 
      
      
         | 2765 | 
          | 
          | 
         | difference is negated before being returned.  `zSign' is ignored if the
  | 
      
      
         | 2766 | 
          | 
          | 
         | result is a NaN.  The subtraction is performed according to the IEC/IEEE
  | 
      
      
         | 2767 | 
          | 
          | 
         | Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 2768 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2769 | 
          | 
          | 
          
  | 
      
      
         | 2770 | 
          | 
          | 
         static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
  | 
      
      
         | 2771 | 
          | 
          | 
         {
  | 
      
      
         | 2772 | 
          | 
          | 
             int16 aExp, bExp, zExp;
  | 
      
      
         | 2773 | 
          | 
          | 
             bits32 aSig, bSig, zSig;
  | 
      
      
         | 2774 | 
          | 
          | 
             int16 expDiff;
  | 
      
      
         | 2775 | 
          | 
          | 
          
  | 
      
      
         | 2776 | 
          | 
          | 
             aSig = extractFloat32Frac( a );
  | 
      
      
         | 2777 | 
          | 
          | 
             aExp = extractFloat32Exp( a );
  | 
      
      
         | 2778 | 
          | 
          | 
             bSig = extractFloat32Frac( b );
  | 
      
      
         | 2779 | 
          | 
          | 
             bExp = extractFloat32Exp( b );
  | 
      
      
         | 2780 | 
          | 
          | 
             expDiff = aExp - bExp;
  | 
      
      
         | 2781 | 
          | 
          | 
             aSig <<= 7;
  | 
      
      
         | 2782 | 
          | 
          | 
             bSig <<= 7;
  | 
      
      
         | 2783 | 
          | 
          | 
             if ( 0 < expDiff ) goto aExpBigger;
  | 
      
      
         | 2784 | 
          | 
          | 
             if ( expDiff < 0 ) goto bExpBigger;
  | 
      
      
         | 2785 | 
          | 
          | 
             if ( aExp == 0xFF ) {
  | 
      
      
         | 2786 | 
          | 
          | 
                 if ( aSig | bSig ) return propagateFloat32NaN( a, b );
  | 
      
      
         | 2787 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 2788 | 
          | 
          | 
                 return float32_default_nan;
  | 
      
      
         | 2789 | 
          | 
          | 
             }
  | 
      
      
         | 2790 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 2791 | 
          | 
          | 
                 aExp = 1;
  | 
      
      
         | 2792 | 
          | 
          | 
                 bExp = 1;
  | 
      
      
         | 2793 | 
          | 
          | 
             }
  | 
      
      
         | 2794 | 
          | 
          | 
             if ( bSig < aSig ) goto aBigger;
  | 
      
      
         | 2795 | 
          | 
          | 
             if ( aSig < bSig ) goto bBigger;
  | 
      
      
         | 2796 | 
          | 
          | 
             return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
  | 
      
      
         | 2797 | 
          | 
          | 
          bExpBigger:
  | 
      
      
         | 2798 | 
          | 
          | 
             if ( bExp == 0xFF ) {
  | 
      
      
         | 2799 | 
          | 
          | 
                 if ( bSig ) return propagateFloat32NaN( a, b );
  | 
      
      
         | 2800 | 
          | 
          | 
                 return packFloat32( zSign ^ 1, 0xFF, 0 );
  | 
      
      
         | 2801 | 
          | 
          | 
             }
  | 
      
      
         | 2802 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 2803 | 
          | 
          | 
                 ++expDiff;
  | 
      
      
         | 2804 | 
          | 
          | 
             }
  | 
      
      
         | 2805 | 
          | 
          | 
             else {
  | 
      
      
         | 2806 | 
          | 
          | 
                 aSig |= 0x40000000;
  | 
      
      
         | 2807 | 
          | 
          | 
             }
  | 
      
      
         | 2808 | 
          | 
          | 
             shift32RightJamming( aSig, - expDiff, &aSig );
  | 
      
      
         | 2809 | 
          | 
          | 
             bSig |= 0x40000000;
  | 
      
      
         | 2810 | 
          | 
          | 
          bBigger:
  | 
      
      
         | 2811 | 
          | 
          | 
             zSig = bSig - aSig;
  | 
      
      
         | 2812 | 
          | 
          | 
             zExp = bExp;
  | 
      
      
         | 2813 | 
          | 
          | 
             zSign ^= 1;
  | 
      
      
         | 2814 | 
          | 
          | 
             goto normalizeRoundAndPack;
  | 
      
      
         | 2815 | 
          | 
          | 
          aExpBigger:
  | 
      
      
         | 2816 | 
          | 
          | 
             if ( aExp == 0xFF ) {
  | 
      
      
         | 2817 | 
          | 
          | 
                 if ( aSig ) return propagateFloat32NaN( a, b );
  | 
      
      
         | 2818 | 
          | 
          | 
                 return a;
  | 
      
      
         | 2819 | 
          | 
          | 
             }
  | 
      
      
         | 2820 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 2821 | 
          | 
          | 
                 --expDiff;
  | 
      
      
         | 2822 | 
          | 
          | 
             }
  | 
      
      
         | 2823 | 
          | 
          | 
             else {
  | 
      
      
         | 2824 | 
          | 
          | 
                 bSig |= 0x40000000;
  | 
      
      
         | 2825 | 
          | 
          | 
             }
  | 
      
      
         | 2826 | 
          | 
          | 
             shift32RightJamming( bSig, expDiff, &bSig );
  | 
      
      
         | 2827 | 
          | 
          | 
             aSig |= 0x40000000;
  | 
      
      
         | 2828 | 
          | 
          | 
          aBigger:
  | 
      
      
         | 2829 | 
          | 
          | 
             zSig = aSig - bSig;
  | 
      
      
         | 2830 | 
          | 
          | 
             zExp = aExp;
  | 
      
      
         | 2831 | 
          | 
          | 
          normalizeRoundAndPack:
  | 
      
      
         | 2832 | 
          | 
          | 
             --zExp;
  | 
      
      
         | 2833 | 
          | 
          | 
             return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
  | 
      
      
         | 2834 | 
          | 
          | 
          
  | 
      
      
         | 2835 | 
          | 
          | 
         }
  | 
      
      
         | 2836 | 
          | 
          | 
          
  | 
      
      
         | 2837 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2838 | 
          | 
          | 
         | Returns the result of adding the single-precision floating-point values `a'
  | 
      
      
         | 2839 | 
          | 
          | 
         | and `b'.  The operation is performed according to the IEC/IEEE Standard for
  | 
      
      
         | 2840 | 
          | 
          | 
         | Binary Floating-Point Arithmetic.
  | 
      
      
         | 2841 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2842 | 
          | 
          | 
          
  | 
      
      
         | 2843 | 
          | 
          | 
         float32 float32_add( float32 a, float32 b )
  | 
      
      
         | 2844 | 
          | 
          | 
         {
  | 
      
      
         | 2845 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 2846 | 
          | 
          | 
          
  | 
      
      
         | 2847 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 2848 | 
          | 
          | 
             bSign = extractFloat32Sign( b );
  | 
      
      
         | 2849 | 
          | 
          | 
             if ( aSign == bSign ) {
  | 
      
      
         | 2850 | 
          | 
          | 
                 return addFloat32Sigs( a, b, aSign );
  | 
      
      
         | 2851 | 
          | 
          | 
             }
  | 
      
      
         | 2852 | 
          | 
          | 
             else {
  | 
      
      
         | 2853 | 
          | 
          | 
                 return subFloat32Sigs( a, b, aSign );
  | 
      
      
         | 2854 | 
          | 
          | 
             }
  | 
      
      
         | 2855 | 
          | 
          | 
          
  | 
      
      
         | 2856 | 
          | 
          | 
         }
  | 
      
      
         | 2857 | 
          | 
          | 
          
  | 
      
      
         | 2858 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2859 | 
          | 
          | 
         | Returns the result of subtracting the single-precision floating-point values
  | 
      
      
         | 2860 | 
          | 
          | 
         | `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
  | 
      
      
         | 2861 | 
          | 
          | 
         | for Binary Floating-Point Arithmetic.
  | 
      
      
         | 2862 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2863 | 
          | 
          | 
          
  | 
      
      
         | 2864 | 
          | 
          | 
         float32 float32_sub( float32 a, float32 b )
  | 
      
      
         | 2865 | 
          | 
          | 
         {
  | 
      
      
         | 2866 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 2867 | 
          | 
          | 
          
  | 
      
      
         | 2868 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 2869 | 
          | 
          | 
             bSign = extractFloat32Sign( b );
  | 
      
      
         | 2870 | 
          | 
          | 
             if ( aSign == bSign ) {
  | 
      
      
         | 2871 | 
          | 
          | 
                 return subFloat32Sigs( a, b, aSign );
  | 
      
      
         | 2872 | 
          | 
          | 
             }
  | 
      
      
         | 2873 | 
          | 
          | 
             else {
  | 
      
      
         | 2874 | 
          | 
          | 
                 return addFloat32Sigs( a, b, aSign );
  | 
      
      
         | 2875 | 
          | 
          | 
             }
  | 
      
      
         | 2876 | 
          | 
          | 
          
  | 
      
      
         | 2877 | 
          | 
          | 
         }
  | 
      
      
         | 2878 | 
          | 
          | 
          
  | 
      
      
         | 2879 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2880 | 
          | 
          | 
         | Returns the result of multiplying the single-precision floating-point values
  | 
      
      
         | 2881 | 
          | 
          | 
         | `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
  | 
      
      
         | 2882 | 
          | 
          | 
         | for Binary Floating-Point Arithmetic.
  | 
      
      
         | 2883 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2884 | 
          | 
          | 
          
  | 
      
      
         | 2885 | 
          | 
          | 
         float32 float32_mul( float32 a, float32 b )
  | 
      
      
         | 2886 | 
          | 
          | 
         {
  | 
      
      
         | 2887 | 
          | 
          | 
             flag aSign, bSign, zSign;
  | 
      
      
         | 2888 | 
          | 
          | 
             int16 aExp, bExp, zExp;
  | 
      
      
         | 2889 | 
          | 
          | 
             bits32 aSig, bSig;
  | 
      
      
         | 2890 | 
          | 
          | 
             bits64 zSig64;
  | 
      
      
         | 2891 | 
          | 
          | 
             bits32 zSig;
  | 
      
      
         | 2892 | 
          | 
          | 
          
  | 
      
      
         | 2893 | 
          | 
          | 
             aSig = extractFloat32Frac( a );
  | 
      
      
         | 2894 | 
          | 
          | 
             aExp = extractFloat32Exp( a );
  | 
      
      
         | 2895 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 2896 | 
          | 
          | 
             bSig = extractFloat32Frac( b );
  | 
      
      
         | 2897 | 
          | 
          | 
             bExp = extractFloat32Exp( b );
  | 
      
      
         | 2898 | 
          | 
          | 
             bSign = extractFloat32Sign( b );
  | 
      
      
         | 2899 | 
          | 
          | 
             zSign = aSign ^ bSign;
  | 
      
      
         | 2900 | 
          | 
          | 
             if ( aExp == 0xFF ) {
  | 
      
      
         | 2901 | 
          | 
          | 
                 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
  | 
      
      
         | 2902 | 
          | 
          | 
                     return propagateFloat32NaN( a, b );
  | 
      
      
         | 2903 | 
          | 
          | 
                 }
  | 
      
      
         | 2904 | 
          | 
          | 
                 if ( ( bExp | bSig ) == 0 ) {
  | 
      
      
         | 2905 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 2906 | 
          | 
          | 
                     return float32_default_nan;
  | 
      
      
         | 2907 | 
          | 
          | 
                 }
  | 
      
      
         | 2908 | 
          | 
          | 
                 return packFloat32( zSign, 0xFF, 0 );
  | 
      
      
         | 2909 | 
          | 
          | 
             }
  | 
      
      
         | 2910 | 
          | 
          | 
             if ( bExp == 0xFF ) {
  | 
      
      
         | 2911 | 
          | 
          | 
                 if ( bSig ) return propagateFloat32NaN( a, b );
  | 
      
      
         | 2912 | 
          | 
          | 
                 if ( ( aExp | aSig ) == 0 ) {
  | 
      
      
         | 2913 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 2914 | 
          | 
          | 
                     return float32_default_nan;
  | 
      
      
         | 2915 | 
          | 
          | 
                 }
  | 
      
      
         | 2916 | 
          | 
          | 
                 return packFloat32( zSign, 0xFF, 0 );
  | 
      
      
         | 2917 | 
          | 
          | 
             }
  | 
      
      
         | 2918 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 2919 | 
          | 
          | 
                 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
  | 
      
      
         | 2920 | 
          | 
          | 
                 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 2921 | 
          | 
          | 
             }
  | 
      
      
         | 2922 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 2923 | 
          | 
          | 
                 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
  | 
      
      
         | 2924 | 
          | 
          | 
                 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
  | 
      
      
         | 2925 | 
          | 
          | 
             }
  | 
      
      
         | 2926 | 
          | 
          | 
             zExp = aExp + bExp - 0x7F;
  | 
      
      
         | 2927 | 
          | 
          | 
             aSig = ( aSig | 0x00800000 )<<7;
  | 
      
      
         | 2928 | 
          | 
          | 
             bSig = ( bSig | 0x00800000 )<<8;
  | 
      
      
         | 2929 | 
          | 
          | 
             shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
  | 
      
      
         | 2930 | 
          | 
          | 
             zSig = zSig64;
  | 
      
      
         | 2931 | 
          | 
          | 
             if ( 0 <= (sbits32) ( zSig<<1 ) ) {
  | 
      
      
         | 2932 | 
          | 
          | 
                 zSig <<= 1;
  | 
      
      
         | 2933 | 
          | 
          | 
                 --zExp;
  | 
      
      
         | 2934 | 
          | 
          | 
             }
  | 
      
      
         | 2935 | 
          | 
          | 
             return roundAndPackFloat32( zSign, zExp, zSig );
  | 
      
      
         | 2936 | 
          | 
          | 
          
  | 
      
      
         | 2937 | 
          | 
          | 
         }
  | 
      
      
         | 2938 | 
          | 
          | 
          
  | 
      
      
         | 2939 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 2940 | 
          | 
          | 
         | Returns the result of dividing the single-precision floating-point value `a'
  | 
      
      
         | 2941 | 
          | 
          | 
         | by the corresponding value `b'.  The operation is performed according to the
  | 
      
      
         | 2942 | 
          | 
          | 
         | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 2943 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 2944 | 
          | 
          | 
          
  | 
      
      
         | 2945 | 
          | 
          | 
         float32 float32_div( float32 a, float32 b )
  | 
      
      
         | 2946 | 
          | 
          | 
         {
  | 
      
      
         | 2947 | 
          | 
          | 
             flag aSign, bSign, zSign;
  | 
      
      
         | 2948 | 
          | 
          | 
             int16 aExp, bExp, zExp;
  | 
      
      
         | 2949 | 
          | 
          | 
             bits32 aSig, bSig, zSig;
  | 
      
      
         | 2950 | 
          | 
          | 
          
  | 
      
      
         | 2951 | 
          | 
          | 
             aSig = extractFloat32Frac( a );
  | 
      
      
         | 2952 | 
          | 
          | 
             aExp = extractFloat32Exp( a );
  | 
      
      
         | 2953 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 2954 | 
          | 
          | 
             bSig = extractFloat32Frac( b );
  | 
      
      
         | 2955 | 
          | 
          | 
             bExp = extractFloat32Exp( b );
  | 
      
      
         | 2956 | 
          | 
          | 
             bSign = extractFloat32Sign( b );
  | 
      
      
         | 2957 | 
          | 
          | 
             zSign = aSign ^ bSign;
  | 
      
      
         | 2958 | 
          | 
          | 
             if ( aExp == 0xFF ) {
  | 
      
      
         | 2959 | 
          | 
          | 
                 if ( aSig ) return propagateFloat32NaN( a, b );
  | 
      
      
         | 2960 | 
          | 
          | 
                 if ( bExp == 0xFF ) {
  | 
      
      
         | 2961 | 
          | 
          | 
                     if ( bSig ) return propagateFloat32NaN( a, b );
  | 
      
      
         | 2962 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 2963 | 
          | 
          | 
                     return float32_default_nan;
  | 
      
      
         | 2964 | 
          | 
          | 
                 }
  | 
      
      
         | 2965 | 
          | 
          | 
                 return packFloat32( zSign, 0xFF, 0 );
  | 
      
      
         | 2966 | 
          | 
          | 
             }
  | 
      
      
         | 2967 | 
          | 
          | 
             if ( bExp == 0xFF ) {
  | 
      
      
         | 2968 | 
          | 
          | 
                 if ( bSig ) return propagateFloat32NaN( a, b );
  | 
      
      
         | 2969 | 
          | 
          | 
                 return packFloat32( zSign, 0, 0 );
  | 
      
      
         | 2970 | 
          | 
          | 
             }
  | 
      
      
         | 2971 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 2972 | 
          | 
          | 
                 if ( bSig == 0 ) {
  | 
      
      
         | 2973 | 
          | 
          | 
                     if ( ( aExp | aSig ) == 0 ) {
  | 
      
      
         | 2974 | 
          | 
          | 
                         float_raise( float_flag_invalid );
  | 
      
      
         | 2975 | 
          | 
          | 
                         return float32_default_nan;
  | 
      
      
         | 2976 | 
          | 
          | 
                     }
  | 
      
      
         | 2977 | 
          | 
          | 
                     float_raise( float_flag_divbyzero );
  | 
      
      
         | 2978 | 
          | 
          | 
                     return packFloat32( zSign, 0xFF, 0 );
  | 
      
      
         | 2979 | 
          | 
          | 
                 }
  | 
      
      
         | 2980 | 
          | 
          | 
                 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
  | 
      
      
         | 2981 | 
          | 
          | 
             }
  | 
      
      
         | 2982 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 2983 | 
          | 
          | 
                 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
  | 
      
      
         | 2984 | 
          | 
          | 
                 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 2985 | 
          | 
          | 
             }
  | 
      
      
         | 2986 | 
          | 
          | 
             zExp = aExp - bExp + 0x7D;
  | 
      
      
         | 2987 | 
          | 
          | 
             aSig = ( aSig | 0x00800000 )<<7;
  | 
      
      
         | 2988 | 
          | 
          | 
             bSig = ( bSig | 0x00800000 )<<8;
  | 
      
      
         | 2989 | 
          | 
          | 
             if ( bSig <= ( aSig + aSig ) ) {
  | 
      
      
         | 2990 | 
          | 
          | 
                 aSig >>= 1;
  | 
      
      
         | 2991 | 
          | 
          | 
                 ++zExp;
  | 
      
      
         | 2992 | 
          | 
          | 
             }
  | 
      
      
         | 2993 | 
          | 
          | 
             zSig = ( ( (bits64) aSig )<<32 ) / bSig;
  | 
      
      
         | 2994 | 
          | 
          | 
             if ( ( zSig & 0x3F ) == 0 ) {
  | 
      
      
         | 2995 | 
          | 
          | 
                 zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
  | 
      
      
         | 2996 | 
          | 
          | 
             }
  | 
      
      
         | 2997 | 
          | 
          | 
             return roundAndPackFloat32( zSign, zExp, zSig );
  | 
      
      
         | 2998 | 
          | 
          | 
          
  | 
      
      
         | 2999 | 
          | 
          | 
         }
  | 
      
      
         | 3000 | 
          | 
          | 
          
  | 
      
      
         | 3001 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3002 | 
          | 
          | 
         | Returns the remainder of the single-precision floating-point value `a'
  | 
      
      
         | 3003 | 
          | 
          | 
         | with respect to the corresponding value `b'.  The operation is performed
  | 
      
      
         | 3004 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 3005 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3006 | 
          | 
          | 
          
  | 
      
      
         | 3007 | 
          | 
          | 
         float32 float32_rem( float32 a, float32 b )
  | 
      
      
         | 3008 | 
          | 
          | 
         {
  | 
      
      
         | 3009 | 
          | 
          | 
             flag aSign, bSign, zSign;
  | 
      
      
         | 3010 | 
          | 
          | 
             int16 aExp, bExp, expDiff;
  | 
      
      
         | 3011 | 
          | 
          | 
             bits32 aSig, bSig;
  | 
      
      
         | 3012 | 
          | 
          | 
             bits32 q;
  | 
      
      
         | 3013 | 
          | 
          | 
             bits64 aSig64, bSig64, q64;
  | 
      
      
         | 3014 | 
          | 
          | 
             bits32 alternateASig;
  | 
      
      
         | 3015 | 
          | 
          | 
             sbits32 sigMean;
  | 
      
      
         | 3016 | 
          | 
          | 
          
  | 
      
      
         | 3017 | 
          | 
          | 
             aSig = extractFloat32Frac( a );
  | 
      
      
         | 3018 | 
          | 
          | 
             aExp = extractFloat32Exp( a );
  | 
      
      
         | 3019 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 3020 | 
          | 
          | 
             bSig = extractFloat32Frac( b );
  | 
      
      
         | 3021 | 
          | 
          | 
             bExp = extractFloat32Exp( b );
  | 
      
      
         | 3022 | 
          | 
          | 
             bSign = extractFloat32Sign( b );
  | 
      
      
         | 3023 | 
          | 
          | 
             if ( aExp == 0xFF ) {
  | 
      
      
         | 3024 | 
          | 
          | 
                 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
  | 
      
      
         | 3025 | 
          | 
          | 
                     return propagateFloat32NaN( a, b );
  | 
      
      
         | 3026 | 
          | 
          | 
                 }
  | 
      
      
         | 3027 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 3028 | 
          | 
          | 
                 return float32_default_nan;
  | 
      
      
         | 3029 | 
          | 
          | 
             }
  | 
      
      
         | 3030 | 
          | 
          | 
             if ( bExp == 0xFF ) {
  | 
      
      
         | 3031 | 
          | 
          | 
                 if ( bSig ) return propagateFloat32NaN( a, b );
  | 
      
      
         | 3032 | 
          | 
          | 
                 return a;
  | 
      
      
         | 3033 | 
          | 
          | 
             }
  | 
      
      
         | 3034 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 3035 | 
          | 
          | 
                 if ( bSig == 0 ) {
  | 
      
      
         | 3036 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 3037 | 
          | 
          | 
                     return float32_default_nan;
  | 
      
      
         | 3038 | 
          | 
          | 
                 }
  | 
      
      
         | 3039 | 
          | 
          | 
                 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
  | 
      
      
         | 3040 | 
          | 
          | 
             }
  | 
      
      
         | 3041 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 3042 | 
          | 
          | 
                 if ( aSig == 0 ) return a;
  | 
      
      
         | 3043 | 
          | 
          | 
                 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 3044 | 
          | 
          | 
             }
  | 
      
      
         | 3045 | 
          | 
          | 
             expDiff = aExp - bExp;
  | 
      
      
         | 3046 | 
          | 
          | 
             aSig |= 0x00800000;
  | 
      
      
         | 3047 | 
          | 
          | 
             bSig |= 0x00800000;
  | 
      
      
         | 3048 | 
          | 
          | 
             if ( expDiff < 32 ) {
  | 
      
      
         | 3049 | 
          | 
          | 
                 aSig <<= 8;
  | 
      
      
         | 3050 | 
          | 
          | 
                 bSig <<= 8;
  | 
      
      
         | 3051 | 
          | 
          | 
                 if ( expDiff < 0 ) {
  | 
      
      
         | 3052 | 
          | 
          | 
                     if ( expDiff < -1 ) return a;
  | 
      
      
         | 3053 | 
          | 
          | 
                     aSig >>= 1;
  | 
      
      
         | 3054 | 
          | 
          | 
                 }
  | 
      
      
         | 3055 | 
          | 
          | 
                 q = ( bSig <= aSig );
  | 
      
      
         | 3056 | 
          | 
          | 
                 if ( q ) aSig -= bSig;
  | 
      
      
         | 3057 | 
          | 
          | 
                 if ( 0 < expDiff ) {
  | 
      
      
         | 3058 | 
          | 
          | 
                     q = ( ( (bits64) aSig )<<32 ) / bSig;
  | 
      
      
         | 3059 | 
          | 
          | 
                     q >>= 32 - expDiff;
  | 
      
      
         | 3060 | 
          | 
          | 
                     bSig >>= 2;
  | 
      
      
         | 3061 | 
          | 
          | 
                     aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
  | 
      
      
         | 3062 | 
          | 
          | 
                 }
  | 
      
      
         | 3063 | 
          | 
          | 
                 else {
  | 
      
      
         | 3064 | 
          | 
          | 
                     aSig >>= 2;
  | 
      
      
         | 3065 | 
          | 
          | 
                     bSig >>= 2;
  | 
      
      
         | 3066 | 
          | 
          | 
                 }
  | 
      
      
         | 3067 | 
          | 
          | 
             }
  | 
      
      
         | 3068 | 
          | 
          | 
             else {
  | 
      
      
         | 3069 | 
          | 
          | 
                 if ( bSig <= aSig ) aSig -= bSig;
  | 
      
      
         | 3070 | 
          | 
          | 
                 aSig64 = ( (bits64) aSig )<<40;
  | 
      
      
         | 3071 | 
          | 
          | 
                 bSig64 = ( (bits64) bSig )<<40;
  | 
      
      
         | 3072 | 
          | 
          | 
                 expDiff -= 64;
  | 
      
      
         | 3073 | 
          | 
          | 
                 while ( 0 < expDiff ) {
  | 
      
      
         | 3074 | 
          | 
          | 
                     q64 = estimateDiv128To64( aSig64, 0, bSig64 );
  | 
      
      
         | 3075 | 
          | 
          | 
                     q64 = ( 2 < q64 ) ? q64 - 2 : 0;
  | 
      
      
         | 3076 | 
          | 
          | 
                     aSig64 = - ( ( bSig * q64 )<<38 );
  | 
      
      
         | 3077 | 
          | 
          | 
                     expDiff -= 62;
  | 
      
      
         | 3078 | 
          | 
          | 
                 }
  | 
      
      
         | 3079 | 
          | 
          | 
                 expDiff += 64;
  | 
      
      
         | 3080 | 
          | 
          | 
                 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
  | 
      
      
         | 3081 | 
          | 
          | 
                 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
  | 
      
      
         | 3082 | 
          | 
          | 
                 q = q64>>( 64 - expDiff );
  | 
      
      
         | 3083 | 
          | 
          | 
                 bSig <<= 6;
  | 
      
      
         | 3084 | 
          | 
          | 
                 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
  | 
      
      
         | 3085 | 
          | 
          | 
             }
  | 
      
      
         | 3086 | 
          | 
          | 
             do {
  | 
      
      
         | 3087 | 
          | 
          | 
                 alternateASig = aSig;
  | 
      
      
         | 3088 | 
          | 
          | 
                 ++q;
  | 
      
      
         | 3089 | 
          | 
          | 
                 aSig -= bSig;
  | 
      
      
         | 3090 | 
          | 
          | 
             } while ( 0 <= (sbits32) aSig );
  | 
      
      
         | 3091 | 
          | 
          | 
             sigMean = aSig + alternateASig;
  | 
      
      
         | 3092 | 
          | 
          | 
             if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
  | 
      
      
         | 3093 | 
          | 
          | 
                 aSig = alternateASig;
  | 
      
      
         | 3094 | 
          | 
          | 
             }
  | 
      
      
         | 3095 | 
          | 
          | 
             zSign = ( (sbits32) aSig < 0 );
  | 
      
      
         | 3096 | 
          | 
          | 
             if ( zSign ) aSig = - aSig;
  | 
      
      
         | 3097 | 
          | 
          | 
             return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
  | 
      
      
         | 3098 | 
          | 
          | 
          
  | 
      
      
         | 3099 | 
          | 
          | 
         }
  | 
      
      
         | 3100 | 
          | 
          | 
          
  | 
      
      
         | 3101 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3102 | 
          | 
          | 
         | Returns the square root of the single-precision floating-point value `a'.
  | 
      
      
         | 3103 | 
          | 
          | 
         | The operation is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 3104 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 3105 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3106 | 
          | 
          | 
          
  | 
      
      
         | 3107 | 
          | 
          | 
         float32 float32_sqrt( float32 a )
  | 
      
      
         | 3108 | 
          | 
          | 
         {
  | 
      
      
         | 3109 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 3110 | 
          | 
          | 
             int16 aExp, zExp;
  | 
      
      
         | 3111 | 
          | 
          | 
             bits32 aSig, zSig;
  | 
      
      
         | 3112 | 
          | 
          | 
             bits64 rem, term;
  | 
      
      
         | 3113 | 
          | 
          | 
          
  | 
      
      
         | 3114 | 
          | 
          | 
             aSig = extractFloat32Frac( a );
  | 
      
      
         | 3115 | 
          | 
          | 
             aExp = extractFloat32Exp( a );
  | 
      
      
         | 3116 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 3117 | 
          | 
          | 
             if ( aExp == 0xFF ) {
  | 
      
      
         | 3118 | 
          | 
          | 
                 if ( aSig ) return propagateFloat32NaN( a, 0 );
  | 
      
      
         | 3119 | 
          | 
          | 
                 if ( ! aSign ) return a;
  | 
      
      
         | 3120 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 3121 | 
          | 
          | 
                 return float32_default_nan;
  | 
      
      
         | 3122 | 
          | 
          | 
             }
  | 
      
      
         | 3123 | 
          | 
          | 
             if ( aSign ) {
  | 
      
      
         | 3124 | 
          | 
          | 
                 if ( ( aExp | aSig ) == 0 ) return a;
  | 
      
      
         | 3125 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 3126 | 
          | 
          | 
                 return float32_default_nan;
  | 
      
      
         | 3127 | 
          | 
          | 
             }
  | 
      
      
         | 3128 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 3129 | 
          | 
          | 
                 if ( aSig == 0 ) return 0;
  | 
      
      
         | 3130 | 
          | 
          | 
                 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 3131 | 
          | 
          | 
             }
  | 
      
      
         | 3132 | 
          | 
          | 
             zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
  | 
      
      
         | 3133 | 
          | 
          | 
             aSig = ( aSig | 0x00800000 )<<8;
  | 
      
      
         | 3134 | 
          | 
          | 
             zSig = estimateSqrt32( aExp, aSig ) + 2;
  | 
      
      
         | 3135 | 
          | 
          | 
             if ( ( zSig & 0x7F ) <= 5 ) {
  | 
      
      
         | 3136 | 
          | 
          | 
                 if ( zSig < 2 ) {
  | 
      
      
         | 3137 | 
          | 
          | 
                     zSig = 0x7FFFFFFF;
  | 
      
      
         | 3138 | 
          | 
          | 
                     goto roundAndPack;
  | 
      
      
         | 3139 | 
          | 
          | 
                 }
  | 
      
      
         | 3140 | 
          | 
          | 
                 aSig >>= aExp & 1;
  | 
      
      
         | 3141 | 
          | 
          | 
                 term = ( (bits64) zSig ) * zSig;
  | 
      
      
         | 3142 | 
          | 
          | 
                 rem = ( ( (bits64) aSig )<<32 ) - term;
  | 
      
      
         | 3143 | 
          | 
          | 
                 while ( (sbits64) rem < 0 ) {
  | 
      
      
         | 3144 | 
          | 
          | 
                     --zSig;
  | 
      
      
         | 3145 | 
          | 
          | 
                     rem += ( ( (bits64) zSig )<<1 ) | 1;
  | 
      
      
         | 3146 | 
          | 
          | 
                 }
  | 
      
      
         | 3147 | 
          | 
          | 
                 zSig |= ( rem != 0 );
  | 
      
      
         | 3148 | 
          | 
          | 
             }
  | 
      
      
         | 3149 | 
          | 
          | 
             shift32RightJamming( zSig, 1, &zSig );
  | 
      
      
         | 3150 | 
          | 
          | 
          roundAndPack:
  | 
      
      
         | 3151 | 
          | 
          | 
             return roundAndPackFloat32( 0, zExp, zSig );
  | 
      
      
         | 3152 | 
          | 
          | 
          
  | 
      
      
         | 3153 | 
          | 
          | 
         }
  | 
      
      
         | 3154 | 
          | 
          | 
          
  | 
      
      
         | 3155 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3156 | 
          | 
          | 
         | Returns 1 if the single-precision floating-point value `a' is equal to
  | 
      
      
         | 3157 | 
          | 
          | 
         | the corresponding value `b', and 0 otherwise.  The comparison is performed
  | 
      
      
         | 3158 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 3159 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3160 | 
          | 
          | 
          
  | 
      
      
         | 3161 | 
          | 
          | 
         flag float32_eq( float32 a, float32 b )
  | 
      
      
         | 3162 | 
          | 
          | 
         {
  | 
      
      
         | 3163 | 
          | 
          | 
          
  | 
      
      
         | 3164 | 
          | 
          | 
             if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
  | 
      
      
         | 3165 | 
          | 
          | 
                  || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
  | 
      
      
         | 3166 | 
          | 
          | 
                ) {
  | 
      
      
         | 3167 | 
          | 
          | 
                 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
  | 
      
      
         | 3168 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 3169 | 
          | 
          | 
                 }
  | 
      
      
         | 3170 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 3171 | 
          | 
          | 
             }
  | 
      
      
         | 3172 | 
          | 
          | 
             return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
  | 
      
      
         | 3173 | 
          | 
          | 
          
  | 
      
      
         | 3174 | 
          | 
          | 
         }
  | 
      
      
         | 3175 | 
          | 
          | 
          
  | 
      
      
         | 3176 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3177 | 
          | 
          | 
         | Returns 1 if the single-precision floating-point value `a' is less than
  | 
      
      
         | 3178 | 
          | 
          | 
         | or equal to the corresponding value `b', and 0 otherwise.  The comparison
  | 
      
      
         | 3179 | 
          | 
          | 
         | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 3180 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 3181 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3182 | 
          | 
          | 
          
  | 
      
      
         | 3183 | 
          | 
          | 
         flag float32_le( float32 a, float32 b )
  | 
      
      
         | 3184 | 
          | 
          | 
         {
  | 
      
      
         | 3185 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 3186 | 
          | 
          | 
          
  | 
      
      
         | 3187 | 
          | 
          | 
             if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
  | 
      
      
         | 3188 | 
          | 
          | 
                  || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
  | 
      
      
         | 3189 | 
          | 
          | 
                ) {
  | 
      
      
         | 3190 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 3191 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 3192 | 
          | 
          | 
             }
  | 
      
      
         | 3193 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 3194 | 
          | 
          | 
             bSign = extractFloat32Sign( b );
  | 
      
      
         | 3195 | 
          | 
          | 
             if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
  | 
      
      
         | 3196 | 
          | 
          | 
             return ( a == b ) || ( aSign ^ ( a < b ) );
  | 
      
      
         | 3197 | 
          | 
          | 
          
  | 
      
      
         | 3198 | 
          | 
          | 
         }
  | 
      
      
         | 3199 | 
          | 
          | 
          
  | 
      
      
         | 3200 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3201 | 
          | 
          | 
         | Returns 1 if the single-precision floating-point value `a' is less than
  | 
      
      
         | 3202 | 
          | 
          | 
         | the corresponding value `b', and 0 otherwise.  The comparison is performed
  | 
      
      
         | 3203 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 3204 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3205 | 
          | 
          | 
          
  | 
      
      
         | 3206 | 
          | 
          | 
         flag float32_lt( float32 a, float32 b )
  | 
      
      
         | 3207 | 
          | 
          | 
         {
  | 
      
      
         | 3208 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 3209 | 
          | 
          | 
          
  | 
      
      
         | 3210 | 
          | 
          | 
             if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
  | 
      
      
         | 3211 | 
          | 
          | 
                  || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
  | 
      
      
         | 3212 | 
          | 
          | 
                ) {
  | 
      
      
         | 3213 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 3214 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 3215 | 
          | 
          | 
             }
  | 
      
      
         | 3216 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 3217 | 
          | 
          | 
             bSign = extractFloat32Sign( b );
  | 
      
      
         | 3218 | 
          | 
          | 
             if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
  | 
      
      
         | 3219 | 
          | 
          | 
             return ( a != b ) && ( aSign ^ ( a < b ) );
  | 
      
      
         | 3220 | 
          | 
          | 
          
  | 
      
      
         | 3221 | 
          | 
          | 
         }
  | 
      
      
         | 3222 | 
          | 
          | 
          
  | 
      
      
         | 3223 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3224 | 
          | 
          | 
         | Returns 1 if the single-precision floating-point value `a' is equal to
  | 
      
      
         | 3225 | 
          | 
          | 
         | the corresponding value `b', and 0 otherwise.  The invalid exception is
  | 
      
      
         | 3226 | 
          | 
          | 
         | raised if either operand is a NaN.  Otherwise, the comparison is performed
  | 
      
      
         | 3227 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 3228 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3229 | 
          | 
          | 
          
  | 
      
      
         | 3230 | 
          | 
          | 
         flag float32_eq_signaling( float32 a, float32 b )
  | 
      
      
         | 3231 | 
          | 
          | 
         {
  | 
      
      
         | 3232 | 
          | 
          | 
          
  | 
      
      
         | 3233 | 
          | 
          | 
             if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
  | 
      
      
         | 3234 | 
          | 
          | 
                  || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
  | 
      
      
         | 3235 | 
          | 
          | 
                ) {
  | 
      
      
         | 3236 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 3237 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 3238 | 
          | 
          | 
             }
  | 
      
      
         | 3239 | 
          | 
          | 
             return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
  | 
      
      
         | 3240 | 
          | 
          | 
          
  | 
      
      
         | 3241 | 
          | 
          | 
         }
  | 
      
      
         | 3242 | 
          | 
          | 
          
  | 
      
      
         | 3243 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3244 | 
          | 
          | 
         | Returns 1 if the single-precision floating-point value `a' is less than or
  | 
      
      
         | 3245 | 
          | 
          | 
         | equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
  | 
      
      
         | 3246 | 
          | 
          | 
         | cause an exception.  Otherwise, the comparison is performed according to the
  | 
      
      
         | 3247 | 
          | 
          | 
         | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 3248 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3249 | 
          | 
          | 
          
  | 
      
      
         | 3250 | 
          | 
          | 
         flag float32_le_quiet( float32 a, float32 b )
  | 
      
      
         | 3251 | 
          | 
          | 
         {
  | 
      
      
         | 3252 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 3253 | 
          | 
          | 
             //int16 aExp, bExp; // Unused variable
  | 
      
      
         | 3254 | 
          | 
          | 
          
  | 
      
      
         | 3255 | 
          | 
          | 
             if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
  | 
      
      
         | 3256 | 
          | 
          | 
                  || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
  | 
      
      
         | 3257 | 
          | 
          | 
                ) {
  | 
      
      
         | 3258 | 
          | 
          | 
                 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
  | 
      
      
         | 3259 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 3260 | 
          | 
          | 
                 }
  | 
      
      
         | 3261 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 3262 | 
          | 
          | 
             }
  | 
      
      
         | 3263 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 3264 | 
          | 
          | 
             bSign = extractFloat32Sign( b );
  | 
      
      
         | 3265 | 
          | 
          | 
             if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
  | 
      
      
         | 3266 | 
          | 
          | 
             return ( a == b ) || ( aSign ^ ( a < b ) );
  | 
      
      
         | 3267 | 
          | 
          | 
          
  | 
      
      
         | 3268 | 
          | 
          | 
         }
  | 
      
      
         | 3269 | 
          | 
          | 
          
  | 
      
      
         | 3270 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3271 | 
          | 
          | 
         | Returns 1 if the single-precision floating-point value `a' is less than
  | 
      
      
         | 3272 | 
          | 
          | 
         | the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
  | 
      
      
         | 3273 | 
          | 
          | 
         | exception.  Otherwise, the comparison is performed according to the IEC/IEEE
  | 
      
      
         | 3274 | 
          | 
          | 
         | Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 3275 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3276 | 
          | 
          | 
          
  | 
      
      
         | 3277 | 
          | 
          | 
         flag float32_lt_quiet( float32 a, float32 b )
  | 
      
      
         | 3278 | 
          | 
          | 
         {
  | 
      
      
         | 3279 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 3280 | 
          | 
          | 
          
  | 
      
      
         | 3281 | 
          | 
          | 
             if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
  | 
      
      
         | 3282 | 
          | 
          | 
                  || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
  | 
      
      
         | 3283 | 
          | 
          | 
                ) {
  | 
      
      
         | 3284 | 
          | 
          | 
                 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
  | 
      
      
         | 3285 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 3286 | 
          | 
          | 
                 }
  | 
      
      
         | 3287 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 3288 | 
          | 
          | 
             }
  | 
      
      
         | 3289 | 
          | 
          | 
             aSign = extractFloat32Sign( a );
  | 
      
      
         | 3290 | 
          | 
          | 
             bSign = extractFloat32Sign( b );
  | 
      
      
         | 3291 | 
          | 
          | 
             if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
  | 
      
      
         | 3292 | 
          | 
          | 
             return ( a != b ) && ( aSign ^ ( a < b ) );
  | 
      
      
         | 3293 | 
          | 
          | 
          
  | 
      
      
         | 3294 | 
          | 
          | 
         }
  | 
      
      
         | 3295 | 
          | 
          | 
          
  | 
      
      
         | 3296 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3297 | 
          | 
          | 
         | Returns the result of converting the double-precision floating-point value
  | 
      
      
         | 3298 | 
          | 
          | 
         | `a' to the 32-bit two's complement integer format.  The conversion is
  | 
      
      
         | 3299 | 
          | 
          | 
         | performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 3300 | 
          | 
          | 
         | Arithmetic---which means in particular that the conversion is rounded
  | 
      
      
         | 3301 | 
          | 
          | 
         | according to the current rounding mode.  If `a' is a NaN, the largest
  | 
      
      
         | 3302 | 
          | 
          | 
         | positive integer is returned.  Otherwise, if the conversion overflows, the
  | 
      
      
         | 3303 | 
          | 
          | 
         | largest integer with the same sign as `a' is returned.
  | 
      
      
         | 3304 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3305 | 
          | 
          | 
          
  | 
      
      
         | 3306 | 
          | 
          | 
         int32 float64_to_int32( float64 a )
  | 
      
      
         | 3307 | 
          | 
          | 
         {
  | 
      
      
         | 3308 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 3309 | 
          | 
          | 
             int16 aExp, shiftCount;
  | 
      
      
         | 3310 | 
          | 
          | 
             bits64 aSig;
  | 
      
      
         | 3311 | 
          | 
          | 
          
  | 
      
      
         | 3312 | 
          | 
          | 
             aSig = extractFloat64Frac( a );
  | 
      
      
         | 3313 | 
          | 
          | 
             aExp = extractFloat64Exp( a );
  | 
      
      
         | 3314 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 3315 | 
          | 
          | 
             if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
  | 
      
      
         | 3316 | 
          | 
          | 
             if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
  | 
      
      
         | 3317 | 
          | 
          | 
             shiftCount = 0x42C - aExp;
  | 
      
      
         | 3318 | 
          | 
          | 
             if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
  | 
      
      
         | 3319 | 
          | 
          | 
             return roundAndPackInt32( aSign, aSig );
  | 
      
      
         | 3320 | 
          | 
          | 
          
  | 
      
      
         | 3321 | 
          | 
          | 
         }
  | 
      
      
         | 3322 | 
          | 
          | 
          
  | 
      
      
         | 3323 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3324 | 
          | 
          | 
         | Returns the result of converting the double-precision floating-point value
  | 
      
      
         | 3325 | 
          | 
          | 
         | `a' to the 32-bit two's complement integer format.  The conversion is
  | 
      
      
         | 3326 | 
          | 
          | 
         | performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 3327 | 
          | 
          | 
         | Arithmetic, except that the conversion is always rounded toward zero.
  | 
      
      
         | 3328 | 
          | 
          | 
         | If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
  | 
      
      
         | 3329 | 
          | 
          | 
         | the conversion overflows, the largest integer with the same sign as `a' is
  | 
      
      
         | 3330 | 
          | 
          | 
         | returned.
  | 
      
      
         | 3331 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3332 | 
          | 
          | 
          
  | 
      
      
         | 3333 | 
          | 
          | 
         int32 float64_to_int32_round_to_zero( float64 a )
  | 
      
      
         | 3334 | 
          | 
          | 
         {
  | 
      
      
         | 3335 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 3336 | 
          | 
          | 
             int16 aExp, shiftCount;
  | 
      
      
         | 3337 | 
          | 
          | 
             bits64 aSig, savedASig;
  | 
      
      
         | 3338 | 
          | 
          | 
             int32 z;
  | 
      
      
         | 3339 | 
          | 
          | 
          
  | 
      
      
         | 3340 | 
          | 
          | 
             aSig = extractFloat64Frac( a );
  | 
      
      
         | 3341 | 
          | 
          | 
             aExp = extractFloat64Exp( a );
  | 
      
      
         | 3342 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 3343 | 
          | 
          | 
             if ( 0x41E < aExp ) {
  | 
      
      
         | 3344 | 
          | 
          | 
                 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
  | 
      
      
         | 3345 | 
          | 
          | 
                 goto invalid;
  | 
      
      
         | 3346 | 
          | 
          | 
             }
  | 
      
      
         | 3347 | 
          | 
          | 
             else if ( aExp < 0x3FF ) {
  | 
      
      
         | 3348 | 
          | 
          | 
                 if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 3349 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 3350 | 
          | 
          | 
             }
  | 
      
      
         | 3351 | 
          | 
          | 
             aSig |= LIT64( 0x0010000000000000 );
  | 
      
      
         | 3352 | 
          | 
          | 
             shiftCount = 0x433 - aExp;
  | 
      
      
         | 3353 | 
          | 
          | 
             savedASig = aSig;
  | 
      
      
         | 3354 | 
          | 
          | 
             aSig >>= shiftCount;
  | 
      
      
         | 3355 | 
          | 
          | 
             z = aSig;
  | 
      
      
         | 3356 | 
          | 
          | 
             if ( aSign ) z = - z;
  | 
      
      
         | 3357 | 
          | 
          | 
             if ( ( z < 0 ) ^ aSign ) {
  | 
      
      
         | 3358 | 
          | 
          | 
          invalid:
  | 
      
      
         | 3359 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 3360 | 
          | 
          | 
                 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
  | 
      
      
         | 3361 | 
          | 
          | 
             }
  | 
      
      
         | 3362 | 
          | 
          | 
             if ( ( aSig<<shiftCount ) != savedASig ) {
  | 
      
      
         | 3363 | 
          | 
          | 
                 float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 3364 | 
          | 
          | 
             }
  | 
      
      
         | 3365 | 
          | 
          | 
             return z;
  | 
      
      
         | 3366 | 
          | 
          | 
          
  | 
      
      
         | 3367 | 
          | 
          | 
         }
  | 
      
      
         | 3368 | 
          | 
          | 
          
  | 
      
      
         | 3369 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3370 | 
          | 
          | 
         | Returns the result of converting the double-precision floating-point value
  | 
      
      
         | 3371 | 
          | 
          | 
         | `a' to the 64-bit two's complement integer format.  The conversion is
  | 
      
      
         | 3372 | 
          | 
          | 
         | performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 3373 | 
          | 
          | 
         | Arithmetic---which means in particular that the conversion is rounded
  | 
      
      
         | 3374 | 
          | 
          | 
         | according to the current rounding mode.  If `a' is a NaN, the largest
  | 
      
      
         | 3375 | 
          | 
          | 
         | positive integer is returned.  Otherwise, if the conversion overflows, the
  | 
      
      
         | 3376 | 
          | 
          | 
         | largest integer with the same sign as `a' is returned.
  | 
      
      
         | 3377 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3378 | 
          | 
          | 
          
  | 
      
      
         | 3379 | 
          | 
          | 
         int64 float64_to_int64( float64 a )
  | 
      
      
         | 3380 | 
          | 
          | 
         {
  | 
      
      
         | 3381 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 3382 | 
          | 
          | 
             int16 aExp, shiftCount;
  | 
      
      
         | 3383 | 
          | 
          | 
             bits64 aSig, aSigExtra;
  | 
      
      
         | 3384 | 
          | 
          | 
          
  | 
      
      
         | 3385 | 
          | 
          | 
             aSig = extractFloat64Frac( a );
  | 
      
      
         | 3386 | 
          | 
          | 
             aExp = extractFloat64Exp( a );
  | 
      
      
         | 3387 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 3388 | 
          | 
          | 
             if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
  | 
      
      
         | 3389 | 
          | 
          | 
             shiftCount = 0x433 - aExp;
  | 
      
      
         | 3390 | 
          | 
          | 
             if ( shiftCount <= 0 ) {
  | 
      
      
         | 3391 | 
          | 
          | 
                 if ( 0x43E < aExp ) {
  | 
      
      
         | 3392 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 3393 | 
          | 
          | 
                     if (    ! aSign
  | 
      
      
         | 3394 | 
          | 
          | 
                          || (    ( aExp == 0x7FF )
  | 
      
      
         | 3395 | 
          | 
          | 
                               && ( aSig != LIT64( 0x0010000000000000 ) ) )
  | 
      
      
         | 3396 | 
          | 
          | 
                        ) {
  | 
      
      
         | 3397 | 
          | 
          | 
                         return LIT64( 0x7FFFFFFFFFFFFFFF );
  | 
      
      
         | 3398 | 
          | 
          | 
                     }
  | 
      
      
         | 3399 | 
          | 
          | 
                     return (sbits64) LIT64( 0x8000000000000000 );
  | 
      
      
         | 3400 | 
          | 
          | 
                 }
  | 
      
      
         | 3401 | 
          | 
          | 
                 aSigExtra = 0;
  | 
      
      
         | 3402 | 
          | 
          | 
                 aSig <<= - shiftCount;
  | 
      
      
         | 3403 | 
          | 
          | 
             }
  | 
      
      
         | 3404 | 
          | 
          | 
             else {
  | 
      
      
         | 3405 | 
          | 
          | 
                 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
  | 
      
      
         | 3406 | 
          | 
          | 
             }
  | 
      
      
         | 3407 | 
          | 
          | 
             return roundAndPackInt64( aSign, aSig, aSigExtra );
  | 
      
      
         | 3408 | 
          | 
          | 
          
  | 
      
      
         | 3409 | 
          | 
          | 
         }
  | 
      
      
         | 3410 | 
          | 
          | 
          
  | 
      
      
         | 3411 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3412 | 
          | 
          | 
         | Returns the result of converting the double-precision floating-point value
  | 
      
      
         | 3413 | 
          | 
          | 
         | `a' to the 64-bit two's complement integer format.  The conversion is
  | 
      
      
         | 3414 | 
          | 
          | 
         | performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 3415 | 
          | 
          | 
         | Arithmetic, except that the conversion is always rounded toward zero.
  | 
      
      
         | 3416 | 
          | 
          | 
         | If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
  | 
      
      
         | 3417 | 
          | 
          | 
         | the conversion overflows, the largest integer with the same sign as `a' is
  | 
      
      
         | 3418 | 
          | 
          | 
         | returned.
  | 
      
      
         | 3419 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3420 | 
          | 
          | 
          
  | 
      
      
         | 3421 | 
          | 
          | 
         int64 float64_to_int64_round_to_zero( float64 a )
  | 
      
      
         | 3422 | 
          | 
          | 
         {
  | 
      
      
         | 3423 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 3424 | 
          | 
          | 
             int16 aExp, shiftCount;
  | 
      
      
         | 3425 | 
          | 
          | 
             bits64 aSig;
  | 
      
      
         | 3426 | 
          | 
          | 
             int64 z;
  | 
      
      
         | 3427 | 
          | 
          | 
          
  | 
      
      
         | 3428 | 
          | 
          | 
             aSig = extractFloat64Frac( a );
  | 
      
      
         | 3429 | 
          | 
          | 
             aExp = extractFloat64Exp( a );
  | 
      
      
         | 3430 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 3431 | 
          | 
          | 
             if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
  | 
      
      
         | 3432 | 
          | 
          | 
             shiftCount = aExp - 0x433;
  | 
      
      
         | 3433 | 
          | 
          | 
             if ( 0 <= shiftCount ) {
  | 
      
      
         | 3434 | 
          | 
          | 
                 if ( 0x43E <= aExp ) {
  | 
      
      
         | 3435 | 
          | 
          | 
                     if ( a != LIT64( 0xC3E0000000000000 ) ) {
  | 
      
      
         | 3436 | 
          | 
          | 
                         float_raise( float_flag_invalid );
  | 
      
      
         | 3437 | 
          | 
          | 
                         if (    ! aSign
  | 
      
      
         | 3438 | 
          | 
          | 
                              || (    ( aExp == 0x7FF )
  | 
      
      
         | 3439 | 
          | 
          | 
                                   && ( aSig != LIT64( 0x0010000000000000 ) ) )
  | 
      
      
         | 3440 | 
          | 
          | 
                            ) {
  | 
      
      
         | 3441 | 
          | 
          | 
                             return LIT64( 0x7FFFFFFFFFFFFFFF );
  | 
      
      
         | 3442 | 
          | 
          | 
                         }
  | 
      
      
         | 3443 | 
          | 
          | 
                     }
  | 
      
      
         | 3444 | 
          | 
          | 
                     return (sbits64) LIT64( 0x8000000000000000 );
  | 
      
      
         | 3445 | 
          | 
          | 
                 }
  | 
      
      
         | 3446 | 
          | 
          | 
                 z = aSig<<shiftCount;
  | 
      
      
         | 3447 | 
          | 
          | 
             }
  | 
      
      
         | 3448 | 
          | 
          | 
             else {
  | 
      
      
         | 3449 | 
          | 
          | 
                 if ( aExp < 0x3FE ) {
  | 
      
      
         | 3450 | 
          | 
          | 
                     if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 3451 | 
          | 
          | 
                     return 0;
  | 
      
      
         | 3452 | 
          | 
          | 
                 }
  | 
      
      
         | 3453 | 
          | 
          | 
                 z = aSig>>( - shiftCount );
  | 
      
      
         | 3454 | 
          | 
          | 
                 if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
  | 
      
      
         | 3455 | 
          | 
          | 
                     float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 3456 | 
          | 
          | 
                 }
  | 
      
      
         | 3457 | 
          | 
          | 
             }
  | 
      
      
         | 3458 | 
          | 
          | 
             if ( aSign ) z = - z;
  | 
      
      
         | 3459 | 
          | 
          | 
             return z;
  | 
      
      
         | 3460 | 
          | 
          | 
          
  | 
      
      
         | 3461 | 
          | 
          | 
         }
  | 
      
      
         | 3462 | 
          | 
          | 
          
  | 
      
      
         | 3463 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3464 | 
          | 
          | 
         | Returns the result of converting the double-precision floating-point value
  | 
      
      
         | 3465 | 
          | 
          | 
         | `a' to the single-precision floating-point format.  The conversion is
  | 
      
      
         | 3466 | 
          | 
          | 
         | performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 3467 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 3468 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3469 | 
          | 
          | 
          
  | 
      
      
         | 3470 | 
          | 
          | 
         float32 float64_to_float32( float64 a )
  | 
      
      
         | 3471 | 
          | 
          | 
         {
  | 
      
      
         | 3472 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 3473 | 
          | 
          | 
             int16 aExp;
  | 
      
      
         | 3474 | 
          | 
          | 
             bits64 aSig;
  | 
      
      
         | 3475 | 
          | 
          | 
             bits32 zSig;
  | 
      
      
         | 3476 | 
          | 
          | 
          
  | 
      
      
         | 3477 | 
          | 
          | 
             aSig = extractFloat64Frac( a );
  | 
      
      
         | 3478 | 
          | 
          | 
             aExp = extractFloat64Exp( a );
  | 
      
      
         | 3479 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 3480 | 
          | 
          | 
             if ( aExp == 0x7FF ) {
  | 
      
      
         | 3481 | 
          | 
          | 
                 if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
  | 
      
      
         | 3482 | 
          | 
          | 
                 return packFloat32( aSign, 0xFF, 0 );
  | 
      
      
         | 3483 | 
          | 
          | 
             }
  | 
      
      
         | 3484 | 
          | 
          | 
             shift64RightJamming( aSig, 22, &aSig );
  | 
      
      
         | 3485 | 
          | 
          | 
             zSig = aSig;
  | 
      
      
         | 3486 | 
          | 
          | 
             if ( aExp || zSig ) {
  | 
      
      
         | 3487 | 
          | 
          | 
                 zSig |= 0x40000000;
  | 
      
      
         | 3488 | 
          | 
          | 
                 aExp -= 0x381;
  | 
      
      
         | 3489 | 
          | 
          | 
             }
  | 
      
      
         | 3490 | 
          | 
          | 
             return roundAndPackFloat32( aSign, aExp, zSig );
  | 
      
      
         | 3491 | 
          | 
          | 
          
  | 
      
      
         | 3492 | 
          | 
          | 
         }
  | 
      
      
         | 3493 | 
          | 
          | 
          
  | 
      
      
         | 3494 | 
          | 
          | 
         #ifdef FLOATX80
  | 
      
      
         | 3495 | 
          | 
          | 
          
  | 
      
      
         | 3496 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3497 | 
          | 
          | 
         | Returns the result of converting the double-precision floating-point value
  | 
      
      
         | 3498 | 
          | 
          | 
         | `a' to the extended double-precision floating-point format.  The conversion
  | 
      
      
         | 3499 | 
          | 
          | 
         | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 3500 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 3501 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3502 | 
          | 
          | 
          
  | 
      
      
         | 3503 | 
          | 
          | 
         floatx80 float64_to_floatx80( float64 a )
  | 
      
      
         | 3504 | 
          | 
          | 
         {
  | 
      
      
         | 3505 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 3506 | 
          | 
          | 
             int16 aExp;
  | 
      
      
         | 3507 | 
          | 
          | 
             bits64 aSig;
  | 
      
      
         | 3508 | 
          | 
          | 
          
  | 
      
      
         | 3509 | 
          | 
          | 
             aSig = extractFloat64Frac( a );
  | 
      
      
         | 3510 | 
          | 
          | 
             aExp = extractFloat64Exp( a );
  | 
      
      
         | 3511 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 3512 | 
          | 
          | 
             if ( aExp == 0x7FF ) {
  | 
      
      
         | 3513 | 
          | 
          | 
                 if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
  | 
      
      
         | 3514 | 
          | 
          | 
                 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
  | 
      
      
         | 3515 | 
          | 
          | 
             }
  | 
      
      
         | 3516 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 3517 | 
          | 
          | 
                 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
  | 
      
      
         | 3518 | 
          | 
          | 
                 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 3519 | 
          | 
          | 
             }
  | 
      
      
         | 3520 | 
          | 
          | 
             return
  | 
      
      
         | 3521 | 
          | 
          | 
                 packFloatx80(
  | 
      
      
         | 3522 | 
          | 
          | 
                     aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
  | 
      
      
         | 3523 | 
          | 
          | 
          
  | 
      
      
         | 3524 | 
          | 
          | 
         }
  | 
      
      
         | 3525 | 
          | 
          | 
          
  | 
      
      
         | 3526 | 
          | 
          | 
         #endif
  | 
      
      
         | 3527 | 
          | 
          | 
          
  | 
      
      
         | 3528 | 
          | 
          | 
         #ifdef FLOAT128
  | 
      
      
         | 3529 | 
          | 
          | 
          
  | 
      
      
         | 3530 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3531 | 
          | 
          | 
         | Returns the result of converting the double-precision floating-point value
  | 
      
      
         | 3532 | 
          | 
          | 
         | `a' to the quadruple-precision floating-point format.  The conversion is
  | 
      
      
         | 3533 | 
          | 
          | 
         | performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 3534 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 3535 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3536 | 
          | 
          | 
          
  | 
      
      
         | 3537 | 
          | 
          | 
         float128 float64_to_float128( float64 a )
  | 
      
      
         | 3538 | 
          | 
          | 
         {
  | 
      
      
         | 3539 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 3540 | 
          | 
          | 
             int16 aExp;
  | 
      
      
         | 3541 | 
          | 
          | 
             bits64 aSig, zSig0, zSig1;
  | 
      
      
         | 3542 | 
          | 
          | 
          
  | 
      
      
         | 3543 | 
          | 
          | 
             aSig = extractFloat64Frac( a );
  | 
      
      
         | 3544 | 
          | 
          | 
             aExp = extractFloat64Exp( a );
  | 
      
      
         | 3545 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 3546 | 
          | 
          | 
             if ( aExp == 0x7FF ) {
  | 
      
      
         | 3547 | 
          | 
          | 
                 if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
  | 
      
      
         | 3548 | 
          | 
          | 
                 return packFloat128( aSign, 0x7FFF, 0, 0 );
  | 
      
      
         | 3549 | 
          | 
          | 
             }
  | 
      
      
         | 3550 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 3551 | 
          | 
          | 
                 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
  | 
      
      
         | 3552 | 
          | 
          | 
                 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 3553 | 
          | 
          | 
                 --aExp;
  | 
      
      
         | 3554 | 
          | 
          | 
             }
  | 
      
      
         | 3555 | 
          | 
          | 
             shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
  | 
      
      
         | 3556 | 
          | 
          | 
             return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
  | 
      
      
         | 3557 | 
          | 
          | 
          
  | 
      
      
         | 3558 | 
          | 
          | 
         }
  | 
      
      
         | 3559 | 
          | 
          | 
          
  | 
      
      
         | 3560 | 
          | 
          | 
         #endif
  | 
      
      
         | 3561 | 
          | 
          | 
          
  | 
      
      
         | 3562 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3563 | 
          | 
          | 
         | Rounds the double-precision floating-point value `a' to an integer, and
  | 
      
      
         | 3564 | 
          | 
          | 
         | returns the result as a double-precision floating-point value.  The
  | 
      
      
         | 3565 | 
          | 
          | 
         | operation is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 3566 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 3567 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3568 | 
          | 
          | 
          
  | 
      
      
         | 3569 | 
          | 
          | 
         float64 float64_round_to_int( float64 a )
  | 
      
      
         | 3570 | 
          | 
          | 
         {
  | 
      
      
         | 3571 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 3572 | 
          | 
          | 
             int16 aExp;
  | 
      
      
         | 3573 | 
          | 
          | 
             bits64 lastBitMask, roundBitsMask;
  | 
      
      
         | 3574 | 
          | 
          | 
             int8 roundingMode;
  | 
      
      
         | 3575 | 
          | 
          | 
             float64 z;
  | 
      
      
         | 3576 | 
          | 
          | 
          
  | 
      
      
         | 3577 | 
          | 
          | 
             aExp = extractFloat64Exp( a );
  | 
      
      
         | 3578 | 
          | 
          | 
             if ( 0x433 <= aExp ) {
  | 
      
      
         | 3579 | 
          | 
          | 
                 if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
  | 
      
      
         | 3580 | 
          | 
          | 
                     return propagateFloat64NaN( a, a );
  | 
      
      
         | 3581 | 
          | 
          | 
                 }
  | 
      
      
         | 3582 | 
          | 
          | 
                 return a;
  | 
      
      
         | 3583 | 
          | 
          | 
             }
  | 
      
      
         | 3584 | 
          | 
          | 
             if ( aExp < 0x3FF ) {
  | 
      
      
         | 3585 | 
          | 
          | 
                 if ( (bits64) ( a<<1 ) == 0 ) return a;
  | 
      
      
         | 3586 | 
          | 
          | 
                 float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 3587 | 
          | 
          | 
                 aSign = extractFloat64Sign( a );
  | 
      
      
         | 3588 | 
          | 
          | 
                 switch ( float_rounding_mode ) {
  | 
      
      
         | 3589 | 
          | 
          | 
                  case float_round_nearest_even:
  | 
      
      
         | 3590 | 
          | 
          | 
                     if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
  | 
      
      
         | 3591 | 
          | 
          | 
                         return packFloat64( aSign, 0x3FF, 0 );
  | 
      
      
         | 3592 | 
          | 
          | 
                     }
  | 
      
      
         | 3593 | 
          | 
          | 
                     break;
  | 
      
      
         | 3594 | 
          | 
          | 
                  case float_round_down:
  | 
      
      
         | 3595 | 
          | 
          | 
                     return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
  | 
      
      
         | 3596 | 
          | 
          | 
                  case float_round_up:
  | 
      
      
         | 3597 | 
          | 
          | 
                     return
  | 
      
      
         | 3598 | 
          | 
          | 
                     aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
  | 
      
      
         | 3599 | 
          | 
          | 
                 }
  | 
      
      
         | 3600 | 
          | 
          | 
                 return packFloat64( aSign, 0, 0 );
  | 
      
      
         | 3601 | 
          | 
          | 
             }
  | 
      
      
         | 3602 | 
          | 
          | 
             lastBitMask = 1;
  | 
      
      
         | 3603 | 
          | 
          | 
             lastBitMask <<= 0x433 - aExp;
  | 
      
      
         | 3604 | 
          | 
          | 
             roundBitsMask = lastBitMask - 1;
  | 
      
      
         | 3605 | 
          | 
          | 
             z = a;
  | 
      
      
         | 3606 | 
          | 
          | 
             roundingMode = float_rounding_mode;
  | 
      
      
         | 3607 | 
          | 
          | 
             if ( roundingMode == float_round_nearest_even ) {
  | 
      
      
         | 3608 | 
          | 
          | 
                 z += lastBitMask>>1;
  | 
      
      
         | 3609 | 
          | 
          | 
                 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
  | 
      
      
         | 3610 | 
          | 
          | 
             }
  | 
      
      
         | 3611 | 
          | 
          | 
             else if ( roundingMode != float_round_to_zero ) {
  | 
      
      
         | 3612 | 
          | 
          | 
                 if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
  | 
      
      
         | 3613 | 
          | 
          | 
                     z += roundBitsMask;
  | 
      
      
         | 3614 | 
          | 
          | 
                 }
  | 
      
      
         | 3615 | 
          | 
          | 
             }
  | 
      
      
         | 3616 | 
          | 
          | 
             z &= ~ roundBitsMask;
  | 
      
      
         | 3617 | 
          | 
          | 
             if ( z != a ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 3618 | 
          | 
          | 
             return z;
  | 
      
      
         | 3619 | 
          | 
          | 
          
  | 
      
      
         | 3620 | 
          | 
          | 
         }
  | 
      
      
         | 3621 | 
          | 
          | 
          
  | 
      
      
         | 3622 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3623 | 
          | 
          | 
         | Returns the result of adding the absolute values of the double-precision
  | 
      
      
         | 3624 | 
          | 
          | 
         | floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
  | 
      
      
         | 3625 | 
          | 
          | 
         | before being returned.  `zSign' is ignored if the result is a NaN.
  | 
      
      
         | 3626 | 
          | 
          | 
         | The addition is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 3627 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 3628 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3629 | 
          | 
          | 
          
  | 
      
      
         | 3630 | 
          | 
          | 
         static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
  | 
      
      
         | 3631 | 
          | 
          | 
         {
  | 
      
      
         | 3632 | 
          | 
          | 
             int16 aExp, bExp, zExp;
  | 
      
      
         | 3633 | 
          | 
          | 
             bits64 aSig, bSig, zSig;
  | 
      
      
         | 3634 | 
          | 
          | 
             int16 expDiff;
  | 
      
      
         | 3635 | 
          | 
          | 
          
  | 
      
      
         | 3636 | 
          | 
          | 
             aSig = extractFloat64Frac( a );
  | 
      
      
         | 3637 | 
          | 
          | 
             aExp = extractFloat64Exp( a );
  | 
      
      
         | 3638 | 
          | 
          | 
             bSig = extractFloat64Frac( b );
  | 
      
      
         | 3639 | 
          | 
          | 
             bExp = extractFloat64Exp( b );
  | 
      
      
         | 3640 | 
          | 
          | 
             expDiff = aExp - bExp;
  | 
      
      
         | 3641 | 
          | 
          | 
             aSig <<= 9;
  | 
      
      
         | 3642 | 
          | 
          | 
             bSig <<= 9;
  | 
      
      
         | 3643 | 
          | 
          | 
             if ( 0 < expDiff ) {
  | 
      
      
         | 3644 | 
          | 
          | 
                 if ( aExp == 0x7FF ) {
  | 
      
      
         | 3645 | 
          | 
          | 
                     if ( aSig ) return propagateFloat64NaN( a, b );
  | 
      
      
         | 3646 | 
          | 
          | 
                     return a;
  | 
      
      
         | 3647 | 
          | 
          | 
                 }
  | 
      
      
         | 3648 | 
          | 
          | 
                 if ( bExp == 0 ) {
  | 
      
      
         | 3649 | 
          | 
          | 
                     --expDiff;
  | 
      
      
         | 3650 | 
          | 
          | 
                 }
  | 
      
      
         | 3651 | 
          | 
          | 
                 else {
  | 
      
      
         | 3652 | 
          | 
          | 
                     bSig |= LIT64( 0x2000000000000000 );
  | 
      
      
         | 3653 | 
          | 
          | 
                 }
  | 
      
      
         | 3654 | 
          | 
          | 
                 shift64RightJamming( bSig, expDiff, &bSig );
  | 
      
      
         | 3655 | 
          | 
          | 
                 zExp = aExp;
  | 
      
      
         | 3656 | 
          | 
          | 
             }
  | 
      
      
         | 3657 | 
          | 
          | 
             else if ( expDiff < 0 ) {
  | 
      
      
         | 3658 | 
          | 
          | 
                 if ( bExp == 0x7FF ) {
  | 
      
      
         | 3659 | 
          | 
          | 
                     if ( bSig ) return propagateFloat64NaN( a, b );
  | 
      
      
         | 3660 | 
          | 
          | 
                     return packFloat64( zSign, 0x7FF, 0 );
  | 
      
      
         | 3661 | 
          | 
          | 
                 }
  | 
      
      
         | 3662 | 
          | 
          | 
                 if ( aExp == 0 ) {
  | 
      
      
         | 3663 | 
          | 
          | 
                     ++expDiff;
  | 
      
      
         | 3664 | 
          | 
          | 
                 }
  | 
      
      
         | 3665 | 
          | 
          | 
                 else {
  | 
      
      
         | 3666 | 
          | 
          | 
                     aSig |= LIT64( 0x2000000000000000 );
  | 
      
      
         | 3667 | 
          | 
          | 
                 }
  | 
      
      
         | 3668 | 
          | 
          | 
                 shift64RightJamming( aSig, - expDiff, &aSig );
  | 
      
      
         | 3669 | 
          | 
          | 
                 zExp = bExp;
  | 
      
      
         | 3670 | 
          | 
          | 
             }
  | 
      
      
         | 3671 | 
          | 
          | 
             else {
  | 
      
      
         | 3672 | 
          | 
          | 
                 if ( aExp == 0x7FF ) {
  | 
      
      
         | 3673 | 
          | 
          | 
                     if ( aSig | bSig ) return propagateFloat64NaN( a, b );
  | 
      
      
         | 3674 | 
          | 
          | 
                     return a;
  | 
      
      
         | 3675 | 
          | 
          | 
                 }
  | 
      
      
         | 3676 | 
          | 
          | 
                 if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
  | 
      
      
         | 3677 | 
          | 
          | 
                 zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
  | 
      
      
         | 3678 | 
          | 
          | 
                 zExp = aExp;
  | 
      
      
         | 3679 | 
          | 
          | 
                 goto roundAndPack;
  | 
      
      
         | 3680 | 
          | 
          | 
             }
  | 
      
      
         | 3681 | 
          | 
          | 
             aSig |= LIT64( 0x2000000000000000 );
  | 
      
      
         | 3682 | 
          | 
          | 
             zSig = ( aSig + bSig )<<1;
  | 
      
      
         | 3683 | 
          | 
          | 
             --zExp;
  | 
      
      
         | 3684 | 
          | 
          | 
             if ( (sbits64) zSig < 0 ) {
  | 
      
      
         | 3685 | 
          | 
          | 
                 zSig = aSig + bSig;
  | 
      
      
         | 3686 | 
          | 
          | 
                 ++zExp;
  | 
      
      
         | 3687 | 
          | 
          | 
             }
  | 
      
      
         | 3688 | 
          | 
          | 
          roundAndPack:
  | 
      
      
         | 3689 | 
          | 
          | 
             return roundAndPackFloat64( zSign, zExp, zSig );
  | 
      
      
         | 3690 | 
          | 
          | 
          
  | 
      
      
         | 3691 | 
          | 
          | 
         }
  | 
      
      
         | 3692 | 
          | 
          | 
          
  | 
      
      
         | 3693 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3694 | 
          | 
          | 
         | Returns the result of subtracting the absolute values of the double-
  | 
      
      
         | 3695 | 
          | 
          | 
         | precision floating-point values `a' and `b'.  If `zSign' is 1, the
  | 
      
      
         | 3696 | 
          | 
          | 
         | difference is negated before being returned.  `zSign' is ignored if the
  | 
      
      
         | 3697 | 
          | 
          | 
         | result is a NaN.  The subtraction is performed according to the IEC/IEEE
  | 
      
      
         | 3698 | 
          | 
          | 
         | Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 3699 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3700 | 
          | 
          | 
          
  | 
      
      
         | 3701 | 
          | 
          | 
         static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
  | 
      
      
         | 3702 | 
          | 
          | 
         {
  | 
      
      
         | 3703 | 
          | 
          | 
             int16 aExp, bExp, zExp;
  | 
      
      
         | 3704 | 
          | 
          | 
             bits64 aSig, bSig, zSig;
  | 
      
      
         | 3705 | 
          | 
          | 
             int16 expDiff;
  | 
      
      
         | 3706 | 
          | 
          | 
          
  | 
      
      
         | 3707 | 
          | 
          | 
             aSig = extractFloat64Frac( a );
  | 
      
      
         | 3708 | 
          | 
          | 
             aExp = extractFloat64Exp( a );
  | 
      
      
         | 3709 | 
          | 
          | 
             bSig = extractFloat64Frac( b );
  | 
      
      
         | 3710 | 
          | 
          | 
             bExp = extractFloat64Exp( b );
  | 
      
      
         | 3711 | 
          | 
          | 
             expDiff = aExp - bExp;
  | 
      
      
         | 3712 | 
          | 
          | 
             aSig <<= 10;
  | 
      
      
         | 3713 | 
          | 
          | 
             bSig <<= 10;
  | 
      
      
         | 3714 | 
          | 
          | 
             if ( 0 < expDiff ) goto aExpBigger;
  | 
      
      
         | 3715 | 
          | 
          | 
             if ( expDiff < 0 ) goto bExpBigger;
  | 
      
      
         | 3716 | 
          | 
          | 
             if ( aExp == 0x7FF ) {
  | 
      
      
         | 3717 | 
          | 
          | 
                 if ( aSig | bSig ) return propagateFloat64NaN( a, b );
  | 
      
      
         | 3718 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 3719 | 
          | 
          | 
                 return float64_default_nan;
  | 
      
      
         | 3720 | 
          | 
          | 
             }
  | 
      
      
         | 3721 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 3722 | 
          | 
          | 
                 aExp = 1;
  | 
      
      
         | 3723 | 
          | 
          | 
                 bExp = 1;
  | 
      
      
         | 3724 | 
          | 
          | 
             }
  | 
      
      
         | 3725 | 
          | 
          | 
             if ( bSig < aSig ) goto aBigger;
  | 
      
      
         | 3726 | 
          | 
          | 
             if ( aSig < bSig ) goto bBigger;
  | 
      
      
         | 3727 | 
          | 
          | 
             return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
  | 
      
      
         | 3728 | 
          | 
          | 
          bExpBigger:
  | 
      
      
         | 3729 | 
          | 
          | 
             if ( bExp == 0x7FF ) {
  | 
      
      
         | 3730 | 
          | 
          | 
                 if ( bSig ) return propagateFloat64NaN( a, b );
  | 
      
      
         | 3731 | 
          | 
          | 
                 return packFloat64( zSign ^ 1, 0x7FF, 0 );
  | 
      
      
         | 3732 | 
          | 
          | 
             }
  | 
      
      
         | 3733 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 3734 | 
          | 
          | 
                 ++expDiff;
  | 
      
      
         | 3735 | 
          | 
          | 
             }
  | 
      
      
         | 3736 | 
          | 
          | 
             else {
  | 
      
      
         | 3737 | 
          | 
          | 
                 aSig |= LIT64( 0x4000000000000000 );
  | 
      
      
         | 3738 | 
          | 
          | 
             }
  | 
      
      
         | 3739 | 
          | 
          | 
             shift64RightJamming( aSig, - expDiff, &aSig );
  | 
      
      
         | 3740 | 
          | 
          | 
             bSig |= LIT64( 0x4000000000000000 );
  | 
      
      
         | 3741 | 
          | 
          | 
          bBigger:
  | 
      
      
         | 3742 | 
          | 
          | 
             zSig = bSig - aSig;
  | 
      
      
         | 3743 | 
          | 
          | 
             zExp = bExp;
  | 
      
      
         | 3744 | 
          | 
          | 
             zSign ^= 1;
  | 
      
      
         | 3745 | 
          | 
          | 
             goto normalizeRoundAndPack;
  | 
      
      
         | 3746 | 
          | 
          | 
          aExpBigger:
  | 
      
      
         | 3747 | 
          | 
          | 
             if ( aExp == 0x7FF ) {
  | 
      
      
         | 3748 | 
          | 
          | 
                 if ( aSig ) return propagateFloat64NaN( a, b );
  | 
      
      
         | 3749 | 
          | 
          | 
                 return a;
  | 
      
      
         | 3750 | 
          | 
          | 
             }
  | 
      
      
         | 3751 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 3752 | 
          | 
          | 
                 --expDiff;
  | 
      
      
         | 3753 | 
          | 
          | 
             }
  | 
      
      
         | 3754 | 
          | 
          | 
             else {
  | 
      
      
         | 3755 | 
          | 
          | 
                 bSig |= LIT64( 0x4000000000000000 );
  | 
      
      
         | 3756 | 
          | 
          | 
             }
  | 
      
      
         | 3757 | 
          | 
          | 
             shift64RightJamming( bSig, expDiff, &bSig );
  | 
      
      
         | 3758 | 
          | 
          | 
             aSig |= LIT64( 0x4000000000000000 );
  | 
      
      
         | 3759 | 
          | 
          | 
          aBigger:
  | 
      
      
         | 3760 | 
          | 
          | 
             zSig = aSig - bSig;
  | 
      
      
         | 3761 | 
          | 
          | 
             zExp = aExp;
  | 
      
      
         | 3762 | 
          | 
          | 
          normalizeRoundAndPack:
  | 
      
      
         | 3763 | 
          | 
          | 
             --zExp;
  | 
      
      
         | 3764 | 
          | 
          | 
             return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
  | 
      
      
         | 3765 | 
          | 
          | 
          
  | 
      
      
         | 3766 | 
          | 
          | 
         }
  | 
      
      
         | 3767 | 
          | 
          | 
          
  | 
      
      
         | 3768 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3769 | 
          | 
          | 
         | Returns the result of adding the double-precision floating-point values `a'
  | 
      
      
         | 3770 | 
          | 
          | 
         | and `b'.  The operation is performed according to the IEC/IEEE Standard for
  | 
      
      
         | 3771 | 
          | 
          | 
         | Binary Floating-Point Arithmetic.
  | 
      
      
         | 3772 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3773 | 
          | 
          | 
          
  | 
      
      
         | 3774 | 
          | 
          | 
         float64 float64_add( float64 a, float64 b )
  | 
      
      
         | 3775 | 
          | 
          | 
         {
  | 
      
      
         | 3776 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 3777 | 
          | 
          | 
          
  | 
      
      
         | 3778 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 3779 | 
          | 
          | 
             bSign = extractFloat64Sign( b );
  | 
      
      
         | 3780 | 
          | 
          | 
             if ( aSign == bSign ) {
  | 
      
      
         | 3781 | 
          | 
          | 
                 return addFloat64Sigs( a, b, aSign );
  | 
      
      
         | 3782 | 
          | 
          | 
             }
  | 
      
      
         | 3783 | 
          | 
          | 
             else {
  | 
      
      
         | 3784 | 
          | 
          | 
                 return subFloat64Sigs( a, b, aSign );
  | 
      
      
         | 3785 | 
          | 
          | 
             }
  | 
      
      
         | 3786 | 
          | 
          | 
          
  | 
      
      
         | 3787 | 
          | 
          | 
         }
  | 
      
      
         | 3788 | 
          | 
          | 
          
  | 
      
      
         | 3789 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3790 | 
          | 
          | 
         | Returns the result of subtracting the double-precision floating-point values
  | 
      
      
         | 3791 | 
          | 
          | 
         | `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
  | 
      
      
         | 3792 | 
          | 
          | 
         | for Binary Floating-Point Arithmetic.
  | 
      
      
         | 3793 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3794 | 
          | 
          | 
          
  | 
      
      
         | 3795 | 
          | 
          | 
         float64 float64_sub( float64 a, float64 b )
  | 
      
      
         | 3796 | 
          | 
          | 
         {
  | 
      
      
         | 3797 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 3798 | 
          | 
          | 
          
  | 
      
      
         | 3799 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 3800 | 
          | 
          | 
             bSign = extractFloat64Sign( b );
  | 
      
      
         | 3801 | 
          | 
          | 
             if ( aSign == bSign ) {
  | 
      
      
         | 3802 | 
          | 
          | 
                 return subFloat64Sigs( a, b, aSign );
  | 
      
      
         | 3803 | 
          | 
          | 
             }
  | 
      
      
         | 3804 | 
          | 
          | 
             else {
  | 
      
      
         | 3805 | 
          | 
          | 
                 return addFloat64Sigs( a, b, aSign );
  | 
      
      
         | 3806 | 
          | 
          | 
             }
  | 
      
      
         | 3807 | 
          | 
          | 
          
  | 
      
      
         | 3808 | 
          | 
          | 
         }
  | 
      
      
         | 3809 | 
          | 
          | 
          
  | 
      
      
         | 3810 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3811 | 
          | 
          | 
         | Returns the result of multiplying the double-precision floating-point values
  | 
      
      
         | 3812 | 
          | 
          | 
         | `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
  | 
      
      
         | 3813 | 
          | 
          | 
         | for Binary Floating-Point Arithmetic.
  | 
      
      
         | 3814 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3815 | 
          | 
          | 
          
  | 
      
      
         | 3816 | 
          | 
          | 
         float64 float64_mul( float64 a, float64 b )
  | 
      
      
         | 3817 | 
          | 
          | 
         {
  | 
      
      
         | 3818 | 
          | 
          | 
             flag aSign, bSign, zSign;
  | 
      
      
         | 3819 | 
          | 
          | 
             int16 aExp, bExp, zExp;
  | 
      
      
         | 3820 | 
          | 
          | 
             bits64 aSig, bSig, zSig0, zSig1;
  | 
      
      
         | 3821 | 
          | 
          | 
          
  | 
      
      
         | 3822 | 
          | 
          | 
             aSig = extractFloat64Frac( a );
  | 
      
      
         | 3823 | 
          | 
          | 
             aExp = extractFloat64Exp( a );
  | 
      
      
         | 3824 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 3825 | 
          | 
          | 
             bSig = extractFloat64Frac( b );
  | 
      
      
         | 3826 | 
          | 
          | 
             bExp = extractFloat64Exp( b );
  | 
      
      
         | 3827 | 
          | 
          | 
             bSign = extractFloat64Sign( b );
  | 
      
      
         | 3828 | 
          | 
          | 
             zSign = aSign ^ bSign;
  | 
      
      
         | 3829 | 
          | 
          | 
             if ( aExp == 0x7FF ) {
  | 
      
      
         | 3830 | 
          | 
          | 
                 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
  | 
      
      
         | 3831 | 
          | 
          | 
                     return propagateFloat64NaN( a, b );
  | 
      
      
         | 3832 | 
          | 
          | 
                 }
  | 
      
      
         | 3833 | 
          | 
          | 
                 if ( ( bExp | bSig ) == 0 ) {
  | 
      
      
         | 3834 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 3835 | 
          | 
          | 
                     return float64_default_nan;
  | 
      
      
         | 3836 | 
          | 
          | 
                 }
  | 
      
      
         | 3837 | 
          | 
          | 
                 return packFloat64( zSign, 0x7FF, 0 );
  | 
      
      
         | 3838 | 
          | 
          | 
             }
  | 
      
      
         | 3839 | 
          | 
          | 
             if ( bExp == 0x7FF ) {
  | 
      
      
         | 3840 | 
          | 
          | 
                 if ( bSig ) return propagateFloat64NaN( a, b );
  | 
      
      
         | 3841 | 
          | 
          | 
                 if ( ( aExp | aSig ) == 0 ) {
  | 
      
      
         | 3842 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 3843 | 
          | 
          | 
                     return float64_default_nan;
  | 
      
      
         | 3844 | 
          | 
          | 
                 }
  | 
      
      
         | 3845 | 
          | 
          | 
                 return packFloat64( zSign, 0x7FF, 0 );
  | 
      
      
         | 3846 | 
          | 
          | 
             }
  | 
      
      
         | 3847 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 3848 | 
          | 
          | 
                 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
  | 
      
      
         | 3849 | 
          | 
          | 
                 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 3850 | 
          | 
          | 
             }
  | 
      
      
         | 3851 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 3852 | 
          | 
          | 
                 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
  | 
      
      
         | 3853 | 
          | 
          | 
                 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
  | 
      
      
         | 3854 | 
          | 
          | 
             }
  | 
      
      
         | 3855 | 
          | 
          | 
             zExp = aExp + bExp - 0x3FF;
  | 
      
      
         | 3856 | 
          | 
          | 
             aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
  | 
      
      
         | 3857 | 
          | 
          | 
             bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
  | 
      
      
         | 3858 | 
          | 
          | 
             mul64To128( aSig, bSig, &zSig0, &zSig1 );
  | 
      
      
         | 3859 | 
          | 
          | 
             zSig0 |= ( zSig1 != 0 );
  | 
      
      
         | 3860 | 
          | 
          | 
             if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
  | 
      
      
         | 3861 | 
          | 
          | 
                 zSig0 <<= 1;
  | 
      
      
         | 3862 | 
          | 
          | 
                 --zExp;
  | 
      
      
         | 3863 | 
          | 
          | 
             }
  | 
      
      
         | 3864 | 
          | 
          | 
             return roundAndPackFloat64( zSign, zExp, zSig0 );
  | 
      
      
         | 3865 | 
          | 
          | 
          
  | 
      
      
         | 3866 | 
          | 
          | 
         }
  | 
      
      
         | 3867 | 
          | 
          | 
          
  | 
      
      
         | 3868 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3869 | 
          | 
          | 
         | Returns the result of dividing the double-precision floating-point value `a'
  | 
      
      
         | 3870 | 
          | 
          | 
         | by the corresponding value `b'.  The operation is performed according to
  | 
      
      
         | 3871 | 
          | 
          | 
         | the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 3872 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3873 | 
          | 
          | 
          
  | 
      
      
         | 3874 | 
          | 
          | 
         float64 float64_div( float64 a, float64 b )
  | 
      
      
         | 3875 | 
          | 
          | 
         {
  | 
      
      
         | 3876 | 
          | 
          | 
             flag aSign, bSign, zSign;
  | 
      
      
         | 3877 | 
          | 
          | 
             int16 aExp, bExp, zExp;
  | 
      
      
         | 3878 | 
          | 
          | 
             bits64 aSig, bSig, zSig;
  | 
      
      
         | 3879 | 
          | 
          | 
             bits64 rem0, rem1;
  | 
      
      
         | 3880 | 
          | 
          | 
             bits64 term0, term1;
  | 
      
      
         | 3881 | 
          | 
          | 
          
  | 
      
      
         | 3882 | 
          | 
          | 
             aSig = extractFloat64Frac( a );
  | 
      
      
         | 3883 | 
          | 
          | 
             aExp = extractFloat64Exp( a );
  | 
      
      
         | 3884 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 3885 | 
          | 
          | 
             bSig = extractFloat64Frac( b );
  | 
      
      
         | 3886 | 
          | 
          | 
             bExp = extractFloat64Exp( b );
  | 
      
      
         | 3887 | 
          | 
          | 
             bSign = extractFloat64Sign( b );
  | 
      
      
         | 3888 | 
          | 
          | 
             zSign = aSign ^ bSign;
  | 
      
      
         | 3889 | 
          | 
          | 
             if ( aExp == 0x7FF ) {
  | 
      
      
         | 3890 | 
          | 
          | 
                 if ( aSig ) return propagateFloat64NaN( a, b );
  | 
      
      
         | 3891 | 
          | 
          | 
                 if ( bExp == 0x7FF ) {
  | 
      
      
         | 3892 | 
          | 
          | 
                     if ( bSig ) return propagateFloat64NaN( a, b );
  | 
      
      
         | 3893 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 3894 | 
          | 
          | 
                     return float64_default_nan;
  | 
      
      
         | 3895 | 
          | 
          | 
                 }
  | 
      
      
         | 3896 | 
          | 
          | 
                 return packFloat64( zSign, 0x7FF, 0 );
  | 
      
      
         | 3897 | 
          | 
          | 
             }
  | 
      
      
         | 3898 | 
          | 
          | 
             if ( bExp == 0x7FF ) {
  | 
      
      
         | 3899 | 
          | 
          | 
                 if ( bSig ) return propagateFloat64NaN( a, b );
  | 
      
      
         | 3900 | 
          | 
          | 
                 return packFloat64( zSign, 0, 0 );
  | 
      
      
         | 3901 | 
          | 
          | 
             }
  | 
      
      
         | 3902 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 3903 | 
          | 
          | 
                 if ( bSig == 0 ) {
  | 
      
      
         | 3904 | 
          | 
          | 
                     if ( ( aExp | aSig ) == 0 ) {
  | 
      
      
         | 3905 | 
          | 
          | 
                         float_raise( float_flag_invalid );
  | 
      
      
         | 3906 | 
          | 
          | 
                         return float64_default_nan;
  | 
      
      
         | 3907 | 
          | 
          | 
                     }
  | 
      
      
         | 3908 | 
          | 
          | 
                     float_raise( float_flag_divbyzero );
  | 
      
      
         | 3909 | 
          | 
          | 
                     return packFloat64( zSign, 0x7FF, 0 );
  | 
      
      
         | 3910 | 
          | 
          | 
                 }
  | 
      
      
         | 3911 | 
          | 
          | 
                 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
  | 
      
      
         | 3912 | 
          | 
          | 
             }
  | 
      
      
         | 3913 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 3914 | 
          | 
          | 
                 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
  | 
      
      
         | 3915 | 
          | 
          | 
                 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 3916 | 
          | 
          | 
             }
  | 
      
      
         | 3917 | 
          | 
          | 
             zExp = aExp - bExp + 0x3FD;
  | 
      
      
         | 3918 | 
          | 
          | 
             aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
  | 
      
      
         | 3919 | 
          | 
          | 
             bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
  | 
      
      
         | 3920 | 
          | 
          | 
             if ( bSig <= ( aSig + aSig ) ) {
  | 
      
      
         | 3921 | 
          | 
          | 
                 aSig >>= 1;
  | 
      
      
         | 3922 | 
          | 
          | 
                 ++zExp;
  | 
      
      
         | 3923 | 
          | 
          | 
             }
  | 
      
      
         | 3924 | 
          | 
          | 
             zSig = estimateDiv128To64( aSig, 0, bSig );
  | 
      
      
         | 3925 | 
          | 
          | 
             if ( ( zSig & 0x1FF ) <= 2 ) {
  | 
      
      
         | 3926 | 
          | 
          | 
                 mul64To128( bSig, zSig, &term0, &term1 );
  | 
      
      
         | 3927 | 
          | 
          | 
                 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
  | 
      
      
         | 3928 | 
          | 
          | 
                 while ( (sbits64) rem0 < 0 ) {
  | 
      
      
         | 3929 | 
          | 
          | 
                     --zSig;
  | 
      
      
         | 3930 | 
          | 
          | 
                     add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
  | 
      
      
         | 3931 | 
          | 
          | 
                 }
  | 
      
      
         | 3932 | 
          | 
          | 
                 zSig |= ( rem1 != 0 );
  | 
      
      
         | 3933 | 
          | 
          | 
             }
  | 
      
      
         | 3934 | 
          | 
          | 
             return roundAndPackFloat64( zSign, zExp, zSig );
  | 
      
      
         | 3935 | 
          | 
          | 
          
  | 
      
      
         | 3936 | 
          | 
          | 
         }
  | 
      
      
         | 3937 | 
          | 
          | 
          
  | 
      
      
         | 3938 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 3939 | 
          | 
          | 
         | Returns the remainder of the double-precision floating-point value `a'
  | 
      
      
         | 3940 | 
          | 
          | 
         | with respect to the corresponding value `b'.  The operation is performed
  | 
      
      
         | 3941 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 3942 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 3943 | 
          | 
          | 
          
  | 
      
      
         | 3944 | 
          | 
          | 
         float64 float64_rem( float64 a, float64 b )
  | 
      
      
         | 3945 | 
          | 
          | 
         {
  | 
      
      
         | 3946 | 
          | 
          | 
             flag aSign, bSign, zSign;
  | 
      
      
         | 3947 | 
          | 
          | 
             int16 aExp, bExp, expDiff;
  | 
      
      
         | 3948 | 
          | 
          | 
             bits64 aSig, bSig;
  | 
      
      
         | 3949 | 
          | 
          | 
             bits64 q, alternateASig;
  | 
      
      
         | 3950 | 
          | 
          | 
             sbits64 sigMean;
  | 
      
      
         | 3951 | 
          | 
          | 
          
  | 
      
      
         | 3952 | 
          | 
          | 
             aSig = extractFloat64Frac( a );
  | 
      
      
         | 3953 | 
          | 
          | 
             aExp = extractFloat64Exp( a );
  | 
      
      
         | 3954 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 3955 | 
          | 
          | 
             bSig = extractFloat64Frac( b );
  | 
      
      
         | 3956 | 
          | 
          | 
             bExp = extractFloat64Exp( b );
  | 
      
      
         | 3957 | 
          | 
          | 
             bSign = extractFloat64Sign( b );
  | 
      
      
         | 3958 | 
          | 
          | 
             if ( aExp == 0x7FF ) {
  | 
      
      
         | 3959 | 
          | 
          | 
                 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
  | 
      
      
         | 3960 | 
          | 
          | 
                     return propagateFloat64NaN( a, b );
  | 
      
      
         | 3961 | 
          | 
          | 
                 }
  | 
      
      
         | 3962 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 3963 | 
          | 
          | 
                 return float64_default_nan;
  | 
      
      
         | 3964 | 
          | 
          | 
             }
  | 
      
      
         | 3965 | 
          | 
          | 
             if ( bExp == 0x7FF ) {
  | 
      
      
         | 3966 | 
          | 
          | 
                 if ( bSig ) return propagateFloat64NaN( a, b );
  | 
      
      
         | 3967 | 
          | 
          | 
                 return a;
  | 
      
      
         | 3968 | 
          | 
          | 
             }
  | 
      
      
         | 3969 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 3970 | 
          | 
          | 
                 if ( bSig == 0 ) {
  | 
      
      
         | 3971 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 3972 | 
          | 
          | 
                     return float64_default_nan;
  | 
      
      
         | 3973 | 
          | 
          | 
                 }
  | 
      
      
         | 3974 | 
          | 
          | 
                 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
  | 
      
      
         | 3975 | 
          | 
          | 
             }
  | 
      
      
         | 3976 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 3977 | 
          | 
          | 
                 if ( aSig == 0 ) return a;
  | 
      
      
         | 3978 | 
          | 
          | 
                 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 3979 | 
          | 
          | 
             }
  | 
      
      
         | 3980 | 
          | 
          | 
             expDiff = aExp - bExp;
  | 
      
      
         | 3981 | 
          | 
          | 
             aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
  | 
      
      
         | 3982 | 
          | 
          | 
             bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
  | 
      
      
         | 3983 | 
          | 
          | 
             if ( expDiff < 0 ) {
  | 
      
      
         | 3984 | 
          | 
          | 
                 if ( expDiff < -1 ) return a;
  | 
      
      
         | 3985 | 
          | 
          | 
                 aSig >>= 1;
  | 
      
      
         | 3986 | 
          | 
          | 
             }
  | 
      
      
         | 3987 | 
          | 
          | 
             q = ( bSig <= aSig );
  | 
      
      
         | 3988 | 
          | 
          | 
             if ( q ) aSig -= bSig;
  | 
      
      
         | 3989 | 
          | 
          | 
             expDiff -= 64;
  | 
      
      
         | 3990 | 
          | 
          | 
             while ( 0 < expDiff ) {
  | 
      
      
         | 3991 | 
          | 
          | 
                 q = estimateDiv128To64( aSig, 0, bSig );
  | 
      
      
         | 3992 | 
          | 
          | 
                 q = ( 2 < q ) ? q - 2 : 0;
  | 
      
      
         | 3993 | 
          | 
          | 
                 aSig = - ( ( bSig>>2 ) * q );
  | 
      
      
         | 3994 | 
          | 
          | 
                 expDiff -= 62;
  | 
      
      
         | 3995 | 
          | 
          | 
             }
  | 
      
      
         | 3996 | 
          | 
          | 
             expDiff += 64;
  | 
      
      
         | 3997 | 
          | 
          | 
             if ( 0 < expDiff ) {
  | 
      
      
         | 3998 | 
          | 
          | 
                 q = estimateDiv128To64( aSig, 0, bSig );
  | 
      
      
         | 3999 | 
          | 
          | 
                 q = ( 2 < q ) ? q - 2 : 0;
  | 
      
      
         | 4000 | 
          | 
          | 
                 q >>= 64 - expDiff;
  | 
      
      
         | 4001 | 
          | 
          | 
                 bSig >>= 2;
  | 
      
      
         | 4002 | 
          | 
          | 
                 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
  | 
      
      
         | 4003 | 
          | 
          | 
             }
  | 
      
      
         | 4004 | 
          | 
          | 
             else {
  | 
      
      
         | 4005 | 
          | 
          | 
                 aSig >>= 2;
  | 
      
      
         | 4006 | 
          | 
          | 
                 bSig >>= 2;
  | 
      
      
         | 4007 | 
          | 
          | 
             }
  | 
      
      
         | 4008 | 
          | 
          | 
             do {
  | 
      
      
         | 4009 | 
          | 
          | 
                 alternateASig = aSig;
  | 
      
      
         | 4010 | 
          | 
          | 
                 ++q;
  | 
      
      
         | 4011 | 
          | 
          | 
                 aSig -= bSig;
  | 
      
      
         | 4012 | 
          | 
          | 
             } while ( 0 <= (sbits64) aSig );
  | 
      
      
         | 4013 | 
          | 
          | 
             sigMean = aSig + alternateASig;
  | 
      
      
         | 4014 | 
          | 
          | 
             if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
  | 
      
      
         | 4015 | 
          | 
          | 
                 aSig = alternateASig;
  | 
      
      
         | 4016 | 
          | 
          | 
             }
  | 
      
      
         | 4017 | 
          | 
          | 
             zSign = ( (sbits64) aSig < 0 );
  | 
      
      
         | 4018 | 
          | 
          | 
             if ( zSign ) aSig = - aSig;
  | 
      
      
         | 4019 | 
          | 
          | 
             return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
  | 
      
      
         | 4020 | 
          | 
          | 
          
  | 
      
      
         | 4021 | 
          | 
          | 
         }
  | 
      
      
         | 4022 | 
          | 
          | 
          
  | 
      
      
         | 4023 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4024 | 
          | 
          | 
         | Returns the square root of the double-precision floating-point value `a'.
  | 
      
      
         | 4025 | 
          | 
          | 
         | The operation is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 4026 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 4027 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4028 | 
          | 
          | 
          
  | 
      
      
         | 4029 | 
          | 
          | 
         float64 float64_sqrt( float64 a )
  | 
      
      
         | 4030 | 
          | 
          | 
         {
  | 
      
      
         | 4031 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 4032 | 
          | 
          | 
             int16 aExp, zExp;
  | 
      
      
         | 4033 | 
          | 
          | 
             bits64 aSig, zSig, doubleZSig;
  | 
      
      
         | 4034 | 
          | 
          | 
             bits64 rem0, rem1, term0, term1;
  | 
      
      
         | 4035 | 
          | 
          | 
             //float64 z; // Unused variable
  | 
      
      
         | 4036 | 
          | 
          | 
          
  | 
      
      
         | 4037 | 
          | 
          | 
             aSig = extractFloat64Frac( a );
  | 
      
      
         | 4038 | 
          | 
          | 
             aExp = extractFloat64Exp( a );
  | 
      
      
         | 4039 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 4040 | 
          | 
          | 
             if ( aExp == 0x7FF ) {
  | 
      
      
         | 4041 | 
          | 
          | 
                 if ( aSig ) return propagateFloat64NaN( a, a );
  | 
      
      
         | 4042 | 
          | 
          | 
                 if ( ! aSign ) return a;
  | 
      
      
         | 4043 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 4044 | 
          | 
          | 
                 return float64_default_nan;
  | 
      
      
         | 4045 | 
          | 
          | 
             }
  | 
      
      
         | 4046 | 
          | 
          | 
             if ( aSign ) {
  | 
      
      
         | 4047 | 
          | 
          | 
                 if ( ( aExp | aSig ) == 0 ) return a;
  | 
      
      
         | 4048 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 4049 | 
          | 
          | 
                 return float64_default_nan;
  | 
      
      
         | 4050 | 
          | 
          | 
             }
  | 
      
      
         | 4051 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 4052 | 
          | 
          | 
                 if ( aSig == 0 ) return 0;
  | 
      
      
         | 4053 | 
          | 
          | 
                 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 4054 | 
          | 
          | 
             }
  | 
      
      
         | 4055 | 
          | 
          | 
             zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
  | 
      
      
         | 4056 | 
          | 
          | 
             aSig |= LIT64( 0x0010000000000000 );
  | 
      
      
         | 4057 | 
          | 
          | 
             zSig = estimateSqrt32( aExp, aSig>>21 );
  | 
      
      
         | 4058 | 
          | 
          | 
             aSig <<= 9 - ( aExp & 1 );
  | 
      
      
         | 4059 | 
          | 
          | 
             zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
  | 
      
      
         | 4060 | 
          | 
          | 
             if ( ( zSig & 0x1FF ) <= 5 ) {
  | 
      
      
         | 4061 | 
          | 
          | 
                 doubleZSig = zSig<<1;
  | 
      
      
         | 4062 | 
          | 
          | 
                 mul64To128( zSig, zSig, &term0, &term1 );
  | 
      
      
         | 4063 | 
          | 
          | 
                 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
  | 
      
      
         | 4064 | 
          | 
          | 
                 while ( (sbits64) rem0 < 0 ) {
  | 
      
      
         | 4065 | 
          | 
          | 
                     --zSig;
  | 
      
      
         | 4066 | 
          | 
          | 
                     doubleZSig -= 2;
  | 
      
      
         | 4067 | 
          | 
          | 
                     add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
  | 
      
      
         | 4068 | 
          | 
          | 
                 }
  | 
      
      
         | 4069 | 
          | 
          | 
                 zSig |= ( ( rem0 | rem1 ) != 0 );
  | 
      
      
         | 4070 | 
          | 
          | 
             }
  | 
      
      
         | 4071 | 
          | 
          | 
             return roundAndPackFloat64( 0, zExp, zSig );
  | 
      
      
         | 4072 | 
          | 
          | 
          
  | 
      
      
         | 4073 | 
          | 
          | 
         }
  | 
      
      
         | 4074 | 
          | 
          | 
          
  | 
      
      
         | 4075 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4076 | 
          | 
          | 
         | Returns 1 if the double-precision floating-point value `a' is equal to the
  | 
      
      
         | 4077 | 
          | 
          | 
         | corresponding value `b', and 0 otherwise.  The comparison is performed
  | 
      
      
         | 4078 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 4079 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4080 | 
          | 
          | 
          
  | 
      
      
         | 4081 | 
          | 
          | 
         flag float64_eq( float64 a, float64 b )
  | 
      
      
         | 4082 | 
          | 
          | 
         {
  | 
      
      
         | 4083 | 
          | 
          | 
          
  | 
      
      
         | 4084 | 
          | 
          | 
             if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
  | 
      
      
         | 4085 | 
          | 
          | 
                  || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
  | 
      
      
         | 4086 | 
          | 
          | 
                ) {
  | 
      
      
         | 4087 | 
          | 
          | 
                 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
  | 
      
      
         | 4088 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 4089 | 
          | 
          | 
                 }
  | 
      
      
         | 4090 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 4091 | 
          | 
          | 
             }
  | 
      
      
         | 4092 | 
          | 
          | 
             return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
  | 
      
      
         | 4093 | 
          | 
          | 
          
  | 
      
      
         | 4094 | 
          | 
          | 
         }
  | 
      
      
         | 4095 | 
          | 
          | 
          
  | 
      
      
         | 4096 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4097 | 
          | 
          | 
         | Returns 1 if the double-precision floating-point value `a' is less than or
  | 
      
      
         | 4098 | 
          | 
          | 
         | equal to the corresponding value `b', and 0 otherwise.  The comparison is
  | 
      
      
         | 4099 | 
          | 
          | 
         | performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 4100 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 4101 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4102 | 
          | 
          | 
          
  | 
      
      
         | 4103 | 
          | 
          | 
         flag float64_le( float64 a, float64 b )
  | 
      
      
         | 4104 | 
          | 
          | 
         {
  | 
      
      
         | 4105 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 4106 | 
          | 
          | 
          
  | 
      
      
         | 4107 | 
          | 
          | 
             if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
  | 
      
      
         | 4108 | 
          | 
          | 
                  || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
  | 
      
      
         | 4109 | 
          | 
          | 
                ) {
  | 
      
      
         | 4110 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 4111 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 4112 | 
          | 
          | 
             }
  | 
      
      
         | 4113 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 4114 | 
          | 
          | 
             bSign = extractFloat64Sign( b );
  | 
      
      
         | 4115 | 
          | 
          | 
             if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
  | 
      
      
         | 4116 | 
          | 
          | 
             return ( a == b ) || ( aSign ^ ( a < b ) );
  | 
      
      
         | 4117 | 
          | 
          | 
          
  | 
      
      
         | 4118 | 
          | 
          | 
         }
  | 
      
      
         | 4119 | 
          | 
          | 
          
  | 
      
      
         | 4120 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4121 | 
          | 
          | 
         | Returns 1 if the double-precision floating-point value `a' is less than
  | 
      
      
         | 4122 | 
          | 
          | 
         | the corresponding value `b', and 0 otherwise.  The comparison is performed
  | 
      
      
         | 4123 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 4124 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4125 | 
          | 
          | 
          
  | 
      
      
         | 4126 | 
          | 
          | 
         flag float64_lt( float64 a, float64 b )
  | 
      
      
         | 4127 | 
          | 
          | 
         {
  | 
      
      
         | 4128 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 4129 | 
          | 
          | 
          
  | 
      
      
         | 4130 | 
          | 
          | 
             if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
  | 
      
      
         | 4131 | 
          | 
          | 
                  || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
  | 
      
      
         | 4132 | 
          | 
          | 
                ) {
  | 
      
      
         | 4133 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 4134 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 4135 | 
          | 
          | 
             }
  | 
      
      
         | 4136 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 4137 | 
          | 
          | 
             bSign = extractFloat64Sign( b );
  | 
      
      
         | 4138 | 
          | 
          | 
             if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
  | 
      
      
         | 4139 | 
          | 
          | 
             return ( a != b ) && ( aSign ^ ( a < b ) );
  | 
      
      
         | 4140 | 
          | 
          | 
          
  | 
      
      
         | 4141 | 
          | 
          | 
         }
  | 
      
      
         | 4142 | 
          | 
          | 
          
  | 
      
      
         | 4143 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4144 | 
          | 
          | 
         | Returns 1 if the double-precision floating-point value `a' is equal to the
  | 
      
      
         | 4145 | 
          | 
          | 
         | corresponding value `b', and 0 otherwise.  The invalid exception is raised
  | 
      
      
         | 4146 | 
          | 
          | 
         | if either operand is a NaN.  Otherwise, the comparison is performed
  | 
      
      
         | 4147 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 4148 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4149 | 
          | 
          | 
          
  | 
      
      
         | 4150 | 
          | 
          | 
         flag float64_eq_signaling( float64 a, float64 b )
  | 
      
      
         | 4151 | 
          | 
          | 
         {
  | 
      
      
         | 4152 | 
          | 
          | 
          
  | 
      
      
         | 4153 | 
          | 
          | 
             if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
  | 
      
      
         | 4154 | 
          | 
          | 
                  || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
  | 
      
      
         | 4155 | 
          | 
          | 
                ) {
  | 
      
      
         | 4156 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 4157 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 4158 | 
          | 
          | 
             }
  | 
      
      
         | 4159 | 
          | 
          | 
             return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
  | 
      
      
         | 4160 | 
          | 
          | 
          
  | 
      
      
         | 4161 | 
          | 
          | 
         }
  | 
      
      
         | 4162 | 
          | 
          | 
          
  | 
      
      
         | 4163 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4164 | 
          | 
          | 
         | Returns 1 if the double-precision floating-point value `a' is less than or
  | 
      
      
         | 4165 | 
          | 
          | 
         | equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
  | 
      
      
         | 4166 | 
          | 
          | 
         | cause an exception.  Otherwise, the comparison is performed according to the
  | 
      
      
         | 4167 | 
          | 
          | 
         | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 4168 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4169 | 
          | 
          | 
          
  | 
      
      
         | 4170 | 
          | 
          | 
         flag float64_le_quiet( float64 a, float64 b )
  | 
      
      
         | 4171 | 
          | 
          | 
         {
  | 
      
      
         | 4172 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 4173 | 
          | 
          | 
             //int16 aExp, bExp; // Unused variable
  | 
      
      
         | 4174 | 
          | 
          | 
          
  | 
      
      
         | 4175 | 
          | 
          | 
             if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
  | 
      
      
         | 4176 | 
          | 
          | 
                  || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
  | 
      
      
         | 4177 | 
          | 
          | 
                ) {
  | 
      
      
         | 4178 | 
          | 
          | 
                 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
  | 
      
      
         | 4179 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 4180 | 
          | 
          | 
                 }
  | 
      
      
         | 4181 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 4182 | 
          | 
          | 
             }
  | 
      
      
         | 4183 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 4184 | 
          | 
          | 
             bSign = extractFloat64Sign( b );
  | 
      
      
         | 4185 | 
          | 
          | 
             if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
  | 
      
      
         | 4186 | 
          | 
          | 
             return ( a == b ) || ( aSign ^ ( a < b ) );
  | 
      
      
         | 4187 | 
          | 
          | 
          
  | 
      
      
         | 4188 | 
          | 
          | 
         }
  | 
      
      
         | 4189 | 
          | 
          | 
          
  | 
      
      
         | 4190 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4191 | 
          | 
          | 
         | Returns 1 if the double-precision floating-point value `a' is less than
  | 
      
      
         | 4192 | 
          | 
          | 
         | the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
  | 
      
      
         | 4193 | 
          | 
          | 
         | exception.  Otherwise, the comparison is performed according to the IEC/IEEE
  | 
      
      
         | 4194 | 
          | 
          | 
         | Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 4195 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4196 | 
          | 
          | 
          
  | 
      
      
         | 4197 | 
          | 
          | 
         flag float64_lt_quiet( float64 a, float64 b )
  | 
      
      
         | 4198 | 
          | 
          | 
         {
  | 
      
      
         | 4199 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 4200 | 
          | 
          | 
          
  | 
      
      
         | 4201 | 
          | 
          | 
             if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
  | 
      
      
         | 4202 | 
          | 
          | 
                  || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
  | 
      
      
         | 4203 | 
          | 
          | 
                ) {
  | 
      
      
         | 4204 | 
          | 
          | 
                 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
  | 
      
      
         | 4205 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 4206 | 
          | 
          | 
                 }
  | 
      
      
         | 4207 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 4208 | 
          | 
          | 
             }
  | 
      
      
         | 4209 | 
          | 
          | 
             aSign = extractFloat64Sign( a );
  | 
      
      
         | 4210 | 
          | 
          | 
             bSign = extractFloat64Sign( b );
  | 
      
      
         | 4211 | 
          | 
          | 
             if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
  | 
      
      
         | 4212 | 
          | 
          | 
             return ( a != b ) && ( aSign ^ ( a < b ) );
  | 
      
      
         | 4213 | 
          | 
          | 
          
  | 
      
      
         | 4214 | 
          | 
          | 
         }
  | 
      
      
         | 4215 | 
          | 
          | 
          
  | 
      
      
         | 4216 | 
          | 
          | 
         #ifdef FLOATX80
  | 
      
      
         | 4217 | 
          | 
          | 
          
  | 
      
      
         | 4218 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4219 | 
          | 
          | 
         | Returns the result of converting the extended double-precision floating-
  | 
      
      
         | 4220 | 
          | 
          | 
         | point value `a' to the 32-bit two's complement integer format.  The
  | 
      
      
         | 4221 | 
          | 
          | 
         | conversion is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 4222 | 
          | 
          | 
         | Floating-Point Arithmetic---which means in particular that the conversion
  | 
      
      
         | 4223 | 
          | 
          | 
         | is rounded according to the current rounding mode.  If `a' is a NaN, the
  | 
      
      
         | 4224 | 
          | 
          | 
         | largest positive integer is returned.  Otherwise, if the conversion
  | 
      
      
         | 4225 | 
          | 
          | 
         | overflows, the largest integer with the same sign as `a' is returned.
  | 
      
      
         | 4226 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4227 | 
          | 
          | 
          
  | 
      
      
         | 4228 | 
          | 
          | 
         int32 floatx80_to_int32( floatx80 a )
  | 
      
      
         | 4229 | 
          | 
          | 
         {
  | 
      
      
         | 4230 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 4231 | 
          | 
          | 
             int32 aExp, shiftCount;
  | 
      
      
         | 4232 | 
          | 
          | 
             bits64 aSig;
  | 
      
      
         | 4233 | 
          | 
          | 
          
  | 
      
      
         | 4234 | 
          | 
          | 
             aSig = extractFloatx80Frac( a );
  | 
      
      
         | 4235 | 
          | 
          | 
             aExp = extractFloatx80Exp( a );
  | 
      
      
         | 4236 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 4237 | 
          | 
          | 
             if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
  | 
      
      
         | 4238 | 
          | 
          | 
             shiftCount = 0x4037 - aExp;
  | 
      
      
         | 4239 | 
          | 
          | 
             if ( shiftCount <= 0 ) shiftCount = 1;
  | 
      
      
         | 4240 | 
          | 
          | 
             shift64RightJamming( aSig, shiftCount, &aSig );
  | 
      
      
         | 4241 | 
          | 
          | 
             return roundAndPackInt32( aSign, aSig );
  | 
      
      
         | 4242 | 
          | 
          | 
          
  | 
      
      
         | 4243 | 
          | 
          | 
         }
  | 
      
      
         | 4244 | 
          | 
          | 
          
  | 
      
      
         | 4245 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4246 | 
          | 
          | 
         | Returns the result of converting the extended double-precision floating-
  | 
      
      
         | 4247 | 
          | 
          | 
         | point value `a' to the 32-bit two's complement integer format.  The
  | 
      
      
         | 4248 | 
          | 
          | 
         | conversion is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 4249 | 
          | 
          | 
         | Floating-Point Arithmetic, except that the conversion is always rounded
  | 
      
      
         | 4250 | 
          | 
          | 
         | toward zero.  If `a' is a NaN, the largest positive integer is returned.
  | 
      
      
         | 4251 | 
          | 
          | 
         | Otherwise, if the conversion overflows, the largest integer with the same
  | 
      
      
         | 4252 | 
          | 
          | 
         | sign as `a' is returned.
  | 
      
      
         | 4253 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4254 | 
          | 
          | 
          
  | 
      
      
         | 4255 | 
          | 
          | 
         int32 floatx80_to_int32_round_to_zero( floatx80 a )
  | 
      
      
         | 4256 | 
          | 
          | 
         {
  | 
      
      
         | 4257 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 4258 | 
          | 
          | 
             int32 aExp, shiftCount;
  | 
      
      
         | 4259 | 
          | 
          | 
             bits64 aSig, savedASig;
  | 
      
      
         | 4260 | 
          | 
          | 
             int32 z;
  | 
      
      
         | 4261 | 
          | 
          | 
          
  | 
      
      
         | 4262 | 
          | 
          | 
             aSig = extractFloatx80Frac( a );
  | 
      
      
         | 4263 | 
          | 
          | 
             aExp = extractFloatx80Exp( a );
  | 
      
      
         | 4264 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 4265 | 
          | 
          | 
             if ( 0x401E < aExp ) {
  | 
      
      
         | 4266 | 
          | 
          | 
                 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
  | 
      
      
         | 4267 | 
          | 
          | 
                 goto invalid;
  | 
      
      
         | 4268 | 
          | 
          | 
             }
  | 
      
      
         | 4269 | 
          | 
          | 
             else if ( aExp < 0x3FFF ) {
  | 
      
      
         | 4270 | 
          | 
          | 
                 if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 4271 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 4272 | 
          | 
          | 
             }
  | 
      
      
         | 4273 | 
          | 
          | 
             shiftCount = 0x403E - aExp;
  | 
      
      
         | 4274 | 
          | 
          | 
             savedASig = aSig;
  | 
      
      
         | 4275 | 
          | 
          | 
             aSig >>= shiftCount;
  | 
      
      
         | 4276 | 
          | 
          | 
             z = aSig;
  | 
      
      
         | 4277 | 
          | 
          | 
             if ( aSign ) z = - z;
  | 
      
      
         | 4278 | 
          | 
          | 
             if ( ( z < 0 ) ^ aSign ) {
  | 
      
      
         | 4279 | 
          | 
          | 
          invalid:
  | 
      
      
         | 4280 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 4281 | 
          | 
          | 
                 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
  | 
      
      
         | 4282 | 
          | 
          | 
             }
  | 
      
      
         | 4283 | 
          | 
          | 
             if ( ( aSig<<shiftCount ) != savedASig ) {
  | 
      
      
         | 4284 | 
          | 
          | 
                 float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 4285 | 
          | 
          | 
             }
  | 
      
      
         | 4286 | 
          | 
          | 
             return z;
  | 
      
      
         | 4287 | 
          | 
          | 
          
  | 
      
      
         | 4288 | 
          | 
          | 
         }
  | 
      
      
         | 4289 | 
          | 
          | 
          
  | 
      
      
         | 4290 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4291 | 
          | 
          | 
         | Returns the result of converting the extended double-precision floating-
  | 
      
      
         | 4292 | 
          | 
          | 
         | point value `a' to the 64-bit two's complement integer format.  The
  | 
      
      
         | 4293 | 
          | 
          | 
         | conversion is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 4294 | 
          | 
          | 
         | Floating-Point Arithmetic---which means in particular that the conversion
  | 
      
      
         | 4295 | 
          | 
          | 
         | is rounded according to the current rounding mode.  If `a' is a NaN,
  | 
      
      
         | 4296 | 
          | 
          | 
         | the largest positive integer is returned.  Otherwise, if the conversion
  | 
      
      
         | 4297 | 
          | 
          | 
         | overflows, the largest integer with the same sign as `a' is returned.
  | 
      
      
         | 4298 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4299 | 
          | 
          | 
          
  | 
      
      
         | 4300 | 
          | 
          | 
         int64 floatx80_to_int64( floatx80 a )
  | 
      
      
         | 4301 | 
          | 
          | 
         {
  | 
      
      
         | 4302 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 4303 | 
          | 
          | 
             int32 aExp, shiftCount;
  | 
      
      
         | 4304 | 
          | 
          | 
             bits64 aSig, aSigExtra;
  | 
      
      
         | 4305 | 
          | 
          | 
          
  | 
      
      
         | 4306 | 
          | 
          | 
             aSig = extractFloatx80Frac( a );
  | 
      
      
         | 4307 | 
          | 
          | 
             aExp = extractFloatx80Exp( a );
  | 
      
      
         | 4308 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 4309 | 
          | 
          | 
             shiftCount = 0x403E - aExp;
  | 
      
      
         | 4310 | 
          | 
          | 
             if ( shiftCount <= 0 ) {
  | 
      
      
         | 4311 | 
          | 
          | 
                 if ( shiftCount ) {
  | 
      
      
         | 4312 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 4313 | 
          | 
          | 
                     if (    ! aSign
  | 
      
      
         | 4314 | 
          | 
          | 
                          || (    ( aExp == 0x7FFF )
  | 
      
      
         | 4315 | 
          | 
          | 
                               && ( aSig != LIT64( 0x8000000000000000 ) ) )
  | 
      
      
         | 4316 | 
          | 
          | 
                        ) {
  | 
      
      
         | 4317 | 
          | 
          | 
                         return LIT64( 0x7FFFFFFFFFFFFFFF );
  | 
      
      
         | 4318 | 
          | 
          | 
                     }
  | 
      
      
         | 4319 | 
          | 
          | 
                     return (sbits64) LIT64( 0x8000000000000000 );
  | 
      
      
         | 4320 | 
          | 
          | 
                 }
  | 
      
      
         | 4321 | 
          | 
          | 
                 aSigExtra = 0;
  | 
      
      
         | 4322 | 
          | 
          | 
             }
  | 
      
      
         | 4323 | 
          | 
          | 
             else {
  | 
      
      
         | 4324 | 
          | 
          | 
                 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
  | 
      
      
         | 4325 | 
          | 
          | 
             }
  | 
      
      
         | 4326 | 
          | 
          | 
             return roundAndPackInt64( aSign, aSig, aSigExtra );
  | 
      
      
         | 4327 | 
          | 
          | 
          
  | 
      
      
         | 4328 | 
          | 
          | 
         }
  | 
      
      
         | 4329 | 
          | 
          | 
          
  | 
      
      
         | 4330 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4331 | 
          | 
          | 
         | Returns the result of converting the extended double-precision floating-
  | 
      
      
         | 4332 | 
          | 
          | 
         | point value `a' to the 64-bit two's complement integer format.  The
  | 
      
      
         | 4333 | 
          | 
          | 
         | conversion is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 4334 | 
          | 
          | 
         | Floating-Point Arithmetic, except that the conversion is always rounded
  | 
      
      
         | 4335 | 
          | 
          | 
         | toward zero.  If `a' is a NaN, the largest positive integer is returned.
  | 
      
      
         | 4336 | 
          | 
          | 
         | Otherwise, if the conversion overflows, the largest integer with the same
  | 
      
      
         | 4337 | 
          | 
          | 
         | sign as `a' is returned.
  | 
      
      
         | 4338 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4339 | 
          | 
          | 
          
  | 
      
      
         | 4340 | 
          | 
          | 
         int64 floatx80_to_int64_round_to_zero( floatx80 a )
  | 
      
      
         | 4341 | 
          | 
          | 
         {
  | 
      
      
         | 4342 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 4343 | 
          | 
          | 
             int32 aExp, shiftCount;
  | 
      
      
         | 4344 | 
          | 
          | 
             bits64 aSig;
  | 
      
      
         | 4345 | 
          | 
          | 
             int64 z;
  | 
      
      
         | 4346 | 
          | 
          | 
          
  | 
      
      
         | 4347 | 
          | 
          | 
             aSig = extractFloatx80Frac( a );
  | 
      
      
         | 4348 | 
          | 
          | 
             aExp = extractFloatx80Exp( a );
  | 
      
      
         | 4349 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 4350 | 
          | 
          | 
             shiftCount = aExp - 0x403E;
  | 
      
      
         | 4351 | 
          | 
          | 
             if ( 0 <= shiftCount ) {
  | 
      
      
         | 4352 | 
          | 
          | 
                 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
  | 
      
      
         | 4353 | 
          | 
          | 
                 if ( ( a.high != 0xC03E ) || aSig ) {
  | 
      
      
         | 4354 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 4355 | 
          | 
          | 
                     if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
  | 
      
      
         | 4356 | 
          | 
          | 
                         return LIT64( 0x7FFFFFFFFFFFFFFF );
  | 
      
      
         | 4357 | 
          | 
          | 
                     }
  | 
      
      
         | 4358 | 
          | 
          | 
                 }
  | 
      
      
         | 4359 | 
          | 
          | 
                 return (sbits64) LIT64( 0x8000000000000000 );
  | 
      
      
         | 4360 | 
          | 
          | 
             }
  | 
      
      
         | 4361 | 
          | 
          | 
             else if ( aExp < 0x3FFF ) {
  | 
      
      
         | 4362 | 
          | 
          | 
                 if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 4363 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 4364 | 
          | 
          | 
             }
  | 
      
      
         | 4365 | 
          | 
          | 
             z = aSig>>( - shiftCount );
  | 
      
      
         | 4366 | 
          | 
          | 
             if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
  | 
      
      
         | 4367 | 
          | 
          | 
                 float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 4368 | 
          | 
          | 
             }
  | 
      
      
         | 4369 | 
          | 
          | 
             if ( aSign ) z = - z;
  | 
      
      
         | 4370 | 
          | 
          | 
             return z;
  | 
      
      
         | 4371 | 
          | 
          | 
          
  | 
      
      
         | 4372 | 
          | 
          | 
         }
  | 
      
      
         | 4373 | 
          | 
          | 
          
  | 
      
      
         | 4374 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4375 | 
          | 
          | 
         | Returns the result of converting the extended double-precision floating-
  | 
      
      
         | 4376 | 
          | 
          | 
         | point value `a' to the single-precision floating-point format.  The
  | 
      
      
         | 4377 | 
          | 
          | 
         | conversion is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 4378 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 4379 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4380 | 
          | 
          | 
          
  | 
      
      
         | 4381 | 
          | 
          | 
         float32 floatx80_to_float32( floatx80 a )
  | 
      
      
         | 4382 | 
          | 
          | 
         {
  | 
      
      
         | 4383 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 4384 | 
          | 
          | 
             int32 aExp;
  | 
      
      
         | 4385 | 
          | 
          | 
             bits64 aSig;
  | 
      
      
         | 4386 | 
          | 
          | 
          
  | 
      
      
         | 4387 | 
          | 
          | 
             aSig = extractFloatx80Frac( a );
  | 
      
      
         | 4388 | 
          | 
          | 
             aExp = extractFloatx80Exp( a );
  | 
      
      
         | 4389 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 4390 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 4391 | 
          | 
          | 
                 if ( (bits64) ( aSig<<1 ) ) {
  | 
      
      
         | 4392 | 
          | 
          | 
                     return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
  | 
      
      
         | 4393 | 
          | 
          | 
                 }
  | 
      
      
         | 4394 | 
          | 
          | 
                 return packFloat32( aSign, 0xFF, 0 );
  | 
      
      
         | 4395 | 
          | 
          | 
             }
  | 
      
      
         | 4396 | 
          | 
          | 
             shift64RightJamming( aSig, 33, &aSig );
  | 
      
      
         | 4397 | 
          | 
          | 
             if ( aExp || aSig ) aExp -= 0x3F81;
  | 
      
      
         | 4398 | 
          | 
          | 
             return roundAndPackFloat32( aSign, aExp, aSig );
  | 
      
      
         | 4399 | 
          | 
          | 
          
  | 
      
      
         | 4400 | 
          | 
          | 
         }
  | 
      
      
         | 4401 | 
          | 
          | 
          
  | 
      
      
         | 4402 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4403 | 
          | 
          | 
         | Returns the result of converting the extended double-precision floating-
  | 
      
      
         | 4404 | 
          | 
          | 
         | point value `a' to the double-precision floating-point format.  The
  | 
      
      
         | 4405 | 
          | 
          | 
         | conversion is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 4406 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 4407 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4408 | 
          | 
          | 
          
  | 
      
      
         | 4409 | 
          | 
          | 
         float64 floatx80_to_float64( floatx80 a )
  | 
      
      
         | 4410 | 
          | 
          | 
         {
  | 
      
      
         | 4411 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 4412 | 
          | 
          | 
             int32 aExp;
  | 
      
      
         | 4413 | 
          | 
          | 
             bits64 aSig, zSig;
  | 
      
      
         | 4414 | 
          | 
          | 
          
  | 
      
      
         | 4415 | 
          | 
          | 
             aSig = extractFloatx80Frac( a );
  | 
      
      
         | 4416 | 
          | 
          | 
             aExp = extractFloatx80Exp( a );
  | 
      
      
         | 4417 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 4418 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 4419 | 
          | 
          | 
                 if ( (bits64) ( aSig<<1 ) ) {
  | 
      
      
         | 4420 | 
          | 
          | 
                     return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
  | 
      
      
         | 4421 | 
          | 
          | 
                 }
  | 
      
      
         | 4422 | 
          | 
          | 
                 return packFloat64( aSign, 0x7FF, 0 );
  | 
      
      
         | 4423 | 
          | 
          | 
             }
  | 
      
      
         | 4424 | 
          | 
          | 
             shift64RightJamming( aSig, 1, &zSig );
  | 
      
      
         | 4425 | 
          | 
          | 
             if ( aExp || aSig ) aExp -= 0x3C01;
  | 
      
      
         | 4426 | 
          | 
          | 
             return roundAndPackFloat64( aSign, aExp, zSig );
  | 
      
      
         | 4427 | 
          | 
          | 
          
  | 
      
      
         | 4428 | 
          | 
          | 
         }
  | 
      
      
         | 4429 | 
          | 
          | 
          
  | 
      
      
         | 4430 | 
          | 
          | 
         #ifdef FLOAT128
  | 
      
      
         | 4431 | 
          | 
          | 
          
  | 
      
      
         | 4432 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4433 | 
          | 
          | 
         | Returns the result of converting the extended double-precision floating-
  | 
      
      
         | 4434 | 
          | 
          | 
         | point value `a' to the quadruple-precision floating-point format.  The
  | 
      
      
         | 4435 | 
          | 
          | 
         | conversion is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 4436 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 4437 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4438 | 
          | 
          | 
          
  | 
      
      
         | 4439 | 
          | 
          | 
         float128 floatx80_to_float128( floatx80 a )
  | 
      
      
         | 4440 | 
          | 
          | 
         {
  | 
      
      
         | 4441 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 4442 | 
          | 
          | 
             int16 aExp;
  | 
      
      
         | 4443 | 
          | 
          | 
             bits64 aSig, zSig0, zSig1;
  | 
      
      
         | 4444 | 
          | 
          | 
          
  | 
      
      
         | 4445 | 
          | 
          | 
             aSig = extractFloatx80Frac( a );
  | 
      
      
         | 4446 | 
          | 
          | 
             aExp = extractFloatx80Exp( a );
  | 
      
      
         | 4447 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 4448 | 
          | 
          | 
             if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
  | 
      
      
         | 4449 | 
          | 
          | 
                 return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
  | 
      
      
         | 4450 | 
          | 
          | 
             }
  | 
      
      
         | 4451 | 
          | 
          | 
             shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
  | 
      
      
         | 4452 | 
          | 
          | 
             return packFloat128( aSign, aExp, zSig0, zSig1 );
  | 
      
      
         | 4453 | 
          | 
          | 
          
  | 
      
      
         | 4454 | 
          | 
          | 
         }
  | 
      
      
         | 4455 | 
          | 
          | 
          
  | 
      
      
         | 4456 | 
          | 
          | 
         #endif
  | 
      
      
         | 4457 | 
          | 
          | 
          
  | 
      
      
         | 4458 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4459 | 
          | 
          | 
         | Rounds the extended double-precision floating-point value `a' to an integer,
  | 
      
      
         | 4460 | 
          | 
          | 
         | and returns the result as an extended quadruple-precision floating-point
  | 
      
      
         | 4461 | 
          | 
          | 
         | value.  The operation is performed according to the IEC/IEEE Standard for
  | 
      
      
         | 4462 | 
          | 
          | 
         | Binary Floating-Point Arithmetic.
  | 
      
      
         | 4463 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4464 | 
          | 
          | 
          
  | 
      
      
         | 4465 | 
          | 
          | 
         floatx80 floatx80_round_to_int( floatx80 a )
  | 
      
      
         | 4466 | 
          | 
          | 
         {
  | 
      
      
         | 4467 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 4468 | 
          | 
          | 
             int32 aExp;
  | 
      
      
         | 4469 | 
          | 
          | 
             bits64 lastBitMask, roundBitsMask;
  | 
      
      
         | 4470 | 
          | 
          | 
             int8 roundingMode;
  | 
      
      
         | 4471 | 
          | 
          | 
             floatx80 z;
  | 
      
      
         | 4472 | 
          | 
          | 
          
  | 
      
      
         | 4473 | 
          | 
          | 
             aExp = extractFloatx80Exp( a );
  | 
      
      
         | 4474 | 
          | 
          | 
             if ( 0x403E <= aExp ) {
  | 
      
      
         | 4475 | 
          | 
          | 
                 if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
  | 
      
      
         | 4476 | 
          | 
          | 
                     return propagateFloatx80NaN( a, a );
  | 
      
      
         | 4477 | 
          | 
          | 
                 }
  | 
      
      
         | 4478 | 
          | 
          | 
                 return a;
  | 
      
      
         | 4479 | 
          | 
          | 
             }
  | 
      
      
         | 4480 | 
          | 
          | 
             if ( aExp < 0x3FFF ) {
  | 
      
      
         | 4481 | 
          | 
          | 
                 if (    ( aExp == 0 )
  | 
      
      
         | 4482 | 
          | 
          | 
                      && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
  | 
      
      
         | 4483 | 
          | 
          | 
                     return a;
  | 
      
      
         | 4484 | 
          | 
          | 
                 }
  | 
      
      
         | 4485 | 
          | 
          | 
                 float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 4486 | 
          | 
          | 
                 aSign = extractFloatx80Sign( a );
  | 
      
      
         | 4487 | 
          | 
          | 
                 switch ( float_rounding_mode ) {
  | 
      
      
         | 4488 | 
          | 
          | 
                  case float_round_nearest_even:
  | 
      
      
         | 4489 | 
          | 
          | 
                     if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
  | 
      
      
         | 4490 | 
          | 
          | 
                        ) {
  | 
      
      
         | 4491 | 
          | 
          | 
                         return
  | 
      
      
         | 4492 | 
          | 
          | 
                             packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
  | 
      
      
         | 4493 | 
          | 
          | 
                     }
  | 
      
      
         | 4494 | 
          | 
          | 
                     break;
  | 
      
      
         | 4495 | 
          | 
          | 
                  case float_round_down:
  | 
      
      
         | 4496 | 
          | 
          | 
                     return
  | 
      
      
         | 4497 | 
          | 
          | 
                           aSign ?
  | 
      
      
         | 4498 | 
          | 
          | 
                               packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
  | 
      
      
         | 4499 | 
          | 
          | 
                         : packFloatx80( 0, 0, 0 );
  | 
      
      
         | 4500 | 
          | 
          | 
                  case float_round_up:
  | 
      
      
         | 4501 | 
          | 
          | 
                     return
  | 
      
      
         | 4502 | 
          | 
          | 
                           aSign ? packFloatx80( 1, 0, 0 )
  | 
      
      
         | 4503 | 
          | 
          | 
                         : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
  | 
      
      
         | 4504 | 
          | 
          | 
                 }
  | 
      
      
         | 4505 | 
          | 
          | 
                 return packFloatx80( aSign, 0, 0 );
  | 
      
      
         | 4506 | 
          | 
          | 
             }
  | 
      
      
         | 4507 | 
          | 
          | 
             lastBitMask = 1;
  | 
      
      
         | 4508 | 
          | 
          | 
             lastBitMask <<= 0x403E - aExp;
  | 
      
      
         | 4509 | 
          | 
          | 
             roundBitsMask = lastBitMask - 1;
  | 
      
      
         | 4510 | 
          | 
          | 
             z = a;
  | 
      
      
         | 4511 | 
          | 
          | 
             roundingMode = float_rounding_mode;
  | 
      
      
         | 4512 | 
          | 
          | 
             if ( roundingMode == float_round_nearest_even ) {
  | 
      
      
         | 4513 | 
          | 
          | 
                 z.low += lastBitMask>>1;
  | 
      
      
         | 4514 | 
          | 
          | 
                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
  | 
      
      
         | 4515 | 
          | 
          | 
             }
  | 
      
      
         | 4516 | 
          | 
          | 
             else if ( roundingMode != float_round_to_zero ) {
  | 
      
      
         | 4517 | 
          | 
          | 
                 if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
  | 
      
      
         | 4518 | 
          | 
          | 
                     z.low += roundBitsMask;
  | 
      
      
         | 4519 | 
          | 
          | 
                 }
  | 
      
      
         | 4520 | 
          | 
          | 
             }
  | 
      
      
         | 4521 | 
          | 
          | 
             z.low &= ~ roundBitsMask;
  | 
      
      
         | 4522 | 
          | 
          | 
             if ( z.low == 0 ) {
  | 
      
      
         | 4523 | 
          | 
          | 
                 ++z.high;
  | 
      
      
         | 4524 | 
          | 
          | 
                 z.low = LIT64( 0x8000000000000000 );
  | 
      
      
         | 4525 | 
          | 
          | 
             }
  | 
      
      
         | 4526 | 
          | 
          | 
             if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 4527 | 
          | 
          | 
             return z;
  | 
      
      
         | 4528 | 
          | 
          | 
          
  | 
      
      
         | 4529 | 
          | 
          | 
         }
  | 
      
      
         | 4530 | 
          | 
          | 
          
  | 
      
      
         | 4531 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4532 | 
          | 
          | 
         | Returns the result of adding the absolute values of the extended double-
  | 
      
      
         | 4533 | 
          | 
          | 
         | precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
  | 
      
      
         | 4534 | 
          | 
          | 
         | negated before being returned.  `zSign' is ignored if the result is a NaN.
  | 
      
      
         | 4535 | 
          | 
          | 
         | The addition is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 4536 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 4537 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4538 | 
          | 
          | 
          
  | 
      
      
         | 4539 | 
          | 
          | 
         static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
  | 
      
      
         | 4540 | 
          | 
          | 
         {
  | 
      
      
         | 4541 | 
          | 
          | 
             int32 aExp, bExp, zExp;
  | 
      
      
         | 4542 | 
          | 
          | 
             bits64 aSig, bSig, zSig0, zSig1;
  | 
      
      
         | 4543 | 
          | 
          | 
             int32 expDiff;
  | 
      
      
         | 4544 | 
          | 
          | 
          
  | 
      
      
         | 4545 | 
          | 
          | 
             aSig = extractFloatx80Frac( a );
  | 
      
      
         | 4546 | 
          | 
          | 
             aExp = extractFloatx80Exp( a );
  | 
      
      
         | 4547 | 
          | 
          | 
             bSig = extractFloatx80Frac( b );
  | 
      
      
         | 4548 | 
          | 
          | 
             bExp = extractFloatx80Exp( b );
  | 
      
      
         | 4549 | 
          | 
          | 
             expDiff = aExp - bExp;
  | 
      
      
         | 4550 | 
          | 
          | 
             if ( 0 < expDiff ) {
  | 
      
      
         | 4551 | 
          | 
          | 
                 if ( aExp == 0x7FFF ) {
  | 
      
      
         | 4552 | 
          | 
          | 
                     if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
  | 
      
      
         | 4553 | 
          | 
          | 
                     return a;
  | 
      
      
         | 4554 | 
          | 
          | 
                 }
  | 
      
      
         | 4555 | 
          | 
          | 
                 if ( bExp == 0 ) --expDiff;
  | 
      
      
         | 4556 | 
          | 
          | 
                 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
  | 
      
      
         | 4557 | 
          | 
          | 
                 zExp = aExp;
  | 
      
      
         | 4558 | 
          | 
          | 
             }
  | 
      
      
         | 4559 | 
          | 
          | 
             else if ( expDiff < 0 ) {
  | 
      
      
         | 4560 | 
          | 
          | 
                 if ( bExp == 0x7FFF ) {
  | 
      
      
         | 4561 | 
          | 
          | 
                     if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
  | 
      
      
         | 4562 | 
          | 
          | 
                     return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
  | 
      
      
         | 4563 | 
          | 
          | 
                 }
  | 
      
      
         | 4564 | 
          | 
          | 
                 if ( aExp == 0 ) ++expDiff;
  | 
      
      
         | 4565 | 
          | 
          | 
                 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
  | 
      
      
         | 4566 | 
          | 
          | 
                 zExp = bExp;
  | 
      
      
         | 4567 | 
          | 
          | 
             }
  | 
      
      
         | 4568 | 
          | 
          | 
             else {
  | 
      
      
         | 4569 | 
          | 
          | 
                 if ( aExp == 0x7FFF ) {
  | 
      
      
         | 4570 | 
          | 
          | 
                     if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
  | 
      
      
         | 4571 | 
          | 
          | 
                         return propagateFloatx80NaN( a, b );
  | 
      
      
         | 4572 | 
          | 
          | 
                     }
  | 
      
      
         | 4573 | 
          | 
          | 
                     return a;
  | 
      
      
         | 4574 | 
          | 
          | 
                 }
  | 
      
      
         | 4575 | 
          | 
          | 
                 zSig1 = 0;
  | 
      
      
         | 4576 | 
          | 
          | 
                 zSig0 = aSig + bSig;
  | 
      
      
         | 4577 | 
          | 
          | 
                 if ( aExp == 0 ) {
  | 
      
      
         | 4578 | 
          | 
          | 
                     normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
  | 
      
      
         | 4579 | 
          | 
          | 
                     goto roundAndPack;
  | 
      
      
         | 4580 | 
          | 
          | 
                 }
  | 
      
      
         | 4581 | 
          | 
          | 
                 zExp = aExp;
  | 
      
      
         | 4582 | 
          | 
          | 
                 goto shiftRight1;
  | 
      
      
         | 4583 | 
          | 
          | 
             }
  | 
      
      
         | 4584 | 
          | 
          | 
             zSig0 = aSig + bSig;
  | 
      
      
         | 4585 | 
          | 
          | 
             if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
  | 
      
      
         | 4586 | 
          | 
          | 
          shiftRight1:
  | 
      
      
         | 4587 | 
          | 
          | 
             shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
  | 
      
      
         | 4588 | 
          | 
          | 
             zSig0 |= LIT64( 0x8000000000000000 );
  | 
      
      
         | 4589 | 
          | 
          | 
             ++zExp;
  | 
      
      
         | 4590 | 
          | 
          | 
          roundAndPack:
  | 
      
      
         | 4591 | 
          | 
          | 
             return
  | 
      
      
         | 4592 | 
          | 
          | 
                 roundAndPackFloatx80(
  | 
      
      
         | 4593 | 
          | 
          | 
                     floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
  | 
      
      
         | 4594 | 
          | 
          | 
          
  | 
      
      
         | 4595 | 
          | 
          | 
         }
  | 
      
      
         | 4596 | 
          | 
          | 
          
  | 
      
      
         | 4597 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4598 | 
          | 
          | 
         | Returns the result of subtracting the absolute values of the extended
  | 
      
      
         | 4599 | 
          | 
          | 
         | double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
  | 
      
      
         | 4600 | 
          | 
          | 
         | difference is negated before being returned.  `zSign' is ignored if the
  | 
      
      
         | 4601 | 
          | 
          | 
         | result is a NaN.  The subtraction is performed according to the IEC/IEEE
  | 
      
      
         | 4602 | 
          | 
          | 
         | Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 4603 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4604 | 
          | 
          | 
          
  | 
      
      
         | 4605 | 
          | 
          | 
         static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
  | 
      
      
         | 4606 | 
          | 
          | 
         {
  | 
      
      
         | 4607 | 
          | 
          | 
             int32 aExp, bExp, zExp;
  | 
      
      
         | 4608 | 
          | 
          | 
             bits64 aSig, bSig, zSig0, zSig1;
  | 
      
      
         | 4609 | 
          | 
          | 
             int32 expDiff;
  | 
      
      
         | 4610 | 
          | 
          | 
             floatx80 z;
  | 
      
      
         | 4611 | 
          | 
          | 
          
  | 
      
      
         | 4612 | 
          | 
          | 
             aSig = extractFloatx80Frac( a );
  | 
      
      
         | 4613 | 
          | 
          | 
             aExp = extractFloatx80Exp( a );
  | 
      
      
         | 4614 | 
          | 
          | 
             bSig = extractFloatx80Frac( b );
  | 
      
      
         | 4615 | 
          | 
          | 
             bExp = extractFloatx80Exp( b );
  | 
      
      
         | 4616 | 
          | 
          | 
             expDiff = aExp - bExp;
  | 
      
      
         | 4617 | 
          | 
          | 
             if ( 0 < expDiff ) goto aExpBigger;
  | 
      
      
         | 4618 | 
          | 
          | 
             if ( expDiff < 0 ) goto bExpBigger;
  | 
      
      
         | 4619 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 4620 | 
          | 
          | 
                 if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
  | 
      
      
         | 4621 | 
          | 
          | 
                     return propagateFloatx80NaN( a, b );
  | 
      
      
         | 4622 | 
          | 
          | 
                 }
  | 
      
      
         | 4623 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 4624 | 
          | 
          | 
                 z.low = floatx80_default_nan_low;
  | 
      
      
         | 4625 | 
          | 
          | 
                 z.high = floatx80_default_nan_high;
  | 
      
      
         | 4626 | 
          | 
          | 
                 return z;
  | 
      
      
         | 4627 | 
          | 
          | 
             }
  | 
      
      
         | 4628 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 4629 | 
          | 
          | 
                 aExp = 1;
  | 
      
      
         | 4630 | 
          | 
          | 
                 bExp = 1;
  | 
      
      
         | 4631 | 
          | 
          | 
             }
  | 
      
      
         | 4632 | 
          | 
          | 
             zSig1 = 0;
  | 
      
      
         | 4633 | 
          | 
          | 
             if ( bSig < aSig ) goto aBigger;
  | 
      
      
         | 4634 | 
          | 
          | 
             if ( aSig < bSig ) goto bBigger;
  | 
      
      
         | 4635 | 
          | 
          | 
             return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
  | 
      
      
         | 4636 | 
          | 
          | 
          bExpBigger:
  | 
      
      
         | 4637 | 
          | 
          | 
             if ( bExp == 0x7FFF ) {
  | 
      
      
         | 4638 | 
          | 
          | 
                 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
  | 
      
      
         | 4639 | 
          | 
          | 
                 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
  | 
      
      
         | 4640 | 
          | 
          | 
             }
  | 
      
      
         | 4641 | 
          | 
          | 
             if ( aExp == 0 ) ++expDiff;
  | 
      
      
         | 4642 | 
          | 
          | 
             shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
  | 
      
      
         | 4643 | 
          | 
          | 
          bBigger:
  | 
      
      
         | 4644 | 
          | 
          | 
             sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
  | 
      
      
         | 4645 | 
          | 
          | 
             zExp = bExp;
  | 
      
      
         | 4646 | 
          | 
          | 
             zSign ^= 1;
  | 
      
      
         | 4647 | 
          | 
          | 
             goto normalizeRoundAndPack;
  | 
      
      
         | 4648 | 
          | 
          | 
          aExpBigger:
  | 
      
      
         | 4649 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 4650 | 
          | 
          | 
                 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
  | 
      
      
         | 4651 | 
          | 
          | 
                 return a;
  | 
      
      
         | 4652 | 
          | 
          | 
             }
  | 
      
      
         | 4653 | 
          | 
          | 
             if ( bExp == 0 ) --expDiff;
  | 
      
      
         | 4654 | 
          | 
          | 
             shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
  | 
      
      
         | 4655 | 
          | 
          | 
          aBigger:
  | 
      
      
         | 4656 | 
          | 
          | 
             sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
  | 
      
      
         | 4657 | 
          | 
          | 
             zExp = aExp;
  | 
      
      
         | 4658 | 
          | 
          | 
          normalizeRoundAndPack:
  | 
      
      
         | 4659 | 
          | 
          | 
             return
  | 
      
      
         | 4660 | 
          | 
          | 
                 normalizeRoundAndPackFloatx80(
  | 
      
      
         | 4661 | 
          | 
          | 
                     floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
  | 
      
      
         | 4662 | 
          | 
          | 
          
  | 
      
      
         | 4663 | 
          | 
          | 
         }
  | 
      
      
         | 4664 | 
          | 
          | 
          
  | 
      
      
         | 4665 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4666 | 
          | 
          | 
         | Returns the result of adding the extended double-precision floating-point
  | 
      
      
         | 4667 | 
          | 
          | 
         | values `a' and `b'.  The operation is performed according to the IEC/IEEE
  | 
      
      
         | 4668 | 
          | 
          | 
         | Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 4669 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4670 | 
          | 
          | 
          
  | 
      
      
         | 4671 | 
          | 
          | 
         floatx80 floatx80_add( floatx80 a, floatx80 b )
  | 
      
      
         | 4672 | 
          | 
          | 
         {
  | 
      
      
         | 4673 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 4674 | 
          | 
          | 
          
  | 
      
      
         | 4675 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 4676 | 
          | 
          | 
             bSign = extractFloatx80Sign( b );
  | 
      
      
         | 4677 | 
          | 
          | 
             if ( aSign == bSign ) {
  | 
      
      
         | 4678 | 
          | 
          | 
                 return addFloatx80Sigs( a, b, aSign );
  | 
      
      
         | 4679 | 
          | 
          | 
             }
  | 
      
      
         | 4680 | 
          | 
          | 
             else {
  | 
      
      
         | 4681 | 
          | 
          | 
                 return subFloatx80Sigs( a, b, aSign );
  | 
      
      
         | 4682 | 
          | 
          | 
             }
  | 
      
      
         | 4683 | 
          | 
          | 
          
  | 
      
      
         | 4684 | 
          | 
          | 
         }
  | 
      
      
         | 4685 | 
          | 
          | 
          
  | 
      
      
         | 4686 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4687 | 
          | 
          | 
         | Returns the result of subtracting the extended double-precision floating-
  | 
      
      
         | 4688 | 
          | 
          | 
         | point values `a' and `b'.  The operation is performed according to the
  | 
      
      
         | 4689 | 
          | 
          | 
         | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 4690 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4691 | 
          | 
          | 
          
  | 
      
      
         | 4692 | 
          | 
          | 
         floatx80 floatx80_sub( floatx80 a, floatx80 b )
  | 
      
      
         | 4693 | 
          | 
          | 
         {
  | 
      
      
         | 4694 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 4695 | 
          | 
          | 
          
  | 
      
      
         | 4696 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 4697 | 
          | 
          | 
             bSign = extractFloatx80Sign( b );
  | 
      
      
         | 4698 | 
          | 
          | 
             if ( aSign == bSign ) {
  | 
      
      
         | 4699 | 
          | 
          | 
                 return subFloatx80Sigs( a, b, aSign );
  | 
      
      
         | 4700 | 
          | 
          | 
             }
  | 
      
      
         | 4701 | 
          | 
          | 
             else {
  | 
      
      
         | 4702 | 
          | 
          | 
                 return addFloatx80Sigs( a, b, aSign );
  | 
      
      
         | 4703 | 
          | 
          | 
             }
  | 
      
      
         | 4704 | 
          | 
          | 
          
  | 
      
      
         | 4705 | 
          | 
          | 
         }
  | 
      
      
         | 4706 | 
          | 
          | 
          
  | 
      
      
         | 4707 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4708 | 
          | 
          | 
         | Returns the result of multiplying the extended double-precision floating-
  | 
      
      
         | 4709 | 
          | 
          | 
         | point values `a' and `b'.  The operation is performed according to the
  | 
      
      
         | 4710 | 
          | 
          | 
         | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 4711 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4712 | 
          | 
          | 
          
  | 
      
      
         | 4713 | 
          | 
          | 
         floatx80 floatx80_mul( floatx80 a, floatx80 b )
  | 
      
      
         | 4714 | 
          | 
          | 
         {
  | 
      
      
         | 4715 | 
          | 
          | 
             flag aSign, bSign, zSign;
  | 
      
      
         | 4716 | 
          | 
          | 
             int32 aExp, bExp, zExp;
  | 
      
      
         | 4717 | 
          | 
          | 
             bits64 aSig, bSig, zSig0, zSig1;
  | 
      
      
         | 4718 | 
          | 
          | 
             floatx80 z;
  | 
      
      
         | 4719 | 
          | 
          | 
          
  | 
      
      
         | 4720 | 
          | 
          | 
             aSig = extractFloatx80Frac( a );
  | 
      
      
         | 4721 | 
          | 
          | 
             aExp = extractFloatx80Exp( a );
  | 
      
      
         | 4722 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 4723 | 
          | 
          | 
             bSig = extractFloatx80Frac( b );
  | 
      
      
         | 4724 | 
          | 
          | 
             bExp = extractFloatx80Exp( b );
  | 
      
      
         | 4725 | 
          | 
          | 
             bSign = extractFloatx80Sign( b );
  | 
      
      
         | 4726 | 
          | 
          | 
             zSign = aSign ^ bSign;
  | 
      
      
         | 4727 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 4728 | 
          | 
          | 
                 if (    (bits64) ( aSig<<1 )
  | 
      
      
         | 4729 | 
          | 
          | 
                      || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
  | 
      
      
         | 4730 | 
          | 
          | 
                     return propagateFloatx80NaN( a, b );
  | 
      
      
         | 4731 | 
          | 
          | 
                 }
  | 
      
      
         | 4732 | 
          | 
          | 
                 if ( ( bExp | bSig ) == 0 ) goto invalid;
  | 
      
      
         | 4733 | 
          | 
          | 
                 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
  | 
      
      
         | 4734 | 
          | 
          | 
             }
  | 
      
      
         | 4735 | 
          | 
          | 
             if ( bExp == 0x7FFF ) {
  | 
      
      
         | 4736 | 
          | 
          | 
                 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
  | 
      
      
         | 4737 | 
          | 
          | 
                 if ( ( aExp | aSig ) == 0 ) {
  | 
      
      
         | 4738 | 
          | 
          | 
          invalid:
  | 
      
      
         | 4739 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 4740 | 
          | 
          | 
                     z.low = floatx80_default_nan_low;
  | 
      
      
         | 4741 | 
          | 
          | 
                     z.high = floatx80_default_nan_high;
  | 
      
      
         | 4742 | 
          | 
          | 
                     return z;
  | 
      
      
         | 4743 | 
          | 
          | 
                 }
  | 
      
      
         | 4744 | 
          | 
          | 
                 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
  | 
      
      
         | 4745 | 
          | 
          | 
             }
  | 
      
      
         | 4746 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 4747 | 
          | 
          | 
                 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
  | 
      
      
         | 4748 | 
          | 
          | 
                 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 4749 | 
          | 
          | 
             }
  | 
      
      
         | 4750 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 4751 | 
          | 
          | 
                 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
  | 
      
      
         | 4752 | 
          | 
          | 
                 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
  | 
      
      
         | 4753 | 
          | 
          | 
             }
  | 
      
      
         | 4754 | 
          | 
          | 
             zExp = aExp + bExp - 0x3FFE;
  | 
      
      
         | 4755 | 
          | 
          | 
             mul64To128( aSig, bSig, &zSig0, &zSig1 );
  | 
      
      
         | 4756 | 
          | 
          | 
             if ( 0 < (sbits64) zSig0 ) {
  | 
      
      
         | 4757 | 
          | 
          | 
                 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
  | 
      
      
         | 4758 | 
          | 
          | 
                 --zExp;
  | 
      
      
         | 4759 | 
          | 
          | 
             }
  | 
      
      
         | 4760 | 
          | 
          | 
             return
  | 
      
      
         | 4761 | 
          | 
          | 
                 roundAndPackFloatx80(
  | 
      
      
         | 4762 | 
          | 
          | 
                     floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
  | 
      
      
         | 4763 | 
          | 
          | 
          
  | 
      
      
         | 4764 | 
          | 
          | 
         }
  | 
      
      
         | 4765 | 
          | 
          | 
          
  | 
      
      
         | 4766 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4767 | 
          | 
          | 
         | Returns the result of dividing the extended double-precision floating-point
  | 
      
      
         | 4768 | 
          | 
          | 
         | value `a' by the corresponding value `b'.  The operation is performed
  | 
      
      
         | 4769 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 4770 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4771 | 
          | 
          | 
          
  | 
      
      
         | 4772 | 
          | 
          | 
         floatx80 floatx80_div( floatx80 a, floatx80 b )
  | 
      
      
         | 4773 | 
          | 
          | 
         {
  | 
      
      
         | 4774 | 
          | 
          | 
             flag aSign, bSign, zSign;
  | 
      
      
         | 4775 | 
          | 
          | 
             int32 aExp, bExp, zExp;
  | 
      
      
         | 4776 | 
          | 
          | 
             bits64 aSig, bSig, zSig0, zSig1;
  | 
      
      
         | 4777 | 
          | 
          | 
             bits64 rem0, rem1, rem2, term0, term1, term2;
  | 
      
      
         | 4778 | 
          | 
          | 
             floatx80 z;
  | 
      
      
         | 4779 | 
          | 
          | 
          
  | 
      
      
         | 4780 | 
          | 
          | 
             aSig = extractFloatx80Frac( a );
  | 
      
      
         | 4781 | 
          | 
          | 
             aExp = extractFloatx80Exp( a );
  | 
      
      
         | 4782 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 4783 | 
          | 
          | 
             bSig = extractFloatx80Frac( b );
  | 
      
      
         | 4784 | 
          | 
          | 
             bExp = extractFloatx80Exp( b );
  | 
      
      
         | 4785 | 
          | 
          | 
             bSign = extractFloatx80Sign( b );
  | 
      
      
         | 4786 | 
          | 
          | 
             zSign = aSign ^ bSign;
  | 
      
      
         | 4787 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 4788 | 
          | 
          | 
                 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
  | 
      
      
         | 4789 | 
          | 
          | 
                 if ( bExp == 0x7FFF ) {
  | 
      
      
         | 4790 | 
          | 
          | 
                     if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
  | 
      
      
         | 4791 | 
          | 
          | 
                     goto invalid;
  | 
      
      
         | 4792 | 
          | 
          | 
                 }
  | 
      
      
         | 4793 | 
          | 
          | 
                 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
  | 
      
      
         | 4794 | 
          | 
          | 
             }
  | 
      
      
         | 4795 | 
          | 
          | 
             if ( bExp == 0x7FFF ) {
  | 
      
      
         | 4796 | 
          | 
          | 
                 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
  | 
      
      
         | 4797 | 
          | 
          | 
                 return packFloatx80( zSign, 0, 0 );
  | 
      
      
         | 4798 | 
          | 
          | 
             }
  | 
      
      
         | 4799 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 4800 | 
          | 
          | 
                 if ( bSig == 0 ) {
  | 
      
      
         | 4801 | 
          | 
          | 
                     if ( ( aExp | aSig ) == 0 ) {
  | 
      
      
         | 4802 | 
          | 
          | 
          invalid:
  | 
      
      
         | 4803 | 
          | 
          | 
                         float_raise( float_flag_invalid );
  | 
      
      
         | 4804 | 
          | 
          | 
                         z.low = floatx80_default_nan_low;
  | 
      
      
         | 4805 | 
          | 
          | 
                         z.high = floatx80_default_nan_high;
  | 
      
      
         | 4806 | 
          | 
          | 
                         return z;
  | 
      
      
         | 4807 | 
          | 
          | 
                     }
  | 
      
      
         | 4808 | 
          | 
          | 
                     float_raise( float_flag_divbyzero );
  | 
      
      
         | 4809 | 
          | 
          | 
                     return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
  | 
      
      
         | 4810 | 
          | 
          | 
                 }
  | 
      
      
         | 4811 | 
          | 
          | 
                 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
  | 
      
      
         | 4812 | 
          | 
          | 
             }
  | 
      
      
         | 4813 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 4814 | 
          | 
          | 
                 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
  | 
      
      
         | 4815 | 
          | 
          | 
                 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
  | 
      
      
         | 4816 | 
          | 
          | 
             }
  | 
      
      
         | 4817 | 
          | 
          | 
             zExp = aExp - bExp + 0x3FFE;
  | 
      
      
         | 4818 | 
          | 
          | 
             rem1 = 0;
  | 
      
      
         | 4819 | 
          | 
          | 
             if ( bSig <= aSig ) {
  | 
      
      
         | 4820 | 
          | 
          | 
                 shift128Right( aSig, 0, 1, &aSig, &rem1 );
  | 
      
      
         | 4821 | 
          | 
          | 
                 ++zExp;
  | 
      
      
         | 4822 | 
          | 
          | 
             }
  | 
      
      
         | 4823 | 
          | 
          | 
             zSig0 = estimateDiv128To64( aSig, rem1, bSig );
  | 
      
      
         | 4824 | 
          | 
          | 
             mul64To128( bSig, zSig0, &term0, &term1 );
  | 
      
      
         | 4825 | 
          | 
          | 
             sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
  | 
      
      
         | 4826 | 
          | 
          | 
             while ( (sbits64) rem0 < 0 ) {
  | 
      
      
         | 4827 | 
          | 
          | 
                 --zSig0;
  | 
      
      
         | 4828 | 
          | 
          | 
                 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
  | 
      
      
         | 4829 | 
          | 
          | 
             }
  | 
      
      
         | 4830 | 
          | 
          | 
             zSig1 = estimateDiv128To64( rem1, 0, bSig );
  | 
      
      
         | 4831 | 
          | 
          | 
             if ( (bits64) ( zSig1<<1 ) <= 8 ) {
  | 
      
      
         | 4832 | 
          | 
          | 
                 mul64To128( bSig, zSig1, &term1, &term2 );
  | 
      
      
         | 4833 | 
          | 
          | 
                 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
  | 
      
      
         | 4834 | 
          | 
          | 
                 while ( (sbits64) rem1 < 0 ) {
  | 
      
      
         | 4835 | 
          | 
          | 
                     --zSig1;
  | 
      
      
         | 4836 | 
          | 
          | 
                     add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
  | 
      
      
         | 4837 | 
          | 
          | 
                 }
  | 
      
      
         | 4838 | 
          | 
          | 
                 zSig1 |= ( ( rem1 | rem2 ) != 0 );
  | 
      
      
         | 4839 | 
          | 
          | 
             }
  | 
      
      
         | 4840 | 
          | 
          | 
             return
  | 
      
      
         | 4841 | 
          | 
          | 
                 roundAndPackFloatx80(
  | 
      
      
         | 4842 | 
          | 
          | 
                     floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
  | 
      
      
         | 4843 | 
          | 
          | 
          
  | 
      
      
         | 4844 | 
          | 
          | 
         }
  | 
      
      
         | 4845 | 
          | 
          | 
          
  | 
      
      
         | 4846 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4847 | 
          | 
          | 
         | Returns the remainder of the extended double-precision floating-point value
  | 
      
      
         | 4848 | 
          | 
          | 
         | `a' with respect to the corresponding value `b'.  The operation is performed
  | 
      
      
         | 4849 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 4850 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4851 | 
          | 
          | 
          
  | 
      
      
         | 4852 | 
          | 
          | 
         floatx80 floatx80_rem( floatx80 a, floatx80 b )
  | 
      
      
         | 4853 | 
          | 
          | 
         {
  | 
      
      
         | 4854 | 
          | 
          | 
             flag aSign, bSign, zSign;
  | 
      
      
         | 4855 | 
          | 
          | 
             int32 aExp, bExp, expDiff;
  | 
      
      
         | 4856 | 
          | 
          | 
             bits64 aSig0, aSig1, bSig;
  | 
      
      
         | 4857 | 
          | 
          | 
             bits64 q, term0, term1, alternateASig0, alternateASig1;
  | 
      
      
         | 4858 | 
          | 
          | 
             floatx80 z;
  | 
      
      
         | 4859 | 
          | 
          | 
          
  | 
      
      
         | 4860 | 
          | 
          | 
             aSig0 = extractFloatx80Frac( a );
  | 
      
      
         | 4861 | 
          | 
          | 
             aExp = extractFloatx80Exp( a );
  | 
      
      
         | 4862 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 4863 | 
          | 
          | 
             bSig = extractFloatx80Frac( b );
  | 
      
      
         | 4864 | 
          | 
          | 
             bExp = extractFloatx80Exp( b );
  | 
      
      
         | 4865 | 
          | 
          | 
             bSign = extractFloatx80Sign( b );
  | 
      
      
         | 4866 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 4867 | 
          | 
          | 
                 if (    (bits64) ( aSig0<<1 )
  | 
      
      
         | 4868 | 
          | 
          | 
                      || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
  | 
      
      
         | 4869 | 
          | 
          | 
                     return propagateFloatx80NaN( a, b );
  | 
      
      
         | 4870 | 
          | 
          | 
                 }
  | 
      
      
         | 4871 | 
          | 
          | 
                 goto invalid;
  | 
      
      
         | 4872 | 
          | 
          | 
             }
  | 
      
      
         | 4873 | 
          | 
          | 
             if ( bExp == 0x7FFF ) {
  | 
      
      
         | 4874 | 
          | 
          | 
                 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
  | 
      
      
         | 4875 | 
          | 
          | 
                 return a;
  | 
      
      
         | 4876 | 
          | 
          | 
             }
  | 
      
      
         | 4877 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 4878 | 
          | 
          | 
                 if ( bSig == 0 ) {
  | 
      
      
         | 4879 | 
          | 
          | 
          invalid:
  | 
      
      
         | 4880 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 4881 | 
          | 
          | 
                     z.low = floatx80_default_nan_low;
  | 
      
      
         | 4882 | 
          | 
          | 
                     z.high = floatx80_default_nan_high;
  | 
      
      
         | 4883 | 
          | 
          | 
                     return z;
  | 
      
      
         | 4884 | 
          | 
          | 
                 }
  | 
      
      
         | 4885 | 
          | 
          | 
                 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
  | 
      
      
         | 4886 | 
          | 
          | 
             }
  | 
      
      
         | 4887 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 4888 | 
          | 
          | 
                 if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
  | 
      
      
         | 4889 | 
          | 
          | 
                 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
  | 
      
      
         | 4890 | 
          | 
          | 
             }
  | 
      
      
         | 4891 | 
          | 
          | 
             bSig |= LIT64( 0x8000000000000000 );
  | 
      
      
         | 4892 | 
          | 
          | 
             zSign = aSign;
  | 
      
      
         | 4893 | 
          | 
          | 
             expDiff = aExp - bExp;
  | 
      
      
         | 4894 | 
          | 
          | 
             aSig1 = 0;
  | 
      
      
         | 4895 | 
          | 
          | 
             if ( expDiff < 0 ) {
  | 
      
      
         | 4896 | 
          | 
          | 
                 if ( expDiff < -1 ) return a;
  | 
      
      
         | 4897 | 
          | 
          | 
                 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
  | 
      
      
         | 4898 | 
          | 
          | 
                 expDiff = 0;
  | 
      
      
         | 4899 | 
          | 
          | 
             }
  | 
      
      
         | 4900 | 
          | 
          | 
             q = ( bSig <= aSig0 );
  | 
      
      
         | 4901 | 
          | 
          | 
             if ( q ) aSig0 -= bSig;
  | 
      
      
         | 4902 | 
          | 
          | 
             expDiff -= 64;
  | 
      
      
         | 4903 | 
          | 
          | 
             while ( 0 < expDiff ) {
  | 
      
      
         | 4904 | 
          | 
          | 
                 q = estimateDiv128To64( aSig0, aSig1, bSig );
  | 
      
      
         | 4905 | 
          | 
          | 
                 q = ( 2 < q ) ? q - 2 : 0;
  | 
      
      
         | 4906 | 
          | 
          | 
                 mul64To128( bSig, q, &term0, &term1 );
  | 
      
      
         | 4907 | 
          | 
          | 
                 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
  | 
      
      
         | 4908 | 
          | 
          | 
                 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
  | 
      
      
         | 4909 | 
          | 
          | 
                 expDiff -= 62;
  | 
      
      
         | 4910 | 
          | 
          | 
             }
  | 
      
      
         | 4911 | 
          | 
          | 
             expDiff += 64;
  | 
      
      
         | 4912 | 
          | 
          | 
             if ( 0 < expDiff ) {
  | 
      
      
         | 4913 | 
          | 
          | 
                 q = estimateDiv128To64( aSig0, aSig1, bSig );
  | 
      
      
         | 4914 | 
          | 
          | 
                 q = ( 2 < q ) ? q - 2 : 0;
  | 
      
      
         | 4915 | 
          | 
          | 
                 q >>= 64 - expDiff;
  | 
      
      
         | 4916 | 
          | 
          | 
                 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
  | 
      
      
         | 4917 | 
          | 
          | 
                 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
  | 
      
      
         | 4918 | 
          | 
          | 
                 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
  | 
      
      
         | 4919 | 
          | 
          | 
                 while ( le128( term0, term1, aSig0, aSig1 ) ) {
  | 
      
      
         | 4920 | 
          | 
          | 
                     ++q;
  | 
      
      
         | 4921 | 
          | 
          | 
                     sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
  | 
      
      
         | 4922 | 
          | 
          | 
                 }
  | 
      
      
         | 4923 | 
          | 
          | 
             }
  | 
      
      
         | 4924 | 
          | 
          | 
             else {
  | 
      
      
         | 4925 | 
          | 
          | 
                 term1 = 0;
  | 
      
      
         | 4926 | 
          | 
          | 
                 term0 = bSig;
  | 
      
      
         | 4927 | 
          | 
          | 
             }
  | 
      
      
         | 4928 | 
          | 
          | 
             sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
  | 
      
      
         | 4929 | 
          | 
          | 
             if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
  | 
      
      
         | 4930 | 
          | 
          | 
                  || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
  | 
      
      
         | 4931 | 
          | 
          | 
                       && ( q & 1 ) )
  | 
      
      
         | 4932 | 
          | 
          | 
                ) {
  | 
      
      
         | 4933 | 
          | 
          | 
                 aSig0 = alternateASig0;
  | 
      
      
         | 4934 | 
          | 
          | 
                 aSig1 = alternateASig1;
  | 
      
      
         | 4935 | 
          | 
          | 
                 zSign = ! zSign;
  | 
      
      
         | 4936 | 
          | 
          | 
             }
  | 
      
      
         | 4937 | 
          | 
          | 
             return
  | 
      
      
         | 4938 | 
          | 
          | 
                 normalizeRoundAndPackFloatx80(
  | 
      
      
         | 4939 | 
          | 
          | 
                     80, zSign, bExp + expDiff, aSig0, aSig1 );
  | 
      
      
         | 4940 | 
          | 
          | 
          
  | 
      
      
         | 4941 | 
          | 
          | 
         }
  | 
      
      
         | 4942 | 
          | 
          | 
          
  | 
      
      
         | 4943 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 4944 | 
          | 
          | 
         | Returns the square root of the extended double-precision floating-point
  | 
      
      
         | 4945 | 
          | 
          | 
         | value `a'.  The operation is performed according to the IEC/IEEE Standard
  | 
      
      
         | 4946 | 
          | 
          | 
         | for Binary Floating-Point Arithmetic.
  | 
      
      
         | 4947 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 4948 | 
          | 
          | 
          
  | 
      
      
         | 4949 | 
          | 
          | 
         floatx80 floatx80_sqrt( floatx80 a )
  | 
      
      
         | 4950 | 
          | 
          | 
         {
  | 
      
      
         | 4951 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 4952 | 
          | 
          | 
             int32 aExp, zExp;
  | 
      
      
         | 4953 | 
          | 
          | 
             bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
  | 
      
      
         | 4954 | 
          | 
          | 
             bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
  | 
      
      
         | 4955 | 
          | 
          | 
             floatx80 z;
  | 
      
      
         | 4956 | 
          | 
          | 
          
  | 
      
      
         | 4957 | 
          | 
          | 
             aSig0 = extractFloatx80Frac( a );
  | 
      
      
         | 4958 | 
          | 
          | 
             aExp = extractFloatx80Exp( a );
  | 
      
      
         | 4959 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 4960 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 4961 | 
          | 
          | 
                 if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
  | 
      
      
         | 4962 | 
          | 
          | 
                 if ( ! aSign ) return a;
  | 
      
      
         | 4963 | 
          | 
          | 
                 goto invalid;
  | 
      
      
         | 4964 | 
          | 
          | 
             }
  | 
      
      
         | 4965 | 
          | 
          | 
             if ( aSign ) {
  | 
      
      
         | 4966 | 
          | 
          | 
                 if ( ( aExp | aSig0 ) == 0 ) return a;
  | 
      
      
         | 4967 | 
          | 
          | 
          invalid:
  | 
      
      
         | 4968 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 4969 | 
          | 
          | 
                 z.low = floatx80_default_nan_low;
  | 
      
      
         | 4970 | 
          | 
          | 
                 z.high = floatx80_default_nan_high;
  | 
      
      
         | 4971 | 
          | 
          | 
                 return z;
  | 
      
      
         | 4972 | 
          | 
          | 
             }
  | 
      
      
         | 4973 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 4974 | 
          | 
          | 
                 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
  | 
      
      
         | 4975 | 
          | 
          | 
                 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
  | 
      
      
         | 4976 | 
          | 
          | 
             }
  | 
      
      
         | 4977 | 
          | 
          | 
             zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
  | 
      
      
         | 4978 | 
          | 
          | 
             zSig0 = estimateSqrt32( aExp, aSig0>>32 );
  | 
      
      
         | 4979 | 
          | 
          | 
             shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
  | 
      
      
         | 4980 | 
          | 
          | 
             zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
  | 
      
      
         | 4981 | 
          | 
          | 
             doubleZSig0 = zSig0<<1;
  | 
      
      
         | 4982 | 
          | 
          | 
             mul64To128( zSig0, zSig0, &term0, &term1 );
  | 
      
      
         | 4983 | 
          | 
          | 
             sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
  | 
      
      
         | 4984 | 
          | 
          | 
             while ( (sbits64) rem0 < 0 ) {
  | 
      
      
         | 4985 | 
          | 
          | 
                 --zSig0;
  | 
      
      
         | 4986 | 
          | 
          | 
                 doubleZSig0 -= 2;
  | 
      
      
         | 4987 | 
          | 
          | 
                 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
  | 
      
      
         | 4988 | 
          | 
          | 
             }
  | 
      
      
         | 4989 | 
          | 
          | 
             zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
  | 
      
      
         | 4990 | 
          | 
          | 
             if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
  | 
      
      
         | 4991 | 
          | 
          | 
                 if ( zSig1 == 0 ) zSig1 = 1;
  | 
      
      
         | 4992 | 
          | 
          | 
                 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
  | 
      
      
         | 4993 | 
          | 
          | 
                 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
  | 
      
      
         | 4994 | 
          | 
          | 
                 mul64To128( zSig1, zSig1, &term2, &term3 );
  | 
      
      
         | 4995 | 
          | 
          | 
                 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
  | 
      
      
         | 4996 | 
          | 
          | 
                 while ( (sbits64) rem1 < 0 ) {
  | 
      
      
         | 4997 | 
          | 
          | 
                     --zSig1;
  | 
      
      
         | 4998 | 
          | 
          | 
                     shortShift128Left( 0, zSig1, 1, &term2, &term3 );
  | 
      
      
         | 4999 | 
          | 
          | 
                     term3 |= 1;
  | 
      
      
         | 5000 | 
          | 
          | 
                     term2 |= doubleZSig0;
  | 
      
      
         | 5001 | 
          | 
          | 
                     add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
  | 
      
      
         | 5002 | 
          | 
          | 
                 }
  | 
      
      
         | 5003 | 
          | 
          | 
                 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
  | 
      
      
         | 5004 | 
          | 
          | 
             }
  | 
      
      
         | 5005 | 
          | 
          | 
             shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
  | 
      
      
         | 5006 | 
          | 
          | 
             zSig0 |= doubleZSig0;
  | 
      
      
         | 5007 | 
          | 
          | 
             return
  | 
      
      
         | 5008 | 
          | 
          | 
                 roundAndPackFloatx80(
  | 
      
      
         | 5009 | 
          | 
          | 
                     floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
  | 
      
      
         | 5010 | 
          | 
          | 
          
  | 
      
      
         | 5011 | 
          | 
          | 
         }
  | 
      
      
         | 5012 | 
          | 
          | 
          
  | 
      
      
         | 5013 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5014 | 
          | 
          | 
         | Returns 1 if the extended double-precision floating-point value `a' is
  | 
      
      
         | 5015 | 
          | 
          | 
         | equal to the corresponding value `b', and 0 otherwise.  The comparison is
  | 
      
      
         | 5016 | 
          | 
          | 
         | performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 5017 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 5018 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5019 | 
          | 
          | 
          
  | 
      
      
         | 5020 | 
          | 
          | 
         flag floatx80_eq( floatx80 a, floatx80 b )
  | 
      
      
         | 5021 | 
          | 
          | 
         {
  | 
      
      
         | 5022 | 
          | 
          | 
          
  | 
      
      
         | 5023 | 
          | 
          | 
             if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
  | 
      
      
         | 5024 | 
          | 
          | 
                       && (bits64) ( extractFloatx80Frac( a )<<1 ) )
  | 
      
      
         | 5025 | 
          | 
          | 
                  || (    ( extractFloatx80Exp( b ) == 0x7FFF )
  | 
      
      
         | 5026 | 
          | 
          | 
                       && (bits64) ( extractFloatx80Frac( b )<<1 ) )
  | 
      
      
         | 5027 | 
          | 
          | 
                ) {
  | 
      
      
         | 5028 | 
          | 
          | 
                 if (    floatx80_is_signaling_nan( a )
  | 
      
      
         | 5029 | 
          | 
          | 
                      || floatx80_is_signaling_nan( b ) ) {
  | 
      
      
         | 5030 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 5031 | 
          | 
          | 
                 }
  | 
      
      
         | 5032 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 5033 | 
          | 
          | 
             }
  | 
      
      
         | 5034 | 
          | 
          | 
             return
  | 
      
      
         | 5035 | 
          | 
          | 
                    ( a.low == b.low )
  | 
      
      
         | 5036 | 
          | 
          | 
                 && (    ( a.high == b.high )
  | 
      
      
         | 5037 | 
          | 
          | 
                      || (    ( a.low == 0 )
  | 
      
      
         | 5038 | 
          | 
          | 
                           && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
  | 
      
      
         | 5039 | 
          | 
          | 
                    );
  | 
      
      
         | 5040 | 
          | 
          | 
          
  | 
      
      
         | 5041 | 
          | 
          | 
         }
  | 
      
      
         | 5042 | 
          | 
          | 
          
  | 
      
      
         | 5043 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5044 | 
          | 
          | 
         | Returns 1 if the extended double-precision floating-point value `a' is
  | 
      
      
         | 5045 | 
          | 
          | 
         | less than or equal to the corresponding value `b', and 0 otherwise.  The
  | 
      
      
         | 5046 | 
          | 
          | 
         | comparison is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 5047 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 5048 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5049 | 
          | 
          | 
          
  | 
      
      
         | 5050 | 
          | 
          | 
         flag floatx80_le( floatx80 a, floatx80 b )
  | 
      
      
         | 5051 | 
          | 
          | 
         {
  | 
      
      
         | 5052 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 5053 | 
          | 
          | 
          
  | 
      
      
         | 5054 | 
          | 
          | 
             if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
  | 
      
      
         | 5055 | 
          | 
          | 
                       && (bits64) ( extractFloatx80Frac( a )<<1 ) )
  | 
      
      
         | 5056 | 
          | 
          | 
                  || (    ( extractFloatx80Exp( b ) == 0x7FFF )
  | 
      
      
         | 5057 | 
          | 
          | 
                       && (bits64) ( extractFloatx80Frac( b )<<1 ) )
  | 
      
      
         | 5058 | 
          | 
          | 
                ) {
  | 
      
      
         | 5059 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 5060 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 5061 | 
          | 
          | 
             }
  | 
      
      
         | 5062 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 5063 | 
          | 
          | 
             bSign = extractFloatx80Sign( b );
  | 
      
      
         | 5064 | 
          | 
          | 
             if ( aSign != bSign ) {
  | 
      
      
         | 5065 | 
          | 
          | 
                 return
  | 
      
      
         | 5066 | 
          | 
          | 
                        aSign
  | 
      
      
         | 5067 | 
          | 
          | 
                     || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
  | 
      
      
         | 5068 | 
          | 
          | 
                          == 0 );
  | 
      
      
         | 5069 | 
          | 
          | 
             }
  | 
      
      
         | 5070 | 
          | 
          | 
             return
  | 
      
      
         | 5071 | 
          | 
          | 
                   aSign ? le128( b.high, b.low, a.high, a.low )
  | 
      
      
         | 5072 | 
          | 
          | 
                 : le128( a.high, a.low, b.high, b.low );
  | 
      
      
         | 5073 | 
          | 
          | 
          
  | 
      
      
         | 5074 | 
          | 
          | 
         }
  | 
      
      
         | 5075 | 
          | 
          | 
          
  | 
      
      
         | 5076 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5077 | 
          | 
          | 
         | Returns 1 if the extended double-precision floating-point value `a' is
  | 
      
      
         | 5078 | 
          | 
          | 
         | less than the corresponding value `b', and 0 otherwise.  The comparison
  | 
      
      
         | 5079 | 
          | 
          | 
         | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 5080 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 5081 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5082 | 
          | 
          | 
          
  | 
      
      
         | 5083 | 
          | 
          | 
         flag floatx80_lt( floatx80 a, floatx80 b )
  | 
      
      
         | 5084 | 
          | 
          | 
         {
  | 
      
      
         | 5085 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 5086 | 
          | 
          | 
          
  | 
      
      
         | 5087 | 
          | 
          | 
             if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
  | 
      
      
         | 5088 | 
          | 
          | 
                       && (bits64) ( extractFloatx80Frac( a )<<1 ) )
  | 
      
      
         | 5089 | 
          | 
          | 
                  || (    ( extractFloatx80Exp( b ) == 0x7FFF )
  | 
      
      
         | 5090 | 
          | 
          | 
                       && (bits64) ( extractFloatx80Frac( b )<<1 ) )
  | 
      
      
         | 5091 | 
          | 
          | 
                ) {
  | 
      
      
         | 5092 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 5093 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 5094 | 
          | 
          | 
             }
  | 
      
      
         | 5095 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 5096 | 
          | 
          | 
             bSign = extractFloatx80Sign( b );
  | 
      
      
         | 5097 | 
          | 
          | 
             if ( aSign != bSign ) {
  | 
      
      
         | 5098 | 
          | 
          | 
                 return
  | 
      
      
         | 5099 | 
          | 
          | 
                        aSign
  | 
      
      
         | 5100 | 
          | 
          | 
                     && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
  | 
      
      
         | 5101 | 
          | 
          | 
                          != 0 );
  | 
      
      
         | 5102 | 
          | 
          | 
             }
  | 
      
      
         | 5103 | 
          | 
          | 
             return
  | 
      
      
         | 5104 | 
          | 
          | 
                   aSign ? lt128( b.high, b.low, a.high, a.low )
  | 
      
      
         | 5105 | 
          | 
          | 
                 : lt128( a.high, a.low, b.high, b.low );
  | 
      
      
         | 5106 | 
          | 
          | 
          
  | 
      
      
         | 5107 | 
          | 
          | 
         }
  | 
      
      
         | 5108 | 
          | 
          | 
          
  | 
      
      
         | 5109 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5110 | 
          | 
          | 
         | Returns 1 if the extended double-precision floating-point value `a' is equal
  | 
      
      
         | 5111 | 
          | 
          | 
         | to the corresponding value `b', and 0 otherwise.  The invalid exception is
  | 
      
      
         | 5112 | 
          | 
          | 
         | raised if either operand is a NaN.  Otherwise, the comparison is performed
  | 
      
      
         | 5113 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 5114 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5115 | 
          | 
          | 
          
  | 
      
      
         | 5116 | 
          | 
          | 
         flag floatx80_eq_signaling( floatx80 a, floatx80 b )
  | 
      
      
         | 5117 | 
          | 
          | 
         {
  | 
      
      
         | 5118 | 
          | 
          | 
          
  | 
      
      
         | 5119 | 
          | 
          | 
             if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
  | 
      
      
         | 5120 | 
          | 
          | 
                       && (bits64) ( extractFloatx80Frac( a )<<1 ) )
  | 
      
      
         | 5121 | 
          | 
          | 
                  || (    ( extractFloatx80Exp( b ) == 0x7FFF )
  | 
      
      
         | 5122 | 
          | 
          | 
                       && (bits64) ( extractFloatx80Frac( b )<<1 ) )
  | 
      
      
         | 5123 | 
          | 
          | 
                ) {
  | 
      
      
         | 5124 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 5125 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 5126 | 
          | 
          | 
             }
  | 
      
      
         | 5127 | 
          | 
          | 
             return
  | 
      
      
         | 5128 | 
          | 
          | 
                    ( a.low == b.low )
  | 
      
      
         | 5129 | 
          | 
          | 
                 && (    ( a.high == b.high )
  | 
      
      
         | 5130 | 
          | 
          | 
                      || (    ( a.low == 0 )
  | 
      
      
         | 5131 | 
          | 
          | 
                           && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
  | 
      
      
         | 5132 | 
          | 
          | 
                    );
  | 
      
      
         | 5133 | 
          | 
          | 
          
  | 
      
      
         | 5134 | 
          | 
          | 
         }
  | 
      
      
         | 5135 | 
          | 
          | 
          
  | 
      
      
         | 5136 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5137 | 
          | 
          | 
         | Returns 1 if the extended double-precision floating-point value `a' is less
  | 
      
      
         | 5138 | 
          | 
          | 
         | than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
  | 
      
      
         | 5139 | 
          | 
          | 
         | do not cause an exception.  Otherwise, the comparison is performed according
  | 
      
      
         | 5140 | 
          | 
          | 
         | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 5141 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5142 | 
          | 
          | 
          
  | 
      
      
         | 5143 | 
          | 
          | 
         flag floatx80_le_quiet( floatx80 a, floatx80 b )
  | 
      
      
         | 5144 | 
          | 
          | 
         {
  | 
      
      
         | 5145 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 5146 | 
          | 
          | 
          
  | 
      
      
         | 5147 | 
          | 
          | 
             if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
  | 
      
      
         | 5148 | 
          | 
          | 
                       && (bits64) ( extractFloatx80Frac( a )<<1 ) )
  | 
      
      
         | 5149 | 
          | 
          | 
                  || (    ( extractFloatx80Exp( b ) == 0x7FFF )
  | 
      
      
         | 5150 | 
          | 
          | 
                       && (bits64) ( extractFloatx80Frac( b )<<1 ) )
  | 
      
      
         | 5151 | 
          | 
          | 
                ) {
  | 
      
      
         | 5152 | 
          | 
          | 
                 if (    floatx80_is_signaling_nan( a )
  | 
      
      
         | 5153 | 
          | 
          | 
                      || floatx80_is_signaling_nan( b ) ) {
  | 
      
      
         | 5154 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 5155 | 
          | 
          | 
                 }
  | 
      
      
         | 5156 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 5157 | 
          | 
          | 
             }
  | 
      
      
         | 5158 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 5159 | 
          | 
          | 
             bSign = extractFloatx80Sign( b );
  | 
      
      
         | 5160 | 
          | 
          | 
             if ( aSign != bSign ) {
  | 
      
      
         | 5161 | 
          | 
          | 
                 return
  | 
      
      
         | 5162 | 
          | 
          | 
                        aSign
  | 
      
      
         | 5163 | 
          | 
          | 
                     || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
  | 
      
      
         | 5164 | 
          | 
          | 
                          == 0 );
  | 
      
      
         | 5165 | 
          | 
          | 
             }
  | 
      
      
         | 5166 | 
          | 
          | 
             return
  | 
      
      
         | 5167 | 
          | 
          | 
                   aSign ? le128( b.high, b.low, a.high, a.low )
  | 
      
      
         | 5168 | 
          | 
          | 
                 : le128( a.high, a.low, b.high, b.low );
  | 
      
      
         | 5169 | 
          | 
          | 
          
  | 
      
      
         | 5170 | 
          | 
          | 
         }
  | 
      
      
         | 5171 | 
          | 
          | 
          
  | 
      
      
         | 5172 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5173 | 
          | 
          | 
         | Returns 1 if the extended double-precision floating-point value `a' is less
  | 
      
      
         | 5174 | 
          | 
          | 
         | than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
  | 
      
      
         | 5175 | 
          | 
          | 
         | an exception.  Otherwise, the comparison is performed according to the
  | 
      
      
         | 5176 | 
          | 
          | 
         | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 5177 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5178 | 
          | 
          | 
          
  | 
      
      
         | 5179 | 
          | 
          | 
         flag floatx80_lt_quiet( floatx80 a, floatx80 b )
  | 
      
      
         | 5180 | 
          | 
          | 
         {
  | 
      
      
         | 5181 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 5182 | 
          | 
          | 
          
  | 
      
      
         | 5183 | 
          | 
          | 
             if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
  | 
      
      
         | 5184 | 
          | 
          | 
                       && (bits64) ( extractFloatx80Frac( a )<<1 ) )
  | 
      
      
         | 5185 | 
          | 
          | 
                  || (    ( extractFloatx80Exp( b ) == 0x7FFF )
  | 
      
      
         | 5186 | 
          | 
          | 
                       && (bits64) ( extractFloatx80Frac( b )<<1 ) )
  | 
      
      
         | 5187 | 
          | 
          | 
                ) {
  | 
      
      
         | 5188 | 
          | 
          | 
                 if (    floatx80_is_signaling_nan( a )
  | 
      
      
         | 5189 | 
          | 
          | 
                      || floatx80_is_signaling_nan( b ) ) {
  | 
      
      
         | 5190 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 5191 | 
          | 
          | 
                 }
  | 
      
      
         | 5192 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 5193 | 
          | 
          | 
             }
  | 
      
      
         | 5194 | 
          | 
          | 
             aSign = extractFloatx80Sign( a );
  | 
      
      
         | 5195 | 
          | 
          | 
             bSign = extractFloatx80Sign( b );
  | 
      
      
         | 5196 | 
          | 
          | 
             if ( aSign != bSign ) {
  | 
      
      
         | 5197 | 
          | 
          | 
                 return
  | 
      
      
         | 5198 | 
          | 
          | 
                        aSign
  | 
      
      
         | 5199 | 
          | 
          | 
                     && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
  | 
      
      
         | 5200 | 
          | 
          | 
                          != 0 );
  | 
      
      
         | 5201 | 
          | 
          | 
             }
  | 
      
      
         | 5202 | 
          | 
          | 
             return
  | 
      
      
         | 5203 | 
          | 
          | 
                   aSign ? lt128( b.high, b.low, a.high, a.low )
  | 
      
      
         | 5204 | 
          | 
          | 
                 : lt128( a.high, a.low, b.high, b.low );
  | 
      
      
         | 5205 | 
          | 
          | 
          
  | 
      
      
         | 5206 | 
          | 
          | 
         }
  | 
      
      
         | 5207 | 
          | 
          | 
          
  | 
      
      
         | 5208 | 
          | 
          | 
         #endif
  | 
      
      
         | 5209 | 
          | 
          | 
          
  | 
      
      
         | 5210 | 
          | 
          | 
         #ifdef FLOAT128
  | 
      
      
         | 5211 | 
          | 
          | 
          
  | 
      
      
         | 5212 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5213 | 
          | 
          | 
         | Returns the result of converting the quadruple-precision floating-point
  | 
      
      
         | 5214 | 
          | 
          | 
         | value `a' to the 32-bit two's complement integer format.  The conversion
  | 
      
      
         | 5215 | 
          | 
          | 
         | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 5216 | 
          | 
          | 
         | Arithmetic---which means in particular that the conversion is rounded
  | 
      
      
         | 5217 | 
          | 
          | 
         | according to the current rounding mode.  If `a' is a NaN, the largest
  | 
      
      
         | 5218 | 
          | 
          | 
         | positive integer is returned.  Otherwise, if the conversion overflows, the
  | 
      
      
         | 5219 | 
          | 
          | 
         | largest integer with the same sign as `a' is returned.
  | 
      
      
         | 5220 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5221 | 
          | 
          | 
          
  | 
      
      
         | 5222 | 
          | 
          | 
         int32 float128_to_int32( float128 a )
  | 
      
      
         | 5223 | 
          | 
          | 
         {
  | 
      
      
         | 5224 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 5225 | 
          | 
          | 
             int32 aExp, shiftCount;
  | 
      
      
         | 5226 | 
          | 
          | 
             bits64 aSig0, aSig1;
  | 
      
      
         | 5227 | 
          | 
          | 
          
  | 
      
      
         | 5228 | 
          | 
          | 
             aSig1 = extractFloat128Frac1( a );
  | 
      
      
         | 5229 | 
          | 
          | 
             aSig0 = extractFloat128Frac0( a );
  | 
      
      
         | 5230 | 
          | 
          | 
             aExp = extractFloat128Exp( a );
  | 
      
      
         | 5231 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 5232 | 
          | 
          | 
             if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
  | 
      
      
         | 5233 | 
          | 
          | 
             if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
  | 
      
      
         | 5234 | 
          | 
          | 
             aSig0 |= ( aSig1 != 0 );
  | 
      
      
         | 5235 | 
          | 
          | 
             shiftCount = 0x4028 - aExp;
  | 
      
      
         | 5236 | 
          | 
          | 
             if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
  | 
      
      
         | 5237 | 
          | 
          | 
             return roundAndPackInt32( aSign, aSig0 );
  | 
      
      
         | 5238 | 
          | 
          | 
          
  | 
      
      
         | 5239 | 
          | 
          | 
         }
  | 
      
      
         | 5240 | 
          | 
          | 
          
  | 
      
      
         | 5241 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5242 | 
          | 
          | 
         | Returns the result of converting the quadruple-precision floating-point
  | 
      
      
         | 5243 | 
          | 
          | 
         | value `a' to the 32-bit two's complement integer format.  The conversion
  | 
      
      
         | 5244 | 
          | 
          | 
         | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 5245 | 
          | 
          | 
         | Arithmetic, except that the conversion is always rounded toward zero.  If
  | 
      
      
         | 5246 | 
          | 
          | 
         | `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
  | 
      
      
         | 5247 | 
          | 
          | 
         | conversion overflows, the largest integer with the same sign as `a' is
  | 
      
      
         | 5248 | 
          | 
          | 
         | returned.
  | 
      
      
         | 5249 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5250 | 
          | 
          | 
          
  | 
      
      
         | 5251 | 
          | 
          | 
         int32 float128_to_int32_round_to_zero( float128 a )
  | 
      
      
         | 5252 | 
          | 
          | 
         {
  | 
      
      
         | 5253 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 5254 | 
          | 
          | 
             int32 aExp, shiftCount;
  | 
      
      
         | 5255 | 
          | 
          | 
             bits64 aSig0, aSig1, savedASig;
  | 
      
      
         | 5256 | 
          | 
          | 
             int32 z;
  | 
      
      
         | 5257 | 
          | 
          | 
          
  | 
      
      
         | 5258 | 
          | 
          | 
             aSig1 = extractFloat128Frac1( a );
  | 
      
      
         | 5259 | 
          | 
          | 
             aSig0 = extractFloat128Frac0( a );
  | 
      
      
         | 5260 | 
          | 
          | 
             aExp = extractFloat128Exp( a );
  | 
      
      
         | 5261 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 5262 | 
          | 
          | 
             aSig0 |= ( aSig1 != 0 );
  | 
      
      
         | 5263 | 
          | 
          | 
             if ( 0x401E < aExp ) {
  | 
      
      
         | 5264 | 
          | 
          | 
                 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
  | 
      
      
         | 5265 | 
          | 
          | 
                 goto invalid;
  | 
      
      
         | 5266 | 
          | 
          | 
             }
  | 
      
      
         | 5267 | 
          | 
          | 
             else if ( aExp < 0x3FFF ) {
  | 
      
      
         | 5268 | 
          | 
          | 
                 if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 5269 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 5270 | 
          | 
          | 
             }
  | 
      
      
         | 5271 | 
          | 
          | 
             aSig0 |= LIT64( 0x0001000000000000 );
  | 
      
      
         | 5272 | 
          | 
          | 
             shiftCount = 0x402F - aExp;
  | 
      
      
         | 5273 | 
          | 
          | 
             savedASig = aSig0;
  | 
      
      
         | 5274 | 
          | 
          | 
             aSig0 >>= shiftCount;
  | 
      
      
         | 5275 | 
          | 
          | 
             z = aSig0;
  | 
      
      
         | 5276 | 
          | 
          | 
             if ( aSign ) z = - z;
  | 
      
      
         | 5277 | 
          | 
          | 
             if ( ( z < 0 ) ^ aSign ) {
  | 
      
      
         | 5278 | 
          | 
          | 
          invalid:
  | 
      
      
         | 5279 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 5280 | 
          | 
          | 
                 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
  | 
      
      
         | 5281 | 
          | 
          | 
             }
  | 
      
      
         | 5282 | 
          | 
          | 
             if ( ( aSig0<<shiftCount ) != savedASig ) {
  | 
      
      
         | 5283 | 
          | 
          | 
                 float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 5284 | 
          | 
          | 
             }
  | 
      
      
         | 5285 | 
          | 
          | 
             return z;
  | 
      
      
         | 5286 | 
          | 
          | 
          
  | 
      
      
         | 5287 | 
          | 
          | 
         }
  | 
      
      
         | 5288 | 
          | 
          | 
          
  | 
      
      
         | 5289 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5290 | 
          | 
          | 
         | Returns the result of converting the quadruple-precision floating-point
  | 
      
      
         | 5291 | 
          | 
          | 
         | value `a' to the 64-bit two's complement integer format.  The conversion
  | 
      
      
         | 5292 | 
          | 
          | 
         | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 5293 | 
          | 
          | 
         | Arithmetic---which means in particular that the conversion is rounded
  | 
      
      
         | 5294 | 
          | 
          | 
         | according to the current rounding mode.  If `a' is a NaN, the largest
  | 
      
      
         | 5295 | 
          | 
          | 
         | positive integer is returned.  Otherwise, if the conversion overflows, the
  | 
      
      
         | 5296 | 
          | 
          | 
         | largest integer with the same sign as `a' is returned.
  | 
      
      
         | 5297 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5298 | 
          | 
          | 
          
  | 
      
      
         | 5299 | 
          | 
          | 
         int64 float128_to_int64( float128 a )
  | 
      
      
         | 5300 | 
          | 
          | 
         {
  | 
      
      
         | 5301 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 5302 | 
          | 
          | 
             int32 aExp, shiftCount;
  | 
      
      
         | 5303 | 
          | 
          | 
             bits64 aSig0, aSig1;
  | 
      
      
         | 5304 | 
          | 
          | 
          
  | 
      
      
         | 5305 | 
          | 
          | 
             aSig1 = extractFloat128Frac1( a );
  | 
      
      
         | 5306 | 
          | 
          | 
             aSig0 = extractFloat128Frac0( a );
  | 
      
      
         | 5307 | 
          | 
          | 
             aExp = extractFloat128Exp( a );
  | 
      
      
         | 5308 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 5309 | 
          | 
          | 
             if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
  | 
      
      
         | 5310 | 
          | 
          | 
             shiftCount = 0x402F - aExp;
  | 
      
      
         | 5311 | 
          | 
          | 
             if ( shiftCount <= 0 ) {
  | 
      
      
         | 5312 | 
          | 
          | 
                 if ( 0x403E < aExp ) {
  | 
      
      
         | 5313 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 5314 | 
          | 
          | 
                     if (    ! aSign
  | 
      
      
         | 5315 | 
          | 
          | 
                          || (    ( aExp == 0x7FFF )
  | 
      
      
         | 5316 | 
          | 
          | 
                               && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
  | 
      
      
         | 5317 | 
          | 
          | 
                             )
  | 
      
      
         | 5318 | 
          | 
          | 
                        ) {
  | 
      
      
         | 5319 | 
          | 
          | 
                         return LIT64( 0x7FFFFFFFFFFFFFFF );
  | 
      
      
         | 5320 | 
          | 
          | 
                     }
  | 
      
      
         | 5321 | 
          | 
          | 
                     return (sbits64) LIT64( 0x8000000000000000 );
  | 
      
      
         | 5322 | 
          | 
          | 
                 }
  | 
      
      
         | 5323 | 
          | 
          | 
                 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
  | 
      
      
         | 5324 | 
          | 
          | 
             }
  | 
      
      
         | 5325 | 
          | 
          | 
             else {
  | 
      
      
         | 5326 | 
          | 
          | 
                 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
  | 
      
      
         | 5327 | 
          | 
          | 
             }
  | 
      
      
         | 5328 | 
          | 
          | 
             return roundAndPackInt64( aSign, aSig0, aSig1 );
  | 
      
      
         | 5329 | 
          | 
          | 
          
  | 
      
      
         | 5330 | 
          | 
          | 
         }
  | 
      
      
         | 5331 | 
          | 
          | 
          
  | 
      
      
         | 5332 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5333 | 
          | 
          | 
         | Returns the result of converting the quadruple-precision floating-point
  | 
      
      
         | 5334 | 
          | 
          | 
         | value `a' to the 64-bit two's complement integer format.  The conversion
  | 
      
      
         | 5335 | 
          | 
          | 
         | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 5336 | 
          | 
          | 
         | Arithmetic, except that the conversion is always rounded toward zero.
  | 
      
      
         | 5337 | 
          | 
          | 
         | If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
  | 
      
      
         | 5338 | 
          | 
          | 
         | the conversion overflows, the largest integer with the same sign as `a' is
  | 
      
      
         | 5339 | 
          | 
          | 
         | returned.
  | 
      
      
         | 5340 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5341 | 
          | 
          | 
          
  | 
      
      
         | 5342 | 
          | 
          | 
         int64 float128_to_int64_round_to_zero( float128 a )
  | 
      
      
         | 5343 | 
          | 
          | 
         {
  | 
      
      
         | 5344 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 5345 | 
          | 
          | 
             int32 aExp, shiftCount;
  | 
      
      
         | 5346 | 
          | 
          | 
             bits64 aSig0, aSig1;
  | 
      
      
         | 5347 | 
          | 
          | 
             int64 z;
  | 
      
      
         | 5348 | 
          | 
          | 
          
  | 
      
      
         | 5349 | 
          | 
          | 
             aSig1 = extractFloat128Frac1( a );
  | 
      
      
         | 5350 | 
          | 
          | 
             aSig0 = extractFloat128Frac0( a );
  | 
      
      
         | 5351 | 
          | 
          | 
             aExp = extractFloat128Exp( a );
  | 
      
      
         | 5352 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 5353 | 
          | 
          | 
             if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
  | 
      
      
         | 5354 | 
          | 
          | 
             shiftCount = aExp - 0x402F;
  | 
      
      
         | 5355 | 
          | 
          | 
             if ( 0 < shiftCount ) {
  | 
      
      
         | 5356 | 
          | 
          | 
                 if ( 0x403E <= aExp ) {
  | 
      
      
         | 5357 | 
          | 
          | 
                     aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
  | 
      
      
         | 5358 | 
          | 
          | 
                     if (    ( a.high == LIT64( 0xC03E000000000000 ) )
  | 
      
      
         | 5359 | 
          | 
          | 
                          && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
  | 
      
      
         | 5360 | 
          | 
          | 
                         if ( aSig1 ) float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 5361 | 
          | 
          | 
                     }
  | 
      
      
         | 5362 | 
          | 
          | 
                     else {
  | 
      
      
         | 5363 | 
          | 
          | 
                         float_raise( float_flag_invalid );
  | 
      
      
         | 5364 | 
          | 
          | 
                         if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
  | 
      
      
         | 5365 | 
          | 
          | 
                             return LIT64( 0x7FFFFFFFFFFFFFFF );
  | 
      
      
         | 5366 | 
          | 
          | 
                         }
  | 
      
      
         | 5367 | 
          | 
          | 
                     }
  | 
      
      
         | 5368 | 
          | 
          | 
                     return (sbits64) LIT64( 0x8000000000000000 );
  | 
      
      
         | 5369 | 
          | 
          | 
                 }
  | 
      
      
         | 5370 | 
          | 
          | 
                 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
  | 
      
      
         | 5371 | 
          | 
          | 
                 if ( (bits64) ( aSig1<<shiftCount ) ) {
  | 
      
      
         | 5372 | 
          | 
          | 
                     float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 5373 | 
          | 
          | 
                 }
  | 
      
      
         | 5374 | 
          | 
          | 
             }
  | 
      
      
         | 5375 | 
          | 
          | 
             else {
  | 
      
      
         | 5376 | 
          | 
          | 
                 if ( aExp < 0x3FFF ) {
  | 
      
      
         | 5377 | 
          | 
          | 
                     if ( aExp | aSig0 | aSig1 ) {
  | 
      
      
         | 5378 | 
          | 
          | 
                         float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 5379 | 
          | 
          | 
                     }
  | 
      
      
         | 5380 | 
          | 
          | 
                     return 0;
  | 
      
      
         | 5381 | 
          | 
          | 
                 }
  | 
      
      
         | 5382 | 
          | 
          | 
                 z = aSig0>>( - shiftCount );
  | 
      
      
         | 5383 | 
          | 
          | 
                 if (    aSig1
  | 
      
      
         | 5384 | 
          | 
          | 
                      || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
  | 
      
      
         | 5385 | 
          | 
          | 
                     float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 5386 | 
          | 
          | 
                 }
  | 
      
      
         | 5387 | 
          | 
          | 
             }
  | 
      
      
         | 5388 | 
          | 
          | 
             if ( aSign ) z = - z;
  | 
      
      
         | 5389 | 
          | 
          | 
             return z;
  | 
      
      
         | 5390 | 
          | 
          | 
          
  | 
      
      
         | 5391 | 
          | 
          | 
         }
  | 
      
      
         | 5392 | 
          | 
          | 
          
  | 
      
      
         | 5393 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5394 | 
          | 
          | 
         | Returns the result of converting the quadruple-precision floating-point
  | 
      
      
         | 5395 | 
          | 
          | 
         | value `a' to the single-precision floating-point format.  The conversion
  | 
      
      
         | 5396 | 
          | 
          | 
         | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 5397 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 5398 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5399 | 
          | 
          | 
          
  | 
      
      
         | 5400 | 
          | 
          | 
         float32 float128_to_float32( float128 a )
  | 
      
      
         | 5401 | 
          | 
          | 
         {
  | 
      
      
         | 5402 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 5403 | 
          | 
          | 
             int32 aExp;
  | 
      
      
         | 5404 | 
          | 
          | 
             bits64 aSig0, aSig1;
  | 
      
      
         | 5405 | 
          | 
          | 
             bits32 zSig;
  | 
      
      
         | 5406 | 
          | 
          | 
          
  | 
      
      
         | 5407 | 
          | 
          | 
             aSig1 = extractFloat128Frac1( a );
  | 
      
      
         | 5408 | 
          | 
          | 
             aSig0 = extractFloat128Frac0( a );
  | 
      
      
         | 5409 | 
          | 
          | 
             aExp = extractFloat128Exp( a );
  | 
      
      
         | 5410 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 5411 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 5412 | 
          | 
          | 
                 if ( aSig0 | aSig1 ) {
  | 
      
      
         | 5413 | 
          | 
          | 
                     return commonNaNToFloat32( float128ToCommonNaN( a ) );
  | 
      
      
         | 5414 | 
          | 
          | 
                 }
  | 
      
      
         | 5415 | 
          | 
          | 
                 return packFloat32( aSign, 0xFF, 0 );
  | 
      
      
         | 5416 | 
          | 
          | 
             }
  | 
      
      
         | 5417 | 
          | 
          | 
             aSig0 |= ( aSig1 != 0 );
  | 
      
      
         | 5418 | 
          | 
          | 
             shift64RightJamming( aSig0, 18, &aSig0 );
  | 
      
      
         | 5419 | 
          | 
          | 
             zSig = aSig0;
  | 
      
      
         | 5420 | 
          | 
          | 
             if ( aExp || zSig ) {
  | 
      
      
         | 5421 | 
          | 
          | 
                 zSig |= 0x40000000;
  | 
      
      
         | 5422 | 
          | 
          | 
                 aExp -= 0x3F81;
  | 
      
      
         | 5423 | 
          | 
          | 
             }
  | 
      
      
         | 5424 | 
          | 
          | 
             return roundAndPackFloat32( aSign, aExp, zSig );
  | 
      
      
         | 5425 | 
          | 
          | 
          
  | 
      
      
         | 5426 | 
          | 
          | 
         }
  | 
      
      
         | 5427 | 
          | 
          | 
          
  | 
      
      
         | 5428 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5429 | 
          | 
          | 
         | Returns the result of converting the quadruple-precision floating-point
  | 
      
      
         | 5430 | 
          | 
          | 
         | value `a' to the double-precision floating-point format.  The conversion
  | 
      
      
         | 5431 | 
          | 
          | 
         | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 5432 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 5433 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5434 | 
          | 
          | 
          
  | 
      
      
         | 5435 | 
          | 
          | 
         float64 float128_to_float64( float128 a )
  | 
      
      
         | 5436 | 
          | 
          | 
         {
  | 
      
      
         | 5437 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 5438 | 
          | 
          | 
             int32 aExp;
  | 
      
      
         | 5439 | 
          | 
          | 
             bits64 aSig0, aSig1;
  | 
      
      
         | 5440 | 
          | 
          | 
          
  | 
      
      
         | 5441 | 
          | 
          | 
             aSig1 = extractFloat128Frac1( a );
  | 
      
      
         | 5442 | 
          | 
          | 
             aSig0 = extractFloat128Frac0( a );
  | 
      
      
         | 5443 | 
          | 
          | 
             aExp = extractFloat128Exp( a );
  | 
      
      
         | 5444 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 5445 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 5446 | 
          | 
          | 
                 if ( aSig0 | aSig1 ) {
  | 
      
      
         | 5447 | 
          | 
          | 
                     return commonNaNToFloat64( float128ToCommonNaN( a ) );
  | 
      
      
         | 5448 | 
          | 
          | 
                 }
  | 
      
      
         | 5449 | 
          | 
          | 
                 return packFloat64( aSign, 0x7FF, 0 );
  | 
      
      
         | 5450 | 
          | 
          | 
             }
  | 
      
      
         | 5451 | 
          | 
          | 
             shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
  | 
      
      
         | 5452 | 
          | 
          | 
             aSig0 |= ( aSig1 != 0 );
  | 
      
      
         | 5453 | 
          | 
          | 
             if ( aExp || aSig0 ) {
  | 
      
      
         | 5454 | 
          | 
          | 
                 aSig0 |= LIT64( 0x4000000000000000 );
  | 
      
      
         | 5455 | 
          | 
          | 
                 aExp -= 0x3C01;
  | 
      
      
         | 5456 | 
          | 
          | 
             }
  | 
      
      
         | 5457 | 
          | 
          | 
             return roundAndPackFloat64( aSign, aExp, aSig0 );
  | 
      
      
         | 5458 | 
          | 
          | 
          
  | 
      
      
         | 5459 | 
          | 
          | 
         }
  | 
      
      
         | 5460 | 
          | 
          | 
          
  | 
      
      
         | 5461 | 
          | 
          | 
         #ifdef FLOATX80
  | 
      
      
         | 5462 | 
          | 
          | 
          
  | 
      
      
         | 5463 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5464 | 
          | 
          | 
         | Returns the result of converting the quadruple-precision floating-point
  | 
      
      
         | 5465 | 
          | 
          | 
         | value `a' to the extended double-precision floating-point format.  The
  | 
      
      
         | 5466 | 
          | 
          | 
         | conversion is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 5467 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 5468 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5469 | 
          | 
          | 
          
  | 
      
      
         | 5470 | 
          | 
          | 
         floatx80 float128_to_floatx80( float128 a )
  | 
      
      
         | 5471 | 
          | 
          | 
         {
  | 
      
      
         | 5472 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 5473 | 
          | 
          | 
             int32 aExp;
  | 
      
      
         | 5474 | 
          | 
          | 
             bits64 aSig0, aSig1;
  | 
      
      
         | 5475 | 
          | 
          | 
          
  | 
      
      
         | 5476 | 
          | 
          | 
             aSig1 = extractFloat128Frac1( a );
  | 
      
      
         | 5477 | 
          | 
          | 
             aSig0 = extractFloat128Frac0( a );
  | 
      
      
         | 5478 | 
          | 
          | 
             aExp = extractFloat128Exp( a );
  | 
      
      
         | 5479 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 5480 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 5481 | 
          | 
          | 
                 if ( aSig0 | aSig1 ) {
  | 
      
      
         | 5482 | 
          | 
          | 
                     return commonNaNToFloatx80( float128ToCommonNaN( a ) );
  | 
      
      
         | 5483 | 
          | 
          | 
                 }
  | 
      
      
         | 5484 | 
          | 
          | 
                 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
  | 
      
      
         | 5485 | 
          | 
          | 
             }
  | 
      
      
         | 5486 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 5487 | 
          | 
          | 
                 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
  | 
      
      
         | 5488 | 
          | 
          | 
                 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
  | 
      
      
         | 5489 | 
          | 
          | 
             }
  | 
      
      
         | 5490 | 
          | 
          | 
             else {
  | 
      
      
         | 5491 | 
          | 
          | 
                 aSig0 |= LIT64( 0x0001000000000000 );
  | 
      
      
         | 5492 | 
          | 
          | 
             }
  | 
      
      
         | 5493 | 
          | 
          | 
             shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
  | 
      
      
         | 5494 | 
          | 
          | 
             return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
  | 
      
      
         | 5495 | 
          | 
          | 
          
  | 
      
      
         | 5496 | 
          | 
          | 
         }
  | 
      
      
         | 5497 | 
          | 
          | 
          
  | 
      
      
         | 5498 | 
          | 
          | 
         #endif
  | 
      
      
         | 5499 | 
          | 
          | 
          
  | 
      
      
         | 5500 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5501 | 
          | 
          | 
         | Rounds the quadruple-precision floating-point value `a' to an integer, and
  | 
      
      
         | 5502 | 
          | 
          | 
         | returns the result as a quadruple-precision floating-point value.  The
  | 
      
      
         | 5503 | 
          | 
          | 
         | operation is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 5504 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 5505 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5506 | 
          | 
          | 
          
  | 
      
      
         | 5507 | 
          | 
          | 
         float128 float128_round_to_int( float128 a )
  | 
      
      
         | 5508 | 
          | 
          | 
         {
  | 
      
      
         | 5509 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 5510 | 
          | 
          | 
             int32 aExp;
  | 
      
      
         | 5511 | 
          | 
          | 
             bits64 lastBitMask, roundBitsMask;
  | 
      
      
         | 5512 | 
          | 
          | 
             int8 roundingMode;
  | 
      
      
         | 5513 | 
          | 
          | 
             float128 z;
  | 
      
      
         | 5514 | 
          | 
          | 
          
  | 
      
      
         | 5515 | 
          | 
          | 
             aExp = extractFloat128Exp( a );
  | 
      
      
         | 5516 | 
          | 
          | 
             if ( 0x402F <= aExp ) {
  | 
      
      
         | 5517 | 
          | 
          | 
                 if ( 0x406F <= aExp ) {
  | 
      
      
         | 5518 | 
          | 
          | 
                     if (    ( aExp == 0x7FFF )
  | 
      
      
         | 5519 | 
          | 
          | 
                          && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
  | 
      
      
         | 5520 | 
          | 
          | 
                        ) {
  | 
      
      
         | 5521 | 
          | 
          | 
                         return propagateFloat128NaN( a, a );
  | 
      
      
         | 5522 | 
          | 
          | 
                     }
  | 
      
      
         | 5523 | 
          | 
          | 
                     return a;
  | 
      
      
         | 5524 | 
          | 
          | 
                 }
  | 
      
      
         | 5525 | 
          | 
          | 
                 lastBitMask = 1;
  | 
      
      
         | 5526 | 
          | 
          | 
                 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
  | 
      
      
         | 5527 | 
          | 
          | 
                 roundBitsMask = lastBitMask - 1;
  | 
      
      
         | 5528 | 
          | 
          | 
                 z = a;
  | 
      
      
         | 5529 | 
          | 
          | 
                 roundingMode = float_rounding_mode;
  | 
      
      
         | 5530 | 
          | 
          | 
                 if ( roundingMode == float_round_nearest_even ) {
  | 
      
      
         | 5531 | 
          | 
          | 
                     if ( lastBitMask ) {
  | 
      
      
         | 5532 | 
          | 
          | 
                         add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
  | 
      
      
         | 5533 | 
          | 
          | 
                         if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
  | 
      
      
         | 5534 | 
          | 
          | 
                     }
  | 
      
      
         | 5535 | 
          | 
          | 
                     else {
  | 
      
      
         | 5536 | 
          | 
          | 
                         if ( (sbits64) z.low < 0 ) {
  | 
      
      
         | 5537 | 
          | 
          | 
                             ++z.high;
  | 
      
      
         | 5538 | 
          | 
          | 
                             if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
  | 
      
      
         | 5539 | 
          | 
          | 
                         }
  | 
      
      
         | 5540 | 
          | 
          | 
                     }
  | 
      
      
         | 5541 | 
          | 
          | 
                 }
  | 
      
      
         | 5542 | 
          | 
          | 
                 else if ( roundingMode != float_round_to_zero ) {
  | 
      
      
         | 5543 | 
          | 
          | 
                     if (   extractFloat128Sign( z )
  | 
      
      
         | 5544 | 
          | 
          | 
                          ^ ( roundingMode == float_round_up ) ) {
  | 
      
      
         | 5545 | 
          | 
          | 
                         add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
  | 
      
      
         | 5546 | 
          | 
          | 
                     }
  | 
      
      
         | 5547 | 
          | 
          | 
                 }
  | 
      
      
         | 5548 | 
          | 
          | 
                 z.low &= ~ roundBitsMask;
  | 
      
      
         | 5549 | 
          | 
          | 
             }
  | 
      
      
         | 5550 | 
          | 
          | 
             else {
  | 
      
      
         | 5551 | 
          | 
          | 
                 if ( aExp < 0x3FFF ) {
  | 
      
      
         | 5552 | 
          | 
          | 
                     if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
  | 
      
      
         | 5553 | 
          | 
          | 
                     float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 5554 | 
          | 
          | 
                     aSign = extractFloat128Sign( a );
  | 
      
      
         | 5555 | 
          | 
          | 
                     switch ( float_rounding_mode ) {
  | 
      
      
         | 5556 | 
          | 
          | 
                      case float_round_nearest_even:
  | 
      
      
         | 5557 | 
          | 
          | 
                         if (    ( aExp == 0x3FFE )
  | 
      
      
         | 5558 | 
          | 
          | 
                              && (   extractFloat128Frac0( a )
  | 
      
      
         | 5559 | 
          | 
          | 
                                   | extractFloat128Frac1( a ) )
  | 
      
      
         | 5560 | 
          | 
          | 
                            ) {
  | 
      
      
         | 5561 | 
          | 
          | 
                             return packFloat128( aSign, 0x3FFF, 0, 0 );
  | 
      
      
         | 5562 | 
          | 
          | 
                         }
  | 
      
      
         | 5563 | 
          | 
          | 
                         break;
  | 
      
      
         | 5564 | 
          | 
          | 
                      case float_round_down:
  | 
      
      
         | 5565 | 
          | 
          | 
                         return
  | 
      
      
         | 5566 | 
          | 
          | 
                               aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
  | 
      
      
         | 5567 | 
          | 
          | 
                             : packFloat128( 0, 0, 0, 0 );
  | 
      
      
         | 5568 | 
          | 
          | 
                      case float_round_up:
  | 
      
      
         | 5569 | 
          | 
          | 
                         return
  | 
      
      
         | 5570 | 
          | 
          | 
                               aSign ? packFloat128( 1, 0, 0, 0 )
  | 
      
      
         | 5571 | 
          | 
          | 
                             : packFloat128( 0, 0x3FFF, 0, 0 );
  | 
      
      
         | 5572 | 
          | 
          | 
                     }
  | 
      
      
         | 5573 | 
          | 
          | 
                     return packFloat128( aSign, 0, 0, 0 );
  | 
      
      
         | 5574 | 
          | 
          | 
                 }
  | 
      
      
         | 5575 | 
          | 
          | 
                 lastBitMask = 1;
  | 
      
      
         | 5576 | 
          | 
          | 
                 lastBitMask <<= 0x402F - aExp;
  | 
      
      
         | 5577 | 
          | 
          | 
                 roundBitsMask = lastBitMask - 1;
  | 
      
      
         | 5578 | 
          | 
          | 
                 z.low = 0;
  | 
      
      
         | 5579 | 
          | 
          | 
                 z.high = a.high;
  | 
      
      
         | 5580 | 
          | 
          | 
                 roundingMode = float_rounding_mode;
  | 
      
      
         | 5581 | 
          | 
          | 
                 if ( roundingMode == float_round_nearest_even ) {
  | 
      
      
         | 5582 | 
          | 
          | 
                     z.high += lastBitMask>>1;
  | 
      
      
         | 5583 | 
          | 
          | 
                     if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
  | 
      
      
         | 5584 | 
          | 
          | 
                         z.high &= ~ lastBitMask;
  | 
      
      
         | 5585 | 
          | 
          | 
                     }
  | 
      
      
         | 5586 | 
          | 
          | 
                 }
  | 
      
      
         | 5587 | 
          | 
          | 
                 else if ( roundingMode != float_round_to_zero ) {
  | 
      
      
         | 5588 | 
          | 
          | 
                     if (   extractFloat128Sign( z )
  | 
      
      
         | 5589 | 
          | 
          | 
                          ^ ( roundingMode == float_round_up ) ) {
  | 
      
      
         | 5590 | 
          | 
          | 
                         z.high |= ( a.low != 0 );
  | 
      
      
         | 5591 | 
          | 
          | 
                         z.high += roundBitsMask;
  | 
      
      
         | 5592 | 
          | 
          | 
                     }
  | 
      
      
         | 5593 | 
          | 
          | 
                 }
  | 
      
      
         | 5594 | 
          | 
          | 
                 z.high &= ~ roundBitsMask;
  | 
      
      
         | 5595 | 
          | 
          | 
             }
  | 
      
      
         | 5596 | 
          | 
          | 
             if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
  | 
      
      
         | 5597 | 
          | 
          | 
                 float_exception_flags |= float_flag_inexact;
  | 
      
      
         | 5598 | 
          | 
          | 
             }
  | 
      
      
         | 5599 | 
          | 
          | 
             return z;
  | 
      
      
         | 5600 | 
          | 
          | 
          
  | 
      
      
         | 5601 | 
          | 
          | 
         }
  | 
      
      
         | 5602 | 
          | 
          | 
          
  | 
      
      
         | 5603 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5604 | 
          | 
          | 
         | Returns the result of adding the absolute values of the quadruple-precision
  | 
      
      
         | 5605 | 
          | 
          | 
         | floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
  | 
      
      
         | 5606 | 
          | 
          | 
         | before being returned.  `zSign' is ignored if the result is a NaN.
  | 
      
      
         | 5607 | 
          | 
          | 
         | The addition is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 5608 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 5609 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5610 | 
          | 
          | 
          
  | 
      
      
         | 5611 | 
          | 
          | 
         static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
  | 
      
      
         | 5612 | 
          | 
          | 
         {
  | 
      
      
         | 5613 | 
          | 
          | 
             int32 aExp, bExp, zExp;
  | 
      
      
         | 5614 | 
          | 
          | 
             bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
  | 
      
      
         | 5615 | 
          | 
          | 
             int32 expDiff;
  | 
      
      
         | 5616 | 
          | 
          | 
          
  | 
      
      
         | 5617 | 
          | 
          | 
             aSig1 = extractFloat128Frac1( a );
  | 
      
      
         | 5618 | 
          | 
          | 
             aSig0 = extractFloat128Frac0( a );
  | 
      
      
         | 5619 | 
          | 
          | 
             aExp = extractFloat128Exp( a );
  | 
      
      
         | 5620 | 
          | 
          | 
             bSig1 = extractFloat128Frac1( b );
  | 
      
      
         | 5621 | 
          | 
          | 
             bSig0 = extractFloat128Frac0( b );
  | 
      
      
         | 5622 | 
          | 
          | 
             bExp = extractFloat128Exp( b );
  | 
      
      
         | 5623 | 
          | 
          | 
             expDiff = aExp - bExp;
  | 
      
      
         | 5624 | 
          | 
          | 
             if ( 0 < expDiff ) {
  | 
      
      
         | 5625 | 
          | 
          | 
                 if ( aExp == 0x7FFF ) {
  | 
      
      
         | 5626 | 
          | 
          | 
                     if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
  | 
      
      
         | 5627 | 
          | 
          | 
                     return a;
  | 
      
      
         | 5628 | 
          | 
          | 
                 }
  | 
      
      
         | 5629 | 
          | 
          | 
                 if ( bExp == 0 ) {
  | 
      
      
         | 5630 | 
          | 
          | 
                     --expDiff;
  | 
      
      
         | 5631 | 
          | 
          | 
                 }
  | 
      
      
         | 5632 | 
          | 
          | 
                 else {
  | 
      
      
         | 5633 | 
          | 
          | 
                     bSig0 |= LIT64( 0x0001000000000000 );
  | 
      
      
         | 5634 | 
          | 
          | 
                 }
  | 
      
      
         | 5635 | 
          | 
          | 
                 shift128ExtraRightJamming(
  | 
      
      
         | 5636 | 
          | 
          | 
                     bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
  | 
      
      
         | 5637 | 
          | 
          | 
                 zExp = aExp;
  | 
      
      
         | 5638 | 
          | 
          | 
             }
  | 
      
      
         | 5639 | 
          | 
          | 
             else if ( expDiff < 0 ) {
  | 
      
      
         | 5640 | 
          | 
          | 
                 if ( bExp == 0x7FFF ) {
  | 
      
      
         | 5641 | 
          | 
          | 
                     if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
  | 
      
      
         | 5642 | 
          | 
          | 
                     return packFloat128( zSign, 0x7FFF, 0, 0 );
  | 
      
      
         | 5643 | 
          | 
          | 
                 }
  | 
      
      
         | 5644 | 
          | 
          | 
                 if ( aExp == 0 ) {
  | 
      
      
         | 5645 | 
          | 
          | 
                     ++expDiff;
  | 
      
      
         | 5646 | 
          | 
          | 
                 }
  | 
      
      
         | 5647 | 
          | 
          | 
                 else {
  | 
      
      
         | 5648 | 
          | 
          | 
                     aSig0 |= LIT64( 0x0001000000000000 );
  | 
      
      
         | 5649 | 
          | 
          | 
                 }
  | 
      
      
         | 5650 | 
          | 
          | 
                 shift128ExtraRightJamming(
  | 
      
      
         | 5651 | 
          | 
          | 
                     aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
  | 
      
      
         | 5652 | 
          | 
          | 
                 zExp = bExp;
  | 
      
      
         | 5653 | 
          | 
          | 
             }
  | 
      
      
         | 5654 | 
          | 
          | 
             else {
  | 
      
      
         | 5655 | 
          | 
          | 
                 if ( aExp == 0x7FFF ) {
  | 
      
      
         | 5656 | 
          | 
          | 
                     if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
  | 
      
      
         | 5657 | 
          | 
          | 
                         return propagateFloat128NaN( a, b );
  | 
      
      
         | 5658 | 
          | 
          | 
                     }
  | 
      
      
         | 5659 | 
          | 
          | 
                     return a;
  | 
      
      
         | 5660 | 
          | 
          | 
                 }
  | 
      
      
         | 5661 | 
          | 
          | 
                 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
  | 
      
      
         | 5662 | 
          | 
          | 
                 if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
  | 
      
      
         | 5663 | 
          | 
          | 
                 zSig2 = 0;
  | 
      
      
         | 5664 | 
          | 
          | 
                 zSig0 |= LIT64( 0x0002000000000000 );
  | 
      
      
         | 5665 | 
          | 
          | 
                 zExp = aExp;
  | 
      
      
         | 5666 | 
          | 
          | 
                 goto shiftRight1;
  | 
      
      
         | 5667 | 
          | 
          | 
             }
  | 
      
      
         | 5668 | 
          | 
          | 
             aSig0 |= LIT64( 0x0001000000000000 );
  | 
      
      
         | 5669 | 
          | 
          | 
             add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
  | 
      
      
         | 5670 | 
          | 
          | 
             --zExp;
  | 
      
      
         | 5671 | 
          | 
          | 
             if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
  | 
      
      
         | 5672 | 
          | 
          | 
             ++zExp;
  | 
      
      
         | 5673 | 
          | 
          | 
          shiftRight1:
  | 
      
      
         | 5674 | 
          | 
          | 
             shift128ExtraRightJamming(
  | 
      
      
         | 5675 | 
          | 
          | 
                 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
  | 
      
      
         | 5676 | 
          | 
          | 
          roundAndPack:
  | 
      
      
         | 5677 | 
          | 
          | 
             return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
  | 
      
      
         | 5678 | 
          | 
          | 
          
  | 
      
      
         | 5679 | 
          | 
          | 
         }
  | 
      
      
         | 5680 | 
          | 
          | 
          
  | 
      
      
         | 5681 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5682 | 
          | 
          | 
         | Returns the result of subtracting the absolute values of the quadruple-
  | 
      
      
         | 5683 | 
          | 
          | 
         | precision floating-point values `a' and `b'.  If `zSign' is 1, the
  | 
      
      
         | 5684 | 
          | 
          | 
         | difference is negated before being returned.  `zSign' is ignored if the
  | 
      
      
         | 5685 | 
          | 
          | 
         | result is a NaN.  The subtraction is performed according to the IEC/IEEE
  | 
      
      
         | 5686 | 
          | 
          | 
         | Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 5687 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5688 | 
          | 
          | 
          
  | 
      
      
         | 5689 | 
          | 
          | 
         static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
  | 
      
      
         | 5690 | 
          | 
          | 
         {
  | 
      
      
         | 5691 | 
          | 
          | 
             int32 aExp, bExp, zExp;
  | 
      
      
         | 5692 | 
          | 
          | 
             bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
  | 
      
      
         | 5693 | 
          | 
          | 
             int32 expDiff;
  | 
      
      
         | 5694 | 
          | 
          | 
             float128 z;
  | 
      
      
         | 5695 | 
          | 
          | 
          
  | 
      
      
         | 5696 | 
          | 
          | 
             aSig1 = extractFloat128Frac1( a );
  | 
      
      
         | 5697 | 
          | 
          | 
             aSig0 = extractFloat128Frac0( a );
  | 
      
      
         | 5698 | 
          | 
          | 
             aExp = extractFloat128Exp( a );
  | 
      
      
         | 5699 | 
          | 
          | 
             bSig1 = extractFloat128Frac1( b );
  | 
      
      
         | 5700 | 
          | 
          | 
             bSig0 = extractFloat128Frac0( b );
  | 
      
      
         | 5701 | 
          | 
          | 
             bExp = extractFloat128Exp( b );
  | 
      
      
         | 5702 | 
          | 
          | 
             expDiff = aExp - bExp;
  | 
      
      
         | 5703 | 
          | 
          | 
             shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
  | 
      
      
         | 5704 | 
          | 
          | 
             shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
  | 
      
      
         | 5705 | 
          | 
          | 
             if ( 0 < expDiff ) goto aExpBigger;
  | 
      
      
         | 5706 | 
          | 
          | 
             if ( expDiff < 0 ) goto bExpBigger;
  | 
      
      
         | 5707 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 5708 | 
          | 
          | 
                 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
  | 
      
      
         | 5709 | 
          | 
          | 
                     return propagateFloat128NaN( a, b );
  | 
      
      
         | 5710 | 
          | 
          | 
                 }
  | 
      
      
         | 5711 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 5712 | 
          | 
          | 
                 z.low = float128_default_nan_low;
  | 
      
      
         | 5713 | 
          | 
          | 
                 z.high = float128_default_nan_high;
  | 
      
      
         | 5714 | 
          | 
          | 
                 return z;
  | 
      
      
         | 5715 | 
          | 
          | 
             }
  | 
      
      
         | 5716 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 5717 | 
          | 
          | 
                 aExp = 1;
  | 
      
      
         | 5718 | 
          | 
          | 
                 bExp = 1;
  | 
      
      
         | 5719 | 
          | 
          | 
             }
  | 
      
      
         | 5720 | 
          | 
          | 
             if ( bSig0 < aSig0 ) goto aBigger;
  | 
      
      
         | 5721 | 
          | 
          | 
             if ( aSig0 < bSig0 ) goto bBigger;
  | 
      
      
         | 5722 | 
          | 
          | 
             if ( bSig1 < aSig1 ) goto aBigger;
  | 
      
      
         | 5723 | 
          | 
          | 
             if ( aSig1 < bSig1 ) goto bBigger;
  | 
      
      
         | 5724 | 
          | 
          | 
             return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
  | 
      
      
         | 5725 | 
          | 
          | 
          bExpBigger:
  | 
      
      
         | 5726 | 
          | 
          | 
             if ( bExp == 0x7FFF ) {
  | 
      
      
         | 5727 | 
          | 
          | 
                 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
  | 
      
      
         | 5728 | 
          | 
          | 
                 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
  | 
      
      
         | 5729 | 
          | 
          | 
             }
  | 
      
      
         | 5730 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 5731 | 
          | 
          | 
                 ++expDiff;
  | 
      
      
         | 5732 | 
          | 
          | 
             }
  | 
      
      
         | 5733 | 
          | 
          | 
             else {
  | 
      
      
         | 5734 | 
          | 
          | 
                 aSig0 |= LIT64( 0x4000000000000000 );
  | 
      
      
         | 5735 | 
          | 
          | 
             }
  | 
      
      
         | 5736 | 
          | 
          | 
             shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
  | 
      
      
         | 5737 | 
          | 
          | 
             bSig0 |= LIT64( 0x4000000000000000 );
  | 
      
      
         | 5738 | 
          | 
          | 
          bBigger:
  | 
      
      
         | 5739 | 
          | 
          | 
             sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
  | 
      
      
         | 5740 | 
          | 
          | 
             zExp = bExp;
  | 
      
      
         | 5741 | 
          | 
          | 
             zSign ^= 1;
  | 
      
      
         | 5742 | 
          | 
          | 
             goto normalizeRoundAndPack;
  | 
      
      
         | 5743 | 
          | 
          | 
          aExpBigger:
  | 
      
      
         | 5744 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 5745 | 
          | 
          | 
                 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
  | 
      
      
         | 5746 | 
          | 
          | 
                 return a;
  | 
      
      
         | 5747 | 
          | 
          | 
             }
  | 
      
      
         | 5748 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 5749 | 
          | 
          | 
                 --expDiff;
  | 
      
      
         | 5750 | 
          | 
          | 
             }
  | 
      
      
         | 5751 | 
          | 
          | 
             else {
  | 
      
      
         | 5752 | 
          | 
          | 
                 bSig0 |= LIT64( 0x4000000000000000 );
  | 
      
      
         | 5753 | 
          | 
          | 
             }
  | 
      
      
         | 5754 | 
          | 
          | 
             shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
  | 
      
      
         | 5755 | 
          | 
          | 
             aSig0 |= LIT64( 0x4000000000000000 );
  | 
      
      
         | 5756 | 
          | 
          | 
          aBigger:
  | 
      
      
         | 5757 | 
          | 
          | 
             sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
  | 
      
      
         | 5758 | 
          | 
          | 
             zExp = aExp;
  | 
      
      
         | 5759 | 
          | 
          | 
          normalizeRoundAndPack:
  | 
      
      
         | 5760 | 
          | 
          | 
             --zExp;
  | 
      
      
         | 5761 | 
          | 
          | 
             return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
  | 
      
      
         | 5762 | 
          | 
          | 
          
  | 
      
      
         | 5763 | 
          | 
          | 
         }
  | 
      
      
         | 5764 | 
          | 
          | 
          
  | 
      
      
         | 5765 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5766 | 
          | 
          | 
         | Returns the result of adding the quadruple-precision floating-point values
  | 
      
      
         | 5767 | 
          | 
          | 
         | `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
  | 
      
      
         | 5768 | 
          | 
          | 
         | for Binary Floating-Point Arithmetic.
  | 
      
      
         | 5769 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5770 | 
          | 
          | 
          
  | 
      
      
         | 5771 | 
          | 
          | 
         float128 float128_add( float128 a, float128 b )
  | 
      
      
         | 5772 | 
          | 
          | 
         {
  | 
      
      
         | 5773 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 5774 | 
          | 
          | 
          
  | 
      
      
         | 5775 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 5776 | 
          | 
          | 
             bSign = extractFloat128Sign( b );
  | 
      
      
         | 5777 | 
          | 
          | 
             if ( aSign == bSign ) {
  | 
      
      
         | 5778 | 
          | 
          | 
                 return addFloat128Sigs( a, b, aSign );
  | 
      
      
         | 5779 | 
          | 
          | 
             }
  | 
      
      
         | 5780 | 
          | 
          | 
             else {
  | 
      
      
         | 5781 | 
          | 
          | 
                 return subFloat128Sigs( a, b, aSign );
  | 
      
      
         | 5782 | 
          | 
          | 
             }
  | 
      
      
         | 5783 | 
          | 
          | 
          
  | 
      
      
         | 5784 | 
          | 
          | 
         }
  | 
      
      
         | 5785 | 
          | 
          | 
          
  | 
      
      
         | 5786 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5787 | 
          | 
          | 
         | Returns the result of subtracting the quadruple-precision floating-point
  | 
      
      
         | 5788 | 
          | 
          | 
         | values `a' and `b'.  The operation is performed according to the IEC/IEEE
  | 
      
      
         | 5789 | 
          | 
          | 
         | Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 5790 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5791 | 
          | 
          | 
          
  | 
      
      
         | 5792 | 
          | 
          | 
         float128 float128_sub( float128 a, float128 b )
  | 
      
      
         | 5793 | 
          | 
          | 
         {
  | 
      
      
         | 5794 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 5795 | 
          | 
          | 
          
  | 
      
      
         | 5796 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 5797 | 
          | 
          | 
             bSign = extractFloat128Sign( b );
  | 
      
      
         | 5798 | 
          | 
          | 
             if ( aSign == bSign ) {
  | 
      
      
         | 5799 | 
          | 
          | 
                 return subFloat128Sigs( a, b, aSign );
  | 
      
      
         | 5800 | 
          | 
          | 
             }
  | 
      
      
         | 5801 | 
          | 
          | 
             else {
  | 
      
      
         | 5802 | 
          | 
          | 
                 return addFloat128Sigs( a, b, aSign );
  | 
      
      
         | 5803 | 
          | 
          | 
             }
  | 
      
      
         | 5804 | 
          | 
          | 
          
  | 
      
      
         | 5805 | 
          | 
          | 
         }
  | 
      
      
         | 5806 | 
          | 
          | 
          
  | 
      
      
         | 5807 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5808 | 
          | 
          | 
         | Returns the result of multiplying the quadruple-precision floating-point
  | 
      
      
         | 5809 | 
          | 
          | 
         | values `a' and `b'.  The operation is performed according to the IEC/IEEE
  | 
      
      
         | 5810 | 
          | 
          | 
         | Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 5811 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5812 | 
          | 
          | 
          
  | 
      
      
         | 5813 | 
          | 
          | 
         float128 float128_mul( float128 a, float128 b )
  | 
      
      
         | 5814 | 
          | 
          | 
         {
  | 
      
      
         | 5815 | 
          | 
          | 
             flag aSign, bSign, zSign;
  | 
      
      
         | 5816 | 
          | 
          | 
             int32 aExp, bExp, zExp;
  | 
      
      
         | 5817 | 
          | 
          | 
             bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
  | 
      
      
         | 5818 | 
          | 
          | 
             float128 z;
  | 
      
      
         | 5819 | 
          | 
          | 
          
  | 
      
      
         | 5820 | 
          | 
          | 
             aSig1 = extractFloat128Frac1( a );
  | 
      
      
         | 5821 | 
          | 
          | 
             aSig0 = extractFloat128Frac0( a );
  | 
      
      
         | 5822 | 
          | 
          | 
             aExp = extractFloat128Exp( a );
  | 
      
      
         | 5823 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 5824 | 
          | 
          | 
             bSig1 = extractFloat128Frac1( b );
  | 
      
      
         | 5825 | 
          | 
          | 
             bSig0 = extractFloat128Frac0( b );
  | 
      
      
         | 5826 | 
          | 
          | 
             bExp = extractFloat128Exp( b );
  | 
      
      
         | 5827 | 
          | 
          | 
             bSign = extractFloat128Sign( b );
  | 
      
      
         | 5828 | 
          | 
          | 
             zSign = aSign ^ bSign;
  | 
      
      
         | 5829 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 5830 | 
          | 
          | 
                 if (    ( aSig0 | aSig1 )
  | 
      
      
         | 5831 | 
          | 
          | 
                      || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
  | 
      
      
         | 5832 | 
          | 
          | 
                     return propagateFloat128NaN( a, b );
  | 
      
      
         | 5833 | 
          | 
          | 
                 }
  | 
      
      
         | 5834 | 
          | 
          | 
                 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
  | 
      
      
         | 5835 | 
          | 
          | 
                 return packFloat128( zSign, 0x7FFF, 0, 0 );
  | 
      
      
         | 5836 | 
          | 
          | 
             }
  | 
      
      
         | 5837 | 
          | 
          | 
             if ( bExp == 0x7FFF ) {
  | 
      
      
         | 5838 | 
          | 
          | 
                 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
  | 
      
      
         | 5839 | 
          | 
          | 
                 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
  | 
      
      
         | 5840 | 
          | 
          | 
          invalid:
  | 
      
      
         | 5841 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 5842 | 
          | 
          | 
                     z.low = float128_default_nan_low;
  | 
      
      
         | 5843 | 
          | 
          | 
                     z.high = float128_default_nan_high;
  | 
      
      
         | 5844 | 
          | 
          | 
                     return z;
  | 
      
      
         | 5845 | 
          | 
          | 
                 }
  | 
      
      
         | 5846 | 
          | 
          | 
                 return packFloat128( zSign, 0x7FFF, 0, 0 );
  | 
      
      
         | 5847 | 
          | 
          | 
             }
  | 
      
      
         | 5848 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 5849 | 
          | 
          | 
                 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
  | 
      
      
         | 5850 | 
          | 
          | 
                 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
  | 
      
      
         | 5851 | 
          | 
          | 
             }
  | 
      
      
         | 5852 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 5853 | 
          | 
          | 
                 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
  | 
      
      
         | 5854 | 
          | 
          | 
                 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
  | 
      
      
         | 5855 | 
          | 
          | 
             }
  | 
      
      
         | 5856 | 
          | 
          | 
             zExp = aExp + bExp - 0x4000;
  | 
      
      
         | 5857 | 
          | 
          | 
             aSig0 |= LIT64( 0x0001000000000000 );
  | 
      
      
         | 5858 | 
          | 
          | 
             shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
  | 
      
      
         | 5859 | 
          | 
          | 
             mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
  | 
      
      
         | 5860 | 
          | 
          | 
             add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
  | 
      
      
         | 5861 | 
          | 
          | 
             zSig2 |= ( zSig3 != 0 );
  | 
      
      
         | 5862 | 
          | 
          | 
             if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
  | 
      
      
         | 5863 | 
          | 
          | 
                 shift128ExtraRightJamming(
  | 
      
      
         | 5864 | 
          | 
          | 
                     zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
  | 
      
      
         | 5865 | 
          | 
          | 
                 ++zExp;
  | 
      
      
         | 5866 | 
          | 
          | 
             }
  | 
      
      
         | 5867 | 
          | 
          | 
             return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
  | 
      
      
         | 5868 | 
          | 
          | 
          
  | 
      
      
         | 5869 | 
          | 
          | 
         }
  | 
      
      
         | 5870 | 
          | 
          | 
          
  | 
      
      
         | 5871 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5872 | 
          | 
          | 
         | Returns the result of dividing the quadruple-precision floating-point value
  | 
      
      
         | 5873 | 
          | 
          | 
         | `a' by the corresponding value `b'.  The operation is performed according to
  | 
      
      
         | 5874 | 
          | 
          | 
         | the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 5875 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5876 | 
          | 
          | 
          
  | 
      
      
         | 5877 | 
          | 
          | 
         float128 float128_div( float128 a, float128 b )
  | 
      
      
         | 5878 | 
          | 
          | 
         {
  | 
      
      
         | 5879 | 
          | 
          | 
             flag aSign, bSign, zSign;
  | 
      
      
         | 5880 | 
          | 
          | 
             int32 aExp, bExp, zExp;
  | 
      
      
         | 5881 | 
          | 
          | 
             bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
  | 
      
      
         | 5882 | 
          | 
          | 
             bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
  | 
      
      
         | 5883 | 
          | 
          | 
             float128 z;
  | 
      
      
         | 5884 | 
          | 
          | 
          
  | 
      
      
         | 5885 | 
          | 
          | 
             aSig1 = extractFloat128Frac1( a );
  | 
      
      
         | 5886 | 
          | 
          | 
             aSig0 = extractFloat128Frac0( a );
  | 
      
      
         | 5887 | 
          | 
          | 
             aExp = extractFloat128Exp( a );
  | 
      
      
         | 5888 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 5889 | 
          | 
          | 
             bSig1 = extractFloat128Frac1( b );
  | 
      
      
         | 5890 | 
          | 
          | 
             bSig0 = extractFloat128Frac0( b );
  | 
      
      
         | 5891 | 
          | 
          | 
             bExp = extractFloat128Exp( b );
  | 
      
      
         | 5892 | 
          | 
          | 
             bSign = extractFloat128Sign( b );
  | 
      
      
         | 5893 | 
          | 
          | 
             zSign = aSign ^ bSign;
  | 
      
      
         | 5894 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 5895 | 
          | 
          | 
                 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
  | 
      
      
         | 5896 | 
          | 
          | 
                 if ( bExp == 0x7FFF ) {
  | 
      
      
         | 5897 | 
          | 
          | 
                     if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
  | 
      
      
         | 5898 | 
          | 
          | 
                     goto invalid;
  | 
      
      
         | 5899 | 
          | 
          | 
                 }
  | 
      
      
         | 5900 | 
          | 
          | 
                 return packFloat128( zSign, 0x7FFF, 0, 0 );
  | 
      
      
         | 5901 | 
          | 
          | 
             }
  | 
      
      
         | 5902 | 
          | 
          | 
             if ( bExp == 0x7FFF ) {
  | 
      
      
         | 5903 | 
          | 
          | 
                 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
  | 
      
      
         | 5904 | 
          | 
          | 
                 return packFloat128( zSign, 0, 0, 0 );
  | 
      
      
         | 5905 | 
          | 
          | 
             }
  | 
      
      
         | 5906 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 5907 | 
          | 
          | 
                 if ( ( bSig0 | bSig1 ) == 0 ) {
  | 
      
      
         | 5908 | 
          | 
          | 
                     if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
  | 
      
      
         | 5909 | 
          | 
          | 
          invalid:
  | 
      
      
         | 5910 | 
          | 
          | 
                         float_raise( float_flag_invalid );
  | 
      
      
         | 5911 | 
          | 
          | 
                         z.low = float128_default_nan_low;
  | 
      
      
         | 5912 | 
          | 
          | 
                         z.high = float128_default_nan_high;
  | 
      
      
         | 5913 | 
          | 
          | 
                         return z;
  | 
      
      
         | 5914 | 
          | 
          | 
                     }
  | 
      
      
         | 5915 | 
          | 
          | 
                     float_raise( float_flag_divbyzero );
  | 
      
      
         | 5916 | 
          | 
          | 
                     return packFloat128( zSign, 0x7FFF, 0, 0 );
  | 
      
      
         | 5917 | 
          | 
          | 
                 }
  | 
      
      
         | 5918 | 
          | 
          | 
                 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
  | 
      
      
         | 5919 | 
          | 
          | 
             }
  | 
      
      
         | 5920 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 5921 | 
          | 
          | 
                 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
  | 
      
      
         | 5922 | 
          | 
          | 
                 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
  | 
      
      
         | 5923 | 
          | 
          | 
             }
  | 
      
      
         | 5924 | 
          | 
          | 
             zExp = aExp - bExp + 0x3FFD;
  | 
      
      
         | 5925 | 
          | 
          | 
             shortShift128Left(
  | 
      
      
         | 5926 | 
          | 
          | 
                 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
  | 
      
      
         | 5927 | 
          | 
          | 
             shortShift128Left(
  | 
      
      
         | 5928 | 
          | 
          | 
                 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
  | 
      
      
         | 5929 | 
          | 
          | 
             if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
  | 
      
      
         | 5930 | 
          | 
          | 
                 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
  | 
      
      
         | 5931 | 
          | 
          | 
                 ++zExp;
  | 
      
      
         | 5932 | 
          | 
          | 
             }
  | 
      
      
         | 5933 | 
          | 
          | 
             zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
  | 
      
      
         | 5934 | 
          | 
          | 
             mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
  | 
      
      
         | 5935 | 
          | 
          | 
             sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
  | 
      
      
         | 5936 | 
          | 
          | 
             while ( (sbits64) rem0 < 0 ) {
  | 
      
      
         | 5937 | 
          | 
          | 
                 --zSig0;
  | 
      
      
         | 5938 | 
          | 
          | 
                 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
  | 
      
      
         | 5939 | 
          | 
          | 
             }
  | 
      
      
         | 5940 | 
          | 
          | 
             zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
  | 
      
      
         | 5941 | 
          | 
          | 
             if ( ( zSig1 & 0x3FFF ) <= 4 ) {
  | 
      
      
         | 5942 | 
          | 
          | 
                 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
  | 
      
      
         | 5943 | 
          | 
          | 
                 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
  | 
      
      
         | 5944 | 
          | 
          | 
                 while ( (sbits64) rem1 < 0 ) {
  | 
      
      
         | 5945 | 
          | 
          | 
                     --zSig1;
  | 
      
      
         | 5946 | 
          | 
          | 
                     add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
  | 
      
      
         | 5947 | 
          | 
          | 
                 }
  | 
      
      
         | 5948 | 
          | 
          | 
                 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
  | 
      
      
         | 5949 | 
          | 
          | 
             }
  | 
      
      
         | 5950 | 
          | 
          | 
             shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
  | 
      
      
         | 5951 | 
          | 
          | 
             return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
  | 
      
      
         | 5952 | 
          | 
          | 
          
  | 
      
      
         | 5953 | 
          | 
          | 
         }
  | 
      
      
         | 5954 | 
          | 
          | 
          
  | 
      
      
         | 5955 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 5956 | 
          | 
          | 
         | Returns the remainder of the quadruple-precision floating-point value `a'
  | 
      
      
         | 5957 | 
          | 
          | 
         | with respect to the corresponding value `b'.  The operation is performed
  | 
      
      
         | 5958 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 5959 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 5960 | 
          | 
          | 
          
  | 
      
      
         | 5961 | 
          | 
          | 
         float128 float128_rem( float128 a, float128 b )
  | 
      
      
         | 5962 | 
          | 
          | 
         {
  | 
      
      
         | 5963 | 
          | 
          | 
             flag aSign, bSign, zSign;
  | 
      
      
         | 5964 | 
          | 
          | 
             int32 aExp, bExp, expDiff;
  | 
      
      
         | 5965 | 
          | 
          | 
             bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
  | 
      
      
         | 5966 | 
          | 
          | 
             bits64 allZero, alternateASig0, alternateASig1, sigMean1;
  | 
      
      
         | 5967 | 
          | 
          | 
             sbits64 sigMean0;
  | 
      
      
         | 5968 | 
          | 
          | 
             float128 z;
  | 
      
      
         | 5969 | 
          | 
          | 
          
  | 
      
      
         | 5970 | 
          | 
          | 
             aSig1 = extractFloat128Frac1( a );
  | 
      
      
         | 5971 | 
          | 
          | 
             aSig0 = extractFloat128Frac0( a );
  | 
      
      
         | 5972 | 
          | 
          | 
             aExp = extractFloat128Exp( a );
  | 
      
      
         | 5973 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 5974 | 
          | 
          | 
             bSig1 = extractFloat128Frac1( b );
  | 
      
      
         | 5975 | 
          | 
          | 
             bSig0 = extractFloat128Frac0( b );
  | 
      
      
         | 5976 | 
          | 
          | 
             bExp = extractFloat128Exp( b );
  | 
      
      
         | 5977 | 
          | 
          | 
             bSign = extractFloat128Sign( b );
  | 
      
      
         | 5978 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 5979 | 
          | 
          | 
                 if (    ( aSig0 | aSig1 )
  | 
      
      
         | 5980 | 
          | 
          | 
                      || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
  | 
      
      
         | 5981 | 
          | 
          | 
                     return propagateFloat128NaN( a, b );
  | 
      
      
         | 5982 | 
          | 
          | 
                 }
  | 
      
      
         | 5983 | 
          | 
          | 
                 goto invalid;
  | 
      
      
         | 5984 | 
          | 
          | 
             }
  | 
      
      
         | 5985 | 
          | 
          | 
             if ( bExp == 0x7FFF ) {
  | 
      
      
         | 5986 | 
          | 
          | 
                 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
  | 
      
      
         | 5987 | 
          | 
          | 
                 return a;
  | 
      
      
         | 5988 | 
          | 
          | 
             }
  | 
      
      
         | 5989 | 
          | 
          | 
             if ( bExp == 0 ) {
  | 
      
      
         | 5990 | 
          | 
          | 
                 if ( ( bSig0 | bSig1 ) == 0 ) {
  | 
      
      
         | 5991 | 
          | 
          | 
          invalid:
  | 
      
      
         | 5992 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 5993 | 
          | 
          | 
                     z.low = float128_default_nan_low;
  | 
      
      
         | 5994 | 
          | 
          | 
                     z.high = float128_default_nan_high;
  | 
      
      
         | 5995 | 
          | 
          | 
                     return z;
  | 
      
      
         | 5996 | 
          | 
          | 
                 }
  | 
      
      
         | 5997 | 
          | 
          | 
                 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
  | 
      
      
         | 5998 | 
          | 
          | 
             }
  | 
      
      
         | 5999 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 6000 | 
          | 
          | 
                 if ( ( aSig0 | aSig1 ) == 0 ) return a;
  | 
      
      
         | 6001 | 
          | 
          | 
                 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
  | 
      
      
         | 6002 | 
          | 
          | 
             }
  | 
      
      
         | 6003 | 
          | 
          | 
             expDiff = aExp - bExp;
  | 
      
      
         | 6004 | 
          | 
          | 
             if ( expDiff < -1 ) return a;
  | 
      
      
         | 6005 | 
          | 
          | 
             shortShift128Left(
  | 
      
      
         | 6006 | 
          | 
          | 
                 aSig0 | LIT64( 0x0001000000000000 ),
  | 
      
      
         | 6007 | 
          | 
          | 
                 aSig1,
  | 
      
      
         | 6008 | 
          | 
          | 
                 15 - ( expDiff < 0 ),
  | 
      
      
         | 6009 | 
          | 
          | 
                 &aSig0,
  | 
      
      
         | 6010 | 
          | 
          | 
                 &aSig1
  | 
      
      
         | 6011 | 
          | 
          | 
             );
  | 
      
      
         | 6012 | 
          | 
          | 
             shortShift128Left(
  | 
      
      
         | 6013 | 
          | 
          | 
                 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
  | 
      
      
         | 6014 | 
          | 
          | 
             q = le128( bSig0, bSig1, aSig0, aSig1 );
  | 
      
      
         | 6015 | 
          | 
          | 
             if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
  | 
      
      
         | 6016 | 
          | 
          | 
             expDiff -= 64;
  | 
      
      
         | 6017 | 
          | 
          | 
             while ( 0 < expDiff ) {
  | 
      
      
         | 6018 | 
          | 
          | 
                 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
  | 
      
      
         | 6019 | 
          | 
          | 
                 q = ( 4 < q ) ? q - 4 : 0;
  | 
      
      
         | 6020 | 
          | 
          | 
                 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
  | 
      
      
         | 6021 | 
          | 
          | 
                 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
  | 
      
      
         | 6022 | 
          | 
          | 
                 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
  | 
      
      
         | 6023 | 
          | 
          | 
                 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
  | 
      
      
         | 6024 | 
          | 
          | 
                 expDiff -= 61;
  | 
      
      
         | 6025 | 
          | 
          | 
             }
  | 
      
      
         | 6026 | 
          | 
          | 
             if ( -64 < expDiff ) {
  | 
      
      
         | 6027 | 
          | 
          | 
                 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
  | 
      
      
         | 6028 | 
          | 
          | 
                 q = ( 4 < q ) ? q - 4 : 0;
  | 
      
      
         | 6029 | 
          | 
          | 
                 q >>= - expDiff;
  | 
      
      
         | 6030 | 
          | 
          | 
                 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
  | 
      
      
         | 6031 | 
          | 
          | 
                 expDiff += 52;
  | 
      
      
         | 6032 | 
          | 
          | 
                 if ( expDiff < 0 ) {
  | 
      
      
         | 6033 | 
          | 
          | 
                     shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
  | 
      
      
         | 6034 | 
          | 
          | 
                 }
  | 
      
      
         | 6035 | 
          | 
          | 
                 else {
  | 
      
      
         | 6036 | 
          | 
          | 
                     shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
  | 
      
      
         | 6037 | 
          | 
          | 
                 }
  | 
      
      
         | 6038 | 
          | 
          | 
                 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
  | 
      
      
         | 6039 | 
          | 
          | 
                 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
  | 
      
      
         | 6040 | 
          | 
          | 
             }
  | 
      
      
         | 6041 | 
          | 
          | 
             else {
  | 
      
      
         | 6042 | 
          | 
          | 
                 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
  | 
      
      
         | 6043 | 
          | 
          | 
                 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
  | 
      
      
         | 6044 | 
          | 
          | 
             }
  | 
      
      
         | 6045 | 
          | 
          | 
             do {
  | 
      
      
         | 6046 | 
          | 
          | 
                 alternateASig0 = aSig0;
  | 
      
      
         | 6047 | 
          | 
          | 
                 alternateASig1 = aSig1;
  | 
      
      
         | 6048 | 
          | 
          | 
                 ++q;
  | 
      
      
         | 6049 | 
          | 
          | 
                 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
  | 
      
      
         | 6050 | 
          | 
          | 
             } while ( 0 <= (sbits64) aSig0 );
  | 
      
      
         | 6051 | 
          | 
          | 
             add128(
  | 
      
      
         | 6052 | 
          | 
          | 
                 aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
  | 
      
      
         | 6053 | 
          | 
          | 
             if (    ( sigMean0 < 0 )
  | 
      
      
         | 6054 | 
          | 
          | 
                  || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
  | 
      
      
         | 6055 | 
          | 
          | 
                 aSig0 = alternateASig0;
  | 
      
      
         | 6056 | 
          | 
          | 
                 aSig1 = alternateASig1;
  | 
      
      
         | 6057 | 
          | 
          | 
             }
  | 
      
      
         | 6058 | 
          | 
          | 
             zSign = ( (sbits64) aSig0 < 0 );
  | 
      
      
         | 6059 | 
          | 
          | 
             if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
  | 
      
      
         | 6060 | 
          | 
          | 
             return
  | 
      
      
         | 6061 | 
          | 
          | 
                 normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
  | 
      
      
         | 6062 | 
          | 
          | 
          
  | 
      
      
         | 6063 | 
          | 
          | 
         }
  | 
      
      
         | 6064 | 
          | 
          | 
          
  | 
      
      
         | 6065 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 6066 | 
          | 
          | 
         | Returns the square root of the quadruple-precision floating-point value `a'.
  | 
      
      
         | 6067 | 
          | 
          | 
         | The operation is performed according to the IEC/IEEE Standard for Binary
  | 
      
      
         | 6068 | 
          | 
          | 
         | Floating-Point Arithmetic.
  | 
      
      
         | 6069 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 6070 | 
          | 
          | 
          
  | 
      
      
         | 6071 | 
          | 
          | 
         float128 float128_sqrt( float128 a )
  | 
      
      
         | 6072 | 
          | 
          | 
         {
  | 
      
      
         | 6073 | 
          | 
          | 
             flag aSign;
  | 
      
      
         | 6074 | 
          | 
          | 
             int32 aExp, zExp;
  | 
      
      
         | 6075 | 
          | 
          | 
             bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
  | 
      
      
         | 6076 | 
          | 
          | 
             bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
  | 
      
      
         | 6077 | 
          | 
          | 
             float128 z;
  | 
      
      
         | 6078 | 
          | 
          | 
          
  | 
      
      
         | 6079 | 
          | 
          | 
             aSig1 = extractFloat128Frac1( a );
  | 
      
      
         | 6080 | 
          | 
          | 
             aSig0 = extractFloat128Frac0( a );
  | 
      
      
         | 6081 | 
          | 
          | 
             aExp = extractFloat128Exp( a );
  | 
      
      
         | 6082 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 6083 | 
          | 
          | 
             if ( aExp == 0x7FFF ) {
  | 
      
      
         | 6084 | 
          | 
          | 
                 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
  | 
      
      
         | 6085 | 
          | 
          | 
                 if ( ! aSign ) return a;
  | 
      
      
         | 6086 | 
          | 
          | 
                 goto invalid;
  | 
      
      
         | 6087 | 
          | 
          | 
             }
  | 
      
      
         | 6088 | 
          | 
          | 
             if ( aSign ) {
  | 
      
      
         | 6089 | 
          | 
          | 
                 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
  | 
      
      
         | 6090 | 
          | 
          | 
          invalid:
  | 
      
      
         | 6091 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 6092 | 
          | 
          | 
                 z.low = float128_default_nan_low;
  | 
      
      
         | 6093 | 
          | 
          | 
                 z.high = float128_default_nan_high;
  | 
      
      
         | 6094 | 
          | 
          | 
                 return z;
  | 
      
      
         | 6095 | 
          | 
          | 
             }
  | 
      
      
         | 6096 | 
          | 
          | 
             if ( aExp == 0 ) {
  | 
      
      
         | 6097 | 
          | 
          | 
                 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
  | 
      
      
         | 6098 | 
          | 
          | 
                 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
  | 
      
      
         | 6099 | 
          | 
          | 
             }
  | 
      
      
         | 6100 | 
          | 
          | 
             zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
  | 
      
      
         | 6101 | 
          | 
          | 
             aSig0 |= LIT64( 0x0001000000000000 );
  | 
      
      
         | 6102 | 
          | 
          | 
             zSig0 = estimateSqrt32( aExp, aSig0>>17 );
  | 
      
      
         | 6103 | 
          | 
          | 
             shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
  | 
      
      
         | 6104 | 
          | 
          | 
             zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
  | 
      
      
         | 6105 | 
          | 
          | 
             doubleZSig0 = zSig0<<1;
  | 
      
      
         | 6106 | 
          | 
          | 
             mul64To128( zSig0, zSig0, &term0, &term1 );
  | 
      
      
         | 6107 | 
          | 
          | 
             sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
  | 
      
      
         | 6108 | 
          | 
          | 
             while ( (sbits64) rem0 < 0 ) {
  | 
      
      
         | 6109 | 
          | 
          | 
                 --zSig0;
  | 
      
      
         | 6110 | 
          | 
          | 
                 doubleZSig0 -= 2;
  | 
      
      
         | 6111 | 
          | 
          | 
                 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
  | 
      
      
         | 6112 | 
          | 
          | 
             }
  | 
      
      
         | 6113 | 
          | 
          | 
             zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
  | 
      
      
         | 6114 | 
          | 
          | 
             if ( ( zSig1 & 0x1FFF ) <= 5 ) {
  | 
      
      
         | 6115 | 
          | 
          | 
                 if ( zSig1 == 0 ) zSig1 = 1;
  | 
      
      
         | 6116 | 
          | 
          | 
                 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
  | 
      
      
         | 6117 | 
          | 
          | 
                 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
  | 
      
      
         | 6118 | 
          | 
          | 
                 mul64To128( zSig1, zSig1, &term2, &term3 );
  | 
      
      
         | 6119 | 
          | 
          | 
                 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
  | 
      
      
         | 6120 | 
          | 
          | 
                 while ( (sbits64) rem1 < 0 ) {
  | 
      
      
         | 6121 | 
          | 
          | 
                     --zSig1;
  | 
      
      
         | 6122 | 
          | 
          | 
                     shortShift128Left( 0, zSig1, 1, &term2, &term3 );
  | 
      
      
         | 6123 | 
          | 
          | 
                     term3 |= 1;
  | 
      
      
         | 6124 | 
          | 
          | 
                     term2 |= doubleZSig0;
  | 
      
      
         | 6125 | 
          | 
          | 
                     add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
  | 
      
      
         | 6126 | 
          | 
          | 
                 }
  | 
      
      
         | 6127 | 
          | 
          | 
                 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
  | 
      
      
         | 6128 | 
          | 
          | 
             }
  | 
      
      
         | 6129 | 
          | 
          | 
             shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
  | 
      
      
         | 6130 | 
          | 
          | 
             return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
  | 
      
      
         | 6131 | 
          | 
          | 
          
  | 
      
      
         | 6132 | 
          | 
          | 
         }
  | 
      
      
         | 6133 | 
          | 
          | 
          
  | 
      
      
         | 6134 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 6135 | 
          | 
          | 
         | Returns 1 if the quadruple-precision floating-point value `a' is equal to
  | 
      
      
         | 6136 | 
          | 
          | 
         | the corresponding value `b', and 0 otherwise.  The comparison is performed
  | 
      
      
         | 6137 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 6138 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 6139 | 
          | 
          | 
          
  | 
      
      
         | 6140 | 
          | 
          | 
         flag float128_eq( float128 a, float128 b )
  | 
      
      
         | 6141 | 
          | 
          | 
         {
  | 
      
      
         | 6142 | 
          | 
          | 
          
  | 
      
      
         | 6143 | 
          | 
          | 
             if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
  | 
      
      
         | 6144 | 
          | 
          | 
                       && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
  | 
      
      
         | 6145 | 
          | 
          | 
                  || (    ( extractFloat128Exp( b ) == 0x7FFF )
  | 
      
      
         | 6146 | 
          | 
          | 
                       && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
  | 
      
      
         | 6147 | 
          | 
          | 
                ) {
  | 
      
      
         | 6148 | 
          | 
          | 
                 if (    float128_is_signaling_nan( a )
  | 
      
      
         | 6149 | 
          | 
          | 
                      || float128_is_signaling_nan( b ) ) {
  | 
      
      
         | 6150 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 6151 | 
          | 
          | 
                 }
  | 
      
      
         | 6152 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 6153 | 
          | 
          | 
             }
  | 
      
      
         | 6154 | 
          | 
          | 
             return
  | 
      
      
         | 6155 | 
          | 
          | 
                    ( a.low == b.low )
  | 
      
      
         | 6156 | 
          | 
          | 
                 && (    ( a.high == b.high )
  | 
      
      
         | 6157 | 
          | 
          | 
                      || (    ( a.low == 0 )
  | 
      
      
         | 6158 | 
          | 
          | 
                           && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
  | 
      
      
         | 6159 | 
          | 
          | 
                    );
  | 
      
      
         | 6160 | 
          | 
          | 
          
  | 
      
      
         | 6161 | 
          | 
          | 
         }
  | 
      
      
         | 6162 | 
          | 
          | 
          
  | 
      
      
         | 6163 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 6164 | 
          | 
          | 
         | Returns 1 if the quadruple-precision floating-point value `a' is less than
  | 
      
      
         | 6165 | 
          | 
          | 
         | or equal to the corresponding value `b', and 0 otherwise.  The comparison
  | 
      
      
         | 6166 | 
          | 
          | 
         | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  | 
      
      
         | 6167 | 
          | 
          | 
         | Arithmetic.
  | 
      
      
         | 6168 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 6169 | 
          | 
          | 
          
  | 
      
      
         | 6170 | 
          | 
          | 
         flag float128_le( float128 a, float128 b )
  | 
      
      
         | 6171 | 
          | 
          | 
         {
  | 
      
      
         | 6172 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 6173 | 
          | 
          | 
          
  | 
      
      
         | 6174 | 
          | 
          | 
             if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
  | 
      
      
         | 6175 | 
          | 
          | 
                       && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
  | 
      
      
         | 6176 | 
          | 
          | 
                  || (    ( extractFloat128Exp( b ) == 0x7FFF )
  | 
      
      
         | 6177 | 
          | 
          | 
                       && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
  | 
      
      
         | 6178 | 
          | 
          | 
                ) {
  | 
      
      
         | 6179 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 6180 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 6181 | 
          | 
          | 
             }
  | 
      
      
         | 6182 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 6183 | 
          | 
          | 
             bSign = extractFloat128Sign( b );
  | 
      
      
         | 6184 | 
          | 
          | 
             if ( aSign != bSign ) {
  | 
      
      
         | 6185 | 
          | 
          | 
                 return
  | 
      
      
         | 6186 | 
          | 
          | 
                        aSign
  | 
      
      
         | 6187 | 
          | 
          | 
                     || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
  | 
      
      
         | 6188 | 
          | 
          | 
                          == 0 );
  | 
      
      
         | 6189 | 
          | 
          | 
             }
  | 
      
      
         | 6190 | 
          | 
          | 
             return
  | 
      
      
         | 6191 | 
          | 
          | 
                   aSign ? le128( b.high, b.low, a.high, a.low )
  | 
      
      
         | 6192 | 
          | 
          | 
                 : le128( a.high, a.low, b.high, b.low );
  | 
      
      
         | 6193 | 
          | 
          | 
          
  | 
      
      
         | 6194 | 
          | 
          | 
         }
  | 
      
      
         | 6195 | 
          | 
          | 
          
  | 
      
      
         | 6196 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 6197 | 
          | 
          | 
         | Returns 1 if the quadruple-precision floating-point value `a' is less than
  | 
      
      
         | 6198 | 
          | 
          | 
         | the corresponding value `b', and 0 otherwise.  The comparison is performed
  | 
      
      
         | 6199 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 6200 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 6201 | 
          | 
          | 
          
  | 
      
      
         | 6202 | 
          | 
          | 
         flag float128_lt( float128 a, float128 b )
  | 
      
      
         | 6203 | 
          | 
          | 
         {
  | 
      
      
         | 6204 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 6205 | 
          | 
          | 
          
  | 
      
      
         | 6206 | 
          | 
          | 
             if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
  | 
      
      
         | 6207 | 
          | 
          | 
                       && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
  | 
      
      
         | 6208 | 
          | 
          | 
                  || (    ( extractFloat128Exp( b ) == 0x7FFF )
  | 
      
      
         | 6209 | 
          | 
          | 
                       && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
  | 
      
      
         | 6210 | 
          | 
          | 
                ) {
  | 
      
      
         | 6211 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 6212 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 6213 | 
          | 
          | 
             }
  | 
      
      
         | 6214 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 6215 | 
          | 
          | 
             bSign = extractFloat128Sign( b );
  | 
      
      
         | 6216 | 
          | 
          | 
             if ( aSign != bSign ) {
  | 
      
      
         | 6217 | 
          | 
          | 
                 return
  | 
      
      
         | 6218 | 
          | 
          | 
                        aSign
  | 
      
      
         | 6219 | 
          | 
          | 
                     && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
  | 
      
      
         | 6220 | 
          | 
          | 
                          != 0 );
  | 
      
      
         | 6221 | 
          | 
          | 
             }
  | 
      
      
         | 6222 | 
          | 
          | 
             return
  | 
      
      
         | 6223 | 
          | 
          | 
                   aSign ? lt128( b.high, b.low, a.high, a.low )
  | 
      
      
         | 6224 | 
          | 
          | 
                 : lt128( a.high, a.low, b.high, b.low );
  | 
      
      
         | 6225 | 
          | 
          | 
          
  | 
      
      
         | 6226 | 
          | 
          | 
         }
  | 
      
      
         | 6227 | 
          | 
          | 
          
  | 
      
      
         | 6228 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 6229 | 
          | 
          | 
         | Returns 1 if the quadruple-precision floating-point value `a' is equal to
  | 
      
      
         | 6230 | 
          | 
          | 
         | the corresponding value `b', and 0 otherwise.  The invalid exception is
  | 
      
      
         | 6231 | 
          | 
          | 
         | raised if either operand is a NaN.  Otherwise, the comparison is performed
  | 
      
      
         | 6232 | 
          | 
          | 
         | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 6233 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 6234 | 
          | 
          | 
          
  | 
      
      
         | 6235 | 
          | 
          | 
         flag float128_eq_signaling( float128 a, float128 b )
  | 
      
      
         | 6236 | 
          | 
          | 
         {
  | 
      
      
         | 6237 | 
          | 
          | 
          
  | 
      
      
         | 6238 | 
          | 
          | 
             if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
  | 
      
      
         | 6239 | 
          | 
          | 
                       && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
  | 
      
      
         | 6240 | 
          | 
          | 
                  || (    ( extractFloat128Exp( b ) == 0x7FFF )
  | 
      
      
         | 6241 | 
          | 
          | 
                       && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
  | 
      
      
         | 6242 | 
          | 
          | 
                ) {
  | 
      
      
         | 6243 | 
          | 
          | 
                 float_raise( float_flag_invalid );
  | 
      
      
         | 6244 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 6245 | 
          | 
          | 
             }
  | 
      
      
         | 6246 | 
          | 
          | 
             return
  | 
      
      
         | 6247 | 
          | 
          | 
                    ( a.low == b.low )
  | 
      
      
         | 6248 | 
          | 
          | 
                 && (    ( a.high == b.high )
  | 
      
      
         | 6249 | 
          | 
          | 
                      || (    ( a.low == 0 )
  | 
      
      
         | 6250 | 
          | 
          | 
                           && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
  | 
      
      
         | 6251 | 
          | 
          | 
                    );
  | 
      
      
         | 6252 | 
          | 
          | 
          
  | 
      
      
         | 6253 | 
          | 
          | 
         }
  | 
      
      
         | 6254 | 
          | 
          | 
          
  | 
      
      
         | 6255 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 6256 | 
          | 
          | 
         | Returns 1 if the quadruple-precision floating-point value `a' is less than
  | 
      
      
         | 6257 | 
          | 
          | 
         | or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
  | 
      
      
         | 6258 | 
          | 
          | 
         | cause an exception.  Otherwise, the comparison is performed according to the
  | 
      
      
         | 6259 | 
          | 
          | 
         | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 6260 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 6261 | 
          | 
          | 
          
  | 
      
      
         | 6262 | 
          | 
          | 
         flag float128_le_quiet( float128 a, float128 b )
  | 
      
      
         | 6263 | 
          | 
          | 
         {
  | 
      
      
         | 6264 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 6265 | 
          | 
          | 
          
  | 
      
      
         | 6266 | 
          | 
          | 
             if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
  | 
      
      
         | 6267 | 
          | 
          | 
                       && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
  | 
      
      
         | 6268 | 
          | 
          | 
                  || (    ( extractFloat128Exp( b ) == 0x7FFF )
  | 
      
      
         | 6269 | 
          | 
          | 
                       && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
  | 
      
      
         | 6270 | 
          | 
          | 
                ) {
  | 
      
      
         | 6271 | 
          | 
          | 
                 if (    float128_is_signaling_nan( a )
  | 
      
      
         | 6272 | 
          | 
          | 
                      || float128_is_signaling_nan( b ) ) {
  | 
      
      
         | 6273 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 6274 | 
          | 
          | 
                 }
  | 
      
      
         | 6275 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 6276 | 
          | 
          | 
             }
  | 
      
      
         | 6277 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 6278 | 
          | 
          | 
             bSign = extractFloat128Sign( b );
  | 
      
      
         | 6279 | 
          | 
          | 
             if ( aSign != bSign ) {
  | 
      
      
         | 6280 | 
          | 
          | 
                 return
  | 
      
      
         | 6281 | 
          | 
          | 
                        aSign
  | 
      
      
         | 6282 | 
          | 
          | 
                     || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
  | 
      
      
         | 6283 | 
          | 
          | 
                          == 0 );
  | 
      
      
         | 6284 | 
          | 
          | 
             }
  | 
      
      
         | 6285 | 
          | 
          | 
             return
  | 
      
      
         | 6286 | 
          | 
          | 
                   aSign ? le128( b.high, b.low, a.high, a.low )
  | 
      
      
         | 6287 | 
          | 
          | 
                 : le128( a.high, a.low, b.high, b.low );
  | 
      
      
         | 6288 | 
          | 
          | 
          
  | 
      
      
         | 6289 | 
          | 
          | 
         }
  | 
      
      
         | 6290 | 
          | 
          | 
          
  | 
      
      
         | 6291 | 
          | 
          | 
         /*----------------------------------------------------------------------------
  | 
      
      
         | 6292 | 
          | 
          | 
         | Returns 1 if the quadruple-precision floating-point value `a' is less than
  | 
      
      
         | 6293 | 
          | 
          | 
         | the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
  | 
      
      
         | 6294 | 
          | 
          | 
         | exception.  Otherwise, the comparison is performed according to the IEC/IEEE
  | 
      
      
         | 6295 | 
          | 
          | 
         | Standard for Binary Floating-Point Arithmetic.
  | 
      
      
         | 6296 | 
          | 
          | 
         *----------------------------------------------------------------------------*/
  | 
      
      
         | 6297 | 
          | 
          | 
          
  | 
      
      
         | 6298 | 
          | 
          | 
         flag float128_lt_quiet( float128 a, float128 b )
  | 
      
      
         | 6299 | 
          | 
          | 
         {
  | 
      
      
         | 6300 | 
          | 
          | 
             flag aSign, bSign;
  | 
      
      
         | 6301 | 
          | 
          | 
          
  | 
      
      
         | 6302 | 
          | 
          | 
             if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
  | 
      
      
         | 6303 | 
          | 
          | 
                       && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
  | 
      
      
         | 6304 | 
          | 
          | 
                  || (    ( extractFloat128Exp( b ) == 0x7FFF )
  | 
      
      
         | 6305 | 
          | 
          | 
                       && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
  | 
      
      
         | 6306 | 
          | 
          | 
                ) {
  | 
      
      
         | 6307 | 
          | 
          | 
                 if (    float128_is_signaling_nan( a )
  | 
      
      
         | 6308 | 
          | 
          | 
                      || float128_is_signaling_nan( b ) ) {
  | 
      
      
         | 6309 | 
          | 
          | 
                     float_raise( float_flag_invalid );
  | 
      
      
         | 6310 | 
          | 
          | 
                 }
  | 
      
      
         | 6311 | 
          | 
          | 
                 return 0;
  | 
      
      
         | 6312 | 
          | 
          | 
             }
  | 
      
      
         | 6313 | 
          | 
          | 
             aSign = extractFloat128Sign( a );
  | 
      
      
         | 6314 | 
          | 
          | 
             bSign = extractFloat128Sign( b );
  | 
      
      
         | 6315 | 
          | 
          | 
             if ( aSign != bSign ) {
  | 
      
      
         | 6316 | 
          | 
          | 
                 return
  | 
      
      
         | 6317 | 
          | 
          | 
                        aSign
  | 
      
      
         | 6318 | 
          | 
          | 
                     && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
  | 
      
      
         | 6319 | 
          | 
          | 
                          != 0 );
  | 
      
      
         | 6320 | 
          | 
          | 
             }
  | 
      
      
         | 6321 | 
          | 
          | 
             return
  | 
      
      
         | 6322 | 
          | 
          | 
                   aSign ? lt128( b.high, b.low, a.high, a.low )
  | 
      
      
         | 6323 | 
          | 
          | 
                 : lt128( a.high, a.low, b.high, b.low );
  | 
      
      
         | 6324 | 
          | 
          | 
          
  | 
      
      
         | 6325 | 
          | 
          | 
         }
  | 
      
      
         | 6326 | 
          | 
          | 
          
  | 
      
      
         | 6327 | 
          | 
          | 
         #endif
  | 
      
      
         | 6328 | 
          | 
          | 
          
  |