| 1 |
734 |
jeremybenn |
/* Division and remainder routines for Tile.
|
| 2 |
|
|
Copyright (C) 2011, 2012
|
| 3 |
|
|
Free Software Foundation, Inc.
|
| 4 |
|
|
Contributed by Walter Lee (walt@tilera.com)
|
| 5 |
|
|
|
| 6 |
|
|
This file is free software; you can redistribute it and/or modify it
|
| 7 |
|
|
under the terms of the GNU General Public License as published by the
|
| 8 |
|
|
Free Software Foundation; either version 3, or (at your option) any
|
| 9 |
|
|
later version.
|
| 10 |
|
|
|
| 11 |
|
|
This file is distributed in the hope that it will be useful, but
|
| 12 |
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 14 |
|
|
General Public License for more details.
|
| 15 |
|
|
|
| 16 |
|
|
Under Section 7 of GPL version 3, you are granted additional
|
| 17 |
|
|
permissions described in the GCC Runtime Library Exception, version
|
| 18 |
|
|
3.1, as published by the Free Software Foundation.
|
| 19 |
|
|
|
| 20 |
|
|
You should have received a copy of the GNU General Public License and
|
| 21 |
|
|
a copy of the GCC Runtime Library Exception along with this program;
|
| 22 |
|
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
| 23 |
|
|
<http://www.gnu.org/licenses/>. */
|
| 24 |
|
|
|
| 25 |
|
|
typedef int int32_t;
|
| 26 |
|
|
typedef unsigned uint32_t;
|
| 27 |
|
|
typedef long long int64_t;
|
| 28 |
|
|
typedef unsigned long long uint64_t;
|
| 29 |
|
|
|
| 30 |
|
|
/* Raise signal 8 (SIGFPE) with code 1 (FPE_INTDIV). */
|
| 31 |
|
|
static inline void
|
| 32 |
|
|
raise_intdiv (void)
|
| 33 |
|
|
{
|
| 34 |
|
|
asm ("{ raise; moveli zero, 8 + (1 << 6) }");
|
| 35 |
|
|
}
|
| 36 |
|
|
|
| 37 |
|
|
|
| 38 |
|
|
#ifndef __tilegx__
|
| 39 |
|
|
/*__udivsi3 - 32 bit integer unsigned divide */
|
| 40 |
|
|
static inline uint32_t __attribute__ ((always_inline))
|
| 41 |
|
|
__udivsi3_inline (uint32_t dividend, uint32_t divisor)
|
| 42 |
|
|
{
|
| 43 |
|
|
/* Divide out any power of two factor from dividend and divisor.
|
| 44 |
|
|
Note that when dividing by zero the divisor will remain zero,
|
| 45 |
|
|
which is all we need to detect that case below. */
|
| 46 |
|
|
const int power_of_two_factor = __insn_ctz (divisor);
|
| 47 |
|
|
divisor >>= power_of_two_factor;
|
| 48 |
|
|
dividend >>= power_of_two_factor;
|
| 49 |
|
|
|
| 50 |
|
|
/* Checks for division by power of two or division by zero. */
|
| 51 |
|
|
if (divisor <= 1)
|
| 52 |
|
|
{
|
| 53 |
|
|
if (divisor == 0)
|
| 54 |
|
|
{
|
| 55 |
|
|
raise_intdiv ();
|
| 56 |
|
|
return 0;
|
| 57 |
|
|
}
|
| 58 |
|
|
return dividend;
|
| 59 |
|
|
}
|
| 60 |
|
|
|
| 61 |
|
|
/* Compute (a / b) by repeatedly finding the largest N
|
| 62 |
|
|
such that (b << N) <= a. For each such N, set bit N in the
|
| 63 |
|
|
quotient, subtract (b << N) from a, and keep going. Think of this as
|
| 64 |
|
|
the reverse of the "shift-and-add" that a multiply does. The values
|
| 65 |
|
|
of N are precisely those shift counts.
|
| 66 |
|
|
|
| 67 |
|
|
Finding N is easy. First, use clz(b) - clz(a) to find the N
|
| 68 |
|
|
that lines up the high bit of (b << N) with the high bit of a.
|
| 69 |
|
|
Any larger value of N would definitely make (b << N) > a,
|
| 70 |
|
|
which is too big.
|
| 71 |
|
|
|
| 72 |
|
|
Then, if (b << N) > a (because it has larger low bits), decrement
|
| 73 |
|
|
N by one. This adjustment will definitely make (b << N) less
|
| 74 |
|
|
than a, because a's high bit is now one higher than b's. */
|
| 75 |
|
|
|
| 76 |
|
|
/* Precomputing the max_ values allows us to avoid a subtract
|
| 77 |
|
|
in the inner loop and just right shift by clz(remainder). */
|
| 78 |
|
|
const int divisor_clz = __insn_clz (divisor);
|
| 79 |
|
|
const uint32_t max_divisor = divisor << divisor_clz;
|
| 80 |
|
|
const uint32_t max_qbit = 1 << divisor_clz;
|
| 81 |
|
|
|
| 82 |
|
|
uint32_t quotient = 0;
|
| 83 |
|
|
uint32_t remainder = dividend;
|
| 84 |
|
|
|
| 85 |
|
|
while (remainder >= divisor)
|
| 86 |
|
|
{
|
| 87 |
|
|
int shift = __insn_clz (remainder);
|
| 88 |
|
|
uint32_t scaled_divisor = max_divisor >> shift;
|
| 89 |
|
|
uint32_t quotient_bit = max_qbit >> shift;
|
| 90 |
|
|
|
| 91 |
|
|
int too_big = (scaled_divisor > remainder);
|
| 92 |
|
|
scaled_divisor >>= too_big;
|
| 93 |
|
|
quotient_bit >>= too_big;
|
| 94 |
|
|
remainder -= scaled_divisor;
|
| 95 |
|
|
quotient |= quotient_bit;
|
| 96 |
|
|
}
|
| 97 |
|
|
return quotient;
|
| 98 |
|
|
}
|
| 99 |
|
|
#endif /* !__tilegx__ */
|
| 100 |
|
|
|
| 101 |
|
|
|
| 102 |
|
|
/* __udivdi3 - 64 bit integer unsigned divide */
|
| 103 |
|
|
static inline uint64_t __attribute__ ((always_inline))
|
| 104 |
|
|
__udivdi3_inline (uint64_t dividend, uint64_t divisor)
|
| 105 |
|
|
{
|
| 106 |
|
|
/* Divide out any power of two factor from dividend and divisor.
|
| 107 |
|
|
Note that when dividing by zero the divisor will remain zero,
|
| 108 |
|
|
which is all we need to detect that case below. */
|
| 109 |
|
|
const int power_of_two_factor = __builtin_ctzll (divisor);
|
| 110 |
|
|
divisor >>= power_of_two_factor;
|
| 111 |
|
|
dividend >>= power_of_two_factor;
|
| 112 |
|
|
|
| 113 |
|
|
/* Checks for division by power of two or division by zero. */
|
| 114 |
|
|
if (divisor <= 1)
|
| 115 |
|
|
{
|
| 116 |
|
|
if (divisor == 0)
|
| 117 |
|
|
{
|
| 118 |
|
|
raise_intdiv ();
|
| 119 |
|
|
return 0;
|
| 120 |
|
|
}
|
| 121 |
|
|
return dividend;
|
| 122 |
|
|
}
|
| 123 |
|
|
|
| 124 |
|
|
#ifndef __tilegx__
|
| 125 |
|
|
if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0)
|
| 126 |
|
|
{
|
| 127 |
|
|
/* Operands both fit in 32 bits, so use faster 32 bit algorithm. */
|
| 128 |
|
|
return __udivsi3_inline ((uint32_t) dividend, (uint32_t) divisor);
|
| 129 |
|
|
}
|
| 130 |
|
|
#endif /* !__tilegx__ */
|
| 131 |
|
|
|
| 132 |
|
|
/* See algorithm description in __udivsi3 */
|
| 133 |
|
|
|
| 134 |
|
|
const int divisor_clz = __builtin_clzll (divisor);
|
| 135 |
|
|
const uint64_t max_divisor = divisor << divisor_clz;
|
| 136 |
|
|
const uint64_t max_qbit = 1ULL << divisor_clz;
|
| 137 |
|
|
|
| 138 |
|
|
uint64_t quotient = 0;
|
| 139 |
|
|
uint64_t remainder = dividend;
|
| 140 |
|
|
|
| 141 |
|
|
while (remainder >= divisor)
|
| 142 |
|
|
{
|
| 143 |
|
|
int shift = __builtin_clzll (remainder);
|
| 144 |
|
|
uint64_t scaled_divisor = max_divisor >> shift;
|
| 145 |
|
|
uint64_t quotient_bit = max_qbit >> shift;
|
| 146 |
|
|
|
| 147 |
|
|
int too_big = (scaled_divisor > remainder);
|
| 148 |
|
|
scaled_divisor >>= too_big;
|
| 149 |
|
|
quotient_bit >>= too_big;
|
| 150 |
|
|
remainder -= scaled_divisor;
|
| 151 |
|
|
quotient |= quotient_bit;
|
| 152 |
|
|
}
|
| 153 |
|
|
return quotient;
|
| 154 |
|
|
}
|
| 155 |
|
|
|
| 156 |
|
|
|
| 157 |
|
|
#ifndef __tilegx__
|
| 158 |
|
|
/* __umodsi3 - 32 bit integer unsigned modulo */
|
| 159 |
|
|
static inline uint32_t __attribute__ ((always_inline))
|
| 160 |
|
|
__umodsi3_inline (uint32_t dividend, uint32_t divisor)
|
| 161 |
|
|
{
|
| 162 |
|
|
/* Shortcircuit mod by a power of two (and catch mod by zero). */
|
| 163 |
|
|
const uint32_t mask = divisor - 1;
|
| 164 |
|
|
if ((divisor & mask) == 0)
|
| 165 |
|
|
{
|
| 166 |
|
|
if (divisor == 0)
|
| 167 |
|
|
{
|
| 168 |
|
|
raise_intdiv ();
|
| 169 |
|
|
return 0;
|
| 170 |
|
|
}
|
| 171 |
|
|
return dividend & mask;
|
| 172 |
|
|
}
|
| 173 |
|
|
|
| 174 |
|
|
/* We compute the remainder (a % b) by repeatedly subtracting off
|
| 175 |
|
|
multiples of b from a until a < b. The key is that subtracting
|
| 176 |
|
|
off a multiple of b does not affect the result mod b.
|
| 177 |
|
|
|
| 178 |
|
|
To make the algorithm run efficiently, we need to subtract
|
| 179 |
|
|
off a large multiple of b at each step. We subtract the largest
|
| 180 |
|
|
(b << N) that is <= a.
|
| 181 |
|
|
|
| 182 |
|
|
Finding N is easy. First, use clz(b) - clz(a) to find the N
|
| 183 |
|
|
that lines up the high bit of (b << N) with the high bit of a.
|
| 184 |
|
|
Any larger value of N would definitely make (b << N) > a,
|
| 185 |
|
|
which is too big.
|
| 186 |
|
|
|
| 187 |
|
|
Then, if (b << N) > a (because it has larger low bits), decrement
|
| 188 |
|
|
N by one. This adjustment will definitely make (b << N) less
|
| 189 |
|
|
than a, because a's high bit is now one higher than b's. */
|
| 190 |
|
|
const uint32_t max_divisor = divisor << __insn_clz (divisor);
|
| 191 |
|
|
|
| 192 |
|
|
uint32_t remainder = dividend;
|
| 193 |
|
|
while (remainder >= divisor)
|
| 194 |
|
|
{
|
| 195 |
|
|
const int shift = __insn_clz (remainder);
|
| 196 |
|
|
uint32_t scaled_divisor = max_divisor >> shift;
|
| 197 |
|
|
scaled_divisor >>= (scaled_divisor > remainder);
|
| 198 |
|
|
remainder -= scaled_divisor;
|
| 199 |
|
|
}
|
| 200 |
|
|
|
| 201 |
|
|
return remainder;
|
| 202 |
|
|
}
|
| 203 |
|
|
#endif /* !__tilegx__ */
|
| 204 |
|
|
|
| 205 |
|
|
|
| 206 |
|
|
/* __umoddi3 - 64 bit integer unsigned modulo */
|
| 207 |
|
|
static inline uint64_t __attribute__ ((always_inline))
|
| 208 |
|
|
__umoddi3_inline (uint64_t dividend, uint64_t divisor)
|
| 209 |
|
|
{
|
| 210 |
|
|
#ifndef __tilegx__
|
| 211 |
|
|
if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0)
|
| 212 |
|
|
{
|
| 213 |
|
|
/* Operands both fit in 32 bits, so use faster 32 bit algorithm. */
|
| 214 |
|
|
return __umodsi3_inline ((uint32_t) dividend, (uint32_t) divisor);
|
| 215 |
|
|
}
|
| 216 |
|
|
#endif /* !__tilegx__ */
|
| 217 |
|
|
|
| 218 |
|
|
/* Shortcircuit mod by a power of two (and catch mod by zero). */
|
| 219 |
|
|
const uint64_t mask = divisor - 1;
|
| 220 |
|
|
if ((divisor & mask) == 0)
|
| 221 |
|
|
{
|
| 222 |
|
|
if (divisor == 0)
|
| 223 |
|
|
{
|
| 224 |
|
|
raise_intdiv ();
|
| 225 |
|
|
return 0;
|
| 226 |
|
|
}
|
| 227 |
|
|
return dividend & mask;
|
| 228 |
|
|
}
|
| 229 |
|
|
|
| 230 |
|
|
/* See algorithm description in __umodsi3 */
|
| 231 |
|
|
const uint64_t max_divisor = divisor << __builtin_clzll (divisor);
|
| 232 |
|
|
|
| 233 |
|
|
uint64_t remainder = dividend;
|
| 234 |
|
|
while (remainder >= divisor)
|
| 235 |
|
|
{
|
| 236 |
|
|
const int shift = __builtin_clzll (remainder);
|
| 237 |
|
|
uint64_t scaled_divisor = max_divisor >> shift;
|
| 238 |
|
|
scaled_divisor >>= (scaled_divisor > remainder);
|
| 239 |
|
|
remainder -= scaled_divisor;
|
| 240 |
|
|
}
|
| 241 |
|
|
|
| 242 |
|
|
return remainder;
|
| 243 |
|
|
}
|
| 244 |
|
|
|
| 245 |
|
|
|
| 246 |
|
|
uint32_t __udivsi3 (uint32_t dividend, uint32_t divisor);
|
| 247 |
|
|
#ifdef L_tile_udivsi3
|
| 248 |
|
|
uint32_t
|
| 249 |
|
|
__udivsi3 (uint32_t dividend, uint32_t divisor)
|
| 250 |
|
|
{
|
| 251 |
|
|
#ifndef __tilegx__
|
| 252 |
|
|
return __udivsi3_inline (dividend, divisor);
|
| 253 |
|
|
#else /* !__tilegx__ */
|
| 254 |
|
|
uint64_t n = __udivdi3_inline (((uint64_t) dividend), ((uint64_t) divisor));
|
| 255 |
|
|
return (uint32_t) n;
|
| 256 |
|
|
#endif /* !__tilegx__ */
|
| 257 |
|
|
}
|
| 258 |
|
|
#endif
|
| 259 |
|
|
|
| 260 |
|
|
#define ABS(x) ((x) >= 0 ? (x) : -(x))
|
| 261 |
|
|
|
| 262 |
|
|
int32_t __divsi3 (int32_t dividend, int32_t divisor);
|
| 263 |
|
|
#ifdef L_tile_divsi3
|
| 264 |
|
|
/* __divsi3 - 32 bit integer signed divide */
|
| 265 |
|
|
int32_t
|
| 266 |
|
|
__divsi3 (int32_t dividend, int32_t divisor)
|
| 267 |
|
|
{
|
| 268 |
|
|
#ifndef __tilegx__
|
| 269 |
|
|
uint32_t n = __udivsi3_inline (ABS (dividend), ABS (divisor));
|
| 270 |
|
|
#else /* !__tilegx__ */
|
| 271 |
|
|
uint64_t n =
|
| 272 |
|
|
__udivdi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor));
|
| 273 |
|
|
#endif /* !__tilegx__ */
|
| 274 |
|
|
if ((dividend ^ divisor) < 0)
|
| 275 |
|
|
n = -n;
|
| 276 |
|
|
return (int32_t) n;
|
| 277 |
|
|
}
|
| 278 |
|
|
#endif
|
| 279 |
|
|
|
| 280 |
|
|
|
| 281 |
|
|
uint64_t __udivdi3 (uint64_t dividend, uint64_t divisor);
|
| 282 |
|
|
#ifdef L_tile_udivdi3
|
| 283 |
|
|
uint64_t
|
| 284 |
|
|
__udivdi3 (uint64_t dividend, uint64_t divisor)
|
| 285 |
|
|
{
|
| 286 |
|
|
return __udivdi3_inline (dividend, divisor);
|
| 287 |
|
|
}
|
| 288 |
|
|
#endif
|
| 289 |
|
|
|
| 290 |
|
|
/*__divdi3 - 64 bit integer signed divide */
|
| 291 |
|
|
int64_t __divdi3 (int64_t dividend, int64_t divisor);
|
| 292 |
|
|
#ifdef L_tile_divdi3
|
| 293 |
|
|
int64_t
|
| 294 |
|
|
__divdi3 (int64_t dividend, int64_t divisor)
|
| 295 |
|
|
{
|
| 296 |
|
|
uint64_t n = __udivdi3_inline (ABS (dividend), ABS (divisor));
|
| 297 |
|
|
if ((dividend ^ divisor) < 0)
|
| 298 |
|
|
n = -n;
|
| 299 |
|
|
return (int64_t) n;
|
| 300 |
|
|
}
|
| 301 |
|
|
#endif
|
| 302 |
|
|
|
| 303 |
|
|
|
| 304 |
|
|
uint32_t __umodsi3 (uint32_t dividend, uint32_t divisor);
|
| 305 |
|
|
#ifdef L_tile_umodsi3
|
| 306 |
|
|
uint32_t
|
| 307 |
|
|
__umodsi3 (uint32_t dividend, uint32_t divisor)
|
| 308 |
|
|
{
|
| 309 |
|
|
#ifndef __tilegx__
|
| 310 |
|
|
return __umodsi3_inline (dividend, divisor);
|
| 311 |
|
|
#else /* !__tilegx__ */
|
| 312 |
|
|
return __umoddi3_inline ((uint64_t) dividend, (uint64_t) divisor);
|
| 313 |
|
|
#endif /* !__tilegx__ */
|
| 314 |
|
|
}
|
| 315 |
|
|
#endif
|
| 316 |
|
|
|
| 317 |
|
|
|
| 318 |
|
|
/* __modsi3 - 32 bit integer signed modulo */
|
| 319 |
|
|
int32_t __modsi3 (int32_t dividend, int32_t divisor);
|
| 320 |
|
|
#ifdef L_tile_modsi3
|
| 321 |
|
|
int32_t
|
| 322 |
|
|
__modsi3 (int32_t dividend, int32_t divisor)
|
| 323 |
|
|
{
|
| 324 |
|
|
#ifndef __tilegx__
|
| 325 |
|
|
uint32_t remainder = __umodsi3_inline (ABS (dividend), ABS (divisor));
|
| 326 |
|
|
#else /* !__tilegx__ */
|
| 327 |
|
|
uint64_t remainder =
|
| 328 |
|
|
__umoddi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor));
|
| 329 |
|
|
#endif /* !__tilegx__ */
|
| 330 |
|
|
return (int32_t) ((dividend >= 0) ? remainder : -remainder);
|
| 331 |
|
|
}
|
| 332 |
|
|
#endif
|
| 333 |
|
|
|
| 334 |
|
|
|
| 335 |
|
|
uint64_t __umoddi3 (uint64_t dividend, uint64_t divisor);
|
| 336 |
|
|
#ifdef L_tile_umoddi3
|
| 337 |
|
|
uint64_t
|
| 338 |
|
|
__umoddi3 (uint64_t dividend, uint64_t divisor)
|
| 339 |
|
|
{
|
| 340 |
|
|
return __umoddi3_inline (dividend, divisor);
|
| 341 |
|
|
}
|
| 342 |
|
|
#endif
|
| 343 |
|
|
|
| 344 |
|
|
|
| 345 |
|
|
/* __moddi3 - 64 bit integer signed modulo */
|
| 346 |
|
|
int64_t __moddi3 (int64_t dividend, int64_t divisor);
|
| 347 |
|
|
#ifdef L_tile_moddi3
|
| 348 |
|
|
int64_t
|
| 349 |
|
|
__moddi3 (int64_t dividend, int64_t divisor)
|
| 350 |
|
|
{
|
| 351 |
|
|
uint64_t remainder = __umoddi3_inline (ABS (dividend), ABS (divisor));
|
| 352 |
|
|
return (int64_t) ((dividend >= 0) ? remainder : -remainder);
|
| 353 |
|
|
}
|
| 354 |
|
|
#endif
|