OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgcc/] [longlong.h] - Blame information for rev 778

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 734 jeremybenn
/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2
   Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4
   Free Software Foundation, Inc.
5
 
6
   This file is part of the GNU C Library.
7
 
8
   The GNU C Library is free software; you can redistribute it and/or
9
   modify it under the terms of the GNU Lesser General Public
10
   License as published by the Free Software Foundation; either
11
   version 2.1 of the License, or (at your option) any later version.
12
 
13
   In addition to the permissions in the GNU Lesser General Public
14
   License, the Free Software Foundation gives you unlimited
15
   permission to link the compiled version of this file into
16
   combinations with other programs, and to distribute those
17
   combinations without any restriction coming from the use of this
18
   file.  (The Lesser General Public License restrictions do apply in
19
   other respects; for example, they cover modification of the file,
20
   and distribution when not linked into a combined executable.)
21
 
22
   The GNU C Library is distributed in the hope that it will be useful,
23
   but WITHOUT ANY WARRANTY; without even the implied warranty of
24
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25
   Lesser General Public License for more details.
26
 
27
   You should have received a copy of the GNU Lesser General Public
28
   License along with the GNU C Library; if not, write to the Free
29
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
30
   MA 02110-1301, USA.  */
31
 
32
/* You have to define the following before including this file:
33
 
34
   UWtype -- An unsigned type, default type for operations (typically a "word")
35
   UHWtype -- An unsigned type, at least half the size of UWtype.
36
   UDWtype -- An unsigned type, at least twice as large as UWtype
37
   W_TYPE_SIZE -- size in bits of UWtype
38
 
39
   UQItype -- Unsigned 8 bit type.
40
   SItype, USItype -- Signed and unsigned 32 bit types.
41
   DItype, UDItype -- Signed and unsigned 64 bit types.
42
 
43
   On a 32 bit machine UWtype should typically be USItype;
44
   on a 64 bit machine, UWtype should typically be UDItype.  */
45
 
46
/* Word-splitting helpers.
   __BITS4 is a quarter of the word width, in bits.
   __ll_B is 2**(W_TYPE_SIZE/2), i.e. one more than the largest half-word.
   __ll_lowpart (T) and __ll_highpart (T) convert T to UWtype and return
   its low and high half-word respectively.  All four expand lazily, so
   W_TYPE_SIZE and UWtype only need to be defined at the point of use.  */
#define __BITS4 (W_TYPE_SIZE / 4)
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
50
 
51
/* Fall back to a 32-bit word when the includer has not selected a word
   size.  Note that the default half-word type is the full USItype.  */
#ifndef W_TYPE_SIZE
#define W_TYPE_SIZE     32
#define UWtype          USItype
#define UHWtype         USItype
#define UDWtype         UDItype
#endif
57
 
58
/* Used in glibc only.  */
59
#ifndef attribute_hidden
60
#define attribute_hidden
61
#endif
62
 
63
extern const UQItype __clz_tab[256] attribute_hidden;
64
 
65
/* Define auxiliary asm macros.
66
 
67
   1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
68
   UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
69
   word product in HIGH_PROD and LOW_PROD.
70
 
71
   2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
72
   UDWtype product.  This is just a variant of umul_ppmm.
73
 
74
   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
75
   denominator) divides a UDWtype, composed by the UWtype integers
76
   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
77
   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
78
   than DENOMINATOR for correct operation.  If, in addition, the most
79
   significant bit of DENOMINATOR must be 1, then the pre-processor symbol
80
   UDIV_NEEDS_NORMALIZATION is defined to 1.
81
 
82
   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
83
   denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
84
   is rounded towards 0.
85
 
86
   5) count_leading_zeros(count, x) counts the number of zero-bits from the
87
   msb to the first nonzero bit in the UWtype X.  This is the number of
88
   steps X needs to be shifted left to set the msb.  Undefined for X == 0,
89
   unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
90
 
91
   6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
92
   from the least significant end.
93
 
94
   7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
95
   high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
96
   HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
97
   respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
98
   (i.e. carry out) is not stored anywhere, and is lost.
99
 
100
   8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
101
   high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
102
   composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
103
   LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
104
   and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
105
   and is lost.
106
 
107
   If any of these macros are left undefined for a particular CPU,
108
   C macros are used.  */
109
 
110
/* The CPUs come in alphabetical order below.
111
 
112
   Please add support for more CPUs here, or improve the current support
113
   for the CPUs below!
114
   (E.g. WE32100, IBM360.)  */
115
 
116
#if defined (__GNUC__) && !defined (NO_ASM)
117
 
118
/* We sometimes need to clobber "cc" with gcc2, but that would not be
   understood by gcc1.  Use cpp to avoid major code duplication.

   __CLOBBER_CC opens a clobber list containing only "cc" (it supplies the
   leading colon); __AND_CLOBBER_CC appends "cc" to an existing clobber
   list (it supplies the leading comma).  Both expand to nothing for
   __GNUC__ < 2.  */
#if __GNUC__ < 2
#define __CLOBBER_CC
#define __AND_CLOBBER_CC
#else /* __GNUC__ >= 2 */
#define __CLOBBER_CC : "cc"
#define __AND_CLOBBER_CC , "cc"
#endif /* __GNUC__ < 2 */
127
 
128
#if defined (__alpha) && W_TYPE_SIZE == 64
/* Alpha, 64-bit words.  umul_ppmm copies both factors into locals, takes
   the high product word from the umulh builtin and the low word from an
   ordinary multiplication.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    UDItype __m0 = (m0), __m1 = (m1);                                   \
    (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
#define UMUL_TIME 46
#ifndef LONGLONG_STANDALONE
/* Division is delegated to the external __udiv_qrnnd: its return value is
   used as the quotient and the remainder is read back from the object
   passed by address as the first argument.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UDItype __r;                                                     \
    (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
    (r) = __r;                                                          \
  } while (0)
extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
#ifdef __alpha_cix__
/* With the CIX extension, use the compiler builtins directly.  */
#define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
#define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
#define COUNT_LEADING_ZEROS_0 64
#else
/* Without CIX: pick a byte index __a from the cmpbge result via
   __clz_tab, extract that byte with extbl, and combine the byte's
   __clz_tab entry with __a*8 to form the count.  */
#define count_leading_zeros(COUNT,X) \
  do {                                                                  \
    UDItype __xr = (X), __t, __a;                                       \
    __t = __builtin_alpha_cmpbge (0, __xr);                             \
    __a = __clz_tab[__t ^ 0xff] - 1;                                    \
    __t = __builtin_alpha_extbl (__xr, __a);                            \
    (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
  } while (0)
/* Without CIX: isolate one bit of the (inverted) cmpbge result, turn it
   into a byte index __a with three mask tests, extract that byte, then
   repeat the isolate-and-mask-test step on the byte to get the bit index
   within it.  The result is __a*8 plus that bit index.  */
#define count_trailing_zeros(COUNT,X) \
  do {                                                                  \
    UDItype __xr = (X), __t, __a;                                       \
    __t = __builtin_alpha_cmpbge (0, __xr);                             \
    __t = ~__t & -~__t;                                                 \
    __a = ((__t & 0xCC) != 0) * 2;                                      \
    __a += ((__t & 0xF0) != 0) * 4;                                     \
    __a += ((__t & 0xAA) != 0);                                         \
    __t = __builtin_alpha_extbl (__xr, __a);                            \
    __a <<= 3;                                                          \
    __t &= -__t;                                                        \
    __a += ((__t & 0xCC) != 0) * 2;                                     \
    __a += ((__t & 0xF0) != 0) * 4;                                     \
    __a += ((__t & 0xAA) != 0);                                         \
    (COUNT) = __a;                                                      \
  } while (0)
#endif /* __alpha_cix__ */
#endif /* __alpha */
176
 
177
#if defined (__arc__) && W_TYPE_SIZE == 32
/* ARC, 32-bit words.  Two-word add: the low words are added with add.f
   and the high words with adc.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
           : "=r" ((USItype) (sh)),                                     \
             "=&r" ((USItype) (sl))                                     \
           : "%r" ((USItype) (ah)),                                     \
             "rIJ" ((USItype) (bh)),                                    \
             "%r" ((USItype) (al)),                                     \
             "rIJ" ((USItype) (bl)))
/* Two-word subtract: sub.f on the low words, sbc on the high words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
           : "=r" ((USItype) (sh)),                                     \
             "=&r" ((USItype) (sl))                                     \
           : "r" ((USItype) (ah)),                                      \
             "rIJ" ((USItype) (bh)),                                    \
             "r" ((USItype) (al)),                                      \
             "rIJ" ((USItype) (bl)))
/* Call libgcc routine.  The double-word result of __umulsidi3 is split
   into its high and low words through a DWunion.  */
#define umul_ppmm(w1, w0, u, v) \
do {                                                                    \
  DWunion __w;                                                          \
  __w.ll = __umulsidi3 (u, v);                                          \
  w1 = __w.s.high;                                                      \
  w0 = __w.s.low;                                                       \
} while (0)
#define __umulsidi3 __umulsidi3
UDItype __umulsidi3 (USItype, USItype);
#endif
205
 
206
#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
/* ARM (non-Thumb), 32-bit words.  Two-word add: adds on the low words,
   adc on the high words; the condition codes are clobbered.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
           : "=r" ((USItype) (sh)),                                     \
             "=&r" ((USItype) (sl))                                     \
           : "%r" ((USItype) (ah)),                                     \
             "rI" ((USItype) (bh)),                                     \
             "%r" ((USItype) (al)),                                     \
             "rI" ((USItype) (bl)) __CLOBBER_CC)
/* Two-word subtract: subs on the low words, sbc on the high words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
           : "=r" ((USItype) (sh)),                                     \
             "=&r" ((USItype) (sl))                                     \
           : "r" ((USItype) (ah)),                                      \
             "rI" ((USItype) (bh)),                                     \
             "r" ((USItype) (al)),                                      \
             "rI" ((USItype) (bl)) __CLOBBER_CC)
/* 32x32->64 multiply built from four 16x16 products.  Both operands are
   split into 16-bit halves, the partial products are formed with mul, and
   the two cross products are summed and folded into the result with carry
   propagation.  The expansion is wrapped in do { } while (0) so that the
   macro behaves as a single statement (e.g. as the body of an if/else).  */
#define umul_ppmm(xh, xl, a, b) \
  do {                                                                  \
    register USItype __t0, __t1, __t2;                                  \
    __asm__ ("%@ Inlined umul_ppmm\n"                                   \
             "    mov     %2, %5, lsr #16\n"                            \
             "    mov     %0, %6, lsr #16\n"                            \
             "    bic     %3, %5, %2, lsl #16\n"                        \
             "    bic     %4, %6, %0, lsl #16\n"                        \
             "    mul     %1, %3, %4\n"                                 \
             "    mul     %4, %2, %4\n"                                 \
             "    mul     %3, %0, %3\n"                                 \
             "    mul     %0, %2, %0\n"                                 \
             "    adds    %3, %4, %3\n"                                 \
             "    addcs   %0, %0, #65536\n"                             \
             "    adds    %1, %1, %3, lsl #16\n"                        \
             "    adc     %0, %0, %3, lsr #16"                          \
             : "=&r" ((USItype) (xh)),                                  \
               "=r" ((USItype) (xl)),                                   \
               "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                  \
             : "r" ((USItype) (a)),                                     \
               "r" ((USItype) (b)) __CLOBBER_CC );                      \
  } while (0)
#define UMUL_TIME 20
#define UDIV_TIME 100
#endif /* __arm__ */
246
 
247
#if defined(__arm__)
/* Let gcc decide how best to implement count_leading_zeros.  Unlike the
   arithmetic macros above, this applies to every __arm__ configuration,
   Thumb included.  */
#define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif
252
 
253
#if defined (__AVR__)

/* AVR: choose the clz/ctz builtin variant according to W_TYPE_SIZE
   (16, 32 or 64 bits), and define COUNT_LEADING_ZEROS_0 as the word
   width in each case.  */
#if W_TYPE_SIZE == 16
#define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
#define COUNT_LEADING_ZEROS_0 16
#endif /* W_TYPE_SIZE == 16 */

#if W_TYPE_SIZE == 32
#define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzl (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
#define COUNT_LEADING_ZEROS_0 32
#endif /* W_TYPE_SIZE == 32 */

#if W_TYPE_SIZE == 64
#define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzll (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
#define COUNT_LEADING_ZEROS_0 64
#endif /* W_TYPE_SIZE == 64 */

#endif /* defined (__AVR__) */
274
 
275
/* CRIS: count_leading_zeros uses the builtin from architecture version 3
   on; count_trailing_zeros is only provided from version 8 on.  */
#if defined (__CRIS__) && __CRIS_arch_version >= 3
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#if __CRIS_arch_version >= 8
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
#endif
#endif /* __CRIS__ */
281
 
282
#if defined (__hppa) && W_TYPE_SIZE == 32
/* HP PA-RISC, 32-bit words.  Two-word add: add on the low words, addc on
   the high words.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
           : "=r" ((USItype) (sh)),                                     \
             "=&r" ((USItype) (sl))                                     \
           : "%rM" ((USItype) (ah)),                                    \
             "rM" ((USItype) (bh)),                                     \
             "%rM" ((USItype) (al)),                                    \
             "rM" ((USItype) (bl)))
/* Two-word subtract: sub on the low words, subb on the high words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
           : "=r" ((USItype) (sh)),                                     \
             "=&r" ((USItype) (sl))                                     \
           : "rM" ((USItype) (ah)),                                     \
             "rM" ((USItype) (bh)),                                     \
             "rM" ((USItype) (al)),                                     \
             "rM" ((USItype) (bl)))
#if defined (_PA_RISC1_1)
/* Only when _PA_RISC1_1 is defined: multiply with xmpyu into a 64-bit
   value, then read the two words back through a union whose first
   struct member is the high word.  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    union                                                               \
      {                                                                 \
        UDItype __f;                                                    \
        struct {USItype __w1, __w0;} __w1w0;                            \
      } __t;                                                            \
    __asm__ ("xmpyu %1,%2,%0"                                           \
             : "=x" (__t.__f)                                           \
             : "x" ((USItype) (u)),                                     \
               "x" ((USItype) (v)));                                    \
    (w1) = __t.__w1w0.__w1;                                             \
    (w0) = __t.__w1w0.__w0;                                             \
     } while (0)
#define UMUL_TIME 8
#else
#define UMUL_TIME 30
#endif
#define UDIV_TIME 40
/* Binary search for the highest set bit: start the count at 1, and at
   each step (16, 8, 4, 2 bits) either shift the value down or add the
   step to the count, as described by the per-line asm comments.  The
   final extracted bit is subtracted from the count.  X is passed in the
   same register as the scratch variable __tmp (matching constraint "1"),
   which the asm overwrites.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    USItype __tmp;                                                      \
    __asm__ (                                                           \
       "ldi             1,%0\n"                                         \
"       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
"       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
"       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
"       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
"       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
"       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
"       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
"       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
"       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
"       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
"       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
"       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
"       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
"       sub             %0,%1,%0                ; Subtract it.\n"       \
        : "=r" (count), "=r" (__tmp) : "1" (x));                        \
  } while (0)
#endif
341
 
342
#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
#if !defined (__zarch__)
/* Not z/Architecture: a DItype operand stands in for the double-word
   operand of mr/dr, and the two words are read or written through a
   union whose first struct member is the high word.

   smul_ppmm: signed 32x32->64 multiply; (xh,xl) receive the high and low
   product words.  */
#define smul_ppmm(xh, xl, m0, m1) \
  do {                                                                  \
    union {DItype __ll;                                                 \
           struct {USItype __h, __l;} __i;                              \
          } __x;                                                        \
    __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
             : "=&r" (__x.__ll)                                         \
             : "r" (m0), "r" (m1));                                     \
    (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
  } while (0)
/* sdiv_qrnnd: signed divide of (n1,n0) by d; after dr the quotient is
   read from the low word and the remainder from the high word.  */
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do {                                                                  \
    union {DItype __ll;                                                 \
           struct {USItype __h, __l;} __i;                              \
          } __x;                                                        \
    __x.__i.__h = n1; __x.__i.__l = n0;                                 \
    __asm__ ("dr %0,%2"                                                 \
             : "=r" (__x.__ll)                                          \
             : "0" (__x.__ll), "r" (d));                                \
    (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
  } while (0)
#else
/* z/Architecture: the operands are pinned to registers 0 and 1 with
   explicit register variables, and the instructions name %r0 directly.

   smul_ppmm: m0 is loaded into register 1; after mr the high word is
   taken from register 0 and the low word from register 1.  */
#define smul_ppmm(xh, xl, m0, m1) \
  do {                                                                  \
    register SItype __r0 __asm__ ("0");                                 \
    register SItype __r1 __asm__ ("1") = (m0);                          \
                                                                        \
    __asm__ ("mr\t%%r0,%3"                                              \
             : "=r" (__r0), "=r" (__r1)                                 \
             : "r"  (__r1),  "r" (m1));                                 \
    (xh) = __r0; (xl) = __r1;                                           \
  } while (0)

/* sdiv_qrnnd: n1 goes in register 0 and n0 in register 1; after dr the
   quotient is taken from register 1 and the remainder from register 0.  */
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do {                                                                  \
    register SItype __r0 __asm__ ("0") = (n1);                          \
    register SItype __r1 __asm__ ("1") = (n0);                          \
                                                                        \
    __asm__ ("dr\t%%r0,%4"                                              \
             : "=r" (__r0), "=r" (__r1)                                 \
             : "r" (__r0), "r" (__r1), "r" (d));                        \
    (q) = __r1; (r) = __r0;                                             \
  } while (0)
#endif /* __zarch__ */
#endif
389
 
390
#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
/* 32-bit x86.  In every asm below the output operands are plain lvalues:
   a cast expression is not an lvalue, and compilers that do not implement
   the old cast-as-lvalue extension reject it as an asm output.  The input
   operands keep their USItype casts.

   add_ssaaaa: add on the low words, adc on the high words.  The sums are
   formed in place, so ah/al are tied to the output registers.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
           : "=r" (sh),                                                 \
             "=&r" (sl)                                                 \
           : "%0" ((USItype) (ah)),                                     \
             "g" ((USItype) (bh)),                                      \
             "%1" ((USItype) (al)),                                     \
             "g" ((USItype) (bl)))
/* sub_ddmmss: sub on the low words, sbb on the high words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
           : "=r" (sh),                                                 \
             "=&r" (sl)                                                 \
           : "0" ((USItype) (ah)),                                      \
             "g" ((USItype) (bh)),                                      \
             "1" ((USItype) (al)),                                      \
             "g" ((USItype) (bl)))
/* umul_ppmm: one-operand mul; u is tied to the "a" register, the low
   product word comes back in "a" and the high word in "d".  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mul{l} %3"                                                  \
           : "=a" (w0),                                                 \
             "=d" (w1)                                                  \
           : "%0" ((USItype) (u)),                                      \
             "rm" ((USItype) (v)))
/* udiv_qrnnd: div with n0 in "a" and n1 in "d"; the quotient comes back
   in "a" and the remainder in "d".  */
#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{l} %4"                                                  \
           : "=a" (q),                                                  \
             "=d" (r)                                                   \
           : "0" ((USItype) (n0)),                                      \
             "1" ((USItype) (n1)),                                      \
             "rm" ((USItype) (dv)))
#define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
#define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
#endif /* 80x86 */
425
 
426
#if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
427
/* x86-64 two-word add: (sh,sl) = (ah,al) + (bh,bl).  add on the low
   words, then adc folds the carry into the high words; a carry out of the
   high word is lost.  ah/al are tied to the output registers because the
   sums are formed in place.  The outputs are plain lvalues: a cast
   expression is not an lvalue and is rejected as an asm output by
   compilers without the cast-as-lvalue extension.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
           : "=r" (sh),                                                 \
             "=&r" (sl)                                                 \
           : "%0" ((UDItype) (ah)),                                     \
             "rme" ((UDItype) (bh)),                                    \
             "%1" ((UDItype) (al)),                                     \
             "rme" ((UDItype) (bl)))
435
/* x86-64 two-word subtract: (sh,sl) = (ah,al) - (bh,bl).  sub on the low
   words, then sbb propagates the borrow into the high words; a borrow out
   of the high word is lost.  The outputs are plain lvalues (no cast),
   since a cast expression is not a valid asm output operand.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
           : "=r" (sh),                                                 \
             "=&r" (sl)                                                 \
           : "0" ((UDItype) (ah)),                                      \
             "rme" ((UDItype) (bh)),                                    \
             "1" ((UDItype) (al)),                                      \
             "rme" ((UDItype) (bl)))
443
/* x86-64 64x64->128 unsigned multiply.  u is tied to the "a" register and
   multiplied by v with the one-operand mul; the low product word comes
   back in "a" (w0) and the high word in "d" (w1).  The outputs are plain
   lvalues (no cast), since a cast expression is not a valid asm output
   operand.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mul{q} %3"                                                  \
           : "=a" (w0),                                                 \
             "=d" (w1)                                                  \
           : "%0" ((UDItype) (u)),                                      \
             "rm" ((UDItype) (v)))
449
/* x86-64 128/64 unsigned divide of the two-word value (n1,n0) by dv.
   n0 goes in "a" and n1 in "d"; div leaves the quotient in "a" (q) and
   the remainder in "d" (r).  As stated in the header comment of this
   file, n1 must be less than dv.  The outputs are plain lvalues (no
   cast), since a cast expression is not a valid asm output operand.  */
#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{q} %4"                                                  \
           : "=a" (q),                                                  \
             "=d" (r)                                                   \
           : "0" ((UDItype) (n0)),                                      \
             "1" ((UDItype) (n1)),                                      \
             "rm" ((UDItype) (dv)))
456
/* Bit counts for a 64-bit word via the long long builtins.  No
   COUNT_LEADING_ZEROS_0 is defined here, so x == 0 is undefined as
   described in the header comment of this file.  */
#define count_leading_zeros(count, x)   ((count) = __builtin_clzll (x))
#define count_trailing_zeros(count, x)  ((count) = __builtin_ctzll (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
460
#endif /* x86_64 */
461
 
462
#if defined (__i960__) && W_TYPE_SIZE == 32
/* i960, 32-bit words.  emul produces the double-word product; umul_ppmm
   reads the two words back through a union whose first struct member is
   the low word.  Written as a GNU statement expression.  */
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll;                                                \
           struct {USItype __l, __h;} __i;                              \
          } __xx;                                                       \
  __asm__ ("emul        %2,%1,%0"                                       \
           : "=d" (__xx.__ll)                                           \
           : "%dI" ((USItype) (u)),                                     \
             "dI" ((USItype) (v)));                                     \
  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
/* Same multiply, but yielding the whole UDItype product as the value of
   the statement expression.  */
#define __umulsidi3(u, v) \
  ({UDItype __w;                                                        \
    __asm__ ("emul      %2,%1,%0"                                       \
             : "=d" (__w)                                               \
             : "%dI" ((USItype) (u)),                                   \
               "dI" ((USItype) (v)));                                   \
    __w; })
#endif /* __i960__ */
480
 
481
#if defined (__ia64) && W_TYPE_SIZE == 64
482
/* This form encourages gcc (pre-release 3.4 at least) to emit predicated
   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
   register, which takes an extra cycle.

   All four input operands are copied into UWtype locals before anything
   is stored, so each argument is evaluated exactly once (arguments with
   side effects are safe) and the outputs may alias the inputs.  The
   if/else shape described above is kept.  A borrow out of the high word
   is lost.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  do {                                                                  \
    UWtype __sub_al = (al), __sub_bl = (bl);                            \
    UWtype __sub_ah = (ah), __sub_bh = (bh);                            \
    if (__sub_al < __sub_bl)                                            \
      (sh) = __sub_ah - __sub_bh - 1;                                   \
    else                                                                \
      (sh) = __sub_ah - __sub_bh;                                       \
    (sl) = __sub_al - __sub_bl;                                         \
  } while (0)
496
 
497
/* Do both product parts in assembly, since that gives better code with
   all gcc versions.  Some callers will just use the upper part, and in
   that situation we waste an instruction, but not any cycles.  */
#define umul_ppmm(ph, pl, m0, m1)                                       \
  __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
           : "=&f" (ph), "=f" (pl)                                      \
           : "f" (m0), "f" (m1))
/* Two asm steps (mux1 @rev, then czx1.l) yield _a, from which a byte
   offset _c = (_a - 1) * 8 is derived.  The value is shifted down by _c
   and the position of the top bit within the remaining byte is found
   with two compare-and-shift steps plus a final shift.  The result is
   W_TYPE_SIZE - 1 minus that bit position.  */
#define count_leading_zeros(count, x)                                   \
  do {                                                                  \
    UWtype _x = (x), _y, _a, _c;                                        \
    __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
    __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
    _c = (_a - 1) << 3;                                                 \
    _x >>= _c;                                                          \
    if (_x >= 1 << 4)                                                   \
      _x >>= 4, _c += 4;                                                \
    if (_x >= 1 << 2)                                                   \
      _x >>= 2, _c += 2;                                                \
    _c += _x >> 1;                                                      \
    (count) =  W_TYPE_SIZE - 1 - _c;                                    \
  } while (0)
/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
   based, and we don't need a special case for x==0 here.  (x-1) & ~x
   sets exactly the bits below the lowest set bit of x, and popcnt counts
   them.  */
#define count_trailing_zeros(count, x)                                  \
  do {                                                                  \
    UWtype __ctz_x = (x);                                               \
    __asm__ ("popcnt %0 = %1"                                           \
             : "=r" (count)                                             \
             : "r" ((__ctz_x-1) & ~__ctz_x));                           \
  } while (0)
#define UMUL_TIME 14
528
#endif
529
 
530
#if defined (__M32R__) && W_TYPE_SIZE == 32
/* M32R, 32-bit words.  Both macros first clear the condition bit with a
   self-compare, then chain two addx (or subx) instructions: low words
   first, then high words.  ah/al are tied to the output registers and
   "cbit" is declared clobbered.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  /* The cmp clears the condition bit.  */ \
  __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
           : "=r" ((USItype) (sh)),                                     \
             "=&r" ((USItype) (sl))                                     \
           : "0" ((USItype) (ah)),                                      \
             "r" ((USItype) (bh)),                                      \
             "1" ((USItype) (al)),                                      \
             "r" ((USItype) (bl))                                       \
           : "cbit")
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  /* The cmp clears the condition bit.  */ \
  __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
           : "=r" ((USItype) (sh)),                                     \
             "=&r" ((USItype) (sl))                                     \
           : "0" ((USItype) (ah)),                                      \
             "r" ((USItype) (bh)),                                      \
             "1" ((USItype) (al)),                                      \
             "r" ((USItype) (bl))                                       \
           : "cbit")
#endif /* __M32R__ */
552
 
553
#if defined (__mc68000__) && W_TYPE_SIZE == 32
554
/* Two-word add: (sh,sl) = (ah,al) + (bh,bl).  The low words are added
   first (add), then the high words (addx).  ah/al are tied to the outputs
   and marked commutative ('%'); sl is early-clobbered because it is written
   before bh is read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
           : "=d" ((USItype) (sh)), \
             "=&d" ((USItype) (sl)) \
           : "%0" ((USItype) (ah)), \
             "d" ((USItype) (bh)), \
             "%1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
562
/* Two-word subtract: (sh,sl) = (ah,al) - (bh,bl).  The low words are
   subtracted first (sub), then the high words (subx).  ah/al are tied to
   the outputs; sl is early-clobbered because it is written before bh is
   read.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
           : "=d" ((USItype) (sh)), \
             "=&d" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "d" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
570
 
571
/* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r.  The
   '060 is deliberately excluded by the test below.  */
572
#if (defined (__mc68020__) && !defined (__mc68060__))
573
/* Unsigned multiply with a two-word result: (w1,w0) = u * v, done with a
   single mulu.l that writes the register pair %1:%0.  u is tied to w0 and
   marked commutative with v.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulu%.l %3,%1:%0" \
           : "=d" ((USItype) (w0)), \
             "=d" ((USItype) (w1)) \
           : "%0" ((USItype) (u)), \
             "dmi" ((USItype) (v)))
579
#define UMUL_TIME 45
580
/* Unsigned divide of the two-word value (n1,n0) by d using one divu.l on
   the register pair %1:%0.  n0 is tied to q and n1 is tied to r, so the
   same registers carry the dividend in and the quotient/remainder out.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divu%.l %4,%1:%0" \
           : "=d" ((USItype) (q)), \
             "=d" ((USItype) (r)) \
           : "0" ((USItype) (n0)), \
             "1" ((USItype) (n1)), \
             "dmi" ((USItype) (d)))
587
#define UDIV_TIME 90
588
/* Signed counterpart of udiv_qrnnd: divides the two-word value (n1,n0) by
   d using one divs.l on the register pair %1:%0.  n0 is tied to q and n1 is
   tied to r.  */
#define sdiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divs%.l %4,%1:%0" \
           : "=d" ((USItype) (q)), \
             "=d" ((USItype) (r)) \
           : "0" ((USItype) (n0)), \
             "1" ((USItype) (n1)), \
             "dmi" ((USItype) (d)))
595
 
596
#elif defined (__mcoldfire__) /* not mc68020 */
597
 
598
/* ColdFire software multiply: (xh,xl) = a * b.  The operands are split
   into 16-bit halves (swap), four 16x16 mulu partial products are formed
   and then recombined with explicit carry handling (the jcc/add #65536
   pair).  d0-d4 are used as scratch and listed as clobbers.  The low word
   of d0 is cleared with clr.w in this variant.  */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("| Inlined umul_ppmm\n" \
           "    move%.l %2,%/d0\n" \
           "    move%.l %3,%/d1\n" \
           "    move%.l %/d0,%/d2\n" \
           "    swap    %/d0\n" \
           "    move%.l %/d1,%/d3\n" \
           "    swap    %/d1\n" \
           "    move%.w %/d2,%/d4\n" \
           "    mulu    %/d3,%/d4\n" \
           "    mulu    %/d1,%/d2\n" \
           "    mulu    %/d0,%/d3\n" \
           "    mulu    %/d0,%/d1\n" \
           "    move%.l %/d4,%/d0\n" \
           "    clr%.w  %/d0\n" \
           "    swap    %/d0\n" \
           "    add%.l  %/d0,%/d2\n" \
           "    add%.l  %/d3,%/d2\n" \
           "    jcc     1f\n" \
           "    add%.l  %#65536,%/d1\n" \
           "1:  swap    %/d2\n" \
           "    moveq   %#0,%/d0\n" \
           "    move%.w %/d2,%/d0\n" \
           "    move%.w %/d4,%/d2\n" \
           "    move%.l %/d2,%1\n" \
           "    add%.l  %/d1,%/d0\n" \
           "    move%.l %/d0,%0" \
           : "=g" ((USItype) (xh)), \
             "=g" ((USItype) (xl)) \
           : "g" ((USItype) (a)), \
             "g" ((USItype) (b)) \
           : "d0", "d1", "d2", "d3", "d4")
630
#define UMUL_TIME 100
631
#define UDIV_TIME 400
632
#else /* not ColdFire */
633
/* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
/* Plain 68000 software multiply: (xh,xl) = a * b.  The operands are split
   into 16-bit halves (swap), four 16x16 mulu partial products are formed
   and then recombined with explicit carry handling (the jcc/add #65536
   pair).  d0-d4 are used as scratch and listed as clobbers.  The low word
   of d0 is cleared with eor.w in this variant.  */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("| Inlined umul_ppmm\n" \
           "    move%.l %2,%/d0\n" \
           "    move%.l %3,%/d1\n" \
           "    move%.l %/d0,%/d2\n" \
           "    swap    %/d0\n" \
           "    move%.l %/d1,%/d3\n" \
           "    swap    %/d1\n" \
           "    move%.w %/d2,%/d4\n" \
           "    mulu    %/d3,%/d4\n" \
           "    mulu    %/d1,%/d2\n" \
           "    mulu    %/d0,%/d3\n" \
           "    mulu    %/d0,%/d1\n" \
           "    move%.l %/d4,%/d0\n" \
           "    eor%.w  %/d0,%/d0\n" \
           "    swap    %/d0\n" \
           "    add%.l  %/d0,%/d2\n" \
           "    add%.l  %/d3,%/d2\n" \
           "    jcc     1f\n" \
           "    add%.l  %#65536,%/d1\n" \
           "1:  swap    %/d2\n" \
           "    moveq   %#0,%/d0\n" \
           "    move%.w %/d2,%/d0\n" \
           "    move%.w %/d4,%/d2\n" \
           "    move%.l %/d2,%1\n" \
           "    add%.l  %/d1,%/d0\n" \
           "    move%.l %/d0,%0" \
           : "=g" ((USItype) (xh)), \
             "=g" ((USItype) (xl)) \
           : "g" ((USItype) (a)), \
             "g" ((USItype) (b)) \
           : "d0", "d1", "d2", "d3", "d4")
666
#define UMUL_TIME 100
667
#define UDIV_TIME 400
668
 
669
#endif /* not mc68020 */
670
 
671
/* The '020, '030, '040 and '060 have bitfield insns.
   cpu32 disguises as a 68020, but lacks them.  */
673
#if defined (__mc68020__) && !defined (__mcpu32__)
674
/* Store the leading-zero count of x in count using the bfffo bitfield
   insn.  Operand 2 is the constant 0 and is used for both the bitfield
   offset and width fields.  */
#define count_leading_zeros(count, x) \
  __asm__ ("bfffo %1{%b2:%b2},%0" \
           : "=d" ((USItype) (count)) \
           : "od" ((USItype) (x)), "n" (0))
678
/* Some ColdFire architectures have a ff1 instruction supported via
   __builtin_clz.  */
680
#elif defined (__mcfisaaplus__) || defined (__mcfisac__)
681
/* Store the leading-zero count of x in count via __builtin_clz.
   COUNT_LEADING_ZEROS_0 is the value this port advertises for x == 0.  */
#define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
#define COUNT_LEADING_ZEROS_0 32
683
#endif
684
#endif /* mc68000 */
685
 
686
#if defined (__m88000__) && W_TYPE_SIZE == 32
687
/* Two-word add: (sh,sl) = (ah,al) + (bh,bl).  addu.co adds the low words
   and addu.ci adds the high words.  sl is early-clobbered because it is
   written before ah/bh are read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%rJ" ((USItype) (ah)), \
             "rJ" ((USItype) (bh)), \
             "%rJ" ((USItype) (al)), \
             "rJ" ((USItype) (bl)))
695
/* Two-word subtract: (sh,sl) = (ah,al) - (bh,bl).  subu.co subtracts the
   low words and subu.ci subtracts the high words.  sl is early-clobbered
   because it is written before ah/bh are read.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "rJ" ((USItype) (ah)), \
             "rJ" ((USItype) (bh)), \
             "rJ" ((USItype) (al)), \
             "rJ" ((USItype) (bl)))
703
/* Store the leading-zero count of x in count.  The ff1 result is placed
   in a temporary and XORed with 31 to turn it into a count from the top
   bit.  */
#define count_leading_zeros(count, x) \
  do { \
    USItype __cbtmp; \
    __asm__ ("ff1 %0,%1" \
             : "=r" (__cbtmp) \
             : "r" ((USItype) (x))); \
    (count) = __cbtmp ^ 31; \
  } while (0)
#define COUNT_LEADING_ZEROS_0 63 /* sic */
712
#if defined (__mc88110__)
713
/* Unsigned multiply with a two-word result: (wh,wl) = u * v.  mulu.d
   produces a double-word value, which is split through a union whose
   struct lists the high word first.  */
#define umul_ppmm(wh, wl, u, v) \
  do { \
    union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __xx; \
    __asm__ ("mulu.d    %0,%1,%2" \
             : "=r" (__xx.__ll) \
             : "r" ((USItype) (u)), \
               "r" ((USItype) (v))); \
    (wh) = __xx.__i.__h; \
    (wl) = __xx.__i.__l; \
  } while (0)
725
/* Divide the two-word value (n1,n0) by d.  The dividend is assembled in a
   union (high word first), divu.d yields the quotient, and the remainder
   is recomputed as n0 - q * d.  Note that n0 and d are each expanded more
   than once, so arguments must be free of side effects.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __xx; \
  USItype __q; \
  __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
  __asm__ ("divu.d %0,%1,%2" \
           : "=r" (__q) \
           : "r" (__xx.__ll), \
             "r" ((USItype) (d))); \
  (r) = (n0) - __q * (d); (q) = __q; })
736
#define UMUL_TIME 5
737
#define UDIV_TIME 25
738
#else
739
#define UMUL_TIME 17
740
#define UDIV_TIME 150
741
#endif /* __mc88110__ */
742
#endif /* __m88000__ */
743
 
744
#if defined (__mn10300__)
745
# if defined (__AM33__)
746
/* AM33: the leading-zero count of x is taken directly from GCC's
   __builtin_clz and assigned to count.  */
#  define count_leading_zeros(count, x) \
     ((count) = __builtin_clz (x))
747
/* AM33 multiplies with a two-word result: (w1,w0) = u * v, unsigned
   (mulu) and signed (mul).  Spelled __asm__ like the rest of this file so
   the macros remain usable when the plain asm keyword is disabled.  */
#  define umul_ppmm(w1, w0, u, v)               \
    __asm__("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
#  define smul_ppmm(w1, w0, u, v)               \
    __asm__("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
751
# else
752
/* Non-AM33 multiplies with a two-word result: (w1,w0) = u * v, unsigned
   (mulu) and signed (mul).  u is tied to w0 and marked commutative; w1
   comes back through the "z" register class.  Spelled __asm__ like the
   rest of this file so the macros remain usable when the plain asm keyword
   is disabled.  */
#  define umul_ppmm(w1, w0, u, v)               \
    __asm__("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
#  define smul_ppmm(w1, w0, u, v)               \
    __asm__("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
756
# endif
757
/* Two-word add in plain C: pack (ah,al) and (bh,bl) into DWunion values,
   add them as double-word integers, and unpack the sum into (sh,sl).  */
# define add_ssaaaa(sh, sl, ah, al, bh, bl)     \
  do {                                          \
    DWunion __s, __a, __b;                      \
    __a.s.low = (al); __a.s.high = (ah);        \
    __b.s.low = (bl); __b.s.high = (bh);        \
    __s.ll = __a.ll + __b.ll;                   \
    (sl) = __s.s.low; (sh) = __s.s.high;        \
  } while (0)
765
/* Two-word subtract in plain C: pack (ah,al) and (bh,bl) into DWunion
   values, subtract them as double-word integers, and unpack the difference
   into (sh,sl).  */
# define sub_ddmmss(sh, sl, ah, al, bh, bl)     \
  do {                                          \
    DWunion __s, __a, __b;                      \
    __a.s.low = (al); __a.s.high = (ah);        \
    __b.s.low = (bl); __b.s.high = (bh);        \
    __s.ll = __a.ll - __b.ll;                   \
    (sl) = __s.s.low; (sh) = __s.s.high;        \
  } while (0)
773
/* Divide the two-word value (nh,nl) by d, unsigned (divu) and signed
   (div).  nl is tied to q and nh is tied to r, so the dividend and the
   results share registers.  Spelled __asm__ like the rest of this file so
   the macros remain usable when the plain asm keyword is disabled.  */
# define udiv_qrnnd(q, r, nh, nl, d)            \
  __asm__("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define sdiv_qrnnd(q, r, nh, nl, d)            \
  __asm__("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
777
# define UMUL_TIME 3
778
# define UDIV_TIME 38
779
#endif
780
 
781
#if defined (__mips__) && W_TYPE_SIZE == 32
782
/* Unsigned multiply with a two-word result: (w1,w0) = u * v.  Written in
   plain C: the operands are truncated to USItype, the product is formed in
   UDItype, and its upper and lower 32 bits are stored in w1 and w0.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \
    (w1) = (USItype) (__x >> 32); \
    (w0) = (USItype) (__x); \
  } while (0)
788
#define UMUL_TIME 10
789
#define UDIV_TIME 100
790
 
791
#if (__mips == 32 || __mips == 64) && ! __mips16
792
/* Store the leading-zero count of X in COUNT via __builtin_clz.
   COUNT_LEADING_ZEROS_0 is the value this port advertises for X == 0.  */
#define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
#define COUNT_LEADING_ZEROS_0 32
794
#endif
795
#endif /* __mips__ */
796
 
797
#if defined (__ns32000__) && W_TYPE_SIZE == 32
798
/* Unsigned multiply with a two-word result: (w1,w0) = u * v.  meid writes
   a double-word result, which is split through a union whose struct lists
   the low word first.  u is tied to the output and marked commutative.  */
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __xx; \
  __asm__ ("meid %2,%0" \
           : "=g" (__xx.__ll) \
           : "%0" ((USItype) (u)), \
             "g" ((USItype) (v))); \
  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
807
/* Evaluate to the UDItype product of the USItype values u and v, using
   the same meid insn as umul_ppmm but returning the double-word result
   directly.  */
#define __umulsidi3(u, v) \
  ({UDItype __w; \
    __asm__ ("meid %2,%0" \
             : "=g" (__w) \
             : "%0" ((USItype) (u)), \
               "g" ((USItype) (v))); \
    __w; })
814
/* Divide the two-word value (n1,n0) by d.  The dividend is assembled in a
   union (low word first) and deid operates on it in place; afterwards the
   low word is stored in r and the high word in q.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __xx; \
  __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
  __asm__ ("deid %2,%0" \
           : "=g" (__xx.__ll) \
           : "0" (__xx.__ll), \
             "g" ((USItype) (d))); \
  (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
824
/* Store the trailing-zero count of x in count using ffsd.  The output
   register is preloaded with 0 through the matching "0" input.  */
#define count_trailing_zeros(count,x) \
  do { \
    __asm__ ("ffsd     %2,%0" \
            : "=r" ((USItype) (count)) \
            : "0" ((USItype) 0), \
              "r" ((USItype) (x))); \
  } while (0)
831
#endif /* __ns32000__ */
832
 
833
/* FIXME: We should test _IBMR2 here when we add assembly support for the
   system vendor compilers.
   FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
   enough, since that hits ARM and m68k too.  */
837
#if (defined (_ARCH_PPC)        /* AIX */                               \
838
     || defined (_ARCH_PWR)     /* AIX */                               \
839
     || defined (_ARCH_COM)     /* AIX */                               \
840
     || defined (__powerpc__)   /* gcc */                               \
841
     || defined (__POWERPC__)   /* BEOS */                              \
842
     || defined (__ppc__)       /* Darwin */                            \
843
     || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
844
     || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
845
         && CPU_FAMILY == PPC)                                                \
846
     ) && W_TYPE_SIZE == 32
847
/* Two-word add: (sh,sl) = (ah,al) + (bh,bl).  When bh is a compile-time
   constant 0 or all-ones, the high word uses addze/addme so no register is
   needed for bh; otherwise the general adde form is used.  bl may be an
   immediate ("rI", selected by %I).  sl is early-clobbered because it is
   written before ah/bh are read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
             : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
             : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
    else \
      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
             : "=r" (sh), "=&r" (sl) \
             : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
  } while (0)
860
/* Two-word subtract: (sh,sl) = (ah,al) - (bh,bl).  Four special cases
   avoid a register for a high word that is a compile-time constant 0 or
   all-ones (ah: subfze/subfme on bh; bh: addme/addze on ah); otherwise the
   general subfe form is used.  al may be an immediate ("rI", selected by
   %I).  sl is early-clobbered because it is written before the high words
   are read.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
    else \
      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
  } while (0)
879
/* Store the leading-zero count of x in count using cntlz/cntlzw.
   COUNT_LEADING_ZEROS_0 is the value this port advertises for x == 0.  */
#define count_leading_zeros(count, x) \
  __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
#define COUNT_LEADING_ZEROS_0 32
882
#if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
883
  || defined (__ppc__)                                                    \
884
  || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
885
  || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
886
         && CPU_FAMILY == PPC)
887
/* Unsigned multiply with a two-word result: (ph,pl) = m0 * m1.  mulhwu
   yields the high word; the low word is the ordinary C product.  Both the
   asm and the C multiply read the temporaries, so m0 and m1 are each
   evaluated exactly once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
893
#define UMUL_TIME 15
894
/* Signed multiply with a two-word result: (ph,pl) = m0 * m1.  mulhw
   yields the high word; the low word is the ordinary C product.  Both the
   asm and the C multiply read the temporaries, so m0 and m1 are each
   evaluated exactly once.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    SItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
900
#define SMUL_TIME 14
901
#define UDIV_TIME 120
902
#elif defined (_ARCH_PWR)
903
#define UMUL_TIME 8
904
/* POWER signed multiply with a two-word result: (xh,xl) = m0 * m1.  The
   mul insn returns the high word in a general register and the low word
   through the "q" constraint.  */
#define smul_ppmm(xh, xl, m0, m1) \
  __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
906
#define SMUL_TIME 4
907
/* POWER signed divide of the two-word value (nh,nl) by d.  nl is tied to
   the "q"-constrained output, which also receives the remainder r; the
   quotient q comes back in a general register.  */
#define sdiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
909
#define UDIV_TIME 100
910
#endif
911
#endif /* 32-bit POWER architecture variants.  */
912
 
913
/* We should test _IBMR2 here when we add assembly support for the system
   vendor compilers.  */
915
#if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
916
/* Two-word (64-bit words) add: (sh,sl) = (ah,al) + (bh,bl).  When bh is a
   compile-time constant 0 or all-ones, the high word uses addze/addme so no
   register is needed for bh; otherwise the general adde form is used.  bl
   may be an immediate ("rI", selected by %I).  sl is early-clobbered
   because it is written before ah/bh are read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
             : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
             : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
    else \
      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
             : "=r" (sh), "=&r" (sl) \
             : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
  } while (0)
929
/* Two-word (64-bit words) subtract: (sh,sl) = (ah,al) - (bh,bl).  Four
   special cases avoid a register for a high word that is a compile-time
   constant 0 or all-ones (ah: subfze/subfme on bh; bh: addme/addze on ah);
   otherwise the general subfe form is used.  al may be an immediate ("rI",
   selected by %I).  sl is early-clobbered because it is written before the
   high words are read.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
    else \
      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
  } while (0)
948
/* Store the leading-zero count of the 64-bit x in count using cntlzd.
   COUNT_LEADING_ZEROS_0 is the value this port advertises for x == 0.  */
#define count_leading_zeros(count, x) \
  __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
#define COUNT_LEADING_ZEROS_0 64
951
/* Unsigned 64x64 multiply with a two-word result: (ph,pl) = m0 * m1.
   mulhdu yields the high word; the low word is the ordinary C product.
   Both the asm and the C multiply read the temporaries, so m0 and m1 are
   each evaluated exactly once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
957
#define UMUL_TIME 15
958
/* Signed 64x64 multiply with a two-word result: (ph,pl) = m0 * m1.
   mulhd yields the high word; the low word is the ordinary C product.
   Both the asm and the C multiply read the temporaries, so m0 and m1 are
   each evaluated exactly once.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    DItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
964
#define SMUL_TIME 14  /* ??? */
965
#define UDIV_TIME 120 /* ??? */
966
#endif /* 64-bit PowerPC.  */
967
 
968
#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
969
/* Two-word add: (sh,sl) = (ah,al) + (bh,bl).  "a" adds bl into the low
   word and "ae" adds bh into the high word.  ah/al are tied to the outputs
   and marked commutative; sl is early-clobbered because it is written
   before bh is read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("a %1,%5\n\tae %0,%3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%0" ((USItype) (ah)), \
             "r" ((USItype) (bh)), \
             "%1" ((USItype) (al)), \
             "r" ((USItype) (bl)))
977
/* Two-word subtract: (sh,sl) = (ah,al) - (bh,bl).  "s" subtracts bl from
   the low word and "se" subtracts bh from the high word.  ah/al are tied
   to the outputs; sl is early-clobbered because it is written before bh is
   read.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("s %1,%5\n\tse %0,%3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "r" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "r" ((USItype) (bl)))
985
/* Unsigned multiply with a two-word result: (ph,pl) = m0 * m1.  r2 is
   zeroed, m0 is moved to r10 with mts, and sixteen "m" multiply-step insns
   are issued against m1; the high word is then copied out of r2 and the
   low word read back from r10.  The C statement after the asm adds a
   correction to ph for each operand whose top bit is set
   (presumably because the multiply steps treat the operands as signed --
   TODO confirm).  r2 is listed as clobbered.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ( \
       "s       r2,r2\n" \
"       mts     r10,%2\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       m       r2,%3\n" \
"       cas     %0,r2,r0\n" \
"       mfs     r10,%1" \
             : "=r" ((USItype) (ph)), \
               "=r" ((USItype) (pl)) \
             : "%r" (__m0), \
                "r" (__m1) \
             : "r2"); \
    (ph) += ((((SItype) __m0 >> 31) & __m1) \
             + (((SItype) __m1 >> 31) & __m0)); \
  } while (0)
1017
#define UMUL_TIME 20
1018
#define UDIV_TIME 200
1019
/* Store the leading-zero count of the 32-bit x in count.  The clz insn is
   applied to one 16-bit half at a time: to the upper half when x >= 0x10000,
   otherwise to x itself with 16 added to the result.  Note that x and count
   are expanded more than once, so arguments must be free of side
   effects.  */
#define count_leading_zeros(count, x) \
  do { \
    if ((x) >= 0x10000) \
      __asm__ ("clz     %0,%1" \
               : "=r" ((USItype) (count)) \
               : "r" ((USItype) (x) >> 16)); \
    else \
      { \
        __asm__ ("clz   %0,%1" \
                 : "=r" ((USItype) (count)) \
                 : "r" ((USItype) (x))); \
        (count) += 16; \
      } \
  } while (0)
1033
#endif
1034
 
1035
#if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
1036
#ifndef __sh1__
1037
/* Unsigned multiply with a two-word result: (w1,w0) = u * v.  dmulu.l
   leaves the product in mach:macl, which are then stored to w1 and w0 and
   are listed as clobbers.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ( \
       "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0" \
           : "=r<" ((USItype)(w1)), \
             "=r<" ((USItype)(w0)) \
           : "r" ((USItype)(u)), \
             "r" ((USItype)(v)) \
           : "macl", "mach")
1045
#define UMUL_TIME 5
1046
#endif
1047
 
1048
/* This is the same algorithm as __udiv_qrnnd_c.  */
1049
#define UDIV_NEEDS_NORMALIZATION 1
1050
 
1051
/* Divide the two-word value (n1,n0) by d, giving quotient q and remainder
   r.  The work is done by two calls to the hidden helper __udiv_qrnnd_16,
   whose address is passed in as operand 5; the two results taken from r1
   are combined into q with swap.w/or.  n1 is tied to r, which uses the "z"
   constraint.  The helper's register usage is summarised in the comment
   inside the macro; r1, r2, r4-r6, pr and t are listed as clobbers.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { \
    extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
                        __attribute__ ((visibility ("hidden"))); \
    /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
    __asm__ ( \
        "mov%M4 %4,r5\n" \
"       swap.w %3,r4\n" \
"       swap.w r5,r6\n" \
"       jsr @%5\n" \
"       shll16 r6\n" \
"       swap.w r4,r4\n" \
"       jsr @%5\n" \
"       swap.w r1,%0\n" \
"       or r1,%0" \
        : "=r" (q), "=&z" (r) \
        : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
        : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
  } while (0)
1070
 
1071
#define UDIV_TIME 80
1072
 
1073
/* SH double-word subtract: (sh,sl) = (ah,al) - (bh,bl).  clrt clears the
   T bit, then two subc instructions subtract the low and the high words.
   AH and AL start out in the output registers (matching constraints "0"
   and "1"); the T bit is clobbered.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  __asm__ ("clrt;subc %5,%1; subc %4,%0"                                \
           : "=r" (sh), "=r" (sl)                                       \
           : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1077
 
1078
#endif /* __sh__ */
1079
 
1080
#if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
1081
/* Unsigned 32x32 -> 64-bit product.  Each argument is parenthesized before
   the cast so that compound expressions such as `a + b' are converted and
   multiplied as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
1082
/* SHmedia count_leading_zeros for a 32-bit X.  X is zero-extended into a
   64-bit temporary, passed to the nsb instruction, and the result is
   rebased by 31 before being stored in COUNT.  */
#define count_leading_zeros(count, x) \
  do                                                                    \
    {                                                                   \
      UDItype x_ = (USItype)(x);                                        \
      SItype c_;                                                        \
                                                                        \
      __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));                    \
      (count) = c_ - 31;                                                \
    }                                                                   \
  while (0)
1092
#define COUNT_LEADING_ZEROS_0 32
1093
#endif
1094
 
1095
#if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1096
    && W_TYPE_SIZE == 32
1097
/* 32-bit SPARC double-word add: (sh,sl) = (ah,al) + (bh,bl).  addcc adds
   the low words and sets the condition codes; addx then adds the high
   words together with the carry.  SL is early-clobber because it is
   written before AH/BH are read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                          \
           : "=r" ((USItype) (sh)),                                     \
             "=&r" ((USItype) (sl))                                     \
           : "%rJ" ((USItype) (ah)),                                    \
             "rI" ((USItype) (bh)),                                     \
             "%rJ" ((USItype) (al)),                                    \
             "rI" ((USItype) (bl))                                      \
           __CLOBBER_CC)
1106
/* 32-bit SPARC double-word subtract: (sh,sl) = (ah,al) - (bh,bl).  subcc
   subtracts the low words and sets the condition codes; subx then
   subtracts the high words together with the borrow.  SL is early-clobber
   because it is written before AH/BH are read.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                          \
           : "=r" ((USItype) (sh)),                                     \
             "=&r" ((USItype) (sl))                                     \
           : "rJ" ((USItype) (ah)),                                     \
             "rI" ((USItype) (bh)),                                     \
             "rJ" ((USItype) (al)),                                     \
             "rI" ((USItype) (bl))                                      \
           __CLOBBER_CC)
1115
#if defined (__sparc_v8__)
1116
/* SPARC v8 32x32 -> 64-bit unsigned multiply.  umul writes the low word
   to W0; the high word is then read from the %y register into W1.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
           : "=r" ((USItype) (w1)),                                     \
             "=r" ((USItype) (w0))                                      \
           : "r" ((USItype) (u)),                                       \
             "r" ((USItype) (v)))
1122
/* SPARC v8 udiv_qrnnd.  __N1 is moved into %y (followed by three nops),
   udiv then divides by __D to give the quotient __Q, and the remainder
   __R is reconstructed as __N0 - __Q * __D with umul / sub.  Both outputs
   are early-clobber because inputs are still read after they are
   written.  */
#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
  __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
           : "=&r" ((USItype) (__q)),                                   \
             "=&r" ((USItype) (__r))                                    \
           : "r" ((USItype) (__n1)),                                    \
             "r" ((USItype) (__n0)),                                    \
             "r" ((USItype) (__d)))
1129
#else
1130
#if defined (__sparclite__)
1131
/* This has hardware multiply but not divide.  It also has two additional
1132
   instructions scan (ffs from high bit) and divscc.  */
1133
/* SPARClite 32x32 -> 64-bit unsigned multiply.  umul writes the low word
   to W0; the high word is then read from the %y register into W1.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
           : "=r" ((USItype) (w1)),                                     \
             "=r" ((USItype) (w0))                                      \
           : "r" ((USItype) (u)),                                       \
             "r" ((USItype) (v)))
1139
/* SPARClite udiv_qrnnd built from the divscc step instruction.  N1 is
   written to %y, then 32 divscc steps are issued (the first reads N0, the
   last writes Q), using %g1 as the running value.  The remainder is read
   back from %y into R and, when the final `bl,a' branch is taken, D is
   added to it in the annulled delay slot.  Clobbers %g1 and the condition
   codes.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n"                                     \
"       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
"       tst     %%g0\n"                                                 \
"       divscc  %3,%4,%%g1\n"                                           \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%0\n"                                           \
"       rd      %%y,%1\n"                                               \
"       bl,a 1f\n"                                                      \
"       add     %1,%4,%1\n"                                             \
"1:     ! End of inline udiv_qrnnd"                                     \
           : "=r" ((USItype) (q)),                                      \
             "=r" ((USItype) (r))                                       \
           : "r" ((USItype) (n1)),                                      \
             "r" ((USItype) (n0)),                                      \
             "rI" ((USItype) (d))                                       \
           : "g1" __AND_CLOBBER_CC)
1185
#define UDIV_TIME 37
1186
/* SPARClite count_leading_zeros: a single `scan' instruction applied to X
   with the immediate 1, result stored in COUNT.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
  __asm__ ("scan %1,1,%0"                                               \
           : "=r" ((USItype) (count))                                   \
           : "r" ((USItype) (x)));                                      \
  } while (0)
1192
/* Early sparclites return 63 for an argument of 0, but they warn that future
1193
   implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1194
   undefined.  */
1195
#else
1196
/* SPARC without integer multiplication and divide instructions.
1197
   (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
1198
/* 32x32 -> 64-bit unsigned multiply for SPARC without a multiply
   instruction.  U is loaded into %y, then 32 mulscc steps with V are
   issued, followed by one final mulscc with a zero operand.  The term
   (V >> 31, arithmetic) & U is prepared in %o5 up front and added to the
   accumulated high word to produce W1; W0 is read back from %y.
   Clobbers %g1, %o5 and the condition codes.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("! Inlined umul_ppmm\n"                                      \
"       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
"       sra     %3,31,%%o5      ! Don't move this insn\n"               \
"       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
"       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,%3,%%g1\n"                                         \
"       mulscc  %%g1,0,%%g1\n"                                          \
"       add     %%g1,%%o5,%0\n"                                         \
"       rd      %%y,%1"                                                 \
           : "=r" ((USItype) (w1)),                                     \
             "=r" ((USItype) (w0))                                      \
           : "%rI" ((USItype) (u)),                                     \
             "r" ((USItype) (v))                                        \
           : "g1", "o5" __AND_CLOBBER_CC)
1244
#define UMUL_TIME 39            /* 39 instructions */
1245
/* It's quite necessary to add this much assembler for the sparc.
1246
   The default udiv_qrnnd (in C) is more than 10 times slower!  */
1247
/* udiv_qrnnd for SPARC without a divide instruction: a 32-iteration
   shift-and-subtract loop counted down in %g1.  __N0 starts in the
   register that becomes __Q and __N1 in the register that becomes __R
   (matching constraints "0" and "1"); each iteration shifts one bit of the
   numerator through and shifts a quotient bit into the low end of %0.  The
   closing xnor with 0 inverts the bits collected in %0 to form the
   quotient.  Clobbers %g1 and the condition codes.  */
#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
  __asm__ ("! Inlined udiv_qrnnd\n"                                     \
"       mov     32,%%g1\n"                                              \
"       subcc   %1,%2,%%g0\n"                                           \
"1:     bcs     5f\n"                                                   \
"        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
"       sub     %1,%2,%1        ! this kills msb of n\n"                \
"       addx    %1,%1,%1        ! so this can't give carry\n"           \
"       subcc   %%g1,1,%%g1\n"                                          \
"2:     bne     1b\n"                                                   \
"        subcc  %1,%2,%%g0\n"                                           \
"       bcs     3f\n"                                                   \
"        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
"       b       3f\n"                                                   \
"        sub    %1,%2,%1        ! this kills msb of n\n"                \
"4:     sub     %1,%2,%1\n"                                             \
"5:     addxcc  %1,%1,%1\n"                                             \
"       bcc     2b\n"                                                   \
"        subcc  %%g1,1,%%g1\n"                                          \
"! Got carry from n.  Subtract next step to cancel this carry.\n"       \
"       bne     4b\n"                                                   \
"        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
"       sub     %1,%2,%1\n"                                             \
"3:     xnor    %0,0,%0\n"                                              \
"       ! End of inline udiv_qrnnd"                                     \
           : "=&r" ((USItype) (__q)),                                   \
             "=&r" ((USItype) (__r))                                    \
           : "r" ((USItype) (__d)),                                     \
             "1" ((USItype) (__n1)),                                    \
             "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1277
#define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1278
#endif /* __sparclite__ */
1279
#endif /* __sparc_v8__ */
1280
#endif /* sparc32 */
1281
 
1282
#if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1283
    && W_TYPE_SIZE == 64
1284
/* 64-bit SPARC double-word add: (sh,sl) = (ah,al) + (bh,bl).  addcc adds
   the low words, a plain add sums the high words, and the annulled branch
   on the %xcc carry adds 1 to the high word only when the low-word add
   carried out.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
  __asm__ ("addcc %r4,%5,%1\n\t"                                        \
           "add %r2,%3,%0\n\t"                                          \
           "bcs,a,pn %%xcc, 1f\n\t"                                     \
           "add %0, 1, %0\n"                                            \
           "1:"                                                         \
           : "=r" ((UDItype)(sh)),                                      \
             "=&r" ((UDItype)(sl))                                      \
           : "%rJ" ((UDItype)(ah)),                                     \
             "rI" ((UDItype)(bh)),                                      \
             "%rJ" ((UDItype)(al)),                                     \
             "rI" ((UDItype)(bl))                                       \
           __CLOBBER_CC)
1297
 
1298
/* 64-bit SPARC double-word subtract: (sh,sl) = (ah,al) - (bh,bl).  subcc
   subtracts the low words, a plain sub handles the high words, and the
   annulled branch on the %xcc carry subtracts 1 from the high word only
   when the low-word subtract borrowed.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  __asm__ ("subcc %r4,%5,%1\n\t"                                        \
           "sub %r2,%3,%0\n\t"                                          \
           "bcs,a,pn %%xcc, 1f\n\t"                                     \
           "sub %0, 1, %0\n\t"                                          \
           "1:"                                                         \
           : "=r" ((UDItype)(sh)),                                      \
             "=&r" ((UDItype)(sl))                                      \
           : "rJ" ((UDItype)(ah)),                                      \
             "rI" ((UDItype)(bh)),                                      \
             "rJ" ((UDItype)(al)),                                      \
             "rI" ((UDItype)(bl))                                       \
           __CLOBBER_CC)
1311
 
1312
/* 64x64 -> 128-bit unsigned multiply for 64-bit SPARC, assembled from four
   mulx partial products of the 32-bit halves of U and V (split with
   srl ...,0 and srlx ...,32).  Carries out of the middle partial-product
   sum are tracked through the %xcc condition codes and folded into WH.
   The four scratch operands are early-clobber outputs; they carry
   reserved-style names so that caller expressions passed as U, V, WH or
   WL cannot be captured by them.  */
#define umul_ppmm(wh, wl, u, v)                                         \
  do {                                                                  \
          UDItype __t1, __t2, __t3, __t4;                               \
          __asm__ __volatile__ (                                        \
                   "srl %7,0,%3\n\t"                                    \
                   "mulx %3,%6,%1\n\t"                                  \
                   "srlx %6,32,%2\n\t"                                  \
                   "mulx %2,%3,%4\n\t"                                  \
                   "sllx %4,32,%5\n\t"                                  \
                   "srl %6,0,%3\n\t"                                    \
                   "sub %1,%5,%5\n\t"                                   \
                   "srlx %5,32,%5\n\t"                                  \
                   "addcc %4,%5,%4\n\t"                                 \
                   "srlx %7,32,%5\n\t"                                  \
                   "mulx %3,%5,%3\n\t"                                  \
                   "mulx %2,%5,%5\n\t"                                  \
                   "sethi %%hi(0x80000000),%2\n\t"                      \
                   "addcc %4,%3,%4\n\t"                                 \
                   "srlx %4,32,%4\n\t"                                  \
                   "add %2,%2,%2\n\t"                                   \
                   "movcc %%xcc,%%g0,%2\n\t"                            \
                   "addcc %5,%4,%5\n\t"                                 \
                   "sllx %3,32,%3\n\t"                                  \
                   "add %1,%3,%1\n\t"                                   \
                   "add %5,%2,%0"                                       \
           : "=r" ((UDItype)(wh)),                                      \
             "=&r" ((UDItype)(wl)),                                     \
             "=&r" (__t1), "=&r" (__t2), "=&r" (__t3), "=&r" (__t4)     \
           : "r" ((UDItype)(u)),                                        \
             "r" ((UDItype)(v))                                         \
           __CLOBBER_CC);                                               \
  } while (0)
1344
#define UMUL_TIME 96
1345
#define UDIV_TIME 230
1346
#endif /* sparc64 */
1347
 
1348
#if defined (__vax__) && W_TYPE_SIZE == 32
1349
/* VAX double-word add: (sh,sl) = (ah,al) + (bh,bl).  addl2 adds BL into
   the low word and adwc adds BH plus carry into the high word.  AH and AL
   start out in the output operands (matching constraints "%0"/"%1").  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
           : "=g" ((USItype) (sh)),                                     \
             "=&g" ((USItype) (sl))                                     \
           : "%0" ((USItype) (ah)),                                     \
             "g" ((USItype) (bh)),                                      \
             "%1" ((USItype) (al)),                                     \
             "g" ((USItype) (bl)))
1357
/* VAX double-word subtract: (sh,sl) = (ah,al) - (bh,bl).  subl2 subtracts
   BL from the low word and sbwc subtracts BH plus borrow from the high
   word.  AH and AL start out in the output operands (matching constraints
   "0"/"1").  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
           : "=g" ((USItype) (sh)),                                     \
             "=&g" ((USItype) (sl))                                     \
           : "0" ((USItype) (ah)),                                      \
             "g" ((USItype) (bh)),                                      \
             "1" ((USItype) (al)),                                      \
             "g" ((USItype) (bl)))
1365
/* VAX 32x32 -> 64-bit unsigned multiply built on emul.  The 64-bit emul
   result is split into XH/XL through a union (low word first), and XH is
   then corrected by adding, for each operand whose top bit is set, the
   other operand -- the usual signed-to-unsigned high-word fix-up.  */
#define umul_ppmm(xh, xl, m0, m1) \
  do {                                                                  \
    union {                                                             \
        UDItype __ll;                                                   \
        struct {USItype __l, __h;} __i;                                 \
      } __xx;                                                           \
    USItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("emul %1,%2,$0,%0"                                         \
             : "=r" (__xx.__ll)                                         \
             : "g" (__m0),                                              \
               "g" (__m1));                                             \
    (xh) = __xx.__i.__h;                                                \
    (xl) = __xx.__i.__l;                                                \
    (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
             + (((SItype) __m1 >> 31) & __m0));                         \
  } while (0)
1381
/* VAX signed divide step using ediv.  N1 (high) and N0 (low) are packed
   into a 64-bit dividend through a union, then ediv divides it by D,
   producing quotient Q and remainder R.  N1 and N0 are parenthesized so
   that compound argument expressions are assigned as a whole.  */
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do {                                                                  \
    union {DItype __ll;                                                 \
           struct {SItype __l, __h;} __i;                               \
          } __xx;                                                       \
    __xx.__i.__h = (n1); __xx.__i.__l = (n0);                           \
    __asm__ ("ediv %3,%2,%0,%1"                                         \
             : "=g" (q), "=g" (r)                                       \
             : "g" (__xx.__ll), "g" (d));                               \
  } while (0)
1391
#endif /* __vax__ */
1392
 
1393
#ifdef _TMS320C6X
1394
/* TMS320C6X double-word add: (sh,sl) = (ah,al) + (bh,bl).  addu produces
   the sum of AL and BL in a 64-bit temporary; its low 32 bits become SL
   and its upper bits (the carry) are added to AH + BH to form SH.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do                                                                    \
    {                                                                   \
      UDItype __ll;                                                     \
      __asm__ ("addu .l1 %1, %2, %0"                                    \
               : "=a" (__ll) : "a" (al), "a" (bl));                     \
      (sl) = (USItype)__ll;                                             \
      (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh);                     \
    }                                                                   \
  while (0)
1404
 
1405
#ifdef _TMS320C6400_PLUS
1406
/* Unsigned 32x32 -> 64-bit product.  Each argument is parenthesized before
   the cast so that compound expressions such as `a + b' are converted and
   multiplied as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
1407
/* 32x32 -> 64-bit unsigned multiply written in plain C: the operands are
   widened to 64 bits, multiplied once, and the product is split into its
   high word (W1) and low word (W0).  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);              \
    (w1) = (USItype) (__x >> 32);                                       \
    (w0) = (USItype) (__x);                                             \
  } while (0)
1413
#endif  /* _TMS320C6400_PLUS */
1414
 
1415
#define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
1416
#ifdef _TMS320C6400
1417
#define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
1418
#endif
1419
#define UMUL_TIME 4
1420
#define UDIV_TIME 40
1421
#endif /* _TMS320C6X */
1422
 
1423
#if defined (__xtensa__) && W_TYPE_SIZE == 32
1424
/* This code is not Xtensa-configuration-specific, so rely on the compiler
1425
   to expand builtin functions depending on what configuration features
1426
   are available.  This avoids library calls when the operation can be
1427
   performed in-line.  */
1428
/* Xtensa 32x32 -> 64-bit unsigned multiply.  The product comes from
   __builtin_umulsidi3 and is split into high (W1) and low (W0) words
   through a DWunion.  W1 and W0 are parenthesized so that any lvalue
   expression may be passed.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    DWunion __w;                                                        \
    __w.ll = __builtin_umulsidi3 (u, v);                                \
    (w1) = __w.s.high;                                                  \
    (w0) = __w.s.low;                                                   \
  } while (0)
1435
#define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)
1436
#define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
1437
#define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
1438
#endif /* __xtensa__ */
1439
 
1440
#if defined xstormy16
1441
extern UHItype __stormy16_count_leading_zeros (UHItype);
1442
/* xstormy16 count_leading_zeros.  X is examined 16 bits at a time starting
   from the most significant chunk; each chunk's leading-zero count from
   __clzhi2 is added to COUNT, and the scan stops at the first chunk whose
   count is not 16 (i.e. that contains a set bit).  An all-zero X yields
   W_TYPE_SIZE.
   The loop locals carry reserved-style names so that identifiers used in
   the COUNT or X argument expressions cannot be captured.
   NOTE(review): the prototype declared just before this macro is for
   __stormy16_count_leading_zeros, while the macro calls __clzhi2 --
   confirm that a declaration of __clzhi2 is visible to users.  */
#define count_leading_zeros(count, x)                                   \
  do                                                                    \
    {                                                                   \
      UHItype __clz_size;                                               \
                                                                        \
      /* We assume that W_TYPE_SIZE is a multiple of 16...  */          \
      for ((count) = 0, __clz_size = W_TYPE_SIZE;                       \
           __clz_size;                                                  \
           __clz_size -= 16)                                            \
        {                                                               \
          UHItype __clz_c;                                              \
                                                                        \
          __clz_c = __clzhi2 ((x) >> (__clz_size - 16));                \
          (count) += __clz_c;                                           \
          if (__clz_c != 16)                                            \
            break;                                                      \
        }                                                               \
    }                                                                   \
  while (0)
1459
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1460
#endif
1461
 
1462
#if defined (__z8000__) && W_TYPE_SIZE == 16
1463
/* Z8000 double-word add (16-bit words): (sh,sl) = (ah,al) + (bh,bl).  add
   handles the low words and adc the high words with carry.  AH and AL
   start out in the output registers (matching constraints "%0"/"%1").  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
           : "=r" ((unsigned int)(sh)),                                 \
             "=&r" ((unsigned int)(sl))                                 \
           : "%0" ((unsigned int)(ah)),                                 \
             "r" ((unsigned int)(bh)),                                  \
             "%1" ((unsigned int)(al)),                                 \
             "rQR" ((unsigned int)(bl)))
1471
/* Z8000 double-word subtract (16-bit words): (sh,sl) = (ah,al) - (bh,bl).
   sub handles the low words and sbc the high words with borrow.  AH and
   AL start out in the output registers (matching constraints "0"/"1").  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
           : "=r" ((unsigned int)(sh)),                                 \
             "=&r" ((unsigned int)(sl))                                 \
           : "0" ((unsigned int)(ah)),                                  \
             "r" ((unsigned int)(bh)),                                  \
             "1" ((unsigned int)(al)),                                  \
             "rQR" ((unsigned int)(bl)))
1479
/* Z8000 16x16 -> 32-bit unsigned multiply built on mult.  The result
   halves are collected through a union (high word first) and copied to
   XH/XL; XH is then corrected by adding, for each operand whose top bit
   (bit 15) is set, the other operand -- the usual signed-to-unsigned
   high-word fix-up.  */
#define umul_ppmm(xh, xl, m0, m1) \
  do {                                                                  \
    union {long int __ll;                                               \
           struct {unsigned int __h, __l;} __i;                         \
          } __xx;                                                       \
    unsigned int __m0 = (m0), __m1 = (m1);                              \
    __asm__ ("mult      %S0,%H3"                                        \
             : "=r" (__xx.__i.__h),                                     \
               "=r" (__xx.__i.__l)                                      \
             : "%1" (__m0),                                             \
               "rQR" (__m1));                                           \
    (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
    (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
             + (((signed int) __m1 >> 15) & __m0));                     \
  } while (0)
1494
#endif /* __z8000__ */
1495
 
1496
#endif /* __GNUC__ */
1497
 
1498
/* If this machine has no inline assembler, use C macros.  */
1499
 
1500
#if !defined (add_ssaaaa)
1501
/* Portable double-word add: (sh,sl) = (ah,al) + (bh,bl).  The low words
   are added first; unsigned wrap-around is detected by the sum being
   smaller than AL, and that carry is added into the high word.  SL is
   assigned last so that SL may name the same object as AL.  Note that AL
   is evaluated twice.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __x;                                                         \
    __x = (al) + (bl);                                                  \
    (sh) = (ah) + (bh) + (__x < (al));                                  \
    (sl) = __x;                                                         \
  } while (0)
1508
#endif
1509
 
1510
#if !defined (sub_ddmmss)
1511
/* Portable double-word subtract: (sh,sl) = (ah,al) - (bh,bl).  The low
   words are subtracted first; a borrow is detected by the difference
   being larger than AL, and it is subtracted from the high word.  SL is
   assigned last so that SL may name the same object as AL.  Note that AL
   is evaluated twice.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __x;                                                         \
    __x = (al) - (bl);                                                  \
    (sh) = (ah) - (bh) - (__x > (al));                                  \
    (sl) = __x;                                                         \
  } while (0)
1518
#endif
1519
 
1520
/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1521
   smul_ppmm.  */
1522
#if !defined (umul_ppmm) && defined (smul_ppmm)
1523
/* Unsigned multiply derived from the signed smul_ppmm.  The signed high
   word is corrected by adding V when U has its top bit set and U when V
   has its top bit set; each mask is formed by negating the operand's top
   bit, giving all-ones or zero.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __w1;                                                        \
    UWtype __xm0 = (u), __xm1 = (v);                                    \
    smul_ppmm (__w1, w0, __xm0, __xm1);                                 \
    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)               \
                + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);              \
  } while (0)
1531
#endif
1532
 
1533
/* If we still don't have umul_ppmm, define it using plain C.  */
1534
#if !defined (umul_ppmm)
1535
/* Portable word x word -> double-word unsigned multiply.  U and V are each
   split into half-words, the four half-word products are formed, and the
   two middle products are summed with explicit carry detection before
   being folded into the high word W1 and the low word W0.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __x0, __x1, __x2, __x3;                                      \
    UHWtype __ul, __vl, __uh, __vh;                                     \
                                                                        \
    __ul = __ll_lowpart (u);                                            \
    __uh = __ll_highpart (u);                                           \
    __vl = __ll_lowpart (v);                                            \
    __vh = __ll_highpart (v);                                           \
                                                                        \
    __x0 = (UWtype) __ul * __vl;                                        \
    __x1 = (UWtype) __ul * __vh;                                        \
    __x2 = (UWtype) __uh * __vl;                                        \
    __x3 = (UWtype) __uh * __vh;                                        \
                                                                        \
    __x1 += __ll_highpart (__x0);/* this can't give carry */            \
    __x1 += __x2;               /* but this indeed can */               \
    if (__x1 < __x2)            /* did we get it? */                    \
      __x3 += __ll_B;           /* yes, add it in the proper pos.  */   \
                                                                        \
    (w1) = __x3 + __ll_highpart (__x1);                                 \
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);          \
  } while (0)
1558
#endif
1559
 
1560
#if !defined (__umulsidi3)
/* __umulsidi3 (u, v) evaluates to the double-word product of U and V.
   umul_ppmm writes the product straight into the high and low halves
   of a DWunion, and the value of the expression is that union's `ll'
   member.  This relies on the GNU statement-expression extension.  */
#define __umulsidi3(u, v) \
  ({DWunion __w;                                                        \
    umul_ppmm (__w.s.high, __w.s.low, u, v);                            \
    __w.ll; })
#endif
1566
 
1567
/* Define this unconditionally, so it can be used for debugging.

   __udiv_qrnnd_c (q, r, n1, n0, d) divides the two-word number whose
   high word is N1 and low word is N0 by the word D, storing the
   quotient in Q and the remainder in R.  The quotient is built one
   half-word at a time: each step estimates a quotient digit from the
   high half of D (__d1) and then corrects it, at most twice, using the
   low half (__d0).

   NOTE(review): when this macro is selected as udiv_qrnnd, this file
   sets UDIV_NEEDS_NORMALIZATION to 1, i.e. callers are expected to pass
   a D with its most significant bit set; presumably N1 < D is also
   required so that the quotient fits in one word -- confirm against the
   interface description at the top of the file.  N1, N0 and D are
   expanded more than once.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {                                                                  \
    UWtype __d1, __d0, __q1, __q0;                                      \
    UWtype __r1, __r0, __m;                                             \
    __d1 = __ll_highpart (d);                                           \
    __d0 = __ll_lowpart (d);                                            \
                                                                        \
    /* High quotient digit: estimate with __d1, correct with __d0.  */  \
    __r1 = (n1) % __d1;                                                 \
    __q1 = (n1) / __d1;                                                 \
    __m = (UWtype) __q1 * __d0;                                         \
    __r1 = __r1 * __ll_B | __ll_highpart (n0);                          \
    if (__r1 < __m)                                                     \
      {                                                                 \
        __q1--, __r1 += (d);                                            \
        if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
          if (__r1 < __m)                                               \
            __q1--, __r1 += (d);                                        \
      }                                                                 \
    __r1 -= __m;                                                        \
                                                                        \
    /* Low quotient digit: same scheme on the partial remainder.  */    \
    __r0 = __r1 % __d1;                                                 \
    __q0 = __r1 / __d1;                                                 \
    __m = (UWtype) __q0 * __d0;                                         \
    __r0 = __r0 * __ll_B | __ll_lowpart (n0);                           \
    if (__r0 < __m)                                                     \
      {                                                                 \
        __q0--, __r0 += (d);                                            \
        if (__r0 >= (d))                                                \
          if (__r0 < __m)                                               \
            __q0--, __r0 += (d);                                        \
      }                                                                 \
    __r0 -= __m;                                                        \
                                                                        \
    (q) = (UWtype) __q1 * __ll_B | __q0;                                \
    (r) = __r0;                                                         \
  } while (0)
1604
 
1605
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).

   udiv_qrnnd (q, r, nh, nl, d) stores the value returned by
   __udiv_w_sdiv in Q and the remainder it writes through its first
   argument in R.  The remainder temporary is a UWtype, like the words
   the other macros in this file operate on, rather than a fixed
   USItype.  Its name is specific to this macro so that it cannot shadow
   a caller variable called `__r' passed as R.
   NOTE(review): assumes __udiv_w_sdiv takes a `UWtype *' remainder
   pointer, as libgcc declares it -- confirm for any other provider.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {                                                                  \
    UWtype __udiv_qrnnd_r;                                              \
    (q) = __udiv_w_sdiv (&__udiv_qrnnd_r, nh, nl, d);                   \
    (r) = __udiv_qrnnd_r;                                               \
  } while (0)
#endif
1615
 
1616
/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.
   UDIV_NEEDS_NORMALIZATION is set to 1 at the same time, so code that
   uses udiv_qrnnd can tell that this fallback is in effect.  */
#if !defined (udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif
1621
 
1622
#if !defined (count_leading_zeros)
/* Generic C count_leading_zeros (count, x): store in COUNT the number
   of zero bits above the most significant set bit of the word X.

   A shift amount __a is chosen first: for words of at most 32 bits by
   comparing X against the three quarter-word boundaries (__BITS4 bits
   each), otherwise by scanning down from the top in 8-bit steps until
   a non-zero byte is found.  __clz_tab is then indexed with X >> __a.
   NOTE(review): __clz_tab is defined elsewhere; presumably entry I
   holds the bit length of I, with entry 0 being 0 -- that is what makes
   the result for X == 0 equal COUNT_LEADING_ZEROS_0.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    UWtype __xr = (x);                                                  \
    UWtype __a;                                                         \
                                                                        \
    if (W_TYPE_SIZE <= 32)                                              \
      {                                                                 \
        __a = __xr < ((UWtype)1<<2*__BITS4)                             \
          ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4)                 \
          : (__xr < ((UWtype)1<<3*__BITS4) ?  2*__BITS4 : 3*__BITS4);   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)                  \
          if (((__xr >> __a) & 0xff) != 0)                              \
            break;                                                      \
      }                                                                 \
                                                                        \
    (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);             \
  } while (0)
/* Value advertised for count_leading_zeros of 0.  */
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1645
 
1646
#if !defined (count_trailing_zeros)
/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
   defined in asm, but if it is not, the C version above is good enough.

   count_trailing_zeros (count, x) stores in COUNT the bit position of
   the lowest set bit of the word X: `x & -x' isolates that bit, and its
   leading-zero count is subtracted from W_TYPE_SIZE - 1.  For X == 0
   the result depends on what count_leading_zeros yields for 0.  */
#define count_trailing_zeros(count, x) \
  do {                                                                  \
    UWtype __ctz_x = (x);                                               \
    UWtype __ctz_c;                                                     \
    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);                  \
    (count) = W_TYPE_SIZE - 1 - __ctz_c;                                \
  } while (0)
#endif
1657
 
1658
/* Default: if nothing above set UDIV_NEEDS_NORMALIZATION, define it
   as 0.  */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.