1 |
280 |
jeremybenn |
/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
|
2 |
|
|
Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
|
3 |
|
|
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
4 |
|
|
Free Software Foundation, Inc.
|
5 |
|
|
|
6 |
|
|
This file is part of the GNU C Library.
|
7 |
|
|
|
8 |
|
|
The GNU C Library is free software; you can redistribute it and/or
|
9 |
|
|
modify it under the terms of the GNU Lesser General Public
|
10 |
|
|
License as published by the Free Software Foundation; either
|
11 |
|
|
version 2.1 of the License, or (at your option) any later version.
|
12 |
|
|
|
13 |
|
|
In addition to the permissions in the GNU Lesser General Public
|
14 |
|
|
License, the Free Software Foundation gives you unlimited
|
15 |
|
|
permission to link the compiled version of this file into
|
16 |
|
|
combinations with other programs, and to distribute those
|
17 |
|
|
combinations without any restriction coming from the use of this
|
18 |
|
|
file. (The Lesser General Public License restrictions do apply in
|
19 |
|
|
other respects; for example, they cover modification of the file,
|
20 |
|
|
and distribution when not linked into a combine executable.)
|
21 |
|
|
|
22 |
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
23 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
24 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
25 |
|
|
Lesser General Public License for more details.
|
26 |
|
|
|
27 |
|
|
You should have received a copy of the GNU Lesser General Public
|
28 |
|
|
License along with the GNU C Library; if not, write to the Free
|
29 |
|
|
Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
|
30 |
|
|
MA 02110-1301, USA. */
|
31 |
|
|
|
32 |
|
|
/* You have to define the following before including this file:
|
33 |
|
|
|
34 |
|
|
UWtype -- An unsigned type, default type for operations (typically a "word")
|
35 |
|
|
UHWtype -- An unsigned type, at least half the size of UWtype.
|
36 |
|
|
UDWtype -- An unsigned type, at least twice as large as UWtype.
|
37 |
|
|
W_TYPE_SIZE -- size in bits of UWtype
|
38 |
|
|
|
39 |
|
|
UQItype -- Unsigned 8 bit type.
|
40 |
|
|
SItype, USItype -- Signed and unsigned 32 bit types.
|
41 |
|
|
DItype, UDItype -- Signed and unsigned 64 bit types.
|
42 |
|
|
|
43 |
|
|
On a 32 bit machine UWtype should typically be USItype;
|
44 |
|
|
on a 64 bit machine, UWtype should typically be UDItype. */
|
45 |
|
|
|
46 |
|
|
/* Half-word helpers used by the generic C fallbacks.
   __BITS4 is a quarter of the word width in bits; __ll_B is 2 raised to
   half the word width (the "base" when a UWtype is treated as two
   half-word digits); __ll_lowpart and __ll_highpart extract the low and
   high half of a value after converting it to UWtype.
   These refer to W_TYPE_SIZE and UWtype, whose defaults are only
   defined further down; that is fine because macro bodies are expanded
   at the point of use, not at the point of definition.  */
#define __BITS4 (W_TYPE_SIZE / 4)
|
47 |
|
|
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
|
48 |
|
|
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
|
49 |
|
|
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
|
50 |
|
|
|
51 |
|
|
/* Defaults applied when the includer has not defined W_TYPE_SIZE:
   a 32-bit word (UWtype = USItype) with a 64-bit double word
   (UDWtype = UDItype).  UHWtype is also mapped to USItype, which
   satisfies the "at least half the size of UWtype" requirement.
   Note that all four names are defined together, keyed only on
   W_TYPE_SIZE being undefined.  */
#ifndef W_TYPE_SIZE
|
52 |
|
|
#define W_TYPE_SIZE 32
|
53 |
|
|
#define UWtype USItype
|
54 |
|
|
#define UHWtype USItype
|
55 |
|
|
#define UDWtype UDItype
|
56 |
|
|
#endif
|
57 |
|
|
|
58 |
|
|
/* Used in glibc only. */
|
59 |
|
|
#ifndef attribute_hidden
|
60 |
|
|
#define attribute_hidden
|
61 |
|
|
#endif
|
62 |
|
|
|
63 |
|
|
/* Byte-indexed lookup table (256 entries) defined outside this header;
   used below by the non-CIX Alpha count_leading_zeros.
   NOTE(review): presumably __clz_tab[b] is the bit length of byte b
   (position of its highest set bit, plus one) -- confirm against the
   table's definition.  */
extern const UQItype __clz_tab[256] attribute_hidden;
|
64 |
|
|
|
65 |
|
|
/* Define auxiliary asm macros.
|
66 |
|
|
|
67 |
|
|
1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
|
68 |
|
|
UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
|
69 |
|
|
word product in HIGH_PROD and LOW_PROD.
|
70 |
|
|
|
71 |
|
|
2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
|
72 |
|
|
UDWtype product. This is just a variant of umul_ppmm.
|
73 |
|
|
|
74 |
|
|
3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
|
75 |
|
|
denominator) divides a UDWtype, composed by the UWtype integers
|
76 |
|
|
HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
|
77 |
|
|
in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
|
78 |
|
|
than DENOMINATOR for correct operation. If, in addition, the most
|
79 |
|
|
significant bit of DENOMINATOR must be 1, then the pre-processor symbol
|
80 |
|
|
UDIV_NEEDS_NORMALIZATION is defined to 1.
|
81 |
|
|
|
82 |
|
|
4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
|
83 |
|
|
denominator). Like udiv_qrnnd but the numbers are signed. The quotient
|
84 |
|
|
is rounded towards 0.
|
85 |
|
|
|
86 |
|
|
5) count_leading_zeros(count, x) counts the number of zero-bits from the
|
87 |
|
|
msb to the first nonzero bit in the UWtype X. This is the number of
|
88 |
|
|
steps X needs to be shifted left to set the msb. Undefined for X == 0,
|
89 |
|
|
unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
|
90 |
|
|
|
91 |
|
|
6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
|
92 |
|
|
from the least significant end.
|
93 |
|
|
|
94 |
|
|
7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
|
95 |
|
|
high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
|
96 |
|
|
HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
|
97 |
|
|
respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
|
98 |
|
|
(i.e. carry out) is not stored anywhere, and is lost.
|
99 |
|
|
|
100 |
|
|
8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
|
101 |
|
|
high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
|
102 |
|
|
composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
|
103 |
|
|
LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE
|
104 |
|
|
and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
|
105 |
|
|
and is lost.
|
106 |
|
|
|
107 |
|
|
If any of these macros are left undefined for a particular CPU,
|
108 |
|
|
C macros are used. */
|
109 |
|
|
|
110 |
|
|
/* The CPUs come in alphabetical order below.
|
111 |
|
|
|
112 |
|
|
Please add support for more CPUs here, or improve the current support
|
113 |
|
|
for the CPUs below!
|
114 |
|
|
(E.g. WE32100, IBM360.) */
|
115 |
|
|
|
116 |
|
|
#if defined (__GNUC__) && !defined (NO_ASM)
|
117 |
|
|
|
118 |
|
|
/* We sometimes need to clobber "cc" with gcc2, but that would not be
|
119 |
|
|
understood by gcc1. Use cpp to avoid major code duplication. */
|
120 |
|
|
#if __GNUC__ < 2
|
121 |
|
|
#define __CLOBBER_CC
|
122 |
|
|
#define __AND_CLOBBER_CC
|
123 |
|
|
#else /* __GNUC__ >= 2 */
|
124 |
|
|
#define __CLOBBER_CC : "cc"
|
125 |
|
|
#define __AND_CLOBBER_CC , "cc"
|
126 |
|
|
#endif /* __GNUC__ < 2 */
|
127 |
|
|
|
128 |
|
|
#if defined (__alpha) && W_TYPE_SIZE == 64
|
129 |
|
|
#define umul_ppmm(ph, pl, m0, m1) \
|
130 |
|
|
do { \
|
131 |
|
|
UDItype __m0 = (m0), __m1 = (m1); \
|
132 |
|
|
(ph) = __builtin_alpha_umulh (__m0, __m1); \
|
133 |
|
|
(pl) = __m0 * __m1; \
|
134 |
|
|
} while (0)
|
135 |
|
|
#define UMUL_TIME 46
|
136 |
|
|
#ifndef LONGLONG_STANDALONE
|
137 |
|
|
#define udiv_qrnnd(q, r, n1, n0, d) \
|
138 |
|
|
do { UDItype __r; \
|
139 |
|
|
(q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
|
140 |
|
|
(r) = __r; \
|
141 |
|
|
} while (0)
|
142 |
|
|
extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
|
143 |
|
|
#define UDIV_TIME 220
|
144 |
|
|
#endif /* LONGLONG_STANDALONE */
|
145 |
|
|
#ifdef __alpha_cix__
|
146 |
|
|
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
|
147 |
|
|
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
|
148 |
|
|
#define COUNT_LEADING_ZEROS_0 64
|
149 |
|
|
#else
|
150 |
|
|
#define count_leading_zeros(COUNT,X) \
|
151 |
|
|
do { \
|
152 |
|
|
UDItype __xr = (X), __t, __a; \
|
153 |
|
|
__t = __builtin_alpha_cmpbge (0, __xr); \
|
154 |
|
|
__a = __clz_tab[__t ^ 0xff] - 1; \
|
155 |
|
|
__t = __builtin_alpha_extbl (__xr, __a); \
|
156 |
|
|
(COUNT) = 64 - (__clz_tab[__t] + __a*8); \
|
157 |
|
|
} while (0)
|
158 |
|
|
#define count_trailing_zeros(COUNT,X) \
|
159 |
|
|
do { \
|
160 |
|
|
UDItype __xr = (X), __t, __a; \
|
161 |
|
|
__t = __builtin_alpha_cmpbge (0, __xr); \
|
162 |
|
|
__t = ~__t & -~__t; \
|
163 |
|
|
__a = ((__t & 0xCC) != 0) * 2; \
|
164 |
|
|
__a += ((__t & 0xF0) != 0) * 4; \
|
165 |
|
|
__a += ((__t & 0xAA) != 0); \
|
166 |
|
|
__t = __builtin_alpha_extbl (__xr, __a); \
|
167 |
|
|
__a <<= 3; \
|
168 |
|
|
__t &= -__t; \
|
169 |
|
|
__a += ((__t & 0xCC) != 0) * 2; \
|
170 |
|
|
__a += ((__t & 0xF0) != 0) * 4; \
|
171 |
|
|
__a += ((__t & 0xAA) != 0); \
|
172 |
|
|
(COUNT) = __a; \
|
173 |
|
|
} while (0)
|
174 |
|
|
#endif /* __alpha_cix__ */
|
175 |
|
|
#endif /* __alpha */
|
176 |
|
|
|
177 |
|
|
#if defined (__arc__) && W_TYPE_SIZE == 32
|
178 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
179 |
|
|
__asm__ ("add.f %1, %4, %5\n\tadc %0, %2, %3" \
|
180 |
|
|
: "=r" ((USItype) (sh)), \
|
181 |
|
|
"=&r" ((USItype) (sl)) \
|
182 |
|
|
: "%r" ((USItype) (ah)), \
|
183 |
|
|
"rIJ" ((USItype) (bh)), \
|
184 |
|
|
"%r" ((USItype) (al)), \
|
185 |
|
|
"rIJ" ((USItype) (bl)))
|
186 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
187 |
|
|
__asm__ ("sub.f %1, %4, %5\n\tsbc %0, %2, %3" \
|
188 |
|
|
: "=r" ((USItype) (sh)), \
|
189 |
|
|
"=&r" ((USItype) (sl)) \
|
190 |
|
|
: "r" ((USItype) (ah)), \
|
191 |
|
|
"rIJ" ((USItype) (bh)), \
|
192 |
|
|
"r" ((USItype) (al)), \
|
193 |
|
|
"rIJ" ((USItype) (bl)))
|
194 |
|
|
/* Call libgcc routine. */
|
195 |
|
|
/* umul_ppmm (w1, w0, u, v) for ARC: form the double-word product of U
   and V by calling __umulsidi3, then split it into its high word (W1)
   and low word (W0).
   DWunion is not defined in this part of the file; it must be supplied
   by the includer and provide a double-word member `ll' overlaid with
   word members `s.high' and `s.low'.
   All macro parameters are parenthesized, matching the other umul_ppmm
   definitions in this file.  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    DWunion __w;                                                        \
    __w.ll = __umulsidi3 ((u), (v));                                    \
    (w1) = __w.s.high;                                                  \
    (w0) = __w.s.low;                                                   \
  } while (0)
|
202 |
|
|
#define __umulsidi3 __umulsidi3
|
203 |
|
|
UDItype __umulsidi3 (USItype, USItype);
|
204 |
|
|
#endif
|
205 |
|
|
|
206 |
|
|
#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
|
207 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
208 |
|
|
__asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \
|
209 |
|
|
: "=r" ((USItype) (sh)), \
|
210 |
|
|
"=&r" ((USItype) (sl)) \
|
211 |
|
|
: "%r" ((USItype) (ah)), \
|
212 |
|
|
"rI" ((USItype) (bh)), \
|
213 |
|
|
"%r" ((USItype) (al)), \
|
214 |
|
|
"rI" ((USItype) (bl)) __CLOBBER_CC)
|
215 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
216 |
|
|
__asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \
|
217 |
|
|
: "=r" ((USItype) (sh)), \
|
218 |
|
|
"=&r" ((USItype) (sl)) \
|
219 |
|
|
: "r" ((USItype) (ah)), \
|
220 |
|
|
"rI" ((USItype) (bh)), \
|
221 |
|
|
"r" ((USItype) (al)), \
|
222 |
|
|
"rI" ((USItype) (bl)) __CLOBBER_CC)
|
223 |
|
|
#define umul_ppmm(xh, xl, a, b) \
|
224 |
|
|
{register USItype __t0, __t1, __t2; \
|
225 |
|
|
__asm__ ("%@ Inlined umul_ppmm\n" \
|
226 |
|
|
" mov %2, %5, lsr #16\n" \
|
227 |
|
|
" mov %0, %6, lsr #16\n" \
|
228 |
|
|
" bic %3, %5, %2, lsl #16\n" \
|
229 |
|
|
" bic %4, %6, %0, lsl #16\n" \
|
230 |
|
|
" mul %1, %3, %4\n" \
|
231 |
|
|
" mul %4, %2, %4\n" \
|
232 |
|
|
" mul %3, %0, %3\n" \
|
233 |
|
|
" mul %0, %2, %0\n" \
|
234 |
|
|
" adds %3, %4, %3\n" \
|
235 |
|
|
" addcs %0, %0, #65536\n" \
|
236 |
|
|
" adds %1, %1, %3, lsl #16\n" \
|
237 |
|
|
" adc %0, %0, %3, lsr #16" \
|
238 |
|
|
: "=&r" ((USItype) (xh)), \
|
239 |
|
|
"=r" ((USItype) (xl)), \
|
240 |
|
|
"=&r" (__t0), "=&r" (__t1), "=r" (__t2) \
|
241 |
|
|
: "r" ((USItype) (a)), \
|
242 |
|
|
"r" ((USItype) (b)) __CLOBBER_CC );}
|
243 |
|
|
#define UMUL_TIME 20
|
244 |
|
|
#define UDIV_TIME 100
|
245 |
|
|
#endif /* __arm__ */
|
246 |
|
|
|
247 |
|
|
#if defined(__arm__)
|
248 |
|
|
/* Let gcc decide how best to implement count_leading_zeros. */
|
249 |
|
|
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
|
250 |
|
|
#define COUNT_LEADING_ZEROS_0 32
|
251 |
|
|
#endif
|
252 |
|
|
|
253 |
|
|
#if defined (__CRIS__) && __CRIS_arch_version >= 3
|
254 |
|
|
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
|
255 |
|
|
#if __CRIS_arch_version >= 8
|
256 |
|
|
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
|
257 |
|
|
#endif
|
258 |
|
|
#endif /* __CRIS__ */
|
259 |
|
|
|
260 |
|
|
#if defined (__hppa) && W_TYPE_SIZE == 32
|
261 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
262 |
|
|
__asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
|
263 |
|
|
: "=r" ((USItype) (sh)), \
|
264 |
|
|
"=&r" ((USItype) (sl)) \
|
265 |
|
|
: "%rM" ((USItype) (ah)), \
|
266 |
|
|
"rM" ((USItype) (bh)), \
|
267 |
|
|
"%rM" ((USItype) (al)), \
|
268 |
|
|
"rM" ((USItype) (bl)))
|
269 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
270 |
|
|
__asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
|
271 |
|
|
: "=r" ((USItype) (sh)), \
|
272 |
|
|
"=&r" ((USItype) (sl)) \
|
273 |
|
|
: "rM" ((USItype) (ah)), \
|
274 |
|
|
"rM" ((USItype) (bh)), \
|
275 |
|
|
"rM" ((USItype) (al)), \
|
276 |
|
|
"rM" ((USItype) (bl)))
|
277 |
|
|
#if defined (_PA_RISC1_1)
|
278 |
|
|
#define umul_ppmm(w1, w0, u, v) \
|
279 |
|
|
do { \
|
280 |
|
|
union \
|
281 |
|
|
{ \
|
282 |
|
|
UDItype __f; \
|
283 |
|
|
struct {USItype __w1, __w0;} __w1w0; \
|
284 |
|
|
} __t; \
|
285 |
|
|
__asm__ ("xmpyu %1,%2,%0" \
|
286 |
|
|
: "=x" (__t.__f) \
|
287 |
|
|
: "x" ((USItype) (u)), \
|
288 |
|
|
"x" ((USItype) (v))); \
|
289 |
|
|
(w1) = __t.__w1w0.__w1; \
|
290 |
|
|
(w0) = __t.__w1w0.__w0; \
|
291 |
|
|
} while (0)
|
292 |
|
|
#define UMUL_TIME 8
|
293 |
|
|
#else
|
294 |
|
|
#define UMUL_TIME 30
|
295 |
|
|
#endif
|
296 |
|
|
#define UDIV_TIME 40
|
297 |
|
|
#define count_leading_zeros(count, x) \
|
298 |
|
|
do { \
|
299 |
|
|
USItype __tmp; \
|
300 |
|
|
__asm__ ( \
|
301 |
|
|
"ldi 1,%0\n" \
|
302 |
|
|
" extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
|
303 |
|
|
" extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n"\
|
304 |
|
|
" ldo 16(%0),%0 ; Yes. Perform add.\n" \
|
305 |
|
|
" extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
|
306 |
|
|
" extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n"\
|
307 |
|
|
" ldo 8(%0),%0 ; Yes. Perform add.\n" \
|
308 |
|
|
" extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
|
309 |
|
|
" extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n"\
|
310 |
|
|
" ldo 4(%0),%0 ; Yes. Perform add.\n" \
|
311 |
|
|
" extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
|
312 |
|
|
" extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n"\
|
313 |
|
|
" ldo 2(%0),%0 ; Yes. Perform add.\n" \
|
314 |
|
|
" extru %1,30,1,%1 ; Extract bit 1.\n" \
|
315 |
|
|
" sub %0,%1,%0 ; Subtract it.\n" \
|
316 |
|
|
: "=r" (count), "=r" (__tmp) : "1" (x)); \
|
317 |
|
|
} while (0)
|
318 |
|
|
#endif
|
319 |
|
|
|
320 |
|
|
#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
|
321 |
|
|
#define smul_ppmm(xh, xl, m0, m1) \
|
322 |
|
|
do { \
|
323 |
|
|
union {DItype __ll; \
|
324 |
|
|
struct {USItype __h, __l;} __i; \
|
325 |
|
|
} __x; \
|
326 |
|
|
__asm__ ("lr %N0,%1\n\tmr %0,%2" \
|
327 |
|
|
: "=&r" (__x.__ll) \
|
328 |
|
|
: "r" (m0), "r" (m1)); \
|
329 |
|
|
(xh) = __x.__i.__h; (xl) = __x.__i.__l; \
|
330 |
|
|
} while (0)
|
331 |
|
|
#define sdiv_qrnnd(q, r, n1, n0, d) \
|
332 |
|
|
do { \
|
333 |
|
|
union {DItype __ll; \
|
334 |
|
|
struct {USItype __h, __l;} __i; \
|
335 |
|
|
} __x; \
|
336 |
|
|
__x.__i.__h = n1; __x.__i.__l = n0; \
|
337 |
|
|
__asm__ ("dr %0,%2" \
|
338 |
|
|
: "=r" (__x.__ll) \
|
339 |
|
|
: "0" (__x.__ll), "r" (d)); \
|
340 |
|
|
(q) = __x.__i.__l; (r) = __x.__i.__h; \
|
341 |
|
|
} while (0)
|
342 |
|
|
#endif
|
343 |
|
|
|
344 |
|
|
#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
|
345 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
346 |
|
|
__asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}" \
|
347 |
|
|
: "=r" ((USItype) (sh)), \
|
348 |
|
|
"=&r" ((USItype) (sl)) \
|
349 |
|
|
: "%0" ((USItype) (ah)), \
|
350 |
|
|
"g" ((USItype) (bh)), \
|
351 |
|
|
"%1" ((USItype) (al)), \
|
352 |
|
|
"g" ((USItype) (bl)))
|
353 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
354 |
|
|
__asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}" \
|
355 |
|
|
: "=r" ((USItype) (sh)), \
|
356 |
|
|
"=&r" ((USItype) (sl)) \
|
357 |
|
|
: "0" ((USItype) (ah)), \
|
358 |
|
|
"g" ((USItype) (bh)), \
|
359 |
|
|
"1" ((USItype) (al)), \
|
360 |
|
|
"g" ((USItype) (bl)))
|
361 |
|
|
#define umul_ppmm(w1, w0, u, v) \
|
362 |
|
|
__asm__ ("mul{l} %3" \
|
363 |
|
|
: "=a" ((USItype) (w0)), \
|
364 |
|
|
"=d" ((USItype) (w1)) \
|
365 |
|
|
: "%0" ((USItype) (u)), \
|
366 |
|
|
"rm" ((USItype) (v)))
|
367 |
|
|
#define udiv_qrnnd(q, r, n1, n0, dv) \
|
368 |
|
|
__asm__ ("div{l} %4" \
|
369 |
|
|
: "=a" ((USItype) (q)), \
|
370 |
|
|
"=d" ((USItype) (r)) \
|
371 |
|
|
: "0" ((USItype) (n0)), \
|
372 |
|
|
"1" ((USItype) (n1)), \
|
373 |
|
|
"rm" ((USItype) (dv)))
|
374 |
|
|
#define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
|
375 |
|
|
#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
|
376 |
|
|
#define UMUL_TIME 40
|
377 |
|
|
#define UDIV_TIME 40
|
378 |
|
|
#endif /* 80x86 */
|
379 |
|
|
|
380 |
|
|
#if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
|
381 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
382 |
|
|
__asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}" \
|
383 |
|
|
: "=r" ((UDItype) (sh)), \
|
384 |
|
|
"=&r" ((UDItype) (sl)) \
|
385 |
|
|
: "%0" ((UDItype) (ah)), \
|
386 |
|
|
"rme" ((UDItype) (bh)), \
|
387 |
|
|
"%1" ((UDItype) (al)), \
|
388 |
|
|
"rme" ((UDItype) (bl)))
|
389 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
390 |
|
|
__asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}" \
|
391 |
|
|
: "=r" ((UDItype) (sh)), \
|
392 |
|
|
"=&r" ((UDItype) (sl)) \
|
393 |
|
|
: "0" ((UDItype) (ah)), \
|
394 |
|
|
"rme" ((UDItype) (bh)), \
|
395 |
|
|
"1" ((UDItype) (al)), \
|
396 |
|
|
"rme" ((UDItype) (bl)))
|
397 |
|
|
#define umul_ppmm(w1, w0, u, v) \
|
398 |
|
|
__asm__ ("mul{q} %3" \
|
399 |
|
|
: "=a" ((UDItype) (w0)), \
|
400 |
|
|
"=d" ((UDItype) (w1)) \
|
401 |
|
|
: "%0" ((UDItype) (u)), \
|
402 |
|
|
"rm" ((UDItype) (v)))
|
403 |
|
|
#define udiv_qrnnd(q, r, n1, n0, dv) \
|
404 |
|
|
__asm__ ("div{q} %4" \
|
405 |
|
|
: "=a" ((UDItype) (q)), \
|
406 |
|
|
"=d" ((UDItype) (r)) \
|
407 |
|
|
: "0" ((UDItype) (n0)), \
|
408 |
|
|
"1" ((UDItype) (n1)), \
|
409 |
|
|
"rm" ((UDItype) (dv)))
|
410 |
|
|
/* Count the leading zero bits of the 64-bit word X into COUNT.
   The "long long" builtin is used because the operand is a 64-bit
   UDItype, whereas `long' is only 32 bits wide on LLP64 targets, where
   __builtin_clzl would silently truncate X.  Undefined for X == 0.  */
#define count_leading_zeros(count, x) ((count) = __builtin_clzll (x))
|
411 |
|
|
/* Count the trailing zero bits of the 64-bit word X into COUNT.
   The "long long" builtin is used because the operand is a 64-bit
   UDItype, whereas `long' is only 32 bits wide on LLP64 targets, where
   __builtin_ctzl would only look at the low half of X.  Undefined for
   X == 0.  */
#define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x))
|
412 |
|
|
#define UMUL_TIME 40
|
413 |
|
|
#define UDIV_TIME 40
|
414 |
|
|
#endif /* x86_64 */
|
415 |
|
|
|
416 |
|
|
#if defined (__i960__) && W_TYPE_SIZE == 32
|
417 |
|
|
#define umul_ppmm(w1, w0, u, v) \
|
418 |
|
|
({union {UDItype __ll; \
|
419 |
|
|
struct {USItype __l, __h;} __i; \
|
420 |
|
|
} __xx; \
|
421 |
|
|
__asm__ ("emul %2,%1,%0" \
|
422 |
|
|
: "=d" (__xx.__ll) \
|
423 |
|
|
: "%dI" ((USItype) (u)), \
|
424 |
|
|
"dI" ((USItype) (v))); \
|
425 |
|
|
(w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
|
426 |
|
|
#define __umulsidi3(u, v) \
|
427 |
|
|
({UDItype __w; \
|
428 |
|
|
__asm__ ("emul %2,%1,%0" \
|
429 |
|
|
: "=d" (__w) \
|
430 |
|
|
: "%dI" ((USItype) (u)), \
|
431 |
|
|
"dI" ((USItype) (v))); \
|
432 |
|
|
__w; })
|
433 |
|
|
#endif /* __i960__ */
|
434 |
|
|
|
435 |
|
|
#if defined (__ia64) && W_TYPE_SIZE == 64
|
436 |
|
|
/* This form encourages gcc (pre-release 3.4 at least) to emit predicated
|
437 |
|
|
"sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency. The generic
|
438 |
|
|
code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
|
439 |
|
|
register, which takes an extra cycle. */
|
440 |
|
|
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (SH,SL) = (AH,AL) - (BH,BL); the borrow out of the high word is
   discarded.
   Every input operand is read exactly once into a local before any
   result is stored, so operands with side effects are evaluated a
   single time and SH/SL may name the same objects as the inputs.
   The borrow is deliberately expressed as an if/else on AL < BL rather
   than by subtracting the value of the comparison: per the note that
   precedes this macro, that form lets gcc emit predicated subtracts.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __sdm_ah = (ah), __sdm_al = (al);                            \
    UWtype __sdm_bh = (bh), __sdm_bl = (bl);                            \
    UWtype __x = __sdm_al - __sdm_bl;                                   \
    if (__sdm_al < __sdm_bl)                                            \
      (sh) = __sdm_ah - __sdm_bh - 1;                                   \
    else                                                                \
      (sh) = __sdm_ah - __sdm_bh;                                       \
    (sl) = __x;                                                         \
  } while (0)
|
450 |
|
|
|
451 |
|
|
/* Do both product parts in assembly, since that gives better code with
|
452 |
|
|
all gcc versions. Some callers will just use the upper part, and in
|
453 |
|
|
that situation we waste an instruction, but not any cycles. */
|
454 |
|
|
#define umul_ppmm(ph, pl, m0, m1) \
|
455 |
|
|
__asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \
|
456 |
|
|
: "=&f" (ph), "=f" (pl) \
|
457 |
|
|
: "f" (m0), "f" (m1))
|
458 |
|
|
#define count_leading_zeros(count, x) \
|
459 |
|
|
do { \
|
460 |
|
|
UWtype _x = (x), _y, _a, _c; \
|
461 |
|
|
__asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \
|
462 |
|
|
__asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \
|
463 |
|
|
_c = (_a - 1) << 3; \
|
464 |
|
|
_x >>= _c; \
|
465 |
|
|
if (_x >= 1 << 4) \
|
466 |
|
|
_x >>= 4, _c += 4; \
|
467 |
|
|
if (_x >= 1 << 2) \
|
468 |
|
|
_x >>= 2, _c += 2; \
|
469 |
|
|
_c += _x >> 1; \
|
470 |
|
|
(count) = W_TYPE_SIZE - 1 - _c; \
|
471 |
|
|
} while (0)
|
472 |
|
|
/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
|
473 |
|
|
based, and we don't need a special case for x==0 here */
|
474 |
|
|
#define count_trailing_zeros(count, x) \
|
475 |
|
|
do { \
|
476 |
|
|
UWtype __ctz_x = (x); \
|
477 |
|
|
__asm__ ("popcnt %0 = %1" \
|
478 |
|
|
: "=r" (count) \
|
479 |
|
|
: "r" ((__ctz_x-1) & ~__ctz_x)); \
|
480 |
|
|
} while (0)
|
481 |
|
|
#define UMUL_TIME 14
|
482 |
|
|
#endif
|
483 |
|
|
|
484 |
|
|
#if defined (__M32R__) && W_TYPE_SIZE == 32
|
485 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
486 |
|
|
/* The cmp clears the condition bit. */ \
|
487 |
|
|
__asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3" \
|
488 |
|
|
: "=r" ((USItype) (sh)), \
|
489 |
|
|
"=&r" ((USItype) (sl)) \
|
490 |
|
|
: "0" ((USItype) (ah)), \
|
491 |
|
|
"r" ((USItype) (bh)), \
|
492 |
|
|
"1" ((USItype) (al)), \
|
493 |
|
|
"r" ((USItype) (bl)) \
|
494 |
|
|
: "cbit")
|
495 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
496 |
|
|
/* The cmp clears the condition bit. */ \
|
497 |
|
|
__asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3" \
|
498 |
|
|
: "=r" ((USItype) (sh)), \
|
499 |
|
|
"=&r" ((USItype) (sl)) \
|
500 |
|
|
: "0" ((USItype) (ah)), \
|
501 |
|
|
"r" ((USItype) (bh)), \
|
502 |
|
|
"1" ((USItype) (al)), \
|
503 |
|
|
"r" ((USItype) (bl)) \
|
504 |
|
|
: "cbit")
|
505 |
|
|
#endif /* __M32R__ */
|
506 |
|
|
|
507 |
|
|
#if defined (__mc68000__) && W_TYPE_SIZE == 32
|
508 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
509 |
|
|
__asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
|
510 |
|
|
: "=d" ((USItype) (sh)), \
|
511 |
|
|
"=&d" ((USItype) (sl)) \
|
512 |
|
|
: "%0" ((USItype) (ah)), \
|
513 |
|
|
"d" ((USItype) (bh)), \
|
514 |
|
|
"%1" ((USItype) (al)), \
|
515 |
|
|
"g" ((USItype) (bl)))
|
516 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
517 |
|
|
__asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
|
518 |
|
|
: "=d" ((USItype) (sh)), \
|
519 |
|
|
"=&d" ((USItype) (sl)) \
|
520 |
|
|
: "0" ((USItype) (ah)), \
|
521 |
|
|
"d" ((USItype) (bh)), \
|
522 |
|
|
"1" ((USItype) (al)), \
|
523 |
|
|
"g" ((USItype) (bl)))
|
524 |
|
|
|
525 |
|
|
/* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r (the '060 is excluded by the test below). */
|
526 |
|
|
#if (defined (__mc68020__) && !defined (__mc68060__))
|
527 |
|
|
#define umul_ppmm(w1, w0, u, v) \
|
528 |
|
|
__asm__ ("mulu%.l %3,%1:%0" \
|
529 |
|
|
: "=d" ((USItype) (w0)), \
|
530 |
|
|
"=d" ((USItype) (w1)) \
|
531 |
|
|
: "%0" ((USItype) (u)), \
|
532 |
|
|
"dmi" ((USItype) (v)))
|
533 |
|
|
#define UMUL_TIME 45
|
534 |
|
|
#define udiv_qrnnd(q, r, n1, n0, d) \
|
535 |
|
|
__asm__ ("divu%.l %4,%1:%0" \
|
536 |
|
|
: "=d" ((USItype) (q)), \
|
537 |
|
|
"=d" ((USItype) (r)) \
|
538 |
|
|
: "0" ((USItype) (n0)), \
|
539 |
|
|
"1" ((USItype) (n1)), \
|
540 |
|
|
"dmi" ((USItype) (d)))
|
541 |
|
|
#define UDIV_TIME 90
|
542 |
|
|
#define sdiv_qrnnd(q, r, n1, n0, d) \
|
543 |
|
|
__asm__ ("divs%.l %4,%1:%0" \
|
544 |
|
|
: "=d" ((USItype) (q)), \
|
545 |
|
|
"=d" ((USItype) (r)) \
|
546 |
|
|
: "0" ((USItype) (n0)), \
|
547 |
|
|
"1" ((USItype) (n1)), \
|
548 |
|
|
"dmi" ((USItype) (d)))
|
549 |
|
|
|
550 |
|
|
#elif defined (__mcoldfire__) /* not mc68020 */
|
551 |
|
|
|
552 |
|
|
#define umul_ppmm(xh, xl, a, b) \
|
553 |
|
|
__asm__ ("| Inlined umul_ppmm\n" \
|
554 |
|
|
" move%.l %2,%/d0\n" \
|
555 |
|
|
" move%.l %3,%/d1\n" \
|
556 |
|
|
" move%.l %/d0,%/d2\n" \
|
557 |
|
|
" swap %/d0\n" \
|
558 |
|
|
" move%.l %/d1,%/d3\n" \
|
559 |
|
|
" swap %/d1\n" \
|
560 |
|
|
" move%.w %/d2,%/d4\n" \
|
561 |
|
|
" mulu %/d3,%/d4\n" \
|
562 |
|
|
" mulu %/d1,%/d2\n" \
|
563 |
|
|
" mulu %/d0,%/d3\n" \
|
564 |
|
|
" mulu %/d0,%/d1\n" \
|
565 |
|
|
" move%.l %/d4,%/d0\n" \
|
566 |
|
|
" clr%.w %/d0\n" \
|
567 |
|
|
" swap %/d0\n" \
|
568 |
|
|
" add%.l %/d0,%/d2\n" \
|
569 |
|
|
" add%.l %/d3,%/d2\n" \
|
570 |
|
|
" jcc 1f\n" \
|
571 |
|
|
" add%.l %#65536,%/d1\n" \
|
572 |
|
|
"1: swap %/d2\n" \
|
573 |
|
|
" moveq %#0,%/d0\n" \
|
574 |
|
|
" move%.w %/d2,%/d0\n" \
|
575 |
|
|
" move%.w %/d4,%/d2\n" \
|
576 |
|
|
" move%.l %/d2,%1\n" \
|
577 |
|
|
" add%.l %/d1,%/d0\n" \
|
578 |
|
|
" move%.l %/d0,%0" \
|
579 |
|
|
: "=g" ((USItype) (xh)), \
|
580 |
|
|
"=g" ((USItype) (xl)) \
|
581 |
|
|
: "g" ((USItype) (a)), \
|
582 |
|
|
"g" ((USItype) (b)) \
|
583 |
|
|
: "d0", "d1", "d2", "d3", "d4")
|
584 |
|
|
#define UMUL_TIME 100
|
585 |
|
|
#define UDIV_TIME 400
|
586 |
|
|
#else /* not ColdFire */
|
587 |
|
|
/* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX. */
|
588 |
|
|
#define umul_ppmm(xh, xl, a, b) \
|
589 |
|
|
__asm__ ("| Inlined umul_ppmm\n" \
|
590 |
|
|
" move%.l %2,%/d0\n" \
|
591 |
|
|
" move%.l %3,%/d1\n" \
|
592 |
|
|
" move%.l %/d0,%/d2\n" \
|
593 |
|
|
" swap %/d0\n" \
|
594 |
|
|
" move%.l %/d1,%/d3\n" \
|
595 |
|
|
" swap %/d1\n" \
|
596 |
|
|
" move%.w %/d2,%/d4\n" \
|
597 |
|
|
" mulu %/d3,%/d4\n" \
|
598 |
|
|
" mulu %/d1,%/d2\n" \
|
599 |
|
|
" mulu %/d0,%/d3\n" \
|
600 |
|
|
" mulu %/d0,%/d1\n" \
|
601 |
|
|
" move%.l %/d4,%/d0\n" \
|
602 |
|
|
" eor%.w %/d0,%/d0\n" \
|
603 |
|
|
" swap %/d0\n" \
|
604 |
|
|
" add%.l %/d0,%/d2\n" \
|
605 |
|
|
" add%.l %/d3,%/d2\n" \
|
606 |
|
|
" jcc 1f\n" \
|
607 |
|
|
" add%.l %#65536,%/d1\n" \
|
608 |
|
|
"1: swap %/d2\n" \
|
609 |
|
|
" moveq %#0,%/d0\n" \
|
610 |
|
|
" move%.w %/d2,%/d0\n" \
|
611 |
|
|
" move%.w %/d4,%/d2\n" \
|
612 |
|
|
" move%.l %/d2,%1\n" \
|
613 |
|
|
" add%.l %/d1,%/d0\n" \
|
614 |
|
|
" move%.l %/d0,%0" \
|
615 |
|
|
: "=g" ((USItype) (xh)), \
|
616 |
|
|
"=g" ((USItype) (xl)) \
|
617 |
|
|
: "g" ((USItype) (a)), \
|
618 |
|
|
"g" ((USItype) (b)) \
|
619 |
|
|
: "d0", "d1", "d2", "d3", "d4")
|
620 |
|
|
#define UMUL_TIME 100
|
621 |
|
|
#define UDIV_TIME 400
|
622 |
|
|
|
623 |
|
|
#endif /* not mc68020 */
|
624 |
|
|
|
625 |
|
|
/* The '020, '030, '040 and '060 have bitfield insns.
|
626 |
|
|
cpu32 disguises as a 68020, but lacks them. */
|
627 |
|
|
#if defined (__mc68020__) && !defined (__mcpu32__)
|
628 |
|
|
#define count_leading_zeros(count, x) \
|
629 |
|
|
__asm__ ("bfffo %1{%b2:%b2},%0" \
|
630 |
|
|
: "=d" ((USItype) (count)) \
|
631 |
|
|
: "od" ((USItype) (x)), "n" (0))
|
632 |
|
|
/* Some ColdFire architectures have a ff1 instruction supported via
|
633 |
|
|
__builtin_clz. */
|
634 |
|
|
#elif defined (__mcfisaaplus__) || defined (__mcfisac__)
|
635 |
|
|
#define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
|
636 |
|
|
#define COUNT_LEADING_ZEROS_0 32
|
637 |
|
|
#endif
|
638 |
|
|
#endif /* mc68000 */
|
639 |
|
|
|
640 |
|
|
#if defined (__m88000__) && W_TYPE_SIZE == 32
|
641 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
642 |
|
|
__asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
|
643 |
|
|
: "=r" ((USItype) (sh)), \
|
644 |
|
|
"=&r" ((USItype) (sl)) \
|
645 |
|
|
: "%rJ" ((USItype) (ah)), \
|
646 |
|
|
"rJ" ((USItype) (bh)), \
|
647 |
|
|
"%rJ" ((USItype) (al)), \
|
648 |
|
|
"rJ" ((USItype) (bl)))
|
649 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
650 |
|
|
__asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
|
651 |
|
|
: "=r" ((USItype) (sh)), \
|
652 |
|
|
"=&r" ((USItype) (sl)) \
|
653 |
|
|
: "rJ" ((USItype) (ah)), \
|
654 |
|
|
"rJ" ((USItype) (bh)), \
|
655 |
|
|
"rJ" ((USItype) (al)), \
|
656 |
|
|
"rJ" ((USItype) (bl)))
|
657 |
|
|
#define count_leading_zeros(count, x) \
|
658 |
|
|
do { \
|
659 |
|
|
USItype __cbtmp; \
|
660 |
|
|
__asm__ ("ff1 %0,%1" \
|
661 |
|
|
: "=r" (__cbtmp) \
|
662 |
|
|
: "r" ((USItype) (x))); \
|
663 |
|
|
(count) = __cbtmp ^ 31; \
|
664 |
|
|
} while (0)
|
665 |
|
|
#define COUNT_LEADING_ZEROS_0 63 /* sic */
|
666 |
|
|
#if defined (__mc88110__)
|
667 |
|
|
#define umul_ppmm(wh, wl, u, v) \
|
668 |
|
|
do { \
|
669 |
|
|
union {UDItype __ll; \
|
670 |
|
|
struct {USItype __h, __l;} __i; \
|
671 |
|
|
} __xx; \
|
672 |
|
|
__asm__ ("mulu.d %0,%1,%2" \
|
673 |
|
|
: "=r" (__xx.__ll) \
|
674 |
|
|
: "r" ((USItype) (u)), \
|
675 |
|
|
"r" ((USItype) (v))); \
|
676 |
|
|
(wh) = __xx.__i.__h; \
|
677 |
|
|
(wl) = __xx.__i.__l; \
|
678 |
|
|
} while (0)
|
679 |
|
|
#define udiv_qrnnd(q, r, n1, n0, d) \
|
680 |
|
|
({union {UDItype __ll; \
|
681 |
|
|
struct {USItype __h, __l;} __i; \
|
682 |
|
|
} __xx; \
|
683 |
|
|
USItype __q; \
|
684 |
|
|
__xx.__i.__h = (n1); __xx.__i.__l = (n0); \
|
685 |
|
|
__asm__ ("divu.d %0,%1,%2" \
|
686 |
|
|
: "=r" (__q) \
|
687 |
|
|
: "r" (__xx.__ll), \
|
688 |
|
|
"r" ((USItype) (d))); \
|
689 |
|
|
(r) = (n0) - __q * (d); (q) = __q; })
|
690 |
|
|
#define UMUL_TIME 5
|
691 |
|
|
#define UDIV_TIME 25
|
692 |
|
|
#else
|
693 |
|
|
#define UMUL_TIME 17
|
694 |
|
|
#define UDIV_TIME 150
|
695 |
|
|
#endif /* __mc88110__ */
|
696 |
|
|
#endif /* __m88000__ */
|
697 |
|
|
|
698 |
|
|
#if defined (__mips__) && W_TYPE_SIZE == 32
|
699 |
|
|
/* umul_ppmm (w1, w0, u, v) for 32-bit MIPS: written in plain C.
   U and V are converted to USItype, multiplied as a UDItype so the
   full 64-bit product is kept, and the product is then split into its
   upper 32 bits (W1) and lower 32 bits (W0).  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);              \
    (w1) = (USItype) (__x >> 32);                                       \
    (w0) = (USItype) (__x);                                             \
  } while (0)
|
705 |
|
|
#define UMUL_TIME 10
|
706 |
|
|
#define UDIV_TIME 100
|
707 |
|
|
|
708 |
|
|
#if (__mips == 32 || __mips == 64) && ! __mips16
|
709 |
|
|
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
|
710 |
|
|
#define COUNT_LEADING_ZEROS_0 32
|
711 |
|
|
#endif
|
712 |
|
|
#endif /* __mips__ */
|
713 |
|
|
|
714 |
|
|
#if defined (__ns32000__) && W_TYPE_SIZE == 32
|
715 |
|
|
#define umul_ppmm(w1, w0, u, v) \
|
716 |
|
|
({union {UDItype __ll; \
|
717 |
|
|
struct {USItype __l, __h;} __i; \
|
718 |
|
|
} __xx; \
|
719 |
|
|
__asm__ ("meid %2,%0" \
|
720 |
|
|
: "=g" (__xx.__ll) \
|
721 |
|
|
: "%0" ((USItype) (u)), \
|
722 |
|
|
"g" ((USItype) (v))); \
|
723 |
|
|
(w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
|
724 |
|
|
#define __umulsidi3(u, v) \
|
725 |
|
|
({UDItype __w; \
|
726 |
|
|
__asm__ ("meid %2,%0" \
|
727 |
|
|
: "=g" (__w) \
|
728 |
|
|
: "%0" ((USItype) (u)), \
|
729 |
|
|
"g" ((USItype) (v))); \
|
730 |
|
|
__w; })
|
731 |
|
|
#define udiv_qrnnd(q, r, n1, n0, d) \
|
732 |
|
|
({union {UDItype __ll; \
|
733 |
|
|
struct {USItype __l, __h;} __i; \
|
734 |
|
|
} __xx; \
|
735 |
|
|
__xx.__i.__h = (n1); __xx.__i.__l = (n0); \
|
736 |
|
|
__asm__ ("deid %2,%0" \
|
737 |
|
|
: "=g" (__xx.__ll) \
|
738 |
|
|
: "0" (__xx.__ll), \
|
739 |
|
|
"g" ((USItype) (d))); \
|
740 |
|
|
(r) = __xx.__i.__l; (q) = __xx.__i.__h; })
|
741 |
|
|
#define count_trailing_zeros(count,x) \
|
742 |
|
|
do { \
|
743 |
|
|
__asm__ ("ffsd %2,%0" \
|
744 |
|
|
: "=r" ((USItype) (count)) \
|
745 |
|
|
: "0" ((USItype) 0), \
|
746 |
|
|
"r" ((USItype) (x))); \
|
747 |
|
|
} while (0)
|
748 |
|
|
#endif /* __ns32000__ */
|
749 |
|
|
|
750 |
|
|
/* FIXME: We should test _IBMR2 here when we add assembly support for the
|
751 |
|
|
system vendor compilers.
|
752 |
|
|
FIXME: What's needed for gcc PowerPC VxWorks? __vxworks__ is not good
|
753 |
|
|
enough, since that hits ARM and m68k too. */
|
754 |
|
|
#if (defined (_ARCH_PPC) /* AIX */ \
|
755 |
|
|
|| defined (_ARCH_PWR) /* AIX */ \
|
756 |
|
|
|| defined (_ARCH_COM) /* AIX */ \
|
757 |
|
|
|| defined (__powerpc__) /* gcc */ \
|
758 |
|
|
|| defined (__POWERPC__) /* BEOS */ \
|
759 |
|
|
|| defined (__ppc__) /* Darwin */ \
|
760 |
|
|
|| (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
|
761 |
|
|
|| (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
|
762 |
|
|
&& CPU_FAMILY == PPC) \
|
763 |
|
|
) && W_TYPE_SIZE == 32
|
764 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
765 |
|
|
do { \
|
766 |
|
|
if (__builtin_constant_p (bh) && (bh) == 0) \
|
767 |
|
|
__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
|
768 |
|
|
: "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
|
769 |
|
|
else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
|
770 |
|
|
__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
|
771 |
|
|
: "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
|
772 |
|
|
else \
|
773 |
|
|
__asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
|
774 |
|
|
: "=r" (sh), "=&r" (sl) \
|
775 |
|
|
: "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
|
776 |
|
|
} while (0)
|
777 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
778 |
|
|
do { \
|
779 |
|
|
if (__builtin_constant_p (ah) && (ah) == 0) \
|
780 |
|
|
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
|
781 |
|
|
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
|
782 |
|
|
else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
|
783 |
|
|
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
|
784 |
|
|
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
|
785 |
|
|
else if (__builtin_constant_p (bh) && (bh) == 0) \
|
786 |
|
|
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
|
787 |
|
|
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
|
788 |
|
|
else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
|
789 |
|
|
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
|
790 |
|
|
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
|
791 |
|
|
else \
|
792 |
|
|
__asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
|
793 |
|
|
: "=r" (sh), "=&r" (sl) \
|
794 |
|
|
: "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
|
795 |
|
|
} while (0)
|
796 |
|
|
#define count_leading_zeros(count, x) \
|
797 |
|
|
__asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
|
798 |
|
|
#define COUNT_LEADING_ZEROS_0 32
|
799 |
|
|
#if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
|
800 |
|
|
|| defined (__ppc__) \
|
801 |
|
|
|| (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
|
802 |
|
|
|| (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
|
803 |
|
|
&& CPU_FAMILY == PPC)
|
804 |
|
|
/* Unsigned 32x32->64 multiply for 32-bit PowerPC: PH receives the result
   of the mulhwu instruction (the high word of the product), PL receives
   the low word from an ordinary C multiply.
   The operands are captured once in __m0/__m1 and those copies feed both
   the asm and the C multiply, so an argument with side effects is
   evaluated exactly once and both halves are computed from the same
   USItype values.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    USItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhwu %0,%1,%2"						\
	     : "=r" (ph)						\
	     : "%r" (__m0), "r" (__m1));				\
    (pl) = __m0 * __m1;							\
  } while (0)
|
810 |
|
|
#define UMUL_TIME 15
|
811 |
|
|
/* Signed 32x32->64 multiply for 32-bit PowerPC: PH receives the result of
   the mulhw instruction (the high word of the signed product), PL receives
   the low word from an ordinary C multiply.
   The operands are captured once in __m0/__m1 and those copies feed both
   the asm and the C multiply, so an argument with side effects is
   evaluated exactly once and both halves are computed from the same
   SItype values.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {									\
    SItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhw %0,%1,%2"						\
	     : "=r" (ph)						\
	     : "%r" (__m0), "r" (__m1));				\
    (pl) = __m0 * __m1;							\
  } while (0)
|
817 |
|
|
#define SMUL_TIME 14
|
818 |
|
|
#define UDIV_TIME 120
|
819 |
|
|
#elif defined (_ARCH_PWR)
|
820 |
|
|
#define UMUL_TIME 8
|
821 |
|
|
#define smul_ppmm(xh, xl, m0, m1) \
|
822 |
|
|
__asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
|
823 |
|
|
#define SMUL_TIME 4
|
824 |
|
|
#define sdiv_qrnnd(q, r, nh, nl, d) \
|
825 |
|
|
__asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
|
826 |
|
|
#define UDIV_TIME 100
|
827 |
|
|
#endif
|
828 |
|
|
#endif /* 32-bit POWER architecture variants. */
|
829 |
|
|
|
830 |
|
|
/* We should test _IBMR2 here when we add assembly support for the system
|
831 |
|
|
vendor compilers. */
|
832 |
|
|
#if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
|
833 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
834 |
|
|
do { \
|
835 |
|
|
if (__builtin_constant_p (bh) && (bh) == 0) \
|
836 |
|
|
__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
|
837 |
|
|
: "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
|
838 |
|
|
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
|
839 |
|
|
__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
|
840 |
|
|
: "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
|
841 |
|
|
else \
|
842 |
|
|
__asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
|
843 |
|
|
: "=r" (sh), "=&r" (sl) \
|
844 |
|
|
: "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
|
845 |
|
|
} while (0)
|
846 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
847 |
|
|
do { \
|
848 |
|
|
if (__builtin_constant_p (ah) && (ah) == 0) \
|
849 |
|
|
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
|
850 |
|
|
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
|
851 |
|
|
else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
|
852 |
|
|
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
|
853 |
|
|
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
|
854 |
|
|
else if (__builtin_constant_p (bh) && (bh) == 0) \
|
855 |
|
|
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
|
856 |
|
|
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
|
857 |
|
|
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
|
858 |
|
|
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
|
859 |
|
|
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
|
860 |
|
|
else \
|
861 |
|
|
__asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
|
862 |
|
|
: "=r" (sh), "=&r" (sl) \
|
863 |
|
|
: "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
|
864 |
|
|
} while (0)
|
865 |
|
|
#define count_leading_zeros(count, x) \
|
866 |
|
|
__asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
|
867 |
|
|
#define COUNT_LEADING_ZEROS_0 64
|
868 |
|
|
/* Unsigned 64x64->128 multiply for 64-bit PowerPC: PH receives the result
   of the mulhdu instruction (the high doubleword of the product), PL
   receives the low doubleword from an ordinary C multiply.
   The operands are captured once in __m0/__m1 and those copies feed both
   the asm and the C multiply, so an argument with side effects is
   evaluated exactly once and both halves are computed from the same
   UDItype values.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    UDItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhdu %0,%1,%2"						\
	     : "=r" (ph)						\
	     : "%r" (__m0), "r" (__m1));				\
    (pl) = __m0 * __m1;							\
  } while (0)
|
874 |
|
|
#define UMUL_TIME 15
|
875 |
|
|
/* Signed 64x64->128 multiply for 64-bit PowerPC: PH receives the result of
   the mulhd instruction (the high doubleword of the signed product), PL
   receives the low doubleword from an ordinary C multiply.
   The operands are captured once in __m0/__m1 and those copies feed both
   the asm and the C multiply, so an argument with side effects is
   evaluated exactly once and both halves are computed from the same
   DItype values.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {									\
    DItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhd %0,%1,%2"						\
	     : "=r" (ph)						\
	     : "%r" (__m0), "r" (__m1));				\
    (pl) = __m0 * __m1;							\
  } while (0)
|
881 |
|
|
#define SMUL_TIME 14 /* ??? */
|
882 |
|
|
#define UDIV_TIME 120 /* ??? */
|
883 |
|
|
#endif /* 64-bit PowerPC. */
|
884 |
|
|
|
885 |
|
|
#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
|
886 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
887 |
|
|
__asm__ ("a %1,%5\n\tae %0,%3" \
|
888 |
|
|
: "=r" ((USItype) (sh)), \
|
889 |
|
|
"=&r" ((USItype) (sl)) \
|
890 |
|
|
: "%0" ((USItype) (ah)), \
|
891 |
|
|
"r" ((USItype) (bh)), \
|
892 |
|
|
"%1" ((USItype) (al)), \
|
893 |
|
|
"r" ((USItype) (bl)))
|
894 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
895 |
|
|
__asm__ ("s %1,%5\n\tse %0,%3" \
|
896 |
|
|
: "=r" ((USItype) (sh)), \
|
897 |
|
|
"=&r" ((USItype) (sl)) \
|
898 |
|
|
: "0" ((USItype) (ah)), \
|
899 |
|
|
"r" ((USItype) (bh)), \
|
900 |
|
|
"1" ((USItype) (al)), \
|
901 |
|
|
"r" ((USItype) (bl)))
|
902 |
|
|
#define umul_ppmm(ph, pl, m0, m1) \
|
903 |
|
|
do { \
|
904 |
|
|
USItype __m0 = (m0), __m1 = (m1); \
|
905 |
|
|
__asm__ ( \
|
906 |
|
|
"s r2,r2\n" \
|
907 |
|
|
" mts r10,%2\n" \
|
908 |
|
|
" m r2,%3\n" \
|
909 |
|
|
" m r2,%3\n" \
|
910 |
|
|
" m r2,%3\n" \
|
911 |
|
|
" m r2,%3\n" \
|
912 |
|
|
" m r2,%3\n" \
|
913 |
|
|
" m r2,%3\n" \
|
914 |
|
|
" m r2,%3\n" \
|
915 |
|
|
" m r2,%3\n" \
|
916 |
|
|
" m r2,%3\n" \
|
917 |
|
|
" m r2,%3\n" \
|
918 |
|
|
" m r2,%3\n" \
|
919 |
|
|
" m r2,%3\n" \
|
920 |
|
|
" m r2,%3\n" \
|
921 |
|
|
" m r2,%3\n" \
|
922 |
|
|
" m r2,%3\n" \
|
923 |
|
|
" m r2,%3\n" \
|
924 |
|
|
" cas %0,r2,r0\n" \
|
925 |
|
|
" mfs r10,%1" \
|
926 |
|
|
: "=r" ((USItype) (ph)), \
|
927 |
|
|
"=r" ((USItype) (pl)) \
|
928 |
|
|
: "%r" (__m0), \
|
929 |
|
|
"r" (__m1) \
|
930 |
|
|
: "r2"); \
|
931 |
|
|
(ph) += ((((SItype) __m0 >> 31) & __m1) \
|
932 |
|
|
+ (((SItype) __m1 >> 31) & __m0)); \
|
933 |
|
|
} while (0)
|
934 |
|
|
#define UMUL_TIME 20
|
935 |
|
|
#define UDIV_TIME 200
|
936 |
|
|
#define count_leading_zeros(count, x) \
|
937 |
|
|
do { \
|
938 |
|
|
if ((x) >= 0x10000) \
|
939 |
|
|
__asm__ ("clz %0,%1" \
|
940 |
|
|
: "=r" ((USItype) (count)) \
|
941 |
|
|
: "r" ((USItype) (x) >> 16)); \
|
942 |
|
|
else \
|
943 |
|
|
{ \
|
944 |
|
|
__asm__ ("clz %0,%1" \
|
945 |
|
|
: "=r" ((USItype) (count)) \
|
946 |
|
|
: "r" ((USItype) (x))); \
|
947 |
|
|
(count) += 16; \
|
948 |
|
|
} \
|
949 |
|
|
} while (0)
|
950 |
|
|
#endif
|
951 |
|
|
|
952 |
|
|
#if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
|
953 |
|
|
#ifndef __sh1__
|
954 |
|
|
#define umul_ppmm(w1, w0, u, v) \
|
955 |
|
|
__asm__ ( \
|
956 |
|
|
"dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0 mach,%0" \
|
957 |
|
|
: "=r<" ((USItype)(w1)), \
|
958 |
|
|
"=r<" ((USItype)(w0)) \
|
959 |
|
|
: "r" ((USItype)(u)), \
|
960 |
|
|
"r" ((USItype)(v)) \
|
961 |
|
|
: "macl", "mach")
|
962 |
|
|
#define UMUL_TIME 5
|
963 |
|
|
#endif
|
964 |
|
|
|
965 |
|
|
/* This is the same algorithm as __udiv_qrnnd_c. */
|
966 |
|
|
#define UDIV_NEEDS_NORMALIZATION 1
|
967 |
|
|
|
968 |
|
|
#define udiv_qrnnd(q, r, n1, n0, d) \
|
969 |
|
|
do { \
|
970 |
|
|
extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
|
971 |
|
|
__attribute__ ((visibility ("hidden"))); \
|
972 |
|
|
/* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
|
973 |
|
|
__asm__ ( \
|
974 |
|
|
"mov%M4 %4,r5\n" \
|
975 |
|
|
" swap.w %3,r4\n" \
|
976 |
|
|
" swap.w r5,r6\n" \
|
977 |
|
|
" jsr @%5\n" \
|
978 |
|
|
" shll16 r6\n" \
|
979 |
|
|
" swap.w r4,r4\n" \
|
980 |
|
|
" jsr @%5\n" \
|
981 |
|
|
" swap.w r1,%0\n" \
|
982 |
|
|
" or r1,%0" \
|
983 |
|
|
: "=r" (q), "=&z" (r) \
|
984 |
|
|
: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
|
985 |
|
|
: "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
|
986 |
|
|
} while (0)
|
987 |
|
|
|
988 |
|
|
#define UDIV_TIME 80
|
989 |
|
|
|
990 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
991 |
|
|
__asm__ ("clrt;subc %5,%1; subc %4,%0" \
|
992 |
|
|
: "=r" (sh), "=r" (sl) \
|
993 |
|
|
: "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
|
994 |
|
|
|
995 |
|
|
#endif /* __sh__ */
|
996 |
|
|
|
997 |
|
|
#if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
|
998 |
|
|
/* Widening unsigned multiply: both operands are truncated to USItype and
   the product is formed in UDItype.  Each argument is parenthesized so
   that an expression argument (e.g. a + b) is cast and multiplied as a
   whole rather than being split by operator precedence.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
|
999 |
|
|
#define count_leading_zeros(count, x) \
|
1000 |
|
|
do \
|
1001 |
|
|
{ \
|
1002 |
|
|
UDItype x_ = (USItype)(x); \
|
1003 |
|
|
SItype c_; \
|
1004 |
|
|
\
|
1005 |
|
|
__asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_)); \
|
1006 |
|
|
(count) = c_ - 31; \
|
1007 |
|
|
} \
|
1008 |
|
|
while (0)
|
1009 |
|
|
#define COUNT_LEADING_ZEROS_0 32
|
1010 |
|
|
#endif
|
1011 |
|
|
|
1012 |
|
|
#if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
|
1013 |
|
|
&& W_TYPE_SIZE == 32
|
1014 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
1015 |
|
|
__asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
|
1016 |
|
|
: "=r" ((USItype) (sh)), \
|
1017 |
|
|
"=&r" ((USItype) (sl)) \
|
1018 |
|
|
: "%rJ" ((USItype) (ah)), \
|
1019 |
|
|
"rI" ((USItype) (bh)), \
|
1020 |
|
|
"%rJ" ((USItype) (al)), \
|
1021 |
|
|
"rI" ((USItype) (bl)) \
|
1022 |
|
|
__CLOBBER_CC)
|
1023 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
1024 |
|
|
__asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
|
1025 |
|
|
: "=r" ((USItype) (sh)), \
|
1026 |
|
|
"=&r" ((USItype) (sl)) \
|
1027 |
|
|
: "rJ" ((USItype) (ah)), \
|
1028 |
|
|
"rI" ((USItype) (bh)), \
|
1029 |
|
|
"rJ" ((USItype) (al)), \
|
1030 |
|
|
"rI" ((USItype) (bl)) \
|
1031 |
|
|
__CLOBBER_CC)
|
1032 |
|
|
#if defined (__sparc_v8__)
|
1033 |
|
|
#define umul_ppmm(w1, w0, u, v) \
|
1034 |
|
|
__asm__ ("umul %2,%3,%1;rd %%y,%0" \
|
1035 |
|
|
: "=r" ((USItype) (w1)), \
|
1036 |
|
|
"=r" ((USItype) (w0)) \
|
1037 |
|
|
: "r" ((USItype) (u)), \
|
1038 |
|
|
"r" ((USItype) (v)))
|
1039 |
|
|
#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
|
1040 |
|
|
__asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
|
1041 |
|
|
: "=&r" ((USItype) (__q)), \
|
1042 |
|
|
"=&r" ((USItype) (__r)) \
|
1043 |
|
|
: "r" ((USItype) (__n1)), \
|
1044 |
|
|
"r" ((USItype) (__n0)), \
|
1045 |
|
|
"r" ((USItype) (__d)))
|
1046 |
|
|
#else
|
1047 |
|
|
#if defined (__sparclite__)
|
1048 |
|
|
/* This has hardware multiply but not divide. It also has two additional
|
1049 |
|
|
instructions scan (ffs from high bit) and divscc. */
|
1050 |
|
|
#define umul_ppmm(w1, w0, u, v) \
|
1051 |
|
|
__asm__ ("umul %2,%3,%1;rd %%y,%0" \
|
1052 |
|
|
: "=r" ((USItype) (w1)), \
|
1053 |
|
|
"=r" ((USItype) (w0)) \
|
1054 |
|
|
: "r" ((USItype) (u)), \
|
1055 |
|
|
"r" ((USItype) (v)))
|
1056 |
|
|
#define udiv_qrnnd(q, r, n1, n0, d) \
|
1057 |
|
|
__asm__ ("! Inlined udiv_qrnnd\n" \
|
1058 |
|
|
" wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
|
1059 |
|
|
" tst %%g0\n" \
|
1060 |
|
|
" divscc %3,%4,%%g1\n" \
|
1061 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1062 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1063 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1064 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1065 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1066 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1067 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1068 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1069 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1070 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1071 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1072 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1073 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1074 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1075 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1076 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1077 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1078 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1079 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1080 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1081 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1082 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1083 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1084 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1085 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1086 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1087 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1088 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1089 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1090 |
|
|
" divscc %%g1,%4,%%g1\n" \
|
1091 |
|
|
" divscc %%g1,%4,%0\n" \
|
1092 |
|
|
" rd %%y,%1\n" \
|
1093 |
|
|
" bl,a 1f\n" \
|
1094 |
|
|
" add %1,%4,%1\n" \
|
1095 |
|
|
"1: ! End of inline udiv_qrnnd" \
|
1096 |
|
|
: "=r" ((USItype) (q)), \
|
1097 |
|
|
"=r" ((USItype) (r)) \
|
1098 |
|
|
: "r" ((USItype) (n1)), \
|
1099 |
|
|
"r" ((USItype) (n0)), \
|
1100 |
|
|
"rI" ((USItype) (d)) \
|
1101 |
|
|
: "g1" __AND_CLOBBER_CC)
|
1102 |
|
|
#define UDIV_TIME 37
|
1103 |
|
|
#define count_leading_zeros(count, x) \
|
1104 |
|
|
do { \
|
1105 |
|
|
__asm__ ("scan %1,1,%0" \
|
1106 |
|
|
: "=r" ((USItype) (count)) \
|
1107 |
|
|
: "r" ((USItype) (x))); \
|
1108 |
|
|
} while (0)
|
1109 |
|
|
/* Early sparclites return 63 for an argument of 0, but they warn that future
|
1110 |
|
|
implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0
|
1111 |
|
|
undefined. */
|
1112 |
|
|
#else
|
1113 |
|
|
/* SPARC without integer multiplication and divide instructions.
|
1114 |
|
|
(i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
|
1115 |
|
|
#define umul_ppmm(w1, w0, u, v) \
|
1116 |
|
|
__asm__ ("! Inlined umul_ppmm\n" \
|
1117 |
|
|
" wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n"\
|
1118 |
|
|
" sra %3,31,%%o5 ! Don't move this insn\n" \
|
1119 |
|
|
" and %2,%%o5,%%o5 ! Don't move this insn\n" \
|
1120 |
|
|
" andcc %%g0,0,%%g1 ! Don't move this insn\n" \
|
1121 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1122 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1123 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1124 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1125 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1126 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1127 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1128 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1129 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1130 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1131 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1132 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1133 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1134 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1135 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1136 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1137 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1138 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1139 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1140 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1141 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1142 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1143 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1144 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1145 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1146 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1147 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1148 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1149 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1150 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1151 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1152 |
|
|
" mulscc %%g1,%3,%%g1\n" \
|
1153 |
|
|
" mulscc %%g1,0,%%g1\n" \
|
1154 |
|
|
" add %%g1,%%o5,%0\n" \
|
1155 |
|
|
" rd %%y,%1" \
|
1156 |
|
|
: "=r" ((USItype) (w1)), \
|
1157 |
|
|
"=r" ((USItype) (w0)) \
|
1158 |
|
|
: "%rI" ((USItype) (u)), \
|
1159 |
|
|
"r" ((USItype) (v)) \
|
1160 |
|
|
: "g1", "o5" __AND_CLOBBER_CC)
|
1161 |
|
|
#define UMUL_TIME 39 /* 39 instructions */
|
1162 |
|
|
/* It's quite necessary to add this much assembler for the sparc.
|
1163 |
|
|
The default udiv_qrnnd (in C) is more than 10 times slower! */
|
1164 |
|
|
#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
|
1165 |
|
|
__asm__ ("! Inlined udiv_qrnnd\n" \
|
1166 |
|
|
" mov 32,%%g1\n" \
|
1167 |
|
|
" subcc %1,%2,%%g0\n" \
|
1168 |
|
|
"1: bcs 5f\n" \
|
1169 |
|
|
" addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
|
1170 |
|
|
" sub %1,%2,%1 ! this kills msb of n\n" \
|
1171 |
|
|
" addx %1,%1,%1 ! so this can't give carry\n" \
|
1172 |
|
|
" subcc %%g1,1,%%g1\n" \
|
1173 |
|
|
"2: bne 1b\n" \
|
1174 |
|
|
" subcc %1,%2,%%g0\n" \
|
1175 |
|
|
" bcs 3f\n" \
|
1176 |
|
|
" addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
|
1177 |
|
|
" b 3f\n" \
|
1178 |
|
|
" sub %1,%2,%1 ! this kills msb of n\n" \
|
1179 |
|
|
"4: sub %1,%2,%1\n" \
|
1180 |
|
|
"5: addxcc %1,%1,%1\n" \
|
1181 |
|
|
" bcc 2b\n" \
|
1182 |
|
|
" subcc %%g1,1,%%g1\n" \
|
1183 |
|
|
"! Got carry from n. Subtract next step to cancel this carry.\n" \
|
1184 |
|
|
" bne 4b\n" \
|
1185 |
|
|
" addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n" \
|
1186 |
|
|
" sub %1,%2,%1\n" \
|
1187 |
|
|
"3: xnor %0,0,%0\n" \
|
1188 |
|
|
" ! End of inline udiv_qrnnd" \
|
1189 |
|
|
: "=&r" ((USItype) (__q)), \
|
1190 |
|
|
"=&r" ((USItype) (__r)) \
|
1191 |
|
|
: "r" ((USItype) (__d)), \
|
1192 |
|
|
"1" ((USItype) (__n1)), \
|
1193 |
|
|
"0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
|
1194 |
|
|
#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */
|
1195 |
|
|
#endif /* __sparclite__ */
|
1196 |
|
|
#endif /* __sparc_v8__ */
|
1197 |
|
|
#endif /* sparc32 */
|
1198 |
|
|
|
1199 |
|
|
#if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
|
1200 |
|
|
&& W_TYPE_SIZE == 64
|
1201 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
1202 |
|
|
__asm__ ("addcc %r4,%5,%1\n\t" \
|
1203 |
|
|
"add %r2,%3,%0\n\t" \
|
1204 |
|
|
"bcs,a,pn %%xcc, 1f\n\t" \
|
1205 |
|
|
"add %0, 1, %0\n" \
|
1206 |
|
|
"1:" \
|
1207 |
|
|
: "=r" ((UDItype)(sh)), \
|
1208 |
|
|
"=&r" ((UDItype)(sl)) \
|
1209 |
|
|
: "%rJ" ((UDItype)(ah)), \
|
1210 |
|
|
"rI" ((UDItype)(bh)), \
|
1211 |
|
|
"%rJ" ((UDItype)(al)), \
|
1212 |
|
|
"rI" ((UDItype)(bl)) \
|
1213 |
|
|
__CLOBBER_CC)
|
1214 |
|
|
|
1215 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
1216 |
|
|
__asm__ ("subcc %r4,%5,%1\n\t" \
|
1217 |
|
|
"sub %r2,%3,%0\n\t" \
|
1218 |
|
|
"bcs,a,pn %%xcc, 1f\n\t" \
|
1219 |
|
|
"sub %0, 1, %0\n\t" \
|
1220 |
|
|
"1:" \
|
1221 |
|
|
: "=r" ((UDItype)(sh)), \
|
1222 |
|
|
"=&r" ((UDItype)(sl)) \
|
1223 |
|
|
: "rJ" ((UDItype)(ah)), \
|
1224 |
|
|
"rI" ((UDItype)(bh)), \
|
1225 |
|
|
"rJ" ((UDItype)(al)), \
|
1226 |
|
|
"rI" ((UDItype)(bl)) \
|
1227 |
|
|
__CLOBBER_CC)
|
1228 |
|
|
|
1229 |
|
|
#define umul_ppmm(wh, wl, u, v) \
|
1230 |
|
|
do { \
|
1231 |
|
|
UDItype tmp1, tmp2, tmp3, tmp4; \
|
1232 |
|
|
__asm__ __volatile__ ( \
|
1233 |
|
|
"srl %7,0,%3\n\t" \
|
1234 |
|
|
"mulx %3,%6,%1\n\t" \
|
1235 |
|
|
"srlx %6,32,%2\n\t" \
|
1236 |
|
|
"mulx %2,%3,%4\n\t" \
|
1237 |
|
|
"sllx %4,32,%5\n\t" \
|
1238 |
|
|
"srl %6,0,%3\n\t" \
|
1239 |
|
|
"sub %1,%5,%5\n\t" \
|
1240 |
|
|
"srlx %5,32,%5\n\t" \
|
1241 |
|
|
"addcc %4,%5,%4\n\t" \
|
1242 |
|
|
"srlx %7,32,%5\n\t" \
|
1243 |
|
|
"mulx %3,%5,%3\n\t" \
|
1244 |
|
|
"mulx %2,%5,%5\n\t" \
|
1245 |
|
|
"sethi %%hi(0x80000000),%2\n\t" \
|
1246 |
|
|
"addcc %4,%3,%4\n\t" \
|
1247 |
|
|
"srlx %4,32,%4\n\t" \
|
1248 |
|
|
"add %2,%2,%2\n\t" \
|
1249 |
|
|
"movcc %%xcc,%%g0,%2\n\t" \
|
1250 |
|
|
"addcc %5,%4,%5\n\t" \
|
1251 |
|
|
"sllx %3,32,%3\n\t" \
|
1252 |
|
|
"add %1,%3,%1\n\t" \
|
1253 |
|
|
"add %5,%2,%0" \
|
1254 |
|
|
: "=r" ((UDItype)(wh)), \
|
1255 |
|
|
"=&r" ((UDItype)(wl)), \
|
1256 |
|
|
"=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
|
1257 |
|
|
: "r" ((UDItype)(u)), \
|
1258 |
|
|
"r" ((UDItype)(v)) \
|
1259 |
|
|
__CLOBBER_CC); \
|
1260 |
|
|
} while (0)
|
1261 |
|
|
#define UMUL_TIME 96
|
1262 |
|
|
#define UDIV_TIME 230
|
1263 |
|
|
#endif /* sparc64 */
|
1264 |
|
|
|
1265 |
|
|
#if defined (__vax__) && W_TYPE_SIZE == 32
|
1266 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
1267 |
|
|
__asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
|
1268 |
|
|
: "=g" ((USItype) (sh)), \
|
1269 |
|
|
"=&g" ((USItype) (sl)) \
|
1270 |
|
|
: "%0" ((USItype) (ah)), \
|
1271 |
|
|
"g" ((USItype) (bh)), \
|
1272 |
|
|
"%1" ((USItype) (al)), \
|
1273 |
|
|
"g" ((USItype) (bl)))
|
1274 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
1275 |
|
|
__asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
|
1276 |
|
|
: "=g" ((USItype) (sh)), \
|
1277 |
|
|
"=&g" ((USItype) (sl)) \
|
1278 |
|
|
: "0" ((USItype) (ah)), \
|
1279 |
|
|
"g" ((USItype) (bh)), \
|
1280 |
|
|
"1" ((USItype) (al)), \
|
1281 |
|
|
"g" ((USItype) (bl)))
|
1282 |
|
|
#define umul_ppmm(xh, xl, m0, m1) \
|
1283 |
|
|
do { \
|
1284 |
|
|
union { \
|
1285 |
|
|
UDItype __ll; \
|
1286 |
|
|
struct {USItype __l, __h;} __i; \
|
1287 |
|
|
} __xx; \
|
1288 |
|
|
USItype __m0 = (m0), __m1 = (m1); \
|
1289 |
|
|
__asm__ ("emul %1,%2,$0,%0" \
|
1290 |
|
|
: "=r" (__xx.__ll) \
|
1291 |
|
|
: "g" (__m0), \
|
1292 |
|
|
"g" (__m1)); \
|
1293 |
|
|
(xh) = __xx.__i.__h; \
|
1294 |
|
|
(xl) = __xx.__i.__l; \
|
1295 |
|
|
(xh) += ((((SItype) __m0 >> 31) & __m1) \
|
1296 |
|
|
+ (((SItype) __m1 >> 31) & __m0)); \
|
1297 |
|
|
} while (0)
|
1298 |
|
|
#define sdiv_qrnnd(q, r, n1, n0, d) \
|
1299 |
|
|
do { \
|
1300 |
|
|
union {DItype __ll; \
|
1301 |
|
|
struct {SItype __l, __h;} __i; \
|
1302 |
|
|
} __xx; \
|
1303 |
|
|
__xx.__i.__h = n1; __xx.__i.__l = n0; \
|
1304 |
|
|
__asm__ ("ediv %3,%2,%0,%1" \
|
1305 |
|
|
: "=g" (q), "=g" (r) \
|
1306 |
|
|
: "g" (__xx.__ll), "g" (d)); \
|
1307 |
|
|
} while (0)
|
1308 |
|
|
#endif /* __vax__ */
|
1309 |
|
|
|
1310 |
|
|
#if defined (__xtensa__) && W_TYPE_SIZE == 32
|
1311 |
|
|
/* This code is not Xtensa-configuration-specific, so rely on the compiler
|
1312 |
|
|
to expand builtin functions depending on what configuration features
|
1313 |
|
|
are available. This avoids library calls when the operation can be
|
1314 |
|
|
performed in-line. */
|
1315 |
|
|
#define umul_ppmm(w1, w0, u, v) \
|
1316 |
|
|
do { \
|
1317 |
|
|
DWunion __w; \
|
1318 |
|
|
__w.ll = __builtin_umulsidi3 (u, v); \
|
1319 |
|
|
w1 = __w.s.high; \
|
1320 |
|
|
w0 = __w.s.low; \
|
1321 |
|
|
} while (0)
|
1322 |
|
|
#define __umulsidi3(u, v) __builtin_umulsidi3 (u, v)
|
1323 |
|
|
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
|
1324 |
|
|
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
|
1325 |
|
|
#endif /* __xtensa__ */
|
1326 |
|
|
|
1327 |
|
|
#if defined xstormy16
|
1328 |
|
|
extern UHItype __stormy16_count_leading_zeros (UHItype);
|
1329 |
|
|
#define count_leading_zeros(count, x) \
|
1330 |
|
|
do \
|
1331 |
|
|
{ \
|
1332 |
|
|
UHItype size; \
|
1333 |
|
|
\
|
1334 |
|
|
/* We assume that W_TYPE_SIZE is a multiple of 16... */ \
|
1335 |
|
|
for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16) \
|
1336 |
|
|
{ \
|
1337 |
|
|
UHItype c; \
|
1338 |
|
|
\
|
1339 |
|
|
c = __clzhi2 ((x) >> (size - 16)); \
|
1340 |
|
|
(count) += c; \
|
1341 |
|
|
if (c != 16) \
|
1342 |
|
|
break; \
|
1343 |
|
|
} \
|
1344 |
|
|
} \
|
1345 |
|
|
while (0)
|
1346 |
|
|
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
|
1347 |
|
|
#endif
|
1348 |
|
|
|
1349 |
|
|
#if defined (__z8000__) && W_TYPE_SIZE == 16
|
1350 |
|
|
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
1351 |
|
|
__asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
|
1352 |
|
|
: "=r" ((unsigned int)(sh)), \
|
1353 |
|
|
"=&r" ((unsigned int)(sl)) \
|
1354 |
|
|
: "%0" ((unsigned int)(ah)), \
|
1355 |
|
|
"r" ((unsigned int)(bh)), \
|
1356 |
|
|
"%1" ((unsigned int)(al)), \
|
1357 |
|
|
"rQR" ((unsigned int)(bl)))
|
1358 |
|
|
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
1359 |
|
|
__asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
|
1360 |
|
|
: "=r" ((unsigned int)(sh)), \
|
1361 |
|
|
"=&r" ((unsigned int)(sl)) \
|
1362 |
|
|
: "0" ((unsigned int)(ah)), \
|
1363 |
|
|
"r" ((unsigned int)(bh)), \
|
1364 |
|
|
"1" ((unsigned int)(al)), \
|
1365 |
|
|
"rQR" ((unsigned int)(bl)))
|
1366 |
|
|
#define umul_ppmm(xh, xl, m0, m1) \
|
1367 |
|
|
do { \
|
1368 |
|
|
union {long int __ll; \
|
1369 |
|
|
struct {unsigned int __h, __l;} __i; \
|
1370 |
|
|
} __xx; \
|
1371 |
|
|
unsigned int __m0 = (m0), __m1 = (m1); \
|
1372 |
|
|
__asm__ ("mult %S0,%H3" \
|
1373 |
|
|
: "=r" (__xx.__i.__h), \
|
1374 |
|
|
"=r" (__xx.__i.__l) \
|
1375 |
|
|
: "%1" (__m0), \
|
1376 |
|
|
"rQR" (__m1)); \
|
1377 |
|
|
(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
|
1378 |
|
|
(xh) += ((((signed int) __m0 >> 15) & __m1) \
|
1379 |
|
|
+ (((signed int) __m1 >> 15) & __m0)); \
|
1380 |
|
|
} while (0)
|
1381 |
|
|
#endif /* __z8000__ */
|
1382 |
|
|
|
1383 |
|
|
#endif /* __GNUC__ */
|
1384 |
|
|
|
1385 |
|
|
/* If this machine has no inline assembler, use C macros. */
|
1386 |
|
|
|
1387 |
|
|
#if !defined (add_ssaaaa)
|
1388 |
|
|
/* Generic C fallback: (sh, sl) = (ah, al) + (bh, bl), with the low words
   handled as UWtype.  The carry out of the low-word sum is detected through
   unsigned wraparound: the sum is smaller than an addend exactly when it
   wrapped.
   AL is captured once so that an argument with side effects is evaluated a
   single time and the carry test sees the same value that was added.  The
   low sum is kept in a temporary and SL is stored last, so the outputs may
   name the same objects as the inputs.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {									\
    UWtype __add_al = (al);						\
    UWtype __add_x = __add_al + (bl);					\
    (sh) = (ah) + (bh) + (__add_x < __add_al);				\
    (sl) = __add_x;							\
  } while (0)
|
1395 |
|
|
#endif
|
1396 |
|
|
|
1397 |
|
|
#if !defined (sub_ddmmss)
|
1398 |
|
|
/* Generic C fallback: (sh, sl) = (ah, al) - (bh, bl), with the low words
   handled as UWtype.  The borrow out of the low-word difference is detected
   through unsigned wraparound: the difference is larger than the minuend
   exactly when it wrapped.
   AL is captured once so that an argument with side effects is evaluated a
   single time and the borrow test sees the same value that was subtracted
   from.  The low difference is kept in a temporary and SL is stored last,
   so the outputs may name the same objects as the inputs.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {									\
    UWtype __sub_al = (al);						\
    UWtype __sub_x = __sub_al - (bl);					\
    (sh) = (ah) - (bh) - (__sub_x > __sub_al);				\
    (sl) = __sub_x;							\
  } while (0)
|
1405 |
|
|
#endif
|
1406 |
|
|
|
1407 |
|
|
/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
|
1408 |
|
|
smul_ppmm. */
|
1409 |
|
|
#if !defined (umul_ppmm) && defined (smul_ppmm)
|
1410 |
|
|
#define umul_ppmm(w1, w0, u, v) \
|
1411 |
|
|
do { \
|
1412 |
|
|
UWtype __w1; \
|
1413 |
|
|
UWtype __xm0 = (u), __xm1 = (v); \
|
1414 |
|
|
smul_ppmm (__w1, w0, __xm0, __xm1); \
|
1415 |
|
|
(w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
|
1416 |
|
|
+ (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
|
1417 |
|
|
} while (0)
|
1418 |
|
|
#endif
|
1419 |
|
|
|
1420 |
|
|
/* If we still don't have umul_ppmm, define it using plain C. */
|
1421 |
|
|
#if !defined (umul_ppmm)
|
1422 |
|
|
/* Plain C fallback: (w1, w0) = u * v as a double-word product.
   Each operand is split into half-words with __ll_lowpart/__ll_highpart,
   the four partial products are formed in UWtype, and the two middle
   products are folded into the high and low result words with an explicit
   carry into the high word.
   U and V are captured once so that an argument with side effects is
   evaluated a single time and both halves come from the same value.  */
#define umul_ppmm(w1, w0, u, v)						\
  do {									\
    UWtype __umul_u = (u), __umul_v = (v);				\
    UWtype __x0, __x1, __x2, __x3;					\
    UHWtype __ul, __vl, __uh, __vh;					\
									\
    __ul = __ll_lowpart (__umul_u);					\
    __uh = __ll_highpart (__umul_u);					\
    __vl = __ll_lowpart (__umul_v);					\
    __vh = __ll_highpart (__umul_v);					\
									\
    __x0 = (UWtype) __ul * __vl;					\
    __x1 = (UWtype) __ul * __vh;					\
    __x2 = (UWtype) __uh * __vl;					\
    __x3 = (UWtype) __uh * __vh;					\
									\
    __x1 += __ll_highpart (__x0);/* this can't give carry */		\
    __x1 += __x2;		/* but this indeed can */		\
    if (__x1 < __x2)		/* did we get it? */			\
      __x3 += __ll_B;		/* yes, add it in the proper pos.  */	\
									\
    (w1) = __x3 + __ll_highpart (__x1);					\
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);		\
  } while (0)
|
1445 |
|
|
#endif
|
1446 |
|
|
|
1447 |
|
|
#if !defined (__umulsidi3)
|
1448 |
|
|
#define __umulsidi3(u, v) \
|
1449 |
|
|
({DWunion __w; \
|
1450 |
|
|
umul_ppmm (__w.s.high, __w.s.low, u, v); \
|
1451 |
|
|
__w.ll; })
|
1452 |
|
|
#endif
|
1453 |
|
|
|
1454 |
|
|
/* Define this unconditionally, so it can be used for debugging. */
|
1455 |
|
|
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
|
1456 |
|
|
do { \
|
1457 |
|
|
UWtype __d1, __d0, __q1, __q0; \
|
1458 |
|
|
UWtype __r1, __r0, __m; \
|
1459 |
|
|
__d1 = __ll_highpart (d); \
|
1460 |
|
|
__d0 = __ll_lowpart (d); \
|
1461 |
|
|
\
|
1462 |
|
|
__r1 = (n1) % __d1; \
|
1463 |
|
|
__q1 = (n1) / __d1; \
|
1464 |
|
|
__m = (UWtype) __q1 * __d0; \
|
1465 |
|
|
__r1 = __r1 * __ll_B | __ll_highpart (n0); \
|
1466 |
|
|
if (__r1 < __m) \
|
1467 |
|
|
{ \
|
1468 |
|
|
__q1--, __r1 += (d); \
|
1469 |
|
|
if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
|
1470 |
|
|
if (__r1 < __m) \
|
1471 |
|
|
__q1--, __r1 += (d); \
|
1472 |
|
|
} \
|
1473 |
|
|
__r1 -= __m; \
|
1474 |
|
|
\
|
1475 |
|
|
__r0 = __r1 % __d1; \
|
1476 |
|
|
__q0 = __r1 / __d1; \
|
1477 |
|
|
__m = (UWtype) __q0 * __d0; \
|
1478 |
|
|
__r0 = __r0 * __ll_B | __ll_lowpart (n0); \
|
1479 |
|
|
if (__r0 < __m) \
|
1480 |
|
|
{ \
|
1481 |
|
|
__q0--, __r0 += (d); \
|
1482 |
|
|
if (__r0 >= (d)) \
|
1483 |
|
|
if (__r0 < __m) \
|
1484 |
|
|
__q0--, __r0 += (d); \
|
1485 |
|
|
} \
|
1486 |
|
|
__r0 -= __m; \
|
1487 |
|
|
\
|
1488 |
|
|
(q) = (UWtype) __q1 * __ll_B | __q0; \
|
1489 |
|
|
(r) = __r0; \
|
1490 |
|
|
} while (0)
|
1491 |
|
|
|
1492 |
|
|
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
|
1493 |
|
|
__udiv_w_sdiv (defined in libgcc or elsewhere). */
|
1494 |
|
|
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
|
1495 |
|
|
#define udiv_qrnnd(q, r, nh, nl, d) \
|
1496 |
|
|
do { \
|
1497 |
|
|
USItype __r; \
|
1498 |
|
|
(q) = __udiv_w_sdiv (&__r, nh, nl, d); \
|
1499 |
|
|
(r) = __r; \
|
1500 |
|
|
} while (0)
|
1501 |
|
|
#endif
|
1502 |
|
|
|
1503 |
|
|
/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
|
1504 |
|
|
#if !defined (udiv_qrnnd)
|
1505 |
|
|
#define UDIV_NEEDS_NORMALIZATION 1
|
1506 |
|
|
#define udiv_qrnnd __udiv_qrnnd_c
|
1507 |
|
|
#endif
|
1508 |
|
|
|
1509 |
|
|
#if !defined (count_leading_zeros)
|
1510 |
|
|
#define count_leading_zeros(count, x) \
|
1511 |
|
|
do { \
|
1512 |
|
|
UWtype __xr = (x); \
|
1513 |
|
|
UWtype __a; \
|
1514 |
|
|
\
|
1515 |
|
|
if (W_TYPE_SIZE <= 32) \
|
1516 |
|
|
{ \
|
1517 |
|
|
__a = __xr < ((UWtype)1<<2*__BITS4) \
|
1518 |
|
|
? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4) \
|
1519 |
|
|
: (__xr < ((UWtype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \
|
1520 |
|
|
} \
|
1521 |
|
|
else \
|
1522 |
|
|
{ \
|
1523 |
|
|
for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
|
1524 |
|
|
if (((__xr >> __a) & 0xff) != 0) \
|
1525 |
|
|
break; \
|
1526 |
|
|
} \
|
1527 |
|
|
\
|
1528 |
|
|
(count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
|
1529 |
|
|
} while (0)
|
1530 |
|
|
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
|
1531 |
|
|
#endif
|
1532 |
|
|
|
1533 |
|
|
#if !defined (count_trailing_zeros)
|
1534 |
|
|
/* Define count_trailing_zeros using count_leading_zeros. The latter might be
|
1535 |
|
|
defined in asm, but if it is not, the C version above is good enough. */
|
1536 |
|
|
#define count_trailing_zeros(count, x) \
|
1537 |
|
|
do { \
|
1538 |
|
|
UWtype __ctz_x = (x); \
|
1539 |
|
|
UWtype __ctz_c; \
|
1540 |
|
|
count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
|
1541 |
|
|
(count) = W_TYPE_SIZE - 1 - __ctz_c; \
|
1542 |
|
|
} while (0)
|
1543 |
|
|
#endif
|
1544 |
|
|
|
1545 |
|
|
#ifndef UDIV_NEEDS_NORMALIZATION
|
1546 |
|
|
#define UDIV_NEEDS_NORMALIZATION 0
|
1547 |
|
|
#endif
|