linux-2.6.24/arch/m68k/math-emu/multi_arith.h (rev 3)
Source: https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

/* multi_arith.h: multi-precision integer arithmetic functions, needed
   to do extended-precision floating point.

   (c) 1998 David Huggins-Daines.

   Somewhat based on arch/alpha/math-emu/ieee-math.c, which is (c)
   David Mosberger-Tang.

   You may copy, modify, and redistribute this file under the terms of
   the GNU General Public License, version 2, or any later version, at
   your convenience. */

/* Note:

   These are not general multi-precision math routines.  Rather, they
   implement the subset of integer arithmetic that we need in order to
   multiply, divide, and normalize 128-bit unsigned mantissae.  */

#ifndef MULTI_ARITH_H
#define MULTI_ARITH_H

#if 0   /* old code... */

/* Unsigned only, because we don't need signs to multiply and divide. */
typedef unsigned int int128[4];

/* Word order */
enum {
        MSW128,
        NMSW128,
        NLSW128,
        LSW128
};

/* big-endian */
#define LO_WORD(ll) (((unsigned int *) &ll)[1])
#define HI_WORD(ll) (((unsigned int *) &ll)[0])

/* Convenience functions to stuff various integer values into int128s */

static inline void zero128(int128 a)
{
        a[LSW128] = a[NLSW128] = a[NMSW128] = a[MSW128] = 0;
}

/* Human-readable word order in the arguments */
static inline void set128(unsigned int i3, unsigned int i2, unsigned int i1,
                          unsigned int i0, int128 a)
{
        a[LSW128] = i0;
        a[NLSW128] = i1;
        a[NMSW128] = i2;
        a[MSW128] = i3;
}

/* Convenience functions (for testing as well) */
static inline void int64_to_128(unsigned long long src, int128 dest)
{
        dest[LSW128] = (unsigned int) src;
        dest[NLSW128] = src >> 32;
        dest[NMSW128] = dest[MSW128] = 0;
}

static inline void int128_to_64(const int128 src, unsigned long long *dest)
{
        *dest = src[LSW128] | (long long) src[NLSW128] << 32;
}

static inline void put_i128(const int128 a)
{
        printk("%08x %08x %08x %08x\n", a[MSW128], a[NMSW128],
               a[NLSW128], a[LSW128]);
}

/* Internal shifters:

   Note that these are only good for 0 < count < 32.
 */

static inline void _lsl128(unsigned int count, int128 a)
{
        a[MSW128] = (a[MSW128] << count) | (a[NMSW128] >> (32 - count));
        a[NMSW128] = (a[NMSW128] << count) | (a[NLSW128] >> (32 - count));
        a[NLSW128] = (a[NLSW128] << count) | (a[LSW128] >> (32 - count));
        a[LSW128] <<= count;
}

static inline void _lsr128(unsigned int count, int128 a)
{
        a[LSW128] = (a[LSW128] >> count) | (a[NLSW128] << (32 - count));
        a[NLSW128] = (a[NLSW128] >> count) | (a[NMSW128] << (32 - count));
        a[NMSW128] = (a[NMSW128] >> count) | (a[MSW128] << (32 - count));
        a[MSW128] >>= count;
}

/* Should be faster, one would hope */

static inline void lslone128(int128 a)
{
        asm volatile ("lsl.l #1,%0\n"
                      "roxl.l #1,%1\n"
                      "roxl.l #1,%2\n"
                      "roxl.l #1,%3\n"
                      :
                      "=d" (a[LSW128]),
                      "=d"(a[NLSW128]),
                      "=d"(a[NMSW128]),
                      "=d"(a[MSW128])
                      :
                      "0"(a[LSW128]),
                      "1"(a[NLSW128]),
                      "2"(a[NMSW128]),
                      "3"(a[MSW128]));
}

static inline void lsrone128(int128 a)
{
        asm volatile ("lsr.l #1,%0\n"
                      "roxr.l #1,%1\n"
                      "roxr.l #1,%2\n"
                      "roxr.l #1,%3\n"
                      :
                      "=d" (a[MSW128]),
                      "=d"(a[NMSW128]),
                      "=d"(a[NLSW128]),
                      "=d"(a[LSW128])
                      :
                      "0"(a[MSW128]),
                      "1"(a[NMSW128]),
                      "2"(a[NLSW128]),
                      "3"(a[LSW128]));
}

/* Generalized 128-bit shifters:

   These bit-shift to a multiple of 32, then move whole longwords.  */

static inline void lsl128(unsigned int count, int128 a)
{
        int wordcount, i;

        if (count % 32)
                _lsl128(count % 32, a);

        if (0 == (wordcount = count / 32))
                return;

        /* argh, gak, endian-sensitive */
        for (i = 0; i < 4 - wordcount; i++) {
                a[i] = a[i + wordcount];
        }
        for (i = 3; i >= 4 - wordcount; --i) {
                a[i] = 0;
        }
}

static inline void lsr128(unsigned int count, int128 a)
{
        int wordcount, i;

        if (count % 32)
                _lsr128(count % 32, a);

        if (0 == (wordcount = count / 32))
                return;

        for (i = 3; i >= wordcount; --i) {
                a[i] = a[i - wordcount];
        }
        for (i = 0; i < wordcount; i++) {
                a[i] = 0;
        }
}

static inline void orl128(int a, int128 b)
{
        b[LSW128] |= a;
}

static inline int btsthi128(const int128 a)
{
        return a[MSW128] & 0x80000000;
}

/* test bits (numbered from 0 = LSB) up to and including "top" */
static inline int bftestlo128(int top, const int128 a)
{
        int r = 0;

        if (top > 31)
                r |= a[LSW128];
        if (top > 63)
                r |= a[NLSW128];
        if (top > 95)
                r |= a[NMSW128];

        r |= a[3 - (top / 32)] & ((1 << (top % 32 + 1)) - 1);

        return (r != 0);
}

/* Aargh.  We need these because GCC is broken */
/* FIXME: do them in assembly, for goodness' sake! */
static inline void mask64(int pos, unsigned long long *mask)
{
        *mask = 0;

        if (pos < 32) {
                LO_WORD(*mask) = (1 << pos) - 1;
                return;
        }
        LO_WORD(*mask) = -1;
        HI_WORD(*mask) = (1 << (pos - 32)) - 1;
}

static inline void bset64(int pos, unsigned long long *dest)
{
        /* This conditional will be optimized away.  Thanks, GCC! */
        if (pos < 32)
                asm volatile ("bset %1,%0":"=m"
                              (LO_WORD(*dest)):"id"(pos));
        else
                asm volatile ("bset %1,%0":"=m"
                              (HI_WORD(*dest)):"id"(pos - 32));
}

static inline int btst64(int pos, unsigned long long dest)
{
        if (pos < 32)
                return (0 != (LO_WORD(dest) & (1 << pos)));
        else
                return (0 != (HI_WORD(dest) & (1 << (pos - 32))));
}

static inline void lsl64(int count, unsigned long long *dest)
{
        if (count < 32) {
                HI_WORD(*dest) = (HI_WORD(*dest) << count)
                    | (LO_WORD(*dest) >> (32 - count));
                LO_WORD(*dest) <<= count;
                return;
        }
        count -= 32;
        HI_WORD(*dest) = LO_WORD(*dest) << count;
        LO_WORD(*dest) = 0;
}

static inline void lsr64(int count, unsigned long long *dest)
{
        if (count < 32) {
                LO_WORD(*dest) = (LO_WORD(*dest) >> count)
                    | (HI_WORD(*dest) << (32 - count));
                HI_WORD(*dest) >>= count;
                return;
        }
        count -= 32;
        LO_WORD(*dest) = HI_WORD(*dest) >> count;
        HI_WORD(*dest) = 0;
}
#endif

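/* fp_denormalize(): shift the 64-bit mantissa of 'reg' right by 'cnt' bits
   and add 'cnt' to the exponent.  The bits shifted out are collected in
   reg->lowmant (the extra low-order mantissa byte); any lower bits that do
   not fit are ORed in as a sticky bit so rounding still works. */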
static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
{
        reg->exp += cnt;

        switch (cnt) {
        case 0 ... 8:
                reg->lowmant = reg->mant.m32[1] << (8 - cnt);
                reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
                                   (reg->mant.m32[0] << (32 - cnt));
                reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
                break;
        case 9 ... 32:
                reg->lowmant = reg->mant.m32[1] >> (cnt - 8);
                if (reg->mant.m32[1] << (40 - cnt))
                        reg->lowmant |= 1;
                reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
                                   (reg->mant.m32[0] << (32 - cnt));
                reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
                break;
        case 33 ... 39:
                asm volatile ("bfextu %1{%2,#8},%0" : "=d" (reg->lowmant)
                        : "m" (reg->mant.m32[0]), "d" (64 - cnt));
                if (reg->mant.m32[1] << (40 - cnt))
                        reg->lowmant |= 1;
                reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
                reg->mant.m32[0] = 0;
                break;
        case 40 ... 71:
                reg->lowmant = reg->mant.m32[0] >> (cnt - 40);
                if ((reg->mant.m32[0] << (72 - cnt)) || reg->mant.m32[1])
                        reg->lowmant |= 1;
                reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
                reg->mant.m32[0] = 0;
                break;
        default:
                reg->lowmant = reg->mant.m32[0] || reg->mant.m32[1];
                reg->mant.m32[0] = 0;
                reg->mant.m32[1] = 0;
                break;
        }
}

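/* fp_overnormalize(): left-justify the 64-bit mantissa.  bfffo locates the
   most significant set bit and the mantissa is shifted up so that this bit
   becomes the top bit; the shift count is returned so the caller can
   adjust the exponent. */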
static inline int fp_overnormalize(struct fp_ext *reg)
{
        int shift;

        if (reg->mant.m32[0]) {
                asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[0]));
                reg->mant.m32[0] = (reg->mant.m32[0] << shift) | (reg->mant.m32[1] >> (32 - shift));
                reg->mant.m32[1] = (reg->mant.m32[1] << shift);
        } else {
                asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[1]));
                reg->mant.m32[0] = (reg->mant.m32[1] << shift);
                reg->mant.m32[1] = 0;
                shift += 32;
        }

        return shift;
}

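/* fp_addmant(): add the mantissa of 'src' to that of 'dest' (lowmant plus
   both 32-bit words) with an add/addx chain; returns the carry out of the
   most significant word. */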
static inline int fp_addmant(struct fp_ext *dest, struct fp_ext *src)
{
        int carry;

        /* We assume here that gcc only inserts a move and a clr instruction. */
        asm volatile ("add.b %1,%0" : "=d,g" (dest->lowmant)
                : "g,d" (src->lowmant), "0,0" (dest->lowmant));
        asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[1])
                : "d" (src->mant.m32[1]), "0" (dest->mant.m32[1]));
        asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[0])
                : "d" (src->mant.m32[0]), "0" (dest->mant.m32[0]));
        asm volatile ("addx.l %0,%0" : "=d" (carry) : "0" (0));

        return carry;
}

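/* fp_addcarry(): absorb a carry out of fp_addmant().  The exponent is
   incremented; if it reaches 0x7fff the result has overflowed and 0 is
   returned.  Otherwise the mantissa is shifted right one bit, the carry is
   re-inserted as the new top bit, a sticky bit is kept in lowmant, and 1 is
   returned. */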
static inline int fp_addcarry(struct fp_ext *reg)
{
        if (++reg->exp == 0x7fff) {
                if (reg->mant.m64)
                        fp_set_sr(FPSR_EXC_INEX2);
                reg->mant.m64 = 0;
                fp_set_sr(FPSR_EXC_OVFL);
                return 0;
        }
        reg->lowmant = (reg->mant.m32[1] << 7) | (reg->lowmant ? 1 : 0);
        reg->mant.m32[1] = (reg->mant.m32[1] >> 1) |
                           (reg->mant.m32[0] << 31);
        reg->mant.m32[0] = (reg->mant.m32[0] >> 1) | 0x80000000;

        return 1;
}

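/* fp_submant(): dest = src1 - src2 on the mantissas (lowmant plus both
   32-bit words), using a sub/subx borrow chain. */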
static inline void fp_submant(struct fp_ext *dest, struct fp_ext *src1,
                              struct fp_ext *src2)
{
        /* We assume here that gcc only inserts a move and a clr instruction. */
        asm volatile ("sub.b %1,%0" : "=d,g" (dest->lowmant)
                : "g,d" (src2->lowmant), "0,0" (src1->lowmant));
        asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[1])
                : "d" (src2->mant.m32[1]), "0" (src1->mant.m32[1]));
        asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[0])
                : "d" (src2->mant.m32[0]), "0" (src1->mant.m32[0]));
}

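/* Primitive 64-bit and 96-bit helpers built on the 32x32->64 mulu.l and
   64/32 divu.l instructions, with add/addx and sub/subx carry chains.
   fp_sub96c() additionally returns the final borrow. */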
#define fp_mul64(desth, destl, src1, src2) ({                           \
        asm ("mulu.l %2,%1:%0" : "=d" (destl), "=d" (desth)             \
                : "dm" (src1), "0" (src2));                             \
})
#define fp_div64(quot, rem, srch, srcl, div)                            \
        asm ("divu.l %2,%1:%0" : "=d" (quot), "=d" (rem)                \
                : "dm" (div), "1" (srch), "0" (srcl))
#define fp_add64(dest1, dest2, src1, src2) ({                           \
        asm ("add.l %1,%0" : "=d,dm" (dest2)                            \
                : "dm,d" (src2), "0,0" (dest2));                        \
        asm ("addx.l %1,%0" : "=d" (dest1)                              \
                : "d" (src1), "0" (dest1));                             \
})
#define fp_addx96(dest, src) ({                                         \
        /* We assume here that gcc only inserts a move and a clr. */    \
        asm volatile ("add.l %1,%0" : "=d,g" (dest->m32[2])             \
                : "g,d" (temp.m32[1]), "0,0" (dest->m32[2]));           \
        asm volatile ("addx.l %1,%0" : "=d" (dest->m32[1])              \
                : "d" (temp.m32[0]), "0" (dest->m32[1]));               \
        asm volatile ("addx.l %1,%0" : "=d" (dest->m32[0])              \
                : "d" (0), "0" (dest->m32[0]));                         \
})
#define fp_sub64(dest, src) ({                                          \
        asm ("sub.l %1,%0" : "=d,dm" (dest.m32[1])                      \
                : "dm,d" (src.m32[1]), "0,0" (dest.m32[1]));            \
        asm ("subx.l %1,%0" : "=d" (dest.m32[0])                        \
                : "d" (src.m32[0]), "0" (dest.m32[0]));                 \
})
#define fp_sub96c(dest, srch, srcm, srcl) ({                            \
        char carry;                                                     \
        asm ("sub.l %1,%0" : "=d,dm" (dest.m32[2])                      \
                : "dm,d" (srcl), "0,0" (dest.m32[2]));                  \
        asm ("subx.l %1,%0" : "=d" (dest.m32[1])                        \
                : "d" (srcm), "0" (dest.m32[1]));                       \
        asm ("subx.l %2,%1; scs %0" : "=d" (carry), "=d" (dest.m32[0])  \
                : "d" (srch), "1" (dest.m32[0]));                       \
        carry;                                                          \
})

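/* fp_multiplymant(): multiply two 64-bit mantissas into a 128-bit result
   using four 32x32->64 partial products (schoolbook multiplication); the
   two cross products are accumulated into the upper 96 bits with
   fp_addx96(). */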
static inline void fp_multiplymant(union fp_mant128 *dest, struct fp_ext *src1,
                                   struct fp_ext *src2)
{
        union fp_mant64 temp;

        fp_mul64(dest->m32[0], dest->m32[1], src1->mant.m32[0], src2->mant.m32[0]);
        fp_mul64(dest->m32[2], dest->m32[3], src1->mant.m32[1], src2->mant.m32[1]);

        fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[0], src2->mant.m32[1]);
        fp_addx96(dest, temp);

        fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[1], src2->mant.m32[0]);
        fp_addx96(dest, temp);
}

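/* fp_dividemant(): divide the mantissa of 'src' by that of 'div', producing
   a 128-bit quotient in 'dest', one 32-bit word at a time. */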
static inline void fp_dividemant(union fp_mant128 *dest, struct fp_ext *src,
                                 struct fp_ext *div)
{
        union fp_mant128 tmp;
        union fp_mant64 tmp64;
        unsigned long *mantp = dest->m32;
        unsigned long fix, rem, first, dummy;
        int i;

        /* The algorithm below requires the dividend (src) to be smaller
           than div, but both arrive here with their high bit set. */
        if (src->mant.m64 >= div->mant.m64) {
                fp_sub64(src->mant, div->mant);
                *mantp = 1;
        } else
                *mantp = 0;
        mantp++;

        /* Basic idea behind this algorithm: we can't divide two 64-bit
           numbers (AB/CD) directly, but we can calculate AB/C0.  That
           quotient is too large by a factor of CD/C0, so we multiply it
           by C0/CD to fix it; after that the result is nearly correct and
           only a few small corrections are needed. */

        /* C0/CD can be precalculated, but that is a 64-bit division again.
           We can make it a bit easier by first dividing through by C,
           giving 10/1D; then a single shift is enough and the value fits
           into 32 bits. */
        fix = 0x80000000;
        dummy = div->mant.m32[1] / div->mant.m32[0] + 1;
        dummy = (dummy >> 1) | fix;
        fp_div64(fix, dummy, fix, 0, dummy);
        fix--;
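
        /* 'fix' now holds C0/CD as a 32-bit fixed-point fraction (roughly
           2^32 * C0/CD, rounded down): multiplying an AB/C0 quotient word
           by it and keeping the upper half scales it down to the AB/CD
           quotient word, as described above. */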

        for (i = 0; i < 3; i++, mantp++) {
                if (src->mant.m32[0] == div->mant.m32[0]) {
                        fp_div64(first, rem, 0, src->mant.m32[1], div->mant.m32[0]);

                        fp_mul64(*mantp, dummy, first, fix);
                        *mantp += fix;
                } else {
                        fp_div64(first, rem, src->mant.m32[0], src->mant.m32[1], div->mant.m32[0]);

                        fp_mul64(*mantp, dummy, first, fix);
                }

                fp_mul64(tmp.m32[0], tmp.m32[1], div->mant.m32[0], first - *mantp);
                fp_add64(tmp.m32[0], tmp.m32[1], 0, rem);
                tmp.m32[2] = 0;

                fp_mul64(tmp64.m32[0], tmp64.m32[1], *mantp, div->mant.m32[1]);
                fp_sub96c(tmp, 0, tmp64.m32[0], tmp64.m32[1]);

                src->mant.m32[0] = tmp.m32[1];
                src->mant.m32[1] = tmp.m32[2];

                while (!fp_sub96c(tmp, 0, div->mant.m32[0], div->mant.m32[1])) {
                        src->mant.m32[0] = tmp.m32[1];
                        src->mant.m32[1] = tmp.m32[2];
                        *mantp += 1;
                }
        }
}

#if 0
static inline unsigned int fp_fls128(union fp_mant128 *src)
{
        unsigned long data;
        unsigned int res, off;

        if ((data = src->m32[0]))
                off = 0;
        else if ((data = src->m32[1]))
                off = 32;
        else if ((data = src->m32[2]))
                off = 64;
        else if ((data = src->m32[3]))
                off = 96;
        else
                return 128;

        asm ("bfffo %1{#0,#32},%0" : "=d" (res) : "dm" (data));
        return res + off;
}

static inline void fp_shiftmant128(union fp_mant128 *src, int shift)
{
        unsigned long sticky;

        switch (shift) {
        case 0:
                return;
        case 1:
                asm volatile ("lsl.l #1,%0"
                        : "=d" (src->m32[3]) : "0" (src->m32[3]));
                asm volatile ("roxl.l #1,%0"
                        : "=d" (src->m32[2]) : "0" (src->m32[2]));
                asm volatile ("roxl.l #1,%0"
                        : "=d" (src->m32[1]) : "0" (src->m32[1]));
                asm volatile ("roxl.l #1,%0"
                        : "=d" (src->m32[0]) : "0" (src->m32[0]));
                return;
        case 2 ... 31:
                src->m32[0] = (src->m32[0] << shift) | (src->m32[1] >> (32 - shift));
                src->m32[1] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
                src->m32[2] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
                src->m32[3] = (src->m32[3] << shift);
                return;
        case 32 ... 63:
                shift -= 32;
                src->m32[0] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
                src->m32[1] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
                src->m32[2] = (src->m32[3] << shift);
                src->m32[3] = 0;
                return;
        case 64 ... 95:
                shift -= 64;
                src->m32[0] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
                src->m32[1] = (src->m32[3] << shift);
                src->m32[2] = src->m32[3] = 0;
                return;
        case 96 ... 127:
                shift -= 96;
                src->m32[0] = (src->m32[3] << shift);
                src->m32[1] = src->m32[2] = src->m32[3] = 0;
                return;
        case -31 ... -1:
                shift = -shift;
                sticky = 0;
                if (src->m32[3] << (32 - shift))
                        sticky = 1;
                src->m32[3] = (src->m32[3] >> shift) | (src->m32[2] << (32 - shift)) | sticky;
                src->m32[2] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift));
                src->m32[1] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
                src->m32[0] = (src->m32[0] >> shift);
                return;
        case -63 ... -32:
                shift = -shift - 32;
                sticky = 0;
                if ((src->m32[2] << (32 - shift)) || src->m32[3])
                        sticky = 1;
                src->m32[3] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift)) | sticky;
                src->m32[2] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
                src->m32[1] = (src->m32[0] >> shift);
                src->m32[0] = 0;
                return;
        case -95 ... -64:
                shift = -shift - 64;
                sticky = 0;
                if ((src->m32[1] << (32 - shift)) || src->m32[2] || src->m32[3])
                        sticky = 1;
                src->m32[3] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift)) | sticky;
                src->m32[2] = (src->m32[0] >> shift);
                src->m32[1] = src->m32[0] = 0;
                return;
        case -127 ... -96:
                shift = -shift - 96;
                sticky = 0;
                if ((src->m32[0] << (32 - shift)) || src->m32[1] || src->m32[2] || src->m32[3])
                        sticky = 1;
                src->m32[3] = (src->m32[0] >> shift) | sticky;
                src->m32[2] = src->m32[1] = src->m32[0] = 0;
                return;
        }

        if (shift < 0 && (src->m32[0] || src->m32[1] || src->m32[2] || src->m32[3]))
                src->m32[3] = 1;
        else
                src->m32[3] = 0;
        src->m32[2] = 0;
        src->m32[1] = 0;
        src->m32[0] = 0;
}
#endif

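/* fp_putmant128(): store a 128-bit mantissa into 'dest', shifted left by
   'shift' bits (only the values 0, 1, 31 and 32 are handled).  The bits
   that end up below the 64-bit mantissa go into lowmant, with a sticky bit
   ORed in for rounding. */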
static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
                                 int shift)
{
        unsigned long tmp;

        switch (shift) {
        case 0:
                dest->mant.m64 = src->m64[0];
                dest->lowmant = src->m32[2] >> 24;
                if (src->m32[3] || (src->m32[2] << 8))
                        dest->lowmant |= 1;
                break;
        case 1:
                asm volatile ("lsl.l #1,%0"
                        : "=d" (tmp) : "0" (src->m32[2]));
                asm volatile ("roxl.l #1,%0"
                        : "=d" (dest->mant.m32[1]) : "0" (src->m32[1]));
                asm volatile ("roxl.l #1,%0"
                        : "=d" (dest->mant.m32[0]) : "0" (src->m32[0]));
                dest->lowmant = tmp >> 24;
                if (src->m32[3] || (tmp << 8))
                        dest->lowmant |= 1;
                break;
        case 31:
                asm volatile ("lsr.l #1,%1; roxr.l #1,%0"
                        : "=d" (dest->mant.m32[0])
                        : "d" (src->m32[0]), "0" (src->m32[1]));
                asm volatile ("roxr.l #1,%0"
                        : "=d" (dest->mant.m32[1]) : "0" (src->m32[2]));
                asm volatile ("roxr.l #1,%0"
                        : "=d" (tmp) : "0" (src->m32[3]));
                dest->lowmant = tmp >> 24;
                if (src->m32[3] << 7)
                        dest->lowmant |= 1;
                break;
        case 32:
                dest->mant.m32[0] = src->m32[1];
                dest->mant.m32[1] = src->m32[2];
                dest->lowmant = src->m32[3] >> 24;
                if (src->m32[3] << 8)
                        dest->lowmant |= 1;
                break;
        }
}

#if 0 /* old code... */
static inline int fls(unsigned int a)
{
        int r;

        asm volatile ("bfffo %1{#0,#32},%0"
                      : "=d" (r) : "md" (a));
        return r;
}

/* fls = "find last set" (cf. ffs(3)) */
static inline int fls128(const int128 a)
{
        if (a[MSW128])
                return fls(a[MSW128]);
        if (a[NMSW128])
                return fls(a[NMSW128]) + 32;
        /* XXX: it probably never gets beyond this point in actual
           use, but that's indicative of a more general problem in the
           algorithm (i.e. as per the actual 68881 implementation, we
           really only need at most 67 bits of precision [plus
           overflow]) so I'm not going to fix it. */
        if (a[NLSW128])
                return fls(a[NLSW128]) + 64;
        if (a[LSW128])
                return fls(a[LSW128]) + 96;
        else
                return -1;
}

static inline int zerop128(const int128 a)
{
        return !(a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]);
}

static inline int nonzerop128(const int128 a)
{
        return (a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]);
}

/* Addition and subtraction */
/* Do these in "pure" assembly, because "extended" asm is unmanageable
   here */
static inline void add128(const int128 a, int128 b)
{
        /* rotating carry flags */
        unsigned int carry[2];

        carry[0] = a[LSW128] > (0xffffffff - b[LSW128]);
        b[LSW128] += a[LSW128];

        carry[1] = a[NLSW128] > (0xffffffff - b[NLSW128] - carry[0]);
        b[NLSW128] = a[NLSW128] + b[NLSW128] + carry[0];

        carry[0] = a[NMSW128] > (0xffffffff - b[NMSW128] - carry[1]);
        b[NMSW128] = a[NMSW128] + b[NMSW128] + carry[1];

        b[MSW128] = a[MSW128] + b[MSW128] + carry[0];
}

/* Note: assembler semantics: "b -= a" */
static inline void sub128(const int128 a, int128 b)
{
        /* rotating borrow flags */
        unsigned int borrow[2];

        borrow[0] = b[LSW128] < a[LSW128];
        b[LSW128] -= a[LSW128];

        borrow[1] = b[NLSW128] < a[NLSW128] + borrow[0];
        b[NLSW128] = b[NLSW128] - a[NLSW128] - borrow[0];

        borrow[0] = b[NMSW128] < a[NMSW128] + borrow[1];
        b[NMSW128] = b[NMSW128] - a[NMSW128] - borrow[1];

        b[MSW128] = b[MSW128] - a[MSW128] - borrow[0];
}

/* Poor man's 64-bit expanding multiply */
static inline void mul64(unsigned long long a, unsigned long long b, int128 c)
{
        unsigned long long acc;
        int128 acc128;

        zero128(acc128);
        zero128(c);

        /* first the low words */
        if (LO_WORD(a) && LO_WORD(b)) {
                acc = (long long) LO_WORD(a) * LO_WORD(b);
                c[NLSW128] = HI_WORD(acc);
                c[LSW128] = LO_WORD(acc);
        }
        /* Next the high words */
        if (HI_WORD(a) && HI_WORD(b)) {
                acc = (long long) HI_WORD(a) * HI_WORD(b);
                c[MSW128] = HI_WORD(acc);
                c[NMSW128] = LO_WORD(acc);
        }
        /* The middle words */
        if (LO_WORD(a) && HI_WORD(b)) {
                acc = (long long) LO_WORD(a) * HI_WORD(b);
                acc128[NMSW128] = HI_WORD(acc);
                acc128[NLSW128] = LO_WORD(acc);
                add128(acc128, c);
        }
        /* The first and last words */
        if (HI_WORD(a) && LO_WORD(b)) {
                acc = (long long) HI_WORD(a) * LO_WORD(b);
                acc128[NMSW128] = HI_WORD(acc);
                acc128[NLSW128] = LO_WORD(acc);
                add128(acc128, c);
        }
}

/* Note: unsigned */
static inline int cmp128(int128 a, int128 b)
{
        if (a[MSW128] < b[MSW128])
                return -1;
        if (a[MSW128] > b[MSW128])
                return 1;
        if (a[NMSW128] < b[NMSW128])
                return -1;
        if (a[NMSW128] > b[NMSW128])
                return 1;
        if (a[NLSW128] < b[NLSW128])
                return -1;
        if (a[NLSW128] > b[NLSW128])
                return 1;

        return (signed) a[LSW128] - b[LSW128];
}

inline void div128(int128 a, int128 b, int128 c)
{
        int128 mask;

        /* Algorithm:

           Shift the divisor up until it's at least as big as the
           dividend, keeping track of the position to which we've
           shifted it, i.e. the power of 2 by which we've multiplied
           it (the mask).

           Then, for this power of 2 and every smaller one, subtract
           the raised divisor from the dividend and add the mask to
           the quotient whenever the dividend is at least as big as
           the raised divisor.  After each step, divide the raised
           divisor and the mask by 2 (i.e. shift one place to the
           right).  Lather, rinse, and repeat, until there are no
           more powers of 2 left. */

        /* FIXME: needless to say, there's room for improvement here too. */

        /* Shift up */
        /* XXX: since it just has to be "at least as big", we can
           probably eliminate this horribly wasteful loop.  I will
           have to prove this first, though */
        set128(0, 0, 0, 1, mask);
        while (cmp128(b, a) < 0 && !btsthi128(b)) {
                lslone128(b);
                lslone128(mask);
        }

        /* Shift down */
        zero128(c);
        do {
                if (cmp128(a, b) >= 0) {
                        sub128(b, a);
                        add128(mask, c);
                }
                lsrone128(mask);
                lsrone128(b);
        } while (nonzerop128(mask));

        /* The remainder is in a... */
}
#endif

#endif  /* MULTI_ARITH_H */
