gcc-4.2.2/gcc/config/xtensa/ieee754-df.S (openrisc/trunk, rev 841)
/* IEEE-754 double-precision functions for Xtensa
   Copyright (C) 2006 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   In addition to the permissions in the GNU General Public License,
   the Free Software Foundation gives you unlimited permission to link
   the compiled version of this file into combinations with other
   programs, and to distribute those combinations without any
   restriction coming from the use of this file.  (The General Public
   License restrictions do apply in other respects; for example, they
   cover modification of the file, and distribution when not linked
   into a combine executable.)

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.  */

#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif
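
/* xh/xl and yh/yl name the high and low words of the two double-precision
   arguments, which arrive in a2/a3 and a4/a5.  The only difference between
   the big-endian and little-endian layouts is which register of each pair
   holds the high word, so the code below is endian-neutral.  */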
42
 
43
/*  Warning!  The branch displacements for some Xtensa branch instructions
44
    are quite small, and this code has been carefully laid out to keep
45
    branch targets in range.  If you change anything, be sure to check that
46
    the assembler is not relaxing anything to branch over a jump.  */
47
 
48
#ifdef L_negdf2
49
 
50
        .align  4
51
        .global __negdf2
52
        .type   __negdf2, @function
53
__negdf2:
54
        leaf_entry sp, 16
55
        movi    a4, 0x80000000
56
        xor     xh, xh, a4
57
        leaf_return
58
 
59
#endif /* L_negdf2 */
60
 
61
#ifdef L_addsubdf3
62
 
63
        /* Addition */
64
__adddf3_aux:
65
 
66
        /* Handle NaNs and Infinities.  (This code is placed before the
67
           start of the function just to keep it in range of the limited
68
           branch displacements.)  */
69
 
70
.Ladd_xnan_or_inf:
71
        /* If y is neither Infinity nor NaN, return x.  */
72
        bnall   yh, a6, 1f
73
        /* If x is a NaN, return it.  Otherwise, return y.  */
74
        slli    a7, xh, 12
75
        or      a7, a7, xl
76
        beqz    a7, .Ladd_ynan_or_inf
77
1:      leaf_return
78
 
79
.Ladd_ynan_or_inf:
80
        /* Return y.  */
81
        mov     xh, yh
82
        mov     xl, yl
83
        leaf_return
84
 
85
.Ladd_opposite_signs:
86
        /* Operand signs differ.  Do a subtraction.  */
87
        slli    a7, a6, 11
88
        xor     yh, yh, a7
89
        j       .Lsub_same_sign
90
 
91
        .align  4
92
        .global __adddf3
93
        .type   __adddf3, @function
94
__adddf3:
95
        leaf_entry sp, 16
96
        movi    a6, 0x7ff00000
97
 
98
        /* Check if the two operands have the same sign.  */
99
        xor     a7, xh, yh
100
        bltz    a7, .Ladd_opposite_signs
101
 
102
.Ladd_same_sign:
103
        /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
104
        ball    xh, a6, .Ladd_xnan_or_inf
105
        ball    yh, a6, .Ladd_ynan_or_inf
106
 
107
        /* Compare the exponents.  The smaller operand will be shifted
108
           right by the exponent difference and added to the larger
109
           one.  */
110
        extui   a7, xh, 20, 12
111
        extui   a8, yh, 20, 12
112
        bltu    a7, a8, .Ladd_shiftx
113
 
114
.Ladd_shifty:
115
        /* Check if the smaller (or equal) exponent is zero.  */
116
        bnone   yh, a6, .Ladd_yexpzero
117
 
118
        /* Replace yh sign/exponent with 0x001.  */
119
        or      yh, yh, a6
120
        slli    yh, yh, 11
121
        srli    yh, yh, 11
122
 
123
.Ladd_yexpdiff:
124
        /* Compute the exponent difference.  Optimize for difference < 32.  */
125
        sub     a10, a7, a8
126
        bgeui   a10, 32, .Ladd_bigshifty
127
 
128
        /* Shift yh/yl right by the exponent difference.  Any bits that are
129
           shifted out of yl are saved in a9 for rounding the result.  */
130
        ssr     a10
131
        movi    a9, 0
132
        src     a9, yl, a9
133
        src     yl, yh, yl
134
        srl     yh, yh
135
 
136
.Ladd_addy:
137
        /* Do the 64-bit addition.  */
138
        add     xl, xl, yl
139
        add     xh, xh, yh
140
        bgeu    xl, yl, 1f
141
        addi    xh, xh, 1
142
1:
143
        /* Check if the add overflowed into the exponent.  */
144
        extui   a10, xh, 20, 12
145
        beq     a10, a7, .Ladd_round
146
        mov     a8, a7
147
        j       .Ladd_carry
148
 
149
.Ladd_yexpzero:
150
        /* y is a subnormal value.  Replace its sign/exponent with zero,
151
           i.e., no implicit "1.0", and increment the apparent exponent
152
           because subnormals behave as if they had the minimum (nonzero)
153
           exponent.  Test for the case when both exponents are zero.  */
154
        slli    yh, yh, 12
155
        srli    yh, yh, 12
156
        bnone   xh, a6, .Ladd_bothexpzero
157
        addi    a8, a8, 1
158
        j       .Ladd_yexpdiff
159
 
160
.Ladd_bothexpzero:
161
        /* Both exponents are zero.  Handle this as a special case.  There
162
           is no need to shift or round, and the normal code for handling
163
           a carry into the exponent field will not work because it
164
           assumes there is an implicit "1.0" that needs to be added.  */
165
        add     xl, xl, yl
166
        add     xh, xh, yh
167
        bgeu    xl, yl, 1f
168
        addi    xh, xh, 1
169
1:      leaf_return
170
 
171
.Ladd_bigshifty:
172
        /* Exponent difference > 64 -- just return the bigger value.  */
173
        bgeui   a10, 64, 1b
174
 
175
        /* Shift yh/yl right by the exponent difference.  Any bits that are
176
           shifted out are saved in a9 for rounding the result.  */
177
        ssr     a10
178
        sll     a11, yl         /* lost bits shifted out of yl */
179
        src     a9, yh, yl
180
        srl     yl, yh
181
        movi    yh, 0
182
        beqz    a11, .Ladd_addy
183
        or      a9, a9, a10     /* any positive, nonzero value will work */
184
        j       .Ladd_addy
185
 
186
.Ladd_xexpzero:
187
        /* Same as "yexpzero" except skip handling the case when both
188
           exponents are zero.  */
189
        slli    xh, xh, 12
190
        srli    xh, xh, 12
191
        addi    a7, a7, 1
192
        j       .Ladd_xexpdiff
193
 
194
.Ladd_shiftx:
195
        /* Same thing as the "shifty" code, but with x and y swapped.  Also,
196
           because the exponent difference is always nonzero in this version,
197
           the shift sequence can use SLL and skip loading a constant zero.  */
198
        bnone   xh, a6, .Ladd_xexpzero
199
 
200
        or      xh, xh, a6
201
        slli    xh, xh, 11
202
        srli    xh, xh, 11
203
 
204
.Ladd_xexpdiff:
205
        sub     a10, a8, a7
206
        bgeui   a10, 32, .Ladd_bigshiftx
207
 
208
        ssr     a10
209
        sll     a9, xl
210
        src     xl, xh, xl
211
        srl     xh, xh
212
 
213
.Ladd_addx:
214
        add     xl, xl, yl
215
        add     xh, xh, yh
216
        bgeu    xl, yl, 1f
217
        addi    xh, xh, 1
218
1:
219
        /* Check if the add overflowed into the exponent.  */
220
        extui   a10, xh, 20, 12
221
        bne     a10, a8, .Ladd_carry
222
 
223
.Ladd_round:
224
        /* Round up if the leftover fraction is >= 1/2.  */
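        /* a9 holds the bits shifted out of the low word, left-justified, so
           a9 being negative (msb set) means the discarded fraction is at
           least 1/2 ulp; if doubling a9 then yields zero, it was exactly
           1/2 and the tie is broken to even below.  */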
        bgez    a9, 1f
        addi    xl, xl, 1
        beqz    xl, .Ladd_roundcarry

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_bigshiftx:
        /* Mostly the same thing as "bigshifty"....  */
        bgeui   a10, 64, .Ladd_returny

        ssr     a10
        sll     a11, xl
        src     a9, xh, xl
        srl     xl, xh
        movi    xh, 0
        beqz    a11, .Ladd_addx
        or      a9, a9, a10
        j       .Ladd_addx

.Ladd_returny:
        mov     xh, yh
        mov     xl, yl
        leaf_return

.Ladd_carry:
        /* The addition has overflowed into the exponent field, so the
           value needs to be renormalized.  The mantissa of the result
           can be recovered by subtracting the original exponent and
           adding 0x100000 (which is the explicit "1.0" for the
           mantissa of the non-shifted operand -- the "1.0" for the
           shifted operand was already added).  The mantissa can then
           be shifted right by one bit.  The explicit "1.0" of the
           shifted mantissa then needs to be replaced by the exponent,
           incremented by one to account for the normalizing shift.
           It is faster to combine these operations: do the shift first
           and combine the additions and subtractions.  If x is the
           original exponent, the result is:
               shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
           or:
               shifted mantissa + ((x + 1) << 19)
           Note that the exponent is incremented here by leaving the
           explicit "1.0" of the mantissa in the exponent field.  */

        /* Shift xh/xl right by one bit.  Save the lsb of xl.  */
        mov     a10, xl
        ssai    1
        src     xl, xh, xl
        srl     xh, xh

        /* See explanation above.  The original exponent is in a8.  */
        addi    a8, a8, 1
        slli    a8, a8, 19
        add     xh, xh, a8

        /* Return an Infinity if the exponent overflowed.  */
        ball    xh, a6, .Ladd_infinity

        /* Same thing as the "round" code except the msb of the leftover
           fraction is bit 0 of a10, with the rest of the fraction in a9.  */
        bbci.l  a10, 0, 1f
        addi    xl, xl, 1
        beqz    xl, .Ladd_roundcarry
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_infinity:
        /* Clear the mantissa.  */
        movi    xl, 0
        srli    xh, xh, 20
        slli    xh, xh, 20

        /* The sign bit may have been lost in a carry-out.  Put it back.  */
        slli    a8, a8, 1
        or      xh, xh, a8
        leaf_return

.Ladd_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    xl, xl, 1
        slli    xl, xl, 1
        leaf_return

.Ladd_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    xh, xh, 1
        /* Overflow to the exponent is OK.  */
        leaf_return


        /* Subtraction */
__subdf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Lsub_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   yh, a6, 1f
        /* Both x and y are either NaN or Inf, so the result is NaN.  */
        movi    a4, 0x80000     /* make it a quiet NaN */
        or      xh, xh, a4
1:      leaf_return

.Lsub_ynan_or_inf:
        /* Negate y and return it.  */
        slli    a7, a6, 11
        xor     xh, yh, a7
        mov     xl, yl
        leaf_return

.Lsub_opposite_signs:
        /* Operand signs differ.  Do an addition.  */
        slli    a7, a6, 11
        xor     yh, yh, a7
        j       .Ladd_same_sign

        .align  4
        .global __subdf3
        .type   __subdf3, @function
__subdf3:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000

        /* Check if the two operands have the same sign.  */
        xor     a7, xh, yh
        bltz    a7, .Lsub_opposite_signs

.Lsub_same_sign:
        /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
        ball    xh, a6, .Lsub_xnan_or_inf
        ball    yh, a6, .Lsub_ynan_or_inf

        /* Compare the operands.  In contrast to addition, the entire
           value matters here.  */
        extui   a7, xh, 20, 11
        extui   a8, yh, 20, 11
        bltu    xh, yh, .Lsub_xsmaller
        beq     xh, yh, .Lsub_compare_low

.Lsub_ysmaller:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   yh, a6, .Lsub_yexpzero

        /* Replace yh sign/exponent with 0x001.  */
        or      yh, yh, a6
        slli    yh, yh, 11
        srli    yh, yh, 11

.Lsub_yexpdiff:
        /* Compute the exponent difference.  Optimize for difference < 32.  */
        sub     a10, a7, a8
        bgeui   a10, 32, .Lsub_bigshifty

        /* Shift yh/yl right by the exponent difference.  Any bits that are
           shifted out of yl are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, yl, a9
        src     yl, yh, yl
        srl     yh, yh

.Lsub_suby:
        /* Do the 64-bit subtraction.  */
        sub     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, -1
1:      sub     xl, xl, yl

        /* Subtract the leftover bits in a9 from zero and propagate any
           borrow from xh/xl.  */
        neg     a9, a9
        beqz    a9, 1f
        addi    a5, xh, -1
        moveqz  xh, a5, xl
        addi    xl, xl, -1
1:
        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, xh, 20, 11
        beq     a10, a7, .Lsub_round
        j       .Lsub_borrow

.Lsub_compare_low:
        /* The high words are equal.  Compare the low words.  */
        bltu    xl, yl, .Lsub_xsmaller
        bltu    yl, xl, .Lsub_ysmaller
        /* The operands are equal.  Return 0.0.  */
        movi    xh, 0
        movi    xl, 0
1:      leaf_return

.Lsub_yexpzero:
        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0".  Unless x is also a subnormal, increment
           y's apparent exponent because subnormals behave as if they had
           the minimum (nonzero) exponent.  */
        slli    yh, yh, 12
        srli    yh, yh, 12
        bnone   xh, a6, .Lsub_yexpdiff
        addi    a8, a8, 1
        j       .Lsub_yexpdiff

.Lsub_bigshifty:
        /* Exponent difference > 64 -- just return the bigger value.  */
        bgeui   a10, 64, 1b

        /* Shift yh/yl right by the exponent difference.  Any bits that are
           shifted out are saved in a9 for rounding the result.  */
        ssr     a10
        sll     a11, yl         /* lost bits shifted out of yl */
        src     a9, yh, yl
        srl     yl, yh
        movi    yh, 0
        beqz    a11, .Lsub_suby
        or      a9, a9, a10     /* any positive, nonzero value will work */
        j       .Lsub_suby

.Lsub_xsmaller:
        /* Same thing as the "ysmaller" code, but with x and y swapped and
           with y negated.  */
        bnone   xh, a6, .Lsub_xexpzero

        or      xh, xh, a6
        slli    xh, xh, 11
        srli    xh, xh, 11

.Lsub_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Lsub_bigshiftx

        ssr     a10
        movi    a9, 0
        src     a9, xl, a9
        src     xl, xh, xl
        srl     xh, xh

        /* Negate y.  */
        slli    a11, a6, 11
        xor     yh, yh, a11

.Lsub_subx:
        sub     xl, yl, xl
        sub     xh, yh, xh
        bgeu    yl, xl, 1f
        addi    xh, xh, -1
1:
        /* Subtract the leftover bits in a9 from zero and propagate any
           borrow from xh/xl.  */
        neg     a9, a9
        beqz    a9, 1f
        addi    a5, xh, -1
        moveqz  xh, a5, xl
        addi    xl, xl, -1
1:
        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, xh, 20, 11
        bne     a10, a8, .Lsub_borrow

.Lsub_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    xl, xl, 1
        beqz    xl, .Lsub_roundcarry

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Lsub_exactlyhalf
1:      leaf_return

.Lsub_xexpzero:
        /* Same as "yexpzero".  */
        slli    xh, xh, 12
        srli    xh, xh, 12
        bnone   yh, a6, .Lsub_xexpdiff
        addi    a7, a7, 1
        j       .Lsub_xexpdiff

.Lsub_bigshiftx:
        /* Mostly the same thing as "bigshifty", but with the sign bit of the
           shifted value set so that the subsequent subtraction flips the
           sign of y.  */
        bgeui   a10, 64, .Lsub_returny

        ssr     a10
        sll     a11, xl
        src     a9, xh, xl
        srl     xl, xh
        slli    xh, a6, 11      /* set sign bit of xh */
        beqz    a11, .Lsub_subx
        or      a9, a9, a10
        j       .Lsub_subx

.Lsub_returny:
        /* Negate and return y.  */
        slli    a7, a6, 11
        xor     xh, yh, a7
        mov     xl, yl
        leaf_return

.Lsub_borrow:
        /* The subtraction has underflowed into the exponent field, so the
           value needs to be renormalized.  Shift the mantissa left as
           needed to remove any leading zeros and adjust the exponent
           accordingly.  If the exponent is not large enough to remove
           all the leading zeros, the result will be a subnormal value.  */

        slli    a8, xh, 12
        beqz    a8, .Lsub_xhzero
        do_nsau a6, a8, a7, a11
        srli    a8, a8, 12
        bge     a6, a10, .Lsub_subnormal
        addi    a6, a6, 1

.Lsub_shift_lt32:
        /* Shift the mantissa (a8/xl/a9) left by a6.  */
        ssl     a6
        src     a8, a8, xl
        src     xl, xl, a9
        sll     a9, a9

        /* Combine the shifted mantissa with the sign and exponent,
           decrementing the exponent by a6.  (The exponent has already
           been decremented by one due to the borrow from the subtraction,
           but adding the mantissa will increment the exponent by one.)  */
        srli    xh, xh, 20
        sub     xh, xh, a6
        slli    xh, xh, 20
        add     xh, xh, a8
        j       .Lsub_round

.Lsub_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    xl, xl, 1
        slli    xl, xl, 1
        leaf_return

.Lsub_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    xh, xh, 1
        /* Overflow to the exponent is OK.  */
        leaf_return

.Lsub_xhzero:
        /* When normalizing the result, all the mantissa bits in the high
           word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
        do_nsau a6, xl, a7, a11
        addi    a6, a6, 21
        blt     a10, a6, .Lsub_subnormal

.Lsub_normalize_shift:
        bltui   a6, 32, .Lsub_shift_lt32

        ssl     a6
        src     a8, xl, a9
        sll     xl, a9
        movi    a9, 0

        srli    xh, xh, 20
        sub     xh, xh, a6
        slli    xh, xh, 20
        add     xh, xh, a8
        j       .Lsub_round

.Lsub_subnormal:
        /* The exponent is too small to shift away all the leading zeros.
           Set a6 to the current exponent (which has already been
           decremented by the borrow) so that the exponent of the result
           will be zero.  Do not add 1 to a6 in this case, because: (1)
           adding the mantissa will not increment the exponent, so there is
           no need to subtract anything extra from the exponent to
           compensate, and (2) the effective exponent of a subnormal is 1
           not 0 so the shift amount must be 1 smaller than normal. */
        mov     a6, a10
        j       .Lsub_normalize_shift

#endif /* L_addsubdf3 */

#ifdef L_muldf3

        /* Multiplication */
__muldf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
        /* Clear the sign bit of x.  */
        slli    xh, xh, 1
        srli    xh, xh, 1

        /* If x is zero, return zero.  */
        or      a10, xh, xl
        beqz    a10, .Lmul_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        beqz    xh, .Lmul_xh_zero
        do_nsau a10, xh, a11, a12
        addi    a10, a10, -11
        ssl     a10
        src     xh, xh, xl
        sll     xl, xl
        movi    a8, 1
        sub     a8, a8, a10
        j       .Lmul_xnormalized
.Lmul_xh_zero:
        do_nsau a10, xl, a11, a12
        addi    a10, a10, -11
        movi    a8, -31
        sub     a8, a8, a10
        ssl     a10
        bltz    a10, .Lmul_xl_srl
        sll     xh, xl
        movi    xl, 0
        j       .Lmul_xnormalized
.Lmul_xl_srl:
        srl     xh, xl
        sll     xl, xl
        j       .Lmul_xnormalized

.Lmul_yexpzero:
        /* Clear the sign bit of y.  */
        slli    yh, yh, 1
        srli    yh, yh, 1

        /* If y is zero, return zero.  */
        or      a10, yh, yl
        beqz    a10, .Lmul_return_zero

        /* Normalize y.  Adjust the exponent in a9.  */
        beqz    yh, .Lmul_yh_zero
        do_nsau a10, yh, a11, a12
        addi    a10, a10, -11
        ssl     a10
        src     yh, yh, yl
        sll     yl, yl
        movi    a9, 1
        sub     a9, a9, a10
        j       .Lmul_ynormalized
.Lmul_yh_zero:
        do_nsau a10, yl, a11, a12
        addi    a10, a10, -11
        movi    a9, -31
        sub     a9, a9, a10
        ssl     a10
        bltz    a10, .Lmul_yl_srl
        sll     yh, yl
        movi    yl, 0
        j       .Lmul_ynormalized
.Lmul_yl_srl:
        srl     yh, yl
        sll     yl, yl
        j       .Lmul_ynormalized

.Lmul_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    xh, a7, 31
        slli    xh, xh, 31
        movi    xl, 0
        j       .Lmul_done

.Lmul_xnan_or_inf:
        /* If y is zero, return NaN.  */
        bnez    yl, 1f
        slli    a8, yh, 1
        bnez    a8, 1f
        movi    a4, 0x80000     /* make it a quiet NaN */
        or      xh, xh, a4
        j       .Lmul_done
1:
        /* If y is NaN, return y.  */
        bnall   yh, a6, .Lmul_returnx
        slli    a8, yh, 12
        or      a8, a8, yl
        beqz    a8, .Lmul_returnx

.Lmul_returny:
        mov     xh, yh
        mov     xl, yl

.Lmul_returnx:
        /* Set the sign bit and return.  */
        extui   a7, a7, 31, 1
        slli    xh, xh, 1
        ssai    1
        src     xh, a7, xh
        j       .Lmul_done

.Lmul_ynan_or_inf:
        /* If x is zero, return NaN.  */
        bnez    xl, .Lmul_returny
        slli    a8, xh, 1
        bnez    a8, .Lmul_returny
        movi    a7, 0x80000     /* make it a quiet NaN */
        or      xh, yh, a7
        j       .Lmul_done

        .align  4
        .global __muldf3
        .type   __muldf3, @function
__muldf3:
        leaf_entry sp, 32
#if __XTENSA_CALL0_ABI__
        addi    sp, sp, -32
        s32i    a12, sp, 16
        s32i    a13, sp, 20
        s32i    a14, sp, 24
        s32i    a15, sp, 28
#endif
        movi    a6, 0x7ff00000

        /* Get the sign of the result.  */
        xor     a7, xh, yh

        /* Check for NaN and infinity.  */
        ball    xh, a6, .Lmul_xnan_or_inf
        ball    yh, a6, .Lmul_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, xh, 20, 11
        extui   a9, yh, 20, 11

        beqz    a8, .Lmul_xexpzero
.Lmul_xnormalized:
        beqz    a9, .Lmul_yexpzero
.Lmul_ynormalized:

        /* Add the exponents.  */
        add     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0x1fffff
        or      xh, xh, a6
        and     xh, xh, a10
        or      yh, yh, a6
        and     yh, yh, a10

        /* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
           The least-significant word of the result is thrown away except
           that if it is nonzero, the lsb of a6 is set to 1.  */
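        /* Folding any nonzero bits of the discarded low word into the lsb of
           a6 preserves enough information for rounding: those bits can only
           affect whether a leftover fraction that looks like exactly 1/2 is
           really just above 1/2.  */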
#if XCHAL_HAVE_MUL32_HIGH

        /* Compute a6 with any carry-outs in a10.  */
        movi    a10, 0
        mull    a6, xl, yh
        mull    a11, xh, yl
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a10, a10, 1
1:
        muluh   a11, xl, yl
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a10, a10, 1
1:
        /* If the low word of the result is nonzero, set the lsb of a6.  */
        mull    a11, xl, yl
        beqz    a11, 1f
        movi    a9, 1
        or      a6, a6, a9
1:
        /* Compute xl with any carry-outs in a9.  */
        movi    a9, 0
        mull    a11, xh, yh
        add     a10, a10, a11
        bgeu    a10, a11, 1f
        addi    a9, a9, 1
1:
        muluh   a11, xh, yl
        add     a10, a10, a11
        bgeu    a10, a11, 1f
        addi    a9, a9, 1
1:
        muluh   xl, xl, yh
        add     xl, xl, a10
        bgeu    xl, a10, 1f
        addi    a9, a9, 1
1:
        /* Compute xh.  */
        muluh   xh, xh, yh
        add     xh, xh, a9

#else

        /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
           products.  These partial products are:

                0 xll * yll

                1 xll * ylh
                2 xlh * yll

                3 xll * yhl
                4 xlh * ylh
                5 xhl * yll

                6 xll * yhh
                7 xlh * yhl
                8 xhl * ylh
                9 xhh * yll

                10 xlh * yhh
                11 xhl * yhl
                12 xhh * ylh

                13 xhl * yhh
                14 xhh * yhl

                15 xhh * yhh

           where the input chunks are (hh, hl, lh, ll).  If using the Mul16
           or Mul32 multiplier options, these input chunks must be stored in
           separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
           that the inputs come from either half of the registers, so there
           is no need to shift them out ahead of time.  If there is no
           multiply hardware, the 16-bit chunks can be extracted when setting
           up the arguments to the separate multiply function.  */
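        /* Each group above collects partial products of equal weight: the
           product of chunk i of x and chunk j of y contributes at bit offset
           16 * (i + j) in the full 128-bit result, so members of a group can
           be summed before being shifted into place.  */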

        /* Save a7 since it is needed to hold a temporary value.  */
        s32i    a7, sp, 4
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
        /* Calling a separate multiply function will clobber a0 and requires
           use of a8 as a temporary, so save those values now.  (The function
           uses a custom ABI so nothing else needs to be saved.)  */
        s32i    a0, sp, 0
        s32i    a8, sp, 8
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define xlh a12
#define ylh a13
#define xhh a14
#define yhh a15

        /* Get the high halves of the inputs into registers.  */
        srli    xlh, xl, 16
        srli    ylh, yl, 16
        srli    xhh, xh, 16
        srli    yhh, yh, 16

#define xll xl
#define yll yl
#define xhl xh
#define yhl yh

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
        /* Clear the high halves of the inputs.  This does not matter
           for MUL16 because the high bits are ignored.  */
        extui   xl, xl, 0, 16
        extui   xh, xh, 0, 16
        extui   yl, yl, 0, 16
        extui   yh, yh, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mul16u  dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mull    dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
        rsr     dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
        extui   dst, src, 0, 16
#define set_arg_h(dst, src) \
        srli    dst, src, 16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a13, xreg); \
        set_arg_ ## yhalf (a14, yreg); \
        call0   .Lmul_mulsi3; \
        mov     dst, a12
#endif

        /* Add pp1 and pp2 into a10 with carry-out in a9.  */
        do_mul(a10, xl, l, yl, h)       /* pp 1 */
        do_mul(a11, xl, h, yl, l)       /* pp 2 */
        movi    a9, 0
        add     a10, a10, a11
        bgeu    a10, a11, 1f
        addi    a9, a9, 1
1:
        /* Initialize a6 with a9/a10 shifted into position.  Note that
           this value can be safely incremented without any carry-outs.  */
        ssai    16
        src     a6, a9, a10

        /* Compute the low word into a10.  */
        do_mul(a11, xl, l, yl, l)       /* pp 0 */
        sll     a10, a10
        add     a10, a10, a11
        bgeu    a10, a11, 1f
        addi    a6, a6, 1
1:
        /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
           This is good enough to determine the low half of a6, so that any
           nonzero bits from the low word of the result can be collapsed
           into a6, freeing up a register.  */
        movi    a9, 0
        do_mul(a11, xl, l, yh, l)       /* pp 3 */
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        do_mul(a11, xl, h, yl, h)       /* pp 4 */
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        do_mul(a11, xh, l, yl, l)       /* pp 5 */
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Collapse any nonzero bits from the low word into a6.  */
        beqz    a10, 1f
        movi    a11, 1
        or      a6, a6, a11
1:
        /* Add pp6-9 into a11 with carry-outs in a10.  */
        do_mul(a7, xl, l, yh, h)        /* pp 6 */
        do_mul(a11, xh, h, yl, l)       /* pp 9 */
        movi    a10, 0
        add     a11, a11, a7
        bgeu    a11, a7, 1f
        addi    a10, a10, 1
1:
        do_mul(a7, xl, h, yh, l)        /* pp 7 */
        add     a11, a11, a7
        bgeu    a11, a7, 1f
        addi    a10, a10, 1
1:
        do_mul(a7, xh, l, yl, h)        /* pp 8 */
        add     a11, a11, a7
        bgeu    a11, a7, 1f
        addi    a10, a10, 1
1:
        /* Shift a10/a11 into position, and add low half of a11 to a6.  */
        src     a10, a10, a11
        add     a10, a10, a9
        sll     a11, a11
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a10, a10, 1
1:
        /* Add pp10-12 into xl with carry-outs in a9.  */
        movi    a9, 0
        do_mul(xl, xl, h, yh, h)        /* pp 10 */
        add     xl, xl, a10
        bgeu    xl, a10, 1f
        addi    a9, a9, 1
1:
        do_mul(a10, xh, l, yh, l)       /* pp 11 */
        add     xl, xl, a10
        bgeu    xl, a10, 1f
        addi    a9, a9, 1
1:
        do_mul(a10, xh, h, yl, h)       /* pp 12 */
        add     xl, xl, a10
        bgeu    xl, a10, 1f
        addi    a9, a9, 1
1:
        /* Add pp13-14 into a11 with carry-outs in a10.  */
        do_mul(a11, xh, l, yh, h)       /* pp 13 */
        do_mul(a7, xh, h, yh, l)        /* pp 14 */
        movi    a10, 0
        add     a11, a11, a7
        bgeu    a11, a7, 1f
        addi    a10, a10, 1
1:
        /* Shift a10/a11 into position, and add low half of a11 to xl.  */
        src     a10, a10, a11
        add     a10, a10, a9
        sll     a11, a11
        add     xl, xl, a11
        bgeu    xl, a11, 1f
        addi    a10, a10, 1
1:
        /* Compute xh.  */
        do_mul(xh, xh, h, yh, h)        /* pp 15 */
        add     xh, xh, a10

        /* Restore values saved on the stack during the multiplication.  */
        l32i    a7, sp, 4
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
        l32i    a0, sp, 0
        l32i    a8, sp, 8
#endif
#endif

        /* Shift left by 12 bits, unless there was a carry-out from the
           multiply, in which case, shift by 11 bits and increment the
           exponent.  Note: It is convenient to use the constant 0x3ff
           instead of 0x400 when removing the extra exponent bias (so that
           it is easy to construct 0x7fe for the overflow check).  Reverse
           the logic here to decrement the exponent sum by one unless there
           was a carry-out.  */
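        /* (Both mantissas were normalized to [1.0, 2.0), so their product
           lies in [1.0, 4.0); the "carry-out" case is simply a product of
           2.0 or more, whose leading one sits one bit position higher.)  */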
        movi    a4, 11
        srli    a5, xh, 21 - 12
        bnez    a5, 1f
        addi    a4, a4, 1
        addi    a8, a8, -1
1:      ssl     a4
        src     xh, xh, xl
        src     xl, xl, a6
        sll     a6, a6

        /* Subtract the extra bias from the exponent sum (plus one to account
           for the explicit "1.0" of the mantissa that will be added to the
           exponent in the final result).  */
        movi    a4, 0x3ff
        sub     a8, a8, a4

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..7fd are OK here.  */
        slli    a4, a4, 1       /* 0x7fe */
        bgeu    a8, a4, .Lmul_overflow

.Lmul_round:
        /* Round.  */
        bgez    a6, .Lmul_rounded
        addi    xl, xl, 1
        beqz    xl, .Lmul_roundcarry
        slli    a6, a6, 1
        beqz    a6, .Lmul_exactlyhalf

.Lmul_rounded:
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 20
        add     xh, xh, a8

.Lmul_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      xh, xh, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
        l32i    a12, sp, 16
        l32i    a13, sp, 20
        l32i    a14, sp, 24
        l32i    a15, sp, 28
        addi    sp, sp, 32
#endif
        leaf_return

.Lmul_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    xl, xl, 1
        slli    xl, xl, 1
        j       .Lmul_rounded

.Lmul_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    xh, xh, 1
        /* Overflow is OK -- it will be added to the exponent.  */
        j       .Lmul_rounded

.Lmul_overflow:
        bltz    a8, .Lmul_underflow
        /* Return +/- Infinity.  */
        addi    a8, a4, 1       /* 0x7ff */
        slli    xh, a8, 20
        movi    xl, 0
        j       .Lmul_addsign

.Lmul_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        mov     a9, a6
        ssr     a8
        bgeui   a8, 32, .Lmul_bigshift

        /* Shift xh/xl right.  Any bits that are shifted out of xl are saved
           in a6 (combined with the shifted-out bits currently in a6) for
           rounding the result.  */
        sll     a6, xl
        src     xl, xh, xl
        srl     xh, xh
        j       1f

.Lmul_bigshift:
        bgeui   a8, 64, .Lmul_flush_to_zero
        sll     a10, xl         /* lost bits shifted out of xl */
        src     a6, xh, xl
        srl     xl, xh
        movi    xh, 0
        or      a9, a9, a10

        /* Set the exponent to zero.  */
1:      movi    a8, 0

        /* Pack any nonzero bits shifted out into a6.  */
        beqz    a9, .Lmul_round
        movi    a9, 1
        or      a6, a6, a9
        j       .Lmul_round

.Lmul_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    xh, a7, 31
        slli    xh, xh, 31
        movi    xl, 0
        j       .Lmul_done

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16

        /* For Xtensa processors with no multiply hardware, this simplified
           version of _mulsi3 is used for multiplying 16-bit chunks of
           the floating-point mantissas.  It uses a custom ABI: the inputs
           are passed in a13 and a14, the result is returned in a12, and
           a8 and a15 are clobbered.  */
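        /* Each pass of the loop handles four multiplier bits: for each set
           bit k (0..3) of a13 it conditionally adds a14 << k into the
           product accumulating in a12, then shifts a13 right and a14 left
           by four for the next pass.  */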
        .align  4
.Lmul_mulsi3:
        movi    a12, 0
.Lmul_mult_loop:
        add     a15, a14, a12
        extui   a8, a13, 0, 1
        movnez  a12, a15, a8

        do_addx2 a15, a14, a12, a15
        extui   a8, a13, 1, 1
        movnez  a12, a15, a8

        do_addx4 a15, a14, a12, a15
        extui   a8, a13, 2, 1
        movnez  a12, a15, a8

        do_addx8 a15, a14, a12, a15
        extui   a8, a13, 3, 1
        movnez  a12, a15, a8

        srli    a13, a13, 4
        slli    a14, a14, 4
        bnez    a13, .Lmul_mult_loop
        ret
#endif /* !MUL16 && !MUL32 && !MAC16 */
#endif /* L_muldf3 */

#ifdef L_divdf3

        /* Division */
__divdf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
        /* Clear the sign bit of y.  */
        slli    yh, yh, 1
        srli    yh, yh, 1

        /* Check for division by zero.  */
        or      a10, yh, yl
        beqz    a10, .Ldiv_yzero

        /* Normalize y.  Adjust the exponent in a9.  */
        beqz    yh, .Ldiv_yh_zero
        do_nsau a10, yh, a11, a9
        addi    a10, a10, -11
        ssl     a10
        src     yh, yh, yl
        sll     yl, yl
        movi    a9, 1
        sub     a9, a9, a10
        j       .Ldiv_ynormalized
.Ldiv_yh_zero:
        do_nsau a10, yl, a11, a9
        addi    a10, a10, -11
        movi    a9, -31
        sub     a9, a9, a10
        ssl     a10
        bltz    a10, .Ldiv_yl_srl
        sll     yh, yl
        movi    yl, 0
        j       .Ldiv_ynormalized
.Ldiv_yl_srl:
        srl     yh, yl
        sll     yl, yl
        j       .Ldiv_ynormalized

.Ldiv_yzero:
        /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
        slli    xh, xh, 1
        srli    xh, xh, 1
        or      xl, xl, xh
        srli    xh, a7, 31
        slli    xh, xh, 31
        or      xh, xh, a6
        bnez    xl, 1f
        movi    a4, 0x80000     /* make it a quiet NaN */
        or      xh, xh, a4
1:      movi    xl, 0
        leaf_return

.Ldiv_xexpzero:
        /* Clear the sign bit of x.  */
        slli    xh, xh, 1
        srli    xh, xh, 1

        /* If x is zero, return zero.  */
        or      a10, xh, xl
        beqz    a10, .Ldiv_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        beqz    xh, .Ldiv_xh_zero
        do_nsau a10, xh, a11, a8
        addi    a10, a10, -11
        ssl     a10
        src     xh, xh, xl
        sll     xl, xl
        movi    a8, 1
        sub     a8, a8, a10
        j       .Ldiv_xnormalized
.Ldiv_xh_zero:
        do_nsau a10, xl, a11, a8
        addi    a10, a10, -11
        movi    a8, -31
        sub     a8, a8, a10
        ssl     a10
        bltz    a10, .Ldiv_xl_srl
        sll     xh, xl
        movi    xl, 0
        j       .Ldiv_xnormalized
.Ldiv_xl_srl:
        srl     xh, xl
        sll     xl, xl
        j       .Ldiv_xnormalized

.Ldiv_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    xh, a7, 31
        slli    xh, xh, 31
        movi    xl, 0
        leaf_return

.Ldiv_xnan_or_inf:
        /* Set the sign bit of the result.  */
        srli    a7, yh, 31
        slli    a7, a7, 31
        xor     xh, xh, a7
        /* If y is NaN or Inf, return NaN.  */
        bnall   yh, a6, 1f
        movi    a4, 0x80000     /* make it a quiet NaN */
        or      xh, xh, a4
1:      leaf_return

.Ldiv_ynan_or_inf:
        /* If y is Infinity, return zero.  */
        slli    a8, yh, 12
        or      a8, a8, yl
        beqz    a8, .Ldiv_return_zero
        /* y is NaN; return it.  */
        mov     xh, yh
        mov     xl, yl
        leaf_return

.Ldiv_highequal1:
        bltu    xl, yl, 2f
        j       3f

        .align  4
        .global __divdf3
        .type   __divdf3, @function
__divdf3:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000

        /* Get the sign of the result.  */
        xor     a7, xh, yh

        /* Check for NaN and infinity.  */
        ball    xh, a6, .Ldiv_xnan_or_inf
        ball    yh, a6, .Ldiv_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, xh, 20, 11
        extui   a9, yh, 20, 11

        beqz    a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
        beqz    a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

        /* Subtract the exponents.  */
        sub     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0x1fffff
        or      xh, xh, a6
        and     xh, xh, a10
        or      yh, yh, a6
        and     yh, yh, a10

        /* Set SAR for left shift by one.  */
        ssai    (32 - 1)

        /* The first digit of the mantissa division must be a one.
           Shift x (and adjust the exponent) as needed to make this true.  */
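        /* (Both mantissas are in [1.0, 2.0), so x/y lies in (0.5, 2.0).  If
           x < y the leading quotient bit would be zero, so x is doubled here
           and the exponent decremented by one to compensate.)  */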
        bltu    yh, xh, 3f
        beq     yh, xh, .Ldiv_highequal1
2:      src     xh, xh, xl
        sll     xl, xl
        addi    a8, a8, -1
3:
        /* Do the first subtraction and shift.  */
        sub     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, -1
1:      sub     xl, xl, yl
        src     xh, xh, xl
        sll     xl, xl

        /* Put the quotient into a10/a11.  */
        movi    a10, 0
        movi    a11, 1

        /* Divide one bit at a time for 52 bits.  */
        movi    a9, 52
#if XCHAL_HAVE_LOOPS
        loop    a9, .Ldiv_loopend
#endif
.Ldiv_loop:
        /* Shift the quotient << 1.  */
        src     a10, a10, a11
        sll     a11, a11

        /* Is this digit a 0 or 1?  */
        bltu    xh, yh, 3f
        beq     xh, yh, .Ldiv_highequal2

        /* Output a 1 and subtract.  */
2:      addi    a11, a11, 1
        sub     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, -1
1:      sub     xl, xl, yl

        /* Shift the dividend << 1.  */
3:      src     xh, xh, xl
        sll     xl, xl

#if !XCHAL_HAVE_LOOPS
        addi    a9, a9, -1
        bnez    a9, .Ldiv_loop
#endif
.Ldiv_loopend:

        /* Add the exponent bias (less one to account for the explicit "1.0"
           of the mantissa that will be added to the exponent in the final
           result).  */
        movi    a9, 0x3fe
        add     a8, a8, a9

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..7fd are OK here.  */
        addmi   a9, a9, 0x400   /* 0x7fe */
        bgeu    a8, a9, .Ldiv_overflow

.Ldiv_round:
        /* Round.  The remainder (<< 1) is in xh/xl.  */
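        /* Comparing the doubled remainder against the divisor is the usual
           round-to-nearest test: 2*rem > divisor rounds up, 2*rem == divisor
           is the halfway case, which is rounded to even below.  */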
        bltu    xh, yh, .Ldiv_rounded
        beq     xh, yh, .Ldiv_highequal3
.Ldiv_roundup:
        addi    a11, a11, 1
        beqz    a11, .Ldiv_roundcarry

.Ldiv_rounded:
        mov     xl, a11
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 20
        add     xh, a10, a8

.Ldiv_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      xh, xh, a7
        leaf_return

.Ldiv_highequal2:
        bgeu    xl, yl, 2b
        j       3b

.Ldiv_highequal3:
        bltu    xl, yl, .Ldiv_rounded
        bne     xl, yl, .Ldiv_roundup

        /* Remainder is exactly half the divisor.  Round even.  */
        addi    a11, a11, 1
        beqz    a11, .Ldiv_roundcarry
        srli    a11, a11, 1
        slli    a11, a11, 1
        j       .Ldiv_rounded

.Ldiv_overflow:
        bltz    a8, .Ldiv_underflow
        /* Return +/- Infinity.  */
        addi    a8, a9, 1       /* 0x7ff */
        slli    xh, a8, 20
        movi    xl, 0
        j       .Ldiv_addsign

.Ldiv_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        ssr     a8
        bgeui   a8, 32, .Ldiv_bigshift

        /* Shift a10/a11 right.  Any bits that are shifted out of a11 are
           saved in a6 for rounding the result.  */
        sll     a6, a11
        src     a11, a10, a11
        srl     a10, a10
        j       1f

.Ldiv_bigshift:
        bgeui   a8, 64, .Ldiv_flush_to_zero
        sll     a9, a11         /* lost bits shifted out of a11 */
        src     a6, a10, a11
        srl     a11, a10
        movi    a10, 0
        or      xl, xl, a9

        /* Set the exponent to zero.  */
1:      movi    a8, 0

        /* Pack any nonzero remainder (in xh/xl) into a6.  */
        or      xh, xh, xl
        beqz    xh, 1f
        movi    a9, 1
        or      a6, a6, a9

        /* Round a10/a11 based on the bits shifted out into a6.  */
1:      bgez    a6, .Ldiv_rounded
        addi    a11, a11, 1
        beqz    a11, .Ldiv_roundcarry
        slli    a6, a6, 1
        bnez    a6, .Ldiv_rounded
        srli    a11, a11, 1
        slli    a11, a11, 1
        j       .Ldiv_rounded

.Ldiv_roundcarry:
        /* a11 is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    a10, a10, 1
        /* Overflow to the exponent field is OK.  */
        j       .Ldiv_rounded

.Ldiv_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    xh, a7, 31
        slli    xh, xh, 31
        movi    xl, 0
        leaf_return

#endif /* L_divdf3 */

#ifdef L_cmpdf2

        /* Equal and Not Equal */

        .align  4
        .global __eqdf2
        .global __nedf2
        .set    __nedf2, __eqdf2
        .type   __eqdf2, @function
__eqdf2:
        leaf_entry sp, 16
        bne     xl, yl, 2f
        bne     xh, yh, 4f

        /* The values are equal but NaN != NaN.  Check the exponent.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, 3f

        /* Equal.  */
        movi    a2, 0
        leaf_return

        /* Not equal.  */
2:      movi    a2, 1
        leaf_return

        /* Check if the mantissas are nonzero.  */
3:      slli    a7, xh, 12
        or      a7, a7, xl
        j       5f

        /* Check if x and y are zero with different signs.  */
4:      or      a7, xh, yh
        slli    a7, a7, 1
        or      a7, a7, xl      /* xl == yl here */

        /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
           of x when exponent(x) = 0x7ff and x == y.  */
1555
5:      movi    a2, 0
1556
        movi    a3, 1
1557
        movnez  a2, a3, a7
1558
        leaf_return
1559
 
1560
 
1561
        /* Greater Than */

        .align  4
        .global __gtdf2
        .type   __gtdf2, @function
__gtdf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 2f
1:      bnall   yh, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, .Lle_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, 0
        leaf_return


        /* Less Than or Equal */

        .align  4
        .global __ledf2
        .type   __ledf2, @function
__ledf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 2f
1:      bnall   yh, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, .Lle_cmp
        movi    a2, 1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

.Lle_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, xh, yh
        bltz    a7, .Lle_diff_signs

        /* Check if x is negative.  */
        bltz    xh, .Lle_xneg

        /* Check if x <= y.  */
        bltu    xh, yh, 4f
        bne     xh, yh, 5f
        bltu    yl, xl, 5f
4:      movi    a2, 0
        leaf_return

.Lle_xneg:
        /* Check if y <= x.  */
        bltu    yh, xh, 4b
        bne     yh, xh, 5f
        bgeu    xl, yl, 4b
5:      movi    a2, 1
        leaf_return

.Lle_diff_signs:
        bltz    xh, 4b

        /* Check if both x and y are zero.  */
        or      a7, xh, yh
        slli    a7, a7, 1
        or      a7, a7, xl
        or      a7, a7, yl
        movi    a2, 1
        movi    a3, 0
        moveqz  a2, a3, a7
        leaf_return


        /* Greater Than or Equal */

        .align  4
        .global __gedf2
        .type   __gedf2, @function
__gedf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 2f
1:      bnall   yh, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, .Llt_cmp
        movi    a2, -1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, -1
        leaf_return


        /* Less Than */

        .align  4
        .global __ltdf2
        .type   __ltdf2, @function
__ltdf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 2f
1:      bnall   yh, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, .Llt_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, 0
        leaf_return

.Llt_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, xh, yh
        bltz    a7, .Llt_diff_signs

        /* Check if x is negative.  */
        bltz    xh, .Llt_xneg

        /* Check if x < y.  */
        bltu    xh, yh, 4f
        bne     xh, yh, 5f
        bgeu    xl, yl, 5f
4:      movi    a2, -1
        leaf_return

.Llt_xneg:
        /* Check if y < x.  */
        bltu    yh, xh, 4b
        bne     yh, xh, 5f
        bltu    yl, xl, 4b
5:      movi    a2, 0
        leaf_return

.Llt_diff_signs:
        bgez    xh, 5b

        /* Check if both x and y are nonzero.  */
        or      a7, xh, yh
        slli    a7, a7, 1
        or      a7, a7, xl
        or      a7, a7, yl
        movi    a2, 0
        movi    a3, -1
        movnez  a2, a3, a7
        leaf_return


        /* Unordered */

        .align  4
        .global __unorddf2
        .type   __unorddf2, @function
__unorddf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 3f
1:      ball    yh, a6, 4f
2:      movi    a2, 0
        leaf_return

3:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

4:      slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, 2b
        movi    a2, 1
        leaf_return

#endif /* L_cmpdf2 */
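
/* For reference, the comparison helpers above follow the usual libgcc
   soft-float convention: __eqdf2 returns zero iff x == y, __ledf2 returns
   a value <= 0 iff x <= y, __ltdf2 returns a value < 0 iff x < y,
   __gedf2/__gtdf2 return a value >= 0 / > 0 iff the relation holds, and
   __unorddf2 is nonzero iff either operand is a NaN.  Returning 0 from the
   shared .Lle_cmp tail therefore means "x <= y" to __ledf2 and "not x > y"
   to __gtdf2, which is why the two entry points can share it.

   A rough C sketch of one of these predicates, purely illustrative (the
   helper names below are not part of this file or of libgcc):

       #include <stdint.h>
       #include <string.h>

       static int is_nan_bits(uint64_t b)
       {
           return ((b >> 52) & 0x7ff) == 0x7ff && (b << 12) != 0;
       }

       // Model of __ltdf2: result is negative iff x < y; any NaN gives 0.
       static int ltdf2_model(double x, double y)
       {
           uint64_t bx, by;
           memcpy(&bx, &x, sizeof bx);
           memcpy(&by, &y, sizeof by);
           if (is_nan_bits(bx) || is_nan_bits(by))
               return 0;                    // unordered: "less than" is false
           if (((bx | by) << 1) == 0)
               return 0;                    // +0 and -0 compare equal
           int sx = (int)(bx >> 63), sy = (int)(by >> 63);
           if (sx != sy)
               return sx ? -1 : 0;          // only the negative one is smaller
           if (bx == by)
               return 0;
           // Same sign: the raw bit patterns order the magnitudes.
           return ((bx < by) != sx) ? -1 : 0;
       }

   The NaN checks at the top of each routine exist because an exponent
   field of 0x7ff with a nonzero mantissa must make every ordered
   comparison fail, exactly as in the model above.  */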

#ifdef L_fixdfsi

        .align  4
        .global __fixdfsi
        .type   __fixdfsi, @function
__fixdfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixdfsi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 19, 10  /* 0x3fe */
        sub     a4, a4, a5
        bgei    a4, 32, .Lfixdfsi_maxint
        blti    a4, 1, .Lfixdfsi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     a5, a7, xl

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixdfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixdfsi_maxint

        /* Translate NaN to +maxint.  */
        movi    xh, 0

.Lfixdfsi_maxint:
        slli    a4, a6, 11      /* 0x80000000 */
        addi    a5, a4, -1      /* 0x7fffffff */
        movgez  a4, a5, xh
        mov     a2, a4
        leaf_return

.Lfixdfsi_zero:
        movi    a2, 0
        leaf_return

#endif /* L_fixdfsi */
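
/* For reference, a rough C model of the conversion __fixdfsi performs
   (illustrative only; fixdfsi_model is not part of this file or of
   libgcc).  The "a4" quantity above corresponds to exp - 0x3fe below,
   and out-of-range values, Inf and NaN saturate exactly as in the
   .Lfixdfsi_maxint path, with NaN treated as +maxint:

       #include <stdint.h>
       #include <string.h>

       static int32_t fixdfsi_model(double x)
       {
           uint64_t b;
           memcpy(&b, &x, sizeof b);
           int neg = (int)(b >> 63);
           int exp = (int)((b >> 52) & 0x7ff);

           if (exp == 0x7ff) {                   // Inf or NaN
               if ((b << 12) != 0)
                   neg = 0;                      // NaN is translated to +maxint
               return neg ? INT32_MIN : INT32_MAX;
           }
           int shift = exp - 0x3fe;              // register a4 above
           if (shift >= 32)
               return neg ? INT32_MIN : INT32_MAX;
           if (shift < 1)
               return 0;                         // |x| < 1 truncates to 0

           // 53-bit significand left-aligned in 32 bits (the "1.0" plus
           // the top 31 mantissa bits), then shifted back by 32 - shift.
           uint32_t frac = (uint32_t)(((b & 0x000fffffffffffffull)
                                       | 0x0010000000000000ull) >> 21);
           uint32_t mag = frac >> (32 - shift);
           return neg ? -(int32_t)mag : (int32_t)mag;
       }  */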

#ifdef L_fixdfdi

        .align  4
        .global __fixdfdi
        .type   __fixdfdi, @function
__fixdfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixdfdi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 19, 10  /* 0x3fe */
        sub     a4, a4, a5
        bgei    a4, 64, .Lfixdfdi_maxint
        blti    a4, 1, .Lfixdfdi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     xh, a7, xl
        sll     xl, xl

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixdfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixdfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixdfdi_smallshift:
        src     xl, xh, xl
        srl     xh, xh
        j       .Lfixdfdi_shifted

.Lfixdfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixdfdi_maxint

        /* Translate NaN to +maxint.  */
        movi    xh, 0

.Lfixdfdi_maxint:
        slli    a7, a6, 11      /* 0x80000000 */
        bgez    xh, 1f
        mov     xh, a7
        movi    xl, 0
        leaf_return

1:      addi    xh, a7, -1      /* 0x7fffffff */
        movi    xl, -1
        leaf_return

.Lfixdfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

#endif /* L_fixdfdi */
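
/* __fixdfdi follows the same scheme with a 64-bit window; the additions
   are the two shift cases (more or less than 32 bits) and the two-word
   negation.  A rough, illustrative C model (fixdfdi_model is not part of
   this file or of libgcc):

       #include <stdint.h>
       #include <string.h>

       static int64_t fixdfdi_model(double x)
       {
           uint64_t b;
           memcpy(&b, &x, sizeof b);
           int neg = (int)(b >> 63);
           int exp = (int)((b >> 52) & 0x7ff);

           if (exp == 0x7ff) {                      // Inf or NaN
               if ((b << 12) != 0)
                   neg = 0;                         // NaN acts like +Inf here
               return neg ? INT64_MIN : INT64_MAX;
           }
           int shift = exp - 0x3fe;                 // register a4 above
           if (shift >= 64)
               return neg ? INT64_MIN : INT64_MAX;
           if (shift < 1)
               return 0;

           uint64_t sig = (b & 0x000fffffffffffffull) | 0x0010000000000000ull;
           uint64_t mag = (sig << 11) >> (64 - shift);   // align, then shift back
           return neg ? -(int64_t)mag : (int64_t)mag;
       }  */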

#ifdef L_fixunsdfsi

        .align  4
        .global __fixunsdfsi
        .type   __fixunsdfsi, @function
__fixunsdfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixunsdfsi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 20, 10  /* 0x3ff */
        sub     a4, a4, a5
        bgei    a4, 32, .Lfixunsdfsi_maxint
        bltz    a4, .Lfixunsdfsi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     a5, a7, xl

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 32, .Lfixunsdfsi_bigexp
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixunsdfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixunsdfsi_maxint

        /* Translate NaN to 0xffffffff.  */
        movi    a2, -1
        leaf_return

.Lfixunsdfsi_maxint:
        slli    a4, a6, 11      /* 0x80000000 */
        movi    a5, -1          /* 0xffffffff */
        movgez  a4, a5, xh
        mov     a2, a4
        leaf_return

.Lfixunsdfsi_zero:
        movi    a2, 0
        leaf_return

.Lfixunsdfsi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    xh, 1f
        mov     a2, a5          /* no shift needed */
        leaf_return

        /* Return 0x80000000 if negative.  */
1:      slli    a2, a6, 11
        leaf_return

#endif /* L_fixunsdfsi */
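
/* A rough C model of __fixunsdfsi for the cases that are well defined in C
   (illustrative only; fixunsdfsi_model is not part of this file or of
   libgcc).  Note the bias subtracted here is 0x3ff rather than the 0x3fe
   used by the signed conversion, since the unsigned result has one more
   magnitude bit.  Negative and out-of-range inputs are undefined behaviour
   in C; the assembly above picks concrete results for them (NaN and large
   positive values become 0xffffffff, large negative values 0x80000000,
   small negative values the two's complement of the truncated magnitude):

       #include <stdint.h>
       #include <string.h>

       static uint32_t fixunsdfsi_model(double x)
       {
           uint64_t b;
           memcpy(&b, &x, sizeof b);
           int exp = (int)((b >> 52) & 0x7ff);

           if (exp == 0x7ff && (b << 12) != 0)
               return 0xffffffffu;              // NaN: all ones
           if (b >> 63)
               return 0;                        // negative: see the code above
           int shift = exp - 0x3ff;             // register a4 above
           if (shift < 0)
               return 0;                        // x < 1 truncates to 0
           if (shift >= 32)
               return 0xffffffffu;              // too large (or +Inf): saturate

           uint64_t sig = (b & 0x000fffffffffffffull) | 0x0010000000000000ull;
           return (uint32_t)(sig >> (52 - shift));
       }

   The .Lfixunsdfsi_bigexp path handles values in [2**31, 2**32), whose 32
   significant bits need no right shift at all.  */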

#ifdef L_fixunsdfdi

        .align  4
        .global __fixunsdfdi
        .type   __fixunsdfdi, @function
__fixunsdfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixunsdfdi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 20, 10  /* 0x3ff */
        sub     a4, a4, a5
        bgei    a4, 64, .Lfixunsdfdi_maxint
        bltz    a4, .Lfixunsdfdi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     xh, a7, xl
        sll     xl, xl

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 64, .Lfixunsdfdi_bigexp
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixunsdfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixunsdfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixunsdfdi_smallshift:
        src     xl, xh, xl
        srl     xh, xh
        j       .Lfixunsdfdi_shifted

.Lfixunsdfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixunsdfdi_maxint

        /* Translate NaN to 0xffffffff.... */
1:      movi    xh, -1
        movi    xl, -1
        leaf_return

.Lfixunsdfdi_maxint:
        bgez    xh, 1b
2:      slli    xh, a6, 11      /* 0x80000000 */
        movi    xl, 0
        leaf_return

.Lfixunsdfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

.Lfixunsdfdi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    a7, 2b
        leaf_return             /* no shift needed */

#endif /* L_fixunsdfdi */

#ifdef L_floatsidf

        .align  4
        .global __floatunsidf
        .type   __floatunsidf, @function
__floatunsidf:
        leaf_entry sp, 16
        beqz    a2, .Lfloatsidf_return_zero

        /* Set the sign to zero and jump to the floatsidf code.  */
        movi    a7, 0
        j       .Lfloatsidf_normalize

        .align  4
        .global __floatsidf
        .type   __floatsidf, @function
__floatsidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        beqz    a2, .Lfloatsidf_return_zero

        /* Save the sign.  */
        extui   a7, a2, 31, 1

        /* Get the absolute value.  */
#if XCHAL_HAVE_ABS
        abs     a2, a2
#else
        neg     a4, a2
        movltz  a2, a4, a2
#endif

.Lfloatsidf_normalize:
        /* Normalize with the first 1 bit in the msb.  */
        do_nsau a4, a2, a5, a6
        ssl     a4
        sll     a5, a2

        /* Shift the mantissa into position.  */
        srli    xh, a5, 11
        slli    xl, a5, (32 - 11)

        /* Set the exponent.  */
        movi    a5, 0x41d       /* 0x3fe + 31 */
        sub     a5, a5, a4
        slli    a5, a5, 20
        add     xh, xh, a5

        /* Add the sign and return.  */
        slli    a7, a7, 31
        or      xh, xh, a7
        leaf_return

.Lfloatsidf_return_zero:
        movi    a3, 0
        leaf_return

#endif /* L_floatsidf */
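
/* For reference, a rough C model of __floatsidf (illustrative only;
   floatsidf_model is not part of this file or of libgcc).  Every 32-bit
   integer is exactly representable as a double, so there is no rounding:
   the value is normalized with its leading 1 in the msb and the exponent
   is derived from the normalization count (do_nsau above).  Note that the
   assembly adds 0x3fe + 31 - count to a word that still contains the
   leading significand bit in the low bit of the exponent field, which
   produces the same 0x3ff + 31 - count exponent used below.

       #include <stdint.h>
       #include <string.h>

       static double floatsidf_model(int32_t v)
       {
           if (v == 0)
               return 0.0;
           uint64_t sign = (v < 0);
           uint32_t mag = sign ? -(uint32_t)v : (uint32_t)v;

           int nz = 0;                       // leading-zero count (do_nsau)
           while (!(mag & 0x80000000u)) {
               mag <<= 1;
               ++nz;
           }

           uint64_t bits = (sign << 63)
                         | ((uint64_t)(0x3ff + 31 - nz) << 52)
                         | (((uint64_t)mag << 21) & 0x000fffffffffffffull);
           double d;
           memcpy(&d, &bits, sizeof d);
           return d;
       }

   __floatunsidf is the same routine with the sign forced to zero and the
   full 32-bit magnitude used directly.  */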

#ifdef L_floatdidf

        .align  4
        .global __floatundidf
        .type   __floatundidf, @function
__floatundidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Set the sign to zero and jump to the floatdidf code.  */
        movi    a7, 0
        j       .Lfloatdidf_normalize

        .align  4
        .global __floatdidf
        .type   __floatdidf, @function
__floatdidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Save the sign.  */
        extui   a7, xh, 31, 1

        /* Get the absolute value.  */
        bgez    xh, .Lfloatdidf_normalize
        neg     xl, xl
        neg     xh, xh
        beqz    xl, .Lfloatdidf_normalize
        addi    xh, xh, -1

.Lfloatdidf_normalize:
        /* Normalize with the first 1 bit in the msb of xh.  */
        beqz    xh, .Lfloatdidf_bigshift
        do_nsau a4, xh, a5, a6
        ssl     a4
        src     xh, xh, xl
        sll     xl, xl

.Lfloatdidf_shifted:
        /* Shift the mantissa into position, with rounding bits in a6.  */
        ssai    11
        sll     a6, xl
        src     xl, xh, xl
        srl     xh, xh

        /* Set the exponent.  */
        movi    a5, 0x43d       /* 0x3fe + 63 */
        sub     a5, a5, a4
        slli    a5, a5, 20
        add     xh, xh, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      xh, xh, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, 2f
        addi    xl, xl, 1
        beqz    xl, .Lfloatdidf_roundcarry

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatdidf_exactlyhalf
2:      leaf_return

.Lfloatdidf_bigshift:
        /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
        do_nsau a4, xl, a5, a6
        ssl     a4
        sll     xh, xl
        movi    xl, 0
        addi    a4, a4, 32
        j       .Lfloatdidf_shifted

.Lfloatdidf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    xl, xl, 1
        slli    xl, xl, 1
        leaf_return

.Lfloatdidf_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    xh, xh, 1
        /* Overflow to the exponent is OK.  */
        leaf_return

#endif /* L_floatdidf */
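
/* Unlike the 32-bit case, an int64_t can have more than 53 significant
   bits, so __floatdidf must round.  The code keeps the 11 bits shifted out
   of the mantissa in a6 and rounds to nearest, ties to even: it increments
   when the leftover is >= 1/2 and then clears the low mantissa bit again
   when the leftover was exactly 1/2.  A rough C model (illustrative only;
   floatdidf_model is not part of this file or of libgcc) that reaches the
   same results with the more common "round up on more than half, or on a
   tie when the kept value is odd" formulation:

       #include <stdint.h>
       #include <string.h>

       static double floatdidf_model(int64_t v)
       {
           if (v == 0)
               return 0.0;
           uint64_t sign = (v < 0);
           uint64_t mag = sign ? -(uint64_t)v : (uint64_t)v;

           int nz = 0;                            // leading zeros (do_nsau)
           while (!(mag & 0x8000000000000000ull)) {
               mag <<= 1;
               ++nz;
           }

           uint64_t frac = mag >> 11;             // 53-bit significand
           uint64_t rest = mag << 53;             // 11 discarded bits, left-aligned
           uint64_t expf = (uint64_t)(0x3ff + 63 - nz);

           if (rest > 0x8000000000000000ull
               || (rest == 0x8000000000000000ull && (frac & 1)))
               ++frac;                            // may carry into the exponent

           // Using '+' lets a mantissa carry bump the exponent, exactly as
           // the "overflow to the exponent is OK" comment above relies on.
           uint64_t bits = (sign << 63)
                         + (expf << 52)
                         + (frac - 0x0010000000000000ull);
           double d;
           memcpy(&d, &bits, sizeof d);
           return d;
       }  */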

#ifdef L_truncdfsf2

        .align  4
        .global __truncdfsf2
        .type   __truncdfsf2, @function
__truncdfsf2:
        leaf_entry sp, 16

        /* Adjust the exponent bias.  */
        movi    a4, (0x3ff - 0x7f) << 20
        sub     a5, xh, a4

        /* Check for underflow.  */
        xor     a6, xh, a5
        bltz    a6, .Ltrunc_underflow
        extui   a6, a5, 20, 11
        beqz    a6, .Ltrunc_underflow

        /* Check for overflow.  */
        movi    a4, 255
        bge     a6, a4, .Ltrunc_overflow

        /* Shift a5/xl << 3 into a5/a4.  */
        ssai    (32 - 3)
        src     a5, a5, xl
        sll     a4, xl

.Ltrunc_addsign:
        /* Add the sign bit.  */
        extui   a6, xh, 31, 1
        slli    a6, a6, 31
        or      a2, a6, a5

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a4, 1f
        addi    a2, a2, 1
        /* Overflow to the exponent is OK.  The answer will be correct.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a4, a4, 1
        beqz    a4, .Ltrunc_exactlyhalf
1:      leaf_return

.Ltrunc_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

.Ltrunc_overflow:
        /* Check if exponent == 0x7ff.  */
        movi    a4, 0x7ff00000
        bnall   xh, a4, 1f

        /* Check if mantissa is nonzero.  */
        slli    a5, xh, 12
        or      a5, a5, xl
        beqz    a5, 1f

        /* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
        srli    a4, a4, 1

1:      slli    a4, a4, 4       /* 0xff000000 or 0xff800000 */
        /* Add the sign bit.  */
        extui   a6, xh, 31, 1
        ssai    1
        src     a2, a6, a4
        leaf_return

.Ltrunc_underflow:
        /* Find shift count for a subnormal.  Flush to zero if >= 32.  */
        extui   a6, xh, 20, 11
        movi    a5, 0x3ff - 0x7f
        sub     a6, a5, a6
        addi    a6, a6, 1
        bgeui   a6, 32, 1f

        /* Replace the exponent with an explicit "1.0".  */
        slli    a5, a5, 13      /* 0x700000 */
        or      a5, a5, xh
        slli    a5, a5, 11
        srli    a5, a5, 11

        /* Shift the mantissa left by 3 bits (into a5/a4).  */
        ssai    (32 - 3)
        src     a5, a5, xl
        sll     a4, xl

        /* Shift right by a6.  */
        ssr     a6
        sll     a7, a4
        src     a4, a5, a4
        srl     a5, a5
        beqz    a7, .Ltrunc_addsign
        or      a4, a4, a6      /* any positive, nonzero value will work */
        j       .Ltrunc_addsign

        /* Return +/- zero.  */
1:      extui   a2, xh, 31, 1
        slli    a2, a2, 31
        leaf_return

#endif /* L_truncdfsf2 */
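
/* For reference, a rough C model of __truncdfsf2 (illustrative only;
   truncdfsf2_model is not part of this file or of libgcc).  The routine
   adjusts the exponent bias by 0x3ff - 0x7f, narrows the 52-bit mantissa
   to 23 bits, rounds to nearest with ties to even using the discarded
   bits as a sticky word (kept in a4 above), and special-cases overflow to
   infinity, NaN (which gets its quiet bit set) and subnormal results:

       #include <stdint.h>
       #include <string.h>

       static float truncdfsf2_model(double x)
       {
           uint64_t b;
           memcpy(&b, &x, sizeof b);
           uint32_t sign = (uint32_t)(b >> 63) << 31;
           int exp = (int)((b >> 52) & 0x7ff);
           uint64_t man = b & 0x000fffffffffffffull;
           uint32_t out;

           if (exp == 0x7ff) {
               // Inf stays Inf; a NaN gets the quiet bit, as in .Ltrunc_overflow.
               out = sign | 0x7f800000u | (man ? 0x00400000u : 0u);
           } else {
               int sexp = exp - (0x3ff - 0x7f);     // rebias toward float
               uint64_t sig = man | 0x0010000000000000ull;
               int shift = 52 - 23;                 // bits to discard
               if (sexp <= 0) {                     // subnormal or zero result
                   shift += 1 - sexp;               // (subnormal doubles always
                   sexp = 0;                        //  land in the +/-0 case)
               }
               if (sexp >= 255) {
                   out = sign | 0x7f800000u;        // overflow to infinity
               } else if (shift > 63) {
                   out = sign;                      // rounds to +/- zero
               } else {
                   uint64_t keep = sig >> shift;
                   uint64_t rest = sig << (64 - shift);
                   uint32_t up = (rest > 0x8000000000000000ull
                                  || (rest == 0x8000000000000000ull && (keep & 1)))
                                 ? 1u : 0u;
                   // '+' lets rounding carry into the exponent, as above.
                   out = sign + ((uint32_t)sexp << 23)
                              + (uint32_t)keep
                              - (sexp ? 0x00800000u : 0u)
                              + up;
               }
           }
           float f;
           memcpy(&f, &out, sizeof f);
           return f;
       }  */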

#ifdef L_extendsfdf2

        .align  4
        .global __extendsfdf2
        .type   __extendsfdf2, @function
__extendsfdf2:
        leaf_entry sp, 16

        /* Save the sign bit and then shift it off.  */
        extui   a5, a2, 31, 1
        slli    a5, a5, 31
        slli    a4, a2, 1

        /* Extract and check the exponent.  */
        extui   a6, a2, 23, 8
        beqz    a6, .Lextend_expzero
        addi    a6, a6, 1
        beqi    a6, 256, .Lextend_nan_or_inf

        /* Shift >> 3 into a4/xl.  */
        srli    a4, a4, 4
        slli    xl, a2, (32 - 3)

        /* Adjust the exponent bias.  */
        movi    a6, (0x3ff - 0x7f) << 20
        add     a4, a4, a6

        /* Add the sign bit.  */
        or      xh, a4, a5
        leaf_return

.Lextend_nan_or_inf:
        movi    a4, 0x7ff00000

        /* Check for NaN.  */
        slli    a7, a2, 9
        beqz    a7, 1f

        slli    a6, a6, 11      /* 0x80000 */
        or      a4, a4, a6

        /* Add the sign and return.  */
1:      or      xh, a4, a5
        movi    xl, 0
        leaf_return

.Lextend_expzero:
        beqz    a4, 1b

        /* Normalize it to have 8 zero bits before the first 1 bit.  */
        do_nsau a7, a4, a2, a3
        addi    a7, a7, -8
        ssl     a7
        sll     a4, a4

        /* Shift >> 3 into a4/xl.  */
        slli    xl, a4, (32 - 3)
        srli    a4, a4, 3

        /* Set the exponent.  */
        movi    a6, 0x3fe - 0x7f
        sub     a6, a6, a7
        slli    a6, a6, 20
        add     a4, a4, a6

        /* Add the sign and return.  */
        or      xh, a4, a5
        leaf_return

#endif /* L_extendsfdf2 */
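
/* For reference, a rough C model of __extendsfdf2 (illustrative only;
   extendsfdf2_model is not part of this file or of libgcc).  Widening is
   always exact: the exponent bias grows by 0x3ff - 0x7f, the 23-bit
   mantissa moves to the top of the 52-bit field, Inf stays Inf, a NaN
   keeps only its sign plus the quiet bit (as in .Lextend_nan_or_inf), and
   a subnormal float is renormalized into an ordinary normal double:

       #include <stdint.h>
       #include <string.h>

       static double extendsfdf2_model(float f)
       {
           uint32_t b;
           memcpy(&b, &f, sizeof b);
           uint64_t sign = (uint64_t)(b >> 31) << 63;
           uint32_t exp = (b >> 23) & 0xffu;
           uint32_t man = b & 0x007fffffu;
           uint64_t bits;

           if (exp == 0xff) {                        // Inf or NaN
               bits = sign | 0x7ff0000000000000ull;
               if (man)
                   bits |= 0x0008000000000000ull;    // quiet NaN, payload dropped
           } else if (exp == 0) {
               if (man == 0) {
                   bits = sign;                      // +/- 0.0
               } else {
                   // Subnormal float: shift the leading 1 up to bit 23 and
                   // account for the shifts in the exponent (the value is
                   // man * 2**-149, so the unbiased exponent is -126 - k).
                   int k = 0;
                   while (!(man & 0x00800000u)) {
                       man <<= 1;
                       ++k;
                   }
                   bits = sign
                        | ((uint64_t)(0x381 - k) << 52)     // 1023 - 126 - k
                        | ((uint64_t)(man & 0x007fffffu) << 29);
               }
           } else {
               bits = sign
                    | ((uint64_t)(exp + (0x3ff - 0x7f)) << 52)
                    | ((uint64_t)man << 29);
           }
           double d;
           memcpy(&d, &bits, sizeof d);
           return d;
       }  */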