OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

openrisc/trunk/gnu-src/gcc-4.5.1/gcc/config/xtensa/ieee754-df.S - Blame information for rev 318


Line No. Rev Author Line
1 282 jeremybenn
/* IEEE-754 double-precision functions for Xtensa
2
   Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
3
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
4
 
5
   This file is part of GCC.
6
 
7
   GCC is free software; you can redistribute it and/or modify it
8
   under the terms of the GNU General Public License as published by
9
   the Free Software Foundation; either version 3, or (at your option)
10
   any later version.
11
 
12
   GCC is distributed in the hope that it will be useful, but WITHOUT
13
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15
   License for more details.
16
 
17
   Under Section 7 of GPL version 3, you are granted additional
18
   permissions described in the GCC Runtime Library Exception, version
19
   3.1, as published by the Free Software Foundation.
20
 
21
   You should have received a copy of the GNU General Public License and
22
   a copy of the GCC Runtime Library Exception along with this program;
23
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24
   <http://www.gnu.org/licenses/>.  */
25
 
26
#ifdef __XTENSA_EB__
27
#define xh a2
28
#define xl a3
29
#define yh a4
30
#define yl a5
31
#else
32
#define xh a3
33
#define xl a2
34
#define yh a5
35
#define yl a4
36
#endif
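
A brief C sketch (not part of the original file; names here are illustrative) of how a double maps onto the two 32-bit words named above. The hi/lo split is a property of the value itself; the #ifdef only decides which argument register pair (a2/a3 and a4/a5) carries the high word on big- versus little-endian targets.

#include <stdint.h>
#include <string.h>

typedef struct { uint32_t hi, lo; } dwords;  /* hi holds sign, exponent, top mantissa bits */

static dwords split_double(double d)
{
    uint64_t bits;
    memcpy(&bits, &d, sizeof bits);          /* reinterpret the bits, no conversion */
    return (dwords){ (uint32_t)(bits >> 32), (uint32_t)bits };
}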
37
 
38
/*  Warning!  The branch displacements for some Xtensa branch instructions
39
    are quite small, and this code has been carefully laid out to keep
40
    branch targets in range.  If you change anything, be sure to check that
41
    the assembler is not relaxing anything to branch over a jump.  */
42
 
43
#ifdef L_negdf2
44
 
45
        .align  4
46
        .global __negdf2
47
        .type   __negdf2, @function
48
__negdf2:
49
        leaf_entry sp, 16
50
        movi    a4, 0x80000000
51
        xor     xh, xh, a4
52
        leaf_return
53
 
54
#endif /* L_negdf2 */
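
For reference, a minimal C equivalent (not part of the original) of the sign flip that __negdf2 performs with a single XOR on the high word:

#include <stdint.h>
#include <string.h>

static double neg_double(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    bits ^= (uint64_t)0x80000000u << 32;     /* toggle only the sign bit */
    memcpy(&x, &bits, sizeof bits);
    return x;
}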
55
 
56
#ifdef L_addsubdf3
57
 
58
        /* Addition */
59
__adddf3_aux:
60
 
61
        /* Handle NaNs and Infinities.  (This code is placed before the
62
           start of the function just to keep it in range of the limited
63
           branch displacements.)  */
64
 
65
.Ladd_xnan_or_inf:
66
        /* If y is neither Infinity nor NaN, return x.  */
67
        bnall   yh, a6, 1f
68
        /* If x is a NaN, return it.  Otherwise, return y.  */
69
        slli    a7, xh, 12
70
        or      a7, a7, xl
71
        beqz    a7, .Ladd_ynan_or_inf
72
1:      leaf_return
73
 
74
.Ladd_ynan_or_inf:
75
        /* Return y.  */
76
        mov     xh, yh
77
        mov     xl, yl
78
        leaf_return
79
 
80
.Ladd_opposite_signs:
81
        /* Operand signs differ.  Do a subtraction.  */
82
        slli    a7, a6, 11
83
        xor     yh, yh, a7
84
        j       .Lsub_same_sign
85
 
86
        .align  4
87
        .global __adddf3
88
        .type   __adddf3, @function
89
__adddf3:
90
        leaf_entry sp, 16
91
        movi    a6, 0x7ff00000
92
 
93
        /* Check if the two operands have the same sign.  */
94
        xor     a7, xh, yh
95
        bltz    a7, .Ladd_opposite_signs
96
 
97
.Ladd_same_sign:
98
        /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
99
        ball    xh, a6, .Ladd_xnan_or_inf
100
        ball    yh, a6, .Ladd_ynan_or_inf
101
 
102
        /* Compare the exponents.  The smaller operand will be shifted
103
           right by the exponent difference and added to the larger
104
           one.  */
105
        extui   a7, xh, 20, 12
106
        extui   a8, yh, 20, 12
107
        bltu    a7, a8, .Ladd_shiftx
108
 
109
.Ladd_shifty:
110
        /* Check if the smaller (or equal) exponent is zero.  */
111
        bnone   yh, a6, .Ladd_yexpzero
112
 
113
        /* Replace yh sign/exponent with 0x001.  */
114
        or      yh, yh, a6
115
        slli    yh, yh, 11
116
        srli    yh, yh, 11
117
 
118
.Ladd_yexpdiff:
119
        /* Compute the exponent difference.  Optimize for difference < 32.  */
120
        sub     a10, a7, a8
121
        bgeui   a10, 32, .Ladd_bigshifty
122
 
123
        /* Shift yh/yl right by the exponent difference.  Any bits that are
124
           shifted out of yl are saved in a9 for rounding the result.  */
125
        ssr     a10
126
        movi    a9, 0
127
        src     a9, yl, a9
128
        src     yl, yh, yl
129
        srl     yh, yh
130
 
131
.Ladd_addy:
132
        /* Do the 64-bit addition.  */
133
        add     xl, xl, yl
134
        add     xh, xh, yh
135
        bgeu    xl, yl, 1f
136
        addi    xh, xh, 1
137
1:
138
        /* Check if the add overflowed into the exponent.  */
139
        extui   a10, xh, 20, 12
140
        beq     a10, a7, .Ladd_round
141
        mov     a8, a7
142
        j       .Ladd_carry
143
 
144
.Ladd_yexpzero:
145
        /* y is a subnormal value.  Replace its sign/exponent with zero,
146
           i.e., no implicit "1.0", and increment the apparent exponent
147
           because subnormals behave as if they had the minimum (nonzero)
148
           exponent.  Test for the case when both exponents are zero.  */
149
        slli    yh, yh, 12
150
        srli    yh, yh, 12
151
        bnone   xh, a6, .Ladd_bothexpzero
152
        addi    a8, a8, 1
153
        j       .Ladd_yexpdiff
154
 
155
.Ladd_bothexpzero:
156
        /* Both exponents are zero.  Handle this as a special case.  There
157
           is no need to shift or round, and the normal code for handling
158
           a carry into the exponent field will not work because it
159
           assumes there is an implicit "1.0" that needs to be added.  */
160
        add     xl, xl, yl
161
        add     xh, xh, yh
162
        bgeu    xl, yl, 1f
163
        addi    xh, xh, 1
164
1:      leaf_return
165
 
166
.Ladd_bigshifty:
167
        /* Exponent difference > 64 -- just return the bigger value.  */
168
        bgeui   a10, 64, 1b
169
 
170
        /* Shift yh/yl right by the exponent difference.  Any bits that are
171
           shifted out are saved in a9 for rounding the result.  */
172
        ssr     a10
173
        sll     a11, yl         /* lost bits shifted out of yl */
174
        src     a9, yh, yl
175
        srl     yl, yh
176
        movi    yh, 0
177
        beqz    a11, .Ladd_addy
178
        or      a9, a9, a10     /* any positive, nonzero value will work */
179
        j       .Ladd_addy
180
 
181
.Ladd_xexpzero:
182
        /* Same as "yexpzero" except skip handling the case when both
183
           exponents are zero.  */
184
        slli    xh, xh, 12
185
        srli    xh, xh, 12
186
        addi    a7, a7, 1
187
        j       .Ladd_xexpdiff
188
 
189
.Ladd_shiftx:
190
        /* Same thing as the "shifty" code, but with x and y swapped.  Also,
191
           because the exponent difference is always nonzero in this version,
192
           the shift sequence can use SLL and skip loading a constant zero.  */
193
        bnone   xh, a6, .Ladd_xexpzero
194
 
195
        or      xh, xh, a6
196
        slli    xh, xh, 11
197
        srli    xh, xh, 11
198
 
199
.Ladd_xexpdiff:
200
        sub     a10, a8, a7
201
        bgeui   a10, 32, .Ladd_bigshiftx
202
 
203
        ssr     a10
204
        sll     a9, xl
205
        src     xl, xh, xl
206
        srl     xh, xh
207
 
208
.Ladd_addx:
209
        add     xl, xl, yl
210
        add     xh, xh, yh
211
        bgeu    xl, yl, 1f
212
        addi    xh, xh, 1
213
1:
214
        /* Check if the add overflowed into the exponent.  */
215
        extui   a10, xh, 20, 12
216
        bne     a10, a8, .Ladd_carry
217
 
218
.Ladd_round:
219
        /* Round up if the leftover fraction is >= 1/2.  */
220
        bgez    a9, 1f
221
        addi    xl, xl, 1
222
        beqz    xl, .Ladd_roundcarry
223
 
224
        /* Check if the leftover fraction is exactly 1/2.  */
225
        slli    a9, a9, 1
226
        beqz    a9, .Ladd_exactlyhalf
227
1:      leaf_return
228
 
229
.Ladd_bigshiftx:
230
        /* Mostly the same thing as "bigshifty"....  */
231
        bgeui   a10, 64, .Ladd_returny
232
 
233
        ssr     a10
234
        sll     a11, xl
235
        src     a9, xh, xl
236
        srl     xl, xh
237
        movi    xh, 0
238
        beqz    a11, .Ladd_addx
239
        or      a9, a9, a10
240
        j       .Ladd_addx
241
 
242
.Ladd_returny:
243
        mov     xh, yh
244
        mov     xl, yl
245
        leaf_return
246
 
247
.Ladd_carry:
248
        /* The addition has overflowed into the exponent field, so the
249
           value needs to be renormalized.  The mantissa of the result
250
           can be recovered by subtracting the original exponent and
251
           adding 0x100000 (which is the explicit "1.0" for the
252
           mantissa of the non-shifted operand -- the "1.0" for the
253
           shifted operand was already added).  The mantissa can then
254
           be shifted right by one bit.  The explicit "1.0" of the
255
           shifted mantissa then needs to be replaced by the exponent,
256
           incremented by one to account for the normalizing shift.
257
           It is faster to combine these operations: do the shift first
258
           and combine the additions and subtractions.  If x is the
259
           original exponent, the result is:
260
               shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
261
           or:
262
               shifted mantissa + ((x + 1) << 19)
263
           Note that the exponent is incremented here by leaving the
264
           explicit "1.0" of the mantissa in the exponent field.  */
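        /* Algebra check: -(x << 19) + (x << 20) = x << 19, so adding (1 << 19)
           yields ((x + 1) << 19), which is the combined form computed below.  */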
265
 
266
        /* Shift xh/xl right by one bit.  Save the lsb of xl.  */
267
        mov     a10, xl
268
        ssai    1
269
        src     xl, xh, xl
270
        srl     xh, xh
271
 
272
        /* See explanation above.  The original exponent is in a8.  */
273
        addi    a8, a8, 1
274
        slli    a8, a8, 19
275
        add     xh, xh, a8
276
 
277
        /* Return an Infinity if the exponent overflowed.  */
278
        ball    xh, a6, .Ladd_infinity
279
 
280
        /* Same thing as the "round" code except the msb of the leftover
281
           fraction is bit 0 of a10, with the rest of the fraction in a9.  */
282
        bbci.l  a10, 0, 1f
283
        addi    xl, xl, 1
284
        beqz    xl, .Ladd_roundcarry
285
        beqz    a9, .Ladd_exactlyhalf
286
1:      leaf_return
287
 
288
.Ladd_infinity:
289
        /* Clear the mantissa.  */
290
        movi    xl, 0
291
        srli    xh, xh, 20
292
        slli    xh, xh, 20
293
 
294
        /* The sign bit may have been lost in a carry-out.  Put it back.  */
295
        slli    a8, a8, 1
296
        or      xh, xh, a8
297
        leaf_return
298
 
299
.Ladd_exactlyhalf:
300
        /* Round down to the nearest even value.  */
301
        srli    xl, xl, 1
302
        slli    xl, xl, 1
303
        leaf_return
304
 
305
.Ladd_roundcarry:
306
        /* xl is always zero when the rounding increment overflows, so
307
           there's no need to round it to an even value.  */
308
        addi    xh, xh, 1
309
        /* Overflow to the exponent is OK.  */
310
        leaf_return
311
 
312
 
313
        /* Subtraction */
314
__subdf3_aux:
315
 
316
        /* Handle NaNs and Infinities.  (This code is placed before the
317
           start of the function just to keep it in range of the limited
318
           branch displacements.)  */
319
 
320
.Lsub_xnan_or_inf:
321
        /* If y is neither Infinity nor NaN, return x.  */
322
        bnall   yh, a6, 1f
323
        /* Both x and y are either NaN or Inf, so the result is NaN.  */
324
        movi    a4, 0x80000     /* make it a quiet NaN */
325
        or      xh, xh, a4
326
1:      leaf_return
327
 
328
.Lsub_ynan_or_inf:
329
        /* Negate y and return it.  */
330
        slli    a7, a6, 11
331
        xor     xh, yh, a7
332
        mov     xl, yl
333
        leaf_return
334
 
335
.Lsub_opposite_signs:
336
        /* Operand signs differ.  Do an addition.  */
337
        slli    a7, a6, 11
338
        xor     yh, yh, a7
339
        j       .Ladd_same_sign
340
 
341
        .align  4
342
        .global __subdf3
343
        .type   __subdf3, @function
344
__subdf3:
345
        leaf_entry sp, 16
346
        movi    a6, 0x7ff00000
347
 
348
        /* Check if the two operands have the same sign.  */
349
        xor     a7, xh, yh
350
        bltz    a7, .Lsub_opposite_signs
351
 
352
.Lsub_same_sign:
353
        /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
354
        ball    xh, a6, .Lsub_xnan_or_inf
355
        ball    yh, a6, .Lsub_ynan_or_inf
356
 
357
        /* Compare the operands.  In contrast to addition, the entire
358
           value matters here.  */
359
        extui   a7, xh, 20, 11
360
        extui   a8, yh, 20, 11
361
        bltu    xh, yh, .Lsub_xsmaller
362
        beq     xh, yh, .Lsub_compare_low
363
 
364
.Lsub_ysmaller:
365
        /* Check if the smaller (or equal) exponent is zero.  */
366
        bnone   yh, a6, .Lsub_yexpzero
367
 
368
        /* Replace yh sign/exponent with 0x001.  */
369
        or      yh, yh, a6
370
        slli    yh, yh, 11
371
        srli    yh, yh, 11
372
 
373
.Lsub_yexpdiff:
374
        /* Compute the exponent difference.  Optimize for difference < 32.  */
375
        sub     a10, a7, a8
376
        bgeui   a10, 32, .Lsub_bigshifty
377
 
378
        /* Shift yh/yl right by the exponent difference.  Any bits that are
379
           shifted out of yl are saved in a9 for rounding the result.  */
380
        ssr     a10
381
        movi    a9, 0
382
        src     a9, yl, a9
383
        src     yl, yh, yl
384
        srl     yh, yh
385
 
386
.Lsub_suby:
387
        /* Do the 64-bit subtraction.  */
388
        sub     xh, xh, yh
389
        bgeu    xl, yl, 1f
390
        addi    xh, xh, -1
391
1:      sub     xl, xl, yl
392
 
393
        /* Subtract the leftover bits in a9 from zero and propagate any
394
           borrow from xh/xl.  */
395
        neg     a9, a9
396
        beqz    a9, 1f
397
        addi    a5, xh, -1
398
        moveqz  xh, a5, xl
399
        addi    xl, xl, -1
400
1:
401
        /* Check if the subtract underflowed into the exponent.  */
402
        extui   a10, xh, 20, 11
403
        beq     a10, a7, .Lsub_round
404
        j       .Lsub_borrow
405
 
406
.Lsub_compare_low:
407
        /* The high words are equal.  Compare the low words.  */
408
        bltu    xl, yl, .Lsub_xsmaller
409
        bltu    yl, xl, .Lsub_ysmaller
410
        /* The operands are equal.  Return 0.0.  */
411
        movi    xh, 0
412
        movi    xl, 0
413
1:      leaf_return
414
 
415
.Lsub_yexpzero:
416
        /* y is a subnormal value.  Replace its sign/exponent with zero,
417
           i.e., no implicit "1.0".  Unless x is also a subnormal, increment
418
           y's apparent exponent because subnormals behave as if they had
419
           the minimum (nonzero) exponent.  */
420
        slli    yh, yh, 12
421
        srli    yh, yh, 12
422
        bnone   xh, a6, .Lsub_yexpdiff
423
        addi    a8, a8, 1
424
        j       .Lsub_yexpdiff
425
 
426
.Lsub_bigshifty:
427
        /* Exponent difference > 64 -- just return the bigger value.  */
428
        bgeui   a10, 64, 1b
429
 
430
        /* Shift yh/yl right by the exponent difference.  Any bits that are
431
           shifted out are saved in a9 for rounding the result.  */
432
        ssr     a10
433
        sll     a11, yl         /* lost bits shifted out of yl */
434
        src     a9, yh, yl
435
        srl     yl, yh
436
        movi    yh, 0
437
        beqz    a11, .Lsub_suby
438
        or      a9, a9, a10     /* any positive, nonzero value will work */
439
        j       .Lsub_suby
440
 
441
.Lsub_xsmaller:
442
        /* Same thing as the "ysmaller" code, but with x and y swapped and
443
           with y negated.  */
444
        bnone   xh, a6, .Lsub_xexpzero
445
 
446
        or      xh, xh, a6
447
        slli    xh, xh, 11
448
        srli    xh, xh, 11
449
 
450
.Lsub_xexpdiff:
451
        sub     a10, a8, a7
452
        bgeui   a10, 32, .Lsub_bigshiftx
453
 
454
        ssr     a10
455
        movi    a9, 0
456
        src     a9, xl, a9
457
        src     xl, xh, xl
458
        srl     xh, xh
459
 
460
        /* Negate y.  */
461
        slli    a11, a6, 11
462
        xor     yh, yh, a11
463
 
464
.Lsub_subx:
465
        sub     xl, yl, xl
466
        sub     xh, yh, xh
467
        bgeu    yl, xl, 1f
468
        addi    xh, xh, -1
469
1:
470
        /* Subtract the leftover bits in a9 from zero and propagate any
471
           borrow from xh/xl.  */
472
        neg     a9, a9
473
        beqz    a9, 1f
474
        addi    a5, xh, -1
475
        moveqz  xh, a5, xl
476
        addi    xl, xl, -1
477
1:
478
        /* Check if the subtract underflowed into the exponent.  */
479
        extui   a10, xh, 20, 11
480
        bne     a10, a8, .Lsub_borrow
481
 
482
.Lsub_round:
483
        /* Round up if the leftover fraction is >= 1/2.  */
484
        bgez    a9, 1f
485
        addi    xl, xl, 1
486
        beqz    xl, .Lsub_roundcarry
487
 
488
        /* Check if the leftover fraction is exactly 1/2.  */
489
        slli    a9, a9, 1
490
        beqz    a9, .Lsub_exactlyhalf
491
1:      leaf_return
492
 
493
.Lsub_xexpzero:
494
        /* Same as "yexpzero".  */
495
        slli    xh, xh, 12
496
        srli    xh, xh, 12
497
        bnone   yh, a6, .Lsub_xexpdiff
498
        addi    a7, a7, 1
499
        j       .Lsub_xexpdiff
500
 
501
.Lsub_bigshiftx:
502
        /* Mostly the same thing as "bigshifty", but with the sign bit of the
503
           shifted value set so that the subsequent subtraction flips the
504
           sign of y.  */
505
        bgeui   a10, 64, .Lsub_returny
506
 
507
        ssr     a10
508
        sll     a11, xl
509
        src     a9, xh, xl
510
        srl     xl, xh
511
        slli    xh, a6, 11      /* set sign bit of xh */
512
        beqz    a11, .Lsub_subx
513
        or      a9, a9, a10
514
        j       .Lsub_subx
515
 
516
.Lsub_returny:
517
        /* Negate and return y.  */
518
        slli    a7, a6, 11
519
        xor     xh, yh, a7
520
        mov     xl, yl
521
        leaf_return
522
 
523
.Lsub_borrow:
524
        /* The subtraction has underflowed into the exponent field, so the
525
           value needs to be renormalized.  Shift the mantissa left as
526
           needed to remove any leading zeros and adjust the exponent
527
           accordingly.  If the exponent is not large enough to remove
528
           all the leading zeros, the result will be a subnormal value.  */
529
 
530
        slli    a8, xh, 12
531
        beqz    a8, .Lsub_xhzero
532
        do_nsau a6, a8, a7, a11
533
        srli    a8, a8, 12
534
        bge     a6, a10, .Lsub_subnormal
535
        addi    a6, a6, 1
536
 
537
.Lsub_shift_lt32:
538
        /* Shift the mantissa (a8/xl/a9) left by a6.  */
539
        ssl     a6
540
        src     a8, a8, xl
541
        src     xl, xl, a9
542
        sll     a9, a9
543
 
544
        /* Combine the shifted mantissa with the sign and exponent,
545
           decrementing the exponent by a6.  (The exponent has already
546
           been decremented by one due to the borrow from the subtraction,
547
           but adding the mantissa will increment the exponent by one.)  */
548
        srli    xh, xh, 20
549
        sub     xh, xh, a6
550
        slli    xh, xh, 20
551
        add     xh, xh, a8
552
        j       .Lsub_round
553
 
554
.Lsub_exactlyhalf:
555
        /* Round down to the nearest even value.  */
556
        srli    xl, xl, 1
557
        slli    xl, xl, 1
558
        leaf_return
559
 
560
.Lsub_roundcarry:
561
        /* xl is always zero when the rounding increment overflows, so
562
           there's no need to round it to an even value.  */
563
        addi    xh, xh, 1
564
        /* Overflow to the exponent is OK.  */
565
        leaf_return
566
 
567
.Lsub_xhzero:
568
        /* When normalizing the result, all the mantissa bits in the high
569
           word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
570
        do_nsau a6, xl, a7, a11
571
        addi    a6, a6, 21
572
        blt     a10, a6, .Lsub_subnormal
573
 
574
.Lsub_normalize_shift:
575
        bltui   a6, 32, .Lsub_shift_lt32
576
 
577
        ssl     a6
578
        src     a8, xl, a9
579
        sll     xl, a9
580
        movi    a9, 0
581
 
582
        srli    xh, xh, 20
583
        sub     xh, xh, a6
584
        slli    xh, xh, 20
585
        add     xh, xh, a8
586
        j       .Lsub_round
587
 
588
.Lsub_subnormal:
589
        /* The exponent is too small to shift away all the leading zeros.
590
           Set a6 to the current exponent (which has already been
591
           decremented by the borrow) so that the exponent of the result
592
           will be zero.  Do not add 1 to a6 in this case, because: (1)
593
           adding the mantissa will not increment the exponent, so there is
594
           no need to subtract anything extra from the exponent to
595
           compensate, and (2) the effective exponent of a subnormal is 1
596
           not 0 so the shift amount must be 1 smaller than normal. */
597
        mov     a6, a10
598
        j       .Lsub_normalize_shift
599
 
600
#endif /* L_addsubdf3 */
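
A compact C sketch (not part of the original) of the round-to-nearest-even step shared by the add and subtract paths above: frac_hi/frac_lo form the 64-bit result mantissa, and "leftover" holds the shifted-out bits, with its most significant bit acting as the guard bit that the assembly tests with bgez.

#include <stdint.h>

static void round_nearest_even(uint32_t *frac_hi, uint32_t *frac_lo,
                               uint32_t leftover)
{
    if (leftover & 0x80000000u) {            /* leftover fraction >= 1/2 */
        if (++*frac_lo == 0)
            ++*frac_hi;                      /* carry; overflow into the exponent is OK */
        else if ((uint32_t)(leftover << 1) == 0)
            *frac_lo &= ~1u;                 /* exactly 1/2: round down to even */
    }
}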
601
 
602
#ifdef L_muldf3
603
 
604
        /* Multiplication */
605
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
606
#define XCHAL_NO_MUL 1
607
#endif
608
 
609
__muldf3_aux:
610
 
611
        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
612
           (This code is placed before the start of the function just to
613
           keep it in range of the limited branch displacements.)  */
614
 
615
.Lmul_xexpzero:
616
        /* Clear the sign bit of x.  */
617
        slli    xh, xh, 1
618
        srli    xh, xh, 1
619
 
620
        /* If x is zero, return zero.  */
621
        or      a10, xh, xl
622
        beqz    a10, .Lmul_return_zero
623
 
624
        /* Normalize x.  Adjust the exponent in a8.  */
625
        beqz    xh, .Lmul_xh_zero
626
        do_nsau a10, xh, a11, a12
627
        addi    a10, a10, -11
628
        ssl     a10
629
        src     xh, xh, xl
630
        sll     xl, xl
631
        movi    a8, 1
632
        sub     a8, a8, a10
633
        j       .Lmul_xnormalized
634
.Lmul_xh_zero:
635
        do_nsau a10, xl, a11, a12
636
        addi    a10, a10, -11
637
        movi    a8, -31
638
        sub     a8, a8, a10
639
        ssl     a10
640
        bltz    a10, .Lmul_xl_srl
641
        sll     xh, xl
642
        movi    xl, 0
643
        j       .Lmul_xnormalized
644
.Lmul_xl_srl:
645
        srl     xh, xl
646
        sll     xl, xl
647
        j       .Lmul_xnormalized
648
 
649
.Lmul_yexpzero:
650
        /* Clear the sign bit of y.  */
651
        slli    yh, yh, 1
652
        srli    yh, yh, 1
653
 
654
        /* If y is zero, return zero.  */
655
        or      a10, yh, yl
656
        beqz    a10, .Lmul_return_zero
657
 
658
        /* Normalize y.  Adjust the exponent in a9.  */
659
        beqz    yh, .Lmul_yh_zero
660
        do_nsau a10, yh, a11, a12
661
        addi    a10, a10, -11
662
        ssl     a10
663
        src     yh, yh, yl
664
        sll     yl, yl
665
        movi    a9, 1
666
        sub     a9, a9, a10
667
        j       .Lmul_ynormalized
668
.Lmul_yh_zero:
669
        do_nsau a10, yl, a11, a12
670
        addi    a10, a10, -11
671
        movi    a9, -31
672
        sub     a9, a9, a10
673
        ssl     a10
674
        bltz    a10, .Lmul_yl_srl
675
        sll     yh, yl
676
        movi    yl, 0
677
        j       .Lmul_ynormalized
678
.Lmul_yl_srl:
679
        srl     yh, yl
680
        sll     yl, yl
681
        j       .Lmul_ynormalized
682
 
683
.Lmul_return_zero:
684
        /* Return zero with the appropriate sign bit.  */
685
        srli    xh, a7, 31
686
        slli    xh, xh, 31
687
        movi    xl, 0
688
        j       .Lmul_done
689
 
690
.Lmul_xnan_or_inf:
691
        /* If y is zero, return NaN.  */
692
        bnez    yl, 1f
693
        slli    a8, yh, 1
694
        bnez    a8, 1f
695
        movi    a4, 0x80000     /* make it a quiet NaN */
696
        or      xh, xh, a4
697
        j       .Lmul_done
698
1:
699
        /* If y is NaN, return y.  */
700
        bnall   yh, a6, .Lmul_returnx
701
        slli    a8, yh, 12
702
        or      a8, a8, yl
703
        beqz    a8, .Lmul_returnx
704
 
705
.Lmul_returny:
706
        mov     xh, yh
707
        mov     xl, yl
708
 
709
.Lmul_returnx:
710
        /* Set the sign bit and return.  */
711
        extui   a7, a7, 31, 1
712
        slli    xh, xh, 1
713
        ssai    1
714
        src     xh, a7, xh
715
        j       .Lmul_done
716
 
717
.Lmul_ynan_or_inf:
718
        /* If x is zero, return NaN.  */
719
        bnez    xl, .Lmul_returny
720
        slli    a8, xh, 1
721
        bnez    a8, .Lmul_returny
722
        movi    a7, 0x80000     /* make it a quiet NaN */
723
        or      xh, yh, a7
724
        j       .Lmul_done
725
 
726
        .align  4
727
        .global __muldf3
728
        .type   __muldf3, @function
729
__muldf3:
730
#if __XTENSA_CALL0_ABI__
731
        leaf_entry sp, 32
732
        addi    sp, sp, -32
733
        s32i    a12, sp, 16
734
        s32i    a13, sp, 20
735
        s32i    a14, sp, 24
736
        s32i    a15, sp, 28
737
#elif XCHAL_NO_MUL
738
        /* This is not really a leaf function; allocate enough stack space
739
           to allow CALL12s to a helper function.  */
740
        leaf_entry sp, 64
741
#else
742
        leaf_entry sp, 32
743
#endif
744
        movi    a6, 0x7ff00000
745
 
746
        /* Get the sign of the result.  */
747
        xor     a7, xh, yh
748
 
749
        /* Check for NaN and infinity.  */
750
        ball    xh, a6, .Lmul_xnan_or_inf
751
        ball    yh, a6, .Lmul_ynan_or_inf
752
 
753
        /* Extract the exponents.  */
754
        extui   a8, xh, 20, 11
755
        extui   a9, yh, 20, 11
756
 
757
        beqz    a8, .Lmul_xexpzero
758
.Lmul_xnormalized:
759
        beqz    a9, .Lmul_yexpzero
760
.Lmul_ynormalized:
761
 
762
        /* Add the exponents.  */
763
        add     a8, a8, a9
764
 
765
        /* Replace sign/exponent fields with explicit "1.0".  */
766
        movi    a10, 0x1fffff
767
        or      xh, xh, a6
768
        and     xh, xh, a10
769
        or      yh, yh, a6
770
        and     yh, yh, a10
771
 
772
        /* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
773
           The least-significant word of the result is thrown away except
774
           that if it is nonzero, the lsb of a6 is set to 1.  */
775
#if XCHAL_HAVE_MUL32_HIGH
776
 
777
        /* Compute a6 with any carry-outs in a10.  */
778
        movi    a10, 0
779
        mull    a6, xl, yh
780
        mull    a11, xh, yl
781
        add     a6, a6, a11
782
        bgeu    a6, a11, 1f
783
        addi    a10, a10, 1
784
1:
785
        muluh   a11, xl, yl
786
        add     a6, a6, a11
787
        bgeu    a6, a11, 1f
788
        addi    a10, a10, 1
789
1:
790
        /* If the low word of the result is nonzero, set the lsb of a6.  */
791
        mull    a11, xl, yl
792
        beqz    a11, 1f
793
        movi    a9, 1
794
        or      a6, a6, a9
795
1:
796
        /* Compute xl with any carry-outs in a9.  */
797
        movi    a9, 0
798
        mull    a11, xh, yh
799
        add     a10, a10, a11
800
        bgeu    a10, a11, 1f
801
        addi    a9, a9, 1
802
1:
803
        muluh   a11, xh, yl
804
        add     a10, a10, a11
805
        bgeu    a10, a11, 1f
806
        addi    a9, a9, 1
807
1:
808
        muluh   xl, xl, yh
809
        add     xl, xl, a10
810
        bgeu    xl, a10, 1f
811
        addi    a9, a9, 1
812
1:
813
        /* Compute xh.  */
814
        muluh   xh, xh, yh
815
        add     xh, xh, a9
816
 
817
#else /* ! XCHAL_HAVE_MUL32_HIGH */
818
 
819
        /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
820
           products.  These partial products are:
821
 
822
                0 xll * yll
823
 
824
                1 xll * ylh
825
                2 xlh * yll
826
 
827
                3 xll * yhl
828
                4 xlh * ylh
829
                5 xhl * yll
830
 
831
                6 xll * yhh
832
                7 xlh * yhl
833
                8 xhl * ylh
834
                9 xhh * yll
835
 
836
                10 xlh * yhh
837
                11 xhl * yhl
838
                12 xhh * ylh
839
 
840
                13 xhl * yhh
841
                14 xhh * yhl
842
 
843
                15 xhh * yhh
844
 
845
           where the input chunks are (hh, hl, lh, ll).  If using the Mul16
846
           or Mul32 multiplier options, these input chunks must be stored in
847
           separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
848
           that the inputs come from either half of the registers, so there
849
           is no need to shift them out ahead of time.  If there is no
850
           multiply hardware, the 16-bit chunks can be extracted when setting
851
           up the arguments to the separate multiply function.  */
852
 
853
        /* Save a7 since it is needed to hold a temporary value.  */
854
        s32i    a7, sp, 4
855
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
856
        /* Calling a separate multiply function will clobber a0 and requires
857
           use of a8 as a temporary, so save those values now.  (The function
858
           uses a custom ABI so nothing else needs to be saved.)  */
859
        s32i    a0, sp, 0
860
        s32i    a8, sp, 8
861
#endif
862
 
863
#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
864
 
865
#define xlh a12
866
#define ylh a13
867
#define xhh a14
868
#define yhh a15
869
 
870
        /* Get the high halves of the inputs into registers.  */
871
        srli    xlh, xl, 16
872
        srli    ylh, yl, 16
873
        srli    xhh, xh, 16
874
        srli    yhh, yh, 16
875
 
876
#define xll xl
877
#define yll yl
878
#define xhl xh
879
#define yhl yh
880
 
881
#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
882
        /* Clear the high halves of the inputs.  This does not matter
883
           for MUL16 because the high bits are ignored.  */
884
        extui   xl, xl, 0, 16
885
        extui   xh, xh, 0, 16
886
        extui   yl, yl, 0, 16
887
        extui   yh, yh, 0, 16
888
#endif
889
#endif /* MUL16 || MUL32 */
890
 
891
 
892
#if XCHAL_HAVE_MUL16
893
 
894
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
895
        mul16u  dst, xreg ## xhalf, yreg ## yhalf
896
 
897
#elif XCHAL_HAVE_MUL32
898
 
899
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
900
        mull    dst, xreg ## xhalf, yreg ## yhalf
901
 
902
#elif XCHAL_HAVE_MAC16
903
 
904
/* The preprocessor insists on inserting a space when concatenating after
905
   a period in the definition of do_mul below.  These macros are a workaround
906
   using underscores instead of periods when doing the concatenation.  */
907
#define umul_aa_ll umul.aa.ll
908
#define umul_aa_lh umul.aa.lh
909
#define umul_aa_hl umul.aa.hl
910
#define umul_aa_hh umul.aa.hh
911
 
912
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
913
        umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
914
        rsr     dst, ACCLO
915
 
916
#else /* no multiply hardware */
917
 
918
#define set_arg_l(dst, src) \
919
        extui   dst, src, 0, 16
920
#define set_arg_h(dst, src) \
921
        srli    dst, src, 16
922
 
923
#if __XTENSA_CALL0_ABI__
924
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
925
        set_arg_ ## xhalf (a13, xreg); \
926
        set_arg_ ## yhalf (a14, yreg); \
927
        call0   .Lmul_mulsi3; \
928
        mov     dst, a12
929
#else
930
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
931
        set_arg_ ## xhalf (a14, xreg); \
932
        set_arg_ ## yhalf (a15, yreg); \
933
        call12  .Lmul_mulsi3; \
934
        mov     dst, a14
935
#endif /* __XTENSA_CALL0_ABI__ */
936
 
937
#endif /* no multiply hardware */
938
 
939
        /* Add pp1 and pp2 into a10 with carry-out in a9.  */
940
        do_mul(a10, xl, l, yl, h)       /* pp 1 */
941
        do_mul(a11, xl, h, yl, l)       /* pp 2 */
942
        movi    a9, 0
943
        add     a10, a10, a11
944
        bgeu    a10, a11, 1f
945
        addi    a9, a9, 1
946
1:
947
        /* Initialize a6 with a9/a10 shifted into position.  Note that
948
           this value can be safely incremented without any carry-outs.  */
949
        ssai    16
950
        src     a6, a9, a10
951
 
952
        /* Compute the low word into a10.  */
953
        do_mul(a11, xl, l, yl, l)       /* pp 0 */
954
        sll     a10, a10
955
        add     a10, a10, a11
956
        bgeu    a10, a11, 1f
957
        addi    a6, a6, 1
958
1:
959
        /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
960
           This is good enough to determine the low half of a6, so that any
961
           nonzero bits from the low word of the result can be collapsed
962
           into a6, freeing up a register.  */
963
        movi    a9, 0
964
        do_mul(a11, xl, l, yh, l)       /* pp 3 */
965
        add     a6, a6, a11
966
        bgeu    a6, a11, 1f
967
        addi    a9, a9, 1
968
1:
969
        do_mul(a11, xl, h, yl, h)       /* pp 4 */
970
        add     a6, a6, a11
971
        bgeu    a6, a11, 1f
972
        addi    a9, a9, 1
973
1:
974
        do_mul(a11, xh, l, yl, l)       /* pp 5 */
975
        add     a6, a6, a11
976
        bgeu    a6, a11, 1f
977
        addi    a9, a9, 1
978
1:
979
        /* Collapse any nonzero bits from the low word into a6.  */
980
        beqz    a10, 1f
981
        movi    a11, 1
982
        or      a6, a6, a11
983
1:
984
        /* Add pp6-9 into a11 with carry-outs in a10.  */
985
        do_mul(a7, xl, l, yh, h)        /* pp 6 */
986
        do_mul(a11, xh, h, yl, l)       /* pp 9 */
987
        movi    a10, 0
988
        add     a11, a11, a7
989
        bgeu    a11, a7, 1f
990
        addi    a10, a10, 1
991
1:
992
        do_mul(a7, xl, h, yh, l)        /* pp 7 */
993
        add     a11, a11, a7
994
        bgeu    a11, a7, 1f
995
        addi    a10, a10, 1
996
1:
997
        do_mul(a7, xh, l, yl, h)        /* pp 8 */
998
        add     a11, a11, a7
999
        bgeu    a11, a7, 1f
1000
        addi    a10, a10, 1
1001
1:
1002
        /* Shift a10/a11 into position, and add low half of a11 to a6.  */
1003
        src     a10, a10, a11
1004
        add     a10, a10, a9
1005
        sll     a11, a11
1006
        add     a6, a6, a11
1007
        bgeu    a6, a11, 1f
1008
        addi    a10, a10, 1
1009
1:
1010
        /* Add pp10-12 into xl with carry-outs in a9.  */
1011
        movi    a9, 0
1012
        do_mul(xl, xl, h, yh, h)        /* pp 10 */
1013
        add     xl, xl, a10
1014
        bgeu    xl, a10, 1f
1015
        addi    a9, a9, 1
1016
1:
1017
        do_mul(a10, xh, l, yh, l)       /* pp 11 */
1018
        add     xl, xl, a10
1019
        bgeu    xl, a10, 1f
1020
        addi    a9, a9, 1
1021
1:
1022
        do_mul(a10, xh, h, yl, h)       /* pp 12 */
1023
        add     xl, xl, a10
1024
        bgeu    xl, a10, 1f
1025
        addi    a9, a9, 1
1026
1:
1027
        /* Add pp13-14 into a11 with carry-outs in a10.  */
1028
        do_mul(a11, xh, l, yh, h)       /* pp 13 */
1029
        do_mul(a7, xh, h, yh, l)        /* pp 14 */
1030
        movi    a10, 0
1031
        add     a11, a11, a7
1032
        bgeu    a11, a7, 1f
1033
        addi    a10, a10, 1
1034
1:
1035
        /* Shift a10/a11 into position, and add low half of a11 to xl.  */
1036
        src     a10, a10, a11
1037
        add     a10, a10, a9
1038
        sll     a11, a11
1039
        add     xl, xl, a11
1040
        bgeu    xl, a11, 1f
1041
        addi    a10, a10, 1
1042
1:
1043
        /* Compute xh.  */
1044
        do_mul(xh, xh, h, yh, h)        /* pp 15 */
1045
        add     xh, xh, a10
1046
 
1047
        /* Restore values saved on the stack during the multiplication.  */
1048
        l32i    a7, sp, 4
1049
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
1050
        l32i    a0, sp, 0
1051
        l32i    a8, sp, 8
1052
#endif
1053
#endif /* ! XCHAL_HAVE_MUL32_HIGH */
1054
 
1055
        /* Shift left by 12 bits, unless there was a carry-out from the
1056
           multiply, in which case, shift by 11 bits and increment the
1057
           exponent.  Note: It is convenient to use the constant 0x3ff
1058
           instead of 0x400 when removing the extra exponent bias (so that
1059
           it is easy to construct 0x7fe for the overflow check).  Reverse
1060
           the logic here to decrement the exponent sum by one unless there
1061
           was a carry-out.  */
1062
        movi    a4, 11
1063
        srli    a5, xh, 21 - 12
1064
        bnez    a5, 1f
1065
        addi    a4, a4, 1
1066
        addi    a8, a8, -1
1067
1:      ssl     a4
1068
        src     xh, xh, xl
1069
        src     xl, xl, a6
1070
        sll     a6, a6
1071
 
1072
        /* Subtract the extra bias from the exponent sum (plus one to account
1073
           for the explicit "1.0" of the mantissa that will be added to the
1074
           exponent in the final result).  */
1075
        movi    a4, 0x3ff
1076
        sub     a8, a8, a4
1077
 
1078
        /* Check for over/underflow.  The value in a8 is one less than the
1079
           final exponent, so values in the range 0..7fd are OK here.  */
1080
        slli    a4, a4, 1       /* 0x7fe */
1081
        bgeu    a8, a4, .Lmul_overflow
1082
 
1083
.Lmul_round:
1084
        /* Round.  */
1085
        bgez    a6, .Lmul_rounded
1086
        addi    xl, xl, 1
1087
        beqz    xl, .Lmul_roundcarry
1088
        slli    a6, a6, 1
1089
        beqz    a6, .Lmul_exactlyhalf
1090
 
1091
.Lmul_rounded:
1092
        /* Add the exponent to the mantissa.  */
1093
        slli    a8, a8, 20
1094
        add     xh, xh, a8
1095
 
1096
.Lmul_addsign:
1097
        /* Add the sign bit.  */
1098
        srli    a7, a7, 31
1099
        slli    a7, a7, 31
1100
        or      xh, xh, a7
1101
 
1102
.Lmul_done:
1103
#if __XTENSA_CALL0_ABI__
1104
        l32i    a12, sp, 16
1105
        l32i    a13, sp, 20
1106
        l32i    a14, sp, 24
1107
        l32i    a15, sp, 28
1108
        addi    sp, sp, 32
1109
#endif
1110
        leaf_return
1111
 
1112
.Lmul_exactlyhalf:
1113
        /* Round down to the nearest even value.  */
1114
        srli    xl, xl, 1
1115
        slli    xl, xl, 1
1116
        j       .Lmul_rounded
1117
 
1118
.Lmul_roundcarry:
1119
        /* xl is always zero when the rounding increment overflows, so
1120
           there's no need to round it to an even value.  */
1121
        addi    xh, xh, 1
1122
        /* Overflow is OK -- it will be added to the exponent.  */
1123
        j       .Lmul_rounded
1124
 
1125
.Lmul_overflow:
1126
        bltz    a8, .Lmul_underflow
1127
        /* Return +/- Infinity.  */
1128
        addi    a8, a4, 1       /* 0x7ff */
1129
        slli    xh, a8, 20
1130
        movi    xl, 0
1131
        j       .Lmul_addsign
1132
 
1133
.Lmul_underflow:
1134
        /* Create a subnormal value, where the exponent field contains zero,
1135
           but the effective exponent is 1.  The value of a8 is one less than
1136
           the actual exponent, so just negate it to get the shift amount.  */
1137
        neg     a8, a8
1138
        mov     a9, a6
1139
        ssr     a8
1140
        bgeui   a8, 32, .Lmul_bigshift
1141
 
1142
        /* Shift xh/xl right.  Any bits that are shifted out of xl are saved
1143
           in a6 (combined with the shifted-out bits currently in a6) for
1144
           rounding the result.  */
1145
        sll     a6, xl
1146
        src     xl, xh, xl
1147
        srl     xh, xh
1148
        j       1f
1149
 
1150
.Lmul_bigshift:
1151
        bgeui   a8, 64, .Lmul_flush_to_zero
1152
        sll     a10, xl         /* lost bits shifted out of xl */
1153
        src     a6, xh, xl
1154
        srl     xl, xh
1155
        movi    xh, 0
1156
        or      a9, a9, a10
1157
 
1158
        /* Set the exponent to zero.  */
1159
1:      movi    a8, 0
1160
 
1161
        /* Pack any nonzero bits shifted out into a6.  */
1162
        beqz    a9, .Lmul_round
1163
        movi    a9, 1
1164
        or      a6, a6, a9
1165
        j       .Lmul_round
1166
 
1167
.Lmul_flush_to_zero:
1168
        /* Return zero with the appropriate sign bit.  */
1169
        srli    xh, a7, 31
1170
        slli    xh, xh, 31
1171
        movi    xl, 0
1172
        j       .Lmul_done
1173
 
1174
#if XCHAL_NO_MUL
1175
 
1176
        /* For Xtensa processors with no multiply hardware, this simplified
1177
           version of _mulsi3 is used for multiplying 16-bit chunks of
1178
           the floating-point mantissas.  When using CALL0, this function
1179
           uses a custom ABI: the inputs are passed in a13 and a14, the
1180
           result is returned in a12, and a8 and a15 are clobbered.  */
1181
        .align  4
1182
.Lmul_mulsi3:
1183
        leaf_entry sp, 16
1184
        .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
1185
        movi    \dst, 0
1186
1:      add     \tmp1, \src2, \dst
1187
        extui   \tmp2, \src1, 0, 1
1188
        movnez  \dst, \tmp1, \tmp2
1189
 
1190
        do_addx2 \tmp1, \src2, \dst, \tmp1
1191
        extui   \tmp2, \src1, 1, 1
1192
        movnez  \dst, \tmp1, \tmp2
1193
 
1194
        do_addx4 \tmp1, \src2, \dst, \tmp1
1195
        extui   \tmp2, \src1, 2, 1
1196
        movnez  \dst, \tmp1, \tmp2
1197
 
1198
        do_addx8 \tmp1, \src2, \dst, \tmp1
1199
        extui   \tmp2, \src1, 3, 1
1200
        movnez  \dst, \tmp1, \tmp2
1201
 
1202
        srli    \src1, \src1, 4
1203
        slli    \src2, \src2, 4
1204
        bnez    \src1, 1b
1205
        .endm
1206
#if __XTENSA_CALL0_ABI__
1207
        mul_mulsi3_body a12, a13, a14, a15, a8
1208
#else
1209
        /* The result will be written into a2, so save that argument in a4.  */
1210
        mov     a4, a2
1211
        mul_mulsi3_body a2, a4, a3, a5, a6
1212
#endif
1213
        leaf_return
1214
#endif /* XCHAL_NO_MUL */
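
A C rendering (not part of the original) of the shift-and-add multiply in mul_mulsi3_body: four bits of the first operand are consumed per iteration, matching the do_addx2/do_addx4/do_addx8 steps of the macro above.

#include <stdint.h>

static uint32_t mulsi3(uint32_t a, uint32_t b)
{
    uint32_t result = 0;
    while (a != 0) {                  /* loop until no bits of a remain */
        if (a & 1) result += b;
        if (a & 2) result += b << 1;
        if (a & 4) result += b << 2;
        if (a & 8) result += b << 3;
        a >>= 4;
        b <<= 4;
    }
    return result;
}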
1215
#endif /* L_muldf3 */
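
For comparison, a C sketch (not part of the original, and assuming 64-bit integer arithmetic that the Xtensa code does not have) of the widening mantissa multiply above: the low 64 bits of the product are computed, the third 32-bit word becomes the guard word, and any nonzero bits below it are folded into the guard's least significant bit, the same "sticky" trick the assembly applies to a6.

#include <stdint.h>

static uint64_t mul_mantissas(uint64_t x, uint64_t y, uint32_t *guard)
{
    uint64_t xl = (uint32_t)x, xh = x >> 32;
    uint64_t yl = (uint32_t)y, yh = y >> 32;

    uint64_t lo   = xl * yl;
    uint64_t mid  = xh * yl + (lo >> 32);    /* cannot overflow 64 bits */
    uint64_t mid2 = xl * yh + (uint32_t)mid;
    uint64_t hi   = xh * yh + (mid >> 32) + (mid2 >> 32);

    uint64_t low64 = (mid2 << 32) | (uint32_t)lo;   /* low 64 bits of the product */
    *guard = (uint32_t)(low64 >> 32);
    if ((uint32_t)low64 != 0)
        *guard |= 1;                          /* sticky bit for the discarded word */
    return hi;                                /* high 64 bits of the product */
}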
1216
 
1217
#ifdef L_divdf3
1218
 
1219
        /* Division */
1220
__divdf3_aux:
1221
 
1222
        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
1223
           (This code is placed before the start of the function just to
1224
           keep it in range of the limited branch displacements.)  */
1225
 
1226
.Ldiv_yexpzero:
1227
        /* Clear the sign bit of y.  */
1228
        slli    yh, yh, 1
1229
        srli    yh, yh, 1
1230
 
1231
        /* Check for division by zero.  */
1232
        or      a10, yh, yl
1233
        beqz    a10, .Ldiv_yzero
1234
 
1235
        /* Normalize y.  Adjust the exponent in a9.  */
1236
        beqz    yh, .Ldiv_yh_zero
1237
        do_nsau a10, yh, a11, a9
1238
        addi    a10, a10, -11
1239
        ssl     a10
1240
        src     yh, yh, yl
1241
        sll     yl, yl
1242
        movi    a9, 1
1243
        sub     a9, a9, a10
1244
        j       .Ldiv_ynormalized
1245
.Ldiv_yh_zero:
1246
        do_nsau a10, yl, a11, a9
1247
        addi    a10, a10, -11
1248
        movi    a9, -31
1249
        sub     a9, a9, a10
1250
        ssl     a10
1251
        bltz    a10, .Ldiv_yl_srl
1252
        sll     yh, yl
1253
        movi    yl, 0
1254
        j       .Ldiv_ynormalized
1255
.Ldiv_yl_srl:
1256
        srl     yh, yl
1257
        sll     yl, yl
1258
        j       .Ldiv_ynormalized
1259
 
1260
.Ldiv_yzero:
1261
        /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
1262
        slli    xh, xh, 1
1263
        srli    xh, xh, 1
1264
        or      xl, xl, xh
1265
        srli    xh, a7, 31
1266
        slli    xh, xh, 31
1267
        or      xh, xh, a6
1268
        bnez    xl, 1f
1269
        movi    a4, 0x80000     /* make it a quiet NaN */
1270
        or      xh, xh, a4
1271
1:      movi    xl, 0
1272
        leaf_return
1273
 
1274
.Ldiv_xexpzero:
1275
        /* Clear the sign bit of x.  */
1276
        slli    xh, xh, 1
1277
        srli    xh, xh, 1
1278
 
1279
        /* If x is zero, return zero.  */
1280
        or      a10, xh, xl
1281
        beqz    a10, .Ldiv_return_zero
1282
 
1283
        /* Normalize x.  Adjust the exponent in a8.  */
1284
        beqz    xh, .Ldiv_xh_zero
1285
        do_nsau a10, xh, a11, a8
1286
        addi    a10, a10, -11
1287
        ssl     a10
1288
        src     xh, xh, xl
1289
        sll     xl, xl
1290
        movi    a8, 1
1291
        sub     a8, a8, a10
1292
        j       .Ldiv_xnormalized
1293
.Ldiv_xh_zero:
1294
        do_nsau a10, xl, a11, a8
1295
        addi    a10, a10, -11
1296
        movi    a8, -31
1297
        sub     a8, a8, a10
1298
        ssl     a10
1299
        bltz    a10, .Ldiv_xl_srl
1300
        sll     xh, xl
1301
        movi    xl, 0
1302
        j       .Ldiv_xnormalized
1303
.Ldiv_xl_srl:
1304
        srl     xh, xl
1305
        sll     xl, xl
1306
        j       .Ldiv_xnormalized
1307
 
1308
.Ldiv_return_zero:
1309
        /* Return zero with the appropriate sign bit.  */
1310
        srli    xh, a7, 31
1311
        slli    xh, xh, 31
1312
        movi    xl, 0
1313
        leaf_return
1314
 
1315
.Ldiv_xnan_or_inf:
1316
        /* Set the sign bit of the result.  */
1317
        srli    a7, yh, 31
1318
        slli    a7, a7, 31
1319
        xor     xh, xh, a7
1320
        /* If y is NaN or Inf, return NaN.  */
1321
        bnall   yh, a6, 1f
1322
        movi    a4, 0x80000     /* make it a quiet NaN */
1323
        or      xh, xh, a4
1324
1:      leaf_return
1325
 
1326
.Ldiv_ynan_or_inf:
1327
        /* If y is Infinity, return zero.  */
1328
        slli    a8, yh, 12
1329
        or      a8, a8, yl
1330
        beqz    a8, .Ldiv_return_zero
1331
        /* y is NaN; return it.  */
1332
        mov     xh, yh
1333
        mov     xl, yl
1334
        leaf_return
1335
 
1336
.Ldiv_highequal1:
1337
        bltu    xl, yl, 2f
1338
        j       3f
1339
 
1340
        .align  4
1341
        .global __divdf3
1342
        .type   __divdf3, @function
1343
__divdf3:
1344
        leaf_entry sp, 16
1345
        movi    a6, 0x7ff00000
1346
 
1347
        /* Get the sign of the result.  */
1348
        xor     a7, xh, yh
1349
 
1350
        /* Check for NaN and infinity.  */
1351
        ball    xh, a6, .Ldiv_xnan_or_inf
1352
        ball    yh, a6, .Ldiv_ynan_or_inf
1353
 
1354
        /* Extract the exponents.  */
1355
        extui   a8, xh, 20, 11
1356
        extui   a9, yh, 20, 11
1357
 
1358
        beqz    a9, .Ldiv_yexpzero
1359
.Ldiv_ynormalized:
1360
        beqz    a8, .Ldiv_xexpzero
1361
.Ldiv_xnormalized:
1362
 
1363
        /* Subtract the exponents.  */
1364
        sub     a8, a8, a9
1365
 
1366
        /* Replace sign/exponent fields with explicit "1.0".  */
1367
        movi    a10, 0x1fffff
1368
        or      xh, xh, a6
1369
        and     xh, xh, a10
1370
        or      yh, yh, a6
1371
        and     yh, yh, a10
1372
 
1373
        /* Set SAR for left shift by one.  */
1374
        ssai    (32 - 1)
1375
 
1376
        /* The first digit of the mantissa division must be a one.
1377
           Shift x (and adjust the exponent) as needed to make this true.  */
1378
        bltu    yh, xh, 3f
1379
        beq     yh, xh, .Ldiv_highequal1
1380
2:      src     xh, xh, xl
1381
        sll     xl, xl
1382
        addi    a8, a8, -1
1383
3:
1384
        /* Do the first subtraction and shift.  */
1385
        sub     xh, xh, yh
1386
        bgeu    xl, yl, 1f
1387
        addi    xh, xh, -1
1388
1:      sub     xl, xl, yl
1389
        src     xh, xh, xl
1390
        sll     xl, xl
1391
 
1392
        /* Put the quotient into a10/a11.  */
1393
        movi    a10, 0
1394
        movi    a11, 1
1395
 
1396
        /* Divide one bit at a time for 52 bits.  */
1397
        movi    a9, 52
1398
#if XCHAL_HAVE_LOOPS
1399
        loop    a9, .Ldiv_loopend
1400
#endif
1401
.Ldiv_loop:
1402
        /* Shift the quotient << 1.  */
1403
        src     a10, a10, a11
1404
        sll     a11, a11
1405
 
1406
        /* Is this digit a 0 or 1?  */
1407
        bltu    xh, yh, 3f
1408
        beq     xh, yh, .Ldiv_highequal2
1409
 
1410
        /* Output a 1 and subtract.  */
1411
2:      addi    a11, a11, 1
1412
        sub     xh, xh, yh
1413
        bgeu    xl, yl, 1f
1414
        addi    xh, xh, -1
1415
1:      sub     xl, xl, yl
1416
 
1417
        /* Shift the dividend << 1.  */
1418
3:      src     xh, xh, xl
1419
        sll     xl, xl
1420
 
1421
#if !XCHAL_HAVE_LOOPS
1422
        addi    a9, a9, -1
1423
        bnez    a9, .Ldiv_loop
1424
#endif
1425
.Ldiv_loopend:
1426
 
1427
        /* Add the exponent bias (less one to account for the explicit "1.0"
1428
           of the mantissa that will be added to the exponent in the final
1429
           result).  */
1430
        movi    a9, 0x3fe
1431
        add     a8, a8, a9
1432
 
1433
        /* Check for over/underflow.  The value in a8 is one less than the
1434
           final exponent, so values in the range 0..7fd are OK here.  */
1435
        addmi   a9, a9, 0x400   /* 0x7fe */
1436
        bgeu    a8, a9, .Ldiv_overflow
1437
 
1438
.Ldiv_round:
1439
        /* Round.  The remainder (<< 1) is in xh/xl.  */
1440
        bltu    xh, yh, .Ldiv_rounded
1441
        beq     xh, yh, .Ldiv_highequal3
1442
.Ldiv_roundup:
1443
        addi    a11, a11, 1
1444
        beqz    a11, .Ldiv_roundcarry
1445
 
1446
.Ldiv_rounded:
1447
        mov     xl, a11
1448
        /* Add the exponent to the mantissa.  */
1449
        slli    a8, a8, 20
1450
        add     xh, a10, a8
1451
 
1452
.Ldiv_addsign:
1453
        /* Add the sign bit.  */
1454
        srli    a7, a7, 31
1455
        slli    a7, a7, 31
1456
        or      xh, xh, a7
1457
        leaf_return
1458
 
1459
.Ldiv_highequal2:
1460
        bgeu    xl, yl, 2b
1461
        j       3b
1462
 
1463
.Ldiv_highequal3:
1464
        bltu    xl, yl, .Ldiv_rounded
1465
        bne     xl, yl, .Ldiv_roundup
1466
 
1467
        /* Remainder is exactly half the divisor.  Round even.  */
1468
        addi    a11, a11, 1
1469
        beqz    a11, .Ldiv_roundcarry
1470
        srli    a11, a11, 1
1471
        slli    a11, a11, 1
1472
        j       .Ldiv_rounded
1473
 
1474
.Ldiv_overflow:
1475
        bltz    a8, .Ldiv_underflow
1476
        /* Return +/- Infinity.  */
1477
        addi    a8, a9, 1       /* 0x7ff */
1478
        slli    xh, a8, 20
1479
        movi    xl, 0
1480
        j       .Ldiv_addsign
1481
 
1482
.Ldiv_underflow:
1483
        /* Create a subnormal value, where the exponent field contains zero,
1484
           but the effective exponent is 1.  The value of a8 is one less than
1485
           the actual exponent, so just negate it to get the shift amount.  */
1486
        neg     a8, a8
1487
        ssr     a8
1488
        bgeui   a8, 32, .Ldiv_bigshift
1489
 
1490
        /* Shift a10/a11 right.  Any bits that are shifted out of a11 are
1491
           saved in a6 for rounding the result.  */
1492
        sll     a6, a11
1493
        src     a11, a10, a11
1494
        srl     a10, a10
1495
        j       1f
1496
 
1497
.Ldiv_bigshift:
1498
        bgeui   a8, 64, .Ldiv_flush_to_zero
1499
        sll     a9, a11         /* lost bits shifted out of a11 */
1500
        src     a6, a10, a11
1501
        srl     a11, a10
1502
        movi    a10, 0
1503
        or      xl, xl, a9
1504
 
1505
        /* Set the exponent to zero.  */
1506
1:      movi    a8, 0
1507
 
1508
        /* Pack any nonzero remainder (in xh/xl) into a6.  */
1509
        or      xh, xh, xl
1510
        beqz    xh, 1f
1511
        movi    a9, 1
1512
        or      a6, a6, a9
1513
 
1514
        /* Round a10/a11 based on the bits shifted out into a6.  */
1515
1:      bgez    a6, .Ldiv_rounded
1516
        addi    a11, a11, 1
1517
        beqz    a11, .Ldiv_roundcarry
1518
        slli    a6, a6, 1
1519
        bnez    a6, .Ldiv_rounded
1520
        srli    a11, a11, 1
1521
        slli    a11, a11, 1
1522
        j       .Ldiv_rounded
1523
 
1524
.Ldiv_roundcarry:
1525
        /* a11 is always zero when the rounding increment overflows, so
1526
           there's no need to round it to an even value.  */
1527
        addi    a10, a10, 1
1528
        /* Overflow to the exponent field is OK.  */
1529
        j       .Ldiv_rounded
1530
 
1531
.Ldiv_flush_to_zero:
1532
        /* Return zero with the appropriate sign bit.  */
1533
        srli    xh, a7, 31
1534
        slli    xh, xh, 31
1535
        movi    xl, 0
1536
        leaf_return
1537
 
1538
#endif /* L_divdf3 */
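
A C sketch (not part of the original, using 64-bit integers where the assembly keeps register pairs) of the bit-at-a-time restoring division above: x and y are 53-bit mantissas with the implicit 1.0 made explicit in bit 52, and the remainder (already shifted left once) is handed back for the round-to-nearest-even step.

#include <stdint.h>

static uint64_t div_mantissas(uint64_t x, uint64_t y,
                              int *exp_adjust, uint64_t *rem2)
{
    *exp_adjust = 0;
    if (x < y) {                     /* force the first quotient digit to be 1 */
        x <<= 1;
        *exp_adjust = -1;
    }
    uint64_t q = 1;                  /* first quotient bit */
    x = (x - y) << 1;
    for (int i = 0; i < 52; i++) {   /* one quotient bit per iteration */
        q <<= 1;
        if (x >= y) {
            q |= 1;
            x -= y;
        }
        x <<= 1;
    }
    *rem2 = x;                       /* remainder << 1, compared against y when rounding */
    return q;                        /* 53-bit quotient with its leading 1 */
}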
1539
 
1540
#ifdef L_cmpdf2
1541
 
1542
        /* Equal and Not Equal */
1543
 
1544
        .align  4
1545
        .global __eqdf2
1546
        .global __nedf2
1547
        .set    __nedf2, __eqdf2
1548
        .type   __eqdf2, @function
1549
__eqdf2:
1550
        leaf_entry sp, 16
1551
        bne     xl, yl, 2f
1552
        bne     xh, yh, 4f
1553
 
1554
        /* The values are equal but NaN != NaN.  Check the exponent.  */
1555
        movi    a6, 0x7ff00000
1556
        ball    xh, a6, 3f
1557
 
1558
        /* Equal.  */
1559
        movi    a2, 0
1560
        leaf_return
1561
 
1562
        /* Not equal.  */
1563
2:      movi    a2, 1
1564
        leaf_return
1565
 
1566
        /* Check if the mantissas are nonzero.  */
1567
3:      slli    a7, xh, 12
1568
        or      a7, a7, xl
1569
        j       5f
1570
 
1571
        /* Check if x and y are zero with different signs.  */
1572
4:      or      a7, xh, yh
1573
        slli    a7, a7, 1
1574
        or      a7, a7, xl      /* xl == yl here */
1575
 
1576
        /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1577
           of x when exponent(x) = 0x7ff and x == y.  */
1578
5:      movi    a2, 0
1579
        movi    a3, 1
1580
        movnez  a2, a3, a7
1581
        leaf_return
1582
 
1583
 
1584
        /* Greater Than */
1585
 
1586
        .align  4
1587
        .global __gtdf2
1588
        .type   __gtdf2, @function
1589
__gtdf2:
1590
        leaf_entry sp, 16
1591
        movi    a6, 0x7ff00000
1592
        ball    xh, a6, 2f
1593
1:      bnall   yh, a6, .Lle_cmp
1594
 
1595
        /* Check if y is a NaN.  */
1596
        slli    a7, yh, 12
1597
        or      a7, a7, yl
1598
        beqz    a7, .Lle_cmp
1599
        movi    a2, 0
1600
        leaf_return
1601
 
1602
        /* Check if x is a NaN.  */
1603
2:      slli    a7, xh, 12
1604
        or      a7, a7, xl
1605
        beqz    a7, 1b
1606
        movi    a2, 0
1607
        leaf_return
1608
 
1609
 
1610
        /* Less Than or Equal */
1611
 
1612
        .align  4
1613
        .global __ledf2
1614
        .type   __ledf2, @function
1615
__ledf2:
1616
        leaf_entry sp, 16
1617
        movi    a6, 0x7ff00000
1618
        ball    xh, a6, 2f
1619
1:      bnall   yh, a6, .Lle_cmp
1620
 
1621
        /* Check if y is a NaN.  */
1622
        slli    a7, yh, 12
1623
        or      a7, a7, yl
1624
        beqz    a7, .Lle_cmp
1625
        movi    a2, 1
1626
        leaf_return
1627
 
1628
        /* Check if x is a NaN.  */
1629
2:      slli    a7, xh, 12
1630
        or      a7, a7, xl
1631
        beqz    a7, 1b
1632
        movi    a2, 1
1633
        leaf_return
1634
 
1635
.Lle_cmp:
1636
        /* Check if x and y have different signs.  */
1637
        xor     a7, xh, yh
1638
        bltz    a7, .Lle_diff_signs
1639
 
1640
        /* Check if x is negative.  */
1641
        bltz    xh, .Lle_xneg
1642
 
1643
        /* Check if x <= y.  */
1644
        bltu    xh, yh, 4f
1645
        bne     xh, yh, 5f
1646
        bltu    yl, xl, 5f
1647
4:      movi    a2, 0
1648
        leaf_return
1649
 
1650
.Lle_xneg:
1651
        /* Check if y <= x.  */
1652
        bltu    yh, xh, 4b
1653
        bne     yh, xh, 5f
1654
        bgeu    xl, yl, 4b
1655
5:      movi    a2, 1
1656
        leaf_return
1657
 
1658
.Lle_diff_signs:
1659
        bltz    xh, 4b

        /* Check if both x and y are zero.  */
        or      a7, xh, yh
        slli    a7, a7, 1
        or      a7, a7, xl
        or      a7, a7, yl
        movi    a2, 1
        movi    a3, 0
        moveqz  a2, a3, a7
        leaf_return


        /* Greater Than or Equal */

        .align  4
        .global __gedf2
        .type   __gedf2, @function
__gedf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 2f
1:      bnall   yh, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, .Llt_cmp
        movi    a2, -1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, -1
        leaf_return


        /* Less Than */

        .align  4
        .global __ltdf2
        .type   __ltdf2, @function
__ltdf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 2f
1:      bnall   yh, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, .Llt_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, 0
        leaf_return

.Llt_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, xh, yh
        bltz    a7, .Llt_diff_signs

        /* Check if x is negative.  */
        bltz    xh, .Llt_xneg

        /* Check if x < y.  */
        bltu    xh, yh, 4f
        bne     xh, yh, 5f
        bgeu    xl, yl, 5f
4:      movi    a2, -1
        leaf_return

.Llt_xneg:
        /* Check if y < x.  */
        bltu    yh, xh, 4b
        bne     yh, xh, 5f
        bltu    yl, xl, 4b
5:      movi    a2, 0
        leaf_return

.Llt_diff_signs:
        bgez    xh, 5b

        /* Check if both x and y are zero.  */
        or      a7, xh, yh
        slli    a7, a7, 1
        or      a7, a7, xl
        or      a7, a7, yl
        movi    a2, 0
        movi    a3, -1
        movnez  a2, a3, a7
        leaf_return


        /* Unordered */

        .align  4
        .global __unorddf2
        .type   __unorddf2, @function
__unorddf2:
        leaf_entry sp, 16
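        /* Return a nonzero value if either x or y is a NaN, and zero
           otherwise.  */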
        movi    a6, 0x7ff00000
        ball    xh, a6, 3f
1:      ball    yh, a6, 4f
2:      movi    a2, 0
        leaf_return

3:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

4:      slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, 2b
        movi    a2, 1
        leaf_return

#endif /* L_cmpdf2 */

#ifdef L_fixdfsi

        .align  4
        .global __fixdfsi
        .type   __fixdfsi, @function
__fixdfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixdfsi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 19, 10  /* 0x3fe */
        sub     a4, a4, a5
        bgei    a4, 32, .Lfixdfsi_maxint
        blti    a4, 1, .Lfixdfsi_zero
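        /* a4 = exponent - 0x3fe is the number of bits in the integer part
           of |x|; the branches above send values with 32 or more integer
           bits to the saturation path and values below 1.0 to the zero
           path.  */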

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     a5, a7, xl
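        /* ORing xh with a6 set the exponent field to all ones; after the
           funnel shift only the lowest of those bits remains, so a5 holds
           the implicit leading 1 in its msb followed by the top 31
           mantissa bits.  */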

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
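        /* a7 still has the sign of x in its msb, so the movgez above keeps
           the positive value and the preceding neg supplies the negated
           one.  */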
        leaf_return

.Lfixdfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixdfsi_maxint

        /* Translate NaN to +maxint.  */
        movi    xh, 0

.Lfixdfsi_maxint:
        slli    a4, a6, 11      /* 0x80000000 */
        addi    a5, a4, -1      /* 0x7fffffff */
        movgez  a4, a5, xh
        mov     a2, a4
        leaf_return

.Lfixdfsi_zero:
        movi    a2, 0
        leaf_return

#endif /* L_fixdfsi */

#ifdef L_fixdfdi

        .align  4
        .global __fixdfdi
        .type   __fixdfdi, @function
__fixdfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixdfdi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 19, 10  /* 0x3fe */
        sub     a4, a4, a5
        bgei    a4, 64, .Lfixdfdi_maxint
        blti    a4, 1, .Lfixdfdi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     xh, a7, xl
        sll     xl, xl

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixdfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixdfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixdfdi_smallshift:
        src     xl, xh, xl
        srl     xh, xh
        j       .Lfixdfdi_shifted

.Lfixdfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixdfdi_maxint

        /* Translate NaN to +maxint.  */
        movi    xh, 0

.Lfixdfdi_maxint:
        slli    a7, a6, 11      /* 0x80000000 */
        bgez    xh, 1f
        mov     xh, a7
        movi    xl, 0
        leaf_return

1:      addi    xh, a7, -1      /* 0x7fffffff */
        movi    xl, -1
        leaf_return

.Lfixdfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

#endif /* L_fixdfdi */

#ifdef L_fixunsdfsi

        .align  4
        .global __fixunsdfsi
        .type   __fixunsdfsi, @function
__fixunsdfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixunsdfsi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 20, 10  /* 0x3ff */
        sub     a4, a4, a5
        bgei    a4, 32, .Lfixunsdfsi_maxint
        bltz    a4, .Lfixunsdfsi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     a5, a7, xl

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 32, .Lfixunsdfsi_bigexp
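        /* a4 now holds exponent - 0x3fe, the number of integer bits.
           Exactly 32 bits still fits an unsigned result but needs no
           shift, so it is handled separately at .Lfixunsdfsi_bigexp.  */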
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixunsdfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixunsdfsi_maxint

        /* Translate NaN to 0xffffffff.  */
        movi    a2, -1
        leaf_return

.Lfixunsdfsi_maxint:
        slli    a4, a6, 11      /* 0x80000000 */
        movi    a5, -1          /* 0xffffffff */
        movgez  a4, a5, xh
        mov     a2, a4
        leaf_return

.Lfixunsdfsi_zero:
        movi    a2, 0
        leaf_return

.Lfixunsdfsi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    xh, 1f
        mov     a2, a5          /* no shift needed */
        leaf_return

        /* Return 0x80000000 if negative.  */
1:      slli    a2, a6, 11
        leaf_return

#endif /* L_fixunsdfsi */

#ifdef L_fixunsdfdi

        .align  4
        .global __fixunsdfdi
        .type   __fixunsdfdi, @function
__fixunsdfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixunsdfdi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 20, 10  /* 0x3ff */
        sub     a4, a4, a5
        bgei    a4, 64, .Lfixunsdfdi_maxint
        bltz    a4, .Lfixunsdfdi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     xh, a7, xl
        sll     xl, xl

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 64, .Lfixunsdfdi_bigexp
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixunsdfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixunsdfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixunsdfdi_smallshift:
        src     xl, xh, xl
        srl     xh, xh
        j       .Lfixunsdfdi_shifted

.Lfixunsdfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixunsdfdi_maxint

        /* Translate NaN to 0xffffffff.... */
1:      movi    xh, -1
        movi    xl, -1
        leaf_return

.Lfixunsdfdi_maxint:
        bgez    xh, 1b
2:      slli    xh, a6, 11      /* 0x80000000 */
        movi    xl, 0
        leaf_return

.Lfixunsdfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

.Lfixunsdfdi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    a7, 2b
        leaf_return             /* no shift needed */

#endif /* L_fixunsdfdi */

#ifdef L_floatsidf

        .align  4
        .global __floatunsidf
        .type   __floatunsidf, @function
__floatunsidf:
        leaf_entry sp, 16
        beqz    a2, .Lfloatsidf_return_zero

        /* Set the sign to zero and jump to the floatsidf code.  */
        movi    a7, 0
        j       .Lfloatsidf_normalize

        .align  4
        .global __floatsidf
        .type   __floatsidf, @function
__floatsidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        beqz    a2, .Lfloatsidf_return_zero

        /* Save the sign.  */
        extui   a7, a2, 31, 1

        /* Get the absolute value.  */
#if XCHAL_HAVE_ABS
        abs     a2, a2
#else
        neg     a4, a2
        movltz  a2, a4, a2
#endif

.Lfloatsidf_normalize:
        /* Normalize with the first 1 bit in the msb.  */
        do_nsau a4, a2, a5, a6
        ssl     a4
        sll     a5, a2

        /* Shift the mantissa into position.  */
        srli    xh, a5, 11
        slli    xl, a5, (32 - 11)

        /* Set the exponent.  */
        movi    a5, 0x41d       /* 0x3fe + 31 */
        sub     a5, a5, a4
        slli    a5, a5, 20
        add     xh, xh, a5
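        /* The leading 1 of the mantissa sits in bit 20 of xh, so the add
           above also bumps the exponent field by one, giving a biased
           exponent of 0x3ff + 31 - a4 and leaving only the fraction bits
           in the mantissa field.  */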

        /* Add the sign and return. */
        slli    a7, a7, 31
        or      xh, xh, a7
        leaf_return

.Lfloatsidf_return_zero:
        movi    a3, 0
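        /* a2 is already zero here (this label is reached only via the
           beqz tests above), so clearing a3 returns 0.0 in either
           register order.  */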
        leaf_return

#endif /* L_floatsidf */

#ifdef L_floatdidf

        .align  4
        .global __floatundidf
        .type   __floatundidf, @function
__floatundidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Set the sign to zero and jump to the floatdidf code.  */
        movi    a7, 0
        j       .Lfloatdidf_normalize

        .align  4
        .global __floatdidf
        .type   __floatdidf, @function
__floatdidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Save the sign.  */
        extui   a7, xh, 31, 1

        /* Get the absolute value.  */
        bgez    xh, .Lfloatdidf_normalize
        neg     xl, xl
        neg     xh, xh
        beqz    xl, .Lfloatdidf_normalize
        addi    xh, xh, -1

.Lfloatdidf_normalize:
        /* Normalize with the first 1 bit in the msb of xh.  */
        beqz    xh, .Lfloatdidf_bigshift
        do_nsau a4, xh, a5, a6
        ssl     a4
        src     xh, xh, xl
        sll     xl, xl

.Lfloatdidf_shifted:
        /* Shift the mantissa into position, with rounding bits in a6.  */
        ssai    11
        sll     a6, xl
        src     xl, xh, xl
        srl     xh, xh
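        /* a6 now holds the 11 bits shifted out of the low word: its msb is
           the round bit and the remaining bits act as sticky bits for the
           round-to-nearest-even logic below.  */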

        /* Set the exponent.  */
        movi    a5, 0x43d       /* 0x3fe + 63 */
        sub     a5, a5, a4
        slli    a5, a5, 20
        add     xh, xh, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      xh, xh, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, 2f
        addi    xl, xl, 1
        beqz    xl, .Lfloatdidf_roundcarry

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatdidf_exactlyhalf
2:      leaf_return

.Lfloatdidf_bigshift:
        /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
        do_nsau a4, xl, a5, a6
        ssl     a4
        sll     xh, xl
        movi    xl, 0
        addi    a4, a4, 32
        j       .Lfloatdidf_shifted

.Lfloatdidf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    xl, xl, 1
        slli    xl, xl, 1
        leaf_return

.Lfloatdidf_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    xh, xh, 1
        /* Overflow to the exponent is OK.  */
        leaf_return

#endif /* L_floatdidf */

#ifdef L_truncdfsf2

        .align  4
        .global __truncdfsf2
        .type   __truncdfsf2, @function
__truncdfsf2:
        leaf_entry sp, 16

        /* Adjust the exponent bias.  */
        movi    a4, (0x3ff - 0x7f) << 20
        sub     a5, xh, a4

        /* Check for underflow.  */
        xor     a6, xh, a5
        bltz    a6, .Ltrunc_underflow
        extui   a6, a5, 20, 11
        beqz    a6, .Ltrunc_underflow

        /* Check for overflow.  */
        movi    a4, 255
        bge     a6, a4, .Ltrunc_overflow

        /* Shift a5/xl << 3 into a5/a4.  */
        ssai    (32 - 3)
        src     a5, a5, xl
        sll     a4, xl

.Ltrunc_addsign:
        /* Add the sign bit.  */
        extui   a6, xh, 31, 1
        slli    a6, a6, 31
        or      a2, a6, a5

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a4, 1f
        addi    a2, a2, 1
        /* Overflow to the exponent is OK.  The answer will be correct.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a4, a4, 1
        beqz    a4, .Ltrunc_exactlyhalf
1:      leaf_return

.Ltrunc_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

.Ltrunc_overflow:
        /* Check if exponent == 0x7ff.  */
        movi    a4, 0x7ff00000
        bnall   xh, a4, 1f

        /* Check if mantissa is nonzero.  */
        slli    a5, xh, 12
        or      a5, a5, xl
        beqz    a5, 1f

        /* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
        srli    a4, a4, 1
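        /* With this extra bit set, the code below produces
           sign | 0x7fc00000 (a quiet NaN); otherwise it produces
           sign | 0x7f800000 (Infinity).  */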

1:      slli    a4, a4, 4       /* 0xff000000 or 0xff800000 */
        /* Add the sign bit.  */
        extui   a6, xh, 31, 1
        ssai    1
        src     a2, a6, a4
        leaf_return

.Ltrunc_underflow:
        /* Find shift count for a subnormal.  Flush to zero if >= 32.  */
        extui   a6, xh, 20, 11
        movi    a5, 0x3ff - 0x7f
        sub     a6, a5, a6
        addi    a6, a6, 1
        bgeui   a6, 32, 1f

        /* Replace the exponent with an explicit "1.0".  */
        slli    a5, a5, 13      /* 0x700000 */
        or      a5, a5, xh
        slli    a5, a5, 11
        srli    a5, a5, 11

        /* Shift the mantissa left by 3 bits (into a5/a4).  */
        ssai    (32 - 3)
        src     a5, a5, xl
        sll     a4, xl

        /* Shift right by a6.  */
        ssr     a6
        sll     a7, a4
        src     a4, a5, a4
        srl     a5, a5
        beqz    a7, .Ltrunc_addsign
        or      a4, a4, a6      /* any positive, nonzero value will work */
        j       .Ltrunc_addsign

        /* Return +/- zero.  */
1:      extui   a2, xh, 31, 1
        slli    a2, a2, 31
        leaf_return

#endif /* L_truncdfsf2 */

#ifdef L_extendsfdf2

        .align  4
        .global __extendsfdf2
        .type   __extendsfdf2, @function
__extendsfdf2:
        leaf_entry sp, 16

        /* Save the sign bit and then shift it off.  */
        extui   a5, a2, 31, 1
        slli    a5, a5, 31
        slli    a4, a2, 1

        /* Extract and check the exponent.  */
        extui   a6, a2, 23, 8
        beqz    a6, .Lextend_expzero
        addi    a6, a6, 1
        beqi    a6, 256, .Lextend_nan_or_inf

        /* Shift >> 3 into a4/xl.  */
        srli    a4, a4, 4
        slli    xl, a2, (32 - 3)

        /* Adjust the exponent bias.  */
        movi    a6, (0x3ff - 0x7f) << 20
        add     a4, a4, a6
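        /* Adding (0x3ff - 0x7f) << 20 rebiases the exponent from the
           single-precision bias of 127 to the double-precision bias of
           1023.  */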

        /* Add the sign bit.  */
        or      xh, a4, a5
        leaf_return

.Lextend_nan_or_inf:
        movi    a4, 0x7ff00000

        /* Check for NaN.  */
        slli    a7, a2, 9
        beqz    a7, 1f

        slli    a6, a6, 11      /* 0x80000 */
        or      a4, a4, a6

        /* Add the sign and return.  */
1:      or      xh, a4, a5
        movi    xl, 0
        leaf_return

.Lextend_expzero:
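        /* If the value (without its sign) is all zero, the input was
           +/- 0.0; branch to the return at 1b, which then produces a
           correctly signed zero since a4 == 0.  */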
        beqz    a4, 1b

        /* Normalize it to have 8 zero bits before the first 1 bit.  */
        do_nsau a7, a4, a2, a3
        addi    a7, a7, -8
        ssl     a7
        sll     a4, a4

        /* Shift >> 3 into a4/xl.  */
        slli    xl, a4, (32 - 3)
        srli    a4, a4, 3

        /* Set the exponent.  */
        movi    a6, 0x3fe - 0x7f
        sub     a6, a6, a7
        slli    a6, a6, 20
        add     a4, a4, a6

        /* Add the sign and return.  */
        or      xh, a4, a5
        leaf_return

#endif /* L_extendsfdf2 */