/* IEEE-754 single-precision functions for Xtensa
   Copyright (C) 2006 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   In addition to the permissions in the GNU General Public License,
   the Free Software Foundation gives you unlimited permission to link
   the compiled version of this file into combinations with other
   programs, and to distribute those combinations without any
   restriction coming from the use of this file.  (The General Public
   License restrictions do apply in other respects; for example, they
   cover modification of the file, and distribution when not linked
   into a combine executable.)

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.  */

#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif

/*  Warning!  The branch displacements for some Xtensa branch instructions
    are quite small, and this code has been carefully laid out to keep
    branch targets in range.  If you change anything, be sure to check that
    the assembler is not relaxing anything to branch over a jump.  */

#ifdef L_negsf2

        .align  4
        .global __negsf2
        .type   __negsf2, @function
__negsf2:
        leaf_entry sp, 16
        movi    a4, 0x80000000
        xor     a2, a2, a4
        leaf_return
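
/* Illustrative only, not part of the original file: in C terms, __negsf2
   simply toggles the IEEE-754 sign bit; NaNs and Infinities are negated
   the same way.  A minimal sketch (hypothetical helper name):

       #include <stdint.h>
       static uint32_t negsf2_bits(uint32_t x)
       {
           return x ^ 0x80000000u;   // flip bit 31, the sign bit
       }
*/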

#endif /* L_negsf2 */

#ifdef L_addsubsf3

        /* Addition */
__addsf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Ladd_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   a3, a6, 1f
        /* If x is a NaN, return it.  Otherwise, return y.  */
        slli    a7, a2, 9
        beqz    a7, .Ladd_ynan_or_inf
1:      leaf_return

.Ladd_ynan_or_inf:
        /* Return y.  */
        mov     a2, a3
        leaf_return

.Ladd_opposite_signs:
        /* Operand signs differ.  Do a subtraction.  */
        slli    a7, a6, 8
        xor     a3, a3, a7
        j       .Lsub_same_sign

        .align  4
        .global __addsf3
        .type   __addsf3, @function
__addsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Check if the two operands have the same sign.  */
        xor     a7, a2, a3
        bltz    a7, .Ladd_opposite_signs

.Ladd_same_sign:
        /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
        ball    a2, a6, .Ladd_xnan_or_inf
        ball    a3, a6, .Ladd_ynan_or_inf

        /* Compare the exponents.  The smaller operand will be shifted
           right by the exponent difference and added to the larger
           one.  */
        extui   a7, a2, 23, 9
        extui   a8, a3, 23, 9
        bltu    a7, a8, .Ladd_shiftx

.Ladd_shifty:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   a3, a6, .Ladd_yexpzero

        /* Replace y sign/exponent with 0x008.  */
        or      a3, a3, a6
        slli    a3, a3, 8
        srli    a3, a3, 8

.Ladd_yexpdiff:
        /* Compute the exponent difference.  */
        sub     a10, a7, a8

        /* Exponent difference > 32 -- just return the bigger value.  */
        bgeui   a10, 32, 1f

        /* Shift y right by the exponent difference.  Any bits that are
           shifted out of y are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, a3, a9
        srl     a3, a3

        /* Do the addition.  */
        add     a2, a2, a3

        /* Check if the add overflowed into the exponent.  */
        extui   a10, a2, 23, 9
        beq     a10, a7, .Ladd_round
        mov     a8, a7
        j       .Ladd_carry

.Ladd_yexpzero:
        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0", and increment the apparent exponent
           because subnormals behave as if they had the minimum (nonzero)
           exponent.  Test for the case when both exponents are zero.  */
        slli    a3, a3, 9
        srli    a3, a3, 9
        bnone   a2, a6, .Ladd_bothexpzero
        addi    a8, a8, 1
        j       .Ladd_yexpdiff

.Ladd_bothexpzero:
        /* Both exponents are zero.  Handle this as a special case.  There
           is no need to shift or round, and the normal code for handling
           a carry into the exponent field will not work because it
           assumes there is an implicit "1.0" that needs to be added.  */
        add     a2, a2, a3
1:      leaf_return

.Ladd_xexpzero:
        /* Same as "yexpzero" except skip handling the case when both
           exponents are zero.  */
        slli    a2, a2, 9
        srli    a2, a2, 9
        addi    a7, a7, 1
        j       .Ladd_xexpdiff

.Ladd_shiftx:
        /* Same thing as the "shifty" code, but with x and y swapped.  Also,
           because the exponent difference is always nonzero in this version,
           the shift sequence can use SLL and skip loading a constant zero.  */
        bnone   a2, a6, .Ladd_xexpzero

        or      a2, a2, a6
        slli    a2, a2, 8
        srli    a2, a2, 8

.Ladd_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Ladd_returny

        ssr     a10
        sll     a9, a2
        srl     a2, a2

        add     a2, a2, a3

        /* Check if the add overflowed into the exponent.  */
        extui   a10, a2, 23, 9
        bne     a10, a8, .Ladd_carry

.Ladd_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    a2, a2, 1

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_returny:
        mov     a2, a3
        leaf_return

.Ladd_carry:
        /* The addition has overflowed into the exponent field, so the
           value needs to be renormalized.  The mantissa of the result
           can be recovered by subtracting the original exponent and
           adding 0x800000 (which is the explicit "1.0" for the
           mantissa of the non-shifted operand -- the "1.0" for the
           shifted operand was already added).  The mantissa can then
           be shifted right by one bit.  The explicit "1.0" of the
           shifted mantissa then needs to be replaced by the exponent,
           incremented by one to account for the normalizing shift.
           It is faster to combine these operations: do the shift first
           and combine the additions and subtractions.  If x is the
           original exponent, the result is:
               shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
           or:
               shifted mantissa + ((x + 1) << 22)
           Note that the exponent is incremented here by leaving the
           explicit "1.0" of the mantissa in the exponent field.  */

        /* Shift x right by one bit.  Save the lsb.  */
        mov     a10, a2
        srli    a2, a2, 1

        /* See explanation above.  The original exponent is in a8.  */
        addi    a8, a8, 1
        slli    a8, a8, 22
        add     a2, a2, a8

        /* Return an Infinity if the exponent overflowed.  */
        ball    a2, a6, .Ladd_infinity

        /* Same thing as the "round" code except the msb of the leftover
           fraction is bit 0 of a10, with the rest of the fraction in a9.  */
        bbci.l  a10, 0, 1f
        addi    a2, a2, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_infinity:
        /* Clear the mantissa.  */
        srli    a2, a2, 23
        slli    a2, a2, 23

        /* The sign bit may have been lost in a carry-out.  Put it back.  */
        slli    a8, a8, 1
        or      a2, a2, a8
        leaf_return

.Ladd_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return
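
/* Illustrative only, not part of the original file: .Ladd_round and
   .Ladd_exactlyhalf above implement round-to-nearest-even.  a9 holds the
   bits shifted out of the smaller operand, msb first.  A C sketch of the
   same decision (hypothetical names; `frac` plays the role of a9):

       #include <stdint.h>
       static uint32_t round_nearest_even(uint32_t result, uint32_t frac)
       {
           if (frac & 0x80000000u) {     // leftover fraction >= 1/2
               result += 1;              // round up
               if ((frac << 1) == 0)     // exactly 1/2: a tie
                   result &= ~1u;        // round down to the even value
           }
           return result;
       }
*/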


        /* Subtraction */
__subsf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Lsub_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   a3, a6, 1f
        /* Both x and y are either NaN or Inf, so the result is NaN.  */
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Lsub_ynan_or_inf:
        /* Negate y and return it.  */
        slli    a7, a6, 8
        xor     a2, a3, a7
        leaf_return

.Lsub_opposite_signs:
        /* Operand signs differ.  Do an addition.  */
        slli    a7, a6, 8
        xor     a3, a3, a7
        j       .Ladd_same_sign

        .align  4
        .global __subsf3
        .type   __subsf3, @function
__subsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Check if the two operands have the same sign.  */
        xor     a7, a2, a3
        bltz    a7, .Lsub_opposite_signs

.Lsub_same_sign:
        /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
        ball    a2, a6, .Lsub_xnan_or_inf
        ball    a3, a6, .Lsub_ynan_or_inf

        /* Compare the operands.  In contrast to addition, the entire
           value matters here.  */
        extui   a7, a2, 23, 8
        extui   a8, a3, 23, 8
        bltu    a2, a3, .Lsub_xsmaller

.Lsub_ysmaller:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   a3, a6, .Lsub_yexpzero

        /* Replace y sign/exponent with 0x008.  */
        or      a3, a3, a6
        slli    a3, a3, 8
        srli    a3, a3, 8

.Lsub_yexpdiff:
        /* Compute the exponent difference.  */
        sub     a10, a7, a8

        /* Exponent difference > 32 -- just return the bigger value.  */
        bgeui   a10, 32, 1f

        /* Shift y right by the exponent difference.  Any bits that are
           shifted out of y are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, a3, a9
        srl     a3, a3

        sub     a2, a2, a3

        /* Subtract the leftover bits in a9 from zero and propagate any
           borrow from a2.  */
        neg     a9, a9
        addi    a10, a2, -1
        movnez  a2, a10, a9

        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, a2, 23, 8
        beq     a10, a7, .Lsub_round
        j       .Lsub_borrow

.Lsub_yexpzero:
        /* Return zero if the inputs are equal.  (For the non-subnormal
           case, subtracting the "1.0" will cause a borrow from the exponent
           and this case can be detected when handling the borrow.)  */
        beq     a2, a3, .Lsub_return_zero

        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0".  Unless x is also a subnormal, increment
           y's apparent exponent because subnormals behave as if they had
           the minimum (nonzero) exponent.  */
        slli    a3, a3, 9
        srli    a3, a3, 9
        bnone   a2, a6, .Lsub_yexpdiff
        addi    a8, a8, 1
        j       .Lsub_yexpdiff

.Lsub_returny:
        /* Negate and return y.  */
        slli    a7, a6, 8
        xor     a2, a3, a7
1:      leaf_return

.Lsub_xsmaller:
        /* Same thing as the "ysmaller" code, but with x and y swapped and
           with y negated.  */
        bnone   a2, a6, .Lsub_xexpzero

        or      a2, a2, a6
        slli    a2, a2, 8
        srli    a2, a2, 8

.Lsub_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Lsub_returny

        ssr     a10
        movi    a9, 0
        src     a9, a2, a9
        srl     a2, a2

        /* Negate y.  */
        slli    a11, a6, 8
        xor     a3, a3, a11

        sub     a2, a3, a2

        neg     a9, a9
        addi    a10, a2, -1
        movnez  a2, a10, a9

        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, a2, 23, 8
        bne     a10, a8, .Lsub_borrow

.Lsub_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    a2, a2, 1

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Lsub_exactlyhalf
1:      leaf_return

.Lsub_xexpzero:
        /* Same as "yexpzero".  */
        beq     a2, a3, .Lsub_return_zero
        slli    a2, a2, 9
        srli    a2, a2, 9
        bnone   a3, a6, .Lsub_xexpdiff
        addi    a7, a7, 1
        j       .Lsub_xexpdiff

.Lsub_return_zero:
        movi    a2, 0
        leaf_return

.Lsub_borrow:
        /* The subtraction has underflowed into the exponent field, so the
           value needs to be renormalized.  Shift the mantissa left as
           needed to remove any leading zeros and adjust the exponent
           accordingly.  If the exponent is not large enough to remove
           all the leading zeros, the result will be a subnormal value.  */

        slli    a8, a2, 9
        beqz    a8, .Lsub_xzero
        do_nsau a6, a8, a7, a11
        srli    a8, a8, 9
        bge     a6, a10, .Lsub_subnormal
        addi    a6, a6, 1

.Lsub_normalize_shift:
        /* Shift the mantissa (a8/a9) left by a6.  */
        ssl     a6
        src     a8, a8, a9
        sll     a9, a9

        /* Combine the shifted mantissa with the sign and exponent,
           decrementing the exponent by a6.  (The exponent has already
           been decremented by one due to the borrow from the subtraction,
           but adding the mantissa will increment the exponent by one.)  */
        srli    a2, a2, 23
        sub     a2, a2, a6
        slli    a2, a2, 23
        add     a2, a2, a8
        j       .Lsub_round

.Lsub_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

.Lsub_xzero:
        /* If there was a borrow from the exponent, and the mantissa and
           guard digits are all zero, then the inputs were equal and the
           result should be zero.  */
        beqz    a9, .Lsub_return_zero

        /* Only the guard digit is nonzero.  Shift by min(24, a10).  */
        addi    a11, a10, -24
        movi    a6, 24
        movltz  a6, a10, a11
        j       .Lsub_normalize_shift

.Lsub_subnormal:
        /* The exponent is too small to shift away all the leading zeros.
           Set a6 to the current exponent (which has already been
           decremented by the borrow) so that the exponent of the result
           will be zero.  Do not add 1 to a6 in this case, because: (1)
           adding the mantissa will not increment the exponent, so there is
           no need to subtract anything extra from the exponent to
           compensate, and (2) the effective exponent of a subnormal is 1
           not 0 so the shift amount must be 1 smaller than normal. */
        mov     a6, a10
        j       .Lsub_normalize_shift
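
/* Illustrative only, not part of the original file: a simplified C sketch
   of the renormalization idea behind .Lsub_borrow and .Lsub_normalize_shift.
   All names are invented; `mant` carries the implicit-1 position at bit 23,
   the represented value is mant * 2^(exp - 150), and rounding of the guard
   bits is omitted here:

       #include <stdint.h>
       static uint32_t renorm(uint32_t sign, int32_t exp, uint32_t mant)
       {
           if (mant == 0)
               return sign;                    // equal inputs -> zero
           while ((mant >> 23) == 0 && exp > 1) {
               mant <<= 1;                     // shift out a leading zero...
               exp -= 1;                       // ...and adjust the exponent
           }
           if ((mant >> 23) == 0)
               exp = 0;                        // subnormal: exponent field 0
           return sign | ((uint32_t)exp << 23) | (mant & 0x7fffff);
       }
*/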

#endif /* L_addsubsf3 */

#ifdef L_mulsf3

        /* Multiplication */
__mulsf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
        /* Clear the sign bit of x.  */
        slli    a2, a2, 1
        srli    a2, a2, 1

        /* If x is zero, return zero.  */
        beqz    a2, .Lmul_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        do_nsau a10, a2, a11, a12
        addi    a10, a10, -8
        ssl     a10
        sll     a2, a2
        movi    a8, 1
        sub     a8, a8, a10
        j       .Lmul_xnormalized

.Lmul_yexpzero:
        /* Clear the sign bit of y.  */
        slli    a3, a3, 1
        srli    a3, a3, 1

        /* If y is zero, return zero.  */
        beqz    a3, .Lmul_return_zero

        /* Normalize y.  Adjust the exponent in a9.  */
        do_nsau a10, a3, a11, a12
        addi    a10, a10, -8
        ssl     a10
        sll     a3, a3
        movi    a9, 1
        sub     a9, a9, a10
        j       .Lmul_ynormalized

.Lmul_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        j       .Lmul_done

.Lmul_xnan_or_inf:
        /* If y is zero, return NaN.  */
        slli    a8, a3, 1
        bnez    a8, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
        j       .Lmul_done
1:
        /* If y is NaN, return y.  */
        bnall   a3, a6, .Lmul_returnx
        slli    a8, a3, 9
        beqz    a8, .Lmul_returnx

.Lmul_returny:
        mov     a2, a3

.Lmul_returnx:
        /* Set the sign bit and return.  */
        extui   a7, a7, 31, 1
        slli    a2, a2, 1
        ssai    1
        src     a2, a7, a2
        j       .Lmul_done

.Lmul_ynan_or_inf:
        /* If x is zero, return NaN.  */
        slli    a8, a2, 1
        bnez    a8, .Lmul_returny
        movi    a7, 0x400000    /* make it a quiet NaN */
        or      a2, a3, a7
        j       .Lmul_done

        .align  4
        .global __mulsf3
        .type   __mulsf3, @function
__mulsf3:
        leaf_entry sp, 32
#if __XTENSA_CALL0_ABI__
        addi    sp, sp, -32
        s32i    a12, sp, 16
        s32i    a13, sp, 20
        s32i    a14, sp, 24
        s32i    a15, sp, 28
#endif
        movi    a6, 0x7f800000

        /* Get the sign of the result.  */
        xor     a7, a2, a3

        /* Check for NaN and infinity.  */
        ball    a2, a6, .Lmul_xnan_or_inf
        ball    a3, a6, .Lmul_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, a2, 23, 8
        extui   a9, a3, 23, 8

        beqz    a8, .Lmul_xexpzero
.Lmul_xnormalized:
        beqz    a9, .Lmul_yexpzero
.Lmul_ynormalized:

        /* Add the exponents.  */
        add     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0xffffff
        or      a2, a2, a6
        and     a2, a2, a10
        or      a3, a3, a6
        and     a3, a3, a10

        /* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */

#if XCHAL_HAVE_MUL32_HIGH

        mull    a6, a2, a3
        muluh   a2, a2, a3

#else

        /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
           products.  These partial products are:

                0 xl * yl

                1 xl * yh
                2 xh * yl

                3 xh * yh

           If using the Mul16 or Mul32 multiplier options, these input
           chunks must be stored in separate registers.  For Mac16, the
           UMUL.AA.* opcodes can specify that the inputs come from either
           half of the registers, so there is no need to shift them out
           ahead of time.  If there is no multiply hardware, the 16-bit
           chunks can be extracted when setting up the arguments to the
           separate multiply function.  */
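
/* Illustrative only, not part of the original file: a C sketch of the
   32x32->64 unsigned multiply built from the four partial products pp0..pp3
   named above (hypothetical function name):

       #include <stdint.h>
       static void umul32x32(uint32_t x, uint32_t y,
                             uint32_t *hi, uint32_t *lo)
       {
           uint32_t xl = x & 0xffff, xh = x >> 16;
           uint32_t yl = y & 0xffff, yh = y >> 16;
           uint32_t pp0 = xl * yl;                  // bits 0..31
           uint32_t pp1 = xl * yh;                  // bits 16..47
           uint32_t pp2 = xh * yl;                  // bits 16..47
           uint32_t pp3 = xh * yh;                  // bits 32..63
           uint32_t mid = pp1 + pp2;                // may wrap around...
           uint32_t carry = (mid < pp1) ? 0x10000u : 0;  // ...into bit 48
           *lo = pp0 + (mid << 16);
           *hi = pp3 + (mid >> 16) + carry + ((*lo < pp0) ? 1u : 0);
       }
*/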

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
        /* Calling a separate multiply function will clobber a0 and requires
           use of a8 as a temporary, so save those values now.  (The function
           uses a custom ABI so nothing else needs to be saved.)  */
        s32i    a0, sp, 0
        s32i    a8, sp, 4
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

        /* Get the high halves of the inputs into registers.  */
        srli    a2h, a2, 16
        srli    a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
        /* Clear the high halves of the inputs.  This does not matter
           for MUL16 because the high bits are ignored.  */
        extui   a2, a2, 0, 16
        extui   a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mul16u  dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mull    dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
        rsr     dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
        extui   dst, src, 0, 16
#define set_arg_h(dst, src) \
        srli    dst, src, 16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a13, xreg); \
        set_arg_ ## yhalf (a14, yreg); \
        call0   .Lmul_mulsi3; \
        mov     dst, a12
#endif

        /* Add pp1 and pp2 into a6 with carry-out in a9.  */
        do_mul(a6, a2, l, a3, h)        /* pp 1 */
        do_mul(a11, a2, h, a3, l)       /* pp 2 */
        movi    a9, 0
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Shift the high half of a9/a6 into position in a9.  Note that
           this value can be safely incremented without any carry-outs.  */
        ssai    16
        src     a9, a9, a6

        /* Compute the low word into a6.  */
        do_mul(a11, a2, l, a3, l)       /* pp 0 */
        sll     a6, a6
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Compute the high word into a2.  */
        do_mul(a2, a2, h, a3, h)        /* pp 3 */
        add     a2, a2, a9

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
        /* Restore values saved on the stack during the multiplication.  */
        l32i    a0, sp, 0
        l32i    a8, sp, 4
#endif
#endif

        /* Shift left by 9 bits, unless there was a carry-out from the
           multiply, in which case, shift by 8 bits and increment the
           exponent.  */
        movi    a4, 9
        srli    a5, a2, 24 - 9
        beqz    a5, 1f
        addi    a4, a4, -1
        addi    a8, a8, 1
1:      ssl     a4
        src     a2, a2, a6
        sll     a6, a6

        /* Subtract the extra bias from the exponent sum (plus one to account
           for the explicit "1.0" of the mantissa that will be added to the
           exponent in the final result).  */
        movi    a4, 0x80
        sub     a8, a8, a4

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..fd are OK here.  */
        movi    a4, 0xfe
        bgeu    a8, a4, .Lmul_overflow

.Lmul_round:
        /* Round.  */
        bgez    a6, .Lmul_rounded
        addi    a2, a2, 1
        slli    a6, a6, 1
        beqz    a6, .Lmul_exactlyhalf

.Lmul_rounded:
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 23
        add     a2, a2, a8

.Lmul_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      a2, a2, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
        l32i    a12, sp, 16
        l32i    a13, sp, 20
        l32i    a14, sp, 24
        l32i    a15, sp, 28
        addi    sp, sp, 32
#endif
        leaf_return

.Lmul_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        j       .Lmul_rounded

.Lmul_overflow:
        bltz    a8, .Lmul_underflow
        /* Return +/- Infinity.  */
        movi    a8, 0xff
        slli    a2, a8, 23
        j       .Lmul_addsign

.Lmul_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        mov     a9, a6
        ssr     a8
        bgeui   a8, 32, .Lmul_flush_to_zero

        /* Shift a2 right.  Any bits that are shifted out of a2 are saved
           in a6 (combined with the shifted-out bits currently in a6) for
           rounding the result.  */
        sll     a6, a2
        srl     a2, a2

        /* Set the exponent to zero.  */
        movi    a8, 0

        /* Pack any nonzero bits shifted out into a6.  */
        beqz    a9, .Lmul_round
        movi    a9, 1
        or      a6, a6, a9
        j       .Lmul_round

.Lmul_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        j       .Lmul_done

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16

        /* For Xtensa processors with no multiply hardware, this simplified
           version of _mulsi3 is used for multiplying 16-bit chunks of
           the floating-point mantissas.  It uses a custom ABI: the inputs
           are passed in a13 and a14, the result is returned in a12, and
           a8 and a15 are clobbered.  */
        .align  4
.Lmul_mulsi3:
        movi    a12, 0
.Lmul_mult_loop:
        add     a15, a14, a12
        extui   a8, a13, 0, 1
        movnez  a12, a15, a8

        do_addx2 a15, a14, a12, a15
        extui   a8, a13, 1, 1
        movnez  a12, a15, a8

        do_addx4 a15, a14, a12, a15
        extui   a8, a13, 2, 1
        movnez  a12, a15, a8

        do_addx8 a15, a14, a12, a15
        extui   a8, a13, 3, 1
        movnez  a12, a15, a8

        srli    a13, a13, 4
        slli    a14, a14, 4
        bnez    a13, .Lmul_mult_loop
        ret
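
/* Illustrative only, not part of the original file: a C sketch of the
   shift-and-add loop above.  Each iteration consumes four multiplier bits,
   conditionally adding 1x, 2x, 4x and 8x of the multiplicand, exactly as
   the movnez/do_addx2/do_addx4/do_addx8 sequence does:

       #include <stdint.h>
       static uint32_t mulsi3_sketch(uint32_t a, uint32_t b)
       {
           uint32_t acc = 0;          // a: multiplier, b: multiplicand
           while (a != 0) {
               if (a & 1) acc += b;
               if (a & 2) acc += b << 1;
               if (a & 4) acc += b << 2;
               if (a & 8) acc += b << 3;
               a >>= 4;               // move on to the next four bits
               b <<= 4;
           }
           return acc;
       }
*/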
#endif /* !MUL16 && !MUL32 && !MAC16 */
#endif /* L_mulsf3 */

#ifdef L_divsf3

        /* Division */
__divsf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
        /* Clear the sign bit of y.  */
        slli    a3, a3, 1
        srli    a3, a3, 1

        /* Check for division by zero.  */
        beqz    a3, .Ldiv_yzero

        /* Normalize y.  Adjust the exponent in a9.  */
        do_nsau a10, a3, a4, a5
        addi    a10, a10, -8
        ssl     a10
        sll     a3, a3
        movi    a9, 1
        sub     a9, a9, a10
        j       .Ldiv_ynormalized

.Ldiv_yzero:
        /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
        slli    a4, a2, 1
        srli    a4, a4, 1
        srli    a2, a7, 31
        slli    a2, a2, 31
        or      a2, a2, a6
        bnez    a4, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Ldiv_xexpzero:
        /* Clear the sign bit of x.  */
        slli    a2, a2, 1
        srli    a2, a2, 1

        /* If x is zero, return zero.  */
        beqz    a2, .Ldiv_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        do_nsau a10, a2, a4, a5
        addi    a10, a10, -8
        ssl     a10
        sll     a2, a2
        movi    a8, 1
        sub     a8, a8, a10
        j       .Ldiv_xnormalized

.Ldiv_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        leaf_return

.Ldiv_xnan_or_inf:
        /* Set the sign bit of the result.  */
        srli    a7, a3, 31
        slli    a7, a7, 31
        xor     a2, a2, a7
        /* If y is NaN or Inf, return NaN.  */
        bnall   a3, a6, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Ldiv_ynan_or_inf:
        /* If y is Infinity, return zero.  */
        slli    a8, a3, 9
        beqz    a8, .Ldiv_return_zero
        /* y is NaN; return it.  */
        mov     a2, a3
        leaf_return

        .align  4
        .global __divsf3
        .type   __divsf3, @function
__divsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Get the sign of the result.  */
        xor     a7, a2, a3

        /* Check for NaN and infinity.  */
        ball    a2, a6, .Ldiv_xnan_or_inf
        ball    a3, a6, .Ldiv_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, a2, 23, 8
        extui   a9, a3, 23, 8

        beqz    a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
        beqz    a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

        /* Subtract the exponents.  */
        sub     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0xffffff
        or      a2, a2, a6
        and     a2, a2, a10
        or      a3, a3, a6
        and     a3, a3, a10

        /* The first digit of the mantissa division must be a one.
           Shift x (and adjust the exponent) as needed to make this true.  */
        bltu    a3, a2, 1f
        slli    a2, a2, 1
        addi    a8, a8, -1
1:
        /* Do the first subtraction and shift.  */
        sub     a2, a2, a3
        slli    a2, a2, 1

        /* Put the quotient into a10.  */
        movi    a10, 1

        /* Divide one bit at a time for 23 bits.  */
        movi    a9, 23
#if XCHAL_HAVE_LOOPS
        loop    a9, .Ldiv_loopend
#endif
.Ldiv_loop:
        /* Shift the quotient << 1.  */
        slli    a10, a10, 1

        /* Is this digit a 0 or 1?  */
        bltu    a2, a3, 1f

        /* Output a 1 and subtract.  */
        addi    a10, a10, 1
        sub     a2, a2, a3

        /* Shift the dividend << 1.  */
1:      slli    a2, a2, 1

#if !XCHAL_HAVE_LOOPS
        addi    a9, a9, -1
        bnez    a9, .Ldiv_loop
#endif
.Ldiv_loopend:

        /* Add the exponent bias (less one to account for the explicit "1.0"
           of the mantissa that will be added to the exponent in the final
           result).  */
        addi    a8, a8, 0x7e

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..fd are OK here.  */
        movi    a4, 0xfe
        bgeu    a8, a4, .Ldiv_overflow

.Ldiv_round:
        /* Round.  The remainder (<< 1) is in a2.  */
        bltu    a2, a3, .Ldiv_rounded
        addi    a10, a10, 1
        beq     a2, a3, .Ldiv_exactlyhalf

.Ldiv_rounded:
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 23
        add     a2, a10, a8

.Ldiv_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      a2, a2, a7
        leaf_return

.Ldiv_overflow:
        bltz    a8, .Ldiv_underflow
        /* Return +/- Infinity.  */
        addi    a8, a4, 1       /* 0xff */
        slli    a2, a8, 23
        j       .Ldiv_addsign

.Ldiv_exactlyhalf:
        /* Remainder is exactly half the divisor.  Round even.  */
        srli    a10, a10, 1
        slli    a10, a10, 1
        j       .Ldiv_rounded

.Ldiv_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        ssr     a8
        bgeui   a8, 32, .Ldiv_flush_to_zero

        /* Shift a10 right.  Any bits that are shifted out of a10 are
           saved in a6 for rounding the result.  */
        sll     a6, a10
        srl     a10, a10

        /* Set the exponent to zero.  */
        movi    a8, 0

        /* Pack any nonzero remainder (in a2) into a6.  */
        beqz    a2, 1f
        movi    a9, 1
        or      a6, a6, a9

        /* Round a10 based on the bits shifted out into a6.  */
1:      bgez    a6, .Ldiv_rounded
        addi    a10, a10, 1
        slli    a6, a6, 1
        bnez    a6, .Ldiv_rounded
        srli    a10, a10, 1
        slli    a10, a10, 1
        j       .Ldiv_rounded

.Ldiv_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        leaf_return
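
/* Illustrative only, not part of the original file: a C sketch of the
   restoring division performed by .Ldiv_loop, producing the 24-bit quotient
   and the doubled remainder used by .Ldiv_round (hypothetical names):

       #include <stdint.h>
       static uint32_t div_mantissa(uint32_t x, uint32_t y, uint32_t *rem)
       {
           // x and y are mantissas with the explicit 1.0 at bit 23; the
           // caller has already shifted x left (adjusting the exponent)
           // whenever x <= y, so the first quotient digit is always 1.
           uint32_t q = 1;
           x = (x - y) << 1;
           for (int i = 0; i < 23; i++) {
               q <<= 1;
               if (x >= y) {          // next quotient digit is 1
                   q += 1;
                   x -= y;
               }
               x <<= 1;               // shift the dividend
           }
           *rem = x;                  // remainder << 1, consulted for rounding
           return q;
       }
*/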

#endif /* L_divsf3 */

#ifdef L_cmpsf2

        /* Equal and Not Equal */

        .align  4
        .global __eqsf2
        .global __nesf2
        .set    __nesf2, __eqsf2
        .type   __eqsf2, @function
__eqsf2:
        leaf_entry sp, 16
        bne     a2, a3, 4f

        /* The values are equal but NaN != NaN.  Check the exponent.  */
        movi    a6, 0x7f800000
        ball    a2, a6, 3f

        /* Equal.  */
        movi    a2, 0
        leaf_return

        /* Not equal.  */
2:      movi    a2, 1
        leaf_return

        /* Check if the mantissas are nonzero.  */
3:      slli    a7, a2, 9
        j       5f

        /* Check if x and y are zero with different signs.  */
4:      or      a7, a2, a3
        slli    a7, a7, 1

        /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
           of x when exponent(x) = 0x7f8 and x == y.  */
5:      movi    a2, 0
        movi    a3, 1
        movnez  a2, a3, a7
        leaf_return
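
/* Illustrative only, not part of the original file: a C sketch of the
   __eqsf2 logic above.  Zero means equal, as the libgcc comparison helpers
   expect; bitwise-equal NaNs still compare unequal, and +0 equals -0:

       #include <stdint.h>
       static int eqsf2_sketch(uint32_t x, uint32_t y)
       {
           if (x == y)                           // bitwise equal...
               return ((x & 0x7f800000u) == 0x7f800000u)
                      && ((x << 9) != 0);        // ...but NaN != NaN
           return ((x | y) << 1) != 0;           // both zero: +0 == -0
       }
*/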


        /* Greater Than */

        .align  4
        .global __gtsf2
        .type   __gtsf2, @function
__gtsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Lle_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 0
        leaf_return


        /* Less Than or Equal */

        .align  4
        .global __lesf2
        .type   __lesf2, @function
__lesf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Lle_cmp
        movi    a2, 1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

.Lle_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, a2, a3
        bltz    a7, .Lle_diff_signs

        /* Check if x is negative.  */
        bltz    a2, .Lle_xneg

        /* Check if x <= y.  */
        bltu    a3, a2, 5f
4:      movi    a2, 0
        leaf_return

.Lle_xneg:
        /* Check if y <= x.  */
        bgeu    a2, a3, 4b
5:      movi    a2, 1
        leaf_return

.Lle_diff_signs:
        bltz    a2, 4b

        /* Check if both x and y are zero.  */
        or      a7, a2, a3
        slli    a7, a7, 1
        movi    a2, 1
        movi    a3, 0
        moveqz  a2, a3, a7
        leaf_return


        /* Greater Than or Equal */

        .align  4
        .global __gesf2
        .type   __gesf2, @function
__gesf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Llt_cmp
        movi    a2, -1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, -1
        leaf_return


        /* Less Than */

        .align  4
        .global __ltsf2
        .type   __ltsf2, @function
__ltsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Llt_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 0
        leaf_return

.Llt_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, a2, a3
        bltz    a7, .Llt_diff_signs

        /* Check if x is negative.  */
        bltz    a2, .Llt_xneg

        /* Check if x < y.  */
        bgeu    a2, a3, 5f
4:      movi    a2, -1
        leaf_return

.Llt_xneg:
        /* Check if y < x.  */
        bltu    a3, a2, 4b
5:      movi    a2, 0
        leaf_return

.Llt_diff_signs:
        bgez    a2, 5b

        /* Check if both x and y are nonzero.  */
        or      a7, a2, a3
        slli    a7, a7, 1
        movi    a2, 0
        movi    a3, -1
        movnez  a2, a3, a7
        leaf_return


        /* Unordered */

        .align  4
        .global __unordsf2
        .type   __unordsf2, @function
__unordsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 3f
1:      ball    a3, a6, 4f
2:      movi    a2, 0
        leaf_return

3:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

4:      slli    a7, a3, 9
        beqz    a7, 2b
        movi    a2, 1
        leaf_return

#endif /* L_cmpsf2 */

#ifdef L_fixsfsi

        .align  4
        .global __fixsfsi
        .type   __fixsfsi, @function
__fixsfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixsfsi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7e
        bgei    a4, 32, .Lfixsfsi_maxint
        blti    a4, 1, .Lfixsfsi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    a5, a7, 8

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixsfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixsfsi_maxint

        /* Translate NaN to +maxint.  */
        movi    a2, 0

.Lfixsfsi_maxint:
        slli    a4, a6, 8       /* 0x80000000 */
        addi    a5, a4, -1      /* 0x7fffffff */
        movgez  a4, a5, a2
        mov     a2, a4
        leaf_return

.Lfixsfsi_zero:
        movi    a2, 0
        leaf_return
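
/* Illustrative only, not part of the original file: a C sketch of the
   __fixsfsi conversion above, including the saturation cases
   (hypothetical function name):

       #include <stdint.h>
       static int32_t fixsfsi_sketch(uint32_t bits)
       {
           uint32_t exp = (bits >> 23) & 0xff;
           int neg = (int32_t)bits < 0;
           if (exp == 0xff && (bits << 9) != 0)  // NaN -> +maxint
               return 0x7fffffff;
           int shift = (int)exp - 0x7e;          // integer bits of the value
           if (shift >= 32)                      // too big, or +/- Infinity
               return neg ? (int32_t)0x80000000 : 0x7fffffff;
           if (shift < 1)
               return 0;                         // |value| < 1 truncates to 0
           uint32_t mag = (bits | 0x00800000u) << 8;  // explicit 1.0 at msb
           mag >>= 32 - shift;
           return neg ? -(int32_t)mag : (int32_t)mag;
       }
*/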

#endif /* L_fixsfsi */

#ifdef L_fixsfdi

        .align  4
        .global __fixsfdi
        .type   __fixsfdi, @function
__fixsfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixsfdi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7e
        bgei    a4, 64, .Lfixsfdi_maxint
        blti    a4, 1, .Lfixsfdi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    xh, a7, 8

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixsfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixsfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixsfdi_smallshift:
        movi    xl, 0
        sll     xl, xh
        srl     xh, xh
        j       .Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixsfdi_maxint

        /* Translate NaN to +maxint.  */
        movi    a2, 0

.Lfixsfdi_maxint:
        slli    a7, a6, 8       /* 0x80000000 */
        bgez    a2, 1f
        mov     xh, a7
        movi    xl, 0
        leaf_return

1:      addi    xh, a7, -1      /* 0x7fffffff */
        movi    xl, -1
        leaf_return

.Lfixsfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

#endif /* L_fixsfdi */

#ifdef L_fixunssfsi

        .align  4
        .global __fixunssfsi
        .type   __fixunssfsi, @function
__fixunssfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixunssfsi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7f
        bgei    a4, 32, .Lfixunssfsi_maxint
        bltz    a4, .Lfixunssfsi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    a5, a7, 8

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 32, .Lfixunssfsi_bigexp
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixunssfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixunssfsi_maxint

        /* Translate NaN to 0xffffffff.  */
        movi    a2, -1
        leaf_return

.Lfixunssfsi_maxint:
        slli    a4, a6, 8       /* 0x80000000 */
        movi    a5, -1          /* 0xffffffff */
        movgez  a4, a5, a2
        mov     a2, a4
        leaf_return

.Lfixunssfsi_zero:
        movi    a2, 0
        leaf_return

.Lfixunssfsi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    a2, 1f
        mov     a2, a5          /* no shift needed */
        leaf_return

        /* Return 0x80000000 if negative.  */
1:      slli    a2, a6, 8
        leaf_return

#endif /* L_fixunssfsi */

#ifdef L_fixunssfdi

        .align  4
        .global __fixunssfdi
        .type   __fixunssfdi, @function
__fixunssfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixunssfdi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7f
        bgei    a4, 64, .Lfixunssfdi_maxint
        bltz    a4, .Lfixunssfdi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    xh, a7, 8

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 64, .Lfixunssfdi_bigexp
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixunssfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixunssfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixunssfdi_smallshift:
        movi    xl, 0
        src     xl, xh, xl
        srl     xh, xh
        j       .Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixunssfdi_maxint

        /* Translate NaN to 0xffffffff.... */
1:      movi    xh, -1
        movi    xl, -1
        leaf_return

.Lfixunssfdi_maxint:
        bgez    a2, 1b
2:      slli    xh, a6, 8       /* 0x80000000 */
        movi    xl, 0
        leaf_return

.Lfixunssfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

.Lfixunssfdi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    a7, 2b
        movi    xl, 0
        leaf_return             /* no shift needed */

#endif /* L_fixunssfdi */

#ifdef L_floatsisf

        .align  4
        .global __floatunsisf
        .type   __floatunsisf, @function
__floatunsisf:
        leaf_entry sp, 16
        beqz    a2, .Lfloatsisf_return

        /* Set the sign to zero and jump to the floatsisf code.  */
        movi    a7, 0
        j       .Lfloatsisf_normalize

        .align  4
        .global __floatsisf
        .type   __floatsisf, @function
__floatsisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        beqz    a2, .Lfloatsisf_return

        /* Save the sign.  */
        extui   a7, a2, 31, 1

        /* Get the absolute value.  */
#if XCHAL_HAVE_ABS
        abs     a2, a2
#else
        neg     a4, a2
        movltz  a2, a4, a2
#endif

.Lfloatsisf_normalize:
        /* Normalize with the first 1 bit in the msb.  */
        do_nsau a4, a2, a5, a6
        ssl     a4
        sll     a5, a2

        /* Shift the mantissa into position, with rounding bits in a6.  */
        srli    a2, a5, 8
        slli    a6, a5, (32 - 8)

        /* Set the exponent.  */
        movi    a5, 0x9d        /* 0x7e + 31 */
        sub     a5, a5, a4
        slli    a5, a5, 23
        add     a2, a2, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      a2, a2, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, .Lfloatsisf_return
        addi    a2, a2, 1       /* Overflow to the exponent is OK.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
        leaf_return

.Lfloatsisf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return
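
/* Illustrative only, not part of the original file: a C sketch of the
   __floatsisf path above, normalizing with a count-leading-zeros and
   rounding to nearest-even (hypothetical name; uses GCC's __builtin_clz):

       #include <stdint.h>
       static uint32_t floatsisf_sketch(int32_t v)
       {
           if (v == 0)
               return 0;
           uint32_t sign = (v < 0) ? 0x80000000u : 0;
           uint32_t mag = (v < 0) ? 0u - (uint32_t)v : (uint32_t)v;
           int lz = __builtin_clz(mag);          // like do_nsau
           uint32_t norm = mag << lz;            // leading 1 now at the msb
           uint32_t result = sign + ((uint32_t)(0x9d - lz) << 23)
                             + (norm >> 8);      // implicit 1 bumps exponent
           uint32_t frac = norm << 24;           // 8 rounding bits, msb-aligned
           if (frac & 0x80000000u) {             // >= 1/2: round up
               result += 1;                      // carry into exponent is OK
               if ((frac << 1) == 0)             // exactly 1/2
                   result &= ~1u;                // round to even
           }
           return result;
       }
*/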

#endif /* L_floatsisf */

#ifdef L_floatdisf

        .align  4
        .global __floatundisf
        .type   __floatundisf, @function
__floatundisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Set the sign to zero and jump to the floatdisf code.  */
        movi    a7, 0
        j       .Lfloatdisf_normalize

        .align  4
        .global __floatdisf
        .type   __floatdisf, @function
__floatdisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Save the sign.  */
        extui   a7, xh, 31, 1

        /* Get the absolute value.  */
        bgez    xh, .Lfloatdisf_normalize
        neg     xl, xl
        neg     xh, xh
        beqz    xl, .Lfloatdisf_normalize
        addi    xh, xh, -1

.Lfloatdisf_normalize:
        /* Normalize with the first 1 bit in the msb of xh.  */
        beqz    xh, .Lfloatdisf_bigshift
        do_nsau a4, xh, a5, a6
        ssl     a4
        src     xh, xh, xl
        sll     xl, xl

.Lfloatdisf_shifted:
        /* Shift the mantissa into position, with rounding bits in a6.  */
        ssai    8
        sll     a5, xl
        src     a6, xh, xl
        srl     xh, xh
        beqz    a5, 1f
        movi    a5, 1
        or      a6, a6, a5
1:
        /* Set the exponent.  */
        movi    a5, 0xbd        /* 0x7e + 63 */
        sub     a5, a5, a4
        slli    a5, a5, 23
        add     a2, xh, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      a2, a2, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, 2f
        addi    a2, a2, 1       /* Overflow to the exponent is OK.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatdisf_exactlyhalf
2:      leaf_return

.Lfloatdisf_bigshift:
        /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
        do_nsau a4, xl, a5, a6
        ssl     a4
        sll     xh, xl
        movi    xl, 0
        addi    a4, a4, 32
        j       .Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

#endif /* L_floatdisf */
