/* IEEE-754 single-precision functions for Xtensa
   Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif

/*  Warning!  The branch displacements for some Xtensa branch instructions
    are quite small, and this code has been carefully laid out to keep
    branch targets in range.  If you change anything, be sure to check that
    the assembler is not relaxing anything to branch over a jump.  */

#ifdef L_negsf2

        .align  4
        .global __negsf2
        .type   __negsf2, @function
__negsf2:
        leaf_entry sp, 16
        movi    a4, 0x80000000
        xor     a2, a2, a4
        leaf_return

#endif /* L_negsf2 */
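
/* For reference, __negsf2 above needs no special cases: flipping bit 31
   negates zeros, subnormals, Infinities, and NaNs alike, and never traps.
   A minimal C sketch of the same bit-level operation (hypothetical helper,
   for illustration only):

       #include <stdint.h>

       uint32_t negsf2_bits (uint32_t x)
       {
         return x ^ 0x80000000u;   // toggle the IEEE-754 sign bit
       }
*/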

#ifdef L_addsubsf3

        /* Addition */
__addsf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Ladd_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   a3, a6, 1f
        /* If x is a NaN, return it.  Otherwise, return y.  */
        slli    a7, a2, 9
        beqz    a7, .Ladd_ynan_or_inf
1:      leaf_return

.Ladd_ynan_or_inf:
        /* Return y.  */
        mov     a2, a3
        leaf_return

.Ladd_opposite_signs:
        /* Operand signs differ.  Do a subtraction.  */
        slli    a7, a6, 8
        xor     a3, a3, a7
        j       .Lsub_same_sign

        .align  4
        .global __addsf3
        .type   __addsf3, @function
__addsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Check if the two operands have the same sign.  */
        xor     a7, a2, a3
        bltz    a7, .Ladd_opposite_signs

.Ladd_same_sign:
        /* Check if either exponent == 0xff (i.e., NaN or Infinity).  */
        ball    a2, a6, .Ladd_xnan_or_inf
        ball    a3, a6, .Ladd_ynan_or_inf

        /* Compare the exponents.  The smaller operand will be shifted
           right by the exponent difference and added to the larger
           one.  */
        extui   a7, a2, 23, 9
        extui   a8, a3, 23, 9
        bltu    a7, a8, .Ladd_shiftx

.Ladd_shifty:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   a3, a6, .Ladd_yexpzero

        /* Replace y sign/exponent with 0x008, i.e., an explicit "1.0" in bit 23.  */
        or      a3, a3, a6
        slli    a3, a3, 8
        srli    a3, a3, 8

.Ladd_yexpdiff:
        /* Compute the exponent difference.  */
        sub     a10, a7, a8

        /* Exponent difference > 32 -- just return the bigger value.  */
        bgeui   a10, 32, 1f

        /* Shift y right by the exponent difference.  Any bits that are
           shifted out of y are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, a3, a9
        srl     a3, a3

        /* Do the addition.  */
        add     a2, a2, a3

        /* Check if the add overflowed into the exponent.  */
        extui   a10, a2, 23, 9
        beq     a10, a7, .Ladd_round
        mov     a8, a7
        j       .Ladd_carry

.Ladd_yexpzero:
        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0", and increment the apparent exponent
           because subnormals behave as if they had the minimum (nonzero)
           exponent.  Test for the case when both exponents are zero.  */
        slli    a3, a3, 9
        srli    a3, a3, 9
        bnone   a2, a6, .Ladd_bothexpzero
        addi    a8, a8, 1
        j       .Ladd_yexpdiff

.Ladd_bothexpzero:
        /* Both exponents are zero.  Handle this as a special case.  There
           is no need to shift or round, and the normal code for handling
           a carry into the exponent field will not work because it
           assumes there is an implicit "1.0" that needs to be added.  */
        add     a2, a2, a3
1:      leaf_return

.Ladd_xexpzero:
        /* Same as "yexpzero" except skip handling the case when both
           exponents are zero.  */
        slli    a2, a2, 9
        srli    a2, a2, 9
        addi    a7, a7, 1
        j       .Ladd_xexpdiff

.Ladd_shiftx:
        /* Same thing as the "shifty" code, but with x and y swapped.  Also,
           because the exponent difference is always nonzero in this version,
           the shift sequence can use SLL and skip loading a constant zero.  */
        bnone   a2, a6, .Ladd_xexpzero

        or      a2, a2, a6
        slli    a2, a2, 8
        srli    a2, a2, 8

.Ladd_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Ladd_returny

        ssr     a10
        sll     a9, a2
        srl     a2, a2

        add     a2, a2, a3

        /* Check if the add overflowed into the exponent.  */
        extui   a10, a2, 23, 9
        bne     a10, a8, .Ladd_carry

.Ladd_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    a2, a2, 1

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_returny:
        mov     a2, a3
        leaf_return

.Ladd_carry:
        /* The addition has overflowed into the exponent field, so the
           value needs to be renormalized.  The mantissa of the result
           can be recovered by subtracting the original exponent and
           adding 0x800000 (which is the explicit "1.0" for the
           mantissa of the non-shifted operand -- the "1.0" for the
           shifted operand was already added).  The mantissa can then
           be shifted right by one bit.  The explicit "1.0" of the
           shifted mantissa then needs to be replaced by the exponent,
           incremented by one to account for the normalizing shift.
           It is faster to combine these operations: do the shift first
           and combine the additions and subtractions.  If x is the
           original exponent, the result is:
               shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
           or:
               shifted mantissa + ((x + 1) << 22)
           Note that the exponent is incremented here by leaving the
           explicit "1.0" of the mantissa in the exponent field.  */
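
/* The simplification above uses (x << 23) - (x << 22) == (x << 22), so the
   three terms collapse to ((x + 1) << 22).  A tiny C check of the identity
   (hypothetical, for illustration only; both sides agree modulo 2^32):

       #include <assert.h>
       #include <stdint.h>

       void check_carry_identity (uint32_t m, uint32_t x)
       {
         assert (m - (x << 22) + (1u << 22) + (x << 23)
                 == m + ((x + 1u) << 22));
       }
*/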

        /* Shift x right by one bit.  Save the lsb.  */
        mov     a10, a2
        srli    a2, a2, 1

        /* See explanation above.  The original exponent is in a8.  */
        addi    a8, a8, 1
        slli    a8, a8, 22
        add     a2, a2, a8

        /* Return an Infinity if the exponent overflowed.  */
        ball    a2, a6, .Ladd_infinity

        /* Same thing as the "round" code except the msb of the leftover
           fraction is bit 0 of a10, with the rest of the fraction in a9.  */
        bbci.l  a10, 0, 1f
        addi    a2, a2, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_infinity:
        /* Clear the mantissa.  */
        srli    a2, a2, 23
        slli    a2, a2, 23

        /* The sign bit may have been lost in a carry-out.  Put it back.  */
        slli    a8, a8, 1
        or      a2, a2, a8
        leaf_return

.Ladd_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return


        /* Subtraction */
__subsf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Lsub_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   a3, a6, 1f
        /* Both x and y are either NaN or Inf, so the result is NaN.  */
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Lsub_ynan_or_inf:
        /* Negate y and return it.  */
        slli    a7, a6, 8
        xor     a2, a3, a7
        leaf_return

.Lsub_opposite_signs:
        /* Operand signs differ.  Do an addition.  */
        slli    a7, a6, 8
        xor     a3, a3, a7
        j       .Ladd_same_sign

        .align  4
        .global __subsf3
        .type   __subsf3, @function
__subsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Check if the two operands have the same sign.  */
        xor     a7, a2, a3
        bltz    a7, .Lsub_opposite_signs

.Lsub_same_sign:
        /* Check if either exponent == 0xff (i.e., NaN or Infinity).  */
        ball    a2, a6, .Lsub_xnan_or_inf
        ball    a3, a6, .Lsub_ynan_or_inf

        /* Compare the operands.  In contrast to addition, the entire
           value matters here.  */
        extui   a7, a2, 23, 8
        extui   a8, a3, 23, 8
        bltu    a2, a3, .Lsub_xsmaller

.Lsub_ysmaller:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   a3, a6, .Lsub_yexpzero

        /* Replace y sign/exponent with 0x008, i.e., an explicit "1.0" in bit 23.  */
        or      a3, a3, a6
        slli    a3, a3, 8
        srli    a3, a3, 8

.Lsub_yexpdiff:
        /* Compute the exponent difference.  */
        sub     a10, a7, a8

        /* Exponent difference > 32 -- just return the bigger value.  */
        bgeui   a10, 32, 1f

        /* Shift y right by the exponent difference.  Any bits that are
           shifted out of y are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, a3, a9
        srl     a3, a3

        sub     a2, a2, a3

        /* Subtract the leftover bits in a9 from zero and propagate any
           borrow from a2.  */
        neg     a9, a9
        addi    a10, a2, -1
        movnez  a2, a10, a9

        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, a2, 23, 8
        beq     a10, a7, .Lsub_round
        j       .Lsub_borrow

.Lsub_yexpzero:
        /* Return zero if the inputs are equal.  (For the non-subnormal
           case, subtracting the "1.0" will cause a borrow from the exponent
           and this case can be detected when handling the borrow.)  */
        beq     a2, a3, .Lsub_return_zero

        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0".  Unless x is also a subnormal, increment
           y's apparent exponent because subnormals behave as if they had
           the minimum (nonzero) exponent.  */
        slli    a3, a3, 9
        srli    a3, a3, 9
        bnone   a2, a6, .Lsub_yexpdiff
        addi    a8, a8, 1
        j       .Lsub_yexpdiff

.Lsub_returny:
        /* Negate and return y.  */
        slli    a7, a6, 8
        xor     a2, a3, a7
1:      leaf_return

.Lsub_xsmaller:
        /* Same thing as the "ysmaller" code, but with x and y swapped and
           with y negated.  */
        bnone   a2, a6, .Lsub_xexpzero

        or      a2, a2, a6
        slli    a2, a2, 8
        srli    a2, a2, 8

.Lsub_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Lsub_returny

        ssr     a10
        movi    a9, 0
        src     a9, a2, a9
        srl     a2, a2

        /* Negate y.  */
        slli    a11, a6, 8
        xor     a3, a3, a11

        sub     a2, a3, a2

        neg     a9, a9
        addi    a10, a2, -1
        movnez  a2, a10, a9

        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, a2, 23, 8
        bne     a10, a8, .Lsub_borrow

.Lsub_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    a2, a2, 1

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Lsub_exactlyhalf
1:      leaf_return

.Lsub_xexpzero:
        /* Same as "yexpzero".  */
        beq     a2, a3, .Lsub_return_zero
        slli    a2, a2, 9
        srli    a2, a2, 9
        bnone   a3, a6, .Lsub_xexpdiff
        addi    a7, a7, 1
        j       .Lsub_xexpdiff

.Lsub_return_zero:
        movi    a2, 0
        leaf_return

.Lsub_borrow:
        /* The subtraction has underflowed into the exponent field, so the
           value needs to be renormalized.  Shift the mantissa left as
           needed to remove any leading zeros and adjust the exponent
           accordingly.  If the exponent is not large enough to remove
           all the leading zeros, the result will be a subnormal value.  */

        slli    a8, a2, 9
        beqz    a8, .Lsub_xzero
        do_nsau a6, a8, a7, a11
        srli    a8, a8, 9
        bge     a6, a10, .Lsub_subnormal
        addi    a6, a6, 1

.Lsub_normalize_shift:
        /* Shift the mantissa (a8/a9) left by a6.  */
        ssl     a6
        src     a8, a8, a9
        sll     a9, a9

        /* Combine the shifted mantissa with the sign and exponent,
           decrementing the exponent by a6.  (The exponent has already
           been decremented by one due to the borrow from the subtraction,
           but adding the mantissa will increment the exponent by one.)  */
        srli    a2, a2, 23
        sub     a2, a2, a6
        slli    a2, a2, 23
        add     a2, a2, a8
        j       .Lsub_round

.Lsub_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

.Lsub_xzero:
        /* If there was a borrow from the exponent, and the mantissa and
           guard digits are all zero, then the inputs were equal and the
           result should be zero.  */
        beqz    a9, .Lsub_return_zero

        /* Only the guard digit is nonzero.  Shift by min(24, a10).  */
        addi    a11, a10, -24
        movi    a6, 24
        movltz  a6, a10, a11
        j       .Lsub_normalize_shift

.Lsub_subnormal:
        /* The exponent is too small to shift away all the leading zeros.
           Set a6 to the current exponent (which has already been
           decremented by the borrow) so that the exponent of the result
           will be zero.  Do not add 1 to a6 in this case, because: (1)
           adding the mantissa will not increment the exponent, so there is
           no need to subtract anything extra from the exponent to
           compensate, and (2) the effective exponent of a subnormal is 1
           not 0 so the shift amount must be 1 smaller than normal. */
        mov     a6, a10
        j       .Lsub_normalize_shift

#endif /* L_addsubsf3 */
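
/* Throughout this file, rounding keeps the bits shifted out of the mantissa
   in a 32-bit guard word: bit 31 says whether the leftover fraction is
   >= 1/2, and any lower bit distinguishes a true tie.  The result is
   round-to-nearest, ties-to-even, as in this C sketch (hypothetical helper,
   for illustration only):

       #include <stdint.h>

       uint32_t round_nearest_even (uint32_t mant, uint32_t guard)
       {
         if (guard & 0x80000000u)      // leftover fraction >= 1/2
           {
             mant += 1;                // a carry into the exponent is OK
             if ((guard << 1) == 0)    // exactly 1/2: break the tie
               mant &= ~1u;            // round down to the even value
           }
         return mant;
       }
*/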

#ifdef L_mulsf3

        /* Multiplication */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

__mulsf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
        /* Clear the sign bit of x.  */
        slli    a2, a2, 1
        srli    a2, a2, 1

        /* If x is zero, return zero.  */
        beqz    a2, .Lmul_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        do_nsau a10, a2, a11, a12
        addi    a10, a10, -8
        ssl     a10
        sll     a2, a2
        movi    a8, 1
        sub     a8, a8, a10
        j       .Lmul_xnormalized

.Lmul_yexpzero:
        /* Clear the sign bit of y.  */
        slli    a3, a3, 1
        srli    a3, a3, 1

        /* If y is zero, return zero.  */
        beqz    a3, .Lmul_return_zero

        /* Normalize y.  Adjust the exponent in a9.  */
        do_nsau a10, a3, a11, a12
        addi    a10, a10, -8
        ssl     a10
        sll     a3, a3
        movi    a9, 1
        sub     a9, a9, a10
        j       .Lmul_ynormalized

.Lmul_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        j       .Lmul_done

.Lmul_xnan_or_inf:
        /* If y is zero, return NaN.  */
        slli    a8, a3, 1
        bnez    a8, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
        j       .Lmul_done
1:
        /* If y is NaN, return y.  */
        bnall   a3, a6, .Lmul_returnx
        slli    a8, a3, 9
        beqz    a8, .Lmul_returnx

.Lmul_returny:
        mov     a2, a3

.Lmul_returnx:
        /* Set the sign bit and return.  */
        extui   a7, a7, 31, 1
        slli    a2, a2, 1
        ssai    1
        src     a2, a7, a2
        j       .Lmul_done

.Lmul_ynan_or_inf:
        /* If x is zero, return NaN.  */
        slli    a8, a2, 1
        bnez    a8, .Lmul_returny
        movi    a7, 0x400000    /* make it a quiet NaN */
        or      a2, a3, a7
        j       .Lmul_done

        .align  4
        .global __mulsf3
        .type   __mulsf3, @function
__mulsf3:
#if __XTENSA_CALL0_ABI__
        leaf_entry sp, 32
        addi    sp, sp, -32
        s32i    a12, sp, 16
        s32i    a13, sp, 20
        s32i    a14, sp, 24
        s32i    a15, sp, 28
#elif XCHAL_NO_MUL
        /* This is not really a leaf function; allocate enough stack space
           to allow CALL12s to a helper function.  */
        leaf_entry sp, 64
#else
        leaf_entry sp, 32
#endif
        movi    a6, 0x7f800000

        /* Get the sign of the result.  */
        xor     a7, a2, a3

        /* Check for NaN and infinity.  */
        ball    a2, a6, .Lmul_xnan_or_inf
        ball    a3, a6, .Lmul_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, a2, 23, 8
        extui   a9, a3, 23, 8

        beqz    a8, .Lmul_xexpzero
.Lmul_xnormalized:
        beqz    a9, .Lmul_yexpzero
.Lmul_ynormalized:

        /* Add the exponents.  */
        add     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0xffffff
        or      a2, a2, a6
        and     a2, a2, a10
        or      a3, a3, a6
        and     a3, a3, a10

        /* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */

#if XCHAL_HAVE_MUL32_HIGH

        mull    a6, a2, a3
        muluh   a2, a2, a3

#else

        /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
           products.  These partial products are:

                0 xl * yl

                1 xl * yh
                2 xh * yl

                3 xh * yh

           If using the Mul16 or Mul32 multiplier options, these input
           chunks must be stored in separate registers.  For Mac16, the
           UMUL.AA.* opcodes can specify that the inputs come from either
           half of the registers, so there is no need to shift them out
           ahead of time.  If there is no multiply hardware, the 16-bit
           chunks can be extracted when setting up the arguments to the
           separate multiply function.  */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Calling a separate multiply function will clobber a0 and requires
           use of a8 as a temporary, so save those values now.  (The function
           uses a custom ABI so nothing else needs to be saved.)  */
        s32i    a0, sp, 0
        s32i    a8, sp, 4
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

        /* Get the high halves of the inputs into registers.  */
        srli    a2h, a2, 16
        srli    a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
        /* Clear the high halves of the inputs.  This does not matter
           for MUL16 because the high bits are ignored.  */
        extui   a2, a2, 0, 16
        extui   a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mul16u  dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mull    dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
        rsr     dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
        extui   dst, src, 0, 16
#define set_arg_h(dst, src) \
        srli    dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a13, xreg); \
        set_arg_ ## yhalf (a14, yreg); \
        call0   .Lmul_mulsi3; \
        mov     dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a14, xreg); \
        set_arg_ ## yhalf (a15, yreg); \
        call12  .Lmul_mulsi3; \
        mov     dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

        /* Add pp1 and pp2 into a6 with carry-out in a9.  */
        do_mul(a6, a2, l, a3, h)        /* pp 1 */
        do_mul(a11, a2, h, a3, l)       /* pp 2 */
        movi    a9, 0
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Shift the high half of a9/a6 into position in a9.  Note that
           this value can be safely incremented without any carry-outs.  */
        ssai    16
        src     a9, a9, a6

        /* Compute the low word into a6.  */
        do_mul(a11, a2, l, a3, l)       /* pp 0 */
        sll     a6, a6
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Compute the high word into a2.  */
        do_mul(a2, a2, h, a3, h)        /* pp 3 */
        add     a2, a2, a9

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Restore values saved on the stack during the multiplication.  */
        l32i    a0, sp, 0
        l32i    a8, sp, 4
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */

        /* Shift left by 9 bits, unless there was a carry-out from the
           multiply, in which case, shift by 8 bits and increment the
           exponent.  */
        movi    a4, 9
        srli    a5, a2, 24 - 9
        beqz    a5, 1f
        addi    a4, a4, -1
        addi    a8, a8, 1
1:      ssl     a4
        src     a2, a2, a6
        sll     a6, a6

        /* Subtract the extra bias from the exponent sum (plus one to account
           for the explicit "1.0" of the mantissa that will be added to the
           exponent in the final result).  */
        movi    a4, 0x80
        sub     a8, a8, a4

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..fd are OK here.  */
        movi    a4, 0xfe
        bgeu    a8, a4, .Lmul_overflow

.Lmul_round:
        /* Round.  */
        bgez    a6, .Lmul_rounded
        addi    a2, a2, 1
        slli    a6, a6, 1
        beqz    a6, .Lmul_exactlyhalf

.Lmul_rounded:
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 23
        add     a2, a2, a8

.Lmul_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      a2, a2, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
        l32i    a12, sp, 16
        l32i    a13, sp, 20
        l32i    a14, sp, 24
        l32i    a15, sp, 28
        addi    sp, sp, 32
#endif
        leaf_return

.Lmul_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        j       .Lmul_rounded

.Lmul_overflow:
        bltz    a8, .Lmul_underflow
        /* Return +/- Infinity.  */
        movi    a8, 0xff
        slli    a2, a8, 23
        j       .Lmul_addsign

.Lmul_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        mov     a9, a6
        ssr     a8
        bgeui   a8, 32, .Lmul_flush_to_zero

        /* Shift a2 right.  Any bits that are shifted out of a2 are saved
           in a6 (combined with the shifted-out bits currently in a6) for
           rounding the result.  */
        sll     a6, a2
        srl     a2, a2

        /* Set the exponent to zero.  */
        movi    a8, 0

        /* Pack any nonzero bits shifted out into a6.  */
        beqz    a9, .Lmul_round
        movi    a9, 1
        or      a6, a6, a9
        j       .Lmul_round

.Lmul_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        j       .Lmul_done

#if XCHAL_NO_MUL

        /* For Xtensa processors with no multiply hardware, this simplified
           version of _mulsi3 is used for multiplying 16-bit chunks of
           the floating-point mantissas.  When using CALL0, this function
           uses a custom ABI: the inputs are passed in a13 and a14, the
           result is returned in a12, and a8 and a15 are clobbered.  */
        .align  4
.Lmul_mulsi3:
        leaf_entry sp, 16
        .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
        movi    \dst, 0
1:      add     \tmp1, \src2, \dst
        extui   \tmp2, \src1, 0, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx2 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 1, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx4 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 2, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx8 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 3, 1
        movnez  \dst, \tmp1, \tmp2

        srli    \src1, \src1, 4
        slli    \src2, \src2, 4
        bnez    \src1, 1b
        .endm
#if __XTENSA_CALL0_ABI__
        mul_mulsi3_body a12, a13, a14, a15, a8
#else
        /* The result will be written into a2, so save that argument in a4.  */
        mov     a4, a2
        mul_mulsi3_body a2, a4, a3, a5, a6
#endif
        leaf_return
#endif /* XCHAL_NO_MUL */
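
/* .Lmul_mulsi3 above multiplies by scanning the multiplier four bits per
   iteration, conditionally adding src2, 2*src2, 4*src2 and 8*src2 into the
   accumulator, then stepping both operands.  A C sketch of the same
   shift-and-add scheme (hypothetical helper, for illustration only):

       #include <stdint.h>

       uint32_t mulsi3_sketch (uint32_t src1, uint32_t src2)
       {
         uint32_t dst = 0;
         while (src1 != 0)
           {
             // examine four multiplier bits per iteration
             if (src1 & 1) dst += src2;
             if (src1 & 2) dst += src2 << 1;
             if (src1 & 4) dst += src2 << 2;
             if (src1 & 8) dst += src2 << 3;
             src1 >>= 4;
             src2 <<= 4;
           }
         return dst;
       }
*/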
#endif /* L_mulsf3 */
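
/* The widening multiply above follows the classic schoolbook scheme: split
   each 32-bit input into 16-bit halves, form the four partial products
   pp0..pp3, and sum them with explicit carry tracking.  A C sketch of the
   same computation (hypothetical helper, for illustration only; real C code
   would just use a 64-bit multiply):

       #include <stdint.h>

       void mul32x32 (uint32_t x, uint32_t y, uint32_t *hi, uint32_t *lo)
       {
         uint32_t xl = x & 0xffffu, xh = x >> 16;
         uint32_t yl = y & 0xffffu, yh = y >> 16;

         uint32_t pp0 = xl * yl;                  // pp 0
         uint32_t pp1 = xl * yh;                  // pp 1
         uint32_t pp2 = xh * yl;                  // pp 2
         uint32_t pp3 = xh * yh;                  // pp 3

         uint32_t mid = pp1 + pp2;                // may carry out of 32 bits
         uint32_t carry = (mid < pp1) ? 1u : 0u;

         *lo = pp0 + (mid << 16);
         *hi = pp3 + (mid >> 16) + (carry << 16)
               + ((*lo < pp0) ? 1u : 0u);         // propagate the low carry
       }
*/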

#ifdef L_divsf3

        /* Division */
__divsf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
        /* Clear the sign bit of y.  */
        slli    a3, a3, 1
        srli    a3, a3, 1

        /* Check for division by zero.  */
        beqz    a3, .Ldiv_yzero

        /* Normalize y.  Adjust the exponent in a9.  */
        do_nsau a10, a3, a4, a5
        addi    a10, a10, -8
        ssl     a10
        sll     a3, a3
        movi    a9, 1
        sub     a9, a9, a10
        j       .Ldiv_ynormalized

.Ldiv_yzero:
        /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
        slli    a4, a2, 1
        srli    a4, a4, 1
        srli    a2, a7, 31
        slli    a2, a2, 31
        or      a2, a2, a6
        bnez    a4, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Ldiv_xexpzero:
        /* Clear the sign bit of x.  */
        slli    a2, a2, 1
        srli    a2, a2, 1

        /* If x is zero, return zero.  */
        beqz    a2, .Ldiv_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        do_nsau a10, a2, a4, a5
        addi    a10, a10, -8
        ssl     a10
        sll     a2, a2
        movi    a8, 1
        sub     a8, a8, a10
        j       .Ldiv_xnormalized

.Ldiv_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        leaf_return

.Ldiv_xnan_or_inf:
        /* Set the sign bit of the result.  */
        srli    a7, a3, 31
        slli    a7, a7, 31
        xor     a2, a2, a7
        /* If y is NaN or Inf, return NaN.  */
        bnall   a3, a6, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Ldiv_ynan_or_inf:
        /* If y is Infinity, return zero.  */
        slli    a8, a3, 9
        beqz    a8, .Ldiv_return_zero
        /* y is NaN; return it.  */
        mov     a2, a3
        leaf_return

        .align  4
        .global __divsf3
        .type   __divsf3, @function
__divsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Get the sign of the result.  */
        xor     a7, a2, a3

        /* Check for NaN and infinity.  */
        ball    a2, a6, .Ldiv_xnan_or_inf
        ball    a3, a6, .Ldiv_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, a2, 23, 8
        extui   a9, a3, 23, 8

        beqz    a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
        beqz    a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

        /* Subtract the exponents.  */
        sub     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0xffffff
        or      a2, a2, a6
        and     a2, a2, a10
        or      a3, a3, a6
        and     a3, a3, a10

        /* The first digit of the mantissa division must be a one.
           Shift x (and adjust the exponent) as needed to make this true.  */
        bltu    a3, a2, 1f
        slli    a2, a2, 1
        addi    a8, a8, -1
1:
        /* Do the first subtraction and shift.  */
        sub     a2, a2, a3
        slli    a2, a2, 1

        /* Put the quotient into a10.  */
        movi    a10, 1

        /* Divide one bit at a time for 23 bits.  */
        movi    a9, 23
#if XCHAL_HAVE_LOOPS
        loop    a9, .Ldiv_loopend
#endif
.Ldiv_loop:
        /* Shift the quotient << 1.  */
        slli    a10, a10, 1

        /* Is this digit a 0 or 1?  */
        bltu    a2, a3, 1f

        /* Output a 1 and subtract.  */
        addi    a10, a10, 1
        sub     a2, a2, a3

        /* Shift the dividend << 1.  */
1:      slli    a2, a2, 1

#if !XCHAL_HAVE_LOOPS
        addi    a9, a9, -1
        bnez    a9, .Ldiv_loop
#endif
.Ldiv_loopend:

        /* Add the exponent bias (less one to account for the explicit "1.0"
           of the mantissa that will be added to the exponent in the final
           result).  */
        addi    a8, a8, 0x7e

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..fd are OK here.  */
        movi    a4, 0xfe
        bgeu    a8, a4, .Ldiv_overflow

.Ldiv_round:
        /* Round.  The remainder (<< 1) is in a2.  */
        bltu    a2, a3, .Ldiv_rounded
        addi    a10, a10, 1
        beq     a2, a3, .Ldiv_exactlyhalf

.Ldiv_rounded:
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 23
        add     a2, a10, a8

.Ldiv_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      a2, a2, a7
        leaf_return

.Ldiv_overflow:
        bltz    a8, .Ldiv_underflow
        /* Return +/- Infinity.  */
        addi    a8, a4, 1       /* 0xff */
        slli    a2, a8, 23
        j       .Ldiv_addsign

.Ldiv_exactlyhalf:
        /* Remainder is exactly half the divisor.  Round even.  */
        srli    a10, a10, 1
        slli    a10, a10, 1
        j       .Ldiv_rounded

.Ldiv_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        ssr     a8
        bgeui   a8, 32, .Ldiv_flush_to_zero

        /* Shift a10 right.  Any bits that are shifted out of a10 are
           saved in a6 for rounding the result.  */
        sll     a6, a10
        srl     a10, a10

        /* Set the exponent to zero.  */
        movi    a8, 0

        /* Pack any nonzero remainder (in a2) into a6.  */
        beqz    a2, 1f
        movi    a9, 1
        or      a6, a6, a9

        /* Round a10 based on the bits shifted out into a6.  */
1:      bgez    a6, .Ldiv_rounded
        addi    a10, a10, 1
        slli    a6, a6, 1
        bnez    a6, .Ldiv_rounded
        srli    a10, a10, 1
        slli    a10, a10, 1
        j       .Ldiv_rounded

.Ldiv_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        leaf_return

#endif /* L_divsf3 */
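
/* __divsf3 above generates the quotient one bit at a time with a classic
   restoring division: compare the remainder against the divisor, emit a
   quotient bit, subtract if the bit was a one, and double the remainder.
   A C sketch of the mantissa loop (hypothetical helper, for illustration
   only; operands are 24-bit mantissas with the explicit "1.0" bit, and the
   dividend is assumed pre-shifted so the first quotient digit is 1):

       #include <stdint.h>

       uint32_t div_mantissa (uint32_t rem, uint32_t div, uint32_t *rem_out)
       {
         uint32_t quo = 1;                 // first digit is known to be 1
         rem = (rem - div) << 1;
         for (int i = 0; i < 23; i++)      // 23 more quotient bits
           {
             quo <<= 1;
             if (rem >= div)               // this digit is a 1
               {
                 quo += 1;
                 rem -= div;
               }
             rem <<= 1;                    // shift the dividend
           }
         *rem_out = rem;                   // remainder (<< 1), for rounding
         return quo;                       // 24-bit quotient
       }
*/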

#ifdef L_cmpsf2

        /* Equal and Not Equal */

        .align  4
        .global __eqsf2
        .global __nesf2
        .set    __nesf2, __eqsf2
        .type   __eqsf2, @function
__eqsf2:
        leaf_entry sp, 16
        bne     a2, a3, 4f

        /* The values are equal but NaN != NaN.  Check the exponent.  */
        movi    a6, 0x7f800000
        ball    a2, a6, 3f

        /* Equal.  */
        movi    a2, 0
        leaf_return

        /* Not equal.  */
2:      movi    a2, 1
        leaf_return

        /* Check if the mantissas are nonzero.  */
3:      slli    a7, a2, 9
        j       5f

        /* Check if x and y are zero with different signs.  */
4:      or      a7, a2, a3
        slli    a7, a7, 1

        /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
           of x when exponent(x) = 0xff and x == y.  */
5:      movi    a2, 0
        movi    a3, 1
        movnez  a2, a3, a7
        leaf_return


        /* Greater Than */

        .align  4
        .global __gtsf2
        .type   __gtsf2, @function
__gtsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Lle_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 0
        leaf_return


        /* Less Than or Equal */

        .align  4
        .global __lesf2
        .type   __lesf2, @function
__lesf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Lle_cmp
        movi    a2, 1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

.Lle_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, a2, a3
        bltz    a7, .Lle_diff_signs

        /* Check if x is negative.  */
        bltz    a2, .Lle_xneg

        /* Check if x <= y.  */
        bltu    a3, a2, 5f
4:      movi    a2, 0
        leaf_return

.Lle_xneg:
        /* Check if y <= x.  */
        bgeu    a2, a3, 4b
5:      movi    a2, 1
        leaf_return

.Lle_diff_signs:
        bltz    a2, 4b

        /* Check if both x and y are zero.  */
        or      a7, a2, a3
        slli    a7, a7, 1
        movi    a2, 1
        movi    a3, 0
        moveqz  a2, a3, a7
        leaf_return


        /* Greater Than or Equal */

        .align  4
        .global __gesf2
        .type   __gesf2, @function
__gesf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Llt_cmp
        movi    a2, -1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, -1
        leaf_return


        /* Less Than */

        .align  4
        .global __ltsf2
        .type   __ltsf2, @function
__ltsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Llt_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 0
        leaf_return

.Llt_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, a2, a3
        bltz    a7, .Llt_diff_signs

        /* Check if x is negative.  */
        bltz    a2, .Llt_xneg

        /* Check if x < y.  */
        bgeu    a2, a3, 5f
4:      movi    a2, -1
        leaf_return

.Llt_xneg:
        /* Check if y < x.  */
        bltu    a3, a2, 4b
5:      movi    a2, 0
        leaf_return

.Llt_diff_signs:
        bgez    a2, 5b

        /* Check if both x and y are nonzero.  */
        or      a7, a2, a3
        slli    a7, a7, 1
        movi    a2, 0
        movi    a3, -1
        movnez  a2, a3, a7
        leaf_return


        /* Unordered */

        .align  4
        .global __unordsf2
        .type   __unordsf2, @function
__unordsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 3f
1:      ball    a3, a6, 4f
2:      movi    a2, 0
        leaf_return

3:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

4:      slli    a7, a3, 9
        beqz    a7, 2b
        movi    a2, 1
        leaf_return

#endif /* L_cmpsf2 */
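
/* The comparison helpers above implement the libgcc conventions: __eqsf2
   returns zero iff the operands are equal, NaN always compares unordered,
   and -0.0 must compare equal to +0.0.  Once NaNs are filtered out, the
   .Lle_cmp/.Llt_cmp code orders the raw bit patterns, flipping the sense
   for negative values.  A C sketch of that ordering (hypothetical helper,
   for illustration only):

       #include <stdint.h>

       // returns 1 if x < y as floats, comparing raw IEEE-754 bits;
       // assumes NaNs were already handled by the caller
       int float_bits_less (uint32_t x, uint32_t y)
       {
         if (((x | y) << 1) == 0)   // +0.0 and -0.0 compare equal
           return 0;
         if ((x ^ y) >> 31)         // different signs: negative is smaller
           return (x >> 31) != 0;
         if (x >> 31)               // both negative: bit order reverses
           return y < x;
         return x < y;              // both non-negative: unsigned compare
       }
*/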

#ifdef L_fixsfsi

        .align  4
        .global __fixsfsi
        .type   __fixsfsi, @function
__fixsfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixsfsi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7e
        bgei    a4, 32, .Lfixsfsi_maxint
        blti    a4, 1, .Lfixsfsi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    a5, a7, 8

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixsfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixsfsi_maxint

        /* Translate NaN to +maxint.  */
        movi    a2, 0

.Lfixsfsi_maxint:
        slli    a4, a6, 8       /* 0x80000000 */
        addi    a5, a4, -1      /* 0x7fffffff */
        movgez  a4, a5, a2
        mov     a2, a4
        leaf_return

.Lfixsfsi_zero:
        movi    a2, 0
        leaf_return

#endif /* L_fixsfsi */
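
/* __fixsfsi above converts by extracting the exponent, materializing the
   implicit "1.0", shifting the mantissa into place, and negating when the
   sign bit was set; out-of-range values saturate and NaN becomes +maxint.
   A C sketch of the same steps (hypothetical helper, for illustration
   only):

       #include <stdint.h>

       int32_t fixsfsi_bits (uint32_t x)
       {
         int sign = (x >> 31) != 0;

         if ((x & 0x7f800000u) == 0x7f800000u)    // NaN or Infinity
           {
             if (x << 9)                          // nonzero mantissa: NaN
               return INT32_MAX;                  // NaN -> +maxint
             return sign ? INT32_MIN : INT32_MAX;
           }

         int exp = (int) ((x >> 23) & 0xff) - 0x7e;   // exponent + 1
         if (exp < 1)
           return 0;                              // |x| < 1 truncates to 0
         if (exp >= 32)
           return sign ? INT32_MIN : INT32_MAX;   // saturate out of range

         uint32_t mag = ((x | 0x00800000u) << 8) >> (32 - exp);
         return sign ? -(int32_t) mag : (int32_t) mag;
       }
*/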

#ifdef L_fixsfdi

        .align  4
        .global __fixsfdi
        .type   __fixsfdi, @function
__fixsfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixsfdi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7e
        bgei    a4, 64, .Lfixsfdi_maxint
        blti    a4, 1, .Lfixsfdi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    xh, a7, 8

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixsfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixsfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixsfdi_smallshift:
        movi    xl, 0
        sll     xl, xh
        srl     xh, xh
        j       .Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixsfdi_maxint

        /* Translate NaN to +maxint.  */
        movi    a2, 0

.Lfixsfdi_maxint:
        slli    a7, a6, 8       /* 0x80000000 */
        bgez    a2, 1f
        mov     xh, a7
        movi    xl, 0
        leaf_return

1:      addi    xh, a7, -1      /* 0x7fffffff */
        movi    xl, -1
        leaf_return

.Lfixsfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

#endif /* L_fixsfdi */
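
/* The 64-bit negation used in .Lfixsfdi_shifted (and again below) computes
   -(xh:xl) in two 32-bit halves: negate both words, then decrement the high
   word unless the low word was zero, which is exactly the two's-complement
   borrow.  A C sketch (hypothetical helper, for illustration only):

       #include <stdint.h>

       void neg64 (uint32_t *xh, uint32_t *xl)
       {
         *xh = ~*xh + (*xl == 0 ? 1u : 0u);  // -xh, carry only when xl == 0
         *xl = -*xl;
       }
*/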

#ifdef L_fixunssfsi

        .align  4
        .global __fixunssfsi
        .type   __fixunssfsi, @function
__fixunssfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixunssfsi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7f
        bgei    a4, 32, .Lfixunssfsi_maxint
        bltz    a4, .Lfixunssfsi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    a5, a7, 8

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 32, .Lfixunssfsi_bigexp
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixunssfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixunssfsi_maxint

        /* Translate NaN to 0xffffffff.  */
        movi    a2, -1
        leaf_return

.Lfixunssfsi_maxint:
        slli    a4, a6, 8       /* 0x80000000 */
        movi    a5, -1          /* 0xffffffff */
        movgez  a4, a5, a2
        mov     a2, a4
        leaf_return

.Lfixunssfsi_zero:
        movi    a2, 0
        leaf_return

.Lfixunssfsi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    a2, 1f
        mov     a2, a5          /* no shift needed */
        leaf_return

        /* Return 0x80000000 if negative.  */
1:      slli    a2, a6, 8
        leaf_return

#endif /* L_fixunssfsi */

#ifdef L_fixunssfdi

        .align  4
        .global __fixunssfdi
        .type   __fixunssfdi, @function
__fixunssfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixunssfdi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7f
        bgei    a4, 64, .Lfixunssfdi_maxint
        bltz    a4, .Lfixunssfdi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    xh, a7, 8

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 64, .Lfixunssfdi_bigexp
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixunssfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixunssfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixunssfdi_smallshift:
        movi    xl, 0
        src     xl, xh, xl
        srl     xh, xh
        j       .Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixunssfdi_maxint

        /* Translate NaN to 0xffffffff.... */
1:      movi    xh, -1
        movi    xl, -1
        leaf_return

.Lfixunssfdi_maxint:
        bgez    a2, 1b
2:      slli    xh, a6, 8       /* 0x80000000 */
        movi    xl, 0
        leaf_return

.Lfixunssfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

.Lfixunssfdi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    a7, 2b
        movi    xl, 0
        leaf_return             /* no shift needed */

#endif /* L_fixunssfdi */

#ifdef L_floatsisf

        .align  4
        .global __floatunsisf
        .type   __floatunsisf, @function
__floatunsisf:
        leaf_entry sp, 16
        beqz    a2, .Lfloatsisf_return

        /* Set the sign to zero and jump to the floatsisf code.  */
        movi    a7, 0
        j       .Lfloatsisf_normalize

        .align  4
        .global __floatsisf
        .type   __floatsisf, @function
__floatsisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        beqz    a2, .Lfloatsisf_return

        /* Save the sign.  */
        extui   a7, a2, 31, 1

        /* Get the absolute value.  */
#if XCHAL_HAVE_ABS
        abs     a2, a2
#else
        neg     a4, a2
        movltz  a2, a4, a2
#endif

.Lfloatsisf_normalize:
        /* Normalize with the first 1 bit in the msb.  */
        do_nsau a4, a2, a5, a6
        ssl     a4
        sll     a5, a2

        /* Shift the mantissa into position, with rounding bits in a6.  */
        srli    a2, a5, 8
        slli    a6, a5, (32 - 8)

        /* Set the exponent.  */
        movi    a5, 0x9d        /* 0x7e + 31 */
        sub     a5, a5, a4
        slli    a5, a5, 23
        add     a2, a2, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      a2, a2, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, .Lfloatsisf_return
        addi    a2, a2, 1       /* Overflow to the exponent is OK.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
        leaf_return

.Lfloatsisf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

#endif /* L_floatsisf */
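
/* __floatsisf/__floatunsisf above normalize the integer so its leading 1
   sits at bit 31, take the top 24 bits as the mantissa, and keep the low
   8 bits as the guard word for round-to-nearest-even; the exponent is
   0x7e + 31 minus the normalizing shift.  A C sketch for the unsigned case
   (hypothetical helper, for illustration only):

       #include <stdint.h>

       uint32_t floatunsisf_bits (uint32_t v)
       {
         if (v == 0)
           return 0;

         int shift = 0;                    // count leading zeros, as do_nsau
         while (!(v & 0x80000000u))
           {
             v <<= 1;
             shift++;
           }

         uint32_t mant  = v >> 8;          // top 24 bits, incl. the leading 1
         uint32_t guard = v << 24;         // leftover fraction for rounding
         uint32_t bits  = mant + ((uint32_t) (0x9d - shift) << 23);

         if (guard & 0x80000000u)          // >= 1/2: round up
           {
             bits += 1;                    // carry into the exponent is OK
             if ((guard << 1) == 0)        // exact tie: round to even
               bits &= ~1u;
           }
         return bits;
       }
*/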

#ifdef L_floatdisf

        .align  4
        .global __floatundisf
        .type   __floatundisf, @function
__floatundisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Set the sign to zero and jump to the floatdisf code.  */
        movi    a7, 0
        j       .Lfloatdisf_normalize

        .align  4
        .global __floatdisf
        .type   __floatdisf, @function
__floatdisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Save the sign.  */
        extui   a7, xh, 31, 1

        /* Get the absolute value.  */
        bgez    xh, .Lfloatdisf_normalize
        neg     xl, xl
        neg     xh, xh
        beqz    xl, .Lfloatdisf_normalize
        addi    xh, xh, -1

.Lfloatdisf_normalize:
        /* Normalize with the first 1 bit in the msb of xh.  */
        beqz    xh, .Lfloatdisf_bigshift
        do_nsau a4, xh, a5, a6
        ssl     a4
        src     xh, xh, xl
        sll     xl, xl

.Lfloatdisf_shifted:
        /* Shift the mantissa into position, with rounding bits in a6.  */
        ssai    8
        sll     a5, xl
        src     a6, xh, xl
        srl     xh, xh
        beqz    a5, 1f
        movi    a5, 1
        or      a6, a6, a5
1:
        /* Set the exponent.  */
        movi    a5, 0xbd        /* 0x7e + 63 */
        sub     a5, a5, a4
        slli    a5, a5, 23
        add     a2, xh, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      a2, a2, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, 2f
        addi    a2, a2, 1       /* Overflow to the exponent is OK.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatdisf_exactlyhalf
2:      leaf_return

.Lfloatdisf_bigshift:
        /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
        do_nsau a4, xl, a5, a6
        ssl     a4
        sll     xh, xl
        movi    xl, 0
        addi    a4, a4, 32
        j       .Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

#endif /* L_floatdisf */
