OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgcc/] [config/] [epiphany/] [ieee-754/] [fast_div.S] - Blame information for rev 747

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 734 jeremybenn
/* Copyright (C) 2011 Free Software Foundation, Inc.
2
   Contributed by Embecosm on behalf of Adapteva, Inc.
3
 
4
This file is part of GCC.
5
 
6
GCC is free software; you can redistribute it and/or modify it under
7
the terms of the GNU General Public License as published by the Free
8
Software Foundation; either version 3, or (at your option) any later
9
version.
10
 
11
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12
WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
for more details.
15
 
16
Under Section 7 of GPL version 3, you are granted additional
17
permissions described in the GCC Runtime Library Exception, version
18
3.1, as published by the Free Software Foundation.
19
 
20
You should have received a copy of the GNU General Public License and
21
a copy of the GCC Runtime Library Exception along with this program;
22
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23
.  */
24
 
25
#include "../epiphany-asm.h"
26
 
27
.section _fast_div_text,"a",@progbits;
28
  .balign 8;
29
_fast_div_table:
30
.word 0x007fffff//   mantissa mask
31
.word 0x40257ebb//   hold constant a = 2.58586
32
 
33
.word 0x3f000000//   hold constant 126 shifted to bits [30:23]
34
.word 0xc0ba2e88//   hold constant b = -5.81818
35
 
36
.word 0x4087c1e8//   hold constant c = 4.24242
37
.word 0x40000000//  to hold constant 2 for Newton-Raphson iterations
38
 
39
 .global SYM(__fast_recipsf2)
40
 FUNC(__fast_recipsf2)
41
SYM(__fast_recipsf2):
42
 
43
//###################
44
//# input operands:
45
//###################
46
// Divisor
47
//R0
48
// Function address (used with negative offsets to read _fast_div_table)
49
//R1
50
/* Scratch registers:  two single (TMP0/TMP5) and two pairs.  */
51
#define P0L TMP1
52
#define P0H TMP2
53
#define P1L TMP3
54
#define P1H TMP4
55
 
56
//#########################################
57
//# Constants to be used in the algorithm
58
//#########################################
59
ldrd P0L , [ R1 , -3 ]
60
 
61
ldrd P1L , [ R1 , -2 ]
62
 
63
 
64
 
65
//#############################################################################
66
//#                       The Algorithm
67
//#
68
//# Operation: C=A/B
69
//# stage 1 - find the reciprocal 1/B according to the following scheme:
70
//#  B = (2^E)*m                                (1
71
//#  1/B = 1/((2^E)*m) = 1/((2^(E+1))*m1)          (0.5
72
//#      = (2^-(E+1))*(1/m1) = (2^E1)*(1/m1)
73
//#
74
//# Now we can find the new exponent:
75
//# e1 = E1+127 = -E-1+127 = -e+127-1+127 = 253-e **
76
//# 1/m1 alreadt has the exponent 127, so we have to add 126-e.
77
//# the exponent might underflow, which we can detect as a sign change.
78
//# Since the architeture uses flush-to-zero for subnormals, we can
79
//# give the result 0. then.
80
//#
81
//# The 1/m1 term with 0.5
82
//# 1/m1 = 2.58586*(m1^2) - 5.81818*m1 + 4.24242
83
//#
84
//# Next step is to use two iterations of Newton-Raphson algorithm to complete
85
//# the reciprocal calculation.
86
//#
87
//# Final result is achieved by multiplying A with 1/B
88
//#############################################################################
89
 
90
 
91
 
92
// R0 exponent and sign "replacement" into TMP0
93
AND TMP0,R0,P0L          ;
94
ORR TMP0,TMP0,P1L
95
SUB TMP5,R0,TMP0 // R0 sign/exponent extraction into TMP5
96
// Calculate new mantissa
97
FMADD P1H,TMP0,P0H               ;
98
                // Calculate new exponent offset 126 - "old exponent"
99
                SUB P1L,P1L,TMP5
100
        ldrd P0L , [ R1 , -1 ]
101
FMADD P0L,TMP0,P1H               ;
102
                eor P1H,r0,P1L // check for overflow (N-BIT).
103
                blt .Lret_0
104
// P0L exponent and sign "replacement"
105
sub P0L,P0L,TMP5
106
 
107
// Newton-Raphson iteration #1
108
MOV TMP0,P0H             ;
109
FMSUB P0H,R0,P0L         ;
110
FMUL  P0L,P0H,P0L        ;
111
// Newton-Raphson iteration #2
112
FMSUB TMP0,R0,P0L       ;
113
FMUL  R0,TMP0,P0L                ;
114
jr lr
115
.Lret_0:ldrd P0L , [ R1 , -3 ]
116
        lsr TMP0,r0,31 ; extract sign
117
        lsl TMP0,TMP0,31
118
        add P0L,P0L,r0 ; check for NaN input
119
        eor P0L,P0L,r0
120
        movgte r0,TMP0
121
        jr lr
122
// Quotient calculation is expected by the caller: FMUL quotient,divident,R0
123
        ;
124
        ENDFUNC(__fast_recipsf2)

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.