URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgcc/] [config/] [epiphany/] [ieee-754/] [fast_div.S] - Blame information for rev 747

Go to most recent revision | Details | Compare with Previous | View Log


/* Copyright (C) 2011 Free Software Foundation, Inc.
   Contributed by Embecosm on behalf of Adapteva, Inc.
 
This file is part of GCC.
 
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
 
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.
 
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
.  */
 
#include "../epiphany-asm.h"
 
.section _fast_div_text,"a",@progbits;
  .balign 8;
_fast_div_table:
.word 0x007fffff//   mantissa mask
.word 0x40257ebb//   hold constant a = 2.58586
 
.word 0x3f000000//   hold constant 126 shifted to bits [30:23]
.word 0xc0ba2e88//   hold constant b = -5.81818
 
.word 0x4087c1e8//   hold constant c = 4.24242
.word 0x40000000//  to hold constant 2 for Newton-Raphson iterations
 
 .global SYM(__fast_recipsf2)
 FUNC(__fast_recipsf2)
SYM(__fast_recipsf2):
 
//###################
//# input operands:
//###################
// Divisor
//R0
// Function address (used with negative offsets to read _fast_div_table)
//R1
/* Scratch registers:  two single (TMP0/TMP5) and two pairs.  */
#define P0L TMP1
#define P0H TMP2
#define P1L TMP3
#define P1H TMP4
 
//#########################################
//# Constants to be used in the algorithm
//#########################################
ldrd P0L , [ R1 , -3 ]
 
ldrd P1L , [ R1 , -2 ]
 
 
 
//#############################################################################
//#                       The Algorithm
//#
//# Operation: C=A/B
//# stage 1 - find the reciprocal 1/B according to the following scheme:
//#  B = (2^E)*m                                (1
//#  1/B = 1/((2^E)*m) = 1/((2^(E+1))*m1)          (0.5
//#      = (2^-(E+1))*(1/m1) = (2^E1)*(1/m1)
//#
//# Now we can find the new exponent:
//# e1 = E1+127 = -E-1+127 = -e+127-1+127 = 253-e **
//# 1/m1 alreadt has the exponent 127, so we have to add 126-e.
//# the exponent might underflow, which we can detect as a sign change.
//# Since the architeture uses flush-to-zero for subnormals, we can
//# give the result 0. then.
//#
//# The 1/m1 term with 0.5
//# 1/m1 = 2.58586*(m1^2) - 5.81818*m1 + 4.24242
//#
//# Next step is to use two iterations of Newton-Raphson algorithm to complete
//# the reciprocal calculation.
//#
//# Final result is achieved by multiplying A with 1/B
//#############################################################################
 
 
 
// R0 exponent and sign "replacement" into TMP0
AND TMP0,R0,P0L          ;
ORR TMP0,TMP0,P1L
SUB TMP5,R0,TMP0 // R0 sign/exponent extraction into TMP5
// Calculate new mantissa
FMADD P1H,TMP0,P0H               ;
                // Calculate new exponent offset 126 - "old exponent"
                SUB P1L,P1L,TMP5
        ldrd P0L , [ R1 , -1 ]
FMADD P0L,TMP0,P1H               ;
                eor P1H,r0,P1L // check for overflow (N-BIT).
                blt .Lret_0
// P0L exponent and sign "replacement"
sub P0L,P0L,TMP5
 
// Newton-Raphson iteration #1
MOV TMP0,P0H             ;
FMSUB P0H,R0,P0L         ;
FMUL  P0L,P0H,P0L        ;
// Newton-Raphson iteration #2
FMSUB TMP0,R0,P0L       ;
FMUL  R0,TMP0,P0L                ;
jr lr
.Lret_0:ldrd P0L , [ R1 , -3 ]
        lsr TMP0,r0,31 ; extract sign
        lsl TMP0,TMP0,31
        add P0L,P0L,r0 ; check for NaN input
        eor P0L,P0L,r0
        movgte r0,TMP0
        jr lr
// Quotient calculation is expected by the caller: FMUL quotient,divident,R0
        ;
        ENDFUNC(__fast_recipsf2)

Line No.	Rev	Author	Line
1	734	jeremybenn	`/* Copyright (C) 2011 Free Software Foundation, Inc.`
2			`Contributed by Embecosm on behalf of Adapteva, Inc.`
3
4			`This file is part of GCC.`
5
6			`GCC is free software; you can redistribute it and/or modify it under`
7			`the terms of the GNU General Public License as published by the Free`
8			`Software Foundation; either version 3, or (at your option) any later`
9			`version.`
10
11			`GCC is distributed in the hope that it will be useful, but WITHOUT ANY`
12			`WARRANTY; without even the implied warranty of MERCHANTABILITY or`
13			`FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`
14			`for more details.`
15
16			`Under Section 7 of GPL version 3, you are granted additional`
17			`permissions described in the GCC Runtime Library Exception, version`
18			`3.1, as published by the Free Software Foundation.`
19
20			`You should have received a copy of the GNU General Public License and`
21			`a copy of the GCC Runtime Library Exception along with this program;`
22			`see the files COPYING3 and COPYING.RUNTIME respectively. If not, see`
23			`. */`
24
25			`#include "../epiphany-asm.h"`
26
27			`.section _fast_div_text,"a",@progbits;`
28			`.balign 8;`
29			`_fast_div_table:`
30			`.word 0x007fffff// mantissa mask`
31			`.word 0x40257ebb// hold constant a = 2.58586`
32
33			`.word 0x3f000000// hold constant 126 shifted to bits [30:23]`
34			`.word 0xc0ba2e88// hold constant b = -5.81818`
35
36			`.word 0x4087c1e8// hold constant c = 4.24242`
37			`.word 0x40000000// to hold constant 2 for Newton-Raphson iterations`
38
39			`.global SYM(__fast_recipsf2)`
40			`FUNC(__fast_recipsf2)`
41			`SYM(__fast_recipsf2):`
42
43			`//###################`
44			`//# input operands:`
45			`//###################`
46			`// Divisor`
47			`//R0`
48			`// Function address (used with negative offsets to read _fast_div_table)`
49			`//R1`
50			`/* Scratch registers: two single (TMP0/TMP5) and two pairs. */`
51			`#define P0L TMP1`
52			`#define P0H TMP2`
53			`#define P1L TMP3`
54			`#define P1H TMP4`
55
56			`//#########################################`
57			`//# Constants to be used in the algorithm`
58			`//#########################################`
59			`ldrd P0L , [ R1 , -3 ]`
60
61			`ldrd P1L , [ R1 , -2 ]`
62
63
64
65			`//#############################################################################`
66			`//# The Algorithm`
67			`//#`
68			`//# Operation: C=A/B`
69			`//# stage 1 - find the reciprocal 1/B according to the following scheme:`
70			`//# B = (2^E)*m (1`
71			`//# 1/B = 1/((2^E)m) = 1/((2^(E+1))m1) (0.5`
72			`//# = (2^-(E+1))(1/m1) = (2^E1)(1/m1)`
73			`//#`
74			`//# Now we can find the new exponent:`
75			`//# e1 = E1+127 = -E-1+127 = -e+127-1+127 = 253-e **`
76			`//# 1/m1 alreadt has the exponent 127, so we have to add 126-e.`
77			`//# the exponent might underflow, which we can detect as a sign change.`
78			`//# Since the architeture uses flush-to-zero for subnormals, we can`
79			`//# give the result 0. then.`
80			`//#`
81			`//# The 1/m1 term with 0.5`
82			`//# 1/m1 = 2.58586(m1^2) - 5.81818m1 + 4.24242`
83			`//#`
84			`//# Next step is to use two iterations of Newton-Raphson algorithm to complete`
85			`//# the reciprocal calculation.`
86			`//#`
87			`//# Final result is achieved by multiplying A with 1/B`
88			`//#############################################################################`
89
90
91
92			`// R0 exponent and sign "replacement" into TMP0`
93			`AND TMP0,R0,P0L ;`
94			`ORR TMP0,TMP0,P1L`
95			`SUB TMP5,R0,TMP0 // R0 sign/exponent extraction into TMP5`
96			`// Calculate new mantissa`
97			`FMADD P1H,TMP0,P0H ;`
98			`// Calculate new exponent offset 126 - "old exponent"`
99			`SUB P1L,P1L,TMP5`
100			`ldrd P0L , [ R1 , -1 ]`
101			`FMADD P0L,TMP0,P1H ;`
102			`eor P1H,r0,P1L // check for overflow (N-BIT).`
103			`blt .Lret_0`
104			`// P0L exponent and sign "replacement"`
105			`sub P0L,P0L,TMP5`
106
107			`// Newton-Raphson iteration #1`
108			`MOV TMP0,P0H ;`
109			`FMSUB P0H,R0,P0L ;`
110			`FMUL P0L,P0H,P0L ;`
111			`// Newton-Raphson iteration #2`
112			`FMSUB TMP0,R0,P0L ;`
113			`FMUL R0,TMP0,P0L ;`
114			`jr lr`
115			`.Lret_0:ldrd P0L , [ R1 , -3 ]`
116			`lsr TMP0,r0,31 ; extract sign`
117			`lsl TMP0,TMP0,31`
118			`add P0L,P0L,r0 ; check for NaN input`
119			`eor P0L,P0L,r0`
120			`movgte r0,TMP0`
121			`jr lr`
122			`// Quotient calculation is expected by the caller: FMUL quotient,divident,R0`
123			`;`
124			`ENDFUNC(__fast_recipsf2)`

Browse

Tools

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgcc/] [config/] [epiphany/] [ieee-754/] [fast_div.S] - Blame information for rev 747