OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [rc203soc/] [sw/] [uClinux/] [arch/] [m68k/] [fpsp040/] [satan.S] - Blame information for rev 1777

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1623 jcastillo
|
2
|       satan.sa 3.3 12/19/90
3
|
4
|       The entry point satan computes the arctangent of an
5
|       input value. satand does the same except the input value is a
6
|       denormalized number.
7
|
8
|       Input: Double-extended value in memory location pointed to by address
9
|               register a0.
10
|
11
|       Output: Arctan(X) returned in floating-point register Fp0.
12
|
13
|       Accuracy and Monotonicity: The returned result is within 2 ulps in
14
|               64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
15
|               result is subsequently rounded to double precision. The
16
|               result is provably monotonic in double precision.
17
|
18
|       Speed: The program satan takes approximately 160 cycles for input
19
|               argument X such that 1/16 < |X| < 16. For the other arguments,
20
|               the program will run no worse than 10% slower.
21
|
22
|       Algorithm:
23
|       Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
24
|
25
|       Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
26
|               Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
27
|               of X with a bit-1 attached at the 6-th bit position. Define u
28
|               to be u = (X-F) / (1 + X*F).
29
|
30
|       Step 3. Approximate arctan(u) by a polynomial poly.
31
|
32
|       Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values
33
|               calculated beforehand. Exit.
34
|
35
|       Step 5. If |X| >= 16, go to Step 7.
36
|
37
|       Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
38
|
39
|       Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
40
|               Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
41
|
42
 
43
|               Copyright (C) Motorola, Inc. 1990
44
|                       All Rights Reserved
45
|
46
|       THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
47
|       The copyright notice above does not evidence any
48
|       actual or intended publication of such source code.
49
 
50
|satan  idnt    2,1 | Motorola 040 Floating Point Software Package
51
 
52
        |section        8
53
 
54
        .include "fpsp.h"
55
 
56
|--CONSTANT POOL FOR SATAN.
|--BOUNDS1 HOLDS THE SAME TWO VALUES THAT SATAN COMPARES AGAINST AS
|--IMMEDIATES (THE |X| >= 1/16 AND |X| < 16 TESTS). THE LABEL ITSELF IS
|--NOT REFERENCED BY ANY INSTRUCTION VISIBLE IN THIS FILE.
BOUNDS1:        .long 0x3FFB8000,0x4002FFFF
57
 
58
|--SAME BIT PATTERN AS THE IMMEDIATE THAT ATANMAIN ADDS TO FORM 1 + X*F.
|--THE LABEL IS NOT REFERENCED BY ANY INSTRUCTION VISIBLE IN THIS FILE.
ONE:    .long 0x3F800000
59
 
60
        .long 0x00000000        | UNLABELED ZERO LONG; PRESUMABLY PADDING -- CONFIRM
61
 
62
|--ATANA1..ATANA3: TWO-LONG (8-BYTE) COEFFICIENTS OF THE ATAN(U) POLYNOMIAL.
|--ATANMAIN READS THEM WITH fmoved/faddd/fmuld.
ATANA3: .long 0xBFF6687E,0x314987D8
63
ATANA2: .long 0x4002AC69,0x34A26DB3
64
 
65
ATANA1: .long 0xBFC2476F,0x4E1DA28E
|--ATANB1..ATANB6: TWO-LONG COEFFICIENTS B1..B6 OF THE SMALL-ARGUMENT
|--POLYNOMIAL. ATANSM READS THEM WITH fmoved/faddd.
66
ATANB6: .long 0x3FB34444,0x7F876989
67
 
68
ATANB5: .long 0xBFB744EE,0x7FAF45DB
69
ATANB4: .long 0x3FBC71C6,0x46940220
70
 
71
ATANB3: .long 0xBFC24924,0x921872F9
72
ATANB2: .long 0x3FC99999,0x99998FA9
73
 
74
ATANB1: .long 0xBFD55555,0x55555555
|--ATANC1..ATANC5: TWO-LONG COEFFICIENTS C1..C5 OF THE POLYNOMIAL IN
|--X' = -1/X. ATANBIG READS THEM WITH fmoved/faddd.
75
ATANC5: .long 0xBFB70BF3,0x98539E6A
76
 
77
ATANC4: .long 0x3FBC7187,0x962D1D7D
78
ATANC3: .long 0xBFC24924,0x827107B8
79
 
80
ATANC2: .long 0x3FC99999,0x9996263E
81
ATANC1: .long 0xBFD55555,0x55555536
82
 
83
|--FOUR-LONG OPERANDS ACCESSED WITH EXTENDED-PRECISION (x) INSTRUCTIONS.
|--PPIBY2/NPIBY2 ARE THE +PI/2 AND -PI/2 TERMS USED BY ATANBIG AND ATANHUGE.
|--PTINY/NTINY ARE THE +TINY AND -TINY VALUES THAT ATANHUGE SUBTRACTS.
PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
84
NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
85
PTINY:  .long 0x00010000,0x80000000,0x00000000,0x00000000
86
NTINY:  .long 0x80010000,0x80000000,0x00000000,0x00000000
87
 
88
|--TABLE OF ATAN(|F|): 128 ENTRIES OF FOUR LONGS (16 BYTES) EACH.
|--ATANMAIN BUILDS A BYTE OFFSET THAT IS A MULTIPLE OF 16 FROM THE
|--EXPONENT (K+4) AND THE FOUR LEADING FRACTION BITS OF X, ADDS IT TO
|--THIS LABEL, AND COPIES THE FIRST THREE LONGS OF THE SELECTED ENTRY
|--INTO ATANF. THE FOURTH LONG OF EACH ENTRY IS NOT READ THERE.
ATANTBL:
89
        .long   0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
90
        .long   0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
91
        .long   0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
92
        .long   0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
93
        .long   0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
94
        .long   0x3FFB0000,0xAB98E943,0x62765619,0x00000000
95
        .long   0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
96
        .long   0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
97
        .long   0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
98
        .long   0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
99
        .long   0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
100
        .long   0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
101
        .long   0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
102
        .long   0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
103
        .long   0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
104
        .long   0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
105
        .long   0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
106
        .long   0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
107
        .long   0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
108
        .long   0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
109
        .long   0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
110
        .long   0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
111
        .long   0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
112
        .long   0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
113
        .long   0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
114
        .long   0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
115
        .long   0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
116
        .long   0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
117
        .long   0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
118
        .long   0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
119
        .long   0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
120
        .long   0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
121
        .long   0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
122
        .long   0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
123
        .long   0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
124
        .long   0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
125
        .long   0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
126
        .long   0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
127
        .long   0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
128
        .long   0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
129
        .long   0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
130
        .long   0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
131
        .long   0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
132
        .long   0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
133
        .long   0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
134
        .long   0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
135
        .long   0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
136
        .long   0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
137
        .long   0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
138
        .long   0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
139
        .long   0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
140
        .long   0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
141
        .long   0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
142
        .long   0x3FFE0000,0x97731420,0x365E538C,0x00000000
143
        .long   0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
144
        .long   0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
145
        .long   0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
146
        .long   0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
147
        .long   0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
148
        .long   0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
149
        .long   0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
150
        .long   0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
151
        .long   0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
152
        .long   0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
153
        .long   0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
154
        .long   0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
155
        .long   0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
156
        .long   0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
157
        .long   0x3FFE0000,0xE8771129,0xC4353259,0x00000000
158
        .long   0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
159
        .long   0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
160
        .long   0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
161
        .long   0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
162
        .long   0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
163
        .long   0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
164
        .long   0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
165
        .long   0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
166
        .long   0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
167
        .long   0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
168
        .long   0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
169
        .long   0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
170
        .long   0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
171
        .long   0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
172
        .long   0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
173
        .long   0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
174
        .long   0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
175
        .long   0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
176
        .long   0x3FFF0000,0x9F100575,0x006CC571,0x00000000
177
        .long   0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
178
        .long   0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
179
        .long   0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
180
        .long   0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
181
        .long   0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
182
        .long   0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
183
        .long   0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
184
        .long   0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
185
        .long   0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
186
        .long   0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
187
        .long   0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
188
        .long   0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
189
        .long   0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
190
        .long   0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
191
        .long   0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
192
        .long   0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
193
        .long   0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
194
        .long   0x3FFF0000,0xB525529D,0x562246BD,0x00000000
195
        .long   0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
196
        .long   0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
197
        .long   0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
198
        .long   0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
199
        .long   0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
200
        .long   0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
201
        .long   0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
202
        .long   0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
203
        .long   0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
204
        .long   0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
205
        .long   0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
206
        .long   0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
207
        .long   0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
208
        .long   0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
209
        .long   0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
210
        .long   0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
211
        .long   0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
212
        .long   0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
213
        .long   0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
214
        .long   0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
215
        .long   0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
216
        .long   0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
217
 
218
        |--SCRATCH-SLOT ALIASES. ALL ARE USED AS DISPLACEMENTS OFF %a6.
        |--FP_SCR1 AND FP_SCR2 ARE NOT DEFINED IN THIS FILE (PRESUMABLY
        |--THEY COME FROM fpsp.h -- CONFIRM).
        .set    X,FP_SCR1       | EXTENDED-PRECISION COPY OF THE ARGUMENT (LATER F OR X')
219
        .set    XDCARE,X+2      | WORD AT X+2; THE CODE ALWAYS STORES ZERO HERE
220
        .set    XFRAC,X+4       | LONG AT X+4; MASKED TO KEEP THE FIRST 5 BITS WHEN BUILDING F
221
        .set    XFRACLO,X+8     | LONG AT X+8; CLEARED WHEN BUILDING F
222
 
223
        .set    ATANF,FP_SCR2   | RECEIVES SIGN(F)*ATAN(|F|); FIRST LONG CARRIES THE SIGN BIT
224
        .set    ATANFHI,ATANF+4 | SECOND LONG COPIED FROM THE ATANTBL ENTRY
225
        .set    ATANFLO,ATANF+8 | THIRD LONG COPIED FROM THE ATANTBL ENTRY
226
 
227
 
228
        | xref  t_frcinx
229
        |xref   t_extdnrm
230
 
231
        .global satand
232
satand:
233
|--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
|--NO COMPUTATION IS DONE HERE: CONTROL GOES STRAIGHT TO t_extdnrm,
|--WHICH IS DEFINED OUTSIDE THIS FILE (PRESUMABLY IT RETURNS THE
|--OPERAND ITSELF WITH THE APPROPRIATE EXCEPTION HANDLING -- CONFIRM
|--AGAINST ITS SOURCE).
234
 
235
        bra             t_extdnrm
236
 
237
        .global satan
238
satan:
239
|--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
|--IN:  a0 POINTS TO THE EXTENDED-PRECISION ARGUMENT X.
|--     a6 IS THE BASE REGISTER FOR THE SCRATCH SLOTS X AND ATANF.
|--     d1 IS WRITTEN TO FPCR BY EVERY EXIT PATH; IT IS NOT SET IN THIS
|--     FILE, SO THE CALLER IS ASSUMED TO HAVE LOADED IT -- CONFIRM.
|--OUT: RESULT IN fp0. EVERY PATH LEAVES THROUGH t_frcinx (DEFINED ELSEWHERE).
|--THIS BLOCK SAVES X, BUILDS |X| IN "COMPACT FORM" IN d0 (EXPONENT IN
|--THE UPPER WORD, TOP 16 MANTISSA BITS IN THE LOWER WORD) AND DISPATCHES
|--TO ATANSM, ATANMAIN OR ATANBIG ACCORDING TO THE MAGNITUDE OF X.
240
 
241
        fmovex          (%a0),%fp0      | ...LOAD INPUT
242
 
243
        movel           (%a0),%d0       | SIGN/EXPONENT WORD INTO THE UPPER HALF OF d0
244
        movew           4(%a0),%d0      | TOP 16 BITS OF THE MANTISSA INTO THE LOWER HALF
245
        fmovex          %fp0,X(%a6)     | KEEP A MEMORY COPY OF X FOR THE LATER STAGES
246
        andil           #0x7FFFFFFF,%d0 | DROP THE SIGN BIT: d0 IS NOW |X| IN COMPACT FORM
247
 
|--d0 IS NON-NEGATIVE FROM HERE ON, SO THE SIGNED BRANCHES BELOW ARE SAFE.
248
        cmpil           #0x3FFB8000,%d0         | ...|X| >= 1/16?
249
        bges            ATANOK1
250
        bra             ATANSM
251
 
252
ATANOK1:
253
        cmpil           #0x4002FFFF,%d0         | ...|X| < 16 ?
254
        bles            ATANMAIN
255
        bra             ATANBIG
256
 
257
 
258
|--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
259
|--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
260
|--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
261
|--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
262
|--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
263
|--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
264
|--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
265
|--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
266
|--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
267
|--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
268
|--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
269
|--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
270
|--WILL INVOLVE A VERY LONG POLYNOMIAL.
271
 
272
|--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
273
|--WE CHOSE F TO BE +-2^K * 1.BBBB1
274
|--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
275
|--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
276
|--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
277
|-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
278
 
279
|--ATANMAIN: TABLE-DRIVEN PATH FOR |X| IN [1/16, 16).
|--ON ENTRY fp0 = X, X(%a6) HOLDS A COPY OF X, d0 = |X| IN COMPACT FORM.
|--THE COPY OF X IN MEMORY IS TURNED INTO F, U = (X-F)/(1+X*F) IS STARTED,
|--ATAN(|F|) IS FETCHED FROM ATANTBL WHILE THE DIVIDE RUNS, AND THE RESULT
|--IS ATAN(U) + SIGN(F)*ATAN(|F|). a1 AND d0 ARE OVERWRITTEN; d2 IS SAVED
|--AND RESTORED ON THE STACK. fp1 AND fp2 ARE OVERWRITTEN AND NOT SAVED HERE.
ATANMAIN:
280
 
281
        movew           #0x0000,XDCARE(%a6)     | ...CLEAN UP X JUST IN CASE
282
        andil           #0xF8000000,XFRAC(%a6)  | ...FIRST 5 BITS
283
        oril            #0x04000000,XFRAC(%a6)  | ...SET 6-TH BIT TO 1
284
        movel           #0x00000000,XFRACLO(%a6)        | ...LOCATION OF X IS NOW F
285
 
286
        fmovex          %fp0,%fp1                       | ...FP1 IS X
287
        fmulx           X(%a6),%fp1             | ...FP1 IS X*F, NOTE THAT X*F > 0
288
        fsubx           X(%a6),%fp0             | ...FP0 IS X-F
289
        fadds           #0x3F800000,%fp1                | ...FP1 IS 1 + X*F
290
        fdivx           %fp1,%fp0                       | ...FP0 IS U = (X-F)/(1+X*F)
291
 
292
|--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
293
|--CREATE ATAN(F) AND STORE IT IN ATANF, AND
294
|--SAVE d2 (THE CODE BELOW SAVES NO FP REGISTER, DESPITE EARLIER WORDING).
295
 
|--INDEX ARITHMETIC: THE FOUR FRACTION BITS SIT AT BITS 14..11 OF d0 AND
|--(K+4) IS SHIFTED DOWN TO BITS 17..15, GIVING 7 CONTIGUOUS BITS. THE
|--SHIFT RIGHT BY 7 THEN LEAVES A BYTE OFFSET OF 16 * INDEX.
296
        movel           %d2,-(%a7)      | ...SAVE d2 TEMPORARILY
297
        movel           %d0,%d2         | ...THE EXPO AND 16 BITS OF X
298
        andil           #0x00007800,%d0 | ...4 VARYING BITS OF F'S FRACTION
299
        andil           #0x7FFF0000,%d2 | ...EXPONENT OF F
300
        subil           #0x3FFB0000,%d2 | ...K+4
301
        asrl            #1,%d2
302
        addl            %d2,%d0         | ...THE 7 BITS IDENTIFYING F
303
        asrl            #7,%d0          | ...INDEX INTO TBL OF ATAN(|F|)
304
        lea             ATANTBL,%a1
305
        addal           %d0,%a1         | ...ADDRESS OF ATAN(|F|)
306
        movel           (%a1)+,ATANF(%a6)
307
        movel           (%a1)+,ATANFHI(%a6)
308
        movel           (%a1)+,ATANFLO(%a6)     | ...ATANF IS NOW ATAN(|F|)
309
        movel           X(%a6),%d0              | ...LOAD SIGN AND EXPO. AGAIN
310
        andil           #0x80000000,%d0 | ...SIGN(F)
311
        orl             %d0,ATANF(%a6)  | ...ATANF IS NOW SIGN(F)*ATAN(|F|)
312
        movel           (%a7)+,%d2      | ...RESTORE d2
313
 
314
|--THAT'S ALL I HAVE TO DO FOR NOW,
315
|--BUT ALAS, THE DIVIDE IS STILL CRANKING!
316
 
317
|--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
318
|--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
319
|--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
320
|--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
321
|--WHAT WE HAVE HERE IS MERELY  A1 = A3, A2 = A1/A3, A3 = A2/A3.
322
|--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
323
|--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
324
 
325
 
326
        fmovex          %fp0,%fp1
327
        fmulx           %fp1,%fp1               | ...FP1 IS V = U*U
328
        fmoved          ATANA3,%fp2
329
        faddx           %fp1,%fp2               | ...A3+V
330
        fmulx           %fp1,%fp2               | ...V*(A3+V)
331
        fmulx           %fp0,%fp1               | ...U*V
332
        faddd           ATANA2,%fp2     | ...A2+V*(A3+V)
333
        fmuld           ATANA1,%fp1     | ...A1*U*V
334
        fmulx           %fp2,%fp1               | ...A1*U*V*(A2+V*(A3+V))
335
 
336
        faddx           %fp1,%fp0               | ...ATAN(U), FP1 RELEASED
|--FPCR IS RELOADED FROM d1 JUST BEFORE THE FINAL ADD, SO ONLY THAT LAST
|--OPERATION RUNS UNDER THE RESTORED CONTROL SETTINGS.
337
        fmovel          %d1,%FPCR               |restore users exceptions
338
        faddx           ATANF(%a6),%fp0 | ...ATAN(X)
339
        bra             t_frcinx
340
 
341
|--ATANBORS: "BIG OR SMALL" DISPATCH. NO BRANCH TO THIS LABEL APPEARS IN
|--THE CODE VISIBLE IN THIS FILE (SATAN JUMPS DIRECTLY TO ATANSM OR
|--ATANBIG), SO IT MAY BE UNUSED -- CONFIRM BEFORE REMOVING.
|--IF THE COMPARE DOES NOT BRANCH, EXECUTION FALLS THROUGH INTO ATANSM.
ATANBORS:
342
|--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
343
|--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
344
        cmpil           #0x3FFF8000,%d0 | COMPACT FORM OF 1.0 SEPARATES THE TWO RANGES
345
        bgt             ATANBIG | ...I.E. |X| >= 16
346
 
347
|--ATANSM: SMALL-ARGUMENT PATH. ON ENTRY fp0 = X, X(%a6) HOLDS A COPY OF
|--X, d0 = |X| IN COMPACT FORM. fp1, fp2 AND fp3 ARE OVERWRITTEN AND ARE
|--NOT SAVED BY THIS FILE (PRESUMABLY THE CALLER PRESERVES THEM -- CONFIRM).
ATANSM:
348
|--|X| < 1/16 WHEN REACHED FROM SATAN (<= 1/16 VIA ATANBORS)
349
|--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
350
|--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
351
|--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
352
|--WHERE Y = X*X, AND Z = Y*Y.
353
 
354
        cmpil           #0x3FD78000,%d0         | COMPACT FORM OF 2^(-40)
355
        blt             ATANTINY
356
|--COMPUTE POLYNOMIAL
357
        fmulx           %fp0,%fp0       | ...FP0 IS Y = X*X
358
 
359
 
360
        movew           #0x0000,XDCARE(%a6)     | CLEAR THE WORD AT X+2 BEFORE X IS RE-READ
361
 
362
        fmovex          %fp0,%fp1
363
        fmulx           %fp1,%fp1               | ...FP1 IS Z = Y*Y
364
 
|--THE EVEN (B2,B4,B6) AND ODD (B1,B3,B5) HALVES ARE EVALUATED SIDE BY
|--SIDE IN fp2 AND fp3/fp1 AND COMBINED AT THE END.
365
        fmoved          ATANB6,%fp2
366
        fmoved          ATANB5,%fp3
367
 
368
        fmulx           %fp1,%fp2               | ...Z*B6
369
        fmulx           %fp1,%fp3               | ...Z*B5
370
 
371
        faddd           ATANB4,%fp2     | ...B4+Z*B6
372
        faddd           ATANB3,%fp3     | ...B3+Z*B5
373
 
374
        fmulx           %fp1,%fp2               | ...Z*(B4+Z*B6)
375
        fmulx           %fp3,%fp1               | ...Z*(B3+Z*B5)
376
 
377
        faddd           ATANB2,%fp2     | ...B2+Z*(B4+Z*B6)
378
        faddd           ATANB1,%fp1     | ...B1+Z*(B3+Z*B5)
379
 
380
        fmulx           %fp0,%fp2               | ...Y*(B2+Z*(B4+Z*B6))
381
        fmulx           X(%a6),%fp0             | ...X*Y
382
 
383
        faddx           %fp2,%fp1               | ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
384
 
385
 
386
        fmulx           %fp1,%fp0       | ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
387
 
|--FPCR IS RELOADED FROM d1 JUST BEFORE THE FINAL ADD OF X.
388
        fmovel          %d1,%FPCR               |restore users exceptions
389
        faddx           X(%a6),%fp0
390
 
391
        bra             t_frcinx
392
 
393
|--ATANTINY: REACHED FROM ATANSM WHEN d0 (|X| IN COMPACT FORM) IS BELOW
|--0x3FD78000. THE SAVED COPY OF X IS RELOADED INTO fp0 AS THE RESULT.
ATANTINY:
394
|--|X| < 2^(-40), ATAN(X) = X
395
        movew           #0x0000,XDCARE(%a6)     | CLEAR THE WORD AT X+2 BEFORE X IS RE-READ
396
 
|--THE LOAD IS DONE AFTER FPCR IS RELOADED FROM d1, SO IT IS THE ONLY FP
|--OPERATION ON THIS PATH EXECUTED UNDER THE RESTORED CONTROL SETTINGS.
397
        fmovel          %d1,%FPCR               |restore users exceptions
398
        fmovex          X(%a6),%fp0     |last inst - possible exception set
399
 
400
        bra             t_frcinx
401
 
402
|--ATANBIG: LARGE-ARGUMENT PATH. ON ENTRY fp0 = X, d0 = |X| IN COMPACT
|--FORM, a0 STILL POINTS TO THE ORIGINAL ARGUMENT (ITS SIGN BIT IS TESTED
|--AT THE END). X(%a6) IS OVERWRITTEN WITH X' = -1/X. fp1, fp2 AND fp3 ARE
|--OVERWRITTEN AND ARE NOT SAVED BY THIS FILE.
ATANBIG:
403
|--IF |X| > 2^(100), RETURN     SIGN(X)*(PI/2 - TINY). OTHERWISE,
404
|--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
405
        cmpil           #0x40638000,%d0         | COMPACT FORM OF 2^(100)
406
        bgt             ATANHUGE
407
 
408
|--APPROXIMATE ATAN(-1/X) BY
409
|--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
410
|--THIS CAN BE RE-WRITTEN AS
411
|--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
412
 
413
        fmoves          #0xBF800000,%fp1        | ...LOAD -1
414
        fdivx           %fp0,%fp1               | ...FP1 IS -1/X
415
 
416
 
417
|--DIVIDE IS STILL CRANKING
418
 
419
        fmovex          %fp1,%fp0               | ...FP0 IS X'
420
        fmulx           %fp0,%fp0               | ...FP0 IS Y = X'*X'
421
        fmovex          %fp1,X(%a6)             | ...X IS REALLY X'
422
 
423
        fmovex          %fp0,%fp1
424
        fmulx           %fp1,%fp1               | ...FP1 IS Z = Y*Y
425
 
426
        fmoved          ATANC5,%fp3
427
        fmoved          ATANC4,%fp2
428
 
429
        fmulx           %fp1,%fp3               | ...Z*C5
430
        fmulx           %fp1,%fp2               | ...Z*C4
431
 
432
        faddd           ATANC3,%fp3     | ...C3+Z*C5
433
        faddd           ATANC2,%fp2     | ...C2+Z*C4
434
 
435
        fmulx           %fp3,%fp1               | ...Z*(C3+Z*C5), FP3 RELEASED
436
        fmulx           %fp0,%fp2               | ...Y*(C2+Z*C4)
437
 
438
        faddd           ATANC1,%fp1     | ...C1+Z*(C3+Z*C5)
439
        fmulx           X(%a6),%fp0             | ...X'*Y
440
 
441
        faddx           %fp2,%fp1               | ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
442
 
443
 
444
        fmulx           %fp1,%fp0               | ...X'*Y*([C1+Z*(C3+Z*C5)]
445
|                                       ...     +[Y*(C2+Z*C4)])
446
        faddx           X(%a6),%fp0             | ...ATAN(X') = X' + X'*Y*(...)
447
 
|--FPCR IS RELOADED FROM d1 BEFORE THE FINAL ADD OF +-PI/2.
448
        fmovel          %d1,%FPCR               |restore users exceptions
449
 
450
        btstb           #7,(%a0)                | SIGN BIT OF THE ORIGINAL ARGUMENT
451
        beqs            pos_big
452
 
453
neg_big:
454
        faddx           NPIBY2,%fp0             | ...-PI/2 + ATAN(X')
455
        bra             t_frcinx
456
 
457
pos_big:
458
        faddx           PPIBY2,%fp0             | ...+PI/2 + ATAN(X')
459
        bra             t_frcinx
460
 
461
|--ATANHUGE: REACHED FROM ATANBIG WHEN d0 (|X| IN COMPACT FORM) EXCEEDS
|--0x40638000. a0 STILL POINTS TO THE ORIGINAL ARGUMENT, WHOSE SIGN BIT
|--SELECTS THE CONSTANT PAIR. IN BOTH BRANCHES THE +-PI/2 LOAD IS DONE
|--FIRST, THEN FPCR IS RELOADED FROM d1, AND ONLY THE SUBTRACTION OF
|--+-TINY RUNS UNDER THE RESTORED CONTROL SETTINGS.
ATANHUGE:
462
|--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
463
        btstb           #7,(%a0)                | SIGN BIT OF THE ORIGINAL ARGUMENT
464
        beqs            pos_huge
465
 
466
neg_huge:
467
        fmovex          NPIBY2,%fp0
468
        fmovel          %d1,%fpcr
469
        fsubx           NTINY,%fp0              | ...-PI/2 - (-TINY)
470
        bra             t_frcinx
471
 
472
pos_huge:
473
        fmovex          PPIBY2,%fp0
474
        fmovel          %d1,%fpcr
475
        fsubx           PTINY,%fp0              | ...+PI/2 - TINY
476
        bra             t_frcinx
477
 
478
        |end

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.