OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [rtems/] [c/] [src/] [lib/] [libcpu/] [m68k/] [m68040/] [fpsp/] [satan.S] - Blame information for rev 208

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 158 chris
//
2 208 chris
//      $Id: satan.S,v 1.2 2001-09-27 12:01:22 chris Exp $
3 158 chris
//
4
//      satan.sa 3.3 12/19/90
5
//
6
//      The entry point satan computes the arctangent of an
7
//      input value. satand does the same except the input value is a
8
//      denormalized number.
9
//
10
//      Input: Double-extended value in memory location pointed to by address
11
//              register a0.
12
//
13
//      Output: Arctan(X) returned in floating-point register Fp0.
14
//
15
//      Accuracy and Monotonicity: The returned result is within 2 ulps in
16
//              64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
17
//              result is subsequently rounded to double precision. The
18
//              result is provably monotonic in double precision.
19
//
20
//      Speed: The program satan takes approximately 160 cycles for input
21
//              argument X such that 1/16 < |X| < 16. For the other arguments,
22
//              the program will run no worse than 10% slower.
23
//
24
//      Algorithm:
25
//      Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
26
//
27
//      Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
28
//              Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
29
//              of X with a bit-1 attached at the 6-th bit position. Define u
30
//              to be u = (X-F) / (1 + X*F).
31
//
32
//      Step 3. Approximate arctan(u) by a polynomial poly.
33
//
34
//      Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values
35
//              calculated beforehand. Exit.
36
//
37
//      Step 5. If |X| >= 16, go to Step 7.
38
//
39
//      Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
40
//
41
//      Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
42
//              Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
43
//
44
 
45
//              Copyright (C) Motorola, Inc. 1990
46
//                      All Rights Reserved
47
//
48
//      THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
49
//      The copyright notice above does not evidence any
50
//      actual or intended publication of such source code.
51
 
52
//satan idnt    2,1 | Motorola 040 Floating Point Software Package
53
 
54
        |section        8
55
 
56
#include "fpsp.defs"
57
 
58
BOUNDS1:        .long 0x3FFB8000,0x4002FFFF
59
 
60
ONE:    .long 0x3F800000
61
 
62
        .long 0x00000000
63
 
64
ATANA3: .long 0xBFF6687E,0x314987D8
65
ATANA2: .long 0x4002AC69,0x34A26DB3
66
 
67
ATANA1: .long 0xBFC2476F,0x4E1DA28E
68
ATANB6: .long 0x3FB34444,0x7F876989
69
 
70
ATANB5: .long 0xBFB744EE,0x7FAF45DB
71
ATANB4: .long 0x3FBC71C6,0x46940220
72
 
73
ATANB3: .long 0xBFC24924,0x921872F9
74
ATANB2: .long 0x3FC99999,0x99998FA9
75
 
76
ATANB1: .long 0xBFD55555,0x55555555
77
ATANC5: .long 0xBFB70BF3,0x98539E6A
78
 
79
ATANC4: .long 0x3FBC7187,0x962D1D7D
80
ATANC3: .long 0xBFC24924,0x827107B8
81
 
82
ATANC2: .long 0x3FC99999,0x9996263E
83
ATANC1: .long 0xBFD55555,0x55555536
84
 
85
PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
86
NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
87
PTINY:  .long 0x00010000,0x80000000,0x00000000,0x00000000
88
NTINY:  .long 0x80010000,0x80000000,0x00000000,0x00000000
89
 
90
ATANTBL:
91
        .long   0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
92
        .long   0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
93
        .long   0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
94
        .long   0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
95
        .long   0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
96
        .long   0x3FFB0000,0xAB98E943,0x62765619,0x00000000
97
        .long   0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
98
        .long   0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
99
        .long   0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
100
        .long   0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
101
        .long   0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
102
        .long   0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
103
        .long   0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
104
        .long   0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
105
        .long   0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
106
        .long   0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
107
        .long   0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
108
        .long   0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
109
        .long   0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
110
        .long   0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
111
        .long   0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
112
        .long   0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
113
        .long   0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
114
        .long   0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
115
        .long   0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
116
        .long   0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
117
        .long   0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
118
        .long   0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
119
        .long   0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
120
        .long   0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
121
        .long   0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
122
        .long   0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
123
        .long   0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
124
        .long   0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
125
        .long   0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
126
        .long   0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
127
        .long   0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
128
        .long   0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
129
        .long   0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
130
        .long   0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
131
        .long   0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
132
        .long   0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
133
        .long   0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
134
        .long   0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
135
        .long   0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
136
        .long   0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
137
        .long   0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
138
        .long   0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
139
        .long   0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
140
        .long   0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
141
        .long   0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
142
        .long   0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
143
        .long   0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
144
        .long   0x3FFE0000,0x97731420,0x365E538C,0x00000000
145
        .long   0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
146
        .long   0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
147
        .long   0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
148
        .long   0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
149
        .long   0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
150
        .long   0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
151
        .long   0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
152
        .long   0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
153
        .long   0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
154
        .long   0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
155
        .long   0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
156
        .long   0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
157
        .long   0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
158
        .long   0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
159
        .long   0x3FFE0000,0xE8771129,0xC4353259,0x00000000
160
        .long   0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
161
        .long   0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
162
        .long   0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
163
        .long   0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
164
        .long   0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
165
        .long   0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
166
        .long   0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
167
        .long   0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
168
        .long   0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
169
        .long   0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
170
        .long   0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
171
        .long   0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
172
        .long   0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
173
        .long   0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
174
        .long   0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
175
        .long   0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
176
        .long   0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
177
        .long   0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
178
        .long   0x3FFF0000,0x9F100575,0x006CC571,0x00000000
179
        .long   0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
180
        .long   0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
181
        .long   0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
182
        .long   0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
183
        .long   0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
184
        .long   0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
185
        .long   0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
186
        .long   0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
187
        .long   0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
188
        .long   0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
189
        .long   0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
190
        .long   0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
191
        .long   0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
192
        .long   0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
193
        .long   0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
194
        .long   0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
195
        .long   0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
196
        .long   0x3FFF0000,0xB525529D,0x562246BD,0x00000000
197
        .long   0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
198
        .long   0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
199
        .long   0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
200
        .long   0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
201
        .long   0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
202
        .long   0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
203
        .long   0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
204
        .long   0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
205
        .long   0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
206
        .long   0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
207
        .long   0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
208
        .long   0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
209
        .long   0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
210
        .long   0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
211
        .long   0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
212
        .long   0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
213
        .long   0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
214
        .long   0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
215
        .long   0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
216
        .long   0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
217
        .long   0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
218
        .long   0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
219
 
220
        .set    X,FP_SCR1
221
        .set    XDCARE,X+2
222
        .set    XFRAC,X+4
223
        .set    XFRACLO,X+8
224
 
225
        .set    ATANF,FP_SCR2
226
        .set    ATANFHI,ATANF+4
227
        .set    ATANFLO,ATANF+8
228
 
229
 
230
        | xref  t_frcinx
231
        |xref   t_extdnrm
232
 
233
        .global satand
234
satand:
235
//--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
236
 
237
        bra             t_extdnrm
238
 
239
        .global satan
240
satan:
241
//--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
242
 
243
        fmovex          (%a0),%fp0      // ...LOAD INPUT
244
 
245
        movel           (%a0),%d0
246
        movew           4(%a0),%d0
247
        fmovex          %fp0,X(%a6)
248
        andil           #0x7FFFFFFF,%d0
249
 
250
        cmpil           #0x3FFB8000,%d0         // ...|X| >= 1/16?
251
        bges            ATANOK1
252
        bra             ATANSM
253
 
254
ATANOK1:
255
        cmpil           #0x4002FFFF,%d0         // ...|X| < 16 ?
256
        bles            ATANMAIN
257
        bra             ATANBIG
258
 
259
 
260
//--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
261
//--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
262
//--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
263
//--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
264
//--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
265
//--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
266
//--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
267
//--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE
268
//--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
269
//--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE
270
//--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
271
//--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
272
//--WILL INVOLVE A VERY LONG POLYNOMIAL.
273
 
274
//--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
275
//--WE CHOSE F TO BE +-2^K * 1.BBBB1
276
//--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
277
//--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
278
//--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
279
//-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
280
 
281
ATANMAIN:
282
 
283
        movew           #0x0000,XDCARE(%a6)     // ...CLEAN UP X JUST IN CASE
284
        andil           #0xF8000000,XFRAC(%a6)  // ...FIRST 5 BITS
285
        oril            #0x04000000,XFRAC(%a6)  // ...SET 6-TH BIT TO 1
286
        movel           #0x00000000,XFRACLO(%a6)        // ...LOCATION OF X IS NOW F
287
 
288
        fmovex          %fp0,%fp1                       // ...FP1 IS X
289
        fmulx           X(%a6),%fp1             // ...FP1 IS X*F, NOTE THAT X*F > 0
290
        fsubx           X(%a6),%fp0             // ...FP0 IS X-F
291
        fadds           #0x3F800000,%fp1                // ...FP1 IS 1 + X*F
292
        fdivx           %fp1,%fp0                       // ...FP0 IS U = (X-F)/(1+X*F)
293
 
294
//--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
295
//--CREATE ATAN(F) AND STORE IT IN ATANF, AND
296
//--SAVE REGISTERS FP2.
297
 
298
        movel           %d2,-(%a7)      // ...SAVE d2 TEMPORARILY
299
        movel           %d0,%d2         // ...THE EXPO AND 16 BITS OF X
300
        andil           #0x00007800,%d0 // ...4 VARYING BITS OF F'S FRACTION
301
        andil           #0x7FFF0000,%d2 // ...EXPONENT OF F
302
        subil           #0x3FFB0000,%d2 // ...K+4
303
        asrl            #1,%d2
304
        addl            %d2,%d0         // ...THE 7 BITS IDENTIFYING F
305
        asrl            #7,%d0          // ...INDEX INTO TBL OF ATAN(|F|)
306
        lea             ATANTBL,%a1
307
        addal           %d0,%a1         // ...ADDRESS OF ATAN(|F|)
308
        movel           (%a1)+,ATANF(%a6)
309
        movel           (%a1)+,ATANFHI(%a6)
310
        movel           (%a1)+,ATANFLO(%a6)     // ...ATANF IS NOW ATAN(|F|)
311
        movel           X(%a6),%d0              // ...LOAD SIGN AND EXPO. AGAIN
312
        andil           #0x80000000,%d0 // ...SIGN(F)
313
        orl             %d0,ATANF(%a6)  // ...ATANF IS NOW SIGN(F)*ATAN(|F|)
314
        movel           (%a7)+,%d2      // ...RESTORE d2
315
 
316
//--THAT'S ALL I HAVE TO DO FOR NOW,
317
//--BUT ALAS, THE DIVIDE IS STILL CRANKING!
318
 
319
//--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
320
//--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
321
//--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
322
//--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
323
//--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
324
//--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
325
//--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
326
 
327
 
328
        fmovex          %fp0,%fp1
329
        fmulx           %fp1,%fp1
330
        fmoved          ATANA3,%fp2
331
        faddx           %fp1,%fp2               // ...A3+V
332
        fmulx           %fp1,%fp2               // ...V*(A3+V)
333
        fmulx           %fp0,%fp1               // ...U*V
334
        faddd           ATANA2,%fp2     // ...A2+V*(A3+V)
335
        fmuld           ATANA1,%fp1     // ...A1*U*V
336
        fmulx           %fp2,%fp1               // ...A1*U*V*(A2+V*(A3+V))
337
 
338
        faddx           %fp1,%fp0               // ...ATAN(U), FP1 RELEASED
339
        fmovel          %d1,%FPCR               //restore users exceptions
340
        faddx           ATANF(%a6),%fp0 // ...ATAN(X)
341
        bra             t_frcinx
342
 
343
ATANBORS:
344
//--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
345
//--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
346
        cmpil           #0x3FFF8000,%d0
347
        bgt             ATANBIG // ...I.E. |X| >= 16
348
 
349
ATANSM:
350
//--|X| <= 1/16
351
//--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
352
//--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
353
//--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] )
354
//--WHERE Y = X*X, AND Z = Y*Y.
355
 
356
        cmpil           #0x3FD78000,%d0
357
        blt             ATANTINY
358
//--COMPUTE POLYNOMIAL
359
        fmulx           %fp0,%fp0       // ...FP0 IS Y = X*X
360
 
361
 
362
        movew           #0x0000,XDCARE(%a6)
363
 
364
        fmovex          %fp0,%fp1
365
        fmulx           %fp1,%fp1               // ...FP1 IS Z = Y*Y
366
 
367
        fmoved          ATANB6,%fp2
368
        fmoved          ATANB5,%fp3
369
 
370
        fmulx           %fp1,%fp2               // ...Z*B6
371
        fmulx           %fp1,%fp3               // ...Z*B5
372
 
373
        faddd           ATANB4,%fp2     // ...B4+Z*B6
374
        faddd           ATANB3,%fp3     // ...B3+Z*B5
375
 
376
        fmulx           %fp1,%fp2               // ...Z*(B4+Z*B6)
377
        fmulx           %fp3,%fp1               // ...Z*(B3+Z*B5)
378
 
379
        faddd           ATANB2,%fp2     // ...B2+Z*(B4+Z*B6)
380
        faddd           ATANB1,%fp1     // ...B1+Z*(B3+Z*B5)
381
 
382
        fmulx           %fp0,%fp2               // ...Y*(B2+Z*(B4+Z*B6))
383
        fmulx           X(%a6),%fp0             // ...X*Y
384
 
385
        faddx           %fp2,%fp1               // ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
386
 
387
 
388
        fmulx           %fp1,%fp0       // ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
389
 
390
        fmovel          %d1,%FPCR               //restore users exceptions
391
        faddx           X(%a6),%fp0
392
 
393
        bra             t_frcinx
394
 
395
ATANTINY:
396
//--|X| < 2^(-40), ATAN(X) = X
397
        movew           #0x0000,XDCARE(%a6)
398
 
399
        fmovel          %d1,%FPCR               //restore users exceptions
400
        fmovex          X(%a6),%fp0     //last inst - possible exception set
401
 
402
        bra             t_frcinx
403
 
404
ATANBIG:
405
//--IF |X| > 2^(100), RETURN    SIGN(X)*(PI/2 - TINY). OTHERWISE,
406
//--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
407
        cmpil           #0x40638000,%d0
408
        bgt             ATANHUGE
409
 
410
//--APPROXIMATE ATAN(-1/X) BY
411
//--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
412
//--THIS CAN BE RE-WRITTEN AS
413
//--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
414
 
415
        fmoves          #0xBF800000,%fp1        // ...LOAD -1
416
        fdivx           %fp0,%fp1               // ...FP1 IS -1/X
417
 
418
 
419
//--DIVIDE IS STILL CRANKING
420
 
421
        fmovex          %fp1,%fp0               // ...FP0 IS X'
422
        fmulx           %fp0,%fp0               // ...FP0 IS Y = X'*X'
423
        fmovex          %fp1,X(%a6)             // ...X IS REALLY X'
424
 
425
        fmovex          %fp0,%fp1
426
        fmulx           %fp1,%fp1               // ...FP1 IS Z = Y*Y
427
 
428
        fmoved          ATANC5,%fp3
429
        fmoved          ATANC4,%fp2
430
 
431
        fmulx           %fp1,%fp3               // ...Z*C5
432
        fmulx           %fp1,%fp2               // ...Z*B4
433
 
434
        faddd           ATANC3,%fp3     // ...C3+Z*C5
435
        faddd           ATANC2,%fp2     // ...C2+Z*C4
436
 
437
        fmulx           %fp3,%fp1               // ...Z*(C3+Z*C5), FP3 RELEASED
438
        fmulx           %fp0,%fp2               // ...Y*(C2+Z*C4)
439
 
440
        faddd           ATANC1,%fp1     // ...C1+Z*(C3+Z*C5)
441
        fmulx           X(%a6),%fp0             // ...X'*Y
442
 
443
        faddx           %fp2,%fp1               // ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
444
 
445
 
446
        fmulx           %fp1,%fp0               // ...X'*Y*([B1+Z*(B3+Z*B5)]
447
//                                      ...     +[Y*(B2+Z*(B4+Z*B6))])
448
        faddx           X(%a6),%fp0
449
 
450
        fmovel          %d1,%FPCR               //restore users exceptions
451
 
452
        btstb           #7,(%a0)
453
        beqs            pos_big
454
 
455
neg_big:
456
        faddx           NPIBY2,%fp0
457
        bra             t_frcinx
458
 
459
pos_big:
460
        faddx           PPIBY2,%fp0
461
        bra             t_frcinx
462
 
463
ATANHUGE:
464
//--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
465
        btstb           #7,(%a0)
466
        beqs            pos_huge
467
 
468
neg_huge:
469
        fmovex          NPIBY2,%fp0
470
        fmovel          %d1,%fpcr
471
        fsubx           NTINY,%fp0
472
        bra             t_frcinx
473
 
474
pos_huge:
475
        fmovex          PPIBY2,%fp0
476
        fmovel          %d1,%fpcr
477
        fsubx           PTINY,%fp0
478
        bra             t_frcinx
479
 
480
        |end

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.