1 |
158 |
chris |
//
|
2 |
208 |
chris |
// $Id: satan.S,v 1.2 2001-09-27 12:01:22 chris Exp $
|
3 |
158 |
chris |
//
|
4 |
|
|
// satan.sa 3.3 12/19/90
|
5 |
|
|
//
|
6 |
|
|
// The entry point satan computes the arctangent of an
|
7 |
|
|
// input value. satand does the same except the input value is a
|
8 |
|
|
// denormalized number.
|
9 |
|
|
//
|
10 |
|
|
// Input: Double-extended value in memory location pointed to by address
|
11 |
|
|
// register a0.
|
12 |
|
|
//
|
13 |
|
|
// Output: Arctan(X) returned in floating-point register Fp0.
|
14 |
|
|
//
|
15 |
|
|
// Accuracy and Monotonicity: The returned result is within 2 ulps in
|
16 |
|
|
// 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
|
17 |
|
|
// result is subsequently rounded to double precision. The
|
18 |
|
|
// result is provably monotonic in double precision.
|
19 |
|
|
//
|
20 |
|
|
// Speed: The program satan takes approximately 160 cycles for input
|
21 |
|
|
// argument X such that 1/16 < |X| < 16. For the other arguments,
|
22 |
|
|
// the program will run no worse than 10% slower.
|
23 |
|
|
//
|
24 |
|
|
// Algorithm:
|
25 |
|
|
// Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
|
26 |
|
|
//
|
27 |
|
|
// Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
|
28 |
|
|
// Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
|
29 |
|
|
// of X with a bit-1 attached at the 6-th bit position. Define u
|
30 |
|
|
// to be u = (X-F) / (1 + X*F).
|
31 |
|
|
//
|
32 |
|
|
// Step 3. Approximate arctan(u) by a polynomial poly.
|
33 |
|
|
//
|
34 |
|
|
// Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values
|
35 |
|
|
// calculated beforehand. Exit.
|
36 |
|
|
//
|
37 |
|
|
// Step 5. If |X| >= 16, go to Step 7.
|
38 |
|
|
//
|
39 |
|
|
// Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
|
40 |
|
|
//
|
41 |
|
|
// Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
|
42 |
|
|
// Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
|
43 |
|
|
//
|
44 |
|
|
|
45 |
|
|
// Copyright (C) Motorola, Inc. 1990
|
46 |
|
|
// All Rights Reserved
|
47 |
|
|
//
|
48 |
|
|
// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
|
49 |
|
|
// The copyright notice above does not evidence any
|
50 |
|
|
// actual or intended publication of such source code.
|
51 |
|
|
|
52 |
|
|
//satan idnt 2,1 | Motorola 040 Floating Point Software Package
|
53 |
|
|
|
54 |
|
|
|section 8
|
55 |
|
|
|
56 |
|
|
#include "fpsp.defs"
|
57 |
|
|
|
58 |
|
|
BOUNDS1: .long 0x3FFB8000,0x4002FFFF
|
59 |
|
|
|
60 |
|
|
ONE: .long 0x3F800000
|
61 |
|
|
|
62 |
|
|
.long 0x00000000
|
63 |
|
|
|
64 |
|
|
ATANA3: .long 0xBFF6687E,0x314987D8
|
65 |
|
|
ATANA2: .long 0x4002AC69,0x34A26DB3
|
66 |
|
|
|
67 |
|
|
ATANA1: .long 0xBFC2476F,0x4E1DA28E
|
68 |
|
|
ATANB6: .long 0x3FB34444,0x7F876989
|
69 |
|
|
|
70 |
|
|
ATANB5: .long 0xBFB744EE,0x7FAF45DB
|
71 |
|
|
ATANB4: .long 0x3FBC71C6,0x46940220
|
72 |
|
|
|
73 |
|
|
ATANB3: .long 0xBFC24924,0x921872F9
|
74 |
|
|
ATANB2: .long 0x3FC99999,0x99998FA9
|
75 |
|
|
|
76 |
|
|
ATANB1: .long 0xBFD55555,0x55555555
|
77 |
|
|
ATANC5: .long 0xBFB70BF3,0x98539E6A
|
78 |
|
|
|
79 |
|
|
ATANC4: .long 0x3FBC7187,0x962D1D7D
|
80 |
|
|
ATANC3: .long 0xBFC24924,0x827107B8
|
81 |
|
|
|
82 |
|
|
ATANC2: .long 0x3FC99999,0x9996263E
|
83 |
|
|
ATANC1: .long 0xBFD55555,0x55555536
|
84 |
|
|
|
85 |
|
|
PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
|
86 |
|
|
NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
|
87 |
|
|
PTINY: .long 0x00010000,0x80000000,0x00000000,0x00000000
|
88 |
|
|
NTINY: .long 0x80010000,0x80000000,0x00000000,0x00000000
|
89 |
|
|
|
90 |
|
|
ATANTBL:
|
91 |
|
|
.long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
|
92 |
|
|
.long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
|
93 |
|
|
.long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
|
94 |
|
|
.long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
|
95 |
|
|
.long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
|
96 |
|
|
.long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
|
97 |
|
|
.long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
|
98 |
|
|
.long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
|
99 |
|
|
.long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
|
100 |
|
|
.long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
|
101 |
|
|
.long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
|
102 |
|
|
.long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
|
103 |
|
|
.long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
|
104 |
|
|
.long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
|
105 |
|
|
.long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
|
106 |
|
|
.long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
|
107 |
|
|
.long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
|
108 |
|
|
.long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
|
109 |
|
|
.long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
|
110 |
|
|
.long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
|
111 |
|
|
.long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
|
112 |
|
|
.long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
|
113 |
|
|
.long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
|
114 |
|
|
.long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
|
115 |
|
|
.long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
|
116 |
|
|
.long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
|
117 |
|
|
.long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
|
118 |
|
|
.long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
|
119 |
|
|
.long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
|
120 |
|
|
.long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
|
121 |
|
|
.long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
|
122 |
|
|
.long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
|
123 |
|
|
.long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
|
124 |
|
|
.long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
|
125 |
|
|
.long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
|
126 |
|
|
.long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
|
127 |
|
|
.long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
|
128 |
|
|
.long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
|
129 |
|
|
.long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
|
130 |
|
|
.long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
|
131 |
|
|
.long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
|
132 |
|
|
.long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
|
133 |
|
|
.long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
|
134 |
|
|
.long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
|
135 |
|
|
.long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
|
136 |
|
|
.long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
|
137 |
|
|
.long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
|
138 |
|
|
.long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
|
139 |
|
|
.long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
|
140 |
|
|
.long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
|
141 |
|
|
.long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
|
142 |
|
|
.long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
|
143 |
|
|
.long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
|
144 |
|
|
.long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
|
145 |
|
|
.long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
|
146 |
|
|
.long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
|
147 |
|
|
.long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
|
148 |
|
|
.long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
|
149 |
|
|
.long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
|
150 |
|
|
.long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
|
151 |
|
|
.long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
|
152 |
|
|
.long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
|
153 |
|
|
.long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
|
154 |
|
|
.long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
|
155 |
|
|
.long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
|
156 |
|
|
.long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
|
157 |
|
|
.long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
|
158 |
|
|
.long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
|
159 |
|
|
.long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
|
160 |
|
|
.long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
|
161 |
|
|
.long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
|
162 |
|
|
.long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
|
163 |
|
|
.long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
|
164 |
|
|
.long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
|
165 |
|
|
.long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
|
166 |
|
|
.long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
|
167 |
|
|
.long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
|
168 |
|
|
.long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
|
169 |
|
|
.long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
|
170 |
|
|
.long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
|
171 |
|
|
.long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
|
172 |
|
|
.long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
|
173 |
|
|
.long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
|
174 |
|
|
.long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
|
175 |
|
|
.long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
|
176 |
|
|
.long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
|
177 |
|
|
.long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
|
178 |
|
|
.long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
|
179 |
|
|
.long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
|
180 |
|
|
.long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
|
181 |
|
|
.long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
|
182 |
|
|
.long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
|
183 |
|
|
.long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
|
184 |
|
|
.long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
|
185 |
|
|
.long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
|
186 |
|
|
.long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
|
187 |
|
|
.long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
|
188 |
|
|
.long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
|
189 |
|
|
.long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
|
190 |
|
|
.long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
|
191 |
|
|
.long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
|
192 |
|
|
.long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
|
193 |
|
|
.long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
|
194 |
|
|
.long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
|
195 |
|
|
.long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
|
196 |
|
|
.long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
|
197 |
|
|
.long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
|
198 |
|
|
.long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
|
199 |
|
|
.long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
|
200 |
|
|
.long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
|
201 |
|
|
.long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
|
202 |
|
|
.long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
|
203 |
|
|
.long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
|
204 |
|
|
.long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
|
205 |
|
|
.long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
|
206 |
|
|
.long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
|
207 |
|
|
.long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
|
208 |
|
|
.long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
|
209 |
|
|
.long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
|
210 |
|
|
.long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
|
211 |
|
|
.long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
|
212 |
|
|
.long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
|
213 |
|
|
.long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
|
214 |
|
|
.long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
|
215 |
|
|
.long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
|
216 |
|
|
.long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
|
217 |
|
|
.long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
|
218 |
|
|
.long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
|
219 |
|
|
|
220 |
|
|
.set X,FP_SCR1
|
221 |
|
|
.set XDCARE,X+2
|
222 |
|
|
.set XFRAC,X+4
|
223 |
|
|
.set XFRACLO,X+8
|
224 |
|
|
|
225 |
|
|
.set ATANF,FP_SCR2
|
226 |
|
|
.set ATANFHI,ATANF+4
|
227 |
|
|
.set ATANFLO,ATANF+8
|
228 |
|
|
|
229 |
|
|
|
230 |
|
|
| xref t_frcinx
|
231 |
|
|
|xref t_extdnrm
|
232 |
|
|
|
233 |
|
|
.global satand
|
234 |
|
|
satand:
|
235 |
|
|
//--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
|
236 |
|
|
|
237 |
|
|
bra t_extdnrm
|
238 |
|
|
|
239 |
|
|
.global satan
|
240 |
|
|
satan:
|
241 |
|
|
//--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
|
242 |
|
|
|
243 |
|
|
fmovex (%a0),%fp0 // ...LOAD INPUT
|
244 |
|
|
|
245 |
|
|
movel (%a0),%d0
|
246 |
|
|
movew 4(%a0),%d0
|
247 |
|
|
fmovex %fp0,X(%a6)
|
248 |
|
|
andil #0x7FFFFFFF,%d0
|
249 |
|
|
|
250 |
|
|
cmpil #0x3FFB8000,%d0 // ...|X| >= 1/16?
|
251 |
|
|
bges ATANOK1
|
252 |
|
|
bra ATANSM
|
253 |
|
|
|
254 |
|
|
ATANOK1:
|
255 |
|
|
cmpil #0x4002FFFF,%d0 // ...|X| < 16 ?
|
256 |
|
|
bles ATANMAIN
|
257 |
|
|
bra ATANBIG
|
258 |
|
|
|
259 |
|
|
|
260 |
|
|
//--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
|
261 |
|
|
//--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
|
262 |
|
|
//--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
|
263 |
|
|
//--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
|
264 |
|
|
//--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
|
265 |
|
|
//--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
|
266 |
|
|
//--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
|
267 |
|
|
//--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE
|
268 |
|
|
//--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
|
269 |
|
|
//--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE
|
270 |
|
|
//--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
|
271 |
|
|
//--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
|
272 |
|
|
//--WILL INVOLVE A VERY LONG POLYNOMIAL.
|
273 |
|
|
|
274 |
|
|
//--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
|
275 |
|
|
//--WE CHOSE F TO BE +-2^K * 1.BBBB1
|
276 |
|
|
//--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
|
277 |
|
|
//--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
|
278 |
|
|
//--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
|
279 |
|
|
//-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
|
280 |
|
|
|
281 |
|
|
ATANMAIN:
|
282 |
|
|
|
283 |
|
|
movew #0x0000,XDCARE(%a6) // ...CLEAN UP X JUST IN CASE
|
284 |
|
|
andil #0xF8000000,XFRAC(%a6) // ...FIRST 5 BITS
|
285 |
|
|
oril #0x04000000,XFRAC(%a6) // ...SET 6-TH BIT TO 1
|
286 |
|
|
movel #0x00000000,XFRACLO(%a6) // ...LOCATION OF X IS NOW F
|
287 |
|
|
|
288 |
|
|
fmovex %fp0,%fp1 // ...FP1 IS X
|
289 |
|
|
fmulx X(%a6),%fp1 // ...FP1 IS X*F, NOTE THAT X*F > 0
|
290 |
|
|
fsubx X(%a6),%fp0 // ...FP0 IS X-F
|
291 |
|
|
fadds #0x3F800000,%fp1 // ...FP1 IS 1 + X*F
|
292 |
|
|
fdivx %fp1,%fp0 // ...FP0 IS U = (X-F)/(1+X*F)
|
293 |
|
|
|
294 |
|
|
//--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
|
295 |
|
|
//--CREATE ATAN(F) AND STORE IT IN ATANF, AND
|
296 |
|
|
//--SAVE REGISTERS FP2.
|
297 |
|
|
|
298 |
|
|
movel %d2,-(%a7) // ...SAVE d2 TEMPORARILY
|
299 |
|
|
movel %d0,%d2 // ...THE EXPO AND 16 BITS OF X
|
300 |
|
|
andil #0x00007800,%d0 // ...4 VARYING BITS OF F'S FRACTION
|
301 |
|
|
andil #0x7FFF0000,%d2 // ...EXPONENT OF F
|
302 |
|
|
subil #0x3FFB0000,%d2 // ...K+4
|
303 |
|
|
asrl #1,%d2
|
304 |
|
|
addl %d2,%d0 // ...THE 7 BITS IDENTIFYING F
|
305 |
|
|
asrl #7,%d0 // ...INDEX INTO TBL OF ATAN(|F|)
|
306 |
|
|
lea ATANTBL,%a1
|
307 |
|
|
addal %d0,%a1 // ...ADDRESS OF ATAN(|F|)
|
308 |
|
|
movel (%a1)+,ATANF(%a6)
|
309 |
|
|
movel (%a1)+,ATANFHI(%a6)
|
310 |
|
|
movel (%a1)+,ATANFLO(%a6) // ...ATANF IS NOW ATAN(|F|)
|
311 |
|
|
movel X(%a6),%d0 // ...LOAD SIGN AND EXPO. AGAIN
|
312 |
|
|
andil #0x80000000,%d0 // ...SIGN(F)
|
313 |
|
|
orl %d0,ATANF(%a6) // ...ATANF IS NOW SIGN(F)*ATAN(|F|)
|
314 |
|
|
movel (%a7)+,%d2 // ...RESTORE d2
|
315 |
|
|
|
316 |
|
|
//--THAT'S ALL I HAVE TO DO FOR NOW,
|
317 |
|
|
//--BUT ALAS, THE DIVIDE IS STILL CRANKING!
|
318 |
|
|
|
319 |
|
|
//--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
|
320 |
|
|
//--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
|
321 |
|
|
//--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
|
322 |
|
|
//--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
|
323 |
|
|
//--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
|
324 |
|
|
//--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
|
325 |
|
|
//--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
|
326 |
|
|
|
327 |
|
|
|
328 |
|
|
fmovex %fp0,%fp1
|
329 |
|
|
fmulx %fp1,%fp1
|
330 |
|
|
fmoved ATANA3,%fp2
|
331 |
|
|
faddx %fp1,%fp2 // ...A3+V
|
332 |
|
|
fmulx %fp1,%fp2 // ...V*(A3+V)
|
333 |
|
|
fmulx %fp0,%fp1 // ...U*V
|
334 |
|
|
faddd ATANA2,%fp2 // ...A2+V*(A3+V)
|
335 |
|
|
fmuld ATANA1,%fp1 // ...A1*U*V
|
336 |
|
|
fmulx %fp2,%fp1 // ...A1*U*V*(A2+V*(A3+V))
|
337 |
|
|
|
338 |
|
|
faddx %fp1,%fp0 // ...ATAN(U), FP1 RELEASED
|
339 |
|
|
fmovel %d1,%FPCR //restore users exceptions
|
340 |
|
|
faddx ATANF(%a6),%fp0 // ...ATAN(X)
|
341 |
|
|
bra t_frcinx
|
342 |
|
|
|
343 |
|
|
ATANBORS:
|
344 |
|
|
//--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
|
345 |
|
|
//--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
|
346 |
|
|
cmpil #0x3FFF8000,%d0
|
347 |
|
|
bgt ATANBIG // ...I.E. |X| >= 16
|
348 |
|
|
|
349 |
|
|
ATANSM:
|
350 |
|
|
//--|X| <= 1/16
|
351 |
|
|
//--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
|
352 |
|
|
//--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
|
353 |
|
|
//--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] )
|
354 |
|
|
//--WHERE Y = X*X, AND Z = Y*Y.
|
355 |
|
|
|
356 |
|
|
cmpil #0x3FD78000,%d0
|
357 |
|
|
blt ATANTINY
|
358 |
|
|
//--COMPUTE POLYNOMIAL
|
359 |
|
|
fmulx %fp0,%fp0 // ...FP0 IS Y = X*X
|
360 |
|
|
|
361 |
|
|
|
362 |
|
|
movew #0x0000,XDCARE(%a6)
|
363 |
|
|
|
364 |
|
|
fmovex %fp0,%fp1
|
365 |
|
|
fmulx %fp1,%fp1 // ...FP1 IS Z = Y*Y
|
366 |
|
|
|
367 |
|
|
fmoved ATANB6,%fp2
|
368 |
|
|
fmoved ATANB5,%fp3
|
369 |
|
|
|
370 |
|
|
fmulx %fp1,%fp2 // ...Z*B6
|
371 |
|
|
fmulx %fp1,%fp3 // ...Z*B5
|
372 |
|
|
|
373 |
|
|
faddd ATANB4,%fp2 // ...B4+Z*B6
|
374 |
|
|
faddd ATANB3,%fp3 // ...B3+Z*B5
|
375 |
|
|
|
376 |
|
|
fmulx %fp1,%fp2 // ...Z*(B4+Z*B6)
|
377 |
|
|
fmulx %fp3,%fp1 // ...Z*(B3+Z*B5)
|
378 |
|
|
|
379 |
|
|
faddd ATANB2,%fp2 // ...B2+Z*(B4+Z*B6)
|
380 |
|
|
faddd ATANB1,%fp1 // ...B1+Z*(B3+Z*B5)
|
381 |
|
|
|
382 |
|
|
fmulx %fp0,%fp2 // ...Y*(B2+Z*(B4+Z*B6))
|
383 |
|
|
fmulx X(%a6),%fp0 // ...X*Y
|
384 |
|
|
|
385 |
|
|
faddx %fp2,%fp1 // ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
|
386 |
|
|
|
387 |
|
|
|
388 |
|
|
fmulx %fp1,%fp0 // ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
|
389 |
|
|
|
390 |
|
|
fmovel %d1,%FPCR //restore users exceptions
|
391 |
|
|
faddx X(%a6),%fp0
|
392 |
|
|
|
393 |
|
|
bra t_frcinx
|
394 |
|
|
|
395 |
|
|
ATANTINY:
|
396 |
|
|
//--|X| < 2^(-40), ATAN(X) = X
|
397 |
|
|
movew #0x0000,XDCARE(%a6)
|
398 |
|
|
|
399 |
|
|
fmovel %d1,%FPCR //restore users exceptions
|
400 |
|
|
fmovex X(%a6),%fp0 //last inst - possible exception set
|
401 |
|
|
|
402 |
|
|
bra t_frcinx
|
403 |
|
|
|
404 |
|
|
ATANBIG:
|
405 |
|
|
//--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
|
406 |
|
|
//--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
|
407 |
|
|
cmpil #0x40638000,%d0
|
408 |
|
|
bgt ATANHUGE
|
409 |
|
|
|
410 |
|
|
//--APPROXIMATE ATAN(-1/X) BY
|
411 |
|
|
//--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
|
412 |
|
|
//--THIS CAN BE RE-WRITTEN AS
|
413 |
|
|
//--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
|
414 |
|
|
|
415 |
|
|
fmoves #0xBF800000,%fp1 // ...LOAD -1
|
416 |
|
|
fdivx %fp0,%fp1 // ...FP1 IS -1/X
|
417 |
|
|
|
418 |
|
|
|
419 |
|
|
//--DIVIDE IS STILL CRANKING
|
420 |
|
|
|
421 |
|
|
fmovex %fp1,%fp0 // ...FP0 IS X'
|
422 |
|
|
fmulx %fp0,%fp0 // ...FP0 IS Y = X'*X'
|
423 |
|
|
fmovex %fp1,X(%a6) // ...X IS REALLY X'
|
424 |
|
|
|
425 |
|
|
fmovex %fp0,%fp1
|
426 |
|
|
fmulx %fp1,%fp1 // ...FP1 IS Z = Y*Y
|
427 |
|
|
|
428 |
|
|
fmoved ATANC5,%fp3
|
429 |
|
|
fmoved ATANC4,%fp2
|
430 |
|
|
|
431 |
|
|
fmulx %fp1,%fp3 // ...Z*C5
|
432 |
|
|
fmulx %fp1,%fp2 // ...Z*B4
|
433 |
|
|
|
434 |
|
|
faddd ATANC3,%fp3 // ...C3+Z*C5
|
435 |
|
|
faddd ATANC2,%fp2 // ...C2+Z*C4
|
436 |
|
|
|
437 |
|
|
fmulx %fp3,%fp1 // ...Z*(C3+Z*C5), FP3 RELEASED
|
438 |
|
|
fmulx %fp0,%fp2 // ...Y*(C2+Z*C4)
|
439 |
|
|
|
440 |
|
|
faddd ATANC1,%fp1 // ...C1+Z*(C3+Z*C5)
|
441 |
|
|
fmulx X(%a6),%fp0 // ...X'*Y
|
442 |
|
|
|
443 |
|
|
faddx %fp2,%fp1 // ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
|
444 |
|
|
|
445 |
|
|
|
446 |
|
|
fmulx %fp1,%fp0 // ...X'*Y*([B1+Z*(B3+Z*B5)]
|
447 |
|
|
// ... +[Y*(B2+Z*(B4+Z*B6))])
|
448 |
|
|
faddx X(%a6),%fp0
|
449 |
|
|
|
450 |
|
|
fmovel %d1,%FPCR //restore users exceptions
|
451 |
|
|
|
452 |
|
|
btstb #7,(%a0)
|
453 |
|
|
beqs pos_big
|
454 |
|
|
|
455 |
|
|
neg_big:
|
456 |
|
|
faddx NPIBY2,%fp0
|
457 |
|
|
bra t_frcinx
|
458 |
|
|
|
459 |
|
|
pos_big:
|
460 |
|
|
faddx PPIBY2,%fp0
|
461 |
|
|
bra t_frcinx
|
462 |
|
|
|
463 |
|
|
ATANHUGE:
|
464 |
|
|
//--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
|
465 |
|
|
btstb #7,(%a0)
|
466 |
|
|
beqs pos_huge
|
467 |
|
|
|
468 |
|
|
neg_huge:
|
469 |
|
|
fmovex NPIBY2,%fp0
|
470 |
|
|
fmovel %d1,%fpcr
|
471 |
|
|
fsubx NTINY,%fp0
|
472 |
|
|
bra t_frcinx
|
473 |
|
|
|
474 |
|
|
pos_huge:
|
475 |
|
|
fmovex PPIBY2,%fp0
|
476 |
|
|
fmovel %d1,%fpcr
|
477 |
|
|
fsubx PTINY,%fp0
|
478 |
|
|
bra t_frcinx
|
479 |
|
|
|
480 |
|
|
|end
|