1 |
30 |
unneback |
//
|
2 |
|
|
// $Id: slogn.S,v 1.2 2001-09-27 12:01:22 chris Exp $
|
3 |
|
|
//
|
4 |
|
|
// slogn.sa 3.1 12/10/90
|
5 |
|
|
//
|
6 |
|
|
// slogn computes the natural logarithm of an
|
7 |
|
|
// input value. slognd does the same except the input value is a
|
8 |
|
|
// denormalized number. slognp1 computes log(1+X), and slognp1d
|
9 |
|
|
// computes log(1+X) for denormalized X.
|
10 |
|
|
//
|
11 |
|
|
// Input: Double-extended value in memory location pointed to by address
|
12 |
|
|
// register a0.
|
13 |
|
|
//
|
14 |
|
|
// Output: log(X) or log(1+X) returned in floating-point register Fp0.
|
15 |
|
|
//
|
16 |
|
|
// Accuracy and Monotonicity: The returned result is within 2 ulps in
|
17 |
|
|
// 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
|
18 |
|
|
// result is subsequently rounded to double precision. The
|
19 |
|
|
// result is provably monotonic in double precision.
|
20 |
|
|
//
|
21 |
|
|
// Speed: The program slogn takes approximately 190 cycles for input
|
22 |
|
|
// argument X such that |X-1| >= 1/16, which is the the usual
|
23 |
|
|
// situation. For those arguments, slognp1 takes approximately
|
24 |
|
|
// 210 cycles. For the less common arguments, the program will
|
25 |
|
|
// run no worse than 10% slower.
|
26 |
|
|
//
|
27 |
|
|
// Algorithm:
|
28 |
|
|
// LOGN:
|
29 |
|
|
// Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
|
30 |
|
|
// u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
|
31 |
|
|
//
|
32 |
|
|
// Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
|
33 |
|
|
// significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
|
34 |
|
|
// 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
|
35 |
|
|
//
|
36 |
|
|
// Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
|
37 |
|
|
// log(1+u) = poly.
|
38 |
|
|
//
|
39 |
|
|
// Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
|
40 |
|
|
// by k*log(2) + (log(F) + poly). The values of log(F) are calculated
|
41 |
|
|
// beforehand and stored in the program.
|
42 |
|
|
//
|
43 |
|
|
// lognp1:
|
44 |
|
|
// Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
|
45 |
|
|
// u where u = 2X/(2+X). Otherwise, move on to Step 2.
|
46 |
|
|
//
|
47 |
|
|
// Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
|
48 |
|
|
// of the algorithm for LOGN and compute log(1+X) as
|
49 |
|
|
// k*log(2) + log(F) + poly where poly approximates log(1+u),
|
50 |
|
|
// u = (Y-F)/F.
|
51 |
|
|
//
|
52 |
|
|
// Implementation Notes:
|
53 |
|
|
// Note 1. There are 64 different possible values for F, thus 64 log(F)'s
|
54 |
|
|
// need to be tabulated. Moreover, the values of 1/F are also
|
55 |
|
|
// tabulated so that the division in (Y-F)/F can be performed by a
|
56 |
|
|
// multiplication.
|
57 |
|
|
//
|
58 |
|
|
// Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
|
59 |
|
|
// Y-F has to be calculated carefully when 1/2 <= X < 3/2.
|
60 |
|
|
//
|
61 |
|
|
// Note 3. To fully exploit the pipeline, polynomials are usually separated
|
62 |
|
|
// into two parts evaluated independently before being added up.
|
63 |
|
|
//
|
64 |
|
|
|
65 |
|
|
// Copyright (C) Motorola, Inc. 1990
|
66 |
|
|
// All Rights Reserved
|
67 |
|
|
//
|
68 |
|
|
// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
|
69 |
|
|
// The copyright notice above does not evidence any
|
70 |
|
|
// actual or intended publication of such source code.
|
71 |
|
|
|
72 |
|
|
//slogn idnt 2,1 | Motorola 040 Floating Point Software Package
|
73 |
|
|
|
74 |
|
|
|section 8
|
75 |
|
|
|
76 |
|
|
#include "fpsp.defs"
|
77 |
|
|
|
78 |
|
|
BOUNDS1: .long 0x3FFEF07D,0x3FFF8841
|
79 |
|
|
BOUNDS2: .long 0x3FFE8000,0x3FFFC000
|
80 |
|
|
|
81 |
|
|
LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
|
82 |
|
|
|
83 |
|
|
one: .long 0x3F800000
|
84 |
|
|
zero: .long 0x00000000
|
85 |
|
|
infty: .long 0x7F800000
|
86 |
|
|
negone: .long 0xBF800000
|
87 |
|
|
|
88 |
|
|
LOGA6: .long 0x3FC2499A,0xB5E4040B
|
89 |
|
|
LOGA5: .long 0xBFC555B5,0x848CB7DB
|
90 |
|
|
|
91 |
|
|
LOGA4: .long 0x3FC99999,0x987D8730
|
92 |
|
|
LOGA3: .long 0xBFCFFFFF,0xFF6F7E97
|
93 |
|
|
|
94 |
|
|
LOGA2: .long 0x3FD55555,0x555555a4
|
95 |
|
|
LOGA1: .long 0xBFE00000,0x00000008
|
96 |
|
|
|
97 |
|
|
LOGB5: .long 0x3F175496,0xADD7DAD6
|
98 |
|
|
LOGB4: .long 0x3F3C71C2,0xFE80C7E0
|
99 |
|
|
|
100 |
|
|
LOGB3: .long 0x3F624924,0x928BCCFF
|
101 |
|
|
LOGB2: .long 0x3F899999,0x999995EC
|
102 |
|
|
|
103 |
|
|
LOGB1: .long 0x3FB55555,0x55555555
|
104 |
|
|
TWO: .long 0x40000000,0x00000000
|
105 |
|
|
|
106 |
|
|
LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
|
107 |
|
|
|
108 |
|
|
LOGTBL:
|
109 |
|
|
.long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
|
110 |
|
|
.long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000
|
111 |
|
|
.long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
|
112 |
|
|
.long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
|
113 |
|
|
.long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
|
114 |
|
|
.long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
|
115 |
|
|
.long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
|
116 |
|
|
.long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
|
117 |
|
|
.long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
|
118 |
|
|
.long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
|
119 |
|
|
.long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
|
120 |
|
|
.long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
|
121 |
|
|
.long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
|
122 |
|
|
.long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
|
123 |
|
|
.long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
|
124 |
|
|
.long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
|
125 |
|
|
.long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
|
126 |
|
|
.long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
|
127 |
|
|
.long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
|
128 |
|
|
.long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
|
129 |
|
|
.long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
|
130 |
|
|
.long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
|
131 |
|
|
.long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
|
132 |
|
|
.long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
|
133 |
|
|
.long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
|
134 |
|
|
.long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
|
135 |
|
|
.long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
|
136 |
|
|
.long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
|
137 |
|
|
.long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
|
138 |
|
|
.long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
|
139 |
|
|
.long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
|
140 |
|
|
.long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
|
141 |
|
|
.long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
|
142 |
|
|
.long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
|
143 |
|
|
.long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
|
144 |
|
|
.long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
|
145 |
|
|
.long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
|
146 |
|
|
.long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
|
147 |
|
|
.long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
|
148 |
|
|
.long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
|
149 |
|
|
.long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
|
150 |
|
|
.long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
|
151 |
|
|
.long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
|
152 |
|
|
.long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
|
153 |
|
|
.long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
|
154 |
|
|
.long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
|
155 |
|
|
.long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
|
156 |
|
|
.long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
|
157 |
|
|
.long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
|
158 |
|
|
.long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
|
159 |
|
|
.long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
|
160 |
|
|
.long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
|
161 |
|
|
.long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
|
162 |
|
|
.long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
|
163 |
|
|
.long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
|
164 |
|
|
.long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
|
165 |
|
|
.long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
|
166 |
|
|
.long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
|
167 |
|
|
.long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
|
168 |
|
|
.long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
|
169 |
|
|
.long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
|
170 |
|
|
.long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
|
171 |
|
|
.long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
|
172 |
|
|
.long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
|
173 |
|
|
.long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
|
174 |
|
|
.long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
|
175 |
|
|
.long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
|
176 |
|
|
.long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
|
177 |
|
|
.long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
|
178 |
|
|
.long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
|
179 |
|
|
.long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
|
180 |
|
|
.long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
|
181 |
|
|
.long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
|
182 |
|
|
.long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
|
183 |
|
|
.long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
|
184 |
|
|
.long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
|
185 |
|
|
.long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
|
186 |
|
|
.long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
|
187 |
|
|
.long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
|
188 |
|
|
.long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
|
189 |
|
|
.long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
|
190 |
|
|
.long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
|
191 |
|
|
.long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
|
192 |
|
|
.long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
|
193 |
|
|
.long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
|
194 |
|
|
.long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
|
195 |
|
|
.long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
|
196 |
|
|
.long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
|
197 |
|
|
.long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
|
198 |
|
|
.long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
|
199 |
|
|
.long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
|
200 |
|
|
.long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
|
201 |
|
|
.long 0x3FFE0000,0x94458094,0x45809446,0x00000000
|
202 |
|
|
.long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
|
203 |
|
|
.long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
|
204 |
|
|
.long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
|
205 |
|
|
.long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
|
206 |
|
|
.long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
|
207 |
|
|
.long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
|
208 |
|
|
.long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
|
209 |
|
|
.long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
|
210 |
|
|
.long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
|
211 |
|
|
.long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
|
212 |
|
|
.long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
|
213 |
|
|
.long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
|
214 |
|
|
.long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
|
215 |
|
|
.long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
|
216 |
|
|
.long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
|
217 |
|
|
.long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
|
218 |
|
|
.long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
|
219 |
|
|
.long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
|
220 |
|
|
.long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
|
221 |
|
|
.long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
|
222 |
|
|
.long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
|
223 |
|
|
.long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
|
224 |
|
|
.long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
|
225 |
|
|
.long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
|
226 |
|
|
.long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
|
227 |
|
|
.long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
|
228 |
|
|
.long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
|
229 |
|
|
.long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
|
230 |
|
|
.long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
|
231 |
|
|
.long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
|
232 |
|
|
.long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
|
233 |
|
|
.long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
|
234 |
|
|
.long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
|
235 |
|
|
.long 0x3FFE0000,0x80808080,0x80808081,0x00000000
|
236 |
|
|
.long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
|
237 |
|
|
|
238 |
|
|
.set ADJK,L_SCR1
|
239 |
|
|
|
240 |
|
|
.set X,FP_SCR1
|
241 |
|
|
.set XDCARE,X+2
|
242 |
|
|
.set XFRAC,X+4
|
243 |
|
|
|
244 |
|
|
.set F,FP_SCR2
|
245 |
|
|
.set FFRAC,F+4
|
246 |
|
|
|
247 |
|
|
.set KLOG2,FP_SCR3
|
248 |
|
|
|
249 |
|
|
.set SAVEU,FP_SCR4
|
250 |
|
|
|
251 |
|
|
| xref t_frcinx
|
252 |
|
|
|xref t_extdnrm
|
253 |
|
|
|xref t_operr
|
254 |
|
|
|xref t_dz
|
255 |
|
|
|
256 |
|
|
.global slognd
|
257 |
|
|
slognd:
|
258 |
|
|
//--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
|
259 |
|
|
|
260 |
|
|
movel #-100,ADJK(%a6) // ...INPUT = 2^(ADJK) * FP0
|
261 |
|
|
|
262 |
|
|
//----normalize the input value by left shifting k bits (k to be determined
|
263 |
|
|
//----below), adjusting exponent and storing -k to ADJK
|
264 |
|
|
//----the value TWOTO100 is no longer needed.
|
265 |
|
|
//----Note that this code assumes the denormalized input is NON-ZERO.
|
266 |
|
|
|
267 |
|
|
moveml %d2-%d7,-(%a7) // ...save some registers
|
268 |
|
|
movel #0x00000000,%d3 // ...D3 is exponent of smallest norm. #
|
269 |
|
|
movel 4(%a0),%d4
|
270 |
|
|
movel 8(%a0),%d5 // ...(D4,D5) is (Hi_X,Lo_X)
|
271 |
|
|
clrl %d2 // ...D2 used for holding K
|
272 |
|
|
|
273 |
|
|
tstl %d4
|
274 |
|
|
bnes HiX_not0
|
275 |
|
|
|
276 |
|
|
HiX_0:
|
277 |
|
|
movel %d5,%d4
|
278 |
|
|
clrl %d5
|
279 |
|
|
movel #32,%d2
|
280 |
|
|
clrl %d6
|
281 |
|
|
bfffo %d4{#0:#32},%d6
|
282 |
|
|
lsll %d6,%d4
|
283 |
|
|
addl %d6,%d2 // ...(D3,D4,D5) is normalized
|
284 |
|
|
|
285 |
|
|
movel %d3,X(%a6)
|
286 |
|
|
movel %d4,XFRAC(%a6)
|
287 |
|
|
movel %d5,XFRAC+4(%a6)
|
288 |
|
|
negl %d2
|
289 |
|
|
movel %d2,ADJK(%a6)
|
290 |
|
|
fmovex X(%a6),%fp0
|
291 |
|
|
moveml (%a7)+,%d2-%d7 // ...restore registers
|
292 |
|
|
lea X(%a6),%a0
|
293 |
|
|
bras LOGBGN // ...begin regular log(X)
|
294 |
|
|
|
295 |
|
|
|
296 |
|
|
HiX_not0:
|
297 |
|
|
clrl %d6
|
298 |
|
|
bfffo %d4{#0:#32},%d6 // ...find first 1
|
299 |
|
|
movel %d6,%d2 // ...get k
|
300 |
|
|
lsll %d6,%d4
|
301 |
|
|
movel %d5,%d7 // ...a copy of D5
|
302 |
|
|
lsll %d6,%d5
|
303 |
|
|
negl %d6
|
304 |
|
|
addil #32,%d6
|
305 |
|
|
lsrl %d6,%d7
|
306 |
|
|
orl %d7,%d4 // ...(D3,D4,D5) normalized
|
307 |
|
|
|
308 |
|
|
movel %d3,X(%a6)
|
309 |
|
|
movel %d4,XFRAC(%a6)
|
310 |
|
|
movel %d5,XFRAC+4(%a6)
|
311 |
|
|
negl %d2
|
312 |
|
|
movel %d2,ADJK(%a6)
|
313 |
|
|
fmovex X(%a6),%fp0
|
314 |
|
|
moveml (%a7)+,%d2-%d7 // ...restore registers
|
315 |
|
|
lea X(%a6),%a0
|
316 |
|
|
bras LOGBGN // ...begin regular log(X)
|
317 |
|
|
|
318 |
|
|
|
319 |
|
|
.global slogn
|
320 |
|
|
slogn:
|
321 |
|
|
//--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
|
322 |
|
|
|
323 |
|
|
fmovex (%a0),%fp0 // ...LOAD INPUT
|
324 |
|
|
movel #0x00000000,ADJK(%a6)
|
325 |
|
|
|
326 |
|
|
LOGBGN:
|
327 |
|
|
//--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
|
328 |
|
|
//--A FINITE, NON-ZERO, NORMALIZED NUMBER.
|
329 |
|
|
|
330 |
|
|
movel (%a0),%d0
|
331 |
|
|
movew 4(%a0),%d0
|
332 |
|
|
|
333 |
|
|
movel (%a0),X(%a6)
|
334 |
|
|
movel 4(%a0),X+4(%a6)
|
335 |
|
|
movel 8(%a0),X+8(%a6)
|
336 |
|
|
|
337 |
|
|
cmpil #0,%d0 // ...CHECK IF X IS NEGATIVE
|
338 |
|
|
blt LOGNEG // ...LOG OF NEGATIVE ARGUMENT IS INVALID
|
339 |
|
|
cmp2l BOUNDS1,%d0 // ...X IS POSITIVE, CHECK IF X IS NEAR 1
|
340 |
|
|
bcc LOGNEAR1 // ...BOUNDS IS ROUGHLY [15/16, 17/16]
|
341 |
|
|
|
342 |
|
|
LOGMAIN:
|
343 |
|
|
//--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
|
344 |
|
|
|
345 |
|
|
//--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
|
346 |
|
|
//--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
|
347 |
|
|
//--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
|
348 |
|
|
//-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
|
349 |
|
|
//--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
|
350 |
|
|
//--LOG(1+U) CAN BE VERY EFFICIENT.
|
351 |
|
|
//--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
|
352 |
|
|
//--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
|
353 |
|
|
|
354 |
|
|
//--GET K, Y, F, AND ADDRESS OF 1/F.
|
355 |
|
|
asrl #8,%d0
|
356 |
|
|
asrl #8,%d0 // ...SHIFTED 16 BITS, BIASED EXPO. OF X
|
357 |
|
|
subil #0x3FFF,%d0 // ...THIS IS K
|
358 |
|
|
addl ADJK(%a6),%d0 // ...ADJUST K, ORIGINAL INPUT MAY BE DENORM.
|
359 |
|
|
lea LOGTBL,%a0 // ...BASE ADDRESS OF 1/F AND LOG(F)
|
360 |
|
|
fmovel %d0,%fp1 // ...CONVERT K TO FLOATING-POINT FORMAT
|
361 |
|
|
|
362 |
|
|
//--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
|
363 |
|
|
movel #0x3FFF0000,X(%a6) // ...X IS NOW Y, I.E. 2^(-K)*X
|
364 |
|
|
movel XFRAC(%a6),FFRAC(%a6)
|
365 |
|
|
andil #0xFE000000,FFRAC(%a6) // ...FIRST 7 BITS OF Y
|
366 |
|
|
oril #0x01000000,FFRAC(%a6) // ...GET F: ATTACH A 1 AT THE EIGHTH BIT
|
367 |
|
|
movel FFRAC(%a6),%d0 // ...READY TO GET ADDRESS OF 1/F
|
368 |
|
|
andil #0x7E000000,%d0
|
369 |
|
|
asrl #8,%d0
|
370 |
|
|
asrl #8,%d0
|
371 |
|
|
asrl #4,%d0 // ...SHIFTED 20, D0 IS THE DISPLACEMENT
|
372 |
|
|
addal %d0,%a0 // ...A0 IS THE ADDRESS FOR 1/F
|
373 |
|
|
|
374 |
|
|
fmovex X(%a6),%fp0
|
375 |
|
|
movel #0x3fff0000,F(%a6)
|
376 |
|
|
clrl F+8(%a6)
|
377 |
|
|
fsubx F(%a6),%fp0 // ...Y-F
|
378 |
|
|
fmovemx %fp2-%fp2/%fp3,-(%sp) // ...SAVE FP2 WHILE FP0 IS NOT READY
|
379 |
|
|
//--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
|
380 |
|
|
//--REGISTERS SAVED: FPCR, FP1, FP2
|
381 |
|
|
|
382 |
|
|
LP1CONT1:
|
383 |
|
|
//--AN RE-ENTRY POINT FOR LOGNP1
|
384 |
|
|
fmulx (%a0),%fp0 // ...FP0 IS U = (Y-F)/F
|
385 |
|
|
fmulx LOGOF2,%fp1 // ...GET K*LOG2 WHILE FP0 IS NOT READY
|
386 |
|
|
fmovex %fp0,%fp2
|
387 |
|
|
fmulx %fp2,%fp2 // ...FP2 IS V=U*U
|
388 |
|
|
fmovex %fp1,KLOG2(%a6) // ...PUT K*LOG2 IN MEMORY, FREE FP1
|
389 |
|
|
|
390 |
|
|
//--LOG(1+U) IS APPROXIMATED BY
|
391 |
|
|
//--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
|
392 |
|
|
//--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
|
393 |
|
|
|
394 |
|
|
fmovex %fp2,%fp3
|
395 |
|
|
fmovex %fp2,%fp1
|
396 |
|
|
|
397 |
|
|
fmuld LOGA6,%fp1 // ...V*A6
|
398 |
|
|
fmuld LOGA5,%fp2 // ...V*A5
|
399 |
|
|
|
400 |
|
|
faddd LOGA4,%fp1 // ...A4+V*A6
|
401 |
|
|
faddd LOGA3,%fp2 // ...A3+V*A5
|
402 |
|
|
|
403 |
|
|
fmulx %fp3,%fp1 // ...V*(A4+V*A6)
|
404 |
|
|
fmulx %fp3,%fp2 // ...V*(A3+V*A5)
|
405 |
|
|
|
406 |
|
|
faddd LOGA2,%fp1 // ...A2+V*(A4+V*A6)
|
407 |
|
|
faddd LOGA1,%fp2 // ...A1+V*(A3+V*A5)
|
408 |
|
|
|
409 |
|
|
fmulx %fp3,%fp1 // ...V*(A2+V*(A4+V*A6))
|
410 |
|
|
addal #16,%a0 // ...ADDRESS OF LOG(F)
|
411 |
|
|
fmulx %fp3,%fp2 // ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
|
412 |
|
|
|
413 |
|
|
fmulx %fp0,%fp1 // ...U*V*(A2+V*(A4+V*A6))
|
414 |
|
|
faddx %fp2,%fp0 // ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
|
415 |
|
|
|
416 |
|
|
faddx (%a0),%fp1 // ...LOG(F)+U*V*(A2+V*(A4+V*A6))
|
417 |
|
|
fmovemx (%sp)+,%fp2-%fp2/%fp3 // ...RESTORE FP2
|
418 |
|
|
faddx %fp1,%fp0 // ...FP0 IS LOG(F) + LOG(1+U)
|
419 |
|
|
|
420 |
|
|
fmovel %d1,%fpcr
|
421 |
|
|
faddx KLOG2(%a6),%fp0 // ...FINAL ADD
|
422 |
|
|
bra t_frcinx
|
423 |
|
|
|
424 |
|
|
|
425 |
|
|
LOGNEAR1:
|
426 |
|
|
//--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
|
427 |
|
|
fmovex %fp0,%fp1
|
428 |
|
|
fsubs one,%fp1 // ...FP1 IS X-1
|
429 |
|
|
fadds one,%fp0 // ...FP0 IS X+1
|
430 |
|
|
faddx %fp1,%fp1 // ...FP1 IS 2(X-1)
|
431 |
|
|
//--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
|
432 |
|
|
//--IN U, U = 2(X-1)/(X+1) = FP1/FP0
|
433 |
|
|
|
434 |
|
|
LP1CONT2:
|
435 |
|
|
//--THIS IS AN RE-ENTRY POINT FOR LOGNP1
|
436 |
|
|
fdivx %fp0,%fp1 // ...FP1 IS U
|
437 |
|
|
fmovemx %fp2-%fp2/%fp3,-(%sp) // ...SAVE FP2
|
438 |
|
|
//--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
|
439 |
|
|
//--LET V=U*U, W=V*V, CALCULATE
|
440 |
|
|
//--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
|
441 |
|
|
//--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
|
442 |
|
|
fmovex %fp1,%fp0
|
443 |
|
|
fmulx %fp0,%fp0 // ...FP0 IS V
|
444 |
|
|
fmovex %fp1,SAVEU(%a6) // ...STORE U IN MEMORY, FREE FP1
|
445 |
|
|
fmovex %fp0,%fp1
|
446 |
|
|
fmulx %fp1,%fp1 // ...FP1 IS W
|
447 |
|
|
|
448 |
|
|
fmoved LOGB5,%fp3
|
449 |
|
|
fmoved LOGB4,%fp2
|
450 |
|
|
|
451 |
|
|
fmulx %fp1,%fp3 // ...W*B5
|
452 |
|
|
fmulx %fp1,%fp2 // ...W*B4
|
453 |
|
|
|
454 |
|
|
faddd LOGB3,%fp3 // ...B3+W*B5
|
455 |
|
|
faddd LOGB2,%fp2 // ...B2+W*B4
|
456 |
|
|
|
457 |
|
|
fmulx %fp3,%fp1 // ...W*(B3+W*B5), FP3 RELEASED
|
458 |
|
|
|
459 |
|
|
fmulx %fp0,%fp2 // ...V*(B2+W*B4)
|
460 |
|
|
|
461 |
|
|
faddd LOGB1,%fp1 // ...B1+W*(B3+W*B5)
|
462 |
|
|
fmulx SAVEU(%a6),%fp0 // ...FP0 IS U*V
|
463 |
|
|
|
464 |
|
|
faddx %fp2,%fp1 // ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
|
465 |
|
|
fmovemx (%sp)+,%fp2-%fp2/%fp3 // ...FP2 RESTORED
|
466 |
|
|
|
467 |
|
|
fmulx %fp1,%fp0 // ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
|
468 |
|
|
|
469 |
|
|
fmovel %d1,%fpcr
|
470 |
|
|
faddx SAVEU(%a6),%fp0
|
471 |
|
|
bra t_frcinx
|
472 |
|
|
rts
|
473 |
|
|
|
474 |
|
|
LOGNEG:
|
475 |
|
|
//--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
|
476 |
|
|
bra t_operr
|
477 |
|
|
|
478 |
|
|
.global slognp1d
|
479 |
|
|
slognp1d:
|
480 |
|
|
//--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
|
481 |
|
|
// Simply return the denorm
|
482 |
|
|
|
483 |
|
|
bra t_extdnrm
|
484 |
|
|
|
485 |
|
|
.global slognp1
|
486 |
|
|
slognp1:
|
487 |
|
|
//--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
|
488 |
|
|
|
489 |
|
|
fmovex (%a0),%fp0 // ...LOAD INPUT
|
490 |
|
|
fabsx %fp0 //test magnitude
|
491 |
|
|
fcmpx LTHOLD,%fp0 //compare with min threshold
|
492 |
|
|
fbgt LP1REAL //if greater, continue
|
493 |
|
|
fmovel #0,%fpsr //clr N flag from compare
|
494 |
|
|
fmovel %d1,%fpcr
|
495 |
|
|
fmovex (%a0),%fp0 //return signed argument
|
496 |
|
|
bra t_frcinx
|
497 |
|
|
|
498 |
|
|
LP1REAL:
|
499 |
|
|
fmovex (%a0),%fp0 // ...LOAD INPUT
|
500 |
|
|
movel #0x00000000,ADJK(%a6)
|
501 |
|
|
fmovex %fp0,%fp1 // ...FP1 IS INPUT Z
|
502 |
|
|
fadds one,%fp0 // ...X := ROUND(1+Z)
|
503 |
|
|
fmovex %fp0,X(%a6)
|
504 |
|
|
movew XFRAC(%a6),XDCARE(%a6)
|
505 |
|
|
movel X(%a6),%d0
|
506 |
|
|
cmpil #0,%d0
|
507 |
|
|
ble LP1NEG0 // ...LOG OF ZERO OR -VE
|
508 |
|
|
cmp2l BOUNDS2,%d0
|
509 |
|
|
bcs LOGMAIN // ...BOUNDS2 IS [1/2,3/2]
|
510 |
|
|
//--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
|
511 |
|
|
//--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
|
512 |
|
|
//--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
|
513 |
|
|
|
514 |
|
|
LP1NEAR1:
|
515 |
|
|
//--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
|
516 |
|
|
cmp2l BOUNDS1,%d0
|
517 |
|
|
bcss LP1CARE
|
518 |
|
|
|
519 |
|
|
LP1ONE16:
|
520 |
|
|
//--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
|
521 |
|
|
//--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
|
522 |
|
|
faddx %fp1,%fp1 // ...FP1 IS 2Z
|
523 |
|
|
fadds one,%fp0 // ...FP0 IS 1+X
|
524 |
|
|
//--U = FP1/FP0
|
525 |
|
|
bra LP1CONT2
|
526 |
|
|
|
527 |
|
|
LP1CARE:
|
528 |
|
|
//--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
|
529 |
|
|
//--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
|
530 |
|
|
//--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
|
531 |
|
|
//--THERE ARE ONLY TWO CASES.
|
532 |
|
|
//--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
|
533 |
|
|
//--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
|
534 |
|
|
//--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
|
535 |
|
|
//--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
|
536 |
|
|
|
537 |
|
|
movel XFRAC(%a6),FFRAC(%a6)
|
538 |
|
|
andil #0xFE000000,FFRAC(%a6)
|
539 |
|
|
oril #0x01000000,FFRAC(%a6) // ...F OBTAINED
|
540 |
|
|
cmpil #0x3FFF8000,%d0 // ...SEE IF 1+Z > 1
|
541 |
|
|
bges KISZERO
|
542 |
|
|
|
543 |
|
|
KISNEG1:
|
544 |
|
|
fmoves TWO,%fp0
|
545 |
|
|
movel #0x3fff0000,F(%a6)
|
546 |
|
|
clrl F+8(%a6)
|
547 |
|
|
fsubx F(%a6),%fp0 // ...2-F
|
548 |
|
|
movel FFRAC(%a6),%d0
|
549 |
|
|
andil #0x7E000000,%d0
|
550 |
|
|
asrl #8,%d0
|
551 |
|
|
asrl #8,%d0
|
552 |
|
|
asrl #4,%d0 // ...D0 CONTAINS DISPLACEMENT FOR 1/F
|
553 |
|
|
faddx %fp1,%fp1 // ...GET 2Z
|
554 |
|
|
fmovemx %fp2-%fp2/%fp3,-(%sp) // ...SAVE FP2
|
555 |
|
|
faddx %fp1,%fp0 // ...FP0 IS Y-F = (2-F)+2Z
|
556 |
|
|
lea LOGTBL,%a0 // ...A0 IS ADDRESS OF 1/F
|
557 |
|
|
addal %d0,%a0
|
558 |
|
|
fmoves negone,%fp1 // ...FP1 IS K = -1
|
559 |
|
|
bra LP1CONT1
|
560 |
|
|
|
561 |
|
|
KISZERO:
|
562 |
|
|
fmoves one,%fp0
|
563 |
|
|
movel #0x3fff0000,F(%a6)
|
564 |
|
|
clrl F+8(%a6)
|
565 |
|
|
fsubx F(%a6),%fp0 // ...1-F
|
566 |
|
|
movel FFRAC(%a6),%d0
|
567 |
|
|
andil #0x7E000000,%d0
|
568 |
|
|
asrl #8,%d0
|
569 |
|
|
asrl #8,%d0
|
570 |
|
|
asrl #4,%d0
|
571 |
|
|
faddx %fp1,%fp0 // ...FP0 IS Y-F
|
572 |
|
|
fmovemx %fp2-%fp2/%fp3,-(%sp) // ...FP2 SAVED
|
573 |
|
|
lea LOGTBL,%a0
|
574 |
|
|
addal %d0,%a0 // ...A0 IS ADDRESS OF 1/F
|
575 |
|
|
fmoves zero,%fp1 // ...FP1 IS K = 0
|
576 |
|
|
bra LP1CONT1
|
577 |
|
|
|
578 |
|
|
LP1NEG0:
|
579 |
|
|
//--FPCR SAVED. D0 IS X IN COMPACT FORM.
|
580 |
|
|
cmpil #0,%d0
|
581 |
|
|
blts LP1NEG
|
582 |
|
|
LP1ZERO:
|
583 |
|
|
fmoves negone,%fp0
|
584 |
|
|
|
585 |
|
|
fmovel %d1,%fpcr
|
586 |
|
|
bra t_dz
|
587 |
|
|
|
588 |
|
|
LP1NEG:
|
589 |
|
|
fmoves zero,%fp0
|
590 |
|
|
|
591 |
|
|
fmovel %d1,%fpcr
|
592 |
|
|
bra t_operr
|
593 |
|
|
|
594 |
|
|
|end
|