OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [arch/] [m68k/] [fpsp040/] [slogn.S] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
|
2
|       slogn.sa 3.1 12/10/90
3
|
4
|       slogn computes the natural logarithm of an
5
|       input value. slognd does the same except the input value is a
6
|       denormalized number. slognp1 computes log(1+X), and slognp1d
7
|       computes log(1+X) for denormalized X.
8
|
9
|       Input: Double-extended value in memory location pointed to by address
10
|               register a0.
11
|
12
|       Output: log(X) or log(1+X) returned in floating-point register Fp0.
13
|
14
|       Accuracy and Monotonicity: The returned result is within 2 ulps in
15
|               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16
|               result is subsequently rounded to double precision. The
17
|               result is provably monotonic in double precision.
18
|
19
|       Speed: The program slogn takes approximately 190 cycles for input
20
|               argument X such that |X-1| >= 1/16, which is the usual
21
|               situation. For those arguments, slognp1 takes approximately
22
|                210 cycles. For the less common arguments, the program will
23
|                run no worse than 10% slower.
24
|
25
|       Algorithm:
26
|       LOGN:
27
|       Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
28
|               u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
29
|
30
|       Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
31
|               significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
32
|               2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
33
|
34
|       Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
35
|               log(1+u) = poly.
36
|
37
|       Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
38
|               by k*log(2) + (log(F) + poly). The values of log(F) are calculated
39
|               beforehand and stored in the program.
40
|
41
|       lognp1:
42
|       Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
43
|               u where u = 2X/(2+X). Otherwise, move on to Step 2.
44
|
45
|       Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
46
|               of the algorithm for LOGN and compute log(1+X) as
47
|               k*log(2) + log(F) + poly where poly approximates log(1+u),
48
|               u = (Y-F)/F.
49
|
50
|       Implementation Notes:
51
|       Note 1. There are 64 different possible values for F, thus 64 log(F)'s
52
|               need to be tabulated. Moreover, the values of 1/F are also
53
|               tabulated so that the division in (Y-F)/F can be performed by a
54
|               multiplication.
55
|
56
|       Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
57
|               Y-F has to be calculated carefully when 1/2 <= X < 3/2.
58
|
59
|       Note 3. To fully exploit the pipeline, polynomials are usually separated
60
|               into two parts evaluated independently before being added up.
61
|
62
 
63
|               Copyright (C) Motorola, Inc. 1990
64
|                       All Rights Reserved
65
|
66
|       THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
67
|       The copyright notice above does not evidence any
68
|       actual or intended publication of such source code.
69
 
70
|slogn  idnt    2,1 | Motorola 040 Floating Point Software Package
71
 
72
        |section        8
73
 
74
        .include "fpsp.h"
75
 
76
BOUNDS1:  .long 0x3FFEF07D,0x3FFF8841
77
BOUNDS2:  .long 0x3FFE8000,0x3FFFC000
78
 
79
LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
80
 
81
one:    .long 0x3F800000
82
zero:   .long 0x00000000
83
infty:  .long 0x7F800000
84
negone: .long 0xBF800000
85
 
86
LOGA6:  .long 0x3FC2499A,0xB5E4040B
87
LOGA5:  .long 0xBFC555B5,0x848CB7DB
88
 
89
LOGA4:  .long 0x3FC99999,0x987D8730
90
LOGA3:  .long 0xBFCFFFFF,0xFF6F7E97
91
 
92
LOGA2:  .long 0x3FD55555,0x555555a4
93
LOGA1:  .long 0xBFE00000,0x00000008
94
 
95
LOGB5:  .long 0x3F175496,0xADD7DAD6
96
LOGB4:  .long 0x3F3C71C2,0xFE80C7E0
97
 
98
LOGB3:  .long 0x3F624924,0x928BCCFF
99
LOGB2:  .long 0x3F899999,0x999995EC
100
 
101
LOGB1:  .long 0x3FB55555,0x55555555
102
TWO:    .long 0x40000000,0x00000000
103
 
104
LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
105
 
106
LOGTBL:
107
        .long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
108
        .long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
109
        .long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
110
        .long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
111
        .long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
112
        .long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
113
        .long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
114
        .long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
115
        .long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
116
        .long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
117
        .long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
118
        .long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
119
        .long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
120
        .long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
121
        .long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
122
        .long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
123
        .long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
124
        .long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
125
        .long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
126
        .long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
127
        .long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
128
        .long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
129
        .long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
130
        .long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
131
        .long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
132
        .long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
133
        .long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
134
        .long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
135
        .long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
136
        .long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
137
        .long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
138
        .long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
139
        .long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
140
        .long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
141
        .long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
142
        .long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
143
        .long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
144
        .long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
145
        .long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
146
        .long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
147
        .long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
148
        .long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
149
        .long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
150
        .long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
151
        .long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
152
        .long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
153
        .long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
154
        .long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
155
        .long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
156
        .long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
157
        .long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
158
        .long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
159
        .long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
160
        .long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
161
        .long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
162
        .long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
163
        .long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
164
        .long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
165
        .long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
166
        .long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
167
        .long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
168
        .long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
169
        .long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
170
        .long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
171
        .long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
172
        .long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
173
        .long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
174
        .long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
175
        .long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
176
        .long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
177
        .long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
178
        .long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
179
        .long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
180
        .long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
181
        .long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
182
        .long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
183
        .long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
184
        .long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
185
        .long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
186
        .long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
187
        .long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
188
        .long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
189
        .long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
190
        .long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
191
        .long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
192
        .long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
193
        .long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
194
        .long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
195
        .long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
196
        .long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
197
        .long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
198
        .long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
199
        .long  0x3FFE0000,0x94458094,0x45809446,0x00000000
200
        .long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
201
        .long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
202
        .long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
203
        .long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
204
        .long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
205
        .long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
206
        .long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
207
        .long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
208
        .long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
209
        .long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
210
        .long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
211
        .long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
212
        .long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
213
        .long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
214
        .long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
215
        .long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
216
        .long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
217
        .long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
218
        .long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
219
        .long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
220
        .long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
221
        .long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
222
        .long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
223
        .long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
224
        .long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
225
        .long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
226
        .long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
227
        .long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
228
        .long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
229
        .long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
230
        .long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
231
        .long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
232
        .long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
233
        .long  0x3FFE0000,0x80808080,0x80808081,0x00000000
234
        .long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
235
 
236
        .set    ADJK,L_SCR1
237
 
238
        .set    X,FP_SCR1
239
        .set    XDCARE,X+2
240
        .set    XFRAC,X+4
241
 
242
        .set    F,FP_SCR2
243
        .set    FFRAC,F+4
244
 
245
        .set    KLOG2,FP_SCR3
246
 
247
        .set    SAVEU,FP_SCR4
248
 
249
        | xref  t_frcinx
250
        |xref   t_extdnrm
251
        |xref   t_operr
252
        |xref   t_dz
253
 
254
        .global slognd
255
slognd:
256
|--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
257
 
258
        movel           #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0
259
 
260
|----normalize the input value by left shifting k bits (k to be determined
261
|----below), adjusting exponent and storing -k to  ADJK
262
|----the value TWOTO100 is no longer needed.
263
|----Note that this code assumes the denormalized input is NON-ZERO.
264
 
265
     moveml     %d2-%d7,-(%a7)          | ...save some registers
266
     movel      #0x00000000,%d3         | ...D3 is exponent of smallest norm. #
267
     movel      4(%a0),%d4
268
     movel      8(%a0),%d5              | ...(D4,D5) is (Hi_X,Lo_X)
269
     clrl       %d2                     | ...D2 used for holding K
270
 
271
     tstl       %d4
272
     bnes       HiX_not0
273
 
274
HiX_0:
275
     movel      %d5,%d4
276
     clrl       %d5
277
     movel      #32,%d2
278
     clrl       %d6
279
     bfffo      %d4{#0:#32},%d6
280
     lsll      %d6,%d4
281
     addl       %d6,%d2                 | ...(D3,D4,D5) is normalized
282
 
283
     movel      %d3,X(%a6)
284
     movel      %d4,XFRAC(%a6)
285
     movel      %d5,XFRAC+4(%a6)
286
     negl       %d2
287
     movel      %d2,ADJK(%a6)
288
     fmovex     X(%a6),%fp0
289
     moveml     (%a7)+,%d2-%d7          | ...restore registers
290
     lea        X(%a6),%a0
291
     bras       LOGBGN                  | ...begin regular log(X)
292
 
293
 
294
HiX_not0:
295
     clrl       %d6
296
     bfffo      %d4{#0:#32},%d6         | ...find first 1
297
     movel      %d6,%d2                 | ...get k
298
     lsll       %d6,%d4
299
     movel      %d5,%d7                 | ...a copy of D5
300
     lsll       %d6,%d5
301
     negl       %d6
302
     addil      #32,%d6
303
     lsrl       %d6,%d7
304
     orl        %d7,%d4                 | ...(D3,D4,D5) normalized
305
 
306
     movel      %d3,X(%a6)
307
     movel      %d4,XFRAC(%a6)
308
     movel      %d5,XFRAC+4(%a6)
309
     negl       %d2
310
     movel      %d2,ADJK(%a6)
311
     fmovex     X(%a6),%fp0
312
     moveml     (%a7)+,%d2-%d7          | ...restore registers
313
     lea        X(%a6),%a0
314
     bras       LOGBGN                  | ...begin regular log(X)
315
 
316
 
317
        .global slogn
318
slogn:
319
|--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
320
 
321
        fmovex          (%a0),%fp0      | ...LOAD INPUT
322
        movel           #0x00000000,ADJK(%a6)
323
 
324
LOGBGN:
325
|--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
326
|--A FINITE, NON-ZERO, NORMALIZED NUMBER.
327
 
328
        movel   (%a0),%d0
329
        movew   4(%a0),%d0
330
 
331
        movel   (%a0),X(%a6)
332
        movel   4(%a0),X+4(%a6)
333
        movel   8(%a0),X+8(%a6)
334
 
335
        cmpil   #0,%d0          | ...CHECK IF X IS NEGATIVE
336
        blt     LOGNEG          | ...LOG OF NEGATIVE ARGUMENT IS INVALID
337
        cmp2l   BOUNDS1,%d0     | ...X IS POSITIVE, CHECK IF X IS NEAR 1
338
        bcc     LOGNEAR1        | ...BOUNDS IS ROUGHLY [15/16, 17/16]
339
 
340
LOGMAIN:
341
|--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
342
 
343
|--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
344
|--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
345
|--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
346
|--                      = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
347
|--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
348
|--LOG(1+U) CAN BE VERY EFFICIENT.
349
|--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
350
|--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
351
 
352
|--GET K, Y, F, AND ADDRESS OF 1/F.
353
        asrl    #8,%d0
354
        asrl    #8,%d0          | ...SHIFTED 16 BITS, BIASED EXPO. OF X
355
        subil   #0x3FFF,%d0     | ...THIS IS K
356
        addl    ADJK(%a6),%d0   | ...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
357
        lea     LOGTBL,%a0      | ...BASE ADDRESS OF 1/F AND LOG(F)
358
        fmovel  %d0,%fp1                | ...CONVERT K TO FLOATING-POINT FORMAT
359
 
360
|--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
361
        movel   #0x3FFF0000,X(%a6)      | ...X IS NOW Y, I.E. 2^(-K)*X
362
        movel   XFRAC(%a6),FFRAC(%a6)
363
        andil   #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
364
        oril    #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
365
        movel   FFRAC(%a6),%d0  | ...READY TO GET ADDRESS OF 1/F
366
        andil   #0x7E000000,%d0
367
        asrl    #8,%d0
368
        asrl    #8,%d0
369
        asrl    #4,%d0          | ...SHIFTED 20, D0 IS THE DISPLACEMENT
370
        addal   %d0,%a0         | ...A0 IS THE ADDRESS FOR 1/F
371
 
372
        fmovex  X(%a6),%fp0
373
        movel   #0x3fff0000,F(%a6)
374
        clrl    F+8(%a6)
375
        fsubx   F(%a6),%fp0             | ...Y-F
376
        fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...SAVE FP2 WHILE FP0 IS NOT READY
377
|--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
378
|--REGISTERS SAVED: FPCR, FP1, FP2
379
 
380
LP1CONT1:
381
|--AN RE-ENTRY POINT FOR LOGNP1
382
        fmulx   (%a0),%fp0      | ...FP0 IS U = (Y-F)/F
383
        fmulx   LOGOF2,%fp1     | ...GET K*LOG2 WHILE FP0 IS NOT READY
384
        fmovex  %fp0,%fp2
385
        fmulx   %fp2,%fp2               | ...FP2 IS V=U*U
386
        fmovex  %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1
387
 
388
|--LOG(1+U) IS APPROXIMATED BY
389
|--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
390
|--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
391
 
392
        fmovex  %fp2,%fp3
393
        fmovex  %fp2,%fp1
394
 
395
        fmuld   LOGA6,%fp1      | ...V*A6
396
        fmuld   LOGA5,%fp2      | ...V*A5
397
 
398
        faddd   LOGA4,%fp1      | ...A4+V*A6
399
        faddd   LOGA3,%fp2      | ...A3+V*A5
400
 
401
        fmulx   %fp3,%fp1               | ...V*(A4+V*A6)
402
        fmulx   %fp3,%fp2               | ...V*(A3+V*A5)
403
 
404
        faddd   LOGA2,%fp1      | ...A2+V*(A4+V*A6)
405
        faddd   LOGA1,%fp2      | ...A1+V*(A3+V*A5)
406
 
407
        fmulx   %fp3,%fp1               | ...V*(A2+V*(A4+V*A6))
408
        addal   #16,%a0         | ...ADDRESS OF LOG(F)
409
        fmulx   %fp3,%fp2               | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
410
 
411
        fmulx   %fp0,%fp1               | ...U*V*(A2+V*(A4+V*A6))
412
        faddx   %fp2,%fp0               | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
413
 
414
        faddx   (%a0),%fp1      | ...LOG(F)+U*V*(A2+V*(A4+V*A6))
415
        fmovemx  (%sp)+,%fp2-%fp2/%fp3  | ...RESTORE FP2
416
        faddx   %fp1,%fp0               | ...FP0 IS LOG(F) + LOG(1+U)
417
 
418
        fmovel  %d1,%fpcr
419
        faddx   KLOG2(%a6),%fp0 | ...FINAL ADD
420
        bra     t_frcinx
421
 
422
 
423
LOGNEAR1:
424
|--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
425
        fmovex  %fp0,%fp1
426
        fsubs   one,%fp1                | ...FP1 IS X-1
427
        fadds   one,%fp0                | ...FP0 IS X+1
428
        faddx   %fp1,%fp1               | ...FP1 IS 2(X-1)
429
|--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
430
|--IN U, U = 2(X-1)/(X+1) = FP1/FP0
431
 
432
LP1CONT2:
433
|--THIS IS AN RE-ENTRY POINT FOR LOGNP1
434
        fdivx   %fp0,%fp1               | ...FP1 IS U
435
        fmovemx %fp2-%fp2/%fp3,-(%sp)    | ...SAVE FP2
436
|--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
437
|--LET V=U*U, W=V*V, CALCULATE
438
|--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
439
|--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
440
        fmovex  %fp1,%fp0
441
        fmulx   %fp0,%fp0       | ...FP0 IS V
442
        fmovex  %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
443
        fmovex  %fp0,%fp1
444
        fmulx   %fp1,%fp1       | ...FP1 IS W
445
 
446
        fmoved  LOGB5,%fp3
447
        fmoved  LOGB4,%fp2
448
 
449
        fmulx   %fp1,%fp3       | ...W*B5
450
        fmulx   %fp1,%fp2       | ...W*B4
451
 
452
        faddd   LOGB3,%fp3 | ...B3+W*B5
453
        faddd   LOGB2,%fp2 | ...B2+W*B4
454
 
455
        fmulx   %fp3,%fp1       | ...W*(B3+W*B5), FP3 RELEASED
456
 
457
        fmulx   %fp0,%fp2       | ...V*(B2+W*B4)
458
 
459
        faddd   LOGB1,%fp1 | ...B1+W*(B3+W*B5)
460
        fmulx   SAVEU(%a6),%fp0 | ...FP0 IS U*V
461
 
462
        faddx   %fp2,%fp1       | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
463
        fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED
464
 
465
        fmulx   %fp1,%fp0       | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
466
 
467
        fmovel  %d1,%fpcr
468
        faddx   SAVEU(%a6),%fp0
469
        bra     t_frcinx
470
        rts
471
 
472
LOGNEG:
473
|--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
474
        bra     t_operr
475
 
476
        .global slognp1d
477
slognp1d:
478
|--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
479
| Simply return the denorm
480
 
481
        bra     t_extdnrm
482
 
483
        .global slognp1
484
slognp1:
485
|--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
486
 
487
        fmovex  (%a0),%fp0      | ...LOAD INPUT
488
        fabsx   %fp0            |test magnitude
489
        fcmpx   LTHOLD,%fp0     |compare with min threshold
490
        fbgt    LP1REAL         |if greater, continue
491
        fmovel  #0,%fpsr                |clr N flag from compare
492
        fmovel  %d1,%fpcr
493
        fmovex  (%a0),%fp0      |return signed argument
494
        bra     t_frcinx
495
 
496
LP1REAL:
497
        fmovex  (%a0),%fp0      | ...LOAD INPUT
498
        movel   #0x00000000,ADJK(%a6)
499
        fmovex  %fp0,%fp1       | ...FP1 IS INPUT Z
500
        fadds   one,%fp0        | ...X := ROUND(1+Z)
501
        fmovex  %fp0,X(%a6)
502
        movew   XFRAC(%a6),XDCARE(%a6)
503
        movel   X(%a6),%d0
504
        cmpil   #0,%d0
505
        ble     LP1NEG0 | ...LOG OF ZERO OR -VE
506
        cmp2l   BOUNDS2,%d0
507
        bcs     LOGMAIN | ...BOUNDS2 IS [1/2,3/2]
508
|--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
509
|--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
510
|--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
511
 
512
LP1NEAR1:
513
|--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
514
        cmp2l   BOUNDS1,%d0
515
        bcss    LP1CARE
516
 
517
LP1ONE16:
518
|--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
519
|--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
520
        faddx   %fp1,%fp1       | ...FP1 IS 2Z
521
        fadds   one,%fp0        | ...FP0 IS 1+X
522
|--U = FP1/FP0
523
        bra     LP1CONT2
524
 
525
LP1CARE:
526
|--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
527
|--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
528
|--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
529
|--THERE ARE ONLY TWO CASES.
530
|--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
531
|--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
532
|--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
533
|--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
534
 
535
        movel   XFRAC(%a6),FFRAC(%a6)
536
        andil   #0xFE000000,FFRAC(%a6)
537
        oril    #0x01000000,FFRAC(%a6)  | ...F OBTAINED
538
        cmpil   #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1
539
        bges    KISZERO
540
 
541
KISNEG1:
542
        fmoves  TWO,%fp0
543
        movel   #0x3fff0000,F(%a6)
544
        clrl    F+8(%a6)
545
        fsubx   F(%a6),%fp0     | ...2-F
546
        movel   FFRAC(%a6),%d0
547
        andil   #0x7E000000,%d0
548
        asrl    #8,%d0
549
        asrl    #8,%d0
550
        asrl    #4,%d0          | ...D0 CONTAINS DISPLACEMENT FOR 1/F
551
        faddx   %fp1,%fp1               | ...GET 2Z
552
        fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...SAVE FP2
553
        faddx   %fp1,%fp0               | ...FP0 IS Y-F = (2-F)+2Z
554
        lea     LOGTBL,%a0      | ...A0 IS ADDRESS OF 1/F
555
        addal   %d0,%a0
556
        fmoves  negone,%fp1     | ...FP1 IS K = -1
557
        bra     LP1CONT1
558
 
559
KISZERO:
560
        fmoves  one,%fp0
561
        movel   #0x3fff0000,F(%a6)
562
        clrl    F+8(%a6)
563
        fsubx   F(%a6),%fp0             | ...1-F
564
        movel   FFRAC(%a6),%d0
565
        andil   #0x7E000000,%d0
566
        asrl    #8,%d0
567
        asrl    #8,%d0
568
        asrl    #4,%d0
569
        faddx   %fp1,%fp0               | ...FP0 IS Y-F
570
        fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...FP2 SAVED
571
        lea     LOGTBL,%a0
572
        addal   %d0,%a0         | ...A0 IS ADDRESS OF 1/F
573
        fmoves  zero,%fp1       | ...FP1 IS K = 0
574
        bra     LP1CONT1
575
 
576
LP1NEG0:
577
|--FPCR SAVED. D0 IS X IN COMPACT FORM.
578
        cmpil   #0,%d0
579
        blts    LP1NEG
580
LP1ZERO:
581
        fmoves  negone,%fp0
582
 
583
        fmovel  %d1,%fpcr
584
        bra t_dz
585
 
586
LP1NEG:
587
        fmoves  zero,%fp0
588
 
589
        fmovel  %d1,%fpcr
590
        bra     t_operr
591
 
592
        |end

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.