OpenCores
URL https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

[/] [or1k_soc_on_altera_embedded_dev_kit/] [trunk/] [linux-2.6/] [linux-2.6.24/] [arch/] [m68k/] [fpsp040/] [slogn.S] - Blame information for rev 3

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 3 xianfeng
|
2
|       slogn.sa 3.1 12/10/90
3
|
4
|       slogn computes the natural logarithm of an
5
|       input value. slognd does the same except the input value is a
6
|       denormalized number. slognp1 computes log(1+X), and slognp1d
7
|       computes log(1+X) for denormalized X.
8
|
9
|       Input: Double-extended value in memory location pointed to by address
10
|               register a0.
11
|
12
|       Output: log(X) or log(1+X) returned in floating-point register Fp0.
13
|
14
|       Accuracy and Monotonicity: The returned result is within 2 ulps in
15
|               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16
|               result is subsequently rounded to double precision. The
17
|               result is provably monotonic in double precision.
18
|
19
|       Speed: The program slogn takes approximately 190 cycles for input
20
|               argument X such that |X-1| >= 1/16, which is the usual
21
|               situation. For those arguments, slognp1 takes approximately
22
|                210 cycles. For the less common arguments, the program will
23
|                run no worse than 10% slower.
24
|
25
|       Algorithm:
26
|       LOGN:
27
|       Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
28
|               u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
29
|
30
|       Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
31
|               significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
32
|               2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
33
|
34
|       Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
35
|               log(1+u) = poly.
36
|
37
|       Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
38
|               by k*log(2) + (log(F) + poly). The values of log(F) are calculated
39
|               beforehand and stored in the program.
40
|
41
|       lognp1:
42
|       Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
43
|               u where u = 2X/(2+X). Otherwise, move on to Step 2.
44
|
45
|       Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
46
|               of the algorithm for LOGN and compute log(1+X) as
47
|               k*log(2) + log(F) + poly where poly approximates log(1+u),
48
|               u = (Y-F)/F.
49
|
50
|       Implementation Notes:
51
|       Note 1. There are 64 different possible values for F, thus 64 log(F)'s
52
|               need to be tabulated. Moreover, the values of 1/F are also
53
|               tabulated so that the division in (Y-F)/F can be performed by a
54
|               multiplication.
55
|
56
|       Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
57
|               Y-F has to be calculated carefully when 1/2 <= X < 3/2.
58
|
59
|       Note 3. To fully exploit the pipeline, polynomials are usually separated
60
|               into two parts evaluated independently before being added up.
61
|
62
 
63
|               Copyright (C) Motorola, Inc. 1990
64
|                       All Rights Reserved
65
|
66
|       For details on the license for this file, please see the
67
|       file, README, in this same directory.
68
 
69
|slogn  idnt    2,1 | Motorola 040 Floating Point Software Package
70
 
71
        |section        8
72
 
73
#include "fpsp.h"
74
 
75
BOUNDS1:  .long 0x3FFEF07D,0x3FFF8841
76
BOUNDS2:  .long 0x3FFE8000,0x3FFFC000
77
 
78
LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
79
 
80
one:    .long 0x3F800000
81
zero:   .long 0x00000000
82
infty:  .long 0x7F800000
83
negone: .long 0xBF800000
84
 
85
LOGA6:  .long 0x3FC2499A,0xB5E4040B
86
LOGA5:  .long 0xBFC555B5,0x848CB7DB
87
 
88
LOGA4:  .long 0x3FC99999,0x987D8730
89
LOGA3:  .long 0xBFCFFFFF,0xFF6F7E97
90
 
91
LOGA2:  .long 0x3FD55555,0x555555a4
92
LOGA1:  .long 0xBFE00000,0x00000008
93
 
94
LOGB5:  .long 0x3F175496,0xADD7DAD6
95
LOGB4:  .long 0x3F3C71C2,0xFE80C7E0
96
 
97
LOGB3:  .long 0x3F624924,0x928BCCFF
98
LOGB2:  .long 0x3F899999,0x999995EC
99
 
100
LOGB1:  .long 0x3FB55555,0x55555555
101
TWO:    .long 0x40000000,0x00000000
102
 
103
LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
104
 
105
LOGTBL:
106
        .long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
107
        .long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
108
        .long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
109
        .long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
110
        .long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
111
        .long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
112
        .long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
113
        .long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
114
        .long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
115
        .long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
116
        .long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
117
        .long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
118
        .long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
119
        .long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
120
        .long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
121
        .long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
122
        .long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
123
        .long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
124
        .long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
125
        .long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
126
        .long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
127
        .long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
128
        .long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
129
        .long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
130
        .long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
131
        .long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
132
        .long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
133
        .long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
134
        .long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
135
        .long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
136
        .long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
137
        .long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
138
        .long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
139
        .long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
140
        .long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
141
        .long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
142
        .long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
143
        .long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
144
        .long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
145
        .long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
146
        .long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
147
        .long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
148
        .long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
149
        .long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
150
        .long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
151
        .long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
152
        .long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
153
        .long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
154
        .long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
155
        .long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
156
        .long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
157
        .long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
158
        .long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
159
        .long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
160
        .long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
161
        .long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
162
        .long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
163
        .long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
164
        .long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
165
        .long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
166
        .long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
167
        .long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
168
        .long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
169
        .long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
170
        .long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
171
        .long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
172
        .long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
173
        .long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
174
        .long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
175
        .long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
176
        .long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
177
        .long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
178
        .long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
179
        .long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
180
        .long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
181
        .long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
182
        .long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
183
        .long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
184
        .long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
185
        .long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
186
        .long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
187
        .long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
188
        .long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
189
        .long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
190
        .long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
191
        .long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
192
        .long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
193
        .long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
194
        .long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
195
        .long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
196
        .long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
197
        .long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
198
        .long  0x3FFE0000,0x94458094,0x45809446,0x00000000
199
        .long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
200
        .long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
201
        .long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
202
        .long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
203
        .long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
204
        .long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
205
        .long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
206
        .long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
207
        .long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
208
        .long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
209
        .long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
210
        .long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
211
        .long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
212
        .long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
213
        .long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
214
        .long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
215
        .long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
216
        .long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
217
        .long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
218
        .long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
219
        .long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
220
        .long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
221
        .long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
222
        .long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
223
        .long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
224
        .long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
225
        .long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
226
        .long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
227
        .long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
228
        .long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
229
        .long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
230
        .long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
231
        .long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
232
        .long  0x3FFE0000,0x80808080,0x80808081,0x00000000
233
        .long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
234
 
235
        .set    ADJK,L_SCR1
236
 
237
        .set    X,FP_SCR1
238
        .set    XDCARE,X+2
239
        .set    XFRAC,X+4
240
 
241
        .set    F,FP_SCR2
242
        .set    FFRAC,F+4
243
 
244
        .set    KLOG2,FP_SCR3
245
 
246
        .set    SAVEU,FP_SCR4
247
 
248
        | xref  t_frcinx
249
        |xref   t_extdnrm
250
        |xref   t_operr
251
        |xref   t_dz
252
 
253
        .global slognd
254
slognd:
255
|--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
256
 
257
        movel           #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0
258
 
259
|----normalize the input value by left shifting k bits (k to be determined
260
|----below), adjusting exponent and storing -k to  ADJK
261
|----the value TWOTO100 is no longer needed.
262
|----Note that this code assumes the denormalized input is NON-ZERO.
263
 
264
     moveml     %d2-%d7,-(%a7)          | ...save some registers
265
     movel      #0x00000000,%d3         | ...D3 is exponent of smallest norm. #
266
     movel      4(%a0),%d4
267
     movel      8(%a0),%d5              | ...(D4,D5) is (Hi_X,Lo_X)
268
     clrl       %d2                     | ...D2 used for holding K
269
 
270
     tstl       %d4
271
     bnes       HiX_not0
272
 
273
HiX_0:
274
     movel      %d5,%d4
275
     clrl       %d5
276
     movel      #32,%d2
277
     clrl       %d6
278
     bfffo      %d4{#0:#32},%d6
279
     lsll      %d6,%d4
280
     addl       %d6,%d2                 | ...(D3,D4,D5) is normalized
281
 
282
     movel      %d3,X(%a6)
283
     movel      %d4,XFRAC(%a6)
284
     movel      %d5,XFRAC+4(%a6)
285
     negl       %d2
286
     movel      %d2,ADJK(%a6)
287
     fmovex     X(%a6),%fp0
288
     moveml     (%a7)+,%d2-%d7          | ...restore registers
289
     lea        X(%a6),%a0
290
     bras       LOGBGN                  | ...begin regular log(X)
291
 
292
 
293
HiX_not0:
294
     clrl       %d6
295
     bfffo      %d4{#0:#32},%d6         | ...find first 1
296
     movel      %d6,%d2                 | ...get k
297
     lsll       %d6,%d4
298
     movel      %d5,%d7                 | ...a copy of D5
299
     lsll       %d6,%d5
300
     negl       %d6
301
     addil      #32,%d6
302
     lsrl       %d6,%d7
303
     orl        %d7,%d4                 | ...(D3,D4,D5) normalized
304
 
305
     movel      %d3,X(%a6)
306
     movel      %d4,XFRAC(%a6)
307
     movel      %d5,XFRAC+4(%a6)
308
     negl       %d2
309
     movel      %d2,ADJK(%a6)
310
     fmovex     X(%a6),%fp0
311
     moveml     (%a7)+,%d2-%d7          | ...restore registers
312
     lea        X(%a6),%a0
313
     bras       LOGBGN                  | ...begin regular log(X)
314
 
315
 
316
        .global slogn
317
slogn:
318
|--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
319
 
320
        fmovex          (%a0),%fp0      | ...LOAD INPUT
321
        movel           #0x00000000,ADJK(%a6)
322
 
323
LOGBGN:
324
|--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
325
|--A FINITE, NON-ZERO, NORMALIZED NUMBER.
326
 
327
        movel   (%a0),%d0
328
        movew   4(%a0),%d0
329
 
330
        movel   (%a0),X(%a6)
331
        movel   4(%a0),X+4(%a6)
332
        movel   8(%a0),X+8(%a6)
333
 
334
        cmpil   #0,%d0          | ...CHECK IF X IS NEGATIVE
335
        blt     LOGNEG          | ...LOG OF NEGATIVE ARGUMENT IS INVALID
336
        cmp2l   BOUNDS1,%d0     | ...X IS POSITIVE, CHECK IF X IS NEAR 1
337
        bcc     LOGNEAR1        | ...BOUNDS IS ROUGHLY [15/16, 17/16]
338
 
339
LOGMAIN:
340
|--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
341
 
342
|--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
343
|--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
344
|--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
345
|--                      = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
346
|--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
347
|--LOG(1+U) CAN BE VERY EFFICIENT.
348
|--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
349
|--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
350
 
351
|--GET K, Y, F, AND ADDRESS OF 1/F.
352
        asrl    #8,%d0
353
        asrl    #8,%d0          | ...SHIFTED 16 BITS, BIASED EXPO. OF X
354
        subil   #0x3FFF,%d0     | ...THIS IS K
355
        addl    ADJK(%a6),%d0   | ...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
356
        lea     LOGTBL,%a0      | ...BASE ADDRESS OF 1/F AND LOG(F)
357
        fmovel  %d0,%fp1                | ...CONVERT K TO FLOATING-POINT FORMAT
358
 
359
|--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
360
        movel   #0x3FFF0000,X(%a6)      | ...X IS NOW Y, I.E. 2^(-K)*X
361
        movel   XFRAC(%a6),FFRAC(%a6)
362
        andil   #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
363
        oril    #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
364
        movel   FFRAC(%a6),%d0  | ...READY TO GET ADDRESS OF 1/F
365
        andil   #0x7E000000,%d0
366
        asrl    #8,%d0
367
        asrl    #8,%d0
368
        asrl    #4,%d0          | ...SHIFTED 20, D0 IS THE DISPLACEMENT
369
        addal   %d0,%a0         | ...A0 IS THE ADDRESS FOR 1/F
370
 
371
        fmovex  X(%a6),%fp0
372
        movel   #0x3fff0000,F(%a6)
373
        clrl    F+8(%a6)
374
        fsubx   F(%a6),%fp0             | ...Y-F
375
        fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...SAVE FP2 WHILE FP0 IS NOT READY
376
|--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
377
|--REGISTERS SAVED: FPCR, FP1, FP2
378
 
379
LP1CONT1:
380
|--AN RE-ENTRY POINT FOR LOGNP1
381
        fmulx   (%a0),%fp0      | ...FP0 IS U = (Y-F)/F
382
        fmulx   LOGOF2,%fp1     | ...GET K*LOG2 WHILE FP0 IS NOT READY
383
        fmovex  %fp0,%fp2
384
        fmulx   %fp2,%fp2               | ...FP2 IS V=U*U
385
        fmovex  %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1
386
 
387
|--LOG(1+U) IS APPROXIMATED BY
388
|--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
389
|--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
390
 
391
        fmovex  %fp2,%fp3
392
        fmovex  %fp2,%fp1
393
 
394
        fmuld   LOGA6,%fp1      | ...V*A6
395
        fmuld   LOGA5,%fp2      | ...V*A5
396
 
397
        faddd   LOGA4,%fp1      | ...A4+V*A6
398
        faddd   LOGA3,%fp2      | ...A3+V*A5
399
 
400
        fmulx   %fp3,%fp1               | ...V*(A4+V*A6)
401
        fmulx   %fp3,%fp2               | ...V*(A3+V*A5)
402
 
403
        faddd   LOGA2,%fp1      | ...A2+V*(A4+V*A6)
404
        faddd   LOGA1,%fp2      | ...A1+V*(A3+V*A5)
405
 
406
        fmulx   %fp3,%fp1               | ...V*(A2+V*(A4+V*A6))
407
        addal   #16,%a0         | ...ADDRESS OF LOG(F)
408
        fmulx   %fp3,%fp2               | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
409
 
410
        fmulx   %fp0,%fp1               | ...U*V*(A2+V*(A4+V*A6))
411
        faddx   %fp2,%fp0               | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
412
 
413
        faddx   (%a0),%fp1      | ...LOG(F)+U*V*(A2+V*(A4+V*A6))
414
        fmovemx  (%sp)+,%fp2-%fp2/%fp3  | ...RESTORE FP2
415
        faddx   %fp1,%fp0               | ...FP0 IS LOG(F) + LOG(1+U)
416
 
417
        fmovel  %d1,%fpcr
418
        faddx   KLOG2(%a6),%fp0 | ...FINAL ADD
419
        bra     t_frcinx
420
 
421
 
422
LOGNEAR1:
423
|--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
424
        fmovex  %fp0,%fp1
425
        fsubs   one,%fp1                | ...FP1 IS X-1
426
        fadds   one,%fp0                | ...FP0 IS X+1
427
        faddx   %fp1,%fp1               | ...FP1 IS 2(X-1)
428
|--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
429
|--IN U, U = 2(X-1)/(X+1) = FP1/FP0
430
 
431
LP1CONT2:
432
|--THIS IS AN RE-ENTRY POINT FOR LOGNP1
433
        fdivx   %fp0,%fp1               | ...FP1 IS U
434
        fmovemx %fp2-%fp2/%fp3,-(%sp)    | ...SAVE FP2
435
|--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
436
|--LET V=U*U, W=V*V, CALCULATE
437
|--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
438
|--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
439
        fmovex  %fp1,%fp0
440
        fmulx   %fp0,%fp0       | ...FP0 IS V
441
        fmovex  %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
442
        fmovex  %fp0,%fp1
443
        fmulx   %fp1,%fp1       | ...FP1 IS W
444
 
445
        fmoved  LOGB5,%fp3
446
        fmoved  LOGB4,%fp2
447
 
448
        fmulx   %fp1,%fp3       | ...W*B5
449
        fmulx   %fp1,%fp2       | ...W*B4
450
 
451
        faddd   LOGB3,%fp3 | ...B3+W*B5
452
        faddd   LOGB2,%fp2 | ...B2+W*B4
453
 
454
        fmulx   %fp3,%fp1       | ...W*(B3+W*B5), FP3 RELEASED
455
 
456
        fmulx   %fp0,%fp2       | ...V*(B2+W*B4)
457
 
458
        faddd   LOGB1,%fp1 | ...B1+W*(B3+W*B5)
459
        fmulx   SAVEU(%a6),%fp0 | ...FP0 IS U*V
460
 
461
        faddx   %fp2,%fp1       | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
462
        fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED
463
 
464
        fmulx   %fp1,%fp0       | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
465
 
466
        fmovel  %d1,%fpcr
467
        faddx   SAVEU(%a6),%fp0
468
        bra     t_frcinx
469
        rts
470
 
471
LOGNEG:
472
|--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
473
        bra     t_operr
474
 
475
        .global slognp1d
476
slognp1d:
477
|--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
478
| Simply return the denorm
479
 
480
        bra     t_extdnrm
481
 
482
        .global slognp1
483
slognp1:
484
|--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
485
 
486
        fmovex  (%a0),%fp0      | ...LOAD INPUT
487
        fabsx   %fp0            |test magnitude
488
        fcmpx   LTHOLD,%fp0     |compare with min threshold
489
        fbgt    LP1REAL         |if greater, continue
490
        fmovel  #0,%fpsr                |clr N flag from compare
491
        fmovel  %d1,%fpcr
492
        fmovex  (%a0),%fp0      |return signed argument
493
        bra     t_frcinx
494
 
495
LP1REAL:
496
        fmovex  (%a0),%fp0      | ...LOAD INPUT
497
        movel   #0x00000000,ADJK(%a6)
498
        fmovex  %fp0,%fp1       | ...FP1 IS INPUT Z
499
        fadds   one,%fp0        | ...X := ROUND(1+Z)
500
        fmovex  %fp0,X(%a6)
501
        movew   XFRAC(%a6),XDCARE(%a6)
502
        movel   X(%a6),%d0
503
        cmpil   #0,%d0
504
        ble     LP1NEG0 | ...LOG OF ZERO OR -VE
505
        cmp2l   BOUNDS2,%d0
506
        bcs     LOGMAIN | ...BOUNDS2 IS [1/2,3/2]
507
|--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
508
|--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
509
|--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
510
 
511
LP1NEAR1:
512
|--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
513
        cmp2l   BOUNDS1,%d0
514
        bcss    LP1CARE
515
 
516
LP1ONE16:
517
|--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
518
|--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
519
        faddx   %fp1,%fp1       | ...FP1 IS 2Z
520
        fadds   one,%fp0        | ...FP0 IS 1+X
521
|--U = FP1/FP0
522
        bra     LP1CONT2
523
 
524
LP1CARE:
525
|--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
526
|--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
527
|--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
528
|--THERE ARE ONLY TWO CASES.
529
|--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
530
|--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
531
|--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
532
|--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
533
 
534
        movel   XFRAC(%a6),FFRAC(%a6)
535
        andil   #0xFE000000,FFRAC(%a6)
536
        oril    #0x01000000,FFRAC(%a6)  | ...F OBTAINED
537
        cmpil   #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1
538
        bges    KISZERO
539
 
540
KISNEG1:
541
        fmoves  TWO,%fp0
542
        movel   #0x3fff0000,F(%a6)
543
        clrl    F+8(%a6)
544
        fsubx   F(%a6),%fp0     | ...2-F
545
        movel   FFRAC(%a6),%d0
546
        andil   #0x7E000000,%d0
547
        asrl    #8,%d0
548
        asrl    #8,%d0
549
        asrl    #4,%d0          | ...D0 CONTAINS DISPLACEMENT FOR 1/F
550
        faddx   %fp1,%fp1               | ...GET 2Z
551
        fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...SAVE FP2
552
        faddx   %fp1,%fp0               | ...FP0 IS Y-F = (2-F)+2Z
553
        lea     LOGTBL,%a0      | ...A0 IS ADDRESS OF 1/F
554
        addal   %d0,%a0
555
        fmoves  negone,%fp1     | ...FP1 IS K = -1
556
        bra     LP1CONT1
557
 
558
KISZERO:
559
        fmoves  one,%fp0
560
        movel   #0x3fff0000,F(%a6)
561
        clrl    F+8(%a6)
562
        fsubx   F(%a6),%fp0             | ...1-F
563
        movel   FFRAC(%a6),%d0
564
        andil   #0x7E000000,%d0
565
        asrl    #8,%d0
566
        asrl    #8,%d0
567
        asrl    #4,%d0
568
        faddx   %fp1,%fp0               | ...FP0 IS Y-F
569
        fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...FP2 SAVED
570
        lea     LOGTBL,%a0
571
        addal   %d0,%a0         | ...A0 IS ADDRESS OF 1/F
572
        fmoves  zero,%fp1       | ...FP1 IS K = 0
573
        bra     LP1CONT1
574
 
575
LP1NEG0:
576
|--FPCR SAVED. D0 IS X IN COMPACT FORM.
577
        cmpil   #0,%d0
578
        blts    LP1NEG
579
LP1ZERO:
580
        fmoves  negone,%fp0
581
 
582
        fmovel  %d1,%fpcr
583
        bra t_dz
584
 
585
LP1NEG:
586
        fmoves  zero,%fp0
587
 
588
        fmovel  %d1,%fpcr
589
        bra     t_operr
590
 
591
        |end

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.