OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [rtos/] [rtems/] [c/] [src/] [lib/] [libcpu/] [m68k/] [m68040/] [fpsp/] [slogn.S] - Blame information for rev 594

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 30 unneback
//
2
//      $Id: slogn.S,v 1.2 2001-09-27 12:01:22 chris Exp $
3
//
4
//      slogn.sa 3.1 12/10/90
5
//
6
//      slogn computes the natural logarithm of an
7
//      input value. slognd does the same except the input value is a
8
//      denormalized number. slognp1 computes log(1+X), and slognp1d
9
//      computes log(1+X) for denormalized X.
10
//
11
//      Input: Double-extended value in memory location pointed to by address
12
//              register a0.
13
//
14
//      Output: log(X) or log(1+X) returned in floating-point register Fp0.
15
//
16
//      Accuracy and Monotonicity: The returned result is within 2 ulps in
17
//              64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
18
//              result is subsequently rounded to double precision. The
19
//              result is provably monotonic in double precision.
20
//
21
//      Speed: The program slogn takes approximately 190 cycles for input
22
//              argument X such that |X-1| >= 1/16, which is the the usual
23
//              situation. For those arguments, slognp1 takes approximately
24
//               210 cycles. For the less common arguments, the program will
25
//               run no worse than 10% slower.
26
//
27
//      Algorithm:
28
//      LOGN:
29
//      Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
30
//              u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
31
//
32
//      Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
33
//              significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
34
//              2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
35
//
36
//      Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
37
//              log(1+u) = poly.
38
//
39
//      Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
40
//              by k*log(2) + (log(F) + poly). The values of log(F) are calculated
41
//              beforehand and stored in the program.
42
//
43
//      lognp1:
44
//      Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
45
//              u where u = 2X/(2+X). Otherwise, move on to Step 2.
46
//
47
//      Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
48
//              of the algorithm for LOGN and compute log(1+X) as
49
//              k*log(2) + log(F) + poly where poly approximates log(1+u),
50
//              u = (Y-F)/F.
51
//
52
//      Implementation Notes:
53
//      Note 1. There are 64 different possible values for F, thus 64 log(F)'s
54
//              need to be tabulated. Moreover, the values of 1/F are also
55
//              tabulated so that the division in (Y-F)/F can be performed by a
56
//              multiplication.
57
//
58
//      Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
59
//              Y-F has to be calculated carefully when 1/2 <= X < 3/2.
60
//
61
//      Note 3. To fully exploit the pipeline, polynomials are usually separated
62
//              into two parts evaluated independently before being added up.
63
//
64
 
65
//              Copyright (C) Motorola, Inc. 1990
66
//                      All Rights Reserved
67
//
68
//      THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
69
//      The copyright notice above does not evidence any
70
//      actual or intended publication of such source code.
71
 
72
//slogn idnt    2,1 | Motorola 040 Floating Point Software Package
73
 
74
        |section        8
75
 
76
#include "fpsp.defs"
77
 
78
BOUNDS1:  .long 0x3FFEF07D,0x3FFF8841
79
BOUNDS2:  .long 0x3FFE8000,0x3FFFC000
80
 
81
LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
82
 
83
one:    .long 0x3F800000
84
zero:   .long 0x00000000
85
infty:  .long 0x7F800000
86
negone: .long 0xBF800000
87
 
88
LOGA6:  .long 0x3FC2499A,0xB5E4040B
89
LOGA5:  .long 0xBFC555B5,0x848CB7DB
90
 
91
LOGA4:  .long 0x3FC99999,0x987D8730
92
LOGA3:  .long 0xBFCFFFFF,0xFF6F7E97
93
 
94
LOGA2:  .long 0x3FD55555,0x555555a4
95
LOGA1:  .long 0xBFE00000,0x00000008
96
 
97
LOGB5:  .long 0x3F175496,0xADD7DAD6
98
LOGB4:  .long 0x3F3C71C2,0xFE80C7E0
99
 
100
LOGB3:  .long 0x3F624924,0x928BCCFF
101
LOGB2:  .long 0x3F899999,0x999995EC
102
 
103
LOGB1:  .long 0x3FB55555,0x55555555
104
TWO:    .long 0x40000000,0x00000000
105
 
106
LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
107
 
108
LOGTBL:
109
        .long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
110
        .long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
111
        .long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
112
        .long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
113
        .long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
114
        .long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
115
        .long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
116
        .long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
117
        .long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
118
        .long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
119
        .long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
120
        .long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
121
        .long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
122
        .long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
123
        .long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
124
        .long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
125
        .long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
126
        .long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
127
        .long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
128
        .long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
129
        .long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
130
        .long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
131
        .long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
132
        .long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
133
        .long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
134
        .long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
135
        .long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
136
        .long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
137
        .long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
138
        .long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
139
        .long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
140
        .long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
141
        .long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
142
        .long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
143
        .long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
144
        .long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
145
        .long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
146
        .long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
147
        .long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
148
        .long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
149
        .long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
150
        .long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
151
        .long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
152
        .long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
153
        .long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
154
        .long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
155
        .long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
156
        .long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
157
        .long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
158
        .long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
159
        .long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
160
        .long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
161
        .long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
162
        .long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
163
        .long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
164
        .long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
165
        .long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
166
        .long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
167
        .long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
168
        .long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
169
        .long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
170
        .long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
171
        .long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
172
        .long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
173
        .long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
174
        .long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
175
        .long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
176
        .long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
177
        .long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
178
        .long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
179
        .long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
180
        .long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
181
        .long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
182
        .long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
183
        .long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
184
        .long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
185
        .long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
186
        .long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
187
        .long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
188
        .long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
189
        .long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
190
        .long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
191
        .long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
192
        .long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
193
        .long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
194
        .long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
195
        .long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
196
        .long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
197
        .long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
198
        .long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
199
        .long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
200
        .long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
201
        .long  0x3FFE0000,0x94458094,0x45809446,0x00000000
202
        .long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
203
        .long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
204
        .long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
205
        .long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
206
        .long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
207
        .long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
208
        .long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
209
        .long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
210
        .long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
211
        .long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
212
        .long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
213
        .long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
214
        .long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
215
        .long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
216
        .long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
217
        .long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
218
        .long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
219
        .long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
220
        .long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
221
        .long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
222
        .long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
223
        .long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
224
        .long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
225
        .long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
226
        .long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
227
        .long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
228
        .long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
229
        .long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
230
        .long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
231
        .long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
232
        .long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
233
        .long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
234
        .long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
235
        .long  0x3FFE0000,0x80808080,0x80808081,0x00000000
236
        .long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
237
 
238
        .set    ADJK,L_SCR1
239
 
240
        .set    X,FP_SCR1
241
        .set    XDCARE,X+2
242
        .set    XFRAC,X+4
243
 
244
        .set    F,FP_SCR2
245
        .set    FFRAC,F+4
246
 
247
        .set    KLOG2,FP_SCR3
248
 
249
        .set    SAVEU,FP_SCR4
250
 
251
        | xref  t_frcinx
252
        |xref   t_extdnrm
253
        |xref   t_operr
254
        |xref   t_dz
255
 
256
        .global slognd
257
slognd:
258
//--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
259
 
260
        movel           #-100,ADJK(%a6) // ...INPUT = 2^(ADJK) * FP0
261
 
262
//----normalize the input value by left shifting k bits (k to be determined
263
//----below), adjusting exponent and storing -k to  ADJK
264
//----the value TWOTO100 is no longer needed.
265
//----Note that this code assumes the denormalized input is NON-ZERO.
266
 
267
     moveml     %d2-%d7,-(%a7)          // ...save some registers
268
     movel      #0x00000000,%d3         // ...D3 is exponent of smallest norm. #
269
     movel      4(%a0),%d4
270
     movel      8(%a0),%d5              // ...(D4,D5) is (Hi_X,Lo_X)
271
     clrl       %d2                     // ...D2 used for holding K
272
 
273
     tstl       %d4
274
     bnes       HiX_not0
275
 
276
HiX_0:
277
     movel      %d5,%d4
278
     clrl       %d5
279
     movel      #32,%d2
280
     clrl       %d6
281
     bfffo      %d4{#0:#32},%d6
282
     lsll      %d6,%d4
283
     addl       %d6,%d2                 // ...(D3,D4,D5) is normalized
284
 
285
     movel      %d3,X(%a6)
286
     movel      %d4,XFRAC(%a6)
287
     movel      %d5,XFRAC+4(%a6)
288
     negl       %d2
289
     movel      %d2,ADJK(%a6)
290
     fmovex     X(%a6),%fp0
291
     moveml     (%a7)+,%d2-%d7          // ...restore registers
292
     lea        X(%a6),%a0
293
     bras       LOGBGN                  // ...begin regular log(X)
294
 
295
 
296
HiX_not0:
297
     clrl       %d6
298
     bfffo      %d4{#0:#32},%d6         // ...find first 1
299
     movel      %d6,%d2                 // ...get k
300
     lsll       %d6,%d4
301
     movel      %d5,%d7                 // ...a copy of D5
302
     lsll       %d6,%d5
303
     negl       %d6
304
     addil      #32,%d6
305
     lsrl       %d6,%d7
306
     orl        %d7,%d4                 // ...(D3,D4,D5) normalized
307
 
308
     movel      %d3,X(%a6)
309
     movel      %d4,XFRAC(%a6)
310
     movel      %d5,XFRAC+4(%a6)
311
     negl       %d2
312
     movel      %d2,ADJK(%a6)
313
     fmovex     X(%a6),%fp0
314
     moveml     (%a7)+,%d2-%d7          // ...restore registers
315
     lea        X(%a6),%a0
316
     bras       LOGBGN                  // ...begin regular log(X)
317
 
318
 
319
        .global slogn
320
slogn:
321
//--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
322
 
323
        fmovex          (%a0),%fp0      // ...LOAD INPUT
324
        movel           #0x00000000,ADJK(%a6)
325
 
326
LOGBGN:
327
//--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
328
//--A FINITE, NON-ZERO, NORMALIZED NUMBER.
329
 
330
        movel   (%a0),%d0
331
        movew   4(%a0),%d0
332
 
333
        movel   (%a0),X(%a6)
334
        movel   4(%a0),X+4(%a6)
335
        movel   8(%a0),X+8(%a6)
336
 
337
        cmpil   #0,%d0          // ...CHECK IF X IS NEGATIVE
338
        blt     LOGNEG          // ...LOG OF NEGATIVE ARGUMENT IS INVALID
339
        cmp2l   BOUNDS1,%d0     // ...X IS POSITIVE, CHECK IF X IS NEAR 1
340
        bcc     LOGNEAR1        // ...BOUNDS IS ROUGHLY [15/16, 17/16]
341
 
342
LOGMAIN:
343
//--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
344
 
345
//--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
346
//--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
347
//--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
348
//--                     = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
349
//--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
350
//--LOG(1+U) CAN BE VERY EFFICIENT.
351
//--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
352
//--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
353
 
354
//--GET K, Y, F, AND ADDRESS OF 1/F.
355
        asrl    #8,%d0
356
        asrl    #8,%d0          // ...SHIFTED 16 BITS, BIASED EXPO. OF X
357
        subil   #0x3FFF,%d0     // ...THIS IS K
358
        addl    ADJK(%a6),%d0   // ...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
359
        lea     LOGTBL,%a0      // ...BASE ADDRESS OF 1/F AND LOG(F)
360
        fmovel  %d0,%fp1                // ...CONVERT K TO FLOATING-POINT FORMAT
361
 
362
//--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
363
        movel   #0x3FFF0000,X(%a6)      // ...X IS NOW Y, I.E. 2^(-K)*X
364
        movel   XFRAC(%a6),FFRAC(%a6)
365
        andil   #0xFE000000,FFRAC(%a6) // ...FIRST 7 BITS OF Y
366
        oril    #0x01000000,FFRAC(%a6) // ...GET F: ATTACH A 1 AT THE EIGHTH BIT
367
        movel   FFRAC(%a6),%d0  // ...READY TO GET ADDRESS OF 1/F
368
        andil   #0x7E000000,%d0
369
        asrl    #8,%d0
370
        asrl    #8,%d0
371
        asrl    #4,%d0          // ...SHIFTED 20, D0 IS THE DISPLACEMENT
372
        addal   %d0,%a0         // ...A0 IS THE ADDRESS FOR 1/F
373
 
374
        fmovex  X(%a6),%fp0
375
        movel   #0x3fff0000,F(%a6)
376
        clrl    F+8(%a6)
377
        fsubx   F(%a6),%fp0             // ...Y-F
378
        fmovemx %fp2-%fp2/%fp3,-(%sp)   // ...SAVE FP2 WHILE FP0 IS NOT READY
379
//--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
380
//--REGISTERS SAVED: FPCR, FP1, FP2
381
 
382
LP1CONT1:
383
//--AN RE-ENTRY POINT FOR LOGNP1
384
        fmulx   (%a0),%fp0      // ...FP0 IS U = (Y-F)/F
385
        fmulx   LOGOF2,%fp1     // ...GET K*LOG2 WHILE FP0 IS NOT READY
386
        fmovex  %fp0,%fp2
387
        fmulx   %fp2,%fp2               // ...FP2 IS V=U*U
388
        fmovex  %fp1,KLOG2(%a6) // ...PUT K*LOG2 IN MEMORY, FREE FP1
389
 
390
//--LOG(1+U) IS APPROXIMATED BY
391
//--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
392
//--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
393
 
394
        fmovex  %fp2,%fp3
395
        fmovex  %fp2,%fp1
396
 
397
        fmuld   LOGA6,%fp1      // ...V*A6
398
        fmuld   LOGA5,%fp2      // ...V*A5
399
 
400
        faddd   LOGA4,%fp1      // ...A4+V*A6
401
        faddd   LOGA3,%fp2      // ...A3+V*A5
402
 
403
        fmulx   %fp3,%fp1               // ...V*(A4+V*A6)
404
        fmulx   %fp3,%fp2               // ...V*(A3+V*A5)
405
 
406
        faddd   LOGA2,%fp1      // ...A2+V*(A4+V*A6)
407
        faddd   LOGA1,%fp2      // ...A1+V*(A3+V*A5)
408
 
409
        fmulx   %fp3,%fp1               // ...V*(A2+V*(A4+V*A6))
410
        addal   #16,%a0         // ...ADDRESS OF LOG(F)
411
        fmulx   %fp3,%fp2               // ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
412
 
413
        fmulx   %fp0,%fp1               // ...U*V*(A2+V*(A4+V*A6))
414
        faddx   %fp2,%fp0               // ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
415
 
416
        faddx   (%a0),%fp1      // ...LOG(F)+U*V*(A2+V*(A4+V*A6))
417
        fmovemx  (%sp)+,%fp2-%fp2/%fp3  // ...RESTORE FP2
418
        faddx   %fp1,%fp0               // ...FP0 IS LOG(F) + LOG(1+U)
419
 
420
        fmovel  %d1,%fpcr
421
        faddx   KLOG2(%a6),%fp0 // ...FINAL ADD
422
        bra     t_frcinx
423
 
424
 
425
LOGNEAR1:
426
//--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
427
        fmovex  %fp0,%fp1
428
        fsubs   one,%fp1                // ...FP1 IS X-1
429
        fadds   one,%fp0                // ...FP0 IS X+1
430
        faddx   %fp1,%fp1               // ...FP1 IS 2(X-1)
431
//--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
432
//--IN U, U = 2(X-1)/(X+1) = FP1/FP0
433
 
434
LP1CONT2:
435
//--THIS IS AN RE-ENTRY POINT FOR LOGNP1
436
        fdivx   %fp0,%fp1               // ...FP1 IS U
437
        fmovemx %fp2-%fp2/%fp3,-(%sp)    // ...SAVE FP2
438
//--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
439
//--LET V=U*U, W=V*V, CALCULATE
440
//--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
441
//--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
442
        fmovex  %fp1,%fp0
443
        fmulx   %fp0,%fp0       // ...FP0 IS V
444
        fmovex  %fp1,SAVEU(%a6) // ...STORE U IN MEMORY, FREE FP1
445
        fmovex  %fp0,%fp1
446
        fmulx   %fp1,%fp1       // ...FP1 IS W
447
 
448
        fmoved  LOGB5,%fp3
449
        fmoved  LOGB4,%fp2
450
 
451
        fmulx   %fp1,%fp3       // ...W*B5
452
        fmulx   %fp1,%fp2       // ...W*B4
453
 
454
        faddd   LOGB3,%fp3 // ...B3+W*B5
455
        faddd   LOGB2,%fp2 // ...B2+W*B4
456
 
457
        fmulx   %fp3,%fp1       // ...W*(B3+W*B5), FP3 RELEASED
458
 
459
        fmulx   %fp0,%fp2       // ...V*(B2+W*B4)
460
 
461
        faddd   LOGB1,%fp1 // ...B1+W*(B3+W*B5)
462
        fmulx   SAVEU(%a6),%fp0 // ...FP0 IS U*V
463
 
464
        faddx   %fp2,%fp1       // ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
465
        fmovemx (%sp)+,%fp2-%fp2/%fp3 // ...FP2 RESTORED
466
 
467
        fmulx   %fp1,%fp0       // ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
468
 
469
        fmovel  %d1,%fpcr
470
        faddx   SAVEU(%a6),%fp0
471
        bra     t_frcinx
472
        rts
473
 
474
LOGNEG:
475
//--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
476
        bra     t_operr
477
 
478
        .global slognp1d
479
slognp1d:
480
//--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
481
// Simply return the denorm
482
 
483
        bra     t_extdnrm
484
 
485
        .global slognp1
486
slognp1:
487
//--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
488
 
489
        fmovex  (%a0),%fp0      // ...LOAD INPUT
490
        fabsx   %fp0            //test magnitude
491
        fcmpx   LTHOLD,%fp0     //compare with min threshold
492
        fbgt    LP1REAL         //if greater, continue
493
        fmovel  #0,%fpsr                //clr N flag from compare
494
        fmovel  %d1,%fpcr
495
        fmovex  (%a0),%fp0      //return signed argument
496
        bra     t_frcinx
497
 
498
LP1REAL:
499
        fmovex  (%a0),%fp0      // ...LOAD INPUT
500
        movel   #0x00000000,ADJK(%a6)
501
        fmovex  %fp0,%fp1       // ...FP1 IS INPUT Z
502
        fadds   one,%fp0        // ...X := ROUND(1+Z)
503
        fmovex  %fp0,X(%a6)
504
        movew   XFRAC(%a6),XDCARE(%a6)
505
        movel   X(%a6),%d0
506
        cmpil   #0,%d0
507
        ble     LP1NEG0 // ...LOG OF ZERO OR -VE
508
        cmp2l   BOUNDS2,%d0
509
        bcs     LOGMAIN // ...BOUNDS2 IS [1/2,3/2]
510
//--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
511
//--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
512
//--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
513
 
514
LP1NEAR1:
515
//--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
516
        cmp2l   BOUNDS1,%d0
517
        bcss    LP1CARE
518
 
519
LP1ONE16:
520
//--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
521
//--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
522
        faddx   %fp1,%fp1       // ...FP1 IS 2Z
523
        fadds   one,%fp0        // ...FP0 IS 1+X
524
//--U = FP1/FP0
525
        bra     LP1CONT2
526
 
527
LP1CARE:
528
//--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
529
//--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
530
//--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
531
//--THERE ARE ONLY TWO CASES.
532
//--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
533
//--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
534
//--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
535
//--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
536
 
537
        movel   XFRAC(%a6),FFRAC(%a6)
538
        andil   #0xFE000000,FFRAC(%a6)
539
        oril    #0x01000000,FFRAC(%a6)  // ...F OBTAINED
540
        cmpil   #0x3FFF8000,%d0 // ...SEE IF 1+Z > 1
541
        bges    KISZERO
542
 
543
KISNEG1:
544
        fmoves  TWO,%fp0
545
        movel   #0x3fff0000,F(%a6)
546
        clrl    F+8(%a6)
547
        fsubx   F(%a6),%fp0     // ...2-F
548
        movel   FFRAC(%a6),%d0
549
        andil   #0x7E000000,%d0
550
        asrl    #8,%d0
551
        asrl    #8,%d0
552
        asrl    #4,%d0          // ...D0 CONTAINS DISPLACEMENT FOR 1/F
553
        faddx   %fp1,%fp1               // ...GET 2Z
554
        fmovemx %fp2-%fp2/%fp3,-(%sp)   // ...SAVE FP2
555
        faddx   %fp1,%fp0               // ...FP0 IS Y-F = (2-F)+2Z
556
        lea     LOGTBL,%a0      // ...A0 IS ADDRESS OF 1/F
557
        addal   %d0,%a0
558
        fmoves  negone,%fp1     // ...FP1 IS K = -1
559
        bra     LP1CONT1
560
 
561
KISZERO:
562
        fmoves  one,%fp0
563
        movel   #0x3fff0000,F(%a6)
564
        clrl    F+8(%a6)
565
        fsubx   F(%a6),%fp0             // ...1-F
566
        movel   FFRAC(%a6),%d0
567
        andil   #0x7E000000,%d0
568
        asrl    #8,%d0
569
        asrl    #8,%d0
570
        asrl    #4,%d0
571
        faddx   %fp1,%fp0               // ...FP0 IS Y-F
572
        fmovemx %fp2-%fp2/%fp3,-(%sp)   // ...FP2 SAVED
573
        lea     LOGTBL,%a0
574
        addal   %d0,%a0         // ...A0 IS ADDRESS OF 1/F
575
        fmoves  zero,%fp1       // ...FP1 IS K = 0
576
        bra     LP1CONT1
577
 
578
LP1NEG0:
579
//--FPCR SAVED. D0 IS X IN COMPACT FORM.
580
        cmpil   #0,%d0
581
        blts    LP1NEG
582
LP1ZERO:
583
        fmoves  negone,%fp0
584
 
585
        fmovel  %d1,%fpcr
586
        bra t_dz
587
 
588
LP1NEG:
589
        fmoves  zero,%fp0
590
 
591
        fmovel  %d1,%fpcr
592
        bra     t_operr
593
 
594
        |end

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.