OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [rtems/] [c/] [src/] [lib/] [libcpu/] [m68k/] [m68040/] [fpsp/] [ssin.S] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 158 chris
//
2 208 chris
//      $Id: ssin.S,v 1.2 2001-09-27 12:01:22 chris Exp $
3 158 chris
//
4
//      ssin.sa 3.3 7/29/91
5
//
6
//      The entry point sSIN computes the sine of an input argument
7
//      sCOS computes the cosine, and sSINCOS computes both. The
8
//      corresponding entry points with a "d" computes the same
9
//      corresponding function values for denormalized inputs.
10
//
11
//      Input: Double-extended number X in location pointed to
12
//              by address register a0.
13
//
14
//      Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or
15
//              COS is requested. Otherwise, for SINCOS, sin(X) is returned
16
//              in Fp0, and cos(X) is returned in Fp1.
17
//
18
//      Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
19
//
20
//      Accuracy and Monotonicity: The returned result is within 1 ulp in
21
//              64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
22
//              result is subsequently rounded to double precision. The
23
//              result is provably monotonic in double precision.
24
//
25
//      Speed: The programs sSIN and sCOS take approximately 150 cycles for
26
//              input argument X such that |X| < 15Pi, which is the the usual
27
//              situation. The speed for sSINCOS is approximately 190 cycles.
28
//
29
//      Algorithm:
30
//
31
//      SIN and COS:
32
//      1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
33
//
34
//      2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
35
//
36
//      3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
37
//              k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite
38
//              k by k := k + AdjN.
39
//
40
//      4. If k is even, go to 6.
41
//
42
//      5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
43
//              where cos(r) is approximated by an even polynomial in r,
44
//              1 + r*r*(B1+s*(B2+ ... + s*B8)),        s = r*r.
45
//              Exit.
46
//
47
//      6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
48
//              where sin(r) is approximated by an odd polynomial in r
49
//              r + r*s*(A1+s*(A2+ ... + s*A7)),        s = r*r.
50
//              Exit.
51
//
52
//      7. If |X| > 1, go to 9.
53
//
54
//      8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
55
//
56
//      9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
57
//
58
//      SINCOS:
59
//      1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
60
//
61
//      2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
62
//              k = N mod 4, so in particular, k = 0,1,2,or 3.
63
//
64
//      3. If k is even, go to 5.
65
//
66
//      4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
67
//              j1 exclusive or with the l.s.b. of k.
68
//              sgn1 := (-1)**j1, sgn2 := (-1)**j2.
69
//              SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
70
//              sin(r) and cos(r) are computed as odd and even polynomials
71
//              in r, respectively. Exit
72
//
73
//      5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
74
//              SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
75
//              sin(r) and cos(r) are computed as odd and even polynomials
76
//              in r, respectively. Exit
77
//
78
//      6. If |X| > 1, go to 8.
79
//
80
//      7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
81
//
82
//      8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
83
//
84
 
85
//              Copyright (C) Motorola, Inc. 1990
86
//                      All Rights Reserved
87
//
88
//      THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
89
//      The copyright notice above does not evidence any
90
//      actual or intended publication of such source code.
91
 
92
//SSIN  idnt    2,1 | Motorola 040 Floating Point Software Package
93
 
94
        |section        8
95
 
96
#include "fpsp.defs"
97
 
98
BOUNDS1:        .long 0x3FD78000,0x4004BC7E
99
TWOBYPI:        .long 0x3FE45F30,0x6DC9C883
100
 
101
SINA7:  .long 0xBD6AAA77,0xCCC994F5
102
SINA6:  .long 0x3DE61209,0x7AAE8DA1
103
 
104
SINA5:  .long 0xBE5AE645,0x2A118AE4
105
SINA4:  .long 0x3EC71DE3,0xA5341531
106
 
107
SINA3:  .long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
108
 
109
SINA2:  .long 0x3FF80000,0x88888888,0x888859AF,0x00000000
110
 
111
SINA1:  .long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
112
 
113
COSB8:  .long 0x3D2AC4D0,0xD6011EE3
114
COSB7:  .long 0xBDA9396F,0x9F45AC19
115
 
116
COSB6:  .long 0x3E21EED9,0x0612C972
117
COSB5:  .long 0xBE927E4F,0xB79D9FCF
118
 
119
COSB4:  .long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
120
 
121
COSB3:  .long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
122
 
123
COSB2:  .long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
124
COSB1:  .long 0xBF000000
125
 
126
INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A
127
 
128
TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
129
TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
130
 
131
        |xref   PITBL
132
 
133
        .set    INARG,FP_SCR4
134
 
135
        .set    X,FP_SCR5
136
        .set    XDCARE,X+2
137
        .set    XFRAC,X+4
138
 
139
        .set    RPRIME,FP_SCR1
140
        .set    SPRIME,FP_SCR2
141
 
142
        .set    POSNEG1,L_SCR1
143
        .set    TWOTO63,L_SCR1
144
 
145
        .set    ENDFLAG,L_SCR2
146
        .set    N,L_SCR2
147
 
148
        .set    ADJN,L_SCR3
149
 
150
        | xref  t_frcinx
151
        |xref   t_extdnrm
152
        |xref   sto_cos
153
 
154
        .global ssind
155
ssind:
156
//--SIN(X) = X FOR DENORMALIZED X
157
        bra             t_extdnrm
158
 
159
        .global scosd
160
scosd:
161
//--COS(X) = 1 FOR DENORMALIZED X
162
 
163
        fmoves          #0x3F800000,%fp0
164
//
165
//      9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
166
//
167
        fmovel          #0,%fpsr
168
//
169
        bra             t_frcinx
170
 
171
        .global ssin
172
ssin:
173
//--SET ADJN TO 0
174
        movel           #0,ADJN(%a6)
175
        bras            SINBGN
176
 
177
        .global scos
178
scos:
179
//--SET ADJN TO 1
180
        movel           #1,ADJN(%a6)
181
 
182
SINBGN:
183
//--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
184
 
185
        fmovex          (%a0),%fp0      // ...LOAD INPUT
186
 
187
        movel           (%a0),%d0
188
        movew           4(%a0),%d0
189
        fmovex          %fp0,X(%a6)
190
        andil           #0x7FFFFFFF,%d0         // ...COMPACTIFY X
191
 
192
        cmpil           #0x3FD78000,%d0         // ...|X| >= 2**(-40)?
193
        bges            SOK1
194
        bra             SINSM
195
 
196
SOK1:
197
        cmpil           #0x4004BC7E,%d0         // ...|X| < 15 PI?
198
        blts            SINMAIN
199
        bra             REDUCEX
200
 
201
SINMAIN:
202
//--THIS IS THE USUAL CASE, |X| <= 15 PI.
203
//--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
204
        fmovex          %fp0,%fp1
205
        fmuld           TWOBYPI,%fp1    // ...X*2/PI
206
 
207
//--HIDE THE NEXT THREE INSTRUCTIONS
208
        lea             PITBL+0x200,%a1 // ...TABLE OF N*PI/2, N = -32,...,32
209
 
210
 
211
//--FP1 IS NOW READY
212
        fmovel          %fp1,N(%a6)             // ...CONVERT TO INTEGER
213
 
214
        movel           N(%a6),%d0
215
        asll            #4,%d0
216
        addal           %d0,%a1 // ...A1 IS THE ADDRESS OF N*PIBY2
217
//                              ...WHICH IS IN TWO PIECES Y1 & Y2
218
 
219
        fsubx           (%a1)+,%fp0     // ...X-Y1
220
//--HIDE THE NEXT ONE
221
        fsubs           (%a1),%fp0      // ...FP0 IS R = (X-Y1)-Y2
222
 
223
SINCONT:
224
//--continuation from REDUCEX
225
 
226
//--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
227
        movel           N(%a6),%d0
228
        addl            ADJN(%a6),%d0   // ...SEE IF D0 IS ODD OR EVEN
229
        rorl            #1,%d0  // ...D0 WAS ODD IFF D0 IS NEGATIVE
230
        cmpil           #0,%d0
231
        blt             COSPOLY
232
 
233
SINPOLY:
234
//--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
235
//--THEN WE RETURN      SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
236
//--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
237
//--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
238
//--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
239
//--WHERE T=S*S.
240
//--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
241
//--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
242
        fmovex          %fp0,X(%a6)     // ...X IS R
243
        fmulx           %fp0,%fp0       // ...FP0 IS S
244
//---HIDE THE NEXT TWO WHILE WAITING FOR FP0
245
        fmoved          SINA7,%fp3
246
        fmoved          SINA6,%fp2
247
//--FP0 IS NOW READY
248
        fmovex          %fp0,%fp1
249
        fmulx           %fp1,%fp1       // ...FP1 IS T
250
//--HIDE THE NEXT TWO WHILE WAITING FOR FP1
251
 
252
        rorl            #1,%d0
253
        andil           #0x80000000,%d0
254
//                              ...LEAST SIG. BIT OF D0 IN SIGN POSITION
255
        eorl            %d0,X(%a6)      // ...X IS NOW R'= SGN*R
256
 
257
        fmulx           %fp1,%fp3       // ...TA7
258
        fmulx           %fp1,%fp2       // ...TA6
259
 
260
        faddd           SINA5,%fp3 // ...A5+TA7
261
        faddd           SINA4,%fp2 // ...A4+TA6
262
 
263
        fmulx           %fp1,%fp3       // ...T(A5+TA7)
264
        fmulx           %fp1,%fp2       // ...T(A4+TA6)
265
 
266
        faddd           SINA3,%fp3 // ...A3+T(A5+TA7)
267
        faddx           SINA2,%fp2 // ...A2+T(A4+TA6)
268
 
269
        fmulx           %fp3,%fp1       // ...T(A3+T(A5+TA7))
270
 
271
        fmulx           %fp0,%fp2       // ...S(A2+T(A4+TA6))
272
        faddx           SINA1,%fp1 // ...A1+T(A3+T(A5+TA7))
273
        fmulx           X(%a6),%fp0     // ...R'*S
274
 
275
        faddx           %fp2,%fp1       // ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
276
//--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
277
//--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING
278
 
279
 
280
        fmulx           %fp1,%fp0               // ...SIN(R')-R'
281
//--FP1 RELEASED.
282
 
283
        fmovel          %d1,%FPCR               //restore users exceptions
284
        faddx           X(%a6),%fp0             //last inst - possible exception set
285
        bra             t_frcinx
286
 
287
 
288
COSPOLY:
289
//--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
290
//--THEN WE RETURN      SGN*COS(R). SGN*COS(R) IS COMPUTED BY
291
//--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
292
//--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
293
//--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
294
//--WHERE T=S*S.
295
//--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
296
//--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
297
//--AND IS THEREFORE STORED AS SINGLE PRECISION.
298
 
299
        fmulx           %fp0,%fp0       // ...FP0 IS S
300
//---HIDE THE NEXT TWO WHILE WAITING FOR FP0
301
        fmoved          COSB8,%fp2
302
        fmoved          COSB7,%fp3
303
//--FP0 IS NOW READY
304
        fmovex          %fp0,%fp1
305
        fmulx           %fp1,%fp1       // ...FP1 IS T
306
//--HIDE THE NEXT TWO WHILE WAITING FOR FP1
307
        fmovex          %fp0,X(%a6)     // ...X IS S
308
        rorl            #1,%d0
309
        andil           #0x80000000,%d0
310
//                      ...LEAST SIG. BIT OF D0 IN SIGN POSITION
311
 
312
        fmulx           %fp1,%fp2       // ...TB8
313
//--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
314
        eorl            %d0,X(%a6)      // ...X IS NOW S'= SGN*S
315
        andil           #0x80000000,%d0
316
 
317
        fmulx           %fp1,%fp3       // ...TB7
318
//--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
319
        oril            #0x3F800000,%d0 // ...D0 IS SGN IN SINGLE
320
        movel           %d0,POSNEG1(%a6)
321
 
322
        faddd           COSB6,%fp2 // ...B6+TB8
323
        faddd           COSB5,%fp3 // ...B5+TB7
324
 
325
        fmulx           %fp1,%fp2       // ...T(B6+TB8)
326
        fmulx           %fp1,%fp3       // ...T(B5+TB7)
327
 
328
        faddd           COSB4,%fp2 // ...B4+T(B6+TB8)
329
        faddx           COSB3,%fp3 // ...B3+T(B5+TB7)
330
 
331
        fmulx           %fp1,%fp2       // ...T(B4+T(B6+TB8))
332
        fmulx           %fp3,%fp1       // ...T(B3+T(B5+TB7))
333
 
334
        faddx           COSB2,%fp2 // ...B2+T(B4+T(B6+TB8))
335
        fadds           COSB1,%fp1 // ...B1+T(B3+T(B5+TB7))
336
 
337
        fmulx           %fp2,%fp0       // ...S(B2+T(B4+T(B6+TB8)))
338
//--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
339
//--FP2 RELEASED.
340
 
341
 
342
        faddx           %fp1,%fp0
343
//--FP1 RELEASED
344
 
345
        fmulx           X(%a6),%fp0
346
 
347
        fmovel          %d1,%FPCR               //restore users exceptions
348
        fadds           POSNEG1(%a6),%fp0       //last inst - possible exception set
349
        bra             t_frcinx
350
 
351
 
352
SINBORS:
353
//--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
354
//--IF |X| < 2**(-40), RETURN X OR 1.
355
        cmpil           #0x3FFF8000,%d0
356
        bgts            REDUCEX
357
 
358
 
359
SINSM:
360
        movel           ADJN(%a6),%d0
361
        cmpil           #0,%d0
362
        bgts            COSTINY
363
 
364
SINTINY:
365
        movew           #0x0000,XDCARE(%a6)     // ...JUST IN CASE
366
        fmovel          %d1,%FPCR               //restore users exceptions
367
        fmovex          X(%a6),%fp0             //last inst - possible exception set
368
        bra             t_frcinx
369
 
370
 
371
COSTINY:
372
        fmoves          #0x3F800000,%fp0
373
 
374
        fmovel          %d1,%FPCR               //restore users exceptions
375
        fsubs           #0x00800000,%fp0        //last inst - possible exception set
376
        bra             t_frcinx
377
 
378
 
379
REDUCEX:
380
//--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
381
//--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
382
//--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
383
 
384
        fmovemx %fp2-%fp5,-(%a7)        // ...save FP2 through FP5
385
        movel           %d2,-(%a7)
386
        fmoves         #0x00000000,%fp1
387
//--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
388
//--there is a danger of unwanted overflow in first LOOP iteration.  In this
389
//--case, reduce argument by one remainder step to make subsequent reduction
390
//--safe.
391
        cmpil   #0x7ffeffff,%d0         //is argument dangerously large?
392
        bnes    LOOP
393
        movel   #0x7ffe0000,FP_SCR2(%a6)        //yes
394
//                                      ;create 2**16383*PI/2
395
        movel   #0xc90fdaa2,FP_SCR2+4(%a6)
396
        clrl    FP_SCR2+8(%a6)
397
        ftstx   %fp0                    //test sign of argument
398
        movel   #0x7fdc0000,FP_SCR3(%a6)        //create low half of 2**16383*
399
//                                      ;PI/2 at FP_SCR3
400
        movel   #0x85a308d3,FP_SCR3+4(%a6)
401
        clrl   FP_SCR3+8(%a6)
402
        fblt    red_neg
403
        orw     #0x8000,FP_SCR2(%a6)    //positive arg
404
        orw     #0x8000,FP_SCR3(%a6)
405
red_neg:
406
        faddx  FP_SCR2(%a6),%fp0                //high part of reduction is exact
407
        fmovex  %fp0,%fp1               //save high result in fp1
408
        faddx  FP_SCR3(%a6),%fp0                //low part of reduction
409
        fsubx  %fp0,%fp1                        //determine low component of result
410
        faddx  FP_SCR3(%a6),%fp1                //fp0/fp1 are reduced argument.
411
 
412
//--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
413
//--integer quotient will be stored in N
414
//--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
415
 
416
LOOP:
417
        fmovex          %fp0,INARG(%a6) // ...+-2**K * F, 1 <= F < 2
418
        movew           INARG(%a6),%d0
419
        movel          %d0,%a1          // ...save a copy of D0
420
        andil           #0x00007FFF,%d0
421
        subil           #0x00003FFF,%d0 // ...D0 IS K
422
        cmpil           #28,%d0
423
        bles            LASTLOOP
424
CONTLOOP:
425
        subil           #27,%d0  // ...D0 IS L := K-27
426
        movel           #0,ENDFLAG(%a6)
427
        bras            WORK
428
LASTLOOP:
429
        clrl            %d0             // ...D0 IS L := 0
430
        movel           #1,ENDFLAG(%a6)
431
 
432
WORK:
433
//--FIND THE REMAINDER OF (R,r) W.R.T.  2**L * (PI/2). L IS SO CHOSEN
434
//--THAT        INT( X * (2/PI) / 2**(L) ) < 2**29.
435
 
436
//--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
437
//--2**L * (PIby2_1), 2**L * (PIby2_2)
438
 
439
        movel           #0x00003FFE,%d2 // ...BIASED EXPO OF 2/PI
440
        subl            %d0,%d2         // ...BIASED EXPO OF 2**(-L)*(2/PI)
441
 
442
        movel           #0xA2F9836E,FP_SCR1+4(%a6)
443
        movel           #0x4E44152A,FP_SCR1+8(%a6)
444
        movew           %d2,FP_SCR1(%a6)        // ...FP_SCR1 is 2**(-L)*(2/PI)
445
 
446
        fmovex          %fp0,%fp2
447
        fmulx           FP_SCR1(%a6),%fp2
448
//--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
449
//--FLOATING POINT FORMAT, THE TWO FMOVE'S      FMOVE.L FP <--> N
450
//--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
451
//--(SIGN(INARG)*2**63  +       FP2) - SIGN(INARG)*2**63 WILL GIVE
452
//--US THE DESIRED VALUE IN FLOATING POINT.
453
 
454
//--HIDE SIX CYCLES OF INSTRUCTION
455
        movel           %a1,%d2
456
        swap            %d2
457
        andil           #0x80000000,%d2
458
        oril            #0x5F000000,%d2 // ...D2 IS SIGN(INARG)*2**63 IN SGL
459
        movel           %d2,TWOTO63(%a6)
460
 
461
        movel           %d0,%d2
462
        addil           #0x00003FFF,%d2 // ...BIASED EXPO OF 2**L * (PI/2)
463
 
464
//--FP2 IS READY
465
        fadds           TWOTO63(%a6),%fp2       // ...THE FRACTIONAL PART OF FP1 IS ROUNDED
466
 
467
//--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
468
        movew           %d2,FP_SCR2(%a6)
469
        clrw           FP_SCR2+2(%a6)
470
        movel           #0xC90FDAA2,FP_SCR2+4(%a6)
471
        clrl            FP_SCR2+8(%a6)          // ...FP_SCR2 is  2**(L) * Piby2_1
472
 
473
//--FP2 IS READY
474
        fsubs           TWOTO63(%a6),%fp2               // ...FP2 is N
475
 
476
        addil           #0x00003FDD,%d0
477
        movew           %d0,FP_SCR3(%a6)
478
        clrw           FP_SCR3+2(%a6)
479
        movel           #0x85A308D3,FP_SCR3+4(%a6)
480
        clrl            FP_SCR3+8(%a6)          // ...FP_SCR3 is 2**(L) * Piby2_2
481
 
482
        movel           ENDFLAG(%a6),%d0
483
 
484
//--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
485
//--P2 = 2**(L) * Piby2_2
486
        fmovex          %fp2,%fp4
487
        fmulx           FP_SCR2(%a6),%fp4               // ...W = N*P1
488
        fmovex          %fp2,%fp5
489
        fmulx           FP_SCR3(%a6),%fp5               // ...w = N*P2
490
        fmovex          %fp4,%fp3
491
//--we want P+p = W+w  but  |p| <= half ulp of P
492
//--Then, we need to compute  A := R-P   and  a := r-p
493
        faddx           %fp5,%fp3                       // ...FP3 is P
494
        fsubx           %fp3,%fp4                       // ...W-P
495
 
496
        fsubx           %fp3,%fp0                       // ...FP0 is A := R - P
497
        faddx           %fp5,%fp4                       // ...FP4 is p = (W-P)+w
498
 
499
        fmovex          %fp0,%fp3                       // ...FP3 A
500
        fsubx           %fp4,%fp1                       // ...FP1 is a := r - p
501
 
502
//--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
503
//--|r| <= half ulp of R.
504
        faddx           %fp1,%fp0                       // ...FP0 is R := A+a
505
//--No need to calculate r if this is the last loop
506
        cmpil           #0,%d0
507
        bgt             RESTORE
508
 
509
//--Need to calculate r
510
        fsubx           %fp0,%fp3                       // ...A-R
511
        faddx           %fp3,%fp1                       // ...FP1 is r := (A-R)+a
512
        bra             LOOP
513
 
514
RESTORE:
515
        fmovel          %fp2,N(%a6)
516
        movel           (%a7)+,%d2
517
        fmovemx (%a7)+,%fp2-%fp5
518
 
519
 
520
        movel           ADJN(%a6),%d0
521
        cmpil           #4,%d0
522
 
523
        blt             SINCONT
524
        bras            SCCONT
525
 
526
        .global ssincosd
527
ssincosd:
528
//--SIN AND COS OF X FOR DENORMALIZED X
529
 
530
        fmoves          #0x3F800000,%fp1
531
        bsr             sto_cos         //store cosine result
532
        bra             t_extdnrm
533
 
534
        .global ssincos
535
ssincos:
536
//--SET ADJN TO 4
537
        movel           #4,ADJN(%a6)
538
 
539
        fmovex          (%a0),%fp0      // ...LOAD INPUT
540
 
541
        movel           (%a0),%d0
542
        movew           4(%a0),%d0
543
        fmovex          %fp0,X(%a6)
544
        andil           #0x7FFFFFFF,%d0         // ...COMPACTIFY X
545
 
546
        cmpil           #0x3FD78000,%d0         // ...|X| >= 2**(-40)?
547
        bges            SCOK1
548
        bra             SCSM
549
 
550
SCOK1:
551
        cmpil           #0x4004BC7E,%d0         // ...|X| < 15 PI?
552
        blts            SCMAIN
553
        bra             REDUCEX
554
 
555
 
556
SCMAIN:
557
//--THIS IS THE USUAL CASE, |X| <= 15 PI.
558
//--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
559
        fmovex          %fp0,%fp1
560
        fmuld           TWOBYPI,%fp1    // ...X*2/PI
561
 
562
//--HIDE THE NEXT THREE INSTRUCTIONS
563
        lea             PITBL+0x200,%a1 // ...TABLE OF N*PI/2, N = -32,...,32
564
 
565
 
566
//--FP1 IS NOW READY
567
        fmovel          %fp1,N(%a6)             // ...CONVERT TO INTEGER
568
 
569
        movel           N(%a6),%d0
570
        asll            #4,%d0
571
        addal           %d0,%a1         // ...ADDRESS OF N*PIBY2, IN Y1, Y2
572
 
573
        fsubx           (%a1)+,%fp0     // ...X-Y1
574
        fsubs           (%a1),%fp0      // ...FP0 IS R = (X-Y1)-Y2
575
 
576
SCCONT:
577
//--continuation point from REDUCEX
578
 
579
//--HIDE THE NEXT TWO
580
        movel           N(%a6),%d0
581
        rorl            #1,%d0
582
 
583
        cmpil           #0,%d0          // ...D0 < 0 IFF N IS ODD
584
        bge             NEVEN
585
 
586
NODD:
587
//--REGISTERS SAVED SO FAR: D0, A0, FP2.
588
 
589
        fmovex          %fp0,RPRIME(%a6)
590
        fmulx           %fp0,%fp0        // ...FP0 IS S = R*R
591
        fmoved          SINA7,%fp1      // ...A7
592
        fmoved          COSB8,%fp2      // ...B8
593
        fmulx           %fp0,%fp1        // ...SA7
594
        movel           %d2,-(%a7)
595
        movel           %d0,%d2
596
        fmulx           %fp0,%fp2        // ...SB8
597
        rorl            #1,%d2
598
        andil           #0x80000000,%d2
599
 
600
        faddd           SINA6,%fp1      // ...A6+SA7
601
        eorl            %d0,%d2
602
        andil           #0x80000000,%d2
603
        faddd           COSB7,%fp2      // ...B7+SB8
604
 
605
        fmulx           %fp0,%fp1        // ...S(A6+SA7)
606
        eorl            %d2,RPRIME(%a6)
607
        movel           (%a7)+,%d2
608
        fmulx           %fp0,%fp2        // ...S(B7+SB8)
609
        rorl            #1,%d0
610
        andil           #0x80000000,%d0
611
 
612
        faddd           SINA5,%fp1      // ...A5+S(A6+SA7)
613
        movel           #0x3F800000,POSNEG1(%a6)
614
        eorl            %d0,POSNEG1(%a6)
615
        faddd           COSB6,%fp2      // ...B6+S(B7+SB8)
616
 
617
        fmulx           %fp0,%fp1        // ...S(A5+S(A6+SA7))
618
        fmulx           %fp0,%fp2        // ...S(B6+S(B7+SB8))
619
        fmovex          %fp0,SPRIME(%a6)
620
 
621
        faddd           SINA4,%fp1      // ...A4+S(A5+S(A6+SA7))
622
        eorl            %d0,SPRIME(%a6)
623
        faddd           COSB5,%fp2      // ...B5+S(B6+S(B7+SB8))
624
 
625
        fmulx           %fp0,%fp1        // ...S(A4+...)
626
        fmulx           %fp0,%fp2        // ...S(B5+...)
627
 
628
        faddd           SINA3,%fp1      // ...A3+S(A4+...)
629
        faddd           COSB4,%fp2      // ...B4+S(B5+...)
630
 
631
        fmulx           %fp0,%fp1        // ...S(A3+...)
632
        fmulx           %fp0,%fp2        // ...S(B4+...)
633
 
634
        faddx           SINA2,%fp1      // ...A2+S(A3+...)
635
        faddx           COSB3,%fp2      // ...B3+S(B4+...)
636
 
637
        fmulx           %fp0,%fp1        // ...S(A2+...)
638
        fmulx           %fp0,%fp2        // ...S(B3+...)
639
 
640
        faddx           SINA1,%fp1      // ...A1+S(A2+...)
641
        faddx           COSB2,%fp2      // ...B2+S(B3+...)
642
 
643
        fmulx           %fp0,%fp1        // ...S(A1+...)
644
        fmulx           %fp2,%fp0        // ...S(B2+...)
645
 
646
 
647
 
648
        fmulx           RPRIME(%a6),%fp1        // ...R'S(A1+...)
649
        fadds           COSB1,%fp0      // ...B1+S(B2...)
650
        fmulx           SPRIME(%a6),%fp0        // ...S'(B1+S(B2+...))
651
 
652
        movel           %d1,-(%sp)      //restore users mode & precision
653
        andil           #0xff,%d1               //mask off all exceptions
654
        fmovel          %d1,%FPCR
655
        faddx           RPRIME(%a6),%fp1        // ...COS(X)
656
        bsr             sto_cos         //store cosine result
657
        fmovel          (%sp)+,%FPCR    //restore users exceptions
658
        fadds           POSNEG1(%a6),%fp0       // ...SIN(X)
659
 
660
        bra             t_frcinx
661
 
662
 
663
NEVEN:
664
//--REGISTERS SAVED SO FAR: FP2.
665
 
666
        fmovex          %fp0,RPRIME(%a6)
667
        fmulx           %fp0,%fp0        // ...FP0 IS S = R*R
668
        fmoved          COSB8,%fp1                      // ...B8
669
        fmoved          SINA7,%fp2                      // ...A7
670
        fmulx           %fp0,%fp1        // ...SB8
671
        fmovex          %fp0,SPRIME(%a6)
672
        fmulx           %fp0,%fp2        // ...SA7
673
        rorl            #1,%d0
674
        andil           #0x80000000,%d0
675
        faddd           COSB7,%fp1      // ...B7+SB8
676
        faddd           SINA6,%fp2      // ...A6+SA7
677
        eorl            %d0,RPRIME(%a6)
678
        eorl            %d0,SPRIME(%a6)
679
        fmulx           %fp0,%fp1        // ...S(B7+SB8)
680
        oril            #0x3F800000,%d0
681
        movel           %d0,POSNEG1(%a6)
682
        fmulx           %fp0,%fp2        // ...S(A6+SA7)
683
 
684
        faddd           COSB6,%fp1      // ...B6+S(B7+SB8)
685
        faddd           SINA5,%fp2      // ...A5+S(A6+SA7)
686
 
687
        fmulx           %fp0,%fp1        // ...S(B6+S(B7+SB8))
688
        fmulx           %fp0,%fp2        // ...S(A5+S(A6+SA7))
689
 
690
        faddd           COSB5,%fp1      // ...B5+S(B6+S(B7+SB8))
691
        faddd           SINA4,%fp2      // ...A4+S(A5+S(A6+SA7))
692
 
693
        fmulx           %fp0,%fp1        // ...S(B5+...)
694
        fmulx           %fp0,%fp2        // ...S(A4+...)
695
 
696
        faddd           COSB4,%fp1      // ...B4+S(B5+...)
697
        faddd           SINA3,%fp2      // ...A3+S(A4+...)
698
 
699
        fmulx           %fp0,%fp1        // ...S(B4+...)
700
        fmulx           %fp0,%fp2        // ...S(A3+...)
701
 
702
        faddx           COSB3,%fp1      // ...B3+S(B4+...)
703
        faddx           SINA2,%fp2      // ...A2+S(A3+...)
704
 
705
        fmulx           %fp0,%fp1        // ...S(B3+...)
706
        fmulx           %fp0,%fp2        // ...S(A2+...)
707
 
708
        faddx           COSB2,%fp1      // ...B2+S(B3+...)
709
        faddx           SINA1,%fp2      // ...A1+S(A2+...)
710
 
711
        fmulx           %fp0,%fp1        // ...S(B2+...)
712
        fmulx           %fp2,%fp0        // ...s(a1+...)
713
 
714
 
715
 
716
        fadds           COSB1,%fp1      // ...B1+S(B2...)
717
        fmulx           RPRIME(%a6),%fp0        // ...R'S(A1+...)
718
        fmulx           SPRIME(%a6),%fp1        // ...S'(B1+S(B2+...))
719
 
720
        movel           %d1,-(%sp)      //save users mode & precision
721
        andil           #0xff,%d1               //mask off all exceptions
722
        fmovel          %d1,%FPCR
723
        fadds           POSNEG1(%a6),%fp1       // ...COS(X)
724
        bsr             sto_cos         //store cosine result
725
        fmovel          (%sp)+,%FPCR    //restore users exceptions
726
        faddx           RPRIME(%a6),%fp0        // ...SIN(X)
727
 
728
        bra             t_frcinx
729
 
730
SCBORS:
731
        cmpil           #0x3FFF8000,%d0
732
        bgt             REDUCEX
733
 
734
 
735
SCSM:
736
        movew           #0x0000,XDCARE(%a6)
737
        fmoves          #0x3F800000,%fp1
738
 
739
        movel           %d1,-(%sp)      //save users mode & precision
740
        andil           #0xff,%d1               //mask off all exceptions
741
        fmovel          %d1,%FPCR
742
        fsubs           #0x00800000,%fp1
743
        bsr             sto_cos         //store cosine result
744
        fmovel          (%sp)+,%FPCR    //restore users exceptions
745
        fmovex          X(%a6),%fp0
746
        bra             t_frcinx
747
 
748
        |end

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.