1 |
110 |
Agner |
/********************************* sin.as ***********************************
|
2 |
|
|
* Author: Agner Fog
|
3 |
|
|
* date created: 2018-03-29
|
4 |
|
|
* Last modified: 2021-04-25
|
5 |
|
|
* Version: 1.11
|
6 |
|
|
* Project: ForwardCom library math.li
|
7 |
|
|
* Description: sin, cos, and tan functions. Calculate in radians, double precision
|
8 |
|
|
* The argument x can be a scalar or a vector
|
9 |
|
|
* The return value will be a vector with the same length
|
10 |
|
|
* C declaration: double sin(double x);
|
11 |
|
|
* C declaration: double cos(double x);
|
12 |
|
|
* C declaration: double tan(double x);
|
13 |
|
|
* C declaration: struct {double s; double c;} sincos(double x);
|
14 |
|
|
*
|
15 |
|
|
* This code is adapted from C++ vector class library www.github.com/vectorclass
|
16 |
|
|
* Copyright 2018-2021 GNU General Public License http://www.gnu.org/licenses
|
17 |
|
|
*****************************************************************************/
|
18 |
|
|
|
19 |
|
|
|
20 |
|
|
// define constants
|
21 |
|
|
% M_2_PI = 0.636619772367581343076 // 2./pi
|
22 |
|
|
% P0sin = -1.66666666666666307295E-1 // polynomial coefficients for sin
|
23 |
|
|
% P1sin = 8.33333333332211858878E-3
|
24 |
|
|
% P2sin = -1.98412698295895385996E-4
|
25 |
|
|
% P3sin = 2.75573136213857245213E-6
|
26 |
|
|
% P4sin = -2.50507477628578072866E-8
|
27 |
|
|
% P5sin = 1.58962301576546568060E-10
|
28 |
|
|
|
29 |
|
|
% P0cos = 4.16666666666665929218E-2 // polynomial coefficients for cos
|
30 |
|
|
% P1cos = -1.38888888888730564116E-3
|
31 |
|
|
% P2cos = 2.48015872888517045348E-5
|
32 |
|
|
% P3cos = -2.75573141792967388112E-7
|
33 |
|
|
% P4cos = 2.08757008419747316778E-9
|
34 |
|
|
% P5cos = -1.13585365213876817300E-11
|
35 |
|
|
|
36 |
|
|
% DP1 = 7.853981554508209228515625E-1 // modulo pi/2 for extended precision
|
37 |
|
|
% DP2 = 7.94662735614792836714E-9 // correction for extended precision modular artithmetic
|
38 |
|
|
% DP3 = 3.06161699786838294307E-17 // correction for extended precision modular artithmetic
|
39 |
|
|
|
40 |
|
|
|
41 |
|
|
code section execute align = 4
|
42 |
|
|
|
43 |
|
|
public _sin: function, reguse = 0, 0x7BF
|
44 |
|
|
public _cos: function, reguse = 0, 0x7BF
|
45 |
|
|
public _sincos: function, reguse = 0, 0x7BF
|
46 |
|
|
public _tan: function, reguse = 0, 0x7BF
|
47 |
|
|
|
48 |
|
|
// common entry for sin and sincos functions
|
49 |
|
|
_sin function
|
50 |
|
|
_sincos:
|
51 |
|
|
|
52 |
|
|
/* registers:
|
53 |
|
|
v0 = x
|
54 |
|
|
v1 = abs(x)
|
55 |
|
|
v1 = quadrant
|
56 |
|
|
v10 = abs(x) reduced modulo pi/2
|
57 |
|
|
v2 = v10^2
|
58 |
|
|
v3 = v10^4
|
59 |
|
|
v4 = v10^8
|
60 |
|
|
v5 = v10^3
|
61 |
|
|
v6 = unused (vacant flag for calling function)
|
62 |
|
|
v7 = temp
|
63 |
|
|
v8 = sin
|
64 |
|
|
v9 = cos
|
65 |
|
|
*/
|
66 |
|
|
|
67 |
|
|
// Find quadrant:
|
68 |
|
|
// 0 - pi/4 => 0
|
69 |
|
|
// pi/4 - 3*pi/4 => 1
|
70 |
|
|
// 3*pi/4 - 5*pi/4 => 2
|
71 |
|
|
// 5*pi/4 - 7*pi/4 => 3
|
72 |
|
|
// 7*pi/4 - 8*pi/4 => 4
|
73 |
|
|
|
74 |
|
|
double v1 = clear_bit(v0, 63) // abs(x)
|
75 |
|
|
double v4 = v1 * M_2_PI
|
76 |
|
|
double v4 = round(v4, 0) // round to integer
|
77 |
|
|
|
78 |
|
|
// reduce modulo pi/2, with extended precision
|
79 |
|
|
// x = ((xa - y * DP1) - y * DP2) - y * DP3;
|
80 |
|
|
double v10 = v4 * (-DP1*2) + v1
|
81 |
|
|
double v10 = v4 * (-DP2*2) + v10
|
82 |
|
|
double v10 = v4 * (-DP3*2) + v10
|
83 |
|
|
double v5 = !(v4 > ((1 << 51) + 0.0)) // check for loss of precision and overflow, but not NAN
|
84 |
|
|
double v1 = v4 + ((1 << 52) + 0.0) // add magic number 2^52 to get integer into lowest bit
|
85 |
|
|
double v10 = v5 ? v10 : 0 // zero if out of range. result will be -1, 0, or 1
|
86 |
|
|
|
87 |
|
|
// Expansion of sin and cos, valid for -pi/4 <= x <= pi/4
|
88 |
|
|
double v2 = v10 * v10 // x^2
|
89 |
|
|
double v3 = v2 * v2 // x^4
|
90 |
|
|
double v4 = v3 * v3 // x^8
|
91 |
|
|
|
92 |
|
|
// calculate polynomial P5sin*x2^5 + P4sin*x2^4 + P3sin*x2^3 + P2sin*x2^2 + P1sin*x2 + P0sin
|
93 |
|
|
// = (p2+p3*x2)*x4 + ((p4+p5*x2)*x8 + (p0+p1*x2));
|
94 |
|
|
|
95 |
|
|
double v5 = replace(v10, P0sin) // broadcast to same length as x
|
96 |
|
|
double v5 = v2 * P1sin + v5
|
97 |
|
|
double v7 = replace(v10, P4sin)
|
98 |
|
|
double v7 = v2 * P5sin + v7
|
99 |
|
|
double v8 = replace(v10, P2sin)
|
100 |
|
|
double v8 = v2 * P3sin + v8
|
101 |
|
|
double v7 = v7 * v4 + v5
|
102 |
|
|
double v8 = v8 * v3 + v7
|
103 |
|
|
|
104 |
|
|
// calculate polynomial P5cos*x2^5 + P4cos*x2^4 + P3cos*x2^3 + P2cos*x2^2 + P1cos*x2 + P0cos
|
105 |
|
|
// = (p2+p3*x2)*x4 + ((p4+p5*x2)*x8 + (p0+p1*x2));
|
106 |
|
|
double v5 = replace(v10, P0cos)
|
107 |
|
|
double v5 = v2 * P1cos + v5
|
108 |
|
|
double v7 = replace(v10, P4cos)
|
109 |
|
|
double v7 = v2 * P5cos + v7
|
110 |
|
|
double v9 = replace(v10, P2cos)
|
111 |
|
|
double v9 = v2 * P3cos + v9
|
112 |
|
|
double v7 = v7 * v4 + v5
|
113 |
|
|
double v9 = v9 * v3 + v7
|
114 |
|
|
|
115 |
|
|
// s = x + (x * x2) * s;
|
116 |
|
|
double v5 = v10 * v2
|
117 |
|
|
double v8 = v8 * v5 + v10
|
118 |
|
|
// c = 1.0 - x2 * 0.5 + (x2 * x2) * c;
|
119 |
|
|
double v9 = v9 * v3
|
120 |
|
|
double v9 = v2 * (-0.5) + v9
|
121 |
|
|
double v9 = 1.0 + v9
|
122 |
|
|
|
123 |
|
|
// swap sin and cos if odd quadrant
|
124 |
|
|
double v3 = v1 ? v9 : v8 // sin
|
125 |
|
|
double v4 = v1 ? v8 : v9 // cos
|
126 |
|
|
|
127 |
|
|
// get sign of sin
|
128 |
|
|
int64 v5 = v1 << 62 // get bit 1 into sign bit, x modulo pi/2 = 2 or 3
|
129 |
|
|
int64 v5 ^= v0 // toggle with sign of original x
|
130 |
|
|
int64 v5 = and(v5, 1 << 63) // isolate sign bit
|
131 |
|
|
double v0 = v3 ^ v5 // apply sign bit to sin
|
132 |
|
|
|
133 |
|
|
// get sign of cos
|
134 |
|
|
int64 v1 = v1 + 1 // change sign when x modulo pi/2 = 1 or 2
|
135 |
|
|
int64 v1 = v1 << 62 // get bit 1 into sign bit
|
136 |
|
|
int64 v1 = and(v1, 1 << 63) // isolate sign bit
|
137 |
|
|
double v1 = v4 ^ v1 // apply sign bit to cos
|
138 |
|
|
|
139 |
|
|
// return sin in v0, cos in v1
|
140 |
|
|
return
|
141 |
|
|
_sin end
|
142 |
|
|
|
143 |
|
|
// cosine function
|
144 |
|
|
_cos function
|
145 |
|
|
call _sincos
|
146 |
|
|
double v0 = v1 // cos is in v1
|
147 |
|
|
return
|
148 |
|
|
_cos end
|
149 |
|
|
|
150 |
|
|
// tangent function
|
151 |
|
|
_tan function
|
152 |
|
|
call _sincos
|
153 |
|
|
double v0 = v0 / v1 // tan(x) = sin(x)/cos(x)
|
154 |
|
|
return
|
155 |
|
|
_tan end
|
156 |
|
|
|
157 |
|
|
code end
|