| 1 |
111 |
Agner |
/********************************* sincosf.as *******************************
|
| 2 |
|
|
* Author: Agner Fog
|
| 3 |
|
|
* date created: 2020-04-29
|
| 4 |
|
|
* Last modified: 2021-04-25
|
| 5 |
|
|
* Version: 1.11
|
| 6 |
|
|
* Project: ForwardCom library math.li
|
| 7 |
|
|
* Description: sin, cos, and tan functions. Calculate in radians, single precision
|
| 8 |
|
|
* The argument x can be a scalar or a vector
|
| 9 |
|
|
* The return value will be a vector with the same length
|
| 10 |
|
|
* C declaration: float sin(float x);
|
| 11 |
|
|
* C declaration: float cos(float x);
|
| 12 |
|
|
* C declaration: float tan(float x);
|
| 13 |
|
|
* C declaration: struct {float s; float c;} sincos(float x);
|
| 14 |
|
|
*
|
| 15 |
|
|
* This code is adapted from C++ vector class library www.github.com/vectorclass
|
| 16 |
|
|
* Copyright 2020-2021 GNU General Public License http://www.gnu.org/licenses
|
| 17 |
|
|
*****************************************************************************/
|
| 18 |
|
|
|
| 19 |
|
|
|
| 20 |
|
|
// define constants
|
| 21 |
|
|
% M_2_PI = 0.636619772367581343076 // 2./pi
|
| 22 |
|
|
|
| 23 |
|
|
% P0sinf = -1.6666654611E-1
|
| 24 |
|
|
% P1sinf = 8.3321608736E-3
|
| 25 |
|
|
% P2sinf = -1.9515295891E-4
|
| 26 |
|
|
% P0cosf = 4.166664568298827E-2
|
| 27 |
|
|
% P1cosf = -1.388731625493765E-3
|
| 28 |
|
|
% P2cosf = 2.443315711809948E-5
|
| 29 |
|
|
|
| 30 |
|
|
% DP1F = 0.78515625 * 2.
|
| 31 |
|
|
% DP2F = 2.4187564849853515625E-4 * 2.
|
| 32 |
|
|
% DP3F = 3.77489497744594108E-8 * 2.
|
| 33 |
|
|
|
| 34 |
|
|
|
| 35 |
|
|
code section execute align = 4
|
| 36 |
|
|
|
| 37 |
|
|
public _sinf: function, reguse = 0, 0x1BF
|
| 38 |
|
|
public _cosf: function, reguse = 0, 0x1BF
|
| 39 |
|
|
public _sincosf: function, reguse = 0, 0x1BF
|
| 40 |
|
|
public _tanf: function, reguse = 0, 0x1BF
|
| 41 |
|
|
|
| 42 |
|
|
// common entry for sin and sincos functions
|
| 43 |
|
|
_sinf function
|
| 44 |
|
|
_sincosf:
|
| 45 |
|
|
|
| 46 |
|
|
/* registers:
|
| 47 |
|
|
v0 = x
|
| 48 |
|
|
v1 = abs(x)
|
| 49 |
|
|
v1 = quadrant
|
| 50 |
|
|
v2 = x^2
|
| 51 |
|
|
v3 = x^3
|
| 52 |
|
|
v4 = x^4
|
| 53 |
|
|
v5 = temp
|
| 54 |
|
|
v5 = sin
|
| 55 |
|
|
v6 = unused (vacant flag for calling function)
|
| 56 |
|
|
v7 = cos
|
| 57 |
|
|
v8 = abs(x) reduced modulo pi/2
|
| 58 |
|
|
*/
|
| 59 |
|
|
|
| 60 |
|
|
// Find quadrant:
|
| 61 |
|
|
// 0 - pi/4 => 0
|
| 62 |
|
|
// pi/4 - 3*pi/4 => 1
|
| 63 |
|
|
// 3*pi/4 - 5*pi/4 => 2
|
| 64 |
|
|
// 5*pi/4 - 7*pi/4 => 3
|
| 65 |
|
|
// 7*pi/4 - 8*pi/4 => 4
|
| 66 |
|
|
|
| 67 |
|
|
// reduce modulo pi/2, with extended precision
|
| 68 |
|
|
//nop
|
| 69 |
|
|
float v1 = clear_bit(v0, 31) // abs(x)
|
| 70 |
|
|
float v5 = v1 * M_2_PI
|
| 71 |
|
|
float v5 = round(v5, 0) // round to integer
|
| 72 |
|
|
|
| 73 |
|
|
// x = ((xa - y * DP1) - y * DP2) - y * DP3;
|
| 74 |
|
|
float v8 = v5 * (-DP1F) + v1
|
| 75 |
|
|
float v8 = v5 * (-DP2F) + v8
|
| 76 |
|
|
float v8 = v5 * (-DP3F) + v8
|
| 77 |
|
|
|
| 78 |
|
|
float v3 = !(v5 > ((1 << 22) + 0.0)) // check for loss of precision and overflow, but not NAN
|
| 79 |
|
|
float v1 = v5 + ((1 << 23) + 0.0) // add magic number 2^23 to get integer into lowest bit
|
| 80 |
|
|
float v8 = v3 ? v8 : 0 // zero if out of range. result will be -1, 0, or 1
|
| 81 |
|
|
|
| 82 |
|
|
// Taylor expansion of sin and cos, valid for -pi/4 <= x <= pi/4
|
| 83 |
|
|
// s = polynomial_2(x^2, P0sinf, P1sinf, P2sinf) * (x*x^2) + x;
|
| 84 |
|
|
|
| 85 |
|
|
float v2 = v8 * v8 // x^2
|
| 86 |
|
|
float v3 = v8 * v2 // x^3
|
| 87 |
|
|
float v4 = v2 * v2 // x^4
|
| 88 |
|
|
float v5 = replace(v8, P0sinf) // broadcast to same length as x
|
| 89 |
|
|
float v5 = v2 * P1sinf + v5
|
| 90 |
|
|
float v5 = v4 * P2sinf + v5
|
| 91 |
|
|
float v5 = v5 * v3 + v8 // sin
|
| 92 |
|
|
|
| 93 |
|
|
// c = polynomial_2(x2, P0cosf, P1cosf, P2cosf) * (x2*x2) + nmul_add(0.5f, x2, 1.0f);
|
| 94 |
|
|
float v7 = replace(v8, P0cosf) // broadcast to same length as x
|
| 95 |
|
|
float v7 = v2 * P1cosf + v7
|
| 96 |
|
|
float v7 = v4 * P2cosf + v7
|
| 97 |
|
|
float v3 = replace(v8, 1.0)
|
| 98 |
|
|
float v3 = v2 * (-0.5) + v3 // 1 - 0.5*x^2
|
| 99 |
|
|
float v7 = v7 * v4 + v3 // cos
|
| 100 |
|
|
|
| 101 |
|
|
// swap sin and cos if odd quadrant
|
| 102 |
|
|
float v3 = v1 ? v7 : v5 // sin
|
| 103 |
|
|
float v4 = v1 ? v5 : v7 // cos
|
| 104 |
|
|
|
| 105 |
|
|
// get sign of sin
|
| 106 |
|
|
int32 v5 = v1 << 30 // get bit 1 into sign bit, x modulo pi/2 = 2 or 3
|
| 107 |
|
|
int32 v5 ^= v0 // toggle with sign of original x
|
| 108 |
|
|
int32 v5 = and(v5, 1 << 31) // isolate sign bit
|
| 109 |
|
|
float v0 = v3 ^ v5 // apply sign bit to sin
|
| 110 |
|
|
|
| 111 |
|
|
// get sign of cos
|
| 112 |
|
|
int32 v1 = v1 + 1 // change sign when x modulo pi/2 = 1 or 2
|
| 113 |
|
|
int32 v1 = v1 << 30 // get bit 1 into sign bit
|
| 114 |
|
|
int32 v1 = and(v1, 1 << 31) // isolate sign bit
|
| 115 |
|
|
float v1 = v4 ^ v1 // apply sign bit to cos
|
| 116 |
|
|
|
| 117 |
|
|
// return sin in v0, cos in v1
|
| 118 |
|
|
return
|
| 119 |
|
|
_sinf end
|
| 120 |
|
|
|
| 121 |
|
|
// cosine function
|
| 122 |
|
|
_cosf function
|
| 123 |
|
|
call _sincosf
|
| 124 |
|
|
float v0 = v1 // cos is in v1
|
| 125 |
|
|
return
|
| 126 |
|
|
_cosf end
|
| 127 |
|
|
|
| 128 |
|
|
// tangent function
|
| 129 |
|
|
_tanf function
|
| 130 |
|
|
call _sincosf
|
| 131 |
|
|
float v0 = v0 / v1 // tan(x) = sin(x)/cos(x)
|
| 132 |
|
|
return
|
| 133 |
|
|
_tanf end
|
| 134 |
|
|
|
| 135 |
|
|
code end
|