| 1 | 110 | Agner | /*********************************  sin.as  ***********************************
 | 
      
         | 2 |  |  | * Author:        Agner Fog
 | 
      
         | 3 |  |  | * date created:  2018-03-29
 | 
      
         | 4 |  |  | * Last modified: 2021-04-25
 | 
      
         | 5 |  |  | * Version:       1.11
 | 
      
         | 6 |  |  | * Project:       ForwardCom library math.li
 | 
      
         | 7 |  |  | * Description:   sin, cos, and tan functions. Calculate in radians, double precision
 | 
      
         | 8 |  |  | *                The argument x can be a scalar or a vector
 | 
      
         | 9 |  |  | *                The return value will be a vector with the same length
 | 
      
         | 10 |  |  | * C declaration: double sin(double x);
 | 
      
         | 11 |  |  | * C declaration: double cos(double x);
 | 
      
         | 12 |  |  | * C declaration: double tan(double x);
 | 
      
         | 13 |  |  | * C declaration: struct {double s; double c;} sincos(double x);
 | 
      
         | 14 |  |  | *
 | 
      
         | 15 |  |  | * This code is adapted from C++ vector class library www.github.com/vectorclass
 | 
      
         | 16 |  |  | * Copyright 2018-2021 GNU General Public License http://www.gnu.org/licenses
 | 
      
         | 17 |  |  | *****************************************************************************/
 | 
      
         | 18 |  |  |  
 | 
      
         | 19 |  |  |  
 | 
      
         | 20 |  |  | // define constants
 | 
      
         | 21 |  |  | % M_2_PI = 0.636619772367581343076       // 2./pi
 | 
      
         | 22 |  |  | % P0sin  = -1.66666666666666307295E-1    // polynomial coefficients for sin
 | 
      
         | 23 |  |  | % P1sin  = 8.33333333332211858878E-3
 | 
      
         | 24 |  |  | % P2sin  = -1.98412698295895385996E-4
 | 
      
         | 25 |  |  | % P3sin  = 2.75573136213857245213E-6
 | 
      
         | 26 |  |  | % P4sin  = -2.50507477628578072866E-8
 | 
      
         | 27 |  |  | % P5sin  = 1.58962301576546568060E-10
 | 
      
         | 28 |  |  |  
 | 
      
         | 29 |  |  | % P0cos  = 4.16666666666665929218E-2     // polynomial coefficients for cos
 | 
      
         | 30 |  |  | % P1cos  = -1.38888888888730564116E-3
 | 
      
         | 31 |  |  | % P2cos  = 2.48015872888517045348E-5
 | 
      
         | 32 |  |  | % P3cos  = -2.75573141792967388112E-7
 | 
      
         | 33 |  |  | % P4cos  = 2.08757008419747316778E-9
 | 
      
         | 34 |  |  | % P5cos  = -1.13585365213876817300E-11
 | 
      
         | 35 |  |  |  
 | 
      
         | 36 |  |  | % DP1    = 7.853981554508209228515625E-1 // modulo pi/2 for extended precision
 | 
      
         | 37 |  |  | % DP2    = 7.94662735614792836714E-9     // correction for extended precision modular artithmetic
 | 
      
         | 38 |  |  | % DP3    = 3.06161699786838294307E-17    // correction for extended precision modular artithmetic
 | 
      
         | 39 |  |  |  
 | 
      
         | 40 |  |  |  
 | 
      
         | 41 |  |  | code section execute align = 4
 | 
      
         | 42 |  |  |  
 | 
      
         | 43 |  |  | public _sin:    function, reguse = 0, 0x7BF
 | 
      
         | 44 |  |  | public _cos:    function, reguse = 0, 0x7BF
 | 
      
         | 45 |  |  | public _sincos: function, reguse = 0, 0x7BF
 | 
      
         | 46 |  |  | public _tan:    function, reguse = 0, 0x7BF
 | 
      
         | 47 |  |  |  
 | 
      
         | 48 |  |  | // common entry for sin and sincos functions
 | 
      
         | 49 |  |  | _sin function
 | 
      
         | 50 |  |  | _sincos:
 | 
      
         | 51 |  |  |  
 | 
      
         | 52 |  |  | /* registers:
 | 
      
         | 53 |  |  |   v0 = x
 | 
      
         | 54 |  |  |   v1 = abs(x)
 | 
      
         | 55 |  |  |   v1 = quadrant
 | 
      
         | 56 |  |  |   v10 = abs(x) reduced modulo pi/2
 | 
      
         | 57 |  |  |   v2 = v10^2
 | 
      
         | 58 |  |  |   v3 = v10^4
 | 
      
         | 59 |  |  |   v4 = v10^8
 | 
      
         | 60 |  |  |   v5 = v10^3
 | 
      
         | 61 |  |  |   v6 = unused (vacant flag for calling function)
 | 
      
         | 62 |  |  |   v7 = temp
 | 
      
         | 63 |  |  |   v8 = sin
 | 
      
         | 64 |  |  |   v9 = cos
 | 
      
         | 65 |  |  | */
 | 
      
         | 66 |  |  |  
 | 
      
         | 67 |  |  | // Find quadrant:
 | 
      
         | 68 |  |  | //      0 -   pi/4 => 0
 | 
      
         | 69 |  |  | //   pi/4 - 3*pi/4 => 1
 | 
      
         | 70 |  |  | // 3*pi/4 - 5*pi/4 => 2
 | 
      
         | 71 |  |  | // 5*pi/4 - 7*pi/4 => 3
 | 
      
         | 72 |  |  | // 7*pi/4 - 8*pi/4 => 4
 | 
      
         | 73 |  |  |  
 | 
      
         | 74 |  |  | double v1 = clear_bit(v0, 63)                    // abs(x)
 | 
      
         | 75 |  |  | double v4 = v1 * M_2_PI
 | 
      
         | 76 |  |  | double v4 = round(v4, 0)   // round to integer
 | 
      
         | 77 |  |  |  
 | 
      
         | 78 |  |  | // reduce modulo pi/2, with extended precision
 | 
      
         | 79 |  |  | // x = ((xa - y * DP1) - y * DP2) - y * DP3;
 | 
      
         | 80 |  |  | double v10 = v4 * (-DP1*2) + v1
 | 
      
         | 81 |  |  | double v10 = v4 * (-DP2*2) + v10
 | 
      
         | 82 |  |  | double v10 = v4 * (-DP3*2) + v10
 | 
      
         | 83 |  |  | double v5 = !(v4 > ((1 << 51) + 0.0))              // check for loss of precision and overflow, but not NAN
 | 
      
         | 84 |  |  | double v1 = v4 + ((1 << 52) + 0.0)                 // add magic number 2^52 to get integer into lowest bit
 | 
      
         | 85 |  |  | double v10 = v5 ? v10 : 0                          // zero if out of range. result will be -1, 0, or 1
 | 
      
         | 86 |  |  |  
 | 
      
         | 87 |  |  | // Expansion of sin and cos, valid for -pi/4 <= x <= pi/4
 | 
      
         | 88 |  |  | double v2 = v10 * v10                              // x^2
 | 
      
         | 89 |  |  | double v3 = v2 * v2                              // x^4
 | 
      
         | 90 |  |  | double v4 = v3 * v3                              // x^8
 | 
      
         | 91 |  |  |  
 | 
      
         | 92 |  |  | // calculate polynomial P5sin*x2^5 + P4sin*x2^4 + P3sin*x2^3 + P2sin*x2^2 + P1sin*x2 + P0sin
 | 
      
         | 93 |  |  | // = (p2+p3*x2)*x4 + ((p4+p5*x2)*x8 + (p0+p1*x2));
 | 
      
         | 94 |  |  |  
 | 
      
         | 95 |  |  | double v5 = replace(v10, P0sin)                   // broadcast to same length as x
 | 
      
         | 96 |  |  | double v5 = v2 * P1sin + v5
 | 
      
         | 97 |  |  | double v7 = replace(v10, P4sin)
 | 
      
         | 98 |  |  | double v7 = v2 * P5sin + v7
 | 
      
         | 99 |  |  | double v8 = replace(v10, P2sin)
 | 
      
         | 100 |  |  | double v8 = v2 * P3sin + v8
 | 
      
         | 101 |  |  | double v7 = v7 * v4 + v5
 | 
      
         | 102 |  |  | double v8 = v8 * v3 + v7
 | 
      
         | 103 |  |  |  
 | 
      
         | 104 |  |  | // calculate polynomial P5cos*x2^5 + P4cos*x2^4 + P3cos*x2^3 + P2cos*x2^2 + P1cos*x2 + P0cos
 | 
      
         | 105 |  |  | // = (p2+p3*x2)*x4 + ((p4+p5*x2)*x8 + (p0+p1*x2));
 | 
      
         | 106 |  |  | double v5 = replace(v10, P0cos)
 | 
      
         | 107 |  |  | double v5 = v2 * P1cos + v5
 | 
      
         | 108 |  |  | double v7 = replace(v10, P4cos)
 | 
      
         | 109 |  |  | double v7 = v2 * P5cos + v7
 | 
      
         | 110 |  |  | double v9 = replace(v10, P2cos)
 | 
      
         | 111 |  |  | double v9 = v2 * P3cos + v9
 | 
      
         | 112 |  |  | double v7 = v7 * v4 + v5
 | 
      
         | 113 |  |  | double v9 = v9 * v3 + v7
 | 
      
         | 114 |  |  |  
 | 
      
         | 115 |  |  | // s = x + (x * x2) * s;
 | 
      
         | 116 |  |  | double v5 = v10 * v2
 | 
      
         | 117 |  |  | double v8 = v8 * v5 + v10
 | 
      
         | 118 |  |  | // c = 1.0 - x2 * 0.5 + (x2 * x2) * c;
 | 
      
         | 119 |  |  | double v9 = v9 * v3
 | 
      
         | 120 |  |  | double v9 = v2 * (-0.5) + v9
 | 
      
         | 121 |  |  | double v9 = 1.0 + v9
 | 
      
         | 122 |  |  |  
 | 
      
         | 123 |  |  | // swap sin and cos if odd quadrant
 | 
      
         | 124 |  |  | double v3 = v1 ? v9 : v8        // sin
 | 
      
         | 125 |  |  | double v4 = v1 ? v8 : v9        // cos
 | 
      
         | 126 |  |  |  
 | 
      
         | 127 |  |  | // get sign of sin
 | 
      
         | 128 |  |  | int64  v5 = v1 << 62            // get bit 1 into sign bit, x modulo pi/2 = 2 or 3
 | 
      
         | 129 |  |  | int64  v5 ^= v0                 // toggle with sign of original x
 | 
      
         | 130 |  |  | int64  v5 = and(v5, 1 << 63)    // isolate sign bit
 | 
      
         | 131 |  |  | double v0 = v3 ^ v5             // apply sign bit to sin
 | 
      
         | 132 |  |  |  
 | 
      
         | 133 |  |  | // get sign of cos
 | 
      
         | 134 |  |  | int64  v1 = v1 + 1              // change sign when x modulo pi/2 = 1 or 2
 | 
      
         | 135 |  |  | int64  v1 = v1 << 62            // get bit 1 into sign bit
 | 
      
         | 136 |  |  | int64  v1 = and(v1, 1 << 63)    // isolate sign bit
 | 
      
         | 137 |  |  | double v1 = v4 ^ v1             // apply sign bit to cos
 | 
      
         | 138 |  |  |  
 | 
      
         | 139 |  |  | // return sin in v0, cos in v1
 | 
      
         | 140 |  |  | return
 | 
      
         | 141 |  |  | _sin end
 | 
      
         | 142 |  |  |  
 | 
      
         | 143 |  |  | // cosine function
 | 
      
         | 144 |  |  | _cos function
 | 
      
         | 145 |  |  | call _sincos
 | 
      
         | 146 |  |  | double v0 = v1                                   // cos is in v1
 | 
      
         | 147 |  |  | return
 | 
      
         | 148 |  |  | _cos end
 | 
      
         | 149 |  |  |  
 | 
      
         | 150 |  |  | // tangent function
 | 
      
         | 151 |  |  | _tan function
 | 
      
         | 152 |  |  | call _sincos
 | 
      
         | 153 |  |  | double v0 = v0 / v1                              // tan(x) = sin(x)/cos(x)
 | 
      
         | 154 |  |  | return
 | 
      
         | 155 |  |  | _tan end
 | 
      
         | 156 |  |  |  
 | 
      
         | 157 |  |  | code end
 |