1 |
92 |
Agner |
/******************************* integrate.as *******************************
|
2 |
|
|
* Author: Agner Fog
|
3 |
|
|
* date created: 2018-03-30
|
4 |
|
|
* Last modified: 2021-05-25
|
5 |
|
|
* Version: 1.11
|
6 |
|
|
* Project: ForwardCom library math.li
|
7 |
|
|
* Description: Numerical integration of a function f(x) over a given x-interval
|
8 |
|
|
* Uses 4-point Gauss-Legendre integration method
|
9 |
|
|
*
|
10 |
|
|
* Parameters:
|
11 |
|
|
* v0 = start of x interval, double scalar
|
12 |
|
|
* v1 = end of x interval, double scalar
|
13 |
|
|
* r0 = function pointer
|
14 |
|
|
* r1 = number of function points, int32
|
15 |
|
|
* r2 = method. currently unused
|
16 |
|
|
* r3 = optional extra parameter to the function, can be integer or pointer
|
17 |
|
|
* return value: double scalar in v0
|
18 |
|
|
* will return NAN if the maximum vector length is less than 256 bits or if the function returns NAN
|
19 |
|
|
*
|
20 |
|
|
* The function that is integrated is pointed to by r0. This function must take a double vector
|
21 |
|
|
* as input and return a double vector as results. It can have one optional extra parameter which
|
22 |
|
|
* can be an integer or a pointer. The function must conform to the default register use.
|
23 |
|
|
*
|
24 |
|
|
* The number of function points will be rounded up to a power of 2 or a multiple of the maximum
|
25 |
|
|
* vector length. It must be at least 4.
|
26 |
|
|
*
|
27 |
|
|
* Copyright 2018-2021 GNU General Public License http://www.gnu.org/licenses
|
28 |
|
|
|
29 |
|
|
NOTE: not debugged after conversion to version 1.11 !!
|
30 |
|
|
to do: use masked replace rather then repeat_block to broadcast constants
|
31 |
|
|
|
32 |
|
|
*****************************************************************************/
|
33 |
|
|
|
34 |
|
|
// define constants
|
35 |
|
|
%xval1 = 0.861136311594 // x-values for 4-point Gauss-Legendre integration
|
36 |
|
|
%xval2 = 0.339981043585
|
37 |
|
|
%weight1 = 0.347854845137 // weights for 4-point Gauss-Legendre integration
|
38 |
|
|
%weight2 = 0.652145154863
|
39 |
|
|
%nan = 0x7FF8100000000000 // NAN error return
|
40 |
|
|
|
41 |
|
|
const section read ip // read-only data section
|
42 |
|
|
xvals: double (1-xval1)/2, (1-xval2)/2, (1+xval2)/2, (1+xval1)/2 // x-points in interval from 0 - 1
|
43 |
|
|
weights: double weight1/2, weight2/2, weight2/2, weight1/2 // corresponding weights
|
44 |
|
|
const end
|
45 |
|
|
|
46 |
|
|
code section execute align = 4 // code section
|
47 |
|
|
|
48 |
|
|
_integrate function public
|
49 |
|
|
push (r16, 18) // save registers
|
50 |
|
|
push (v16, 19) // save registers
|
51 |
|
|
|
52 |
|
|
int64 r16 = r0 // function pointer
|
53 |
|
|
int64 r18 = r3 // save optional extra parameter to function
|
54 |
|
|
if (int32 r1 < 4) {
|
55 |
|
|
int32+ r1 = 4 // must have at least 4 points
|
56 |
|
|
}
|
57 |
|
|
double v2 = broadcast_max(0) // find maximum vector length
|
58 |
|
|
int64 r3 = get_num(v2) // maximum elements
|
59 |
|
|
if (int32 r1 < r3) { // number fits into a single vector
|
60 |
|
|
int32 r1 = roundp2(r1, 1) // round up to nearest power of 2
|
61 |
|
|
}
|
62 |
|
|
else { // more than one vector needed
|
63 |
|
|
int32 r1 = r1 + r3 - 1 // round up to nearest multiple of maximum vector length
|
64 |
|
|
int32 r3 = -r3
|
65 |
|
|
int32+ r1 &= r3
|
66 |
|
|
}
|
67 |
|
|
uint32+ r3 = r1 >> 2 // number of steps, nsteps = points / 4
|
68 |
|
|
int64 r0 = 4*8 // length of one step
|
69 |
|
|
int64 r2 = r3 * r0 // total length, bytes
|
70 |
|
|
double v16 = [xvals, length = r0] // x values for one step
|
71 |
|
|
double v17 = [weights, length = r0] // weights for one step
|
72 |
|
|
int64 r4 = get_len(v16)
|
73 |
|
|
if (int64 r4 < r0) { // maximum vector length is insufficient for one step. a fix for this is not implemented
|
74 |
|
|
int64 v0 = nan // return NAN to indicate error
|
75 |
|
|
jump ERROREXIT
|
76 |
|
|
}
|
77 |
|
|
double v18 = v1 - v0 // length of x interval
|
78 |
|
|
int64 v2 = gp2vec(r3) // number of steps
|
79 |
|
|
double v2 = int2float(v2, 0) // same, as float
|
80 |
|
|
double v18 /= v2 // x-step size = length / nsteps
|
81 |
|
|
double v18 = broad(v18, r2) // broadcast x-step size
|
82 |
|
|
double v16 *= v18 // first 4 x-values relative to start
|
83 |
|
|
double v0 = broad(v0, r2) // broadcast x start value
|
84 |
|
|
double v16 += v0 // first 4 x-values
|
85 |
|
|
double v17 *= v18 // multiply weights by step size
|
86 |
|
|
double v16 = repeat_block(v16, r2, 4*8) // repeat x-values block nsteps times or until the maximum vector length
|
87 |
|
|
double v17 = repeat_block(v17, r2, 4*8) // repeat weights block nsteps times or until the maximum vector length
|
88 |
|
|
|
89 |
|
|
int64 v2 = make_sequence(r1, 0) // 0, 1, 2, ..
|
90 |
|
|
int64 v2 >>= 2 // 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, ...
|
91 |
|
|
double v2 = int2float(v2, 0) // same, as double
|
92 |
|
|
double v2 *= v18 // multiply by x-step size
|
93 |
|
|
double v16 += v2 // make each x block = previous block + x-step size
|
94 |
|
|
|
95 |
|
|
int64 r3 = get_len(v2) // actual vector length
|
96 |
|
|
uint32 r17 = r2 / r3 // number of iterations
|
97 |
|
|
int64 r2 = get_num(v2) // vector length
|
98 |
|
|
int64 r2--
|
99 |
|
|
double v2 = extract(v2, r2) // get last addend of last block, broadcast
|
100 |
|
|
//double v2 = rotate_up(r3, v2)
|
101 |
|
|
|
102 |
|
|
double v2 += v18 // start value for next x vector
|
103 |
|
|
//double v19 = broad(r3, v2)
|
104 |
|
|
double v19 = v2 + v18 // (x-step size) * (steps per vector). add this to x vector to get next x vector
|
105 |
|
|
|
106 |
|
|
double v18 = replace(v16, 0) // vector of zeroes for summation
|
107 |
|
|
|
108 |
|
|
// calculation loop. calculate as many function values per iteration as the maximum vector length allows
|
109 |
|
|
while (int32+ r17 > 0) {
|
110 |
|
|
double v0 = v16 // x-values
|
111 |
|
|
int64 r0 = r18 // optional extra parameter
|
112 |
|
|
call (r16) // function(x)
|
113 |
|
|
double v0 *= v17 // multiply results by weights
|
114 |
|
|
double v18 += v0 // summation
|
115 |
|
|
double v16 += v19 // next vector of x-values
|
116 |
|
|
int32+ r17-- // loop counter
|
117 |
|
|
}
|
118 |
|
|
|
119 |
|
|
// the sums are in v18. make horizontal sum
|
120 |
|
|
int64 r2 = get_len(v18) // vector length
|
121 |
|
|
while (uint64 r2 > 8) { // loop to calculate horizontal sum
|
122 |
|
|
uint64 r2 >>= 1 // the vector length is halved
|
123 |
|
|
double v1 = shift_reduce(v18, r2) // get upper half of vector
|
124 |
|
|
double v18 = v1 + v18 // Add upper half and lower half
|
125 |
|
|
}
|
126 |
|
|
double v0 = v18 // return value
|
127 |
|
|
ERROREXIT:
|
128 |
|
|
pop (v16, 19) // restore registers
|
129 |
|
|
pop (r16, 18) // restore registers
|
130 |
|
|
return
|
131 |
|
|
_integrate end
|
132 |
|
|
|
133 |
|
|
code end
|