URL
https://opencores.org/ocsvn/forwardcom/forwardcom/trunk
Subversion Repositories forwardcom
[/] [forwardcom/] [libraries/] [integrate.as] - Rev 150
Go to most recent revision | Compare with Previous | Blame | View Log
/******************************* integrate.as *******************************
* Author: Agner Fog
* date created: 2018-03-30
* Last modified: 2021-05-25
* Version: 1.11
* Project: ForwardCom library math.li
* Description: Numerical integration of a function f(x) over a given x-interval
* Uses 4-point Gauss-Legendre integration method
*
* Parameters:
* v0 = start of x interval, double scalar
* v1 = end of x interval, double scalar
* r0 = function pointer
* r1 = number of function points, int32
* r2 = method. currently unused
* r3 = optional extra parameter to the function, can be integer or pointer
* return value: double scalar in v0
* will return NAN if the maximum vector length is less than 256 bits or if the function returns NAN
*
* The function that is integrated is pointed to by r0. This function must take a double vector
* as input and return a double vector as results. It can have one optional extra parameter which
* can be an integer or a pointer. The function must conform to the default register use.
*
* The number of function points will be rounded up to a power of 2 or a multiple of the maximum
* vector length. It must be at least 4.
*
* Copyright 2018-2021 GNU General Public License http://www.gnu.org/licenses
NOTE: not debugged after conversion to version 1.11 !!
to do: use masked replace rather then repeat_block to broadcast constants
*****************************************************************************/
// define constants
%xval1 = 0.861136311594 // x-values for 4-point Gauss-Legendre integration
%xval2 = 0.339981043585
%weight1 = 0.347854845137 // weights for 4-point Gauss-Legendre integration
%weight2 = 0.652145154863
%nan = 0x7FF8100000000000 // NAN error return
const section read ip // read-only data section
xvals: double (1-xval1)/2, (1-xval2)/2, (1+xval2)/2, (1+xval1)/2 // x-points in interval from 0 - 1
weights: double weight1/2, weight2/2, weight2/2, weight1/2 // corresponding weights
const end
code section execute align = 4 // code section
_integrate function public
push (r16, 18) // save registers
push (v16, 19) // save registers
int64 r16 = r0 // function pointer
int64 r18 = r3 // save optional extra parameter to function
if (int32 r1 < 4) {
int32+ r1 = 4 // must have at least 4 points
}
double v2 = broadcast_max(0) // find maximum vector length
int64 r3 = get_num(v2) // maximum elements
if (int32 r1 < r3) { // number fits into a single vector
int32 r1 = roundp2(r1, 1) // round up to nearest power of 2
}
else { // more than one vector needed
int32 r1 = r1 + r3 - 1 // round up to nearest multiple of maximum vector length
int32 r3 = -r3
int32+ r1 &= r3
}
uint32+ r3 = r1 >> 2 // number of steps, nsteps = points / 4
int64 r0 = 4*8 // length of one step
int64 r2 = r3 * r0 // total length, bytes
double v16 = [xvals, length = r0] // x values for one step
double v17 = [weights, length = r0] // weights for one step
int64 r4 = get_len(v16)
if (int64 r4 < r0) { // maximum vector length is insufficient for one step. a fix for this is not implemented
int64 v0 = nan // return NAN to indicate error
jump ERROREXIT
}
double v18 = v1 - v0 // length of x interval
int64 v2 = gp2vec(r3) // number of steps
double v2 = int2float(v2, 0) // same, as float
double v18 /= v2 // x-step size = length / nsteps
double v18 = broad(v18, r2) // broadcast x-step size
double v16 *= v18 // first 4 x-values relative to start
double v0 = broad(v0, r2) // broadcast x start value
double v16 += v0 // first 4 x-values
double v17 *= v18 // multiply weights by step size
double v16 = repeat_block(v16, r2, 4*8) // repeat x-values block nsteps times or until the maximum vector length
double v17 = repeat_block(v17, r2, 4*8) // repeat weights block nsteps times or until the maximum vector length
int64 v2 = make_sequence(r1, 0) // 0, 1, 2, ..
int64 v2 >>= 2 // 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, ...
double v2 = int2float(v2, 0) // same, as double
double v2 *= v18 // multiply by x-step size
double v16 += v2 // make each x block = previous block + x-step size
int64 r3 = get_len(v2) // actual vector length
uint32 r17 = r2 / r3 // number of iterations
int64 r2 = get_num(v2) // vector length
int64 r2--
double v2 = extract(v2, r2) // get last addend of last block, broadcast
//double v2 = rotate_up(r3, v2)
double v2 += v18 // start value for next x vector
//double v19 = broad(r3, v2)
double v19 = v2 + v18 // (x-step size) * (steps per vector). add this to x vector to get next x vector
double v18 = replace(v16, 0) // vector of zeroes for summation
// calculation loop. calculate as many function values per iteration as the maximum vector length allows
while (int32+ r17 > 0) {
double v0 = v16 // x-values
int64 r0 = r18 // optional extra parameter
call (r16) // function(x)
double v0 *= v17 // multiply results by weights
double v18 += v0 // summation
double v16 += v19 // next vector of x-values
int32+ r17-- // loop counter
}
// the sums are in v18. make horizontal sum
int64 r2 = get_len(v18) // vector length
while (uint64 r2 > 8) { // loop to calculate horizontal sum
uint64 r2 >>= 1 // the vector length is halved
double v1 = shift_reduce(v18, r2) // get upper half of vector
double v18 = v1 + v18 // Add upper half and lower half
}
double v0 = v18 // return value
ERROREXIT:
pop (v16, 19) // restore registers
pop (r16, 18) // restore registers
return
_integrate end
code end
Go to most recent revision | Compare with Previous | Blame | View Log