/****************************  sumarray.as  ***********************************
* Author:        Agner Fog
* Date created:  2018-02-24
* Last modified: 2021-08-08
* Version:       1.11
* Project:       ForwardCom example, assembly code
* Description:   Calculates the sum of the numbers from 1 to 100
*
* This code will fill an array with the numbers from 1 to 100 and then
* calculate the sum. The purpose is to show how the variable-length vector
* instructions work.
* The expected result is the mean times the number: (1+100)*100/2 = 5050.
*
* Copyright 2018-2021 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/

// Assembly-time constant: number of array elements.
// Used to size myarray and as the upper bound of the sequence 1 .. num.
%num = 100                                       // number of array elements

const section read ip                            // read-only data section
// Zero-terminated format string for printf. The two %i fields receive
// the element count and the computed sum, in that order.
conclude: int8 "\nThe sum of numbers from 1 to %i is %i\n",0
const end

bss section datap uninitialized                  // uninitialized read/write data section
int32 myarray[num]                               // array of num (= 100) 32-bit integers; filled and then summed by _main
int64 parlist[4]                                 // parameter list for printf: four 64-bit slots, _main writes the first two
bss end

code section execute align = 4                   // code section

extern _printf: function                         // library function: formatted output to stdout

/* _main: program entry point
 *
 * Fills myarray with 1 .. num using vector stores, sums the array with a
 * vector loop, reduces the vector accumulator to a scalar, and prints the
 * result with printf.
 *
 * Register roles in this function:
 *   r0  num for make_sequence; later the format string address; finally the return value
 *   r1  element count of v1; later the remaining byte count in the vector loops,
 *       the vector length in bytes during the reduction, and the parlist address
 *   r2  index of the last element of v1; later the address of the end of myarray
 *   v0  accumulator of partial sums, reduced to the final scalar sum
 *   v1  current run of sequential numbers; scratch during the reduction
 *   v2  last element of the first sequence vector, used as the per-iteration increment
 *
 * Returns: r0 = 0
 */
_main function public                            // program begins here

// Step 1: Fill myarray with numbers 1 .. 100

int64 r0 = num                                   // = 100
int32 v1 = make_sequence(r0, 1)                  // will be (1, 2, 3, ...) up to as much as the maximum vector length allows, or 100
int32 r1 = get_num(v1)                           // number of elements in vector
int64 r2 = r1 - 1                                // index to last element
int32 v2 = extract(v1, r2)                       // get last element and broadcast it.
                                                 // The sequence starts at 1, so this value equals the element count of v1

// A vector loop needs a pointer to the end of the array
int64 r2 = address [myarray+num*4]               // address of the end of myarray (4 bytes per int32 element)
int64 r1 = num * 4                               // total array size in bytes

// This loop will count down r1 with the maximum length until the array is filled.
// [r2-r1] is the current position: end of array minus the bytes still remaining.
// The last iteration will automatically get fewer elements if the array size is not divisible by the maximum length
for (int32 v1 in [r2-r1]) {
  int32 [r2-r1, length = r1] = v1                // put as many elements into the array as the maximum length permits
  int32 v1 += v2                                 // advance every element to the next run of sequential numbers
}

// Step 2: Calculate the sum of all elements in the array

int32 v0 = replace(v1, 0)                        // make a vector of all zeroes and same length
int64 r1 = num * 4                               // total array size in bytes; r2 still points to the end of myarray

// Vector loop, counting down r1 with the maximum length until the whole array has been read
for (int32 v0 in [r2-r1]) {
  int32 v0 += [r2-r1, length = r1]               // add elements to vector
}

// Step 3: Calculate the horizontal sum of the elements in v0

int32 r1 = get_len(v0)                           // length of vector in bytes
// Round up the vector length to the nearest power of 2.
// The maximum vector length is known to be a power of 2,
// but the length may be 'num' elements, which is not a power of 2
int32 r1 = roundp2(r1, 1)                        // r1 is now a power of 2, not bigger than the maximum vector length
int32 v0 = set_len(v0, r1)                       // adjust vector length to nearest higher power of 2. Added elements will be zero
while (uint32+ r1 > 4) {                         // loop to calculate horizontal sum; stops when one 4-byte element is left
  uint32+ r1 >>= 1                               // the vector length is halved
  int32 v1 = shift_reduce (v0, r1)               // get upper half of vector
  // Add upper half and lower half
  // The result vector has the length of the first operand, which will be halved each iteration
  int32 v0 = v1 + v0
}
// The sum is now a scalar in v0

// Step 4: Write the result

int64 r0 = address([conclude])                   // format string for printf
int64 r1 = address([parlist])                    // parameter list
int32 [r1] = num                                 // put number into parameter list (first 64-bit slot)
int32 [r1+8, scalar] = v0                        // put result into parameter list (second 64-bit slot)
call _printf                                     // printf("\nThe sum of numbers from 1 to %i is %i\n", num, v0)

// Return from main

int64 r0 = 0                                     // program return value
return                                           // return from main
_main end

code end