/****************************  sumarray.as  ***********************************
* Author:        Agner Fog
* date created:  2018-02-24
* last modified: 2021-08-08
* Version:       1.11
* Project:       ForwardCom example, assembly code
* Description:   Calculates the sum of the numbers from 1 to 100
*
* This code will fill an array with the numbers from 1 to 100 and then
* calculate the sum. The purpose is to show how the variable-length vector
* instructions work.
* The expected result is the mean times the number: (1+100)*100/2 = 5050.
*
* Copyright 2018-2021 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/

// Assembly-time constant: element count of myarray and upper bound of the sum.
%num = 100

// Read-only data.
const section read ip
    // Zero-terminated printf format string. The two %i fields receive the
    // element count and the computed sum, in that order.
    conclude: int8 "\nThe sum of numbers from 1 to %i is %i\n",0
const end

// Read/write data without initial values.
bss section datap uninitialized
    int32 myarray[num]                           // num 32-bit integers; _main fills this array and then sums it
    int64 parlist[4]                             // argument list handed to printf, one 64-bit slot per argument
bss end

code section execute align = 4                   // executable code

extern _printf: function                         // library routine: formatted output to stdout

/*
 * _main -- program entry point.
 *
 * Stores the numbers 1 .. num in myarray using variable-length vector stores,
 * adds the array up again with a vector loop, reduces the partial sums to a
 * single scalar, and prints the result through _printf.
 *
 * Calls:     _printf with r0 = address of format string,
 *                         r1 = address of parameter list
 * Returns:   r0 = 0
 * Registers written here: r0, r1, r2, r3, r4, v0, v1, v2
 *            (in addition to whatever _printf modifies)
 */
_main function public

    // ---- Step 1: store 1, 2, ..., num in myarray ---------------------------

    int64 r0 = num                               // requested sequence length (100)
    int32 v1 = make_sequence(r0, 1)              // v1 = (1, 2, 3, ...): num elements, or fewer if the maximum vector length is smaller
    int32 r3 = get_num(v1)                       // element count that v1 actually holds
    int64 r4 = r3 - 1                            // index of the last element of v1
    int32 v2 = extract(v1, r4)                   // last element of v1, broadcast; added to v1 after every store to advance the sequence

    // The vector loop addresses the array backwards from its end:
    // r2 = end of array, r1 = bytes remaining, so [r2-r1] is the current position.
    int64 r1 = num * 4                           // bytes still to be written = size of the whole array
    int64 r2 = address([myarray+num*4])          // address just past the last element of myarray

    // Each pass counts r1 down by the maximum vector length. If the array size
    // is not a multiple of that length, the final pass handles fewer elements.
    for (int32 v1 in [r2-r1]) {
        int32 [r2-r1, length = r1] = v1          // store as many elements as the maximum vector length permits
        int32 v1 += v2                           // move every element on to the next stretch of the sequence
    }

    // ---- Step 2: add up all elements of myarray ----------------------------

    int32 v0 = replace(v1, 0)                    // accumulator: all zeroes, same length as v1
    int64 r1 = num * 4                           // restart the byte counter at the full array size

    // Same loop shape as above; this time the array is read, not written.
    for (int32 v0 in [r2-r1]) {
        int32 v0 += [r2-r1, length = r1]         // accumulate the next chunk element by element
    }

    // ---- Step 3: horizontal sum of the elements of v0 ----------------------

    int32 r1 = get_len(v0)                       // length of v0 in bytes
    // The maximum vector length is known to be a power of 2, but v0 may hold
    // exactly num elements, which need not be. Round the length up so that it
    // can be halved repeatedly.
    int32 r1 = roundp2(r1, 1)                    // power of 2, not bigger than the maximum vector length
    int32 v0 = set_len(v0, r1)                   // extend v0 to that length; the added elements are zero

    while (uint32+ r1 > 4) {                     // continue until a single 4-byte element is left
        uint32+ r1 >>= 1                         // halve the length
        int32 v1 = shift_reduce(v0, r1)          // upper half of v0
        // The result takes the length of the first operand (v1), so v0 shrinks
        // to half its size on every pass.
        int32 v0 = v1 + v0                       // upper half + lower half
    }
    // v0 now holds the total as a scalar.

    // ---- Step 4: print the result ------------------------------------------

    int64 r1 = address([parlist])                // r1 -> parameter list
    int32 [r1] = num                             // first parameter: the element count
    int32 [r1+8, scalar] = v0                    // second parameter: the sum
    int64 r0 = address([conclude])               // r0 -> format string
    call _printf                                 // printf(conclude, num, sum)

    // ---- Return from main --------------------------------------------------

    int64 r0 = 0                                 // program return value
    return

_main end

code end