URL
https://opencores.org/ocsvn/zipcpu/zipcpu/trunk
Subversion Repositories zipcpu
[/] [zipcpu/] [trunk/] [bench/] [asm/] [zipdhry.S] - Rev 69
Go to most recent revision | Compare with Previous | Blame | View Log
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; Filename:	zipdhry.S
;
; Project:	Zip CPU -- a small, lightweight, RISC CPU soft core
;
; Purpose:	Zip assembly file for running the Dhrystone benchmark in the
;		Zip CPU.
;
;	To calculate a DMIPS value, take the value of R0 upon completion.  This
;	is the number of clock ticks used from start to finish (i.e., from
;	entrance into user mode to the return to supervisor mode).  Let
;	CLKSPD be your clock speed in Hz.  Then:
;
;	DMIPS = (CLKSPD*NRUNS/R0) / 1757;
;
;	For my tests, CLKSPD = 100e6 Hz (100 MHz), NRUNS = 512.  Thus,
;
;	DMIPS = (100e6 * 512) / R0 / 1757
;
;
; Creator:	Dan Gisselquist, Ph.D.
;		Gisselquist Technology, LLC
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; Copyright (C) 2015, Gisselquist Technology, LLC
;
; This program is free software (firmware): you can redistribute it and/or
; modify it under the terms of  the GNU General Public License as published
; by the Free Software Foundation, either version 3 of the License, or (at
; your option) any later version.
;
; This program is distributed in the hope that it will be useful, but WITHOUT
; ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
; for more details.
;
; License:	GPL, v3, as defined and found on www.gnu.org,
;		http://www.gnu.org/licenses/gpl.html
;
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
//	Under Verilator:
//	DMIPS:	30.3	100 MHz (sim)	0.29	// Initial baseline
//	DMIPS:	37.5	100 MHz (sim)	0.38	// 20151017
//	DMIPS:	38.0	100 MHz (sim)	0.38	// 20151211 (new ISA)
//	DMIPS:	40.5	100 MHz (sim)	0.41	// 20151212 (H/W DIV)
//	DMIPS:	 8.2	100 MHz (sim)	0.08	// 20151104--!pipelined
//	DMIPS:	60.1	100 MHz (sim)	0.60	// 20151215 (New PF)
//	DMIPS:	54.8	100 MHz (sim)	0.55	// 20151219
//	On real hardware:
//	DMIPS:	24.7	100 MHz (basys)	0.25	// Initial baseline
//	DMIPS:	30.6	100 MHz (basys)	0.31	// 20151017
//
// (And, under Verilator, if the cache holds the entire 4kW program: 55.1 DMIPS)
//
//
//   with no loop unrolling nor function inlining
//	DMIPS:	24.3	100 MHz (sim)	0.24
//   with packed strings
//	DMIPS:	35.6	100 MHz (sim)	0.36
//
// For comparison:
//	uBlaze:	 230	177 MHz		1.3
//	LEON3				1.4
//	NiOS II: 218	185 MHz		1.16
//	OpenRISC 250	250 MHz		1.00
//	LM32				1.14
//	ZPU	 2.6	 50 MHz		0.05
//

// Some #def's to control compilation.
//
// SKIP_SHORT_CIRCUITS determines whether or not we do internal testing and
// jump to a BUSY instruction on failure for the debugger to pick up.  Skip
// this for valid testing.  Enable it and see whether or not zipdhry dies mid
// process--if it does, you got there--so fix it.
//
#define	SKIP_SHORT_CIRCUITS
//
//
//
// NO_INLINE controls whether or not we inline certain functions.  If you
// define this, nothing will be inlined.
//
// I recommend not setting this define.
//
// #define	NO_INLINE
//
//
//
// NO_LOOP_UNROLLING controls loop unrolling.  The default is to unroll loops
// by a factor of 4x.  By defining this, all loop unrolling is removed.  (Well,
// except the pipelined strcpy and strcmp below--those loops are automatically
// unrolled as part of being piped.  Undefine those as well and no loop will
// be unrolled at all.)
//
// I recommend not setting this define.
//
// #define	NO_LOOP_UNROLLING
//
//
//
// After building this whole thing and putting it together, I discovered another
// means I could use of generating a return statement.  In this case, instead
// of LOD -1(SP),PC, I would load the return PC from the stack as part of the
// pipelined memory operations, adjust the stack pointer, and jump to the
// register address.  It saves clocks because it uses the pipelined memory
// operation, but other than that it suffers the same number of stalls.
//
// Fast returns used to be controlled by a #define.  This has been removed,
// and all returns are "fast" by default.
//
//
//
//
//
// SKIP_DIVIDE controls whether or not we want to calculate the speed of
// our processor assuming we had a divide instruction.  If you skip the
// divide, it will be as though you had such an instruction.  Otherwise,
// leave it in and the test bench will measure how long it takes to run
// while including the divide emulation.
//
// I recommend leaving this undefined, for a more accurate measure.
//
// #define	SKIP_DIVIDE	// 0xace17/0x50f37 vs 0xbd817/0x57d37
//
// Thus a divide instruction might raise our score from 37.5 to 41.1, or
// from 81 to 87.8--depending on whether or not the cache is loaded or not.
//
//
//
//
// HARDWARE_DIVIDE is appropriate when the hardware has a divide instruction,
// as it will use this divide instruction for the one time a divide is needed.
//
// I recommend setting this value ... IF the hardware has the divide
// instruction built in.
//
#define	HARDWARE_DIVIDE
//
//
// PIPELINED_STRCPY and PIPELINED_STRCMP both have to do with whether or not
// the memory accesses of each of these "library" functions are pipelined.
// As you may recall, the Zip CPU allows you to pipeline memory accesses
// that are all done with the same condition, and that all reference either
// the same or increasing addresses.  These one-clock memory access instructions
// are ridiculously fast (when available), and we would be foolish not to use
// them.  These two defines modify the library functions to use this mode
// and to capitalize upon it as much as possible.
//
// I recommend setting these.
//
#define	PIPELINED_STRCPY
#define	PIPELINED_STRCMP
//
//

sys.ctr.mtask	equ	0xc0000008

// int main(int argc, char **argv) {
//	dhrystone();
// }
;
; entry: program entry point.
;	Sets the user stack pointer and the user global pointer (uR12, the
;	address of entry itself), clears the tick counter at sys.ctr.mtask,
;	runs dhrystone, and then halts with the tick count in R0.
;
; NOTE(review): under SUPERVISOR_TASK the benchmark is branched to directly,
;	yet only the user copies of SP and R12 are set up here -- confirm
;	that something else initializes the supervisor SP and R12.
;
entry:
	MOV	top_of_stack(PC),uSP
	MOV	entry(PC),uR12
	; Store our tick counter in R1
	LDI	sys.ctr.mtask,R1
	; And start with our counter cleared at zero
	CLR	R0
	STO	R0,(R1)
#ifdef	SUPERVISOR_TASK
	MOV	__HERE__+2(PC),R0
	BRA	dhrystone
#else
	MOV	dhrystone(PC),uPC
	RTU
#endif
	; Read the tick counter back out
	LOD	(R1),R0
	HALT		; Stop the CPU--We're done!!!!!!!

// typedef	enum { Ident_1, Ident_2, Ident_3, Ident_4, Ident_5 } test_enum;
// typedef	enum { false, true } bool;

// typedef	int	Arr_1_Dim[50];
// typedef	int	Arr_2_Dim[50][50];
;
; One character is stored per word.  A record is four header words plus a
; STRSIZE word string; STRSIZE is 30 characters plus the terminating zero.
;
#define	STRSIZE	31
#define	RECSIZE	35
#define	NUMBER_OF_RUNS	(512)
ptr_comp			equ	0
discr				equ	1
variant.var_1.enum_comp		equ	2
variant.var_1.int_comp		equ	3
variant.var_1.str_comp		equ	4

gbl_arr_1:
	fill	50,0
gbl_arr_2:
	fill	2500,0
gbl_ch:
	word	0
gbl_ch_2:
	word	0
gbl_bool:
	word	0
gbl_int:
	word	0
gbl_ptr:
	word	0

some_string:
	word	'D','H','R','Y','S','T','O','N','E',' '
	word	'P','R','O','G','R','A','M',',',' '
	word	'S','O','M','E',' ','S','T','R','I','N','G'
	word	0

first_string:
	word	'D','H','R','Y','S','T','O','N','E',' '
	word	'P','R','O','G','R','A','M',','
	word	' ','1','\'','S','T'
	word	' ','S','T','R','I','N','G'
	word	0

second_string:
	word	'D','H','R','Y','S','T','O','N','E',' '
	word	'P','R','O','G','R','A','M',',',' '
	word	'2','\'','N','D',' ','S','T','R','I','N','G'
	word	0

third_string:
	word	'D','H','R','Y','S','T','O','N','E',' '
	word	'P','R','O','G','R','A','M',',',' '
	word	'3','\'','R','D',' ','S','T','R','I','N','G'
	word	0

// Arr_1_Dim	gbl_arr_1;
// Arr_2_Dim	gbl_arr_2;
// char	gbl_ch, gbl_ch_2;
// bool	gbl_bool;
// int	gbl_int;
// RECP	gbl_ptr;

//char	*lcl_strcpy(char *d, char *s) {
//	char	*cpd = d, ch;
//
//	do{
//		*cpd++ = ch = *s++;
//	} while(ch);
//
//}
//

#ifdef	PIPELINED_STRCPY
;
; lcl_strcpy (pipelined variant)
;	On entry,
;		R0 = dst
;		R1 = src
;		R2 = return address
;	On exit, R0 and R1 have been advanced past the last full group of
;	four words copied.  R2 through R5 are preserved.
;	Four words are always fetched per pass, so up to three words beyond
;	the terminating zero of the source are read (never written).
;
lcl_strcpy:
	SUB	4,SP
	STO	R2,(SP)
	STO	R3,1(SP)
	STO	R4,2(SP)
	STO	R5,3(SP)

copy_next_char:
	; R0 = d
	; R1 = s
	; R2 through R5 = the next four characters
	LOD	(R1),R2
	LOD	1(R1),R3
	LOD	2(R1),R4
	LOD	3(R1),R5

	; Z is set if (and only if) one of the four characters is zero
	CMP	0,R2
	CMP.NZ	0,R3
	CMP.NZ	0,R4
	CMP.NZ	0,R5
	BZ	end_strcpy

	STO	R2,(R0)
	STO	R3,1(R0)
	STO	R4,2(R0)
	STO	R5,3(R0)

	ADD	4,R1
	ADD	4,R0
	BRA	copy_next_char

end_strcpy:
	; One of R2..R5 is zero.  Store every character up to and including
	; that terminating zero, each at its own offset.  Each store is made
	; before its own character is tested, so the zero itself is written.
	STO	R2,(R0)
	CMP	0,R2
	STO.NZ	R3,1(R0)
	CMP.NZ	0,R3
	STO.NZ	R4,2(R0)
	CMP.NZ	0,R4
	STO.NZ	R5,3(R0)

	LOD	(SP),R2
	LOD	1(SP),R3
	LOD	2(SP),R4
	LOD	3(SP),R5
	ADD	4,SP
	JMP	R2
	NOP

#else
;
; lcl_strcpy (non-pipelined variant)
;	On entry,
;		R0 = d
;		R1 = s
;		R2 = return address
;	R2 doubles as the character register inside the loop, so it is saved
;	once on the stack before the loop label and restored on exit.
;
lcl_strcpy:
	SUB	1,SP
	STO	R2,(SP)

copy_next_char:
#ifdef	NO_LOOP_UNROLLING
	LOD	(R1),R2
	STO	R2,(R0)
	CMP	0,R2
	BZ	lcl_strcpy_end_of_loop
	ADD	1,R0
	ADD	1,R1
	BRA	copy_next_char

#else
	LOD	(R1),R2
	STO	R2,(R0)
	CMP	0,R2
	BZ	lcl_strcpy_end_of_loop
	LOD	1(R1),R2
	STO	R2,1(R0)
	CMP	0,R2
	BZ	lcl_strcpy_end_of_loop
	LOD	2(R1),R2
	STO	R2,2(R0)
	CMP	0,R2
	BZ	lcl_strcpy_end_of_loop
	LOD	3(R1),R2
	STO	R2,3(R0)
	CMP	0,R2
	BZ	lcl_strcpy_end_of_loop
	ADD	4,R0
	ADD	4,R1
	BRA	copy_next_char
#endif
lcl_strcpy_end_of_loop:
	LOD	(SP),R2
	ADD	1,SP
	JMP	R2
#endif

//int	lcl_strcmp(char *s1, char *s2) {
//	char	a, b;
//	do {
//		a = *s1++; b = *s2++;
//	} while((a)&&(a==b));
//
//	return a-b;
//}

#ifdef	PIPELINED_STRCMP
;
; lcl_strcmp (pipelined variant)
;	On entry,
;		R0 = s1
;		R1 = s2
;		R2 = return address
;	On exit, R0 = a - b for the first position where the loop of the C
;	code above would stop.  R1 is left advanced, R2 through R9 are
;	preserved.  Four words of each string are fetched per pass.
;
lcl_strcmp:
	SUB	8,SP
	STO	R2,(SP)
	STO	R3,1(SP)
	STO	R4,2(SP)
	STO	R5,3(SP)
	STO	R6,4(SP)
	STO	R7,5(SP)
	STO	R8,6(SP)
	STO	R9,7(SP)

strcmp_top_of_loop:
	LOD	(R0),R2
	LOD	1(R0),R3
	LOD	2(R0),R4
	LOD	3(R0),R5
	;
	LOD	(R1),R6
	LOD	1(R1),R7
	LOD	2(R1),R8
	LOD	3(R1),R9
	;
	; Leave the loop if any of the four characters of s1 is zero ...
	CMP	0,R2
	CMP.NZ	0,R3
	CMP.NZ	0,R4
	CMP.NZ	0,R5
	BZ	strcmp_end_loop

	; ... or if any of the four pairs differ
	CMP	R2,R6
	CMP.Z	R3,R7
	CMP.Z	R4,R8
	CMP.Z	R5,R9
	BNZ	strcmp_end_loop

	ADD	4,R0
	ADD	4,R1
	BRA	strcmp_top_of_loop

strcmp_end_loop:
	; Walk the four pairs in order, stopping with the deciding pair in
	; R2 (from s1) and R6 (from s2)
	CMP	0,R2
	BZ	final_str_compare
	CMP	R2,R6
	BNZ	final_str_compare

	MOV	R3,R2
	MOV	R7,R6
	CMP	0,R2
	BZ	final_str_compare
	CMP	R2,R6
	BNZ	final_str_compare

	MOV	R4,R2
	MOV	R8,R6
	CMP	0,R2
	BZ	final_str_compare
	CMP	R2,R6
	BNZ	final_str_compare

	MOV	R5,R2
	MOV	R9,R6

final_str_compare:
	SUB	R6,R2
	MOV	R2,R0

	LOD	(SP),R2
	LOD	1(SP),R3
	LOD	2(SP),R4
	LOD	3(SP),R5
	LOD	4(SP),R6
	LOD	5(SP),R7
	LOD	6(SP),R8
	LOD	7(SP),R9
	ADD	8,SP
	JMP	R2
	NOP

#else
;
; lcl_strcmp (non-pipelined variant)
;	On entry,
;		R0 = s1
;		R1 = s2
;		R2 = return address
;	On exit, R0 = a - b.  R2 and R3 are preserved.
;
lcl_strcmp:
	SUB	2,SP
	STO	R2,(SP)
	STO	R3,1(SP)

strcmp_top_of_loop:
#ifdef	NO_LOOP_UNROLLING
	; LOD	(R0),R2
	; LOD	(R1),R3			; Alternate approach:
	; CMP	R2,R3			;	CMP	0,R2
	; BNZ	strcmp_end_loop		;	BZ	strcmp_end_loop
	; CMP	0,R2			;	CMP	R2,R3
	; BZ	strcmp_end_loop		;	BZ	strcmp_top_of_loop
	; CMP	0,R3			;
	; BZ	strcmp_end_loop		;
	; ADD	1,R0
	; ADD	1,R1
	; BRA	strcmp_top_of_loop
	LOD	(R0),R2
	LOD	(R1),R3
	CMP	0,R2
	BZ	strcmp_end_loop
	ADD	1,R0
	ADD	1,R1
	CMP	R2,R3
	BZ	strcmp_top_of_loop
#else
	LOD	(R0),R2
	LOD	(R1),R3
	CMP	0,R2
	BZ	strcmp_end_loop
	CMP	R2,R3
	BNZ	strcmp_end_loop
	LOD	1(R0),R2
	LOD	1(R1),R3
	CMP	0,R2
	BZ	strcmp_end_loop
	CMP	R2,R3
	BNZ	strcmp_end_loop
	LOD	2(R0),R2
	LOD	2(R1),R3
	CMP	0,R2
	BZ	strcmp_end_loop
	CMP	R2,R3
	BNZ	strcmp_end_loop
	LOD	3(R0),R2
	LOD	3(R1),R3
	CMP	0,R2
	BZ	strcmp_end_loop
	CMP	R2,R3
	BNZ	strcmp_end_loop
	ADD	4,R0
	ADD	4,R1
	BRA	strcmp_top_of_loop
#endif

strcmp_end_loop:
	SUB	R3,R2
	MOV	R2,R0

	LOD	(SP),R2
	LOD	1(SP),R3
	ADD	2,SP
	JMP	R2
#endif


//test_enum	func_1(char ch_1, char ch_2) {
//	char	lcl_ch_1, lcl_ch_2;
//
//	lcl_ch_1 = ch_1;
//	lcl_ch_2 = lcl_ch_1;
//	if (lcl_ch_2 != ch_2)
//		return 0;
//	else {
//		gbl_ch = lcl_ch_1;
//		return 1;
//	}

#ifdef	NO_INLINE
func_1:
	; On input,
	;	R0 = ch_1
	;	R1 = ch_2
	;	R2 = return address (saved, used as a temporary, restored)
	; On output, R0 is our return value

	SUB	1,SP
	STO	R2,(SP)
	MOV	R0,R2
	CMP	R2,R1
	CLR.NZ	R0
	STO.Z	R2,gbl_ch(R12)
	LDILO.Z	1,R0
	LOD	(SP),R2
	ADD	1,SP
	JMP	R2
#endif

//bool	func_2(char *str_1, char *str_2) {
//	int	lcl_int;
//	char	lcl_ch;
//
//	lcl_int = 2;
//	while(lcl_int <= 2) {
//		if (func_1(str_1[lcl_int], str_2[lcl_int+1])==0) {
//			lcl_ch = 'A';
//			lcl_int ++;
//		}
//	}
//
//	if ((lcl_ch >= 'W')&&(lcl_ch < 'Z'))
//		lcl_int = 7;
//	if (lcl_ch == 'R')
//		return true;
//	else {
//		if (lcl_strcmp(str_1, str_2)>0) {
//			lcl_int += 7;
//			gbl_int = lcl_int;
//		} else
//			return false;
//	}
//}
;
; func_2
;	On entry,
;		R0 = str_1
;		R1 = str_2
;		R2 = return address
;	On exit, R0 = 1 if lcl_strcmp(str_1, str_2) > 0, and 0 otherwise.
;	R2 through R7 are preserved, R1 is not.
;
func_2:
	;
	SUB	6,SP
	STO	R2,(SP)
	STO	R3,1(SP)
	STO	R4,2(SP)
	STO	R5,3(SP)
	STO	R6,4(SP)
	STO	R7,5(SP)

	MOV	R0,R3	; R3 = str_1
	MOV	R1,R4	; R4 = str_2
	LDI	2,R5	; R5 = lcl_int
	LDI	'A',R7	; R7 = lcl_ch
func_2_while_loop:
	CMP	2,R5
	BGT	func_2_end_while_loop
func_2_top_while_loop:
	MOV	R3,R6
	ADD	R5,R6
#ifdef	NO_INLINE
	LOD	(R6),R0
	MOV	R4,R6
	ADD	R5,R6
	LOD	1(R6),R1

	MOV	__HERE__+2(PC),R2
	BRA	func_1

	CMP	0,R0
	ADD.Z	1,R5
#ifndef	SKIP_SHORT_CIRCUITS
	BUSY.NZ
#endif
#else
	; Inlined func_1, with R2 holding str_1[lcl_int]
	LOD	(R6),R2
	MOV	R4,R6
	ADD	R5,R6
	LOD	1(R6),R1
	CMP	R2,R1
	STO.Z	R2,gbl_ch(R12)
	LDILO.Z	1,R0
	ADD.NZ	1,R5
#ifndef	SKIP_SHORT_CIRCUITS
	BUSY.Z
#endif
#endif
	CMP	3,R5
#ifndef	SKIP_SHORT_CIRCUITS
	BUSY.LT
#endif
	BLT	func_2_top_while_loop

func_2_end_while_loop:

	// CMP	'W',R7			// BUT! We know lcl_ch='A'
	// BLT	skip_if			// So we can skip this
	// CMP	'Z',R7			// entire  section
	// LDI.LT	7,R5
	// CMP	'R',R7
	// BNZ	alt_if_case
	// LLO.Z	1,R0
	// BRA	func_2_return_and_cleanup
	//
	MOV	R3,R0
	MOV	R4,R1
	MOV	__HERE__+2(PC),R2
	BRA	lcl_strcmp
	CMP	0,R0
	BGT	func_2_final_then
	CLR	R0
	BRA	func_2_return_and_cleanup
func_2_final_then:
	// ADD	7,R5		; Never read, so useless code
	LDI	1,R0
#ifndef	SKIP_SHORT_CIRCUITS
	BUSY
#endif
func_2_return_and_cleanup:

	LOD	(SP),R2
	LOD	1(SP),R3
	LOD	2(SP),R4
	LOD	3(SP),R5
	LOD	4(SP),R6
	LOD	5(SP),R7
	ADD	6,SP
	JMP	R2
	NOP

//bool	func_3(test_enum a) {
//	test_enum	lcl_enum;
//
//	lcl_enum = a;
//	if (lcl_enum == Ident_3)
//		return true;
//	else
//		return false;
//}

#ifdef	NO_INLINE
func_3:
	; On entry,
	;	R0 = a
	;	R1 = return address
	; On exit, R0 = 1 if a was 2 (Ident_3), and 0 otherwise
	CMP	2,R0
	CLR	R0	; CLR Doesn't set flags
	LDILO.Z	1,R0
	JMP	R1
#endif


// void	proc_6(test_enum ev, test_enum *ep) {
//	*ep = ev;
//	if (!func_3(ev))
//		*ep = 3;
//	switch(ev) {
//		case 0: *ep = 0; break;
//		case 1:
//			if (gbl_int > 100)
//				*ep = 0;
//			else
//				*ep = 3;
//			break;
//		case 2:
//			*ep = 1;
//			break;
//		case 3:
//			break;
//		case 4:
//			*ep = 2;
//	}
//}

proc_6:
	; On entry:
	;	R0 = ev
	;	R1 = ep
	;	R2 = link address
	; Since we call func_3, we have to reserve R0 and R1
	; for other purposes.  Thus
	;	R2 = ev
	;	R3 = ep
	; R2 and R3 are restored on exit, R0 and R1 are not.
	SUB	2,SP
	STO	R2,(SP)
	STO	R3,1(SP)

	MOV	R1,R3
	MOV	R0,R2
	; *ep = ev
	STO	R0,(R1)
#ifndef	SKIP_SHORT_CIRCUITS
	CMP	2,R0
	BUSY.NZ
#endif

#ifdef	NO_INLINE
	; !func_3(ev)
	MOV	__HERE__+2(PC),R1
	BRA	func_3

	TST	-1,R0
	LDI	3,R1
#ifndef	SKIP_SHORT_CIRCUITS
	BUSY.Z
#endif
	STO.Z	R1,(R3)
#else
	; Inlined func_3: ev is Ident_3 if (and only if) R0 is 2
	CMP	2,R0
	LDI	3,R1
	; This self check belongs to the same family as all of the others,
	; and so it is compiled out when SKIP_SHORT_CIRCUITS is defined
#ifndef	SKIP_SHORT_CIRCUITS
	BUSY.NZ
#endif
	STO.NZ	R1,(R3)
#endif

#ifndef	SKIP_SHORT_CIRCUITS
	CMP	2,R2
	BUSY.NZ
#endif
	CMP	0,R2
	BNZ	proc_6_case_not_zero
#ifndef	SKIP_SHORT_CIRCUITS
	BUSY
#endif
	LDI	0,R1
	STO	R1,(R3)
	BRA	proc_6_end_of_case
proc_6_case_not_zero:
	CMP	1,R2
	BNZ	proc_6_case_not_one
#ifndef	SKIP_SHORT_CIRCUITS
	BUSY
#endif
	LDI	3,R0
	LOD	gbl_int(R12),R1
	CMP	100,R1
	CLR.GT	R0
	STO	R0,(R3)
	BRA	proc_6_end_of_case
proc_6_case_not_one:
	CMP	2,R2
	BNZ	proc_6_case_not_two
	LDI	1,R1			// Executed, if done properly
	STO	R1,(R3)
	BRA	proc_6_end_of_case
proc_6_case_not_two:
#ifndef	SKIP_SHORT_CIRCUITS
	BUSY
#endif
	CMP	4,R2
	BNZ	proc_6_case_not_four
	LDI	2,R1
	STO	R1,(R3)
	// BRA	proc_6_end_of_case
proc_6_case_not_four:
proc_6_end_of_case:
	LOD	(SP),R2
	LOD	1(SP),R3
	ADD	2,SP
	JMP	R2
	NOP

// void	proc_7(int a, int b, int *c) {
//	int	lcl;
//
//	lcl = a + 2;
//	*c = b + a;
//}

#ifdef	NO_INLINE
proc_7:
	; On entry,
	;	R0 = a
	;	R1 = b
	;	R2 = c
	;	R3 = return address
	; Stores b + a + 2 into *c, leaving that same value in R1
	ADD	2+R0,R1
	STO	R1,(R2)

	JMP	R3
#endif

//	int	a[50];
//	int	b[50][50];
//
// void	proc_8(Arr_1_Dim a, Arr_2_Dim b, int c, int d) {
//	int	idx, loc;
//
//	loc = c+5;
//	a[loc] = d;
//	a[loc+1] = a[loc];
//	a[loc+30] = loc;
//	for(idx=loc; idx<= loc+1; idx++)
//		b[loc][idx] = loc;
//	b[loc][loc-1] += 1;
//	b[loc+20][loc] = a[loc];
//	gbl_int = 5;
//}

proc_8:
	; On entry,
	;	R0 = a
	;	R1 = b
	;	R2 = c
	;	R3 = d
	;	R4 = return address
	; Makes no function/procedure calls, so these can keep
	;	R2 = loc = c+5, replaces c
	;	R4 = idx
	; R4, R5, and R6 are restored on exit.  R2 and R3 are modified.
	SUB	3,SP
	STO	R4,(SP)
	STO	R5,1(SP)
	STO	R6,2(SP)

	ADD	5,R2	; loc = c+5
	MOV	R0,R5
	ADD	R2,R5	; R5 = &a[loc]
	STO	R3,(R5)
	STO	R3,1(R5)
	STO	R2,30(R5)
	MOV	R2,R5
	MPYU	50,R5	; R5 = 50 * R2 = 50 * loc
	ADD	R1,R5	; R5 = &b[loc][0]
	MOV	R5,R6	; R6 = &b[loc][0]
	ADD	R2,R5	; R5 = &b[loc][loc]
	MOV	R2,R4	; R4 = loc = index
proc_8_top_of_loop:
	CMP	1(R2),R4
	BGT	proc_8_end_of_loop
proc_8_loop_after_condition:
	STO	R2,(R5)
	ADD	1,R5
	ADD	1,R4
	CMP	2(R2),R4
	BLT	proc_8_loop_after_condition

proc_8_end_of_loop:
	; b[loc][loc-1] += 1
	ADD	R2,R6		; R6 = &b[loc][loc]
	LOD	-1(R6),R5
	ADD	1,R5
	STO	R5,-1(R6)
	; b[loc+20][loc] = a[loc]
	MOV	R0,R4
	ADD	R2,R4
	LOD	(R4),R3
	STO	R3,20*50(R6)
	LDI	5,R3
	STO	R3,gbl_int(R12)

	LOD	(SP),R4
	LOD	1(SP),R5
	LOD	2(SP),R6
	ADD	3,SP
	JMP	R4

// void	proc_5(void) {
//	gbl_ch = 'A';
//	gbl_bool = false;
//}

#ifdef	NO_INLINE
proc_5:
	; On entry, R0 = return address.  It is saved on the stack while
	; R0 is used as a scratch register.
	SUB	1,SP
	STO	R0,(SP)
	;
	LDI	'A',R0
	STO	R0,gbl_ch(R12)
	CLR	R0
	STO	R0,gbl_bool(R12)
	;
	LOD	(SP),R0
	ADD	1,SP
	JMP	R0
#endif

// void	proc_4(void) {
//	bool	lcl_bool;
//	lcl_bool = (gbl_ch == 'A');
//	gbl_ch_2 = 'B';
// }

#ifdef	NO_INLINE
proc_4:
	//
	; LDI	GBL,R12	// Already in R12
	; Setting lcl_bool is irrelevant, so the optimizer should remove it.
	; R0 doesn't need to be saved, since it's already trashed by the
	; subroutine call.
	;
	; LOD	gbl_ch(R12),R0
	; CLR	R1
	; CMP	'A',R0
	; ADD.Z	1,R1
	;
	; On entry, R0 = return address
	SUB	1,SP
	STO	R0,(SP)
	;
	LDI	'B',R0
	STO	R0,gbl_ch_2(R12)
	;
	LOD	(SP),R0
	ADD	1,SP
	JMP	R0
#endif

// void	proc_3(RECP *a) {
//	if (gbl_ptr != NULL)
//		*a = gbl_ptr->ptr_comp;
//	proc_7(10,gbl_int, &gbl_ptr->variant.var_1.int_comp); // ??
//}

proc_3:
	; On entry,
	;	R0 = a
	;	R1 = return address
	; R1, R2, and R3 are restored on exit.
	SUB	3,SP
	STO	R1,(SP)
	STO	R2,1(SP)
	STO	R3,2(SP)
	;
	LOD	gbl_ptr(R12),R2
	TST	-1,R2
#ifndef	SKIP_SHORT_CIRCUITS
	BUSY.Z
#endif
	LOD.NZ	ptr_comp(R2),R3
	STO.NZ	R3,(R0)
#ifdef	NO_INLINE
	LDI	10,R0
	LOD	gbl_int(R12),R1
	MOV	variant.var_1.int_comp(R2),R2
	MOV	__HERE__+2(PC),R3
	BRA	proc_7
#else
	; Inlined proc_7(10, gbl_int, ...): stores gbl_int + 10 + 2
	LOD	gbl_int(R12),R1
	ADD	12,R1
	STO	R1,variant.var_1.int_comp(R2)
#endif
	;
	LOD	(SP),R1
	LOD	1(SP),R2
	LOD	2(SP),R3
	ADD	3,SP
	JMP	R1
	NOP

// void	proc_2(int *a) {
//	int		lcl_int;
//	test_enum	lcl_enum;
//
//	lcl_int = *a + 10;
//	do {
//		if (gbl_ch == 'A') {
//			lcl_int -= 1;
//			*a = lcl_int - gbl_int;
//			lcl_enum = Ident_1;
//		}
//	} while(lcl_enum != Ident_1);
//}

proc_2:
	; On entry,
	;	R0 = a
	;	R1 = return address
	; R1 through R6 are restored on exit.
	SUB	6,SP
	STO	R1,(SP)
	STO	R2,1(SP)
	STO	R3,2(SP)
	STO	R4,3(SP)
	STO	R5,4(SP)
	STO	R6,5(SP)
	// R1 doesn't need to be stored, it was used in the subroutine
	// call calculation

	LOD	(R0),R1
	MOV	10(R1),R2		; R2 = lcl_int
	LOD	gbl_ch(R12),R4		; R4 = gbl_ch
#ifdef	NO_CHEATING
proc_2_loop:
	CMP	'A',R4
	SUB.Z	1,R2
	LOD.Z	gbl_int(R12),R5		; R5 = gbl_int
	MOV.Z	R2,R6			; R6 = lcl_int
	SUB.Z	R5,R6			; lcl_int - gbl_int
	STO.Z	R6,(R0)			; *a = R6
	CLR.Z	R3			; lcl_enum = 0
// #ifndef	SKIP_SHORT_CIRCUITS
	// BUSY.NZ
// #endif

	TST	-1,R3
// #ifndef	SKIP_SHORT_CIRCUITS
	// BUSY.NZ
// #endif
	BNZ	proc_2_loop
#else
	; Straight line version: *a = lcl_int - (gbl_int + 1)
	LOD	gbl_int(R12),R5
	SUB	1(R5),R2
	STO	R2,(R0)
#endif
	;
	LOD	(SP),R1
	LOD	1(SP),R2
	LOD	2(SP),R3
	LOD	3(SP),R4
	LOD	4(SP),R5
	LOD	5(SP),R6
	ADD	6,SP
	JMP	R1
	NOP

//void	proc_1 (RECP a) {
//	RECP	nxt = a->ptr_comp;
//
//	// structassign(a->ptr_comp, gbl_ptr);
//	*(a->ptr_comp) = *(gbl_ptr);
//
//	a->variant.var_1.int_comp = 5;
//	nxt->variant.var_1.int_comp = a->variant.var_1.int_comp;
//	proc_3(&nxt->ptr_comp);
//
//	if (nxt->discr == 0) {
//		nxt->variant.var_1.int_comp = 6;
//		proc_6(a->variant.var_1.enum_comp, &nxt->variant.var_1.enum_comp);
//		nxt->ptr_comp = gbl_ptr->ptr_comp;
//		proc_7(nxt->variant.var_1.int_comp, 10, &nxt->variant.var_1.int_comp);
//	} else
//		// structassign(a, a->ptr_comp);
//		*a = *(a->ptr_comp);
//}

proc_1:
	; On entry,
	;	R0 = a
	;	R1 = return address
	; R1 through R9 (and R10, R11 when the copy loop is unrolled) are
	; restored on exit.
	SUB	11,SP
	STO	R1,(SP)
	STO	R2,1(SP)
	STO	R3,2(SP)
	STO	R4,3(SP)
	STO	R5,4(SP)
	STO	R6,5(SP)
	STO	R7,6(SP)
	STO	R8,7(SP)
	STO	R9,8(SP)
#ifndef	NO_LOOP_UNROLLING
	STO	R10,9(SP)
	STO	R11,10(SP)
#endif

	; R9 = a
	; R4 = nxt
	; R12 = GBL
	; R13 = SP
	MOV	R0,R9
	LOD	ptr_comp(R9),R4
#ifndef	SKIP_SHORT_CIRCUITS
	TST	-1,R4
	BUSY.Z
	CMP	PC,R9
	BUSY.LT
#endif
	; *(a->ptr_comp) = *(gbl_ptr)
	;	R6 = source      = gbl_ptr
	;	R7 = destination = nxt (a->ptr_comp)
	LOD	gbl_ptr(R12),R6
	MOV	R4,R7

#ifndef	SKIP_SHORT_CIRCUITS
	LOD	variant.var_1.enum_comp(R6), R0
	CMP	2,R0
	BUSY.NZ
#endif

#ifdef	NO_LOOP_UNROLLING
	LDI	RECSIZE,R5
proc_1_assign_loop_1:
	LOD	(R6),R8
	ADD	1,R6
	STO	R8,(R7)
	ADD	1,R7
	SUB	1,R5
	BNZ	proc_1_assign_loop_1
	;
#else
	; R2 is available
	; R3 is available
	; Five words are copied per pass; the record size must therefore be
	; a multiple of five.  The flags tested by the BLT are those of the
	; SUB, since the stores in between leave them alone.
	LDI	RECSIZE-1,R5
proc_1_assign_loop_1:
	LOD	(R6),R8
	LOD	1(R6),R10
	LOD	2(R6),R11
	LOD	3(R6),R2
	LOD	4(R6),R3
	ADD	5,R6
	SUB	5,R5
	STO	R8,(R7)
	STO	R10,1(R7)
	STO	R11,2(R7)
	STO	R2,3(R7)
	STO	R3,4(R7)
	BLT	proc_1_assign_loop_1_end
	ADD	5,R7
	; BNZ	proc_1_assign_loop_1
	;
	BRA	proc_1_assign_loop_1
proc_1_assign_loop_1_end:
	; Loop length is fixed, nothing to test here
#endif

#ifndef	SKIP_SHORT_CIRCUITS
	LOD	gbl_ptr(R12),R2
	TST	-1,R2
	BUSY.Z
	;
	LOD	variant.var_1.enum_comp(R9), R0
	CMP	2,R0
	BUSY.NZ
#endif

	LDI	5,R5
	STO	R5,variant.var_1.int_comp(R9)
	STO	R5,variant.var_1.int_comp(R4)

	MOV	ptr_comp(R4),R0
	MOV	__HERE__+2(PC),R1
	BRA	proc_3			; Uses R0 and R1

	; NOTE(review): nothing visible in this file ever writes the discr
	; field of either record, so this test appears to depend upon the
	; initial contents of the stack -- confirm.
	LOD	discr(R4),R5
	CMP	0,R5
	BNZ	proc_1_last_struct_assign
	; This is the juncture that is "supposed" to be taken
	LDI	6,R5
	STO	R5,variant.var_1.int_comp(R4)

	LOD	variant.var_1.enum_comp(R9), R0
#ifndef	SKIP_SHORT_CIRCUITS
	CMP	2,R0
	BUSY.NZ
#endif
	MOV	variant.var_1.enum_comp+R4, R1
	MOV	__HERE__+2(PC),R2
	BRA	proc_6
	;
	LOD	gbl_ptr(R12),R5
	LOD	ptr_comp(R5),R5
	STO	R5,ptr_comp(R4)
	;
#ifdef	NO_INLINE
	LOD	variant.var_1.int_comp(R4),R0
	LDI	10,R1
	MOV	variant.var_1.int_comp(R4),R2
	MOV	proc_1_return_closeout(PC),R3
	BRA	proc_7
#else
	; Inlined proc_7(x, 10, ...): stores x + 10 + 2
	LOD	variant.var_1.int_comp(R4),R0
	ADD	12,R0
	STO	R0,variant.var_1.int_comp(R4)
	BRA	proc_1_return_closeout
#endif
	;
proc_1_last_struct_assign:
#ifndef	SKIP_SHORT_CIRCUITS
	BUSY
#endif
	; *a = *(a->ptr_comp)
	;	R5 = words remaining
	;	R6 = source      = a->ptr_comp
	;	R7 = destination = a
	LDI	RECSIZE,R5
	LOD	ptr_comp(R9),R6
	MOV	R9,R7
proc_1_assign_loop_2:
	LOD	(R6),R8
	STO	R8,(R7)
	ADD	1,R6
	ADD	1,R7
	SUB	1,R5
	BNZ	proc_1_assign_loop_2
//
proc_1_return_closeout:
//
	LOD	(SP),R1
	LOD	1(SP),R2
	LOD	2(SP),R3
	LOD	3(SP),R4
	LOD	4(SP),R5
	LOD	5(SP),R6
	LOD	6(SP),R7
	LOD	7(SP),R8
	LOD	8(SP),R9
#ifndef	NO_LOOP_UNROLLING
	LOD	9(SP),R10
	LOD	10(SP),R11
#endif
	ADD	11,SP
	JMP	R1		// Jumps to wrong address ??
	NOP

// void	dhrystone(void) {
//	int	lcl_int_1, lcl_int_2, lcl_int_3, index, number_of_runs = 500;
//	test_enum	lcl_enum;
//	char	lcl_str_1[31], lcl_str_2[31], ch_index;
//	REC_T	a, b, *nxt = &a;
//
//	gbl_ptr = &b;
//	gbl_ptr->ptr_comp = nxt;
//	gbl_ptr->variant.var_1.enum_comp = 2;
//	gbl_ptr->variant.var_1.int_comp = 40;
//	lcl_strcpy(gbl_ptr->variant.var_1.str_comp, "DHRYSTONE PROGRAM, SOME STRING");
//	lcl_strcpy(lcl_str_1, "DHRYSTONE PROGRAM, 1\'ST STRING");
//
//	gbl_arr_2[8][7] = 10;
//
//	for(index=0; index < number_of_runs; index++) {
//		proc_5();
//		proc_4();
//		lcl_int_1 = 2;
//		lcl_int_2 = 3;
//		lcl_strcpy(lcl_str_2, "DHRYSTONE PROGRAM, 2\'ND STRING");
//		lcl_enum = Ident_2;
//		gbl_bool = !func_2(lcl_str_1, lcl_str_2);
//		while(lcl_int_1 < lcl_int_2) {
//			lcl_int_3 = 5 * lcl_int_1 - lcl_int_2;
//			proc_7(lcl_int_1, lcl_int_2, &lcl_int_3);
//			lcl_int_1 += 1;
//		}
//
//		proc_8(gbl_arr_1, gbl_arr_2, lcl_int_1, lcl_int_3);
//		proc_1(gbl_ptr);
//
//		for(ch_index='A'; ch_index <= gbl_ch_2; ch_index++) {
//			if (lcl_enum == func_1(ch_index, 'C')) {
//				// Then not executed??
//				proc_6(0, &lcl_enum);
//				lcl_strcpy(lcl_str_2, "DHRYSTONE PROGRAM, 3\'RD STRING");
//				lcl_int_2 = index;
//				gbl_int = index;
//			}
//		}
//
//		lcl_int_2 = lcl_int_2 * lcl_int_1;
//		lcl_int_1 = lcl_int_2 / lcl_int_3;
//		lcl_int_2 = 7 * ( lcl_int_2 - lcl_int_3) - lcl_int_1;
//		proc_2(&lcl_int_1);
//	}
//}
;
; dhrystone: the benchmark itself.
;	Under SUPERVISOR_TASK, R0 is the return address, and R0 through R11
;	are saved and restored.  Otherwise the routine never returns: it
;	finishes by writing zero to CC.
;	Register use within the main loop:
;		R5  = lcl_int_1 (also kept on the stack)
;		R6  = lcl_int_2
;		R7  = scratch, then ch_index
;		R11 = index
;	Each local string buffer is STRSIZE words long: thirty characters
;	plus the terminating zero written by lcl_strcpy.
;
dhrystone:
#ifdef	SUPERVISOR_TASK
	SUB	12+RECSIZE+RECSIZE+STRSIZE+STRSIZE+3,SP
	; Leave a space on the top of the stack for calling
	; subroutines.
	STO	R0,(SP)
	STO	R1,1(SP)
	STO	R2,2(SP)
	STO	R3,3(SP)
	STO	R4,4(SP)
	STO	R5,5(SP)
	STO	R6,6(SP)
	STO	R7,7(SP)
	STO	R8,8(SP)
	STO	R9,9(SP)
	STO	R10,10(SP)
	STO	R11,11(SP)
lcl_int_1	equ	12			; plus SP
#else
lcl_int_1	equ	2			; plus SP
	SUB	2+RECSIZE+RECSIZE+STRSIZE+STRSIZE+3,SP
#endif
// 12 is the global variable pointer
// 13 is our stack
// 14 is our condition code register
// 15 is the program counter
;
lcl_int_3	equ	lcl_int_1+1		; plus SP
lcl_enum	equ	lcl_int_3+1		; plus SP
lcl_str_1	equ	lcl_enum+1		; plus SP
lcl_str_2	equ	lcl_str_1+STRSIZE	; plus SP
rec_a		equ	lcl_str_2+STRSIZE	; plus SP
rec_b		equ	rec_a+RECSIZE		; plus SP

//	int	lcl_int_1, lcl_int_2, lcl_int_3, index, number_of_runs = 500;
//	test_enum	lcl_enum;
//	char	lcl_str_1[31], lcl_str_2[31], ch_index;
//	REC_T	a, b, *nxt = &a;
//
//	gbl_ptr = &b;
	MOV	rec_b(SP),R0		; R0 = &b
	STO	R0,gbl_ptr(PC)
//	gbl_ptr->ptr_comp = nxt;
	MOV	rec_a(SP),R1		; R1 = &a = nxt
	STO	R1,ptr_comp(R0)		; gbp_ptr->ptr.comp=b->ptr.comp=R1=nxt
//	gbl_ptr->variant.var_1.enum_comp = 2;
	LDI	2,R2
	STO	R2,variant.var_1.enum_comp(R0)
//	gbl_ptr->variant.var_1.int_comp = 40;
	LDI	40,R2
	STO	R2,variant.var_1.int_comp(R0)
//	lcl_strcpy(gbl_ptr->variant.var_1.str_comp, "DHRYSTONE PROGRAM, SOME STRING");
	MOV	variant.var_1.str_comp(R0),R0
	MOV	some_string(PC),R1
	MOV	__HERE__+2(PC),R2
	BRA	lcl_strcpy
//	lcl_strcpy(lcl_str_1, "DHRYSTONE PROGRAM, 1\'ST STRING");
	MOV	lcl_str_1(SP),R0
	MOV	first_string(PC),R1
	MOV	__HERE__+2(PC),R2
	BRA	lcl_strcpy

//	gbl_arr_2[8][7] = 10;
	LDI	10,R0
	STO	R0,8*50+7+gbl_arr_2(R12)
//
//	for(index=0; index < number_of_runs; index++) {
	; Let R11 be our index
	CLR	R11
dhrystone_main_loop:
	;; Start of Dhrystone main loop
	; proc_5();
#ifdef	NO_INLINE
	MOV	__HERE__+2(PC),R0
	BRA	proc_5
#else
	LDI	'A',R0
	STO	R0,gbl_ch(R12)
	CLR	R0
	STO	R0,gbl_bool(R12)
#endif
	; proc_4();
#ifdef	NO_INLINE
	MOV	__HERE__+2(PC),R0
	BRA	proc_4
#else
	LDI	'B',R0
	STO	R0,gbl_ch_2(R12)
#endif
//		lcl_int_1 = 2;
	LDI	2,R5
	STO	R5,lcl_int_1(SP)
//		lcl_int_2 = 3;
	LDI	3,R6
//		lcl_strcpy(lcl_str_2, "DHRYSTONE PROGRAM, 2\'ND STRING");
	MOV	lcl_str_2(SP),R0
	MOV	second_string(PC),R1
	MOV	__HERE__+2(PC),R2
	BRA	lcl_strcpy
//		lcl_enum = Ident_2;
	LDI	2,R0
	STO	R0,lcl_enum(SP)
//		gbl_bool = !func_2(lcl_str_1, lcl_str_2);
	MOV	lcl_str_1(SP),R0
	MOV	lcl_str_2(SP),R1
	MOV	__HERE__+2(PC),R2
	BRA	func_2
	CLR	R1
	TST	-1,R0
	LDILO.Z	1,R1
	STO	R1,gbl_bool(PC)

//		while(lcl_int_1 < lcl_int_2) {
	; R5 = lcl_int_1 = 2 on entry
	; R6 = lcl_int_2 = 3 on entry, so no entry test is required
	LOD	lcl_int_1(SP),R5
	// The 'while' comparison
	CMP	R6,R5
	BGE	dhrystone_end_while_loop
dhrystone_while_loop:
//			lcl_int_3 = 5 * lcl_int_1 - lcl_int_2;
	MOV	R5,R7
	MPYS	5,R7
	SUB	R6,R7
	STO	R7,lcl_int_3(SP)
#ifndef	SKIP_SHORT_CIRCUITS
	CMP	7,R7
	BUSY.NZ
#endif
//			proc_7(lcl_int_1, lcl_int_2, &lcl_int_3);
#ifdef	NO_INLINE
	MOV	R5,R0
	MOV	R6,R1
	MOV	lcl_int_3(SP),R2
	MOV	__HERE__+2(PC),R3
	BRA	proc_7
#else
	MOV	R6,R1
	ADD	2+R5,R1
	STO	R1,lcl_int_3(SP)
#endif
//			lcl_int_1 += 1;
	LOD	lcl_int_1(SP),R5
	ADD	1,R5
	STO	R5,lcl_int_1(SP)
	;
	; BRA	dhrystone_while_loop	; We'll unroll the loop, and put an
	CMP	R6,R5			; additional comparison at the bottom
	BLT	dhrystone_while_loop
dhrystone_end_while_loop:
//		}
//
#ifndef	SKIP_SHORT_CIRCUITS
	LOD	lcl_int_1(SP),R0
	CMP	3,R0
	BUSY.NZ
	CMP	3,R6
	BUSY.NZ
	LOD	lcl_int_3(SP),R0
	CMP	7,R0
	BUSY.NZ
#endif
//		proc_8(gbl_arr_1, gbl_arr_2, lcl_int_1, lcl_int_3);
	MOV	gbl_arr_1(PC),R0
	MOV	gbl_arr_2(PC),R1
	MOV	R5,R2
	; The fourth argument is lcl_int_3, which lives on the stack (R6
	; holds lcl_int_2, not lcl_int_3)
	LOD	lcl_int_3(SP),R3
	MOV	__HERE__+2(PC),R4
	BRA	proc_8
//		proc_1(gbl_ptr);
	LOD	gbl_ptr(PC),R0
	MOV	__HERE__+2(PC),R1
	BRA	proc_1
//
//		for(ch_index='A'; ch_index <= gbl_ch_2; ch_index++) {
	; R7 = ch_index, R8 = gbl_ch_2.  gbl_ch_2 is a global, and so it is
	; addressed from R12 just as where it is written above.
	LDI	'A',R7
	LOD	gbl_ch_2(R12),R8
	CMP	R7,R8
	BLT	dhrystone_end_of_for_loop
dhrystone_top_of_for_loop:
//			if (lcl_enum == func_1(ch_index, 'C')) {
#ifdef	NO_INLINE
	MOV	R7,R0
	LDI	'C',R1
	MOV	__HERE__+2(PC),R2
	BRA	func_1
#else
	CMP	'C',R7
	CLR.NZ	R0
	STO.Z	R7,gbl_ch(R12)
	LDILO.Z	1,R0
#endif
	; Result is now in R0
	LOD	lcl_enum(SP),R1
	CMP	R0,R1
	BNZ	dhrystone_skip_then_clause
//				// Then not executed??
//				proc_6(0, &lcl_enum);
#ifndef	SKIP_SHORT_CIRCUITS
	BUSY	// Shouldn't ever get here
#endif
	CLR	R0
	MOV	lcl_enum(SP),R1
	MOV	__HERE__+2(PC),R2
	BRA	proc_6
//				lcl_strcpy(lcl_str_2, "DHRYSTONE PROGRAM, 3\'RD STRING");
	MOV	lcl_str_2(SP),R0
	MOV	third_string(PC),R1
	MOV	__HERE__+2(PC),R2
	BRA	lcl_strcpy
//				lcl_int_2 = index;
	MOV	R11,R6
//				gbl_int = index;
	STO	R11,gbl_int(PC)
//			}
dhrystone_skip_then_clause:
	ADD	1,R7
	LOD	gbl_ch_2(R12),R8
	; Same sense as the entry test: flags are those of R8 - R7, so GE
	; means ch_index <= gbl_ch_2 and the loop continues
	CMP	R7,R8
	BGE	dhrystone_top_of_for_loop
dhrystone_end_of_for_loop:
//		}
#ifndef	SKIP_SHORT_CIRCUITS
	LOD	lcl_int_1(SP),R0
	CMP	3,R0
	BUSY.NZ
	CMP	3,R6
	BUSY.NZ
	LOD	lcl_int_3(SP),R0
	CMP	7,R0
	BUSY.NZ
#endif
//
//		lcl_int_2 = lcl_int_2 * lcl_int_1;
	LOD	lcl_int_1(SP),R5
	MPYS	R5,R6		; lcl_int_2 =
//		lcl_int_1 = lcl_int_2 / lcl_int_3;
#ifdef	HARDWARE_DIVIDE
	LOD	lcl_int_3(SP),R1
	MOV	R6,R0
	DIVS	R1,R0
#else
#ifndef	SKIP_DIVIDE
	MOV	R6,R0
	LOD	lcl_int_3(SP),R1
	MOV	__HERE__+2(PC),R2
	BRA	lib_divs
#else
	; Stand in for the divide with its known result, 9 / 7 = 1 -- the
	; value the self checks below insist upon
	LDI	1,R0
#endif
#endif
	STO	R0,lcl_int_1(SP)
//		lcl_int_2 = 7 * ( lcl_int_2 - lcl_int_3) - lcl_int_1;
	LOD	lcl_int_3(SP),R2
	SUB	R2,R6
	MPYS	7,R6
	SUB	R0,R6
//		proc_2(&lcl_int_1);
#ifndef	SKIP_SHORT_CIRCUITS
	LOD	lcl_int_1(SP),R0
	CMP	1,R0
	BUSY.NZ
	CMP	13,R6
	BUSY.NZ
	LOD	lcl_int_3(SP),R0
	CMP	7,R0
	BUSY.NZ
#endif
	MOV	lcl_int_1(SP),R0
	MOV	__HERE__+2(PC),R1
	BRA	proc_2
#ifndef	SKIP_SHORT_CIRCUITS
	LOD	lcl_int_1(SP),R0
	CMP	5,R0
	BUSY.NZ
#endif

	;; Bottom of (and return from) Dhrystone main loop
	ADD	1,R11
	CMP	NUMBER_OF_RUNS,R11
	BLT	dhrystone_main_loop
//	}

#ifdef	SUPERVISOR_TASK
	LOD	(SP),R0
	LOD	1(SP),R1
	LOD	2(SP),R2
	LOD	3(SP),R3
	LOD	4(SP),R4
	LOD	5(SP),R5
	LOD	6(SP),R6
	LOD	7(SP),R7
	LOD	8(SP),R8
	LOD	9(SP),R9
	LOD	10(SP),R10
	LOD	11(SP),R11
	;
	ADD	12+RECSIZE+RECSIZE+STRSIZE+STRSIZE+3,SP
	; Return from subroutine
	JMP	R0
#else
	LDI	0,CC
	NOP
	NOP
	BUSY
#endif
;
Go to most recent revision | Compare with Previous | Blame | View Log
