URL https://opencores.org/ocsvn/zipcpu/zipcpu/trunk
Subversion Repositories zipcpu

[/] [zipcpu/] [trunk/] [bench/] [asm/] [zipdhry.S] - Rev 128

Go to most recent revision | Compare with Previous | Blame | View Log
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; Filename:     zipdhry.S
;
; Project:      Zip CPU -- a small, lightweight, RISC CPU soft core
;
; Purpose:      Zip assembly file for running the Dhrystone benchmark in the
;               Zip CPU.
;
;       To calculate a DMIPS value, take the value of R0 upon completion.  This
;       is the number of clock ticks used from start to finish (i.e., from
;       entrance into user mode to the return to supervisor mode).  Let
;       CLKSPD be your clock speed in Hz.  Then:
;
;       DMIPS = (CLKSPD*NRUNS/R0) / 1757;
;
;       For my tests, CLKSPD = 100e6 Hz (100 MHz), NRUNS = 512.  Thus,
;
;       DMIPS = (100e6 * 512) / R0 / 1757
;
;
; Creator:      Dan Gisselquist, Ph.D.
;               Gisselquist Technology, LLC
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; Copyright (C) 2015, Gisselquist Technology, LLC
;
; This program is free software (firmware): you can redistribute it and/or
; modify it under the terms of  the GNU General Public License as published
; by the Free Software Foundation, either version 3 of the License, or (at
; your option) any later version.
;
; This program is distributed in the hope that it will be useful, but WITHOUT
; ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
; for more details.
;
; License:      GPL, v3, as defined and found on www.gnu.org,
;               http://www.gnu.org/licenses/gpl.html
;
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
// Under Verilator:
//      DMIPS:          30.3    100 MHz (sim)   0.29    // Initial baseline
//      DMIPS:          37.5    100 MHz (sim)   0.38    // 20151017
//      DMIPS:          38.0    100 MHz (sim)   0.38    // 20151211 (new ISA)
//      DMIPS:          40.5    100 MHz (sim)   0.41    // 20151212 (H/W DIV)
//      DMIPS:           8.2    100 MHz (sim)   0.08    // 20151104--!pipelined
//      DMIPS:          60.1    100 MHz (sim)   0.60    // 20151215 (New PF)
//      DMIPS:          60.0    100 MHz (sim)   0.60    // 20151226 (BugFix)
// On real hardware:
//      DMIPS:          24.7    100 MHz (basys) 0.25    // Initial baseline
//      DMIPS:          30.6    100 MHz (basys) 0.31    // 20151017
//      DMIPS:          48.4    100 MHz (basys) 0.48    // 20151227 (New pf/ISA)
//
// (And, under Verilator, if the cache holds the entire 4kW program: 55.1 DMIPS)
//
//
//   with no loop unrolling nor function inlining
//      DMIPS:          24.3    100 MHz (sim)   0.24
//   with packed strings
//      DMIPS:          35.6    100 MHz (sim)   0.36
//
// For comparison:
//      uBlaze:         230     177 MHz         1.3
//      LEON3                                   1.4
//      NiOS II:        218     185 MHz         1.16
//      OpenRISC        250     250 MHz         1.00
//      LM32                                    1.14
//      ZPU             2.6      50 MHz         0.05
//

// Some #def's to control compilation.
//
// SKIP_SHORT_CIRCUITS determines whether or not we do internal testing and
// jump to a BUSY instruction on failure for the debugger to pick up.  Skip
// this for valid testing.  Enable it and see whether or not zipdhry dies mid
// process--if it does, you have found a failing check--so fix it.
//
#define SKIP_SHORT_CIRCUITS
//
//
//
// NO_INLINE controls whether or not we inline certain functions.  If you 
// define this, nothing will be inlined.
//
// I recommend not setting this define.
//
// #define      NO_INLINE
//
//
//
// NO_LOOP_UNROLLING controls loop unrolling.  The default is to unroll loops
// by a factor of 4x.  By defining this, all loop unrolling is removed.  (Well,
// except the pipelined strcpy and strcmp below--those loops are automatically
// unrolled as part of being piped.  Undefine those as well and no loops will
// be unrolled at all.)
//
// I recommend not setting this define.
//
// #define      NO_LOOP_UNROLLING
//
//
//
// After building this whole thing and putting it together, I discovered another
// means I could use of generating a return statement.  In this case, instead
// of LOD -1(SP),PC, I would load the return PC from the stack as part of the
// pipelined memory operations, adjust the stack pointer, and jump to the 
// register address.  It saves clocks because it uses the pipelined memory
// operation, but other than that it suffers the same number of stalls.
//
// Fast returns used to be controlled by a #define.  This has been removed,
// and all returns are "fast" by default.
//
//
//
//
//
// SKIP_DIVIDE controls whether or not we want to calculate the speed of
// our processor assuming we had a divide instruction.  If you skip the
// divide, it will be as though you had such an instruction.  Otherwise,
// leave it in and the test bench will measure how long it takes to run
// while including the divide emulation.
//
// I recommend leaving this undefined, for a more accurate measure.
//
// #define      SKIP_DIVIDE     // 0xace17/0x50f37 vs 0xbd817/0x57d37
//
// Thus a divide instruction might raise our score from 37.5 to 41.1, or
// from 81 to 87.8--depending on whether or not the cache is loaded or not.
//
//
//
//
// HARDWARE_DIVIDE is appropriate when the hardware has a divide instruction,
// as it will use this divide instruction for the one time a divide is needed.
// 
// I recommend setting this value ... IF the hardware has the divide
// instruction built in.
//
#define HARDWARE_DIVIDE
//
//
// PIPELINED_STRCPY and PIPELINED_STRCMP both have to do with whether or not
// the memory accesses of each of these "library" functions are pipelined.
// As you may recall, the Zip CPU allows you to pipeline memory accesses
// that are all done with the same condition, and that all reference either
// the same or increasing addresses.  These one-clock memory access instructions
// are ridiculously fast (when available), and we would be foolish not to use
// them.  These two defines modify the library functions to use this mode
// and to capitalize upon it as much as possible.
//
// I recommend setting these.
//
#define PIPELINED_STRCPY
#define PIPELINED_STRCMP
//
//
        // Peripheral addresses.  dev.scope.cpu is only referenced by the
        // commented out scope setup at the top of entry.  sys.ctr.mtask is
        // the address entry writes zero to before the run and reads back
        // (as the tick count) once the run is over.
        dev.scope.cpu   equ     0x0120
        sys.ctr.mtask   equ     0xc0000008
// int main(int argc, char **argv) {
//      dhrystone();
// }
// #define      LOAD_ADDRESS    entry+PC
//
// LOAD_ADDRESS is the value the optional return address checks (compiled in
// only when SKIP_SHORT_CIRCUITS is not defined) compare a link register
// against before jumping through it.
#define LOAD_ADDRESS    lcl_strcpy+PC
//
// entry: program start.  Sets up the user stack pointer and the user R12
// (the global variable pointer), zeroes the tick counter, runs dhrystone,
// then reads the tick counter back into R0 and halts.  R0 on completion is
// the value the DMIPS formula in the file header needs.
//
// NOTE(review): in the SUPERVISOR_TASK configuration dhrystone is reached by
// a plain branch, yet only uSP and uR12 are initialized here -- confirm that
// the supervisor SP and R12 are valid in that build.
entry:
        ; LDI   0x0c000010,R0
        ; LDI   dev.scope.cpu,R1
        ; STO   R0,(R1)
        ;
        MOV     top_of_stack(PC),uSP
        MOV     entry(PC),uR12
        ; R1 = address of our tick counter
        LDI     sys.ctr.mtask,R1
        ; And start with our counter cleared at zero
        CLR     R0
        STO     R0,(R1)
#ifdef  SUPERVISOR_TASK
        ; Call dhrystone directly, with R0 as the link register
        MOV     __HERE__+2(PC),R0
        BRA     dhrystone
#else
        ; Point the user PC at dhrystone and switch to user mode
        MOV     dhrystone(PC),uPC
        RTU
#endif
        ; Read the tick counter back out
        LOD     (R1),R0
        HALT    ; Stop the CPU--We're done!!!!!!!

//
// typedef      enum { Ident_1, Ident_2, Ident_3, Ident_4, Ident_5 } test_enum;
// typedef      enum { false, true } bool;

// typedef      int     Arr_1_Dim[50];
// typedef      int     Arr_2_Dim[50][50];
// RECSIZE is the size, in words, of one record (REC_T in the C reference).
// proc_1 copies 35 words per structure assignment, and dhrystone reserves
// two records of this size on its stack.
#define RECSIZE 35
// Presumably the NRUNS value of the DMIPS formula in the file header (which
// quotes 512) -- its use is not visible in this part of the file.
#define NUMBER_OF_RUNS  (512)
        // Word offsets of the record fields, used as displacements from a
        // record pointer, e.g. variant.var_1.int_comp(R4).
        ptr_comp                        equ     0
        discr                           equ     1
        variant.var_1.enum_comp         equ     2
        variant.var_1.int_comp          equ     3
        variant.var_1.str_comp          equ     4


//char  *lcl_strcpy(char *d, char *s) {
//      char    *cpd = d, ch;
//
//      do{
//              *cpd++ = ch = *s++;
//      } while(ch);
//
//}
//

#ifdef  PIPELINED_STRCPY
; lcl_strcpy, pipelined variant: copy a zero terminated string, one
; character per word, fetching and storing four words at a time.
;
; On entry,
;       R0 = dst
;       R1 = src
;       R2 = return address
; On exit,
;       the string, INCLUDING its zero terminator, has been written to dst
;       R0, R1 have been advanced past every complete group of four copied
;       R3-R5 are restored from the stack
;
; Because the source is read four words at a time, up to three words beyond
; the terminator are read (but never stored).
;
// NOTE(review): the destination needs room for the terminator as well.
// Earlier revisions of this routine never stored it; confirm the buffers
// handed in by dhrystone have a word to spare.
lcl_strcpy:
        SUB     4,SP
        STO     R2,(SP)
        STO     R3,1(SP)
        STO     R4,2(SP)
        STO     R5,3(SP)

copy_next_char:
        ; R0 = d
        ; R1 = s
        ; R2..R5 = the next four characters
        LOD     (R1),R2
        LOD     1(R1),R3
        LOD     2(R1),R4
        LOD     3(R1),R5

        ; Z ends up set if any one of the four characters is zero: each
        ; CMP.NZ only runs while no zero has been seen so far.
        CMP     0,R2
        CMP.NZ  0,R3
        CMP.NZ  0,R4
        CMP.NZ  0,R5
        BZ      end_strcpy

        STO     R2,(R0)
        STO     R3,1(R0)
        STO     R4,2(R0)
        STO     R5,3(R0)

        ADD     4,R1
        ADD     4,R0
        BRA copy_next_char

end_strcpy:
        ; One of R2..R5 is the terminator.  Store each character BEFORE
        ; testing it, so that the terminator itself is written and nothing
        ; following it is.  Once Z gets set, every remaining .NZ instruction
        ; is skipped and the flags stay as they are.
        STO     R2,(R0)
        CMP     0,R2
        STO.NZ  R3,1(R0)
        CMP.NZ  0,R3
        STO.NZ  R4,2(R0)
        CMP.NZ  0,R4
        ; Getting this far with Z clear means R5 must be the terminator
        STO.NZ  R5,3(R0)

        LOD     (SP),R2
        LOD     1(SP),R3
        LOD     2(SP),R4
        LOD     3(SP),R5
        ADD     4,SP
#ifndef SKIP_SHORT_CIRCUITS
        // Same failure trap as every other return address check in this file
        CMP     LOAD_ADDRESS,R2
        BUSY.LT
#endif
        JMP     R2

#else
; lcl_strcpy, simple variant: copy a zero terminated string, one character
; per word, storing each character (terminator included) before testing it.
;
; On entry,
;       R0 = dst
;       R1 = src
;       R2 = return address
lcl_strcpy:
        ; Save the return address ONCE.  R2 doubles as the character
        ; register below, so the loop label has to come after this save;
        ; otherwise every trip through the loop would push a character on
        ; top of the return address and never pop it again.
        SUB     1,SP
        STO     R2,(SP)
copy_next_char:
        ; R0 = d
        ; R1 = s
        ; R2 = ch
#ifdef  NO_LOOP_UNROLLING
        LOD     (R1),R2
        STO     R2,(R0)
        CMP     0,R2
        BZ      lcl_strcpy_end_of_loop
        ADD     1,R0
        ADD     1,R1
        BRA     copy_next_char

#else
        LOD     (R1),R2
        STO     R2,(R0)
        CMP     0,R2
        BZ      lcl_strcpy_end_of_loop
        LOD     1(R1),R2
        STO     R2,1(R0)
        CMP     0,R2
        BZ      lcl_strcpy_end_of_loop
        LOD     2(R1),R2
        STO     R2,2(R0)
        CMP     0,R2
        BZ      lcl_strcpy_end_of_loop
        LOD     3(R1),R2
        STO     R2,3(R0)
        CMP     0,R2
        BZ      lcl_strcpy_end_of_loop
        ADD     4,R0
        ADD     4,R1
        BRA     copy_next_char
#endif
lcl_strcpy_end_of_loop:
        LOD     (SP),R2
        ADD     1,SP
#ifndef SKIP_SHORT_CIRCUITS
        CMP     LOAD_ADDRESS,R2
        BUSY.LT
#endif
        JMP     R2
#endif

//int   lcl_strcmp(char *s1, char *s2) {
//      char    a, b;
//      do {
//              a = *s1++; b = *s2++;
//      } while((a)&&(a==b));
//
//      return a-b;
//}

#ifdef  PIPELINED_STRCMP
; lcl_strcmp, pipelined variant: compare two zero terminated strings (one
; character per word), fetching four words of each string at a time.
;
; On entry,
;       R0 = s1
;       R1 = s2
;       R2 = return address
; On exit,
;       R0 = a - b, where a and b are the characters of s1 and s2 at the
;               first position where a is zero or a differs from b
;       R3-R9 are restored from the stack
;
; Since each string is read four words at a time, up to three words beyond
; the deciding character are read from both strings.
lcl_strcmp:
        SUB     8,SP
        STO     R2,(SP)
        STO     R3,1(SP)
        STO     R4,2(SP)
        STO     R5,3(SP)
        STO     R6,4(SP)
        STO     R7,5(SP)
        STO     R8,6(SP)
        STO     R9,7(SP)

strcmp_top_of_loop:
        ; R2..R5 = next four characters of s1
        LOD     (R0),R2
        LOD     1(R0),R3
        LOD     2(R0),R4
        LOD     3(R0),R5
        ; R6..R9 = next four characters of s2
        LOD     (R1),R6
        LOD     1(R1),R7
        LOD     2(R1),R8
        LOD     3(R1),R9
        ;
        ; Leave the fast loop if any s1 character in this group is zero
        CMP     0,R2
        CMP.NZ  0,R3
        CMP.NZ  0,R4
        CMP.NZ  0,R5
        BZ      strcmp_end_loop

        ; Leave the fast loop if any pair in this group differs: each CMP.Z
        ; only runs while every earlier pair has matched.
        CMP     R2,R6
        CMP.Z   R3,R7
        CMP.Z   R4,R8
        CMP.Z   R5,R9
        BNZ     strcmp_end_loop

        ADD     4,R0
        ADD     4,R1
        BRA     strcmp_top_of_loop

strcmp_end_loop:
        ; Walk the group one pair at a time, moving each pair into R2/R6,
        ; until the deciding pair (zero in s1, or a mismatch) is in R2/R6.
        CMP     0,R2
        BZ      final_str_compare
        CMP     R2,R6
        BNZ     final_str_compare

        MOV     R3,R2
        MOV     R7,R6
        CMP     0,R2
        BZ      final_str_compare
        CMP     R2,R6
        BNZ     final_str_compare

        MOV     R4,R2
        MOV     R8,R6
        CMP     0,R2
        BZ      final_str_compare
        CMP     R2,R6
        BNZ     final_str_compare

        ; The first three pairs matched, so the fourth pair decides
        MOV     R5,R2
        MOV     R9,R6

final_str_compare:
        ; return a - b
        SUB     R6,R2
        MOV     R2,R0
        
        LOD     (SP),R2
        LOD     1(SP),R3
        LOD     2(SP),R4
        LOD     3(SP),R5
        LOD     4(SP),R6
        LOD     5(SP),R7
        LOD     6(SP),R8
        LOD     7(SP),R9
        ADD     8,SP
#ifndef SKIP_SHORT_CIRCUITS
        CMP     LOAD_ADDRESS,R2
        BUSY.LT
#endif
        JMP     R2

#else
; lcl_strcmp, simple variant: compare two zero terminated strings one
; character pair at a time.
;
; On entry,
;       R0 = s1
;       R1 = s2
;       R2 = return address
; On exit,
;       R0 = a - b for the deciding pair of characters
;       R3 is restored from the stack
lcl_strcmp:
        SUB     2,SP
        STO     R2,(SP)
        STO     R3,1(SP)

strcmp_top_of_loop:
#ifdef  NO_LOOP_UNROLLING
        ; LOD   (R0),R2
        ; LOD   (R1),R3                 ; Alternate approach:
        ; CMP   R2,R3                   ;       CMP     0,R2
        ; BNZ   strcmp_end_loop         ;       BZ      strcmp_end_loop
        ; CMP   0,R2                    ;       CMP     R2,R3
        ; BZ    strcmp_end_loop         ;       BZ      strcmp_top_of_loop
        ; CMP   0,R3                    ;       
        ; BZ    strcmp_end_loop         ;       
        ; ADD   1,R0
        ; ADD   1,R1
        ; BRA   strcmp_top_of_loop
        ;
        ; R2 = a, R3 = b.  Stop on a zero in s1, otherwise keep looping for
        ; as long as the pair matches.  The pointers are advanced before the
        ; equality test, since that CMP has to be what sets the flags for
        ; the branch.
        LOD     (R0),R2
        LOD     (R1),R3
        CMP     0,R2
        BZ      strcmp_end_loop
        ADD     1,R0
        ADD     1,R1
        CMP     R2,R3
        BZ      strcmp_top_of_loop
#else
        ; Unrolled by four: each pair is tested for a zero in s1 and then
        ; for a mismatch before moving on to the next pair.
        LOD     (R0),R2
        LOD     (R1),R3
        CMP     0,R2
        BZ      strcmp_end_loop
        CMP     R2,R3
        BNZ     strcmp_end_loop
        LOD     1(R0),R2
        LOD     1(R1),R3
        CMP     0,R2
        BZ      strcmp_end_loop
        CMP     R2,R3
        BNZ     strcmp_end_loop
        LOD     2(R0),R2
        LOD     2(R1),R3
        CMP     0,R2
        BZ      strcmp_end_loop
        CMP     R2,R3
        BNZ     strcmp_end_loop
        LOD     3(R0),R2
        LOD     3(R1),R3
        CMP     0,R2
        BZ      strcmp_end_loop
        CMP     R2,R3
        BNZ     strcmp_end_loop
        ADD     4,R0
        ADD     4,R1
        BRA     strcmp_top_of_loop
#endif

strcmp_end_loop:
        ; return a - b
        SUB     R3,R2
        MOV     R2,R0

        LOD     (SP),R2
        LOD     1(SP),R3
        ADD     2,SP
#ifndef SKIP_SHORT_CIRCUITS
        CMP     LOAD_ADDRESS,R2
        BUSY.LT
#endif
        JMP     R2
#endif
        

//test_enum     func_1(char ch_1, char ch_2) {
//      char    lcl_ch_1, lcl_ch_2;
//
//      lcl_ch_1 = ch_1;
//      lcl_ch_2 = lcl_ch_1;
//      if (lcl_ch_2 != ch_2)
//              return 0;
//      else {
//              gbl_ch = lcl_ch_1;
//              return 1;
//      }

#ifdef  NO_INLINE
; func_1: only assembled when NO_INLINE is defined; otherwise func_2 carries
; an inlined copy of this comparison.
func_1:
        ; On input,
        ; R0 = ch_1
        ; R1 = ch_2
        ; R2 = return address (saved on the stack, since R2 is then reused
        ;       to hold lcl_ch_1)
        ; On output, R0 is our return value: 0 if the characters differ,
        ; otherwise gbl_ch is set to ch_1 and the low half of R0 is set to 1

        SUB     1,SP
        STO     R2,(SP)
        MOV     R0,R2
        CMP     R2,R1
        ; The three conditional instructions below all act on the flags of
        ; the CMP above
        CLR.NZ  R0
        STO.Z   R2,gbl_ch(R12)
        LDILO.Z 1,R0
        LOD     (SP),R2
        ADD     1,SP
#ifndef SKIP_SHORT_CIRCUITS
        CMP     LOAD_ADDRESS,R2
        BUSY.LT
#endif
        JMP     R2
#endif

//bool  func_2(char *str_1, char *str_2) {
//      int     lcl_int;
//      char    lcl_ch;
//
//      lcl_int = 2;
//      while(lcl_int <= 2) {
//              if (func_1(str_1[lcl_int], str_2[lcl_int+1])==0) {
//                      lcl_ch = 'A';
//                      lcl_int ++;
//              }
//      }
//
//      if ((lcl_ch >= 'W')&&(lcl_ch < 'Z'))
//              lcl_int = 7;
//      if (lcl_ch == 'R')
//              return true;
//      else {
//              if (lcl_strcmp(str_1, str_2)>0) {
//                      lcl_int += 7;
//                      gbl_int = lcl_int;
//              } else
//                      return false;
//      }
//}
func_2:
        ; On entry,
        ;       R0 = str_1
        ;       R1 = str_2
        ;       R2 = return address
        ; On exit,
        ;       R0 = 1 (true) if lcl_strcmp(str_1, str_2) > 0, in which case
        ;               gbl_int has also been set to lcl_int + 7,
        ;            0 (false) otherwise
        ;       R1 is not preserved; R3-R7 are restored from the stack
        ;
        SUB     6,SP
#ifndef SKIP_SHORT_CIRCUITS
        CMP     LOAD_ADDRESS,R2
        BUSY.LT
#endif
        STO     R2,(SP)         ; SP = 0x08daf
        STO     R3,1(SP)
        STO     R4,2(SP)
        STO     R5,3(SP)
        STO     R6,4(SP)
        STO     R7,5(SP)

        MOV     R0,R3   ; R3 = str_1
        MOV     R1,R4   ; R4 = str_2
        LDI     2,R5    ; R5 = lcl_int
        LDI     'A',R7  ; R7 = lcl_ch
func_2_while_loop:
        ; while(lcl_int <= 2)
        CMP     2,R5
        BGT     func_2_end_while_loop
func_2_top_while_loop:
        MOV     R3,R6
        ADD     R5,R6   ; R6 = &str_1[lcl_int]

#ifdef  NO_INLINE
        LOD     (R6),R0
        MOV     R4,R6
        ADD     R5,R6
        LOD     1(R6),R1        ; R1 = str_2[lcl_int+1]

        MOV     __HERE__+2(PC),R2
        BRA     func_1

        ; lcl_int++ only if func_1 returned zero (characters differ)
        CMP     0,R0
        ADD.Z   1,R5
#ifndef SKIP_SHORT_CIRCUITS
        BUSY.NZ
#endif
#else
        ; Inlined func_1(str_1[lcl_int], str_2[lcl_int+1])
        LOD     (R6),R2
        MOV     R4,R6
        ADD     R5,R6
        LOD     1(R6),R1

        CMP     R2,R1
        STO.Z   R2,gbl_ch(R12)
        LDILO.Z 1,R0

        ; lcl_int++ only if the two characters differ
        ADD.NZ  1,R5
#ifndef SKIP_SHORT_CIRCUITS
        BUSY.Z
#endif
#endif

        ; Loop condition again, this time at the bottom of the loop
        CMP     3,R5
#ifndef SKIP_SHORT_CIRCUITS
        BUSY.LT
#endif
        BLT     func_2_top_while_loop

func_2_end_while_loop:

        // CMP  'W',R7                  // BUT! We know lcl_ch='A'
        // BLT  skip_if                 // So we can skip this
        // CMP  'Z',R7                  // entire  section
        // LDI.LT       7,R5
        // CMP  'R',R7
        // BNZ alt_if_case
        // LLO.Z        1,R0
        // BRA  func_2_return_and_cleanup
        //      
        MOV     R3,R0
        MOV     R4,R1
        MOV     __HERE__+2(PC),R2
        BRA     lcl_strcmp
        CMP     0,R0
        BGT     func_2_final_then
        CLR     R0
        BRA     func_2_return_and_cleanup
func_2_final_then:
        ; lcl_int += 7; gbl_int = lcl_int;
        ; lcl_int itself is dead from here on, but the store to the global
        ; is a visible side effect of the C reference and must be kept.
        ADD     7,R5
        STO     R5,gbl_int(R12)
        LDI     1,R0
#ifndef SKIP_SHORT_CIRCUITS
        BUSY
#endif
func_2_return_and_cleanup:
        
        LOD     (SP),R2
        LOD     1(SP),R3
        LOD     2(SP),R4
        LOD     3(SP),R5
        LOD     4(SP),R6
        LOD     5(SP),R7
        ADD     6,SP
#ifndef SKIP_SHORT_CIRCUITS
        CMP     LOAD_ADDRESS,R2
        BUSY.LT
#endif
        JMP     R2

//bool  func_3(test_enum a) {
//      test_enum       lcl_enum;
//
//      lcl_enum = a;
//      if (lcl_enum == Ident_3)
//              return true;
//      else
//              return false;
//}

#ifdef  NO_INLINE
; func_3: only assembled when NO_INLINE is defined; otherwise proc_6 tests
; its argument against 2 directly.
func_3:
        ; On entry,
        ;  R0 = a
        ;  R1 = return address
        ; On exit,
        ;  R0 = 1 if a was 2 (Ident_3), 0 otherwise
        CMP     2,R0
        CLR     R0      ; CLR Doesn't set flags
        LDILO.Z 1,R0
#ifndef SKIP_SHORT_CIRCUITS
        CMP     LOAD_ADDRESS,R1
        BUSY.LT
#endif
        JMP     R1
#endif


// void proc_6(test_enum ev, test_enum *ep) {
//      *ep = ev;
//      if (!func_3(ev))
//              *ep = 3;
//      switch(ev) {
//              case 0: *ep = 0; break;
//              case 1:
//                      if (gbl_int > 100)
//                              *ep = 0;
//                      else
//                              *ep = 3;
//                      break;
//              case 2:
//                      *ep = 1;
//                      break;
//              case 3:
//                      break;
//              case 4:
//                      *ep = 2;
//      }
//}

proc_6:
        ; On entry:
        ;       R0 = ev
        ;       R1 = ep
        ;       R2 = link address
        ; Since we call func_3, we have to reserve R0 and R1
        ; for other purposes.  Thus, once the link address is on the stack,
        ;       R2 = ev
        ;       R3 = ep
        ; On exit, *ep has been set following the switch in the C reference.
        ; R0 and R1 are not preserved; R3 is restored from the stack.
        SUB     2,SP
        STO     R2,(SP)
        STO     R3,1(SP)

        MOV     R1,R3
        MOV     R0,R2
        ; *ep = ev
        STO     R0,(R1)
#ifndef SKIP_SHORT_CIRCUITS
        CMP     2,R0
        BUSY.NZ
#endif

#ifdef  NO_INLINE
        ; !func_3(ev)
        MOV     __HERE__+2(PC),R1
        BRA     func_3

        ; if func_3 returned zero, *ep = 3
        TST     -1,R0
        LDI     3,R1
#ifndef SKIP_SHORT_CIRCUITS
        BUSY.Z
#endif
        STO.Z   R1,(R3)
#else
        ; Inlined !func_3(ev): if ev is not 2, *ep = 3
        CMP     2,R0
        LDI     3,R1
#ifndef SKIP_SHORT_CIRCUITS
        BUSY.NZ
#endif
        STO.NZ  R1,(R3)
#endif

#ifndef SKIP_SHORT_CIRCUITS
        CMP     2,R2
        BUSY.NZ
#endif
        ; switch(ev), as a chain of compares against 0, 1, 2 and 4
        CMP     0,R2
        BNZ     proc_6_case_not_zero
#ifndef SKIP_SHORT_CIRCUITS
        BUSY
#endif
        ; case 0: *ep = 0
        LDI     0,R1
        STO     R1,(R3)
        BRA     proc_6_end_of_case
proc_6_case_not_zero:
        CMP     1,R2
        BNZ     proc_6_case_not_one
#ifndef SKIP_SHORT_CIRCUITS
        BUSY
#endif
        ; case 1: *ep = (gbl_int > 100) ? 0 : 3
        LDI     3,R0
        LOD     gbl_int(R12),R1
        CMP     100,R1
        CLR.GT  R0
        STO     R0,(R3)
        BRA     proc_6_end_of_case
proc_6_case_not_one:
        CMP     2,R2
        BNZ     proc_6_case_not_two
        ; case 2: *ep = 1
        LDI     1,R1                            // Executed, if done properly
        STO     R1,(R3)
        BRA     proc_6_end_of_case
proc_6_case_not_two:
#ifndef SKIP_SHORT_CIRCUITS
        NOOP                            ;;;;;;;; TODO This fails--needs the NOOP
        BUSY                            ;;;;;;;; TODO so as not to do the BUSY
#endif
        ; case 3 leaves *ep alone; case 4: *ep = 2
        CMP     4,R2
        BNZ     proc_6_case_not_four
        LDI     2,R1
        STO     R1,(R3)
        // BRA  proc_6_end_of_case
proc_6_case_not_four:
proc_6_end_of_case:
        LOD     (SP),R2
        LOD     1(SP),R3
#ifndef SKIP_SHORT_CIRCUITS
        CMP     LOAD_ADDRESS,R2         ; TODO This fails, even when the address
        BUSY.LT
#endif
        ADD     2,SP
        JMP     R2

// void proc_7(int a, int b, int *c) {
//      int     lcl;
// 
//      lcl = a + 2;
//      *c = b + lcl;
//}

#ifdef  NO_INLINE
; proc_7: only assembled when NO_INLINE is defined; otherwise the callers
; carry an inlined version of this sum.
;
; On entry,
;       R0 = a
;       R1 = b
;       R2 = c, the address the result is stored to
;       R3 = return address
; On exit, *c = a + b + 2.  R1 is left holding that sum.
proc_7:
        ADD 2+R0,R1
        STO R1,(R2)

#ifndef SKIP_SHORT_CIRCUITS
        CMP     LOAD_ADDRESS,R3
        BUSY.LT
#endif
        JMP     R3
#endif

//      int     a[50];
//      int     b[50][50];
//
// void proc_8(Arr_1_Dim a, Arr_2_Dim b, int c, int d) {
//      int     idx, loc;
//
//      loc = c+5;
//      a[loc] = d;
//      a[loc+1] = a[loc];
//      a[loc+30] = loc;
//      for(idx=loc; idx<= loc+1; idx++)
//              b[loc][idx] = loc;
//      b[loc][loc-1] += 1;
//      b[loc+20][loc] = a[loc];
//      gbl_int = 5;
//}
proc_8:
        ; On entry,
        ; R0 = a
        ; R1 = b
        ; R2 = c
        ; R3 = d
        ; R4 = return address (saved on the stack, restored before the jump)
        ; Makes no function/procedure calls, so these can keep
        ; R2 = loc = c+5, replaces c
        ; R4 = idx
        ; R5, R6 are scratch and are restored from the stack; R2 and R3 are
        ; not preserved.  Rows of b are 50 words long.
        SUB     3,SP
        STO     R4,(SP)
        STO     R5,1(SP)
        STO     R6,2(SP)

        ADD     5,R2    ; loc = c+5
        MOV     R0,R5
        ADD     R2,R5   ; R5 = &a[loc]
        STO     R3,(R5)         ; a[loc] = d
        STO     R3,1(R5)        ; a[loc+1] = a[loc], which is still d
        STO     R2,30(R5)       ; a[loc+30] = loc
        MOV     R2,R5
        MPYU    50,R5   ; R5 = 50 * R2 = 50 * loc
        ADD     R1,R5   ; R5 = &b[loc][0]
        MOV     R5,R6   ; R6 = &b[loc][0]
        ADD     R2,R5   ; R5 = &b[loc][loc]
        MOV     R2,R4   ; R4 = loc = index
proc_8_top_of_loop:
        ; for(idx=loc; idx <= loc+1; idx++) b[loc][idx] = loc;
        ; The condition is tested once on entry, and from then on at the
        ; bottom of the loop.
        CMP     1(R2),R4
        BGT     proc_8_end_of_loop
proc_8_loop_after_condition:
        STO     R2,(R5)
        ADD     1,R5
        ADD     1,R4
        CMP     2(R2),R4
        BLT     proc_8_loop_after_condition
proc_8_end_of_loop:

        ; b[loc][loc-1] += 1
        ADD     R2,R6           ; R6 = &b[loc][loc]
        LOD     -1(R6),R5
        ADD     1,R5
        STO     R5,-1(R6)
        ; b[loc+20][loc] = a[loc]
        MOV     R0,R4
        ADD     R2,R4
        LOD     (R4),R3
        STO     R3,20*50(R6)
        ; gbl_int = 5
        LDI     5,R3
        STO     R3,gbl_int(R12)
        
        LOD     (SP),R4
        LOD     1(SP),R5
        LOD     2(SP),R6
        ADD     3,SP
#ifndef SKIP_SHORT_CIRCUITS
        CMP     LOAD_ADDRESS,R4
        BUSY.LT
#endif
        JMP     R4

// void proc_5(void) {
//      gbl_ch = 'A';
//      gbl_bool = false;
//}
#ifdef  NO_INLINE
; proc_5: only assembled when NO_INLINE is defined.
;
; On entry, R0 = return address.  R0 is parked on the stack so that it can
; serve as the scratch register for both stores, then reloaded for the jump.
proc_5:
        SUB     1,SP
        STO     R0,(SP)
        ; gbl_ch = 'A'
        LDI     'A',R0
        STO     R0,gbl_ch(R12)
        ; gbl_bool = false
        CLR     R0
        STO     R0,gbl_bool(R12)
        ;
        LOD     (SP),R0
        ADD     1,SP
        JMP     R0
#endif

// void proc_4(void) {
//      bool    lcl_bool;
//      lcl_bool = (gbl_ch == 'A');
//      gbl_ch_2 = 'B';
// }
#ifdef  NO_INLINE
; proc_4: only assembled when NO_INLINE is defined.
;
; On entry, R0 = return address.  R0 is parked on the stack so that it can
; serve as the scratch register for the store, then reloaded for the jump.
proc_4:
        //
        ; LDI   GBL,R12 // Already in R12
        ; Setting lcl_bool is irrelevant, so the optimizer should remove it.
        ; That is why the comparison below stays commented out.
        ;
        ; LOD   gbl_ch(R12),R0
        ; CLR   R1
        ; CMP   'A',R0
        ; ADD.Z 1,R1
        ;
        SUB     1,SP
        STO     R0,(SP)
        ; gbl_ch_2 = 'B'
        LDI     'B',R0
        STO     R0,gbl_ch_2(R12)
        ;
        LOD     (SP),R0
        ADD     1,SP
        JMP     R0
#endif

// void proc_3(RECP *a) {
//      if (gbl_ptr != NULL)
//              *a = gbl_ptr->ptr_comp;
//      proc_7(10,gbl_int, &gbl_ptr->variant.var_1.int_comp); // ??
//}
proc_3:
        ; On entry,
        ;       R0 = a, the address of the record pointer to update
        ;       R1 = return address
        ; On exit,
        ;       if gbl_ptr is not NULL, *a = gbl_ptr->ptr_comp
        ;       gbl_ptr->variant.var_1.int_comp = gbl_int + 12, being the
        ;               result of proc_7(10, gbl_int, ...)
        ;       R2, R3 are restored from the stack; R0 is not preserved when
        ;       NO_INLINE is defined
        SUB     3,SP
        STO     R1,(SP)
        STO     R2,1(SP)
        STO     R3,2(SP)
        ;
        LOD     gbl_ptr(R12),R2
        TST     -1,R2
#ifndef SKIP_SHORT_CIRCUITS
        BUSY.Z
#endif
        ; Both of these are skipped when gbl_ptr is NULL
        LOD.NZ  ptr_comp(R2),R3
        STO.NZ  R3,(R0)
#ifdef  NO_INLINE
        LDI     10,R0
        LOD     gbl_int(R12),R1
        MOV     variant.var_1.int_comp(R2),R2
        MOV     __HERE__+2(PC),R3
        BRA     proc_7
#else
        ; Inlined proc_7: 10 + gbl_int + 2
        LOD     gbl_int(R12),R1
        ADD      12,R1
        STO      R1,variant.var_1.int_comp(R2)
#endif
        ;
        LOD     (SP),R1
        LOD     1(SP),R2
        LOD     2(SP),R3
        ADD     3,SP
#ifndef SKIP_SHORT_CIRCUITS
        CMP     LOAD_ADDRESS,R1
        BUSY.LT
#endif
        JMP     R1

// void proc_2(int *a) {
//      int             lcl_int;
//      test_enum       lcl_enum;
// 
//      lcl_int = *a + 10;
//      do {
//              if (gbl_ch == 'A') {
//                      lcl_int -= 1;
//                      *a = lcl_int - gbl_int;
//                      lcl_enum = Ident_1;
//              }
//      } while(lcl_enum != Ident_1);
//}
proc_2:
        ; On entry,
        ;       R0 = a, the address of the integer to update
        ;       R1 = return address
        ; On exit (default build), *a = *a + 10 - 1 - gbl_int
        ;       R2-R6 are restored from the stack
        SUB     6,SP
        STO     R1,(SP)
        STO     R2,1(SP)
        STO     R3,2(SP)
        STO     R4,3(SP)
        STO     R5,4(SP)
        STO     R6,5(SP)
        // R1 holds the return address and is reused as a scratch register
        // below, which is why it is saved along with the others.

        LOD     (R0),R1
        MOV     10(R1),R2       ; R2 = lcl_int
        LOD     gbl_ch(R12),R4  ; R4 = gbl_ch
#ifdef  NO_CHEATING
        ; Literal rendering of the do/while loop of the C reference.  As in
        ; the C, lcl_enum (R3) is only ever assigned when gbl_ch is 'A'.
proc_2_loop:
        CMP     'A',R4
        SUB.Z   1,R2
        LOD.Z   gbl_int(R12),R5 ; R5 = gbl_int
        MOV.Z   R2,R6           ; R6 = lcl_int
        SUB.Z   R5,R6           ; lcl_int - gbl_int
        STO.Z   R6,(R0)         ; *a = R6
        CLR.Z   R3              ; lcl_enum = 0
// #ifndef      SKIP_SHORT_CIRCUITS
        // BUSY.NZ
// #endif

        TST     -1,R3
// #ifndef      SKIP_SHORT_CIRCUITS
        // BUSY.NZ
// #endif
        BNZ     proc_2_loop
#else
        ; Shortcut: take it as given that gbl_ch is 'A', so that the loop
        ; body runs exactly once.  *a = lcl_int - (gbl_int + 1)
        LOD     gbl_int(R12),R5
        SUB     1(R5),R2
        STO     R2,(R0)
#endif
        ;
        LOD     (SP),R1
        LOD     1(SP),R2
        LOD     2(SP),R3
        LOD     3(SP),R4
        LOD     4(SP),R5
        LOD     5(SP),R6
        ADD     6,SP
#ifndef SKIP_SHORT_CIRCUITS
        CMP     LOAD_ADDRESS,R1
        BUSY.LT
#endif
        JMP     R1

//void  proc_1 (RECP a) {
//      RECP    nxt = a->ptr_comp;
//
//      // structassign(a->ptr_comp, gbl_ptr);
//      *(a->ptr_comp) = *(gbl_ptr);
//
//      a->variant.var_1.int_comp = 5;
//      nxt->variant.var_1.int_comp = a->variant.var_1.int_comp;
//      proc_3(&nxt->ptr_comp);
//
//      if (nxt->discr == 0) {
//              nxt->variant.var_1.int_comp = 6;
//              proc_6(a->variant.var_1.enum_comp, &nxt->variant.var_1.enum_comp);
//              nxt->ptr_comp = gbl_ptr->ptr_comp;
//              proc_7(nxt->variant.var_1.int_comp, 10, &nxt->variant.var_1.int_comp);
//      } else
//              // structassign(a, a->ptr_comp);
//              *a = *(a->ptr_comp);
//}
proc_1:
        ; On entry,
        ;       R0 = a, a pointer to a record
        ;       R1 = return address
        ; On exit, the records *a and *(a->ptr_comp) have been updated as in
        ; the C reference.  R0 is not preserved; R2-R9 (and R10, R11 when
        ; loops are unrolled) are restored from the stack.
        SUB     11,SP
        STO     R1,(SP)
        STO     R2,1(SP)
        STO     R3,2(SP)
        STO     R4,3(SP)
        STO     R5,4(SP)
        STO     R6,5(SP)
        STO     R7,6(SP)
        STO     R8,7(SP)
        STO     R9,8(SP)
#ifndef NO_LOOP_UNROLLING
        STO     R10,9(SP)
        STO     R11,10(SP)
#endif

        ; R9 = a
        ; R4 = nxt
        ; R12 = GBL
        ; R13 = SP
        MOV     R0,R9
        LOD     ptr_comp(R9),R4
#ifndef SKIP_SHORT_CIRCUITS
        TST     -1,R4           ; R4 = 0x100e9f
        BUSY.Z
        CMP     PC,R9           ; R9 = 0x100ec2
        BUSY.LT
#endif
        ; *(a->ptr_comp) = *(gbl_ptr)
        ; The source is the global record and the destination is nxt.  (The
        ; copy used to run from a into gbl_ptr, which is a copy of a record
        ; onto itself whenever a == gbl_ptr, and never wrote nxt at all.)
        LOD     gbl_ptr(R12),R6 ; R6 = source      = gbl_ptr
        MOV     R4,R7           ; R7 = destination = nxt
        ; BUSY

#ifndef SKIP_SHORT_CIRCUITS
        LOD     variant.var_1.enum_comp(R6), R0
        CMP     2,R0            ; gbl_ptr->variant.var_1.enum_comp == 2 ?
        BUSY.NZ                 ; TODO Fails here
#endif

#ifdef  NO_LOOP_UNROLLING
        LDI     35,R5           ; One record is 35 words
proc_1_assign_loop_1:
        LOD     (R6),R8
        ADD     1,R6
        STO     R8,(R7)
        ADD     1,R7
        SUB     1,R5
        BNZ     proc_1_assign_loop_1;
#else

        ; R2 is available
        ; R3 is available

        ; Five words per pass.  R5 counts 34, 29, ... 4, -1, which makes for
        ; seven passes, or 35 words.  The stores do not touch the flags, so
        ; the BLT below still sees the result of the SUB.
        LDI     34,R5
proc_1_assign_loop_1:
        LOD     (R6),R8
        LOD     1(R6),R10
        LOD     2(R6),R11
        LOD     3(R6),R2
        LOD     4(R6),R3
        ADD     5,R6
        SUB     5,R5
        STO     R8,(R7)
        STO     R10,1(R7)
        STO     R11,2(R7)
        STO     R2,3(R7)
        STO     R3,4(R7)
        BLT     proc_1_assign_loop_1_end
        ADD     5,R7
        ; BNZ   proc_1_assign_loop_1;
        BRA     proc_1_assign_loop_1
proc_1_assign_loop_1_end:
        ; Loop length is fixed, nothing to test here
#endif

#ifndef SKIP_SHORT_CIRCUITS
        LOD     gbl_ptr(R12),R2
        TST     -1,R2
        BUSY.Z
        ;
        LOD     variant.var_1.enum_comp(R9), R0
        CMP     2,R0
        BUSY.NZ 
#endif

        ; a->variant.var_1.int_comp = 5, and then the same for nxt
        LDI     5,R5
        STO     R5,variant.var_1.int_comp(R9)
        STO     R5,variant.var_1.int_comp(R4)
        ; proc_3(&nxt->ptr_comp)
        MOV     ptr_comp(R4),R0                 ; R4 = 0x8e41, ptr_comp(R4)=R4
        MOV     __HERE__+2(PC),R1
        BRA     proc_3          ; Uses R0 and R1

        ; if (nxt->discr == 0)
        LOD     discr(R4),R5
        CMP     0,R5
        BNZ     proc_1_last_struct_assign
        ; This is the juncture that is "supposed" to be taken
        LDI     6,R5

        STO     R5,variant.var_1.int_comp(R4)
        ; proc_6(a->variant.var_1.enum_comp, &nxt->variant.var_1.enum_comp)
        LOD     variant.var_1.enum_comp(R9), R0
#ifndef SKIP_SHORT_CIRCUITS
        CMP     2,R0
        BUSY.NZ
#endif
        MOV     variant.var_1.enum_comp+R4, R1
        MOV     __HERE__+2(PC),R2
        BRA     proc_6
        ; nxt->ptr_comp = gbl_ptr->ptr_comp
        LOD     gbl_ptr(R12),R5
        LOD     ptr_comp(R5),R5 
        STO     R5,ptr_comp(R4)
        ;
#ifdef  NO_INLINE
        ; proc_7 returns straight to the closeout below
        LOD     variant.var_1.int_comp(R4),R0
        LDI     10,R1
        MOV     variant.var_1.int_comp(R4),R2
        MOV     proc_1_return_closeout(PC),R3
        BRA     proc_7
#else
        ; Inlined proc_7: nxt->int_comp = nxt->int_comp + 10 + 2
        LOD     variant.var_1.int_comp(R4),R0
        ADD     12,R0
        STO     R0,variant.var_1.int_comp(R4)
        BRA     proc_1_return_closeout
#endif
        ;
proc_1_last_struct_assign:
#ifndef SKIP_SHORT_CIRCUITS
        BUSY
#endif
        ; *a = *(a->ptr_comp)
        ; The word count belongs in R5, the register the loop counts down.
        ; (It used to be loaded into R4, with R5 taken from a stale R2 and
        ; the pointers left over from the first copy.)
        LDI     35,R5           ; One record is 35 words
        LOD     ptr_comp(R9),R6 ; R6 = source      = a->ptr_comp
        MOV     R9,R7           ; R7 = destination = a
proc_1_assign_loop_2:
        LOD     (R6),R8
        STO     R8,(R7)
        ADD     1,R6
        ADD     1,R7
        SUB     1,R5
        BNZ     proc_1_assign_loop_2
        //
proc_1_return_closeout:
        //
        LOD     (SP),R1
        LOD     1(SP),R2
        LOD     2(SP),R3
        LOD     3(SP),R4
        LOD     4(SP),R5
        LOD     5(SP),R6
        LOD     6(SP),R7
        LOD     7(SP),R8
        LOD     8(SP),R9
#ifndef NO_LOOP_UNROLLING
        LOD     9(SP),R10
        LOD     10(SP),R11
#endif
        ADD     11,SP
#ifndef SKIP_SHORT_CIRCUITS
        CMP     LOAD_ADDRESS,R1
        BUSY.LT
#endif
        JMP     R1              // Jumps to wrong address ??

// void dhrystone(void) {
//      int     lcl_int_1, lcl_int_2, lcl_int_3, index, number_of_runs = 500;
//      test_enum       lcl_enum;
//      char    lcl_str_1[30], lcl_str_2[30], ch_index;
//      REC_T   a, b, *nxt = &a;
//
//      gbl_ptr = &b;
//      gbl_ptr->ptr_comp = nxt;
//      gbl_ptr->variant.var_1.enum_comp = 2;
//      gbl_ptr->variant.var_1.int_comp = 40;
//      lcl_strcpy(gbl_ptr->variant.var_1.str_comp, "DHRYSTONE PROGRAM, SOME STRING");
//      lcl_strcpy(lcl_str_1, "DHRYSTONE PROGRAM, 1\'ST STRING");
//
//      gbl_arr_2[8][7] = 10;
//
//      for(index=0; index < number_of_runs; index++) {
//              proc_5();
//              proc_4();
//              lcl_int_1 = 2;
//              lcl_int_2 = 3;
//              lcl_strcpy(lcl_str_2, "DHRYSTONE PROGRAM, 2\'ND STRING");
//              lcl_enum = Ident_2;
//              gbl_bool = !func_2(lcl_str_1, lcl_str_2);
//              while(lcl_int_1 < lcl_int_2) {
//                      lcl_int_3 = 5 * lcl_int_1 - lcl_int_2;
//                      proc_7(lcl_int_1, lcl_int_2, &lcl_int_3);
//                      lcl_int_1 += 1;
//              }
//
//              proc_8(gbl_arr_1, gbl_arr_2, lcl_int_1, lcl_int_3);
//              proc_1(gbl_ptr);
//
//              for(ch_index='A'; ch_index <= gbl_ch_2; ch_index++) {
//                      if (lcl_enum == func_1(ch_index, 'C')) {
//                              // Then not executed??
//                              proc_6(0, &lcl_enum);
//                              lcl_strcpy(lcl_str_2, "DHRYSTONE PROGRAM, 3\'RD STRING");
//                              lcl_int_2 = index;
//                              gbl_int = index;
//                      }
//              }
//
//              lcl_int_2 = lcl_int_2 * lcl_int_1;
//              lcl_int_1 = lcl_int_2 / lcl_int_3;
//              lcl_int_2 = 7 * ( lcl_int_2 - lcl_int_3) - lcl_int_1;
//              proc_2(&lcl_int_1);
//      }
//}

; dhrystone -- assembly rendition of the C routine sketched in the comment
; block above.  Performs the one-time setup and then runs the Dhrystone main
; loop NUMBER_OF_RUNS times.
;
; Calling convention, as used by every call site in this routine: arguments
; go in R0, R1, ... and the return address goes in the next register after
; the last argument.  Results (func_1, func_2, lib_divs) come back in R0.
;
; Register roles inside the main loop:
;       R5      working copy of lcl_int_1 (the master copy lives on the stack)
;       R6      lcl_int_2 (kept in a register only)
;       R7      scratch for lcl_int_3, later ch_index of the for loop
;       R8      copy of gbl_ch_2 for the for-loop test
;       R11     index, the main loop counter
; NOTE(review): this relies on the callees preserving R5-R11 -- confirm
; against lcl_strcpy, func_2, proc_1, proc_2, proc_6, proc_8 and lib_divs.
;
; Unless SKIP_SHORT_CIRCUITS is defined, the BUSY instructions sprinkled
; through the loop act as self-checks on the expected intermediate values.
dhrystone:
#ifdef  SUPERVISOR_TASK
        SUB     12+RECSIZE+RECSIZE+30+30+3,SP
        ; Frame layout: twelve save slots for R0-R11 at the bottom,
        ; followed by the local variables defined below.
        STO     R0,(SP)
        STO     R1,1(SP)
        STO     R2,2(SP)
        STO     R3,3(SP)
        STO     R4,4(SP)
        STO     R5,5(SP)
        STO     R6,6(SP)
        STO     R7,7(SP)
        STO     R8,8(SP)
        STO     R9,9(SP)
        STO     R10,10(SP)
        STO     R11,11(SP)
        lcl_int_1       equ     12                      ; plus SP
#else
        lcl_int_1       equ     2                       ; plus SP
        SUB     2+RECSIZE+RECSIZE+30+30+3,SP
#endif
        // 12 is the global variable pointer
        // 13 is our stack
        // 14 is our condition code register
        // 15 is the program counter
        ;
        ; Offsets (from SP) of the remaining locals.  lcl_int_2 has no
        ; stack slot; it lives in R6.
        ; NOTE(review): each string constant below is 30 characters plus a
        ; terminating zero word (31 words), while lcl_str_1/lcl_str_2 are
        ; 30 words each.  If lcl_strcpy copies the terminator, it spills
        ; into the next local -- confirm against lcl_strcpy.
        lcl_int_3       equ     lcl_int_1+1             ; plus SP
        lcl_enum        equ     lcl_int_3+1             ; plus SP
        lcl_str_1       equ     lcl_enum+1              ; plus SP
        lcl_str_2       equ     lcl_str_1+30            ; plus SP
        rec_a           equ     lcl_str_2+30            ; plus SP
        rec_b           equ     rec_a+RECSIZE           ; plus SP
        
//      int     lcl_int_1, lcl_int_2, lcl_int_3, index, number_of_runs = 500;
//      test_enum       lcl_enum;
//      char    lcl_str_1[30], lcl_str_2[30], ch_index;
//      REC_T   a, b, *nxt = &a;
//
//      gbl_ptr = &b;
        MOV     rec_b(SP),R0            ; R0 = &b
        STO     R0,gbl_ptr(PC)
//      gbl_ptr->ptr_comp = nxt;
        MOV     rec_a(SP),R1            ; R1 = &a = nxt
        STO     R1,ptr_comp(R0)         ; gbl_ptr->ptr_comp = b.ptr_comp = nxt
//      gbl_ptr->variant.var_1.enum_comp = 2;
        LDI     2,R2
        STO     R2,variant.var_1.enum_comp(R0)
//      gbl_ptr->variant.var_1.int_comp = 40;
        LDI     40,R2
        STO     R2,variant.var_1.int_comp(R0)
//      lcl_strcpy(gbl_ptr->variant.var_1.str_comp, "DHRYSTONE PROGRAM, SOME STRING");
        MOV     variant.var_1.str_comp(R0),R0   ; R0 = destination address
        MOV     some_string(PC),R1              ; R1 = source address
        MOV     __HERE__+2(PC),R2               ; R2 = return address
        BRA     lcl_strcpy

//      lcl_strcpy(lcl_str_1, "DHRYSTONE PROGRAM, 1\'ST STRING");
        MOV     lcl_str_1(SP),R0
        MOV     first_string(PC),R1
        MOV     __HERE__+2(PC),R2
        BRA     lcl_strcpy

//      gbl_arr_2[8][7] = 10;
        LDI     10,R0
        STO     R0,8*50+7+gbl_arr_2(R12)        ; rows are 50 words wide
//
//      for(index=0; index < number_of_runs; index++) {
        ; Let R11 be our index
        CLR     R11
dhrystone_main_loop:
        ;; Start of Dhrystone main loop
        ; proc_5();
#ifdef  NO_INLINE
        MOV     __HERE__+2(PC),R0
        BRA     proc_5
#else
        ; Inlined proc_5: gbl_ch = 'A'; gbl_bool = 0;
        LDI     'A',R0
        STO     R0,gbl_ch(R12)
        CLR     R0
        STO     R0,gbl_bool(R12)
#endif
        ; proc_4();
#ifdef  NO_INLINE
        MOV     __HERE__+2(PC),R0
        BRA     proc_4
#else
        ; Inlined proc_4: gbl_ch_2 = 'B';
        LDI     'B',R0
        STO     R0,gbl_ch_2(R12)
#endif
//              lcl_int_1 = 2;
        LDI     2,R5
        STO     R5,lcl_int_1(SP)
//              lcl_int_2 = 3;
        LDI     3,R6
//              lcl_strcpy(lcl_str_2, "DHRYSTONE PROGRAM, 2\'ND STRING");
        MOV     lcl_str_2(SP),R0
        MOV     second_string(PC),R1
        MOV     __HERE__+2(PC),R2
        BRA     lcl_strcpy
//              lcl_enum = Ident_2;
        LDI     2,R0
        STO     R0,lcl_enum(SP)
//              gbl_bool = !func_2(lcl_str_1, lcl_str_2);
        MOV     lcl_str_1(SP),R0
        MOV     lcl_str_2(SP),R1
        MOV     __HERE__+2(PC),R2
        BRA     func_2
        CLR     R1                      ; R1 = 0 unless func_2 returned zero
        TST     -1,R0
        LDILO.Z 1,R1                    ; R1 = 1 when R0 == 0, i.e. R1 = !R0
        STO     R1,gbl_bool(PC)

//              while(lcl_int_1 < lcl_int_2) {
        ; R5 = lcl_int_1 = 2 on entry
        ; R6 = lcl_int_2 = 3 on entry, so the entry test below is expected
        ; to fall through into the loop body.
        LOD     lcl_int_1(SP),R5
        // The 'while' comparison
        CMP     R6,R5                   ; flags from lcl_int_1 - lcl_int_2
        BGE     dhrystone_end_while_loop
dhrystone_while_loop:
//                      lcl_int_3 = 5 * lcl_int_1 - lcl_int_2;
        MOV     R5,R7
        LDI     5,R0
        MPYS    R0,R7
        SUB     R6,R7
        STO     R7,lcl_int_3(SP)
#ifndef SKIP_SHORT_CIRCUITS
        CMP     7,R7                    ; self-check: 5*2 - 3 == 7
        BUSY.NZ
#endif
//                      proc_7(lcl_int_1, lcl_int_2, &lcl_int_3);
#ifdef  NO_INLINE
        MOV     R5,R0
        MOV     R6,R1
        MOV     lcl_int_3(SP),R2
        MOV     __HERE__+2(PC),R3
        BRA     proc_7
#else
        ; Inlined proc_7: lcl_int_3 = lcl_int_2 + (lcl_int_1 + 2)
        MOV     R6,R1
        ADD     2+R5,R1
        STO     R1,lcl_int_3(SP)
#endif
//                      lcl_int_1 += 1;
        LOD     lcl_int_1(SP),R5
        ADD     1,R5
        STO     R5,lcl_int_1(SP)
;
        ; BRA   dhrystone_while_loop    ; We'll unroll the loop, and put an
        CMP     R6,R5                   ; additional comparison at the bottom
        BLT     dhrystone_while_loop
dhrystone_end_while_loop:
//              }
//
#ifndef SKIP_SHORT_CIRCUITS
        ; self-check: lcl_int_1 == 3, lcl_int_2 == 3, lcl_int_3 == 7
        LOD     lcl_int_1(SP),R0
        CMP     3,R0
        BUSY.NZ
        CMP     3,R6
        BUSY.NZ 
        LOD     lcl_int_3(SP),R0
        CMP     7,R0
        BUSY.NZ
#endif
//              proc_8(gbl_arr_1, gbl_arr_2, lcl_int_1, lcl_int_3);
        MOV     gbl_arr_1(PC),R0
        MOV     gbl_arr_2(PC),R1
        MOV     R5,R2                   ; R5 still holds lcl_int_1
        ; The fourth argument is lcl_int_3, which only lives on the stack.
        ; (R6 holds lcl_int_2 and must not be passed here.)
        LOD     lcl_int_3(SP),R3
        MOV     __HERE__+2(PC),R4
        BRA     proc_8
//              proc_1(gbl_ptr);
        LOD     gbl_ptr(PC),R0
#ifndef SKIP_SHORT_CIRCUITS
        LOD     variant.var_1.enum_comp(R0), R1
        CMP     2,R1            ; self-check: gbl_ptr->...enum_comp == 2
        BUSY.NZ                 ; TODO Fails here
#endif
        MOV     __HERE__+2(PC),R1
        BRA     proc_1
//
//              for(ch_index='A'; ch_index <= gbl_ch_2; ch_index++) {
        LDI     'A',R7
        ; gbl_ch_2 is a global: read it through the same base register
        ; (R12) that the store above uses, not through the stack pointer.
        LOD     gbl_ch_2(R12),R8
        CMP     R7,R8                   ; flags from gbl_ch_2 - ch_index
        BLT     dhrystone_end_of_for_loop
dhrystone_top_of_for_loop:
//                      if (lcl_enum == func_1(ch_index, 'C')) {
#ifdef  NO_INLINE
        MOV     R7,R0
        LDI     'C',R1
        MOV     __HERE__+2(PC),R2
        BRA     func_1
#else
        ; Inlined func_1: if ch_index == 'C', record it in gbl_ch and
        ; yield 1; otherwise yield 0.
        CMP     'C',R7
        CLR.NZ  R0
        STO.Z   R7,gbl_ch(R12)
        LDILO.Z 1,R0
#endif

        ; Result is now in R0
        LOD     lcl_enum(SP),R1
        CMP     R0,R1
        BNZ     dhrystone_skip_then_clause
//                              // Then not executed??
//                              proc_6(0, &lcl_enum);

#ifndef SKIP_SHORT_CIRCUITS
        BUSY    // Shouldn't ever get here
#endif

        CLR     R0
        MOV     lcl_enum(SP),R1
        MOV     __HERE__+2(PC),R2
        BRA     proc_6
        
//                              lcl_strcpy(lcl_str_2, "DHRYSTONE PROGRAM, 3\'RD STRING");
        MOV     lcl_str_2(SP),R0
        MOV     third_string(PC),R1
        MOV     __HERE__+2(PC),R2
        BRA     lcl_strcpy
//                              lcl_int_2 = index;
        MOV     R11,R6
//                              gbl_int = index;
        STO     R11,gbl_int(PC)
//                      }
dhrystone_skip_then_clause:
        ADD     1,R7
        LOD     gbl_ch_2(R12),R8
        ; Keep looping while ch_index <= gbl_ch_2, i.e. while
        ; gbl_ch_2 - ch_index >= 0 (same operand order as the entry test).
        CMP     R7,R8
        BGE     dhrystone_top_of_for_loop
dhrystone_end_of_for_loop:
//              }
#ifndef SKIP_SHORT_CIRCUITS
        ; self-check: the for loop must not have disturbed the locals
        LOD     lcl_int_1(SP),R0
        CMP     3,R0
        BUSY.NZ
        CMP     3,R6
        BUSY.NZ
        LOD     lcl_int_3(SP),R0
        CMP     7,R0
        BUSY.NZ
#endif
//
//              lcl_int_2 = lcl_int_2 * lcl_int_1;
        LOD     lcl_int_1(SP),R5
        MPYS    R5,R6   ; lcl_int_2 = lcl_int_2 * lcl_int_1
//              lcl_int_1 = lcl_int_2 / lcl_int_3;
#ifdef  HARDWARE_DIVIDE
        LOD     lcl_int_3(SP),R1
        MOV     R6,R0
        DIVS    R1,R0
#else
#ifndef SKIP_DIVIDE
        MOV     R6,R0
        LOD     lcl_int_3(SP),R1
        MOV     __HERE__+2(PC),R2
        BRA     lib_divs
#else
        ; Divide skipped: substitute the known quotient 9 / 7 = 1, which is
        ; the value the self-checks below expect for lcl_int_1.
        LDI     1,R0
#endif
#endif
        STO     R0,lcl_int_1(SP)        ;;; TODO FAILS HERE (Watched it fail!)
//              lcl_int_2 = 7 * ( lcl_int_2 - lcl_int_3) - lcl_int_1;
        LOD     lcl_int_3(SP),R2
        SUB     R2,R6
        MPYS    7,R6
        SUB     R0,R6                   ; R0 still holds the new lcl_int_1
//              proc_2(&lcl_int_1);
#ifndef SKIP_SHORT_CIRCUITS
        ; self-check: lcl_int_1 == 1 && lcl_int_2 == 13 && lcl_int_3 == 7.
        ; Each later test only runs while the previous ones matched (.Z).
        LOD     lcl_int_1(SP),R0
        CMP     1,R0
        CMP.Z   13,R6
        LOD.Z   lcl_int_3(SP),R0
        CMP.Z   7,R0
        BZ      dhrystone_triple_test_still_good
        BUSY
dhrystone_triple_test_still_good:
#endif
        MOV     lcl_int_1(SP),R0        ; R0 = &lcl_int_1
        MOV     __HERE__+2(PC),R1
        BRA     proc_2
#ifndef SKIP_SHORT_CIRCUITS
        LOD     lcl_int_1(SP),R0
        CMP     5,R0                    ; self-check: proc_2 left 5 behind
        BUSY.NZ
#endif

        ;; Bottom of (and return from) Dhrystone main loop
        ADD     1,R11
        CMP     NUMBER_OF_RUNS,R11
        BLT     dhrystone_main_loop
//      }

#ifdef  SUPERVISOR_TASK
        ; Restore the registers saved on entry and release the frame.
        LOD     (SP),R0
        LOD     1(SP),R1
        LOD     2(SP),R2
        LOD     3(SP),R3
        LOD     4(SP),R4
        LOD     5(SP),R5
        LOD     6(SP),R6
        LOD     7(SP),R7
        LOD     8(SP),R8
        LOD     9(SP),R9
        LOD     10(SP),R10
        LOD     11(SP),R11
        ;
        ADD     12+RECSIZE+RECSIZE+30+30+3,SP
        ; Return from subroutine
#ifndef SKIP_SHORT_CIRCUITS
        CMP     LOAD_ADDRESS,R0         ; self-check: return address must not
        BUSY.LT                         ; lie below LOAD_ADDRESS
#endif
        JMP     R0
#else
        ; Not a supervisor task: clear CC -- presumably this drops back to
        ; supervisor mode, as the file header describes; TODO confirm.
        LDI     0,CC
        NOP
        NOP
        BUSY
#endif
; Storage for the benchmark's global variables (see the C declarations
; listed at the end of this file).
gbl_arr_1:                      ; Arr_1_Dim gbl_arr_1
        fill    50,0
gbl_arr_2:                      ; Arr_2_Dim gbl_arr_2; dhrystone addresses it
        fill    2500,0          ; as row*50+column, e.g. [8][7] -> 8*50+7
gbl_ch:                         ; char gbl_ch
        word    0
gbl_ch_2:                       ; char gbl_ch_2
        word    0
gbl_bool:                       ; bool gbl_bool
        word    0
gbl_int:                        ; int gbl_int
        word    0
gbl_ptr:                        ; RECP gbl_ptr; dhrystone points it at rec_b
        word    0

; String constants handed to lcl_strcpy by dhrystone.  Each one is stored one
; character per word: 30 characters followed by a terminating zero word.

; "DHRYSTONE PROGRAM, SOME STRING"
some_string:
        word    'D','H','R','Y','S','T','O','N','E',' '
        word    'P','R','O','G','R','A','M',',',' '
        word    'S','O','M','E',' ','S','T','R','I','N','G'
        word    0

; "DHRYSTONE PROGRAM, 1'ST STRING"
first_string:
        word    'D','H','R','Y','S','T','O','N','E',' '
        word    'P','R','O','G','R','A','M',','
        word    ' ','1','\'','S','T'
        word    ' ','S','T','R','I','N','G'
        word    0

; "DHRYSTONE PROGRAM, 2'ND STRING"
second_string:
        word    'D','H','R','Y','S','T','O','N','E',' '
        word    'P','R','O','G','R','A','M',',',' '
        word    '2','\'','N','D',' ','S','T','R','I','N','G'
        word    0

; "DHRYSTONE PROGRAM, 3'RD STRING"
third_string:
        word    'D','H','R','Y','S','T','O','N','E',' '
        word    'P','R','O','G','R','A','M',',',' '
        word    '3','\'','R','D',' ','S','T','R','I','N','G'
        word    0

// Arr_1_Dim    gbl_arr_1;
// Arr_2_Dim    gbl_arr_2;
// char gbl_ch, gbl_ch_2;
// bool gbl_bool;
// int  gbl_int;
// RECP gbl_ptr;

;
Go to most recent revision | Compare with Previous | Blame | View Log
Browse

Tools

Subversion Repositories zipcpu

[/] [zipcpu/] [trunk/] [bench/] [asm/] [zipdhry.S] - Rev 128