URL
https://opencores.org/ocsvn/zipcpu/zipcpu/trunk
Subversion Repositories zipcpu
Compare Revisions
- This comparison shows the changes necessary to convert path
/
- from Rev 38 to Rev 39
- ↔ Reverse comparison
Rev 38 → Rev 39
/zipcpu/trunk/bench/cpp/zippy_tb.cpp
43,6 → 43,7
|
#include "verilated.h" |
#include "Vzipsystem.h" |
#include "cpudefs.h" |
|
#include "testb.h" |
// #include "twoc.h" |
184,18 → 185,18
int ln= 0; |
|
mvprintw(ln,0, "Peripherals-SS"); ln++; |
#ifdef OPT_ILLEGAL_INSTRUCTION |
printw(" %s", |
// (m_core->v__DOT__thecpu__DOT__pf_illegal)?"PI":" ", |
(m_core->v__DOT__thecpu__DOT__dcd_illegal)?"DI":" " |
); |
/* |
printw(" %s%s%s", |
(m_core->v__DOT__thecpu__DOT__ill_err)?"IL":" ", |
#endif |
|
#ifdef OPT_EARLY_BRANCHING |
printw(" %s%s", |
(m_core->v__DOT__thecpu__DOT__dcd_early_branch)?"EB":" ", |
(m_core->v__DOT__thecpu__DOT__dcd_early_branch_stb)?"S":" ", |
(m_core->v__DOT__thecpu__DOT__dcd_early_branch_stb)?"S":" ", |
); |
*/ |
(m_core->v__DOT__thecpu__DOT__dcd_early_branch_stb)?"S":" "); |
#endif |
|
/* |
showval(ln, 1, "TRAP", m_core->v__DOT__trap_data); |
305,6 → 306,9
attroff(A_BOLD); |
ln+=1; |
|
#ifdef OPT_SINGLE_FETCH |
ln+=2; |
#else |
mvprintw(ln, 0, "PFPIPE: rda=%08x/%d, bas=%08x, off=%08x, nv=%03x, ackw=%d,%d%d,%04x", |
m_core->v__DOT__thecpu__DOT__pf__DOT__r_addr, |
m_core->v__DOT__thecpu__DOT__pf__DOT__r_cv, |
325,6 → 329,7
(m_core->v__DOT__thecpu__DOT__pf_ack)?"ACK":" ", |
(m_core->v__DOT__thecpu__DOT__pf_stall)?"STL":" ", |
(m_core->v__DOT__wb_data)); ln++; |
#endif |
|
mvprintw(ln, 0, "MEMBUS: %3s %3s %s @0x%08x[0x%08x] -> %s %s %08x", |
(m_core->v__DOT__thecpu__DOT__mem_cyc_gbl)?"GCY" |
336,7 → 341,17
(m_core->v__DOT__thecpu__DOT__mem_data), |
(m_core->v__DOT__thecpu__DOT__mem_ack)?"ACK":" ", |
(m_core->v__DOT__thecpu__DOT__mem_stall)?"STL":" ", |
(m_core->v__DOT__thecpu__DOT__mem_result)); ln++; |
(m_core->v__DOT__thecpu__DOT__mem_result)); |
// #define OPT_PIPELINED_BUS_ACCESS |
#ifdef OPT_PIPELINED_BUS_ACCESS |
printw(" %x%x%c%c", |
(m_core->v__DOT__thecpu__DOT__domem__DOT__wraddr), |
(m_core->v__DOT__thecpu__DOT__domem__DOT__rdaddr), |
(m_core->v__DOT__thecpu__DOT__op_pipe)?'P':'-', |
(mem_pipe_stalled())?'S':'-'); ln++; |
#else |
ln++; |
#endif |
|
mvprintw(ln, 0, "SYSBS%c: %3s %3s %s @0x%08x[0x%08x] -> %s %s %08x", |
(m_core->v__DOT__thecpu__DOT__pformem__DOT__r_a_owner)?'M':'P', |
348,7 → 363,40
(m_core->i_wb_ack)?"ACK":" ", |
(m_core->i_wb_stall)?"STL":" ", |
(m_core->i_wb_data)); ln+=2; |
#ifdef OPT_PIPELINED_BUS_ACCESS |
mvprintw(ln-1, 0, "Mem CE: %d = %d%d%d%d%d, stall: %d = %d%d(%d|%d%d|..)", |
(m_core->v__DOT__thecpu__DOT__mem_ce), |
(m_core->v__DOT__thecpu__DOT__master_ce), |
(m_core->v__DOT__thecpu__DOT__opvalid_mem), |
(!m_core->v__DOT__thecpu__DOT__clear_pipeline), |
(m_core->v__DOT__thecpu__DOT__set_cond), |
(!m_core->v__DOT__thecpu__DOT__mem_stalled), |
|
(m_core->v__DOT__thecpu__DOT__mem_stalled), |
(m_core->v__DOT__thecpu__DOT__opvalid_mem), |
(m_core->v__DOT__thecpu__DOT__master_ce), |
(mem_pipe_stalled()), |
(!m_core->v__DOT__thecpu__DOT__op_pipe), |
(m_core->v__DOT__thecpu__DOT__mem_busy)); |
printw(" op_pipe = %d%d%d%d%d(%d|%d)", |
(m_core->v__DOT__thecpu__DOT__dcdvalid), |
(m_core->v__DOT__thecpu__DOT__opvalid_mem), |
(m_core->v__DOT__thecpu__DOT__dcdM), |
(!((m_core->v__DOT__thecpu__DOT__dcdOp |
^m_core->v__DOT__thecpu__DOT__opn)&1)), |
(m_core->v__DOT__thecpu__DOT__dcdB |
== m_core->v__DOT__thecpu__DOT__op_B), |
(m_core->v__DOT__thecpu__DOT__r_dcdI |
== m_core->v__DOT__thecpu__DOT__r_opI), |
(m_core->v__DOT__thecpu__DOT__r_dcdI+1 |
== m_core->v__DOT__thecpu__DOT__r_opI)); |
mvprintw(4,4,"r_dcdI = 0x%06x, r_opI = 0x%06x", |
(m_core->v__DOT__thecpu__DOT__r_dcdI), |
(m_core->v__DOT__thecpu__DOT__r_opI)); |
#endif |
mvprintw(4,42,"0x%08x", m_core->v__DOT__thecpu__DOT__instruction); |
|
|
showins(ln, "I ", |
!m_core->v__DOT__thecpu__DOT__dcd_stalled, |
m_core->v__DOT__thecpu__DOT__pf_valid, |
364,6 → 412,13
m_core->v__DOT__thecpu__DOT__dcd_gie, |
m_core->v__DOT__thecpu__DOT__dcd_stalled, |
m_core->v__DOT__thecpu__DOT__dcd_pc-1); ln++; |
#ifdef OPT_ILLEGAL_INSTRUCTION |
if (m_core->v__DOT__thecpu__DOT__dcd_illegal) |
mvprintw(ln-1,10,"I"); |
else |
#endif |
if (m_core->v__DOT__thecpu__DOT__dcdM) |
mvprintw(ln-1,10,"M"); |
|
showins(ln, "Op", |
m_core->v__DOT__thecpu__DOT__op_ce, |
370,7 → 425,16
m_core->v__DOT__thecpu__DOT__opvalid, |
m_core->v__DOT__thecpu__DOT__op_gie, |
m_core->v__DOT__thecpu__DOT__op_stall, |
m_core->v__DOT__thecpu__DOT__op_pc-1); ln++; |
op_pc()); ln++; |
#ifdef OPT_ILLEGAL_INSTRUCTION |
if (m_core->v__DOT__thecpu__DOT__op_illegal) |
mvprintw(ln-1,10,"I"); |
else |
#endif |
if (m_core->v__DOT__thecpu__DOT__opvalid_mem) |
mvprintw(ln-1,10,"M"); |
else if (m_core->v__DOT__thecpu__DOT__opvalid_alu) |
mvprintw(ln-1,10,"A"); |
|
showins(ln, "Al", |
m_core->v__DOT__thecpu__DOT__alu_ce, |
377,9 → 441,11
m_core->v__DOT__thecpu__DOT__alu_pc_valid, |
m_core->v__DOT__thecpu__DOT__alu_gie, |
m_core->v__DOT__thecpu__DOT__alu_stall, |
m_core->v__DOT__thecpu__DOT__alu_pc-1); ln++; |
alu_pc()); ln++; |
if (m_core->v__DOT__thecpu__DOT__wr_reg_ce) |
mvprintw(ln-1,10,"W"); |
|
mvprintw(ln-5, 48,"%s %s", |
mvprintw(ln-5, 65,"%s %s", |
(m_core->v__DOT__thecpu__DOT__op_break)?"OB":" ", |
(m_core->v__DOT__thecpu__DOT__clear_pipeline)?"CLRP":" "); |
mvprintw(ln-4, 48, |
415,7 → 481,7
(m_core->v__DOT__thecpu__DOT__mem_ce)?"CE":" ", |
(m_core->v__DOT__thecpu__DOT__mem_we)?"Wr ":"Rd ", |
(m_core->v__DOT__thecpu__DOT__mem_stalled)?"PIPE":" ", |
(m_core->v__DOT__thecpu__DOT__mem_valid)?"MEMV":" ", |
(m_core->v__DOT__thecpu__DOT__mem_valid)?"V":" ", |
zop_regstr[(m_core->v__DOT__thecpu__DOT__mem_wreg&0x1f)^0x10]); |
} |
|
599,7 → 665,7
m_core->v__DOT__thecpu__DOT__opvalid, |
m_core->v__DOT__thecpu__DOT__op_gie, |
m_core->v__DOT__thecpu__DOT__op_stall, |
m_core->v__DOT__thecpu__DOT__op_pc-1); ln++; |
op_pc()); ln++; |
|
showins(ln, "Al", |
m_core->v__DOT__thecpu__DOT__alu_ce, |
606,7 → 672,7
m_core->v__DOT__thecpu__DOT__alu_pc_valid, |
m_core->v__DOT__thecpu__DOT__alu_gie, |
m_core->v__DOT__thecpu__DOT__alu_stall, |
m_core->v__DOT__thecpu__DOT__alu_pc-1); ln++; |
alu_pc()); ln++; |
} |
void tick(void) { |
int gie = m_core->v__DOT__thecpu__DOT__gie; |
660,7 → 726,7
m_core->v__DOT__thecpu__DOT__dcd_ce, |
m_core->v__DOT__thecpu__DOT__dcd_pc, |
m_core->v__DOT__thecpu__DOT__op_ce, |
m_core->v__DOT__thecpu__DOT__op_pc, |
op_pc(), |
m_core->v__DOT__thecpu__DOT__dcdA, |
m_core->v__DOT__thecpu__DOT__opR, |
m_core->v__DOT__cmd_halt, |
751,13 → 817,13
m_core->v__DOT__thecpu__DOT__opvalid, |
m_core->v__DOT__thecpu__DOT__op_gie, |
m_core->v__DOT__thecpu__DOT__op_stall, |
m_core->v__DOT__thecpu__DOT__op_pc-1); |
op_pc()); |
dbgins("Al - ", |
m_core->v__DOT__thecpu__DOT__alu_ce, |
m_core->v__DOT__thecpu__DOT__alu_pc_valid, |
m_core->v__DOT__thecpu__DOT__alu_gie, |
m_core->v__DOT__thecpu__DOT__alu_stall, |
m_core->v__DOT__thecpu__DOT__alu_pc-1); |
alu_pc()); |
|
} |
} |
767,10 → 833,43
&&(m_core->v__DOT__thecpu__DOT__sleep)); |
} |
|
// op_pc(): program counter value the test bench reports for the "Op" |
// pipeline line: the core's op_pc signal minus one. |
// The commented-out body is an alternate estimate derived from dcd_pc and |
// dcdvalid; it is kept for reference only and is not compiled. |
unsigned op_pc(void) { |
/* |
unsigned r = m_core->v__DOT__thecpu__DOT__dcd_pc-1; |
if (m_core->v__DOT__thecpu__DOT__dcdvalid) |
r--; |
return r; |
*/ |
return m_core->v__DOT__thecpu__DOT__op_pc-1; |
} |
|
// alu_pc(): program counter value the test bench reports for the "Al" |
// pipeline line: the core's alu_pc signal minus one. |
// The commented-out body is an alternate estimate built from op_pc() and |
// opvalid; it is kept for reference only and is not compiled. |
unsigned alu_pc(void) { |
/* |
unsigned r = op_pc(); |
if (m_core->v__DOT__thecpu__DOT__opvalid) |
r--; |
return r; |
*/ |
return m_core->v__DOT__thecpu__DOT__alu_pc-1; |
} |
|
#ifdef OPT_PIPELINED_BUS_ACCESS |
// mem_pipe_stalled(): recompute, from individual core signals, whether the |
// pipelined memory unit is stalled. Returns nonzero when a memory bus cycle |
// is active (mem_cyc_gbl or mem_cyc_lcl) AND either mem_stall is set or |
// neither strobe (mem_stb_gbl, mem_stb_lcl) is asserted; returns 0 otherwise. |
// NOTE(review): presumably mirrors the core's own mem_pipe_stalled signal |
// (see the commented-out return below) -- confirm against the RTL. |
int mem_pipe_stalled(void) { |
int r = 0; |
// A cycle must be in progress on either the global or the local bus |
r = ((m_core->v__DOT__thecpu__DOT__mem_cyc_gbl) |
||(m_core->v__DOT__thecpu__DOT__mem_cyc_lcl)); |
// ... and the bus is stalled, or no strobe is currently being issued |
r = r && ((m_core->v__DOT__thecpu__DOT__mem_stall) |
||( |
((!m_core->v__DOT__thecpu__DOT__mem_stb_gbl) |
&&(!m_core->v__DOT__thecpu__DOT__mem_stb_lcl)))); |
return r; |
// return m_core->v__DOT__thecpu__DOT__mem_pipe_stalled; |
} |
#endif |
|
bool test_failure(void) { |
return ((m_core->v__DOT__thecpu__DOT__alu_pc_valid) |
&&(m_mem[m_core->v__DOT__thecpu__DOT__alu_pc-1] |
== 0x2f0f7fff) |
&&(m_mem[alu_pc()] == 0x2f0f7fff) |
&&(!m_core->v__DOT__thecpu__DOT__clear_pipeline)); |
} |
|
1117,7 → 1216,7
halted = true; |
erase(); |
break; |
case 's': case 'S': |
case 's': |
if (!halted) |
erase(); |
tb->wb_write(CMD_REG, CMD_STEP); |
1124,10 → 1223,30
manual = false; |
halted = true; |
break; |
case 't': case 'T': |
case 'S': |
if ((!manual)||(halted)) |
erase(); |
manual = true; |
halted = true; |
tb->m_core->v__DOT__cmd_halt = 0; |
tb->m_core->v__DOT__cmd_step = 1; |
tb->eval(); |
tb->tick(); |
break; |
case 'T': // |
if ((!manual)||(halted)) |
erase(); |
manual = true; |
halted = true; |
tb->m_core->v__DOT__cmd_halt = 1; |
tb->m_core->v__DOT__cmd_step = 0; |
tb->eval(); |
tb->tick(); |
break; |
case 't': |
if ((!manual)||(halted)) |
erase(); |
manual = true; |
halted = false; |
// tb->m_core->v__DOT__thecpu__DOT__step = 0; |
// tb->m_core->v__DOT__cmd_halt = 0; |
/zipcpu/trunk/bench/cpp/Makefile
39,7 → 39,8
CXX := g++ |
FLAGS := -Wall -Og -g |
ZASM := ../../sw/zasm |
INCS := -I../../rtl/obj_dir/ -I/usr/share/verilator/include -I../../sw/zasm |
RTLD := ../../rtl |
INCS := -I$(RTLD)/obj_dir/ -I$(RTLD) -I/usr/share/verilator/include -I../../sw/zasm |
SOURCES := zippy_tb.cpp memsim.cpp twoc.cpp $(ZASM)/zopcodes.cpp $(ZASM)/zparser.cpp |
RAWLIB := /usr/share/verilator/include/verilated.cpp ../../rtl/obj_dir/Vzipsystem__ALL.a |
LIBS := $(RAWLIB) -lncurses |
46,6 → 47,7
TESTF := ../../sw/zasm/z.out |
|
zippy_tb: $(SOURCES) $(RAWLIB) $(ZASM)/zopcodes.h $(ZASM)/zparser.h testb.h |
zippy_tb: $(RTLD)/cpudefs.h |
$(CXX) $(FLAGS) $(INCS) $(SOURCES) $(LIBS) -o $@ |
|
.PHONY: stest |
/zipcpu/trunk/doc/spec.pdf
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
/zipcpu/trunk/doc/src/spec.tex
48,7 → 48,7
\title{Specification} |
\author{Dan Gisselquist, Ph.D.} |
\email{dgisselq (at) opencores.org} |
\revision{Rev.~0.4} |
\revision{Rev.~0.5} |
\definecolor{webred}{rgb}{0.2,0,0} |
\definecolor{webgreen}{rgb}{0,0.2,0} |
\usepackage[dvips,ps2pdf,colorlinks=true, |
76,6 → 76,7
copy. |
\end{license} |
\begin{revisionhistory} |
0.5 & 9/29/2015 & Gisselquist & Added pipelined memory access discussion.\\\hline |
0.4 & 9/19/2015 & Gisselquist & Added DMA controller, improved stall information, and self--assessment info.\\\hline |
0.3 & 8/22/2015 & Gisselquist & First completed draft\\\hline |
0.2 & 8/19/2015 & Gisselquist & Still Draft, more complete \\\hline |
411,6 → 412,25
supervisor, in supervisor mode, to determine whether it got to supervisor |
mode from a trap or from an external interrupt or both. |
|
These status register bits are summarized in Tbl.~\ref{tbl:ccbits}. |
\begin{table} |
\begin{center} |
\begin{tabular}{l|l} |
Bit & Meaning \\\hline |
9 & Soft trap, set on a trap from user mode, cleared when returning to user mode\\\hline |
8 & (Reserved for) Floating point enable \\\hline |
7 & Halt on break, to support an external debugger \\\hline |
6 & Step, single step the CPU in user mode\\\hline |
5 & GIE, or Global Interrupt Enable \\\hline |
4 & Sleep \\\hline |
3 & V, or overflow bit.\\\hline |
2 & N, or negative bit.\\\hline |
1 & C, or carry bit.\\\hline |
0 & Z, or zero bit. \\\hline |
\end{tabular} |
\caption{Condition Code / Status Register Bits}\label{tbl:ccbits} |
\end{center}\end{table} |
|
\section{Conditional Instructions} |
Most, although not quite all, instructions may be conditionally executed. From |
the four condition code flags, eight conditions are defined. These are shown |
546,17 → 566,17
Op Code & \multicolumn{8}{c|}{31\ldots24} & \multicolumn{8}{c|}{23\ldots 16} |
& \multicolumn{8}{c|}{15\ldots 8} & \multicolumn{8}{c|}{7\ldots 0} |
& Sets CC? \\\hline\hline |
CMP(Sub) & \multicolumn{4}{l|}{4'h0} |
{\tt CMP(Sub)} & \multicolumn{4}{l|}{4'h0} |
& \multicolumn{4}{l|}{D. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& \multicolumn{21}{l|}{Operand B} |
& Yes \\\hline |
TST(And) & \multicolumn{4}{l|}{4'h1} |
{\tt TST(And)} & \multicolumn{4}{l|}{4'h1} |
& \multicolumn{4}{l|}{D. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& \multicolumn{21}{l|}{Operand B} |
& Yes \\\hline |
MOV & \multicolumn{4}{l|}{4'h2} |
{\tt MOV} & \multicolumn{4}{l|}{4'h2} |
& \multicolumn{4}{l|}{D. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& A-Usr |
564,15 → 584,15
& B-Usr |
& \multicolumn{15}{l|}{15'bit signed offset} |
& \\\hline |
LODI & \multicolumn{4}{l|}{4'h3} |
{\tt LODI} & \multicolumn{4}{l|}{4'h3} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{24}{l|}{24'bit Signed Immediate} |
& \\\hline |
NOOP & \multicolumn{4}{l|}{4'h4} |
{\tt NOOP} & \multicolumn{4}{l|}{4'h4} |
& \multicolumn{4}{l|}{4'he} |
& \multicolumn{24}{l|}{24'h00} |
& \\\hline |
BREAK & \multicolumn{4}{l|}{4'h4} |
{\tt BREAK} & \multicolumn{4}{l|}{4'h4} |
& \multicolumn{4}{l|}{4'he} |
& \multicolumn{24}{l|}{24'h01} |
& \\\hline |
580,7 → 600,7
& \multicolumn{4}{l|}{4'he} |
& \multicolumn{24}{l|}{24'bits, but not 0 or 1.} |
& \\\hline |
LODIHI & \multicolumn{4}{l|}{4'h4} |
{\tt LODIHI }& \multicolumn{4}{l|}{4'h4} |
& \multicolumn{4}{l|}{4'hf} |
& \multicolumn{3}{l|}{Cond.} |
& 1'b1 |
587,7 → 607,7
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{16}{l|}{16-bit Immediate} |
& \\\hline |
LODILO & \multicolumn{4}{l|}{4'h4} |
{\tt LODILO} & \multicolumn{4}{l|}{4'h4} |
& \multicolumn{4}{l|}{4'hf} |
& \multicolumn{3}{l|}{Cond.} |
& 1'b0 |
594,81 → 614,81
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{16}{l|}{16-bit Immediate} |
& \\\hline |
16-b MPYU & \multicolumn{4}{l|}{4'h4} |
16-b {\tt MPYU} & \multicolumn{4}{l|}{4'h4} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& 1'b0 & \multicolumn{4}{l|}{Reg} |
& \multicolumn{16}{l|}{16-bit Offset} |
& Yes \\\hline |
16-b MPYU(I) & \multicolumn{4}{l|}{4'h4} |
16-b {\tt MPYU}(I) & \multicolumn{4}{l|}{4'h4} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& 1'b0 & \multicolumn{4}{l|}{4'hf} |
& \multicolumn{16}{l|}{16-bit Offset} |
& Yes \\\hline |
16-b MPYS & \multicolumn{4}{l|}{4'h4} |
16-b {\tt MPYS} & \multicolumn{4}{l|}{4'h4} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& 1'b1 & \multicolumn{4}{l|}{Reg} |
& \multicolumn{16}{l|}{16-bit Offset} |
& Yes \\\hline |
16-b MPYS(I) & \multicolumn{4}{l|}{4'h4} |
16-b {\tt MPYS}(I) & \multicolumn{4}{l|}{4'h4} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& 1'b1 & \multicolumn{4}{l|}{4'hf} |
& \multicolumn{16}{l|}{16-bit Offset} |
& Yes \\\hline |
ROL & \multicolumn{4}{l|}{4'h5} |
{\tt ROL} & \multicolumn{4}{l|}{4'h5} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& \multicolumn{21}{l|}{Operand B, truncated to low order 5 bits} |
& \\\hline |
LOD & \multicolumn{4}{l|}{4'h6} |
{\tt LOD} & \multicolumn{4}{l|}{4'h6} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& \multicolumn{21}{l|}{Operand B address} |
& \\\hline |
STO & \multicolumn{4}{l|}{4'h7} |
{\tt STO} & \multicolumn{4}{l|}{4'h7} |
& \multicolumn{4}{l|}{D. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& \multicolumn{21}{l|}{Operand B address} |
& \\\hline |
SUB & \multicolumn{4}{l|}{4'h8} |
{\tt SUB} & \multicolumn{4}{l|}{4'h8} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& \multicolumn{21}{l|}{Operand B} |
& Yes \\\hline |
AND & \multicolumn{4}{l|}{4'h9} |
{\tt AND} & \multicolumn{4}{l|}{4'h9} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& \multicolumn{21}{l|}{Operand B} |
& Yes \\\hline |
ADD & \multicolumn{4}{l|}{4'ha} |
{\tt ADD} & \multicolumn{4}{l|}{4'ha} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& \multicolumn{21}{l|}{Operand B} |
& Yes \\\hline |
OR & \multicolumn{4}{l|}{4'hb} |
{\tt OR} & \multicolumn{4}{l|}{4'hb} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& \multicolumn{21}{l|}{Operand B} |
& Yes \\\hline |
XOR & \multicolumn{4}{l|}{4'hc} |
{\tt XOR} & \multicolumn{4}{l|}{4'hc} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& \multicolumn{21}{l|}{Operand B} |
& Yes \\\hline |
LSL/ASL & \multicolumn{4}{l|}{4'hd} |
{\tt LSL/ASL} & \multicolumn{4}{l|}{4'hd} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& \multicolumn{21}{l|}{Operand B, imm. truncated to 6 bits} |
& Yes \\\hline |
ASR & \multicolumn{4}{l|}{4'he} |
{\tt ASR} & \multicolumn{4}{l|}{4'he} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& \multicolumn{21}{l|}{Operand B, imm. truncated to 6 bits} |
& Yes \\\hline |
LSR & \multicolumn{4}{l|}{4'hf} |
{\tt LSR} & \multicolumn{4}{l|}{4'hf} |
& \multicolumn{4}{l|}{R. Reg} |
& \multicolumn{3}{l|}{Cond.} |
& \multicolumn{21}{l|}{Operand B, imm. truncated to 6 bits} |
692,51 → 712,49
\begin{table}\begin{center} |
\begin{tabular}{p{1.4in}p{1.5in}p{3in}}\\\hline |
Mapped & Actual & Notes \\\hline |
ABS Rx |
& \parbox[t]{1.5in}{TST -1,Rx\\NEG.LT Rx} |
{\tt ABS Rx} |
& \parbox[t]{1.5in}{\tt TST -1,Rx\\NEG.LT Rx} |
& Absolute value, depends upon derived NEG.\\\hline |
\parbox[t]{1.4in}{ADD Ra,Rx\\ADDC Rb,Ry} |
& \parbox[t]{1.5in}{Add Ra,Rx\\ADD.C \$1,Ry\\Add Rb,Ry} |
\parbox[t]{1.4in}{\tt ADD Ra,Rx\\ADDC Rb,Ry} |
& \parbox[t]{1.5in}{\tt Add Ra,Rx\\ADD.C \$1,Ry\\Add Rb,Ry} |
& Add with carry \\\hline |
BRA.Cond +/-\$Addr |
& \hbox{MOV.cond \$Addr+PC,PC} |
{\tt BRA.Cond +/-\$Addr} |
& \hbox{\tt MOV.cond \$Addr+PC,PC} |
& Branch or jump on condition. Works for 15--bit |
signed address offsets.\\\hline |
BRA.Cond +/-\$Addr |
& \parbox[t]{1.5in}{LDI \$Addr,Rx \\ ADD.cond Rx,PC} |
{\tt BRA.Cond +/-\$Addr} |
& \parbox[t]{1.5in}{\tt LDI \$Addr,Rx \\ ADD.cond Rx,PC} |
& Branch/jump on condition. Works for |
23 bit address offsets, but costs a register, an extra instruction, |
and sets the flags. \\\hline |
BNC PC+\$Addr |
& \parbox[t]{1.5in}{Test \$Carry,CC \\ MOV.Z PC+\$Addr,PC} |
{\tt BNC PC+\$Addr} |
& \parbox[t]{1.5in}{\tt Test \$Carry,CC \\ MOV.Z PC+\$Addr,PC} |
& Example of a branch on an unsupported |
condition, in this case a branch on not carry \\\hline |
BUSY & MOV \$-1(PC),PC & Execute an infinite loop \\\hline |
CLRF.NZ Rx |
& XOR.NZ Rx,Rx |
{\tt BUSY } & {\tt MOV \$-1(PC),PC} & Execute an infinite loop \\\hline |
{\tt CLRF.NZ Rx } |
& {\tt XOR.NZ Rx,Rx} |
& Clear Rx, and flags, if the Z-bit is not set \\\hline |
CLR Rx |
& LDI \$0,Rx |
{\tt CLR Rx } |
& {\tt LDI \$0,Rx} |
& Clears Rx, leaves flags untouched. This instruction cannot be |
conditional. \\\hline |
EXCH.W Rx |
& ROL \$16,Rx |
{\tt EXCH.W Rx } |
& {\tt ROL \$16,Rx} |
& Exchanges the top and bottom 16'bit words of Rx \\\hline |
HALT |
& Or \$SLEEP,CC |
& Executed while in interrupt mode. In user mode this is simply a |
wait until interrupt instruction. \\\hline |
INT & LDI \$0,CC |
& Since we're using the CC register as a trap vector as well, this |
executes TRAP \#0. \\\hline |
IRET |
& OR \$GIE,CC |
& Also an RTU instruction (Return to Userspace) \\\hline |
JMP R6+\$Addr |
& MOV \$Addr(R6),PC |
{\tt HALT } |
& {\tt Or \$SLEEP,CC} |
& This only works when issued in interrupt/supervisor mode. In user |
mode this is simply a wait until interrupt instruction. \\\hline |
{\tt INT } & {\tt LDI \$0,CC} & \\\hline |
{\tt IRET} |
& {\tt OR \$GIE,CC} |
& Also known as an RTU instruction (Return to Userspace) \\\hline |
{\tt JMP R6+\$Addr} |
& {\tt MOV \$Addr(R6),PC} |
& \\\hline |
JSR PC+\$Addr |
& \parbox[t]{1.5in}{SUB \$1,SP \\\ |
{\tt JSR PC+\$Addr} |
& \parbox[t]{1.5in}{\tt SUB \$1,SP \\\ |
MOV \$3+PC,R0 \\ |
STO R0,1(SP) \\ |
MOV \$Addr+PC,PC \\ |
746,16 → 764,18
operand, removing the preliminary stack instruction before and |
the cleanup after, by adjusting how any stack frame was built for |
this routine to include space at the top of the stack for the PC. |
Note also that jumping to a subroutine costs a copy register, {\tt R0} |
in this case. |
\\\hline |
JSR PC+\$Addr |
& \parbox[t]{1.5in}{MOV \$3+PC,R12 \\ MOV \$addr+PC,PC} |
{\tt JSR PC+\$Addr } |
& \parbox[t]{1.5in}{\tt MOV \$3+PC,R12 \\ MOV \$addr+PC,PC} |
&This is the high speed |
version of a subroutine call, necessitating a register to hold the |
last PC address. In its favor, this method doesn't suffer the |
mandatory memory access of the other approach. \\\hline |
LDI.l \$val,Rx |
& \parbox[t]{1.5in}{LDIHI (\$val$>>$16)\&0x0ffff, Rx \\ |
LDILO (\$val \& 0x0ffff)} |
{\tt LDI.l \$val,Rx } |
& \parbox[t]{1.8in}{\tt LDIHI (\$val$>>$16)\&0x0ffff, Rx \\ |
LDILO (\$val\&0x0ffff),Rx} |
& Sadly, there's not enough instruction |
space to load a complete immediate value into any register. |
Therefore, fully loading any register takes two cycles. |
767,8 → 787,8
\begin{table}\begin{center} |
\begin{tabular}{p{1.4in}p{1.5in}p{3in}}\\\hline |
Mapped & Actual & Notes \\\hline |
LOD.b \$addr,Rx |
& \parbox[t]{1.5in}{% |
{\tt LOD.b \$addr,Rx} |
& \parbox[t]{1.5in}{\tt % |
LDI \$addr,Ra \\ |
LDI \$addr,Rb \\ |
LSR \$2,Ra \\ |
788,8 → 808,8
we needed to drop the bottom two bits. This also limits the address |
space of character accesses using this method from 16 MB down to 4MB.} |
\\\hline |
\parbox[t]{1.5in}{LSL \$1,Rx\\ LSLC \$1,Ry} |
& \parbox[t]{1.5in}{LSL \$1,Ry \\ |
\parbox[t]{1.5in}{\tt LSL \$1,Rx\\ LSLC \$1,Ry} |
& \parbox[t]{1.5in}{\tt LSL \$1,Ry \\ |
LSL \$1,Rx \\ |
OR.C \$1,Ry} |
& Logical shift left with carry. Note that the |
797,23 → 817,23
That is, LSL sets the carry flag, so if we did this the other way |
with Rx before Ry, then the condition flag wouldn't have been right |
for an OR correction at the end. \\\hline |
\parbox[t]{1.5in}{LSR \$1,Rx \\ LSRC \$1,Ry} |
& \parbox[t]{1.5in}{CLR Rz \\ |
\parbox[t]{1.5in}{\tt LSR \$1,Rx \\ LSRC \$1,Ry} |
& \parbox[t]{1.5in}{\tt CLR Rz \\ |
LSR \$1,Ry \\ |
LDIHI.C \$8000h,Rz \\ |
LSR \$1,Rx \\ |
OR Rz,Rx} |
& Logical shift right with carry \\\hline |
NEG Rx & \parbox[t]{1.5in}{XOR \$-1,Rx \\ ADD \$1,Rx} & \\\hline |
NEG.C Rx & \parbox[t]{1.5in}{MOV.C \$-1+Rx,Rx\\XOR.C \$-1,Rx} & \\\hline |
NOOP & NOOP & While there are many |
{\tt NEG Rx} & \parbox[t]{1.5in}{\tt XOR \$-1,Rx \\ ADD \$1,Rx} & \\\hline |
{\tt NEG.C Rx} & \parbox[t]{1.5in}{\tt MOV.C \$-1+Rx,Rx\\XOR.C \$-1,Rx} & \\\hline |
{\tt NOOP} & {\tt NOOP} & While there are many |
operations that do nothing, such as MOV Rx,Rx, or OR \$0,Rx, these |
operations have consequences in that they might stall the bus if |
Rx isn't ready yet. For this reason, we have a dedicated NOOP |
instruction. \\\hline |
NOT Rx & XOR \$-1,Rx & \\\hline |
POP Rx |
& \parbox[t]{1.5in}{LOD \$1(SP),Rx \\ ADD \$1,SP} |
{\tt NOT Rx } & {\tt XOR \$-1,Rx } & \\\hline |
{\tt POP Rx } |
& \parbox[t]{1.5in}{\tt LOD \$1(SP),Rx \\ ADD \$1,SP} |
& Note |
that for interrupt purposes, one can never depend upon the value at |
(SP). Hence you read from it, then increment it, lest having |
824,12 → 844,14
\end{center}\end{table} |
\begin{table}\begin{center} |
\begin{tabular}{p{1.4in}p{1.5in}p{3in}}\\\hline |
PUSH Rx |
{\tt PUSH Rx} |
& \parbox[t]{1.5in}{SUB \$1,SP \\ |
STO Rx,\$1(SP)} |
& \\\hline |
PUSH Rx-Ry |
& \parbox[t]{1.5in}{SUB \$n,SP \\ |
& Note that for pipelined operation, it helps to coalesce all the |
{\tt SUB}'s into one command, and place the {\tt STO}'s right |
after each other.\\\hline |
{\tt PUSH Rx-Ry} |
& \parbox[t]{1.5in}{\tt SUB \$n,SP \\ |
STO Rx,\$n(SP) |
\ldots \\ |
STO Ry,\$1(SP)} |
836,27 → 858,28
& Multiple pushes at once only need the single subtract from the |
stack pointer. This derived instruction is analogous to a similar one |
on the Motorola 68k architecture, although the Zip Assembler |
does not support this instruction (yet).\\\hline |
RESET |
& \parbox[t]{1in}{STO \$1,\$watchdog(R12)\\NOOP\\NOOP} |
& \parbox[t]{3in}{This depends upon the peripheral base address being |
does not support this instruction (yet). This instruction |
also supports pipelined memory access.\\\hline |
{\tt RESET} |
& \parbox[t]{1in}{\tt STO \$1,\$watchdog(R12)\\NOOP\\NOOP} |
& This depends upon the peripheral base address being |
in R12. |
|
Another opportunity might be to jump to the reset address from within |
supervisor mode.}\\\hline |
RET & \parbox[t]{1.5in}{LOD \$1(SP),PC} |
supervisor mode.\\\hline |
{\tt RET} & \parbox[t]{1.5in}{\tt LOD \$1(SP),PC} |
& Note that this depends upon the calling context to clean up the |
stack, as outlined for the JSR instruction. \\\hline |
RET & MOV R12,PC |
{\tt RET} & {\tt MOV R12,PC} |
& This is the high(er) speed version, that doesn't touch the stack. |
As such, it doesn't suffer a stall on memory read/write to the stack. |
\\\hline |
STEP Rr,Rt |
& \parbox[t]{1.5in}{LSR \$1,Rr \\ XOR.C Rt,Rr} |
{\tt STEP Rr,Rt} |
& \parbox[t]{1.5in}{\tt LSR \$1,Rr \\ XOR.C Rt,Rr} |
& Step a Galois implementation of a Linear Feedback Shift Register, Rr, |
using taps Rt \\\hline |
STO.b Rx,\$addr |
& \parbox[t]{1.5in}{% |
{\tt STO.b Rx,\$addr} |
& \parbox[t]{1.5in}{\tt % |
LDI \$addr,Ra \\ |
LDI \$addr,Rb \\ |
LSR \$2,Ra \\ |
864,7 → 887,7
SUB \$32,Rb \\ |
LOD (Ra),Ry \\ |
AND \$0ffh,Rx \\ |
AND \$-0ffh,Ry \\ |
AND \~\$0ffh,Ry \\ |
ROL Rb,Rx \\ |
OR Rx,Ry \\ |
STO Ry,(Ra) } |
877,15 → 900,15
of character accesses from 16 MB down to 4MB. |
Further, this instruction implies a byte ordering, |
such as big or little endian.} \\\hline |
SWAP Rx,Ry |
& \parbox[t]{1.5in}{ |
{\tt SWAP Rx,Ry } |
& \parbox[t]{1.5in}{\tt |
XOR Ry,Rx \\ |
XOR Rx,Ry \\ |
XOR Ry,Rx} |
& While no extra registers are needed, this example |
does take 3-clocks. \\\hline |
TRAP \#X |
& \parbox[t]{1.5in}{LDI \$x,R0 \\ AND ~\$GIE,CC } |
{\tt TRAP \#X} |
& \parbox[t]{1.5in}{\tt LDI \$x,R0 \\ AND \~\$GIE,CC } |
& This works because whenever a user lowers the \$GIE flag, it sets |
a TRAP bit within the CC register. Therefore, upon entering the |
supervisor state, the CPU only need check this bit to know that it |
898,16 → 921,16
\end{center}\end{table} |
\begin{table}\begin{center} |
\begin{tabular}{p{1.4in}p{1.5in}p{3in}}\\\hline |
TST Rx |
& TST \$-1,Rx |
{\tt TST Rx} |
& {\tt TST \$-1,Rx} |
& Set the condition codes based upon Rx. Could also do a CMP \$0,Rx, |
ADD \$0,Rx, SUB \$0,Rx, etc, AND \$-1,Rx, etc. The TST and CMP |
approaches won't stall future pipeline stages looking for the value |
of Rx. \\\hline |
WAIT |
& Or \$SLEEP,CC |
& Wait 'til interrupt. In an interrupts disabled context, this |
becomes a HALT instruction. |
{\tt WAIT} |
& {\tt Or \$GIE | \$SLEEP,CC} |
& Wait until the next interrupt, then jump to supervisor/interrupt |
mode. |
\end{tabular} |
\caption{Derived Instructions, continued}\label{tbl:derived-4} |
\end{center}\end{table} |
1073,9 → 1096,13
memory unit is busy with the STO instruction, but otherwise this pipeline will |
stall waiting for it to complete. |
|
Note that even though the Wishbone bus can support pipelined accesses at |
one access per clock, only the prefetch stage can take advantage of this. |
Load and Store instructions are stuck at one wishbone cycle per instruction. |
The Zip CPU does have the capability of supporting pipelined memory access, |
but only under the following conditions: all accesses within the pipeline |
must all be reads or all be writes, all must use the same register for their |
address, and there can be no stalls or other instructions between pipelined |
memory access instructions. Further, the offset to memory must be increasing |
by one address each instruction. These conditions work well for saving |
registers to, or restoring them from, the stack. |
|
\item When waiting for a conditional memory read operation to complete |
\begin{enumerate} |
1235,7 → 1262,7
|
When coupled with a peripheral, the DMA controller can be configured to start |
a memory copy on an interrupt line going high. Further, the controller can be |
configured to issue reads from (or two) the same address instead of incrementing |
configured to issue reads from (or to) the same address instead of incrementing |
the address at each clock. The DMA completes once the total number of items |
specified (not the transfer length) have been transferred. |
|
1402,19 → 1429,43
\begin{table}\begin{center} |
\begin{tabular}{ll} |
{\tt swap\_out:} \\ |
& {\tt MOV -15(uSP),R1} \\ |
& {\tt STO R1,stack(R12)} \\ |
& {\tt MOV uPC,R0} \\ |
& {\tt STO R0,15(R1)} \\ |
& {\tt MOV uCC,R0} \\ |
& {\tt STO R0,14(R1)} \\ |
& {\tt MOV -15(uSP),R5} \\ |
& {\tt STO R5,stack(R12)} \\ |
& {\tt MOV uR0,R0} \\ |
& {\tt MOV uR1,R1} \\ |
& {\tt MOV uR2,R2} \\ |
& {\tt MOV uR3,R3} \\ |
& {\tt MOV uR4,R4} \\ |
& {\tt STO R0,1(R5)} {\em ; Exploit memory pipelining: }\\ |
& {\tt STO R1,2(R5)} {\em ; All instructions write to stack }\\ |
& {\tt STO R2,3(R5)} {\em ; All offsets increment by one }\\ |
& {\tt STO R3,4(R5)} {\em ; Longest pipeline is 5 cycles.}\\ |
& {\tt STO R4,5(R5)} \\ |
& \ldots {\em ; Need to repeat for all user registers} \\ |
\iffalse |
& {\tt MOV uR5,R0} \\ |
& {\tt MOV uR6,R1} \\ |
& {\tt MOV uR7,R2} \\ |
& {\tt MOV uR8,R3} \\ |
& {\tt MOV uR9,R4} \\ |
& {\tt STO R0,6(R5) }\\ |
& {\tt STO R1,7(R5) }\\ |
& {\tt STO R2,8(R5) }\\ |
& {\tt STO R3,9(R5) }\\ |
& {\tt STO R4,10(R5)} \\ |
\fi |
& {\tt MOV uR10,R0} \\ |
& {\tt MOV uR11,R1} \\ |
& {\tt MOV uR12,R2} \\ |
& {\tt MOV uCC,R3} \\ |
& {\tt MOV uPC,R4} \\ |
& {\tt STO R0,11(R5)}\\ |
& {\tt STO R1,12(R5)}\\ |
& {\tt STO R2,13(R5)}\\ |
& {\tt STO R3,14(R5)}\\ |
& {\tt STO R4,15(R5)} \\ |
& {\em ; We can skip storing the stack, uSP, since it'll be stored}\\ |
& {\em ; elsewhere (in the task structure) }\\ |
& {\tt MOV uR13,R0} \\ |
& {\tt STO R0,13(R1)} \\ |
& \ldots {\em ; Need to repeat for all user registers} \\ |
& {\tt MOV uR0,R0} \\ |
& {\tt STO R0,1(R1)} \\ |
\end{tabular} |
\caption{Example Storing User Task Context}\label{tbl:context-out} |
\end{center}\end{table} |
1509,17 → 1560,31
\begin{table}\begin{center} |
\begin{tabular}{ll} |
{\tt swap\_in:} \\ |
& {\tt LOD stack(R12),R1} \\ |
& {\tt LOD stack(R12),R5} \\ |
& {\tt MOV 15(R1),uSP} \\ |
& {\tt LOD 15(R1),R0} \\ |
& {\tt MOV R0,uPC} \\ |
& {\tt LOD 14(R1),R0} \\ |
& {\tt MOV R0,uCC} \\ |
& {\tt LOD 13(R1),R0} \\ |
& {\tt MOV R0,uR12} \\ |
& {\em ; Be sure to exploit the memory pipelining capability} \\ |
& {\tt LOD 1(R5),R0} \\ |
& {\tt LOD 2(R5),R1} \\ |
& {\tt LOD 3(R5),R2} \\ |
& {\tt LOD 4(R5),R3} \\ |
& {\tt LOD 5(R5),R4} \\ |
& {\tt MOV R0,uR0} \\ |
& {\tt MOV R1,uR1} \\ |
& {\tt MOV R2,uR2} \\ |
& {\tt MOV R3,uR3} \\ |
& {\tt MOV R4,uR4} \\ |
& \ldots {\em ; Need to repeat for all user registers} \\ |
& {\tt LOD 1(R1),R0} \\ |
& {\tt MOV R0,uR0} \\ |
& {\tt LOD 11(R5),R0} \\ |
& {\tt LOD 12(R5),R1} \\ |
& {\tt LOD 13(R5),R2} \\ |
& {\tt LOD 14(R5),R3} \\ |
& {\tt LOD 15(R5),R4} \\ |
& {\tt MOV R0,uR10} \\ |
& {\tt MOV R1,uR11} \\ |
& {\tt MOV R2,uR12} \\ |
& {\tt MOV R3,uCC} \\ |
& {\tt MOV R4,uPC} \\ |
|
& {\tt BRA return\_to\_user} \\ |
\end{tabular} |
\caption{Example Restoring User Task Context}\label{tbl:context-in} |
1716,9 → 1781,10
\begin{table}\begin{center} |
\begin{bitlist} |
31 & R & DMA Active\\\hline |
30 & R & Wishbone error, transaction aborted (cleared on any write)\\\hline |
30 & R & Wishbone error, transaction aborted. This bit is cleared the next time |
this register is written to.\\\hline |
29 & R/W & Set to '1' to prevent the controller from incrementing the source address, '0' for normal memory copy. \\\hline |
28 & R/W & Set to '0' to prevent the controller from incrementing the |
28 & R/W & Set to '1' to prevent the controller from incrementing the |
destination address, '0' for normal memory copy. \\\hline |
27 \ldots 16 & W & The DMA Key. Write a 12'hfed to these bits to |
activate any DMA transfer. \\\hline |
1795,7 → 1861,6
uPC & 31 & 32 & R/W & User Program Counter\\\hline |
PIC & 32 & 32 & R/W & Primary Interrupt Controller \\\hline |
WDT & 33 & 32 & R/W & Watchdog Timer\\\hline |
CCHE & 34 & 32 & R/W & Manual Cache Controller\\\hline |
CTRIC & 35 & 32 & R/W & Secondary Interrupt Controller\\\hline |
TMRA & 36 & 32 & R/W & Timer A\\\hline |
TMRB & 37 & 32 & R/W & Timer B\\\hline |
1809,6 → 1874,10
UMSTL & 45 & 32 & R/W & User memory stall counter\\\hline |
UPSTL & 46 & 32 & R/W & User Pre-Fetch Stall counter\\\hline |
UICNT & 47 & 32 & R/W & User instruction counter\\\hline |
DMACMD & 48 & 32 & R/W & DMA command and status register\\\hline |
DMALEN & 49 & 32 & R/W & DMA transfer length\\\hline |
DMARD & 50 & 32 & R/W & DMA read address\\\hline |
DMAWR & 51 & 32 & R/W & DMA write address\\\hline |
\end{reglist} |
\caption{Debug Register Addresses}\label{tbl:dbgaddrs} |
\end{center}\end{table} |
2115,13 → 2184,14
realized and at this rate may not be realized. (I've been intimidated |
by the challenge every time I've looked through those codes.) |
|
\iffalse |
\item While the Wishbone Bus (B4) supports a pipelined mode with single cycle |
execution, the Zip CPU is unable to exploit this parallelism. Instead, |
apart from the DMA and the pipelined prefetch, all loads and stores |
are single wishbone bus operations requiring a minimum of 3 clocks. |
(In practice, this has turned into 7-clocks.) |
% Addressed, 20150929 |
|
\iffalse |
\item There is no control over whether or not an instruction sets the |
condition codes--certain instructions always set the condition codes, |
other instructions never set them. This effectively limits conditional |
2173,6 → 2243,17
off, keeping the CPU lightweight? The same holds for the prefetch |
cache. |
|
\item The `{\tt .V}' condition was never used in any code other than my test |
code. Suggest changing it to a `{\tt .LE}' condition, which seems |
to be more useful. |
|
\item {\bf Consider a more traditional Instruction Cache.} The current |
pipelined instruction cache just reads a window of memory into |
its cache. If the CPU leaves that window, the entire cache is |
invalidated. A more traditional cache, however, might allow |
common subroutines to stay within the cache without invalidating the |
entire cache structure. |
|
\iffalse |
\item {\bf Adjust the Zip CPU so that conditional instructions do not set |
flags}, although they may explicitly set condition codes if writing |
2180,13 → 2261,7
|
This is a simple change to the core, and may show up in new releases. |
% Fixed, 20150918 |
\fi |
|
\item The `{\tt .V}' condition was never used in any code other than my test |
code. Suggest changing it to a `{\tt .LE}' condition, which seems |
to be more useful. |
|
\iffalse |
\item Add in an {\bf unpredictable branch delay slot}, so that on any branch |
the delay slot may or may not be executed before the branch. |
Instructions that do not depend upon the branch, and that should be |
2226,18 → 2301,8
as soon as the decoder knows the instruction will need the bus. |
Indeed, if done in the decode stage, this might drop the seven cycle |
access down by two cycles. |
|
% FIXED: 20150918 |
\fi |
|
\item {\bf Consider a more traditional Instruction Cache.} The current |
pipelined instruction cache just reads a window of memory into |
its cache. If the CPU leaves that window, the entire cache is |
invalidated. A more traditional cache, however, might allow |
common subroutines to stay within the cache without invalidating the |
entire cache structure. |
|
\iffalse |
\item {\bf Very Long Instruction Word (VLIW).} Now, to speed up operation, I |
propose that the Zip CPU instruction set be modified towards a Very |
Long Instruction Word (VLIW) implementation. In this implementation, |
/zipcpu/trunk/sw/zasm/test.S
519,6 → 519,33
cmp r0,r7 |
trap.ne r11 |
#endif |
|
// Pipelined stack access test.  The #define below enables it |
// unconditionally; the matching routine, pipeline_stack_test, is guarded |
// by the same symbol. |
#define PIPELINE_STACK_TEST |
#ifdef PIPELINE_STACK_TEST |
ldi $0x0f000,r11 // Mark our test |
// Seed R0..R6 with 1..7: each MOV copies the previous register plus one |
LDI 1,R0 |
MOV 1(R0),R1 |
MOV 1(R1),R2 |
MOV 1(R2),R3 |
MOV 1(R3),R4 |
MOV 1(R4),R5 |
MOV 1(R5),R6 |
// Call the routine under test.  JSR() is a macro defined outside this view; |
// presumably R7 is its scratch/link register -- confirm against the macro. |
JSR(pipeline_stack_test,R7) |
// On return R0..R6 must still hold 1..7; any mismatch traps with the test |
// mark in R11. |
CMP 1,R0 |
trap.ne R11 |
CMP 2,R1 |
trap.ne R11 |
CMP 3,R2 |
trap.ne R11 |
CMP 4,R3 |
trap.ne R11 |
CMP 5,R4 |
trap.ne R11 |
CMP 6,R5 |
trap.ne R11 |
CMP 7,R6 |
trap.ne R11 |
#endif |
// Return success / Test the trap interrupt |
clr r11 |
trap r11 |
548,6 → 575,53
POP(R1,SP) |
RET |
#endif |
|
#ifdef PIPELINE_STACK_TEST |
// pipeline_stack_test: store R0-R12 into thirteen consecutive stack words, |
// invert every one of those registers, reload them all from the stack, then |
// return.  The STO and LOD runs are back-to-back accesses off a single base |
// register (SP) with offsets rising by one each instruction. |
// NOTE(review): presumably that is the pattern the pipelined memory unit |
// is meant to accept -- confirm against the spec/RTL. |
pipeline_stack_test: |
// Reserve a 13-word frame |
SUB 13,SP |
// Save R0..R12 at 1(SP)..13(SP) |
STO R0,1(SP) |
STO R1,2(SP) |
STO R2,3(SP) |
STO R3,4(SP) |
STO R4,5(SP) |
STO R5,6(SP) |
STO R6,7(SP) |
STO R7,8(SP) |
STO R8,9(SP) |
STO R9,10(SP) |
STO R10,11(SP) |
STO R11,12(SP) |
STO R12,13(SP) |
// Bitwise-invert every saved register so a failed reload leaves a wrong value |
XOR -1,R0 |
XOR -1,R1 |
XOR -1,R2 |
XOR -1,R3 |
XOR -1,R4 |
XOR -1,R5 |
XOR -1,R6 |
XOR -1,R7 |
XOR -1,R8 |
XOR -1,R9 |
XOR -1,R10 |
XOR -1,R11 |
XOR -1,R12 |
// Reload R0..R12 from the same thirteen stack words |
LOD 1(SP),R0 |
LOD 2(SP),R1 |
LOD 3(SP),R2 |
LOD 4(SP),R3 |
LOD 5(SP),R4 |
LOD 6(SP),R5 |
LOD 7(SP),R6 |
LOD 8(SP),R7 |
LOD 9(SP),R8 |
LOD 10(SP),R9 |
LOD 11(SP),R10 |
LOD 12(SP),R11 |
LOD 13(SP),R12 |
// Release the frame, then return by loading PC from 1(SP) |
// NOTE(review): presumably the JSR macro left the return address there -- |
// confirm against the macro definition. |
ADD 13,SP |
LOD 1(SP),PC |
#endif // PIPELINE_STACK_TEST |
|
fill 512,0 |
stack: // Must point to a valid word initially |
word 0 |