URL
https://opencores.org/ocsvn/zipcpu/zipcpu/trunk
Subversion Repositories zipcpu
Compare Revisions
- This comparison shows the changes necessary to convert path
/zipcpu/trunk
- from Rev 56 to Rev 55
- ↔ Reverse comparison
Rev 56 → Rev 55
/rtl/cpudefs.v
File deleted
/rtl/core/pipemem.v
77,23 → 77,18
if ((i_rst)||(i_wb_err)) |
wraddr <= 0; |
else if (i_pipe_stb) |
wraddr <= wraddr + 4'h1; |
wraddr <= wraddr + 1; |
always @(posedge i_clk) |
if ((i_rst)||(i_wb_err)) |
rdaddr <= 0; |
else if ((i_wb_ack)&&(cyc)) |
rdaddr <= rdaddr + 4'h1; |
assign nxt_rdaddr = rdaddr + 4'h1; |
else if ((i_wb_ack)&&((o_wb_cyc_gbl)||(o_wb_cyc_lcl))) |
rdaddr <= rdaddr + 1; |
assign nxt_rdaddr = rdaddr + 1; |
|
reg cyc; |
wire gbl_stb, lcl_stb; |
assign lcl_stb = (i_addr[31:8]==24'hc00000)&&(i_addr[7:5]==3'h0); |
assign gbl_stb = (~lcl_stb); |
//= ((i_addr[31:8]!=24'hc00000)||(i_addr[7:5]!=3'h0)); |
assign gbl_stb = ((i_addr[31:8]!=24'hc00000)||(i_addr[7:5]!=3'h0)); |
|
initial cyc = 0; |
initial o_wb_cyc_lcl = 0; |
initial o_wb_cyc_gbl = 0; |
always @(posedge i_clk) |
if (i_rst) |
begin |
101,8 → 96,7
o_wb_cyc_lcl <= 1'b0; |
o_wb_stb_gbl <= 1'b0; |
o_wb_stb_lcl <= 1'b0; |
cyc <= 1'b0; |
end else if (cyc) |
end else if ((o_wb_cyc_gbl)||(o_wb_cyc_lcl)) |
begin |
if ((~i_wb_stall)&&(~i_pipe_stb)) |
begin |
110,8 → 104,8
o_wb_stb_lcl <= 1'b0; |
end else if ((i_pipe_stb)&&(~i_wb_stall)) |
begin |
// o_wb_addr <= i_addr[(AW-1):0]; |
// o_wb_data <= i_data; |
o_wb_addr <= i_addr[(AW-1):0]; |
o_wb_data <= i_data; |
end |
|
if (((i_wb_ack)&&(nxt_rdaddr == wraddr))||(i_wb_err)) |
118,7 → 112,6
begin |
o_wb_cyc_gbl <= 1'b0; |
o_wb_cyc_lcl <= 1'b0; |
cyc <= 1'b0; |
end |
end else if (i_pipe_stb) // New memory operation |
begin // Grab the wishbone |
126,33 → 119,23
o_wb_cyc_gbl <= gbl_stb; |
o_wb_stb_lcl <= lcl_stb; |
o_wb_stb_gbl <= gbl_stb; |
cyc <= 1'b1; |
// o_wb_addr <= i_addr[(AW-1):0]; |
// o_wb_data <= i_data; |
// o_wb_we <= i_op |
end |
always @(posedge i_clk) |
if ((cyc)&&(i_pipe_stb)&&(~i_wb_stall)) |
begin |
o_wb_addr <= i_addr[(AW-1):0]; |
o_wb_data <= i_data; |
end else if ((~cyc)&&(i_pipe_stb)) |
begin |
o_wb_addr <= i_addr[(AW-1):0]; |
o_wb_data <= i_data; |
// o_wb_we <= i_op |
end |
|
always @(posedge i_clk) |
if ((i_pipe_stb)&&(~cyc)) |
if ((i_pipe_stb) |
&&((~i_wb_stall) |
||((~o_wb_cyc_gbl)&&(~o_wb_cyc_lcl)))) |
o_wb_we <= i_op; |
|
initial o_valid = 1'b0; |
always @(posedge i_clk) |
o_valid <= (cyc)&&(i_wb_ack)&&(~o_wb_we); |
o_valid <= ((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_ack)&&(~o_wb_we); |
initial o_err = 1'b0; |
always @(posedge i_clk) |
o_err <= (cyc)&&(i_wb_err); |
assign o_busy = cyc; |
o_err <= ((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_err); |
assign o_busy = (o_wb_cyc_gbl)||(o_wb_cyc_lcl); |
|
always @(posedge i_clk) |
o_wreg <= fifo_oreg[rdaddr]; |
160,6 → 143,6
if (i_wb_ack) |
o_result <= i_wb_data; |
|
assign o_pipe_stalled = (cyc) |
assign o_pipe_stalled = ((o_wb_cyc_gbl)||(o_wb_cyc_lcl)) |
&&((i_wb_stall)||((~o_wb_stb_lcl)&&(~o_wb_stb_gbl))); |
endmodule |
/rtl/core/pipefetch.v
87,8 → 87,7
reg [(LGCACHELEN):0] r_nvalid, r_acks_waiting; |
reg [(BUSW-1):0] cache[0:(CACHELEN-1)]; |
|
wire [(LGCACHELEN-1):0] w_cache_offset; |
reg [1:0] r_cache_offset; |
reg [(LGCACHELEN-1):0] r_cache_offset; |
|
reg r_addr_set; |
reg [(AW-1):0] r_addr; |
109,11 → 108,8
||(r_addr >= r_cache_base + bus_nvalid+5))); |
wire w_running_out_of_cache; |
assign w_running_out_of_cache = (r_addr_set) |
&&(r_addr >= r_cache_base + |
// {{(AW-LGCACHELEN-1),{1'b0}},2'b11, |
// {(LGCACHELEN-1){1'b0}}}) |
// (1<<(LGCACHELEN-2)) + (1<<(LGCACHELEN-1))) |
+(3<<(LGCACHELEN-2))) |
&&(r_addr >= r_cache_base + (1<<(LGCACHELEN-2)) |
+ (1<<(LGCACHELEN-1))) |
&&(|r_nvalid[(LGCACHELEN):(LGCACHELEN-1)]); |
|
initial r_cache_base = RESET_ADDRESS; |
155,7 → 151,7
// o_wb_addr <= (i_new_pc) ? i_pc : r_addr; |
// r_nvalid <= 0; |
// r_cache_base <= (i_new_pc) ? i_pc : r_addr; |
// w_cache_offset <= 0; |
// r_cache_offset <= 0; |
end else if ((~o_wb_cyc)&&(w_running_out_of_cache)) |
begin |
// If we're using the last quarter of the cache, then |
165,7 → 161,7
// o_wb_addr <= r_cache_base + (1<<(LGCACHELEN)); |
// r_nvalid <= r_nvalid - (1<<(LGCACHELEN-2)); |
// r_cache_base <= r_cache_base + (1<<(LGCACHELEN-2)); |
// w_cache_offset <= w_cache_offset + (1<<(LGCACHELEN-2)); |
// r_cache_offset <= r_cache_offset + (1<<(LGCACHELEN-2)); |
end else if (o_wb_cyc) |
begin |
// This handles everything ... but the case where |
196,11 → 192,9
(w_pc_out_of_bounds)||(w_ran_off_end_of_cache))) |
r_nvalid <= 0; |
else if ((~o_wb_cyc)&&(w_running_out_of_cache)) |
r_nvalid[LGCACHELEN:(LGCACHELEN-2)] |
<= r_nvalid[LGCACHELEN:(LGCACHELEN-2)] +3'b111; |
// i.e. - (1<<(LGCACHELEN-2)); |
r_nvalid <= r_nvalid - (1<<(LGCACHELEN-2)); |
else if ((o_wb_cyc)&&(i_wb_ack)) |
r_nvalid <= r_nvalid + {{(LGCACHELEN){1'b0}},1'b1}; // +1; |
r_nvalid <= r_nvalid+1; |
|
always @(posedge i_clk) |
if (i_clear_cache) |
210,10 → 204,7
||(w_ran_off_end_of_cache))) |
r_cache_base <= (i_new_pc) ? i_pc : r_addr; |
else if ((~o_wb_cyc)&&(w_running_out_of_cache)) |
r_cache_base[(AW-1):(LGCACHELEN-2)] |
<= r_cache_base[(AW-1):(LGCACHELEN-2)] |
+ {{(AW-LGCACHELEN+1){1'b0}},1'b1}; |
// i.e. + (1<<(LGCACHELEN-2)); |
r_cache_base <= r_cache_base + (1<<(LGCACHELEN-2)); |
|
always @(posedge i_clk) |
if (i_clear_cache) |
223,8 → 214,7
||(w_ran_off_end_of_cache))) |
r_cache_offset <= 0; |
else if ((~o_wb_cyc)&&(w_running_out_of_cache)) |
r_cache_offset[1:0] <= r_cache_offset[1:0] + 2'b01; |
assign w_cache_offset = { r_cache_offset, {(LGCACHELEN-2){1'b0}} }; |
r_cache_offset <= r_cache_offset + (1<<(LGCACHELEN-2)); |
|
always @(posedge i_clk) |
if (i_clear_cache) |
246,13 → 236,13
if (~o_wb_cyc) |
r_acks_waiting <= 0; |
else if ((o_wb_cyc)&&(o_wb_stb)&&(~i_wb_stall)&&(~i_wb_ack)) |
r_acks_waiting <= r_acks_waiting + {{(LGCACHELEN){1'b0}},1'b1}; |
r_acks_waiting <= r_acks_waiting + 1; |
else if ((o_wb_cyc)&&(i_wb_ack)&&((~o_wb_stb)||(i_wb_stall))) |
r_acks_waiting <= r_acks_waiting + {(LGCACHELEN+1){1'b1}}; // - 1; |
r_acks_waiting <= r_acks_waiting - 1; |
|
always @(posedge i_clk) |
if ((o_wb_cyc)&&(i_wb_ack)) |
cache[r_nvalid[(LGCACHELEN-1):0]+w_cache_offset] |
cache[r_nvalid[(LGCACHELEN-1):0]+r_cache_offset] |
<= i_wb_data; |
|
initial r_addr_set = 1'b0; |
275,11 → 265,11
if (i_new_pc) |
r_addr <= i_pc; |
else if ( ((i_stall_n)&&(w_cv)) || ((~i_stall_n)&&(w_cv)&&(r_addr == o_pc)) ) |
r_addr <= r_addr + {{(AW-1){1'b0}},1'b1}; |
r_addr <= r_addr + 1; |
|
wire [(LGCACHELEN-1):0] c_rdaddr, c_cache_base; |
assign c_cache_base = r_cache_base[(LGCACHELEN-1):0]; |
assign c_rdaddr = r_addr[(LGCACHELEN-1):0]-c_cache_base+w_cache_offset; |
assign c_rdaddr = r_addr[(LGCACHELEN-1):0]-c_cache_base+r_cache_offset; |
always @(posedge i_clk) |
if ((~o_v)||((i_stall_n)&&(o_v))) |
o_i <= cache[c_rdaddr]; |
/rtl/core/zipcpu.v
6,17 → 6,20
// |
// Purpose: This is the top level module holding the core of the Zip CPU |
// together. The Zip CPU is designed to be as simple as possible. |
// (actual implementation aside ...) The instruction set is about as |
// RISC as you can get, there are only 16 instruction types supported. |
// Please see the accompanying spec.pdf file for a description of these |
// instructions. |
// The instruction set is about as RISC as you can get, there are |
// only 16 instruction types supported (of which one isn't yet |
// supported ...) Please see the accompanying iset.html file |
// for a description of these instructions. |
// |
// All instructions are 32-bits wide. All bus accesses, both address and |
// data, are 32-bits over a wishbone bus. |
// All instructions are 32-bits wide. All bus accesses, both |
// address and data, are 32-bits over a wishbone bus. |
// |
// The Zip CPU is fully pipelined with the following pipeline stages: |
// |
// 1. Prefetch, returns the instruction from memory. |
// 1. Prefetch, returns the instruction from memory. On the |
// Basys board that I'm working on, one instruction may be |
// issued every 20 clocks or so, unless and until I implement a |
// cache or local memory. |
// |
// 2. Instruction Decode |
// |
26,12 → 29,60
// |
// 4. Write-back Results |
// |
// Further information about the inner workings of this CPU may be |
// found in the spec.pdf file. (The documentation within this file |
// had become out of date and out of sync with the spec.pdf, so look |
// to the spec.pdf for accurate and up to date information.) |
// A lot of difficult work has been placed into the pipeline stall |
// handling. My original proposal was not to allow pipeline stalls at all. |
// The idea would be that the CPU would just run every clock and whatever |
// stalled answer took place would just get fixed a clock or two later, |
// meaning that the compiler could just schedule everything out. |
// This idea died at the memory interface, which can take a variable |
// amount of time to read or write any value, thus the whole CPU needed |
// to stall on a stalled memory access. |
// |
// My next idea was to just let things complete. I.e., once an instruction |
// starts, it continues to completion no matter what and we go on. This |
// failed at writing the PC. If the PC gets written in something such as |
// a MOV PC,PC+5 instruction, 3 (or however long the pipeline is) clocks |
// later, and whether or not something happens in those clocks depends |
// upon the instruction fetch filling the pipeline, then the CPU has |
// non-deterministic behavior. |
// |
// This leads to two possibilities: either *everything* stalls upon a |
// stall condition, or partial results need to be destroyed before |
// they are written. This is made more difficult by the fact that |
// once a command is written to the memory unit, whether it be a |
// read or a write, there is no undoing it--since peripherals on the |
// bus may act upon the answer with whatever side effects they might |
// have. (For example, writing a '1' to the interrupt register will |
// clear certain interrupts ...) Further, since the memory ops depend |
// upon conditions, we'll need to wait for the condition codes to |
// be available before executing a memory op. Thus, memory ops can |
// proceed without stalling whenever either the previous instruction |
// doesn't write the flags register, or when the memory instruction doesn't |
// depend upon the flags register. |
// |
// The other possibility is that we leave independent instruction |
// execution behind, so that the pipeline is always full and stalls, |
// or moves forward, together on every clock. |
// |
// For now, we pick the first approach: independent instruction execution. |
// Thus, if stage 2 stalls, stages 3-5 may still complete the instructions |
// in their pipeline. This leaves another problem: what happens on a |
// MOV -1+PC,PC instruction? There will be four instructions behind this |
// one (or is it five?) that will need to be 'cancelled'. So here's |
// the plan: Anything can be cancelled before the ALU/MEM stage, |
// since memory ops cannot be canceled after being issued. Thus, the |
// ALU/MEM stage must stall if any prior instruction is going to write |
// the PC register (i.e. JMP). |
// |
// Further, let's define a "STALL" as a reason to not execute a stage |
// due to some condition at or beyond the stage, and let's define |
// a VALID flag to mean that this stage has completed. Thus, the clock |
// enable for a stage is (STG[n-1]VALID)&&((~STG[n]VALID)||(~STG[n]STALL)). |
// The ALU/MEM stages will also depend upon a master clock enable |
// (~SLEEP) condition as well. |
// |
// |
// |
// Creator: Dan Gisselquist, Ph.D. |
// Gisselquist Technology, LLC |
// |
77,9 → 128,18
`define CPU_GIE_BIT 5 |
`define CPU_SLEEP_BIT 4 |
// Compile time defines |
// |
`include "cpudefs.v" |
// |
// (Currently unused) |
// `define OPT_SINGLE_FETCH |
// (Best path--define these!) |
`define OPT_CONDITIONAL_FLAGS |
`define OPT_ILLEGAL_INSTRUCTION |
`ifndef OPT_SINGLE_FETCH |
// The following are pipeline optimization options. |
// They make no sense in a single instruction fetch mode. |
`define OPT_PRECLEAR_BUS |
`define OPT_EARLY_BRANCHING |
`define OPT_PIPELINED_BUS_ACCESS |
`endif |
module zipcpu(i_clk, i_rst, i_interrupt, |
// Debug interface |
i_halt, i_clear_pf_cache, i_dbg_reg, i_dbg_we, i_dbg_data, |
92,16 → 152,9
i_wb_ack, i_wb_stall, i_wb_data, |
i_wb_err, |
// Accounting/CPU usage interface |
o_op_stall, o_pf_stall, o_i_count, |
// |
o_debug); |
o_op_stall, o_pf_stall, o_i_count); |
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=24, |
LGICACHE=6, AW=ADDRESS_WIDTH; |
`ifdef OPT_MULTIPLY |
parameter IMPLEMENT_MPY = 1; |
`else |
parameter IMPLEMENT_MPY = 0; |
`endif |
input i_clk, i_rst, i_interrupt; |
// Debug interface -- inputs |
input i_halt, i_clear_pf_cache; |
111,7 → 164,7
// Debug interface -- outputs |
output reg o_dbg_stall; |
output reg [31:0] o_dbg_reg; |
output reg [3:0] o_dbg_cc; |
output reg [1:0] o_dbg_cc; |
output wire o_break; |
// Wishbone interface -- outputs |
output wire o_wb_gbl_cyc, o_wb_gbl_stb; |
126,25 → 179,13
output wire o_op_stall; |
output wire o_pf_stall; |
output wire o_i_count; |
// |
output reg [31:0] o_debug; |
|
|
// Registers |
// |
// The distributed RAM style comment is necessary on the |
// SPARTAN6 with XST to prevent XST from oversimplifying the register |
// set and in the process ruining everything else. It basically |
// optimizes logic away, to where it no longer works. The logic |
// as described herein will work, this just makes sure XST implements |
// that logic. |
// |
(* ram_style = "distributed" *) |
reg [31:0] regset [0:31]; |
|
// Condition codes |
// (BUS, TRAP,ILL,BREAKEN,STEP,GIE,SLEEP ), V, N, C, Z |
reg [3:0] flags, iflags; |
reg [3:0] flags, iflags; // (TRAP,FPEN,BREAKEN,STEP,GIE,SLEEP ), V, N, C, Z |
wire [10:0] w_uflags, w_iflags; |
reg trap, break_en, step, gie, sleep; |
`ifdef OPT_ILLEGAL_INSTRUCTION |
191,9 → 232,7
dcdM, dcdF_wr, dcd_gie, dcd_break; |
reg [(AW-1):0] dcd_pc; |
reg [23:0] r_dcdI; |
`ifdef OPT_SINGLE_CYCLE |
reg dcd_zI; // true if dcdI == 0 |
`endif |
wire dcdA_stall, dcdB_stall, dcdF_stall; |
|
`ifdef OPT_PRECLEAR_BUS |
226,17 → 265,12
reg [(AW-1):0] op_pc; |
wire [31:0] w_opA, w_opB; |
wire [31:0] opA_nowait, opB_nowait, opA, opB; |
reg opR_wr, opR_cc, opF_wr, op_gie; |
reg opR_wr, opR_cc, opF_wr, op_gie, |
opA_rd, opB_rd; |
wire [10:0] opFl; |
reg [5:0] r_opF; |
wire [7:0] opF; |
reg [2:0] opF_cp; |
reg [6:0] r_opF; |
wire [8:0] opF; |
wire op_ce; |
// Some pipeline control wires |
`ifdef OPT_SINGLE_CYCLE |
reg opA_alu, opA_mem; |
reg opB_alu, opB_mem; |
`endif |
`ifdef OPT_PRECLEAR_BUS |
reg op_clear_bus; |
`endif |
259,8 → 293,11
wire alu_valid; |
wire set_cond; |
reg alu_wr, alF_wr, alu_gie; |
wire alu_illegal_op; |
`ifdef OPT_ILLEGAL_INSTRUCTION |
reg alu_illegal; |
`else |
wire alu_illegal; |
`endif |
|
|
|
314,22 → 351,8
// |
// PIPELINE STAGE #3 :: Read Operands |
// Calculate stall conditions |
assign op_stall = ((opvalid)&&(~master_ce))||( |
// Stall if going into the ALU and the ALU is stalled |
// i.e. if the memory is busy, or we are single |
// stepping |
((opvalid_alu)&&(alu_stall)) |
// |
// ||((opvalid_alu)&&(mem_rdbusy)) // part of alu_stall |
// Stall if we are going into memory with an operation |
// that cannot be pipelined, and the memory is |
// already busy |
||((opvalid_mem)&&(~op_pipe)&&(mem_busy)) |
// |
// Stall if we are going into memory with a pipeable |
// operation, but the memory unit declares it is |
// not going to accept any more pipeline operations |
||((opvalid_mem)&&( op_pipe)&&(mem_pipe_stalled))); |
assign op_stall = ((mem_stalled)&&(opvalid_mem)) |
||((alu_stall)&&(opvalid_alu)); |
assign op_ce = (dcdvalid)&&((~opvalid)||(~op_stall)); |
|
// |
340,16 → 363,15
// busy. |
// 2. Also stall if the prior stage is valid and the master clock enable |
// is de-selected |
// 3. Stall if someone on the other end is writing the CC register, |
// since we don't know if it'll put us to sleep or not. |
// 3. Next case: Stall if we want to start a memory operation and the |
// prior operation will write either the PC or CC registers. |
// 4. Last case: Stall if we would otherwise move a break instruction |
// through the ALU. Break instructions are not allowed through |
// the ALU. |
assign alu_stall = (((~master_ce)||(mem_rdbusy))&&(opvalid_alu)) //Case 1&2 |
// Old case #3--this isn't an ALU stall though ... |
||((opvalid_alu)&&(wr_reg_ce)&&(wr_reg_id[4] == op_gie) |
&&(wr_write_cc)) // Case 3 |
||((opvalid_alu)&&(op_break)); // Case 3 |
||((opvalid_mem)&&(wr_reg_ce)&&(wr_reg_id[4] == op_gie) |
&&((wr_write_pc)||(wr_write_cc))) // Case 3 |
||((opvalid)&&(op_break)); // Case 4 |
assign alu_ce = (master_ce)&&(~mem_rdbusy)&&(opvalid_alu)&&(~alu_stall)&&(~clear_pipeline); |
// |
`ifdef OPT_PIPELINED_BUS_ACCESS |
421,7 → 443,7
|
`ifdef OPT_EARLY_BRANCHING |
always @(posedge i_clk) |
if ((dcd_ce)&&(instruction[27:24]==`CPU_PC_REG)&&(master_ce)) |
if ((dcd_ce)&&(instruction[27:24]==`CPU_PC_REG)&&(~sleep)) |
begin |
dcd_early_branch <= 1'b0; |
// First case, a move to PC instruction |
454,40 → 476,16
if (dcd_ce) dcd_early_branch <= 1'b0; |
dcd_early_branch_stb <= 1'b0; |
end |
generate |
if (AW == 24) |
begin |
always @(posedge i_clk) |
always @(posedge i_clk) |
if (dcd_ce) |
begin |
if (instruction[31]) // Add |
begin |
dcd_branch_pc <= instruction_pc |
+ { {(AW-20){instruction[19]}}, instruction[19:0] } |
+ {{(AW-1){1'b0}},1'b1}; |
end else if (~instruction[28]) // 4'h2 = MOV |
dcd_branch_pc <= instruction_pc+{ {(AW-20){instruction[19]}}, instruction[19:0] } + {{(AW-1){1'b0}},1'b1}; |
else if (~instruction[28]) // 4'h2 = MOV |
dcd_branch_pc <= instruction_pc+{ {(AW-15){instruction[14]}}, instruction[14:0] } + {{(AW-1){1'b0}},1'b1}; |
else // if (instruction[28]) // 4'h3 = LDI |
dcd_branch_pc <= instruction_pc+{ instruction[23:0] } + {{(AW-1){1'b0}},1'b1}; |
end |
end else begin |
always @(posedge i_clk) |
if (dcd_ce) |
begin |
if (instruction[31]) // Add |
begin |
dcd_branch_pc <= instruction_pc |
+ { {(AW-20){instruction[19]}}, instruction[19:0] } |
+ {{(AW-1){1'b0}},1'b1}; |
end else if (~instruction[28]) // 4'h2 = MOV |
begin |
dcd_branch_pc <= instruction_pc+{ {(AW-15){instruction[14]}}, instruction[14:0] } + {{(AW-1){1'b0}},1'b1}; |
end else // if (instruction[28]) // 4'h3 = LDI |
begin |
dcd_branch_pc <= instruction_pc+{ {(AW-24){instruction[23]}}, instruction[23:0] } + {{(AW-1){1'b0}},1'b1}; |
end |
end |
end endgenerate |
`else // OPT_EARLY_BRANCHING |
assign dcd_early_branch_stb = 1'b0; |
assign dcd_early_branch = 1'b0; |
497,8 → 495,7
always @(posedge i_clk) |
if (dcd_ce) |
begin |
dcd_pc <= instruction_pc |
+{{(AW-1){1'b0}},1'b1}; // i.e. dcd_pc+1 |
dcd_pc <= instruction_pc+1; |
|
// Record what operation we are doing |
dcdOp <= instruction[31:28]; |
538,9 → 535,7
dcdA_rd <= 1'b0; |
dcdB_rd <= 1'b1; |
r_dcdI <= { {(9){instruction[14]}}, instruction[14:0] }; |
`ifdef OPT_SINGLE_CYCLE |
dcd_zI <= (instruction[14:0] == 0); |
`endif |
dcdF_wr <= 1'b0; // Don't write flags |
end |
4'h3: begin // Load immediate |
548,9 → 543,7
dcdA_rd <= 1'b0; |
dcdB_rd <= 1'b0; |
r_dcdI <= { instruction[23:0] }; |
`ifdef OPT_SINGLE_CYCLE |
dcd_zI <= (instruction[23:0] == 0); |
`endif |
dcdF_wr <= 1'b0; // Don't write flags |
dcdF <= 4'h8; // This is unconditional |
dcdOp <= 4'h2; |
566,9 → 559,7
dcdF_wr <= (instruction[27:25] != 3'h7); |
`endif |
r_dcdI <= { 8'h00, instruction[15:0] }; |
`ifdef OPT_SINGLE_CYCLE |
dcd_zI <= (instruction[15:0] == 0); |
`endif |
if (instruction[27:24] == 4'he) |
begin |
// NOOP instruction |
593,14 → 584,12
end else begin |
// Actual multiply instruction |
r_dcdI <= { 8'h00, instruction[15:0] }; |
`ifdef OPT_SINGLE_CYCLE |
dcd_zI <= (instruction[15:0] == 0); |
`endif |
dcdA_rd <= 1'b1; |
dcdB_rd <= (instruction[19:16] != 4'hf); |
dcdOp[3:0] <= (instruction[20])? 4'h4:4'h3; |
end end |
4'b011?: begin // LOD/STO or Load/Store |
4'b011?: begin // Load/Store |
dcdF_wr <= 1'b0; // Don't write flags |
dcdA_wr <= (~instruction[28]); // Write on loads |
dcdA_rd <= (instruction[28]); // Read on stores |
608,14 → 597,10
if (instruction[20]) |
begin |
r_dcdI <= { {(8){instruction[15]}}, instruction[15:0] }; |
`ifdef OPT_SINGLE_CYCLE |
dcd_zI <= (instruction[15:0] == 0); |
`endif |
end else begin |
r_dcdI <= { {(4){instruction[19]}}, instruction[19:0] }; |
`ifdef OPT_SINGLE_CYCLE |
dcd_zI <= (instruction[19:0] == 0); |
`endif |
end |
dcdM <= 1'b1; // Memory operation |
`ifdef OPT_PRECLEAR_BUS |
629,14 → 614,10
if (instruction[20]) |
begin |
r_dcdI <= { {(8){instruction[15]}}, instruction[15:0] }; |
`ifdef OPT_SINGLE_CYCLE |
dcd_zI <= (instruction[15:0] == 0); |
`endif |
end else begin |
r_dcdI <= { {(4){instruction[19]}}, instruction[19:0] }; |
`ifdef OPT_SINGLE_CYCLE |
dcd_zI <= (instruction[19:0] == 0); |
`endif |
end end |
endcase |
|
666,7 → 647,6
op_pipe <= (dcdvalid)&&(opvalid_mem)&&(dcdM) // Both mem |
&&(dcdOp[0]==opn[0]) // Both Rd, or both Wr |
&&(dcdB == op_B) // Same address register |
&&(dcdF[2:0] == opF_cp) // Same condition |
&&((r_dcdI == r_opI)||(r_dcdI==r_opI+24'h1)); |
always @(posedge i_clk) |
if (op_ce) // &&(dcdvalid)) |
683,59 → 663,39
// |
assign w_opA = regset[dcdA]; |
assign w_opB = regset[dcdB]; |
|
wire [31:0] w_pcA_v; |
generate |
if (AW < 32) |
assign w_pcA_v = {{(32-AW){1'b0}}, (dcdA[4] == dcd_gie)?dcd_pc:upc }; |
else |
assign w_pcA_v = (dcdA[4] == dcd_gie)?dcd_pc:upc; |
endgenerate |
always @(posedge i_clk) |
if (op_ce) // &&(dcdvalid)) |
begin |
if ((wr_reg_ce)&&(wr_reg_id == dcdA)) |
r_opA <= wr_reg_vl; |
else if ((dcdA_pc)&&(dcdA[4] == dcd_gie)) |
r_opA <= { {(32-AW){1'b0}}, dcd_pc }; |
else if (dcdA_pc) |
r_opA <= w_pcA_v; |
r_opA <= { {(32-AW){1'b0}}, upc }; |
else if (dcdA_cc) |
r_opA <= { w_opA[31:11], (dcd_gie)?w_uflags:w_iflags }; |
else |
r_opA <= w_opA; |
`ifdef OPT_SINGLE_CYCLE |
end else if (opvalid) |
begin // We were going to pick these up when they became valid, |
// but for some reason we're stuck here as they became |
// valid. Pick them up now anyway |
if (((opA_alu)&&(alu_valid)&&(alu_wr))||((opA_mem)&&(mem_valid))) |
if ((opA_alu)||((opA_mem)&&(mem_valid))) |
r_opA <= wr_reg_vl; |
`endif |
end |
|
wire [31:0] dcdI, w_opBnI, w_pcB_v; |
wire [31:0] dcdI, w_opBnI; |
assign dcdI = { {(8){r_dcdI[23]}}, r_dcdI }; |
generate |
if (AW < 32) |
assign w_pcB_v = {{(32-AW){1'b0}}, (dcdB[4] == dcd_gie)?dcd_pc:upc }; |
else |
assign w_pcB_v = (dcdB[4] == dcd_gie)?dcd_pc:upc; |
endgenerate |
|
assign w_opBnI = (~dcdB_rd) ? 32'h00 |
: (((wr_reg_ce)&&(wr_reg_id == dcdB)) ? wr_reg_vl |
: ((dcdB_pc) ? w_pcB_v |
: ((dcdB_cc) ? { w_opB[31:11], (dcd_gie)?w_uflags:w_iflags} |
: w_opB))); |
|
: (((wr_reg_ce)&&(wr_reg_id == dcdB)) ? wr_reg_vl |
: (((dcdB_pc)&&(dcdB[4] == dcd_gie)) ? {{(32-AW){1'b0}},dcd_pc } |
: ((dcdB_pc) ? {{(32-AW){1'b0}},upc} |
: ((dcdB_cc) ? { w_opB[31:11], (dcd_gie)?w_uflags:w_iflags} |
: regset[dcdB])))); |
always @(posedge i_clk) |
if (op_ce) // &&(dcdvalid)) |
r_opB <= w_opBnI + dcdI; |
`ifdef OPT_SINGLE_CYCLE |
else if ((opvalid)&&( |
((opB_alu)&&(alu_valid)&&(alu_wr)) |
||((opB_mem)&&(mem_valid)))) |
else if ((opvalid)&&((opB_alu)||((opB_mem)&&(mem_valid)))) |
r_opB <= wr_reg_vl; |
`endif |
|
// The logic here has become more complex than it should be, no thanks |
// to Xilinx's Vivado trying to help. The conditions are supposed to |
750,20 → 710,17
if (op_ce) |
begin // Set the flag condition codes, bit order is [3:0]=VNCZ |
case(dcdF[2:0]) |
3'h0: r_opF <= 6'h00; // Always |
3'h1: r_opF <= 6'h11; // Z |
3'h2: r_opF <= 6'h10; // NE |
3'h3: r_opF <= 6'h20; // GE (!N) |
3'h4: r_opF <= 6'h30; // GT (!N&!Z) |
3'h5: r_opF <= 6'h24; // LT |
3'h6: r_opF <= 6'h02; // C |
3'h7: r_opF <= 6'h08; // V |
3'h0: r_opF <= 7'h80; // Always |
3'h1: r_opF <= 7'h11; // Z |
3'h2: r_opF <= 7'h10; // NE |
3'h3: r_opF <= 7'h20; // GE (!N) |
3'h4: r_opF <= 7'h30; // GT (!N&!Z) |
3'h5: r_opF <= 7'h24; // LT |
3'h6: r_opF <= 7'h02; // C |
3'h7: r_opF <= 7'h08; // V |
endcase |
end // Bit order is { (flags_not_used), VNCZ mask, VNCZ value } |
assign opF = { r_opF[3], r_opF[5], r_opF[1], r_opF[4:0] }; |
always @(posedge i_clk) |
if (op_ce) |
opF_cp[2:0] <= dcdF[2:0]; |
assign opF = { r_opF[6], r_opF[3], r_opF[5], r_opF[1], r_opF[4:0] }; |
|
initial opvalid = 1'b0; |
initial opvalid_alu = 1'b0; |
841,6 → 798,13
// User level (1), vs supervisor (0)/interrupts disabled |
op_gie <= dcd_gie; |
|
// We're not done with these yet--we still need them |
// for the unclocked assign. We need the unclocked |
// assign so that there's no wait state between an |
// ALU or memory result and the next register that may |
// use that value. |
opA_rd <= dcdA_rd; |
opB_rd <= dcdB_rd; |
// |
`ifdef OPT_EARLY_BRANCHING |
op_wr_pc <= ((dcdA_wr)&&(dcdA_pc)&&(dcdA[4] == dcd_gie))&&(~dcd_early_branch); |
868,48 → 832,37
// We'll create a flag here to start our coordination. Once we |
// define this flag to something other than just plain zero, then |
// the stalls will already be in place. |
`ifdef OPT_SINGLE_CYCLE |
initial opA_alu = 1'b0; |
reg opA_alu, opA_mem; |
always @(posedge i_clk) |
if (op_ce) |
opA_alu <= (opvalid_alu)&&(opR == dcdA)&&(opR_wr)&&(dcdA_rd); |
else if ((opvalid)&&(opA_alu)&&(alu_valid)) |
opA_alu <= 1'b0; |
initial opA_mem = 1'b0; |
always @(posedge i_clk) |
if (op_ce) |
opA_mem <= ((opvalid_mem)&&(opR == dcdA)&&(dcdA_rd)&&(~opn[0])) |
opA_mem <= ((opvalid_mem)&&(opR == dcdA)&&(dcdA_rd)) |
||((~opvalid)&&(mem_busy)&&(~mem_we) |
&&(mem_last_reg == dcdA)&&(dcdA_rd)); |
else if ((opvalid)&&(opA_mem)&&(mem_valid)) |
opA_mem <= 1'b0; |
`endif |
|
always @(posedge i_clk) |
if (mem_ce) |
mem_last_reg <= opR; |
`ifdef OPT_SINGLE_CYCLE |
assign opA = ((opA_alu)&&(alu_valid)&&(alu_wr)) ? alu_result |
assign opA = (opA_alu) ? alu_result |
: ( ((opA_mem)&&(mem_valid))?mem_result |
: r_opA ); |
`else |
assign opA = r_opA; |
`endif |
|
assign dcdA_stall = (dcdvalid)&&(dcdA_rd)&&( |
`ifdef OPT_SINGLE_CYCLE |
// Skip the requirement on writing back opA |
// Stall on memory, since we'll always need to stall for a |
// memory access anyway |
// ((opvalid_mem)&&(opR_wr)&&(opR == dcdA)) |
((opvalid_alu)&&(opF_wr)&&(dcdA_cc))); |
`else |
((opvalid)&&(opR_wr)&&(opR == dcdA)) |
||((opvalid_alu)&&(opF_wr)&&(dcdA_cc)) |
||((mem_rdbusy)&&(mem_last_reg == dcdA)) |
); |
`endif |
// Place stalls for this latter case into the ops stage |
// ||((mem_busy)&&(~mem_we)); |
|
`ifdef OPT_SINGLE_CYCLE |
reg opB_alu, opB_mem; |
always @(posedge i_clk) |
if (op_ce) |
opB_alu <= (opvalid_alu)&&(opR == dcdB)&&(opR_wr)&&(dcdB_rd)&&(dcd_zI); |
916,20 → 869,15
always @(posedge i_clk) |
if (op_ce) |
opB_mem <= (dcd_zI)&&(dcdB_rd)&&( |
((opvalid_mem)&&(opR == dcdB)&&(~opn[0])) |
((opvalid_mem)&&(opR == dcdB)) |
||((~opvalid)&&(mem_busy)&&(~mem_we) |
&&(mem_last_reg == dcdB))); |
else if ((opvalid)&&(opB_mem)&&(mem_valid)) |
opB_mem <= 1'b0; |
assign opB = ((opB_alu)&&(alu_valid)&&(alu_wr)) ? alu_result |
assign opB = (opB_alu) ? alu_result |
: ( ((opB_mem)&&(mem_valid))?mem_result |
: r_opB ); |
`else |
assign opB = r_opB; |
`endif |
|
assign dcdB_stall = (dcdvalid)&&(dcdB_rd)&&( |
`ifdef OPT_SINGLE_CYCLE |
// Stall on memory ops writing to my register |
// (i.e. loads), or on any write to my |
// register if I have an immediate offset |
949,12 → 897,6
// Stall on any ongoing memory operation that |
// will write to opB |
||((mem_busy)&&(~mem_we)&&(mem_last_reg==dcdB))); |
`else |
((opvalid)&&(opR_wr)&&(opR == dcdB)) |
||((opvalid_alu)&&(opF_wr)&&(dcdB_cc)) |
||((mem_rdbusy)&&(mem_last_reg == dcdB)) |
); |
`endif |
assign dcdF_stall = (dcdvalid)&&((~dcdF[3])||(dcdA_cc)||(dcdB_cc)) |
&&(opvalid)&&(opR_cc); |
// |
962,9 → 904,9
// PIPELINE STAGE #4 :: Apply Instruction |
// |
// |
cpuops #(IMPLEMENT_MPY) doalu(i_clk, i_rst, alu_ce, |
cpuops doalu(i_clk, i_rst, alu_ce, |
(opvalid_alu), opn, opA, opB, |
alu_result, alu_flags, alu_valid, alu_illegal_op); |
alu_result, alu_flags, alu_valid); |
|
assign set_cond = ((opF[7:4]&opFl[3:0])==opF[3:0]); |
initial alF_wr = 1'b0; |
992,12 → 934,9
if ((alu_ce)||(mem_ce)) |
alu_pc <= op_pc; |
`ifdef OPT_ILLEGAL_INSTRUCTION |
reg r_alu_illegal; |
initial r_alu_illegal = 0; |
always @(posedge i_clk) |
if ((alu_ce)||(mem_ce)) |
r_alu_illegal <= op_illegal; |
assign alu_illegal = (alu_illegal_op)||(r_alu_illegal); |
alu_illegal <= op_illegal; |
`endif |
|
initial alu_pc_valid = 1'b0; |
1086,8 → 1025,8
// includes the set condition ... |
assign wr_flags_ce = (alF_wr)&&(alu_valid)&&(~clear_pipeline)&&(~alu_illegal); |
`ifdef OPT_ILLEGAL_INSTRUCTION |
assign w_uflags = { bus_err_flag, trap, ill_err, 1'b0, step, 1'b1, sleep, ((wr_flags_ce)&&(alu_gie))?alu_flags:flags }; |
assign w_iflags = { bus_err_flag, trap, ill_err,break_en, 1'b0, 1'b0, sleep, ((wr_flags_ce)&&(~alu_gie))?alu_flags:iflags }; |
assign w_uflags = { bus_err_flag, trap, ill_err, 1'b0, step, 1'b1, sleep, ((wr_flags_ce)&&(alu_gie))?alu_flags:flags }; |
assign w_iflags = { bus_err_flag, trap, ill_err, break_en, 1'b0, 1'b0, sleep, ((wr_flags_ce)&&(~alu_gie))?alu_flags:iflags }; |
`else |
assign w_uflags = { bus_err_flag, trap, ill_err, 1'b0, step, 1'b1, sleep, ((wr_flags_ce)&&(alu_gie))?alu_flags:flags }; |
assign w_iflags = { bus_err_flag, trap, ill_err, break_en, 1'b0, 1'b0, sleep, ((wr_flags_ce)&&(~alu_gie))?alu_flags:iflags }; |
1298,7 → 1237,7
&&(i_dbg_reg[4:0] == { gie, `CPU_PC_REG})) |
pf_pc <= i_dbg_data[(AW-1):0]; |
else if (dcd_ce) |
pf_pc <= pf_pc + {{(AW-1){1'b0}},1'b1}; |
pf_pc <= pf_pc + 1; |
|
initial new_pc = 1'b1; |
always @(posedge i_clk) |
1318,36 → 1257,16
|
// |
// The debug interface |
generate |
if (AW<32) |
begin |
always @(posedge i_clk) |
always @(posedge i_clk) |
begin |
o_dbg_reg <= regset[i_dbg_reg]; |
if (i_dbg_reg[3:0] == `CPU_PC_REG) |
o_dbg_reg <= {{(32-AW){1'b0}},(i_dbg_reg[4])?upc:ipc}; |
else if (i_dbg_reg[3:0] == `CPU_CC_REG) |
begin |
o_dbg_reg[10:0] <= (i_dbg_reg[4])?w_uflags:w_iflags; |
o_dbg_reg[`CPU_GIE_BIT] <= gie; |
end |
end |
end else begin |
always @(posedge i_clk) |
begin |
o_dbg_reg <= regset[i_dbg_reg]; |
if (i_dbg_reg[3:0] == `CPU_PC_REG) |
o_dbg_reg <= (i_dbg_reg[4])?upc:ipc; |
else if (i_dbg_reg[3:0] == `CPU_CC_REG) |
begin |
o_dbg_reg[10:0] <= (i_dbg_reg[4])?w_uflags:w_iflags; |
o_dbg_reg[`CPU_GIE_BIT] <= gie; |
end |
end |
end endgenerate |
|
always @(posedge i_clk) |
o_dbg_cc <= { o_break, bus_err, gie, sleep }; |
o_dbg_cc <= { gie, sleep }; |
|
always @(posedge i_clk) |
o_dbg_stall <= (i_halt)&&( |
1364,14 → 1283,4
assign o_op_stall = (master_ce)&&((~opvalid)||(op_stall)); |
assign o_pf_stall = (master_ce)&&(~pf_valid); |
assign o_i_count = (alu_pc_valid)&&(~clear_pipeline); |
|
always @(posedge i_clk) |
o_debug <= { |
pf_pc[7:0], |
pf_valid, dcdvalid, opvalid, alu_valid, mem_valid, |
op_ce, alu_ce, mem_ce, |
opA[23:20], opA[3:0], |
wr_reg_vl[7:0] |
}; |
|
endmodule |
/rtl/core/cpuops.v
29,9 → 29,7
// |
/////////////////////////////////////////////////////////////////////////// |
// |
module cpuops(i_clk, i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid, |
o_illegal); |
parameter IMPLEMENT_MPY = 1; |
module cpuops(i_clk, i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid); |
input i_clk, i_rst, i_ce; |
input [3:0] i_op; |
input [31:0] i_a, i_b; |
39,13 → 37,11
output reg [31:0] o_c; |
output wire [3:0] o_f; |
output reg o_valid; |
output wire o_illegal; |
|
wire [63:0] w_rol_tmp; |
assign w_rol_tmp = { i_a, i_a } << i_b[4:0]; |
wire [31:0] w_rol_result; |
assign w_rol_result = w_rol_tmp[63:32]; // Won't set flags |
`ifndef NEW_NOT_OLD_CODE |
wire [33:0] w_lsr_result, w_asr_result; |
wire signed [33:0] w_ia_input; |
assign w_ia_input = { i_a[31], i_a, 1'b0 }; |
53,15 → 49,14
: ( w_ia_input >>> (i_b[4:0]) );// ASR |
assign w_lsr_result = (|i_b[31:5])? 34'h00 |
: { 1'b0, i_a, 1'b0 } >> (i_b[4:0]);// LSR |
`else |
wire [32:0] w_lsr_result, w_asr_result; |
assign w_asr_result = (|i_b[31:5])? {(33){i_a[31]}} |
: ( {i_a, 1'b0 } >>> (i_b[4:0]) );// ASR |
assign w_lsr_result = (|i_b[31:5])? 33'h00 |
: ( { i_a, 1'b0 } >> (i_b[4:0]) );// LSR |
`endif |
|
|
wire signed [16:0] w_mpy_a_input, w_mpy_b_input; |
wire signed [33:0] w_mpy_result; |
assign w_mpy_a_input = { ((i_a[15])&&(i_op[2])), i_a[15:0] }; |
assign w_mpy_b_input = { ((i_b[15])&&(i_op[2])), i_b[15:0] }; |
assign w_mpy_result = w_mpy_a_input * w_mpy_b_input; |
|
wire z, n, v; |
reg c, pre_sign, set_ovfl; |
always @(posedge i_clk) |
71,11 → 66,7
||((i_op==4'ha)&&(i_a[31] == i_b[31])) // ADD |
||(i_op == 4'hd) // LSL |
||(i_op == 4'hf)); // LSR |
|
generate |
if (IMPLEMENT_MPY == 0) |
begin |
always @(posedge i_clk) |
always @(posedge i_clk) |
if (i_ce) |
begin |
pre_sign <= (i_a[31]); |
83,33 → 74,6
casez(i_op) |
4'b?000:{c,o_c } <= {(i_b>i_a),i_a - i_b};// CMP/SUB |
4'b?001: o_c <= i_a & i_b; // BTST/And |
4'h5: o_c <= w_rol_result; // ROL |
4'h6: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO |
4'h7: o_c <= { i_b[15:0], i_a[15:0] }; // LODIHI |
4'ha: { c, o_c } <= i_a + i_b; // Add |
4'hb: o_c <= i_a | i_b; // Or |
4'hc: o_c <= i_a ^ i_b; // Xor |
4'hd: { c, o_c } <= (|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0]; // LSL |
4'he: { o_c, c } <= w_asr_result[32:0];// ASR |
4'hf: { o_c, c } <= w_lsr_result[32:0];// LSR |
default: o_c <= i_b; // MOV, LDI |
endcase |
end |
end else begin |
wire signed [16:0] w_mpy_a_input, w_mpy_b_input; |
wire signed [33:0] w_mpy_result; |
assign w_mpy_a_input = { ((i_a[15])&&(i_op[2])), i_a[15:0] }; |
assign w_mpy_b_input = { ((i_b[15])&&(i_op[2])), i_b[15:0] }; |
assign w_mpy_result = w_mpy_a_input * w_mpy_b_input; |
|
always @(posedge i_clk) |
if (i_ce) |
begin |
pre_sign <= (i_a[31]); |
c <= 1'b0; |
casez(i_op) |
4'b?000:{c,o_c } <= {(i_b>i_a),i_a - i_b};// CMP/SUB |
4'b?001: o_c <= i_a & i_b; // BTST/And |
4'h3: { c, o_c } <= {1'b0,w_mpy_result[31:0]}; // MPYU/S |
4'h4: { c, o_c } <= {1'b0,w_mpy_result[31:0]}; // MPYU/S |
4'h5: o_c <= w_rol_result; // ROL |
124,19 → 88,7
default: o_c <= i_b; // MOV, LDI |
endcase |
end |
end endgenerate |
|
generate |
if (IMPLEMENT_MPY == 0) |
begin |
reg r_illegal; |
always @(posedge i_clk) |
r_illegal <= (i_op == 4'h3)||(i_op == 4'h4); |
assign o_illegal = r_illegal; |
end else |
assign o_illegal = 1'b0; |
endgenerate |
|
assign z = (o_c == 32'h0000); |
assign n = (o_c[31]); |
assign v = (set_ovfl)&&(pre_sign != o_c[31]); |
147,6 → 99,8
always @(posedge i_clk) |
if (i_rst) |
o_valid <= 1'b0; |
else |
o_valid <= (i_ce)&&(i_valid); |
else if (i_ce) |
o_valid <= i_valid; |
else if (~i_ce) |
o_valid <= 1'b0; |
endmodule |
/rtl/zipsystem.v
96,7 → 96,7
// you get the picture. But, the bottom line is that I no longer need this |
// delay. |
// |
// `define DELAY_EXT_BUS // Required no longer! |
// `define DELAY_EXT_BUS // Required no longer!k |
// |
// |
// If space is tight, you might not wish to have your performance and |
113,7 → 113,7
`define PERIPHBASE 32'hc0000000 |
`define INTCTRL 5'h0 // |
`define WATCHDOG 5'h1 // Interrupt generates reset signal |
`define BUSWATCHDOG 5'h2 // Sets IVEC[0] |
// `define CACHECTRL 5'h2 // Sets IVEC[0] |
`define CTRINT 5'h3 // Sets IVEC[5] |
`define TIMER_A 5'h4 // Sets IVEC[4] |
`define TIMER_B 5'h5 // Sets IVEC[3] |
161,8 → 161,7
o_ext_int, |
// Wishbone slave interface for debugging purposes |
i_dbg_cyc, i_dbg_stb, i_dbg_we, i_dbg_addr, i_dbg_data, |
o_dbg_ack, o_dbg_stall, o_dbg_data, |
o_cpu_debug); |
o_dbg_ack, o_dbg_stall, o_dbg_data); |
parameter RESET_ADDRESS=24'h0100000, ADDRESS_WIDTH=24, |
LGICACHE=6, START_HALTED=1, EXTERNAL_INTERRUPTS=1, |
// Derived parameters |
185,8 → 184,6
output wire o_dbg_ack; |
output wire o_dbg_stall; |
output wire [31:0] o_dbg_data; |
// |
output wire [31:0] o_cpu_debug; |
|
wire [31:0] ext_idata; |
|
238,7 → 235,7
wire cpu_break, dbg_cmd_write; |
reg cmd_reset, cmd_halt, cmd_step, cmd_clear_pf_cache; |
reg [5:0] cmd_addr; |
wire [3:0] cpu_dbg_cc; |
wire [1:0] cpu_dbg_cc; |
assign dbg_cmd_write = (dbg_cyc)&&(dbg_stb)&&(dbg_we)&&(~dbg_addr); |
// |
initial cmd_reset = 1'b1; |
255,8 → 252,12
cmd_halt <= 1'b1; |
|
always @(posedge i_clk) |
cmd_clear_pf_cache = (~i_rst)&&(dbg_cmd_write) |
&&((dbg_idata[11])||(dbg_idata[6])); |
if (i_rst) |
cmd_clear_pf_cache <= 1'b0; |
else if (dbg_cmd_write) |
cmd_clear_pf_cache <= dbg_idata[11]; |
else |
cmd_clear_pf_cache <= 1'b0; |
// |
initial cmd_step = 1'b0; |
always @(posedge i_clk) |
285,7 → 286,7
// 0x02000 -> cc.gie |
// 0x10000 -> External interrupt line is high |
assign cmd_data = { 7'h00, {(9-EXTERNAL_INTERRUPTS){1'b0}}, i_ext_int, |
cpu_dbg_cc, |
2'b00, cpu_dbg_cc, |
1'b0, cmd_halt, (~cpu_dbg_stall), 1'b0, |
pic_data[15], cpu_reset, cmd_addr }; |
wire cpu_gie; |
314,32 → 315,13
wdt_ack, wdt_stall, wdt_data, wdt_reset); |
|
// |
// Position two, a second watchdog timer--this time for the wishbone |
// bus, in order to tell/find wishbone bus lockups. In its current |
// configuration, it cannot be configured and all bus accesses must |
// take less than the number written to this register. |
// Position two ... unclaimed / unused |
// |
reg wdbus_ack; |
reg [(AW-1):0] r_wdbus_data; |
wire [31:0] wdbus_data; |
wire [14:0] wdbus_ignored_data; |
wire reset_wdbus_timer, wdbus_int, wdbus_ack_ignored, wdbus_stall; |
assign reset_wdbus_timer = ((o_wb_cyc)&&((o_wb_stb)||(i_wb_ack))); |
// o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data, |
// i_wb_ack, i_wb_stall, i_wb_data, i_wb_err, |
ziptimer #(15) watchbus(i_clk, (cpu_reset), o_wb_cyc, |
reset_wdbus_timer, reset_wdbus_timer, 1'b1, 15'h2000, |
wdbus_ack_ignored, wdbus_stall, wdbus_ignored_data, |
wdbus_int); |
initial r_wdbus_data = 0; |
wire cache_stall; |
assign cache_stall = 1'b0; |
reg cache_ack; |
always @(posedge i_clk) |
if (wdbus_int) |
r_wdbus_data = o_wb_addr; |
assign wdbus_data = { {(32-AW){1'b0}}, r_wdbus_data }; |
initial wdbus_ack = 1'b0; |
always @(posedge i_clk) |
wdbus_ack <= ((sys_cyc)&&(sys_stb)&&(sys_addr == 5'h02)); |
|
cache_ack <= (sys_cyc)&&(sys_stb)&&(sys_addr == 5'h02); |
// Counters -- for performance measurement and accounting |
// |
// Here's the stuff we'll be counting .... |
467,8 → 449,6
wire [(AW-1):0] dc_addr; |
wire cpu_gbl_cyc; |
assign dmac_stb = (sys_stb)&&(sys_addr[4]); |
// `define INCLUDE_DMA_CONTROLLER |
`ifdef INCLUDE_DMA_CONTROLLER |
wbdmac #(AW) dma_controller(i_clk, |
sys_cyc, dmac_stb, sys_we, |
sys_addr[1:0], sys_data, |
482,24 → 462,8
dmac_int, |
// Whether or not the CPU wants the bus |
cpu_gbl_cyc); |
`else |
reg r_dmac_ack; |
always @(posedge i_clk) |
r_dmac_ack <= (sys_cyc)&&(dmac_stb); |
assign dmac_ack = r_dmac_ack; |
assign dmac_data = 32'h000; |
assign dmac_stall = 1'b0; |
|
|
assign dc_cyc = 1'b0; |
assign dc_stb = 1'b0; |
assign dc_we = 1'b0; |
assign dc_addr = { (AW) {1'b0} }; |
assign dc_data = 32'h00; |
|
assign dmac_int = 1'b0; |
`endif |
|
|
`ifdef INCLUDE_ACCOUNTING_COUNTERS |
// |
// Counter Interrupt controller |
607,8 → 571,7
cpu_we, cpu_addr, cpu_data, |
cpu_ack, cpu_stall, wb_data, |
cpu_err, |
cpu_op_stall, cpu_pf_stall, cpu_i_count, |
o_cpu_debug); |
cpu_op_stall, cpu_pf_stall, cpu_i_count); |
|
// Now, arbitrate the bus ... first for the local peripherals |
// For the debugger to have access to the local system bus, the |
655,7 → 618,7
cpu_ext_err; |
wire [(AW-1):0] ext_addr; |
wire [31:0] ext_odata; |
wbpriarbiter #(32,AW) dmacvcpu(i_clk, |
wbpriarbiter #(32,AW) dmacvcpu(i_clk, i_rst, |
cpu_gbl_cyc, cpu_gbl_stb, cpu_we, cpu_addr, cpu_data, |
cpu_ext_ack, cpu_ext_stall, cpu_ext_err, |
dc_cyc, dc_stb, dc_we, dc_addr, dc_data, |
668,7 → 631,7
ext_cyc, ext_stb, ext_we, ext_addr, ext_odata, |
ext_ack, ext_stall, ext_idata, ext_err, |
o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data, |
i_wb_ack, i_wb_stall, i_wb_data, (i_wb_err)||(wdbus_int)); |
i_wb_ack, i_wb_stall, i_wb_data, i_wb_err); |
`else |
assign o_wb_cyc = ext_cyc; |
assign o_wb_stb = ext_stb; |
678,7 → 641,7
assign ext_ack = i_wb_ack; |
assign ext_stall = i_wb_stall; |
assign ext_idata = i_wb_data; |
assign ext_err = (i_wb_err)||(wdbus_int); |
assign ext_err = i_wb_err; |
`endif |
|
wire tmr_ack; |
691,13 → 654,13
assign wb_data = (tmr_ack|wdt_ack)?((tmr_ack)?tmr_data:wdt_data) |
:((actr_ack|dmac_ack)?((actr_ack)?actr_data:dmac_data) |
:((pic_ack|ctri_ack)?((pic_ack)?pic_data:ctri_data) |
:((wdbus_ack)?wdbus_data:(ext_idata)))); |
:(ext_idata))); |
|
assign sys_stall = (tma_stall | tmb_stall | tmc_stall | jif_stall |
| wdt_stall | ctri_stall | actr_stall |
| pic_stall | dmac_stall | wdbus_stall); |
| pic_stall | dmac_stall | cache_stall); |
assign cpu_stall = (sys_stall)|(cpu_ext_stall); |
assign sys_ack = (tmr_ack|wdt_ack|ctri_ack|actr_ack|pic_ack|dmac_ack|wdbus_ack); |
assign sys_ack = (tmr_ack|wdt_ack|ctri_ack|actr_ack|pic_ack|dmac_ack|cache_ack); |
assign cpu_ack = (sys_ack)||(cpu_ext_ack); |
assign cpu_err = (cpu_ext_err)&&(cpu_gbl_cyc); |
|
/rtl/zipbones.v
41,8 → 41,7
o_ext_int, |
// Wishbone slave interface for debugging purposes |
i_dbg_cyc, i_dbg_stb, i_dbg_we, i_dbg_addr, i_dbg_data, |
o_dbg_ack, o_dbg_stall, o_dbg_data, |
o_zip_debug); |
o_dbg_ack, o_dbg_stall, o_dbg_data); |
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=32, |
LGICACHE=6, START_HALTED=1, |
AW=ADDRESS_WIDTH; |
64,8 → 63,6
output reg o_dbg_ack; |
output wire o_dbg_stall; |
output wire [31:0] o_dbg_data; |
// |
output wire [31:0] o_zip_debug; |
|
// |
// |
92,7 → 89,7
wire cpu_break, dbg_cmd_write; |
reg cmd_reset, cmd_halt, cmd_step, cmd_clear_pf_cache; |
reg [4:0] cmd_addr; |
wire [3:0] cpu_dbg_cc; |
wire [1:0] cpu_dbg_cc; |
assign dbg_cmd_write = (i_dbg_cyc)&&(i_dbg_stb)&&(i_dbg_we)&&(~i_dbg_addr); |
// |
initial cmd_reset = 1'b1; |
129,6 → 126,7
|
wire cpu_halt, cpu_dbg_stall; |
assign cpu_halt = (i_rst)||((cmd_halt)&&(~cmd_step)); |
wire [31:0] pic_data; |
wire [31:0] cmd_data; |
// Values: |
// 0x0003f -> cmd_addr mask |
142,9 → 140,11
// 0x02000 -> cc.gie |
// 0x10000 -> External interrupt line is high |
assign cmd_data = { 7'h00, 8'h00, i_ext_int, |
cpu_dbg_cc, |
2'b00, cpu_dbg_cc, |
1'b0, cmd_halt, (~cpu_dbg_stall), 1'b0, |
1'b0, cpu_reset, 1'b0, cmd_addr }; |
pic_data[15], cpu_reset, 1'b0, cmd_addr }; |
wire cpu_gie; |
assign cpu_gie = cpu_dbg_cc[1]; |
|
// |
// The CPU itself |
152,7 → 152,8
wire cpu_gbl_stb, cpu_lcl_cyc, cpu_lcl_stb, |
cpu_we, cpu_dbg_we, |
cpu_op_stall, cpu_pf_stall, cpu_i_count; |
wire [31:0] cpu_data; |
wire [31:0] cpu_data, wb_data; |
wire cpu_ack, cpu_stall, cpu_err; |
wire [31:0] cpu_dbg_data; |
assign cpu_dbg_we = ((i_dbg_cyc)&&(i_dbg_stb) |
&&(i_dbg_we)&&(i_dbg_addr)); |
164,10 → 165,9
o_wb_cyc, o_wb_stb, |
cpu_lcl_cyc, cpu_lcl_stb, |
o_wb_we, o_wb_addr, o_wb_data, |
i_wb_ack, i_wb_stall, i_wb_data, |
i_wb_ack, i_wb_stall, wb_data, |
i_wb_err, |
cpu_op_stall, cpu_pf_stall, cpu_i_count, |
o_zip_debug); |
cpu_op_stall, cpu_pf_stall, cpu_i_count); |
|
// Return debug response values |
assign o_dbg_data = (~i_dbg_addr)?cmd_data :cpu_dbg_data; |
176,6 → 176,6
o_dbg_ack <= (i_dbg_cyc)&&((~i_dbg_addr)||(~o_dbg_stall)); |
assign o_dbg_stall=(i_dbg_cyc)&&(cpu_dbg_stall)&&(i_dbg_addr); |
|
assign o_ext_int = (cmd_halt) && (~i_wb_stall); |
assign o_ext_int = (cmd_halt) && (~cpu_stall); |
|
endmodule |
/rtl/peripherals/ziptimer.v
113,7 → 113,7
if (wb_write) |
r_value <= i_wb_data[(VW-1):0]; |
else if ((r_running)&&(i_ce)&&(~o_int)) |
r_value <= r_value + {(VW){1'b1}}; // r_value - 1; |
r_value <= r_value - 1; |
else if ((r_running)&&(r_auto_reload)&&(o_int)) |
r_value <= r_reload_value; |
|
/rtl/Makefile
37,7 → 37,7
CORED:= core |
PRPHD:= peripherals |
AUXD := aux |
VSRC := zipsystem.v cpudefs.v \ |
VSRC := zipsystem.v \ |
$(PRPHD)/wbdmac.v $(PRPHD)/icontrol.v \ |
$(PRPHD)/zipcounter.v $(PRPHD)/zipjiffies.v \ |
$(PRPHD)/ziptimer.v $(PRPHD)/ziptrap.v \ |
46,7 → 46,7
$(CORED)/memops.v $(CORED)/pipemem.v \ |
$(AUXD)/busdelay.v \ |
$(AUXD)/wbdblpriarb.v $(AUXD)/wbpriarbiter.v |
VZIP := zipbones.v cpudefs.v \ |
VZIP := zipbones.v \ |
$(CORED)/zipcpu.v $(CORED)/cpuops.v \ |
$(CORED)/pipefetch.v $(CORED)/prefetch.v \ |
$(CORED)/memops.v $(CORED)/pipemem.v \ |
56,11 → 56,9
|
$(VOBJ)/Vzipsystem.cpp: $(VSRC) |
verilator -cc -y $(CORED) -y $(PRPHD) -y $(AUXD) zipsystem.v |
$(VOBJ)/Vzipsystem.h: $(VOBJ)/Vzipsystem.cpp |
|
$(VOBJ)/Vzipbones.cpp: $(VZIP) |
verilator -cc -y $(CORED) -y $(PRPHD) -y $(AUXD) zipbones.v |
$(VOBJ)/Vzipbones.h: $(VOBJ)/Vzipbones.cpp |
|
$(VOBJ)/Vzipsystem__ALL.a: $(VOBJ)/Vzipsystem.cpp $(VOBJ)/Vzipsystem.h |
cd $(VOBJ); make -f Vzipsystem.mk |
68,11 → 66,9
$(VOBJ)/Vzipbones__ALL.a: $(VOBJ)/Vzipbones.cpp $(VOBJ)/Vzipbones.h |
cd $(VOBJ); make -f Vzipbones.mk |
|
cpudefs.h: cpudefs.v |
echo "// " > $@ |
echo "// Do not edit this file, it is automatically generated!" >> $@ |
echo "// " >> $@ |
grep "^\`" $^ | sed -e '{ s/^`/#/ }' >> $@ |
cpudefs.h: $(CORED)/zipcpu.v |
@echo "// Do not edit this file, it is automatically generated!" > $@ |
@grep ^.define $^ | grep OPT_ | sed -e '{ s/^.d/#d/ }' >> $@ |
|
.PHONY: zipsystem |
zipsystem: $(VOBJ)/Vzipsystem__ALL.a |
/rtl/aux/wbpriarbiter.v
45,7 → 45,7
// |
/////////////////////////////////////////////////////////////////////////// |
// |
module wbpriarbiter(i_clk, |
module wbpriarbiter(i_clk, i_rst, |
// Bus A |
i_a_cyc, i_a_stb, i_a_we, i_a_adr, i_a_dat, o_a_ack, o_a_stall, o_a_err, |
// Bus B |
53,8 → 53,9
// Both buses |
o_cyc, o_stb, o_we, o_adr, o_dat, i_ack, i_stall, i_err); |
parameter DW=32, AW=32; |
// |
input i_clk; |
// Wishbone doesn't use an i_ce signal. While it could, they dislike |
// what it would (might) do to the synchronous reset signal, i_rst. |
input i_clk, i_rst; |
// Bus A |
input i_a_cyc, i_a_stb, i_a_we; |
input [(AW-1):0] i_a_adr; |