OpenCores
URL https://opencores.org/ocsvn/zipcpu/zipcpu/trunk

Subversion Repositories zipcpu

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /zipcpu/trunk
    from Rev 56 to Rev 55
    Reverse comparison

Rev 56 → Rev 55

/rtl/cpudefs.v File deleted
/rtl/core/pipemem.v
77,23 → 77,18
if ((i_rst)||(i_wb_err))
wraddr <= 0;
else if (i_pipe_stb)
wraddr <= wraddr + 4'h1;
wraddr <= wraddr + 1;
always @(posedge i_clk)
if ((i_rst)||(i_wb_err))
rdaddr <= 0;
else if ((i_wb_ack)&&(cyc))
rdaddr <= rdaddr + 4'h1;
assign nxt_rdaddr = rdaddr + 4'h1;
else if ((i_wb_ack)&&((o_wb_cyc_gbl)||(o_wb_cyc_lcl)))
rdaddr <= rdaddr + 1;
assign nxt_rdaddr = rdaddr + 1;
 
reg cyc;
wire gbl_stb, lcl_stb;
assign lcl_stb = (i_addr[31:8]==24'hc00000)&&(i_addr[7:5]==3'h0);
assign gbl_stb = (~lcl_stb);
//= ((i_addr[31:8]!=24'hc00000)||(i_addr[7:5]!=3'h0));
assign gbl_stb = ((i_addr[31:8]!=24'hc00000)||(i_addr[7:5]!=3'h0));
 
initial cyc = 0;
initial o_wb_cyc_lcl = 0;
initial o_wb_cyc_gbl = 0;
always @(posedge i_clk)
if (i_rst)
begin
101,8 → 96,7
o_wb_cyc_lcl <= 1'b0;
o_wb_stb_gbl <= 1'b0;
o_wb_stb_lcl <= 1'b0;
cyc <= 1'b0;
end else if (cyc)
end else if ((o_wb_cyc_gbl)||(o_wb_cyc_lcl))
begin
if ((~i_wb_stall)&&(~i_pipe_stb))
begin
110,8 → 104,8
o_wb_stb_lcl <= 1'b0;
end else if ((i_pipe_stb)&&(~i_wb_stall))
begin
// o_wb_addr <= i_addr[(AW-1):0];
// o_wb_data <= i_data;
o_wb_addr <= i_addr[(AW-1):0];
o_wb_data <= i_data;
end
 
if (((i_wb_ack)&&(nxt_rdaddr == wraddr))||(i_wb_err))
118,7 → 112,6
begin
o_wb_cyc_gbl <= 1'b0;
o_wb_cyc_lcl <= 1'b0;
cyc <= 1'b0;
end
end else if (i_pipe_stb) // New memory operation
begin // Grab the wishbone
126,33 → 119,23
o_wb_cyc_gbl <= gbl_stb;
o_wb_stb_lcl <= lcl_stb;
o_wb_stb_gbl <= gbl_stb;
cyc <= 1'b1;
// o_wb_addr <= i_addr[(AW-1):0];
// o_wb_data <= i_data;
// o_wb_we <= i_op
end
always @(posedge i_clk)
if ((cyc)&&(i_pipe_stb)&&(~i_wb_stall))
begin
o_wb_addr <= i_addr[(AW-1):0];
o_wb_data <= i_data;
end else if ((~cyc)&&(i_pipe_stb))
begin
o_wb_addr <= i_addr[(AW-1):0];
o_wb_data <= i_data;
// o_wb_we <= i_op
end
always @(posedge i_clk)
if ((i_pipe_stb)&&(~cyc))
if ((i_pipe_stb)
&&((~i_wb_stall)
||((~o_wb_cyc_gbl)&&(~o_wb_cyc_lcl))))
o_wb_we <= i_op;
 
initial o_valid = 1'b0;
always @(posedge i_clk)
o_valid <= (cyc)&&(i_wb_ack)&&(~o_wb_we);
o_valid <= ((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_ack)&&(~o_wb_we);
initial o_err = 1'b0;
always @(posedge i_clk)
o_err <= (cyc)&&(i_wb_err);
assign o_busy = cyc;
o_err <= ((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_err);
assign o_busy = (o_wb_cyc_gbl)||(o_wb_cyc_lcl);
 
always @(posedge i_clk)
o_wreg <= fifo_oreg[rdaddr];
160,6 → 143,6
if (i_wb_ack)
o_result <= i_wb_data;
 
assign o_pipe_stalled = (cyc)
assign o_pipe_stalled = ((o_wb_cyc_gbl)||(o_wb_cyc_lcl))
&&((i_wb_stall)||((~o_wb_stb_lcl)&&(~o_wb_stb_gbl)));
endmodule
/rtl/core/pipefetch.v
87,8 → 87,7
reg [(LGCACHELEN):0] r_nvalid, r_acks_waiting;
reg [(BUSW-1):0] cache[0:(CACHELEN-1)];
 
wire [(LGCACHELEN-1):0] w_cache_offset;
reg [1:0] r_cache_offset;
reg [(LGCACHELEN-1):0] r_cache_offset;
 
reg r_addr_set;
reg [(AW-1):0] r_addr;
109,11 → 108,8
||(r_addr >= r_cache_base + bus_nvalid+5)));
wire w_running_out_of_cache;
assign w_running_out_of_cache = (r_addr_set)
&&(r_addr >= r_cache_base +
// {{(AW-LGCACHELEN-1),{1'b0}},2'b11,
// {(LGCACHELEN-1){1'b0}}})
// (1<<(LGCACHELEN-2)) + (1<<(LGCACHELEN-1)))
+(3<<(LGCACHELEN-2)))
&&(r_addr >= r_cache_base + (1<<(LGCACHELEN-2))
+ (1<<(LGCACHELEN-1)))
&&(|r_nvalid[(LGCACHELEN):(LGCACHELEN-1)]);
 
initial r_cache_base = RESET_ADDRESS;
155,7 → 151,7
// o_wb_addr <= (i_new_pc) ? i_pc : r_addr;
// r_nvalid <= 0;
// r_cache_base <= (i_new_pc) ? i_pc : r_addr;
// w_cache_offset <= 0;
// r_cache_offset <= 0;
end else if ((~o_wb_cyc)&&(w_running_out_of_cache))
begin
// If we're using the last quarter of the cache, then
165,7 → 161,7
// o_wb_addr <= r_cache_base + (1<<(LGCACHELEN));
// r_nvalid <= r_nvalid - (1<<(LGCACHELEN-2));
// r_cache_base <= r_cache_base + (1<<(LGCACHELEN-2));
// w_cache_offset <= w_cache_offset + (1<<(LGCACHELEN-2));
// r_cache_offset <= r_cache_offset + (1<<(LGCACHELEN-2));
end else if (o_wb_cyc)
begin
// This handles everything ... but the case where
196,11 → 192,9
(w_pc_out_of_bounds)||(w_ran_off_end_of_cache)))
r_nvalid <= 0;
else if ((~o_wb_cyc)&&(w_running_out_of_cache))
r_nvalid[LGCACHELEN:(LGCACHELEN-2)]
<= r_nvalid[LGCACHELEN:(LGCACHELEN-2)] +3'b111;
// i.e. - (1<<(LGCACHELEN-2));
r_nvalid <= r_nvalid - (1<<(LGCACHELEN-2));
else if ((o_wb_cyc)&&(i_wb_ack))
r_nvalid <= r_nvalid + {{(LGCACHELEN){1'b0}},1'b1}; // +1;
r_nvalid <= r_nvalid+1;
 
always @(posedge i_clk)
if (i_clear_cache)
210,10 → 204,7
||(w_ran_off_end_of_cache)))
r_cache_base <= (i_new_pc) ? i_pc : r_addr;
else if ((~o_wb_cyc)&&(w_running_out_of_cache))
r_cache_base[(AW-1):(LGCACHELEN-2)]
<= r_cache_base[(AW-1):(LGCACHELEN-2)]
+ {{(AW-LGCACHELEN+1){1'b0}},1'b1};
// i.e. + (1<<(LGCACHELEN-2));
r_cache_base <= r_cache_base + (1<<(LGCACHELEN-2));
 
always @(posedge i_clk)
if (i_clear_cache)
223,8 → 214,7
||(w_ran_off_end_of_cache)))
r_cache_offset <= 0;
else if ((~o_wb_cyc)&&(w_running_out_of_cache))
r_cache_offset[1:0] <= r_cache_offset[1:0] + 2'b01;
assign w_cache_offset = { r_cache_offset, {(LGCACHELEN-2){1'b0}} };
r_cache_offset <= r_cache_offset + (1<<(LGCACHELEN-2));
 
always @(posedge i_clk)
if (i_clear_cache)
246,13 → 236,13
if (~o_wb_cyc)
r_acks_waiting <= 0;
else if ((o_wb_cyc)&&(o_wb_stb)&&(~i_wb_stall)&&(~i_wb_ack))
r_acks_waiting <= r_acks_waiting + {{(LGCACHELEN){1'b0}},1'b1};
r_acks_waiting <= r_acks_waiting + 1;
else if ((o_wb_cyc)&&(i_wb_ack)&&((~o_wb_stb)||(i_wb_stall)))
r_acks_waiting <= r_acks_waiting + {(LGCACHELEN+1){1'b1}}; // - 1;
r_acks_waiting <= r_acks_waiting - 1;
 
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
cache[r_nvalid[(LGCACHELEN-1):0]+w_cache_offset]
cache[r_nvalid[(LGCACHELEN-1):0]+r_cache_offset]
<= i_wb_data;
 
initial r_addr_set = 1'b0;
275,11 → 265,11
if (i_new_pc)
r_addr <= i_pc;
else if ( ((i_stall_n)&&(w_cv)) || ((~i_stall_n)&&(w_cv)&&(r_addr == o_pc)) )
r_addr <= r_addr + {{(AW-1){1'b0}},1'b1};
r_addr <= r_addr + 1;
 
wire [(LGCACHELEN-1):0] c_rdaddr, c_cache_base;
assign c_cache_base = r_cache_base[(LGCACHELEN-1):0];
assign c_rdaddr = r_addr[(LGCACHELEN-1):0]-c_cache_base+w_cache_offset;
assign c_rdaddr = r_addr[(LGCACHELEN-1):0]-c_cache_base+r_cache_offset;
always @(posedge i_clk)
if ((~o_v)||((i_stall_n)&&(o_v)))
o_i <= cache[c_rdaddr];
/rtl/core/zipcpu.v
6,17 → 6,20
//
// Purpose: This is the top level module holding the core of the Zip CPU
// together. The Zip CPU is designed to be as simple as possible.
// (actual implementation aside ...) The instruction set is about as
// RISC as you can get, there are only 16 instruction types supported.
// Please see the accompanying spec.pdf file for a description of these
// instructions.
// The instruction set is about as RISC as you can get, there are
// only 16 instruction types supported (of which one isn't yet
// supported ...) Please see the accompanying iset.html file
// for a description of these instructions.
//
// All instructions are 32-bits wide. All bus accesses, both address and
// data, are 32-bits over a wishbone bus.
// All instructions are 32-bits wide. All bus accesses, both
// address and data, are 32-bits over a wishbone bus.
//
// The Zip CPU is fully pipelined with the following pipeline stages:
//
// 1. Prefetch, returns the instruction from memory.
// 1. Prefetch, returns the instruction from memory. On the
// Basys board that I'm working on, one instruction may be
// issued every 20 clocks or so, unless and until I implement a
// cache or local memory.
//
// 2. Instruction Decode
//
26,12 → 29,60
//
// 4. Write-back Results
//
// Further information about the inner workings of this CPU may be
// found in the spec.pdf file. (The documentation within this file
// had become out of date and out of sync with the spec.pdf, so look
// to the spec.pdf for accurate and up to date information.)
// A lot of difficult work has been placed into the pipeline stall
// handling. My original proposal was not to allow pipeline stalls at all.
// The idea would be that the CPU would just run every clock and whatever
// stalled answer took place would just get fixed a clock or two later,
// meaning that the compiler could just schedule everything out.
// This idea died at the memory interface, which can take a variable
// amount of time to read or write any value, thus the whole CPU needed
// to stall on a stalled memory access.
//
// My next idea was to just let things complete. I.e., once an instruction
// starts, it continues to completion no matter what and we go on. This
// failed at writing the PC. If the PC gets written in something such as
// a MOV PC,PC+5 instruction, 3 (or however long the pipeline is) clocks
// later, if whether or not something happens in those clocks depends
// upon the instruction fetch filling the pipeline, then the CPU has a
// non-deterministic behavior.
//
// This leads to two possibilities: either *everything* stalls upon a
// stall condition, or partial results need to be destroyed before
// they are written. This is made more difficult by the fact that
// once a command is written to the memory unit, whether it be a
// read or a write, there is no undoing it--since peripherals on the
// bus may act upon the answer with whatever side effects they might
// have. (For example, writing a '1' to the interrupt register will
// clear certain interrupts ...) Further, since the memory ops depend
// upon conditions, we'll need to wait for the condition codes to
// be available before executing a memory op. Thus, memory ops can
// proceed without stalling whenever either the previous instruction
// doesn't write the flags register, or when the memory instruction doesn't
// depend upon the flags register.
//
// The other possibility is that we leave independent instruction
// execution behind, so that the pipeline is always full and stalls,
// or moves forward, together on every clock.
//
// For now, we pick the first approach: independent instruction execution.
// Thus, if stage 2 stalls, stages 3-5 may still complete the instructions
// in their pipeline. This leaves another problem: what happens on a
// MOV -1+PC,PC instruction? There will be four instructions behind this
// one (or is it five?) that will need to be 'cancelled'. So here's
// the plan: Anything can be cancelled before the ALU/MEM stage,
// since memory ops cannot be canceled after being issued. Thus, the
// ALU/MEM stage must stall if any prior instruction is going to write
// the PC register (i.e. JMP).
//
// Further, let's define a "STALL" as a reason to not execute a stage
// due to some condition at or beyond the stage, and let's define
// a VALID flag to mean that this stage has completed. Thus, the clock
// enable for a stage is (STG[n-1]VALID)&&((~STG[n]VALID)||(~STG[n]STALL)).
// The ALU/MEM stages will also depend upon a master clock enable
// (~SLEEP) condition as well.
//
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
77,9 → 128,18
`define CPU_GIE_BIT 5
`define CPU_SLEEP_BIT 4
// Compile time defines
//
`include "cpudefs.v"
//
// (Currently unused)
// `define OPT_SINGLE_FETCH
// (Best path--define these!)
`define OPT_CONDITIONAL_FLAGS
`define OPT_ILLEGAL_INSTRUCTION
`ifndef OPT_SINGLE_FETCH
// The following are pipeline optimization options.
// They make no sense in a single instruction fetch mode.
`define OPT_PRECLEAR_BUS
`define OPT_EARLY_BRANCHING
`define OPT_PIPELINED_BUS_ACCESS
`endif
module zipcpu(i_clk, i_rst, i_interrupt,
// Debug interface
i_halt, i_clear_pf_cache, i_dbg_reg, i_dbg_we, i_dbg_data,
92,16 → 152,9
i_wb_ack, i_wb_stall, i_wb_data,
i_wb_err,
// Accounting/CPU usage interface
o_op_stall, o_pf_stall, o_i_count,
//
o_debug);
o_op_stall, o_pf_stall, o_i_count);
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=24,
LGICACHE=6, AW=ADDRESS_WIDTH;
`ifdef OPT_MULTIPLY
parameter IMPLEMENT_MPY = 1;
`else
parameter IMPLEMENT_MPY = 0;
`endif
input i_clk, i_rst, i_interrupt;
// Debug interface -- inputs
input i_halt, i_clear_pf_cache;
111,7 → 164,7
// Debug interface -- outputs
output reg o_dbg_stall;
output reg [31:0] o_dbg_reg;
output reg [3:0] o_dbg_cc;
output reg [1:0] o_dbg_cc;
output wire o_break;
// Wishbone interface -- outputs
output wire o_wb_gbl_cyc, o_wb_gbl_stb;
126,25 → 179,13
output wire o_op_stall;
output wire o_pf_stall;
output wire o_i_count;
//
output reg [31:0] o_debug;
 
 
// Registers
//
// The distributed RAM style comment is necessary on the
// SPARTAN6 with XST to prevent XST from oversimplifying the register
// set and in the process ruining everything else. It basically
// optimizes logic away, to where it no longer works. The logic
// as described herein will work, this just makes sure XST implements
// that logic.
//
(* ram_style = "distributed" *)
reg [31:0] regset [0:31];
 
// Condition codes
// (BUS, TRAP,ILL,BREAKEN,STEP,GIE,SLEEP ), V, N, C, Z
reg [3:0] flags, iflags;
reg [3:0] flags, iflags; // (TRAP,FPEN,BREAKEN,STEP,GIE,SLEEP ), V, N, C, Z
wire [10:0] w_uflags, w_iflags;
reg trap, break_en, step, gie, sleep;
`ifdef OPT_ILLEGAL_INSTRUCTION
191,9 → 232,7
dcdM, dcdF_wr, dcd_gie, dcd_break;
reg [(AW-1):0] dcd_pc;
reg [23:0] r_dcdI;
`ifdef OPT_SINGLE_CYCLE
reg dcd_zI; // true if dcdI == 0
`endif
wire dcdA_stall, dcdB_stall, dcdF_stall;
 
`ifdef OPT_PRECLEAR_BUS
226,17 → 265,12
reg [(AW-1):0] op_pc;
wire [31:0] w_opA, w_opB;
wire [31:0] opA_nowait, opB_nowait, opA, opB;
reg opR_wr, opR_cc, opF_wr, op_gie;
reg opR_wr, opR_cc, opF_wr, op_gie,
opA_rd, opB_rd;
wire [10:0] opFl;
reg [5:0] r_opF;
wire [7:0] opF;
reg [2:0] opF_cp;
reg [6:0] r_opF;
wire [8:0] opF;
wire op_ce;
// Some pipeline control wires
`ifdef OPT_SINGLE_CYCLE
reg opA_alu, opA_mem;
reg opB_alu, opB_mem;
`endif
`ifdef OPT_PRECLEAR_BUS
reg op_clear_bus;
`endif
259,8 → 293,11
wire alu_valid;
wire set_cond;
reg alu_wr, alF_wr, alu_gie;
wire alu_illegal_op;
`ifdef OPT_ILLEGAL_INSTRUCTION
reg alu_illegal;
`else
wire alu_illegal;
`endif
 
 
 
314,22 → 351,8
//
// PIPELINE STAGE #3 :: Read Operands
// Calculate stall conditions
assign op_stall = ((opvalid)&&(~master_ce))||(
// Stall if going into the ALU and the ALU is stalled
// i.e. if the memory is busy, or we are single
// stepping
((opvalid_alu)&&(alu_stall))
//
// ||((opvalid_alu)&&(mem_rdbusy)) // part of alu_stall
// Stall if we are going into memory with an operation
// that cannot be pipelined, and the memory is
// already busy
||((opvalid_mem)&&(~op_pipe)&&(mem_busy))
//
// Stall if we are going into memory with a pipeable
// operation, but the memory unit declares it is
// not going to accept any more pipeline operations
||((opvalid_mem)&&( op_pipe)&&(mem_pipe_stalled)));
assign op_stall = ((mem_stalled)&&(opvalid_mem))
||((alu_stall)&&(opvalid_alu));
assign op_ce = (dcdvalid)&&((~opvalid)||(~op_stall));
 
//
340,16 → 363,15
// busy.
// 2. Also stall if the prior stage is valid and the master clock enable
// is de-selected
// 3. Stall if someone on the other end is writing the CC register,
// since we don't know if it'll put us to sleep or not.
// 3. Next case: Stall if we want to start a memory operation and the
// prior operation will write either the PC or CC registers.
// 4. Last case: Stall if we would otherwise move a break instruction
// through the ALU. Break instructions are not allowed through
// the ALU.
assign alu_stall = (((~master_ce)||(mem_rdbusy))&&(opvalid_alu)) //Case 1&2
// Old case #3--this isn't an ALU stall though ...
||((opvalid_alu)&&(wr_reg_ce)&&(wr_reg_id[4] == op_gie)
&&(wr_write_cc)) // Case 3
||((opvalid_alu)&&(op_break)); // Case 3
||((opvalid_mem)&&(wr_reg_ce)&&(wr_reg_id[4] == op_gie)
&&((wr_write_pc)||(wr_write_cc))) // Case 3
||((opvalid)&&(op_break)); // Case 4
assign alu_ce = (master_ce)&&(~mem_rdbusy)&&(opvalid_alu)&&(~alu_stall)&&(~clear_pipeline);
//
`ifdef OPT_PIPELINED_BUS_ACCESS
421,7 → 443,7
 
`ifdef OPT_EARLY_BRANCHING
always @(posedge i_clk)
if ((dcd_ce)&&(instruction[27:24]==`CPU_PC_REG)&&(master_ce))
if ((dcd_ce)&&(instruction[27:24]==`CPU_PC_REG)&&(~sleep))
begin
dcd_early_branch <= 1'b0;
// First case, a move to PC instruction
454,40 → 476,16
if (dcd_ce) dcd_early_branch <= 1'b0;
dcd_early_branch_stb <= 1'b0;
end
generate
if (AW == 24)
begin
always @(posedge i_clk)
always @(posedge i_clk)
if (dcd_ce)
begin
if (instruction[31]) // Add
begin
dcd_branch_pc <= instruction_pc
+ { {(AW-20){instruction[19]}}, instruction[19:0] }
+ {{(AW-1){1'b0}},1'b1};
end else if (~instruction[28]) // 4'h2 = MOV
dcd_branch_pc <= instruction_pc+{ {(AW-20){instruction[19]}}, instruction[19:0] } + {{(AW-1){1'b0}},1'b1};
else if (~instruction[28]) // 4'h2 = MOV
dcd_branch_pc <= instruction_pc+{ {(AW-15){instruction[14]}}, instruction[14:0] } + {{(AW-1){1'b0}},1'b1};
else // if (instruction[28]) // 4'h3 = LDI
dcd_branch_pc <= instruction_pc+{ instruction[23:0] } + {{(AW-1){1'b0}},1'b1};
end
end else begin
always @(posedge i_clk)
if (dcd_ce)
begin
if (instruction[31]) // Add
begin
dcd_branch_pc <= instruction_pc
+ { {(AW-20){instruction[19]}}, instruction[19:0] }
+ {{(AW-1){1'b0}},1'b1};
end else if (~instruction[28]) // 4'h2 = MOV
begin
dcd_branch_pc <= instruction_pc+{ {(AW-15){instruction[14]}}, instruction[14:0] } + {{(AW-1){1'b0}},1'b1};
end else // if (instruction[28]) // 4'h3 = LDI
begin
dcd_branch_pc <= instruction_pc+{ {(AW-24){instruction[23]}}, instruction[23:0] } + {{(AW-1){1'b0}},1'b1};
end
end
end endgenerate
`else // OPT_EARLY_BRANCHING
assign dcd_early_branch_stb = 1'b0;
assign dcd_early_branch = 1'b0;
497,8 → 495,7
always @(posedge i_clk)
if (dcd_ce)
begin
dcd_pc <= instruction_pc
+{{(AW-1){1'b0}},1'b1}; // i.e. dcd_pc+1
dcd_pc <= instruction_pc+1;
 
// Record what operation we are doing
dcdOp <= instruction[31:28];
538,9 → 535,7
dcdA_rd <= 1'b0;
dcdB_rd <= 1'b1;
r_dcdI <= { {(9){instruction[14]}}, instruction[14:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[14:0] == 0);
`endif
dcdF_wr <= 1'b0; // Don't write flags
end
4'h3: begin // Load immediate
548,9 → 543,7
dcdA_rd <= 1'b0;
dcdB_rd <= 1'b0;
r_dcdI <= { instruction[23:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[23:0] == 0);
`endif
dcdF_wr <= 1'b0; // Don't write flags
dcdF <= 4'h8; // This is unconditional
dcdOp <= 4'h2;
566,9 → 559,7
dcdF_wr <= (instruction[27:25] != 3'h7);
`endif
r_dcdI <= { 8'h00, instruction[15:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[15:0] == 0);
`endif
if (instruction[27:24] == 4'he)
begin
// NOOP instruction
593,14 → 584,12
end else begin
// Actual multiply instruction
r_dcdI <= { 8'h00, instruction[15:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[15:0] == 0);
`endif
dcdA_rd <= 1'b1;
dcdB_rd <= (instruction[19:16] != 4'hf);
dcdOp[3:0] <= (instruction[20])? 4'h4:4'h3;
end end
4'b011?: begin // LOD/STO or Load/Store
4'b011?: begin // Load/Store
dcdF_wr <= 1'b0; // Don't write flags
dcdA_wr <= (~instruction[28]); // Write on loads
dcdA_rd <= (instruction[28]); // Read on stores
608,14 → 597,10
if (instruction[20])
begin
r_dcdI <= { {(8){instruction[15]}}, instruction[15:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[15:0] == 0);
`endif
end else begin
r_dcdI <= { {(4){instruction[19]}}, instruction[19:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[19:0] == 0);
`endif
end
dcdM <= 1'b1; // Memory operation
`ifdef OPT_PRECLEAR_BUS
629,14 → 614,10
if (instruction[20])
begin
r_dcdI <= { {(8){instruction[15]}}, instruction[15:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[15:0] == 0);
`endif
end else begin
r_dcdI <= { {(4){instruction[19]}}, instruction[19:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[19:0] == 0);
`endif
end end
endcase
 
666,7 → 647,6
op_pipe <= (dcdvalid)&&(opvalid_mem)&&(dcdM) // Both mem
&&(dcdOp[0]==opn[0]) // Both Rd, or both Wr
&&(dcdB == op_B) // Same address register
&&(dcdF[2:0] == opF_cp) // Same condition
&&((r_dcdI == r_opI)||(r_dcdI==r_opI+24'h1));
always @(posedge i_clk)
if (op_ce) // &&(dcdvalid))
683,59 → 663,39
//
assign w_opA = regset[dcdA];
assign w_opB = regset[dcdB];
 
wire [31:0] w_pcA_v;
generate
if (AW < 32)
assign w_pcA_v = {{(32-AW){1'b0}}, (dcdA[4] == dcd_gie)?dcd_pc:upc };
else
assign w_pcA_v = (dcdA[4] == dcd_gie)?dcd_pc:upc;
endgenerate
always @(posedge i_clk)
if (op_ce) // &&(dcdvalid))
begin
if ((wr_reg_ce)&&(wr_reg_id == dcdA))
r_opA <= wr_reg_vl;
else if ((dcdA_pc)&&(dcdA[4] == dcd_gie))
r_opA <= { {(32-AW){1'b0}}, dcd_pc };
else if (dcdA_pc)
r_opA <= w_pcA_v;
r_opA <= { {(32-AW){1'b0}}, upc };
else if (dcdA_cc)
r_opA <= { w_opA[31:11], (dcd_gie)?w_uflags:w_iflags };
else
r_opA <= w_opA;
`ifdef OPT_SINGLE_CYCLE
end else if (opvalid)
begin // We were going to pick these up when they became valid,
// but for some reason we're stuck here as they became
// valid. Pick them up now anyway
if (((opA_alu)&&(alu_valid)&&(alu_wr))||((opA_mem)&&(mem_valid)))
if ((opA_alu)||((opA_mem)&&(mem_valid)))
r_opA <= wr_reg_vl;
`endif
end
 
wire [31:0] dcdI, w_opBnI, w_pcB_v;
wire [31:0] dcdI, w_opBnI;
assign dcdI = { {(8){r_dcdI[23]}}, r_dcdI };
generate
if (AW < 32)
assign w_pcB_v = {{(32-AW){1'b0}}, (dcdB[4] == dcd_gie)?dcd_pc:upc };
else
assign w_pcB_v = (dcdB[4] == dcd_gie)?dcd_pc:upc;
endgenerate
 
assign w_opBnI = (~dcdB_rd) ? 32'h00
: (((wr_reg_ce)&&(wr_reg_id == dcdB)) ? wr_reg_vl
: ((dcdB_pc) ? w_pcB_v
: ((dcdB_cc) ? { w_opB[31:11], (dcd_gie)?w_uflags:w_iflags}
: w_opB)));
 
: (((wr_reg_ce)&&(wr_reg_id == dcdB)) ? wr_reg_vl
: (((dcdB_pc)&&(dcdB[4] == dcd_gie)) ? {{(32-AW){1'b0}},dcd_pc }
: ((dcdB_pc) ? {{(32-AW){1'b0}},upc}
: ((dcdB_cc) ? { w_opB[31:11], (dcd_gie)?w_uflags:w_iflags}
: regset[dcdB]))));
always @(posedge i_clk)
if (op_ce) // &&(dcdvalid))
r_opB <= w_opBnI + dcdI;
`ifdef OPT_SINGLE_CYCLE
else if ((opvalid)&&(
((opB_alu)&&(alu_valid)&&(alu_wr))
||((opB_mem)&&(mem_valid))))
else if ((opvalid)&&((opB_alu)||((opB_mem)&&(mem_valid))))
r_opB <= wr_reg_vl;
`endif
 
// The logic here has become more complex than it should be, no thanks
// to Xilinx's Vivado trying to help. The conditions are supposed to
750,20 → 710,17
if (op_ce)
begin // Set the flag condition codes, bit order is [3:0]=VNCZ
case(dcdF[2:0])
3'h0: r_opF <= 6'h00; // Always
3'h1: r_opF <= 6'h11; // Z
3'h2: r_opF <= 6'h10; // NE
3'h3: r_opF <= 6'h20; // GE (!N)
3'h4: r_opF <= 6'h30; // GT (!N&!Z)
3'h5: r_opF <= 6'h24; // LT
3'h6: r_opF <= 6'h02; // C
3'h7: r_opF <= 6'h08; // V
3'h0: r_opF <= 7'h80; // Always
3'h1: r_opF <= 7'h11; // Z
3'h2: r_opF <= 7'h10; // NE
3'h3: r_opF <= 7'h20; // GE (!N)
3'h4: r_opF <= 7'h30; // GT (!N&!Z)
3'h5: r_opF <= 7'h24; // LT
3'h6: r_opF <= 7'h02; // C
3'h7: r_opF <= 7'h08; // V
endcase
end // Bit order is { (flags_not_used), VNCZ mask, VNCZ value }
assign opF = { r_opF[3], r_opF[5], r_opF[1], r_opF[4:0] };
always @(posedge i_clk)
if (op_ce)
opF_cp[2:0] <= dcdF[2:0];
assign opF = { r_opF[6], r_opF[3], r_opF[5], r_opF[1], r_opF[4:0] };
 
initial opvalid = 1'b0;
initial opvalid_alu = 1'b0;
841,6 → 798,13
// User level (1), vs supervisor (0)/interrupts disabled
op_gie <= dcd_gie;
 
// We're not done with these yet--we still need them
// for the unclocked assign. We need the unclocked
// assign so that there's no wait state between an
// ALU or memory result and the next register that may
// use that value.
opA_rd <= dcdA_rd;
opB_rd <= dcdB_rd;
//
`ifdef OPT_EARLY_BRANCHING
op_wr_pc <= ((dcdA_wr)&&(dcdA_pc)&&(dcdA[4] == dcd_gie))&&(~dcd_early_branch);
868,48 → 832,37
// We'll create a flag here to start our coordination. Once we
// define this flag to something other than just plain zero, then
// the stalls will already be in place.
`ifdef OPT_SINGLE_CYCLE
initial opA_alu = 1'b0;
reg opA_alu, opA_mem;
always @(posedge i_clk)
if (op_ce)
opA_alu <= (opvalid_alu)&&(opR == dcdA)&&(opR_wr)&&(dcdA_rd);
else if ((opvalid)&&(opA_alu)&&(alu_valid))
opA_alu <= 1'b0;
initial opA_mem = 1'b0;
always @(posedge i_clk)
if (op_ce)
opA_mem <= ((opvalid_mem)&&(opR == dcdA)&&(dcdA_rd)&&(~opn[0]))
opA_mem <= ((opvalid_mem)&&(opR == dcdA)&&(dcdA_rd))
||((~opvalid)&&(mem_busy)&&(~mem_we)
&&(mem_last_reg == dcdA)&&(dcdA_rd));
else if ((opvalid)&&(opA_mem)&&(mem_valid))
opA_mem <= 1'b0;
`endif
 
always @(posedge i_clk)
if (mem_ce)
mem_last_reg <= opR;
`ifdef OPT_SINGLE_CYCLE
assign opA = ((opA_alu)&&(alu_valid)&&(alu_wr)) ? alu_result
assign opA = (opA_alu) ? alu_result
: ( ((opA_mem)&&(mem_valid))?mem_result
: r_opA );
`else
assign opA = r_opA;
`endif
 
assign dcdA_stall = (dcdvalid)&&(dcdA_rd)&&(
`ifdef OPT_SINGLE_CYCLE
// Skip the requirement on writing back opA
// Stall on memory, since we'll always need to stall for a
// memory access anyway
// ((opvalid_mem)&&(opR_wr)&&(opR == dcdA))
((opvalid_alu)&&(opF_wr)&&(dcdA_cc)));
`else
((opvalid)&&(opR_wr)&&(opR == dcdA))
||((opvalid_alu)&&(opF_wr)&&(dcdA_cc))
||((mem_rdbusy)&&(mem_last_reg == dcdA))
);
`endif
// Place stalls for this latter case into the ops stage
// ||((mem_busy)&&(~mem_we));
 
`ifdef OPT_SINGLE_CYCLE
reg opB_alu, opB_mem;
always @(posedge i_clk)
if (op_ce)
opB_alu <= (opvalid_alu)&&(opR == dcdB)&&(opR_wr)&&(dcdB_rd)&&(dcd_zI);
916,20 → 869,15
always @(posedge i_clk)
if (op_ce)
opB_mem <= (dcd_zI)&&(dcdB_rd)&&(
((opvalid_mem)&&(opR == dcdB)&&(~opn[0]))
((opvalid_mem)&&(opR == dcdB))
||((~opvalid)&&(mem_busy)&&(~mem_we)
&&(mem_last_reg == dcdB)));
else if ((opvalid)&&(opB_mem)&&(mem_valid))
opB_mem <= 1'b0;
assign opB = ((opB_alu)&&(alu_valid)&&(alu_wr)) ? alu_result
assign opB = (opB_alu) ? alu_result
: ( ((opB_mem)&&(mem_valid))?mem_result
: r_opB );
`else
assign opB = r_opB;
`endif
 
assign dcdB_stall = (dcdvalid)&&(dcdB_rd)&&(
`ifdef OPT_SINGLE_CYCLE
// Stall on memory ops writing to my register
// (i.e. loads), or on any write to my
// register if I have an immediate offset
949,12 → 897,6
// Stall on any ongoing memory operation that
// will write to opB
||((mem_busy)&&(~mem_we)&&(mem_last_reg==dcdB)));
`else
((opvalid)&&(opR_wr)&&(opR == dcdB))
||((opvalid_alu)&&(opF_wr)&&(dcdB_cc))
||((mem_rdbusy)&&(mem_last_reg == dcdB))
);
`endif
assign dcdF_stall = (dcdvalid)&&((~dcdF[3])||(dcdA_cc)||(dcdB_cc))
&&(opvalid)&&(opR_cc);
//
962,9 → 904,9
// PIPELINE STAGE #4 :: Apply Instruction
//
//
cpuops #(IMPLEMENT_MPY) doalu(i_clk, i_rst, alu_ce,
cpuops doalu(i_clk, i_rst, alu_ce,
(opvalid_alu), opn, opA, opB,
alu_result, alu_flags, alu_valid, alu_illegal_op);
alu_result, alu_flags, alu_valid);
 
assign set_cond = ((opF[7:4]&opFl[3:0])==opF[3:0]);
initial alF_wr = 1'b0;
992,12 → 934,9
if ((alu_ce)||(mem_ce))
alu_pc <= op_pc;
`ifdef OPT_ILLEGAL_INSTRUCTION
reg r_alu_illegal;
initial r_alu_illegal = 0;
always @(posedge i_clk)
if ((alu_ce)||(mem_ce))
r_alu_illegal <= op_illegal;
assign alu_illegal = (alu_illegal_op)||(r_alu_illegal);
alu_illegal <= op_illegal;
`endif
 
initial alu_pc_valid = 1'b0;
1086,8 → 1025,8
// includes the set condition ...
assign wr_flags_ce = (alF_wr)&&(alu_valid)&&(~clear_pipeline)&&(~alu_illegal);
`ifdef OPT_ILLEGAL_INSTRUCTION
assign w_uflags = { bus_err_flag, trap, ill_err, 1'b0, step, 1'b1, sleep, ((wr_flags_ce)&&(alu_gie))?alu_flags:flags };
assign w_iflags = { bus_err_flag, trap, ill_err,break_en, 1'b0, 1'b0, sleep, ((wr_flags_ce)&&(~alu_gie))?alu_flags:iflags };
assign w_uflags = { bus_err_flag, trap, ill_err, 1'b0, step, 1'b1, sleep, ((wr_flags_ce)&&(alu_gie))?alu_flags:flags };
assign w_iflags = { bus_err_flag, trap, ill_err, break_en, 1'b0, 1'b0, sleep, ((wr_flags_ce)&&(~alu_gie))?alu_flags:iflags };
`else
assign w_uflags = { bus_err_flag, trap, ill_err, 1'b0, step, 1'b1, sleep, ((wr_flags_ce)&&(alu_gie))?alu_flags:flags };
assign w_iflags = { bus_err_flag, trap, ill_err, break_en, 1'b0, 1'b0, sleep, ((wr_flags_ce)&&(~alu_gie))?alu_flags:iflags };
1298,7 → 1237,7
&&(i_dbg_reg[4:0] == { gie, `CPU_PC_REG}))
pf_pc <= i_dbg_data[(AW-1):0];
else if (dcd_ce)
pf_pc <= pf_pc + {{(AW-1){1'b0}},1'b1};
pf_pc <= pf_pc + 1;
 
initial new_pc = 1'b1;
always @(posedge i_clk)
1318,36 → 1257,16
 
//
// The debug interface
generate
if (AW<32)
begin
always @(posedge i_clk)
always @(posedge i_clk)
begin
o_dbg_reg <= regset[i_dbg_reg];
if (i_dbg_reg[3:0] == `CPU_PC_REG)
o_dbg_reg <= {{(32-AW){1'b0}},(i_dbg_reg[4])?upc:ipc};
else if (i_dbg_reg[3:0] == `CPU_CC_REG)
begin
o_dbg_reg[10:0] <= (i_dbg_reg[4])?w_uflags:w_iflags;
o_dbg_reg[`CPU_GIE_BIT] <= gie;
end
end
end else begin
always @(posedge i_clk)
begin
o_dbg_reg <= regset[i_dbg_reg];
if (i_dbg_reg[3:0] == `CPU_PC_REG)
o_dbg_reg <= (i_dbg_reg[4])?upc:ipc;
else if (i_dbg_reg[3:0] == `CPU_CC_REG)
begin
o_dbg_reg[10:0] <= (i_dbg_reg[4])?w_uflags:w_iflags;
o_dbg_reg[`CPU_GIE_BIT] <= gie;
end
end
end endgenerate
 
always @(posedge i_clk)
o_dbg_cc <= { o_break, bus_err, gie, sleep };
o_dbg_cc <= { gie, sleep };
 
always @(posedge i_clk)
o_dbg_stall <= (i_halt)&&(
1364,14 → 1283,4
assign o_op_stall = (master_ce)&&((~opvalid)||(op_stall));
assign o_pf_stall = (master_ce)&&(~pf_valid);
assign o_i_count = (alu_pc_valid)&&(~clear_pipeline);
 
always @(posedge i_clk)
o_debug <= {
pf_pc[7:0],
pf_valid, dcdvalid, opvalid, alu_valid, mem_valid,
op_ce, alu_ce, mem_ce,
opA[23:20], opA[3:0],
wr_reg_vl[7:0]
};
endmodule
/rtl/core/cpuops.v
29,9 → 29,7
//
///////////////////////////////////////////////////////////////////////////
//
module cpuops(i_clk, i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid,
o_illegal);
parameter IMPLEMENT_MPY = 1;
module cpuops(i_clk, i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid);
input i_clk, i_rst, i_ce;
input [3:0] i_op;
input [31:0] i_a, i_b;
39,13 → 37,11
output reg [31:0] o_c;
output wire [3:0] o_f;
output reg o_valid;
output wire o_illegal;
 
wire [63:0] w_rol_tmp;
assign w_rol_tmp = { i_a, i_a } << i_b[4:0];
wire [31:0] w_rol_result;
assign w_rol_result = w_rol_tmp[63:32]; // Won't set flags
`ifndef NEW_NOT_OLD_CODE
wire [33:0] w_lsr_result, w_asr_result;
wire signed [33:0] w_ia_input;
assign w_ia_input = { i_a[31], i_a, 1'b0 };
53,15 → 49,14
: ( w_ia_input >>> (i_b[4:0]) );// ASR
assign w_lsr_result = (|i_b[31:5])? 34'h00
: { 1'b0, i_a, 1'b0 } >> (i_b[4:0]);// LSR
`else
wire [32:0] w_lsr_result, w_asr_result;
assign w_asr_result = (|i_b[31:5])? {(33){i_a[31]}}
: ( {i_a, 1'b0 } >>> (i_b[4:0]) );// ASR
assign w_lsr_result = (|i_b[31:5])? 33'h00
: ( { i_a, 1'b0 } >> (i_b[4:0]) );// LSR
`endif
 
 
wire signed [16:0] w_mpy_a_input, w_mpy_b_input;
wire signed [33:0] w_mpy_result;
assign w_mpy_a_input = { ((i_a[15])&&(i_op[2])), i_a[15:0] };
assign w_mpy_b_input = { ((i_b[15])&&(i_op[2])), i_b[15:0] };
assign w_mpy_result = w_mpy_a_input * w_mpy_b_input;
 
wire z, n, v;
reg c, pre_sign, set_ovfl;
always @(posedge i_clk)
71,11 → 66,7
||((i_op==4'ha)&&(i_a[31] == i_b[31])) // ADD
||(i_op == 4'hd) // LSL
||(i_op == 4'hf)); // LSR
 
generate
if (IMPLEMENT_MPY == 0)
begin
always @(posedge i_clk)
always @(posedge i_clk)
if (i_ce)
begin
pre_sign <= (i_a[31]);
83,33 → 74,6
casez(i_op)
4'b?000:{c,o_c } <= {(i_b>i_a),i_a - i_b};// CMP/SUB
4'b?001: o_c <= i_a & i_b; // BTST/And
4'h5: o_c <= w_rol_result; // ROL
4'h6: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO
4'h7: o_c <= { i_b[15:0], i_a[15:0] }; // LODIHI
4'ha: { c, o_c } <= i_a + i_b; // Add
4'hb: o_c <= i_a | i_b; // Or
4'hc: o_c <= i_a ^ i_b; // Xor
4'hd: { c, o_c } <= (|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0]; // LSL
4'he: { o_c, c } <= w_asr_result[32:0];// ASR
4'hf: { o_c, c } <= w_lsr_result[32:0];// LSR
default: o_c <= i_b; // MOV, LDI
endcase
end
end else begin
wire signed [16:0] w_mpy_a_input, w_mpy_b_input;
wire signed [33:0] w_mpy_result;
assign w_mpy_a_input = { ((i_a[15])&&(i_op[2])), i_a[15:0] };
assign w_mpy_b_input = { ((i_b[15])&&(i_op[2])), i_b[15:0] };
assign w_mpy_result = w_mpy_a_input * w_mpy_b_input;
 
always @(posedge i_clk)
if (i_ce)
begin
pre_sign <= (i_a[31]);
c <= 1'b0;
casez(i_op)
4'b?000:{c,o_c } <= {(i_b>i_a),i_a - i_b};// CMP/SUB
4'b?001: o_c <= i_a & i_b; // BTST/And
4'h3: { c, o_c } <= {1'b0,w_mpy_result[31:0]}; // MPYU/S
4'h4: { c, o_c } <= {1'b0,w_mpy_result[31:0]}; // MPYU/S
4'h5: o_c <= w_rol_result; // ROL
124,19 → 88,7
default: o_c <= i_b; // MOV, LDI
endcase
end
end endgenerate
 
generate
if (IMPLEMENT_MPY == 0)
begin
reg r_illegal;
always @(posedge i_clk)
r_illegal <= (i_op == 4'h3)||(i_op == 4'h4);
assign o_illegal = r_illegal;
end else
assign o_illegal = 1'b0;
endgenerate
 
assign z = (o_c == 32'h0000);
assign n = (o_c[31]);
assign v = (set_ovfl)&&(pre_sign != o_c[31]);
147,6 → 99,8
always @(posedge i_clk)
if (i_rst)
o_valid <= 1'b0;
else
o_valid <= (i_ce)&&(i_valid);
else if (i_ce)
o_valid <= i_valid;
else if (~i_ce)
o_valid <= 1'b0;
endmodule
/rtl/zipsystem.v
96,7 → 96,7
// you get the picture. But, the bottom line is that I no longer need this
// delay.
//
// `define DELAY_EXT_BUS // Required no longer!
// `define DELAY_EXT_BUS // Required no longer!k
//
//
// If space is tight, you might not wish to have your performance and
113,7 → 113,7
`define PERIPHBASE 32'hc0000000
`define INTCTRL 5'h0 //
`define WATCHDOG 5'h1 // Interrupt generates reset signal
`define BUSWATCHDOG 5'h2 // Sets IVEC[0]
// `define CACHECTRL 5'h2 // Sets IVEC[0]
`define CTRINT 5'h3 // Sets IVEC[5]
`define TIMER_A 5'h4 // Sets IVEC[4]
`define TIMER_B 5'h5 // Sets IVEC[3]
161,8 → 161,7
o_ext_int,
// Wishbone slave interface for debugging purposes
i_dbg_cyc, i_dbg_stb, i_dbg_we, i_dbg_addr, i_dbg_data,
o_dbg_ack, o_dbg_stall, o_dbg_data,
o_cpu_debug);
o_dbg_ack, o_dbg_stall, o_dbg_data);
parameter RESET_ADDRESS=24'h0100000, ADDRESS_WIDTH=24,
LGICACHE=6, START_HALTED=1, EXTERNAL_INTERRUPTS=1,
// Derived parameters
185,8 → 184,6
output wire o_dbg_ack;
output wire o_dbg_stall;
output wire [31:0] o_dbg_data;
//
output wire [31:0] o_cpu_debug;
 
wire [31:0] ext_idata;
 
238,7 → 235,7
wire cpu_break, dbg_cmd_write;
reg cmd_reset, cmd_halt, cmd_step, cmd_clear_pf_cache;
reg [5:0] cmd_addr;
wire [3:0] cpu_dbg_cc;
wire [1:0] cpu_dbg_cc;
assign dbg_cmd_write = (dbg_cyc)&&(dbg_stb)&&(dbg_we)&&(~dbg_addr);
//
initial cmd_reset = 1'b1;
255,8 → 252,12
cmd_halt <= 1'b1;
 
always @(posedge i_clk)
cmd_clear_pf_cache = (~i_rst)&&(dbg_cmd_write)
&&((dbg_idata[11])||(dbg_idata[6]));
if (i_rst)
cmd_clear_pf_cache <= 1'b0;
else if (dbg_cmd_write)
cmd_clear_pf_cache <= dbg_idata[11];
else
cmd_clear_pf_cache <= 1'b0;
//
initial cmd_step = 1'b0;
always @(posedge i_clk)
285,7 → 286,7
// 0x02000 -> cc.gie
// 0x10000 -> External interrupt line is high
assign cmd_data = { 7'h00, {(9-EXTERNAL_INTERRUPTS){1'b0}}, i_ext_int,
cpu_dbg_cc,
2'b00, cpu_dbg_cc,
1'b0, cmd_halt, (~cpu_dbg_stall), 1'b0,
pic_data[15], cpu_reset, cmd_addr };
wire cpu_gie;
314,32 → 315,13
wdt_ack, wdt_stall, wdt_data, wdt_reset);
 
//
// Position two, a second watchdog timer--this time for the wishbone
// bus, in order to tell/find wishbone bus lockups. In its current
// configuration, it cannot be configured and all bus accesses must
// take less than the number written to this register.
// Position two ... unclaimed / unused
//
reg wdbus_ack;
reg [(AW-1):0] r_wdbus_data;
wire [31:0] wdbus_data;
wire [14:0] wdbus_ignored_data;
wire reset_wdbus_timer, wdbus_int, wdbus_ack_ignored, wdbus_stall;
assign reset_wdbus_timer = ((o_wb_cyc)&&((o_wb_stb)||(i_wb_ack)));
// o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data,
// i_wb_ack, i_wb_stall, i_wb_data, i_wb_err,
ziptimer #(15) watchbus(i_clk, (cpu_reset), o_wb_cyc,
reset_wdbus_timer, reset_wdbus_timer, 1'b1, 15'h2000,
wdbus_ack_ignored, wdbus_stall, wdbus_ignored_data,
wdbus_int);
initial r_wdbus_data = 0;
wire cache_stall;
assign cache_stall = 1'b0;
reg cache_ack;
always @(posedge i_clk)
if (wdbus_int)
r_wdbus_data = o_wb_addr;
assign wdbus_data = { {(32-AW){1'b0}}, r_wdbus_data };
initial wdbus_ack = 1'b0;
always @(posedge i_clk)
wdbus_ack <= ((sys_cyc)&&(sys_stb)&&(sys_addr == 5'h02));
 
cache_ack <= (sys_cyc)&&(sys_stb)&&(sys_addr == 5'h02);
// Counters -- for performance measurement and accounting
//
// Here's the stuff we'll be counting ....
467,8 → 449,6
wire [(AW-1):0] dc_addr;
wire cpu_gbl_cyc;
assign dmac_stb = (sys_stb)&&(sys_addr[4]);
// `define INCLUDE_DMA_CONTROLLER
`ifdef INCLUDE_DMA_CONTROLLER
wbdmac #(AW) dma_controller(i_clk,
sys_cyc, dmac_stb, sys_we,
sys_addr[1:0], sys_data,
482,24 → 462,8
dmac_int,
// Whether or not the CPU wants the bus
cpu_gbl_cyc);
`else
reg r_dmac_ack;
always @(posedge i_clk)
r_dmac_ack <= (sys_cyc)&&(dmac_stb);
assign dmac_ack = r_dmac_ack;
assign dmac_data = 32'h000;
assign dmac_stall = 1'b0;
 
assign dc_cyc = 1'b0;
assign dc_stb = 1'b0;
assign dc_we = 1'b0;
assign dc_addr = { (AW) {1'b0} };
assign dc_data = 32'h00;
 
assign dmac_int = 1'b0;
`endif
 
 
`ifdef INCLUDE_ACCOUNTING_COUNTERS
//
// Counter Interrupt controller
607,8 → 571,7
cpu_we, cpu_addr, cpu_data,
cpu_ack, cpu_stall, wb_data,
cpu_err,
cpu_op_stall, cpu_pf_stall, cpu_i_count,
o_cpu_debug);
cpu_op_stall, cpu_pf_stall, cpu_i_count);
 
// Now, arbitrate the bus ... first for the local peripherals
// For the debugger to have access to the local system bus, the
655,7 → 618,7
cpu_ext_err;
wire [(AW-1):0] ext_addr;
wire [31:0] ext_odata;
wbpriarbiter #(32,AW) dmacvcpu(i_clk,
wbpriarbiter #(32,AW) dmacvcpu(i_clk, i_rst,
cpu_gbl_cyc, cpu_gbl_stb, cpu_we, cpu_addr, cpu_data,
cpu_ext_ack, cpu_ext_stall, cpu_ext_err,
dc_cyc, dc_stb, dc_we, dc_addr, dc_data,
668,7 → 631,7
ext_cyc, ext_stb, ext_we, ext_addr, ext_odata,
ext_ack, ext_stall, ext_idata, ext_err,
o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data,
i_wb_ack, i_wb_stall, i_wb_data, (i_wb_err)||(wdbus_int));
i_wb_ack, i_wb_stall, i_wb_data, i_wb_err);
`else
assign o_wb_cyc = ext_cyc;
assign o_wb_stb = ext_stb;
678,7 → 641,7
assign ext_ack = i_wb_ack;
assign ext_stall = i_wb_stall;
assign ext_idata = i_wb_data;
assign ext_err = (i_wb_err)||(wdbus_int);
assign ext_err = i_wb_err;
`endif
 
wire tmr_ack;
691,13 → 654,13
assign wb_data = (tmr_ack|wdt_ack)?((tmr_ack)?tmr_data:wdt_data)
:((actr_ack|dmac_ack)?((actr_ack)?actr_data:dmac_data)
:((pic_ack|ctri_ack)?((pic_ack)?pic_data:ctri_data)
:((wdbus_ack)?wdbus_data:(ext_idata))));
:(ext_idata)));
 
assign sys_stall = (tma_stall | tmb_stall | tmc_stall | jif_stall
| wdt_stall | ctri_stall | actr_stall
| pic_stall | dmac_stall | wdbus_stall);
| pic_stall | dmac_stall | cache_stall);
assign cpu_stall = (sys_stall)|(cpu_ext_stall);
assign sys_ack = (tmr_ack|wdt_ack|ctri_ack|actr_ack|pic_ack|dmac_ack|wdbus_ack);
assign sys_ack = (tmr_ack|wdt_ack|ctri_ack|actr_ack|pic_ack|dmac_ack|cache_ack);
assign cpu_ack = (sys_ack)||(cpu_ext_ack);
assign cpu_err = (cpu_ext_err)&&(cpu_gbl_cyc);
 
/rtl/zipbones.v
41,8 → 41,7
o_ext_int,
// Wishbone slave interface for debugging purposes
i_dbg_cyc, i_dbg_stb, i_dbg_we, i_dbg_addr, i_dbg_data,
o_dbg_ack, o_dbg_stall, o_dbg_data,
o_zip_debug);
o_dbg_ack, o_dbg_stall, o_dbg_data);
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=32,
LGICACHE=6, START_HALTED=1,
AW=ADDRESS_WIDTH;
64,8 → 63,6
output reg o_dbg_ack;
output wire o_dbg_stall;
output wire [31:0] o_dbg_data;
//
output wire [31:0] o_zip_debug;
 
//
//
92,7 → 89,7
wire cpu_break, dbg_cmd_write;
reg cmd_reset, cmd_halt, cmd_step, cmd_clear_pf_cache;
reg [4:0] cmd_addr;
wire [3:0] cpu_dbg_cc;
wire [1:0] cpu_dbg_cc;
assign dbg_cmd_write = (i_dbg_cyc)&&(i_dbg_stb)&&(i_dbg_we)&&(~i_dbg_addr);
//
initial cmd_reset = 1'b1;
129,6 → 126,7
 
wire cpu_halt, cpu_dbg_stall;
assign cpu_halt = (i_rst)||((cmd_halt)&&(~cmd_step));
wire [31:0] pic_data;
wire [31:0] cmd_data;
// Values:
// 0x0003f -> cmd_addr mask
142,9 → 140,11
// 0x02000 -> cc.gie
// 0x10000 -> External interrupt line is high
assign cmd_data = { 7'h00, 8'h00, i_ext_int,
cpu_dbg_cc,
2'b00, cpu_dbg_cc,
1'b0, cmd_halt, (~cpu_dbg_stall), 1'b0,
1'b0, cpu_reset, 1'b0, cmd_addr };
pic_data[15], cpu_reset, 1'b0, cmd_addr };
wire cpu_gie;
assign cpu_gie = cpu_dbg_cc[1];
 
//
// The CPU itself
152,7 → 152,8
wire cpu_gbl_stb, cpu_lcl_cyc, cpu_lcl_stb,
cpu_we, cpu_dbg_we,
cpu_op_stall, cpu_pf_stall, cpu_i_count;
wire [31:0] cpu_data;
wire [31:0] cpu_data, wb_data;
wire cpu_ack, cpu_stall, cpu_err;
wire [31:0] cpu_dbg_data;
assign cpu_dbg_we = ((i_dbg_cyc)&&(i_dbg_stb)
&&(i_dbg_we)&&(i_dbg_addr));
164,10 → 165,9
o_wb_cyc, o_wb_stb,
cpu_lcl_cyc, cpu_lcl_stb,
o_wb_we, o_wb_addr, o_wb_data,
i_wb_ack, i_wb_stall, i_wb_data,
i_wb_ack, i_wb_stall, wb_data,
i_wb_err,
cpu_op_stall, cpu_pf_stall, cpu_i_count,
o_zip_debug);
cpu_op_stall, cpu_pf_stall, cpu_i_count);
 
// Return debug response values
assign o_dbg_data = (~i_dbg_addr)?cmd_data :cpu_dbg_data;
176,6 → 176,6
o_dbg_ack <= (i_dbg_cyc)&&((~i_dbg_addr)||(~o_dbg_stall));
assign o_dbg_stall=(i_dbg_cyc)&&(cpu_dbg_stall)&&(i_dbg_addr);
 
assign o_ext_int = (cmd_halt) && (~i_wb_stall);
assign o_ext_int = (cmd_halt) && (~cpu_stall);
 
endmodule
/rtl/peripherals/ziptimer.v
113,7 → 113,7
if (wb_write)
r_value <= i_wb_data[(VW-1):0];
else if ((r_running)&&(i_ce)&&(~o_int))
r_value <= r_value + {(VW){1'b1}}; // r_value - 1;
r_value <= r_value - 1;
else if ((r_running)&&(r_auto_reload)&&(o_int))
r_value <= r_reload_value;
 
/rtl/Makefile
37,7 → 37,7
CORED:= core
PRPHD:= peripherals
AUXD := aux
VSRC := zipsystem.v cpudefs.v \
VSRC := zipsystem.v \
$(PRPHD)/wbdmac.v $(PRPHD)/icontrol.v \
$(PRPHD)/zipcounter.v $(PRPHD)/zipjiffies.v \
$(PRPHD)/ziptimer.v $(PRPHD)/ziptrap.v \
46,7 → 46,7
$(CORED)/memops.v $(CORED)/pipemem.v \
$(AUXD)/busdelay.v \
$(AUXD)/wbdblpriarb.v $(AUXD)/wbpriarbiter.v
VZIP := zipbones.v cpudefs.v \
VZIP := zipbones.v \
$(CORED)/zipcpu.v $(CORED)/cpuops.v \
$(CORED)/pipefetch.v $(CORED)/prefetch.v \
$(CORED)/memops.v $(CORED)/pipemem.v \
56,11 → 56,9
 
$(VOBJ)/Vzipsystem.cpp: $(VSRC)
verilator -cc -y $(CORED) -y $(PRPHD) -y $(AUXD) zipsystem.v
$(VOBJ)/Vzipsystem.h: $(VOBJ)/Vzipsystem.cpp
 
$(VOBJ)/Vzipbones.cpp: $(VZIP)
verilator -cc -y $(CORED) -y $(PRPHD) -y $(AUXD) zipbones.v
$(VOBJ)/Vzipbones.h: $(VOBJ)/Vzipbones.cpp
 
$(VOBJ)/Vzipsystem__ALL.a: $(VOBJ)/Vzipsystem.cpp $(VOBJ)/Vzipsystem.h
cd $(VOBJ); make -f Vzipsystem.mk
68,11 → 66,9
$(VOBJ)/Vzipbones__ALL.a: $(VOBJ)/Vzipbones.cpp $(VOBJ)/Vzipbones.h
cd $(VOBJ); make -f Vzipbones.mk
 
cpudefs.h: cpudefs.v
echo "// " > $@
echo "// Do not edit this file, it is automatically generated!" >> $@
echo "// " >> $@
grep "^\`" $^ | sed -e '{ s/^`/#/ }' >> $@
cpudefs.h: $(CORED)/zipcpu.v
@echo "// Do not edit this file, it is automatically generated!" > $@
@grep ^.define $^ | grep OPT_ | sed -e '{ s/^.d/#d/ }' >> $@
 
.PHONY: zipsystem
zipsystem: $(VOBJ)/Vzipsystem__ALL.a
/rtl/aux/wbpriarbiter.v
45,7 → 45,7
//
///////////////////////////////////////////////////////////////////////////
//
module wbpriarbiter(i_clk,
module wbpriarbiter(i_clk, i_rst,
// Bus A
i_a_cyc, i_a_stb, i_a_we, i_a_adr, i_a_dat, o_a_ack, o_a_stall, o_a_err,
// Bus B
53,8 → 53,9
// Both buses
o_cyc, o_stb, o_we, o_adr, o_dat, i_ack, i_stall, i_err);
parameter DW=32, AW=32;
//
input i_clk;
// Wishbone doesn't use an i_ce signal. While it could, they dislike
// what it would (might) do to the synchronous reset signal, i_rst.
input i_clk, i_rst;
// Bus A
input i_a_cyc, i_a_stb, i_a_we;
input [(AW-1):0] i_a_adr;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.